aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Monniaux <david.monniaux@univ-grenoble-alpes.fr>2019-08-30 19:10:42 +0200
committerDavid Monniaux <david.monniaux@univ-grenoble-alpes.fr>2019-08-30 19:10:42 +0200
commitcaac487ae23a9785602cf235f5b4a2b6749f2c18 (patch)
treee81ae59a6e06a0373afdfbf8f1ee94f16a2770f2
parent1522f289301f37da0324570297c65256d8a32316 (diff)
downloadcompcert-kvx-caac487ae23a9785602cf235f5b4a2b6749f2c18.tar.gz
compcert-kvx-caac487ae23a9785602cf235f5b4a2b6749f2c18.zip
fma
-rw-r--r--mppa_k1c/Asm.v8
-rw-r--r--mppa_k1c/Asmblockdeps.v4
-rw-r--r--mppa_k1c/Asmblockgen.v25
-rw-r--r--mppa_k1c/Asmvliw.v8
-rw-r--r--mppa_k1c/Builtins1.v12
-rw-r--r--mppa_k1c/CBuiltins.ml6
-rw-r--r--mppa_k1c/ExtValues.v8
-rw-r--r--mppa_k1c/Machregs.v2
-rw-r--r--mppa_k1c/PostpassSchedulingOracle.ml11
-rw-r--r--mppa_k1c/SelectOp.vp16
-rw-r--r--mppa_k1c/SelectOpproof.v53
-rw-r--r--mppa_k1c/TargetPrinter.ml8
-rw-r--r--mppa_k1c/ValueAOp.v8
-rw-r--r--test/monniaux/builtins/fma.c12
14 files changed, 166 insertions, 15 deletions
diff --git a/mppa_k1c/Asm.v b/mppa_k1c/Asm.v
index b7818aaf..ce376af9 100644
--- a/mppa_k1c/Asm.v
+++ b/mppa_k1c/Asm.v
@@ -200,6 +200,10 @@ Inductive instruction : Type :=
| Psllw (rd rs1 rs2: ireg) (**r shift left logical word *)
| Pmaddw (rd rs1 rs2: ireg) (**r multiply-add words *)
| Pmsubw (rd rs1 rs2: ireg) (**r multiply-add words *)
+ | Pfmaddfw (rd rs1 rs2: ireg) (**r float fused multiply-add words *)
+ | Pfmsubfw (rd rs1 rs2: ireg) (**r float fused multiply-subtract words *)
+ | Pfmaddfl (rd rs1 rs2: ireg) (**r float fused multiply-add longs *)
+ | Pfmsubfl (rd rs1 rs2: ireg) (**r float fused multiply-subtract longs *)
| Paddl (rd rs1 rs2: ireg) (**r add long *)
| Paddxl (shift : shift1_4) (rd rs1 rs2: ireg) (**r add long shift *)
@@ -452,6 +456,10 @@ Definition basic_to_instruction (b: basic) :=
| PArithARRR Asmvliw.Pmaddl rd rs1 rs2 => Pmaddl rd rs1 rs2
| PArithARRR Asmvliw.Pmsubw rd rs1 rs2 => Pmsubw rd rs1 rs2
| PArithARRR Asmvliw.Pmsubl rd rs1 rs2 => Pmsubl rd rs1 rs2
+ | PArithARRR Asmvliw.Pfmaddfw rd rs1 rs2 => Pfmaddfw rd rs1 rs2
+ | PArithARRR Asmvliw.Pfmaddfl rd rs1 rs2 => Pfmaddfl rd rs1 rs2
+ | PArithARRR Asmvliw.Pfmsubfw rd rs1 rs2 => Pfmsubfw rd rs1 rs2
+ | PArithARRR Asmvliw.Pfmsubfl rd rs1 rs2 => Pfmsubfl rd rs1 rs2
| PArithARRR (Asmvliw.Pcmove cond) rd rs1 rs2=> Pcmove cond rd rs1 rs2
| PArithARRR (Asmvliw.Pcmoveu cond) rd rs1 rs2=> Pcmoveu cond rd rs1 rs2
diff --git a/mppa_k1c/Asmblockdeps.v b/mppa_k1c/Asmblockdeps.v
index 61caeaf1..6743ae4c 100644
--- a/mppa_k1c/Asmblockdeps.v
+++ b/mppa_k1c/Asmblockdeps.v
@@ -1483,6 +1483,10 @@ Definition string_of_name_arrr (n: arith_name_arrr): pstring :=
| Pmsubl => "Pmsubl"
| Pcmove _ => "Pcmove"
| Pcmoveu _ => "Pcmoveu"
+ | Pfmaddfw => "Pfmaddfw"
+ | Pfmaddfl => "Pfmaddfl"
+ | Pfmsubfw => "Pfmsubfw"
+ | Pfmsubfl => "Pfmsubfl"
end.
Definition string_of_name_arr (n: arith_name_arr): pstring :=
diff --git a/mppa_k1c/Asmblockgen.v b/mppa_k1c/Asmblockgen.v
index c2a36ff7..c717af95 100644
--- a/mppa_k1c/Asmblockgen.v
+++ b/mppa_k1c/Asmblockgen.v
@@ -797,6 +797,31 @@ Definition transl_op
do rd <- freg_of res; do rs <- freg_of a1;
OK (Pfinvw rd rs ::i k)
+ | Ofmaddf, a1 :: a2 :: a3 :: nil =>
+ assertion (mreg_eq a1 res);
+ do rs1 <- freg_of a1;
+ do rs2 <- freg_of a2;
+ do rs3 <- freg_of a3;
+ OK (Pfmaddfl rs1 rs2 rs3 ::i k)
+ | Ofmaddfs, a1 :: a2 :: a3 :: nil =>
+ assertion (mreg_eq a1 res);
+ do rs1 <- freg_of a1;
+ do rs2 <- freg_of a2;
+ do rs3 <- freg_of a3;
+ OK (Pfmaddfw rs1 rs2 rs3 ::i k)
+ | Ofmsubf, a1 :: a2 :: a3 :: nil =>
+ assertion (mreg_eq a1 res);
+ do rs1 <- freg_of a1;
+ do rs2 <- freg_of a2;
+ do rs3 <- freg_of a3;
+ OK (Pfmsubfl rs1 rs2 rs3 ::i k)
+ | Ofmsubfs, a1 :: a2 :: a3 :: nil =>
+ assertion (mreg_eq a1 res);
+ do rs1 <- freg_of a1;
+ do rs2 <- freg_of a2;
+ do rs3 <- freg_of a3;
+ OK (Pfmsubfw rs1 rs2 rs3 ::i k)
+
| Osingleofint, a1 :: nil =>
do rd <- freg_of res; do rs <- ireg_of a1;
OK (Pfloatwrnsz rd rs ::i k)
diff --git a/mppa_k1c/Asmvliw.v b/mppa_k1c/Asmvliw.v
index cb9ce7ae..b0f8501d 100644
--- a/mppa_k1c/Asmvliw.v
+++ b/mppa_k1c/Asmvliw.v
@@ -528,6 +528,10 @@ Inductive arith_name_arrr : Type :=
| Pmsubl (**r multiply subtract long *)
| Pcmove (bt: btest) (**r conditional move *)
| Pcmoveu (bt: btest) (**r conditional move, test on unsigned semantics *)
+ | Pfmaddfw (**r float fused multiply add word *)
+ | Pfmaddfl (**r float fused multiply add long *)
+ | Pfmsubfw (**r float fused multiply subtract word *)
+ | Pfmsubfl (**r float fused multiply subtract long *)
.
Inductive arith_name_arri32 : Type :=
@@ -1177,6 +1181,10 @@ Definition arith_eval_arrr n v1 v2 v3 :=
| Pmsubl => Val.subl v1 (Val.mull v2 v3)
| Pcmove bt => cmove bt v1 v2 v3
| Pcmoveu bt => cmoveu bt v1 v2 v3
+ | Pfmaddfw => ExtValues.fmaddfs v1 v2 v3
+ | Pfmaddfl => ExtValues.fmaddf v1 v2 v3
+ | Pfmsubfw => ExtValues.fmsubfs v1 v2 v3
+ | Pfmsubfl => ExtValues.fmsubf v1 v2 v3
end.
Definition arith_eval_arr n v1 v2 :=
diff --git a/mppa_k1c/Builtins1.v b/mppa_k1c/Builtins1.v
index 5187ea7d..6186961f 100644
--- a/mppa_k1c/Builtins1.v
+++ b/mppa_k1c/Builtins1.v
@@ -24,7 +24,9 @@ Inductive platform_builtin : Type :=
| BI_fmax
| BI_fminf
| BI_fmaxf
-| BI_fabsf.
+| BI_fabsf
+| BI_fma
+| BI_fmaf.
Local Open Scope string_scope.
@@ -34,6 +36,8 @@ Definition platform_builtin_table : list (string * platform_builtin) :=
:: ("__builtin_fminf", BI_fminf)
:: ("__builtin_fmaxf", BI_fmaxf)
:: ("__builtin_fabsf", BI_fabsf)
+ :: ("__builtin_fma", BI_fma)
+ :: ("__builtin_fmaf", BI_fmaf)
:: nil.
Definition platform_builtin_sig (b: platform_builtin) : signature :=
@@ -44,6 +48,10 @@ Definition platform_builtin_sig (b: platform_builtin) : signature :=
mksignature (Tsingle :: Tsingle :: nil) (Some Tsingle) cc_default
| BI_fabsf =>
mksignature (Tsingle :: nil) (Some Tsingle) cc_default
+ | BI_fma =>
+ mksignature (Tfloat :: Tfloat :: Tfloat :: nil) (Some Tfloat) cc_default
+ | BI_fmaf =>
+ mksignature (Tsingle :: Tsingle :: Tsingle :: nil) (Some Tsingle) cc_default
end.
Definition platform_builtin_sem (b: platform_builtin) : builtin_sem (proj_sig_res (platform_builtin_sig b)) :=
@@ -53,4 +61,6 @@ Definition platform_builtin_sem (b: platform_builtin) : builtin_sem (proj_sig_re
| BI_fminf => mkbuiltin_n2t Tsingle Tsingle Tsingle ExtFloat32.min
| BI_fmaxf => mkbuiltin_n2t Tsingle Tsingle Tsingle ExtFloat32.max
| BI_fabsf => mkbuiltin_n1t Tsingle Tsingle Float32.abs
+ | BI_fma => mkbuiltin_n3t Tfloat Tfloat Tfloat Tfloat Float.fma
+ | BI_fmaf => mkbuiltin_n3t Tsingle Tsingle Tsingle Tsingle Float32.fma
end.
diff --git a/mppa_k1c/CBuiltins.ml b/mppa_k1c/CBuiltins.ml
index c0022cb1..3ae6baa7 100644
--- a/mppa_k1c/CBuiltins.ml
+++ b/mppa_k1c/CBuiltins.ml
@@ -130,6 +130,12 @@ let builtins = {
"__builtin_fminf",
(TFloat(FFloat, []),
[TFloat(FFloat, []); TFloat(FFloat, [])], false);
+ "__builtin_fma",
+ (TFloat(FDouble, []),
+ [TFloat(FDouble, []); TFloat(FDouble, []); TFloat(FDouble, [])], false);
+ "__builtin_fmaf",
+ (TFloat(FFloat, []),
+ [TFloat(FFloat, []); TFloat(FFloat, []); TFloat(FFloat, [])], false);
]
}
diff --git a/mppa_k1c/ExtValues.v b/mppa_k1c/ExtValues.v
index 3e4b70b5..a8e24c86 100644
--- a/mppa_k1c/ExtValues.v
+++ b/mppa_k1c/ExtValues.v
@@ -715,8 +715,8 @@ Definition triple_op_single f v1 v2 v3 :=
| _, _, _ => Vundef
end.
-Definition fmaddf := triple_op_float Float.fma.
-Definition fmaddfs := triple_op_single Float32.fma.
+Definition fmaddf := triple_op_float (fun f1 f2 f3 => Float.fma f2 f3 f1).
+Definition fmaddfs := triple_op_single (fun f1 f2 f3 => Float32.fma f2 f3 f1).
-Definition fmsubf := triple_op_float (fun f1 f2 f3 => Float.fma f1 (Float.neg f2) f3).
-Definition fmsubfs := triple_op_single (fun f1 f2 f3 => Float32.fma f1 (Float32.neg f2) f3).
+Definition fmsubf := triple_op_float (fun f1 f2 f3 => Float.fma (Float.neg f2) f3 f1).
+Definition fmsubfs := triple_op_single (fun f1 f2 f3 => Float32.fma (Float32.neg f2) f3 f1).
diff --git a/mppa_k1c/Machregs.v b/mppa_k1c/Machregs.v
index 5a7d42b4..8098b5d1 100644
--- a/mppa_k1c/Machregs.v
+++ b/mppa_k1c/Machregs.v
@@ -213,6 +213,8 @@ Global Opaque
Definition two_address_op (op: operation) : bool :=
match op with
+ | Ofmaddf | Ofmaddfs
+ | Ofmsubf | Ofmsubfs
| Omadd | Omaddimm _
| Omaddl | Omaddlimm _
| Omsub | Omsubl
diff --git a/mppa_k1c/PostpassSchedulingOracle.ml b/mppa_k1c/PostpassSchedulingOracle.ml
index e2baa2c0..628ae609 100644
--- a/mppa_k1c/PostpassSchedulingOracle.ml
+++ b/mppa_k1c/PostpassSchedulingOracle.ml
@@ -32,6 +32,7 @@ type real_instruction =
| Fabsd | Fabsw | Fnegw | Fnegd
| Faddd | Faddw | Fsbfd | Fsbfw | Fmuld | Fmulw
| Fmind | Fminw | Fmaxd | Fmaxw | Finvw
+ | Ffmaw | Ffmad | Ffmsw | Ffmsd
| Fnarrowdw | Fwidenlwd | Floatwz | Floatuwz | Floatdz | Floatudz | Fixedwz | Fixeduwz | Fixeddz | Fixedudz
| Fcompw | Fcompd
@@ -173,6 +174,10 @@ let arith_arr_real = function
| Pinsfl (_, _) -> Insf
let arith_arrr_real = function
+ | Pfmaddfw -> Ffmaw
+ | Pfmaddfl -> Ffmad
+ | Pfmsubfw -> Ffmsw
+ | Pfmsubfl -> Ffmsd
| Pmaddw -> Maddw
| Pmaddl -> Maddd
| Pmsubw -> Msbfw
@@ -587,7 +592,8 @@ let rec_to_usage r =
| Fnegd | Fnegw | Fabsd | Fabsw | Fwidenlwd
| Fmind | Fmaxd | Fminw | Fmaxw -> alu_lite
| Fnarrowdw -> alu_full
- | Faddd | Faddw | Fsbfd | Fsbfw | Fmuld | Fmulw | Finvw -> mau
+ | Faddd | Faddw | Fsbfd | Fsbfw | Fmuld | Fmulw | Finvw
+ | Ffmad | Ffmaw | Ffmsd | Ffmsw -> mau
let real_inst_to_latency = function
@@ -608,7 +614,8 @@ let real_inst_to_latency = function
| Set -> 4 (* According to the manual should be 3, but I measured 4 *)
| Icall | Call | Cb | Igoto | Goto | Ret -> 42 (* Should not matter since it's the final instruction of the basic block *)
| Fnegd | Fnegw | Fabsd | Fabsw | Fwidenlwd | Fnarrowdw -> 1
- | Faddd | Faddw | Fsbfd | Fsbfw | Fmuld | Fmulw | Finvw -> 4
+ | Faddd | Faddw | Fsbfd | Fsbfw | Fmuld | Fmulw | Finvw
+ | Ffmaw | Ffmad | Ffmsw | Ffmsd -> 4
let rec_to_info r : inst_info =
let usage = rec_to_usage r
diff --git a/mppa_k1c/SelectOp.vp b/mppa_k1c/SelectOp.vp
index 6539184c..71078046 100644
--- a/mppa_k1c/SelectOp.vp
+++ b/mppa_k1c/SelectOp.vp
@@ -683,7 +683,18 @@ Nondetfunction divfs_base (e1: expr) :=
else divfs_baseX e1)
| _ => divfs_baseX e1
end.
-End SELECT.
+
+Nondetfunction gen_fma args :=
+ match args with
+ | e1:::e2:::e3:::Enil => Some (Eop Ofmaddf (e3:::e1:::e2:::Enil))
+ | _ => None
+ end.
+
+Nondetfunction gen_fmaf args :=
+ match args with
+ | e1:::e2:::e3:::Enil => Some (Eop Ofmaddfs (e3:::e1:::e2:::Enil))
+ | _ => None
+ end.
Definition platform_builtin (b: platform_builtin) (args: exprlist) : option expr :=
match b with
@@ -692,7 +703,10 @@ Definition platform_builtin (b: platform_builtin) (args: exprlist) : option expr
| BI_fminf => Some (Eop Ominfs args)
| BI_fmaxf => Some (Eop Omaxfs args)
| BI_fabsf => Some (Eop Oabsfs args)
+ | BI_fma => gen_fma args
+ | BI_fmaf => gen_fmaf args
end.
+End SELECT.
(* Local Variables: *)
(* mode: coq *)
diff --git a/mppa_k1c/SelectOpproof.v b/mppa_k1c/SelectOpproof.v
index 7805a1be..08bcff12 100644
--- a/mppa_k1c/SelectOpproof.v
+++ b/mppa_k1c/SelectOpproof.v
@@ -1663,6 +1663,50 @@ Qed.
(** Platform-specific known builtins *)
+Lemma eval_fma:
+ forall al a vl v le,
+ gen_fma al = Some a ->
+ eval_exprlist ge sp e m le al vl ->
+ platform_builtin_sem BI_fma vl = Some v ->
+ exists v', eval_expr ge sp e m le a v' /\ Val.lessdef v v'.
+Proof.
+ unfold gen_fma.
+ intros until le.
+ intro Heval.
+ destruct (gen_fma_match _) in *; try discriminate.
+ inversion Heval; subst a; clear Heval.
+ intro; InvEval.
+ intro Heval.
+ simpl in Heval.
+ inv Heval.
+ TrivialExists.
+ destruct v0; simpl; trivial;
+ destruct v1; simpl; trivial;
+ destruct v2; simpl; trivial.
+Qed.
+
+Lemma eval_fmaf:
+ forall al a vl v le,
+ gen_fmaf al = Some a ->
+ eval_exprlist ge sp e m le al vl ->
+ platform_builtin_sem BI_fmaf vl = Some v ->
+ exists v', eval_expr ge sp e m le a v' /\ Val.lessdef v v'.
+Proof.
+ unfold gen_fmaf.
+ intros until le.
+ intro Heval.
+ destruct (gen_fmaf_match _) in *; try discriminate.
+ inversion Heval; subst a; clear Heval.
+ intro; InvEval.
+ intro Heval.
+ simpl in Heval.
+ inv Heval.
+ TrivialExists.
+ destruct v0; simpl; trivial;
+ destruct v1; simpl; trivial;
+ destruct v2; simpl; trivial.
+Qed.
+
Theorem eval_platform_builtin:
forall bf al a vl v le,
platform_builtin bf al = Some a ->
@@ -1670,9 +1714,12 @@ Theorem eval_platform_builtin:
platform_builtin_sem bf vl = Some v ->
exists v', eval_expr ge sp e m le a v' /\ Val.lessdef v v'.
Proof.
- destruct bf; intros until le; intro Heval; inversion Heval; subst a; clear Heval.
- all: exists v; split; trivial;
- try repeat (try econstructor; try eassumption).
+ destruct bf; intros until le; intro Heval.
+ all: try (inversion Heval; subst a; clear Heval;
+ exists v; split; trivial;
+ repeat (try econstructor; try eassumption)).
+ - apply eval_fma; assumption.
+ - apply eval_fmaf; assumption.
Qed.
End CMCONSTR.
diff --git a/mppa_k1c/TargetPrinter.ml b/mppa_k1c/TargetPrinter.ml
index 3d3b56a2..e626d2b4 100644
--- a/mppa_k1c/TargetPrinter.ml
+++ b/mppa_k1c/TargetPrinter.ml
@@ -574,6 +574,10 @@ module Target (*: TARGET*) =
fprintf oc " maddw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
| Pmsubw (rd, rs1, rs2) ->
fprintf oc " msbfw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+ | Pfmaddfw (rd, rs1, rs2) ->
+ fprintf oc " ffmaw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+ | Pfmsubfw (rd, rs1, rs2) ->
+ fprintf oc " ffmsw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
| Paddl (rd, rs1, rs2) ->
fprintf oc " addd %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
@@ -615,6 +619,10 @@ module Target (*: TARGET*) =
fprintf oc " maddd %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
| Pmsubl (rd, rs1, rs2) ->
fprintf oc " msbfd %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+ | Pfmaddfl (rd, rs1, rs2) ->
+ fprintf oc " ffmad %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
+ | Pfmsubfl (rd, rs1, rs2) ->
+ fprintf oc " ffmsd %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
| Pfaddd (rd, rs1, rs2) ->
fprintf oc " faddd %a = %a, %a\n" ireg rd ireg rs1 ireg rs2
diff --git a/mppa_k1c/ValueAOp.v b/mppa_k1c/ValueAOp.v
index 4c5fcf71..2c9bdf3e 100644
--- a/mppa_k1c/ValueAOp.v
+++ b/mppa_k1c/ValueAOp.v
@@ -33,10 +33,10 @@ Definition triple_op_single (sem: float32 -> float32 -> float32 -> float32) (x y
| _, _, _ => ntop3 x y z
end.
-Definition fmaddf := triple_op_float Float.fma.
-Definition fmsubf := triple_op_float (fun x y z => Float.fma x (Float.neg y) z).
-Definition fmaddfs := triple_op_single Float32.fma.
-Definition fmsubfs := triple_op_single (fun x y z => Float32.fma x (Float32.neg y) z).
+Definition fmaddf := triple_op_float (fun x y z => Float.fma y z x).
+Definition fmsubf := triple_op_float (fun x y z => Float.fma (Float.neg y) z x).
+Definition fmaddfs := triple_op_single (fun x y z => Float32.fma y z x).
+Definition fmsubfs := triple_op_single (fun x y z => Float32.fma (Float32.neg y) z x).
Definition invfs (y : aval) :=
match y with
diff --git a/test/monniaux/builtins/fma.c b/test/monniaux/builtins/fma.c
new file mode 100644
index 00000000..0952e8f3
--- /dev/null
+++ b/test/monniaux/builtins/fma.c
@@ -0,0 +1,12 @@
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+int main(int argc, char **argv) {
+ if (argc < 4) return 1;
+ double x = strtod(argv[1], NULL);
+ double y = strtod(argv[2], NULL);
+ double z = strtod(argv[3], NULL);
+ printf("%g %g %g\n", __builtin_fma(x, y, z), fma(x, y, z), x*y + z);
+ return 0;
+}