diff options
author | David Monniaux <david.monniaux@univ-grenoble-alpes.fr> | 2019-08-30 19:10:42 +0200 |
---|---|---|
committer | David Monniaux <david.monniaux@univ-grenoble-alpes.fr> | 2019-08-30 19:10:42 +0200 |
commit | caac487ae23a9785602cf235f5b4a2b6749f2c18 (patch) | |
tree | e81ae59a6e06a0373afdfbf8f1ee94f16a2770f2 | |
parent | 1522f289301f37da0324570297c65256d8a32316 (diff) | |
download | compcert-kvx-caac487ae23a9785602cf235f5b4a2b6749f2c18.tar.gz compcert-kvx-caac487ae23a9785602cf235f5b4a2b6749f2c18.zip |
fma
-rw-r--r-- | mppa_k1c/Asm.v | 8 | ||||
-rw-r--r-- | mppa_k1c/Asmblockdeps.v | 4 | ||||
-rw-r--r-- | mppa_k1c/Asmblockgen.v | 25 | ||||
-rw-r--r-- | mppa_k1c/Asmvliw.v | 8 | ||||
-rw-r--r-- | mppa_k1c/Builtins1.v | 12 | ||||
-rw-r--r-- | mppa_k1c/CBuiltins.ml | 6 | ||||
-rw-r--r-- | mppa_k1c/ExtValues.v | 8 | ||||
-rw-r--r-- | mppa_k1c/Machregs.v | 2 | ||||
-rw-r--r-- | mppa_k1c/PostpassSchedulingOracle.ml | 11 | ||||
-rw-r--r-- | mppa_k1c/SelectOp.vp | 16 | ||||
-rw-r--r-- | mppa_k1c/SelectOpproof.v | 53 | ||||
-rw-r--r-- | mppa_k1c/TargetPrinter.ml | 8 | ||||
-rw-r--r-- | mppa_k1c/ValueAOp.v | 8 | ||||
-rw-r--r-- | test/monniaux/builtins/fma.c | 12 |
14 files changed, 166 insertions, 15 deletions
diff --git a/mppa_k1c/Asm.v b/mppa_k1c/Asm.v index b7818aaf..ce376af9 100644 --- a/mppa_k1c/Asm.v +++ b/mppa_k1c/Asm.v @@ -200,6 +200,10 @@ Inductive instruction : Type := | Psllw (rd rs1 rs2: ireg) (**r shift left logical word *)
| Pmaddw (rd rs1 rs2: ireg) (**r multiply-add words *)
| Pmsubw (rd rs1 rs2: ireg) (**r multiply-add words *)
+ | Pfmaddfw (rd rs1 rs2: ireg) (**r float fused multiply-add words *)
+ | Pfmsubfw (rd rs1 rs2: ireg) (**r float fused multiply-subtract words *)
+ | Pfmaddfl (rd rs1 rs2: ireg) (**r float fused multiply-add longs *)
+ | Pfmsubfl (rd rs1 rs2: ireg) (**r float fused multiply-subtract longs *)
| Paddl (rd rs1 rs2: ireg) (**r add long *)
| Paddxl (shift : shift1_4) (rd rs1 rs2: ireg) (**r add long shift *)
@@ -452,6 +456,10 @@ Definition basic_to_instruction (b: basic) := | PArithARRR Asmvliw.Pmaddl rd rs1 rs2 => Pmaddl rd rs1 rs2
| PArithARRR Asmvliw.Pmsubw rd rs1 rs2 => Pmsubw rd rs1 rs2
| PArithARRR Asmvliw.Pmsubl rd rs1 rs2 => Pmsubl rd rs1 rs2
+ | PArithARRR Asmvliw.Pfmaddfw rd rs1 rs2 => Pfmaddfw rd rs1 rs2
+ | PArithARRR Asmvliw.Pfmaddfl rd rs1 rs2 => Pfmaddfl rd rs1 rs2
+ | PArithARRR Asmvliw.Pfmsubfw rd rs1 rs2 => Pfmsubfw rd rs1 rs2
+ | PArithARRR Asmvliw.Pfmsubfl rd rs1 rs2 => Pfmsubfl rd rs1 rs2
| PArithARRR (Asmvliw.Pcmove cond) rd rs1 rs2=> Pcmove cond rd rs1 rs2
| PArithARRR (Asmvliw.Pcmoveu cond) rd rs1 rs2=> Pcmoveu cond rd rs1 rs2
diff --git a/mppa_k1c/Asmblockdeps.v b/mppa_k1c/Asmblockdeps.v index 61caeaf1..6743ae4c 100644 --- a/mppa_k1c/Asmblockdeps.v +++ b/mppa_k1c/Asmblockdeps.v @@ -1483,6 +1483,10 @@ Definition string_of_name_arrr (n: arith_name_arrr): pstring := | Pmsubl => "Pmsubl" | Pcmove _ => "Pcmove" | Pcmoveu _ => "Pcmoveu" + | Pfmaddfw => "Pfmaddfw" + | Pfmaddfl => "Pfmaddfl" + | Pfmsubfw => "Pfmsubfw" + | Pfmsubfl => "Pfmsubfl" end. Definition string_of_name_arr (n: arith_name_arr): pstring := diff --git a/mppa_k1c/Asmblockgen.v b/mppa_k1c/Asmblockgen.v index c2a36ff7..c717af95 100644 --- a/mppa_k1c/Asmblockgen.v +++ b/mppa_k1c/Asmblockgen.v @@ -797,6 +797,31 @@ Definition transl_op do rd <- freg_of res; do rs <- freg_of a1; OK (Pfinvw rd rs ::i k) + | Ofmaddf, a1 :: a2 :: a3 :: nil => + assertion (mreg_eq a1 res); + do rs1 <- freg_of a1; + do rs2 <- freg_of a2; + do rs3 <- freg_of a3; + OK (Pfmaddfl rs1 rs2 rs3 ::i k) + | Ofmaddfs, a1 :: a2 :: a3 :: nil => + assertion (mreg_eq a1 res); + do rs1 <- freg_of a1; + do rs2 <- freg_of a2; + do rs3 <- freg_of a3; + OK (Pfmaddfw rs1 rs2 rs3 ::i k) + | Ofmsubf, a1 :: a2 :: a3 :: nil => + assertion (mreg_eq a1 res); + do rs1 <- freg_of a1; + do rs2 <- freg_of a2; + do rs3 <- freg_of a3; + OK (Pfmsubfl rs1 rs2 rs3 ::i k) + | Ofmsubfs, a1 :: a2 :: a3 :: nil => + assertion (mreg_eq a1 res); + do rs1 <- freg_of a1; + do rs2 <- freg_of a2; + do rs3 <- freg_of a3; + OK (Pfmsubfw rs1 rs2 rs3 ::i k) + | Osingleofint, a1 :: nil => do rd <- freg_of res; do rs <- ireg_of a1; OK (Pfloatwrnsz rd rs ::i k) diff --git a/mppa_k1c/Asmvliw.v b/mppa_k1c/Asmvliw.v index cb9ce7ae..b0f8501d 100644 --- a/mppa_k1c/Asmvliw.v +++ b/mppa_k1c/Asmvliw.v @@ -528,6 +528,10 @@ Inductive arith_name_arrr : Type := | Pmsubl (**r multiply subtract long *) | Pcmove (bt: btest) (**r conditional move *) | Pcmoveu (bt: btest) (**r conditional move, test on unsigned semantics *) + | Pfmaddfw (**r float fused multiply add word *) + | Pfmaddfl (**r float fused multiply add long *) + | Pfmsubfw (**r float fused multiply subtract word *) + | Pfmsubfl (**r float fused multiply subtract long *) . Inductive arith_name_arri32 : Type := @@ -1177,6 +1181,10 @@ Definition arith_eval_arrr n v1 v2 v3 := | Pmsubl => Val.subl v1 (Val.mull v2 v3) | Pcmove bt => cmove bt v1 v2 v3 | Pcmoveu bt => cmoveu bt v1 v2 v3 + | Pfmaddfw => ExtValues.fmaddfs v1 v2 v3 + | Pfmaddfl => ExtValues.fmaddf v1 v2 v3 + | Pfmsubfw => ExtValues.fmsubfs v1 v2 v3 + | Pfmsubfl => ExtValues.fmsubf v1 v2 v3 end. Definition arith_eval_arr n v1 v2 := diff --git a/mppa_k1c/Builtins1.v b/mppa_k1c/Builtins1.v index 5187ea7d..6186961f 100644 --- a/mppa_k1c/Builtins1.v +++ b/mppa_k1c/Builtins1.v @@ -24,7 +24,9 @@ Inductive platform_builtin : Type := | BI_fmax | BI_fminf | BI_fmaxf -| BI_fabsf. +| BI_fabsf +| BI_fma +| BI_fmaf. Local Open Scope string_scope. @@ -34,6 +36,8 @@ Definition platform_builtin_table : list (string * platform_builtin) := :: ("__builtin_fminf", BI_fminf) :: ("__builtin_fmaxf", BI_fmaxf) :: ("__builtin_fabsf", BI_fabsf) + :: ("__builtin_fma", BI_fma) + :: ("__builtin_fmaf", BI_fmaf) :: nil. Definition platform_builtin_sig (b: platform_builtin) : signature := @@ -44,6 +48,10 @@ Definition platform_builtin_sig (b: platform_builtin) : signature := mksignature (Tsingle :: Tsingle :: nil) (Some Tsingle) cc_default | BI_fabsf => mksignature (Tsingle :: nil) (Some Tsingle) cc_default + | BI_fma => + mksignature (Tfloat :: Tfloat :: Tfloat :: nil) (Some Tfloat) cc_default + | BI_fmaf => + mksignature (Tsingle :: Tsingle :: Tsingle :: nil) (Some Tsingle) cc_default end. Definition platform_builtin_sem (b: platform_builtin) : builtin_sem (proj_sig_res (platform_builtin_sig b)) := @@ -53,4 +61,6 @@ Definition platform_builtin_sem (b: platform_builtin) : builtin_sem (proj_sig_re | BI_fminf => mkbuiltin_n2t Tsingle Tsingle Tsingle ExtFloat32.min | BI_fmaxf => mkbuiltin_n2t Tsingle Tsingle Tsingle ExtFloat32.max | BI_fabsf => mkbuiltin_n1t Tsingle Tsingle Float32.abs + | BI_fma => mkbuiltin_n3t Tfloat Tfloat Tfloat Tfloat Float.fma + | BI_fmaf => mkbuiltin_n3t Tsingle Tsingle Tsingle Tsingle Float32.fma end. diff --git a/mppa_k1c/CBuiltins.ml b/mppa_k1c/CBuiltins.ml index c0022cb1..3ae6baa7 100644 --- a/mppa_k1c/CBuiltins.ml +++ b/mppa_k1c/CBuiltins.ml @@ -130,6 +130,12 @@ let builtins = { "__builtin_fminf", (TFloat(FFloat, []), [TFloat(FFloat, []); TFloat(FFloat, [])], false); + "__builtin_fma", + (TFloat(FDouble, []), + [TFloat(FDouble, []); TFloat(FDouble, []); TFloat(FDouble, [])], false); + "__builtin_fmaf", + (TFloat(FFloat, []), + [TFloat(FFloat, []); TFloat(FFloat, []); TFloat(FFloat, [])], false); ] } diff --git a/mppa_k1c/ExtValues.v b/mppa_k1c/ExtValues.v index 3e4b70b5..a8e24c86 100644 --- a/mppa_k1c/ExtValues.v +++ b/mppa_k1c/ExtValues.v @@ -715,8 +715,8 @@ Definition triple_op_single f v1 v2 v3 := | _, _, _ => Vundef end. -Definition fmaddf := triple_op_float Float.fma. -Definition fmaddfs := triple_op_single Float32.fma. +Definition fmaddf := triple_op_float (fun f1 f2 f3 => Float.fma f2 f3 f1). +Definition fmaddfs := triple_op_single (fun f1 f2 f3 => Float32.fma f2 f3 f1). -Definition fmsubf := triple_op_float (fun f1 f2 f3 => Float.fma f1 (Float.neg f2) f3). -Definition fmsubfs := triple_op_single (fun f1 f2 f3 => Float32.fma f1 (Float32.neg f2) f3). +Definition fmsubf := triple_op_float (fun f1 f2 f3 => Float.fma (Float.neg f2) f3 f1). +Definition fmsubfs := triple_op_single (fun f1 f2 f3 => Float32.fma (Float32.neg f2) f3 f1). diff --git a/mppa_k1c/Machregs.v b/mppa_k1c/Machregs.v index 5a7d42b4..8098b5d1 100644 --- a/mppa_k1c/Machregs.v +++ b/mppa_k1c/Machregs.v @@ -213,6 +213,8 @@ Global Opaque Definition two_address_op (op: operation) : bool := match op with + | Ofmaddf | Ofmaddfs + | Ofmsubf | Ofmsubfs | Omadd | Omaddimm _ | Omaddl | Omaddlimm _ | Omsub | Omsubl diff --git a/mppa_k1c/PostpassSchedulingOracle.ml b/mppa_k1c/PostpassSchedulingOracle.ml index e2baa2c0..628ae609 100644 --- a/mppa_k1c/PostpassSchedulingOracle.ml +++ b/mppa_k1c/PostpassSchedulingOracle.ml @@ -32,6 +32,7 @@ type real_instruction = | Fabsd | Fabsw | Fnegw | Fnegd | Faddd | Faddw | Fsbfd | Fsbfw | Fmuld | Fmulw | Fmind | Fminw | Fmaxd | Fmaxw | Finvw + | Ffmaw | Ffmad | Ffmsw | Ffmsd | Fnarrowdw | Fwidenlwd | Floatwz | Floatuwz | Floatdz | Floatudz | Fixedwz | Fixeduwz | Fixeddz | Fixedudz | Fcompw | Fcompd @@ -173,6 +174,10 @@ let arith_arr_real = function | Pinsfl (_, _) -> Insf let arith_arrr_real = function + | Pfmaddfw -> Ffmaw + | Pfmaddfl -> Ffmad + | Pfmsubfw -> Ffmsw + | Pfmsubfl -> Ffmsd | Pmaddw -> Maddw | Pmaddl -> Maddd | Pmsubw -> Msbfw @@ -587,7 +592,8 @@ let rec_to_usage r = | Fnegd | Fnegw | Fabsd | Fabsw | Fwidenlwd | Fmind | Fmaxd | Fminw | Fmaxw -> alu_lite | Fnarrowdw -> alu_full - | Faddd | Faddw | Fsbfd | Fsbfw | Fmuld | Fmulw | Finvw -> mau + | Faddd | Faddw | Fsbfd | Fsbfw | Fmuld | Fmulw | Finvw + | Ffmad | Ffmaw | Ffmsd | Ffmsw -> mau let real_inst_to_latency = function @@ -608,7 +614,8 @@ let real_inst_to_latency = function | Set -> 4 (* According to the manual should be 3, but I measured 4 *) | Icall | Call | Cb | Igoto | Goto | Ret -> 42 (* Should not matter since it's the final instruction of the basic block *) | Fnegd | Fnegw | Fabsd | Fabsw | Fwidenlwd | Fnarrowdw -> 1 - | Faddd | Faddw | Fsbfd | Fsbfw | Fmuld | Fmulw | Finvw -> 4 + | Faddd | Faddw | Fsbfd | Fsbfw | Fmuld | Fmulw | Finvw + | Ffmaw | Ffmad | Ffmsw | Ffmsd -> 4 let rec_to_info r : inst_info = let usage = rec_to_usage r diff --git a/mppa_k1c/SelectOp.vp b/mppa_k1c/SelectOp.vp index 6539184c..71078046 100644 --- a/mppa_k1c/SelectOp.vp +++ b/mppa_k1c/SelectOp.vp @@ -683,7 +683,18 @@ Nondetfunction divfs_base (e1: expr) := else divfs_baseX e1) | _ => divfs_baseX e1 end. -End SELECT. + +Nondetfunction gen_fma args := + match args with + | e1:::e2:::e3:::Enil => Some (Eop Ofmaddf (e3:::e1:::e2:::Enil)) + | _ => None + end. + +Nondetfunction gen_fmaf args := + match args with + | e1:::e2:::e3:::Enil => Some (Eop Ofmaddfs (e3:::e1:::e2:::Enil)) + | _ => None + end. Definition platform_builtin (b: platform_builtin) (args: exprlist) : option expr := match b with @@ -692,7 +703,10 @@ Definition platform_builtin (b: platform_builtin) (args: exprlist) : option expr | BI_fminf => Some (Eop Ominfs args) | BI_fmaxf => Some (Eop Omaxfs args) | BI_fabsf => Some (Eop Oabsfs args) + | BI_fma => gen_fma args + | BI_fmaf => gen_fmaf args end. +End SELECT. (* Local Variables: *) (* mode: coq *) diff --git a/mppa_k1c/SelectOpproof.v b/mppa_k1c/SelectOpproof.v index 7805a1be..08bcff12 100644 --- a/mppa_k1c/SelectOpproof.v +++ b/mppa_k1c/SelectOpproof.v @@ -1663,6 +1663,50 @@ Qed. (** Platform-specific known builtins *) +Lemma eval_fma: + forall al a vl v le, + gen_fma al = Some a -> + eval_exprlist ge sp e m le al vl -> + platform_builtin_sem BI_fma vl = Some v -> + exists v', eval_expr ge sp e m le a v' /\ Val.lessdef v v'. +Proof. + unfold gen_fma. + intros until le. + intro Heval. + destruct (gen_fma_match _) in *; try discriminate. + inversion Heval; subst a; clear Heval. + intro; InvEval. + intro Heval. + simpl in Heval. + inv Heval. + TrivialExists. + destruct v0; simpl; trivial; + destruct v1; simpl; trivial; + destruct v2; simpl; trivial. +Qed. + +Lemma eval_fmaf: + forall al a vl v le, + gen_fmaf al = Some a -> + eval_exprlist ge sp e m le al vl -> + platform_builtin_sem BI_fmaf vl = Some v -> + exists v', eval_expr ge sp e m le a v' /\ Val.lessdef v v'. +Proof. + unfold gen_fmaf. + intros until le. + intro Heval. + destruct (gen_fmaf_match _) in *; try discriminate. + inversion Heval; subst a; clear Heval. + intro; InvEval. + intro Heval. + simpl in Heval. + inv Heval. + TrivialExists. + destruct v0; simpl; trivial; + destruct v1; simpl; trivial; + destruct v2; simpl; trivial. +Qed. + Theorem eval_platform_builtin: forall bf al a vl v le, platform_builtin bf al = Some a -> @@ -1670,9 +1714,12 @@ Theorem eval_platform_builtin: platform_builtin_sem bf vl = Some v -> exists v', eval_expr ge sp e m le a v' /\ Val.lessdef v v'. Proof. - destruct bf; intros until le; intro Heval; inversion Heval; subst a; clear Heval. - all: exists v; split; trivial; - try repeat (try econstructor; try eassumption). + destruct bf; intros until le; intro Heval. + all: try (inversion Heval; subst a; clear Heval; + exists v; split; trivial; + repeat (try econstructor; try eassumption)). + - apply eval_fma; assumption. + - apply eval_fmaf; assumption. Qed. End CMCONSTR. diff --git a/mppa_k1c/TargetPrinter.ml b/mppa_k1c/TargetPrinter.ml index 3d3b56a2..e626d2b4 100644 --- a/mppa_k1c/TargetPrinter.ml +++ b/mppa_k1c/TargetPrinter.ml @@ -574,6 +574,10 @@ module Target (*: TARGET*) = fprintf oc " maddw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 | Pmsubw (rd, rs1, rs2) -> fprintf oc " msbfw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + | Pfmaddfw (rd, rs1, rs2) -> + fprintf oc " ffmaw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + | Pfmsubfw (rd, rs1, rs2) -> + fprintf oc " ffmsw %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 | Paddl (rd, rs1, rs2) -> fprintf oc " addd %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 @@ -615,6 +619,10 @@ module Target (*: TARGET*) = fprintf oc " maddd %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 | Pmsubl (rd, rs1, rs2) -> fprintf oc " msbfd %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + | Pfmaddfl (rd, rs1, rs2) -> + fprintf oc " ffmad %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 + | Pfmsubfl (rd, rs1, rs2) -> + fprintf oc " ffmsd %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 | Pfaddd (rd, rs1, rs2) -> fprintf oc " faddd %a = %a, %a\n" ireg rd ireg rs1 ireg rs2 diff --git a/mppa_k1c/ValueAOp.v b/mppa_k1c/ValueAOp.v index 4c5fcf71..2c9bdf3e 100644 --- a/mppa_k1c/ValueAOp.v +++ b/mppa_k1c/ValueAOp.v @@ -33,10 +33,10 @@ Definition triple_op_single (sem: float32 -> float32 -> float32 -> float32) (x y | _, _, _ => ntop3 x y z end. -Definition fmaddf := triple_op_float Float.fma. -Definition fmsubf := triple_op_float (fun x y z => Float.fma x (Float.neg y) z). -Definition fmaddfs := triple_op_single Float32.fma. -Definition fmsubfs := triple_op_single (fun x y z => Float32.fma x (Float32.neg y) z). +Definition fmaddf := triple_op_float (fun x y z => Float.fma y z x). +Definition fmsubf := triple_op_float (fun x y z => Float.fma (Float.neg y) z x). +Definition fmaddfs := triple_op_single (fun x y z => Float32.fma y z x). +Definition fmsubfs := triple_op_single (fun x y z => Float32.fma (Float32.neg y) z x). Definition invfs (y : aval) := match y with diff --git a/test/monniaux/builtins/fma.c b/test/monniaux/builtins/fma.c new file mode 100644 index 00000000..0952e8f3 --- /dev/null +++ b/test/monniaux/builtins/fma.c @@ -0,0 +1,12 @@ +#include <math.h> +#include <stdio.h> +#include <stdlib.h> + +int main(int argc, char **argv) { + if (argc < 4) return 1; + double x = strtod(argv[1], NULL); + double y = strtod(argv[2], NULL); + double z = strtod(argv[3], NULL); + printf("%g %g %g\n", __builtin_fma(x, y, z), fma(x, y, z), x*y + z); + return 0; +} |