diff options
author | xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e> | 2014-05-05 11:39:57 +0000 |
---|---|---|
committer | xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e> | 2014-05-05 11:39:57 +0000 |
commit | af2b5e2efd6bf7d682e74a3fde5d54e960fa34af (patch) | |
tree | 50a50efc714ac699660edf905b28f89d70f80e4e | |
parent | f126a1c0f2bc6434b6478c863ad910bf996ffbe1 (diff) | |
download | compcert-af2b5e2efd6bf7d682e74a3fde5d54e960fa34af.tar.gz compcert-af2b5e2efd6bf7d682e74a3fde5d54e960fa34af.zip |
Fused multiply-add for IA32.
git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@2481 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e
-rw-r--r-- | Changelog | 6 | ||||
-rw-r--r-- | ia32/CBuiltins.ml | 16 | ||||
-rw-r--r-- | ia32/PrintAsm.ml | 19 | ||||
-rw-r--r-- | test/regression/builtins-ia32.c | 8 |
4 files changed, 45 insertions, 4 deletions
@@ -1,9 +1,5 @@
 Language features:
 - Support for C99 designated initializers. (ISO C99 section 6.7.8.)
-- Traditional, pre-Standard function definitions are no longer supported, e.g.
-    int f(i) int i; { return i + 1; } // no longer supported
-  Use Standard form instead:
-    int f(int i) { return i + 1; }
 
 Improvements in confidence:
 - The parser is now formally verified against the ISO C99 grammar plus
@@ -24,6 +20,8 @@ Optimizations:
 Usability:
 - Option "-timings" to print compilation times for various passes.
 - Various tweaks in IRC graph coloring to reduce compilation time.
+- IA32: add built-in functions for fused multiply-add
+  (require a recent processor with FMA3 extensions).
 
 Improvements in ABI conformance:
 - New target platform: ARM with EABI "hard float" calling conventions
diff --git a/ia32/CBuiltins.ml b/ia32/CBuiltins.ml
index 6dbabf18..60461260 100644
--- a/ia32/CBuiltins.ml
+++ b/ia32/CBuiltins.ml
@@ -36,6 +36,22 @@ let builtins = {
       (TFloat(FDouble, []), [TFloat(FDouble, []); TFloat(FDouble, [])], false);
     "__builtin_fmin",
       (TFloat(FDouble, []), [TFloat(FDouble, []); TFloat(FDouble, [])], false);
+    "__builtin_fmadd",
+      (TFloat(FDouble, []),
+       [TFloat(FDouble, []); TFloat(FDouble, []); TFloat(FDouble, [])],
+       false);
+    "__builtin_fmsub",
+      (TFloat(FDouble, []),
+       [TFloat(FDouble, []); TFloat(FDouble, []); TFloat(FDouble, [])],
+       false);
+    "__builtin_fnmadd",
+      (TFloat(FDouble, []),
+       [TFloat(FDouble, []); TFloat(FDouble, []); TFloat(FDouble, [])],
+       false);
+    "__builtin_fnmsub",
+      (TFloat(FDouble, []),
+       [TFloat(FDouble, []); TFloat(FDouble, []); TFloat(FDouble, [])],
+       false);
     (* Memory accesses *)
     "__builtin_read16_reversed",
       (TInt(IUShort, []), [TPtr(TInt(IUShort, [AConst]), [])], false);
diff --git a/ia32/PrintAsm.ml b/ia32/PrintAsm.ml
index 9a2648a0..33e19f73 100644
--- a/ia32/PrintAsm.ml
+++ b/ia32/PrintAsm.ml
@@ -475,6 +475,25 @@ let print_builtin_inline oc name args res =
         fprintf oc " movapd %a, %a\n" freg a1 freg res;
         fprintf oc " minsd %a, %a\n" freg a2 freg res
       end
+  | ("__builtin_fmadd"|"__builtin_fmsub"|"__builtin_fnmadd"|"__builtin_fnmsub"),
+    [FR a1; FR a2; FR a3], [FR res] ->
+      let opcode =
+        match name with
+        | "__builtin_fmadd" -> "vfmadd"
+        | "__builtin_fmsub" -> "vfmsub"
+        | "__builtin_fnmadd" -> "vfnmadd"
+        | "__builtin_fnmsub" -> "vfnmsub"
+        | _ -> assert false in
+      if res = a1 then
+        fprintf oc " %s132sd %a, %a, %a\n" opcode freg a2 freg a3 freg res
+      else if res = a2 then
+        fprintf oc " %s213sd %a, %a, %a\n" opcode freg a3 freg a1 freg res
+      else if res = a3 then
+        fprintf oc " %s231sd %a, %a, %a\n" opcode freg a1 freg a2 freg res
+      else begin
+        fprintf oc " movapd %a, %a\n" freg a3 freg res;
+        fprintf oc " %s231sd %a, %a, %a\n" opcode freg a1 freg a2 freg res
+      end
   (* 64-bit integer arithmetic *)
   | "__builtin_negl", [IR ah; IR al], [IR rh; IR rl] ->
       assert (ah = EDX && al = EAX && rh = EDX && rl = EAX);
diff --git a/test/regression/builtins-ia32.c b/test/regression/builtins-ia32.c
index 4ce54889..91455188 100644
--- a/test/regression/builtins-ia32.c
+++ b/test/regression/builtins-ia32.c
@@ -8,6 +8,7 @@ int main(int argc, char ** argv)
   unsigned int y = 0xDEADBEEF;
   double a = 3.14159;
   double b = 2.718;
+  double c = 1.414;
   unsigned short s = 0x1234;
 
   printf("bswap(%x) = %x\n", x, __builtin_bswap(x));
@@ -17,6 +18,13 @@ int main(int argc, char ** argv)
   printf("fmin(%f, %f) = %f\n", a, b, __builtin_fmin(a, b));
   printf("fmax(%f, %f) = %f\n", a, b, __builtin_fmax(a, b));
 
+#ifdef FMA3
+  printf("fmadd(%f, %f, %f) = %f\n", a, b, c, __builtin_fmadd(a, b, c));
+  printf("fmsub(%f, %f, %f) = %f\n", a, b, c, __builtin_fmsub(a, b, c));
+  printf("fnmadd(%f, %f, %f) = %f\n", a, b, c, __builtin_fnmadd(a, b, c));
+  printf("fnmsub(%f, %f, %f) = %f\n", a, b, c, __builtin_fnmsub(a, b, c));
+#endif
+
   printf ("read_16_rev = %x\n", __builtin_read16_reversed(&s));
   printf ("read_32_rev = %x\n", __builtin_read32_reversed(&y));
   __builtin_write16_reversed(&s, 0x789A);