diff options
author | Xavier Leroy <xavier.leroy@inria.fr> | 2015-08-21 14:54:25 +0200 |
---|---|---|
committer | Xavier Leroy <xavier.leroy@inria.fr> | 2015-08-21 14:59:38 +0200 |
commit | 78808873d889608ee39fb6a9d9c0dac0335ccf47 (patch) | |
tree | d6d73f06b2c31abbd4028f39f143ace3459458e1 /ia32/Asmexpand.ml | |
parent | 54f97d1988f623ba7422e13a504caeb5701ba93c (diff) | |
parent | c6567a3f0a16050fd04469fdcc7a575f81c0c8f4 (diff) | |
download | compcert-78808873d889608ee39fb6a9d9c0dac0335ccf47.tar.gz compcert-78808873d889608ee39fb6a9d9c0dac0335ccf47.zip |
Merge branch 'master' into 'new-builtins'
Diffstat (limited to 'ia32/Asmexpand.ml')
-rw-r--r-- | ia32/Asmexpand.ml | 178 |
1 files changed, 93 insertions, 85 deletions
diff --git a/ia32/Asmexpand.ml b/ia32/Asmexpand.ml index 9d8260b7..8996794b 100644 --- a/ia32/Asmexpand.ml +++ b/ia32/Asmexpand.ml @@ -3,6 +3,7 @@ (* The Compcert verified compiler *) (* *) (* Xavier Leroy, INRIA Paris-Rocquencourt *) +(* Bernhard Schommer, AbsInt Angewandte Informatik GmbH *) (* *) (* Copyright Institut National de Recherche en Informatique et en *) (* Automatique. All rights reserved. This file is distributed *) @@ -11,8 +12,7 @@ (* *********************************************************************) (* Expanding built-ins and some pseudo-instructions by rewriting - of the IA32 assembly code. Currently not done, this expansion - is performed on the fly in PrintAsm. *) + of the IA32 assembly code. *) open Asm open Asmexpandaux @@ -75,12 +75,17 @@ let addressing_of_builtin_arg = function | BA_addrglobal(id, ofs) -> Addrmode(None, None, Coq_inr(id, ofs)) | _ -> assert false +(* Operations on addressing modes *) + let offset_addressing (Addrmode(base, ofs, cst)) delta = Addrmode(base, ofs, match cst with - | Coq_inl n -> Coq_inl(Integers.Int.add n delta) - | Coq_inr(id, n) -> Coq_inr(id, Integers.Int.add n delta)) + | Coq_inl n -> Coq_inl(Int.add n delta) + | Coq_inr(id, n) -> Coq_inr(id, Int.add n delta)) +let linear_addr reg ofs = Addrmode(Some reg, None, Coq_inl ofs) +let global_addr id ofs = Addrmode(None, None, Coq_inr(id, ofs)) + (* Handling of memcpy *) (* Unaligned memory accesses are quite fast on IA32, so use large @@ -136,7 +141,7 @@ let expand_builtin_vload_common chunk addr res = | Mint32, BR(IR res) -> emit (Pmov_rm (res,addr)) | Mint64, BR_longofwords(BR(IR res1), BR(IR res2)) -> - let addr' = offset_addressing addr (coqint_of_camlint 4l) in + let addr' = offset_addressing addr _4 in if not (Asmgen.addressing_mentions addr res2) then begin emit (Pmov_rm (res2,addr)); emit (Pmov_rm (res1,addr')) @@ -172,7 +177,7 @@ let expand_builtin_vstore_common chunk addr src tmp = | Mint32, BA(IR src) -> emit (Pmov_mr (addr,src)) | Mint64, BA_longofwords(BA(IR src1), BA(IR src2)) -> - let addr' = offset_addressing addr (coqint_of_camlint 4l) in + let addr' = offset_addressing addr _4 in emit (Pmov_mr (addr,src2)); emit (Pmov_mr (addr',src1)) | Mfloat32, BA(FR src) -> @@ -199,8 +204,30 @@ let expand_builtin_va_start r = Int32.(add (add !PrintAsmaux.current_function_stacksize 4l) (mul 4l (Z.to_int32 (Conventions1.size_arguments !current_function.fn_sig)))) in - emit (Pmov_mr (Addrmode (Some r, None, Coq_inl _0),ESP)); - emit (Paddl_mi (Addrmode (Some r,None,Coq_inl _0),ofs)) + emit (Pmov_mr (linear_addr r _0, ESP)); + emit (Padd_mi (linear_addr r _0, ofs)) + +(* FMA operations *) + +(* vfmadd<i><j><k> r1, r2, r3 performs r1 := ri * rj + rk + hence + vfmadd132 r1, r2, r3 performs r1 := r1 * r3 + r2 + vfmadd213 r1, r2, r3 performs r1 := r2 * r1 + r3 + vfmadd231 r1, r2, r3 performs r1 := r2 * r3 + r1 +*) + +let expand_fma args res i132 i213 i231 = + match args, res with + | [BA(FR a1); BA(FR a2); BA(FR a3)], BR(FR res) -> + if res = a1 then emit (i132 a1 a3 a2) (* a1 * a2 + a3 *) + else if res = a2 then emit (i213 a2 a1 a3) (* a1 * a2 + a3 *) + else if res = a3 then emit (i231 a3 a1 a2) (* a1 * a2 + a3 *) + else begin + emit (Pmovsd_ff(res, a3)); + emit (i231 res a1 a2) (* a1 * a2 + res *) + end + | _ -> + invalid_arg ("ill-formed fma builtin") (* Handling of compiler-inlined builtins *) @@ -214,120 +241,99 @@ let expand_builtin_inline name args res = | "__builtin_bswap16", [BA(IR a1)], BR(IR res) -> if a1 <> res then emit (Pmov_rr (res,a1)); - emit (Prolw_8 res) + emit (Pbswap16 res) | "__builtin_clz", [BA(IR a1)], BR(IR res) -> - emit (Pbslr (a1,res)); + emit (Pbsr (res,a1)); emit (Pxor_ri(res,coqint_of_camlint 31l)) | "__builtin_ctz", [BA(IR a1)], BR(IR res) -> - emit (Pbsfl (a1,res)) + emit (Pbsf (res,a1)) (* Float arithmetic *) | "__builtin_fabs", [BA(FR a1)], BR(FR res) -> if a1 <> res then - emit (Pmovsd_ff (a1,res)); + emit (Pmovsd_ff (res,a1)); emit (Pabsd res) (* This ensures that need_masks is set to true *) | "__builtin_fsqrt", [BA(FR a1)], BR(FR res) -> - emit (Psqrtsd (a1,res)) + emit (Psqrtsd (res,a1)) | "__builtin_fmax", [BA(FR a1); BA(FR a2)], BR(FR res) -> if res = a1 then - emit (Pmaxsd (a2,res)) + emit (Pmaxsd (res,a2)) else if res = a2 then - emit (Pmaxsd (a1,res)) + emit (Pmaxsd (res,a1)) else begin - emit (Pmovsd_ff (a1,res)); - emit (Pmaxsd (a2,res)) + emit (Pmovsd_ff (res,a1)); + emit (Pmaxsd (res,a2)) end | "__builtin_fmin", [BA(FR a1); BA(FR a2)], BR(FR res) -> if res = a1 then - emit (Pminsd (a2,res)) + emit (Pminsd (res,a2)) else if res = a2 then - emit (Pminsd (a1,res)) + emit (Pminsd (res,a1)) else begin - emit (Pmovsd_ff (a1,res)); - emit (Pminsd (a2,res)) - end - | "__builtin_fmadd", [BA(FR a1); BA(FR a2); BA(FR a3)], BR(FR res) -> - if res = a1 then - emit (Pfmadd132 (a2,a3,res)) - else if res = a2 then - emit (Pfmadd213 (a2,a3,res)) - else if res = a3 then - emit (Pfmadd231 (a2,a3,res)) - else begin - emit (Pmovsd_ff (a2,res)); - emit (Pfmadd231 (a1,a2,res)) - end - |"__builtin_fmsub", [BA(FR a1); BA(FR a2); BA(FR a3)], BR(FR res) -> - if res = a1 then - emit (Pfmsub132 (a2,a3,res)) - else if res = a2 then - emit (Pfmsub213 (a2,a3,res)) - else if res = a3 then - emit (Pfmsub231 (a2,a3,res)) - else begin - emit (Pmovsd_ff (a2,res)); - emit (Pfmsub231 (a1,a2,res)) - end - | "__builtin_fnmadd", [BA(FR a1); BA(FR a2); BA(FR a3)], BR(FR res) -> - if res = a1 then - emit (Pfnmadd132 (a2,a3,res)) - else if res = a2 then - emit (Pfnmadd213 (a2,a3,res)) - else if res = a3 then - emit (Pfnmadd231 (a2,a3,res)) - else begin - emit (Pmovsd_ff (a2,res)); - emit (Pfnmadd231 (a1,a2,res)) - end - |"__builtin_fnmsub", [BA(FR a1); BA(FR a2); BA(FR a3)], BR(FR res) -> - if res = a1 then - emit (Pfnmsub132 (a2,a3,res)) - else if res = a2 then - emit (Pfnmsub213 (a2,a3,res)) - else if res = a3 then - emit (Pfnmsub231 (a2,a3,res)) - else begin - emit (Pmovsd_ff (a2,res)); - emit (Pfnmsub231 (a1,a2,res)) + emit (Pmovsd_ff (res,a1)); + emit (Pminsd (res,a2)) end + | "__builtin_fmadd", _, _ -> + expand_fma args res + (fun r1 r2 r3 -> Pfmadd132(r1, r2, r3)) + (fun r1 r2 r3 -> Pfmadd213(r1, r2, r3)) + (fun r1 r2 r3 -> Pfmadd231(r1, r2, r3)) + | "__builtin_fmsub", _, _ -> + expand_fma args res + (fun r1 r2 r3 -> Pfmsub132(r1, r2, r3)) + (fun r1 r2 r3 -> Pfmsub213(r1, r2, r3)) + (fun r1 r2 r3 -> Pfmsub231(r1, r2, r3)) + | "__builtin_fnmadd", _, _ -> + expand_fma args res + (fun r1 r2 r3 -> Pfnmadd132(r1, r2, r3)) + (fun r1 r2 r3 -> Pfnmadd213(r1, r2, r3)) + (fun r1 r2 r3 -> Pfnmadd231(r1, r2, r3)) + | "__builtin_fnmsub", _, _ -> + expand_fma args res + (fun r1 r2 r3 -> Pfnmsub132(r1, r2, r3)) + (fun r1 r2 r3 -> Pfnmsub213(r1, r2, r3)) + (fun r1 r2 r3 -> Pfnmsub231(r1, r2, r3)) (* 64-bit integer arithmetic *) | "__builtin_negl", [BA_longofwords(BA(IR ah), BA(IR al))], BR_longofwords(BR(IR rh), BR(IR rl)) -> assert (ah = EDX && al = EAX && rh = EDX && rl = EAX); emit (Pneg EAX); - emit (Padcl_ir (_0,EDX)); + emit (Padc_ri (EDX,_0)); emit (Pneg EDX) | "__builtin_addl", [BA_longofwords(BA(IR ah), BA(IR al)); BA_longofwords(BA(IR bh), BA(IR bl))], BR_longofwords(BR(IR rh), BR(IR rl)) -> assert (ah = EDX && al = EAX && bh = ECX && bl = EBX && rh = EDX && rl = EAX); - emit (Paddl (EBX,EAX)); - emit (Padcl_rr (ECX,EDX)) + emit (Padd_rr (EAX,EBX)); + emit (Padc_rr (EDX,ECX)) | "__builtin_subl", [BA_longofwords(BA(IR ah), BA(IR al)); BA_longofwords(BA(IR bh), BA(IR bl))], BR_longofwords(BR(IR rh), BR(IR rl)) -> assert (ah = EDX && al = EAX && bh = ECX && bl = EBX && rh = EDX && rl = EAX); - emit (Psub_rr (EBX,EAX)); - emit (Psbbl (ECX,EDX)) + emit (Psub_rr (EAX,EBX)); + emit (Psbb_rr (EDX,ECX)) | "__builtin_mull", [BA(IR a); BA(IR b)], BR_longofwords(BR(IR rh), BR(IR rl)) -> assert (a = EAX && b = EDX && rh = EDX && rl = EAX); emit (Pmul_r EDX) (* Memory accesses *) | "__builtin_read16_reversed", [BA(IR a1)], BR(IR res) -> - let addr = Addrmode(Some a1,None,Coq_inl _0) in - emit (Pmovzw_rm (res,addr)); - emit (Prolw_8 res) + emit (Pmovzw_rm (res, linear_addr a1 _0)); + emit (Pbswap16 res) | "__builtin_read32_reversed", [BA(IR a1)], BR(IR res) -> - let addr = Addrmode(Some a1,None,Coq_inl _0) in - emit (Pmov_rm (res,addr)); + emit (Pmov_rm (res, linear_addr a1 _0)); emit (Pbswap res) | "__builtin_write16_reversed", [BA(IR a1); BA(IR a2)], _ -> let tmp = if a1 = ECX then EDX else ECX in - let addr = Addrmode(Some a1,None,Coq_inl _0) in if a2 <> tmp then - emit (Pmov_rr (a2,tmp)); - emit (Pxchg (tmp,tmp)); - emit (Pmovw_mr (addr,tmp)) + emit (Pmov_rr (tmp,a2)); + emit (Pbswap16 tmp); + emit (Pmovw_mr (linear_addr a1 _0, tmp)) + | "__builtin_write32_reversed", [BA(IR a1); BA(IR a2)], _ -> + let tmp = if a1 = ECX then EDX else ECX in + if a2 <> tmp then + emit (Pmov_rr (tmp,a2)); + emit (Pbswap tmp); + emit (Pmov_mr (linear_addr a1 _0, tmp)) (* Vararg stuff *) | "__builtin_va_start", [BA(IR a)], _ -> expand_builtin_va_start a @@ -344,17 +350,17 @@ let expand_instruction instr = match instr with | Pallocframe (sz, ofs_ra, ofs_link) -> let sz = sp_adjustment sz in - let addr = Addrmode(Some ESP,None,Coq_inl (coqint_of_camlint (Int32.add sz 4l))) in - let addr' = Addrmode (Some ESP, None, Coq_inl ofs_link) in + let addr = linear_addr ESP (coqint_of_camlint (Int32.add sz 4l)) in + let addr' = linear_addr ESP ofs_link in let sz' = coqint_of_camlint sz in - emit (Psubl_ri (ESP,sz')); + emit (Psub_ri (ESP,sz')); emit (Pcfi_adjust sz'); emit (Plea (EDX,addr)); emit (Pmov_mr (addr',EDX)); PrintAsmaux.current_function_stacksize := sz | Pfreeframe(sz, ofs_ra, ofs_link) -> let sz = sp_adjustment sz in - emit (Paddl_ri (ESP,coqint_of_camlint sz)) + emit (Padd_ri (ESP,coqint_of_camlint sz)) | Pbuiltin (ef,args, res) -> begin match ef with @@ -365,8 +371,10 @@ let expand_instruction instr = | EF_vstore chunk -> expand_builtin_vstore chunk args | EF_memcpy(sz, al) -> - expand_builtin_memcpy (Int32.to_int (camlint_of_coqint sz)) - (Int32.to_int (camlint_of_coqint al)) args + expand_builtin_memcpy + (Int32.to_int (camlint_of_coqint sz)) + (Int32.to_int (camlint_of_coqint al)) + args | EF_annot_val(txt, targ) -> expand_annot_val txt targ args res | EF_annot _ | EF_debug _ | EF_inline_asm _ -> |