diff options
author | Bernhard Schommer <bernhardschommer@gmail.com> | 2015-08-26 12:47:25 +0200 |
---|---|---|
committer | Bernhard Schommer <bernhardschommer@gmail.com> | 2015-08-26 12:47:25 +0200 |
commit | e0f1a60f5ff9d2efc8b106b5167f0170b8795dbe (patch) | |
tree | fe5c4c0b3da0b30841eef8184dade6e39e2c7407 /ia32/Asmexpand.ml | |
parent | 8d2e4a51d56b7f4d3673a5132edd1adb37a14295 (diff) | |
parent | 7cfaf10b604372044f53cb65b03df33c23f8b26d (diff) | |
download | compcert-e0f1a60f5ff9d2efc8b106b5167f0170b8795dbe.tar.gz compcert-e0f1a60f5ff9d2efc8b106b5167f0170b8795dbe.zip |
Merge branch 'master' into debug_locations
Conflicts:
debug/CtoDwarf.ml
debug/DwarfPrinter.ml
debug/DwarfTypes.mli
Diffstat (limited to 'ia32/Asmexpand.ml')
-rw-r--r-- | ia32/Asmexpand.ml | 193 |
1 files changed, 96 insertions, 97 deletions
diff --git a/ia32/Asmexpand.ml b/ia32/Asmexpand.ml index 11c63469..137b61b5 100644 --- a/ia32/Asmexpand.ml +++ b/ia32/Asmexpand.ml @@ -3,6 +3,7 @@ (* The Compcert verified compiler *) (* *) (* Xavier Leroy, INRIA Paris-Rocquencourt *) +(* Bernhard Schommer, AbsInt Angewandte Informatik GmbH *) (* *) (* Copyright Institut National de Recherche en Informatique et en *) (* Automatique. All rights reserved. This file is distributed *) @@ -11,8 +12,7 @@ (* *********************************************************************) (* Expanding built-ins and some pseudo-instructions by rewriting - of the IA32 assembly code. Currently not done, this expansion - is performed on the fly in PrintAsm. *) + of the IA32 assembly code. *) open Asm open Asmexpandaux @@ -67,6 +67,16 @@ let expand_annot_val txt targ args res = if dst <> src then emit (Pmovsd_ff (dst,src)) | _, _ -> assert false +(* Operations on addressing modes *) + +let offset_addressing (Addrmode(base, ofs, cst)) delta = + Addrmode(base, ofs, + match cst with + | Coq_inl n -> Coq_inl(Int.add n delta) + | Coq_inr(id, n) -> Coq_inr(id, Int.add n delta)) + +let linear_addr reg ofs = Addrmode(Some reg, None, Coq_inl ofs) +let global_addr id ofs = Addrmode(None, None, Coq_inr(id, ofs)) (* Handling of memcpy *) @@ -77,20 +87,20 @@ let expand_builtin_memcpy_small sz al src dst = assert (src = EDX && dst = EAX); let rec copy ofs sz = if sz >= 8 && !Clflags.option_ffpu then begin - emit (Pmovq_rm (XMM7,Addrmode (Some src, None, Coq_inl ofs))); - emit (Pmovq_mr (Addrmode (Some dst, None, Coq_inl ofs),XMM7)); + emit (Pmovq_rm (XMM7, linear_addr src ofs)); + emit (Pmovq_mr (linear_addr dst ofs, XMM7)); copy (Int.add ofs _8) (sz - 8) end else if sz >= 4 then begin - emit (Pmov_rm (ECX,Addrmode (Some src, None, Coq_inl ofs))); - emit (Pmov_mr (Addrmode (Some dst, None, Coq_inl ofs),ECX)); + emit (Pmov_rm (ECX, linear_addr src ofs)); + emit (Pmov_mr (linear_addr dst ofs, ECX)); copy (Int.add ofs _4) (sz - 4) end else if sz >= 2 then begin - emit (Pmovw_rm (ECX,Addrmode (Some src, None, Coq_inl ofs))); - emit (Pmovw_mr (Addrmode (Some dst, None, Coq_inl ofs),ECX)); + emit (Pmovw_rm (ECX, linear_addr src ofs)); + emit (Pmovw_mr (linear_addr dst ofs, ECX)); copy (Int.add ofs _2) (sz - 2) end else if sz >= 1 then begin - emit (Pmovb_rm (ECX,Addrmode (Some src, None, Coq_inl ofs))); - emit (Pmovb_mr (Addrmode (Some dst, None, Coq_inl ofs),ECX)); + emit (Pmovb_rm (ECX, linear_addr src ofs)); + emit (Pmovb_mr (linear_addr dst ofs, ECX)); copy (Int.add ofs _1) (sz - 1) end in copy _0 sz @@ -111,12 +121,6 @@ let expand_builtin_memcpy sz al args = (* Handling of volatile reads and writes *) -let offset_addressing (Addrmode(base, ofs, cst)) delta = - Addrmode(base, ofs, - match cst with - | Coq_inl n -> Coq_inl(Integers.Int.add n delta) - | Coq_inr(id, n) -> Coq_inr(id, Integers.Int.add n delta)) - let expand_builtin_vload_common chunk addr res = match chunk, res with | Mint8unsigned, [IR res] -> @@ -130,7 +134,7 @@ let expand_builtin_vload_common chunk addr res = | Mint32, [IR res] -> emit (Pmov_rm (res,addr)) | Mint64, [IR res1; IR res2] -> - let addr' = offset_addressing addr (coqint_of_camlint 4l) in + let addr' = offset_addressing addr _4 in if not (Asmgen.addressing_mentions addr res2) then begin emit (Pmov_rm (res2,addr)); emit (Pmov_rm (res1,addr')) @@ -147,13 +151,11 @@ let expand_builtin_vload_common chunk addr res = let expand_builtin_vload chunk args res = match args with - | [IR addr] -> - expand_builtin_vload_common chunk (Addrmode (Some addr,None, Coq_inl _0)) res - | _ -> - assert false + | [IR addr] -> expand_builtin_vload_common chunk (linear_addr addr _0) res + | _ -> assert false let expand_builtin_vload_global chunk id ofs args res = - expand_builtin_vload_common chunk (Addrmode(None, None, Coq_inr(id,ofs))) res + expand_builtin_vload_common chunk (global_addr id ofs) res let expand_builtin_vstore_common chunk addr src tmp = match chunk, src with @@ -169,7 +171,7 @@ let expand_builtin_vstore_common chunk addr src tmp = | Mint32, [IR src] -> emit (Pmov_mr (addr,src)) | Mint64, [IR src1; IR src2] -> - let addr' = offset_addressing addr (coqint_of_camlint 4l) in + let addr' = offset_addressing addr _4 in emit (Pmov_mr (addr,src2)); emit (Pmov_mr (addr',src1)) | Mfloat32, [FR src] -> @@ -182,15 +184,13 @@ let expand_builtin_vstore_common chunk addr src tmp = let expand_builtin_vstore chunk args = match args with | IR addr :: src -> - expand_builtin_vstore_common chunk - (Addrmode(Some addr, None, Coq_inl Integers.Int.zero)) src + expand_builtin_vstore_common chunk (linear_addr addr _0) src (if addr = EAX then ECX else EAX) | _ -> assert false let expand_builtin_vstore_global chunk id ofs args = - expand_builtin_vstore_common chunk - (Addrmode(None, None, Coq_inr(id,ofs))) args EAX + expand_builtin_vstore_common chunk (global_addr id ofs) args EAX (* Handling of varargs *) @@ -202,8 +202,30 @@ let expand_builtin_va_start r = Int32.(add (add !PrintAsmaux.current_function_stacksize 4l) (mul 4l (Z.to_int32 (Conventions1.size_arguments !current_function.fn_sig)))) in - emit (Pmov_mr (Addrmode (Some r, None, Coq_inl _0),ESP)); - emit (Paddl_mi (Addrmode (Some r,None,Coq_inl _0),ofs)) + emit (Pmov_mr (linear_addr r _0, ESP)); + emit (Padd_mi (linear_addr r _0, ofs)) + +(* FMA operations *) + +(* vfmadd<i><j><k> r1, r2, r3 performs r1 := ri * rj + rk + hence + vfmadd132 r1, r2, r3 performs r1 := r1 * r3 + r2 + vfmadd213 r1, r2, r3 performs r1 := r2 * r1 + r3 + vfmadd231 r1, r2, r3 performs r1 := r2 * r3 + r1 +*) + +let expand_fma args res i132 i213 i231 = + match args, res with + | [FR a1; FR a2; FR a3], [FR res] -> + if res = a1 then emit (i132 a1 a3 a2) (* a1 * a2 + a3 *) + else if res = a2 then emit (i213 a2 a1 a3) (* a1 * a2 + a3 *) + else if res = a3 then emit (i231 a3 a1 a2) (* a1 * a2 + a3 *) + else begin + emit (Pmovsd_ff(res, a3)); + emit (i231 res a1 a2) (* a1 * a2 + res *) + end + | _ -> + invalid_arg ("ill-formed fma builtin") (* Handling of compiler-inlined builtins *) @@ -217,95 +239,71 @@ let expand_builtin_inline name args res = | "__builtin_bswap16", [IR a1], [IR res] -> if a1 <> res then emit (Pmov_rr (res,a1)); - emit (Prolw_8 res) + emit (Pbswap16 res) | "__builtin_clz", [IR a1], [IR res] -> - emit (Pbslr (a1,res)); + emit (Pbsr (res,a1)); emit (Pxor_ri(res,coqint_of_camlint 31l)) | "__builtin_ctz", [IR a1], [IR res] -> - emit (Pbsfl (a1,res)) + emit (Pbsf (res,a1)) (* Float arithmetic *) | "__builtin_fabs", [FR a1], [FR res] -> if a1 <> res then emit (Pmovsd_ff (res,a1)); emit (Pabsd res) (* This ensures that need_masks is set to true *) | "__builtin_fsqrt", [FR a1], [FR res] -> - emit (Psqrtsd (a1,res)) + emit (Psqrtsd (res,a1)) | "__builtin_fmax", [FR a1; FR a2], [FR res] -> if res = a1 then - emit (Pmaxsd (a2,res)) + emit (Pmaxsd (res,a2)) else if res = a2 then - emit (Pmaxsd (a1,res)) + emit (Pmaxsd (res,a1)) else begin emit (Pmovsd_ff (res,a1)); - emit (Pmaxsd (a2,res)) + emit (Pmaxsd (res,a2)) end | "__builtin_fmin", [FR a1; FR a2], [FR res] -> if res = a1 then - emit (Pminsd (a2,res)) + emit (Pminsd (res,a2)) else if res = a2 then - emit (Pminsd (a1,res)) + emit (Pminsd (res,a1)) else begin emit (Pmovsd_ff (res,a1)); - emit (Pminsd (a2,res)) - end - | "__builtin_fmadd", [FR a1; FR a2; FR a3], [FR res] -> - if res = a1 then - emit (Pfmadd132 (a2,a3,res)) - else if res = a2 then - emit (Pfmadd213 (a2,a3,res)) - else if res = a3 then - emit (Pfmadd231 (a2,a3,res)) - else begin - emit (Pmovsd_ff (res,a2)); - emit (Pfmadd231 (a1,a2,res)) - end - |"__builtin_fmsub", [FR a1; FR a2; FR a3], [FR res] -> - if res = a1 then - emit (Pfmsub132 (a2,a3,res)) - else if res = a2 then - emit (Pfmsub213 (a2,a3,res)) - else if res = a3 then - emit (Pfmsub231 (a2,a3,res)) - else begin - emit (Pmovsd_ff (res,a2)); - emit (Pfmsub231 (a1,a2,res)) - end - | "__builtin_fnmadd", [FR a1; FR a2; FR a3], [FR res] -> - if res = a1 then - emit (Pfnmadd132 (a2,a3,res)) - else if res = a2 then - emit (Pfnmadd213 (a2,a3,res)) - else if res = a3 then - emit (Pfnmadd231 (a2,a3,res)) - else begin - emit (Pmovsd_ff (res,a2)); - emit (Pfnmadd231 (a1,a2,res)) - end - |"__builtin_fnmsub", [FR a1; FR a2; FR a3], [FR res] -> - if res = a1 then - emit (Pfnmsub132 (a2,a3,res)) - else if res = a2 then - emit (Pfnmsub213 (a2,a3,res)) - else if res = a3 then - emit (Pfnmsub231 (a2,a3,res)) - else begin - emit (Pmovsd_ff (res,a2)); - emit (Pfnmsub231 (a1,a2,res)) + emit (Pminsd (res,a2)) end + | "__builtin_fmadd", _, _ -> + expand_fma args res + (fun r1 r2 r3 -> Pfmadd132(r1, r2, r3)) + (fun r1 r2 r3 -> Pfmadd213(r1, r2, r3)) + (fun r1 r2 r3 -> Pfmadd231(r1, r2, r3)) + | "__builtin_fmsub", _, _ -> + expand_fma args res + (fun r1 r2 r3 -> Pfmsub132(r1, r2, r3)) + (fun r1 r2 r3 -> Pfmsub213(r1, r2, r3)) + (fun r1 r2 r3 -> Pfmsub231(r1, r2, r3)) + | "__builtin_fnmadd", _, _ -> + expand_fma args res + (fun r1 r2 r3 -> Pfnmadd132(r1, r2, r3)) + (fun r1 r2 r3 -> Pfnmadd213(r1, r2, r3)) + (fun r1 r2 r3 -> Pfnmadd231(r1, r2, r3)) + | "__builtin_fnmsub", _, _ -> + expand_fma args res + (fun r1 r2 r3 -> Pfnmsub132(r1, r2, r3)) + (fun r1 r2 r3 -> Pfnmsub213(r1, r2, r3)) + (fun r1 r2 r3 -> Pfnmsub231(r1, r2, r3)) (* 64-bit integer arithmetic *) | "__builtin_negl", [IR ah; IR al], [IR rh; IR rl] -> assert (ah = EDX && al = EAX && rh = EDX && rl = EAX); emit (Pneg EAX); - emit (Padcl_ir (_0,EDX)); + emit (Padc_ri (EDX,_0)); emit (Pneg EDX) | "__builtin_addl", [IR ah; IR al; IR bh; IR bl], [IR rh; IR rl] -> assert (ah = EDX && al = EAX && bh = ECX && bl = EBX && rh = EDX && rl = EAX); - emit (Paddl (EBX,EAX)); - emit (Padcl_rr (ECX,EDX)) + emit (Padd_rr (EAX,EBX)); + emit (Padc_rr (EDX,ECX)) | "__builtin_subl", [IR ah; IR al; IR bh; IR bl], [IR rh; IR rl] -> assert (ah = EDX && al = EAX && bh = ECX && bl = EBX && rh = EDX && rl = EAX); emit (Psub_rr (EAX,EBX)); - emit (Psbbl (ECX,EDX)) + emit (Psbb_rr (EDX,ECX)) | "__builtin_mull", [IR a; IR b], [IR rh; IR rl] -> assert (a = EAX && b = EDX && rh = EDX && rl = EAX); emit (Pmul_r EDX) @@ -313,18 +311,17 @@ let expand_builtin_inline name args res = | "__builtin_read16_reversed", [IR a1], [IR res] -> let addr = Addrmode(Some a1,None,Coq_inl _0) in emit (Pmovzw_rm (res,addr)); - emit (Prolw_8 res) + emit (Pbswap16 res) | "__builtin_read32_reversed", [IR a1], [IR res] -> let addr = Addrmode(Some a1,None,Coq_inl _0) in emit (Pmov_rm (res,addr)); emit (Pbswap res) | "__builtin_write16_reversed", [IR a1; IR a2], _ -> let tmp = if a1 = ECX then EDX else ECX in - let addr = Addrmode(Some a1,None,Coq_inl _0) in if a2 <> tmp then emit (Pmov_rr (tmp,a2)); - emit (Pxchg (tmp,tmp)); - emit (Pmovw_mr (addr,tmp)) + emit (Pbswap16 tmp); + emit (Pmovw_mr (linear_addr a1 _0,tmp)) | "__builtin_write32_reversed", [IR a1; IR a2], _ -> let tmp = if a1 = ECX then EDX else ECX in let addr = Addrmode(Some a1,None,Coq_inl _0) in @@ -348,17 +345,17 @@ let expand_instruction instr = match instr with | Pallocframe (sz, ofs_ra, ofs_link) -> let sz = sp_adjustment sz in - let addr = Addrmode(Some ESP,None,Coq_inl (coqint_of_camlint (Int32.add sz 4l))) in - let addr' = Addrmode (Some ESP, None, Coq_inl ofs_link) in + let addr = linear_addr ESP (coqint_of_camlint (Int32.add sz 4l)) in + let addr' = linear_addr ESP ofs_link in let sz' = coqint_of_camlint sz in - emit (Psubl_ri (ESP,sz')); + emit (Psub_ri (ESP,sz')); emit (Pcfi_adjust sz'); emit (Plea (EDX,addr)); emit (Pmov_mr (addr',EDX)); PrintAsmaux.current_function_stacksize := sz | Pfreeframe(sz, ofs_ra, ofs_link) -> let sz = sp_adjustment sz in - emit (Paddl_ri (ESP,coqint_of_camlint sz)) + emit (Padd_ri (ESP,coqint_of_camlint sz)) | Pbuiltin (ef,args, res) -> begin match ef with @@ -373,8 +370,10 @@ let expand_instruction instr = | EF_vstore_global(chunk, id, ofs) -> expand_builtin_vstore_global chunk id ofs args | EF_memcpy(sz, al) -> - expand_builtin_memcpy (Int32.to_int (camlint_of_coqint sz)) - (Int32.to_int (camlint_of_coqint al)) args + expand_builtin_memcpy + (Int32.to_int (camlint_of_coqint sz)) + (Int32.to_int (camlint_of_coqint al)) + args | EF_annot_val(txt, targ) -> expand_annot_val txt targ args res | EF_inline_asm(txt, sg, clob) -> |