From d03d47c6e4ce9324d6d59ae36cb8db78b013be54 Mon Sep 17 00:00:00 2001
From: Bernhard Schommer
Date: Fri, 26 Jun 2015 21:53:38 +0200
Subject: Merge branch 'asmexpand' of github.com:AbsInt/CompCert

---
 ia32/Asmexpand.ml | 373 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 373 insertions(+)

diff --git a/ia32/Asmexpand.ml b/ia32/Asmexpand.ml
index 9a458c37..e07672a6 100644
--- a/ia32/Asmexpand.ml
+++ b/ia32/Asmexpand.ml
@@ -14,5 +14,378 @@
 of the IA32 assembly code. Currently not done, this expansion
 is performed on the fly in PrintAsm. *)

open Asm
open Asmexpandaux
open Asmgen
open AST
open Camlcoq
open Datatypes
open Integers

(* Useful constants and helper functions *)

let _0 = Int.zero
let _1 = Int.one
let _2 = coqint_of_camlint 2l
let _4 = coqint_of_camlint 4l
let _8 = coqint_of_camlint 8l

let stack_alignment () =
  if Configuration.system = "macosx" then 16
  else 8

(* SP adjustment to allocate or free a stack frame *)

let int32_align n a =
  if n >= 0l
  then Int32.logand (Int32.add n (Int32.of_int (a-1))) (Int32.of_int (-a))
  else Int32.logand n (Int32.of_int (-a))

let sp_adjustment sz =
  let sz = camlint_of_coqint sz in
  (* Preserve proper alignment of the stack *)
  let sz = int32_align sz (stack_alignment ()) in
  (* The top 4 bytes have already been allocated by the "call" instruction. *)
  let sz = Int32.sub sz 4l in
  sz


(* Built-ins.  They come in two flavors:
   - annotation statements: take their arguments in registers or stack
     locations; generate no code;
   - inlined by the compiler: take their arguments in arbitrary
     registers; preserve all registers except ECX, EDX, XMM6 and XMM7. *)

(* Handling of annotations *)

let expand_annot_val txt targ args res =
  emit (Pannot (EF_annot(txt,[targ]), List.map (fun r -> AA_base r) args));
  match args, res with
  | [IR src], [IR dst] ->
     if dst <> src then emit (Pmov_rr (dst,src))
  | [FR src], [FR dst] ->
     if dst <> src then emit (Pmovsd_ff (dst,src))
  | _, _ -> assert false


(* Handling of memcpy *)

(* Unaligned memory accesses are quite fast on IA32, so use large
   memory accesses regardless of alignment. *)
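(* For example, a 7-byte copy (source in %edx, destination in %eax)
   expands, whatever the alignment [al], to:

       movl  0(%edx), %ecx
       movl  %ecx, 0(%eax)
       movw  4(%edx), %cx
       movw  %cx, 4(%eax)
       movb  6(%edx), %cl
       movb  %cl, 6(%eax)

   using ECX as the scratch register throughout. *)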
let expand_builtin_memcpy_small sz al src dst =
  assert (src = EDX && dst = EAX);
  let rec copy ofs sz =
    if sz >= 8 && !Clflags.option_ffpu then begin
      emit (Pmovq_rm (XMM7,Addrmode (Some src, None, Coq_inl ofs)));
      emit (Pmovq_mr (Addrmode (Some dst, None, Coq_inl ofs),XMM7));
      copy (Int.add ofs _8) (sz - 8)
    end else if sz >= 4 then begin
      emit (Pmov_rm (ECX,Addrmode (Some src, None, Coq_inl ofs)));
      emit (Pmov_mr (Addrmode (Some dst, None, Coq_inl ofs),ECX));
      copy (Int.add ofs _4) (sz - 4)
    end else if sz >= 2 then begin
      emit (Pmovw_rm (ECX,Addrmode (Some src, None, Coq_inl ofs)));
      emit (Pmovw_mr (Addrmode (Some dst, None, Coq_inl ofs),ECX));
      copy (Int.add ofs _2) (sz - 2)
    end else if sz >= 1 then begin
      emit (Pmovb_rm (ECX,Addrmode (Some src, None, Coq_inl ofs)));
      emit (Pmovb_mr (Addrmode (Some dst, None, Coq_inl ofs),ECX));
      copy (Int.add ofs _1) (sz - 1)
    end in
  copy _0 sz

let expand_builtin_memcpy_big sz al src dst =
  assert (src = ESI && dst = EDI);
  emit (Pmov_ri (ECX,coqint_of_camlint (Int32.of_int (sz / 4))));
  emit Prep_movsl;
  if sz mod 4 >= 2 then emit Pmovsw;
  if sz mod 2 >= 1 then emit Pmovsb

let expand_builtin_memcpy sz al args =
  let (dst, src) =
    match args with [IR d; IR s] -> (d, s) | _ -> assert false in
  if sz <= 32
  then expand_builtin_memcpy_small sz al src dst
  else expand_builtin_memcpy_big sz al src dst

(* Handling of volatile reads and writes *)

let offset_addressing (Addrmode(base, ofs, cst)) delta =
  Addrmode(base, ofs,
           match cst with
           | Coq_inl n -> Coq_inl(Integers.Int.add n delta)
           | Coq_inr(id, n) -> Coq_inr(id, Integers.Int.add n delta))

let expand_builtin_vload_common chunk addr res =
  match chunk, res with
  | Mint8unsigned, [IR res] ->
     emit (Pmovzb_rm (res,addr))
  | Mint8signed, [IR res] ->
     emit (Pmovsb_rm (res,addr))
  | Mint16unsigned, [IR res] ->
     emit (Pmovzw_rm (res,addr))
  | Mint16signed, [IR res] ->
     emit (Pmovsw_rm (res,addr))
  | Mint32, [IR res] ->
     emit (Pmov_rm (res,addr))
  | Mint64, [IR res1; IR res2] ->
     let addr' = offset_addressing addr (coqint_of_camlint 4l) in
     if not (Asmgen.addressing_mentions addr res2) then begin
       emit (Pmov_rm (res2,addr));
       emit (Pmov_rm (res1,addr'))
     end else begin
       emit (Pmov_rm (res1,addr'));
       emit (Pmov_rm (res2,addr))
     end
  | Mfloat32, [FR res] ->
     emit (Pmovss_fm (res,addr))
  | Mfloat64, [FR res] ->
     emit (Pmovsd_fm (res,addr))
  | _ ->
     assert false

let expand_builtin_vload chunk args res =
  match args with
  | [IR addr] ->
     expand_builtin_vload_common chunk (Addrmode (Some addr,None, Coq_inl _0)) res
  | _ ->
     assert false

let expand_builtin_vload_global chunk id ofs args res =
  expand_builtin_vload_common chunk (Addrmode(None, None, Coq_inr(id,ofs))) res

let expand_builtin_vstore_common chunk addr src tmp =
  match chunk, src with
  | (Mint8signed | Mint8unsigned), [IR src] ->
     if Asmgen.low_ireg src then
       emit (Pmovb_mr (addr,src))
     else begin
       emit (Pmov_rr (tmp,src));
       emit (Pmovb_mr (addr,tmp))
     end
  | (Mint16signed | Mint16unsigned), [IR src] ->
     emit (Pmovw_mr (addr,src))
  | Mint32, [IR src] ->
     emit (Pmov_mr (addr,src))
  | Mint64, [IR src1; IR src2] ->
     let addr' = offset_addressing addr (coqint_of_camlint 4l) in
     emit (Pmov_mr (addr,src2));
     emit (Pmov_mr (addr',src1))
  | Mfloat32, [FR src] ->
     emit (Pmovss_mf (addr,src))
  | Mfloat64, [FR src] ->
     emit (Pmovsd_mf (addr,src))
  | _ ->
     assert false
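(* Example: a volatile load of an [Mint64] into the pair %edx (high word)
   and %eax (low word) from the address held in %eax must read the high
   word first, because the low destination doubles as the address
   register:

       movl  4(%eax), %edx
       movl  0(%eax), %eax

   Loading in the other order would clobber the address before the second
   read; this is what the [addressing_mentions] test above checks for. *)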
let expand_builtin_vstore chunk args =
  match args with
  | IR addr :: src ->
     expand_builtin_vstore_common chunk
       (Addrmode(Some addr, None, Coq_inl Integers.Int.zero)) src
       (if addr = EAX then ECX else EAX)
  | _ -> assert false

let expand_builtin_vstore_global chunk id ofs args =
  expand_builtin_vstore_common chunk
    (Addrmode(None, None, Coq_inr(id,ofs))) args EAX


(* Handling of varargs *)

let expand_builtin_va_start r =
  if not !current_function.fn_sig.sig_cc.cc_vararg then
    invalid_arg "Fatal error: va_start used in non-vararg function";
  let ofs = coqint_of_camlint
    Int32.(add (add !PrintAsmaux.current_function_stacksize 4l)
               (mul 4l (Z.to_int32 (Conventions1.size_arguments
                                      !current_function.fn_sig)))) in
  emit (Pmov_mr (Addrmode (Some r, None, Coq_inl _0),ESP));
  emit (Paddl_mi (Addrmode (Some r,None,Coq_inl _0),ofs))

(* Handling of compiler-inlined builtins *)

let expand_builtin_inline name args res =
  match name, args, res with
  (* Integer arithmetic *)
  | ("__builtin_bswap"| "__builtin_bswap32"), [IR a1], [IR res] ->
     if a1 <> res then
       emit (Pmov_rr (res,a1));
     emit (Pbswap res)
  | "__builtin_bswap16", [IR a1], [IR res] ->
     if a1 <> res then
       emit (Pmov_rr (res,a1));
     emit (Prolw_8 res)
  | "__builtin_clz", [IR a1], [IR res] ->
     emit (Pbsrl (a1,res));
     emit (Pxor_ri(res,coqint_of_camlint 31l))
  | "__builtin_ctz", [IR a1], [IR res] ->
     emit (Pbsfl (a1,res))
  (* Float arithmetic *)
  | "__builtin_fabs", [FR a1], [FR res] ->
     if a1 <> res then
       emit (Pmovsd_ff (res,a1));
     emit (Pabsd res) (* This ensures that need_masks is set to true *)
  | "__builtin_fsqrt", [FR a1], [FR res] ->
     emit (Psqrtsd (a1,res))
  | "__builtin_fmax", [FR a1; FR a2], [FR res] ->
     if res = a1 then
       emit (Pmaxsd (a2,res))
     else if res = a2 then
       emit (Pmaxsd (a1,res))
     else begin
       emit (Pmovsd_ff (res,a1));
       emit (Pmaxsd (a2,res))
     end
  | "__builtin_fmin", [FR a1; FR a2], [FR res] ->
     if res = a1 then
       emit (Pminsd (a2,res))
     else if res = a2 then
       emit (Pminsd (a1,res))
     else begin
       emit (Pmovsd_ff (res,a1));
       emit (Pminsd (a2,res))
     end
  | "__builtin_fmadd", [FR a1; FR a2; FR a3], [FR res] ->
     if res = a1 then
       emit (Pfmadd132 (a3,a2,res))
     else if res = a2 then
       emit (Pfmadd213 (a1,a3,res))
     else if res = a3 then
       emit (Pfmadd231 (a1,a2,res))
     else begin
       emit (Pmovsd_ff (res,a3));
       emit (Pfmadd231 (a1,a2,res))
     end
  | "__builtin_fmsub", [FR a1; FR a2; FR a3], [FR res] ->
     if res = a1 then
       emit (Pfmsub132 (a3,a2,res))
     else if res = a2 then
       emit (Pfmsub213 (a1,a3,res))
     else if res = a3 then
       emit (Pfmsub231 (a1,a2,res))
     else begin
       emit (Pmovsd_ff (res,a3));
       emit (Pfmsub231 (a1,a2,res))
     end
  | "__builtin_fnmadd", [FR a1; FR a2; FR a3], [FR res] ->
     if res = a1 then
       emit (Pfnmadd132 (a3,a2,res))
     else if res = a2 then
       emit (Pfnmadd213 (a1,a3,res))
     else if res = a3 then
       emit (Pfnmadd231 (a1,a2,res))
     else begin
       emit (Pmovsd_ff (res,a3));
       emit (Pfnmadd231 (a1,a2,res))
     end
  | "__builtin_fnmsub", [FR a1; FR a2; FR a3], [FR res] ->
     if res = a1 then
       emit (Pfnmsub132 (a3,a2,res))
     else if res = a2 then
       emit (Pfnmsub213 (a1,a3,res))
     else if res = a3 then
       emit (Pfnmsub231 (a1,a2,res))
     else begin
       emit (Pmovsd_ff (res,a3));
       emit (Pfnmsub231 (a1,a2,res))
     end
  (* 64-bit integer arithmetic *)
  | "__builtin_negl", [IR ah; IR al], [IR rh; IR rl] ->
     assert (ah = EDX && al = EAX && rh = EDX && rl = EAX);
     emit (Pneg EAX);
     emit (Padcl_ir (_0,EDX));
     emit (Pneg EDX)
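  (* [__builtin_negl] negates the 64-bit value EDX:EAX in place:
     negl %eax sets the carry flag iff %eax was nonzero, adcl $0, %edx
     folds that borrow into the high word, and negl %edx finishes:
     -(2^32*h + l) = 2^32*(-(h + carry)) + (-l). *)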
  | "__builtin_addl", [IR ah; IR al; IR bh; IR bl], [IR rh; IR rl] ->
     assert (ah = EDX && al = EAX && bh = ECX && bl = EBX && rh = EDX && rl = EAX);
     emit (Paddl (EBX,EAX));
     emit (Padcl_rr (ECX,EDX))
  | "__builtin_subl", [IR ah; IR al; IR bh; IR bl], [IR rh; IR rl] ->
     assert (ah = EDX && al = EAX && bh = ECX && bl = EBX && rh = EDX && rl = EAX);
     emit (Psub_rr (EAX,EBX));
     emit (Psbbl (ECX,EDX))
  | "__builtin_mull", [IR a; IR b], [IR rh; IR rl] ->
     assert (a = EAX && b = EDX && rh = EDX && rl = EAX);
     emit (Pmul_r EDX)
  (* Memory accesses *)
  | "__builtin_read16_reversed", [IR a1], [IR res] ->
     let addr = Addrmode(Some a1,None,Coq_inl _0) in
     emit (Pmovzw_rm (res,addr));
     emit (Prolw_8 res)
  | "__builtin_read32_reversed", [IR a1], [IR res] ->
     let addr = Addrmode(Some a1,None,Coq_inl _0) in
     emit (Pmov_rm (res,addr));
     emit (Pbswap res)
  | "__builtin_write16_reversed", [IR a1; IR a2], _ ->
     let tmp = if a1 = ECX then EDX else ECX in
     let addr = Addrmode(Some a1,None,Coq_inl _0) in
     if a2 <> tmp then
       emit (Pmov_rr (tmp,a2));
     emit (Prolw_8 tmp);
     emit (Pmovw_mr (addr,tmp))
  (* Vararg stuff *)
  | "__builtin_va_start", [IR a], _ ->
     expand_builtin_va_start a
  (* Synchronization *)
  | "__builtin_membar", [], _ ->
     ()
  (* Catch-all *)
  | _ ->
     invalid_arg ("unrecognized builtin " ^ name)


let expand_instruction instr =
  match instr with
  | Pallocframe (sz, ofs_ra, ofs_link) ->
     let sz = sp_adjustment sz in
     let addr = Addrmode(Some ESP,None,Coq_inl (coqint_of_camlint (Int32.add sz 4l))) in
     let addr' = Addrmode (Some ESP, None, Coq_inl ofs_link) in
     let sz' = coqint_of_camlint sz in
     emit (Psubl_ri (ESP,sz'));
     emit (Pcfi_adjust sz');
     emit (Plea (EDX,addr));
     emit (Pmov_mr (addr',EDX));
     PrintAsmaux.current_function_stacksize := sz
  | Pfreeframe(sz, ofs_ra, ofs_link) ->
     let sz = sp_adjustment sz in
     emit (Paddl_ri (ESP,coqint_of_camlint sz))
  | Pbuiltin (ef,args, res) ->
     begin
       match ef with
       | EF_builtin(name, sg) ->
          expand_builtin_inline (extern_atom name) args res
       | EF_vload chunk ->
          expand_builtin_vload chunk args res
       | EF_vstore chunk ->
          expand_builtin_vstore chunk args
       | EF_vload_global(chunk, id, ofs) ->
          expand_builtin_vload_global chunk id ofs args res
       | EF_vstore_global(chunk, id, ofs) ->
          expand_builtin_vstore_global chunk id ofs args
       | EF_memcpy(sz, al) ->
          expand_builtin_memcpy (Int32.to_int (camlint_of_coqint sz))
            (Int32.to_int (camlint_of_coqint al)) args
       | EF_annot_val(txt, targ) ->
          expand_annot_val txt targ args res
       | EF_inline_asm(txt, sg, clob) ->
          emit instr
       | _ -> assert false
     end
  | _ -> emit instr

let expand_function fn =
  set_current_function fn;
  List.iter expand_instruction fn.fn_code;
  get_current_function ()

let expand_fundef = function
  | Internal f -> Internal (expand_function f)
  | External ef -> External ef

let expand_program (p: Asm.program) : Asm.program =
  AST.transform_program expand_fundef p
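(* The new entry point is [expand_program], which rewrites every internal
   function in place.  A sketch of the intended use from the driver
   (illustrative only; the actual driver plumbing is not part of this
   patch, and [expand_and_print] is a hypothetical name):

     let expand_and_print oc (p: Asm.program) =
       let p = Asmexpand.expand_program p in
       PrintAsm.print_program oc p

   Pseudo-instructions such as [Pallocframe]/[Pfreeframe] and builtins are
   thus turned into concrete IA32 instructions before printing, instead of
   being expanded on the fly in PrintAsm. *)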