Diffstat (limited to 'ia32/Asmexpand.ml')
-rw-r--r-- | ia32/Asmexpand.ml | 256
1 files changed, 236 insertions, 20 deletions
diff --git a/ia32/Asmexpand.ml b/ia32/Asmexpand.ml
index e93de125..e07672a6 100644
--- a/ia32/Asmexpand.ml
+++ b/ia32/Asmexpand.ml
@@ -16,16 +16,198 @@
 open Asm
 open Asmexpandaux
+open Asmgen
 open AST
 open Camlcoq
-
+open Datatypes
+open Integers
+
 (* Useful constants and helper functions *)
-let _0 = Integers.Int.zero
+let _0 = Int.zero
+let _1 = Int.one
+let _2 = coqint_of_camlint 2l
+let _4 = coqint_of_camlint 4l
+let _8 = coqint_of_camlint 8l
+
+let stack_alignment () =
+  if Configuration.system = "macosx" then 16
+  else 8
+
+(* SP adjustment to allocate or free a stack frame *)
+
+let int32_align n a =
+  if n >= 0l
+  then Int32.logand (Int32.add n (Int32.of_int (a-1))) (Int32.of_int (-a))
+  else Int32.logand n (Int32.of_int (-a))
+
+let sp_adjustment sz =
+  let sz = camlint_of_coqint sz in
+  (* Preserve proper alignment of the stack *)
+  let sz = int32_align sz (stack_alignment ()) in
+  (* The top 4 bytes have already been allocated by the "call" instruction. *)
+  let sz = Int32.sub sz 4l in
+  sz
+
+
+(* Built-ins. They come in two flavors:
+   - annotation statements: take their arguments in registers or stack
+     locations; generate no code;
+   - inlined by the compiler: take their arguments in arbitrary
+     registers; preserve all registers except ECX, EDX, XMM6 and XMM7. *)
+
+(* Handling of annotations *)
+
+let expand_annot_val txt targ args res =
+  emit (Pannot (EF_annot(txt,[targ]), List.map (fun r -> AA_base r) args));
+  match args, res with
+  | [IR src], [IR dst] ->
+      if dst <> src then emit (Pmov_rr (dst,src))
+  | [FR src], [FR dst] ->
+      if dst <> src then emit (Pmovsd_ff (dst,src))
+  | _, _ -> assert false
+
+
+(* Handling of memcpy *)
+
+(* Unaligned memory accesses are quite fast on IA32, so use large
+   memory accesses regardless of alignment. *)
+
+let expand_builtin_memcpy_small sz al src dst =
+  assert (src = EDX && dst = EAX);
+  let rec copy ofs sz =
+    if sz >= 8 && !Clflags.option_ffpu then begin
+      emit (Pmovq_rm (XMM7,Addrmode (Some src, None, Coq_inl ofs)));
+      emit (Pmovq_mr (Addrmode (Some dst, None, Coq_inl ofs),XMM7));
+      copy (Int.add ofs _8) (sz - 8)
+    end else if sz >= 4 then begin
+      emit (Pmov_rm (ECX,Addrmode (Some src, None, Coq_inl ofs)));
+      emit (Pmov_mr (Addrmode (Some dst, None, Coq_inl ofs),ECX));
+      copy (Int.add ofs _4) (sz - 4)
+    end else if sz >= 2 then begin
+      emit (Pmovw_rm (ECX,Addrmode (Some src, None, Coq_inl ofs)));
+      emit (Pmovw_mr (Addrmode (Some dst, None, Coq_inl ofs),ECX));
+      copy (Int.add ofs _2) (sz - 2)
+    end else if sz >= 1 then begin
+      emit (Pmovb_rm (ECX,Addrmode (Some src, None, Coq_inl ofs)));
+      emit (Pmovb_mr (Addrmode (Some dst, None, Coq_inl ofs),ECX));
+      copy (Int.add ofs _1) (sz - 1)
+    end in
+  copy _0 sz
+
+let expand_builtin_memcpy_big sz al src dst =
+  assert (src = ESI && dst = EDI);
+  emit (Pmov_ri (ECX,coqint_of_camlint (Int32.of_int (sz / 4))));
+  emit Prep_movsl;
+  if sz mod 4 >= 2 then emit Pmovsw;
+  if sz mod 2 >= 1 then emit Pmovsb
+
+let expand_builtin_memcpy sz al args =
+  let (dst, src) =
+    match args with [IR d; IR s] -> (d, s) | _ -> assert false in
+  if sz <= 32
+  then expand_builtin_memcpy_small sz al src dst
+  else expand_builtin_memcpy_big sz al src dst
+
+(* Handling of volatile reads and writes *)
+
+let offset_addressing (Addrmode(base, ofs, cst)) delta =
+  Addrmode(base, ofs,
+           match cst with
+           | Coq_inl n -> Coq_inl(Integers.Int.add n delta)
+           | Coq_inr(id, n) -> Coq_inr(id, Integers.Int.add n delta))
+
+let expand_builtin_vload_common chunk addr res =
+  match chunk, res with
+  | Mint8unsigned, [IR res] ->
+      emit (Pmovzb_rm (res,addr))
+  | Mint8signed, [IR res] ->
+      emit (Pmovsb_rm (res,addr))
+  | Mint16unsigned, [IR res] ->
+      emit (Pmovzw_rm (res,addr))
+  | Mint16signed, [IR res] ->
+      emit (Pmovsw_rm (res,addr))
+  | Mint32, [IR res] ->
+      emit (Pmov_rm (res,addr))
+  | Mint64, [IR res1; IR res2] ->
+      let addr' = offset_addressing addr (coqint_of_camlint 4l) in
+      if not (Asmgen.addressing_mentions addr res2) then begin
+        emit (Pmov_rm (res2,addr));
+        emit (Pmov_rm (res1,addr'))
+      end else begin
+        emit (Pmov_rm (res1,addr'));
+        emit (Pmov_rm (res2,addr))
+      end
+  | Mfloat32, [FR res] ->
+      emit (Pmovss_fm (res,addr))
+  | Mfloat64, [FR res] ->
+      emit (Pmovsd_fm (res,addr))
+  | _ ->
+      assert false
+
+let expand_builtin_vload chunk args res =
+  match args with
+  | [IR addr] ->
+      expand_builtin_vload_common chunk (Addrmode (Some addr,None, Coq_inl _0)) res
+  | _ ->
+      assert false
+
+let expand_builtin_vload_global chunk id ofs args res =
+  expand_builtin_vload_common chunk (Addrmode(None, None, Coq_inr(id,ofs))) res
+
+let expand_builtin_vstore_common chunk addr src tmp =
+  match chunk, src with
+  | (Mint8signed | Mint8unsigned), [IR src] ->
+      if Asmgen.low_ireg src then
+        emit (Pmovb_mr (addr,src))
+      else begin
+        emit (Pmov_rr (tmp,src));
+        emit (Pmovb_mr (addr,tmp))
+      end
+  | (Mint16signed | Mint16unsigned), [IR src] ->
+      emit (Pmovw_mr (addr,src))
+  | Mint32, [IR src] ->
+      emit (Pmov_mr (addr,src))
+  | Mint64, [IR src1; IR src2] ->
+      let addr' = offset_addressing addr (coqint_of_camlint 4l) in
+      emit (Pmov_mr (addr,src2));
+      emit (Pmov_mr (addr',src1))
+  | Mfloat32, [FR src] ->
+      emit (Pmovss_mf (addr,src))
+  | Mfloat64, [FR src] ->
+      emit (Pmovsd_mf (addr,src))
+  | _ ->
+      assert false
+
+let expand_builtin_vstore chunk args =
+  match args with
+  | IR addr :: src ->
+      expand_builtin_vstore_common chunk
+        (Addrmode(Some addr, None, Coq_inl Integers.Int.zero)) src
+        (if addr = EAX then ECX else EAX)
+  | _ -> assert false
+
+
+let expand_builtin_vstore_global chunk id ofs args =
+  expand_builtin_vstore_common chunk
+    (Addrmode(None, None, Coq_inr(id,ofs))) args EAX
+
+
+(* Handling of varargs *)
+
+let expand_builtin_va_start r =
+  if not !current_function.fn_sig.sig_cc.cc_vararg then
+    invalid_arg "Fatal error: va_start used in non-vararg function";
+  let ofs = coqint_of_camlint
+    Int32.(add (add !PrintAsmaux.current_function_stacksize 4l)
+               (mul 4l (Z.to_int32 (Conventions1.size_arguments
+                                      !current_function.fn_sig)))) in
+  emit (Pmov_mr (Addrmode (Some r, None, Coq_inl _0),ESP));
+  emit (Paddl_mi (Addrmode (Some r,None,Coq_inl _0),ofs))
+
 (* Handling of compiler-inlined builtins *)
 
-let emit_builtin_inline oc name args res =
+let expand_builtin_inline name args res =
   match name, args, res with
   (* Integer arithmetic *)
   | ("__builtin_bswap"| "__builtin_bswap32"), [IR a1], [IR res] ->
@@ -128,36 +310,70 @@ let emit_builtin_inline oc name args res =
       assert (a = EAX && b = EDX && rh = EDX && rl = EAX);
       emit (Pmul_r EDX)
   (* Memory accesses *)
-  (* | "__builtin_read16_reversed", [IR a1], [IR res] ->
-      fprintf oc " movzwl 0(%a), %a\n" ireg a1 ireg res;
-      fprintf oc " rolw $8, %a\n" ireg16 res
+  | "__builtin_read16_reversed", [IR a1], [IR res] ->
+      let addr = Addrmode(Some a1,None,Coq_inl _0) in
+      emit (Pmovzw_rm (res,addr));
+      emit (Prolw_8 res)
   | "__builtin_read32_reversed", [IR a1], [IR res] ->
-      fprintf oc " movl 0(%a), %a\n" ireg a1 ireg res;
-      fprintf oc " bswap %a\n" ireg res
+      let addr = Addrmode(Some a1,None,Coq_inl _0) in
+      emit (Pmov_rm (res,addr));
+      emit (Pbswap res)
   | "__builtin_write16_reversed", [IR a1; IR a2], _ ->
       let tmp = if a1 = ECX then EDX else ECX in
+      let addr = Addrmode(Some a1,None,Coq_inl _0) in
       if a2 <> tmp then
        emit (Pmov_rr (a2,tmp));
-      fprintf oc " xchg %a, %a\n" ireg8 tmp high_ireg8 tmp;
-      fprintf oc " movw %a, 0(%a)\n" ireg16 tmp ireg a1
-  | "__builtin_write32_reversed", [IR a1; IR a2], _ ->
-      let tmp = if a1 = ECX then EDX else ECX in
-      if a2 <> tmp then
-        emit (Pmov_rr (a2,tmp));
-      emit (Pbswap res);
-      fprintf oc " movl %a, 0(%a)\n" ireg tmp ireg a1
+      emit (Pxchg (tmp,tmp));
+      emit (Pmovw_mr (addr,tmp))
+  (* Vararg stuff *)
+  | "__builtin_va_start", [IR a], _ ->
+      expand_builtin_va_start a
   (* Synchronization *)
   | "__builtin_membar", [], _ -> ()
-  (* Vararg stuff *)
-  | "__builtin_va_start", [IR a], _ ->
-      print_builtin_va_start oc a
-  (* Catch-all *) *)
+  (* Catch-all *)
   | _ -> invalid_arg ("unrecognized builtin " ^ name)
+
+
 let expand_instruction instr =
   match instr with
+  | Pallocframe (sz, ofs_ra, ofs_link) ->
+      let sz = sp_adjustment sz in
+      let addr = Addrmode(Some ESP,None,Coq_inl (coqint_of_camlint (Int32.add sz 4l))) in
+      let addr' = Addrmode (Some ESP, None, Coq_inl ofs_link) in
+      let sz' = coqint_of_camlint sz in
+      emit (Psubl_ri (ESP,sz'));
+      emit (Pcfi_adjust sz');
+      emit (Plea (EDX,addr));
+      emit (Pmov_mr (addr',EDX));
+      PrintAsmaux.current_function_stacksize := sz
+  | Pfreeframe(sz, ofs_ra, ofs_link) ->
+      let sz = sp_adjustment sz in
+      emit (Paddl_ri (ESP,coqint_of_camlint sz))
+  | Pbuiltin (ef,args, res) ->
+      begin
+        match ef with
+        | EF_builtin(name, sg) ->
+            expand_builtin_inline (extern_atom name) args res
+        | EF_vload chunk ->
+            expand_builtin_vload chunk args res
+        | EF_vstore chunk ->
+            expand_builtin_vstore chunk args
+        | EF_vload_global(chunk, id, ofs) ->
+            expand_builtin_vload_global chunk id ofs args res
+        | EF_vstore_global(chunk, id, ofs) ->
+            expand_builtin_vstore_global chunk id ofs args
+        | EF_memcpy(sz, al) ->
+            expand_builtin_memcpy (Int32.to_int (camlint_of_coqint sz))
+              (Int32.to_int (camlint_of_coqint al)) args
+        | EF_annot_val(txt, targ) ->
+            expand_annot_val txt targ args res
+        | EF_inline_asm(txt, sg, clob) ->
+            emit instr
+        | _ -> assert false
+      end
   | _ -> emit instr
 
 let expand_program p = p
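
The frame-size bookkeeping introduced by this patch (int32_align and sp_adjustment, used by the Pallocframe/Pfreeframe expansion) is easy to check in isolation. Below is a minimal standalone OCaml sketch, not part of the patch: it reproduces the arithmetic of rounding the frame size up to the stack alignment (16 bytes on macOS, 8 otherwise) and then dropping the 4 bytes already pushed by the call instruction. The ~alignment parameter is introduced here only so the example does not depend on Configuration.system.

(* Standalone sketch of the frame-size arithmetic above; illustration only. *)
let int32_align n a =
  if n >= 0l
  then Int32.logand (Int32.add n (Int32.of_int (a - 1))) (Int32.of_int (- a))
  else Int32.logand n (Int32.of_int (- a))

let sp_adjustment ~alignment sz =
  (* Round the frame size up to the stack alignment, then subtract the
     4 bytes already allocated by the "call" instruction. *)
  Int32.sub (int32_align sz alignment) 4l

let () =
  (* 18-byte frame, 8-byte alignment: rounded to 24, minus 4 -> subl $20, %esp *)
  Printf.printf "%ld\n" (sp_adjustment ~alignment:8 18l);
  (* same frame with the 16-byte macOS alignment: rounded to 32, minus 4 -> 28 *)
  Printf.printf "%ld\n" (sp_adjustment ~alignment:16 18l)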
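Similarly, the memcpy expansion strategy in the patch (expand_builtin_memcpy) can be summarised as: blocks of at most 32 bytes are copied with unrolled moves (8 bytes at a time through XMM7 when the FPU may be used, otherwise 4/2/1 bytes through ECX), while larger blocks use one rep movsl followed by at most one movsw and one movsb for the remainder. The following sketch models that decision; the step type and plan_memcpy are hypothetical names used only for illustration, whereas the real code emits Asm instructions directly through emit.

(* Simplified model of the size-based memcpy strategy above; illustration only. *)
type step = Move of int (* width in bytes *) | Rep_movsl of int | Movsw | Movsb

let plan_memcpy ~ffpu sz =
  if sz <= 32 then
    (* small copies: unrolled loads/stores *)
    let rec unroll sz acc =
      if sz >= 8 && ffpu then unroll (sz - 8) (Move 8 :: acc)
      else if sz >= 4 then unroll (sz - 4) (Move 4 :: acc)
      else if sz >= 2 then unroll (sz - 2) (Move 2 :: acc)
      else if sz >= 1 then unroll (sz - 1) (Move 1 :: acc)
      else List.rev acc
    in
    unroll sz []
  else
    (* big copies: rep movsl on the word count, then the 0-3 byte tail *)
    [Rep_movsl (sz / 4)]
    @ (if sz mod 4 >= 2 then [Movsw] else [])
    @ (if sz mod 2 >= 1 then [Movsb] else [])

let () =
  assert (plan_memcpy ~ffpu:true 20 = [Move 8; Move 8; Move 4]);
  assert (plan_memcpy ~ffpu:false 7 = [Move 4; Move 2; Move 1]);
  assert (plan_memcpy ~ffpu:true 71 = [Rep_movsl 17; Movsw; Movsb])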