path: root/ia32/Asmexpand.ml
author     Xavier Leroy <xavier.leroy@inria.fr>  2016-10-01 17:38:24 +0200
committer  Xavier Leroy <xavier.leroy@inria.fr>  2016-10-01 17:38:24 +0200
commit     a14b9578ee5297d954103e05d7b2d322816ddd8f (patch)
tree       93b7c2b6bd7de8a4dedaf399088257e0660959b8 /ia32/Asmexpand.ml
parent     3bef0962079cf971673b4267b0142bd5fe092509 (diff)
download   compcert-a14b9578ee5297d954103e05d7b2d322816ddd8f.tar.gz
           compcert-a14b9578ee5297d954103e05d7b2d322816ddd8f.zip
Support for 64-bit architectures: x86 in 64-bit mode
This commit enriches the IA32 port so that it supports x86 processors in
64-bit mode as well as in 32-bit mode, depending on the value of
Archi.ptr64, which itself is set from the configuration model. To
activate x86-64 bit support, configure with "x86_64-linux".

Main steps:
- Enrich Op.v and Asm.v with 64-bit operations.
- SelectLong: in 64-bit mode, use 64-bit operations directly; in 32-bit
  mode, fall back on the old implementation based on pairs of 32-bit
  integers.
- Conventions1: support the x86-64 ABI in addition to the 32-bit ABI.
- Add support for the new 64-bit operations everywhere.
- runtime/x86_64: implementation of the supporting library appropriate
  for x86 in 64-bit mode.

To do:
- More optimizations are possible on 64-bit integer arithmetic
  operations.
- Could add new chunks to load, say, an unsigned byte into a 64-bit
  long (currently we load as a 32-bit int then zero-extend).
- Implements the wrong ABI for struct passing.
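As a concrete illustration of how the port dispatches on Archi.ptr64, the
patch introduces small helpers that pick the 32- or 64-bit form of an
instruction. A minimal sketch of the idiom (the _Plea helper is verbatim
from the patch below; _Pmov_mr_ptr is a hypothetical analogue, and
Pleal/Pleaq, Pmovl_mr/Pmovq_mr are the 32-/64-bit constructors this
patch uses):

    let _Plea (r, addr) =
      if Archi.ptr64 then Pleaq (r, addr) else Pleal (r, addr)

    (* Hypothetical analogue for pointer-sized stores, same idiom: *)
    let _Pmov_mr_ptr (addr, r) =
      if Archi.ptr64 then Pmovq_mr (addr, r) else Pmovl_mr (addr, r)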
Diffstat (limited to 'ia32/Asmexpand.ml')
-rw-r--r--  ia32/Asmexpand.ml  367
1 file changed, 248 insertions, 119 deletions
diff --git a/ia32/Asmexpand.ml b/ia32/Asmexpand.ml
index 6a64221e..5c2a4bc9 100644
--- a/ia32/Asmexpand.ml
+++ b/ia32/Asmexpand.ml
@@ -19,38 +19,60 @@ open Asmexpandaux
open AST
open Camlcoq
open Datatypes
-open Integers
exception Error of string
(* Useful constants and helper functions *)
-let _0 = Int.zero
-let _1 = Int.one
+let _0 = Integers.Int.zero
+let _1 = Integers.Int.one
let _2 = coqint_of_camlint 2l
let _4 = coqint_of_camlint 4l
let _8 = coqint_of_camlint 8l
+
+let _0z = Z.zero
+let _1z = Z.one
+let _2z = Z.of_sint 2
+let _4z = Z.of_sint 4
+let _8z = Z.of_sint 8
+let _16z = Z.of_sint 16
-let stack_alignment () =
- if Configuration.system = "macosx" then 16
- else 8
+let stack_alignment () = 16
+
+(* Pseudo instructions for 32/64 bit compatibility *)
+
+let _Plea (r, addr) =
+ if Archi.ptr64 then Pleaq (r, addr) else Pleal (r, addr)
(* SP adjustment to allocate or free a stack frame *)
-let int32_align n a =
- if n >= 0l
- then Int32.logand (Int32.add n (Int32.of_int (a-1))) (Int32.of_int (-a))
- else Int32.logand n (Int32.of_int (-a))
+let align n a =
+ if n >= 0 then (n + a - 1) land (-a) else n land (-a)
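(* For example, align 13 16 = (13 + 15) land (-16) = 16, and
   align (-13) 16 = (-13) land (-16) = -16: negative sizes round
   away from zero, so the result stays a multiple of the alignment. *)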
-let sp_adjustment sz =
- let sz = camlint_of_coqint sz in
+let sp_adjustment_32 sz =
+ let sz = Z.to_int sz in
(* Preserve proper alignment of the stack *)
- let sz = int32_align sz (stack_alignment ()) in
+ let sz = align sz (stack_alignment ()) in
(* The top 4 bytes have already been allocated by the "call" instruction. *)
- let sz = Int32.sub sz 4l in
- sz
-
-
+ sz - 4
+
+let sp_adjustment_64 sz =
+ let sz = Z.to_int sz in
+ if is_current_function_variadic() then begin
+ (* If variadic, add room for register save area, which must be 16-aligned *)
+ let ofs = align (sz - 8) 16 in
+ let sz = ofs + 176 (* save area *) + 8 (* return address *) in
+ (* Preserve proper alignment of the stack *)
+ let sz = align sz 16 in
+ (* The top 8 bytes have already been allocated by the "call" instruction. *)
+ (sz - 8, ofs)
+ end else begin
+ (* Preserve proper alignment of the stack *)
+ let sz = align sz 16 in
+ (* The top 8 bytes have already been allocated by the "call" instruction. *)
+ (sz - 8, -1)
+ end
+
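(* Worked example: a variadic function with sz = 24 gives
   ofs = align (24 - 8) 16 = 16, then sz = 16 + 176 + 8 = 200,
   aligned up to 208, so sp_adjustment_64 returns (200, 16):
   rsp moves by 200 bytes and the save area sits at rsp + 16. *)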
(* Built-ins. They come in two flavors:
- annotation statements: take their arguments in registers or stack
locations; generate no code;
@@ -69,25 +91,25 @@ let expand_annot_val txt targ args res =
| _, _ ->
raise (Error "ill-formed __builtin_annot_intval")
-(* Translate a builtin argument into an addressing mode *)
-
-let addressing_of_builtin_arg = function
- | BA (IR r) -> Addrmode(Some r, None, Coq_inl Integers.Int.zero)
- | BA_addrstack ofs -> Addrmode(Some ESP, None, Coq_inl ofs)
- | BA_addrglobal(id, ofs) -> Addrmode(None, None, Coq_inr(id, ofs))
- | _ -> assert false
-
(* Operations on addressing modes *)
let offset_addressing (Addrmode(base, ofs, cst)) delta =
Addrmode(base, ofs,
match cst with
- | Coq_inl n -> Coq_inl(Int.add n delta)
- | Coq_inr(id, n) -> Coq_inr(id, Int.add n delta))
+ | Coq_inl n -> Coq_inl(Z.add n delta)
+ | Coq_inr(id, n) -> Coq_inr(id, Integers.Ptrofs.add n delta))
let linear_addr reg ofs = Addrmode(Some reg, None, Coq_inl ofs)
let global_addr id ofs = Addrmode(None, None, Coq_inr(id, ofs))
+(* Translate a builtin argument into an addressing mode *)
+
+let addressing_of_builtin_arg = function
+ | BA (IR r) -> linear_addr r Z.zero
+ | BA_addrstack ofs -> linear_addr RSP (Integers.Ptrofs.unsigned ofs)
+ | BA_addrglobal(id, ofs) -> global_addr id ofs
+ | _ -> assert false
+
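(* Example: addressing_of_builtin_arg (BA (IR r)) is the operand 0(r),
   and BA_addrstack ofs becomes an RSP-relative operand at the unsigned
   value of ofs; any other argument shape was ruled out earlier, hence
   the assert false. *)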
(* Handling of memcpy *)
(* Unaligned memory accesses are quite fast on IA32, so use large
@@ -95,29 +117,34 @@ let global_addr id ofs = Addrmode(None, None, Coq_inr(id, ofs))
let expand_builtin_memcpy_small sz al src dst =
let rec copy src dst sz =
- if sz >= 8 && !Clflags.option_ffpu then begin
- emit (Pmovq_rm (XMM7, src));
- emit (Pmovq_mr (dst, XMM7));
- copy (offset_addressing src _8) (offset_addressing dst _8) (sz - 8)
+ if sz >= 8 && Archi.ptr64 then begin
+ emit (Pmovq_rm (RCX, src));
+ emit (Pmovq_mr (dst, RCX));
+ copy (offset_addressing src _8z) (offset_addressing dst _8z) (sz - 8)
+ end else if sz >= 8 && !Clflags.option_ffpu then begin
+ emit (Pmovsq_rm (XMM7, src));
+ emit (Pmovsq_mr (dst, XMM7));
+ copy (offset_addressing src _8z) (offset_addressing dst _8z) (sz - 8)
end else if sz >= 4 then begin
- emit (Pmov_rm (ECX, src));
- emit (Pmov_mr (dst, ECX));
- copy (offset_addressing src _4) (offset_addressing dst _4) (sz - 4)
+ emit (Pmovl_rm (RCX, src));
+ emit (Pmovl_mr (dst, RCX));
+ copy (offset_addressing src _4z) (offset_addressing dst _4z) (sz - 4)
end else if sz >= 2 then begin
- emit (Pmovw_rm (ECX, src));
- emit (Pmovw_mr (dst, ECX));
- copy (offset_addressing src _2) (offset_addressing dst _2) (sz - 2)
+ emit (Pmovw_rm (RCX, src));
+ emit (Pmovw_mr (dst, RCX));
+ copy (offset_addressing src _2z) (offset_addressing dst _2z) (sz - 2)
end else if sz >= 1 then begin
- emit (Pmovb_rm (ECX, src));
- emit (Pmovb_mr (dst, ECX));
- copy (offset_addressing src _1) (offset_addressing dst _1) (sz - 1)
+ emit (Pmovb_rm (RCX, src));
+ emit (Pmovb_mr (dst, RCX));
+ copy (offset_addressing src _1z) (offset_addressing dst _1z) (sz - 1)
end in
copy (addressing_of_builtin_arg src) (addressing_of_builtin_arg dst) sz
let expand_builtin_memcpy_big sz al src dst =
- if src <> BA (IR ESI) then emit (Plea (ESI, addressing_of_builtin_arg src));
- if dst <> BA (IR EDI) then emit (Plea (EDI, addressing_of_builtin_arg dst));
- emit (Pmov_ri (ECX,coqint_of_camlint (Int32.of_int (sz / 4))));
+ if src <> BA (IR RSI) then emit (_Plea (RSI, addressing_of_builtin_arg src));
+ if dst <> BA (IR RDI) then emit (_Plea (RDI, addressing_of_builtin_arg dst));
+ (* TODO: movsq? *)
+ emit (Pmovl_ri (RCX,coqint_of_camlint (Int32.of_int (sz / 4))));
emit Prep_movsl;
if sz mod 4 >= 2 then emit Pmovsw;
if sz mod 2 >= 1 then emit Pmovsb
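(* Example: a big memcpy of sz = 11 loads RCX with 11 / 4 = 2 and runs
   rep movsl (8 bytes); 11 mod 4 = 3 then leaves a movsw and a movsb.
   The small variant above would instead emit one movq, one movw and
   one movb load/store pair in 64-bit mode. *)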
@@ -141,15 +168,17 @@ let expand_builtin_vload_common chunk addr res =
| Mint16signed, BR(IR res) ->
emit (Pmovsw_rm (res,addr))
| Mint32, BR(IR res) ->
- emit (Pmov_rm (res,addr))
+ emit (Pmovl_rm (res,addr))
+ | Mint64, BR(IR res) ->
+ emit (Pmovq_rm (res,addr))
| Mint64, BR_splitlong(BR(IR res1), BR(IR res2)) ->
- let addr' = offset_addressing addr _4 in
+ let addr' = offset_addressing addr _4z in
if not (Asmgen.addressing_mentions addr res2) then begin
- emit (Pmov_rm (res2,addr));
- emit (Pmov_rm (res1,addr'))
+ emit (Pmovl_rm (res2,addr));
+ emit (Pmovl_rm (res1,addr'))
end else begin
- emit (Pmov_rm (res1,addr'));
- emit (Pmov_rm (res2,addr))
+ emit (Pmovl_rm (res1,addr'));
+ emit (Pmovl_rm (res2,addr))
end
| Mfloat32, BR(FR res) ->
emit (Pmovss_fm (res,addr))
@@ -168,20 +197,22 @@ let expand_builtin_vload chunk args res =
let expand_builtin_vstore_common chunk addr src tmp =
match chunk, src with
| (Mint8signed | Mint8unsigned), BA(IR src) ->
- if Asmgen.low_ireg src then
+ if Archi.ptr64 || Asmgen.low_ireg src then
emit (Pmovb_mr (addr,src))
else begin
- emit (Pmov_rr (tmp,src));
- emit (Pmovb_mr (addr,tmp))
- end
+ emit (Pmov_rr (tmp,src));
+ emit (Pmovb_mr (addr,tmp))
+ end
| (Mint16signed | Mint16unsigned), BA(IR src) ->
emit (Pmovw_mr (addr,src))
| Mint32, BA(IR src) ->
- emit (Pmov_mr (addr,src))
+ emit (Pmovl_mr (addr,src))
+ | Mint64, BA(IR src) ->
+ emit (Pmovq_mr (addr,src))
| Mint64, BA_splitlong(BA(IR src1), BA(IR src2)) ->
- let addr' = offset_addressing addr _4 in
- emit (Pmov_mr (addr,src2));
- emit (Pmov_mr (addr',src1))
+ let addr' = offset_addressing addr _4z in
+ emit (Pmovl_mr (addr,src2));
+ emit (Pmovl_mr (addr',src1))
| Mfloat32, BA(FR src) ->
emit (Pmovss_mf (addr,src))
| Mfloat64, BA(FR src) ->
@@ -194,20 +225,65 @@ let expand_builtin_vstore chunk args =
| [addr; src] ->
let addr = addressing_of_builtin_arg addr in
expand_builtin_vstore_common chunk addr src
- (if Asmgen.addressing_mentions addr EAX then ECX else EAX)
+ (if Asmgen.addressing_mentions addr RAX then RCX else RAX)
| _ -> assert false
(* Handling of varargs *)
-let expand_builtin_va_start r =
+let rec next_arg_locations ir fr ofs = function
+ | [] ->
+ (ir, fr, ofs)
+ | (Tint | Tlong | Tany32 | Tany64) :: l ->
+ if ir < 6
+ then next_arg_locations (ir + 1) fr ofs l
+ else next_arg_locations ir fr (ofs + 8) l
+ | (Tfloat | Tsingle) :: l ->
+ if fr < 8
+ then next_arg_locations ir (fr + 1) ofs l
+ else next_arg_locations ir fr (ofs + 8) l
+
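(* Example: next_arg_locations 0 0 0 [Tint; Tfloat; Tlong] = (2, 1, 0):
   the two integer-class arguments take the first two integer registers,
   the float takes one SSE register, and nothing spills to the stack. *)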
+let current_function_stacksize = ref 0L
+
+let expand_builtin_va_start_32 r =
if not (is_current_function_variadic ()) then
invalid_arg "Fatal error: va_start used in non-vararg function";
- let ofs = coqint_of_camlint
+ let ofs =
Int32.(add (add !PrintAsmaux.current_function_stacksize 4l)
(mul 4l (Z.to_int32 (Conventions1.size_arguments
(get_current_function_sig ()))))) in
- emit (Pmov_mr (linear_addr r _0, ESP));
- emit (Padd_mi (linear_addr r _0, ofs))
+ emit (Pleal (RAX, linear_addr RSP (Z.of_uint32 ofs)));
+ emit (Pmovl_mr (linear_addr r _0z, RAX))
+
+let expand_builtin_va_start_64 r =
+ if not (is_current_function_variadic ()) then
+ invalid_arg "Fatal error: va_start used in non-vararg function";
+ let (ir, fr, ofs) =
+ next_arg_locations 0 0 0 (get_current_function_args ()) in
+ (* [r] points to the following struct:
+ struct {
+ unsigned int gp_offset;
+ unsigned int fp_offset;
+ void *overflow_arg_area;
+ void *reg_save_area;
+ }
+ gp_offset is initialized to ir * 8
+ fp_offset is initialized to 6 * 8 + fr * 16
+ overflow_arg_area is initialized to sp + current stacksize + ofs
+ reg_save_area is initialized to
+ sp + current stacksize - 16 - save area size (6 * 8 + 8 * 16) *)
+ let gp_offset = Int32.of_int (ir * 8)
+ and fp_offset = Int32.of_int (6 * 8 + fr * 16)
+ and overflow_arg_area = Int64.(add !current_function_stacksize (of_int ofs))
+ and reg_save_area = Int64.(sub !current_function_stacksize 192L) in
+ assert (r <> RAX);
+ emit (Pmovl_ri (RAX, coqint_of_camlint gp_offset));
+ emit (Pmovl_mr (linear_addr r _0z, RAX));
+ emit (Pmovl_ri (RAX, coqint_of_camlint fp_offset));
+ emit (Pmovl_mr (linear_addr r _4z, RAX));
+ emit (Pleaq (RAX, linear_addr RSP (Z.of_uint64 overflow_arg_area)));
+ emit (Pmovq_mr (linear_addr r _8z, RAX));
+ emit (Pleaq (RAX, linear_addr RSP (Z.of_uint64 reg_save_area)));
+ emit (Pmovq_mr (linear_addr r _16z, RAX))
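(* Example (assuming get_current_function_args returns just the fixed
   parameters): for int f(int n, ...) we get (ir, fr, ofs) = (1, 0, 0),
   so the code above stores gp_offset = 8, fp_offset = 48,
   overflow_arg_area = rsp + stacksize, reg_save_area = rsp + stacksize
   - 192. *)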
(* FMA operations *)
@@ -239,38 +315,47 @@ let expand_builtin_inline name args res =
| ("__builtin_bswap"| "__builtin_bswap32"), [BA(IR a1)], BR(IR res) ->
if a1 <> res then
emit (Pmov_rr (res,a1));
- emit (Pbswap res)
+ emit (Pbswap32 res)
+ | "__builtin_bswap64", [BA(IR a1)], BR(IR res) ->
+ if a1 <> res then
+ emit (Pmov_rr (res,a1));
+ emit (Pbswap64 res)
| "__builtin_bswap16", [BA(IR a1)], BR(IR res) ->
if a1 <> res then
emit (Pmov_rr (res,a1));
emit (Pbswap16 res)
| ("__builtin_clz"|"__builtin_clzl"), [BA(IR a1)], BR(IR res) ->
- emit (Pbsr (res,a1));
- emit (Pxor_ri(res,coqint_of_camlint 31l))
+ emit (Pbsrl (res,a1));
+ emit (Pxorl_ri(res,coqint_of_camlint 31l))
+ | "__builtin_clzll", [BA(IR a1)], BR(IR res) ->
+ emit (Pbsrq (res,a1));
+ emit (Pxorl_ri(res,coqint_of_camlint 63l))
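(* bsr yields the bit index of the most significant 1 bit, so xor-ing
   with 63 (or 31 in the 32-bit case) turns it into a leading-zero
   count: e.g. for the 64-bit value 1, bsr gives 0 and 0 lxor 63 = 63. *)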
| "__builtin_clzll", [BA_splitlong(BA (IR ah), BA (IR al))], BR(IR res) ->
let lbl1 = new_label() in
let lbl2 = new_label() in
- emit (Ptest_rr(ah, ah));
+ emit (Ptestl_rr(ah, ah));
emit (Pjcc(Cond_e, lbl1));
- emit (Pbsr(res, ah));
- emit (Pxor_ri(res, coqint_of_camlint 31l));
+ emit (Pbsrl(res, ah));
+ emit (Pxorl_ri(res, coqint_of_camlint 31l));
emit (Pjmp_l lbl2);
emit (Plabel lbl1);
- emit (Pbsr(res, al));
- emit (Pxor_ri(res, coqint_of_camlint 63l));
+ emit (Pbsrl(res, al));
+ emit (Pxorl_ri(res, coqint_of_camlint 63l));
emit (Plabel lbl2)
| ("__builtin_ctz" | "__builtin_ctzl"), [BA(IR a1)], BR(IR res) ->
- emit (Pbsf (res,a1))
+ emit (Pbsfl (res,a1))
+ | "__builtin_ctzll", [BA(IR a1)], BR(IR res) ->
+ emit (Pbsfq (res,a1))
| "__builtin_ctzll", [BA_splitlong(BA (IR ah), BA (IR al))], BR(IR res) ->
let lbl1 = new_label() in
let lbl2 = new_label() in
- emit (Ptest_rr(al, al));
+ emit (Ptestl_rr(al, al));
emit (Pjcc(Cond_e, lbl1));
- emit (Pbsf(res, al));
+ emit (Pbsfl(res, al));
emit (Pjmp_l lbl2);
emit (Plabel lbl1);
- emit (Pbsf(res, ah));
- emit (Padd_ri(res, coqint_of_camlint 32l));
+ emit (Pbsfl(res, ah));
+ emit (Paddl_ri(res, coqint_of_camlint 32l));
emit (Plabel lbl2)
(* Float arithmetic *)
| "__builtin_fabs", [BA(FR a1)], BR(FR res) ->
@@ -320,75 +405,120 @@ let expand_builtin_inline name args res =
(* 64-bit integer arithmetic *)
| "__builtin_negl", [BA_splitlong(BA(IR ah), BA(IR al))],
BR_splitlong(BR(IR rh), BR(IR rl)) ->
- assert (ah = EDX && al = EAX && rh = EDX && rl = EAX);
- emit (Pneg EAX);
- emit (Padc_ri (EDX,_0));
- emit (Pneg EDX)
+ assert (ah = RDX && al = RAX && rh = RDX && rl = RAX);
+ emit (Pnegl RAX);
+ emit (Padcl_ri (RDX,_0));
+ emit (Pnegl RDX)
| "__builtin_addl", [BA_splitlong(BA(IR ah), BA(IR al));
BA_splitlong(BA(IR bh), BA(IR bl))],
BR_splitlong(BR(IR rh), BR(IR rl)) ->
- assert (ah = EDX && al = EAX && bh = ECX && bl = EBX && rh = EDX && rl = EAX);
- emit (Padd_rr (EAX,EBX));
- emit (Padc_rr (EDX,ECX))
+ assert (ah = RDX && al = RAX && bh = RCX && bl = RBX && rh = RDX && rl = RAX);
+ emit (Paddl_rr (RAX,RBX));
+ emit (Padcl_rr (RDX,RCX))
| "__builtin_subl", [BA_splitlong(BA(IR ah), BA(IR al));
BA_splitlong(BA(IR bh), BA(IR bl))],
BR_splitlong(BR(IR rh), BR(IR rl)) ->
- assert (ah = EDX && al = EAX && bh = ECX && bl = EBX && rh = EDX && rl = EAX);
- emit (Psub_rr (EAX,EBX));
- emit (Psbb_rr (EDX,ECX))
+ assert (ah = RDX && al = RAX && bh = RCX && bl = RBX && rh = RDX && rl = RAX);
+ emit (Psubl_rr (RAX,RBX));
+ emit (Psbbl_rr (RDX,RCX))
| "__builtin_mull", [BA(IR a); BA(IR b)],
BR_splitlong(BR(IR rh), BR(IR rl)) ->
- assert (a = EAX && b = EDX && rh = EDX && rl = EAX);
- emit (Pmul_r EDX)
+ assert (a = RAX && b = RDX && rh = RDX && rl = RAX);
+ emit (Pmull_r RDX)
(* Memory accesses *)
| "__builtin_read16_reversed", [BA(IR a1)], BR(IR res) ->
emit (Pmovzw_rm (res, linear_addr a1 _0));
emit (Pbswap16 res)
| "__builtin_read32_reversed", [BA(IR a1)], BR(IR res) ->
- emit (Pmov_rm (res, linear_addr a1 _0));
- emit (Pbswap res)
+ emit (Pmovl_rm (res, linear_addr a1 _0));
+ emit (Pbswap32 res)
| "__builtin_write16_reversed", [BA(IR a1); BA(IR a2)], _ ->
- let tmp = if a1 = ECX then EDX else ECX in
+ let tmp = if a1 = RCX then RDX else RCX in
if a2 <> tmp then
emit (Pmov_rr (tmp,a2));
emit (Pbswap16 tmp);
- emit (Pmovw_mr (linear_addr a1 _0, tmp))
+ emit (Pmovw_mr (linear_addr a1 _0z, tmp))
| "__builtin_write32_reversed", [BA(IR a1); BA(IR a2)], _ ->
- let tmp = if a1 = ECX then EDX else ECX in
+ let tmp = if a1 = RCX then RDX else RCX in
if a2 <> tmp then
emit (Pmov_rr (tmp,a2));
- emit (Pbswap tmp);
- emit (Pmov_mr (linear_addr a1 _0, tmp))
+ emit (Pbswap32 tmp);
+ emit (Pmovl_mr (linear_addr a1 _0z, tmp))
(* Vararg stuff *)
| "__builtin_va_start", [BA(IR a)], _ ->
- expand_builtin_va_start a
+ assert (a = RDX);
+ if Archi.ptr64
+ then expand_builtin_va_start_64 a
+ else expand_builtin_va_start_32 a
(* Synchronization *)
| "__builtin_membar", [], _ ->
()
(* no operation *)
| "__builtin_nop", [], _ ->
- emit (Pxchg_rr (EAX,EAX))
+ emit (Pmov_rr (RAX,RAX))
(* Catch-all *)
| _ ->
raise (Error ("unrecognized builtin " ^ name))
+(* Calls to variadic functions for x86-64: register AL must contain
+ the number of XMM registers used for parameter passing. To be on
+ the safe side, do the same if the called function is
+ unprototyped. *)
+
+let set_al sg =
+ if Archi.ptr64 && (sg.sig_cc.cc_vararg || sg.sig_cc.cc_unproto) then begin
+ let (ir, fr, ofs) = next_arg_locations 0 0 0 sg.sig_args in
+ emit (Pmovl_ri (RAX, coqint_of_camlint (Int32.of_int fr)))
+ end
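(* Example: a vararg call whose argument types are [Tlong; Tfloat; Tfloat]
   gives fr = 2, so AL is set to 2 before the call: the upper bound on
   SSE registers used that the x86-64 ABI expects in AL. *)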
+
(* Expansion of instructions *)
let expand_instruction instr =
match instr with
| Pallocframe (sz, ofs_ra, ofs_link) ->
- let sz = sp_adjustment sz in
- let addr = linear_addr ESP (coqint_of_camlint (Int32.add sz 4l)) in
- let addr' = linear_addr ESP ofs_link in
- let sz' = coqint_of_camlint sz in
- emit (Psub_ri (ESP,sz'));
- emit (Pcfi_adjust sz');
- emit (Plea (EDX,addr));
- emit (Pmov_mr (addr',EDX));
- PrintAsmaux.current_function_stacksize := sz
+ if Archi.ptr64 then begin
+ let (sz, save_regs) = sp_adjustment_64 sz in
+ (* Allocate frame *)
+ let sz' = Z.of_uint sz in
+ emit (Psubq_ri (RSP, sz'));
+ emit (Pcfi_adjust sz');
+ if save_regs >= 0 then begin
+ (* Save the registers *)
+ emit (Pleaq (R10, linear_addr RSP (Z.of_uint save_regs)));
+ emit (Pcall_s (intern_string "__compcert_va_saveregs",
+ {sig_args = []; sig_res = None; sig_cc = cc_default}))
+ end;
+ (* Stack chaining *)
+ let fullsz = sz + 8 in
+ let addr1 = linear_addr RSP (Z.of_uint fullsz) in
+ let addr2 = linear_addr RSP ofs_link in
+ emit (Pleaq (RAX, addr1));
+ emit (Pmovq_mr (addr2, RAX));
+ current_function_stacksize := Int64.of_int fullsz
+ end else begin
+ let sz = sp_adjustment_32 sz in
+ (* Allocate frame *)
+ let sz' = Z.of_uint sz in
+ emit (Psubl_ri (RSP, sz'));
+ emit (Pcfi_adjust sz');
+ (* Stack chaining *)
+ let addr1 = linear_addr RSP (Z.of_uint (sz + 4)) in
+ let addr2 = linear_addr RSP ofs_link in
+ emit (Pleal (RAX,addr1));
+ emit (Pmovl_mr (addr2,RAX));
+ PrintAsmaux.current_function_stacksize := Int32.of_int sz
+ end
| Pfreeframe(sz, ofs_ra, ofs_link) ->
- let sz = sp_adjustment sz in
- emit (Padd_ri (ESP,coqint_of_camlint sz))
+ if Archi.ptr64 then begin
+ let (sz, _) = sp_adjustment_64 sz in
+ emit (Paddq_ri (RSP, Z.of_uint sz))
+ end else begin
+ let sz = sp_adjustment_32 sz in
+ emit (Paddl_ri (RSP, Z.of_uint sz))
+ end
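(* Note: Pfreeframe recomputes the same adjustment as Pallocframe, so the
   add exactly undoes the sub; e.g. a non-variadic sz = 24 in 64-bit mode
   gives align 24 16 = 32, hence 32 - 8 = 24 bytes both ways. *)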
+ | Pjmp_s(_, sg) | Pjmp_r(_, sg) | Pcall_s(_, sg) | Pcall_r(_, sg) ->
+ set_al sg;
+ emit instr
| Pbuiltin (ef,args, res) ->
begin
match ef with
@@ -399,10 +529,7 @@ let expand_instruction instr =
| EF_vstore chunk ->
expand_builtin_vstore chunk args
| EF_memcpy(sz, al) ->
- expand_builtin_memcpy
- (Int32.to_int (camlint_of_coqint sz))
- (Int32.to_int (camlint_of_coqint al))
- args
+ expand_builtin_memcpy (Z.to_int sz) (Z.to_int al) args
| EF_annot_val(txt, targ) ->
expand_annot_val txt targ args res
| EF_annot _ | EF_debug _ | EF_inline_asm _ ->
@@ -413,14 +540,15 @@ let expand_instruction instr =
| _ -> emit instr
let int_reg_to_dwarf = function
- | EAX -> 0
- | EBX -> 3
- | ECX -> 1
- | EDX -> 2
- | ESI -> 6
- | EDI -> 7
- | EBP -> 5
- | ESP -> 4
+ | RAX -> 0
+ | RBX -> 3
+ | RCX -> 1
+ | RDX -> 2
+ | RSI -> 6
+ | RDI -> 7
+ | RBP -> 5
+ | RSP -> 4
+ | _ -> assert false (* TODO *)
let float_reg_to_dwarf = function
| XMM0 -> 21
@@ -431,6 +559,7 @@ let float_reg_to_dwarf = function
| XMM5 -> 26
| XMM6 -> 27
| XMM7 -> 28
+ | _ -> assert false (* TODO *)
let preg_to_dwarf = function
| IR r -> int_reg_to_dwarf r