aboutsummaryrefslogtreecommitdiffstats
path: root/x86/Asmexpand.ml
diff options
context:
space:
mode:
Diffstat (limited to 'x86/Asmexpand.ml')
-rw-r--r--x86/Asmexpand.ml45
1 files changed, 29 insertions, 16 deletions
diff --git a/x86/Asmexpand.ml b/x86/Asmexpand.ml
index 73efc3c5..a1c24f2d 100644
--- a/x86/Asmexpand.ml
+++ b/x86/Asmexpand.ml
@@ -39,11 +39,6 @@ let _16z = Z.of_sint 16
let stack_alignment () = 16
-(* Pseudo instructions for 32/64 bit compatibility *)
-
-let _Plea (r, addr) =
- if Archi.ptr64 then Pleaq (r, addr) else Pleal (r, addr)
-
(* SP adjustment to allocate or free a stack frame. *)
let align n a =
@@ -109,6 +104,21 @@ let offset_addressing (Addrmode(base, ofs, cst)) delta =
let linear_addr reg ofs = Addrmode(Some reg, None, Coq_inl ofs)
let global_addr id ofs = Addrmode(None, None, Coq_inr(id, ofs))
+(* A "leaq" instruction that does not overflow *)
+
+let emit_leaq r addr =
+ match Asmgen.normalize_addrmode_64 addr with
+ | (addr, None) ->
+ emit (Pleaq (r, addr))
+ | (addr, Some delta) ->
+ emit (Pleaq (r, addr));
+ emit (Paddq_ri (r, delta))
+
+(* Pseudo "lea" instruction for 32/64 bit compatibility *)
+
+let emit_lea r addr =
+ if Archi.ptr64 then emit_leaq r addr else emit (Pleal (r, addr))
+
(* Translate a builtin argument into an addressing mode *)
let addressing_of_builtin_arg = function
@@ -150,8 +160,8 @@ let expand_builtin_memcpy_small sz al src dst =
copy (addressing_of_builtin_arg src) (addressing_of_builtin_arg dst) sz
let expand_builtin_memcpy_big sz al src dst =
- if src <> BA (IR RSI) then emit (_Plea (RSI, addressing_of_builtin_arg src));
- if dst <> BA (IR RDI) then emit (_Plea (RDI, addressing_of_builtin_arg dst));
+ if src <> BA (IR RSI) then emit_lea RSI (addressing_of_builtin_arg src);
+ if dst <> BA (IR RDI) then emit_lea RDI (addressing_of_builtin_arg dst);
(* TODO: movsq? *)
emit (Pmovl_ri (RCX,coqint_of_camlint (Int32.of_int (sz / 4))));
emit Prep_movsl;
@@ -289,9 +299,9 @@ let expand_builtin_va_start_elf64 r =
emit (Pmovl_mr (linear_addr r _0z, RAX));
emit (Pmovl_ri (RAX, coqint_of_camlint fp_offset));
emit (Pmovl_mr (linear_addr r _4z, RAX));
- emit (Pleaq (RAX, linear_addr RSP (Z.of_uint64 overflow_arg_area)));
+ emit_leaq RAX (linear_addr RSP (Z.of_uint64 overflow_arg_area));
emit (Pmovq_mr (linear_addr r _8z, RAX));
- emit (Pleaq (RAX, linear_addr RSP (Z.of_uint64 reg_save_area)));
+ emit_leaq RAX (linear_addr RSP (Z.of_uint64 reg_save_area));
emit (Pmovq_mr (linear_addr r _16z, RAX))
let expand_builtin_va_start_win64 r =
@@ -302,7 +312,7 @@ let expand_builtin_va_start_win64 r =
let ofs =
Int64.(add !current_function_stacksize
(mul 8L (of_int num_args))) in
- emit (Pleaq (RAX, linear_addr RSP (Z.of_uint64 ofs)));
+ emit_leaq RAX (linear_addr RSP (Z.of_uint64 ofs));
emit (Pmovq_mr (linear_addr r _0z, RAX))
(* FMA operations *)
@@ -487,9 +497,12 @@ let expand_builtin_inline name args res =
(* Synchronization *)
| "__builtin_membar", [], _ ->
()
- (* no operation *)
+ (* No operation *)
| "__builtin_nop", [], _ ->
emit Pnop
+ (* Optimization hint *)
+ | "__builtin_unreachable", [], _ ->
+ ()
(* Catch-all *)
| _ ->
raise (Error ("unrecognized builtin " ^ name))
@@ -500,7 +513,7 @@ let expand_builtin_inline name args res =
unprototyped. *)
let fixup_funcall_elf64 sg =
- if sg.sig_cc.cc_vararg || sg.sig_cc.cc_unproto then begin
+ if sg.sig_cc.cc_vararg <> None || sg.sig_cc.cc_unproto then begin
let (ir, fr, ofs) = next_arg_locations 0 0 0 sg.sig_args in
emit (Pmovl_ri (RAX, coqint_of_camlint (Int32.of_int fr)))
end
@@ -521,7 +534,7 @@ let rec copy_fregs_to_iregs args fr ir =
()
let fixup_funcall_win64 sg =
- if sg.sig_cc.cc_vararg then
+ if sg.sig_cc.cc_vararg <> None then
copy_fregs_to_iregs sg.sig_args [XMM0; XMM1; XMM2; XMM3] [RCX; RDX; R8; R9]
let fixup_funcall sg =
@@ -549,7 +562,7 @@ let expand_instruction instr =
(* Stack chaining *)
let addr1 = linear_addr RSP (Z.of_uint (sz + 8)) in
let addr2 = linear_addr RSP ofs_link in
- emit (Pleaq (RAX,addr1));
+ emit_leaq RAX addr1;
emit (Pmovq_mr (addr2, RAX));
current_function_stacksize := Int64.of_int (sz + 8)
end else if Archi.ptr64 then begin
@@ -560,7 +573,7 @@ let expand_instruction instr =
emit (Pcfi_adjust sz');
if save_regs >= 0 then begin
(* Save the registers *)
- emit (Pleaq (R10, linear_addr RSP (Z.of_uint save_regs)));
+ emit_leaq R10 (linear_addr RSP (Z.of_uint save_regs));
emit (Pcall_s (intern_string "__compcert_va_saveregs",
{sig_args = []; sig_res = Tvoid; sig_cc = cc_default}))
end;
@@ -568,7 +581,7 @@ let expand_instruction instr =
let fullsz = sz + 8 in
let addr1 = linear_addr RSP (Z.of_uint fullsz) in
let addr2 = linear_addr RSP ofs_link in
- emit (Pleaq (RAX, addr1));
+ emit_leaq RAX addr1;
emit (Pmovq_mr (addr2, RAX));
current_function_stacksize := Int64.of_int fullsz
end else begin