aboutsummaryrefslogtreecommitdiffstats
path: root/x86
diff options
context:
space:
mode:
authorDavid Monniaux <david.monniaux@univ-grenoble-alpes.fr>2020-12-08 09:32:48 +0100
committerDavid Monniaux <david.monniaux@univ-grenoble-alpes.fr>2020-12-08 09:32:48 +0100
commit766968b709e5248a6aac6fdb92f6228be05fc70c (patch)
tree792c7fc4651dd0bf98b6697305e0eb3a006be854 /x86
parenta100edde18de43cf933c0d53467e196541436e13 (diff)
parentcb93a301fd2ddae3071ae0838290b201496d90ef (diff)
downloadcompcert-kvx-766968b709e5248a6aac6fdb92f6228be05fc70c.tar.gz
compcert-kvx-766968b709e5248a6aac6fdb92f6228be05fc70c.zip
Merge remote-tracking branch 'origin/kvx-work' into kvx-better2-cse3
Diffstat (limited to 'x86')
-rw-r--r--x86/Asm.v2
-rw-r--r--x86/Asmexpand.ml97
-rw-r--r--x86/CBuiltins.ml21
-rw-r--r--x86/ConstpropOp.vp5
-rw-r--r--x86/ConstpropOpproof.v2
-rw-r--r--x86/Conventions1.v200
-rw-r--r--x86/Machregsaux.ml18
-rw-r--r--x86/Machregsaux.mli3
-rw-r--r--x86/Stacklayout.v22
-rw-r--r--x86/TargetPrinter.ml60
-rw-r--r--x86/extractionMachdep.v20
11 files changed, 300 insertions, 150 deletions
diff --git a/x86/Asm.v b/x86/Asm.v
index 58e28c40..33f1f2ad 100644
--- a/x86/Asm.v
+++ b/x86/Asm.v
@@ -279,6 +279,7 @@ Inductive instruction: Type :=
| Pmaxsd (rd: freg) (r2: freg)
| Pminsd (rd: freg) (r2: freg)
| Pmovb_rm (rd: ireg) (a: addrmode)
+ | Pmovq_rf (rd: ireg) (r1: freg)
| Pmovsq_mr (a: addrmode) (rs: freg)
| Pmovsq_rm (rd: freg) (a: addrmode)
| Pmovsb
@@ -998,6 +999,7 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
| Pmaxsd _ _
| Pminsd _ _
| Pmovb_rm _ _
+ | Pmovq_rf _ _
| Pmovsq_rm _ _
| Pmovsq_mr _ _
| Pmovsb
diff --git a/x86/Asmexpand.ml b/x86/Asmexpand.ml
index ad667e3d..20f5d170 100644
--- a/x86/Asmexpand.ml
+++ b/x86/Asmexpand.ml
@@ -44,7 +44,7 @@ let stack_alignment () = 16
let _Plea (r, addr) =
if Archi.ptr64 then Pleaq (r, addr) else Pleal (r, addr)
-(* SP adjustment to allocate or free a stack frame *)
+(* SP adjustment to allocate or free a stack frame. *)
let align n a =
if n >= 0 then (n + a - 1) land (-a) else n land (-a)
@@ -56,7 +56,7 @@ let sp_adjustment_32 sz =
(* The top 4 bytes have already been allocated by the "call" instruction. *)
sz - 4
-let sp_adjustment_64 sz =
+let sp_adjustment_elf64 sz =
let sz = Z.to_int sz in
if is_current_function_variadic() then begin
(* If variadic, add room for register save area, which must be 16-aligned *)
@@ -73,6 +73,13 @@ let sp_adjustment_64 sz =
(sz - 8, -1)
end
+let sp_adjustment_win64 sz =
+ let sz = Z.to_int sz in
+ (* Preserve proper alignment of the stack *)
+ let sz = align sz 16 in
+ (* The top 8 bytes have already been allocated by the "call" instruction. *)
+ sz - 8
+
(* Built-ins. They come in two flavors:
- annotation statements: take their arguments in registers or stack
locations; generate no code;
@@ -256,7 +263,7 @@ let expand_builtin_va_start_32 r =
emit (Pleal (RAX, linear_addr RSP (Z.of_uint32 ofs)));
emit (Pmovl_mr (linear_addr r _0z, RAX))
-let expand_builtin_va_start_64 r =
+let expand_builtin_va_start_elf64 r =
if not (is_current_function_variadic ()) then
invalid_arg "Fatal error: va_start used in non-vararg function";
let (ir, fr, ofs) =
@@ -287,6 +294,17 @@ let expand_builtin_va_start_64 r =
emit (Pleaq (RAX, linear_addr RSP (Z.of_uint64 reg_save_area)));
emit (Pmovq_mr (linear_addr r _16z, RAX))
+let expand_builtin_va_start_win64 r =
+ if not (is_current_function_variadic ()) then
+ invalid_arg "Fatal error: va_start used in non-vararg function";
+ let num_args =
+ List.length (get_current_function_args()) in
+ let ofs =
+ Int64.(add !current_function_stacksize
+ (mul 8L (of_int num_args))) in
+ emit (Pleaq (RAX, linear_addr RSP (Z.of_uint64 ofs)));
+ emit (Pmovq_mr (linear_addr r _0z, RAX))
+
(* FMA operations *)
(* vfmadd<i><j><k> r1, r2, r3 performs r1 := ri * rj + rk
@@ -378,11 +396,7 @@ let expand_builtin_inline name args res =
emit (Paddl_ri(res, coqint_of_camlint 32l));
emit (Plabel lbl2)
(* Float arithmetic *)
- | "__builtin_fabs", [BA(FR a1)], BR(FR res) ->
- if a1 <> res then
- emit (Pmovsd_ff (res,a1));
- emit (Pabsd res) (* This ensures that need_masks is set to true *)
- | "__builtin_fsqrt", [BA(FR a1)], BR(FR res) ->
+ | ("__builtin_fsqrt" | "__builtin_sqrt"), [BA(FR a1)], BR(FR res) ->
emit (Psqrtsd (res,a1))
| "__builtin_fmax", [BA(FR a1); BA(FR a2)], BR(FR res) ->
if res = a1 then
@@ -467,8 +481,8 @@ let expand_builtin_inline name args res =
(* Vararg stuff *)
| "__builtin_va_start", [BA(IR a)], _ ->
assert (a = RDX);
- if Archi.ptr64
- then expand_builtin_va_start_64 a
+ if Archi.win64 then expand_builtin_va_start_win64 a
+ else if Archi.ptr64 then expand_builtin_va_start_elf64 a
else expand_builtin_va_start_32 a
(* Synchronization *)
| "__builtin_membar", [], _ ->
@@ -480,24 +494,66 @@ let expand_builtin_inline name args res =
| _ ->
raise (Error ("unrecognized builtin " ^ name))
-(* Calls to variadic functions for x86-64: register AL must contain
+(* Calls to variadic functions for x86-64 ELF: register AL must contain
the number of XMM registers used for parameter passing. To be on
- the safe side. do the same if the called function is
+ the safe side, do the same if the called function is
unprototyped. *)
-let set_al sg =
- if Archi.ptr64 && (sg.sig_cc.cc_vararg || sg.sig_cc.cc_unproto) then begin
+let fixup_funcall_elf64 sg =
+ if sg.sig_cc.cc_vararg || sg.sig_cc.cc_unproto then begin
let (ir, fr, ofs) = next_arg_locations 0 0 0 sg.sig_args in
emit (Pmovl_ri (RAX, coqint_of_camlint (Int32.of_int fr)))
end
+(* Calls to variadic functions for x86-64 Windows:
+ FP arguments passed in FP registers must also be passed in integer
+ registers.
+*)
+
+let rec copy_fregs_to_iregs args fr ir =
+ match (ir, fr, args) with
+ | (i1 :: ir, f1 :: fr, (Tfloat | Tsingle) :: args) ->
+ emit (Pmovq_rf (i1, f1));
+ copy_fregs_to_iregs args fr ir
+ | (i1 :: ir, f1 :: fr, _ :: args) ->
+ copy_fregs_to_iregs args fr ir
+ | _ ->
+ ()
+
+let fixup_funcall_win64 sg =
+ if sg.sig_cc.cc_vararg then
+ copy_fregs_to_iregs sg.sig_args [XMM0; XMM1; XMM2; XMM3] [RCX; RDX; R8; R9]
+
+let fixup_funcall sg =
+ if Archi.ptr64
+ then if Archi.win64
+ then fixup_funcall_win64 sg
+ else fixup_funcall_elf64 sg
+ else ()
+
(* Expansion of instructions *)
let expand_instruction instr =
match instr with
| Pallocframe (sz, ofs_ra, ofs_link) ->
- if Archi.ptr64 then begin
- let (sz, save_regs) = sp_adjustment_64 sz in
+ if Archi.win64 then begin
+ let sz = sp_adjustment_win64 sz in
+ if is_current_function_variadic() then
+ (* Save parameters passed in registers in reserved stack area *)
+ emit (Pcall_s (intern_string "__compcert_va_saveregs",
+ {sig_args = []; sig_res = Tvoid; sig_cc = cc_default}));
+ (* Allocate frame *)
+ let sz' = Z.of_uint sz in
+ emit (Psubl_ri (RSP, sz'));
+ emit (Pcfi_adjust sz');
+ (* Stack chaining *)
+ let addr1 = linear_addr RSP (Z.of_uint (sz + 8)) in
+ let addr2 = linear_addr RSP ofs_link in
+ emit (Pleaq (RAX,addr1));
+ emit (Pmovq_mr (addr2, RAX));
+ current_function_stacksize := Int64.of_int (sz + 8)
+ end else if Archi.ptr64 then begin
+ let (sz, save_regs) = sp_adjustment_elf64 sz in
(* Allocate frame *)
let sz' = Z.of_uint sz in
emit (Psubq_ri (RSP, sz'));
@@ -529,15 +585,18 @@ let expand_instruction instr =
PrintAsmaux.current_function_stacksize := Int32.of_int sz
end
| Pfreeframe(sz, ofs_ra, ofs_link) ->
- if Archi.ptr64 then begin
- let (sz, _) = sp_adjustment_64 sz in
+ if Archi.win64 then begin
+ let sz = sp_adjustment_win64 sz in
+ emit (Paddq_ri (RSP, Z.of_uint sz))
+ end else if Archi.ptr64 then begin
+ let (sz, _) = sp_adjustment_elf64 sz in
emit (Paddq_ri (RSP, Z.of_uint sz))
end else begin
let sz = sp_adjustment_32 sz in
emit (Paddl_ri (RSP, Z.of_uint sz))
end
| Pjmp_s(_, sg) | Pjmp_r(_, sg) | Pcall_s(_, sg) | Pcall_r(_, sg) ->
- set_al sg;
+ fixup_funcall sg;
emit instr
| Pbuiltin (ef,args, res) ->
begin
diff --git a/x86/CBuiltins.ml b/x86/CBuiltins.ml
index e7f714c7..a16f3ef7 100644
--- a/x86/CBuiltins.ml
+++ b/x86/CBuiltins.ml
@@ -19,8 +19,12 @@ open C
let (va_list_type, va_list_scalar, size_va_list) =
if Archi.ptr64 then
- (* Actually a struct passed by reference; equivalent to 3 64-bit words *)
- (TArray(TInt(IULong, []), Some 3L, []), false, 3*8)
+ if Archi.win64 then
+ (* Just a pointer *)
+ (TPtr(TVoid [], []), true, 8)
+ else
+ (* Actually a struct passed by reference; equivalent to 3 64-bit words *)
+ (TArray(TInt(IULong, []), Some 3L, []), false, 3*8)
else
(* Just a pointer *)
(TPtr(TVoid [], []), true, 4)
@@ -30,19 +34,6 @@ let builtins = {
"__builtin_va_list", va_list_type;
];
builtin_functions = [
- (* Integer arithmetic *)
- "__builtin_clz",
- (TInt(IInt, []), [TInt(IUInt, [])], false);
- "__builtin_clzl",
- (TInt(IInt, []), [TInt(IULong, [])], false);
- "__builtin_clzll",
- (TInt(IInt, []), [TInt(IULongLong, [])], false);
- "__builtin_ctz",
- (TInt(IInt, []), [TInt(IUInt, [])], false);
- "__builtin_ctzl",
- (TInt(IInt, []), [TInt(IULong, [])], false);
- "__builtin_ctzll",
- (TInt(IInt, []), [TInt(IULongLong, [])], false);
(* Float arithmetic *)
"__builtin_fmax",
(TFloat(FDouble, []), [TFloat(FDouble, []); TFloat(FDouble, [])], false);
diff --git a/x86/ConstpropOp.vp b/x86/ConstpropOp.vp
index ada8d54a..dd4b839a 100644
--- a/x86/ConstpropOp.vp
+++ b/x86/ConstpropOp.vp
@@ -17,11 +17,10 @@ Require Import Coqlib Compopts.
Require Import AST Integers Floats.
Require Import Op Registers.
Require Import ValueDomain ValueAOp.
+Require SelectOp.
(** * Converting known values to constants *)
-Parameter symbol_is_external: ident -> bool. (**r See [SelectOp] *)
-
Definition Olea_ptr (a: addressing) := if Archi.ptr64 then Oleal a else Olea a.
Definition const_for_result (a: aval) : option operation :=
@@ -31,7 +30,7 @@ Definition const_for_result (a: aval) : option operation :=
| F n => if Compopts.generate_float_constants tt then Some(Ofloatconst n) else None
| FS n => if Compopts.generate_float_constants tt then Some(Osingleconst n) else None
| Ptr(Gl id ofs) =>
- if symbol_is_external id then
+ if SelectOp.symbol_is_external id then
if Ptrofs.eq ofs Ptrofs.zero then Some (Oindirectsymbol id) else None
else
Some (Olea_ptr (Aglobal id ofs))
diff --git a/x86/ConstpropOpproof.v b/x86/ConstpropOpproof.v
index 6d2df9c1..82179fa4 100644
--- a/x86/ConstpropOpproof.v
+++ b/x86/ConstpropOpproof.v
@@ -107,7 +107,7 @@ Proof.
- (* pointer *)
destruct p; try discriminate; SimplVM.
+ (* global *)
- destruct (symbol_is_external id).
+ destruct (SelectOp.symbol_is_external id).
* revert H2; predSpec Ptrofs.eq Ptrofs.eq_spec ofs Ptrofs.zero; intros EQ; inv EQ.
exists (Genv.symbol_address ge id Ptrofs.zero); auto.
* inv H2. exists (Genv.symbol_address ge id ofs); split.
diff --git a/x86/Conventions1.v b/x86/Conventions1.v
index d9f5b8fa..b4cb233e 100644
--- a/x86/Conventions1.v
+++ b/x86/Conventions1.v
@@ -32,40 +32,44 @@ Definition is_callee_save (r: mreg) : bool :=
match r with
| AX | CX | DX => false
| BX | BP => true
- | SI | DI => negb Archi.ptr64 (**r callee-save in 32 bits but not in 64 bits *)
+ | SI | DI => negb Archi.ptr64 || Archi.win64 (**r callee-save in ELF 64 bits *)
| R8 | R9 | R10 | R11 => false
| R12 | R13 | R14 | R15 => true
| X0 | X1 | X2 | X3 | X4 | X5 | X6 | X7 => false
- | X8 | X9 | X10 | X11 | X12 | X13 | X14 | X15 => false
+ | X8 | X9 | X10 | X11 | X12 | X13 | X14 | X15 => Archi.win64
| FP0 => false
end.
Definition int_caller_save_regs :=
if Archi.ptr64
- then AX :: CX :: DX :: SI :: DI :: R8 :: R9 :: R10 :: R11 :: nil
+ then if Archi.win64
+ then AX :: CX :: DX :: R8 :: R9 :: R10 :: R11 :: nil
+ else AX :: CX :: DX :: SI :: DI :: R8 :: R9 :: R10 :: R11 :: nil
else AX :: CX :: DX :: nil.
Definition float_caller_save_regs :=
if Archi.ptr64
- then X0 :: X1 :: X2 :: X3 :: X4 :: X5 :: X6 :: X7 ::
- X8 :: X9 :: X10 :: X11 :: X12 :: X13 :: X14 :: X15 :: nil
+ then if Archi.win64
+ then X0 :: X1 :: X2 :: X3 :: X4 :: X5 :: nil
+ else X0 :: X1 :: X2 :: X3 :: X4 :: X5 :: X6 :: X7 ::
+ X8 :: X9 :: X10 :: X11 :: X12 :: X13 :: X14 :: X15 :: nil
else X0 :: X1 :: X2 :: X3 :: X4 :: X5 :: X6 :: X7 :: nil.
Definition int_callee_save_regs :=
if Archi.ptr64
- then BX :: BP :: R12 :: R13 :: R14 :: R15 :: nil
+ then if Archi.win64
+ then BX :: SI :: DI :: BP :: R12 :: R13 :: R14 :: R15 :: nil
+ else BX :: BP :: R12 :: R13 :: R14 :: R15 :: nil
else BX :: SI :: DI :: BP :: nil.
-Definition float_callee_save_regs : list mreg := nil.
+Definition float_callee_save_regs :=
+ if Archi.ptr64 && Archi.win64
+ then X6 :: X7 :: X8 :: X9 :: X10 :: X11 :: X12 :: X13 :: X14 :: X15 :: nil
+ else nil.
Definition destroyed_at_call :=
List.filter (fun r => negb (is_callee_save r)) all_mregs.
-Definition dummy_int_reg := AX. (**r Used in [Regalloc]. *)
-Definition dummy_float_reg := X0. (**r Used in [Regalloc]. *)
-
-Definition callee_save_type := mreg_type.
-
Definition is_float_reg (r: mreg) :=
match r with
| AX | BX | CX | DX | SI | DI | BP
@@ -74,6 +78,11 @@ Definition is_float_reg (r: mreg) :=
| X8 | X9 | X10 | X11 | X12 | X13 | X14 | X15 | FP0 => true
end.
+Definition dummy_int_reg := AX. (**r Used in [Regalloc]. *)
+Definition dummy_float_reg := X0. (**r Used in [Regalloc]. *)
+
+Definition callee_save_type := mreg_type.
+
(** * Function calling conventions *)
(** The functions in this section determine the locations (machine registers
@@ -182,7 +191,7 @@ Fixpoint loc_arguments_32
:: loc_arguments_32 tys (ofs + typesize ty)
end.
-(** In the x86-64 ABI:
+(** In the x86-64 ELF ABI:
- The first 6 integer arguments are passed in registers [DI], [SI], [DX], [CX], [R8], [R9].
- The first 8 floating-point arguments are passed in registers [X0] to [X7].
- Extra arguments are passed on the stack, in [Outgoing] slots.
@@ -190,26 +199,62 @@ Fixpoint loc_arguments_32
of data is used in a slot.
*)
-Definition int_param_regs := DI :: SI :: DX :: CX :: R8 :: R9 :: nil.
-Definition float_param_regs := X0 :: X1 :: X2 :: X3 :: X4 :: X5 :: X6 :: X7 :: nil.
+Definition int_param_regs_elf64 := DI :: SI :: DX :: CX :: R8 :: R9 :: nil.
+Definition float_param_regs_elf64 := X0 :: X1 :: X2 :: X3 :: X4 :: X5 :: X6 :: X7 :: nil.
-Fixpoint loc_arguments_64
+Fixpoint loc_arguments_elf64
(tyl: list typ) (ir fr ofs: Z) {struct tyl} : list (rpair loc) :=
match tyl with
| nil => nil
| (Tint | Tlong | Tany32 | Tany64) as ty :: tys =>
- match list_nth_z int_param_regs ir with
+ match list_nth_z int_param_regs_elf64 ir with
| None =>
- One (S Outgoing ofs ty) :: loc_arguments_64 tys ir fr (ofs + 2)
+ One (S Outgoing ofs ty) :: loc_arguments_elf64 tys ir fr (ofs + 2)
| Some ireg =>
- One (R ireg) :: loc_arguments_64 tys (ir + 1) fr ofs
+ One (R ireg) :: loc_arguments_elf64 tys (ir + 1) fr ofs
end
| (Tfloat | Tsingle) as ty :: tys =>
- match list_nth_z float_param_regs fr with
+ match list_nth_z float_param_regs_elf64 fr with
| None =>
- One (S Outgoing ofs ty) :: loc_arguments_64 tys ir fr (ofs + 2)
+ One (S Outgoing ofs ty) :: loc_arguments_elf64 tys ir fr (ofs + 2)
| Some freg =>
- One (R freg) :: loc_arguments_64 tys ir (fr + 1) ofs
+ One (R freg) :: loc_arguments_elf64 tys ir (fr + 1) ofs
+ end
+ end.
+
+(** In the x86-64 Win64 ABI:
+- The first 4 arguments are passed in registers [RCX], [RDX], [R8], and [R9]
+ (for integer arguments) and [X0] to [X3] (for floating-point arguments).
+ Each argument "burns" both an integer register and a FP integer.
+- The first 8 floating-point arguments are passed in registers [X0] to [X7].
+- Extra arguments are passed on the stack, in [Outgoing] slots.
+ Consecutive stack slots are separated by 8 bytes, even if only 4 bytes
+ of data is used in a slot.
+- Four 8-byte words are always reserved at the bottom of the outgoing area
+ so that the callee can use them to save the registers containing the
+ first four arguments. This is handled in the Stacking phase.
+*)
+
+Definition int_param_regs_win64 := CX :: DX :: R8 :: R9 :: nil.
+Definition float_param_regs_win64 := X0 :: X1 :: X2 :: X3 :: nil.
+
+Fixpoint loc_arguments_win64
+ (tyl: list typ) (r ofs: Z) {struct tyl} : list (rpair loc) :=
+ match tyl with
+ | nil => nil
+ | (Tint | Tlong | Tany32 | Tany64) as ty :: tys =>
+ match list_nth_z int_param_regs_win64 r with
+ | None =>
+ One (S Outgoing ofs ty) :: loc_arguments_win64 tys r (ofs + 2)
+ | Some ireg =>
+ One (R ireg) :: loc_arguments_win64 tys (r + 1) ofs
+ end
+ | (Tfloat | Tsingle) as ty :: tys =>
+ match list_nth_z float_param_regs_win64 r with
+ | None =>
+ One (S Outgoing ofs ty) :: loc_arguments_win64 tys r (ofs + 2)
+ | Some freg =>
+ One (R freg) :: loc_arguments_win64 tys (r + 1) ofs
end
end.
@@ -218,7 +263,9 @@ Fixpoint loc_arguments_64
Definition loc_arguments (s: signature) : list (rpair loc) :=
if Archi.ptr64
- then loc_arguments_64 s.(sig_args) 0 0 0
+ then if Archi.win64
+ then loc_arguments_win64 s.(sig_args) 0 0
+ else loc_arguments_elf64 s.(sig_args) 0 0 0
else loc_arguments_32 s.(sig_args) 0.
(** Argument locations are either caller-save registers or [Outgoing]
@@ -237,9 +284,16 @@ Definition loc_argument_32_charact (ofs: Z) (l: loc) : Prop :=
| _ => False
end.
-Definition loc_argument_64_charact (ofs: Z) (l: loc) : Prop :=
+Definition loc_argument_elf64_charact (ofs: Z) (l: loc) : Prop :=
+ match l with
+ | R r => In r int_param_regs_elf64 \/ In r float_param_regs_elf64
+ | S Outgoing ofs' ty => ofs' >= ofs /\ (2 | ofs')
+ | _ => False
+ end.
+
+Definition loc_argument_win64_charact (ofs: Z) (l: loc) : Prop :=
match l with
- | R r => In r int_param_regs \/ In r float_param_regs
+ | R r => In r int_param_regs_win64 \/ In r float_param_regs_win64
| S Outgoing ofs' ty => ofs' >= ofs /\ (2 | ofs')
| _ => False
end.
@@ -259,37 +313,75 @@ Proof.
* destruct H; split; eapply X; eauto; omega.
Qed.
-Remark loc_arguments_64_charact:
+Remark loc_arguments_elf64_charact:
forall tyl ir fr ofs p,
- In p (loc_arguments_64 tyl ir fr ofs) -> (2 | ofs) -> forall_rpair (loc_argument_64_charact ofs) p.
+ In p (loc_arguments_elf64 tyl ir fr ofs) -> (2 | ofs) -> forall_rpair (loc_argument_elf64_charact ofs) p.
Proof.
- assert (X: forall ofs1 ofs2 l, loc_argument_64_charact ofs2 l -> ofs1 <= ofs2 -> loc_argument_64_charact ofs1 l).
+ assert (X: forall ofs1 ofs2 l, loc_argument_elf64_charact ofs2 l -> ofs1 <= ofs2 -> loc_argument_elf64_charact ofs1 l).
{ destruct l; simpl; intros; auto. destruct sl; auto. intuition omega. }
- assert (Y: forall ofs1 ofs2 p, forall_rpair (loc_argument_64_charact ofs2) p -> ofs1 <= ofs2 -> forall_rpair (loc_argument_64_charact ofs1) p).
+ assert (Y: forall ofs1 ofs2 p, forall_rpair (loc_argument_elf64_charact ofs2) p -> ofs1 <= ofs2 -> forall_rpair (loc_argument_elf64_charact ofs1) p).
{ destruct p; simpl; intuition eauto. }
assert (Z: forall ofs, (2 | ofs) -> (2 | ofs + 2)).
{ intros. apply Z.divide_add_r; auto. apply Z.divide_refl. }
Opaque list_nth_z.
- induction tyl; simpl loc_arguments_64; intros.
+ induction tyl; simpl loc_arguments_elf64; intros.
elim H.
assert (A: forall ty, In p
- match list_nth_z int_param_regs ir with
- | Some ireg => One (R ireg) :: loc_arguments_64 tyl (ir + 1) fr ofs
- | None => One (S Outgoing ofs ty) :: loc_arguments_64 tyl ir fr (ofs + 2)
+ match list_nth_z int_param_regs_elf64 ir with
+ | Some ireg => One (R ireg) :: loc_arguments_elf64 tyl (ir + 1) fr ofs
+ | None => One (S Outgoing ofs ty) :: loc_arguments_elf64 tyl ir fr (ofs + 2)
end ->
- forall_rpair (loc_argument_64_charact ofs) p).
- { intros. destruct (list_nth_z int_param_regs ir) as [r|] eqn:E; destruct H1.
+ forall_rpair (loc_argument_elf64_charact ofs) p).
+ { intros. destruct (list_nth_z int_param_regs_elf64 ir) as [r|] eqn:E; destruct H1.
subst. left. eapply list_nth_z_in; eauto.
eapply IHtyl; eauto.
subst. split. omega. assumption.
eapply Y; eauto. omega. }
assert (B: forall ty, In p
- match list_nth_z float_param_regs fr with
- | Some ireg => One (R ireg) :: loc_arguments_64 tyl ir (fr + 1) ofs
- | None => One (S Outgoing ofs ty) :: loc_arguments_64 tyl ir fr (ofs + 2)
+ match list_nth_z float_param_regs_elf64 fr with
+ | Some ireg => One (R ireg) :: loc_arguments_elf64 tyl ir (fr + 1) ofs
+ | None => One (S Outgoing ofs ty) :: loc_arguments_elf64 tyl ir fr (ofs + 2)
end ->
- forall_rpair (loc_argument_64_charact ofs) p).
- { intros. destruct (list_nth_z float_param_regs fr) as [r|] eqn:E; destruct H1.
+ forall_rpair (loc_argument_elf64_charact ofs) p).
+ { intros. destruct (list_nth_z float_param_regs_elf64 fr) as [r|] eqn:E; destruct H1.
+ subst. right. eapply list_nth_z_in; eauto.
+ eapply IHtyl; eauto.
+ subst. split. omega. assumption.
+ eapply Y; eauto. omega. }
+ destruct a; eauto.
+Qed.
+
+Remark loc_arguments_win64_charact:
+ forall tyl r ofs p,
+ In p (loc_arguments_win64 tyl r ofs) -> (2 | ofs) -> forall_rpair (loc_argument_win64_charact ofs) p.
+Proof.
+ assert (X: forall ofs1 ofs2 l, loc_argument_win64_charact ofs2 l -> ofs1 <= ofs2 -> loc_argument_win64_charact ofs1 l).
+ { destruct l; simpl; intros; auto. destruct sl; auto. intuition omega. }
+ assert (Y: forall ofs1 ofs2 p, forall_rpair (loc_argument_win64_charact ofs2) p -> ofs1 <= ofs2 -> forall_rpair (loc_argument_win64_charact ofs1) p).
+ { destruct p; simpl; intuition eauto. }
+ assert (Z: forall ofs, (2 | ofs) -> (2 | ofs + 2)).
+ { intros. apply Z.divide_add_r; auto. apply Z.divide_refl. }
+Opaque list_nth_z.
+ induction tyl; simpl loc_arguments_win64; intros.
+ elim H.
+ assert (A: forall ty, In p
+ match list_nth_z int_param_regs_win64 r with
+ | Some ireg => One (R ireg) :: loc_arguments_win64 tyl (r + 1) ofs
+ | None => One (S Outgoing ofs ty) :: loc_arguments_win64 tyl r (ofs + 2)
+ end ->
+ forall_rpair (loc_argument_win64_charact ofs) p).
+ { intros. destruct (list_nth_z int_param_regs_win64 r) as [r'|] eqn:E; destruct H1.
+ subst. left. eapply list_nth_z_in; eauto.
+ eapply IHtyl; eauto.
+ subst. split. omega. assumption.
+ eapply Y; eauto. omega. }
+ assert (B: forall ty, In p
+ match list_nth_z float_param_regs_win64 r with
+ | Some ireg => One (R ireg) :: loc_arguments_win64 tyl (r + 1) ofs
+ | None => One (S Outgoing ofs ty) :: loc_arguments_win64 tyl r (ofs + 2)
+ end ->
+ forall_rpair (loc_argument_win64_charact ofs) p).
+ { intros. destruct (list_nth_z float_param_regs_win64 r) as [r'|] eqn:E; destruct H1.
subst. right. eapply list_nth_z_in; eauto.
eapply IHtyl; eauto.
subst. split. omega. assumption.
@@ -301,18 +393,30 @@ Lemma loc_arguments_acceptable:
forall (s: signature) (p: rpair loc),
In p (loc_arguments s) -> forall_rpair loc_argument_acceptable p.
Proof.
- unfold loc_arguments; intros. destruct Archi.ptr64 eqn:SF.
-- (* 64 bits *)
- assert (A: forall r, In r int_param_regs -> is_callee_save r = false) by (unfold is_callee_save; rewrite SF; decide_goal).
- assert (B: forall r, In r float_param_regs -> is_callee_save r = false) by decide_goal.
- assert (X: forall l, loc_argument_64_charact 0 l -> loc_argument_acceptable l).
- { unfold loc_argument_64_charact, loc_argument_acceptable.
+ unfold loc_arguments; intros. destruct Archi.ptr64 eqn:SF; [destruct Archi.win64 eqn:W64|].
+- (* WIN 64 bits *)
+ assert (A: forall r, In r int_param_regs_win64 -> is_callee_save r = false) by (unfold is_callee_save; rewrite SF; decide_goal).
+ assert (B: forall r, In r float_param_regs_win64 -> is_callee_save r = false) by (unfold is_callee_save; decide_goal).
+ assert (X: forall l, loc_argument_win64_charact 0 l -> loc_argument_acceptable l).
+ { unfold loc_argument_win64_charact, loc_argument_acceptable.
destruct l as [r | [] ofs ty]; auto. intros [C|C]; auto.
intros [C D]. split; auto. apply Z.divide_trans with 2; auto.
exists (2 / typealign ty); destruct ty; reflexivity.
}
- exploit loc_arguments_64_charact; eauto using Z.divide_0_r.
+ exploit loc_arguments_win64_charact; eauto using Z.divide_0_r.
unfold forall_rpair; destruct p; intuition auto.
+- (* ELF 64 bits *)
+ assert (A: forall r, In r int_param_regs_elf64 -> is_callee_save r = false) by (unfold is_callee_save; rewrite SF, W64; decide_goal).
+ assert (B: forall r, In r float_param_regs_elf64 -> is_callee_save r = false) by (unfold is_callee_save; rewrite W64; decide_goal).
+ assert (X: forall l, loc_argument_elf64_charact 0 l -> loc_argument_acceptable l).
+ { unfold loc_argument_elf64_charact, loc_argument_acceptable.
+ destruct l as [r | [] ofs ty]; auto. intros [C|C]; auto.
+ intros [C D]. split; auto. apply Z.divide_trans with 2; auto.
+ exists (2 / typealign ty); destruct ty; reflexivity.
+ }
+ exploit loc_arguments_elf64_charact; eauto using Z.divide_0_r.
+ unfold forall_rpair; destruct p; intuition auto.
+
- (* 32 bits *)
assert (X: forall l, loc_argument_32_charact 0 l -> loc_argument_acceptable l).
{ destruct l as [r | [] ofs ty]; simpl; intuition auto. rewrite H2; apply Z.divide_1_l. }
@@ -325,7 +429,7 @@ Hint Resolve loc_arguments_acceptable: locs.
Lemma loc_arguments_main:
loc_arguments signature_main = nil.
Proof.
- unfold loc_arguments; destruct Archi.ptr64; reflexivity.
+ unfold loc_arguments; destruct Archi.ptr64; auto; destruct Archi.win64; auto.
Qed.
(** ** Normalization of function results *)
diff --git a/x86/Machregsaux.ml b/x86/Machregsaux.ml
index 80066b00..840943e7 100644
--- a/x86/Machregsaux.ml
+++ b/x86/Machregsaux.ml
@@ -12,25 +12,7 @@
(** Auxiliary functions on machine registers *)
-open Camlcoq
-open Machregs
-
-let register_names : (mreg, string) Hashtbl.t = Hashtbl.create 31
-
-let _ =
- List.iter
- (fun (s, r) -> Hashtbl.add register_names r (camlstring_of_coqstring s))
- Machregs.register_names
-
let is_scratch_register r = false
-
-let name_of_register r =
- try Some (Hashtbl.find register_names r) with Not_found -> None
-
-let register_by_name s =
- Machregs.register_by_name (coqstring_uppercase_ascii_of_camlstring s)
-
-let can_reserve_register r = Conventions1.is_callee_save r
let class_of_type = function
| AST.Tint | AST.Tlong -> 0
diff --git a/x86/Machregsaux.mli b/x86/Machregsaux.mli
index d7117c21..01b0f9fd 100644
--- a/x86/Machregsaux.mli
+++ b/x86/Machregsaux.mli
@@ -12,9 +12,6 @@
(** Auxiliary functions on machine registers *)
-val name_of_register: Machregs.mreg -> string option
-val register_by_name: string -> Machregs.mreg option
val is_scratch_register: string -> bool
-val can_reserve_register: Machregs.mreg -> bool
val class_of_type: AST.typ -> int
diff --git a/x86/Stacklayout.v b/x86/Stacklayout.v
index d375febf..4f68cf26 100644
--- a/x86/Stacklayout.v
+++ b/x86/Stacklayout.v
@@ -15,11 +15,13 @@
Require Import Coqlib.
Require Import AST Memory Separation.
Require Import Bounds.
+Require Archi.
Local Open Scope sep_scope.
(** The general shape of activation records is as follows,
from bottom (lowest offsets) to top:
+- For the Win64 ABI: 32 reserved bytes
- Space for outgoing arguments to function calls.
- Back link to parent frame
- Saved values of integer callee-save registers used by the function.
@@ -29,11 +31,11 @@ Local Open Scope sep_scope.
- Return address.
*)
-Definition fe_ofs_arg := 0.
+Definition fe_ofs_arg := if Archi.win64 then 32 else 0.
Definition make_env (b: bounds) : frame_env :=
let w := if Archi.ptr64 then 8 else 4 in
- let olink := align (4 * b.(bound_outgoing)) w in (* back link *)
+ let olink := align (fe_ofs_arg + 4 * b.(bound_outgoing)) w in (* back link *)
let ocs := olink + w in (* callee-saves *)
let ol := align (size_callee_save_area b ocs) 8 in (* locals *)
let ostkdata := align (ol + 4 * b.(bound_local)) 8 in (* stack data *)
@@ -61,7 +63,7 @@ Proof.
Local Opaque Z.add Z.mul sepconj range.
intros; simpl.
set (w := if Archi.ptr64 then 8 else 4).
- set (olink := align (4 * b.(bound_outgoing)) w).
+ set (olink := align (fe_ofs_arg + 4 * b.(bound_outgoing)) w).
set (ocs := olink + w).
set (ol := align (size_callee_save_area b ocs) 8).
set (ostkdata := align (ol + 4 * b.(bound_local)) 8).
@@ -69,8 +71,9 @@ Local Opaque Z.add Z.mul sepconj range.
replace (size_chunk Mptr) with w by (rewrite size_chunk_Mptr; auto).
assert (0 < w) by (unfold w; destruct Archi.ptr64; omega).
generalize b.(bound_local_pos) b.(bound_outgoing_pos) b.(bound_stack_data_pos); intros.
+ assert (0 <= fe_ofs_arg) by (unfold fe_ofs_arg; destruct Archi.win64; omega).
assert (0 <= 4 * b.(bound_outgoing)) by omega.
- assert (4 * b.(bound_outgoing) <= olink) by (apply align_le; omega).
+ assert (fe_ofs_arg + 4 * b.(bound_outgoing) <= olink) by (apply align_le; omega).
assert (olink + w <= ocs) by (unfold ocs; omega).
assert (ocs <= size_callee_save_area b ocs) by (apply size_callee_save_area_incr).
assert (size_callee_save_area b ocs <= ol) by (apply align_le; omega).
@@ -87,7 +90,7 @@ Local Opaque Z.add Z.mul sepconj range.
rewrite sep_swap45.
rewrite sep_swap34.
(* Apply range_split and range_split2 repeatedly *)
- unfold fe_ofs_arg.
+ apply range_drop_left with 0. omega.
apply range_split_2. fold olink. omega. omega.
apply range_split. omega.
apply range_split_2. fold ol. omega. omega.
@@ -105,15 +108,16 @@ Lemma frame_env_range:
Proof.
intros; simpl.
set (w := if Archi.ptr64 then 8 else 4).
- set (olink := align (4 * b.(bound_outgoing)) w).
+ set (olink := align (fe_ofs_arg + 4 * b.(bound_outgoing)) w).
set (ocs := olink + w).
set (ol := align (size_callee_save_area b ocs) 8).
set (ostkdata := align (ol + 4 * b.(bound_local)) 8).
set (oretaddr := align (ostkdata + b.(bound_stack_data)) w).
assert (0 < w) by (unfold w; destruct Archi.ptr64; omega).
generalize b.(bound_local_pos) b.(bound_outgoing_pos) b.(bound_stack_data_pos); intros.
+ assert (0 <= fe_ofs_arg) by (unfold fe_ofs_arg; destruct Archi.win64; omega).
assert (0 <= 4 * b.(bound_outgoing)) by omega.
- assert (4 * b.(bound_outgoing) <= olink) by (apply align_le; omega).
+ assert (fe_ofs_arg + 4 * b.(bound_outgoing) <= olink) by (apply align_le; omega).
assert (olink + w <= ocs) by (unfold ocs; omega).
assert (ocs <= size_callee_save_area b ocs) by (apply size_callee_save_area_incr).
assert (size_callee_save_area b ocs <= ol) by (apply align_le; omega).
@@ -133,14 +137,14 @@ Lemma frame_env_aligned:
Proof.
intros; simpl.
set (w := if Archi.ptr64 then 8 else 4).
- set (olink := align (4 * b.(bound_outgoing)) w).
+ set (olink := align (fe_ofs_arg + 4 * b.(bound_outgoing)) w).
set (ocs := olink + w).
set (ol := align (size_callee_save_area b ocs) 8).
set (ostkdata := align (ol + 4 * b.(bound_local)) 8).
set (oretaddr := align (ostkdata + b.(bound_stack_data)) w).
assert (0 < w) by (unfold w; destruct Archi.ptr64; omega).
replace (align_chunk Mptr) with w by (rewrite align_chunk_Mptr; auto).
- split. apply Z.divide_0_r.
+ split. exists (fe_ofs_arg / 8). unfold fe_ofs_arg; destruct Archi.win64; reflexivity.
split. apply align_divides; omega.
split. apply align_divides; omega.
split. apply align_divides; omega.
diff --git a/x86/TargetPrinter.ml b/x86/TargetPrinter.ml
index 38eff731..52955dcb 100644
--- a/x86/TargetPrinter.ml
+++ b/x86/TargetPrinter.ml
@@ -238,7 +238,7 @@ module MacOS_System : SYSTEM =
if i || (not !Clflags.option_fcommon) then ".const" else "COMM"
| Section_string -> ".const"
| Section_literal -> ".literal8"
- | Section_jumptable -> ".text" (* needed in 64 bits, not a problem in 32 bits *)
+ | Section_jumptable -> ".text"
| Section_user(s, wr, ex) ->
sprintf ".section \"%s\", %s, %s"
(if wr then "__DATA" else "__TEXT") s
@@ -257,32 +257,14 @@ module MacOS_System : SYSTEM =
let print_align oc n =
fprintf oc " .align %d\n" (log2 n)
- let indirect_symbols : StringSet.t ref = ref StringSet.empty
-
let print_mov_rs oc rd id =
- if Archi.ptr64 then begin
- fprintf oc " movq %a@GOTPCREL(%%rip), %a\n" symbol id ireg64 rd
- end else begin
- let id = extern_atom id in
- indirect_symbols := StringSet.add id !indirect_symbols;
- fprintf oc " movl L%a$non_lazy_ptr, %a\n" raw_symbol id ireg rd
- end
+ fprintf oc " movq %a@GOTPCREL(%%rip), %a\n" symbol id ireg64 rd
let print_fun_info _ _ = ()
let print_var_info _ _ = ()
- let print_epilogue oc =
- if not Archi.ptr64 then begin
- fprintf oc " .section __IMPORT,__pointers,non_lazy_symbol_pointers\n";
- StringSet.iter
- (fun s ->
- fprintf oc "L%a$non_lazy_ptr:\n" raw_symbol s;
- fprintf oc " .indirect_symbol %a\n" raw_symbol s;
- fprintf oc " .long 0\n")
- !indirect_symbols;
- indirect_symbols := StringSet.empty
- end
+ let print_epilogue oc = ()
let print_comm_decl oc name sz al =
fprintf oc " .comm %a, %s, %d\n"
@@ -298,8 +280,11 @@ module MacOS_System : SYSTEM =
module Cygwin_System : SYSTEM =
struct
+ let symbol_prefix =
+ if Archi.ptr64 then "" else "_"
+
let raw_symbol oc s =
- fprintf oc "_%s" s
+ fprintf oc "%s%s" symbol_prefix s
let symbol oc symb =
raw_symbol oc (extern_atom symb)
@@ -329,19 +314,39 @@ module Cygwin_System : SYSTEM =
| Section_debug_str-> assert false (* Should not be used *)
| Section_ais_annotation -> assert false (* Not supported for coff binaries *)
- let stack_alignment = 8 (* minimum is 4, 8 is better for perfs *)
+ let stack_alignment = 8
+ (* minimum is 4 for 32 bits, 8 for 64 bits; 8 is better for perfs *)
let print_align oc n =
fprintf oc " .balign %d\n" n
+ let indirect_symbols : StringSet.t ref = ref StringSet.empty
+
let print_mov_rs oc rd id =
- fprintf oc " movl $%a, %a\n" symbol id ireg rd
+ if Archi.ptr64 then begin
+ let s = extern_atom id in
+ indirect_symbols := StringSet.add s !indirect_symbols;
+ fprintf oc " movq .refptr.%s(%%rip), %a\n" s ireg rd
+ end else begin
+ fprintf oc " movl $%a, %a\n" symbol id ireg rd
+ end
let print_fun_info _ _ = ()
let print_var_info _ _ = ()
- let print_epilogue _ = ()
+ let declare_indirect_symbol oc s =
+ fprintf oc " .section .rdata$.refptr.%s, \"dr\"\n" s;
+ fprintf oc " .globl .refptr.%s\n" s;
+ fprintf oc " .linkonce discard\n";
+ fprintf oc ".refptr.%s:\n" s;
+ fprintf oc " .quad %s\n" s
+
+ let print_epilogue oc =
+ if Archi.ptr64 then begin
+ StringSet.iter (declare_indirect_symbol oc) !indirect_symbols;
+ indirect_symbols := StringSet.empty
+ end
let print_comm_decl oc name sz al =
fprintf oc " .comm %a, %s, %d\n"
@@ -349,7 +354,8 @@ module Cygwin_System : SYSTEM =
let print_lcomm_decl oc name sz al =
fprintf oc " .lcomm %a, %s, %d\n"
- symbol name (Z.to_string sz) (log2 al)
+ symbol name (Z.to_string sz)
+ (if Archi.ptr64 then al else log2 al)
end
@@ -850,6 +856,8 @@ module Target(System: SYSTEM):TARGET =
fprintf oc " minsd %a, %a\n" freg a1 freg res
| Pmovb_rm (rd,a) ->
fprintf oc " movb %a, %a\n" addressing a ireg8 rd
+ | Pmovq_rf (rd, r1) ->
+ fprintf oc " movq %a, %a\n" freg r1 ireg64 rd
| Pmovsq_mr(a, rs) ->
fprintf oc " movq %a, %a\n" freg rs addressing a
| Pmovsq_rm(rd, a) ->
diff --git a/x86/extractionMachdep.v b/x86/extractionMachdep.v
index a29553e8..20c6a521 100644
--- a/x86/extractionMachdep.v
+++ b/x86/extractionMachdep.v
@@ -15,15 +15,19 @@
(* Additional extraction directives specific to the x86-64 port *)
-Require SelectOp ConstpropOp.
+Require Archi SelectOp.
-(* SelectOp *)
-
-Extract Constant SelectOp.symbol_is_external =>
- "fun id -> Configuration.system = ""macosx"" && C2C.atom_is_extern id".
+(* Archi *)
-(* ConstpropOp *)
+Extract Constant Archi.win64 =>
+ "match Configuration.system with
+ | ""cygwin"" when ptr64 -> true
+ | _ -> false".
-Extract Constant ConstpropOp.symbol_is_external =>
- "fun id -> Configuration.system = ""macosx"" && C2C.atom_is_extern id".
+(* SelectOp *)
+Extract Constant SelectOp.symbol_is_external =>
+ "match Configuration.system with
+ | ""macosx"" -> C2C.atom_is_extern
+ | ""cygwin"" when Archi.ptr64 -> C2C.atom_is_extern
+ | _ -> (fun _ -> false)".