From c50680bb86564fe61db61e6140a418ccc7d36677 Mon Sep 17 00:00:00 2001 From: Xavier Leroy Date: Wed, 23 Dec 2020 15:54:51 +0100 Subject: AArch64: macOS port This commit adds support for macOS (and probably iOS) running on AArch64 / ARM 64-bit / "Apple silicon" processors. --- aarch64/Archi.v | 8 +- aarch64/Asmexpand.ml | 48 +++++-- aarch64/Asmgen.v | 5 +- aarch64/Asmgenproof.v | 2 +- aarch64/Asmgenproof1.v | 6 +- aarch64/CBuiltins.ml | 26 +++- aarch64/ConstpropOp.vp | 4 +- aarch64/Conventions1.v | 232 +++++++++++++++++++++---------- aarch64/SelectOp.vp | 10 +- aarch64/SelectOpproof.v | 8 +- aarch64/TargetPrinter.ml | 332 +++++++++++++++++++++++++++++--------------- aarch64/extractionMachdep.v | 18 ++- 12 files changed, 484 insertions(+), 215 deletions(-) (limited to 'aarch64') diff --git a/aarch64/Archi.v b/aarch64/Archi.v index 42500e70..91e91673 100644 --- a/aarch64/Archi.v +++ b/aarch64/Archi.v @@ -85,6 +85,10 @@ Global Opaque ptr64 big_endian splitlong fma_order fma_invalid_mul_is_nan float_of_single_preserves_sNaN. -(** Whether to generate position-independent code or not *) +(** Which ABI to implement *) -Parameter pic_code: unit -> bool. +Inductive abi_kind: Type := + | AAPCS64 (**r ARM's standard as used in Linux and other ELF platforms *) + | Apple. (**r the variant used in macOS and iOS *) + +Parameter abi: abi_kind. diff --git a/aarch64/Asmexpand.ml b/aarch64/Asmexpand.ml index 1ba754dd..6c80e9d3 100644 --- a/aarch64/Asmexpand.ml +++ b/aarch64/Asmexpand.ml @@ -47,17 +47,28 @@ let expand_storeptr (src: ireg) (base: iregsp) ofs = (* Determine the number of int registers, FP registers, and stack locations used to pass the fixed parameters. *) +let align n a = (n + a - 1) land (-a) + +let typesize = function + | Tint | Tany32 | Tsingle -> 4 + | Tlong | Tany64 | Tfloat -> 8 + +let reserve_stack stk ty = + match Archi.abi with + | Archi.AAPCS64 -> stk + 8 + | Archi.Apple -> align stk (typesize ty) + typesize ty + let rec next_arg_locations ir fr stk = function | [] -> (ir, fr, stk) - | (Tint | Tlong | Tany32 | Tany64) :: l -> + | (Tint | Tlong | Tany32 | Tany64 as ty) :: l -> if ir < 8 then next_arg_locations (ir + 1) fr stk l - else next_arg_locations ir fr (stk + 8) l - | (Tfloat | Tsingle) :: l -> + else next_arg_locations ir fr (reserve_stack stk ty) l + | (Tfloat | Tsingle as ty) :: l -> if fr < 8 then next_arg_locations ir (fr + 1) stk l - else next_arg_locations ir fr (stk + 8) l + else next_arg_locations ir fr (reserve_stack stk ty) l (* Allocate memory on the stack and use it to save the registers used for parameter passing. As an optimization, do not save @@ -86,6 +97,8 @@ let save_parameter_registers ir fr = emit (Pstrd(float_param_regs.(i), ADimm(XSP, Z.of_uint pos))) done +let current_function_stacksize = ref 0L + (* Initialize a va_list as per va_start. Register r points to the following struct: @@ -98,11 +111,7 @@ let save_parameter_registers ir fr = } *) -let current_function_stacksize = ref 0L - -let expand_builtin_va_start r = - if not (is_current_function_variadic ()) then - invalid_arg "Fatal error: va_start used in non-vararg function"; +let expand_builtin_va_start_aapcs64 r = let (ir, fr, stk) = next_arg_locations 0 0 0 (get_current_function_args ()) in let stack_ofs = Int64.(add !current_function_stacksize (of_int stk)) @@ -127,6 +136,25 @@ let expand_builtin_va_start r = expand_loadimm32 X16 (coqint_of_camlint (Int32.of_int vr_offs)); emit (Pstrw(X16, ADimm(RR1 r, coqint_of_camlint64 28L))) +(* In macOS, va_list is just a pointer (char * ) and all variadic arguments + are passed on the stack. *) + +let expand_builtin_va_start_apple r = + let (ir, fr, stk) = + next_arg_locations 0 0 0 (get_current_function_args ()) in + let stk = align stk 8 in + let stack_ofs = Int64.(add !current_function_stacksize (of_int stk)) in + (* *va = sp + stack_ofs *) + expand_addimm64 (RR1 X16) XSP (coqint_of_camlint64 stack_ofs); + emit (Pstrx(X16, ADimm(RR1 r, coqint_of_camlint64 0L))) + +let expand_builtin_va_start r = + if not (is_current_function_variadic ()) then + invalid_arg "Fatal error: va_start used in non-vararg function"; + match Archi.abi with + | Archi.AAPCS64 -> expand_builtin_va_start_aapcs64 r + | Archi.Apple -> expand_builtin_va_start_apple r + (* Handling of annotations *) let expand_annot_val kind txt targ args res = @@ -380,7 +408,7 @@ let expand_instruction instr = match instr with | Pallocframe (sz, ofs) -> emit (Pmov (RR1 X29, XSP)); - if is_current_function_variadic() then begin + if is_current_function_variadic() && Archi.abi = Archi.AAPCS64 then begin let (ir, fr, _) = next_arg_locations 0 0 0 (get_current_function_args ()) in save_parameter_registers ir fr; diff --git a/aarch64/Asmgen.v b/aarch64/Asmgen.v index 875f3fd1..baaab6c4 100644 --- a/aarch64/Asmgen.v +++ b/aarch64/Asmgen.v @@ -15,6 +15,7 @@ Require Import Recdef Coqlib Zwf Zbits. Require Import Errors AST Integers Floats Op. Require Import Locations Mach Asm. +Require SelectOp. Local Open Scope string_scope. Local Open Scope list_scope. @@ -284,7 +285,7 @@ Definition shrx64 (rd r1: ireg) (n: int) (k: code) : code := (** Load the address [id + ofs] in [rd] *) Definition loadsymbol (rd: ireg) (id: ident) (ofs: ptrofs) (k: code) : code := - if Archi.pic_code tt then + if SelectOp.symbol_is_relocatable id then if Ptrofs.eq ofs Ptrofs.zero then Ploadsymbol rd id :: k else @@ -946,7 +947,7 @@ Definition transl_addressing (sz: Z) (addr: Op.addressing) (args: list mreg) OK (arith_extended Paddext (Padd X) X16 r1 r2 x a (insn (ADimm X16 Int64.zero) :: k)) | Aglobal id ofs, nil => - assertion (negb (Archi.pic_code tt)); + assertion (negb (SelectOp.symbol_is_relocatable id)); if Ptrofs.eq (Ptrofs.modu ofs (Ptrofs.repr sz)) Ptrofs.zero && symbol_is_aligned id sz then OK (Padrp X16 id ofs :: insn (ADadr X16 id ofs) :: k) else OK (loadsymbol X16 id ofs (insn (ADimm X16 Int64.zero) :: k)) diff --git a/aarch64/Asmgenproof.v b/aarch64/Asmgenproof.v index b60623a6..d3515a96 100644 --- a/aarch64/Asmgenproof.v +++ b/aarch64/Asmgenproof.v @@ -208,7 +208,7 @@ Qed. Remark loadsymbol_label: forall r id ofs k, tail_nolabel k (loadsymbol r id ofs k). Proof. intros; unfold loadsymbol. - destruct (Archi.pic_code tt); TailNoLabel. destruct Ptrofs.eq; TailNoLabel. + destruct (SelectOp.symbol_is_relocatable id); TailNoLabel. destruct Ptrofs.eq; TailNoLabel. Qed. Hint Resolve loadsymbol_label: labels. diff --git a/aarch64/Asmgenproof1.v b/aarch64/Asmgenproof1.v index 6d44bcc8..d95376d2 100644 --- a/aarch64/Asmgenproof1.v +++ b/aarch64/Asmgenproof1.v @@ -594,13 +594,13 @@ Qed. (** Load address of symbol *) Lemma exec_loadsymbol: forall rd s ofs k rs m, - rd <> X16 \/ Archi.pic_code tt = false -> + rd <> X16 \/ SelectOp.symbol_is_relocatable s = false -> exists rs', exec_straight ge fn (loadsymbol rd s ofs k) rs m k rs' m /\ rs'#rd = Genv.symbol_address ge s ofs /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r. Proof. - unfold loadsymbol; intros. destruct (Archi.pic_code tt). + unfold loadsymbol; intros. destruct (SelectOp.symbol_is_relocatable s). - predSpec Ptrofs.eq Ptrofs.eq_spec ofs Ptrofs.zero. + subst ofs. econstructor; split. apply exec_straight_one; [simpl; eauto | reflexivity]. @@ -1833,4 +1833,4 @@ Proof. intros. Simpl. Qed. -End CONSTRUCTORS. \ No newline at end of file +End CONSTRUCTORS. diff --git a/aarch64/CBuiltins.ml b/aarch64/CBuiltins.ml index e2a9c87a..4ba7e5ae 100644 --- a/aarch64/CBuiltins.ml +++ b/aarch64/CBuiltins.ml @@ -17,16 +17,28 @@ open C -(* va_list is a struct of size 32 and alignment 8, passed by reference *) +(* AAPCS64: + va_list is a struct of size 32 and alignment 8, passed by reference + Apple: + va_list is a pointer (size 8, alignment 8), passed by reference *) -let va_list_type = TArray(TInt(IULong, []), Some 4L, []) -let size_va_list = 32 -let va_list_scalar = false +let (va_list_type, size_va_list, va_list_scalar) = + match Archi.abi with + | Archi.AAPCS64 -> (TArray(TInt(IULong, []), Some 4L, []), 32, false) + | Archi.Apple -> (TPtr(TVoid [], []), 8, true) + +(* Some macOS headers use the GCC built-in types "__int128_t" and + "__uint128_t" unconditionally. Provide a dummy definition. *) + +let int128_type = TArray(TInt(IULong, []), Some 2L, []) let builtins = { - builtin_typedefs = [ - "__builtin_va_list", va_list_type - ]; + builtin_typedefs = + [ "__builtin_va_list", va_list_type ] @ + (if Configuration.system = "macosx" then + [ "__int128_t", int128_type; + "__uint128_t", int128_type ] + else []); builtin_functions = [ (* Synchronization *) "__builtin_fence", diff --git a/aarch64/ConstpropOp.vp b/aarch64/ConstpropOp.vp index c0a2c6bf..f2d17a51 100644 --- a/aarch64/ConstpropOp.vp +++ b/aarch64/ConstpropOp.vp @@ -13,11 +13,11 @@ (** Strength reduction for operators and conditions. This is the machine-dependent part of [Constprop]. *) -Require Archi. Require Import Coqlib Compopts. Require Import AST Integers Floats. Require Import Op Registers. Require Import ValueDomain ValueAOp. +Require SelectOp. (** * Converting known values to constants *) @@ -375,7 +375,7 @@ Nondetfunction op_strength_reduction Nondetfunction addr_strength_reduction (addr: addressing) (args: list reg) (vl: list aval) := match addr, args, vl with - | Aindexed n, r1 :: nil, Ptr(Gl symb n1) :: nil => + | Aindexed n, r1 :: nil, Ptr(Gl symb n1) :: nil ?? negb (SelectOp.symbol_is_relocatable symb) => (Aglobal symb (Ptrofs.add n1 (Ptrofs.of_int64 n)), nil) | Aindexed n, r1 :: nil, Ptr(Stk n1) :: nil => (Ainstack (Ptrofs.add n1 (Ptrofs.of_int64 n)), nil) diff --git a/aarch64/Conventions1.v b/aarch64/Conventions1.v index efda835d..4873dd91 100644 --- a/aarch64/Conventions1.v +++ b/aarch64/Conventions1.v @@ -24,7 +24,12 @@ Require Archi. - Caller-save registers that can be modified during a function call. We follow the Procedure Call Standard for the ARM 64-bit Architecture - (AArch64) document: R19-R28 and F8-F15 are callee-save. *) + (AArch64) document: R19-R28 and F8-F15 are callee-save. + + X16 is reserved as a temporary for asm generation. + X18 is reserved as the platform register. + X29 is reserved as the frame pointer register. + X30 is reserved as the return address register. *) Definition is_callee_save (r: mreg): bool := match r with @@ -154,9 +159,23 @@ Qed. (** - The first 8 integer arguments are passed in registers [R0...R7]. - The first 8 FP arguments are passed in registers [F0...F7]. -- Extra arguments are passed on the stack, in [Outgoing] slots of size - 64 bits (2 words), consecutively assigned, starting at word offset 0. -**) +- Extra arguments are passed on the stack, in [Outgoing] slots, + consecutively assigned, starting at word offset 0. + +In the standard AAPCS64, all stack slots are 8-byte wide (2 words). + +In the Apple variant, a stack slot has the size of the type of the +corresponding argument, and is aligned accordingly. We use 8-byte +slots (2 words) for C types [long] and [double], and 4-byte slots +(1 word) for C types [int] and [float]. For full conformance, we should +use 1-byte slots for [char] types and 2-byte slots for [short] types, +but this cannot be expressed in CompCert's type algebra, so we +incorrectly use 4-byte slots. + +Concerning variable arguments to vararg functions: +- In the AAPCS64 standard, they are passed like regular, fixed arguments. +- In the Apple variant, they are always passed on stack, in 8-byte slots. +*) Definition int_param_regs := R0 :: R1 :: R2 :: R3 :: R4 :: R5 :: R6 :: R7 :: nil. @@ -164,31 +183,70 @@ Definition int_param_regs := Definition float_param_regs := F0 :: F1 :: F2 :: F3 :: F4 :: F5 :: F6 :: F7 :: nil. +Definition stack_arg (ty: typ) (ir fr ofs: Z) + (rec: Z -> Z -> Z -> list (rpair loc)) := + match Archi.abi with + | Archi.AAPCS64 => + let ofs := align ofs 2 in + One (S Outgoing ofs ty) :: rec ir fr (ofs + 2) + | Archi.Apple => + let ofs := align ofs (typesize ty) in + One (S Outgoing ofs ty) :: rec ir fr (ofs + typesize ty) + end. + +Definition int_arg (ty: typ) (ir fr ofs: Z) + (rec: Z -> Z -> Z -> list (rpair loc)) := + match list_nth_z int_param_regs ir with + | None => + stack_arg ty ir fr ofs rec + | Some ireg => + One (R ireg) :: rec (ir + 1) fr ofs + end. + +Definition float_arg (ty: typ) (ir fr ofs: Z) + (rec: Z -> Z -> Z -> list (rpair loc)) := + match list_nth_z float_param_regs fr with + | None => + stack_arg ty ir fr ofs rec + | Some freg => + One (R freg) :: rec ir (fr + 1) ofs + end. + +Fixpoint loc_arguments_stack (tyl: list typ) (ofs: Z) {struct tyl} : list (rpair loc) := + match tyl with + | nil => nil + | ty :: tys => One (S Outgoing ofs Tany64) :: loc_arguments_stack tys (ofs + 2) + end. + Fixpoint loc_arguments_rec - (tyl: list typ) (ir fr ofs: Z) {struct tyl} : list (rpair loc) := + (tyl: list typ) (fixed ir fr ofs: Z) {struct tyl} : list (rpair loc) := match tyl with | nil => nil - | (Tint | Tlong | Tany32 | Tany64) as ty :: tys => - match list_nth_z int_param_regs ir with - | None => - One (S Outgoing ofs ty) :: loc_arguments_rec tys ir fr (ofs + 2) - | Some ireg => - One (R ireg) :: loc_arguments_rec tys (ir + 1) fr ofs - end - | (Tfloat | Tsingle) as ty :: tys => - match list_nth_z float_param_regs fr with - | None => - One (S Outgoing ofs ty) :: loc_arguments_rec tys ir fr (ofs + 2) - | Some freg => - One (R freg) :: loc_arguments_rec tys ir (fr + 1) ofs + | ty :: tys => + if zle fixed 0 then loc_arguments_stack tyl (align ofs 2) else + match ty with + | Tint | Tlong | Tany32 | Tany64 => + int_arg ty ir fr ofs (loc_arguments_rec tys (fixed - 1)) + | Tfloat | Tsingle => + float_arg ty ir fr ofs (loc_arguments_rec tys (fixed - 1)) end end. +(** Number of fixed arguments for a function with signature [s]. + For AAPCS64, all arguments are treated as fixed, even for a vararg + function. *) + +Definition fixed_arguments (s: signature) : Z := + match Archi.abi, s.(sig_cc).(cc_vararg) with + | Archi.Apple, Some n => n + | _, _ => list_length_z s.(sig_args) + end. + (** [loc_arguments s] returns the list of locations where to store arguments when calling a function with signature [s]. *) Definition loc_arguments (s: signature) : list (rpair loc) := - loc_arguments_rec s.(sig_args) 0 0 0. + loc_arguments_rec s.(sig_args) (fixed_arguments s) 0 0 0. (** Argument locations are either caller-save registers or [Outgoing] stack slots at nonnegative offsets. *) @@ -200,49 +258,73 @@ Definition loc_argument_acceptable (l: loc) : Prop := | _ => False end. -Definition loc_argument_charact (ofs: Z) (l: loc) : Prop := - match l with - | R r => In r int_param_regs \/ In r float_param_regs - | S Outgoing ofs' ty => ofs' >= ofs /\ (2 | ofs') - | _ => False - end. - -Remark loc_arguments_rec_charact: - forall tyl ir fr ofs p, - In p (loc_arguments_rec tyl ir fr ofs) -> (2 | ofs) -> forall_rpair (loc_argument_charact ofs) p. +Lemma loc_arguments_rec_charact: + forall tyl fixed ri rf ofs p, + ofs >= 0 -> + In p (loc_arguments_rec tyl fixed ri rf ofs) -> forall_rpair loc_argument_acceptable p. Proof. - assert (X: forall ofs1 ofs2 l, loc_argument_charact ofs2 l -> ofs1 <= ofs2 -> loc_argument_charact ofs1 l). - { destruct l; simpl; intros; auto. destruct sl; auto. intuition omega. } - assert (Y: forall ofs1 ofs2 p, forall_rpair (loc_argument_charact ofs2) p -> ofs1 <= ofs2 -> forall_rpair (loc_argument_charact ofs1) p). - { destruct p; simpl; intuition eauto. } - assert (Z: forall ofs, (2 | ofs) -> (2 | ofs + 2)). - { intros. apply Z.divide_add_r; auto. apply Z.divide_refl. } -Opaque list_nth_z. - induction tyl; simpl loc_arguments_rec; intros. -- contradiction. -- assert (A: forall ty, In p - match list_nth_z int_param_regs ir with - | Some ireg => One (R ireg) :: loc_arguments_rec tyl (ir + 1) fr ofs - | None => One (S Outgoing ofs ty) :: loc_arguments_rec tyl ir fr (ofs + 2) - end -> - forall_rpair (loc_argument_charact ofs) p). - { intros. destruct (list_nth_z int_param_regs ir) as [r|] eqn:E; destruct H1. - subst. left. eapply list_nth_z_in; eauto. - eapply IHtyl; eauto. - subst. split. omega. assumption. - eapply Y; eauto. omega. } - assert (B: forall ty, In p - match list_nth_z float_param_regs fr with - | Some ireg => One (R ireg) :: loc_arguments_rec tyl ir (fr + 1) ofs - | None => One (S Outgoing ofs ty) :: loc_arguments_rec tyl ir fr (ofs + 2) - end -> - forall_rpair (loc_argument_charact ofs) p). - { intros. destruct (list_nth_z float_param_regs fr) as [r|] eqn:E; destruct H1. - subst. right. eapply list_nth_z_in; eauto. - eapply IHtyl; eauto. - subst. split. omega. assumption. - eapply Y; eauto. omega. } - destruct a; eauto. + set (OK := fun (l: list (rpair loc)) => + forall p, In p l -> forall_rpair loc_argument_acceptable p). + set (OKF := fun (f: Z -> Z -> Z -> list (rpair loc)) => + forall ri rf ofs, ofs >= 0 -> OK (f ri rf ofs)). + assert (CSI: forall r, In r int_param_regs -> is_callee_save r = false). + { decide_goal. } + assert (CSF: forall r, In r float_param_regs -> is_callee_save r = false). + { decide_goal. } + assert (ALP: forall ofs ty, ofs >= 0 -> align ofs (typesize ty) >= 0). + { intros. + assert (ofs <= align ofs (typesize ty)) by (apply align_le; apply typesize_pos). + omega. } + assert (ALD: forall ofs ty, ofs >= 0 -> (typealign ty | align ofs (typesize ty))). + { intros. apply Z.divide_trans with (typesize ty). apply typealign_typesize. apply align_divides. apply typesize_pos. } + assert (ALP2: forall ofs, ofs >= 0 -> align ofs 2 >= 0). + { intros. + assert (ofs <= align ofs 2) by (apply align_le; omega). + omega. } + assert (ALD2: forall ofs ty, ofs >= 0 -> (typealign ty | align ofs 2)). + { intros. eapply Z.divide_trans with 2. + exists (2 / typealign ty). destruct ty; reflexivity. + apply align_divides. omega. } + assert (STK: forall tyl ofs, + ofs >= 0 -> OK (loc_arguments_stack tyl ofs)). + { induction tyl as [ | ty tyl]; intros ofs OO; red; simpl; intros. + - contradiction. + - destruct H. + + subst p. split. auto. simpl. apply Z.divide_1_l. + + apply IHtyl with (ofs := ofs + 2). omega. auto. + } + assert (A: forall ty ri rf ofs f, + OKF f -> ofs >= 0 -> OK (stack_arg ty ri rf ofs f)). + { intros until f; intros OF OO; red; unfold stack_arg; intros. + destruct Archi.abi; destruct H. + - subst p; simpl; auto. + - eapply OF; [|eauto]. apply ALP2 in OO. omega. + - subst p; simpl; auto. + - eapply OF; [|eauto]. apply (ALP ofs ty) in OO. generalize (typesize_pos ty). omega. + } + assert (B: forall ty ri rf ofs f, + OKF f -> ofs >= 0 -> OK (int_arg ty ri rf ofs f)). + { intros until f; intros OF OO; red; unfold int_arg; intros. + destruct (list_nth_z int_param_regs ri) as [r|] eqn:NTH; [destruct H|]. + - subst p; simpl. apply CSI. eapply list_nth_z_in; eauto. + - eapply OF; eauto. + - eapply A; eauto. + } + assert (C: forall ty ri rf ofs f, + OKF f -> ofs >= 0 -> OK (float_arg ty ri rf ofs f)). + { intros until f; intros OF OO; red; unfold float_arg; intros. + destruct (list_nth_z float_param_regs rf) as [r|] eqn:NTH; [destruct H|]. + - subst p; simpl. apply CSF. eapply list_nth_z_in; eauto. + - eapply OF; eauto. + - eapply A; eauto. + } + cut (forall tyl fixed ri rf ofs, ofs >= 0 -> OK (loc_arguments_rec tyl fixed ri rf ofs)). + unfold OK. eauto. + induction tyl as [ | ty1 tyl]; intros until ofs; intros OO; simpl. +- red; simpl; tauto. +- destruct (zle fixed 0). + + apply (STK (ty1 :: tyl)); auto. + + unfold OKF in *; destruct ty1; eauto. Qed. Lemma loc_arguments_acceptable: @@ -250,16 +332,7 @@ Lemma loc_arguments_acceptable: In p (loc_arguments s) -> forall_rpair loc_argument_acceptable p. Proof. unfold loc_arguments; intros. - assert (A: forall r, In r int_param_regs -> is_callee_save r = false) by decide_goal. - assert (B: forall r, In r float_param_regs -> is_callee_save r = false) by decide_goal. - assert (X: forall l, loc_argument_charact 0 l -> loc_argument_acceptable l). - { unfold loc_argument_charact, loc_argument_acceptable. - destruct l as [r | [] ofs ty]; auto. intros [C|C]; auto. - intros [C D]. split; auto. apply Z.divide_trans with 2; auto. - exists (2 / typealign ty); destruct ty; reflexivity. - } - exploit loc_arguments_rec_charact; eauto using Z.divide_0_r. - unfold forall_rpair; destruct p; intuition auto. + eapply loc_arguments_rec_charact; eauto. omega. Qed. Hint Resolve loc_arguments_acceptable: locs. @@ -276,10 +349,19 @@ Qed. value of a function have unpredictable values and must be ignored. Consequently, we force normalization of return values of small integer types (8- and 16-bit integers), so that the top bits (the "padding bits") - are proper sign- or zero-extensions of the small integer value. *) + are proper sign- or zero-extensions of the small integer value. + + The Apple variant of the AAPCS64 requires the callee to return a normalized + value, hence no normalization is needed in the caller. + *) Definition return_value_needs_normalization (t: rettype) : bool := - match t with - | Tint8signed | Tint8unsigned | Tint16signed | Tint16unsigned => true - | _ => false + match Archi.abi with + | Archi.Apple => false + | Archi.AAPCS64 => + match t with + | Tint8signed | Tint8unsigned | Tint16signed | Tint16unsigned => true + | _ => false + end end. + diff --git a/aarch64/SelectOp.vp b/aarch64/SelectOp.vp index 5bd96987..b5a03989 100644 --- a/aarch64/SelectOp.vp +++ b/aarch64/SelectOp.vp @@ -536,10 +536,18 @@ Definition select (ty: typ) (cond: condition) (args: exprlist) (e1 e2: expr) := (** ** Recognition of addressing modes for load and store operations *) +(** Some symbols are relocatable (e.g. external symbols in macOS) + and cannot be used with [Aglobal] addressing mode. *) + +Parameter symbol_is_relocatable: ident -> bool. + Nondetfunction addressing (chunk: memory_chunk) (e: expr) := match e with | Eop (Oaddrstack n) Enil => (Ainstack n, Enil) - | Eop (Oaddrsymbol id ofs) Enil => (Aglobal id ofs, Enil) + | Eop (Oaddrsymbol id ofs) Enil => + if symbol_is_relocatable id + then (Aindexed (Ptrofs.to_int64 ofs), Eop (Oaddrsymbol id Ptrofs.zero) Enil ::: Enil) + else (Aglobal id ofs, Enil) | Eop (Oaddlimm n) (e1:::Enil) => (Aindexed n, e1:::Enil) | Eop (Oaddlshift Slsl a) (e1:::e2:::Enil) => (Aindexed2shift a, e1:::e2:::Enil) | Eop (Oaddlext x a) (e1:::e2:::Enil) => (Aindexed2ext x a, e1:::e2:::Enil) diff --git a/aarch64/SelectOpproof.v b/aarch64/SelectOpproof.v index b78a5ed8..625a0c14 100644 --- a/aarch64/SelectOpproof.v +++ b/aarch64/SelectOpproof.v @@ -1029,7 +1029,13 @@ Theorem eval_addressing: Proof. intros until v. unfold addressing; case (addressing_match a); intros; InvEval. - econstructor; split. EvalOp. simpl; auto. -- econstructor; split. EvalOp. simpl; auto. +- destruct (symbol_is_relocatable id). + + exists (Genv.symbol_address ge id Ptrofs.zero :: nil); split. + constructor. EvalOp. constructor. + simpl. rewrite <- Genv.shift_symbol_address_64 by auto. + rewrite Ptrofs.of_int64_to_int64, Ptrofs.add_zero_l by auto. + auto. + + econstructor; split. EvalOp. simpl; auto. - econstructor; split. EvalOp. simpl. destruct v1; try discriminate. rewrite <- H; auto. - econstructor; split. EvalOp. simpl. congruence. diff --git a/aarch64/TargetPrinter.ml b/aarch64/TargetPrinter.ml index a46b548c..1c1ff018 100644 --- a/aarch64/TargetPrinter.ml +++ b/aarch64/TargetPrinter.ml @@ -36,100 +36,128 @@ let is_immediate_float32 bits = let mant = Int32.logand bits 0x7F_FFFFl in exp >= -3 && exp <= 4 && Int32.logand mant 0x78_0000l = mant -(* Module containing the printing functions *) +(* Naming and printing registers *) + +let intsz oc (sz, n) = + match sz with X -> coqint64 oc n | W -> coqint oc n + +let xreg_name = function + | X0 -> "x0" | X1 -> "x1" | X2 -> "x2" | X3 -> "x3" + | X4 -> "x4" | X5 -> "x5" | X6 -> "x6" | X7 -> "x7" + | X8 -> "x8" | X9 -> "x9" | X10 -> "x10" | X11 -> "x11" + | X12 -> "x12" | X13 -> "x13" | X14 -> "x14" | X15 -> "x15" + | X16 -> "x16" | X17 -> "x17" | X18 -> "x18" | X19 -> "x19" + | X20 -> "x20" | X21 -> "x21" | X22 -> "x22" | X23 -> "x23" + | X24 -> "x24" | X25 -> "x25" | X26 -> "x26" | X27 -> "x27" + | X28 -> "x28" | X29 -> "x29" | X30 -> "x30" + +let wreg_name = function + | X0 -> "w0" | X1 -> "w1" | X2 -> "w2" | X3 -> "w3" + | X4 -> "w4" | X5 -> "w5" | X6 -> "w6" | X7 -> "w7" + | X8 -> "w8" | X9 -> "w9" | X10 -> "w10" | X11 -> "w11" + | X12 -> "w12" | X13 -> "w13" | X14 -> "w14" | X15 -> "w15" + | X16 -> "w16" | X17 -> "w17" | X18 -> "w18" | X19 -> "w19" + | X20 -> "w20" | X21 -> "w21" | X22 -> "w22" | X23 -> "w23" + | X24 -> "w24" | X25 -> "w25" | X26 -> "w26" | X27 -> "w27" + | X28 -> "w28" | X29 -> "w29" | X30 -> "w30" + +let xreg0_name = function RR0 r -> xreg_name r | XZR -> "xzr" +let wreg0_name = function RR0 r -> wreg_name r | XZR -> "wzr" + +let xregsp_name = function RR1 r -> xreg_name r | XSP -> "sp" +let wregsp_name = function RR1 r -> wreg_name r | XSP -> "wsp" + +let dreg_name = function +| D0 -> "d0" | D1 -> "d1" | D2 -> "d2" | D3 -> "d3" +| D4 -> "d4" | D5 -> "d5" | D6 -> "d6" | D7 -> "d7" +| D8 -> "d8" | D9 -> "d9" | D10 -> "d10" | D11 -> "d11" +| D12 -> "d12" | D13 -> "d13" | D14 -> "d14" | D15 -> "d15" +| D16 -> "d16" | D17 -> "d17" | D18 -> "d18" | D19 -> "d19" +| D20 -> "d20" | D21 -> "d21" | D22 -> "d22" | D23 -> "d23" +| D24 -> "d24" | D25 -> "d25" | D26 -> "d26" | D27 -> "d27" +| D28 -> "d28" | D29 -> "d29" | D30 -> "d30" | D31 -> "d31" + +let sreg_name = function +| D0 -> "s0" | D1 -> "s1" | D2 -> "s2" | D3 -> "s3" +| D4 -> "s4" | D5 -> "s5" | D6 -> "s6" | D7 -> "s7" +| D8 -> "s8" | D9 -> "s9" | D10 -> "s10" | D11 -> "s11" +| D12 -> "s12" | D13 -> "s13" | D14 -> "s14" | D15 -> "s15" +| D16 -> "s16" | D17 -> "s17" | D18 -> "s18" | D19 -> "s19" +| D20 -> "s20" | D21 -> "s21" | D22 -> "s22" | D23 -> "s23" +| D24 -> "s24" | D25 -> "s25" | D26 -> "s26" | D27 -> "s27" +| D28 -> "s28" | D29 -> "s29" | D30 -> "s30" | D31 -> "s31" + +let xreg oc r = output_string oc (xreg_name r) +let wreg oc r = output_string oc (wreg_name r) +let ireg oc (sz, r) = + output_string oc (match sz with X -> xreg_name r | W -> wreg_name r) + +let xreg0 oc r = output_string oc (xreg0_name r) +let wreg0 oc r = output_string oc (wreg0_name r) +let ireg0 oc (sz, r) = + output_string oc (match sz with X -> xreg0_name r | W -> wreg0_name r) + +let xregsp oc r = output_string oc (xregsp_name r) +let iregsp oc (sz, r) = + output_string oc (match sz with X -> xregsp_name r | W -> wregsp_name r) + +let dreg oc r = output_string oc (dreg_name r) +let sreg oc r = output_string oc (sreg_name r) +let freg oc (sz, r) = + output_string oc (match sz with D -> dreg_name r | S -> sreg_name r) + +let preg_asm oc ty = function + | IR r -> if ty = Tint then wreg oc r else xreg oc r + | FR r -> if ty = Tsingle then sreg oc r else dreg oc r + | _ -> assert false + +let preg_annot = function + | IR r -> xreg_name r + | FR r -> dreg_name r + | _ -> assert false + +(* Base-2 log of a Caml integer *) +let rec log2 n = + assert (n > 0); + if n = 1 then 0 else 1 + log2 (n lsr 1) + +(* System dependent printer functions *) + +module type SYSTEM = + sig + val comment: string + val raw_symbol: out_channel -> string -> unit + val symbol: out_channel -> P.t -> unit + val symbol_offset_high: out_channel -> P.t * Z.t -> unit + val symbol_offset_low: out_channel -> P.t * Z.t -> unit + val label: out_channel -> int -> unit + val label_high: out_channel -> int -> unit + val label_low: out_channel -> int -> unit + val load_symbol_address: out_channel -> ireg -> P.t -> unit + val name_of_section: section_name -> string + val print_fun_info: out_channel -> P.t -> unit + val print_var_info: out_channel -> P.t -> unit + val print_comm_decl: out_channel -> P.t -> Z.t -> int -> unit + val print_lcomm_decl: out_channel -> P.t -> Z.t -> int -> unit + end -module Target : TARGET = +module ELF_System : SYSTEM = struct - -(* Basic printing functions *) - let comment = "//" + let raw_symbol = output_string + let symbol = elf_symbol + let symbol_offset_high = elf_symbol_offset + let symbol_offset_low oc id_ofs = + fprintf oc "#:lo12:%a" elf_symbol_offset id_ofs - let symbol = elf_symbol - let symbol_offset = elf_symbol_offset - let label = elf_label + let label = elf_label + let label_high = elf_label + let label_low oc lbl = + fprintf oc "#:lo12:%a" elf_label lbl - let print_label oc lbl = label oc (transl_label lbl) - - let intsz oc (sz, n) = - match sz with X -> coqint64 oc n | W -> coqint oc n - - let xreg_name = function - | X0 -> "x0" | X1 -> "x1" | X2 -> "x2" | X3 -> "x3" - | X4 -> "x4" | X5 -> "x5" | X6 -> "x6" | X7 -> "x7" - | X8 -> "x8" | X9 -> "x9" | X10 -> "x10" | X11 -> "x11" - | X12 -> "x12" | X13 -> "x13" | X14 -> "x14" | X15 -> "x15" - | X16 -> "x16" | X17 -> "x17" | X18 -> "x18" | X19 -> "x19" - | X20 -> "x20" | X21 -> "x21" | X22 -> "x22" | X23 -> "x23" - | X24 -> "x24" | X25 -> "x25" | X26 -> "x26" | X27 -> "x27" - | X28 -> "x28" | X29 -> "x29" | X30 -> "x30" - - let wreg_name = function - | X0 -> "w0" | X1 -> "w1" | X2 -> "w2" | X3 -> "w3" - | X4 -> "w4" | X5 -> "w5" | X6 -> "w6" | X7 -> "w7" - | X8 -> "w8" | X9 -> "w9" | X10 -> "w10" | X11 -> "w11" - | X12 -> "w12" | X13 -> "w13" | X14 -> "w14" | X15 -> "w15" - | X16 -> "w16" | X17 -> "w17" | X18 -> "w18" | X19 -> "w19" - | X20 -> "w20" | X21 -> "w21" | X22 -> "w22" | X23 -> "w23" - | X24 -> "w24" | X25 -> "w25" | X26 -> "w26" | X27 -> "w27" - | X28 -> "w28" | X29 -> "w29" | X30 -> "w30" - - let xreg0_name = function RR0 r -> xreg_name r | XZR -> "xzr" - let wreg0_name = function RR0 r -> wreg_name r | XZR -> "wzr" - - let xregsp_name = function RR1 r -> xreg_name r | XSP -> "sp" - let wregsp_name = function RR1 r -> wreg_name r | XSP -> "wsp" - - let dreg_name = function - | D0 -> "d0" | D1 -> "d1" | D2 -> "d2" | D3 -> "d3" - | D4 -> "d4" | D5 -> "d5" | D6 -> "d6" | D7 -> "d7" - | D8 -> "d8" | D9 -> "d9" | D10 -> "d10" | D11 -> "d11" - | D12 -> "d12" | D13 -> "d13" | D14 -> "d14" | D15 -> "d15" - | D16 -> "d16" | D17 -> "d17" | D18 -> "d18" | D19 -> "d19" - | D20 -> "d20" | D21 -> "d21" | D22 -> "d22" | D23 -> "d23" - | D24 -> "d24" | D25 -> "d25" | D26 -> "d26" | D27 -> "d27" - | D28 -> "d28" | D29 -> "d29" | D30 -> "d30" | D31 -> "d31" - - let sreg_name = function - | D0 -> "s0" | D1 -> "s1" | D2 -> "s2" | D3 -> "s3" - | D4 -> "s4" | D5 -> "s5" | D6 -> "s6" | D7 -> "s7" - | D8 -> "s8" | D9 -> "s9" | D10 -> "s10" | D11 -> "s11" - | D12 -> "s12" | D13 -> "s13" | D14 -> "s14" | D15 -> "s15" - | D16 -> "s16" | D17 -> "s17" | D18 -> "s18" | D19 -> "s19" - | D20 -> "s20" | D21 -> "s21" | D22 -> "s22" | D23 -> "s23" - | D24 -> "s24" | D25 -> "s25" | D26 -> "s26" | D27 -> "s27" - | D28 -> "s28" | D29 -> "s29" | D30 -> "s30" | D31 -> "s31" - - let xreg oc r = output_string oc (xreg_name r) - let wreg oc r = output_string oc (wreg_name r) - let ireg oc (sz, r) = - output_string oc (match sz with X -> xreg_name r | W -> wreg_name r) - - let xreg0 oc r = output_string oc (xreg0_name r) - let wreg0 oc r = output_string oc (wreg0_name r) - let ireg0 oc (sz, r) = - output_string oc (match sz with X -> xreg0_name r | W -> wreg0_name r) - - let xregsp oc r = output_string oc (xregsp_name r) - let iregsp oc (sz, r) = - output_string oc (match sz with X -> xregsp_name r | W -> wregsp_name r) - - let dreg oc r = output_string oc (dreg_name r) - let sreg oc r = output_string oc (sreg_name r) - let freg oc (sz, r) = - output_string oc (match sz with D -> dreg_name r | S -> sreg_name r) - - let preg_asm oc ty = function - | IR r -> if ty = Tint then wreg oc r else xreg oc r - | FR r -> if ty = Tsingle then sreg oc r else dreg oc r - | _ -> assert false - - let preg_annot = function - | IR r -> xreg_name r - | FR r -> dreg_name r - | _ -> assert false - -(* Names of sections *) + let load_symbol_address oc rd id = + fprintf oc " adrp %a, :got:%a\n" xreg rd symbol id; + fprintf oc " ldr %a, [%a, #:got_lo12:%a]\n" xreg rd xreg rd symbol id let name_of_section = function | Section_text -> ".text" @@ -151,6 +179,94 @@ module Target : TARGET = s (if wr then "w" else "") (if ex then "x" else "") | Section_ais_annotation -> sprintf ".section \"__compcert_ais_annotations\",\"\",@note" + let print_fun_info = elf_print_fun_info + let print_var_info = elf_print_var_info + + let print_comm_decl oc name sz al = + fprintf oc " .comm %a, %s, %d\n" symbol name (Z.to_string sz) al + + let print_lcomm_decl oc name sz al = + fprintf oc " .local %a\n" symbol name; + print_comm_decl oc name sz al + + end + +module MacOS_System : SYSTEM = + struct + let comment = ";" + + let raw_symbol oc s = + fprintf oc "_%s" s + + let symbol oc symb = + raw_symbol oc (extern_atom symb) + + let symbol_offset_gen kind oc (id, ofs) = + fprintf oc "%a@%s" symbol id kind; + let ofs = camlint64_of_ptrofs ofs in + if ofs <> 0L then fprintf oc " + %Ld" ofs + + let symbol_offset_high = symbol_offset_gen "PAGE" + let symbol_offset_low = symbol_offset_gen "PAGEOFF" + + let label oc lbl = + fprintf oc "L%d" lbl + + let label_high oc lbl = + fprintf oc "%a@PAGE" label lbl + let label_low oc lbl = + fprintf oc "%a@PAGEOFF" label lbl + + let load_symbol_address oc rd id = + fprintf oc " adrp %a, %a@GOTPAGE\n" xreg rd symbol id; + fprintf oc " ldr %a, [%a, %a@GOTPAGEOFF]\n" xreg rd xreg rd symbol id + + let name_of_section = function + | Section_text -> ".text" + | Section_data i | Section_small_data i -> + if i || (not !Clflags.option_fcommon) then ".data" else "COMM" + | Section_const i | Section_small_const i -> + if i || (not !Clflags.option_fcommon) then ".const" else "COMM" + | Section_string -> ".const" + | Section_literal -> ".const" + | Section_jumptable -> ".text" + | Section_user(s, wr, ex) -> + sprintf ".section \"%s\", %s, %s" + (if wr then "__DATA" else "__TEXT") s + (if ex then "regular, pure_instructions" else "regular") + | Section_debug_info _ -> ".section __DWARF,__debug_info,regular,debug" + | Section_debug_loc -> ".section __DWARF,__debug_loc,regular,debug" + | Section_debug_line _ -> ".section __DWARF,__debug_line,regular,debug" + | Section_debug_str -> ".section __DWARF,__debug_str,regular,debug" + | Section_debug_ranges -> ".section __DWARF,__debug_ranges,regular,debug" + | Section_debug_abbrev -> ".section __DWARF,__debug_abbrev,regular,debug" + | Section_ais_annotation -> assert false (* Not supported under MacOS *) + + let print_fun_info _ _ = () + let print_var_info _ _ = () + + let print_comm_decl oc name sz al = + fprintf oc " .comm %a, %s, %d\n" + symbol name (Z.to_string sz) (log2 al) + + let print_lcomm_decl oc name sz al = + fprintf oc " .lcomm %a, %s, %d\n" + symbol name (Z.to_string sz) (log2 al) + + end + +(* Module containing the printing functions *) + +module Target(System: SYSTEM): TARGET = + struct + include System + +(* Basic printing functions *) + + let print_label oc lbl = label oc (transl_label lbl) + +(* Names of sections *) + let section oc sec = fprintf oc " %s\n" (name_of_section sec) @@ -206,7 +322,7 @@ module Target : TARGET = | ADlsl(base, r, n) -> fprintf oc "[%a, %a, lsl #%a]" xregsp base xreg r coqint n | ADsxt(base, r, n) -> fprintf oc "[%a, %a, sxtw #%a]" xregsp base wreg r coqint n | ADuxt(base, r, n) -> fprintf oc "[%a, %a, uxtw #%a]" xregsp base wreg r coqint n - | ADadr(base, id, ofs) -> fprintf oc "[%a, #:lo12:%a]" xregsp base symbol_offset (id, ofs) + | ADadr(base, id, ofs) -> fprintf oc "[%a, %a]" xregsp base symbol_offset_low (id, ofs) | ADpostincr(base, n) -> fprintf oc "[%a], #%a" xregsp base coqint64 n (* Print a shifted operand *) @@ -312,9 +428,9 @@ module Target : TARGET = fprintf oc " movk %a, #%d, lsl #%d\n" ireg (sz, rd) (Z.to_int n) (Z.to_int pos) (* PC-relative addressing *) | Padrp(rd, id, ofs) -> - fprintf oc " adrp %a, %a\n" xreg rd symbol_offset (id, ofs) + fprintf oc " adrp %a, %a\n" xreg rd symbol_offset_high (id, ofs) | Paddadr(rd, r1, id, ofs) -> - fprintf oc " add %a, %a, #:lo12:%a\n" xreg rd xreg r1 symbol_offset (id, ofs) + fprintf oc " add %a, %a, %a\n" xreg rd xreg r1 symbol_offset_low (id, ofs) (* Bit-field operations *) | Psbfiz(sz, rd, r1, r, s) -> fprintf oc " sbfiz %a, %a, %a, %d\n" ireg (sz, rd) ireg (sz, r1) coqint r (Z.to_int s) @@ -413,8 +529,8 @@ module Target : TARGET = fprintf oc " fmov %a, #%.7f\n" dreg rd (Int64.float_of_bits d) else begin let lbl = label_literal64 d in - fprintf oc " adrp x16, %a\n" label lbl; - fprintf oc " ldr %a, [x16, #:lo12:%a] %s %.18g\n" dreg rd label lbl comment (Int64.float_of_bits d) + fprintf oc " adrp x16, %a\n" label_high lbl; + fprintf oc " ldr %a, [x16, %a] %s %.18g\n" dreg rd label_low lbl comment (Int64.float_of_bits d) end | Pfmovimms(rd, f) -> let d = camlint_of_coqint (Floats.Float32.to_bits f) in @@ -422,8 +538,8 @@ module Target : TARGET = fprintf oc " fmov %a, #%.7f\n" sreg rd (Int32.float_of_bits d) else begin let lbl = label_literal32 d in - fprintf oc " adrp x16, %a\n" label lbl; - fprintf oc " ldr %a, [x16, #:lo12:%a] %s %.18g\n" sreg rd label lbl comment (Int32.float_of_bits d) + fprintf oc " adrp x16, %a\n" label_high lbl; + fprintf oc " ldr %a, [x16, %a] %s %.18g\n" sreg rd label_low lbl comment (Int32.float_of_bits d) end | Pfmovi(D, rd, r1) -> fprintf oc " fmov %a, %a\n" dreg rd xreg0 r1 @@ -490,8 +606,7 @@ module Target : TARGET = | Plabel lbl -> fprintf oc "%a:\n" print_label lbl | Ploadsymbol(rd, id) -> - fprintf oc " adrp %a, :got:%a\n" xreg rd symbol id; - fprintf oc " ldr %a, [%a, #:got_lo12:%a]\n" xreg rd xreg rd symbol id + load_symbol_address oc rd id | Pcvtsw2x(rd, r1) -> fprintf oc " sxtw %a, %a\n" xreg rd wreg r1 | Pcvtuw2x(rd, r1) -> @@ -554,19 +669,12 @@ module Target : TARGET = jumptables := [] end - let print_fun_info = elf_print_fun_info - let print_optional_fun_info _ = () - let print_var_info = elf_print_var_info - let print_comm_symb oc sz name align = - if C2C.atom_is_static name then - fprintf oc " .local %a\n" symbol name; - fprintf oc " .comm %a, %s, %d\n" - symbol name - (Z.to_string sz) - align + if C2C.atom_is_static name + then print_lcomm_decl oc name sz align + else print_comm_decl oc name sz align let print_instructions oc fn = current_function_sig := fn.fn_sig; @@ -595,4 +703,10 @@ module Target : TARGET = end let sel_target () = - (module Target:TARGET) + let module S = + (val (match Configuration.system with + | "linux" -> (module ELF_System : SYSTEM) + | "macosx" -> (module MacOS_System : SYSTEM) + | _ -> invalid_arg ("System " ^ Configuration.system ^ " not supported")) + : SYSTEM) in + (module Target(S) : TARGET) diff --git a/aarch64/extractionMachdep.v b/aarch64/extractionMachdep.v index 5f26dc28..ee0e3631 100644 --- a/aarch64/extractionMachdep.v +++ b/aarch64/extractionMachdep.v @@ -15,13 +15,27 @@ (* Additional extraction directives specific to the AArch64 port *) -Require Archi Asm. +Require Archi Asm Asmgen SelectOp. (* Archi *) -Extract Constant Archi.pic_code => "fun () -> false". (* for the time being *) +Extract Constant Archi.abi => + "match Configuration.abi with + | ""apple"" -> Apple + | _ -> AAPCS64". + +(* SelectOp *) + +Extract Constant SelectOp.symbol_is_relocatable => + "match Configuration.system with + | ""macosx"" -> C2C.atom_is_extern + | _ -> (fun _ -> false)". (* Asm *) + Extract Constant Asm.symbol_low => "fun _ _ _ -> assert false". Extract Constant Asm.symbol_high => "fun _ _ _ -> assert false". + +(* Asmgen *) + Extract Constant Asmgen.symbol_is_aligned => "C2C.atom_is_aligned". -- cgit