From 88567ce6d247562a9fa9151eaa32f7ad63ea37c0 Mon Sep 17 00:00:00 2001 From: Xavier Leroy Date: Mon, 11 Jan 2021 18:04:25 +0100 Subject: RISC-V: fix FP calling conventions This is a follow-up to e81d015e3. In the RISC-V ABI, FP arguments to functions are passed in integer registers (or pairs of integer registers) in two cases: 1- the FP argument is a variadic argument 2- the FP argument is a fixed argument but all 8 FP registers reserved for parameter passing have been used already. The previous implementation handled only case 1, with some problems. This commit implements both 1 and 2. To this end, 8 extra FP caller-save registers are used to hold the values of the FP arguments that must be passed in integer registers. Fixup code moves these FP registers to integer registers / register pairs. Symmetrically, at function entry, the integer registers / register pairs are moved back to the FP registers. 8 extra FP registers is enough because there are only 8 integer registers used for parameter passing, so at most 8 FP arguments may need to be moved to integer registers. --- riscV/Asm.v | 4 + riscV/Asmexpand.ml | 194 +++++++++++++++++++++++++++++-------------------- riscV/Conventions1.v | 86 ++++++++++++---------- riscV/TargetPrinter.ml | 4 + 4 files changed, 171 insertions(+), 117 deletions(-) (limited to 'riscV') diff --git a/riscV/Asm.v b/riscV/Asm.v index 30b128ec..7e1b1fc8 100644 --- a/riscV/Asm.v +++ b/riscV/Asm.v @@ -256,7 +256,9 @@ Inductive instruction : Type := (* floating point register move *) | Pfmv (rd: freg) (rs: freg) (**r move *) | Pfmvxs (rd: ireg) (rs: freg) (**r move FP single to integer register *) + | Pfmvsx (rd: freg) (rs: ireg) (**r move integer register to FP single *) | Pfmvxd (rd: ireg) (rs: freg) (**r move FP double to integer register *) + | Pfmvdx (rd: freg) (rs: ireg) (**r move integer register to FP double *) (* 32-bit (single-precision) floating point *) | Pfls (rd: freg) (ra: ireg) (ofs: offset) (**r load float *) @@ -969,7 +971,9 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out | Pfence | Pfmvxs _ _ + | Pfmvsx _ _ | Pfmvxd _ _ + | Pfmvdx _ _ | Pfmins _ _ _ | Pfmaxs _ _ _ diff --git a/riscV/Asmexpand.ml b/riscV/Asmexpand.ml index da97c4a8..e8c142e9 100644 --- a/riscV/Asmexpand.ml +++ b/riscV/Asmexpand.ml @@ -24,6 +24,7 @@ open Asmexpandaux open AST open Camlcoq open! Integers +open Locations exception Error of string @@ -50,6 +51,86 @@ let expand_addptrofs dst src n = let expand_storeind_ptr src base ofs = List.iter emit (Asmgen.storeind_ptr src base ofs []) +(* Fix-up code around function calls and function entry. + Some floating-point arguments residing in FP registers need to be + moved to integer registers or register pairs. + Symmetrically, some floating-point parameter passed in integer + registers or register pairs need to be moved to FP registers. *) + +let int_param_regs = [| X10; X11; X12; X13; X14; X15; X16; X17 |] + +let move_single_arg fr i = + emit (Pfmvxs(int_param_regs.(i), fr)) + +let move_double_arg fr i = + if Archi.ptr64 then begin + emit (Pfmvxd(int_param_regs.(i), fr)) + end else begin + emit (Paddiw(X2, X X2, Integers.Int.neg _16)); + emit (Pfsd(fr, X2, Ofsimm _0)); + emit (Plw(int_param_regs.(i), X2, Ofsimm _0)); + if i < 7 then begin + emit (Plw(int_param_regs.(i + 1), X2, Ofsimm _4)) + end else begin + emit (Plw(X31, X2, Ofsimm _4)); + emit (Psw(X31, X2, Ofsimm _16)) + end; + emit (Paddiw(X2, X X2, _16)) + end + +let move_single_param fr i = + emit (Pfmvsx(fr, int_param_regs.(i))) + +let move_double_param fr i = + if Archi.ptr64 then begin + emit (Pfmvdx(fr, int_param_regs.(i))) + end else begin + emit (Paddiw(X2, X X2, Integers.Int.neg _16)); + emit (Psw(int_param_regs.(i), X2, Ofsimm _0)); + if i < 7 then begin + emit (Psw(int_param_regs.(i + 1), X2, Ofsimm _4)) + end else begin + emit (Plw(X31, X2, Ofsimm _16)); + emit (Psw(X31, X2, Ofsimm _4)) + end; + emit (Pfld(fr, X2, Ofsimm _0)); + emit (Paddiw(X2, X X2, _16)) + end + +let float_extra_index = function + | Machregs.F0 -> Some (F0, 0) + | Machregs.F1 -> Some (F1, 1) + | Machregs.F2 -> Some (F2, 2) + | Machregs.F3 -> Some (F3, 3) + | Machregs.F4 -> Some (F4, 4) + | Machregs.F5 -> Some (F5, 5) + | Machregs.F6 -> Some (F6, 6) + | Machregs.F7 -> Some (F7, 7) + | _ -> None + +let fixup_gen single double sg = + let fixup ty loc = + match ty, loc with + | Tsingle, One (R r) -> + begin match float_extra_index r with + | Some(r, i) -> single r i + | None -> () + end + | (Tfloat | Tany64), One (R r) -> + begin match float_extra_index r with + | Some(r, i) -> double r i + | None -> () + end + | _, _ -> () + in + List.iter2 fixup sg.sig_args (Conventions1.loc_arguments sg) + +let fixup_call sg = + fixup_gen move_single_arg move_double_arg sg + +let fixup_function_entry sg = + fixup_gen move_single_param move_double_param sg + (* Built-ins. They come in two flavors: - annotation statements: take their arguments in registers or stack locations; generate no code; @@ -57,66 +138,6 @@ let expand_storeind_ptr src base ofs = registers. *) -(* Fix-up code around calls to variadic functions. - Floating-point variadic arguments residing in FP registers need to - be moved to integer registers. *) - -let int_param_regs = [| X10; X11; X12; X13; X14; X15; X16; X17 |] -let float_param_regs = [| F10; F11; F12; F13; F14; F15; F16; F17 |] - -let rec fixup_variadic_call fixed ri rf tyl = - if ri < 8 then - match tyl with - | [] -> - () - | (Tint | Tany32) :: tyl -> - fixup_variadic_call (fixed - 1) (ri + 1) rf tyl - | Tsingle :: tyl -> - if fixed > 0 then - fixup_variadic_call (fixed - 1) ri (rf + 1) tyl - else begin - let rs = float_param_regs.(rf) - and rd = int_param_regs.(ri) in - emit (Pfmvxs(rd, rs)); - fixup_variadic_call (fixed - 1) (ri + 1) (rf + 1) tyl - end - | Tlong :: tyl -> - let ri' = if Archi.ptr64 then ri + 1 else align ri 2 + 2 in - fixup_variadic_call (fixed - 1) ri' rf tyl - | (Tfloat | Tany64) :: tyl -> - if Archi.ptr64 then begin - if fixed > 0 then - fixup_variadic_call (fixed - 1) ri (rf + 1) tyl - else begin - let rs = float_param_regs.(rf) - and rd = int_param_regs.(ri) in - emit (Pfmvxd(rd, rs)); - fixup_variadic_call (fixed - 1) (ri + 1) (rf + 1) tyl - end - end else begin - let ri = align ri 2 in - if ri < 8 then begin - if fixed > 0 then - fixup_variadic_call (fixed - 1) ri (rf + 1) tyl - else begin - let rs = float_param_regs.(rf) - and rd1 = int_param_regs.(ri) - and rd2 = int_param_regs.(ri + 1) in - emit (Paddiw(X2, X X2, Integers.Int.neg _16)); - emit (Pfsd(rs, X2, Ofsimm _0)); - emit (Plw(rd1, X2, Ofsimm _0)); - emit (Plw(rd2, X2, Ofsimm _4)); - emit (Paddiw(X2, X X2, _16)); - fixup_variadic_call (fixed - 1) (ri + 2) (rf + 1) tyl - end - end - end - -let fixup_call sg = - match sg.sig_cc.cc_vararg with - | None -> () - | Some fixed -> fixup_variadic_call (Z.to_int fixed) 0 0 sg.sig_args - (* Handling of annotations *) let expand_annot_val kind txt targ args res = @@ -323,37 +344,49 @@ let expand_builtin_vstore chunk args = (* Number of integer registers, FP registers, and stack words used to pass the (fixed) arguments to a function. *) -let rec args_size ri rf ofs = function +let arg_int_size ri rf ofs k = + if ri < 8 + then k (ri + 1) rf ofs + else k ri rf (ofs + 1) + +let arg_single_size ri rf ofs k = + if rf < 8 + then k ri (rf + 1) ofs + else arg_int_size ri rf ofs k + +let arg_long_size ri rf ofs k = + if Archi.ptr64 then + if ri < 8 + then k (ri + 1) rf ofs + else k ri rf (ofs + 1) + else + if ri < 7 then k (ri + 2) rf ofs + else if ri = 7 then k (ri + 1) rf (ofs + 1) + else k ri rf (align ofs 2 + 2) + +let arg_double_size ri rf ofs k = + if rf < 8 + then k ri (rf + 1) ofs + else arg_long_size ri rf ofs k + +let rec args_size l ri rf ofs = + match l with | [] -> (ri, rf, ofs) | (Tint | Tany32) :: l -> - if ri < 8 - then args_size (ri + 1) rf ofs l - else args_size ri rf (ofs + 1) l + arg_int_size ri rf ofs (args_size l) | Tsingle :: l -> - if rf < 8 - then args_size ri (rf + 1) ofs l - else args_size ri rf (ofs + 1) l + arg_single_size ri rf ofs (args_size l) | Tlong :: l -> - if Archi.ptr64 then - if ri < 8 - then args_size (ri + 1) rf ofs l - else args_size ri rf (ofs + 1) l - else - if ri < 7 then args_size (ri + 2) rf ofs l - else if ri = 7 then args_size (ri + 1) rf (ofs + 1) l - else args_size ri rf (align ofs 2 + 2) l + arg_long_size ri rf ofs (args_size l) | (Tfloat | Tany64) :: l -> - if rf < 8 - then args_size ri (rf + 1) ofs l - else let ofs' = if Archi.ptr64 then ofs + 1 else align ofs 2 + 2 in - args_size ri rf ofs' l + arg_double_size ri rf ofs (args_size l) (* Size in words of the arguments to a function. This includes both arguments passed in integer registers and arguments passed on stack, but not arguments passed in FP registers. *) let arguments_size sg = - let (ri, _, ofs) = args_size 0 0 0 sg.sig_args in + let (ri, _, ofs) = args_size sg.sig_args 0 0 0 in ri + ofs let save_arguments first_reg base_ofs = @@ -744,6 +777,7 @@ let preg_to_dwarf = function let expand_function id fn = try set_current_function fn; + fixup_function_entry fn.fn_sig; expand id (* sp= *) 2 preg_to_dwarf expand_instruction fn.fn_code; Errors.OK (get_current_function ()) with Error s -> diff --git a/riscV/Conventions1.v b/riscV/Conventions1.v index 5dd50a4c..6cb06c61 100644 --- a/riscV/Conventions1.v +++ b/riscV/Conventions1.v @@ -176,20 +176,21 @@ Qed. - RV64: pass the first 8 integer arguments in integer registers (a1...a8: int_param_regs), the first 8 FP arguments in FP registers - (fa1...fa8: float_param_regs), and the remaining arguments on the - stack, in 8-byte slots. + (fa1...fa8: float_param_regs) then in integer registers (a1...a8), + and the remaining arguments on the stack, in 8-byte slots. -- RV32: same, but arguments of 64-bit integer type are passed in two - consecutive integer registers (a(i), a(i+1)) or in a(8) and on a - 32-bit word on the stack. Stack-allocated arguments are aligned to - their natural alignment. +- RV32: same, but arguments of size 64 bits that must be passed in + integer registers are passed in two consecutive integer registers + (a(i), a(i+1)), or in a(8) and on a 32-bit word on the stack. + Stack-allocated arguments are aligned to their natural alignment. For variadic functions, the fixed arguments are passed as described above, then the variadic arguments receive special treatment: -- RV64: all variadic arguments, including FP arguments, - are passed in the remaining integer registers (a1...a8), - then on the stack, in 8-byte slots. +- RV64: FP registers are not used for passing variadic arguments. + All variadic arguments, including FP arguments, are passed in the + remaining integer registers (a1...a8), then on the stack, in 8-byte + slots. - RV32: likewise, but arguments of 64-bit types (integers as well as floats) are passed in two consecutive aligned integer registers @@ -207,6 +208,15 @@ Definition int_param_regs := Definition float_param_regs := F10 :: F11 :: F12 :: F13 :: F14 :: F15 :: F16 :: F17 :: nil. +(** To evaluate FP arguments that must be passed in integer registers, + we can use any FP caller-save register that is not already used to pass + a fixed FP argument. Since there are 8 integer registers for argument + passing, we need at most 8 extra more FP registers for these FP + arguments. *) + +Definition float_extra_param_regs := + F0 :: F1 :: F2 :: F3 :: F4 :: F5 :: F6 :: F7 :: nil. + Definition int_arg (ri rf ofs: Z) (ty: typ) (rec: Z -> Z -> Z -> list (rpair loc)) := match list_nth_z int_param_regs ri with @@ -220,26 +230,27 @@ Definition int_arg (ri rf ofs: Z) (ty: typ) Definition float_arg (va: bool) (ri rf ofs: Z) (ty: typ) (rec: Z -> Z -> Z -> list (rpair loc)) := - match list_nth_z float_param_regs rf with + match list_nth_z (if va then nil else float_param_regs) rf with | Some r => - if va then - (let ri' := (* reserve 1 or 2 aligned integer registers *) - if Archi.ptr64 || zeq (typesize ty) 1 then ri + 1 else align ri 2 + 2 in - if zle ri' 8 then - (* we have enough integer registers, put argument in FP reg - and fixup code will put it in one or two integer regs *) - One (R r) :: rec ri' (rf + 1) ofs - else - (* we are out of integer registers, pass argument on stack *) + One (R r) :: rec ri (rf + 1) ofs + | None => + (* We are out of FP registers, or cannot use them because vararg, + so try to put the argument in an extra FP register while + reserving an integer register or register pair into which + fixup code will move the extra FP register. *) + let regpair := negb Archi.ptr64 && zeq (typesize ty) 2 in + let ri' := if va && regpair then align ri 2 else ri in + match list_nth_z float_extra_param_regs ri' with + | Some r => + let ri'' := ri' + (if Archi.ptr64 then 1 else typesize ty) in + let ofs'' := if regpair && zeq ri' 7 then ofs + 1 else ofs in + One (R r) :: rec ri'' rf ofs'' + | None => + (* We are out of integer registers, pass argument on stack *) let ofs := align ofs (typesize ty) in One(S Outgoing ofs ty) - :: rec ri' rf (ofs + (if Archi.ptr64 then 2 else typesize ty))) - else - One (R r) :: rec ri (rf + 1) ofs - | None => - let ofs := align ofs (typesize ty) in - One(S Outgoing ofs ty) - :: rec ri rf (ofs + (if Archi.ptr64 then 2 else typesize ty)) + :: rec ri' rf (ofs + (if Archi.ptr64 then 2 else typesize ty)) + end end. Definition split_long_arg (va: bool) (ri rf ofs: Z) @@ -317,6 +328,8 @@ Proof. { decide_goal. } assert (CSF: forall r, In r float_param_regs -> is_callee_save r = false). { decide_goal. } + assert (CSFX: forall r, In r float_extra_param_regs -> is_callee_save r = false). + { decide_goal. } assert (AL: forall ofs ty, ofs >= 0 -> align ofs (typesize ty) >= 0). { intros. assert (ofs <= align ofs (typesize ty)) by (apply align_le; apply typesize_pos). @@ -341,18 +354,17 @@ Proof. assert (B: forall va ri rf ofs ty f, OKF f -> ofs >= 0 -> OK (float_arg va ri rf ofs ty f)). { intros until f; intros OF OO; red; unfold float_arg; intros. - destruct (list_nth_z float_param_regs rf) as [r|] eqn:NTH. - - set (ri' := if Archi.ptr64 || zeq (typesize ty) 1 then ri + 1 else align ri 2 + 2) in *. - destruct va; [destruct (zle ri' 8)|idtac]; destruct H. - + subst p; simpl. apply CSF. eapply list_nth_z_in; eauto. - + eapply OF; eauto. - + subst p; repeat split; auto. - + eapply OF; [idtac|eauto]. generalize (AL ofs ty OO) (SKK ty); lia. - + subst p; simpl. apply CSF. eapply list_nth_z_in; eauto. - + eapply OF; eauto. + destruct (list_nth_z (if va then nil else float_param_regs) rf) as [r|] eqn:NTH. - destruct H. - + subst p; repeat split; auto. - + eapply OF; [idtac|eauto]. generalize (AL ofs ty OO) (SKK ty); lia. + + subst p; simpl. apply CSF. destruct va. simpl in NTH; discriminate. eapply list_nth_z_in; eauto. + + eapply OF; eauto. + - set (regpair := negb Archi.ptr64 && zeq (typesize ty) 2) in *. + set (ri' := if va && regpair then align ri 2 else ri) in *. + destruct (list_nth_z float_extra_param_regs ri') as [r|] eqn:NTH'; destruct H. + + subst p; simpl. apply CSFX. eapply list_nth_z_in; eauto. + + eapply OF; [|eauto]. destruct (regpair && zeq ri' 7); lia. + + subst p; simpl. auto. + + eapply OF; [|eauto]. generalize (AL ofs ty OO) (SKK ty); lia. } assert (C: forall va ri rf ofs f, OKF f -> ofs >= 0 -> OK (split_long_arg va ri rf ofs f)). diff --git a/riscV/TargetPrinter.ml b/riscV/TargetPrinter.ml index 64bcea4c..5cd47b46 100644 --- a/riscV/TargetPrinter.ml +++ b/riscV/TargetPrinter.ml @@ -392,8 +392,12 @@ module Target : TARGET = fprintf oc " fmv.d %a, %a\n" freg fd freg fs | Pfmvxs (rd,fs) -> fprintf oc " fmv.x.s %a, %a\n" ireg rd freg fs + | Pfmvsx (fd,rs) -> + fprintf oc " fmv.s.x %a, %a\n" freg fd ireg rs | Pfmvxd (rd,fs) -> fprintf oc " fmv.x.d %a, %a\n" ireg rd freg fs + | Pfmvdx (fd,rs) -> + fprintf oc " fmv.d.x %a, %a\n" freg fd ireg rs (* 32-bit (single-precision) floating point *) | Pfls (fd, ra, ofs) -> -- cgit