From 9f6e2aac73ca1f863d236f86f446b0894c8ebcef Mon Sep 17 00:00:00 2001 From: Michael Schmidt Date: Wed, 2 Aug 2017 14:26:16 +0200 Subject: Improve stack offset addressing Functions which require large amounts of stack for spilling and/or arguments for function calls lead to stackframe offsets that exceed the 12bit limit of immediate constants in ARM instructions. This patch fixes the stack-offsets in the function prolog/epilog. --- arm/Asm.v | 23 ++++++++++++++++--- arm/Asmexpand.ml | 62 ++++++++++++++++++++++++++++++++++++++-------------- arm/Asmgen.v | 2 +- arm/Asmgenproof.v | 4 ++-- arm/TargetPrinter.ml | 17 +++++--------- 5 files changed, 73 insertions(+), 35 deletions(-) (limited to 'arm') diff --git a/arm/Asm.v b/arm/Asm.v index 08234975..6ba09a8f 100644 --- a/arm/Asm.v +++ b/arm/Asm.v @@ -199,15 +199,17 @@ Inductive instruction : Type := | Pfsts: freg -> ireg -> int -> instruction (**r float32 store *) (* Pseudo-instructions *) - | Pallocframe: Z -> ptrofs -> instruction (**r allocate new stack frame *) - | Pfreeframe: Z -> ptrofs -> instruction (**r deallocate stack frame and restore previous frame *) - | Plabel: label -> instruction (**r define a code label *) + | Pallocframe: Z -> ptrofs -> instruction (**r allocate new stack frame *) + | Pfreeframe: Z -> ptrofs -> instruction (**r deallocate stack frame and restore previous frame *) + | Psavelr: ptrofs -> instruction (**r store link register in allocated stack frame *) + | Plabel: label -> instruction (**r define a code label *) | Ploadsymbol: ireg -> ident -> ptrofs -> instruction (**r load the address of a symbol *) | Pmovite: testcond -> ireg -> shift_op -> shift_op -> instruction (**r integer conditional move *) | Pbtbl: ireg -> list label -> instruction (**r N-way branch through a jump table *) | Pbuiltin: external_function -> list (builtin_arg preg) -> builtin_res preg -> instruction (**r built-in function (pseudo) *) | Padc: ireg -> ireg -> shift_op -> instruction (**r add with carry *) | Pcfi_adjust: int -> instruction (**r .cfi_adjust debug directive *) + | Pcfi_rel_offset: int -> instruction (**r .cfi_rel_offset debug directive *) | Pclz: ireg -> ireg -> instruction (**r count leading zeros. *) | Pfsqrt: freg -> freg -> instruction (**r floating-point square root. *) | Prev: ireg -> ireg -> instruction (**r reverse bytes and reverse bits. *) @@ -268,6 +270,18 @@ lbl: .word symbol >> Again, our memory model cannot comprehend that this operation frees (logically) the current stack frame. +- [Psavelr pos]: this pseudo-instruction stores the link register in the + stackframe at the specified position. It handles out-of-range offsets, + i.e. the generated code for in-range offsets is: +<< + str lr, [sp, #pos] +>> + and for out-of-range offsets: +<< + add sp, sp, #pos (* code to expand the immediate *) + str lr, [sp] + sub sp, sp, #pos (* code to expand the immediate *) +>> - [Pbtbl reg table]: this is a N-way branch, implemented via a jump table as follows: << @@ -736,6 +750,8 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out | _ => Stuck end end + | Psavelr pos => + exec_store Mint32 (Val.offset_ptr rs#IR13 pos) IR14 rs m | Plabel lbl => Next (nextinstr rs) m | Ploadsymbol r1 lbl ofs => @@ -763,6 +779,7 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out | Ppush _ | Padc _ _ _ | Pcfi_adjust _ + | Pcfi_rel_offset _ | Pclz _ _ | Prev _ _ | Prev16 _ _ diff --git a/arm/Asmexpand.ml b/arm/Asmexpand.ml index a32b0e8b..1a473e03 100644 --- a/arm/Asmexpand.ml +++ b/arm/Asmexpand.ml @@ -46,18 +46,22 @@ let expand_movimm dst n = (fun n -> emit (Porr (dst,dst, SOimm n))) tl let expand_subimm dst src n = - match Asmgen.decompose_int n with - | [] -> assert false - | hd::tl -> - emit (Psub(dst,src,SOimm hd)); - List.iter (fun n -> emit (Psub (dst,dst,SOimm n))) tl + if dst <> src || n <> _0 then begin + match Asmgen.decompose_int n with + | [] -> assert false + | hd::tl -> + emit (Psub(dst,src,SOimm hd)); + List.iter (fun n -> emit (Psub (dst,dst,SOimm n))) tl + end let expand_addimm dst src n = - match Asmgen.decompose_int n with - | [] -> assert false - | hd::tl -> - emit (Padd (dst,src,SOimm hd)); - List.iter (fun n -> emit (Padd (dst,dst,SOimm n))) tl + if dst <> src || n <> _0 then begin + match Asmgen.decompose_int n with + | [] -> assert false + | hd::tl -> + emit (Padd (dst,src,SOimm hd)); + List.iter (fun n -> emit (Padd (dst,dst,SOimm n))) tl + end let expand_int64_arith conflict rl fn = if conflict then @@ -410,12 +414,22 @@ let expand_instruction instr = | Pallocframe (sz, ofs) -> emit (Pmov (IR12,SOreg IR13)); if (is_current_function_variadic ()) then begin - emit (Ppush [IR0;IR1;IR2;IR3]); - emit (Pcfi_adjust _16); - end; - expand_subimm IR13 IR13 sz; - emit (Pcfi_adjust sz); - emit (Pstr (IR12,IR13,SOimm ofs)); + emit (Ppush [IR0;IR1;IR2;IR3]); + emit (Pcfi_adjust _16); + end; + let sz' = camlint_of_coqint sz in + let ofs' = camlint_of_coqint ofs in + if ofs' >= 4096l && sz' >= ofs' then begin + expand_subimm IR13 IR13 (coqint_of_camlint (Int32.sub sz' (Int32.add ofs' 4l))); + emit (Ppush [IR13]); + expand_subimm IR13 IR13 ofs; + emit (Pcfi_adjust sz); + end else begin + assert (ofs' < 4096l); + expand_subimm IR13 IR13 sz; + emit (Pcfi_adjust sz); + emit (Pstr (IR12,IR13,SOimm ofs)); + end; PrintAsmaux.current_function_stacksize := camlint_of_coqint sz | Pfreeframe (sz, ofs) -> let sz = @@ -424,7 +438,21 @@ let expand_instruction instr = else sz in if Asmgen.is_immed_arith sz then emit (Padd (IR13,IR13,SOimm sz)) - else emit (Pldr (IR13,IR13,SOimm ofs)) + else begin + if camlint_of_coqint ofs >= 4096l then begin + expand_addimm IR13 IR13 ofs; + emit (Pldr (IR13,IR13,SOimm _0)) + end else + emit (Pldr (IR13,IR13,SOimm ofs)); + end + | Psavelr ofs -> + if camlint_of_coqint ofs >= 4096l then begin + expand_addimm IR13 IR13 ofs; + emit (Pstr (IR14,IR13,SOimm _0)); + expand_subimm IR13 IR13 ofs + end else + emit (Pstr (IR14,IR13,SOimm ofs)); + emit (Pcfi_rel_offset ofs) | Pbuiltin (ef,args,res) -> begin match ef with | EF_builtin (name,sg) -> diff --git a/arm/Asmgen.v b/arm/Asmgen.v index e7a3b4fa..125e95ff 100644 --- a/arm/Asmgen.v +++ b/arm/Asmgen.v @@ -790,7 +790,7 @@ Definition transl_function (f: Mach.function) := do c <- transl_code f f.(Mach.fn_code) true; OK (mkfunction f.(Mach.fn_sig) (Pallocframe f.(fn_stacksize) f.(fn_link_ofs) :: - Pstr IR14 IR13 (SOimm (Ptrofs.to_int f.(fn_retaddr_ofs))) :: c)). + Psavelr f.(fn_retaddr_ofs) :: c)). Definition transf_function (f: Mach.function) : res Asm.function := do tf <- transl_function f; diff --git a/arm/Asmgenproof.v b/arm/Asmgenproof.v index 09c20d5c..f306f43a 100644 --- a/arm/Asmgenproof.v +++ b/arm/Asmgenproof.v @@ -854,7 +854,7 @@ Opaque loadind. generalize EQ; intros EQ'. monadInv EQ'. destruct (zlt Ptrofs.max_unsigned (list_length_z (fn_code x0))); inversion EQ1. clear EQ1. subst x0. monadInv EQ0. - set (tfbody := Pallocframe (fn_stacksize f) (fn_link_ofs f) :: Pstr IR14 IR13 (SOimm (Ptrofs.to_int (fn_retaddr_ofs f))) :: x0) in *. + set (tfbody := Pallocframe (fn_stacksize f) (fn_link_ofs f) :: Psavelr (fn_retaddr_ofs f) :: x0) in *. set (tf := {| fn_sig := Mach.fn_sig f; fn_code := tfbody |}) in *. unfold store_stack in *. exploit Mem.alloc_extends. eauto. eauto. apply Zle_refl. apply Zle_refl. @@ -877,7 +877,7 @@ Opaque loadind. simpl. auto. simpl. unfold exec_store. change (rs2 IR14) with (rs0 IR14). rewrite Ptrofs.add_zero_l. simpl. unfold Tptr, chunk_of_type, Archi.ptr64 in P. simpl in P. - rewrite Ptrofs.add_zero_l in P. rewrite ATLR. rewrite Ptrofs.of_int_to_int by auto. + rewrite Ptrofs.add_zero_l in P. rewrite ATLR. rewrite P. auto. auto. auto. left; exists (State rs3 m3'); split. eapply exec_straight_steps_1; eauto. omega. constructor. diff --git a/arm/TargetPrinter.ml b/arm/TargetPrinter.ml index 60f52efd..02c348a8 100644 --- a/arm/TargetPrinter.ml +++ b/arm/TargetPrinter.ml @@ -539,23 +539,13 @@ struct | Psbc (r1,r2,sa) -> fprintf oc " sbc %a, %a, %a\n" ireg r1 ireg r2 shift_op sa; 1 | Pstr(r1, r2, sa) | Pstr_a(r1, r2, sa) -> - fprintf oc " str %a, [%a, %a]\n" ireg r1 ireg r2 shift_op sa; - begin match r1, r2, sa with - | IR14, IR13, SOimm n -> cfi_rel_offset oc "lr" (camlint_of_coqint n) - | _ -> () - end; - 1 + fprintf oc " str %a, [%a, %a]\n" ireg r1 ireg r2 shift_op sa; 1 | Pstrb(r1, r2, sa) -> fprintf oc " strb %a, [%a, %a]\n" ireg r1 ireg r2 shift_op sa; 1 | Pstrh(r1, r2, sa) -> fprintf oc " strh %a, [%a, %a]\n" ireg r1 ireg r2 shift_op sa; 1 | Pstr_p(r1, r2, sa) -> - fprintf oc " str %a, [%a], %a\n" ireg r1 ireg r2 shift_op sa; - begin match r1, r2, sa with - | IR14, IR13, SOimm n -> cfi_rel_offset oc "lr" (camlint_of_coqint n) - | _ -> () - end; - 1 + fprintf oc " str %a, [%a], %a\n" ireg r1 ireg r2 shift_op sa; 1 | Pstrb_p(r1, r2, sa) -> fprintf oc " strb %a, [%a], %a\n" ireg r1 ireg r2 shift_op sa; 1 | Pstrh_p(r1, r2, sa) -> @@ -703,6 +693,8 @@ struct assert false | Pfreeframe(sz, ofs) -> assert false + | Psavelr(ofs) -> + assert false | Plabel lbl -> fprintf oc "%a:\n" print_label lbl; 0 | Ploadsymbol(r1, id, ofs) -> @@ -751,6 +743,7 @@ struct assert false end | Pcfi_adjust sz -> cfi_adjust oc (camlint_of_coqint sz); 0 + | Pcfi_rel_offset ofs -> cfi_rel_offset oc "lr" (camlint_of_coqint ofs); 0 let no_fallthrough = function | Pb _ -> true -- cgit