aboutsummaryrefslogtreecommitdiffstats
path: root/arm
diff options
context:
space:
mode:
authorMichael Schmidt <github@mschmidt.me>2017-08-02 14:26:16 +0200
committerMichael Schmidt <github@mschmidt.me>2017-08-02 14:26:16 +0200
commit9f6e2aac73ca1f863d236f86f446b0894c8ebcef (patch)
treed87b9b562eb83f7ea36b7deb97289839f59b2331 /arm
parent0aa08f5d521401644b5c839239de97f587e0a217 (diff)
downloadcompcert-kvx-9f6e2aac73ca1f863d236f86f446b0894c8ebcef.tar.gz
compcert-kvx-9f6e2aac73ca1f863d236f86f446b0894c8ebcef.zip
Improve stack offset addressing
Functions which require large amounts of stack for spilling and/or arguments for function calls lead to stackframe offsets that exceed the 12bit limit of immediate constants in ARM instructions. This patch fixes the stack-offsets in the function prolog/epilog.
Diffstat (limited to 'arm')
-rw-r--r--arm/Asm.v23
-rw-r--r--arm/Asmexpand.ml62
-rw-r--r--arm/Asmgen.v2
-rw-r--r--arm/Asmgenproof.v4
-rw-r--r--arm/TargetPrinter.ml17
5 files changed, 73 insertions, 35 deletions
diff --git a/arm/Asm.v b/arm/Asm.v
index 08234975..6ba09a8f 100644
--- a/arm/Asm.v
+++ b/arm/Asm.v
@@ -199,15 +199,17 @@ Inductive instruction : Type :=
| Pfsts: freg -> ireg -> int -> instruction (**r float32 store *)
(* Pseudo-instructions *)
- | Pallocframe: Z -> ptrofs -> instruction (**r allocate new stack frame *)
- | Pfreeframe: Z -> ptrofs -> instruction (**r deallocate stack frame and restore previous frame *)
- | Plabel: label -> instruction (**r define a code label *)
+ | Pallocframe: Z -> ptrofs -> instruction (**r allocate new stack frame *)
+ | Pfreeframe: Z -> ptrofs -> instruction (**r deallocate stack frame and restore previous frame *)
+ | Psavelr: ptrofs -> instruction (**r store link register in allocated stack frame *)
+ | Plabel: label -> instruction (**r define a code label *)
| Ploadsymbol: ireg -> ident -> ptrofs -> instruction (**r load the address of a symbol *)
| Pmovite: testcond -> ireg -> shift_op -> shift_op -> instruction (**r integer conditional move *)
| Pbtbl: ireg -> list label -> instruction (**r N-way branch through a jump table *)
| Pbuiltin: external_function -> list (builtin_arg preg) -> builtin_res preg -> instruction (**r built-in function (pseudo) *)
| Padc: ireg -> ireg -> shift_op -> instruction (**r add with carry *)
| Pcfi_adjust: int -> instruction (**r .cfi_adjust debug directive *)
+ | Pcfi_rel_offset: int -> instruction (**r .cfi_rel_offset debug directive *)
| Pclz: ireg -> ireg -> instruction (**r count leading zeros. *)
| Pfsqrt: freg -> freg -> instruction (**r floating-point square root. *)
| Prev: ireg -> ireg -> instruction (**r reverse bytes and reverse bits. *)
@@ -268,6 +270,18 @@ lbl: .word symbol
>>
Again, our memory model cannot comprehend that this operation
frees (logically) the current stack frame.
+- [Psavelr pos]: this pseudo-instruction stores the link register in the
+ stackframe at the specified position. It handles out-of-range offsets,
+ i.e. the generated code for in-range offsets is:
+<<
+ str lr, [sp, #pos]
+>>
+ and for out-of-range offsets:
+<<
+ add sp, sp, #pos (* code to expand the immediate *)
+ str lr, [sp]
+ sub sp, sp, #pos (* code to expand the immediate *)
+>>
- [Pbtbl reg table]: this is a N-way branch, implemented via a jump table
as follows:
<<
@@ -736,6 +750,8 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
| _ => Stuck
end
end
+ | Psavelr pos =>
+ exec_store Mint32 (Val.offset_ptr rs#IR13 pos) IR14 rs m
| Plabel lbl =>
Next (nextinstr rs) m
| Ploadsymbol r1 lbl ofs =>
@@ -763,6 +779,7 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
| Ppush _
| Padc _ _ _
| Pcfi_adjust _
+ | Pcfi_rel_offset _
| Pclz _ _
| Prev _ _
| Prev16 _ _
diff --git a/arm/Asmexpand.ml b/arm/Asmexpand.ml
index a32b0e8b..1a473e03 100644
--- a/arm/Asmexpand.ml
+++ b/arm/Asmexpand.ml
@@ -46,18 +46,22 @@ let expand_movimm dst n =
(fun n -> emit (Porr (dst,dst, SOimm n))) tl
let expand_subimm dst src n =
- match Asmgen.decompose_int n with
- | [] -> assert false
- | hd::tl ->
- emit (Psub(dst,src,SOimm hd));
- List.iter (fun n -> emit (Psub (dst,dst,SOimm n))) tl
+ if dst <> src || n <> _0 then begin
+ match Asmgen.decompose_int n with
+ | [] -> assert false
+ | hd::tl ->
+ emit (Psub(dst,src,SOimm hd));
+ List.iter (fun n -> emit (Psub (dst,dst,SOimm n))) tl
+ end
let expand_addimm dst src n =
- match Asmgen.decompose_int n with
- | [] -> assert false
- | hd::tl ->
- emit (Padd (dst,src,SOimm hd));
- List.iter (fun n -> emit (Padd (dst,dst,SOimm n))) tl
+ if dst <> src || n <> _0 then begin
+ match Asmgen.decompose_int n with
+ | [] -> assert false
+ | hd::tl ->
+ emit (Padd (dst,src,SOimm hd));
+ List.iter (fun n -> emit (Padd (dst,dst,SOimm n))) tl
+ end
let expand_int64_arith conflict rl fn =
if conflict then
@@ -410,12 +414,22 @@ let expand_instruction instr =
| Pallocframe (sz, ofs) ->
emit (Pmov (IR12,SOreg IR13));
if (is_current_function_variadic ()) then begin
- emit (Ppush [IR0;IR1;IR2;IR3]);
- emit (Pcfi_adjust _16);
- end;
- expand_subimm IR13 IR13 sz;
- emit (Pcfi_adjust sz);
- emit (Pstr (IR12,IR13,SOimm ofs));
+ emit (Ppush [IR0;IR1;IR2;IR3]);
+ emit (Pcfi_adjust _16);
+ end;
+ let sz' = camlint_of_coqint sz in
+ let ofs' = camlint_of_coqint ofs in
+ if ofs' >= 4096l && sz' >= ofs' then begin
+ expand_subimm IR13 IR13 (coqint_of_camlint (Int32.sub sz' (Int32.add ofs' 4l)));
+ emit (Ppush [IR13]);
+ expand_subimm IR13 IR13 ofs;
+ emit (Pcfi_adjust sz);
+ end else begin
+ assert (ofs' < 4096l);
+ expand_subimm IR13 IR13 sz;
+ emit (Pcfi_adjust sz);
+ emit (Pstr (IR12,IR13,SOimm ofs));
+ end;
PrintAsmaux.current_function_stacksize := camlint_of_coqint sz
| Pfreeframe (sz, ofs) ->
let sz =
@@ -424,7 +438,21 @@ let expand_instruction instr =
else sz in
if Asmgen.is_immed_arith sz
then emit (Padd (IR13,IR13,SOimm sz))
- else emit (Pldr (IR13,IR13,SOimm ofs))
+ else begin
+ if camlint_of_coqint ofs >= 4096l then begin
+ expand_addimm IR13 IR13 ofs;
+ emit (Pldr (IR13,IR13,SOimm _0))
+ end else
+ emit (Pldr (IR13,IR13,SOimm ofs));
+ end
+ | Psavelr ofs ->
+ if camlint_of_coqint ofs >= 4096l then begin
+ expand_addimm IR13 IR13 ofs;
+ emit (Pstr (IR14,IR13,SOimm _0));
+ expand_subimm IR13 IR13 ofs
+ end else
+ emit (Pstr (IR14,IR13,SOimm ofs));
+ emit (Pcfi_rel_offset ofs)
| Pbuiltin (ef,args,res) ->
begin match ef with
| EF_builtin (name,sg) ->
diff --git a/arm/Asmgen.v b/arm/Asmgen.v
index e7a3b4fa..125e95ff 100644
--- a/arm/Asmgen.v
+++ b/arm/Asmgen.v
@@ -790,7 +790,7 @@ Definition transl_function (f: Mach.function) :=
do c <- transl_code f f.(Mach.fn_code) true;
OK (mkfunction f.(Mach.fn_sig)
(Pallocframe f.(fn_stacksize) f.(fn_link_ofs) ::
- Pstr IR14 IR13 (SOimm (Ptrofs.to_int f.(fn_retaddr_ofs))) :: c)).
+ Psavelr f.(fn_retaddr_ofs) :: c)).
Definition transf_function (f: Mach.function) : res Asm.function :=
do tf <- transl_function f;
diff --git a/arm/Asmgenproof.v b/arm/Asmgenproof.v
index 09c20d5c..f306f43a 100644
--- a/arm/Asmgenproof.v
+++ b/arm/Asmgenproof.v
@@ -854,7 +854,7 @@ Opaque loadind.
generalize EQ; intros EQ'. monadInv EQ'.
destruct (zlt Ptrofs.max_unsigned (list_length_z (fn_code x0))); inversion EQ1. clear EQ1. subst x0.
monadInv EQ0.
- set (tfbody := Pallocframe (fn_stacksize f) (fn_link_ofs f) :: Pstr IR14 IR13 (SOimm (Ptrofs.to_int (fn_retaddr_ofs f))) :: x0) in *.
+ set (tfbody := Pallocframe (fn_stacksize f) (fn_link_ofs f) :: Psavelr (fn_retaddr_ofs f) :: x0) in *.
set (tf := {| fn_sig := Mach.fn_sig f; fn_code := tfbody |}) in *.
unfold store_stack in *.
exploit Mem.alloc_extends. eauto. eauto. apply Zle_refl. apply Zle_refl.
@@ -877,7 +877,7 @@ Opaque loadind.
simpl. auto.
simpl. unfold exec_store. change (rs2 IR14) with (rs0 IR14).
rewrite Ptrofs.add_zero_l. simpl. unfold Tptr, chunk_of_type, Archi.ptr64 in P. simpl in P.
- rewrite Ptrofs.add_zero_l in P. rewrite ATLR. rewrite Ptrofs.of_int_to_int by auto.
+ rewrite Ptrofs.add_zero_l in P. rewrite ATLR.
rewrite P. auto. auto. auto.
left; exists (State rs3 m3'); split.
eapply exec_straight_steps_1; eauto. omega. constructor.
diff --git a/arm/TargetPrinter.ml b/arm/TargetPrinter.ml
index 60f52efd..02c348a8 100644
--- a/arm/TargetPrinter.ml
+++ b/arm/TargetPrinter.ml
@@ -539,23 +539,13 @@ struct
| Psbc (r1,r2,sa) ->
fprintf oc " sbc %a, %a, %a\n" ireg r1 ireg r2 shift_op sa; 1
| Pstr(r1, r2, sa) | Pstr_a(r1, r2, sa) ->
- fprintf oc " str %a, [%a, %a]\n" ireg r1 ireg r2 shift_op sa;
- begin match r1, r2, sa with
- | IR14, IR13, SOimm n -> cfi_rel_offset oc "lr" (camlint_of_coqint n)
- | _ -> ()
- end;
- 1
+ fprintf oc " str %a, [%a, %a]\n" ireg r1 ireg r2 shift_op sa; 1
| Pstrb(r1, r2, sa) ->
fprintf oc " strb %a, [%a, %a]\n" ireg r1 ireg r2 shift_op sa; 1
| Pstrh(r1, r2, sa) ->
fprintf oc " strh %a, [%a, %a]\n" ireg r1 ireg r2 shift_op sa; 1
| Pstr_p(r1, r2, sa) ->
- fprintf oc " str %a, [%a], %a\n" ireg r1 ireg r2 shift_op sa;
- begin match r1, r2, sa with
- | IR14, IR13, SOimm n -> cfi_rel_offset oc "lr" (camlint_of_coqint n)
- | _ -> ()
- end;
- 1
+ fprintf oc " str %a, [%a], %a\n" ireg r1 ireg r2 shift_op sa; 1
| Pstrb_p(r1, r2, sa) ->
fprintf oc " strb %a, [%a], %a\n" ireg r1 ireg r2 shift_op sa; 1
| Pstrh_p(r1, r2, sa) ->
@@ -703,6 +693,8 @@ struct
assert false
| Pfreeframe(sz, ofs) ->
assert false
+ | Psavelr(ofs) ->
+ assert false
| Plabel lbl ->
fprintf oc "%a:\n" print_label lbl; 0
| Ploadsymbol(r1, id, ofs) ->
@@ -751,6 +743,7 @@ struct
assert false
end
| Pcfi_adjust sz -> cfi_adjust oc (camlint_of_coqint sz); 0
+ | Pcfi_rel_offset ofs -> cfi_rel_offset oc "lr" (camlint_of_coqint ofs); 0
let no_fallthrough = function
| Pb _ -> true