aboutsummaryrefslogtreecommitdiffstats
path: root/powerpc
diff options
context:
space:
mode:
authorxleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>2013-04-20 07:54:52 +0000
committerxleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>2013-04-20 07:54:52 +0000
commit255cee09b71255051c2b40eae0c88bffce1f6f32 (patch)
tree7951b1b13e8fd5e525b9223e8be0580e83550f55 /powerpc
parent6e5041958df01c56762e90770abd704b95a36e5d (diff)
downloadcompcert-kvx-255cee09b71255051c2b40eae0c88bffce1f6f32.tar.gz
compcert-kvx-255cee09b71255051c2b40eae0c88bffce1f6f32.zip
Big merge of the newregalloc-int64 branch. Lots of changes in two directions:
1- new register allocator (+ live range splitting, spilling&reloading, etc) based on a posteriori validation using the Rideau-Leroy algorithm 2- support for 64-bit integer arithmetic (type "long long"). git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@2200 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e
Diffstat (limited to 'powerpc')
-rw-r--r--powerpc/Asm.v79
-rw-r--r--powerpc/Asmgen.v24
-rw-r--r--powerpc/Asmgenproof.v64
-rw-r--r--powerpc/Asmgenproof1.v45
-rw-r--r--powerpc/ConstpropOp.vp4
-rw-r--r--powerpc/ConstpropOpproof.v3
-rw-r--r--powerpc/Machregs.v124
-rw-r--r--powerpc/Machregsaux.ml11
-rw-r--r--powerpc/Op.v55
-rw-r--r--powerpc/PrintAsm.ml154
-rw-r--r--powerpc/PrintOp.ml3
-rw-r--r--powerpc/SelectOp.vp13
-rw-r--r--powerpc/SelectOpproof.v48
-rw-r--r--powerpc/eabi/Conventions1.v417
-rw-r--r--powerpc/eabi/Stacklayout.v67
15 files changed, 599 insertions, 512 deletions
diff --git a/powerpc/Asm.v b/powerpc/Asm.v
index 27e801a7..115d8467 100644
--- a/powerpc/Asm.v
+++ b/powerpc/Asm.v
@@ -23,7 +23,7 @@ Require Import Events.
Require Import Globalenvs.
Require Import Smallstep.
Require Import Locations.
-Require Stacklayout.
+Require Import Stacklayout.
Require Import Conventions.
(** * Abstract syntax *)
@@ -222,7 +222,7 @@ Inductive instruction : Type :=
| Pxori: ireg -> ireg -> constant -> instruction (**r bitwise xor with immediate *)
| Pxoris: ireg -> ireg -> constant -> instruction (**r bitwise xor with immediate high *)
| Plabel: label -> instruction (**r define a code label *)
- | Pbuiltin: external_function -> list preg -> preg -> instruction (**r built-in function *)
+ | Pbuiltin: external_function -> list preg -> list preg -> instruction (**r built-in function *)
| Pannot: external_function -> list annot_param -> instruction (**r annotation statement *)
with annot_param : Type :=
@@ -324,6 +324,14 @@ Fixpoint undef_regs (l: list preg) (rs: regset) : regset :=
| r :: l' => undef_regs l' (rs#r <- Vundef)
end.
+(** Assigning multiple registers *)
+
+Fixpoint set_regs (rl: list preg) (vl: list val) (rs: regset) : regset :=
+ match rl, vl with
+ | r1 :: rl', v1 :: vl' => set_regs rl' vl' (rs#r1 <- v1)
+ | _, _ => rs
+ end.
+
Section RELSEM.
(** Looking up instructions in a code sequence by position. *)
@@ -777,45 +785,45 @@ Definition preg_of (r: mreg) : preg :=
match r with
| R3 => GPR3 | R4 => GPR4 | R5 => GPR5 | R6 => GPR6
| R7 => GPR7 | R8 => GPR8 | R9 => GPR9 | R10 => GPR10
+ | R11 => GPR11 | R12 => GPR12
| R14 => GPR14 | R15 => GPR15 | R16 => GPR16
| R17 => GPR17 | R18 => GPR18 | R19 => GPR19 | R20 => GPR20
| R21 => GPR21 | R22 => GPR22 | R23 => GPR23 | R24 => GPR24
| R25 => GPR25 | R26 => GPR26 | R27 => GPR27 | R28 => GPR28
| R29 => GPR29 | R30 => GPR30 | R31 => GPR31
- | IT1 => GPR11 | IT2 => GPR12
+ | F0 => FPR0
| F1 => FPR1 | F2 => FPR2 | F3 => FPR3 | F4 => FPR4
| F5 => FPR5 | F6 => FPR6 | F7 => FPR7 | F8 => FPR8
- | F9 => FPR9 | F10 => FPR10 | F11 => FPR11
- | F14 => FPR14 | F15 => FPR15
+ | F9 => FPR9 | F10 => FPR10 | F11 => FPR11 | F12 => FPR12
+ | F13 => FPR13 | F14 => FPR14 | F15 => FPR15
| F16 => FPR16 | F17 => FPR17 | F18 => FPR18 | F19 => FPR19
| F20 => FPR20 | F21 => FPR21 | F22 => FPR22 | F23 => FPR23
| F24 => FPR24 | F25 => FPR25 | F26 => FPR26 | F27 => FPR27
| F28 => FPR28 | F29 => FPR29 | F30 => FPR30 | F31 => FPR31
- | FT1 => FPR0 | FT2 => FPR12 | FT3 => FPR13
end.
(** Extract the values of the arguments of an external call.
We exploit the calling conventions from module [Conventions], except that
we use PPC registers instead of locations. *)
+Definition chunk_of_type (ty: typ) :=
+ match ty with Tint => Mint32 | Tfloat => Mfloat64al32 | Tlong => Mint64 end.
+
Inductive extcall_arg (rs: regset) (m: mem): loc -> val -> Prop :=
| extcall_arg_reg: forall r,
extcall_arg rs m (R r) (rs (preg_of r))
- | extcall_arg_int_stack: forall ofs bofs v,
- bofs = Stacklayout.fe_ofs_arg + 4 * ofs ->
- Mem.loadv Mint32 m (Val.add (rs (IR GPR1)) (Vint (Int.repr bofs))) = Some v ->
- extcall_arg rs m (S (Outgoing ofs Tint)) v
- | extcall_arg_float_stack: forall ofs bofs v,
+ | extcall_arg_stack: forall ofs ty bofs v,
bofs = Stacklayout.fe_ofs_arg + 4 * ofs ->
- Mem.loadv Mfloat64al32 m (Val.add (rs (IR GPR1)) (Vint (Int.repr bofs))) = Some v ->
- extcall_arg rs m (S (Outgoing ofs Tfloat)) v.
+ Mem.loadv (chunk_of_type ty) m
+ (Val.add (rs (IR GPR1)) (Vint (Int.repr bofs))) = Some v ->
+ extcall_arg rs m (S Outgoing ofs ty) v.
Definition extcall_arguments
(rs: regset) (m: mem) (sg: signature) (args: list val) : Prop :=
list_forall2 (extcall_arg rs m) (loc_arguments sg) args.
-Definition loc_external_result (sg: signature) : preg :=
- preg_of (loc_result sg).
+Definition loc_external_result (sg: signature) : list preg :=
+ map preg_of (loc_result sg).
(** Extract the values of the arguments of an annotation. *)
@@ -845,33 +853,31 @@ Inductive step: state -> trace -> state -> Prop :=
exec_instr c i rs m = Next rs' m' ->
step (State rs m) E0 (State rs' m')
| exec_step_builtin:
- forall b ofs c ef args res rs m t v m',
+ forall b ofs c ef args res rs m t vl rs' m',
rs PC = Vptr b ofs ->
Genv.find_funct_ptr ge b = Some (Internal c) ->
find_instr (Int.unsigned ofs) c = Some (Pbuiltin ef args res) ->
- external_call ef ge (map rs args) m t v m' ->
- step (State rs m) t
- (State (nextinstr(rs #GPR11 <- Vundef #GPR12 <- Vundef
- #FPR12 <- Vundef #FPR13 <- Vundef
- #FPR0 <- Vundef #CTR <- Vundef
- #res <- v)) m')
+ external_call' ef ge (map rs args) m t vl m' ->
+ rs' = nextinstr
+ (set_regs res vl
+ (undef_regs (map preg_of (destroyed_by_builtin ef)) rs)) ->
+ step (State rs m) t (State rs' m')
| exec_step_annot:
forall b ofs c ef args rs m vargs t v m',
rs PC = Vptr b ofs ->
Genv.find_funct_ptr ge b = Some (Internal c) ->
find_instr (Int.unsigned ofs) c = Some (Pannot ef args) ->
annot_arguments rs m args vargs ->
- external_call ef ge vargs m t v m' ->
+ external_call' ef ge vargs m t v m' ->
step (State rs m) t
(State (nextinstr rs) m')
| exec_step_external:
forall b ef args res rs m t rs' m',
rs PC = Vptr b Int.zero ->
Genv.find_funct_ptr ge b = Some (External ef) ->
- external_call ef ge args m t res m' ->
+ external_call' ef ge args m t res m' ->
extcall_arguments rs m (ef_sig ef) args ->
- rs' = (rs#(loc_external_result (ef_sig ef)) <- res
- #PC <- (rs LR)) ->
+ rs' = (set_regs (loc_external_result (ef_sig ef)) res rs) #PC <- (rs RA) ->
step (State rs m) t (State rs' m').
End RELSEM.
@@ -939,21 +945,21 @@ Ltac Equalities :=
discriminate.
discriminate.
inv H11.
- exploit external_call_determ. eexact H4. eexact H11. intros [A B].
+ exploit external_call_determ'. eexact H4. eexact H9. intros [A B].
split. auto. intros. destruct B; auto. subst. auto.
inv H12.
assert (vargs0 = vargs) by (eapply annot_arguments_determ; eauto). subst vargs0.
- exploit external_call_determ. eexact H5. eexact H13. intros [A B].
+ exploit external_call_determ'. eexact H5. eexact H13. intros [A B].
split. auto. intros. destruct B; auto. subst. auto.
assert (args0 = args) by (eapply extcall_arguments_determ; eauto). subst args0.
- exploit external_call_determ. eexact H3. eexact H8. intros [A B].
+ exploit external_call_determ'. eexact H3. eexact H8. intros [A B].
split. auto. intros. destruct B; auto. subst. auto.
(* trace length *)
red; intros. inv H; simpl.
omega.
- eapply external_call_trace_length; eauto.
- eapply external_call_trace_length; eauto.
- eapply external_call_trace_length; eauto.
+ inv H3; eapply external_call_trace_length; eauto.
+ inv H4; eapply external_call_trace_length; eauto.
+ inv H2; eapply external_call_trace_length; eauto.
(* initial states *)
inv H; inv H0. f_equal. congruence.
(* final no step *)
@@ -973,13 +979,4 @@ Definition data_preg (r: preg) : bool :=
| _ => true
end.
-Definition nontemp_preg (r: preg) : bool :=
- match r with
- | IR GPR0 => false | IR GPR11 => false | IR GPR12 => false
- | FR FPR0 => false | FR FPR12 => false | FR FPR13 => false
- | PC => false | LR => false | CTR => false
- | CR0_0 => false | CR0_1 => false | CR0_2 => false | CR0_3 => false
- | CARRY => false
- | _ => true
- end.
diff --git a/powerpc/Asmgen.v b/powerpc/Asmgen.v
index 39b84e0b..6a1d07ed 100644
--- a/powerpc/Asmgen.v
+++ b/powerpc/Asmgen.v
@@ -136,6 +136,8 @@ Definition loadind (base: ireg) (ofs: int) (ty: typ) (dst: mreg) (k: code) :=
Plfd r (Cint ofs) base :: k
else
loadimm GPR0 ofs (Plfdx r base GPR0 :: k))
+ | Tlong =>
+ Error (msg "Asmgen.loadind")
end.
Definition storeind (src: mreg) (base: ireg) (ofs: int) (ty: typ) (k: code) :=
@@ -152,6 +154,8 @@ Definition storeind (src: mreg) (base: ireg) (ofs: int) (ty: typ) (k: code) :=
Pstfd r (Cint ofs) base :: k
else
loadimm GPR0 ofs (Pstfdx r base GPR0 :: k))
+ | Tlong =>
+ Error (msg "Asmgen.storeind")
end.
(** Constructor for a floating-point comparison. The PowerPC has
@@ -336,8 +340,8 @@ Definition transl_op
OK (if symbol_is_small_data s ofs then
Paddi r GPR0 (Csymbol_sda s ofs) :: k
else
- Paddis GPR12 GPR0 (Csymbol_high s ofs) ::
- Paddi r GPR12 (Csymbol_low s ofs) :: k)
+ Paddis r GPR0 (Csymbol_high s ofs) ::
+ Paddi r r (Csymbol_low s ofs) :: k)
| Oaddrstack n, nil =>
do r <- ireg_of res; OK (addimm r GPR1 n k)
| Ocast8signed, a1 :: nil =>
@@ -428,7 +432,7 @@ Definition transl_op
do r1 <- ireg_of a1; do r <- ireg_of res;
OK (rolm r r1 amount mask k)
| Oroli amount mask, a1 :: a2 :: nil =>
- do x <- assertion (mreg_eq a1 res);
+ assertion (mreg_eq a1 res);
do r2 <- ireg_of a2; do r <- ireg_of res;
OK (Prlwimi r r2 amount mask :: k)
| Onegf, a1 :: nil =>
@@ -467,7 +471,7 @@ Definition transl_op
(** Translation of memory accesses: loads, and stores. *)
Definition int_temp_for (r: mreg) :=
- if mreg_eq r IT2 then GPR11 else GPR12.
+ if mreg_eq r R12 then GPR11 else GPR12.
Definition transl_memory_access
(mk1: constant -> ireg -> instruction)
@@ -529,6 +533,8 @@ Definition transl_load (chunk: memory_chunk) (addr: addressing)
| Mfloat64 | Mfloat64al32 =>
do r <- freg_of dst;
transl_memory_access (Plfd r) (Plfdx r) addr args GPR12 k
+ | Mint64 =>
+ Error (msg "Asmgen.transl_load")
end.
Definition transl_store (chunk: memory_chunk) (addr: addressing)
@@ -550,6 +556,8 @@ Definition transl_store (chunk: memory_chunk) (addr: addressing)
| Mfloat64 | Mfloat64al32 =>
do r <- freg_of src;
transl_memory_access (Pstfd r) (Pstfdx r) addr args temp k
+ | Mint64 =>
+ Error (msg "Asmgen.transl_store")
end.
(** Translation of arguments to annotations *)
@@ -574,7 +582,7 @@ Definition transl_instr (f: Mach.function) (i: Mach.instruction)
loadind GPR11 ofs ty dst k
else
(do k1 <- loadind GPR11 ofs ty dst k;
- loadind GPR1 f.(fn_link_ofs) Tint IT1 k1)
+ loadind GPR1 f.(fn_link_ofs) Tint R11 k1)
| Mop op args res =>
transl_op op args res k
| Mload chunk addr args dst =>
@@ -598,7 +606,7 @@ Definition transl_instr (f: Mach.function) (i: Mach.instruction)
Pfreeframe f.(fn_stacksize) f.(fn_link_ofs) ::
Pbs symb :: k)
| Mbuiltin ef args res =>
- OK (Pbuiltin ef (map preg_of args) (preg_of res) :: k)
+ OK (Pbuiltin ef (map preg_of args) (map preg_of res) :: k)
| Mannot ef args =>
OK (Pannot ef (map transl_annot_param args) :: k)
| Mlabel lbl =>
@@ -624,8 +632,8 @@ Definition transl_instr (f: Mach.function) (i: Mach.instruction)
Definition it1_is_parent (before: bool) (i: Mach.instruction) : bool :=
match i with
| Msetstack src ofs ty => before
- | Mgetparam ofs ty dst => negb (mreg_eq dst IT1)
- | Mop Omove args res => before && negb (mreg_eq res IT1)
+ | Mgetparam ofs ty dst => negb (mreg_eq dst R11)
+ | Mop Omove args res => before && negb (mreg_eq res R11)
| _ => false
end.
diff --git a/powerpc/Asmgenproof.v b/powerpc/Asmgenproof.v
index 07e66cf8..37c88088 100644
--- a/powerpc/Asmgenproof.v
+++ b/powerpc/Asmgenproof.v
@@ -487,9 +487,9 @@ Definition measure (s: Mach.state) : nat :=
| Mach.Returnstate _ _ _ => 1%nat
end.
-Remark preg_of_not_GPR11: forall r, negb (mreg_eq r IT1) = true -> IR GPR11 <> preg_of r.
+Remark preg_of_not_GPR11: forall r, negb (mreg_eq r R11) = true -> IR GPR11 <> preg_of r.
Proof.
- intros. change (IR GPR11) with (preg_of IT1). red; intros.
+ intros. change (IR GPR11) with (preg_of R11). red; intros.
exploit preg_of_injective; eauto. intros; subst r; discriminate.
Qed.
@@ -526,7 +526,8 @@ Proof.
rewrite (sp_val _ _ _ AG) in A. intros. simpl in TR.
exploit storeind_correct; eauto with asmgen. intros [rs' [P Q]].
exists rs'; split. eauto.
- split. change (undef_setstack rs) with rs. apply agree_exten with rs0; auto with asmgen.
+ split. change (Mach.undef_regs (destroyed_by_op Omove) rs) with rs.
+ apply agree_exten with rs0; auto with asmgen.
simpl; intros. rewrite Q; auto with asmgen.
- (* Mgetparam *)
@@ -568,9 +569,11 @@ Opaque loadind.
intros [v' [A B]]. rewrite (sp_val _ _ _ AG) in A.
left; eapply exec_straight_steps; eauto; intros. simpl in TR.
exploit transl_op_correct; eauto. intros [rs2 [P [Q R]]].
- exists rs2; split. eauto. split. auto.
- simpl. destruct op; try congruence. destruct ep; simpl; try congruence. intros.
+ exists rs2; split. eauto. split. auto.
+ destruct op; simpl; try discriminate. intros.
+ destruct (andb_prop _ _ H1); clear H1.
rewrite R; auto. apply preg_of_not_GPR11; auto.
+ change (destroyed_by_op Omove) with (@nil mreg). simpl; auto.
- (* Mload *)
assert (eval_addressing tge sp addr rs##args = Some a).
@@ -595,7 +598,7 @@ Opaque loadind.
left; eapply exec_straight_steps; eauto.
intros. simpl in TR. exploit transl_store_correct; eauto. intros [rs2 [P Q]].
exists rs2; split. eauto.
- split. eapply agree_exten_temps; eauto. intros; auto with asmgen.
+ split. eapply agree_undef_regs; eauto with asmgen.
simpl; congruence.
- (* Mcall *)
@@ -741,19 +744,21 @@ Hint Resolve agree_nextinstr agree_set_other: asmgen.
inv AT. monadInv H3.
exploit functions_transl; eauto. intro FN.
generalize (transf_function_no_overflow _ _ H2); intro NOOV.
- exploit external_call_mem_extends; eauto. eapply preg_vals; eauto.
+ exploit external_call_mem_extends'; eauto. eapply preg_vals; eauto.
intros [vres' [m2' [A [B [C D]]]]].
left. econstructor; split. apply plus_one.
eapply exec_step_builtin. eauto. eauto.
eapply find_instr_tail; eauto.
- eapply external_call_symbols_preserved; eauto.
+ eapply external_call_symbols_preserved'; eauto.
exact symbols_preserved. exact varinfo_preserved.
+ eauto.
econstructor; eauto.
- Simpl. rewrite <- H0. simpl. econstructor; eauto.
+ Simpl. rewrite set_pregs_other_2. rewrite undef_regs_other_2. rewrite <- H0. simpl. econstructor; eauto.
eapply code_tail_next_int; eauto.
- apply agree_nextinstr. eapply agree_set_undef_mreg; eauto.
- rewrite Pregmap.gss. auto.
- intros. Simpl.
+ apply preg_notin_charact; auto with asmgen.
+ apply preg_notin_charact; auto with asmgen.
+ apply agree_nextinstr. eapply agree_set_mregs; auto.
+ eapply agree_undef_regs; eauto. intros; apply undef_regs_other_2; auto.
congruence.
- (* Mannot *)
@@ -761,12 +766,12 @@ Hint Resolve agree_nextinstr agree_set_other: asmgen.
exploit functions_transl; eauto. intro FN.
generalize (transf_function_no_overflow _ _ H3); intro NOOV.
exploit annot_arguments_match; eauto. intros [vargs' [P Q]].
- exploit external_call_mem_extends; eauto.
+ exploit external_call_mem_extends'; eauto.
intros [vres' [m2' [A [B [C D]]]]].
left. econstructor; split. apply plus_one.
eapply exec_step_annot. eauto. eauto.
eapply find_instr_tail; eauto. eauto.
- eapply external_call_symbols_preserved; eauto.
+ eapply external_call_symbols_preserved'; eauto.
exact symbols_preserved. exact varinfo_preserved.
eapply match_states_intro with (ep := false); eauto with coqlib.
unfold nextinstr. rewrite Pregmap.gss.
@@ -798,11 +803,11 @@ Hint Resolve agree_nextinstr agree_set_other: asmgen.
destruct (snd (crbit_for_cond cond)).
(* Pbt, taken *)
econstructor; econstructor; econstructor; split. eexact A.
- split. eapply agree_exten_temps; eauto with asmgen.
+ split. eapply agree_exten; eauto with asmgen.
simpl. rewrite B. reflexivity.
(* Pbf, taken *)
econstructor; econstructor; econstructor; split. eexact A.
- split. eapply agree_exten_temps; eauto with asmgen.
+ split. eapply agree_exten; eauto with asmgen.
simpl. rewrite B. reflexivity.
- (* Mcond false *)
@@ -815,7 +820,7 @@ Hint Resolve agree_nextinstr agree_set_other: asmgen.
destruct (snd (crbit_for_cond cond)).
apply exec_straight_one. simpl. rewrite B. reflexivity. auto.
apply exec_straight_one. simpl. rewrite B. reflexivity. auto.
- split. eapply agree_exten_temps; eauto with asmgen.
+ split. eapply agree_exten; eauto with asmgen.
intros; Simpl.
simpl. congruence.
@@ -835,7 +840,9 @@ Hint Resolve agree_nextinstr agree_set_other: asmgen.
eapply find_instr_tail; eauto.
simpl. rewrite <- H9. unfold Mach.label in H0; unfold label; rewrite H0. eexact A.
econstructor; eauto.
- eapply agree_exten_temps; eauto. intros. rewrite C; auto with asmgen. Simpl.
+ eapply agree_undef_regs; eauto.
+Local Transparent destroyed_by_jumptable.
+ simpl. intros. rewrite C; auto with asmgen. Simpl.
congruence.
- (* Mreturn *)
@@ -896,6 +903,7 @@ Hint Resolve agree_nextinstr agree_set_other: asmgen.
intros [m1' [C D]].
exploit Mem.storev_extends. eexact D. eexact H1. eauto. eauto.
intros [m2' [F G]].
+ simpl Mach.chunk_of_type in F.
exploit Mem.storev_extends. eexact G. eexact H2. eauto. eauto.
intros [m3' [P Q]].
(* Execution of function prologue *)
@@ -910,7 +918,7 @@ Hint Resolve agree_nextinstr agree_set_other: asmgen.
rewrite <- H5 at 2.
apply exec_straight_three with rs2 m2' rs3 m2'.
unfold exec_instr. rewrite C. fold sp.
- rewrite <- (sp_val _ _ _ AG). unfold chunk_of_type in F. rewrite F. auto.
+ rewrite <- (sp_val _ _ _ AG). rewrite F. auto.
simpl. auto.
simpl. unfold store1. rewrite gpr_or_zero_not_zero.
change (rs3 GPR1) with sp. change (rs3 GPR0) with (rs0 LR). simpl.
@@ -928,8 +936,7 @@ Hint Resolve agree_nextinstr agree_set_other: asmgen.
unfold rs4, rs3, rs2.
apply agree_nextinstr. apply agree_set_other; auto. apply agree_set_other; auto.
apply agree_nextinstr. apply agree_set_other; auto.
- eapply agree_change_sp; eauto. apply agree_exten_temps with rs0; eauto.
- unfold sp; congruence.
+ eapply agree_change_sp; eauto. unfold sp; congruence.
congruence.
- (* external function *)
@@ -937,17 +944,15 @@ Hint Resolve agree_nextinstr agree_set_other: asmgen.
intros [tf [A B]]. simpl in B. inv B.
exploit extcall_arguments_match; eauto.
intros [args' [C D]].
- exploit external_call_mem_extends; eauto.
+ exploit external_call_mem_extends'; eauto.
intros [res' [m2' [P [Q [R S]]]]].
left; econstructor; split.
apply plus_one. eapply exec_step_external; eauto.
- eapply external_call_symbols_preserved; eauto.
+ eapply external_call_symbols_preserved'; eauto.
exact symbols_preserved. exact varinfo_preserved.
econstructor; eauto.
unfold loc_external_result.
- eapply agree_set_mreg; eauto.
- rewrite Pregmap.gso; auto with asmgen. rewrite Pregmap.gss. auto.
- intros; Simpl.
+ apply agree_set_other; auto. apply agree_set_mregs; auto.
- (* return *)
inv STACKS. simpl in *.
@@ -980,10 +985,9 @@ Lemma transf_final_states:
forall st1 st2 r,
match_states st1 st2 -> Mach.final_state st1 r -> Asm.final_state st2 r.
Proof.
- intros. inv H0. inv H. inv STACKS. constructor.
- auto.
- compute in H1.
- generalize (preg_val _ _ _ R3 AG). rewrite H1. intros LD; inv LD. auto.
+ intros. inv H0. inv H. constructor. auto.
+ compute in H1. inv H1.
+ generalize (preg_val _ _ _ R3 AG). rewrite H2. intros LD; inv LD. auto.
Qed.
Theorem transf_program_correct:
diff --git a/powerpc/Asmgenproof1.v b/powerpc/Asmgenproof1.v
index 0e6d3e1c..cd961c95 100644
--- a/powerpc/Asmgenproof1.v
+++ b/powerpc/Asmgenproof1.v
@@ -546,7 +546,8 @@ Ltac ArgsInv :=
| [ H: Error _ = OK _ |- _ ] => discriminate
| [ H: match ?args with nil => _ | _ :: _ => _ end = OK _ |- _ ] => destruct args
| [ H: bind _ _ = OK _ |- _ ] => monadInv H
- | [ H: assertion _ = OK _ |- _ ] => monadInv H
+ | [ H: match _ with left _ => _ | right _ => assertion_failed end = OK _ |- _ ] => monadInv H; ArgsInv
+ | [ H: match _ with true => _ | false => assertion_failed end = OK _ |- _ ] => monadInv H; ArgsInv
end);
subst;
repeat (match goal with
@@ -810,9 +811,7 @@ Lemma transl_op_correct_aux:
exists rs',
exec_straight ge fn c rs m k rs' m
/\ rs'#(preg_of res) = v
- /\ forall r,
- match op with Omove => data_preg r = true | _ => nontemp_preg r = true end ->
- r <> preg_of res -> rs'#r = rs#r.
+ /\ forall r, data_preg r = true -> r <> preg_of res -> preg_notin r (destroyed_by_op op) -> rs' r = rs r.
Proof.
Opaque Int.eq.
intros. unfold transl_op in H; destruct op; ArgsInv; simpl in H0; try (inv H0); try TranslOpSimpl.
@@ -839,7 +838,7 @@ Opaque Val.add.
rewrite gpr_or_zero_not_zero; eauto with asmgen. Simpl.
rewrite (Val.add_commut Vzero). rewrite high_half_zero.
rewrite Val.add_commut. rewrite low_high_half. auto.
- intros; Simpl.
+ intros; Simpl.
(* Oaddrstack *)
destruct (addimm_correct x GPR1 i k rs m) as [rs' [EX [RES OTH]]]; eauto with asmgen.
exists rs'; auto with asmgen.
@@ -879,12 +878,11 @@ Opaque Val.add.
intros. rewrite D; auto with asmgen. unfold rs1; Simpl.
(* Oandimm *)
destruct (andimm_correct x0 x i k rs m) as [rs' [A [B C]]]; eauto with asmgen.
- exists rs'; auto with asmgen.
(* Oorimm *)
- destruct (orimm_correct x0 x i k rs m) as [rs' [A [B C]]].
+ destruct (orimm_correct x0 x i k rs m) as [rs' [A [B C]]].
exists rs'; auto with asmgen.
(* Oxorimm *)
- destruct (xorimm_correct x0 x i k rs m) as [rs' [A [B C]]].
+ destruct (xorimm_correct x0 x i k rs m) as [rs' [A [B C]]].
exists rs'; auto with asmgen.
(* Onor *)
replace (Val.notint (rs x))
@@ -898,7 +896,6 @@ Opaque Val.add.
intros; Simpl.
(* Orolm *)
destruct (rolm_correct x0 x i i0 k rs m) as [rs' [A [B C]]]; eauto with asmgen.
- exists rs'; auto with asmgen.
(* Ointoffloat *)
replace v with (Val.maketotal (Val.intoffloat (rs x))).
TranslOpSimpl.
@@ -916,19 +913,17 @@ Lemma transl_op_correct:
Mem.extends m m' ->
exists rs',
exec_straight ge fn c rs m' k rs' m'
- /\ agree (Regmap.set res v (undef_op op ms)) sp rs'
- /\ forall r, op = Omove -> data_preg r = true -> r <> preg_of res -> rs'#r = rs#r.
+ /\ agree (Regmap.set res v (Mach.undef_regs (destroyed_by_op op) ms)) sp rs'
+ /\ forall r, data_preg r = true -> r <> preg_of res -> preg_notin r (destroyed_by_op op) -> rs' r = rs r.
Proof.
intros.
exploit eval_operation_lessdef. eapply preg_vals; eauto. eauto. eauto.
intros [v' [A B]]. rewrite (sp_val _ _ _ H0) in A.
exploit transl_op_correct_aux; eauto. intros [rs' [P [Q R]]].
rewrite <- Q in B.
- exists rs'; split. eexact P.
- split. unfold undef_op. destruct op;
- (apply agree_set_undef_mreg with rs || apply agree_set_mreg with rs);
+ exists rs'; split. eexact P.
+ split. apply agree_set_undef_mreg with rs; auto.
auto.
- intros. subst op. auto.
Qed.
(** Translation of memory accesses *)
@@ -1100,17 +1095,18 @@ Lemma transl_store_correct:
Mem.storev chunk m a (rs (preg_of src)) = Some m' ->
exists rs',
exec_straight ge fn c rs m k rs' m'
- /\ forall r, r <> PC -> r <> GPR11 -> r <> GPR12 -> r <> FPR13 -> rs' r = rs r.
+ /\ forall r, r <> PC -> preg_notin r (destroyed_by_store chunk addr) -> rs' r = rs r.
Proof.
+Local Transparent destroyed_by_store.
intros.
assert (TEMP0: int_temp_for src = GPR11 \/ int_temp_for src = GPR12).
- unfold int_temp_for. destruct (mreg_eq src IT2); auto.
+ unfold int_temp_for. destruct (mreg_eq src R12); auto.
assert (TEMP1: int_temp_for src <> GPR0).
destruct TEMP0; congruence.
assert (TEMP2: IR (int_temp_for src) <> preg_of src).
- unfold int_temp_for. destruct (mreg_eq src IT2).
+ unfold int_temp_for. destruct (mreg_eq src R12).
subst src; simpl; congruence.
- change (IR GPR12) with (preg_of IT2). red; intros; elim n.
+ change (IR GPR12) with (preg_of R12). red; intros; elim n.
eapply preg_of_injective; eauto.
assert (BASE: forall mk1 mk2 chunk',
transl_memory_access mk1 mk2 addr args (int_temp_for src) k = OK c ->
@@ -1123,15 +1119,15 @@ Proof.
store2 chunk' (preg_of src) r1 r2 rs1 m) ->
exists rs',
exec_straight ge fn c rs m k rs' m'
- /\ forall r, r <> PC -> r <> GPR11 -> r <> GPR12 -> r <> FPR13 -> rs' r = rs r).
+ /\ forall r, r <> PC -> r <> GPR11 /\ r <> GPR12 -> rs' r = rs r).
{
intros. eapply transl_memory_access_correct; eauto.
intros. econstructor; split. apply exec_straight_one.
rewrite H4. unfold store1. rewrite H6. rewrite H7; auto with asmgen. rewrite H3. eauto. auto.
- intros; Simpl. apply H7; auto. destruct TEMP0; congruence.
+ intros; Simpl. apply H7; auto. destruct TEMP0; destruct H9; congruence.
intros. econstructor; split. apply exec_straight_one.
rewrite H5. unfold store2. rewrite H6. rewrite H7; auto with asmgen. rewrite H3. eauto. auto.
- intros; Simpl. apply H7; auto. destruct TEMP0; congruence.
+ intros; Simpl. apply H7; auto. destruct TEMP0; destruct H9; congruence.
}
destruct chunk; monadInv H.
- (* Mint8signed *)
@@ -1153,10 +1149,11 @@ Proof.
eapply transl_memory_access_correct. eauto. eauto. eauto.
intros. econstructor; split. apply exec_straight_one.
simpl. unfold store1. rewrite H. rewrite H2; auto with asmgen. rewrite H1. eauto. auto.
- intros. Simpl. apply H2; auto with asmgen. destruct TEMP0; congruence.
+Local Transparent destroyed_by_store.
+ simpl; intros. destruct H4 as [A [B C]]. Simpl. apply H2; auto with asmgen. destruct TEMP0; congruence.
intros. econstructor; split. apply exec_straight_one.
simpl. unfold store2. rewrite H. rewrite H2; auto with asmgen. rewrite H1. eauto. auto.
- intros. Simpl. apply H2; auto with asmgen. destruct TEMP0; congruence.
+ simpl; intros. destruct H4 as [A [B C]]. Simpl. apply H2; auto with asmgen. destruct TEMP0; congruence.
- (* Mfloat64 *)
apply Mem.storev_float64al32 in H1. eapply BASE; eauto; erewrite freg_of_eq by eauto; auto.
- (* Mfloat64al32 *)
diff --git a/powerpc/ConstpropOp.vp b/powerpc/ConstpropOp.vp
index 9131a466..e7e4095d 100644
--- a/powerpc/ConstpropOp.vp
+++ b/powerpc/ConstpropOp.vp
@@ -31,6 +31,7 @@ Inductive approx : Type :=
no compile-time information is available. *)
| I: int -> approx (** A known integer value. *)
| F: float -> approx (** A known floating-point value. *)
+ | L: int64 -> approx (** A know 64-bit integer value. *)
| G: ident -> int -> approx
(** The value is the address of the given global
symbol plus the given integer offset. *)
@@ -125,6 +126,9 @@ Nondetfunction eval_static_operation (op: operation) (vl: list approx) :=
| Osingleoffloat, F n1 :: nil => F(Float.singleoffloat n1)
| Ointoffloat, F n1 :: nil => eval_static_intoffloat n1
| Ofloatofwords, I n1 :: I n2 :: nil => if propagate_float_constants tt then F(Float.from_words n1 n2) else Unknown
+ | Omakelong, I n1 :: I n2 :: nil => L(Int64.ofwords n1 n2)
+ | Olowlong, L n :: nil => I(Int64.loword n)
+ | Ohighlong, L n :: nil => I(Int64.hiword n)
| Ocmp c, vl => eval_static_condition_val c vl
| _, _ => Unknown
end.
diff --git a/powerpc/ConstpropOpproof.v b/powerpc/ConstpropOpproof.v
index 84e1e5bb..9d833bf1 100644
--- a/powerpc/ConstpropOpproof.v
+++ b/powerpc/ConstpropOpproof.v
@@ -45,6 +45,7 @@ Definition val_match_approx (a: approx) (v: val) : Prop :=
| Unknown => True
| I p => v = Vint p
| F p => v = Vfloat p
+ | L p => v = Vlong p
| G symb ofs => v = symbol_address ge symb ofs
| S ofs => v = Val.add sp (Vint ofs)
| _ => False
@@ -65,6 +66,8 @@ Ltac SimplVMA :=
simpl in H; (try subst v); SimplVMA
| H: (val_match_approx (F _) ?v) |- _ =>
simpl in H; (try subst v); SimplVMA
+ | H: (val_match_approx (L _) ?v) |- _ =>
+ simpl in H; (try subst v); SimplVMA
| H: (val_match_approx (G _ _) ?v) |- _ =>
simpl in H; (try subst v); SimplVMA
| H: (val_match_approx (S _) ?v) |- _ =>
diff --git a/powerpc/Machregs.v b/powerpc/Machregs.v
index 632a55df..ce66e6a5 100644
--- a/powerpc/Machregs.v
+++ b/powerpc/Machregs.v
@@ -13,6 +13,7 @@
Require Import Coqlib.
Require Import Maps.
Require Import AST.
+Require Import Op.
(** ** Machine registers *)
@@ -21,59 +22,55 @@ Require Import AST.
- Integer registers that can be allocated to RTL pseudo-registers ([Rxx]).
- Floating-point registers that can be allocated to RTL pseudo-registers
([Fxx]).
-- Two integer registers, not allocatable, reserved as temporaries for
- spilling and reloading ([IT1, IT2]).
-- Two float registers, not allocatable, reserved as temporaries for
- spilling and reloading ([FT1, FT2]).
The type [mreg] does not include special-purpose or reserved
- machine registers such as the stack pointer and the condition codes. *)
+ machine registers such as the stack pointer (GPR1), the small data area
+ pointers (GPR2, GPR13), and the condition codes. *)
Inductive mreg: Type :=
(** Allocatable integer regs *)
| R3: mreg | R4: mreg | R5: mreg | R6: mreg
| R7: mreg | R8: mreg | R9: mreg | R10: mreg
+ | R11: mreg | R12: mreg
| R14: mreg | R15: mreg | R16: mreg
| R17: mreg | R18: mreg | R19: mreg | R20: mreg
| R21: mreg | R22: mreg | R23: mreg | R24: mreg
| R25: mreg | R26: mreg | R27: mreg | R28: mreg
| R29: mreg | R30: mreg | R31: mreg
(** Allocatable float regs *)
+ | F0: mreg
| F1: mreg | F2: mreg | F3: mreg | F4: mreg
| F5: mreg | F6: mreg | F7: mreg | F8: mreg
- | F9: mreg | F10: mreg | F11: mreg
- | F14: mreg | F15: mreg
+ | F9: mreg | F10: mreg | F11: mreg | F12: mreg
+ | F13: mreg | F14: mreg | F15: mreg
| F16: mreg | F17: mreg | F18: mreg | F19: mreg
| F20: mreg | F21: mreg | F22: mreg | F23: mreg
| F24: mreg | F25: mreg | F26: mreg | F27: mreg
- | F28: mreg | F29: mreg | F30: mreg | F31: mreg
- (** Integer temporaries *)
- | IT1: mreg (* R11 *) | IT2: mreg (* R12 *)
- (** Float temporaries *)
- | FT1: mreg (* F0 *) | FT2: mreg (* F12 *) | FT3: mreg (* F13 *).
+ | F28: mreg | F29: mreg | F30: mreg | F31: mreg.
Lemma mreg_eq: forall (r1 r2: mreg), {r1 = r2} + {r1 <> r2}.
Proof. decide equality. Defined.
+Global Opaque mreg_eq.
Definition mreg_type (r: mreg): typ :=
match r with
| R3 => Tint | R4 => Tint | R5 => Tint | R6 => Tint
| R7 => Tint | R8 => Tint | R9 => Tint | R10 => Tint
+ | R11 => Tint | R12 => Tint
| R14 => Tint | R15 => Tint | R16 => Tint
| R17 => Tint | R18 => Tint | R19 => Tint | R20 => Tint
| R21 => Tint | R22 => Tint | R23 => Tint | R24 => Tint
| R25 => Tint | R26 => Tint | R27 => Tint | R28 => Tint
| R29 => Tint | R30 => Tint | R31 => Tint
+ | F0 => Tfloat
| F1 => Tfloat | F2 => Tfloat | F3 => Tfloat | F4 => Tfloat
| F5 => Tfloat | F6 => Tfloat | F7 => Tfloat | F8 => Tfloat
- | F9 => Tfloat | F10 => Tfloat | F11 => Tfloat
- | F14 => Tfloat | F15 => Tfloat
+ | F9 => Tfloat | F10 => Tfloat | F11 => Tfloat | F12 => Tfloat
+ | F13 => Tfloat | F14 => Tfloat | F15 => Tfloat
| F16 => Tfloat | F17 => Tfloat | F18 => Tfloat | F19 => Tfloat
| F20 => Tfloat | F21 => Tfloat | F22 => Tfloat | F23 => Tfloat
| F24 => Tfloat | F25 => Tfloat | F26 => Tfloat | F27 => Tfloat
| F28 => Tfloat | F29 => Tfloat | F30 => Tfloat | F31 => Tfloat
- | IT1 => Tint | IT2 => Tint
- | FT1 => Tfloat | FT2 => Tfloat | FT3 => Tfloat
end.
Open Scope positive_scope.
@@ -85,21 +82,21 @@ Module IndexedMreg <: INDEXED_TYPE.
match r with
| R3 => 1 | R4 => 2 | R5 => 3 | R6 => 4
| R7 => 5 | R8 => 6 | R9 => 7 | R10 => 8
- | R14 => 9 | R15 => 10 | R16 => 11
- | R17 => 12 | R18 => 13 | R19 => 14 | R20 => 15
- | R21 => 16 | R22 => 17 | R23 => 18 | R24 => 19
- | R25 => 20 | R26 => 21 | R27 => 22 | R28 => 23
- | R29 => 24 | R30 => 25 | R31 => 26
- | F1 => 28 | F2 => 29 | F3 => 30 | F4 => 31
- | F5 => 32 | F6 => 33 | F7 => 34 | F8 => 35
- | F9 => 36 | F10 => 37 | F11 => 38
- | F14 => 39 | F15 => 40
- | F16 => 41 | F17 => 42 | F18 => 43 | F19 => 44
- | F20 => 45 | F21 => 46 | F22 => 47 | F23 => 48
- | F24 => 49 | F25 => 50 | F26 => 51 | F27 => 52
- | F28 => 53 | F29 => 54 | F30 => 55 | F31 => 56
- | IT1 => 57 | IT2 => 58
- | FT1 => 59 | FT2 => 60 | FT3 => 61
+ | R11 => 9 | R12 => 10
+ | R14 => 11 | R15 => 12 | R16 => 13
+ | R17 => 14 | R18 => 15 | R19 => 16 | R20 => 17
+ | R21 => 18 | R22 => 19 | R23 => 20 | R24 => 21
+ | R25 => 22 | R26 => 23 | R27 => 24 | R28 => 25
+ | R29 => 26 | R30 => 27 | R31 => 28
+ | F0 => 29
+ | F1 => 30 | F2 => 31 | F3 => 32 | F4 => 33
+ | F5 => 34 | F6 => 35 | F7 => 36 | F8 => 37
+ | F9 => 38 | F10 => 39 | F11 => 40 | F12 => 41
+ | F13 => 42 | F14 => 43 | F15 => 44
+ | F16 => 45 | F17 => 46 | F18 => 47 | F19 => 48
+ | F20 => 49 | F21 => 50 | F22 => 51 | F23 => 52
+ | F24 => 53 | F25 => 54 | F26 => 55 | F27 => 56
+ | F28 => 57 | F29 => 58 | F30 => 59 | F31 => 60
end.
Lemma index_inj:
forall r1 r2, index r1 = index r2 -> r1 = r2.
@@ -108,3 +105,68 @@ Module IndexedMreg <: INDEXED_TYPE.
Qed.
End IndexedMreg.
+(** ** Destroyed registers, preferred registers *)
+
+Definition destroyed_by_op (op: operation): list mreg :=
+ match op with
+ | Ofloatconst _ => R12 :: nil
+ | Ointoffloat => F13 :: nil
+ | _ => nil
+ end.
+
+Definition destroyed_by_load (chunk: memory_chunk) (addr: addressing): list mreg :=
+ R12 :: nil.
+
+Definition destroyed_by_store (chunk: memory_chunk) (addr: addressing): list mreg :=
+ match chunk with
+ | Mfloat32 => R11 :: R12 :: F13 :: nil
+ | _ => R11 :: R12 :: nil
+ end.
+
+Definition destroyed_by_cond (cond: condition): list mreg :=
+ nil.
+
+Definition destroyed_by_jumptable: list mreg :=
+ R12 :: nil.
+
+Definition destroyed_by_builtin (ef: external_function): list mreg :=
+ match ef with
+ | EF_builtin _ _ => F13 :: nil
+ | EF_vload _ => nil
+ | EF_vstore Mfloat32 => F13 :: nil
+ | EF_vstore _ => nil
+ | EF_vload_global _ _ _ => R11 :: nil
+ | EF_vstore_global Mint64 _ _ => R10 :: R11 :: R12 :: nil
+ | EF_vstore_global Mfloat32 _ _ => R11 :: R12 :: F13 :: nil
+ | EF_vstore_global _ _ _ => R11 :: R12 :: nil
+ | EF_memcpy _ _ => R11 :: R12 :: F13 :: nil
+ | _ => nil
+ end.
+
+Definition destroyed_at_function_entry: list mreg :=
+ nil.
+
+Definition temp_for_parent_frame: mreg :=
+ R11.
+
+Definition mregs_for_operation (op: operation): list (option mreg) * option mreg :=
+ (nil, None).
+
+Definition mregs_for_builtin (ef: external_function): list (option mreg) * list (option mreg) :=
+ (nil, nil).
+
+Global Opaque
+ destroyed_by_op destroyed_by_load destroyed_by_store
+ destroyed_by_cond destroyed_by_jumptable destroyed_by_builtin
+ destroyed_at_function_entry temp_for_parent_frame
+ mregs_for_operation mregs_for_builtin.
+
+(** Two-address operations. Return [true] if the first argument and
+ the result must be in the same location *and* are unconstrained
+ by [mregs_for_operation]. There is only one: rotate-mask-insert. *)
+
+Definition two_address_op (op: operation) : bool :=
+ match op with
+ | Oroli _ _ => true
+ | _ => false
+ end.
diff --git a/powerpc/Machregsaux.ml b/powerpc/Machregsaux.ml
index 713e14d1..b8d7c97f 100644
--- a/powerpc/Machregsaux.ml
+++ b/powerpc/Machregsaux.ml
@@ -17,21 +17,20 @@ open Machregs
let register_names = [
("R3", R3); ("R4", R4); ("R5", R5); ("R6", R6);
("R7", R7); ("R8", R8); ("R9", R9); ("R10", R10);
+ ("R11", R11); ("R12", R12);
("R14", R14); ("R15", R15); ("R16", R16);
("R17", R17); ("R18", R18); ("R19", R19); ("R20", R20);
("R21", R21); ("R22", R22); ("R23", R23); ("R24", R24);
("R25", R25); ("R26", R26); ("R27", R27); ("R28", R28);
("R29", R29); ("R30", R30); ("R31", R31);
- ("F1", F1); ("F2", F2); ("F3", F3); ("F4", F4);
+ ("F0", F0); ("F1", F1); ("F2", F2); ("F3", F3); ("F4", F4);
("F5", F5); ("F6", F6); ("F7", F7); ("F8", F8);
- ("F9", F9); ("F10", F10); ("F11", F11);
- ("F14", F14); ("F15", F15);
+ ("F9", F9); ("F10", F10); ("F11", F11); ("F12", F12);
+ ("F13", F13); ("F14", F14); ("F15", F15);
("F16", F16); ("F17", F17); ("F18", F18); ("F19", F19);
("F20", F20); ("F21", F21); ("F22", F22); ("F23", F23);
("F24", F24); ("F25", F25); ("F26", F26); ("F27", F27);
- ("F28", F28); ("F29", F29); ("F30", F30); ("F31", F31);
- ("R11", IT1); ("R12", IT2);
- ("F0", FT1); ("F12", FT2); ("F13", FT3)
+ ("F28", F28); ("F29", F29); ("F30", F30); ("F31", F31)
]
let name_of_register r =
diff --git a/powerpc/Op.v b/powerpc/Op.v
index 110796ba..e584726b 100644
--- a/powerpc/Op.v
+++ b/powerpc/Op.v
@@ -97,6 +97,10 @@ Inductive operation : Type :=
(*c Conversions between int and float: *)
| Ointoffloat: operation (**r [rd = signed_int_of_float(r1)] *)
| Ofloatofwords: operation (**r [rd = float_of_words(r1,r2)] *)
+(*c Manipulating 64-bit integers: *)
+ | Omakelong: operation (**r [rd = r1 << 32 | r2] *)
+ | Olowlong: operation (**r [rd = low-word(r1)] *)
+ | Ohighlong: operation (**r [rd = high-word(r1)] *)
(*c Boolean tests: *)
| Ocmp: condition -> operation. (**r [rd = 1] if condition holds, [rd = 0] otherwise. *)
@@ -205,6 +209,9 @@ Definition eval_operation
| Osingleoffloat, v1::nil => Some(Val.singleoffloat v1)
| Ointoffloat, v1::nil => Val.intoffloat v1
| Ofloatofwords, v1::v2::nil => Some(Val.floatofwords v1 v2)
+ | Omakelong, v1::v2::nil => Some(Val.longofwords v1 v2)
+ | Olowlong, v1::nil => Some(Val.loword v1)
+ | Ohighlong, v1::nil => Some(Val.hiword v1)
| Ocmp c, _ => Some(Val.of_optbool (eval_condition c vl m))
| _, _ => None
end.
@@ -225,7 +232,7 @@ Ltac FuncInv :=
match goal with
| H: (match ?x with nil => _ | _ :: _ => _ end = Some _) |- _ =>
destruct x; simpl in H; try discriminate; FuncInv
- | H: (match ?v with Vundef => _ | Vint _ => _ | Vfloat _ => _ | Vptr _ _ => _ end = Some _) |- _ =>
+ | H: (match ?v with Vundef => _ | Vint _ => _ | Vlong _ => _ | Vfloat _ => _ | Vptr _ _ => _ end = Some _) |- _ =>
destruct v; simpl in H; try discriminate; FuncInv
| H: (Some _ = Some _) |- _ =>
injection H; intros; clear H; FuncInv
@@ -292,6 +299,9 @@ Definition type_of_operation (op: operation) : list typ * typ :=
| Osingleoffloat => (Tfloat :: nil, Tfloat)
| Ointoffloat => (Tfloat :: nil, Tint)
| Ofloatofwords => (Tint :: Tint :: nil, Tfloat)
+ | Omakelong => (Tint :: Tint :: nil, Tlong)
+ | Olowlong => (Tlong :: nil, Tint)
+ | Ohighlong => (Tlong :: nil, Tint)
| Ocmp c => (type_of_condition c, Tint)
end.
@@ -366,6 +376,9 @@ Proof with (try exact I).
destruct v0...
destruct v0; simpl in H0; inv H0. destruct (Float.intoffloat f); inv H2...
destruct v0; destruct v1...
+ destruct v0; destruct v1...
+ destruct v0...
+ destruct v0...
destruct (eval_condition c vl m); simpl... destruct b...
Qed.
@@ -481,6 +494,33 @@ Proof.
rewrite Val.add_assoc. simpl. auto.
Qed.
+(** Offset an addressing mode [addr] by a quantity [delta], so that
+ it designates the pointer [delta] bytes past the pointer designated
+ by [addr]. May be undefined, in which case [None] is returned. *)
+
+Definition offset_addressing (addr: addressing) (delta: int) : option addressing :=
+ match addr with
+ | Aindexed n => Some(Aindexed (Int.add n delta))
+ | Aindexed2 => None
+ | Aglobal s n => Some(Aglobal s (Int.add n delta))
+ | Abased s n => Some(Abased s (Int.add n delta))
+ | Ainstack n => Some(Ainstack (Int.add n delta))
+ end.
+
+Lemma eval_offset_addressing:
+ forall (F V: Type) (ge: Genv.t F V) sp addr args delta addr' v,
+ offset_addressing addr delta = Some addr' ->
+ eval_addressing ge sp addr args = Some v ->
+ eval_addressing ge sp addr' args = Some(Val.add v (Vint delta)).
+Proof.
+ intros. destruct addr; simpl in H; inv H; simpl in *; FuncInv; subst.
+ rewrite Val.add_assoc; auto.
+ unfold symbol_address. destruct (Genv.find_symbol ge i); auto.
+ unfold symbol_address. destruct (Genv.find_symbol ge i); auto.
+ rewrite Val.add_assoc. rewrite Val.add_permut. rewrite Val.add_commut. auto.
+ rewrite Val.add_assoc. auto.
+Qed.
+
(** Transformation of addressing modes with two operands or more
into an equivalent arithmetic operation. This is used in the [Reload]
pass when a store instruction cannot be reloaded directly because
@@ -510,14 +550,6 @@ Proof.
intros. destruct addr; simpl in H; reflexivity || omegaContradiction.
Qed.
-(** Two-address operations. There is only one: rotate-mask-insert. *)
-
-Definition two_address_op (op: operation) : bool :=
- match op with
- | Oroli _ _ => true
- | _ => false
- end.
-
(** Operations that are so cheap to recompute that CSE should not factor them out. *)
Definition is_trivial_op (op: operation) : bool :=
@@ -767,7 +799,10 @@ Proof.
inv H4; simpl in H1; inv H1. simpl. destruct (Float.intoffloat f0); simpl in H2; inv H2.
exists (Vint i); auto.
inv H4; inv H2; simpl; auto.
- subst v1. destruct (eval_condition c vl1 m1) eqn:?.
+ inv H4; inv H2; simpl; auto.
+ inv H4; simpl; auto.
+ inv H4; simpl; auto.
+ subst. destruct (eval_condition c vl1 m1) eqn:?.
exploit eval_condition_inj; eauto. intros EQ; rewrite EQ.
destruct b; simpl; constructor.
simpl; constructor.
diff --git a/powerpc/PrintAsm.ml b/powerpc/PrintAsm.ml
index 2d69be1e..319e12c1 100644
--- a/powerpc/PrintAsm.ml
+++ b/powerpc/PrintAsm.ml
@@ -223,8 +223,8 @@ let rolm_mask n =
- annotation statements: take their arguments in registers or stack
locations; generate no code;
- inlined by the compiler: take their arguments in arbitrary
- registers; preserve all registers except the temporaries
- (GPR0, GPR11, GPR12, FPR0, FPR12, FPR13);
+ registers; preserve all registers except the reserved temporaries
+ (GPR0, GPR11, GPR12, FPR13);
- inlined while printing asm code; take their arguments in
locations dictated by the calling conventions; preserve
callee-save regs only. *)
@@ -239,11 +239,12 @@ let print_annot_val oc txt args res =
fprintf oc "%s annotation: " comment;
PrintAnnot.print_annot_val preg oc txt args;
match args, res with
- | IR src :: _, IR dst ->
+ | [IR src], [IR dst] ->
if dst <> src then fprintf oc " mr %a, %a\n" ireg dst ireg src
- | FR src :: _, FR dst ->
+ | [FR src], [FR dst] ->
if dst <> src then fprintf oc " fmr %a, %a\n" freg dst freg src
- | _, _ -> assert false
+ | _, _ ->
+ assert false
(* Handling of memcpy *)
@@ -256,8 +257,8 @@ let print_annot_val oc txt args res =
let print_builtin_memcpy_small oc sz al src dst =
let rec copy ofs sz =
if sz >= 8 && al >= 4 then begin
- fprintf oc " lfd %a, %d(%a)\n" freg FPR0 ofs ireg src;
- fprintf oc " stfd %a, %d(%a)\n" freg FPR0 ofs ireg dst;
+ fprintf oc " lfd %a, %d(%a)\n" freg FPR13 ofs ireg src;
+ fprintf oc " stfd %a, %d(%a)\n" freg FPR13 ofs ireg dst;
copy (ofs + 8) (sz - 8)
end else if sz >= 4 then begin
fprintf oc " lwz %a, %d(%a)\n" ireg GPR0 ofs ireg src;
@@ -326,14 +327,23 @@ let print_builtin_vload_common oc chunk base offset res =
fprintf oc " lfs %a, %a(%a)\n" freg res constant offset ireg base
| (Mfloat64 | Mfloat64al32), FR res ->
fprintf oc " lfd %a, %a(%a)\n" freg res constant offset ireg base
+ (* Mint64 is special-cased below *)
| _ ->
assert false
let print_builtin_vload oc chunk args res =
fprintf oc "%s begin builtin __builtin_volatile_read\n" comment;
- begin match args with
- | [IR addr] ->
+ begin match args, res with
+ | [IR addr], [res] when chunk <> Mint64 ->
print_builtin_vload_common oc chunk addr (Cint Integers.Int.zero) res
+ | [IR addr], [IR res1; IR res2] when chunk = Mint64 ->
+ if addr <> res1 then begin
+ fprintf oc " lwz %a, 0(%a)\n" ireg res1 ireg addr;
+ fprintf oc " lwz %a, 4(%a)\n" ireg res2 ireg addr
+ end else begin
+ fprintf oc " lwz %a, 4(%a)\n" ireg res2 ireg addr;
+ fprintf oc " lwz %a, 0(%a)\n" ireg res1 ireg addr
+ end
| _ ->
assert false
end;
@@ -341,9 +351,24 @@ let print_builtin_vload oc chunk args res =
let print_builtin_vload_global oc chunk id ofs args res =
fprintf oc "%s begin builtin __builtin_volatile_read\n" comment;
- fprintf oc " addis %a, %a, %a\n"
- ireg GPR11 ireg_or_zero GPR0 constant (Csymbol_high(id, ofs));
- print_builtin_vload_common oc chunk GPR11 (Csymbol_low(id, ofs)) res;
+ begin match res with
+ | [res] when chunk <> Mint64 ->
+ fprintf oc " addis %a, %a, %a\n"
+ ireg GPR11 ireg_or_zero GPR0 constant (Csymbol_high(id, ofs));
+ print_builtin_vload_common oc chunk GPR11 (Csymbol_low(id, ofs)) res
+ | [IR res1; IR res2] when chunk = Mint64 ->
+ fprintf oc " addis %a, %a, %a\n"
+ ireg res1 ireg_or_zero GPR0 constant (Csymbol_high(id, ofs));
+ fprintf oc " lwz %a, %a(%a)\n"
+ ireg res1 constant (Csymbol_low(id, ofs)) ireg res1;
+ let ofs = Integers.Int.add ofs (coqint_of_camlint 4l) in
+ fprintf oc " addis %a, %a, %a\n"
+ ireg res2 ireg_or_zero GPR0 constant (Csymbol_high(id, ofs));
+ fprintf oc " lwz %a, %a(%a)\n"
+ ireg res2 constant (Csymbol_low(id, ofs)) ireg res2
+ | _ ->
+ assert false
+ end;
fprintf oc "%s end builtin __builtin_volatile_read\n" comment
let print_builtin_vstore_common oc chunk base offset src =
@@ -359,14 +384,18 @@ let print_builtin_vstore_common oc chunk base offset src =
fprintf oc " stfs %a, %a(%a)\n" freg FPR13 constant offset ireg base
| (Mfloat64 | Mfloat64al32), FR src ->
fprintf oc " stfd %a, %a(%a)\n" freg src constant offset ireg base
+ (* Mint64 is special-cased below *)
| _ ->
assert false
let print_builtin_vstore oc chunk args =
fprintf oc "%s begin builtin __builtin_volatile_write\n" comment;
begin match args with
- | [IR addr; src] ->
+ | [IR addr; src] when chunk <> Mint64 ->
print_builtin_vstore_common oc chunk addr (Cint Integers.Int.zero) src
+ | [IR addr; IR src1; IR src2] when chunk = Mint64 ->
+ fprintf oc " stw %a, 0(%a)\n" ireg src1 ireg addr;
+ fprintf oc " stw %a, 4(%a)\n" ireg src2 ireg addr
| _ ->
assert false
end;
@@ -375,11 +404,24 @@ let print_builtin_vstore oc chunk args =
let print_builtin_vstore_global oc chunk id ofs args =
fprintf oc "%s begin builtin __builtin_volatile_write\n" comment;
begin match args with
- | [src] ->
+ | [src] when chunk <> Mint64 ->
let tmp = if src = IR GPR11 then GPR12 else GPR11 in
fprintf oc " addis %a, %a, %a\n"
ireg tmp ireg_or_zero GPR0 constant (Csymbol_high(id, ofs));
print_builtin_vstore_common oc chunk tmp (Csymbol_low(id, ofs)) src
+ | [IR src1; IR src2] when chunk = Mint64 ->
+ let tmp =
+ if not (List.mem GPR12 [src1; src2]) then GPR12 else
+ if not (List.mem GPR11 [src1; src2]) then GPR11 else GPR10 in
+ fprintf oc " addis %a, %a, %a\n"
+ ireg tmp ireg_or_zero GPR0 constant (Csymbol_high(id, ofs));
+ fprintf oc " stw %a, %a(%a)\n"
+ ireg src1 constant (Csymbol_low(id, ofs)) ireg tmp;
+ let ofs = Integers.Int.add ofs (coqint_of_camlint 4l) in
+ fprintf oc " addis %a, %a, %a\n"
+ ireg tmp ireg_or_zero GPR0 constant (Csymbol_high(id, ofs));
+ fprintf oc " stw %a, %a(%a)\n"
+ ireg src2 constant (Csymbol_low(id, ofs)) ireg tmp
| _ ->
assert false
end;
@@ -389,47 +431,84 @@ let print_builtin_vstore_global oc chunk id ofs args =
let print_builtin_inline oc name args res =
fprintf oc "%s begin builtin %s\n" comment name;
- (* Can use as temporaries: GPR0, GPR11, GPR12, FPR0, FPR12, FPR13 *)
+ (* Can use as temporaries: GPR0, FPR13 *)
begin match name, args, res with
(* Integer arithmetic *)
- | "__builtin_mulhw", [IR a1; IR a2], IR res ->
+ | "__builtin_mulhw", [IR a1; IR a2], [IR res] ->
fprintf oc " mulhw %a, %a, %a\n" ireg res ireg a1 ireg a2
- | "__builtin_mulhwu", [IR a1; IR a2], IR res ->
+ | "__builtin_mulhwu", [IR a1; IR a2], [IR res] ->
fprintf oc " mulhwu %a, %a, %a\n" ireg res ireg a1 ireg a2
- | "__builtin_cntlz", [IR a1], IR res ->
+ | "__builtin_cntlz", [IR a1], [IR res] ->
fprintf oc " cntlzw %a, %a\n" ireg res ireg a1
- | "__builtin_bswap", [IR a1], IR res ->
+ | "__builtin_bswap", [IR a1], [IR res] ->
fprintf oc " stwu %a, -8(%a)\n" ireg a1 ireg GPR1;
fprintf oc " lwbrx %a, %a, %a\n" ireg res ireg_or_zero GPR0 ireg GPR1;
fprintf oc " addi %a, %a, 8\n" ireg GPR1 ireg GPR1
(* Float arithmetic *)
- | "__builtin_fmadd", [FR a1; FR a2; FR a3], FR res ->
+ | "__builtin_fmadd", [FR a1; FR a2; FR a3], [FR res] ->
fprintf oc " fmadd %a, %a, %a, %a\n" freg res freg a1 freg a2 freg a3
- | "__builtin_fmsub", [FR a1; FR a2; FR a3], FR res ->
+ | "__builtin_fmsub", [FR a1; FR a2; FR a3], [FR res] ->
fprintf oc " fmsub %a, %a, %a, %a\n" freg res freg a1 freg a2 freg a3
- | "__builtin_fnmadd", [FR a1; FR a2; FR a3], FR res ->
+ | "__builtin_fnmadd", [FR a1; FR a2; FR a3], [FR res] ->
fprintf oc " fnmadd %a, %a, %a, %a\n" freg res freg a1 freg a2 freg a3
- | "__builtin_fnmsub", [FR a1; FR a2; FR a3], FR res ->
+ | "__builtin_fnmsub", [FR a1; FR a2; FR a3], [FR res] ->
fprintf oc " fnmsub %a, %a, %a, %a\n" freg res freg a1 freg a2 freg a3
- | "__builtin_fabs", [FR a1], FR res ->
+ | "__builtin_fabs", [FR a1], [FR res] ->
fprintf oc " fabs %a, %a\n" freg res freg a1
- | "__builtin_fsqrt", [FR a1], FR res ->
+ | "__builtin_fsqrt", [FR a1], [FR res] ->
fprintf oc " fsqrt %a, %a\n" freg res freg a1
- | "__builtin_frsqrte", [FR a1], FR res ->
+ | "__builtin_frsqrte", [FR a1], [FR res] ->
fprintf oc " frsqrte %a, %a\n" freg res freg a1
- | "__builtin_fres", [FR a1], FR res ->
+ | "__builtin_fres", [FR a1], [FR res] ->
fprintf oc " fres %a, %a\n" freg res freg a1
- | "__builtin_fsel", [FR a1; FR a2; FR a3], FR res ->
+ | "__builtin_fsel", [FR a1; FR a2; FR a3], [FR res] ->
fprintf oc " fsel %a, %a, %a, %a\n" freg res freg a1 freg a2 freg a3
- | "__builtin_fcti", [FR a1], IR res ->
+ | "__builtin_fcti", [FR a1], [IR res] ->
fprintf oc " fctiw %a, %a\n" freg FPR13 freg a1;
fprintf oc " stfdu %a, -8(%a)\n" freg FPR13 ireg GPR1;
fprintf oc " lwz %a, 4(%a)\n" ireg res ireg GPR1;
fprintf oc " addi %a, %a, 8\n" ireg GPR1 ireg GPR1
+ (* 64-bit integer arithmetic *)
+ | "__builtin_negl", [IR ah; IR al], [IR rh; IR rl] ->
+ if rl = ah then begin
+ fprintf oc " subfic %a, %a, 0\n" ireg GPR0 ireg al;
+ fprintf oc " subfze %a, %a\n" ireg rh ireg ah;
+ fprintf oc " mr %a, %a\n" ireg rl ireg GPR0
+ end else begin
+ fprintf oc " subfic %a, %a, 0\n" ireg rl ireg al;
+ fprintf oc " subfze %a, %a\n" ireg rh ireg ah
+ end
+ | "__builtin_addl", [IR ah; IR al; IR bh; IR bl], [IR rh; IR rl] ->
+ if rl = ah || rl = bh then begin
+ fprintf oc " addc %a, %a, %a\n" ireg GPR0 ireg al ireg bl;
+ fprintf oc " adde %a, %a, %a\n" ireg rh ireg ah ireg bh;
+ fprintf oc " mr %a, %a\n" ireg rl ireg GPR0
+ end else begin
+ fprintf oc " addc %a, %a, %a\n" ireg rl ireg al ireg bl;
+ fprintf oc " adde %a, %a, %a\n" ireg rh ireg ah ireg bh
+ end
+ | "__builtin_subl", [IR ah; IR al; IR bh; IR bl], [IR rh; IR rl] ->
+ if rl = ah || rl = bh then begin
+ fprintf oc " subfc %a, %a, %a\n" ireg GPR0 ireg bl ireg al;
+ fprintf oc " subfe %a, %a, %a\n" ireg rh ireg bh ireg ah;
+ fprintf oc " mr %a, %a\n" ireg rl ireg GPR0
+ end else begin
+ fprintf oc " subfc %a, %a, %a\n" ireg rl ireg bl ireg al;
+ fprintf oc " subfe %a, %a, %a\n" ireg rh ireg bh ireg ah
+ end
+ | "__builtin_mull", [IR a; IR b], [IR rh; IR rl] ->
+ if rl = a || rl = b then begin
+ fprintf oc " mullw %a, %a, %a\n" ireg GPR0 ireg a ireg b;
+ fprintf oc " mulhwu %a, %a, %a\n" ireg rh ireg a ireg b;
+ fprintf oc " mr %a, %a\n" ireg rl ireg GPR0
+ end else begin
+ fprintf oc " mullw %a, %a, %a\n" ireg rl ireg a ireg b;
+ fprintf oc " mulhwu %a, %a, %a\n" ireg rh ireg a ireg b
+ end
(* Memory accesses *)
- | "__builtin_read16_reversed", [IR a1], IR res ->
+ | "__builtin_read16_reversed", [IR a1], [IR res] ->
fprintf oc " lhbrx %a, %a, %a\n" ireg res ireg_or_zero GPR0 ireg a1
- | "__builtin_read32_reversed", [IR a1], IR res ->
+ | "__builtin_read32_reversed", [IR a1], [IR res] ->
fprintf oc " lwbrx %a, %a, %a\n" ireg res ireg_or_zero GPR0 ireg a1
| "__builtin_write16_reversed", [IR a1; IR a2], _ ->
fprintf oc " sthbrx %a, %a, %a\n" ireg a2 ireg_or_zero GPR0 ireg a1
@@ -484,9 +563,9 @@ let print_instruction oc tbl pc fallthrough = function
if adj >= -0x8000l then
fprintf oc " stwu %a, %ld(%a)\n" ireg GPR1 adj ireg GPR1
else begin
- fprintf oc " addis %a, 0, %ld\n" ireg GPR12 (Int32.shift_right_logical adj 16);
- fprintf oc " ori %a, %a, %ld\n" ireg GPR12 ireg GPR12 (Int32.logand adj 0xFFFFl);
- fprintf oc " stwux %a, %a, %a\n" ireg GPR1 ireg GPR1 ireg GPR12
+ fprintf oc " addis %a, 0, %ld\n" ireg GPR0 (Int32.shift_right_logical adj 16);
+ fprintf oc " ori %a, %a, %ld\n" ireg GPR0 ireg GPR0 (Int32.logand adj 0xFFFFl);
+ fprintf oc " stwux %a, %a, %a\n" ireg GPR1 ireg GPR1 ireg GPR0
end
| Pand_(r1, r2, r3) ->
fprintf oc " and. %a, %a, %a\n" ireg r1 ireg r2 ireg r3
@@ -860,7 +939,7 @@ let print_function oc name code =
(* Generation of stub functions *)
-let re_variadic_stub = Str.regexp "\\(.*\\)\\$[if]*$"
+let re_variadic_stub = Str.regexp "\\(.*\\)\\$[ifl]*$"
(* Stubs for EABI *)
@@ -908,6 +987,11 @@ let print_init oc = function
fprintf oc " .short %ld\n" (camlint_of_coqint n)
| Init_int32 n ->
fprintf oc " .long %ld\n" (camlint_of_coqint n)
+ | Init_int64 n ->
+ let b = camlint64_of_coqint n in
+ fprintf oc " .long 0x%Lx, 0x%Lx\n"
+ (Int64.shift_right_logical b 32)
+ (Int64.logand b 0xFFFFFFFFL)
| Init_float32 n ->
fprintf oc " .long 0x%lx %s %.18g\n"
(camlint_of_coqint (Floats.Float.bits_of_single n))
diff --git a/powerpc/PrintOp.ml b/powerpc/PrintOp.ml
index a2f9ff83..cac8cd2b 100644
--- a/powerpc/PrintOp.ml
+++ b/powerpc/PrintOp.ml
@@ -96,6 +96,9 @@ let print_operation reg pp = function
| Osingleoffloat, [r1] -> fprintf pp "singleoffloat(%a)" reg r1
| Ointoffloat, [r1] -> fprintf pp "intoffloat(%a)" reg r1
| Ofloatofwords, [r1;r2] -> fprintf pp "floatofwords(%a,%a)" reg r1 reg r2
+ | Omakelong, [r1;r2] -> fprintf pp "makelong(%a,%a)" reg r1 reg r2
+ | Olowlong, [r1] -> fprintf pp "lowlong(%a)" reg r1
+ | Ohighlong, [r1] -> fprintf pp "highlong(%a)" reg r1
| Ocmp c, args -> print_condition reg pp (c, args)
| _ -> fprintf pp "<bad operator>"
diff --git a/powerpc/SelectOp.vp b/powerpc/SelectOp.vp
index 905a4489..a0118477 100644
--- a/powerpc/SelectOp.vp
+++ b/powerpc/SelectOp.vp
@@ -200,6 +200,8 @@ Nondetfunction mul (e1: expr) (e2: expr) :=
(** ** Bitwise and, or, xor *)
Nondetfunction andimm (n1: int) (e2: expr) :=
+ if Int.eq n1 Int.zero then Eop (Ointconst Int.zero) Enil else
+ if Int.eq n1 Int.mone then e2 else
match e2 with
| Eop (Ointconst n2) Enil =>
Eop (Ointconst (Int.and n1 n2)) Enil
@@ -240,6 +242,8 @@ Definition same_expr_pure (e1 e2: expr) :=
end.
Nondetfunction orimm (n1: int) (e2: expr) :=
+ if Int.eq n1 Int.zero then e2 else
+ if Int.eq n1 Int.mone then Eop (Ointconst Int.mone) Enil else
match e2 with
| Eop (Ointconst n2) Enil => Eop (Ointconst (Int.or n1 n2)) Enil
| Eop (Oorimm n2) (t2:::Enil) => Eop (Oorimm (Int.or n1 n2)) (t2:::Enil)
@@ -268,6 +272,7 @@ Nondetfunction or (e1: expr) (e2: expr) :=
end.
Nondetfunction xorimm (n1: int) (e2: expr) :=
+ if Int.eq n1 Int.zero then e2 else
match e2 with
| Eop (Ointconst n2) Enil => Eop (Ointconst (Int.xor n1 n2)) Enil
| Eop (Oxorimm n2) (t2:::Enil) => Eop (Oxorimm (Int.xor n1 n2)) (t2:::Enil)
@@ -444,12 +449,18 @@ Definition singleoffloat (e: expr) := Eop Osingleoffloat (e ::: Enil).
(** ** Recognition of addressing modes for load and store operations *)
+Definition can_use_Aindexed2 (chunk: memory_chunk): bool :=
+ match chunk with Mint64 => false | _ => true end.
+
Nondetfunction addressing (chunk: memory_chunk) (e: expr) :=
match e with
| Eop (Oaddrsymbol s n) Enil => (Aglobal s n, Enil)
| Eop (Oaddrstack n) Enil => (Ainstack n, Enil)
| Eop Oadd (Eop (Oaddrsymbol s n) Enil ::: e2 ::: Enil) => (Abased s n, e2:::Enil)
| Eop (Oaddimm n) (e1:::Enil) => (Aindexed n, e1:::Enil)
- | Eop Oadd (e1:::e2:::Enil) => (Aindexed2, e1:::e2:::Enil)
+ | Eop Oadd (e1:::e2:::Enil) =>
+ if can_use_Aindexed2 chunk
+ then (Aindexed2, e1:::e2:::Enil)
+ else (Aindexed Int.zero, Eop Oadd (e1:::e2:::Enil) ::: Enil)
| _ => (Aindexed Int.zero, e:::Enil)
end.
diff --git a/powerpc/SelectOpproof.v b/powerpc/SelectOpproof.v
index 7be88580..f7513140 100644
--- a/powerpc/SelectOpproof.v
+++ b/powerpc/SelectOpproof.v
@@ -27,13 +27,6 @@ Require Import SelectOp.
Open Local Scope cminorsel_scope.
-Section CMCONSTR.
-
-Variable ge: genv.
-Variable sp: val.
-Variable e: env.
-Variable m: mem.
-
(** * Useful lemmas and tactics *)
(** The following are trivial lemmas and custom tactics that help
@@ -81,6 +74,13 @@ Ltac TrivialExists :=
(** * Correctness of the smart constructors *)
+Section CMCONSTR.
+
+Variable ge: genv.
+Variable sp: val.
+Variable e: env.
+Variable m: mem.
+
(** We now show that the code generated by "smart constructor" functions
such as [SelectOp.notint] behaves as expected. Continuing the
[notint] example, we show that if the expression [e]
@@ -163,7 +163,7 @@ Proof.
rewrite Int.add_commut. auto.
unfold symbol_address. destruct (Genv.find_symbol ge s); simpl; auto. rewrite Int.add_commut; auto.
rewrite Val.add_assoc. rewrite Int.add_commut. auto.
- subst x. rewrite Val.add_assoc. rewrite Int.add_commut. auto.
+ subst. rewrite Val.add_assoc. rewrite Int.add_commut. auto.
Qed.
Theorem eval_add: binary_constructor_sound add Val.add.
@@ -327,7 +327,15 @@ Qed.
Theorem eval_andimm:
forall n, unary_constructor_sound (andimm n) (fun x => Val.and x (Vint n)).
Proof.
- intros; red; intros until x. unfold andimm. case (andimm_match a); intros.
+ intros; red; intros until x. unfold andimm.
+ predSpec Int.eq Int.eq_spec n Int.zero.
+ intros. subst. exists (Vint Int.zero); split. EvalOp.
+ destruct x; simpl; auto. rewrite Int.and_zero; auto.
+ predSpec Int.eq Int.eq_spec n Int.mone.
+ intros. subst. exists x; split. auto.
+ destruct x; simpl; auto. rewrite Int.and_mone; auto.
+ clear H H0.
+ case (andimm_match a); intros.
InvEval. TrivialExists. simpl. rewrite Int.and_commut; auto.
set (n' := Int.and n n2).
destruct (Int.eq (Int.shru (Int.shl n' amount) amount) n' &&
@@ -376,8 +384,12 @@ Qed.
Theorem eval_orimm:
forall n, unary_constructor_sound (orimm n) (fun x => Val.or x (Vint n)).
Proof.
- intros; red; intros until x.
- unfold orimm. destruct (orimm_match a); intros; InvEval.
+ intros; red; intros until x. unfold orimm.
+ predSpec Int.eq Int.eq_spec n Int.zero.
+ intros. subst. exists x; split; auto. destruct x; simpl; auto. rewrite Int.or_zero; auto.
+ predSpec Int.eq Int.eq_spec n Int.mone.
+ intros. subst. exists (Vint Int.mone); split. EvalOp. destruct x; simpl; auto. rewrite Int.or_mone; auto.
+ clear H H0. destruct (orimm_match a); intros; InvEval.
TrivialExists. simpl. rewrite Int.or_commut; auto.
subst. rewrite Val.or_assoc. simpl. rewrite Int.or_commut. TrivialExists.
TrivialExists.
@@ -433,8 +445,10 @@ Qed.
Theorem eval_xorimm:
forall n, unary_constructor_sound (xorimm n) (fun x => Val.xor x (Vint n)).
Proof.
- intros; red; intros until x.
- unfold xorimm. destruct (xorimm_match a); intros; InvEval.
+ intros; red; intros until x. unfold xorimm.
+ predSpec Int.eq Int.eq_spec n Int.zero.
+ intros. subst. exists x; split; auto. destruct x; simpl; auto. rewrite Int.xor_zero; auto.
+ clear H. destruct (xorimm_match a); intros; InvEval.
TrivialExists. simpl. rewrite Int.xor_commut; auto.
subst. rewrite Val.xor_assoc. simpl. rewrite Int.xor_commut. TrivialExists.
TrivialExists.
@@ -445,9 +459,9 @@ Proof.
red; intros until y; unfold xor; case (xor_match a b); intros; InvEval.
rewrite Val.xor_commut. apply eval_xorimm; auto.
apply eval_xorimm; auto.
- subst x. rewrite Val.xor_commut. rewrite Val.not_xor. rewrite <- Val.xor_assoc.
+ subst. rewrite Val.xor_commut. rewrite Val.not_xor. rewrite <- Val.xor_assoc.
rewrite <- Val.not_xor. rewrite Val.xor_commut. TrivialExists.
- subst y. rewrite Val.not_xor. rewrite <- Val.xor_assoc. rewrite <- Val.not_xor. TrivialExists.
+ subst. rewrite Val.not_xor. rewrite <- Val.xor_assoc. rewrite <- Val.not_xor. TrivialExists.
TrivialExists.
Qed.
@@ -843,7 +857,11 @@ Proof.
exists (@nil val). split. eauto with evalexpr. simpl. auto.
exists (v0 :: nil). split. eauto with evalexpr. simpl. congruence.
exists (v1 :: nil). split. eauto with evalexpr. simpl. congruence.
+ destruct (can_use_Aindexed2 chunk).
exists (v1 :: v0 :: nil). split. eauto with evalexpr. simpl. congruence.
+ exists (Vptr b ofs :: nil). split.
+ constructor. EvalOp. simpl; congruence. constructor.
+ simpl. rewrite Int.add_zero. auto.
exists (v :: nil). split. eauto with evalexpr. subst v. simpl.
rewrite Int.add_zero. auto.
Qed.
diff --git a/powerpc/eabi/Conventions1.v b/powerpc/eabi/Conventions1.v
index 652f4a1b..8ff0dc01 100644
--- a/powerpc/eabi/Conventions1.v
+++ b/powerpc/eabi/Conventions1.v
@@ -21,21 +21,18 @@ Require Import Locations.
(** Machine registers (type [mreg] in module [Locations]) are divided in
the following groups:
-- Temporaries used for spilling, reloading, and parallel move operations.
-- Allocatable registers, that can be assigned to RTL pseudo-registers.
- These are further divided into:
--- Callee-save registers, whose value is preserved across a function call.
--- Caller-save registers that can be modified during a function call.
+- Callee-save registers, whose value is preserved across a function call.
+- Caller-save registers that can be modified during a function call.
We follow the PowerPC/EABI application binary interface (ABI) in our choice
of callee- and caller-save registers.
*)
Definition int_caller_save_regs :=
- R3 :: R4 :: R5 :: R6 :: R7 :: R8 :: R9 :: R10 :: nil.
+ R3 :: R4 :: R5 :: R6 :: R7 :: R8 :: R9 :: R10 :: R11 :: R12 :: nil.
Definition float_caller_save_regs :=
- F1 :: F2 :: F3 :: F4 :: F5 :: F6 :: F7 :: F8 :: F9 :: F10 :: F11 :: nil.
+ F0 :: F1 :: F2 :: F3 :: F4 :: F5 :: F6 :: F7 :: F8 :: F9 :: F10 :: F11 :: F12 :: F13 :: nil.
Definition int_callee_save_regs :=
R31 :: R30 :: R29 :: R28 :: R27 :: R26 :: R25 :: R24 :: R23 ::
@@ -45,26 +42,11 @@ Definition float_callee_save_regs :=
F31 :: F30 :: F29 :: F28 :: F27 :: F26 :: F25 :: F24 :: F23 ::
F22 :: F21 :: F20 :: F19 :: F18 :: F17 :: F16 :: F15 :: F14 :: nil.
-Definition destroyed_at_call_regs :=
- int_caller_save_regs ++ float_caller_save_regs.
-
Definition destroyed_at_call :=
- List.map R destroyed_at_call_regs.
-
-Definition int_temporaries := IT1 :: IT2 :: nil.
-
-Definition float_temporaries := FT1 :: FT2 :: FT3 :: nil.
-
-Definition temporary_regs := int_temporaries ++ float_temporaries.
-
-Definition temporaries := List.map R temporary_regs.
-
-Definition destroyed_at_move_regs: list mreg := nil.
-
-Definition destroyed_at_move := List.map R destroyed_at_move_regs.
+ int_caller_save_regs ++ float_caller_save_regs.
Definition dummy_int_reg := R3. (**r Used in [Coloring]. *)
-Definition dummy_float_reg := F1. (**r Used in [Coloring]. *)
+Definition dummy_float_reg := F0. (**r Used in [Coloring]. *)
(** The [index_int_callee_save] and [index_float_callee_save] associate
a unique positive integer to callee-save registers. This integer is
@@ -175,34 +157,26 @@ Qed.
Lemma register_classification:
forall r,
- (In (R r) temporaries \/ In (R r) destroyed_at_call) \/
- (In r int_callee_save_regs \/ In r float_callee_save_regs).
+ In r destroyed_at_call \/ In r int_callee_save_regs \/ In r float_callee_save_regs.
Proof.
destruct r;
- try (left; left; simpl; OrEq);
- try (left; right; simpl; OrEq);
+ try (left; simpl; OrEq);
try (right; left; simpl; OrEq);
try (right; right; simpl; OrEq).
Qed.
Lemma int_callee_save_not_destroyed:
forall r,
- In (R r) temporaries \/ In (R r) destroyed_at_call ->
- ~(In r int_callee_save_regs).
+ In r destroyed_at_call -> In r int_callee_save_regs -> False.
Proof.
- intros; red; intros. elim H.
- generalize H0. simpl; ElimOrEq; NotOrEq.
- generalize H0. simpl; ElimOrEq; NotOrEq.
+ intros. revert H0 H. simpl. ElimOrEq; NotOrEq.
Qed.
Lemma float_callee_save_not_destroyed:
forall r,
- In (R r) temporaries \/ In (R r) destroyed_at_call ->
- ~(In r float_callee_save_regs).
+ In r destroyed_at_call -> In r float_callee_save_regs -> False.
Proof.
- intros; red; intros. elim H.
- generalize H0. simpl; ElimOrEq; NotOrEq.
- generalize H0. simpl; ElimOrEq; NotOrEq.
+ intros. revert H0 H. simpl. ElimOrEq; NotOrEq.
Qed.
Lemma int_callee_save_type:
@@ -259,39 +233,27 @@ Qed.
(** ** Location of function result *)
(** The result value of a function is passed back to the caller in
- registers [R3] or [F1], depending on the type of the returned value.
+ registers [R3] or [F1] or [R3, R4], depending on the type of the returned value.
We treat a function without result as a function with one integer result. *)
-Definition loc_result (s: signature) : mreg :=
+Definition loc_result (s: signature) : list mreg :=
match s.(sig_res) with
- | None => R3
- | Some Tint => R3
- | Some Tfloat => F1
+ | None => R3 :: nil
+ | Some Tint => R3 :: nil
+ | Some Tfloat => F1 :: nil
+ | Some Tlong => R3 :: R4 :: nil
end.
-(** The result location has the type stated in the signature. *)
-
-Lemma loc_result_type:
- forall sig,
- mreg_type (loc_result sig) =
- match sig.(sig_res) with None => Tint | Some ty => ty end.
-Proof.
- intros; unfold loc_result.
- destruct (sig_res sig).
- destruct t; reflexivity.
- reflexivity.
-Qed.
-
-(** The result location is a caller-save register or a temporary *)
+(** The result location is a caller-save register *)
Lemma loc_result_caller_save:
- forall (s: signature),
- In (R (loc_result s)) destroyed_at_call \/ In (R (loc_result s)) temporaries.
+ forall (s: signature) (r: mreg),
+ In r (loc_result s) -> In r destroyed_at_call.
Proof.
- intros; unfold loc_result. left;
- destruct (sig_res s).
- destruct t; simpl; OrEq.
- simpl; OrEq.
+ intros.
+ assert (r = R3 \/ r = R4 \/ r = F1).
+ unfold loc_result in H. destruct (sig_res s); [destruct t|idtac]; simpl in H; intuition.
+ destruct H0 as [A | [A | A]]; subst r; simpl; OrEq.
Qed.
(** ** Location of function arguments *)
@@ -300,213 +262,162 @@ Qed.
to a function:
- The first 8 integer arguments are passed in registers [R3] to [R10].
- The first 8 float arguments are passed in registers [F1] to [F8].
+- The first 4 long integer arguments are passed in register pairs [R3,R4] ... [R9,R10].
- Extra arguments are passed on the stack, in [Outgoing] slots, consecutively
assigned (1 word for an integer argument, 2 words for a float),
starting at word offset 0.
- No stack space is reserved for the arguments that are passed in registers.
*)
+Definition int_param_regs :=
+ R3 :: R4 :: R5 :: R6 :: R7 :: R8 :: R9 :: R10 :: nil.
+Definition float_param_regs :=
+ F1 :: F2 :: F3 :: F4 :: F5 :: F6 :: F7 :: F8 :: nil.
+
Fixpoint loc_arguments_rec
- (tyl: list typ) (iregl: list mreg) (fregl: list mreg)
- (ofs: Z) {struct tyl} : list loc :=
+ (tyl: list typ) (ir fr ofs: Z) {struct tyl} : list loc :=
match tyl with
| nil => nil
| Tint :: tys =>
- match iregl with
- | nil =>
- S (Outgoing ofs Tint) :: loc_arguments_rec tys nil fregl (ofs + 1)
- | ireg :: iregs =>
- R ireg :: loc_arguments_rec tys iregs fregl ofs
+ match list_nth_z int_param_regs ir with
+ | None =>
+ S Outgoing ofs Tint :: loc_arguments_rec tys ir fr (ofs + 1)
+ | Some ireg =>
+ R ireg :: loc_arguments_rec tys (ir + 1) fr ofs
end
| Tfloat :: tys =>
- match fregl with
- | nil =>
- S (Outgoing ofs Tfloat) :: loc_arguments_rec tys iregl nil (ofs + 2)
- | freg :: fregs =>
- R freg :: loc_arguments_rec tys iregl fregs ofs
+ match list_nth_z float_param_regs fr with
+ | None =>
+ S Outgoing ofs Tfloat :: loc_arguments_rec tys ir fr (ofs + 2)
+ | Some freg =>
+ R freg :: loc_arguments_rec tys ir (fr + 1) ofs
+ end
+ | Tlong :: tys =>
+ let ir := align ir 2 in
+ match list_nth_z int_param_regs ir, list_nth_z int_param_regs (ir + 1) with
+ | Some r1, Some r2 =>
+ R r1 :: R r2 :: loc_arguments_rec tys (ir + 2) fr ofs
+ | _, _ =>
+ S Outgoing ofs Tint :: S Outgoing (ofs + 1) Tint :: loc_arguments_rec tys ir fr (ofs + 2)
end
end.
-Definition int_param_regs :=
- R3 :: R4 :: R5 :: R6 :: R7 :: R8 :: R9 :: R10 :: nil.
-Definition float_param_regs :=
- F1 :: F2 :: F3 :: F4 :: F5 :: F6 :: F7 :: F8 :: nil.
-
(** [loc_arguments s] returns the list of locations where to store arguments
when calling a function with signature [s]. *)
Definition loc_arguments (s: signature) : list loc :=
- loc_arguments_rec s.(sig_args) int_param_regs float_param_regs 0.
+ loc_arguments_rec s.(sig_args) 0 0 0.
(** [size_arguments s] returns the number of [Outgoing] slots used
to call a function with signature [s]. *)
-Fixpoint size_arguments_rec
- (tyl: list typ) (iregl: list mreg) (fregl: list mreg)
- (ofs: Z) {struct tyl} : Z :=
+Fixpoint size_arguments_rec (tyl: list typ) (ir fr ofs: Z) {struct tyl} : Z :=
match tyl with
| nil => ofs
| Tint :: tys =>
- match iregl with
- | nil => size_arguments_rec tys nil fregl (ofs + 1)
- | ireg :: iregs => size_arguments_rec tys iregs fregl ofs
+ match list_nth_z int_param_regs ir with
+ | None => size_arguments_rec tys ir fr (ofs + 1)
+ | Some ireg => size_arguments_rec tys (ir + 1) fr ofs
end
| Tfloat :: tys =>
- match fregl with
- | nil => size_arguments_rec tys iregl nil (ofs + 2)
- | freg :: fregs => size_arguments_rec tys iregl fregs ofs
+ match list_nth_z float_param_regs fr with
+ | None => size_arguments_rec tys ir fr (ofs + 2)
+ | Some freg => size_arguments_rec tys ir (fr + 1) ofs
+ end
+ | Tlong :: tys =>
+ let ir := align ir 2 in
+ match list_nth_z int_param_regs ir, list_nth_z int_param_regs (ir + 1) with
+ | Some r1, Some r2 => size_arguments_rec tys (ir + 2) fr ofs
+ | _, _ => size_arguments_rec tys ir fr (ofs + 2)
end
end.
+
Definition size_arguments (s: signature) : Z :=
- size_arguments_rec s.(sig_args) int_param_regs float_param_regs 0.
+ size_arguments_rec s.(sig_args) 0 0 0.
(** A tail-call is possible for a signature if the corresponding
arguments are all passed in registers. *)
Definition tailcall_possible (s: signature) : Prop :=
forall l, In l (loc_arguments s) ->
- match l with R _ => True | S _ => False end.
+ match l with R _ => True | S _ _ _ => False end.
-(** Argument locations are either non-temporary registers or [Outgoing]
+(** Argument locations are either caller-save registers or [Outgoing]
stack slots at nonnegative offsets. *)
Definition loc_argument_acceptable (l: loc) : Prop :=
match l with
- | R r => ~(In l temporaries)
- | S (Outgoing ofs ty) => ofs >= 0
+ | R r => In r destroyed_at_call
+ | S Outgoing ofs ty => ofs >= 0 /\ ty <> Tlong
| _ => False
end.
Remark loc_arguments_rec_charact:
- forall tyl iregl fregl ofs l,
- In l (loc_arguments_rec tyl iregl fregl ofs) ->
+ forall tyl ir fr ofs l,
+ In l (loc_arguments_rec tyl ir fr ofs) ->
match l with
- | R r => In r iregl \/ In r fregl
- | S (Outgoing ofs' ty) => ofs' >= ofs
- | S _ => False
+ | R r => In r int_param_regs \/ In r float_param_regs
+ | S Outgoing ofs' ty => ofs' >= ofs /\ ty <> Tlong
+ | S _ _ _ => False
end.
Proof.
+Opaque list_nth_z.
induction tyl; simpl loc_arguments_rec; intros.
elim H.
- destruct a.
- destruct iregl; elim H; intro.
- subst l. omega.
- generalize (IHtyl _ _ _ _ H0). destruct l; auto. destruct s; auto. omega.
- subst l. auto with coqlib.
- generalize (IHtyl _ _ _ _ H0). destruct l; auto. simpl; intuition.
- destruct fregl; elim H; intro.
- subst l. omega.
- generalize (IHtyl _ _ _ _ H0). destruct l; auto. destruct s; auto. omega.
- subst l. auto with coqlib.
- generalize (IHtyl _ _ _ _ H0). destruct l; auto.
- intros [A|B]. left; auto. right; auto with coqlib.
+ destruct a.
+ destruct (list_nth_z int_param_regs ir) as [r|] eqn:E; destruct H.
+ subst. left. eapply list_nth_z_in; eauto.
+ eapply IHtyl; eauto.
+ subst. split. omega. congruence.
+ exploit IHtyl; eauto. destruct l; auto. destruct sl; auto. intuition omega.
+ destruct (list_nth_z float_param_regs fr) as [r|] eqn:E; destruct H.
+ subst. right. eapply list_nth_z_in; eauto.
+ eapply IHtyl; eauto.
+ subst. split. omega. congruence.
+ exploit IHtyl; eauto. destruct l; auto. destruct sl; auto. intuition omega.
+ set (ir' := align ir 2) in *.
+ destruct (list_nth_z int_param_regs ir') as [r1|] eqn:E1.
+ destruct (list_nth_z int_param_regs (ir' + 1)) as [r2|] eqn:E2.
+ destruct H. subst; left; eapply list_nth_z_in; eauto.
+ destruct H. subst; left; eapply list_nth_z_in; eauto.
+ eapply IHtyl; eauto.
+ destruct H. subst. split. omega. congruence.
+ destruct H. subst. split. omega. congruence.
+ exploit IHtyl; eauto. destruct l; auto. destruct sl; auto. intuition omega.
+ destruct H. subst. split. omega. congruence.
+ destruct H. subst. split. omega. congruence.
+ exploit IHtyl; eauto. destruct l; auto. destruct sl; auto. intuition omega.
Qed.
Lemma loc_arguments_acceptable:
- forall (s: signature) (r: loc),
- In r (loc_arguments s) -> loc_argument_acceptable r.
+ forall (s: signature) (l: loc),
+ In l (loc_arguments s) -> loc_argument_acceptable l.
Proof.
unfold loc_arguments; intros.
generalize (loc_arguments_rec_charact _ _ _ _ _ H).
- destruct r.
- intro H0; elim H0. simpl. unfold not. ElimOrEq; NotOrEq.
- simpl. unfold not. ElimOrEq; NotOrEq.
- destruct s0; try contradiction.
- simpl. omega.
+ destruct l.
+ intro H0; elim H0; simpl; ElimOrEq; OrEq.
+ destruct sl; try contradiction. simpl. intuition omega.
Qed.
Hint Resolve loc_arguments_acceptable: locs.
-(** Arguments are parwise disjoint (in the sense of [Loc.norepet]). *)
-
-Remark loc_arguments_rec_notin_reg:
- forall tyl iregl fregl ofs r,
- ~(In r iregl) -> ~(In r fregl) ->
- Loc.notin (R r) (loc_arguments_rec tyl iregl fregl ofs).
-Proof.
- induction tyl; simpl; intros.
- auto.
- destruct a.
- destruct iregl; simpl. auto.
- simpl in H. split. apply sym_not_equal. tauto.
- apply IHtyl. tauto. tauto.
- destruct fregl; simpl. auto.
- simpl in H0. split. apply sym_not_equal. tauto.
- apply IHtyl.
- red; intro. apply H. auto.
- tauto.
-Qed.
-
-Remark loc_arguments_rec_notin_local:
- forall tyl iregl fregl ofs ofs0 ty0,
- Loc.notin (S (Local ofs0 ty0)) (loc_arguments_rec tyl iregl fregl ofs).
-Proof.
- induction tyl; simpl; intros.
- auto.
- destruct a.
- destruct iregl; simpl; auto.
- destruct fregl; simpl; auto.
-Qed.
-
-Remark loc_arguments_rec_notin_outgoing:
- forall tyl iregl fregl ofs ofs0 ty0,
- ofs0 + typesize ty0 <= ofs ->
- Loc.notin (S (Outgoing ofs0 ty0)) (loc_arguments_rec tyl iregl fregl ofs).
-Proof.
- induction tyl; simpl; intros.
- auto.
- destruct a.
- destruct iregl; simpl.
- split. omega. eapply IHtyl. omega.
- auto.
- destruct fregl; simpl.
- split. omega. eapply IHtyl. omega.
- auto.
-Qed.
-
-Lemma loc_arguments_norepet:
- forall (s: signature), Loc.norepet (loc_arguments s).
-Proof.
- assert (forall tyl iregl fregl ofs,
- list_norepet iregl ->
- list_norepet fregl ->
- list_disjoint iregl fregl ->
- Loc.norepet (loc_arguments_rec tyl iregl fregl ofs)).
- induction tyl; simpl; intros.
- constructor.
- destruct a.
- destruct iregl; constructor.
- apply loc_arguments_rec_notin_outgoing. simpl; omega. auto.
- apply loc_arguments_rec_notin_reg. inversion H. auto.
- apply list_disjoint_notin with (m :: iregl); auto with coqlib.
- apply IHtyl. inv H; auto. auto.
- eapply list_disjoint_cons_left; eauto.
- destruct fregl; constructor.
- apply loc_arguments_rec_notin_outgoing. simpl; omega. auto.
- apply loc_arguments_rec_notin_reg.
- red; intro. apply (H1 m m). auto.
- auto with coqlib. auto. inv H0; auto.
- apply IHtyl. auto.
- inv H0; auto.
- red; intros. apply H1. auto. auto with coqlib.
-
- intro. unfold loc_arguments. apply H.
- unfold int_param_regs. NoRepet.
- unfold float_param_regs. NoRepet.
- red; intros x y; simpl. ElimOrEq; ElimOrEq; discriminate.
-Qed.
-
(** The offsets of [Outgoing] arguments are below [size_arguments s]. *)
Remark size_arguments_rec_above:
- forall tyl iregl fregl ofs0,
- ofs0 <= size_arguments_rec tyl iregl fregl ofs0.
+ forall tyl ir fr ofs0,
+ ofs0 <= size_arguments_rec tyl ir fr ofs0.
Proof.
induction tyl; simpl; intros.
omega.
destruct a.
- destruct iregl. apply Zle_trans with (ofs0 + 1); auto; omega. auto.
- destruct fregl. apply Zle_trans with (ofs0 + 2); auto; omega. auto.
+ destruct (list_nth_z int_param_regs ir); eauto. apply Zle_trans with (ofs0 + 1); auto; omega.
+ destruct (list_nth_z float_param_regs fr); eauto. apply Zle_trans with (ofs0 + 2); auto; omega.
+ set (ir' := align ir 2).
+ destruct (list_nth_z int_param_regs ir'); eauto.
+ destruct (list_nth_z int_param_regs (ir' + 1)); eauto.
+ apply Zle_trans with (ofs0 + 2); auto; omega.
+ apply Zle_trans with (ofs0 + 2); auto; omega.
Qed.
Lemma size_arguments_above:
@@ -518,81 +429,39 @@ Qed.
Lemma loc_arguments_bounded:
forall (s: signature) (ofs: Z) (ty: typ),
- In (S (Outgoing ofs ty)) (loc_arguments s) ->
+ In (S Outgoing ofs ty) (loc_arguments s) ->
ofs + typesize ty <= size_arguments s.
Proof.
intros.
- assert (forall tyl iregl fregl ofs0,
- In (S (Outgoing ofs ty)) (loc_arguments_rec tyl iregl fregl ofs0) ->
- ofs + typesize ty <= size_arguments_rec tyl iregl fregl ofs0).
+ assert (forall tyl ir fr ofs0,
+ In (S Outgoing ofs ty) (loc_arguments_rec tyl ir fr ofs0) ->
+ ofs + typesize ty <= size_arguments_rec tyl ir fr ofs0).
induction tyl; simpl; intros.
elim H0.
- destruct a. destruct iregl; elim H0; intro.
- inv H1. simpl. apply size_arguments_rec_above. auto.
- discriminate. auto.
- destruct fregl; elim H0; intro.
- inv H1. simpl. apply size_arguments_rec_above. auto.
- discriminate. auto.
- unfold size_arguments. eapply H0. unfold loc_arguments in H. eauto.
-Qed.
-
-(** Temporary registers do not overlap with argument locations. *)
-
-Lemma loc_arguments_not_temporaries:
- forall sig, Loc.disjoint (loc_arguments sig) temporaries.
-Proof.
- intros; red; intros x1 x2 H.
- generalize (loc_arguments_rec_charact _ _ _ _ _ H).
- destruct x1.
- intro H0; elim H0; simpl; (ElimOrEq; ElimOrEq; congruence).
- destruct s; try contradiction. intro.
- simpl; ElimOrEq; auto.
-Qed.
-Hint Resolve loc_arguments_not_temporaries: locs.
-
-(** Argument registers are caller-save. *)
-
-Lemma arguments_caller_save:
- forall sig r,
- In (R r) (loc_arguments sig) -> In (R r) destroyed_at_call.
-Proof.
- unfold loc_arguments; intros.
- elim (loc_arguments_rec_charact _ _ _ _ _ H); simpl.
- ElimOrEq; intuition.
- ElimOrEq; intuition.
-Qed.
-
-(** Argument locations agree in number with the function signature. *)
-
-Lemma loc_arguments_length:
- forall sig,
- List.length (loc_arguments sig) = List.length sig.(sig_args).
-Proof.
- assert (forall tyl iregl fregl ofs,
- List.length (loc_arguments_rec tyl iregl fregl ofs) = List.length tyl).
- induction tyl; simpl; intros.
- auto.
- destruct a.
- destruct iregl; simpl; decEq; auto.
- destruct fregl; simpl; decEq; auto.
- intros. unfold loc_arguments. auto.
-Qed.
-
-(** Argument locations agree in types with the function signature. *)
-
-Lemma loc_arguments_type:
- forall sig, List.map Loc.type (loc_arguments sig) = sig.(sig_args).
-Proof.
- assert (forall tyl iregl fregl ofs,
- (forall r, In r iregl -> mreg_type r = Tint) ->
- (forall r, In r fregl -> mreg_type r = Tfloat) ->
- List.map Loc.type (loc_arguments_rec tyl iregl fregl ofs) = tyl).
- induction tyl; simpl; intros.
- auto.
- destruct a; [destruct iregl|destruct fregl]; simpl;
- f_equal; eauto with coqlib.
-
- intros. unfold loc_arguments. apply H.
- intro; simpl. ElimOrEq; reflexivity.
- intro; simpl. ElimOrEq; reflexivity.
+ destruct a.
+ destruct (list_nth_z int_param_regs ir); destruct H0.
+ congruence.
+ eauto.
+ inv H0. apply size_arguments_rec_above.
+ eauto.
+ destruct (list_nth_z float_param_regs fr); destruct H0.
+ congruence.
+ eauto.
+ inv H0. apply size_arguments_rec_above.
+ eauto.
+ set (ir' := align ir 2) in *.
+ destruct (list_nth_z int_param_regs ir').
+ destruct (list_nth_z int_param_regs (ir' + 1)).
+ destruct H0. congruence. destruct H0. congruence. eauto.
+ destruct H0. inv H0.
+ transitivity (ofs + 2). simpl; omega. eauto. apply size_arguments_rec_above.
+ destruct H0. inv H0.
+ transitivity (ofs0 + 2). simpl; omega. eauto. apply size_arguments_rec_above.
+ eauto.
+ destruct H0. inv H0.
+ transitivity (ofs + 2). simpl; omega. eauto. apply size_arguments_rec_above.
+ destruct H0. inv H0.
+ transitivity (ofs0 + 2). simpl; omega. eauto. apply size_arguments_rec_above.
+ eauto.
+ eauto.
Qed.
diff --git a/powerpc/eabi/Stacklayout.v b/powerpc/eabi/Stacklayout.v
index cd4d9ae4..be823c1e 100644
--- a/powerpc/eabi/Stacklayout.v
+++ b/powerpc/eabi/Stacklayout.v
@@ -24,12 +24,8 @@ Require Import Bounds.
Since we would rather store our return address in our own
frame, we will not use these 4 bytes, and just reserve them.
- Space for outgoing arguments to function calls.
-- Local stack slots of integer type.
-- Saved return address into caller.
+- Local stack slots.
- Saved values of integer callee-save registers used by the function.
-- One word of padding, if necessary to align the following data
- on a 8-byte boundary.
-- Local stack slots of float type.
- Saved values of float callee-save registers used by the function.
- Space for the stack-allocated data declared in Cminor.
@@ -43,10 +39,9 @@ Record frame_env : Type := mk_frame_env {
fe_size: Z;
fe_ofs_link: Z;
fe_ofs_retaddr: Z;
- fe_ofs_int_local: Z;
+ fe_ofs_local: Z;
fe_ofs_int_callee_save: Z;
fe_num_int_callee_save: Z;
- fe_ofs_float_local: Z;
fe_ofs_float_callee_save: Z;
fe_num_float_callee_save: Z;
fe_stack_data: Z
@@ -56,17 +51,17 @@ Record frame_env : Type := mk_frame_env {
function. *)
Definition make_env (b: bounds) :=
- let oil := 8 + 4 * b.(bound_outgoing) in (* integer locals *)
- let ora := oil + 4 * b.(bound_int_local) in (* saved return address *)
+ let ol := align (8 + 4 * b.(bound_outgoing)) 8 in (* locals *)
+ let ora := ol + 4 * b.(bound_local) in (* saved return address *)
let oics := ora + 4 in (* integer callee-saves *)
let oendi := oics + 4 * b.(bound_int_callee_save) in
- let ofl := align oendi 8 in (* float locals *)
- let ofcs := ofl + 8 * b.(bound_float_local) in (* float callee-saves *)
+ let ofcs := align oendi 8 in (* float callee-saves *)
let ostkdata := ofcs + 8 * b.(bound_float_callee_save) in (* stack data *)
let sz := align (ostkdata + b.(bound_stack_data)) 16 in
mk_frame_env sz 0 ora
- oil oics b.(bound_int_callee_save)
- ofl ofcs b.(bound_float_callee_save)
+ ol
+ oics b.(bound_int_callee_save)
+ ofcs b.(bound_float_callee_save)
ostkdata.
(** Separation property *)
@@ -76,25 +71,25 @@ Remark frame_env_separated:
let fe := make_env b in
0 <= fe.(fe_ofs_link)
/\ fe.(fe_ofs_link) + 4 <= fe_ofs_arg
- /\ fe_ofs_arg + 4 * b.(bound_outgoing) <= fe.(fe_ofs_int_local)
- /\ fe.(fe_ofs_int_local) + 4 * b.(bound_int_local) <= fe.(fe_ofs_retaddr)
+ /\ fe_ofs_arg + 4 * b.(bound_outgoing) <= fe.(fe_ofs_local)
+ /\ fe.(fe_ofs_local) + 4 * b.(bound_local) <= fe.(fe_ofs_retaddr)
/\ fe.(fe_ofs_retaddr) + 4 <= fe.(fe_ofs_int_callee_save)
- /\ fe.(fe_ofs_int_callee_save) + 4 * b.(bound_int_callee_save) <= fe.(fe_ofs_float_local)
- /\ fe.(fe_ofs_float_local) + 8 * b.(bound_float_local) <= fe.(fe_ofs_float_callee_save)
+ /\ fe.(fe_ofs_int_callee_save) + 4 * b.(bound_int_callee_save) <= fe.(fe_ofs_float_callee_save)
/\ fe.(fe_ofs_float_callee_save) + 8 * b.(bound_float_callee_save) <= fe.(fe_stack_data)
- /\ fe.(fe_stack_data) + b.(bound_stack_data) <= fe.(fe_size).
+ /\ fe.(fe_stack_data) + b.(bound_stack_data) <= fe.(fe_size)
+ /\ fe.(fe_ofs_retaddr) + 4 <= fe.(fe_size).
Proof.
intros.
+ generalize (align_le (8 + 4 * b.(bound_outgoing)) 8 (refl_equal _)).
generalize (align_le (fe.(fe_ofs_int_callee_save) + 4 * b.(bound_int_callee_save)) 8 (refl_equal _)).
generalize (align_le (fe.(fe_stack_data) + b.(bound_stack_data)) 16 (refl_equal _)).
unfold fe, make_env, fe_size, fe_ofs_link, fe_ofs_retaddr,
- fe_ofs_int_local, fe_ofs_int_callee_save,
+ fe_ofs_local, fe_ofs_int_callee_save,
fe_num_int_callee_save,
- fe_ofs_float_local, fe_ofs_float_callee_save, fe_num_float_callee_save,
+ fe_ofs_float_callee_save, fe_num_float_callee_save,
fe_stack_data, fe_ofs_arg.
intros.
- generalize (bound_int_local_pos b); intro;
- generalize (bound_float_local_pos b); intro;
+ generalize (bound_local_pos b); intro;
generalize (bound_int_callee_save_pos b); intro;
generalize (bound_float_callee_save_pos b); intro;
generalize (bound_outgoing_pos b); intro;
@@ -108,9 +103,8 @@ Remark frame_env_aligned:
forall b,
let fe := make_env b in
(4 | fe.(fe_ofs_link))
- /\ (4 | fe.(fe_ofs_int_local))
+ /\ (8 | fe.(fe_ofs_local))
/\ (4 | fe.(fe_ofs_int_callee_save))
- /\ (8 | fe.(fe_ofs_float_local))
/\ (8 | fe.(fe_ofs_float_callee_save))
/\ (4 | fe.(fe_ofs_retaddr))
/\ (8 | fe.(fe_stack_data))
@@ -118,24 +112,23 @@ Remark frame_env_aligned:
Proof.
intros.
unfold fe, make_env, fe_size, fe_ofs_link, fe_ofs_retaddr,
- fe_ofs_int_local, fe_ofs_int_callee_save,
+ fe_ofs_local, fe_ofs_int_callee_save,
fe_num_int_callee_save,
- fe_ofs_float_local, fe_ofs_float_callee_save, fe_num_float_callee_save,
+ fe_ofs_float_callee_save, fe_num_float_callee_save,
fe_stack_data.
- set (x1 := 8 + 4 * bound_outgoing b).
- assert (4 | x1). unfold x1; apply Zdivide_plus_r. exists 2; auto. exists (bound_outgoing b); ring.
- set (x2 := x1 + 4 * bound_int_local b).
- assert (4 | x2). unfold x2; apply Zdivide_plus_r; auto. exists (bound_int_local b); ring.
+ set (x1 := align (8 + 4 * bound_outgoing b) 8).
+ assert (8 | x1). unfold x1; apply align_divides. omega.
+ set (x2 := x1 + 4 * bound_local b).
+ assert (4 | x2). unfold x2; apply Zdivide_plus_r; auto.
+ apply Zdivides_trans with 8. exists 2; auto. auto.
+ exists (bound_local b); ring.
set (x3 := x2 + 4).
assert (4 | x3). unfold x3; apply Zdivide_plus_r; auto. exists 1; auto.
set (x4 := align (x3 + 4 * bound_int_callee_save b) 8).
assert (8 | x4). unfold x4. apply align_divides. omega.
- set (x5 := x4 + 8 * bound_float_local b).
- assert (8 | x5). unfold x5. apply Zdivide_plus_r; auto. exists (bound_float_local b); ring.
- set (x6 := x5 + 8 * bound_float_callee_save b).
- assert (8 | x6).
- unfold x6. apply Zdivide_plus_r; auto. exists (bound_float_callee_save b); ring.
- set (x7 := align (x6 + bound_stack_data b) 16).
- assert (16 | x7). unfold x7; apply align_divides. omega.
+ set (x5 := x4 + 8 * bound_float_callee_save b).
+ assert (8 | x5). unfold x5. apply Zdivide_plus_r; auto. exists (bound_float_callee_save b); ring.
+ set (x6 := align (x5 + bound_stack_data b) 16).
+ assert (16 | x6). unfold x6; apply align_divides. omega.
intuition.
Qed.