From 255cee09b71255051c2b40eae0c88bffce1f6f32 Mon Sep 17 00:00:00 2001 From: xleroy Date: Sat, 20 Apr 2013 07:54:52 +0000 Subject: Big merge of the newregalloc-int64 branch. Lots of changes in two directions: 1- new register allocator (+ live range splitting, spilling&reloading, etc) based on a posteriori validation using the Rideau-Leroy algorithm 2- support for 64-bit integer arithmetic (type "long long"). git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@2200 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e --- powerpc/Asm.v | 79 ++++----- powerpc/Asmgen.v | 24 ++- powerpc/Asmgenproof.v | 64 +++---- powerpc/Asmgenproof1.v | 45 +++-- powerpc/ConstpropOp.vp | 4 + powerpc/ConstpropOpproof.v | 3 + powerpc/Machregs.v | 124 +++++++++---- powerpc/Machregsaux.ml | 11 +- powerpc/Op.v | 55 ++++-- powerpc/PrintAsm.ml | 154 ++++++++++++---- powerpc/PrintOp.ml | 3 + powerpc/SelectOp.vp | 13 +- powerpc/SelectOpproof.v | 48 +++-- powerpc/eabi/Conventions1.v | 417 +++++++++++++++----------------------------- powerpc/eabi/Stacklayout.v | 67 ++++--- 15 files changed, 599 insertions(+), 512 deletions(-) (limited to 'powerpc') diff --git a/powerpc/Asm.v b/powerpc/Asm.v index 27e801a7..115d8467 100644 --- a/powerpc/Asm.v +++ b/powerpc/Asm.v @@ -23,7 +23,7 @@ Require Import Events. Require Import Globalenvs. Require Import Smallstep. Require Import Locations. -Require Stacklayout. +Require Import Stacklayout. Require Import Conventions. (** * Abstract syntax *) @@ -222,7 +222,7 @@ Inductive instruction : Type := | Pxori: ireg -> ireg -> constant -> instruction (**r bitwise xor with immediate *) | Pxoris: ireg -> ireg -> constant -> instruction (**r bitwise xor with immediate high *) | Plabel: label -> instruction (**r define a code label *) - | Pbuiltin: external_function -> list preg -> preg -> instruction (**r built-in function *) + | Pbuiltin: external_function -> list preg -> list preg -> instruction (**r built-in function *) | Pannot: external_function -> list annot_param -> instruction (**r annotation statement *) with annot_param : Type := @@ -324,6 +324,14 @@ Fixpoint undef_regs (l: list preg) (rs: regset) : regset := | r :: l' => undef_regs l' (rs#r <- Vundef) end. +(** Assigning multiple registers *) + +Fixpoint set_regs (rl: list preg) (vl: list val) (rs: regset) : regset := + match rl, vl with + | r1 :: rl', v1 :: vl' => set_regs rl' vl' (rs#r1 <- v1) + | _, _ => rs + end. + Section RELSEM. (** Looking up instructions in a code sequence by position. *) @@ -777,45 +785,45 @@ Definition preg_of (r: mreg) : preg := match r with | R3 => GPR3 | R4 => GPR4 | R5 => GPR5 | R6 => GPR6 | R7 => GPR7 | R8 => GPR8 | R9 => GPR9 | R10 => GPR10 + | R11 => GPR11 | R12 => GPR12 | R14 => GPR14 | R15 => GPR15 | R16 => GPR16 | R17 => GPR17 | R18 => GPR18 | R19 => GPR19 | R20 => GPR20 | R21 => GPR21 | R22 => GPR22 | R23 => GPR23 | R24 => GPR24 | R25 => GPR25 | R26 => GPR26 | R27 => GPR27 | R28 => GPR28 | R29 => GPR29 | R30 => GPR30 | R31 => GPR31 - | IT1 => GPR11 | IT2 => GPR12 + | F0 => FPR0 | F1 => FPR1 | F2 => FPR2 | F3 => FPR3 | F4 => FPR4 | F5 => FPR5 | F6 => FPR6 | F7 => FPR7 | F8 => FPR8 - | F9 => FPR9 | F10 => FPR10 | F11 => FPR11 - | F14 => FPR14 | F15 => FPR15 + | F9 => FPR9 | F10 => FPR10 | F11 => FPR11 | F12 => FPR12 + | F13 => FPR13 | F14 => FPR14 | F15 => FPR15 | F16 => FPR16 | F17 => FPR17 | F18 => FPR18 | F19 => FPR19 | F20 => FPR20 | F21 => FPR21 | F22 => FPR22 | F23 => FPR23 | F24 => FPR24 | F25 => FPR25 | F26 => FPR26 | F27 => FPR27 | F28 => FPR28 | F29 => FPR29 | F30 => FPR30 | F31 => FPR31 - | FT1 => FPR0 | FT2 => FPR12 | FT3 => FPR13 end. (** Extract the values of the arguments of an external call. We exploit the calling conventions from module [Conventions], except that we use PPC registers instead of locations. *) +Definition chunk_of_type (ty: typ) := + match ty with Tint => Mint32 | Tfloat => Mfloat64al32 | Tlong => Mint64 end. + Inductive extcall_arg (rs: regset) (m: mem): loc -> val -> Prop := | extcall_arg_reg: forall r, extcall_arg rs m (R r) (rs (preg_of r)) - | extcall_arg_int_stack: forall ofs bofs v, - bofs = Stacklayout.fe_ofs_arg + 4 * ofs -> - Mem.loadv Mint32 m (Val.add (rs (IR GPR1)) (Vint (Int.repr bofs))) = Some v -> - extcall_arg rs m (S (Outgoing ofs Tint)) v - | extcall_arg_float_stack: forall ofs bofs v, + | extcall_arg_stack: forall ofs ty bofs v, bofs = Stacklayout.fe_ofs_arg + 4 * ofs -> - Mem.loadv Mfloat64al32 m (Val.add (rs (IR GPR1)) (Vint (Int.repr bofs))) = Some v -> - extcall_arg rs m (S (Outgoing ofs Tfloat)) v. + Mem.loadv (chunk_of_type ty) m + (Val.add (rs (IR GPR1)) (Vint (Int.repr bofs))) = Some v -> + extcall_arg rs m (S Outgoing ofs ty) v. Definition extcall_arguments (rs: regset) (m: mem) (sg: signature) (args: list val) : Prop := list_forall2 (extcall_arg rs m) (loc_arguments sg) args. -Definition loc_external_result (sg: signature) : preg := - preg_of (loc_result sg). +Definition loc_external_result (sg: signature) : list preg := + map preg_of (loc_result sg). (** Extract the values of the arguments of an annotation. *) @@ -845,33 +853,31 @@ Inductive step: state -> trace -> state -> Prop := exec_instr c i rs m = Next rs' m' -> step (State rs m) E0 (State rs' m') | exec_step_builtin: - forall b ofs c ef args res rs m t v m', + forall b ofs c ef args res rs m t vl rs' m', rs PC = Vptr b ofs -> Genv.find_funct_ptr ge b = Some (Internal c) -> find_instr (Int.unsigned ofs) c = Some (Pbuiltin ef args res) -> - external_call ef ge (map rs args) m t v m' -> - step (State rs m) t - (State (nextinstr(rs #GPR11 <- Vundef #GPR12 <- Vundef - #FPR12 <- Vundef #FPR13 <- Vundef - #FPR0 <- Vundef #CTR <- Vundef - #res <- v)) m') + external_call' ef ge (map rs args) m t vl m' -> + rs' = nextinstr + (set_regs res vl + (undef_regs (map preg_of (destroyed_by_builtin ef)) rs)) -> + step (State rs m) t (State rs' m') | exec_step_annot: forall b ofs c ef args rs m vargs t v m', rs PC = Vptr b ofs -> Genv.find_funct_ptr ge b = Some (Internal c) -> find_instr (Int.unsigned ofs) c = Some (Pannot ef args) -> annot_arguments rs m args vargs -> - external_call ef ge vargs m t v m' -> + external_call' ef ge vargs m t v m' -> step (State rs m) t (State (nextinstr rs) m') | exec_step_external: forall b ef args res rs m t rs' m', rs PC = Vptr b Int.zero -> Genv.find_funct_ptr ge b = Some (External ef) -> - external_call ef ge args m t res m' -> + external_call' ef ge args m t res m' -> extcall_arguments rs m (ef_sig ef) args -> - rs' = (rs#(loc_external_result (ef_sig ef)) <- res - #PC <- (rs LR)) -> + rs' = (set_regs (loc_external_result (ef_sig ef)) res rs) #PC <- (rs RA) -> step (State rs m) t (State rs' m'). End RELSEM. @@ -939,21 +945,21 @@ Ltac Equalities := discriminate. discriminate. inv H11. - exploit external_call_determ. eexact H4. eexact H11. intros [A B]. + exploit external_call_determ'. eexact H4. eexact H9. intros [A B]. split. auto. intros. destruct B; auto. subst. auto. inv H12. assert (vargs0 = vargs) by (eapply annot_arguments_determ; eauto). subst vargs0. - exploit external_call_determ. eexact H5. eexact H13. intros [A B]. + exploit external_call_determ'. eexact H5. eexact H13. intros [A B]. split. auto. intros. destruct B; auto. subst. auto. assert (args0 = args) by (eapply extcall_arguments_determ; eauto). subst args0. - exploit external_call_determ. eexact H3. eexact H8. intros [A B]. + exploit external_call_determ'. eexact H3. eexact H8. intros [A B]. split. auto. intros. destruct B; auto. subst. auto. (* trace length *) red; intros. inv H; simpl. omega. - eapply external_call_trace_length; eauto. - eapply external_call_trace_length; eauto. - eapply external_call_trace_length; eauto. + inv H3; eapply external_call_trace_length; eauto. + inv H4; eapply external_call_trace_length; eauto. + inv H2; eapply external_call_trace_length; eauto. (* initial states *) inv H; inv H0. f_equal. congruence. (* final no step *) @@ -973,13 +979,4 @@ Definition data_preg (r: preg) : bool := | _ => true end. -Definition nontemp_preg (r: preg) : bool := - match r with - | IR GPR0 => false | IR GPR11 => false | IR GPR12 => false - | FR FPR0 => false | FR FPR12 => false | FR FPR13 => false - | PC => false | LR => false | CTR => false - | CR0_0 => false | CR0_1 => false | CR0_2 => false | CR0_3 => false - | CARRY => false - | _ => true - end. diff --git a/powerpc/Asmgen.v b/powerpc/Asmgen.v index 39b84e0b..6a1d07ed 100644 --- a/powerpc/Asmgen.v +++ b/powerpc/Asmgen.v @@ -136,6 +136,8 @@ Definition loadind (base: ireg) (ofs: int) (ty: typ) (dst: mreg) (k: code) := Plfd r (Cint ofs) base :: k else loadimm GPR0 ofs (Plfdx r base GPR0 :: k)) + | Tlong => + Error (msg "Asmgen.loadind") end. Definition storeind (src: mreg) (base: ireg) (ofs: int) (ty: typ) (k: code) := @@ -152,6 +154,8 @@ Definition storeind (src: mreg) (base: ireg) (ofs: int) (ty: typ) (k: code) := Pstfd r (Cint ofs) base :: k else loadimm GPR0 ofs (Pstfdx r base GPR0 :: k)) + | Tlong => + Error (msg "Asmgen.storeind") end. (** Constructor for a floating-point comparison. The PowerPC has @@ -336,8 +340,8 @@ Definition transl_op OK (if symbol_is_small_data s ofs then Paddi r GPR0 (Csymbol_sda s ofs) :: k else - Paddis GPR12 GPR0 (Csymbol_high s ofs) :: - Paddi r GPR12 (Csymbol_low s ofs) :: k) + Paddis r GPR0 (Csymbol_high s ofs) :: + Paddi r r (Csymbol_low s ofs) :: k) | Oaddrstack n, nil => do r <- ireg_of res; OK (addimm r GPR1 n k) | Ocast8signed, a1 :: nil => @@ -428,7 +432,7 @@ Definition transl_op do r1 <- ireg_of a1; do r <- ireg_of res; OK (rolm r r1 amount mask k) | Oroli amount mask, a1 :: a2 :: nil => - do x <- assertion (mreg_eq a1 res); + assertion (mreg_eq a1 res); do r2 <- ireg_of a2; do r <- ireg_of res; OK (Prlwimi r r2 amount mask :: k) | Onegf, a1 :: nil => @@ -467,7 +471,7 @@ Definition transl_op (** Translation of memory accesses: loads, and stores. *) Definition int_temp_for (r: mreg) := - if mreg_eq r IT2 then GPR11 else GPR12. + if mreg_eq r R12 then GPR11 else GPR12. Definition transl_memory_access (mk1: constant -> ireg -> instruction) @@ -529,6 +533,8 @@ Definition transl_load (chunk: memory_chunk) (addr: addressing) | Mfloat64 | Mfloat64al32 => do r <- freg_of dst; transl_memory_access (Plfd r) (Plfdx r) addr args GPR12 k + | Mint64 => + Error (msg "Asmgen.transl_load") end. Definition transl_store (chunk: memory_chunk) (addr: addressing) @@ -550,6 +556,8 @@ Definition transl_store (chunk: memory_chunk) (addr: addressing) | Mfloat64 | Mfloat64al32 => do r <- freg_of src; transl_memory_access (Pstfd r) (Pstfdx r) addr args temp k + | Mint64 => + Error (msg "Asmgen.transl_store") end. (** Translation of arguments to annotations *) @@ -574,7 +582,7 @@ Definition transl_instr (f: Mach.function) (i: Mach.instruction) loadind GPR11 ofs ty dst k else (do k1 <- loadind GPR11 ofs ty dst k; - loadind GPR1 f.(fn_link_ofs) Tint IT1 k1) + loadind GPR1 f.(fn_link_ofs) Tint R11 k1) | Mop op args res => transl_op op args res k | Mload chunk addr args dst => @@ -598,7 +606,7 @@ Definition transl_instr (f: Mach.function) (i: Mach.instruction) Pfreeframe f.(fn_stacksize) f.(fn_link_ofs) :: Pbs symb :: k) | Mbuiltin ef args res => - OK (Pbuiltin ef (map preg_of args) (preg_of res) :: k) + OK (Pbuiltin ef (map preg_of args) (map preg_of res) :: k) | Mannot ef args => OK (Pannot ef (map transl_annot_param args) :: k) | Mlabel lbl => @@ -624,8 +632,8 @@ Definition transl_instr (f: Mach.function) (i: Mach.instruction) Definition it1_is_parent (before: bool) (i: Mach.instruction) : bool := match i with | Msetstack src ofs ty => before - | Mgetparam ofs ty dst => negb (mreg_eq dst IT1) - | Mop Omove args res => before && negb (mreg_eq res IT1) + | Mgetparam ofs ty dst => negb (mreg_eq dst R11) + | Mop Omove args res => before && negb (mreg_eq res R11) | _ => false end. diff --git a/powerpc/Asmgenproof.v b/powerpc/Asmgenproof.v index 07e66cf8..37c88088 100644 --- a/powerpc/Asmgenproof.v +++ b/powerpc/Asmgenproof.v @@ -487,9 +487,9 @@ Definition measure (s: Mach.state) : nat := | Mach.Returnstate _ _ _ => 1%nat end. -Remark preg_of_not_GPR11: forall r, negb (mreg_eq r IT1) = true -> IR GPR11 <> preg_of r. +Remark preg_of_not_GPR11: forall r, negb (mreg_eq r R11) = true -> IR GPR11 <> preg_of r. Proof. - intros. change (IR GPR11) with (preg_of IT1). red; intros. + intros. change (IR GPR11) with (preg_of R11). red; intros. exploit preg_of_injective; eauto. intros; subst r; discriminate. Qed. @@ -526,7 +526,8 @@ Proof. rewrite (sp_val _ _ _ AG) in A. intros. simpl in TR. exploit storeind_correct; eauto with asmgen. intros [rs' [P Q]]. exists rs'; split. eauto. - split. change (undef_setstack rs) with rs. apply agree_exten with rs0; auto with asmgen. + split. change (Mach.undef_regs (destroyed_by_op Omove) rs) with rs. + apply agree_exten with rs0; auto with asmgen. simpl; intros. rewrite Q; auto with asmgen. - (* Mgetparam *) @@ -568,9 +569,11 @@ Opaque loadind. intros [v' [A B]]. rewrite (sp_val _ _ _ AG) in A. left; eapply exec_straight_steps; eauto; intros. simpl in TR. exploit transl_op_correct; eauto. intros [rs2 [P [Q R]]]. - exists rs2; split. eauto. split. auto. - simpl. destruct op; try congruence. destruct ep; simpl; try congruence. intros. + exists rs2; split. eauto. split. auto. + destruct op; simpl; try discriminate. intros. + destruct (andb_prop _ _ H1); clear H1. rewrite R; auto. apply preg_of_not_GPR11; auto. + change (destroyed_by_op Omove) with (@nil mreg). simpl; auto. - (* Mload *) assert (eval_addressing tge sp addr rs##args = Some a). @@ -595,7 +598,7 @@ Opaque loadind. left; eapply exec_straight_steps; eauto. intros. simpl in TR. exploit transl_store_correct; eauto. intros [rs2 [P Q]]. exists rs2; split. eauto. - split. eapply agree_exten_temps; eauto. intros; auto with asmgen. + split. eapply agree_undef_regs; eauto with asmgen. simpl; congruence. - (* Mcall *) @@ -741,19 +744,21 @@ Hint Resolve agree_nextinstr agree_set_other: asmgen. inv AT. monadInv H3. exploit functions_transl; eauto. intro FN. generalize (transf_function_no_overflow _ _ H2); intro NOOV. - exploit external_call_mem_extends; eauto. eapply preg_vals; eauto. + exploit external_call_mem_extends'; eauto. eapply preg_vals; eauto. intros [vres' [m2' [A [B [C D]]]]]. left. econstructor; split. apply plus_one. eapply exec_step_builtin. eauto. eauto. eapply find_instr_tail; eauto. - eapply external_call_symbols_preserved; eauto. + eapply external_call_symbols_preserved'; eauto. exact symbols_preserved. exact varinfo_preserved. + eauto. econstructor; eauto. - Simpl. rewrite <- H0. simpl. econstructor; eauto. + Simpl. rewrite set_pregs_other_2. rewrite undef_regs_other_2. rewrite <- H0. simpl. econstructor; eauto. eapply code_tail_next_int; eauto. - apply agree_nextinstr. eapply agree_set_undef_mreg; eauto. - rewrite Pregmap.gss. auto. - intros. Simpl. + apply preg_notin_charact; auto with asmgen. + apply preg_notin_charact; auto with asmgen. + apply agree_nextinstr. eapply agree_set_mregs; auto. + eapply agree_undef_regs; eauto. intros; apply undef_regs_other_2; auto. congruence. - (* Mannot *) @@ -761,12 +766,12 @@ Hint Resolve agree_nextinstr agree_set_other: asmgen. exploit functions_transl; eauto. intro FN. generalize (transf_function_no_overflow _ _ H3); intro NOOV. exploit annot_arguments_match; eauto. intros [vargs' [P Q]]. - exploit external_call_mem_extends; eauto. + exploit external_call_mem_extends'; eauto. intros [vres' [m2' [A [B [C D]]]]]. left. econstructor; split. apply plus_one. eapply exec_step_annot. eauto. eauto. eapply find_instr_tail; eauto. eauto. - eapply external_call_symbols_preserved; eauto. + eapply external_call_symbols_preserved'; eauto. exact symbols_preserved. exact varinfo_preserved. eapply match_states_intro with (ep := false); eauto with coqlib. unfold nextinstr. rewrite Pregmap.gss. @@ -798,11 +803,11 @@ Hint Resolve agree_nextinstr agree_set_other: asmgen. destruct (snd (crbit_for_cond cond)). (* Pbt, taken *) econstructor; econstructor; econstructor; split. eexact A. - split. eapply agree_exten_temps; eauto with asmgen. + split. eapply agree_exten; eauto with asmgen. simpl. rewrite B. reflexivity. (* Pbf, taken *) econstructor; econstructor; econstructor; split. eexact A. - split. eapply agree_exten_temps; eauto with asmgen. + split. eapply agree_exten; eauto with asmgen. simpl. rewrite B. reflexivity. - (* Mcond false *) @@ -815,7 +820,7 @@ Hint Resolve agree_nextinstr agree_set_other: asmgen. destruct (snd (crbit_for_cond cond)). apply exec_straight_one. simpl. rewrite B. reflexivity. auto. apply exec_straight_one. simpl. rewrite B. reflexivity. auto. - split. eapply agree_exten_temps; eauto with asmgen. + split. eapply agree_exten; eauto with asmgen. intros; Simpl. simpl. congruence. @@ -835,7 +840,9 @@ Hint Resolve agree_nextinstr agree_set_other: asmgen. eapply find_instr_tail; eauto. simpl. rewrite <- H9. unfold Mach.label in H0; unfold label; rewrite H0. eexact A. econstructor; eauto. - eapply agree_exten_temps; eauto. intros. rewrite C; auto with asmgen. Simpl. + eapply agree_undef_regs; eauto. +Local Transparent destroyed_by_jumptable. + simpl. intros. rewrite C; auto with asmgen. Simpl. congruence. - (* Mreturn *) @@ -896,6 +903,7 @@ Hint Resolve agree_nextinstr agree_set_other: asmgen. intros [m1' [C D]]. exploit Mem.storev_extends. eexact D. eexact H1. eauto. eauto. intros [m2' [F G]]. + simpl Mach.chunk_of_type in F. exploit Mem.storev_extends. eexact G. eexact H2. eauto. eauto. intros [m3' [P Q]]. (* Execution of function prologue *) @@ -910,7 +918,7 @@ Hint Resolve agree_nextinstr agree_set_other: asmgen. rewrite <- H5 at 2. apply exec_straight_three with rs2 m2' rs3 m2'. unfold exec_instr. rewrite C. fold sp. - rewrite <- (sp_val _ _ _ AG). unfold chunk_of_type in F. rewrite F. auto. + rewrite <- (sp_val _ _ _ AG). rewrite F. auto. simpl. auto. simpl. unfold store1. rewrite gpr_or_zero_not_zero. change (rs3 GPR1) with sp. change (rs3 GPR0) with (rs0 LR). simpl. @@ -928,8 +936,7 @@ Hint Resolve agree_nextinstr agree_set_other: asmgen. unfold rs4, rs3, rs2. apply agree_nextinstr. apply agree_set_other; auto. apply agree_set_other; auto. apply agree_nextinstr. apply agree_set_other; auto. - eapply agree_change_sp; eauto. apply agree_exten_temps with rs0; eauto. - unfold sp; congruence. + eapply agree_change_sp; eauto. unfold sp; congruence. congruence. - (* external function *) @@ -937,17 +944,15 @@ Hint Resolve agree_nextinstr agree_set_other: asmgen. intros [tf [A B]]. simpl in B. inv B. exploit extcall_arguments_match; eauto. intros [args' [C D]]. - exploit external_call_mem_extends; eauto. + exploit external_call_mem_extends'; eauto. intros [res' [m2' [P [Q [R S]]]]]. left; econstructor; split. apply plus_one. eapply exec_step_external; eauto. - eapply external_call_symbols_preserved; eauto. + eapply external_call_symbols_preserved'; eauto. exact symbols_preserved. exact varinfo_preserved. econstructor; eauto. unfold loc_external_result. - eapply agree_set_mreg; eauto. - rewrite Pregmap.gso; auto with asmgen. rewrite Pregmap.gss. auto. - intros; Simpl. + apply agree_set_other; auto. apply agree_set_mregs; auto. - (* return *) inv STACKS. simpl in *. @@ -980,10 +985,9 @@ Lemma transf_final_states: forall st1 st2 r, match_states st1 st2 -> Mach.final_state st1 r -> Asm.final_state st2 r. Proof. - intros. inv H0. inv H. inv STACKS. constructor. - auto. - compute in H1. - generalize (preg_val _ _ _ R3 AG). rewrite H1. intros LD; inv LD. auto. + intros. inv H0. inv H. constructor. auto. + compute in H1. inv H1. + generalize (preg_val _ _ _ R3 AG). rewrite H2. intros LD; inv LD. auto. Qed. Theorem transf_program_correct: diff --git a/powerpc/Asmgenproof1.v b/powerpc/Asmgenproof1.v index 0e6d3e1c..cd961c95 100644 --- a/powerpc/Asmgenproof1.v +++ b/powerpc/Asmgenproof1.v @@ -546,7 +546,8 @@ Ltac ArgsInv := | [ H: Error _ = OK _ |- _ ] => discriminate | [ H: match ?args with nil => _ | _ :: _ => _ end = OK _ |- _ ] => destruct args | [ H: bind _ _ = OK _ |- _ ] => monadInv H - | [ H: assertion _ = OK _ |- _ ] => monadInv H + | [ H: match _ with left _ => _ | right _ => assertion_failed end = OK _ |- _ ] => monadInv H; ArgsInv + | [ H: match _ with true => _ | false => assertion_failed end = OK _ |- _ ] => monadInv H; ArgsInv end); subst; repeat (match goal with @@ -810,9 +811,7 @@ Lemma transl_op_correct_aux: exists rs', exec_straight ge fn c rs m k rs' m /\ rs'#(preg_of res) = v - /\ forall r, - match op with Omove => data_preg r = true | _ => nontemp_preg r = true end -> - r <> preg_of res -> rs'#r = rs#r. + /\ forall r, data_preg r = true -> r <> preg_of res -> preg_notin r (destroyed_by_op op) -> rs' r = rs r. Proof. Opaque Int.eq. intros. unfold transl_op in H; destruct op; ArgsInv; simpl in H0; try (inv H0); try TranslOpSimpl. @@ -839,7 +838,7 @@ Opaque Val.add. rewrite gpr_or_zero_not_zero; eauto with asmgen. Simpl. rewrite (Val.add_commut Vzero). rewrite high_half_zero. rewrite Val.add_commut. rewrite low_high_half. auto. - intros; Simpl. + intros; Simpl. (* Oaddrstack *) destruct (addimm_correct x GPR1 i k rs m) as [rs' [EX [RES OTH]]]; eauto with asmgen. exists rs'; auto with asmgen. @@ -879,12 +878,11 @@ Opaque Val.add. intros. rewrite D; auto with asmgen. unfold rs1; Simpl. (* Oandimm *) destruct (andimm_correct x0 x i k rs m) as [rs' [A [B C]]]; eauto with asmgen. - exists rs'; auto with asmgen. (* Oorimm *) - destruct (orimm_correct x0 x i k rs m) as [rs' [A [B C]]]. + destruct (orimm_correct x0 x i k rs m) as [rs' [A [B C]]]. exists rs'; auto with asmgen. (* Oxorimm *) - destruct (xorimm_correct x0 x i k rs m) as [rs' [A [B C]]]. + destruct (xorimm_correct x0 x i k rs m) as [rs' [A [B C]]]. exists rs'; auto with asmgen. (* Onor *) replace (Val.notint (rs x)) @@ -898,7 +896,6 @@ Opaque Val.add. intros; Simpl. (* Orolm *) destruct (rolm_correct x0 x i i0 k rs m) as [rs' [A [B C]]]; eauto with asmgen. - exists rs'; auto with asmgen. (* Ointoffloat *) replace v with (Val.maketotal (Val.intoffloat (rs x))). TranslOpSimpl. @@ -916,19 +913,17 @@ Lemma transl_op_correct: Mem.extends m m' -> exists rs', exec_straight ge fn c rs m' k rs' m' - /\ agree (Regmap.set res v (undef_op op ms)) sp rs' - /\ forall r, op = Omove -> data_preg r = true -> r <> preg_of res -> rs'#r = rs#r. + /\ agree (Regmap.set res v (Mach.undef_regs (destroyed_by_op op) ms)) sp rs' + /\ forall r, data_preg r = true -> r <> preg_of res -> preg_notin r (destroyed_by_op op) -> rs' r = rs r. Proof. intros. exploit eval_operation_lessdef. eapply preg_vals; eauto. eauto. eauto. intros [v' [A B]]. rewrite (sp_val _ _ _ H0) in A. exploit transl_op_correct_aux; eauto. intros [rs' [P [Q R]]]. rewrite <- Q in B. - exists rs'; split. eexact P. - split. unfold undef_op. destruct op; - (apply agree_set_undef_mreg with rs || apply agree_set_mreg with rs); + exists rs'; split. eexact P. + split. apply agree_set_undef_mreg with rs; auto. auto. - intros. subst op. auto. Qed. (** Translation of memory accesses *) @@ -1100,17 +1095,18 @@ Lemma transl_store_correct: Mem.storev chunk m a (rs (preg_of src)) = Some m' -> exists rs', exec_straight ge fn c rs m k rs' m' - /\ forall r, r <> PC -> r <> GPR11 -> r <> GPR12 -> r <> FPR13 -> rs' r = rs r. + /\ forall r, r <> PC -> preg_notin r (destroyed_by_store chunk addr) -> rs' r = rs r. Proof. +Local Transparent destroyed_by_store. intros. assert (TEMP0: int_temp_for src = GPR11 \/ int_temp_for src = GPR12). - unfold int_temp_for. destruct (mreg_eq src IT2); auto. + unfold int_temp_for. destruct (mreg_eq src R12); auto. assert (TEMP1: int_temp_for src <> GPR0). destruct TEMP0; congruence. assert (TEMP2: IR (int_temp_for src) <> preg_of src). - unfold int_temp_for. destruct (mreg_eq src IT2). + unfold int_temp_for. destruct (mreg_eq src R12). subst src; simpl; congruence. - change (IR GPR12) with (preg_of IT2). red; intros; elim n. + change (IR GPR12) with (preg_of R12). red; intros; elim n. eapply preg_of_injective; eauto. assert (BASE: forall mk1 mk2 chunk', transl_memory_access mk1 mk2 addr args (int_temp_for src) k = OK c -> @@ -1123,15 +1119,15 @@ Proof. store2 chunk' (preg_of src) r1 r2 rs1 m) -> exists rs', exec_straight ge fn c rs m k rs' m' - /\ forall r, r <> PC -> r <> GPR11 -> r <> GPR12 -> r <> FPR13 -> rs' r = rs r). + /\ forall r, r <> PC -> r <> GPR11 /\ r <> GPR12 -> rs' r = rs r). { intros. eapply transl_memory_access_correct; eauto. intros. econstructor; split. apply exec_straight_one. rewrite H4. unfold store1. rewrite H6. rewrite H7; auto with asmgen. rewrite H3. eauto. auto. - intros; Simpl. apply H7; auto. destruct TEMP0; congruence. + intros; Simpl. apply H7; auto. destruct TEMP0; destruct H9; congruence. intros. econstructor; split. apply exec_straight_one. rewrite H5. unfold store2. rewrite H6. rewrite H7; auto with asmgen. rewrite H3. eauto. auto. - intros; Simpl. apply H7; auto. destruct TEMP0; congruence. + intros; Simpl. apply H7; auto. destruct TEMP0; destruct H9; congruence. } destruct chunk; monadInv H. - (* Mint8signed *) @@ -1153,10 +1149,11 @@ Proof. eapply transl_memory_access_correct. eauto. eauto. eauto. intros. econstructor; split. apply exec_straight_one. simpl. unfold store1. rewrite H. rewrite H2; auto with asmgen. rewrite H1. eauto. auto. - intros. Simpl. apply H2; auto with asmgen. destruct TEMP0; congruence. +Local Transparent destroyed_by_store. + simpl; intros. destruct H4 as [A [B C]]. Simpl. apply H2; auto with asmgen. destruct TEMP0; congruence. intros. econstructor; split. apply exec_straight_one. simpl. unfold store2. rewrite H. rewrite H2; auto with asmgen. rewrite H1. eauto. auto. - intros. Simpl. apply H2; auto with asmgen. destruct TEMP0; congruence. + simpl; intros. destruct H4 as [A [B C]]. Simpl. apply H2; auto with asmgen. destruct TEMP0; congruence. - (* Mfloat64 *) apply Mem.storev_float64al32 in H1. eapply BASE; eauto; erewrite freg_of_eq by eauto; auto. - (* Mfloat64al32 *) diff --git a/powerpc/ConstpropOp.vp b/powerpc/ConstpropOp.vp index 9131a466..e7e4095d 100644 --- a/powerpc/ConstpropOp.vp +++ b/powerpc/ConstpropOp.vp @@ -31,6 +31,7 @@ Inductive approx : Type := no compile-time information is available. *) | I: int -> approx (** A known integer value. *) | F: float -> approx (** A known floating-point value. *) + | L: int64 -> approx (** A know 64-bit integer value. *) | G: ident -> int -> approx (** The value is the address of the given global symbol plus the given integer offset. *) @@ -125,6 +126,9 @@ Nondetfunction eval_static_operation (op: operation) (vl: list approx) := | Osingleoffloat, F n1 :: nil => F(Float.singleoffloat n1) | Ointoffloat, F n1 :: nil => eval_static_intoffloat n1 | Ofloatofwords, I n1 :: I n2 :: nil => if propagate_float_constants tt then F(Float.from_words n1 n2) else Unknown + | Omakelong, I n1 :: I n2 :: nil => L(Int64.ofwords n1 n2) + | Olowlong, L n :: nil => I(Int64.loword n) + | Ohighlong, L n :: nil => I(Int64.hiword n) | Ocmp c, vl => eval_static_condition_val c vl | _, _ => Unknown end. diff --git a/powerpc/ConstpropOpproof.v b/powerpc/ConstpropOpproof.v index 84e1e5bb..9d833bf1 100644 --- a/powerpc/ConstpropOpproof.v +++ b/powerpc/ConstpropOpproof.v @@ -45,6 +45,7 @@ Definition val_match_approx (a: approx) (v: val) : Prop := | Unknown => True | I p => v = Vint p | F p => v = Vfloat p + | L p => v = Vlong p | G symb ofs => v = symbol_address ge symb ofs | S ofs => v = Val.add sp (Vint ofs) | _ => False @@ -65,6 +66,8 @@ Ltac SimplVMA := simpl in H; (try subst v); SimplVMA | H: (val_match_approx (F _) ?v) |- _ => simpl in H; (try subst v); SimplVMA + | H: (val_match_approx (L _) ?v) |- _ => + simpl in H; (try subst v); SimplVMA | H: (val_match_approx (G _ _) ?v) |- _ => simpl in H; (try subst v); SimplVMA | H: (val_match_approx (S _) ?v) |- _ => diff --git a/powerpc/Machregs.v b/powerpc/Machregs.v index 632a55df..ce66e6a5 100644 --- a/powerpc/Machregs.v +++ b/powerpc/Machregs.v @@ -13,6 +13,7 @@ Require Import Coqlib. Require Import Maps. Require Import AST. +Require Import Op. (** ** Machine registers *) @@ -21,59 +22,55 @@ Require Import AST. - Integer registers that can be allocated to RTL pseudo-registers ([Rxx]). - Floating-point registers that can be allocated to RTL pseudo-registers ([Fxx]). -- Two integer registers, not allocatable, reserved as temporaries for - spilling and reloading ([IT1, IT2]). -- Two float registers, not allocatable, reserved as temporaries for - spilling and reloading ([FT1, FT2]). The type [mreg] does not include special-purpose or reserved - machine registers such as the stack pointer and the condition codes. *) + machine registers such as the stack pointer (GPR1), the small data area + pointers (GPR2, GPR13), and the condition codes. *) Inductive mreg: Type := (** Allocatable integer regs *) | R3: mreg | R4: mreg | R5: mreg | R6: mreg | R7: mreg | R8: mreg | R9: mreg | R10: mreg + | R11: mreg | R12: mreg | R14: mreg | R15: mreg | R16: mreg | R17: mreg | R18: mreg | R19: mreg | R20: mreg | R21: mreg | R22: mreg | R23: mreg | R24: mreg | R25: mreg | R26: mreg | R27: mreg | R28: mreg | R29: mreg | R30: mreg | R31: mreg (** Allocatable float regs *) + | F0: mreg | F1: mreg | F2: mreg | F3: mreg | F4: mreg | F5: mreg | F6: mreg | F7: mreg | F8: mreg - | F9: mreg | F10: mreg | F11: mreg - | F14: mreg | F15: mreg + | F9: mreg | F10: mreg | F11: mreg | F12: mreg + | F13: mreg | F14: mreg | F15: mreg | F16: mreg | F17: mreg | F18: mreg | F19: mreg | F20: mreg | F21: mreg | F22: mreg | F23: mreg | F24: mreg | F25: mreg | F26: mreg | F27: mreg - | F28: mreg | F29: mreg | F30: mreg | F31: mreg - (** Integer temporaries *) - | IT1: mreg (* R11 *) | IT2: mreg (* R12 *) - (** Float temporaries *) - | FT1: mreg (* F0 *) | FT2: mreg (* F12 *) | FT3: mreg (* F13 *). + | F28: mreg | F29: mreg | F30: mreg | F31: mreg. Lemma mreg_eq: forall (r1 r2: mreg), {r1 = r2} + {r1 <> r2}. Proof. decide equality. Defined. +Global Opaque mreg_eq. Definition mreg_type (r: mreg): typ := match r with | R3 => Tint | R4 => Tint | R5 => Tint | R6 => Tint | R7 => Tint | R8 => Tint | R9 => Tint | R10 => Tint + | R11 => Tint | R12 => Tint | R14 => Tint | R15 => Tint | R16 => Tint | R17 => Tint | R18 => Tint | R19 => Tint | R20 => Tint | R21 => Tint | R22 => Tint | R23 => Tint | R24 => Tint | R25 => Tint | R26 => Tint | R27 => Tint | R28 => Tint | R29 => Tint | R30 => Tint | R31 => Tint + | F0 => Tfloat | F1 => Tfloat | F2 => Tfloat | F3 => Tfloat | F4 => Tfloat | F5 => Tfloat | F6 => Tfloat | F7 => Tfloat | F8 => Tfloat - | F9 => Tfloat | F10 => Tfloat | F11 => Tfloat - | F14 => Tfloat | F15 => Tfloat + | F9 => Tfloat | F10 => Tfloat | F11 => Tfloat | F12 => Tfloat + | F13 => Tfloat | F14 => Tfloat | F15 => Tfloat | F16 => Tfloat | F17 => Tfloat | F18 => Tfloat | F19 => Tfloat | F20 => Tfloat | F21 => Tfloat | F22 => Tfloat | F23 => Tfloat | F24 => Tfloat | F25 => Tfloat | F26 => Tfloat | F27 => Tfloat | F28 => Tfloat | F29 => Tfloat | F30 => Tfloat | F31 => Tfloat - | IT1 => Tint | IT2 => Tint - | FT1 => Tfloat | FT2 => Tfloat | FT3 => Tfloat end. Open Scope positive_scope. @@ -85,21 +82,21 @@ Module IndexedMreg <: INDEXED_TYPE. match r with | R3 => 1 | R4 => 2 | R5 => 3 | R6 => 4 | R7 => 5 | R8 => 6 | R9 => 7 | R10 => 8 - | R14 => 9 | R15 => 10 | R16 => 11 - | R17 => 12 | R18 => 13 | R19 => 14 | R20 => 15 - | R21 => 16 | R22 => 17 | R23 => 18 | R24 => 19 - | R25 => 20 | R26 => 21 | R27 => 22 | R28 => 23 - | R29 => 24 | R30 => 25 | R31 => 26 - | F1 => 28 | F2 => 29 | F3 => 30 | F4 => 31 - | F5 => 32 | F6 => 33 | F7 => 34 | F8 => 35 - | F9 => 36 | F10 => 37 | F11 => 38 - | F14 => 39 | F15 => 40 - | F16 => 41 | F17 => 42 | F18 => 43 | F19 => 44 - | F20 => 45 | F21 => 46 | F22 => 47 | F23 => 48 - | F24 => 49 | F25 => 50 | F26 => 51 | F27 => 52 - | F28 => 53 | F29 => 54 | F30 => 55 | F31 => 56 - | IT1 => 57 | IT2 => 58 - | FT1 => 59 | FT2 => 60 | FT3 => 61 + | R11 => 9 | R12 => 10 + | R14 => 11 | R15 => 12 | R16 => 13 + | R17 => 14 | R18 => 15 | R19 => 16 | R20 => 17 + | R21 => 18 | R22 => 19 | R23 => 20 | R24 => 21 + | R25 => 22 | R26 => 23 | R27 => 24 | R28 => 25 + | R29 => 26 | R30 => 27 | R31 => 28 + | F0 => 29 + | F1 => 30 | F2 => 31 | F3 => 32 | F4 => 33 + | F5 => 34 | F6 => 35 | F7 => 36 | F8 => 37 + | F9 => 38 | F10 => 39 | F11 => 40 | F12 => 41 + | F13 => 42 | F14 => 43 | F15 => 44 + | F16 => 45 | F17 => 46 | F18 => 47 | F19 => 48 + | F20 => 49 | F21 => 50 | F22 => 51 | F23 => 52 + | F24 => 53 | F25 => 54 | F26 => 55 | F27 => 56 + | F28 => 57 | F29 => 58 | F30 => 59 | F31 => 60 end. Lemma index_inj: forall r1 r2, index r1 = index r2 -> r1 = r2. @@ -108,3 +105,68 @@ Module IndexedMreg <: INDEXED_TYPE. Qed. End IndexedMreg. +(** ** Destroyed registers, preferred registers *) + +Definition destroyed_by_op (op: operation): list mreg := + match op with + | Ofloatconst _ => R12 :: nil + | Ointoffloat => F13 :: nil + | _ => nil + end. + +Definition destroyed_by_load (chunk: memory_chunk) (addr: addressing): list mreg := + R12 :: nil. + +Definition destroyed_by_store (chunk: memory_chunk) (addr: addressing): list mreg := + match chunk with + | Mfloat32 => R11 :: R12 :: F13 :: nil + | _ => R11 :: R12 :: nil + end. + +Definition destroyed_by_cond (cond: condition): list mreg := + nil. + +Definition destroyed_by_jumptable: list mreg := + R12 :: nil. + +Definition destroyed_by_builtin (ef: external_function): list mreg := + match ef with + | EF_builtin _ _ => F13 :: nil + | EF_vload _ => nil + | EF_vstore Mfloat32 => F13 :: nil + | EF_vstore _ => nil + | EF_vload_global _ _ _ => R11 :: nil + | EF_vstore_global Mint64 _ _ => R10 :: R11 :: R12 :: nil + | EF_vstore_global Mfloat32 _ _ => R11 :: R12 :: F13 :: nil + | EF_vstore_global _ _ _ => R11 :: R12 :: nil + | EF_memcpy _ _ => R11 :: R12 :: F13 :: nil + | _ => nil + end. + +Definition destroyed_at_function_entry: list mreg := + nil. + +Definition temp_for_parent_frame: mreg := + R11. + +Definition mregs_for_operation (op: operation): list (option mreg) * option mreg := + (nil, None). + +Definition mregs_for_builtin (ef: external_function): list (option mreg) * list (option mreg) := + (nil, nil). + +Global Opaque + destroyed_by_op destroyed_by_load destroyed_by_store + destroyed_by_cond destroyed_by_jumptable destroyed_by_builtin + destroyed_at_function_entry temp_for_parent_frame + mregs_for_operation mregs_for_builtin. + +(** Two-address operations. Return [true] if the first argument and + the result must be in the same location *and* are unconstrained + by [mregs_for_operation]. There is only one: rotate-mask-insert. *) + +Definition two_address_op (op: operation) : bool := + match op with + | Oroli _ _ => true + | _ => false + end. diff --git a/powerpc/Machregsaux.ml b/powerpc/Machregsaux.ml index 713e14d1..b8d7c97f 100644 --- a/powerpc/Machregsaux.ml +++ b/powerpc/Machregsaux.ml @@ -17,21 +17,20 @@ open Machregs let register_names = [ ("R3", R3); ("R4", R4); ("R5", R5); ("R6", R6); ("R7", R7); ("R8", R8); ("R9", R9); ("R10", R10); + ("R11", R11); ("R12", R12); ("R14", R14); ("R15", R15); ("R16", R16); ("R17", R17); ("R18", R18); ("R19", R19); ("R20", R20); ("R21", R21); ("R22", R22); ("R23", R23); ("R24", R24); ("R25", R25); ("R26", R26); ("R27", R27); ("R28", R28); ("R29", R29); ("R30", R30); ("R31", R31); - ("F1", F1); ("F2", F2); ("F3", F3); ("F4", F4); + ("F0", F0); ("F1", F1); ("F2", F2); ("F3", F3); ("F4", F4); ("F5", F5); ("F6", F6); ("F7", F7); ("F8", F8); - ("F9", F9); ("F10", F10); ("F11", F11); - ("F14", F14); ("F15", F15); + ("F9", F9); ("F10", F10); ("F11", F11); ("F12", F12); + ("F13", F13); ("F14", F14); ("F15", F15); ("F16", F16); ("F17", F17); ("F18", F18); ("F19", F19); ("F20", F20); ("F21", F21); ("F22", F22); ("F23", F23); ("F24", F24); ("F25", F25); ("F26", F26); ("F27", F27); - ("F28", F28); ("F29", F29); ("F30", F30); ("F31", F31); - ("R11", IT1); ("R12", IT2); - ("F0", FT1); ("F12", FT2); ("F13", FT3) + ("F28", F28); ("F29", F29); ("F30", F30); ("F31", F31) ] let name_of_register r = diff --git a/powerpc/Op.v b/powerpc/Op.v index 110796ba..e584726b 100644 --- a/powerpc/Op.v +++ b/powerpc/Op.v @@ -97,6 +97,10 @@ Inductive operation : Type := (*c Conversions between int and float: *) | Ointoffloat: operation (**r [rd = signed_int_of_float(r1)] *) | Ofloatofwords: operation (**r [rd = float_of_words(r1,r2)] *) +(*c Manipulating 64-bit integers: *) + | Omakelong: operation (**r [rd = r1 << 32 | r2] *) + | Olowlong: operation (**r [rd = low-word(r1)] *) + | Ohighlong: operation (**r [rd = high-word(r1)] *) (*c Boolean tests: *) | Ocmp: condition -> operation. (**r [rd = 1] if condition holds, [rd = 0] otherwise. *) @@ -205,6 +209,9 @@ Definition eval_operation | Osingleoffloat, v1::nil => Some(Val.singleoffloat v1) | Ointoffloat, v1::nil => Val.intoffloat v1 | Ofloatofwords, v1::v2::nil => Some(Val.floatofwords v1 v2) + | Omakelong, v1::v2::nil => Some(Val.longofwords v1 v2) + | Olowlong, v1::nil => Some(Val.loword v1) + | Ohighlong, v1::nil => Some(Val.hiword v1) | Ocmp c, _ => Some(Val.of_optbool (eval_condition c vl m)) | _, _ => None end. @@ -225,7 +232,7 @@ Ltac FuncInv := match goal with | H: (match ?x with nil => _ | _ :: _ => _ end = Some _) |- _ => destruct x; simpl in H; try discriminate; FuncInv - | H: (match ?v with Vundef => _ | Vint _ => _ | Vfloat _ => _ | Vptr _ _ => _ end = Some _) |- _ => + | H: (match ?v with Vundef => _ | Vint _ => _ | Vlong _ => _ | Vfloat _ => _ | Vptr _ _ => _ end = Some _) |- _ => destruct v; simpl in H; try discriminate; FuncInv | H: (Some _ = Some _) |- _ => injection H; intros; clear H; FuncInv @@ -292,6 +299,9 @@ Definition type_of_operation (op: operation) : list typ * typ := | Osingleoffloat => (Tfloat :: nil, Tfloat) | Ointoffloat => (Tfloat :: nil, Tint) | Ofloatofwords => (Tint :: Tint :: nil, Tfloat) + | Omakelong => (Tint :: Tint :: nil, Tlong) + | Olowlong => (Tlong :: nil, Tint) + | Ohighlong => (Tlong :: nil, Tint) | Ocmp c => (type_of_condition c, Tint) end. @@ -366,6 +376,9 @@ Proof with (try exact I). destruct v0... destruct v0; simpl in H0; inv H0. destruct (Float.intoffloat f); inv H2... destruct v0; destruct v1... + destruct v0; destruct v1... + destruct v0... + destruct v0... destruct (eval_condition c vl m); simpl... destruct b... Qed. @@ -481,6 +494,33 @@ Proof. rewrite Val.add_assoc. simpl. auto. Qed. +(** Offset an addressing mode [addr] by a quantity [delta], so that + it designates the pointer [delta] bytes past the pointer designated + by [addr]. May be undefined, in which case [None] is returned. *) + +Definition offset_addressing (addr: addressing) (delta: int) : option addressing := + match addr with + | Aindexed n => Some(Aindexed (Int.add n delta)) + | Aindexed2 => None + | Aglobal s n => Some(Aglobal s (Int.add n delta)) + | Abased s n => Some(Abased s (Int.add n delta)) + | Ainstack n => Some(Ainstack (Int.add n delta)) + end. + +Lemma eval_offset_addressing: + forall (F V: Type) (ge: Genv.t F V) sp addr args delta addr' v, + offset_addressing addr delta = Some addr' -> + eval_addressing ge sp addr args = Some v -> + eval_addressing ge sp addr' args = Some(Val.add v (Vint delta)). +Proof. + intros. destruct addr; simpl in H; inv H; simpl in *; FuncInv; subst. + rewrite Val.add_assoc; auto. + unfold symbol_address. destruct (Genv.find_symbol ge i); auto. + unfold symbol_address. destruct (Genv.find_symbol ge i); auto. + rewrite Val.add_assoc. rewrite Val.add_permut. rewrite Val.add_commut. auto. + rewrite Val.add_assoc. auto. +Qed. + (** Transformation of addressing modes with two operands or more into an equivalent arithmetic operation. This is used in the [Reload] pass when a store instruction cannot be reloaded directly because @@ -510,14 +550,6 @@ Proof. intros. destruct addr; simpl in H; reflexivity || omegaContradiction. Qed. -(** Two-address operations. There is only one: rotate-mask-insert. *) - -Definition two_address_op (op: operation) : bool := - match op with - | Oroli _ _ => true - | _ => false - end. - (** Operations that are so cheap to recompute that CSE should not factor them out. *) Definition is_trivial_op (op: operation) : bool := @@ -767,7 +799,10 @@ Proof. inv H4; simpl in H1; inv H1. simpl. destruct (Float.intoffloat f0); simpl in H2; inv H2. exists (Vint i); auto. inv H4; inv H2; simpl; auto. - subst v1. destruct (eval_condition c vl1 m1) eqn:?. + inv H4; inv H2; simpl; auto. + inv H4; simpl; auto. + inv H4; simpl; auto. + subst. destruct (eval_condition c vl1 m1) eqn:?. exploit eval_condition_inj; eauto. intros EQ; rewrite EQ. destruct b; simpl; constructor. simpl; constructor. diff --git a/powerpc/PrintAsm.ml b/powerpc/PrintAsm.ml index 2d69be1e..319e12c1 100644 --- a/powerpc/PrintAsm.ml +++ b/powerpc/PrintAsm.ml @@ -223,8 +223,8 @@ let rolm_mask n = - annotation statements: take their arguments in registers or stack locations; generate no code; - inlined by the compiler: take their arguments in arbitrary - registers; preserve all registers except the temporaries - (GPR0, GPR11, GPR12, FPR0, FPR12, FPR13); + registers; preserve all registers except the reserved temporaries + (GPR0, GPR11, GPR12, FPR13); - inlined while printing asm code; take their arguments in locations dictated by the calling conventions; preserve callee-save regs only. *) @@ -239,11 +239,12 @@ let print_annot_val oc txt args res = fprintf oc "%s annotation: " comment; PrintAnnot.print_annot_val preg oc txt args; match args, res with - | IR src :: _, IR dst -> + | [IR src], [IR dst] -> if dst <> src then fprintf oc " mr %a, %a\n" ireg dst ireg src - | FR src :: _, FR dst -> + | [FR src], [FR dst] -> if dst <> src then fprintf oc " fmr %a, %a\n" freg dst freg src - | _, _ -> assert false + | _, _ -> + assert false (* Handling of memcpy *) @@ -256,8 +257,8 @@ let print_annot_val oc txt args res = let print_builtin_memcpy_small oc sz al src dst = let rec copy ofs sz = if sz >= 8 && al >= 4 then begin - fprintf oc " lfd %a, %d(%a)\n" freg FPR0 ofs ireg src; - fprintf oc " stfd %a, %d(%a)\n" freg FPR0 ofs ireg dst; + fprintf oc " lfd %a, %d(%a)\n" freg FPR13 ofs ireg src; + fprintf oc " stfd %a, %d(%a)\n" freg FPR13 ofs ireg dst; copy (ofs + 8) (sz - 8) end else if sz >= 4 then begin fprintf oc " lwz %a, %d(%a)\n" ireg GPR0 ofs ireg src; @@ -326,14 +327,23 @@ let print_builtin_vload_common oc chunk base offset res = fprintf oc " lfs %a, %a(%a)\n" freg res constant offset ireg base | (Mfloat64 | Mfloat64al32), FR res -> fprintf oc " lfd %a, %a(%a)\n" freg res constant offset ireg base + (* Mint64 is special-cased below *) | _ -> assert false let print_builtin_vload oc chunk args res = fprintf oc "%s begin builtin __builtin_volatile_read\n" comment; - begin match args with - | [IR addr] -> + begin match args, res with + | [IR addr], [res] when chunk <> Mint64 -> print_builtin_vload_common oc chunk addr (Cint Integers.Int.zero) res + | [IR addr], [IR res1; IR res2] when chunk = Mint64 -> + if addr <> res1 then begin + fprintf oc " lwz %a, 0(%a)\n" ireg res1 ireg addr; + fprintf oc " lwz %a, 4(%a)\n" ireg res2 ireg addr + end else begin + fprintf oc " lwz %a, 4(%a)\n" ireg res2 ireg addr; + fprintf oc " lwz %a, 0(%a)\n" ireg res1 ireg addr + end | _ -> assert false end; @@ -341,9 +351,24 @@ let print_builtin_vload oc chunk args res = let print_builtin_vload_global oc chunk id ofs args res = fprintf oc "%s begin builtin __builtin_volatile_read\n" comment; - fprintf oc " addis %a, %a, %a\n" - ireg GPR11 ireg_or_zero GPR0 constant (Csymbol_high(id, ofs)); - print_builtin_vload_common oc chunk GPR11 (Csymbol_low(id, ofs)) res; + begin match res with + | [res] when chunk <> Mint64 -> + fprintf oc " addis %a, %a, %a\n" + ireg GPR11 ireg_or_zero GPR0 constant (Csymbol_high(id, ofs)); + print_builtin_vload_common oc chunk GPR11 (Csymbol_low(id, ofs)) res + | [IR res1; IR res2] when chunk = Mint64 -> + fprintf oc " addis %a, %a, %a\n" + ireg res1 ireg_or_zero GPR0 constant (Csymbol_high(id, ofs)); + fprintf oc " lwz %a, %a(%a)\n" + ireg res1 constant (Csymbol_low(id, ofs)) ireg res1; + let ofs = Integers.Int.add ofs (coqint_of_camlint 4l) in + fprintf oc " addis %a, %a, %a\n" + ireg res2 ireg_or_zero GPR0 constant (Csymbol_high(id, ofs)); + fprintf oc " lwz %a, %a(%a)\n" + ireg res2 constant (Csymbol_low(id, ofs)) ireg res2 + | _ -> + assert false + end; fprintf oc "%s end builtin __builtin_volatile_read\n" comment let print_builtin_vstore_common oc chunk base offset src = @@ -359,14 +384,18 @@ let print_builtin_vstore_common oc chunk base offset src = fprintf oc " stfs %a, %a(%a)\n" freg FPR13 constant offset ireg base | (Mfloat64 | Mfloat64al32), FR src -> fprintf oc " stfd %a, %a(%a)\n" freg src constant offset ireg base + (* Mint64 is special-cased below *) | _ -> assert false let print_builtin_vstore oc chunk args = fprintf oc "%s begin builtin __builtin_volatile_write\n" comment; begin match args with - | [IR addr; src] -> + | [IR addr; src] when chunk <> Mint64 -> print_builtin_vstore_common oc chunk addr (Cint Integers.Int.zero) src + | [IR addr; IR src1; IR src2] when chunk = Mint64 -> + fprintf oc " stw %a, 0(%a)\n" ireg src1 ireg addr; + fprintf oc " stw %a, 4(%a)\n" ireg src2 ireg addr | _ -> assert false end; @@ -375,11 +404,24 @@ let print_builtin_vstore oc chunk args = let print_builtin_vstore_global oc chunk id ofs args = fprintf oc "%s begin builtin __builtin_volatile_write\n" comment; begin match args with - | [src] -> + | [src] when chunk <> Mint64 -> let tmp = if src = IR GPR11 then GPR12 else GPR11 in fprintf oc " addis %a, %a, %a\n" ireg tmp ireg_or_zero GPR0 constant (Csymbol_high(id, ofs)); print_builtin_vstore_common oc chunk tmp (Csymbol_low(id, ofs)) src + | [IR src1; IR src2] when chunk = Mint64 -> + let tmp = + if not (List.mem GPR12 [src1; src2]) then GPR12 else + if not (List.mem GPR11 [src1; src2]) then GPR11 else GPR10 in + fprintf oc " addis %a, %a, %a\n" + ireg tmp ireg_or_zero GPR0 constant (Csymbol_high(id, ofs)); + fprintf oc " stw %a, %a(%a)\n" + ireg src1 constant (Csymbol_low(id, ofs)) ireg tmp; + let ofs = Integers.Int.add ofs (coqint_of_camlint 4l) in + fprintf oc " addis %a, %a, %a\n" + ireg tmp ireg_or_zero GPR0 constant (Csymbol_high(id, ofs)); + fprintf oc " stw %a, %a(%a)\n" + ireg src2 constant (Csymbol_low(id, ofs)) ireg tmp | _ -> assert false end; @@ -389,47 +431,84 @@ let print_builtin_vstore_global oc chunk id ofs args = let print_builtin_inline oc name args res = fprintf oc "%s begin builtin %s\n" comment name; - (* Can use as temporaries: GPR0, GPR11, GPR12, FPR0, FPR12, FPR13 *) + (* Can use as temporaries: GPR0, FPR13 *) begin match name, args, res with (* Integer arithmetic *) - | "__builtin_mulhw", [IR a1; IR a2], IR res -> + | "__builtin_mulhw", [IR a1; IR a2], [IR res] -> fprintf oc " mulhw %a, %a, %a\n" ireg res ireg a1 ireg a2 - | "__builtin_mulhwu", [IR a1; IR a2], IR res -> + | "__builtin_mulhwu", [IR a1; IR a2], [IR res] -> fprintf oc " mulhwu %a, %a, %a\n" ireg res ireg a1 ireg a2 - | "__builtin_cntlz", [IR a1], IR res -> + | "__builtin_cntlz", [IR a1], [IR res] -> fprintf oc " cntlzw %a, %a\n" ireg res ireg a1 - | "__builtin_bswap", [IR a1], IR res -> + | "__builtin_bswap", [IR a1], [IR res] -> fprintf oc " stwu %a, -8(%a)\n" ireg a1 ireg GPR1; fprintf oc " lwbrx %a, %a, %a\n" ireg res ireg_or_zero GPR0 ireg GPR1; fprintf oc " addi %a, %a, 8\n" ireg GPR1 ireg GPR1 (* Float arithmetic *) - | "__builtin_fmadd", [FR a1; FR a2; FR a3], FR res -> + | "__builtin_fmadd", [FR a1; FR a2; FR a3], [FR res] -> fprintf oc " fmadd %a, %a, %a, %a\n" freg res freg a1 freg a2 freg a3 - | "__builtin_fmsub", [FR a1; FR a2; FR a3], FR res -> + | "__builtin_fmsub", [FR a1; FR a2; FR a3], [FR res] -> fprintf oc " fmsub %a, %a, %a, %a\n" freg res freg a1 freg a2 freg a3 - | "__builtin_fnmadd", [FR a1; FR a2; FR a3], FR res -> + | "__builtin_fnmadd", [FR a1; FR a2; FR a3], [FR res] -> fprintf oc " fnmadd %a, %a, %a, %a\n" freg res freg a1 freg a2 freg a3 - | "__builtin_fnmsub", [FR a1; FR a2; FR a3], FR res -> + | "__builtin_fnmsub", [FR a1; FR a2; FR a3], [FR res] -> fprintf oc " fnmsub %a, %a, %a, %a\n" freg res freg a1 freg a2 freg a3 - | "__builtin_fabs", [FR a1], FR res -> + | "__builtin_fabs", [FR a1], [FR res] -> fprintf oc " fabs %a, %a\n" freg res freg a1 - | "__builtin_fsqrt", [FR a1], FR res -> + | "__builtin_fsqrt", [FR a1], [FR res] -> fprintf oc " fsqrt %a, %a\n" freg res freg a1 - | "__builtin_frsqrte", [FR a1], FR res -> + | "__builtin_frsqrte", [FR a1], [FR res] -> fprintf oc " frsqrte %a, %a\n" freg res freg a1 - | "__builtin_fres", [FR a1], FR res -> + | "__builtin_fres", [FR a1], [FR res] -> fprintf oc " fres %a, %a\n" freg res freg a1 - | "__builtin_fsel", [FR a1; FR a2; FR a3], FR res -> + | "__builtin_fsel", [FR a1; FR a2; FR a3], [FR res] -> fprintf oc " fsel %a, %a, %a, %a\n" freg res freg a1 freg a2 freg a3 - | "__builtin_fcti", [FR a1], IR res -> + | "__builtin_fcti", [FR a1], [IR res] -> fprintf oc " fctiw %a, %a\n" freg FPR13 freg a1; fprintf oc " stfdu %a, -8(%a)\n" freg FPR13 ireg GPR1; fprintf oc " lwz %a, 4(%a)\n" ireg res ireg GPR1; fprintf oc " addi %a, %a, 8\n" ireg GPR1 ireg GPR1 + (* 64-bit integer arithmetic *) + | "__builtin_negl", [IR ah; IR al], [IR rh; IR rl] -> + if rl = ah then begin + fprintf oc " subfic %a, %a, 0\n" ireg GPR0 ireg al; + fprintf oc " subfze %a, %a\n" ireg rh ireg ah; + fprintf oc " mr %a, %a\n" ireg rl ireg GPR0 + end else begin + fprintf oc " subfic %a, %a, 0\n" ireg rl ireg al; + fprintf oc " subfze %a, %a\n" ireg rh ireg ah + end + | "__builtin_addl", [IR ah; IR al; IR bh; IR bl], [IR rh; IR rl] -> + if rl = ah || rl = bh then begin + fprintf oc " addc %a, %a, %a\n" ireg GPR0 ireg al ireg bl; + fprintf oc " adde %a, %a, %a\n" ireg rh ireg ah ireg bh; + fprintf oc " mr %a, %a\n" ireg rl ireg GPR0 + end else begin + fprintf oc " addc %a, %a, %a\n" ireg rl ireg al ireg bl; + fprintf oc " adde %a, %a, %a\n" ireg rh ireg ah ireg bh + end + | "__builtin_subl", [IR ah; IR al; IR bh; IR bl], [IR rh; IR rl] -> + if rl = ah || rl = bh then begin + fprintf oc " subfc %a, %a, %a\n" ireg GPR0 ireg bl ireg al; + fprintf oc " subfe %a, %a, %a\n" ireg rh ireg bh ireg ah; + fprintf oc " mr %a, %a\n" ireg rl ireg GPR0 + end else begin + fprintf oc " subfc %a, %a, %a\n" ireg rl ireg bl ireg al; + fprintf oc " subfe %a, %a, %a\n" ireg rh ireg bh ireg ah + end + | "__builtin_mull", [IR a; IR b], [IR rh; IR rl] -> + if rl = a || rl = b then begin + fprintf oc " mullw %a, %a, %a\n" ireg GPR0 ireg a ireg b; + fprintf oc " mulhwu %a, %a, %a\n" ireg rh ireg a ireg b; + fprintf oc " mr %a, %a\n" ireg rl ireg GPR0 + end else begin + fprintf oc " mullw %a, %a, %a\n" ireg rl ireg a ireg b; + fprintf oc " mulhwu %a, %a, %a\n" ireg rh ireg a ireg b + end (* Memory accesses *) - | "__builtin_read16_reversed", [IR a1], IR res -> + | "__builtin_read16_reversed", [IR a1], [IR res] -> fprintf oc " lhbrx %a, %a, %a\n" ireg res ireg_or_zero GPR0 ireg a1 - | "__builtin_read32_reversed", [IR a1], IR res -> + | "__builtin_read32_reversed", [IR a1], [IR res] -> fprintf oc " lwbrx %a, %a, %a\n" ireg res ireg_or_zero GPR0 ireg a1 | "__builtin_write16_reversed", [IR a1; IR a2], _ -> fprintf oc " sthbrx %a, %a, %a\n" ireg a2 ireg_or_zero GPR0 ireg a1 @@ -484,9 +563,9 @@ let print_instruction oc tbl pc fallthrough = function if adj >= -0x8000l then fprintf oc " stwu %a, %ld(%a)\n" ireg GPR1 adj ireg GPR1 else begin - fprintf oc " addis %a, 0, %ld\n" ireg GPR12 (Int32.shift_right_logical adj 16); - fprintf oc " ori %a, %a, %ld\n" ireg GPR12 ireg GPR12 (Int32.logand adj 0xFFFFl); - fprintf oc " stwux %a, %a, %a\n" ireg GPR1 ireg GPR1 ireg GPR12 + fprintf oc " addis %a, 0, %ld\n" ireg GPR0 (Int32.shift_right_logical adj 16); + fprintf oc " ori %a, %a, %ld\n" ireg GPR0 ireg GPR0 (Int32.logand adj 0xFFFFl); + fprintf oc " stwux %a, %a, %a\n" ireg GPR1 ireg GPR1 ireg GPR0 end | Pand_(r1, r2, r3) -> fprintf oc " and. %a, %a, %a\n" ireg r1 ireg r2 ireg r3 @@ -860,7 +939,7 @@ let print_function oc name code = (* Generation of stub functions *) -let re_variadic_stub = Str.regexp "\\(.*\\)\\$[if]*$" +let re_variadic_stub = Str.regexp "\\(.*\\)\\$[ifl]*$" (* Stubs for EABI *) @@ -908,6 +987,11 @@ let print_init oc = function fprintf oc " .short %ld\n" (camlint_of_coqint n) | Init_int32 n -> fprintf oc " .long %ld\n" (camlint_of_coqint n) + | Init_int64 n -> + let b = camlint64_of_coqint n in + fprintf oc " .long 0x%Lx, 0x%Lx\n" + (Int64.shift_right_logical b 32) + (Int64.logand b 0xFFFFFFFFL) | Init_float32 n -> fprintf oc " .long 0x%lx %s %.18g\n" (camlint_of_coqint (Floats.Float.bits_of_single n)) diff --git a/powerpc/PrintOp.ml b/powerpc/PrintOp.ml index a2f9ff83..cac8cd2b 100644 --- a/powerpc/PrintOp.ml +++ b/powerpc/PrintOp.ml @@ -96,6 +96,9 @@ let print_operation reg pp = function | Osingleoffloat, [r1] -> fprintf pp "singleoffloat(%a)" reg r1 | Ointoffloat, [r1] -> fprintf pp "intoffloat(%a)" reg r1 | Ofloatofwords, [r1;r2] -> fprintf pp "floatofwords(%a,%a)" reg r1 reg r2 + | Omakelong, [r1;r2] -> fprintf pp "makelong(%a,%a)" reg r1 reg r2 + | Olowlong, [r1] -> fprintf pp "lowlong(%a)" reg r1 + | Ohighlong, [r1] -> fprintf pp "highlong(%a)" reg r1 | Ocmp c, args -> print_condition reg pp (c, args) | _ -> fprintf pp "" diff --git a/powerpc/SelectOp.vp b/powerpc/SelectOp.vp index 905a4489..a0118477 100644 --- a/powerpc/SelectOp.vp +++ b/powerpc/SelectOp.vp @@ -200,6 +200,8 @@ Nondetfunction mul (e1: expr) (e2: expr) := (** ** Bitwise and, or, xor *) Nondetfunction andimm (n1: int) (e2: expr) := + if Int.eq n1 Int.zero then Eop (Ointconst Int.zero) Enil else + if Int.eq n1 Int.mone then e2 else match e2 with | Eop (Ointconst n2) Enil => Eop (Ointconst (Int.and n1 n2)) Enil @@ -240,6 +242,8 @@ Definition same_expr_pure (e1 e2: expr) := end. Nondetfunction orimm (n1: int) (e2: expr) := + if Int.eq n1 Int.zero then e2 else + if Int.eq n1 Int.mone then Eop (Ointconst Int.mone) Enil else match e2 with | Eop (Ointconst n2) Enil => Eop (Ointconst (Int.or n1 n2)) Enil | Eop (Oorimm n2) (t2:::Enil) => Eop (Oorimm (Int.or n1 n2)) (t2:::Enil) @@ -268,6 +272,7 @@ Nondetfunction or (e1: expr) (e2: expr) := end. Nondetfunction xorimm (n1: int) (e2: expr) := + if Int.eq n1 Int.zero then e2 else match e2 with | Eop (Ointconst n2) Enil => Eop (Ointconst (Int.xor n1 n2)) Enil | Eop (Oxorimm n2) (t2:::Enil) => Eop (Oxorimm (Int.xor n1 n2)) (t2:::Enil) @@ -444,12 +449,18 @@ Definition singleoffloat (e: expr) := Eop Osingleoffloat (e ::: Enil). (** ** Recognition of addressing modes for load and store operations *) +Definition can_use_Aindexed2 (chunk: memory_chunk): bool := + match chunk with Mint64 => false | _ => true end. + Nondetfunction addressing (chunk: memory_chunk) (e: expr) := match e with | Eop (Oaddrsymbol s n) Enil => (Aglobal s n, Enil) | Eop (Oaddrstack n) Enil => (Ainstack n, Enil) | Eop Oadd (Eop (Oaddrsymbol s n) Enil ::: e2 ::: Enil) => (Abased s n, e2:::Enil) | Eop (Oaddimm n) (e1:::Enil) => (Aindexed n, e1:::Enil) - | Eop Oadd (e1:::e2:::Enil) => (Aindexed2, e1:::e2:::Enil) + | Eop Oadd (e1:::e2:::Enil) => + if can_use_Aindexed2 chunk + then (Aindexed2, e1:::e2:::Enil) + else (Aindexed Int.zero, Eop Oadd (e1:::e2:::Enil) ::: Enil) | _ => (Aindexed Int.zero, e:::Enil) end. diff --git a/powerpc/SelectOpproof.v b/powerpc/SelectOpproof.v index 7be88580..f7513140 100644 --- a/powerpc/SelectOpproof.v +++ b/powerpc/SelectOpproof.v @@ -27,13 +27,6 @@ Require Import SelectOp. Open Local Scope cminorsel_scope. -Section CMCONSTR. - -Variable ge: genv. -Variable sp: val. -Variable e: env. -Variable m: mem. - (** * Useful lemmas and tactics *) (** The following are trivial lemmas and custom tactics that help @@ -81,6 +74,13 @@ Ltac TrivialExists := (** * Correctness of the smart constructors *) +Section CMCONSTR. + +Variable ge: genv. +Variable sp: val. +Variable e: env. +Variable m: mem. + (** We now show that the code generated by "smart constructor" functions such as [SelectOp.notint] behaves as expected. Continuing the [notint] example, we show that if the expression [e] @@ -163,7 +163,7 @@ Proof. rewrite Int.add_commut. auto. unfold symbol_address. destruct (Genv.find_symbol ge s); simpl; auto. rewrite Int.add_commut; auto. rewrite Val.add_assoc. rewrite Int.add_commut. auto. - subst x. rewrite Val.add_assoc. rewrite Int.add_commut. auto. + subst. rewrite Val.add_assoc. rewrite Int.add_commut. auto. Qed. Theorem eval_add: binary_constructor_sound add Val.add. @@ -327,7 +327,15 @@ Qed. Theorem eval_andimm: forall n, unary_constructor_sound (andimm n) (fun x => Val.and x (Vint n)). Proof. - intros; red; intros until x. unfold andimm. case (andimm_match a); intros. + intros; red; intros until x. unfold andimm. + predSpec Int.eq Int.eq_spec n Int.zero. + intros. subst. exists (Vint Int.zero); split. EvalOp. + destruct x; simpl; auto. rewrite Int.and_zero; auto. + predSpec Int.eq Int.eq_spec n Int.mone. + intros. subst. exists x; split. auto. + destruct x; simpl; auto. rewrite Int.and_mone; auto. + clear H H0. + case (andimm_match a); intros. InvEval. TrivialExists. simpl. rewrite Int.and_commut; auto. set (n' := Int.and n n2). destruct (Int.eq (Int.shru (Int.shl n' amount) amount) n' && @@ -376,8 +384,12 @@ Qed. Theorem eval_orimm: forall n, unary_constructor_sound (orimm n) (fun x => Val.or x (Vint n)). Proof. - intros; red; intros until x. - unfold orimm. destruct (orimm_match a); intros; InvEval. + intros; red; intros until x. unfold orimm. + predSpec Int.eq Int.eq_spec n Int.zero. + intros. subst. exists x; split; auto. destruct x; simpl; auto. rewrite Int.or_zero; auto. + predSpec Int.eq Int.eq_spec n Int.mone. + intros. subst. exists (Vint Int.mone); split. EvalOp. destruct x; simpl; auto. rewrite Int.or_mone; auto. + clear H H0. destruct (orimm_match a); intros; InvEval. TrivialExists. simpl. rewrite Int.or_commut; auto. subst. rewrite Val.or_assoc. simpl. rewrite Int.or_commut. TrivialExists. TrivialExists. @@ -433,8 +445,10 @@ Qed. Theorem eval_xorimm: forall n, unary_constructor_sound (xorimm n) (fun x => Val.xor x (Vint n)). Proof. - intros; red; intros until x. - unfold xorimm. destruct (xorimm_match a); intros; InvEval. + intros; red; intros until x. unfold xorimm. + predSpec Int.eq Int.eq_spec n Int.zero. + intros. subst. exists x; split; auto. destruct x; simpl; auto. rewrite Int.xor_zero; auto. + clear H. destruct (xorimm_match a); intros; InvEval. TrivialExists. simpl. rewrite Int.xor_commut; auto. subst. rewrite Val.xor_assoc. simpl. rewrite Int.xor_commut. TrivialExists. TrivialExists. @@ -445,9 +459,9 @@ Proof. red; intros until y; unfold xor; case (xor_match a b); intros; InvEval. rewrite Val.xor_commut. apply eval_xorimm; auto. apply eval_xorimm; auto. - subst x. rewrite Val.xor_commut. rewrite Val.not_xor. rewrite <- Val.xor_assoc. + subst. rewrite Val.xor_commut. rewrite Val.not_xor. rewrite <- Val.xor_assoc. rewrite <- Val.not_xor. rewrite Val.xor_commut. TrivialExists. - subst y. rewrite Val.not_xor. rewrite <- Val.xor_assoc. rewrite <- Val.not_xor. TrivialExists. + subst. rewrite Val.not_xor. rewrite <- Val.xor_assoc. rewrite <- Val.not_xor. TrivialExists. TrivialExists. Qed. @@ -843,7 +857,11 @@ Proof. exists (@nil val). split. eauto with evalexpr. simpl. auto. exists (v0 :: nil). split. eauto with evalexpr. simpl. congruence. exists (v1 :: nil). split. eauto with evalexpr. simpl. congruence. + destruct (can_use_Aindexed2 chunk). exists (v1 :: v0 :: nil). split. eauto with evalexpr. simpl. congruence. + exists (Vptr b ofs :: nil). split. + constructor. EvalOp. simpl; congruence. constructor. + simpl. rewrite Int.add_zero. auto. exists (v :: nil). split. eauto with evalexpr. subst v. simpl. rewrite Int.add_zero. auto. Qed. diff --git a/powerpc/eabi/Conventions1.v b/powerpc/eabi/Conventions1.v index 652f4a1b..8ff0dc01 100644 --- a/powerpc/eabi/Conventions1.v +++ b/powerpc/eabi/Conventions1.v @@ -21,21 +21,18 @@ Require Import Locations. (** Machine registers (type [mreg] in module [Locations]) are divided in the following groups: -- Temporaries used for spilling, reloading, and parallel move operations. -- Allocatable registers, that can be assigned to RTL pseudo-registers. - These are further divided into: --- Callee-save registers, whose value is preserved across a function call. --- Caller-save registers that can be modified during a function call. +- Callee-save registers, whose value is preserved across a function call. +- Caller-save registers that can be modified during a function call. We follow the PowerPC/EABI application binary interface (ABI) in our choice of callee- and caller-save registers. *) Definition int_caller_save_regs := - R3 :: R4 :: R5 :: R6 :: R7 :: R8 :: R9 :: R10 :: nil. + R3 :: R4 :: R5 :: R6 :: R7 :: R8 :: R9 :: R10 :: R11 :: R12 :: nil. Definition float_caller_save_regs := - F1 :: F2 :: F3 :: F4 :: F5 :: F6 :: F7 :: F8 :: F9 :: F10 :: F11 :: nil. + F0 :: F1 :: F2 :: F3 :: F4 :: F5 :: F6 :: F7 :: F8 :: F9 :: F10 :: F11 :: F12 :: F13 :: nil. Definition int_callee_save_regs := R31 :: R30 :: R29 :: R28 :: R27 :: R26 :: R25 :: R24 :: R23 :: @@ -45,26 +42,11 @@ Definition float_callee_save_regs := F31 :: F30 :: F29 :: F28 :: F27 :: F26 :: F25 :: F24 :: F23 :: F22 :: F21 :: F20 :: F19 :: F18 :: F17 :: F16 :: F15 :: F14 :: nil. -Definition destroyed_at_call_regs := - int_caller_save_regs ++ float_caller_save_regs. - Definition destroyed_at_call := - List.map R destroyed_at_call_regs. - -Definition int_temporaries := IT1 :: IT2 :: nil. - -Definition float_temporaries := FT1 :: FT2 :: FT3 :: nil. - -Definition temporary_regs := int_temporaries ++ float_temporaries. - -Definition temporaries := List.map R temporary_regs. - -Definition destroyed_at_move_regs: list mreg := nil. - -Definition destroyed_at_move := List.map R destroyed_at_move_regs. + int_caller_save_regs ++ float_caller_save_regs. Definition dummy_int_reg := R3. (**r Used in [Coloring]. *) -Definition dummy_float_reg := F1. (**r Used in [Coloring]. *) +Definition dummy_float_reg := F0. (**r Used in [Coloring]. *) (** The [index_int_callee_save] and [index_float_callee_save] associate a unique positive integer to callee-save registers. This integer is @@ -175,34 +157,26 @@ Qed. Lemma register_classification: forall r, - (In (R r) temporaries \/ In (R r) destroyed_at_call) \/ - (In r int_callee_save_regs \/ In r float_callee_save_regs). + In r destroyed_at_call \/ In r int_callee_save_regs \/ In r float_callee_save_regs. Proof. destruct r; - try (left; left; simpl; OrEq); - try (left; right; simpl; OrEq); + try (left; simpl; OrEq); try (right; left; simpl; OrEq); try (right; right; simpl; OrEq). Qed. Lemma int_callee_save_not_destroyed: forall r, - In (R r) temporaries \/ In (R r) destroyed_at_call -> - ~(In r int_callee_save_regs). + In r destroyed_at_call -> In r int_callee_save_regs -> False. Proof. - intros; red; intros. elim H. - generalize H0. simpl; ElimOrEq; NotOrEq. - generalize H0. simpl; ElimOrEq; NotOrEq. + intros. revert H0 H. simpl. ElimOrEq; NotOrEq. Qed. Lemma float_callee_save_not_destroyed: forall r, - In (R r) temporaries \/ In (R r) destroyed_at_call -> - ~(In r float_callee_save_regs). + In r destroyed_at_call -> In r float_callee_save_regs -> False. Proof. - intros; red; intros. elim H. - generalize H0. simpl; ElimOrEq; NotOrEq. - generalize H0. simpl; ElimOrEq; NotOrEq. + intros. revert H0 H. simpl. ElimOrEq; NotOrEq. Qed. Lemma int_callee_save_type: @@ -259,39 +233,27 @@ Qed. (** ** Location of function result *) (** The result value of a function is passed back to the caller in - registers [R3] or [F1], depending on the type of the returned value. + registers [R3] or [F1] or [R3, R4], depending on the type of the returned value. We treat a function without result as a function with one integer result. *) -Definition loc_result (s: signature) : mreg := +Definition loc_result (s: signature) : list mreg := match s.(sig_res) with - | None => R3 - | Some Tint => R3 - | Some Tfloat => F1 + | None => R3 :: nil + | Some Tint => R3 :: nil + | Some Tfloat => F1 :: nil + | Some Tlong => R3 :: R4 :: nil end. -(** The result location has the type stated in the signature. *) - -Lemma loc_result_type: - forall sig, - mreg_type (loc_result sig) = - match sig.(sig_res) with None => Tint | Some ty => ty end. -Proof. - intros; unfold loc_result. - destruct (sig_res sig). - destruct t; reflexivity. - reflexivity. -Qed. - -(** The result location is a caller-save register or a temporary *) +(** The result location is a caller-save register *) Lemma loc_result_caller_save: - forall (s: signature), - In (R (loc_result s)) destroyed_at_call \/ In (R (loc_result s)) temporaries. + forall (s: signature) (r: mreg), + In r (loc_result s) -> In r destroyed_at_call. Proof. - intros; unfold loc_result. left; - destruct (sig_res s). - destruct t; simpl; OrEq. - simpl; OrEq. + intros. + assert (r = R3 \/ r = R4 \/ r = F1). + unfold loc_result in H. destruct (sig_res s); [destruct t|idtac]; simpl in H; intuition. + destruct H0 as [A | [A | A]]; subst r; simpl; OrEq. Qed. (** ** Location of function arguments *) @@ -300,213 +262,162 @@ Qed. to a function: - The first 8 integer arguments are passed in registers [R3] to [R10]. - The first 8 float arguments are passed in registers [F1] to [F8]. +- The first 4 long integer arguments are passed in register pairs [R3,R4] ... [R9,R10]. - Extra arguments are passed on the stack, in [Outgoing] slots, consecutively assigned (1 word for an integer argument, 2 words for a float), starting at word offset 0. - No stack space is reserved for the arguments that are passed in registers. *) +Definition int_param_regs := + R3 :: R4 :: R5 :: R6 :: R7 :: R8 :: R9 :: R10 :: nil. +Definition float_param_regs := + F1 :: F2 :: F3 :: F4 :: F5 :: F6 :: F7 :: F8 :: nil. + Fixpoint loc_arguments_rec - (tyl: list typ) (iregl: list mreg) (fregl: list mreg) - (ofs: Z) {struct tyl} : list loc := + (tyl: list typ) (ir fr ofs: Z) {struct tyl} : list loc := match tyl with | nil => nil | Tint :: tys => - match iregl with - | nil => - S (Outgoing ofs Tint) :: loc_arguments_rec tys nil fregl (ofs + 1) - | ireg :: iregs => - R ireg :: loc_arguments_rec tys iregs fregl ofs + match list_nth_z int_param_regs ir with + | None => + S Outgoing ofs Tint :: loc_arguments_rec tys ir fr (ofs + 1) + | Some ireg => + R ireg :: loc_arguments_rec tys (ir + 1) fr ofs end | Tfloat :: tys => - match fregl with - | nil => - S (Outgoing ofs Tfloat) :: loc_arguments_rec tys iregl nil (ofs + 2) - | freg :: fregs => - R freg :: loc_arguments_rec tys iregl fregs ofs + match list_nth_z float_param_regs fr with + | None => + S Outgoing ofs Tfloat :: loc_arguments_rec tys ir fr (ofs + 2) + | Some freg => + R freg :: loc_arguments_rec tys ir (fr + 1) ofs + end + | Tlong :: tys => + let ir := align ir 2 in + match list_nth_z int_param_regs ir, list_nth_z int_param_regs (ir + 1) with + | Some r1, Some r2 => + R r1 :: R r2 :: loc_arguments_rec tys (ir + 2) fr ofs + | _, _ => + S Outgoing ofs Tint :: S Outgoing (ofs + 1) Tint :: loc_arguments_rec tys ir fr (ofs + 2) end end. -Definition int_param_regs := - R3 :: R4 :: R5 :: R6 :: R7 :: R8 :: R9 :: R10 :: nil. -Definition float_param_regs := - F1 :: F2 :: F3 :: F4 :: F5 :: F6 :: F7 :: F8 :: nil. - (** [loc_arguments s] returns the list of locations where to store arguments when calling a function with signature [s]. *) Definition loc_arguments (s: signature) : list loc := - loc_arguments_rec s.(sig_args) int_param_regs float_param_regs 0. + loc_arguments_rec s.(sig_args) 0 0 0. (** [size_arguments s] returns the number of [Outgoing] slots used to call a function with signature [s]. *) -Fixpoint size_arguments_rec - (tyl: list typ) (iregl: list mreg) (fregl: list mreg) - (ofs: Z) {struct tyl} : Z := +Fixpoint size_arguments_rec (tyl: list typ) (ir fr ofs: Z) {struct tyl} : Z := match tyl with | nil => ofs | Tint :: tys => - match iregl with - | nil => size_arguments_rec tys nil fregl (ofs + 1) - | ireg :: iregs => size_arguments_rec tys iregs fregl ofs + match list_nth_z int_param_regs ir with + | None => size_arguments_rec tys ir fr (ofs + 1) + | Some ireg => size_arguments_rec tys (ir + 1) fr ofs end | Tfloat :: tys => - match fregl with - | nil => size_arguments_rec tys iregl nil (ofs + 2) - | freg :: fregs => size_arguments_rec tys iregl fregs ofs + match list_nth_z float_param_regs fr with + | None => size_arguments_rec tys ir fr (ofs + 2) + | Some freg => size_arguments_rec tys ir (fr + 1) ofs + end + | Tlong :: tys => + let ir := align ir 2 in + match list_nth_z int_param_regs ir, list_nth_z int_param_regs (ir + 1) with + | Some r1, Some r2 => size_arguments_rec tys (ir + 2) fr ofs + | _, _ => size_arguments_rec tys ir fr (ofs + 2) end end. + Definition size_arguments (s: signature) : Z := - size_arguments_rec s.(sig_args) int_param_regs float_param_regs 0. + size_arguments_rec s.(sig_args) 0 0 0. (** A tail-call is possible for a signature if the corresponding arguments are all passed in registers. *) Definition tailcall_possible (s: signature) : Prop := forall l, In l (loc_arguments s) -> - match l with R _ => True | S _ => False end. + match l with R _ => True | S _ _ _ => False end. -(** Argument locations are either non-temporary registers or [Outgoing] +(** Argument locations are either caller-save registers or [Outgoing] stack slots at nonnegative offsets. *) Definition loc_argument_acceptable (l: loc) : Prop := match l with - | R r => ~(In l temporaries) - | S (Outgoing ofs ty) => ofs >= 0 + | R r => In r destroyed_at_call + | S Outgoing ofs ty => ofs >= 0 /\ ty <> Tlong | _ => False end. Remark loc_arguments_rec_charact: - forall tyl iregl fregl ofs l, - In l (loc_arguments_rec tyl iregl fregl ofs) -> + forall tyl ir fr ofs l, + In l (loc_arguments_rec tyl ir fr ofs) -> match l with - | R r => In r iregl \/ In r fregl - | S (Outgoing ofs' ty) => ofs' >= ofs - | S _ => False + | R r => In r int_param_regs \/ In r float_param_regs + | S Outgoing ofs' ty => ofs' >= ofs /\ ty <> Tlong + | S _ _ _ => False end. Proof. +Opaque list_nth_z. induction tyl; simpl loc_arguments_rec; intros. elim H. - destruct a. - destruct iregl; elim H; intro. - subst l. omega. - generalize (IHtyl _ _ _ _ H0). destruct l; auto. destruct s; auto. omega. - subst l. auto with coqlib. - generalize (IHtyl _ _ _ _ H0). destruct l; auto. simpl; intuition. - destruct fregl; elim H; intro. - subst l. omega. - generalize (IHtyl _ _ _ _ H0). destruct l; auto. destruct s; auto. omega. - subst l. auto with coqlib. - generalize (IHtyl _ _ _ _ H0). destruct l; auto. - intros [A|B]. left; auto. right; auto with coqlib. + destruct a. + destruct (list_nth_z int_param_regs ir) as [r|] eqn:E; destruct H. + subst. left. eapply list_nth_z_in; eauto. + eapply IHtyl; eauto. + subst. split. omega. congruence. + exploit IHtyl; eauto. destruct l; auto. destruct sl; auto. intuition omega. + destruct (list_nth_z float_param_regs fr) as [r|] eqn:E; destruct H. + subst. right. eapply list_nth_z_in; eauto. + eapply IHtyl; eauto. + subst. split. omega. congruence. + exploit IHtyl; eauto. destruct l; auto. destruct sl; auto. intuition omega. + set (ir' := align ir 2) in *. + destruct (list_nth_z int_param_regs ir') as [r1|] eqn:E1. + destruct (list_nth_z int_param_regs (ir' + 1)) as [r2|] eqn:E2. + destruct H. subst; left; eapply list_nth_z_in; eauto. + destruct H. subst; left; eapply list_nth_z_in; eauto. + eapply IHtyl; eauto. + destruct H. subst. split. omega. congruence. + destruct H. subst. split. omega. congruence. + exploit IHtyl; eauto. destruct l; auto. destruct sl; auto. intuition omega. + destruct H. subst. split. omega. congruence. + destruct H. subst. split. omega. congruence. + exploit IHtyl; eauto. destruct l; auto. destruct sl; auto. intuition omega. Qed. Lemma loc_arguments_acceptable: - forall (s: signature) (r: loc), - In r (loc_arguments s) -> loc_argument_acceptable r. + forall (s: signature) (l: loc), + In l (loc_arguments s) -> loc_argument_acceptable l. Proof. unfold loc_arguments; intros. generalize (loc_arguments_rec_charact _ _ _ _ _ H). - destruct r. - intro H0; elim H0. simpl. unfold not. ElimOrEq; NotOrEq. - simpl. unfold not. ElimOrEq; NotOrEq. - destruct s0; try contradiction. - simpl. omega. + destruct l. + intro H0; elim H0; simpl; ElimOrEq; OrEq. + destruct sl; try contradiction. simpl. intuition omega. Qed. Hint Resolve loc_arguments_acceptable: locs. -(** Arguments are parwise disjoint (in the sense of [Loc.norepet]). *) - -Remark loc_arguments_rec_notin_reg: - forall tyl iregl fregl ofs r, - ~(In r iregl) -> ~(In r fregl) -> - Loc.notin (R r) (loc_arguments_rec tyl iregl fregl ofs). -Proof. - induction tyl; simpl; intros. - auto. - destruct a. - destruct iregl; simpl. auto. - simpl in H. split. apply sym_not_equal. tauto. - apply IHtyl. tauto. tauto. - destruct fregl; simpl. auto. - simpl in H0. split. apply sym_not_equal. tauto. - apply IHtyl. - red; intro. apply H. auto. - tauto. -Qed. - -Remark loc_arguments_rec_notin_local: - forall tyl iregl fregl ofs ofs0 ty0, - Loc.notin (S (Local ofs0 ty0)) (loc_arguments_rec tyl iregl fregl ofs). -Proof. - induction tyl; simpl; intros. - auto. - destruct a. - destruct iregl; simpl; auto. - destruct fregl; simpl; auto. -Qed. - -Remark loc_arguments_rec_notin_outgoing: - forall tyl iregl fregl ofs ofs0 ty0, - ofs0 + typesize ty0 <= ofs -> - Loc.notin (S (Outgoing ofs0 ty0)) (loc_arguments_rec tyl iregl fregl ofs). -Proof. - induction tyl; simpl; intros. - auto. - destruct a. - destruct iregl; simpl. - split. omega. eapply IHtyl. omega. - auto. - destruct fregl; simpl. - split. omega. eapply IHtyl. omega. - auto. -Qed. - -Lemma loc_arguments_norepet: - forall (s: signature), Loc.norepet (loc_arguments s). -Proof. - assert (forall tyl iregl fregl ofs, - list_norepet iregl -> - list_norepet fregl -> - list_disjoint iregl fregl -> - Loc.norepet (loc_arguments_rec tyl iregl fregl ofs)). - induction tyl; simpl; intros. - constructor. - destruct a. - destruct iregl; constructor. - apply loc_arguments_rec_notin_outgoing. simpl; omega. auto. - apply loc_arguments_rec_notin_reg. inversion H. auto. - apply list_disjoint_notin with (m :: iregl); auto with coqlib. - apply IHtyl. inv H; auto. auto. - eapply list_disjoint_cons_left; eauto. - destruct fregl; constructor. - apply loc_arguments_rec_notin_outgoing. simpl; omega. auto. - apply loc_arguments_rec_notin_reg. - red; intro. apply (H1 m m). auto. - auto with coqlib. auto. inv H0; auto. - apply IHtyl. auto. - inv H0; auto. - red; intros. apply H1. auto. auto with coqlib. - - intro. unfold loc_arguments. apply H. - unfold int_param_regs. NoRepet. - unfold float_param_regs. NoRepet. - red; intros x y; simpl. ElimOrEq; ElimOrEq; discriminate. -Qed. - (** The offsets of [Outgoing] arguments are below [size_arguments s]. *) Remark size_arguments_rec_above: - forall tyl iregl fregl ofs0, - ofs0 <= size_arguments_rec tyl iregl fregl ofs0. + forall tyl ir fr ofs0, + ofs0 <= size_arguments_rec tyl ir fr ofs0. Proof. induction tyl; simpl; intros. omega. destruct a. - destruct iregl. apply Zle_trans with (ofs0 + 1); auto; omega. auto. - destruct fregl. apply Zle_trans with (ofs0 + 2); auto; omega. auto. + destruct (list_nth_z int_param_regs ir); eauto. apply Zle_trans with (ofs0 + 1); auto; omega. + destruct (list_nth_z float_param_regs fr); eauto. apply Zle_trans with (ofs0 + 2); auto; omega. + set (ir' := align ir 2). + destruct (list_nth_z int_param_regs ir'); eauto. + destruct (list_nth_z int_param_regs (ir' + 1)); eauto. + apply Zle_trans with (ofs0 + 2); auto; omega. + apply Zle_trans with (ofs0 + 2); auto; omega. Qed. Lemma size_arguments_above: @@ -518,81 +429,39 @@ Qed. Lemma loc_arguments_bounded: forall (s: signature) (ofs: Z) (ty: typ), - In (S (Outgoing ofs ty)) (loc_arguments s) -> + In (S Outgoing ofs ty) (loc_arguments s) -> ofs + typesize ty <= size_arguments s. Proof. intros. - assert (forall tyl iregl fregl ofs0, - In (S (Outgoing ofs ty)) (loc_arguments_rec tyl iregl fregl ofs0) -> - ofs + typesize ty <= size_arguments_rec tyl iregl fregl ofs0). + assert (forall tyl ir fr ofs0, + In (S Outgoing ofs ty) (loc_arguments_rec tyl ir fr ofs0) -> + ofs + typesize ty <= size_arguments_rec tyl ir fr ofs0). induction tyl; simpl; intros. elim H0. - destruct a. destruct iregl; elim H0; intro. - inv H1. simpl. apply size_arguments_rec_above. auto. - discriminate. auto. - destruct fregl; elim H0; intro. - inv H1. simpl. apply size_arguments_rec_above. auto. - discriminate. auto. - unfold size_arguments. eapply H0. unfold loc_arguments in H. eauto. -Qed. - -(** Temporary registers do not overlap with argument locations. *) - -Lemma loc_arguments_not_temporaries: - forall sig, Loc.disjoint (loc_arguments sig) temporaries. -Proof. - intros; red; intros x1 x2 H. - generalize (loc_arguments_rec_charact _ _ _ _ _ H). - destruct x1. - intro H0; elim H0; simpl; (ElimOrEq; ElimOrEq; congruence). - destruct s; try contradiction. intro. - simpl; ElimOrEq; auto. -Qed. -Hint Resolve loc_arguments_not_temporaries: locs. - -(** Argument registers are caller-save. *) - -Lemma arguments_caller_save: - forall sig r, - In (R r) (loc_arguments sig) -> In (R r) destroyed_at_call. -Proof. - unfold loc_arguments; intros. - elim (loc_arguments_rec_charact _ _ _ _ _ H); simpl. - ElimOrEq; intuition. - ElimOrEq; intuition. -Qed. - -(** Argument locations agree in number with the function signature. *) - -Lemma loc_arguments_length: - forall sig, - List.length (loc_arguments sig) = List.length sig.(sig_args). -Proof. - assert (forall tyl iregl fregl ofs, - List.length (loc_arguments_rec tyl iregl fregl ofs) = List.length tyl). - induction tyl; simpl; intros. - auto. - destruct a. - destruct iregl; simpl; decEq; auto. - destruct fregl; simpl; decEq; auto. - intros. unfold loc_arguments. auto. -Qed. - -(** Argument locations agree in types with the function signature. *) - -Lemma loc_arguments_type: - forall sig, List.map Loc.type (loc_arguments sig) = sig.(sig_args). -Proof. - assert (forall tyl iregl fregl ofs, - (forall r, In r iregl -> mreg_type r = Tint) -> - (forall r, In r fregl -> mreg_type r = Tfloat) -> - List.map Loc.type (loc_arguments_rec tyl iregl fregl ofs) = tyl). - induction tyl; simpl; intros. - auto. - destruct a; [destruct iregl|destruct fregl]; simpl; - f_equal; eauto with coqlib. - - intros. unfold loc_arguments. apply H. - intro; simpl. ElimOrEq; reflexivity. - intro; simpl. ElimOrEq; reflexivity. + destruct a. + destruct (list_nth_z int_param_regs ir); destruct H0. + congruence. + eauto. + inv H0. apply size_arguments_rec_above. + eauto. + destruct (list_nth_z float_param_regs fr); destruct H0. + congruence. + eauto. + inv H0. apply size_arguments_rec_above. + eauto. + set (ir' := align ir 2) in *. + destruct (list_nth_z int_param_regs ir'). + destruct (list_nth_z int_param_regs (ir' + 1)). + destruct H0. congruence. destruct H0. congruence. eauto. + destruct H0. inv H0. + transitivity (ofs + 2). simpl; omega. eauto. apply size_arguments_rec_above. + destruct H0. inv H0. + transitivity (ofs0 + 2). simpl; omega. eauto. apply size_arguments_rec_above. + eauto. + destruct H0. inv H0. + transitivity (ofs + 2). simpl; omega. eauto. apply size_arguments_rec_above. + destruct H0. inv H0. + transitivity (ofs0 + 2). simpl; omega. eauto. apply size_arguments_rec_above. + eauto. + eauto. Qed. diff --git a/powerpc/eabi/Stacklayout.v b/powerpc/eabi/Stacklayout.v index cd4d9ae4..be823c1e 100644 --- a/powerpc/eabi/Stacklayout.v +++ b/powerpc/eabi/Stacklayout.v @@ -24,12 +24,8 @@ Require Import Bounds. Since we would rather store our return address in our own frame, we will not use these 4 bytes, and just reserve them. - Space for outgoing arguments to function calls. -- Local stack slots of integer type. -- Saved return address into caller. +- Local stack slots. - Saved values of integer callee-save registers used by the function. -- One word of padding, if necessary to align the following data - on a 8-byte boundary. -- Local stack slots of float type. - Saved values of float callee-save registers used by the function. - Space for the stack-allocated data declared in Cminor. @@ -43,10 +39,9 @@ Record frame_env : Type := mk_frame_env { fe_size: Z; fe_ofs_link: Z; fe_ofs_retaddr: Z; - fe_ofs_int_local: Z; + fe_ofs_local: Z; fe_ofs_int_callee_save: Z; fe_num_int_callee_save: Z; - fe_ofs_float_local: Z; fe_ofs_float_callee_save: Z; fe_num_float_callee_save: Z; fe_stack_data: Z @@ -56,17 +51,17 @@ Record frame_env : Type := mk_frame_env { function. *) Definition make_env (b: bounds) := - let oil := 8 + 4 * b.(bound_outgoing) in (* integer locals *) - let ora := oil + 4 * b.(bound_int_local) in (* saved return address *) + let ol := align (8 + 4 * b.(bound_outgoing)) 8 in (* locals *) + let ora := ol + 4 * b.(bound_local) in (* saved return address *) let oics := ora + 4 in (* integer callee-saves *) let oendi := oics + 4 * b.(bound_int_callee_save) in - let ofl := align oendi 8 in (* float locals *) - let ofcs := ofl + 8 * b.(bound_float_local) in (* float callee-saves *) + let ofcs := align oendi 8 in (* float callee-saves *) let ostkdata := ofcs + 8 * b.(bound_float_callee_save) in (* stack data *) let sz := align (ostkdata + b.(bound_stack_data)) 16 in mk_frame_env sz 0 ora - oil oics b.(bound_int_callee_save) - ofl ofcs b.(bound_float_callee_save) + ol + oics b.(bound_int_callee_save) + ofcs b.(bound_float_callee_save) ostkdata. (** Separation property *) @@ -76,25 +71,25 @@ Remark frame_env_separated: let fe := make_env b in 0 <= fe.(fe_ofs_link) /\ fe.(fe_ofs_link) + 4 <= fe_ofs_arg - /\ fe_ofs_arg + 4 * b.(bound_outgoing) <= fe.(fe_ofs_int_local) - /\ fe.(fe_ofs_int_local) + 4 * b.(bound_int_local) <= fe.(fe_ofs_retaddr) + /\ fe_ofs_arg + 4 * b.(bound_outgoing) <= fe.(fe_ofs_local) + /\ fe.(fe_ofs_local) + 4 * b.(bound_local) <= fe.(fe_ofs_retaddr) /\ fe.(fe_ofs_retaddr) + 4 <= fe.(fe_ofs_int_callee_save) - /\ fe.(fe_ofs_int_callee_save) + 4 * b.(bound_int_callee_save) <= fe.(fe_ofs_float_local) - /\ fe.(fe_ofs_float_local) + 8 * b.(bound_float_local) <= fe.(fe_ofs_float_callee_save) + /\ fe.(fe_ofs_int_callee_save) + 4 * b.(bound_int_callee_save) <= fe.(fe_ofs_float_callee_save) /\ fe.(fe_ofs_float_callee_save) + 8 * b.(bound_float_callee_save) <= fe.(fe_stack_data) - /\ fe.(fe_stack_data) + b.(bound_stack_data) <= fe.(fe_size). + /\ fe.(fe_stack_data) + b.(bound_stack_data) <= fe.(fe_size) + /\ fe.(fe_ofs_retaddr) + 4 <= fe.(fe_size). Proof. intros. + generalize (align_le (8 + 4 * b.(bound_outgoing)) 8 (refl_equal _)). generalize (align_le (fe.(fe_ofs_int_callee_save) + 4 * b.(bound_int_callee_save)) 8 (refl_equal _)). generalize (align_le (fe.(fe_stack_data) + b.(bound_stack_data)) 16 (refl_equal _)). unfold fe, make_env, fe_size, fe_ofs_link, fe_ofs_retaddr, - fe_ofs_int_local, fe_ofs_int_callee_save, + fe_ofs_local, fe_ofs_int_callee_save, fe_num_int_callee_save, - fe_ofs_float_local, fe_ofs_float_callee_save, fe_num_float_callee_save, + fe_ofs_float_callee_save, fe_num_float_callee_save, fe_stack_data, fe_ofs_arg. intros. - generalize (bound_int_local_pos b); intro; - generalize (bound_float_local_pos b); intro; + generalize (bound_local_pos b); intro; generalize (bound_int_callee_save_pos b); intro; generalize (bound_float_callee_save_pos b); intro; generalize (bound_outgoing_pos b); intro; @@ -108,9 +103,8 @@ Remark frame_env_aligned: forall b, let fe := make_env b in (4 | fe.(fe_ofs_link)) - /\ (4 | fe.(fe_ofs_int_local)) + /\ (8 | fe.(fe_ofs_local)) /\ (4 | fe.(fe_ofs_int_callee_save)) - /\ (8 | fe.(fe_ofs_float_local)) /\ (8 | fe.(fe_ofs_float_callee_save)) /\ (4 | fe.(fe_ofs_retaddr)) /\ (8 | fe.(fe_stack_data)) @@ -118,24 +112,23 @@ Remark frame_env_aligned: Proof. intros. unfold fe, make_env, fe_size, fe_ofs_link, fe_ofs_retaddr, - fe_ofs_int_local, fe_ofs_int_callee_save, + fe_ofs_local, fe_ofs_int_callee_save, fe_num_int_callee_save, - fe_ofs_float_local, fe_ofs_float_callee_save, fe_num_float_callee_save, + fe_ofs_float_callee_save, fe_num_float_callee_save, fe_stack_data. - set (x1 := 8 + 4 * bound_outgoing b). - assert (4 | x1). unfold x1; apply Zdivide_plus_r. exists 2; auto. exists (bound_outgoing b); ring. - set (x2 := x1 + 4 * bound_int_local b). - assert (4 | x2). unfold x2; apply Zdivide_plus_r; auto. exists (bound_int_local b); ring. + set (x1 := align (8 + 4 * bound_outgoing b) 8). + assert (8 | x1). unfold x1; apply align_divides. omega. + set (x2 := x1 + 4 * bound_local b). + assert (4 | x2). unfold x2; apply Zdivide_plus_r; auto. + apply Zdivides_trans with 8. exists 2; auto. auto. + exists (bound_local b); ring. set (x3 := x2 + 4). assert (4 | x3). unfold x3; apply Zdivide_plus_r; auto. exists 1; auto. set (x4 := align (x3 + 4 * bound_int_callee_save b) 8). assert (8 | x4). unfold x4. apply align_divides. omega. - set (x5 := x4 + 8 * bound_float_local b). - assert (8 | x5). unfold x5. apply Zdivide_plus_r; auto. exists (bound_float_local b); ring. - set (x6 := x5 + 8 * bound_float_callee_save b). - assert (8 | x6). - unfold x6. apply Zdivide_plus_r; auto. exists (bound_float_callee_save b); ring. - set (x7 := align (x6 + bound_stack_data b) 16). - assert (16 | x7). unfold x7; apply align_divides. omega. + set (x5 := x4 + 8 * bound_float_callee_save b). + assert (8 | x5). unfold x5. apply Zdivide_plus_r; auto. exists (bound_float_callee_save b); ring. + set (x6 := align (x5 + bound_stack_data b) 16). + assert (16 | x6). unfold x6; apply align_divides. omega. intuition. Qed. -- cgit