From a745efa7f07a10cec625b9c302eb0196b7bfaefb Mon Sep 17 00:00:00 2001 From: xleroy Date: Thu, 26 Feb 2009 14:48:59 +0000 Subject: Reserve register GPR13 for compatibility with EABI. Optimize operations 'x >= 0' and 'x < 0'. git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@999 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e --- powerpc/Asm.v | 2 +- powerpc/Asmgen.v | 46 ++++++++++++++++++++++++++++++++++++++------ powerpc/Asmgenproof.v | 29 +++++++++++++++++++++++----- powerpc/Asmgenproof1.v | 34 +++++++++++++++++++++++++++----- powerpc/Asmgenretaddr.v | 5 ++++- powerpc/Machregs.v | 18 ++++++++--------- powerpc/eabi/Conventions.v | 12 ++++++------ powerpc/macosx/Conventions.v | 12 ++++++------ 8 files changed, 119 insertions(+), 39 deletions(-) (limited to 'powerpc') diff --git a/powerpc/Asm.v b/powerpc/Asm.v index 69085aa9..63f0232a 100644 --- a/powerpc/Asm.v +++ b/powerpc/Asm.v @@ -759,7 +759,7 @@ Definition ireg_of (r: mreg) : ireg := match r with | R3 => GPR3 | R4 => GPR4 | R5 => GPR5 | R6 => GPR6 | R7 => GPR7 | R8 => GPR8 | R9 => GPR9 | R10 => GPR10 - | R13 => GPR13 | R14 => GPR14 | R15 => GPR15 | R16 => GPR16 + | R14 => GPR14 | R15 => GPR15 | R16 => GPR16 | R17 => GPR17 | R18 => GPR18 | R19 => GPR19 | R20 => GPR20 | R21 => GPR21 | R22 => GPR22 | R23 => GPR23 | R24 => GPR24 | R25 => GPR25 | R26 => GPR26 | R27 => GPR27 | R28 => GPR28 diff --git a/powerpc/Asmgen.v b/powerpc/Asmgen.v index 1dcc3990..0c3ac75d 100644 --- a/powerpc/Asmgen.v +++ b/powerpc/Asmgen.v @@ -223,6 +223,32 @@ Definition crbit_for_cond (cond: condition) := | Cmasknotzero n => (CRbit_2, false) end. +(** Recognition of comparisons [>= 0] and [< 0]. *) + +Inductive condition_class: condition -> list mreg -> Set := + | condition_ge0: + forall n r, n = Int.zero -> condition_class (Ccompimm Cge n) (r :: nil) + | condition_lt0: + forall n r, n = Int.zero -> condition_class (Ccompimm Clt n) (r :: nil) + | condition_default: + forall c rl, condition_class c rl. + +Definition classify_condition (c: condition) (args: list mreg): condition_class c args := + match c as z1, args as z2 return condition_class z1 z2 with + | Ccompimm Cge n, r :: nil => + match Int.eq_dec n Int.zero with + | left EQ => condition_ge0 n r EQ + | right _ => condition_default (Ccompimm Cge n) (r :: nil) + end + | Ccompimm Clt n, r :: nil => + match Int.eq_dec n Int.zero with + | left EQ => condition_lt0 n r EQ + | right _ => condition_default (Ccompimm Clt n) (r :: nil) + end + | x, y => + condition_default x y + end. + (** Translation of the arithmetic operation [r <- op(args)]. The corresponding instructions are prepended to [k]. *) @@ -331,12 +357,20 @@ Definition transl_op | Ofloatofintu, a1 :: nil => Piuctf (freg_of r) (ireg_of a1) :: k | Ocmp cmp, _ => - let p := crbit_for_cond cmp in - transl_cond cmp args - (Pmfcrbit (ireg_of r) (fst p) :: - if snd p - then k - else Pxori (ireg_of r) (ireg_of r) (Cint Int.one) :: k) + match classify_condition cmp args with + | condition_ge0 _ a _ => + Prlwinm (ireg_of r) (ireg_of a) Int.one Int.one :: + Pxori (ireg_of r) (ireg_of r) (Cint Int.one) :: k + | condition_lt0 _ a _ => + Prlwinm (ireg_of r) (ireg_of a) Int.one Int.one :: k + | condition_default _ _ => + let p := crbit_for_cond cmp in + transl_cond cmp args + (Pmfcrbit (ireg_of r) (fst p) :: + if snd p + then k + else Pxori (ireg_of r) (ireg_of r) (Cint Int.one) :: k) + end | _, _ => k (**r never happens for well-typed code *) end. diff --git a/powerpc/Asmgenproof.v b/powerpc/Asmgenproof.v index a96125a2..40c593a3 100644 --- a/powerpc/Asmgenproof.v +++ b/powerpc/Asmgenproof.v @@ -432,6 +432,30 @@ Proof. apply andimm_label. apply andimm_label. Qed. Hint Rewrite transl_cond_label: labels. + +Remark transl_op_cmp_label: + forall c args r k, + find_label lbl + (match classify_condition c args with + | condition_ge0 _ a _ => + Prlwinm (ireg_of r) (ireg_of a) Int.one Int.one + :: Pxori (ireg_of r) (ireg_of r) (Cint Int.one) :: k + | condition_lt0 _ a _ => + Prlwinm (ireg_of r) (ireg_of a) Int.one Int.one :: k + | condition_default _ _ => + transl_cond c args + (Pmfcrbit (ireg_of r) (fst (crbit_for_cond c)) + :: (if snd (crbit_for_cond c) + then k + else Pxori (ireg_of r) (ireg_of r) (Cint Int.one) :: k)) + end) = find_label lbl k. +Proof. + intros. destruct (classify_condition c args); auto. + autorewrite with labels. + case (snd (crbit_for_cond c)); auto. +Qed. +Hint Rewrite transl_op_cmp_label: labels. + Remark transl_op_label: forall op args r k, find_label lbl (transl_op op args r k) = find_label lbl k. Proof. @@ -441,11 +465,6 @@ Proof. case (mreg_type m); reflexivity. case (Int.eq (high_s i) Int.zero); autorewrite with labels; reflexivity. case (Int.eq (high_s i) Int.zero); autorewrite with labels; reflexivity. - case (snd (crbit_for_cond c)); reflexivity. - case (snd (crbit_for_cond c)); reflexivity. - case (snd (crbit_for_cond c)); reflexivity. - case (snd (crbit_for_cond c)); reflexivity. - case (snd (crbit_for_cond c)); reflexivity. Qed. Hint Rewrite transl_op_label: labels. diff --git a/powerpc/Asmgenproof1.v b/powerpc/Asmgenproof1.v index fdb48bec..5eda7adf 100644 --- a/powerpc/Asmgenproof1.v +++ b/powerpc/Asmgenproof1.v @@ -1349,14 +1349,38 @@ Proof. repeat (rewrite (ireg_val ms sp rs); auto). reflexivity. auto 10 with ppcgen. (* Ocmp *) - set (bit := fst (crbit_for_cond c)). - set (isset := snd (crbit_for_cond c)). + generalize H2; case (classify_condition c args); intros. + (* Optimization: compimm Cge 0 *) + subst n. simpl in H4. simpl. + set (rs1 := nextinstr (rs#(ireg_of res) <- (Val.rolm (ms r) Int.one Int.one))). + set (rs2 := nextinstr (rs1#(ireg_of res) <- (Val.xor (rs1#(ireg_of res)) (Vint Int.one)))). + exists rs2. + split. apply exec_straight_two with rs1 m. + simpl. unfold rs1. rewrite (ireg_val ms sp rs); auto. congruence. + auto. auto. auto. + rewrite <- Val.rolm_ge_zero. + unfold rs2. apply agree_nextinstr. + unfold rs1. apply agree_nextinstr_commut. fold rs1. + replace (rs1 (ireg_of res)) with (Val.rolm (ms r) Int.one Int.one). + apply agree_set_mireg_twice; auto. + unfold rs1. rewrite nextinstr_inv. rewrite Pregmap.gss. auto. + auto with ppcgen. auto with ppcgen. + (* Optimization: compimm Clt 0 *) + subst n. simpl in H4. simpl. + exists (nextinstr (rs#(ireg_of res) <- (Val.rolm (ms r) Int.one Int.one))). + split. apply exec_straight_one. simpl. rewrite (ireg_val ms sp rs); auto. congruence. + auto. + apply agree_nextinstr. apply agree_set_mireg. + rewrite Val.rolm_lt_zero. apply agree_set_mreg. auto. congruence. + (* General case *) + set (bit := fst (crbit_for_cond c0)). + set (isset := snd (crbit_for_cond c0)). set (k1 := Pmfcrbit (ireg_of res) bit :: (if isset then k else Pxori (ireg_of res) (ireg_of res) (Cint Int.one) :: k)). - generalize (transl_cond_correct_aux c args k1 ms sp rs m H2 H0). + generalize (transl_cond_correct_aux c0 rl k1 ms sp rs m H4 H0). fold bit; fold isset. intros [rs1 [EX1 [RES1 AG1]]]. set (rs2 := nextinstr (rs1#(ireg_of res) <- (rs1#(reg_of_crbit bit)))). @@ -1366,12 +1390,12 @@ Proof. unfold k1. apply exec_straight_one. reflexivity. reflexivity. unfold rs2. rewrite RES1. auto with ppcgen. - exists (nextinstr (rs2#(ireg_of res) <- (eval_condition_total c ms##args))). + exists (nextinstr (rs2#(ireg_of res) <- (eval_condition_total c0 ms##rl))). split. apply exec_straight_trans with k1 rs1 m. assumption. unfold k1. apply exec_straight_two with rs2 m. reflexivity. simpl. replace (Val.xor (rs2 (ireg_of res)) (Vint Int.one)) - with (eval_condition_total c ms##args). + with (eval_condition_total c0 ms##rl). reflexivity. unfold rs2. rewrite nextinstr_inv; auto with ppcgen. rewrite Pregmap.gss. rewrite RES1. apply Val.notbool_xor. apply eval_condition_total_is_bool. diff --git a/powerpc/Asmgenretaddr.v b/powerpc/Asmgenretaddr.v index 23bd186e..d414752c 100644 --- a/powerpc/Asmgenretaddr.v +++ b/powerpc/Asmgenretaddr.v @@ -149,7 +149,10 @@ Hint Resolve transl_cond_tail: ppcretaddr. Lemma transl_op_tail: forall op args r k, is_tail k (transl_op op args r k). -Proof. unfold transl_op; intros; destruct op; IsTail. Qed. +Proof. + unfold transl_op; intros; destruct op; IsTail. + destruct (classify_condition c args); IsTail. +Qed. Hint Resolve transl_op_tail: ppcretaddr. Lemma transl_load_store_tail: diff --git a/powerpc/Machregs.v b/powerpc/Machregs.v index 260a0e85..d0f7cf46 100644 --- a/powerpc/Machregs.v +++ b/powerpc/Machregs.v @@ -26,14 +26,14 @@ Require Import AST. - Two float registers, not allocatable, reserved as temporaries for spilling and reloading ([FT1, FT2]). - The type [mreg] does not include special-purpose machine registers - such as the stack pointer and the condition codes. *) + The type [mreg] does not include special-purpose or reserved + machine registers such as the stack pointer and the condition codes. *) Inductive mreg: Set := (** Allocatable integer regs *) | R3: mreg | R4: mreg | R5: mreg | R6: mreg | R7: mreg | R8: mreg | R9: mreg | R10: mreg - | R13: mreg | R14: mreg | R15: mreg | R16: mreg + | R14: mreg | R15: mreg | R16: mreg | R17: mreg | R18: mreg | R19: mreg | R20: mreg | R21: mreg | R22: mreg | R23: mreg | R24: mreg | R25: mreg | R26: mreg | R27: mreg | R28: mreg @@ -58,7 +58,7 @@ Definition mreg_type (r: mreg): typ := match r with | R3 => Tint | R4 => Tint | R5 => Tint | R6 => Tint | R7 => Tint | R8 => Tint | R9 => Tint | R10 => Tint - | R13 => Tint | R14 => Tint | R15 => Tint | R16 => Tint + | R14 => Tint | R15 => Tint | R16 => Tint | R17 => Tint | R18 => Tint | R19 => Tint | R20 => Tint | R21 => Tint | R22 => Tint | R23 => Tint | R24 => Tint | R25 => Tint | R26 => Tint | R27 => Tint | R28 => Tint @@ -83,11 +83,11 @@ Module IndexedMreg <: INDEXED_TYPE. match r with | R3 => 1 | R4 => 2 | R5 => 3 | R6 => 4 | R7 => 5 | R8 => 6 | R9 => 7 | R10 => 8 - | R13 => 9 | R14 => 10 | R15 => 11 | R16 => 12 - | R17 => 13 | R18 => 14 | R19 => 15 | R20 => 16 - | R21 => 17 | R22 => 18 | R23 => 19 | R24 => 20 - | R25 => 21 | R26 => 22 | R27 => 23 | R28 => 24 - | R29 => 25 | R30 => 26 | R31 => 27 + | R14 => 9 | R15 => 10 | R16 => 11 + | R17 => 12 | R18 => 13 | R19 => 14 | R20 => 15 + | R21 => 16 | R22 => 17 | R23 => 18 | R24 => 19 + | R25 => 20 | R26 => 21 | R27 => 22 | R28 => 23 + | R29 => 24 | R30 => 25 | R31 => 26 | F1 => 28 | F2 => 29 | F3 => 30 | F4 => 31 | F5 => 32 | F6 => 33 | F7 => 34 | F8 => 35 | F9 => 36 | F10 => 37 | F14 => 38 | F15 => 39 diff --git a/powerpc/eabi/Conventions.v b/powerpc/eabi/Conventions.v index 90e14a0a..acad0129 100644 --- a/powerpc/eabi/Conventions.v +++ b/powerpc/eabi/Conventions.v @@ -38,7 +38,7 @@ Definition float_caller_save_regs := F1 :: F2 :: F3 :: F4 :: F5 :: F6 :: F7 :: F8 :: F9 :: F10 :: nil. Definition int_callee_save_regs := - R13 :: R14 :: R15 :: R16 :: R17 :: R18 :: R19 :: R20 :: R21 :: R22 :: + R14 :: R15 :: R16 :: R17 :: R18 :: R19 :: R20 :: R21 :: R22 :: R23 :: R24 :: R25 :: R26 :: R27 :: R28 :: R29 :: R30 :: R31 :: nil. Definition float_callee_save_regs := @@ -65,11 +65,11 @@ Definition temporaries := Definition index_int_callee_save (r: mreg) := match r with - | R13 => 0 | R14 => 1 | R15 => 2 | R16 => 3 - | R17 => 4 | R18 => 5 | R19 => 6 | R20 => 7 - | R21 => 8 | R22 => 9 | R23 => 10 | R24 => 11 - | R25 => 12 | R26 => 13 | R27 => 14 | R28 => 15 - | R29 => 16 | R30 => 17 | R31 => 18 | _ => -1 + | R14 => 0 | R15 => 1 | R16 => 2 | R17 => 3 + | R18 => 4 | R19 => 5 | R20 => 6 | R21 => 7 + | R22 => 8 | R23 => 9 | R24 => 10 | R25 => 11 + | R26 => 12 | R27 => 13 | R28 => 14 | R29 => 15 + | R30 => 16 | R31 => 17 | _ => -1 end. Definition index_float_callee_save (r: mreg) := diff --git a/powerpc/macosx/Conventions.v b/powerpc/macosx/Conventions.v index 6d1ccb13..62723627 100644 --- a/powerpc/macosx/Conventions.v +++ b/powerpc/macosx/Conventions.v @@ -38,7 +38,7 @@ Definition float_caller_save_regs := F1 :: F2 :: F3 :: F4 :: F5 :: F6 :: F7 :: F8 :: F9 :: F10 :: nil. Definition int_callee_save_regs := - R13 :: R14 :: R15 :: R16 :: R17 :: R18 :: R19 :: R20 :: R21 :: R22 :: + R14 :: R15 :: R16 :: R17 :: R18 :: R19 :: R20 :: R21 :: R22 :: R23 :: R24 :: R25 :: R26 :: R27 :: R28 :: R29 :: R30 :: R31 :: nil. Definition float_callee_save_regs := @@ -65,11 +65,11 @@ Definition temporaries := Definition index_int_callee_save (r: mreg) := match r with - | R13 => 0 | R14 => 1 | R15 => 2 | R16 => 3 - | R17 => 4 | R18 => 5 | R19 => 6 | R20 => 7 - | R21 => 8 | R22 => 9 | R23 => 10 | R24 => 11 - | R25 => 12 | R26 => 13 | R27 => 14 | R28 => 15 - | R29 => 16 | R30 => 17 | R31 => 18 | _ => -1 + | R14 => 0 | R15 => 1 | R16 => 2 | R17 => 3 + | R18 => 4 | R19 => 5 | R20 => 6 | R21 => 7 + | R22 => 8 | R23 => 9 | R24 => 10 | R25 => 11 + | R26 => 12 | R27 => 13 | R28 => 14 | R29 => 15 + | R30 => 16 | R31 => 17 | _ => -1 end. Definition index_float_callee_save (r: mreg) := -- cgit