From e10555313645cf3c35f244f42afa5a03fba2bac1 Mon Sep 17 00:00:00 2001 From: Bernhard Schommer Date: Tue, 28 May 2019 10:33:34 +0200 Subject: Provide a float select operation for PowerPC. (#173) The FP select for PowerPC stores both addresses in two subsequent stack slots and loads them using an offset created from the result of the comparison. --- powerpc/Asm.v | 8 ++++++++ powerpc/AsmToJSON.ml | 1 + powerpc/Asmexpand.ml | 19 +++++++++++++++++++ powerpc/Asmgen.v | 19 ++++++++++++++++++- powerpc/Asmgenproof.v | 11 ++++++++++- powerpc/Asmgenproof1.v | 41 ++++++++++++++++++++++++++++++++++++++--- powerpc/SelectOp.vp | 3 +++ powerpc/SelectOpproof.v | 5 +++-- powerpc/TargetPrinter.ml | 1 + 9 files changed, 101 insertions(+), 7 deletions(-) diff --git a/powerpc/Asm.v b/powerpc/Asm.v index e821c6c4..b9300fd7 100644 --- a/powerpc/Asm.v +++ b/powerpc/Asm.v @@ -231,6 +231,7 @@ Inductive instruction : Type := | Pfres: freg -> freg -> instruction (**r approximate inverse *) | Pfsel: freg -> freg -> freg -> freg -> instruction (**r FP conditional move *) | Pisel: ireg -> ireg -> ireg -> crbit -> instruction (**r integer select *) + | Pfsel_gen: freg -> freg -> freg -> crbit -> instruction (**r floating point select *) | Pisync: instruction (**r ISYNC barrier *) | Picbi: ireg -> ireg -> instruction (**r instruction cache invalidate *) | Picbtls: int -> ireg -> ireg -> instruction (**r instruction cache block touch and lock set *) @@ -867,6 +868,13 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out | _ => Vundef end in Next (nextinstr (rs #rd <- v #GPR0 <- Vundef)) m + | Pfsel_gen rd r1 r2 bit => + let v := + match rs#(reg_of_crbit bit) with + | Vint n => if Int.eq n Int.zero then rs#r2 else rs#r1 + | _ => Vundef + end in + Next (nextinstr (rs #rd <- v #GPR0 <- Vundef)) m | Plbz rd cst r1 => load1 Mint8unsigned rd cst r1 rs m | Plbzx rd r1 r2 => diff --git a/powerpc/AsmToJSON.ml b/powerpc/AsmToJSON.ml index ee3eaca8..99c51e43 100644 --- a/powerpc/AsmToJSON.ml +++ b/powerpc/AsmToJSON.ml @@ -228,6 +228,7 @@ let pp_instructions pp ic = | Pfres (fr1,fr2) -> instruction pp "Pfres" [Freg fr1; Freg fr2] | Pfsel (fr1,fr2,fr3,fr4) -> instruction pp "Pfsel" [Freg fr1; Freg fr2; Freg fr3; Freg fr4] | Pisel (ir1,ir2,ir3,cr) -> instruction pp "Pisel" [Ireg ir1; Ireg ir2; Ireg ir3; Crbit cr] + | Pfsel_gen _ -> assert false (* Should not occur *) | Picbi (ir1,ir2) -> instruction pp "Picbi" [Ireg ir1; Ireg ir2] | Picbtls (n,ir1,ir2) -> instruction pp "Picbtls" [Constant (Cint n);Ireg ir1; Ireg ir2] | Pisync -> instruction pp "Pisync" [] diff --git a/powerpc/Asmexpand.ml b/powerpc/Asmexpand.ml index 5903ab0e..415b6651 100644 --- a/powerpc/Asmexpand.ml +++ b/powerpc/Asmexpand.ml @@ -36,12 +36,14 @@ let _2 = coqint_of_camlint 2l let _4 = coqint_of_camlint 4l let _6 = coqint_of_camlint 6l let _8 = coqint_of_camlint 8l +let _16 = coqint_of_camlint 16l let _31 = coqint_of_camlint 31l let _32 = coqint_of_camlint 32l let _64 = coqint_of_camlint 64l let _m1 = coqint_of_camlint (-1l) let _m4 = coqint_of_camlint (-4l) let _m8 = coqint_of_camlint (-8l) +let _m16 = coqint_of_camlint (-16l) let _0L = Integers.Int64.zero let _32L = coqint_of_camlint64 32L @@ -444,6 +446,21 @@ let expand_integer_cond_move a1 a2 a3 res = expand_integer_cond_move_1 a2 a3 res end + +(* Expansion of floating point conditional moves (Pfcmove) *) + +let expand_float_cond_move bit a2 a3 res = + emit (Pmfcr GPR0); + emit (Prlwinm(GPR0, GPR0, Z.of_uint (4 + num_crbit bit), _8)); + emit (Pstfdu (a3, Cint (_m16), GPR1)); + emit (Pcfi_adjust _16); + emit (Pstfd (a2, Cint (_8), GPR1)); + emit (Plfdx (res, GPR1, GPR0)); + emit (Paddi (GPR1, GPR1, (Cint _16))); + emit (Pcfi_adjust _m16) + + + (* Symmetrically, we emulate the "isel" instruction on PPC processors that do not have it. *) @@ -894,6 +911,8 @@ let expand_instruction instr = if r1 <> r2 then emit(Pfmr(r1, r2)) | Pisel(rd, r1, r2, bit) -> expand_isel bit r1 r2 rd + | Pfsel_gen (rd, r1, r2, bit) -> + expand_float_cond_move bit r1 r2 rd | Plmake(r1, rhi, rlo) -> if r1 = rlo then emit (Prldimi(r1, rhi, _32L, upper32)) diff --git a/powerpc/Asmgen.v b/powerpc/Asmgen.v index 62efc17f..a686414a 100644 --- a/powerpc/Asmgen.v +++ b/powerpc/Asmgen.v @@ -416,6 +416,16 @@ Definition transl_select_op let r2' := if snd p then r2 else r1 in transl_cond cond args (Pisel rd r1' r2' (fst p) :: k)). +Definition transl_fselect_op + (cond: condition) (args: list mreg) (r1 r2 rd: freg) (k: code) := + if freg_eq r1 r2 then + OK (Pfmr rd r1 :: k) + else + (let p := crbit_for_cond cond in + let r1' := if snd p then r1 else r2 in + let r2' := if snd p then r2 else r1 in + transl_cond cond args (Pfsel_gen rd r1' r2' (fst p) :: k)). + (** Translation of the arithmetic operation [r <- op(args)]. The corresponding instructions are prepended to [k]. *) @@ -623,9 +633,16 @@ Definition transl_op | Ocmp cmp, _ => transl_cond_op cmp args res k | Osel cmp ty, a1 :: a2 :: args => - assertion (typ_eq ty Tint || typ_eq ty Tlong); + match preg_of res with + | IR r1 => do r1 <- ireg_of a1; do r2 <- ireg_of a2; do r <- ireg_of res; transl_select_op cmp args r1 r2 r k + | FR r => + do r1 <- freg_of a1; do r2 <- freg_of a2; do r <- freg_of res; + transl_fselect_op cmp args r1 r2 r k + | _ => + Error (msg "Asmgen.Osel") + end (*c PPC64 operations *) | Olongconst n, nil => do r <- ireg_of res; OK (loadimm64 r n k) diff --git a/powerpc/Asmgenproof.v b/powerpc/Asmgenproof.v index 4d0b41ba..d653633c 100644 --- a/powerpc/Asmgenproof.v +++ b/powerpc/Asmgenproof.v @@ -257,6 +257,15 @@ Proof. eapply tail_nolabel_trans. eapply transl_cond_label; eauto. TailNoLabel. Qed. +Remark transl_fselect_op_label: + forall cond args r1 r2 rd k c, + transl_fselect_op cond args r1 r2 rd k = OK c -> tail_nolabel k c. +Proof. + unfold transl_fselect_op; intros. destruct (freg_eq r1 r2). + TailNoLabel. + eapply tail_nolabel_trans. eapply transl_cond_label; eauto. TailNoLabel. +Qed. + Remark transl_op_label: forall op args r k c, transl_op op args r k = OK c -> tail_nolabel k c. @@ -284,7 +293,7 @@ Opaque Int.eq. destruct Int64.eq. TailNoLabel. destruct ireg_eq; [apply tail_nolabel_cons; unfold nolabel;auto|]; eapply tail_nolabel_trans; TailNoLabel. - eapply transl_cond_op_label; eauto. -- eapply transl_select_op_label; eauto. +- destruct (preg_of r); monadInv H. eapply transl_select_op_label; eauto. eapply transl_fselect_op_label; eauto. Qed. Remark transl_memory_access_label: diff --git a/powerpc/Asmgenproof1.v b/powerpc/Asmgenproof1.v index 2e609900..884d5366 100644 --- a/powerpc/Asmgenproof1.v +++ b/powerpc/Asmgenproof1.v @@ -1288,6 +1288,35 @@ Proof. + intros. Simpl. Qed. +Lemma transl_fselect_op_correct: + forall cond args ty r1 r2 rd k rs m c, + transl_fselect_op cond args r1 r2 rd k = OK c -> + important_preg rd = true -> important_preg r1 = true -> important_preg r2 = true -> + exists rs', + exec_straight ge fn c rs m k rs' m + /\ Val.lessdef (Val.select (eval_condition cond (map rs (map preg_of args)) m) rs#r1 rs#r2 ty) rs'#rd + /\ forall r, important_preg r = true -> r <> rd -> rs'#r = rs#r. +Proof. + intros until c. intros TR IMP1 IMP2 IMP3. + unfold transl_fselect_op in TR. + destruct (freg_eq r1 r2). + - inv TR. econstructor; split; [|split]. + + apply exec_straight_one. simpl; eauto. auto. + + Simpl. destruct (eval_condition cond rs ## (preg_of ## args) m) as [[]|]; simpl; auto using Val.lessdef_normalize. + + intros; Simpl. + - destruct (transl_cond_correct_1 cond args _ rs m _ TR) as (rs1 & A & B & C). + set (bit := fst (crbit_for_cond cond)) in *. + set (dir := snd (crbit_for_cond cond)) in *. + set (ob := eval_condition cond rs##(preg_of##args) m) in *. + econstructor; split; [|split]. + + eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl; eauto. + reflexivity. + + Simpl. + rewrite <- (C r1), <- (C r2) by auto. + rewrite B. destruct dir; destruct ob as [[]|]; simpl; auto using Val.lessdef_normalize. + + intros. Simpl. +Qed. + (** Translation of arithmetic operations. *) Ltac TranslOpSimpl := @@ -1486,10 +1515,16 @@ Opaque Val.add. exists rs'; auto with asmgen. (* Osel *) - assert (X: forall mr r, ireg_of mr = OK r -> important_preg r = true). - { intros. apply ireg_of_eq in H. apply important_data_preg_1. rewrite <- H. + { intros. apply ireg_of_eq in H0. apply important_data_preg_1. rewrite <- H0. + auto with asmgen. } + assert (Y: forall mr r, freg_of mr = OK r -> important_preg r = true). + { intros. apply freg_of_eq in H0. apply important_data_preg_1. rewrite <- H0. auto with asmgen. } - destruct (transl_select_op_correct _ _ t _ _ _ _ rs m _ EQ3) as (rs' & A & B & C); - eauto. + destruct (preg_of res) eqn:RES; monadInv H; rewrite <- RES. + + rewrite (ireg_of_eq _ _ EQ), (ireg_of_eq _ _ EQ0), (ireg_of_eq _ _ EQ1) in *. + destruct (transl_select_op_correct _ _ t _ _ _ _ rs m _ EQ3) as (rs' & A & B & C); eauto. + + rewrite (freg_of_eq _ _ EQ), (freg_of_eq _ _ EQ0), (freg_of_eq _ _ EQ1) in *. + destruct (transl_fselect_op_correct _ _ t _ _ _ _ rs m _ EQ3) as (rs' & A & B & C); eauto. Qed. Lemma transl_op_correct: diff --git a/powerpc/SelectOp.vp b/powerpc/SelectOp.vp index 41bc0efc..24aeb531 100644 --- a/powerpc/SelectOp.vp +++ b/powerpc/SelectOp.vp @@ -43,6 +43,7 @@ Require Import Integers. Require Import Floats. Require Import Op. Require Import CminorSel. +Require Archi. Local Open Scope cminorsel_scope. @@ -521,6 +522,8 @@ Definition floatofsingle (e: expr) := Eop Ofloatofsingle (e ::: Enil). Definition select (ty: typ) (cond: condition) (args: exprlist) (e1 e2: expr) := if match ty with | Tint => true + | Tfloat => true + | Tsingle => true | Tlong => Archi.ppc64 | _ => false end diff --git a/powerpc/SelectOpproof.v b/powerpc/SelectOpproof.v index 5bf25722..5d7cd52b 100644 --- a/powerpc/SelectOpproof.v +++ b/powerpc/SelectOpproof.v @@ -1007,12 +1007,13 @@ Theorem eval_select: eval_expr ge sp e m le a1 v1 -> eval_expr ge sp e m le a2 v2 -> eval_condition cond vl m = Some b -> - exists v, + + exists v, eval_expr ge sp e m le a v /\ Val.lessdef (Val.select (Some b) v1 v2 ty) v. Proof. unfold select; intros. - destruct (match ty with Tint => true | Tlong => Archi.ppc64 | _ => false end); inv H. + destruct (match ty with Tint => true | Tfloat => true | Tsingle => true | Tlong => Archi.ppc64 | _ => false end); inv H. exists (Val.select (Some b) v1 v2 ty); split. apply eval_Eop with (v1 :: v2 :: vl). constructor; auto. constructor; auto. diff --git a/powerpc/TargetPrinter.ml b/powerpc/TargetPrinter.ml index 0a98584b..a1338561 100644 --- a/powerpc/TargetPrinter.ml +++ b/powerpc/TargetPrinter.ml @@ -613,6 +613,7 @@ module Target (System : SYSTEM):TARGET = fprintf oc " fsel %a, %a, %a, %a\n" freg r1 freg r2 freg r3 freg r4 | Pisel (r1,r2,r3,cr) -> fprintf oc " isel %a, %a, %a, %a\n" ireg r1 ireg r2 ireg r3 crbit cr + | Pfsel_gen _ -> assert false | Picbi (r1,r2) -> fprintf oc " icbi %a, %a\n" ireg r1 ireg r2 | Picbtls (n,r1,r2) -> -- cgit