diff options
author | Cyril SIX <cyril.six@kalray.eu> | 2018-04-20 10:31:56 +0200 |
---|---|---|
committer | Cyril SIX <cyril.six@kalray.eu> | 2018-04-20 10:31:56 +0200 |
commit | aa25ec270b651186154523ec71a3888b50994d70 (patch) | |
tree | a4572adf5cde4f97642885cb1f90f73a20d169cd | |
parent | 41a048fa4bb9ddefd4e4acff2207251bb3ddbf06 (diff) | |
download | compcert-kvx-aa25ec270b651186154523ec71a3888b50994d70.tar.gz compcert-kvx-aa25ec270b651186154523ec71a3888b50994d70.zip |
MPPA - Oshrximm + Mgetparam + FP is GPR10 + bug
Added Oshrximm and Mgetparam -> mmult.c divide & conqueer generates
FP is now GPR10 instead of being a mix of GPR30 and GPR32
Corrected a bug where Pgoto and Pj_l were given the same interpretation,
where in fact there's a fundamental difference : Pgoto is supposed to
have a function name (symbol), while Pj_l is supposed to have a label
name (print_label). This led to having undefinite labels in the code.
-rw-r--r-- | mppa_k1c/Asm.v | 5 | ||||
-rw-r--r-- | mppa_k1c/Asmgen.v | 16 | ||||
-rw-r--r-- | mppa_k1c/Asmgenproof.v | 32 | ||||
-rw-r--r-- | mppa_k1c/Asmgenproof1.v | 25 | ||||
-rw-r--r-- | mppa_k1c/Machregs.v | 10 | ||||
-rw-r--r-- | mppa_k1c/TargetPrinter.ml | 4 | ||||
-rw-r--r-- | test/mppa/mmult/mmult.c | 34 |
7 files changed, 67 insertions, 59 deletions
diff --git a/mppa_k1c/Asm.v b/mppa_k1c/Asm.v index f8ab1e8d..d199495b 100644 --- a/mppa_k1c/Asm.v +++ b/mppa_k1c/Asm.v @@ -92,6 +92,7 @@ Module Pregmap := EMap(PregEq). (** Conventional names for stack pointer ([SP]) and return address ([RA]). *) Notation "'SP'" := GPR12 (only parsing) : asm. +Notation "'FP'" := GPR10 (only parsing) : asm. Notation "'RTMP'" := GPR31 (only parsing) : asm. Inductive btest: Type := @@ -1008,7 +1009,7 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out let sp := (Vptr stk Ptrofs.zero) in match Mem.storev Mptr m1 (Val.offset_ptr sp pos) rs#SP with | None => Stuck - | Some m2 => Next (nextinstr (rs #GPR32 <- (rs SP) #SP <- sp #GPR31 <- Vundef)) m2 + | Some m2 => Next (nextinstr (rs #FP <- (rs SP) #SP <- sp #GPR31 <- Vundef)) m2 end | Pfreeframe sz pos => match Mem.loadv Mptr m (Val.offset_ptr rs#SP pos) with @@ -1082,7 +1083,7 @@ Definition preg_of (r: mreg) : preg := match r with | R0 => GPR0 | R1 => GPR1 | R2 => GPR2 | R3 => GPR3 | R4 => GPR4 | R5 => GPR5 | R6 => GPR6 | R7 => GPR7 | R9 => GPR9 -(*| R10 => GPR10 | R11 => GPR11 | R12 => GPR12 | R13 => GPR13 | R14 => GPR14 *) + | R10 => GPR10 (*| R11 => GPR11 | R12 => GPR12 | R13 => GPR13 | R14 => GPR14 *) | R15 => GPR15 | R16 => GPR16 | R17 => GPR17 | R18 => GPR18 | R19 => GPR19 | R20 => GPR20 | R21 => GPR21 | R22 => GPR22 | R23 => GPR23 | R24 => GPR24 | R25 => GPR25 | R26 => GPR26 | R27 => GPR27 | R28 => GPR28 | R29 => GPR29 diff --git a/mppa_k1c/Asmgen.v b/mppa_k1c/Asmgen.v index 675cb065..8198fa78 100644 --- a/mppa_k1c/Asmgen.v +++ b/mppa_k1c/Asmgen.v @@ -346,7 +346,7 @@ Definition transl_op | Oshruimm n, a1 :: nil => do rd <- ireg_of res; do rs <- ireg_of a1; OK (Psrliw rd rs n :: k) -(*| Oshrximm n, a1 :: nil => + | Oshrximm n, a1 :: nil => do rd <- ireg_of res; do rs <- ireg_of a1; OK (if Int.eq n Int.zero then Pmv rd rs :: k else Psraiw GPR31 rs (Int.repr 31) :: @@ -355,7 +355,7 @@ Definition transl_op Psraiw rd GPR31 n :: k) (* [Omakelong], [Ohighlong] should not occur *) -*)| Olowlong, a1 :: nil => + | Olowlong, a1 :: nil => do rd <- ireg_of res; do rs <- ireg_of a1; OK (Pcvtl2w rd rs :: k) | Ocast32signed, a1 :: nil => @@ -675,12 +675,12 @@ Definition transl_instr (f: Mach.function) (i: Mach.instruction) loadind SP ofs ty dst k | Msetstack src ofs ty => storeind src SP ofs ty k -(*| Mgetparam ofs ty dst => + | Mgetparam ofs ty dst => (* load via the frame pointer if it is valid *) - do c <- loadind GPR30 ofs ty dst k; + do c <- loadind FP ofs ty dst k; OK (if ep then c - else loadind_ptr SP f.(fn_link_ofs) GPR30 c) -*)| Mop op args res => + else loadind_ptr SP f.(fn_link_ofs) FP c) + | Mop op args res => transl_op op args res k | Mload chunk addr args dst => transl_load chunk addr args dst k @@ -716,8 +716,8 @@ Definition transl_instr (f: Mach.function) (i: Mach.instruction) Definition it1_is_parent (before: bool) (i: Mach.instruction) : bool := match i with | Msetstack src ofs ty => before - | Mgetparam ofs ty dst => negb (mreg_eq dst R32) - | Mop op args res => before && negb (mreg_eq res R32) + | Mgetparam ofs ty dst => negb (mreg_eq dst R10) + | Mop op args res => before && negb (mreg_eq res R10) | _ => false end. diff --git a/mppa_k1c/Asmgenproof.v b/mppa_k1c/Asmgenproof.v index 2003239e..04335726 100644 --- a/mppa_k1c/Asmgenproof.v +++ b/mppa_k1c/Asmgenproof.v @@ -323,6 +323,8 @@ Opaque Int.eq. - apply opimm32_label; intros; exact I. (* Oandimm32 *) - apply opimm32_label; intros; exact I. +(* Oshrximm *) +- destruct (Int.eq n Int.zero); TailNoLabel. (* Oaddimm64 *) - apply opimm64_label; intros; exact I. (* Oandimm64 *) @@ -415,6 +417,9 @@ Proof. - eapply loadind_label; eauto. (* storeind *) - eapply storeind_label; eauto. +(* Mgetparam *) +- destruct ep. eapply loadind_label; eauto. + eapply tail_nolabel_trans. apply loadind_ptr_label. eapply loadind_label; eauto. (* transl_op *) - eapply transl_op_label; eauto. (* transl_load *) @@ -429,8 +434,7 @@ Proof. Qed. (* -- destruct ep. eapply loadind_label; eauto. - eapply tail_nolabel_trans. apply loadind_ptr_label. eapply loadind_label; eauto. + - eapply transl_op_label; eauto. - destruct m; monadInv H; eapply transl_memory_access_label; eauto; intros; exact I. - destruct m; monadInv H; eapply transl_memory_access_label; eauto; intros; exact I. @@ -552,7 +556,7 @@ Inductive match_states: Mach.state -> Asm.state -> Prop := (MEXT: Mem.extends m m') (AT: transl_code_at_pc ge (rs PC) fb f c ep tf tc) (AG: agree ms sp rs) - (DXP: ep = true -> rs#GPR32 = parent_sp s), + (DXP: ep = true -> rs#FP = parent_sp s), match_states (Mach.State s fb sp c ms m) (Asm.State rs m') | match_states_call: @@ -583,7 +587,7 @@ Lemma exec_straight_steps: exists rs2, exec_straight tge tf c rs1 m1' k rs2 m2' /\ agree ms2 sp rs2 - /\ (it1_is_parent ep i = true -> rs2#GPR32 = parent_sp s)) -> + /\ (it1_is_parent ep i = true -> rs2#FP = parent_sp s)) -> exists st', plus step tge (State rs1 m1') E0 st' /\ match_states (Mach.State s fb sp c ms2 m2) st'. @@ -694,9 +698,9 @@ Definition measure (s: Mach.state) : nat := | Mach.Returnstate _ _ _ => 1%nat end. -Remark preg_of_not_GPR32: forall r, negb (mreg_eq r R32) = true -> IR GPR32 <> preg_of r. +Remark preg_of_not_FP: forall r, negb (mreg_eq r R10) = true -> IR FP <> preg_of r. Proof. - intros. change (IR GPR32) with (preg_of R32). red; intros. + intros. change (IR FP) with (preg_of R10). red; intros. exploit preg_of_injective; eauto. intros; subst r; discriminate. Qed. @@ -748,16 +752,15 @@ Proof. intros [v' [C D]]. (* Opaque loadind. *) left; eapply exec_straight_steps; eauto; intros. monadInv TR. -(* destruct ep. -(* X30 contains parent *) +(* GPR31 contains parent *) exploit loadind_correct. eexact EQ. instantiate (2 := rs0). rewrite DXP; eauto. congruence. intros [rs1 [P [Q R]]]. exists rs1; split. eauto. split. eapply agree_set_mreg. eapply agree_set_mreg; eauto. congruence. auto with asmgen. simpl; intros. rewrite R; auto with asmgen. - apply preg_of_not_X30; auto. + apply preg_of_not_FP; auto. (* GPR11 does not contain parent *) rewrite chunk_of_Tptr in A. exploit loadind_ptr_correct. eexact A. congruence. intros [rs1 [P [Q R]]]. @@ -765,13 +768,12 @@ Proof. intros [rs2 [S [T U]]]. exists rs2; split. eapply exec_straight_trans; eauto. split. eapply agree_set_mreg. eapply agree_set_mreg. eauto. eauto. - instantiate (1 := rs1#X30 <- (rs2#X30)). intros. + instantiate (1 := rs1#FP <- (rs2#FP)). intros. rewrite Pregmap.gso; auto with asmgen. congruence. - intros. unfold Pregmap.set. destruct (PregEq.eq r' X30). congruence. auto with asmgen. + intros. unfold Pregmap.set. destruct (PregEq.eq r' FP). congruence. auto with asmgen. simpl; intros. rewrite U; auto with asmgen. - apply preg_of_not_X30; auto. -*) + apply preg_of_not_FP; auto. - (* Mop *) assert (eval_operation tge sp op (map rs args) m = Some v). rewrite <- H. apply eval_operation_preserved. exact symbols_preserved. @@ -783,7 +785,7 @@ Proof. apply agree_set_undef_mreg with rs0; auto. apply Val.lessdef_trans with v'; auto. simpl; intros. destruct (andb_prop _ _ H1); clear H1. - rewrite R; auto. apply preg_of_not_GPR32; auto. + rewrite R; auto. apply preg_of_not_FP; auto. Local Transparent destroyed_by_op. destruct op; simpl; auto; congruence. @@ -1021,7 +1023,7 @@ Local Transparent destroyed_by_op. Pget GPR8 RA :: storeind_ptr GPR8 SP (fn_retaddr_ofs f) x0) in *. set (tf := {| fn_sig := Mach.fn_sig f; fn_code := tfbody |}) in *. - set (rs2 := nextinstr (rs0#GPR32 <- (parent_sp s) #SP <- sp #GPR31 <- Vundef)). + set (rs2 := nextinstr (rs0#FP <- (parent_sp s) #SP <- sp #GPR31 <- Vundef)). exploit (Pget_correct tge tf GPR8 RA (storeind_ptr GPR8 SP (fn_retaddr_ofs f) x0) rs2 m2'); auto. intros (rs' & U' & V'). exploit (storeind_ptr_correct tge tf SP (fn_retaddr_ofs f) GPR8 x0 rs' m2'). diff --git a/mppa_k1c/Asmgenproof1.v b/mppa_k1c/Asmgenproof1.v index 3085072e..21ff9738 100644 --- a/mppa_k1c/Asmgenproof1.v +++ b/mppa_k1c/Asmgenproof1.v @@ -1214,6 +1214,18 @@ Opaque Int.eq. - (* Oaddrstack *) exploit addptrofs_correct. instantiate (1 := GPR12); auto with asmgen. intros (rs' & A & B & C). exists rs'; split; eauto. auto with asmgen. +- (* Oshrximm *) + clear H. exploit Val.shrx_shr_2; eauto. intros E; subst v; clear EV. + destruct (Int.eq n Int.zero). ++ econstructor; split. apply exec_straight_one. simpl; eauto. auto. + split; intros; Simpl. ++ change (Int.repr 32) with Int.iwordsize. set (n' := Int.sub Int.iwordsize n). + econstructor; split. + eapply exec_straight_step. simpl; reflexivity. auto. + eapply exec_straight_step. simpl; reflexivity. auto. + eapply exec_straight_step. simpl; reflexivity. auto. + apply exec_straight_one. simpl; reflexivity. auto. + split; intros; unfold getw; Simpl. - (* Ocast32signed *) exploit cast32signed_correct; eauto. intros (rs' & A & B & C). exists rs'; split; eauto. split. apply B. @@ -1275,18 +1287,7 @@ Opaque Int.eq. exploit (opimm32_correct Pxorw Pxoriw Val.xor); auto. instantiate (1 := x0); eauto with asmgen. intros (rs' & A & B & C). exists rs'; split; eauto. rewrite B; auto with asmgen. -- (* shrximm *) - clear H. exploit Val.shrx_shr_2; eauto. intros E; subst v; clear EV. - destruct (Int.eq n Int.zero). -+ econstructor; split. apply exec_straight_one. simpl; eauto. auto. - split; intros; Simpl. -+ change (Int.repr 32) with Int.iwordsize. set (n' := Int.sub Int.iwordsize n). - econstructor; split. - eapply exec_straight_step. simpl; reflexivity. auto. - eapply exec_straight_step. simpl; reflexivity. auto. - eapply exec_straight_step. simpl; reflexivity. auto. - apply exec_straight_one. simpl; reflexivity. auto. - split; intros; Simpl. + - (* longofintu *) econstructor; split. eapply exec_straight_three. simpl; eauto. simpl; eauto. simpl; eauto. auto. auto. auto. diff --git a/mppa_k1c/Machregs.v b/mppa_k1c/Machregs.v index ce86a06f..ed582c98 100644 --- a/mppa_k1c/Machregs.v +++ b/mppa_k1c/Machregs.v @@ -42,7 +42,7 @@ Require Import Op. Inductive mreg: Type := (* Allocatable General Purpose regs. *) | R0 | R1 | R2 | R3 | R4 | R5 | R6 | R7 | R9 - (* R10 to R14 are reserved *) | R15 | R16 | R17 | R18 | R19 + | R10 (* R11 to R14 res *) | R15 | R16 | R17 | R18 | R19 | R20 | R21 | R22 | R23 | R24 | R25 | R26 | R27 | R28 | R29 | R30 | R32 | R33 | R34 | R35 | R36 | R37 | R38 | R39 | R40 | R41 | R42 | R43 | R44 | R45 | R46 | R47 | R48 | R49 @@ -55,7 +55,7 @@ Global Opaque mreg_eq. Definition all_mregs := R0 :: R1 :: R2 :: R3 :: R4 :: R5 :: R6 :: R7 :: R9 - :: R15 :: R16 :: R17 :: R18 :: R19 + :: R10 :: R15 :: R16 :: R17 :: R18 :: R19 :: R20 :: R21 :: R22 :: R23 :: R24 :: R25 :: R26 :: R27 :: R28 :: R29 :: R30 :: R32 :: R33 :: R34 :: R35 :: R36 :: R37 :: R38 :: R39 :: R40 :: R41 :: R42 :: R43 :: R44 :: R45 :: R46 :: R47 :: R48 :: R49 @@ -87,6 +87,7 @@ Module IndexedMreg <: INDEXED_TYPE. match r with | R0 => 1 | R1 => 2 | R2 => 3 | R3 => 4 | R4 => 5 | R5 => 6 | R6 => 7 | R7 => 8 | R9 => 10 + | R10 => 11 | R15 => 16 | R16 => 17 | R17 => 18 | R18 => 19 | R19 => 20 | R20 => 21 | R21 => 22 | R22 => 23 | R23 => 24 | R24 => 25 | R25 => 26 | R26 => 27 | R27 => 28 | R28 => 29 | R29 => 30 @@ -115,6 +116,7 @@ Local Open Scope string_scope. Definition register_names := ("R0" , R0) :: ("R1" , R1) :: ("R2" , R2) :: ("R3" , R3) :: ("R4" , R4) :: ("R5" , R5) :: ("R6" , R6) :: ("R7" , R7) :: ("R9" , R9) + :: ("R10", R10) :: ("R15", R15) :: ("R16", R16) :: ("R17", R17) :: ("R18", R18) :: ("R19", R19) :: ("R20", R20) :: ("R21", R21) :: ("R22", R22) :: ("R23", R23) :: ("R24", R24) :: ("R25", R25) :: ("R26", R26) :: ("R27", R27) :: ("R28", R28) :: ("R29", R29) @@ -173,9 +175,9 @@ Definition destroyed_by_builtin (ef: external_function): list mreg := Definition destroyed_by_setstack (ty: typ): list mreg := nil. -Definition destroyed_at_function_entry: list mreg := R32 :: nil. +Definition destroyed_at_function_entry: list mreg := R10 :: nil. -Definition temp_for_parent_frame: mreg := R9. (* FIXME - and R8 ?? *) +Definition temp_for_parent_frame: mreg := R10. (* FIXME - and R8 ?? *) Definition destroyed_at_indirect_call: list mreg := nil. (* R10 :: R11 :: R12 :: R13 :: R14 :: R15 :: R16 :: R17 :: nil. *) diff --git a/mppa_k1c/TargetPrinter.ml b/mppa_k1c/TargetPrinter.ml index 4c3bf3c6..b463a9c5 100644 --- a/mppa_k1c/TargetPrinter.ml +++ b/mppa_k1c/TargetPrinter.ml @@ -191,7 +191,9 @@ module Target : TARGET = let print_instruction oc = function | Pcall(s) -> fprintf oc " call %a\n;;\n" symbol s - | Pgoto(s) | Pj_l(s) -> + | Pgoto(s) -> + fprintf oc " goto %a\n;;\n" symbol s + | Pj_l(s) -> fprintf oc " goto %a\n;;\n" print_label s | Pret -> fprintf oc " ret\n;;\n" diff --git a/test/mppa/mmult/mmult.c b/test/mppa/mmult/mmult.c index c9e7ad5e..dcbcfe17 100644 --- a/test/mppa/mmult/mmult.c +++ b/test/mppa/mmult/mmult.c @@ -39,7 +39,7 @@ typedef struct mblock { #define MAT_XY(mat, x, y) (mat)[(x)*SIZE + (y)] #define MAT_IJ(block, i, j) MAT_XY((block)->mat, (block)->imin + (i), block->jmin + (j)) -int strassen_mul(mblock *C, const mblock *A, const mblock *B){ +void divac_mul(mblock *C, const mblock *A, const mblock *B){ const int size = C->imax - C->imin; for (int i = 0 ; i < size ; i++) @@ -67,50 +67,50 @@ int strassen_mul(mblock *C, const mblock *A, const mblock *B){ newb.jmax = (block)->jmax;\ } -int strassen_part(mblock *C, const mblock *A, const mblock *B); +void divac_part(mblock *C, const mblock *A, const mblock *B); -void strassen_wrap(mblock *C , char IC, char JC, +void divac_wrap(mblock *C , char IC, char JC, const mblock *A, char IA, char JA, const mblock *B, char IB, char JB){ MAKE_MBLOCK(Cb, C, IC, JC); MAKE_MBLOCK(Ab, A, IA, JA); MAKE_MBLOCK(Bb, B, IB, JB); - strassen_part(&Cb, &Ab, &Bb); + divac_part(&Cb, &Ab, &Bb); } -int strassen_part(mblock *C, const mblock *A, const mblock *B){ +void divac_part(mblock *C, const mblock *A, const mblock *B){ const int size = C->imax - C->imin; if (size % 2 == 1) - strassen_mul(C, A, B); + divac_mul(C, A, B); else{ /* C_00 = A_00 B_00 + A_01 B_10 */ - strassen_wrap(C, 0, 0, A, 0, 0, B, 0, 0); - strassen_wrap(C, 0, 0, A, 0, 1, B, 1, 0); + divac_wrap(C, 0, 0, A, 0, 0, B, 0, 0); + divac_wrap(C, 0, 0, A, 0, 1, B, 1, 0); /* C_10 = A_10 B_00 + A_11 B_10 */ - strassen_wrap(C, 1, 0, A, 1, 0, B, 0, 0); - strassen_wrap(C, 1, 0, A, 1, 1, B, 1, 0); + divac_wrap(C, 1, 0, A, 1, 0, B, 0, 0); + divac_wrap(C, 1, 0, A, 1, 1, B, 1, 0); /* C_01 = A_00 B_01 + A_01 B_11 */ - strassen_wrap(C, 0, 1, A, 0, 0, B, 0, 1); - strassen_wrap(C, 0, 1, A, 0, 1, B, 1, 1); + divac_wrap(C, 0, 1, A, 0, 0, B, 0, 1); + divac_wrap(C, 0, 1, A, 0, 1, B, 1, 1); /* C_11 = A_10 B_01 + A_11 B_11 */ - strassen_wrap(C, 1, 1, A, 1, 0, B, 0, 1); - strassen_wrap(C, 1, 1, A, 1, 1, B, 1, 1); + divac_wrap(C, 1, 1, A, 1, 0, B, 0, 1); + divac_wrap(C, 1, 1, A, 1, 1, B, 1, 1); } } -void mmult_strassen(uint64_t C[][SIZE], uint64_t A[][SIZE], uint64_t B[][SIZE]){ +void mmult_divac(uint64_t C[][SIZE], uint64_t A[][SIZE], uint64_t B[][SIZE]){ mblock Cb = {.mat = (uint64_t *) C, .imin = 0, .imax = SIZE, .jmin = 0, .jmax = SIZE}; mblock Ab = {.mat = (uint64_t *) A , .imin = 0, .imax = SIZE, .jmin = 0, .jmax = SIZE}; mblock Bb = {.mat = (uint64_t *) B , .imin = 0, .imax = SIZE, .jmin = 0, .jmax = SIZE}; - strassen_part(&Cb, &Ab, &Bb); + divac_part(&Cb, &Ab, &Bb); } #ifdef __UNIT_TEST_MMULT__ @@ -128,7 +128,7 @@ int main(void){ mmult_row(C1, A, B); mmult_col(C2, A, B); - mmult_strassen(C3, A, B); + mmult_divac(C3, A, B); for (int i = 0 ; i < SIZE ; i++) for (int j = 0 ; j < SIZE ; j++) |