diff options
Diffstat (limited to 'aarch64')
-rw-r--r-- | aarch64/Archi.v | 2 | ||||
-rw-r--r-- | aarch64/Asmexpand.ml | 2 | ||||
-rw-r--r-- | aarch64/Asmgen.v | 42 | ||||
-rw-r--r-- | aarch64/Asmgenproof.v | 163 | ||||
-rw-r--r-- | aarch64/Asmgenproof1.v | 789 | ||||
-rw-r--r-- | aarch64/CSE2deps.v | 35 | ||||
-rw-r--r-- | aarch64/CSE2depsproof.v | 209 | ||||
-rw-r--r-- | aarch64/ConstpropOpproof.v | 121 | ||||
-rw-r--r-- | aarch64/DuplicateOpcodeHeuristic.ml | 41 | ||||
-rw-r--r-- | aarch64/Machregs.v | 1 | ||||
-rw-r--r-- | aarch64/Machregsaux.ml | 5 | ||||
-rw-r--r-- | aarch64/Op.v | 248 | ||||
-rw-r--r-- | aarch64/OpWeights.ml | 353 | ||||
-rw-r--r-- | aarch64/PrepassSchedulingOracle.ml | 477 | ||||
-rw-r--r-- | aarch64/PrepassSchedulingOracleDeps.ml | 17 | ||||
-rw-r--r-- | aarch64/SelectLongproof.v | 33 | ||||
-rw-r--r-- | aarch64/SelectOp.vp | 15 | ||||
-rw-r--r-- | aarch64/SelectOpproof.v | 61 | ||||
-rw-r--r-- | aarch64/TargetPrinter.ml | 45 | ||||
-rw-r--r-- | aarch64/ValueAOp.v | 24 |
20 files changed, 2255 insertions, 428 deletions
diff --git a/aarch64/Archi.v b/aarch64/Archi.v index 42500e70..7f39d1fa 100644 --- a/aarch64/Archi.v +++ b/aarch64/Archi.v @@ -88,3 +88,5 @@ Global Opaque ptr64 big_endian splitlong (** Whether to generate position-independent code or not *) Parameter pic_code: unit -> bool. + +Definition has_notrap_loads := false. diff --git a/aarch64/Asmexpand.ml b/aarch64/Asmexpand.ml index 1ba754dd..5b183c2d 100644 --- a/aarch64/Asmexpand.ml +++ b/aarch64/Asmexpand.ml @@ -408,7 +408,7 @@ let expand_instruction instr = expand_annot_val kind txt targ args res | EF_memcpy(sz, al) -> expand_builtin_memcpy (Z.to_int sz) (Z.to_int al) args - | EF_annot _ | EF_debug _ | EF_inline_asm _ -> + | EF_annot _ | EF_debug _ | EF_inline_asm _ | EF_profiling _ -> emit instr | _ -> assert false diff --git a/aarch64/Asmgen.v b/aarch64/Asmgen.v index 875f3fd1..024c9a17 100644 --- a/aarch64/Asmgen.v +++ b/aarch64/Asmgen.v @@ -268,18 +268,24 @@ Definition arith_extended Definition shrx32 (rd r1: ireg) (n: int) (k: code) : code := if Int.eq n Int.zero then Pmov rd r1 :: k - else - Porr W X16 XZR r1 (SOasr (Int.repr 31)) :: - Padd W X16 r1 X16 (SOlsr (Int.sub Int.iwordsize n)) :: - Porr W rd XZR X16 (SOasr n) :: k. + else if Int.eq n Int.one then + Padd W X16 r1 r1 (SOlsr (Int.repr 31)) :: + Porr W rd XZR X16 (SOasr n) :: k + else + Porr W X16 XZR r1 (SOasr (Int.repr 31)) :: + Padd W X16 r1 X16 (SOlsr (Int.sub Int.iwordsize n)) :: + Porr W rd XZR X16 (SOasr n) :: k. Definition shrx64 (rd r1: ireg) (n: int) (k: code) : code := if Int.eq n Int.zero then Pmov rd r1 :: k - else - Porr X X16 XZR r1 (SOasr (Int.repr 63)) :: - Padd X X16 r1 X16 (SOlsr (Int.sub Int64.iwordsize' n)) :: - Porr X rd XZR X16 (SOasr n) :: k. + else if Int.eq n Int.one then + Padd X X16 r1 r1 (SOlsr (Int.repr 63)) :: + Porr X rd XZR X16 (SOasr n) :: k + else + Porr X X16 XZR r1 (SOasr (Int.repr 63)) :: + Padd X X16 r1 X16 (SOlsr (Int.sub Int64.iwordsize' n)) :: + Porr X rd XZR X16 (SOasr n) :: k. (** Load the address [id + ofs] in [rd] *) @@ -962,8 +968,12 @@ Definition transl_addressing (sz: Z) (addr: Op.addressing) (args: list mreg) (** Translation of loads and stores *) -Definition transl_load (chunk: memory_chunk) (addr: Op.addressing) +Definition transl_load (trap: trapping_mode) + (chunk: memory_chunk) (addr: Op.addressing) (args: list mreg) (dst: mreg) (k: code) : res code := + match trap with + | NOTRAP => Error (msg "Asmgen.transl_load non-trapping loads unsupported on aarch64") + | TRAP => match chunk with | Mint8unsigned => do rd <- ireg_of dst; transl_addressing 1 addr args (Pldrb W rd) k @@ -985,6 +995,7 @@ Definition transl_load (chunk: memory_chunk) (addr: Op.addressing) do rd <- ireg_of dst; transl_addressing 4 addr args (Pldrw_a rd) k | Many64 => do rd <- ireg_of dst; transl_addressing 8 addr args (Pldrx_a rd) k + end end. Definition transl_store (chunk: memory_chunk) (addr: Op.addressing) @@ -1050,8 +1061,13 @@ Definition storeptr (src: ireg) (base: iregsp) (ofs: ptrofs) (k: code) := (** Function epilogue *) Definition make_epilogue (f: Mach.function) (k: code) := - loadptr XSP f.(fn_retaddr_ofs) RA - (Pfreeframe f.(fn_stacksize) f.(fn_link_ofs) :: k). + (* FIXME + Cannot be used because memcpy destroys X30; + issue being discussed with X. Leroy *) + (* if is_leaf_function f + then Pfreeframe f.(fn_stacksize) f.(fn_link_ofs) :: k + else*) loadptr XSP f.(fn_retaddr_ofs) RA + (Pfreeframe f.(fn_stacksize) f.(fn_link_ofs) :: k). (** Translation of a Mach instruction. *) @@ -1068,8 +1084,8 @@ Definition transl_instr (f: Mach.function) (i: Mach.instruction) OK (if r29_is_parent then c else loadptr XSP f.(fn_link_ofs) X29 c) | Mop op args res => transl_op op args res k - | Mload chunk addr args dst => - transl_load chunk addr args dst k + | Mload trap chunk addr args dst => + transl_load trap chunk addr args dst k | Mstore chunk addr args src => transl_store chunk addr args src k | Mcall sig (inl r) => diff --git a/aarch64/Asmgenproof.v b/aarch64/Asmgenproof.v index b60623a6..df88043f 100644 --- a/aarch64/Asmgenproof.v +++ b/aarch64/Asmgenproof.v @@ -259,13 +259,13 @@ Proof. - apply logicalimm32_label; unfold nolabel; auto. - apply logicalimm32_label; unfold nolabel; auto. - apply logicalimm32_label; unfold nolabel; auto. -- unfold shrx32. destruct Int.eq; TailNoLabel. +- unfold shrx32. destruct (Int.eq _ _); try destruct (Int.eq _ _); TailNoLabel. - apply arith_extended_label; unfold nolabel; auto. - apply arith_extended_label; unfold nolabel; auto. - apply logicalimm64_label; unfold nolabel; auto. - apply logicalimm64_label; unfold nolabel; auto. - apply logicalimm64_label; unfold nolabel; auto. -- unfold shrx64. destruct Int.eq; TailNoLabel. +- unfold shrx64. destruct (Int.eq _ _); try destruct (Int.eq _ _); TailNoLabel. - eapply tail_nolabel_trans. eapply transl_cond_label; eauto. TailNoLabel. - destruct (preg_of r); try discriminate; TailNoLabel; (eapply tail_nolabel_trans; [eapply transl_cond_label; eauto | TailNoLabel]). @@ -283,10 +283,10 @@ Proof. Qed. Remark transl_load_label: - forall chunk addr args dst k c, - transl_load chunk addr args dst k = OK c -> tail_nolabel k c. + forall trap chunk addr args dst k c, + transl_load trap chunk addr args dst k = OK c -> tail_nolabel k c. Proof. - unfold transl_load; intros; destruct chunk; TailNoLabel; eapply transl_addressing_label; eauto; unfold nolabel; auto. + unfold transl_load; intros; destruct trap; try discriminate; destruct chunk; TailNoLabel; eapply transl_addressing_label; eauto; unfold nolabel; auto. Qed. Remark transl_store_label: @@ -337,7 +337,12 @@ Qed. Remark make_epilogue_label: forall f k, tail_nolabel k (make_epilogue f k). Proof. - unfold make_epilogue; intros. eapply tail_nolabel_trans. apply loadptr_label. TailNoLabel. + unfold make_epilogue; intros. + (* FIXME destruct is_leaf_function. + { TailNoLabel. } *) + eapply tail_nolabel_trans. + apply loadptr_label. + TailNoLabel. Qed. Lemma transl_instr_label: @@ -472,7 +477,8 @@ Inductive match_states: Mach.state -> Asm.state -> Prop := (MEXT: Mem.extends m m') (AT: transl_code_at_pc ge (rs PC) fb f c ep tf tc) (AG: agree ms sp rs) - (DXP: ep = true -> rs#X29 = parent_sp s), + (DXP: ep = true -> rs#X29 = parent_sp s) + (LEAF: is_leaf_function f = true -> rs#RA = parent_ra s), match_states (Mach.State s fb sp c ms m) (Asm.State rs m') | match_states_call: @@ -503,16 +509,17 @@ Lemma exec_straight_steps: exists rs2, exec_straight tge tf c rs1 m1' k rs2 m2' /\ agree ms2 sp rs2 - /\ (it1_is_parent ep i = true -> rs2#X29 = parent_sp s)) -> + /\ (it1_is_parent ep i = true -> rs2#X29 = parent_sp s) + /\ (is_leaf_function f = true -> rs2#RA = parent_ra s)) -> exists st', plus step tge (State rs1 m1') E0 st' /\ match_states (Mach.State s fb sp c ms2 m2) st'. Proof. intros. inversion H2. subst. monadInv H7. - exploit H3; eauto. intros [rs2 [A [B C]]]. + exploit H3; eauto. intros [rs2 [A [B [C D]]]]. exists (State rs2 m2'); split. - eapply exec_straight_exec; eauto. - econstructor; eauto. eapply exec_straight_at; eauto. + - eapply exec_straight_exec; eauto. + - econstructor; eauto. eapply exec_straight_at; eauto. Qed. Lemma exec_straight_steps_goto: @@ -527,13 +534,14 @@ Lemma exec_straight_steps_goto: exists jmp, exists k', exists rs2, exec_straight tge tf c rs1 m1' (jmp :: k') rs2 m2' /\ agree ms2 sp rs2 - /\ exec_instr tge tf jmp rs2 m2' = goto_label tf lbl rs2 m2') -> + /\ exec_instr tge tf jmp rs2 m2' = goto_label tf lbl rs2 m2' + /\ (is_leaf_function f = true -> rs2#RA = parent_ra s)) -> exists st', plus step tge (State rs1 m1') E0 st' /\ match_states (Mach.State s fb sp c' ms2 m2) st'. Proof. intros. inversion H3. subst. monadInv H9. - exploit H5; eauto. intros [jmp [k' [rs2 [A [B C]]]]]. + exploit H5; eauto. intros [jmp [k' [rs2 [A [B [C D]]]]]]. generalize (functions_transl _ _ _ H7 H8); intro FN. generalize (transf_function_no_overflow _ _ H8); intro NOOV. exploit exec_straight_steps_2; eauto. @@ -550,6 +558,7 @@ Proof. econstructor; eauto. apply agree_exten with rs2; auto with asmgen. congruence. + rewrite OTH by congruence; auto. Qed. Lemma exec_straight_opt_steps_goto: @@ -564,13 +573,14 @@ Lemma exec_straight_opt_steps_goto: exists jmp, exists k', exists rs2, exec_straight_opt tge tf c rs1 m1' (jmp :: k') rs2 m2' /\ agree ms2 sp rs2 - /\ exec_instr tge tf jmp rs2 m2' = goto_label tf lbl rs2 m2') -> + /\ exec_instr tge tf jmp rs2 m2' = goto_label tf lbl rs2 m2' + /\ (is_leaf_function f = true -> rs2#RA = parent_ra s)) -> exists st', plus step tge (State rs1 m1') E0 st' /\ match_states (Mach.State s fb sp c' ms2 m2) st'. Proof. intros. inversion H3. subst. monadInv H9. - exploit H5; eauto. intros [jmp [k' [rs2 [A [B C]]]]]. + exploit H5; eauto. intros [jmp [k' [rs2 [A [B [C D]]]]]]. generalize (functions_transl _ _ _ H7 H8); intro FN. generalize (transf_function_no_overflow _ _ H8); intro NOOV. inv A. @@ -583,6 +593,7 @@ Proof. econstructor; eauto. apply agree_exten with rs2; auto with asmgen. congruence. + rewrite OTH by congruence; auto. - exploit exec_straight_steps_2; eauto. intros [ofs' [PC2 CT2]]. exploit find_label_goto_label; eauto. @@ -597,6 +608,7 @@ Proof. econstructor; eauto. apply agree_exten with rs2; auto with asmgen. congruence. + rewrite OTH by congruence; auto. Qed. (** We need to show that, in the simulation diagram, we cannot @@ -629,7 +641,7 @@ Qed. Theorem step_simulation: forall S1 t S2, Mach.step return_address_offset ge S1 t S2 -> - forall S1' (MS: match_states S1 S1'), + forall S1' (MS: match_states S1 S1') (WF: wf_state ge S1), (exists S2', plus step tge S1' t S2' /\ match_states S2 S2') \/ (measure S2 < measure S1 /\ t = E0 /\ match_states S2 S1')%nat. Proof. @@ -638,17 +650,20 @@ Proof. - (* Mlabel *) left; eapply exec_straight_steps; eauto; intros. monadInv TR. econstructor; split. apply exec_straight_one. simpl; eauto. auto. - split. apply agree_nextinstr; auto. simpl; congruence. + split. { apply agree_nextinstr; auto. } + split. { simpl; congruence. } + rewrite nextinstr_inv by congruence; assumption. - (* Mgetstack *) unfold load_stack in H. exploit Mem.loadv_extends; eauto. intros [v' [A B]]. rewrite (sp_val _ _ _ AG) in A. left; eapply exec_straight_steps; eauto. intros. simpl in TR. - exploit loadind_correct; eauto with asmgen. intros [rs' [P [Q R]]]. + exploit loadind_correct; eauto with asmgen. intros [rs' [P [Q [R S]]]]. exists rs'; split. eauto. - split. eapply agree_set_mreg; eauto with asmgen. congruence. - simpl; congruence. + split. { eapply agree_set_mreg; eauto with asmgen. congruence. } + split. { simpl; congruence. } + rewrite S. assumption. - (* Msetstack *) unfold store_stack in H. @@ -656,10 +671,12 @@ Proof. exploit Mem.storev_extends; eauto. intros [m2' [A B]]. left; eapply exec_straight_steps; eauto. rewrite (sp_val _ _ _ AG) in A. intros. simpl in TR. - exploit storeind_correct; eauto with asmgen. intros [rs' [P Q]]. + exploit storeind_correct; eauto with asmgen. intros [rs' [P [Q R]]]. exists rs'; split. eauto. split. eapply agree_undef_regs; eauto with asmgen. - simpl; intros. rewrite Q; auto with asmgen. + simpl; intros. + split. rewrite Q; auto with asmgen. + rewrite R. assumption. - (* Mgetparam *) assert (f0 = f) by congruence; subst f0. @@ -675,50 +692,69 @@ Opaque loadind. (* X30 contains parent *) exploit loadind_correct. eexact EQ. instantiate (2 := rs0). simpl; rewrite DXP; eauto. simpl; congruence. - intros [rs1 [P [Q R]]]. + intros [rs1 [P [Q [R S]]]]. exists rs1; split. eauto. split. eapply agree_set_mreg. eapply agree_set_mreg; eauto. congruence. auto with asmgen. - simpl; intros. rewrite R; auto with asmgen. - apply preg_of_not_X29; auto. + simpl; split; intros. + { rewrite R; auto with asmgen. + apply preg_of_not_X29; auto. + } + { rewrite S; auto. } + (* X30 does not contain parent *) exploit loadptr_correct. eexact A. simpl; congruence. intros [rs1 [P [Q R]]]. exploit loadind_correct. eexact EQ. instantiate (2 := rs1). simpl; rewrite Q. eauto. simpl; congruence. - intros [rs2 [S [T U]]]. + intros [rs2 [S [T [U V]]]]. exists rs2; split. eapply exec_straight_trans; eauto. split. eapply agree_set_mreg. eapply agree_set_mreg. eauto. eauto. instantiate (1 := rs1#X29 <- (rs2#X29)). intros. rewrite Pregmap.gso; auto with asmgen. congruence. intros. unfold Pregmap.set. destruct (PregEq.eq r' X29). congruence. auto with asmgen. - simpl; intros. rewrite U; auto with asmgen. + split; simpl; intros. rewrite U; auto with asmgen. apply preg_of_not_X29; auto. - + rewrite V. rewrite R by congruence. auto. + - (* Mop *) assert (eval_operation tge sp op (map rs args) m = Some v). { rewrite <- H. apply eval_operation_preserved. exact symbols_preserved. } exploit eval_operation_lessdef. eapply preg_vals; eauto. eauto. eexact H0. intros [v' [A B]]. rewrite (sp_val _ _ _ AG) in A. left; eapply exec_straight_steps; eauto; intros. simpl in TR. - exploit transl_op_correct; eauto. intros [rs2 [P [Q R]]]. + exploit transl_op_correct; eauto. intros [rs2 [P [Q [R S]]]]. exists rs2; split. eauto. split. apply agree_set_undef_mreg with rs0; auto. apply Val.lessdef_trans with v'; auto. - simpl; intros. InvBooleans. + split; simpl; intros. InvBooleans. rewrite R; auto. apply preg_of_not_X29; auto. Local Transparent destroyed_by_op. destruct op; try exact I; simpl; congruence. - + rewrite S. + auto. - (* Mload *) + destruct trap. + { assert (Op.eval_addressing tge sp addr (map rs args) = Some a). { rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved. } exploit eval_addressing_lessdef. eapply preg_vals; eauto. eexact H1. intros [a' [A B]]. rewrite (sp_val _ _ _ AG) in A. exploit Mem.loadv_extends; eauto. intros [v' [C D]]. left; eapply exec_straight_steps; eauto; intros. simpl in TR. - exploit transl_load_correct; eauto. intros [rs2 [P [Q R]]]. + exploit transl_load_correct; eauto. intros [rs2 [P [Q [R S]]]]. exists rs2; split. eauto. split. eapply agree_set_undef_mreg; eauto. congruence. - simpl; congruence. + split. simpl; congruence. + rewrite S. assumption. + } + + (* Mload notrap1 *) + inv AT. simpl in *. unfold bind in *. destruct (transl_code _ _ _) in *; discriminate. + +- (* Mload notrap *) + inv AT. simpl in *. unfold bind in *. destruct (transl_code _ _ _) in *; discriminate. + +- (* Mload notrap *) + inv AT. simpl in *. unfold bind in *. destruct (transl_code _ _ _) in *; discriminate. - (* Mstore *) assert (Op.eval_addressing tge sp addr (map rs args) = Some a). @@ -728,10 +764,11 @@ Local Transparent destroyed_by_op. assert (Val.lessdef (rs src) (rs0 (preg_of src))) by (eapply preg_val; eauto). exploit Mem.storev_extends; eauto. intros [m2' [C D]]. left; eapply exec_straight_steps; eauto. - intros. simpl in TR. exploit transl_store_correct; eauto. intros [rs2 [P Q]]. + intros. simpl in TR. exploit transl_store_correct; eauto. intros [rs2 [P [Q R]]]. exists rs2; split. eauto. split. eapply agree_undef_regs; eauto with asmgen. - simpl; congruence. + split. simpl; congruence. + rewrite R. assumption. - (* Mcall *) assert (f0 = f) by congruence. subst f0. @@ -843,6 +880,18 @@ Local Transparent destroyed_by_op. simpl. rewrite undef_regs_other_2; auto. Simpl. congruence. + Simpl. + rewrite set_res_other by trivial. + rewrite undef_regs_other. + assumption. + intro. + rewrite in_map_iff. + intros (x0 & PREG & IN). + subst r'. + intro. + apply (preg_of_not_RA x0). + congruence. + - (* Mgoto *) assert (f0 = f) by congruence. subst f0. inv AT. monadInv H4. @@ -856,25 +905,33 @@ Local Transparent destroyed_by_op. eapply agree_exten; eauto with asmgen. congruence. + rewrite INV by congruence. + assumption. + - (* Mcond true *) assert (f0 = f) by congruence. subst f0. exploit eval_condition_lessdef. eapply preg_vals; eauto. eauto. eauto. intros EC. left; eapply exec_straight_opt_steps_goto; eauto. intros. simpl in TR. - exploit transl_cond_branch_correct; eauto. intros (rs' & jmp & A & B & C). + exploit transl_cond_branch_correct; eauto. intros (rs' & jmp & A & B & C & D). exists jmp; exists k; exists rs'. split. eexact A. split. apply agree_exten with rs0; auto with asmgen. - exact B. + split. + exact B. + rewrite D. exact LEAF. - (* Mcond false *) exploit eval_condition_lessdef. eapply preg_vals; eauto. eauto. eauto. intros EC. left; eapply exec_straight_steps; eauto. intros. simpl in TR. - exploit transl_cond_branch_correct; eauto. intros (rs' & jmp & A & B & C). + exploit transl_cond_branch_correct; eauto. intros (rs' & jmp & A & B & C & D). econstructor; split. eapply exec_straight_opt_right. eexact A. apply exec_straight_one. eexact B. auto. split. apply agree_exten with rs0; auto. intros. Simpl. + split. simpl; congruence. + Simpl. rewrite D. + exact LEAF. - (* Mjumptable *) assert (f0 = f) by congruence. subst f0. @@ -896,6 +953,10 @@ Local Transparent destroyed_by_op. simpl. intros. rewrite C; auto with asmgen. Simpl. congruence. + rewrite C by congruence. + repeat rewrite Pregmap.gso by congruence. + assumption. + - (* Mreturn *) assert (f0 = f) by congruence. subst f0. inversion AT; subst. simpl in H6; monadInv H6. @@ -938,7 +999,7 @@ Local Transparent destroyed_by_op. simpl preg_of_iregsp. change (rs2 X30) with (rs0 X30). rewrite ATLR. change (rs2 X2) with sp. eexact P. simpl; congruence. congruence. - intros (rs3 & U & V). + intros (rs3 & U & V & W). assert (EXEC_PROLOGUE: exec_straight tge tf tf.(fn_code) rs0 m' @@ -965,6 +1026,10 @@ Local Transparent destroyed_at_function_entry. simpl. unfold sp; congruence. intros. rewrite V by auto with asmgen. reflexivity. + rewrite W. + unfold rs2. + Simpl. + - (* external function *) exploit functions_translated; eauto. intros [tf [A B]]. simpl in B. inv B. @@ -984,6 +1049,10 @@ Local Transparent destroyed_at_function_entry. simpl. right. split. omega. split. auto. rewrite <- ATPC in H5. econstructor; eauto. congruence. + inv WF. + inv STACK. + inv H1. + congruence. Qed. Lemma transf_initial_states: @@ -1019,11 +1088,17 @@ Qed. Theorem transf_program_correct: forward_simulation (Mach.semantics return_address_offset prog) (Asm.semantics tprog). Proof. - eapply forward_simulation_star with (measure := measure). - apply senv_preserved. - eexact transf_initial_states. - eexact transf_final_states. - exact step_simulation. + eapply forward_simulation_star with (measure := measure) + (match_states := fun S1 S2 => match_states S1 S2 /\ wf_state ge S1). + - apply senv_preserved. + - simpl; intros. exploit transf_initial_states; eauto. + intros (s2 & A & B). + exists s2; intuition auto. apply wf_initial; auto. + - simpl; intros. destruct H as [MS WF]. eapply transf_final_states; eauto. + - simpl; intros. destruct H0 as [MS WF]. + exploit step_simulation; eauto. intros [ (s2' & A & B) | (A & B & C) ]. + + left; exists s2'; intuition auto. eapply wf_step; eauto. + + right; intuition auto. eapply wf_step; eauto. Qed. End PRESERVATION. diff --git a/aarch64/Asmgenproof1.v b/aarch64/Asmgenproof1.v index 6d44bcc8..35f1f2d7 100644 --- a/aarch64/Asmgenproof1.v +++ b/aarch64/Asmgenproof1.v @@ -22,6 +22,51 @@ Local Transparent Archi.ptr64. (** Properties of registers *) +Lemma preg_of_not_RA: + forall r, (preg_of r) <> RA. +Proof. + destruct r; discriminate. +Qed. + +Lemma RA_not_written: + forall (rs : regset) dst v, + rs # (preg_of dst) <- v RA = rs RA. +Proof. + intros. + apply Pregmap.gso. + intro. + symmetry in H. + exact (preg_of_not_RA dst H). +Qed. + +Hint Resolve RA_not_written : asmgen. + +Lemma RA_not_written2: + forall (rs : regset) dst v i, + preg_of dst = i -> + rs # i <- v RA = rs RA. +Proof. + intros. + subst i. + apply RA_not_written. +Qed. + +Hint Resolve RA_not_written2 : asmgen. + +Lemma RA_not_written3: + forall (rs : regset) dst v i, + ireg_of dst = OK i -> + rs # i <- v RA = rs RA. +Proof. + intros. + unfold ireg_of in H. + destruct preg_of eqn:PREG; try discriminate. + replace i0 with i in * by congruence. + eapply RA_not_written2; eassumption. +Qed. + +Hint Resolve RA_not_written3 : asmgen. + Lemma preg_of_iregsp_not_PC: forall r, preg_of_iregsp r <> PC. Proof. destruct r; simpl; congruence. @@ -39,6 +84,26 @@ Proof. red; intros; subst x. elim (preg_of_not_X16 r); auto. Qed. +Lemma ireg_of_not_RA: forall r x, ireg_of r = OK x -> x <> RA. +Proof. + unfold ireg_of; intros. destruct (preg_of r) eqn:E; inv H. + red; intros; subst x. elim (preg_of_not_RA r); auto. +Qed. + +Lemma ireg_of_not_RA': forall r x, ireg_of r = OK x -> RA <> x. +Proof. + intros. intro. + apply (ireg_of_not_RA r x); auto. +Qed. + +Lemma ireg_of_not_RA'': forall r x, ireg_of r = OK x -> IR RA <> IR x. +Proof. + intros. intro. + apply (ireg_of_not_RA' r x); auto. congruence. +Qed. + +Hint Resolve ireg_of_not_RA ireg_of_not_RA' ireg_of_not_RA'' : asmgen. + Lemma ireg_of_not_X16': forall r x, ireg_of r = OK x -> IR x <> IR X16. Proof. intros. apply ireg_of_not_X16 in H. congruence. @@ -205,42 +270,49 @@ Qed. Lemma exec_loadimm_k_w: forall (rd: ireg) k m l, wf_decomposition l -> + rd <> RA -> forall (rs: regset) accu, rs#rd = Vint (Int.repr accu) -> exists rs', exec_straight_opt ge fn (loadimm_k W rd l k) rs m k rs' m /\ rs'#rd = Vint (Int.repr (recompose_int accu l)) - /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. + /\ (forall r, r <> PC -> r <> rd -> rs'#r = rs#r) + /\ rs' # RA = rs # RA. Proof. - induction 1; intros rs accu ACCU; simpl. + induction 1; intros RD_NOT_RA rs accu ACCU; simpl. - exists rs; split. apply exec_straight_opt_refl. auto. -- destruct (IHwf_decomposition +- destruct (IHwf_decomposition RD_NOT_RA (nextinstr (rs#rd <- (insert_in_int rs#rd n p 16))) (Zinsert accu n p 16)) - as (rs' & P & Q & R). + as (rs' & P & Q & R & S). Simpl. rewrite ACCU. simpl. f_equal. apply Int.eqm_samerepr. apply Zinsert_eqmod. auto. omega. apply Int.eqm_sym; apply Int.eqm_unsigned_repr. exists rs'; split. eapply exec_straight_opt_step_opt. simpl; eauto. auto. exact P. - split. exact Q. intros; Simpl. rewrite R by auto. Simpl. + split. exact Q. + split. + { intros; Simpl. + rewrite R by auto. Simpl. } + { rewrite S. Simpl. } Qed. Lemma exec_loadimm_z_w: forall rd l k rs m, wf_decomposition l -> + rd <> RA -> exists rs', exec_straight ge fn (loadimm_z W rd l k) rs m k rs' m /\ rs'#rd = Vint (Int.repr (recompose_int 0 l)) /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. Proof. - unfold loadimm_z; destruct 1. + unfold loadimm_z; destruct 1; intro RD_NOT_RA. - econstructor; split. apply exec_straight_one. simpl; eauto. auto. split. Simpl. intros; Simpl. - set (accu0 := Zinsert 0 n p 16). set (rs1 := nextinstr (rs#rd <- (Vint (Int.repr accu0)))). - destruct (exec_loadimm_k_w rd k m l H1 rs1 accu0) as (rs2 & P & Q & R); auto. + destruct (exec_loadimm_k_w rd k m l H1 RD_NOT_RA rs1 accu0) as (rs2 & P & Q & R & S); auto. unfold rs1; Simpl. exists rs2; split. eapply exec_straight_opt_step; eauto. @@ -253,12 +325,13 @@ Qed. Lemma exec_loadimm_n_w: forall rd l k rs m, wf_decomposition l -> + rd <> RA -> exists rs', exec_straight ge fn (loadimm_n W rd l k) rs m k rs' m /\ rs'#rd = Vint (Int.repr (Z.lnot (recompose_int 0 l))) /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. Proof. - unfold loadimm_n; destruct 1. + unfold loadimm_n; destruct 1; intro RD_NOT_RA. - econstructor; split. apply exec_straight_one. simpl; eauto. auto. split. Simpl. @@ -267,7 +340,8 @@ Proof. set (rs1 := nextinstr (rs#rd <- (Vint (Int.repr accu0)))). destruct (exec_loadimm_k_w rd k m (negate_decomposition l) (negate_decomposition_wf l H1) - rs1 accu0) as (rs2 & P & Q & R). + RD_NOT_RA rs1 accu0) + as (rs2 & P & Q & R & S). unfold rs1; Simpl. exists rs2; split. eapply exec_straight_opt_step; eauto. @@ -279,7 +353,8 @@ Proof. Qed. Lemma exec_loadimm32: - forall rd n k rs m, + forall rd n k rs m + (RD_NOT_RA : rd <> RA), exists rs', exec_straight ge fn (loadimm32 rd n k) rs m k rs' m /\ rs'#rd = Vint n @@ -302,13 +377,14 @@ Proof. apply Int.eqm_samerepr. apply decompose_notint_eqmod. apply Int.repr_unsigned. } destruct Nat.leb. -+ rewrite <- A. apply exec_loadimm_z_w. apply decompose_int_wf; omega. -+ rewrite <- B. apply exec_loadimm_n_w. apply decompose_int_wf; omega. ++ rewrite <- A. apply exec_loadimm_z_w. apply decompose_int_wf; omega. trivial. ++ rewrite <- B. apply exec_loadimm_n_w. apply decompose_int_wf; omega. trivial. Qed. Lemma exec_loadimm_k_x: forall (rd: ireg) k m l, - wf_decomposition l -> + wf_decomposition l -> + rd <> RA -> forall (rs: regset) accu, rs#rd = Vlong (Int64.repr accu) -> exists rs', @@ -316,9 +392,9 @@ Lemma exec_loadimm_k_x: /\ rs'#rd = Vlong (Int64.repr (recompose_int accu l)) /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. Proof. - induction 1; intros rs accu ACCU; simpl. + induction 1; intros RD_NOT_RA rs accu ACCU; simpl. - exists rs; split. apply exec_straight_opt_refl. auto. -- destruct (IHwf_decomposition +- destruct (IHwf_decomposition RD_NOT_RA (nextinstr (rs#rd <- (insert_in_long rs#rd n p 16))) (Zinsert accu n p 16)) as (rs' & P & Q & R). @@ -332,19 +408,20 @@ Qed. Lemma exec_loadimm_z_x: forall rd l k rs m, wf_decomposition l -> + rd <> RA -> exists rs', exec_straight ge fn (loadimm_z X rd l k) rs m k rs' m /\ rs'#rd = Vlong (Int64.repr (recompose_int 0 l)) /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. Proof. - unfold loadimm_z; destruct 1. + unfold loadimm_z; destruct 1; intro RD_NOT_RA. - econstructor; split. apply exec_straight_one. simpl; eauto. auto. split. Simpl. intros; Simpl. - set (accu0 := Zinsert 0 n p 16). set (rs1 := nextinstr (rs#rd <- (Vlong (Int64.repr accu0)))). - destruct (exec_loadimm_k_x rd k m l H1 rs1 accu0) as (rs2 & P & Q & R); auto. + destruct (exec_loadimm_k_x rd k m l H1 RD_NOT_RA rs1 accu0) as (rs2 & P & Q & R); auto. unfold rs1; Simpl. exists rs2; split. eapply exec_straight_opt_step; eauto. @@ -357,12 +434,13 @@ Qed. Lemma exec_loadimm_n_x: forall rd l k rs m, wf_decomposition l -> + rd <> RA -> exists rs', exec_straight ge fn (loadimm_n X rd l k) rs m k rs' m /\ rs'#rd = Vlong (Int64.repr (Z.lnot (recompose_int 0 l))) /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. Proof. - unfold loadimm_n; destruct 1. + unfold loadimm_n; destruct 1; intro RD_NOT_RA. - econstructor; split. apply exec_straight_one. simpl; eauto. auto. split. Simpl. @@ -371,7 +449,7 @@ Proof. set (rs1 := nextinstr (rs#rd <- (Vlong (Int64.repr accu0)))). destruct (exec_loadimm_k_x rd k m (negate_decomposition l) (negate_decomposition_wf l H1) - rs1 accu0) as (rs2 & P & Q & R). + RD_NOT_RA rs1 accu0) as (rs2 & P & Q & R). unfold rs1; Simpl. exists rs2; split. eapply exec_straight_opt_step; eauto. @@ -384,12 +462,13 @@ Qed. Lemma exec_loadimm64: forall rd n k rs m, + rd <> RA -> exists rs', exec_straight ge fn (loadimm64 rd n k) rs m k rs' m /\ rs'#rd = Vlong n /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. Proof. - unfold loadimm64, loadimm; intros. + unfold loadimm64, loadimm; intros until m; intro RD_NOT_RA. destruct (is_logical_imm64 n). - econstructor; split. apply exec_straight_one. simpl; eauto. auto. @@ -406,8 +485,8 @@ Proof. apply Int64.eqm_samerepr. apply decompose_notint_eqmod. apply Int64.repr_unsigned. } destruct Nat.leb. -+ rewrite <- A. apply exec_loadimm_z_x. apply decompose_int_wf; omega. -+ rewrite <- B. apply exec_loadimm_n_x. apply decompose_int_wf; omega. ++ rewrite <- A. apply exec_loadimm_z_x. apply decompose_int_wf; omega. trivial. ++ rewrite <- B. apply exec_loadimm_n_x. apply decompose_int_wf; omega. trivial. Qed. (** Add immediate *) @@ -419,55 +498,59 @@ Lemma exec_addimm_aux_32: Next (nextinstr (rs#rd <- (sem rs#r1 (Vint (Int.repr n))))) m) -> (forall v n1 n2, sem (sem v (Vint n1)) (Vint n2) = sem v (Vint (Int.add n1 n2))) -> forall rd r1 n k rs m, + (IR RA) <> (preg_of_iregsp (RR1 rd)) -> exists rs', exec_straight ge fn (addimm_aux insn rd r1 (Int.unsigned n) k) rs m k rs' m /\ rs'#rd = sem rs#r1 (Vint n) - /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r. + /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r) + /\ rs' # RA = rs # RA. Proof. - intros insn sem SEM ASSOC; intros. unfold addimm_aux. + intros insn sem SEM ASSOC; intros until m; intro RD_NOT_RA. unfold addimm_aux. set (nlo := Zzero_ext 12 (Int.unsigned n)). set (nhi := Int.unsigned n - nlo). assert (E: Int.unsigned n = nhi + nlo) by (unfold nhi; omega). rewrite <- (Int.repr_unsigned n). destruct (Z.eqb_spec nhi 0); [|destruct (Z.eqb_spec nlo 0)]. - econstructor; split. apply exec_straight_one. apply SEM. Simpl. split. Simpl. do 3 f_equal; omega. - intros; Simpl. + split; intros; Simpl. - econstructor; split. apply exec_straight_one. apply SEM. Simpl. split. Simpl. do 3 f_equal; omega. - intros; Simpl. + split; intros; Simpl. - econstructor; split. eapply exec_straight_two. apply SEM. apply SEM. Simpl. Simpl. split. Simpl. rewrite ASSOC. do 2 f_equal. apply Int.eqm_samerepr. rewrite E. auto with ints. - intros; Simpl. + split; intros; Simpl. Qed. Lemma exec_addimm32: forall rd r1 n k rs m, r1 <> X16 -> + (IR RA) <> (preg_of_iregsp (RR1 rd)) -> exists rs', exec_straight ge fn (addimm32 rd r1 n k) rs m k rs' m /\ rs'#rd = Val.add rs#r1 (Vint n) - /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r. + /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r) + /\ rs' # RA = rs # RA. Proof. intros. unfold addimm32. set (nn := Int.neg n). destruct (Int.eq n (Int.zero_ext 24 n)); [| destruct (Int.eq nn (Int.zero_ext 24 nn))]. -- apply exec_addimm_aux_32 with (sem := Val.add). auto. intros; apply Val.add_assoc. +- apply exec_addimm_aux_32 with (sem := Val.add); auto. intros; apply Val.add_assoc. - rewrite <- Val.sub_opp_add. - apply exec_addimm_aux_32 with (sem := Val.sub). auto. + apply exec_addimm_aux_32 with (sem := Val.sub); auto. intros. rewrite ! Val.sub_add_opp, Val.add_assoc. rewrite Int.neg_add_distr. auto. - destruct (Int.lt n Int.zero). + rewrite <- Val.sub_opp_add; fold nn. - edestruct (exec_loadimm32 X16 nn) as (rs1 & A & B & C). + edestruct (exec_loadimm32 X16 nn) as (rs1 & A & B & C). congruence. econstructor; split. eapply exec_straight_trans. eexact A. eapply exec_straight_one. simpl; eauto. auto. split. Simpl. rewrite B, C; eauto with asmgen. - intros; Simpl. -+ edestruct (exec_loadimm32 X16 n) as (rs1 & A & B & C). + split; intros; Simpl. ++ edestruct (exec_loadimm32 X16 n) as (rs1 & A & B & C). congruence. econstructor; split. eapply exec_straight_trans. eexact A. eapply exec_straight_one. simpl; eauto. auto. split. Simpl. rewrite B, C; eauto with asmgen. - intros; Simpl. + split; intros; Simpl. Qed. Lemma exec_addimm_aux_64: @@ -477,10 +560,12 @@ Lemma exec_addimm_aux_64: Next (nextinstr (rs#rd <- (sem rs#r1 (Vlong (Int64.repr n))))) m) -> (forall v n1 n2, sem (sem v (Vlong n1)) (Vlong n2) = sem v (Vlong (Int64.add n1 n2))) -> forall rd r1 n k rs m, + (IR RA) <> (preg_of_iregsp (RR1 rd)) -> exists rs', exec_straight ge fn (addimm_aux insn rd r1 (Int64.unsigned n) k) rs m k rs' m /\ rs'#rd = sem rs#r1 (Vlong n) - /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r. + /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r) + /\ rs' # RA = rs # RA. Proof. intros insn sem SEM ASSOC; intros. unfold addimm_aux. set (nlo := Zzero_ext 12 (Int64.unsigned n)). set (nhi := Int64.unsigned n - nlo). @@ -489,44 +574,46 @@ Proof. destruct (Z.eqb_spec nhi 0); [|destruct (Z.eqb_spec nlo 0)]. - econstructor; split. apply exec_straight_one. apply SEM. Simpl. split. Simpl. do 3 f_equal; omega. - intros; Simpl. + split; intros; Simpl. - econstructor; split. apply exec_straight_one. apply SEM. Simpl. split. Simpl. do 3 f_equal; omega. - intros; Simpl. + split; intros; Simpl. - econstructor; split. eapply exec_straight_two. apply SEM. apply SEM. Simpl. Simpl. split. Simpl. rewrite ASSOC. do 2 f_equal. apply Int64.eqm_samerepr. rewrite E. auto with ints. - intros; Simpl. + split; intros; Simpl. Qed. Lemma exec_addimm64: forall rd r1 n k rs m, preg_of_iregsp r1 <> X16 -> + (IR RA) <> (preg_of_iregsp (RR1 rd)) -> exists rs', exec_straight ge fn (addimm64 rd r1 n k) rs m k rs' m /\ rs'#rd = Val.addl rs#r1 (Vlong n) - /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r. + /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r) + /\ rs' # RA = rs # RA. Proof. intros. unfold addimm64. set (nn := Int64.neg n). destruct (Int64.eq n (Int64.zero_ext 24 n)); [| destruct (Int64.eq nn (Int64.zero_ext 24 nn))]. -- apply exec_addimm_aux_64 with (sem := Val.addl). auto. intros; apply Val.addl_assoc. +- apply exec_addimm_aux_64 with (sem := Val.addl); auto. intros; apply Val.addl_assoc. - rewrite <- Val.subl_opp_addl. - apply exec_addimm_aux_64 with (sem := Val.subl). auto. + apply exec_addimm_aux_64 with (sem := Val.subl); auto. intros. rewrite ! Val.subl_addl_opp, Val.addl_assoc. rewrite Int64.neg_add_distr. auto. - destruct (Int64.lt n Int64.zero). + rewrite <- Val.subl_opp_addl; fold nn. - edestruct (exec_loadimm64 X16 nn) as (rs1 & A & B & C). + edestruct (exec_loadimm64 X16 nn) as (rs1 & A & B & C). congruence. econstructor; split. eapply exec_straight_trans. eexact A. eapply exec_straight_one. simpl; eauto. Simpl. split. Simpl. rewrite B, C; eauto with asmgen. simpl. rewrite Int64.shl'_zero. auto. - intros; Simpl. -+ edestruct (exec_loadimm64 X16 n) as (rs1 & A & B & C). + split; intros; Simpl. ++ edestruct (exec_loadimm64 X16 n) as (rs1 & A & B & C). congruence. econstructor; split. eapply exec_straight_trans. eexact A. eapply exec_straight_one. simpl; eauto. Simpl. split. Simpl. rewrite B, C; eauto with asmgen. simpl. rewrite Int64.shl'_zero. auto. - intros; Simpl. + split; intros; Simpl. Qed. (** Logical immediate *) @@ -543,22 +630,25 @@ Lemma exec_logicalimm32: Next (nextinstr (rs#rd <- (sem rs##r1 (eval_shift_op_int rs#r2 s)))) m) -> forall rd r1 n k rs m, r1 <> X16 -> + (IR RA) <> (preg_of_iregsp (RR1 rd)) -> exists rs', exec_straight ge fn (logicalimm32 insn1 insn2 rd r1 n k) rs m k rs' m /\ rs'#rd = sem rs#r1 (Vint n) - /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r. + /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r) + /\ rs' # RA = rs # RA. Proof. intros until sem; intros SEM1 SEM2; intros. unfold logicalimm32. destruct (is_logical_imm32 n). - econstructor; split. apply exec_straight_one. apply SEM1. reflexivity. - split. Simpl. rewrite Int.repr_unsigned; auto. intros; Simpl. -- edestruct (exec_loadimm32 X16 n) as (rs1 & A & B & C). + split. Simpl. rewrite Int.repr_unsigned; auto. + split; intros; Simpl. +- edestruct (exec_loadimm32 X16 n) as (rs1 & A & B & C). congruence. econstructor; split. eapply exec_straight_trans. eexact A. apply exec_straight_one. apply SEM2. reflexivity. split. Simpl. f_equal; auto. apply C; auto with asmgen. - intros; Simpl. + split; intros; Simpl. Qed. Lemma exec_logicalimm64: @@ -573,50 +663,58 @@ Lemma exec_logicalimm64: Next (nextinstr (rs#rd <- (sem rs###r1 (eval_shift_op_long rs#r2 s)))) m) -> forall rd r1 n k rs m, r1 <> X16 -> + (IR RA) <> (preg_of_iregsp (RR1 rd)) -> exists rs', exec_straight ge fn (logicalimm64 insn1 insn2 rd r1 n k) rs m k rs' m /\ rs'#rd = sem rs#r1 (Vlong n) - /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r. + /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r) + /\ rs' # RA = rs # RA. Proof. intros until sem; intros SEM1 SEM2; intros. unfold logicalimm64. destruct (is_logical_imm64 n). - econstructor; split. apply exec_straight_one. apply SEM1. reflexivity. - split. Simpl. rewrite Int64.repr_unsigned. auto. intros; Simpl. -- edestruct (exec_loadimm64 X16 n) as (rs1 & A & B & C). + split. Simpl. rewrite Int64.repr_unsigned. auto. + split; intros; Simpl. +- edestruct (exec_loadimm64 X16 n) as (rs1 & A & B & C). congruence. econstructor; split. eapply exec_straight_trans. eexact A. apply exec_straight_one. apply SEM2. reflexivity. split. Simpl. f_equal; auto. apply C; auto with asmgen. - intros; Simpl. + split; intros; Simpl. Qed. (** Load address of symbol *) Lemma exec_loadsymbol: forall rd s ofs k rs m, - rd <> X16 \/ Archi.pic_code tt = false -> + rd <> X16 \/ Archi.pic_code tt = false -> + (IR RA) <> (preg_of_iregsp (RR1 rd)) -> exists rs', exec_straight ge fn (loadsymbol rd s ofs k) rs m k rs' m /\ rs'#rd = Genv.symbol_address ge s ofs - /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r. + /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r) + /\ rs'#RA = rs#RA. Proof. unfold loadsymbol; intros. destruct (Archi.pic_code tt). - predSpec Ptrofs.eq Ptrofs.eq_spec ofs Ptrofs.zero. + subst ofs. econstructor; split. apply exec_straight_one; [simpl; eauto | reflexivity]. - split. Simpl. intros; Simpl. + split. Simpl. split; intros; Simpl. + + exploit exec_addimm64. instantiate (1 := rd). simpl. destruct H; congruence. - intros (rs1 & A & B & C). + instantiate (1 := rd). assumption. + intros (rs1 & A & B & C & D). econstructor; split. econstructor. simpl; eauto. auto. eexact A. split. simpl in B; rewrite B. Simpl. rewrite <- Genv.shift_symbol_address_64 by auto. rewrite Ptrofs.add_zero_l, Ptrofs.of_int64_to_int64 by auto. auto. - intros. rewrite C by auto. Simpl. + split; intros. rewrite C by auto; Simpl. + rewrite D. Simpl. - econstructor; split. eapply exec_straight_two. simpl; eauto. simpl; eauto. auto. auto. split. Simpl. rewrite symbol_high_low; auto. - intros; Simpl. + split; intros; Simpl. Qed. (** Shifted operands *) @@ -725,23 +823,25 @@ Lemma exec_arith_extended: Next (nextinstr (rs#rd <- (sem rs###r1 (eval_shift_op_long rs#r2 s)))) m) -> forall (rd r1 r2: ireg) (ex: extension) (a: amount64) (k: code) rs m, r1 <> X16 -> + (IR RA) <> (preg_of_iregsp (RR1 rd)) -> exists rs', exec_straight ge fn (arith_extended insnX insnS rd r1 r2 ex a k) rs m k rs' m /\ rs'#rd = sem rs#r1 (Op.eval_extend ex rs#r2 a) - /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r. + /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r) + /\ rs' # RA = rs # RA. Proof. intros sem insnX insnS EX ES; intros. unfold arith_extended. destruct (Int.ltu a (Int.repr 5)). - econstructor; split. apply exec_straight_one. rewrite EX; eauto. auto. split. Simpl. f_equal. destruct ex; auto. - intros; Simpl. + split; intros; Simpl. - exploit (exec_move_extended_base X16 r2 ex). intros (rs' & A & B & C). econstructor; split. eapply exec_straight_trans. eexact A. apply exec_straight_one. rewrite ES. eauto. auto. split. Simpl. unfold ir0x. rewrite C by eauto with asmgen. f_equal. rewrite B. destruct ex; auto. - intros; Simpl. + split; intros; Simpl. Qed. (** Extended right shift *) @@ -749,41 +849,138 @@ Qed. Lemma exec_shrx32: forall (rd r1: ireg) (n: int) k v (rs: regset) m, Val.shrx rs#r1 (Vint n) = Some v -> r1 <> X16 -> + (IR RA) <> (preg_of_iregsp (RR1 rd)) -> exists rs', exec_straight ge fn (shrx32 rd r1 n k) rs m k rs' m /\ rs'#rd = v - /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r. + /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r) + /\ rs' # RA = rs # RA. Proof. - unfold shrx32; intros. apply Val.shrx_shr_2 in H. + unfold shrx32; intros. apply Val.shrx_shr_3 in H. destruct (Int.eq n Int.zero) eqn:E. - econstructor; split. apply exec_straight_one; [simpl;eauto|auto]. - split. Simpl. subst v; auto. intros; Simpl. -- econstructor; split. eapply exec_straight_three. - unfold exec_instr. rewrite or_zero_eval_shift_op_int by congruence. eauto. - simpl; eauto. - unfold exec_instr. rewrite or_zero_eval_shift_op_int by congruence. eauto. - auto. auto. auto. - split. subst v; Simpl. intros; Simpl. -Qed. - + split. Simpl. subst v; auto. + split; intros; Simpl. +- generalize (Int.eq_spec n Int.one). + destruct (Int.eq n Int.one); intro ONE. + * subst n. + econstructor; split. eapply exec_straight_two. + all: simpl; auto. + split. + ** subst v; Simpl. + destruct (Val.add _ _); simpl; trivial. + change (Int.ltu Int.one Int.iwordsize) with true; simpl. + rewrite Int.or_zero_l. + reflexivity. + ** split; intros; Simpl. + * econstructor; split. eapply exec_straight_three. + unfold exec_instr. rewrite or_zero_eval_shift_op_int by congruence. eauto. + simpl; eauto. + unfold exec_instr. rewrite or_zero_eval_shift_op_int by congruence. eauto. + auto. auto. auto. + split. subst v; Simpl. + split; intros; Simpl. +Qed. + + +Lemma exec_shrx32_none: forall (rd r1: ireg) (n: int) k (rs: regset) m, + Val.shrx rs#r1 (Vint n) = None -> + r1 <> X16 -> + (IR RA) <> (preg_of_iregsp (RR1 rd)) -> + exists rs', + exec_straight ge fn (shrx32 rd r1 n k) rs m k rs' m + /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r) + /\ rs' # RA = rs # RA. +Proof. + unfold shrx32; intros. + destruct (Int.eq n Int.zero) eqn:E. +- econstructor; split. apply exec_straight_one; [simpl;eauto|auto]. + split. + + intros. Simpl. + + Simpl. +- generalize (Int.eq_spec n Int.one). + destruct (Int.eq n Int.one); intro ONE. + * subst n. + econstructor; split. eapply exec_straight_two. + all: cbn; auto. + split. + ** intros. + destruct (Val.add _ _); cbn; Simpl. + ** Simpl. + * econstructor; split. eapply exec_straight_three. + all: cbn; auto. + split. + ** intros. + destruct (Val.shr _ _); cbn; Simpl. + ** Simpl. +Qed. + Lemma exec_shrx64: forall (rd r1: ireg) (n: int) k v (rs: regset) m, Val.shrxl rs#r1 (Vint n) = Some v -> r1 <> X16 -> + (IR RA) <> (preg_of_iregsp (RR1 rd)) -> exists rs', exec_straight ge fn (shrx64 rd r1 n k) rs m k rs' m /\ rs'#rd = v - /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r. + /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r) + /\ rs' # RA = rs # RA. Proof. - unfold shrx64; intros. apply Val.shrxl_shrl_2 in H. + unfold shrx64; intros. apply Val.shrxl_shrl_3 in H. destruct (Int.eq n Int.zero) eqn:E. - econstructor; split. apply exec_straight_one; [simpl;eauto|auto]. - split. Simpl. subst v; auto. intros; Simpl. -- econstructor; split. eapply exec_straight_three. - unfold exec_instr. rewrite or_zero_eval_shift_op_long by congruence. eauto. - simpl; eauto. - unfold exec_instr. rewrite or_zero_eval_shift_op_long by congruence. eauto. - auto. auto. auto. - split. subst v; Simpl. intros; Simpl. + split. Simpl. subst v; auto. + split; intros; Simpl. +- generalize (Int.eq_spec n Int.one). + destruct (Int.eq n Int.one); intro ONE. + * subst n. + econstructor; split. eapply exec_straight_two. + all: simpl; auto. + split. + ** subst v; Simpl. + destruct (Val.addl _ _); simpl; trivial. + change (Int.ltu Int.one Int64.iwordsize') with true; simpl. + rewrite Int64.or_zero_l. + reflexivity. + ** split; intros; Simpl. + * econstructor; split. eapply exec_straight_three. + unfold exec_instr. rewrite or_zero_eval_shift_op_long by congruence. eauto. + simpl; eauto. + unfold exec_instr. rewrite or_zero_eval_shift_op_long by congruence. eauto. + auto. auto. auto. + split. subst v; Simpl. + split; intros; Simpl. +Qed. + +Lemma exec_shrx64_none: forall (rd r1: ireg) (n: int) k (rs: regset) m, + Val.shrxl rs#r1 (Vint n) = None -> + r1 <> X16 -> + (IR RA) <> (preg_of_iregsp (RR1 rd)) -> + exists rs', + exec_straight ge fn (shrx64 rd r1 n k) rs m k rs' m + /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r) + /\ rs' # RA = rs # RA. +Proof. + unfold shrx64; intros. + destruct (Int.eq n Int.zero) eqn:E. +- econstructor; split. apply exec_straight_one; [simpl;eauto|auto]. + split. + + intros. Simpl. + + Simpl. +- generalize (Int.eq_spec n Int.one). + destruct (Int.eq n Int.one); intro ONE. + * subst n. + econstructor; split. eapply exec_straight_two. + all: cbn; auto. + split. + ** intros. + destruct (Val.addl _ _); cbn; Simpl. + ** Simpl. + * econstructor; split. eapply exec_straight_three. + all: cbn; auto. + split. + ** intros. + destruct (Val.shrl _ _); cbn; Simpl. + ** Simpl. Qed. (** Condition bits *) @@ -1039,6 +1236,56 @@ Ltac ArgsInv := | [ H: freg_of _ = OK _ |- _ ] => simpl in *; rewrite (freg_of_eq _ _ H) in * end). +Lemma compare_int_RA: + forall rs a b m, + compare_int rs a b m X30 = rs X30. +Proof. + unfold compare_int. + intros. + repeat rewrite Pregmap.gso by congruence. + trivial. +Qed. + +Hint Resolve compare_int_RA : asmgen. + +Lemma compare_long_RA: + forall rs a b m, + compare_long rs a b m X30 = rs X30. +Proof. + unfold compare_long. + intros. + repeat rewrite Pregmap.gso by congruence. + trivial. +Qed. + +Hint Resolve compare_long_RA : asmgen. + +Lemma compare_float_RA: + forall rs a b, + compare_float rs a b X30 = rs X30. +Proof. + unfold compare_float. + intros. + destruct a; destruct b. + all: repeat rewrite Pregmap.gso by congruence; trivial. +Qed. + +Hint Resolve compare_float_RA : asmgen. + + +Lemma compare_single_RA: + forall rs a b, + compare_single rs a b X30 = rs X30. +Proof. + unfold compare_single. + intros. + destruct a; destruct b. + all: repeat rewrite Pregmap.gso by congruence; trivial. +Qed. + +Hint Resolve compare_single_RA : asmgen. + + Lemma transl_cond_correct: forall cond args k c rs m, transl_cond cond args k = OK c -> @@ -1047,185 +1294,218 @@ Lemma transl_cond_correct: /\ (forall b, eval_condition cond (map rs (map preg_of args)) m = Some b -> eval_testcond (cond_for_cond cond) rs' = Some b) - /\ forall r, data_preg r = true -> rs'#r = rs#r. + /\ (forall r, data_preg r = true -> rs'#r = rs#r) + /\ rs' # RA = rs # RA. Proof. intros until m; intros TR. destruct cond; simpl in TR; ArgsInv. - (* Ccomp *) econstructor; split. apply exec_straight_one. simpl; eauto. auto. - split; intros. apply eval_testcond_compare_sint; auto. + repeat split; intros. apply eval_testcond_compare_sint; auto. destruct r; reflexivity || discriminate. - (* Ccompu *) econstructor; split. apply exec_straight_one. simpl; eauto. auto. - split; intros. apply eval_testcond_compare_uint; auto. + repeat split; intros. apply eval_testcond_compare_uint; auto. destruct r; reflexivity || discriminate. - (* Ccompimm *) destruct (is_arith_imm32 n); [|destruct (is_arith_imm32 (Int.neg n))]. + econstructor; split. apply exec_straight_one. simpl; eauto. auto. - split; intros. rewrite Int.repr_unsigned. apply eval_testcond_compare_sint; auto. + repeat split; intros. rewrite Int.repr_unsigned. apply eval_testcond_compare_sint; auto. destruct r; reflexivity || discriminate. + econstructor; split. apply exec_straight_one. simpl. rewrite Int.repr_unsigned, Int.neg_involutive. eauto. auto. - split; intros. apply eval_testcond_compare_sint; auto. + repeat split; intros. apply eval_testcond_compare_sint; auto. destruct r; reflexivity || discriminate. -+ exploit (exec_loadimm32 X16 n). intros (rs' & A & B & C). ++ exploit (exec_loadimm32 X16 n). congruence. intros (rs' & A & B & C). econstructor; split. eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl. rewrite B, C by eauto with asmgen. eauto. auto. - split; intros. apply eval_testcond_compare_sint; auto. - transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen. + repeat split; intros. apply eval_testcond_compare_sint; auto. + transitivity (rs' r). destruct r; reflexivity || discriminate. + auto with asmgen. + Simpl. rewrite compare_int_RA. + apply C; congruence. - (* Ccompuimm *) destruct (is_arith_imm32 n); [|destruct (is_arith_imm32 (Int.neg n))]. + econstructor; split. apply exec_straight_one. simpl; eauto. auto. - split; intros. rewrite Int.repr_unsigned. apply eval_testcond_compare_uint; auto. + repeat split; intros. rewrite Int.repr_unsigned. apply eval_testcond_compare_uint; auto. destruct r; reflexivity || discriminate. + econstructor; split. apply exec_straight_one. simpl. rewrite Int.repr_unsigned, Int.neg_involutive. eauto. auto. - split; intros. apply eval_testcond_compare_uint; auto. + repeat split; intros. apply eval_testcond_compare_uint; auto. destruct r; reflexivity || discriminate. -+ exploit (exec_loadimm32 X16 n). intros (rs' & A & B & C). ++ exploit (exec_loadimm32 X16 n). congruence. intros (rs' & A & B & C). econstructor; split. eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl. rewrite B, C by eauto with asmgen. eauto. auto. - split; intros. apply eval_testcond_compare_uint; auto. + repeat split; intros. apply eval_testcond_compare_uint; auto. transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen. + Simpl. rewrite compare_int_RA. + apply C; congruence. - (* Ccompshift *) econstructor; split. apply exec_straight_one. simpl; eauto. auto. - split; intros. rewrite transl_eval_shift. apply eval_testcond_compare_sint; auto. + repeat split; intros. rewrite transl_eval_shift. apply eval_testcond_compare_sint; auto. destruct r; reflexivity || discriminate. - (* Ccompushift *) econstructor; split. apply exec_straight_one. simpl; eauto. auto. - split; intros. rewrite transl_eval_shift. apply eval_testcond_compare_uint; auto. + repeat split; intros. rewrite transl_eval_shift. apply eval_testcond_compare_uint; auto. destruct r; reflexivity || discriminate. - (* Cmaskzero *) destruct (is_logical_imm32 n). + econstructor; split. apply exec_straight_one. simpl; eauto. auto. - split; intros. rewrite Int.repr_unsigned. apply (eval_testcond_compare_sint Ceq); auto. + repeat split; intros. rewrite Int.repr_unsigned. apply (eval_testcond_compare_sint Ceq); auto. destruct r; reflexivity || discriminate. -+ exploit (exec_loadimm32 X16 n). intros (rs' & A & B & C). ++ exploit (exec_loadimm32 X16 n). congruence. intros (rs' & A & B & C). econstructor; split. eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl. rewrite B, C by eauto with asmgen. eauto. auto. - split; intros. apply (eval_testcond_compare_sint Ceq); auto. + repeat split; intros. apply (eval_testcond_compare_sint Ceq); auto. transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen. + Simpl. rewrite compare_int_RA. + apply C; congruence. + - (* Cmasknotzero *) destruct (is_logical_imm32 n). + econstructor; split. apply exec_straight_one. simpl; eauto. auto. - split; intros. rewrite Int.repr_unsigned. apply (eval_testcond_compare_sint Cne); auto. + repeat split; intros. rewrite Int.repr_unsigned. apply (eval_testcond_compare_sint Cne); auto. destruct r; reflexivity || discriminate. -+ exploit (exec_loadimm32 X16 n). intros (rs' & A & B & C). + ++ exploit (exec_loadimm32 X16 n). congruence. intros (rs' & A & B & C). econstructor; split. eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl. rewrite B, C by eauto with asmgen. eauto. auto. - split; intros. apply (eval_testcond_compare_sint Cne); auto. + repeat split; intros. apply (eval_testcond_compare_sint Cne); auto. transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen. + Simpl. rewrite compare_int_RA. + apply C; congruence. + - (* Ccompl *) econstructor; split. apply exec_straight_one. simpl; eauto. auto. - split; intros. apply eval_testcond_compare_slong; auto. + repeat split; intros. apply eval_testcond_compare_slong; auto. destruct r; reflexivity || discriminate. - (* Ccomplu *) econstructor; split. apply exec_straight_one. simpl; eauto. auto. - split; intros. apply eval_testcond_compare_ulong; auto. + repeat split; intros. apply eval_testcond_compare_ulong; auto. destruct r; reflexivity || discriminate. - (* Ccomplimm *) destruct (is_arith_imm64 n); [|destruct (is_arith_imm64 (Int64.neg n))]. + econstructor; split. apply exec_straight_one. simpl; eauto. auto. - split; intros. rewrite Int64.repr_unsigned. apply eval_testcond_compare_slong; auto. + repeat split; intros. rewrite Int64.repr_unsigned. apply eval_testcond_compare_slong; auto. destruct r; reflexivity || discriminate. + econstructor; split. apply exec_straight_one. simpl. rewrite Int64.repr_unsigned, Int64.neg_involutive. eauto. auto. - split; intros. apply eval_testcond_compare_slong; auto. + repeat split; intros. apply eval_testcond_compare_slong; auto. destruct r; reflexivity || discriminate. -+ exploit (exec_loadimm64 X16 n). intros (rs' & A & B & C). ++ exploit (exec_loadimm64 X16 n). congruence. intros (rs' & A & B & C). econstructor; split. eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl. rewrite B, C by eauto with asmgen. eauto. auto. - split; intros. apply eval_testcond_compare_slong; auto. + repeat split; intros. apply eval_testcond_compare_slong; auto. transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen. + Simpl. rewrite compare_long_RA. + apply C; congruence. + - (* Ccompluimm *) destruct (is_arith_imm64 n); [|destruct (is_arith_imm64 (Int64.neg n))]. + econstructor; split. apply exec_straight_one. simpl; eauto. auto. - split; intros. rewrite Int64.repr_unsigned. apply eval_testcond_compare_ulong; auto. + repeat split; intros. rewrite Int64.repr_unsigned. apply eval_testcond_compare_ulong; auto. destruct r; reflexivity || discriminate. + econstructor; split. apply exec_straight_one. simpl. rewrite Int64.repr_unsigned, Int64.neg_involutive. eauto. auto. - split; intros. apply eval_testcond_compare_ulong; auto. + repeat split; intros. apply eval_testcond_compare_ulong; auto. destruct r; reflexivity || discriminate. -+ exploit (exec_loadimm64 X16 n). intros (rs' & A & B & C). ++ exploit (exec_loadimm64 X16 n). congruence. intros (rs' & A & B & C). econstructor; split. eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl. rewrite B, C by eauto with asmgen. eauto. auto. - split; intros. apply eval_testcond_compare_ulong; auto. + repeat split; intros. apply eval_testcond_compare_ulong; auto. transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen. + Simpl. rewrite compare_long_RA. + apply C; congruence. + - (* Ccomplshift *) econstructor; split. apply exec_straight_one. simpl; eauto. auto. - split; intros. rewrite transl_eval_shiftl. apply eval_testcond_compare_slong; auto. + repeat split; intros. rewrite transl_eval_shiftl. apply eval_testcond_compare_slong; auto. destruct r; reflexivity || discriminate. - (* Ccomplushift *) econstructor; split. apply exec_straight_one. simpl; eauto. auto. - split; intros. rewrite transl_eval_shiftl. apply eval_testcond_compare_ulong; auto. + repeat split; intros. rewrite transl_eval_shiftl. apply eval_testcond_compare_ulong; auto. destruct r; reflexivity || discriminate. - (* Cmasklzero *) destruct (is_logical_imm64 n). + econstructor; split. apply exec_straight_one. simpl; eauto. auto. - split; intros. rewrite Int64.repr_unsigned. apply (eval_testcond_compare_slong Ceq); auto. + repeat split; intros. rewrite Int64.repr_unsigned. apply (eval_testcond_compare_slong Ceq); auto. destruct r; reflexivity || discriminate. -+ exploit (exec_loadimm64 X16 n). intros (rs' & A & B & C). ++ exploit (exec_loadimm64 X16 n). congruence. intros (rs' & A & B & C). econstructor; split. eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl. rewrite B, C by eauto with asmgen. eauto. auto. - split; intros. apply (eval_testcond_compare_slong Ceq); auto. + repeat split; intros. apply (eval_testcond_compare_slong Ceq); auto. transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen. + Simpl. rewrite compare_long_RA. + apply C; congruence. + - (* Cmasknotzero *) destruct (is_logical_imm64 n). + econstructor; split. apply exec_straight_one. simpl; eauto. auto. - split; intros. rewrite Int64.repr_unsigned. apply (eval_testcond_compare_slong Cne); auto. + repeat split; intros. rewrite Int64.repr_unsigned. apply (eval_testcond_compare_slong Cne); auto. destruct r; reflexivity || discriminate. -+ exploit (exec_loadimm64 X16 n). intros (rs' & A & B & C). ++ exploit (exec_loadimm64 X16 n). congruence. intros (rs' & A & B & C). econstructor; split. eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl. rewrite B, C by eauto with asmgen. eauto. auto. - split; intros. apply (eval_testcond_compare_slong Cne); auto. + repeat split; intros. apply (eval_testcond_compare_slong Cne); auto. transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen. + Simpl. rewrite compare_long_RA. + apply C; congruence. + - (* Ccompf *) econstructor; split. apply exec_straight_one. simpl; eauto. rewrite compare_float_inv; auto. - split; intros. apply eval_testcond_compare_float; auto. + repeat split; intros. apply eval_testcond_compare_float; auto. destruct r; discriminate || rewrite compare_float_inv; auto. + Simpl. - (* Cnotcompf *) econstructor; split. apply exec_straight_one. simpl; eauto. rewrite compare_float_inv; auto. - split; intros. apply eval_testcond_compare_not_float; auto. + repeat split; intros. apply eval_testcond_compare_not_float; auto. destruct r; discriminate || rewrite compare_float_inv; auto. + Simpl. - (* Ccompfzero *) econstructor; split. apply exec_straight_one. simpl; eauto. rewrite compare_float_inv; auto. - split; intros. apply eval_testcond_compare_float; auto. + repeat split; intros. apply eval_testcond_compare_float; auto. destruct r; discriminate || rewrite compare_float_inv; auto. + Simpl. - (* Cnotcompfzero *) econstructor; split. apply exec_straight_one. simpl; eauto. rewrite compare_float_inv; auto. - split; intros. apply eval_testcond_compare_not_float; auto. + repeat split; intros. apply eval_testcond_compare_not_float; auto. destruct r; discriminate || rewrite compare_float_inv; auto. + Simpl. - (* Ccompfs *) econstructor; split. apply exec_straight_one. simpl; eauto. rewrite compare_single_inv; auto. - split; intros. apply eval_testcond_compare_single; auto. + repeat split; intros. apply eval_testcond_compare_single; auto. destruct r; discriminate || rewrite compare_single_inv; auto. + Simpl. - (* Cnotcompfs *) econstructor; split. apply exec_straight_one. simpl; eauto. rewrite compare_single_inv; auto. - split; intros. apply eval_testcond_compare_not_single; auto. + repeat split; intros. apply eval_testcond_compare_not_single; auto. destruct r; discriminate || rewrite compare_single_inv; auto. + Simpl. - (* Ccompfszero *) econstructor; split. apply exec_straight_one. simpl; eauto. rewrite compare_single_inv; auto. - split; intros. apply eval_testcond_compare_single; auto. + repeat split; intros. apply eval_testcond_compare_single; auto. destruct r; discriminate || rewrite compare_single_inv; auto. + Simpl. - (* Cnotcompfszero *) econstructor; split. apply exec_straight_one. simpl; eauto. rewrite compare_single_inv; auto. - split; intros. apply eval_testcond_compare_not_single; auto. + repeat split; intros. apply eval_testcond_compare_not_single; auto. destruct r; discriminate || rewrite compare_single_inv; auto. + Simpl. Qed. (** Translation of conditional branches *) @@ -1238,7 +1518,8 @@ Lemma transl_cond_branch_correct: exec_straight_opt ge fn c rs m (insn :: k) rs' m /\ exec_instr ge fn insn rs' m = (if b then goto_label fn lbl rs' m else Next (nextinstr rs') m) - /\ forall r, data_preg r = true -> rs'#r = rs#r. + /\ (forall r, data_preg r = true -> rs'#r = rs#r) + /\ rs' # RA = rs # RA. Proof. intros until b; intros TR EV. assert (DFL: @@ -1247,13 +1528,14 @@ Proof. exec_straight_opt ge fn c rs m (insn :: k) rs' m /\ exec_instr ge fn insn rs' m = (if b then goto_label fn lbl rs' m else Next (nextinstr rs') m) - /\ forall r, data_preg r = true -> rs'#r = rs#r). + /\ (forall r, data_preg r = true -> rs'#r = rs#r) + /\ rs' # RA = rs # RA ). { unfold transl_cond_branch_default; intros. - exploit transl_cond_correct; eauto. intros (rs' & A & B & C). + exploit transl_cond_correct; eauto. intros (rs' & A & B & C & D). exists rs', (Pbc (cond_for_cond cond) lbl); split. apply exec_straight_opt_intro. eexact A. - split; auto. simpl. rewrite (B b) by auto. auto. + repeat split; auto. simpl. rewrite (B b) by auto. auto. } Local Opaque transl_cond transl_cond_branch_default. destruct args as [ | a1 args]; simpl in TR; auto. @@ -1347,13 +1629,15 @@ Ltac TranslOpSimpl := [ apply exec_straight_one; [simpl; eauto | reflexivity] | split; [ rewrite ? transl_eval_shift, ? transl_eval_shiftl; apply Val.lessdef_same; Simpl; fail - | intros; Simpl; fail ] ]. + | split; [ intros; Simpl; fail + | intros; Simpl; eauto with asmgen; fail] ]]. Ltac TranslOpBase := econstructor; split; [ apply exec_straight_one; [simpl; eauto | reflexivity] | split; [ rewrite ? transl_eval_shift, ? transl_eval_shiftl; Simpl - | intros; Simpl; fail ] ]. + | split; [ intros; Simpl; fail + | intros; Simpl; eapply RA_not_written2; eauto] ]]. Lemma transl_op_correct: forall op args res k (rs: regset) m v c, @@ -1362,21 +1646,29 @@ Lemma transl_op_correct: exists rs', exec_straight ge fn c rs m k rs' m /\ Val.lessdef v rs'#(preg_of res) - /\ forall r, data_preg r = true -> r <> preg_of res -> preg_notin r (destroyed_by_op op) -> rs' r = rs r. + /\ (forall r, data_preg r = true -> r <> preg_of res -> preg_notin r (destroyed_by_op op) -> rs' r = rs r) + /\ rs' RA = rs RA. Proof. Local Opaque Int.eq Int64.eq Val.add Val.addl Int.zwordsize Int64.zwordsize. intros until c; intros TR EV. unfold transl_op in TR; destruct op; ArgsInv; simpl in EV; SimplEval EV; try TranslOpSimpl. - (* move *) destruct (preg_of res) eqn:RR; try discriminate; destruct (preg_of m0) eqn:R1; inv TR. -+ TranslOpSimpl. -+ TranslOpSimpl. + all: TranslOpSimpl. - (* intconst *) - exploit exec_loadimm32. intros (rs' & A & B & C). - exists rs'; split. eexact A. split. rewrite B; auto. intros; auto with asmgen. + exploit exec_loadimm32. apply (ireg_of_not_RA res); eassumption. + intros (rs' & A & B & C). + exists rs'; split. eexact A. split. rewrite B; auto. + split. intros; auto with asmgen. + apply C. congruence. + eapply ireg_of_not_RA''; eauto. - (* longconst *) - exploit exec_loadimm64. intros (rs' & A & B & C). - exists rs'; split. eexact A. split. rewrite B; auto. intros; auto with asmgen. + exploit exec_loadimm64. apply (ireg_of_not_RA res); eassumption. + intros (rs' & A & B & C). + exists rs'; split. eexact A. split. rewrite B; auto. + split. intros; auto with asmgen. + apply C. congruence. + eapply ireg_of_not_RA''; eauto. - (* floatconst *) destruct (Float.eq_dec n Float.zero). + subst n. TranslOpSimpl. @@ -1386,11 +1678,15 @@ Local Opaque Int.eq Int64.eq Val.add Val.addl Int.zwordsize Int64.zwordsize. + subst n. TranslOpSimpl. + TranslOpSimpl. - (* loadsymbol *) - exploit (exec_loadsymbol x id ofs). eauto with asmgen. intros (rs' & A & B & C). - exists rs'; split. eexact A. split. rewrite B; auto. auto. + exploit (exec_loadsymbol x id ofs). eauto with asmgen. + apply (ireg_of_not_RA'' res); eassumption. + intros (rs' & A & B & C & D). + exists rs'; split. eexact A. split. rewrite B; auto. + split; auto. - (* addrstack *) exploit (exec_addimm64 x XSP (Ptrofs.to_int64 ofs)). simpl; eauto with asmgen. - intros (rs' & A & B & C). + apply (ireg_of_not_RA'' res); eassumption. + intros (rs' & A & B & C & D). exists rs'; split. eexact A. split. simpl in B; rewrite B. Local Transparent Val.addl. destruct (rs SP); simpl; auto. rewrite Ptrofs.of_int64_to_int64 by auto. auto. @@ -1398,7 +1694,8 @@ Local Transparent Val.addl. - (* shift *) rewrite <- transl_eval_shift'. TranslOpSimpl. - (* addimm *) - exploit (exec_addimm32 x x0 n). eauto with asmgen. intros (rs' & A & B & C). + exploit (exec_addimm32 x x0 n). eauto with asmgen. eapply ireg_of_not_RA''; eassumption. + intros (rs' & A & B & C & D). exists rs'; split. eexact A. split. rewrite B; auto. auto. - (* mul *) TranslOpBase. @@ -1406,18 +1703,20 @@ Local Transparent Val.add. destruct (rs x0); auto; destruct (rs x1); auto. simpl. rewrite Int.add_zero_l; auto. - (* andimm *) exploit (exec_logicalimm32 (Pandimm W) (Pand W)). - intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen. - intros (rs' & A & B & C). - exists rs'; split. eexact A. split. rewrite B; auto. auto. + intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen. apply (ireg_of_not_RA'' res); eassumption. + intros (rs' & A & B & C & D). + exists rs'; split. eexact A. split. rewrite B; auto. + split; auto. - (* orimm *) exploit (exec_logicalimm32 (Porrimm W) (Porr W)). - intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen. - intros (rs' & A & B & C). - exists rs'; split. eexact A. split. rewrite B; auto. auto. + intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen. apply (ireg_of_not_RA'' res); eassumption. + intros (rs' & A & B & C & D). + exists rs'; split. eexact A. split. rewrite B; auto. + split; auto. - (* xorimm *) exploit (exec_logicalimm32 (Peorimm W) (Peor W)). - intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen. - intros (rs' & A & B & C). + intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen. apply (ireg_of_not_RA'' res); eassumption. + intros (rs' & A & B & C & D). exists rs'; split. eexact A. split. rewrite B; auto. auto. - (* not *) TranslOpBase. @@ -1426,8 +1725,19 @@ Local Transparent Val.add. TranslOpBase. destruct (eval_shift s (rs x0) a); auto. simpl. rewrite Int.or_zero_l; auto. - (* shrx *) - exploit (exec_shrx32 x x0 n); eauto with asmgen. intros (rs' & A & B & C). - econstructor; split. eexact A. split. rewrite B; auto. auto. + destruct (Val.shrx (rs x0) (Vint n)) eqn:TOTAL. + { + exploit (exec_shrx32 x x0 n); eauto with asmgen. apply (ireg_of_not_RA'' res); eassumption. + intros (rs' & A & B & C & D). + econstructor; split. eexact A. split. rewrite B; auto. + split; auto. + } + exploit (exec_shrx32_none x x0 n); eauto with asmgen. apply (ireg_of_not_RA'' res); eassumption. + intros (rs' & A & B & C). + econstructor; split. { eexact A. } + split. { cbn. constructor. } + split; auto. + - (* zero-ext *) TranslOpBase. destruct (rs x0); auto; simpl. rewrite Int.shl_zero. auto. @@ -1451,36 +1761,47 @@ Local Transparent Val.add. - (* extend *) exploit (exec_move_extended x0 x1 x a k). intros (rs' & A & B & C). econstructor; split. eexact A. - split. rewrite B; auto. eauto with asmgen. + split. rewrite B; auto. + split; eauto with asmgen. - (* addext *) exploit (exec_arith_extended Val.addl Paddext (Padd X)). - auto. auto. instantiate (1 := x1). eauto with asmgen. intros (rs' & A & B & C). - econstructor; split. eexact A. split. rewrite B; auto. auto. + auto. auto. instantiate (1 := x1). eauto with asmgen. + apply (ireg_of_not_RA'' res); eassumption. + intros (rs' & A & B & C & D). + econstructor; split. eexact A. split. rewrite B; auto. + split; auto. - (* addlimm *) exploit (exec_addimm64 x x0 n). simpl. generalize (ireg_of_not_X16 _ _ EQ1). congruence. - intros (rs' & A & B & C). + apply (ireg_of_not_RA'' res); eassumption. + intros (rs' & A & B & C & D). exists rs'; split. eexact A. split. simpl in B; rewrite B; auto. auto. - (* subext *) exploit (exec_arith_extended Val.subl Psubext (Psub X)). - auto. auto. instantiate (1 := x1). eauto with asmgen. intros (rs' & A & B & C). - econstructor; split. eexact A. split. rewrite B; auto. auto. + auto. auto. instantiate (1 := x1). eauto with asmgen. + apply (ireg_of_not_RA'' res); eassumption. + intros (rs' & A & B & C & D). + econstructor; split. eexact A. split. rewrite B; auto. + split; auto. - (* mull *) TranslOpBase. destruct (rs x0); auto; destruct (rs x1); auto. simpl. rewrite Int64.add_zero_l; auto. - (* andlimm *) exploit (exec_logicalimm64 (Pandimm X) (Pand X)). intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen. - intros (rs' & A & B & C). + apply (ireg_of_not_RA'' res); eassumption. + intros (rs' & A & B & C & D). exists rs'; split. eexact A. split. rewrite B; auto. auto. - (* orlimm *) exploit (exec_logicalimm64 (Porrimm X) (Porr X)). intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen. - intros (rs' & A & B & C). + apply (ireg_of_not_RA'' res); eassumption. + intros (rs' & A & B & C & D). exists rs'; split. eexact A. split. rewrite B; auto. auto. - (* xorlimm *) exploit (exec_logicalimm64 (Peorimm X) (Peor X)). intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen. - intros (rs' & A & B & C). + apply (ireg_of_not_RA'' res); eassumption. + intros (rs' & A & B & C & D). exists rs'; split. eexact A. split. rewrite B; auto. auto. - (* notl *) TranslOpBase. @@ -1489,8 +1810,18 @@ Local Transparent Val.add. TranslOpBase. destruct (eval_shiftl s (rs x0) a); auto. simpl. rewrite Int64.or_zero_l; auto. - (* shrx *) - exploit (exec_shrx64 x x0 n); eauto with asmgen. intros (rs' & A & B & C). + destruct (Val.shrxl (rs x0) (Vint n)) eqn:TOTAL. + { + exploit (exec_shrx64 x x0 n); eauto with asmgen. + apply (ireg_of_not_RA'' res); eassumption. intros (rs' & A & B & C & D ). econstructor; split. eexact A. split. rewrite B; auto. auto. + } + exploit (exec_shrx64_none x x0 n); eauto with asmgen. apply (ireg_of_not_RA'' res); eassumption. + intros (rs' & A & B & C). + econstructor; split. { eexact A. } + split. { cbn. constructor. } + split; auto. + - (* zero-ext-l *) TranslOpBase. destruct (rs x0); auto; simpl. rewrite Int64.shl'_zero. auto. @@ -1510,35 +1841,37 @@ Local Transparent Val.add. TranslOpBase. destruct (rs x0); simpl; auto. rewrite ! a64_range; simpl. rewrite <- Int64.sign_ext_shr'_min; auto using a64_range. - (* condition *) - exploit (transl_cond_correct cond args); eauto. intros (rs' & A & B & C). + exploit (transl_cond_correct cond args); eauto. intros (rs' & A & B & C & D). econstructor; split. eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl; eauto. auto. split. Simpl. destruct (eval_condition cond (map rs (map preg_of args)) m) as [b|]; simpl in *. rewrite (B b) by auto. auto. auto. - intros; Simpl. + split; intros; Simpl. - (* select *) destruct (preg_of res) eqn:RES; monadInv TR. + (* integer *) generalize (ireg_of_eq _ _ EQ) (ireg_of_eq _ _ EQ1); intros E1 E2; rewrite E1, E2. - exploit (transl_cond_correct cond args); eauto. intros (rs' & A & B & C). + exploit (transl_cond_correct cond args); eauto. intros (rs' & A & B & C & D). econstructor; split. eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl; eauto. auto. split. Simpl. destruct (eval_condition cond (map rs (map preg_of args)) m) as [b|]; simpl in *. rewrite (B b) by auto. rewrite !C. apply Val.lessdef_normalize. rewrite <- E2; auto with asmgen. rewrite <- E1; auto with asmgen. auto. - intros; Simpl. + split; intros; Simpl. + rewrite <- D. + eapply RA_not_written2; eassumption. + (* FP *) generalize (freg_of_eq _ _ EQ) (freg_of_eq _ _ EQ1); intros E1 E2; rewrite E1, E2. - exploit (transl_cond_correct cond args); eauto. intros (rs' & A & B & C). + exploit (transl_cond_correct cond args); eauto. intros (rs' & A & B & C & D). econstructor; split. eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl; eauto. auto. split. Simpl. destruct (eval_condition cond (map rs (map preg_of args)) m) as [b|]; simpl in *. rewrite (B b) by auto. rewrite !C. apply Val.lessdef_normalize. rewrite <- E2; auto with asmgen. rewrite <- E1; auto with asmgen. auto. - intros; Simpl. + split; intros; Simpl. Qed. (** Translation of addressing modes, loads, stores *) @@ -1550,7 +1883,8 @@ Lemma transl_addressing_correct: exists ad rs', exec_straight_opt ge fn c rs m (insn ad :: k) rs' m /\ Asm.eval_addressing ge ad rs' = Vptr b o - /\ forall r, data_preg r = true -> rs' r = rs r. + /\ (forall r, data_preg r = true -> rs' r = rs r) + /\ rs' # RA = rs # RA. Proof. intros until o; intros TR EV. unfold transl_addressing in TR; destruct addr; ArgsInv; SimplEval EV. @@ -1558,10 +1892,10 @@ Proof. destruct (offset_representable sz ofs); inv EQ0. + econstructor; econstructor; split. apply exec_straight_opt_refl. auto. -+ exploit (exec_loadimm64 X16 ofs). intros (rs' & A & B & C). ++ exploit (exec_loadimm64 X16 ofs). congruence. intros (rs' & A & B & C). econstructor; exists rs'; split. apply exec_straight_opt_intro; eexact A. split. simpl. rewrite B, C by eauto with asmgen. auto. - eauto with asmgen. + split; eauto with asmgen. - (* Aindexed2 *) econstructor; econstructor; split. apply exec_straight_opt_refl. auto. @@ -1577,33 +1911,38 @@ Proof. + econstructor; econstructor; split. apply exec_straight_opt_intro. apply exec_straight_one. simpl; eauto. auto. split. simpl. Simpl. rewrite H0. simpl. rewrite Ptrofs.add_zero. auto. - intros; Simpl. + split; intros; Simpl. - (* Aindexed2ext *) destruct (Int.eq a Int.zero || Int.eq (Int.shl Int.one a) (Int.repr sz)); inv EQ2. + econstructor; econstructor; split. apply exec_straight_opt_refl. split; auto. destruct x; auto. + exploit (exec_arith_extended Val.addl Paddext (Padd X)); auto. instantiate (1 := x0). eauto with asmgen. - intros (rs' & A & B & C). + instantiate (1 := X16). simpl. congruence. + intros (rs' & A & B & C & D). econstructor; exists rs'; split. apply exec_straight_opt_intro. eexact A. split. simpl. rewrite B. rewrite Val.addl_assoc. f_equal. unfold Op.eval_extend; destruct x, (rs x1); simpl; auto; rewrite ! a64_range; simpl; rewrite Int64.add_zero; auto. - intros. apply C; eauto with asmgen. + split; intros. + apply C; eauto with asmgen. + trivial. - (* Aglobal *) destruct (Ptrofs.eq (Ptrofs.modu ofs (Ptrofs.repr sz)) Ptrofs.zero && symbol_is_aligned id sz); inv TR. + econstructor; econstructor; split. apply exec_straight_opt_intro. apply exec_straight_one. simpl; eauto. auto. split. simpl. Simpl. rewrite symbol_high_low. simpl in EV. congruence. - intros; Simpl. -+ exploit (exec_loadsymbol X16 id ofs). auto. intros (rs' & A & B & C). + split; intros; Simpl. ++ exploit (exec_loadsymbol X16 id ofs). auto. + simpl. congruence. + intros (rs' & A & B & C & D). econstructor; exists rs'; split. apply exec_straight_opt_intro. eexact A. split. simpl. rewrite B. rewrite <- Genv.shift_symbol_address_64, Ptrofs.add_zero by auto. simpl in EV. congruence. - auto with asmgen. + split; auto with asmgen. - (* Ainstrack *) assert (E: Val.addl (rs SP) (Vlong (Ptrofs.to_int64 ofs)) = Vptr b o). { simpl in EV. inv EV. destruct (rs SP); simpl in H1; inv H1. simpl. @@ -1611,7 +1950,9 @@ Proof. destruct (offset_representable sz (Ptrofs.to_int64 ofs)); inv TR. + econstructor; econstructor; split. apply exec_straight_opt_refl. auto. -+ exploit (exec_loadimm64 X16 (Ptrofs.to_int64 ofs)). intros (rs' & A & B & C). ++ exploit (exec_loadimm64 X16 (Ptrofs.to_int64 ofs)). + simpl. congruence. + intros (rs' & A & B & C). econstructor; exists rs'; split. apply exec_straight_opt_intro. eexact A. split. simpl. rewrite B, C by eauto with asmgen. auto. @@ -1620,13 +1961,14 @@ Qed. Lemma transl_load_correct: forall chunk addr args dst k c (rs: regset) m vaddr v, - transl_load chunk addr args dst k = OK c -> + transl_load TRAP chunk addr args dst k = OK c -> Op.eval_addressing ge (rs#SP) addr (map rs (map preg_of args)) = Some vaddr -> Mem.loadv chunk m vaddr = Some v -> exists rs', exec_straight ge fn c rs m k rs' m /\ rs'#(preg_of dst) = v - /\ forall r, data_preg r = true -> r <> preg_of dst -> rs' r = rs r. + /\ (forall r, data_preg r = true -> r <> preg_of dst -> rs' r = rs r) + /\ rs' # RA = rs # RA. Proof. intros. destruct vaddr; try discriminate. assert (A: exists sz insn, @@ -1639,14 +1981,17 @@ Proof. do 2 econstructor; (split; [eassumption|auto]). } destruct A as (sz & insn & B & C). - exploit transl_addressing_correct. eexact B. eexact H0. intros (ad & rs' & P & Q & R). + exploit transl_addressing_correct. eexact B. eexact H0. intros (ad & rs' & P & Q & R & S). assert (X: exec_load ge chunk (fun v => v) ad (preg_of dst) rs' m = Next (nextinstr (rs'#(preg_of dst) <- v)) m). { unfold exec_load. rewrite Q, H1. auto. } econstructor; split. eapply exec_straight_opt_right. eexact P. apply exec_straight_one. rewrite C, X; eauto. Simpl. - split. Simpl. intros; Simpl. + split. Simpl. + split; intros; Simpl. + rewrite <- S. + apply RA_not_written. Qed. Lemma transl_store_correct: @@ -1656,7 +2001,8 @@ Lemma transl_store_correct: Mem.storev chunk m vaddr rs#(preg_of src) = Some m' -> exists rs', exec_straight ge fn c rs m k rs' m' - /\ forall r, data_preg r = true -> rs' r = rs r. + /\ (forall r, data_preg r = true -> rs' r = rs r) + /\ rs' # RA = rs # RA. Proof. intros. destruct vaddr; try discriminate. set (chunk' := match chunk with Mint8signed => Mint8unsigned @@ -1672,7 +2018,7 @@ Proof. do 2 econstructor; (split; [eassumption|auto]). } destruct A as (sz & insn & B & C). - exploit transl_addressing_correct. eexact B. eexact H0. intros (ad & rs' & P & Q & R). + exploit transl_addressing_correct. eexact B. eexact H0. intros (ad & rs' & P & Q & R & S). assert (X: Mem.storev chunk' m (Vptr b i) rs#(preg_of src) = Some m'). { rewrite <- H1. unfold chunk'. destruct chunk; auto; simpl; symmetry. apply Mem.store_signed_unsigned_8. @@ -1683,7 +2029,7 @@ Proof. econstructor; split. eapply exec_straight_opt_right. eexact P. apply exec_straight_one. rewrite C, Y; eauto. Simpl. - intros; Simpl. + split; intros; Simpl. Qed. (** Translation of indexed memory accesses *) @@ -1701,7 +2047,9 @@ Proof. { destruct (rs base); try discriminate. simpl in *. rewrite Ptrofs.of_int64_to_int64 by auto. auto. } destruct offset_representable. - econstructor; econstructor; split. apply exec_straight_opt_refl. auto. -- exploit (exec_loadimm64 X16); eauto. intros (rs' & A & B & C). +- exploit (exec_loadimm64 X16); eauto. + simpl. congruence. + intros (rs' & A & B & C). econstructor; econstructor; split. apply exec_straight_opt_intro; eexact A. split. simpl. rewrite B, C by eauto with asmgen. auto. auto. Qed. @@ -1712,7 +2060,7 @@ Lemma loadptr_correct: forall (base: iregsp) ofs dst k m v (rs: regset), exists rs', exec_straight ge fn (loadptr base ofs dst k) rs m k rs' m /\ rs'#dst = v - /\ forall r, r <> PC -> r <> X16 -> r <> dst -> rs' r = rs r. + /\ (forall r, r <> PC -> r <> X16 -> r <> dst -> rs' r = rs r). Proof. intros. destruct (Val.offset_ptr rs#base ofs) eqn:V; try discriminate. @@ -1720,7 +2068,8 @@ Proof. econstructor; split. eapply exec_straight_opt_right. eexact A. apply exec_straight_one. simpl. unfold exec_load. rewrite B, H. eauto. auto. - split. Simpl. intros; Simpl. + split. Simpl. + intros; Simpl. Qed. Lemma storeptr_correct: forall (base: iregsp) ofs (src: ireg) k m m' (rs: regset), @@ -1729,7 +2078,8 @@ Lemma storeptr_correct: forall (base: iregsp) ofs (src: ireg) k m m' (rs: regset src <> X16 -> exists rs', exec_straight ge fn (storeptr src base ofs k) rs m k rs' m' - /\ forall r, r <> PC -> r <> X16 -> rs' r = rs r. + /\ (forall r, r <> PC -> r <> X16 -> rs' r = rs r) + /\ rs' RA = rs RA. Proof. intros. destruct (Val.offset_ptr rs#base ofs) eqn:V; try discriminate. @@ -1737,7 +2087,7 @@ Proof. econstructor; split. eapply exec_straight_opt_right. eexact A. apply exec_straight_one. simpl. unfold exec_store. rewrite B, C, H by eauto with asmgen. eauto. auto. - intros; Simpl. + split; intros; Simpl. Qed. Lemma loadind_correct: forall (base: iregsp) ofs ty dst k c (rs: regset) m v, @@ -1747,7 +2097,8 @@ Lemma loadind_correct: forall (base: iregsp) ofs ty dst k c (rs: regset) m v, exists rs', exec_straight ge fn c rs m k rs' m /\ rs'#(preg_of dst) = v - /\ forall r, data_preg r = true -> r <> preg_of dst -> rs' r = rs r. + /\ (forall r, data_preg r = true -> r <> preg_of dst -> rs' r = rs r) + /\ rs' RA = rs RA. Proof. intros. destruct (Val.offset_ptr rs#base ofs) eqn:V; try discriminate. @@ -1763,7 +2114,10 @@ Proof. econstructor; split. eapply exec_straight_opt_right. eexact A. apply exec_straight_one. rewrite SEM. unfold exec_load. rewrite B, H0. eauto. Simpl. - split. Simpl. intros; Simpl. + split. Simpl. + split. intros; Simpl. + Simpl. rewrite RA_not_written. + apply C; congruence. Qed. Lemma storeind_correct: forall (base: iregsp) ofs ty src k c (rs: regset) m m', @@ -1772,7 +2126,8 @@ Lemma storeind_correct: forall (base: iregsp) ofs ty src k c (rs: regset) m m', preg_of_iregsp base <> IR X16 -> exists rs', exec_straight ge fn c rs m k rs' m' - /\ forall r, data_preg r = true -> rs' r = rs r. + /\ (forall r, data_preg r = true -> rs' r = rs r) + /\ rs' RA = rs RA. Proof. intros. destruct (Val.offset_ptr rs#base ofs) eqn:V; try discriminate. @@ -1790,13 +2145,15 @@ Proof. apply exec_straight_one. rewrite SEM. unfold exec_store. rewrite B, C, H0 by eauto with asmgen. eauto. Simpl. - intros; Simpl. + split. intros; Simpl. + Simpl. Qed. Lemma make_epilogue_correct: forall ge0 f m stk soff cs m' ms rs k tm, + (is_leaf_function f = true -> rs # (IR RA) = parent_ra cs) -> load_stack m (Vptr stk soff) Tptr f.(fn_link_ofs) = Some (parent_sp cs) -> - load_stack m (Vptr stk soff) Tptr f.(fn_retaddr_ofs) = Some (parent_ra cs) -> + ((* FIXME is_leaf_function f = false -> *) load_stack m (Vptr stk soff) Tptr f.(fn_retaddr_ofs) = Some (parent_ra cs)) -> Mem.free m stk 0 f.(fn_stacksize) = Some m' -> agree ms (Vptr stk soff) rs -> Mem.extends m tm -> @@ -1807,18 +2164,46 @@ Lemma make_epilogue_correct: /\ Mem.extends m' tm' /\ rs'#RA = parent_ra cs /\ rs'#SP = parent_sp cs - /\ (forall r, r <> PC -> r <> SP -> r <> X30 -> r <> X16 -> rs'#r = rs#r). + /\ (forall r, r <> PC -> r <> SP -> r <> RA -> r <> X16 -> rs'#r = rs#r). Proof. - intros until tm; intros LP LRA FREE AG MEXT MCS. + intros until tm; intros LEAF_RA LP LRA FREE AG MEXT MCS. + + (* FIXME + Cannot be used at this point + destruct (is_leaf_function f) eqn:IS_LEAF. + { + exploit Mem.loadv_extends. eauto. eexact LP. auto. simpl. intros (parent' & LP' & LDP'). + exploit lessdef_parent_sp; eauto. intros EQ; subst parent'; clear LDP'. + exploit Mem.free_parallel_extends; eauto. intros (tm' & FREE' & MEXT'). + unfold make_epilogue. + rewrite IS_LEAF. + + econstructor; econstructor; split. + apply exec_straight_one. simpl. + rewrite <- (sp_val _ _ _ AG). simpl; rewrite LP'. + rewrite FREE'. eauto. auto. + split. apply agree_nextinstr. apply agree_set_other; auto. + apply agree_change_sp with (Vptr stk soff). + apply agree_exten with rs; auto. + eapply parent_sp_def; eauto. + split. auto. + split. Simpl. + split. Simpl. + intros. Simpl. + } + lapply LRA. 2: reflexivity. + clear LRA. intro LRA. *) exploit Mem.loadv_extends. eauto. eexact LP. auto. simpl. intros (parent' & LP' & LDP'). exploit Mem.loadv_extends. eauto. eexact LRA. auto. simpl. intros (ra' & LRA' & LDRA'). exploit lessdef_parent_sp; eauto. intros EQ; subst parent'; clear LDP'. exploit lessdef_parent_ra; eauto. intros EQ; subst ra'; clear LDRA'. exploit Mem.free_parallel_extends; eauto. intros (tm' & FREE' & MEXT'). - unfold make_epilogue. + unfold make_epilogue. + (* FIXME rewrite IS_LEAF. *) exploit (loadptr_correct XSP (fn_retaddr_ofs f)). instantiate (2 := rs). simpl. rewrite <- (sp_val _ _ _ AG). simpl. eexact LRA'. simpl; congruence. intros (rs1 & A1 & B1 & C1). + econstructor; econstructor; split. eapply exec_straight_trans. eexact A1. apply exec_straight_one. simpl. simpl; rewrite (C1 SP) by auto with asmgen. rewrite <- (sp_val _ _ _ AG). simpl; rewrite LP'. @@ -1833,4 +2218,4 @@ Proof. intros. Simpl. Qed. -End CONSTRUCTORS.
\ No newline at end of file +End CONSTRUCTORS. diff --git a/aarch64/CSE2deps.v b/aarch64/CSE2deps.v new file mode 100644 index 00000000..d5c7ee0f --- /dev/null +++ b/aarch64/CSE2deps.v @@ -0,0 +1,35 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* David Monniaux CNRS, VERIMAG *) +(* *) +(* Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +Require Import BoolEqual Coqlib. +Require Import AST Integers Floats. +Require Import Values Memory Globalenvs Events. +Require Import Op. + + +Definition can_swap_accesses_ofs ofsr chunkr ofsw chunkw := + (0 <=? ofsw) && (ofsw <=? (Ptrofs.modulus - largest_size_chunk)) + && (0 <=? ofsr) && (ofsr <=? (Ptrofs.modulus - largest_size_chunk)) + && ((ofsw + size_chunk chunkw <=? ofsr) || + (ofsr + size_chunk chunkr <=? ofsw)). + +Definition may_overlap chunk addr args chunk' addr' args' := + match addr, addr', args, args' with + | (Aindexed ofs), (Aindexed ofs'), + (base :: nil), (base' :: nil) => + if peq base base' + then negb (can_swap_accesses_ofs (Int64.unsigned ofs') chunk' (Int64.unsigned ofs) chunk) + else true + | (Ainstack ofs), (Ainstack ofs'), _, _ => + negb (can_swap_accesses_ofs (Ptrofs.unsigned ofs') chunk' (Ptrofs.unsigned ofs) chunk) + | _, _, _, _ => true + end. diff --git a/aarch64/CSE2depsproof.v b/aarch64/CSE2depsproof.v new file mode 100644 index 00000000..653c88f4 --- /dev/null +++ b/aarch64/CSE2depsproof.v @@ -0,0 +1,209 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* David Monniaux CNRS, VERIMAG *) +(* *) +(* Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +Require Import Coqlib Maps Errors Integers Floats Lattice Kildall. +Require Import AST Linking. +Require Import Memory Registers Op RTL Maps. + +Require Import Globalenvs Values. +Require Import Linking Values Memory Globalenvs Events Smallstep. +Require Import Registers Op RTL. +Require Import CSE2 CSE2deps. +Require Import Lia. + +Lemma ptrofs_size : + Ptrofs.wordsize = 64%nat. +Proof. + unfold Ptrofs.wordsize. + unfold Wordsize_Ptrofs.wordsize. + trivial. +Qed. + +Lemma ptrofs_modulus : + Ptrofs.modulus = 18446744073709551616. +Proof. + reflexivity. +Qed. + +Section SOUNDNESS. + Variable F V : Type. + Variable genv: Genv.t F V. + Variable sp : val. + +Section MEMORY_WRITE. + Variable m m2 : mem. + Variable chunkw chunkr : memory_chunk. + Variable base : val. + + Variable addrw addrr valw : val. + Hypothesis STORE : Mem.storev chunkw m addrw valw = Some m2. + + Section INDEXED_AWAY. + Variable ofsw ofsr : int64. + Hypothesis ADDRW : eval_addressing genv sp + (Aindexed ofsw) (base :: nil) = Some addrw. + Hypothesis ADDRR : eval_addressing genv sp + (Aindexed ofsr) (base :: nil) = Some addrr. + + Lemma load_store_away1 : + forall RANGEW : 0 <= Int64.unsigned ofsw <= Ptrofs.modulus - largest_size_chunk, + forall RANGER : 0 <= Int64.unsigned ofsr <= Ptrofs.modulus - largest_size_chunk, + forall SWAPPABLE : Int64.unsigned ofsw + size_chunk chunkw <= Int64.unsigned ofsr + \/ Int64.unsigned ofsr + size_chunk chunkr <= Int64.unsigned ofsw, + Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr. + Proof. + intros. + + pose proof (max_size_chunk chunkr) as size_chunkr_bounded. + pose proof (max_size_chunk chunkw) as size_chunkw_bounded. + unfold largest_size_chunk in *. + + rewrite ptrofs_modulus in *. + simpl in *. + inv ADDRR. + inv ADDRW. + + destruct base; try discriminate. + eapply Mem.load_store_other with (chunk := chunkw) (v := valw) (b := b). + exact STORE. + right. + + all: unfold Ptrofs.of_int64. + + all: try (destruct (Ptrofs.unsigned_add_either i (Ptrofs.repr (Int64.unsigned ofsr))) as [OFSR | OFSR]; + rewrite OFSR). + all: try (destruct (Ptrofs.unsigned_add_either i (Ptrofs.repr (Int64.unsigned ofsw))) as [OFSW | OFSW]; + rewrite OFSW). + all: repeat rewrite Ptrofs.unsigned_repr by (change Ptrofs.max_unsigned with 18446744073709551615; lia). + + all: try rewrite ptrofs_modulus in *. + + all: intuition lia. + Qed. + + Theorem load_store_away : + can_swap_accesses_ofs (Int64.unsigned ofsr) chunkr (Int64.unsigned ofsw) chunkw = true -> + Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr. + Proof. + intro SWAP. + unfold can_swap_accesses_ofs in SWAP. + repeat rewrite andb_true_iff in SWAP. + repeat rewrite orb_true_iff in SWAP. + repeat rewrite Z.leb_le in SWAP. + apply load_store_away1. + all: tauto. + Qed. + End INDEXED_AWAY. +End MEMORY_WRITE. + + +Section STACK_WRITE. + Variable m m2 : mem. + Variable chunkw chunkr : memory_chunk. + + Variable addrw addrr valw : val. + Hypothesis STORE : Mem.storev chunkw m addrw valw = Some m2. + + Section INDEXED_AWAY. + Variable ofsw ofsr : ptrofs. + Hypothesis ADDRW : eval_addressing genv sp + (Ainstack ofsw) nil = Some addrw. + Hypothesis ADDRR : eval_addressing genv sp + (Ainstack ofsr) nil = Some addrr. + + Lemma stack_load_store_away1 : + forall RANGEW : 0 <= Ptrofs.unsigned ofsw <= Ptrofs.modulus - largest_size_chunk, + forall RANGER : 0 <= Ptrofs.unsigned ofsr <= Ptrofs.modulus - largest_size_chunk, + forall SWAPPABLE : Ptrofs.unsigned ofsw + size_chunk chunkw <= Ptrofs.unsigned ofsr + \/ Ptrofs.unsigned ofsr + size_chunk chunkr <= Ptrofs.unsigned ofsw, + Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr. + Proof. + intros. + + pose proof (max_size_chunk chunkr) as size_chunkr_bounded. + pose proof (max_size_chunk chunkw) as size_chunkw_bounded. + unfold largest_size_chunk in *. + + rewrite ptrofs_modulus in *. + simpl in *. + inv ADDRR. + inv ADDRW. + + destruct sp; try discriminate. + eapply Mem.load_store_other with (chunk := chunkw) (v := valw) (b := b). + exact STORE. + right. + + all: try (destruct (Ptrofs.unsigned_add_either i ofsr) as [OFSR | OFSR]; + rewrite OFSR). + all: try (destruct (Ptrofs.unsigned_add_either i ofsw) as [OFSW | OFSW]; + rewrite OFSW). + + all: try rewrite ptrofs_modulus in *. + + all: intuition lia. + Qed. + + Theorem stack_load_store_away : + can_swap_accesses_ofs (Ptrofs.unsigned ofsr) chunkr (Ptrofs.unsigned ofsw) chunkw = true -> + Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr. + Proof. + intro SWAP. + unfold can_swap_accesses_ofs in SWAP. + repeat rewrite andb_true_iff in SWAP. + repeat rewrite orb_true_iff in SWAP. + repeat rewrite Z.leb_le in SWAP. + apply stack_load_store_away1. + all: tauto. + Qed. + End INDEXED_AWAY. +End STACK_WRITE. +End SOUNDNESS. + +Section SOUNDNESS. + Variable F V : Type. + Variable genv: Genv.t F V. + Variable sp : val. + +Lemma may_overlap_sound: + forall m m' : mem, + forall chunk addr args chunk' addr' args' v a a' rs, + (eval_addressing genv sp addr (rs ## args)) = Some a -> + (eval_addressing genv sp addr' (rs ## args')) = Some a' -> + (may_overlap chunk addr args chunk' addr' args') = false -> + (Mem.storev chunk m a v) = Some m' -> + (Mem.loadv chunk' m' a') = (Mem.loadv chunk' m a'). +Proof. + intros until rs. + intros ADDR ADDR' OVERLAP STORE. + destruct addr; destruct addr'; try discriminate. + - (* Aindexed / Aindexed *) + destruct args as [ | base [ | ]]. 1,3: discriminate. + destruct args' as [ | base' [ | ]]. 1,3: discriminate. + simpl in OVERLAP. + destruct (peq base base'). 2: discriminate. + subst base'. + destruct (can_swap_accesses_ofs (Int64.unsigned ofs0) chunk' (Int64.unsigned ofs) chunk) eqn:SWAP. + 2: discriminate. + simpl in *. + eapply load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption. +- (* Ainstack / Ainstack *) + destruct args. 2: discriminate. + destruct args'. 2: discriminate. + cbn in OVERLAP. + destruct (can_swap_accesses_ofs (Ptrofs.unsigned ofs0) chunk' (Ptrofs.unsigned ofs) chunk) eqn:SWAP. + 2: discriminate. + cbn in *. + eapply stack_load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption. +Qed. + +End SOUNDNESS. diff --git a/aarch64/ConstpropOpproof.v b/aarch64/ConstpropOpproof.v index deab7cd4..c777062c 100644 --- a/aarch64/ConstpropOpproof.v +++ b/aarch64/ConstpropOpproof.v @@ -335,40 +335,63 @@ Qed. Lemma make_divimm_correct: forall n r1 r2 v, - Val.divs e#r1 e#r2 = Some v -> + Val.maketotal (Val.divs e#r1 e#r2) = v -> e#r2 = Vint n -> let (op, args) := make_divimm n r1 r2 in exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w. Proof. intros; unfold make_divimm. - predSpec Int.eq Int.eq_spec n Int.one; intros. subst. rewrite H0 in H. - destruct (e#r1) eqn:?; - try (rewrite Val.divs_one in H; exists (Vint i); split; simpl; try rewrite Heqv0; auto); - inv H; auto. - destruct (Int.is_power2 n) eqn:?. - destruct (Int.ltu i (Int.repr 31)) eqn:?. - exists v; split; auto. simpl. eapply Val.divs_pow2; eauto. congruence. - exists v; auto. - exists v; auto. + predSpec Int.eq Int.eq_spec n Int.one; intros; subst; rewrite H0. + { destruct (e # r1) eqn:Er1. + all: try (cbn; exists (e # r1); split; auto; fail). + rewrite Val.divs_one. + cbn. + rewrite Er1. + exists (Vint i); split; auto. + } + destruct (Int.is_power2 n) eqn:Power2. + { + destruct (Int.ltu i (Int.repr 31)) eqn:iLT31. + { + cbn. + exists (Val.maketotal (Val.shrx e # r1 (Vint i))); split; auto. + destruct (Val.divs e # r1 (Vint n)) eqn:DIVS; cbn; auto. + rewrite Val.divs_pow2 with (y:=v) (n:=n). + cbn. + all: auto. + } + exists (Val.maketotal (Val.divs e # r1 (Vint n))); split; cbn; auto; congruence. + } + exists (Val.maketotal (Val.divs e # r1 (Vint n))); split; cbn; auto; congruence. Qed. + Lemma make_divuimm_correct: forall n r1 r2 v, - Val.divu e#r1 e#r2 = Some v -> + Val.maketotal (Val.divu e#r1 e#r2) = v -> e#r2 = Vint n -> let (op, args) := make_divuimm n r1 r2 in exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w. Proof. intros; unfold make_divuimm. - predSpec Int.eq Int.eq_spec n Int.one; intros. subst. rewrite H0 in H. - destruct (e#r1) eqn:?; - try (rewrite Val.divu_one in H; exists (Vint i); split; simpl; try rewrite Heqv0; auto); - inv H; auto. - destruct (Int.is_power2 n) eqn:?. - econstructor; split. simpl; eauto. - rewrite mk_amount32_eq by (eapply Int.is_power2_range; eauto). - rewrite H0 in H. erewrite Val.divu_pow2 by eauto. auto. - exists v; auto. + predSpec Int.eq Int.eq_spec n Int.one; intros; subst; rewrite H0. + { destruct (e # r1) eqn:Er1. + all: try (cbn; exists (e # r1); split; auto; fail). + rewrite Val.divu_one. + cbn. + rewrite Er1. + exists (Vint i); split; auto. + } + destruct (Int.is_power2 n) eqn:Power2. + { + cbn. + rewrite mk_amount32_eq by (eapply Int.is_power2_range; eauto). + exists (Val.shru e # r1 (Vint i)); split; auto. + destruct (Val.divu e # r1 (Vint n)) eqn:DIVU; cbn; auto. + rewrite Val.divu_pow2 with (y:=v) (n:=n). + all: auto. + } + exists (Val.maketotal (Val.divu e # r1 (Vint n))); split; cbn; auto; congruence. Qed. Lemma make_andimm_correct: @@ -503,34 +526,60 @@ Qed. Lemma make_divlimm_correct: forall n r1 r2 v, - Val.divls e#r1 e#r2 = Some v -> + Val.maketotal (Val.divls e#r1 e#r2) = v -> e#r2 = Vlong n -> let (op, args) := make_divlimm n r1 r2 in exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w. Proof. intros; unfold make_divlimm. - destruct (Int64.is_power2' n) eqn:?. destruct (Int.ltu i (Int.repr 63)) eqn:?. - rewrite H0 in H. econstructor; split. simpl; eauto. eapply Val.divls_pow2; eauto. auto. - exists v; auto. - exists v; auto. + destruct (Int64.is_power2' n) eqn:Power2. + { + destruct (Int.ltu i (Int.repr 63)) eqn:iLT63. + { + cbn. + exists (Val.maketotal (Val.shrxl e # r1 (Vint i))); split; auto. + rewrite H0 in H. + destruct (Val.divls e # r1 (Vlong n)) eqn:DIVS; cbn in H; auto. + { + subst v0. + rewrite Val.divls_pow2 with (y:=v) (n:=n). + cbn. + all: auto. + } + subst. auto. + } + cbn. subst. rewrite H0. + exists (Val.maketotal (Val.divls e # r1 (Vlong n))); split; auto. + } + cbn. subst. rewrite H0. + exists (Val.maketotal (Val.divls e # r1 (Vlong n))); split; auto. Qed. + Lemma make_divluimm_correct: forall n r1 r2 v, - Val.divlu e#r1 e#r2 = Some v -> + Val.maketotal (Val.divlu e#r1 e#r2) = v -> e#r2 = Vlong n -> let (op, args) := make_divluimm n r1 r2 in exists w, eval_operation ge (Vptr sp Ptrofs.zero) op e##args m = Some w /\ Val.lessdef v w. Proof. intros; unfold make_divluimm. destruct (Int64.is_power2' n) eqn:?. + { econstructor; split. simpl; eauto. - rewrite mk_amount64_eq by (eapply Int64.is_power2'_range; eauto). - rewrite H0 in H. destruct (e#r1); inv H. destruct (Int64.eq n Int64.zero); inv H2. - simpl. - erewrite Int64.is_power2'_range by eauto. - erewrite Int64.divu_pow2' by eauto. auto. - exists v; auto. + rewrite H0 in H. destruct (e#r1); inv H. + all: cbn; auto. + { + rewrite mk_amount64_eq by (eapply Int64.is_power2'_range; eauto). + destruct (Int64.eq n Int64.zero); cbn; auto. + erewrite Int64.is_power2'_range by eauto. + erewrite Int64.divu_pow2' by eauto. auto. + } + } + exists v; split; auto. + cbn. + rewrite H. + reflexivity. Qed. Lemma make_andlimm_correct: @@ -679,10 +728,10 @@ Proof. InvApproxRegs; SimplVM; inv H0. apply make_mulimm_correct; auto. - (* divs *) assert (e#r2 = Vint n2). clear H0. InvApproxRegs; SimplVM; auto. - apply make_divimm_correct; auto. + apply make_divimm_correct; auto. congruence. - (* divu *) assert (e#r2 = Vint n2). clear H0. InvApproxRegs; SimplVM; auto. - apply make_divuimm_correct; auto. + apply make_divuimm_correct; auto. congruence. - (* and 1 *) rewrite Val.and_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_andimm_correct; auto. - (* and 2 *) @@ -745,10 +794,10 @@ Proof. InvApproxRegs; SimplVM; inv H0. apply make_mullimm_correct; auto. - (* divl *) assert (e#r2 = Vlong n2). clear H0. InvApproxRegs; SimplVM; auto. - apply make_divlimm_correct; auto. + apply make_divlimm_correct; auto. congruence. - (* divlu *) assert (e#r2 = Vlong n2). clear H0. InvApproxRegs; SimplVM; auto. - apply make_divluimm_correct; auto. + apply make_divluimm_correct; auto. congruence. - (* andl 1 *) rewrite Val.andl_commut in H0. InvApproxRegs; SimplVM; inv H0. apply make_andlimm_correct; auto. - (* andl 2 *) diff --git a/aarch64/DuplicateOpcodeHeuristic.ml b/aarch64/DuplicateOpcodeHeuristic.ml new file mode 100644 index 00000000..3a3b87fc --- /dev/null +++ b/aarch64/DuplicateOpcodeHeuristic.ml @@ -0,0 +1,41 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +open Op +open Integers + +let opcode_heuristic code cond ifso ifnot is_loop_header = + match cond with + | Ccompimm (c, n) | Ccompuimm (c, n) -> if n == Integers.Int.zero then (match c with + | Clt | Cle -> Some false + | Cgt | Cge -> Some true + | _ -> None + ) else None + | Ccomplimm (c, n) | Ccompluimm (c, n) -> if n == Integers.Int64.zero then (match c with + | Clt | Cle -> Some false + | Cgt | Cge -> Some true + | _ -> None + ) else None + | Ccompf c | Ccompfs c -> (match c with + | Ceq -> Some false + | Cne -> Some true + | _ -> None + ) + | Cnotcompf c | Cnotcompfs c -> (match c with + | Ceq -> Some true + | Cne -> Some false + | _ -> None + ) + | _ -> None + diff --git a/aarch64/Machregs.v b/aarch64/Machregs.v index b2a2308e..3d27f48f 100644 --- a/aarch64/Machregs.v +++ b/aarch64/Machregs.v @@ -158,6 +158,7 @@ Definition destroyed_by_builtin (ef: external_function): list mreg := match ef with | EF_memcpy sz al => R15 :: R17 :: R29 :: nil | EF_inline_asm txt sg clob => destroyed_by_clobber clob + | EF_profiling _ _ => R15 :: R17 :: nil | _ => nil end. diff --git a/aarch64/Machregsaux.ml b/aarch64/Machregsaux.ml index 5f3c1a8d..41db3bd4 100644 --- a/aarch64/Machregsaux.ml +++ b/aarch64/Machregsaux.ml @@ -14,3 +14,8 @@ let is_scratch_register s = s = "X16" || s = "x16" || s = "X30" || s = "x30" + +let class_of_type = function + | AST.Tint | AST.Tlong -> 0 + | AST.Tfloat | AST.Tsingle -> 1 + | AST.Tany32 | AST.Tany64 -> assert false diff --git a/aarch64/Op.v b/aarch64/Op.v index a7483d56..f2a8e6fb 100644 --- a/aarch64/Op.v +++ b/aarch64/Op.v @@ -386,8 +386,8 @@ Definition eval_operation | Omul, v1 :: v2 :: nil => Some (Val.mul v1 v2) | Omuladd, v1 :: v2 :: v3 :: nil => Some (Val.add v1 (Val.mul v2 v3)) | Omulsub, v1 :: v2 :: v3 :: nil => Some (Val.sub v1 (Val.mul v2 v3)) - | Odiv, v1 :: v2 :: nil => Val.divs v1 v2 - | Odivu, v1 :: v2 :: nil => Val.divu v1 v2 + | Odiv, v1 :: v2 :: nil => Some (Val.maketotal (Val.divs v1 v2)) + | Odivu, v1 :: v2 :: nil => Some (Val.maketotal (Val.divu v1 v2)) | Oand, v1 :: v2 :: nil => Some (Val.and v1 v2) | Oandshift s a, v1 :: v2 :: nil => Some (Val.and v1 (eval_shift s v2 a)) | Oandimm n, v1 :: nil => Some (Val.and v1 (Vint n)) @@ -408,7 +408,7 @@ Definition eval_operation | Oshl, v1 :: v2 :: nil => Some (Val.shl v1 v2) | Oshr, v1 :: v2 :: nil => Some (Val.shr v1 v2) | Oshru, v1 :: v2 :: nil => Some (Val.shru v1 v2) - | Oshrximm n, v1::nil => Val.shrx v1 (Vint n) + | Oshrximm n, v1::nil => Some (Val.maketotal (Val.shrx v1 (Vint n))) | Ozext s, v1 :: nil => Some (Val.zero_ext s v1) | Osext s, v1 :: nil => Some (Val.sign_ext s v1) | Oshlzext s a, v1 :: nil => Some (Val.shl (Val.zero_ext s v1) (Vint a)) @@ -435,8 +435,8 @@ Definition eval_operation | Omullsub, v1 :: v2 :: v3 :: nil => Some (Val.subl v1 (Val.mull v2 v3)) | Omullhs, v1::v2::nil => Some (Val.mullhs v1 v2) | Omullhu, v1::v2::nil => Some (Val.mullhu v1 v2) - | Odivl, v1 :: v2 :: nil => Val.divls v1 v2 - | Odivlu, v1 :: v2 :: nil => Val.divlu v1 v2 + | Odivl, v1 :: v2 :: nil => Some (Val.maketotal (Val.divls v1 v2)) + | Odivlu, v1 :: v2 :: nil => Some (Val.maketotal (Val.divlu v1 v2)) | Oandl, v1 :: v2 :: nil => Some (Val.andl v1 v2) | Oandlshift s a, v1 :: v2 :: nil => Some (Val.andl v1 (eval_shiftl s v2 a)) | Oandlimm n, v1 :: nil => Some (Val.andl v1 (Vlong n)) @@ -457,7 +457,7 @@ Definition eval_operation | Oshll, v1 :: v2 :: nil => Some (Val.shll v1 v2) | Oshrl, v1 :: v2 :: nil => Some (Val.shrl v1 v2) | Oshrlu, v1 :: v2 :: nil => Some (Val.shrlu v1 v2) - | Oshrlximm n, v1::nil => Val.shrxl v1 (Vint n) + | Oshrlximm n, v1::nil => Some (Val.maketotal (Val.shrxl v1 (Vint n))) | Ozextl s, v1 :: nil => Some (Val.zero_ext_l s v1) | Osextl s, v1 :: nil => Some (Val.sign_ext_l s v1) | Oshllzext s a, v1 :: nil => Some (Val.shll (Val.zero_ext_l s v1) (Vint a)) @@ -481,22 +481,22 @@ Definition eval_operation | Osingleoffloat, v1::nil => Some (Val.singleoffloat v1) | Ofloatofsingle, v1::nil => Some (Val.floatofsingle v1) - | Ointoffloat, v1::nil => Val.intoffloat v1 - | Ointuoffloat, v1::nil => Val.intuoffloat v1 - | Ofloatofint, v1::nil => Val.floatofint v1 - | Ofloatofintu, v1::nil => Val.floatofintu v1 - | Ointofsingle, v1::nil => Val.intofsingle v1 - | Ointuofsingle, v1::nil => Val.intuofsingle v1 - | Osingleofint, v1::nil => Val.singleofint v1 - | Osingleofintu, v1::nil => Val.singleofintu v1 - | Olongoffloat, v1::nil => Val.longoffloat v1 - | Olonguoffloat, v1::nil => Val.longuoffloat v1 - | Ofloatoflong, v1::nil => Val.floatoflong v1 - | Ofloatoflongu, v1::nil => Val.floatoflongu v1 - | Olongofsingle, v1::nil => Val.longofsingle v1 - | Olonguofsingle, v1::nil => Val.longuofsingle v1 - | Osingleoflong, v1::nil => Val.singleoflong v1 - | Osingleoflongu, v1::nil => Val.singleoflongu v1 + | Ointoffloat, v1::nil => Some (Val.maketotal (Val.intoffloat v1)) + | Ointuoffloat, v1::nil => Some (Val.maketotal (Val.intuoffloat v1)) + | Ofloatofint, v1::nil => Some (Val.maketotal (Val.floatofint v1)) + | Ofloatofintu, v1::nil => Some (Val.maketotal (Val.floatofintu v1)) + | Ointofsingle, v1::nil => Some (Val.maketotal (Val.intofsingle v1)) + | Ointuofsingle, v1::nil => Some (Val.maketotal (Val.intuofsingle v1)) + | Osingleofint, v1::nil => Some (Val.maketotal (Val.singleofint v1)) + | Osingleofintu, v1::nil => Some (Val.maketotal (Val.singleofintu v1)) + | Olongoffloat, v1::nil => Some (Val.maketotal (Val.longoffloat v1)) + | Olonguoffloat, v1::nil => Some (Val.maketotal (Val.longuoffloat v1)) + | Ofloatoflong, v1::nil => Some (Val.maketotal (Val.floatoflong v1)) + | Ofloatoflongu, v1::nil => Some (Val.maketotal (Val.floatoflongu v1)) + | Olongofsingle, v1::nil => Some (Val.maketotal (Val.longofsingle v1)) + | Olonguofsingle, v1::nil => Some (Val.maketotal (Val.longuofsingle v1)) + | Osingleoflong, v1::nil => Some (Val.maketotal (Val.singleoflong v1)) + | Osingleoflongu, v1::nil => Some (Val.maketotal (Val.singleoflongu v1)) | Ocmp c, _ => Some (Val.of_optbool (eval_condition c vl m)) | Osel c ty, v1::v2::vl => Some(Val.select (eval_condition c vl m) v1 v2 ty) @@ -788,10 +788,10 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type). - destruct v0... destruct v1... - apply type_add. - apply type_sub. - - destruct v0; destruct v1; simpl in *; inv H0. - destruct (Int.eq i0 Int.zero || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2... - - destruct v0; destruct v1; simpl in *; inv H0. - destruct (Int.eq i0 Int.zero); inv H2... + - destruct v0; destruct v1; cbn in *; trivial. + destruct (_ || _); trivial... + - destruct v0; destruct v1; cbn in *; trivial. + destruct (Int.eq i0 Int.zero); constructor. - destruct v0... destruct v1... - destruct v0... destruct (eval_shift s v1 a)... - destruct v0... @@ -812,7 +812,8 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type). - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)... - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)... - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)... - - destruct v0; simpl in H0; try discriminate. destruct (Int.ltu n (Int.repr 31)); inv H0... + - destruct v0; cbn; trivial. + destruct (Int.ltu n (Int.repr 31)); cbn; trivial. - destruct v0... - destruct v0... - destruct (Val.zero_ext s v0)... simpl; rewrite a32_range... @@ -843,10 +844,10 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type). - apply type_subl. - destruct v0... destruct v1... - destruct v0... destruct v1... - - destruct v0; destruct v1; simpl in *; inv H0. - destruct (Int64.eq i0 Int64.zero || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H2... - - destruct v0; destruct v1; simpl in *; inv H0. - destruct (Int64.eq i0 Int64.zero); inv H2... + - destruct v0; destruct v1; cbn; trivial. + destruct (_ || _); cbn; trivial. + - destruct v0; destruct v1; cbn; trivial. + destruct (Int64.eq i0 Int64.zero); cbn; trivial. - destruct v0... destruct v1... - destruct v0... destruct (eval_shiftl s v1 a)... - destruct v0... @@ -867,7 +868,8 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type). - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')... - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')... - destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int64.iwordsize')... - - destruct v0; simpl in H0; try discriminate. destruct (Int.ltu n (Int.repr 63)); inv H0... + - destruct v0; cbn; trivial. + destruct (Int.ltu n (Int.repr 63)); cbn; trivial. - destruct v0... - destruct v0... - destruct (Val.zero_ext_l s v0)... simpl; rewrite a64_range... @@ -893,34 +895,60 @@ Proof with (try exact I; try reflexivity; auto using Val.Vptr_has_type). - destruct v0... - destruct v0... (* intoffloat, intuoffloat *) - - destruct v0; simpl in H0; inv H0. destruct (Float.to_int f); inv H2... - - destruct v0; simpl in H0; inv H0. destruct (Float.to_intu f); inv H2... + - destruct v0; cbn; trivial. destruct (Float.to_int f); cbn; trivial. + - destruct v0; cbn; trivial. destruct (Float.to_intu f); cbn; trivial. (* floatofint, floatofintu *) - - destruct v0; simpl in H0; inv H0... - - destruct v0; simpl in H0; inv H0... + - destruct v0; cbn; trivial. + - destruct v0; cbn; trivial. (* intofsingle, intuofsingle *) - - destruct v0; simpl in H0; inv H0. destruct (Float32.to_int f); inv H2... - - destruct v0; simpl in H0; inv H0. destruct (Float32.to_intu f); inv H2... + - destruct v0; cbn; trivial. destruct (Float32.to_int f); cbn; trivial. + - destruct v0; cbn; trivial. destruct (Float32.to_intu f); cbn; trivial. (* singleofint, singleofintu *) - - destruct v0; simpl in H0; inv H0... - - destruct v0; simpl in H0; inv H0... + - destruct v0; cbn; trivial. + - destruct v0; cbn; trivial. (* longoffloat, longuoffloat *) - - destruct v0; simpl in H0; inv H0. destruct (Float.to_long f); inv H2... - - destruct v0; simpl in H0; inv H0. destruct (Float.to_longu f); inv H2... + - destruct v0; cbn; trivial. destruct (Float.to_long f); cbn; trivial. + - destruct v0; cbn; trivial. destruct (Float.to_longu f); cbn; trivial. (* floatoflong, floatoflongu *) - - destruct v0; simpl in H0; inv H0... - - destruct v0; simpl in H0; inv H0... + - destruct v0; cbn; trivial. + - destruct v0; cbn; trivial. (* longofsingle, longuofsingle *) - - destruct v0; simpl in H0; inv H0. destruct (Float32.to_long f); inv H2... - - destruct v0; simpl in H0; inv H0. destruct (Float32.to_longu f); inv H2... + - destruct v0; cbn; trivial. destruct (Float32.to_long f); cbn; trivial. + - destruct v0; cbn; trivial. destruct (Float32.to_longu f); cbn; trivial. (* singleoflong, singleoflongu *) - - destruct v0; simpl in H0; inv H0... - - destruct v0; simpl in H0; inv H0... + - destruct v0; cbn; trivial. + - destruct v0; cbn; trivial. (* cmp *) - destruct (eval_condition cond vl m) as [[]|]... - unfold Val.select. destruct (eval_condition cond vl m). apply Val.normalize_type. exact I. Qed. + +Definition is_trapping_op (op : operation) := + match op with + | Omove => false + | _ => false + end. + + +Definition args_of_operation op := + if eq_operation op Omove + then 1%nat + else List.length (fst (type_of_operation op)). + +Lemma is_trapping_op_sound: + forall op vl sp m, + is_trapping_op op = false -> + (List.length vl) = args_of_operation op -> + eval_operation genv sp op vl m <> None. +Proof. + unfold args_of_operation. + destruct op; destruct eq_operation; intros; simpl in *; try congruence. + all: try (destruct vl as [ | vh1 vl1]; try discriminate). + all: try (destruct vl1 as [ | vh2 vl2]; try discriminate). + all: try (destruct vl2 as [ | vh3 vl3]; try discriminate). + all: try (destruct vl3 as [ | vh4 vl4]; try discriminate). +Qed. End SOUNDNESS. (** * Manipulating and transforming operations *) @@ -1174,6 +1202,20 @@ Proof. rewrite (cond_depends_on_memory_correct cond args m1 m2 H). auto. Qed. +Lemma op_valid_pointer_eq: + forall (F V: Type) (ge: Genv.t F V) sp op args m1 m2, + (forall b z, Mem.valid_pointer m1 b z = Mem.valid_pointer m2 b z) -> + eval_operation ge sp op args m1 = eval_operation ge sp op args m2. +Proof. + intros until m2. destruct op eqn:OP; simpl; try congruence. + - intros MEM; destruct cond; simpl; try congruence; + repeat (destruct args; simpl; try congruence); + erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto. + - intro MEM; destruct cond; simpl; try congruence; + repeat (destruct args; simpl; try congruence); + erewrite cmpu_bool_valid_pointer_eq || erewrite cmplu_bool_valid_pointer_eq; eauto. +Qed. + (** Global variables mentioned in an operation or addressing mode *) Definition globals_addressing (addr: addressing) : list ident := @@ -1374,12 +1416,12 @@ Proof. - apply Val.add_inject; auto. inv H2; inv H3; simpl; auto. - apply Val.sub_inject; auto. inv H2; inv H3; simpl; auto. (* div, divu *) - - inv H4; inv H3; simpl in H1; inv H1. simpl. - destruct (Int.eq i0 Int.zero - || Int.eq i (Int.repr Int.min_signed) && Int.eq i0 Int.mone); inv H2. - TrivialExists. - - inv H4; inv H3; simpl in H1; inv H1. simpl. - destruct (Int.eq i0 Int.zero); inv H2. TrivialExists. + - inv H4; inv H2; trivial. cbn. + destruct (_ || _); cbn; + constructor. + - inv H4; inv H2; trivial. cbn. + destruct (Int.eq i0 Int.zero); cbn; + constructor. (* and*) - inv H4; inv H2; simpl; auto. - generalize (eval_shift_inject s a H2); intros J; inv H4; inv J; simpl; auto. @@ -1411,8 +1453,8 @@ Proof. (* shru *) - inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int.iwordsize); auto. (* shrx *) - - inv H4; simpl in H1; try discriminate. simpl. - destruct (Int.ltu n (Int.repr 31)); inv H1. TrivialExists. + - inv H4; cbn; trivial. + destruct (Int.ltu n (Int.repr 31)); inv H; cbn; trivial. (* shift-ext *) - inv H4; simpl; auto. - inv H4; simpl; auto. @@ -1447,12 +1489,10 @@ Proof. - inv H4; inv H2; simpl; auto. - inv H4; inv H2; simpl; auto. (* divl, divlu *) - - inv H4; inv H3; simpl in H1; inv H1. simpl. - destruct (Int64.eq i0 Int64.zero - || Int64.eq i (Int64.repr Int64.min_signed) && Int64.eq i0 Int64.mone); inv H2. - TrivialExists. - - inv H4; inv H3; simpl in H1; inv H1. simpl. - destruct (Int64.eq i0 Int64.zero); inv H2. TrivialExists. + - inv H4; inv H2; cbn; trivial. + destruct (_ || _); cbn; trivial. + - inv H4; inv H2; cbn; trivial. + destruct (Int64.eq i0 Int64.zero); cbn; trivial. (* andl *) - inv H4; inv H2; simpl; auto. - generalize (eval_shiftl_inject s a H2); intros J; inv H4; inv J; simpl; auto. @@ -1484,8 +1524,8 @@ Proof. (* shrlu *) - inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int64.iwordsize'); auto. (* shrlx *) - - inv H4; simpl in H1; try discriminate. simpl. - destruct (Int.ltu n (Int.repr 63)); inv H1. TrivialExists. + - inv H4; cbn; trivial. + destruct (Int.ltu n (Int.repr 63)); inv H; cbn; trivial. (* shift-ext *) - inv H4; simpl; auto. - inv H4; simpl; auto. @@ -1516,37 +1556,29 @@ Proof. - inv H4; simpl; auto. - inv H4; simpl; auto. (* intoffloat, intuoffloat *) - - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_int f0); simpl in H2; inv H2. - exists (Vint i); auto. - - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_intu f0); simpl in H2; inv H2. - exists (Vint i); auto. + - inv H4; cbn; trivial. destruct (Float.to_int f0); cbn; trivial. + - inv H4; cbn; trivial. destruct (Float.to_intu f0); cbn; trivial. (* floatofint, floatofintu *) - - inv H4; simpl in H1; inv H1. simpl. TrivialExists. - - inv H4; simpl in H1; inv H1. simpl. TrivialExists. + - inv H4; cbn; trivial. + - inv H4; cbn; trivial. (* intofsingle, intuofsingle *) - - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_int f0); simpl in H2; inv H2. - exists (Vint i); auto. - - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_intu f0); simpl in H2; inv H2. - exists (Vint i); auto. + - inv H4; cbn; trivial. destruct (Float32.to_int f0); cbn; trivial. + - inv H4; cbn; trivial. destruct (Float32.to_intu f0); cbn; trivial. (* singleofint, singleofintu *) - - inv H4; simpl in H1; inv H1. simpl. TrivialExists. - - inv H4; simpl in H1; inv H1. simpl. TrivialExists. + - inv H4; cbn; trivial. + - inv H4; cbn; trivial. (* longoffloat, longuoffloat *) - - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_long f0); simpl in H2; inv H2. - exists (Vlong i); auto. - - inv H4; simpl in H1; inv H1. simpl. destruct (Float.to_longu f0); simpl in H2; inv H2. - exists (Vlong i); auto. + - inv H4; cbn; trivial. destruct (Float.to_long f0); cbn; trivial. + - inv H4; cbn; trivial. destruct (Float.to_longu f0); cbn; trivial. (* floatoflong, floatoflongu *) - - inv H4; simpl in H1; inv H1. simpl. TrivialExists. - - inv H4; simpl in H1; inv H1. simpl. TrivialExists. + - inv H4; cbn; trivial. + - inv H4; cbn; trivial. (* longofsingle, longuofsingle *) - - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_long f0); simpl in H2; inv H2. - exists (Vlong i); auto. - - inv H4; simpl in H1; inv H1. simpl. destruct (Float32.to_longu f0); simpl in H2; inv H2. - exists (Vlong i); auto. + - inv H4; cbn; trivial. destruct (Float32.to_long f0); cbn; trivial. + - inv H4; cbn; trivial. destruct (Float32.to_longu f0); cbn; trivial. (* singleoflong, singleoflongu *) - - inv H4; simpl in H1; inv H1. simpl. TrivialExists. - - inv H4; simpl in H1; inv H1. simpl. TrivialExists. + - inv H4; cbn; trivial. + - inv H4; cbn; trivial. (* cmp, sel *) - subst v1. destruct (eval_condition cond vl1 m1) eqn:?. exploit eval_condition_inj; eauto. intros EQ; rewrite EQ. @@ -1576,6 +1608,21 @@ Proof. - apply Val.offset_ptr_inject; auto. Qed. + +Lemma eval_addressing_inj_none: + forall addr sp1 vl1 sp2 vl2, + (forall id ofs, + In id (globals_addressing addr) -> + Val.inject f (Genv.symbol_address ge1 id ofs) (Genv.symbol_address ge2 id ofs)) -> + Val.inject f sp1 sp2 -> + Val.inject_list f vl1 vl2 -> + eval_addressing ge1 sp1 addr vl1 = None -> + eval_addressing ge2 sp2 addr vl2 = None. +Proof. + intros until vl2. intros Hglobal Hinjsp Hinjvl. + destruct addr; simpl in *; + inv Hinjvl; trivial; try discriminate; inv H0; trivial; try discriminate; inv H2; trivial; try discriminate. +Qed. End EVAL_COMPAT. (** Compatibility of the evaluation functions with the ``is less defined'' relation over values. *) @@ -1682,6 +1729,18 @@ Proof. destruct H1 as [v2 [A B]]. exists v2; split; auto. rewrite val_inject_lessdef; auto. Qed. +Lemma eval_addressing_lessdef_none: + forall sp addr vl1 vl2, + Val.lessdef_list vl1 vl2 -> + eval_addressing genv sp addr vl1 = None -> + eval_addressing genv sp addr vl2 = None. +Proof. + intros. rewrite val_inject_list_lessdef in H. + eapply eval_addressing_inj_none with (sp1 := sp). + intros. rewrite <- val_inject_lessdef; auto. + rewrite <- val_inject_lessdef; auto. + eauto. auto. +Qed. End EVAL_LESSDEF. (** Compatibility of the evaluation functions with memory injections. *) @@ -1734,6 +1793,19 @@ Proof. econstructor; eauto. rewrite Ptrofs.add_zero_l; auto. Qed. +Lemma eval_addressing_inject_none: + forall addr vl1 vl2, + Val.inject_list f vl1 vl2 -> + eval_addressing genv (Vptr sp1 Ptrofs.zero) addr vl1 = None -> + eval_addressing genv (Vptr sp2 Ptrofs.zero) (shift_stack_addressing delta addr) vl2 = None. +Proof. + intros. + rewrite eval_shift_stack_addressing. + eapply eval_addressing_inj_none with (sp1 := Vptr sp1 Ptrofs.zero); eauto. + intros. apply symbol_address_inject. + econstructor; eauto. rewrite Ptrofs.add_zero_l; auto. +Qed. + Lemma eval_operation_inject: forall op vl1 vl2 v1 m1 m2, Val.inject_list f vl1 vl2 -> diff --git a/aarch64/OpWeights.ml b/aarch64/OpWeights.ml new file mode 100644 index 00000000..5cdd002c --- /dev/null +++ b/aarch64/OpWeights.ml @@ -0,0 +1,353 @@ +open Op;; +open PrepassSchedulingOracleDeps;; + +module Cortex_A53= + struct + let resource_bounds = [| 2; 2; 1; 1 |];; (* instr ; ALU ; MAC; LSU *) + let nr_non_pipelined_units = 1;; + + let latency_of_op (op : operation) (nargs : int) = + match op with + | Omove + | Ointconst _ + | Olongconst _ + | Ofloatconst _ + | Osingleconst _ + | Oaddrsymbol _ + | Oaddrstack _ -> 1 + | Oshift _ -> 2 + | Oadd -> 1 + | Oaddshift _ -> 2 + | Oaddimm _ + | Oneg -> 1 + | Onegshift _ -> 2 + | Osub -> 1 + | Osubshift _ -> 2 + | Omul + | Omuladd + | Omulsub -> 4 + | Odiv + | Odivu -> 29 + | Oand -> 1 + | Oandshift _ -> 2 + | Oandimm _ -> 1 + | Oor -> 1 + | Oorshift _ -> 2 + | Oorimm _ -> 1 + | Oxor -> 1 + | Oxorshift _ -> 2 + | Oxorimm _ -> 1 + | Onot -> 1 + | Onotshift _ -> 2 + | Obic -> 1 + | Obicshift _ -> 2 + | Oorn -> 1 + | Oornshift _ -> 2 + | Oeqv -> 1 + | Oeqvshift _ -> 2 + | Oshl + | Oshr + | Oshru -> 2 + | Oshrximm _ -> 6 + | Ozext _ + | Osext _ -> 1 + | Oshlzext _ + | Oshlsext _ + | Ozextshr _ + | Osextshr _ -> 2 + + (* 64-bit integer arithmetic *) + | Oshiftl _ -> 2 + | Oextend _ -> 1 + | Omakelong + | Olowlong + | Ohighlong + | Oaddl -> 1 + | Oaddlshift _ + | Oaddlext _ -> 2 + | Oaddlimm _ + | Onegl -> 1 + | Oneglshift _ -> 2 + | Osubl -> 1 + | Osublshift _ + | Osublext _ -> 2 + | Omull + | Omulladd + | Omullsub + | Omullhs + | Omullhu -> 4 + | Odivl -> 50 + | Odivlu -> 50 + | Oandl -> 1 + | Oandlshift _ -> 2 + | Oandlimm _ + | Oorl -> 1 + | Oorlshift _ -> 2 + | Oorlimm _ + | Oxorl -> 1 + | Oxorlshift _ -> 2 + | Oxorlimm _ + | Onotl -> 1 + | Onotlshift _ -> 2 + | Obicl -> 1 + | Obiclshift _ -> 2 + | Oornl -> 1 + | Oornlshift _ -> 2 + | Oeqvl -> 1 + | Oeqvlshift _ -> 2 + | Oshll + | Oshrl + | Oshrlu -> 2 + | Oshrlximm _ -> 6 + | Ozextl _ + | Osextl _ -> 1 + | Oshllzext _ + | Oshllsext _ + | Ozextshrl _ + | Osextshrl _ -> 2 + + (* 64-bit floating-point arithmetic *) + | Onegf (* r [rd = - r1] *) + | Oabsf (* r [rd = abs(r1)] *) + | Oaddf (* r [rd = r1 + r2] *) + | Osubf (* r [rd = r1 - r2] *) + | Omulf (* r [rd = r1 * r2] *) +(* 32-bit floating-point arithmetic *) + | Onegfs (* r [rd = - r1] *) + | Oabsfs (* r [rd = abs(r1)] *) + | Oaddfs (* r [rd = r1 + r2] *) + | Osubfs (* r [rd = r1 - r2] *) + | Omulfs (* r [rd = r1 * r2] *) + | Osingleoffloat (* r [rd] is [r1] truncated to single-precision float *) + | Ofloatofsingle (* r [rd] is [r1] extended to double-precision float *) +(* Conversions between int and float *) + | Ointoffloat (* r [rd = signed_int_of_float64(r1)] *) + | Ointuoffloat (* r [rd = unsigned_int_of_float64(r1)] *) + | Ofloatofint (* r [rd = float64_of_signed_int(r1)] *) + | Ofloatofintu (* r [rd = float64_of_unsigned_int(r1)] *) + | Ointofsingle (* r [rd = signed_int_of_float32(r1)] *) + | Ointuofsingle (* r [rd = unsigned_int_of_float32(r1)] *) + | Osingleofint (* r [rd = float32_of_signed_int(r1)] *) + | Osingleofintu (* r [rd = float32_of_unsigned_int(r1)] *) + | Olongoffloat (* r [rd = signed_long_of_float64(r1)] *) + | Olonguoffloat (* r [rd = unsigned_long_of_float64(r1)] *) + | Ofloatoflong (* r [rd = float64_of_signed_long(r1)] *) + | Ofloatoflongu (* r [rd = float64_of_unsigned_long(r1)] *) + | Olongofsingle (* r [rd = signed_long_of_float32(r1)] *) + | Olonguofsingle (* r [rd = unsigned_long_of_float32(r1)] *) + | Osingleoflong (* r [rd = float32_of_signed_long(r1)] *) + | Osingleoflongu (* r [rd = float32_of_unsigned_int(r1)] *) + -> 6 + | Odivf -> 50 (* r [rd = r1 / r2] *) + | Odivfs -> 20 + (* Boolean tests *) + | Ocmp cmp | Osel (cmp, _) -> + (match cmp with + | Ccompf _ (* r FP comparison *) + | Cnotcompf _ (* r negation of an FP comparison *) + | Ccompfzero _ (* r comparison with 0.0 *) + | Cnotcompfzero _ (* r negation of comparison with 0.0 *) + | Ccompfs _ (* r FP comparison *) + | Cnotcompfs _ (* r negation of an FP comparison *) + | Ccompfszero _ (* r equal to 0.0 *) + | Cnotcompfszero _ (* r not equal to 0.0 *) -> 6 + | _ -> 1);; + + let resources_of_op (op : operation) (nargs : int) = + match op with + | Omove + | Ointconst _ + | Olongconst _ + | Ofloatconst _ + | Osingleconst _ + | Oaddrsymbol _ + | Oaddrstack _ + (* 32-bit integer arithmetic *) + | Oshift _ + | Oadd + | Oaddshift _ + | Oaddimm _ + | Oneg + | Onegshift _ + | Osub + | Osubshift _ -> [| 1 ; 1; 0; 0 |] + | Omul + | Omuladd + | Omulsub -> [| 1; 1; 1; 0 |] + | Odiv + | Odivu -> [| 1; 0; 0; 0 |] + | Oand + | Oandshift _ + | Oandimm _ + | Oor + | Oorshift _ + | Oorimm _ + | Oxor + | Oxorshift _ + | Oxorimm _ + | Onot + | Onotshift _ + | Obic + | Obicshift _ + | Oorn + | Oornshift _ + | Oeqv + | Oeqvshift _ + | Oshl + | Oshr + | Oshru + | Oshrximm _ + | Ozext _ + | Osext _ + | Oshlzext _ + | Oshlsext _ + | Ozextshr _ + | Osextshr _ + +(* 64-bit integer arithmetic *) + | Oshiftl _ + | Oextend _ + | Omakelong + | Olowlong + | Ohighlong + | Oaddl + | Oaddlshift _ + | Oaddlext _ + | Oaddlimm _ + | Onegl + | Oneglshift _ + | Osubl + | Osublshift _ + | Osublext _ -> [| 1 ; 1 ; 0; 0 |] + | Omull + | Omulladd + | Omullsub + | Omullhs + | Omullhu -> [| 1 ; 1 ; 1; 0 |] + | Odivl + | Odivlu -> [| 1; 0; 0; 0 |] + | Oandl + | Oandlshift _ + | Oandlimm _ + | Oorl + | Oorlshift _ + | Oorlimm _ + | Oxorl + | Oxorlshift _ + | Oxorlimm _ + | Onotl + | Onotlshift _ + | Obicl + | Obiclshift _ + | Oornl + | Oornlshift _ + | Oeqvl + | Oeqvlshift _ + | Oshll + | Oshrl + | Oshrlu + | Oshrlximm _ + | Ozextl _ + | Osextl _ + | Oshllzext _ + | Oshllsext _ + | Ozextshrl _ + | Osextshrl _ -> [| 1; 1; 0; 0 |] + (* 64-bit floating-point arithmetic *) + | Onegf (* r [rd = - r1] *) + | Oabsf (* r [rd = abs(r1)] *) + | Oaddf (* r [rd = r1 + r2] *) + | Osubf (* r [rd = r1 - r2] *) + | Omulf (* r [rd = r1 * r2] *) + | Odivf + (* 32-bit floating-point arithmetic *) + | Onegfs (* r [rd = - r1] *) + | Oabsfs (* r [rd = abs(r1)] *) + | Oaddfs (* r [rd = r1 + r2] *) + | Osubfs (* r [rd = r1 - r2] *) + | Omulfs (* r [rd = r1 * r2] *) + | Odivfs (* r [rd = r1 / r2] *) + | Osingleoffloat (* r [rd] is [r1] truncated to single-precision float *) + | Ofloatofsingle (* r [rd] is [r1] extended to double-precision float *) +(* Conversions between int and float *) + | Ointoffloat (* r [rd = signed_int_of_float64(r1)] *) + | Ointuoffloat (* r [rd = unsigned_int_of_float64(r1)] *) + | Ofloatofint (* r [rd = float64_of_signed_int(r1)] *) + | Ofloatofintu (* r [rd = float64_of_unsigned_int(r1)] *) + | Ointofsingle (* r [rd = signed_int_of_float32(r1)] *) + | Ointuofsingle (* r [rd = unsigned_int_of_float32(r1)] *) + | Osingleofint (* r [rd = float32_of_signed_int(r1)] *) + | Osingleofintu (* r [rd = float32_of_unsigned_int(r1)] *) + | Olongoffloat (* r [rd = signed_long_of_float64(r1)] *) + | Olonguoffloat (* r [rd = unsigned_long_of_float64(r1)] *) + | Ofloatoflong (* r [rd = float64_of_signed_long(r1)] *) + | Ofloatoflongu (* r [rd = float64_of_unsigned_long(r1)] *) + | Olongofsingle (* r [rd = signed_long_of_float32(r1)] *) + | Olonguofsingle (* r [rd = unsigned_long_of_float32(r1)] *) + | Osingleoflong (* r [rd = float32_of_signed_long(r1)] *) + | Osingleoflongu (* r [rd = float32_of_unsigned_int(r1)] *) + -> [| 1 ; 1; 1; 0 |] + +(* Boolean tests *) + | Ocmp cmp | Osel (cmp, _) -> + (match cmp with + | Ccompf _ (* r FP comparison *) + | Cnotcompf _ (* r negation of an FP comparison *) + | Ccompfzero _ (* r comparison with 0.0 *) + | Cnotcompfzero _ (* r negation of comparison with 0.0 *) + | Ccompfs _ (* r FP comparison *) + | Cnotcompfs _ (* r negation of an FP comparison *) + | Ccompfszero _ (* r equal to 0.0 *) + | Cnotcompfszero _ (* r not equal to 0.0 *) -> + [| 1; 1; 1; 0 |] + | _ -> [| 1; 1; 0; 0 |] );; + + let non_pipelined_resources_of_op (op : operation) (nargs : int) = + match op with + | Odiv | Odivu -> [| 29 |] + | Odivfs -> [| 20 |] + | Odivl | Odivlu | Odivf -> [| 50 |] + | _ -> [| -1 |];; + + let resources_of_cond (cmp : condition) (nargs : int) = + (match cmp with + | Ccompf _ (* r FP comparison *) + | Cnotcompf _ (* r negation of an FP comparison *) + | Ccompfzero _ (* r comparison with 0.0 *) + | Cnotcompfzero _ (* r negation of comparison with 0.0 *) + | Ccompfs _ (* r FP comparison *) + | Cnotcompfs _ (* r negation of an FP comparison *) + | Ccompfszero _ (* r equal to 0.0 *) + | Cnotcompfszero _ (* r not equal to 0.0 *) -> + [| 1; 1; 1; 0 |] + | _ -> [| 1; 1; 0; 0 |] );; + + let latency_of_load trap chunk (addr : addressing) (nargs : int) = 3;; + let latency_of_call _ _ = 6;; + + let resources_of_load trap chunk addressing nargs = [| 1; 0; 0; 1 |];; + + let resources_of_store chunk addressing nargs = [| 1; 0; 0; 1 |];; + + let resources_of_call _ _ = resource_bounds;; + let resources_of_builtin _ = resource_bounds;; + end;; + +let get_opweights () : opweights = + match !Clflags.option_mtune with + | "cortex-a53" | "cortex-a35" | "" -> + { + pipelined_resource_bounds = Cortex_A53.resource_bounds; + nr_non_pipelined_units = Cortex_A53.nr_non_pipelined_units; + latency_of_op = Cortex_A53.latency_of_op; + resources_of_op = Cortex_A53.resources_of_op; + non_pipelined_resources_of_op = Cortex_A53.non_pipelined_resources_of_op; + latency_of_load = Cortex_A53.latency_of_load; + resources_of_load = Cortex_A53.resources_of_load; + resources_of_store = Cortex_A53.resources_of_store; + resources_of_cond = Cortex_A53.resources_of_cond; + latency_of_call = Cortex_A53.latency_of_call; + resources_of_call = Cortex_A53.resources_of_call; + resources_of_builtin = Cortex_A53.resources_of_builtin + } + | xxx -> failwith (Printf.sprintf "unknown -mtune: %s" xxx);; diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml new file mode 100644 index 00000000..2c3eb14f --- /dev/null +++ b/aarch64/PrepassSchedulingOracle.ml @@ -0,0 +1,477 @@ +open AST +open RTL +open Maps +open InstructionScheduler +open Registers +open PrepassSchedulingOracleDeps + +let use_alias_analysis () = false + +let length_of_chunk = function +| Mint8signed +| Mint8unsigned -> 1 +| Mint16signed +| Mint16unsigned -> 2 +| Mint32 +| Mfloat32 +| Many32 -> 4 +| Mint64 +| Mfloat64 +| Many64 -> 8;; + +let get_simple_dependencies (opweights : opweights) (seqa : (instruction*Regset.t) array) = + let last_reg_reads : int list PTree.t ref = ref PTree.empty + and last_reg_write : (int*int) PTree.t ref = ref PTree.empty + and last_mem_reads : int list ref = ref [] + and last_mem_write : int option ref = ref None + and last_branch : int option ref = ref None + and last_non_pipelined_op : int array = Array.make + opweights.nr_non_pipelined_units ( -1 ) + and latency_constraints : latency_constraint list ref = ref [] in + let add_constraint instr_from instr_to latency = + assert (instr_from <= instr_to); + assert (latency >= 0); + if instr_from = instr_to + then (if latency = 0 + then () + else failwith "PrepassSchedulingOracle.get_dependencies: negative self-loop") + else + latency_constraints := + { instr_from = instr_from; + instr_to = instr_to; + latency = latency + }:: !latency_constraints + and get_last_reads reg = + match PTree.get reg !last_reg_reads + with Some l -> l + | None -> [] in + let add_input_mem i = + if not (use_alias_analysis ()) + then + begin + begin + (* Read after write *) + match !last_mem_write with + | None -> () + | Some j -> add_constraint j i 1 + end; + last_mem_reads := i :: !last_mem_reads + end + and add_output_mem i = + if not (use_alias_analysis ()) + then + begin + begin + (* Write after write *) + match !last_mem_write with + | None -> () + | Some j -> add_constraint j i 1 + end; + (* Write after read *) + List.iter (fun j -> add_constraint j i 0) !last_mem_reads; + last_mem_write := Some i; + last_mem_reads := [] + end + and add_input_reg i reg = + begin + (* Read after write *) + match PTree.get reg !last_reg_write with + | None -> () + | Some (j, latency) -> add_constraint j i latency + end; + last_reg_reads := PTree.set reg + (i :: get_last_reads reg) + !last_reg_reads + and add_output_reg i latency reg = + begin + (* Write after write *) + match PTree.get reg !last_reg_write with + | None -> () + | Some (j, _) -> add_constraint j i 1 + end; + begin + (* Write after read *) + List.iter (fun j -> add_constraint j i 0) (get_last_reads reg) + end; + last_reg_write := PTree.set reg (i, latency) !last_reg_write; + last_reg_reads := PTree.remove reg !last_reg_reads + in + let add_input_regs i regs = List.iter (add_input_reg i) regs in + let rec add_builtin_res i (res : reg builtin_res) = + match res with + | BR r -> add_output_reg i 10 r + | BR_none -> () + | BR_splitlong (hi, lo) -> add_builtin_res i hi; + add_builtin_res i lo in + let rec add_builtin_arg i (ba : reg builtin_arg) = + match ba with + | BA r -> add_input_reg i r + | BA_int _ | BA_long _ | BA_float _ | BA_single _ -> () + | BA_loadstack(_,_) -> add_input_mem i + | BA_addrstack _ -> () + | BA_loadglobal(_, _, _) -> add_input_mem i + | BA_addrglobal _ -> () + | BA_splitlong(hi, lo) -> add_builtin_arg i hi; + add_builtin_arg i lo + | BA_addptr(a1, a2) -> add_builtin_arg i a1; + add_builtin_arg i a2 in + let irreversible_action i = + match !last_branch with + | None -> () + | Some j -> add_constraint j i 1 in + let set_branch i = + irreversible_action i; + last_branch := Some i in + let add_non_pipelined_resources i resources = + Array.iter2 + (fun latency last -> + if latency >= 0 && last >= 0 then add_constraint last i latency) + resources last_non_pipelined_op; + Array.iteri (fun rsc latency -> + if latency >= 0 + then last_non_pipelined_op.(rsc) <- i) resources + in + Array.iteri + begin + fun i (insn, other_uses) -> + List.iter (fun use -> + add_input_reg i use) + (Regset.elements other_uses); + + match insn with + | Inop _ -> () + | Iop(op, inputs, output, _) -> + add_non_pipelined_resources i + (opweights.non_pipelined_resources_of_op op (List.length inputs)); + (if Op.is_trapping_op op then irreversible_action i); + add_input_regs i inputs; + add_output_reg i (opweights.latency_of_op op (List.length inputs)) output + | Iload(trap, chunk, addressing, addr_regs, output, _) -> + (if trap=TRAP then irreversible_action i); + add_input_mem i; + add_input_regs i addr_regs; + add_output_reg i (opweights.latency_of_load trap chunk addressing (List.length addr_regs)) output + | Istore(chunk, addressing, addr_regs, input, _) -> + irreversible_action i; + add_input_regs i addr_regs; + add_input_reg i input; + add_output_mem i + | Icall(signature, ef, inputs, output, _) -> + set_branch i; + (match ef with + | Datatypes.Coq_inl r -> add_input_reg i r + | Datatypes.Coq_inr symbol -> () + ); + add_input_mem i; + add_input_regs i inputs; + add_output_reg i (opweights.latency_of_call signature ef) output; + add_output_mem i; + failwith "Icall" + | Itailcall(signature, ef, inputs) -> + set_branch i; + (match ef with + | Datatypes.Coq_inl r -> add_input_reg i r + | Datatypes.Coq_inr symbol -> () + ); + add_input_mem i; + add_input_regs i inputs; + failwith "Itailcall" + | Ibuiltin(ef, builtin_inputs, builtin_output, _) -> + set_branch i; + add_input_mem i; + List.iter (add_builtin_arg i) builtin_inputs; + add_builtin_res i builtin_output; + add_output_mem i; + failwith "Ibuiltin" + | Icond(cond, inputs, _, _, _) -> + set_branch i; + add_input_mem i; + add_input_regs i inputs + | Ijumptable(input, _) -> + set_branch i; + add_input_reg i input; + failwith "Ijumptable" + | Ireturn(Some input) -> + set_branch i; + add_input_reg i input; + failwith "Ireturn" + | Ireturn(None) -> + set_branch i; + failwith "Ireturn none" + end seqa; + !latency_constraints;; + +let resources_of_instruction (opweights : opweights) = function + | Inop _ -> Array.map (fun _ -> 0) opweights.pipelined_resource_bounds + | Iop(op, inputs, output, _) -> + opweights.resources_of_op op (List.length inputs) + | Iload(trap, chunk, addressing, addr_regs, output, _) -> + opweights.resources_of_load trap chunk addressing (List.length addr_regs) + | Istore(chunk, addressing, addr_regs, input, _) -> + opweights.resources_of_store chunk addressing (List.length addr_regs) + | Icall(signature, ef, inputs, output, _) -> + opweights.resources_of_call signature ef + | Ibuiltin(ef, builtin_inputs, builtin_output, _) -> + opweights.resources_of_builtin ef + | Icond(cond, args, _, _ , _) -> + opweights.resources_of_cond cond (List.length args) + | Itailcall _ | Ijumptable _ | Ireturn _ -> opweights.pipelined_resource_bounds + +let print_sequence pp (seqa : instruction array) = + Array.iteri ( + fun i (insn : instruction) -> + PrintRTL.print_instruction pp (i, insn)) seqa;; + +type unique_id = int + +type 'a symbolic_term_node = + | STop of Op.operation * 'a list + | STinitial_reg of int + | STother of int;; + +type symbolic_term = { + hash_id : unique_id; + hash_ct : symbolic_term symbolic_term_node + };; + +let rec print_term channel term = + match term.hash_ct with + | STop(op, args) -> + PrintOp.print_operation print_term channel (op, args) + | STinitial_reg n -> Printf.fprintf channel "x%d" n + | STother n -> Printf.fprintf channel "y%d" n;; + +type symbolic_term_table = { + st_table : (unique_id symbolic_term_node, symbolic_term) Hashtbl.t; + mutable st_next_id : unique_id };; + +let hash_init () = { + st_table = Hashtbl.create 20; + st_next_id = 0 + };; + +let ground_to_id = function + | STop(op, l) -> STop(op, List.map (fun t -> t.hash_id) l) + | STinitial_reg r -> STinitial_reg r + | STother i -> STother i;; + +let hash_node (table : symbolic_term_table) (term : symbolic_term symbolic_term_node) : symbolic_term = + let grounded = ground_to_id term in + match Hashtbl.find_opt table.st_table grounded with + | Some x -> x + | None -> + let term' = { hash_id = table.st_next_id; + hash_ct = term } in + (if table.st_next_id = max_int then failwith "hash: max_int"); + table.st_next_id <- table.st_next_id + 1; + Hashtbl.add table.st_table grounded term'; + term';; + +type access = { + base : symbolic_term; + offset : int64; + length : int + };; + +let term_equal a b = (a.hash_id = b.hash_id);; + +let access_of_addressing get_reg chunk addressing args = + match addressing, args with + | (Op.Aindexed ofs), [reg] -> Some + { base = get_reg reg; + offset = Camlcoq.camlint64_of_ptrofs ofs; + length = length_of_chunk chunk + } + | _, _ -> None ;; +(* TODO: global *) + +let symbolic_execution (seqa : instruction array) = + let regs = ref PTree.empty + and table = hash_init() in + let assign reg term = regs := PTree.set reg term !regs + and hash term = hash_node table term in + let get_reg reg = + match PTree.get reg !regs with + | None -> hash (STinitial_reg (Camlcoq.P.to_int reg)) + | Some x -> x in + let targets = Array.make (Array.length seqa) None in + Array.iteri + begin + fun i insn -> + match insn with + | Iop(Op.Omove, [input], output, _) -> + assign output (get_reg input) + | Iop(op, inputs, output, _) -> + assign output (hash (STop(op, List.map get_reg inputs))) + + | Iload(trap, chunk, addressing, args, output, _) -> + let access = access_of_addressing get_reg chunk addressing args in + targets.(i) <- access; + assign output (hash (STother(i))) + + | Icall(_, _, _, output, _) + | Ibuiltin(_, _, BR output, _) -> + assign output (hash (STother(i))) + + | Istore(chunk, addressing, args, va, _) -> + let access = access_of_addressing get_reg chunk addressing args in + targets.(i) <- access + + | Inop _ -> () + | Ibuiltin(_, _, BR_none, _) -> () + | Ibuiltin(_, _, BR_splitlong _, _) -> failwith "BR_splitlong" + + | Itailcall (_, _, _) + |Icond (_, _, _, _, _) + |Ijumptable (_, _) + |Ireturn _ -> () + end seqa; + targets;; + +let print_access channel = function + | None -> Printf.fprintf channel "any" + | Some x -> Printf.fprintf channel "%a + %Ld" print_term x.base x.offset;; + +let print_targets channel seqa = + let targets = symbolic_execution seqa in + Array.iteri + (fun i insn -> + match insn with + | Iload _ -> Printf.fprintf channel "%d: load %a\n" + i print_access targets.(i) + | Istore _ -> Printf.fprintf channel "%d: store %a\n" + i print_access targets.(i) + | _ -> () + ) seqa;; + +let may_overlap a0 b0 = + match a0, b0 with + | (None, _) | (_ , None) -> true + | (Some a), (Some b) -> + if term_equal a.base b.base + then (max a.offset b.offset) < + (min (Int64.add (Int64.of_int a.length) a.offset) + (Int64.add (Int64.of_int b.length) b.offset)) + else match a.base.hash_ct, b.base.hash_ct with + | STop(Op.Oaddrsymbol(ida, ofsa),[]), + STop(Op.Oaddrsymbol(idb, ofsb),[]) -> + (ida=idb) && + let ao = Int64.add a.offset (Camlcoq.camlint64_of_ptrofs ofsa) + and bo = Int64.add b.offset (Camlcoq.camlint64_of_ptrofs ofsb) in + (max ao bo) < + (min (Int64.add (Int64.of_int a.length) ao) + (Int64.add (Int64.of_int b.length) bo)) + | STop(Op.Oaddrstack _, []), + STop(Op.Oaddrsymbol _, []) + | STop(Op.Oaddrsymbol _, []), + STop(Op.Oaddrstack _, []) -> false + | STop(Op.Oaddrstack(ofsa),[]), + STop(Op.Oaddrstack(ofsb),[]) -> + let ao = Int64.add a.offset (Camlcoq.camlint64_of_ptrofs ofsa) + and bo = Int64.add b.offset (Camlcoq.camlint64_of_ptrofs ofsb) in + (max ao bo) < + (min (Int64.add (Int64.of_int a.length) ao) + (Int64.add (Int64.of_int b.length) bo)) + | _ -> true;; + +(* +(* TODO suboptimal quadratic algorithm *) +let get_alias_dependencies seqa = + let targets = symbolic_execution seqa + and deps = ref [] in + let add_constraint instr_from instr_to latency = + deps := { instr_from = instr_from; + instr_to = instr_to; + latency = latency + }:: !deps in + for i=0 to (Array.length seqa)-1 + do + for j=0 to i-1 + do + match seqa.(j), seqa.(i) with + | (Istore _), ((Iload _) | (Istore _)) -> + if may_overlap targets.(j) targets.(i) + then add_constraint j i 1 + | (Iload _), (Istore _) -> + if may_overlap targets.(j) targets.(i) + then add_constraint j i 0 + | (Istore _ | Iload _), (Icall _ | Ibuiltin _) + | (Icall _ | Ibuiltin _), (Icall _ | Ibuiltin _ | Iload _ | Istore _) -> + add_constraint j i 1 + | (Inop _ | Iop _), _ + | _, (Inop _ | Iop _) + | (Iload _), (Iload _) -> () + done + done; + !deps;; + *) + +let define_problem (opweights : opweights) seqa = + let simple_deps = get_simple_dependencies opweights seqa in + { max_latency = -1; + resource_bounds = opweights.pipelined_resource_bounds; + instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa); + latency_constraints = + (* if (use_alias_analysis ()) + then (get_alias_dependencies seqa) @ simple_deps + else *) simple_deps };; + +let zigzag_scheduler problem early_ones = + let nr_instructions = get_nr_instructions problem in + assert(nr_instructions = (Array.length early_ones)); + match list_scheduler problem with + | Some fwd_schedule -> + let fwd_makespan = fwd_schedule.((Array.length fwd_schedule) - 1) in + let constraints' = ref problem.latency_constraints in + Array.iteri (fun i is_early -> + if is_early then + constraints' := { + instr_from = i; + instr_to = nr_instructions ; + latency = fwd_makespan - fwd_schedule.(i) } ::!constraints' ) + early_ones; + validated_scheduler reverse_list_scheduler + { problem with latency_constraints = !constraints' } + | None -> None;; + +let prepass_scheduler_by_name name problem early_ones = + match name with + | "zigzag" -> zigzag_scheduler problem early_ones + | _ -> scheduler_by_name name problem + +let schedule_sequence (seqa : (instruction*Regset.t) array) = + let opweights = OpWeights.get_opweights () in + try + if (Array.length seqa) <= 1 + then None + else + begin + let nr_instructions = Array.length seqa in + (if !Clflags.option_debug_compcert > 6 + then Printf.printf "prepass scheduling length = %d\n" (Array.length seqa)); + let problem = define_problem opweights seqa in + (if !Clflags.option_debug_compcert > 7 + then (print_sequence stdout (Array.map fst seqa); + print_problem stdout problem)); + match prepass_scheduler_by_name + (!Clflags.option_fprepass_sched) + problem + (Array.map (fun (ins, _) -> + match ins with + | Icond _ -> true + | _ -> false) seqa) with + | None -> Printf.printf "no solution in prepass scheduling\n"; + None + | Some solution -> + let positions = Array.init nr_instructions (fun i -> i) in + Array.sort (fun i j -> + let si = solution.(i) and sj = solution.(j) in + if si < sj then -1 + else if si > sj then 1 + else i - j) positions; + Some positions + end + with (Failure s) -> + Printf.printf "failure in prepass scheduling: %s\n" s; + None;; + diff --git a/aarch64/PrepassSchedulingOracleDeps.ml b/aarch64/PrepassSchedulingOracleDeps.ml new file mode 100644 index 00000000..8d10d406 --- /dev/null +++ b/aarch64/PrepassSchedulingOracleDeps.ml @@ -0,0 +1,17 @@ +type called_function = (Registers.reg, AST.ident) Datatypes.sum + +type opweights = + { + pipelined_resource_bounds : int array; + nr_non_pipelined_units : int; + latency_of_op : Op.operation -> int -> int; + resources_of_op : Op.operation -> int -> int array; + non_pipelined_resources_of_op : Op.operation -> int -> int array; + latency_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int; + resources_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int array; + resources_of_store : AST.memory_chunk -> Op.addressing -> int -> int array; + resources_of_cond : Op.condition -> int -> int array; + latency_of_call : AST.signature -> called_function -> int; + resources_of_call : AST.signature -> called_function -> int array; + resources_of_builtin : AST.external_function -> int array + };; diff --git a/aarch64/SelectLongproof.v b/aarch64/SelectLongproof.v index b051369c..513ee9bd 100644 --- a/aarch64/SelectLongproof.v +++ b/aarch64/SelectLongproof.v @@ -16,6 +16,7 @@ Require Import Coqlib Zbits. Require Import AST Integers Floats Values Memory Globalenvs. Require Import Cminor Op CminorSel. Require Import SelectOp SelectLong SelectOpproof. +Require Import OpHelpers OpHelpersproof. Local Open Scope cminorsel_scope. Local Transparent Archi.ptr64. @@ -23,8 +24,10 @@ Local Transparent Archi.ptr64. (** * Correctness of the smart constructors *) Section CMCONSTR. - -Variable ge: genv. +Variable prog: program. +Variable hf: helper_functions. +Hypothesis HELPERS: helper_functions_declared prog hf. +Let ge := Genv.globalenv prog. Variable sp: val. Variable e: env. Variable m: mem. @@ -556,25 +559,29 @@ Qed. Theorem eval_divls_base: partial_binary_constructor_sound divls_base Val.divls. Proof. red; intros; unfold divls_base; TrivialExists. + cbn. rewrite H1. reflexivity. Qed. Theorem eval_modls_base: partial_binary_constructor_sound modls_base Val.modls. Proof. red; intros; unfold modls_base, modl_aux. exploit Val.modls_divls; eauto. intros (q & A & B). subst z. - TrivialExists. repeat (econstructor; eauto with evalexpr). exact A. + TrivialExists. repeat (econstructor; eauto with evalexpr). + rewrite A. reflexivity. Qed. Theorem eval_divlu_base: partial_binary_constructor_sound divlu_base Val.divlu. Proof. red; intros; unfold divlu_base; TrivialExists. + cbn. rewrite H1. reflexivity. Qed. Theorem eval_modlu_base: partial_binary_constructor_sound modlu_base Val.modlu. Proof. red; intros; unfold modlu_base, modl_aux. exploit Val.modlu_divlu; eauto. intros (q & A & B). subst z. - TrivialExists. repeat (econstructor; eauto with evalexpr). exact A. + TrivialExists. repeat (econstructor; eauto with evalexpr). + rewrite A. reflexivity. Qed. Theorem eval_shrxlimm: @@ -589,7 +596,7 @@ Proof. destruct x; simpl in H0; try discriminate. change (Int.ltu Int.zero (Int.repr 63)) with true in H0; inv H0. rewrite Int64.shrx'_zero. auto. -- TrivialExists. +- TrivialExists. cbn. rewrite H0. reflexivity. Qed. (** General shifts *) @@ -723,42 +730,42 @@ Qed. Theorem eval_longoffloat: partial_unary_constructor_sound longoffloat Val.longoffloat. Proof. - red; intros; TrivialExists. + red; intros; TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_longuoffloat: partial_unary_constructor_sound longuoffloat Val.longuoffloat. Proof. - red; intros; TrivialExists. + red; intros; TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_floatoflong: partial_unary_constructor_sound floatoflong Val.floatoflong. Proof. - red; intros; TrivialExists. + red; intros; TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_floatoflongu: partial_unary_constructor_sound floatoflongu Val.floatoflongu. Proof. - red; intros; TrivialExists. + red; intros; TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_longofsingle: partial_unary_constructor_sound longofsingle Val.longofsingle. Proof. - red; intros; TrivialExists. + red; intros; TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_longuofsingle: partial_unary_constructor_sound longuofsingle Val.longuofsingle. Proof. - red; intros; TrivialExists. + red; intros; TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_singleoflong: partial_unary_constructor_sound singleoflong Val.singleoflong. Proof. - red; intros; TrivialExists. + red; intros; TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_singleoflongu: partial_unary_constructor_sound singleoflongu Val.singleoflongu. Proof. - red; intros; TrivialExists. + red; intros; TrivialExists. cbn. rewrite H0. reflexivity. Qed. End CMCONSTR. diff --git a/aarch64/SelectOp.vp b/aarch64/SelectOp.vp index 5bd96987..67575fdb 100644 --- a/aarch64/SelectOp.vp +++ b/aarch64/SelectOp.vp @@ -56,9 +56,13 @@ Nondetfunction add (e1: expr) (e2: expr) := | t1, Eop (Oshift s a) (t2:::Enil) ?? arith_shift s => Eop (Oaddshift s a) (t1 ::: t2 ::: Enil) | Eop Omul (t1:::t2:::Enil), t3 => - Eop Omuladd (t3:::t1:::t2:::Enil) + if Compopts.optim_madd tt + then Eop Omuladd (t3:::t1:::t2:::Enil) + else Eop Oadd (e1:::e2:::Enil) | t1, Eop Omul (t2:::t3:::Enil) => - Eop Omuladd (t1:::t2:::t3:::Enil) + if Compopts.optim_madd tt + then Eop Omuladd (t1:::t2:::t3:::Enil) + else Eop Oadd (e1:::e2:::Enil) | _, _ => Eop Oadd (e1:::e2:::Enil) end. @@ -547,6 +551,13 @@ Nondetfunction addressing (chunk: memory_chunk) (e: expr) := | _ => (Aindexed Int64.zero, e:::Enil) end. +(* floats *) +Definition divf_base (e1: expr) (e2: expr) := + Eop Odivf (e1 ::: e2 ::: Enil). + +Definition divfs_base (e1: expr) (e2: expr) := + Eop Odivfs (e1 ::: e2 ::: Enil). + (** ** Arguments of builtins *) Nondetfunction builtin_arg (e: expr) := diff --git a/aarch64/SelectOpproof.v b/aarch64/SelectOpproof.v index b78a5ed8..9ce7a8bf 100644 --- a/aarch64/SelectOpproof.v +++ b/aarch64/SelectOpproof.v @@ -16,6 +16,7 @@ Require Import Coqlib Zbits. Require Import AST Integers Floats Values Memory Builtins Globalenvs. Require Import Cminor Op CminorSel. Require Import SelectOp. +Require Import OpHelpers OpHelpersproof. Local Open Scope cminorsel_scope. Local Transparent Archi.ptr64. @@ -74,8 +75,10 @@ Ltac TrivialExists := (** * Correctness of the smart constructors *) Section CMCONSTR. - -Variable ge: genv. +Variable prog: program. +Variable hf: helper_functions. +Hypothesis HELPERS: helper_functions_declared prog hf. +Let ge := Genv.globalenv prog. Variable sp: val. Variable e: env. Variable m: mem. @@ -158,8 +161,10 @@ Proof. - rewrite <- Val.add_assoc. apply eval_addimm. EvalOp. - rewrite Val.add_commut. TrivialExists. - TrivialExists. -- rewrite Val.add_commut. TrivialExists. -- TrivialExists. +- destruct (Compopts.optim_madd tt). + + rewrite Val.add_commut. TrivialExists. + + TrivialExists. +- destruct (Compopts.optim_madd tt); TrivialExists. - TrivialExists. Qed. @@ -661,7 +666,8 @@ Theorem eval_divs_base: Val.divs x y = Some z -> exists v, eval_expr ge sp e m le (divs_base a b) v /\ Val.lessdef z v. Proof. - intros; unfold divs_base; TrivialExists. + intros; unfold divs_base; TrivialExists; cbn. + rewrite H1. reflexivity. Qed. Theorem eval_mods_base: @@ -673,7 +679,8 @@ Theorem eval_mods_base: Proof. intros; unfold mods_base, mod_aux. exploit Val.mods_divs; eauto. intros (q & A & B). subst z. - TrivialExists. repeat (econstructor; eauto with evalexpr). exact A. + TrivialExists. repeat (econstructor; eauto with evalexpr). + cbn. rewrite A. reflexivity. Qed. Theorem eval_divu_base: @@ -684,6 +691,7 @@ Theorem eval_divu_base: exists v, eval_expr ge sp e m le (divu_base a b) v /\ Val.lessdef z v. Proof. intros; unfold divu_base; TrivialExists. + cbn. rewrite H1. reflexivity. Qed. Theorem eval_modu_base: @@ -695,7 +703,8 @@ Theorem eval_modu_base: Proof. intros; unfold modu_base, mod_aux. exploit Val.modu_divu; eauto. intros (q & A & B). subst z. - TrivialExists. repeat (econstructor; eauto with evalexpr). exact A. + TrivialExists. repeat (econstructor; eauto with evalexpr). + rewrite A. reflexivity. Qed. Theorem eval_shrximm: @@ -710,7 +719,7 @@ Proof. destruct x; simpl in H0; try discriminate. change (Int.ltu Int.zero (Int.repr 31)) with true in H0; inv H0. rewrite Int.shrx_zero by (compute; auto). auto. -- TrivialExists. +- TrivialExists. cbn. rewrite H0. reflexivity. Qed. (** General shifts *) @@ -923,7 +932,7 @@ Theorem eval_intoffloat: Val.intoffloat x = Some y -> exists v, eval_expr ge sp e m le (intoffloat a) v /\ Val.lessdef y v. Proof. - intros; TrivialExists. + intros; TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_floatofint: @@ -934,7 +943,7 @@ Theorem eval_floatofint: Proof. intros until y; unfold floatofint. case (floatofint_match a); intros; InvEval. - TrivialExists. -- TrivialExists. +- TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_intuoffloat: @@ -943,7 +952,7 @@ Theorem eval_intuoffloat: Val.intuoffloat x = Some y -> exists v, eval_expr ge sp e m le (intuoffloat a) v /\ Val.lessdef y v. Proof. - intros; TrivialExists. + intros; TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_floatofintu: @@ -954,7 +963,7 @@ Theorem eval_floatofintu: Proof. intros until y; unfold floatofintu. case (floatofintu_match a); intros; InvEval. - TrivialExists. -- TrivialExists. +- TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_intofsingle: @@ -963,7 +972,7 @@ Theorem eval_intofsingle: Val.intofsingle x = Some y -> exists v, eval_expr ge sp e m le (intofsingle a) v /\ Val.lessdef y v. Proof. - intros; TrivialExists. + intros; TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_singleofint: @@ -974,7 +983,7 @@ Theorem eval_singleofint: Proof. intros until y; unfold singleofint. case (singleofint_match a); intros; InvEval. - TrivialExists. -- TrivialExists. +- TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_intuofsingle: @@ -983,7 +992,7 @@ Theorem eval_intuofsingle: Val.intuofsingle x = Some y -> exists v, eval_expr ge sp e m le (intuofsingle a) v /\ Val.lessdef y v. Proof. - intros; TrivialExists. + intros; TrivialExists. cbn. rewrite H0. reflexivity. Qed. Theorem eval_singleofintu: @@ -994,7 +1003,7 @@ Theorem eval_singleofintu: Proof. intros until y; unfold singleofintu. case (singleofintu_match a); intros; InvEval. - TrivialExists. -- TrivialExists. +- TrivialExists. cbn. rewrite H0. reflexivity. Qed. (** Selection *) @@ -1055,6 +1064,26 @@ Proof. - constructor; auto. Qed. +Theorem eval_divf_base: + forall le a b x y, + eval_expr ge sp e m le a x -> + eval_expr ge sp e m le b y -> + exists v, eval_expr ge sp e m le (divf_base a b) v /\ Val.lessdef (Val.divf x y) v. +Proof. + intros; unfold divf_base. + TrivialExists. +Qed. + +Theorem eval_divfs_base: + forall le a b x y, + eval_expr ge sp e m le a x -> + eval_expr ge sp e m le b y -> + exists v, eval_expr ge sp e m le (divfs_base a b) v /\ Val.lessdef (Val.divfs x y) v. +Proof. + intros; unfold divfs_base. + TrivialExists. +Qed. + (** Platform-specific known builtins *) Theorem eval_platform_builtin: diff --git a/aarch64/TargetPrinter.ml b/aarch64/TargetPrinter.ml index 78b9eb2a..6ba44cf0 100644 --- a/aarch64/TargetPrinter.ml +++ b/aarch64/TargetPrinter.ml @@ -133,7 +133,9 @@ module Target : TARGET = let name_of_section = function | Section_text -> ".text" - | Section_data i | Section_small_data i -> + | Section_data(i, true) -> + failwith "_Thread_local unsupported on this platform" + | Section_data(i, false) | Section_small_data i -> if i then ".data" else common_section () | Section_const i | Section_small_const i -> if i || (not !Clflags.option_fcommon) then ".section .rodata" else "COMM" @@ -227,6 +229,28 @@ module Target : TARGET = | EOuxtw n -> fprintf oc ", uxtw #%a" coqint n | EOuxtx n -> fprintf oc ", uxtx #%a" coqint n + let next_profiling_label = + let atomic_incr_counter = ref 0 in + fun () -> + let r = sprintf ".Lcompcert_atomic_incr%d" !atomic_incr_counter in + incr atomic_incr_counter; r;; + + let print_profiling_logger oc id kind = + assert (kind >= 0); + assert (kind <= 1); + fprintf oc "%s begin profiling %a %d: atomic increment\n" comment + Profilingaux.pp_id id kind; + let ofs = profiling_offset id kind and lbl = next_profiling_label () in + fprintf oc " adrp x15, %s+%d\n" profiling_counter_table_name ofs; + fprintf oc " add x15, x15, :lo12:(%s+%d)\n" profiling_counter_table_name ofs; + fprintf oc "%s:\n" lbl; + fprintf oc " ldaxr x17, [x15]\n"; + fprintf oc " add x17, x17, 1\n"; + fprintf oc " stlxr w17, x17, [x15]\n"; + fprintf oc " cbnz w17, %s\n" lbl; + fprintf oc "%s end profiling %a %d\n" comment + Profilingaux.pp_id id kind;; + (* Printing of instructions *) let print_instruction oc = function (* Branches *) @@ -525,6 +549,8 @@ module Target : TARGET = fprintf oc "%s begin inline assembly\n\t" comment; print_inline_asm preg_asm oc (camlstring_of_coqstring txt) sg args res; fprintf oc "%s end inline assembly\n" comment + | EF_profiling (id, coq_kind) -> + print_profiling_logger oc id (Z.to_int coq_kind) | _ -> assert false end @@ -581,7 +607,24 @@ module Target : TARGET = section oc Section_text; end + let aarch64_profiling_stub oc nr_items + profiling_id_table_name + profiling_counter_table_name = + fprintf oc " adrp x2, %s\n" profiling_counter_table_name; + fprintf oc " adrp x1, %s\n" profiling_id_table_name; + fprintf oc " add x2, x2, :lo12:%s\n" profiling_counter_table_name; + fprintf oc " add x1, x1, :lo12:%s\n" profiling_id_table_name; + fprintf oc " mov w0, %d\n" nr_items; + fprintf oc " b %s\n" profiling_write_table_helper ;; + + let print_atexit oc to_be_called = + fprintf oc " adrp x0, %s\n" to_be_called; + fprintf oc " add x0, x0, :lo12:%s\n" to_be_called; + fprintf oc " b atexit\n";; + + let print_epilogue oc = + print_profiling_epilogue elf_text_print_fun_info (Init_atexit print_atexit) aarch64_profiling_stub oc; if !Clflags.option_g then begin Debug.compute_gnu_file_enum (fun f -> ignore (print_file oc f)); section oc Section_text; diff --git a/aarch64/ValueAOp.v b/aarch64/ValueAOp.v index e0d98c85..e6a60d4e 100644 --- a/aarch64/ValueAOp.v +++ b/aarch64/ValueAOp.v @@ -96,8 +96,8 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval := | Omul, v1::v2::nil => mul v1 v2 | Omuladd, v1::v2::v3::nil => add v1 (mul v2 v3) | Omulsub, v1::v2::v3::nil => sub v1 (mul v2 v3) - | Odiv, v1::v2::nil => divs v1 v2 - | Odivu, v1::v2::nil => divu v1 v2 + | Odiv, v1::v2::nil => divs_total v1 v2 + | Odivu, v1::v2::nil => divu_total v1 v2 | Oand, v1::v2::nil => and v1 v2 | Oandshift s a, v1::v2::nil => and v1 (eval_static_shift s v2 a) | Oandimm n, v1::nil => and v1 (I n) @@ -145,8 +145,8 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval := | Omullsub, v1::v2::v3::nil => subl v1 (mull v2 v3) | Omullhs, v1::v2::nil => mullhs v1 v2 | Omullhu, v1::v2::nil => mullhu v1 v2 - | Odivl, v1::v2::nil => divls v1 v2 - | Odivlu, v1::v2::nil => divlu v1 v2 + | Odivl, v1::v2::nil => divls_total v1 v2 + | Odivlu, v1::v2::nil => divlu_total v1 v2 | Oandl, v1::v2::nil => andl v1 v2 | Oandlshift s a, v1::v2::nil => andl v1 (eval_static_shiftl s v2 a) | Oandlimm n, v1::nil => andl v1 (L n) @@ -191,20 +191,20 @@ Definition eval_static_operation (op: operation) (vl: list aval): aval := | Osingleoffloat, v1::nil => singleoffloat v1 | Ofloatofsingle, v1::nil => floatofsingle v1 - | Ointoffloat, v1::nil => intoffloat v1 - | Ointuoffloat, v1::nil => intuoffloat v1 + | Ointoffloat, v1::nil => intoffloat_total v1 + | Ointuoffloat, v1::nil => intuoffloat_total v1 | Ofloatofint, v1::nil => floatofint v1 | Ofloatofintu, v1::nil => floatofintu v1 - | Ointofsingle, v1::nil => intofsingle v1 - | Ointuofsingle, v1::nil => intuofsingle v1 + | Ointofsingle, v1::nil => intofsingle_total v1 + | Ointuofsingle, v1::nil => intuofsingle_total v1 | Osingleofint, v1::nil => singleofint v1 | Osingleofintu, v1::nil => singleofintu v1 - | Olongoffloat, v1::nil => longoffloat v1 - | Olonguoffloat, v1::nil => longuoffloat v1 + | Olongoffloat, v1::nil => longoffloat_total v1 + | Olonguoffloat, v1::nil => longuoffloat_total v1 | Ofloatoflong, v1::nil => floatoflong v1 | Ofloatoflongu, v1::nil => floatoflongu v1 - | Olongofsingle, v1::nil => longofsingle v1 - | Olonguofsingle, v1::nil => longuofsingle v1 + | Olongofsingle, v1::nil => longofsingle_total v1 + | Olonguofsingle, v1::nil => longuofsingle_total v1 | Osingleoflong, v1::nil => singleoflong v1 | Osingleoflongu, v1::nil => singleoflongu v1 |