diff options
author | David Monniaux <david.monniaux@univ-grenoble-alpes.fr> | 2020-04-08 21:45:42 +0200 |
---|---|---|
committer | David Monniaux <david.monniaux@univ-grenoble-alpes.fr> | 2020-04-08 21:45:42 +0200 |
commit | 63915fbebe707cc1de7c0ed5a24148cac45a742c (patch) | |
tree | da503cba224f14281a2ee841930b8843459cb42b | |
parent | f78d61faf3db94ac1704ce0d11291211b5307629 (diff) | |
parent | e326ed9f28a2ed6869f0cb356ef9a8e189cb0a47 (diff) | |
download | compcert-kvx-63915fbebe707cc1de7c0ed5a24148cac45a742c.tar.gz compcert-kvx-63915fbebe707cc1de7c0ed5a24148cac45a742c.zip |
Merge remote-tracking branch 'origin/mppa-work' into mppa-thread
124 files changed, 4766 insertions, 1881 deletions
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 00000000..1f854fc3 --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,240 @@ +stages: + - build + +check-admitted: + stage: build + image: "coqorg/coq" + before_script: + - opam switch 4.07.1+flambda + - eval `opam config env` + - opam install -y menhir + script: + - ./config_x86_64.sh + - make check-admitted + rules: + - if: '$CI_COMMIT_BRANCH == "mppa-work"' + when: always + - if: '$CI_COMMIT_BRANCH == "mppa-k1c"' + when: always + - if: '$CI_COMMIT_BRANCH == "master"' + when: always + - when: manual + +build_x86_64: + stage: build + image: "coqorg/coq" + before_script: + - opam switch 4.07.1+flambda + - eval `opam config env` + - opam install -y menhir + script: + - ./config_x86_64.sh + - make -j "$NJOBS" + - make -C test all test + - ulimit -s65536 && make -C test/monniaux/yarpgen + rules: + - if: '$CI_COMMIT_BRANCH == "mppa-work"' + when: always + - if: '$CI_COMMIT_BRANCH == "mppa-k1c"' + when: always + - if: '$CI_COMMIT_BRANCH == "master"' + when: always + - when: manual + +build_ia32: + stage: build + image: "coqorg/coq" + before_script: + - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update + - sudo apt-get -y install gcc-multilib + - opam switch 4.07.1+flambda + - eval `opam config env` + - opam install -y menhir + script: + - ./config_ia32.sh + - make -j "$NJOBS" + - make -C test all test + - ulimit -s65536 && make -C test/monniaux/yarpgen BITS=32 TARGET_CC='gcc -m32' + rules: + - if: '$CI_COMMIT_BRANCH == "mppa-work"' + when: always + - if: '$CI_COMMIT_BRANCH == "mppa-k1c"' + when: always + - if: '$CI_COMMIT_BRANCH == "master"' + when: always + - when: manual + +build_aarch64: + stage: build + image: "coqorg/coq" + before_script: + - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update + - sudo apt-get -y install gcc-aarch64-linux-gnu qemu-user + - opam switch 4.07.1+flambda + - eval `opam config env` + - opam install -y menhir + script: + - ./config_aarch64.sh + - make -j "$NJOBS" + - make -C test CCOMPOPTS='-static' SIMU='qemu-aarch64' EXECUTE='qemu-aarch64' all test + - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='aarch64-linux-gnu-gcc' EXECUTE='qemu-aarch64' CCOMPOPTS='-static' TARGET_CFLAGS='-static' + rules: + - if: '$CI_COMMIT_BRANCH == "mppa-work"' + when: always + - if: '$CI_COMMIT_BRANCH == "mppa-k1c"' + when: always + - if: '$CI_COMMIT_BRANCH == "master"' + when: always + - when: manual + +build_arm: + stage: build + image: "coqorg/coq" + before_script: + - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update + - sudo apt-get -y install gcc-arm-linux-gnueabi qemu-user + - opam switch 4.07.1+flambda + - eval `opam config env` + - opam install -y menhir + script: + - ./config_arm.sh + - make -j "$NJOBS" + - make -C test CCOMPOPTS=-static SIMU='qemu-arm' EXECUTE='qemu-arm' all test + - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='arm-linux-gnueabi-gcc' EXECUTE='qemu-arm' CCOMPOPTS='-static' TARGET_CFLAGS='-static' BITS=32 + rules: + - if: '$CI_COMMIT_BRANCH == "mppa-work"' + when: always + - if: '$CI_COMMIT_BRANCH == "mppa-k1c"' + when: always + - if: '$CI_COMMIT_BRANCH == "master"' + when: always + - when: manual + + +build_armhf: + stage: build + image: "coqorg/coq" + before_script: + - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update + - sudo apt-get -y install gcc-arm-linux-gnueabihf qemu-user + - opam switch 4.07.1+flambda + - eval `opam config env` + - opam install -y menhir + script: + - ./config_armhf.sh + - make -j "$NJOBS" + - make -C test CCOMPOPTS=-static SIMU='qemu-arm' EXECUTE='qemu-arm' all test + - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='arm-linux-gnueabihf-gcc' EXECUTE='qemu-arm' CCOMPOPTS='-static' TARGET_CFLAGS='-static' BITS=32 + rules: + - if: '$CI_COMMIT_BRANCH == "mppa-work"' + when: always + - if: '$CI_COMMIT_BRANCH == "mppa-k1c"' + when: always + - if: '$CI_COMMIT_BRANCH == "master"' + when: always + - when: manual + +build_ppc: + stage: build + image: "coqorg/coq" + before_script: + - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update + - sudo apt-get -y install gcc-powerpc-linux-gnu qemu-user + - opam switch 4.07.1+flambda + - eval `opam config env` + - opam install -y menhir + script: + - ./config_ppc.sh + - make -j "$NJOBS" + rules: + - if: '$CI_COMMIT_BRANCH == "mppa-work"' + when: always + - if: '$CI_COMMIT_BRANCH == "mppa-k1c"' + when: always + - if: '$CI_COMMIT_BRANCH == "master"' + when: always + - when: manual + +build_ppc64: + stage: build + image: "coqorg/coq" + before_script: + - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update + - sudo apt-get -y install gcc-powerpc64-linux-gnu + - opam switch 4.07.1+flambda + - eval `opam config env` + - opam install -y menhir + script: + - ./config_ppc64.sh + - make -j "$NJOBS" + rules: + - if: '$CI_COMMIT_BRANCH == "mppa-work"' + when: always + - if: '$CI_COMMIT_BRANCH == "mppa-k1c"' + when: always + - if: '$CI_COMMIT_BRANCH == "master"' + when: always + - when: manual + +build_rv64: + stage: build + image: "coqorg/coq" + before_script: + - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update + - sudo apt-get -y install gcc-riscv64-linux-gnu qemu-user + - opam switch 4.07.1+flambda + - eval `opam config env` + - opam install -y menhir + script: + - ./config_rv64.sh + - make -j "$NJOBS" + - make -C test CCOMPOPTS=-static SIMU='qemu-riscv64' EXECUTE='qemu-riscv64' all test + - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='riscv64-linux-gnu-gcc' EXECUTE='qemu-riscv64' CCOMPOPTS='-static' TARGET_CFLAGS='-static' + rules: + - if: '$CI_COMMIT_BRANCH == "mppa-work"' + when: always + - if: '$CI_COMMIT_BRANCH == "mppa-k1c"' + when: always + - if: '$CI_COMMIT_BRANCH == "master"' + when: always + - when: manual + +build_rv32: + stage: build + image: "coqorg/coq" + before_script: + - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update + - sudo apt-get -y install gcc-riscv64-linux-gnu qemu-user + - opam switch 4.07.1+flambda + - eval `opam config env` + - opam install -y menhir + script: + - ./config_rv32.sh -no-runtime-lib + - make -j "$NJOBS" + rules: + - if: '$CI_COMMIT_BRANCH == "mppa-work"' + when: always + - if: '$CI_COMMIT_BRANCH == "mppa-k1c"' + when: always + - if: '$CI_COMMIT_BRANCH == "master"' + when: always + - when: manual + +build_k1c: + stage: build + image: "coqorg/coq" + before_script: + - opam switch 4.07.1+flambda + - eval `opam config env` + - opam install -y menhir + script: + - ./config_k1c.sh -no-runtime-lib + - make -j "$NJOBS" + rules: + - if: '$CI_COMMIT_BRANCH == "mppa-work"' + when: always + - if: '$CI_COMMIT_BRANCH == "mppa-k1c"' + when: always + - if: '$CI_COMMIT_BRANCH == "master"' + when: always + - when: manual @@ -1,7 +1,41 @@ -Coq development: -- Compatibility with Coq version 8.11.0 (#316) +Release 3.7, 2020-03-31 +======================= + +ISO C conformance: +- Functions declared `extern` then implemented `inline` remain `extern` +- The type of a wide char constant is `wchar_t`, not `int` +- Support vertical tabs and treat them as whitespace +- Define the semantics of `free(NULL)` + +Bug fixing: +- Take sign into account for conversions from 32-bit integers to 64-bit pointers +- PowerPC: more precise determination of small data accesses +- AArch64: when addressing global variables, check for correct alignment +- PowerPC, ARM: double rounding error in int64->float32 conversions + +ABI conformance: +- x86, AArch64: re-normalize values of small integer types returned by + function calls +- PowerPC: `float` arguments passed on stack are passed in 64-bit format +- RISC-V: use the new ELF psABI instead of the old ABI from ISA 2.1 + +Usability and diagnostics: +- Unknown builtin functions trigger a specific error message +- Improved error messages + +Coq formalization: +- Revised modeling of the PowerPC/EREF `isel` instruction +- Weaker `ec_readonly` condition over external calls + (permissions can be dropped on read-only locations) + +Coq and OCaml development: +- Compatibility with Coq version 8.10.1, 8.10.2, 8.11.0 +- Compatibility with OCaml 4.10 and up +- Compatibility with Menhir 20200123 and up +- Coq versions prior to 8.8.0 are no longer supported +- OCaml versions prior to 4.05.0 are no longer supported + - Release 3.6, 2019-09-17 ======================= @@ -86,6 +86,7 @@ BACKEND=\ ValueDomain.v ValueAOp.v ValueAnalysis.v \ ConstpropOp.v Constprop.v ConstpropOpproof.v Constpropproof.v \ CSEdomain.v CombineOp.v CSE.v CombineOpproof.v CSEproof.v \ + CSE2deps.v CSE2depsproof.v \ CSE2.v CSE2proof.v \ NeedDomain.v NeedOp.v Deadcode.v Deadcodeproof.v \ Unusedglob.v Unusedglobproof.v \ @@ -1,3 +1,3 @@ -version=3.6 +version=3.7 buildnr= tag= diff --git a/aarch64/Asmgen.v b/aarch64/Asmgen.v index 46dd875d..024c9a17 100644 --- a/aarch64/Asmgen.v +++ b/aarch64/Asmgen.v @@ -1061,8 +1061,13 @@ Definition storeptr (src: ireg) (base: iregsp) (ofs: ptrofs) (k: code) := (** Function epilogue *) Definition make_epilogue (f: Mach.function) (k: code) := - loadptr XSP f.(fn_retaddr_ofs) RA - (Pfreeframe f.(fn_stacksize) f.(fn_link_ofs) :: k). + (* FIXME + Cannot be used because memcpy destroys X30; + issue being discussed with X. Leroy *) + (* if is_leaf_function f + then Pfreeframe f.(fn_stacksize) f.(fn_link_ofs) :: k + else*) loadptr XSP f.(fn_retaddr_ofs) RA + (Pfreeframe f.(fn_stacksize) f.(fn_link_ofs) :: k). (** Translation of a Mach instruction. *) diff --git a/aarch64/Asmgenproof.v b/aarch64/Asmgenproof.v index 88258cd6..6831509f 100644 --- a/aarch64/Asmgenproof.v +++ b/aarch64/Asmgenproof.v @@ -337,7 +337,12 @@ Qed. Remark make_epilogue_label: forall f k, tail_nolabel k (make_epilogue f k). Proof. - unfold make_epilogue; intros. eapply tail_nolabel_trans. apply loadptr_label. TailNoLabel. + unfold make_epilogue; intros. + (* FIXME destruct is_leaf_function. + { TailNoLabel. } *) + eapply tail_nolabel_trans. + apply loadptr_label. + TailNoLabel. Qed. Lemma transl_instr_label: @@ -472,7 +477,8 @@ Inductive match_states: Mach.state -> Asm.state -> Prop := (MEXT: Mem.extends m m') (AT: transl_code_at_pc ge (rs PC) fb f c ep tf tc) (AG: agree ms sp rs) - (DXP: ep = true -> rs#X29 = parent_sp s), + (DXP: ep = true -> rs#X29 = parent_sp s) + (LEAF: is_leaf_function f = true -> rs#RA = parent_ra s), match_states (Mach.State s fb sp c ms m) (Asm.State rs m') | match_states_call: @@ -503,16 +509,17 @@ Lemma exec_straight_steps: exists rs2, exec_straight tge tf c rs1 m1' k rs2 m2' /\ agree ms2 sp rs2 - /\ (it1_is_parent ep i = true -> rs2#X29 = parent_sp s)) -> + /\ (it1_is_parent ep i = true -> rs2#X29 = parent_sp s) + /\ (is_leaf_function f = true -> rs2#RA = parent_ra s)) -> exists st', plus step tge (State rs1 m1') E0 st' /\ match_states (Mach.State s fb sp c ms2 m2) st'. Proof. intros. inversion H2. subst. monadInv H7. - exploit H3; eauto. intros [rs2 [A [B C]]]. + exploit H3; eauto. intros [rs2 [A [B [C D]]]]. exists (State rs2 m2'); split. - eapply exec_straight_exec; eauto. - econstructor; eauto. eapply exec_straight_at; eauto. + - eapply exec_straight_exec; eauto. + - econstructor; eauto. eapply exec_straight_at; eauto. Qed. Lemma exec_straight_steps_goto: @@ -527,13 +534,14 @@ Lemma exec_straight_steps_goto: exists jmp, exists k', exists rs2, exec_straight tge tf c rs1 m1' (jmp :: k') rs2 m2' /\ agree ms2 sp rs2 - /\ exec_instr tge tf jmp rs2 m2' = goto_label tf lbl rs2 m2') -> + /\ exec_instr tge tf jmp rs2 m2' = goto_label tf lbl rs2 m2' + /\ (is_leaf_function f = true -> rs2#RA = parent_ra s)) -> exists st', plus step tge (State rs1 m1') E0 st' /\ match_states (Mach.State s fb sp c' ms2 m2) st'. Proof. intros. inversion H3. subst. monadInv H9. - exploit H5; eauto. intros [jmp [k' [rs2 [A [B C]]]]]. + exploit H5; eauto. intros [jmp [k' [rs2 [A [B [C D]]]]]]. generalize (functions_transl _ _ _ H7 H8); intro FN. generalize (transf_function_no_overflow _ _ H8); intro NOOV. exploit exec_straight_steps_2; eauto. @@ -550,6 +558,7 @@ Proof. econstructor; eauto. apply agree_exten with rs2; auto with asmgen. congruence. + rewrite OTH by congruence; auto. Qed. Lemma exec_straight_opt_steps_goto: @@ -564,13 +573,14 @@ Lemma exec_straight_opt_steps_goto: exists jmp, exists k', exists rs2, exec_straight_opt tge tf c rs1 m1' (jmp :: k') rs2 m2' /\ agree ms2 sp rs2 - /\ exec_instr tge tf jmp rs2 m2' = goto_label tf lbl rs2 m2') -> + /\ exec_instr tge tf jmp rs2 m2' = goto_label tf lbl rs2 m2' + /\ (is_leaf_function f = true -> rs2#RA = parent_ra s)) -> exists st', plus step tge (State rs1 m1') E0 st' /\ match_states (Mach.State s fb sp c' ms2 m2) st'. Proof. intros. inversion H3. subst. monadInv H9. - exploit H5; eauto. intros [jmp [k' [rs2 [A [B C]]]]]. + exploit H5; eauto. intros [jmp [k' [rs2 [A [B [C D]]]]]]. generalize (functions_transl _ _ _ H7 H8); intro FN. generalize (transf_function_no_overflow _ _ H8); intro NOOV. inv A. @@ -583,6 +593,7 @@ Proof. econstructor; eauto. apply agree_exten with rs2; auto with asmgen. congruence. + rewrite OTH by congruence; auto. - exploit exec_straight_steps_2; eauto. intros [ofs' [PC2 CT2]]. exploit find_label_goto_label; eauto. @@ -597,6 +608,7 @@ Proof. econstructor; eauto. apply agree_exten with rs2; auto with asmgen. congruence. + rewrite OTH by congruence; auto. Qed. (** We need to show that, in the simulation diagram, we cannot @@ -629,7 +641,7 @@ Qed. Theorem step_simulation: forall S1 t S2, Mach.step return_address_offset ge S1 t S2 -> - forall S1' (MS: match_states S1 S1'), + forall S1' (MS: match_states S1 S1') (WF: wf_state ge S1), (exists S2', plus step tge S1' t S2' /\ match_states S2 S2') \/ (measure S2 < measure S1 /\ t = E0 /\ match_states S2 S1')%nat. Proof. @@ -638,17 +650,20 @@ Proof. - (* Mlabel *) left; eapply exec_straight_steps; eauto; intros. monadInv TR. econstructor; split. apply exec_straight_one. simpl; eauto. auto. - split. apply agree_nextinstr; auto. simpl; congruence. + split. { apply agree_nextinstr; auto. } + split. { simpl; congruence. } + rewrite nextinstr_inv by congruence; assumption. - (* Mgetstack *) unfold load_stack in H. exploit Mem.loadv_extends; eauto. intros [v' [A B]]. rewrite (sp_val _ _ _ AG) in A. left; eapply exec_straight_steps; eauto. intros. simpl in TR. - exploit loadind_correct; eauto with asmgen. intros [rs' [P [Q R]]]. + exploit loadind_correct; eauto with asmgen. intros [rs' [P [Q [R S]]]]. exists rs'; split. eauto. - split. eapply agree_set_mreg; eauto with asmgen. congruence. - simpl; congruence. + split. { eapply agree_set_mreg; eauto with asmgen. congruence. } + split. { simpl; congruence. } + rewrite S. assumption. - (* Msetstack *) unfold store_stack in H. @@ -656,10 +671,12 @@ Proof. exploit Mem.storev_extends; eauto. intros [m2' [A B]]. left; eapply exec_straight_steps; eauto. rewrite (sp_val _ _ _ AG) in A. intros. simpl in TR. - exploit storeind_correct; eauto with asmgen. intros [rs' [P Q]]. + exploit storeind_correct; eauto with asmgen. intros [rs' [P [Q R]]]. exists rs'; split. eauto. split. eapply agree_undef_regs; eauto with asmgen. - simpl; intros. rewrite Q; auto with asmgen. + simpl; intros. + split. rewrite Q; auto with asmgen. + rewrite R. assumption. - (* Mgetparam *) assert (f0 = f) by congruence; subst f0. @@ -675,39 +692,45 @@ Opaque loadind. (* X30 contains parent *) exploit loadind_correct. eexact EQ. instantiate (2 := rs0). simpl; rewrite DXP; eauto. simpl; congruence. - intros [rs1 [P [Q R]]]. + intros [rs1 [P [Q [R S]]]]. exists rs1; split. eauto. split. eapply agree_set_mreg. eapply agree_set_mreg; eauto. congruence. auto with asmgen. - simpl; intros. rewrite R; auto with asmgen. - apply preg_of_not_X29; auto. + simpl; split; intros. + { rewrite R; auto with asmgen. + apply preg_of_not_X29; auto. + } + { rewrite S; auto. } + (* X30 does not contain parent *) exploit loadptr_correct. eexact A. simpl; congruence. intros [rs1 [P [Q R]]]. exploit loadind_correct. eexact EQ. instantiate (2 := rs1). simpl; rewrite Q. eauto. simpl; congruence. - intros [rs2 [S [T U]]]. + intros [rs2 [S [T [U V]]]]. exists rs2; split. eapply exec_straight_trans; eauto. split. eapply agree_set_mreg. eapply agree_set_mreg. eauto. eauto. instantiate (1 := rs1#X29 <- (rs2#X29)). intros. rewrite Pregmap.gso; auto with asmgen. congruence. intros. unfold Pregmap.set. destruct (PregEq.eq r' X29). congruence. auto with asmgen. - simpl; intros. rewrite U; auto with asmgen. + split; simpl; intros. rewrite U; auto with asmgen. apply preg_of_not_X29; auto. - + rewrite V. rewrite R by congruence. auto. + - (* Mop *) assert (eval_operation tge sp op (map rs args) m = Some v). { rewrite <- H. apply eval_operation_preserved. exact symbols_preserved. } exploit eval_operation_lessdef. eapply preg_vals; eauto. eauto. eexact H0. intros [v' [A B]]. rewrite (sp_val _ _ _ AG) in A. left; eapply exec_straight_steps; eauto; intros. simpl in TR. - exploit transl_op_correct; eauto. intros [rs2 [P [Q R]]]. + exploit transl_op_correct; eauto. intros [rs2 [P [Q [R S]]]]. exists rs2; split. eauto. split. apply agree_set_undef_mreg with rs0; auto. apply Val.lessdef_trans with v'; auto. - simpl; intros. InvBooleans. + split; simpl; intros. InvBooleans. rewrite R; auto. apply preg_of_not_X29; auto. Local Transparent destroyed_by_op. destruct op; try exact I; simpl; congruence. - + rewrite S. + auto. - (* Mload *) destruct trap. { @@ -717,10 +740,11 @@ Local Transparent destroyed_by_op. intros [a' [A B]]. rewrite (sp_val _ _ _ AG) in A. exploit Mem.loadv_extends; eauto. intros [v' [C D]]. left; eapply exec_straight_steps; eauto; intros. simpl in TR. - exploit transl_load_correct; eauto. intros [rs2 [P [Q R]]]. + exploit transl_load_correct; eauto. intros [rs2 [P [Q [R S]]]]. exists rs2; split. eauto. split. eapply agree_set_undef_mreg; eauto. congruence. - simpl; congruence. + split. simpl; congruence. + rewrite S. assumption. } (* Mload notrap1 *) @@ -740,10 +764,11 @@ Local Transparent destroyed_by_op. assert (Val.lessdef (rs src) (rs0 (preg_of src))) by (eapply preg_val; eauto). exploit Mem.storev_extends; eauto. intros [m2' [C D]]. left; eapply exec_straight_steps; eauto. - intros. simpl in TR. exploit transl_store_correct; eauto. intros [rs2 [P Q]]. + intros. simpl in TR. exploit transl_store_correct; eauto. intros [rs2 [P [Q R]]]. exists rs2; split. eauto. split. eapply agree_undef_regs; eauto with asmgen. - simpl; congruence. + split. simpl; congruence. + rewrite R. assumption. - (* Mcall *) assert (f0 = f) by congruence. subst f0. @@ -852,6 +877,18 @@ Local Transparent destroyed_by_op. eapply agree_undef_regs; eauto. intros. rewrite undef_regs_other_2; auto. congruence. + Simpl. + rewrite set_res_other by trivial. + rewrite undef_regs_other. + assumption. + intro. + rewrite in_map_iff. + intros (x0 & PREG & IN). + subst r'. + intro. + apply (preg_of_not_RA x0). + congruence. + - (* Mgoto *) assert (f0 = f) by congruence. subst f0. inv AT. monadInv H4. @@ -865,25 +902,33 @@ Local Transparent destroyed_by_op. eapply agree_exten; eauto with asmgen. congruence. + rewrite INV by congruence. + assumption. + - (* Mcond true *) assert (f0 = f) by congruence. subst f0. exploit eval_condition_lessdef. eapply preg_vals; eauto. eauto. eauto. intros EC. left; eapply exec_straight_opt_steps_goto; eauto. intros. simpl in TR. - exploit transl_cond_branch_correct; eauto. intros (rs' & jmp & A & B & C). + exploit transl_cond_branch_correct; eauto. intros (rs' & jmp & A & B & C & D). exists jmp; exists k; exists rs'. split. eexact A. split. apply agree_exten with rs0; auto with asmgen. - exact B. + split. + exact B. + rewrite D. exact LEAF. - (* Mcond false *) exploit eval_condition_lessdef. eapply preg_vals; eauto. eauto. eauto. intros EC. left; eapply exec_straight_steps; eauto. intros. simpl in TR. - exploit transl_cond_branch_correct; eauto. intros (rs' & jmp & A & B & C). + exploit transl_cond_branch_correct; eauto. intros (rs' & jmp & A & B & C & D). econstructor; split. eapply exec_straight_opt_right. eexact A. apply exec_straight_one. eexact B. auto. split. apply agree_exten with rs0; auto. intros. Simpl. + split. simpl; congruence. + Simpl. rewrite D. + exact LEAF. - (* Mjumptable *) assert (f0 = f) by congruence. subst f0. @@ -905,6 +950,10 @@ Local Transparent destroyed_by_op. simpl. intros. rewrite C; auto with asmgen. Simpl. congruence. + rewrite C by congruence. + repeat rewrite Pregmap.gso by congruence. + assumption. + - (* Mreturn *) assert (f0 = f) by congruence. subst f0. inversion AT; subst. simpl in H6; monadInv H6. @@ -947,7 +996,7 @@ Local Transparent destroyed_by_op. simpl preg_of_iregsp. change (rs2 X30) with (rs0 X30). rewrite ATLR. change (rs2 X2) with sp. eexact P. simpl; congruence. congruence. - intros (rs3 & U & V). + intros (rs3 & U & V & W). assert (EXEC_PROLOGUE: exec_straight tge tf tf.(fn_code) rs0 m' @@ -974,6 +1023,10 @@ Local Transparent destroyed_at_function_entry. simpl. unfold sp; congruence. intros. rewrite V by auto with asmgen. reflexivity. + rewrite W. + unfold rs2. + Simpl. + - (* external function *) exploit functions_translated; eauto. intros [tf [A B]]. simpl in B. inv B. @@ -993,6 +1046,10 @@ Local Transparent destroyed_at_function_entry. simpl. right. split. omega. split. auto. rewrite <- ATPC in H5. econstructor; eauto. congruence. + inv WF. + inv STACK. + inv H1. + congruence. Qed. Lemma transf_initial_states: @@ -1028,11 +1085,17 @@ Qed. Theorem transf_program_correct: forward_simulation (Mach.semantics return_address_offset prog) (Asm.semantics tprog). Proof. - eapply forward_simulation_star with (measure := measure). - apply senv_preserved. - eexact transf_initial_states. - eexact transf_final_states. - exact step_simulation. + eapply forward_simulation_star with (measure := measure) + (match_states := fun S1 S2 => match_states S1 S2 /\ wf_state ge S1). + - apply senv_preserved. + - simpl; intros. exploit transf_initial_states; eauto. + intros (s2 & A & B). + exists s2; intuition auto. apply wf_initial; auto. + - simpl; intros. destruct H as [MS WF]. eapply transf_final_states; eauto. + - simpl; intros. destruct H0 as [MS WF]. + exploit step_simulation; eauto. intros [ (s2' & A & B) | (A & B & C) ]. + + left; exists s2'; intuition auto. eapply wf_step; eauto. + + right; intuition auto. eapply wf_step; eauto. Qed. End PRESERVATION. diff --git a/aarch64/Asmgenproof1.v b/aarch64/Asmgenproof1.v index 6f296f56..0e36bd05 100644 --- a/aarch64/Asmgenproof1.v +++ b/aarch64/Asmgenproof1.v @@ -22,6 +22,51 @@ Local Transparent Archi.ptr64. (** Properties of registers *) +Lemma preg_of_not_RA: + forall r, (preg_of r) <> RA. +Proof. + destruct r; discriminate. +Qed. + +Lemma RA_not_written: + forall (rs : regset) dst v, + rs # (preg_of dst) <- v RA = rs RA. +Proof. + intros. + apply Pregmap.gso. + intro. + symmetry in H. + exact (preg_of_not_RA dst H). +Qed. + +Hint Resolve RA_not_written : asmgen. + +Lemma RA_not_written2: + forall (rs : regset) dst v i, + preg_of dst = i -> + rs # i <- v RA = rs RA. +Proof. + intros. + subst i. + apply RA_not_written. +Qed. + +Hint Resolve RA_not_written2 : asmgen. + +Lemma RA_not_written3: + forall (rs : regset) dst v i, + ireg_of dst = OK i -> + rs # i <- v RA = rs RA. +Proof. + intros. + unfold ireg_of in H. + destruct preg_of eqn:PREG; try discriminate. + replace i0 with i in * by congruence. + eapply RA_not_written2; eassumption. +Qed. + +Hint Resolve RA_not_written3 : asmgen. + Lemma preg_of_iregsp_not_PC: forall r, preg_of_iregsp r <> PC. Proof. destruct r; simpl; congruence. @@ -39,6 +84,26 @@ Proof. red; intros; subst x. elim (preg_of_not_X16 r); auto. Qed. +Lemma ireg_of_not_RA: forall r x, ireg_of r = OK x -> x <> RA. +Proof. + unfold ireg_of; intros. destruct (preg_of r) eqn:E; inv H. + red; intros; subst x. elim (preg_of_not_RA r); auto. +Qed. + +Lemma ireg_of_not_RA': forall r x, ireg_of r = OK x -> RA <> x. +Proof. + intros. intro. + apply (ireg_of_not_RA r x); auto. +Qed. + +Lemma ireg_of_not_RA'': forall r x, ireg_of r = OK x -> IR RA <> IR x. +Proof. + intros. intro. + apply (ireg_of_not_RA' r x); auto. congruence. +Qed. + +Hint Resolve ireg_of_not_RA ireg_of_not_RA' ireg_of_not_RA'' : asmgen. + Lemma ireg_of_not_X16': forall r x, ireg_of r = OK x -> IR x <> IR X16. Proof. intros. apply ireg_of_not_X16 in H. congruence. @@ -205,42 +270,49 @@ Qed. Lemma exec_loadimm_k_w: forall (rd: ireg) k m l, wf_decomposition l -> + rd <> RA -> forall (rs: regset) accu, rs#rd = Vint (Int.repr accu) -> exists rs', exec_straight_opt ge fn (loadimm_k W rd l k) rs m k rs' m /\ rs'#rd = Vint (Int.repr (recompose_int accu l)) - /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. + /\ (forall r, r <> PC -> r <> rd -> rs'#r = rs#r) + /\ rs' # RA = rs # RA. Proof. - induction 1; intros rs accu ACCU; simpl. + induction 1; intros RD_NOT_RA rs accu ACCU; simpl. - exists rs; split. apply exec_straight_opt_refl. auto. -- destruct (IHwf_decomposition +- destruct (IHwf_decomposition RD_NOT_RA (nextinstr (rs#rd <- (insert_in_int rs#rd n p 16))) (Zinsert accu n p 16)) - as (rs' & P & Q & R). + as (rs' & P & Q & R & S). Simpl. rewrite ACCU. simpl. f_equal. apply Int.eqm_samerepr. apply Zinsert_eqmod. auto. omega. apply Int.eqm_sym; apply Int.eqm_unsigned_repr. exists rs'; split. eapply exec_straight_opt_step_opt. simpl; eauto. auto. exact P. - split. exact Q. intros; Simpl. rewrite R by auto. Simpl. + split. exact Q. + split. + { intros; Simpl. + rewrite R by auto. Simpl. } + { rewrite S. Simpl. } Qed. Lemma exec_loadimm_z_w: forall rd l k rs m, wf_decomposition l -> + rd <> RA -> exists rs', exec_straight ge fn (loadimm_z W rd l k) rs m k rs' m /\ rs'#rd = Vint (Int.repr (recompose_int 0 l)) /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. Proof. - unfold loadimm_z; destruct 1. + unfold loadimm_z; destruct 1; intro RD_NOT_RA. - econstructor; split. apply exec_straight_one. simpl; eauto. auto. split. Simpl. intros; Simpl. - set (accu0 := Zinsert 0 n p 16). set (rs1 := nextinstr (rs#rd <- (Vint (Int.repr accu0)))). - destruct (exec_loadimm_k_w rd k m l H1 rs1 accu0) as (rs2 & P & Q & R); auto. + destruct (exec_loadimm_k_w rd k m l H1 RD_NOT_RA rs1 accu0) as (rs2 & P & Q & R & S); auto. unfold rs1; Simpl. exists rs2; split. eapply exec_straight_opt_step; eauto. @@ -253,12 +325,13 @@ Qed. Lemma exec_loadimm_n_w: forall rd l k rs m, wf_decomposition l -> + rd <> RA -> exists rs', exec_straight ge fn (loadimm_n W rd l k) rs m k rs' m /\ rs'#rd = Vint (Int.repr (Z.lnot (recompose_int 0 l))) /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. Proof. - unfold loadimm_n; destruct 1. + unfold loadimm_n; destruct 1; intro RD_NOT_RA. - econstructor; split. apply exec_straight_one. simpl; eauto. auto. split. Simpl. @@ -267,7 +340,8 @@ Proof. set (rs1 := nextinstr (rs#rd <- (Vint (Int.repr accu0)))). destruct (exec_loadimm_k_w rd k m (negate_decomposition l) (negate_decomposition_wf l H1) - rs1 accu0) as (rs2 & P & Q & R). + RD_NOT_RA rs1 accu0) + as (rs2 & P & Q & R & S). unfold rs1; Simpl. exists rs2; split. eapply exec_straight_opt_step; eauto. @@ -279,7 +353,8 @@ Proof. Qed. Lemma exec_loadimm32: - forall rd n k rs m, + forall rd n k rs m + (RD_NOT_RA : rd <> RA), exists rs', exec_straight ge fn (loadimm32 rd n k) rs m k rs' m /\ rs'#rd = Vint n @@ -302,13 +377,14 @@ Proof. apply Int.eqm_samerepr. apply decompose_notint_eqmod. apply Int.repr_unsigned. } destruct Nat.leb. -+ rewrite <- A. apply exec_loadimm_z_w. apply decompose_int_wf; omega. -+ rewrite <- B. apply exec_loadimm_n_w. apply decompose_int_wf; omega. ++ rewrite <- A. apply exec_loadimm_z_w. apply decompose_int_wf; omega. trivial. ++ rewrite <- B. apply exec_loadimm_n_w. apply decompose_int_wf; omega. trivial. Qed. Lemma exec_loadimm_k_x: forall (rd: ireg) k m l, - wf_decomposition l -> + wf_decomposition l -> + rd <> RA -> forall (rs: regset) accu, rs#rd = Vlong (Int64.repr accu) -> exists rs', @@ -316,9 +392,9 @@ Lemma exec_loadimm_k_x: /\ rs'#rd = Vlong (Int64.repr (recompose_int accu l)) /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. Proof. - induction 1; intros rs accu ACCU; simpl. + induction 1; intros RD_NOT_RA rs accu ACCU; simpl. - exists rs; split. apply exec_straight_opt_refl. auto. -- destruct (IHwf_decomposition +- destruct (IHwf_decomposition RD_NOT_RA (nextinstr (rs#rd <- (insert_in_long rs#rd n p 16))) (Zinsert accu n p 16)) as (rs' & P & Q & R). @@ -332,19 +408,20 @@ Qed. Lemma exec_loadimm_z_x: forall rd l k rs m, wf_decomposition l -> + rd <> RA -> exists rs', exec_straight ge fn (loadimm_z X rd l k) rs m k rs' m /\ rs'#rd = Vlong (Int64.repr (recompose_int 0 l)) /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. Proof. - unfold loadimm_z; destruct 1. + unfold loadimm_z; destruct 1; intro RD_NOT_RA. - econstructor; split. apply exec_straight_one. simpl; eauto. auto. split. Simpl. intros; Simpl. - set (accu0 := Zinsert 0 n p 16). set (rs1 := nextinstr (rs#rd <- (Vlong (Int64.repr accu0)))). - destruct (exec_loadimm_k_x rd k m l H1 rs1 accu0) as (rs2 & P & Q & R); auto. + destruct (exec_loadimm_k_x rd k m l H1 RD_NOT_RA rs1 accu0) as (rs2 & P & Q & R); auto. unfold rs1; Simpl. exists rs2; split. eapply exec_straight_opt_step; eauto. @@ -357,12 +434,13 @@ Qed. Lemma exec_loadimm_n_x: forall rd l k rs m, wf_decomposition l -> + rd <> RA -> exists rs', exec_straight ge fn (loadimm_n X rd l k) rs m k rs' m /\ rs'#rd = Vlong (Int64.repr (Z.lnot (recompose_int 0 l))) /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. Proof. - unfold loadimm_n; destruct 1. + unfold loadimm_n; destruct 1; intro RD_NOT_RA. - econstructor; split. apply exec_straight_one. simpl; eauto. auto. split. Simpl. @@ -371,7 +449,7 @@ Proof. set (rs1 := nextinstr (rs#rd <- (Vlong (Int64.repr accu0)))). destruct (exec_loadimm_k_x rd k m (negate_decomposition l) (negate_decomposition_wf l H1) - rs1 accu0) as (rs2 & P & Q & R). + RD_NOT_RA rs1 accu0) as (rs2 & P & Q & R). unfold rs1; Simpl. exists rs2; split. eapply exec_straight_opt_step; eauto. @@ -384,12 +462,13 @@ Qed. Lemma exec_loadimm64: forall rd n k rs m, + rd <> RA -> exists rs', exec_straight ge fn (loadimm64 rd n k) rs m k rs' m /\ rs'#rd = Vlong n /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r. Proof. - unfold loadimm64, loadimm; intros. + unfold loadimm64, loadimm; intros until m; intro RD_NOT_RA. destruct (is_logical_imm64 n). - econstructor; split. apply exec_straight_one. simpl; eauto. auto. @@ -406,8 +485,8 @@ Proof. apply Int64.eqm_samerepr. apply decompose_notint_eqmod. apply Int64.repr_unsigned. } destruct Nat.leb. -+ rewrite <- A. apply exec_loadimm_z_x. apply decompose_int_wf; omega. -+ rewrite <- B. apply exec_loadimm_n_x. apply decompose_int_wf; omega. ++ rewrite <- A. apply exec_loadimm_z_x. apply decompose_int_wf; omega. trivial. ++ rewrite <- B. apply exec_loadimm_n_x. apply decompose_int_wf; omega. trivial. Qed. (** Add immediate *) @@ -419,55 +498,59 @@ Lemma exec_addimm_aux_32: Next (nextinstr (rs#rd <- (sem rs#r1 (Vint (Int.repr n))))) m) -> (forall v n1 n2, sem (sem v (Vint n1)) (Vint n2) = sem v (Vint (Int.add n1 n2))) -> forall rd r1 n k rs m, + (IR RA) <> (preg_of_iregsp (RR1 rd)) -> exists rs', exec_straight ge fn (addimm_aux insn rd r1 (Int.unsigned n) k) rs m k rs' m /\ rs'#rd = sem rs#r1 (Vint n) - /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r. + /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r) + /\ rs' # RA = rs # RA. Proof. - intros insn sem SEM ASSOC; intros. unfold addimm_aux. + intros insn sem SEM ASSOC; intros until m; intro RD_NOT_RA. unfold addimm_aux. set (nlo := Zzero_ext 12 (Int.unsigned n)). set (nhi := Int.unsigned n - nlo). assert (E: Int.unsigned n = nhi + nlo) by (unfold nhi; omega). rewrite <- (Int.repr_unsigned n). destruct (Z.eqb_spec nhi 0); [|destruct (Z.eqb_spec nlo 0)]. - econstructor; split. apply exec_straight_one. apply SEM. Simpl. split. Simpl. do 3 f_equal; omega. - intros; Simpl. + split; intros; Simpl. - econstructor; split. apply exec_straight_one. apply SEM. Simpl. split. Simpl. do 3 f_equal; omega. - intros; Simpl. + split; intros; Simpl. - econstructor; split. eapply exec_straight_two. apply SEM. apply SEM. Simpl. Simpl. split. Simpl. rewrite ASSOC. do 2 f_equal. apply Int.eqm_samerepr. rewrite E. auto with ints. - intros; Simpl. + split; intros; Simpl. Qed. Lemma exec_addimm32: forall rd r1 n k rs m, r1 <> X16 -> + (IR RA) <> (preg_of_iregsp (RR1 rd)) -> exists rs', exec_straight ge fn (addimm32 rd r1 n k) rs m k rs' m /\ rs'#rd = Val.add rs#r1 (Vint n) - /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r. + /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r) + /\ rs' # RA = rs # RA. Proof. intros. unfold addimm32. set (nn := Int.neg n). destruct (Int.eq n (Int.zero_ext 24 n)); [| destruct (Int.eq nn (Int.zero_ext 24 nn))]. -- apply exec_addimm_aux_32 with (sem := Val.add). auto. intros; apply Val.add_assoc. +- apply exec_addimm_aux_32 with (sem := Val.add); auto. intros; apply Val.add_assoc. - rewrite <- Val.sub_opp_add. - apply exec_addimm_aux_32 with (sem := Val.sub). auto. + apply exec_addimm_aux_32 with (sem := Val.sub); auto. intros. rewrite ! Val.sub_add_opp, Val.add_assoc. rewrite Int.neg_add_distr. auto. - destruct (Int.lt n Int.zero). + rewrite <- Val.sub_opp_add; fold nn. - edestruct (exec_loadimm32 X16 nn) as (rs1 & A & B & C). + edestruct (exec_loadimm32 X16 nn) as (rs1 & A & B & C). congruence. econstructor; split. eapply exec_straight_trans. eexact A. eapply exec_straight_one. simpl; eauto. auto. split. Simpl. rewrite B, C; eauto with asmgen. - intros; Simpl. -+ edestruct (exec_loadimm32 X16 n) as (rs1 & A & B & C). + split; intros; Simpl. ++ edestruct (exec_loadimm32 X16 n) as (rs1 & A & B & C). congruence. econstructor; split. eapply exec_straight_trans. eexact A. eapply exec_straight_one. simpl; eauto. auto. split. Simpl. rewrite B, C; eauto with asmgen. - intros; Simpl. + split; intros; Simpl. Qed. Lemma exec_addimm_aux_64: @@ -477,10 +560,12 @@ Lemma exec_addimm_aux_64: Next (nextinstr (rs#rd <- (sem rs#r1 (Vlong (Int64.repr n))))) m) -> (forall v n1 n2, sem (sem v (Vlong n1)) (Vlong n2) = sem v (Vlong (Int64.add n1 n2))) -> forall rd r1 n k rs m, + (IR RA) <> (preg_of_iregsp (RR1 rd)) -> exists rs', exec_straight ge fn (addimm_aux insn rd r1 (Int64.unsigned n) k) rs m k rs' m /\ rs'#rd = sem rs#r1 (Vlong n) - /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r. + /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r) + /\ rs' # RA = rs # RA. Proof. intros insn sem SEM ASSOC; intros. unfold addimm_aux. set (nlo := Zzero_ext 12 (Int64.unsigned n)). set (nhi := Int64.unsigned n - nlo). @@ -489,44 +574,46 @@ Proof. destruct (Z.eqb_spec nhi 0); [|destruct (Z.eqb_spec nlo 0)]. - econstructor; split. apply exec_straight_one. apply SEM. Simpl. split. Simpl. do 3 f_equal; omega. - intros; Simpl. + split; intros; Simpl. - econstructor; split. apply exec_straight_one. apply SEM. Simpl. split. Simpl. do 3 f_equal; omega. - intros; Simpl. + split; intros; Simpl. - econstructor; split. eapply exec_straight_two. apply SEM. apply SEM. Simpl. Simpl. split. Simpl. rewrite ASSOC. do 2 f_equal. apply Int64.eqm_samerepr. rewrite E. auto with ints. - intros; Simpl. + split; intros; Simpl. Qed. Lemma exec_addimm64: forall rd r1 n k rs m, preg_of_iregsp r1 <> X16 -> + (IR RA) <> (preg_of_iregsp (RR1 rd)) -> exists rs', exec_straight ge fn (addimm64 rd r1 n k) rs m k rs' m /\ rs'#rd = Val.addl rs#r1 (Vlong n) - /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r. + /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r) + /\ rs' # RA = rs # RA. Proof. intros. unfold addimm64. set (nn := Int64.neg n). destruct (Int64.eq n (Int64.zero_ext 24 n)); [| destruct (Int64.eq nn (Int64.zero_ext 24 nn))]. -- apply exec_addimm_aux_64 with (sem := Val.addl). auto. intros; apply Val.addl_assoc. +- apply exec_addimm_aux_64 with (sem := Val.addl); auto. intros; apply Val.addl_assoc. - rewrite <- Val.subl_opp_addl. - apply exec_addimm_aux_64 with (sem := Val.subl). auto. + apply exec_addimm_aux_64 with (sem := Val.subl); auto. intros. rewrite ! Val.subl_addl_opp, Val.addl_assoc. rewrite Int64.neg_add_distr. auto. - destruct (Int64.lt n Int64.zero). + rewrite <- Val.subl_opp_addl; fold nn. - edestruct (exec_loadimm64 X16 nn) as (rs1 & A & B & C). + edestruct (exec_loadimm64 X16 nn) as (rs1 & A & B & C). congruence. econstructor; split. eapply exec_straight_trans. eexact A. eapply exec_straight_one. simpl; eauto. Simpl. split. Simpl. rewrite B, C; eauto with asmgen. simpl. rewrite Int64.shl'_zero. auto. - intros; Simpl. -+ edestruct (exec_loadimm64 X16 n) as (rs1 & A & B & C). + split; intros; Simpl. ++ edestruct (exec_loadimm64 X16 n) as (rs1 & A & B & C). congruence. econstructor; split. eapply exec_straight_trans. eexact A. eapply exec_straight_one. simpl; eauto. Simpl. split. Simpl. rewrite B, C; eauto with asmgen. simpl. rewrite Int64.shl'_zero. auto. - intros; Simpl. + split; intros; Simpl. Qed. (** Logical immediate *) @@ -543,22 +630,25 @@ Lemma exec_logicalimm32: Next (nextinstr (rs#rd <- (sem rs##r1 (eval_shift_op_int rs#r2 s)))) m) -> forall rd r1 n k rs m, r1 <> X16 -> + (IR RA) <> (preg_of_iregsp (RR1 rd)) -> exists rs', exec_straight ge fn (logicalimm32 insn1 insn2 rd r1 n k) rs m k rs' m /\ rs'#rd = sem rs#r1 (Vint n) - /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r. + /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r) + /\ rs' # RA = rs # RA. Proof. intros until sem; intros SEM1 SEM2; intros. unfold logicalimm32. destruct (is_logical_imm32 n). - econstructor; split. apply exec_straight_one. apply SEM1. reflexivity. - split. Simpl. rewrite Int.repr_unsigned; auto. intros; Simpl. -- edestruct (exec_loadimm32 X16 n) as (rs1 & A & B & C). + split. Simpl. rewrite Int.repr_unsigned; auto. + split; intros; Simpl. +- edestruct (exec_loadimm32 X16 n) as (rs1 & A & B & C). congruence. econstructor; split. eapply exec_straight_trans. eexact A. apply exec_straight_one. apply SEM2. reflexivity. split. Simpl. f_equal; auto. apply C; auto with asmgen. - intros; Simpl. + split; intros; Simpl. Qed. Lemma exec_logicalimm64: @@ -573,50 +663,58 @@ Lemma exec_logicalimm64: Next (nextinstr (rs#rd <- (sem rs###r1 (eval_shift_op_long rs#r2 s)))) m) -> forall rd r1 n k rs m, r1 <> X16 -> + (IR RA) <> (preg_of_iregsp (RR1 rd)) -> exists rs', exec_straight ge fn (logicalimm64 insn1 insn2 rd r1 n k) rs m k rs' m /\ rs'#rd = sem rs#r1 (Vlong n) - /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r. + /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r) + /\ rs' # RA = rs # RA. Proof. intros until sem; intros SEM1 SEM2; intros. unfold logicalimm64. destruct (is_logical_imm64 n). - econstructor; split. apply exec_straight_one. apply SEM1. reflexivity. - split. Simpl. rewrite Int64.repr_unsigned. auto. intros; Simpl. -- edestruct (exec_loadimm64 X16 n) as (rs1 & A & B & C). + split. Simpl. rewrite Int64.repr_unsigned. auto. + split; intros; Simpl. +- edestruct (exec_loadimm64 X16 n) as (rs1 & A & B & C). congruence. econstructor; split. eapply exec_straight_trans. eexact A. apply exec_straight_one. apply SEM2. reflexivity. split. Simpl. f_equal; auto. apply C; auto with asmgen. - intros; Simpl. + split; intros; Simpl. Qed. (** Load address of symbol *) Lemma exec_loadsymbol: forall rd s ofs k rs m, - rd <> X16 \/ Archi.pic_code tt = false -> + rd <> X16 \/ Archi.pic_code tt = false -> + (IR RA) <> (preg_of_iregsp (RR1 rd)) -> exists rs', exec_straight ge fn (loadsymbol rd s ofs k) rs m k rs' m /\ rs'#rd = Genv.symbol_address ge s ofs - /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r. + /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r) + /\ rs'#RA = rs#RA. Proof. unfold loadsymbol; intros. destruct (Archi.pic_code tt). - predSpec Ptrofs.eq Ptrofs.eq_spec ofs Ptrofs.zero. + subst ofs. econstructor; split. apply exec_straight_one; [simpl; eauto | reflexivity]. - split. Simpl. intros; Simpl. + split. Simpl. split; intros; Simpl. + + exploit exec_addimm64. instantiate (1 := rd). simpl. destruct H; congruence. - intros (rs1 & A & B & C). + instantiate (1 := rd). assumption. + intros (rs1 & A & B & C & D). econstructor; split. econstructor. simpl; eauto. auto. eexact A. split. simpl in B; rewrite B. Simpl. rewrite <- Genv.shift_symbol_address_64 by auto. rewrite Ptrofs.add_zero_l, Ptrofs.of_int64_to_int64 by auto. auto. - intros. rewrite C by auto. Simpl. + split; intros. rewrite C by auto; Simpl. + rewrite D. Simpl. - econstructor; split. eapply exec_straight_two. simpl; eauto. simpl; eauto. auto. auto. split. Simpl. rewrite symbol_high_low; auto. - intros; Simpl. + split; intros; Simpl. Qed. (** Shifted operands *) @@ -725,23 +823,25 @@ Lemma exec_arith_extended: Next (nextinstr (rs#rd <- (sem rs###r1 (eval_shift_op_long rs#r2 s)))) m) -> forall (rd r1 r2: ireg) (ex: extension) (a: amount64) (k: code) rs m, r1 <> X16 -> + (IR RA) <> (preg_of_iregsp (RR1 rd)) -> exists rs', exec_straight ge fn (arith_extended insnX insnS rd r1 r2 ex a k) rs m k rs' m /\ rs'#rd = sem rs#r1 (Op.eval_extend ex rs#r2 a) - /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r. + /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r) + /\ rs' # RA = rs # RA. Proof. intros sem insnX insnS EX ES; intros. unfold arith_extended. destruct (Int.ltu a (Int.repr 5)). - econstructor; split. apply exec_straight_one. rewrite EX; eauto. auto. split. Simpl. f_equal. destruct ex; auto. - intros; Simpl. + split; intros; Simpl. - exploit (exec_move_extended_base X16 r2 ex). intros (rs' & A & B & C). econstructor; split. eapply exec_straight_trans. eexact A. apply exec_straight_one. rewrite ES. eauto. auto. split. Simpl. unfold ir0x. rewrite C by eauto with asmgen. f_equal. rewrite B. destruct ex; auto. - intros; Simpl. + split; intros; Simpl. Qed. (** Extended right shift *) @@ -749,15 +849,18 @@ Qed. Lemma exec_shrx32: forall (rd r1: ireg) (n: int) k v (rs: regset) m, Val.shrx rs#r1 (Vint n) = Some v -> r1 <> X16 -> + (IR RA) <> (preg_of_iregsp (RR1 rd)) -> exists rs', exec_straight ge fn (shrx32 rd r1 n k) rs m k rs' m /\ rs'#rd = v - /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r. + /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r) + /\ rs' # RA = rs # RA. Proof. unfold shrx32; intros. apply Val.shrx_shr_3 in H. destruct (Int.eq n Int.zero) eqn:E. - econstructor; split. apply exec_straight_one; [simpl;eauto|auto]. - split. Simpl. subst v; auto. intros; Simpl. + split. Simpl. subst v; auto. + split; intros; Simpl. - generalize (Int.eq_spec n Int.one). destruct (Int.eq n Int.one); intro ONE. * subst n. @@ -769,27 +872,31 @@ Proof. change (Int.ltu Int.one Int.iwordsize) with true; simpl. rewrite Int.or_zero_l. reflexivity. - ** intros; Simpl. + ** split; intros; Simpl. * econstructor; split. eapply exec_straight_three. unfold exec_instr. rewrite or_zero_eval_shift_op_int by congruence. eauto. simpl; eauto. unfold exec_instr. rewrite or_zero_eval_shift_op_int by congruence. eauto. auto. auto. auto. - split. subst v; Simpl. intros; Simpl. + split. subst v; Simpl. + split; intros; Simpl. Qed. Lemma exec_shrx64: forall (rd r1: ireg) (n: int) k v (rs: regset) m, Val.shrxl rs#r1 (Vint n) = Some v -> r1 <> X16 -> + (IR RA) <> (preg_of_iregsp (RR1 rd)) -> exists rs', exec_straight ge fn (shrx64 rd r1 n k) rs m k rs' m /\ rs'#rd = v - /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r. + /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r) + /\ rs' # RA = rs # RA. Proof. unfold shrx64; intros. apply Val.shrxl_shrl_3 in H. destruct (Int.eq n Int.zero) eqn:E. - econstructor; split. apply exec_straight_one; [simpl;eauto|auto]. - split. Simpl. subst v; auto. intros; Simpl. + split. Simpl. subst v; auto. + split; intros; Simpl. - generalize (Int.eq_spec n Int.one). destruct (Int.eq n Int.one); intro ONE. * subst n. @@ -801,13 +908,14 @@ Proof. change (Int.ltu Int.one Int64.iwordsize') with true; simpl. rewrite Int64.or_zero_l. reflexivity. - ** intros; Simpl. + ** split; intros; Simpl. * econstructor; split. eapply exec_straight_three. unfold exec_instr. rewrite or_zero_eval_shift_op_long by congruence. eauto. simpl; eauto. unfold exec_instr. rewrite or_zero_eval_shift_op_long by congruence. eauto. auto. auto. auto. - split. subst v; Simpl. intros; Simpl. + split. subst v; Simpl. + split; intros; Simpl. Qed. (** Condition bits *) @@ -1063,6 +1171,56 @@ Ltac ArgsInv := | [ H: freg_of _ = OK _ |- _ ] => simpl in *; rewrite (freg_of_eq _ _ H) in * end). +Lemma compare_int_RA: + forall rs a b m, + compare_int rs a b m X30 = rs X30. +Proof. + unfold compare_int. + intros. + repeat rewrite Pregmap.gso by congruence. + trivial. +Qed. + +Hint Resolve compare_int_RA : asmgen. + +Lemma compare_long_RA: + forall rs a b m, + compare_long rs a b m X30 = rs X30. +Proof. + unfold compare_long. + intros. + repeat rewrite Pregmap.gso by congruence. + trivial. +Qed. + +Hint Resolve compare_long_RA : asmgen. + +Lemma compare_float_RA: + forall rs a b, + compare_float rs a b X30 = rs X30. +Proof. + unfold compare_float. + intros. + destruct a; destruct b. + all: repeat rewrite Pregmap.gso by congruence; trivial. +Qed. + +Hint Resolve compare_float_RA : asmgen. + + +Lemma compare_single_RA: + forall rs a b, + compare_single rs a b X30 = rs X30. +Proof. + unfold compare_single. + intros. + destruct a; destruct b. + all: repeat rewrite Pregmap.gso by congruence; trivial. +Qed. + +Hint Resolve compare_single_RA : asmgen. + + Lemma transl_cond_correct: forall cond args k c rs m, transl_cond cond args k = OK c -> @@ -1071,185 +1229,218 @@ Lemma transl_cond_correct: /\ (forall b, eval_condition cond (map rs (map preg_of args)) m = Some b -> eval_testcond (cond_for_cond cond) rs' = Some b) - /\ forall r, data_preg r = true -> rs'#r = rs#r. + /\ (forall r, data_preg r = true -> rs'#r = rs#r) + /\ rs' # RA = rs # RA. Proof. intros until m; intros TR. destruct cond; simpl in TR; ArgsInv. - (* Ccomp *) econstructor; split. apply exec_straight_one. simpl; eauto. auto. - split; intros. apply eval_testcond_compare_sint; auto. + repeat split; intros. apply eval_testcond_compare_sint; auto. destruct r; reflexivity || discriminate. - (* Ccompu *) econstructor; split. apply exec_straight_one. simpl; eauto. auto. - split; intros. apply eval_testcond_compare_uint; auto. + repeat split; intros. apply eval_testcond_compare_uint; auto. destruct r; reflexivity || discriminate. - (* Ccompimm *) destruct (is_arith_imm32 n); [|destruct (is_arith_imm32 (Int.neg n))]. + econstructor; split. apply exec_straight_one. simpl; eauto. auto. - split; intros. rewrite Int.repr_unsigned. apply eval_testcond_compare_sint; auto. + repeat split; intros. rewrite Int.repr_unsigned. apply eval_testcond_compare_sint; auto. destruct r; reflexivity || discriminate. + econstructor; split. apply exec_straight_one. simpl. rewrite Int.repr_unsigned, Int.neg_involutive. eauto. auto. - split; intros. apply eval_testcond_compare_sint; auto. + repeat split; intros. apply eval_testcond_compare_sint; auto. destruct r; reflexivity || discriminate. -+ exploit (exec_loadimm32 X16 n). intros (rs' & A & B & C). ++ exploit (exec_loadimm32 X16 n). congruence. intros (rs' & A & B & C). econstructor; split. eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl. rewrite B, C by eauto with asmgen. eauto. auto. - split; intros. apply eval_testcond_compare_sint; auto. - transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen. + repeat split; intros. apply eval_testcond_compare_sint; auto. + transitivity (rs' r). destruct r; reflexivity || discriminate. + auto with asmgen. + Simpl. rewrite compare_int_RA. + apply C; congruence. - (* Ccompuimm *) destruct (is_arith_imm32 n); [|destruct (is_arith_imm32 (Int.neg n))]. + econstructor; split. apply exec_straight_one. simpl; eauto. auto. - split; intros. rewrite Int.repr_unsigned. apply eval_testcond_compare_uint; auto. + repeat split; intros. rewrite Int.repr_unsigned. apply eval_testcond_compare_uint; auto. destruct r; reflexivity || discriminate. + econstructor; split. apply exec_straight_one. simpl. rewrite Int.repr_unsigned, Int.neg_involutive. eauto. auto. - split; intros. apply eval_testcond_compare_uint; auto. + repeat split; intros. apply eval_testcond_compare_uint; auto. destruct r; reflexivity || discriminate. -+ exploit (exec_loadimm32 X16 n). intros (rs' & A & B & C). ++ exploit (exec_loadimm32 X16 n). congruence. intros (rs' & A & B & C). econstructor; split. eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl. rewrite B, C by eauto with asmgen. eauto. auto. - split; intros. apply eval_testcond_compare_uint; auto. + repeat split; intros. apply eval_testcond_compare_uint; auto. transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen. + Simpl. rewrite compare_int_RA. + apply C; congruence. - (* Ccompshift *) econstructor; split. apply exec_straight_one. simpl; eauto. auto. - split; intros. rewrite transl_eval_shift. apply eval_testcond_compare_sint; auto. + repeat split; intros. rewrite transl_eval_shift. apply eval_testcond_compare_sint; auto. destruct r; reflexivity || discriminate. - (* Ccompushift *) econstructor; split. apply exec_straight_one. simpl; eauto. auto. - split; intros. rewrite transl_eval_shift. apply eval_testcond_compare_uint; auto. + repeat split; intros. rewrite transl_eval_shift. apply eval_testcond_compare_uint; auto. destruct r; reflexivity || discriminate. - (* Cmaskzero *) destruct (is_logical_imm32 n). + econstructor; split. apply exec_straight_one. simpl; eauto. auto. - split; intros. rewrite Int.repr_unsigned. apply (eval_testcond_compare_sint Ceq); auto. + repeat split; intros. rewrite Int.repr_unsigned. apply (eval_testcond_compare_sint Ceq); auto. destruct r; reflexivity || discriminate. -+ exploit (exec_loadimm32 X16 n). intros (rs' & A & B & C). ++ exploit (exec_loadimm32 X16 n). congruence. intros (rs' & A & B & C). econstructor; split. eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl. rewrite B, C by eauto with asmgen. eauto. auto. - split; intros. apply (eval_testcond_compare_sint Ceq); auto. + repeat split; intros. apply (eval_testcond_compare_sint Ceq); auto. transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen. + Simpl. rewrite compare_int_RA. + apply C; congruence. + - (* Cmasknotzero *) destruct (is_logical_imm32 n). + econstructor; split. apply exec_straight_one. simpl; eauto. auto. - split; intros. rewrite Int.repr_unsigned. apply (eval_testcond_compare_sint Cne); auto. + repeat split; intros. rewrite Int.repr_unsigned. apply (eval_testcond_compare_sint Cne); auto. destruct r; reflexivity || discriminate. -+ exploit (exec_loadimm32 X16 n). intros (rs' & A & B & C). + ++ exploit (exec_loadimm32 X16 n). congruence. intros (rs' & A & B & C). econstructor; split. eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl. rewrite B, C by eauto with asmgen. eauto. auto. - split; intros. apply (eval_testcond_compare_sint Cne); auto. + repeat split; intros. apply (eval_testcond_compare_sint Cne); auto. transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen. + Simpl. rewrite compare_int_RA. + apply C; congruence. + - (* Ccompl *) econstructor; split. apply exec_straight_one. simpl; eauto. auto. - split; intros. apply eval_testcond_compare_slong; auto. + repeat split; intros. apply eval_testcond_compare_slong; auto. destruct r; reflexivity || discriminate. - (* Ccomplu *) econstructor; split. apply exec_straight_one. simpl; eauto. auto. - split; intros. apply eval_testcond_compare_ulong; auto. + repeat split; intros. apply eval_testcond_compare_ulong; auto. destruct r; reflexivity || discriminate. - (* Ccomplimm *) destruct (is_arith_imm64 n); [|destruct (is_arith_imm64 (Int64.neg n))]. + econstructor; split. apply exec_straight_one. simpl; eauto. auto. - split; intros. rewrite Int64.repr_unsigned. apply eval_testcond_compare_slong; auto. + repeat split; intros. rewrite Int64.repr_unsigned. apply eval_testcond_compare_slong; auto. destruct r; reflexivity || discriminate. + econstructor; split. apply exec_straight_one. simpl. rewrite Int64.repr_unsigned, Int64.neg_involutive. eauto. auto. - split; intros. apply eval_testcond_compare_slong; auto. + repeat split; intros. apply eval_testcond_compare_slong; auto. destruct r; reflexivity || discriminate. -+ exploit (exec_loadimm64 X16 n). intros (rs' & A & B & C). ++ exploit (exec_loadimm64 X16 n). congruence. intros (rs' & A & B & C). econstructor; split. eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl. rewrite B, C by eauto with asmgen. eauto. auto. - split; intros. apply eval_testcond_compare_slong; auto. + repeat split; intros. apply eval_testcond_compare_slong; auto. transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen. + Simpl. rewrite compare_long_RA. + apply C; congruence. + - (* Ccompluimm *) destruct (is_arith_imm64 n); [|destruct (is_arith_imm64 (Int64.neg n))]. + econstructor; split. apply exec_straight_one. simpl; eauto. auto. - split; intros. rewrite Int64.repr_unsigned. apply eval_testcond_compare_ulong; auto. + repeat split; intros. rewrite Int64.repr_unsigned. apply eval_testcond_compare_ulong; auto. destruct r; reflexivity || discriminate. + econstructor; split. apply exec_straight_one. simpl. rewrite Int64.repr_unsigned, Int64.neg_involutive. eauto. auto. - split; intros. apply eval_testcond_compare_ulong; auto. + repeat split; intros. apply eval_testcond_compare_ulong; auto. destruct r; reflexivity || discriminate. -+ exploit (exec_loadimm64 X16 n). intros (rs' & A & B & C). ++ exploit (exec_loadimm64 X16 n). congruence. intros (rs' & A & B & C). econstructor; split. eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl. rewrite B, C by eauto with asmgen. eauto. auto. - split; intros. apply eval_testcond_compare_ulong; auto. + repeat split; intros. apply eval_testcond_compare_ulong; auto. transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen. + Simpl. rewrite compare_long_RA. + apply C; congruence. + - (* Ccomplshift *) econstructor; split. apply exec_straight_one. simpl; eauto. auto. - split; intros. rewrite transl_eval_shiftl. apply eval_testcond_compare_slong; auto. + repeat split; intros. rewrite transl_eval_shiftl. apply eval_testcond_compare_slong; auto. destruct r; reflexivity || discriminate. - (* Ccomplushift *) econstructor; split. apply exec_straight_one. simpl; eauto. auto. - split; intros. rewrite transl_eval_shiftl. apply eval_testcond_compare_ulong; auto. + repeat split; intros. rewrite transl_eval_shiftl. apply eval_testcond_compare_ulong; auto. destruct r; reflexivity || discriminate. - (* Cmasklzero *) destruct (is_logical_imm64 n). + econstructor; split. apply exec_straight_one. simpl; eauto. auto. - split; intros. rewrite Int64.repr_unsigned. apply (eval_testcond_compare_slong Ceq); auto. + repeat split; intros. rewrite Int64.repr_unsigned. apply (eval_testcond_compare_slong Ceq); auto. destruct r; reflexivity || discriminate. -+ exploit (exec_loadimm64 X16 n). intros (rs' & A & B & C). ++ exploit (exec_loadimm64 X16 n). congruence. intros (rs' & A & B & C). econstructor; split. eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl. rewrite B, C by eauto with asmgen. eauto. auto. - split; intros. apply (eval_testcond_compare_slong Ceq); auto. + repeat split; intros. apply (eval_testcond_compare_slong Ceq); auto. transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen. + Simpl. rewrite compare_long_RA. + apply C; congruence. + - (* Cmasknotzero *) destruct (is_logical_imm64 n). + econstructor; split. apply exec_straight_one. simpl; eauto. auto. - split; intros. rewrite Int64.repr_unsigned. apply (eval_testcond_compare_slong Cne); auto. + repeat split; intros. rewrite Int64.repr_unsigned. apply (eval_testcond_compare_slong Cne); auto. destruct r; reflexivity || discriminate. -+ exploit (exec_loadimm64 X16 n). intros (rs' & A & B & C). ++ exploit (exec_loadimm64 X16 n). congruence. intros (rs' & A & B & C). econstructor; split. eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl. rewrite B, C by eauto with asmgen. eauto. auto. - split; intros. apply (eval_testcond_compare_slong Cne); auto. + repeat split; intros. apply (eval_testcond_compare_slong Cne); auto. transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen. + Simpl. rewrite compare_long_RA. + apply C; congruence. + - (* Ccompf *) econstructor; split. apply exec_straight_one. simpl; eauto. rewrite compare_float_inv; auto. - split; intros. apply eval_testcond_compare_float; auto. + repeat split; intros. apply eval_testcond_compare_float; auto. destruct r; discriminate || rewrite compare_float_inv; auto. + Simpl. - (* Cnotcompf *) econstructor; split. apply exec_straight_one. simpl; eauto. rewrite compare_float_inv; auto. - split; intros. apply eval_testcond_compare_not_float; auto. + repeat split; intros. apply eval_testcond_compare_not_float; auto. destruct r; discriminate || rewrite compare_float_inv; auto. + Simpl. - (* Ccompfzero *) econstructor; split. apply exec_straight_one. simpl; eauto. rewrite compare_float_inv; auto. - split; intros. apply eval_testcond_compare_float; auto. + repeat split; intros. apply eval_testcond_compare_float; auto. destruct r; discriminate || rewrite compare_float_inv; auto. + Simpl. - (* Cnotcompfzero *) econstructor; split. apply exec_straight_one. simpl; eauto. rewrite compare_float_inv; auto. - split; intros. apply eval_testcond_compare_not_float; auto. + repeat split; intros. apply eval_testcond_compare_not_float; auto. destruct r; discriminate || rewrite compare_float_inv; auto. + Simpl. - (* Ccompfs *) econstructor; split. apply exec_straight_one. simpl; eauto. rewrite compare_single_inv; auto. - split; intros. apply eval_testcond_compare_single; auto. + repeat split; intros. apply eval_testcond_compare_single; auto. destruct r; discriminate || rewrite compare_single_inv; auto. + Simpl. - (* Cnotcompfs *) econstructor; split. apply exec_straight_one. simpl; eauto. rewrite compare_single_inv; auto. - split; intros. apply eval_testcond_compare_not_single; auto. + repeat split; intros. apply eval_testcond_compare_not_single; auto. destruct r; discriminate || rewrite compare_single_inv; auto. + Simpl. - (* Ccompfszero *) econstructor; split. apply exec_straight_one. simpl; eauto. rewrite compare_single_inv; auto. - split; intros. apply eval_testcond_compare_single; auto. + repeat split; intros. apply eval_testcond_compare_single; auto. destruct r; discriminate || rewrite compare_single_inv; auto. + Simpl. - (* Cnotcompfszero *) econstructor; split. apply exec_straight_one. simpl; eauto. rewrite compare_single_inv; auto. - split; intros. apply eval_testcond_compare_not_single; auto. + repeat split; intros. apply eval_testcond_compare_not_single; auto. destruct r; discriminate || rewrite compare_single_inv; auto. + Simpl. Qed. (** Translation of conditional branches *) @@ -1262,7 +1453,8 @@ Lemma transl_cond_branch_correct: exec_straight_opt ge fn c rs m (insn :: k) rs' m /\ exec_instr ge fn insn rs' m = (if b then goto_label fn lbl rs' m else Next (nextinstr rs') m) - /\ forall r, data_preg r = true -> rs'#r = rs#r. + /\ (forall r, data_preg r = true -> rs'#r = rs#r) + /\ rs' # RA = rs # RA. Proof. intros until b; intros TR EV. assert (DFL: @@ -1271,13 +1463,14 @@ Proof. exec_straight_opt ge fn c rs m (insn :: k) rs' m /\ exec_instr ge fn insn rs' m = (if b then goto_label fn lbl rs' m else Next (nextinstr rs') m) - /\ forall r, data_preg r = true -> rs'#r = rs#r). + /\ (forall r, data_preg r = true -> rs'#r = rs#r) + /\ rs' # RA = rs # RA ). { unfold transl_cond_branch_default; intros. - exploit transl_cond_correct; eauto. intros (rs' & A & B & C). + exploit transl_cond_correct; eauto. intros (rs' & A & B & C & D). exists rs', (Pbc (cond_for_cond cond) lbl); split. apply exec_straight_opt_intro. eexact A. - split; auto. simpl. rewrite (B b) by auto. auto. + repeat split; auto. simpl. rewrite (B b) by auto. auto. } Local Opaque transl_cond transl_cond_branch_default. destruct args as [ | a1 args]; simpl in TR; auto. @@ -1371,13 +1564,15 @@ Ltac TranslOpSimpl := [ apply exec_straight_one; [simpl; eauto | reflexivity] | split; [ rewrite ? transl_eval_shift, ? transl_eval_shiftl; apply Val.lessdef_same; Simpl; fail - | intros; Simpl; fail ] ]. + | split; [ intros; Simpl; fail + | intros; Simpl; eauto with asmgen; fail] ]]. Ltac TranslOpBase := econstructor; split; [ apply exec_straight_one; [simpl; eauto | reflexivity] | split; [ rewrite ? transl_eval_shift, ? transl_eval_shiftl; Simpl - | intros; Simpl; fail ] ]. + | split; [ intros; Simpl; fail + | intros; Simpl; eapply RA_not_written2; eauto] ]]. Lemma transl_op_correct: forall op args res k (rs: regset) m v c, @@ -1386,21 +1581,29 @@ Lemma transl_op_correct: exists rs', exec_straight ge fn c rs m k rs' m /\ Val.lessdef v rs'#(preg_of res) - /\ forall r, data_preg r = true -> r <> preg_of res -> preg_notin r (destroyed_by_op op) -> rs' r = rs r. + /\ (forall r, data_preg r = true -> r <> preg_of res -> preg_notin r (destroyed_by_op op) -> rs' r = rs r) + /\ rs' RA = rs RA. Proof. Local Opaque Int.eq Int64.eq Val.add Val.addl Int.zwordsize Int64.zwordsize. intros until c; intros TR EV. unfold transl_op in TR; destruct op; ArgsInv; simpl in EV; SimplEval EV; try TranslOpSimpl. - (* move *) destruct (preg_of res) eqn:RR; try discriminate; destruct (preg_of m0) eqn:R1; inv TR. -+ TranslOpSimpl. -+ TranslOpSimpl. + all: TranslOpSimpl. - (* intconst *) - exploit exec_loadimm32. intros (rs' & A & B & C). - exists rs'; split. eexact A. split. rewrite B; auto. intros; auto with asmgen. + exploit exec_loadimm32. apply (ireg_of_not_RA res); eassumption. + intros (rs' & A & B & C). + exists rs'; split. eexact A. split. rewrite B; auto. + split. intros; auto with asmgen. + apply C. congruence. + eapply ireg_of_not_RA''; eauto. - (* longconst *) - exploit exec_loadimm64. intros (rs' & A & B & C). - exists rs'; split. eexact A. split. rewrite B; auto. intros; auto with asmgen. + exploit exec_loadimm64. apply (ireg_of_not_RA res); eassumption. + intros (rs' & A & B & C). + exists rs'; split. eexact A. split. rewrite B; auto. + split. intros; auto with asmgen. + apply C. congruence. + eapply ireg_of_not_RA''; eauto. - (* floatconst *) destruct (Float.eq_dec n Float.zero). + subst n. TranslOpSimpl. @@ -1410,11 +1613,15 @@ Local Opaque Int.eq Int64.eq Val.add Val.addl Int.zwordsize Int64.zwordsize. + subst n. TranslOpSimpl. + TranslOpSimpl. - (* loadsymbol *) - exploit (exec_loadsymbol x id ofs). eauto with asmgen. intros (rs' & A & B & C). - exists rs'; split. eexact A. split. rewrite B; auto. auto. + exploit (exec_loadsymbol x id ofs). eauto with asmgen. + apply (ireg_of_not_RA'' res); eassumption. + intros (rs' & A & B & C & D). + exists rs'; split. eexact A. split. rewrite B; auto. + split; auto. - (* addrstack *) exploit (exec_addimm64 x XSP (Ptrofs.to_int64 ofs)). simpl; eauto with asmgen. - intros (rs' & A & B & C). + apply (ireg_of_not_RA'' res); eassumption. + intros (rs' & A & B & C & D). exists rs'; split. eexact A. split. simpl in B; rewrite B. Local Transparent Val.addl. destruct (rs SP); simpl; auto. rewrite Ptrofs.of_int64_to_int64 by auto. auto. @@ -1422,7 +1629,8 @@ Local Transparent Val.addl. - (* shift *) rewrite <- transl_eval_shift'. TranslOpSimpl. - (* addimm *) - exploit (exec_addimm32 x x0 n). eauto with asmgen. intros (rs' & A & B & C). + exploit (exec_addimm32 x x0 n). eauto with asmgen. eapply ireg_of_not_RA''; eassumption. + intros (rs' & A & B & C & D). exists rs'; split. eexact A. split. rewrite B; auto. auto. - (* mul *) TranslOpBase. @@ -1430,18 +1638,20 @@ Local Transparent Val.add. destruct (rs x0); auto; destruct (rs x1); auto. simpl. rewrite Int.add_zero_l; auto. - (* andimm *) exploit (exec_logicalimm32 (Pandimm W) (Pand W)). - intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen. - intros (rs' & A & B & C). - exists rs'; split. eexact A. split. rewrite B; auto. auto. + intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen. apply (ireg_of_not_RA'' res); eassumption. + intros (rs' & A & B & C & D). + exists rs'; split. eexact A. split. rewrite B; auto. + split; auto. - (* orimm *) exploit (exec_logicalimm32 (Porrimm W) (Porr W)). - intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen. - intros (rs' & A & B & C). - exists rs'; split. eexact A. split. rewrite B; auto. auto. + intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen. apply (ireg_of_not_RA'' res); eassumption. + intros (rs' & A & B & C & D). + exists rs'; split. eexact A. split. rewrite B; auto. + split; auto. - (* xorimm *) exploit (exec_logicalimm32 (Peorimm W) (Peor W)). - intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen. - intros (rs' & A & B & C). + intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen. apply (ireg_of_not_RA'' res); eassumption. + intros (rs' & A & B & C & D). exists rs'; split. eexact A. split. rewrite B; auto. auto. - (* not *) TranslOpBase. @@ -1450,8 +1660,10 @@ Local Transparent Val.add. TranslOpBase. destruct (eval_shift s (rs x0) a); auto. simpl. rewrite Int.or_zero_l; auto. - (* shrx *) - exploit (exec_shrx32 x x0 n); eauto with asmgen. intros (rs' & A & B & C). - econstructor; split. eexact A. split. rewrite B; auto. auto. + exploit (exec_shrx32 x x0 n); eauto with asmgen. apply (ireg_of_not_RA'' res); eassumption. + intros (rs' & A & B & C & D). + econstructor; split. eexact A. split. rewrite B; auto. + split; auto. - (* zero-ext *) TranslOpBase. destruct (rs x0); auto; simpl. rewrite Int.shl_zero. auto. @@ -1475,36 +1687,47 @@ Local Transparent Val.add. - (* extend *) exploit (exec_move_extended x0 x1 x a k). intros (rs' & A & B & C). econstructor; split. eexact A. - split. rewrite B; auto. eauto with asmgen. + split. rewrite B; auto. + split; eauto with asmgen. - (* addext *) exploit (exec_arith_extended Val.addl Paddext (Padd X)). - auto. auto. instantiate (1 := x1). eauto with asmgen. intros (rs' & A & B & C). - econstructor; split. eexact A. split. rewrite B; auto. auto. + auto. auto. instantiate (1 := x1). eauto with asmgen. + apply (ireg_of_not_RA'' res); eassumption. + intros (rs' & A & B & C & D). + econstructor; split. eexact A. split. rewrite B; auto. + split; auto. - (* addlimm *) exploit (exec_addimm64 x x0 n). simpl. generalize (ireg_of_not_X16 _ _ EQ1). congruence. - intros (rs' & A & B & C). + apply (ireg_of_not_RA'' res); eassumption. + intros (rs' & A & B & C & D). exists rs'; split. eexact A. split. simpl in B; rewrite B; auto. auto. - (* subext *) exploit (exec_arith_extended Val.subl Psubext (Psub X)). - auto. auto. instantiate (1 := x1). eauto with asmgen. intros (rs' & A & B & C). - econstructor; split. eexact A. split. rewrite B; auto. auto. + auto. auto. instantiate (1 := x1). eauto with asmgen. + apply (ireg_of_not_RA'' res); eassumption. + intros (rs' & A & B & C & D). + econstructor; split. eexact A. split. rewrite B; auto. + split; auto. - (* mull *) TranslOpBase. destruct (rs x0); auto; destruct (rs x1); auto. simpl. rewrite Int64.add_zero_l; auto. - (* andlimm *) exploit (exec_logicalimm64 (Pandimm X) (Pand X)). intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen. - intros (rs' & A & B & C). + apply (ireg_of_not_RA'' res); eassumption. + intros (rs' & A & B & C & D). exists rs'; split. eexact A. split. rewrite B; auto. auto. - (* orlimm *) exploit (exec_logicalimm64 (Porrimm X) (Porr X)). intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen. - intros (rs' & A & B & C). + apply (ireg_of_not_RA'' res); eassumption. + intros (rs' & A & B & C & D). exists rs'; split. eexact A. split. rewrite B; auto. auto. - (* xorlimm *) exploit (exec_logicalimm64 (Peorimm X) (Peor X)). intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen. - intros (rs' & A & B & C). + apply (ireg_of_not_RA'' res); eassumption. + intros (rs' & A & B & C & D). exists rs'; split. eexact A. split. rewrite B; auto. auto. - (* notl *) TranslOpBase. @@ -1513,7 +1736,8 @@ Local Transparent Val.add. TranslOpBase. destruct (eval_shiftl s (rs x0) a); auto. simpl. rewrite Int64.or_zero_l; auto. - (* shrx *) - exploit (exec_shrx64 x x0 n); eauto with asmgen. intros (rs' & A & B & C). + exploit (exec_shrx64 x x0 n); eauto with asmgen. + apply (ireg_of_not_RA'' res); eassumption. intros (rs' & A & B & C & D ). econstructor; split. eexact A. split. rewrite B; auto. auto. - (* zero-ext-l *) TranslOpBase. @@ -1534,35 +1758,37 @@ Local Transparent Val.add. TranslOpBase. destruct (rs x0); simpl; auto. rewrite ! a64_range; simpl. rewrite <- Int64.sign_ext_shr'_min; auto using a64_range. - (* condition *) - exploit (transl_cond_correct cond args); eauto. intros (rs' & A & B & C). + exploit (transl_cond_correct cond args); eauto. intros (rs' & A & B & C & D). econstructor; split. eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl; eauto. auto. split. Simpl. destruct (eval_condition cond (map rs (map preg_of args)) m) as [b|]; simpl in *. rewrite (B b) by auto. auto. auto. - intros; Simpl. + split; intros; Simpl. - (* select *) destruct (preg_of res) eqn:RES; monadInv TR. + (* integer *) generalize (ireg_of_eq _ _ EQ) (ireg_of_eq _ _ EQ1); intros E1 E2; rewrite E1, E2. - exploit (transl_cond_correct cond args); eauto. intros (rs' & A & B & C). + exploit (transl_cond_correct cond args); eauto. intros (rs' & A & B & C & D). econstructor; split. eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl; eauto. auto. split. Simpl. destruct (eval_condition cond (map rs (map preg_of args)) m) as [b|]; simpl in *. rewrite (B b) by auto. rewrite !C. apply Val.lessdef_normalize. rewrite <- E2; auto with asmgen. rewrite <- E1; auto with asmgen. auto. - intros; Simpl. + split; intros; Simpl. + rewrite <- D. + eapply RA_not_written2; eassumption. + (* FP *) generalize (freg_of_eq _ _ EQ) (freg_of_eq _ _ EQ1); intros E1 E2; rewrite E1, E2. - exploit (transl_cond_correct cond args); eauto. intros (rs' & A & B & C). + exploit (transl_cond_correct cond args); eauto. intros (rs' & A & B & C & D). econstructor; split. eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl; eauto. auto. split. Simpl. destruct (eval_condition cond (map rs (map preg_of args)) m) as [b|]; simpl in *. rewrite (B b) by auto. rewrite !C. apply Val.lessdef_normalize. rewrite <- E2; auto with asmgen. rewrite <- E1; auto with asmgen. auto. - intros; Simpl. + split; intros; Simpl. Qed. (** Translation of addressing modes, loads, stores *) @@ -1574,7 +1800,8 @@ Lemma transl_addressing_correct: exists ad rs', exec_straight_opt ge fn c rs m (insn ad :: k) rs' m /\ Asm.eval_addressing ge ad rs' = Vptr b o - /\ forall r, data_preg r = true -> rs' r = rs r. + /\ (forall r, data_preg r = true -> rs' r = rs r) + /\ rs' # RA = rs # RA. Proof. intros until o; intros TR EV. unfold transl_addressing in TR; destruct addr; ArgsInv; SimplEval EV. @@ -1582,10 +1809,10 @@ Proof. destruct (offset_representable sz ofs); inv EQ0. + econstructor; econstructor; split. apply exec_straight_opt_refl. auto. -+ exploit (exec_loadimm64 X16 ofs). intros (rs' & A & B & C). ++ exploit (exec_loadimm64 X16 ofs). congruence. intros (rs' & A & B & C). econstructor; exists rs'; split. apply exec_straight_opt_intro; eexact A. split. simpl. rewrite B, C by eauto with asmgen. auto. - eauto with asmgen. + split; eauto with asmgen. - (* Aindexed2 *) econstructor; econstructor; split. apply exec_straight_opt_refl. auto. @@ -1601,33 +1828,38 @@ Proof. + econstructor; econstructor; split. apply exec_straight_opt_intro. apply exec_straight_one. simpl; eauto. auto. split. simpl. Simpl. rewrite H0. simpl. rewrite Ptrofs.add_zero. auto. - intros; Simpl. + split; intros; Simpl. - (* Aindexed2ext *) destruct (Int.eq a Int.zero || Int.eq (Int.shl Int.one a) (Int.repr sz)); inv EQ2. + econstructor; econstructor; split. apply exec_straight_opt_refl. split; auto. destruct x; auto. + exploit (exec_arith_extended Val.addl Paddext (Padd X)); auto. instantiate (1 := x0). eauto with asmgen. - intros (rs' & A & B & C). + instantiate (1 := X16). simpl. congruence. + intros (rs' & A & B & C & D). econstructor; exists rs'; split. apply exec_straight_opt_intro. eexact A. split. simpl. rewrite B. rewrite Val.addl_assoc. f_equal. unfold Op.eval_extend; destruct x, (rs x1); simpl; auto; rewrite ! a64_range; simpl; rewrite Int64.add_zero; auto. - intros. apply C; eauto with asmgen. + split; intros. + apply C; eauto with asmgen. + trivial. - (* Aglobal *) destruct (Ptrofs.eq (Ptrofs.modu ofs (Ptrofs.repr sz)) Ptrofs.zero && symbol_is_aligned id sz); inv TR. + econstructor; econstructor; split. apply exec_straight_opt_intro. apply exec_straight_one. simpl; eauto. auto. split. simpl. Simpl. rewrite symbol_high_low. simpl in EV. congruence. - intros; Simpl. -+ exploit (exec_loadsymbol X16 id ofs). auto. intros (rs' & A & B & C). + split; intros; Simpl. ++ exploit (exec_loadsymbol X16 id ofs). auto. + simpl. congruence. + intros (rs' & A & B & C & D). econstructor; exists rs'; split. apply exec_straight_opt_intro. eexact A. split. simpl. rewrite B. rewrite <- Genv.shift_symbol_address_64, Ptrofs.add_zero by auto. simpl in EV. congruence. - auto with asmgen. + split; auto with asmgen. - (* Ainstrack *) assert (E: Val.addl (rs SP) (Vlong (Ptrofs.to_int64 ofs)) = Vptr b o). { simpl in EV. inv EV. destruct (rs SP); simpl in H1; inv H1. simpl. @@ -1635,7 +1867,9 @@ Proof. destruct (offset_representable sz (Ptrofs.to_int64 ofs)); inv TR. + econstructor; econstructor; split. apply exec_straight_opt_refl. auto. -+ exploit (exec_loadimm64 X16 (Ptrofs.to_int64 ofs)). intros (rs' & A & B & C). ++ exploit (exec_loadimm64 X16 (Ptrofs.to_int64 ofs)). + simpl. congruence. + intros (rs' & A & B & C). econstructor; exists rs'; split. apply exec_straight_opt_intro. eexact A. split. simpl. rewrite B, C by eauto with asmgen. auto. @@ -1650,7 +1884,8 @@ Lemma transl_load_correct: exists rs', exec_straight ge fn c rs m k rs' m /\ rs'#(preg_of dst) = v - /\ forall r, data_preg r = true -> r <> preg_of dst -> rs' r = rs r. + /\ (forall r, data_preg r = true -> r <> preg_of dst -> rs' r = rs r) + /\ rs' # RA = rs # RA. Proof. intros. destruct vaddr; try discriminate. assert (A: exists sz insn, @@ -1663,14 +1898,17 @@ Proof. do 2 econstructor; (split; [eassumption|auto]). } destruct A as (sz & insn & B & C). - exploit transl_addressing_correct. eexact B. eexact H0. intros (ad & rs' & P & Q & R). + exploit transl_addressing_correct. eexact B. eexact H0. intros (ad & rs' & P & Q & R & S). assert (X: exec_load ge chunk (fun v => v) ad (preg_of dst) rs' m = Next (nextinstr (rs'#(preg_of dst) <- v)) m). { unfold exec_load. rewrite Q, H1. auto. } econstructor; split. eapply exec_straight_opt_right. eexact P. apply exec_straight_one. rewrite C, X; eauto. Simpl. - split. Simpl. intros; Simpl. + split. Simpl. + split; intros; Simpl. + rewrite <- S. + apply RA_not_written. Qed. Lemma transl_store_correct: @@ -1680,7 +1918,8 @@ Lemma transl_store_correct: Mem.storev chunk m vaddr rs#(preg_of src) = Some m' -> exists rs', exec_straight ge fn c rs m k rs' m' - /\ forall r, data_preg r = true -> rs' r = rs r. + /\ (forall r, data_preg r = true -> rs' r = rs r) + /\ rs' # RA = rs # RA. Proof. intros. destruct vaddr; try discriminate. set (chunk' := match chunk with Mint8signed => Mint8unsigned @@ -1696,7 +1935,7 @@ Proof. do 2 econstructor; (split; [eassumption|auto]). } destruct A as (sz & insn & B & C). - exploit transl_addressing_correct. eexact B. eexact H0. intros (ad & rs' & P & Q & R). + exploit transl_addressing_correct. eexact B. eexact H0. intros (ad & rs' & P & Q & R & S). assert (X: Mem.storev chunk' m (Vptr b i) rs#(preg_of src) = Some m'). { rewrite <- H1. unfold chunk'. destruct chunk; auto; simpl; symmetry. apply Mem.store_signed_unsigned_8. @@ -1707,7 +1946,7 @@ Proof. econstructor; split. eapply exec_straight_opt_right. eexact P. apply exec_straight_one. rewrite C, Y; eauto. Simpl. - intros; Simpl. + split; intros; Simpl. Qed. (** Translation of indexed memory accesses *) @@ -1725,7 +1964,9 @@ Proof. { destruct (rs base); try discriminate. simpl in *. rewrite Ptrofs.of_int64_to_int64 by auto. auto. } destruct offset_representable. - econstructor; econstructor; split. apply exec_straight_opt_refl. auto. -- exploit (exec_loadimm64 X16); eauto. intros (rs' & A & B & C). +- exploit (exec_loadimm64 X16); eauto. + simpl. congruence. + intros (rs' & A & B & C). econstructor; econstructor; split. apply exec_straight_opt_intro; eexact A. split. simpl. rewrite B, C by eauto with asmgen. auto. auto. Qed. @@ -1736,7 +1977,7 @@ Lemma loadptr_correct: forall (base: iregsp) ofs dst k m v (rs: regset), exists rs', exec_straight ge fn (loadptr base ofs dst k) rs m k rs' m /\ rs'#dst = v - /\ forall r, r <> PC -> r <> X16 -> r <> dst -> rs' r = rs r. + /\ (forall r, r <> PC -> r <> X16 -> r <> dst -> rs' r = rs r). Proof. intros. destruct (Val.offset_ptr rs#base ofs) eqn:V; try discriminate. @@ -1744,7 +1985,8 @@ Proof. econstructor; split. eapply exec_straight_opt_right. eexact A. apply exec_straight_one. simpl. unfold exec_load. rewrite B, H. eauto. auto. - split. Simpl. intros; Simpl. + split. Simpl. + intros; Simpl. Qed. Lemma storeptr_correct: forall (base: iregsp) ofs (src: ireg) k m m' (rs: regset), @@ -1753,7 +1995,8 @@ Lemma storeptr_correct: forall (base: iregsp) ofs (src: ireg) k m m' (rs: regset src <> X16 -> exists rs', exec_straight ge fn (storeptr src base ofs k) rs m k rs' m' - /\ forall r, r <> PC -> r <> X16 -> rs' r = rs r. + /\ (forall r, r <> PC -> r <> X16 -> rs' r = rs r) + /\ rs' RA = rs RA. Proof. intros. destruct (Val.offset_ptr rs#base ofs) eqn:V; try discriminate. @@ -1761,7 +2004,7 @@ Proof. econstructor; split. eapply exec_straight_opt_right. eexact A. apply exec_straight_one. simpl. unfold exec_store. rewrite B, C, H by eauto with asmgen. eauto. auto. - intros; Simpl. + split; intros; Simpl. Qed. Lemma loadind_correct: forall (base: iregsp) ofs ty dst k c (rs: regset) m v, @@ -1771,7 +2014,8 @@ Lemma loadind_correct: forall (base: iregsp) ofs ty dst k c (rs: regset) m v, exists rs', exec_straight ge fn c rs m k rs' m /\ rs'#(preg_of dst) = v - /\ forall r, data_preg r = true -> r <> preg_of dst -> rs' r = rs r. + /\ (forall r, data_preg r = true -> r <> preg_of dst -> rs' r = rs r) + /\ rs' RA = rs RA. Proof. intros. destruct (Val.offset_ptr rs#base ofs) eqn:V; try discriminate. @@ -1787,7 +2031,10 @@ Proof. econstructor; split. eapply exec_straight_opt_right. eexact A. apply exec_straight_one. rewrite SEM. unfold exec_load. rewrite B, H0. eauto. Simpl. - split. Simpl. intros; Simpl. + split. Simpl. + split. intros; Simpl. + Simpl. rewrite RA_not_written. + apply C; congruence. Qed. Lemma storeind_correct: forall (base: iregsp) ofs ty src k c (rs: regset) m m', @@ -1796,7 +2043,8 @@ Lemma storeind_correct: forall (base: iregsp) ofs ty src k c (rs: regset) m m', preg_of_iregsp base <> IR X16 -> exists rs', exec_straight ge fn c rs m k rs' m' - /\ forall r, data_preg r = true -> rs' r = rs r. + /\ (forall r, data_preg r = true -> rs' r = rs r) + /\ rs' RA = rs RA. Proof. intros. destruct (Val.offset_ptr rs#base ofs) eqn:V; try discriminate. @@ -1814,13 +2062,15 @@ Proof. apply exec_straight_one. rewrite SEM. unfold exec_store. rewrite B, C, H0 by eauto with asmgen. eauto. Simpl. - intros; Simpl. + split. intros; Simpl. + Simpl. Qed. Lemma make_epilogue_correct: forall ge0 f m stk soff cs m' ms rs k tm, + (is_leaf_function f = true -> rs # (IR RA) = parent_ra cs) -> load_stack m (Vptr stk soff) Tptr f.(fn_link_ofs) = Some (parent_sp cs) -> - load_stack m (Vptr stk soff) Tptr f.(fn_retaddr_ofs) = Some (parent_ra cs) -> + ((* FIXME is_leaf_function f = false -> *) load_stack m (Vptr stk soff) Tptr f.(fn_retaddr_ofs) = Some (parent_ra cs)) -> Mem.free m stk 0 f.(fn_stacksize) = Some m' -> agree ms (Vptr stk soff) rs -> Mem.extends m tm -> @@ -1831,18 +2081,46 @@ Lemma make_epilogue_correct: /\ Mem.extends m' tm' /\ rs'#RA = parent_ra cs /\ rs'#SP = parent_sp cs - /\ (forall r, r <> PC -> r <> SP -> r <> X30 -> r <> X16 -> rs'#r = rs#r). + /\ (forall r, r <> PC -> r <> SP -> r <> RA -> r <> X16 -> rs'#r = rs#r). Proof. - intros until tm; intros LP LRA FREE AG MEXT MCS. + intros until tm; intros LEAF_RA LP LRA FREE AG MEXT MCS. + + (* FIXME + Cannot be used at this point + destruct (is_leaf_function f) eqn:IS_LEAF. + { + exploit Mem.loadv_extends. eauto. eexact LP. auto. simpl. intros (parent' & LP' & LDP'). + exploit lessdef_parent_sp; eauto. intros EQ; subst parent'; clear LDP'. + exploit Mem.free_parallel_extends; eauto. intros (tm' & FREE' & MEXT'). + unfold make_epilogue. + rewrite IS_LEAF. + + econstructor; econstructor; split. + apply exec_straight_one. simpl. + rewrite <- (sp_val _ _ _ AG). simpl; rewrite LP'. + rewrite FREE'. eauto. auto. + split. apply agree_nextinstr. apply agree_set_other; auto. + apply agree_change_sp with (Vptr stk soff). + apply agree_exten with rs; auto. + eapply parent_sp_def; eauto. + split. auto. + split. Simpl. + split. Simpl. + intros. Simpl. + } + lapply LRA. 2: reflexivity. + clear LRA. intro LRA. *) exploit Mem.loadv_extends. eauto. eexact LP. auto. simpl. intros (parent' & LP' & LDP'). exploit Mem.loadv_extends. eauto. eexact LRA. auto. simpl. intros (ra' & LRA' & LDRA'). exploit lessdef_parent_sp; eauto. intros EQ; subst parent'; clear LDP'. exploit lessdef_parent_ra; eauto. intros EQ; subst ra'; clear LDRA'. exploit Mem.free_parallel_extends; eauto. intros (tm' & FREE' & MEXT'). - unfold make_epilogue. + unfold make_epilogue. + (* FIXME rewrite IS_LEAF. *) exploit (loadptr_correct XSP (fn_retaddr_ofs f)). instantiate (2 := rs). simpl. rewrite <- (sp_val _ _ _ AG). simpl. eexact LRA'. simpl; congruence. intros (rs1 & A1 & B1 & C1). + econstructor; econstructor; split. eapply exec_straight_trans. eexact A1. apply exec_straight_one. simpl. simpl; rewrite (C1 SP) by auto with asmgen. rewrite <- (sp_val _ _ _ AG). simpl; rewrite LP'. @@ -1857,4 +2135,4 @@ Proof. intros. Simpl. Qed. -End CONSTRUCTORS.
\ No newline at end of file +End CONSTRUCTORS. diff --git a/aarch64/CSE2deps.v b/aarch64/CSE2deps.v new file mode 100644 index 00000000..90b514a2 --- /dev/null +++ b/aarch64/CSE2deps.v @@ -0,0 +1,20 @@ +Require Import BoolEqual Coqlib. +Require Import AST Integers Floats. +Require Import Values Memory Globalenvs Events. +Require Import Op. + + +Definition can_swap_accesses_ofs ofsr chunkr ofsw chunkw := + (0 <=? ofsw) && (ofsw <=? (Ptrofs.modulus - largest_size_chunk)) + && (0 <=? ofsr) && (ofsr <=? (Ptrofs.modulus - largest_size_chunk)) + && ((ofsw + size_chunk chunkw <=? ofsr) || + (ofsr + size_chunk chunkr <=? ofsw)). + +Definition may_overlap chunk addr args chunk' addr' args' := + match addr, addr', args, args' with + | (Aindexed ofs), (Aindexed ofs'), + (base :: nil), (base' :: nil) => + if peq base base' + then negb (can_swap_accesses_ofs (Int64.unsigned ofs') chunk' (Int64.unsigned ofs) chunk) + else true | _, _, _, _ => true + end. diff --git a/aarch64/CSE2depsproof.v b/aarch64/CSE2depsproof.v new file mode 100644 index 00000000..4aac23af --- /dev/null +++ b/aarch64/CSE2depsproof.v @@ -0,0 +1,128 @@ +Require Import Coqlib Maps Errors Integers Floats Lattice Kildall. +Require Import AST Linking. +Require Import Memory Registers Op RTL Maps. + +Require Import Globalenvs Values. +Require Import Linking Values Memory Globalenvs Events Smallstep. +Require Import Registers Op RTL. +Require Import CSE2 CSE2deps. +Require Import Lia. + +Lemma ptrofs_size : + Ptrofs.wordsize = 64%nat. +Proof. + unfold Ptrofs.wordsize. + unfold Wordsize_Ptrofs.wordsize. + trivial. +Qed. + +Lemma ptrofs_modulus : + Ptrofs.modulus = 18446744073709551616. +Proof. + reflexivity. +Qed. + +Section SOUNDNESS. + Variable F V : Type. + Variable genv: Genv.t F V. + Variable sp : val. + +Section MEMORY_WRITE. + Variable m m2 : mem. + Variable chunkw chunkr : memory_chunk. + Variable base : val. + + Variable addrw addrr valw : val. + Hypothesis STORE : Mem.storev chunkw m addrw valw = Some m2. + + Section INDEXED_AWAY. + Variable ofsw ofsr : int64. + Hypothesis ADDRW : eval_addressing genv sp + (Aindexed ofsw) (base :: nil) = Some addrw. + Hypothesis ADDRR : eval_addressing genv sp + (Aindexed ofsr) (base :: nil) = Some addrr. + + Lemma load_store_away1 : + forall RANGEW : 0 <= Int64.unsigned ofsw <= Ptrofs.modulus - largest_size_chunk, + forall RANGER : 0 <= Int64.unsigned ofsr <= Ptrofs.modulus - largest_size_chunk, + forall SWAPPABLE : Int64.unsigned ofsw + size_chunk chunkw <= Int64.unsigned ofsr + \/ Int64.unsigned ofsr + size_chunk chunkr <= Int64.unsigned ofsw, + Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr. + Proof. + intros. + + pose proof (max_size_chunk chunkr) as size_chunkr_bounded. + pose proof (max_size_chunk chunkw) as size_chunkw_bounded. + unfold largest_size_chunk in *. + + rewrite ptrofs_modulus in *. + simpl in *. + inv ADDRR. + inv ADDRW. + + destruct base; try discriminate. + eapply Mem.load_store_other with (chunk := chunkw) (v := valw) (b := b). + exact STORE. + right. + + all: unfold Ptrofs.of_int64. + + all: try (destruct (Ptrofs.unsigned_add_either i (Ptrofs.repr (Int64.unsigned ofsr))) as [OFSR | OFSR]; + rewrite OFSR). + all: try (destruct (Ptrofs.unsigned_add_either i (Ptrofs.repr (Int64.unsigned ofsw))) as [OFSW | OFSW]; + rewrite OFSW). + all: repeat rewrite Ptrofs.unsigned_repr by (change Ptrofs.max_unsigned with 18446744073709551615; lia). + + all: try rewrite ptrofs_modulus in *. + + all: intuition lia. + Qed. + + Theorem load_store_away : + can_swap_accesses_ofs (Int64.unsigned ofsr) chunkr (Int64.unsigned ofsw) chunkw = true -> + Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr. + Proof. + intro SWAP. + unfold can_swap_accesses_ofs in SWAP. + repeat rewrite andb_true_iff in SWAP. + repeat rewrite orb_true_iff in SWAP. + repeat rewrite Z.leb_le in SWAP. + apply load_store_away1. + all: tauto. + Qed. + End INDEXED_AWAY. +End MEMORY_WRITE. +End SOUNDNESS. + + +Section SOUNDNESS. + Variable F V : Type. + Variable genv: Genv.t F V. + Variable sp : val. + +Lemma may_overlap_sound: + forall m m' : mem, + forall chunk addr args chunk' addr' args' v a a' rs, + (eval_addressing genv sp addr (rs ## args)) = Some a -> + (eval_addressing genv sp addr' (rs ## args')) = Some a' -> + (may_overlap chunk addr args chunk' addr' args') = false -> + (Mem.storev chunk m a v) = Some m' -> + (Mem.loadv chunk' m' a') = (Mem.loadv chunk' m a'). +Proof. + intros until rs. + intros ADDR ADDR' OVERLAP STORE. + destruct addr; destruct addr'; try discriminate. + { (* Aindexed / Aindexed *) + destruct args as [ | base [ | ]]. 1,3: discriminate. + destruct args' as [ | base' [ | ]]. 1,3: discriminate. + simpl in OVERLAP. + destruct (peq base base'). 2: discriminate. + subst base'. + destruct (can_swap_accesses_ofs (Int64.unsigned ofs0) chunk' (Int64.unsigned ofs) chunk) eqn:SWAP. + 2: discriminate. + simpl in *. + eapply load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption. + } +Qed. + +End SOUNDNESS. diff --git a/aarch64/Conventions1.v b/aarch64/Conventions1.v index 14cb199f..efda835d 100644 --- a/aarch64/Conventions1.v +++ b/aarch64/Conventions1.v @@ -190,27 +190,6 @@ Fixpoint loc_arguments_rec Definition loc_arguments (s: signature) : list (rpair loc) := loc_arguments_rec s.(sig_args) 0 0 0. -(** [size_arguments s] returns the number of [Outgoing] slots used - to call a function with signature [s]. *) - -Fixpoint size_arguments_rec (tyl: list typ) (ir fr ofs: Z) {struct tyl} : Z := - match tyl with - | nil => ofs - | (Tint | Tlong | Tany32 | Tany64) :: tys => - match list_nth_z int_param_regs ir with - | None => size_arguments_rec tys ir fr (ofs + 2) - | Some ireg => size_arguments_rec tys (ir + 1) fr ofs - end - | (Tfloat | Tsingle) :: tys => - match list_nth_z float_param_regs fr with - | None => size_arguments_rec tys ir fr (ofs + 2) - | Some freg => size_arguments_rec tys ir (fr + 1) ofs - end - end. - -Definition size_arguments (s: signature) : Z := - size_arguments_rec s.(sig_args) 0 0 0. - (** Argument locations are either caller-save registers or [Outgoing] stack slots at nonnegative offsets. *) @@ -285,92 +264,6 @@ Qed. Hint Resolve loc_arguments_acceptable: locs. -(** The offsets of [Outgoing] arguments are below [size_arguments s]. *) - -Remark size_arguments_rec_above: - forall tyl ir fr ofs0, - ofs0 <= size_arguments_rec tyl ir fr ofs0. -Proof. - induction tyl; simpl; intros. - omega. - assert (A: ofs0 <= - match list_nth_z int_param_regs ir with - | Some _ => size_arguments_rec tyl (ir + 1) fr ofs0 - | None => size_arguments_rec tyl ir fr (ofs0 + 2) - end). - { destruct (list_nth_z int_param_regs ir); eauto. - apply Z.le_trans with (ofs0 + 2); auto. omega. } - assert (B: ofs0 <= - match list_nth_z float_param_regs fr with - | Some _ => size_arguments_rec tyl ir (fr + 1) ofs0 - | None => size_arguments_rec tyl ir fr (ofs0 + 2) - end). - { destruct (list_nth_z float_param_regs fr); eauto. - apply Z.le_trans with (ofs0 + 2); auto. omega. } - destruct a; auto. -Qed. - -Lemma size_arguments_above: - forall s, size_arguments s >= 0. -Proof. - intros; unfold size_arguments. apply Z.le_ge. apply size_arguments_rec_above. -Qed. - -Lemma loc_arguments_rec_bounded: - forall ofs ty tyl ir fr ofs0, - In (S Outgoing ofs ty) (regs_of_rpairs (loc_arguments_rec tyl ir fr ofs0)) -> - ofs + typesize ty <= size_arguments_rec tyl ir fr ofs0. -Proof. - induction tyl; simpl; intros. -- contradiction. -- assert (T: forall ty0, typesize ty0 <= 2). - { destruct ty0; simpl; omega. } - assert (A: forall ty0, - In (S Outgoing ofs ty) (regs_of_rpairs - match list_nth_z int_param_regs ir with - | Some ireg => - One (R ireg) :: loc_arguments_rec tyl (ir + 1) fr ofs0 - | None => One (S Outgoing ofs0 ty0) :: loc_arguments_rec tyl ir fr (ofs0 + 2) - end) -> - ofs + typesize ty <= - match list_nth_z int_param_regs ir with - | Some _ => size_arguments_rec tyl (ir + 1) fr ofs0 - | None => size_arguments_rec tyl ir fr (ofs0 + 2) - end). - { intros. destruct (list_nth_z int_param_regs ir); simpl in H0; destruct H0. - - discriminate. - - eapply IHtyl; eauto. - - inv H0. apply Z.le_trans with (ofs + 2). specialize (T ty). omega. apply size_arguments_rec_above. - - eapply IHtyl; eauto. } - assert (B: forall ty0, - In (S Outgoing ofs ty) (regs_of_rpairs - match list_nth_z float_param_regs fr with - | Some ireg => - One (R ireg) :: loc_arguments_rec tyl ir (fr + 1) ofs0 - | None => One (S Outgoing ofs0 ty0) :: loc_arguments_rec tyl ir fr (ofs0 + 2) - end) -> - ofs + typesize ty <= - match list_nth_z float_param_regs fr with - | Some _ => size_arguments_rec tyl ir (fr + 1) ofs0 - | None => size_arguments_rec tyl ir fr (ofs0 + 2) - end). - { intros. destruct (list_nth_z float_param_regs fr); simpl in H0; destruct H0. - - discriminate. - - eapply IHtyl; eauto. - - inv H0. apply Z.le_trans with (ofs + 2). specialize (T ty). omega. apply size_arguments_rec_above. - - eapply IHtyl; eauto. } - destruct a; eauto. -Qed. - -Lemma loc_arguments_bounded: - forall (s: signature) (ofs: Z) (ty: typ), - In (S Outgoing ofs ty) (regs_of_rpairs (loc_arguments s)) -> - ofs + typesize ty <= size_arguments s. -Proof. - unfold loc_arguments, size_arguments; intros. - eauto using loc_arguments_rec_bounded. -Qed. - Lemma loc_arguments_main: loc_arguments signature_main = nil. Proof. diff --git a/aarch64/DuplicateOpcodeHeuristic.ml b/aarch64/DuplicateOpcodeHeuristic.ml index 85505245..5fc2156c 100644 --- a/aarch64/DuplicateOpcodeHeuristic.ml +++ b/aarch64/DuplicateOpcodeHeuristic.ml @@ -1,3 +1,27 @@ -exception HeuristicSucceeded - -let opcode_heuristic code cond ifso ifnot preferred = () +open Op +open Integers + +let opcode_heuristic code cond ifso ifnot is_loop_header = + match cond with + | Ccompimm (c, n) | Ccompuimm (c, n) -> if n == Integers.Int.zero then (match c with + | Clt | Cle -> Some false + | Cgt | Cge -> Some true + | _ -> None + ) else None + | Ccomplimm (c, n) | Ccompluimm (c, n) -> if n == Integers.Int64.zero then (match c with + | Clt | Cle -> Some false + | Cgt | Cge -> Some true + | _ -> None + ) else None + | Ccompf c | Ccompfs c -> (match c with + | Ceq -> Some false + | Cne -> Some true + | _ -> None + ) + | Cnotcompf c | Cnotcompfs c -> (match c with + | Ceq -> Some true + | Cne -> Some false + | _ -> None + ) + | _ -> None + diff --git a/arm/CSE2deps.v b/arm/CSE2deps.v new file mode 100644 index 00000000..9db51bbb --- /dev/null +++ b/arm/CSE2deps.v @@ -0,0 +1,20 @@ +Require Import BoolEqual Coqlib. +Require Import AST Integers Floats. +Require Import Values Memory Globalenvs Events. +Require Import Op. + + +Definition can_swap_accesses_ofs ofsr chunkr ofsw chunkw := + (0 <=? ofsw) && (ofsw <=? (Ptrofs.modulus - largest_size_chunk)) + && (0 <=? ofsr) && (ofsr <=? (Ptrofs.modulus - largest_size_chunk)) + && ((ofsw + size_chunk chunkw <=? ofsr) || + (ofsr + size_chunk chunkr <=? ofsw)). + +Definition may_overlap chunk addr args chunk' addr' args' := + match addr, addr', args, args' with + | (Aindexed ofs), (Aindexed ofs'), + (base :: nil), (base' :: nil) => + if peq base base' + then negb (can_swap_accesses_ofs (Int.unsigned ofs') chunk' (Int.unsigned ofs) chunk) + else true | _, _, _, _ => true + end. diff --git a/arm/CSE2depsproof.v b/arm/CSE2depsproof.v new file mode 100644 index 00000000..61fe5980 --- /dev/null +++ b/arm/CSE2depsproof.v @@ -0,0 +1,129 @@ +Require Import Coqlib Maps Errors Integers Floats Lattice Kildall. +Require Import AST Linking. +Require Import Memory Registers Op RTL Maps. + +Require Import Globalenvs Values. +Require Import Linking Values Memory Globalenvs Events Smallstep. +Require Import Registers Op RTL. +Require Import CSE2 CSE2deps. +Require Import Lia. + +Lemma ptrofs_size : + Ptrofs.wordsize = 32%nat. +Proof. + unfold Ptrofs.wordsize. + unfold Wordsize_Ptrofs.wordsize. + trivial. +Qed. + +Lemma ptrofs_modulus : + Ptrofs.modulus = 4294967296. +Proof. + unfold Ptrofs.modulus. + rewrite ptrofs_size. + destruct Archi.ptr64; reflexivity. +Qed. + +Section SOUNDNESS. + Variable F V : Type. + Variable genv: Genv.t F V. + Variable sp : val. + +Section MEMORY_WRITE. + Variable m m2 : mem. + Variable chunkw chunkr : memory_chunk. + Variable base : val. + + Variable addrw addrr valw : val. + Hypothesis STORE : Mem.storev chunkw m addrw valw = Some m2. + + Section INDEXED_AWAY. + Variable ofsw ofsr : int. + Hypothesis ADDRW : eval_addressing genv sp + (Aindexed ofsw) (base :: nil) = Some addrw. + Hypothesis ADDRR : eval_addressing genv sp + (Aindexed ofsr) (base :: nil) = Some addrr. + + Lemma load_store_away1 : + forall RANGEW : 0 <= Int.unsigned ofsw <= Ptrofs.modulus - largest_size_chunk, + forall RANGER : 0 <= Int.unsigned ofsr <= Ptrofs.modulus - largest_size_chunk, + forall SWAPPABLE : Int.unsigned ofsw + size_chunk chunkw <= Int.unsigned ofsr + \/ Int.unsigned ofsr + size_chunk chunkr <= Int.unsigned ofsw, + Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr. + Proof. + intros. + + pose proof (max_size_chunk chunkr) as size_chunkr_bounded. + pose proof (max_size_chunk chunkw) as size_chunkw_bounded. + unfold largest_size_chunk in *. + + rewrite ptrofs_modulus in *. + simpl in *. + inv ADDRR. + inv ADDRW. + destruct base; try discriminate. + eapply Mem.load_store_other with (chunk := chunkw) (v := valw) (b := b). + exact STORE. + right. + + all: try (destruct (Ptrofs.unsigned_add_either i (Ptrofs.of_int ofsr)) as [OFSR | OFSR]; + rewrite OFSR). + all: try (destruct (Ptrofs.unsigned_add_either i (Ptrofs.of_int ofsw)) as [OFSW | OFSW]; + rewrite OFSW). + + all: try rewrite ptrofs_modulus in *. + + all: unfold Ptrofs.of_int. + + all: repeat rewrite Ptrofs.unsigned_repr by (change Ptrofs.max_unsigned with 4294967295; lia). + all: intuition lia. + Qed. + + Theorem load_store_away : + can_swap_accesses_ofs (Int.unsigned ofsr) chunkr (Int.unsigned ofsw) chunkw = true -> + Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr. + Proof. + intro SWAP. + unfold can_swap_accesses_ofs in SWAP. + repeat rewrite andb_true_iff in SWAP. + repeat rewrite orb_true_iff in SWAP. + repeat rewrite Z.leb_le in SWAP. + apply load_store_away1. + all: tauto. + Qed. + End INDEXED_AWAY. +End MEMORY_WRITE. +End SOUNDNESS. + + +Section SOUNDNESS. + Variable F V : Type. + Variable genv: Genv.t F V. + Variable sp : val. + +Lemma may_overlap_sound: + forall m m' : mem, + forall chunk addr args chunk' addr' args' v a a' rs, + (eval_addressing genv sp addr (rs ## args)) = Some a -> + (eval_addressing genv sp addr' (rs ## args')) = Some a' -> + (may_overlap chunk addr args chunk' addr' args') = false -> + (Mem.storev chunk m a v) = Some m' -> + (Mem.loadv chunk' m' a') = (Mem.loadv chunk' m a'). +Proof. + intros until rs. + intros ADDR ADDR' OVERLAP STORE. + destruct addr; destruct addr'; try discriminate. + { (* Aindexed / Aindexed *) + destruct args as [ | base [ | ]]. 1,3: discriminate. + destruct args' as [ | base' [ | ]]. 1,3: discriminate. + simpl in OVERLAP. + destruct (peq base base'). 2: discriminate. + subst base'. + destruct (can_swap_accesses_ofs (Int.unsigned i0) chunk' (Int.unsigned i) chunk) eqn:SWAP. + 2: discriminate. + simpl in *. + eapply load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption. + } +Qed. + +End SOUNDNESS. diff --git a/arm/Conventions1.v b/arm/Conventions1.v index 7016c1ee..fe49a781 100644 --- a/arm/Conventions1.v +++ b/arm/Conventions1.v @@ -269,48 +269,6 @@ Definition loc_arguments (s: signature) : list (rpair loc) := else loc_arguments_hf s.(sig_args) 0 0 0 end. -(** [size_arguments s] returns the number of [Outgoing] slots used - to call a function with signature [s]. *) - -Fixpoint size_arguments_hf (tyl: list typ) (ir fr ofs: Z) {struct tyl} : Z := - match tyl with - | nil => ofs - | (Tint|Tany32) :: tys => - if zlt ir 4 - then size_arguments_hf tys (ir + 1) fr ofs - else size_arguments_hf tys ir fr (ofs + 1) - | (Tfloat|Tany64) :: tys => - if zlt fr 8 - then size_arguments_hf tys ir (fr + 1) ofs - else size_arguments_hf tys ir fr (align ofs 2 + 2) - | Tsingle :: tys => - if zlt fr 8 - then size_arguments_hf tys ir (fr + 1) ofs - else size_arguments_hf tys ir fr (ofs + 1) - | Tlong :: tys => - let ir := align ir 2 in - if zlt ir 4 - then size_arguments_hf tys (ir + 2) fr ofs - else size_arguments_hf tys ir fr (align ofs 2 + 2) - end. - -Fixpoint size_arguments_sf (tyl: list typ) (ofs: Z) {struct tyl} : Z := - match tyl with - | nil => Z.max 0 ofs - | (Tint | Tsingle | Tany32) :: tys => size_arguments_sf tys (ofs + 1) - | (Tfloat | Tlong | Tany64) :: tys => size_arguments_sf tys (align ofs 2 + 2) - end. - -Definition size_arguments (s: signature) : Z := - match Archi.abi with - | Archi.Softfloat => - size_arguments_sf s.(sig_args) (-4) - | Archi.Hardfloat => - if s.(sig_cc).(cc_vararg) - then size_arguments_sf s.(sig_args) (-4) - else size_arguments_hf s.(sig_args) 0 0 0 - end. - (** Argument locations are either non-temporary registers or [Outgoing] stack slots at nonnegative offsets. *) @@ -471,170 +429,6 @@ Qed. Hint Resolve loc_arguments_acceptable: locs. -(** The offsets of [Outgoing] arguments are below [size_arguments s]. *) - -Remark size_arguments_hf_above: - forall tyl ir fr ofs0, - ofs0 <= size_arguments_hf tyl ir fr ofs0. -Proof. - induction tyl; simpl; intros. - omega. - destruct a. - destruct (zlt ir 4); eauto. apply Z.le_trans with (ofs0 + 1); auto; omega. - destruct (zlt fr 8); eauto. - apply Z.le_trans with (align ofs0 2). apply align_le; omega. - apply Z.le_trans with (align ofs0 2 + 2); auto; omega. - set (ir' := align ir 2). - destruct (zlt ir' 4); eauto. - apply Z.le_trans with (align ofs0 2). apply align_le; omega. - apply Z.le_trans with (align ofs0 2 + 2); auto; omega. - destruct (zlt fr 8); eauto. - apply Z.le_trans with (ofs0 + 1); eauto. omega. - destruct (zlt ir 4); eauto. apply Z.le_trans with (ofs0 + 1); auto; omega. - destruct (zlt fr 8); eauto. - apply Z.le_trans with (align ofs0 2). apply align_le; omega. - apply Z.le_trans with (align ofs0 2 + 2); auto; omega. -Qed. - -Remark size_arguments_sf_above: - forall tyl ofs0, - Z.max 0 ofs0 <= size_arguments_sf tyl ofs0. -Proof. - induction tyl; simpl; intros. - omega. - destruct a; (eapply Z.le_trans; [idtac|eauto]). - xomega. - assert (ofs0 <= align ofs0 2) by (apply align_le; omega). xomega. - assert (ofs0 <= align ofs0 2) by (apply align_le; omega). xomega. - xomega. - xomega. - assert (ofs0 <= align ofs0 2) by (apply align_le; omega). xomega. -Qed. - -Lemma size_arguments_above: - forall s, size_arguments s >= 0. -Proof. - intros; unfold size_arguments. apply Z.le_ge. - assert (0 <= size_arguments_sf (sig_args s) (-4)). - { change 0 with (Z.max 0 (-4)). apply size_arguments_sf_above. } - assert (0 <= size_arguments_hf (sig_args s) 0 0 0). - { apply size_arguments_hf_above. } - destruct Archi.abi; [ | destruct (cc_vararg (sig_cc s)) ]; auto. -Qed. - -Lemma loc_arguments_hf_bounded: - forall ofs ty tyl ir fr ofs0, - In (S Outgoing ofs ty) (regs_of_rpairs (loc_arguments_hf tyl ir fr ofs0)) -> - ofs + typesize ty <= size_arguments_hf tyl ir fr ofs0. -Proof. - induction tyl; simpl; intros. - elim H. - destruct a. -- (* int *) - destruct (zlt ir 4); destruct H. - discriminate. - eauto. - inv H. apply size_arguments_hf_above. - eauto. -- (* float *) - destruct (zlt fr 8); destruct H. - discriminate. - eauto. - inv H. apply size_arguments_hf_above. - eauto. -- (* long *) - destruct (zlt (align ir 2) 4). - destruct H. discriminate. destruct H. discriminate. eauto. - destruct Archi.big_endian. - destruct H. inv H. - eapply Z.le_trans. 2: apply size_arguments_hf_above. simpl; omega. - destruct H. inv H. - rewrite <- Z.add_assoc. simpl. apply size_arguments_hf_above. - eauto. - destruct H. inv H. - rewrite <- Z.add_assoc. simpl. apply size_arguments_hf_above. - destruct H. inv H. - eapply Z.le_trans. 2: apply size_arguments_hf_above. simpl; omega. - eauto. -- (* float *) - destruct (zlt fr 8); destruct H. - discriminate. - eauto. - inv H. apply size_arguments_hf_above. - eauto. -- (* any32 *) - destruct (zlt ir 4); destruct H. - discriminate. - eauto. - inv H. apply size_arguments_hf_above. - eauto. -- (* any64 *) - destruct (zlt fr 8); destruct H. - discriminate. - eauto. - inv H. apply size_arguments_hf_above. - eauto. -Qed. - -Lemma loc_arguments_sf_bounded: - forall ofs ty tyl ofs0, - In (S Outgoing ofs ty) (regs_of_rpairs (loc_arguments_sf tyl ofs0)) -> - Z.max 0 (ofs + typesize ty) <= size_arguments_sf tyl ofs0. -Proof. - induction tyl; simpl; intros. - elim H. - destruct a. -- (* int *) - destruct H. - destruct (zlt ofs0 0); inv H. apply size_arguments_sf_above. - eauto. -- (* float *) - destruct H. - destruct (zlt (align ofs0 2) 0); inv H. apply size_arguments_sf_above. - eauto. -- (* long *) - destruct H. - destruct Archi.big_endian. - destruct (zlt (align ofs0 2) 0); inv H. - eapply Z.le_trans. 2: apply size_arguments_sf_above. simpl; xomega. - destruct (zlt (align ofs0 2) 0); inv H. - rewrite <- Z.add_assoc. simpl. apply size_arguments_sf_above. - destruct H. - destruct Archi.big_endian. - destruct (zlt (align ofs0 2) 0); inv H. - rewrite <- Z.add_assoc. simpl. apply size_arguments_sf_above. - destruct (zlt (align ofs0 2) 0); inv H. - eapply Z.le_trans. 2: apply size_arguments_sf_above. simpl; xomega. - eauto. -- (* float *) - destruct H. - destruct (zlt ofs0 0); inv H. apply size_arguments_sf_above. - eauto. -- (* any32 *) - destruct H. - destruct (zlt ofs0 0); inv H. apply size_arguments_sf_above. - eauto. -- (* any64 *) - destruct H. - destruct (zlt (align ofs0 2) 0); inv H. apply size_arguments_sf_above. - eauto. -Qed. - -Lemma loc_arguments_bounded: - forall (s: signature) (ofs: Z) (ty: typ), - In (S Outgoing ofs ty) (regs_of_rpairs (loc_arguments s)) -> - ofs + typesize ty <= size_arguments s. -Proof. - unfold loc_arguments, size_arguments; intros. - assert (In (S Outgoing ofs ty) (regs_of_rpairs (loc_arguments_sf (sig_args s) (-4))) -> - ofs + typesize ty <= size_arguments_sf (sig_args s) (-4)). - { intros. eapply Z.le_trans. 2: eapply loc_arguments_sf_bounded; eauto. xomega. } - assert (In (S Outgoing ofs ty) (regs_of_rpairs (loc_arguments_hf (sig_args s) 0 0 0)) -> - ofs + typesize ty <= size_arguments_hf (sig_args s) 0 0 0). - { intros. eapply loc_arguments_hf_bounded; eauto. } - destruct Archi.abi; [ | destruct (cc_vararg (sig_cc s)) ]; eauto. -Qed. - Lemma loc_arguments_main: loc_arguments signature_main = nil. Proof. diff --git a/arm/DuplicateOpcodeHeuristic.ml b/arm/DuplicateOpcodeHeuristic.ml index 85505245..9b6a6409 100644 --- a/arm/DuplicateOpcodeHeuristic.ml +++ b/arm/DuplicateOpcodeHeuristic.ml @@ -1,3 +1,22 @@ -exception HeuristicSucceeded - -let opcode_heuristic code cond ifso ifnot preferred = () +open Op +open Integers + +let opcode_heuristic code cond ifso ifnot is_loop_header = + match cond with + | Ccompimm (c, n) | Ccompuimm (c, n) -> if n == Integers.Int.zero then (match c with + | Clt | Cle -> Some false + | Cgt | Cge -> Some true + | _ -> None + ) else None + | Ccompf c | Ccompfs c -> (match c with + | Ceq -> Some false + | Cne -> Some true + | _ -> None + ) + | Cnotcompf c | Cnotcompfs c -> (match c with + | Ceq -> Some true + | Cne -> Some false + | _ -> None + ) + | _ -> None + diff --git a/backend/Allocation.v b/backend/Allocation.v index d18b07a9..2323c050 100644 --- a/backend/Allocation.v +++ b/backend/Allocation.v @@ -314,10 +314,10 @@ Definition pair_instr_block Some(BSbuiltin ef args res mv1 args' res' mv2 s) | _ => None end - | Icond cond args s1 s2 => + | Icond cond args s1 s2 i => let (mv1, b1) := extract_moves nil b in match b1 with - | Lcond cond' args' s1' s2' :: b2 => + | Lcond cond' args' s1' s2' i' :: b2 => assertion (eq_condition cond cond'); assertion (peq s1 s1'); assertion (peq s2 s2'); diff --git a/backend/Allocproof.v b/backend/Allocproof.v index b6880860..3c7df58a 100644 --- a/backend/Allocproof.v +++ b/backend/Allocproof.v @@ -169,11 +169,11 @@ Inductive expand_block_shape: block_shape -> RTL.instruction -> LTL.bblock -> Pr (Ibuiltin ef args res s) (expand_moves mv1 (Lbuiltin ef args' res' :: expand_moves mv2 (Lbranch s :: k))) - | ebs_cond: forall cond args mv args' s1 s2 k, + | ebs_cond: forall cond args mv args' s1 s2 k i i', wf_moves mv -> expand_block_shape (BScond cond args mv args' s1 s2) - (Icond cond args s1 s2) - (expand_moves mv (Lcond cond args' s1 s2 :: k)) + (Icond cond args s1 s2 i) + (expand_moves mv (Lcond cond args' s1 s2 i' :: k)) | ebs_jumptable: forall arg mv arg' tbl k, wf_moves mv -> expand_block_shape (BSjumptable arg mv arg' tbl) diff --git a/backend/CSE.v b/backend/CSE.v index 2827161d..1936d4e4 100644 --- a/backend/CSE.v +++ b/backend/CSE.v @@ -496,7 +496,7 @@ Definition transfer (f: function) (approx: PMap.t VA.t) (pc: node) (before: numb | EF_vload _ | EF_annot _ _ _ | EF_annot_val _ _ _ | EF_debug _ _ _ => set_res_unknown before res end - | Icond cond args ifso ifnot => + | Icond cond args ifso ifnot _ => before | Ijumptable arg tbl => before @@ -549,10 +549,10 @@ Definition transf_instr (n: numbering) (instr: instruction) := let (n1, vl) := valnum_regs n args in let (addr', args') := reduce _ combine_addr n1 addr args vl in Istore chunk addr' args' src s - | Icond cond args s1 s2 => + | Icond cond args s1 s2 i => let (n1, vl) := valnum_regs n args in let (cond', args') := reduce _ combine_cond n1 cond args vl in - Icond cond' args' s1 s2 + Icond cond' args' s1 s2 i | _ => instr end. diff --git a/backend/CSE2.v b/backend/CSE2.v index 38a46c1b..900a7517 100644 --- a/backend/CSE2.v +++ b/backend/CSE2.v @@ -6,7 +6,7 @@ David Monniaux, CNRS, VERIMAG Require Import Coqlib Maps Errors Integers Floats Lattice Kildall. Require Import AST Linking. -Require Import Memory Registers Op RTL Maps. +Require Import Memory Registers Op RTL Maps CSE2deps. (* Static analysis *) @@ -265,7 +265,7 @@ Definition kill_sym_val (dst : reg) (sv : sym_val) := Definition kill_reg (dst : reg) (rel : RELATION.t) := PTree.filter1 (fun x => negb (kill_sym_val dst x)) (PTree.remove dst rel). - + Definition kill_sym_val_mem (sv: sym_val) := match sv with | SMove _ => false @@ -273,16 +273,28 @@ Definition kill_sym_val_mem (sv: sym_val) := | SLoad _ _ _ => true end. +Definition kill_sym_val_store chunk addr args (sv: sym_val) := + match sv with + | SMove _ => false + | SOp op _ => op_depends_on_memory op + | SLoad chunk' addr' args' => may_overlap chunk addr args chunk' addr' args' + end. + Definition kill_mem (rel : RELATION.t) := PTree.filter1 (fun x => negb (kill_sym_val_mem x)) rel. - Definition forward_move (rel : RELATION.t) (x : reg) : reg := match rel ! x with | Some (SMove org) => org | _ => x end. +Definition kill_store1 chunk addr args rel := + PTree.filter1 (fun x => negb (kill_sym_val_store chunk addr args x)) rel. + +Definition kill_store chunk addr args rel := + kill_store1 chunk addr (List.map (forward_move rel) args) rel. + Definition move (src dst : reg) (rel : RELATION.t) := PTree.set dst (SMove (forward_move rel src)) (kill_reg dst rel). @@ -393,9 +405,9 @@ Qed. Definition apply_instr instr (rel : RELATION.t) : RB.t := match instr with | Inop _ - | Icond _ _ _ _ + | Icond _ _ _ _ _ | Ijumptable _ _ => Some rel - | Istore _ _ _ _ _ => Some (kill_mem rel) + | Istore chunk addr args _ _ => Some (kill_store chunk addr args rel) | Iop op args dst _ => Some (gen_oper op dst args rel) | Iload trap chunk addr args dst _ => Some (load chunk addr dst args rel) | Icall _ _ _ dst _ => Some (kill_reg dst (kill_mem rel)) @@ -457,7 +469,7 @@ Definition transf_instr (fmap : option (PMap.t RB.t)) match instr with | Iop op args dst s => let args' := subst_args fmap pc args in - match find_op_in_fmap fmap pc op args' with + match (if is_trivial_op op then None else find_op_in_fmap fmap pc op args') with | None => Iop op args' dst s | Some src => Iop Omove (src::nil) dst s end @@ -473,8 +485,8 @@ Definition transf_instr (fmap : option (PMap.t RB.t)) Icall sig ros (subst_args fmap pc args) dst s | Itailcall sig ros args => Itailcall sig ros (subst_args fmap pc args) - | Icond cond args s1 s2 => - Icond cond (subst_args fmap pc args) s1 s2 + | Icond cond args s1 s2 i => + Icond cond (subst_args fmap pc args) s1 s2 i | Ijumptable arg tbl => Ijumptable (subst_arg fmap pc arg) tbl | Ireturn (Some arg) => diff --git a/backend/CSE2proof.v b/backend/CSE2proof.v index 254cc4ce..309ccce1 100644 --- a/backend/CSE2proof.v +++ b/backend/CSE2proof.v @@ -13,7 +13,8 @@ Require Import Memory Registers Op RTL Maps. Require Import Globalenvs Values. Require Import Linking Values Memory Globalenvs Events Smallstep. Require Import Registers Op RTL. -Require Import CSE2. +Require Import CSE2 CSE2deps CSE2depsproof. +Require Import Lia. Lemma args_unaffected: forall rs : regset, @@ -54,9 +55,9 @@ Definition sem_sym_val sym rs (v : option val) : Prop := match eval_addressing genv sp addr rs##args with | Some a => match Mem.loadv chunk m a with | Some dat => v = Some dat - | None => v = None \/ v = Some (default_notrap_load_value chunk) + | None => v = None \/ v = Some Vundef end - | None => v = None \/ v = Some (default_notrap_load_value chunk) + | None => v = None \/ v = Some Vundef end end. @@ -390,6 +391,7 @@ Proof. apply REC; auto. Qed. + Lemma find_load_sound : forall rel : RELATION.t, forall chunk : memory_chunk, @@ -402,9 +404,9 @@ Lemma find_load_sound : match eval_addressing genv sp addr rs##args with | Some a => match Mem.loadv chunk m a with | Some dat => rs#src = dat - | None => rs#src = default_notrap_load_value chunk + | None => rs#src = Vundef end - | None => rs#src = default_notrap_load_value chunk + | None => rs#src = Vundef end. Proof. intros until rs. @@ -419,9 +421,9 @@ Proof. match eval_addressing genv sp addr rs##args with | Some a => match Mem.loadv chunk m a with | Some dat => rs#src = dat - | None => rs#src = default_notrap_load_value chunk + | None => rs#src = Vundef end - | None => rs#src = default_notrap_load_value chunk + | None => rs#src = Vundef end end -> fold_left @@ -431,9 +433,9 @@ Proof. match eval_addressing genv sp addr rs##args with | Some a => match Mem.loadv chunk m a with | Some dat => rs#src = dat - | None => rs#src = default_notrap_load_value chunk + | None => rs#src = Vundef end - | None => rs#src = default_notrap_load_value chunk + | None => rs#src = Vundef end) as REC. { @@ -521,7 +523,7 @@ Lemma find_load_notrap1_sound' : sem_rel rel rs -> find_load rel chunk addr args = Some src -> eval_addressing genv sp addr rs##args = None -> - rs # src = (default_notrap_load_value chunk). + rs # src = Vundef. Proof. intros until rs. intros REL FINDLOAD ADDR. pose proof (find_load_sound rel chunk addr src args rs REL FINDLOAD) as Z. @@ -541,7 +543,7 @@ Lemma find_load_notrap2_sound' : find_load rel chunk addr args = Some src -> eval_addressing genv sp addr rs##args = Some a -> Mem.loadv chunk m a = None -> - rs # src = (default_notrap_load_value chunk). + rs # src = Vundef. Proof. intros until a. intros REL FINDLOAD ADDR LOAD. pose proof (find_load_sound rel chunk addr src args rs REL FINDLOAD) as Z. @@ -566,6 +568,20 @@ Proof. destruct s; congruence. Qed. + +Lemma forward_move_rs: + forall rel arg rs, + sem_rel rel rs -> + rs # (forward_move rel arg) = rs # arg. +Proof. + unfold forward_move, sem_rel, sem_reg, sem_sym_val in *. + intros until rs. + intro REL. + pose proof (REL arg) as RELarg. + destruct (rel ! arg); trivial. + destruct s; congruence. +Qed. + Lemma oper_sound : forall rel : RELATION.t, forall op : operation, @@ -673,11 +689,11 @@ Lemma load2_notrap1_sound : sem_rel rel rs -> not (In dst args) -> eval_addressing genv sp addr (rs ## args) = None -> - sem_rel (load2 chunk addr dst args rel) (rs # dst <- (default_notrap_load_value chunk)). + sem_rel (load2 chunk addr dst args rel) (rs # dst <- Vundef). Proof. intros until rs. intros REL NOT_IN ADDR x. - pose proof (kill_reg_sound rel dst rs (default_notrap_load_value chunk) REL x) as KILL. + pose proof (kill_reg_sound rel dst rs Vundef REL x) as KILL. unfold load2. destruct (peq x dst). { @@ -710,11 +726,11 @@ Lemma load2_notrap2_sound : not (In dst args) -> eval_addressing genv sp addr (rs ## args) = Some a -> Mem.loadv chunk m a = None -> - sem_rel (load2 chunk addr dst args rel) (rs # dst <- (default_notrap_load_value chunk)). + sem_rel (load2 chunk addr dst args rel) (rs # dst <- Vundef). Proof. intros until a. intros REL NOT_IN ADDR LOAD x. - pose proof (kill_reg_sound rel dst rs (default_notrap_load_value chunk) REL x) as KILL. + pose proof (kill_reg_sound rel dst rs Vundef REL x) as KILL. unfold load2. destruct (peq x dst). { @@ -768,7 +784,7 @@ Lemma load1_notrap1_sound : forall rs : regset, sem_rel rel rs -> eval_addressing genv sp addr (rs ## args) = None -> - sem_rel (load1 chunk addr dst args rel) (rs # dst <- (default_notrap_load_value chunk)). + sem_rel (load1 chunk addr dst args rel) (rs # dst <- Vundef). Proof. intros until rs. intros REL ADDR LOAD. @@ -791,7 +807,7 @@ Lemma load1_notrap2_sound : sem_rel rel rs -> eval_addressing genv sp addr (rs ## args) = Some a -> Mem.loadv chunk m a = None -> - sem_rel (load1 chunk addr dst args rel) (rs # dst <- (default_notrap_load_value chunk)). + sem_rel (load1 chunk addr dst args rel) (rs # dst <- Vundef). Proof. intros until a. intros REL ADDR LOAD. @@ -825,9 +841,9 @@ Proof. assert (match eval_addressing genv sp addr rs## (map (forward_move rel) args) with | Some a => match Mem.loadv chunk m a with | Some dat => rs#src = dat - | None => rs#src = default_notrap_load_value chunk + | None => rs#src = Vundef end - | None => rs#src = default_notrap_load_value chunk + | None => rs#src = Vundef end) as FIND_LOAD. { apply (find_load_sound rel); trivial. @@ -853,7 +869,7 @@ Lemma load_notrap1_sound : forall rs : regset, sem_rel rel rs -> eval_addressing genv sp addr (rs ## args) = None -> - sem_rel (load chunk addr dst args rel) (rs # dst <- (default_notrap_load_value chunk)). + sem_rel (load chunk addr dst args rel) (rs # dst <- Vundef). Proof. intros until rs. intros REL ADDR. @@ -863,9 +879,9 @@ Proof. assert (match eval_addressing genv sp addr rs## (map (forward_move rel) args) with | Some a => match Mem.loadv chunk m a with | Some dat => rs#src = dat - | None => rs#src = default_notrap_load_value chunk + | None => rs#src = Vundef end - | None => rs#src = default_notrap_load_value chunk + | None => rs#src = Vundef end) as FIND_LOAD. { apply (find_load_sound rel); trivial. @@ -890,7 +906,7 @@ Lemma load_notrap2_sound : sem_rel rel rs -> eval_addressing genv sp addr (rs ## args) = Some a -> Mem.loadv chunk m a = None -> - sem_rel (load chunk addr dst args rel) (rs # dst <- (default_notrap_load_value chunk)). + sem_rel (load chunk addr dst args rel) (rs # dst <- Vundef). Proof. intros until a. intros REL ADDR. @@ -900,9 +916,9 @@ Proof. assert (match eval_addressing genv sp addr rs## (map (forward_move rel) args) with | Some a => match Mem.loadv chunk m a with | Some dat => rs#src = dat - | None => rs#src = default_notrap_load_value chunk + | None => rs#src = Vundef end - | None => rs#src = default_notrap_load_value chunk + | None => rs#src = Vundef end) as FIND_LOAD. { apply (find_load_sound rel); trivial. @@ -984,7 +1000,40 @@ Proof. apply op_depends_on_memory_correct; auto. } Qed. - + +Lemma kill_store_sound : + forall m m' : mem, + forall rel : RELATION.t, + forall chunk addr args a v rs, + (eval_addressing genv sp addr (rs ## args)) = Some a -> + (Mem.storev chunk m a v) = Some m' -> + sem_rel m rel rs -> sem_rel m' (kill_store chunk addr args rel) rs. +Proof. + unfold sem_rel, sem_reg. + intros until rs. + intros ADDR STORE SEM x. + pose proof (SEM x) as SEMx. + unfold kill_store, kill_store1. + rewrite PTree.gfilter1. + destruct (rel ! x) as [ sv | ]. + 2: reflexivity. + destruct sv; simpl in *; trivial. + { + destruct op_depends_on_memory eqn:DEPENDS; simpl; trivial. + rewrite SEMx. + apply op_depends_on_memory_correct; auto. + } + destruct may_overlap eqn:OVERLAP; simpl; trivial. + destruct (eval_addressing genv sp addr0 rs ## args0) eqn:ADDR0. + { + erewrite may_overlap_sound with (args := (map (forward_move rel) args)). + all: try eassumption. + + erewrite forward_move_map by eassumption. + assumption. + } + intuition congruence. +Qed. End SOUNDNESS. Definition match_prog (p tp: RTL.program) := @@ -1072,6 +1121,7 @@ Definition fmap_sem' := fmap_sem fundef unit ge. Definition subst_arg_ok' := subst_arg_ok fundef unit ge. Definition subst_args_ok' := subst_args_ok fundef unit ge. Definition kill_mem_sound' := kill_mem_sound fundef unit ge. +Definition kill_store_sound' := kill_store_sound fundef unit ge. Lemma sem_rel_b_ge: forall rb1 rb2 : RB.t, @@ -1150,8 +1200,11 @@ Proof. reflexivity. - (* op *) unfold transf_instr in *. - destruct find_op_in_fmap eqn:FIND_OP. + destruct (if is_trivial_op op then None else find_op_in_fmap (forward_map f) pc op + (subst_args (forward_map f) pc args)) eqn:FIND_OP. { + destruct (is_trivial_op op). + discriminate. unfold find_op_in_fmap, fmap_sem', fmap_sem in *. destruct (forward_map f) as [map |] eqn:MAP. 2: discriminate. @@ -1241,9 +1294,9 @@ Proof. { f_equal. symmetry. - apply find_load_sound' with (chunk := chunk) (m := m) (a := a) (genv := ge) (sp := sp) (addr := addr) (args := subst_args (Some map) pc args) (rel := mpc) (src := r) (rs := rs); trivial. rewrite MAP in H0. - assumption. + eapply find_load_sound' with (genv := ge) (sp := sp) (addr := addr) (args := subst_args (Some map) pc args) (rel := mpc) (src := r) (rs := rs). + all: eassumption. } unfold fmap_sem. rewrite MAP. rewrite MPC. assumption. } @@ -1268,36 +1321,37 @@ Proof. apply load_sound with (a := a); auto. } { - econstructor; split. - assert (eval_addressing tge sp addr rs ## args = Some a). - rewrite <- H0. - apply eval_addressing_preserved. exact symbols_preserved. - eapply exec_Iload; eauto. - rewrite (subst_args_ok' sp m); assumption. - constructor; auto. - - simpl in *. - unfold fmap_sem', fmap_sem in *. - destruct (forward_map _) as [map |] eqn:MAP in *; trivial. - destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction. - apply sem_rel_b_ge with (rb2 := Some (load chunk addr dst args mpc)). + econstructor; split. + assert (eval_addressing tge sp addr rs ## args = Some a). + rewrite <- H0. + apply eval_addressing_preserved. exact symbols_preserved. + eapply exec_Iload; eauto. + rewrite (subst_args_ok' sp m); assumption. + constructor; auto. + + simpl in *. + unfold fmap_sem', fmap_sem in *. + destruct (forward_map _) as [map |] eqn:MAP in *; trivial. + destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction. + apply sem_rel_b_ge with (rb2 := Some (load chunk addr dst args mpc)). + { + replace (Some (load chunk addr dst args mpc)) with (apply_instr' (fn_code f) pc (map # pc)). { - replace (Some (load chunk addr dst args mpc)) with (apply_instr' (fn_code f) pc (map # pc)). - { - eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption. - 2: apply apply_instr'_bot. - simpl. tauto. - } - unfold apply_instr'. - rewrite H. - rewrite MPC. - simpl. - reflexivity. + eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption. + 2: apply apply_instr'_bot. + simpl. tauto. } - apply load_sound with (a := a); assumption. + unfold apply_instr'. + rewrite H. + rewrite MPC. + simpl. + reflexivity. + } + apply load_sound with (a := a); assumption. } -- unfold transf_instr in *. +- (* load notrap1 *) + unfold transf_instr in *. destruct find_load_in_fmap eqn:FIND_LOAD. { unfold find_load_in_fmap, fmap_sem', fmap_sem in *. @@ -1306,16 +1360,16 @@ Proof. change (@PMap.get (option RELATION.t) pc map) with (map # pc) in *. destruct (map # pc) as [mpc | ] eqn:MPC. 2: discriminate. - econstructor; split. { eapply exec_Iop with (v := (default_notrap_load_value chunk)); eauto. + simpl. rewrite <- subst_args_ok with (genv := ge) (f := f) (pc := pc) (sp := sp) (m := m) in H0. - { simpl. + { f_equal. - apply find_load_notrap1_sound' with (chunk := chunk) (m := m) (genv := ge) (sp := sp) (addr := addr) (args := subst_args (Some map) pc args) (rel := mpc) (src := r) (rs := rs); trivial. rewrite MAP in H0. - assumption. + eapply find_load_notrap1_sound' with (genv := ge) (sp := sp) (addr := addr) (args := subst_args (Some map) pc args) (rel := mpc) (src := r) (rs := rs). + all: eassumption. } unfold fmap_sem. rewrite MAP. rewrite MPC. assumption. } @@ -1339,37 +1393,38 @@ Proof. unfold sem_rel_b', sem_rel_b. apply load_notrap1_sound; auto. } - { - econstructor; split. - assert (eval_addressing tge sp addr rs ## args = None). - rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved. - eapply exec_Iload_notrap1; eauto. - rewrite subst_args_ok with (genv := ge) (sp := sp) (m := m) ; assumption. - constructor; auto. + { + econstructor; split. + assert (eval_addressing tge sp addr rs ## args = None). + rewrite <- H0. + apply eval_addressing_preserved. exact symbols_preserved. + eapply exec_Iload_notrap1; eauto. + rewrite (subst_args_ok' sp m); assumption. + constructor; auto. - simpl in *. - unfold fmap_sem', fmap_sem in *. - destruct (forward_map _) as [map |] eqn:MAP in *; trivial. - destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction. - apply sem_rel_b_ge with (rb2 := Some (load chunk addr dst args mpc)). + simpl in *. + unfold fmap_sem', fmap_sem in *. + destruct (forward_map _) as [map |] eqn:MAP in *; trivial. + destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction. + apply sem_rel_b_ge with (rb2 := Some (load chunk addr dst args mpc)). + { + replace (Some (load chunk addr dst args mpc)) with (apply_instr' (fn_code f) pc (map # pc)). { - replace (Some (load chunk addr dst args mpc)) with (apply_instr' (fn_code f) pc (map # pc)). - { - eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption. - 2: apply apply_instr'_bot. - simpl. tauto. - } - unfold apply_instr'. - rewrite H. - rewrite MPC. - simpl. - reflexivity. + eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption. + 2: apply apply_instr'_bot. + simpl. tauto. } - apply load_notrap1_sound; trivial. + unfold apply_instr'. + rewrite H. + rewrite MPC. + simpl. + reflexivity. + } + apply load_notrap1_sound; assumption. } -- (* load notrap2 *) - unfold transf_instr in *. +(* load notrap2 *) +- unfold transf_instr in *. destruct find_load_in_fmap eqn:FIND_LOAD. { unfold find_load_in_fmap, fmap_sem', fmap_sem in *. @@ -1385,9 +1440,9 @@ Proof. rewrite <- subst_args_ok with (genv := ge) (f := f) (pc := pc) (sp := sp) (m := m) in H0. { f_equal. - apply find_load_notrap2_sound' with (chunk := chunk) (m := m) (a := a) (genv := ge) (sp := sp) (addr := addr) (args := subst_args (Some map) pc args) (rel := mpc) (src := r) (rs := rs); trivial. rewrite MAP in H0. - assumption. + eapply find_load_notrap2_sound' with (genv := ge) (sp := sp) (addr := addr) (args := subst_args (Some map) pc args) (rel := mpc) (src := r) (rs := rs). + all: try eassumption. } unfold fmap_sem. rewrite MAP. rewrite MPC. assumption. } @@ -1411,19 +1466,20 @@ Proof. unfold sem_rel_b', sem_rel_b. apply load_notrap2_sound with (a := a); auto. } - { + { econstructor; split. assert (eval_addressing tge sp addr rs ## args = Some a). - rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved. + rewrite <- H0. + apply eval_addressing_preserved. exact symbols_preserved. eapply exec_Iload_notrap2; eauto. - rewrite subst_args_ok with (genv := ge) (sp := sp) (m := m); assumption. + rewrite (subst_args_ok' sp m); assumption. constructor; auto. simpl in *. unfold fmap_sem', fmap_sem in *. destruct (forward_map _) as [map |] eqn:MAP in *; trivial. destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction. - apply sem_rel_b_ge with (rb2 := Some (load chunk addr dst args mpc)). + apply sem_rel_b_ge with (rb2 := Some (load chunk addr dst args mpc)). { replace (Some (load chunk addr dst args mpc)) with (apply_instr' (fn_code f) pc (map # pc)). { @@ -1454,9 +1510,9 @@ Proof. unfold fmap_sem', fmap_sem in *. destruct (forward_map _) as [map |] eqn:MAP in *; trivial. destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction. - apply sem_rel_b_ge with (rb2 := Some (kill_mem mpc)); trivial. + apply sem_rel_b_ge with (rb2 := Some (kill_store chunk addr args mpc)); trivial. { - replace (Some (kill_mem mpc)) with (apply_instr' (fn_code f) pc (map # pc)). + replace (Some (kill_store chunk addr args mpc)) with (apply_instr' (fn_code f) pc (map # pc)). { eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption. 2: apply apply_instr'_bot. @@ -1468,8 +1524,7 @@ Proof. rewrite H. reflexivity. } - apply (kill_mem_sound' sp m). - assumption. + eapply (kill_store_sound' sp m); eassumption. (* call *) - econstructor; split. @@ -1625,7 +1680,7 @@ Proof. econstructor; split. eapply exec_return; eauto. constructor; auto. -Admitted. +Qed. Lemma transf_initial_states: @@ -1657,4 +1712,4 @@ Proof. exact step_simulation. Qed. -End PRESERVATION.
\ No newline at end of file +End PRESERVATION. diff --git a/backend/Constprop.v b/backend/Constprop.v index eda41b39..0be9438c 100644 --- a/backend/Constprop.v +++ b/backend/Constprop.v @@ -69,7 +69,7 @@ Fixpoint successor_rec (n: nat) (f: function) (ae: AE.t) (pc: node) : node := match f.(fn_code)!pc with | Some (Inop s) => successor_rec n' f ae s - | Some (Icond cond args s1 s2) => + | Some (Icond cond args s1 s2 _) => match resolve_branch (eval_static_condition cond (aregs ae args)) with | Some b => successor_rec n' f ae (if b then s1 else s2) | None => pc @@ -217,14 +217,14 @@ Definition transf_instr (f: function) (an: PMap.t VA.t) (rm: romem) end | _, _ => dfl end - | Icond cond args s1 s2 => + | Icond cond args s1 s2 i => let aargs := aregs ae args in match resolve_branch (eval_static_condition cond aargs) with | Some b => if b then Inop s1 else Inop s2 | None => let (cond', args') := cond_strength_reduction cond args aargs in - Icond cond' args' s1 s2 + Icond cond' args' s1 s2 i end | Ijumptable arg tbl => match areg ae arg with diff --git a/backend/Constpropproof.v b/backend/Constpropproof.v index 63cfee24..60663503 100644 --- a/backend/Constpropproof.v +++ b/backend/Constpropproof.v @@ -142,8 +142,8 @@ Inductive match_pc (f: function) (rs: regset) (m: mem): nat -> node -> node -> P f.(fn_code)!pc = Some (Inop s) -> match_pc f rs m n s pcx -> match_pc f rs m (S n) pc pcx - | match_pc_cond: forall n pc cond args s1 s2 pcx, - f.(fn_code)!pc = Some (Icond cond args s1 s2) -> + | match_pc_cond: forall n pc cond args s1 s2 pcx i, + f.(fn_code)!pc = Some (Icond cond args s1 s2 i) -> (forall b, eval_condition cond rs##args m = Some b -> match_pc f rs m n (if b then s1 else s2) pcx) -> diff --git a/backend/Conventions.v b/backend/Conventions.v index 6025c6b4..14ffb587 100644 --- a/backend/Conventions.v +++ b/backend/Conventions.v @@ -34,6 +34,73 @@ Proof. apply IHpl; auto. Qed. +(** ** Stack size of function arguments *) + +(** [size_arguments s] returns the number of [Outgoing] slots used + to call a function with signature [s]. *) + +Definition max_outgoing_1 (accu: Z) (l: loc) : Z := + match l with + | S Outgoing ofs ty => Z.max accu (ofs + typesize ty) + | _ => accu + end. + +Definition max_outgoing_2 (accu: Z) (rl: rpair loc) : Z := + match rl with + | One l => max_outgoing_1 accu l + | Twolong l1 l2 => max_outgoing_1 (max_outgoing_1 accu l1) l2 + end. + +Definition size_arguments (s: signature) : Z := + List.fold_left max_outgoing_2 (loc_arguments s) 0. + +(** The offsets of [Outgoing] arguments are below [size_arguments s]. *) + +Remark fold_max_outgoing_above: + forall l n, fold_left max_outgoing_2 l n >= n. +Proof. + assert (A: forall n l, max_outgoing_1 n l >= n). + { intros; unfold max_outgoing_1. destruct l as [_ | []]; xomega. } + induction l; simpl; intros. + - omega. + - eapply Zge_trans. eauto. + destruct a; simpl. apply A. eapply Zge_trans; eauto. +Qed. + +Lemma size_arguments_above: + forall s, size_arguments s >= 0. +Proof. + intros. apply fold_max_outgoing_above. +Qed. + +Lemma loc_arguments_bounded: + forall (s: signature) (ofs: Z) (ty: typ), + In (S Outgoing ofs ty) (regs_of_rpairs (loc_arguments s)) -> + ofs + typesize ty <= size_arguments s. +Proof. + intros until ty. + assert (A: forall n l, n <= max_outgoing_1 n l). + { intros; unfold max_outgoing_1. destruct l as [_ | []]; xomega. } + assert (B: forall p n, + In (S Outgoing ofs ty) (regs_of_rpair p) -> + ofs + typesize ty <= max_outgoing_2 n p). + { intros. destruct p; simpl in H; intuition; subst; simpl. + - xomega. + - eapply Z.le_trans. 2: apply A. xomega. + - xomega. } + assert (C: forall l n, + In (S Outgoing ofs ty) (regs_of_rpairs l) -> + ofs + typesize ty <= fold_left max_outgoing_2 l n). + { induction l; simpl; intros. + - contradiction. + - rewrite in_app_iff in H. destruct H. + + eapply Z.le_trans. eapply B; eauto. + apply Z.ge_le. apply fold_max_outgoing_above. + + apply IHl; auto. + } + apply C. +Qed. + (** ** Location of function parameters *) (** A function finds the values of its parameter in the same locations diff --git a/backend/Deadcode.v b/backend/Deadcode.v index 1f208a91..3412a6fa 100644 --- a/backend/Deadcode.v +++ b/backend/Deadcode.v @@ -142,7 +142,7 @@ Definition transfer (f: function) (approx: PMap.t VA.t) nmem_dead_stack f.(fn_stacksize)) | Some(Ibuiltin ef args res s) => transfer_builtin approx!!pc ef args res ne nm - | Some(Icond cond args s1 s2) => + | Some(Icond cond args s1 s2 _) => if peq s1 s2 then after else (add_needs args (needs_of_condition cond) ne, nm) | Some(Ijumptable arg tbl) => @@ -192,7 +192,7 @@ Definition transf_instr (approx: PMap.t VA.t) (an: PMap.t NA.t) if nmem_contains (snd an!!pc) (aaddr_arg approx!!pc dst) sz then instr else Inop s - | Icond cond args s1 s2 => + | Icond cond args s1 s2 _ => if peq s1 s2 then Inop s1 else instr | _ => instr diff --git a/backend/Duplicate.v b/backend/Duplicate.v index 82c17367..af85efe4 100644 --- a/backend/Duplicate.v +++ b/backend/Duplicate.v @@ -134,8 +134,8 @@ Definition verify_match_inst dupmap inst tinst := else Error (msg "Different ef in Ibuiltin") | _ => Error (msg "verify_match_inst Ibuiltin") end - | Icond cond lr n1 n2 => match tinst with - | Icond cond' lr' n1' n2' => + | Icond cond lr n1 n2 i => match tinst with + | Icond cond' lr' n1' n2' i' => if (list_eq_dec Pos.eq_dec lr lr') then if (eq_condition cond cond') then do u1 <- verify_is_copy dupmap n1 n1'; diff --git a/backend/Duplicateaux.ml b/backend/Duplicateaux.ml index d0b7129e..89f187da 100644 --- a/backend/Duplicateaux.ml +++ b/backend/Duplicateaux.ml @@ -1,25 +1,32 @@ +(* Oracle for Duplicate pass. + * - Add static prediction information to Icond nodes + * - Performs tail duplication on interesting traces to form superblocks + * - (TODO: perform partial loop unrolling inside innermost loops) + *) + open RTL open Maps open Camlcoq -(* TTL : IR emphasizing the preferred next node *) -module TTL = struct - type instruction = - | Tleaf of RTL.instruction - | Tnext of node * RTL.instruction - - type code = instruction PTree.t -end;; - -open TTL +let debug_flag = ref false -(** RTL to TTL *) +let debug fmt = + if !debug_flag then Printf.eprintf fmt + else Printf.ifprintf stderr fmt let get_some = function | None -> failwith "Did not get some" | Some thing -> thing -let bfs code entrypoint = +let rtl_successors = function +| Itailcall _ | Ireturn _ -> [] +| Icall(_,_,_,_,n) | Ibuiltin(_,_,_,n) | Inop n | Iop (_,_,_,n) +| Iload (_,_,_,_,_,n) | Istore (_,_,_,_,n) -> [n] +| Icond (_,_,n1,n2,_) -> [n1; n2] +| Ijumptable (_,ln) -> ln + +let bfs code entrypoint = begin + debug "bfs\n"; let visited = ref (PTree.map (fun n i -> false) code) and bfs_list = ref [] and to_visit = Queue.create () @@ -33,32 +40,24 @@ let bfs code entrypoint = match PTree.get !node code with | None -> failwith "No such node" | Some i -> - bfs_list := !bfs_list @ [!node]; - match i with - | Icall(_, _, _, _, n) -> Queue.add n to_visit - | Ibuiltin(_, _, _, n) -> Queue.add n to_visit - | Ijumptable(_, ln) -> List.iter (fun n -> Queue.add n to_visit) ln - | Itailcall _ | Ireturn _ -> () - | Icond (_, _, n1, n2) -> Queue.add n1 to_visit; Queue.add n2 to_visit - | Inop n | Iop (_, _, _, n) | Iload (_, _, _, _, _, n) | Istore (_, _, _, _, n) -> Queue.add n to_visit + bfs_list := !node :: !bfs_list; + let succ = rtl_successors i in + List.iter (fun n -> Queue.add n to_visit) succ end done; - !bfs_list + List.rev !bfs_list end +end let optbool o = match o with Some _ -> true | None -> false let ptree_get_some n ptree = get_some @@ PTree.get n ptree -let get_predecessors_rtl code = +let get_predecessors_rtl code = begin + debug "get_predecessors_rtl\n"; let preds = ref (PTree.map (fun n i -> []) code) in let process_inst (node, i) = - let succ = match i with - | Inop n | Iop (_,_,_,n) | Iload (_, _,_,_,_,n) | Istore (_,_,_,_,n) - | Icall (_,_,_,_,n) | Ibuiltin (_, _, _, n) -> [n] - | Icond (_,_,n1,n2) -> [n1;n2] - | Ijumptable (_,ln) -> ln - | Itailcall _ | Ireturn _ -> [] + let succ = rtl_successors i in List.iter (fun s -> let previous_preds = ptree_get_some s !preds in if optbool @@ List.find_opt (fun e -> e == node) previous_preds then () @@ -67,6 +66,7 @@ let get_predecessors_rtl code = List.iter process_inst (PTree.elements code); !preds end +end module PInt = struct type t = P.t @@ -80,65 +80,23 @@ let print_intlist l = | [] -> () | n::ln -> (Printf.printf "%d " (P.to_int n); f ln) in begin - Printf.printf "["; - f l; - Printf.printf "]" + if !debug_flag then begin + Printf.printf "["; + f l; + Printf.printf "]" + end end let print_intset s = let seq = PSet.to_seq s in begin - Printf.printf "{"; - Seq.iter (fun n -> - Printf.printf "%d " (P.to_int n) - ) seq; - Printf.printf "}" - end - -(* FIXME - dominators not working well because the order of dataflow update isn't right *) - (* -let get_dominators code entrypoint = - let bfs_order = bfs code entrypoint - and predecessors = get_predecessors_rtl code - in let doms = ref (PTree.map (fun n i -> PSet.of_list bfs_order) code) - in begin - Printf.printf "BFS: "; - print_intlist bfs_order; - Printf.printf "\n"; - List.iter (fun n -> - let preds = get_some @@ PTree.get n predecessors - and single = PSet.singleton n - in match preds with - | [] -> doms := PTree.set n single !doms - | p::lp -> - let set_p = get_some @@ PTree.get p !doms - and set_lp = List.map (fun p -> get_some @@ PTree.get p !doms) lp - in let inter = List.fold_left PSet.inter set_p set_lp - in let union = PSet.union inter single - in begin - Printf.printf "----------------------------------------\n"; - Printf.printf "n = %d\n" (P.to_int n); - Printf.printf "set_p = "; print_intset set_p; Printf.printf "\n"; - Printf.printf "set_lp = ["; List.iter (fun s -> print_intset s; Printf.printf ", ") set_lp; Printf.printf "]\n"; - Printf.printf "=> inter = "; print_intset inter; Printf.printf "\n"; - Printf.printf "=> union = "; print_intset union; Printf.printf "\n"; - doms := PTree.set n union !doms - end - ) bfs_order; - !doms - end -*) - -let print_dominators dominators = - let domlist = PTree.elements dominators - in begin - Printf.printf "{\n"; - List.iter (fun (n, doms) -> - Printf.printf "\t"; - Printf.printf "%d:" (P.to_int n); - print_intset doms; - Printf.printf "\n" - ) domlist + if !debug_flag then begin + Printf.printf "{"; + Seq.iter (fun n -> + Printf.printf "%d " (P.to_int n) + ) seq; + Printf.printf "}" + end end type vstate = Unvisited | Processed | Visited @@ -150,7 +108,8 @@ type vstate = Unvisited | Processed | Visited * * If we come accross an edge to a Processed node, it's a loop! *) -let get_loop_headers code entrypoint = +let get_loop_headers code entrypoint = begin + debug "get_loop_headers\n"; let visited = ref (PTree.map (fun n i -> Unvisited) code) and is_loop_header = ref (PTree.map (fun n i -> false) code) in let rec dfs_visit code = function @@ -159,6 +118,7 @@ let get_loop_headers code entrypoint = match (get_some @@ PTree.get node !visited) with | Visited -> () | Processed -> begin + debug "Node %d is a loop header\n" (P.to_int node); is_loop_header := PTree.set node true !is_loop_header; visited := PTree.set node Visited !visited end @@ -166,13 +126,7 @@ let get_loop_headers code entrypoint = visited := PTree.set node Processed !visited; match PTree.get node code with | None -> failwith "No such node" - | Some i -> let next_visits = (match i with - | Icall (_, _, _, _, n) | Ibuiltin (_, _, _, n) | Inop n | Iop (_, _, _, n) - | Iload (_, _, _, _, _, n) | Istore (_, _, _, _, n) -> [n] - | Icond (_, _, n1, n2) -> [n1; n2] - | Itailcall _ | Ireturn _ -> [] - | Ijumptable (_, ln) -> ln - ) in dfs_visit code next_visits; + | Some i -> let next_visits = rtl_successors i in dfs_visit code next_visits; visited := PTree.set node Visited !visited; dfs_visit code ln end @@ -180,140 +134,220 @@ let get_loop_headers code entrypoint = dfs_visit code [entrypoint]; !is_loop_header end +end let ptree_printbool pt = let elements = PTree.elements pt in begin - Printf.printf "["; - List.iter (fun (n, b) -> - if b then Printf.printf "%d, " (P.to_int n) else () - ) elements; - Printf.printf "]" + if !debug_flag then begin + Printf.printf "["; + List.iter (fun (n, b) -> + if b then Printf.printf "%d, " (P.to_int n) else () + ) elements; + Printf.printf "]" + end end (* Looks ahead (until a branch) to see if a node further down verifies * the given predicate *) let rec look_ahead code node is_loop_header predicate = if (predicate node) then true - else match (get_some @@ PTree.get node code) with - | Ireturn _ | Itailcall _ | Icond _ | Ijumptable _ -> false - | Inop n | Iop (_, _, _, n) | Iload (_, _, _, _, _, n) - | Istore (_, _, _, _, n) | Icall (_, _, _, _, n) - | Ibuiltin (_, _, _, n) -> - if (predicate n) then true - else ( - if (get_some @@ PTree.get n is_loop_header) then false - else look_ahead code n is_loop_header predicate - ) - -exception HeuristicSucceeded - -let do_call_heuristic code ifso ifnot is_loop_header preferred = - let predicate n = (function - | Icall _ -> true - | _ -> false) @@ get_some @@ PTree.get n code - in if (look_ahead code ifso is_loop_header predicate) then - (preferred := false; raise HeuristicSucceeded) - else if (look_ahead code ifnot is_loop_header predicate) then - (preferred := true; raise HeuristicSucceeded) - else () - -let do_opcode_heuristic code cond ifso ifnot preferred = DuplicateOpcodeHeuristic.opcode_heuristic code cond ifso ifnot preferred - -let do_return_heuristic code ifso ifnot is_loop_header preferred = - let predicate n = (function - | Ireturn _ -> true - | _ -> false) @@ get_some @@ PTree.get n code - in if (look_ahead code ifso is_loop_header predicate) then - (preferred := false; raise HeuristicSucceeded) - else if (look_ahead code ifnot is_loop_header predicate) then - (preferred := true; raise HeuristicSucceeded) - else () - -let do_store_heuristic code ifso ifnot is_loop_header preferred = - let predicate n = (function - | Istore _ -> true - | _ -> false) @@ get_some @@ PTree.get n code - in if (look_ahead code ifso is_loop_header predicate) then - (preferred := false; raise HeuristicSucceeded) - else if (look_ahead code ifnot is_loop_header predicate) then - (preferred := true; raise HeuristicSucceeded) - else () - -let do_loop_heuristic code ifso ifnot is_loop_header preferred = - let predicate n = get_some @@ PTree.get n is_loop_header - in if (look_ahead code ifso is_loop_header predicate) then - (preferred := true; raise HeuristicSucceeded) - else if (look_ahead code ifnot is_loop_header predicate) then - (preferred := false; raise HeuristicSucceeded) - else () - -let get_directions code entrypoint = - let bfs_order = bfs code entrypoint - and is_loop_header = get_loop_headers code entrypoint - and directions = ref (PTree.map (fun n i -> false) code) (* false <=> fallthru *) + else match (rtl_successors @@ get_some @@ PTree.get node code) with + | [n] -> if (predicate n) then true + else ( + if (get_some @@ PTree.get n is_loop_header) then false + else look_ahead code n is_loop_header predicate + ) + | _ -> false + +let do_call_heuristic code cond ifso ifnot is_loop_header = + begin + debug "\tCall heuristic..\n"; + let predicate n = (function + | Icall _ -> true + | _ -> false) @@ get_some @@ PTree.get n code + in let ifso_call = look_ahead code ifso is_loop_header predicate + in let ifnot_call = look_ahead code ifnot is_loop_header predicate + in if ifso_call && ifnot_call then None + else if ifso_call then Some false + else if ifnot_call then Some true + else None + end + +let do_opcode_heuristic code cond ifso ifnot is_loop_header = + begin + debug "\tOpcode heuristic..\n"; + DuplicateOpcodeHeuristic.opcode_heuristic code cond ifso ifnot is_loop_header + end + +let do_return_heuristic code cond ifso ifnot is_loop_header = + begin + debug "\tReturn heuristic..\n"; + let predicate n = (function + | Ireturn _ -> true + | _ -> false) @@ get_some @@ PTree.get n code + in let ifso_return = look_ahead code ifso is_loop_header predicate + in let ifnot_return = look_ahead code ifnot is_loop_header predicate + in if ifso_return && ifnot_return then None + else if ifso_return then Some false + else if ifnot_return then Some true + else None + end + +let do_store_heuristic code cond ifso ifnot is_loop_header = + begin + debug "\tStore heuristic..\n"; + let predicate n = (function + | Istore _ -> true + | _ -> false) @@ get_some @@ PTree.get n code + in let ifso_store = look_ahead code ifso is_loop_header predicate + in let ifnot_store = look_ahead code ifnot is_loop_header predicate + in if ifso_store && ifnot_store then None + else if ifso_store then Some false + else if ifnot_store then Some true + else None + end + +let do_loop_heuristic code cond ifso ifnot is_loop_header = + begin + debug "\tLoop heuristic..\n"; + let predicate n = get_some @@ PTree.get n is_loop_header in + let ifso_loop = look_ahead code ifso is_loop_header predicate in + let ifnot_loop = look_ahead code ifnot is_loop_header predicate in + if ifso_loop && ifnot_loop then None (* TODO - take the innermost loop ? *) + else if ifso_loop then Some true + else if ifnot_loop then Some false + else None + end + +let do_loop2_heuristic loop_info n code cond ifso ifnot is_loop_header = + begin + debug "\tLoop2 heuristic..\n"; + match get_some @@ PTree.get n loop_info with + | None -> None + | Some b -> Some b + end + +(* Returns a PTree of either None or Some b where b determines the node following the loop, for a cb instruction *) +(* It uses the fact that loops in CompCert are done by a branch (backedge) instruction followed by a cb *) +let get_loop_info is_loop_header bfs_order code = + let loop_info = ref (PTree.map (fun n i -> None) code) in + let mark_path s n = + let visited = ref (PTree.map (fun n i -> false) code) in + let rec explore src dest = + if (get_some @@ PTree.get src !visited) then false + else if src == dest then true + else begin + visited := PTree.set src true !visited; + match rtl_successors @@ get_some @@ PTree.get src code with + | [] -> false + | [s] -> explore s dest + | [s1; s2] -> (explore s1 dest) || (explore s2 dest) + | _ -> false + end + in let rec advance_to_cb src = + if (get_some @@ PTree.get src !visited) then None + else begin + visited := PTree.set src true !visited; + match get_some @@ PTree.get src code with + | Inop s | Iop (_, _, _, s) | Iload (_,_,_,_,_,s) | Istore (_,_,_,_,s) | Icall (_,_,_,_,s) + | Ibuiltin (_,_,_,s) -> advance_to_cb s + | Icond _ -> Some src + | Ijumptable _ | Itailcall _ | Ireturn _ -> None + end + in begin + debug "Marking path from %d to %d\n" (P.to_int n) (P.to_int s); + match advance_to_cb s with + | None -> (debug "Nothing found\n") + | Some s -> ( debug "Advancing to %d\n" (P.to_int s); + match get_some @@ PTree.get s !loop_info with + | None | Some _ -> begin + match get_some @@ PTree.get s code with + | Icond (_, _, n1, n2, _) -> + let b1 = explore n1 n in + let b2 = explore n2 n in + if (b1 && b2) then (debug "both true\n") + else if b1 then (debug "true privileged\n"; loop_info := PTree.set s (Some true) !loop_info) + else if b2 then (debug "false privileged\n"; loop_info := PTree.set s (Some false) !loop_info) + else (debug "none true\n") + | _ -> ( debug "not an icond\n" ) + end + (* | Some _ -> ( debug "already loop info there\n" ) FIXME - we don't know yet whether a branch to a loop head is a backedge or not *) + ) + end in begin - Printf.printf "Loop headers: "; - ptree_printbool is_loop_header; - Printf.printf "\n"; + List.iter (fun n -> + match get_some @@ PTree.get n code with + | Inop s | Iop (_,_,_,s) | Iload (_,_,_,_,_,s) | Istore (_,_,_,_,s) | Icall (_,_,_,_,s) + | Ibuiltin (_, _, _, s) -> + if get_some @@ PTree.get s is_loop_header then mark_path s n + | Icond _ -> () (* loop backedges are never Icond in CompCert RTL.3 *) + | Ijumptable _ -> () + | Itailcall _ | Ireturn _ -> () + ) bfs_order; + !loop_info + end + +(* Remark - compared to the original paper, we don't use the store heuristic *) +let get_directions code entrypoint = begin + debug "get_directions\n"; + let bfs_order = bfs code entrypoint in + let is_loop_header = get_loop_headers code entrypoint in + let loop_info = get_loop_info is_loop_header bfs_order code in + let directions = ref (PTree.map (fun n i -> None) code) in (* None <=> no predicted direction *) + begin + (* ptree_printbool is_loop_header; *) + (* debug "\n"; *) List.iter (fun n -> match (get_some @@ PTree.get n code) with - | Icond (cond, lr, ifso, ifnot) -> - Printf.printf "Analyzing %d.." (P.to_int n); - let preferred = ref false - in (try - Printf.printf " call.."; - do_call_heuristic code ifso ifnot is_loop_header preferred; - Printf.printf " opcode.."; - do_opcode_heuristic code cond ifso ifnot preferred; - Printf.printf " return.."; - do_return_heuristic code ifso ifnot is_loop_header preferred; - Printf.printf " store.."; - do_store_heuristic code ifso ifnot is_loop_header preferred; - Printf.printf " loop.."; - do_loop_heuristic code ifso ifnot is_loop_header preferred; - Printf.printf "Random choice for %d\n" (P.to_int n); - preferred := Random.bool () - with HeuristicSucceeded | DuplicateOpcodeHeuristic.HeuristicSucceeded - -> Printf.printf " %s\n" (match !preferred with true -> "BRANCH" - | false -> "FALLTHROUGH") - ); directions := PTree.set n !preferred !directions + | Icond (cond, lr, ifso, ifnot, _) -> + (* debug "Analyzing %d.." (P.to_int n); *) + let heuristics = [ do_opcode_heuristic; + do_return_heuristic; do_loop2_heuristic loop_info n; do_loop_heuristic; do_call_heuristic; + (* do_store_heuristic *) ] in + let preferred = ref None in + begin + debug "Deciding condition for RTL node %d\n" (P.to_int n); + List.iter (fun do_heur -> + match !preferred with + | None -> preferred := do_heur code cond ifso ifnot is_loop_header + | Some _ -> () + ) heuristics; + directions := PTree.set n !preferred !directions; + (match !preferred with | Some false -> debug "\tFALLTHROUGH\n" + | Some true -> debug "\tBRANCH\n" + | None -> debug "\tUNSURE\n"); + debug "---------------------------------------\n" + end | _ -> () ) bfs_order; !directions end +end + +let update_direction direction = function +| Icond (cond, lr, n, n', _) -> Icond (cond, lr, n, n', direction) +| i -> i -let to_ttl_inst direction = function -| Ireturn o -> Tleaf (Ireturn o) -| Inop n -> Tnext (n, Inop n) -| Iop (op, lr, r, n) -> Tnext (n, Iop(op, lr, r, n)) -| Iload (tm, m, a, lr, r, n) -> Tnext (n, Iload(tm, m, a, lr, r, n)) -| Istore (m, a, lr, r, n) -> Tnext (n, Istore(m, a, lr, r, n)) -| Icall (s, ri, lr, r, n) -> Tleaf (Icall(s, ri, lr, r, n)) -| Itailcall (s, ri, lr) -> Tleaf (Itailcall(s, ri, lr)) -| Ibuiltin (ef, lbr, br, n) -> Tleaf (Ibuiltin(ef, lbr, br, n)) -| Icond (cond, lr, n, n') -> (match direction with - | false -> Tnext (n', Icond(cond, lr, n, n')) - | true -> Tnext (n, Icond(cond, lr, n, n'))) -| Ijumptable (r, ln) -> Tleaf (Ijumptable(r, ln)) - -let rec to_ttl_code_rec directions = function +let rec update_direction_rec directions = function | [] -> PTree.empty | m::lm -> let (n, i) = m in let direction = get_some @@ PTree.get n directions - in PTree.set n (to_ttl_inst direction i) (to_ttl_code_rec directions lm) + in PTree.set n (update_direction direction i) (update_direction_rec directions lm) -let to_ttl_code code entrypoint = +(* Uses branch prediction to write prediction annotations in Icond *) +let update_directions code entrypoint = begin + debug "Update_directions\n"; let directions = get_directions code entrypoint in begin - Printf.printf "Ifso directions: "; + (* debug "Ifso directions: "; ptree_printbool directions; - Printf.printf "\n"; - Random.init(0); (* using same seed to make it deterministic *) - to_ttl_code_rec directions (PTree.elements code) + debug "\n"; *) + update_direction_rec directions (PTree.elements code) end +end -(** Trace selection on TTL *) +(** Trace selection *) let rec exists_false_rec = function | [] -> false @@ -321,50 +355,29 @@ let rec exists_false_rec = function let exists_false boolmap = exists_false_rec (PTree.elements boolmap) -(* DFS on TTL to guide the exploration *) -let dfs code entrypoint = +(* DFS using prediction info to guide the exploration *) +let dfs code entrypoint = begin + debug "dfs\n"; let visited = ref (PTree.map (fun n i -> false) code) in let rec dfs_list code = function | [] -> [] | node :: ln -> - let node_dfs = - if not (get_some @@ PTree.get node !visited) then begin - visited := PTree.set node true !visited; - match PTree.get node code with - | None -> failwith "No such node" - | Some ti -> [node] @ match ti with - | Tleaf i -> (match i with - | Icall(_, _, _, _, n) -> dfs_list code [n] - | Ibuiltin(_, _, _, n) -> dfs_list code [n] - | Ijumptable(_, ln) -> dfs_list code ln - | Itailcall _ | Ireturn _ -> [] - | _ -> failwith "Tleaf case not handled in dfs" ) - | Tnext (n,i) -> (dfs_list code [n]) @ match i with - | Icond (_, _, n1, n2) -> dfs_list code [n1; n2] - | Inop _ | Iop _ | Iload _ | Istore _ -> [] - | _ -> failwith "Tnext case not handled in dfs" - end - else [] - in node_dfs @ (dfs_list code ln) + if get_some @@ PTree.get node !visited then dfs_list code ln + else begin + visited := PTree.set node true !visited; + let next_nodes = (match get_some @@ PTree.get node code with + | Icall(_, _, _, _, n) | Ibuiltin (_, _, _, n) | Iop (_, _, _, n) + | Iload (_, _, _, _, _, n) | Istore (_, _, _, _, n) | Inop n -> [n] + | Ijumptable (_, ln) -> ln + | Itailcall _ | Ireturn _ -> [] + | Icond (_, _, n1, n2, info) -> (match info with + | Some false -> [n2; n1] + | _ -> [n1; n2] + ) + ) in node :: dfs_list code (next_nodes @ ln) + end in dfs_list code [entrypoint] - -let get_predecessors_ttl code = - let preds = ref (PTree.map (fun n i -> []) code) in - let process_inst (node, ti) = match ti with - | Tleaf _ -> () - | Tnext (_, i) -> let succ = match i with - | Inop n | Iop (_,_,_,n) | Iload (_, _,_,_,_,n) | Istore (_,_,_,_,n) - | Icall (_,_,_,_,n) | Ibuiltin (_, _, _, n) -> [n] - | Icond (_,_,n1,n2) -> [n1;n2] - | Ijumptable (_,ln) -> ln - | _ -> [] - in List.iter (fun s -> preds := PTree.set s (node::(get_some @@ PTree.get s !preds)) !preds) succ - in begin - List.iter process_inst (PTree.elements code); - !preds - end - -let rtl_proj code = PTree.map (fun n ti -> match ti with Tleaf i | Tnext(_, i) -> i) code +end let rec select_unvisited_node is_visited = function | [] -> failwith "Empty list" @@ -373,24 +386,87 @@ let rec select_unvisited_node is_visited = function let best_successor_of node code is_visited = match (PTree.get node code) with | None -> failwith "No such node in the code" - | Some ti -> match ti with - | Tleaf _ -> None - | Tnext (n,_) -> if not (ptree_get_some n is_visited) then Some n - else None - -let best_predecessor_of node predecessors order is_visited = + | Some i -> + let next_node = match i with + | Inop n | Iop (_,_,_,n) | Iload (_,_,_,_,_,n) | Istore(_,_,_,_,n) + | Icall (_,_,_,_,n) | Ibuiltin (_,_,_,n) -> Some n + | Icond (_, _, n1, n2, ob) -> (match ob with None -> None | Some false -> Some n2 | Some true -> Some n1) + | _ -> None + in match next_node with + | None -> None + | Some n -> if not (ptree_get_some n is_visited) then Some n else None + +(* FIXME - could be improved by selecting in priority the predicted paths *) +let best_predecessor_of node predecessors code order is_visited = match (PTree.get node predecessors) with | None -> failwith "No predecessor list found" - | Some lp -> try Some (List.find (fun n -> (List.mem n lp) && (not (ptree_get_some n is_visited))) order) - with Not_found -> None + | Some lp -> + try Some (List.find (fun n -> + if (List.mem n lp) && (not (ptree_get_some n is_visited)) then + match ptree_get_some n code with + | Icond (_, _, n1, n2, ob) -> (match ob with + | None -> false + | Some false -> n == n2 + | Some true -> n == n1 + ) + | _ -> true + else false + ) order) + with Not_found -> None + +let print_trace t = print_intlist t + +let print_traces traces = + let rec f = function + | [] -> () + | t::lt -> Printf.printf "\n\t"; print_trace t; Printf.printf ",\n"; f lt + in begin + if !debug_flag then begin + Printf.printf "Traces: {"; + f traces; + Printf.printf "}\n"; + end + end + +(* Dumb (but linear) trace selection *) +let select_traces_linear code entrypoint = + let is_visited = ref (PTree.map (fun n i -> false) code) in + let bfs_order = bfs code entrypoint in + let rec go_through node = begin + is_visited := PTree.set node true !is_visited; + let next_node = match (get_some @@ PTree.get node code) with + | Icall(_, _, _, _, n) | Ibuiltin (_, _, _, n) | Iop (_, _, _, n) + | Iload (_, _, _, _, _, n) | Istore (_, _, _, _, n) | Inop n -> Some n + | Ijumptable _ | Itailcall _ | Ireturn _ -> None + | Icond (_, _, n1, n2, info) -> (match info with + | Some false -> Some n2 + | Some true -> Some n1 + | None -> None + ) + in match next_node with + | None -> [node] + | Some n -> + if not (get_some @@ PTree.get n !is_visited) then node :: go_through n + else [node] + end + in let traces = ref [] in begin + List.iter (fun n -> + if not (get_some @@ PTree.get n !is_visited) then + traces := (go_through n) :: !traces + ) bfs_order; + !traces + end + (* Algorithm mostly inspired from Chang and Hwu 1988 * "Trace Selection for Compiling Large C Application Programs to Microcode" *) -let select_traces code entrypoint = +let select_traces_chang code entrypoint = begin + debug "select_traces\n"; let order = dfs code entrypoint in - let predecessors = get_predecessors_ttl code in + let predecessors = get_predecessors_rtl code in let traces = ref [] in let is_visited = ref (PTree.map (fun n i -> false) code) in begin (* mark all nodes visited *) + debug "Length: %d\n" (List.length order); while exists_false !is_visited do (* while (there are unvisited nodes) *) let seed = select_unvisited_node !is_visited order in let trace = ref [seed] in @@ -410,7 +486,7 @@ let select_traces code entrypoint = current := seed; quit_loop := false; while not !quit_loop do - let s = best_predecessor_of !current predecessors order !is_visited in + let s = best_predecessor_of !current predecessors code order !is_visited in match s with | None -> quit_loop := true (* if (s==0) exit loop *) | Some pred -> begin @@ -423,21 +499,16 @@ let select_traces code entrypoint = end end done; - Printf.printf "DFS: \t"; print_intlist order; Printf.printf "\n"; + (* debug "DFS: \t"; print_intlist order; debug "\n"; *) + debug "Traces: "; print_traces !traces; !traces end +end -let print_trace t = print_intlist t - -let print_traces traces = - let rec f = function - | [] -> () - | t::lt -> Printf.printf "\n\t"; print_trace t; Printf.printf ",\n"; f lt - in begin - Printf.printf "Traces: {"; - f traces; - Printf.printf "}\n"; - end +let select_traces code entrypoint = + let length = List.length @@ PTree.elements code in + if (length < 5000) then select_traces_chang code entrypoint + else select_traces_linear code entrypoint let rec make_identity_ptree_rec = function | [] -> PTree.empty @@ -454,10 +525,10 @@ let rec change_pointers code n n' = function | Ibuiltin(a, b, c, n0) -> assert (n0 == n); Ibuiltin(a, b, c, n') | Ijumptable(a, ln) -> assert (optbool @@ List.find_opt (fun e -> e == n) ln); Ijumptable(a, List.map (fun e -> if (e == n) then n' else e) ln) - | Icond(a, b, n1, n2) -> assert (n1 == n || n2 == n); + | Icond(a, b, n1, n2, i) -> assert (n1 == n || n2 == n); let n1' = if (n1 == n) then n' else n1 in let n2' = if (n2 == n) then n' else n2 - in Icond(a, b, n1', n2') + in Icond(a, b, n1', n2', i) | Inop n0 -> assert (n0 == n); Inop n' | Iop (a, b, c, n0) -> assert (n0 == n); Iop (a, b, c, n') | Iload (a, b, c, d, e, n0) -> assert (n0 == n); Iload (a, b, c, d, e, n') @@ -471,7 +542,7 @@ let rec change_pointers code n n' = function * n': the integer which should contain the duplicate of n * returns: new code, new ptree *) let duplicate code ptree parent n preds n' = - Printf.printf "Duplicating node %d into %d..\n" (P.to_int n) (P.to_int n'); + debug "Duplicating node %d into %d..\n" (P.to_int n) (P.to_int n'); match PTree.get n' code with | Some _ -> failwith "The PTree already has a node n'" | None -> @@ -529,30 +600,30 @@ let tail_duplicate code preds ptree trace = in (new_code, new_ptree, !nb_duplicated) let superblockify_traces code preds traces = - let max_nb_duplicated = 1 (* FIXME - should be architecture dependent *) + let max_nb_duplicated = !Clflags.option_fduplicate (* FIXME - should be architecture dependent *) in let ptree = make_identity_ptree code in let rec f code ptree = function | [] -> (code, ptree, 0) | trace :: traces -> let new_code, new_ptree, nb_duplicated = tail_duplicate code preds ptree trace - in if (nb_duplicated < max_nb_duplicated) then f new_code new_ptree traces - else (Printf.printf "Too many duplicated nodes, aborting tail duplication\n"; (code, ptree, 0)) + in if (nb_duplicated < max_nb_duplicated) + then (debug "End duplication\n"; f new_code new_ptree traces) + else (debug "Too many duplicated nodes, aborting tail duplication\n"; (code, ptree, 0)) in let new_code, new_ptree, _ = f code ptree traces in (new_code, new_ptree) let rec invert_iconds_trace code = function | [] -> code - | n::[] -> code - | n :: n' :: t -> + | n :: ln -> let code' = match ptree_get_some n code with - | Icond (c, lr, ifso, ifnot) -> - assert (n' == ifso || n' == ifnot); - if (n' == ifso) then ( - Printf.printf "Reversing ifso/ifnot for node %d\n" (P.to_int n); - PTree.set n (Icond (Op.negate_condition c, lr, ifnot, ifso)) code ) - else code + | Icond (c, lr, ifso, ifnot, info) -> (match info with + | Some true -> begin + (* debug "Reversing ifso/ifnot for node %d\n" (P.to_int n); *) + PTree.set n (Icond (Op.negate_condition c, lr, ifnot, ifso, Some false)) code + end + | _ -> code) | _ -> code - in invert_iconds_trace code' (n'::t) + in invert_iconds_trace code' ln let rec invert_iconds code = function | [] -> code @@ -561,12 +632,17 @@ let rec invert_iconds code = function else code in invert_iconds code' ts -(* For now, identity function *) let duplicate_aux f = let entrypoint = f.fn_entrypoint in - let code = f.fn_code in - let traces = select_traces (to_ttl_code code entrypoint) entrypoint in - let icond_code = invert_iconds code traces in - let preds = get_predecessors_rtl icond_code in - let (new_code, pTreeId) = (print_traces traces; superblockify_traces icond_code preds traces) in - ((new_code, f.fn_entrypoint), pTreeId) + if !Clflags.option_fduplicate < 0 then + ((f.fn_code, entrypoint), make_identity_ptree f.fn_code) + else + let code = update_directions (f.fn_code) entrypoint in + let traces = select_traces code entrypoint in + let icond_code = invert_iconds code traces in + let preds = get_predecessors_rtl icond_code in + if !Clflags.option_fduplicate >= 1 then + let (new_code, pTreeId) = ((* print_traces traces; *) superblockify_traces icond_code preds traces) in + ((new_code, f.fn_entrypoint), pTreeId) + else + ((icond_code, entrypoint), make_identity_ptree code) diff --git a/backend/Duplicateproof.v b/backend/Duplicateproof.v index a8e9b16b..6b598dc7 100644 --- a/backend/Duplicateproof.v +++ b/backend/Duplicateproof.v @@ -23,12 +23,12 @@ Inductive match_inst (dupmap: PTree.t node): instruction -> instruction -> Prop match_inst dupmap (Itailcall s ri lr) (Itailcall s ri lr) | match_inst_builtin: forall n n' ef la br, dupmap!n' = (Some n) -> match_inst dupmap (Ibuiltin ef la br n) (Ibuiltin ef la br n') - | match_inst_cond: forall ifso ifso' ifnot ifnot' c lr, + | match_inst_cond: forall ifso ifso' ifnot ifnot' c lr i i', dupmap!ifso' = (Some ifso) -> dupmap!ifnot' = (Some ifnot) -> - match_inst dupmap (Icond c lr ifso ifnot) (Icond c lr ifso' ifnot') - | match_inst_revcond: forall ifso ifso' ifnot ifnot' c lr, + match_inst dupmap (Icond c lr ifso ifnot i) (Icond c lr ifso' ifnot' i') + | match_inst_revcond: forall ifso ifso' ifnot ifnot' c lr i i', dupmap!ifso' = (Some ifso) -> dupmap!ifnot' = (Some ifnot) -> - match_inst dupmap (Icond c lr ifso ifnot) (Icond (negate_condition c) lr ifnot' ifso') + match_inst dupmap (Icond c lr ifso ifnot i) (Icond (negate_condition c) lr ifnot' ifso' i') | match_inst_jumptable: forall ln ln' r, list_forall2 (fun n n' => (dupmap!n' = (Some n))) ln ln' -> match_inst dupmap (Ijumptable r ln) (Ijumptable r ln') @@ -378,7 +378,7 @@ Theorem step_simulation: step tge s2 t s2' /\ match_states s1' s2'. Proof. - Local Hint Resolve transf_fundef_correct. + Local Hint Resolve transf_fundef_correct: core. induction 1; intros; inv MS. (* Inop *) - eapply dupmap_correct in DUPLIC; eauto. diff --git a/backend/ForwardMoves.v b/backend/ForwardMoves.v index c73b0213..7cfd411f 100644 --- a/backend/ForwardMoves.v +++ b/backend/ForwardMoves.v @@ -250,7 +250,7 @@ Fixpoint kill_builtin_res (res : builtin_res reg) (rel : RELATION.t) := Definition apply_instr instr x := match instr with | Inop _ - | Icond _ _ _ _ + | Icond _ _ _ _ _ | Ijumptable _ _ | Istore _ _ _ _ _ => Some x | Iop Omove (src :: nil) dst _ => Some (move src dst x) @@ -309,8 +309,8 @@ Definition transf_instr (fmap : option (PMap.t RB.t)) Icall sig ros (subst_args fmap pc args) dst s | Itailcall sig ros args => Itailcall sig ros (subst_args fmap pc args) - | Icond cond args s1 s2 => - Icond cond (subst_args fmap pc args) s1 s2 + | Icond cond args s1 s2 i => + Icond cond (subst_args fmap pc args) s1 s2 i | Ijumptable arg tbl => Ijumptable (subst_arg fmap pc arg) tbl | Ireturn (Some arg) => diff --git a/backend/IRC.ml b/backend/IRC.ml index 67da47da..785b0a2d 100644 --- a/backend/IRC.ml +++ b/backend/IRC.ml @@ -238,7 +238,6 @@ type graph = { according to their types. A variable can be forced into class 2 by giving it a negative spill cost. *) - let class_of_reg r = if Conventions1.is_float_reg r then 1 else 0 diff --git a/backend/Inlining.v b/backend/Inlining.v index 9cf535b9..8c7e1898 100644 --- a/backend/Inlining.v +++ b/backend/Inlining.v @@ -397,9 +397,9 @@ Definition expand_instr (ctx: context) (pc: node) (i: instruction): mon unit := | Ibuiltin ef args res s => set_instr (spc ctx pc) (Ibuiltin ef (map (sbuiltinarg ctx) args) (sbuiltinres ctx res) (spc ctx s)) - | Icond cond args s1 s2 => + | Icond cond args s1 s2 info => set_instr (spc ctx pc) - (Icond cond (sregs ctx args) (spc ctx s1) (spc ctx s2)) + (Icond cond (sregs ctx args) (spc ctx s1) (spc ctx s2) info) | Ijumptable r tbl => set_instr (spc ctx pc) (Ijumptable (sreg ctx r) (List.map (spc ctx) tbl)) diff --git a/backend/Inliningaux.ml b/backend/Inliningaux.ml index d58704ca..cf308962 100644 --- a/backend/Inliningaux.ml +++ b/backend/Inliningaux.ml @@ -17,7 +17,8 @@ open Maps open Op open Ordered open! RTL - +open Camlcoq + module PSet = Make(OrderedPositive) type inlining_info = { @@ -83,13 +84,15 @@ let static_called_once id io = else false -(* To be considered: heuristics based on size of function? *) +(* D. Monniaux: attempt at heuristic based on size *) +let small_enough (f : coq_function) = + P.to_int (RTL.max_pc_function f) <= !Clflags.option_inline_auto_threshold let should_inline (io: inlining_info) (id: ident) (f: coq_function) = if !Clflags.option_finline then begin match C2C.atom_inline id with | C2C.Inline -> true | C2C.Noinline -> false - | C2C.No_specifier -> static_called_once id io + | C2C.No_specifier -> static_called_once id io || small_enough f end else false diff --git a/backend/Inliningspec.v b/backend/Inliningspec.v index e20fb373..eba026ec 100644 --- a/backend/Inliningspec.v +++ b/backend/Inliningspec.v @@ -312,9 +312,9 @@ Inductive tr_instr: context -> node -> instruction -> code -> Prop := match res with BR r => Ple r ctx.(mreg) | _ => True end -> c!(spc ctx pc) = Some (Ibuiltin ef (map (sbuiltinarg ctx) args) (sbuiltinres ctx res) (spc ctx s)) -> tr_instr ctx pc (Ibuiltin ef args res s) c - | tr_cond: forall ctx pc cond args s1 s2 c, - c!(spc ctx pc) = Some (Icond cond (sregs ctx args) (spc ctx s1) (spc ctx s2)) -> - tr_instr ctx pc (Icond cond args s1 s2) c + | tr_cond: forall ctx pc cond args s1 s2 c i, + c!(spc ctx pc) = Some (Icond cond (sregs ctx args) (spc ctx s1) (spc ctx s2) i) -> + tr_instr ctx pc (Icond cond args s1 s2 i) c | tr_jumptable: forall ctx pc r tbl c, c!(spc ctx pc) = Some (Ijumptable (sreg ctx r) (List.map (spc ctx) tbl)) -> tr_instr ctx pc (Ijumptable r tbl) c diff --git a/backend/LTL.v b/backend/LTL.v index ee8b4826..3edd60a2 100644 --- a/backend/LTL.v +++ b/backend/LTL.v @@ -37,7 +37,7 @@ Inductive instruction: Type := | Ltailcall (sg: signature) (ros: mreg + ident) | Lbuiltin (ef: external_function) (args: list (builtin_arg loc)) (res: builtin_res mreg) | Lbranch (s: node) - | Lcond (cond: condition) (args: list mreg) (s1 s2: node) + | Lcond (cond: condition) (args: list mreg) (s1 s2: node) (info: option bool) | Ljumptable (arg: mreg) (tbl: list node) | Lreturn. @@ -263,11 +263,11 @@ Inductive step: state -> trace -> state -> Prop := | exec_Lbranch: forall s f sp pc bb rs m, step (Block s f sp (Lbranch pc :: bb) rs m) E0 (State s f sp pc rs m) - | exec_Lcond: forall s f sp cond args pc1 pc2 bb rs b pc rs' m, + | exec_Lcond: forall s f sp cond args pc1 pc2 bb rs b pc rs' m i, eval_condition cond (reglist rs args) m = Some b -> pc = (if b then pc1 else pc2) -> rs' = undef_regs (destroyed_by_cond cond) rs -> - step (Block s f sp (Lcond cond args pc1 pc2 :: bb) rs m) + step (Block s f sp (Lcond cond args pc1 pc2 i :: bb) rs m) E0 (State s f sp pc rs' m) | exec_Ljumptable: forall s f sp arg tbl bb rs m n pc rs', rs (R arg) = Vint n -> @@ -328,7 +328,7 @@ Fixpoint successors_block (b: bblock) : list node := | nil => nil (**r should never happen *) | Ltailcall _ _ :: _ => nil | Lbranch s :: _ => s :: nil - | Lcond _ _ s1 s2 :: _ => s1 :: s2 :: nil + | Lcond _ _ s1 s2 _ :: _ => s1 :: s2 :: nil | Ljumptable _ tbl :: _ => tbl | Lreturn :: _ => nil | instr :: b' => successors_block b' diff --git a/backend/Linearize.v b/backend/Linearize.v index 4216958c..66b36428 100644 --- a/backend/Linearize.v +++ b/backend/Linearize.v @@ -179,7 +179,7 @@ Fixpoint linearize_block (b: LTL.bblock) (k: code) : code := Lbuiltin ef args res :: linearize_block b' k | LTL.Lbranch s :: b' => add_branch s k - | LTL.Lcond cond args s1 s2 :: b' => + | LTL.Lcond cond args s1 s2 _ :: b' => if starts_with s1 k then Lcond (negate_condition cond) args s2 :: add_branch s1 k else diff --git a/backend/Linearizeaux.ml b/backend/Linearizeaux.ml index a6964233..1381877b 100644 --- a/backend/Linearizeaux.ml +++ b/backend/Linearizeaux.ml @@ -13,6 +13,12 @@ open LTL open Maps +let debug_flag = ref false + +let debug fmt = + if !debug_flag then Printf.eprintf fmt + else Printf.ifprintf stderr fmt + (* Trivial enumeration, in decreasing order of PC *) (*** @@ -81,7 +87,7 @@ let basic_blocks f joins = | [] -> assert false | Lbranch s :: _ -> next_in_block blk minpc s | Ltailcall (sig0, ros) :: _ -> end_block blk minpc - | Lcond (cond, args, ifso, ifnot) :: _ -> + | Lcond (cond, args, ifso, ifnot, _) :: _ -> end_block blk minpc; start_block ifso; start_block ifnot | Ljumptable(arg, tbl) :: _ -> end_block blk minpc; List.iter start_block tbl @@ -115,14 +121,11 @@ let enumerate_aux_flat f reach = flatten_blocks (basic_blocks f (join_points f)) (** - * Enumeration based on traces as identified by Duplicate.v - * - * The Duplicate phase heuristically identifies the most frequented paths. Each - * Icond is modified so that the preferred condition is a fallthrough (ifnot) - * rather than a branch (ifso). + * Alternate enumeration based on traces as identified by Duplicate.v * - * The enumeration below takes advantage of this - preferring to layout nodes - * following the fallthroughs of the Lcond branches + * This is a slight alteration to the above heuristic, ensuring that any + * superblock will be contiguous in memory, while still following the original + * heuristic *) let get_some = function @@ -136,29 +139,385 @@ let rec last_element = function | e :: [] -> e | e' :: e :: l -> last_element (e::l) -let dfs code entrypoint = +let print_plist l = + let rec f = function + | [] -> () + | n :: l -> Printf.printf "%d, " (P.to_int n); f l + in begin + if !debug_flag then begin + Printf.printf "["; + f l; + Printf.printf "]" + end + end + +(* adapted from the above join_points function, but with PTree *) +let get_join_points code entry = + let reached = ref (PTree.map (fun n i -> false) code) in + let reached_twice = ref (PTree.map (fun n i -> false) code) in + let rec traverse pc = + if get_some @@ PTree.get pc !reached then begin + if not (get_some @@ PTree.get pc !reached_twice) then + reached_twice := PTree.set pc true !reached_twice + end else begin + reached := PTree.set pc true !reached; + traverse_succs (successors_block @@ get_some @@ PTree.get pc code) + end + and traverse_succs = function + | [] -> () + | [pc] -> traverse pc + | pc :: l -> traverse pc; traverse_succs l + in traverse entry; !reached_twice + +let forward_sequences code entry = let visited = ref (PTree.map (fun n i -> false) code) in - let rec dfs_list code = function + let join_points = get_join_points code entry in + (* returns the list of traversed nodes, and a list of nodes to start traversing next *) + let rec traverse_fallthrough code node = + (* debug "Traversing %d..\n" (P.to_int node); *) + if not (get_some @@ PTree.get node !visited) then begin + visited := PTree.set node true !visited; + match PTree.get node code with + | None -> failwith "No such node" + | Some bb -> + let ln, rem = match (last_element bb) with + | Lop _ | Lload _ | Lgetstack _ | Lsetstack _ | Lstore _ | Lcall _ + | Lbuiltin _ -> assert false + | Ltailcall _ | Lreturn -> begin (* debug "STOP tailcall/return\n"; *) ([], []) end + | Lbranch n -> + if get_some @@ PTree.get n join_points then ([], [n]) + else let ln, rem = traverse_fallthrough code n in (ln, rem) + | Lcond (_, _, ifso, ifnot, info) -> (match info with + | None -> begin (* debug "STOP Lcond None\n"; *) ([], [ifso; ifnot]) end + | Some false -> + if get_some @@ PTree.get ifnot join_points then ([], [ifso; ifnot]) + else let ln, rem = traverse_fallthrough code ifnot in (ln, [ifso] @ rem) + | Some true -> + let errstr = Printf.sprintf ("Inconsistency detected in node %d: ifnot is not the preferred branch") (P.to_int node) in + failwith errstr) + | Ljumptable(_, ln) -> begin (* debug "STOP Ljumptable\n"; *) ([], ln) end + in ([node] @ ln, rem) + end + else ([], []) + in let rec f code = function | [] -> [] | node :: ln -> - let node_dfs = - if not (get_some @@ PTree.get node !visited) then begin - visited := PTree.set node true !visited; - match PTree.get node code with - | None -> failwith "No such node" - | Some bb -> [node] @ match (last_element bb) with - | Lop _ | Lload _ | Lgetstack _ | Lsetstack _ | Lstore _ | Lcall _ - | Lbuiltin _ -> assert false - | Ltailcall _ | Lreturn -> [] - | Lbranch n -> dfs_list code [n] - | Lcond (_, _, ifso, ifnot) -> dfs_list code [ifnot; ifso] - | Ljumptable(_, ln) -> dfs_list code ln + let fs, rem_from_node = traverse_fallthrough code node + in [fs] @ ((f code rem_from_node) @ (f code ln)) + in (f code [entry]) + +(** Unused code +module PInt = struct + type t = P.t + let compare x y = compare (P.to_int x) (P.to_int y) +end + +module PSet = Set.Make(PInt) + +module LPInt = struct + type t = P.t list + let rec compare x y = + match x with + | [] -> ( match y with + | [] -> 0 + | _ -> 1 ) + | e :: l -> match y with + | [] -> -1 + | e' :: l' -> + let e_cmp = PInt.compare e e' in + if e_cmp == 0 then compare l l' else e_cmp +end + +module LPSet = Set.Make(LPInt) + +let iter_lpset f s = Seq.iter f (LPSet.to_seq s) + +let first_of = function + | [] -> None + | e :: l -> Some e + +let rec last_of = function + | [] -> None + | e :: l -> (match l with [] -> Some e | e :: l -> last_of l) + +let can_be_merged code s s' = + let last_s = get_some @@ last_of s in + let first_s' = get_some @@ first_of s' in + match get_some @@ PTree.get last_s code with + | Lop _ | Lload _ | Lgetstack _ | Lsetstack _ | Lstore _ | Lcall _ + | Lbuiltin _ | Ltailcall _ | Lreturn -> false + | Lbranch n -> n == first_s' + | Lcond (_, _, ifso, ifnot, info) -> (match info with + | None -> false + | Some false -> ifnot == first_s' + | Some true -> failwith "Inconsistency detected - ifnot is not the preferred branch") + | Ljumptable (_, ln) -> + match ln with + | [] -> false + | n :: ln -> n == first_s' + +let merge s s' = Some s + +let try_merge code (fs: (BinNums.positive list) list) = + let seqs = ref (LPSet.of_list fs) in + let oldLength = ref (LPSet.cardinal !seqs) in + let continue = ref true in + let found = ref false in + while !continue do + begin + found := false; + iter_lpset (fun s -> + if !found then () + else iter_lpset (fun s' -> + if (!found || s == s') then () + else if (can_be_merged code s s') then + begin + seqs := LPSet.remove s !seqs; + seqs := LPSet.remove s' !seqs; + seqs := LPSet.add (get_some (merge s s')) !seqs; + found := true; + end + else () + ) !seqs + ) !seqs; + if !oldLength == LPSet.cardinal !seqs then + continue := false + else + oldLength := LPSet.cardinal !seqs + end + done; + !seqs +*) + +(** Code adapted from Duplicateaux.get_loop_headers + * + * Getting loop branches with a DFS visit : + * Each node is either Unvisited, Visited, or Processed + * pre-order: node becomes Processed + * post-order: node becomes Visited + * + * If we come accross an edge to a Processed node, it's a loop! + *) +type pos = BinNums.positive + +module PP = struct + type t = pos * pos + let compare a b = + let ax, ay = a in + let bx, by = b in + let dx = compare ax bx in + if (dx == 0) then compare ay by + else dx +end + +module PPMap = Map.Make(PP) + +type vstate = Unvisited | Processed | Visited + +let get_loop_edges code entry = + let visited = ref (PTree.map (fun n i -> Unvisited) code) in + let is_loop_edge = ref PPMap.empty + in let rec dfs_visit code from = function + | [] -> () + | node :: ln -> + match (get_some @@ PTree.get node !visited) with + | Visited -> () + | Processed -> begin + let from_node = get_some from in + is_loop_edge := PPMap.add (from_node, node) true !is_loop_edge; + visited := PTree.set node Visited !visited + end + | Unvisited -> begin + visited := PTree.set node Processed !visited; + let bb = get_some @@ PTree.get node code in + let next_visits = (match (last_element bb) with + | Lop _ | Lload _ | Lgetstack _ | Lsetstack _ | Lstore _ | Lcall _ + | Lbuiltin _ -> assert false + | Ltailcall _ | Lreturn -> [] + | Lbranch n -> [n] + | Lcond (_, _, ifso, ifnot, _) -> [ifso; ifnot] + | Ljumptable(_, ln) -> ln + ) in dfs_visit code (Some node) next_visits; + visited := PTree.set node Visited !visited; + dfs_visit code from ln + end + in begin + dfs_visit code None [entry]; + !is_loop_edge + end + +let ppmap_is_true pp ppmap = PPMap.mem pp ppmap && PPMap.find pp ppmap + +module Int = struct + type t = int + let compare x y = compare x y +end + +module ISet = Set.Make(Int) + +let print_iset s = begin + if !debug_flag then begin + Printf.printf "{"; + ISet.iter (fun e -> Printf.printf "%d, " e) s; + Printf.printf "}" + end +end + +let print_depmap dm = begin + if !debug_flag then begin + Printf.printf "[|"; + Array.iter (fun s -> print_iset s; Printf.printf ", ") dm; + Printf.printf "|]\n" + end +end + +let construct_depmap code entry fs = + let is_loop_edge = get_loop_edges code entry in + let visited = ref (PTree.map (fun n i -> false) code) in + let depmap = Array.map (fun e -> ISet.empty) fs in + let find_index_of_node n = + let index = ref 0 in + begin + Array.iteri (fun i s -> + match List.find_opt (fun e -> e == n) s with + | Some _ -> index := i + | None -> () + ) fs; + !index + end + in let check_and_update_depmap from target = + (* debug "From %d to %d\n" (P.to_int from) (P.to_int target); *) + if not (ppmap_is_true (from, target) is_loop_edge) then + let in_index_fs = find_index_of_node from in + let out_index_fs = find_index_of_node target in + if out_index_fs != in_index_fs then + depmap.(out_index_fs) <- ISet.add in_index_fs depmap.(out_index_fs) + else () + else () + in let rec dfs_visit code = function + | [] -> () + | node :: ln -> + begin + match (get_some @@ PTree.get node !visited) with + | true -> () + | false -> begin + visited := PTree.set node true !visited; + let bb = get_some @@ PTree.get node code in + let next_visits = + match (last_element bb) with + | Ltailcall _ | Lreturn -> [] + | Lbranch n -> (check_and_update_depmap node n; [n]) + | Lcond (_, _, ifso, ifnot, _) -> begin + check_and_update_depmap node ifso; + check_and_update_depmap node ifnot; + [ifso; ifnot] + end + | Ljumptable(_, ln) -> begin + List.iter (fun n -> check_and_update_depmap node n) ln; + ln + end + (* end of bblocks should not be another value than one of the above *) + | _ -> failwith "last_element gave an invalid output" + in dfs_visit code next_visits + end; + dfs_visit code ln + end + in begin + dfs_visit code [entry]; + depmap + end + +let print_sequence s = + if !debug_flag then begin + Printf.printf "["; + List.iter (fun n -> Printf.printf "%d, " (P.to_int n)) s; + Printf.printf "]\n" + end + +let print_ssequence ofs = + if !debug_flag then begin + Printf.printf "["; + List.iter (fun s -> print_sequence s) ofs; + Printf.printf "]\n" + end + +let order_sequences code entry fs = + let fs_a = Array.of_list fs in + let depmap = construct_depmap code entry fs_a in + let fs_evaluated = Array.map (fun e -> false) fs_a in + let ordered_fs = ref [] in + let evaluate s_id = + begin + assert (not fs_evaluated.(s_id)); + ordered_fs := fs_a.(s_id) :: !ordered_fs; + fs_evaluated.(s_id) <- true; + (* debug "++++++\n"; + debug "Scheduling %d\n" s_id; + debug "Initial depmap: "; print_depmap depmap; *) + Array.iteri (fun i deps -> + depmap.(i) <- ISet.remove s_id deps + ) depmap; + (* debug "Final depmap: "; print_depmap depmap; *) + end + in let choose_best_of candidates = + let current_best_id = ref None in + let current_best_score = ref None in + begin + List.iter (fun id -> + match !current_best_id with + | None -> begin + current_best_id := Some id; + match fs_a.(id) with + | [] -> current_best_score := None + | n::l -> current_best_score := Some (P.to_int n) + end + | Some b -> begin + match fs_a.(id) with + | [] -> () + | n::l -> let nscore = P.to_int n in + match !current_best_score with + | None -> (current_best_id := Some id; current_best_score := Some nscore) + | Some bs -> if nscore > bs then (current_best_id := Some id; current_best_score := Some nscore) end - else [] - in node_dfs @ (dfs_list code ln) - in dfs_list code [entrypoint] + ) candidates; + !current_best_id + end + in let select_next () = + let candidates = ref [] in + begin + Array.iteri (fun i deps -> + begin + (* debug "Deps of %d: " i; print_iset deps; debug "\n"; *) + (* FIXME - if we keep it that way (no dependency check), remove all the unneeded stuff *) + if ((* deps == ISet.empty && *) not fs_evaluated.(i)) then + candidates := i :: !candidates + end + ) depmap; + if not (List.length !candidates > 0) then begin + Array.iteri (fun i deps -> + if (not fs_evaluated.(i)) then candidates := i :: !candidates + ) depmap; + end; + get_some (choose_best_of !candidates) + end + in begin + debug "-------------------------------\n"; + debug "depmap: "; print_depmap depmap; + debug "forward sequences identified: "; print_ssequence fs; + while List.length !ordered_fs != List.length fs do + let next_id = select_next () in + evaluate next_id + done; + debug "forward sequences ordered: "; print_ssequence (List.rev (!ordered_fs)); + List.rev (!ordered_fs) + end -let enumerate_aux_trace f reach = dfs f.fn_code f.fn_entrypoint +let enumerate_aux_trace f reach = + let code = f.fn_code in + let entry = f.fn_entrypoint in + let fs = forward_sequences code entry in + let ofs = order_sequences code entry fs in + List.flatten ofs let enumerate_aux f reach = if !Clflags.option_ftracelinearize then enumerate_aux_trace f reach diff --git a/backend/Liveness.v b/backend/Liveness.v index afe11ae6..9652b363 100644 --- a/backend/Liveness.v +++ b/backend/Liveness.v @@ -94,7 +94,7 @@ Definition transfer | Ibuiltin ef args res s => reg_list_live (params_of_builtin_args args) (reg_list_dead (params_of_builtin_res res) after) - | Icond cond args ifso ifnot => + | Icond cond args ifso ifnot _ => reg_list_live args after | Ijumptable arg tbl => reg_live arg after diff --git a/backend/PrintLTL.ml b/backend/PrintLTL.ml index b309a9f2..d8f2ac12 100644 --- a/backend/PrintLTL.ml +++ b/backend/PrintLTL.ml @@ -83,10 +83,11 @@ let print_instruction pp succ = function (print_builtin_args loc) args | Lbranch s -> print_succ pp s succ - | Lcond(cond, args, s1, s2) -> - fprintf pp "if (%a) goto %d else goto %d" + | Lcond(cond, args, s1, s2, info) -> + fprintf pp "if (%a) goto %d else goto %d (prediction: %s)" (print_condition mreg) (cond, args) (P.to_int s1) (P.to_int s2) + (match info with None -> "none" | Some true -> "branch" | Some false -> "fallthrough") | Ljumptable(arg, tbl) -> let tbl = Array.of_list tbl in fprintf pp "jumptable (%a)" mreg arg; diff --git a/backend/PrintRTL.ml b/backend/PrintRTL.ml index c25773e5..b2ef05ca 100644 --- a/backend/PrintRTL.ml +++ b/backend/PrintRTL.ml @@ -75,10 +75,11 @@ let print_instruction pp (pc, i) = (name_of_external ef) (print_builtin_args reg) args; print_succ pp s (pc - 1) - | Icond(cond, args, s1, s2) -> - fprintf pp "if (%a) goto %d else goto %d\n" + | Icond(cond, args, s1, s2, info) -> + fprintf pp "if (%a) goto %d else goto %d (prediction: %s)\n" (PrintOp.print_condition reg) (cond, args) (P.to_int s1) (P.to_int s2) + (match info with None -> "none" | Some true -> "branch" | Some false -> "fallthrough") | Ijumptable(arg, tbl) -> let tbl = Array.of_list tbl in fprintf pp "jumptable (%a)\n" reg arg; diff --git a/backend/PrintXTL.ml b/backend/PrintXTL.ml index 1c7655fb..d1b79623 100644 --- a/backend/PrintXTL.ml +++ b/backend/PrintXTL.ml @@ -104,7 +104,7 @@ let print_instruction pp succ = function (print_builtin_args var) args | Xbranch s -> print_succ pp s succ - | Xcond(cond, args, s1, s2) -> + | Xcond(cond, args, s1, s2, _) -> fprintf pp "if (%a) goto %d else goto %d" (print_condition var) (cond, args) (P.to_int s1) (P.to_int s2) diff --git a/backend/RTL.v b/backend/RTL.v index 29a49311..dec59ca2 100644 --- a/backend/RTL.v +++ b/backend/RTL.v @@ -67,11 +67,12 @@ Inductive instruction: Type := (** [Ibuiltin ef args dest succ] calls the built-in function identified by [ef], giving it the values of [args] as arguments. It stores the return value in [dest] and branches to [succ]. *) - | Icond: condition -> list reg -> node -> node -> instruction - (** [Icond cond args ifso ifnot] evaluates the boolean condition + | Icond: condition -> list reg -> node -> node -> option bool -> instruction + (** [Icond cond args ifso ifnot info] evaluates the boolean condition [cond] over the values of registers [args]. If the condition is true, it transitions to [ifso]. If the condition is false, - it transitions to [ifnot]. *) + it transitions to [ifnot]. [info] is a ghost field there to provide + information relative to branch prediction. *) | Ijumptable: reg -> list node -> instruction (** [Ijumptable arg tbl] transitions to the node that is the [n]-th element of the list [tbl], where [n] is the unsigned integer @@ -262,8 +263,8 @@ Inductive step: state -> trace -> state -> Prop := step (State s f sp pc rs m) t (State s f sp pc' (regmap_setres res vres rs) m') | exec_Icond: - forall s f sp pc rs m cond args ifso ifnot b pc', - (fn_code f)!pc = Some(Icond cond args ifso ifnot) -> + forall s f sp pc rs m cond args ifso ifnot b pc' predb, + (fn_code f)!pc = Some(Icond cond args ifso ifnot predb) -> eval_condition cond rs##args m = Some b -> pc' = (if b then ifso else ifnot) -> step (State s f sp pc rs m) @@ -403,7 +404,7 @@ Definition successors_instr (i: instruction) : list node := | Icall sig ros args res s => s :: nil | Itailcall sig ros args => nil | Ibuiltin ef args res s => s :: nil - | Icond cond args ifso ifnot => ifso :: ifnot :: nil + | Icond cond args ifso ifnot _ => ifso :: ifnot :: nil | Ijumptable arg tbl => tbl | Ireturn optarg => nil end. @@ -424,7 +425,7 @@ Definition instr_uses (i: instruction) : list reg := | Itailcall sig (inl r) args => r :: args | Itailcall sig (inr id) args => args | Ibuiltin ef args res s => params_of_builtin_args args - | Icond cond args ifso ifnot => args + | Icond cond args ifso ifnot _ => args | Ijumptable arg tbl => arg :: nil | Ireturn None => nil | Ireturn (Some arg) => arg :: nil @@ -442,7 +443,7 @@ Definition instr_defs (i: instruction) : option reg := | Itailcall sig ros args => None | Ibuiltin ef args res s => match res with BR r => Some r | _ => None end - | Icond cond args ifso ifnot => None + | Icond cond args ifso ifnot _ => None | Ijumptable arg tbl => None | Ireturn optarg => None end. @@ -485,7 +486,7 @@ Definition max_reg_instr (m: positive) (pc: node) (i: instruction) := | Ibuiltin ef args res s => fold_left Pos.max (params_of_builtin_args args) (fold_left Pos.max (params_of_builtin_res res) m) - | Icond cond args ifso ifnot => fold_left Pos.max args m + | Icond cond args ifso ifnot _ => fold_left Pos.max args m | Ijumptable arg tbl => Pos.max arg m | Ireturn None => m | Ireturn (Some arg) => Pos.max arg m diff --git a/backend/RTLgen.v b/backend/RTLgen.v index 2c27944a..ac98f3a1 100644 --- a/backend/RTLgen.v +++ b/backend/RTLgen.v @@ -479,7 +479,7 @@ with transl_condexpr (map: mapping) (a: condexpr) (ntrue nfalse: node) match a with | CEcond c al => do rl <- alloc_regs map al; - do nt <- add_instr (Icond c rl ntrue nfalse); + do nt <- add_instr (Icond c rl ntrue nfalse None); transl_exprlist map al rl nt | CEcondition a b c => do nc <- transl_condexpr map c ntrue nfalse; diff --git a/backend/RTLgenspec.v b/backend/RTLgenspec.v index 92b48e2b..30ad7d82 100644 --- a/backend/RTLgenspec.v +++ b/backend/RTLgenspec.v @@ -744,9 +744,9 @@ Inductive tr_expr (c: code): with tr_condition (c: code): mapping -> list reg -> condexpr -> node -> node -> node -> Prop := - | tr_CEcond: forall map pr cond bl ns ntrue nfalse n1 rl, + | tr_CEcond: forall map pr cond bl ns ntrue nfalse n1 rl i, tr_exprlist c map pr bl ns n1 rl -> - c!n1 = Some (Icond cond rl ntrue nfalse) -> + c!n1 = Some (Icond cond rl ntrue nfalse i) -> tr_condition c map pr (CEcond cond bl) ns ntrue nfalse | tr_CEcondition: forall map pr a1 a2 a3 ns ntrue nfalse n2 n3, tr_condition c map pr a1 ns n2 n3 -> diff --git a/backend/RTLtyping.v b/backend/RTLtyping.v index 857f2211..15ed6d8a 100644 --- a/backend/RTLtyping.v +++ b/backend/RTLtyping.v @@ -139,11 +139,11 @@ Inductive wt_instr : instruction -> Prop := valid_successor s -> wt_instr (Ibuiltin ef args res s) | wt_Icond: - forall cond args s1 s2, + forall cond args s1 s2 i, map env args = type_of_condition cond -> valid_successor s1 -> valid_successor s2 -> - wt_instr (Icond cond args s1 s2) + wt_instr (Icond cond args s1 s2 i) | wt_Ijumptable: forall arg tbl, env arg = Tint -> @@ -313,7 +313,7 @@ Definition type_instr (e: S.typenv) (i: instruction) : res S.typenv := | _ => type_builtin_args e args sig.(sig_args) end; type_builtin_res e1 res (proj_sig_res sig) - | Icond cond args s1 s2 => + | Icond cond args s1 s2 _ => do x1 <- check_successor s1; do x2 <- check_successor s2; S.set_list e args (type_of_condition cond) diff --git a/backend/Regalloc.ml b/backend/Regalloc.ml index f2658b04..ffe26933 100644 --- a/backend/Regalloc.ml +++ b/backend/Regalloc.ml @@ -295,8 +295,8 @@ let block_of_RTL_instr funsig tyenv = function (Xbuiltin(ef, args2, res2) :: movelist (params_of_builtin_res res2) (params_of_builtin_res res1) [Xbranch s]) - | RTL.Icond(cond, args, s1, s2) -> - [Xcond(cond, vregs tyenv args, s1, s2)] + | RTL.Icond(cond, args, s1, s2, i) -> + [Xcond(cond, vregs tyenv args, s1, s2, i)] | RTL.Ijumptable(arg, tbl) -> [Xjumptable(vreg tyenv arg, tbl)] | RTL.Ireturn None -> @@ -380,7 +380,7 @@ let live_before instr after = vset_addargs args (vset_removeres res after) | Xbranch s -> after - | Xcond(cond, args, s1, s2) -> + | Xcond(cond, args, s1, s2, _) -> List.fold_right VSet.add args after | Xjumptable(arg, tbl) -> VSet.add arg after @@ -575,7 +575,7 @@ let spill_costs f = charge_list 10 1 (params_of_builtin_res res) end | Xbranch _ -> () - | Xcond(cond, args, _, _) -> + | Xcond(cond, args, _, _, _) -> charge_list 10 1 args | Xjumptable(arg, _) -> charge 10 1 arg @@ -718,7 +718,7 @@ let add_interfs_instr g instr live = end | Xbranch s -> () - | Xcond(cond, args, s1, s2) -> + | Xcond(cond, args, s1, s2, _) -> add_interfs_destroyed g live (destroyed_by_cond cond) | Xjumptable(arg, tbl) -> add_interfs_destroyed g live destroyed_by_jumptable @@ -797,7 +797,7 @@ let tospill_instr alloc instr ts = (addlist_tospill alloc (params_of_builtin_res res) ts) | Xbranch s -> ts - | Xcond(cond, args, s1, s2) -> + | Xcond(cond, args, s1, s2, _) -> addlist_tospill alloc args ts | Xjumptable(arg, tbl) -> add_tospill alloc arg ts @@ -990,9 +990,9 @@ let spill_instr tospill eqs instr = (c1 @ Xbuiltin(ef, args', res') :: c2, eqs2) | Xbranch s -> ([instr], eqs) - | Xcond(cond, args, s1, s2) -> + | Xcond(cond, args, s1, s2, i) -> let (args', c1, eqs1) = reload_vars tospill eqs args in - (c1 @ [Xcond(cond, args', s1, s2)], eqs1) + (c1 @ [Xcond(cond, args', s1, s2, i)], eqs1) | Xjumptable(arg, tbl) -> let (arg', c1, eqs1) = reload_var tospill eqs arg in (c1 @ [Xjumptable(arg', tbl)], eqs1) @@ -1128,8 +1128,8 @@ let transl_instr alloc instr k = AST.map_builtin_res (mreg_of alloc) res) :: k | Xbranch s -> LTL.Lbranch s :: [] - | Xcond(cond, args, s1, s2) -> - LTL.Lcond(cond, mregs_of alloc args, s1, s2) :: [] + | Xcond(cond, args, s1, s2, i) -> + LTL.Lcond(cond, mregs_of alloc args, s1, s2, i) :: [] | Xjumptable(arg, tbl) -> LTL.Ljumptable(mreg_of alloc arg, tbl) :: [] | Xreturn optarg -> diff --git a/backend/Renumber.v b/backend/Renumber.v index 7ba16658..2727b979 100644 --- a/backend/Renumber.v +++ b/backend/Renumber.v @@ -48,7 +48,7 @@ Definition renum_instr (i: instruction) : instruction := | Icall sg ros args res s => Icall sg ros args res (renum_pc s) | Itailcall sg ros args => i | Ibuiltin ef args res s => Ibuiltin ef args res (renum_pc s) - | Icond cond args s1 s2 => Icond cond args (renum_pc s1) (renum_pc s2) + | Icond cond args s1 s2 info => Icond cond args (renum_pc s1) (renum_pc s2) info | Ijumptable arg tbl => Ijumptable arg (List.map renum_pc tbl) | Ireturn or => i end. diff --git a/backend/Splitting.ml b/backend/Splitting.ml index 78eb66a5..3ca45c3b 100644 --- a/backend/Splitting.ml +++ b/backend/Splitting.ml @@ -162,8 +162,8 @@ let ren_instr f maps pc i = | Ibuiltin(ef, args, res, s) -> Ibuiltin(ef, List.map (AST.map_builtin_arg (ren_reg before)) args, AST.map_builtin_res (ren_reg after) res, s) - | Icond(cond, args, s1, s2) -> - Icond(cond, ren_regs before args, s1, s2) + | Icond(cond, args, s1, s2, i) -> + Icond(cond, ren_regs before args, s1, s2, i) | Ijumptable(arg, tbl) -> Ijumptable(ren_reg before arg, tbl) | Ireturn optarg -> diff --git a/backend/Tunneling.v b/backend/Tunneling.v index da1ce45a..a4c4a195 100644 --- a/backend/Tunneling.v +++ b/backend/Tunneling.v @@ -78,11 +78,11 @@ Definition record_gotos (f: LTL.function) : U.t := Definition tunnel_instr (uf: U.t) (i: instruction) : instruction := match i with | Lbranch s => Lbranch (U.repr uf s) - | Lcond cond args s1 s2 => + | Lcond cond args s1 s2 info => let s1' := U.repr uf s1 in let s2' := U.repr uf s2 in if peq s1' s2' then Lbranch s1' - else Lcond cond args s1' s2' + else Lcond cond args s1' s2' info | Ljumptable arg tbl => Ljumptable arg (List.map (U.repr uf) tbl) | _ => i end. diff --git a/backend/Unusedglob.v b/backend/Unusedglob.v index 1b5f2547..93ca7af4 100644 --- a/backend/Unusedglob.v +++ b/backend/Unusedglob.v @@ -53,7 +53,7 @@ Definition ref_instruction (i: instruction) : list ident := | Itailcall _ (inl r) _ => nil | Itailcall _ (inr id) _ => id :: nil | Ibuiltin _ args _ _ => globals_of_builtin_args args - | Icond cond _ _ _ => nil + | Icond cond _ _ _ _ => nil | Ijumptable _ _ => nil | Ireturn _ => nil end. diff --git a/backend/ValueAnalysis.v b/backend/ValueAnalysis.v index 9a33768c..2e79d1a9 100644 --- a/backend/ValueAnalysis.v +++ b/backend/ValueAnalysis.v @@ -156,7 +156,7 @@ Definition transfer (f: function) (rm: romem) (pc: node) (ae: aenv) (am: amem) : VA.Bot | Some(Ibuiltin ef args res s) => transfer_builtin ae am rm ef args res - | Some(Icond cond args s1 s2) => + | Some(Icond cond args s1 s2 _) => VA.State ae am | Some(Ijumptable arg tbl) => VA.State ae am @@ -1044,9 +1044,8 @@ Proof. red; simpl; intros. destruct (plt b (Mem.nextblock m)). exploit RO; eauto. intros (R & P & Q). split; auto. - split. apply bmatch_incr with bc; auto. apply bmatch_inv with m; auto. - intros. eapply Mem.loadbytes_unchanged_on_1. eapply external_call_readonly; eauto. - auto. intros; red. apply Q. + split. apply bmatch_incr with bc; auto. apply bmatch_ext with m; auto. + intros. eapply external_call_readonly with (m2 := m'); eauto. intros; red; intros; elim (Q ofs). eapply external_call_max_perm with (m2 := m'); eauto. destruct (j' b); congruence. diff --git a/backend/ValueDomain.v b/backend/ValueDomain.v index c132ce7c..779e7bb9 100644 --- a/backend/ValueDomain.v +++ b/backend/ValueDomain.v @@ -3502,11 +3502,6 @@ Proof. - omegaContradiction. Qed. -Lemma max_size_chunk: forall chunk, size_chunk chunk <= 8. -Proof. - destruct chunk; simpl; omega. -Qed. - Remark inval_before_contents: forall i c chunk' av' j, (inval_before i (i - 7) c)##j = Some (ACval chunk' av') -> diff --git a/backend/XTL.ml b/backend/XTL.ml index c496fafb..1d8e89c0 100644 --- a/backend/XTL.ml +++ b/backend/XTL.ml @@ -36,7 +36,7 @@ type instruction = | Xtailcall of signature * (var, ident) sum * var list | Xbuiltin of external_function * var builtin_arg list * var builtin_res | Xbranch of node - | Xcond of condition * var list * node * node + | Xcond of condition * var list * node * node * bool option | Xjumptable of var * node list | Xreturn of var list @@ -105,7 +105,7 @@ let twin_reg r = let rec successors_block = function | Xbranch s :: _ -> [s] | Xtailcall(sg, vos, args) :: _ -> [] - | Xcond(cond, args, s1, s2) :: _ -> [s1; s2] + | Xcond(cond, args, s1, s2, _) :: _ -> [s1; s2] | Xjumptable(arg, tbl) :: _ -> tbl | Xreturn _:: _ -> [] | instr :: blk -> successors_block blk @@ -179,7 +179,7 @@ let type_instr = function type_builtin_res res (proj_sig_res sg) | Xbranch s -> () - | Xcond(cond, args, s1, s2) -> + | Xcond(cond, args, s1, s2, _) -> set_vars_type args (type_of_condition cond) | Xjumptable(arg, tbl) -> set_var_type arg Tint diff --git a/backend/XTL.mli b/backend/XTL.mli index b4b77fab..7b7f7186 100644 --- a/backend/XTL.mli +++ b/backend/XTL.mli @@ -37,7 +37,7 @@ type instruction = | Xtailcall of signature * (var, ident) sum * var list | Xbuiltin of external_function * var builtin_arg list * var builtin_res | Xbranch of node - | Xcond of condition * var list * node * node + | Xcond of condition * var list * node * node * bool option | Xjumptable of var * node list | Xreturn of var list diff --git a/cfrontend/C2C.ml b/cfrontend/C2C.ml index f2a68156..f637b5e4 100644 --- a/cfrontend/C2C.ml +++ b/cfrontend/C2C.ml @@ -1272,7 +1272,7 @@ let convertFundef loc env fd = { a_storage = fd.fd_storage; a_alignment = None; a_size = None; - a_sections = Sections.for_function env id' fd.fd_attrib; + a_sections = Sections.for_function env loc id' fd.fd_attrib; a_access = Sections.Access_default; a_inline = inline; a_loc = loc }; @@ -1358,7 +1358,7 @@ let convertGlobvar loc env (sto, id, ty, optinit) = | Some i -> convertInitializer env ty i in let (section, access) = - Sections.for_variable env id' ty (optinit <> None) + Sections.for_variable env loc id' ty (optinit <> None) (match sto with | Storage_thread_local | Storage_thread_local_extern | Storage_thread_local_static -> true diff --git a/cfrontend/Cexec.v b/cfrontend/Cexec.v index 2942080b..b08c3ad7 100644 --- a/cfrontend/Cexec.v +++ b/cfrontend/Cexec.v @@ -460,6 +460,14 @@ Definition do_ef_free check (zlt 0 (Ptrofs.unsigned sz)); do m' <- Mem.free m b (Ptrofs.unsigned lo - size_chunk Mptr) (Ptrofs.unsigned lo + Ptrofs.unsigned sz); Some(w, E0, Vundef, m') + | Vint n :: nil => + if Int.eq_dec n Int.zero && negb Archi.ptr64 + then Some(w, E0, Vundef, m) + else None + | Vlong n :: nil => + if Int64.eq_dec n Int64.zero && Archi.ptr64 + then Some(w, E0, Vundef, m) + else None | _ => None end. @@ -544,45 +552,51 @@ Proof with try congruence. - eapply do_external_function_sound; eauto. } destruct ef; simpl. -(* EF_external *) +- (* EF_external *) eapply do_external_function_sound; eauto. -(* EF_builtin *) +- (* EF_builtin *) eapply BF_EX; eauto. -(* EF_runtime *) +- (* EF_runtime *) eapply BF_EX; eauto. -(* EF_vload *) +- (* EF_vload *) unfold do_ef_volatile_load. destruct vargs... destruct v... destruct vargs... mydestr. destruct p as [[w'' t''] v]; mydestr. exploit do_volatile_load_sound; eauto. intuition. econstructor; eauto. - auto. -(* EF_vstore *) +- (* EF_vstore *) unfold do_ef_volatile_store. destruct vargs... destruct v... destruct vargs... destruct vargs... mydestr. destruct p as [[w'' t''] m'']. mydestr. exploit do_volatile_store_sound; eauto. intuition. econstructor; eauto. - auto. -(* EF_malloc *) +- (* EF_malloc *) unfold do_ef_malloc. destruct vargs... destruct vargs... mydestr. destruct (Mem.alloc m (- size_chunk Mptr) (Ptrofs.unsigned i)) as [m1 b] eqn:?. mydestr. split. apply SIZE in Heqo. subst v. econstructor; eauto. constructor. -(* EF_free *) - unfold do_ef_free. destruct vargs... destruct v... destruct vargs... - mydestr. split. apply SIZE in Heqo0. econstructor; eauto. congruence. omega. constructor. -(* EF_memcpy *) +- (* EF_free *) + unfold do_ef_free. destruct vargs... destruct v... ++ destruct vargs... mydestr; InvBooleans; subst i. + replace (Vint Int.zero) with Vnullptr. split; constructor. + apply negb_true_iff in H0. unfold Vnullptr; rewrite H0; auto. ++ destruct vargs... mydestr; InvBooleans; subst i. + replace (Vlong Int64.zero) with Vnullptr. split; constructor. + unfold Vnullptr; rewrite H0; auto. ++ destruct vargs... mydestr. + split. apply SIZE in Heqo0. econstructor; eauto. congruence. omega. + constructor. +- (* EF_memcpy *) unfold do_ef_memcpy. destruct vargs... destruct v... destruct vargs... destruct v... destruct vargs... mydestr. apply Decidable_sound in Heqb1. red in Heqb1. split. econstructor; eauto; tauto. constructor. -(* EF_annot *) +- (* EF_annot *) unfold do_ef_annot. mydestr. split. constructor. apply list_eventval_of_val_sound; auto. econstructor. constructor; eauto. constructor. -(* EF_annot_val *) +- (* EF_annot_val *) unfold do_ef_annot_val. destruct vargs... destruct vargs... mydestr. split. constructor. apply eventval_of_val_sound; auto. econstructor. constructor; eauto. constructor. -(* EF_inline_asm *) +- (* EF_inline_asm *) eapply do_inline_assembly_sound; eauto. -(* EF_debug *) +- (* EF_debug *) unfold do_ef_debug. mydestr. split; constructor. Qed. @@ -605,37 +619,38 @@ Proof. - eapply do_external_function_complete; eauto. } destruct ef; simpl in *. -(* EF_external *) +- (* EF_external *) eapply do_external_function_complete; eauto. -(* EF_builtin *) +- (* EF_builtin *) eapply BF_EX; eauto. -(* EF_runtime *) +- (* EF_runtime *) eapply BF_EX; eauto. -(* EF_vload *) +- (* EF_vload *) inv H; unfold do_ef_volatile_load. exploit do_volatile_load_complete; eauto. intros EQ; rewrite EQ; auto. -(* EF_vstore *) +- (* EF_vstore *) inv H; unfold do_ef_volatile_store. exploit do_volatile_store_complete; eauto. intros EQ; rewrite EQ; auto. -(* EF_malloc *) +- (* EF_malloc *) inv H; unfold do_ef_malloc. inv H0. erewrite SIZE by eauto. rewrite H1, H2. auto. -(* EF_free *) +- (* EF_free *) inv H; unfold do_ef_free. - inv H0. rewrite H1. erewrite SIZE by eauto. rewrite zlt_true. rewrite H3. auto. omega. -(* EF_memcpy *) ++ inv H0. rewrite H1. erewrite SIZE by eauto. rewrite zlt_true. rewrite H3. auto. omega. ++ inv H0. unfold Vnullptr; destruct Archi.ptr64; auto. +- (* EF_memcpy *) inv H; unfold do_ef_memcpy. inv H0. rewrite Decidable_complete. rewrite H7; rewrite H8; auto. red. tauto. -(* EF_annot *) +- (* EF_annot *) inv H; unfold do_ef_annot. inv H0. inv H6. inv H4. rewrite (list_eventval_of_val_complete _ _ _ H1). auto. -(* EF_annot_val *) +- (* EF_annot_val *) inv H; unfold do_ef_annot_val. inv H0. inv H6. inv H4. rewrite (eventval_of_val_complete _ _ _ H1). auto. -(* EF_inline_asm *) +- (* EF_inline_asm *) eapply do_inline_assembly_complete; eauto. -(* EF_debug *) +- (* EF_debug *) inv H. inv H0. reflexivity. Qed. diff --git a/common/Events.v b/common/Events.v index 10e0c232..28bb992a 100644 --- a/common/Events.v +++ b/common/Events.v @@ -649,9 +649,12 @@ Record extcall_properties (sem: extcall_sem) (sg: signature) : Prop := (** External call cannot modify memory unless they have [Max, Writable] permissions. *) ec_readonly: - forall ge vargs m1 t vres m2, + forall ge vargs m1 t vres m2 b ofs n bytes, sem ge vargs m1 t vres m2 -> - Mem.unchanged_on (loc_not_writable m1) m1 m2; + Mem.valid_block m1 b -> + Mem.loadbytes m2 b ofs n = Some bytes -> + (forall i, ofs <= i < ofs + n -> ~Mem.perm m1 b i Max Writable) -> + Mem.loadbytes m1 b ofs n = Some bytes; (** External calls must commute with memory extensions, in the following sense. *) @@ -784,7 +787,7 @@ Proof. (* max perms *) - inv H; auto. (* readonly *) -- inv H. apply Mem.unchanged_on_refl. +- inv H; auto. (* mem extends *) - inv H. inv H1. inv H6. inv H4. exploit volatile_load_extends; eauto. intros [v' [A B]]. @@ -833,14 +836,27 @@ Proof. rewrite C; auto. Qed. +Lemma unchanged_on_readonly: + forall m1 m2 b ofs n bytes, + Mem.unchanged_on (loc_not_writable m1) m1 m2 -> + Mem.valid_block m1 b -> + Mem.loadbytes m2 b ofs n = Some bytes -> + (forall i, ofs <= i < ofs + n -> ~Mem.perm m1 b i Max Writable) -> + Mem.loadbytes m1 b ofs n = Some bytes. +Proof. + intros. + rewrite <- H1. symmetry. + apply Mem.loadbytes_unchanged_on_1 with (P := loc_not_writable m1); auto. +Qed. + Lemma volatile_store_readonly: forall ge chunk1 m1 b1 ofs1 v t m2, volatile_store ge chunk1 m1 b1 ofs1 v t m2 -> Mem.unchanged_on (loc_not_writable m1) m1 m2. Proof. intros. inv H. - apply Mem.unchanged_on_refl. - eapply Mem.store_unchanged_on; eauto. +- apply Mem.unchanged_on_refl. +- eapply Mem.store_unchanged_on; eauto. exploit Mem.store_valid_access_3; eauto. intros [P Q]. intros. unfold loc_not_writable. red; intros. elim H2. apply Mem.perm_cur_max. apply P. auto. @@ -934,7 +950,7 @@ Proof. (* perms *) - inv H. inv H2. auto. eauto with mem. (* readonly *) -- inv H. eapply volatile_store_readonly; eauto. +- inv H. eapply unchanged_on_readonly; eauto. eapply volatile_store_readonly; eauto. (* mem extends*) - inv H. inv H1. inv H6. inv H7. inv H4. exploit volatile_store_extends; eauto. intros [m2' [A [B C]]]. @@ -994,7 +1010,7 @@ Proof. rewrite dec_eq_false. auto. apply Mem.valid_not_valid_diff with m1; eauto with mem. (* readonly *) -- inv H. eapply UNCHANGED; eauto. +- inv H. eapply unchanged_on_readonly; eauto. (* mem extends *) - inv H. inv H1. inv H7. assert (SZ: v2 = Vptrofs sz). @@ -1045,11 +1061,13 @@ Qed. Inductive extcall_free_sem (ge: Senv.t): list val -> mem -> trace -> val -> mem -> Prop := - | extcall_free_sem_intro: forall b lo sz m m', + | extcall_free_sem_ptr: forall b lo sz m m', Mem.load Mptr m b (Ptrofs.unsigned lo - size_chunk Mptr) = Some (Vptrofs sz) -> Ptrofs.unsigned sz > 0 -> Mem.free m b (Ptrofs.unsigned lo - size_chunk Mptr) (Ptrofs.unsigned lo + Ptrofs.unsigned sz) = Some m' -> - extcall_free_sem ge (Vptr b lo :: nil) m E0 Vundef m'. + extcall_free_sem ge (Vptr b lo :: nil) m E0 Vundef m' + | extcall_free_sem_null: forall m, + extcall_free_sem ge (Vnullptr :: nil) m E0 Vundef m. Lemma extcall_free_ok: extcall_properties extcall_free_sem @@ -1057,26 +1075,29 @@ Lemma extcall_free_ok: Proof. constructor; intros. (* well typed *) -- inv H. simpl. auto. +- inv H; simpl; auto. (* symbols preserved *) - inv H0; econstructor; eauto. (* valid block *) -- inv H. eauto with mem. +- inv H; eauto with mem. (* perms *) -- inv H. eapply Mem.perm_free_3; eauto. +- inv H; eauto using Mem.perm_free_3. (* readonly *) -- inv H. eapply Mem.free_unchanged_on; eauto. - intros. red; intros. elim H3. +- eapply unchanged_on_readonly; eauto. inv H. ++ eapply Mem.free_unchanged_on; eauto. + intros. red; intros. elim H6. apply Mem.perm_cur_max. apply Mem.perm_implies with Freeable; auto with mem. eapply Mem.free_range_perm; eauto. ++ apply Mem.unchanged_on_refl. (* mem extends *) -- inv H. inv H1. inv H8. inv H6. +- inv H. ++ inv H1. inv H8. inv H6. exploit Mem.load_extends; eauto. intros [v' [A B]]. assert (v' = Vptrofs sz). { unfold Vptrofs in *; destruct Archi.ptr64; inv B; auto. } subst v'. exploit Mem.free_parallel_extends; eauto. intros [m2' [C D]]. - exists Vundef; exists m2'; intuition. + exists Vundef; exists m2'; intuition auto. econstructor; eauto. eapply Mem.free_unchanged_on; eauto. unfold loc_out_of_bounds; intros. @@ -1084,8 +1105,14 @@ Proof. { apply Mem.perm_cur_max. apply Mem.perm_implies with Freeable; auto with mem. eapply Mem.free_range_perm. eexact H4. eauto. } tauto. ++ inv H1. inv H5. replace v2 with Vnullptr. + exists Vundef; exists m1'; intuition auto. + constructor. + apply Mem.unchanged_on_refl. + unfold Vnullptr in *; destruct Archi.ptr64; inv H3; auto. (* mem inject *) -- inv H0. inv H2. inv H7. inv H9. +- inv H0. ++ inv H2. inv H7. inv H9. exploit Mem.load_inject; eauto. intros [v' [A B]]. assert (v' = Vptrofs sz). { unfold Vptrofs in *; destruct Archi.ptr64; inv B; auto. } @@ -1099,7 +1126,7 @@ Proof. intro EQ. exploit Mem.free_parallel_inject; eauto. intros (m2' & C & D). exists f, Vundef, m2'; split. - apply extcall_free_sem_intro with (sz := sz) (m' := m2'). + apply extcall_free_sem_ptr with (sz := sz) (m' := m2'). rewrite EQ. rewrite <- A. f_equal. omega. auto. auto. rewrite ! EQ. rewrite <- C. f_equal; omega. @@ -1112,14 +1139,19 @@ Proof. apply P. omega. split. auto. red; intros. congruence. ++ inv H2. inv H6. replace v' with Vnullptr. + exists f, Vundef, m1'; intuition auto using Mem.unchanged_on_refl. + constructor. + red; intros; congruence. + unfold Vnullptr in *; destruct Archi.ptr64; inv H4; auto. (* trace length *) - inv H; simpl; omega. (* receptive *) -- assert (t1 = t2). inv H; inv H0; auto. subst t2. +- assert (t1 = t2) by (inv H; inv H0; auto). subst t2. exists vres1; exists m1; auto. (* determ *) -- inv H; inv H0. - assert (EQ1: Vptrofs sz0 = Vptrofs sz) by congruence. +- inv H; inv H0; try (unfold Vnullptr in *; destruct Archi.ptr64; discriminate). ++ assert (EQ1: Vptrofs sz0 = Vptrofs sz) by congruence. assert (EQ2: sz0 = sz). { unfold Vptrofs in EQ1; destruct Archi.ptr64 eqn:SF. rewrite <- (Ptrofs.of_int64_to_int64 SF sz0), <- (Ptrofs.of_int64_to_int64 SF sz). congruence. @@ -1127,6 +1159,7 @@ Proof. } subst sz0. split. constructor. intuition congruence. ++ split. constructor. intuition auto. Qed. (** ** Semantics of [memcpy] operations. *) @@ -1159,8 +1192,9 @@ Proof. - (* perms *) intros. inv H. eapply Mem.perm_storebytes_2; eauto. - (* readonly *) - intros. inv H. eapply Mem.storebytes_unchanged_on; eauto. - intros; red; intros. elim H8. + intros. inv H. eapply unchanged_on_readonly; eauto. + eapply Mem.storebytes_unchanged_on; eauto. + intros; red; intros. elim H11. apply Mem.perm_cur_max. eapply Mem.storebytes_range_perm; eauto. - (* extensions *) intros. inv H. @@ -1271,7 +1305,7 @@ Proof. (* perms *) - inv H; auto. (* readonly *) -- inv H. apply Mem.unchanged_on_refl. +- inv H; auto. (* mem extends *) - inv H. exists Vundef; exists m1'; intuition. @@ -1316,7 +1350,7 @@ Proof. (* perms *) - inv H; auto. (* readonly *) -- inv H. apply Mem.unchanged_on_refl. +- inv H; auto. (* mem extends *) - inv H. inv H1. inv H6. exists v2; exists m1'; intuition. @@ -1359,7 +1393,7 @@ Proof. (* perms *) - inv H; auto. (* readonly *) -- inv H. apply Mem.unchanged_on_refl. +- inv H; auto. (* mem extends *) - inv H. exists Vundef; exists m1'; intuition. @@ -1406,7 +1440,7 @@ Proof. (* perms *) - inv H; auto. (* readonly *) -- inv H. apply Mem.unchanged_on_refl. +- inv H; auto. (* mem extends *) - inv H. fold bsem in H2. apply val_inject_list_lessdef in H1. specialize (bs_inject _ bsem _ _ _ H1). diff --git a/common/Memdata.v b/common/Memdata.v index f3016efe..a09b90f5 100644 --- a/common/Memdata.v +++ b/common/Memdata.v @@ -44,6 +44,13 @@ Definition size_chunk (chunk: memory_chunk) : Z := | Many64 => 8 end. +Definition largest_size_chunk := 8. + +Lemma max_size_chunk: forall chunk, size_chunk chunk <= 8. +Proof. + destruct chunk; simpl; omega. +Qed. + Lemma size_chunk_pos: forall chunk, size_chunk chunk > 0. Proof. diff --git a/common/Memory.v b/common/Memory.v index f21d99af..cd8a2001 100644 --- a/common/Memory.v +++ b/common/Memory.v @@ -1307,6 +1307,23 @@ Proof. } Qed. +Section STOREV. +Variable chunk: memory_chunk. +Variable m1: mem. +Variables addr v: val. +Variable m2: mem. +Hypothesis STORE: storev chunk m1 addr v = Some m2. + + +Theorem loadv_storev_same: + loadv chunk m2 addr = Some (Val.load_result chunk v). +Proof. + destruct addr; simpl in *; try discriminate. + eapply load_store_same. + eassumption. +Qed. +End STOREV. + Lemma load_store_overlap: forall chunk m1 b ofs v m2 chunk' ofs' v', store chunk m1 b ofs v = Some m2 -> diff --git a/common/Sections.ml b/common/Sections.ml index 9555c203..ea0b6dbc 100644 --- a/common/Sections.ml +++ b/common/Sections.ml @@ -166,9 +166,22 @@ let gcc_section name readonly exec = sec_writable = not readonly; sec_executable = exec; sec_access = Access_default } +(* Check and extract whether a section was given as attribute *) + +let get_attr_section loc attr = + match Cutil.find_custom_attributes ["section"; "__section__"] attr with + | [] -> None + | [[C.AString name]] -> Some name + | [[_]] -> + Diagnostics.error loc "'section' attribute requires a string"; + None + | _ -> + Diagnostics.error loc "ambiguous 'section' attribute"; + None + (* Determine section for a variable definition *) -let for_variable env id ty init thrl = +let for_variable env loc id ty init thrl = let attr = Cutil.attributes_of_type env ty in let readonly = List.mem C.AConst attr && not(List.mem C.AVolatile attr) in let si = @@ -176,11 +189,11 @@ let for_variable env id ty init thrl = (* 1- Section explicitly associated with #use_section *) Hashtbl.find use_section_table id with Not_found -> - match Cutil.find_custom_attributes ["section"; "__section__"] attr with - | [[C.AString name]] -> + match get_attr_section loc attr with + | Some name -> (* 2- Section given as an attribute, gcc-style *) gcc_section name readonly false - | _ -> + | None -> (* 3- Default section appropriate for size and const-ness *) let size = match Cutil.sizeof env ty with Some sz -> sz | None -> max_int in @@ -197,17 +210,17 @@ let for_variable env id ty init thrl = (* Determine sections for a function definition *) -let for_function env id attr = +let for_function env loc id attr = let si_code = try (* 1- Section explicitly associated with #use_section *) Hashtbl.find use_section_table id with Not_found -> - match Cutil.find_custom_attributes ["section"; "__section__"] attr with - | [[C.AString name]] -> + match get_attr_section loc attr with + | Some name -> (* 2- Section given as an attribute, gcc-style *) gcc_section name true true - | _ -> + | None -> (* 3- Default section *) try Hashtbl.find current_section_table "CODE" diff --git a/common/Sections.mli b/common/Sections.mli index e882f042..00c06c20 100644 --- a/common/Sections.mli +++ b/common/Sections.mli @@ -47,7 +47,7 @@ val define_section: -> ?writable:bool -> ?executable:bool -> ?access:access_mode -> unit -> unit val use_section_for: AST.ident -> string -> bool -val for_variable: Env.t -> AST.ident -> C.typ -> bool -> bool -> +val for_variable: Env.t -> C.location -> AST.ident -> C.typ -> bool -> bool -> section_name * access_mode -val for_function: Env.t -> AST.ident -> C.attributes -> section_name list +val for_function: Env.t -> C.location -> AST.ident -> C.attributes -> section_name list val for_stringlit: unit -> section_name diff --git a/config_arm.sh b/config_arm.sh index eed55fab..1861e029 100755 --- a/config_arm.sh +++ b/config_arm.sh @@ -1 +1 @@ -exec ./config_simple.sh arm-linux --toolprefix arm-linux-gnueabihf- "$@" +exec ./config_simple.sh arm-linux --toolprefix arm-linux-gnueabi- "$@" diff --git a/config_armhf.sh b/config_armhf.sh new file mode 100755 index 00000000..8a1302bd --- /dev/null +++ b/config_armhf.sh @@ -0,0 +1 @@ +exec ./config_simple.sh arm-eabihf --toolprefix arm-linux-gnueabihf- "$@" diff --git a/config_ppc64.sh b/config_ppc64.sh new file mode 100755 index 00000000..df31c18f --- /dev/null +++ b/config_ppc64.sh @@ -0,0 +1 @@ +exec ./config_simple.sh ppc64-linux --toolprefix powerpc64-linux-gnu- "$@" diff --git a/config_rv32.sh b/config_rv32.sh index 654cacfa..a5a5cf1c 100755 --- a/config_rv32.sh +++ b/config_rv32.sh @@ -1 +1 @@ -exec ./config_simple.sh rv32-linux --toolprefix riscv64-unknown-elf- "$@" +exec ./config_simple.sh rv32-linux --toolprefix riscv64-linux-gnu- "$@" diff --git a/config_rv64.sh b/config_rv64.sh index e95f8a70..0698c2ff 100755 --- a/config_rv64.sh +++ b/config_rv64.sh @@ -1 +1 @@ -exec ./config_simple.sh rv64-linux --toolprefix riscv64-unknown-elf- "$@" +exec ./config_simple.sh rv64-linux --toolprefix riscv64-linux-gnu- "$@" diff --git a/config_simple.sh b/config_simple.sh index f02680c4..e2d3844c 100755 --- a/config_simple.sh +++ b/config_simple.sh @@ -3,4 +3,9 @@ shift version=`git rev-parse --short HEAD` branch=`git rev-parse --abbrev-ref HEAD` date=`date -I` -./configure --prefix /opt/CompCert/${branch}/${date}_${version}/$arch "$@" $arch + +if test "x$CCOMP_INSTALL_PREFIX" = "x" ; +then CCOMP_INSTALL_PREFIX=/opt/CompCert ; +fi + +./configure --prefix ${CCOMP_INSTALL_PREFIX}/${branch}/${date}_${version}/$arch "$@" $arch @@ -568,7 +568,7 @@ missingtools=false echo "Testing Coq... " | tr -d '\n' coq_ver=$(${COQBIN}coqc -v 2>/dev/null | sed -n -e 's/The Coq Proof Assistant, version \([^ ]*\).*$/\1/p') case "$coq_ver" in - 8.8.0|8.8.1|8.8.2|8.9.0|8.9.1|8.10.0|8.10.1|8.10.2|8.11.0) + 8.9.0|8.9.1|8.10.0|8.10.1|8.10.2|8.11.0|8.11.1) echo "version $coq_ver -- good!";; ?*) echo "version $coq_ver -- UNSUPPORTED" diff --git a/cparser/Elab.ml b/cparser/Elab.ml index c5295347..0504ad0b 100644 --- a/cparser/Elab.ml +++ b/cparser/Elab.ml @@ -469,7 +469,8 @@ let elab_constant loc = function let (v, fk) = elab_float_constant f in CFloat(v, fk) | CONST_CHAR(wide, s) -> - CInt(elab_char_constant loc wide s, IInt, "") + let ikind = if wide then wchar_ikind () else IInt in + CInt(elab_char_constant loc wide s, ikind, "") | CONST_STRING(wide, s) -> elab_string_literal loc wide s @@ -2457,8 +2458,8 @@ let enter_typedef loc env sto (s, ty, init) = env end else begin - error loc "typedef redefinition with different types (%a vs %a)" - (print_typ env) ty (print_typ env) ty'; + error loc "redefinition of typedef '%s' with different type (%a vs %a)" + s (print_typ env) ty (print_typ env) ty'; env end | _ -> diff --git a/doc/index.html b/doc/index.html index 3a4cf6ba..631c5d99 100644 --- a/doc/index.html +++ b/doc/index.html @@ -24,7 +24,7 @@ a:active {color : Red; text-decoration : underline; } <H1 align="center">The CompCert verified compiler</H1> <H2 align="center">Commented Coq development</H2> -<H3 align="center">Version 3.6, 2019-09-17</H3> +<H3 align="center">Version 3.7, 2020-03-31</H3> <H2>Introduction</H2> @@ -101,6 +101,8 @@ See also: <A HREF="html/compcert.common.Memdata.html">Memdata</A> (in-memory rep <LI> <A HREF="html/compcert.common.Determinism.html">Determinism</A>: determinism properties of small-step semantics. <LI> <A HREF="html/compcert.powerpc.Op.html"><I>Op</I></A>: operators, addressing modes and their semantics. +<LI> <A HREF="html/compcert.common.Builtins.html">Builtins</A>: semantics of built-in functions. <BR> +See also: <A HREF="html/compcert.common.Builtins0.html">Builtins0</A> (target-independent part), <A HREF="html/compcert.powerpc.Builtins1.html"><I>Builtins1</I></A> (target-dependent part). <LI> <A HREF="html/compcert.common.Unityping.html">Unityping</A>: a solver for atomic unification constraints. </UL> diff --git a/driver/Clflags.ml b/driver/Clflags.ml index 6d6f1df4..6986fb96 100644 --- a/driver/Clflags.ml +++ b/driver/Clflags.ml @@ -28,8 +28,8 @@ let option_fconstprop = ref true let option_fcse = ref true let option_fcse2 = ref true let option_fredundancy = ref true -let option_fduplicate = ref false -let option_finvertcond = ref true (* only active if option_fduplicate is also true *) +let option_fduplicate = ref (-1) +let option_finvertcond = ref true let option_ftracelinearize = ref false let option_fpostpass = ref true let option_fpostpass_sched = ref "list" @@ -81,3 +81,4 @@ let option_faddx = ref false let option_fcoalesce_mem = ref true let option_fforward_moves = ref true let option_all_loads_nontrap = ref false +let option_inline_auto_threshold = ref 0 diff --git a/driver/Compopts.v b/driver/Compopts.v index b4b9f30d..848657e5 100644 --- a/driver/Compopts.v +++ b/driver/Compopts.v @@ -27,8 +27,7 @@ Parameter generate_float_constants: unit -> bool. (** For value analysis. Currently always false. *) Parameter va_strict: unit -> bool. -(** Flag -fduplicate. For tail duplication optimization. Necessary to have - * bigger superblocks *) +(** Flag -fduplicate. Branch prediction annotation + tail duplication *) Parameter optim_duplicate: unit -> bool. (** Flag -ftailcalls. For tail call optimization. *) diff --git a/driver/Driver.ml b/driver/Driver.ml index db71aef9..388482a0 100644 --- a/driver/Driver.ml +++ b/driver/Driver.ml @@ -190,6 +190,7 @@ Processing options: -Os Optimize for code size in preference to code speed -Obranchless Optimize to generate fewer conditional branches; try to produce branch-free instruction sequences as much as possible + -finline-auto-threshold n Inline functions under size n -ftailcalls Optimize function calls in tail position [on] -fconst-prop Perform global constant propagation [on] -ffloat-const-prop <n> Control constant propagation of floats @@ -200,11 +201,15 @@ Processing options: -fpostpass Perform postpass scheduling (only for K1 architecture) [on] -fpostpass= <optim> Perform postpass scheduling with the specified optimization [list] (<optim>=list: list scheduling, <optim>=ilp: ILP, <optim>=greedy: just packing bundles) - -fduplicate Perform tail duplication to form superblocks on predicted traces + -fduplicate <nb_nodes> Perform tail duplication to form superblocks on predicted traces + nb_nodes control the heuristic deciding to duplicate or not + A value of -1 desactivates the entire pass (including branch prediction) + A value of 0 desactivates the duplication (but activates the branch prediction) + FIXME : this is desactivated by default for now -finvertcond Invert conditions based on predicted paths (to prefer fallthrough). Requires -fduplicate to be also activated [on] -ftracelinearize Linearizes based on the traces identified by duplicate phase - It is recommended to also activate -fduplicate with this pass [off] + It is heavily recommended to activate -finvertcond with this pass [off] -fforward-moves Forward moves after CSE -finline Perform inlining of functions [on] -finline-functions-called-once Integrate functions only required by their @@ -318,10 +323,11 @@ let cmdline_actions = [ Exact "-O0", Unit (unset_all optimization_options); Exact "-O", Unit (set_all optimization_options); - _Regexp "-O1", Self (fun _ -> set_all optimization_options (); option_fpostpass := false; option_fduplicate := false); + _Regexp "-O1", Self (fun _ -> set_all optimization_options (); option_fpostpass := false); _Regexp "-O[123]$", Unit (set_all optimization_options); Exact "-Os", Set option_Osize; Exact "-Obranchless", Set option_Obranchless; + Exact "-finline-auto-threshold", Integer (fun n -> option_inline_auto_threshold := n); Exact "-fsmall-data", Integer(fun n -> option_small_data := n); Exact "-fsmall-const", Integer(fun n -> option_small_const := n); Exact "-ffloat-const-prop", Integer(fun n -> option_ffloatconstprop := n); @@ -393,7 +399,7 @@ let cmdline_actions = @ f_opt "cse2" option_fcse2 @ f_opt "redundancy" option_fredundancy @ f_opt "postpass" option_fpostpass - @ f_opt "duplicate" option_fduplicate + @ [ Exact "-fduplicate", Integer (fun n -> option_fduplicate := n) ] @ f_opt "invertcond" option_finvertcond @ f_opt "tracelinearize" option_ftracelinearize @ f_opt_str "postpass" option_fpostpass option_fpostpass_sched diff --git a/extraction/extraction.v b/extraction/extraction.v index 929c21e0..9b568951 100644 --- a/extraction/extraction.v +++ b/extraction/extraction.v @@ -106,7 +106,7 @@ Extract Constant Compopts.generate_float_constants => Extract Constant Compopts.optim_tailcalls => "fun _ -> !Clflags.option_ftailcalls". Extract Constant Compopts.optim_duplicate => - "fun _ -> !Clflags.option_fduplicate". + "fun _ -> (if !Clflags.option_fduplicate = -1 then false else true)". Extract Constant Compopts.optim_constprop => "fun _ -> !Clflags.option_fconstprop". Extract Constant Compopts.optim_CSE => @@ -116,6 +116,19 @@ Module Type TREE. forall (m1: t A) (m2: t B) (i: elt), get i (combine f m1 m2) = f (get i m1) (get i m2). + Parameter combine_null : + forall (A B C: Type) (f: A -> B -> option C), + t A -> t B -> t C. + + Axiom gcombine_null: + forall (A B C: Type) (f: A -> B -> option C), + forall (m1: t A) (m2: t B) (i: elt), + get i (combine_null f m1 m2) = + match (get i m1), (get i m2) with + | (Some x1), (Some x2) => f x1 x2 + | _, _ => None + end. + (** Enumerating the bindings of a tree. *) Parameter elements: forall (A: Type), t A -> list (elt * A). @@ -151,6 +164,12 @@ Module Type TREE. forall (A B: Type) (f: B -> A -> B) (v: B) (m: t A), fold1 f m v = List.fold_left (fun a p => f a (snd p)) (elements m) v. + + Parameter bempty_canon : + forall (A : Type), t A -> bool. + Axiom bempty_canon_correct: + forall (A : Type) (tr : t A) (i : elt), + bempty_canon tr = true -> get i tr = None. End TREE. (** * The abstract signatures of maps *) @@ -261,6 +280,12 @@ Module PTree <: TREE. induction i; simpl; auto. Qed. + Definition bempty_canon (A : Type) (tr : t A) : bool := + match tr with + | Leaf => true + | _ => false + end. + Theorem gss: forall (A: Type) (i: positive) (x: A) (m: t A), get i (set i x m) = Some x. Proof. @@ -269,7 +294,16 @@ Module PTree <: TREE. Lemma gleaf : forall (A : Type) (i : positive), get i (Leaf : t A) = None. Proof. exact gempty. Qed. - + + Lemma bempty_canon_correct: + forall (A : Type) (tr : t A) (i : elt), + bempty_canon tr = true -> get i tr = None. + Proof. + destruct tr; intros. + - rewrite gleaf; trivial. + - discriminate. + Qed. + Theorem gso: forall (A: Type) (i j: positive) (x: A) (m: t A), i <> j -> get i (set j x m) = get i m. @@ -625,7 +659,81 @@ Module PTree <: TREE. auto. Qed. - Fixpoint xelements (A : Type) (m : t A) (i : positive) + Section COMBINE_NULL. + + Variables A B C: Type. + Variable f: A -> B -> option C. + + + Fixpoint combine_null (m1: t A) (m2: t B) {struct m1} : t C := + match m1, m2 with + | (Node l1 o1 r1), (Node l2 o2 r2) => + Node' (combine_null l1 l2) + (match o1, o2 with + | (Some x1), (Some x2) => f x1 x2 + | _, _ => None + end) + (combine_null r1 r2) + | _, _ => Leaf + end. + + Theorem gcombine_null: + forall (m1: t A) (m2: t B) (i: positive), + get i (combine_null m1 m2) = + match (get i m1), (get i m2) with + | (Some x1), (Some x2) => f x1 x2 + | _, _ => None + end. + Proof. + induction m1; intros; simpl. + - rewrite gleaf. rewrite gleaf. + reflexivity. + - destruct m2; simpl. + + rewrite gleaf. rewrite gleaf. + destruct get; reflexivity. + + rewrite gnode'. + destruct i; simpl; try rewrite IHm1_1; try rewrite IHm1; trivial. + Qed. + + End COMBINE_NULL. + + Section REMOVE_TREE. + + Variables A B: Type. + + Fixpoint remove_t (m1: t A) (m2: t B) {struct m1} : t A := + match m1, m2 with + | Leaf, _ | _, Leaf => m1 + | (Node l1 o1 r1), (Node l2 o2 r2) => + Node' (remove_t l1 l2) + (match o2 with + | Some _ => None + | None => o1 + end) + (remove_t r1 r2) + end. + + Theorem gremove_t: + forall m1 : t A, + forall m2 : t B, + forall i : positive, + get i (remove_t m1 m2) = match get i m2 with + | None => get i m1 + | Some _ => None + end. + Proof. + induction m1; intros; simpl. + - rewrite gleaf. + destruct get; reflexivity. + - destruct m2; simpl. + + rewrite gleaf. + reflexivity. + + rewrite gnode'. + destruct i; simpl; try rewrite IHm1_1; try rewrite IHm1; trivial. + Qed. + End REMOVE_TREE. + + Fixpoint xelements (A : Type) (m : t A) (i : positive) (k: list (positive * A)) {struct m} : list (positive * A) := match m with diff --git a/mppa_k1c/Asmblockdeps.v b/mppa_k1c/Asmblockdeps.v index 02f9141b..01eda623 100644 --- a/mppa_k1c/Asmblockdeps.v +++ b/mppa_k1c/Asmblockdeps.v @@ -339,7 +339,7 @@ Proof. } destruct (Mem.load _ m1 _ _) in *; destruct (Mem.load _ m0 _ _) in *; congruence. Qed. - + Definition goto_label_deps (f: function) (lbl: label) (vpc: val) := match label_pos lbl 0 (fn_blocks f) with | None => None @@ -1005,7 +1005,7 @@ Theorem bisimu_par_wio_basic ge fn rsr rsw mr mw sr sw bi: Proof. (* a little tactic to automate reasoning on preg_eq *) -Local Hint Resolve not_eq_sym ppos_pmem_discr ppos_discr. +Local Hint Resolve not_eq_sym ppos_pmem_discr ppos_discr: core. Local Ltac preg_eq_discr r rd := destruct (preg_eq r rd); try (subst r; rewrite assign_eq, Pregmap.gss; auto); rewrite (assign_diff _ (#rd) (#r) _); auto; @@ -1053,7 +1053,7 @@ Local Ltac preg_eq_discr r rd := preg_eq_discr r rd0. } (* Load Octuple word *) - + Local Hint Resolve not_eq_sym ppos_pmem_discr ppos_discr. + + Local Hint Resolve not_eq_sym ppos_pmem_discr ppos_discr: core. unfold parexec_load_o_offset. destruct (gpreg_o_expand rd) as [[[rd0 rd1] rd2] rd3]; destruct Ge; simpl. rewrite H0, H. @@ -1423,7 +1423,7 @@ Section SECT_BBLOCK_EQUIV. Variable Ge: genv. -Local Hint Resolve trans_state_match. +Local Hint Resolve trans_state_match: core. Lemma bblock_simu_reduce: forall p1 p2 ge fn, diff --git a/mppa_k1c/Asmblockgen.v b/mppa_k1c/Asmblockgen.v index 50637723..36269954 100644 --- a/mppa_k1c/Asmblockgen.v +++ b/mppa_k1c/Asmblockgen.v @@ -28,6 +28,8 @@ Require Import Chunks. Local Open Scope string_scope. Local Open Scope error_monad_scope. +Import PArithCoercions. + (** The code generation functions take advantage of several characteristics of the [Mach] code generated by earlier passes of the compiler, mostly that argument and result registers are of the correct diff --git a/mppa_k1c/Asmblockgenproof1.v b/mppa_k1c/Asmblockgenproof1.v index ecb4629b..00df01e3 100644 --- a/mppa_k1c/Asmblockgenproof1.v +++ b/mppa_k1c/Asmblockgenproof1.v @@ -23,6 +23,8 @@ Require Import Op Locations Machblock Conventions. Require Import Asmblock Asmblockgen Asmblockgenproof0 Asmblockprops. Require Import Chunks. +Import PArithCoercions. + (** Decomposition of integer constants. *) Lemma make_immed32_sound: @@ -859,7 +861,7 @@ Proof. destruct cmp; discriminate. Qed. -Local Hint Resolve Val_cmpu_bool_correct Val_cmplu_bool_correct. +Local Hint Resolve Val_cmpu_bool_correct Val_cmplu_bool_correct: core. Lemma transl_cbranch_correct_1: forall cond args lbl k c m ms b sp rs m' tbb, @@ -1163,7 +1165,7 @@ Proof. split; intros; Simpl. Qed. -Local Hint Resolve Val_cmpu_correct Val_cmplu_correct. +Local Hint Resolve Val_cmpu_correct Val_cmplu_correct: core. Lemma transl_condimm_int32u_correct: forall cmp rd r1 n k rs m, @@ -1481,6 +1483,8 @@ Proof. destruct (Z_lt_dec _ _); destruct (Z.eq_dec _ _); trivial; omega. Qed. +Ltac splitall := repeat match goal with |- _ /\ _ => split end. + Lemma transl_op_correct: forall op args res k (rs: regset) m v c, transl_op op args res k = OK c -> @@ -1515,21 +1519,21 @@ Opaque Int.eq. - (* Ocast8signed *) econstructor; split. eapply exec_straight_two. simpl;eauto. simpl;eauto. - split; intros; simpl; Simpl. + repeat split; intros; simpl; Simpl. assert (A: Int.ltu (Int.repr 24) Int.iwordsize = true) by auto. destruct (rs x0); auto; simpl. rewrite A; simpl. Simpl. unfold Val.shr. rewrite A. apply Val.lessdef_same. f_equal. apply Int.sign_ext_shr_shl. split; reflexivity. - (* Ocast16signed *) econstructor; split. eapply exec_straight_two. simpl;eauto. simpl;eauto. - split; intros; Simpl. + repeat split; intros; Simpl. assert (A: Int.ltu (Int.repr 16) Int.iwordsize = true) by auto. destruct (rs x0); auto; simpl. rewrite A; simpl. Simpl. unfold Val.shr. rewrite A. apply Val.lessdef_same. f_equal. apply Int.sign_ext_shr_shl. split; reflexivity. - (* Oshrximm *) econstructor; split. + apply exec_straight_one. simpl. eauto. - + split. + + repeat split. * rewrite Pregmap.gss. subst v. destruct (rs x0); simpl; trivial. @@ -1540,7 +1544,7 @@ Opaque Int.eq. - (* Oshrxlimm *) econstructor; split. + apply exec_straight_one. simpl. eauto. - + split. + + repeat split. * rewrite Pregmap.gss. subst v. destruct (rs x0); simpl; trivial. @@ -1551,7 +1555,7 @@ Opaque Int.eq. - (* Ocmp *) exploit transl_cond_op_correct; eauto. intros (rs' & A & B & C). - exists rs'; split. eexact A. eauto with asmgen. + exists rs'; repeat split; eauto with asmgen. - (* Osel *) unfold conditional_move in *. @@ -1570,72 +1574,73 @@ Opaque Int.eq. destruct c0; simpl in *. - all: - destruct c; simpl in *; inv EQ2; - econstructor; split; try (apply exec_straight_one; constructor); - split; try (simpl; intros; rewrite Pregmap.gso; trivial; assumption); - unfold Val.select; simpl; - unfold cmove, cmoveu; - rewrite Pregmap.gss; - destruct (rs x1); simpl; trivial; - try rewrite int_ltu_to_neq; - try rewrite int64_ltu_to_neq; - try change (Int64.eq Int64.zero Int64.zero) with true; - try destruct Archi.ptr64; - repeat rewrite if_neg; - simpl; - trivial; - try destruct (_ || _); - trivial; - try apply Val.lessdef_normalize. + all: destruct c. + all: simpl in *. + all: inv EQ2. + all: econstructor; splitall. + all: try apply exec_straight_one. + all: intros; simpl; trivial. + all: unfold Val.select, cmove, cmoveu; simpl. + all: destruct (rs x1); simpl; trivial. + all: try rewrite int_ltu_to_neq. + all: try rewrite int64_ltu_to_neq. + all: try change (Int64.eq Int64.zero Int64.zero) with true. + all: try destruct Archi.ptr64. + all: try rewrite Pregmap.gss. + all: repeat rewrite if_neg. + all: simpl. + all: try destruct (_ || _). + all: try apply Val.lessdef_normalize. + all: trivial. (* no more lessdef *) + all: apply Pregmap.gso; congruence. - (* Oselimm *) unfold conditional_move_imm32 in *. destruct c0; simpl in *. - all: - destruct c; simpl in *; inv EQ0; - econstructor; split; try (apply exec_straight_one; constructor); - split; try (simpl; intros; rewrite Pregmap.gso; trivial; assumption); - unfold Val.select; simpl; - unfold cmove, cmoveu; - rewrite Pregmap.gss; - destruct (rs x0); simpl; trivial; - try rewrite int_ltu_to_neq; - try rewrite int64_ltu_to_neq; - try change (Int64.eq Int64.zero Int64.zero) with true; - try destruct Archi.ptr64; - repeat rewrite if_neg; - simpl; - trivial; - try destruct (_ || _); - trivial; - try apply Val.lessdef_normalize. - + all: destruct c. + all: simpl in *. + all: inv EQ0. + all: econstructor; splitall. + all: try apply exec_straight_one. + all: intros; simpl; trivial. + all: unfold Val.select, cmove, cmoveu; simpl. + all: destruct (rs x0); simpl; trivial. + all: try rewrite int_ltu_to_neq. + all: try rewrite int64_ltu_to_neq. + all: try change (Int64.eq Int64.zero Int64.zero) with true. + all: try destruct Archi.ptr64. + all: try rewrite Pregmap.gss. + all: repeat rewrite if_neg. + all: simpl. + all: try destruct (_ || _). + all: try apply Val.lessdef_normalize. + all: trivial. (* no more lessdef *) + all: apply Pregmap.gso; congruence. - (* Osellimm *) unfold conditional_move_imm64 in *. destruct c0; simpl in *. - all: - destruct c; simpl in *; inv EQ0; - econstructor; split; try (apply exec_straight_one; constructor); - split; try (simpl; intros; rewrite Pregmap.gso; trivial; assumption); - unfold Val.select; simpl; - unfold cmove, cmoveu; - rewrite Pregmap.gss; - destruct (rs x0); simpl; trivial; - try rewrite int_ltu_to_neq; - try rewrite int64_ltu_to_neq; - try change (Int64.eq Int64.zero Int64.zero) with true; - try destruct Archi.ptr64; - repeat rewrite if_neg; - simpl; - trivial; - try destruct (_ || _); - trivial; - try apply Val.lessdef_normalize. - + all: destruct c. + all: simpl in *. + all: inv EQ0. + all: econstructor; splitall. + all: try apply exec_straight_one. + all: intros; simpl; trivial. + all: unfold Val.select, cmove, cmoveu; simpl. + all: destruct (rs x0); simpl; trivial. + all: try rewrite int_ltu_to_neq. + all: try rewrite int64_ltu_to_neq. + all: try change (Int64.eq Int64.zero Int64.zero) with true. + all: try destruct Archi.ptr64. + all: try rewrite Pregmap.gss. + all: repeat rewrite if_neg. + all: simpl. + all: try destruct (_ || _). + all: try apply Val.lessdef_normalize. + all: trivial. (* no more lessdef *) + all: apply Pregmap.gso; congruence. Qed. (** Memory accesses *) diff --git a/mppa_k1c/Asmvliw.v b/mppa_k1c/Asmvliw.v index e042d95a..946007c1 100644 --- a/mppa_k1c/Asmvliw.v +++ b/mppa_k1c/Asmvliw.v @@ -555,6 +555,8 @@ Inductive ar_instruction : Type := | PArithARRI64 (i: arith_name_arri64) (rd rs: ireg) (imm: int64) . +Module PArithCoercions. + Coercion PArithR: arith_name_r >-> Funclass. Coercion PArithRR: arith_name_rr >-> Funclass. Coercion PArithRI32: arith_name_ri32 >-> Funclass. @@ -569,6 +571,8 @@ Coercion PArithARR: arith_name_arr >-> Funclass. Coercion PArithARRI32: arith_name_arri32 >-> Funclass. Coercion PArithARRI64: arith_name_arri64 >-> Funclass. +End PArithCoercions. + Inductive basic : Type := | PArith (i: ar_instruction) | PLoad (i: ld_instruction) @@ -1709,7 +1713,7 @@ Proof. Qed. -Local Hint Resolve parexec_bblock_write_in_order. +Local Hint Resolve parexec_bblock_write_in_order: core. Lemma det_parexec_write_in_order f b rs m rs' m': det_parexec f b rs m rs' m' -> parexec_wio_bblock f b rs m = Next rs' m'. diff --git a/mppa_k1c/CSE2deps.v b/mppa_k1c/CSE2deps.v new file mode 100644 index 00000000..8ab9242a --- /dev/null +++ b/mppa_k1c/CSE2deps.v @@ -0,0 +1,20 @@ +Require Import BoolEqual Coqlib. +Require Import AST Integers Floats. +Require Import Values Memory Globalenvs Events. +Require Import Op. + + +Definition can_swap_accesses_ofs ofsr chunkr ofsw chunkw := + (0 <=? ofsw) && (ofsw <=? (Ptrofs.modulus - largest_size_chunk)) + && (0 <=? ofsr) && (ofsr <=? (Ptrofs.modulus - largest_size_chunk)) + && ((ofsw + size_chunk chunkw <=? ofsr) || + (ofsr + size_chunk chunkr <=? ofsw)). + +Definition may_overlap chunk addr args chunk' addr' args' := + match addr, addr', args, args' with + | (Aindexed ofs), (Aindexed ofs'), + (base :: nil), (base' :: nil) => + if peq base base' + then negb (can_swap_accesses_ofs (Ptrofs.unsigned ofs') chunk' (Ptrofs.unsigned ofs) chunk) + else true | _, _, _, _ => true + end. diff --git a/mppa_k1c/CSE2depsproof.v b/mppa_k1c/CSE2depsproof.v new file mode 100644 index 00000000..a3811e78 --- /dev/null +++ b/mppa_k1c/CSE2depsproof.v @@ -0,0 +1,127 @@ +Require Import Coqlib Maps Errors Integers Floats Lattice Kildall. +Require Import AST Linking. +Require Import Memory Registers Op RTL Maps. + +Require Import Globalenvs Values. +Require Import Linking Values Memory Globalenvs Events Smallstep. +Require Import Registers Op RTL. +Require Import CSE2 CSE2deps. +Require Import Lia. + +Lemma ptrofs_size : + Ptrofs.wordsize = (if Archi.ptr64 then 64 else 32)%nat. +Proof. + unfold Ptrofs.wordsize. + unfold Wordsize_Ptrofs.wordsize. + trivial. +Qed. + +Lemma ptrofs_modulus : + Ptrofs.modulus = if Archi.ptr64 then 18446744073709551616 else 4294967296. +Proof. + unfold Ptrofs.modulus. + rewrite ptrofs_size. + destruct Archi.ptr64; reflexivity. +Qed. + +Section SOUNDNESS. + Variable F V : Type. + Variable genv: Genv.t F V. + Variable sp : val. + +Section MEMORY_WRITE. + Variable m m2 : mem. + Variable chunkw chunkr : memory_chunk. + Variable base : val. + + Variable addrw addrr valw : val. + Hypothesis STORE : Mem.storev chunkw m addrw valw = Some m2. + + Section INDEXED_AWAY. + Variable ofsw ofsr : ptrofs. + Hypothesis ADDRW : eval_addressing genv sp + (Aindexed ofsw) (base :: nil) = Some addrw. + Hypothesis ADDRR : eval_addressing genv sp + (Aindexed ofsr) (base :: nil) = Some addrr. + + Lemma load_store_away1 : + forall RANGEW : 0 <= Ptrofs.unsigned ofsw <= Ptrofs.modulus - largest_size_chunk, + forall RANGER : 0 <= Ptrofs.unsigned ofsr <= Ptrofs.modulus - largest_size_chunk, + forall SWAPPABLE : Ptrofs.unsigned ofsw + size_chunk chunkw <= Ptrofs.unsigned ofsr + \/ Ptrofs.unsigned ofsr + size_chunk chunkr <= Ptrofs.unsigned ofsw, + Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr. + + Proof. + intros. + + pose proof (max_size_chunk chunkr) as size_chunkr_bounded. + pose proof (max_size_chunk chunkw) as size_chunkw_bounded. + unfold largest_size_chunk in *. + + rewrite ptrofs_modulus in *. + simpl in *. + inv ADDRR. + inv ADDRW. + destruct base; try discriminate. + eapply Mem.load_store_other with (chunk := chunkw) (v := valw) (b := b). + exact STORE. + right. + + all: try (destruct (Ptrofs.unsigned_add_either i ofsr) as [OFSR | OFSR]; + rewrite OFSR). + all: try (destruct (Ptrofs.unsigned_add_either i ofsw) as [OFSW | OFSW]; + rewrite OFSW). + all: try rewrite ptrofs_modulus in *. + all: destruct Archi.ptr64. + + all: intuition lia. + Qed. + + Theorem load_store_away : + can_swap_accesses_ofs (Ptrofs.unsigned ofsr) chunkr (Ptrofs.unsigned ofsw) chunkw = true -> + Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr. + Proof. + intro SWAP. + unfold can_swap_accesses_ofs in SWAP. + repeat rewrite andb_true_iff in SWAP. + repeat rewrite orb_true_iff in SWAP. + repeat rewrite Z.leb_le in SWAP. + apply load_store_away1. + all: tauto. + Qed. + End INDEXED_AWAY. +End MEMORY_WRITE. +End SOUNDNESS. + + +Section SOUNDNESS. + Variable F V : Type. + Variable genv: Genv.t F V. + Variable sp : val. + +Lemma may_overlap_sound: + forall m m' : mem, + forall chunk addr args chunk' addr' args' v a a' rs, + (eval_addressing genv sp addr (rs ## args)) = Some a -> + (eval_addressing genv sp addr' (rs ## args')) = Some a' -> + (may_overlap chunk addr args chunk' addr' args') = false -> + (Mem.storev chunk m a v) = Some m' -> + (Mem.loadv chunk' m' a') = (Mem.loadv chunk' m a'). +Proof. + intros until rs. + intros ADDR ADDR' OVERLAP STORE. + destruct addr; destruct addr'; try discriminate. + { (* Aindexed / Aindexed *) + destruct args as [ | base [ | ]]. 1,3: discriminate. + destruct args' as [ | base' [ | ]]. 1,3: discriminate. + simpl in OVERLAP. + destruct (peq base base'). 2: discriminate. + subst base'. + destruct (can_swap_accesses_ofs (Ptrofs.unsigned i0) chunk' (Ptrofs.unsigned i) chunk) eqn:SWAP. + 2: discriminate. + simpl in *. + eapply load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption. + } +Qed. + +End SOUNDNESS. diff --git a/mppa_k1c/DuplicateOpcodeHeuristic.ml b/mppa_k1c/DuplicateOpcodeHeuristic.ml index 690553ce..2ec314c1 100644 --- a/mppa_k1c/DuplicateOpcodeHeuristic.ml +++ b/mppa_k1c/DuplicateOpcodeHeuristic.ml @@ -2,10 +2,8 @@ open Op open Integers -exception HeuristicSucceeded - -let opcode_heuristic code cond ifso ifnot preferred = - let decision = match cond with +let opcode_heuristic code cond ifso ifnot is_loop_header = + match cond with | Ccompimm (c, n) | Ccompuimm (c, n) -> if n == Integers.Int.zero then (match c with | Clt | Cle -> Some false | Cgt | Cge -> Some true @@ -27,6 +25,3 @@ let opcode_heuristic code cond ifso ifnot preferred = | _ -> None ) | _ -> None - in match decision with - | Some b -> (preferred := b; raise HeuristicSucceeded) - | None -> () diff --git a/mppa_k1c/PostpassSchedulingproof.v b/mppa_k1c/PostpassSchedulingproof.v index fbb06c9b..8cc7f0ab 100644 --- a/mppa_k1c/PostpassSchedulingproof.v +++ b/mppa_k1c/PostpassSchedulingproof.v @@ -61,9 +61,9 @@ Proof. - subst. repeat (rewrite Pregmap.gss); auto. destruct v; simpl; auto. rewrite Ptrofs.add_assoc. - cutrewrite (Ptrofs.repr (x + y) = Ptrofs.add (Ptrofs.repr x) (Ptrofs.repr y)); auto. + enough (Ptrofs.repr (x + y) = Ptrofs.add (Ptrofs.repr x) (Ptrofs.repr y)) as ->; auto. unfold Ptrofs.add. - cutrewrite (x + y = Ptrofs.unsigned (Ptrofs.repr x) + Ptrofs.unsigned (Ptrofs.repr y)); auto. + enough (x + y = Ptrofs.unsigned (Ptrofs.repr x) + Ptrofs.unsigned (Ptrofs.repr y)) as ->; auto. repeat (rewrite Ptrofs.unsigned_repr); auto. - repeat (rewrite Pregmap.gso; auto). Qed. @@ -220,7 +220,8 @@ Proof. destruct (zeq pos 0). + inv H. exists lbb. constructor; auto. + apply IHlbb in H. destruct H as (c & TAIL). exists c. - cutrewrite (pos = pos - size a + size a). apply code_tail_S; auto. + enough (pos = pos - size a + size a) as ->. + apply code_tail_S; auto. omega. Qed. @@ -567,7 +568,7 @@ Proof. unfold builtin_alone in H0. erewrite H0; eauto. Qed. -Local Hint Resolve verified_schedule_nob_checks_alls_bundles. +Local Hint Resolve verified_schedule_nob_checks_alls_bundles: core. Lemma verified_schedule_checks_alls_bundles bb lb bundle: verified_schedule bb = OK lb -> diff --git a/mppa_k1c/abstractbb/AbstractBasicBlocksDef.v b/mppa_k1c/abstractbb/AbstractBasicBlocksDef.v index 5c94d435..cf46072f 100644 --- a/mppa_k1c/abstractbb/AbstractBasicBlocksDef.v +++ b/mppa_k1c/abstractbb/AbstractBasicBlocksDef.v @@ -403,7 +403,7 @@ Proof. * eapply H2; eauto. intros; eapply H0; eauto. rewrite rev_append_rev, in_app_iff; auto. * intros; eapply H0; eauto. rewrite rev_append_rev, in_app_iff, <- in_rev; auto. Qed. -Local Hint Resolve app_fail_allvalid_correct. +Local Hint Resolve app_fail_allvalid_correct: core. Lemma app_fail_correct l pt t1 t2: match_pt t1 pt -> diff --git a/mppa_k1c/abstractbb/ImpSimuTest.v b/mppa_k1c/abstractbb/ImpSimuTest.v index ea55b735..7a77ec15 100644 --- a/mppa_k1c/abstractbb/ImpSimuTest.v +++ b/mppa_k1c/abstractbb/ImpSimuTest.v @@ -304,12 +304,12 @@ Proof. rewrite <- EQT; eauto. + exploit smem_valid_set_decompose_1; eauto. - clear DM0. unfold hsmem_post_eval, hsmem_post_eval in * |- *; simpl. - Local Hint Resolve smem_valid_set_decompose_1. + Local Hint Resolve smem_valid_set_decompose_1: core. intros; case (R.eq_dec x x0). + intros; subst; rewrite !Dict.set_spec_eq; simpl; eauto. + intros; rewrite !Dict.set_spec_diff; simpl; eauto. Qed. -Local Hint Resolve naive_set_correct. +Local Hint Resolve naive_set_correct: core. Definition equiv_hsmem ge (hd1 hd2: hsmem) := (forall m, allvalid ge hd1.(hpre) m <-> allvalid ge hd2.(hpre) m) @@ -523,7 +523,7 @@ Lemma hinst_smem_correct i: forall hd hod, WHEN hinst_smem i hd hod ~> hd' THEN forall ge od d, smem_model ge od hod -> smem_model ge d hd -> (forall m, smem_valid ge d m -> smem_valid ge od m) -> smem_model ge (inst_smem i d od) hd'. Proof. - Local Hint Resolve smem_valid_set_proof. + Local Hint Resolve smem_valid_set_proof: core. induction i; simpl; wlp_simplify; eauto 15 with wlp. Qed. Global Opaque hinst_smem. @@ -563,7 +563,7 @@ Definition bblock_hsmem: bblock -> ?? hsmem Lemma bblock_hsmem_correct p: WHEN bblock_hsmem p ~> hd THEN forall ge, smem_model ge (bblock_smem p) hd. Proof. - Local Hint Resolve hsmem_empty_correct. + Local Hint Resolve hsmem_empty_correct: core. wlp_simplify. Qed. Global Opaque bblock_hsmem. @@ -775,7 +775,7 @@ Proof. intro H; erewrite <- list_term_eval_set_hid; rewrite H. apply list_term_eval_set_hid. Qed. -Local Hint Resolve term_eval_set_hid_equiv list_term_eval_set_hid_equiv. +Local Hint Resolve term_eval_set_hid_equiv list_term_eval_set_hid_equiv: core. Program Definition bblock_simu_test (p1 p2: bblock): ?? bool := DO log <~ count_logger ();; @@ -802,7 +802,7 @@ Obligation 2. wlp_simplify. Qed. -Local Hint Resolve g_bblock_simu_test_correct. +Local Hint Resolve g_bblock_simu_test_correct: core. Theorem bblock_simu_test_correct p1 p2: WHEN bblock_simu_test p1 p2 ~> b THEN b=true -> forall ge, bblock_simu ge p1 p2. @@ -1123,7 +1123,7 @@ Definition get {A} (d:t A) (x:R.t): option A Definition set {A} (d:t A) (x:R.t) (v:A): t A := PositiveMap.add x v d. -Local Hint Unfold PositiveMap.E.eq. +Local Hint Unfold PositiveMap.E.eq: core. Lemma set_spec_eq A d x (v: A): get (set d x v) x = Some v. diff --git a/mppa_k1c/abstractbb/Impure/ImpHCons.v b/mppa_k1c/abstractbb/Impure/ImpHCons.v index d8002375..637116cc 100644 --- a/mppa_k1c/abstractbb/Impure/ImpHCons.v +++ b/mppa_k1c/abstractbb/Impure/ImpHCons.v @@ -95,7 +95,7 @@ Proof. wlp_simplify. Qed. Global Opaque assert_list_incl. -Hint Resolve assert_list_incl_correct. +Hint Resolve assert_list_incl_correct: wlp. End Sets. @@ -165,7 +165,7 @@ Lemma hConsV_correct A (hasheq: A -> A -> ?? bool): (forall x y, WHEN hasheq x y ~> b THEN b=true -> x=y) -> forall x, WHEN hco.(hC) x ~> x' THEN x.(hdata).(data)=x'.(data). Proof. - Local Hint Resolve f_equal2. + Local Hint Resolve f_equal2: core. wlp_simplify. exploit H; eauto. + wlp_simplify. diff --git a/mppa_k1c/abstractbb/Parallelizability.v b/mppa_k1c/abstractbb/Parallelizability.v index 22809095..30904b5d 100644 --- a/mppa_k1c/abstractbb/Parallelizability.v +++ b/mppa_k1c/abstractbb/Parallelizability.v @@ -332,7 +332,7 @@ Fixpoint bblock_wframe(p:bblock): list R.t := | i::p' => (inst_wframe i)++(bblock_wframe p') end. -Local Hint Resolve Permutation_app_head Permutation_app_tail Permutation_app_comm. +Local Hint Resolve Permutation_app_head Permutation_app_tail Permutation_app_comm: core. Lemma bblock_wframe_Permutation p p': Permutation p p' -> Permutation (bblock_wframe p) (bblock_wframe p'). @@ -620,7 +620,7 @@ Include ParallelizablityChecking L. Section PARALLEL2. Variable ge: genv. -Local Hint Resolve S.empty_match_frame S.add_match_frame S.union_match_frame S.is_disjoint_match_frame. +Local Hint Resolve S.empty_match_frame S.add_match_frame S.union_match_frame S.is_disjoint_match_frame: core. (** Now, refinement of each operation toward parallelizable *) @@ -659,14 +659,14 @@ Fixpoint inst_sframe (i: inst): S.t := | a::i' => S.add (fst a) (S.union (exp_sframe (snd a)) (inst_sframe i')) end. -Local Hint Resolve exp_sframe_correct. +Local Hint Resolve exp_sframe_correct: core. Lemma inst_sframe_correct i: S.match_frame (inst_sframe i) (inst_frame i). Proof. induction i as [|[y e] i']; simpl; auto. Qed. -Local Hint Resolve inst_wsframe_correct inst_sframe_correct. +Local Hint Resolve inst_wsframe_correct inst_sframe_correct: core. Fixpoint is_pararec (p: bblock) (wsframe: S.t): bool := match p with diff --git a/mppa_k1c/abstractbb/SeqSimuTheory.v b/mppa_k1c/abstractbb/SeqSimuTheory.v index 649dd083..e234883f 100644 --- a/mppa_k1c/abstractbb/SeqSimuTheory.v +++ b/mppa_k1c/abstractbb/SeqSimuTheory.v @@ -102,9 +102,6 @@ Fixpoint bblock_smem_rec (p: bblock) (d: smem): smem := let d':=inst_smem i d d in bblock_smem_rec p' d' end. -(* -Local Hint Resolve smem_eval_empty. -*) Definition bblock_smem: bblock -> smem := fun p => bblock_smem_rec p smem_empty. @@ -124,7 +121,7 @@ Proof. intros d a H; eapply inst_smem_pre_monotonic; eauto. Qed. -Local Hint Resolve inst_smem_pre_monotonic bblock_smem_pre_monotonic. +Local Hint Resolve inst_smem_pre_monotonic bblock_smem_pre_monotonic: core. Lemma term_eval_exp e (od:smem) m0 old: (forall x, term_eval ge (od x) m0 = Some (old x)) -> @@ -185,7 +182,7 @@ Lemma bblocks_smem_rec_Some_correct1 p m0: forall (m1 m2: mem) (d: smem), (forall x, term_eval ge (d x) m0 = Some (m1 x)) -> forall x, term_eval ge (bblock_smem_rec p d x) m0 = Some (m2 x). Proof. - Local Hint Resolve inst_smem_Some_correct1. + Local Hint Resolve inst_smem_Some_correct1: core. induction p as [ | i p]; simpl; intros m1 m2 d H. - inversion_clear H; eauto. - intros H0 x0. @@ -299,7 +296,7 @@ Lemma block_smem_rec_valid p m0: forall (m1 m2: mem) (d:smem), (forall x, term_eval ge (d x) m0 = Some (m1 x)) -> pre (bblock_smem_rec p d) ge m0. Proof. - Local Hint Resolve inst_valid. + Local Hint Resolve inst_valid: core. induction p as [ | i p]; simpl; intros m1 d H; auto. intros H0 H1. destruct (inst_run ge i m1 m1) eqn: Heqov; eauto. @@ -326,7 +323,7 @@ Theorem bblock_smem_simu p1 p2: smem_simu (bblock_smem p1) (bblock_smem p2) -> bblock_simu ge p1 p2. Proof. - Local Hint Resolve bblock_smem_valid bblock_smem_Some_correct1. + Local Hint Resolve bblock_smem_valid bblock_smem_Some_correct1: core. intros (INCL & EQUIV) m DONTFAIL; unfold smem_valid in * |-. destruct (run ge p1 m) as [m1|] eqn: RUN1; simpl; try congruence. assert (X: forall x, term_eval ge (bblock_smem p1 x) m = Some (m1 x)); eauto. diff --git a/mppa_k1c/lib/Asmblockgenproof0.v b/mppa_k1c/lib/Asmblockgenproof0.v index 940c6563..58455ada 100644 --- a/mppa_k1c/lib/Asmblockgenproof0.v +++ b/mppa_k1c/lib/Asmblockgenproof0.v @@ -414,7 +414,7 @@ Proof. Qed. -Local Hint Resolve code_tail_0 code_tail_S. +Local Hint Resolve code_tail_0 code_tail_S: core. Lemma code_tail_next: forall fn ofs c0, @@ -458,7 +458,7 @@ Proof. omega. Qed. -Local Hint Resolve code_tail_next. +Local Hint Resolve code_tail_next: core. Lemma code_tail_next_int: forall fn ofs bi c, diff --git a/mppa_k1c/lib/ForwardSimulationBlock.v b/mppa_k1c/lib/ForwardSimulationBlock.v index 39dd2234..224eda0a 100644 --- a/mppa_k1c/lib/ForwardSimulationBlock.v +++ b/mppa_k1c/lib/ForwardSimulationBlock.v @@ -21,7 +21,7 @@ Section starN_lemma. Variable L: semantics. -Local Hint Resolve starN_refl starN_step Eapp_assoc. +Local Hint Resolve starN_refl starN_step Eapp_assoc: core. Lemma starN_split n s t s': starN (step L) (globalenv L) n s t s' -> @@ -93,7 +93,7 @@ Hypothesis simu_end_block: (** Introduction d'une sémantique par bloc sur L1 appelée "memoL1" *) -Local Hint Resolve starN_refl starN_step. +Local Hint Resolve starN_refl starN_step: core. Definition follows_in_block (head current: state L1): Prop := dist_end_block head >= dist_end_block current @@ -164,7 +164,7 @@ Inductive is_well_memorized (s s': memostate): Prop := memorized s' = None -> is_well_memorized s s'. -Local Hint Resolve StartBloc MidBloc ExitBloc. +Local Hint Resolve StartBloc MidBloc ExitBloc: core. Definition memoL1 := {| state := memostate; diff --git a/mppa_k1c/lib/Machblockgen.v b/mppa_k1c/lib/Machblockgen.v index a65b218f..2ba42814 100644 --- a/mppa_k1c/lib/Machblockgen.v +++ b/mppa_k1c/lib/Machblockgen.v @@ -105,7 +105,7 @@ Inductive is_end_block: Machblock_inst -> code -> Prop := | End_basic bi bh bl: header bh <> nil -> is_end_block (MB_basic bi) (bh::bl) | End_cfi cfi bl: bl <> nil -> is_end_block (MB_cfi cfi) bl. -Local Hint Resolve End_empty End_basic End_cfi. +Local Hint Resolve End_empty End_basic End_cfi: core. Inductive is_trans_code: Mach.code -> code -> Prop := | Tr_nil: is_trans_code nil nil @@ -123,7 +123,7 @@ Inductive is_trans_code: Mach.code -> code -> Prop := header bh = nil -> is_trans_code (i::c) (add_basic bi bh::bl). -Local Hint Resolve Tr_nil Tr_end_block. +Local Hint Resolve Tr_nil Tr_end_block: core. Lemma add_to_code_is_trans_code i c bl: is_trans_code c bl -> @@ -145,7 +145,7 @@ Proof. rewrite <- Heqti. eapply End_cfi. congruence. Qed. -Local Hint Resolve add_to_code_is_trans_code. +Local Hint Resolve add_to_code_is_trans_code: core. Lemma trans_code_is_trans_code_rev c1: forall c2 mbi, is_trans_code c2 mbi -> @@ -185,7 +185,7 @@ Proof. exists mbi1. split; congruence. Qed. -Local Hint Resolve trans_code_is_trans_code. +Local Hint Resolve trans_code_is_trans_code: core. Theorem is_trans_code_inv c bl: is_trans_code c bl <-> bl=(trans_code c). Proof. diff --git a/mppa_k1c/lib/Machblockgenproof.v b/mppa_k1c/lib/Machblockgenproof.v index 91be5e2e..0de2df52 100644 --- a/mppa_k1c/lib/Machblockgenproof.v +++ b/mppa_k1c/lib/Machblockgenproof.v @@ -72,7 +72,7 @@ Proof. apply match_states_trans_state. Qed. -Local Hint Resolve match_states_trans_state. +Local Hint Resolve match_states_trans_state: core. Lemma symbols_preserved: forall (s: ident), Genv.find_symbol tge s = Genv.find_symbol ge s. @@ -284,7 +284,7 @@ Proof. Qed. Local Hint Resolve symbols_preserved senv_preserved init_mem_preserved prog_main_preserved functions_translated - parent_sp_preserved. + parent_sp_preserved: core. Definition dist_end_block_code (c: Mach.code) := @@ -299,8 +299,8 @@ Definition dist_end_block (s: Mach.state): nat := | _ => 0 end. -Local Hint Resolve exec_nil_body exec_cons_body. -Local Hint Resolve exec_MBgetstack exec_MBsetstack exec_MBgetparam exec_MBop exec_MBload exec_MBstore. +Local Hint Resolve exec_nil_body exec_cons_body: core. +Local Hint Resolve exec_MBgetstack exec_MBsetstack exec_MBgetparam exec_MBop exec_MBload exec_MBstore: core. Lemma size_add_label l bh: size (add_label l bh) = size bh + 1. Proof. @@ -336,7 +336,7 @@ Proof. omega. Qed. -Local Hint Resolve dist_end_block_code_simu_mid_block. +Local Hint Resolve dist_end_block_code_simu_mid_block: core. Lemma size_nonzero c b bl: @@ -392,8 +392,8 @@ destruct i; congruence. Qed. -Local Hint Resolve Mlabel_is_not_cfi. -Local Hint Resolve MBbasic_is_not_cfi. +Local Hint Resolve Mlabel_is_not_cfi: core. +Local Hint Resolve MBbasic_is_not_cfi: core. Lemma add_to_new_block_is_label i: header (add_to_new_bblock (trans_inst i)) <> nil -> exists l, i = Mlabel l. @@ -408,7 +408,7 @@ Proof. + unfold cfi_bblock in H; simpl in H; congruence. Qed. -Local Hint Resolve Mlabel_is_not_basic. +Local Hint Resolve Mlabel_is_not_basic: core. Lemma trans_code_decompose c: forall b bl, is_trans_code c (b::bl) -> @@ -510,8 +510,8 @@ Proof. rewrite Hs2, Hb2; eauto. Qed. -Local Hint Resolve exec_MBcall exec_MBtailcall exec_MBbuiltin exec_MBgoto exec_MBcond_true exec_MBcond_false exec_MBjumptable exec_MBreturn exec_Some_exit exec_None_exit. -Local Hint Resolve eval_builtin_args_preserved external_call_symbols_preserved find_funct_ptr_same. +Local Hint Resolve exec_MBcall exec_MBtailcall exec_MBbuiltin exec_MBgoto exec_MBcond_true exec_MBcond_false exec_MBjumptable exec_MBreturn exec_Some_exit exec_None_exit: core. +Local Hint Resolve eval_builtin_args_preserved external_call_symbols_preserved find_funct_ptr_same: core. Lemma match_states_concat_trans_code st f sp c rs m h: diff --git a/powerpc/Archi.v b/powerpc/Archi.v index ab348c14..10f38391 100644 --- a/powerpc/Archi.v +++ b/powerpc/Archi.v @@ -30,6 +30,10 @@ Definition align_float64 := 8%Z. (** Can we use the 64-bit extensions to the PowerPC architecture? *) Parameter ppc64 : bool. +(** Should single-precision FP arguments passed on stack be passed + as singles or use double FP format. *) +Parameter single_passed_as_single : bool. + Definition splitlong := negb ppc64. Lemma splitlong_ptr32: splitlong = true -> ptr64 = false. diff --git a/powerpc/CSE2deps.v b/powerpc/CSE2deps.v new file mode 100644 index 00000000..9db51bbb --- /dev/null +++ b/powerpc/CSE2deps.v @@ -0,0 +1,20 @@ +Require Import BoolEqual Coqlib. +Require Import AST Integers Floats. +Require Import Values Memory Globalenvs Events. +Require Import Op. + + +Definition can_swap_accesses_ofs ofsr chunkr ofsw chunkw := + (0 <=? ofsw) && (ofsw <=? (Ptrofs.modulus - largest_size_chunk)) + && (0 <=? ofsr) && (ofsr <=? (Ptrofs.modulus - largest_size_chunk)) + && ((ofsw + size_chunk chunkw <=? ofsr) || + (ofsr + size_chunk chunkr <=? ofsw)). + +Definition may_overlap chunk addr args chunk' addr' args' := + match addr, addr', args, args' with + | (Aindexed ofs), (Aindexed ofs'), + (base :: nil), (base' :: nil) => + if peq base base' + then negb (can_swap_accesses_ofs (Int.unsigned ofs') chunk' (Int.unsigned ofs) chunk) + else true | _, _, _, _ => true + end. diff --git a/powerpc/CSE2depsproof.v b/powerpc/CSE2depsproof.v new file mode 100644 index 00000000..fdded9b6 --- /dev/null +++ b/powerpc/CSE2depsproof.v @@ -0,0 +1,135 @@ +Require Import Coqlib Maps Errors Integers Floats Lattice Kildall. +Require Import AST Linking. +Require Import Memory Registers Op RTL Maps. + +Require Import Globalenvs Values. +Require Import Linking Values Memory Globalenvs Events Smallstep. +Require Import Registers Op RTL. +Require Import CSE2 CSE2deps. +Require Import Lia. + +Lemma ptrofs_size : + Ptrofs.wordsize = if Archi.ptr64 then 64%nat else 32%nat. +Proof. + unfold Ptrofs.wordsize. + unfold Wordsize_Ptrofs.wordsize. + trivial. +Qed. + +Lemma ptrofs_modulus : + Ptrofs.modulus = if Archi.ptr64 then 18446744073709551616 else 4294967296. +Proof. + unfold Ptrofs.modulus. + rewrite ptrofs_size. + destruct Archi.ptr64; reflexivity. +Qed. + +Lemma ptrofs_max_unsigned : + Ptrofs.max_unsigned = if Archi.ptr64 then 18446744073709551615 else 4294967295. +Proof. + unfold Ptrofs.max_unsigned. + rewrite ptrofs_modulus. + destruct Archi.ptr64; reflexivity. +Qed. + +Section SOUNDNESS. + Variable F V : Type. + Variable genv: Genv.t F V. + Variable sp : val. + +Section MEMORY_WRITE. + Variable m m2 : mem. + Variable chunkw chunkr : memory_chunk. + Variable base : val. + + Variable addrw addrr valw : val. + Hypothesis STORE : Mem.storev chunkw m addrw valw = Some m2. + + Section INDEXED_AWAY. + Variable ofsw ofsr : int. + Hypothesis ADDRW : eval_addressing genv sp + (Aindexed ofsw) (base :: nil) = Some addrw. + Hypothesis ADDRR : eval_addressing genv sp + (Aindexed ofsr) (base :: nil) = Some addrr. + + Lemma load_store_away1 : + forall RANGEW : 0 <= Int.unsigned ofsw <= Ptrofs.modulus - largest_size_chunk, + forall RANGER : 0 <= Int.unsigned ofsr <= Ptrofs.modulus - largest_size_chunk, + forall SWAPPABLE : Int.unsigned ofsw + size_chunk chunkw <= Int.unsigned ofsr + \/ Int.unsigned ofsr + size_chunk chunkr <= Int.unsigned ofsw, + Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr. + Proof. + intros. + + pose proof (max_size_chunk chunkr) as size_chunkr_bounded. + pose proof (max_size_chunk chunkw) as size_chunkw_bounded. + unfold largest_size_chunk in *. + + rewrite ptrofs_modulus in *. + simpl in *. + inv ADDRR. + inv ADDRW. + destruct base; try discriminate. + eapply Mem.load_store_other with (chunk := chunkw) (v := valw) (b := b). + exact STORE. + right. + + all: try (destruct (Ptrofs.unsigned_add_either i (Ptrofs.of_int ofsr)) as [OFSR | OFSR]; + rewrite OFSR). + all: try (destruct (Ptrofs.unsigned_add_either i (Ptrofs.of_int ofsw)) as [OFSW | OFSW]; + rewrite OFSW). + all: unfold Ptrofs.of_int. + + all: repeat rewrite Ptrofs.unsigned_repr by (unfold Ptrofs.max_unsigned; rewrite ptrofs_modulus; destruct Archi.ptr64; lia). + all: repeat rewrite ptrofs_modulus. + all: destruct Archi.ptr64; intuition lia. + Qed. + + Theorem load_store_away : + can_swap_accesses_ofs (Int.unsigned ofsr) chunkr (Int.unsigned ofsw) chunkw = true -> + Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr. + Proof. + intro SWAP. + unfold can_swap_accesses_ofs in SWAP. + repeat rewrite andb_true_iff in SWAP. + repeat rewrite orb_true_iff in SWAP. + repeat rewrite Z.leb_le in SWAP. + apply load_store_away1. + all: tauto. + Qed. + End INDEXED_AWAY. +End MEMORY_WRITE. +End SOUNDNESS. + + +Section SOUNDNESS. + Variable F V : Type. + Variable genv: Genv.t F V. + Variable sp : val. + +Lemma may_overlap_sound: + forall m m' : mem, + forall chunk addr args chunk' addr' args' v a a' rs, + (eval_addressing genv sp addr (rs ## args)) = Some a -> + (eval_addressing genv sp addr' (rs ## args')) = Some a' -> + (may_overlap chunk addr args chunk' addr' args') = false -> + (Mem.storev chunk m a v) = Some m' -> + (Mem.loadv chunk' m' a') = (Mem.loadv chunk' m a'). +Proof. + intros until rs. + intros ADDR ADDR' OVERLAP STORE. + destruct addr; destruct addr'; try discriminate. + { (* Aindexed / Aindexed *) + destruct args as [ | base [ | ]]. 1,3: discriminate. + destruct args' as [ | base' [ | ]]. 1,3: discriminate. + simpl in OVERLAP. + destruct (peq base base'). 2: discriminate. + subst base'. + destruct (can_swap_accesses_ofs (Int.unsigned i0) chunk' (Int.unsigned i) chunk) eqn:SWAP. + 2: discriminate. + simpl in *. + eapply load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption. + } +Qed. + +End SOUNDNESS. diff --git a/powerpc/Conventions1.v b/powerpc/Conventions1.v index 25d9c081..5c9cbd4f 100644 --- a/powerpc/Conventions1.v +++ b/powerpc/Conventions1.v @@ -208,7 +208,16 @@ Fixpoint loc_arguments_rec | Some ireg => One (R ireg) :: loc_arguments_rec tys (ir + 1) fr ofs end - | (Tfloat | Tsingle | Tany64) as ty :: tys => + | Tsingle as ty :: tys => + match list_nth_z float_param_regs fr with + | None => + let ty := if Archi.single_passed_as_single then Tsingle else Tany64 in + let ofs := align ofs (typesize ty) in + One (S Outgoing ofs ty) :: loc_arguments_rec tys ir fr (ofs + (typesize ty)) + | Some freg => + One (R freg) :: loc_arguments_rec tys ir (fr + 1) ofs + end + | (Tfloat | Tany64) as ty :: tys => match list_nth_z float_param_regs fr with | None => let ofs := align ofs 2 in @@ -236,33 +245,6 @@ Fixpoint loc_arguments_rec Definition loc_arguments (s: signature) : list (rpair loc) := loc_arguments_rec s.(sig_args) 0 0 0. -(** [size_arguments s] returns the number of [Outgoing] slots used - to call a function with signature [s]. *) - -Fixpoint size_arguments_rec (tyl: list typ) (ir fr ofs: Z) {struct tyl} : Z := - match tyl with - | nil => ofs - | (Tint | Tany32) :: tys => - match list_nth_z int_param_regs ir with - | None => size_arguments_rec tys ir fr (ofs + 1) - | Some ireg => size_arguments_rec tys (ir + 1) fr ofs - end - | (Tfloat | Tsingle | Tany64) :: tys => - match list_nth_z float_param_regs fr with - | None => size_arguments_rec tys ir fr (align ofs 2 + 2) - | Some freg => size_arguments_rec tys ir (fr + 1) ofs - end - | Tlong :: tys => - let ir := align ir 2 in - match list_nth_z int_param_regs ir, list_nth_z int_param_regs (ir + 1) with - | Some r1, Some r2 => size_arguments_rec tys (ir + 2) fr ofs - | _, _ => size_arguments_rec tys ir fr (align ofs 2 + 2) - end - end. - -Definition size_arguments (s: signature) : Z := - size_arguments_rec s.(sig_args) 0 0 0. - (** Argument locations are either caller-save registers or [Outgoing] stack slots at nonnegative offsets. *) @@ -322,12 +304,14 @@ Opaque list_nth_z. apply align_divides; omega. apply Z.divide_1_l. apply Z.divide_1_l. eapply Y; eauto. omega. - (* single *) + assert (ofs <= align ofs 1) by (apply align_le; omega). assert (ofs <= align ofs 2) by (apply align_le; omega). destruct (list_nth_z float_param_regs fr) as [r|] eqn:E; destruct H. subst. right. eapply list_nth_z_in; eauto. eapply IHtyl; eauto. - subst. split. omega. apply Z.divide_1_l. - eapply Y; eauto. omega. + subst. split. destruct Archi.single_passed_as_single; simpl; omega. + destruct Archi.single_passed_as_single; simpl; apply Z.divide_1_l. + eapply Y; eauto. destruct Archi.single_passed_as_single; simpl; omega. - (* any32 *) destruct (list_nth_z int_param_regs ir) as [r|] eqn:E; destruct H. subst. left. eapply list_nth_z_in; eauto. @@ -359,105 +343,6 @@ Qed. Hint Resolve loc_arguments_acceptable: locs. -(** The offsets of [Outgoing] arguments are below [size_arguments s]. *) - -Remark size_arguments_rec_above: - forall tyl ir fr ofs0, - ofs0 <= size_arguments_rec tyl ir fr ofs0. -Proof. - induction tyl; simpl; intros. - omega. - destruct a. - destruct (list_nth_z int_param_regs ir); eauto. apply Z.le_trans with (ofs0 + 1); auto; omega. - destruct (list_nth_z float_param_regs fr); eauto. - apply Z.le_trans with (align ofs0 2). apply align_le; omega. - apply Z.le_trans with (align ofs0 2 + 2); auto; omega. - set (ir' := align ir 2). - destruct (list_nth_z int_param_regs ir'); eauto. - destruct (list_nth_z int_param_regs (ir' + 1)); eauto. - apply Z.le_trans with (align ofs0 2). apply align_le; omega. - apply Z.le_trans with (align ofs0 2 + 2); auto; omega. - apply Z.le_trans with (align ofs0 2). apply align_le; omega. - apply Z.le_trans with (align ofs0 2 + 2); auto; omega. - destruct (list_nth_z float_param_regs fr); eauto. - apply Z.le_trans with (align ofs0 2). apply align_le; omega. - apply Z.le_trans with (align ofs0 2 + 2); auto; omega. - destruct (list_nth_z int_param_regs ir); eauto. apply Z.le_trans with (ofs0 + 1); auto; omega. - destruct (list_nth_z float_param_regs fr); eauto. - apply Z.le_trans with (align ofs0 2). apply align_le; omega. - apply Z.le_trans with (align ofs0 2 + 2); auto; omega. -Qed. - -Lemma size_arguments_above: - forall s, size_arguments s >= 0. -Proof. - intros; unfold size_arguments. apply Z.le_ge. - apply size_arguments_rec_above. -Qed. - -Lemma loc_arguments_bounded: - forall (s: signature) (ofs: Z) (ty: typ), - In (S Outgoing ofs ty) (regs_of_rpairs (loc_arguments s)) -> - ofs + typesize ty <= size_arguments s. -Proof. - intros. - assert (forall tyl ir fr ofs0, - In (S Outgoing ofs ty) (regs_of_rpairs (loc_arguments_rec tyl ir fr ofs0)) -> - ofs + typesize ty <= size_arguments_rec tyl ir fr ofs0). -{ - induction tyl; simpl; intros. - elim H0. - destruct a. -- (* int *) - destruct (list_nth_z int_param_regs ir); destruct H0. - congruence. - eauto. - inv H0. apply size_arguments_rec_above. - eauto. -- (* float *) - destruct (list_nth_z float_param_regs fr); destruct H0. - congruence. - eauto. - inv H0. apply size_arguments_rec_above. eauto. -- (* long *) - set (ir' := align ir 2) in *. - assert (DFL: - In (S Outgoing ofs ty) (regs_of_rpairs - ((if Archi.ptr64 - then One (S Outgoing (align ofs0 2) Tlong) - else Twolong (S Outgoing (align ofs0 2) Tint) - (S Outgoing (align ofs0 2 + 1) Tint)) - :: loc_arguments_rec tyl ir' fr (align ofs0 2 + 2))) -> - ofs + typesize ty <= size_arguments_rec tyl ir' fr (align ofs0 2 + 2)). - { destruct Archi.ptr64; intros IN. - - destruct IN. inv H1. apply size_arguments_rec_above. auto. - - destruct IN. inv H1. transitivity (align ofs0 2 + 2). simpl; omega. apply size_arguments_rec_above. - destruct H1. inv H1. transitivity (align ofs0 2 + 2). simpl; omega. apply size_arguments_rec_above. - auto. } - destruct (list_nth_z int_param_regs ir'); auto. - destruct (list_nth_z int_param_regs (ir' + 1)); auto. - destruct H0. congruence. destruct H0. congruence. eauto. -- (* single *) - destruct (list_nth_z float_param_regs fr); destruct H0. - congruence. - eauto. - inv H0. transitivity (align ofs0 2 + 2). simpl; omega. apply size_arguments_rec_above. - eauto. -- (* any32 *) - destruct (list_nth_z int_param_regs ir); destruct H0. - congruence. - eauto. - inv H0. apply size_arguments_rec_above. - eauto. -- (* any64 *) - destruct (list_nth_z float_param_regs fr); destruct H0. - congruence. - eauto. - inv H0. apply size_arguments_rec_above. eauto. - } - eauto. -Qed. - Lemma loc_arguments_main: loc_arguments signature_main = nil. Proof. diff --git a/powerpc/DuplicateOpcodeHeuristic.ml b/powerpc/DuplicateOpcodeHeuristic.ml index 85505245..33be79e8 100644 --- a/powerpc/DuplicateOpcodeHeuristic.ml +++ b/powerpc/DuplicateOpcodeHeuristic.ml @@ -1,3 +1,27 @@ -exception HeuristicSucceeded - -let opcode_heuristic code cond ifso ifnot preferred = () +(* open Camlcoq *) +open Op +open Integers + +let opcode_heuristic code cond ifso ifnot is_loop_header = + match cond with + | Ccompimm (c, n) | Ccompuimm (c, n) -> if n == Integers.Int.zero then (match c with + | Clt | Cle -> Some false + | Cgt | Cge -> Some true + | _ -> None + ) else None + | Ccomplimm (c, n) | Ccompluimm (c, n) -> if n == Integers.Int64.zero then (match c with + | Clt | Cle -> Some false + | Cgt | Cge -> Some true + | _ -> None + ) else None + | Ccompf c -> (match c with + | Ceq -> Some false + | Cne -> Some true + | _ -> None + ) + | Cnotcompf c -> (match c with + | Ceq -> Some true + | Cne -> Some false + | _ -> None + ) + | _ -> None diff --git a/powerpc/extractionMachdep.v b/powerpc/extractionMachdep.v index 7482435f..a3e945bf 100644 --- a/powerpc/extractionMachdep.v +++ b/powerpc/extractionMachdep.v @@ -34,3 +34,6 @@ Extract Constant Archi.ppc64 => | ""e5500"" -> true | _ -> false end". + +(* Choice of passing of single *) +Extract Constant Archi.single_passed_as_single => "Configuration.gnu_toolchain". diff --git a/riscV/Asmexpand.ml b/riscV/Asmexpand.ml index d36b6230..7e36abf8 100644 --- a/riscV/Asmexpand.ml +++ b/riscV/Asmexpand.ml @@ -63,44 +63,44 @@ let expand_storeind_ptr src base ofs = let int_param_regs = [| X10; X11; X12; X13; X14; X15; X16; X17 |] let float_param_regs = [| F10; F11; F12; F13; F14; F15; F16; F17 |] -let rec fixup_variadic_call pos tyl = - if pos < 8 then +let rec fixup_variadic_call ri rf tyl = + if ri < 8 then match tyl with | [] -> () | (Tint | Tany32) :: tyl -> - fixup_variadic_call (pos + 1) tyl + fixup_variadic_call (ri + 1) rf tyl | Tsingle :: tyl -> - let rs =float_param_regs.(pos) - and rd = int_param_regs.(pos) in + let rs = float_param_regs.(rf) + and rd = int_param_regs.(ri) in emit (Pfmvxs(rd, rs)); - fixup_variadic_call (pos + 1) tyl + fixup_variadic_call (ri + 1) (rf + 1) tyl | Tlong :: tyl -> - let pos' = if Archi.ptr64 then pos + 1 else align pos 2 + 2 in - fixup_variadic_call pos' tyl + let ri' = if Archi.ptr64 then ri + 1 else align ri 2 + 2 in + fixup_variadic_call ri' rf tyl | (Tfloat | Tany64) :: tyl -> if Archi.ptr64 then begin - let rs = float_param_regs.(pos) - and rd = int_param_regs.(pos) in + let rs = float_param_regs.(rf) + and rd = int_param_regs.(ri) in emit (Pfmvxd(rd, rs)); - fixup_variadic_call (pos + 1) tyl + fixup_variadic_call (ri + 1) (rf + 1) tyl end else begin - let pos = align pos 2 in - if pos < 8 then begin - let rs = float_param_regs.(pos) - and rd1 = int_param_regs.(pos) - and rd2 = int_param_regs.(pos + 1) in + let ri = align ri 2 in + if ri < 8 then begin + let rs = float_param_regs.(rf) + and rd1 = int_param_regs.(ri) + and rd2 = int_param_regs.(ri + 1) in emit (Paddiw(X2, X X2, Integers.Int.neg _16)); emit (Pfsd(rs, X2, Ofsimm _0)); emit (Plw(rd1, X2, Ofsimm _0)); emit (Plw(rd2, X2, Ofsimm _4)); emit (Paddiw(X2, X X2, _16)); - fixup_variadic_call (pos + 2) tyl + fixup_variadic_call (ri + 2) (rf + 1) tyl end end let fixup_call sg = - if sg.sig_cc.cc_vararg then fixup_variadic_call 0 sg.sig_args + if sg.sig_cc.cc_vararg then fixup_variadic_call 0 0 sg.sig_args (* Handling of annotations *) diff --git a/riscV/CSE2deps.v b/riscV/CSE2deps.v new file mode 100644 index 00000000..8ab9242a --- /dev/null +++ b/riscV/CSE2deps.v @@ -0,0 +1,20 @@ +Require Import BoolEqual Coqlib. +Require Import AST Integers Floats. +Require Import Values Memory Globalenvs Events. +Require Import Op. + + +Definition can_swap_accesses_ofs ofsr chunkr ofsw chunkw := + (0 <=? ofsw) && (ofsw <=? (Ptrofs.modulus - largest_size_chunk)) + && (0 <=? ofsr) && (ofsr <=? (Ptrofs.modulus - largest_size_chunk)) + && ((ofsw + size_chunk chunkw <=? ofsr) || + (ofsr + size_chunk chunkr <=? ofsw)). + +Definition may_overlap chunk addr args chunk' addr' args' := + match addr, addr', args, args' with + | (Aindexed ofs), (Aindexed ofs'), + (base :: nil), (base' :: nil) => + if peq base base' + then negb (can_swap_accesses_ofs (Ptrofs.unsigned ofs') chunk' (Ptrofs.unsigned ofs) chunk) + else true | _, _, _, _ => true + end. diff --git a/riscV/CSE2depsproof.v b/riscV/CSE2depsproof.v new file mode 100644 index 00000000..a3811e78 --- /dev/null +++ b/riscV/CSE2depsproof.v @@ -0,0 +1,127 @@ +Require Import Coqlib Maps Errors Integers Floats Lattice Kildall. +Require Import AST Linking. +Require Import Memory Registers Op RTL Maps. + +Require Import Globalenvs Values. +Require Import Linking Values Memory Globalenvs Events Smallstep. +Require Import Registers Op RTL. +Require Import CSE2 CSE2deps. +Require Import Lia. + +Lemma ptrofs_size : + Ptrofs.wordsize = (if Archi.ptr64 then 64 else 32)%nat. +Proof. + unfold Ptrofs.wordsize. + unfold Wordsize_Ptrofs.wordsize. + trivial. +Qed. + +Lemma ptrofs_modulus : + Ptrofs.modulus = if Archi.ptr64 then 18446744073709551616 else 4294967296. +Proof. + unfold Ptrofs.modulus. + rewrite ptrofs_size. + destruct Archi.ptr64; reflexivity. +Qed. + +Section SOUNDNESS. + Variable F V : Type. + Variable genv: Genv.t F V. + Variable sp : val. + +Section MEMORY_WRITE. + Variable m m2 : mem. + Variable chunkw chunkr : memory_chunk. + Variable base : val. + + Variable addrw addrr valw : val. + Hypothesis STORE : Mem.storev chunkw m addrw valw = Some m2. + + Section INDEXED_AWAY. + Variable ofsw ofsr : ptrofs. + Hypothesis ADDRW : eval_addressing genv sp + (Aindexed ofsw) (base :: nil) = Some addrw. + Hypothesis ADDRR : eval_addressing genv sp + (Aindexed ofsr) (base :: nil) = Some addrr. + + Lemma load_store_away1 : + forall RANGEW : 0 <= Ptrofs.unsigned ofsw <= Ptrofs.modulus - largest_size_chunk, + forall RANGER : 0 <= Ptrofs.unsigned ofsr <= Ptrofs.modulus - largest_size_chunk, + forall SWAPPABLE : Ptrofs.unsigned ofsw + size_chunk chunkw <= Ptrofs.unsigned ofsr + \/ Ptrofs.unsigned ofsr + size_chunk chunkr <= Ptrofs.unsigned ofsw, + Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr. + + Proof. + intros. + + pose proof (max_size_chunk chunkr) as size_chunkr_bounded. + pose proof (max_size_chunk chunkw) as size_chunkw_bounded. + unfold largest_size_chunk in *. + + rewrite ptrofs_modulus in *. + simpl in *. + inv ADDRR. + inv ADDRW. + destruct base; try discriminate. + eapply Mem.load_store_other with (chunk := chunkw) (v := valw) (b := b). + exact STORE. + right. + + all: try (destruct (Ptrofs.unsigned_add_either i ofsr) as [OFSR | OFSR]; + rewrite OFSR). + all: try (destruct (Ptrofs.unsigned_add_either i ofsw) as [OFSW | OFSW]; + rewrite OFSW). + all: try rewrite ptrofs_modulus in *. + all: destruct Archi.ptr64. + + all: intuition lia. + Qed. + + Theorem load_store_away : + can_swap_accesses_ofs (Ptrofs.unsigned ofsr) chunkr (Ptrofs.unsigned ofsw) chunkw = true -> + Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr. + Proof. + intro SWAP. + unfold can_swap_accesses_ofs in SWAP. + repeat rewrite andb_true_iff in SWAP. + repeat rewrite orb_true_iff in SWAP. + repeat rewrite Z.leb_le in SWAP. + apply load_store_away1. + all: tauto. + Qed. + End INDEXED_AWAY. +End MEMORY_WRITE. +End SOUNDNESS. + + +Section SOUNDNESS. + Variable F V : Type. + Variable genv: Genv.t F V. + Variable sp : val. + +Lemma may_overlap_sound: + forall m m' : mem, + forall chunk addr args chunk' addr' args' v a a' rs, + (eval_addressing genv sp addr (rs ## args)) = Some a -> + (eval_addressing genv sp addr' (rs ## args')) = Some a' -> + (may_overlap chunk addr args chunk' addr' args') = false -> + (Mem.storev chunk m a v) = Some m' -> + (Mem.loadv chunk' m' a') = (Mem.loadv chunk' m a'). +Proof. + intros until rs. + intros ADDR ADDR' OVERLAP STORE. + destruct addr; destruct addr'; try discriminate. + { (* Aindexed / Aindexed *) + destruct args as [ | base [ | ]]. 1,3: discriminate. + destruct args' as [ | base' [ | ]]. 1,3: discriminate. + simpl in OVERLAP. + destruct (peq base base'). 2: discriminate. + subst base'. + destruct (can_swap_accesses_ofs (Ptrofs.unsigned i0) chunk' (Ptrofs.unsigned i) chunk) eqn:SWAP. + 2: discriminate. + simpl in *. + eapply load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption. + } +Qed. + +End SOUNDNESS. diff --git a/riscV/Conventions1.v b/riscV/Conventions1.v index 27d09d94..17326139 100644 --- a/riscV/Conventions1.v +++ b/riscV/Conventions1.v @@ -105,7 +105,9 @@ Definition is_float_reg (r: mreg) := of function arguments), but this leaves much liberty in choosing actual locations. To ensure binary interoperability of code generated by our compiler with libraries compiled by another compiler, we - implement the standard RISC-V conventions. *) + implement the standard RISC-V conventions as found here: + https://github.com/riscv/riscv-elf-psabi-doc/blob/master/riscv-elf.md +*) (** ** Location of function result *) @@ -169,38 +171,32 @@ Qed. (** ** Location of function arguments *) -(** The RISC-V ABI states the following convention for passing arguments +(** The RISC-V ABI states the following conventions for passing arguments to a function: -- Arguments are passed in registers when possible. - -- Up to eight integer registers (ai: int_param_regs) and up to eight - floating-point registers (fai: float_param_regs) are used for this - purpose. - -- If the arguments to a function are conceptualized as fields of a C - struct, each with pointer alignment, the argument registers are a - shadow of the first eight pointer-words of that struct. If argument - i < 8 is a floating-point type, it is passed in floating-point - register fa_i; otherwise, it is passed in integer register a_i. - -- When primitive arguments twice the size of a pointer-word are passed - on the stack, they are naturally aligned. When they are passed in the - integer registers, they reside in an aligned even-odd register pair, - with the even register holding the least-significant bits. - -- Floating-point arguments to variadic functions (except those that - are explicitly named in the parameter list) are passed in integer - registers. - -- The portion of the conceptual struct that is not passed in argument - registers is passed on the stack. The stack pointer sp points to the - first argument not passed in a register. - -The bit about variadic functions doesn't quite fit CompCert's model. -We do our best by passing the FP arguments in registers, as usual, -and reserving the corresponding integer registers, so that fixup -code can be introduced in the Asmexpand pass. +- RV64, not variadic: pass the first 8 integer arguments in + integer registers (a1...a8: int_param_regs), the first 8 FP arguments + in FP registers (fa1...fa8: float_param_regs), and the remaining + arguments on the stack, in 8-byte slots. + +- RV32, not variadic: same, but arguments of 64-bit integer type + are passed in two consecutive integer registers (a(i), a(i+1)) + or in a(8) and on a 32-bit word on the stack. Stack-allocated + arguments are aligned to their natural alignment. + +- RV64, variadic: pass the first 8 arguments in integer registers + (a1...a8), including FP arguments; pass the remaining arguments on + the stack, in 8-byte slots. + +- RV32, variadic: same, but arguments of 64-bit types (integers as well + as floats) are passed in two consecutive aligned integer registers + (a(2i), a(2i+1)). + +The passing of FP arguments to variadic functions in integer registers +doesn't quite fit CompCert's model. We do our best by passing the FP +arguments in registers, as usual, and reserving the corresponding +integer registers, so that fixup code can be introduced in the +Asmexpand pass. *) Definition int_param_regs := @@ -208,80 +204,84 @@ Definition int_param_regs := Definition float_param_regs := F10 :: F11 :: F12 :: F13 :: F14 :: F15 :: F16 :: F17 :: nil. -Definition one_arg (regs: list mreg) (rn: Z) (ofs: Z) (ty: typ) - (rec: Z -> Z -> list (rpair loc)) := - match list_nth_z regs rn with +Definition int_arg (ri rf ofs: Z) (ty: typ) + (rec: Z -> Z -> Z -> list (rpair loc)) := + match list_nth_z int_param_regs ri with | Some r => - One(R r) :: rec (rn + 1) ofs + One(R r) :: rec (ri + 1) rf ofs | None => - let ofs := align ofs (typealign ty) in - One(S Outgoing ofs ty) :: rec rn (ofs + (if Archi.ptr64 then 2 else typesize ty)) + let ofs := align ofs (typesize ty) in + One(S Outgoing ofs ty) + :: rec ri rf (ofs + (if Archi.ptr64 then 2 else typesize ty)) end. -Definition two_args (regs: list mreg) (rn: Z) (ofs: Z) - (rec: Z -> Z -> list (rpair loc)) := - let rn := align rn 2 in - match list_nth_z regs rn, list_nth_z regs (rn + 1) with - | Some r1, Some r2 => - Twolong (R r2) (R r1) :: rec (rn + 2) ofs - | _, _ => - let ofs := align ofs 2 in - Twolong (S Outgoing (ofs + 1) Tint) (S Outgoing ofs Tint) :: - rec rn (ofs + 2) +Definition float_arg (va: bool) (ri rf ofs: Z) (ty: typ) + (rec: Z -> Z -> Z -> list (rpair loc)) := + match list_nth_z float_param_regs rf with + | Some r => + if va then + (let ri' := (* reserve 1 or 2 aligned integer registers *) + if Archi.ptr64 || zeq (typesize ty) 1 then ri + 1 else align ri 2 + 2 in + if zle ri' 8 then + (* we have enough integer registers, put argument in FP reg + and fixup code will put it in one or two integer regs *) + One (R r) :: rec ri' (rf + 1) ofs + else + (* we are out of integer registers, pass argument on stack *) + let ofs := align ofs (typesize ty) in + One(S Outgoing ofs ty) + :: rec ri' rf (ofs + (if Archi.ptr64 then 2 else typesize ty))) + else + One (R r) :: rec ri (rf + 1) ofs + | None => + let ofs := align ofs (typesize ty) in + One(S Outgoing ofs ty) + :: rec ri rf (ofs + (if Archi.ptr64 then 2 else typesize ty)) end. -Definition hybrid_arg (regs: list mreg) (rn: Z) (ofs: Z) (ty: typ) - (rec: Z -> Z -> list (rpair loc)) := - let rn := align rn 2 in - match list_nth_z regs rn with - | Some r => - One (R r) :: rec (rn + 2) ofs - | None => +Definition split_long_arg (va: bool) (ri rf ofs: Z) + (rec: Z -> Z -> Z -> list (rpair loc)) := + let ri := if va then align ri 2 else ri in + match list_nth_z int_param_regs ri, list_nth_z int_param_regs (ri + 1) with + | Some r1, Some r2 => + Twolong (R r2) (R r1) :: rec (ri + 2) rf ofs + | Some r1, None => + Twolong (S Outgoing ofs Tint) (R r1) :: rec (ri + 1) rf (ofs + 1) + | None, _ => let ofs := align ofs 2 in - One (S Outgoing ofs ty) :: rec rn (ofs + 2) + Twolong (S Outgoing (ofs + 1) Tint) (S Outgoing ofs Tint) :: + rec ri rf (ofs + 2) end. Fixpoint loc_arguments_rec (va: bool) - (tyl: list typ) (r ofs: Z) {struct tyl} : list (rpair loc) := + (tyl: list typ) (ri rf ofs: Z) {struct tyl} : list (rpair loc) := match tyl with | nil => nil | (Tint | Tany32) as ty :: tys => - one_arg int_param_regs r ofs ty (loc_arguments_rec va tys) + (* pass in one integer register or on stack *) + int_arg ri rf ofs ty (loc_arguments_rec va tys) | Tsingle as ty :: tys => - one_arg float_param_regs r ofs ty (loc_arguments_rec va tys) + (* pass in one FP register or on stack. + If vararg, reserve 1 integer register. *) + float_arg va ri rf ofs ty (loc_arguments_rec va tys) | Tlong as ty :: tys => - if Archi.ptr64 - then one_arg int_param_regs r ofs ty (loc_arguments_rec va tys) - else two_args int_param_regs r ofs (loc_arguments_rec va tys) + if Archi.ptr64 then + (* pass in one integer register or on stack *) + int_arg ri rf ofs ty (loc_arguments_rec va tys) + else + (* pass in register pair or on stack; align register pair if vararg *) + split_long_arg va ri rf ofs(loc_arguments_rec va tys) | (Tfloat | Tany64) as ty :: tys => - if va && negb Archi.ptr64 - then hybrid_arg float_param_regs r ofs ty (loc_arguments_rec va tys) - else one_arg float_param_regs r ofs ty (loc_arguments_rec va tys) + (* pass in one FP register or on stack. + If vararg, reserve 1 or 2 integer registers. *) + float_arg va ri rf ofs ty (loc_arguments_rec va tys) end. (** [loc_arguments s] returns the list of locations where to store arguments when calling a function with signature [s]. *) Definition loc_arguments (s: signature) : list (rpair loc) := - loc_arguments_rec s.(sig_cc).(cc_vararg) s.(sig_args) 0 0. - -(** [size_arguments s] returns the number of [Outgoing] slots used - to call a function with signature [s]. *) - -Definition max_outgoing_1 (accu: Z) (l: loc) : Z := - match l with - | S Outgoing ofs ty => Z.max accu (ofs + typesize ty) - | _ => accu - end. - -Definition max_outgoing_2 (accu: Z) (rl: rpair loc) : Z := - match rl with - | One l => max_outgoing_1 accu l - | Twolong l1 l2 => max_outgoing_1 (max_outgoing_1 accu l1) l2 - end. - -Definition size_arguments (s: signature) : Z := - List.fold_left max_outgoing_2 (loc_arguments s) 0. + loc_arguments_rec s.(sig_cc).(cc_vararg) s.(sig_args) 0 0 0. (** Argument locations are either non-temporary registers or [Outgoing] stack slots at nonnegative offsets. *) @@ -294,90 +294,87 @@ Definition loc_argument_acceptable (l: loc) : Prop := end. Lemma loc_arguments_rec_charact: - forall va tyl rn ofs p, + forall va tyl ri rf ofs p, ofs >= 0 -> - In p (loc_arguments_rec va tyl rn ofs) -> forall_rpair loc_argument_acceptable p. + In p (loc_arguments_rec va tyl ri rf ofs) -> forall_rpair loc_argument_acceptable p. Proof. set (OK := fun (l: list (rpair loc)) => forall p, In p l -> forall_rpair loc_argument_acceptable p). - set (OKF := fun (f: Z -> Z -> list (rpair loc)) => - forall rn ofs, ofs >= 0 -> OK (f rn ofs)). - set (OKREGS := fun (l: list mreg) => forall r, In r l -> is_callee_save r = false). - assert (AL: forall ofs ty, ofs >= 0 -> align ofs (typealign ty) >= 0). + set (OKF := fun (f: Z -> Z -> Z -> list (rpair loc)) => + forall ri rf ofs, ofs >= 0 -> OK (f ri rf ofs)). + assert (CSI: forall r, In r int_param_regs -> is_callee_save r = false). + { decide_goal. } + assert (CSF: forall r, In r float_param_regs -> is_callee_save r = false). + { decide_goal. } + assert (AL: forall ofs ty, ofs >= 0 -> align ofs (typesize ty) >= 0). { intros. - assert (ofs <= align ofs (typealign ty)) by (apply align_le; apply typealign_pos). + assert (ofs <= align ofs (typesize ty)) by (apply align_le; apply typesize_pos). omega. } + assert (ALD: forall ofs ty, ofs >= 0 -> (typealign ty | align ofs (typesize ty))). + { intros. eapply Z.divide_trans. apply typealign_typesize. + apply align_divides. apply typesize_pos. } assert (SK: (if Archi.ptr64 then 2 else 1) > 0). { destruct Archi.ptr64; omega. } assert (SKK: forall ty, (if Archi.ptr64 then 2 else typesize ty) > 0). { intros. destruct Archi.ptr64. omega. apply typesize_pos. } - assert (A: forall regs rn ofs ty f, - OKREGS regs -> OKF f -> ofs >= 0 -> OK (one_arg regs rn ofs ty f)). - { intros until f; intros OR OF OO; red; unfold one_arg; intros. - destruct (list_nth_z regs rn) as [r|] eqn:NTH; destruct H. - - subst p; simpl. apply OR. eapply list_nth_z_in; eauto. + assert (A: forall ri rf ofs ty f, + OKF f -> ofs >= 0 -> OK (int_arg ri rf ofs ty f)). + { intros until f; intros OF OO; red; unfold int_arg; intros. + destruct (list_nth_z int_param_regs ri) as [r|] eqn:NTH; destruct H. + - subst p; simpl. apply CSI. eapply list_nth_z_in; eauto. - eapply OF; eauto. - subst p; simpl. auto using align_divides, typealign_pos. - eapply OF; [idtac|eauto]. generalize (AL ofs ty OO) (SKK ty); omega. } - assert (B: forall regs rn ofs f, - OKREGS regs -> OKF f -> ofs >= 0 -> OK (two_args regs rn ofs f)). - { intros until f; intros OR OF OO; unfold two_args. - set (rn' := align rn 2). + assert (B: forall va ri rf ofs ty f, + OKF f -> ofs >= 0 -> OK (float_arg va ri rf ofs ty f)). + { intros until f; intros OF OO; red; unfold float_arg; intros. + destruct (list_nth_z float_param_regs rf) as [r|] eqn:NTH. + - set (ri' := if Archi.ptr64 || zeq (typesize ty) 1 then ri + 1 else align ri 2 + 2) in *. + destruct va; [destruct (zle ri' 8)|idtac]; destruct H. + + subst p; simpl. apply CSF. eapply list_nth_z_in; eauto. + + eapply OF; eauto. + + subst p; repeat split; auto. + + eapply OF; [idtac|eauto]. generalize (AL ofs ty OO) (SKK ty); omega. + + subst p; simpl. apply CSF. eapply list_nth_z_in; eauto. + + eapply OF; eauto. + - destruct H. + + subst p; repeat split; auto. + + eapply OF; [idtac|eauto]. generalize (AL ofs ty OO) (SKK ty); omega. + } + assert (C: forall va ri rf ofs f, + OKF f -> ofs >= 0 -> OK (split_long_arg va ri rf ofs f)). + { intros until f; intros OF OO; unfold split_long_arg. + set (ri' := if va then align ri 2 else ri). set (ofs' := align ofs 2). assert (OO': ofs' >= 0) by (apply (AL ofs Tlong); auto). - assert (DFL: OK (Twolong (S Outgoing (ofs' + 1) Tint) (S Outgoing ofs' Tint) - :: f rn' (ofs' + 2))). - { red; simpl; intros. destruct H. - - subst p; simpl. - repeat split; auto using Z.divide_1_l. omega. - - eapply OF; [idtac|eauto]. omega. - } - destruct (list_nth_z regs rn') as [r1|] eqn:NTH1; - destruct (list_nth_z regs (rn' + 1)) as [r2|] eqn:NTH2; - try apply DFL. - red; simpl; intros; destruct H. - - subst p; simpl. split; apply OR; eauto using list_nth_z_in. - - eapply OF; [idtac|eauto]. auto. - } - assert (C: forall regs rn ofs ty f, - OKREGS regs -> OKF f -> ofs >= 0 -> typealign ty = 1 -> OK (hybrid_arg regs rn ofs ty f)). - { intros until f; intros OR OF OO OTY; unfold hybrid_arg; red; intros. - set (rn' := align rn 2) in *. - destruct (list_nth_z regs rn') as [r|] eqn:NTH; destruct H. - - subst p; simpl. apply OR. eapply list_nth_z_in; eauto. - - eapply OF; eauto. - - subst p; simpl. rewrite OTY. split. apply (AL ofs Tlong OO). apply Z.divide_1_l. - - eapply OF; [idtac|eauto]. generalize (AL ofs Tlong OO); simpl; omega. + destruct (list_nth_z int_param_regs ri') as [r1|] eqn:NTH1; + [destruct (list_nth_z int_param_regs (ri'+1)) as [r2|] eqn:NTH2 | idtac]. + - red; simpl; intros; destruct H. + + subst p; split; apply CSI; eauto using list_nth_z_in. + + eapply OF; [idtac|eauto]. omega. + - red; simpl; intros; destruct H. + + subst p; split. split; auto using Z.divide_1_l. apply CSI; eauto using list_nth_z_in. + + eapply OF; [idtac|eauto]. omega. + - red; simpl; intros; destruct H. + + subst p; repeat split; auto using Z.divide_1_l. omega. + + eapply OF; [idtac|eauto]. omega. } - assert (D: OKREGS int_param_regs). - { red. decide_goal. } - assert (E: OKREGS float_param_regs). - { red. decide_goal. } - - cut (forall va tyl rn ofs, ofs >= 0 -> OK (loc_arguments_rec va tyl rn ofs)). + cut (forall va tyl ri rf ofs, ofs >= 0 -> OK (loc_arguments_rec va tyl ri rf ofs)). unfold OK. eauto. induction tyl as [ | ty1 tyl]; intros until ofs; intros OO; simpl. - red; simpl; tauto. - destruct ty1. + (* int *) apply A; auto. -+ (* float *) - destruct (va && negb Archi.ptr64). - apply C; auto. - apply A; auto. ++ (* float *) apply B; auto. + (* long *) destruct Archi.ptr64. apply A; auto. - apply B; auto. -+ (* single *) - apply A; auto. -+ (* any32 *) - apply A; auto. -+ (* any64 *) - destruct (va && negb Archi.ptr64). apply C; auto. - apply A; auto. ++ (* single *) apply B; auto. ++ (* any32 *) apply A; auto. ++ (* any64 *) apply B; auto. Qed. Lemma loc_arguments_acceptable: @@ -387,52 +384,6 @@ Proof. unfold loc_arguments; intros. eapply loc_arguments_rec_charact; eauto. omega. Qed. -(** The offsets of [Outgoing] arguments are below [size_arguments s]. *) - -Remark fold_max_outgoing_above: - forall l n, fold_left max_outgoing_2 l n >= n. -Proof. - assert (A: forall n l, max_outgoing_1 n l >= n). - { intros; unfold max_outgoing_1. destruct l as [_ | []]; xomega. } - induction l; simpl; intros. - - omega. - - eapply Zge_trans. eauto. - destruct a; simpl. apply A. eapply Zge_trans; eauto. -Qed. - -Lemma size_arguments_above: - forall s, size_arguments s >= 0. -Proof. - intros. apply fold_max_outgoing_above. -Qed. - -Lemma loc_arguments_bounded: - forall (s: signature) (ofs: Z) (ty: typ), - In (S Outgoing ofs ty) (regs_of_rpairs (loc_arguments s)) -> - ofs + typesize ty <= size_arguments s. -Proof. - intros until ty. - assert (A: forall n l, n <= max_outgoing_1 n l). - { intros; unfold max_outgoing_1. destruct l as [_ | []]; xomega. } - assert (B: forall p n, - In (S Outgoing ofs ty) (regs_of_rpair p) -> - ofs + typesize ty <= max_outgoing_2 n p). - { intros. destruct p; simpl in H; intuition; subst; simpl. - - xomega. - - eapply Z.le_trans. 2: apply A. xomega. - - xomega. } - assert (C: forall l n, - In (S Outgoing ofs ty) (regs_of_rpairs l) -> - ofs + typesize ty <= fold_left max_outgoing_2 l n). - { induction l; simpl; intros. - - contradiction. - - rewrite in_app_iff in H. destruct H. - + eapply Z.le_trans. eapply B; eauto. apply Z.ge_le. apply fold_max_outgoing_above. - + apply IHl; auto. - } - apply C. -Qed. - Lemma loc_arguments_main: loc_arguments signature_main = nil. Proof. diff --git a/riscV/DuplicateOpcodeHeuristic.ml b/riscV/DuplicateOpcodeHeuristic.ml index 85505245..2ec314c1 100644 --- a/riscV/DuplicateOpcodeHeuristic.ml +++ b/riscV/DuplicateOpcodeHeuristic.ml @@ -1,3 +1,27 @@ -exception HeuristicSucceeded - -let opcode_heuristic code cond ifso ifnot preferred = () +(* open Camlcoq *) +open Op +open Integers + +let opcode_heuristic code cond ifso ifnot is_loop_header = + match cond with + | Ccompimm (c, n) | Ccompuimm (c, n) -> if n == Integers.Int.zero then (match c with + | Clt | Cle -> Some false + | Cgt | Cge -> Some true + | _ -> None + ) else None + | Ccomplimm (c, n) | Ccompluimm (c, n) -> if n == Integers.Int64.zero then (match c with + | Clt | Cle -> Some false + | Cgt | Cge -> Some true + | _ -> None + ) else None + | Ccompf c | Ccompfs c -> (match c with + | Ceq -> Some false + | Cne -> Some true + | _ -> None + ) + | Cnotcompf c | Cnotcompfs c -> (match c with + | Ceq -> Some true + | Cne -> Some false + | _ -> None + ) + | _ -> None diff --git a/runtime/arm/i64_stof.S b/runtime/arm/i64_stof.S index bcfa471c..11e00a2a 100644 --- a/runtime/arm/i64_stof.S +++ b/runtime/arm/i64_stof.S @@ -39,12 +39,11 @@ @@@ Conversion from signed 64-bit integer to single float FUNCTION(__compcert_i64_stof) - @ Check whether -2^53 <= X < 2^53 - ASR r2, Reg0HI, #21 - ASR r3, Reg0HI, #31 @ (r2,r3) = X >> 53 + @ Check whether -2^53 <= X < 2^53 + ASR r2, Reg0HI, #21 @ r2 = high 32 bits of X >> 53 + @ -2^53 <= X < 2^53 iff r2 is -1 or 0, that is, iff r2 + 1 is 0 or 1 adds r2, r2, #1 - adc r3, r3, #0 @ (r2,r3) = X >> 53 + 1 - cmp r3, #2 + cmp r2, #2 blo 1f @ X is large enough that double rounding can occur. @ Avoid it by nudging X away from the points where double rounding diff --git a/runtime/include/math.h b/runtime/include/math.h index d6475df1..01b8d8d8 100644 --- a/runtime/include/math.h +++ b/runtime/include/math.h @@ -1,6 +1,8 @@ #ifndef _COMPCERT_MATH_H #define _COMPCERT_MATH_H +#ifdef __K1C__ + #define isfinite(__y) (fpclassify((__y)) >= FP_ZERO) #include_next <math.h> @@ -16,4 +18,9 @@ #define fmaf(x, y, z) __builtin_fmaf((x),(y),(z)) #endif +#else + +#include_next <math.h> + +#endif #endif diff --git a/runtime/powerpc/i64_stof.s b/runtime/powerpc/i64_stof.s index 97fa6bb8..ea23a1c8 100644 --- a/runtime/powerpc/i64_stof.s +++ b/runtime/powerpc/i64_stof.s @@ -43,20 +43,19 @@ __compcert_i64_stof: mflr r9 # Check whether -2^53 <= X < 2^53 - srawi r5, r3, 31 - srawi r6, r3, 21 # (r5,r6) = X >> 53 - addic r6, r6, 1 - addze r5, r5 # (r5,r6) = (X >> 53) + 1 + srawi r5, r3, 21 # r5 = high 32 bits of X >> 53 + # -2^53 <= X < 2^53 iff r5 is -1 or 0, that is, iff r5 + 1 is 0 or 1 + addi r5, r5, 1 cmplwi r5, 2 blt 1f # X is large enough that double rounding can occur. # Avoid it by nudging X away from the points where double rounding # occurs (the "round to odd" technique) - rlwinm r0, r4, 0, 21, 31 # extract bits 0 to 11 of X - addi r0, r0, 0x7FF # r0 = (X & 0x7FF) + 0x7FF - # bit 12 of r0 is 0 if all low 12 bits of X are 0, 1 otherwise - # bits 13-31 of r0 are 0 - or r4, r4, r0 # correct bit number 12 of X + rlwinm r5, r4, 0, 21, 31 # extract bits 0 to 11 of X + addi r5, r5, 0x7FF # r5 = (X & 0x7FF) + 0x7FF + # bit 12 of r5 is 0 if all low 12 bits of X are 0, 1 otherwise + # bits 13-31 of r5 are 0 + or r4, r4, r5 # correct bit number 12 of X rlwinm r4, r4, 0, 0, 20 # set to 0 bits 0 to 11 of X # Convert to double, then round to single 1: bl __compcert_i64_stod diff --git a/runtime/powerpc/i64_utof.s b/runtime/powerpc/i64_utof.s index cdb2f867..4a2a172b 100644 --- a/runtime/powerpc/i64_utof.s +++ b/runtime/powerpc/i64_utof.s @@ -48,11 +48,11 @@ __compcert_i64_utof: # X is large enough that double rounding can occur. # Avoid it by nudging X away from the points where double rounding # occurs (the "round to odd" technique) - rlwinm r0, r4, 0, 21, 31 # extract bits 0 to 11 of X - addi r0, r0, 0x7FF # r0 = (X & 0x7FF) + 0x7FF - # bit 12 of r0 is 0 if all low 12 bits of X are 0, 1 otherwise - # bits 13-31 of r0 are 0 - or r4, r4, r0 # correct bit number 12 of X + rlwinm r5, r4, 0, 21, 31 # extract bits 0 to 11 of X + addi r5, r5, 0x7FF # r5 = (X & 0x7FF) + 0x7FF + # bit 12 of r5 is 0 if all low 12 bits of X are 0, 1 otherwise + # bits 13-31 of r5 are 0 + or r4, r4, r5 # correct bit number 12 of X rlwinm r4, r4, 0, 0, 20 # set to 0 bits 0 to 11 of X # Convert to double, then round to single 1: bl __compcert_i64_utod diff --git a/runtime/powerpc64/i64_utof.s b/runtime/powerpc64/i64_utof.s index cdb2f867..4a2a172b 100644 --- a/runtime/powerpc64/i64_utof.s +++ b/runtime/powerpc64/i64_utof.s @@ -48,11 +48,11 @@ __compcert_i64_utof: # X is large enough that double rounding can occur. # Avoid it by nudging X away from the points where double rounding # occurs (the "round to odd" technique) - rlwinm r0, r4, 0, 21, 31 # extract bits 0 to 11 of X - addi r0, r0, 0x7FF # r0 = (X & 0x7FF) + 0x7FF - # bit 12 of r0 is 0 if all low 12 bits of X are 0, 1 otherwise - # bits 13-31 of r0 are 0 - or r4, r4, r0 # correct bit number 12 of X + rlwinm r5, r4, 0, 21, 31 # extract bits 0 to 11 of X + addi r5, r5, 0x7FF # r5 = (X & 0x7FF) + 0x7FF + # bit 12 of r5 is 0 if all low 12 bits of X are 0, 1 otherwise + # bits 13-31 of r5 are 0 + or r4, r4, r5 # correct bit number 12 of X rlwinm r4, r4, 0, 0, 20 # set to 0 bits 0 to 11 of X # Convert to double, then round to single 1: bl __compcert_i64_utod diff --git a/test/Makefile b/test/Makefile index 7efcd8f1..e9c5d6a1 100644 --- a/test/Makefile +++ b/test/Makefile @@ -4,7 +4,9 @@ include ../Makefile.config # Kalray note - removing compression, raytracer and spass that cannot be executed by the simulator in reasonable time ifeq ($(ARCH),mppa_k1c) - DIRS:=c regression + DIRS=c regression +else + DIRS=c compression raytracer spass regression endif ifeq ($(CLIGHTGEN),true) diff --git a/test/c/mandelbrot.c b/test/c/mandelbrot.c index fb8b929c..548c3ffa 100644 --- a/test/c/mandelbrot.c +++ b/test/c/mandelbrot.c @@ -59,7 +59,6 @@ int main (int argc, char **argv) if(bit_num == 8) { - printf("%c", byte_acc); putc(byte_acc,stdout); #ifdef __K1C__ // stdout isn't flushed enough when --syscall=libstd_scalls.so is passed to the simulator k1-cluster fflush(stdout); @@ -70,7 +69,6 @@ int main (int argc, char **argv) else if(x == w-1) { byte_acc <<= (8-w%8); - printf("%c", byte_acc); putc(byte_acc,stdout); #ifdef __K1C__ // stdout isn't flushed enough when --syscall=libstd_scalls.so is passed to the simulator k1-cluster fflush(stdout); diff --git a/test/cse2/globals.c b/test/cse2/globals.c new file mode 100644 index 00000000..c6dd59cd --- /dev/null +++ b/test/cse2/globals.c @@ -0,0 +1,8 @@ +int glob1, glob2; + +void toto() { + if (glob1 > 4) { + glob2 ++; + glob1 --; + } +} diff --git a/test/cse2/indexed_addr.c b/test/cse2/indexed_addr.c new file mode 100644 index 00000000..30a7c571 --- /dev/null +++ b/test/cse2/indexed_addr.c @@ -0,0 +1,6 @@ +void foo(int *t) { + if (t[0] > 4) { + t[1] ++; + t[0] --; + } +} diff --git a/test/monniaux/clock.c b/test/monniaux/clock.c index fb636667..4ec679f6 100644 --- a/test/monniaux/clock.c +++ b/test/monniaux/clock.c @@ -24,9 +24,9 @@ cycle_t get_current_cycle(void) { } void print_total_clock(void) { - printf("time cycles: %lu\n", total_clock); + printf("time cycles: %" PRcycle "\n", total_clock); } void printerr_total_clock(void) { - fprintf(stderr, "time cycles: %lu\n", total_clock); + fprintf(stderr, "time cycles: %" PRcycle "\n", total_clock); } diff --git a/test/monniaux/cycles.h b/test/monniaux/cycles.h index 21541145..c7dc582b 100644 --- a/test/monniaux/cycles.h +++ b/test/monniaux/cycles.h @@ -1,13 +1,11 @@ +#include <stdint.h> #include <inttypes.h> #include <stdio.h> -typedef unsigned long cycle_t; - -#ifdef MAX_MEASURES - static cycle_t _last_stop[MAX_MEASURES] = {0}; - static cycle_t _total_cycles[MAX_MEASURES] = {0}; -#endif #ifdef __K1C__ +typedef uint64_t cycle_t; +#define PRcycle PRId64 + #include <../../k1-cos/include/hal/cos_registers.h> static inline void cycle_count_config(void) @@ -27,18 +25,57 @@ static inline cycle_t get_cycle(void) #else // not K1C static inline void cycle_count_config(void) { } -#ifdef __x86_64__ +#if defined(__i386__) || defined( __x86_64__) +#define PRcycle PRId64 +typedef uint64_t cycle_t; #include <x86intrin.h> static inline cycle_t get_cycle(void) { return __rdtsc(); } #elif __riscv +#ifdef __riscv32 +#define PRcycle PRId32 +typedef uint32_t cycle_t; +#else +#define PRcycle PRId64 +typedef uint64_t cycle_t; +#endif static inline cycle_t get_cycle(void) { cycle_t cycles; asm volatile ("rdcycle %0" : "=r" (cycles)); return cycles; } +#elif defined (__ARM_ARCH) && (__ARM_ARCH >= 6) +#if (__ARM_ARCH < 8) +typedef uint32_t cycle_t; +#define PRcycle PRId32 + +/* need this kernel module +https://github.com/zertyz/MTL/tree/master/cpp/time/kernel/arm */ +static inline cycle_t get_cycle(void) { + cycle_t cycles; + __asm__ volatile ("mrc p15, 0, %0, c9, c13, 0":"=r" (cycles)); + return cycles; +} #else +#define PRcycle PRId64 +typedef uint64_t cycle_t; +/* need this kernel module: +https://github.com/jerinjacobk/armv8_pmu_cycle_counter_el0 + +on 5+ kernels, remove first argument of access_ok macro */ + +static inline cycle_t get_cycle(void) +{ + uint64_t val; + __asm__ volatile("mrs %0, pmccntr_el0" : "=r"(val)); + return val; +} +#endif + +#else +#define PRcycle PRId32 +typedef uint32_t cycle_t; static inline cycle_t get_cycle(void) { return 0; } #endif #endif @@ -48,3 +85,9 @@ static inline cycle_t get_cycle(void) { return 0; } #define TIMESTOP(i) {cycle_t cur = get_cycle(); _total_cycles[i] += cur - _last_stop[i]; _last_stop[i] = cur;} #define TIMEPRINT(n) { for (int i = 0; i <= n; i++) printf("%d cycles: %" PRIu64 "\n", i, _total_cycles[i]); } #endif + + +#ifdef MAX_MEASURES + static cycle_t _last_stop[MAX_MEASURES] = {0}; + static cycle_t _total_cycles[MAX_MEASURES] = {0}; +#endif diff --git a/test/monniaux/quicksort/quicksort_run.c b/test/monniaux/quicksort/quicksort_run.c index c35d0752..3c640b24 100644 --- a/test/monniaux/quicksort/quicksort_run.c +++ b/test/monniaux/quicksort/quicksort_run.c @@ -13,7 +13,7 @@ int main (void) { quicksort(vec, len); quicksort_time = get_cycle() - quicksort_time; printf("sorted=%s\n" - "time cycles:%" PRIu64 "\n", + "time cycles:%" PRcycle "\n", data_vec_is_sorted(vec, len)?"true":"false", quicksort_time); free(vec); diff --git a/test/monniaux/yarpgen/Makefile b/test/monniaux/yarpgen/Makefile index 9da82deb..28bd5ae0 100644 --- a/test/monniaux/yarpgen/Makefile +++ b/test/monniaux/yarpgen/Makefile @@ -1,52 +1,98 @@ -YARPGEN=yarpgen -MAX=300 +TARGET_CCOMP=../../../ccomp +TARGET_CC=gcc + +all: + +.SECONDARY: + +ifndef YARPGEN +YARPGEN=./yarpgen +GENERATOR=yarpgen +endif + +ifdef BITS +YARPGEN+=-m $(BITS) +CFLAGS+=-m$(BITS) +endif + +MAX=129 PREFIX=ran%06.f -include ../rules.mk - -K1C_CCOMPFLAGS += -funprototyped -fbitfields -CCOMPFLAGS += -funprototyped -fbitfields - -TARGETS_C=$(shell seq --format $(PREFIX)/func.c 0 $(MAX)) \ - $(shell seq --format $(PREFIX)/driver.c 0 $(MAX)) \ - $(shell seq --format $(PREFIX)/init.h 0 $(MAX)) -TARGETS_CCOMP_K1C_S=$(shell seq --format $(PREFIX)/func.ccomp.k1c.s 0 $(MAX)) \ - $(shell seq --format $(PREFIX)/driver.ccomp.k1c.s 0 $(MAX)) -TARGETS_GCC_K1C_S=$(shell seq --format $(PREFIX)/func.gcc.k1c.s 0 $(MAX)) \ - $(shell seq --format $(PREFIX)/driver.gcc.k1c.s 0 $(MAX)) -TARGETS_CCOMP_HOST_S=$(shell seq --format $(PREFIX)/func.ccomp.host.s 0 $(MAX)) \ - $(shell seq --format $(PREFIX)/driver.ccomp.host.s 0 $(MAX)) -TARGETS_GCC_HOST_S=$(shell seq --format $(PREFIX)/func.gcc.host.s 0 $(MAX)) \ - $(shell seq --format $(PREFIX)/driver.gcc.host.s 0 $(MAX)) -TARGETS_CCOMP_K1C_OUT=$(shell seq --format $(PREFIX)/example.ccomp.k1c.out 0 $(MAX)) -TARGETS_GCC_K1C_OUT=$(shell seq --format $(PREFIX)/example.gcc.k1c.out 0 $(MAX)) -TARGETS_GCC_HOST_OUT=$(shell seq --format $(PREFIX)/example.gcc.host.out 0 $(MAX)) -TARGETS_CCOMP_HOST_OUT=$(shell seq --format $(PREFIX)/example.ccomp.host.out 0 $(MAX)) -TARGETS_CMP=$(shell seq --format $(PREFIX)/example.k1c.cmp 0 $(MAX)) - -all: $(TARGETS_CCOMP_K1C_OUT) $(TARGETS_GCC_K1C_OUT) $(TARGETS_GCC_HOST_OUT) $(TARGETS_CCOMP_HOST_OUT) $(TARGETS_CCOMP_K1C_S) $(TARGETS_GCC_K1C_S) $(TARGETS_GCC_HOST_S) $(TARGETS_CCOMP_HOST_S) $(TARGETS_CMP) $(TARGETS_C) - -ran%/func.ccomp.k1c.s ran%/func.gcc.k1c.s ran%/func.ccomp.host.s ran%/func.gcc.host.s : ran%/init.h - -ran%/example.ccomp.k1c: ran%/func.ccomp.k1c.o ran%/driver.ccomp.k1c.o - $(K1C_CCOMP) $(K1C_CCOMPFLAGS) $+ -o $@ - -ran%/example.gcc.k1c: ran%/func.gcc.k1c.o ran%/driver.gcc.k1c.o - $(K1C_CC) $(K1C_CFLAGS) $+ -o $@ - -ran%/example.gcc.host: ran%/func.gcc.host.o ran%/driver.gcc.host.o - $(CC) $(CFLAGS) $+ -o $@ -ran%/example.ccomp.host: ran%/func.ccomp.host.o ran%/driver.ccomp.host.o - $(CCOMP) $(CCOMPFLAGS) $+ -o $@ +CCOMPOPTS=-static +CCOMPFLAGS+=-funprototyped -fbitfields -fno-cse2 -stdlib ../../../runtime # FIXME + +TESTS_C=$(shell seq --format $(PREFIX)/func.c 1 $(MAX)) \ + $(shell seq --format $(PREFIX)/driver.c 1 $(MAX)) \ + $(shell seq --format $(PREFIX)/init.c 1 $(MAX)) \ + $(shell seq --format $(PREFIX)/hash.c 1 $(MAX)) \ + $(shell seq --format $(PREFIX)/check.c 1 $(MAX)) \ + $(shell seq --format $(PREFIX)/init.h 1 $(MAX)) + +$(TESTS_C): $(GENERATOR) + +TESTS_CCOMP_TARGET_S=$(TEST_C:.c=.ccomp.target.s) +TESTS_GCC_TARGET_S=$(TEST_C:.c=.gcc.target.s) +TESTS_GCC_HOST_S=$(TEST_C:.c=.gcc.host.s) +TESTS_CCOMP_TARGET_OUT=$(shell seq --format $(PREFIX)/example.ccomp.target.out 1 $(MAX)) +TESTS_GCC_TARGET_OUT=$(shell seq --format $(PREFIX)/example.gcc.target.out 1 $(MAX)) +TESTS_GCC_HOST_OUT=$(shell seq --format $(PREFIX)/example.gcc.host.out 1 $(MAX)) +TESTS_CMP=$(shell seq --format $(PREFIX)/example.target.cmp 1 $(MAX)) # $(shell seq --format $(PREFIX)/example.host_target.cmp 1 $(MAX)) + +all: $(TESTS_CCOMP_TARGET_OUT) $(TESTS_GCC_TARGET_OUT) $(TESTS_CCOMP_TARGET_S) $(TESTS_GCC_TARGET_S) $(TESTS_CMP) $(TESTS_C) + +tests_c: $(TESTS_C) + +tests_s: $(TESTS_CCOMP_TARGET_S) + +%.ccomp.target.s : %.c + $(TARGET_CCOMP) $(CCOMPOPTS) $(CCOMPFLAGS) -S -o $@ $< + +%.gcc.target.s : %.c + $(TARGET_CC) $(CCOMPOPTS) -S -o $@ $< -ran%/driver.c ran%/func.c ran%/init.h: - -mkdir ran$* +%.gcc.host.s : %.c + $(CC) $(CFLAGS) -S -o $@ $< + +%.target.o : %.target.s + $(TARGET_CC) -c -o $@ $< + +%.host.o : %.host.s + $(CC) $(CFLAGS) -c -o $@ $< + +%.target.out : %.target + $(EXECUTE) $< | tee $@ + +%.host.out : %.host + ./$< | tee $@ + +ran%/func.ccomp.target.s ran%/func.gcc.target.s ran%/func.ccomp.host.s ran%/func.gcc.host.s ran%/init.gcc.host.s : ran%/init.h + +ran%/example.ccomp.target: ran%/func.ccomp.target.o ran%/driver.ccomp.target.o ran%/init.ccomp.target.o ran%/check.ccomp.target.o ran%/hash.ccomp.target.o + $(TARGET_CCOMP) $(CCOMPOPTS) $(CCOMPFLAGS) $+ -o $@ + +ran%/example.gcc.target: ran%/func.gcc.target.o ran%/driver.gcc.target.o ran%/init.gcc.target.o ran%/check.gcc.target.o ran%/hash.gcc.target.o + $(TARGET_CC) $(TARGET_CFLAGS) $+ -o $@ + +ran%/example.gcc.host: ran%/func.gcc.host.o ran%/driver.gcc.host.o ran%/init.gcc.host.o ran%/check.gcc.host.o ran%/hash.gcc.host.o + $(CC) $(CFLAGS) $+ -o $@ + +ran%/driver.c ran%/func.c ran%/init.c ran%/check.c ran%/hash.c ran%/init.h: + mkdir -p ran$* $(YARPGEN) --seed=$* --out-dir=ran$*/ --std=c99 -ran%/example.k1c.cmp : ran%/example.gcc.k1c.out ran%/example.ccomp.k1c.out +ran%/example.target.cmp : ran%/example.gcc.target.out ran%/example.ccomp.target.out cmp $+ > $@ -.PHONY: all clean +ran%/example.host_target.cmp : ran%/example.gcc.host.out ran%/example.ccomp.target.out + cmp $+ > $@ + +yarpgen: + curl -L -o yarpgen_v1.1.tar.gz https://github.com/intel/yarpgen/archive/v1.1.tar.gz + tar xfz yarpgen_v1.1.tar.gz + $(MAKE) CXX=g++ -C yarpgen-1.1 + cp yarpgen-1.1/yarpgen $@ + +.PHONY: all clean tests_c tests_c clean: -rm -rf ran* diff --git a/test/monniaux/yarpgen/Makefile.old b/test/monniaux/yarpgen/Makefile.old new file mode 100644 index 00000000..9da82deb --- /dev/null +++ b/test/monniaux/yarpgen/Makefile.old @@ -0,0 +1,52 @@ +YARPGEN=yarpgen +MAX=300 +PREFIX=ran%06.f +include ../rules.mk + +K1C_CCOMPFLAGS += -funprototyped -fbitfields +CCOMPFLAGS += -funprototyped -fbitfields + +TARGETS_C=$(shell seq --format $(PREFIX)/func.c 0 $(MAX)) \ + $(shell seq --format $(PREFIX)/driver.c 0 $(MAX)) \ + $(shell seq --format $(PREFIX)/init.h 0 $(MAX)) +TARGETS_CCOMP_K1C_S=$(shell seq --format $(PREFIX)/func.ccomp.k1c.s 0 $(MAX)) \ + $(shell seq --format $(PREFIX)/driver.ccomp.k1c.s 0 $(MAX)) +TARGETS_GCC_K1C_S=$(shell seq --format $(PREFIX)/func.gcc.k1c.s 0 $(MAX)) \ + $(shell seq --format $(PREFIX)/driver.gcc.k1c.s 0 $(MAX)) +TARGETS_CCOMP_HOST_S=$(shell seq --format $(PREFIX)/func.ccomp.host.s 0 $(MAX)) \ + $(shell seq --format $(PREFIX)/driver.ccomp.host.s 0 $(MAX)) +TARGETS_GCC_HOST_S=$(shell seq --format $(PREFIX)/func.gcc.host.s 0 $(MAX)) \ + $(shell seq --format $(PREFIX)/driver.gcc.host.s 0 $(MAX)) +TARGETS_CCOMP_K1C_OUT=$(shell seq --format $(PREFIX)/example.ccomp.k1c.out 0 $(MAX)) +TARGETS_GCC_K1C_OUT=$(shell seq --format $(PREFIX)/example.gcc.k1c.out 0 $(MAX)) +TARGETS_GCC_HOST_OUT=$(shell seq --format $(PREFIX)/example.gcc.host.out 0 $(MAX)) +TARGETS_CCOMP_HOST_OUT=$(shell seq --format $(PREFIX)/example.ccomp.host.out 0 $(MAX)) +TARGETS_CMP=$(shell seq --format $(PREFIX)/example.k1c.cmp 0 $(MAX)) + +all: $(TARGETS_CCOMP_K1C_OUT) $(TARGETS_GCC_K1C_OUT) $(TARGETS_GCC_HOST_OUT) $(TARGETS_CCOMP_HOST_OUT) $(TARGETS_CCOMP_K1C_S) $(TARGETS_GCC_K1C_S) $(TARGETS_GCC_HOST_S) $(TARGETS_CCOMP_HOST_S) $(TARGETS_CMP) $(TARGETS_C) + +ran%/func.ccomp.k1c.s ran%/func.gcc.k1c.s ran%/func.ccomp.host.s ran%/func.gcc.host.s : ran%/init.h + +ran%/example.ccomp.k1c: ran%/func.ccomp.k1c.o ran%/driver.ccomp.k1c.o + $(K1C_CCOMP) $(K1C_CCOMPFLAGS) $+ -o $@ + +ran%/example.gcc.k1c: ran%/func.gcc.k1c.o ran%/driver.gcc.k1c.o + $(K1C_CC) $(K1C_CFLAGS) $+ -o $@ + +ran%/example.gcc.host: ran%/func.gcc.host.o ran%/driver.gcc.host.o + $(CC) $(CFLAGS) $+ -o $@ + +ran%/example.ccomp.host: ran%/func.ccomp.host.o ran%/driver.ccomp.host.o + $(CCOMP) $(CCOMPFLAGS) $+ -o $@ + +ran%/driver.c ran%/func.c ran%/init.h: + -mkdir ran$* + $(YARPGEN) --seed=$* --out-dir=ran$*/ --std=c99 + +ran%/example.k1c.cmp : ran%/example.gcc.k1c.out ran%/example.ccomp.k1c.out + cmp $+ > $@ + +.PHONY: all clean + +clean: + -rm -rf ran* diff --git a/test/regression/Makefile b/test/regression/Makefile index 3447d6a5..97c25f6c 100644 --- a/test/regression/Makefile +++ b/test/regression/Makefile @@ -35,9 +35,9 @@ endif # but produce processor-dependent results, so no reference output in Results TESTS_DIFF=NaNs -ifeq ($(ARCH),mppa_k1c) +# FIXME ifeq ($(ARCH),mppa_k1c) TESTS_DIFF:=$(filter-out NaNs,$(TESTS_DIFF)) -endif +# endif # Other tests: should compile to .s without errors (but expect warnings) diff --git a/test/regression/Results/int64 b/test/regression/Results/int64 index af444cf6..ae8a3cc1 100644 --- a/test/regression/Results/int64 +++ b/test/regression/Results/int64 @@ -335,6 +335,48 @@ utof x = 0 stof x = 0 x = 0 +y = 52ce6b4000000063 +-x = 0 +x + y = 52ce6b4000000063 +x - y = ad3194bfffffff9d +x * y = 0 +x /u y = 0 +x %u y = 0 +x /s y = 0 +x %s y = 0 +x /u y2 = 0 +x %u y2 = 0 +x /s y3 = 0 +x %s y3 = 0 +x /u 3 = 0 +x %u 3 = 0 +x /s 3 = 0 +x %s 3 = 0 +x /u 5 = 0 +x %u 5 = 0 +x /s 5 = 0 +x %s 5 = 0 +x /u 11 = 0 +x %u 11 = 0 +x /s 11 = 0 +x %s 11 = 0 +~x = ffffffffffffffff +x & y = 0 +x | y = 52ce6b4000000063 +x ^ y = 52ce6b4000000063 +x << i = 0 +x >>u i = 0 +x >>s i = 0 +x cmpu y = lt +x cmps y = lt +utod x = 0 +dtou f = 0 +stod x = 0 +dtos f = 0 +utof x = 0 +stof x = 0 + +x = 0 y = 14057b7ef767814f -x = 0 x + y = 14057b7ef767814f @@ -755,6 +797,48 @@ utof x = 3f800000 stof x = 3f800000 x = 1 +y = 52ce6b4000000063 +-x = ffffffffffffffff +x + y = 52ce6b4000000064 +x - y = ad3194bfffffff9e +x * y = 52ce6b4000000063 +x /u y = 0 +x %u y = 1 +x /s y = 0 +x %s y = 1 +x /u y2 = 0 +x %u y2 = 1 +x /s y3 = 0 +x %s y3 = 1 +x /u 3 = 0 +x %u 3 = 1 +x /s 3 = 0 +x %s 3 = 1 +x /u 5 = 0 +x %u 5 = 1 +x /s 5 = 0 +x %s 5 = 1 +x /u 11 = 0 +x %u 11 = 1 +x /s 11 = 0 +x %s 11 = 1 +~x = fffffffffffffffe +x & y = 1 +x | y = 52ce6b4000000063 +x ^ y = 52ce6b4000000062 +x << i = 800000000 +x >>u i = 0 +x >>s i = 0 +x cmpu y = lt +x cmps y = lt +utod x = 3ff0000000000000 +dtou f = 0 +stod x = 3ff0000000000000 +dtos f = 0 +utof x = 3f800000 +stof x = 3f800000 + +x = 1 y = 9af678222e728119 -x = ffffffffffffffff x + y = 9af678222e72811a @@ -1175,6 +1259,48 @@ utof x = 5f800000 stof x = bf800000 x = ffffffffffffffff +y = 52ce6b4000000063 +-x = 1 +x + y = 52ce6b4000000062 +x - y = ad3194bfffffff9c +x * y = ad3194bfffffff9d +x /u y = 3 +x %u y = 794be3ffffffed6 +x /s y = 0 +x %s y = ffffffffffffffff +x /u y2 = 3176fe836 +x %u y2 = 3683607f +x /s y3 = 0 +x %s y3 = ffffffffffffffff +x /u 3 = 5555555555555555 +x %u 3 = 0 +x /s 3 = 0 +x %s 3 = ffffffffffffffff +x /u 5 = 3333333333333333 +x %u 5 = 0 +x /s 5 = 0 +x %s 5 = ffffffffffffffff +x /u 11 = 1745d1745d1745d1 +x %u 11 = 4 +x /s 11 = 0 +x %s 11 = ffffffffffffffff +~x = 0 +x & y = 52ce6b4000000063 +x | y = ffffffffffffffff +x ^ y = ad3194bfffffff9c +x << i = fffffff800000000 +x >>u i = 1fffffff +x >>s i = ffffffffffffffff +x cmpu y = gt +x cmps y = lt +utod x = 43f0000000000000 +dtou f = 68db8bac710cb +stod x = bff0000000000000 +dtos f = 0 +utof x = 5f800000 +stof x = bf800000 + +x = ffffffffffffffff y = 62354cda6226d1f3 -x = 1 x + y = 62354cda6226d1f2 @@ -1595,6 +1721,48 @@ utof x = 4f000000 stof x = 4f000000 x = 7fffffff +y = 52ce6b4000000063 +-x = ffffffff80000001 +x + y = 52ce6b4080000062 +x - y = ad3194c07fffff9c +x * y = ad3194f17fffff9d +x /u y = 0 +x %u y = 7fffffff +x /s y = 0 +x %s y = 7fffffff +x /u y2 = 1 +x %u y2 = 2d3194bf +x /s y3 = 1 +x %s y3 = 2d3194bf +x /u 3 = 2aaaaaaa +x %u 3 = 1 +x /s 3 = 2aaaaaaa +x %s 3 = 1 +x /u 5 = 19999999 +x %u 5 = 2 +x /s 5 = 19999999 +x %s 5 = 2 +x /u 11 = ba2e8ba +x %u 11 = 1 +x /s 11 = ba2e8ba +x %s 11 = 1 +~x = ffffffff80000000 +x & y = 63 +x | y = 52ce6b407fffffff +x ^ y = 52ce6b407fffff9c +x << i = fffffff800000000 +x >>u i = 0 +x >>s i = 0 +x cmpu y = lt +x cmps y = lt +utod x = 41dfffffffc00000 +dtou f = 346dc +stod x = 41dfffffffc00000 +dtos f = 346dc +utof x = 4f000000 +stof x = 4f000000 + +x = 7fffffff y = 144093704fadba5d -x = ffffffff80000001 x + y = 14409370cfadba5c @@ -2015,6 +2183,48 @@ utof x = 4f000000 stof x = 4f000000 x = 80000000 +y = 52ce6b4000000063 +-x = ffffffff80000000 +x + y = 52ce6b4080000063 +x - y = ad3194c07fffff9d +x * y = 3180000000 +x /u y = 0 +x %u y = 80000000 +x /s y = 0 +x %s y = 80000000 +x /u y2 = 1 +x %u y2 = 2d3194c0 +x /s y3 = 1 +x %s y3 = 2d3194c0 +x /u 3 = 2aaaaaaa +x %u 3 = 2 +x /s 3 = 2aaaaaaa +x %s 3 = 2 +x /u 5 = 19999999 +x %u 5 = 3 +x /s 5 = 19999999 +x %s 5 = 3 +x /u 11 = ba2e8ba +x %u 11 = 2 +x /s 11 = ba2e8ba +x %s 11 = 2 +~x = ffffffff7fffffff +x & y = 0 +x | y = 52ce6b4080000063 +x ^ y = 52ce6b4080000063 +x << i = 0 +x >>u i = 0 +x >>s i = 0 +x cmpu y = lt +x cmps y = lt +utod x = 41e0000000000000 +dtou f = 346dc +stod x = 41e0000000000000 +dtos f = 346dc +utof x = 4f000000 +stof x = 4f000000 + +x = 80000000 y = 7b985bc1e7bce4d7 -x = ffffffff80000000 x + y = 7b985bc267bce4d7 @@ -2435,6 +2645,48 @@ utof x = 5f000000 stof x = 5f000000 x = 7fffffffffffffff +y = 52ce6b4000000063 +-x = 8000000000000001 +x + y = d2ce6b4000000062 +x - y = 2d3194bfffffff9c +x * y = 2d3194bfffffff9d +x /u y = 1 +x %u y = 2d3194bfffffff9c +x /s y = 1 +x %s y = 2d3194bfffffff9c +x /u y2 = 18bb7f41b +x %u y2 = 1b41b03f +x /s y3 = 18bb7f41b +x %s y3 = 1b41b03f +x /u 3 = 2aaaaaaaaaaaaaaa +x %u 3 = 1 +x /s 3 = 2aaaaaaaaaaaaaaa +x %s 3 = 1 +x /u 5 = 1999999999999999 +x %u 5 = 2 +x /s 5 = 1999999999999999 +x %s 5 = 2 +x /u 11 = ba2e8ba2e8ba2e8 +x %u 11 = 7 +x /s 11 = ba2e8ba2e8ba2e8 +x %s 11 = 7 +~x = 8000000000000000 +x & y = 52ce6b4000000063 +x | y = 7fffffffffffffff +x ^ y = 2d3194bfffffff9c +x << i = fffffff800000000 +x >>u i = fffffff +x >>s i = fffffff +x cmpu y = gt +x cmps y = gt +utod x = 43e0000000000000 +dtou f = 346dc5d638865 +stod x = 43e0000000000000 +dtos f = 346dc5d638865 +utof x = 5f000000 +stof x = 5f000000 + +x = 7fffffffffffffff y = a220229ec164ffe1 -x = 8000000000000001 x + y = 2220229ec164ffe0 @@ -2855,6 +3107,48 @@ utof x = 5f000000 stof x = df000000 x = 8000000000000000 +y = 52ce6b4000000063 +-x = 8000000000000000 +x + y = d2ce6b4000000063 +x - y = 2d3194bfffffff9d +x * y = 8000000000000000 +x /u y = 1 +x %u y = 2d3194bfffffff9d +x /s y = ffffffffffffffff +x %s y = d2ce6b4000000063 +x /u y2 = 18bb7f41b +x %u y2 = 1b41b040 +x /s y3 = fffffffe74480be5 +x %s y3 = ffffffffe4be4fc0 +x /u 3 = 2aaaaaaaaaaaaaaa +x %u 3 = 2 +x /s 3 = d555555555555556 +x %s 3 = fffffffffffffffe +x /u 5 = 1999999999999999 +x %u 5 = 3 +x /s 5 = e666666666666667 +x %s 5 = fffffffffffffffd +x /u 11 = ba2e8ba2e8ba2e8 +x %u 11 = 8 +x /s 11 = f45d1745d1745d18 +x %s 11 = fffffffffffffff8 +~x = 7fffffffffffffff +x & y = 0 +x | y = d2ce6b4000000063 +x ^ y = d2ce6b4000000063 +x << i = 0 +x >>u i = 10000000 +x >>s i = fffffffff0000000 +x cmpu y = gt +x cmps y = lt +utod x = 43e0000000000000 +dtou f = 346dc5d638865 +stod x = c3e0000000000000 +dtos f = fffcb923a29c779b +utof x = 5f000000 +stof x = df000000 + +x = 8000000000000000 y = c73aa0d9a415dfb -x = 8000000000000000 x + y = 8c73aa0d9a415dfb @@ -3275,6 +3569,48 @@ utof x = 4f800000 stof x = 4f800000 x = 100000003 +y = 52ce6b4000000063 +-x = fffffffefffffffd +x + y = 52ce6b4100000066 +x - y = ad3194c0ffffffa0 +x * y = f86b422300000129 +x /u y = 0 +x %u y = 100000003 +x /s y = 0 +x %s y = 100000003 +x /u y2 = 3 +x %u y2 = 794be43 +x /s y3 = 3 +x %s y3 = 794be43 +x /u 3 = 55555556 +x %u 3 = 1 +x /s 3 = 55555556 +x %s 3 = 1 +x /u 5 = 33333333 +x %u 5 = 4 +x /s 5 = 33333333 +x %s 5 = 4 +x /u 11 = 1745d174 +x %u 11 = 7 +x /s 11 = 1745d174 +x %s 11 = 7 +~x = fffffffefffffffc +x & y = 3 +x | y = 52ce6b4100000063 +x ^ y = 52ce6b4100000060 +x << i = 1800000000 +x >>u i = 0 +x >>s i = 0 +x cmpu y = lt +x cmps y = lt +utod x = 41f0000000300000 +dtou f = 68db8 +stod x = 41f0000000300000 +dtos f = 68db8 +utof x = 4f800000 +stof x = 4f800000 + +x = 100000003 y = e9bcd26890f095a5 -x = fffffffefffffffd x + y = e9bcd26990f095a8 @@ -3358,47 +3694,467 @@ dtos f = 14bb101261e18 utof x = 5e4a72c9 stof x = 5e4a72c9 -x = 8362aa9340fe215f -y = f986342416ec8002 --x = 7c9d556cbf01dea1 -x + y = 7ce8deb757eaa161 -x - y = 89dc766f2a11a15d -x * y = e4a2b426803fc2be +x = 52ce6b4000000063 +y = 0 +-x = ad3194bfffffff9d +x + y = 52ce6b4000000063 +x - y = 52ce6b4000000063 +x * y = 0 x /u y = 0 -x %u y = 8362aa9340fe215f -x /s y = 13 -x %s y = fe6ccbe58d70a139 -x /u y2 = 86cb918b -x %u y2 = 910b6dd3 -x /s y3 = 133e437097 -x %s y3 = fffffffffe99a023 -x /u 3 = 2bcb8e3115aa0b1f -x %u 3 = 2 -x /s 3 = d67638dbc054b5cb -x %s 3 = fffffffffffffffe -x /u 5 = 1a46eeea4032d379 -x %u 5 = 2 -x /s 5 = e713bbb70cffa047 -x %s 5 = fffffffffffffffc -x /u 11 = bf1b26a7a45a5f1 -x %u 11 = 4 -x /s 11 = f4abe0f61d2e6020 -x %s 11 = ffffffffffffffff -~x = 7c9d556cbf01dea0 -x & y = 8102200000ec0002 -x | y = fbe6beb756fea15f -x ^ y = 7ae49eb75612a15d -x << i = d8aaa4d03f8857c -x >>u i = 20d8aaa4d03f8857 -x >>s i = e0d8aaa4d03f8857 +x %u y = 0 +x /s y = 0 +x %s y = 0 +x /u y2 = 0 +x %u y2 = 0 +x /s y3 = 0 +x %s y3 = 0 +x /u 3 = 1b9a23c000000021 +x %u 3 = 0 +x /s 3 = 1b9a23c000000021 +x %s 3 = 0 +x /u 5 = 108faf0ccccccce0 +x %u 5 = 3 +x /s 5 = 108faf0ccccccce0 +x %s 5 = 3 +x /u 11 = 7872105d1745d20 +x %u 11 = 3 +x /s 11 = 7872105d1745d20 +x %s 11 = 3 +~x = ad3194bfffffff9c +x & y = 0 +x | y = 52ce6b4000000063 +x ^ y = 52ce6b4000000063 +x << i = 52ce6b4000000063 +x >>u i = 52ce6b4000000063 +x >>s i = 52ce6b4000000063 +x cmpu y = gt +x cmps y = gt +utod x = 43d4b39ad0000000 +dtou f = 21eadf559b3d0 +stod x = 43d4b39ad0000000 +dtos f = 21eadf559b3d0 +utof x = 5ea59cd7 +stof x = 5ea59cd7 + +x = 52ce6b4000000063 +y = 1 +-x = ad3194bfffffff9d +x + y = 52ce6b4000000064 +x - y = 52ce6b4000000062 +x * y = 52ce6b4000000063 +x /u y = 52ce6b4000000063 +x %u y = 0 +x /s y = 52ce6b4000000063 +x %s y = 0 +x /u y2 = 0 +x %u y2 = 0 +x /s y3 = 0 +x %s y3 = 0 +x /u 3 = 1b9a23c000000021 +x %u 3 = 0 +x /s 3 = 1b9a23c000000021 +x %s 3 = 0 +x /u 5 = 108faf0ccccccce0 +x %u 5 = 3 +x /s 5 = 108faf0ccccccce0 +x %s 5 = 3 +x /u 11 = 7872105d1745d20 +x %u 11 = 3 +x /s 11 = 7872105d1745d20 +x %s 11 = 3 +~x = ad3194bfffffff9c +x & y = 1 +x | y = 52ce6b4000000063 +x ^ y = 52ce6b4000000062 +x << i = a59cd680000000c6 +x >>u i = 296735a000000031 +x >>s i = 296735a000000031 +x cmpu y = gt +x cmps y = gt +utod x = 43d4b39ad0000000 +dtou f = 21eadf559b3d0 +stod x = 43d4b39ad0000000 +dtos f = 21eadf559b3d0 +utof x = 5ea59cd7 +stof x = 5ea59cd7 + +x = 52ce6b4000000063 +y = ffffffffffffffff +-x = ad3194bfffffff9d +x + y = 52ce6b4000000062 +x - y = 52ce6b4000000064 +x * y = ad3194bfffffff9d +x /u y = 0 +x %u y = 52ce6b4000000063 +x /s y = ad3194bfffffff9d +x %s y = 0 +x /u y2 = 52ce6b40 +x %u y2 = 52ce6ba3 +x /s y3 = ad3194bfffffff9d +x %s y3 = 0 +x /u 3 = 1b9a23c000000021 +x %u 3 = 0 +x /s 3 = 1b9a23c000000021 +x %s 3 = 0 +x /u 5 = 108faf0ccccccce0 +x %u 5 = 3 +x /s 5 = 108faf0ccccccce0 +x %s 5 = 3 +x /u 11 = 7872105d1745d20 +x %u 11 = 3 +x /s 11 = 7872105d1745d20 +x %s 11 = 3 +~x = ad3194bfffffff9c +x & y = 52ce6b4000000063 +x | y = ffffffffffffffff +x ^ y = ad3194bfffffff9c +x << i = 8000000000000000 +x >>u i = 0 +x >>s i = 0 +x cmpu y = lt +x cmps y = gt +utod x = 43d4b39ad0000000 +dtou f = 21eadf559b3d0 +stod x = 43d4b39ad0000000 +dtos f = 21eadf559b3d0 +utof x = 5ea59cd7 +stof x = 5ea59cd7 + +x = 52ce6b4000000063 +y = 7fffffff +-x = ad3194bfffffff9d +x + y = 52ce6b4080000062 +x - y = 52ce6b3f80000064 +x * y = ad3194f17fffff9d +x /u y = a59cd681 +x %u y = 259cd6e4 +x /s y = a59cd681 +x %s y = 259cd6e4 +x /u y2 = 0 +x %u y2 = 0 +x /s y3 = 0 +x %s y3 = 0 +x /u 3 = 1b9a23c000000021 +x %u 3 = 0 +x /s 3 = 1b9a23c000000021 +x %s 3 = 0 +x /u 5 = 108faf0ccccccce0 +x %u 5 = 3 +x /s 5 = 108faf0ccccccce0 +x %s 5 = 3 +x /u 11 = 7872105d1745d20 +x %u 11 = 3 +x /s 11 = 7872105d1745d20 +x %s 11 = 3 +~x = ad3194bfffffff9c +x & y = 63 +x | y = 52ce6b407fffffff +x ^ y = 52ce6b407fffff9c +x << i = 8000000000000000 +x >>u i = 0 +x >>s i = 0 +x cmpu y = gt +x cmps y = gt +utod x = 43d4b39ad0000000 +dtou f = 21eadf559b3d0 +stod x = 43d4b39ad0000000 +dtos f = 21eadf559b3d0 +utof x = 5ea59cd7 +stof x = 5ea59cd7 + +x = 52ce6b4000000063 +y = 80000000 +-x = ad3194bfffffff9d +x + y = 52ce6b4080000063 +x - y = 52ce6b3f80000063 +x * y = 3180000000 +x /u y = a59cd680 +x %u y = 63 +x /s y = a59cd680 +x %s y = 63 +x /u y2 = 0 +x %u y2 = 0 +x /s y3 = 0 +x %s y3 = 0 +x /u 3 = 1b9a23c000000021 +x %u 3 = 0 +x /s 3 = 1b9a23c000000021 +x %s 3 = 0 +x /u 5 = 108faf0ccccccce0 +x %u 5 = 3 +x /s 5 = 108faf0ccccccce0 +x %s 5 = 3 +x /u 11 = 7872105d1745d20 +x %u 11 = 3 +x /s 11 = 7872105d1745d20 +x %s 11 = 3 +~x = ad3194bfffffff9c +x & y = 0 +x | y = 52ce6b4080000063 +x ^ y = 52ce6b4080000063 +x << i = 52ce6b4000000063 +x >>u i = 52ce6b4000000063 +x >>s i = 52ce6b4000000063 +x cmpu y = gt +x cmps y = gt +utod x = 43d4b39ad0000000 +dtou f = 21eadf559b3d0 +stod x = 43d4b39ad0000000 +dtos f = 21eadf559b3d0 +utof x = 5ea59cd7 +stof x = 5ea59cd7 + +x = 52ce6b4000000063 +y = 7fffffffffffffff +-x = ad3194bfffffff9d +x + y = d2ce6b4000000062 +x - y = d2ce6b4000000064 +x * y = 2d3194bfffffff9d +x /u y = 0 +x %u y = 52ce6b4000000063 +x /s y = 0 +x %s y = 52ce6b4000000063 +x /u y2 = a59cd681 +x %u y2 = 259cd6e4 +x /s y3 = a59cd681 +x %s y3 = 259cd6e4 +x /u 3 = 1b9a23c000000021 +x %u 3 = 0 +x /s 3 = 1b9a23c000000021 +x %s 3 = 0 +x /u 5 = 108faf0ccccccce0 +x %u 5 = 3 +x /s 5 = 108faf0ccccccce0 +x %s 5 = 3 +x /u 11 = 7872105d1745d20 +x %u 11 = 3 +x /s 11 = 7872105d1745d20 +x %s 11 = 3 +~x = ad3194bfffffff9c +x & y = 52ce6b4000000063 +x | y = 7fffffffffffffff +x ^ y = 2d3194bfffffff9c +x << i = 8000000000000000 +x >>u i = 0 +x >>s i = 0 x cmpu y = lt x cmps y = lt -utod x = 43e06c5552681fc4 -dtou f = 35d0c262d14d7 -stod x = c3df27555b2fc078 -dtos f = fffccf536b66040d -utof x = 5f0362ab -stof x = def93aab +utod x = 43d4b39ad0000000 +dtou f = 21eadf559b3d0 +stod x = 43d4b39ad0000000 +dtos f = 21eadf559b3d0 +utof x = 5ea59cd7 +stof x = 5ea59cd7 + +x = 52ce6b4000000063 +y = 8000000000000000 +-x = ad3194bfffffff9d +x + y = d2ce6b4000000063 +x - y = d2ce6b4000000063 +x * y = 8000000000000000 +x /u y = 0 +x %u y = 52ce6b4000000063 +x /s y = 0 +x %s y = 52ce6b4000000063 +x /u y2 = a59cd680 +x %u y2 = 63 +x /s y3 = ffffffff5a632980 +x %s y3 = 63 +x /u 3 = 1b9a23c000000021 +x %u 3 = 0 +x /s 3 = 1b9a23c000000021 +x %s 3 = 0 +x /u 5 = 108faf0ccccccce0 +x %u 5 = 3 +x /s 5 = 108faf0ccccccce0 +x %s 5 = 3 +x /u 11 = 7872105d1745d20 +x %u 11 = 3 +x /s 11 = 7872105d1745d20 +x %s 11 = 3 +~x = ad3194bfffffff9c +x & y = 0 +x | y = d2ce6b4000000063 +x ^ y = d2ce6b4000000063 +x << i = 52ce6b4000000063 +x >>u i = 52ce6b4000000063 +x >>s i = 52ce6b4000000063 +x cmpu y = lt +x cmps y = gt +utod x = 43d4b39ad0000000 +dtou f = 21eadf559b3d0 +stod x = 43d4b39ad0000000 +dtos f = 21eadf559b3d0 +utof x = 5ea59cd7 +stof x = 5ea59cd7 + +x = 52ce6b4000000063 +y = 100000003 +-x = ad3194bfffffff9d +x + y = 52ce6b4100000066 +x - y = 52ce6b3f00000060 +x * y = f86b422300000129 +x /u y = 52ce6b3f +x %u y = 794bea6 +x /s y = 52ce6b3f +x %s y = 794bea6 +x /u y2 = 52ce6b4000000063 +x %u y2 = 0 +x /s y3 = 52ce6b4000000063 +x %s y3 = 0 +x /u 3 = 1b9a23c000000021 +x %u 3 = 0 +x /s 3 = 1b9a23c000000021 +x %s 3 = 0 +x /u 5 = 108faf0ccccccce0 +x %u 5 = 3 +x /s 5 = 108faf0ccccccce0 +x %s 5 = 3 +x /u 11 = 7872105d1745d20 +x %u 11 = 3 +x /s 11 = 7872105d1745d20 +x %s 11 = 3 +~x = ad3194bfffffff9c +x & y = 3 +x | y = 52ce6b4100000063 +x ^ y = 52ce6b4100000060 +x << i = 96735a0000000318 +x >>u i = a59cd680000000c +x >>s i = a59cd680000000c +x cmpu y = gt +x cmps y = gt +utod x = 43d4b39ad0000000 +dtou f = 21eadf559b3d0 +stod x = 43d4b39ad0000000 +dtos f = 21eadf559b3d0 +utof x = 5ea59cd7 +stof x = 5ea59cd7 + +x = 52ce6b4000000063 +y = 52ce6b4000000063 +-x = ad3194bfffffff9d +x + y = a59cd680000000c6 +x - y = 0 +x * y = ba6f38000002649 +x /u y = 1 +x %u y = 0 +x /s y = 1 +x %s y = 0 +x /u y2 = 100000000 +x %u y2 = 63 +x /s y3 = 100000000 +x %s y3 = 63 +x /u 3 = 1b9a23c000000021 +x %u 3 = 0 +x /s 3 = 1b9a23c000000021 +x %s 3 = 0 +x /u 5 = 108faf0ccccccce0 +x %u 5 = 3 +x /s 5 = 108faf0ccccccce0 +x %s 5 = 3 +x /u 11 = 7872105d1745d20 +x %u 11 = 3 +x /s 11 = 7872105d1745d20 +x %s 11 = 3 +~x = ad3194bfffffff9c +x & y = 52ce6b4000000063 +x | y = 52ce6b4000000063 +x ^ y = 0 +x << i = 31800000000 +x >>u i = a59cd68 +x >>s i = a59cd68 +x cmpu y = eq +x cmps y = eq +utod x = 43d4b39ad0000000 +dtou f = 21eadf559b3d0 +stod x = 43d4b39ad0000000 +dtos f = 21eadf559b3d0 +utof x = 5ea59cd7 +stof x = 5ea59cd7 + +x = 52ce6b4000000063 +y = 8362aa9340fe215f +-x = ad3194bfffffff9d +x + y = d63115d340fe21c2 +x - y = cf6bc0acbf01df04 +x * y = 8f1503b22246e7bd +x /u y = 0 +x %u y = 52ce6b4000000063 +x /s y = 0 +x %s y = 52ce6b4000000063 +x /u y2 = a158656f +x %u y2 = 5640ba6 +x /s y3 = ffffffff55e35d11 +x %s y3 = 5f2245a0 +x /u 3 = 1b9a23c000000021 +x %u 3 = 0 +x /s 3 = 1b9a23c000000021 +x %s 3 = 0 +x /u 5 = 108faf0ccccccce0 +x %u 5 = 3 +x /s 5 = 108faf0ccccccce0 +x %s 5 = 3 +x /u 11 = 7872105d1745d20 +x %u 11 = 3 +x /s 11 = 7872105d1745d20 +x %s 11 = 3 +~x = ad3194bfffffff9c +x & y = 2422a0000000043 +x | y = d3eeebd340fe217f +x ^ y = d1acc1d340fe213c +x << i = 3180000000 +x >>u i = a59cd680 +x >>s i = a59cd680 +x cmpu y = lt +x cmps y = gt +utod x = 43d4b39ad0000000 +dtou f = 21eadf559b3d0 +stod x = 43d4b39ad0000000 +dtos f = 21eadf559b3d0 +utof x = 5ea59cd7 +stof x = 5ea59cd7 + +x = f986342416ec8002 +y = 52ce6b4000000063 +-x = 679cbdbe9137ffe +x + y = 4c549f6416ec8065 +x - y = a6b7c8e416ec7f9f +x * y = b9230074dd7580c6 +x /u y = 3 +x %u y = 11af26416ec7ed9 +x /s y = 0 +x %s y = f986342416ec8002 +x /u y2 = 3036abea3 +x %u y2 = 164b642 +x /s y3 = ffffffffebfad66d +x %s y3 = ffffffffcae155c2 +x /u 3 = 532cbc0c07a42aab +x %u 3 = 1 +x /s 3 = fdd766b6b24ed556 +x %s 3 = 0 +x /u 5 = 31e7a40737c8e666 +x %u 5 = 4 +x /s 5 = feb470d40495b334 +x %s 5 = fffffffffffffffe +x /u 11 = 16af1c0347e6f45d +x %u 11 = 3 +x /s 11 = ff694a8eeacfae8c +x %s 11 = fffffffffffffffe +~x = 679cbdbe9137ffd +x & y = 5086200000000002 +x | y = fbce7f6416ec8063 +x ^ y = ab485f6416ec8061 +x << i = b764001000000000 +x >>u i = 1f30c684 +x >>s i = ffffffffff30c684 +x cmpu y = gt +x cmps y = lt +utod x = 43ef30c68482dd90 +dtou f = 6634832136daf +stod x = c399e72f6fa44e00 +dtos f = ffffd58f774c5ce4 +utof x = 5f798634 +stof x = dccf397b x = 368083376ba4ffa9 y = 6912b247b79a4904 @@ -7558,3 +8314,45 @@ dtos f = b3fdf698d581 utof x = 5ddbb784 stof x = 5ddbb784 +x = ca9a47c1649d27a7 +y = d56d650045e652aa +-x = 3565b83e9b62d859 +x + y = a007acc1aa837a51 +x - y = f52ce2c11eb6d4fd +x * y = 630e3c88ca19d2e6 +x /u y = 0 +x %u y = ca9a47c1649d27a7 +x /s y = 1 +x %s y = f52ce2c11eb6d4fd +x /u y2 = f3042098 +x %u y2 = 6b092fa7 +x /s y3 = 141176486 +x %s y3 = ffffffffdee649a7 +x /u 3 = 4388c295cc34628d +x %u 3 = 0 +x /s 3 = ee336d4076df0d38 +x %s 3 = ffffffffffffffff +x /u 5 = 2885418d141f6e54 +x %u 5 = 3 +x /s 5 = f5520e59e0ec3b22 +x %s 5 = fffffffffffffffd +x /u 11 = 126b1dcbc3541ae0 +x %u 11 = 7 +x /s 11 = fb254c57663cd510 +x %s 11 = fffffffffffffff7 +~x = 3565b83e9b62d858 +x & y = c0084500448402a2 +x | y = dfff67c165ff77af +x ^ y = 1ff722c1217b750d +x << i = 749e9c0000000000 +x >>u i = 32a691 +x >>s i = fffffffffff2a691 +x cmpu y = lt +x cmps y = lt +utod x = 43e95348f82c93a5 +dtou f = 52fc6dac31674 +stod x = c3cab2dc1f4db16c +dtos f = fffea20e1ffc05aa +utof x = 5f4a9a48 +stof x = de5596e1 + diff --git a/test/regression/int64.c b/test/regression/int64.c index d9785e95..0da9602d 100644 --- a/test/regression/int64.c +++ b/test/regression/int64.c @@ -103,7 +103,8 @@ u64 special_values[] = { 0x80000000LLU, 0x7FFFFFFFFFFFFFFFLLU, 0x8000000000000000LLU, - 0x100000003LLU + 0x100000003LLU, + 0x52ce6b4000000063LLU }; #define NUM_SPECIAL_VALUES (sizeof(special_values) / sizeof(u64)) diff --git a/test/regression/packedstruct1.c b/test/regression/packedstruct1.c index ac68c698..b805c92a 100644 --- a/test/regression/packedstruct1.c +++ b/test/regression/packedstruct1.c @@ -23,9 +23,9 @@ void test1(void) struct s1 s1; printf("sizeof(struct s1) = %d\n", szof(s1)); printf("precomputed sizeof(struct s1) = %d\n", bszof(s1)); - printf("offsetOf(x) = %d, offsetOf(y) = %d, offsetOf(z) = %d\n", + printf("offsetof(x) = %d, offsetof(y) = %d, offsetof(z) = %d\n", offsetOf(s1,x), offsetOf(s1,y), offsetOf(s1,z)); - printf("precomputed offsetOf(x) = %d, offsetOf(y) = %d, offsetOf(z) = %d\n", + printf("precomputed offsetof(x) = %d, offsetof(y) = %d, offsetof(z) = %d\n", boffsetof(s1,x), boffsetof(s1,y), boffsetof(s1,z)); s1.x = 123; s1.y = -456; s1.z = 3.14159; printf("s1 = {x = %d, y = %d, z = %.5f}\n\n", s1.x, s1.y, s1.z); @@ -44,9 +44,9 @@ void test2(void) printf("sizeof(struct s2) = %d\n", szof(s2)); printf("precomputed sizeof(struct s2) = %d\n", bszof(s2)); printf("&s2 mod 16 = %d\n", ((int) &s2) & 0xF); - printf("offsetOf(x) = %d, offsetOf(y) = %d, offsetOf(z) = %d\n", + printf("offsetof(x) = %d, offsetof(y) = %d, offsetof(z) = %d\n", offsetOf(s2,x), offsetOf(s2,y), offsetOf(s2,z)); - printf("precomputed offsetOf(x) = %d, offsetOf(y) = %d, offsetOf(z) = %d\n", + printf("precomputed offsetof(x) = %d, offsetof(y) = %d, offsetof(z) = %d\n", boffsetof(s2,x), boffsetof(s2,y), boffsetof(s2,z)); s2.x = 12345; s2.y = -456; s2.z = 3.14159; printf("s2 = {x = %d, y = %d, z = %.5f}\n\n", s2.x, s2.y, s2.z); @@ -73,8 +73,8 @@ void test3(void) printf("sizeof(struct s3) = %d\n", szof(s3)); printf("precomputed sizeof(struct s3) = %d\n", bszof(s3)); - printf("offsetOf(s) = %d\n", offsetOf(s3,s)); - printf("precomputed offsetOf(s) = %d\n", boffsetof(s3,s)); + printf("offsetof(s) = %d\n", offsetOf(s3,s)); + printf("precomputed offsetof(s) = %d\n", boffsetof(s3,s)); s3.x = 123; s3.y = 45678; s3.z = 0x80000001U; @@ -103,9 +103,9 @@ void test4(void) printf("sizeof(struct s4) = %d\n", szof(s4)); printf("precomputed sizeof(struct s4) = %d\n", bszof(s4)); - printf("offsetOf(x) = %d, offsetOf(y) = %d, offsetOf(z) = %d\n", + printf("offsetof(x) = %d, offsetof(y) = %d, offsetof(z) = %d\n", offsetOf(s4,x), offsetOf(s4,y), offsetOf(s4,z)); - printf("precomputed offsetOf(x) = %d, offsetOf(y) = %d, offsetOf(z) = %d\n", + printf("precomputed offsetof(x) = %d, offsetof(y) = %d, offsetof(z) = %d\n", boffsetof(s4,x), boffsetof(s4,y), boffsetof(s4,z)); s4.x = 123; s4.y = -456; s4.z = 3.14159; printf("s4 = {x = %d, y = %d, z = %.5f}\n\n", s4.x, s4.y, s4.z); @@ -121,9 +121,9 @@ void test5(void) printf("sizeof(struct s5) = %d\n", szof(s5)); printf("precomputed sizeof(struct s5) = %d\n", bszof(s5)); - printf("offsetOf(x) = %d, offsetOf(y) = %d, offsetOf(z) = %d\n", + printf("offsetof(x) = %d, offsetof(y) = %d, offsetof(z) = %d\n", offsetOf(s5,x), offsetOf(s5,y), offsetOf(s5,z)); - printf("precomputed offsetOf(x) = %d, offsetOf(y) = %d, offsetOf(z) = %d\n", + printf("precomputed offsetof(x) = %d, offsetof(y) = %d, offsetof(z) = %d\n", boffsetof(s5,x), boffsetof(s5,y), boffsetof(s5,z)); s5.x = 123; s5.y = -456; s5.z = 3.14159; printf("s5 = {x = %d, y = %d, z = %.5f}\n\n", s5.x, s5.y, s5.z); @@ -139,9 +139,9 @@ void test6(void) printf("sizeof(struct s6) = %d\n", szof(s6)); printf("precomputed sizeof(struct s6) = %d\n", bszof(s6)); - printf("offsetOf(x) = %d, offsetOf(y) = %d, offsetOf(z) = %d\n", + printf("offsetof(x) = %d, offsetof(y) = %d, offsetof(z) = %d\n", offsetOf(s6,x), offsetOf(s6,y), offsetOf(s6,z)); - printf("precomputed offsetOf(x) = %d, offsetOf(y) = %d, offsetOf(z) = %d\n", + printf("precomputed offsetof(x) = %d, offsetof(y) = %d, offsetof(z) = %d\n", boffsetof(s6,x), boffsetof(s6,y), boffsetof(s6,z)); s62.x = 123; s62.y = -456; s62.z = 3.14159; printf("s62 = {x = %d, y = %d, z = %.5f}\n\n", s62.x, s62.y, s62.z); diff --git a/x86/Asmexpand.ml b/x86/Asmexpand.ml index c82d406e..b8353046 100644 --- a/x86/Asmexpand.ml +++ b/x86/Asmexpand.ml @@ -251,7 +251,7 @@ let expand_builtin_va_start_32 r = invalid_arg "Fatal error: va_start used in non-vararg function"; let ofs = Int32.(add (add !PrintAsmaux.current_function_stacksize 4l) - (mul 4l (Z.to_int32 (Conventions1.size_arguments + (mul 4l (Z.to_int32 (Conventions.size_arguments (get_current_function_sig ()))))) in emit (Pleal (RAX, linear_addr RSP (Z.of_uint32 ofs))); emit (Pmovl_mr (linear_addr r _0z, RAX)) diff --git a/x86/CSE2deps.v b/x86/CSE2deps.v new file mode 100644 index 00000000..f4d9e254 --- /dev/null +++ b/x86/CSE2deps.v @@ -0,0 +1,24 @@ +Require Import BoolEqual Coqlib. +Require Import AST Integers Floats. +Require Import Values Memory Globalenvs Events. +Require Import Op. + +Definition can_swap_accesses_ofs ofsr chunkr ofsw chunkw := + (0 <=? ofsw) && (ofsw <=? (Ptrofs.modulus - largest_size_chunk)) + && (0 <=? ofsr) && (ofsr <=? (Ptrofs.modulus - largest_size_chunk)) + && ((ofsw + size_chunk chunkw <=? ofsr) || + (ofsr + size_chunk chunkr <=? ofsw)). + +Definition may_overlap chunk addr args chunk' addr' args' := + match addr, addr', args, args' with + | (Aindexed ofs), (Aindexed ofs'), + (base :: nil), (base' :: nil) => + if peq base base' + then negb (can_swap_accesses_ofs ofs' chunk' ofs chunk) + else true + | (Aglobal symb ofs), (Aglobal symb' ofs'), nil, nil => + if peq symb symb' + then negb (can_swap_accesses_ofs (Ptrofs.unsigned ofs') chunk' (Ptrofs.unsigned ofs) chunk) + else false + | _, _, _, _ => true + end. diff --git a/x86/CSE2depsproof.v b/x86/CSE2depsproof.v new file mode 100644 index 00000000..1e913254 --- /dev/null +++ b/x86/CSE2depsproof.v @@ -0,0 +1,253 @@ +Require Import Coqlib Maps Errors Integers Floats Lattice Kildall. +Require Import AST Linking. +Require Import Memory Registers Op RTL Maps. + +Require Import Globalenvs Values. +Require Import Linking Values Memory Globalenvs Events Smallstep. +Require Import Registers Op RTL. +Require Import CSE2 CSE2deps. +Require Import Lia. + +Section SOUNDNESS. + Variable F V : Type. + Variable genv: Genv.t F V. + Variable sp : val. + +Section MEMORY_WRITE. + Variable m m2 : mem. + Variable chunkw chunkr : memory_chunk. + Variable base : val. + + Variable addrw addrr valw : val. + Hypothesis STORE : Mem.storev chunkw m addrw valw = Some m2. + + Section INDEXED_AWAY. + Variable ofsw ofsr : Z. + Hypothesis ADDRW : eval_addressing genv sp + (Aindexed ofsw) (base :: nil) = Some addrw. + Hypothesis ADDRR : eval_addressing genv sp + (Aindexed ofsr) (base :: nil) = Some addrr. + + Lemma load_store_away1 : + forall RANGEW : 0 <= ofsw <= Ptrofs.modulus - largest_size_chunk, + forall RANGER : 0 <= ofsr <= Ptrofs.modulus - largest_size_chunk, + forall SWAPPABLE : ofsw + size_chunk chunkw <= ofsr + \/ ofsr + size_chunk chunkr <= ofsw, + Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr. + Proof. + intros. + + pose proof (max_size_chunk chunkr) as size_chunkr_bounded. + pose proof (max_size_chunk chunkw) as size_chunkw_bounded. + try change (Ptrofs.modulus - largest_size_chunk) with 4294967288 in *. + try change (Ptrofs.modulus - largest_size_chunk) with 18446744073709551608 in *. + destruct addrr ; simpl in * ; trivial. + unfold eval_addressing, eval_addressing32, eval_addressing64 in *. + destruct Archi.ptr64 eqn:PTR64; destruct base; simpl in *; try discriminate. + rewrite PTR64 in *. + + inv ADDRR. + inv ADDRW. + eapply Mem.load_store_other with (chunk := chunkw) (v := valw) (b := b). + exact STORE. + right. + + all: try (destruct (Ptrofs.unsigned_add_either i0 + (Ptrofs.of_int (Int.repr ofsr))) as [OFSR | OFSR]; + rewrite OFSR). + all: try (destruct (Ptrofs.unsigned_add_either i0 + (Ptrofs.of_int64 (Int64.repr ofsr))) as [OFSR | OFSR]; + rewrite OFSR). + all: try (destruct (Ptrofs.unsigned_add_either i0 + (Ptrofs.of_int (Int.repr ofsw))) as [OFSW | OFSW]; + rewrite OFSW). + all: try (destruct (Ptrofs.unsigned_add_either i0 + (Ptrofs.of_int64 (Int64.repr ofsw))) as [OFSW | OFSW]; + rewrite OFSW). + + all: unfold Ptrofs.of_int64. + all: unfold Ptrofs.of_int. + + + all: repeat rewrite Int.unsigned_repr by (change Int.max_unsigned with 4294967295; lia). + all: repeat rewrite Ptrofs.unsigned_repr by (change Ptrofs.max_unsigned with 4294967295; lia). + all: repeat rewrite Int64.unsigned_repr by (change Int64.max_unsigned with 18446744073709551615; lia). + all: repeat rewrite Ptrofs.unsigned_repr by (change Ptrofs.max_unsigned with 18446744073709551615; lia). + + all: try change Ptrofs.modulus with 4294967296. + all: try change Ptrofs.modulus with 18446744073709551616. + + all: intuition lia. + Qed. + + Theorem load_store_away : + can_swap_accesses_ofs ofsr chunkr ofsw chunkw = true -> + Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr. + Proof. + intro SWAP. + unfold can_swap_accesses_ofs in SWAP. + repeat rewrite andb_true_iff in SWAP. + repeat rewrite orb_true_iff in SWAP. + repeat rewrite Z.leb_le in SWAP. + apply load_store_away1. + all: tauto. + Qed. + End INDEXED_AWAY. + + Section DIFFERENT_GLOBALS. + Variable ofsw ofsr : ptrofs. + Hypothesis symw symr : ident. + Hypothesis ADDRW : eval_addressing genv sp + (Aglobal symw ofsw) nil = Some addrw. + Hypothesis ADDRR : eval_addressing genv sp + (Aglobal symr ofsr) nil = Some addrr. + + Lemma ptr64_cases: + forall {T : Type}, + forall b : bool, + forall x y : T, + (if b then (if b then x else y) else (if b then y else x)) = x. + Proof. + destruct b; reflexivity. + Qed. + + (* not needed + Lemma bool_cases_same: + forall {T : Type}, + forall b : bool, + forall x : T, + (if b then x else x) = x. + Proof. + destruct b; reflexivity. + Qed. + *) + + Lemma load_store_diff_globals : + symw <> symr -> + Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr. + Proof. + intros. + unfold eval_addressing in *. + simpl in *. + rewrite ptr64_cases in ADDRR. + rewrite ptr64_cases in ADDRW. + unfold Genv.symbol_address in *. + unfold Genv.find_symbol in *. + destruct ((Genv.genv_symb genv) ! symw) as [bw |] eqn:SYMW; inv ADDRW. + 2: simpl in STORE; discriminate. + destruct ((Genv.genv_symb genv) ! symr) as [br |] eqn:SYMR; inv ADDRR. + 2: reflexivity. + assert (br <> bw). + { + intro EQ. + subst br. + assert (symr = symw). + { + eapply Genv.genv_vars_inj; eauto. + } + congruence. + } + eapply Mem.load_store_other with (chunk := chunkw) (v := valw) (b := bw). + - exact STORE. + - left. assumption. + Qed. + End DIFFERENT_GLOBALS. + + Section SAME_GLOBALS. + Variable ofsw ofsr : ptrofs. + Hypothesis sym : ident. + Hypothesis ADDRW : eval_addressing genv sp + (Aglobal sym ofsw) nil = Some addrw. + Hypothesis ADDRR : eval_addressing genv sp + (Aglobal sym ofsr) nil = Some addrr. + + Lemma load_store_glob_away1 : + forall RANGEW : 0 <= (Ptrofs.unsigned ofsw) <= Ptrofs.modulus - largest_size_chunk, + forall RANGER : 0 <= (Ptrofs.unsigned ofsr) <= Ptrofs.modulus - largest_size_chunk, + forall SWAPPABLE : (Ptrofs.unsigned ofsw) + size_chunk chunkw <= (Ptrofs.unsigned ofsr) + \/ (Ptrofs.unsigned ofsr) + size_chunk chunkr <= (Ptrofs.unsigned ofsw), + Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr. + Proof. + intros. + + pose proof (max_size_chunk chunkr) as size_chunkr_bounded. + pose proof (max_size_chunk chunkw) as size_chunkw_bounded. + unfold largest_size_chunk in size_chunkr_bounded, size_chunkw_bounded. + try change (Ptrofs.modulus - largest_size_chunk) with 4294967288 in *. + try change (Ptrofs.modulus - largest_size_chunk) with 18446744073709551608 in *. + unfold eval_addressing, eval_addressing32, eval_addressing64 in *. + + rewrite ptr64_cases in ADDRR. + rewrite ptr64_cases in ADDRW. + unfold Genv.symbol_address in *. + inv ADDRR. + inv ADDRW. + destruct (Genv.find_symbol genv sym). + 2: discriminate. + + eapply Mem.load_store_other with (chunk := chunkw) (v := valw) (b := b). + exact STORE. + right. + tauto. + Qed. + + Lemma load_store_glob_away : + (can_swap_accesses_ofs (Ptrofs.unsigned ofsr) chunkr (Ptrofs.unsigned ofsw) chunkw) = true -> + Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr. + Proof. + intro SWAP. + unfold can_swap_accesses_ofs in SWAP. + repeat rewrite andb_true_iff in SWAP. + repeat rewrite orb_true_iff in SWAP. + repeat rewrite Z.leb_le in SWAP. + apply load_store_glob_away1. + all: tauto. + Qed. + End SAME_GLOBALS. +End MEMORY_WRITE. +End SOUNDNESS. + + +Section SOUNDNESS. + Variable F V : Type. + Variable genv: Genv.t F V. + Variable sp : val. + +Lemma may_overlap_sound: + forall m m' : mem, + forall chunk addr args chunk' addr' args' v a a' rs, + (eval_addressing genv sp addr (rs ## args)) = Some a -> + (eval_addressing genv sp addr' (rs ## args')) = Some a' -> + (may_overlap chunk addr args chunk' addr' args') = false -> + (Mem.storev chunk m a v) = Some m' -> + (Mem.loadv chunk' m' a') = (Mem.loadv chunk' m a'). +Proof. + intros until rs. + intros ADDR ADDR' OVERLAP STORE. + destruct addr; destruct addr'; try discriminate. + { (* Aindexed / Aindexed *) + destruct args as [ | base [ | ]]. 1,3: discriminate. + destruct args' as [ | base' [ | ]]. 1,3: discriminate. + simpl in OVERLAP. + destruct (peq base base'). 2: discriminate. + subst base'. + destruct (can_swap_accesses_ofs z0 chunk' z chunk) eqn:SWAP. + 2: discriminate. + simpl in *. + eapply load_store_away; eassumption. + } + { (* Aglobal / Aglobal *) + destruct args. 2: discriminate. + destruct args'. 2: discriminate. + simpl in *. + destruct (peq i i1). + { + subst i1. + rewrite negb_false_iff in OVERLAP. + eapply load_store_glob_away; eassumption. + } + eapply load_store_diff_globals; eassumption. + } +Qed. + +End SOUNDNESS. diff --git a/x86/Conventions1.v b/x86/Conventions1.v index ab4c4b13..d9f5b8fa 100644 --- a/x86/Conventions1.v +++ b/x86/Conventions1.v @@ -221,36 +221,6 @@ Definition loc_arguments (s: signature) : list (rpair loc) := then loc_arguments_64 s.(sig_args) 0 0 0 else loc_arguments_32 s.(sig_args) 0. -(** [size_arguments s] returns the number of [Outgoing] slots used - to call a function with signature [s]. *) - -Fixpoint size_arguments_32 - (tyl: list typ) (ofs: Z) {struct tyl} : Z := - match tyl with - | nil => ofs - | ty :: tys => size_arguments_32 tys (ofs + typesize ty) - end. - -Fixpoint size_arguments_64 (tyl: list typ) (ir fr ofs: Z) {struct tyl} : Z := - match tyl with - | nil => ofs - | (Tint | Tlong | Tany32 | Tany64) :: tys => - match list_nth_z int_param_regs ir with - | None => size_arguments_64 tys ir fr (ofs + 2) - | Some ireg => size_arguments_64 tys (ir + 1) fr ofs - end - | (Tfloat | Tsingle) :: tys => - match list_nth_z float_param_regs fr with - | None => size_arguments_64 tys ir fr (ofs + 2) - | Some freg => size_arguments_64 tys ir (fr + 1) ofs - end - end. - -Definition size_arguments (s: signature) : Z := - if Archi.ptr64 - then size_arguments_64 s.(sig_args) 0 0 0 - else size_arguments_32 s.(sig_args) 0. - (** Argument locations are either caller-save registers or [Outgoing] stack slots at nonnegative offsets. *) @@ -352,121 +322,6 @@ Qed. Hint Resolve loc_arguments_acceptable: locs. -(** The offsets of [Outgoing] arguments are below [size_arguments s]. *) - -Remark size_arguments_32_above: - forall tyl ofs0, ofs0 <= size_arguments_32 tyl ofs0. -Proof. - induction tyl; simpl; intros. - omega. - apply Z.le_trans with (ofs0 + typesize a); auto. - generalize (typesize_pos a); omega. -Qed. - -Remark size_arguments_64_above: - forall tyl ir fr ofs0, - ofs0 <= size_arguments_64 tyl ir fr ofs0. -Proof. - induction tyl; simpl; intros. - omega. - assert (A: ofs0 <= - match list_nth_z int_param_regs ir with - | Some _ => size_arguments_64 tyl (ir + 1) fr ofs0 - | None => size_arguments_64 tyl ir fr (ofs0 + 2) - end). - { destruct (list_nth_z int_param_regs ir); eauto. - apply Z.le_trans with (ofs0 + 2); auto. omega. } - assert (B: ofs0 <= - match list_nth_z float_param_regs fr with - | Some _ => size_arguments_64 tyl ir (fr + 1) ofs0 - | None => size_arguments_64 tyl ir fr (ofs0 + 2) - end). - { destruct (list_nth_z float_param_regs fr); eauto. - apply Z.le_trans with (ofs0 + 2); auto. omega. } - destruct a; auto. -Qed. - -Lemma size_arguments_above: - forall s, size_arguments s >= 0. -Proof. - intros; unfold size_arguments. apply Z.le_ge. - destruct Archi.ptr64; [apply size_arguments_64_above|apply size_arguments_32_above]. -Qed. - -Lemma loc_arguments_32_bounded: - forall ofs ty tyl ofs0, - In (S Outgoing ofs ty) (regs_of_rpairs (loc_arguments_32 tyl ofs0)) -> - ofs + typesize ty <= size_arguments_32 tyl ofs0. -Proof. - induction tyl as [ | t l]; simpl; intros x IN. -- contradiction. -- rewrite in_app_iff in IN; destruct IN as [IN|IN]. -+ apply Z.le_trans with (x + typesize t); [|apply size_arguments_32_above]. - Ltac decomp := - match goal with - | [ H: _ \/ _ |- _ ] => destruct H; decomp - | [ H: S _ _ _ = S _ _ _ |- _ ] => inv H - | [ H: False |- _ ] => contradiction - end. - destruct t; simpl in IN; decomp; simpl; omega. -+ apply IHl; auto. -Qed. - -Lemma loc_arguments_64_bounded: - forall ofs ty tyl ir fr ofs0, - In (S Outgoing ofs ty) (regs_of_rpairs (loc_arguments_64 tyl ir fr ofs0)) -> - ofs + typesize ty <= size_arguments_64 tyl ir fr ofs0. -Proof. - induction tyl; simpl; intros. - contradiction. - assert (T: forall ty0, typesize ty0 <= 2). - { destruct ty0; simpl; omega. } - assert (A: forall ty0, - In (S Outgoing ofs ty) (regs_of_rpairs - match list_nth_z int_param_regs ir with - | Some ireg => - One (R ireg) :: loc_arguments_64 tyl (ir + 1) fr ofs0 - | None => One (S Outgoing ofs0 ty0) :: loc_arguments_64 tyl ir fr (ofs0 + 2) - end) -> - ofs + typesize ty <= - match list_nth_z int_param_regs ir with - | Some _ => size_arguments_64 tyl (ir + 1) fr ofs0 - | None => size_arguments_64 tyl ir fr (ofs0 + 2) - end). - { intros. destruct (list_nth_z int_param_regs ir); simpl in H0; destruct H0. - - discriminate. - - eapply IHtyl; eauto. - - inv H0. apply Z.le_trans with (ofs + 2). specialize (T ty). omega. apply size_arguments_64_above. - - eapply IHtyl; eauto. } - assert (B: forall ty0, - In (S Outgoing ofs ty) (regs_of_rpairs - match list_nth_z float_param_regs fr with - | Some ireg => - One (R ireg) :: loc_arguments_64 tyl ir (fr + 1) ofs0 - | None => One (S Outgoing ofs0 ty0) :: loc_arguments_64 tyl ir fr (ofs0 + 2) - end) -> - ofs + typesize ty <= - match list_nth_z float_param_regs fr with - | Some _ => size_arguments_64 tyl ir (fr + 1) ofs0 - | None => size_arguments_64 tyl ir fr (ofs0 + 2) - end). - { intros. destruct (list_nth_z float_param_regs fr); simpl in H0; destruct H0. - - discriminate. - - eapply IHtyl; eauto. - - inv H0. apply Z.le_trans with (ofs + 2). specialize (T ty). omega. apply size_arguments_64_above. - - eapply IHtyl; eauto. } - destruct a; eauto. -Qed. - -Lemma loc_arguments_bounded: - forall (s: signature) (ofs: Z) (ty: typ), - In (S Outgoing ofs ty) (regs_of_rpairs (loc_arguments s)) -> - ofs + typesize ty <= size_arguments s. -Proof. - unfold loc_arguments, size_arguments; intros. - destruct Archi.ptr64; eauto using loc_arguments_32_bounded, loc_arguments_64_bounded. -Qed. - Lemma loc_arguments_main: loc_arguments signature_main = nil. Proof. diff --git a/x86/DuplicateOpcodeHeuristic.ml b/x86/DuplicateOpcodeHeuristic.ml index 85505245..2ec314c1 100644 --- a/x86/DuplicateOpcodeHeuristic.ml +++ b/x86/DuplicateOpcodeHeuristic.ml @@ -1,3 +1,27 @@ -exception HeuristicSucceeded - -let opcode_heuristic code cond ifso ifnot preferred = () +(* open Camlcoq *) +open Op +open Integers + +let opcode_heuristic code cond ifso ifnot is_loop_header = + match cond with + | Ccompimm (c, n) | Ccompuimm (c, n) -> if n == Integers.Int.zero then (match c with + | Clt | Cle -> Some false + | Cgt | Cge -> Some true + | _ -> None + ) else None + | Ccomplimm (c, n) | Ccompluimm (c, n) -> if n == Integers.Int64.zero then (match c with + | Clt | Cle -> Some false + | Cgt | Cge -> Some true + | _ -> None + ) else None + | Ccompf c | Ccompfs c -> (match c with + | Ceq -> Some false + | Cne -> Some true + | _ -> None + ) + | Cnotcompf c | Cnotcompfs c -> (match c with + | Ceq -> Some true + | Cne -> Some false + | _ -> None + ) + | _ -> None |