aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Monniaux <david.monniaux@univ-grenoble-alpes.fr>2020-04-08 21:45:42 +0200
committerDavid Monniaux <david.monniaux@univ-grenoble-alpes.fr>2020-04-08 21:45:42 +0200
commit63915fbebe707cc1de7c0ed5a24148cac45a742c (patch)
treeda503cba224f14281a2ee841930b8843459cb42b
parentf78d61faf3db94ac1704ce0d11291211b5307629 (diff)
parente326ed9f28a2ed6869f0cb356ef9a8e189cb0a47 (diff)
downloadcompcert-kvx-63915fbebe707cc1de7c0ed5a24148cac45a742c.tar.gz
compcert-kvx-63915fbebe707cc1de7c0ed5a24148cac45a742c.zip
Merge remote-tracking branch 'origin/mppa-work' into mppa-thread
-rw-r--r--.gitlab-ci.yml240
-rw-r--r--Changelog40
-rw-r--r--Makefile1
-rw-r--r--VERSION2
-rw-r--r--aarch64/Asmgen.v9
-rw-r--r--aarch64/Asmgenproof.v141
-rw-r--r--aarch64/Asmgenproof1.v656
-rw-r--r--aarch64/CSE2deps.v20
-rw-r--r--aarch64/CSE2depsproof.v128
-rw-r--r--aarch64/Conventions1.v107
-rw-r--r--aarch64/DuplicateOpcodeHeuristic.ml30
-rw-r--r--arm/CSE2deps.v20
-rw-r--r--arm/CSE2depsproof.v129
-rw-r--r--arm/Conventions1.v206
-rw-r--r--arm/DuplicateOpcodeHeuristic.ml25
-rw-r--r--backend/Allocation.v4
-rw-r--r--backend/Allocproof.v6
-rw-r--r--backend/CSE.v6
-rw-r--r--backend/CSE2.v28
-rw-r--r--backend/CSE2proof.v249
-rw-r--r--backend/Constprop.v6
-rw-r--r--backend/Constpropproof.v4
-rw-r--r--backend/Conventions.v67
-rw-r--r--backend/Deadcode.v4
-rw-r--r--backend/Duplicate.v4
-rw-r--r--backend/Duplicateaux.ml638
-rw-r--r--backend/Duplicateproof.v10
-rw-r--r--backend/ForwardMoves.v6
-rw-r--r--backend/IRC.ml1
-rw-r--r--backend/Inlining.v4
-rw-r--r--backend/Inliningaux.ml9
-rw-r--r--backend/Inliningspec.v6
-rw-r--r--backend/LTL.v8
-rw-r--r--backend/Linearize.v2
-rw-r--r--backend/Linearizeaux.ml411
-rw-r--r--backend/Liveness.v2
-rw-r--r--backend/PrintLTL.ml5
-rw-r--r--backend/PrintRTL.ml5
-rw-r--r--backend/PrintXTL.ml2
-rw-r--r--backend/RTL.v19
-rw-r--r--backend/RTLgen.v2
-rw-r--r--backend/RTLgenspec.v4
-rw-r--r--backend/RTLtyping.v6
-rw-r--r--backend/Regalloc.ml20
-rw-r--r--backend/Renumber.v2
-rw-r--r--backend/Splitting.ml4
-rw-r--r--backend/Tunneling.v4
-rw-r--r--backend/Unusedglob.v2
-rw-r--r--backend/ValueAnalysis.v7
-rw-r--r--backend/ValueDomain.v5
-rw-r--r--backend/XTL.ml6
-rw-r--r--backend/XTL.mli2
-rw-r--r--cfrontend/C2C.ml4
-rw-r--r--cfrontend/Cexec.v73
-rw-r--r--common/Events.v88
-rw-r--r--common/Memdata.v7
-rw-r--r--common/Memory.v17
-rw-r--r--common/Sections.ml29
-rw-r--r--common/Sections.mli4
-rwxr-xr-xconfig_arm.sh2
-rwxr-xr-xconfig_armhf.sh1
-rwxr-xr-xconfig_ppc64.sh1
-rwxr-xr-xconfig_rv32.sh2
-rwxr-xr-xconfig_rv64.sh2
-rwxr-xr-xconfig_simple.sh7
-rwxr-xr-xconfigure2
-rw-r--r--cparser/Elab.ml7
-rw-r--r--doc/index.html4
-rw-r--r--driver/Clflags.ml5
-rw-r--r--driver/Compopts.v3
-rw-r--r--driver/Driver.ml14
-rw-r--r--extraction/extraction.v2
-rw-r--r--lib/Maps.v112
-rw-r--r--mppa_k1c/Asmblockdeps.v8
-rw-r--r--mppa_k1c/Asmblockgen.v2
-rw-r--r--mppa_k1c/Asmblockgenproof1.v131
-rw-r--r--mppa_k1c/Asmvliw.v6
-rw-r--r--mppa_k1c/CSE2deps.v20
-rw-r--r--mppa_k1c/CSE2depsproof.v127
-rw-r--r--mppa_k1c/DuplicateOpcodeHeuristic.ml9
-rw-r--r--mppa_k1c/PostpassSchedulingproof.v9
-rw-r--r--mppa_k1c/abstractbb/AbstractBasicBlocksDef.v2
-rw-r--r--mppa_k1c/abstractbb/ImpSimuTest.v14
-rw-r--r--mppa_k1c/abstractbb/Impure/ImpHCons.v4
-rw-r--r--mppa_k1c/abstractbb/Parallelizability.v8
-rw-r--r--mppa_k1c/abstractbb/SeqSimuTheory.v11
-rw-r--r--mppa_k1c/lib/Asmblockgenproof0.v4
-rw-r--r--mppa_k1c/lib/ForwardSimulationBlock.v6
-rw-r--r--mppa_k1c/lib/Machblockgen.v8
-rw-r--r--mppa_k1c/lib/Machblockgenproof.v20
-rw-r--r--powerpc/Archi.v4
-rw-r--r--powerpc/CSE2deps.v20
-rw-r--r--powerpc/CSE2depsproof.v135
-rw-r--r--powerpc/Conventions1.v143
-rw-r--r--powerpc/DuplicateOpcodeHeuristic.ml30
-rw-r--r--powerpc/extractionMachdep.v3
-rw-r--r--riscV/Asmexpand.ml36
-rw-r--r--riscV/CSE2deps.v20
-rw-r--r--riscV/CSE2depsproof.v127
-rw-r--r--riscV/Conventions1.v323
-rw-r--r--riscV/DuplicateOpcodeHeuristic.ml30
-rw-r--r--runtime/arm/i64_stof.S9
-rw-r--r--runtime/include/math.h7
-rw-r--r--runtime/powerpc/i64_stof.s17
-rw-r--r--runtime/powerpc/i64_utof.s10
-rw-r--r--runtime/powerpc64/i64_utof.s10
-rw-r--r--test/Makefile4
-rw-r--r--test/c/mandelbrot.c2
-rw-r--r--test/cse2/globals.c8
-rw-r--r--test/cse2/indexed_addr.c6
-rw-r--r--test/monniaux/clock.c4
-rw-r--r--test/monniaux/cycles.h57
-rw-r--r--test/monniaux/quicksort/quicksort_run.c2
-rw-r--r--test/monniaux/yarpgen/Makefile130
-rw-r--r--test/monniaux/yarpgen/Makefile.old52
-rw-r--r--test/regression/Makefile4
-rw-r--r--test/regression/Results/int64874
-rw-r--r--test/regression/int64.c3
-rw-r--r--test/regression/packedstruct1.c24
-rw-r--r--x86/Asmexpand.ml2
-rw-r--r--x86/CSE2deps.v24
-rw-r--r--x86/CSE2depsproof.v253
-rw-r--r--x86/Conventions1.v145
-rw-r--r--x86/DuplicateOpcodeHeuristic.ml30
124 files changed, 4766 insertions, 1881 deletions
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
new file mode 100644
index 00000000..1f854fc3
--- /dev/null
+++ b/.gitlab-ci.yml
@@ -0,0 +1,240 @@
+stages:
+ - build
+
+check-admitted:
+ stage: build
+ image: "coqorg/coq"
+ before_script:
+ - opam switch 4.07.1+flambda
+ - eval `opam config env`
+ - opam install -y menhir
+ script:
+ - ./config_x86_64.sh
+ - make check-admitted
+ rules:
+ - if: '$CI_COMMIT_BRANCH == "mppa-work"'
+ when: always
+ - if: '$CI_COMMIT_BRANCH == "mppa-k1c"'
+ when: always
+ - if: '$CI_COMMIT_BRANCH == "master"'
+ when: always
+ - when: manual
+
+build_x86_64:
+ stage: build
+ image: "coqorg/coq"
+ before_script:
+ - opam switch 4.07.1+flambda
+ - eval `opam config env`
+ - opam install -y menhir
+ script:
+ - ./config_x86_64.sh
+ - make -j "$NJOBS"
+ - make -C test all test
+ - ulimit -s65536 && make -C test/monniaux/yarpgen
+ rules:
+ - if: '$CI_COMMIT_BRANCH == "mppa-work"'
+ when: always
+ - if: '$CI_COMMIT_BRANCH == "mppa-k1c"'
+ when: always
+ - if: '$CI_COMMIT_BRANCH == "master"'
+ when: always
+ - when: manual
+
+build_ia32:
+ stage: build
+ image: "coqorg/coq"
+ before_script:
+ - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update
+ - sudo apt-get -y install gcc-multilib
+ - opam switch 4.07.1+flambda
+ - eval `opam config env`
+ - opam install -y menhir
+ script:
+ - ./config_ia32.sh
+ - make -j "$NJOBS"
+ - make -C test all test
+ - ulimit -s65536 && make -C test/monniaux/yarpgen BITS=32 TARGET_CC='gcc -m32'
+ rules:
+ - if: '$CI_COMMIT_BRANCH == "mppa-work"'
+ when: always
+ - if: '$CI_COMMIT_BRANCH == "mppa-k1c"'
+ when: always
+ - if: '$CI_COMMIT_BRANCH == "master"'
+ when: always
+ - when: manual
+
+build_aarch64:
+ stage: build
+ image: "coqorg/coq"
+ before_script:
+ - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update
+ - sudo apt-get -y install gcc-aarch64-linux-gnu qemu-user
+ - opam switch 4.07.1+flambda
+ - eval `opam config env`
+ - opam install -y menhir
+ script:
+ - ./config_aarch64.sh
+ - make -j "$NJOBS"
+ - make -C test CCOMPOPTS='-static' SIMU='qemu-aarch64' EXECUTE='qemu-aarch64' all test
+ - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='aarch64-linux-gnu-gcc' EXECUTE='qemu-aarch64' CCOMPOPTS='-static' TARGET_CFLAGS='-static'
+ rules:
+ - if: '$CI_COMMIT_BRANCH == "mppa-work"'
+ when: always
+ - if: '$CI_COMMIT_BRANCH == "mppa-k1c"'
+ when: always
+ - if: '$CI_COMMIT_BRANCH == "master"'
+ when: always
+ - when: manual
+
+build_arm:
+ stage: build
+ image: "coqorg/coq"
+ before_script:
+ - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update
+ - sudo apt-get -y install gcc-arm-linux-gnueabi qemu-user
+ - opam switch 4.07.1+flambda
+ - eval `opam config env`
+ - opam install -y menhir
+ script:
+ - ./config_arm.sh
+ - make -j "$NJOBS"
+ - make -C test CCOMPOPTS=-static SIMU='qemu-arm' EXECUTE='qemu-arm' all test
+ - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='arm-linux-gnueabi-gcc' EXECUTE='qemu-arm' CCOMPOPTS='-static' TARGET_CFLAGS='-static' BITS=32
+ rules:
+ - if: '$CI_COMMIT_BRANCH == "mppa-work"'
+ when: always
+ - if: '$CI_COMMIT_BRANCH == "mppa-k1c"'
+ when: always
+ - if: '$CI_COMMIT_BRANCH == "master"'
+ when: always
+ - when: manual
+
+
+build_armhf:
+ stage: build
+ image: "coqorg/coq"
+ before_script:
+ - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update
+ - sudo apt-get -y install gcc-arm-linux-gnueabihf qemu-user
+ - opam switch 4.07.1+flambda
+ - eval `opam config env`
+ - opam install -y menhir
+ script:
+ - ./config_armhf.sh
+ - make -j "$NJOBS"
+ - make -C test CCOMPOPTS=-static SIMU='qemu-arm' EXECUTE='qemu-arm' all test
+ - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='arm-linux-gnueabihf-gcc' EXECUTE='qemu-arm' CCOMPOPTS='-static' TARGET_CFLAGS='-static' BITS=32
+ rules:
+ - if: '$CI_COMMIT_BRANCH == "mppa-work"'
+ when: always
+ - if: '$CI_COMMIT_BRANCH == "mppa-k1c"'
+ when: always
+ - if: '$CI_COMMIT_BRANCH == "master"'
+ when: always
+ - when: manual
+
+build_ppc:
+ stage: build
+ image: "coqorg/coq"
+ before_script:
+ - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update
+ - sudo apt-get -y install gcc-powerpc-linux-gnu qemu-user
+ - opam switch 4.07.1+flambda
+ - eval `opam config env`
+ - opam install -y menhir
+ script:
+ - ./config_ppc.sh
+ - make -j "$NJOBS"
+ rules:
+ - if: '$CI_COMMIT_BRANCH == "mppa-work"'
+ when: always
+ - if: '$CI_COMMIT_BRANCH == "mppa-k1c"'
+ when: always
+ - if: '$CI_COMMIT_BRANCH == "master"'
+ when: always
+ - when: manual
+
+build_ppc64:
+ stage: build
+ image: "coqorg/coq"
+ before_script:
+ - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update
+ - sudo apt-get -y install gcc-powerpc64-linux-gnu
+ - opam switch 4.07.1+flambda
+ - eval `opam config env`
+ - opam install -y menhir
+ script:
+ - ./config_ppc64.sh
+ - make -j "$NJOBS"
+ rules:
+ - if: '$CI_COMMIT_BRANCH == "mppa-work"'
+ when: always
+ - if: '$CI_COMMIT_BRANCH == "mppa-k1c"'
+ when: always
+ - if: '$CI_COMMIT_BRANCH == "master"'
+ when: always
+ - when: manual
+
+build_rv64:
+ stage: build
+ image: "coqorg/coq"
+ before_script:
+ - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update
+ - sudo apt-get -y install gcc-riscv64-linux-gnu qemu-user
+ - opam switch 4.07.1+flambda
+ - eval `opam config env`
+ - opam install -y menhir
+ script:
+ - ./config_rv64.sh
+ - make -j "$NJOBS"
+ - make -C test CCOMPOPTS=-static SIMU='qemu-riscv64' EXECUTE='qemu-riscv64' all test
+ - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='riscv64-linux-gnu-gcc' EXECUTE='qemu-riscv64' CCOMPOPTS='-static' TARGET_CFLAGS='-static'
+ rules:
+ - if: '$CI_COMMIT_BRANCH == "mppa-work"'
+ when: always
+ - if: '$CI_COMMIT_BRANCH == "mppa-k1c"'
+ when: always
+ - if: '$CI_COMMIT_BRANCH == "master"'
+ when: always
+ - when: manual
+
+build_rv32:
+ stage: build
+ image: "coqorg/coq"
+ before_script:
+ - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update
+ - sudo apt-get -y install gcc-riscv64-linux-gnu qemu-user
+ - opam switch 4.07.1+flambda
+ - eval `opam config env`
+ - opam install -y menhir
+ script:
+ - ./config_rv32.sh -no-runtime-lib
+ - make -j "$NJOBS"
+ rules:
+ - if: '$CI_COMMIT_BRANCH == "mppa-work"'
+ when: always
+ - if: '$CI_COMMIT_BRANCH == "mppa-k1c"'
+ when: always
+ - if: '$CI_COMMIT_BRANCH == "master"'
+ when: always
+ - when: manual
+
+build_k1c:
+ stage: build
+ image: "coqorg/coq"
+ before_script:
+ - opam switch 4.07.1+flambda
+ - eval `opam config env`
+ - opam install -y menhir
+ script:
+ - ./config_k1c.sh -no-runtime-lib
+ - make -j "$NJOBS"
+ rules:
+ - if: '$CI_COMMIT_BRANCH == "mppa-work"'
+ when: always
+ - if: '$CI_COMMIT_BRANCH == "mppa-k1c"'
+ when: always
+ - if: '$CI_COMMIT_BRANCH == "master"'
+ when: always
+ - when: manual
diff --git a/Changelog b/Changelog
index 08586da5..8cf4e548 100644
--- a/Changelog
+++ b/Changelog
@@ -1,7 +1,41 @@
-Coq development:
-- Compatibility with Coq version 8.11.0 (#316)
+Release 3.7, 2020-03-31
+=======================
+
+ISO C conformance:
+- Functions declared `extern` then implemented `inline` remain `extern`
+- The type of a wide char constant is `wchar_t`, not `int`
+- Support vertical tabs and treat them as whitespace
+- Define the semantics of `free(NULL)`
+
+Bug fixing:
+- Take sign into account for conversions from 32-bit integers to 64-bit pointers
+- PowerPC: more precise determination of small data accesses
+- AArch64: when addressing global variables, check for correct alignment
+- PowerPC, ARM: double rounding error in int64->float32 conversions
+
+ABI conformance:
+- x86, AArch64: re-normalize values of small integer types returned by
+ function calls
+- PowerPC: `float` arguments passed on stack are passed in 64-bit format
+- RISC-V: use the new ELF psABI instead of the old ABI from ISA 2.1
+
+Usability and diagnostics:
+- Unknown builtin functions trigger a specific error message
+- Improved error messages
+
+Coq formalization:
+- Revised modeling of the PowerPC/EREF `isel` instruction
+- Weaker `ec_readonly` condition over external calls
+ (permissions can be dropped on read-only locations)
+
+Coq and OCaml development:
+- Compatibility with Coq version 8.10.1, 8.10.2, 8.11.0
+- Compatibility with OCaml 4.10 and up
+- Compatibility with Menhir 20200123 and up
+- Coq versions prior to 8.8.0 are no longer supported
+- OCaml versions prior to 4.05.0 are no longer supported
+
-
Release 3.6, 2019-09-17
=======================
diff --git a/Makefile b/Makefile
index 5566cf57..2cd40800 100644
--- a/Makefile
+++ b/Makefile
@@ -86,6 +86,7 @@ BACKEND=\
ValueDomain.v ValueAOp.v ValueAnalysis.v \
ConstpropOp.v Constprop.v ConstpropOpproof.v Constpropproof.v \
CSEdomain.v CombineOp.v CSE.v CombineOpproof.v CSEproof.v \
+ CSE2deps.v CSE2depsproof.v \
CSE2.v CSE2proof.v \
NeedDomain.v NeedOp.v Deadcode.v Deadcodeproof.v \
Unusedglob.v Unusedglobproof.v \
diff --git a/VERSION b/VERSION
index 92686b06..b60e8d9b 100644
--- a/VERSION
+++ b/VERSION
@@ -1,3 +1,3 @@
-version=3.6
+version=3.7
buildnr=
tag=
diff --git a/aarch64/Asmgen.v b/aarch64/Asmgen.v
index 46dd875d..024c9a17 100644
--- a/aarch64/Asmgen.v
+++ b/aarch64/Asmgen.v
@@ -1061,8 +1061,13 @@ Definition storeptr (src: ireg) (base: iregsp) (ofs: ptrofs) (k: code) :=
(** Function epilogue *)
Definition make_epilogue (f: Mach.function) (k: code) :=
- loadptr XSP f.(fn_retaddr_ofs) RA
- (Pfreeframe f.(fn_stacksize) f.(fn_link_ofs) :: k).
+ (* FIXME
+ Cannot be used because memcpy destroys X30;
+ issue being discussed with X. Leroy *)
+ (* if is_leaf_function f
+ then Pfreeframe f.(fn_stacksize) f.(fn_link_ofs) :: k
+ else*) loadptr XSP f.(fn_retaddr_ofs) RA
+ (Pfreeframe f.(fn_stacksize) f.(fn_link_ofs) :: k).
(** Translation of a Mach instruction. *)
diff --git a/aarch64/Asmgenproof.v b/aarch64/Asmgenproof.v
index 88258cd6..6831509f 100644
--- a/aarch64/Asmgenproof.v
+++ b/aarch64/Asmgenproof.v
@@ -337,7 +337,12 @@ Qed.
Remark make_epilogue_label:
forall f k, tail_nolabel k (make_epilogue f k).
Proof.
- unfold make_epilogue; intros. eapply tail_nolabel_trans. apply loadptr_label. TailNoLabel.
+ unfold make_epilogue; intros.
+ (* FIXME destruct is_leaf_function.
+ { TailNoLabel. } *)
+ eapply tail_nolabel_trans.
+ apply loadptr_label.
+ TailNoLabel.
Qed.
Lemma transl_instr_label:
@@ -472,7 +477,8 @@ Inductive match_states: Mach.state -> Asm.state -> Prop :=
(MEXT: Mem.extends m m')
(AT: transl_code_at_pc ge (rs PC) fb f c ep tf tc)
(AG: agree ms sp rs)
- (DXP: ep = true -> rs#X29 = parent_sp s),
+ (DXP: ep = true -> rs#X29 = parent_sp s)
+ (LEAF: is_leaf_function f = true -> rs#RA = parent_ra s),
match_states (Mach.State s fb sp c ms m)
(Asm.State rs m')
| match_states_call:
@@ -503,16 +509,17 @@ Lemma exec_straight_steps:
exists rs2,
exec_straight tge tf c rs1 m1' k rs2 m2'
/\ agree ms2 sp rs2
- /\ (it1_is_parent ep i = true -> rs2#X29 = parent_sp s)) ->
+ /\ (it1_is_parent ep i = true -> rs2#X29 = parent_sp s)
+ /\ (is_leaf_function f = true -> rs2#RA = parent_ra s)) ->
exists st',
plus step tge (State rs1 m1') E0 st' /\
match_states (Mach.State s fb sp c ms2 m2) st'.
Proof.
intros. inversion H2. subst. monadInv H7.
- exploit H3; eauto. intros [rs2 [A [B C]]].
+ exploit H3; eauto. intros [rs2 [A [B [C D]]]].
exists (State rs2 m2'); split.
- eapply exec_straight_exec; eauto.
- econstructor; eauto. eapply exec_straight_at; eauto.
+ - eapply exec_straight_exec; eauto.
+ - econstructor; eauto. eapply exec_straight_at; eauto.
Qed.
Lemma exec_straight_steps_goto:
@@ -527,13 +534,14 @@ Lemma exec_straight_steps_goto:
exists jmp, exists k', exists rs2,
exec_straight tge tf c rs1 m1' (jmp :: k') rs2 m2'
/\ agree ms2 sp rs2
- /\ exec_instr tge tf jmp rs2 m2' = goto_label tf lbl rs2 m2') ->
+ /\ exec_instr tge tf jmp rs2 m2' = goto_label tf lbl rs2 m2'
+ /\ (is_leaf_function f = true -> rs2#RA = parent_ra s)) ->
exists st',
plus step tge (State rs1 m1') E0 st' /\
match_states (Mach.State s fb sp c' ms2 m2) st'.
Proof.
intros. inversion H3. subst. monadInv H9.
- exploit H5; eauto. intros [jmp [k' [rs2 [A [B C]]]]].
+ exploit H5; eauto. intros [jmp [k' [rs2 [A [B [C D]]]]]].
generalize (functions_transl _ _ _ H7 H8); intro FN.
generalize (transf_function_no_overflow _ _ H8); intro NOOV.
exploit exec_straight_steps_2; eauto.
@@ -550,6 +558,7 @@ Proof.
econstructor; eauto.
apply agree_exten with rs2; auto with asmgen.
congruence.
+ rewrite OTH by congruence; auto.
Qed.
Lemma exec_straight_opt_steps_goto:
@@ -564,13 +573,14 @@ Lemma exec_straight_opt_steps_goto:
exists jmp, exists k', exists rs2,
exec_straight_opt tge tf c rs1 m1' (jmp :: k') rs2 m2'
/\ agree ms2 sp rs2
- /\ exec_instr tge tf jmp rs2 m2' = goto_label tf lbl rs2 m2') ->
+ /\ exec_instr tge tf jmp rs2 m2' = goto_label tf lbl rs2 m2'
+ /\ (is_leaf_function f = true -> rs2#RA = parent_ra s)) ->
exists st',
plus step tge (State rs1 m1') E0 st' /\
match_states (Mach.State s fb sp c' ms2 m2) st'.
Proof.
intros. inversion H3. subst. monadInv H9.
- exploit H5; eauto. intros [jmp [k' [rs2 [A [B C]]]]].
+ exploit H5; eauto. intros [jmp [k' [rs2 [A [B [C D]]]]]].
generalize (functions_transl _ _ _ H7 H8); intro FN.
generalize (transf_function_no_overflow _ _ H8); intro NOOV.
inv A.
@@ -583,6 +593,7 @@ Proof.
econstructor; eauto.
apply agree_exten with rs2; auto with asmgen.
congruence.
+ rewrite OTH by congruence; auto.
- exploit exec_straight_steps_2; eauto.
intros [ofs' [PC2 CT2]].
exploit find_label_goto_label; eauto.
@@ -597,6 +608,7 @@ Proof.
econstructor; eauto.
apply agree_exten with rs2; auto with asmgen.
congruence.
+ rewrite OTH by congruence; auto.
Qed.
(** We need to show that, in the simulation diagram, we cannot
@@ -629,7 +641,7 @@ Qed.
Theorem step_simulation:
forall S1 t S2, Mach.step return_address_offset ge S1 t S2 ->
- forall S1' (MS: match_states S1 S1'),
+ forall S1' (MS: match_states S1 S1') (WF: wf_state ge S1),
(exists S2', plus step tge S1' t S2' /\ match_states S2 S2')
\/ (measure S2 < measure S1 /\ t = E0 /\ match_states S2 S1')%nat.
Proof.
@@ -638,17 +650,20 @@ Proof.
- (* Mlabel *)
left; eapply exec_straight_steps; eauto; intros.
monadInv TR. econstructor; split. apply exec_straight_one. simpl; eauto. auto.
- split. apply agree_nextinstr; auto. simpl; congruence.
+ split. { apply agree_nextinstr; auto. }
+ split. { simpl; congruence. }
+ rewrite nextinstr_inv by congruence; assumption.
- (* Mgetstack *)
unfold load_stack in H.
exploit Mem.loadv_extends; eauto. intros [v' [A B]].
rewrite (sp_val _ _ _ AG) in A.
left; eapply exec_straight_steps; eauto. intros. simpl in TR.
- exploit loadind_correct; eauto with asmgen. intros [rs' [P [Q R]]].
+ exploit loadind_correct; eauto with asmgen. intros [rs' [P [Q [R S]]]].
exists rs'; split. eauto.
- split. eapply agree_set_mreg; eauto with asmgen. congruence.
- simpl; congruence.
+ split. { eapply agree_set_mreg; eauto with asmgen. congruence. }
+ split. { simpl; congruence. }
+ rewrite S. assumption.
- (* Msetstack *)
unfold store_stack in H.
@@ -656,10 +671,12 @@ Proof.
exploit Mem.storev_extends; eauto. intros [m2' [A B]].
left; eapply exec_straight_steps; eauto.
rewrite (sp_val _ _ _ AG) in A. intros. simpl in TR.
- exploit storeind_correct; eauto with asmgen. intros [rs' [P Q]].
+ exploit storeind_correct; eauto with asmgen. intros [rs' [P [Q R]]].
exists rs'; split. eauto.
split. eapply agree_undef_regs; eauto with asmgen.
- simpl; intros. rewrite Q; auto with asmgen.
+ simpl; intros.
+ split. rewrite Q; auto with asmgen.
+ rewrite R. assumption.
- (* Mgetparam *)
assert (f0 = f) by congruence; subst f0.
@@ -675,39 +692,45 @@ Opaque loadind.
(* X30 contains parent *)
exploit loadind_correct. eexact EQ.
instantiate (2 := rs0). simpl; rewrite DXP; eauto. simpl; congruence.
- intros [rs1 [P [Q R]]].
+ intros [rs1 [P [Q [R S]]]].
exists rs1; split. eauto.
split. eapply agree_set_mreg. eapply agree_set_mreg; eauto. congruence. auto with asmgen.
- simpl; intros. rewrite R; auto with asmgen.
- apply preg_of_not_X29; auto.
+ simpl; split; intros.
+ { rewrite R; auto with asmgen.
+ apply preg_of_not_X29; auto.
+ }
+ { rewrite S; auto. }
+
(* X30 does not contain parent *)
exploit loadptr_correct. eexact A. simpl; congruence. intros [rs1 [P [Q R]]].
exploit loadind_correct. eexact EQ. instantiate (2 := rs1). simpl; rewrite Q. eauto. simpl; congruence.
- intros [rs2 [S [T U]]].
+ intros [rs2 [S [T [U V]]]].
exists rs2; split. eapply exec_straight_trans; eauto.
split. eapply agree_set_mreg. eapply agree_set_mreg. eauto. eauto.
instantiate (1 := rs1#X29 <- (rs2#X29)). intros.
rewrite Pregmap.gso; auto with asmgen.
congruence.
intros. unfold Pregmap.set. destruct (PregEq.eq r' X29). congruence. auto with asmgen.
- simpl; intros. rewrite U; auto with asmgen.
+ split; simpl; intros. rewrite U; auto with asmgen.
apply preg_of_not_X29; auto.
-
+ rewrite V. rewrite R by congruence. auto.
+
- (* Mop *)
assert (eval_operation tge sp op (map rs args) m = Some v).
{ rewrite <- H. apply eval_operation_preserved. exact symbols_preserved. }
exploit eval_operation_lessdef. eapply preg_vals; eauto. eauto. eexact H0.
intros [v' [A B]]. rewrite (sp_val _ _ _ AG) in A.
left; eapply exec_straight_steps; eauto; intros. simpl in TR.
- exploit transl_op_correct; eauto. intros [rs2 [P [Q R]]].
+ exploit transl_op_correct; eauto. intros [rs2 [P [Q [R S]]]].
exists rs2; split. eauto. split.
apply agree_set_undef_mreg with rs0; auto.
apply Val.lessdef_trans with v'; auto.
- simpl; intros. InvBooleans.
+ split; simpl; intros. InvBooleans.
rewrite R; auto. apply preg_of_not_X29; auto.
Local Transparent destroyed_by_op.
destruct op; try exact I; simpl; congruence.
-
+ rewrite S.
+ auto.
- (* Mload *)
destruct trap.
{
@@ -717,10 +740,11 @@ Local Transparent destroyed_by_op.
intros [a' [A B]]. rewrite (sp_val _ _ _ AG) in A.
exploit Mem.loadv_extends; eauto. intros [v' [C D]].
left; eapply exec_straight_steps; eauto; intros. simpl in TR.
- exploit transl_load_correct; eauto. intros [rs2 [P [Q R]]].
+ exploit transl_load_correct; eauto. intros [rs2 [P [Q [R S]]]].
exists rs2; split. eauto.
split. eapply agree_set_undef_mreg; eauto. congruence.
- simpl; congruence.
+ split. simpl; congruence.
+ rewrite S. assumption.
}
(* Mload notrap1 *)
@@ -740,10 +764,11 @@ Local Transparent destroyed_by_op.
assert (Val.lessdef (rs src) (rs0 (preg_of src))) by (eapply preg_val; eauto).
exploit Mem.storev_extends; eauto. intros [m2' [C D]].
left; eapply exec_straight_steps; eauto.
- intros. simpl in TR. exploit transl_store_correct; eauto. intros [rs2 [P Q]].
+ intros. simpl in TR. exploit transl_store_correct; eauto. intros [rs2 [P [Q R]]].
exists rs2; split. eauto.
split. eapply agree_undef_regs; eauto with asmgen.
- simpl; congruence.
+ split. simpl; congruence.
+ rewrite R. assumption.
- (* Mcall *)
assert (f0 = f) by congruence. subst f0.
@@ -852,6 +877,18 @@ Local Transparent destroyed_by_op.
eapply agree_undef_regs; eauto. intros. rewrite undef_regs_other_2; auto.
congruence.
+ Simpl.
+ rewrite set_res_other by trivial.
+ rewrite undef_regs_other.
+ assumption.
+ intro.
+ rewrite in_map_iff.
+ intros (x0 & PREG & IN).
+ subst r'.
+ intro.
+ apply (preg_of_not_RA x0).
+ congruence.
+
- (* Mgoto *)
assert (f0 = f) by congruence. subst f0.
inv AT. monadInv H4.
@@ -865,25 +902,33 @@ Local Transparent destroyed_by_op.
eapply agree_exten; eauto with asmgen.
congruence.
+ rewrite INV by congruence.
+ assumption.
+
- (* Mcond true *)
assert (f0 = f) by congruence. subst f0.
exploit eval_condition_lessdef. eapply preg_vals; eauto. eauto. eauto. intros EC.
left; eapply exec_straight_opt_steps_goto; eauto.
intros. simpl in TR.
- exploit transl_cond_branch_correct; eauto. intros (rs' & jmp & A & B & C).
+ exploit transl_cond_branch_correct; eauto. intros (rs' & jmp & A & B & C & D).
exists jmp; exists k; exists rs'.
split. eexact A.
split. apply agree_exten with rs0; auto with asmgen.
- exact B.
+ split.
+ exact B.
+ rewrite D. exact LEAF.
- (* Mcond false *)
exploit eval_condition_lessdef. eapply preg_vals; eauto. eauto. eauto. intros EC.
left; eapply exec_straight_steps; eauto. intros. simpl in TR.
- exploit transl_cond_branch_correct; eauto. intros (rs' & jmp & A & B & C).
+ exploit transl_cond_branch_correct; eauto. intros (rs' & jmp & A & B & C & D).
econstructor; split.
eapply exec_straight_opt_right. eexact A. apply exec_straight_one. eexact B. auto.
split. apply agree_exten with rs0; auto. intros. Simpl.
+ split.
simpl; congruence.
+ Simpl. rewrite D.
+ exact LEAF.
- (* Mjumptable *)
assert (f0 = f) by congruence. subst f0.
@@ -905,6 +950,10 @@ Local Transparent destroyed_by_op.
simpl. intros. rewrite C; auto with asmgen. Simpl.
congruence.
+ rewrite C by congruence.
+ repeat rewrite Pregmap.gso by congruence.
+ assumption.
+
- (* Mreturn *)
assert (f0 = f) by congruence. subst f0.
inversion AT; subst. simpl in H6; monadInv H6.
@@ -947,7 +996,7 @@ Local Transparent destroyed_by_op.
simpl preg_of_iregsp. change (rs2 X30) with (rs0 X30). rewrite ATLR.
change (rs2 X2) with sp. eexact P.
simpl; congruence. congruence.
- intros (rs3 & U & V).
+ intros (rs3 & U & V & W).
assert (EXEC_PROLOGUE:
exec_straight tge tf
tf.(fn_code) rs0 m'
@@ -974,6 +1023,10 @@ Local Transparent destroyed_at_function_entry. simpl.
unfold sp; congruence.
intros. rewrite V by auto with asmgen. reflexivity.
+ rewrite W.
+ unfold rs2.
+ Simpl.
+
- (* external function *)
exploit functions_translated; eauto.
intros [tf [A B]]. simpl in B. inv B.
@@ -993,6 +1046,10 @@ Local Transparent destroyed_at_function_entry. simpl.
right. split. omega. split. auto.
rewrite <- ATPC in H5.
econstructor; eauto. congruence.
+ inv WF.
+ inv STACK.
+ inv H1.
+ congruence.
Qed.
Lemma transf_initial_states:
@@ -1028,11 +1085,17 @@ Qed.
Theorem transf_program_correct:
forward_simulation (Mach.semantics return_address_offset prog) (Asm.semantics tprog).
Proof.
- eapply forward_simulation_star with (measure := measure).
- apply senv_preserved.
- eexact transf_initial_states.
- eexact transf_final_states.
- exact step_simulation.
+ eapply forward_simulation_star with (measure := measure)
+ (match_states := fun S1 S2 => match_states S1 S2 /\ wf_state ge S1).
+ - apply senv_preserved.
+ - simpl; intros. exploit transf_initial_states; eauto.
+ intros (s2 & A & B).
+ exists s2; intuition auto. apply wf_initial; auto.
+ - simpl; intros. destruct H as [MS WF]. eapply transf_final_states; eauto.
+ - simpl; intros. destruct H0 as [MS WF].
+ exploit step_simulation; eauto. intros [ (s2' & A & B) | (A & B & C) ].
+ + left; exists s2'; intuition auto. eapply wf_step; eauto.
+ + right; intuition auto. eapply wf_step; eauto.
Qed.
End PRESERVATION.
diff --git a/aarch64/Asmgenproof1.v b/aarch64/Asmgenproof1.v
index 6f296f56..0e36bd05 100644
--- a/aarch64/Asmgenproof1.v
+++ b/aarch64/Asmgenproof1.v
@@ -22,6 +22,51 @@ Local Transparent Archi.ptr64.
(** Properties of registers *)
+Lemma preg_of_not_RA:
+ forall r, (preg_of r) <> RA.
+Proof.
+ destruct r; discriminate.
+Qed.
+
+Lemma RA_not_written:
+ forall (rs : regset) dst v,
+ rs # (preg_of dst) <- v RA = rs RA.
+Proof.
+ intros.
+ apply Pregmap.gso.
+ intro.
+ symmetry in H.
+ exact (preg_of_not_RA dst H).
+Qed.
+
+Hint Resolve RA_not_written : asmgen.
+
+Lemma RA_not_written2:
+ forall (rs : regset) dst v i,
+ preg_of dst = i ->
+ rs # i <- v RA = rs RA.
+Proof.
+ intros.
+ subst i.
+ apply RA_not_written.
+Qed.
+
+Hint Resolve RA_not_written2 : asmgen.
+
+Lemma RA_not_written3:
+ forall (rs : regset) dst v i,
+ ireg_of dst = OK i ->
+ rs # i <- v RA = rs RA.
+Proof.
+ intros.
+ unfold ireg_of in H.
+ destruct preg_of eqn:PREG; try discriminate.
+ replace i0 with i in * by congruence.
+ eapply RA_not_written2; eassumption.
+Qed.
+
+Hint Resolve RA_not_written3 : asmgen.
+
Lemma preg_of_iregsp_not_PC: forall r, preg_of_iregsp r <> PC.
Proof.
destruct r; simpl; congruence.
@@ -39,6 +84,26 @@ Proof.
red; intros; subst x. elim (preg_of_not_X16 r); auto.
Qed.
+Lemma ireg_of_not_RA: forall r x, ireg_of r = OK x -> x <> RA.
+Proof.
+ unfold ireg_of; intros. destruct (preg_of r) eqn:E; inv H.
+ red; intros; subst x. elim (preg_of_not_RA r); auto.
+Qed.
+
+Lemma ireg_of_not_RA': forall r x, ireg_of r = OK x -> RA <> x.
+Proof.
+ intros. intro.
+ apply (ireg_of_not_RA r x); auto.
+Qed.
+
+Lemma ireg_of_not_RA'': forall r x, ireg_of r = OK x -> IR RA <> IR x.
+Proof.
+ intros. intro.
+ apply (ireg_of_not_RA' r x); auto. congruence.
+Qed.
+
+Hint Resolve ireg_of_not_RA ireg_of_not_RA' ireg_of_not_RA'' : asmgen.
+
Lemma ireg_of_not_X16': forall r x, ireg_of r = OK x -> IR x <> IR X16.
Proof.
intros. apply ireg_of_not_X16 in H. congruence.
@@ -205,42 +270,49 @@ Qed.
Lemma exec_loadimm_k_w:
forall (rd: ireg) k m l,
wf_decomposition l ->
+ rd <> RA ->
forall (rs: regset) accu,
rs#rd = Vint (Int.repr accu) ->
exists rs',
exec_straight_opt ge fn (loadimm_k W rd l k) rs m k rs' m
/\ rs'#rd = Vint (Int.repr (recompose_int accu l))
- /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
+ /\ (forall r, r <> PC -> r <> rd -> rs'#r = rs#r)
+ /\ rs' # RA = rs # RA.
Proof.
- induction 1; intros rs accu ACCU; simpl.
+ induction 1; intros RD_NOT_RA rs accu ACCU; simpl.
- exists rs; split. apply exec_straight_opt_refl. auto.
-- destruct (IHwf_decomposition
+- destruct (IHwf_decomposition RD_NOT_RA
(nextinstr (rs#rd <- (insert_in_int rs#rd n p 16)))
(Zinsert accu n p 16))
- as (rs' & P & Q & R).
+ as (rs' & P & Q & R & S).
Simpl. rewrite ACCU. simpl. f_equal. apply Int.eqm_samerepr.
apply Zinsert_eqmod. auto. omega. apply Int.eqm_sym; apply Int.eqm_unsigned_repr.
exists rs'; split.
eapply exec_straight_opt_step_opt. simpl; eauto. auto. exact P.
- split. exact Q. intros; Simpl. rewrite R by auto. Simpl.
+ split. exact Q.
+ split.
+ { intros; Simpl.
+ rewrite R by auto. Simpl. }
+ { rewrite S. Simpl. }
Qed.
Lemma exec_loadimm_z_w:
forall rd l k rs m,
wf_decomposition l ->
+ rd <> RA ->
exists rs',
exec_straight ge fn (loadimm_z W rd l k) rs m k rs' m
/\ rs'#rd = Vint (Int.repr (recompose_int 0 l))
/\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
Proof.
- unfold loadimm_z; destruct 1.
+ unfold loadimm_z; destruct 1; intro RD_NOT_RA.
- econstructor; split.
apply exec_straight_one. simpl; eauto. auto.
split. Simpl.
intros; Simpl.
- set (accu0 := Zinsert 0 n p 16).
set (rs1 := nextinstr (rs#rd <- (Vint (Int.repr accu0)))).
- destruct (exec_loadimm_k_w rd k m l H1 rs1 accu0) as (rs2 & P & Q & R); auto.
+ destruct (exec_loadimm_k_w rd k m l H1 RD_NOT_RA rs1 accu0) as (rs2 & P & Q & R & S); auto.
unfold rs1; Simpl.
exists rs2; split.
eapply exec_straight_opt_step; eauto.
@@ -253,12 +325,13 @@ Qed.
Lemma exec_loadimm_n_w:
forall rd l k rs m,
wf_decomposition l ->
+ rd <> RA ->
exists rs',
exec_straight ge fn (loadimm_n W rd l k) rs m k rs' m
/\ rs'#rd = Vint (Int.repr (Z.lnot (recompose_int 0 l)))
/\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
Proof.
- unfold loadimm_n; destruct 1.
+ unfold loadimm_n; destruct 1; intro RD_NOT_RA.
- econstructor; split.
apply exec_straight_one. simpl; eauto. auto.
split. Simpl.
@@ -267,7 +340,8 @@ Proof.
set (rs1 := nextinstr (rs#rd <- (Vint (Int.repr accu0)))).
destruct (exec_loadimm_k_w rd k m (negate_decomposition l)
(negate_decomposition_wf l H1)
- rs1 accu0) as (rs2 & P & Q & R).
+ RD_NOT_RA rs1 accu0)
+ as (rs2 & P & Q & R & S).
unfold rs1; Simpl.
exists rs2; split.
eapply exec_straight_opt_step; eauto.
@@ -279,7 +353,8 @@ Proof.
Qed.
Lemma exec_loadimm32:
- forall rd n k rs m,
+ forall rd n k rs m
+ (RD_NOT_RA : rd <> RA),
exists rs',
exec_straight ge fn (loadimm32 rd n k) rs m k rs' m
/\ rs'#rd = Vint n
@@ -302,13 +377,14 @@ Proof.
apply Int.eqm_samerepr. apply decompose_notint_eqmod.
apply Int.repr_unsigned. }
destruct Nat.leb.
-+ rewrite <- A. apply exec_loadimm_z_w. apply decompose_int_wf; omega.
-+ rewrite <- B. apply exec_loadimm_n_w. apply decompose_int_wf; omega.
++ rewrite <- A. apply exec_loadimm_z_w. apply decompose_int_wf; omega. trivial.
++ rewrite <- B. apply exec_loadimm_n_w. apply decompose_int_wf; omega. trivial.
Qed.
Lemma exec_loadimm_k_x:
forall (rd: ireg) k m l,
- wf_decomposition l ->
+ wf_decomposition l ->
+ rd <> RA ->
forall (rs: regset) accu,
rs#rd = Vlong (Int64.repr accu) ->
exists rs',
@@ -316,9 +392,9 @@ Lemma exec_loadimm_k_x:
/\ rs'#rd = Vlong (Int64.repr (recompose_int accu l))
/\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
Proof.
- induction 1; intros rs accu ACCU; simpl.
+ induction 1; intros RD_NOT_RA rs accu ACCU; simpl.
- exists rs; split. apply exec_straight_opt_refl. auto.
-- destruct (IHwf_decomposition
+- destruct (IHwf_decomposition RD_NOT_RA
(nextinstr (rs#rd <- (insert_in_long rs#rd n p 16)))
(Zinsert accu n p 16))
as (rs' & P & Q & R).
@@ -332,19 +408,20 @@ Qed.
Lemma exec_loadimm_z_x:
forall rd l k rs m,
wf_decomposition l ->
+ rd <> RA ->
exists rs',
exec_straight ge fn (loadimm_z X rd l k) rs m k rs' m
/\ rs'#rd = Vlong (Int64.repr (recompose_int 0 l))
/\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
Proof.
- unfold loadimm_z; destruct 1.
+ unfold loadimm_z; destruct 1; intro RD_NOT_RA.
- econstructor; split.
apply exec_straight_one. simpl; eauto. auto.
split. Simpl.
intros; Simpl.
- set (accu0 := Zinsert 0 n p 16).
set (rs1 := nextinstr (rs#rd <- (Vlong (Int64.repr accu0)))).
- destruct (exec_loadimm_k_x rd k m l H1 rs1 accu0) as (rs2 & P & Q & R); auto.
+ destruct (exec_loadimm_k_x rd k m l H1 RD_NOT_RA rs1 accu0) as (rs2 & P & Q & R); auto.
unfold rs1; Simpl.
exists rs2; split.
eapply exec_straight_opt_step; eauto.
@@ -357,12 +434,13 @@ Qed.
Lemma exec_loadimm_n_x:
forall rd l k rs m,
wf_decomposition l ->
+ rd <> RA ->
exists rs',
exec_straight ge fn (loadimm_n X rd l k) rs m k rs' m
/\ rs'#rd = Vlong (Int64.repr (Z.lnot (recompose_int 0 l)))
/\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
Proof.
- unfold loadimm_n; destruct 1.
+ unfold loadimm_n; destruct 1; intro RD_NOT_RA.
- econstructor; split.
apply exec_straight_one. simpl; eauto. auto.
split. Simpl.
@@ -371,7 +449,7 @@ Proof.
set (rs1 := nextinstr (rs#rd <- (Vlong (Int64.repr accu0)))).
destruct (exec_loadimm_k_x rd k m (negate_decomposition l)
(negate_decomposition_wf l H1)
- rs1 accu0) as (rs2 & P & Q & R).
+ RD_NOT_RA rs1 accu0) as (rs2 & P & Q & R).
unfold rs1; Simpl.
exists rs2; split.
eapply exec_straight_opt_step; eauto.
@@ -384,12 +462,13 @@ Qed.
Lemma exec_loadimm64:
forall rd n k rs m,
+ rd <> RA ->
exists rs',
exec_straight ge fn (loadimm64 rd n k) rs m k rs' m
/\ rs'#rd = Vlong n
/\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
Proof.
- unfold loadimm64, loadimm; intros.
+ unfold loadimm64, loadimm; intros until m; intro RD_NOT_RA.
destruct (is_logical_imm64 n).
- econstructor; split.
apply exec_straight_one. simpl; eauto. auto.
@@ -406,8 +485,8 @@ Proof.
apply Int64.eqm_samerepr. apply decompose_notint_eqmod.
apply Int64.repr_unsigned. }
destruct Nat.leb.
-+ rewrite <- A. apply exec_loadimm_z_x. apply decompose_int_wf; omega.
-+ rewrite <- B. apply exec_loadimm_n_x. apply decompose_int_wf; omega.
++ rewrite <- A. apply exec_loadimm_z_x. apply decompose_int_wf; omega. trivial.
++ rewrite <- B. apply exec_loadimm_n_x. apply decompose_int_wf; omega. trivial.
Qed.
(** Add immediate *)
@@ -419,55 +498,59 @@ Lemma exec_addimm_aux_32:
Next (nextinstr (rs#rd <- (sem rs#r1 (Vint (Int.repr n))))) m) ->
(forall v n1 n2, sem (sem v (Vint n1)) (Vint n2) = sem v (Vint (Int.add n1 n2))) ->
forall rd r1 n k rs m,
+ (IR RA) <> (preg_of_iregsp (RR1 rd)) ->
exists rs',
exec_straight ge fn (addimm_aux insn rd r1 (Int.unsigned n) k) rs m k rs' m
/\ rs'#rd = sem rs#r1 (Vint n)
- /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r.
+ /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r)
+ /\ rs' # RA = rs # RA.
Proof.
- intros insn sem SEM ASSOC; intros. unfold addimm_aux.
+ intros insn sem SEM ASSOC; intros until m; intro RD_NOT_RA. unfold addimm_aux.
set (nlo := Zzero_ext 12 (Int.unsigned n)). set (nhi := Int.unsigned n - nlo).
assert (E: Int.unsigned n = nhi + nlo) by (unfold nhi; omega).
rewrite <- (Int.repr_unsigned n).
destruct (Z.eqb_spec nhi 0); [|destruct (Z.eqb_spec nlo 0)].
- econstructor; split. apply exec_straight_one. apply SEM. Simpl.
split. Simpl. do 3 f_equal; omega.
- intros; Simpl.
+ split; intros; Simpl.
- econstructor; split. apply exec_straight_one. apply SEM. Simpl.
split. Simpl. do 3 f_equal; omega.
- intros; Simpl.
+ split; intros; Simpl.
- econstructor; split. eapply exec_straight_two.
apply SEM. apply SEM. Simpl. Simpl.
split. Simpl. rewrite ASSOC. do 2 f_equal. apply Int.eqm_samerepr.
rewrite E. auto with ints.
- intros; Simpl.
+ split; intros; Simpl.
Qed.
Lemma exec_addimm32:
forall rd r1 n k rs m,
r1 <> X16 ->
+ (IR RA) <> (preg_of_iregsp (RR1 rd)) ->
exists rs',
exec_straight ge fn (addimm32 rd r1 n k) rs m k rs' m
/\ rs'#rd = Val.add rs#r1 (Vint n)
- /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r.
+ /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r)
+ /\ rs' # RA = rs # RA.
Proof.
intros. unfold addimm32. set (nn := Int.neg n).
destruct (Int.eq n (Int.zero_ext 24 n)); [| destruct (Int.eq nn (Int.zero_ext 24 nn))].
-- apply exec_addimm_aux_32 with (sem := Val.add). auto. intros; apply Val.add_assoc.
+- apply exec_addimm_aux_32 with (sem := Val.add); auto. intros; apply Val.add_assoc.
- rewrite <- Val.sub_opp_add.
- apply exec_addimm_aux_32 with (sem := Val.sub). auto.
+ apply exec_addimm_aux_32 with (sem := Val.sub); auto.
intros. rewrite ! Val.sub_add_opp, Val.add_assoc. rewrite Int.neg_add_distr. auto.
- destruct (Int.lt n Int.zero).
+ rewrite <- Val.sub_opp_add; fold nn.
- edestruct (exec_loadimm32 X16 nn) as (rs1 & A & B & C).
+ edestruct (exec_loadimm32 X16 nn) as (rs1 & A & B & C). congruence.
econstructor; split.
eapply exec_straight_trans. eexact A. eapply exec_straight_one. simpl; eauto. auto.
split. Simpl. rewrite B, C; eauto with asmgen.
- intros; Simpl.
-+ edestruct (exec_loadimm32 X16 n) as (rs1 & A & B & C).
+ split; intros; Simpl.
++ edestruct (exec_loadimm32 X16 n) as (rs1 & A & B & C). congruence.
econstructor; split.
eapply exec_straight_trans. eexact A. eapply exec_straight_one. simpl; eauto. auto.
split. Simpl. rewrite B, C; eauto with asmgen.
- intros; Simpl.
+ split; intros; Simpl.
Qed.
Lemma exec_addimm_aux_64:
@@ -477,10 +560,12 @@ Lemma exec_addimm_aux_64:
Next (nextinstr (rs#rd <- (sem rs#r1 (Vlong (Int64.repr n))))) m) ->
(forall v n1 n2, sem (sem v (Vlong n1)) (Vlong n2) = sem v (Vlong (Int64.add n1 n2))) ->
forall rd r1 n k rs m,
+ (IR RA) <> (preg_of_iregsp (RR1 rd)) ->
exists rs',
exec_straight ge fn (addimm_aux insn rd r1 (Int64.unsigned n) k) rs m k rs' m
/\ rs'#rd = sem rs#r1 (Vlong n)
- /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r.
+ /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r)
+ /\ rs' # RA = rs # RA.
Proof.
intros insn sem SEM ASSOC; intros. unfold addimm_aux.
set (nlo := Zzero_ext 12 (Int64.unsigned n)). set (nhi := Int64.unsigned n - nlo).
@@ -489,44 +574,46 @@ Proof.
destruct (Z.eqb_spec nhi 0); [|destruct (Z.eqb_spec nlo 0)].
- econstructor; split. apply exec_straight_one. apply SEM. Simpl.
split. Simpl. do 3 f_equal; omega.
- intros; Simpl.
+ split; intros; Simpl.
- econstructor; split. apply exec_straight_one. apply SEM. Simpl.
split. Simpl. do 3 f_equal; omega.
- intros; Simpl.
+ split; intros; Simpl.
- econstructor; split. eapply exec_straight_two.
apply SEM. apply SEM. Simpl. Simpl.
split. Simpl. rewrite ASSOC. do 2 f_equal. apply Int64.eqm_samerepr.
rewrite E. auto with ints.
- intros; Simpl.
+ split; intros; Simpl.
Qed.
Lemma exec_addimm64:
forall rd r1 n k rs m,
preg_of_iregsp r1 <> X16 ->
+ (IR RA) <> (preg_of_iregsp (RR1 rd)) ->
exists rs',
exec_straight ge fn (addimm64 rd r1 n k) rs m k rs' m
/\ rs'#rd = Val.addl rs#r1 (Vlong n)
- /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r.
+ /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r)
+ /\ rs' # RA = rs # RA.
Proof.
intros.
unfold addimm64. set (nn := Int64.neg n).
destruct (Int64.eq n (Int64.zero_ext 24 n)); [| destruct (Int64.eq nn (Int64.zero_ext 24 nn))].
-- apply exec_addimm_aux_64 with (sem := Val.addl). auto. intros; apply Val.addl_assoc.
+- apply exec_addimm_aux_64 with (sem := Val.addl); auto. intros; apply Val.addl_assoc.
- rewrite <- Val.subl_opp_addl.
- apply exec_addimm_aux_64 with (sem := Val.subl). auto.
+ apply exec_addimm_aux_64 with (sem := Val.subl); auto.
intros. rewrite ! Val.subl_addl_opp, Val.addl_assoc. rewrite Int64.neg_add_distr. auto.
- destruct (Int64.lt n Int64.zero).
+ rewrite <- Val.subl_opp_addl; fold nn.
- edestruct (exec_loadimm64 X16 nn) as (rs1 & A & B & C).
+ edestruct (exec_loadimm64 X16 nn) as (rs1 & A & B & C). congruence.
econstructor; split.
eapply exec_straight_trans. eexact A. eapply exec_straight_one. simpl; eauto. Simpl.
split. Simpl. rewrite B, C; eauto with asmgen. simpl. rewrite Int64.shl'_zero. auto.
- intros; Simpl.
-+ edestruct (exec_loadimm64 X16 n) as (rs1 & A & B & C).
+ split; intros; Simpl.
++ edestruct (exec_loadimm64 X16 n) as (rs1 & A & B & C). congruence.
econstructor; split.
eapply exec_straight_trans. eexact A. eapply exec_straight_one. simpl; eauto. Simpl.
split. Simpl. rewrite B, C; eauto with asmgen. simpl. rewrite Int64.shl'_zero. auto.
- intros; Simpl.
+ split; intros; Simpl.
Qed.
(** Logical immediate *)
@@ -543,22 +630,25 @@ Lemma exec_logicalimm32:
Next (nextinstr (rs#rd <- (sem rs##r1 (eval_shift_op_int rs#r2 s)))) m) ->
forall rd r1 n k rs m,
r1 <> X16 ->
+ (IR RA) <> (preg_of_iregsp (RR1 rd)) ->
exists rs',
exec_straight ge fn (logicalimm32 insn1 insn2 rd r1 n k) rs m k rs' m
/\ rs'#rd = sem rs#r1 (Vint n)
- /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r.
+ /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r)
+ /\ rs' # RA = rs # RA.
Proof.
intros until sem; intros SEM1 SEM2; intros. unfold logicalimm32.
destruct (is_logical_imm32 n).
- econstructor; split.
apply exec_straight_one. apply SEM1. reflexivity.
- split. Simpl. rewrite Int.repr_unsigned; auto. intros; Simpl.
-- edestruct (exec_loadimm32 X16 n) as (rs1 & A & B & C).
+ split. Simpl. rewrite Int.repr_unsigned; auto.
+ split; intros; Simpl.
+- edestruct (exec_loadimm32 X16 n) as (rs1 & A & B & C). congruence.
econstructor; split.
eapply exec_straight_trans. eexact A.
apply exec_straight_one. apply SEM2. reflexivity.
split. Simpl. f_equal; auto. apply C; auto with asmgen.
- intros; Simpl.
+ split; intros; Simpl.
Qed.
Lemma exec_logicalimm64:
@@ -573,50 +663,58 @@ Lemma exec_logicalimm64:
Next (nextinstr (rs#rd <- (sem rs###r1 (eval_shift_op_long rs#r2 s)))) m) ->
forall rd r1 n k rs m,
r1 <> X16 ->
+ (IR RA) <> (preg_of_iregsp (RR1 rd)) ->
exists rs',
exec_straight ge fn (logicalimm64 insn1 insn2 rd r1 n k) rs m k rs' m
/\ rs'#rd = sem rs#r1 (Vlong n)
- /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r.
+ /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r)
+ /\ rs' # RA = rs # RA.
Proof.
intros until sem; intros SEM1 SEM2; intros. unfold logicalimm64.
destruct (is_logical_imm64 n).
- econstructor; split.
apply exec_straight_one. apply SEM1. reflexivity.
- split. Simpl. rewrite Int64.repr_unsigned. auto. intros; Simpl.
-- edestruct (exec_loadimm64 X16 n) as (rs1 & A & B & C).
+ split. Simpl. rewrite Int64.repr_unsigned. auto.
+ split; intros; Simpl.
+- edestruct (exec_loadimm64 X16 n) as (rs1 & A & B & C). congruence.
econstructor; split.
eapply exec_straight_trans. eexact A.
apply exec_straight_one. apply SEM2. reflexivity.
split. Simpl. f_equal; auto. apply C; auto with asmgen.
- intros; Simpl.
+ split; intros; Simpl.
Qed.
(** Load address of symbol *)
Lemma exec_loadsymbol: forall rd s ofs k rs m,
- rd <> X16 \/ Archi.pic_code tt = false ->
+ rd <> X16 \/ Archi.pic_code tt = false ->
+ (IR RA) <> (preg_of_iregsp (RR1 rd)) ->
exists rs',
exec_straight ge fn (loadsymbol rd s ofs k) rs m k rs' m
/\ rs'#rd = Genv.symbol_address ge s ofs
- /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r.
+ /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r)
+ /\ rs'#RA = rs#RA.
Proof.
unfold loadsymbol; intros. destruct (Archi.pic_code tt).
- predSpec Ptrofs.eq Ptrofs.eq_spec ofs Ptrofs.zero.
+ subst ofs. econstructor; split.
apply exec_straight_one; [simpl; eauto | reflexivity].
- split. Simpl. intros; Simpl.
+ split. Simpl. split; intros; Simpl.
+
+ exploit exec_addimm64. instantiate (1 := rd). simpl. destruct H; congruence.
- intros (rs1 & A & B & C).
+ instantiate (1 := rd). assumption.
+ intros (rs1 & A & B & C & D).
econstructor; split.
econstructor. simpl; eauto. auto. eexact A.
split. simpl in B; rewrite B. Simpl.
rewrite <- Genv.shift_symbol_address_64 by auto.
rewrite Ptrofs.add_zero_l, Ptrofs.of_int64_to_int64 by auto. auto.
- intros. rewrite C by auto. Simpl.
+ split; intros. rewrite C by auto; Simpl.
+ rewrite D. Simpl.
- econstructor; split.
eapply exec_straight_two. simpl; eauto. simpl; eauto. auto. auto.
split. Simpl. rewrite symbol_high_low; auto.
- intros; Simpl.
+ split; intros; Simpl.
Qed.
(** Shifted operands *)
@@ -725,23 +823,25 @@ Lemma exec_arith_extended:
Next (nextinstr (rs#rd <- (sem rs###r1 (eval_shift_op_long rs#r2 s)))) m) ->
forall (rd r1 r2: ireg) (ex: extension) (a: amount64) (k: code) rs m,
r1 <> X16 ->
+ (IR RA) <> (preg_of_iregsp (RR1 rd)) ->
exists rs',
exec_straight ge fn (arith_extended insnX insnS rd r1 r2 ex a k) rs m k rs' m
/\ rs'#rd = sem rs#r1 (Op.eval_extend ex rs#r2 a)
- /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r.
+ /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r)
+ /\ rs' # RA = rs # RA.
Proof.
intros sem insnX insnS EX ES; intros. unfold arith_extended. destruct (Int.ltu a (Int.repr 5)).
- econstructor; split.
apply exec_straight_one. rewrite EX; eauto. auto.
split. Simpl. f_equal. destruct ex; auto.
- intros; Simpl.
+ split; intros; Simpl.
- exploit (exec_move_extended_base X16 r2 ex). intros (rs' & A & B & C).
econstructor; split.
eapply exec_straight_trans. eexact A. apply exec_straight_one.
rewrite ES. eauto. auto.
split. Simpl. unfold ir0x. rewrite C by eauto with asmgen. f_equal.
rewrite B. destruct ex; auto.
- intros; Simpl.
+ split; intros; Simpl.
Qed.
(** Extended right shift *)
@@ -749,15 +849,18 @@ Qed.
Lemma exec_shrx32: forall (rd r1: ireg) (n: int) k v (rs: regset) m,
Val.shrx rs#r1 (Vint n) = Some v ->
r1 <> X16 ->
+ (IR RA) <> (preg_of_iregsp (RR1 rd)) ->
exists rs',
exec_straight ge fn (shrx32 rd r1 n k) rs m k rs' m
/\ rs'#rd = v
- /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r.
+ /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r)
+ /\ rs' # RA = rs # RA.
Proof.
unfold shrx32; intros. apply Val.shrx_shr_3 in H.
destruct (Int.eq n Int.zero) eqn:E.
- econstructor; split. apply exec_straight_one; [simpl;eauto|auto].
- split. Simpl. subst v; auto. intros; Simpl.
+ split. Simpl. subst v; auto.
+ split; intros; Simpl.
- generalize (Int.eq_spec n Int.one).
destruct (Int.eq n Int.one); intro ONE.
* subst n.
@@ -769,27 +872,31 @@ Proof.
change (Int.ltu Int.one Int.iwordsize) with true; simpl.
rewrite Int.or_zero_l.
reflexivity.
- ** intros; Simpl.
+ ** split; intros; Simpl.
* econstructor; split. eapply exec_straight_three.
unfold exec_instr. rewrite or_zero_eval_shift_op_int by congruence. eauto.
simpl; eauto.
unfold exec_instr. rewrite or_zero_eval_shift_op_int by congruence. eauto.
auto. auto. auto.
- split. subst v; Simpl. intros; Simpl.
+ split. subst v; Simpl.
+ split; intros; Simpl.
Qed.
Lemma exec_shrx64: forall (rd r1: ireg) (n: int) k v (rs: regset) m,
Val.shrxl rs#r1 (Vint n) = Some v ->
r1 <> X16 ->
+ (IR RA) <> (preg_of_iregsp (RR1 rd)) ->
exists rs',
exec_straight ge fn (shrx64 rd r1 n k) rs m k rs' m
/\ rs'#rd = v
- /\ forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r.
+ /\ (forall r, data_preg r = true -> r <> rd -> rs'#r = rs#r)
+ /\ rs' # RA = rs # RA.
Proof.
unfold shrx64; intros. apply Val.shrxl_shrl_3 in H.
destruct (Int.eq n Int.zero) eqn:E.
- econstructor; split. apply exec_straight_one; [simpl;eauto|auto].
- split. Simpl. subst v; auto. intros; Simpl.
+ split. Simpl. subst v; auto.
+ split; intros; Simpl.
- generalize (Int.eq_spec n Int.one).
destruct (Int.eq n Int.one); intro ONE.
* subst n.
@@ -801,13 +908,14 @@ Proof.
change (Int.ltu Int.one Int64.iwordsize') with true; simpl.
rewrite Int64.or_zero_l.
reflexivity.
- ** intros; Simpl.
+ ** split; intros; Simpl.
* econstructor; split. eapply exec_straight_three.
unfold exec_instr. rewrite or_zero_eval_shift_op_long by congruence. eauto.
simpl; eauto.
unfold exec_instr. rewrite or_zero_eval_shift_op_long by congruence. eauto.
auto. auto. auto.
- split. subst v; Simpl. intros; Simpl.
+ split. subst v; Simpl.
+ split; intros; Simpl.
Qed.
(** Condition bits *)
@@ -1063,6 +1171,56 @@ Ltac ArgsInv :=
| [ H: freg_of _ = OK _ |- _ ] => simpl in *; rewrite (freg_of_eq _ _ H) in *
end).
+Lemma compare_int_RA:
+ forall rs a b m,
+ compare_int rs a b m X30 = rs X30.
+Proof.
+ unfold compare_int.
+ intros.
+ repeat rewrite Pregmap.gso by congruence.
+ trivial.
+Qed.
+
+Hint Resolve compare_int_RA : asmgen.
+
+Lemma compare_long_RA:
+ forall rs a b m,
+ compare_long rs a b m X30 = rs X30.
+Proof.
+ unfold compare_long.
+ intros.
+ repeat rewrite Pregmap.gso by congruence.
+ trivial.
+Qed.
+
+Hint Resolve compare_long_RA : asmgen.
+
+Lemma compare_float_RA:
+ forall rs a b,
+ compare_float rs a b X30 = rs X30.
+Proof.
+ unfold compare_float.
+ intros.
+ destruct a; destruct b.
+ all: repeat rewrite Pregmap.gso by congruence; trivial.
+Qed.
+
+Hint Resolve compare_float_RA : asmgen.
+
+
+Lemma compare_single_RA:
+ forall rs a b,
+ compare_single rs a b X30 = rs X30.
+Proof.
+ unfold compare_single.
+ intros.
+ destruct a; destruct b.
+ all: repeat rewrite Pregmap.gso by congruence; trivial.
+Qed.
+
+Hint Resolve compare_single_RA : asmgen.
+
+
Lemma transl_cond_correct:
forall cond args k c rs m,
transl_cond cond args k = OK c ->
@@ -1071,185 +1229,218 @@ Lemma transl_cond_correct:
/\ (forall b,
eval_condition cond (map rs (map preg_of args)) m = Some b ->
eval_testcond (cond_for_cond cond) rs' = Some b)
- /\ forall r, data_preg r = true -> rs'#r = rs#r.
+ /\ (forall r, data_preg r = true -> rs'#r = rs#r)
+ /\ rs' # RA = rs # RA.
Proof.
intros until m; intros TR. destruct cond; simpl in TR; ArgsInv.
- (* Ccomp *)
econstructor; split. apply exec_straight_one. simpl; eauto. auto.
- split; intros. apply eval_testcond_compare_sint; auto.
+ repeat split; intros. apply eval_testcond_compare_sint; auto.
destruct r; reflexivity || discriminate.
- (* Ccompu *)
econstructor; split. apply exec_straight_one. simpl; eauto. auto.
- split; intros. apply eval_testcond_compare_uint; auto.
+ repeat split; intros. apply eval_testcond_compare_uint; auto.
destruct r; reflexivity || discriminate.
- (* Ccompimm *)
destruct (is_arith_imm32 n); [|destruct (is_arith_imm32 (Int.neg n))].
+ econstructor; split. apply exec_straight_one. simpl; eauto. auto.
- split; intros. rewrite Int.repr_unsigned. apply eval_testcond_compare_sint; auto.
+ repeat split; intros. rewrite Int.repr_unsigned. apply eval_testcond_compare_sint; auto.
destruct r; reflexivity || discriminate.
+ econstructor; split.
apply exec_straight_one. simpl. rewrite Int.repr_unsigned, Int.neg_involutive. eauto. auto.
- split; intros. apply eval_testcond_compare_sint; auto.
+ repeat split; intros. apply eval_testcond_compare_sint; auto.
destruct r; reflexivity || discriminate.
-+ exploit (exec_loadimm32 X16 n). intros (rs' & A & B & C).
++ exploit (exec_loadimm32 X16 n). congruence. intros (rs' & A & B & C).
econstructor; split.
eapply exec_straight_trans. eexact A. apply exec_straight_one.
simpl. rewrite B, C by eauto with asmgen. eauto. auto.
- split; intros. apply eval_testcond_compare_sint; auto.
- transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen.
+ repeat split; intros. apply eval_testcond_compare_sint; auto.
+ transitivity (rs' r). destruct r; reflexivity || discriminate.
+ auto with asmgen.
+ Simpl. rewrite compare_int_RA.
+ apply C; congruence.
- (* Ccompuimm *)
destruct (is_arith_imm32 n); [|destruct (is_arith_imm32 (Int.neg n))].
+ econstructor; split. apply exec_straight_one. simpl; eauto. auto.
- split; intros. rewrite Int.repr_unsigned. apply eval_testcond_compare_uint; auto.
+ repeat split; intros. rewrite Int.repr_unsigned. apply eval_testcond_compare_uint; auto.
destruct r; reflexivity || discriminate.
+ econstructor; split.
apply exec_straight_one. simpl. rewrite Int.repr_unsigned, Int.neg_involutive. eauto. auto.
- split; intros. apply eval_testcond_compare_uint; auto.
+ repeat split; intros. apply eval_testcond_compare_uint; auto.
destruct r; reflexivity || discriminate.
-+ exploit (exec_loadimm32 X16 n). intros (rs' & A & B & C).
++ exploit (exec_loadimm32 X16 n). congruence. intros (rs' & A & B & C).
econstructor; split.
eapply exec_straight_trans. eexact A. apply exec_straight_one.
simpl. rewrite B, C by eauto with asmgen. eauto. auto.
- split; intros. apply eval_testcond_compare_uint; auto.
+ repeat split; intros. apply eval_testcond_compare_uint; auto.
transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen.
+ Simpl. rewrite compare_int_RA.
+ apply C; congruence.
- (* Ccompshift *)
econstructor; split. apply exec_straight_one. simpl; eauto. auto.
- split; intros. rewrite transl_eval_shift. apply eval_testcond_compare_sint; auto.
+ repeat split; intros. rewrite transl_eval_shift. apply eval_testcond_compare_sint; auto.
destruct r; reflexivity || discriminate.
- (* Ccompushift *)
econstructor; split. apply exec_straight_one. simpl; eauto. auto.
- split; intros. rewrite transl_eval_shift. apply eval_testcond_compare_uint; auto.
+ repeat split; intros. rewrite transl_eval_shift. apply eval_testcond_compare_uint; auto.
destruct r; reflexivity || discriminate.
- (* Cmaskzero *)
destruct (is_logical_imm32 n).
+ econstructor; split. apply exec_straight_one. simpl; eauto. auto.
- split; intros. rewrite Int.repr_unsigned. apply (eval_testcond_compare_sint Ceq); auto.
+ repeat split; intros. rewrite Int.repr_unsigned. apply (eval_testcond_compare_sint Ceq); auto.
destruct r; reflexivity || discriminate.
-+ exploit (exec_loadimm32 X16 n). intros (rs' & A & B & C).
++ exploit (exec_loadimm32 X16 n). congruence. intros (rs' & A & B & C).
econstructor; split.
eapply exec_straight_trans. eexact A.
apply exec_straight_one. simpl. rewrite B, C by eauto with asmgen. eauto. auto.
- split; intros. apply (eval_testcond_compare_sint Ceq); auto.
+ repeat split; intros. apply (eval_testcond_compare_sint Ceq); auto.
transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen.
+ Simpl. rewrite compare_int_RA.
+ apply C; congruence.
+
- (* Cmasknotzero *)
destruct (is_logical_imm32 n).
+ econstructor; split. apply exec_straight_one. simpl; eauto. auto.
- split; intros. rewrite Int.repr_unsigned. apply (eval_testcond_compare_sint Cne); auto.
+ repeat split; intros. rewrite Int.repr_unsigned. apply (eval_testcond_compare_sint Cne); auto.
destruct r; reflexivity || discriminate.
-+ exploit (exec_loadimm32 X16 n). intros (rs' & A & B & C).
+
++ exploit (exec_loadimm32 X16 n). congruence. intros (rs' & A & B & C).
econstructor; split.
eapply exec_straight_trans. eexact A.
apply exec_straight_one. simpl. rewrite B, C by eauto with asmgen. eauto. auto.
- split; intros. apply (eval_testcond_compare_sint Cne); auto.
+ repeat split; intros. apply (eval_testcond_compare_sint Cne); auto.
transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen.
+ Simpl. rewrite compare_int_RA.
+ apply C; congruence.
+
- (* Ccompl *)
econstructor; split. apply exec_straight_one. simpl; eauto. auto.
- split; intros. apply eval_testcond_compare_slong; auto.
+ repeat split; intros. apply eval_testcond_compare_slong; auto.
destruct r; reflexivity || discriminate.
- (* Ccomplu *)
econstructor; split. apply exec_straight_one. simpl; eauto. auto.
- split; intros. apply eval_testcond_compare_ulong; auto.
+ repeat split; intros. apply eval_testcond_compare_ulong; auto.
destruct r; reflexivity || discriminate.
- (* Ccomplimm *)
destruct (is_arith_imm64 n); [|destruct (is_arith_imm64 (Int64.neg n))].
+ econstructor; split. apply exec_straight_one. simpl; eauto. auto.
- split; intros. rewrite Int64.repr_unsigned. apply eval_testcond_compare_slong; auto.
+ repeat split; intros. rewrite Int64.repr_unsigned. apply eval_testcond_compare_slong; auto.
destruct r; reflexivity || discriminate.
+ econstructor; split.
apply exec_straight_one. simpl. rewrite Int64.repr_unsigned, Int64.neg_involutive. eauto. auto.
- split; intros. apply eval_testcond_compare_slong; auto.
+ repeat split; intros. apply eval_testcond_compare_slong; auto.
destruct r; reflexivity || discriminate.
-+ exploit (exec_loadimm64 X16 n). intros (rs' & A & B & C).
++ exploit (exec_loadimm64 X16 n). congruence. intros (rs' & A & B & C).
econstructor; split.
eapply exec_straight_trans. eexact A. apply exec_straight_one.
simpl. rewrite B, C by eauto with asmgen. eauto. auto.
- split; intros. apply eval_testcond_compare_slong; auto.
+ repeat split; intros. apply eval_testcond_compare_slong; auto.
transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen.
+ Simpl. rewrite compare_long_RA.
+ apply C; congruence.
+
- (* Ccompluimm *)
destruct (is_arith_imm64 n); [|destruct (is_arith_imm64 (Int64.neg n))].
+ econstructor; split. apply exec_straight_one. simpl; eauto. auto.
- split; intros. rewrite Int64.repr_unsigned. apply eval_testcond_compare_ulong; auto.
+ repeat split; intros. rewrite Int64.repr_unsigned. apply eval_testcond_compare_ulong; auto.
destruct r; reflexivity || discriminate.
+ econstructor; split.
apply exec_straight_one. simpl. rewrite Int64.repr_unsigned, Int64.neg_involutive. eauto. auto.
- split; intros. apply eval_testcond_compare_ulong; auto.
+ repeat split; intros. apply eval_testcond_compare_ulong; auto.
destruct r; reflexivity || discriminate.
-+ exploit (exec_loadimm64 X16 n). intros (rs' & A & B & C).
++ exploit (exec_loadimm64 X16 n). congruence. intros (rs' & A & B & C).
econstructor; split.
eapply exec_straight_trans. eexact A. apply exec_straight_one.
simpl. rewrite B, C by eauto with asmgen. eauto. auto.
- split; intros. apply eval_testcond_compare_ulong; auto.
+ repeat split; intros. apply eval_testcond_compare_ulong; auto.
transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen.
+ Simpl. rewrite compare_long_RA.
+ apply C; congruence.
+
- (* Ccomplshift *)
econstructor; split. apply exec_straight_one. simpl; eauto. auto.
- split; intros. rewrite transl_eval_shiftl. apply eval_testcond_compare_slong; auto.
+ repeat split; intros. rewrite transl_eval_shiftl. apply eval_testcond_compare_slong; auto.
destruct r; reflexivity || discriminate.
- (* Ccomplushift *)
econstructor; split. apply exec_straight_one. simpl; eauto. auto.
- split; intros. rewrite transl_eval_shiftl. apply eval_testcond_compare_ulong; auto.
+ repeat split; intros. rewrite transl_eval_shiftl. apply eval_testcond_compare_ulong; auto.
destruct r; reflexivity || discriminate.
- (* Cmasklzero *)
destruct (is_logical_imm64 n).
+ econstructor; split. apply exec_straight_one. simpl; eauto. auto.
- split; intros. rewrite Int64.repr_unsigned. apply (eval_testcond_compare_slong Ceq); auto.
+ repeat split; intros. rewrite Int64.repr_unsigned. apply (eval_testcond_compare_slong Ceq); auto.
destruct r; reflexivity || discriminate.
-+ exploit (exec_loadimm64 X16 n). intros (rs' & A & B & C).
++ exploit (exec_loadimm64 X16 n). congruence. intros (rs' & A & B & C).
econstructor; split.
eapply exec_straight_trans. eexact A.
apply exec_straight_one. simpl. rewrite B, C by eauto with asmgen. eauto. auto.
- split; intros. apply (eval_testcond_compare_slong Ceq); auto.
+ repeat split; intros. apply (eval_testcond_compare_slong Ceq); auto.
transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen.
+ Simpl. rewrite compare_long_RA.
+ apply C; congruence.
+
- (* Cmasknotzero *)
destruct (is_logical_imm64 n).
+ econstructor; split. apply exec_straight_one. simpl; eauto. auto.
- split; intros. rewrite Int64.repr_unsigned. apply (eval_testcond_compare_slong Cne); auto.
+ repeat split; intros. rewrite Int64.repr_unsigned. apply (eval_testcond_compare_slong Cne); auto.
destruct r; reflexivity || discriminate.
-+ exploit (exec_loadimm64 X16 n). intros (rs' & A & B & C).
++ exploit (exec_loadimm64 X16 n). congruence. intros (rs' & A & B & C).
econstructor; split.
eapply exec_straight_trans. eexact A.
apply exec_straight_one. simpl. rewrite B, C by eauto with asmgen. eauto. auto.
- split; intros. apply (eval_testcond_compare_slong Cne); auto.
+ repeat split; intros. apply (eval_testcond_compare_slong Cne); auto.
transitivity (rs' r). destruct r; reflexivity || discriminate. auto with asmgen.
+ Simpl. rewrite compare_long_RA.
+ apply C; congruence.
+
- (* Ccompf *)
econstructor; split. apply exec_straight_one. simpl; eauto.
rewrite compare_float_inv; auto.
- split; intros. apply eval_testcond_compare_float; auto.
+ repeat split; intros. apply eval_testcond_compare_float; auto.
destruct r; discriminate || rewrite compare_float_inv; auto.
+ Simpl.
- (* Cnotcompf *)
econstructor; split. apply exec_straight_one. simpl; eauto.
rewrite compare_float_inv; auto.
- split; intros. apply eval_testcond_compare_not_float; auto.
+ repeat split; intros. apply eval_testcond_compare_not_float; auto.
destruct r; discriminate || rewrite compare_float_inv; auto.
+ Simpl.
- (* Ccompfzero *)
econstructor; split. apply exec_straight_one. simpl; eauto.
rewrite compare_float_inv; auto.
- split; intros. apply eval_testcond_compare_float; auto.
+ repeat split; intros. apply eval_testcond_compare_float; auto.
destruct r; discriminate || rewrite compare_float_inv; auto.
+ Simpl.
- (* Cnotcompfzero *)
econstructor; split. apply exec_straight_one. simpl; eauto.
rewrite compare_float_inv; auto.
- split; intros. apply eval_testcond_compare_not_float; auto.
+ repeat split; intros. apply eval_testcond_compare_not_float; auto.
destruct r; discriminate || rewrite compare_float_inv; auto.
+ Simpl.
- (* Ccompfs *)
econstructor; split. apply exec_straight_one. simpl; eauto.
rewrite compare_single_inv; auto.
- split; intros. apply eval_testcond_compare_single; auto.
+ repeat split; intros. apply eval_testcond_compare_single; auto.
destruct r; discriminate || rewrite compare_single_inv; auto.
+ Simpl.
- (* Cnotcompfs *)
econstructor; split. apply exec_straight_one. simpl; eauto.
rewrite compare_single_inv; auto.
- split; intros. apply eval_testcond_compare_not_single; auto.
+ repeat split; intros. apply eval_testcond_compare_not_single; auto.
destruct r; discriminate || rewrite compare_single_inv; auto.
+ Simpl.
- (* Ccompfszero *)
econstructor; split. apply exec_straight_one. simpl; eauto.
rewrite compare_single_inv; auto.
- split; intros. apply eval_testcond_compare_single; auto.
+ repeat split; intros. apply eval_testcond_compare_single; auto.
destruct r; discriminate || rewrite compare_single_inv; auto.
+ Simpl.
- (* Cnotcompfszero *)
econstructor; split. apply exec_straight_one. simpl; eauto.
rewrite compare_single_inv; auto.
- split; intros. apply eval_testcond_compare_not_single; auto.
+ repeat split; intros. apply eval_testcond_compare_not_single; auto.
destruct r; discriminate || rewrite compare_single_inv; auto.
+ Simpl.
Qed.
(** Translation of conditional branches *)
@@ -1262,7 +1453,8 @@ Lemma transl_cond_branch_correct:
exec_straight_opt ge fn c rs m (insn :: k) rs' m
/\ exec_instr ge fn insn rs' m =
(if b then goto_label fn lbl rs' m else Next (nextinstr rs') m)
- /\ forall r, data_preg r = true -> rs'#r = rs#r.
+ /\ (forall r, data_preg r = true -> rs'#r = rs#r)
+ /\ rs' # RA = rs # RA.
Proof.
intros until b; intros TR EV.
assert (DFL:
@@ -1271,13 +1463,14 @@ Proof.
exec_straight_opt ge fn c rs m (insn :: k) rs' m
/\ exec_instr ge fn insn rs' m =
(if b then goto_label fn lbl rs' m else Next (nextinstr rs') m)
- /\ forall r, data_preg r = true -> rs'#r = rs#r).
+ /\ (forall r, data_preg r = true -> rs'#r = rs#r)
+ /\ rs' # RA = rs # RA ).
{
unfold transl_cond_branch_default; intros.
- exploit transl_cond_correct; eauto. intros (rs' & A & B & C).
+ exploit transl_cond_correct; eauto. intros (rs' & A & B & C & D).
exists rs', (Pbc (cond_for_cond cond) lbl); split.
apply exec_straight_opt_intro. eexact A.
- split; auto. simpl. rewrite (B b) by auto. auto.
+ repeat split; auto. simpl. rewrite (B b) by auto. auto.
}
Local Opaque transl_cond transl_cond_branch_default.
destruct args as [ | a1 args]; simpl in TR; auto.
@@ -1371,13 +1564,15 @@ Ltac TranslOpSimpl :=
[ apply exec_straight_one; [simpl; eauto | reflexivity]
| split; [ rewrite ? transl_eval_shift, ? transl_eval_shiftl;
apply Val.lessdef_same; Simpl; fail
- | intros; Simpl; fail ] ].
+ | split; [ intros; Simpl; fail
+ | intros; Simpl; eauto with asmgen; fail] ]].
Ltac TranslOpBase :=
econstructor; split;
[ apply exec_straight_one; [simpl; eauto | reflexivity]
| split; [ rewrite ? transl_eval_shift, ? transl_eval_shiftl; Simpl
- | intros; Simpl; fail ] ].
+ | split; [ intros; Simpl; fail
+ | intros; Simpl; eapply RA_not_written2; eauto] ]].
Lemma transl_op_correct:
forall op args res k (rs: regset) m v c,
@@ -1386,21 +1581,29 @@ Lemma transl_op_correct:
exists rs',
exec_straight ge fn c rs m k rs' m
/\ Val.lessdef v rs'#(preg_of res)
- /\ forall r, data_preg r = true -> r <> preg_of res -> preg_notin r (destroyed_by_op op) -> rs' r = rs r.
+ /\ (forall r, data_preg r = true -> r <> preg_of res -> preg_notin r (destroyed_by_op op) -> rs' r = rs r)
+ /\ rs' RA = rs RA.
Proof.
Local Opaque Int.eq Int64.eq Val.add Val.addl Int.zwordsize Int64.zwordsize.
intros until c; intros TR EV.
unfold transl_op in TR; destruct op; ArgsInv; simpl in EV; SimplEval EV; try TranslOpSimpl.
- (* move *)
destruct (preg_of res) eqn:RR; try discriminate; destruct (preg_of m0) eqn:R1; inv TR.
-+ TranslOpSimpl.
-+ TranslOpSimpl.
+ all: TranslOpSimpl.
- (* intconst *)
- exploit exec_loadimm32. intros (rs' & A & B & C).
- exists rs'; split. eexact A. split. rewrite B; auto. intros; auto with asmgen.
+ exploit exec_loadimm32. apply (ireg_of_not_RA res); eassumption.
+ intros (rs' & A & B & C).
+ exists rs'; split. eexact A. split. rewrite B; auto.
+ split. intros; auto with asmgen.
+ apply C. congruence.
+ eapply ireg_of_not_RA''; eauto.
- (* longconst *)
- exploit exec_loadimm64. intros (rs' & A & B & C).
- exists rs'; split. eexact A. split. rewrite B; auto. intros; auto with asmgen.
+ exploit exec_loadimm64. apply (ireg_of_not_RA res); eassumption.
+ intros (rs' & A & B & C).
+ exists rs'; split. eexact A. split. rewrite B; auto.
+ split. intros; auto with asmgen.
+ apply C. congruence.
+ eapply ireg_of_not_RA''; eauto.
- (* floatconst *)
destruct (Float.eq_dec n Float.zero).
+ subst n. TranslOpSimpl.
@@ -1410,11 +1613,15 @@ Local Opaque Int.eq Int64.eq Val.add Val.addl Int.zwordsize Int64.zwordsize.
+ subst n. TranslOpSimpl.
+ TranslOpSimpl.
- (* loadsymbol *)
- exploit (exec_loadsymbol x id ofs). eauto with asmgen. intros (rs' & A & B & C).
- exists rs'; split. eexact A. split. rewrite B; auto. auto.
+ exploit (exec_loadsymbol x id ofs). eauto with asmgen.
+ apply (ireg_of_not_RA'' res); eassumption.
+ intros (rs' & A & B & C & D).
+ exists rs'; split. eexact A. split. rewrite B; auto.
+ split; auto.
- (* addrstack *)
exploit (exec_addimm64 x XSP (Ptrofs.to_int64 ofs)). simpl; eauto with asmgen.
- intros (rs' & A & B & C).
+ apply (ireg_of_not_RA'' res); eassumption.
+ intros (rs' & A & B & C & D).
exists rs'; split. eexact A. split. simpl in B; rewrite B.
Local Transparent Val.addl.
destruct (rs SP); simpl; auto. rewrite Ptrofs.of_int64_to_int64 by auto. auto.
@@ -1422,7 +1629,8 @@ Local Transparent Val.addl.
- (* shift *)
rewrite <- transl_eval_shift'. TranslOpSimpl.
- (* addimm *)
- exploit (exec_addimm32 x x0 n). eauto with asmgen. intros (rs' & A & B & C).
+ exploit (exec_addimm32 x x0 n). eauto with asmgen. eapply ireg_of_not_RA''; eassumption.
+ intros (rs' & A & B & C & D).
exists rs'; split. eexact A. split. rewrite B; auto. auto.
- (* mul *)
TranslOpBase.
@@ -1430,18 +1638,20 @@ Local Transparent Val.add.
destruct (rs x0); auto; destruct (rs x1); auto. simpl. rewrite Int.add_zero_l; auto.
- (* andimm *)
exploit (exec_logicalimm32 (Pandimm W) (Pand W)).
- intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen.
- intros (rs' & A & B & C).
- exists rs'; split. eexact A. split. rewrite B; auto. auto.
+ intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen. apply (ireg_of_not_RA'' res); eassumption.
+ intros (rs' & A & B & C & D).
+ exists rs'; split. eexact A. split. rewrite B; auto.
+ split; auto.
- (* orimm *)
exploit (exec_logicalimm32 (Porrimm W) (Porr W)).
- intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen.
- intros (rs' & A & B & C).
- exists rs'; split. eexact A. split. rewrite B; auto. auto.
+ intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen. apply (ireg_of_not_RA'' res); eassumption.
+ intros (rs' & A & B & C & D).
+ exists rs'; split. eexact A. split. rewrite B; auto.
+ split; auto.
- (* xorimm *)
exploit (exec_logicalimm32 (Peorimm W) (Peor W)).
- intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen.
- intros (rs' & A & B & C).
+ intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen. apply (ireg_of_not_RA'' res); eassumption.
+ intros (rs' & A & B & C & D).
exists rs'; split. eexact A. split. rewrite B; auto. auto.
- (* not *)
TranslOpBase.
@@ -1450,8 +1660,10 @@ Local Transparent Val.add.
TranslOpBase.
destruct (eval_shift s (rs x0) a); auto. simpl. rewrite Int.or_zero_l; auto.
- (* shrx *)
- exploit (exec_shrx32 x x0 n); eauto with asmgen. intros (rs' & A & B & C).
- econstructor; split. eexact A. split. rewrite B; auto. auto.
+ exploit (exec_shrx32 x x0 n); eauto with asmgen. apply (ireg_of_not_RA'' res); eassumption.
+ intros (rs' & A & B & C & D).
+ econstructor; split. eexact A. split. rewrite B; auto.
+ split; auto.
- (* zero-ext *)
TranslOpBase.
destruct (rs x0); auto; simpl. rewrite Int.shl_zero. auto.
@@ -1475,36 +1687,47 @@ Local Transparent Val.add.
- (* extend *)
exploit (exec_move_extended x0 x1 x a k). intros (rs' & A & B & C).
econstructor; split. eexact A.
- split. rewrite B; auto. eauto with asmgen.
+ split. rewrite B; auto.
+ split; eauto with asmgen.
- (* addext *)
exploit (exec_arith_extended Val.addl Paddext (Padd X)).
- auto. auto. instantiate (1 := x1). eauto with asmgen. intros (rs' & A & B & C).
- econstructor; split. eexact A. split. rewrite B; auto. auto.
+ auto. auto. instantiate (1 := x1). eauto with asmgen.
+ apply (ireg_of_not_RA'' res); eassumption.
+ intros (rs' & A & B & C & D).
+ econstructor; split. eexact A. split. rewrite B; auto.
+ split; auto.
- (* addlimm *)
exploit (exec_addimm64 x x0 n). simpl. generalize (ireg_of_not_X16 _ _ EQ1). congruence.
- intros (rs' & A & B & C).
+ apply (ireg_of_not_RA'' res); eassumption.
+ intros (rs' & A & B & C & D).
exists rs'; split. eexact A. split. simpl in B; rewrite B; auto. auto.
- (* subext *)
exploit (exec_arith_extended Val.subl Psubext (Psub X)).
- auto. auto. instantiate (1 := x1). eauto with asmgen. intros (rs' & A & B & C).
- econstructor; split. eexact A. split. rewrite B; auto. auto.
+ auto. auto. instantiate (1 := x1). eauto with asmgen.
+ apply (ireg_of_not_RA'' res); eassumption.
+ intros (rs' & A & B & C & D).
+ econstructor; split. eexact A. split. rewrite B; auto.
+ split; auto.
- (* mull *)
TranslOpBase.
destruct (rs x0); auto; destruct (rs x1); auto. simpl. rewrite Int64.add_zero_l; auto.
- (* andlimm *)
exploit (exec_logicalimm64 (Pandimm X) (Pand X)).
intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen.
- intros (rs' & A & B & C).
+ apply (ireg_of_not_RA'' res); eassumption.
+ intros (rs' & A & B & C & D).
exists rs'; split. eexact A. split. rewrite B; auto. auto.
- (* orlimm *)
exploit (exec_logicalimm64 (Porrimm X) (Porr X)).
intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen.
- intros (rs' & A & B & C).
+ apply (ireg_of_not_RA'' res); eassumption.
+ intros (rs' & A & B & C & D).
exists rs'; split. eexact A. split. rewrite B; auto. auto.
- (* xorlimm *)
exploit (exec_logicalimm64 (Peorimm X) (Peor X)).
intros; reflexivity. intros; reflexivity. instantiate (1 := x0). eauto with asmgen.
- intros (rs' & A & B & C).
+ apply (ireg_of_not_RA'' res); eassumption.
+ intros (rs' & A & B & C & D).
exists rs'; split. eexact A. split. rewrite B; auto. auto.
- (* notl *)
TranslOpBase.
@@ -1513,7 +1736,8 @@ Local Transparent Val.add.
TranslOpBase.
destruct (eval_shiftl s (rs x0) a); auto. simpl. rewrite Int64.or_zero_l; auto.
- (* shrx *)
- exploit (exec_shrx64 x x0 n); eauto with asmgen. intros (rs' & A & B & C).
+ exploit (exec_shrx64 x x0 n); eauto with asmgen.
+ apply (ireg_of_not_RA'' res); eassumption. intros (rs' & A & B & C & D ).
econstructor; split. eexact A. split. rewrite B; auto. auto.
- (* zero-ext-l *)
TranslOpBase.
@@ -1534,35 +1758,37 @@ Local Transparent Val.add.
TranslOpBase.
destruct (rs x0); simpl; auto. rewrite ! a64_range; simpl. rewrite <- Int64.sign_ext_shr'_min; auto using a64_range.
- (* condition *)
- exploit (transl_cond_correct cond args); eauto. intros (rs' & A & B & C).
+ exploit (transl_cond_correct cond args); eauto. intros (rs' & A & B & C & D).
econstructor; split.
eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl; eauto. auto.
split. Simpl. destruct (eval_condition cond (map rs (map preg_of args)) m) as [b|]; simpl in *.
rewrite (B b) by auto. auto.
auto.
- intros; Simpl.
+ split; intros; Simpl.
- (* select *)
destruct (preg_of res) eqn:RES; monadInv TR.
+ (* integer *)
generalize (ireg_of_eq _ _ EQ) (ireg_of_eq _ _ EQ1); intros E1 E2; rewrite E1, E2.
- exploit (transl_cond_correct cond args); eauto. intros (rs' & A & B & C).
+ exploit (transl_cond_correct cond args); eauto. intros (rs' & A & B & C & D).
econstructor; split.
eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl; eauto. auto.
split. Simpl. destruct (eval_condition cond (map rs (map preg_of args)) m) as [b|]; simpl in *.
rewrite (B b) by auto. rewrite !C. apply Val.lessdef_normalize.
rewrite <- E2; auto with asmgen. rewrite <- E1; auto with asmgen.
auto.
- intros; Simpl.
+ split; intros; Simpl.
+ rewrite <- D.
+ eapply RA_not_written2; eassumption.
+ (* FP *)
generalize (freg_of_eq _ _ EQ) (freg_of_eq _ _ EQ1); intros E1 E2; rewrite E1, E2.
- exploit (transl_cond_correct cond args); eauto. intros (rs' & A & B & C).
+ exploit (transl_cond_correct cond args); eauto. intros (rs' & A & B & C & D).
econstructor; split.
eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl; eauto. auto.
split. Simpl. destruct (eval_condition cond (map rs (map preg_of args)) m) as [b|]; simpl in *.
rewrite (B b) by auto. rewrite !C. apply Val.lessdef_normalize.
rewrite <- E2; auto with asmgen. rewrite <- E1; auto with asmgen.
auto.
- intros; Simpl.
+ split; intros; Simpl.
Qed.
(** Translation of addressing modes, loads, stores *)
@@ -1574,7 +1800,8 @@ Lemma transl_addressing_correct:
exists ad rs',
exec_straight_opt ge fn c rs m (insn ad :: k) rs' m
/\ Asm.eval_addressing ge ad rs' = Vptr b o
- /\ forall r, data_preg r = true -> rs' r = rs r.
+ /\ (forall r, data_preg r = true -> rs' r = rs r)
+ /\ rs' # RA = rs # RA.
Proof.
intros until o; intros TR EV.
unfold transl_addressing in TR; destruct addr; ArgsInv; SimplEval EV.
@@ -1582,10 +1809,10 @@ Proof.
destruct (offset_representable sz ofs); inv EQ0.
+ econstructor; econstructor; split. apply exec_straight_opt_refl.
auto.
-+ exploit (exec_loadimm64 X16 ofs). intros (rs' & A & B & C).
++ exploit (exec_loadimm64 X16 ofs). congruence. intros (rs' & A & B & C).
econstructor; exists rs'; split. apply exec_straight_opt_intro; eexact A.
split. simpl. rewrite B, C by eauto with asmgen. auto.
- eauto with asmgen.
+ split; eauto with asmgen.
- (* Aindexed2 *)
econstructor; econstructor; split. apply exec_straight_opt_refl.
auto.
@@ -1601,33 +1828,38 @@ Proof.
+ econstructor; econstructor; split.
apply exec_straight_opt_intro. apply exec_straight_one. simpl; eauto. auto.
split. simpl. Simpl. rewrite H0. simpl. rewrite Ptrofs.add_zero. auto.
- intros; Simpl.
+ split; intros; Simpl.
- (* Aindexed2ext *)
destruct (Int.eq a Int.zero || Int.eq (Int.shl Int.one a) (Int.repr sz)); inv EQ2.
+ econstructor; econstructor; split. apply exec_straight_opt_refl.
split; auto. destruct x; auto.
+ exploit (exec_arith_extended Val.addl Paddext (Padd X)); auto.
instantiate (1 := x0). eauto with asmgen.
- intros (rs' & A & B & C).
+ instantiate (1 := X16). simpl. congruence.
+ intros (rs' & A & B & C & D).
econstructor; exists rs'; split.
apply exec_straight_opt_intro. eexact A.
split. simpl. rewrite B. rewrite Val.addl_assoc. f_equal.
unfold Op.eval_extend; destruct x, (rs x1); simpl; auto; rewrite ! a64_range;
simpl; rewrite Int64.add_zero; auto.
- intros. apply C; eauto with asmgen.
+ split; intros.
+ apply C; eauto with asmgen.
+ trivial.
- (* Aglobal *)
destruct (Ptrofs.eq (Ptrofs.modu ofs (Ptrofs.repr sz)) Ptrofs.zero && symbol_is_aligned id sz); inv TR.
+ econstructor; econstructor; split.
apply exec_straight_opt_intro. apply exec_straight_one. simpl; eauto. auto.
split. simpl. Simpl. rewrite symbol_high_low. simpl in EV. congruence.
- intros; Simpl.
-+ exploit (exec_loadsymbol X16 id ofs). auto. intros (rs' & A & B & C).
+ split; intros; Simpl.
++ exploit (exec_loadsymbol X16 id ofs). auto.
+ simpl. congruence.
+ intros (rs' & A & B & C & D).
econstructor; exists rs'; split.
apply exec_straight_opt_intro. eexact A.
split. simpl.
rewrite B. rewrite <- Genv.shift_symbol_address_64, Ptrofs.add_zero by auto.
simpl in EV. congruence.
- auto with asmgen.
+ split; auto with asmgen.
- (* Ainstrack *)
assert (E: Val.addl (rs SP) (Vlong (Ptrofs.to_int64 ofs)) = Vptr b o).
{ simpl in EV. inv EV. destruct (rs SP); simpl in H1; inv H1. simpl.
@@ -1635,7 +1867,9 @@ Proof.
destruct (offset_representable sz (Ptrofs.to_int64 ofs)); inv TR.
+ econstructor; econstructor; split. apply exec_straight_opt_refl.
auto.
-+ exploit (exec_loadimm64 X16 (Ptrofs.to_int64 ofs)). intros (rs' & A & B & C).
++ exploit (exec_loadimm64 X16 (Ptrofs.to_int64 ofs)).
+ simpl. congruence.
+ intros (rs' & A & B & C).
econstructor; exists rs'; split.
apply exec_straight_opt_intro. eexact A.
split. simpl. rewrite B, C by eauto with asmgen. auto.
@@ -1650,7 +1884,8 @@ Lemma transl_load_correct:
exists rs',
exec_straight ge fn c rs m k rs' m
/\ rs'#(preg_of dst) = v
- /\ forall r, data_preg r = true -> r <> preg_of dst -> rs' r = rs r.
+ /\ (forall r, data_preg r = true -> r <> preg_of dst -> rs' r = rs r)
+ /\ rs' # RA = rs # RA.
Proof.
intros. destruct vaddr; try discriminate.
assert (A: exists sz insn,
@@ -1663,14 +1898,17 @@ Proof.
do 2 econstructor; (split; [eassumption|auto]).
}
destruct A as (sz & insn & B & C).
- exploit transl_addressing_correct. eexact B. eexact H0. intros (ad & rs' & P & Q & R).
+ exploit transl_addressing_correct. eexact B. eexact H0. intros (ad & rs' & P & Q & R & S).
assert (X: exec_load ge chunk (fun v => v) ad (preg_of dst) rs' m =
Next (nextinstr (rs'#(preg_of dst) <- v)) m).
{ unfold exec_load. rewrite Q, H1. auto. }
econstructor; split.
eapply exec_straight_opt_right. eexact P.
apply exec_straight_one. rewrite C, X; eauto. Simpl.
- split. Simpl. intros; Simpl.
+ split. Simpl.
+ split; intros; Simpl.
+ rewrite <- S.
+ apply RA_not_written.
Qed.
Lemma transl_store_correct:
@@ -1680,7 +1918,8 @@ Lemma transl_store_correct:
Mem.storev chunk m vaddr rs#(preg_of src) = Some m' ->
exists rs',
exec_straight ge fn c rs m k rs' m'
- /\ forall r, data_preg r = true -> rs' r = rs r.
+ /\ (forall r, data_preg r = true -> rs' r = rs r)
+ /\ rs' # RA = rs # RA.
Proof.
intros. destruct vaddr; try discriminate.
set (chunk' := match chunk with Mint8signed => Mint8unsigned
@@ -1696,7 +1935,7 @@ Proof.
do 2 econstructor; (split; [eassumption|auto]).
}
destruct A as (sz & insn & B & C).
- exploit transl_addressing_correct. eexact B. eexact H0. intros (ad & rs' & P & Q & R).
+ exploit transl_addressing_correct. eexact B. eexact H0. intros (ad & rs' & P & Q & R & S).
assert (X: Mem.storev chunk' m (Vptr b i) rs#(preg_of src) = Some m').
{ rewrite <- H1. unfold chunk'. destruct chunk; auto; simpl; symmetry.
apply Mem.store_signed_unsigned_8.
@@ -1707,7 +1946,7 @@ Proof.
econstructor; split.
eapply exec_straight_opt_right. eexact P.
apply exec_straight_one. rewrite C, Y; eauto. Simpl.
- intros; Simpl.
+ split; intros; Simpl.
Qed.
(** Translation of indexed memory accesses *)
@@ -1725,7 +1964,9 @@ Proof.
{ destruct (rs base); try discriminate. simpl in *. rewrite Ptrofs.of_int64_to_int64 by auto. auto. }
destruct offset_representable.
- econstructor; econstructor; split. apply exec_straight_opt_refl. auto.
-- exploit (exec_loadimm64 X16); eauto. intros (rs' & A & B & C).
+- exploit (exec_loadimm64 X16); eauto.
+ simpl. congruence.
+ intros (rs' & A & B & C).
econstructor; econstructor; split. apply exec_straight_opt_intro; eexact A.
split. simpl. rewrite B, C by eauto with asmgen. auto. auto.
Qed.
@@ -1736,7 +1977,7 @@ Lemma loadptr_correct: forall (base: iregsp) ofs dst k m v (rs: regset),
exists rs',
exec_straight ge fn (loadptr base ofs dst k) rs m k rs' m
/\ rs'#dst = v
- /\ forall r, r <> PC -> r <> X16 -> r <> dst -> rs' r = rs r.
+ /\ (forall r, r <> PC -> r <> X16 -> r <> dst -> rs' r = rs r).
Proof.
intros.
destruct (Val.offset_ptr rs#base ofs) eqn:V; try discriminate.
@@ -1744,7 +1985,8 @@ Proof.
econstructor; split.
eapply exec_straight_opt_right. eexact A.
apply exec_straight_one. simpl. unfold exec_load. rewrite B, H. eauto. auto.
- split. Simpl. intros; Simpl.
+ split. Simpl.
+ intros; Simpl.
Qed.
Lemma storeptr_correct: forall (base: iregsp) ofs (src: ireg) k m m' (rs: regset),
@@ -1753,7 +1995,8 @@ Lemma storeptr_correct: forall (base: iregsp) ofs (src: ireg) k m m' (rs: regset
src <> X16 ->
exists rs',
exec_straight ge fn (storeptr src base ofs k) rs m k rs' m'
- /\ forall r, r <> PC -> r <> X16 -> rs' r = rs r.
+ /\ (forall r, r <> PC -> r <> X16 -> rs' r = rs r)
+ /\ rs' RA = rs RA.
Proof.
intros.
destruct (Val.offset_ptr rs#base ofs) eqn:V; try discriminate.
@@ -1761,7 +2004,7 @@ Proof.
econstructor; split.
eapply exec_straight_opt_right. eexact A.
apply exec_straight_one. simpl. unfold exec_store. rewrite B, C, H by eauto with asmgen. eauto. auto.
- intros; Simpl.
+ split; intros; Simpl.
Qed.
Lemma loadind_correct: forall (base: iregsp) ofs ty dst k c (rs: regset) m v,
@@ -1771,7 +2014,8 @@ Lemma loadind_correct: forall (base: iregsp) ofs ty dst k c (rs: regset) m v,
exists rs',
exec_straight ge fn c rs m k rs' m
/\ rs'#(preg_of dst) = v
- /\ forall r, data_preg r = true -> r <> preg_of dst -> rs' r = rs r.
+ /\ (forall r, data_preg r = true -> r <> preg_of dst -> rs' r = rs r)
+ /\ rs' RA = rs RA.
Proof.
intros.
destruct (Val.offset_ptr rs#base ofs) eqn:V; try discriminate.
@@ -1787,7 +2031,10 @@ Proof.
econstructor; split.
eapply exec_straight_opt_right. eexact A.
apply exec_straight_one. rewrite SEM. unfold exec_load. rewrite B, H0. eauto. Simpl.
- split. Simpl. intros; Simpl.
+ split. Simpl.
+ split. intros; Simpl.
+ Simpl. rewrite RA_not_written.
+ apply C; congruence.
Qed.
Lemma storeind_correct: forall (base: iregsp) ofs ty src k c (rs: regset) m m',
@@ -1796,7 +2043,8 @@ Lemma storeind_correct: forall (base: iregsp) ofs ty src k c (rs: regset) m m',
preg_of_iregsp base <> IR X16 ->
exists rs',
exec_straight ge fn c rs m k rs' m'
- /\ forall r, data_preg r = true -> rs' r = rs r.
+ /\ (forall r, data_preg r = true -> rs' r = rs r)
+ /\ rs' RA = rs RA.
Proof.
intros.
destruct (Val.offset_ptr rs#base ofs) eqn:V; try discriminate.
@@ -1814,13 +2062,15 @@ Proof.
apply exec_straight_one. rewrite SEM.
unfold exec_store. rewrite B, C, H0 by eauto with asmgen. eauto.
Simpl.
- intros; Simpl.
+ split. intros; Simpl.
+ Simpl.
Qed.
Lemma make_epilogue_correct:
forall ge0 f m stk soff cs m' ms rs k tm,
+ (is_leaf_function f = true -> rs # (IR RA) = parent_ra cs) ->
load_stack m (Vptr stk soff) Tptr f.(fn_link_ofs) = Some (parent_sp cs) ->
- load_stack m (Vptr stk soff) Tptr f.(fn_retaddr_ofs) = Some (parent_ra cs) ->
+ ((* FIXME is_leaf_function f = false -> *) load_stack m (Vptr stk soff) Tptr f.(fn_retaddr_ofs) = Some (parent_ra cs)) ->
Mem.free m stk 0 f.(fn_stacksize) = Some m' ->
agree ms (Vptr stk soff) rs ->
Mem.extends m tm ->
@@ -1831,18 +2081,46 @@ Lemma make_epilogue_correct:
/\ Mem.extends m' tm'
/\ rs'#RA = parent_ra cs
/\ rs'#SP = parent_sp cs
- /\ (forall r, r <> PC -> r <> SP -> r <> X30 -> r <> X16 -> rs'#r = rs#r).
+ /\ (forall r, r <> PC -> r <> SP -> r <> RA -> r <> X16 -> rs'#r = rs#r).
Proof.
- intros until tm; intros LP LRA FREE AG MEXT MCS.
+ intros until tm; intros LEAF_RA LP LRA FREE AG MEXT MCS.
+
+ (* FIXME
+ Cannot be used at this point
+ destruct (is_leaf_function f) eqn:IS_LEAF.
+ {
+ exploit Mem.loadv_extends. eauto. eexact LP. auto. simpl. intros (parent' & LP' & LDP').
+ exploit lessdef_parent_sp; eauto. intros EQ; subst parent'; clear LDP'.
+ exploit Mem.free_parallel_extends; eauto. intros (tm' & FREE' & MEXT').
+ unfold make_epilogue.
+ rewrite IS_LEAF.
+
+ econstructor; econstructor; split.
+ apply exec_straight_one. simpl.
+ rewrite <- (sp_val _ _ _ AG). simpl; rewrite LP'.
+ rewrite FREE'. eauto. auto.
+ split. apply agree_nextinstr. apply agree_set_other; auto.
+ apply agree_change_sp with (Vptr stk soff).
+ apply agree_exten with rs; auto.
+ eapply parent_sp_def; eauto.
+ split. auto.
+ split. Simpl.
+ split. Simpl.
+ intros. Simpl.
+ }
+ lapply LRA. 2: reflexivity.
+ clear LRA. intro LRA. *)
exploit Mem.loadv_extends. eauto. eexact LP. auto. simpl. intros (parent' & LP' & LDP').
exploit Mem.loadv_extends. eauto. eexact LRA. auto. simpl. intros (ra' & LRA' & LDRA').
exploit lessdef_parent_sp; eauto. intros EQ; subst parent'; clear LDP'.
exploit lessdef_parent_ra; eauto. intros EQ; subst ra'; clear LDRA'.
exploit Mem.free_parallel_extends; eauto. intros (tm' & FREE' & MEXT').
- unfold make_epilogue.
+ unfold make_epilogue.
+ (* FIXME rewrite IS_LEAF. *)
exploit (loadptr_correct XSP (fn_retaddr_ofs f)).
instantiate (2 := rs). simpl. rewrite <- (sp_val _ _ _ AG). simpl. eexact LRA'. simpl; congruence.
intros (rs1 & A1 & B1 & C1).
+
econstructor; econstructor; split.
eapply exec_straight_trans. eexact A1. apply exec_straight_one. simpl.
simpl; rewrite (C1 SP) by auto with asmgen. rewrite <- (sp_val _ _ _ AG). simpl; rewrite LP'.
@@ -1857,4 +2135,4 @@ Proof.
intros. Simpl.
Qed.
-End CONSTRUCTORS. \ No newline at end of file
+End CONSTRUCTORS.
diff --git a/aarch64/CSE2deps.v b/aarch64/CSE2deps.v
new file mode 100644
index 00000000..90b514a2
--- /dev/null
+++ b/aarch64/CSE2deps.v
@@ -0,0 +1,20 @@
+Require Import BoolEqual Coqlib.
+Require Import AST Integers Floats.
+Require Import Values Memory Globalenvs Events.
+Require Import Op.
+
+
+Definition can_swap_accesses_ofs ofsr chunkr ofsw chunkw :=
+ (0 <=? ofsw) && (ofsw <=? (Ptrofs.modulus - largest_size_chunk))
+ && (0 <=? ofsr) && (ofsr <=? (Ptrofs.modulus - largest_size_chunk))
+ && ((ofsw + size_chunk chunkw <=? ofsr) ||
+ (ofsr + size_chunk chunkr <=? ofsw)).
+
+Definition may_overlap chunk addr args chunk' addr' args' :=
+ match addr, addr', args, args' with
+ | (Aindexed ofs), (Aindexed ofs'),
+ (base :: nil), (base' :: nil) =>
+ if peq base base'
+ then negb (can_swap_accesses_ofs (Int64.unsigned ofs') chunk' (Int64.unsigned ofs) chunk)
+ else true | _, _, _, _ => true
+ end.
diff --git a/aarch64/CSE2depsproof.v b/aarch64/CSE2depsproof.v
new file mode 100644
index 00000000..4aac23af
--- /dev/null
+++ b/aarch64/CSE2depsproof.v
@@ -0,0 +1,128 @@
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL Maps.
+
+Require Import Globalenvs Values.
+Require Import Linking Values Memory Globalenvs Events Smallstep.
+Require Import Registers Op RTL.
+Require Import CSE2 CSE2deps.
+Require Import Lia.
+
+Lemma ptrofs_size :
+ Ptrofs.wordsize = 64%nat.
+Proof.
+ unfold Ptrofs.wordsize.
+ unfold Wordsize_Ptrofs.wordsize.
+ trivial.
+Qed.
+
+Lemma ptrofs_modulus :
+ Ptrofs.modulus = 18446744073709551616.
+Proof.
+ reflexivity.
+Qed.
+
+Section SOUNDNESS.
+ Variable F V : Type.
+ Variable genv: Genv.t F V.
+ Variable sp : val.
+
+Section MEMORY_WRITE.
+ Variable m m2 : mem.
+ Variable chunkw chunkr : memory_chunk.
+ Variable base : val.
+
+ Variable addrw addrr valw : val.
+ Hypothesis STORE : Mem.storev chunkw m addrw valw = Some m2.
+
+ Section INDEXED_AWAY.
+ Variable ofsw ofsr : int64.
+ Hypothesis ADDRW : eval_addressing genv sp
+ (Aindexed ofsw) (base :: nil) = Some addrw.
+ Hypothesis ADDRR : eval_addressing genv sp
+ (Aindexed ofsr) (base :: nil) = Some addrr.
+
+ Lemma load_store_away1 :
+ forall RANGEW : 0 <= Int64.unsigned ofsw <= Ptrofs.modulus - largest_size_chunk,
+ forall RANGER : 0 <= Int64.unsigned ofsr <= Ptrofs.modulus - largest_size_chunk,
+ forall SWAPPABLE : Int64.unsigned ofsw + size_chunk chunkw <= Int64.unsigned ofsr
+ \/ Int64.unsigned ofsr + size_chunk chunkr <= Int64.unsigned ofsw,
+ Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+ Proof.
+ intros.
+
+ pose proof (max_size_chunk chunkr) as size_chunkr_bounded.
+ pose proof (max_size_chunk chunkw) as size_chunkw_bounded.
+ unfold largest_size_chunk in *.
+
+ rewrite ptrofs_modulus in *.
+ simpl in *.
+ inv ADDRR.
+ inv ADDRW.
+
+ destruct base; try discriminate.
+ eapply Mem.load_store_other with (chunk := chunkw) (v := valw) (b := b).
+ exact STORE.
+ right.
+
+ all: unfold Ptrofs.of_int64.
+
+ all: try (destruct (Ptrofs.unsigned_add_either i (Ptrofs.repr (Int64.unsigned ofsr))) as [OFSR | OFSR];
+ rewrite OFSR).
+ all: try (destruct (Ptrofs.unsigned_add_either i (Ptrofs.repr (Int64.unsigned ofsw))) as [OFSW | OFSW];
+ rewrite OFSW).
+ all: repeat rewrite Ptrofs.unsigned_repr by (change Ptrofs.max_unsigned with 18446744073709551615; lia).
+
+ all: try rewrite ptrofs_modulus in *.
+
+ all: intuition lia.
+ Qed.
+
+ Theorem load_store_away :
+ can_swap_accesses_ofs (Int64.unsigned ofsr) chunkr (Int64.unsigned ofsw) chunkw = true ->
+ Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+ Proof.
+ intro SWAP.
+ unfold can_swap_accesses_ofs in SWAP.
+ repeat rewrite andb_true_iff in SWAP.
+ repeat rewrite orb_true_iff in SWAP.
+ repeat rewrite Z.leb_le in SWAP.
+ apply load_store_away1.
+ all: tauto.
+ Qed.
+ End INDEXED_AWAY.
+End MEMORY_WRITE.
+End SOUNDNESS.
+
+
+Section SOUNDNESS.
+ Variable F V : Type.
+ Variable genv: Genv.t F V.
+ Variable sp : val.
+
+Lemma may_overlap_sound:
+ forall m m' : mem,
+ forall chunk addr args chunk' addr' args' v a a' rs,
+ (eval_addressing genv sp addr (rs ## args)) = Some a ->
+ (eval_addressing genv sp addr' (rs ## args')) = Some a' ->
+ (may_overlap chunk addr args chunk' addr' args') = false ->
+ (Mem.storev chunk m a v) = Some m' ->
+ (Mem.loadv chunk' m' a') = (Mem.loadv chunk' m a').
+Proof.
+ intros until rs.
+ intros ADDR ADDR' OVERLAP STORE.
+ destruct addr; destruct addr'; try discriminate.
+ { (* Aindexed / Aindexed *)
+ destruct args as [ | base [ | ]]. 1,3: discriminate.
+ destruct args' as [ | base' [ | ]]. 1,3: discriminate.
+ simpl in OVERLAP.
+ destruct (peq base base'). 2: discriminate.
+ subst base'.
+ destruct (can_swap_accesses_ofs (Int64.unsigned ofs0) chunk' (Int64.unsigned ofs) chunk) eqn:SWAP.
+ 2: discriminate.
+ simpl in *.
+ eapply load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption.
+ }
+Qed.
+
+End SOUNDNESS.
diff --git a/aarch64/Conventions1.v b/aarch64/Conventions1.v
index 14cb199f..efda835d 100644
--- a/aarch64/Conventions1.v
+++ b/aarch64/Conventions1.v
@@ -190,27 +190,6 @@ Fixpoint loc_arguments_rec
Definition loc_arguments (s: signature) : list (rpair loc) :=
loc_arguments_rec s.(sig_args) 0 0 0.
-(** [size_arguments s] returns the number of [Outgoing] slots used
- to call a function with signature [s]. *)
-
-Fixpoint size_arguments_rec (tyl: list typ) (ir fr ofs: Z) {struct tyl} : Z :=
- match tyl with
- | nil => ofs
- | (Tint | Tlong | Tany32 | Tany64) :: tys =>
- match list_nth_z int_param_regs ir with
- | None => size_arguments_rec tys ir fr (ofs + 2)
- | Some ireg => size_arguments_rec tys (ir + 1) fr ofs
- end
- | (Tfloat | Tsingle) :: tys =>
- match list_nth_z float_param_regs fr with
- | None => size_arguments_rec tys ir fr (ofs + 2)
- | Some freg => size_arguments_rec tys ir (fr + 1) ofs
- end
- end.
-
-Definition size_arguments (s: signature) : Z :=
- size_arguments_rec s.(sig_args) 0 0 0.
-
(** Argument locations are either caller-save registers or [Outgoing]
stack slots at nonnegative offsets. *)
@@ -285,92 +264,6 @@ Qed.
Hint Resolve loc_arguments_acceptable: locs.
-(** The offsets of [Outgoing] arguments are below [size_arguments s]. *)
-
-Remark size_arguments_rec_above:
- forall tyl ir fr ofs0,
- ofs0 <= size_arguments_rec tyl ir fr ofs0.
-Proof.
- induction tyl; simpl; intros.
- omega.
- assert (A: ofs0 <=
- match list_nth_z int_param_regs ir with
- | Some _ => size_arguments_rec tyl (ir + 1) fr ofs0
- | None => size_arguments_rec tyl ir fr (ofs0 + 2)
- end).
- { destruct (list_nth_z int_param_regs ir); eauto.
- apply Z.le_trans with (ofs0 + 2); auto. omega. }
- assert (B: ofs0 <=
- match list_nth_z float_param_regs fr with
- | Some _ => size_arguments_rec tyl ir (fr + 1) ofs0
- | None => size_arguments_rec tyl ir fr (ofs0 + 2)
- end).
- { destruct (list_nth_z float_param_regs fr); eauto.
- apply Z.le_trans with (ofs0 + 2); auto. omega. }
- destruct a; auto.
-Qed.
-
-Lemma size_arguments_above:
- forall s, size_arguments s >= 0.
-Proof.
- intros; unfold size_arguments. apply Z.le_ge. apply size_arguments_rec_above.
-Qed.
-
-Lemma loc_arguments_rec_bounded:
- forall ofs ty tyl ir fr ofs0,
- In (S Outgoing ofs ty) (regs_of_rpairs (loc_arguments_rec tyl ir fr ofs0)) ->
- ofs + typesize ty <= size_arguments_rec tyl ir fr ofs0.
-Proof.
- induction tyl; simpl; intros.
-- contradiction.
-- assert (T: forall ty0, typesize ty0 <= 2).
- { destruct ty0; simpl; omega. }
- assert (A: forall ty0,
- In (S Outgoing ofs ty) (regs_of_rpairs
- match list_nth_z int_param_regs ir with
- | Some ireg =>
- One (R ireg) :: loc_arguments_rec tyl (ir + 1) fr ofs0
- | None => One (S Outgoing ofs0 ty0) :: loc_arguments_rec tyl ir fr (ofs0 + 2)
- end) ->
- ofs + typesize ty <=
- match list_nth_z int_param_regs ir with
- | Some _ => size_arguments_rec tyl (ir + 1) fr ofs0
- | None => size_arguments_rec tyl ir fr (ofs0 + 2)
- end).
- { intros. destruct (list_nth_z int_param_regs ir); simpl in H0; destruct H0.
- - discriminate.
- - eapply IHtyl; eauto.
- - inv H0. apply Z.le_trans with (ofs + 2). specialize (T ty). omega. apply size_arguments_rec_above.
- - eapply IHtyl; eauto. }
- assert (B: forall ty0,
- In (S Outgoing ofs ty) (regs_of_rpairs
- match list_nth_z float_param_regs fr with
- | Some ireg =>
- One (R ireg) :: loc_arguments_rec tyl ir (fr + 1) ofs0
- | None => One (S Outgoing ofs0 ty0) :: loc_arguments_rec tyl ir fr (ofs0 + 2)
- end) ->
- ofs + typesize ty <=
- match list_nth_z float_param_regs fr with
- | Some _ => size_arguments_rec tyl ir (fr + 1) ofs0
- | None => size_arguments_rec tyl ir fr (ofs0 + 2)
- end).
- { intros. destruct (list_nth_z float_param_regs fr); simpl in H0; destruct H0.
- - discriminate.
- - eapply IHtyl; eauto.
- - inv H0. apply Z.le_trans with (ofs + 2). specialize (T ty). omega. apply size_arguments_rec_above.
- - eapply IHtyl; eauto. }
- destruct a; eauto.
-Qed.
-
-Lemma loc_arguments_bounded:
- forall (s: signature) (ofs: Z) (ty: typ),
- In (S Outgoing ofs ty) (regs_of_rpairs (loc_arguments s)) ->
- ofs + typesize ty <= size_arguments s.
-Proof.
- unfold loc_arguments, size_arguments; intros.
- eauto using loc_arguments_rec_bounded.
-Qed.
-
Lemma loc_arguments_main:
loc_arguments signature_main = nil.
Proof.
diff --git a/aarch64/DuplicateOpcodeHeuristic.ml b/aarch64/DuplicateOpcodeHeuristic.ml
index 85505245..5fc2156c 100644
--- a/aarch64/DuplicateOpcodeHeuristic.ml
+++ b/aarch64/DuplicateOpcodeHeuristic.ml
@@ -1,3 +1,27 @@
-exception HeuristicSucceeded
-
-let opcode_heuristic code cond ifso ifnot preferred = ()
+open Op
+open Integers
+
+let opcode_heuristic code cond ifso ifnot is_loop_header =
+ match cond with
+ | Ccompimm (c, n) | Ccompuimm (c, n) -> if n == Integers.Int.zero then (match c with
+ | Clt | Cle -> Some false
+ | Cgt | Cge -> Some true
+ | _ -> None
+ ) else None
+ | Ccomplimm (c, n) | Ccompluimm (c, n) -> if n == Integers.Int64.zero then (match c with
+ | Clt | Cle -> Some false
+ | Cgt | Cge -> Some true
+ | _ -> None
+ ) else None
+ | Ccompf c | Ccompfs c -> (match c with
+ | Ceq -> Some false
+ | Cne -> Some true
+ | _ -> None
+ )
+ | Cnotcompf c | Cnotcompfs c -> (match c with
+ | Ceq -> Some true
+ | Cne -> Some false
+ | _ -> None
+ )
+ | _ -> None
+
diff --git a/arm/CSE2deps.v b/arm/CSE2deps.v
new file mode 100644
index 00000000..9db51bbb
--- /dev/null
+++ b/arm/CSE2deps.v
@@ -0,0 +1,20 @@
+Require Import BoolEqual Coqlib.
+Require Import AST Integers Floats.
+Require Import Values Memory Globalenvs Events.
+Require Import Op.
+
+
+Definition can_swap_accesses_ofs ofsr chunkr ofsw chunkw :=
+ (0 <=? ofsw) && (ofsw <=? (Ptrofs.modulus - largest_size_chunk))
+ && (0 <=? ofsr) && (ofsr <=? (Ptrofs.modulus - largest_size_chunk))
+ && ((ofsw + size_chunk chunkw <=? ofsr) ||
+ (ofsr + size_chunk chunkr <=? ofsw)).
+
+Definition may_overlap chunk addr args chunk' addr' args' :=
+ match addr, addr', args, args' with
+ | (Aindexed ofs), (Aindexed ofs'),
+ (base :: nil), (base' :: nil) =>
+ if peq base base'
+ then negb (can_swap_accesses_ofs (Int.unsigned ofs') chunk' (Int.unsigned ofs) chunk)
+ else true | _, _, _, _ => true
+ end.
diff --git a/arm/CSE2depsproof.v b/arm/CSE2depsproof.v
new file mode 100644
index 00000000..61fe5980
--- /dev/null
+++ b/arm/CSE2depsproof.v
@@ -0,0 +1,129 @@
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL Maps.
+
+Require Import Globalenvs Values.
+Require Import Linking Values Memory Globalenvs Events Smallstep.
+Require Import Registers Op RTL.
+Require Import CSE2 CSE2deps.
+Require Import Lia.
+
+Lemma ptrofs_size :
+ Ptrofs.wordsize = 32%nat.
+Proof.
+ unfold Ptrofs.wordsize.
+ unfold Wordsize_Ptrofs.wordsize.
+ trivial.
+Qed.
+
+Lemma ptrofs_modulus :
+ Ptrofs.modulus = 4294967296.
+Proof.
+ unfold Ptrofs.modulus.
+ rewrite ptrofs_size.
+ destruct Archi.ptr64; reflexivity.
+Qed.
+
+Section SOUNDNESS.
+ Variable F V : Type.
+ Variable genv: Genv.t F V.
+ Variable sp : val.
+
+Section MEMORY_WRITE.
+ Variable m m2 : mem.
+ Variable chunkw chunkr : memory_chunk.
+ Variable base : val.
+
+ Variable addrw addrr valw : val.
+ Hypothesis STORE : Mem.storev chunkw m addrw valw = Some m2.
+
+ Section INDEXED_AWAY.
+ Variable ofsw ofsr : int.
+ Hypothesis ADDRW : eval_addressing genv sp
+ (Aindexed ofsw) (base :: nil) = Some addrw.
+ Hypothesis ADDRR : eval_addressing genv sp
+ (Aindexed ofsr) (base :: nil) = Some addrr.
+
+ Lemma load_store_away1 :
+ forall RANGEW : 0 <= Int.unsigned ofsw <= Ptrofs.modulus - largest_size_chunk,
+ forall RANGER : 0 <= Int.unsigned ofsr <= Ptrofs.modulus - largest_size_chunk,
+ forall SWAPPABLE : Int.unsigned ofsw + size_chunk chunkw <= Int.unsigned ofsr
+ \/ Int.unsigned ofsr + size_chunk chunkr <= Int.unsigned ofsw,
+ Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+ Proof.
+ intros.
+
+ pose proof (max_size_chunk chunkr) as size_chunkr_bounded.
+ pose proof (max_size_chunk chunkw) as size_chunkw_bounded.
+ unfold largest_size_chunk in *.
+
+ rewrite ptrofs_modulus in *.
+ simpl in *.
+ inv ADDRR.
+ inv ADDRW.
+ destruct base; try discriminate.
+ eapply Mem.load_store_other with (chunk := chunkw) (v := valw) (b := b).
+ exact STORE.
+ right.
+
+ all: try (destruct (Ptrofs.unsigned_add_either i (Ptrofs.of_int ofsr)) as [OFSR | OFSR];
+ rewrite OFSR).
+ all: try (destruct (Ptrofs.unsigned_add_either i (Ptrofs.of_int ofsw)) as [OFSW | OFSW];
+ rewrite OFSW).
+
+ all: try rewrite ptrofs_modulus in *.
+
+ all: unfold Ptrofs.of_int.
+
+ all: repeat rewrite Ptrofs.unsigned_repr by (change Ptrofs.max_unsigned with 4294967295; lia).
+ all: intuition lia.
+ Qed.
+
+ Theorem load_store_away :
+ can_swap_accesses_ofs (Int.unsigned ofsr) chunkr (Int.unsigned ofsw) chunkw = true ->
+ Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+ Proof.
+ intro SWAP.
+ unfold can_swap_accesses_ofs in SWAP.
+ repeat rewrite andb_true_iff in SWAP.
+ repeat rewrite orb_true_iff in SWAP.
+ repeat rewrite Z.leb_le in SWAP.
+ apply load_store_away1.
+ all: tauto.
+ Qed.
+ End INDEXED_AWAY.
+End MEMORY_WRITE.
+End SOUNDNESS.
+
+
+Section SOUNDNESS.
+ Variable F V : Type.
+ Variable genv: Genv.t F V.
+ Variable sp : val.
+
+Lemma may_overlap_sound:
+ forall m m' : mem,
+ forall chunk addr args chunk' addr' args' v a a' rs,
+ (eval_addressing genv sp addr (rs ## args)) = Some a ->
+ (eval_addressing genv sp addr' (rs ## args')) = Some a' ->
+ (may_overlap chunk addr args chunk' addr' args') = false ->
+ (Mem.storev chunk m a v) = Some m' ->
+ (Mem.loadv chunk' m' a') = (Mem.loadv chunk' m a').
+Proof.
+ intros until rs.
+ intros ADDR ADDR' OVERLAP STORE.
+ destruct addr; destruct addr'; try discriminate.
+ { (* Aindexed / Aindexed *)
+ destruct args as [ | base [ | ]]. 1,3: discriminate.
+ destruct args' as [ | base' [ | ]]. 1,3: discriminate.
+ simpl in OVERLAP.
+ destruct (peq base base'). 2: discriminate.
+ subst base'.
+ destruct (can_swap_accesses_ofs (Int.unsigned i0) chunk' (Int.unsigned i) chunk) eqn:SWAP.
+ 2: discriminate.
+ simpl in *.
+ eapply load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption.
+ }
+Qed.
+
+End SOUNDNESS.
diff --git a/arm/Conventions1.v b/arm/Conventions1.v
index 7016c1ee..fe49a781 100644
--- a/arm/Conventions1.v
+++ b/arm/Conventions1.v
@@ -269,48 +269,6 @@ Definition loc_arguments (s: signature) : list (rpair loc) :=
else loc_arguments_hf s.(sig_args) 0 0 0
end.
-(** [size_arguments s] returns the number of [Outgoing] slots used
- to call a function with signature [s]. *)
-
-Fixpoint size_arguments_hf (tyl: list typ) (ir fr ofs: Z) {struct tyl} : Z :=
- match tyl with
- | nil => ofs
- | (Tint|Tany32) :: tys =>
- if zlt ir 4
- then size_arguments_hf tys (ir + 1) fr ofs
- else size_arguments_hf tys ir fr (ofs + 1)
- | (Tfloat|Tany64) :: tys =>
- if zlt fr 8
- then size_arguments_hf tys ir (fr + 1) ofs
- else size_arguments_hf tys ir fr (align ofs 2 + 2)
- | Tsingle :: tys =>
- if zlt fr 8
- then size_arguments_hf tys ir (fr + 1) ofs
- else size_arguments_hf tys ir fr (ofs + 1)
- | Tlong :: tys =>
- let ir := align ir 2 in
- if zlt ir 4
- then size_arguments_hf tys (ir + 2) fr ofs
- else size_arguments_hf tys ir fr (align ofs 2 + 2)
- end.
-
-Fixpoint size_arguments_sf (tyl: list typ) (ofs: Z) {struct tyl} : Z :=
- match tyl with
- | nil => Z.max 0 ofs
- | (Tint | Tsingle | Tany32) :: tys => size_arguments_sf tys (ofs + 1)
- | (Tfloat | Tlong | Tany64) :: tys => size_arguments_sf tys (align ofs 2 + 2)
- end.
-
-Definition size_arguments (s: signature) : Z :=
- match Archi.abi with
- | Archi.Softfloat =>
- size_arguments_sf s.(sig_args) (-4)
- | Archi.Hardfloat =>
- if s.(sig_cc).(cc_vararg)
- then size_arguments_sf s.(sig_args) (-4)
- else size_arguments_hf s.(sig_args) 0 0 0
- end.
-
(** Argument locations are either non-temporary registers or [Outgoing]
stack slots at nonnegative offsets. *)
@@ -471,170 +429,6 @@ Qed.
Hint Resolve loc_arguments_acceptable: locs.
-(** The offsets of [Outgoing] arguments are below [size_arguments s]. *)
-
-Remark size_arguments_hf_above:
- forall tyl ir fr ofs0,
- ofs0 <= size_arguments_hf tyl ir fr ofs0.
-Proof.
- induction tyl; simpl; intros.
- omega.
- destruct a.
- destruct (zlt ir 4); eauto. apply Z.le_trans with (ofs0 + 1); auto; omega.
- destruct (zlt fr 8); eauto.
- apply Z.le_trans with (align ofs0 2). apply align_le; omega.
- apply Z.le_trans with (align ofs0 2 + 2); auto; omega.
- set (ir' := align ir 2).
- destruct (zlt ir' 4); eauto.
- apply Z.le_trans with (align ofs0 2). apply align_le; omega.
- apply Z.le_trans with (align ofs0 2 + 2); auto; omega.
- destruct (zlt fr 8); eauto.
- apply Z.le_trans with (ofs0 + 1); eauto. omega.
- destruct (zlt ir 4); eauto. apply Z.le_trans with (ofs0 + 1); auto; omega.
- destruct (zlt fr 8); eauto.
- apply Z.le_trans with (align ofs0 2). apply align_le; omega.
- apply Z.le_trans with (align ofs0 2 + 2); auto; omega.
-Qed.
-
-Remark size_arguments_sf_above:
- forall tyl ofs0,
- Z.max 0 ofs0 <= size_arguments_sf tyl ofs0.
-Proof.
- induction tyl; simpl; intros.
- omega.
- destruct a; (eapply Z.le_trans; [idtac|eauto]).
- xomega.
- assert (ofs0 <= align ofs0 2) by (apply align_le; omega). xomega.
- assert (ofs0 <= align ofs0 2) by (apply align_le; omega). xomega.
- xomega.
- xomega.
- assert (ofs0 <= align ofs0 2) by (apply align_le; omega). xomega.
-Qed.
-
-Lemma size_arguments_above:
- forall s, size_arguments s >= 0.
-Proof.
- intros; unfold size_arguments. apply Z.le_ge.
- assert (0 <= size_arguments_sf (sig_args s) (-4)).
- { change 0 with (Z.max 0 (-4)). apply size_arguments_sf_above. }
- assert (0 <= size_arguments_hf (sig_args s) 0 0 0).
- { apply size_arguments_hf_above. }
- destruct Archi.abi; [ | destruct (cc_vararg (sig_cc s)) ]; auto.
-Qed.
-
-Lemma loc_arguments_hf_bounded:
- forall ofs ty tyl ir fr ofs0,
- In (S Outgoing ofs ty) (regs_of_rpairs (loc_arguments_hf tyl ir fr ofs0)) ->
- ofs + typesize ty <= size_arguments_hf tyl ir fr ofs0.
-Proof.
- induction tyl; simpl; intros.
- elim H.
- destruct a.
-- (* int *)
- destruct (zlt ir 4); destruct H.
- discriminate.
- eauto.
- inv H. apply size_arguments_hf_above.
- eauto.
-- (* float *)
- destruct (zlt fr 8); destruct H.
- discriminate.
- eauto.
- inv H. apply size_arguments_hf_above.
- eauto.
-- (* long *)
- destruct (zlt (align ir 2) 4).
- destruct H. discriminate. destruct H. discriminate. eauto.
- destruct Archi.big_endian.
- destruct H. inv H.
- eapply Z.le_trans. 2: apply size_arguments_hf_above. simpl; omega.
- destruct H. inv H.
- rewrite <- Z.add_assoc. simpl. apply size_arguments_hf_above.
- eauto.
- destruct H. inv H.
- rewrite <- Z.add_assoc. simpl. apply size_arguments_hf_above.
- destruct H. inv H.
- eapply Z.le_trans. 2: apply size_arguments_hf_above. simpl; omega.
- eauto.
-- (* float *)
- destruct (zlt fr 8); destruct H.
- discriminate.
- eauto.
- inv H. apply size_arguments_hf_above.
- eauto.
-- (* any32 *)
- destruct (zlt ir 4); destruct H.
- discriminate.
- eauto.
- inv H. apply size_arguments_hf_above.
- eauto.
-- (* any64 *)
- destruct (zlt fr 8); destruct H.
- discriminate.
- eauto.
- inv H. apply size_arguments_hf_above.
- eauto.
-Qed.
-
-Lemma loc_arguments_sf_bounded:
- forall ofs ty tyl ofs0,
- In (S Outgoing ofs ty) (regs_of_rpairs (loc_arguments_sf tyl ofs0)) ->
- Z.max 0 (ofs + typesize ty) <= size_arguments_sf tyl ofs0.
-Proof.
- induction tyl; simpl; intros.
- elim H.
- destruct a.
-- (* int *)
- destruct H.
- destruct (zlt ofs0 0); inv H. apply size_arguments_sf_above.
- eauto.
-- (* float *)
- destruct H.
- destruct (zlt (align ofs0 2) 0); inv H. apply size_arguments_sf_above.
- eauto.
-- (* long *)
- destruct H.
- destruct Archi.big_endian.
- destruct (zlt (align ofs0 2) 0); inv H.
- eapply Z.le_trans. 2: apply size_arguments_sf_above. simpl; xomega.
- destruct (zlt (align ofs0 2) 0); inv H.
- rewrite <- Z.add_assoc. simpl. apply size_arguments_sf_above.
- destruct H.
- destruct Archi.big_endian.
- destruct (zlt (align ofs0 2) 0); inv H.
- rewrite <- Z.add_assoc. simpl. apply size_arguments_sf_above.
- destruct (zlt (align ofs0 2) 0); inv H.
- eapply Z.le_trans. 2: apply size_arguments_sf_above. simpl; xomega.
- eauto.
-- (* float *)
- destruct H.
- destruct (zlt ofs0 0); inv H. apply size_arguments_sf_above.
- eauto.
-- (* any32 *)
- destruct H.
- destruct (zlt ofs0 0); inv H. apply size_arguments_sf_above.
- eauto.
-- (* any64 *)
- destruct H.
- destruct (zlt (align ofs0 2) 0); inv H. apply size_arguments_sf_above.
- eauto.
-Qed.
-
-Lemma loc_arguments_bounded:
- forall (s: signature) (ofs: Z) (ty: typ),
- In (S Outgoing ofs ty) (regs_of_rpairs (loc_arguments s)) ->
- ofs + typesize ty <= size_arguments s.
-Proof.
- unfold loc_arguments, size_arguments; intros.
- assert (In (S Outgoing ofs ty) (regs_of_rpairs (loc_arguments_sf (sig_args s) (-4))) ->
- ofs + typesize ty <= size_arguments_sf (sig_args s) (-4)).
- { intros. eapply Z.le_trans. 2: eapply loc_arguments_sf_bounded; eauto. xomega. }
- assert (In (S Outgoing ofs ty) (regs_of_rpairs (loc_arguments_hf (sig_args s) 0 0 0)) ->
- ofs + typesize ty <= size_arguments_hf (sig_args s) 0 0 0).
- { intros. eapply loc_arguments_hf_bounded; eauto. }
- destruct Archi.abi; [ | destruct (cc_vararg (sig_cc s)) ]; eauto.
-Qed.
-
Lemma loc_arguments_main:
loc_arguments signature_main = nil.
Proof.
diff --git a/arm/DuplicateOpcodeHeuristic.ml b/arm/DuplicateOpcodeHeuristic.ml
index 85505245..9b6a6409 100644
--- a/arm/DuplicateOpcodeHeuristic.ml
+++ b/arm/DuplicateOpcodeHeuristic.ml
@@ -1,3 +1,22 @@
-exception HeuristicSucceeded
-
-let opcode_heuristic code cond ifso ifnot preferred = ()
+open Op
+open Integers
+
+let opcode_heuristic code cond ifso ifnot is_loop_header =
+ match cond with
+ | Ccompimm (c, n) | Ccompuimm (c, n) -> if n == Integers.Int.zero then (match c with
+ | Clt | Cle -> Some false
+ | Cgt | Cge -> Some true
+ | _ -> None
+ ) else None
+ | Ccompf c | Ccompfs c -> (match c with
+ | Ceq -> Some false
+ | Cne -> Some true
+ | _ -> None
+ )
+ | Cnotcompf c | Cnotcompfs c -> (match c with
+ | Ceq -> Some true
+ | Cne -> Some false
+ | _ -> None
+ )
+ | _ -> None
+
diff --git a/backend/Allocation.v b/backend/Allocation.v
index d18b07a9..2323c050 100644
--- a/backend/Allocation.v
+++ b/backend/Allocation.v
@@ -314,10 +314,10 @@ Definition pair_instr_block
Some(BSbuiltin ef args res mv1 args' res' mv2 s)
| _ => None
end
- | Icond cond args s1 s2 =>
+ | Icond cond args s1 s2 i =>
let (mv1, b1) := extract_moves nil b in
match b1 with
- | Lcond cond' args' s1' s2' :: b2 =>
+ | Lcond cond' args' s1' s2' i' :: b2 =>
assertion (eq_condition cond cond');
assertion (peq s1 s1');
assertion (peq s2 s2');
diff --git a/backend/Allocproof.v b/backend/Allocproof.v
index b6880860..3c7df58a 100644
--- a/backend/Allocproof.v
+++ b/backend/Allocproof.v
@@ -169,11 +169,11 @@ Inductive expand_block_shape: block_shape -> RTL.instruction -> LTL.bblock -> Pr
(Ibuiltin ef args res s)
(expand_moves mv1
(Lbuiltin ef args' res' :: expand_moves mv2 (Lbranch s :: k)))
- | ebs_cond: forall cond args mv args' s1 s2 k,
+ | ebs_cond: forall cond args mv args' s1 s2 k i i',
wf_moves mv ->
expand_block_shape (BScond cond args mv args' s1 s2)
- (Icond cond args s1 s2)
- (expand_moves mv (Lcond cond args' s1 s2 :: k))
+ (Icond cond args s1 s2 i)
+ (expand_moves mv (Lcond cond args' s1 s2 i' :: k))
| ebs_jumptable: forall arg mv arg' tbl k,
wf_moves mv ->
expand_block_shape (BSjumptable arg mv arg' tbl)
diff --git a/backend/CSE.v b/backend/CSE.v
index 2827161d..1936d4e4 100644
--- a/backend/CSE.v
+++ b/backend/CSE.v
@@ -496,7 +496,7 @@ Definition transfer (f: function) (approx: PMap.t VA.t) (pc: node) (before: numb
| EF_vload _ | EF_annot _ _ _ | EF_annot_val _ _ _ | EF_debug _ _ _ =>
set_res_unknown before res
end
- | Icond cond args ifso ifnot =>
+ | Icond cond args ifso ifnot _ =>
before
| Ijumptable arg tbl =>
before
@@ -549,10 +549,10 @@ Definition transf_instr (n: numbering) (instr: instruction) :=
let (n1, vl) := valnum_regs n args in
let (addr', args') := reduce _ combine_addr n1 addr args vl in
Istore chunk addr' args' src s
- | Icond cond args s1 s2 =>
+ | Icond cond args s1 s2 i =>
let (n1, vl) := valnum_regs n args in
let (cond', args') := reduce _ combine_cond n1 cond args vl in
- Icond cond' args' s1 s2
+ Icond cond' args' s1 s2 i
| _ =>
instr
end.
diff --git a/backend/CSE2.v b/backend/CSE2.v
index 38a46c1b..900a7517 100644
--- a/backend/CSE2.v
+++ b/backend/CSE2.v
@@ -6,7 +6,7 @@ David Monniaux, CNRS, VERIMAG
Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
Require Import AST Linking.
-Require Import Memory Registers Op RTL Maps.
+Require Import Memory Registers Op RTL Maps CSE2deps.
(* Static analysis *)
@@ -265,7 +265,7 @@ Definition kill_sym_val (dst : reg) (sv : sym_val) :=
Definition kill_reg (dst : reg) (rel : RELATION.t) :=
PTree.filter1 (fun x => negb (kill_sym_val dst x))
(PTree.remove dst rel).
-
+
Definition kill_sym_val_mem (sv: sym_val) :=
match sv with
| SMove _ => false
@@ -273,16 +273,28 @@ Definition kill_sym_val_mem (sv: sym_val) :=
| SLoad _ _ _ => true
end.
+Definition kill_sym_val_store chunk addr args (sv: sym_val) :=
+ match sv with
+ | SMove _ => false
+ | SOp op _ => op_depends_on_memory op
+ | SLoad chunk' addr' args' => may_overlap chunk addr args chunk' addr' args'
+ end.
+
Definition kill_mem (rel : RELATION.t) :=
PTree.filter1 (fun x => negb (kill_sym_val_mem x)) rel.
-
Definition forward_move (rel : RELATION.t) (x : reg) : reg :=
match rel ! x with
| Some (SMove org) => org
| _ => x
end.
+Definition kill_store1 chunk addr args rel :=
+ PTree.filter1 (fun x => negb (kill_sym_val_store chunk addr args x)) rel.
+
+Definition kill_store chunk addr args rel :=
+ kill_store1 chunk addr (List.map (forward_move rel) args) rel.
+
Definition move (src dst : reg) (rel : RELATION.t) :=
PTree.set dst (SMove (forward_move rel src)) (kill_reg dst rel).
@@ -393,9 +405,9 @@ Qed.
Definition apply_instr instr (rel : RELATION.t) : RB.t :=
match instr with
| Inop _
- | Icond _ _ _ _
+ | Icond _ _ _ _ _
| Ijumptable _ _ => Some rel
- | Istore _ _ _ _ _ => Some (kill_mem rel)
+ | Istore chunk addr args _ _ => Some (kill_store chunk addr args rel)
| Iop op args dst _ => Some (gen_oper op dst args rel)
| Iload trap chunk addr args dst _ => Some (load chunk addr dst args rel)
| Icall _ _ _ dst _ => Some (kill_reg dst (kill_mem rel))
@@ -457,7 +469,7 @@ Definition transf_instr (fmap : option (PMap.t RB.t))
match instr with
| Iop op args dst s =>
let args' := subst_args fmap pc args in
- match find_op_in_fmap fmap pc op args' with
+ match (if is_trivial_op op then None else find_op_in_fmap fmap pc op args') with
| None => Iop op args' dst s
| Some src => Iop Omove (src::nil) dst s
end
@@ -473,8 +485,8 @@ Definition transf_instr (fmap : option (PMap.t RB.t))
Icall sig ros (subst_args fmap pc args) dst s
| Itailcall sig ros args =>
Itailcall sig ros (subst_args fmap pc args)
- | Icond cond args s1 s2 =>
- Icond cond (subst_args fmap pc args) s1 s2
+ | Icond cond args s1 s2 i =>
+ Icond cond (subst_args fmap pc args) s1 s2 i
| Ijumptable arg tbl =>
Ijumptable (subst_arg fmap pc arg) tbl
| Ireturn (Some arg) =>
diff --git a/backend/CSE2proof.v b/backend/CSE2proof.v
index 254cc4ce..309ccce1 100644
--- a/backend/CSE2proof.v
+++ b/backend/CSE2proof.v
@@ -13,7 +13,8 @@ Require Import Memory Registers Op RTL Maps.
Require Import Globalenvs Values.
Require Import Linking Values Memory Globalenvs Events Smallstep.
Require Import Registers Op RTL.
-Require Import CSE2.
+Require Import CSE2 CSE2deps CSE2depsproof.
+Require Import Lia.
Lemma args_unaffected:
forall rs : regset,
@@ -54,9 +55,9 @@ Definition sem_sym_val sym rs (v : option val) : Prop :=
match eval_addressing genv sp addr rs##args with
| Some a => match Mem.loadv chunk m a with
| Some dat => v = Some dat
- | None => v = None \/ v = Some (default_notrap_load_value chunk)
+ | None => v = None \/ v = Some Vundef
end
- | None => v = None \/ v = Some (default_notrap_load_value chunk)
+ | None => v = None \/ v = Some Vundef
end
end.
@@ -390,6 +391,7 @@ Proof.
apply REC; auto.
Qed.
+
Lemma find_load_sound :
forall rel : RELATION.t,
forall chunk : memory_chunk,
@@ -402,9 +404,9 @@ Lemma find_load_sound :
match eval_addressing genv sp addr rs##args with
| Some a => match Mem.loadv chunk m a with
| Some dat => rs#src = dat
- | None => rs#src = default_notrap_load_value chunk
+ | None => rs#src = Vundef
end
- | None => rs#src = default_notrap_load_value chunk
+ | None => rs#src = Vundef
end.
Proof.
intros until rs.
@@ -419,9 +421,9 @@ Proof.
match eval_addressing genv sp addr rs##args with
| Some a => match Mem.loadv chunk m a with
| Some dat => rs#src = dat
- | None => rs#src = default_notrap_load_value chunk
+ | None => rs#src = Vundef
end
- | None => rs#src = default_notrap_load_value chunk
+ | None => rs#src = Vundef
end
end ->
fold_left
@@ -431,9 +433,9 @@ Proof.
match eval_addressing genv sp addr rs##args with
| Some a => match Mem.loadv chunk m a with
| Some dat => rs#src = dat
- | None => rs#src = default_notrap_load_value chunk
+ | None => rs#src = Vundef
end
- | None => rs#src = default_notrap_load_value chunk
+ | None => rs#src = Vundef
end) as REC.
{
@@ -521,7 +523,7 @@ Lemma find_load_notrap1_sound' :
sem_rel rel rs ->
find_load rel chunk addr args = Some src ->
eval_addressing genv sp addr rs##args = None ->
- rs # src = (default_notrap_load_value chunk).
+ rs # src = Vundef.
Proof.
intros until rs. intros REL FINDLOAD ADDR.
pose proof (find_load_sound rel chunk addr src args rs REL FINDLOAD) as Z.
@@ -541,7 +543,7 @@ Lemma find_load_notrap2_sound' :
find_load rel chunk addr args = Some src ->
eval_addressing genv sp addr rs##args = Some a ->
Mem.loadv chunk m a = None ->
- rs # src = (default_notrap_load_value chunk).
+ rs # src = Vundef.
Proof.
intros until a. intros REL FINDLOAD ADDR LOAD.
pose proof (find_load_sound rel chunk addr src args rs REL FINDLOAD) as Z.
@@ -566,6 +568,20 @@ Proof.
destruct s; congruence.
Qed.
+
+Lemma forward_move_rs:
+ forall rel arg rs,
+ sem_rel rel rs ->
+ rs # (forward_move rel arg) = rs # arg.
+Proof.
+ unfold forward_move, sem_rel, sem_reg, sem_sym_val in *.
+ intros until rs.
+ intro REL.
+ pose proof (REL arg) as RELarg.
+ destruct (rel ! arg); trivial.
+ destruct s; congruence.
+Qed.
+
Lemma oper_sound :
forall rel : RELATION.t,
forall op : operation,
@@ -673,11 +689,11 @@ Lemma load2_notrap1_sound :
sem_rel rel rs ->
not (In dst args) ->
eval_addressing genv sp addr (rs ## args) = None ->
- sem_rel (load2 chunk addr dst args rel) (rs # dst <- (default_notrap_load_value chunk)).
+ sem_rel (load2 chunk addr dst args rel) (rs # dst <- Vundef).
Proof.
intros until rs.
intros REL NOT_IN ADDR x.
- pose proof (kill_reg_sound rel dst rs (default_notrap_load_value chunk) REL x) as KILL.
+ pose proof (kill_reg_sound rel dst rs Vundef REL x) as KILL.
unfold load2.
destruct (peq x dst).
{
@@ -710,11 +726,11 @@ Lemma load2_notrap2_sound :
not (In dst args) ->
eval_addressing genv sp addr (rs ## args) = Some a ->
Mem.loadv chunk m a = None ->
- sem_rel (load2 chunk addr dst args rel) (rs # dst <- (default_notrap_load_value chunk)).
+ sem_rel (load2 chunk addr dst args rel) (rs # dst <- Vundef).
Proof.
intros until a.
intros REL NOT_IN ADDR LOAD x.
- pose proof (kill_reg_sound rel dst rs (default_notrap_load_value chunk) REL x) as KILL.
+ pose proof (kill_reg_sound rel dst rs Vundef REL x) as KILL.
unfold load2.
destruct (peq x dst).
{
@@ -768,7 +784,7 @@ Lemma load1_notrap1_sound :
forall rs : regset,
sem_rel rel rs ->
eval_addressing genv sp addr (rs ## args) = None ->
- sem_rel (load1 chunk addr dst args rel) (rs # dst <- (default_notrap_load_value chunk)).
+ sem_rel (load1 chunk addr dst args rel) (rs # dst <- Vundef).
Proof.
intros until rs.
intros REL ADDR LOAD.
@@ -791,7 +807,7 @@ Lemma load1_notrap2_sound :
sem_rel rel rs ->
eval_addressing genv sp addr (rs ## args) = Some a ->
Mem.loadv chunk m a = None ->
- sem_rel (load1 chunk addr dst args rel) (rs # dst <- (default_notrap_load_value chunk)).
+ sem_rel (load1 chunk addr dst args rel) (rs # dst <- Vundef).
Proof.
intros until a.
intros REL ADDR LOAD.
@@ -825,9 +841,9 @@ Proof.
assert (match eval_addressing genv sp addr rs## (map (forward_move rel) args) with
| Some a => match Mem.loadv chunk m a with
| Some dat => rs#src = dat
- | None => rs#src = default_notrap_load_value chunk
+ | None => rs#src = Vundef
end
- | None => rs#src = default_notrap_load_value chunk
+ | None => rs#src = Vundef
end) as FIND_LOAD.
{
apply (find_load_sound rel); trivial.
@@ -853,7 +869,7 @@ Lemma load_notrap1_sound :
forall rs : regset,
sem_rel rel rs ->
eval_addressing genv sp addr (rs ## args) = None ->
- sem_rel (load chunk addr dst args rel) (rs # dst <- (default_notrap_load_value chunk)).
+ sem_rel (load chunk addr dst args rel) (rs # dst <- Vundef).
Proof.
intros until rs.
intros REL ADDR.
@@ -863,9 +879,9 @@ Proof.
assert (match eval_addressing genv sp addr rs## (map (forward_move rel) args) with
| Some a => match Mem.loadv chunk m a with
| Some dat => rs#src = dat
- | None => rs#src = default_notrap_load_value chunk
+ | None => rs#src = Vundef
end
- | None => rs#src = default_notrap_load_value chunk
+ | None => rs#src = Vundef
end) as FIND_LOAD.
{
apply (find_load_sound rel); trivial.
@@ -890,7 +906,7 @@ Lemma load_notrap2_sound :
sem_rel rel rs ->
eval_addressing genv sp addr (rs ## args) = Some a ->
Mem.loadv chunk m a = None ->
- sem_rel (load chunk addr dst args rel) (rs # dst <- (default_notrap_load_value chunk)).
+ sem_rel (load chunk addr dst args rel) (rs # dst <- Vundef).
Proof.
intros until a.
intros REL ADDR.
@@ -900,9 +916,9 @@ Proof.
assert (match eval_addressing genv sp addr rs## (map (forward_move rel) args) with
| Some a => match Mem.loadv chunk m a with
| Some dat => rs#src = dat
- | None => rs#src = default_notrap_load_value chunk
+ | None => rs#src = Vundef
end
- | None => rs#src = default_notrap_load_value chunk
+ | None => rs#src = Vundef
end) as FIND_LOAD.
{
apply (find_load_sound rel); trivial.
@@ -984,7 +1000,40 @@ Proof.
apply op_depends_on_memory_correct; auto.
}
Qed.
-
+
+Lemma kill_store_sound :
+ forall m m' : mem,
+ forall rel : RELATION.t,
+ forall chunk addr args a v rs,
+ (eval_addressing genv sp addr (rs ## args)) = Some a ->
+ (Mem.storev chunk m a v) = Some m' ->
+ sem_rel m rel rs -> sem_rel m' (kill_store chunk addr args rel) rs.
+Proof.
+ unfold sem_rel, sem_reg.
+ intros until rs.
+ intros ADDR STORE SEM x.
+ pose proof (SEM x) as SEMx.
+ unfold kill_store, kill_store1.
+ rewrite PTree.gfilter1.
+ destruct (rel ! x) as [ sv | ].
+ 2: reflexivity.
+ destruct sv; simpl in *; trivial.
+ {
+ destruct op_depends_on_memory eqn:DEPENDS; simpl; trivial.
+ rewrite SEMx.
+ apply op_depends_on_memory_correct; auto.
+ }
+ destruct may_overlap eqn:OVERLAP; simpl; trivial.
+ destruct (eval_addressing genv sp addr0 rs ## args0) eqn:ADDR0.
+ {
+ erewrite may_overlap_sound with (args := (map (forward_move rel) args)).
+ all: try eassumption.
+
+ erewrite forward_move_map by eassumption.
+ assumption.
+ }
+ intuition congruence.
+Qed.
End SOUNDNESS.
Definition match_prog (p tp: RTL.program) :=
@@ -1072,6 +1121,7 @@ Definition fmap_sem' := fmap_sem fundef unit ge.
Definition subst_arg_ok' := subst_arg_ok fundef unit ge.
Definition subst_args_ok' := subst_args_ok fundef unit ge.
Definition kill_mem_sound' := kill_mem_sound fundef unit ge.
+Definition kill_store_sound' := kill_store_sound fundef unit ge.
Lemma sem_rel_b_ge:
forall rb1 rb2 : RB.t,
@@ -1150,8 +1200,11 @@ Proof.
reflexivity.
- (* op *)
unfold transf_instr in *.
- destruct find_op_in_fmap eqn:FIND_OP.
+ destruct (if is_trivial_op op then None else find_op_in_fmap (forward_map f) pc op
+ (subst_args (forward_map f) pc args)) eqn:FIND_OP.
{
+ destruct (is_trivial_op op).
+ discriminate.
unfold find_op_in_fmap, fmap_sem', fmap_sem in *.
destruct (forward_map f) as [map |] eqn:MAP.
2: discriminate.
@@ -1241,9 +1294,9 @@ Proof.
{
f_equal.
symmetry.
- apply find_load_sound' with (chunk := chunk) (m := m) (a := a) (genv := ge) (sp := sp) (addr := addr) (args := subst_args (Some map) pc args) (rel := mpc) (src := r) (rs := rs); trivial.
rewrite MAP in H0.
- assumption.
+ eapply find_load_sound' with (genv := ge) (sp := sp) (addr := addr) (args := subst_args (Some map) pc args) (rel := mpc) (src := r) (rs := rs).
+ all: eassumption.
}
unfold fmap_sem. rewrite MAP. rewrite MPC. assumption.
}
@@ -1268,36 +1321,37 @@ Proof.
apply load_sound with (a := a); auto.
}
{
- econstructor; split.
- assert (eval_addressing tge sp addr rs ## args = Some a).
- rewrite <- H0.
- apply eval_addressing_preserved. exact symbols_preserved.
- eapply exec_Iload; eauto.
- rewrite (subst_args_ok' sp m); assumption.
- constructor; auto.
-
- simpl in *.
- unfold fmap_sem', fmap_sem in *.
- destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
- destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction.
- apply sem_rel_b_ge with (rb2 := Some (load chunk addr dst args mpc)).
+ econstructor; split.
+ assert (eval_addressing tge sp addr rs ## args = Some a).
+ rewrite <- H0.
+ apply eval_addressing_preserved. exact symbols_preserved.
+ eapply exec_Iload; eauto.
+ rewrite (subst_args_ok' sp m); assumption.
+ constructor; auto.
+
+ simpl in *.
+ unfold fmap_sem', fmap_sem in *.
+ destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+ destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction.
+ apply sem_rel_b_ge with (rb2 := Some (load chunk addr dst args mpc)).
+ {
+ replace (Some (load chunk addr dst args mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
{
- replace (Some (load chunk addr dst args mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
- {
- eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
- 2: apply apply_instr'_bot.
- simpl. tauto.
- }
- unfold apply_instr'.
- rewrite H.
- rewrite MPC.
- simpl.
- reflexivity.
+ eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+ 2: apply apply_instr'_bot.
+ simpl. tauto.
}
- apply load_sound with (a := a); assumption.
+ unfold apply_instr'.
+ rewrite H.
+ rewrite MPC.
+ simpl.
+ reflexivity.
+ }
+ apply load_sound with (a := a); assumption.
}
-- unfold transf_instr in *.
+- (* load notrap1 *)
+ unfold transf_instr in *.
destruct find_load_in_fmap eqn:FIND_LOAD.
{
unfold find_load_in_fmap, fmap_sem', fmap_sem in *.
@@ -1306,16 +1360,16 @@ Proof.
change (@PMap.get (option RELATION.t) pc map) with (map # pc) in *.
destruct (map # pc) as [mpc | ] eqn:MPC.
2: discriminate.
-
econstructor; split.
{
eapply exec_Iop with (v := (default_notrap_load_value chunk)); eauto.
+ simpl.
rewrite <- subst_args_ok with (genv := ge) (f := f) (pc := pc) (sp := sp) (m := m) in H0.
- { simpl.
+ {
f_equal.
- apply find_load_notrap1_sound' with (chunk := chunk) (m := m) (genv := ge) (sp := sp) (addr := addr) (args := subst_args (Some map) pc args) (rel := mpc) (src := r) (rs := rs); trivial.
rewrite MAP in H0.
- assumption.
+ eapply find_load_notrap1_sound' with (genv := ge) (sp := sp) (addr := addr) (args := subst_args (Some map) pc args) (rel := mpc) (src := r) (rs := rs).
+ all: eassumption.
}
unfold fmap_sem. rewrite MAP. rewrite MPC. assumption.
}
@@ -1339,37 +1393,38 @@ Proof.
unfold sem_rel_b', sem_rel_b.
apply load_notrap1_sound; auto.
}
- {
- econstructor; split.
- assert (eval_addressing tge sp addr rs ## args = None).
- rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
- eapply exec_Iload_notrap1; eauto.
- rewrite subst_args_ok with (genv := ge) (sp := sp) (m := m) ; assumption.
- constructor; auto.
+ {
+ econstructor; split.
+ assert (eval_addressing tge sp addr rs ## args = None).
+ rewrite <- H0.
+ apply eval_addressing_preserved. exact symbols_preserved.
+ eapply exec_Iload_notrap1; eauto.
+ rewrite (subst_args_ok' sp m); assumption.
+ constructor; auto.
- simpl in *.
- unfold fmap_sem', fmap_sem in *.
- destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
- destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction.
- apply sem_rel_b_ge with (rb2 := Some (load chunk addr dst args mpc)).
+ simpl in *.
+ unfold fmap_sem', fmap_sem in *.
+ destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
+ destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction.
+ apply sem_rel_b_ge with (rb2 := Some (load chunk addr dst args mpc)).
+ {
+ replace (Some (load chunk addr dst args mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
{
- replace (Some (load chunk addr dst args mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
- {
- eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
- 2: apply apply_instr'_bot.
- simpl. tauto.
- }
- unfold apply_instr'.
- rewrite H.
- rewrite MPC.
- simpl.
- reflexivity.
+ eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
+ 2: apply apply_instr'_bot.
+ simpl. tauto.
}
- apply load_notrap1_sound; trivial.
+ unfold apply_instr'.
+ rewrite H.
+ rewrite MPC.
+ simpl.
+ reflexivity.
+ }
+ apply load_notrap1_sound; assumption.
}
-- (* load notrap2 *)
- unfold transf_instr in *.
+(* load notrap2 *)
+- unfold transf_instr in *.
destruct find_load_in_fmap eqn:FIND_LOAD.
{
unfold find_load_in_fmap, fmap_sem', fmap_sem in *.
@@ -1385,9 +1440,9 @@ Proof.
rewrite <- subst_args_ok with (genv := ge) (f := f) (pc := pc) (sp := sp) (m := m) in H0.
{
f_equal.
- apply find_load_notrap2_sound' with (chunk := chunk) (m := m) (a := a) (genv := ge) (sp := sp) (addr := addr) (args := subst_args (Some map) pc args) (rel := mpc) (src := r) (rs := rs); trivial.
rewrite MAP in H0.
- assumption.
+ eapply find_load_notrap2_sound' with (genv := ge) (sp := sp) (addr := addr) (args := subst_args (Some map) pc args) (rel := mpc) (src := r) (rs := rs).
+ all: try eassumption.
}
unfold fmap_sem. rewrite MAP. rewrite MPC. assumption.
}
@@ -1411,19 +1466,20 @@ Proof.
unfold sem_rel_b', sem_rel_b.
apply load_notrap2_sound with (a := a); auto.
}
- {
+ {
econstructor; split.
assert (eval_addressing tge sp addr rs ## args = Some a).
- rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved.
+ rewrite <- H0.
+ apply eval_addressing_preserved. exact symbols_preserved.
eapply exec_Iload_notrap2; eauto.
- rewrite subst_args_ok with (genv := ge) (sp := sp) (m := m); assumption.
+ rewrite (subst_args_ok' sp m); assumption.
constructor; auto.
simpl in *.
unfold fmap_sem', fmap_sem in *.
destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction.
- apply sem_rel_b_ge with (rb2 := Some (load chunk addr dst args mpc)).
+ apply sem_rel_b_ge with (rb2 := Some (load chunk addr dst args mpc)).
{
replace (Some (load chunk addr dst args mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
{
@@ -1454,9 +1510,9 @@ Proof.
unfold fmap_sem', fmap_sem in *.
destruct (forward_map _) as [map |] eqn:MAP in *; trivial.
destruct (map # pc) as [mpc |] eqn:MPC in *; try contradiction.
- apply sem_rel_b_ge with (rb2 := Some (kill_mem mpc)); trivial.
+ apply sem_rel_b_ge with (rb2 := Some (kill_store chunk addr args mpc)); trivial.
{
- replace (Some (kill_mem mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
+ replace (Some (kill_store chunk addr args mpc)) with (apply_instr' (fn_code f) pc (map # pc)).
{
eapply DS.fixpoint_solution with (code := fn_code f) (successors := successors_instr); try eassumption.
2: apply apply_instr'_bot.
@@ -1468,8 +1524,7 @@ Proof.
rewrite H.
reflexivity.
}
- apply (kill_mem_sound' sp m).
- assumption.
+ eapply (kill_store_sound' sp m); eassumption.
(* call *)
- econstructor; split.
@@ -1625,7 +1680,7 @@ Proof.
econstructor; split.
eapply exec_return; eauto.
constructor; auto.
-Admitted.
+Qed.
Lemma transf_initial_states:
@@ -1657,4 +1712,4 @@ Proof.
exact step_simulation.
Qed.
-End PRESERVATION. \ No newline at end of file
+End PRESERVATION.
diff --git a/backend/Constprop.v b/backend/Constprop.v
index eda41b39..0be9438c 100644
--- a/backend/Constprop.v
+++ b/backend/Constprop.v
@@ -69,7 +69,7 @@ Fixpoint successor_rec (n: nat) (f: function) (ae: AE.t) (pc: node) : node :=
match f.(fn_code)!pc with
| Some (Inop s) =>
successor_rec n' f ae s
- | Some (Icond cond args s1 s2) =>
+ | Some (Icond cond args s1 s2 _) =>
match resolve_branch (eval_static_condition cond (aregs ae args)) with
| Some b => successor_rec n' f ae (if b then s1 else s2)
| None => pc
@@ -217,14 +217,14 @@ Definition transf_instr (f: function) (an: PMap.t VA.t) (rm: romem)
end
| _, _ => dfl
end
- | Icond cond args s1 s2 =>
+ | Icond cond args s1 s2 i =>
let aargs := aregs ae args in
match resolve_branch (eval_static_condition cond aargs) with
| Some b =>
if b then Inop s1 else Inop s2
| None =>
let (cond', args') := cond_strength_reduction cond args aargs in
- Icond cond' args' s1 s2
+ Icond cond' args' s1 s2 i
end
| Ijumptable arg tbl =>
match areg ae arg with
diff --git a/backend/Constpropproof.v b/backend/Constpropproof.v
index 63cfee24..60663503 100644
--- a/backend/Constpropproof.v
+++ b/backend/Constpropproof.v
@@ -142,8 +142,8 @@ Inductive match_pc (f: function) (rs: regset) (m: mem): nat -> node -> node -> P
f.(fn_code)!pc = Some (Inop s) ->
match_pc f rs m n s pcx ->
match_pc f rs m (S n) pc pcx
- | match_pc_cond: forall n pc cond args s1 s2 pcx,
- f.(fn_code)!pc = Some (Icond cond args s1 s2) ->
+ | match_pc_cond: forall n pc cond args s1 s2 pcx i,
+ f.(fn_code)!pc = Some (Icond cond args s1 s2 i) ->
(forall b,
eval_condition cond rs##args m = Some b ->
match_pc f rs m n (if b then s1 else s2) pcx) ->
diff --git a/backend/Conventions.v b/backend/Conventions.v
index 6025c6b4..14ffb587 100644
--- a/backend/Conventions.v
+++ b/backend/Conventions.v
@@ -34,6 +34,73 @@ Proof.
apply IHpl; auto.
Qed.
+(** ** Stack size of function arguments *)
+
+(** [size_arguments s] returns the number of [Outgoing] slots used
+ to call a function with signature [s]. *)
+
+Definition max_outgoing_1 (accu: Z) (l: loc) : Z :=
+ match l with
+ | S Outgoing ofs ty => Z.max accu (ofs + typesize ty)
+ | _ => accu
+ end.
+
+Definition max_outgoing_2 (accu: Z) (rl: rpair loc) : Z :=
+ match rl with
+ | One l => max_outgoing_1 accu l
+ | Twolong l1 l2 => max_outgoing_1 (max_outgoing_1 accu l1) l2
+ end.
+
+Definition size_arguments (s: signature) : Z :=
+ List.fold_left max_outgoing_2 (loc_arguments s) 0.
+
+(** The offsets of [Outgoing] arguments are below [size_arguments s]. *)
+
+Remark fold_max_outgoing_above:
+ forall l n, fold_left max_outgoing_2 l n >= n.
+Proof.
+ assert (A: forall n l, max_outgoing_1 n l >= n).
+ { intros; unfold max_outgoing_1. destruct l as [_ | []]; xomega. }
+ induction l; simpl; intros.
+ - omega.
+ - eapply Zge_trans. eauto.
+ destruct a; simpl. apply A. eapply Zge_trans; eauto.
+Qed.
+
+Lemma size_arguments_above:
+ forall s, size_arguments s >= 0.
+Proof.
+ intros. apply fold_max_outgoing_above.
+Qed.
+
+Lemma loc_arguments_bounded:
+ forall (s: signature) (ofs: Z) (ty: typ),
+ In (S Outgoing ofs ty) (regs_of_rpairs (loc_arguments s)) ->
+ ofs + typesize ty <= size_arguments s.
+Proof.
+ intros until ty.
+ assert (A: forall n l, n <= max_outgoing_1 n l).
+ { intros; unfold max_outgoing_1. destruct l as [_ | []]; xomega. }
+ assert (B: forall p n,
+ In (S Outgoing ofs ty) (regs_of_rpair p) ->
+ ofs + typesize ty <= max_outgoing_2 n p).
+ { intros. destruct p; simpl in H; intuition; subst; simpl.
+ - xomega.
+ - eapply Z.le_trans. 2: apply A. xomega.
+ - xomega. }
+ assert (C: forall l n,
+ In (S Outgoing ofs ty) (regs_of_rpairs l) ->
+ ofs + typesize ty <= fold_left max_outgoing_2 l n).
+ { induction l; simpl; intros.
+ - contradiction.
+ - rewrite in_app_iff in H. destruct H.
+ + eapply Z.le_trans. eapply B; eauto.
+ apply Z.ge_le. apply fold_max_outgoing_above.
+ + apply IHl; auto.
+ }
+ apply C.
+Qed.
+
(** ** Location of function parameters *)
(** A function finds the values of its parameter in the same locations
diff --git a/backend/Deadcode.v b/backend/Deadcode.v
index 1f208a91..3412a6fa 100644
--- a/backend/Deadcode.v
+++ b/backend/Deadcode.v
@@ -142,7 +142,7 @@ Definition transfer (f: function) (approx: PMap.t VA.t)
nmem_dead_stack f.(fn_stacksize))
| Some(Ibuiltin ef args res s) =>
transfer_builtin approx!!pc ef args res ne nm
- | Some(Icond cond args s1 s2) =>
+ | Some(Icond cond args s1 s2 _) =>
if peq s1 s2 then after else
(add_needs args (needs_of_condition cond) ne, nm)
| Some(Ijumptable arg tbl) =>
@@ -192,7 +192,7 @@ Definition transf_instr (approx: PMap.t VA.t) (an: PMap.t NA.t)
if nmem_contains (snd an!!pc) (aaddr_arg approx!!pc dst) sz
then instr
else Inop s
- | Icond cond args s1 s2 =>
+ | Icond cond args s1 s2 _ =>
if peq s1 s2 then Inop s1 else instr
| _ =>
instr
diff --git a/backend/Duplicate.v b/backend/Duplicate.v
index 82c17367..af85efe4 100644
--- a/backend/Duplicate.v
+++ b/backend/Duplicate.v
@@ -134,8 +134,8 @@ Definition verify_match_inst dupmap inst tinst :=
else Error (msg "Different ef in Ibuiltin")
| _ => Error (msg "verify_match_inst Ibuiltin") end
- | Icond cond lr n1 n2 => match tinst with
- | Icond cond' lr' n1' n2' =>
+ | Icond cond lr n1 n2 i => match tinst with
+ | Icond cond' lr' n1' n2' i' =>
if (list_eq_dec Pos.eq_dec lr lr') then
if (eq_condition cond cond') then
do u1 <- verify_is_copy dupmap n1 n1';
diff --git a/backend/Duplicateaux.ml b/backend/Duplicateaux.ml
index d0b7129e..89f187da 100644
--- a/backend/Duplicateaux.ml
+++ b/backend/Duplicateaux.ml
@@ -1,25 +1,32 @@
+(* Oracle for Duplicate pass.
+ * - Add static prediction information to Icond nodes
+ * - Performs tail duplication on interesting traces to form superblocks
+ * - (TODO: perform partial loop unrolling inside innermost loops)
+ *)
+
open RTL
open Maps
open Camlcoq
-(* TTL : IR emphasizing the preferred next node *)
-module TTL = struct
- type instruction =
- | Tleaf of RTL.instruction
- | Tnext of node * RTL.instruction
-
- type code = instruction PTree.t
-end;;
-
-open TTL
+let debug_flag = ref false
-(** RTL to TTL *)
+let debug fmt =
+ if !debug_flag then Printf.eprintf fmt
+ else Printf.ifprintf stderr fmt
let get_some = function
| None -> failwith "Did not get some"
| Some thing -> thing
-let bfs code entrypoint =
+let rtl_successors = function
+| Itailcall _ | Ireturn _ -> []
+| Icall(_,_,_,_,n) | Ibuiltin(_,_,_,n) | Inop n | Iop (_,_,_,n)
+| Iload (_,_,_,_,_,n) | Istore (_,_,_,_,n) -> [n]
+| Icond (_,_,n1,n2,_) -> [n1; n2]
+| Ijumptable (_,ln) -> ln
+
+let bfs code entrypoint = begin
+ debug "bfs\n";
let visited = ref (PTree.map (fun n i -> false) code)
and bfs_list = ref []
and to_visit = Queue.create ()
@@ -33,32 +40,24 @@ let bfs code entrypoint =
match PTree.get !node code with
| None -> failwith "No such node"
| Some i ->
- bfs_list := !bfs_list @ [!node];
- match i with
- | Icall(_, _, _, _, n) -> Queue.add n to_visit
- | Ibuiltin(_, _, _, n) -> Queue.add n to_visit
- | Ijumptable(_, ln) -> List.iter (fun n -> Queue.add n to_visit) ln
- | Itailcall _ | Ireturn _ -> ()
- | Icond (_, _, n1, n2) -> Queue.add n1 to_visit; Queue.add n2 to_visit
- | Inop n | Iop (_, _, _, n) | Iload (_, _, _, _, _, n) | Istore (_, _, _, _, n) -> Queue.add n to_visit
+ bfs_list := !node :: !bfs_list;
+ let succ = rtl_successors i in
+ List.iter (fun n -> Queue.add n to_visit) succ
end
done;
- !bfs_list
+ List.rev !bfs_list
end
+end
let optbool o = match o with Some _ -> true | None -> false
let ptree_get_some n ptree = get_some @@ PTree.get n ptree
-let get_predecessors_rtl code =
+let get_predecessors_rtl code = begin
+ debug "get_predecessors_rtl\n";
let preds = ref (PTree.map (fun n i -> []) code) in
let process_inst (node, i) =
- let succ = match i with
- | Inop n | Iop (_,_,_,n) | Iload (_, _,_,_,_,n) | Istore (_,_,_,_,n)
- | Icall (_,_,_,_,n) | Ibuiltin (_, _, _, n) -> [n]
- | Icond (_,_,n1,n2) -> [n1;n2]
- | Ijumptable (_,ln) -> ln
- | Itailcall _ | Ireturn _ -> []
+ let succ = rtl_successors i
in List.iter (fun s ->
let previous_preds = ptree_get_some s !preds in
if optbool @@ List.find_opt (fun e -> e == node) previous_preds then ()
@@ -67,6 +66,7 @@ let get_predecessors_rtl code =
List.iter process_inst (PTree.elements code);
!preds
end
+end
module PInt = struct
type t = P.t
@@ -80,65 +80,23 @@ let print_intlist l =
| [] -> ()
| n::ln -> (Printf.printf "%d " (P.to_int n); f ln)
in begin
- Printf.printf "[";
- f l;
- Printf.printf "]"
+ if !debug_flag then begin
+ Printf.printf "[";
+ f l;
+ Printf.printf "]"
+ end
end
let print_intset s =
let seq = PSet.to_seq s
in begin
- Printf.printf "{";
- Seq.iter (fun n ->
- Printf.printf "%d " (P.to_int n)
- ) seq;
- Printf.printf "}"
- end
-
-(* FIXME - dominators not working well because the order of dataflow update isn't right *)
- (*
-let get_dominators code entrypoint =
- let bfs_order = bfs code entrypoint
- and predecessors = get_predecessors_rtl code
- in let doms = ref (PTree.map (fun n i -> PSet.of_list bfs_order) code)
- in begin
- Printf.printf "BFS: ";
- print_intlist bfs_order;
- Printf.printf "\n";
- List.iter (fun n ->
- let preds = get_some @@ PTree.get n predecessors
- and single = PSet.singleton n
- in match preds with
- | [] -> doms := PTree.set n single !doms
- | p::lp ->
- let set_p = get_some @@ PTree.get p !doms
- and set_lp = List.map (fun p -> get_some @@ PTree.get p !doms) lp
- in let inter = List.fold_left PSet.inter set_p set_lp
- in let union = PSet.union inter single
- in begin
- Printf.printf "----------------------------------------\n";
- Printf.printf "n = %d\n" (P.to_int n);
- Printf.printf "set_p = "; print_intset set_p; Printf.printf "\n";
- Printf.printf "set_lp = ["; List.iter (fun s -> print_intset s; Printf.printf ", ") set_lp; Printf.printf "]\n";
- Printf.printf "=> inter = "; print_intset inter; Printf.printf "\n";
- Printf.printf "=> union = "; print_intset union; Printf.printf "\n";
- doms := PTree.set n union !doms
- end
- ) bfs_order;
- !doms
- end
-*)
-
-let print_dominators dominators =
- let domlist = PTree.elements dominators
- in begin
- Printf.printf "{\n";
- List.iter (fun (n, doms) ->
- Printf.printf "\t";
- Printf.printf "%d:" (P.to_int n);
- print_intset doms;
- Printf.printf "\n"
- ) domlist
+ if !debug_flag then begin
+ Printf.printf "{";
+ Seq.iter (fun n ->
+ Printf.printf "%d " (P.to_int n)
+ ) seq;
+ Printf.printf "}"
+ end
end
type vstate = Unvisited | Processed | Visited
@@ -150,7 +108,8 @@ type vstate = Unvisited | Processed | Visited
*
* If we come accross an edge to a Processed node, it's a loop!
*)
-let get_loop_headers code entrypoint =
+let get_loop_headers code entrypoint = begin
+ debug "get_loop_headers\n";
let visited = ref (PTree.map (fun n i -> Unvisited) code)
and is_loop_header = ref (PTree.map (fun n i -> false) code)
in let rec dfs_visit code = function
@@ -159,6 +118,7 @@ let get_loop_headers code entrypoint =
match (get_some @@ PTree.get node !visited) with
| Visited -> ()
| Processed -> begin
+ debug "Node %d is a loop header\n" (P.to_int node);
is_loop_header := PTree.set node true !is_loop_header;
visited := PTree.set node Visited !visited
end
@@ -166,13 +126,7 @@ let get_loop_headers code entrypoint =
visited := PTree.set node Processed !visited;
match PTree.get node code with
| None -> failwith "No such node"
- | Some i -> let next_visits = (match i with
- | Icall (_, _, _, _, n) | Ibuiltin (_, _, _, n) | Inop n | Iop (_, _, _, n)
- | Iload (_, _, _, _, _, n) | Istore (_, _, _, _, n) -> [n]
- | Icond (_, _, n1, n2) -> [n1; n2]
- | Itailcall _ | Ireturn _ -> []
- | Ijumptable (_, ln) -> ln
- ) in dfs_visit code next_visits;
+ | Some i -> let next_visits = rtl_successors i in dfs_visit code next_visits;
visited := PTree.set node Visited !visited;
dfs_visit code ln
end
@@ -180,140 +134,220 @@ let get_loop_headers code entrypoint =
dfs_visit code [entrypoint];
!is_loop_header
end
+end
let ptree_printbool pt =
let elements = PTree.elements pt
in begin
- Printf.printf "[";
- List.iter (fun (n, b) ->
- if b then Printf.printf "%d, " (P.to_int n) else ()
- ) elements;
- Printf.printf "]"
+ if !debug_flag then begin
+ Printf.printf "[";
+ List.iter (fun (n, b) ->
+ if b then Printf.printf "%d, " (P.to_int n) else ()
+ ) elements;
+ Printf.printf "]"
+ end
end
(* Looks ahead (until a branch) to see if a node further down verifies
* the given predicate *)
let rec look_ahead code node is_loop_header predicate =
if (predicate node) then true
- else match (get_some @@ PTree.get node code) with
- | Ireturn _ | Itailcall _ | Icond _ | Ijumptable _ -> false
- | Inop n | Iop (_, _, _, n) | Iload (_, _, _, _, _, n)
- | Istore (_, _, _, _, n) | Icall (_, _, _, _, n)
- | Ibuiltin (_, _, _, n) ->
- if (predicate n) then true
- else (
- if (get_some @@ PTree.get n is_loop_header) then false
- else look_ahead code n is_loop_header predicate
- )
-
-exception HeuristicSucceeded
-
-let do_call_heuristic code ifso ifnot is_loop_header preferred =
- let predicate n = (function
- | Icall _ -> true
- | _ -> false) @@ get_some @@ PTree.get n code
- in if (look_ahead code ifso is_loop_header predicate) then
- (preferred := false; raise HeuristicSucceeded)
- else if (look_ahead code ifnot is_loop_header predicate) then
- (preferred := true; raise HeuristicSucceeded)
- else ()
-
-let do_opcode_heuristic code cond ifso ifnot preferred = DuplicateOpcodeHeuristic.opcode_heuristic code cond ifso ifnot preferred
-
-let do_return_heuristic code ifso ifnot is_loop_header preferred =
- let predicate n = (function
- | Ireturn _ -> true
- | _ -> false) @@ get_some @@ PTree.get n code
- in if (look_ahead code ifso is_loop_header predicate) then
- (preferred := false; raise HeuristicSucceeded)
- else if (look_ahead code ifnot is_loop_header predicate) then
- (preferred := true; raise HeuristicSucceeded)
- else ()
-
-let do_store_heuristic code ifso ifnot is_loop_header preferred =
- let predicate n = (function
- | Istore _ -> true
- | _ -> false) @@ get_some @@ PTree.get n code
- in if (look_ahead code ifso is_loop_header predicate) then
- (preferred := false; raise HeuristicSucceeded)
- else if (look_ahead code ifnot is_loop_header predicate) then
- (preferred := true; raise HeuristicSucceeded)
- else ()
-
-let do_loop_heuristic code ifso ifnot is_loop_header preferred =
- let predicate n = get_some @@ PTree.get n is_loop_header
- in if (look_ahead code ifso is_loop_header predicate) then
- (preferred := true; raise HeuristicSucceeded)
- else if (look_ahead code ifnot is_loop_header predicate) then
- (preferred := false; raise HeuristicSucceeded)
- else ()
-
-let get_directions code entrypoint =
- let bfs_order = bfs code entrypoint
- and is_loop_header = get_loop_headers code entrypoint
- and directions = ref (PTree.map (fun n i -> false) code) (* false <=> fallthru *)
+ else match (rtl_successors @@ get_some @@ PTree.get node code) with
+ | [n] -> if (predicate n) then true
+ else (
+ if (get_some @@ PTree.get n is_loop_header) then false
+ else look_ahead code n is_loop_header predicate
+ )
+ | _ -> false
+
+let do_call_heuristic code cond ifso ifnot is_loop_header =
+ begin
+ debug "\tCall heuristic..\n";
+ let predicate n = (function
+ | Icall _ -> true
+ | _ -> false) @@ get_some @@ PTree.get n code
+ in let ifso_call = look_ahead code ifso is_loop_header predicate
+ in let ifnot_call = look_ahead code ifnot is_loop_header predicate
+ in if ifso_call && ifnot_call then None
+ else if ifso_call then Some false
+ else if ifnot_call then Some true
+ else None
+ end
+
+let do_opcode_heuristic code cond ifso ifnot is_loop_header =
+ begin
+ debug "\tOpcode heuristic..\n";
+ DuplicateOpcodeHeuristic.opcode_heuristic code cond ifso ifnot is_loop_header
+ end
+
+let do_return_heuristic code cond ifso ifnot is_loop_header =
+ begin
+ debug "\tReturn heuristic..\n";
+ let predicate n = (function
+ | Ireturn _ -> true
+ | _ -> false) @@ get_some @@ PTree.get n code
+ in let ifso_return = look_ahead code ifso is_loop_header predicate
+ in let ifnot_return = look_ahead code ifnot is_loop_header predicate
+ in if ifso_return && ifnot_return then None
+ else if ifso_return then Some false
+ else if ifnot_return then Some true
+ else None
+ end
+
+let do_store_heuristic code cond ifso ifnot is_loop_header =
+ begin
+ debug "\tStore heuristic..\n";
+ let predicate n = (function
+ | Istore _ -> true
+ | _ -> false) @@ get_some @@ PTree.get n code
+ in let ifso_store = look_ahead code ifso is_loop_header predicate
+ in let ifnot_store = look_ahead code ifnot is_loop_header predicate
+ in if ifso_store && ifnot_store then None
+ else if ifso_store then Some false
+ else if ifnot_store then Some true
+ else None
+ end
+
+let do_loop_heuristic code cond ifso ifnot is_loop_header =
+ begin
+ debug "\tLoop heuristic..\n";
+ let predicate n = get_some @@ PTree.get n is_loop_header in
+ let ifso_loop = look_ahead code ifso is_loop_header predicate in
+ let ifnot_loop = look_ahead code ifnot is_loop_header predicate in
+ if ifso_loop && ifnot_loop then None (* TODO - take the innermost loop ? *)
+ else if ifso_loop then Some true
+ else if ifnot_loop then Some false
+ else None
+ end
+
+let do_loop2_heuristic loop_info n code cond ifso ifnot is_loop_header =
+ begin
+ debug "\tLoop2 heuristic..\n";
+ match get_some @@ PTree.get n loop_info with
+ | None -> None
+ | Some b -> Some b
+ end
+
+(* Returns a PTree of either None or Some b where b determines the node following the loop, for a cb instruction *)
+(* It uses the fact that loops in CompCert are done by a branch (backedge) instruction followed by a cb *)
+let get_loop_info is_loop_header bfs_order code =
+ let loop_info = ref (PTree.map (fun n i -> None) code) in
+ let mark_path s n =
+ let visited = ref (PTree.map (fun n i -> false) code) in
+ let rec explore src dest =
+ if (get_some @@ PTree.get src !visited) then false
+ else if src == dest then true
+ else begin
+ visited := PTree.set src true !visited;
+ match rtl_successors @@ get_some @@ PTree.get src code with
+ | [] -> false
+ | [s] -> explore s dest
+ | [s1; s2] -> (explore s1 dest) || (explore s2 dest)
+ | _ -> false
+ end
+ in let rec advance_to_cb src =
+ if (get_some @@ PTree.get src !visited) then None
+ else begin
+ visited := PTree.set src true !visited;
+ match get_some @@ PTree.get src code with
+ | Inop s | Iop (_, _, _, s) | Iload (_,_,_,_,_,s) | Istore (_,_,_,_,s) | Icall (_,_,_,_,s)
+ | Ibuiltin (_,_,_,s) -> advance_to_cb s
+ | Icond _ -> Some src
+ | Ijumptable _ | Itailcall _ | Ireturn _ -> None
+ end
+ in begin
+ debug "Marking path from %d to %d\n" (P.to_int n) (P.to_int s);
+ match advance_to_cb s with
+ | None -> (debug "Nothing found\n")
+ | Some s -> ( debug "Advancing to %d\n" (P.to_int s);
+ match get_some @@ PTree.get s !loop_info with
+ | None | Some _ -> begin
+ match get_some @@ PTree.get s code with
+ | Icond (_, _, n1, n2, _) ->
+ let b1 = explore n1 n in
+ let b2 = explore n2 n in
+ if (b1 && b2) then (debug "both true\n")
+ else if b1 then (debug "true privileged\n"; loop_info := PTree.set s (Some true) !loop_info)
+ else if b2 then (debug "false privileged\n"; loop_info := PTree.set s (Some false) !loop_info)
+ else (debug "none true\n")
+ | _ -> ( debug "not an icond\n" )
+ end
+ (* | Some _ -> ( debug "already loop info there\n" ) FIXME - we don't know yet whether a branch to a loop head is a backedge or not *)
+ )
+ end
in begin
- Printf.printf "Loop headers: ";
- ptree_printbool is_loop_header;
- Printf.printf "\n";
+ List.iter (fun n ->
+ match get_some @@ PTree.get n code with
+ | Inop s | Iop (_,_,_,s) | Iload (_,_,_,_,_,s) | Istore (_,_,_,_,s) | Icall (_,_,_,_,s)
+ | Ibuiltin (_, _, _, s) ->
+ if get_some @@ PTree.get s is_loop_header then mark_path s n
+ | Icond _ -> () (* loop backedges are never Icond in CompCert RTL.3 *)
+ | Ijumptable _ -> ()
+ | Itailcall _ | Ireturn _ -> ()
+ ) bfs_order;
+ !loop_info
+ end
+
+(* Remark - compared to the original paper, we don't use the store heuristic *)
+let get_directions code entrypoint = begin
+ debug "get_directions\n";
+ let bfs_order = bfs code entrypoint in
+ let is_loop_header = get_loop_headers code entrypoint in
+ let loop_info = get_loop_info is_loop_header bfs_order code in
+ let directions = ref (PTree.map (fun n i -> None) code) in (* None <=> no predicted direction *)
+ begin
+ (* ptree_printbool is_loop_header; *)
+ (* debug "\n"; *)
List.iter (fun n ->
match (get_some @@ PTree.get n code) with
- | Icond (cond, lr, ifso, ifnot) ->
- Printf.printf "Analyzing %d.." (P.to_int n);
- let preferred = ref false
- in (try
- Printf.printf " call..";
- do_call_heuristic code ifso ifnot is_loop_header preferred;
- Printf.printf " opcode..";
- do_opcode_heuristic code cond ifso ifnot preferred;
- Printf.printf " return..";
- do_return_heuristic code ifso ifnot is_loop_header preferred;
- Printf.printf " store..";
- do_store_heuristic code ifso ifnot is_loop_header preferred;
- Printf.printf " loop..";
- do_loop_heuristic code ifso ifnot is_loop_header preferred;
- Printf.printf "Random choice for %d\n" (P.to_int n);
- preferred := Random.bool ()
- with HeuristicSucceeded | DuplicateOpcodeHeuristic.HeuristicSucceeded
- -> Printf.printf " %s\n" (match !preferred with true -> "BRANCH"
- | false -> "FALLTHROUGH")
- ); directions := PTree.set n !preferred !directions
+ | Icond (cond, lr, ifso, ifnot, _) ->
+ (* debug "Analyzing %d.." (P.to_int n); *)
+ let heuristics = [ do_opcode_heuristic;
+ do_return_heuristic; do_loop2_heuristic loop_info n; do_loop_heuristic; do_call_heuristic;
+ (* do_store_heuristic *) ] in
+ let preferred = ref None in
+ begin
+ debug "Deciding condition for RTL node %d\n" (P.to_int n);
+ List.iter (fun do_heur ->
+ match !preferred with
+ | None -> preferred := do_heur code cond ifso ifnot is_loop_header
+ | Some _ -> ()
+ ) heuristics;
+ directions := PTree.set n !preferred !directions;
+ (match !preferred with | Some false -> debug "\tFALLTHROUGH\n"
+ | Some true -> debug "\tBRANCH\n"
+ | None -> debug "\tUNSURE\n");
+ debug "---------------------------------------\n"
+ end
| _ -> ()
) bfs_order;
!directions
end
+end
+
+let update_direction direction = function
+| Icond (cond, lr, n, n', _) -> Icond (cond, lr, n, n', direction)
+| i -> i
-let to_ttl_inst direction = function
-| Ireturn o -> Tleaf (Ireturn o)
-| Inop n -> Tnext (n, Inop n)
-| Iop (op, lr, r, n) -> Tnext (n, Iop(op, lr, r, n))
-| Iload (tm, m, a, lr, r, n) -> Tnext (n, Iload(tm, m, a, lr, r, n))
-| Istore (m, a, lr, r, n) -> Tnext (n, Istore(m, a, lr, r, n))
-| Icall (s, ri, lr, r, n) -> Tleaf (Icall(s, ri, lr, r, n))
-| Itailcall (s, ri, lr) -> Tleaf (Itailcall(s, ri, lr))
-| Ibuiltin (ef, lbr, br, n) -> Tleaf (Ibuiltin(ef, lbr, br, n))
-| Icond (cond, lr, n, n') -> (match direction with
- | false -> Tnext (n', Icond(cond, lr, n, n'))
- | true -> Tnext (n, Icond(cond, lr, n, n')))
-| Ijumptable (r, ln) -> Tleaf (Ijumptable(r, ln))
-
-let rec to_ttl_code_rec directions = function
+let rec update_direction_rec directions = function
| [] -> PTree.empty
| m::lm -> let (n, i) = m
in let direction = get_some @@ PTree.get n directions
- in PTree.set n (to_ttl_inst direction i) (to_ttl_code_rec directions lm)
+ in PTree.set n (update_direction direction i) (update_direction_rec directions lm)
-let to_ttl_code code entrypoint =
+(* Uses branch prediction to write prediction annotations in Icond *)
+let update_directions code entrypoint = begin
+ debug "Update_directions\n";
let directions = get_directions code entrypoint
in begin
- Printf.printf "Ifso directions: ";
+ (* debug "Ifso directions: ";
ptree_printbool directions;
- Printf.printf "\n";
- Random.init(0); (* using same seed to make it deterministic *)
- to_ttl_code_rec directions (PTree.elements code)
+ debug "\n"; *)
+ update_direction_rec directions (PTree.elements code)
end
+end
-(** Trace selection on TTL *)
+(** Trace selection *)
let rec exists_false_rec = function
| [] -> false
@@ -321,50 +355,29 @@ let rec exists_false_rec = function
let exists_false boolmap = exists_false_rec (PTree.elements boolmap)
-(* DFS on TTL to guide the exploration *)
-let dfs code entrypoint =
+(* DFS using prediction info to guide the exploration *)
+let dfs code entrypoint = begin
+ debug "dfs\n";
let visited = ref (PTree.map (fun n i -> false) code) in
let rec dfs_list code = function
| [] -> []
| node :: ln ->
- let node_dfs =
- if not (get_some @@ PTree.get node !visited) then begin
- visited := PTree.set node true !visited;
- match PTree.get node code with
- | None -> failwith "No such node"
- | Some ti -> [node] @ match ti with
- | Tleaf i -> (match i with
- | Icall(_, _, _, _, n) -> dfs_list code [n]
- | Ibuiltin(_, _, _, n) -> dfs_list code [n]
- | Ijumptable(_, ln) -> dfs_list code ln
- | Itailcall _ | Ireturn _ -> []
- | _ -> failwith "Tleaf case not handled in dfs" )
- | Tnext (n,i) -> (dfs_list code [n]) @ match i with
- | Icond (_, _, n1, n2) -> dfs_list code [n1; n2]
- | Inop _ | Iop _ | Iload _ | Istore _ -> []
- | _ -> failwith "Tnext case not handled in dfs"
- end
- else []
- in node_dfs @ (dfs_list code ln)
+ if get_some @@ PTree.get node !visited then dfs_list code ln
+ else begin
+ visited := PTree.set node true !visited;
+ let next_nodes = (match get_some @@ PTree.get node code with
+ | Icall(_, _, _, _, n) | Ibuiltin (_, _, _, n) | Iop (_, _, _, n)
+ | Iload (_, _, _, _, _, n) | Istore (_, _, _, _, n) | Inop n -> [n]
+ | Ijumptable (_, ln) -> ln
+ | Itailcall _ | Ireturn _ -> []
+ | Icond (_, _, n1, n2, info) -> (match info with
+ | Some false -> [n2; n1]
+ | _ -> [n1; n2]
+ )
+ ) in node :: dfs_list code (next_nodes @ ln)
+ end
in dfs_list code [entrypoint]
-
-let get_predecessors_ttl code =
- let preds = ref (PTree.map (fun n i -> []) code) in
- let process_inst (node, ti) = match ti with
- | Tleaf _ -> ()
- | Tnext (_, i) -> let succ = match i with
- | Inop n | Iop (_,_,_,n) | Iload (_, _,_,_,_,n) | Istore (_,_,_,_,n)
- | Icall (_,_,_,_,n) | Ibuiltin (_, _, _, n) -> [n]
- | Icond (_,_,n1,n2) -> [n1;n2]
- | Ijumptable (_,ln) -> ln
- | _ -> []
- in List.iter (fun s -> preds := PTree.set s (node::(get_some @@ PTree.get s !preds)) !preds) succ
- in begin
- List.iter process_inst (PTree.elements code);
- !preds
- end
-
-let rtl_proj code = PTree.map (fun n ti -> match ti with Tleaf i | Tnext(_, i) -> i) code
+end
let rec select_unvisited_node is_visited = function
| [] -> failwith "Empty list"
@@ -373,24 +386,87 @@ let rec select_unvisited_node is_visited = function
let best_successor_of node code is_visited =
match (PTree.get node code) with
| None -> failwith "No such node in the code"
- | Some ti -> match ti with
- | Tleaf _ -> None
- | Tnext (n,_) -> if not (ptree_get_some n is_visited) then Some n
- else None
-
-let best_predecessor_of node predecessors order is_visited =
+ | Some i ->
+ let next_node = match i with
+ | Inop n | Iop (_,_,_,n) | Iload (_,_,_,_,_,n) | Istore(_,_,_,_,n)
+ | Icall (_,_,_,_,n) | Ibuiltin (_,_,_,n) -> Some n
+ | Icond (_, _, n1, n2, ob) -> (match ob with None -> None | Some false -> Some n2 | Some true -> Some n1)
+ | _ -> None
+ in match next_node with
+ | None -> None
+ | Some n -> if not (ptree_get_some n is_visited) then Some n else None
+
+(* FIXME - could be improved by selecting in priority the predicted paths *)
+let best_predecessor_of node predecessors code order is_visited =
match (PTree.get node predecessors) with
| None -> failwith "No predecessor list found"
- | Some lp -> try Some (List.find (fun n -> (List.mem n lp) && (not (ptree_get_some n is_visited))) order)
- with Not_found -> None
+ | Some lp ->
+ try Some (List.find (fun n ->
+ if (List.mem n lp) && (not (ptree_get_some n is_visited)) then
+ match ptree_get_some n code with
+ | Icond (_, _, n1, n2, ob) -> (match ob with
+ | None -> false
+ | Some false -> n == n2
+ | Some true -> n == n1
+ )
+ | _ -> true
+ else false
+ ) order)
+ with Not_found -> None
+
+let print_trace t = print_intlist t
+
+let print_traces traces =
+ let rec f = function
+ | [] -> ()
+ | t::lt -> Printf.printf "\n\t"; print_trace t; Printf.printf ",\n"; f lt
+ in begin
+ if !debug_flag then begin
+ Printf.printf "Traces: {";
+ f traces;
+ Printf.printf "}\n";
+ end
+ end
+
+(* Dumb (but linear) trace selection *)
+let select_traces_linear code entrypoint =
+ let is_visited = ref (PTree.map (fun n i -> false) code) in
+ let bfs_order = bfs code entrypoint in
+ let rec go_through node = begin
+ is_visited := PTree.set node true !is_visited;
+ let next_node = match (get_some @@ PTree.get node code) with
+ | Icall(_, _, _, _, n) | Ibuiltin (_, _, _, n) | Iop (_, _, _, n)
+ | Iload (_, _, _, _, _, n) | Istore (_, _, _, _, n) | Inop n -> Some n
+ | Ijumptable _ | Itailcall _ | Ireturn _ -> None
+ | Icond (_, _, n1, n2, info) -> (match info with
+ | Some false -> Some n2
+ | Some true -> Some n1
+ | None -> None
+ )
+ in match next_node with
+ | None -> [node]
+ | Some n ->
+ if not (get_some @@ PTree.get n !is_visited) then node :: go_through n
+ else [node]
+ end
+ in let traces = ref [] in begin
+ List.iter (fun n ->
+ if not (get_some @@ PTree.get n !is_visited) then
+ traces := (go_through n) :: !traces
+ ) bfs_order;
+ !traces
+ end
+
(* Algorithm mostly inspired from Chang and Hwu 1988
* "Trace Selection for Compiling Large C Application Programs to Microcode" *)
-let select_traces code entrypoint =
+let select_traces_chang code entrypoint = begin
+ debug "select_traces\n";
let order = dfs code entrypoint in
- let predecessors = get_predecessors_ttl code in
+ let predecessors = get_predecessors_rtl code in
let traces = ref [] in
let is_visited = ref (PTree.map (fun n i -> false) code) in begin (* mark all nodes visited *)
+ debug "Length: %d\n" (List.length order);
while exists_false !is_visited do (* while (there are unvisited nodes) *)
let seed = select_unvisited_node !is_visited order in
let trace = ref [seed] in
@@ -410,7 +486,7 @@ let select_traces code entrypoint =
current := seed;
quit_loop := false;
while not !quit_loop do
- let s = best_predecessor_of !current predecessors order !is_visited in
+ let s = best_predecessor_of !current predecessors code order !is_visited in
match s with
| None -> quit_loop := true (* if (s==0) exit loop *)
| Some pred -> begin
@@ -423,21 +499,16 @@ let select_traces code entrypoint =
end
end
done;
- Printf.printf "DFS: \t"; print_intlist order; Printf.printf "\n";
+ (* debug "DFS: \t"; print_intlist order; debug "\n"; *)
+ debug "Traces: "; print_traces !traces;
!traces
end
+end
-let print_trace t = print_intlist t
-
-let print_traces traces =
- let rec f = function
- | [] -> ()
- | t::lt -> Printf.printf "\n\t"; print_trace t; Printf.printf ",\n"; f lt
- in begin
- Printf.printf "Traces: {";
- f traces;
- Printf.printf "}\n";
- end
+let select_traces code entrypoint =
+ let length = List.length @@ PTree.elements code in
+ if (length < 5000) then select_traces_chang code entrypoint
+ else select_traces_linear code entrypoint
let rec make_identity_ptree_rec = function
| [] -> PTree.empty
@@ -454,10 +525,10 @@ let rec change_pointers code n n' = function
| Ibuiltin(a, b, c, n0) -> assert (n0 == n); Ibuiltin(a, b, c, n')
| Ijumptable(a, ln) -> assert (optbool @@ List.find_opt (fun e -> e == n) ln);
Ijumptable(a, List.map (fun e -> if (e == n) then n' else e) ln)
- | Icond(a, b, n1, n2) -> assert (n1 == n || n2 == n);
+ | Icond(a, b, n1, n2, i) -> assert (n1 == n || n2 == n);
let n1' = if (n1 == n) then n' else n1
in let n2' = if (n2 == n) then n' else n2
- in Icond(a, b, n1', n2')
+ in Icond(a, b, n1', n2', i)
| Inop n0 -> assert (n0 == n); Inop n'
| Iop (a, b, c, n0) -> assert (n0 == n); Iop (a, b, c, n')
| Iload (a, b, c, d, e, n0) -> assert (n0 == n); Iload (a, b, c, d, e, n')
@@ -471,7 +542,7 @@ let rec change_pointers code n n' = function
* n': the integer which should contain the duplicate of n
* returns: new code, new ptree *)
let duplicate code ptree parent n preds n' =
- Printf.printf "Duplicating node %d into %d..\n" (P.to_int n) (P.to_int n');
+ debug "Duplicating node %d into %d..\n" (P.to_int n) (P.to_int n');
match PTree.get n' code with
| Some _ -> failwith "The PTree already has a node n'"
| None ->
@@ -529,30 +600,30 @@ let tail_duplicate code preds ptree trace =
in (new_code, new_ptree, !nb_duplicated)
let superblockify_traces code preds traces =
- let max_nb_duplicated = 1 (* FIXME - should be architecture dependent *)
+ let max_nb_duplicated = !Clflags.option_fduplicate (* FIXME - should be architecture dependent *)
in let ptree = make_identity_ptree code
in let rec f code ptree = function
| [] -> (code, ptree, 0)
| trace :: traces ->
let new_code, new_ptree, nb_duplicated = tail_duplicate code preds ptree trace
- in if (nb_duplicated < max_nb_duplicated) then f new_code new_ptree traces
- else (Printf.printf "Too many duplicated nodes, aborting tail duplication\n"; (code, ptree, 0))
+ in if (nb_duplicated < max_nb_duplicated)
+ then (debug "End duplication\n"; f new_code new_ptree traces)
+ else (debug "Too many duplicated nodes, aborting tail duplication\n"; (code, ptree, 0))
in let new_code, new_ptree, _ = f code ptree traces
in (new_code, new_ptree)
let rec invert_iconds_trace code = function
| [] -> code
- | n::[] -> code
- | n :: n' :: t ->
+ | n :: ln ->
let code' = match ptree_get_some n code with
- | Icond (c, lr, ifso, ifnot) ->
- assert (n' == ifso || n' == ifnot);
- if (n' == ifso) then (
- Printf.printf "Reversing ifso/ifnot for node %d\n" (P.to_int n);
- PTree.set n (Icond (Op.negate_condition c, lr, ifnot, ifso)) code )
- else code
+ | Icond (c, lr, ifso, ifnot, info) -> (match info with
+ | Some true -> begin
+ (* debug "Reversing ifso/ifnot for node %d\n" (P.to_int n); *)
+ PTree.set n (Icond (Op.negate_condition c, lr, ifnot, ifso, Some false)) code
+ end
+ | _ -> code)
| _ -> code
- in invert_iconds_trace code' (n'::t)
+ in invert_iconds_trace code' ln
let rec invert_iconds code = function
| [] -> code
@@ -561,12 +632,17 @@ let rec invert_iconds code = function
else code
in invert_iconds code' ts
-(* For now, identity function *)
let duplicate_aux f =
let entrypoint = f.fn_entrypoint in
- let code = f.fn_code in
- let traces = select_traces (to_ttl_code code entrypoint) entrypoint in
- let icond_code = invert_iconds code traces in
- let preds = get_predecessors_rtl icond_code in
- let (new_code, pTreeId) = (print_traces traces; superblockify_traces icond_code preds traces) in
- ((new_code, f.fn_entrypoint), pTreeId)
+ if !Clflags.option_fduplicate < 0 then
+ ((f.fn_code, entrypoint), make_identity_ptree f.fn_code)
+ else
+ let code = update_directions (f.fn_code) entrypoint in
+ let traces = select_traces code entrypoint in
+ let icond_code = invert_iconds code traces in
+ let preds = get_predecessors_rtl icond_code in
+ if !Clflags.option_fduplicate >= 1 then
+ let (new_code, pTreeId) = ((* print_traces traces; *) superblockify_traces icond_code preds traces) in
+ ((new_code, f.fn_entrypoint), pTreeId)
+ else
+ ((icond_code, entrypoint), make_identity_ptree code)
diff --git a/backend/Duplicateproof.v b/backend/Duplicateproof.v
index a8e9b16b..6b598dc7 100644
--- a/backend/Duplicateproof.v
+++ b/backend/Duplicateproof.v
@@ -23,12 +23,12 @@ Inductive match_inst (dupmap: PTree.t node): instruction -> instruction -> Prop
match_inst dupmap (Itailcall s ri lr) (Itailcall s ri lr)
| match_inst_builtin: forall n n' ef la br,
dupmap!n' = (Some n) -> match_inst dupmap (Ibuiltin ef la br n) (Ibuiltin ef la br n')
- | match_inst_cond: forall ifso ifso' ifnot ifnot' c lr,
+ | match_inst_cond: forall ifso ifso' ifnot ifnot' c lr i i',
dupmap!ifso' = (Some ifso) -> dupmap!ifnot' = (Some ifnot) ->
- match_inst dupmap (Icond c lr ifso ifnot) (Icond c lr ifso' ifnot')
- | match_inst_revcond: forall ifso ifso' ifnot ifnot' c lr,
+ match_inst dupmap (Icond c lr ifso ifnot i) (Icond c lr ifso' ifnot' i')
+ | match_inst_revcond: forall ifso ifso' ifnot ifnot' c lr i i',
dupmap!ifso' = (Some ifso) -> dupmap!ifnot' = (Some ifnot) ->
- match_inst dupmap (Icond c lr ifso ifnot) (Icond (negate_condition c) lr ifnot' ifso')
+ match_inst dupmap (Icond c lr ifso ifnot i) (Icond (negate_condition c) lr ifnot' ifso' i')
| match_inst_jumptable: forall ln ln' r,
list_forall2 (fun n n' => (dupmap!n' = (Some n))) ln ln' ->
match_inst dupmap (Ijumptable r ln) (Ijumptable r ln')
@@ -378,7 +378,7 @@ Theorem step_simulation:
step tge s2 t s2'
/\ match_states s1' s2'.
Proof.
- Local Hint Resolve transf_fundef_correct.
+ Local Hint Resolve transf_fundef_correct: core.
induction 1; intros; inv MS.
(* Inop *)
- eapply dupmap_correct in DUPLIC; eauto.
diff --git a/backend/ForwardMoves.v b/backend/ForwardMoves.v
index c73b0213..7cfd411f 100644
--- a/backend/ForwardMoves.v
+++ b/backend/ForwardMoves.v
@@ -250,7 +250,7 @@ Fixpoint kill_builtin_res (res : builtin_res reg) (rel : RELATION.t) :=
Definition apply_instr instr x :=
match instr with
| Inop _
- | Icond _ _ _ _
+ | Icond _ _ _ _ _
| Ijumptable _ _
| Istore _ _ _ _ _ => Some x
| Iop Omove (src :: nil) dst _ => Some (move src dst x)
@@ -309,8 +309,8 @@ Definition transf_instr (fmap : option (PMap.t RB.t))
Icall sig ros (subst_args fmap pc args) dst s
| Itailcall sig ros args =>
Itailcall sig ros (subst_args fmap pc args)
- | Icond cond args s1 s2 =>
- Icond cond (subst_args fmap pc args) s1 s2
+ | Icond cond args s1 s2 i =>
+ Icond cond (subst_args fmap pc args) s1 s2 i
| Ijumptable arg tbl =>
Ijumptable (subst_arg fmap pc arg) tbl
| Ireturn (Some arg) =>
diff --git a/backend/IRC.ml b/backend/IRC.ml
index 67da47da..785b0a2d 100644
--- a/backend/IRC.ml
+++ b/backend/IRC.ml
@@ -238,7 +238,6 @@ type graph = {
according to their types. A variable can be forced into class 2
by giving it a negative spill cost. *)
-
let class_of_reg r =
if Conventions1.is_float_reg r then 1 else 0
diff --git a/backend/Inlining.v b/backend/Inlining.v
index 9cf535b9..8c7e1898 100644
--- a/backend/Inlining.v
+++ b/backend/Inlining.v
@@ -397,9 +397,9 @@ Definition expand_instr (ctx: context) (pc: node) (i: instruction): mon unit :=
| Ibuiltin ef args res s =>
set_instr (spc ctx pc)
(Ibuiltin ef (map (sbuiltinarg ctx) args) (sbuiltinres ctx res) (spc ctx s))
- | Icond cond args s1 s2 =>
+ | Icond cond args s1 s2 info =>
set_instr (spc ctx pc)
- (Icond cond (sregs ctx args) (spc ctx s1) (spc ctx s2))
+ (Icond cond (sregs ctx args) (spc ctx s1) (spc ctx s2) info)
| Ijumptable r tbl =>
set_instr (spc ctx pc)
(Ijumptable (sreg ctx r) (List.map (spc ctx) tbl))
diff --git a/backend/Inliningaux.ml b/backend/Inliningaux.ml
index d58704ca..cf308962 100644
--- a/backend/Inliningaux.ml
+++ b/backend/Inliningaux.ml
@@ -17,7 +17,8 @@ open Maps
open Op
open Ordered
open! RTL
-
+open Camlcoq
+
module PSet = Make(OrderedPositive)
type inlining_info = {
@@ -83,13 +84,15 @@ let static_called_once id io =
else
false
-(* To be considered: heuristics based on size of function? *)
+(* D. Monniaux: attempt at heuristic based on size *)
+let small_enough (f : coq_function) =
+ P.to_int (RTL.max_pc_function f) <= !Clflags.option_inline_auto_threshold
let should_inline (io: inlining_info) (id: ident) (f: coq_function) =
if !Clflags.option_finline then begin
match C2C.atom_inline id with
| C2C.Inline -> true
| C2C.Noinline -> false
- | C2C.No_specifier -> static_called_once id io
+ | C2C.No_specifier -> static_called_once id io || small_enough f
end else
false
diff --git a/backend/Inliningspec.v b/backend/Inliningspec.v
index e20fb373..eba026ec 100644
--- a/backend/Inliningspec.v
+++ b/backend/Inliningspec.v
@@ -312,9 +312,9 @@ Inductive tr_instr: context -> node -> instruction -> code -> Prop :=
match res with BR r => Ple r ctx.(mreg) | _ => True end ->
c!(spc ctx pc) = Some (Ibuiltin ef (map (sbuiltinarg ctx) args) (sbuiltinres ctx res) (spc ctx s)) ->
tr_instr ctx pc (Ibuiltin ef args res s) c
- | tr_cond: forall ctx pc cond args s1 s2 c,
- c!(spc ctx pc) = Some (Icond cond (sregs ctx args) (spc ctx s1) (spc ctx s2)) ->
- tr_instr ctx pc (Icond cond args s1 s2) c
+ | tr_cond: forall ctx pc cond args s1 s2 c i,
+ c!(spc ctx pc) = Some (Icond cond (sregs ctx args) (spc ctx s1) (spc ctx s2) i) ->
+ tr_instr ctx pc (Icond cond args s1 s2 i) c
| tr_jumptable: forall ctx pc r tbl c,
c!(spc ctx pc) = Some (Ijumptable (sreg ctx r) (List.map (spc ctx) tbl)) ->
tr_instr ctx pc (Ijumptable r tbl) c
diff --git a/backend/LTL.v b/backend/LTL.v
index ee8b4826..3edd60a2 100644
--- a/backend/LTL.v
+++ b/backend/LTL.v
@@ -37,7 +37,7 @@ Inductive instruction: Type :=
| Ltailcall (sg: signature) (ros: mreg + ident)
| Lbuiltin (ef: external_function) (args: list (builtin_arg loc)) (res: builtin_res mreg)
| Lbranch (s: node)
- | Lcond (cond: condition) (args: list mreg) (s1 s2: node)
+ | Lcond (cond: condition) (args: list mreg) (s1 s2: node) (info: option bool)
| Ljumptable (arg: mreg) (tbl: list node)
| Lreturn.
@@ -263,11 +263,11 @@ Inductive step: state -> trace -> state -> Prop :=
| exec_Lbranch: forall s f sp pc bb rs m,
step (Block s f sp (Lbranch pc :: bb) rs m)
E0 (State s f sp pc rs m)
- | exec_Lcond: forall s f sp cond args pc1 pc2 bb rs b pc rs' m,
+ | exec_Lcond: forall s f sp cond args pc1 pc2 bb rs b pc rs' m i,
eval_condition cond (reglist rs args) m = Some b ->
pc = (if b then pc1 else pc2) ->
rs' = undef_regs (destroyed_by_cond cond) rs ->
- step (Block s f sp (Lcond cond args pc1 pc2 :: bb) rs m)
+ step (Block s f sp (Lcond cond args pc1 pc2 i :: bb) rs m)
E0 (State s f sp pc rs' m)
| exec_Ljumptable: forall s f sp arg tbl bb rs m n pc rs',
rs (R arg) = Vint n ->
@@ -328,7 +328,7 @@ Fixpoint successors_block (b: bblock) : list node :=
| nil => nil (**r should never happen *)
| Ltailcall _ _ :: _ => nil
| Lbranch s :: _ => s :: nil
- | Lcond _ _ s1 s2 :: _ => s1 :: s2 :: nil
+ | Lcond _ _ s1 s2 _ :: _ => s1 :: s2 :: nil
| Ljumptable _ tbl :: _ => tbl
| Lreturn :: _ => nil
| instr :: b' => successors_block b'
diff --git a/backend/Linearize.v b/backend/Linearize.v
index 4216958c..66b36428 100644
--- a/backend/Linearize.v
+++ b/backend/Linearize.v
@@ -179,7 +179,7 @@ Fixpoint linearize_block (b: LTL.bblock) (k: code) : code :=
Lbuiltin ef args res :: linearize_block b' k
| LTL.Lbranch s :: b' =>
add_branch s k
- | LTL.Lcond cond args s1 s2 :: b' =>
+ | LTL.Lcond cond args s1 s2 _ :: b' =>
if starts_with s1 k then
Lcond (negate_condition cond) args s2 :: add_branch s1 k
else
diff --git a/backend/Linearizeaux.ml b/backend/Linearizeaux.ml
index a6964233..1381877b 100644
--- a/backend/Linearizeaux.ml
+++ b/backend/Linearizeaux.ml
@@ -13,6 +13,12 @@
open LTL
open Maps
+let debug_flag = ref false
+
+let debug fmt =
+ if !debug_flag then Printf.eprintf fmt
+ else Printf.ifprintf stderr fmt
+
(* Trivial enumeration, in decreasing order of PC *)
(***
@@ -81,7 +87,7 @@ let basic_blocks f joins =
| [] -> assert false
| Lbranch s :: _ -> next_in_block blk minpc s
| Ltailcall (sig0, ros) :: _ -> end_block blk minpc
- | Lcond (cond, args, ifso, ifnot) :: _ ->
+ | Lcond (cond, args, ifso, ifnot, _) :: _ ->
end_block blk minpc; start_block ifso; start_block ifnot
| Ljumptable(arg, tbl) :: _ ->
end_block blk minpc; List.iter start_block tbl
@@ -115,14 +121,11 @@ let enumerate_aux_flat f reach =
flatten_blocks (basic_blocks f (join_points f))
(**
- * Enumeration based on traces as identified by Duplicate.v
- *
- * The Duplicate phase heuristically identifies the most frequented paths. Each
- * Icond is modified so that the preferred condition is a fallthrough (ifnot)
- * rather than a branch (ifso).
+ * Alternate enumeration based on traces as identified by Duplicate.v
*
- * The enumeration below takes advantage of this - preferring to layout nodes
- * following the fallthroughs of the Lcond branches
+ * This is a slight alteration to the above heuristic, ensuring that any
+ * superblock will be contiguous in memory, while still following the original
+ * heuristic
*)
let get_some = function
@@ -136,29 +139,385 @@ let rec last_element = function
| e :: [] -> e
| e' :: e :: l -> last_element (e::l)
-let dfs code entrypoint =
+let print_plist l =
+ let rec f = function
+ | [] -> ()
+ | n :: l -> Printf.printf "%d, " (P.to_int n); f l
+ in begin
+ if !debug_flag then begin
+ Printf.printf "[";
+ f l;
+ Printf.printf "]"
+ end
+ end
+
+(* adapted from the above join_points function, but with PTree *)
+let get_join_points code entry =
+ let reached = ref (PTree.map (fun n i -> false) code) in
+ let reached_twice = ref (PTree.map (fun n i -> false) code) in
+ let rec traverse pc =
+ if get_some @@ PTree.get pc !reached then begin
+ if not (get_some @@ PTree.get pc !reached_twice) then
+ reached_twice := PTree.set pc true !reached_twice
+ end else begin
+ reached := PTree.set pc true !reached;
+ traverse_succs (successors_block @@ get_some @@ PTree.get pc code)
+ end
+ and traverse_succs = function
+ | [] -> ()
+ | [pc] -> traverse pc
+ | pc :: l -> traverse pc; traverse_succs l
+ in traverse entry; !reached_twice
+
+let forward_sequences code entry =
let visited = ref (PTree.map (fun n i -> false) code) in
- let rec dfs_list code = function
+ let join_points = get_join_points code entry in
+ (* returns the list of traversed nodes, and a list of nodes to start traversing next *)
+ let rec traverse_fallthrough code node =
+ (* debug "Traversing %d..\n" (P.to_int node); *)
+ if not (get_some @@ PTree.get node !visited) then begin
+ visited := PTree.set node true !visited;
+ match PTree.get node code with
+ | None -> failwith "No such node"
+ | Some bb ->
+ let ln, rem = match (last_element bb) with
+ | Lop _ | Lload _ | Lgetstack _ | Lsetstack _ | Lstore _ | Lcall _
+ | Lbuiltin _ -> assert false
+ | Ltailcall _ | Lreturn -> begin (* debug "STOP tailcall/return\n"; *) ([], []) end
+ | Lbranch n ->
+ if get_some @@ PTree.get n join_points then ([], [n])
+ else let ln, rem = traverse_fallthrough code n in (ln, rem)
+ | Lcond (_, _, ifso, ifnot, info) -> (match info with
+ | None -> begin (* debug "STOP Lcond None\n"; *) ([], [ifso; ifnot]) end
+ | Some false ->
+ if get_some @@ PTree.get ifnot join_points then ([], [ifso; ifnot])
+ else let ln, rem = traverse_fallthrough code ifnot in (ln, [ifso] @ rem)
+ | Some true ->
+ let errstr = Printf.sprintf ("Inconsistency detected in node %d: ifnot is not the preferred branch") (P.to_int node) in
+ failwith errstr)
+ | Ljumptable(_, ln) -> begin (* debug "STOP Ljumptable\n"; *) ([], ln) end
+ in ([node] @ ln, rem)
+ end
+ else ([], [])
+ in let rec f code = function
| [] -> []
| node :: ln ->
- let node_dfs =
- if not (get_some @@ PTree.get node !visited) then begin
- visited := PTree.set node true !visited;
- match PTree.get node code with
- | None -> failwith "No such node"
- | Some bb -> [node] @ match (last_element bb) with
- | Lop _ | Lload _ | Lgetstack _ | Lsetstack _ | Lstore _ | Lcall _
- | Lbuiltin _ -> assert false
- | Ltailcall _ | Lreturn -> []
- | Lbranch n -> dfs_list code [n]
- | Lcond (_, _, ifso, ifnot) -> dfs_list code [ifnot; ifso]
- | Ljumptable(_, ln) -> dfs_list code ln
+ let fs, rem_from_node = traverse_fallthrough code node
+ in [fs] @ ((f code rem_from_node) @ (f code ln))
+ in (f code [entry])
+
+(** Unused code
+module PInt = struct
+ type t = P.t
+ let compare x y = compare (P.to_int x) (P.to_int y)
+end
+
+module PSet = Set.Make(PInt)
+
+module LPInt = struct
+ type t = P.t list
+ let rec compare x y =
+ match x with
+ | [] -> ( match y with
+ | [] -> 0
+ | _ -> 1 )
+ | e :: l -> match y with
+ | [] -> -1
+ | e' :: l' ->
+ let e_cmp = PInt.compare e e' in
+ if e_cmp == 0 then compare l l' else e_cmp
+end
+
+module LPSet = Set.Make(LPInt)
+
+let iter_lpset f s = Seq.iter f (LPSet.to_seq s)
+
+let first_of = function
+ | [] -> None
+ | e :: l -> Some e
+
+let rec last_of = function
+ | [] -> None
+ | e :: l -> (match l with [] -> Some e | e :: l -> last_of l)
+
+let can_be_merged code s s' =
+ let last_s = get_some @@ last_of s in
+ let first_s' = get_some @@ first_of s' in
+ match get_some @@ PTree.get last_s code with
+ | Lop _ | Lload _ | Lgetstack _ | Lsetstack _ | Lstore _ | Lcall _
+ | Lbuiltin _ | Ltailcall _ | Lreturn -> false
+ | Lbranch n -> n == first_s'
+ | Lcond (_, _, ifso, ifnot, info) -> (match info with
+ | None -> false
+ | Some false -> ifnot == first_s'
+ | Some true -> failwith "Inconsistency detected - ifnot is not the preferred branch")
+ | Ljumptable (_, ln) ->
+ match ln with
+ | [] -> false
+ | n :: ln -> n == first_s'
+
+let merge s s' = Some s
+
+let try_merge code (fs: (BinNums.positive list) list) =
+ let seqs = ref (LPSet.of_list fs) in
+ let oldLength = ref (LPSet.cardinal !seqs) in
+ let continue = ref true in
+ let found = ref false in
+ while !continue do
+ begin
+ found := false;
+ iter_lpset (fun s ->
+ if !found then ()
+ else iter_lpset (fun s' ->
+ if (!found || s == s') then ()
+ else if (can_be_merged code s s') then
+ begin
+ seqs := LPSet.remove s !seqs;
+ seqs := LPSet.remove s' !seqs;
+ seqs := LPSet.add (get_some (merge s s')) !seqs;
+ found := true;
+ end
+ else ()
+ ) !seqs
+ ) !seqs;
+ if !oldLength == LPSet.cardinal !seqs then
+ continue := false
+ else
+ oldLength := LPSet.cardinal !seqs
+ end
+ done;
+ !seqs
+*)
+
+(** Code adapted from Duplicateaux.get_loop_headers
+ *
+ * Getting loop branches with a DFS visit :
+ * Each node is either Unvisited, Visited, or Processed
+ * pre-order: node becomes Processed
+ * post-order: node becomes Visited
+ *
+ * If we come accross an edge to a Processed node, it's a loop!
+ *)
+type pos = BinNums.positive
+
+module PP = struct
+ type t = pos * pos
+ let compare a b =
+ let ax, ay = a in
+ let bx, by = b in
+ let dx = compare ax bx in
+ if (dx == 0) then compare ay by
+ else dx
+end
+
+module PPMap = Map.Make(PP)
+
+type vstate = Unvisited | Processed | Visited
+
+let get_loop_edges code entry =
+ let visited = ref (PTree.map (fun n i -> Unvisited) code) in
+ let is_loop_edge = ref PPMap.empty
+ in let rec dfs_visit code from = function
+ | [] -> ()
+ | node :: ln ->
+ match (get_some @@ PTree.get node !visited) with
+ | Visited -> ()
+ | Processed -> begin
+ let from_node = get_some from in
+ is_loop_edge := PPMap.add (from_node, node) true !is_loop_edge;
+ visited := PTree.set node Visited !visited
+ end
+ | Unvisited -> begin
+ visited := PTree.set node Processed !visited;
+ let bb = get_some @@ PTree.get node code in
+ let next_visits = (match (last_element bb) with
+ | Lop _ | Lload _ | Lgetstack _ | Lsetstack _ | Lstore _ | Lcall _
+ | Lbuiltin _ -> assert false
+ | Ltailcall _ | Lreturn -> []
+ | Lbranch n -> [n]
+ | Lcond (_, _, ifso, ifnot, _) -> [ifso; ifnot]
+ | Ljumptable(_, ln) -> ln
+ ) in dfs_visit code (Some node) next_visits;
+ visited := PTree.set node Visited !visited;
+ dfs_visit code from ln
+ end
+ in begin
+ dfs_visit code None [entry];
+ !is_loop_edge
+ end
+
+let ppmap_is_true pp ppmap = PPMap.mem pp ppmap && PPMap.find pp ppmap
+
+module Int = struct
+ type t = int
+ let compare x y = compare x y
+end
+
+module ISet = Set.Make(Int)
+
+let print_iset s = begin
+ if !debug_flag then begin
+ Printf.printf "{";
+ ISet.iter (fun e -> Printf.printf "%d, " e) s;
+ Printf.printf "}"
+ end
+end
+
+let print_depmap dm = begin
+ if !debug_flag then begin
+ Printf.printf "[|";
+ Array.iter (fun s -> print_iset s; Printf.printf ", ") dm;
+ Printf.printf "|]\n"
+ end
+end
+
+let construct_depmap code entry fs =
+ let is_loop_edge = get_loop_edges code entry in
+ let visited = ref (PTree.map (fun n i -> false) code) in
+ let depmap = Array.map (fun e -> ISet.empty) fs in
+ let find_index_of_node n =
+ let index = ref 0 in
+ begin
+ Array.iteri (fun i s ->
+ match List.find_opt (fun e -> e == n) s with
+ | Some _ -> index := i
+ | None -> ()
+ ) fs;
+ !index
+ end
+ in let check_and_update_depmap from target =
+ (* debug "From %d to %d\n" (P.to_int from) (P.to_int target); *)
+ if not (ppmap_is_true (from, target) is_loop_edge) then
+ let in_index_fs = find_index_of_node from in
+ let out_index_fs = find_index_of_node target in
+ if out_index_fs != in_index_fs then
+ depmap.(out_index_fs) <- ISet.add in_index_fs depmap.(out_index_fs)
+ else ()
+ else ()
+ in let rec dfs_visit code = function
+ | [] -> ()
+ | node :: ln ->
+ begin
+ match (get_some @@ PTree.get node !visited) with
+ | true -> ()
+ | false -> begin
+ visited := PTree.set node true !visited;
+ let bb = get_some @@ PTree.get node code in
+ let next_visits =
+ match (last_element bb) with
+ | Ltailcall _ | Lreturn -> []
+ | Lbranch n -> (check_and_update_depmap node n; [n])
+ | Lcond (_, _, ifso, ifnot, _) -> begin
+ check_and_update_depmap node ifso;
+ check_and_update_depmap node ifnot;
+ [ifso; ifnot]
+ end
+ | Ljumptable(_, ln) -> begin
+ List.iter (fun n -> check_and_update_depmap node n) ln;
+ ln
+ end
+ (* end of bblocks should not be another value than one of the above *)
+ | _ -> failwith "last_element gave an invalid output"
+ in dfs_visit code next_visits
+ end;
+ dfs_visit code ln
+ end
+ in begin
+ dfs_visit code [entry];
+ depmap
+ end
+
+let print_sequence s =
+ if !debug_flag then begin
+ Printf.printf "[";
+ List.iter (fun n -> Printf.printf "%d, " (P.to_int n)) s;
+ Printf.printf "]\n"
+ end
+
+let print_ssequence ofs =
+ if !debug_flag then begin
+ Printf.printf "[";
+ List.iter (fun s -> print_sequence s) ofs;
+ Printf.printf "]\n"
+ end
+
+let order_sequences code entry fs =
+ let fs_a = Array.of_list fs in
+ let depmap = construct_depmap code entry fs_a in
+ let fs_evaluated = Array.map (fun e -> false) fs_a in
+ let ordered_fs = ref [] in
+ let evaluate s_id =
+ begin
+ assert (not fs_evaluated.(s_id));
+ ordered_fs := fs_a.(s_id) :: !ordered_fs;
+ fs_evaluated.(s_id) <- true;
+ (* debug "++++++\n";
+ debug "Scheduling %d\n" s_id;
+ debug "Initial depmap: "; print_depmap depmap; *)
+ Array.iteri (fun i deps ->
+ depmap.(i) <- ISet.remove s_id deps
+ ) depmap;
+ (* debug "Final depmap: "; print_depmap depmap; *)
+ end
+ in let choose_best_of candidates =
+ let current_best_id = ref None in
+ let current_best_score = ref None in
+ begin
+ List.iter (fun id ->
+ match !current_best_id with
+ | None -> begin
+ current_best_id := Some id;
+ match fs_a.(id) with
+ | [] -> current_best_score := None
+ | n::l -> current_best_score := Some (P.to_int n)
+ end
+ | Some b -> begin
+ match fs_a.(id) with
+ | [] -> ()
+ | n::l -> let nscore = P.to_int n in
+ match !current_best_score with
+ | None -> (current_best_id := Some id; current_best_score := Some nscore)
+ | Some bs -> if nscore > bs then (current_best_id := Some id; current_best_score := Some nscore)
end
- else []
- in node_dfs @ (dfs_list code ln)
- in dfs_list code [entrypoint]
+ ) candidates;
+ !current_best_id
+ end
+ in let select_next () =
+ let candidates = ref [] in
+ begin
+ Array.iteri (fun i deps ->
+ begin
+ (* debug "Deps of %d: " i; print_iset deps; debug "\n"; *)
+ (* FIXME - if we keep it that way (no dependency check), remove all the unneeded stuff *)
+ if ((* deps == ISet.empty && *) not fs_evaluated.(i)) then
+ candidates := i :: !candidates
+ end
+ ) depmap;
+ if not (List.length !candidates > 0) then begin
+ Array.iteri (fun i deps ->
+ if (not fs_evaluated.(i)) then candidates := i :: !candidates
+ ) depmap;
+ end;
+ get_some (choose_best_of !candidates)
+ end
+ in begin
+ debug "-------------------------------\n";
+ debug "depmap: "; print_depmap depmap;
+ debug "forward sequences identified: "; print_ssequence fs;
+ while List.length !ordered_fs != List.length fs do
+ let next_id = select_next () in
+ evaluate next_id
+ done;
+ debug "forward sequences ordered: "; print_ssequence (List.rev (!ordered_fs));
+ List.rev (!ordered_fs)
+ end
-let enumerate_aux_trace f reach = dfs f.fn_code f.fn_entrypoint
+let enumerate_aux_trace f reach =
+ let code = f.fn_code in
+ let entry = f.fn_entrypoint in
+ let fs = forward_sequences code entry in
+ let ofs = order_sequences code entry fs in
+ List.flatten ofs
let enumerate_aux f reach =
if !Clflags.option_ftracelinearize then enumerate_aux_trace f reach
diff --git a/backend/Liveness.v b/backend/Liveness.v
index afe11ae6..9652b363 100644
--- a/backend/Liveness.v
+++ b/backend/Liveness.v
@@ -94,7 +94,7 @@ Definition transfer
| Ibuiltin ef args res s =>
reg_list_live (params_of_builtin_args args)
(reg_list_dead (params_of_builtin_res res) after)
- | Icond cond args ifso ifnot =>
+ | Icond cond args ifso ifnot _ =>
reg_list_live args after
| Ijumptable arg tbl =>
reg_live arg after
diff --git a/backend/PrintLTL.ml b/backend/PrintLTL.ml
index b309a9f2..d8f2ac12 100644
--- a/backend/PrintLTL.ml
+++ b/backend/PrintLTL.ml
@@ -83,10 +83,11 @@ let print_instruction pp succ = function
(print_builtin_args loc) args
| Lbranch s ->
print_succ pp s succ
- | Lcond(cond, args, s1, s2) ->
- fprintf pp "if (%a) goto %d else goto %d"
+ | Lcond(cond, args, s1, s2, info) ->
+ fprintf pp "if (%a) goto %d else goto %d (prediction: %s)"
(print_condition mreg) (cond, args)
(P.to_int s1) (P.to_int s2)
+ (match info with None -> "none" | Some true -> "branch" | Some false -> "fallthrough")
| Ljumptable(arg, tbl) ->
let tbl = Array.of_list tbl in
fprintf pp "jumptable (%a)" mreg arg;
diff --git a/backend/PrintRTL.ml b/backend/PrintRTL.ml
index c25773e5..b2ef05ca 100644
--- a/backend/PrintRTL.ml
+++ b/backend/PrintRTL.ml
@@ -75,10 +75,11 @@ let print_instruction pp (pc, i) =
(name_of_external ef)
(print_builtin_args reg) args;
print_succ pp s (pc - 1)
- | Icond(cond, args, s1, s2) ->
- fprintf pp "if (%a) goto %d else goto %d\n"
+ | Icond(cond, args, s1, s2, info) ->
+ fprintf pp "if (%a) goto %d else goto %d (prediction: %s)\n"
(PrintOp.print_condition reg) (cond, args)
(P.to_int s1) (P.to_int s2)
+ (match info with None -> "none" | Some true -> "branch" | Some false -> "fallthrough")
| Ijumptable(arg, tbl) ->
let tbl = Array.of_list tbl in
fprintf pp "jumptable (%a)\n" reg arg;
diff --git a/backend/PrintXTL.ml b/backend/PrintXTL.ml
index 1c7655fb..d1b79623 100644
--- a/backend/PrintXTL.ml
+++ b/backend/PrintXTL.ml
@@ -104,7 +104,7 @@ let print_instruction pp succ = function
(print_builtin_args var) args
| Xbranch s ->
print_succ pp s succ
- | Xcond(cond, args, s1, s2) ->
+ | Xcond(cond, args, s1, s2, _) ->
fprintf pp "if (%a) goto %d else goto %d"
(print_condition var) (cond, args)
(P.to_int s1) (P.to_int s2)
diff --git a/backend/RTL.v b/backend/RTL.v
index 29a49311..dec59ca2 100644
--- a/backend/RTL.v
+++ b/backend/RTL.v
@@ -67,11 +67,12 @@ Inductive instruction: Type :=
(** [Ibuiltin ef args dest succ] calls the built-in function
identified by [ef], giving it the values of [args] as arguments.
It stores the return value in [dest] and branches to [succ]. *)
- | Icond: condition -> list reg -> node -> node -> instruction
- (** [Icond cond args ifso ifnot] evaluates the boolean condition
+ | Icond: condition -> list reg -> node -> node -> option bool -> instruction
+ (** [Icond cond args ifso ifnot info] evaluates the boolean condition
[cond] over the values of registers [args]. If the condition
is true, it transitions to [ifso]. If the condition is false,
- it transitions to [ifnot]. *)
+ it transitions to [ifnot]. [info] is a ghost field there to provide
+ information relative to branch prediction. *)
| Ijumptable: reg -> list node -> instruction
(** [Ijumptable arg tbl] transitions to the node that is the [n]-th
element of the list [tbl], where [n] is the unsigned integer
@@ -262,8 +263,8 @@ Inductive step: state -> trace -> state -> Prop :=
step (State s f sp pc rs m)
t (State s f sp pc' (regmap_setres res vres rs) m')
| exec_Icond:
- forall s f sp pc rs m cond args ifso ifnot b pc',
- (fn_code f)!pc = Some(Icond cond args ifso ifnot) ->
+ forall s f sp pc rs m cond args ifso ifnot b pc' predb,
+ (fn_code f)!pc = Some(Icond cond args ifso ifnot predb) ->
eval_condition cond rs##args m = Some b ->
pc' = (if b then ifso else ifnot) ->
step (State s f sp pc rs m)
@@ -403,7 +404,7 @@ Definition successors_instr (i: instruction) : list node :=
| Icall sig ros args res s => s :: nil
| Itailcall sig ros args => nil
| Ibuiltin ef args res s => s :: nil
- | Icond cond args ifso ifnot => ifso :: ifnot :: nil
+ | Icond cond args ifso ifnot _ => ifso :: ifnot :: nil
| Ijumptable arg tbl => tbl
| Ireturn optarg => nil
end.
@@ -424,7 +425,7 @@ Definition instr_uses (i: instruction) : list reg :=
| Itailcall sig (inl r) args => r :: args
| Itailcall sig (inr id) args => args
| Ibuiltin ef args res s => params_of_builtin_args args
- | Icond cond args ifso ifnot => args
+ | Icond cond args ifso ifnot _ => args
| Ijumptable arg tbl => arg :: nil
| Ireturn None => nil
| Ireturn (Some arg) => arg :: nil
@@ -442,7 +443,7 @@ Definition instr_defs (i: instruction) : option reg :=
| Itailcall sig ros args => None
| Ibuiltin ef args res s =>
match res with BR r => Some r | _ => None end
- | Icond cond args ifso ifnot => None
+ | Icond cond args ifso ifnot _ => None
| Ijumptable arg tbl => None
| Ireturn optarg => None
end.
@@ -485,7 +486,7 @@ Definition max_reg_instr (m: positive) (pc: node) (i: instruction) :=
| Ibuiltin ef args res s =>
fold_left Pos.max (params_of_builtin_args args)
(fold_left Pos.max (params_of_builtin_res res) m)
- | Icond cond args ifso ifnot => fold_left Pos.max args m
+ | Icond cond args ifso ifnot _ => fold_left Pos.max args m
| Ijumptable arg tbl => Pos.max arg m
| Ireturn None => m
| Ireturn (Some arg) => Pos.max arg m
diff --git a/backend/RTLgen.v b/backend/RTLgen.v
index 2c27944a..ac98f3a1 100644
--- a/backend/RTLgen.v
+++ b/backend/RTLgen.v
@@ -479,7 +479,7 @@ with transl_condexpr (map: mapping) (a: condexpr) (ntrue nfalse: node)
match a with
| CEcond c al =>
do rl <- alloc_regs map al;
- do nt <- add_instr (Icond c rl ntrue nfalse);
+ do nt <- add_instr (Icond c rl ntrue nfalse None);
transl_exprlist map al rl nt
| CEcondition a b c =>
do nc <- transl_condexpr map c ntrue nfalse;
diff --git a/backend/RTLgenspec.v b/backend/RTLgenspec.v
index 92b48e2b..30ad7d82 100644
--- a/backend/RTLgenspec.v
+++ b/backend/RTLgenspec.v
@@ -744,9 +744,9 @@ Inductive tr_expr (c: code):
with tr_condition (c: code):
mapping -> list reg -> condexpr -> node -> node -> node -> Prop :=
- | tr_CEcond: forall map pr cond bl ns ntrue nfalse n1 rl,
+ | tr_CEcond: forall map pr cond bl ns ntrue nfalse n1 rl i,
tr_exprlist c map pr bl ns n1 rl ->
- c!n1 = Some (Icond cond rl ntrue nfalse) ->
+ c!n1 = Some (Icond cond rl ntrue nfalse i) ->
tr_condition c map pr (CEcond cond bl) ns ntrue nfalse
| tr_CEcondition: forall map pr a1 a2 a3 ns ntrue nfalse n2 n3,
tr_condition c map pr a1 ns n2 n3 ->
diff --git a/backend/RTLtyping.v b/backend/RTLtyping.v
index 857f2211..15ed6d8a 100644
--- a/backend/RTLtyping.v
+++ b/backend/RTLtyping.v
@@ -139,11 +139,11 @@ Inductive wt_instr : instruction -> Prop :=
valid_successor s ->
wt_instr (Ibuiltin ef args res s)
| wt_Icond:
- forall cond args s1 s2,
+ forall cond args s1 s2 i,
map env args = type_of_condition cond ->
valid_successor s1 ->
valid_successor s2 ->
- wt_instr (Icond cond args s1 s2)
+ wt_instr (Icond cond args s1 s2 i)
| wt_Ijumptable:
forall arg tbl,
env arg = Tint ->
@@ -313,7 +313,7 @@ Definition type_instr (e: S.typenv) (i: instruction) : res S.typenv :=
| _ => type_builtin_args e args sig.(sig_args)
end;
type_builtin_res e1 res (proj_sig_res sig)
- | Icond cond args s1 s2 =>
+ | Icond cond args s1 s2 _ =>
do x1 <- check_successor s1;
do x2 <- check_successor s2;
S.set_list e args (type_of_condition cond)
diff --git a/backend/Regalloc.ml b/backend/Regalloc.ml
index f2658b04..ffe26933 100644
--- a/backend/Regalloc.ml
+++ b/backend/Regalloc.ml
@@ -295,8 +295,8 @@ let block_of_RTL_instr funsig tyenv = function
(Xbuiltin(ef, args2, res2) ::
movelist (params_of_builtin_res res2) (params_of_builtin_res res1)
[Xbranch s])
- | RTL.Icond(cond, args, s1, s2) ->
- [Xcond(cond, vregs tyenv args, s1, s2)]
+ | RTL.Icond(cond, args, s1, s2, i) ->
+ [Xcond(cond, vregs tyenv args, s1, s2, i)]
| RTL.Ijumptable(arg, tbl) ->
[Xjumptable(vreg tyenv arg, tbl)]
| RTL.Ireturn None ->
@@ -380,7 +380,7 @@ let live_before instr after =
vset_addargs args (vset_removeres res after)
| Xbranch s ->
after
- | Xcond(cond, args, s1, s2) ->
+ | Xcond(cond, args, s1, s2, _) ->
List.fold_right VSet.add args after
| Xjumptable(arg, tbl) ->
VSet.add arg after
@@ -575,7 +575,7 @@ let spill_costs f =
charge_list 10 1 (params_of_builtin_res res)
end
| Xbranch _ -> ()
- | Xcond(cond, args, _, _) ->
+ | Xcond(cond, args, _, _, _) ->
charge_list 10 1 args
| Xjumptable(arg, _) ->
charge 10 1 arg
@@ -718,7 +718,7 @@ let add_interfs_instr g instr live =
end
| Xbranch s ->
()
- | Xcond(cond, args, s1, s2) ->
+ | Xcond(cond, args, s1, s2, _) ->
add_interfs_destroyed g live (destroyed_by_cond cond)
| Xjumptable(arg, tbl) ->
add_interfs_destroyed g live destroyed_by_jumptable
@@ -797,7 +797,7 @@ let tospill_instr alloc instr ts =
(addlist_tospill alloc (params_of_builtin_res res) ts)
| Xbranch s ->
ts
- | Xcond(cond, args, s1, s2) ->
+ | Xcond(cond, args, s1, s2, _) ->
addlist_tospill alloc args ts
| Xjumptable(arg, tbl) ->
add_tospill alloc arg ts
@@ -990,9 +990,9 @@ let spill_instr tospill eqs instr =
(c1 @ Xbuiltin(ef, args', res') :: c2, eqs2)
| Xbranch s ->
([instr], eqs)
- | Xcond(cond, args, s1, s2) ->
+ | Xcond(cond, args, s1, s2, i) ->
let (args', c1, eqs1) = reload_vars tospill eqs args in
- (c1 @ [Xcond(cond, args', s1, s2)], eqs1)
+ (c1 @ [Xcond(cond, args', s1, s2, i)], eqs1)
| Xjumptable(arg, tbl) ->
let (arg', c1, eqs1) = reload_var tospill eqs arg in
(c1 @ [Xjumptable(arg', tbl)], eqs1)
@@ -1128,8 +1128,8 @@ let transl_instr alloc instr k =
AST.map_builtin_res (mreg_of alloc) res) :: k
| Xbranch s ->
LTL.Lbranch s :: []
- | Xcond(cond, args, s1, s2) ->
- LTL.Lcond(cond, mregs_of alloc args, s1, s2) :: []
+ | Xcond(cond, args, s1, s2, i) ->
+ LTL.Lcond(cond, mregs_of alloc args, s1, s2, i) :: []
| Xjumptable(arg, tbl) ->
LTL.Ljumptable(mreg_of alloc arg, tbl) :: []
| Xreturn optarg ->
diff --git a/backend/Renumber.v b/backend/Renumber.v
index 7ba16658..2727b979 100644
--- a/backend/Renumber.v
+++ b/backend/Renumber.v
@@ -48,7 +48,7 @@ Definition renum_instr (i: instruction) : instruction :=
| Icall sg ros args res s => Icall sg ros args res (renum_pc s)
| Itailcall sg ros args => i
| Ibuiltin ef args res s => Ibuiltin ef args res (renum_pc s)
- | Icond cond args s1 s2 => Icond cond args (renum_pc s1) (renum_pc s2)
+ | Icond cond args s1 s2 info => Icond cond args (renum_pc s1) (renum_pc s2) info
| Ijumptable arg tbl => Ijumptable arg (List.map renum_pc tbl)
| Ireturn or => i
end.
diff --git a/backend/Splitting.ml b/backend/Splitting.ml
index 78eb66a5..3ca45c3b 100644
--- a/backend/Splitting.ml
+++ b/backend/Splitting.ml
@@ -162,8 +162,8 @@ let ren_instr f maps pc i =
| Ibuiltin(ef, args, res, s) ->
Ibuiltin(ef, List.map (AST.map_builtin_arg (ren_reg before)) args,
AST.map_builtin_res (ren_reg after) res, s)
- | Icond(cond, args, s1, s2) ->
- Icond(cond, ren_regs before args, s1, s2)
+ | Icond(cond, args, s1, s2, i) ->
+ Icond(cond, ren_regs before args, s1, s2, i)
| Ijumptable(arg, tbl) ->
Ijumptable(ren_reg before arg, tbl)
| Ireturn optarg ->
diff --git a/backend/Tunneling.v b/backend/Tunneling.v
index da1ce45a..a4c4a195 100644
--- a/backend/Tunneling.v
+++ b/backend/Tunneling.v
@@ -78,11 +78,11 @@ Definition record_gotos (f: LTL.function) : U.t :=
Definition tunnel_instr (uf: U.t) (i: instruction) : instruction :=
match i with
| Lbranch s => Lbranch (U.repr uf s)
- | Lcond cond args s1 s2 =>
+ | Lcond cond args s1 s2 info =>
let s1' := U.repr uf s1 in let s2' := U.repr uf s2 in
if peq s1' s2'
then Lbranch s1'
- else Lcond cond args s1' s2'
+ else Lcond cond args s1' s2' info
| Ljumptable arg tbl => Ljumptable arg (List.map (U.repr uf) tbl)
| _ => i
end.
diff --git a/backend/Unusedglob.v b/backend/Unusedglob.v
index 1b5f2547..93ca7af4 100644
--- a/backend/Unusedglob.v
+++ b/backend/Unusedglob.v
@@ -53,7 +53,7 @@ Definition ref_instruction (i: instruction) : list ident :=
| Itailcall _ (inl r) _ => nil
| Itailcall _ (inr id) _ => id :: nil
| Ibuiltin _ args _ _ => globals_of_builtin_args args
- | Icond cond _ _ _ => nil
+ | Icond cond _ _ _ _ => nil
| Ijumptable _ _ => nil
| Ireturn _ => nil
end.
diff --git a/backend/ValueAnalysis.v b/backend/ValueAnalysis.v
index 9a33768c..2e79d1a9 100644
--- a/backend/ValueAnalysis.v
+++ b/backend/ValueAnalysis.v
@@ -156,7 +156,7 @@ Definition transfer (f: function) (rm: romem) (pc: node) (ae: aenv) (am: amem) :
VA.Bot
| Some(Ibuiltin ef args res s) =>
transfer_builtin ae am rm ef args res
- | Some(Icond cond args s1 s2) =>
+ | Some(Icond cond args s1 s2 _) =>
VA.State ae am
| Some(Ijumptable arg tbl) =>
VA.State ae am
@@ -1044,9 +1044,8 @@ Proof.
red; simpl; intros. destruct (plt b (Mem.nextblock m)).
exploit RO; eauto. intros (R & P & Q).
split; auto.
- split. apply bmatch_incr with bc; auto. apply bmatch_inv with m; auto.
- intros. eapply Mem.loadbytes_unchanged_on_1. eapply external_call_readonly; eauto.
- auto. intros; red. apply Q.
+ split. apply bmatch_incr with bc; auto. apply bmatch_ext with m; auto.
+ intros. eapply external_call_readonly with (m2 := m'); eauto.
intros; red; intros; elim (Q ofs).
eapply external_call_max_perm with (m2 := m'); eauto.
destruct (j' b); congruence.
diff --git a/backend/ValueDomain.v b/backend/ValueDomain.v
index c132ce7c..779e7bb9 100644
--- a/backend/ValueDomain.v
+++ b/backend/ValueDomain.v
@@ -3502,11 +3502,6 @@ Proof.
- omegaContradiction.
Qed.
-Lemma max_size_chunk: forall chunk, size_chunk chunk <= 8.
-Proof.
- destruct chunk; simpl; omega.
-Qed.
-
Remark inval_before_contents:
forall i c chunk' av' j,
(inval_before i (i - 7) c)##j = Some (ACval chunk' av') ->
diff --git a/backend/XTL.ml b/backend/XTL.ml
index c496fafb..1d8e89c0 100644
--- a/backend/XTL.ml
+++ b/backend/XTL.ml
@@ -36,7 +36,7 @@ type instruction =
| Xtailcall of signature * (var, ident) sum * var list
| Xbuiltin of external_function * var builtin_arg list * var builtin_res
| Xbranch of node
- | Xcond of condition * var list * node * node
+ | Xcond of condition * var list * node * node * bool option
| Xjumptable of var * node list
| Xreturn of var list
@@ -105,7 +105,7 @@ let twin_reg r =
let rec successors_block = function
| Xbranch s :: _ -> [s]
| Xtailcall(sg, vos, args) :: _ -> []
- | Xcond(cond, args, s1, s2) :: _ -> [s1; s2]
+ | Xcond(cond, args, s1, s2, _) :: _ -> [s1; s2]
| Xjumptable(arg, tbl) :: _ -> tbl
| Xreturn _:: _ -> []
| instr :: blk -> successors_block blk
@@ -179,7 +179,7 @@ let type_instr = function
type_builtin_res res (proj_sig_res sg)
| Xbranch s ->
()
- | Xcond(cond, args, s1, s2) ->
+ | Xcond(cond, args, s1, s2, _) ->
set_vars_type args (type_of_condition cond)
| Xjumptable(arg, tbl) ->
set_var_type arg Tint
diff --git a/backend/XTL.mli b/backend/XTL.mli
index b4b77fab..7b7f7186 100644
--- a/backend/XTL.mli
+++ b/backend/XTL.mli
@@ -37,7 +37,7 @@ type instruction =
| Xtailcall of signature * (var, ident) sum * var list
| Xbuiltin of external_function * var builtin_arg list * var builtin_res
| Xbranch of node
- | Xcond of condition * var list * node * node
+ | Xcond of condition * var list * node * node * bool option
| Xjumptable of var * node list
| Xreturn of var list
diff --git a/cfrontend/C2C.ml b/cfrontend/C2C.ml
index f2a68156..f637b5e4 100644
--- a/cfrontend/C2C.ml
+++ b/cfrontend/C2C.ml
@@ -1272,7 +1272,7 @@ let convertFundef loc env fd =
{ a_storage = fd.fd_storage;
a_alignment = None;
a_size = None;
- a_sections = Sections.for_function env id' fd.fd_attrib;
+ a_sections = Sections.for_function env loc id' fd.fd_attrib;
a_access = Sections.Access_default;
a_inline = inline;
a_loc = loc };
@@ -1358,7 +1358,7 @@ let convertGlobvar loc env (sto, id, ty, optinit) =
| Some i ->
convertInitializer env ty i in
let (section, access) =
- Sections.for_variable env id' ty (optinit <> None)
+ Sections.for_variable env loc id' ty (optinit <> None)
(match sto with
| Storage_thread_local | Storage_thread_local_extern
| Storage_thread_local_static -> true
diff --git a/cfrontend/Cexec.v b/cfrontend/Cexec.v
index 2942080b..b08c3ad7 100644
--- a/cfrontend/Cexec.v
+++ b/cfrontend/Cexec.v
@@ -460,6 +460,14 @@ Definition do_ef_free
check (zlt 0 (Ptrofs.unsigned sz));
do m' <- Mem.free m b (Ptrofs.unsigned lo - size_chunk Mptr) (Ptrofs.unsigned lo + Ptrofs.unsigned sz);
Some(w, E0, Vundef, m')
+ | Vint n :: nil =>
+ if Int.eq_dec n Int.zero && negb Archi.ptr64
+ then Some(w, E0, Vundef, m)
+ else None
+ | Vlong n :: nil =>
+ if Int64.eq_dec n Int64.zero && Archi.ptr64
+ then Some(w, E0, Vundef, m)
+ else None
| _ => None
end.
@@ -544,45 +552,51 @@ Proof with try congruence.
- eapply do_external_function_sound; eauto.
}
destruct ef; simpl.
-(* EF_external *)
+- (* EF_external *)
eapply do_external_function_sound; eauto.
-(* EF_builtin *)
+- (* EF_builtin *)
eapply BF_EX; eauto.
-(* EF_runtime *)
+- (* EF_runtime *)
eapply BF_EX; eauto.
-(* EF_vload *)
+- (* EF_vload *)
unfold do_ef_volatile_load. destruct vargs... destruct v... destruct vargs...
mydestr. destruct p as [[w'' t''] v]; mydestr.
exploit do_volatile_load_sound; eauto. intuition. econstructor; eauto.
- auto.
-(* EF_vstore *)
+- (* EF_vstore *)
unfold do_ef_volatile_store. destruct vargs... destruct v... destruct vargs... destruct vargs...
mydestr. destruct p as [[w'' t''] m'']. mydestr.
exploit do_volatile_store_sound; eauto. intuition. econstructor; eauto.
- auto.
-(* EF_malloc *)
+- (* EF_malloc *)
unfold do_ef_malloc. destruct vargs... destruct vargs... mydestr.
destruct (Mem.alloc m (- size_chunk Mptr) (Ptrofs.unsigned i)) as [m1 b] eqn:?. mydestr.
split. apply SIZE in Heqo. subst v. econstructor; eauto. constructor.
-(* EF_free *)
- unfold do_ef_free. destruct vargs... destruct v... destruct vargs...
- mydestr. split. apply SIZE in Heqo0. econstructor; eauto. congruence. omega. constructor.
-(* EF_memcpy *)
+- (* EF_free *)
+ unfold do_ef_free. destruct vargs... destruct v...
++ destruct vargs... mydestr; InvBooleans; subst i.
+ replace (Vint Int.zero) with Vnullptr. split; constructor.
+ apply negb_true_iff in H0. unfold Vnullptr; rewrite H0; auto.
++ destruct vargs... mydestr; InvBooleans; subst i.
+ replace (Vlong Int64.zero) with Vnullptr. split; constructor.
+ unfold Vnullptr; rewrite H0; auto.
++ destruct vargs... mydestr.
+ split. apply SIZE in Heqo0. econstructor; eauto. congruence. omega.
+ constructor.
+- (* EF_memcpy *)
unfold do_ef_memcpy. destruct vargs... destruct v... destruct vargs...
destruct v... destruct vargs... mydestr.
apply Decidable_sound in Heqb1. red in Heqb1.
split. econstructor; eauto; tauto. constructor.
-(* EF_annot *)
+- (* EF_annot *)
unfold do_ef_annot. mydestr.
split. constructor. apply list_eventval_of_val_sound; auto.
econstructor. constructor; eauto. constructor.
-(* EF_annot_val *)
+- (* EF_annot_val *)
unfold do_ef_annot_val. destruct vargs... destruct vargs... mydestr.
split. constructor. apply eventval_of_val_sound; auto.
econstructor. constructor; eauto. constructor.
-(* EF_inline_asm *)
+- (* EF_inline_asm *)
eapply do_inline_assembly_sound; eauto.
-(* EF_debug *)
+- (* EF_debug *)
unfold do_ef_debug. mydestr. split; constructor.
Qed.
@@ -605,37 +619,38 @@ Proof.
- eapply do_external_function_complete; eauto.
}
destruct ef; simpl in *.
-(* EF_external *)
+- (* EF_external *)
eapply do_external_function_complete; eauto.
-(* EF_builtin *)
+- (* EF_builtin *)
eapply BF_EX; eauto.
-(* EF_runtime *)
+- (* EF_runtime *)
eapply BF_EX; eauto.
-(* EF_vload *)
+- (* EF_vload *)
inv H; unfold do_ef_volatile_load.
exploit do_volatile_load_complete; eauto. intros EQ; rewrite EQ; auto.
-(* EF_vstore *)
+- (* EF_vstore *)
inv H; unfold do_ef_volatile_store.
exploit do_volatile_store_complete; eauto. intros EQ; rewrite EQ; auto.
-(* EF_malloc *)
+- (* EF_malloc *)
inv H; unfold do_ef_malloc.
inv H0. erewrite SIZE by eauto. rewrite H1, H2. auto.
-(* EF_free *)
+- (* EF_free *)
inv H; unfold do_ef_free.
- inv H0. rewrite H1. erewrite SIZE by eauto. rewrite zlt_true. rewrite H3. auto. omega.
-(* EF_memcpy *)
++ inv H0. rewrite H1. erewrite SIZE by eauto. rewrite zlt_true. rewrite H3. auto. omega.
++ inv H0. unfold Vnullptr; destruct Archi.ptr64; auto.
+- (* EF_memcpy *)
inv H; unfold do_ef_memcpy.
inv H0. rewrite Decidable_complete. rewrite H7; rewrite H8; auto.
red. tauto.
-(* EF_annot *)
+- (* EF_annot *)
inv H; unfold do_ef_annot. inv H0. inv H6. inv H4.
rewrite (list_eventval_of_val_complete _ _ _ H1). auto.
-(* EF_annot_val *)
+- (* EF_annot_val *)
inv H; unfold do_ef_annot_val. inv H0. inv H6. inv H4.
rewrite (eventval_of_val_complete _ _ _ H1). auto.
-(* EF_inline_asm *)
+- (* EF_inline_asm *)
eapply do_inline_assembly_complete; eauto.
-(* EF_debug *)
+- (* EF_debug *)
inv H. inv H0. reflexivity.
Qed.
diff --git a/common/Events.v b/common/Events.v
index 10e0c232..28bb992a 100644
--- a/common/Events.v
+++ b/common/Events.v
@@ -649,9 +649,12 @@ Record extcall_properties (sem: extcall_sem) (sg: signature) : Prop :=
(** External call cannot modify memory unless they have [Max, Writable]
permissions. *)
ec_readonly:
- forall ge vargs m1 t vres m2,
+ forall ge vargs m1 t vres m2 b ofs n bytes,
sem ge vargs m1 t vres m2 ->
- Mem.unchanged_on (loc_not_writable m1) m1 m2;
+ Mem.valid_block m1 b ->
+ Mem.loadbytes m2 b ofs n = Some bytes ->
+ (forall i, ofs <= i < ofs + n -> ~Mem.perm m1 b i Max Writable) ->
+ Mem.loadbytes m1 b ofs n = Some bytes;
(** External calls must commute with memory extensions, in the
following sense. *)
@@ -784,7 +787,7 @@ Proof.
(* max perms *)
- inv H; auto.
(* readonly *)
-- inv H. apply Mem.unchanged_on_refl.
+- inv H; auto.
(* mem extends *)
- inv H. inv H1. inv H6. inv H4.
exploit volatile_load_extends; eauto. intros [v' [A B]].
@@ -833,14 +836,27 @@ Proof.
rewrite C; auto.
Qed.
+Lemma unchanged_on_readonly:
+ forall m1 m2 b ofs n bytes,
+ Mem.unchanged_on (loc_not_writable m1) m1 m2 ->
+ Mem.valid_block m1 b ->
+ Mem.loadbytes m2 b ofs n = Some bytes ->
+ (forall i, ofs <= i < ofs + n -> ~Mem.perm m1 b i Max Writable) ->
+ Mem.loadbytes m1 b ofs n = Some bytes.
+Proof.
+ intros.
+ rewrite <- H1. symmetry.
+ apply Mem.loadbytes_unchanged_on_1 with (P := loc_not_writable m1); auto.
+Qed.
+
Lemma volatile_store_readonly:
forall ge chunk1 m1 b1 ofs1 v t m2,
volatile_store ge chunk1 m1 b1 ofs1 v t m2 ->
Mem.unchanged_on (loc_not_writable m1) m1 m2.
Proof.
intros. inv H.
- apply Mem.unchanged_on_refl.
- eapply Mem.store_unchanged_on; eauto.
+- apply Mem.unchanged_on_refl.
+- eapply Mem.store_unchanged_on; eauto.
exploit Mem.store_valid_access_3; eauto. intros [P Q].
intros. unfold loc_not_writable. red; intros. elim H2.
apply Mem.perm_cur_max. apply P. auto.
@@ -934,7 +950,7 @@ Proof.
(* perms *)
- inv H. inv H2. auto. eauto with mem.
(* readonly *)
-- inv H. eapply volatile_store_readonly; eauto.
+- inv H. eapply unchanged_on_readonly; eauto. eapply volatile_store_readonly; eauto.
(* mem extends*)
- inv H. inv H1. inv H6. inv H7. inv H4.
exploit volatile_store_extends; eauto. intros [m2' [A [B C]]].
@@ -994,7 +1010,7 @@ Proof.
rewrite dec_eq_false. auto.
apply Mem.valid_not_valid_diff with m1; eauto with mem.
(* readonly *)
-- inv H. eapply UNCHANGED; eauto.
+- inv H. eapply unchanged_on_readonly; eauto.
(* mem extends *)
- inv H. inv H1. inv H7.
assert (SZ: v2 = Vptrofs sz).
@@ -1045,11 +1061,13 @@ Qed.
Inductive extcall_free_sem (ge: Senv.t):
list val -> mem -> trace -> val -> mem -> Prop :=
- | extcall_free_sem_intro: forall b lo sz m m',
+ | extcall_free_sem_ptr: forall b lo sz m m',
Mem.load Mptr m b (Ptrofs.unsigned lo - size_chunk Mptr) = Some (Vptrofs sz) ->
Ptrofs.unsigned sz > 0 ->
Mem.free m b (Ptrofs.unsigned lo - size_chunk Mptr) (Ptrofs.unsigned lo + Ptrofs.unsigned sz) = Some m' ->
- extcall_free_sem ge (Vptr b lo :: nil) m E0 Vundef m'.
+ extcall_free_sem ge (Vptr b lo :: nil) m E0 Vundef m'
+ | extcall_free_sem_null: forall m,
+ extcall_free_sem ge (Vnullptr :: nil) m E0 Vundef m.
Lemma extcall_free_ok:
extcall_properties extcall_free_sem
@@ -1057,26 +1075,29 @@ Lemma extcall_free_ok:
Proof.
constructor; intros.
(* well typed *)
-- inv H. simpl. auto.
+- inv H; simpl; auto.
(* symbols preserved *)
- inv H0; econstructor; eauto.
(* valid block *)
-- inv H. eauto with mem.
+- inv H; eauto with mem.
(* perms *)
-- inv H. eapply Mem.perm_free_3; eauto.
+- inv H; eauto using Mem.perm_free_3.
(* readonly *)
-- inv H. eapply Mem.free_unchanged_on; eauto.
- intros. red; intros. elim H3.
+- eapply unchanged_on_readonly; eauto. inv H.
++ eapply Mem.free_unchanged_on; eauto.
+ intros. red; intros. elim H6.
apply Mem.perm_cur_max. apply Mem.perm_implies with Freeable; auto with mem.
eapply Mem.free_range_perm; eauto.
++ apply Mem.unchanged_on_refl.
(* mem extends *)
-- inv H. inv H1. inv H8. inv H6.
+- inv H.
++ inv H1. inv H8. inv H6.
exploit Mem.load_extends; eauto. intros [v' [A B]].
assert (v' = Vptrofs sz).
{ unfold Vptrofs in *; destruct Archi.ptr64; inv B; auto. }
subst v'.
exploit Mem.free_parallel_extends; eauto. intros [m2' [C D]].
- exists Vundef; exists m2'; intuition.
+ exists Vundef; exists m2'; intuition auto.
econstructor; eauto.
eapply Mem.free_unchanged_on; eauto.
unfold loc_out_of_bounds; intros.
@@ -1084,8 +1105,14 @@ Proof.
{ apply Mem.perm_cur_max. apply Mem.perm_implies with Freeable; auto with mem.
eapply Mem.free_range_perm. eexact H4. eauto. }
tauto.
++ inv H1. inv H5. replace v2 with Vnullptr.
+ exists Vundef; exists m1'; intuition auto.
+ constructor.
+ apply Mem.unchanged_on_refl.
+ unfold Vnullptr in *; destruct Archi.ptr64; inv H3; auto.
(* mem inject *)
-- inv H0. inv H2. inv H7. inv H9.
+- inv H0.
++ inv H2. inv H7. inv H9.
exploit Mem.load_inject; eauto. intros [v' [A B]].
assert (v' = Vptrofs sz).
{ unfold Vptrofs in *; destruct Archi.ptr64; inv B; auto. }
@@ -1099,7 +1126,7 @@ Proof.
intro EQ.
exploit Mem.free_parallel_inject; eauto. intros (m2' & C & D).
exists f, Vundef, m2'; split.
- apply extcall_free_sem_intro with (sz := sz) (m' := m2').
+ apply extcall_free_sem_ptr with (sz := sz) (m' := m2').
rewrite EQ. rewrite <- A. f_equal. omega.
auto. auto.
rewrite ! EQ. rewrite <- C. f_equal; omega.
@@ -1112,14 +1139,19 @@ Proof.
apply P. omega.
split. auto.
red; intros. congruence.
++ inv H2. inv H6. replace v' with Vnullptr.
+ exists f, Vundef, m1'; intuition auto using Mem.unchanged_on_refl.
+ constructor.
+ red; intros; congruence.
+ unfold Vnullptr in *; destruct Archi.ptr64; inv H4; auto.
(* trace length *)
- inv H; simpl; omega.
(* receptive *)
-- assert (t1 = t2). inv H; inv H0; auto. subst t2.
+- assert (t1 = t2) by (inv H; inv H0; auto). subst t2.
exists vres1; exists m1; auto.
(* determ *)
-- inv H; inv H0.
- assert (EQ1: Vptrofs sz0 = Vptrofs sz) by congruence.
+- inv H; inv H0; try (unfold Vnullptr in *; destruct Archi.ptr64; discriminate).
++ assert (EQ1: Vptrofs sz0 = Vptrofs sz) by congruence.
assert (EQ2: sz0 = sz).
{ unfold Vptrofs in EQ1; destruct Archi.ptr64 eqn:SF.
rewrite <- (Ptrofs.of_int64_to_int64 SF sz0), <- (Ptrofs.of_int64_to_int64 SF sz). congruence.
@@ -1127,6 +1159,7 @@ Proof.
}
subst sz0.
split. constructor. intuition congruence.
++ split. constructor. intuition auto.
Qed.
(** ** Semantics of [memcpy] operations. *)
@@ -1159,8 +1192,9 @@ Proof.
- (* perms *)
intros. inv H. eapply Mem.perm_storebytes_2; eauto.
- (* readonly *)
- intros. inv H. eapply Mem.storebytes_unchanged_on; eauto.
- intros; red; intros. elim H8.
+ intros. inv H. eapply unchanged_on_readonly; eauto.
+ eapply Mem.storebytes_unchanged_on; eauto.
+ intros; red; intros. elim H11.
apply Mem.perm_cur_max. eapply Mem.storebytes_range_perm; eauto.
- (* extensions *)
intros. inv H.
@@ -1271,7 +1305,7 @@ Proof.
(* perms *)
- inv H; auto.
(* readonly *)
-- inv H. apply Mem.unchanged_on_refl.
+- inv H; auto.
(* mem extends *)
- inv H.
exists Vundef; exists m1'; intuition.
@@ -1316,7 +1350,7 @@ Proof.
(* perms *)
- inv H; auto.
(* readonly *)
-- inv H. apply Mem.unchanged_on_refl.
+- inv H; auto.
(* mem extends *)
- inv H. inv H1. inv H6.
exists v2; exists m1'; intuition.
@@ -1359,7 +1393,7 @@ Proof.
(* perms *)
- inv H; auto.
(* readonly *)
-- inv H. apply Mem.unchanged_on_refl.
+- inv H; auto.
(* mem extends *)
- inv H.
exists Vundef; exists m1'; intuition.
@@ -1406,7 +1440,7 @@ Proof.
(* perms *)
- inv H; auto.
(* readonly *)
-- inv H. apply Mem.unchanged_on_refl.
+- inv H; auto.
(* mem extends *)
- inv H. fold bsem in H2. apply val_inject_list_lessdef in H1.
specialize (bs_inject _ bsem _ _ _ H1).
diff --git a/common/Memdata.v b/common/Memdata.v
index f3016efe..a09b90f5 100644
--- a/common/Memdata.v
+++ b/common/Memdata.v
@@ -44,6 +44,13 @@ Definition size_chunk (chunk: memory_chunk) : Z :=
| Many64 => 8
end.
+Definition largest_size_chunk := 8.
+
+Lemma max_size_chunk: forall chunk, size_chunk chunk <= 8.
+Proof.
+ destruct chunk; simpl; omega.
+Qed.
+
Lemma size_chunk_pos:
forall chunk, size_chunk chunk > 0.
Proof.
diff --git a/common/Memory.v b/common/Memory.v
index f21d99af..cd8a2001 100644
--- a/common/Memory.v
+++ b/common/Memory.v
@@ -1307,6 +1307,23 @@ Proof.
}
Qed.
+Section STOREV.
+Variable chunk: memory_chunk.
+Variable m1: mem.
+Variables addr v: val.
+Variable m2: mem.
+Hypothesis STORE: storev chunk m1 addr v = Some m2.
+
+
+Theorem loadv_storev_same:
+ loadv chunk m2 addr = Some (Val.load_result chunk v).
+Proof.
+ destruct addr; simpl in *; try discriminate.
+ eapply load_store_same.
+ eassumption.
+Qed.
+End STOREV.
+
Lemma load_store_overlap:
forall chunk m1 b ofs v m2 chunk' ofs' v',
store chunk m1 b ofs v = Some m2 ->
diff --git a/common/Sections.ml b/common/Sections.ml
index 9555c203..ea0b6dbc 100644
--- a/common/Sections.ml
+++ b/common/Sections.ml
@@ -166,9 +166,22 @@ let gcc_section name readonly exec =
sec_writable = not readonly; sec_executable = exec;
sec_access = Access_default }
+(* Check and extract whether a section was given as attribute *)
+
+let get_attr_section loc attr =
+ match Cutil.find_custom_attributes ["section"; "__section__"] attr with
+ | [] -> None
+ | [[C.AString name]] -> Some name
+ | [[_]] ->
+ Diagnostics.error loc "'section' attribute requires a string";
+ None
+ | _ ->
+ Diagnostics.error loc "ambiguous 'section' attribute";
+ None
+
(* Determine section for a variable definition *)
-let for_variable env id ty init thrl =
+let for_variable env loc id ty init thrl =
let attr = Cutil.attributes_of_type env ty in
let readonly = List.mem C.AConst attr && not(List.mem C.AVolatile attr) in
let si =
@@ -176,11 +189,11 @@ let for_variable env id ty init thrl =
(* 1- Section explicitly associated with #use_section *)
Hashtbl.find use_section_table id
with Not_found ->
- match Cutil.find_custom_attributes ["section"; "__section__"] attr with
- | [[C.AString name]] ->
+ match get_attr_section loc attr with
+ | Some name ->
(* 2- Section given as an attribute, gcc-style *)
gcc_section name readonly false
- | _ ->
+ | None ->
(* 3- Default section appropriate for size and const-ness *)
let size =
match Cutil.sizeof env ty with Some sz -> sz | None -> max_int in
@@ -197,17 +210,17 @@ let for_variable env id ty init thrl =
(* Determine sections for a function definition *)
-let for_function env id attr =
+let for_function env loc id attr =
let si_code =
try
(* 1- Section explicitly associated with #use_section *)
Hashtbl.find use_section_table id
with Not_found ->
- match Cutil.find_custom_attributes ["section"; "__section__"] attr with
- | [[C.AString name]] ->
+ match get_attr_section loc attr with
+ | Some name ->
(* 2- Section given as an attribute, gcc-style *)
gcc_section name true true
- | _ ->
+ | None ->
(* 3- Default section *)
try
Hashtbl.find current_section_table "CODE"
diff --git a/common/Sections.mli b/common/Sections.mli
index e882f042..00c06c20 100644
--- a/common/Sections.mli
+++ b/common/Sections.mli
@@ -47,7 +47,7 @@ val define_section:
-> ?writable:bool -> ?executable:bool -> ?access:access_mode -> unit -> unit
val use_section_for: AST.ident -> string -> bool
-val for_variable: Env.t -> AST.ident -> C.typ -> bool -> bool ->
+val for_variable: Env.t -> C.location -> AST.ident -> C.typ -> bool -> bool ->
section_name * access_mode
-val for_function: Env.t -> AST.ident -> C.attributes -> section_name list
+val for_function: Env.t -> C.location -> AST.ident -> C.attributes -> section_name list
val for_stringlit: unit -> section_name
diff --git a/config_arm.sh b/config_arm.sh
index eed55fab..1861e029 100755
--- a/config_arm.sh
+++ b/config_arm.sh
@@ -1 +1 @@
-exec ./config_simple.sh arm-linux --toolprefix arm-linux-gnueabihf- "$@"
+exec ./config_simple.sh arm-linux --toolprefix arm-linux-gnueabi- "$@"
diff --git a/config_armhf.sh b/config_armhf.sh
new file mode 100755
index 00000000..8a1302bd
--- /dev/null
+++ b/config_armhf.sh
@@ -0,0 +1 @@
+exec ./config_simple.sh arm-eabihf --toolprefix arm-linux-gnueabihf- "$@"
diff --git a/config_ppc64.sh b/config_ppc64.sh
new file mode 100755
index 00000000..df31c18f
--- /dev/null
+++ b/config_ppc64.sh
@@ -0,0 +1 @@
+exec ./config_simple.sh ppc64-linux --toolprefix powerpc64-linux-gnu- "$@"
diff --git a/config_rv32.sh b/config_rv32.sh
index 654cacfa..a5a5cf1c 100755
--- a/config_rv32.sh
+++ b/config_rv32.sh
@@ -1 +1 @@
-exec ./config_simple.sh rv32-linux --toolprefix riscv64-unknown-elf- "$@"
+exec ./config_simple.sh rv32-linux --toolprefix riscv64-linux-gnu- "$@"
diff --git a/config_rv64.sh b/config_rv64.sh
index e95f8a70..0698c2ff 100755
--- a/config_rv64.sh
+++ b/config_rv64.sh
@@ -1 +1 @@
-exec ./config_simple.sh rv64-linux --toolprefix riscv64-unknown-elf- "$@"
+exec ./config_simple.sh rv64-linux --toolprefix riscv64-linux-gnu- "$@"
diff --git a/config_simple.sh b/config_simple.sh
index f02680c4..e2d3844c 100755
--- a/config_simple.sh
+++ b/config_simple.sh
@@ -3,4 +3,9 @@ shift
version=`git rev-parse --short HEAD`
branch=`git rev-parse --abbrev-ref HEAD`
date=`date -I`
-./configure --prefix /opt/CompCert/${branch}/${date}_${version}/$arch "$@" $arch
+
+if test "x$CCOMP_INSTALL_PREFIX" = "x" ;
+then CCOMP_INSTALL_PREFIX=/opt/CompCert ;
+fi
+
+./configure --prefix ${CCOMP_INSTALL_PREFIX}/${branch}/${date}_${version}/$arch "$@" $arch
diff --git a/configure b/configure
index f13d1af3..cb2f52ba 100755
--- a/configure
+++ b/configure
@@ -568,7 +568,7 @@ missingtools=false
echo "Testing Coq... " | tr -d '\n'
coq_ver=$(${COQBIN}coqc -v 2>/dev/null | sed -n -e 's/The Coq Proof Assistant, version \([^ ]*\).*$/\1/p')
case "$coq_ver" in
- 8.8.0|8.8.1|8.8.2|8.9.0|8.9.1|8.10.0|8.10.1|8.10.2|8.11.0)
+ 8.9.0|8.9.1|8.10.0|8.10.1|8.10.2|8.11.0|8.11.1)
echo "version $coq_ver -- good!";;
?*)
echo "version $coq_ver -- UNSUPPORTED"
diff --git a/cparser/Elab.ml b/cparser/Elab.ml
index c5295347..0504ad0b 100644
--- a/cparser/Elab.ml
+++ b/cparser/Elab.ml
@@ -469,7 +469,8 @@ let elab_constant loc = function
let (v, fk) = elab_float_constant f in
CFloat(v, fk)
| CONST_CHAR(wide, s) ->
- CInt(elab_char_constant loc wide s, IInt, "")
+ let ikind = if wide then wchar_ikind () else IInt in
+ CInt(elab_char_constant loc wide s, ikind, "")
| CONST_STRING(wide, s) ->
elab_string_literal loc wide s
@@ -2457,8 +2458,8 @@ let enter_typedef loc env sto (s, ty, init) =
env
end
else begin
- error loc "typedef redefinition with different types (%a vs %a)"
- (print_typ env) ty (print_typ env) ty';
+ error loc "redefinition of typedef '%s' with different type (%a vs %a)"
+ s (print_typ env) ty (print_typ env) ty';
env
end
| _ ->
diff --git a/doc/index.html b/doc/index.html
index 3a4cf6ba..631c5d99 100644
--- a/doc/index.html
+++ b/doc/index.html
@@ -24,7 +24,7 @@ a:active {color : Red; text-decoration : underline; }
<H1 align="center">The CompCert verified compiler</H1>
<H2 align="center">Commented Coq development</H2>
-<H3 align="center">Version 3.6, 2019-09-17</H3>
+<H3 align="center">Version 3.7, 2020-03-31</H3>
<H2>Introduction</H2>
@@ -101,6 +101,8 @@ See also: <A HREF="html/compcert.common.Memdata.html">Memdata</A> (in-memory rep
<LI> <A HREF="html/compcert.common.Determinism.html">Determinism</A>: determinism properties of small-step semantics.
<LI> <A HREF="html/compcert.powerpc.Op.html"><I>Op</I></A>: operators, addressing modes and their
semantics.
+<LI> <A HREF="html/compcert.common.Builtins.html">Builtins</A>: semantics of built-in functions. <BR>
+See also: <A HREF="html/compcert.common.Builtins0.html">Builtins0</A> (target-independent part), <A HREF="html/compcert.powerpc.Builtins1.html"><I>Builtins1</I></A> (target-dependent part).
<LI> <A HREF="html/compcert.common.Unityping.html">Unityping</A>: a solver for atomic unification constraints.
</UL>
diff --git a/driver/Clflags.ml b/driver/Clflags.ml
index 6d6f1df4..6986fb96 100644
--- a/driver/Clflags.ml
+++ b/driver/Clflags.ml
@@ -28,8 +28,8 @@ let option_fconstprop = ref true
let option_fcse = ref true
let option_fcse2 = ref true
let option_fredundancy = ref true
-let option_fduplicate = ref false
-let option_finvertcond = ref true (* only active if option_fduplicate is also true *)
+let option_fduplicate = ref (-1)
+let option_finvertcond = ref true
let option_ftracelinearize = ref false
let option_fpostpass = ref true
let option_fpostpass_sched = ref "list"
@@ -81,3 +81,4 @@ let option_faddx = ref false
let option_fcoalesce_mem = ref true
let option_fforward_moves = ref true
let option_all_loads_nontrap = ref false
+let option_inline_auto_threshold = ref 0
diff --git a/driver/Compopts.v b/driver/Compopts.v
index b4b9f30d..848657e5 100644
--- a/driver/Compopts.v
+++ b/driver/Compopts.v
@@ -27,8 +27,7 @@ Parameter generate_float_constants: unit -> bool.
(** For value analysis. Currently always false. *)
Parameter va_strict: unit -> bool.
-(** Flag -fduplicate. For tail duplication optimization. Necessary to have
- * bigger superblocks *)
+(** Flag -fduplicate. Branch prediction annotation + tail duplication *)
Parameter optim_duplicate: unit -> bool.
(** Flag -ftailcalls. For tail call optimization. *)
diff --git a/driver/Driver.ml b/driver/Driver.ml
index db71aef9..388482a0 100644
--- a/driver/Driver.ml
+++ b/driver/Driver.ml
@@ -190,6 +190,7 @@ Processing options:
-Os Optimize for code size in preference to code speed
-Obranchless Optimize to generate fewer conditional branches; try to produce
branch-free instruction sequences as much as possible
+ -finline-auto-threshold n Inline functions under size n
-ftailcalls Optimize function calls in tail position [on]
-fconst-prop Perform global constant propagation [on]
-ffloat-const-prop <n> Control constant propagation of floats
@@ -200,11 +201,15 @@ Processing options:
-fpostpass Perform postpass scheduling (only for K1 architecture) [on]
-fpostpass= <optim> Perform postpass scheduling with the specified optimization [list]
(<optim>=list: list scheduling, <optim>=ilp: ILP, <optim>=greedy: just packing bundles)
- -fduplicate Perform tail duplication to form superblocks on predicted traces
+ -fduplicate <nb_nodes> Perform tail duplication to form superblocks on predicted traces
+ nb_nodes control the heuristic deciding to duplicate or not
+ A value of -1 desactivates the entire pass (including branch prediction)
+ A value of 0 desactivates the duplication (but activates the branch prediction)
+ FIXME : this is desactivated by default for now
-finvertcond Invert conditions based on predicted paths (to prefer fallthrough).
Requires -fduplicate to be also activated [on]
-ftracelinearize Linearizes based on the traces identified by duplicate phase
- It is recommended to also activate -fduplicate with this pass [off]
+ It is heavily recommended to activate -finvertcond with this pass [off]
-fforward-moves Forward moves after CSE
-finline Perform inlining of functions [on]
-finline-functions-called-once Integrate functions only required by their
@@ -318,10 +323,11 @@ let cmdline_actions =
[
Exact "-O0", Unit (unset_all optimization_options);
Exact "-O", Unit (set_all optimization_options);
- _Regexp "-O1", Self (fun _ -> set_all optimization_options (); option_fpostpass := false; option_fduplicate := false);
+ _Regexp "-O1", Self (fun _ -> set_all optimization_options (); option_fpostpass := false);
_Regexp "-O[123]$", Unit (set_all optimization_options);
Exact "-Os", Set option_Osize;
Exact "-Obranchless", Set option_Obranchless;
+ Exact "-finline-auto-threshold", Integer (fun n -> option_inline_auto_threshold := n);
Exact "-fsmall-data", Integer(fun n -> option_small_data := n);
Exact "-fsmall-const", Integer(fun n -> option_small_const := n);
Exact "-ffloat-const-prop", Integer(fun n -> option_ffloatconstprop := n);
@@ -393,7 +399,7 @@ let cmdline_actions =
@ f_opt "cse2" option_fcse2
@ f_opt "redundancy" option_fredundancy
@ f_opt "postpass" option_fpostpass
- @ f_opt "duplicate" option_fduplicate
+ @ [ Exact "-fduplicate", Integer (fun n -> option_fduplicate := n) ]
@ f_opt "invertcond" option_finvertcond
@ f_opt "tracelinearize" option_ftracelinearize
@ f_opt_str "postpass" option_fpostpass option_fpostpass_sched
diff --git a/extraction/extraction.v b/extraction/extraction.v
index 929c21e0..9b568951 100644
--- a/extraction/extraction.v
+++ b/extraction/extraction.v
@@ -106,7 +106,7 @@ Extract Constant Compopts.generate_float_constants =>
Extract Constant Compopts.optim_tailcalls =>
"fun _ -> !Clflags.option_ftailcalls".
Extract Constant Compopts.optim_duplicate =>
- "fun _ -> !Clflags.option_fduplicate".
+ "fun _ -> (if !Clflags.option_fduplicate = -1 then false else true)".
Extract Constant Compopts.optim_constprop =>
"fun _ -> !Clflags.option_fconstprop".
Extract Constant Compopts.optim_CSE =>
diff --git a/lib/Maps.v b/lib/Maps.v
index 1dec59a2..8de3c892 100644
--- a/lib/Maps.v
+++ b/lib/Maps.v
@@ -116,6 +116,19 @@ Module Type TREE.
forall (m1: t A) (m2: t B) (i: elt),
get i (combine f m1 m2) = f (get i m1) (get i m2).
+ Parameter combine_null :
+ forall (A B C: Type) (f: A -> B -> option C),
+ t A -> t B -> t C.
+
+ Axiom gcombine_null:
+ forall (A B C: Type) (f: A -> B -> option C),
+ forall (m1: t A) (m2: t B) (i: elt),
+ get i (combine_null f m1 m2) =
+ match (get i m1), (get i m2) with
+ | (Some x1), (Some x2) => f x1 x2
+ | _, _ => None
+ end.
+
(** Enumerating the bindings of a tree. *)
Parameter elements:
forall (A: Type), t A -> list (elt * A).
@@ -151,6 +164,12 @@ Module Type TREE.
forall (A B: Type) (f: B -> A -> B) (v: B) (m: t A),
fold1 f m v =
List.fold_left (fun a p => f a (snd p)) (elements m) v.
+
+ Parameter bempty_canon :
+ forall (A : Type), t A -> bool.
+ Axiom bempty_canon_correct:
+ forall (A : Type) (tr : t A) (i : elt),
+ bempty_canon tr = true -> get i tr = None.
End TREE.
(** * The abstract signatures of maps *)
@@ -261,6 +280,12 @@ Module PTree <: TREE.
induction i; simpl; auto.
Qed.
+ Definition bempty_canon (A : Type) (tr : t A) : bool :=
+ match tr with
+ | Leaf => true
+ | _ => false
+ end.
+
Theorem gss:
forall (A: Type) (i: positive) (x: A) (m: t A), get i (set i x m) = Some x.
Proof.
@@ -269,7 +294,16 @@ Module PTree <: TREE.
Lemma gleaf : forall (A : Type) (i : positive), get i (Leaf : t A) = None.
Proof. exact gempty. Qed.
-
+
+ Lemma bempty_canon_correct:
+ forall (A : Type) (tr : t A) (i : elt),
+ bempty_canon tr = true -> get i tr = None.
+ Proof.
+ destruct tr; intros.
+ - rewrite gleaf; trivial.
+ - discriminate.
+ Qed.
+
Theorem gso:
forall (A: Type) (i j: positive) (x: A) (m: t A),
i <> j -> get i (set j x m) = get i m.
@@ -625,7 +659,81 @@ Module PTree <: TREE.
auto.
Qed.
- Fixpoint xelements (A : Type) (m : t A) (i : positive)
+ Section COMBINE_NULL.
+
+ Variables A B C: Type.
+ Variable f: A -> B -> option C.
+
+
+ Fixpoint combine_null (m1: t A) (m2: t B) {struct m1} : t C :=
+ match m1, m2 with
+ | (Node l1 o1 r1), (Node l2 o2 r2) =>
+ Node' (combine_null l1 l2)
+ (match o1, o2 with
+ | (Some x1), (Some x2) => f x1 x2
+ | _, _ => None
+ end)
+ (combine_null r1 r2)
+ | _, _ => Leaf
+ end.
+
+ Theorem gcombine_null:
+ forall (m1: t A) (m2: t B) (i: positive),
+ get i (combine_null m1 m2) =
+ match (get i m1), (get i m2) with
+ | (Some x1), (Some x2) => f x1 x2
+ | _, _ => None
+ end.
+ Proof.
+ induction m1; intros; simpl.
+ - rewrite gleaf. rewrite gleaf.
+ reflexivity.
+ - destruct m2; simpl.
+ + rewrite gleaf. rewrite gleaf.
+ destruct get; reflexivity.
+ + rewrite gnode'.
+ destruct i; simpl; try rewrite IHm1_1; try rewrite IHm1; trivial.
+ Qed.
+
+ End COMBINE_NULL.
+
+ Section REMOVE_TREE.
+
+ Variables A B: Type.
+
+ Fixpoint remove_t (m1: t A) (m2: t B) {struct m1} : t A :=
+ match m1, m2 with
+ | Leaf, _ | _, Leaf => m1
+ | (Node l1 o1 r1), (Node l2 o2 r2) =>
+ Node' (remove_t l1 l2)
+ (match o2 with
+ | Some _ => None
+ | None => o1
+ end)
+ (remove_t r1 r2)
+ end.
+
+ Theorem gremove_t:
+ forall m1 : t A,
+ forall m2 : t B,
+ forall i : positive,
+ get i (remove_t m1 m2) = match get i m2 with
+ | None => get i m1
+ | Some _ => None
+ end.
+ Proof.
+ induction m1; intros; simpl.
+ - rewrite gleaf.
+ destruct get; reflexivity.
+ - destruct m2; simpl.
+ + rewrite gleaf.
+ reflexivity.
+ + rewrite gnode'.
+ destruct i; simpl; try rewrite IHm1_1; try rewrite IHm1; trivial.
+ Qed.
+ End REMOVE_TREE.
+
+ Fixpoint xelements (A : Type) (m : t A) (i : positive)
(k: list (positive * A)) {struct m}
: list (positive * A) :=
match m with
diff --git a/mppa_k1c/Asmblockdeps.v b/mppa_k1c/Asmblockdeps.v
index 02f9141b..01eda623 100644
--- a/mppa_k1c/Asmblockdeps.v
+++ b/mppa_k1c/Asmblockdeps.v
@@ -339,7 +339,7 @@ Proof.
}
destruct (Mem.load _ m1 _ _) in *; destruct (Mem.load _ m0 _ _) in *; congruence.
Qed.
-
+
Definition goto_label_deps (f: function) (lbl: label) (vpc: val) :=
match label_pos lbl 0 (fn_blocks f) with
| None => None
@@ -1005,7 +1005,7 @@ Theorem bisimu_par_wio_basic ge fn rsr rsw mr mw sr sw bi:
Proof.
(* a little tactic to automate reasoning on preg_eq *)
-Local Hint Resolve not_eq_sym ppos_pmem_discr ppos_discr.
+Local Hint Resolve not_eq_sym ppos_pmem_discr ppos_discr: core.
Local Ltac preg_eq_discr r rd :=
destruct (preg_eq r rd); try (subst r; rewrite assign_eq, Pregmap.gss; auto);
rewrite (assign_diff _ (#rd) (#r) _); auto;
@@ -1053,7 +1053,7 @@ Local Ltac preg_eq_discr r rd :=
preg_eq_discr r rd0. }
(* Load Octuple word *)
- + Local Hint Resolve not_eq_sym ppos_pmem_discr ppos_discr.
+ + Local Hint Resolve not_eq_sym ppos_pmem_discr ppos_discr: core.
unfold parexec_load_o_offset.
destruct (gpreg_o_expand rd) as [[[rd0 rd1] rd2] rd3]; destruct Ge; simpl.
rewrite H0, H.
@@ -1423,7 +1423,7 @@ Section SECT_BBLOCK_EQUIV.
Variable Ge: genv.
-Local Hint Resolve trans_state_match.
+Local Hint Resolve trans_state_match: core.
Lemma bblock_simu_reduce:
forall p1 p2 ge fn,
diff --git a/mppa_k1c/Asmblockgen.v b/mppa_k1c/Asmblockgen.v
index 50637723..36269954 100644
--- a/mppa_k1c/Asmblockgen.v
+++ b/mppa_k1c/Asmblockgen.v
@@ -28,6 +28,8 @@ Require Import Chunks.
Local Open Scope string_scope.
Local Open Scope error_monad_scope.
+Import PArithCoercions.
+
(** The code generation functions take advantage of several
characteristics of the [Mach] code generated by earlier passes of the
compiler, mostly that argument and result registers are of the correct
diff --git a/mppa_k1c/Asmblockgenproof1.v b/mppa_k1c/Asmblockgenproof1.v
index ecb4629b..00df01e3 100644
--- a/mppa_k1c/Asmblockgenproof1.v
+++ b/mppa_k1c/Asmblockgenproof1.v
@@ -23,6 +23,8 @@ Require Import Op Locations Machblock Conventions.
Require Import Asmblock Asmblockgen Asmblockgenproof0 Asmblockprops.
Require Import Chunks.
+Import PArithCoercions.
+
(** Decomposition of integer constants. *)
Lemma make_immed32_sound:
@@ -859,7 +861,7 @@ Proof.
destruct cmp; discriminate.
Qed.
-Local Hint Resolve Val_cmpu_bool_correct Val_cmplu_bool_correct.
+Local Hint Resolve Val_cmpu_bool_correct Val_cmplu_bool_correct: core.
Lemma transl_cbranch_correct_1:
forall cond args lbl k c m ms b sp rs m' tbb,
@@ -1163,7 +1165,7 @@ Proof.
split; intros; Simpl.
Qed.
-Local Hint Resolve Val_cmpu_correct Val_cmplu_correct.
+Local Hint Resolve Val_cmpu_correct Val_cmplu_correct: core.
Lemma transl_condimm_int32u_correct:
forall cmp rd r1 n k rs m,
@@ -1481,6 +1483,8 @@ Proof.
destruct (Z_lt_dec _ _); destruct (Z.eq_dec _ _); trivial; omega.
Qed.
+Ltac splitall := repeat match goal with |- _ /\ _ => split end.
+
Lemma transl_op_correct:
forall op args res k (rs: regset) m v c,
transl_op op args res k = OK c ->
@@ -1515,21 +1519,21 @@ Opaque Int.eq.
- (* Ocast8signed *)
econstructor; split.
eapply exec_straight_two. simpl;eauto. simpl;eauto.
- split; intros; simpl; Simpl.
+ repeat split; intros; simpl; Simpl.
assert (A: Int.ltu (Int.repr 24) Int.iwordsize = true) by auto.
destruct (rs x0); auto; simpl. rewrite A; simpl. Simpl. unfold Val.shr. rewrite A.
apply Val.lessdef_same. f_equal. apply Int.sign_ext_shr_shl. split; reflexivity.
- (* Ocast16signed *)
econstructor; split.
eapply exec_straight_two. simpl;eauto. simpl;eauto.
- split; intros; Simpl.
+ repeat split; intros; Simpl.
assert (A: Int.ltu (Int.repr 16) Int.iwordsize = true) by auto.
destruct (rs x0); auto; simpl. rewrite A; simpl. Simpl. unfold Val.shr. rewrite A.
apply Val.lessdef_same. f_equal. apply Int.sign_ext_shr_shl. split; reflexivity.
- (* Oshrximm *)
econstructor; split.
+ apply exec_straight_one. simpl. eauto.
- + split.
+ + repeat split.
* rewrite Pregmap.gss.
subst v.
destruct (rs x0); simpl; trivial.
@@ -1540,7 +1544,7 @@ Opaque Int.eq.
- (* Oshrxlimm *)
econstructor; split.
+ apply exec_straight_one. simpl. eauto.
- + split.
+ + repeat split.
* rewrite Pregmap.gss.
subst v.
destruct (rs x0); simpl; trivial.
@@ -1551,7 +1555,7 @@ Opaque Int.eq.
- (* Ocmp *)
exploit transl_cond_op_correct; eauto. intros (rs' & A & B & C).
- exists rs'; split. eexact A. eauto with asmgen.
+ exists rs'; repeat split; eauto with asmgen.
- (* Osel *)
unfold conditional_move in *.
@@ -1570,72 +1574,73 @@ Opaque Int.eq.
destruct c0; simpl in *.
- all:
- destruct c; simpl in *; inv EQ2;
- econstructor; split; try (apply exec_straight_one; constructor);
- split; try (simpl; intros; rewrite Pregmap.gso; trivial; assumption);
- unfold Val.select; simpl;
- unfold cmove, cmoveu;
- rewrite Pregmap.gss;
- destruct (rs x1); simpl; trivial;
- try rewrite int_ltu_to_neq;
- try rewrite int64_ltu_to_neq;
- try change (Int64.eq Int64.zero Int64.zero) with true;
- try destruct Archi.ptr64;
- repeat rewrite if_neg;
- simpl;
- trivial;
- try destruct (_ || _);
- trivial;
- try apply Val.lessdef_normalize.
+ all: destruct c.
+ all: simpl in *.
+ all: inv EQ2.
+ all: econstructor; splitall.
+ all: try apply exec_straight_one.
+ all: intros; simpl; trivial.
+ all: unfold Val.select, cmove, cmoveu; simpl.
+ all: destruct (rs x1); simpl; trivial.
+ all: try rewrite int_ltu_to_neq.
+ all: try rewrite int64_ltu_to_neq.
+ all: try change (Int64.eq Int64.zero Int64.zero) with true.
+ all: try destruct Archi.ptr64.
+ all: try rewrite Pregmap.gss.
+ all: repeat rewrite if_neg.
+ all: simpl.
+ all: try destruct (_ || _).
+ all: try apply Val.lessdef_normalize.
+ all: trivial. (* no more lessdef *)
+ all: apply Pregmap.gso; congruence.
- (* Oselimm *)
unfold conditional_move_imm32 in *.
destruct c0; simpl in *.
- all:
- destruct c; simpl in *; inv EQ0;
- econstructor; split; try (apply exec_straight_one; constructor);
- split; try (simpl; intros; rewrite Pregmap.gso; trivial; assumption);
- unfold Val.select; simpl;
- unfold cmove, cmoveu;
- rewrite Pregmap.gss;
- destruct (rs x0); simpl; trivial;
- try rewrite int_ltu_to_neq;
- try rewrite int64_ltu_to_neq;
- try change (Int64.eq Int64.zero Int64.zero) with true;
- try destruct Archi.ptr64;
- repeat rewrite if_neg;
- simpl;
- trivial;
- try destruct (_ || _);
- trivial;
- try apply Val.lessdef_normalize.
-
+ all: destruct c.
+ all: simpl in *.
+ all: inv EQ0.
+ all: econstructor; splitall.
+ all: try apply exec_straight_one.
+ all: intros; simpl; trivial.
+ all: unfold Val.select, cmove, cmoveu; simpl.
+ all: destruct (rs x0); simpl; trivial.
+ all: try rewrite int_ltu_to_neq.
+ all: try rewrite int64_ltu_to_neq.
+ all: try change (Int64.eq Int64.zero Int64.zero) with true.
+ all: try destruct Archi.ptr64.
+ all: try rewrite Pregmap.gss.
+ all: repeat rewrite if_neg.
+ all: simpl.
+ all: try destruct (_ || _).
+ all: try apply Val.lessdef_normalize.
+ all: trivial. (* no more lessdef *)
+ all: apply Pregmap.gso; congruence.
- (* Osellimm *)
unfold conditional_move_imm64 in *.
destruct c0; simpl in *.
- all:
- destruct c; simpl in *; inv EQ0;
- econstructor; split; try (apply exec_straight_one; constructor);
- split; try (simpl; intros; rewrite Pregmap.gso; trivial; assumption);
- unfold Val.select; simpl;
- unfold cmove, cmoveu;
- rewrite Pregmap.gss;
- destruct (rs x0); simpl; trivial;
- try rewrite int_ltu_to_neq;
- try rewrite int64_ltu_to_neq;
- try change (Int64.eq Int64.zero Int64.zero) with true;
- try destruct Archi.ptr64;
- repeat rewrite if_neg;
- simpl;
- trivial;
- try destruct (_ || _);
- trivial;
- try apply Val.lessdef_normalize.
-
+ all: destruct c.
+ all: simpl in *.
+ all: inv EQ0.
+ all: econstructor; splitall.
+ all: try apply exec_straight_one.
+ all: intros; simpl; trivial.
+ all: unfold Val.select, cmove, cmoveu; simpl.
+ all: destruct (rs x0); simpl; trivial.
+ all: try rewrite int_ltu_to_neq.
+ all: try rewrite int64_ltu_to_neq.
+ all: try change (Int64.eq Int64.zero Int64.zero) with true.
+ all: try destruct Archi.ptr64.
+ all: try rewrite Pregmap.gss.
+ all: repeat rewrite if_neg.
+ all: simpl.
+ all: try destruct (_ || _).
+ all: try apply Val.lessdef_normalize.
+ all: trivial. (* no more lessdef *)
+ all: apply Pregmap.gso; congruence.
Qed.
(** Memory accesses *)
diff --git a/mppa_k1c/Asmvliw.v b/mppa_k1c/Asmvliw.v
index e042d95a..946007c1 100644
--- a/mppa_k1c/Asmvliw.v
+++ b/mppa_k1c/Asmvliw.v
@@ -555,6 +555,8 @@ Inductive ar_instruction : Type :=
| PArithARRI64 (i: arith_name_arri64) (rd rs: ireg) (imm: int64)
.
+Module PArithCoercions.
+
Coercion PArithR: arith_name_r >-> Funclass.
Coercion PArithRR: arith_name_rr >-> Funclass.
Coercion PArithRI32: arith_name_ri32 >-> Funclass.
@@ -569,6 +571,8 @@ Coercion PArithARR: arith_name_arr >-> Funclass.
Coercion PArithARRI32: arith_name_arri32 >-> Funclass.
Coercion PArithARRI64: arith_name_arri64 >-> Funclass.
+End PArithCoercions.
+
Inductive basic : Type :=
| PArith (i: ar_instruction)
| PLoad (i: ld_instruction)
@@ -1709,7 +1713,7 @@ Proof.
Qed.
-Local Hint Resolve parexec_bblock_write_in_order.
+Local Hint Resolve parexec_bblock_write_in_order: core.
Lemma det_parexec_write_in_order f b rs m rs' m':
det_parexec f b rs m rs' m' -> parexec_wio_bblock f b rs m = Next rs' m'.
diff --git a/mppa_k1c/CSE2deps.v b/mppa_k1c/CSE2deps.v
new file mode 100644
index 00000000..8ab9242a
--- /dev/null
+++ b/mppa_k1c/CSE2deps.v
@@ -0,0 +1,20 @@
+Require Import BoolEqual Coqlib.
+Require Import AST Integers Floats.
+Require Import Values Memory Globalenvs Events.
+Require Import Op.
+
+
+Definition can_swap_accesses_ofs ofsr chunkr ofsw chunkw :=
+ (0 <=? ofsw) && (ofsw <=? (Ptrofs.modulus - largest_size_chunk))
+ && (0 <=? ofsr) && (ofsr <=? (Ptrofs.modulus - largest_size_chunk))
+ && ((ofsw + size_chunk chunkw <=? ofsr) ||
+ (ofsr + size_chunk chunkr <=? ofsw)).
+
+Definition may_overlap chunk addr args chunk' addr' args' :=
+ match addr, addr', args, args' with
+ | (Aindexed ofs), (Aindexed ofs'),
+ (base :: nil), (base' :: nil) =>
+ if peq base base'
+ then negb (can_swap_accesses_ofs (Ptrofs.unsigned ofs') chunk' (Ptrofs.unsigned ofs) chunk)
+ else true | _, _, _, _ => true
+ end.
diff --git a/mppa_k1c/CSE2depsproof.v b/mppa_k1c/CSE2depsproof.v
new file mode 100644
index 00000000..a3811e78
--- /dev/null
+++ b/mppa_k1c/CSE2depsproof.v
@@ -0,0 +1,127 @@
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL Maps.
+
+Require Import Globalenvs Values.
+Require Import Linking Values Memory Globalenvs Events Smallstep.
+Require Import Registers Op RTL.
+Require Import CSE2 CSE2deps.
+Require Import Lia.
+
+Lemma ptrofs_size :
+ Ptrofs.wordsize = (if Archi.ptr64 then 64 else 32)%nat.
+Proof.
+ unfold Ptrofs.wordsize.
+ unfold Wordsize_Ptrofs.wordsize.
+ trivial.
+Qed.
+
+Lemma ptrofs_modulus :
+ Ptrofs.modulus = if Archi.ptr64 then 18446744073709551616 else 4294967296.
+Proof.
+ unfold Ptrofs.modulus.
+ rewrite ptrofs_size.
+ destruct Archi.ptr64; reflexivity.
+Qed.
+
+Section SOUNDNESS.
+ Variable F V : Type.
+ Variable genv: Genv.t F V.
+ Variable sp : val.
+
+Section MEMORY_WRITE.
+ Variable m m2 : mem.
+ Variable chunkw chunkr : memory_chunk.
+ Variable base : val.
+
+ Variable addrw addrr valw : val.
+ Hypothesis STORE : Mem.storev chunkw m addrw valw = Some m2.
+
+ Section INDEXED_AWAY.
+ Variable ofsw ofsr : ptrofs.
+ Hypothesis ADDRW : eval_addressing genv sp
+ (Aindexed ofsw) (base :: nil) = Some addrw.
+ Hypothesis ADDRR : eval_addressing genv sp
+ (Aindexed ofsr) (base :: nil) = Some addrr.
+
+ Lemma load_store_away1 :
+ forall RANGEW : 0 <= Ptrofs.unsigned ofsw <= Ptrofs.modulus - largest_size_chunk,
+ forall RANGER : 0 <= Ptrofs.unsigned ofsr <= Ptrofs.modulus - largest_size_chunk,
+ forall SWAPPABLE : Ptrofs.unsigned ofsw + size_chunk chunkw <= Ptrofs.unsigned ofsr
+ \/ Ptrofs.unsigned ofsr + size_chunk chunkr <= Ptrofs.unsigned ofsw,
+ Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+
+ Proof.
+ intros.
+
+ pose proof (max_size_chunk chunkr) as size_chunkr_bounded.
+ pose proof (max_size_chunk chunkw) as size_chunkw_bounded.
+ unfold largest_size_chunk in *.
+
+ rewrite ptrofs_modulus in *.
+ simpl in *.
+ inv ADDRR.
+ inv ADDRW.
+ destruct base; try discriminate.
+ eapply Mem.load_store_other with (chunk := chunkw) (v := valw) (b := b).
+ exact STORE.
+ right.
+
+ all: try (destruct (Ptrofs.unsigned_add_either i ofsr) as [OFSR | OFSR];
+ rewrite OFSR).
+ all: try (destruct (Ptrofs.unsigned_add_either i ofsw) as [OFSW | OFSW];
+ rewrite OFSW).
+ all: try rewrite ptrofs_modulus in *.
+ all: destruct Archi.ptr64.
+
+ all: intuition lia.
+ Qed.
+
+ Theorem load_store_away :
+ can_swap_accesses_ofs (Ptrofs.unsigned ofsr) chunkr (Ptrofs.unsigned ofsw) chunkw = true ->
+ Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+ Proof.
+ intro SWAP.
+ unfold can_swap_accesses_ofs in SWAP.
+ repeat rewrite andb_true_iff in SWAP.
+ repeat rewrite orb_true_iff in SWAP.
+ repeat rewrite Z.leb_le in SWAP.
+ apply load_store_away1.
+ all: tauto.
+ Qed.
+ End INDEXED_AWAY.
+End MEMORY_WRITE.
+End SOUNDNESS.
+
+
+Section SOUNDNESS.
+ Variable F V : Type.
+ Variable genv: Genv.t F V.
+ Variable sp : val.
+
+Lemma may_overlap_sound:
+ forall m m' : mem,
+ forall chunk addr args chunk' addr' args' v a a' rs,
+ (eval_addressing genv sp addr (rs ## args)) = Some a ->
+ (eval_addressing genv sp addr' (rs ## args')) = Some a' ->
+ (may_overlap chunk addr args chunk' addr' args') = false ->
+ (Mem.storev chunk m a v) = Some m' ->
+ (Mem.loadv chunk' m' a') = (Mem.loadv chunk' m a').
+Proof.
+ intros until rs.
+ intros ADDR ADDR' OVERLAP STORE.
+ destruct addr; destruct addr'; try discriminate.
+ { (* Aindexed / Aindexed *)
+ destruct args as [ | base [ | ]]. 1,3: discriminate.
+ destruct args' as [ | base' [ | ]]. 1,3: discriminate.
+ simpl in OVERLAP.
+ destruct (peq base base'). 2: discriminate.
+ subst base'.
+ destruct (can_swap_accesses_ofs (Ptrofs.unsigned i0) chunk' (Ptrofs.unsigned i) chunk) eqn:SWAP.
+ 2: discriminate.
+ simpl in *.
+ eapply load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption.
+ }
+Qed.
+
+End SOUNDNESS.
diff --git a/mppa_k1c/DuplicateOpcodeHeuristic.ml b/mppa_k1c/DuplicateOpcodeHeuristic.ml
index 690553ce..2ec314c1 100644
--- a/mppa_k1c/DuplicateOpcodeHeuristic.ml
+++ b/mppa_k1c/DuplicateOpcodeHeuristic.ml
@@ -2,10 +2,8 @@
open Op
open Integers
-exception HeuristicSucceeded
-
-let opcode_heuristic code cond ifso ifnot preferred =
- let decision = match cond with
+let opcode_heuristic code cond ifso ifnot is_loop_header =
+ match cond with
| Ccompimm (c, n) | Ccompuimm (c, n) -> if n == Integers.Int.zero then (match c with
| Clt | Cle -> Some false
| Cgt | Cge -> Some true
@@ -27,6 +25,3 @@ let opcode_heuristic code cond ifso ifnot preferred =
| _ -> None
)
| _ -> None
- in match decision with
- | Some b -> (preferred := b; raise HeuristicSucceeded)
- | None -> ()
diff --git a/mppa_k1c/PostpassSchedulingproof.v b/mppa_k1c/PostpassSchedulingproof.v
index fbb06c9b..8cc7f0ab 100644
--- a/mppa_k1c/PostpassSchedulingproof.v
+++ b/mppa_k1c/PostpassSchedulingproof.v
@@ -61,9 +61,9 @@ Proof.
- subst. repeat (rewrite Pregmap.gss); auto.
destruct v; simpl; auto.
rewrite Ptrofs.add_assoc.
- cutrewrite (Ptrofs.repr (x + y) = Ptrofs.add (Ptrofs.repr x) (Ptrofs.repr y)); auto.
+ enough (Ptrofs.repr (x + y) = Ptrofs.add (Ptrofs.repr x) (Ptrofs.repr y)) as ->; auto.
unfold Ptrofs.add.
- cutrewrite (x + y = Ptrofs.unsigned (Ptrofs.repr x) + Ptrofs.unsigned (Ptrofs.repr y)); auto.
+ enough (x + y = Ptrofs.unsigned (Ptrofs.repr x) + Ptrofs.unsigned (Ptrofs.repr y)) as ->; auto.
repeat (rewrite Ptrofs.unsigned_repr); auto.
- repeat (rewrite Pregmap.gso; auto).
Qed.
@@ -220,7 +220,8 @@ Proof.
destruct (zeq pos 0).
+ inv H. exists lbb. constructor; auto.
+ apply IHlbb in H. destruct H as (c & TAIL). exists c.
- cutrewrite (pos = pos - size a + size a). apply code_tail_S; auto.
+ enough (pos = pos - size a + size a) as ->.
+ apply code_tail_S; auto.
omega.
Qed.
@@ -567,7 +568,7 @@ Proof.
unfold builtin_alone in H0. erewrite H0; eauto.
Qed.
-Local Hint Resolve verified_schedule_nob_checks_alls_bundles.
+Local Hint Resolve verified_schedule_nob_checks_alls_bundles: core.
Lemma verified_schedule_checks_alls_bundles bb lb bundle:
verified_schedule bb = OK lb ->
diff --git a/mppa_k1c/abstractbb/AbstractBasicBlocksDef.v b/mppa_k1c/abstractbb/AbstractBasicBlocksDef.v
index 5c94d435..cf46072f 100644
--- a/mppa_k1c/abstractbb/AbstractBasicBlocksDef.v
+++ b/mppa_k1c/abstractbb/AbstractBasicBlocksDef.v
@@ -403,7 +403,7 @@ Proof.
* eapply H2; eauto. intros; eapply H0; eauto. rewrite rev_append_rev, in_app_iff; auto.
* intros; eapply H0; eauto. rewrite rev_append_rev, in_app_iff, <- in_rev; auto.
Qed.
-Local Hint Resolve app_fail_allvalid_correct.
+Local Hint Resolve app_fail_allvalid_correct: core.
Lemma app_fail_correct l pt t1 t2:
match_pt t1 pt ->
diff --git a/mppa_k1c/abstractbb/ImpSimuTest.v b/mppa_k1c/abstractbb/ImpSimuTest.v
index ea55b735..7a77ec15 100644
--- a/mppa_k1c/abstractbb/ImpSimuTest.v
+++ b/mppa_k1c/abstractbb/ImpSimuTest.v
@@ -304,12 +304,12 @@ Proof.
rewrite <- EQT; eauto.
+ exploit smem_valid_set_decompose_1; eauto.
- clear DM0. unfold hsmem_post_eval, hsmem_post_eval in * |- *; simpl.
- Local Hint Resolve smem_valid_set_decompose_1.
+ Local Hint Resolve smem_valid_set_decompose_1: core.
intros; case (R.eq_dec x x0).
+ intros; subst; rewrite !Dict.set_spec_eq; simpl; eauto.
+ intros; rewrite !Dict.set_spec_diff; simpl; eauto.
Qed.
-Local Hint Resolve naive_set_correct.
+Local Hint Resolve naive_set_correct: core.
Definition equiv_hsmem ge (hd1 hd2: hsmem) :=
(forall m, allvalid ge hd1.(hpre) m <-> allvalid ge hd2.(hpre) m)
@@ -523,7 +523,7 @@ Lemma hinst_smem_correct i: forall hd hod,
WHEN hinst_smem i hd hod ~> hd' THEN
forall ge od d, smem_model ge od hod -> smem_model ge d hd -> (forall m, smem_valid ge d m -> smem_valid ge od m) -> smem_model ge (inst_smem i d od) hd'.
Proof.
- Local Hint Resolve smem_valid_set_proof.
+ Local Hint Resolve smem_valid_set_proof: core.
induction i; simpl; wlp_simplify; eauto 15 with wlp.
Qed.
Global Opaque hinst_smem.
@@ -563,7 +563,7 @@ Definition bblock_hsmem: bblock -> ?? hsmem
Lemma bblock_hsmem_correct p:
WHEN bblock_hsmem p ~> hd THEN forall ge, smem_model ge (bblock_smem p) hd.
Proof.
- Local Hint Resolve hsmem_empty_correct.
+ Local Hint Resolve hsmem_empty_correct: core.
wlp_simplify.
Qed.
Global Opaque bblock_hsmem.
@@ -775,7 +775,7 @@ Proof.
intro H; erewrite <- list_term_eval_set_hid; rewrite H. apply list_term_eval_set_hid.
Qed.
-Local Hint Resolve term_eval_set_hid_equiv list_term_eval_set_hid_equiv.
+Local Hint Resolve term_eval_set_hid_equiv list_term_eval_set_hid_equiv: core.
Program Definition bblock_simu_test (p1 p2: bblock): ?? bool :=
DO log <~ count_logger ();;
@@ -802,7 +802,7 @@ Obligation 2.
wlp_simplify.
Qed.
-Local Hint Resolve g_bblock_simu_test_correct.
+Local Hint Resolve g_bblock_simu_test_correct: core.
Theorem bblock_simu_test_correct p1 p2:
WHEN bblock_simu_test p1 p2 ~> b THEN b=true -> forall ge, bblock_simu ge p1 p2.
@@ -1123,7 +1123,7 @@ Definition get {A} (d:t A) (x:R.t): option A
Definition set {A} (d:t A) (x:R.t) (v:A): t A
:= PositiveMap.add x v d.
-Local Hint Unfold PositiveMap.E.eq.
+Local Hint Unfold PositiveMap.E.eq: core.
Lemma set_spec_eq A d x (v: A):
get (set d x v) x = Some v.
diff --git a/mppa_k1c/abstractbb/Impure/ImpHCons.v b/mppa_k1c/abstractbb/Impure/ImpHCons.v
index d8002375..637116cc 100644
--- a/mppa_k1c/abstractbb/Impure/ImpHCons.v
+++ b/mppa_k1c/abstractbb/Impure/ImpHCons.v
@@ -95,7 +95,7 @@ Proof.
wlp_simplify.
Qed.
Global Opaque assert_list_incl.
-Hint Resolve assert_list_incl_correct.
+Hint Resolve assert_list_incl_correct: wlp.
End Sets.
@@ -165,7 +165,7 @@ Lemma hConsV_correct A (hasheq: A -> A -> ?? bool):
(forall x y, WHEN hasheq x y ~> b THEN b=true -> x=y) ->
forall x, WHEN hco.(hC) x ~> x' THEN x.(hdata).(data)=x'.(data).
Proof.
- Local Hint Resolve f_equal2.
+ Local Hint Resolve f_equal2: core.
wlp_simplify.
exploit H; eauto.
+ wlp_simplify.
diff --git a/mppa_k1c/abstractbb/Parallelizability.v b/mppa_k1c/abstractbb/Parallelizability.v
index 22809095..30904b5d 100644
--- a/mppa_k1c/abstractbb/Parallelizability.v
+++ b/mppa_k1c/abstractbb/Parallelizability.v
@@ -332,7 +332,7 @@ Fixpoint bblock_wframe(p:bblock): list R.t :=
| i::p' => (inst_wframe i)++(bblock_wframe p')
end.
-Local Hint Resolve Permutation_app_head Permutation_app_tail Permutation_app_comm.
+Local Hint Resolve Permutation_app_head Permutation_app_tail Permutation_app_comm: core.
Lemma bblock_wframe_Permutation p p':
Permutation p p' -> Permutation (bblock_wframe p) (bblock_wframe p').
@@ -620,7 +620,7 @@ Include ParallelizablityChecking L.
Section PARALLEL2.
Variable ge: genv.
-Local Hint Resolve S.empty_match_frame S.add_match_frame S.union_match_frame S.is_disjoint_match_frame.
+Local Hint Resolve S.empty_match_frame S.add_match_frame S.union_match_frame S.is_disjoint_match_frame: core.
(** Now, refinement of each operation toward parallelizable *)
@@ -659,14 +659,14 @@ Fixpoint inst_sframe (i: inst): S.t :=
| a::i' => S.add (fst a) (S.union (exp_sframe (snd a)) (inst_sframe i'))
end.
-Local Hint Resolve exp_sframe_correct.
+Local Hint Resolve exp_sframe_correct: core.
Lemma inst_sframe_correct i: S.match_frame (inst_sframe i) (inst_frame i).
Proof.
induction i as [|[y e] i']; simpl; auto.
Qed.
-Local Hint Resolve inst_wsframe_correct inst_sframe_correct.
+Local Hint Resolve inst_wsframe_correct inst_sframe_correct: core.
Fixpoint is_pararec (p: bblock) (wsframe: S.t): bool :=
match p with
diff --git a/mppa_k1c/abstractbb/SeqSimuTheory.v b/mppa_k1c/abstractbb/SeqSimuTheory.v
index 649dd083..e234883f 100644
--- a/mppa_k1c/abstractbb/SeqSimuTheory.v
+++ b/mppa_k1c/abstractbb/SeqSimuTheory.v
@@ -102,9 +102,6 @@ Fixpoint bblock_smem_rec (p: bblock) (d: smem): smem :=
let d':=inst_smem i d d in
bblock_smem_rec p' d'
end.
-(*
-Local Hint Resolve smem_eval_empty.
-*)
Definition bblock_smem: bblock -> smem
:= fun p => bblock_smem_rec p smem_empty.
@@ -124,7 +121,7 @@ Proof.
intros d a H; eapply inst_smem_pre_monotonic; eauto.
Qed.
-Local Hint Resolve inst_smem_pre_monotonic bblock_smem_pre_monotonic.
+Local Hint Resolve inst_smem_pre_monotonic bblock_smem_pre_monotonic: core.
Lemma term_eval_exp e (od:smem) m0 old:
(forall x, term_eval ge (od x) m0 = Some (old x)) ->
@@ -185,7 +182,7 @@ Lemma bblocks_smem_rec_Some_correct1 p m0: forall (m1 m2: mem) (d: smem),
(forall x, term_eval ge (d x) m0 = Some (m1 x)) ->
forall x, term_eval ge (bblock_smem_rec p d x) m0 = Some (m2 x).
Proof.
- Local Hint Resolve inst_smem_Some_correct1.
+ Local Hint Resolve inst_smem_Some_correct1: core.
induction p as [ | i p]; simpl; intros m1 m2 d H.
- inversion_clear H; eauto.
- intros H0 x0.
@@ -299,7 +296,7 @@ Lemma block_smem_rec_valid p m0: forall (m1 m2: mem) (d:smem),
(forall x, term_eval ge (d x) m0 = Some (m1 x)) ->
pre (bblock_smem_rec p d) ge m0.
Proof.
- Local Hint Resolve inst_valid.
+ Local Hint Resolve inst_valid: core.
induction p as [ | i p]; simpl; intros m1 d H; auto.
intros H0 H1.
destruct (inst_run ge i m1 m1) eqn: Heqov; eauto.
@@ -326,7 +323,7 @@ Theorem bblock_smem_simu p1 p2:
smem_simu (bblock_smem p1) (bblock_smem p2) ->
bblock_simu ge p1 p2.
Proof.
- Local Hint Resolve bblock_smem_valid bblock_smem_Some_correct1.
+ Local Hint Resolve bblock_smem_valid bblock_smem_Some_correct1: core.
intros (INCL & EQUIV) m DONTFAIL; unfold smem_valid in * |-.
destruct (run ge p1 m) as [m1|] eqn: RUN1; simpl; try congruence.
assert (X: forall x, term_eval ge (bblock_smem p1 x) m = Some (m1 x)); eauto.
diff --git a/mppa_k1c/lib/Asmblockgenproof0.v b/mppa_k1c/lib/Asmblockgenproof0.v
index 940c6563..58455ada 100644
--- a/mppa_k1c/lib/Asmblockgenproof0.v
+++ b/mppa_k1c/lib/Asmblockgenproof0.v
@@ -414,7 +414,7 @@ Proof.
Qed.
-Local Hint Resolve code_tail_0 code_tail_S.
+Local Hint Resolve code_tail_0 code_tail_S: core.
Lemma code_tail_next:
forall fn ofs c0,
@@ -458,7 +458,7 @@ Proof.
omega.
Qed.
-Local Hint Resolve code_tail_next.
+Local Hint Resolve code_tail_next: core.
Lemma code_tail_next_int:
forall fn ofs bi c,
diff --git a/mppa_k1c/lib/ForwardSimulationBlock.v b/mppa_k1c/lib/ForwardSimulationBlock.v
index 39dd2234..224eda0a 100644
--- a/mppa_k1c/lib/ForwardSimulationBlock.v
+++ b/mppa_k1c/lib/ForwardSimulationBlock.v
@@ -21,7 +21,7 @@ Section starN_lemma.
Variable L: semantics.
-Local Hint Resolve starN_refl starN_step Eapp_assoc.
+Local Hint Resolve starN_refl starN_step Eapp_assoc: core.
Lemma starN_split n s t s':
starN (step L) (globalenv L) n s t s' ->
@@ -93,7 +93,7 @@ Hypothesis simu_end_block:
(** Introduction d'une sémantique par bloc sur L1 appelée "memoL1" *)
-Local Hint Resolve starN_refl starN_step.
+Local Hint Resolve starN_refl starN_step: core.
Definition follows_in_block (head current: state L1): Prop :=
dist_end_block head >= dist_end_block current
@@ -164,7 +164,7 @@ Inductive is_well_memorized (s s': memostate): Prop :=
memorized s' = None ->
is_well_memorized s s'.
-Local Hint Resolve StartBloc MidBloc ExitBloc.
+Local Hint Resolve StartBloc MidBloc ExitBloc: core.
Definition memoL1 := {|
state := memostate;
diff --git a/mppa_k1c/lib/Machblockgen.v b/mppa_k1c/lib/Machblockgen.v
index a65b218f..2ba42814 100644
--- a/mppa_k1c/lib/Machblockgen.v
+++ b/mppa_k1c/lib/Machblockgen.v
@@ -105,7 +105,7 @@ Inductive is_end_block: Machblock_inst -> code -> Prop :=
| End_basic bi bh bl: header bh <> nil -> is_end_block (MB_basic bi) (bh::bl)
| End_cfi cfi bl: bl <> nil -> is_end_block (MB_cfi cfi) bl.
-Local Hint Resolve End_empty End_basic End_cfi.
+Local Hint Resolve End_empty End_basic End_cfi: core.
Inductive is_trans_code: Mach.code -> code -> Prop :=
| Tr_nil: is_trans_code nil nil
@@ -123,7 +123,7 @@ Inductive is_trans_code: Mach.code -> code -> Prop :=
header bh = nil ->
is_trans_code (i::c) (add_basic bi bh::bl).
-Local Hint Resolve Tr_nil Tr_end_block.
+Local Hint Resolve Tr_nil Tr_end_block: core.
Lemma add_to_code_is_trans_code i c bl:
is_trans_code c bl ->
@@ -145,7 +145,7 @@ Proof.
rewrite <- Heqti. eapply End_cfi. congruence.
Qed.
-Local Hint Resolve add_to_code_is_trans_code.
+Local Hint Resolve add_to_code_is_trans_code: core.
Lemma trans_code_is_trans_code_rev c1: forall c2 mbi,
is_trans_code c2 mbi ->
@@ -185,7 +185,7 @@ Proof.
exists mbi1. split; congruence.
Qed.
-Local Hint Resolve trans_code_is_trans_code.
+Local Hint Resolve trans_code_is_trans_code: core.
Theorem is_trans_code_inv c bl: is_trans_code c bl <-> bl=(trans_code c).
Proof.
diff --git a/mppa_k1c/lib/Machblockgenproof.v b/mppa_k1c/lib/Machblockgenproof.v
index 91be5e2e..0de2df52 100644
--- a/mppa_k1c/lib/Machblockgenproof.v
+++ b/mppa_k1c/lib/Machblockgenproof.v
@@ -72,7 +72,7 @@ Proof.
apply match_states_trans_state.
Qed.
-Local Hint Resolve match_states_trans_state.
+Local Hint Resolve match_states_trans_state: core.
Lemma symbols_preserved:
forall (s: ident), Genv.find_symbol tge s = Genv.find_symbol ge s.
@@ -284,7 +284,7 @@ Proof.
Qed.
Local Hint Resolve symbols_preserved senv_preserved init_mem_preserved prog_main_preserved functions_translated
- parent_sp_preserved.
+ parent_sp_preserved: core.
Definition dist_end_block_code (c: Mach.code) :=
@@ -299,8 +299,8 @@ Definition dist_end_block (s: Mach.state): nat :=
| _ => 0
end.
-Local Hint Resolve exec_nil_body exec_cons_body.
-Local Hint Resolve exec_MBgetstack exec_MBsetstack exec_MBgetparam exec_MBop exec_MBload exec_MBstore.
+Local Hint Resolve exec_nil_body exec_cons_body: core.
+Local Hint Resolve exec_MBgetstack exec_MBsetstack exec_MBgetparam exec_MBop exec_MBload exec_MBstore: core.
Lemma size_add_label l bh: size (add_label l bh) = size bh + 1.
Proof.
@@ -336,7 +336,7 @@ Proof.
omega.
Qed.
-Local Hint Resolve dist_end_block_code_simu_mid_block.
+Local Hint Resolve dist_end_block_code_simu_mid_block: core.
Lemma size_nonzero c b bl:
@@ -392,8 +392,8 @@ destruct i; congruence.
Qed.
-Local Hint Resolve Mlabel_is_not_cfi.
-Local Hint Resolve MBbasic_is_not_cfi.
+Local Hint Resolve Mlabel_is_not_cfi: core.
+Local Hint Resolve MBbasic_is_not_cfi: core.
Lemma add_to_new_block_is_label i:
header (add_to_new_bblock (trans_inst i)) <> nil -> exists l, i = Mlabel l.
@@ -408,7 +408,7 @@ Proof.
+ unfold cfi_bblock in H; simpl in H; congruence.
Qed.
-Local Hint Resolve Mlabel_is_not_basic.
+Local Hint Resolve Mlabel_is_not_basic: core.
Lemma trans_code_decompose c: forall b bl,
is_trans_code c (b::bl) ->
@@ -510,8 +510,8 @@ Proof.
rewrite Hs2, Hb2; eauto.
Qed.
-Local Hint Resolve exec_MBcall exec_MBtailcall exec_MBbuiltin exec_MBgoto exec_MBcond_true exec_MBcond_false exec_MBjumptable exec_MBreturn exec_Some_exit exec_None_exit.
-Local Hint Resolve eval_builtin_args_preserved external_call_symbols_preserved find_funct_ptr_same.
+Local Hint Resolve exec_MBcall exec_MBtailcall exec_MBbuiltin exec_MBgoto exec_MBcond_true exec_MBcond_false exec_MBjumptable exec_MBreturn exec_Some_exit exec_None_exit: core.
+Local Hint Resolve eval_builtin_args_preserved external_call_symbols_preserved find_funct_ptr_same: core.
Lemma match_states_concat_trans_code st f sp c rs m h:
diff --git a/powerpc/Archi.v b/powerpc/Archi.v
index ab348c14..10f38391 100644
--- a/powerpc/Archi.v
+++ b/powerpc/Archi.v
@@ -30,6 +30,10 @@ Definition align_float64 := 8%Z.
(** Can we use the 64-bit extensions to the PowerPC architecture? *)
Parameter ppc64 : bool.
+(** Should single-precision FP arguments passed on stack be passed
+ as singles or use double FP format. *)
+Parameter single_passed_as_single : bool.
+
Definition splitlong := negb ppc64.
Lemma splitlong_ptr32: splitlong = true -> ptr64 = false.
diff --git a/powerpc/CSE2deps.v b/powerpc/CSE2deps.v
new file mode 100644
index 00000000..9db51bbb
--- /dev/null
+++ b/powerpc/CSE2deps.v
@@ -0,0 +1,20 @@
+Require Import BoolEqual Coqlib.
+Require Import AST Integers Floats.
+Require Import Values Memory Globalenvs Events.
+Require Import Op.
+
+
+Definition can_swap_accesses_ofs ofsr chunkr ofsw chunkw :=
+ (0 <=? ofsw) && (ofsw <=? (Ptrofs.modulus - largest_size_chunk))
+ && (0 <=? ofsr) && (ofsr <=? (Ptrofs.modulus - largest_size_chunk))
+ && ((ofsw + size_chunk chunkw <=? ofsr) ||
+ (ofsr + size_chunk chunkr <=? ofsw)).
+
+Definition may_overlap chunk addr args chunk' addr' args' :=
+ match addr, addr', args, args' with
+ | (Aindexed ofs), (Aindexed ofs'),
+ (base :: nil), (base' :: nil) =>
+ if peq base base'
+ then negb (can_swap_accesses_ofs (Int.unsigned ofs') chunk' (Int.unsigned ofs) chunk)
+ else true | _, _, _, _ => true
+ end.
diff --git a/powerpc/CSE2depsproof.v b/powerpc/CSE2depsproof.v
new file mode 100644
index 00000000..fdded9b6
--- /dev/null
+++ b/powerpc/CSE2depsproof.v
@@ -0,0 +1,135 @@
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL Maps.
+
+Require Import Globalenvs Values.
+Require Import Linking Values Memory Globalenvs Events Smallstep.
+Require Import Registers Op RTL.
+Require Import CSE2 CSE2deps.
+Require Import Lia.
+
+Lemma ptrofs_size :
+ Ptrofs.wordsize = if Archi.ptr64 then 64%nat else 32%nat.
+Proof.
+ unfold Ptrofs.wordsize.
+ unfold Wordsize_Ptrofs.wordsize.
+ trivial.
+Qed.
+
+Lemma ptrofs_modulus :
+ Ptrofs.modulus = if Archi.ptr64 then 18446744073709551616 else 4294967296.
+Proof.
+ unfold Ptrofs.modulus.
+ rewrite ptrofs_size.
+ destruct Archi.ptr64; reflexivity.
+Qed.
+
+Lemma ptrofs_max_unsigned :
+ Ptrofs.max_unsigned = if Archi.ptr64 then 18446744073709551615 else 4294967295.
+Proof.
+ unfold Ptrofs.max_unsigned.
+ rewrite ptrofs_modulus.
+ destruct Archi.ptr64; reflexivity.
+Qed.
+
+Section SOUNDNESS.
+ Variable F V : Type.
+ Variable genv: Genv.t F V.
+ Variable sp : val.
+
+Section MEMORY_WRITE.
+ Variable m m2 : mem.
+ Variable chunkw chunkr : memory_chunk.
+ Variable base : val.
+
+ Variable addrw addrr valw : val.
+ Hypothesis STORE : Mem.storev chunkw m addrw valw = Some m2.
+
+ Section INDEXED_AWAY.
+ Variable ofsw ofsr : int.
+ Hypothesis ADDRW : eval_addressing genv sp
+ (Aindexed ofsw) (base :: nil) = Some addrw.
+ Hypothesis ADDRR : eval_addressing genv sp
+ (Aindexed ofsr) (base :: nil) = Some addrr.
+
+ Lemma load_store_away1 :
+ forall RANGEW : 0 <= Int.unsigned ofsw <= Ptrofs.modulus - largest_size_chunk,
+ forall RANGER : 0 <= Int.unsigned ofsr <= Ptrofs.modulus - largest_size_chunk,
+ forall SWAPPABLE : Int.unsigned ofsw + size_chunk chunkw <= Int.unsigned ofsr
+ \/ Int.unsigned ofsr + size_chunk chunkr <= Int.unsigned ofsw,
+ Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+ Proof.
+ intros.
+
+ pose proof (max_size_chunk chunkr) as size_chunkr_bounded.
+ pose proof (max_size_chunk chunkw) as size_chunkw_bounded.
+ unfold largest_size_chunk in *.
+
+ rewrite ptrofs_modulus in *.
+ simpl in *.
+ inv ADDRR.
+ inv ADDRW.
+ destruct base; try discriminate.
+ eapply Mem.load_store_other with (chunk := chunkw) (v := valw) (b := b).
+ exact STORE.
+ right.
+
+ all: try (destruct (Ptrofs.unsigned_add_either i (Ptrofs.of_int ofsr)) as [OFSR | OFSR];
+ rewrite OFSR).
+ all: try (destruct (Ptrofs.unsigned_add_either i (Ptrofs.of_int ofsw)) as [OFSW | OFSW];
+ rewrite OFSW).
+ all: unfold Ptrofs.of_int.
+
+ all: repeat rewrite Ptrofs.unsigned_repr by (unfold Ptrofs.max_unsigned; rewrite ptrofs_modulus; destruct Archi.ptr64; lia).
+ all: repeat rewrite ptrofs_modulus.
+ all: destruct Archi.ptr64; intuition lia.
+ Qed.
+
+ Theorem load_store_away :
+ can_swap_accesses_ofs (Int.unsigned ofsr) chunkr (Int.unsigned ofsw) chunkw = true ->
+ Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+ Proof.
+ intro SWAP.
+ unfold can_swap_accesses_ofs in SWAP.
+ repeat rewrite andb_true_iff in SWAP.
+ repeat rewrite orb_true_iff in SWAP.
+ repeat rewrite Z.leb_le in SWAP.
+ apply load_store_away1.
+ all: tauto.
+ Qed.
+ End INDEXED_AWAY.
+End MEMORY_WRITE.
+End SOUNDNESS.
+
+
+Section SOUNDNESS.
+ Variable F V : Type.
+ Variable genv: Genv.t F V.
+ Variable sp : val.
+
+Lemma may_overlap_sound:
+ forall m m' : mem,
+ forall chunk addr args chunk' addr' args' v a a' rs,
+ (eval_addressing genv sp addr (rs ## args)) = Some a ->
+ (eval_addressing genv sp addr' (rs ## args')) = Some a' ->
+ (may_overlap chunk addr args chunk' addr' args') = false ->
+ (Mem.storev chunk m a v) = Some m' ->
+ (Mem.loadv chunk' m' a') = (Mem.loadv chunk' m a').
+Proof.
+ intros until rs.
+ intros ADDR ADDR' OVERLAP STORE.
+ destruct addr; destruct addr'; try discriminate.
+ { (* Aindexed / Aindexed *)
+ destruct args as [ | base [ | ]]. 1,3: discriminate.
+ destruct args' as [ | base' [ | ]]. 1,3: discriminate.
+ simpl in OVERLAP.
+ destruct (peq base base'). 2: discriminate.
+ subst base'.
+ destruct (can_swap_accesses_ofs (Int.unsigned i0) chunk' (Int.unsigned i) chunk) eqn:SWAP.
+ 2: discriminate.
+ simpl in *.
+ eapply load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption.
+ }
+Qed.
+
+End SOUNDNESS.
diff --git a/powerpc/Conventions1.v b/powerpc/Conventions1.v
index 25d9c081..5c9cbd4f 100644
--- a/powerpc/Conventions1.v
+++ b/powerpc/Conventions1.v
@@ -208,7 +208,16 @@ Fixpoint loc_arguments_rec
| Some ireg =>
One (R ireg) :: loc_arguments_rec tys (ir + 1) fr ofs
end
- | (Tfloat | Tsingle | Tany64) as ty :: tys =>
+ | Tsingle as ty :: tys =>
+ match list_nth_z float_param_regs fr with
+ | None =>
+ let ty := if Archi.single_passed_as_single then Tsingle else Tany64 in
+ let ofs := align ofs (typesize ty) in
+ One (S Outgoing ofs ty) :: loc_arguments_rec tys ir fr (ofs + (typesize ty))
+ | Some freg =>
+ One (R freg) :: loc_arguments_rec tys ir (fr + 1) ofs
+ end
+ | (Tfloat | Tany64) as ty :: tys =>
match list_nth_z float_param_regs fr with
| None =>
let ofs := align ofs 2 in
@@ -236,33 +245,6 @@ Fixpoint loc_arguments_rec
Definition loc_arguments (s: signature) : list (rpair loc) :=
loc_arguments_rec s.(sig_args) 0 0 0.
-(** [size_arguments s] returns the number of [Outgoing] slots used
- to call a function with signature [s]. *)
-
-Fixpoint size_arguments_rec (tyl: list typ) (ir fr ofs: Z) {struct tyl} : Z :=
- match tyl with
- | nil => ofs
- | (Tint | Tany32) :: tys =>
- match list_nth_z int_param_regs ir with
- | None => size_arguments_rec tys ir fr (ofs + 1)
- | Some ireg => size_arguments_rec tys (ir + 1) fr ofs
- end
- | (Tfloat | Tsingle | Tany64) :: tys =>
- match list_nth_z float_param_regs fr with
- | None => size_arguments_rec tys ir fr (align ofs 2 + 2)
- | Some freg => size_arguments_rec tys ir (fr + 1) ofs
- end
- | Tlong :: tys =>
- let ir := align ir 2 in
- match list_nth_z int_param_regs ir, list_nth_z int_param_regs (ir + 1) with
- | Some r1, Some r2 => size_arguments_rec tys (ir + 2) fr ofs
- | _, _ => size_arguments_rec tys ir fr (align ofs 2 + 2)
- end
- end.
-
-Definition size_arguments (s: signature) : Z :=
- size_arguments_rec s.(sig_args) 0 0 0.
-
(** Argument locations are either caller-save registers or [Outgoing]
stack slots at nonnegative offsets. *)
@@ -322,12 +304,14 @@ Opaque list_nth_z.
apply align_divides; omega. apply Z.divide_1_l. apply Z.divide_1_l.
eapply Y; eauto. omega.
- (* single *)
+ assert (ofs <= align ofs 1) by (apply align_le; omega).
assert (ofs <= align ofs 2) by (apply align_le; omega).
destruct (list_nth_z float_param_regs fr) as [r|] eqn:E; destruct H.
subst. right. eapply list_nth_z_in; eauto.
eapply IHtyl; eauto.
- subst. split. omega. apply Z.divide_1_l.
- eapply Y; eauto. omega.
+ subst. split. destruct Archi.single_passed_as_single; simpl; omega.
+ destruct Archi.single_passed_as_single; simpl; apply Z.divide_1_l.
+ eapply Y; eauto. destruct Archi.single_passed_as_single; simpl; omega.
- (* any32 *)
destruct (list_nth_z int_param_regs ir) as [r|] eqn:E; destruct H.
subst. left. eapply list_nth_z_in; eauto.
@@ -359,105 +343,6 @@ Qed.
Hint Resolve loc_arguments_acceptable: locs.
-(** The offsets of [Outgoing] arguments are below [size_arguments s]. *)
-
-Remark size_arguments_rec_above:
- forall tyl ir fr ofs0,
- ofs0 <= size_arguments_rec tyl ir fr ofs0.
-Proof.
- induction tyl; simpl; intros.
- omega.
- destruct a.
- destruct (list_nth_z int_param_regs ir); eauto. apply Z.le_trans with (ofs0 + 1); auto; omega.
- destruct (list_nth_z float_param_regs fr); eauto.
- apply Z.le_trans with (align ofs0 2). apply align_le; omega.
- apply Z.le_trans with (align ofs0 2 + 2); auto; omega.
- set (ir' := align ir 2).
- destruct (list_nth_z int_param_regs ir'); eauto.
- destruct (list_nth_z int_param_regs (ir' + 1)); eauto.
- apply Z.le_trans with (align ofs0 2). apply align_le; omega.
- apply Z.le_trans with (align ofs0 2 + 2); auto; omega.
- apply Z.le_trans with (align ofs0 2). apply align_le; omega.
- apply Z.le_trans with (align ofs0 2 + 2); auto; omega.
- destruct (list_nth_z float_param_regs fr); eauto.
- apply Z.le_trans with (align ofs0 2). apply align_le; omega.
- apply Z.le_trans with (align ofs0 2 + 2); auto; omega.
- destruct (list_nth_z int_param_regs ir); eauto. apply Z.le_trans with (ofs0 + 1); auto; omega.
- destruct (list_nth_z float_param_regs fr); eauto.
- apply Z.le_trans with (align ofs0 2). apply align_le; omega.
- apply Z.le_trans with (align ofs0 2 + 2); auto; omega.
-Qed.
-
-Lemma size_arguments_above:
- forall s, size_arguments s >= 0.
-Proof.
- intros; unfold size_arguments. apply Z.le_ge.
- apply size_arguments_rec_above.
-Qed.
-
-Lemma loc_arguments_bounded:
- forall (s: signature) (ofs: Z) (ty: typ),
- In (S Outgoing ofs ty) (regs_of_rpairs (loc_arguments s)) ->
- ofs + typesize ty <= size_arguments s.
-Proof.
- intros.
- assert (forall tyl ir fr ofs0,
- In (S Outgoing ofs ty) (regs_of_rpairs (loc_arguments_rec tyl ir fr ofs0)) ->
- ofs + typesize ty <= size_arguments_rec tyl ir fr ofs0).
-{
- induction tyl; simpl; intros.
- elim H0.
- destruct a.
-- (* int *)
- destruct (list_nth_z int_param_regs ir); destruct H0.
- congruence.
- eauto.
- inv H0. apply size_arguments_rec_above.
- eauto.
-- (* float *)
- destruct (list_nth_z float_param_regs fr); destruct H0.
- congruence.
- eauto.
- inv H0. apply size_arguments_rec_above. eauto.
-- (* long *)
- set (ir' := align ir 2) in *.
- assert (DFL:
- In (S Outgoing ofs ty) (regs_of_rpairs
- ((if Archi.ptr64
- then One (S Outgoing (align ofs0 2) Tlong)
- else Twolong (S Outgoing (align ofs0 2) Tint)
- (S Outgoing (align ofs0 2 + 1) Tint))
- :: loc_arguments_rec tyl ir' fr (align ofs0 2 + 2))) ->
- ofs + typesize ty <= size_arguments_rec tyl ir' fr (align ofs0 2 + 2)).
- { destruct Archi.ptr64; intros IN.
- - destruct IN. inv H1. apply size_arguments_rec_above. auto.
- - destruct IN. inv H1. transitivity (align ofs0 2 + 2). simpl; omega. apply size_arguments_rec_above.
- destruct H1. inv H1. transitivity (align ofs0 2 + 2). simpl; omega. apply size_arguments_rec_above.
- auto. }
- destruct (list_nth_z int_param_regs ir'); auto.
- destruct (list_nth_z int_param_regs (ir' + 1)); auto.
- destruct H0. congruence. destruct H0. congruence. eauto.
-- (* single *)
- destruct (list_nth_z float_param_regs fr); destruct H0.
- congruence.
- eauto.
- inv H0. transitivity (align ofs0 2 + 2). simpl; omega. apply size_arguments_rec_above.
- eauto.
-- (* any32 *)
- destruct (list_nth_z int_param_regs ir); destruct H0.
- congruence.
- eauto.
- inv H0. apply size_arguments_rec_above.
- eauto.
-- (* any64 *)
- destruct (list_nth_z float_param_regs fr); destruct H0.
- congruence.
- eauto.
- inv H0. apply size_arguments_rec_above. eauto.
- }
- eauto.
-Qed.
-
Lemma loc_arguments_main:
loc_arguments signature_main = nil.
Proof.
diff --git a/powerpc/DuplicateOpcodeHeuristic.ml b/powerpc/DuplicateOpcodeHeuristic.ml
index 85505245..33be79e8 100644
--- a/powerpc/DuplicateOpcodeHeuristic.ml
+++ b/powerpc/DuplicateOpcodeHeuristic.ml
@@ -1,3 +1,27 @@
-exception HeuristicSucceeded
-
-let opcode_heuristic code cond ifso ifnot preferred = ()
+(* open Camlcoq *)
+open Op
+open Integers
+
+let opcode_heuristic code cond ifso ifnot is_loop_header =
+ match cond with
+ | Ccompimm (c, n) | Ccompuimm (c, n) -> if n == Integers.Int.zero then (match c with
+ | Clt | Cle -> Some false
+ | Cgt | Cge -> Some true
+ | _ -> None
+ ) else None
+ | Ccomplimm (c, n) | Ccompluimm (c, n) -> if n == Integers.Int64.zero then (match c with
+ | Clt | Cle -> Some false
+ | Cgt | Cge -> Some true
+ | _ -> None
+ ) else None
+ | Ccompf c -> (match c with
+ | Ceq -> Some false
+ | Cne -> Some true
+ | _ -> None
+ )
+ | Cnotcompf c -> (match c with
+ | Ceq -> Some true
+ | Cne -> Some false
+ | _ -> None
+ )
+ | _ -> None
diff --git a/powerpc/extractionMachdep.v b/powerpc/extractionMachdep.v
index 7482435f..a3e945bf 100644
--- a/powerpc/extractionMachdep.v
+++ b/powerpc/extractionMachdep.v
@@ -34,3 +34,6 @@ Extract Constant Archi.ppc64 =>
| ""e5500"" -> true
| _ -> false
end".
+
+(* Choice of passing of single *)
+Extract Constant Archi.single_passed_as_single => "Configuration.gnu_toolchain".
diff --git a/riscV/Asmexpand.ml b/riscV/Asmexpand.ml
index d36b6230..7e36abf8 100644
--- a/riscV/Asmexpand.ml
+++ b/riscV/Asmexpand.ml
@@ -63,44 +63,44 @@ let expand_storeind_ptr src base ofs =
let int_param_regs = [| X10; X11; X12; X13; X14; X15; X16; X17 |]
let float_param_regs = [| F10; F11; F12; F13; F14; F15; F16; F17 |]
-let rec fixup_variadic_call pos tyl =
- if pos < 8 then
+let rec fixup_variadic_call ri rf tyl =
+ if ri < 8 then
match tyl with
| [] ->
()
| (Tint | Tany32) :: tyl ->
- fixup_variadic_call (pos + 1) tyl
+ fixup_variadic_call (ri + 1) rf tyl
| Tsingle :: tyl ->
- let rs =float_param_regs.(pos)
- and rd = int_param_regs.(pos) in
+ let rs = float_param_regs.(rf)
+ and rd = int_param_regs.(ri) in
emit (Pfmvxs(rd, rs));
- fixup_variadic_call (pos + 1) tyl
+ fixup_variadic_call (ri + 1) (rf + 1) tyl
| Tlong :: tyl ->
- let pos' = if Archi.ptr64 then pos + 1 else align pos 2 + 2 in
- fixup_variadic_call pos' tyl
+ let ri' = if Archi.ptr64 then ri + 1 else align ri 2 + 2 in
+ fixup_variadic_call ri' rf tyl
| (Tfloat | Tany64) :: tyl ->
if Archi.ptr64 then begin
- let rs = float_param_regs.(pos)
- and rd = int_param_regs.(pos) in
+ let rs = float_param_regs.(rf)
+ and rd = int_param_regs.(ri) in
emit (Pfmvxd(rd, rs));
- fixup_variadic_call (pos + 1) tyl
+ fixup_variadic_call (ri + 1) (rf + 1) tyl
end else begin
- let pos = align pos 2 in
- if pos < 8 then begin
- let rs = float_param_regs.(pos)
- and rd1 = int_param_regs.(pos)
- and rd2 = int_param_regs.(pos + 1) in
+ let ri = align ri 2 in
+ if ri < 8 then begin
+ let rs = float_param_regs.(rf)
+ and rd1 = int_param_regs.(ri)
+ and rd2 = int_param_regs.(ri + 1) in
emit (Paddiw(X2, X X2, Integers.Int.neg _16));
emit (Pfsd(rs, X2, Ofsimm _0));
emit (Plw(rd1, X2, Ofsimm _0));
emit (Plw(rd2, X2, Ofsimm _4));
emit (Paddiw(X2, X X2, _16));
- fixup_variadic_call (pos + 2) tyl
+ fixup_variadic_call (ri + 2) (rf + 1) tyl
end
end
let fixup_call sg =
- if sg.sig_cc.cc_vararg then fixup_variadic_call 0 sg.sig_args
+ if sg.sig_cc.cc_vararg then fixup_variadic_call 0 0 sg.sig_args
(* Handling of annotations *)
diff --git a/riscV/CSE2deps.v b/riscV/CSE2deps.v
new file mode 100644
index 00000000..8ab9242a
--- /dev/null
+++ b/riscV/CSE2deps.v
@@ -0,0 +1,20 @@
+Require Import BoolEqual Coqlib.
+Require Import AST Integers Floats.
+Require Import Values Memory Globalenvs Events.
+Require Import Op.
+
+
+Definition can_swap_accesses_ofs ofsr chunkr ofsw chunkw :=
+ (0 <=? ofsw) && (ofsw <=? (Ptrofs.modulus - largest_size_chunk))
+ && (0 <=? ofsr) && (ofsr <=? (Ptrofs.modulus - largest_size_chunk))
+ && ((ofsw + size_chunk chunkw <=? ofsr) ||
+ (ofsr + size_chunk chunkr <=? ofsw)).
+
+Definition may_overlap chunk addr args chunk' addr' args' :=
+ match addr, addr', args, args' with
+ | (Aindexed ofs), (Aindexed ofs'),
+ (base :: nil), (base' :: nil) =>
+ if peq base base'
+ then negb (can_swap_accesses_ofs (Ptrofs.unsigned ofs') chunk' (Ptrofs.unsigned ofs) chunk)
+ else true | _, _, _, _ => true
+ end.
diff --git a/riscV/CSE2depsproof.v b/riscV/CSE2depsproof.v
new file mode 100644
index 00000000..a3811e78
--- /dev/null
+++ b/riscV/CSE2depsproof.v
@@ -0,0 +1,127 @@
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL Maps.
+
+Require Import Globalenvs Values.
+Require Import Linking Values Memory Globalenvs Events Smallstep.
+Require Import Registers Op RTL.
+Require Import CSE2 CSE2deps.
+Require Import Lia.
+
+Lemma ptrofs_size :
+ Ptrofs.wordsize = (if Archi.ptr64 then 64 else 32)%nat.
+Proof.
+ unfold Ptrofs.wordsize.
+ unfold Wordsize_Ptrofs.wordsize.
+ trivial.
+Qed.
+
+Lemma ptrofs_modulus :
+ Ptrofs.modulus = if Archi.ptr64 then 18446744073709551616 else 4294967296.
+Proof.
+ unfold Ptrofs.modulus.
+ rewrite ptrofs_size.
+ destruct Archi.ptr64; reflexivity.
+Qed.
+
+Section SOUNDNESS.
+ Variable F V : Type.
+ Variable genv: Genv.t F V.
+ Variable sp : val.
+
+Section MEMORY_WRITE.
+ Variable m m2 : mem.
+ Variable chunkw chunkr : memory_chunk.
+ Variable base : val.
+
+ Variable addrw addrr valw : val.
+ Hypothesis STORE : Mem.storev chunkw m addrw valw = Some m2.
+
+ Section INDEXED_AWAY.
+ Variable ofsw ofsr : ptrofs.
+ Hypothesis ADDRW : eval_addressing genv sp
+ (Aindexed ofsw) (base :: nil) = Some addrw.
+ Hypothesis ADDRR : eval_addressing genv sp
+ (Aindexed ofsr) (base :: nil) = Some addrr.
+
+ Lemma load_store_away1 :
+ forall RANGEW : 0 <= Ptrofs.unsigned ofsw <= Ptrofs.modulus - largest_size_chunk,
+ forall RANGER : 0 <= Ptrofs.unsigned ofsr <= Ptrofs.modulus - largest_size_chunk,
+ forall SWAPPABLE : Ptrofs.unsigned ofsw + size_chunk chunkw <= Ptrofs.unsigned ofsr
+ \/ Ptrofs.unsigned ofsr + size_chunk chunkr <= Ptrofs.unsigned ofsw,
+ Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+
+ Proof.
+ intros.
+
+ pose proof (max_size_chunk chunkr) as size_chunkr_bounded.
+ pose proof (max_size_chunk chunkw) as size_chunkw_bounded.
+ unfold largest_size_chunk in *.
+
+ rewrite ptrofs_modulus in *.
+ simpl in *.
+ inv ADDRR.
+ inv ADDRW.
+ destruct base; try discriminate.
+ eapply Mem.load_store_other with (chunk := chunkw) (v := valw) (b := b).
+ exact STORE.
+ right.
+
+ all: try (destruct (Ptrofs.unsigned_add_either i ofsr) as [OFSR | OFSR];
+ rewrite OFSR).
+ all: try (destruct (Ptrofs.unsigned_add_either i ofsw) as [OFSW | OFSW];
+ rewrite OFSW).
+ all: try rewrite ptrofs_modulus in *.
+ all: destruct Archi.ptr64.
+
+ all: intuition lia.
+ Qed.
+
+ Theorem load_store_away :
+ can_swap_accesses_ofs (Ptrofs.unsigned ofsr) chunkr (Ptrofs.unsigned ofsw) chunkw = true ->
+ Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+ Proof.
+ intro SWAP.
+ unfold can_swap_accesses_ofs in SWAP.
+ repeat rewrite andb_true_iff in SWAP.
+ repeat rewrite orb_true_iff in SWAP.
+ repeat rewrite Z.leb_le in SWAP.
+ apply load_store_away1.
+ all: tauto.
+ Qed.
+ End INDEXED_AWAY.
+End MEMORY_WRITE.
+End SOUNDNESS.
+
+
+Section SOUNDNESS.
+ Variable F V : Type.
+ Variable genv: Genv.t F V.
+ Variable sp : val.
+
+Lemma may_overlap_sound:
+ forall m m' : mem,
+ forall chunk addr args chunk' addr' args' v a a' rs,
+ (eval_addressing genv sp addr (rs ## args)) = Some a ->
+ (eval_addressing genv sp addr' (rs ## args')) = Some a' ->
+ (may_overlap chunk addr args chunk' addr' args') = false ->
+ (Mem.storev chunk m a v) = Some m' ->
+ (Mem.loadv chunk' m' a') = (Mem.loadv chunk' m a').
+Proof.
+ intros until rs.
+ intros ADDR ADDR' OVERLAP STORE.
+ destruct addr; destruct addr'; try discriminate.
+ { (* Aindexed / Aindexed *)
+ destruct args as [ | base [ | ]]. 1,3: discriminate.
+ destruct args' as [ | base' [ | ]]. 1,3: discriminate.
+ simpl in OVERLAP.
+ destruct (peq base base'). 2: discriminate.
+ subst base'.
+ destruct (can_swap_accesses_ofs (Ptrofs.unsigned i0) chunk' (Ptrofs.unsigned i) chunk) eqn:SWAP.
+ 2: discriminate.
+ simpl in *.
+ eapply load_store_away with (F:=F) (V:=V) (genv:=genv) (sp:=sp); eassumption.
+ }
+Qed.
+
+End SOUNDNESS.
diff --git a/riscV/Conventions1.v b/riscV/Conventions1.v
index 27d09d94..17326139 100644
--- a/riscV/Conventions1.v
+++ b/riscV/Conventions1.v
@@ -105,7 +105,9 @@ Definition is_float_reg (r: mreg) :=
of function arguments), but this leaves much liberty in choosing actual
locations. To ensure binary interoperability of code generated by our
compiler with libraries compiled by another compiler, we
- implement the standard RISC-V conventions. *)
+ implement the standard RISC-V conventions as found here:
+ https://github.com/riscv/riscv-elf-psabi-doc/blob/master/riscv-elf.md
+*)
(** ** Location of function result *)
@@ -169,38 +171,32 @@ Qed.
(** ** Location of function arguments *)
-(** The RISC-V ABI states the following convention for passing arguments
+(** The RISC-V ABI states the following conventions for passing arguments
to a function:
-- Arguments are passed in registers when possible.
-
-- Up to eight integer registers (ai: int_param_regs) and up to eight
- floating-point registers (fai: float_param_regs) are used for this
- purpose.
-
-- If the arguments to a function are conceptualized as fields of a C
- struct, each with pointer alignment, the argument registers are a
- shadow of the first eight pointer-words of that struct. If argument
- i < 8 is a floating-point type, it is passed in floating-point
- register fa_i; otherwise, it is passed in integer register a_i.
-
-- When primitive arguments twice the size of a pointer-word are passed
- on the stack, they are naturally aligned. When they are passed in the
- integer registers, they reside in an aligned even-odd register pair,
- with the even register holding the least-significant bits.
-
-- Floating-point arguments to variadic functions (except those that
- are explicitly named in the parameter list) are passed in integer
- registers.
-
-- The portion of the conceptual struct that is not passed in argument
- registers is passed on the stack. The stack pointer sp points to the
- first argument not passed in a register.
-
-The bit about variadic functions doesn't quite fit CompCert's model.
-We do our best by passing the FP arguments in registers, as usual,
-and reserving the corresponding integer registers, so that fixup
-code can be introduced in the Asmexpand pass.
+- RV64, not variadic: pass the first 8 integer arguments in
+ integer registers (a1...a8: int_param_regs), the first 8 FP arguments
+ in FP registers (fa1...fa8: float_param_regs), and the remaining
+ arguments on the stack, in 8-byte slots.
+
+- RV32, not variadic: same, but arguments of 64-bit integer type
+ are passed in two consecutive integer registers (a(i), a(i+1))
+ or in a(8) and on a 32-bit word on the stack. Stack-allocated
+ arguments are aligned to their natural alignment.
+
+- RV64, variadic: pass the first 8 arguments in integer registers
+ (a1...a8), including FP arguments; pass the remaining arguments on
+ the stack, in 8-byte slots.
+
+- RV32, variadic: same, but arguments of 64-bit types (integers as well
+ as floats) are passed in two consecutive aligned integer registers
+ (a(2i), a(2i+1)).
+
+The passing of FP arguments to variadic functions in integer registers
+doesn't quite fit CompCert's model. We do our best by passing the FP
+arguments in registers, as usual, and reserving the corresponding
+integer registers, so that fixup code can be introduced in the
+Asmexpand pass.
*)
Definition int_param_regs :=
@@ -208,80 +204,84 @@ Definition int_param_regs :=
Definition float_param_regs :=
F10 :: F11 :: F12 :: F13 :: F14 :: F15 :: F16 :: F17 :: nil.
-Definition one_arg (regs: list mreg) (rn: Z) (ofs: Z) (ty: typ)
- (rec: Z -> Z -> list (rpair loc)) :=
- match list_nth_z regs rn with
+Definition int_arg (ri rf ofs: Z) (ty: typ)
+ (rec: Z -> Z -> Z -> list (rpair loc)) :=
+ match list_nth_z int_param_regs ri with
| Some r =>
- One(R r) :: rec (rn + 1) ofs
+ One(R r) :: rec (ri + 1) rf ofs
| None =>
- let ofs := align ofs (typealign ty) in
- One(S Outgoing ofs ty) :: rec rn (ofs + (if Archi.ptr64 then 2 else typesize ty))
+ let ofs := align ofs (typesize ty) in
+ One(S Outgoing ofs ty)
+ :: rec ri rf (ofs + (if Archi.ptr64 then 2 else typesize ty))
end.
-Definition two_args (regs: list mreg) (rn: Z) (ofs: Z)
- (rec: Z -> Z -> list (rpair loc)) :=
- let rn := align rn 2 in
- match list_nth_z regs rn, list_nth_z regs (rn + 1) with
- | Some r1, Some r2 =>
- Twolong (R r2) (R r1) :: rec (rn + 2) ofs
- | _, _ =>
- let ofs := align ofs 2 in
- Twolong (S Outgoing (ofs + 1) Tint) (S Outgoing ofs Tint) ::
- rec rn (ofs + 2)
+Definition float_arg (va: bool) (ri rf ofs: Z) (ty: typ)
+ (rec: Z -> Z -> Z -> list (rpair loc)) :=
+ match list_nth_z float_param_regs rf with
+ | Some r =>
+ if va then
+ (let ri' := (* reserve 1 or 2 aligned integer registers *)
+ if Archi.ptr64 || zeq (typesize ty) 1 then ri + 1 else align ri 2 + 2 in
+ if zle ri' 8 then
+ (* we have enough integer registers, put argument in FP reg
+ and fixup code will put it in one or two integer regs *)
+ One (R r) :: rec ri' (rf + 1) ofs
+ else
+ (* we are out of integer registers, pass argument on stack *)
+ let ofs := align ofs (typesize ty) in
+ One(S Outgoing ofs ty)
+ :: rec ri' rf (ofs + (if Archi.ptr64 then 2 else typesize ty)))
+ else
+ One (R r) :: rec ri (rf + 1) ofs
+ | None =>
+ let ofs := align ofs (typesize ty) in
+ One(S Outgoing ofs ty)
+ :: rec ri rf (ofs + (if Archi.ptr64 then 2 else typesize ty))
end.
-Definition hybrid_arg (regs: list mreg) (rn: Z) (ofs: Z) (ty: typ)
- (rec: Z -> Z -> list (rpair loc)) :=
- let rn := align rn 2 in
- match list_nth_z regs rn with
- | Some r =>
- One (R r) :: rec (rn + 2) ofs
- | None =>
+Definition split_long_arg (va: bool) (ri rf ofs: Z)
+ (rec: Z -> Z -> Z -> list (rpair loc)) :=
+ let ri := if va then align ri 2 else ri in
+ match list_nth_z int_param_regs ri, list_nth_z int_param_regs (ri + 1) with
+ | Some r1, Some r2 =>
+ Twolong (R r2) (R r1) :: rec (ri + 2) rf ofs
+ | Some r1, None =>
+ Twolong (S Outgoing ofs Tint) (R r1) :: rec (ri + 1) rf (ofs + 1)
+ | None, _ =>
let ofs := align ofs 2 in
- One (S Outgoing ofs ty) :: rec rn (ofs + 2)
+ Twolong (S Outgoing (ofs + 1) Tint) (S Outgoing ofs Tint) ::
+ rec ri rf (ofs + 2)
end.
Fixpoint loc_arguments_rec (va: bool)
- (tyl: list typ) (r ofs: Z) {struct tyl} : list (rpair loc) :=
+ (tyl: list typ) (ri rf ofs: Z) {struct tyl} : list (rpair loc) :=
match tyl with
| nil => nil
| (Tint | Tany32) as ty :: tys =>
- one_arg int_param_regs r ofs ty (loc_arguments_rec va tys)
+ (* pass in one integer register or on stack *)
+ int_arg ri rf ofs ty (loc_arguments_rec va tys)
| Tsingle as ty :: tys =>
- one_arg float_param_regs r ofs ty (loc_arguments_rec va tys)
+ (* pass in one FP register or on stack.
+ If vararg, reserve 1 integer register. *)
+ float_arg va ri rf ofs ty (loc_arguments_rec va tys)
| Tlong as ty :: tys =>
- if Archi.ptr64
- then one_arg int_param_regs r ofs ty (loc_arguments_rec va tys)
- else two_args int_param_regs r ofs (loc_arguments_rec va tys)
+ if Archi.ptr64 then
+ (* pass in one integer register or on stack *)
+ int_arg ri rf ofs ty (loc_arguments_rec va tys)
+ else
+ (* pass in register pair or on stack; align register pair if vararg *)
+ split_long_arg va ri rf ofs(loc_arguments_rec va tys)
| (Tfloat | Tany64) as ty :: tys =>
- if va && negb Archi.ptr64
- then hybrid_arg float_param_regs r ofs ty (loc_arguments_rec va tys)
- else one_arg float_param_regs r ofs ty (loc_arguments_rec va tys)
+ (* pass in one FP register or on stack.
+ If vararg, reserve 1 or 2 integer registers. *)
+ float_arg va ri rf ofs ty (loc_arguments_rec va tys)
end.
(** [loc_arguments s] returns the list of locations where to store arguments
when calling a function with signature [s]. *)
Definition loc_arguments (s: signature) : list (rpair loc) :=
- loc_arguments_rec s.(sig_cc).(cc_vararg) s.(sig_args) 0 0.
-
-(** [size_arguments s] returns the number of [Outgoing] slots used
- to call a function with signature [s]. *)
-
-Definition max_outgoing_1 (accu: Z) (l: loc) : Z :=
- match l with
- | S Outgoing ofs ty => Z.max accu (ofs + typesize ty)
- | _ => accu
- end.
-
-Definition max_outgoing_2 (accu: Z) (rl: rpair loc) : Z :=
- match rl with
- | One l => max_outgoing_1 accu l
- | Twolong l1 l2 => max_outgoing_1 (max_outgoing_1 accu l1) l2
- end.
-
-Definition size_arguments (s: signature) : Z :=
- List.fold_left max_outgoing_2 (loc_arguments s) 0.
+ loc_arguments_rec s.(sig_cc).(cc_vararg) s.(sig_args) 0 0 0.
(** Argument locations are either non-temporary registers or [Outgoing]
stack slots at nonnegative offsets. *)
@@ -294,90 +294,87 @@ Definition loc_argument_acceptable (l: loc) : Prop :=
end.
Lemma loc_arguments_rec_charact:
- forall va tyl rn ofs p,
+ forall va tyl ri rf ofs p,
ofs >= 0 ->
- In p (loc_arguments_rec va tyl rn ofs) -> forall_rpair loc_argument_acceptable p.
+ In p (loc_arguments_rec va tyl ri rf ofs) -> forall_rpair loc_argument_acceptable p.
Proof.
set (OK := fun (l: list (rpair loc)) =>
forall p, In p l -> forall_rpair loc_argument_acceptable p).
- set (OKF := fun (f: Z -> Z -> list (rpair loc)) =>
- forall rn ofs, ofs >= 0 -> OK (f rn ofs)).
- set (OKREGS := fun (l: list mreg) => forall r, In r l -> is_callee_save r = false).
- assert (AL: forall ofs ty, ofs >= 0 -> align ofs (typealign ty) >= 0).
+ set (OKF := fun (f: Z -> Z -> Z -> list (rpair loc)) =>
+ forall ri rf ofs, ofs >= 0 -> OK (f ri rf ofs)).
+ assert (CSI: forall r, In r int_param_regs -> is_callee_save r = false).
+ { decide_goal. }
+ assert (CSF: forall r, In r float_param_regs -> is_callee_save r = false).
+ { decide_goal. }
+ assert (AL: forall ofs ty, ofs >= 0 -> align ofs (typesize ty) >= 0).
{ intros.
- assert (ofs <= align ofs (typealign ty)) by (apply align_le; apply typealign_pos).
+ assert (ofs <= align ofs (typesize ty)) by (apply align_le; apply typesize_pos).
omega. }
+ assert (ALD: forall ofs ty, ofs >= 0 -> (typealign ty | align ofs (typesize ty))).
+ { intros. eapply Z.divide_trans. apply typealign_typesize.
+ apply align_divides. apply typesize_pos. }
assert (SK: (if Archi.ptr64 then 2 else 1) > 0).
{ destruct Archi.ptr64; omega. }
assert (SKK: forall ty, (if Archi.ptr64 then 2 else typesize ty) > 0).
{ intros. destruct Archi.ptr64. omega. apply typesize_pos. }
- assert (A: forall regs rn ofs ty f,
- OKREGS regs -> OKF f -> ofs >= 0 -> OK (one_arg regs rn ofs ty f)).
- { intros until f; intros OR OF OO; red; unfold one_arg; intros.
- destruct (list_nth_z regs rn) as [r|] eqn:NTH; destruct H.
- - subst p; simpl. apply OR. eapply list_nth_z_in; eauto.
+ assert (A: forall ri rf ofs ty f,
+ OKF f -> ofs >= 0 -> OK (int_arg ri rf ofs ty f)).
+ { intros until f; intros OF OO; red; unfold int_arg; intros.
+ destruct (list_nth_z int_param_regs ri) as [r|] eqn:NTH; destruct H.
+ - subst p; simpl. apply CSI. eapply list_nth_z_in; eauto.
- eapply OF; eauto.
- subst p; simpl. auto using align_divides, typealign_pos.
- eapply OF; [idtac|eauto].
generalize (AL ofs ty OO) (SKK ty); omega.
}
- assert (B: forall regs rn ofs f,
- OKREGS regs -> OKF f -> ofs >= 0 -> OK (two_args regs rn ofs f)).
- { intros until f; intros OR OF OO; unfold two_args.
- set (rn' := align rn 2).
+ assert (B: forall va ri rf ofs ty f,
+ OKF f -> ofs >= 0 -> OK (float_arg va ri rf ofs ty f)).
+ { intros until f; intros OF OO; red; unfold float_arg; intros.
+ destruct (list_nth_z float_param_regs rf) as [r|] eqn:NTH.
+ - set (ri' := if Archi.ptr64 || zeq (typesize ty) 1 then ri + 1 else align ri 2 + 2) in *.
+ destruct va; [destruct (zle ri' 8)|idtac]; destruct H.
+ + subst p; simpl. apply CSF. eapply list_nth_z_in; eauto.
+ + eapply OF; eauto.
+ + subst p; repeat split; auto.
+ + eapply OF; [idtac|eauto]. generalize (AL ofs ty OO) (SKK ty); omega.
+ + subst p; simpl. apply CSF. eapply list_nth_z_in; eauto.
+ + eapply OF; eauto.
+ - destruct H.
+ + subst p; repeat split; auto.
+ + eapply OF; [idtac|eauto]. generalize (AL ofs ty OO) (SKK ty); omega.
+ }
+ assert (C: forall va ri rf ofs f,
+ OKF f -> ofs >= 0 -> OK (split_long_arg va ri rf ofs f)).
+ { intros until f; intros OF OO; unfold split_long_arg.
+ set (ri' := if va then align ri 2 else ri).
set (ofs' := align ofs 2).
assert (OO': ofs' >= 0) by (apply (AL ofs Tlong); auto).
- assert (DFL: OK (Twolong (S Outgoing (ofs' + 1) Tint) (S Outgoing ofs' Tint)
- :: f rn' (ofs' + 2))).
- { red; simpl; intros. destruct H.
- - subst p; simpl.
- repeat split; auto using Z.divide_1_l. omega.
- - eapply OF; [idtac|eauto]. omega.
- }
- destruct (list_nth_z regs rn') as [r1|] eqn:NTH1;
- destruct (list_nth_z regs (rn' + 1)) as [r2|] eqn:NTH2;
- try apply DFL.
- red; simpl; intros; destruct H.
- - subst p; simpl. split; apply OR; eauto using list_nth_z_in.
- - eapply OF; [idtac|eauto]. auto.
- }
- assert (C: forall regs rn ofs ty f,
- OKREGS regs -> OKF f -> ofs >= 0 -> typealign ty = 1 -> OK (hybrid_arg regs rn ofs ty f)).
- { intros until f; intros OR OF OO OTY; unfold hybrid_arg; red; intros.
- set (rn' := align rn 2) in *.
- destruct (list_nth_z regs rn') as [r|] eqn:NTH; destruct H.
- - subst p; simpl. apply OR. eapply list_nth_z_in; eauto.
- - eapply OF; eauto.
- - subst p; simpl. rewrite OTY. split. apply (AL ofs Tlong OO). apply Z.divide_1_l.
- - eapply OF; [idtac|eauto]. generalize (AL ofs Tlong OO); simpl; omega.
+ destruct (list_nth_z int_param_regs ri') as [r1|] eqn:NTH1;
+ [destruct (list_nth_z int_param_regs (ri'+1)) as [r2|] eqn:NTH2 | idtac].
+ - red; simpl; intros; destruct H.
+ + subst p; split; apply CSI; eauto using list_nth_z_in.
+ + eapply OF; [idtac|eauto]. omega.
+ - red; simpl; intros; destruct H.
+ + subst p; split. split; auto using Z.divide_1_l. apply CSI; eauto using list_nth_z_in.
+ + eapply OF; [idtac|eauto]. omega.
+ - red; simpl; intros; destruct H.
+ + subst p; repeat split; auto using Z.divide_1_l. omega.
+ + eapply OF; [idtac|eauto]. omega.
}
- assert (D: OKREGS int_param_regs).
- { red. decide_goal. }
- assert (E: OKREGS float_param_regs).
- { red. decide_goal. }
-
- cut (forall va tyl rn ofs, ofs >= 0 -> OK (loc_arguments_rec va tyl rn ofs)).
+ cut (forall va tyl ri rf ofs, ofs >= 0 -> OK (loc_arguments_rec va tyl ri rf ofs)).
unfold OK. eauto.
induction tyl as [ | ty1 tyl]; intros until ofs; intros OO; simpl.
- red; simpl; tauto.
- destruct ty1.
+ (* int *) apply A; auto.
-+ (* float *)
- destruct (va && negb Archi.ptr64).
- apply C; auto.
- apply A; auto.
++ (* float *) apply B; auto.
+ (* long *)
destruct Archi.ptr64.
apply A; auto.
- apply B; auto.
-+ (* single *)
- apply A; auto.
-+ (* any32 *)
- apply A; auto.
-+ (* any64 *)
- destruct (va && negb Archi.ptr64).
apply C; auto.
- apply A; auto.
++ (* single *) apply B; auto.
++ (* any32 *) apply A; auto.
++ (* any64 *) apply B; auto.
Qed.
Lemma loc_arguments_acceptable:
@@ -387,52 +384,6 @@ Proof.
unfold loc_arguments; intros. eapply loc_arguments_rec_charact; eauto. omega.
Qed.
-(** The offsets of [Outgoing] arguments are below [size_arguments s]. *)
-
-Remark fold_max_outgoing_above:
- forall l n, fold_left max_outgoing_2 l n >= n.
-Proof.
- assert (A: forall n l, max_outgoing_1 n l >= n).
- { intros; unfold max_outgoing_1. destruct l as [_ | []]; xomega. }
- induction l; simpl; intros.
- - omega.
- - eapply Zge_trans. eauto.
- destruct a; simpl. apply A. eapply Zge_trans; eauto.
-Qed.
-
-Lemma size_arguments_above:
- forall s, size_arguments s >= 0.
-Proof.
- intros. apply fold_max_outgoing_above.
-Qed.
-
-Lemma loc_arguments_bounded:
- forall (s: signature) (ofs: Z) (ty: typ),
- In (S Outgoing ofs ty) (regs_of_rpairs (loc_arguments s)) ->
- ofs + typesize ty <= size_arguments s.
-Proof.
- intros until ty.
- assert (A: forall n l, n <= max_outgoing_1 n l).
- { intros; unfold max_outgoing_1. destruct l as [_ | []]; xomega. }
- assert (B: forall p n,
- In (S Outgoing ofs ty) (regs_of_rpair p) ->
- ofs + typesize ty <= max_outgoing_2 n p).
- { intros. destruct p; simpl in H; intuition; subst; simpl.
- - xomega.
- - eapply Z.le_trans. 2: apply A. xomega.
- - xomega. }
- assert (C: forall l n,
- In (S Outgoing ofs ty) (regs_of_rpairs l) ->
- ofs + typesize ty <= fold_left max_outgoing_2 l n).
- { induction l; simpl; intros.
- - contradiction.
- - rewrite in_app_iff in H. destruct H.
- + eapply Z.le_trans. eapply B; eauto. apply Z.ge_le. apply fold_max_outgoing_above.
- + apply IHl; auto.
- }
- apply C.
-Qed.
-
Lemma loc_arguments_main:
loc_arguments signature_main = nil.
Proof.
diff --git a/riscV/DuplicateOpcodeHeuristic.ml b/riscV/DuplicateOpcodeHeuristic.ml
index 85505245..2ec314c1 100644
--- a/riscV/DuplicateOpcodeHeuristic.ml
+++ b/riscV/DuplicateOpcodeHeuristic.ml
@@ -1,3 +1,27 @@
-exception HeuristicSucceeded
-
-let opcode_heuristic code cond ifso ifnot preferred = ()
+(* open Camlcoq *)
+open Op
+open Integers
+
+let opcode_heuristic code cond ifso ifnot is_loop_header =
+ match cond with
+ | Ccompimm (c, n) | Ccompuimm (c, n) -> if n == Integers.Int.zero then (match c with
+ | Clt | Cle -> Some false
+ | Cgt | Cge -> Some true
+ | _ -> None
+ ) else None
+ | Ccomplimm (c, n) | Ccompluimm (c, n) -> if n == Integers.Int64.zero then (match c with
+ | Clt | Cle -> Some false
+ | Cgt | Cge -> Some true
+ | _ -> None
+ ) else None
+ | Ccompf c | Ccompfs c -> (match c with
+ | Ceq -> Some false
+ | Cne -> Some true
+ | _ -> None
+ )
+ | Cnotcompf c | Cnotcompfs c -> (match c with
+ | Ceq -> Some true
+ | Cne -> Some false
+ | _ -> None
+ )
+ | _ -> None
diff --git a/runtime/arm/i64_stof.S b/runtime/arm/i64_stof.S
index bcfa471c..11e00a2a 100644
--- a/runtime/arm/i64_stof.S
+++ b/runtime/arm/i64_stof.S
@@ -39,12 +39,11 @@
@@@ Conversion from signed 64-bit integer to single float
FUNCTION(__compcert_i64_stof)
- @ Check whether -2^53 <= X < 2^53
- ASR r2, Reg0HI, #21
- ASR r3, Reg0HI, #31 @ (r2,r3) = X >> 53
+ @ Check whether -2^53 <= X < 2^53
+ ASR r2, Reg0HI, #21 @ r2 = high 32 bits of X >> 53
+ @ -2^53 <= X < 2^53 iff r2 is -1 or 0, that is, iff r2 + 1 is 0 or 1
adds r2, r2, #1
- adc r3, r3, #0 @ (r2,r3) = X >> 53 + 1
- cmp r3, #2
+ cmp r2, #2
blo 1f
@ X is large enough that double rounding can occur.
@ Avoid it by nudging X away from the points where double rounding
diff --git a/runtime/include/math.h b/runtime/include/math.h
index d6475df1..01b8d8d8 100644
--- a/runtime/include/math.h
+++ b/runtime/include/math.h
@@ -1,6 +1,8 @@
#ifndef _COMPCERT_MATH_H
#define _COMPCERT_MATH_H
+#ifdef __K1C__
+
#define isfinite(__y) (fpclassify((__y)) >= FP_ZERO)
#include_next <math.h>
@@ -16,4 +18,9 @@
#define fmaf(x, y, z) __builtin_fmaf((x),(y),(z))
#endif
+#else
+
+#include_next <math.h>
+
+#endif
#endif
diff --git a/runtime/powerpc/i64_stof.s b/runtime/powerpc/i64_stof.s
index 97fa6bb8..ea23a1c8 100644
--- a/runtime/powerpc/i64_stof.s
+++ b/runtime/powerpc/i64_stof.s
@@ -43,20 +43,19 @@
__compcert_i64_stof:
mflr r9
# Check whether -2^53 <= X < 2^53
- srawi r5, r3, 31
- srawi r6, r3, 21 # (r5,r6) = X >> 53
- addic r6, r6, 1
- addze r5, r5 # (r5,r6) = (X >> 53) + 1
+ srawi r5, r3, 21 # r5 = high 32 bits of X >> 53
+ # -2^53 <= X < 2^53 iff r5 is -1 or 0, that is, iff r5 + 1 is 0 or 1
+ addi r5, r5, 1
cmplwi r5, 2
blt 1f
# X is large enough that double rounding can occur.
# Avoid it by nudging X away from the points where double rounding
# occurs (the "round to odd" technique)
- rlwinm r0, r4, 0, 21, 31 # extract bits 0 to 11 of X
- addi r0, r0, 0x7FF # r0 = (X & 0x7FF) + 0x7FF
- # bit 12 of r0 is 0 if all low 12 bits of X are 0, 1 otherwise
- # bits 13-31 of r0 are 0
- or r4, r4, r0 # correct bit number 12 of X
+ rlwinm r5, r4, 0, 21, 31 # extract bits 0 to 11 of X
+ addi r5, r5, 0x7FF # r5 = (X & 0x7FF) + 0x7FF
+ # bit 12 of r5 is 0 if all low 12 bits of X are 0, 1 otherwise
+ # bits 13-31 of r5 are 0
+ or r4, r4, r5 # correct bit number 12 of X
rlwinm r4, r4, 0, 0, 20 # set to 0 bits 0 to 11 of X
# Convert to double, then round to single
1: bl __compcert_i64_stod
diff --git a/runtime/powerpc/i64_utof.s b/runtime/powerpc/i64_utof.s
index cdb2f867..4a2a172b 100644
--- a/runtime/powerpc/i64_utof.s
+++ b/runtime/powerpc/i64_utof.s
@@ -48,11 +48,11 @@ __compcert_i64_utof:
# X is large enough that double rounding can occur.
# Avoid it by nudging X away from the points where double rounding
# occurs (the "round to odd" technique)
- rlwinm r0, r4, 0, 21, 31 # extract bits 0 to 11 of X
- addi r0, r0, 0x7FF # r0 = (X & 0x7FF) + 0x7FF
- # bit 12 of r0 is 0 if all low 12 bits of X are 0, 1 otherwise
- # bits 13-31 of r0 are 0
- or r4, r4, r0 # correct bit number 12 of X
+ rlwinm r5, r4, 0, 21, 31 # extract bits 0 to 11 of X
+ addi r5, r5, 0x7FF # r5 = (X & 0x7FF) + 0x7FF
+ # bit 12 of r5 is 0 if all low 12 bits of X are 0, 1 otherwise
+ # bits 13-31 of r5 are 0
+ or r4, r4, r5 # correct bit number 12 of X
rlwinm r4, r4, 0, 0, 20 # set to 0 bits 0 to 11 of X
# Convert to double, then round to single
1: bl __compcert_i64_utod
diff --git a/runtime/powerpc64/i64_utof.s b/runtime/powerpc64/i64_utof.s
index cdb2f867..4a2a172b 100644
--- a/runtime/powerpc64/i64_utof.s
+++ b/runtime/powerpc64/i64_utof.s
@@ -48,11 +48,11 @@ __compcert_i64_utof:
# X is large enough that double rounding can occur.
# Avoid it by nudging X away from the points where double rounding
# occurs (the "round to odd" technique)
- rlwinm r0, r4, 0, 21, 31 # extract bits 0 to 11 of X
- addi r0, r0, 0x7FF # r0 = (X & 0x7FF) + 0x7FF
- # bit 12 of r0 is 0 if all low 12 bits of X are 0, 1 otherwise
- # bits 13-31 of r0 are 0
- or r4, r4, r0 # correct bit number 12 of X
+ rlwinm r5, r4, 0, 21, 31 # extract bits 0 to 11 of X
+ addi r5, r5, 0x7FF # r5 = (X & 0x7FF) + 0x7FF
+ # bit 12 of r5 is 0 if all low 12 bits of X are 0, 1 otherwise
+ # bits 13-31 of r5 are 0
+ or r4, r4, r5 # correct bit number 12 of X
rlwinm r4, r4, 0, 0, 20 # set to 0 bits 0 to 11 of X
# Convert to double, then round to single
1: bl __compcert_i64_utod
diff --git a/test/Makefile b/test/Makefile
index 7efcd8f1..e9c5d6a1 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -4,7 +4,9 @@ include ../Makefile.config
# Kalray note - removing compression, raytracer and spass that cannot be executed by the simulator in reasonable time
ifeq ($(ARCH),mppa_k1c)
- DIRS:=c regression
+ DIRS=c regression
+else
+ DIRS=c compression raytracer spass regression
endif
ifeq ($(CLIGHTGEN),true)
diff --git a/test/c/mandelbrot.c b/test/c/mandelbrot.c
index fb8b929c..548c3ffa 100644
--- a/test/c/mandelbrot.c
+++ b/test/c/mandelbrot.c
@@ -59,7 +59,6 @@ int main (int argc, char **argv)
if(bit_num == 8)
{
- printf("%c", byte_acc);
putc(byte_acc,stdout);
#ifdef __K1C__ // stdout isn't flushed enough when --syscall=libstd_scalls.so is passed to the simulator k1-cluster
fflush(stdout);
@@ -70,7 +69,6 @@ int main (int argc, char **argv)
else if(x == w-1)
{
byte_acc <<= (8-w%8);
- printf("%c", byte_acc);
putc(byte_acc,stdout);
#ifdef __K1C__ // stdout isn't flushed enough when --syscall=libstd_scalls.so is passed to the simulator k1-cluster
fflush(stdout);
diff --git a/test/cse2/globals.c b/test/cse2/globals.c
new file mode 100644
index 00000000..c6dd59cd
--- /dev/null
+++ b/test/cse2/globals.c
@@ -0,0 +1,8 @@
+int glob1, glob2;
+
+void toto() {
+ if (glob1 > 4) {
+ glob2 ++;
+ glob1 --;
+ }
+}
diff --git a/test/cse2/indexed_addr.c b/test/cse2/indexed_addr.c
new file mode 100644
index 00000000..30a7c571
--- /dev/null
+++ b/test/cse2/indexed_addr.c
@@ -0,0 +1,6 @@
+void foo(int *t) {
+ if (t[0] > 4) {
+ t[1] ++;
+ t[0] --;
+ }
+}
diff --git a/test/monniaux/clock.c b/test/monniaux/clock.c
index fb636667..4ec679f6 100644
--- a/test/monniaux/clock.c
+++ b/test/monniaux/clock.c
@@ -24,9 +24,9 @@ cycle_t get_current_cycle(void) {
}
void print_total_clock(void) {
- printf("time cycles: %lu\n", total_clock);
+ printf("time cycles: %" PRcycle "\n", total_clock);
}
void printerr_total_clock(void) {
- fprintf(stderr, "time cycles: %lu\n", total_clock);
+ fprintf(stderr, "time cycles: %" PRcycle "\n", total_clock);
}
diff --git a/test/monniaux/cycles.h b/test/monniaux/cycles.h
index 21541145..c7dc582b 100644
--- a/test/monniaux/cycles.h
+++ b/test/monniaux/cycles.h
@@ -1,13 +1,11 @@
+#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>
-typedef unsigned long cycle_t;
-
-#ifdef MAX_MEASURES
- static cycle_t _last_stop[MAX_MEASURES] = {0};
- static cycle_t _total_cycles[MAX_MEASURES] = {0};
-#endif
#ifdef __K1C__
+typedef uint64_t cycle_t;
+#define PRcycle PRId64
+
#include <../../k1-cos/include/hal/cos_registers.h>
static inline void cycle_count_config(void)
@@ -27,18 +25,57 @@ static inline cycle_t get_cycle(void)
#else // not K1C
static inline void cycle_count_config(void) { }
-#ifdef __x86_64__
+#if defined(__i386__) || defined( __x86_64__)
+#define PRcycle PRId64
+typedef uint64_t cycle_t;
#include <x86intrin.h>
static inline cycle_t get_cycle(void) { return __rdtsc(); }
#elif __riscv
+#ifdef __riscv32
+#define PRcycle PRId32
+typedef uint32_t cycle_t;
+#else
+#define PRcycle PRId64
+typedef uint64_t cycle_t;
+#endif
static inline cycle_t get_cycle(void) {
cycle_t cycles;
asm volatile ("rdcycle %0" : "=r" (cycles));
return cycles;
}
+#elif defined (__ARM_ARCH) && (__ARM_ARCH >= 6)
+#if (__ARM_ARCH < 8)
+typedef uint32_t cycle_t;
+#define PRcycle PRId32
+
+/* need this kernel module
+https://github.com/zertyz/MTL/tree/master/cpp/time/kernel/arm */
+static inline cycle_t get_cycle(void) {
+ cycle_t cycles;
+ __asm__ volatile ("mrc p15, 0, %0, c9, c13, 0":"=r" (cycles));
+ return cycles;
+}
#else
+#define PRcycle PRId64
+typedef uint64_t cycle_t;
+/* need this kernel module:
+https://github.com/jerinjacobk/armv8_pmu_cycle_counter_el0
+
+on 5+ kernels, remove first argument of access_ok macro */
+
+static inline cycle_t get_cycle(void)
+{
+ uint64_t val;
+ __asm__ volatile("mrs %0, pmccntr_el0" : "=r"(val));
+ return val;
+}
+#endif
+
+#else
+#define PRcycle PRId32
+typedef uint32_t cycle_t;
static inline cycle_t get_cycle(void) { return 0; }
#endif
#endif
@@ -48,3 +85,9 @@ static inline cycle_t get_cycle(void) { return 0; }
#define TIMESTOP(i) {cycle_t cur = get_cycle(); _total_cycles[i] += cur - _last_stop[i]; _last_stop[i] = cur;}
#define TIMEPRINT(n) { for (int i = 0; i <= n; i++) printf("%d cycles: %" PRIu64 "\n", i, _total_cycles[i]); }
#endif
+
+
+#ifdef MAX_MEASURES
+ static cycle_t _last_stop[MAX_MEASURES] = {0};
+ static cycle_t _total_cycles[MAX_MEASURES] = {0};
+#endif
diff --git a/test/monniaux/quicksort/quicksort_run.c b/test/monniaux/quicksort/quicksort_run.c
index c35d0752..3c640b24 100644
--- a/test/monniaux/quicksort/quicksort_run.c
+++ b/test/monniaux/quicksort/quicksort_run.c
@@ -13,7 +13,7 @@ int main (void) {
quicksort(vec, len);
quicksort_time = get_cycle() - quicksort_time;
printf("sorted=%s\n"
- "time cycles:%" PRIu64 "\n",
+ "time cycles:%" PRcycle "\n",
data_vec_is_sorted(vec, len)?"true":"false",
quicksort_time);
free(vec);
diff --git a/test/monniaux/yarpgen/Makefile b/test/monniaux/yarpgen/Makefile
index 9da82deb..28bd5ae0 100644
--- a/test/monniaux/yarpgen/Makefile
+++ b/test/monniaux/yarpgen/Makefile
@@ -1,52 +1,98 @@
-YARPGEN=yarpgen
-MAX=300
+TARGET_CCOMP=../../../ccomp
+TARGET_CC=gcc
+
+all:
+
+.SECONDARY:
+
+ifndef YARPGEN
+YARPGEN=./yarpgen
+GENERATOR=yarpgen
+endif
+
+ifdef BITS
+YARPGEN+=-m $(BITS)
+CFLAGS+=-m$(BITS)
+endif
+
+MAX=129
PREFIX=ran%06.f
-include ../rules.mk
-
-K1C_CCOMPFLAGS += -funprototyped -fbitfields
-CCOMPFLAGS += -funprototyped -fbitfields
-
-TARGETS_C=$(shell seq --format $(PREFIX)/func.c 0 $(MAX)) \
- $(shell seq --format $(PREFIX)/driver.c 0 $(MAX)) \
- $(shell seq --format $(PREFIX)/init.h 0 $(MAX))
-TARGETS_CCOMP_K1C_S=$(shell seq --format $(PREFIX)/func.ccomp.k1c.s 0 $(MAX)) \
- $(shell seq --format $(PREFIX)/driver.ccomp.k1c.s 0 $(MAX))
-TARGETS_GCC_K1C_S=$(shell seq --format $(PREFIX)/func.gcc.k1c.s 0 $(MAX)) \
- $(shell seq --format $(PREFIX)/driver.gcc.k1c.s 0 $(MAX))
-TARGETS_CCOMP_HOST_S=$(shell seq --format $(PREFIX)/func.ccomp.host.s 0 $(MAX)) \
- $(shell seq --format $(PREFIX)/driver.ccomp.host.s 0 $(MAX))
-TARGETS_GCC_HOST_S=$(shell seq --format $(PREFIX)/func.gcc.host.s 0 $(MAX)) \
- $(shell seq --format $(PREFIX)/driver.gcc.host.s 0 $(MAX))
-TARGETS_CCOMP_K1C_OUT=$(shell seq --format $(PREFIX)/example.ccomp.k1c.out 0 $(MAX))
-TARGETS_GCC_K1C_OUT=$(shell seq --format $(PREFIX)/example.gcc.k1c.out 0 $(MAX))
-TARGETS_GCC_HOST_OUT=$(shell seq --format $(PREFIX)/example.gcc.host.out 0 $(MAX))
-TARGETS_CCOMP_HOST_OUT=$(shell seq --format $(PREFIX)/example.ccomp.host.out 0 $(MAX))
-TARGETS_CMP=$(shell seq --format $(PREFIX)/example.k1c.cmp 0 $(MAX))
-
-all: $(TARGETS_CCOMP_K1C_OUT) $(TARGETS_GCC_K1C_OUT) $(TARGETS_GCC_HOST_OUT) $(TARGETS_CCOMP_HOST_OUT) $(TARGETS_CCOMP_K1C_S) $(TARGETS_GCC_K1C_S) $(TARGETS_GCC_HOST_S) $(TARGETS_CCOMP_HOST_S) $(TARGETS_CMP) $(TARGETS_C)
-
-ran%/func.ccomp.k1c.s ran%/func.gcc.k1c.s ran%/func.ccomp.host.s ran%/func.gcc.host.s : ran%/init.h
-
-ran%/example.ccomp.k1c: ran%/func.ccomp.k1c.o ran%/driver.ccomp.k1c.o
- $(K1C_CCOMP) $(K1C_CCOMPFLAGS) $+ -o $@
-
-ran%/example.gcc.k1c: ran%/func.gcc.k1c.o ran%/driver.gcc.k1c.o
- $(K1C_CC) $(K1C_CFLAGS) $+ -o $@
-
-ran%/example.gcc.host: ran%/func.gcc.host.o ran%/driver.gcc.host.o
- $(CC) $(CFLAGS) $+ -o $@
-ran%/example.ccomp.host: ran%/func.ccomp.host.o ran%/driver.ccomp.host.o
- $(CCOMP) $(CCOMPFLAGS) $+ -o $@
+CCOMPOPTS=-static
+CCOMPFLAGS+=-funprototyped -fbitfields -fno-cse2 -stdlib ../../../runtime # FIXME
+
+TESTS_C=$(shell seq --format $(PREFIX)/func.c 1 $(MAX)) \
+ $(shell seq --format $(PREFIX)/driver.c 1 $(MAX)) \
+ $(shell seq --format $(PREFIX)/init.c 1 $(MAX)) \
+ $(shell seq --format $(PREFIX)/hash.c 1 $(MAX)) \
+ $(shell seq --format $(PREFIX)/check.c 1 $(MAX)) \
+ $(shell seq --format $(PREFIX)/init.h 1 $(MAX))
+
+$(TESTS_C): $(GENERATOR)
+
+TESTS_CCOMP_TARGET_S=$(TEST_C:.c=.ccomp.target.s)
+TESTS_GCC_TARGET_S=$(TEST_C:.c=.gcc.target.s)
+TESTS_GCC_HOST_S=$(TEST_C:.c=.gcc.host.s)
+TESTS_CCOMP_TARGET_OUT=$(shell seq --format $(PREFIX)/example.ccomp.target.out 1 $(MAX))
+TESTS_GCC_TARGET_OUT=$(shell seq --format $(PREFIX)/example.gcc.target.out 1 $(MAX))
+TESTS_GCC_HOST_OUT=$(shell seq --format $(PREFIX)/example.gcc.host.out 1 $(MAX))
+TESTS_CMP=$(shell seq --format $(PREFIX)/example.target.cmp 1 $(MAX)) # $(shell seq --format $(PREFIX)/example.host_target.cmp 1 $(MAX))
+
+all: $(TESTS_CCOMP_TARGET_OUT) $(TESTS_GCC_TARGET_OUT) $(TESTS_CCOMP_TARGET_S) $(TESTS_GCC_TARGET_S) $(TESTS_CMP) $(TESTS_C)
+
+tests_c: $(TESTS_C)
+
+tests_s: $(TESTS_CCOMP_TARGET_S)
+
+%.ccomp.target.s : %.c
+ $(TARGET_CCOMP) $(CCOMPOPTS) $(CCOMPFLAGS) -S -o $@ $<
+
+%.gcc.target.s : %.c
+ $(TARGET_CC) $(CCOMPOPTS) -S -o $@ $<
-ran%/driver.c ran%/func.c ran%/init.h:
- -mkdir ran$*
+%.gcc.host.s : %.c
+ $(CC) $(CFLAGS) -S -o $@ $<
+
+%.target.o : %.target.s
+ $(TARGET_CC) -c -o $@ $<
+
+%.host.o : %.host.s
+ $(CC) $(CFLAGS) -c -o $@ $<
+
+%.target.out : %.target
+ $(EXECUTE) $< | tee $@
+
+%.host.out : %.host
+ ./$< | tee $@
+
+ran%/func.ccomp.target.s ran%/func.gcc.target.s ran%/func.ccomp.host.s ran%/func.gcc.host.s ran%/init.gcc.host.s : ran%/init.h
+
+ran%/example.ccomp.target: ran%/func.ccomp.target.o ran%/driver.ccomp.target.o ran%/init.ccomp.target.o ran%/check.ccomp.target.o ran%/hash.ccomp.target.o
+ $(TARGET_CCOMP) $(CCOMPOPTS) $(CCOMPFLAGS) $+ -o $@
+
+ran%/example.gcc.target: ran%/func.gcc.target.o ran%/driver.gcc.target.o ran%/init.gcc.target.o ran%/check.gcc.target.o ran%/hash.gcc.target.o
+ $(TARGET_CC) $(TARGET_CFLAGS) $+ -o $@
+
+ran%/example.gcc.host: ran%/func.gcc.host.o ran%/driver.gcc.host.o ran%/init.gcc.host.o ran%/check.gcc.host.o ran%/hash.gcc.host.o
+ $(CC) $(CFLAGS) $+ -o $@
+
+ran%/driver.c ran%/func.c ran%/init.c ran%/check.c ran%/hash.c ran%/init.h:
+ mkdir -p ran$*
$(YARPGEN) --seed=$* --out-dir=ran$*/ --std=c99
-ran%/example.k1c.cmp : ran%/example.gcc.k1c.out ran%/example.ccomp.k1c.out
+ran%/example.target.cmp : ran%/example.gcc.target.out ran%/example.ccomp.target.out
cmp $+ > $@
-.PHONY: all clean
+ran%/example.host_target.cmp : ran%/example.gcc.host.out ran%/example.ccomp.target.out
+ cmp $+ > $@
+
+yarpgen:
+ curl -L -o yarpgen_v1.1.tar.gz https://github.com/intel/yarpgen/archive/v1.1.tar.gz
+ tar xfz yarpgen_v1.1.tar.gz
+ $(MAKE) CXX=g++ -C yarpgen-1.1
+ cp yarpgen-1.1/yarpgen $@
+
+.PHONY: all clean tests_c tests_c
clean:
-rm -rf ran*
diff --git a/test/monniaux/yarpgen/Makefile.old b/test/monniaux/yarpgen/Makefile.old
new file mode 100644
index 00000000..9da82deb
--- /dev/null
+++ b/test/monniaux/yarpgen/Makefile.old
@@ -0,0 +1,52 @@
+YARPGEN=yarpgen
+MAX=300
+PREFIX=ran%06.f
+include ../rules.mk
+
+K1C_CCOMPFLAGS += -funprototyped -fbitfields
+CCOMPFLAGS += -funprototyped -fbitfields
+
+TARGETS_C=$(shell seq --format $(PREFIX)/func.c 0 $(MAX)) \
+ $(shell seq --format $(PREFIX)/driver.c 0 $(MAX)) \
+ $(shell seq --format $(PREFIX)/init.h 0 $(MAX))
+TARGETS_CCOMP_K1C_S=$(shell seq --format $(PREFIX)/func.ccomp.k1c.s 0 $(MAX)) \
+ $(shell seq --format $(PREFIX)/driver.ccomp.k1c.s 0 $(MAX))
+TARGETS_GCC_K1C_S=$(shell seq --format $(PREFIX)/func.gcc.k1c.s 0 $(MAX)) \
+ $(shell seq --format $(PREFIX)/driver.gcc.k1c.s 0 $(MAX))
+TARGETS_CCOMP_HOST_S=$(shell seq --format $(PREFIX)/func.ccomp.host.s 0 $(MAX)) \
+ $(shell seq --format $(PREFIX)/driver.ccomp.host.s 0 $(MAX))
+TARGETS_GCC_HOST_S=$(shell seq --format $(PREFIX)/func.gcc.host.s 0 $(MAX)) \
+ $(shell seq --format $(PREFIX)/driver.gcc.host.s 0 $(MAX))
+TARGETS_CCOMP_K1C_OUT=$(shell seq --format $(PREFIX)/example.ccomp.k1c.out 0 $(MAX))
+TARGETS_GCC_K1C_OUT=$(shell seq --format $(PREFIX)/example.gcc.k1c.out 0 $(MAX))
+TARGETS_GCC_HOST_OUT=$(shell seq --format $(PREFIX)/example.gcc.host.out 0 $(MAX))
+TARGETS_CCOMP_HOST_OUT=$(shell seq --format $(PREFIX)/example.ccomp.host.out 0 $(MAX))
+TARGETS_CMP=$(shell seq --format $(PREFIX)/example.k1c.cmp 0 $(MAX))
+
+all: $(TARGETS_CCOMP_K1C_OUT) $(TARGETS_GCC_K1C_OUT) $(TARGETS_GCC_HOST_OUT) $(TARGETS_CCOMP_HOST_OUT) $(TARGETS_CCOMP_K1C_S) $(TARGETS_GCC_K1C_S) $(TARGETS_GCC_HOST_S) $(TARGETS_CCOMP_HOST_S) $(TARGETS_CMP) $(TARGETS_C)
+
+ran%/func.ccomp.k1c.s ran%/func.gcc.k1c.s ran%/func.ccomp.host.s ran%/func.gcc.host.s : ran%/init.h
+
+ran%/example.ccomp.k1c: ran%/func.ccomp.k1c.o ran%/driver.ccomp.k1c.o
+ $(K1C_CCOMP) $(K1C_CCOMPFLAGS) $+ -o $@
+
+ran%/example.gcc.k1c: ran%/func.gcc.k1c.o ran%/driver.gcc.k1c.o
+ $(K1C_CC) $(K1C_CFLAGS) $+ -o $@
+
+ran%/example.gcc.host: ran%/func.gcc.host.o ran%/driver.gcc.host.o
+ $(CC) $(CFLAGS) $+ -o $@
+
+ran%/example.ccomp.host: ran%/func.ccomp.host.o ran%/driver.ccomp.host.o
+ $(CCOMP) $(CCOMPFLAGS) $+ -o $@
+
+ran%/driver.c ran%/func.c ran%/init.h:
+ -mkdir ran$*
+ $(YARPGEN) --seed=$* --out-dir=ran$*/ --std=c99
+
+ran%/example.k1c.cmp : ran%/example.gcc.k1c.out ran%/example.ccomp.k1c.out
+ cmp $+ > $@
+
+.PHONY: all clean
+
+clean:
+ -rm -rf ran*
diff --git a/test/regression/Makefile b/test/regression/Makefile
index 3447d6a5..97c25f6c 100644
--- a/test/regression/Makefile
+++ b/test/regression/Makefile
@@ -35,9 +35,9 @@ endif
# but produce processor-dependent results, so no reference output in Results
TESTS_DIFF=NaNs
-ifeq ($(ARCH),mppa_k1c)
+# FIXME ifeq ($(ARCH),mppa_k1c)
TESTS_DIFF:=$(filter-out NaNs,$(TESTS_DIFF))
-endif
+# endif
# Other tests: should compile to .s without errors (but expect warnings)
diff --git a/test/regression/Results/int64 b/test/regression/Results/int64
index af444cf6..ae8a3cc1 100644
--- a/test/regression/Results/int64
+++ b/test/regression/Results/int64
@@ -335,6 +335,48 @@ utof x = 0
stof x = 0
x = 0
+y = 52ce6b4000000063
+-x = 0
+x + y = 52ce6b4000000063
+x - y = ad3194bfffffff9d
+x * y = 0
+x /u y = 0
+x %u y = 0
+x /s y = 0
+x %s y = 0
+x /u y2 = 0
+x %u y2 = 0
+x /s y3 = 0
+x %s y3 = 0
+x /u 3 = 0
+x %u 3 = 0
+x /s 3 = 0
+x %s 3 = 0
+x /u 5 = 0
+x %u 5 = 0
+x /s 5 = 0
+x %s 5 = 0
+x /u 11 = 0
+x %u 11 = 0
+x /s 11 = 0
+x %s 11 = 0
+~x = ffffffffffffffff
+x & y = 0
+x | y = 52ce6b4000000063
+x ^ y = 52ce6b4000000063
+x << i = 0
+x >>u i = 0
+x >>s i = 0
+x cmpu y = lt
+x cmps y = lt
+utod x = 0
+dtou f = 0
+stod x = 0
+dtos f = 0
+utof x = 0
+stof x = 0
+
+x = 0
y = 14057b7ef767814f
-x = 0
x + y = 14057b7ef767814f
@@ -755,6 +797,48 @@ utof x = 3f800000
stof x = 3f800000
x = 1
+y = 52ce6b4000000063
+-x = ffffffffffffffff
+x + y = 52ce6b4000000064
+x - y = ad3194bfffffff9e
+x * y = 52ce6b4000000063
+x /u y = 0
+x %u y = 1
+x /s y = 0
+x %s y = 1
+x /u y2 = 0
+x %u y2 = 1
+x /s y3 = 0
+x %s y3 = 1
+x /u 3 = 0
+x %u 3 = 1
+x /s 3 = 0
+x %s 3 = 1
+x /u 5 = 0
+x %u 5 = 1
+x /s 5 = 0
+x %s 5 = 1
+x /u 11 = 0
+x %u 11 = 1
+x /s 11 = 0
+x %s 11 = 1
+~x = fffffffffffffffe
+x & y = 1
+x | y = 52ce6b4000000063
+x ^ y = 52ce6b4000000062
+x << i = 800000000
+x >>u i = 0
+x >>s i = 0
+x cmpu y = lt
+x cmps y = lt
+utod x = 3ff0000000000000
+dtou f = 0
+stod x = 3ff0000000000000
+dtos f = 0
+utof x = 3f800000
+stof x = 3f800000
+
+x = 1
y = 9af678222e728119
-x = ffffffffffffffff
x + y = 9af678222e72811a
@@ -1175,6 +1259,48 @@ utof x = 5f800000
stof x = bf800000
x = ffffffffffffffff
+y = 52ce6b4000000063
+-x = 1
+x + y = 52ce6b4000000062
+x - y = ad3194bfffffff9c
+x * y = ad3194bfffffff9d
+x /u y = 3
+x %u y = 794be3ffffffed6
+x /s y = 0
+x %s y = ffffffffffffffff
+x /u y2 = 3176fe836
+x %u y2 = 3683607f
+x /s y3 = 0
+x %s y3 = ffffffffffffffff
+x /u 3 = 5555555555555555
+x %u 3 = 0
+x /s 3 = 0
+x %s 3 = ffffffffffffffff
+x /u 5 = 3333333333333333
+x %u 5 = 0
+x /s 5 = 0
+x %s 5 = ffffffffffffffff
+x /u 11 = 1745d1745d1745d1
+x %u 11 = 4
+x /s 11 = 0
+x %s 11 = ffffffffffffffff
+~x = 0
+x & y = 52ce6b4000000063
+x | y = ffffffffffffffff
+x ^ y = ad3194bfffffff9c
+x << i = fffffff800000000
+x >>u i = 1fffffff
+x >>s i = ffffffffffffffff
+x cmpu y = gt
+x cmps y = lt
+utod x = 43f0000000000000
+dtou f = 68db8bac710cb
+stod x = bff0000000000000
+dtos f = 0
+utof x = 5f800000
+stof x = bf800000
+
+x = ffffffffffffffff
y = 62354cda6226d1f3
-x = 1
x + y = 62354cda6226d1f2
@@ -1595,6 +1721,48 @@ utof x = 4f000000
stof x = 4f000000
x = 7fffffff
+y = 52ce6b4000000063
+-x = ffffffff80000001
+x + y = 52ce6b4080000062
+x - y = ad3194c07fffff9c
+x * y = ad3194f17fffff9d
+x /u y = 0
+x %u y = 7fffffff
+x /s y = 0
+x %s y = 7fffffff
+x /u y2 = 1
+x %u y2 = 2d3194bf
+x /s y3 = 1
+x %s y3 = 2d3194bf
+x /u 3 = 2aaaaaaa
+x %u 3 = 1
+x /s 3 = 2aaaaaaa
+x %s 3 = 1
+x /u 5 = 19999999
+x %u 5 = 2
+x /s 5 = 19999999
+x %s 5 = 2
+x /u 11 = ba2e8ba
+x %u 11 = 1
+x /s 11 = ba2e8ba
+x %s 11 = 1
+~x = ffffffff80000000
+x & y = 63
+x | y = 52ce6b407fffffff
+x ^ y = 52ce6b407fffff9c
+x << i = fffffff800000000
+x >>u i = 0
+x >>s i = 0
+x cmpu y = lt
+x cmps y = lt
+utod x = 41dfffffffc00000
+dtou f = 346dc
+stod x = 41dfffffffc00000
+dtos f = 346dc
+utof x = 4f000000
+stof x = 4f000000
+
+x = 7fffffff
y = 144093704fadba5d
-x = ffffffff80000001
x + y = 14409370cfadba5c
@@ -2015,6 +2183,48 @@ utof x = 4f000000
stof x = 4f000000
x = 80000000
+y = 52ce6b4000000063
+-x = ffffffff80000000
+x + y = 52ce6b4080000063
+x - y = ad3194c07fffff9d
+x * y = 3180000000
+x /u y = 0
+x %u y = 80000000
+x /s y = 0
+x %s y = 80000000
+x /u y2 = 1
+x %u y2 = 2d3194c0
+x /s y3 = 1
+x %s y3 = 2d3194c0
+x /u 3 = 2aaaaaaa
+x %u 3 = 2
+x /s 3 = 2aaaaaaa
+x %s 3 = 2
+x /u 5 = 19999999
+x %u 5 = 3
+x /s 5 = 19999999
+x %s 5 = 3
+x /u 11 = ba2e8ba
+x %u 11 = 2
+x /s 11 = ba2e8ba
+x %s 11 = 2
+~x = ffffffff7fffffff
+x & y = 0
+x | y = 52ce6b4080000063
+x ^ y = 52ce6b4080000063
+x << i = 0
+x >>u i = 0
+x >>s i = 0
+x cmpu y = lt
+x cmps y = lt
+utod x = 41e0000000000000
+dtou f = 346dc
+stod x = 41e0000000000000
+dtos f = 346dc
+utof x = 4f000000
+stof x = 4f000000
+
+x = 80000000
y = 7b985bc1e7bce4d7
-x = ffffffff80000000
x + y = 7b985bc267bce4d7
@@ -2435,6 +2645,48 @@ utof x = 5f000000
stof x = 5f000000
x = 7fffffffffffffff
+y = 52ce6b4000000063
+-x = 8000000000000001
+x + y = d2ce6b4000000062
+x - y = 2d3194bfffffff9c
+x * y = 2d3194bfffffff9d
+x /u y = 1
+x %u y = 2d3194bfffffff9c
+x /s y = 1
+x %s y = 2d3194bfffffff9c
+x /u y2 = 18bb7f41b
+x %u y2 = 1b41b03f
+x /s y3 = 18bb7f41b
+x %s y3 = 1b41b03f
+x /u 3 = 2aaaaaaaaaaaaaaa
+x %u 3 = 1
+x /s 3 = 2aaaaaaaaaaaaaaa
+x %s 3 = 1
+x /u 5 = 1999999999999999
+x %u 5 = 2
+x /s 5 = 1999999999999999
+x %s 5 = 2
+x /u 11 = ba2e8ba2e8ba2e8
+x %u 11 = 7
+x /s 11 = ba2e8ba2e8ba2e8
+x %s 11 = 7
+~x = 8000000000000000
+x & y = 52ce6b4000000063
+x | y = 7fffffffffffffff
+x ^ y = 2d3194bfffffff9c
+x << i = fffffff800000000
+x >>u i = fffffff
+x >>s i = fffffff
+x cmpu y = gt
+x cmps y = gt
+utod x = 43e0000000000000
+dtou f = 346dc5d638865
+stod x = 43e0000000000000
+dtos f = 346dc5d638865
+utof x = 5f000000
+stof x = 5f000000
+
+x = 7fffffffffffffff
y = a220229ec164ffe1
-x = 8000000000000001
x + y = 2220229ec164ffe0
@@ -2855,6 +3107,48 @@ utof x = 5f000000
stof x = df000000
x = 8000000000000000
+y = 52ce6b4000000063
+-x = 8000000000000000
+x + y = d2ce6b4000000063
+x - y = 2d3194bfffffff9d
+x * y = 8000000000000000
+x /u y = 1
+x %u y = 2d3194bfffffff9d
+x /s y = ffffffffffffffff
+x %s y = d2ce6b4000000063
+x /u y2 = 18bb7f41b
+x %u y2 = 1b41b040
+x /s y3 = fffffffe74480be5
+x %s y3 = ffffffffe4be4fc0
+x /u 3 = 2aaaaaaaaaaaaaaa
+x %u 3 = 2
+x /s 3 = d555555555555556
+x %s 3 = fffffffffffffffe
+x /u 5 = 1999999999999999
+x %u 5 = 3
+x /s 5 = e666666666666667
+x %s 5 = fffffffffffffffd
+x /u 11 = ba2e8ba2e8ba2e8
+x %u 11 = 8
+x /s 11 = f45d1745d1745d18
+x %s 11 = fffffffffffffff8
+~x = 7fffffffffffffff
+x & y = 0
+x | y = d2ce6b4000000063
+x ^ y = d2ce6b4000000063
+x << i = 0
+x >>u i = 10000000
+x >>s i = fffffffff0000000
+x cmpu y = gt
+x cmps y = lt
+utod x = 43e0000000000000
+dtou f = 346dc5d638865
+stod x = c3e0000000000000
+dtos f = fffcb923a29c779b
+utof x = 5f000000
+stof x = df000000
+
+x = 8000000000000000
y = c73aa0d9a415dfb
-x = 8000000000000000
x + y = 8c73aa0d9a415dfb
@@ -3275,6 +3569,48 @@ utof x = 4f800000
stof x = 4f800000
x = 100000003
+y = 52ce6b4000000063
+-x = fffffffefffffffd
+x + y = 52ce6b4100000066
+x - y = ad3194c0ffffffa0
+x * y = f86b422300000129
+x /u y = 0
+x %u y = 100000003
+x /s y = 0
+x %s y = 100000003
+x /u y2 = 3
+x %u y2 = 794be43
+x /s y3 = 3
+x %s y3 = 794be43
+x /u 3 = 55555556
+x %u 3 = 1
+x /s 3 = 55555556
+x %s 3 = 1
+x /u 5 = 33333333
+x %u 5 = 4
+x /s 5 = 33333333
+x %s 5 = 4
+x /u 11 = 1745d174
+x %u 11 = 7
+x /s 11 = 1745d174
+x %s 11 = 7
+~x = fffffffefffffffc
+x & y = 3
+x | y = 52ce6b4100000063
+x ^ y = 52ce6b4100000060
+x << i = 1800000000
+x >>u i = 0
+x >>s i = 0
+x cmpu y = lt
+x cmps y = lt
+utod x = 41f0000000300000
+dtou f = 68db8
+stod x = 41f0000000300000
+dtos f = 68db8
+utof x = 4f800000
+stof x = 4f800000
+
+x = 100000003
y = e9bcd26890f095a5
-x = fffffffefffffffd
x + y = e9bcd26990f095a8
@@ -3358,47 +3694,467 @@ dtos f = 14bb101261e18
utof x = 5e4a72c9
stof x = 5e4a72c9
-x = 8362aa9340fe215f
-y = f986342416ec8002
--x = 7c9d556cbf01dea1
-x + y = 7ce8deb757eaa161
-x - y = 89dc766f2a11a15d
-x * y = e4a2b426803fc2be
+x = 52ce6b4000000063
+y = 0
+-x = ad3194bfffffff9d
+x + y = 52ce6b4000000063
+x - y = 52ce6b4000000063
+x * y = 0
x /u y = 0
-x %u y = 8362aa9340fe215f
-x /s y = 13
-x %s y = fe6ccbe58d70a139
-x /u y2 = 86cb918b
-x %u y2 = 910b6dd3
-x /s y3 = 133e437097
-x %s y3 = fffffffffe99a023
-x /u 3 = 2bcb8e3115aa0b1f
-x %u 3 = 2
-x /s 3 = d67638dbc054b5cb
-x %s 3 = fffffffffffffffe
-x /u 5 = 1a46eeea4032d379
-x %u 5 = 2
-x /s 5 = e713bbb70cffa047
-x %s 5 = fffffffffffffffc
-x /u 11 = bf1b26a7a45a5f1
-x %u 11 = 4
-x /s 11 = f4abe0f61d2e6020
-x %s 11 = ffffffffffffffff
-~x = 7c9d556cbf01dea0
-x & y = 8102200000ec0002
-x | y = fbe6beb756fea15f
-x ^ y = 7ae49eb75612a15d
-x << i = d8aaa4d03f8857c
-x >>u i = 20d8aaa4d03f8857
-x >>s i = e0d8aaa4d03f8857
+x %u y = 0
+x /s y = 0
+x %s y = 0
+x /u y2 = 0
+x %u y2 = 0
+x /s y3 = 0
+x %s y3 = 0
+x /u 3 = 1b9a23c000000021
+x %u 3 = 0
+x /s 3 = 1b9a23c000000021
+x %s 3 = 0
+x /u 5 = 108faf0ccccccce0
+x %u 5 = 3
+x /s 5 = 108faf0ccccccce0
+x %s 5 = 3
+x /u 11 = 7872105d1745d20
+x %u 11 = 3
+x /s 11 = 7872105d1745d20
+x %s 11 = 3
+~x = ad3194bfffffff9c
+x & y = 0
+x | y = 52ce6b4000000063
+x ^ y = 52ce6b4000000063
+x << i = 52ce6b4000000063
+x >>u i = 52ce6b4000000063
+x >>s i = 52ce6b4000000063
+x cmpu y = gt
+x cmps y = gt
+utod x = 43d4b39ad0000000
+dtou f = 21eadf559b3d0
+stod x = 43d4b39ad0000000
+dtos f = 21eadf559b3d0
+utof x = 5ea59cd7
+stof x = 5ea59cd7
+
+x = 52ce6b4000000063
+y = 1
+-x = ad3194bfffffff9d
+x + y = 52ce6b4000000064
+x - y = 52ce6b4000000062
+x * y = 52ce6b4000000063
+x /u y = 52ce6b4000000063
+x %u y = 0
+x /s y = 52ce6b4000000063
+x %s y = 0
+x /u y2 = 0
+x %u y2 = 0
+x /s y3 = 0
+x %s y3 = 0
+x /u 3 = 1b9a23c000000021
+x %u 3 = 0
+x /s 3 = 1b9a23c000000021
+x %s 3 = 0
+x /u 5 = 108faf0ccccccce0
+x %u 5 = 3
+x /s 5 = 108faf0ccccccce0
+x %s 5 = 3
+x /u 11 = 7872105d1745d20
+x %u 11 = 3
+x /s 11 = 7872105d1745d20
+x %s 11 = 3
+~x = ad3194bfffffff9c
+x & y = 1
+x | y = 52ce6b4000000063
+x ^ y = 52ce6b4000000062
+x << i = a59cd680000000c6
+x >>u i = 296735a000000031
+x >>s i = 296735a000000031
+x cmpu y = gt
+x cmps y = gt
+utod x = 43d4b39ad0000000
+dtou f = 21eadf559b3d0
+stod x = 43d4b39ad0000000
+dtos f = 21eadf559b3d0
+utof x = 5ea59cd7
+stof x = 5ea59cd7
+
+x = 52ce6b4000000063
+y = ffffffffffffffff
+-x = ad3194bfffffff9d
+x + y = 52ce6b4000000062
+x - y = 52ce6b4000000064
+x * y = ad3194bfffffff9d
+x /u y = 0
+x %u y = 52ce6b4000000063
+x /s y = ad3194bfffffff9d
+x %s y = 0
+x /u y2 = 52ce6b40
+x %u y2 = 52ce6ba3
+x /s y3 = ad3194bfffffff9d
+x %s y3 = 0
+x /u 3 = 1b9a23c000000021
+x %u 3 = 0
+x /s 3 = 1b9a23c000000021
+x %s 3 = 0
+x /u 5 = 108faf0ccccccce0
+x %u 5 = 3
+x /s 5 = 108faf0ccccccce0
+x %s 5 = 3
+x /u 11 = 7872105d1745d20
+x %u 11 = 3
+x /s 11 = 7872105d1745d20
+x %s 11 = 3
+~x = ad3194bfffffff9c
+x & y = 52ce6b4000000063
+x | y = ffffffffffffffff
+x ^ y = ad3194bfffffff9c
+x << i = 8000000000000000
+x >>u i = 0
+x >>s i = 0
+x cmpu y = lt
+x cmps y = gt
+utod x = 43d4b39ad0000000
+dtou f = 21eadf559b3d0
+stod x = 43d4b39ad0000000
+dtos f = 21eadf559b3d0
+utof x = 5ea59cd7
+stof x = 5ea59cd7
+
+x = 52ce6b4000000063
+y = 7fffffff
+-x = ad3194bfffffff9d
+x + y = 52ce6b4080000062
+x - y = 52ce6b3f80000064
+x * y = ad3194f17fffff9d
+x /u y = a59cd681
+x %u y = 259cd6e4
+x /s y = a59cd681
+x %s y = 259cd6e4
+x /u y2 = 0
+x %u y2 = 0
+x /s y3 = 0
+x %s y3 = 0
+x /u 3 = 1b9a23c000000021
+x %u 3 = 0
+x /s 3 = 1b9a23c000000021
+x %s 3 = 0
+x /u 5 = 108faf0ccccccce0
+x %u 5 = 3
+x /s 5 = 108faf0ccccccce0
+x %s 5 = 3
+x /u 11 = 7872105d1745d20
+x %u 11 = 3
+x /s 11 = 7872105d1745d20
+x %s 11 = 3
+~x = ad3194bfffffff9c
+x & y = 63
+x | y = 52ce6b407fffffff
+x ^ y = 52ce6b407fffff9c
+x << i = 8000000000000000
+x >>u i = 0
+x >>s i = 0
+x cmpu y = gt
+x cmps y = gt
+utod x = 43d4b39ad0000000
+dtou f = 21eadf559b3d0
+stod x = 43d4b39ad0000000
+dtos f = 21eadf559b3d0
+utof x = 5ea59cd7
+stof x = 5ea59cd7
+
+x = 52ce6b4000000063
+y = 80000000
+-x = ad3194bfffffff9d
+x + y = 52ce6b4080000063
+x - y = 52ce6b3f80000063
+x * y = 3180000000
+x /u y = a59cd680
+x %u y = 63
+x /s y = a59cd680
+x %s y = 63
+x /u y2 = 0
+x %u y2 = 0
+x /s y3 = 0
+x %s y3 = 0
+x /u 3 = 1b9a23c000000021
+x %u 3 = 0
+x /s 3 = 1b9a23c000000021
+x %s 3 = 0
+x /u 5 = 108faf0ccccccce0
+x %u 5 = 3
+x /s 5 = 108faf0ccccccce0
+x %s 5 = 3
+x /u 11 = 7872105d1745d20
+x %u 11 = 3
+x /s 11 = 7872105d1745d20
+x %s 11 = 3
+~x = ad3194bfffffff9c
+x & y = 0
+x | y = 52ce6b4080000063
+x ^ y = 52ce6b4080000063
+x << i = 52ce6b4000000063
+x >>u i = 52ce6b4000000063
+x >>s i = 52ce6b4000000063
+x cmpu y = gt
+x cmps y = gt
+utod x = 43d4b39ad0000000
+dtou f = 21eadf559b3d0
+stod x = 43d4b39ad0000000
+dtos f = 21eadf559b3d0
+utof x = 5ea59cd7
+stof x = 5ea59cd7
+
+x = 52ce6b4000000063
+y = 7fffffffffffffff
+-x = ad3194bfffffff9d
+x + y = d2ce6b4000000062
+x - y = d2ce6b4000000064
+x * y = 2d3194bfffffff9d
+x /u y = 0
+x %u y = 52ce6b4000000063
+x /s y = 0
+x %s y = 52ce6b4000000063
+x /u y2 = a59cd681
+x %u y2 = 259cd6e4
+x /s y3 = a59cd681
+x %s y3 = 259cd6e4
+x /u 3 = 1b9a23c000000021
+x %u 3 = 0
+x /s 3 = 1b9a23c000000021
+x %s 3 = 0
+x /u 5 = 108faf0ccccccce0
+x %u 5 = 3
+x /s 5 = 108faf0ccccccce0
+x %s 5 = 3
+x /u 11 = 7872105d1745d20
+x %u 11 = 3
+x /s 11 = 7872105d1745d20
+x %s 11 = 3
+~x = ad3194bfffffff9c
+x & y = 52ce6b4000000063
+x | y = 7fffffffffffffff
+x ^ y = 2d3194bfffffff9c
+x << i = 8000000000000000
+x >>u i = 0
+x >>s i = 0
x cmpu y = lt
x cmps y = lt
-utod x = 43e06c5552681fc4
-dtou f = 35d0c262d14d7
-stod x = c3df27555b2fc078
-dtos f = fffccf536b66040d
-utof x = 5f0362ab
-stof x = def93aab
+utod x = 43d4b39ad0000000
+dtou f = 21eadf559b3d0
+stod x = 43d4b39ad0000000
+dtos f = 21eadf559b3d0
+utof x = 5ea59cd7
+stof x = 5ea59cd7
+
+x = 52ce6b4000000063
+y = 8000000000000000
+-x = ad3194bfffffff9d
+x + y = d2ce6b4000000063
+x - y = d2ce6b4000000063
+x * y = 8000000000000000
+x /u y = 0
+x %u y = 52ce6b4000000063
+x /s y = 0
+x %s y = 52ce6b4000000063
+x /u y2 = a59cd680
+x %u y2 = 63
+x /s y3 = ffffffff5a632980
+x %s y3 = 63
+x /u 3 = 1b9a23c000000021
+x %u 3 = 0
+x /s 3 = 1b9a23c000000021
+x %s 3 = 0
+x /u 5 = 108faf0ccccccce0
+x %u 5 = 3
+x /s 5 = 108faf0ccccccce0
+x %s 5 = 3
+x /u 11 = 7872105d1745d20
+x %u 11 = 3
+x /s 11 = 7872105d1745d20
+x %s 11 = 3
+~x = ad3194bfffffff9c
+x & y = 0
+x | y = d2ce6b4000000063
+x ^ y = d2ce6b4000000063
+x << i = 52ce6b4000000063
+x >>u i = 52ce6b4000000063
+x >>s i = 52ce6b4000000063
+x cmpu y = lt
+x cmps y = gt
+utod x = 43d4b39ad0000000
+dtou f = 21eadf559b3d0
+stod x = 43d4b39ad0000000
+dtos f = 21eadf559b3d0
+utof x = 5ea59cd7
+stof x = 5ea59cd7
+
+x = 52ce6b4000000063
+y = 100000003
+-x = ad3194bfffffff9d
+x + y = 52ce6b4100000066
+x - y = 52ce6b3f00000060
+x * y = f86b422300000129
+x /u y = 52ce6b3f
+x %u y = 794bea6
+x /s y = 52ce6b3f
+x %s y = 794bea6
+x /u y2 = 52ce6b4000000063
+x %u y2 = 0
+x /s y3 = 52ce6b4000000063
+x %s y3 = 0
+x /u 3 = 1b9a23c000000021
+x %u 3 = 0
+x /s 3 = 1b9a23c000000021
+x %s 3 = 0
+x /u 5 = 108faf0ccccccce0
+x %u 5 = 3
+x /s 5 = 108faf0ccccccce0
+x %s 5 = 3
+x /u 11 = 7872105d1745d20
+x %u 11 = 3
+x /s 11 = 7872105d1745d20
+x %s 11 = 3
+~x = ad3194bfffffff9c
+x & y = 3
+x | y = 52ce6b4100000063
+x ^ y = 52ce6b4100000060
+x << i = 96735a0000000318
+x >>u i = a59cd680000000c
+x >>s i = a59cd680000000c
+x cmpu y = gt
+x cmps y = gt
+utod x = 43d4b39ad0000000
+dtou f = 21eadf559b3d0
+stod x = 43d4b39ad0000000
+dtos f = 21eadf559b3d0
+utof x = 5ea59cd7
+stof x = 5ea59cd7
+
+x = 52ce6b4000000063
+y = 52ce6b4000000063
+-x = ad3194bfffffff9d
+x + y = a59cd680000000c6
+x - y = 0
+x * y = ba6f38000002649
+x /u y = 1
+x %u y = 0
+x /s y = 1
+x %s y = 0
+x /u y2 = 100000000
+x %u y2 = 63
+x /s y3 = 100000000
+x %s y3 = 63
+x /u 3 = 1b9a23c000000021
+x %u 3 = 0
+x /s 3 = 1b9a23c000000021
+x %s 3 = 0
+x /u 5 = 108faf0ccccccce0
+x %u 5 = 3
+x /s 5 = 108faf0ccccccce0
+x %s 5 = 3
+x /u 11 = 7872105d1745d20
+x %u 11 = 3
+x /s 11 = 7872105d1745d20
+x %s 11 = 3
+~x = ad3194bfffffff9c
+x & y = 52ce6b4000000063
+x | y = 52ce6b4000000063
+x ^ y = 0
+x << i = 31800000000
+x >>u i = a59cd68
+x >>s i = a59cd68
+x cmpu y = eq
+x cmps y = eq
+utod x = 43d4b39ad0000000
+dtou f = 21eadf559b3d0
+stod x = 43d4b39ad0000000
+dtos f = 21eadf559b3d0
+utof x = 5ea59cd7
+stof x = 5ea59cd7
+
+x = 52ce6b4000000063
+y = 8362aa9340fe215f
+-x = ad3194bfffffff9d
+x + y = d63115d340fe21c2
+x - y = cf6bc0acbf01df04
+x * y = 8f1503b22246e7bd
+x /u y = 0
+x %u y = 52ce6b4000000063
+x /s y = 0
+x %s y = 52ce6b4000000063
+x /u y2 = a158656f
+x %u y2 = 5640ba6
+x /s y3 = ffffffff55e35d11
+x %s y3 = 5f2245a0
+x /u 3 = 1b9a23c000000021
+x %u 3 = 0
+x /s 3 = 1b9a23c000000021
+x %s 3 = 0
+x /u 5 = 108faf0ccccccce0
+x %u 5 = 3
+x /s 5 = 108faf0ccccccce0
+x %s 5 = 3
+x /u 11 = 7872105d1745d20
+x %u 11 = 3
+x /s 11 = 7872105d1745d20
+x %s 11 = 3
+~x = ad3194bfffffff9c
+x & y = 2422a0000000043
+x | y = d3eeebd340fe217f
+x ^ y = d1acc1d340fe213c
+x << i = 3180000000
+x >>u i = a59cd680
+x >>s i = a59cd680
+x cmpu y = lt
+x cmps y = gt
+utod x = 43d4b39ad0000000
+dtou f = 21eadf559b3d0
+stod x = 43d4b39ad0000000
+dtos f = 21eadf559b3d0
+utof x = 5ea59cd7
+stof x = 5ea59cd7
+
+x = f986342416ec8002
+y = 52ce6b4000000063
+-x = 679cbdbe9137ffe
+x + y = 4c549f6416ec8065
+x - y = a6b7c8e416ec7f9f
+x * y = b9230074dd7580c6
+x /u y = 3
+x %u y = 11af26416ec7ed9
+x /s y = 0
+x %s y = f986342416ec8002
+x /u y2 = 3036abea3
+x %u y2 = 164b642
+x /s y3 = ffffffffebfad66d
+x %s y3 = ffffffffcae155c2
+x /u 3 = 532cbc0c07a42aab
+x %u 3 = 1
+x /s 3 = fdd766b6b24ed556
+x %s 3 = 0
+x /u 5 = 31e7a40737c8e666
+x %u 5 = 4
+x /s 5 = feb470d40495b334
+x %s 5 = fffffffffffffffe
+x /u 11 = 16af1c0347e6f45d
+x %u 11 = 3
+x /s 11 = ff694a8eeacfae8c
+x %s 11 = fffffffffffffffe
+~x = 679cbdbe9137ffd
+x & y = 5086200000000002
+x | y = fbce7f6416ec8063
+x ^ y = ab485f6416ec8061
+x << i = b764001000000000
+x >>u i = 1f30c684
+x >>s i = ffffffffff30c684
+x cmpu y = gt
+x cmps y = lt
+utod x = 43ef30c68482dd90
+dtou f = 6634832136daf
+stod x = c399e72f6fa44e00
+dtos f = ffffd58f774c5ce4
+utof x = 5f798634
+stof x = dccf397b
x = 368083376ba4ffa9
y = 6912b247b79a4904
@@ -7558,3 +8314,45 @@ dtos f = b3fdf698d581
utof x = 5ddbb784
stof x = 5ddbb784
+x = ca9a47c1649d27a7
+y = d56d650045e652aa
+-x = 3565b83e9b62d859
+x + y = a007acc1aa837a51
+x - y = f52ce2c11eb6d4fd
+x * y = 630e3c88ca19d2e6
+x /u y = 0
+x %u y = ca9a47c1649d27a7
+x /s y = 1
+x %s y = f52ce2c11eb6d4fd
+x /u y2 = f3042098
+x %u y2 = 6b092fa7
+x /s y3 = 141176486
+x %s y3 = ffffffffdee649a7
+x /u 3 = 4388c295cc34628d
+x %u 3 = 0
+x /s 3 = ee336d4076df0d38
+x %s 3 = ffffffffffffffff
+x /u 5 = 2885418d141f6e54
+x %u 5 = 3
+x /s 5 = f5520e59e0ec3b22
+x %s 5 = fffffffffffffffd
+x /u 11 = 126b1dcbc3541ae0
+x %u 11 = 7
+x /s 11 = fb254c57663cd510
+x %s 11 = fffffffffffffff7
+~x = 3565b83e9b62d858
+x & y = c0084500448402a2
+x | y = dfff67c165ff77af
+x ^ y = 1ff722c1217b750d
+x << i = 749e9c0000000000
+x >>u i = 32a691
+x >>s i = fffffffffff2a691
+x cmpu y = lt
+x cmps y = lt
+utod x = 43e95348f82c93a5
+dtou f = 52fc6dac31674
+stod x = c3cab2dc1f4db16c
+dtos f = fffea20e1ffc05aa
+utof x = 5f4a9a48
+stof x = de5596e1
+
diff --git a/test/regression/int64.c b/test/regression/int64.c
index d9785e95..0da9602d 100644
--- a/test/regression/int64.c
+++ b/test/regression/int64.c
@@ -103,7 +103,8 @@ u64 special_values[] = {
0x80000000LLU,
0x7FFFFFFFFFFFFFFFLLU,
0x8000000000000000LLU,
- 0x100000003LLU
+ 0x100000003LLU,
+ 0x52ce6b4000000063LLU
};
#define NUM_SPECIAL_VALUES (sizeof(special_values) / sizeof(u64))
diff --git a/test/regression/packedstruct1.c b/test/regression/packedstruct1.c
index ac68c698..b805c92a 100644
--- a/test/regression/packedstruct1.c
+++ b/test/regression/packedstruct1.c
@@ -23,9 +23,9 @@ void test1(void)
struct s1 s1;
printf("sizeof(struct s1) = %d\n", szof(s1));
printf("precomputed sizeof(struct s1) = %d\n", bszof(s1));
- printf("offsetOf(x) = %d, offsetOf(y) = %d, offsetOf(z) = %d\n",
+ printf("offsetof(x) = %d, offsetof(y) = %d, offsetof(z) = %d\n",
offsetOf(s1,x), offsetOf(s1,y), offsetOf(s1,z));
- printf("precomputed offsetOf(x) = %d, offsetOf(y) = %d, offsetOf(z) = %d\n",
+ printf("precomputed offsetof(x) = %d, offsetof(y) = %d, offsetof(z) = %d\n",
boffsetof(s1,x), boffsetof(s1,y), boffsetof(s1,z));
s1.x = 123; s1.y = -456; s1.z = 3.14159;
printf("s1 = {x = %d, y = %d, z = %.5f}\n\n", s1.x, s1.y, s1.z);
@@ -44,9 +44,9 @@ void test2(void)
printf("sizeof(struct s2) = %d\n", szof(s2));
printf("precomputed sizeof(struct s2) = %d\n", bszof(s2));
printf("&s2 mod 16 = %d\n", ((int) &s2) & 0xF);
- printf("offsetOf(x) = %d, offsetOf(y) = %d, offsetOf(z) = %d\n",
+ printf("offsetof(x) = %d, offsetof(y) = %d, offsetof(z) = %d\n",
offsetOf(s2,x), offsetOf(s2,y), offsetOf(s2,z));
- printf("precomputed offsetOf(x) = %d, offsetOf(y) = %d, offsetOf(z) = %d\n",
+ printf("precomputed offsetof(x) = %d, offsetof(y) = %d, offsetof(z) = %d\n",
boffsetof(s2,x), boffsetof(s2,y), boffsetof(s2,z));
s2.x = 12345; s2.y = -456; s2.z = 3.14159;
printf("s2 = {x = %d, y = %d, z = %.5f}\n\n", s2.x, s2.y, s2.z);
@@ -73,8 +73,8 @@ void test3(void)
printf("sizeof(struct s3) = %d\n", szof(s3));
printf("precomputed sizeof(struct s3) = %d\n", bszof(s3));
- printf("offsetOf(s) = %d\n", offsetOf(s3,s));
- printf("precomputed offsetOf(s) = %d\n", boffsetof(s3,s));
+ printf("offsetof(s) = %d\n", offsetOf(s3,s));
+ printf("precomputed offsetof(s) = %d\n", boffsetof(s3,s));
s3.x = 123;
s3.y = 45678;
s3.z = 0x80000001U;
@@ -103,9 +103,9 @@ void test4(void)
printf("sizeof(struct s4) = %d\n", szof(s4));
printf("precomputed sizeof(struct s4) = %d\n", bszof(s4));
- printf("offsetOf(x) = %d, offsetOf(y) = %d, offsetOf(z) = %d\n",
+ printf("offsetof(x) = %d, offsetof(y) = %d, offsetof(z) = %d\n",
offsetOf(s4,x), offsetOf(s4,y), offsetOf(s4,z));
- printf("precomputed offsetOf(x) = %d, offsetOf(y) = %d, offsetOf(z) = %d\n",
+ printf("precomputed offsetof(x) = %d, offsetof(y) = %d, offsetof(z) = %d\n",
boffsetof(s4,x), boffsetof(s4,y), boffsetof(s4,z));
s4.x = 123; s4.y = -456; s4.z = 3.14159;
printf("s4 = {x = %d, y = %d, z = %.5f}\n\n", s4.x, s4.y, s4.z);
@@ -121,9 +121,9 @@ void test5(void)
printf("sizeof(struct s5) = %d\n", szof(s5));
printf("precomputed sizeof(struct s5) = %d\n", bszof(s5));
- printf("offsetOf(x) = %d, offsetOf(y) = %d, offsetOf(z) = %d\n",
+ printf("offsetof(x) = %d, offsetof(y) = %d, offsetof(z) = %d\n",
offsetOf(s5,x), offsetOf(s5,y), offsetOf(s5,z));
- printf("precomputed offsetOf(x) = %d, offsetOf(y) = %d, offsetOf(z) = %d\n",
+ printf("precomputed offsetof(x) = %d, offsetof(y) = %d, offsetof(z) = %d\n",
boffsetof(s5,x), boffsetof(s5,y), boffsetof(s5,z));
s5.x = 123; s5.y = -456; s5.z = 3.14159;
printf("s5 = {x = %d, y = %d, z = %.5f}\n\n", s5.x, s5.y, s5.z);
@@ -139,9 +139,9 @@ void test6(void)
printf("sizeof(struct s6) = %d\n", szof(s6));
printf("precomputed sizeof(struct s6) = %d\n", bszof(s6));
- printf("offsetOf(x) = %d, offsetOf(y) = %d, offsetOf(z) = %d\n",
+ printf("offsetof(x) = %d, offsetof(y) = %d, offsetof(z) = %d\n",
offsetOf(s6,x), offsetOf(s6,y), offsetOf(s6,z));
- printf("precomputed offsetOf(x) = %d, offsetOf(y) = %d, offsetOf(z) = %d\n",
+ printf("precomputed offsetof(x) = %d, offsetof(y) = %d, offsetof(z) = %d\n",
boffsetof(s6,x), boffsetof(s6,y), boffsetof(s6,z));
s62.x = 123; s62.y = -456; s62.z = 3.14159;
printf("s62 = {x = %d, y = %d, z = %.5f}\n\n", s62.x, s62.y, s62.z);
diff --git a/x86/Asmexpand.ml b/x86/Asmexpand.ml
index c82d406e..b8353046 100644
--- a/x86/Asmexpand.ml
+++ b/x86/Asmexpand.ml
@@ -251,7 +251,7 @@ let expand_builtin_va_start_32 r =
invalid_arg "Fatal error: va_start used in non-vararg function";
let ofs =
Int32.(add (add !PrintAsmaux.current_function_stacksize 4l)
- (mul 4l (Z.to_int32 (Conventions1.size_arguments
+ (mul 4l (Z.to_int32 (Conventions.size_arguments
(get_current_function_sig ()))))) in
emit (Pleal (RAX, linear_addr RSP (Z.of_uint32 ofs)));
emit (Pmovl_mr (linear_addr r _0z, RAX))
diff --git a/x86/CSE2deps.v b/x86/CSE2deps.v
new file mode 100644
index 00000000..f4d9e254
--- /dev/null
+++ b/x86/CSE2deps.v
@@ -0,0 +1,24 @@
+Require Import BoolEqual Coqlib.
+Require Import AST Integers Floats.
+Require Import Values Memory Globalenvs Events.
+Require Import Op.
+
+Definition can_swap_accesses_ofs ofsr chunkr ofsw chunkw :=
+ (0 <=? ofsw) && (ofsw <=? (Ptrofs.modulus - largest_size_chunk))
+ && (0 <=? ofsr) && (ofsr <=? (Ptrofs.modulus - largest_size_chunk))
+ && ((ofsw + size_chunk chunkw <=? ofsr) ||
+ (ofsr + size_chunk chunkr <=? ofsw)).
+
+Definition may_overlap chunk addr args chunk' addr' args' :=
+ match addr, addr', args, args' with
+ | (Aindexed ofs), (Aindexed ofs'),
+ (base :: nil), (base' :: nil) =>
+ if peq base base'
+ then negb (can_swap_accesses_ofs ofs' chunk' ofs chunk)
+ else true
+ | (Aglobal symb ofs), (Aglobal symb' ofs'), nil, nil =>
+ if peq symb symb'
+ then negb (can_swap_accesses_ofs (Ptrofs.unsigned ofs') chunk' (Ptrofs.unsigned ofs) chunk)
+ else false
+ | _, _, _, _ => true
+ end.
diff --git a/x86/CSE2depsproof.v b/x86/CSE2depsproof.v
new file mode 100644
index 00000000..1e913254
--- /dev/null
+++ b/x86/CSE2depsproof.v
@@ -0,0 +1,253 @@
+Require Import Coqlib Maps Errors Integers Floats Lattice Kildall.
+Require Import AST Linking.
+Require Import Memory Registers Op RTL Maps.
+
+Require Import Globalenvs Values.
+Require Import Linking Values Memory Globalenvs Events Smallstep.
+Require Import Registers Op RTL.
+Require Import CSE2 CSE2deps.
+Require Import Lia.
+
+Section SOUNDNESS.
+ Variable F V : Type.
+ Variable genv: Genv.t F V.
+ Variable sp : val.
+
+Section MEMORY_WRITE.
+ Variable m m2 : mem.
+ Variable chunkw chunkr : memory_chunk.
+ Variable base : val.
+
+ Variable addrw addrr valw : val.
+ Hypothesis STORE : Mem.storev chunkw m addrw valw = Some m2.
+
+ Section INDEXED_AWAY.
+ Variable ofsw ofsr : Z.
+ Hypothesis ADDRW : eval_addressing genv sp
+ (Aindexed ofsw) (base :: nil) = Some addrw.
+ Hypothesis ADDRR : eval_addressing genv sp
+ (Aindexed ofsr) (base :: nil) = Some addrr.
+
+ Lemma load_store_away1 :
+ forall RANGEW : 0 <= ofsw <= Ptrofs.modulus - largest_size_chunk,
+ forall RANGER : 0 <= ofsr <= Ptrofs.modulus - largest_size_chunk,
+ forall SWAPPABLE : ofsw + size_chunk chunkw <= ofsr
+ \/ ofsr + size_chunk chunkr <= ofsw,
+ Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+ Proof.
+ intros.
+
+ pose proof (max_size_chunk chunkr) as size_chunkr_bounded.
+ pose proof (max_size_chunk chunkw) as size_chunkw_bounded.
+ try change (Ptrofs.modulus - largest_size_chunk) with 4294967288 in *.
+ try change (Ptrofs.modulus - largest_size_chunk) with 18446744073709551608 in *.
+ destruct addrr ; simpl in * ; trivial.
+ unfold eval_addressing, eval_addressing32, eval_addressing64 in *.
+ destruct Archi.ptr64 eqn:PTR64; destruct base; simpl in *; try discriminate.
+ rewrite PTR64 in *.
+
+ inv ADDRR.
+ inv ADDRW.
+ eapply Mem.load_store_other with (chunk := chunkw) (v := valw) (b := b).
+ exact STORE.
+ right.
+
+ all: try (destruct (Ptrofs.unsigned_add_either i0
+ (Ptrofs.of_int (Int.repr ofsr))) as [OFSR | OFSR];
+ rewrite OFSR).
+ all: try (destruct (Ptrofs.unsigned_add_either i0
+ (Ptrofs.of_int64 (Int64.repr ofsr))) as [OFSR | OFSR];
+ rewrite OFSR).
+ all: try (destruct (Ptrofs.unsigned_add_either i0
+ (Ptrofs.of_int (Int.repr ofsw))) as [OFSW | OFSW];
+ rewrite OFSW).
+ all: try (destruct (Ptrofs.unsigned_add_either i0
+ (Ptrofs.of_int64 (Int64.repr ofsw))) as [OFSW | OFSW];
+ rewrite OFSW).
+
+ all: unfold Ptrofs.of_int64.
+ all: unfold Ptrofs.of_int.
+
+
+ all: repeat rewrite Int.unsigned_repr by (change Int.max_unsigned with 4294967295; lia).
+ all: repeat rewrite Ptrofs.unsigned_repr by (change Ptrofs.max_unsigned with 4294967295; lia).
+ all: repeat rewrite Int64.unsigned_repr by (change Int64.max_unsigned with 18446744073709551615; lia).
+ all: repeat rewrite Ptrofs.unsigned_repr by (change Ptrofs.max_unsigned with 18446744073709551615; lia).
+
+ all: try change Ptrofs.modulus with 4294967296.
+ all: try change Ptrofs.modulus with 18446744073709551616.
+
+ all: intuition lia.
+ Qed.
+
+ Theorem load_store_away :
+ can_swap_accesses_ofs ofsr chunkr ofsw chunkw = true ->
+ Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+ Proof.
+ intro SWAP.
+ unfold can_swap_accesses_ofs in SWAP.
+ repeat rewrite andb_true_iff in SWAP.
+ repeat rewrite orb_true_iff in SWAP.
+ repeat rewrite Z.leb_le in SWAP.
+ apply load_store_away1.
+ all: tauto.
+ Qed.
+ End INDEXED_AWAY.
+
+ Section DIFFERENT_GLOBALS.
+ Variable ofsw ofsr : ptrofs.
+ Hypothesis symw symr : ident.
+ Hypothesis ADDRW : eval_addressing genv sp
+ (Aglobal symw ofsw) nil = Some addrw.
+ Hypothesis ADDRR : eval_addressing genv sp
+ (Aglobal symr ofsr) nil = Some addrr.
+
+ Lemma ptr64_cases:
+ forall {T : Type},
+ forall b : bool,
+ forall x y : T,
+ (if b then (if b then x else y) else (if b then y else x)) = x.
+ Proof.
+ destruct b; reflexivity.
+ Qed.
+
+ (* not needed
+ Lemma bool_cases_same:
+ forall {T : Type},
+ forall b : bool,
+ forall x : T,
+ (if b then x else x) = x.
+ Proof.
+ destruct b; reflexivity.
+ Qed.
+ *)
+
+ Lemma load_store_diff_globals :
+ symw <> symr ->
+ Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+ Proof.
+ intros.
+ unfold eval_addressing in *.
+ simpl in *.
+ rewrite ptr64_cases in ADDRR.
+ rewrite ptr64_cases in ADDRW.
+ unfold Genv.symbol_address in *.
+ unfold Genv.find_symbol in *.
+ destruct ((Genv.genv_symb genv) ! symw) as [bw |] eqn:SYMW; inv ADDRW.
+ 2: simpl in STORE; discriminate.
+ destruct ((Genv.genv_symb genv) ! symr) as [br |] eqn:SYMR; inv ADDRR.
+ 2: reflexivity.
+ assert (br <> bw).
+ {
+ intro EQ.
+ subst br.
+ assert (symr = symw).
+ {
+ eapply Genv.genv_vars_inj; eauto.
+ }
+ congruence.
+ }
+ eapply Mem.load_store_other with (chunk := chunkw) (v := valw) (b := bw).
+ - exact STORE.
+ - left. assumption.
+ Qed.
+ End DIFFERENT_GLOBALS.
+
+ Section SAME_GLOBALS.
+ Variable ofsw ofsr : ptrofs.
+ Hypothesis sym : ident.
+ Hypothesis ADDRW : eval_addressing genv sp
+ (Aglobal sym ofsw) nil = Some addrw.
+ Hypothesis ADDRR : eval_addressing genv sp
+ (Aglobal sym ofsr) nil = Some addrr.
+
+ Lemma load_store_glob_away1 :
+ forall RANGEW : 0 <= (Ptrofs.unsigned ofsw) <= Ptrofs.modulus - largest_size_chunk,
+ forall RANGER : 0 <= (Ptrofs.unsigned ofsr) <= Ptrofs.modulus - largest_size_chunk,
+ forall SWAPPABLE : (Ptrofs.unsigned ofsw) + size_chunk chunkw <= (Ptrofs.unsigned ofsr)
+ \/ (Ptrofs.unsigned ofsr) + size_chunk chunkr <= (Ptrofs.unsigned ofsw),
+ Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+ Proof.
+ intros.
+
+ pose proof (max_size_chunk chunkr) as size_chunkr_bounded.
+ pose proof (max_size_chunk chunkw) as size_chunkw_bounded.
+ unfold largest_size_chunk in size_chunkr_bounded, size_chunkw_bounded.
+ try change (Ptrofs.modulus - largest_size_chunk) with 4294967288 in *.
+ try change (Ptrofs.modulus - largest_size_chunk) with 18446744073709551608 in *.
+ unfold eval_addressing, eval_addressing32, eval_addressing64 in *.
+
+ rewrite ptr64_cases in ADDRR.
+ rewrite ptr64_cases in ADDRW.
+ unfold Genv.symbol_address in *.
+ inv ADDRR.
+ inv ADDRW.
+ destruct (Genv.find_symbol genv sym).
+ 2: discriminate.
+
+ eapply Mem.load_store_other with (chunk := chunkw) (v := valw) (b := b).
+ exact STORE.
+ right.
+ tauto.
+ Qed.
+
+ Lemma load_store_glob_away :
+ (can_swap_accesses_ofs (Ptrofs.unsigned ofsr) chunkr (Ptrofs.unsigned ofsw) chunkw) = true ->
+ Mem.loadv chunkr m2 addrr = Mem.loadv chunkr m addrr.
+ Proof.
+ intro SWAP.
+ unfold can_swap_accesses_ofs in SWAP.
+ repeat rewrite andb_true_iff in SWAP.
+ repeat rewrite orb_true_iff in SWAP.
+ repeat rewrite Z.leb_le in SWAP.
+ apply load_store_glob_away1.
+ all: tauto.
+ Qed.
+ End SAME_GLOBALS.
+End MEMORY_WRITE.
+End SOUNDNESS.
+
+
+Section SOUNDNESS.
+ Variable F V : Type.
+ Variable genv: Genv.t F V.
+ Variable sp : val.
+
+Lemma may_overlap_sound:
+ forall m m' : mem,
+ forall chunk addr args chunk' addr' args' v a a' rs,
+ (eval_addressing genv sp addr (rs ## args)) = Some a ->
+ (eval_addressing genv sp addr' (rs ## args')) = Some a' ->
+ (may_overlap chunk addr args chunk' addr' args') = false ->
+ (Mem.storev chunk m a v) = Some m' ->
+ (Mem.loadv chunk' m' a') = (Mem.loadv chunk' m a').
+Proof.
+ intros until rs.
+ intros ADDR ADDR' OVERLAP STORE.
+ destruct addr; destruct addr'; try discriminate.
+ { (* Aindexed / Aindexed *)
+ destruct args as [ | base [ | ]]. 1,3: discriminate.
+ destruct args' as [ | base' [ | ]]. 1,3: discriminate.
+ simpl in OVERLAP.
+ destruct (peq base base'). 2: discriminate.
+ subst base'.
+ destruct (can_swap_accesses_ofs z0 chunk' z chunk) eqn:SWAP.
+ 2: discriminate.
+ simpl in *.
+ eapply load_store_away; eassumption.
+ }
+ { (* Aglobal / Aglobal *)
+ destruct args. 2: discriminate.
+ destruct args'. 2: discriminate.
+ simpl in *.
+ destruct (peq i i1).
+ {
+ subst i1.
+ rewrite negb_false_iff in OVERLAP.
+ eapply load_store_glob_away; eassumption.
+ }
+ eapply load_store_diff_globals; eassumption.
+ }
+Qed.
+
+End SOUNDNESS.
diff --git a/x86/Conventions1.v b/x86/Conventions1.v
index ab4c4b13..d9f5b8fa 100644
--- a/x86/Conventions1.v
+++ b/x86/Conventions1.v
@@ -221,36 +221,6 @@ Definition loc_arguments (s: signature) : list (rpair loc) :=
then loc_arguments_64 s.(sig_args) 0 0 0
else loc_arguments_32 s.(sig_args) 0.
-(** [size_arguments s] returns the number of [Outgoing] slots used
- to call a function with signature [s]. *)
-
-Fixpoint size_arguments_32
- (tyl: list typ) (ofs: Z) {struct tyl} : Z :=
- match tyl with
- | nil => ofs
- | ty :: tys => size_arguments_32 tys (ofs + typesize ty)
- end.
-
-Fixpoint size_arguments_64 (tyl: list typ) (ir fr ofs: Z) {struct tyl} : Z :=
- match tyl with
- | nil => ofs
- | (Tint | Tlong | Tany32 | Tany64) :: tys =>
- match list_nth_z int_param_regs ir with
- | None => size_arguments_64 tys ir fr (ofs + 2)
- | Some ireg => size_arguments_64 tys (ir + 1) fr ofs
- end
- | (Tfloat | Tsingle) :: tys =>
- match list_nth_z float_param_regs fr with
- | None => size_arguments_64 tys ir fr (ofs + 2)
- | Some freg => size_arguments_64 tys ir (fr + 1) ofs
- end
- end.
-
-Definition size_arguments (s: signature) : Z :=
- if Archi.ptr64
- then size_arguments_64 s.(sig_args) 0 0 0
- else size_arguments_32 s.(sig_args) 0.
-
(** Argument locations are either caller-save registers or [Outgoing]
stack slots at nonnegative offsets. *)
@@ -352,121 +322,6 @@ Qed.
Hint Resolve loc_arguments_acceptable: locs.
-(** The offsets of [Outgoing] arguments are below [size_arguments s]. *)
-
-Remark size_arguments_32_above:
- forall tyl ofs0, ofs0 <= size_arguments_32 tyl ofs0.
-Proof.
- induction tyl; simpl; intros.
- omega.
- apply Z.le_trans with (ofs0 + typesize a); auto.
- generalize (typesize_pos a); omega.
-Qed.
-
-Remark size_arguments_64_above:
- forall tyl ir fr ofs0,
- ofs0 <= size_arguments_64 tyl ir fr ofs0.
-Proof.
- induction tyl; simpl; intros.
- omega.
- assert (A: ofs0 <=
- match list_nth_z int_param_regs ir with
- | Some _ => size_arguments_64 tyl (ir + 1) fr ofs0
- | None => size_arguments_64 tyl ir fr (ofs0 + 2)
- end).
- { destruct (list_nth_z int_param_regs ir); eauto.
- apply Z.le_trans with (ofs0 + 2); auto. omega. }
- assert (B: ofs0 <=
- match list_nth_z float_param_regs fr with
- | Some _ => size_arguments_64 tyl ir (fr + 1) ofs0
- | None => size_arguments_64 tyl ir fr (ofs0 + 2)
- end).
- { destruct (list_nth_z float_param_regs fr); eauto.
- apply Z.le_trans with (ofs0 + 2); auto. omega. }
- destruct a; auto.
-Qed.
-
-Lemma size_arguments_above:
- forall s, size_arguments s >= 0.
-Proof.
- intros; unfold size_arguments. apply Z.le_ge.
- destruct Archi.ptr64; [apply size_arguments_64_above|apply size_arguments_32_above].
-Qed.
-
-Lemma loc_arguments_32_bounded:
- forall ofs ty tyl ofs0,
- In (S Outgoing ofs ty) (regs_of_rpairs (loc_arguments_32 tyl ofs0)) ->
- ofs + typesize ty <= size_arguments_32 tyl ofs0.
-Proof.
- induction tyl as [ | t l]; simpl; intros x IN.
-- contradiction.
-- rewrite in_app_iff in IN; destruct IN as [IN|IN].
-+ apply Z.le_trans with (x + typesize t); [|apply size_arguments_32_above].
- Ltac decomp :=
- match goal with
- | [ H: _ \/ _ |- _ ] => destruct H; decomp
- | [ H: S _ _ _ = S _ _ _ |- _ ] => inv H
- | [ H: False |- _ ] => contradiction
- end.
- destruct t; simpl in IN; decomp; simpl; omega.
-+ apply IHl; auto.
-Qed.
-
-Lemma loc_arguments_64_bounded:
- forall ofs ty tyl ir fr ofs0,
- In (S Outgoing ofs ty) (regs_of_rpairs (loc_arguments_64 tyl ir fr ofs0)) ->
- ofs + typesize ty <= size_arguments_64 tyl ir fr ofs0.
-Proof.
- induction tyl; simpl; intros.
- contradiction.
- assert (T: forall ty0, typesize ty0 <= 2).
- { destruct ty0; simpl; omega. }
- assert (A: forall ty0,
- In (S Outgoing ofs ty) (regs_of_rpairs
- match list_nth_z int_param_regs ir with
- | Some ireg =>
- One (R ireg) :: loc_arguments_64 tyl (ir + 1) fr ofs0
- | None => One (S Outgoing ofs0 ty0) :: loc_arguments_64 tyl ir fr (ofs0 + 2)
- end) ->
- ofs + typesize ty <=
- match list_nth_z int_param_regs ir with
- | Some _ => size_arguments_64 tyl (ir + 1) fr ofs0
- | None => size_arguments_64 tyl ir fr (ofs0 + 2)
- end).
- { intros. destruct (list_nth_z int_param_regs ir); simpl in H0; destruct H0.
- - discriminate.
- - eapply IHtyl; eauto.
- - inv H0. apply Z.le_trans with (ofs + 2). specialize (T ty). omega. apply size_arguments_64_above.
- - eapply IHtyl; eauto. }
- assert (B: forall ty0,
- In (S Outgoing ofs ty) (regs_of_rpairs
- match list_nth_z float_param_regs fr with
- | Some ireg =>
- One (R ireg) :: loc_arguments_64 tyl ir (fr + 1) ofs0
- | None => One (S Outgoing ofs0 ty0) :: loc_arguments_64 tyl ir fr (ofs0 + 2)
- end) ->
- ofs + typesize ty <=
- match list_nth_z float_param_regs fr with
- | Some _ => size_arguments_64 tyl ir (fr + 1) ofs0
- | None => size_arguments_64 tyl ir fr (ofs0 + 2)
- end).
- { intros. destruct (list_nth_z float_param_regs fr); simpl in H0; destruct H0.
- - discriminate.
- - eapply IHtyl; eauto.
- - inv H0. apply Z.le_trans with (ofs + 2). specialize (T ty). omega. apply size_arguments_64_above.
- - eapply IHtyl; eauto. }
- destruct a; eauto.
-Qed.
-
-Lemma loc_arguments_bounded:
- forall (s: signature) (ofs: Z) (ty: typ),
- In (S Outgoing ofs ty) (regs_of_rpairs (loc_arguments s)) ->
- ofs + typesize ty <= size_arguments s.
-Proof.
- unfold loc_arguments, size_arguments; intros.
- destruct Archi.ptr64; eauto using loc_arguments_32_bounded, loc_arguments_64_bounded.
-Qed.
-
Lemma loc_arguments_main:
loc_arguments signature_main = nil.
Proof.
diff --git a/x86/DuplicateOpcodeHeuristic.ml b/x86/DuplicateOpcodeHeuristic.ml
index 85505245..2ec314c1 100644
--- a/x86/DuplicateOpcodeHeuristic.ml
+++ b/x86/DuplicateOpcodeHeuristic.ml
@@ -1,3 +1,27 @@
-exception HeuristicSucceeded
-
-let opcode_heuristic code cond ifso ifnot preferred = ()
+(* open Camlcoq *)
+open Op
+open Integers
+
+let opcode_heuristic code cond ifso ifnot is_loop_header =
+ match cond with
+ | Ccompimm (c, n) | Ccompuimm (c, n) -> if n == Integers.Int.zero then (match c with
+ | Clt | Cle -> Some false
+ | Cgt | Cge -> Some true
+ | _ -> None
+ ) else None
+ | Ccomplimm (c, n) | Ccompluimm (c, n) -> if n == Integers.Int64.zero then (match c with
+ | Clt | Cle -> Some false
+ | Cgt | Cge -> Some true
+ | _ -> None
+ ) else None
+ | Ccompf c | Ccompfs c -> (match c with
+ | Ceq -> Some false
+ | Cne -> Some true
+ | _ -> None
+ )
+ | Cnotcompf c | Cnotcompfs c -> (match c with
+ | Ceq -> Some true
+ | Cne -> Some false
+ | _ -> None
+ )
+ | _ -> None