diff options
author | Sylvain Boulmé <sylvain.boulme@univ-grenoble-alpes.fr> | 2021-07-24 11:42:09 +0200 |
---|---|---|
committer | Sylvain Boulmé <sylvain.boulme@univ-grenoble-alpes.fr> | 2021-07-24 11:42:09 +0200 |
commit | a4570fed198034e535d0d6d99e23cfbb1d40b926 (patch) | |
tree | e4e5a9dc845fc0972622ae05fd9084234ed9a44d | |
parent | 95f918c38b1e59f40ae7af455ec2c6746289375e (diff) | |
parent | b5c4192c73d7b02e0c90354e26b35a84ee141083 (diff) | |
download | compcert-kvx-a4570fed198034e535d0d6d99e23cfbb1d40b926.tar.gz compcert-kvx-a4570fed198034e535d0d6d99e23cfbb1d40b926.zip |
Merge branch 'kvx-work' into rtl-tunneling
64 files changed, 2394 insertions, 170 deletions
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index a7811ae3..b3bb418f 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -14,7 +14,9 @@ check-admitted: rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' when: always - - if: '$CI_COMMIT_BRANCH == "mppa-kvx"' + - if: '$CI_COMMIT_BRANCH == "kvx-work-ssa"' + when: always + - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"' when: always - if: '$CI_COMMIT_BRANCH == "master"' when: always @@ -35,7 +37,9 @@ build_x86_64: rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' when: always - - if: '$CI_COMMIT_BRANCH == "mppa-kvx"' + - if: '$CI_COMMIT_BRANCH == "kvx-work-ssa"' + when: always + - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"' when: always - if: '$CI_COMMIT_BRANCH == "master"' when: always @@ -58,7 +62,9 @@ build_ia32: rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' when: always - - if: '$CI_COMMIT_BRANCH == "mppa-kvx"' + - if: '$CI_COMMIT_BRANCH == "kvx-work-ssa"' + when: always + - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"' when: always - if: '$CI_COMMIT_BRANCH == "master"' when: always @@ -76,14 +82,14 @@ build_aarch64: script: - ./config_aarch64.sh - make -j "$NJOBS" - - export LD_LIBRARY_PATH=/usr/aarch64-linux-gnu/lib - - sudo ln -s /usr/aarch64-linux-gnu/lib/ld-linux-aarch64.so.1 /lib - - make -C test CCOMPOPTS='-static' SIMU='qemu-aarch64' EXECUTE='qemu-aarch64' all test - - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='aarch64-linux-gnu-gcc' EXECUTE='qemu-aarch64' CCOMPOPTS='-static' TARGET_CFLAGS='-static' + - make -C test SIMU='qemu-aarch64 -L /usr/aarch64-linux-gnu' EXECUTE='qemu-aarch64 -L /usr/aarch64-linux-gnu' all test + - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='aarch64-linux-gnu-gcc' EXECUTE='qemu-aarch64 -L /usr/aarch64-linux-gnu' CCOMPOPTS='-static' TARGET_CFLAGS='-static' rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' when: always - - if: '$CI_COMMIT_BRANCH == "mppa-kvx"' + - if: '$CI_COMMIT_BRANCH == "kvx-work-ssa"' + when: always + - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"' when: always - if: '$CI_COMMIT_BRANCH == "master"' when: always @@ -101,14 +107,14 @@ build_arm: script: - ./config_arm.sh - make -j "$NJOBS" - - export LD_LIBRARY_PATH=/usr/arm-linux-gnueabi/lib - - sudo ln -s /usr/arm-linux-gnueabi/lib/ld-linux.so.3 /lib # FIXME: UGLY ! - - make -C test CCOMPOPTS=-static SIMU='qemu-arm' EXECUTE='qemu-arm' all test - - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='arm-linux-gnueabi-gcc' EXECUTE='qemu-arm' CCOMPOPTS='-static' TARGET_CFLAGS='-static' BITS=32 + - make -C test SIMU='qemu-arm -L /usr/arm-linux-gnueabi' EXECUTE='qemu-arm -L /usr/arm-linux-gnueabi' all test + - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='arm-linux-gnueabi-gcc' EXECUTE='qemu-arm -L /usr/arm-linux-gnueabi' CCOMPOPTS='-static' TARGET_CFLAGS='-static' BITS=32 rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' when: always - - if: '$CI_COMMIT_BRANCH == "mppa-kvx"' + - if: '$CI_COMMIT_BRANCH == "kvx-work-ssa"' + when: always + - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"' when: always - if: '$CI_COMMIT_BRANCH == "master"' when: always @@ -127,14 +133,14 @@ build_armhf: script: - ./config_armhf.sh - make -j "$NJOBS" - - export LD_LIBRARY_PATH=/usr/arm-linux-gnueabihf/lib - - sudo ln -s /usr/arm-linux-gnueabihf/lib/ld-linux-armhf.so.3 /lib # FIXME: UGLY ! - - make -C test CCOMPOPTS=-static SIMU='qemu-arm' EXECUTE='qemu-arm' all test - - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='arm-linux-gnueabihf-gcc' EXECUTE='qemu-arm' CCOMPOPTS='-static' TARGET_CFLAGS='-static' BITS=32 + - make -C test SIMU='qemu-arm -L /usr/arm-linux-gnueabihf' EXECUTE='qemu-arm -L /usr/arm-linux-gnueabihf' all test + - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='arm-linux-gnueabihf-gcc' EXECUTE='qemu-arm -L /usr/arm-linux-gnueabihf' CCOMPOPTS='-static' TARGET_CFLAGS='-static' BITS=32 rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' when: always - - if: '$CI_COMMIT_BRANCH == "mppa-kvx"' + - if: '$CI_COMMIT_BRANCH == "kvx-work-ssa"' + when: always + - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"' when: always - if: '$CI_COMMIT_BRANCH == "master"' when: always @@ -145,17 +151,25 @@ build_ppc: image: coqorg/coq:8.12.2-ocaml-4.11.1-flambda before_script: - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update - - sudo apt-get -y install gcc-powerpc-linux-gnu qemu-user + - sudo apt-get -y install gcc-powerpc-linux-gnu wget ninja-build libglib2.0-dev libpixman-1-dev + - wget --no-verbose https://download.qemu.org/qemu-6.0.0.tar.xz + - tar xJf qemu-6.0.0.tar.xz + - (cd qemu-6.0.0 && ./configure --target-list=ppc-linux-user && make && sudo make install) - eval `opam config env` - opam update - opam install -y menhir script: - ./config_ppc.sh - make -j "$NJOBS" + - qemu-ppc --version + - make -C test SIMU='qemu-ppc -L /usr/powerpc-linux-gnu -cpu 7400' EXECUTE='qemu-ppc -L /usr/powerpc-linux-gnu -cpu 7400' all test + - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='powerpc-linux-gnu-gcc' EXECUTE='qemu-ppc -L /usr/powerpc-linux-gnu -cpu 7400' BITS=32 rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' when: always - - if: '$CI_COMMIT_BRANCH == "mppa-kvx"' + - if: '$CI_COMMIT_BRANCH == "kvx-work-ssa"' + when: always + - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"' when: always - if: '$CI_COMMIT_BRANCH == "master"' when: always @@ -173,10 +187,14 @@ build_ppc64: script: - ./config_ppc64.sh - make -j "$NJOBS" + #- make -C test SIMU='qemu-ppc64 -L /usr/powerpc64-linux-gnu -cpu 7400' EXECUTE='qemu-ppc64 -L /usr/powerpc64-linux-gnu -cpu 7400' all test + #- ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='powerpc64-linux-gnu-gcc' EXECUTE='qemu-ppc64 -L /usr/powerpc64-linux-gnu -cpu 7400' BITS=32 rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' when: always - - if: '$CI_COMMIT_BRANCH == "mppa-kvx"' + - if: '$CI_COMMIT_BRANCH == "kvx-work-ssa"' + when: always + - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"' when: always - if: '$CI_COMMIT_BRANCH == "master"' when: always @@ -194,14 +212,14 @@ build_rv64: script: - ./config_rv64.sh - make -j "$NJOBS" - - export LD_LIBRARY_PATH=/usr/riscv64-linux-gnu/lib - - sudo ln -s /usr/riscv64-linux-gnu/lib/ld-linux-riscv64-lp64d.so.1 /lib - - make -C test CCOMPOPTS=-static SIMU='qemu-riscv64' EXECUTE='qemu-riscv64' all test - - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='riscv64-linux-gnu-gcc' EXECUTE='qemu-riscv64' CCOMPOPTS='-static' TARGET_CFLAGS='-static' + - make -C test SIMU='qemu-riscv64 -L /usr/riscv64-linux-gnu' EXECUTE='qemu-riscv64 -L /usr/riscv64-linux-gnu' all test + - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='riscv64-linux-gnu-gcc' EXECUTE='qemu-riscv64 -L /usr/riscv64-linux-gnu' CCOMPOPTS='-static' TARGET_CFLAGS='-static' rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' when: always - - if: '$CI_COMMIT_BRANCH == "mppa-kvx"' + - if: '$CI_COMMIT_BRANCH == "kvx-work-ssa"' + when: always + - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"' when: always - if: '$CI_COMMIT_BRANCH == "master"' when: always @@ -222,7 +240,9 @@ build_rv32: rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' when: always - - if: '$CI_COMMIT_BRANCH == "mppa-kvx"' + - if: '$CI_COMMIT_BRANCH == "kvx-work-ssa"' + when: always + - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"' when: always - if: '$CI_COMMIT_BRANCH == "master"' when: always @@ -249,7 +269,9 @@ build_kvx: rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' when: always - - if: '$CI_COMMIT_BRANCH == "mppa-kvx"' + - if: '$CI_COMMIT_BRANCH == "kvx-work-ssa"' + when: always + - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"' when: always - if: '$CI_COMMIT_BRANCH == "master"' when: always diff --git a/aarch64/Machregsaux.ml b/aarch64/Machregsaux.ml index 41db3bd4..98e461eb 100644 --- a/aarch64/Machregsaux.ml +++ b/aarch64/Machregsaux.ml @@ -19,3 +19,6 @@ let class_of_type = function | AST.Tint | AST.Tlong -> 0 | AST.Tfloat | AST.Tsingle -> 1 | AST.Tany32 | AST.Tany64 -> assert false + +(* number of available registers per class *) +let nr_regs = [| 29; 32 |] diff --git a/aarch64/Machregsaux.mli b/aarch64/Machregsaux.mli index 01b0f9fd..23ac1c9a 100644 --- a/aarch64/Machregsaux.mli +++ b/aarch64/Machregsaux.mli @@ -15,3 +15,6 @@ val is_scratch_register: string -> bool val class_of_type: AST.typ -> int + +(* Number of registers in each class *) +val nr_regs : int array diff --git a/aarch64/PostpassSchedulingOracle.ml b/aarch64/PostpassSchedulingOracle.ml index cde3e7a7..6f784238 100644 --- a/aarch64/PostpassSchedulingOracle.ml +++ b/aarch64/PostpassSchedulingOracle.ml @@ -507,6 +507,9 @@ let build_problem bb = { max_latency = -1; resource_bounds = opweights.pipelined_resource_bounds; + live_regs_entry = Registers.Regset.empty; (* unused here *) + typing = (fun x -> AST.Tint); (* unused here *) + reference_counting = None; instruction_usages = instruction_usages bb; latency_constraints = latency_constraints bb; } diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml index 2c3eb14f..03a9e202 100644 --- a/aarch64/PrepassSchedulingOracle.ml +++ b/aarch64/PrepassSchedulingOracle.ml @@ -1,3 +1,16 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* Léo Gourdin UGA, VERIMAG *) +(* Nicolas Nardino ENS-Lyon, VERIMAG *) +(* *) +(* *) +(* *************************************************************) + open AST open RTL open Maps @@ -406,11 +419,15 @@ let get_alias_dependencies seqa = !deps;; *) -let define_problem (opweights : opweights) seqa = +let define_problem (opweights : opweights) (live_entry_regs : Regset.t) + (typing : RTLtyping.regenv) reference_counting seqa = let simple_deps = get_simple_dependencies opweights seqa in { max_latency = -1; resource_bounds = opweights.pipelined_resource_bounds; - instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa); + live_regs_entry = live_entry_regs; + typing = typing; + reference_counting = Some reference_counting; + instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa); latency_constraints = (* if (use_alias_analysis ()) then (get_alias_dependencies seqa) @ simple_deps @@ -439,7 +456,10 @@ let prepass_scheduler_by_name name problem early_ones = | "zigzag" -> zigzag_scheduler problem early_ones | _ -> scheduler_by_name name problem -let schedule_sequence (seqa : (instruction*Regset.t) array) = +let schedule_sequence (seqa : (instruction*Regset.t) array) + (live_regs_entry : Registers.Regset.t) + (typing : RTLtyping.regenv) + reference = let opweights = OpWeights.get_opweights () in try if (Array.length seqa) <= 1 @@ -449,7 +469,8 @@ let schedule_sequence (seqa : (instruction*Regset.t) array) = let nr_instructions = Array.length seqa in (if !Clflags.option_debug_compcert > 6 then Printf.printf "prepass scheduling length = %d\n" (Array.length seqa)); - let problem = define_problem opweights seqa in + let problem = define_problem opweights live_regs_entry + typing reference seqa in (if !Clflags.option_debug_compcert > 7 then (print_sequence stdout (Array.map fst seqa); print_problem stdout problem)); @@ -461,7 +482,8 @@ let schedule_sequence (seqa : (instruction*Regset.t) array) = | Icond _ -> true | _ -> false) seqa) with | None -> Printf.printf "no solution in prepass scheduling\n"; - None + Stdlib.exit 1 + (* TODO None *) | Some solution -> let positions = Array.init nr_instructions (fun i -> i) in Array.sort (fun i j -> diff --git a/arm/ExpansionOracle.ml b/arm/ExpansionOracle.ml index ee2674bf..3b63b80d 120000..100644 --- a/arm/ExpansionOracle.ml +++ b/arm/ExpansionOracle.ml @@ -1 +1,17 @@ -../aarch64/ExpansionOracle.ml
\ No newline at end of file +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Léo Gourdin UGA, VERIMAG *) +(* *) +(* Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +open RTLpathCommon + +let expanse (sb : superblock) code pm = (code, pm) + +let find_last_node_reg c = () diff --git a/arm/PrepassSchedulingOracle.ml b/arm/PrepassSchedulingOracle.ml index 9885fd52..42a3da23 120000..100644 --- a/arm/PrepassSchedulingOracle.ml +++ b/arm/PrepassSchedulingOracle.ml @@ -1 +1,6 @@ -../x86/PrepassSchedulingOracle.ml
\ No newline at end of file +open RTL +open Registers + +(* Do not do anything *) +let schedule_sequence (seqa : (instruction*Regset.t) array) + live_regs_entry typing reference = None diff --git a/backend/CSE2proof.v b/backend/CSE2proof.v index 49dbd409..252240c9 100644 --- a/backend/CSE2proof.v +++ b/backend/CSE2proof.v @@ -1399,7 +1399,7 @@ Proof. 2: discriminate. econstructor; split. { - eapply exec_Iop with (v := (default_notrap_load_value chunk)); eauto. + eapply exec_Iop with (v := Vundef); eauto. simpl. rewrite <- subst_args_ok with (genv := ge) (f := f) (pc := pc) (sp := sp) (m := m) in H0. { @@ -1472,7 +1472,7 @@ Proof. 2: discriminate. econstructor; split. { - eapply exec_Iop with (v := (default_notrap_load_value chunk)); eauto. + eapply exec_Iop with (v := Vundef); eauto. simpl. rewrite <- subst_args_ok with (genv := ge) (f := f) (pc := pc) (sp := sp) (m := m) in H0. { diff --git a/backend/CSE3analysisproof.v b/backend/CSE3analysisproof.v index d53cf604..382c9f4c 100644 --- a/backend/CSE3analysisproof.v +++ b/backend/CSE3analysisproof.v @@ -502,7 +502,7 @@ Section SOUNDNESS. end with | Some dat => v' = dat - | None => v' = default_notrap_load_value chunk + | None => v' = Vundef end end. diff --git a/backend/CSEdomain.v b/backend/CSEdomain.v index f78e1d25..9641d012 100644 --- a/backend/CSEdomain.v +++ b/backend/CSEdomain.v @@ -113,12 +113,12 @@ Inductive rhs_eval_to (valu: valuation) (ge: genv) (sp: val) (m: mem): (* | load_notrap1_eval_to: forall chunk addr vl, eval_addressing ge sp addr (map valu vl) = None -> rhs_eval_to valu ge sp m (Load NOTRAP chunk addr vl) - (default_notrap_load_value chunk) + Vundef | load_notrap2_eval_to: forall chunk addr vl a, eval_addressing ge sp addr (map valu vl) = Some a -> Mem.loadv chunk m a = None -> rhs_eval_to valu ge sp m (Load NOTRAP chunk addr vl) - (default_notrap_load_value chunk) *). + Vundef *). Inductive equation_holds (valu: valuation) (ge: genv) (sp: val) (m: mem): equation -> Prop := diff --git a/backend/CSEproof.v b/backend/CSEproof.v index cf51f5a2..556b44b3 100644 --- a/backend/CSEproof.v +++ b/backend/CSEproof.v @@ -402,7 +402,7 @@ Lemma add_load_holds_none1: forall valu1 ge sp rs m n addr (args: list reg) chunk dst, numbering_holds valu1 ge sp rs m n -> eval_addressing ge sp addr rs##args = None -> - exists valu2, numbering_holds valu2 ge sp (rs#dst <- (default_notrap_load_value chunk)) m (add_load n dst chunk addr args). + exists valu2, numbering_holds valu2 ge sp (rs#dst <- Vundef) m (add_load n dst chunk addr args). Proof. unfold add_load; intros. destruct (valnum_regs n args) as [n1 vl] eqn:VN. @@ -418,7 +418,7 @@ Lemma add_load_holds_none2: numbering_holds valu1 ge sp rs m n -> eval_addressing ge sp addr rs##args = Some a -> Mem.loadv chunk m a = None -> - exists valu2, numbering_holds valu2 ge sp (rs#dst <- (default_notrap_load_value chunk)) m (add_load n dst NOTRAP chunk addr args). + exists valu2, numbering_holds valu2 ge sp (rs#dst <- Vundef) m (add_load n dst NOTRAP chunk addr args). Proof. unfold add_load; intros. destruct (valnum_regs n args) as [n1 vl] eqn:VN. @@ -1210,7 +1210,6 @@ Proof. exists valu1. apply set_unknown_holds. assumption. - unfold default_notrap_load_value. apply set_reg_lessdef; eauto. } { diff --git a/backend/Deadcodeproof.v b/backend/Deadcodeproof.v index b51d6cce..be20af0b 100644 --- a/backend/Deadcodeproof.v +++ b/backend/Deadcodeproof.v @@ -845,7 +845,6 @@ Ltac UseTransfer := eapply match_succ_states; eauto. simpl; auto. apply eagree_update; auto. rewrite is_int_zero_sound by auto. - unfold default_notrap_load_value. constructor. + (* preserved *) exploit eval_addressing_lessdef_none. eapply add_needs_all_lessdef; eauto. eassumption. @@ -878,7 +877,6 @@ Ltac UseTransfer := eapply match_succ_states; eauto. simpl; auto. apply eagree_update; auto. rewrite is_int_zero_sound by auto. - unfold default_notrap_load_value. constructor. + (* preserved *) exploit eval_addressing_lessdef. eapply add_needs_all_lessdef; eauto. eauto. diff --git a/backend/LTL.v b/backend/LTL.v index 3edd60a2..a382ef0e 100644 --- a/backend/LTL.v +++ b/backend/LTL.v @@ -217,14 +217,14 @@ Inductive step: state -> trace -> state -> Prop := E0 (Block s f sp bb rs' m) | exec_Lload_notrap1: forall s f sp chunk addr args dst bb rs m rs', eval_addressing ge sp addr (reglist rs args) = None -> - rs' = Locmap.set (R dst) (default_notrap_load_value chunk) + rs' = Locmap.set (R dst) Vundef (undef_regs (destroyed_by_load chunk addr) rs) -> step (Block s f sp (Lload NOTRAP chunk addr args dst :: bb) rs m) E0 (Block s f sp bb rs' m) | exec_Lload_notrap2: forall s f sp chunk addr args dst bb rs m a rs', eval_addressing ge sp addr (reglist rs args) = Some a -> Mem.loadv chunk m a = None -> - rs' = Locmap.set (R dst) (default_notrap_load_value chunk) + rs' = Locmap.set (R dst) Vundef (undef_regs (destroyed_by_load chunk addr) rs) -> step (Block s f sp (Lload NOTRAP chunk addr args dst :: bb) rs m) E0 (Block s f sp bb rs' m) diff --git a/backend/Linear.v b/backend/Linear.v index 1443f795..cb11f7dc 100644 --- a/backend/Linear.v +++ b/backend/Linear.v @@ -170,7 +170,7 @@ Inductive step: state -> trace -> state -> Prop := forall s f sp chunk addr args dst b rs m rs', eval_addressing ge sp addr (reglist rs args) = None -> rs' = Locmap.set (R dst) - (default_notrap_load_value chunk) + Vundef (undef_regs (destroyed_by_load chunk addr) rs) -> step (State s f sp (Lload NOTRAP chunk addr args dst :: b) rs m) E0 (State s f sp b rs' m) @@ -179,7 +179,7 @@ Inductive step: state -> trace -> state -> Prop := eval_addressing ge sp addr (reglist rs args) = Some a -> Mem.loadv chunk m a = None -> rs' = Locmap.set (R dst) - (default_notrap_load_value chunk) + Vundef (undef_regs (destroyed_by_load chunk addr) rs) -> step (State s f sp (Lload NOTRAP chunk addr args dst :: b) rs m) E0 (State s f sp b rs' m) diff --git a/backend/Lineartyping.v b/backend/Lineartyping.v index 22658fb7..cf903aad 100644 --- a/backend/Lineartyping.v +++ b/backend/Lineartyping.v @@ -338,14 +338,12 @@ Local Opaque mreg_type. simpl in *; InvBooleans. econstructor; eauto. apply wt_setreg. eapply Val.has_subtype; eauto. - unfold default_notrap_load_value. constructor. apply wt_undef_regs; auto. - (* load notrap2 *) simpl in *; InvBooleans. econstructor; eauto. apply wt_setreg. eapply Val.has_subtype; eauto. - unfold default_notrap_load_value. constructor. apply wt_undef_regs; auto. - (* store *) diff --git a/backend/Mach.v b/backend/Mach.v index 1c6fdb18..2cfd738d 100644 --- a/backend/Mach.v +++ b/backend/Mach.v @@ -330,14 +330,14 @@ Inductive step: state -> trace -> state -> Prop := | exec_Mload_notrap1: forall s f sp chunk addr args dst c rs m rs', eval_addressing ge sp addr rs##args = None -> - rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- (default_notrap_load_value chunk)) -> + rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- Vundef) -> step (State s f sp (Mload NOTRAP chunk addr args dst :: c) rs m) E0 (State s f sp c rs' m) | exec_Mload_notrap2: forall s f sp chunk addr args dst c rs m a rs', eval_addressing ge sp addr rs##args = Some a -> Mem.loadv chunk m a = None -> - rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- (default_notrap_load_value chunk)) -> + rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- Vundef) -> step (State s f sp (Mload NOTRAP chunk addr args dst :: c) rs m) E0 (State s f sp c rs' m) | exec_Mstore: diff --git a/backend/RTL.v b/backend/RTL.v index 31b5cf99..fe350adf 100644 --- a/backend/RTL.v +++ b/backend/RTL.v @@ -225,14 +225,14 @@ Inductive step: state -> trace -> state -> Prop := (fn_code f)!pc = Some(Iload NOTRAP chunk addr args dst pc') -> eval_addressing ge sp addr rs##args = None -> step (State s f sp pc rs m) - E0 (State s f sp pc' (rs#dst <- (default_notrap_load_value chunk)) m) + E0 (State s f sp pc' (rs#dst <- Vundef) m) | exec_Iload_notrap2: forall s f sp pc rs m chunk addr args dst pc' a, (fn_code f)!pc = Some(Iload NOTRAP chunk addr args dst pc') -> eval_addressing ge sp addr rs##args = Some a -> Mem.loadv chunk m a = None-> step (State s f sp pc rs m) - E0 (State s f sp pc' (rs#dst <- (default_notrap_load_value chunk)) m) + E0 (State s f sp pc' (rs#dst <- Vundef) m) | exec_Istore: forall s f sp pc rs m chunk addr args src pc' a m', (fn_code f)!pc = Some(Istore chunk addr args src pc') -> diff --git a/backend/RTLtyping.v b/backend/RTLtyping.v index 15ed6d8a..6048f895 100644 --- a/backend/RTLtyping.v +++ b/backend/RTLtyping.v @@ -858,7 +858,7 @@ Lemma wt_exec_Iload_notrap: forall env f chunk addr args dst s rs, wt_instr f env (Iload NOTRAP chunk addr args dst s) -> wt_regset env rs -> - wt_regset env (rs#dst <- (default_notrap_load_value chunk)). + wt_regset env (rs#dst <- Vundef). Proof. intros. eapply wt_regset_assign; eauto. simpl. trivial. diff --git a/backend/Tailcallproof.v b/backend/Tailcallproof.v index 80a68327..39fc10fb 100644 --- a/backend/Tailcallproof.v +++ b/backend/Tailcallproof.v @@ -440,7 +440,7 @@ Proof. TransfInstr. assert (Val.lessdef_list (rs##args) (rs'##args)). apply regs_lessdef_regs; auto. left. - exists (State s' (transf_function f) (Vptr sp0 Ptrofs.zero) pc' (rs'#dst <- (default_notrap_load_value chunk)) m'); split. + exists (State s' (transf_function f) (Vptr sp0 Ptrofs.zero) pc' (rs'#dst <- Vundef) m'); split. eapply exec_Iload_notrap1. eassumption. eapply eval_addressing_lessdef_none. eassumption. @@ -465,7 +465,7 @@ Proof. exact symbols_preserved. assumption. econstructor; eauto. apply set_reg_lessdef; auto. - + exists (State s' (transf_function f) (Vptr sp0 Ptrofs.zero) pc' (rs'#dst <- (default_notrap_load_value chunk)) m'); split. + + exists (State s' (transf_function f) (Vptr sp0 Ptrofs.zero) pc' (rs'#dst <- Vundef) m'); split. eapply exec_Iload_notrap2. eassumption. erewrite eval_addressing_preserved. eassumption. diff --git a/backend/ValueAnalysis.v b/backend/ValueAnalysis.v index 561e94c9..e20edff7 100644 --- a/backend/ValueAnalysis.v +++ b/backend/ValueAnalysis.v @@ -1287,13 +1287,11 @@ Proof. eapply sound_succ_state; eauto. simpl; auto. unfold transfer; rewrite H. eauto. apply ematch_update; auto. - unfold default_notrap_load_value. constructor. - (* load notrap2 *) eapply sound_succ_state; eauto. simpl; auto. unfold transfer; rewrite H. eauto. apply ematch_update; auto. - unfold default_notrap_load_value. constructor. - (* store *) exploit eval_static_addressing_sound; eauto with va. intros VMADDR. diff --git a/common/DebugPrint.ml b/common/DebugPrint.ml index 6f8449ee..275e6a71 100644 --- a/common/DebugPrint.ml +++ b/common/DebugPrint.ml @@ -132,10 +132,10 @@ let print_instructions insts code = | None -> failwith "Did not get some" | Some thing -> thing in if (!debug_flag) then begin - debug "[ "; + debug "[\n"; List.iter ( fun n -> (PrintRTL.print_instruction stdout (P.to_int n, get_some @@ PTree.get n code)) - ) insts; debug "]" + ) insts; debug " ]" end let print_arrayp arr = begin diff --git a/common/Memory.v b/common/Memory.v index bf8ca083..ff17efb0 100644 --- a/common/Memory.v +++ b/common/Memory.v @@ -41,8 +41,6 @@ Require Export Memdata. Require Export Memtype. Require Import Lia. -Definition default_notrap_load_value (chunk : memory_chunk) := Vundef. - (* To avoid useless definitions of inductors in extracted code. *) Local Unset Elimination Schemes. diff --git a/config_macos_x86_64.sh b/config_macos_x86_64.sh new file mode 100755 index 00000000..9d5b3f5e --- /dev/null +++ b/config_macos_x86_64.sh @@ -0,0 +1 @@ +exec ./config_simple.sh x86_64-macos "$@" diff --git a/config_simple.sh b/config_simple.sh index e2d3844c..52b7d1a6 100755 --- a/config_simple.sh +++ b/config_simple.sh @@ -2,7 +2,7 @@ arch=$1 shift version=`git rev-parse --short HEAD` branch=`git rev-parse --abbrev-ref HEAD` -date=`date -I` +date=`date +%Y-%m-%d` if test "x$CCOMP_INSTALL_PREFIX" = "x" ; then CCOMP_INSTALL_PREFIX=/opt/CompCert ; diff --git a/driver/Clflags.ml b/driver/Clflags.ml index fa17c2d9..25bd2c78 100644 --- a/driver/Clflags.ml +++ b/driver/Clflags.ml @@ -51,7 +51,7 @@ let option_flooprotate = ref 0 (* rotate the innermost loops to have the conditi let option_mtune = ref "" let option_fprepass = ref true -let option_fprepass_sched = ref "list" +let option_fprepass_sched = ref "regpres" let option_fpostpass = ref true let option_fpostpass_sched = ref "list" @@ -115,4 +115,6 @@ let option_inline_auto_threshold = ref 0 let option_profile_arcs = ref false let option_fbranch_probabilities = ref true let option_debug_compcert = ref 0 +let option_regpres_threshold = ref 2 +let option_regpres_wait_window = ref false let main_function_name = ref "main" diff --git a/driver/Driver.ml b/driver/Driver.ml index 7192ba4b..3f5a4bd9 100644 --- a/driver/Driver.ml +++ b/driver/Driver.ml @@ -210,7 +210,9 @@ Processing options: -mtune= Type of CPU (for scheduling on some architectures) -fprepass Perform prepass scheduling (only on some architectures) [on] -fprepass= <optim> Perform postpass scheduling with the specified optimization [list] - (<optim>=list: list scheduling, <optim>=revlist: reverse list scheduling, <optim>=zigzag: zigzag scheduling, <optim>=ilp: ILP, <optim>=greedy: just packing bundles) + (<optim>=list: list scheduling, <optim>=revlist: reverse list scheduling, <optim>=regpres: list scheduling aware of register pressure, <optim>=regpres_bis: variant of regpres, <optim>=zigzag: zigzag scheduling, <optim>=ilp: ILP, <optim>=greedy: just packing bundles) + -regpres-threshold n With `-fprepass= regpres`, set threshold value for number of free registers before trying to decrease register pressure + -fregpres-wait-window When register pressure is high, use a 5-cycle waiting window instead of scheduling short paths first (default no) -fpostpass Perform postpass scheduling (only for K1 architecture) [on] -fpostpass= <optim> Perform postpass scheduling with the specified optimization [list] (<optim>=list: list scheduling, <optim>=ilp: ILP, <optim>=greedy: just packing bundles) @@ -296,9 +298,9 @@ let num_input_files = ref 0 let cmdline_actions = let f_opt name ref = [Exact("-f" ^ name), Set ref; Exact("-fno-" ^ name), Unset ref] in - let f_opt_str name ref strref = + let f_opt_str name default ref strref = [Exact("-f" ^ name ^ "="), String - (fun s -> (strref := (if s == "" then "list" else s)); ref := true) + (fun s -> (strref := (if s == "" then default else s)); ref := true) ] in let f_str name strref default = [Exact("-f" ^ name ^ "="), String @@ -342,6 +344,7 @@ let cmdline_actions = Exact "-fprofile-use=", String (fun s -> Profilingaux.load_profiling_info s); Exact "-finline-auto-threshold", Integer (fun n -> option_inline_auto_threshold := n); Exact "-debug-compcert", Integer (fun n -> option_debug_compcert := n); + Exact "-regpres-threshold", Integer (fun n -> option_regpres_threshold := n); Exact "-fsmall-data", Integer(fun n -> option_small_data := n); Exact "-fsmall-const", Integer(fun n -> option_small_const := n); Exact "-ffloat-const-prop", Integer(fun n -> option_ffloatconstprop := n); @@ -424,6 +427,7 @@ let cmdline_actions = @ f_opt "redundancy" option_fredundancy @ [ Exact "-mtune", String (fun s -> option_mtune := s) ] @ f_opt "prepass" option_fprepass + @ f_opt "regpres-wait-window" option_regpres_wait_window @ f_opt "postpass" option_fpostpass @ [ Exact "-ftailduplicate", Integer (fun n -> option_ftailduplicate := n) ] @ f_opt "predict" option_fpredict @@ -431,8 +435,8 @@ let cmdline_actions = @ [ Exact "-funrollbody", Integer (fun n -> option_funrollbody := n) ] @ [ Exact "-flooprotate", Integer (fun n -> option_flooprotate := n) ] @ f_opt "tracelinearize" option_ftracelinearize - @ f_opt_str "prepass" option_fprepass option_fprepass_sched - @ f_opt_str "postpass" option_fpostpass option_fpostpass_sched + @ f_opt_str "prepass" "regpress" option_fprepass option_fprepass_sched + @ f_opt_str "postpass" "list" option_fpostpass option_fpostpass_sched @ f_opt "inline" option_finline @ f_opt "inline-functions-called-once" option_finline_functions_called_once @ f_opt "globaladdrtmp" option_fglobaladdrtmp diff --git a/filter_peeplog.fish b/filter_peeplog.fish deleted file mode 100755 index 72a0eaf1..00000000 --- a/filter_peeplog.fish +++ /dev/null @@ -1,39 +0,0 @@ -echo "LDP_CONSEC_PEEP_IMM_INC_ldr32" (cat log | ack "LDP_CONSEC_PEEP_IMM_INC_ldr32" | wc -l) -echo "LDP_CONSEC_PEEP_IMM_INC_ldr64" (cat log | ack "LDP_CONSEC_PEEP_IMM_INC_ldr64" | wc -l) -echo "LDP_CONSEC_PEEP_IMM_DEC_ldr32" (cat log | ack "LDP_CONSEC_PEEP_IMM_DEC_ldr32" | wc -l) -echo "LDP_CONSEC_PEEP_IMM_DEC_ldr64" (cat log | ack "LDP_CONSEC_PEEP_IMM_DEC_ldr64" | wc -l) -echo "LDP_FORW_SPACED_PEEP_IMM_INC_ldr32" (cat log | ack "LDP_FORW_SPACED_PEEP_IMM_INC_ldr32" | wc -l) -echo "LDP_FORW_SPACED_PEEP_IMM_INC_ldr64" (cat log | ack "LDP_FORW_SPACED_PEEP_IMM_INC_ldr64" | wc -l) -echo "LDP_FORW_SPACED_PEEP_IMM_DEC_ldr32" (cat log | ack "LDP_FORW_SPACED_PEEP_IMM_DEC_ldr32" | wc -l) -echo "LDP_FORW_SPACED_PEEP_IMM_DEC_ldr64" (cat log | ack "LDP_FORW_SPACED_PEEP_IMM_DEC_ldr64" | wc -l) -echo "LDP_BACK_SPACED_PEEP_IMM_INC_ldr32" (cat log | ack "LDP_BACK_SPACED_PEEP_IMM_INC_ldr32" | wc -l) -echo "LDP_BACK_SPACED_PEEP_IMM_INC_ldr64" (cat log | ack "LDP_BACK_SPACED_PEEP_IMM_INC_ldr64" | wc -l) -echo "LDP_BACK_SPACED_PEEP_IMM_DEC_ldr32" (cat log | ack "LDP_BACK_SPACED_PEEP_IMM_DEC_ldr32" | wc -l) -echo "LDP_BACK_SPACED_PEEP_IMM_DEC_ldr64" (cat log | ack "LDP_BACK_SPACED_PEEP_IMM_DEC_ldr64" | wc -l) -echo "\n" -echo "LDP_CONSEC_PEEP_IMM_INC_ldr32f" (cat log | ack "LDP_CONSEC_PEEP_IMM_INC_ldr32f" | wc -l) -echo "LDP_CONSEC_PEEP_IMM_INC_ldr64f" (cat log | ack "LDP_CONSEC_PEEP_IMM_INC_ldr64f" | wc -l) -echo "LDP_CONSEC_PEEP_IMM_DEC_ldr32f" (cat log | ack "LDP_CONSEC_PEEP_IMM_DEC_ldr32f" | wc -l) -echo "LDP_CONSEC_PEEP_IMM_DEC_ldr64f" (cat log | ack "LDP_CONSEC_PEEP_IMM_DEC_ldr64f" | wc -l) -echo "LDP_FORW_SPACED_PEEP_IMM_INC_ldr32f" (cat log | ack "LDP_FORW_SPACED_PEEP_IMM_INC_ldr32f" | wc -l) -echo "LDP_FORW_SPACED_PEEP_IMM_INC_ldr64f" (cat log | ack "LDP_FORW_SPACED_PEEP_IMM_INC_ldr64f" | wc -l) -echo "LDP_FORW_SPACED_PEEP_IMM_DEC_ldr32f" (cat log | ack "LDP_FORW_SPACED_PEEP_IMM_DEC_ldr32f" | wc -l) -echo "LDP_FORW_SPACED_PEEP_IMM_DEC_ldr64f" (cat log | ack "LDP_FORW_SPACED_PEEP_IMM_DEC_ldr64f" | wc -l) -echo "LDP_BACK_SPACED_PEEP_IMM_INC_ldr32f" (cat log | ack "LDP_BACK_SPACED_PEEP_IMM_INC_ldr32f" | wc -l) -echo "LDP_BACK_SPACED_PEEP_IMM_INC_ldr64f" (cat log | ack "LDP_BACK_SPACED_PEEP_IMM_INC_ldr64f" | wc -l) -echo "LDP_BACK_SPACED_PEEP_IMM_DEC_ldr32f" (cat log | ack "LDP_BACK_SPACED_PEEP_IMM_DEC_ldr32f" | wc -l) -echo "LDP_BACK_SPACED_PEEP_IMM_DEC_ldr64f" (cat log | ack "LDP_BACK_SPACED_PEEP_IMM_DEC_ldr64f" | wc -l) -echo "\n" -echo "STP_CONSEC_PEEP_IMM_INC_str32" (cat log | ack "STP_CONSEC_PEEP_IMM_INC_str32" | wc -l) -echo "STP_CONSEC_PEEP_IMM_INC_str64" (cat log | ack "STP_CONSEC_PEEP_IMM_INC_str64" | wc -l) -echo "STP_FORW_SPACED_PEEP_IMM_INC_str32" (cat log | ack "STP_FORW_SPACED_PEEP_IMM_INC_str32" | wc -l) -echo "STP_FORW_SPACED_PEEP_IMM_INC_str64" (cat log | ack "STP_FORW_SPACED_PEEP_IMM_INC_str64" | wc -l) -echo "STP_BACK_SPACED_PEEP_IMM_INC_str32" (cat log | ack "STP_BACK_SPACED_PEEP_IMM_INC_str32" | wc -l) -echo "STP_BACK_SPACED_PEEP_IMM_INC_str64" (cat log | ack "STP_BACK_SPACED_PEEP_IMM_INC_str64" | wc -l) -echo "\n" -echo "STP_CONSEC_PEEP_IMM_INC_str32f" (cat log | ack "STP_CONSEC_PEEP_IMM_INC_str32f" | wc -l) -echo "STP_CONSEC_PEEP_IMM_INC_str64f" (cat log | ack "STP_CONSEC_PEEP_IMM_INC_str64f" | wc -l) -echo "STP_FORW_SPACED_PEEP_IMM_INC_str32f" (cat log | ack "STP_FORW_SPACED_PEEP_IMM_INC_str32f" | wc -l) -echo "STP_FORW_SPACED_PEEP_IMM_INC_str64f" (cat log | ack "STP_FORW_SPACED_PEEP_IMM_INC_str64f" | wc -l) -echo "STP_BACK_SPACED_PEEP_IMM_INC_str32f" (cat log | ack "STP_BACK_SPACED_PEEP_IMM_INC_str32f" | wc -l) -echo "STP_BACK_SPACED_PEEP_IMM_INC_str64f" (cat log | ack "STP_BACK_SPACED_PEEP_IMM_INC_str64f" | wc -l) diff --git a/kvx/Asmblockdeps.v b/kvx/Asmblockdeps.v index b6d18c3e..a9786e0a 100644 --- a/kvx/Asmblockdeps.v +++ b/kvx/Asmblockdeps.v @@ -164,17 +164,17 @@ Definition arith_eval (ao: arith_op) (l: list value) := | _, _ => None end. -Definition exec_incorrect_load trap chunk := +Definition exec_incorrect_load trap := match trap with | TRAP => None - | NOTRAP => Some (Val (concrete_default_notrap_load_value chunk)) + | NOTRAP => Some (Val Vundef) end. Definition exec_load_deps_offset (trap: trapping_mode) (chunk: memory_chunk) (m: mem) (v: val) (ofs: offset) := let (ge, fn) := Ge in match (eval_offset ofs) with | OK ptr => match Mem.loadv chunk m (Val.offset_ptr v ptr) with - | None => exec_incorrect_load trap chunk + | None => exec_incorrect_load trap | Some vl => Some (Val vl) end | _ => None @@ -182,13 +182,13 @@ Definition exec_load_deps_offset (trap: trapping_mode) (chunk: memory_chunk) (m: Definition exec_load_deps_reg (trap: trapping_mode) (chunk: memory_chunk) (m: mem) (v vo: val) := match Mem.loadv chunk m (Val.addl v vo) with - | None => exec_incorrect_load trap chunk + | None => exec_incorrect_load trap | Some vl => Some (Val vl) end. Definition exec_load_deps_regxs (trap: trapping_mode) (chunk: memory_chunk) (m: mem) (v vo: val) := match Mem.loadv chunk m (Val.addl v (Val.shll vo (scale_of_chunk chunk))) with - | None => exec_incorrect_load trap chunk + | None => exec_incorrect_load trap | Some vl => Some (Val vl) end. diff --git a/kvx/Asmblockgenproof1.v b/kvx/Asmblockgenproof1.v index a65bd5bc..259c4f9c 100644 --- a/kvx/Asmblockgenproof1.v +++ b/kvx/Asmblockgenproof1.v @@ -1914,7 +1914,7 @@ Lemma transl_load_access2_correct_notrap2: Mem.loadv chunk m v = None -> exists rs', exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m - /\ rs'#rd = concrete_default_notrap_load_value chunk + /\ rs'#rd = Vundef /\ forall r, r <> PC -> r <> RTMP -> r <> rd -> rs'#r = rs#r. Proof. intros until ro; intros ARGS IREGE INSTR TR EV LOAD. @@ -1963,7 +1963,7 @@ Lemma transl_load_access2XS_correct_notrap2: Mem.loadv chunk m v = None -> exists rs', exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m - /\ rs'#rd = concrete_default_notrap_load_value chunk + /\ rs'#rd = Vundef /\ forall r, r <> PC -> r <> RTMP -> r <> rd -> rs'#r = rs#r. Proof. intros until ro; intros ARGS IREGE INSTR TR EV LOAD. @@ -2008,7 +2008,7 @@ Lemma transl_load_access_correct_notrap2: Mem.loadv chunk m v = None -> exists rs', exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m - /\ rs'#rd = concrete_default_notrap_load_value chunk + /\ rs'#rd = Vundef /\ forall r, r <> PC -> r <> RTMP -> r <> rd -> rs'#r = rs#r. Proof. intros until v; intros INSTR TR EV LOAD. @@ -2185,7 +2185,7 @@ Lemma transl_load_correct_notrap2: Mem.loadv chunk m a = None -> exists rs', exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m - /\ rs'#(preg_of dst) = (concrete_default_notrap_load_value chunk) + /\ rs'#(preg_of dst) = Vundef /\ forall r, r <> PC -> r <> RTMP -> r <> preg_of dst -> rs'#r = rs#r. Proof. intros until a; intros TR EV LOAD. destruct addr. diff --git a/kvx/Asmblockprops.v b/kvx/Asmblockprops.v index c3929be5..a732d29b 100644 --- a/kvx/Asmblockprops.v +++ b/kvx/Asmblockprops.v @@ -96,9 +96,9 @@ Theorem exec_basic_instr_pc: Proof. intros. destruct b; try destruct i; try destruct i. all: try (inv H; Simpl). - 1-10: unfold parexec_load_offset in H1; destruct (eval_offset ofs); try discriminate; destruct (Mem.loadv _ _ _); unfold parexec_incorrect_load in *; destruct trap; try discriminate; unfold concrete_default_notrap_load_value in *; inv H1; Simpl; fail. + 1-10: unfold parexec_load_offset in H1; destruct (eval_offset ofs); try discriminate; destruct (Mem.loadv _ _ _); unfold parexec_incorrect_load in *; destruct trap; try discriminate; inv H1; Simpl; fail. - 1-20: unfold parexec_load_reg, parexec_load_regxs in H1; destruct (Mem.loadv _ _ _); unfold parexec_incorrect_load in *; destruct trap; try discriminate; unfold concrete_default_notrap_load_value in *; inv H1; Simpl; fail. + 1-20: unfold parexec_load_reg, parexec_load_regxs in H1; destruct (Mem.loadv _ _ _); unfold parexec_incorrect_load in *; destruct trap; try discriminate; inv H1; Simpl; fail. { (* PLoadQRRO *) unfold parexec_load_q_offset in H1. diff --git a/kvx/Asmvliw.v b/kvx/Asmvliw.v index aa2e0885..45b230e6 100644 --- a/kvx/Asmvliw.v +++ b/kvx/Asmvliw.v @@ -313,7 +313,11 @@ Inductive cf_instruction : Type := . (** *** Loads *) -Definition concrete_default_notrap_load_value (chunk : memory_chunk) := + +(* What follows was the original spec, but is subtly incorrect. + Our definition of the assembly-level memory model is already an abstraction of the real world. + In particular, we consider that a load is incorrect when it points outside of CompCert's visible memory, whereas this memory could be correct at the assembly level. + This means that CompCert would believe an incorrect load would yield 0 whereas it would yield another value. match chunk with | Mint8signed | Mint8unsigned | Mint16signed | Mint16unsigned | Mint32 => Vint Int.zero @@ -321,7 +325,7 @@ Definition concrete_default_notrap_load_value (chunk : memory_chunk) := | Many32 | Many64 => Vundef | Mfloat32 => Vsingle Float32.zero | Mfloat64 => Vfloat Float.zero - end. + end. *) Inductive load_name : Type := | Plb (**r load byte *) @@ -1169,16 +1173,16 @@ Definition eval_offset (ofs: offset) : res ptrofs := OK ofs. (** *** load/store instructions *) -Definition parexec_incorrect_load trap chunk d rsw mw := +Definition parexec_incorrect_load trap d rsw mw := match trap with | TRAP => Stuck - | NOTRAP => Next (rsw#d <- (concrete_default_notrap_load_value chunk)) mw + | NOTRAP => Next (rsw#d <- Vundef) mw end. Definition parexec_load_offset (trap: trapping_mode) (chunk: memory_chunk) (rsr rsw: regset) (mr mw: mem) (d a: ireg) (ofs: offset) := match (eval_offset ofs) with | OK ptr => match Mem.loadv chunk mr (Val.offset_ptr (rsr a) ptr) with - | None => parexec_incorrect_load trap chunk d rsw mw + | None => parexec_incorrect_load trap d rsw mw | Some v => Next (rsw#d <- v) mw end | _ => Stuck @@ -1225,13 +1229,13 @@ Definition parexec_load_o_offset (rsr rsw: regset) (mr mw: mem) (d : gpreg_o) (a Definition parexec_load_reg (trap: trapping_mode) (chunk: memory_chunk) (rsr rsw: regset) (mr mw: mem) (d a ro: ireg) := match Mem.loadv chunk mr (Val.addl (rsr a) (rsr ro)) with - | None => parexec_incorrect_load trap chunk d rsw mw + | None => parexec_incorrect_load trap d rsw mw | Some v => Next (rsw#d <- v) mw end. Definition parexec_load_regxs (trap: trapping_mode) (chunk: memory_chunk) (rsr rsw: regset) (mr mw: mem) (d a ro: ireg) := match Mem.loadv chunk mr (Val.addl (rsr a) (Val.shll (rsr ro) (scale_of_chunk chunk))) with - | None => parexec_incorrect_load trap chunk d rsw mw + | None => parexec_incorrect_load trap d rsw mw | Some v => Next (rsw#d <- v) mw end. diff --git a/kvx/ExpansionOracle.ml b/kvx/ExpansionOracle.ml index ee2674bf..3b63b80d 120000..100644 --- a/kvx/ExpansionOracle.ml +++ b/kvx/ExpansionOracle.ml @@ -1 +1,17 @@ -../aarch64/ExpansionOracle.ml
\ No newline at end of file +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Léo Gourdin UGA, VERIMAG *) +(* *) +(* Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +open RTLpathCommon + +let expanse (sb : superblock) code pm = (code, pm) + +let find_last_node_reg c = () diff --git a/kvx/Machregsaux.ml b/kvx/Machregsaux.ml index e3b18181..dbb89727 100644 --- a/kvx/Machregsaux.ml +++ b/kvx/Machregsaux.ml @@ -31,3 +31,5 @@ let class_of_type = function | AST.Tint | AST.Tlong | AST.Tfloat | AST.Tsingle -> 0 | AST.Tany32 | AST.Tany64 -> assert false + +let nr_regs = [| 59 |] diff --git a/kvx/Machregsaux.mli b/kvx/Machregsaux.mli index 01b0f9fd..23ac1c9a 100644 --- a/kvx/Machregsaux.mli +++ b/kvx/Machregsaux.mli @@ -15,3 +15,6 @@ val is_scratch_register: string -> bool val class_of_type: AST.typ -> int + +(* Number of registers in each class *) +val nr_regs : int array diff --git a/kvx/PostpassSchedulingOracle.ml b/kvx/PostpassSchedulingOracle.ml index 2107ce22..5ebad421 100644 --- a/kvx/PostpassSchedulingOracle.ml +++ b/kvx/PostpassSchedulingOracle.ml @@ -787,8 +787,14 @@ let latency_constraints bb = *) let build_problem bb = - { max_latency = -1; resource_bounds = resource_bounds; - instruction_usages = instruction_usages bb; latency_constraints = latency_constraints bb } +{ max_latency = -1; + resource_bounds = resource_bounds; + instruction_usages = instruction_usages bb; + latency_constraints = latency_constraints bb; + live_regs_entry = Registers.Regset.empty; (* unused here *) + typing = (fun x -> AST.Tint); (* unused here *) + reference_counting = None +} let rec find_min_opt (l: int option list) = match l with diff --git a/kvx/PrepassSchedulingOracle.ml b/kvx/PrepassSchedulingOracle.ml index 912e9ffa..53a81095 120000..100644 --- a/kvx/PrepassSchedulingOracle.ml +++ b/kvx/PrepassSchedulingOracle.ml @@ -1 +1,485 @@ -../aarch64/PrepassSchedulingOracle.ml
\ No newline at end of file +open AST +open RTL +open Maps +open InstructionScheduler +open Registers +open PrepassSchedulingOracleDeps + +let use_alias_analysis () = false + +let length_of_chunk = function +| Mint8signed +| Mint8unsigned -> 1 +| Mint16signed +| Mint16unsigned -> 2 +| Mint32 +| Mfloat32 +| Many32 -> 4 +| Mint64 +| Mfloat64 +| Many64 -> 8;; + +let get_simple_dependencies (opweights : opweights) (seqa : (instruction*Regset.t) array) = + let last_reg_reads : int list PTree.t ref = ref PTree.empty + and last_reg_write : (int*int) PTree.t ref = ref PTree.empty + and last_mem_reads : int list ref = ref [] + and last_mem_write : int option ref = ref None + and last_branch : int option ref = ref None + and last_non_pipelined_op : int array = Array.make + opweights.nr_non_pipelined_units ( -1 ) + and latency_constraints : latency_constraint list ref = ref [] in + let add_constraint instr_from instr_to latency = + assert (instr_from <= instr_to); + assert (latency >= 0); + if instr_from = instr_to + then (if latency = 0 + then () + else failwith "PrepassSchedulingOracle.get_dependencies: negative self-loop") + else + latency_constraints := + { instr_from = instr_from; + instr_to = instr_to; + latency = latency + }:: !latency_constraints + and get_last_reads reg = + match PTree.get reg !last_reg_reads + with Some l -> l + | None -> [] in + let add_input_mem i = + if not (use_alias_analysis ()) + then + begin + begin + (* Read after write *) + match !last_mem_write with + | None -> () + | Some j -> add_constraint j i 1 + end; + last_mem_reads := i :: !last_mem_reads + end + and add_output_mem i = + if not (use_alias_analysis ()) + then + begin + begin + (* Write after write *) + match !last_mem_write with + | None -> () + | Some j -> add_constraint j i 1 + end; + (* Write after read *) + List.iter (fun j -> add_constraint j i 0) !last_mem_reads; + last_mem_write := Some i; + last_mem_reads := [] + end + and add_input_reg i reg = + begin + (* Read after write *) + match PTree.get reg !last_reg_write with + | None -> () + | Some (j, latency) -> add_constraint j i latency + end; + last_reg_reads := PTree.set reg + (i :: get_last_reads reg) + !last_reg_reads + and add_output_reg i latency reg = + begin + (* Write after write *) + match PTree.get reg !last_reg_write with + | None -> () + | Some (j, _) -> add_constraint j i 1 + end; + begin + (* Write after read *) + List.iter (fun j -> add_constraint j i 0) (get_last_reads reg) + end; + last_reg_write := PTree.set reg (i, latency) !last_reg_write; + last_reg_reads := PTree.remove reg !last_reg_reads + in + let add_input_regs i regs = List.iter (add_input_reg i) regs in + let rec add_builtin_res i (res : reg builtin_res) = + match res with + | BR r -> add_output_reg i 10 r + | BR_none -> () + | BR_splitlong (hi, lo) -> add_builtin_res i hi; + add_builtin_res i lo in + let rec add_builtin_arg i (ba : reg builtin_arg) = + match ba with + | BA r -> add_input_reg i r + | BA_int _ | BA_long _ | BA_float _ | BA_single _ -> () + | BA_loadstack(_,_) -> add_input_mem i + | BA_addrstack _ -> () + | BA_loadglobal(_, _, _) -> add_input_mem i + | BA_addrglobal _ -> () + | BA_splitlong(hi, lo) -> add_builtin_arg i hi; + add_builtin_arg i lo + | BA_addptr(a1, a2) -> add_builtin_arg i a1; + add_builtin_arg i a2 in + let irreversible_action i = + match !last_branch with + | None -> () + | Some j -> add_constraint j i 1 in + let set_branch i = + irreversible_action i; + last_branch := Some i in + let add_non_pipelined_resources i resources = + Array.iter2 + (fun latency last -> + if latency >= 0 && last >= 0 then add_constraint last i latency) + resources last_non_pipelined_op; + Array.iteri (fun rsc latency -> + if latency >= 0 + then last_non_pipelined_op.(rsc) <- i) resources + in + Array.iteri + begin + fun i (insn, other_uses) -> + List.iter (fun use -> + add_input_reg i use) + (Regset.elements other_uses); + + match insn with + | Inop _ -> () + | Iop(op, inputs, output, _) -> + add_non_pipelined_resources i + (opweights.non_pipelined_resources_of_op op (List.length inputs)); + (if Op.is_trapping_op op then irreversible_action i); + add_input_regs i inputs; + add_output_reg i (opweights.latency_of_op op (List.length inputs)) output + | Iload(trap, chunk, addressing, addr_regs, output, _) -> + (if trap=TRAP then irreversible_action i); + add_input_mem i; + add_input_regs i addr_regs; + add_output_reg i (opweights.latency_of_load trap chunk addressing (List.length addr_regs)) output + | Istore(chunk, addressing, addr_regs, input, _) -> + irreversible_action i; + add_input_regs i addr_regs; + add_input_reg i input; + add_output_mem i + | Icall(signature, ef, inputs, output, _) -> + set_branch i; + (match ef with + | Datatypes.Coq_inl r -> add_input_reg i r + | Datatypes.Coq_inr symbol -> () + ); + add_input_mem i; + add_input_regs i inputs; + add_output_reg i (opweights.latency_of_call signature ef) output; + add_output_mem i; + failwith "Icall" + | Itailcall(signature, ef, inputs) -> + set_branch i; + (match ef with + | Datatypes.Coq_inl r -> add_input_reg i r + | Datatypes.Coq_inr symbol -> () + ); + add_input_mem i; + add_input_regs i inputs; + failwith "Itailcall" + | Ibuiltin(ef, builtin_inputs, builtin_output, _) -> + set_branch i; + add_input_mem i; + List.iter (add_builtin_arg i) builtin_inputs; + add_builtin_res i builtin_output; + add_output_mem i; + failwith "Ibuiltin" + | Icond(cond, inputs, _, _, _) -> + set_branch i; + add_input_mem i; + add_input_regs i inputs + | Ijumptable(input, _) -> + set_branch i; + add_input_reg i input; + failwith "Ijumptable" + | Ireturn(Some input) -> + set_branch i; + add_input_reg i input; + failwith "Ireturn" + | Ireturn(None) -> + set_branch i; + failwith "Ireturn none" + end seqa; + !latency_constraints;; + +let resources_of_instruction (opweights : opweights) = function + | Inop _ -> Array.map (fun _ -> 0) opweights.pipelined_resource_bounds + | Iop(op, inputs, output, _) -> + opweights.resources_of_op op (List.length inputs) + | Iload(trap, chunk, addressing, addr_regs, output, _) -> + opweights.resources_of_load trap chunk addressing (List.length addr_regs) + | Istore(chunk, addressing, addr_regs, input, _) -> + opweights.resources_of_store chunk addressing (List.length addr_regs) + | Icall(signature, ef, inputs, output, _) -> + opweights.resources_of_call signature ef + | Ibuiltin(ef, builtin_inputs, builtin_output, _) -> + opweights.resources_of_builtin ef + | Icond(cond, args, _, _ , _) -> + opweights.resources_of_cond cond (List.length args) + | Itailcall _ | Ijumptable _ | Ireturn _ -> opweights.pipelined_resource_bounds + +let print_sequence pp (seqa : instruction array) = + Array.iteri ( + fun i (insn : instruction) -> + PrintRTL.print_instruction pp (i, insn)) seqa;; + +type unique_id = int + +type 'a symbolic_term_node = + | STop of Op.operation * 'a list + | STinitial_reg of int + | STother of int;; + +type symbolic_term = { + hash_id : unique_id; + hash_ct : symbolic_term symbolic_term_node + };; + +let rec print_term channel term = + match term.hash_ct with + | STop(op, args) -> + PrintOp.print_operation print_term channel (op, args) + | STinitial_reg n -> Printf.fprintf channel "x%d" n + | STother n -> Printf.fprintf channel "y%d" n;; + +type symbolic_term_table = { + st_table : (unique_id symbolic_term_node, symbolic_term) Hashtbl.t; + mutable st_next_id : unique_id };; + +let hash_init () = { + st_table = Hashtbl.create 20; + st_next_id = 0 + };; + +let ground_to_id = function + | STop(op, l) -> STop(op, List.map (fun t -> t.hash_id) l) + | STinitial_reg r -> STinitial_reg r + | STother i -> STother i;; + +let hash_node (table : symbolic_term_table) (term : symbolic_term symbolic_term_node) : symbolic_term = + let grounded = ground_to_id term in + match Hashtbl.find_opt table.st_table grounded with + | Some x -> x + | None -> + let term' = { hash_id = table.st_next_id; + hash_ct = term } in + (if table.st_next_id = max_int then failwith "hash: max_int"); + table.st_next_id <- table.st_next_id + 1; + Hashtbl.add table.st_table grounded term'; + term';; + +type access = { + base : symbolic_term; + offset : int64; + length : int + };; + +let term_equal a b = (a.hash_id = b.hash_id);; + +let access_of_addressing get_reg chunk addressing args = + match addressing, args with + | (Op.Aindexed ofs), [reg] -> Some + { base = get_reg reg; + offset = Camlcoq.camlint64_of_ptrofs ofs; + length = length_of_chunk chunk + } + | _, _ -> None ;; +(* TODO: global *) + +let symbolic_execution (seqa : instruction array) = + let regs = ref PTree.empty + and table = hash_init() in + let assign reg term = regs := PTree.set reg term !regs + and hash term = hash_node table term in + let get_reg reg = + match PTree.get reg !regs with + | None -> hash (STinitial_reg (Camlcoq.P.to_int reg)) + | Some x -> x in + let targets = Array.make (Array.length seqa) None in + Array.iteri + begin + fun i insn -> + match insn with + | Iop(Op.Omove, [input], output, _) -> + assign output (get_reg input) + | Iop(op, inputs, output, _) -> + assign output (hash (STop(op, List.map get_reg inputs))) + + | Iload(trap, chunk, addressing, args, output, _) -> + let access = access_of_addressing get_reg chunk addressing args in + targets.(i) <- access; + assign output (hash (STother(i))) + + | Icall(_, _, _, output, _) + | Ibuiltin(_, _, BR output, _) -> + assign output (hash (STother(i))) + + | Istore(chunk, addressing, args, va, _) -> + let access = access_of_addressing get_reg chunk addressing args in + targets.(i) <- access + + | Inop _ -> () + | Ibuiltin(_, _, BR_none, _) -> () + | Ibuiltin(_, _, BR_splitlong _, _) -> failwith "BR_splitlong" + + | Itailcall (_, _, _) + |Icond (_, _, _, _, _) + |Ijumptable (_, _) + |Ireturn _ -> () + end seqa; + targets;; + +let print_access channel = function + | None -> Printf.fprintf channel "any" + | Some x -> Printf.fprintf channel "%a + %Ld" print_term x.base x.offset;; + +let print_targets channel seqa = + let targets = symbolic_execution seqa in + Array.iteri + (fun i insn -> + match insn with + | Iload _ -> Printf.fprintf channel "%d: load %a\n" + i print_access targets.(i) + | Istore _ -> Printf.fprintf channel "%d: store %a\n" + i print_access targets.(i) + | _ -> () + ) seqa;; + +let may_overlap a0 b0 = + match a0, b0 with + | (None, _) | (_ , None) -> true + | (Some a), (Some b) -> + if term_equal a.base b.base + then (max a.offset b.offset) < + (min (Int64.add (Int64.of_int a.length) a.offset) + (Int64.add (Int64.of_int b.length) b.offset)) + else match a.base.hash_ct, b.base.hash_ct with + | STop(Op.Oaddrsymbol(ida, ofsa),[]), + STop(Op.Oaddrsymbol(idb, ofsb),[]) -> + (ida=idb) && + let ao = Int64.add a.offset (Camlcoq.camlint64_of_ptrofs ofsa) + and bo = Int64.add b.offset (Camlcoq.camlint64_of_ptrofs ofsb) in + (max ao bo) < + (min (Int64.add (Int64.of_int a.length) ao) + (Int64.add (Int64.of_int b.length) bo)) + | STop(Op.Oaddrstack _, []), + STop(Op.Oaddrsymbol _, []) + | STop(Op.Oaddrsymbol _, []), + STop(Op.Oaddrstack _, []) -> false + | STop(Op.Oaddrstack(ofsa),[]), + STop(Op.Oaddrstack(ofsb),[]) -> + let ao = Int64.add a.offset (Camlcoq.camlint64_of_ptrofs ofsa) + and bo = Int64.add b.offset (Camlcoq.camlint64_of_ptrofs ofsb) in + (max ao bo) < + (min (Int64.add (Int64.of_int a.length) ao) + (Int64.add (Int64.of_int b.length) bo)) + | _ -> true;; + +(* +(* TODO suboptimal quadratic algorithm *) +let get_alias_dependencies seqa = + let targets = symbolic_execution seqa + and deps = ref [] in + let add_constraint instr_from instr_to latency = + deps := { instr_from = instr_from; + instr_to = instr_to; + latency = latency + }:: !deps in + for i=0 to (Array.length seqa)-1 + do + for j=0 to i-1 + do + match seqa.(j), seqa.(i) with + | (Istore _), ((Iload _) | (Istore _)) -> + if may_overlap targets.(j) targets.(i) + then add_constraint j i 1 + | (Iload _), (Istore _) -> + if may_overlap targets.(j) targets.(i) + then add_constraint j i 0 + | (Istore _ | Iload _), (Icall _ | Ibuiltin _) + | (Icall _ | Ibuiltin _), (Icall _ | Ibuiltin _ | Iload _ | Istore _) -> + add_constraint j i 1 + | (Inop _ | Iop _), _ + | _, (Inop _ | Iop _) + | (Iload _), (Iload _) -> () + done + done; + !deps;; + *) + +let define_problem (opweights : opweights) (live_entry_regs : Regset.t) + (typing : RTLtyping.regenv) reference_counting seqa = + let simple_deps = get_simple_dependencies opweights seqa in + { max_latency = -1; + resource_bounds = opweights.pipelined_resource_bounds; + live_regs_entry = live_entry_regs; + typing = typing; + reference_counting = Some reference_counting; + instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa); + latency_constraints = + (* if (use_alias_analysis ()) + then (get_alias_dependencies seqa) @ simple_deps + else *) simple_deps };; + +let zigzag_scheduler problem early_ones = + let nr_instructions = get_nr_instructions problem in + assert(nr_instructions = (Array.length early_ones)); + match list_scheduler problem with + | Some fwd_schedule -> + let fwd_makespan = fwd_schedule.((Array.length fwd_schedule) - 1) in + let constraints' = ref problem.latency_constraints in + Array.iteri (fun i is_early -> + if is_early then + constraints' := { + instr_from = i; + instr_to = nr_instructions ; + latency = fwd_makespan - fwd_schedule.(i) } ::!constraints' ) + early_ones; + validated_scheduler reverse_list_scheduler + { problem with latency_constraints = !constraints' } + | None -> None;; + +let prepass_scheduler_by_name name problem early_ones = + match name with + | "zigzag" -> zigzag_scheduler problem early_ones + | _ -> scheduler_by_name name problem + +let schedule_sequence (seqa : (instruction*Regset.t) array) + (live_regs_entry : Registers.Regset.t) + (typing : RTLtyping.regenv) + reference = + let opweights = OpWeights.get_opweights () in + try + if (Array.length seqa) <= 1 + then None + else + begin + let nr_instructions = Array.length seqa in + (if !Clflags.option_debug_compcert > 6 + then Printf.printf "prepass scheduling length = %d\n" (Array.length seqa)); + let problem = define_problem opweights live_regs_entry + typing reference seqa in + (if !Clflags.option_debug_compcert > 7 + then (print_sequence stdout (Array.map fst seqa); + print_problem stdout problem)); + match prepass_scheduler_by_name + (!Clflags.option_fprepass_sched) + problem + (Array.map (fun (ins, _) -> + match ins with + | Icond _ -> true + | _ -> false) seqa) with + | None -> Printf.printf "no solution in prepass scheduling\n"; + None + | Some solution -> + let positions = Array.init nr_instructions (fun i -> i) in + Array.sort (fun i j -> + let si = solution.(i) and sj = solution.(j) in + if si < sj then -1 + else if si > sj then 1 + else i - j) positions; + Some positions + end + with (Failure s) -> + Printf.printf "failure in prepass scheduling: %s\n" s; + None;; + diff --git a/kvx/PrepassSchedulingOracleDeps.ml b/kvx/PrepassSchedulingOracleDeps.ml index 1e955b85..8d10d406 120000..100644 --- a/kvx/PrepassSchedulingOracleDeps.ml +++ b/kvx/PrepassSchedulingOracleDeps.ml @@ -1 +1,17 @@ -../aarch64/PrepassSchedulingOracleDeps.ml
\ No newline at end of file +type called_function = (Registers.reg, AST.ident) Datatypes.sum + +type opweights = + { + pipelined_resource_bounds : int array; + nr_non_pipelined_units : int; + latency_of_op : Op.operation -> int -> int; + resources_of_op : Op.operation -> int -> int array; + non_pipelined_resources_of_op : Op.operation -> int -> int array; + latency_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int; + resources_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int array; + resources_of_store : AST.memory_chunk -> Op.addressing -> int -> int array; + resources_of_cond : Op.condition -> int -> int array; + latency_of_call : AST.signature -> called_function -> int; + resources_of_call : AST.signature -> called_function -> int array; + resources_of_builtin : AST.external_function -> int array + };; diff --git a/lib/Integers.v b/lib/Integers.v index 3e103ab7..2addc78b 100644 --- a/lib/Integers.v +++ b/lib/Integers.v @@ -3747,8 +3747,7 @@ Proof. unfold lt. rewrite signed_zero. rewrite bits_zero. - destruct (zlt _ _); try lia. - reflexivity. + destruct (zlt _ _); try lia; reflexivity. } change (Z.testbit (unsigned x) (i + 63)) with (testbit x (i + 63)). rewrite bits_zero. diff --git a/powerpc/ExpansionOracle.ml b/powerpc/ExpansionOracle.ml index ee2674bf..3b63b80d 120000..100644 --- a/powerpc/ExpansionOracle.ml +++ b/powerpc/ExpansionOracle.ml @@ -1 +1,17 @@ -../aarch64/ExpansionOracle.ml
\ No newline at end of file +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Léo Gourdin UGA, VERIMAG *) +(* *) +(* Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +open RTLpathCommon + +let expanse (sb : superblock) code pm = (code, pm) + +let find_last_node_reg c = () diff --git a/powerpc/PrepassSchedulingOracle.ml b/powerpc/PrepassSchedulingOracle.ml index 9885fd52..42a3da23 120000..100644 --- a/powerpc/PrepassSchedulingOracle.ml +++ b/powerpc/PrepassSchedulingOracle.ml @@ -1 +1,6 @@ -../x86/PrepassSchedulingOracle.ml
\ No newline at end of file +open RTL +open Registers + +(* Do not do anything *) +let schedule_sequence (seqa : (instruction*Regset.t) array) + live_regs_entry typing reference = None diff --git a/riscV/Asmgenproof1.v b/riscV/Asmgenproof1.v index 6abde89f..42ab8375 100644 --- a/riscV/Asmgenproof1.v +++ b/riscV/Asmgenproof1.v @@ -821,18 +821,18 @@ Proof. unfold Val.cmp; destruct (rs#r1); simpl; auto. rewrite B1. unfold Int.lt. rewrite zlt_false. auto. change (Int.signed (Int.repr Int.max_signed)) with Int.max_signed. - generalize (Int.signed_range i); omega. + generalize (Int.signed_range i); lia. * exploit (opimm32_correct Psltw Psltiw (Val.cmp Clt)); eauto. intros (rs1 & A1 & B1 & C1). exists rs1; split. eexact A1. split; auto. rewrite B1. unfold Val.cmp; simpl; destruct (rs#r1); simpl; auto. unfold Int.lt. replace (Int.signed (Int.add n Int.one)) with (Int.signed n + 1). destruct (zlt (Int.signed n) (Int.signed i)). - rewrite zlt_false by omega. auto. - rewrite zlt_true by omega. auto. + rewrite zlt_false by lia. auto. + rewrite zlt_true by lia. auto. rewrite Int.add_signed. symmetry; apply Int.signed_repr. assert (Int.signed n <> Int.max_signed). { red; intros E. elim H1. rewrite <- (Int.repr_signed n). rewrite E. auto. } - generalize (Int.signed_range n); omega. + generalize (Int.signed_range n); lia. + apply DFL. + apply DFL. Qed. @@ -919,18 +919,18 @@ Proof. unfold Val.cmpl; destruct (rs#r1); simpl; auto. rewrite B1. unfold Int64.lt. rewrite zlt_false. auto. change (Int64.signed (Int64.repr Int64.max_signed)) with Int64.max_signed. - generalize (Int64.signed_range i); omega. + generalize (Int64.signed_range i); lia. * exploit (opimm64_correct Psltl Psltil (fun v1 v2 => Val.maketotal (Val.cmpl Clt v1 v2))); eauto. intros (rs1 & A1 & B1 & C1). exists rs1; split. eexact A1. split; auto. rewrite B1. unfold Val.cmpl; simpl; destruct (rs#r1); simpl; auto. unfold Int64.lt. replace (Int64.signed (Int64.add n Int64.one)) with (Int64.signed n + 1). destruct (zlt (Int64.signed n) (Int64.signed i)). - rewrite zlt_false by omega. auto. - rewrite zlt_true by omega. auto. + rewrite zlt_false by lia. auto. + rewrite zlt_true by lia. auto. rewrite Int64.add_signed. symmetry; apply Int64.signed_repr. assert (Int64.signed n <> Int64.max_signed). { red; intros E. elim H1. rewrite <- (Int64.repr_signed n). rewrite E. auto. } - generalize (Int64.signed_range n); omega. + generalize (Int64.signed_range n); lia. + apply DFL. + apply DFL. Qed. diff --git a/riscV/Machregsaux.ml b/riscV/Machregsaux.ml index 840943e7..e3e47946 100644 --- a/riscV/Machregsaux.ml +++ b/riscV/Machregsaux.ml @@ -18,3 +18,5 @@ let class_of_type = function | AST.Tint | AST.Tlong -> 0 | AST.Tfloat | AST.Tsingle -> 1 | AST.Tany32 | AST.Tany64 -> assert false + +let nr_regs = [| 26; 32|] diff --git a/riscV/Machregsaux.mli b/riscV/Machregsaux.mli index 01b0f9fd..bb3777bf 100644 --- a/riscV/Machregsaux.mli +++ b/riscV/Machregsaux.mli @@ -15,3 +15,6 @@ val is_scratch_register: string -> bool val class_of_type: AST.typ -> int + +(* Number of registers in each class *) +val nr_regs : int array diff --git a/riscV/OpWeights.ml b/riscV/OpWeights.ml index 0a1d9ad4..a5ece6d5 100644 --- a/riscV/OpWeights.ml +++ b/riscV/OpWeights.ml @@ -66,7 +66,8 @@ module Rocket = struct | OEmayundef _ -> 0 | _ -> 1 - let resources_of_op (op : operation) (nargs : int) = resource_bounds + let resources_of_op (op : operation) (nargs : int) = + match op with OEmayundef _ -> [| 0 |] | _ -> resource_bounds let non_pipelined_resources_of_op (op : operation) (nargs : int) = match op with diff --git a/riscV/PrepassSchedulingOracle.ml b/riscV/PrepassSchedulingOracle.ml index 912e9ffa..53a81095 120000..100644 --- a/riscV/PrepassSchedulingOracle.ml +++ b/riscV/PrepassSchedulingOracle.ml @@ -1 +1,485 @@ -../aarch64/PrepassSchedulingOracle.ml
\ No newline at end of file +open AST +open RTL +open Maps +open InstructionScheduler +open Registers +open PrepassSchedulingOracleDeps + +let use_alias_analysis () = false + +let length_of_chunk = function +| Mint8signed +| Mint8unsigned -> 1 +| Mint16signed +| Mint16unsigned -> 2 +| Mint32 +| Mfloat32 +| Many32 -> 4 +| Mint64 +| Mfloat64 +| Many64 -> 8;; + +let get_simple_dependencies (opweights : opweights) (seqa : (instruction*Regset.t) array) = + let last_reg_reads : int list PTree.t ref = ref PTree.empty + and last_reg_write : (int*int) PTree.t ref = ref PTree.empty + and last_mem_reads : int list ref = ref [] + and last_mem_write : int option ref = ref None + and last_branch : int option ref = ref None + and last_non_pipelined_op : int array = Array.make + opweights.nr_non_pipelined_units ( -1 ) + and latency_constraints : latency_constraint list ref = ref [] in + let add_constraint instr_from instr_to latency = + assert (instr_from <= instr_to); + assert (latency >= 0); + if instr_from = instr_to + then (if latency = 0 + then () + else failwith "PrepassSchedulingOracle.get_dependencies: negative self-loop") + else + latency_constraints := + { instr_from = instr_from; + instr_to = instr_to; + latency = latency + }:: !latency_constraints + and get_last_reads reg = + match PTree.get reg !last_reg_reads + with Some l -> l + | None -> [] in + let add_input_mem i = + if not (use_alias_analysis ()) + then + begin + begin + (* Read after write *) + match !last_mem_write with + | None -> () + | Some j -> add_constraint j i 1 + end; + last_mem_reads := i :: !last_mem_reads + end + and add_output_mem i = + if not (use_alias_analysis ()) + then + begin + begin + (* Write after write *) + match !last_mem_write with + | None -> () + | Some j -> add_constraint j i 1 + end; + (* Write after read *) + List.iter (fun j -> add_constraint j i 0) !last_mem_reads; + last_mem_write := Some i; + last_mem_reads := [] + end + and add_input_reg i reg = + begin + (* Read after write *) + match PTree.get reg !last_reg_write with + | None -> () + | Some (j, latency) -> add_constraint j i latency + end; + last_reg_reads := PTree.set reg + (i :: get_last_reads reg) + !last_reg_reads + and add_output_reg i latency reg = + begin + (* Write after write *) + match PTree.get reg !last_reg_write with + | None -> () + | Some (j, _) -> add_constraint j i 1 + end; + begin + (* Write after read *) + List.iter (fun j -> add_constraint j i 0) (get_last_reads reg) + end; + last_reg_write := PTree.set reg (i, latency) !last_reg_write; + last_reg_reads := PTree.remove reg !last_reg_reads + in + let add_input_regs i regs = List.iter (add_input_reg i) regs in + let rec add_builtin_res i (res : reg builtin_res) = + match res with + | BR r -> add_output_reg i 10 r + | BR_none -> () + | BR_splitlong (hi, lo) -> add_builtin_res i hi; + add_builtin_res i lo in + let rec add_builtin_arg i (ba : reg builtin_arg) = + match ba with + | BA r -> add_input_reg i r + | BA_int _ | BA_long _ | BA_float _ | BA_single _ -> () + | BA_loadstack(_,_) -> add_input_mem i + | BA_addrstack _ -> () + | BA_loadglobal(_, _, _) -> add_input_mem i + | BA_addrglobal _ -> () + | BA_splitlong(hi, lo) -> add_builtin_arg i hi; + add_builtin_arg i lo + | BA_addptr(a1, a2) -> add_builtin_arg i a1; + add_builtin_arg i a2 in + let irreversible_action i = + match !last_branch with + | None -> () + | Some j -> add_constraint j i 1 in + let set_branch i = + irreversible_action i; + last_branch := Some i in + let add_non_pipelined_resources i resources = + Array.iter2 + (fun latency last -> + if latency >= 0 && last >= 0 then add_constraint last i latency) + resources last_non_pipelined_op; + Array.iteri (fun rsc latency -> + if latency >= 0 + then last_non_pipelined_op.(rsc) <- i) resources + in + Array.iteri + begin + fun i (insn, other_uses) -> + List.iter (fun use -> + add_input_reg i use) + (Regset.elements other_uses); + + match insn with + | Inop _ -> () + | Iop(op, inputs, output, _) -> + add_non_pipelined_resources i + (opweights.non_pipelined_resources_of_op op (List.length inputs)); + (if Op.is_trapping_op op then irreversible_action i); + add_input_regs i inputs; + add_output_reg i (opweights.latency_of_op op (List.length inputs)) output + | Iload(trap, chunk, addressing, addr_regs, output, _) -> + (if trap=TRAP then irreversible_action i); + add_input_mem i; + add_input_regs i addr_regs; + add_output_reg i (opweights.latency_of_load trap chunk addressing (List.length addr_regs)) output + | Istore(chunk, addressing, addr_regs, input, _) -> + irreversible_action i; + add_input_regs i addr_regs; + add_input_reg i input; + add_output_mem i + | Icall(signature, ef, inputs, output, _) -> + set_branch i; + (match ef with + | Datatypes.Coq_inl r -> add_input_reg i r + | Datatypes.Coq_inr symbol -> () + ); + add_input_mem i; + add_input_regs i inputs; + add_output_reg i (opweights.latency_of_call signature ef) output; + add_output_mem i; + failwith "Icall" + | Itailcall(signature, ef, inputs) -> + set_branch i; + (match ef with + | Datatypes.Coq_inl r -> add_input_reg i r + | Datatypes.Coq_inr symbol -> () + ); + add_input_mem i; + add_input_regs i inputs; + failwith "Itailcall" + | Ibuiltin(ef, builtin_inputs, builtin_output, _) -> + set_branch i; + add_input_mem i; + List.iter (add_builtin_arg i) builtin_inputs; + add_builtin_res i builtin_output; + add_output_mem i; + failwith "Ibuiltin" + | Icond(cond, inputs, _, _, _) -> + set_branch i; + add_input_mem i; + add_input_regs i inputs + | Ijumptable(input, _) -> + set_branch i; + add_input_reg i input; + failwith "Ijumptable" + | Ireturn(Some input) -> + set_branch i; + add_input_reg i input; + failwith "Ireturn" + | Ireturn(None) -> + set_branch i; + failwith "Ireturn none" + end seqa; + !latency_constraints;; + +let resources_of_instruction (opweights : opweights) = function + | Inop _ -> Array.map (fun _ -> 0) opweights.pipelined_resource_bounds + | Iop(op, inputs, output, _) -> + opweights.resources_of_op op (List.length inputs) + | Iload(trap, chunk, addressing, addr_regs, output, _) -> + opweights.resources_of_load trap chunk addressing (List.length addr_regs) + | Istore(chunk, addressing, addr_regs, input, _) -> + opweights.resources_of_store chunk addressing (List.length addr_regs) + | Icall(signature, ef, inputs, output, _) -> + opweights.resources_of_call signature ef + | Ibuiltin(ef, builtin_inputs, builtin_output, _) -> + opweights.resources_of_builtin ef + | Icond(cond, args, _, _ , _) -> + opweights.resources_of_cond cond (List.length args) + | Itailcall _ | Ijumptable _ | Ireturn _ -> opweights.pipelined_resource_bounds + +let print_sequence pp (seqa : instruction array) = + Array.iteri ( + fun i (insn : instruction) -> + PrintRTL.print_instruction pp (i, insn)) seqa;; + +type unique_id = int + +type 'a symbolic_term_node = + | STop of Op.operation * 'a list + | STinitial_reg of int + | STother of int;; + +type symbolic_term = { + hash_id : unique_id; + hash_ct : symbolic_term symbolic_term_node + };; + +let rec print_term channel term = + match term.hash_ct with + | STop(op, args) -> + PrintOp.print_operation print_term channel (op, args) + | STinitial_reg n -> Printf.fprintf channel "x%d" n + | STother n -> Printf.fprintf channel "y%d" n;; + +type symbolic_term_table = { + st_table : (unique_id symbolic_term_node, symbolic_term) Hashtbl.t; + mutable st_next_id : unique_id };; + +let hash_init () = { + st_table = Hashtbl.create 20; + st_next_id = 0 + };; + +let ground_to_id = function + | STop(op, l) -> STop(op, List.map (fun t -> t.hash_id) l) + | STinitial_reg r -> STinitial_reg r + | STother i -> STother i;; + +let hash_node (table : symbolic_term_table) (term : symbolic_term symbolic_term_node) : symbolic_term = + let grounded = ground_to_id term in + match Hashtbl.find_opt table.st_table grounded with + | Some x -> x + | None -> + let term' = { hash_id = table.st_next_id; + hash_ct = term } in + (if table.st_next_id = max_int then failwith "hash: max_int"); + table.st_next_id <- table.st_next_id + 1; + Hashtbl.add table.st_table grounded term'; + term';; + +type access = { + base : symbolic_term; + offset : int64; + length : int + };; + +let term_equal a b = (a.hash_id = b.hash_id);; + +let access_of_addressing get_reg chunk addressing args = + match addressing, args with + | (Op.Aindexed ofs), [reg] -> Some + { base = get_reg reg; + offset = Camlcoq.camlint64_of_ptrofs ofs; + length = length_of_chunk chunk + } + | _, _ -> None ;; +(* TODO: global *) + +let symbolic_execution (seqa : instruction array) = + let regs = ref PTree.empty + and table = hash_init() in + let assign reg term = regs := PTree.set reg term !regs + and hash term = hash_node table term in + let get_reg reg = + match PTree.get reg !regs with + | None -> hash (STinitial_reg (Camlcoq.P.to_int reg)) + | Some x -> x in + let targets = Array.make (Array.length seqa) None in + Array.iteri + begin + fun i insn -> + match insn with + | Iop(Op.Omove, [input], output, _) -> + assign output (get_reg input) + | Iop(op, inputs, output, _) -> + assign output (hash (STop(op, List.map get_reg inputs))) + + | Iload(trap, chunk, addressing, args, output, _) -> + let access = access_of_addressing get_reg chunk addressing args in + targets.(i) <- access; + assign output (hash (STother(i))) + + | Icall(_, _, _, output, _) + | Ibuiltin(_, _, BR output, _) -> + assign output (hash (STother(i))) + + | Istore(chunk, addressing, args, va, _) -> + let access = access_of_addressing get_reg chunk addressing args in + targets.(i) <- access + + | Inop _ -> () + | Ibuiltin(_, _, BR_none, _) -> () + | Ibuiltin(_, _, BR_splitlong _, _) -> failwith "BR_splitlong" + + | Itailcall (_, _, _) + |Icond (_, _, _, _, _) + |Ijumptable (_, _) + |Ireturn _ -> () + end seqa; + targets;; + +let print_access channel = function + | None -> Printf.fprintf channel "any" + | Some x -> Printf.fprintf channel "%a + %Ld" print_term x.base x.offset;; + +let print_targets channel seqa = + let targets = symbolic_execution seqa in + Array.iteri + (fun i insn -> + match insn with + | Iload _ -> Printf.fprintf channel "%d: load %a\n" + i print_access targets.(i) + | Istore _ -> Printf.fprintf channel "%d: store %a\n" + i print_access targets.(i) + | _ -> () + ) seqa;; + +let may_overlap a0 b0 = + match a0, b0 with + | (None, _) | (_ , None) -> true + | (Some a), (Some b) -> + if term_equal a.base b.base + then (max a.offset b.offset) < + (min (Int64.add (Int64.of_int a.length) a.offset) + (Int64.add (Int64.of_int b.length) b.offset)) + else match a.base.hash_ct, b.base.hash_ct with + | STop(Op.Oaddrsymbol(ida, ofsa),[]), + STop(Op.Oaddrsymbol(idb, ofsb),[]) -> + (ida=idb) && + let ao = Int64.add a.offset (Camlcoq.camlint64_of_ptrofs ofsa) + and bo = Int64.add b.offset (Camlcoq.camlint64_of_ptrofs ofsb) in + (max ao bo) < + (min (Int64.add (Int64.of_int a.length) ao) + (Int64.add (Int64.of_int b.length) bo)) + | STop(Op.Oaddrstack _, []), + STop(Op.Oaddrsymbol _, []) + | STop(Op.Oaddrsymbol _, []), + STop(Op.Oaddrstack _, []) -> false + | STop(Op.Oaddrstack(ofsa),[]), + STop(Op.Oaddrstack(ofsb),[]) -> + let ao = Int64.add a.offset (Camlcoq.camlint64_of_ptrofs ofsa) + and bo = Int64.add b.offset (Camlcoq.camlint64_of_ptrofs ofsb) in + (max ao bo) < + (min (Int64.add (Int64.of_int a.length) ao) + (Int64.add (Int64.of_int b.length) bo)) + | _ -> true;; + +(* +(* TODO suboptimal quadratic algorithm *) +let get_alias_dependencies seqa = + let targets = symbolic_execution seqa + and deps = ref [] in + let add_constraint instr_from instr_to latency = + deps := { instr_from = instr_from; + instr_to = instr_to; + latency = latency + }:: !deps in + for i=0 to (Array.length seqa)-1 + do + for j=0 to i-1 + do + match seqa.(j), seqa.(i) with + | (Istore _), ((Iload _) | (Istore _)) -> + if may_overlap targets.(j) targets.(i) + then add_constraint j i 1 + | (Iload _), (Istore _) -> + if may_overlap targets.(j) targets.(i) + then add_constraint j i 0 + | (Istore _ | Iload _), (Icall _ | Ibuiltin _) + | (Icall _ | Ibuiltin _), (Icall _ | Ibuiltin _ | Iload _ | Istore _) -> + add_constraint j i 1 + | (Inop _ | Iop _), _ + | _, (Inop _ | Iop _) + | (Iload _), (Iload _) -> () + done + done; + !deps;; + *) + +let define_problem (opweights : opweights) (live_entry_regs : Regset.t) + (typing : RTLtyping.regenv) reference_counting seqa = + let simple_deps = get_simple_dependencies opweights seqa in + { max_latency = -1; + resource_bounds = opweights.pipelined_resource_bounds; + live_regs_entry = live_entry_regs; + typing = typing; + reference_counting = Some reference_counting; + instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa); + latency_constraints = + (* if (use_alias_analysis ()) + then (get_alias_dependencies seqa) @ simple_deps + else *) simple_deps };; + +let zigzag_scheduler problem early_ones = + let nr_instructions = get_nr_instructions problem in + assert(nr_instructions = (Array.length early_ones)); + match list_scheduler problem with + | Some fwd_schedule -> + let fwd_makespan = fwd_schedule.((Array.length fwd_schedule) - 1) in + let constraints' = ref problem.latency_constraints in + Array.iteri (fun i is_early -> + if is_early then + constraints' := { + instr_from = i; + instr_to = nr_instructions ; + latency = fwd_makespan - fwd_schedule.(i) } ::!constraints' ) + early_ones; + validated_scheduler reverse_list_scheduler + { problem with latency_constraints = !constraints' } + | None -> None;; + +let prepass_scheduler_by_name name problem early_ones = + match name with + | "zigzag" -> zigzag_scheduler problem early_ones + | _ -> scheduler_by_name name problem + +let schedule_sequence (seqa : (instruction*Regset.t) array) + (live_regs_entry : Registers.Regset.t) + (typing : RTLtyping.regenv) + reference = + let opweights = OpWeights.get_opweights () in + try + if (Array.length seqa) <= 1 + then None + else + begin + let nr_instructions = Array.length seqa in + (if !Clflags.option_debug_compcert > 6 + then Printf.printf "prepass scheduling length = %d\n" (Array.length seqa)); + let problem = define_problem opweights live_regs_entry + typing reference seqa in + (if !Clflags.option_debug_compcert > 7 + then (print_sequence stdout (Array.map fst seqa); + print_problem stdout problem)); + match prepass_scheduler_by_name + (!Clflags.option_fprepass_sched) + problem + (Array.map (fun (ins, _) -> + match ins with + | Icond _ -> true + | _ -> false) seqa) with + | None -> Printf.printf "no solution in prepass scheduling\n"; + None + | Some solution -> + let positions = Array.init nr_instructions (fun i -> i) in + Array.sort (fun i j -> + let si = solution.(i) and sj = solution.(j) in + if si < sj then -1 + else if si > sj then 1 + else i - j) positions; + Some positions + end + with (Failure s) -> + Printf.printf "failure in prepass scheduling: %s\n" s; + None;; + diff --git a/riscV/PrepassSchedulingOracleDeps.ml b/riscV/PrepassSchedulingOracleDeps.ml index 1e955b85..8d10d406 120000..100644 --- a/riscV/PrepassSchedulingOracleDeps.ml +++ b/riscV/PrepassSchedulingOracleDeps.ml @@ -1 +1,17 @@ -../aarch64/PrepassSchedulingOracleDeps.ml
\ No newline at end of file +type called_function = (Registers.reg, AST.ident) Datatypes.sum + +type opweights = + { + pipelined_resource_bounds : int array; + nr_non_pipelined_units : int; + latency_of_op : Op.operation -> int -> int; + resources_of_op : Op.operation -> int -> int array; + non_pipelined_resources_of_op : Op.operation -> int -> int array; + latency_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int; + resources_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int array; + resources_of_store : AST.memory_chunk -> Op.addressing -> int -> int array; + resources_of_cond : Op.condition -> int -> int array; + latency_of_call : AST.signature -> called_function -> int; + resources_of_call : AST.signature -> called_function -> int array; + resources_of_builtin : AST.external_function -> int array + };; diff --git a/runtime/Makefile b/runtime/Makefile index 6f70fa87..ff51b7d7 100644 --- a/runtime/Makefile +++ b/runtime/Makefile @@ -1,5 +1,7 @@ include ../Makefile.config +.PRECIOUS: .s + CFLAGS=-O1 -Wall ifeq ($(ARCH),x86) diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml index eab0b21a..e3a421a5 100644 --- a/scheduling/InstructionScheduler.ml +++ b/scheduling/InstructionScheduler.ml @@ -33,6 +33,10 @@ type latency_constraint = { type problem = { max_latency : int; resource_bounds : int array; + live_regs_entry : Registers.Regset.t; + typing : RTLtyping.regenv; + reference_counting : ((Registers.reg, int * int) Hashtbl.t + * ((Registers.reg * bool) list array)) option; instruction_usages : int array array; latency_constraints : latency_constraint list; };; @@ -118,6 +122,13 @@ let vector_less_equal a b = true with Exit -> false;; +(* let vector_add a b = + * assert ((Array.length a) = (Array.length b)); + * for i=0 to (Array.length a)-1 + * do + * b.(i) <- b.(i) + a.(i) + * done;; *) + let vector_subtract a b = assert ((Array.length a) = (Array.length b)); for i=0 to (Array.length a)-1 @@ -257,8 +268,8 @@ let priority_list_scheduler (order : list_scheduler_order) assert(!time >= 0); !time with Exit -> -1 - in + let advance_time() = begin (if !current_time < max_time-1 @@ -267,7 +278,8 @@ let priority_list_scheduler (order : list_scheduler_order) Array.blit problem.resource_bounds 0 current_resources 0 (Array.length current_resources); ready.(!current_time + 1) <- - InstrSet.union (ready.(!current_time)) (ready.(!current_time + 1)); + InstrSet.union (ready.(!current_time)) + (ready.(!current_time + 1)); ready.(!current_time) <- InstrSet.empty; end); incr current_time @@ -334,6 +346,470 @@ let list_scheduler = priority_list_scheduler CRITICAL_PATH_ORDER;; (* dummy code for placating ocaml's warnings *) let _ = fun x -> priority_list_scheduler INSTRUCTION_ORDER x;; + +(* A scheduler sensitive to register pressure *) +let reg_pres_scheduler (problem : problem) : solution option = + (* DebugPrint.debug_flag := true; *) + let nr_instructions = get_nr_instructions problem in + + if !Clflags.option_debug_compcert > 6 then + (Printf.eprintf "\nSCHEDULING_SUPERBLOCK %d\n" nr_instructions; + flush stderr); + + let successors = get_successors problem + and predecessors = get_predecessors problem + and times = Array.make (nr_instructions+1) (-1) in + let live_regs_entry = problem.live_regs_entry in + + let available_regs = Array.copy Machregsaux.nr_regs in + + let nr_types_regs = Array.length available_regs in + + let thres = Array.fold_left (min) + (max !(Clflags.option_regpres_threshold) 0) + Machregsaux.nr_regs + in + + + let regs_thresholds = Array.make nr_types_regs thres in + (* placeholder value *) + + let class_r r = + Machregsaux.class_of_type (problem.typing r) in + + let live_regs = Hashtbl.create 42 in + + List.iter (fun r -> let classe = Machregsaux.class_of_type + (problem.typing r) in + available_regs.(classe) + <- available_regs.(classe) - 1; + Hashtbl.add live_regs r classe) + (Registers.Regset.elements live_regs_entry); + + let csr_b = ref false in + + let counts, mentions = + match problem.reference_counting with + | Some (l, r) -> l, r + | None -> assert false + in + + let fold_delta i = (fun a (r, b) -> + a + + if class_r r <> i then 0 else + (if b then + if (Hashtbl.find counts r = (i, 1)) + then 1 else 0 + else + match Hashtbl.find_opt live_regs r with + | None -> -1 + | Some t -> 0 + )) in + + let priorities = critical_paths successors in + + let current_resources = Array.copy problem.resource_bounds in + + let module InstrSet = + struct + module MSet = + Set.Make (struct + type t=int + let compare x y = + match priorities.(y) - priorities.(x) with + | 0 -> x - y + | z -> z + end) + + let empty = MSet.empty + let is_empty = MSet.is_empty + let add = MSet.add + let remove = MSet.remove + let union = MSet.union + let iter = MSet.iter + + let compare_regs i x y = + let pyi = List.fold_left (fold_delta i) 0 mentions.(y) in + (* print_int y; + * print_string " "; + * print_int pyi; + * print_newline (); + * flush stdout; *) + let pxi = List.fold_left (fold_delta i) 0 mentions.(x) in + match pyi - pxi with + | 0 -> (match priorities.(y) - priorities.(x) with + | 0 -> x - y + | z -> z) + | z -> z + + (** t is the register class *) + let sched_CSR t ready usages = + (* print_string "looking for max delta"; + * print_newline (); + * flush stdout; *) + let result = ref (-1) in + iter (fun i -> + if vector_less_equal usages.(i) current_resources + then if !result = -1 || (compare_regs t !result i > 0) + then result := i) ready; + !result + end + in + + let max_time = bound_max_time problem + 5*nr_instructions in + let ready = Array.make max_time InstrSet.empty in + + Array.iteri (fun i preds -> + if i < nr_instructions && preds = [] + then ready.(0) <- InstrSet.add i ready.(0)) predecessors; + + let current_time = ref 0 + and earliest_time i = + try + let time = ref (-1) in + List.iter (fun (j, latency) -> + if times.(j) < 0 + then raise Exit + else let t = times.(j) + latency in + if t > !time + then time := t) predecessors.(i); + assert (!time >= 0); + !time + with Exit -> -1 + in + + let advance_time () = + (if !current_time < max_time-1 + then ( + Array.blit problem.resource_bounds 0 current_resources 0 + (Array.length current_resources); + ready.(!current_time + 1) <- + InstrSet.union (ready.(!current_time)) + (ready.(!current_time +1)); + ready.(!current_time) <- InstrSet.empty)); + incr current_time + in + + (* ALL MENTIONS TO cnt ARE PLACEHOLDERS *) + let cnt = ref 0 in + + let attempt_scheduling ready usages = + let result = ref (-1) in + DebugPrint.debug "\n\nREADY: "; + InstrSet.iter (fun i -> DebugPrint.debug "%d " i) ready; + DebugPrint.debug "\n\n"; + try + Array.iteri (fun i avlregs -> + DebugPrint.debug "avlregs: %d %d\nlive regs: %d\n" + i avlregs (Hashtbl.length live_regs); + if !cnt < 5 && avlregs <= regs_thresholds.(i) + then ( + csr_b := true; + let maybe = InstrSet.sched_CSR i ready usages in + DebugPrint.debug "maybe %d\n" maybe; + (if maybe >= 0 && + let delta = + List.fold_left (fold_delta i) 0 mentions.(maybe) in + DebugPrint.debug "delta %d\n" delta; + delta > 0 + then + (vector_subtract usages.(maybe) current_resources; + result := maybe) + else + if not !Clflags.option_regpres_wait_window + then + (InstrSet.iter (fun ins -> + if vector_less_equal usages.(ins) current_resources && + List.fold_left (fold_delta i) 0 mentions.(maybe) >= 0 + then result := ins + ) ready; + if !result <> -1 then + vector_subtract usages.(!result) current_resources; + incr cnt) + else + (incr cnt) + ); + raise Exit)) available_regs; + InstrSet.iter (fun i -> + if vector_less_equal usages.(i) current_resources + then ( + vector_subtract usages.(i) current_resources; + result := i; + raise Exit)) ready; + -1 + with Exit -> + !result in + + while !current_time < max_time + do + if (InstrSet.is_empty ready.(!current_time)) + then advance_time () + else + match attempt_scheduling ready.(!current_time) + problem.instruction_usages with + | -1 -> advance_time() + | i -> (assert(times.(i) < 0); + (DebugPrint.debug "INSTR ISSUED: %d\n" i; + if !csr_b && !Clflags.option_debug_compcert > 6 then + (Printf.eprintf "REGPRES: high pres class %d\n" i; + flush stderr); + csr_b := false; + (* if !Clflags.option_regpres_wait_window then *) + cnt := 0; + List.iter (fun (r,b) -> + if b then + (match Hashtbl.find_opt counts r with + | None -> assert false + | Some (t, n) -> + Hashtbl.remove counts r; + if n = 1 then + (Hashtbl.remove live_regs r; + available_regs.(t) + <- available_regs.(t) + 1)) + else + let t = class_r r in + match Hashtbl.find_opt live_regs r with + | None -> (Hashtbl.add live_regs r t; + available_regs.(t) + <- available_regs.(t) - 1) + | Some i -> () + ) mentions.(i)); + times.(i) <- !current_time; + ready.(!current_time) + <- InstrSet.remove i (ready.(!current_time)); + List.iter (fun (instr_to, latency) -> + if instr_to < nr_instructions then + match earliest_time instr_to with + | -1 -> () + | to_time -> + ((* DebugPrint.debug "TO TIME %d : %d\n" to_time + * (Array.length ready); *) + ready.(to_time) + <- InstrSet.add instr_to ready.(to_time)) + ) successors.(i); + successors.(i) <- [] + ) + done; + + try + let final_time = ref (-1) in + for i = 0 to nr_instructions - 1 do + DebugPrint.debug "%d " i; + (if times.(i) < 0 then raise Exit); + (if !final_time < times.(i) + 1 then final_time := times.(i) + 1) + done; + List.iter (fun (i, latency) -> + let target_time = latency + times.(i) in + if target_time > !final_time then + final_time := target_time) predecessors.(nr_instructions); + times.(nr_instructions) <- !final_time; + (* DebugPrint.debug_flag := false; *) + Some times + with Exit -> + DebugPrint.debug "reg_pres_sched failed\n"; + (* DebugPrint.debug_flag := false; *) + None + +;; + + +(********************************************************************) + +let reg_pres_scheduler_bis (problem : problem) : solution option = + (* Printf.printf "\nNEW\n\n"; *) + let nr_instructions = get_nr_instructions problem in + let successors = get_successors problem + and predecessors = get_predecessors problem + and times = Array.make (nr_instructions+1) (-1) in + let live_regs_entry = problem.live_regs_entry in + + (* let available_regs = Array.copy Machregsaux.nr_regs in *) + + let class_r r = + Machregsaux.class_of_type (problem.typing r) in + + let live_regs = Hashtbl.create 42 in + + List.iter (fun r -> let classe = Machregsaux.class_of_type + (problem.typing r) in + (* available_regs.(classe) + * <- available_regs.(classe) - 1; *) + Hashtbl.add live_regs r classe) + (Registers.Regset.elements live_regs_entry); + + + let counts, mentions = + match problem.reference_counting with + | Some (l, r) -> l, r + | None -> assert false + in + + let fold_delta a (r, b) = + a + (if b then + match Hashtbl.find_opt counts r with + | Some (_, 1) -> 1 + | _ -> 0 + else + match Hashtbl.find_opt live_regs r with + | None -> -1 + | Some t -> 0 + ) in + + let priorities = critical_paths successors in + + let current_resources = Array.copy problem.resource_bounds in + + let compare_pres x y = + let pdy = List.fold_left (fold_delta) 0 mentions.(y) in + let pdx = List.fold_left (fold_delta) 0 mentions.(x) in + match pdy - pdx with + | 0 -> x - y + | z -> z + in + + let module InstrSet = + Set.Make (struct + type t = int + let compare x y = + match priorities.(y) - priorities.(x) with + | 0 -> x - y + | z -> z + end) in + + let max_time = bound_max_time problem (* + 5*nr_instructions *) in + let ready = Array.make max_time InstrSet.empty in + + Array.iteri (fun i preds -> + if i < nr_instructions && preds = [] + then ready.(0) <- InstrSet.add i ready.(0)) predecessors; + + let current_time = ref 0 + and earliest_time i = + try + let time = ref (-1) in + List.iter (fun (j, latency) -> + if times.(j) < 0 + then raise Exit + else let t = times.(j) + latency in + if t > !time + then time := t) predecessors.(i); + assert (!time >= 0); + !time + with Exit -> -1 + in + + let advance_time () = + (* Printf.printf "ADV\n"; + * flush stdout; *) + (if !current_time < max_time-1 + then ( + Array.blit problem.resource_bounds 0 current_resources 0 + (Array.length current_resources); + ready.(!current_time + 1) <- + InstrSet.union (ready.(!current_time)) + (ready.(!current_time +1)); + ready.(!current_time) <- InstrSet.empty)); + incr current_time + in + + + let attempt_scheduling ready usages = + let result = ref [] in + try + InstrSet.iter (fun i -> + if vector_less_equal usages.(i) current_resources + then + if !result = [] || priorities.(i) = priorities.(List.hd (!result)) + then + result := i::(!result) + else raise Exit + ) ready; + if !result <> [] then raise Exit; + -1 + with + Exit -> + let mini = List.fold_left (fun a b -> + if a = -1 || compare_pres a b > 0 + then b else a + ) (-1) !result in + vector_subtract usages.(mini) current_resources; + mini + in + + while !current_time < max_time + do + if (InstrSet.is_empty ready.(!current_time)) + then advance_time () + else + match attempt_scheduling ready.(!current_time) + problem.instruction_usages with + | -1 -> advance_time() + | i -> ( + DebugPrint.debug "ISSUED: %d\nREADY: " i; + InstrSet.iter (fun i -> DebugPrint.debug "%d " i) + ready.(!current_time); + DebugPrint.debug "\nSUCC: "; + List.iter (fun (i, l) -> DebugPrint.debug "%d " i) + successors.(i); + DebugPrint.debug "\n\n"; + assert(times.(i) < 0); + times.(i) <- !current_time; + ready.(!current_time) + <- InstrSet.remove i (ready.(!current_time)); + (List.iter (fun (r,b) -> + if b then + (match Hashtbl.find_opt counts r with + | None -> assert false + | Some (t, n) -> + Hashtbl.remove counts r; + if n = 1 then + (Hashtbl.remove live_regs r; + (* available_regs.(t) + * <- available_regs.(t) + 1 *))) + else + let t = class_r r in + match Hashtbl.find_opt live_regs r with + | None -> (Hashtbl.add live_regs r t; + (* available_regs.(t) + * <- available_regs.(t) - 1 *)) + | Some i -> () + ) mentions.(i)); + List.iter (fun (instr_to, latency) -> + if instr_to < nr_instructions then + match earliest_time instr_to with + | -1 -> () + | to_time -> + ((* DebugPrint.debug "TO TIME %d : %d\n" to_time + * (Array.length ready); *) + ready.(to_time) + <- InstrSet.add instr_to ready.(to_time)) + ) successors.(i); + successors.(i) <- [] + ) + done; + + try + let final_time = ref (-1) in + for i = 0 to nr_instructions - 1 do + (* print_int i; + * flush stdout; *) + (if times.(i) < 0 then raise Exit); + (if !final_time < times.(i) + 1 then final_time := times.(i) + 1) + done; + List.iter (fun (i, latency) -> + let target_time = latency + times.(i) in + if target_time > !final_time then + final_time := target_time) predecessors.(nr_instructions); + times.(nr_instructions) <- !final_time; + Some times + with Exit -> + DebugPrint.debug "reg_pres_sched failed\n"; + None + +;; + +(********************************************************************) + type bundle = int list;; let rec extract_deps_to index = function @@ -438,6 +914,12 @@ let reverse_problem problem = { max_latency = problem.max_latency; resource_bounds = problem.resource_bounds; + live_regs_entry = Registers.Regset.empty; (* PLACEHOLDER *) + (* Not needed for the revlist sched, and for now we wont bother + with creating a reverse scheduler aware of reg press *) + + typing = problem.typing; + reference_counting = problem.reference_counting; instruction_usages = Array.init (nr_instructions + 1) (fun i -> if i=0 @@ -1259,5 +1741,7 @@ let scheduler_by_name name = | "ilp" -> validated_scheduler cascaded_scheduler | "list" -> validated_scheduler list_scheduler | "revlist" -> validated_scheduler reverse_list_scheduler + | "regpres" -> validated_scheduler reg_pres_scheduler + | "regpres_bis" -> validated_scheduler reg_pres_scheduler_bis | "greedy" -> greedy_scheduler | s -> failwith ("unknown scheduler: " ^ s);; diff --git a/scheduling/InstructionScheduler.mli b/scheduling/InstructionScheduler.mli index fb7af3f6..48c7bc09 100644 --- a/scheduling/InstructionScheduler.mli +++ b/scheduling/InstructionScheduler.mli @@ -23,6 +23,16 @@ type problem = { resource_bounds : int array; (** An array of number of units available indexed by the kind of resources to be allocated. It can be empty, in which case the problem is scheduling without resource constraints. *) + live_regs_entry : Registers.Regset.t; + (** The set of live pseudo-registers at entry. *) + + typing : RTLtyping.regenv; + (** Register type map. *) + + reference_counting : ((Registers.reg, int * int) Hashtbl.t + * ((Registers.reg * bool) list array)) option; + (** See RTLpathScheduleraux.reference_counting. *) + instruction_usages: int array array; (** At index {i i} the vector of resources used by instruction number {i i}. It must be the same length as [resource_bounds] *) @@ -68,6 +78,12 @@ Once a clock tick is full go to the next. @return [Some solution] when a solution is found, [None] if not. *) val list_scheduler : problem -> solution option +(** WIP : Same as list_scheduler, but schedules instructions which decrease +register pressure when it gets too high. *) +val reg_pres_scheduler : problem -> solution option + +val reg_pres_scheduler_bis : problem -> solution option + (** Schedule the problem using the order of instructions without any reordering *) val greedy_scheduler : problem -> solution option diff --git a/scheduling/RTLpath.v b/scheduling/RTLpath.v index a4fce97e..b29a7759 100644 --- a/scheduling/RTLpath.v +++ b/scheduling/RTLpath.v @@ -156,7 +156,7 @@ Definition istep (ge: RTL.genv) (i: instruction) (sp: val) (rs: regset) (m: mem) SOME v <- Mem.loadv chunk m a IN Some (mk_istate true pc' (rs#dst <- v) m) | Iload NOTRAP chunk addr args dst pc' => - let default_state := mk_istate true pc' rs#dst <- (default_notrap_load_value chunk) m in + let default_state := mk_istate true pc' rs#dst <- Vundef m in match (eval_addressing ge sp addr rs##args) with | None => Some default_state | Some a => match (Mem.loadv chunk m a) with diff --git a/scheduling/RTLpathSE_theory.v b/scheduling/RTLpathSE_theory.v index aa8db342..2a791feb 100644 --- a/scheduling/RTLpathSE_theory.v +++ b/scheduling/RTLpathSE_theory.v @@ -87,11 +87,11 @@ Fixpoint seval_sval (ge: RTL.genv) (sp:val) (sv: sval) (rs0: regset) (m0: mem): | NOTRAP => SOME args <- seval_list_sval ge sp lsv rs0 m0 IN match (eval_addressing ge sp addr args) with - | None => Some (default_notrap_load_value chunk) + | None => Some Vundef | Some a => SOME m <- seval_smem ge sp sm rs0 m0 IN match (Mem.loadv chunk m a) with - | None => Some (default_notrap_load_value chunk) + | None => Some Vundef | Some val => Some val end end diff --git a/scheduling/RTLpathScheduleraux.ml b/scheduling/RTLpathScheduleraux.ml index aeed39df..f3f09954 100644 --- a/scheduling/RTLpathScheduleraux.ml +++ b/scheduling/RTLpathScheduleraux.ml @@ -17,7 +17,7 @@ let print_superblock (sb: superblock) code = begin debug "{ instructions = "; print_instructions (Array.to_list insts) code; debug "\n"; debug " liveins = "; print_ptree_regset li; debug "\n"; - debug " output_regs = "; print_regset outs; debug "}" + debug " output_regs = "; print_regset outs; debug "\n}" end let print_superblocks lsb code = @@ -72,6 +72,168 @@ let get_superblocks code entry pm typing = lsb end +(** the useful one. Returns a hashtable with bindings of shape + ** [(r,(t, n)], where [r] is a pseudo-register (Registers.reg), + ** [t] is its class (according to [typing]), and [n] the number of + ** times it's referenced as an argument in instructions of [seqa] ; + ** and an arrray containg the list of regs referenced by each + ** instruction, with a boolean to know whether it's as arg or dest *) +let reference_counting (seqa : (instruction * Regset.t) array) + (out_regs : Registers.Regset.t) (typing : RTLtyping.regenv) : + (Registers.reg, int * int) Hashtbl.t * + (Registers.reg * bool) list array = + let retl = Hashtbl.create 42 in + let retr = Array.make (Array.length seqa) [] in + (* retr.(i) : (r, b) -> (r', b') -> ... + * where b = true if seen as arg, false if seen as dest + *) + List.iter (fun reg -> + Hashtbl.add retl + reg (Machregsaux.class_of_type (typing reg), 1) + ) (Registers.Regset.elements out_regs); + let add_reg reg = + match Hashtbl.find_opt retl reg with + | Some (t, n) -> Hashtbl.add retl reg (t, n+1) + | None -> Hashtbl.add retl reg (Machregsaux.class_of_type + (typing reg), 1) + in + let map_true = List.map (fun r -> r, true) in + Array.iteri (fun i (ins, _) -> + match ins with + | Iop(_,args,dest,_) | Iload(_,_,_,args,dest,_) -> + List.iter (add_reg) args; + retr.(i) <- (dest, false)::(map_true args) + | Icond(_,args,_,_,_) -> + List.iter (add_reg) args; + retr.(i) <- map_true args + | Istore(_,_,args,src,_) -> + List.iter (add_reg) args; + add_reg src; + retr.(i) <- (src, true)::(map_true args) + | Icall(_,fn,args,dest,_) -> + List.iter (add_reg) args; + retr.(i) <- (match fn with + | Datatypes.Coq_inl reg -> + add_reg reg; + (dest,false)::(reg, true)::(map_true args) + | _ -> (dest,false)::(map_true args)) + + | Itailcall(_,fn,args) -> + List.iter (add_reg) args; + retr.(i) <- (match fn with + | Datatypes.Coq_inl reg -> + add_reg reg; + (reg, true)::(map_true args) + | _ -> map_true args) + | Ibuiltin(_,args,dest,_) -> + let rec bar = function + | AST.BA r -> add_reg r; + retr.(i) <- (r, true)::retr.(i) + | AST.BA_splitlong (hi, lo) | AST.BA_addptr (hi, lo) -> + bar hi; bar lo + | _ -> () + in + List.iter (bar) args; + let rec bad = function + | AST.BR r -> retr.(i) <- (r, false)::retr.(i) + | AST.BR_splitlong (hi, lo) -> + bad hi; bad lo + | _ -> () + in + bad dest; + | Ijumptable (reg,_) | Ireturn (Some reg) -> + add_reg reg; + retr.(i) <- [reg, true] + | _ -> () + ) seqa; + (* print_string "mentions\n"; + * Array.iteri (fun i l -> + * print_int i; + * print_string ": ["; + * List.iter (fun (r, b) -> + * print_int (Camlcoq.P.to_int r); + * print_string ":"; + * print_string (if b then "a:" else "d"); + * if b then print_int (snd (Hashtbl.find retl r)); + * print_string ", " + * ) l; + * print_string "]\n"; + * flush stdout; + * ) retr; *) + retl, retr + + +let get_live_regs_entry (sb : superblock) code = + (if !Clflags.option_debug_compcert > 6 + then debug_flag := true); + debug "getting live regs for superblock:\n"; + print_superblock sb code; + debug "\n"; + let seqa = Array.map (fun i -> + (match PTree.get i code with + | Some ii -> ii + | None -> failwith "RTLpathScheduleraux.get_live_regs_entry" + ), + (match PTree.get i sb.liveins with + | Some s -> s + | None -> Regset.empty)) + sb.instructions in + let ret = + Array.fold_right (fun (ins, liveins) regset_i -> + let regset = Registers.Regset.union liveins regset_i in + match ins with + | Inop _ -> regset + | Iop (_, args, dest, _) + | Iload (_, _, _, args, dest, _) -> + List.fold_left (fun set reg -> Registers.Regset.add reg set) + (Registers.Regset.remove dest regset) args + | Istore (_, _, args, src, _) -> + List.fold_left (fun set reg -> Registers.Regset.add reg set) + (Registers.Regset.add src regset) args + | Icall (_, fn, args, dest, _) -> + List.fold_left (fun set reg -> Registers.Regset.add reg set) + ((match fn with + | Datatypes.Coq_inl reg -> (Registers.Regset.add reg) + | Datatypes.Coq_inr _ -> (fun x -> x)) + (Registers.Regset.remove dest regset)) + args + | Itailcall (_, fn, args) -> + List.fold_left (fun set reg -> Registers.Regset.add reg set) + (match fn with + | Datatypes.Coq_inl reg -> (Registers.Regset.add reg regset) + | Datatypes.Coq_inr _ -> regset) + args + | Ibuiltin (_, args, dest, _) -> + List.fold_left (fun set arg -> + let rec add reg set = + match reg with + | AST.BA r -> Registers.Regset.add r set + | AST.BA_splitlong (hi, lo) + | AST.BA_addptr (hi, lo) -> add hi (add lo set) + | _ -> set + in add arg set) + (let rec rem dest set = + match dest with + | AST.BR r -> Registers.Regset.remove r set + | AST.BR_splitlong (hi, lo) -> rem hi (rem lo set) + | _ -> set + in rem dest regset) + args + | Icond (_, args, _, _, _) -> + List.fold_left (fun set reg -> + Registers.Regset.add reg set) + regset args + | Ijumptable (reg, _) + | Ireturn (Some reg) -> + Registers.Regset.add reg regset + | _ -> regset + ) seqa sb.s_output_regs + in debug "live in regs: "; + print_regset ret; + debug "\n"; + debug_flag := false; + ret + (* TODO David *) let schedule_superblock sb code = if not !Clflags.option_fprepass @@ -90,15 +252,22 @@ let schedule_superblock sb code = match predicted_successor ii with | Some _ -> 0 | None -> 1 in + debug "hello\n"; + let live_regs_entry = get_live_regs_entry sb code in + let seqa = + Array.map (fun i -> + (match PTree.get i code with + | Some ii -> ii + | None -> failwith "RTLpathScheduleraux.schedule_superblock"), + (match PTree.get i sb.liveins with + | Some s -> s + | None -> Regset.empty)) + (Array.sub sb.instructions 0 (nr_instr-trailer_length)) in match PrepassSchedulingOracle.schedule_sequence - (Array.map (fun i -> - (match PTree.get i code with - | Some ii -> ii - | None -> failwith "RTLpathScheduleraux.schedule_superblock"), - (match PTree.get i sb.liveins with - | Some s -> s - | None -> Regset.empty)) - (Array.sub sb.instructions 0 (nr_instr-trailer_length))) with + seqa + live_regs_entry + sb.typing + (reference_counting seqa sb.s_output_regs sb.typing) with | None -> sb.instructions | Some order -> let ins' = diff --git a/scheduling/postpass_lib/Machblock.v b/scheduling/postpass_lib/Machblock.v index c8eadbd7..b588cca8 100644 --- a/scheduling/postpass_lib/Machblock.v +++ b/scheduling/postpass_lib/Machblock.v @@ -237,13 +237,13 @@ Inductive basic_step (s: list stackframe) (fb: block) (sp: val) (rs: regset) (m: | exec_MBload_notrap1: forall addr args rs' chunk dst, eval_addressing ge sp addr rs##args = None -> - rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- (default_notrap_load_value chunk)) -> + rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- Vundef) -> basic_step s fb sp rs m (MBload NOTRAP chunk addr args dst) rs' m | exec_MBload_notrap2: forall addr args a rs' chunk dst, eval_addressing ge sp addr rs##args = Some a -> Mem.loadv chunk m a = None -> - rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- (default_notrap_load_value chunk)) -> + rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- Vundef) -> basic_step s fb sp rs m (MBload NOTRAP chunk addr args dst) rs' m | exec_MBstore: forall chunk addr args src m' a rs', diff --git a/test/gourdinl/compare_pp.sh b/test/gourdinl/compare_pp.sh new file mode 100755 index 00000000..09183cf9 --- /dev/null +++ b/test/gourdinl/compare_pp.sh @@ -0,0 +1,16 @@ +ffname=$(basename $1) +fname=${ffname%.*} +nopp=$fname.nopp.s +pp=$fname.pp.s + +../../ccomp -fno-coalesce-mem -fno-postpass -S $1 -o $nopp +../../ccomp -fno-coalesce-mem -fpostpass= list -S $1 -o $pp +sed -i '1,2d' $nopp +sed -i '1,2d' $pp +if cmp -s $nopp $pp; then + echo "same!" +else + echo "differents!" + diff -y $nopp $pp +fi + diff --git a/test/gourdinl/postpass_exp.c b/test/gourdinl/postpass_exp.c new file mode 100644 index 00000000..522ac2a6 --- /dev/null +++ b/test/gourdinl/postpass_exp.c @@ -0,0 +1,5 @@ +int main(int x, int y) { + int z = x << 32; + y = y - z; + return x + y; +} diff --git a/test/monniaux/cycles.h b/test/monniaux/cycles.h index f26060a7..2905938b 100644 --- a/test/monniaux/cycles.h +++ b/test/monniaux/cycles.h @@ -6,7 +6,7 @@ typedef uint64_t cycle_t; #define PRcycle PRId64 -#include <../../kvx-cos/include/hal/cos_registers.h> +#include "/opt/kalray/accesscore/kvx-cos/include/hal/cos_registers.h" static inline void cycle_count_config(void) { diff --git a/test/monniaux/division/harness.c b/test/monniaux/division/harness.c new file mode 100644 index 00000000..b6ce674d --- /dev/null +++ b/test/monniaux/division/harness.c @@ -0,0 +1,82 @@ +#include <stdint.h> +#include <inttypes.h> +#include <stdio.h> +#include <math.h> +#include <assert.h> +#include "../cycles.h" + +static uint32_t dm_random_uint32(void) { + static uint32_t current=UINT32_C(0xDEADBEEF); + current = ((uint64_t) current << 6) % UINT32_C(4294967291); + return current; +} + +static uint64_t dm_biased_random_uint32(void) { + uint32_t flags = dm_random_uint32(); + uint32_t r; + switch (flags & 15) { + case 0: + r = dm_random_uint32() & 0xFU; + break; + case 1: + r = dm_random_uint32() & 0xFFU; + break; + case 2: + r = dm_random_uint32() & 0xFFFU; + break; + case 3: + r = dm_random_uint32() & 0xFFFFU; + break; + case 4: + r = dm_random_uint32() & 0xFFFFFU; + break; + case 5: + r = dm_random_uint32() & 0xFFFFFFU; + break; + case 6: + r = dm_random_uint32() & 0xFFFFFFFU; + break; + case 7: + r = dm_random_uint32() & 0x3; + break; + default: + r = dm_random_uint32(); + } + return r; +} + +inline uint32_t native_udiv32(uint32_t x, uint32_t y) { + return x/y; +} +extern uint32_t my_udiv32(uint32_t x, uint32_t y); + +int main() { + cycle_t time_me=0, time_native=0; + cycle_count_config(); + + for(int i=0; i<1000; i++) { + uint32_t x = dm_biased_random_uint32(); + uint32_t y = dm_biased_random_uint32(); + if (y == 0) continue; + + cycle_t cycle_a, cycle_b, cycle_c; + + uint32_t q1, q2; + cycle_a = get_cycle(); + q1 = native_udiv32(x, y); + cycle_b = get_cycle(); + q2 = my_udiv32(x, y); + cycle_c = get_cycle(); + + if(q1 != q2) { + printf("ERREUR %u %u\n", q1, q2); + } + + time_native += cycle_b - cycle_a; + time_me += cycle_c - cycle_b; + } + + printf("%" PRcycle "\t%" PRcycle "\n", time_native, time_me); + + return 0; +} diff --git a/test/monniaux/division/my_udiv32.s b/test/monniaux/division/my_udiv32.s new file mode 100644 index 00000000..0f4fd127 --- /dev/null +++ b/test/monniaux/division/my_udiv32.s @@ -0,0 +1,36 @@ + .align 8 + .global my_udiv32 + .type my_udiv32, @function +my_udiv32: + zxwd $r1 = $r1 + make $r3 = 0x3ff0000000000000 # 1.0 + zxwd $r0 = $r0 + ;; + floatud.rn $r5 = $r1, 0 + ;; + floatuw.rn $r2 = $r1, 0 + ;; + finvw $r2 = $r2 + ;; + + fwidenlwd $r2 = $r2 + floatud.rn $r4 = $r0, 0 + ;; + ffmsd $r3 = $r2, $r5 + ;; + ffmad $r2 = $r2, $r3 + ;; + fmuld $r2 = $r2, $r4 + ;; + fixedud.rn $r2 = $r2, 0 + ;; + msbfw $r0 = $r2, $r1 + zxwd $r1 = $r2 + addw $r2 = $r2, -1 + ;; + cmoved.wltz $r0? $r1 = $r2 + ;; + copyd $r0 = $r1 + ret + ;; + .size my_udiv32, .-my_udiv32 diff --git a/test/nardino/scheduling/entry_regs.c b/test/nardino/scheduling/entry_regs.c new file mode 100644 index 00000000..9e6adacb --- /dev/null +++ b/test/nardino/scheduling/entry_regs.c @@ -0,0 +1,19 @@ +#include <stdio.h> + +int f(int n) { + if (n > 0) + return 42; + else + return n; +} + + +int main(int argc, char *argv[]) { + int a=1; + float b=2.; + int c = f(a); + a = 3; + int d = f(a); + printf("%e, %d, %d, %d", b, a, c, d); + return 0; +} diff --git a/test/nardino/scheduling/spille_backw.c b/test/nardino/scheduling/spille_backw.c new file mode 100644 index 00000000..1c36ee86 --- /dev/null +++ b/test/nardino/scheduling/spille_backw.c @@ -0,0 +1,114 @@ +int f(int k) { + int a1 = k; + int b1 = 2*a1; + int c = a1; + int a2 = k+1; + int b2 = 2*a2; + c += a2; + int a3 = k+2; + int b3 = 2*a3; + c += a3; + int a4 = k+3; + int b4 = 2*a4; + c += a4; + int a5 = k+4; + int b5 = 2*a5; + c += a5; + int a6 = k+5; + int b6 = 2*a6; + c += a6; + int a7 = k+6; + int b7 = 2*a7; + c += a7; + int a8 = k+7; + int b8 = 2*a8; + c += a8; + int a9 = k+8; + int b9 = 2*a9; + c += a9; + int a10 = k+9; + int b10 = 2*a10; + c += a10; + int a11 = k+10; + int b11 = 2*a11; + c += a11; + int a12 = k+11; + int b12 = 2*a12; + c += a12; + int a13 = k+12; + int b13 = 2*a13; + c += a13; + int a14 = k+13; + int b14 = 2*a14; + c += a14; + int a15 = k+14; + int b15 = 2*a15; + c += a15; + int a16 = k+15; + int b16 = 2*a16; + c += a16; + int a17 = k+16; + int b17 = 2*a17; + c += a17; + int a18 = k+17; + int b18 = 2*a18; + c += a18; + int a19 = k+18; + int b19 = 2*a19; + c += a19; + int a20 = k+19; + int b20 = 2*a20; + c += a20; + int a21 = k+20; + int b21 = 2*a21; + c += a21; + int a22 = k+21; + int b22 = 2*a22; + c += a22; + int a23 = k+22; + int b23 = 2*a23; + c += a23; + int a24 = k+23; + int b24 = 2*a24; + c += a24; + int a25 = k+24; + int b25 = 2*a25; + c += a25; + int a26 = k+25; + int b26 = 2*a26; + c += a26; + return + b13+ + b12+ + b11+ + b10+ + b9+ + b8+ + b7+ + b6+ + b5+ + b4+ + b3+ + b2+ + b1+ + b14+ + b15+ + b16+ + b17+ + b18+ + b19+ + b20+ + b21+ + b22+ + b23+ + b23+ + b24+ + b25+ + b26+ + c; +} + +int main(int argc, char *argv[]) { + f(3); + return 0; +} diff --git a/test/nardino/scheduling/spille_forw.c b/test/nardino/scheduling/spille_forw.c new file mode 100644 index 00000000..db88588b --- /dev/null +++ b/test/nardino/scheduling/spille_forw.c @@ -0,0 +1,166 @@ +#include <stdio.h> + +int f(int n, float * arr) { + float a1 = (float) n; + float b1 = 2.*a1; + float c = a1; + float a2 = (float) n+1; + float b2 = 2.*a2; + c += a2; + float a3 = (float) n+2; + float b3 = 2.*a3; + c += a3; + float a4 = (float) n+3; + float b4 = 2.*a4; + c += a4; + float a5 = (float) n+4; + float b5 = 2.*a5; + c += a5; + float a6 = (float) n+5; + float b6 = 2.*a6; + c += a6; + float a7 = (float) n+6; + float b7 = 2.*a7; + c += a7; + float a8 = (float) n+7; + float b8 = 2.*a8; + c += a8; + float a9 = (float) n+8; + float b9 = 2.*a9; + c += a9; + float a10 = (float) n+9; + float b10 = 2.*a10; + c += a10; + float a11 = (float) n+10; + float b11 = 2.*a11; + c += a11; + float a12 = (float) n+11; + float b12 = 2.*a12; + c += a12; + float a13 = (float) n+12; + float b13 = 2.*a13; + c += a13; + float a14 = (float) n+13; + float b14 = 2.*a14; + c += a14; + float a15 = (float) n+14; + float b15 = 2.*a15; + c += a15; + float a16 = (float) n+15; + float b16 = 2.*a16; + c += a16; + float a17 = (float) n+16; + float b17 = 2.*a17; + c += a17; + float a18 = (float) n+17; + float b18 = 2.*a18; + c += a18; + float a19 = (float) n+18; + float b19 = 2.*a19; + c += a19; + float a20 = (float) n+19; + float b20 = 2.*a20; + c += a20; + float a21 = (float) n+20; + float b21 = 2.*a21; + c += a21; + float a22 = (float) n+21; + float b22 = 2.*a22; + c += a22; + float a23 = (float) n+22; + float b23 = 2.*a23; + c += a23; + float a24 = (float) n+23; + float b24 = 2.*a24; + c += a24; + float a25 = (float) n+24; + float b25 = 2.*a25; + c += a25; + float a26 = (float) n+25; + float b26 = 2.*a26; + c += a26; + float a27 = (float) n+26; + float b27 = 2.*a27; + c += a27; + float a28 = (float) n+27; + float b28 = 2.*a28; + c += a28; + float a29 = (float) n+28; + float b29 = 2.*a29; + c += a29; + float a30 = (float) n+29; + float b30 = 2.*a30; + c += a30; + /* arr[0] = a1; */ + /* arr[1] = a2; */ + /* arr[2] = a3; */ + /* arr[3] = a4; */ + /* arr[4] = a5; */ + /* arr[5] = a6; */ + /* arr[6] = a7; */ + /* arr[7] = a8; */ + /* arr[8] = a9; */ + /* arr[9] = a10; */ + /* arr[10] = a11; */ + /* arr[11] = a12; */ + /* arr[12] = a13; */ + /* arr[13] = a14; */ + /* arr[14] = a15; */ + /* arr[15] = a16; */ + /* arr[16] = a17; */ + /* arr[17] = a18; */ + /* arr[18] = a19; */ + /* arr[19] = a20; */ + /* arr[20] = a21; */ + /* arr[21] = a22; */ + /* arr[22] = a23; */ + /* arr[23] = a24; */ + /* arr[24] = a25; */ + /* arr[25] = a26; */ + /* arr[26] = a27; */ + /* arr[27] = a28; */ + /* arr[28] = a29; */ + /* arr[29] = a30; */ + return c + + b1+ + b2+ + b3+ + b4+ + b5+ + b6+ + b7+ + b8+ + b9+ + b10+ + b11+ + b12+ + b13+ + b14+ + b15+ + b16+ + b17+ + b18+ + b19+ + b20+ + b21+ + b22+ + b23+ + b24+ + b25+ + b26+ + b27+ + b28+ + b29+ + b30; +} + + + + + + +int main(int argc, char *argv[]) { + float arr[30]; + f(5, arr); + return 0; +} diff --git a/x86/ExpansionOracle.ml b/x86/ExpansionOracle.ml index ee2674bf..3b63b80d 120000..100644 --- a/x86/ExpansionOracle.ml +++ b/x86/ExpansionOracle.ml @@ -1 +1,17 @@ -../aarch64/ExpansionOracle.ml
\ No newline at end of file +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Léo Gourdin UGA, VERIMAG *) +(* *) +(* Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +open RTLpathCommon + +let expanse (sb : superblock) code pm = (code, pm) + +let find_last_node_reg c = () diff --git a/x86/PrepassSchedulingOracle.ml b/x86/PrepassSchedulingOracle.ml index 7b6a1b14..42a3da23 100644 --- a/x86/PrepassSchedulingOracle.ml +++ b/x86/PrepassSchedulingOracle.ml @@ -2,4 +2,5 @@ open RTL open Registers (* Do not do anything *) -let schedule_sequence (seqa : (instruction*Regset.t) array) = None +let schedule_sequence (seqa : (instruction*Regset.t) array) + live_regs_entry typing reference = None diff --git a/x86/TargetPrinter.ml b/x86/TargetPrinter.ml index 2000f96a..00e70f65 100644 --- a/x86/TargetPrinter.ml +++ b/x86/TargetPrinter.ml @@ -95,9 +95,6 @@ let z oc n = output_string oc (Z.to_string n) let data_pointer = if Archi.ptr64 then ".quad" else ".long" -(* The comment deliminiter *) -let comment = "#" - (* Base-2 log of a Caml integer *) let rec log2 n = assert (n > 0); @@ -106,6 +103,7 @@ let rec log2 n = (* System dependent printer functions *) module type SYSTEM = sig + val comment: string val raw_symbol: out_channel -> string -> unit val symbol: out_channel -> P.t -> unit val label: out_channel -> int -> unit @@ -124,6 +122,9 @@ module type SYSTEM = module ELF_System : SYSTEM = struct + (* The comment delimiter *) + let comment = "#" + let raw_symbol oc s = fprintf oc "%s" s @@ -219,6 +220,10 @@ module ELF_System : SYSTEM = module MacOS_System : SYSTEM = struct + (* The comment delimiter. + `##` instead of `#` to please the Clang assembler. *) + let comment = "##" + let raw_symbol oc s = fprintf oc "_%s" s @@ -280,6 +285,9 @@ module MacOS_System : SYSTEM = module Cygwin_System : SYSTEM = struct + (* The comment delimiter *) + let comment = "#" + let symbol_prefix = if Archi.ptr64 then "" else "_" |