aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSylvain Boulmé <sylvain.boulme@univ-grenoble-alpes.fr>2021-07-24 11:42:09 +0200
committerSylvain Boulmé <sylvain.boulme@univ-grenoble-alpes.fr>2021-07-24 11:42:09 +0200
commita4570fed198034e535d0d6d99e23cfbb1d40b926 (patch)
treee4e5a9dc845fc0972622ae05fd9084234ed9a44d
parent95f918c38b1e59f40ae7af455ec2c6746289375e (diff)
parentb5c4192c73d7b02e0c90354e26b35a84ee141083 (diff)
downloadcompcert-kvx-a4570fed198034e535d0d6d99e23cfbb1d40b926.tar.gz
compcert-kvx-a4570fed198034e535d0d6d99e23cfbb1d40b926.zip
Merge branch 'kvx-work' into rtl-tunneling
-rw-r--r--.gitlab-ci.yml78
-rw-r--r--aarch64/Machregsaux.ml3
-rw-r--r--aarch64/Machregsaux.mli3
-rw-r--r--aarch64/PostpassSchedulingOracle.ml3
-rw-r--r--aarch64/PrepassSchedulingOracle.ml32
-rw-r--r--[l---------]arm/ExpansionOracle.ml18
-rw-r--r--[l---------]arm/PrepassSchedulingOracle.ml7
-rw-r--r--backend/CSE2proof.v4
-rw-r--r--backend/CSE3analysisproof.v2
-rw-r--r--backend/CSEdomain.v4
-rw-r--r--backend/CSEproof.v5
-rw-r--r--backend/Deadcodeproof.v2
-rw-r--r--backend/LTL.v4
-rw-r--r--backend/Linear.v4
-rw-r--r--backend/Lineartyping.v2
-rw-r--r--backend/Mach.v4
-rw-r--r--backend/RTL.v4
-rw-r--r--backend/RTLtyping.v2
-rw-r--r--backend/Tailcallproof.v4
-rw-r--r--backend/ValueAnalysis.v2
-rw-r--r--common/DebugPrint.ml4
-rw-r--r--common/Memory.v2
-rwxr-xr-xconfig_macos_x86_64.sh1
-rwxr-xr-xconfig_simple.sh2
-rw-r--r--driver/Clflags.ml4
-rw-r--r--driver/Driver.ml14
-rwxr-xr-xfilter_peeplog.fish39
-rw-r--r--kvx/Asmblockdeps.v10
-rw-r--r--kvx/Asmblockgenproof1.v8
-rw-r--r--kvx/Asmblockprops.v4
-rw-r--r--kvx/Asmvliw.v18
-rw-r--r--[l---------]kvx/ExpansionOracle.ml18
-rw-r--r--kvx/Machregsaux.ml2
-rw-r--r--kvx/Machregsaux.mli3
-rw-r--r--kvx/PostpassSchedulingOracle.ml10
-rw-r--r--[l---------]kvx/PrepassSchedulingOracle.ml486
-rw-r--r--[l---------]kvx/PrepassSchedulingOracleDeps.ml18
-rw-r--r--lib/Integers.v3
-rw-r--r--[l---------]powerpc/ExpansionOracle.ml18
-rw-r--r--[l---------]powerpc/PrepassSchedulingOracle.ml7
-rw-r--r--riscV/Asmgenproof1.v16
-rw-r--r--riscV/Machregsaux.ml2
-rw-r--r--riscV/Machregsaux.mli3
-rw-r--r--riscV/OpWeights.ml3
-rw-r--r--[l---------]riscV/PrepassSchedulingOracle.ml486
-rw-r--r--[l---------]riscV/PrepassSchedulingOracleDeps.ml18
-rw-r--r--runtime/Makefile2
-rw-r--r--scheduling/InstructionScheduler.ml488
-rw-r--r--scheduling/InstructionScheduler.mli16
-rw-r--r--scheduling/RTLpath.v2
-rw-r--r--scheduling/RTLpathSE_theory.v4
-rw-r--r--scheduling/RTLpathScheduleraux.ml187
-rw-r--r--scheduling/postpass_lib/Machblock.v4
-rwxr-xr-xtest/gourdinl/compare_pp.sh16
-rw-r--r--test/gourdinl/postpass_exp.c5
-rw-r--r--test/monniaux/cycles.h2
-rw-r--r--test/monniaux/division/harness.c82
-rw-r--r--test/monniaux/division/my_udiv32.s36
-rw-r--r--test/nardino/scheduling/entry_regs.c19
-rw-r--r--test/nardino/scheduling/spille_backw.c114
-rw-r--r--test/nardino/scheduling/spille_forw.c166
-rw-r--r--[l---------]x86/ExpansionOracle.ml18
-rw-r--r--x86/PrepassSchedulingOracle.ml3
-rw-r--r--x86/TargetPrinter.ml14
64 files changed, 2394 insertions, 170 deletions
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index a7811ae3..b3bb418f 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -14,7 +14,9 @@ check-admitted:
rules:
- if: '$CI_COMMIT_BRANCH == "kvx-work"'
when: always
- - if: '$CI_COMMIT_BRANCH == "mppa-kvx"'
+ - if: '$CI_COMMIT_BRANCH == "kvx-work-ssa"'
+ when: always
+ - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"'
when: always
- if: '$CI_COMMIT_BRANCH == "master"'
when: always
@@ -35,7 +37,9 @@ build_x86_64:
rules:
- if: '$CI_COMMIT_BRANCH == "kvx-work"'
when: always
- - if: '$CI_COMMIT_BRANCH == "mppa-kvx"'
+ - if: '$CI_COMMIT_BRANCH == "kvx-work-ssa"'
+ when: always
+ - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"'
when: always
- if: '$CI_COMMIT_BRANCH == "master"'
when: always
@@ -58,7 +62,9 @@ build_ia32:
rules:
- if: '$CI_COMMIT_BRANCH == "kvx-work"'
when: always
- - if: '$CI_COMMIT_BRANCH == "mppa-kvx"'
+ - if: '$CI_COMMIT_BRANCH == "kvx-work-ssa"'
+ when: always
+ - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"'
when: always
- if: '$CI_COMMIT_BRANCH == "master"'
when: always
@@ -76,14 +82,14 @@ build_aarch64:
script:
- ./config_aarch64.sh
- make -j "$NJOBS"
- - export LD_LIBRARY_PATH=/usr/aarch64-linux-gnu/lib
- - sudo ln -s /usr/aarch64-linux-gnu/lib/ld-linux-aarch64.so.1 /lib
- - make -C test CCOMPOPTS='-static' SIMU='qemu-aarch64' EXECUTE='qemu-aarch64' all test
- - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='aarch64-linux-gnu-gcc' EXECUTE='qemu-aarch64' CCOMPOPTS='-static' TARGET_CFLAGS='-static'
+ - make -C test SIMU='qemu-aarch64 -L /usr/aarch64-linux-gnu' EXECUTE='qemu-aarch64 -L /usr/aarch64-linux-gnu' all test
+ - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='aarch64-linux-gnu-gcc' EXECUTE='qemu-aarch64 -L /usr/aarch64-linux-gnu' CCOMPOPTS='-static' TARGET_CFLAGS='-static'
rules:
- if: '$CI_COMMIT_BRANCH == "kvx-work"'
when: always
- - if: '$CI_COMMIT_BRANCH == "mppa-kvx"'
+ - if: '$CI_COMMIT_BRANCH == "kvx-work-ssa"'
+ when: always
+ - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"'
when: always
- if: '$CI_COMMIT_BRANCH == "master"'
when: always
@@ -101,14 +107,14 @@ build_arm:
script:
- ./config_arm.sh
- make -j "$NJOBS"
- - export LD_LIBRARY_PATH=/usr/arm-linux-gnueabi/lib
- - sudo ln -s /usr/arm-linux-gnueabi/lib/ld-linux.so.3 /lib # FIXME: UGLY !
- - make -C test CCOMPOPTS=-static SIMU='qemu-arm' EXECUTE='qemu-arm' all test
- - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='arm-linux-gnueabi-gcc' EXECUTE='qemu-arm' CCOMPOPTS='-static' TARGET_CFLAGS='-static' BITS=32
+ - make -C test SIMU='qemu-arm -L /usr/arm-linux-gnueabi' EXECUTE='qemu-arm -L /usr/arm-linux-gnueabi' all test
+ - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='arm-linux-gnueabi-gcc' EXECUTE='qemu-arm -L /usr/arm-linux-gnueabi' CCOMPOPTS='-static' TARGET_CFLAGS='-static' BITS=32
rules:
- if: '$CI_COMMIT_BRANCH == "kvx-work"'
when: always
- - if: '$CI_COMMIT_BRANCH == "mppa-kvx"'
+ - if: '$CI_COMMIT_BRANCH == "kvx-work-ssa"'
+ when: always
+ - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"'
when: always
- if: '$CI_COMMIT_BRANCH == "master"'
when: always
@@ -127,14 +133,14 @@ build_armhf:
script:
- ./config_armhf.sh
- make -j "$NJOBS"
- - export LD_LIBRARY_PATH=/usr/arm-linux-gnueabihf/lib
- - sudo ln -s /usr/arm-linux-gnueabihf/lib/ld-linux-armhf.so.3 /lib # FIXME: UGLY !
- - make -C test CCOMPOPTS=-static SIMU='qemu-arm' EXECUTE='qemu-arm' all test
- - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='arm-linux-gnueabihf-gcc' EXECUTE='qemu-arm' CCOMPOPTS='-static' TARGET_CFLAGS='-static' BITS=32
+ - make -C test SIMU='qemu-arm -L /usr/arm-linux-gnueabihf' EXECUTE='qemu-arm -L /usr/arm-linux-gnueabihf' all test
+ - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='arm-linux-gnueabihf-gcc' EXECUTE='qemu-arm -L /usr/arm-linux-gnueabihf' CCOMPOPTS='-static' TARGET_CFLAGS='-static' BITS=32
rules:
- if: '$CI_COMMIT_BRANCH == "kvx-work"'
when: always
- - if: '$CI_COMMIT_BRANCH == "mppa-kvx"'
+ - if: '$CI_COMMIT_BRANCH == "kvx-work-ssa"'
+ when: always
+ - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"'
when: always
- if: '$CI_COMMIT_BRANCH == "master"'
when: always
@@ -145,17 +151,25 @@ build_ppc:
image: coqorg/coq:8.12.2-ocaml-4.11.1-flambda
before_script:
- sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update
- - sudo apt-get -y install gcc-powerpc-linux-gnu qemu-user
+ - sudo apt-get -y install gcc-powerpc-linux-gnu wget ninja-build libglib2.0-dev libpixman-1-dev
+ - wget --no-verbose https://download.qemu.org/qemu-6.0.0.tar.xz
+ - tar xJf qemu-6.0.0.tar.xz
+ - (cd qemu-6.0.0 && ./configure --target-list=ppc-linux-user && make && sudo make install)
- eval `opam config env`
- opam update
- opam install -y menhir
script:
- ./config_ppc.sh
- make -j "$NJOBS"
+ - qemu-ppc --version
+ - make -C test SIMU='qemu-ppc -L /usr/powerpc-linux-gnu -cpu 7400' EXECUTE='qemu-ppc -L /usr/powerpc-linux-gnu -cpu 7400' all test
+ - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='powerpc-linux-gnu-gcc' EXECUTE='qemu-ppc -L /usr/powerpc-linux-gnu -cpu 7400' BITS=32
rules:
- if: '$CI_COMMIT_BRANCH == "kvx-work"'
when: always
- - if: '$CI_COMMIT_BRANCH == "mppa-kvx"'
+ - if: '$CI_COMMIT_BRANCH == "kvx-work-ssa"'
+ when: always
+ - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"'
when: always
- if: '$CI_COMMIT_BRANCH == "master"'
when: always
@@ -173,10 +187,14 @@ build_ppc64:
script:
- ./config_ppc64.sh
- make -j "$NJOBS"
+ #- make -C test SIMU='qemu-ppc64 -L /usr/powerpc64-linux-gnu -cpu 7400' EXECUTE='qemu-ppc64 -L /usr/powerpc64-linux-gnu -cpu 7400' all test
+ #- ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='powerpc64-linux-gnu-gcc' EXECUTE='qemu-ppc64 -L /usr/powerpc64-linux-gnu -cpu 7400' BITS=32
rules:
- if: '$CI_COMMIT_BRANCH == "kvx-work"'
when: always
- - if: '$CI_COMMIT_BRANCH == "mppa-kvx"'
+ - if: '$CI_COMMIT_BRANCH == "kvx-work-ssa"'
+ when: always
+ - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"'
when: always
- if: '$CI_COMMIT_BRANCH == "master"'
when: always
@@ -194,14 +212,14 @@ build_rv64:
script:
- ./config_rv64.sh
- make -j "$NJOBS"
- - export LD_LIBRARY_PATH=/usr/riscv64-linux-gnu/lib
- - sudo ln -s /usr/riscv64-linux-gnu/lib/ld-linux-riscv64-lp64d.so.1 /lib
- - make -C test CCOMPOPTS=-static SIMU='qemu-riscv64' EXECUTE='qemu-riscv64' all test
- - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='riscv64-linux-gnu-gcc' EXECUTE='qemu-riscv64' CCOMPOPTS='-static' TARGET_CFLAGS='-static'
+ - make -C test SIMU='qemu-riscv64 -L /usr/riscv64-linux-gnu' EXECUTE='qemu-riscv64 -L /usr/riscv64-linux-gnu' all test
+ - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='riscv64-linux-gnu-gcc' EXECUTE='qemu-riscv64 -L /usr/riscv64-linux-gnu' CCOMPOPTS='-static' TARGET_CFLAGS='-static'
rules:
- if: '$CI_COMMIT_BRANCH == "kvx-work"'
when: always
- - if: '$CI_COMMIT_BRANCH == "mppa-kvx"'
+ - if: '$CI_COMMIT_BRANCH == "kvx-work-ssa"'
+ when: always
+ - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"'
when: always
- if: '$CI_COMMIT_BRANCH == "master"'
when: always
@@ -222,7 +240,9 @@ build_rv32:
rules:
- if: '$CI_COMMIT_BRANCH == "kvx-work"'
when: always
- - if: '$CI_COMMIT_BRANCH == "mppa-kvx"'
+ - if: '$CI_COMMIT_BRANCH == "kvx-work-ssa"'
+ when: always
+ - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"'
when: always
- if: '$CI_COMMIT_BRANCH == "master"'
when: always
@@ -249,7 +269,9 @@ build_kvx:
rules:
- if: '$CI_COMMIT_BRANCH == "kvx-work"'
when: always
- - if: '$CI_COMMIT_BRANCH == "mppa-kvx"'
+ - if: '$CI_COMMIT_BRANCH == "kvx-work-ssa"'
+ when: always
+ - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"'
when: always
- if: '$CI_COMMIT_BRANCH == "master"'
when: always
diff --git a/aarch64/Machregsaux.ml b/aarch64/Machregsaux.ml
index 41db3bd4..98e461eb 100644
--- a/aarch64/Machregsaux.ml
+++ b/aarch64/Machregsaux.ml
@@ -19,3 +19,6 @@ let class_of_type = function
| AST.Tint | AST.Tlong -> 0
| AST.Tfloat | AST.Tsingle -> 1
| AST.Tany32 | AST.Tany64 -> assert false
+
+(* number of available registers per class *)
+let nr_regs = [| 29; 32 |]
diff --git a/aarch64/Machregsaux.mli b/aarch64/Machregsaux.mli
index 01b0f9fd..23ac1c9a 100644
--- a/aarch64/Machregsaux.mli
+++ b/aarch64/Machregsaux.mli
@@ -15,3 +15,6 @@
val is_scratch_register: string -> bool
val class_of_type: AST.typ -> int
+
+(* Number of registers in each class *)
+val nr_regs : int array
diff --git a/aarch64/PostpassSchedulingOracle.ml b/aarch64/PostpassSchedulingOracle.ml
index cde3e7a7..6f784238 100644
--- a/aarch64/PostpassSchedulingOracle.ml
+++ b/aarch64/PostpassSchedulingOracle.ml
@@ -507,6 +507,9 @@ let build_problem bb =
{
max_latency = -1;
resource_bounds = opweights.pipelined_resource_bounds;
+ live_regs_entry = Registers.Regset.empty; (* unused here *)
+ typing = (fun x -> AST.Tint); (* unused here *)
+ reference_counting = None;
instruction_usages = instruction_usages bb;
latency_constraints = latency_constraints bb;
}
diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml
index 2c3eb14f..03a9e202 100644
--- a/aarch64/PrepassSchedulingOracle.ml
+++ b/aarch64/PrepassSchedulingOracle.ml
@@ -1,3 +1,16 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* Léo Gourdin UGA, VERIMAG *)
+(* Nicolas Nardino ENS-Lyon, VERIMAG *)
+(* *)
+(* *)
+(* *************************************************************)
+
open AST
open RTL
open Maps
@@ -406,11 +419,15 @@ let get_alias_dependencies seqa =
!deps;;
*)
-let define_problem (opweights : opweights) seqa =
+let define_problem (opweights : opweights) (live_entry_regs : Regset.t)
+ (typing : RTLtyping.regenv) reference_counting seqa =
let simple_deps = get_simple_dependencies opweights seqa in
{ max_latency = -1;
resource_bounds = opweights.pipelined_resource_bounds;
- instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa);
+ live_regs_entry = live_entry_regs;
+ typing = typing;
+ reference_counting = Some reference_counting;
+ instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa);
latency_constraints =
(* if (use_alias_analysis ())
then (get_alias_dependencies seqa) @ simple_deps
@@ -439,7 +456,10 @@ let prepass_scheduler_by_name name problem early_ones =
| "zigzag" -> zigzag_scheduler problem early_ones
| _ -> scheduler_by_name name problem
-let schedule_sequence (seqa : (instruction*Regset.t) array) =
+let schedule_sequence (seqa : (instruction*Regset.t) array)
+ (live_regs_entry : Registers.Regset.t)
+ (typing : RTLtyping.regenv)
+ reference =
let opweights = OpWeights.get_opweights () in
try
if (Array.length seqa) <= 1
@@ -449,7 +469,8 @@ let schedule_sequence (seqa : (instruction*Regset.t) array) =
let nr_instructions = Array.length seqa in
(if !Clflags.option_debug_compcert > 6
then Printf.printf "prepass scheduling length = %d\n" (Array.length seqa));
- let problem = define_problem opweights seqa in
+ let problem = define_problem opweights live_regs_entry
+ typing reference seqa in
(if !Clflags.option_debug_compcert > 7
then (print_sequence stdout (Array.map fst seqa);
print_problem stdout problem));
@@ -461,7 +482,8 @@ let schedule_sequence (seqa : (instruction*Regset.t) array) =
| Icond _ -> true
| _ -> false) seqa) with
| None -> Printf.printf "no solution in prepass scheduling\n";
- None
+ Stdlib.exit 1
+ (* TODO None *)
| Some solution ->
let positions = Array.init nr_instructions (fun i -> i) in
Array.sort (fun i j ->
diff --git a/arm/ExpansionOracle.ml b/arm/ExpansionOracle.ml
index ee2674bf..3b63b80d 120000..100644
--- a/arm/ExpansionOracle.ml
+++ b/arm/ExpansionOracle.ml
@@ -1 +1,17 @@
-../aarch64/ExpansionOracle.ml \ No newline at end of file
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Léo Gourdin UGA, VERIMAG *)
+(* *)
+(* Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+open RTLpathCommon
+
+let expanse (sb : superblock) code pm = (code, pm)
+
+let find_last_node_reg c = ()
diff --git a/arm/PrepassSchedulingOracle.ml b/arm/PrepassSchedulingOracle.ml
index 9885fd52..42a3da23 120000..100644
--- a/arm/PrepassSchedulingOracle.ml
+++ b/arm/PrepassSchedulingOracle.ml
@@ -1 +1,6 @@
-../x86/PrepassSchedulingOracle.ml \ No newline at end of file
+open RTL
+open Registers
+
+(* Do not do anything *)
+let schedule_sequence (seqa : (instruction*Regset.t) array)
+ live_regs_entry typing reference = None
diff --git a/backend/CSE2proof.v b/backend/CSE2proof.v
index 49dbd409..252240c9 100644
--- a/backend/CSE2proof.v
+++ b/backend/CSE2proof.v
@@ -1399,7 +1399,7 @@ Proof.
2: discriminate.
econstructor; split.
{
- eapply exec_Iop with (v := (default_notrap_load_value chunk)); eauto.
+ eapply exec_Iop with (v := Vundef); eauto.
simpl.
rewrite <- subst_args_ok with (genv := ge) (f := f) (pc := pc) (sp := sp) (m := m) in H0.
{
@@ -1472,7 +1472,7 @@ Proof.
2: discriminate.
econstructor; split.
{
- eapply exec_Iop with (v := (default_notrap_load_value chunk)); eauto.
+ eapply exec_Iop with (v := Vundef); eauto.
simpl.
rewrite <- subst_args_ok with (genv := ge) (f := f) (pc := pc) (sp := sp) (m := m) in H0.
{
diff --git a/backend/CSE3analysisproof.v b/backend/CSE3analysisproof.v
index d53cf604..382c9f4c 100644
--- a/backend/CSE3analysisproof.v
+++ b/backend/CSE3analysisproof.v
@@ -502,7 +502,7 @@ Section SOUNDNESS.
end
with
| Some dat => v' = dat
- | None => v' = default_notrap_load_value chunk
+ | None => v' = Vundef
end
end.
diff --git a/backend/CSEdomain.v b/backend/CSEdomain.v
index f78e1d25..9641d012 100644
--- a/backend/CSEdomain.v
+++ b/backend/CSEdomain.v
@@ -113,12 +113,12 @@ Inductive rhs_eval_to (valu: valuation) (ge: genv) (sp: val) (m: mem):
(* | load_notrap1_eval_to: forall chunk addr vl,
eval_addressing ge sp addr (map valu vl) = None ->
rhs_eval_to valu ge sp m (Load NOTRAP chunk addr vl)
- (default_notrap_load_value chunk)
+ Vundef
| load_notrap2_eval_to: forall chunk addr vl a,
eval_addressing ge sp addr (map valu vl) = Some a ->
Mem.loadv chunk m a = None ->
rhs_eval_to valu ge sp m (Load NOTRAP chunk addr vl)
- (default_notrap_load_value chunk) *).
+ Vundef *).
Inductive equation_holds (valu: valuation) (ge: genv) (sp: val) (m: mem):
equation -> Prop :=
diff --git a/backend/CSEproof.v b/backend/CSEproof.v
index cf51f5a2..556b44b3 100644
--- a/backend/CSEproof.v
+++ b/backend/CSEproof.v
@@ -402,7 +402,7 @@ Lemma add_load_holds_none1:
forall valu1 ge sp rs m n addr (args: list reg) chunk dst,
numbering_holds valu1 ge sp rs m n ->
eval_addressing ge sp addr rs##args = None ->
- exists valu2, numbering_holds valu2 ge sp (rs#dst <- (default_notrap_load_value chunk)) m (add_load n dst chunk addr args).
+ exists valu2, numbering_holds valu2 ge sp (rs#dst <- Vundef) m (add_load n dst chunk addr args).
Proof.
unfold add_load; intros.
destruct (valnum_regs n args) as [n1 vl] eqn:VN.
@@ -418,7 +418,7 @@ Lemma add_load_holds_none2:
numbering_holds valu1 ge sp rs m n ->
eval_addressing ge sp addr rs##args = Some a ->
Mem.loadv chunk m a = None ->
- exists valu2, numbering_holds valu2 ge sp (rs#dst <- (default_notrap_load_value chunk)) m (add_load n dst NOTRAP chunk addr args).
+ exists valu2, numbering_holds valu2 ge sp (rs#dst <- Vundef) m (add_load n dst NOTRAP chunk addr args).
Proof.
unfold add_load; intros.
destruct (valnum_regs n args) as [n1 vl] eqn:VN.
@@ -1210,7 +1210,6 @@ Proof.
exists valu1.
apply set_unknown_holds.
assumption.
- unfold default_notrap_load_value.
apply set_reg_lessdef; eauto.
}
{
diff --git a/backend/Deadcodeproof.v b/backend/Deadcodeproof.v
index b51d6cce..be20af0b 100644
--- a/backend/Deadcodeproof.v
+++ b/backend/Deadcodeproof.v
@@ -845,7 +845,6 @@ Ltac UseTransfer :=
eapply match_succ_states; eauto. simpl; auto.
apply eagree_update; auto.
rewrite is_int_zero_sound by auto.
- unfold default_notrap_load_value.
constructor.
+ (* preserved *)
exploit eval_addressing_lessdef_none. eapply add_needs_all_lessdef; eauto. eassumption.
@@ -878,7 +877,6 @@ Ltac UseTransfer :=
eapply match_succ_states; eauto. simpl; auto.
apply eagree_update; auto.
rewrite is_int_zero_sound by auto.
- unfold default_notrap_load_value.
constructor.
+ (* preserved *)
exploit eval_addressing_lessdef. eapply add_needs_all_lessdef; eauto. eauto.
diff --git a/backend/LTL.v b/backend/LTL.v
index 3edd60a2..a382ef0e 100644
--- a/backend/LTL.v
+++ b/backend/LTL.v
@@ -217,14 +217,14 @@ Inductive step: state -> trace -> state -> Prop :=
E0 (Block s f sp bb rs' m)
| exec_Lload_notrap1: forall s f sp chunk addr args dst bb rs m rs',
eval_addressing ge sp addr (reglist rs args) = None ->
- rs' = Locmap.set (R dst) (default_notrap_load_value chunk)
+ rs' = Locmap.set (R dst) Vundef
(undef_regs (destroyed_by_load chunk addr) rs) ->
step (Block s f sp (Lload NOTRAP chunk addr args dst :: bb) rs m)
E0 (Block s f sp bb rs' m)
| exec_Lload_notrap2: forall s f sp chunk addr args dst bb rs m a rs',
eval_addressing ge sp addr (reglist rs args) = Some a ->
Mem.loadv chunk m a = None ->
- rs' = Locmap.set (R dst) (default_notrap_load_value chunk)
+ rs' = Locmap.set (R dst) Vundef
(undef_regs (destroyed_by_load chunk addr) rs) ->
step (Block s f sp (Lload NOTRAP chunk addr args dst :: bb) rs m)
E0 (Block s f sp bb rs' m)
diff --git a/backend/Linear.v b/backend/Linear.v
index 1443f795..cb11f7dc 100644
--- a/backend/Linear.v
+++ b/backend/Linear.v
@@ -170,7 +170,7 @@ Inductive step: state -> trace -> state -> Prop :=
forall s f sp chunk addr args dst b rs m rs',
eval_addressing ge sp addr (reglist rs args) = None ->
rs' = Locmap.set (R dst)
- (default_notrap_load_value chunk)
+ Vundef
(undef_regs (destroyed_by_load chunk addr) rs) ->
step (State s f sp (Lload NOTRAP chunk addr args dst :: b) rs m)
E0 (State s f sp b rs' m)
@@ -179,7 +179,7 @@ Inductive step: state -> trace -> state -> Prop :=
eval_addressing ge sp addr (reglist rs args) = Some a ->
Mem.loadv chunk m a = None ->
rs' = Locmap.set (R dst)
- (default_notrap_load_value chunk)
+ Vundef
(undef_regs (destroyed_by_load chunk addr) rs) ->
step (State s f sp (Lload NOTRAP chunk addr args dst :: b) rs m)
E0 (State s f sp b rs' m)
diff --git a/backend/Lineartyping.v b/backend/Lineartyping.v
index 22658fb7..cf903aad 100644
--- a/backend/Lineartyping.v
+++ b/backend/Lineartyping.v
@@ -338,14 +338,12 @@ Local Opaque mreg_type.
simpl in *; InvBooleans.
econstructor; eauto.
apply wt_setreg. eapply Val.has_subtype; eauto.
- unfold default_notrap_load_value.
constructor.
apply wt_undef_regs; auto.
- (* load notrap2 *)
simpl in *; InvBooleans.
econstructor; eauto.
apply wt_setreg. eapply Val.has_subtype; eauto.
- unfold default_notrap_load_value.
constructor.
apply wt_undef_regs; auto.
- (* store *)
diff --git a/backend/Mach.v b/backend/Mach.v
index 1c6fdb18..2cfd738d 100644
--- a/backend/Mach.v
+++ b/backend/Mach.v
@@ -330,14 +330,14 @@ Inductive step: state -> trace -> state -> Prop :=
| exec_Mload_notrap1:
forall s f sp chunk addr args dst c rs m rs',
eval_addressing ge sp addr rs##args = None ->
- rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- (default_notrap_load_value chunk)) ->
+ rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- Vundef) ->
step (State s f sp (Mload NOTRAP chunk addr args dst :: c) rs m)
E0 (State s f sp c rs' m)
| exec_Mload_notrap2:
forall s f sp chunk addr args dst c rs m a rs',
eval_addressing ge sp addr rs##args = Some a ->
Mem.loadv chunk m a = None ->
- rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- (default_notrap_load_value chunk)) ->
+ rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- Vundef) ->
step (State s f sp (Mload NOTRAP chunk addr args dst :: c) rs m)
E0 (State s f sp c rs' m)
| exec_Mstore:
diff --git a/backend/RTL.v b/backend/RTL.v
index 31b5cf99..fe350adf 100644
--- a/backend/RTL.v
+++ b/backend/RTL.v
@@ -225,14 +225,14 @@ Inductive step: state -> trace -> state -> Prop :=
(fn_code f)!pc = Some(Iload NOTRAP chunk addr args dst pc') ->
eval_addressing ge sp addr rs##args = None ->
step (State s f sp pc rs m)
- E0 (State s f sp pc' (rs#dst <- (default_notrap_load_value chunk)) m)
+ E0 (State s f sp pc' (rs#dst <- Vundef) m)
| exec_Iload_notrap2:
forall s f sp pc rs m chunk addr args dst pc' a,
(fn_code f)!pc = Some(Iload NOTRAP chunk addr args dst pc') ->
eval_addressing ge sp addr rs##args = Some a ->
Mem.loadv chunk m a = None->
step (State s f sp pc rs m)
- E0 (State s f sp pc' (rs#dst <- (default_notrap_load_value chunk)) m)
+ E0 (State s f sp pc' (rs#dst <- Vundef) m)
| exec_Istore:
forall s f sp pc rs m chunk addr args src pc' a m',
(fn_code f)!pc = Some(Istore chunk addr args src pc') ->
diff --git a/backend/RTLtyping.v b/backend/RTLtyping.v
index 15ed6d8a..6048f895 100644
--- a/backend/RTLtyping.v
+++ b/backend/RTLtyping.v
@@ -858,7 +858,7 @@ Lemma wt_exec_Iload_notrap:
forall env f chunk addr args dst s rs,
wt_instr f env (Iload NOTRAP chunk addr args dst s) ->
wt_regset env rs ->
- wt_regset env (rs#dst <- (default_notrap_load_value chunk)).
+ wt_regset env (rs#dst <- Vundef).
Proof.
intros.
eapply wt_regset_assign; eauto. simpl. trivial.
diff --git a/backend/Tailcallproof.v b/backend/Tailcallproof.v
index 80a68327..39fc10fb 100644
--- a/backend/Tailcallproof.v
+++ b/backend/Tailcallproof.v
@@ -440,7 +440,7 @@ Proof.
TransfInstr.
assert (Val.lessdef_list (rs##args) (rs'##args)). apply regs_lessdef_regs; auto.
left.
- exists (State s' (transf_function f) (Vptr sp0 Ptrofs.zero) pc' (rs'#dst <- (default_notrap_load_value chunk)) m'); split.
+ exists (State s' (transf_function f) (Vptr sp0 Ptrofs.zero) pc' (rs'#dst <- Vundef) m'); split.
eapply exec_Iload_notrap1.
eassumption.
eapply eval_addressing_lessdef_none. eassumption.
@@ -465,7 +465,7 @@ Proof.
exact symbols_preserved.
assumption.
econstructor; eauto. apply set_reg_lessdef; auto.
- + exists (State s' (transf_function f) (Vptr sp0 Ptrofs.zero) pc' (rs'#dst <- (default_notrap_load_value chunk)) m'); split.
+ + exists (State s' (transf_function f) (Vptr sp0 Ptrofs.zero) pc' (rs'#dst <- Vundef) m'); split.
eapply exec_Iload_notrap2. eassumption.
erewrite eval_addressing_preserved.
eassumption.
diff --git a/backend/ValueAnalysis.v b/backend/ValueAnalysis.v
index 561e94c9..e20edff7 100644
--- a/backend/ValueAnalysis.v
+++ b/backend/ValueAnalysis.v
@@ -1287,13 +1287,11 @@ Proof.
eapply sound_succ_state; eauto. simpl; auto.
unfold transfer; rewrite H. eauto.
apply ematch_update; auto.
- unfold default_notrap_load_value.
constructor.
- (* load notrap2 *)
eapply sound_succ_state; eauto. simpl; auto.
unfold transfer; rewrite H. eauto.
apply ematch_update; auto.
- unfold default_notrap_load_value.
constructor.
- (* store *)
exploit eval_static_addressing_sound; eauto with va. intros VMADDR.
diff --git a/common/DebugPrint.ml b/common/DebugPrint.ml
index 6f8449ee..275e6a71 100644
--- a/common/DebugPrint.ml
+++ b/common/DebugPrint.ml
@@ -132,10 +132,10 @@ let print_instructions insts code =
| None -> failwith "Did not get some"
| Some thing -> thing
in if (!debug_flag) then begin
- debug "[ ";
+ debug "[\n";
List.iter (
fun n -> (PrintRTL.print_instruction stdout (P.to_int n, get_some @@ PTree.get n code))
- ) insts; debug "]"
+ ) insts; debug " ]"
end
let print_arrayp arr = begin
diff --git a/common/Memory.v b/common/Memory.v
index bf8ca083..ff17efb0 100644
--- a/common/Memory.v
+++ b/common/Memory.v
@@ -41,8 +41,6 @@ Require Export Memdata.
Require Export Memtype.
Require Import Lia.
-Definition default_notrap_load_value (chunk : memory_chunk) := Vundef.
-
(* To avoid useless definitions of inductors in extracted code. *)
Local Unset Elimination Schemes.
diff --git a/config_macos_x86_64.sh b/config_macos_x86_64.sh
new file mode 100755
index 00000000..9d5b3f5e
--- /dev/null
+++ b/config_macos_x86_64.sh
@@ -0,0 +1 @@
+exec ./config_simple.sh x86_64-macos "$@"
diff --git a/config_simple.sh b/config_simple.sh
index e2d3844c..52b7d1a6 100755
--- a/config_simple.sh
+++ b/config_simple.sh
@@ -2,7 +2,7 @@ arch=$1
shift
version=`git rev-parse --short HEAD`
branch=`git rev-parse --abbrev-ref HEAD`
-date=`date -I`
+date=`date +%Y-%m-%d`
if test "x$CCOMP_INSTALL_PREFIX" = "x" ;
then CCOMP_INSTALL_PREFIX=/opt/CompCert ;
diff --git a/driver/Clflags.ml b/driver/Clflags.ml
index fa17c2d9..25bd2c78 100644
--- a/driver/Clflags.ml
+++ b/driver/Clflags.ml
@@ -51,7 +51,7 @@ let option_flooprotate = ref 0 (* rotate the innermost loops to have the conditi
let option_mtune = ref ""
let option_fprepass = ref true
-let option_fprepass_sched = ref "list"
+let option_fprepass_sched = ref "regpres"
let option_fpostpass = ref true
let option_fpostpass_sched = ref "list"
@@ -115,4 +115,6 @@ let option_inline_auto_threshold = ref 0
let option_profile_arcs = ref false
let option_fbranch_probabilities = ref true
let option_debug_compcert = ref 0
+let option_regpres_threshold = ref 2
+let option_regpres_wait_window = ref false
let main_function_name = ref "main"
diff --git a/driver/Driver.ml b/driver/Driver.ml
index 7192ba4b..3f5a4bd9 100644
--- a/driver/Driver.ml
+++ b/driver/Driver.ml
@@ -210,7 +210,9 @@ Processing options:
-mtune= Type of CPU (for scheduling on some architectures)
-fprepass Perform prepass scheduling (only on some architectures) [on]
-fprepass= <optim> Perform postpass scheduling with the specified optimization [list]
- (<optim>=list: list scheduling, <optim>=revlist: reverse list scheduling, <optim>=zigzag: zigzag scheduling, <optim>=ilp: ILP, <optim>=greedy: just packing bundles)
+ (<optim>=list: list scheduling, <optim>=revlist: reverse list scheduling, <optim>=regpres: list scheduling aware of register pressure, <optim>=regpres_bis: variant of regpres, <optim>=zigzag: zigzag scheduling, <optim>=ilp: ILP, <optim>=greedy: just packing bundles)
+ -regpres-threshold n With `-fprepass= regpres`, set threshold value for number of free registers before trying to decrease register pressure
+ -fregpres-wait-window When register pressure is high, use a 5-cycle waiting window instead of scheduling short paths first (default no)
-fpostpass Perform postpass scheduling (only for K1 architecture) [on]
-fpostpass= <optim> Perform postpass scheduling with the specified optimization [list]
(<optim>=list: list scheduling, <optim>=ilp: ILP, <optim>=greedy: just packing bundles)
@@ -296,9 +298,9 @@ let num_input_files = ref 0
let cmdline_actions =
let f_opt name ref =
[Exact("-f" ^ name), Set ref; Exact("-fno-" ^ name), Unset ref] in
- let f_opt_str name ref strref =
+ let f_opt_str name default ref strref =
[Exact("-f" ^ name ^ "="), String
- (fun s -> (strref := (if s == "" then "list" else s)); ref := true)
+ (fun s -> (strref := (if s == "" then default else s)); ref := true)
] in
let f_str name strref default =
[Exact("-f" ^ name ^ "="), String
@@ -342,6 +344,7 @@ let cmdline_actions =
Exact "-fprofile-use=", String (fun s -> Profilingaux.load_profiling_info s);
Exact "-finline-auto-threshold", Integer (fun n -> option_inline_auto_threshold := n);
Exact "-debug-compcert", Integer (fun n -> option_debug_compcert := n);
+ Exact "-regpres-threshold", Integer (fun n -> option_regpres_threshold := n);
Exact "-fsmall-data", Integer(fun n -> option_small_data := n);
Exact "-fsmall-const", Integer(fun n -> option_small_const := n);
Exact "-ffloat-const-prop", Integer(fun n -> option_ffloatconstprop := n);
@@ -424,6 +427,7 @@ let cmdline_actions =
@ f_opt "redundancy" option_fredundancy
@ [ Exact "-mtune", String (fun s -> option_mtune := s) ]
@ f_opt "prepass" option_fprepass
+ @ f_opt "regpres-wait-window" option_regpres_wait_window
@ f_opt "postpass" option_fpostpass
@ [ Exact "-ftailduplicate", Integer (fun n -> option_ftailduplicate := n) ]
@ f_opt "predict" option_fpredict
@@ -431,8 +435,8 @@ let cmdline_actions =
@ [ Exact "-funrollbody", Integer (fun n -> option_funrollbody := n) ]
@ [ Exact "-flooprotate", Integer (fun n -> option_flooprotate := n) ]
@ f_opt "tracelinearize" option_ftracelinearize
- @ f_opt_str "prepass" option_fprepass option_fprepass_sched
- @ f_opt_str "postpass" option_fpostpass option_fpostpass_sched
+ @ f_opt_str "prepass" "regpress" option_fprepass option_fprepass_sched
+ @ f_opt_str "postpass" "list" option_fpostpass option_fpostpass_sched
@ f_opt "inline" option_finline
@ f_opt "inline-functions-called-once" option_finline_functions_called_once
@ f_opt "globaladdrtmp" option_fglobaladdrtmp
diff --git a/filter_peeplog.fish b/filter_peeplog.fish
deleted file mode 100755
index 72a0eaf1..00000000
--- a/filter_peeplog.fish
+++ /dev/null
@@ -1,39 +0,0 @@
-echo "LDP_CONSEC_PEEP_IMM_INC_ldr32" (cat log | ack "LDP_CONSEC_PEEP_IMM_INC_ldr32" | wc -l)
-echo "LDP_CONSEC_PEEP_IMM_INC_ldr64" (cat log | ack "LDP_CONSEC_PEEP_IMM_INC_ldr64" | wc -l)
-echo "LDP_CONSEC_PEEP_IMM_DEC_ldr32" (cat log | ack "LDP_CONSEC_PEEP_IMM_DEC_ldr32" | wc -l)
-echo "LDP_CONSEC_PEEP_IMM_DEC_ldr64" (cat log | ack "LDP_CONSEC_PEEP_IMM_DEC_ldr64" | wc -l)
-echo "LDP_FORW_SPACED_PEEP_IMM_INC_ldr32" (cat log | ack "LDP_FORW_SPACED_PEEP_IMM_INC_ldr32" | wc -l)
-echo "LDP_FORW_SPACED_PEEP_IMM_INC_ldr64" (cat log | ack "LDP_FORW_SPACED_PEEP_IMM_INC_ldr64" | wc -l)
-echo "LDP_FORW_SPACED_PEEP_IMM_DEC_ldr32" (cat log | ack "LDP_FORW_SPACED_PEEP_IMM_DEC_ldr32" | wc -l)
-echo "LDP_FORW_SPACED_PEEP_IMM_DEC_ldr64" (cat log | ack "LDP_FORW_SPACED_PEEP_IMM_DEC_ldr64" | wc -l)
-echo "LDP_BACK_SPACED_PEEP_IMM_INC_ldr32" (cat log | ack "LDP_BACK_SPACED_PEEP_IMM_INC_ldr32" | wc -l)
-echo "LDP_BACK_SPACED_PEEP_IMM_INC_ldr64" (cat log | ack "LDP_BACK_SPACED_PEEP_IMM_INC_ldr64" | wc -l)
-echo "LDP_BACK_SPACED_PEEP_IMM_DEC_ldr32" (cat log | ack "LDP_BACK_SPACED_PEEP_IMM_DEC_ldr32" | wc -l)
-echo "LDP_BACK_SPACED_PEEP_IMM_DEC_ldr64" (cat log | ack "LDP_BACK_SPACED_PEEP_IMM_DEC_ldr64" | wc -l)
-echo "\n"
-echo "LDP_CONSEC_PEEP_IMM_INC_ldr32f" (cat log | ack "LDP_CONSEC_PEEP_IMM_INC_ldr32f" | wc -l)
-echo "LDP_CONSEC_PEEP_IMM_INC_ldr64f" (cat log | ack "LDP_CONSEC_PEEP_IMM_INC_ldr64f" | wc -l)
-echo "LDP_CONSEC_PEEP_IMM_DEC_ldr32f" (cat log | ack "LDP_CONSEC_PEEP_IMM_DEC_ldr32f" | wc -l)
-echo "LDP_CONSEC_PEEP_IMM_DEC_ldr64f" (cat log | ack "LDP_CONSEC_PEEP_IMM_DEC_ldr64f" | wc -l)
-echo "LDP_FORW_SPACED_PEEP_IMM_INC_ldr32f" (cat log | ack "LDP_FORW_SPACED_PEEP_IMM_INC_ldr32f" | wc -l)
-echo "LDP_FORW_SPACED_PEEP_IMM_INC_ldr64f" (cat log | ack "LDP_FORW_SPACED_PEEP_IMM_INC_ldr64f" | wc -l)
-echo "LDP_FORW_SPACED_PEEP_IMM_DEC_ldr32f" (cat log | ack "LDP_FORW_SPACED_PEEP_IMM_DEC_ldr32f" | wc -l)
-echo "LDP_FORW_SPACED_PEEP_IMM_DEC_ldr64f" (cat log | ack "LDP_FORW_SPACED_PEEP_IMM_DEC_ldr64f" | wc -l)
-echo "LDP_BACK_SPACED_PEEP_IMM_INC_ldr32f" (cat log | ack "LDP_BACK_SPACED_PEEP_IMM_INC_ldr32f" | wc -l)
-echo "LDP_BACK_SPACED_PEEP_IMM_INC_ldr64f" (cat log | ack "LDP_BACK_SPACED_PEEP_IMM_INC_ldr64f" | wc -l)
-echo "LDP_BACK_SPACED_PEEP_IMM_DEC_ldr32f" (cat log | ack "LDP_BACK_SPACED_PEEP_IMM_DEC_ldr32f" | wc -l)
-echo "LDP_BACK_SPACED_PEEP_IMM_DEC_ldr64f" (cat log | ack "LDP_BACK_SPACED_PEEP_IMM_DEC_ldr64f" | wc -l)
-echo "\n"
-echo "STP_CONSEC_PEEP_IMM_INC_str32" (cat log | ack "STP_CONSEC_PEEP_IMM_INC_str32" | wc -l)
-echo "STP_CONSEC_PEEP_IMM_INC_str64" (cat log | ack "STP_CONSEC_PEEP_IMM_INC_str64" | wc -l)
-echo "STP_FORW_SPACED_PEEP_IMM_INC_str32" (cat log | ack "STP_FORW_SPACED_PEEP_IMM_INC_str32" | wc -l)
-echo "STP_FORW_SPACED_PEEP_IMM_INC_str64" (cat log | ack "STP_FORW_SPACED_PEEP_IMM_INC_str64" | wc -l)
-echo "STP_BACK_SPACED_PEEP_IMM_INC_str32" (cat log | ack "STP_BACK_SPACED_PEEP_IMM_INC_str32" | wc -l)
-echo "STP_BACK_SPACED_PEEP_IMM_INC_str64" (cat log | ack "STP_BACK_SPACED_PEEP_IMM_INC_str64" | wc -l)
-echo "\n"
-echo "STP_CONSEC_PEEP_IMM_INC_str32f" (cat log | ack "STP_CONSEC_PEEP_IMM_INC_str32f" | wc -l)
-echo "STP_CONSEC_PEEP_IMM_INC_str64f" (cat log | ack "STP_CONSEC_PEEP_IMM_INC_str64f" | wc -l)
-echo "STP_FORW_SPACED_PEEP_IMM_INC_str32f" (cat log | ack "STP_FORW_SPACED_PEEP_IMM_INC_str32f" | wc -l)
-echo "STP_FORW_SPACED_PEEP_IMM_INC_str64f" (cat log | ack "STP_FORW_SPACED_PEEP_IMM_INC_str64f" | wc -l)
-echo "STP_BACK_SPACED_PEEP_IMM_INC_str32f" (cat log | ack "STP_BACK_SPACED_PEEP_IMM_INC_str32f" | wc -l)
-echo "STP_BACK_SPACED_PEEP_IMM_INC_str64f" (cat log | ack "STP_BACK_SPACED_PEEP_IMM_INC_str64f" | wc -l)
diff --git a/kvx/Asmblockdeps.v b/kvx/Asmblockdeps.v
index b6d18c3e..a9786e0a 100644
--- a/kvx/Asmblockdeps.v
+++ b/kvx/Asmblockdeps.v
@@ -164,17 +164,17 @@ Definition arith_eval (ao: arith_op) (l: list value) :=
| _, _ => None
end.
-Definition exec_incorrect_load trap chunk :=
+Definition exec_incorrect_load trap :=
match trap with
| TRAP => None
- | NOTRAP => Some (Val (concrete_default_notrap_load_value chunk))
+ | NOTRAP => Some (Val Vundef)
end.
Definition exec_load_deps_offset (trap: trapping_mode) (chunk: memory_chunk) (m: mem) (v: val) (ofs: offset) :=
let (ge, fn) := Ge in
match (eval_offset ofs) with
| OK ptr => match Mem.loadv chunk m (Val.offset_ptr v ptr) with
- | None => exec_incorrect_load trap chunk
+ | None => exec_incorrect_load trap
| Some vl => Some (Val vl)
end
| _ => None
@@ -182,13 +182,13 @@ Definition exec_load_deps_offset (trap: trapping_mode) (chunk: memory_chunk) (m:
Definition exec_load_deps_reg (trap: trapping_mode) (chunk: memory_chunk) (m: mem) (v vo: val) :=
match Mem.loadv chunk m (Val.addl v vo) with
- | None => exec_incorrect_load trap chunk
+ | None => exec_incorrect_load trap
| Some vl => Some (Val vl)
end.
Definition exec_load_deps_regxs (trap: trapping_mode) (chunk: memory_chunk) (m: mem) (v vo: val) :=
match Mem.loadv chunk m (Val.addl v (Val.shll vo (scale_of_chunk chunk))) with
- | None => exec_incorrect_load trap chunk
+ | None => exec_incorrect_load trap
| Some vl => Some (Val vl)
end.
diff --git a/kvx/Asmblockgenproof1.v b/kvx/Asmblockgenproof1.v
index a65bd5bc..259c4f9c 100644
--- a/kvx/Asmblockgenproof1.v
+++ b/kvx/Asmblockgenproof1.v
@@ -1914,7 +1914,7 @@ Lemma transl_load_access2_correct_notrap2:
Mem.loadv chunk m v = None ->
exists rs',
exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m
- /\ rs'#rd = concrete_default_notrap_load_value chunk
+ /\ rs'#rd = Vundef
/\ forall r, r <> PC -> r <> RTMP -> r <> rd -> rs'#r = rs#r.
Proof.
intros until ro; intros ARGS IREGE INSTR TR EV LOAD.
@@ -1963,7 +1963,7 @@ Lemma transl_load_access2XS_correct_notrap2:
Mem.loadv chunk m v = None ->
exists rs',
exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m
- /\ rs'#rd = concrete_default_notrap_load_value chunk
+ /\ rs'#rd = Vundef
/\ forall r, r <> PC -> r <> RTMP -> r <> rd -> rs'#r = rs#r.
Proof.
intros until ro; intros ARGS IREGE INSTR TR EV LOAD.
@@ -2008,7 +2008,7 @@ Lemma transl_load_access_correct_notrap2:
Mem.loadv chunk m v = None ->
exists rs',
exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m
- /\ rs'#rd = concrete_default_notrap_load_value chunk
+ /\ rs'#rd = Vundef
/\ forall r, r <> PC -> r <> RTMP -> r <> rd -> rs'#r = rs#r.
Proof.
intros until v; intros INSTR TR EV LOAD.
@@ -2185,7 +2185,7 @@ Lemma transl_load_correct_notrap2:
Mem.loadv chunk m a = None ->
exists rs',
exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m
- /\ rs'#(preg_of dst) = (concrete_default_notrap_load_value chunk)
+ /\ rs'#(preg_of dst) = Vundef
/\ forall r, r <> PC -> r <> RTMP -> r <> preg_of dst -> rs'#r = rs#r.
Proof.
intros until a; intros TR EV LOAD. destruct addr.
diff --git a/kvx/Asmblockprops.v b/kvx/Asmblockprops.v
index c3929be5..a732d29b 100644
--- a/kvx/Asmblockprops.v
+++ b/kvx/Asmblockprops.v
@@ -96,9 +96,9 @@ Theorem exec_basic_instr_pc:
Proof.
intros. destruct b; try destruct i; try destruct i.
all: try (inv H; Simpl).
- 1-10: unfold parexec_load_offset in H1; destruct (eval_offset ofs); try discriminate; destruct (Mem.loadv _ _ _); unfold parexec_incorrect_load in *; destruct trap; try discriminate; unfold concrete_default_notrap_load_value in *; inv H1; Simpl; fail.
+ 1-10: unfold parexec_load_offset in H1; destruct (eval_offset ofs); try discriminate; destruct (Mem.loadv _ _ _); unfold parexec_incorrect_load in *; destruct trap; try discriminate; inv H1; Simpl; fail.
- 1-20: unfold parexec_load_reg, parexec_load_regxs in H1; destruct (Mem.loadv _ _ _); unfold parexec_incorrect_load in *; destruct trap; try discriminate; unfold concrete_default_notrap_load_value in *; inv H1; Simpl; fail.
+ 1-20: unfold parexec_load_reg, parexec_load_regxs in H1; destruct (Mem.loadv _ _ _); unfold parexec_incorrect_load in *; destruct trap; try discriminate; inv H1; Simpl; fail.
{ (* PLoadQRRO *)
unfold parexec_load_q_offset in H1.
diff --git a/kvx/Asmvliw.v b/kvx/Asmvliw.v
index aa2e0885..45b230e6 100644
--- a/kvx/Asmvliw.v
+++ b/kvx/Asmvliw.v
@@ -313,7 +313,11 @@ Inductive cf_instruction : Type :=
.
(** *** Loads *)
-Definition concrete_default_notrap_load_value (chunk : memory_chunk) :=
+
+(* What follows was the original spec, but is subtly incorrect.
+ Our definition of the assembly-level memory model is already an abstraction of the real world.
+ In particular, we consider that a load is incorrect when it points outside of CompCert's visible memory, whereas this memory could be correct at the assembly level.
+ This means that CompCert would believe an incorrect load would yield 0 whereas it would yield another value.
match chunk with
| Mint8signed | Mint8unsigned | Mint16signed | Mint16unsigned
| Mint32 => Vint Int.zero
@@ -321,7 +325,7 @@ Definition concrete_default_notrap_load_value (chunk : memory_chunk) :=
| Many32 | Many64 => Vundef
| Mfloat32 => Vsingle Float32.zero
| Mfloat64 => Vfloat Float.zero
- end.
+ end. *)
Inductive load_name : Type :=
| Plb (**r load byte *)
@@ -1169,16 +1173,16 @@ Definition eval_offset (ofs: offset) : res ptrofs := OK ofs.
(** *** load/store instructions *)
-Definition parexec_incorrect_load trap chunk d rsw mw :=
+Definition parexec_incorrect_load trap d rsw mw :=
match trap with
| TRAP => Stuck
- | NOTRAP => Next (rsw#d <- (concrete_default_notrap_load_value chunk)) mw
+ | NOTRAP => Next (rsw#d <- Vundef) mw
end.
Definition parexec_load_offset (trap: trapping_mode) (chunk: memory_chunk) (rsr rsw: regset) (mr mw: mem) (d a: ireg) (ofs: offset) :=
match (eval_offset ofs) with
| OK ptr => match Mem.loadv chunk mr (Val.offset_ptr (rsr a) ptr) with
- | None => parexec_incorrect_load trap chunk d rsw mw
+ | None => parexec_incorrect_load trap d rsw mw
| Some v => Next (rsw#d <- v) mw
end
| _ => Stuck
@@ -1225,13 +1229,13 @@ Definition parexec_load_o_offset (rsr rsw: regset) (mr mw: mem) (d : gpreg_o) (a
Definition parexec_load_reg (trap: trapping_mode) (chunk: memory_chunk) (rsr rsw: regset) (mr mw: mem) (d a ro: ireg) :=
match Mem.loadv chunk mr (Val.addl (rsr a) (rsr ro)) with
- | None => parexec_incorrect_load trap chunk d rsw mw
+ | None => parexec_incorrect_load trap d rsw mw
| Some v => Next (rsw#d <- v) mw
end.
Definition parexec_load_regxs (trap: trapping_mode) (chunk: memory_chunk) (rsr rsw: regset) (mr mw: mem) (d a ro: ireg) :=
match Mem.loadv chunk mr (Val.addl (rsr a) (Val.shll (rsr ro) (scale_of_chunk chunk))) with
- | None => parexec_incorrect_load trap chunk d rsw mw
+ | None => parexec_incorrect_load trap d rsw mw
| Some v => Next (rsw#d <- v) mw
end.
diff --git a/kvx/ExpansionOracle.ml b/kvx/ExpansionOracle.ml
index ee2674bf..3b63b80d 120000..100644
--- a/kvx/ExpansionOracle.ml
+++ b/kvx/ExpansionOracle.ml
@@ -1 +1,17 @@
-../aarch64/ExpansionOracle.ml \ No newline at end of file
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Léo Gourdin UGA, VERIMAG *)
+(* *)
+(* Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+open RTLpathCommon
+
+let expanse (sb : superblock) code pm = (code, pm)
+
+let find_last_node_reg c = ()
diff --git a/kvx/Machregsaux.ml b/kvx/Machregsaux.ml
index e3b18181..dbb89727 100644
--- a/kvx/Machregsaux.ml
+++ b/kvx/Machregsaux.ml
@@ -31,3 +31,5 @@ let class_of_type = function
| AST.Tint | AST.Tlong
| AST.Tfloat | AST.Tsingle -> 0
| AST.Tany32 | AST.Tany64 -> assert false
+
+let nr_regs = [| 59 |]
diff --git a/kvx/Machregsaux.mli b/kvx/Machregsaux.mli
index 01b0f9fd..23ac1c9a 100644
--- a/kvx/Machregsaux.mli
+++ b/kvx/Machregsaux.mli
@@ -15,3 +15,6 @@
val is_scratch_register: string -> bool
val class_of_type: AST.typ -> int
+
+(* Number of registers in each class *)
+val nr_regs : int array
diff --git a/kvx/PostpassSchedulingOracle.ml b/kvx/PostpassSchedulingOracle.ml
index 2107ce22..5ebad421 100644
--- a/kvx/PostpassSchedulingOracle.ml
+++ b/kvx/PostpassSchedulingOracle.ml
@@ -787,8 +787,14 @@ let latency_constraints bb =
*)
let build_problem bb =
- { max_latency = -1; resource_bounds = resource_bounds;
- instruction_usages = instruction_usages bb; latency_constraints = latency_constraints bb }
+{ max_latency = -1;
+ resource_bounds = resource_bounds;
+ instruction_usages = instruction_usages bb;
+ latency_constraints = latency_constraints bb;
+ live_regs_entry = Registers.Regset.empty; (* unused here *)
+ typing = (fun x -> AST.Tint); (* unused here *)
+ reference_counting = None
+}
let rec find_min_opt (l: int option list) =
match l with
diff --git a/kvx/PrepassSchedulingOracle.ml b/kvx/PrepassSchedulingOracle.ml
index 912e9ffa..53a81095 120000..100644
--- a/kvx/PrepassSchedulingOracle.ml
+++ b/kvx/PrepassSchedulingOracle.ml
@@ -1 +1,485 @@
-../aarch64/PrepassSchedulingOracle.ml \ No newline at end of file
+open AST
+open RTL
+open Maps
+open InstructionScheduler
+open Registers
+open PrepassSchedulingOracleDeps
+
+let use_alias_analysis () = false
+
+let length_of_chunk = function
+| Mint8signed
+| Mint8unsigned -> 1
+| Mint16signed
+| Mint16unsigned -> 2
+| Mint32
+| Mfloat32
+| Many32 -> 4
+| Mint64
+| Mfloat64
+| Many64 -> 8;;
+
+let get_simple_dependencies (opweights : opweights) (seqa : (instruction*Regset.t) array) =
+ let last_reg_reads : int list PTree.t ref = ref PTree.empty
+ and last_reg_write : (int*int) PTree.t ref = ref PTree.empty
+ and last_mem_reads : int list ref = ref []
+ and last_mem_write : int option ref = ref None
+ and last_branch : int option ref = ref None
+ and last_non_pipelined_op : int array = Array.make
+ opweights.nr_non_pipelined_units ( -1 )
+ and latency_constraints : latency_constraint list ref = ref [] in
+ let add_constraint instr_from instr_to latency =
+ assert (instr_from <= instr_to);
+ assert (latency >= 0);
+ if instr_from = instr_to
+ then (if latency = 0
+ then ()
+ else failwith "PrepassSchedulingOracle.get_dependencies: negative self-loop")
+ else
+ latency_constraints :=
+ { instr_from = instr_from;
+ instr_to = instr_to;
+ latency = latency
+ }:: !latency_constraints
+ and get_last_reads reg =
+ match PTree.get reg !last_reg_reads
+ with Some l -> l
+ | None -> [] in
+ let add_input_mem i =
+ if not (use_alias_analysis ())
+ then
+ begin
+ begin
+ (* Read after write *)
+ match !last_mem_write with
+ | None -> ()
+ | Some j -> add_constraint j i 1
+ end;
+ last_mem_reads := i :: !last_mem_reads
+ end
+ and add_output_mem i =
+ if not (use_alias_analysis ())
+ then
+ begin
+ begin
+ (* Write after write *)
+ match !last_mem_write with
+ | None -> ()
+ | Some j -> add_constraint j i 1
+ end;
+ (* Write after read *)
+ List.iter (fun j -> add_constraint j i 0) !last_mem_reads;
+ last_mem_write := Some i;
+ last_mem_reads := []
+ end
+ and add_input_reg i reg =
+ begin
+ (* Read after write *)
+ match PTree.get reg !last_reg_write with
+ | None -> ()
+ | Some (j, latency) -> add_constraint j i latency
+ end;
+ last_reg_reads := PTree.set reg
+ (i :: get_last_reads reg)
+ !last_reg_reads
+ and add_output_reg i latency reg =
+ begin
+ (* Write after write *)
+ match PTree.get reg !last_reg_write with
+ | None -> ()
+ | Some (j, _) -> add_constraint j i 1
+ end;
+ begin
+ (* Write after read *)
+ List.iter (fun j -> add_constraint j i 0) (get_last_reads reg)
+ end;
+ last_reg_write := PTree.set reg (i, latency) !last_reg_write;
+ last_reg_reads := PTree.remove reg !last_reg_reads
+ in
+ let add_input_regs i regs = List.iter (add_input_reg i) regs in
+ let rec add_builtin_res i (res : reg builtin_res) =
+ match res with
+ | BR r -> add_output_reg i 10 r
+ | BR_none -> ()
+ | BR_splitlong (hi, lo) -> add_builtin_res i hi;
+ add_builtin_res i lo in
+ let rec add_builtin_arg i (ba : reg builtin_arg) =
+ match ba with
+ | BA r -> add_input_reg i r
+ | BA_int _ | BA_long _ | BA_float _ | BA_single _ -> ()
+ | BA_loadstack(_,_) -> add_input_mem i
+ | BA_addrstack _ -> ()
+ | BA_loadglobal(_, _, _) -> add_input_mem i
+ | BA_addrglobal _ -> ()
+ | BA_splitlong(hi, lo) -> add_builtin_arg i hi;
+ add_builtin_arg i lo
+ | BA_addptr(a1, a2) -> add_builtin_arg i a1;
+ add_builtin_arg i a2 in
+ let irreversible_action i =
+ match !last_branch with
+ | None -> ()
+ | Some j -> add_constraint j i 1 in
+ let set_branch i =
+ irreversible_action i;
+ last_branch := Some i in
+ let add_non_pipelined_resources i resources =
+ Array.iter2
+ (fun latency last ->
+ if latency >= 0 && last >= 0 then add_constraint last i latency)
+ resources last_non_pipelined_op;
+ Array.iteri (fun rsc latency ->
+ if latency >= 0
+ then last_non_pipelined_op.(rsc) <- i) resources
+ in
+ Array.iteri
+ begin
+ fun i (insn, other_uses) ->
+ List.iter (fun use ->
+ add_input_reg i use)
+ (Regset.elements other_uses);
+
+ match insn with
+ | Inop _ -> ()
+ | Iop(op, inputs, output, _) ->
+ add_non_pipelined_resources i
+ (opweights.non_pipelined_resources_of_op op (List.length inputs));
+ (if Op.is_trapping_op op then irreversible_action i);
+ add_input_regs i inputs;
+ add_output_reg i (opweights.latency_of_op op (List.length inputs)) output
+ | Iload(trap, chunk, addressing, addr_regs, output, _) ->
+ (if trap=TRAP then irreversible_action i);
+ add_input_mem i;
+ add_input_regs i addr_regs;
+ add_output_reg i (opweights.latency_of_load trap chunk addressing (List.length addr_regs)) output
+ | Istore(chunk, addressing, addr_regs, input, _) ->
+ irreversible_action i;
+ add_input_regs i addr_regs;
+ add_input_reg i input;
+ add_output_mem i
+ | Icall(signature, ef, inputs, output, _) ->
+ set_branch i;
+ (match ef with
+ | Datatypes.Coq_inl r -> add_input_reg i r
+ | Datatypes.Coq_inr symbol -> ()
+ );
+ add_input_mem i;
+ add_input_regs i inputs;
+ add_output_reg i (opweights.latency_of_call signature ef) output;
+ add_output_mem i;
+ failwith "Icall"
+ | Itailcall(signature, ef, inputs) ->
+ set_branch i;
+ (match ef with
+ | Datatypes.Coq_inl r -> add_input_reg i r
+ | Datatypes.Coq_inr symbol -> ()
+ );
+ add_input_mem i;
+ add_input_regs i inputs;
+ failwith "Itailcall"
+ | Ibuiltin(ef, builtin_inputs, builtin_output, _) ->
+ set_branch i;
+ add_input_mem i;
+ List.iter (add_builtin_arg i) builtin_inputs;
+ add_builtin_res i builtin_output;
+ add_output_mem i;
+ failwith "Ibuiltin"
+ | Icond(cond, inputs, _, _, _) ->
+ set_branch i;
+ add_input_mem i;
+ add_input_regs i inputs
+ | Ijumptable(input, _) ->
+ set_branch i;
+ add_input_reg i input;
+ failwith "Ijumptable"
+ | Ireturn(Some input) ->
+ set_branch i;
+ add_input_reg i input;
+ failwith "Ireturn"
+ | Ireturn(None) ->
+ set_branch i;
+ failwith "Ireturn none"
+ end seqa;
+ !latency_constraints;;
+
+let resources_of_instruction (opweights : opweights) = function
+ | Inop _ -> Array.map (fun _ -> 0) opweights.pipelined_resource_bounds
+ | Iop(op, inputs, output, _) ->
+ opweights.resources_of_op op (List.length inputs)
+ | Iload(trap, chunk, addressing, addr_regs, output, _) ->
+ opweights.resources_of_load trap chunk addressing (List.length addr_regs)
+ | Istore(chunk, addressing, addr_regs, input, _) ->
+ opweights.resources_of_store chunk addressing (List.length addr_regs)
+ | Icall(signature, ef, inputs, output, _) ->
+ opweights.resources_of_call signature ef
+ | Ibuiltin(ef, builtin_inputs, builtin_output, _) ->
+ opweights.resources_of_builtin ef
+ | Icond(cond, args, _, _ , _) ->
+ opweights.resources_of_cond cond (List.length args)
+ | Itailcall _ | Ijumptable _ | Ireturn _ -> opweights.pipelined_resource_bounds
+
+let print_sequence pp (seqa : instruction array) =
+ Array.iteri (
+ fun i (insn : instruction) ->
+ PrintRTL.print_instruction pp (i, insn)) seqa;;
+
+type unique_id = int
+
+type 'a symbolic_term_node =
+ | STop of Op.operation * 'a list
+ | STinitial_reg of int
+ | STother of int;;
+
+type symbolic_term = {
+ hash_id : unique_id;
+ hash_ct : symbolic_term symbolic_term_node
+ };;
+
+let rec print_term channel term =
+ match term.hash_ct with
+ | STop(op, args) ->
+ PrintOp.print_operation print_term channel (op, args)
+ | STinitial_reg n -> Printf.fprintf channel "x%d" n
+ | STother n -> Printf.fprintf channel "y%d" n;;
+
+type symbolic_term_table = {
+ st_table : (unique_id symbolic_term_node, symbolic_term) Hashtbl.t;
+ mutable st_next_id : unique_id };;
+
+let hash_init () = {
+ st_table = Hashtbl.create 20;
+ st_next_id = 0
+ };;
+
+let ground_to_id = function
+ | STop(op, l) -> STop(op, List.map (fun t -> t.hash_id) l)
+ | STinitial_reg r -> STinitial_reg r
+ | STother i -> STother i;;
+
+let hash_node (table : symbolic_term_table) (term : symbolic_term symbolic_term_node) : symbolic_term =
+ let grounded = ground_to_id term in
+ match Hashtbl.find_opt table.st_table grounded with
+ | Some x -> x
+ | None ->
+ let term' = { hash_id = table.st_next_id;
+ hash_ct = term } in
+ (if table.st_next_id = max_int then failwith "hash: max_int");
+ table.st_next_id <- table.st_next_id + 1;
+ Hashtbl.add table.st_table grounded term';
+ term';;
+
+type access = {
+ base : symbolic_term;
+ offset : int64;
+ length : int
+ };;
+
+let term_equal a b = (a.hash_id = b.hash_id);;
+
+let access_of_addressing get_reg chunk addressing args =
+ match addressing, args with
+ | (Op.Aindexed ofs), [reg] -> Some
+ { base = get_reg reg;
+ offset = Camlcoq.camlint64_of_ptrofs ofs;
+ length = length_of_chunk chunk
+ }
+ | _, _ -> None ;;
+(* TODO: global *)
+
+let symbolic_execution (seqa : instruction array) =
+ let regs = ref PTree.empty
+ and table = hash_init() in
+ let assign reg term = regs := PTree.set reg term !regs
+ and hash term = hash_node table term in
+ let get_reg reg =
+ match PTree.get reg !regs with
+ | None -> hash (STinitial_reg (Camlcoq.P.to_int reg))
+ | Some x -> x in
+ let targets = Array.make (Array.length seqa) None in
+ Array.iteri
+ begin
+ fun i insn ->
+ match insn with
+ | Iop(Op.Omove, [input], output, _) ->
+ assign output (get_reg input)
+ | Iop(op, inputs, output, _) ->
+ assign output (hash (STop(op, List.map get_reg inputs)))
+
+ | Iload(trap, chunk, addressing, args, output, _) ->
+ let access = access_of_addressing get_reg chunk addressing args in
+ targets.(i) <- access;
+ assign output (hash (STother(i)))
+
+ | Icall(_, _, _, output, _)
+ | Ibuiltin(_, _, BR output, _) ->
+ assign output (hash (STother(i)))
+
+ | Istore(chunk, addressing, args, va, _) ->
+ let access = access_of_addressing get_reg chunk addressing args in
+ targets.(i) <- access
+
+ | Inop _ -> ()
+ | Ibuiltin(_, _, BR_none, _) -> ()
+ | Ibuiltin(_, _, BR_splitlong _, _) -> failwith "BR_splitlong"
+
+ | Itailcall (_, _, _)
+ |Icond (_, _, _, _, _)
+ |Ijumptable (_, _)
+ |Ireturn _ -> ()
+ end seqa;
+ targets;;
+
+let print_access channel = function
+ | None -> Printf.fprintf channel "any"
+ | Some x -> Printf.fprintf channel "%a + %Ld" print_term x.base x.offset;;
+
+let print_targets channel seqa =
+ let targets = symbolic_execution seqa in
+ Array.iteri
+ (fun i insn ->
+ match insn with
+ | Iload _ -> Printf.fprintf channel "%d: load %a\n"
+ i print_access targets.(i)
+ | Istore _ -> Printf.fprintf channel "%d: store %a\n"
+ i print_access targets.(i)
+ | _ -> ()
+ ) seqa;;
+
+let may_overlap a0 b0 =
+ match a0, b0 with
+ | (None, _) | (_ , None) -> true
+ | (Some a), (Some b) ->
+ if term_equal a.base b.base
+ then (max a.offset b.offset) <
+ (min (Int64.add (Int64.of_int a.length) a.offset)
+ (Int64.add (Int64.of_int b.length) b.offset))
+ else match a.base.hash_ct, b.base.hash_ct with
+ | STop(Op.Oaddrsymbol(ida, ofsa),[]),
+ STop(Op.Oaddrsymbol(idb, ofsb),[]) ->
+ (ida=idb) &&
+ let ao = Int64.add a.offset (Camlcoq.camlint64_of_ptrofs ofsa)
+ and bo = Int64.add b.offset (Camlcoq.camlint64_of_ptrofs ofsb) in
+ (max ao bo) <
+ (min (Int64.add (Int64.of_int a.length) ao)
+ (Int64.add (Int64.of_int b.length) bo))
+ | STop(Op.Oaddrstack _, []),
+ STop(Op.Oaddrsymbol _, [])
+ | STop(Op.Oaddrsymbol _, []),
+ STop(Op.Oaddrstack _, []) -> false
+ | STop(Op.Oaddrstack(ofsa),[]),
+ STop(Op.Oaddrstack(ofsb),[]) ->
+ let ao = Int64.add a.offset (Camlcoq.camlint64_of_ptrofs ofsa)
+ and bo = Int64.add b.offset (Camlcoq.camlint64_of_ptrofs ofsb) in
+ (max ao bo) <
+ (min (Int64.add (Int64.of_int a.length) ao)
+ (Int64.add (Int64.of_int b.length) bo))
+ | _ -> true;;
+
+(*
+(* TODO suboptimal quadratic algorithm *)
+let get_alias_dependencies seqa =
+ let targets = symbolic_execution seqa
+ and deps = ref [] in
+ let add_constraint instr_from instr_to latency =
+ deps := { instr_from = instr_from;
+ instr_to = instr_to;
+ latency = latency
+ }:: !deps in
+ for i=0 to (Array.length seqa)-1
+ do
+ for j=0 to i-1
+ do
+ match seqa.(j), seqa.(i) with
+ | (Istore _), ((Iload _) | (Istore _)) ->
+ if may_overlap targets.(j) targets.(i)
+ then add_constraint j i 1
+ | (Iload _), (Istore _) ->
+ if may_overlap targets.(j) targets.(i)
+ then add_constraint j i 0
+ | (Istore _ | Iload _), (Icall _ | Ibuiltin _)
+ | (Icall _ | Ibuiltin _), (Icall _ | Ibuiltin _ | Iload _ | Istore _) ->
+ add_constraint j i 1
+ | (Inop _ | Iop _), _
+ | _, (Inop _ | Iop _)
+ | (Iload _), (Iload _) -> ()
+ done
+ done;
+ !deps;;
+ *)
+
+let define_problem (opweights : opweights) (live_entry_regs : Regset.t)
+ (typing : RTLtyping.regenv) reference_counting seqa =
+ let simple_deps = get_simple_dependencies opweights seqa in
+ { max_latency = -1;
+ resource_bounds = opweights.pipelined_resource_bounds;
+ live_regs_entry = live_entry_regs;
+ typing = typing;
+ reference_counting = Some reference_counting;
+ instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa);
+ latency_constraints =
+ (* if (use_alias_analysis ())
+ then (get_alias_dependencies seqa) @ simple_deps
+ else *) simple_deps };;
+
+let zigzag_scheduler problem early_ones =
+ let nr_instructions = get_nr_instructions problem in
+ assert(nr_instructions = (Array.length early_ones));
+ match list_scheduler problem with
+ | Some fwd_schedule ->
+ let fwd_makespan = fwd_schedule.((Array.length fwd_schedule) - 1) in
+ let constraints' = ref problem.latency_constraints in
+ Array.iteri (fun i is_early ->
+ if is_early then
+ constraints' := {
+ instr_from = i;
+ instr_to = nr_instructions ;
+ latency = fwd_makespan - fwd_schedule.(i) } ::!constraints' )
+ early_ones;
+ validated_scheduler reverse_list_scheduler
+ { problem with latency_constraints = !constraints' }
+ | None -> None;;
+
+let prepass_scheduler_by_name name problem early_ones =
+ match name with
+ | "zigzag" -> zigzag_scheduler problem early_ones
+ | _ -> scheduler_by_name name problem
+
+let schedule_sequence (seqa : (instruction*Regset.t) array)
+ (live_regs_entry : Registers.Regset.t)
+ (typing : RTLtyping.regenv)
+ reference =
+ let opweights = OpWeights.get_opweights () in
+ try
+ if (Array.length seqa) <= 1
+ then None
+ else
+ begin
+ let nr_instructions = Array.length seqa in
+ (if !Clflags.option_debug_compcert > 6
+ then Printf.printf "prepass scheduling length = %d\n" (Array.length seqa));
+ let problem = define_problem opweights live_regs_entry
+ typing reference seqa in
+ (if !Clflags.option_debug_compcert > 7
+ then (print_sequence stdout (Array.map fst seqa);
+ print_problem stdout problem));
+ match prepass_scheduler_by_name
+ (!Clflags.option_fprepass_sched)
+ problem
+ (Array.map (fun (ins, _) ->
+ match ins with
+ | Icond _ -> true
+ | _ -> false) seqa) with
+ | None -> Printf.printf "no solution in prepass scheduling\n";
+ None
+ | Some solution ->
+ let positions = Array.init nr_instructions (fun i -> i) in
+ Array.sort (fun i j ->
+ let si = solution.(i) and sj = solution.(j) in
+ if si < sj then -1
+ else if si > sj then 1
+ else i - j) positions;
+ Some positions
+ end
+ with (Failure s) ->
+ Printf.printf "failure in prepass scheduling: %s\n" s;
+ None;;
+
diff --git a/kvx/PrepassSchedulingOracleDeps.ml b/kvx/PrepassSchedulingOracleDeps.ml
index 1e955b85..8d10d406 120000..100644
--- a/kvx/PrepassSchedulingOracleDeps.ml
+++ b/kvx/PrepassSchedulingOracleDeps.ml
@@ -1 +1,17 @@
-../aarch64/PrepassSchedulingOracleDeps.ml \ No newline at end of file
+type called_function = (Registers.reg, AST.ident) Datatypes.sum
+
+type opweights =
+ {
+ pipelined_resource_bounds : int array;
+ nr_non_pipelined_units : int;
+ latency_of_op : Op.operation -> int -> int;
+ resources_of_op : Op.operation -> int -> int array;
+ non_pipelined_resources_of_op : Op.operation -> int -> int array;
+ latency_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int;
+ resources_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int array;
+ resources_of_store : AST.memory_chunk -> Op.addressing -> int -> int array;
+ resources_of_cond : Op.condition -> int -> int array;
+ latency_of_call : AST.signature -> called_function -> int;
+ resources_of_call : AST.signature -> called_function -> int array;
+ resources_of_builtin : AST.external_function -> int array
+ };;
diff --git a/lib/Integers.v b/lib/Integers.v
index 3e103ab7..2addc78b 100644
--- a/lib/Integers.v
+++ b/lib/Integers.v
@@ -3747,8 +3747,7 @@ Proof.
unfold lt.
rewrite signed_zero.
rewrite bits_zero.
- destruct (zlt _ _); try lia.
- reflexivity.
+ destruct (zlt _ _); try lia; reflexivity.
}
change (Z.testbit (unsigned x) (i + 63)) with (testbit x (i + 63)).
rewrite bits_zero.
diff --git a/powerpc/ExpansionOracle.ml b/powerpc/ExpansionOracle.ml
index ee2674bf..3b63b80d 120000..100644
--- a/powerpc/ExpansionOracle.ml
+++ b/powerpc/ExpansionOracle.ml
@@ -1 +1,17 @@
-../aarch64/ExpansionOracle.ml \ No newline at end of file
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Léo Gourdin UGA, VERIMAG *)
+(* *)
+(* Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+open RTLpathCommon
+
+let expanse (sb : superblock) code pm = (code, pm)
+
+let find_last_node_reg c = ()
diff --git a/powerpc/PrepassSchedulingOracle.ml b/powerpc/PrepassSchedulingOracle.ml
index 9885fd52..42a3da23 120000..100644
--- a/powerpc/PrepassSchedulingOracle.ml
+++ b/powerpc/PrepassSchedulingOracle.ml
@@ -1 +1,6 @@
-../x86/PrepassSchedulingOracle.ml \ No newline at end of file
+open RTL
+open Registers
+
+(* Do not do anything *)
+let schedule_sequence (seqa : (instruction*Regset.t) array)
+ live_regs_entry typing reference = None
diff --git a/riscV/Asmgenproof1.v b/riscV/Asmgenproof1.v
index 6abde89f..42ab8375 100644
--- a/riscV/Asmgenproof1.v
+++ b/riscV/Asmgenproof1.v
@@ -821,18 +821,18 @@ Proof.
unfold Val.cmp; destruct (rs#r1); simpl; auto. rewrite B1.
unfold Int.lt. rewrite zlt_false. auto.
change (Int.signed (Int.repr Int.max_signed)) with Int.max_signed.
- generalize (Int.signed_range i); omega.
+ generalize (Int.signed_range i); lia.
* exploit (opimm32_correct Psltw Psltiw (Val.cmp Clt)); eauto. intros (rs1 & A1 & B1 & C1).
exists rs1; split. eexact A1. split; auto.
rewrite B1. unfold Val.cmp; simpl; destruct (rs#r1); simpl; auto.
unfold Int.lt. replace (Int.signed (Int.add n Int.one)) with (Int.signed n + 1).
destruct (zlt (Int.signed n) (Int.signed i)).
- rewrite zlt_false by omega. auto.
- rewrite zlt_true by omega. auto.
+ rewrite zlt_false by lia. auto.
+ rewrite zlt_true by lia. auto.
rewrite Int.add_signed. symmetry; apply Int.signed_repr.
assert (Int.signed n <> Int.max_signed).
{ red; intros E. elim H1. rewrite <- (Int.repr_signed n). rewrite E. auto. }
- generalize (Int.signed_range n); omega.
+ generalize (Int.signed_range n); lia.
+ apply DFL.
+ apply DFL.
Qed.
@@ -919,18 +919,18 @@ Proof.
unfold Val.cmpl; destruct (rs#r1); simpl; auto. rewrite B1.
unfold Int64.lt. rewrite zlt_false. auto.
change (Int64.signed (Int64.repr Int64.max_signed)) with Int64.max_signed.
- generalize (Int64.signed_range i); omega.
+ generalize (Int64.signed_range i); lia.
* exploit (opimm64_correct Psltl Psltil (fun v1 v2 => Val.maketotal (Val.cmpl Clt v1 v2))); eauto. intros (rs1 & A1 & B1 & C1).
exists rs1; split. eexact A1. split; auto.
rewrite B1. unfold Val.cmpl; simpl; destruct (rs#r1); simpl; auto.
unfold Int64.lt. replace (Int64.signed (Int64.add n Int64.one)) with (Int64.signed n + 1).
destruct (zlt (Int64.signed n) (Int64.signed i)).
- rewrite zlt_false by omega. auto.
- rewrite zlt_true by omega. auto.
+ rewrite zlt_false by lia. auto.
+ rewrite zlt_true by lia. auto.
rewrite Int64.add_signed. symmetry; apply Int64.signed_repr.
assert (Int64.signed n <> Int64.max_signed).
{ red; intros E. elim H1. rewrite <- (Int64.repr_signed n). rewrite E. auto. }
- generalize (Int64.signed_range n); omega.
+ generalize (Int64.signed_range n); lia.
+ apply DFL.
+ apply DFL.
Qed.
diff --git a/riscV/Machregsaux.ml b/riscV/Machregsaux.ml
index 840943e7..e3e47946 100644
--- a/riscV/Machregsaux.ml
+++ b/riscV/Machregsaux.ml
@@ -18,3 +18,5 @@ let class_of_type = function
| AST.Tint | AST.Tlong -> 0
| AST.Tfloat | AST.Tsingle -> 1
| AST.Tany32 | AST.Tany64 -> assert false
+
+let nr_regs = [| 26; 32|]
diff --git a/riscV/Machregsaux.mli b/riscV/Machregsaux.mli
index 01b0f9fd..bb3777bf 100644
--- a/riscV/Machregsaux.mli
+++ b/riscV/Machregsaux.mli
@@ -15,3 +15,6 @@
val is_scratch_register: string -> bool
val class_of_type: AST.typ -> int
+
+(* Number of registers in each class *)
+val nr_regs : int array
diff --git a/riscV/OpWeights.ml b/riscV/OpWeights.ml
index 0a1d9ad4..a5ece6d5 100644
--- a/riscV/OpWeights.ml
+++ b/riscV/OpWeights.ml
@@ -66,7 +66,8 @@ module Rocket = struct
| OEmayundef _ -> 0
| _ -> 1
- let resources_of_op (op : operation) (nargs : int) = resource_bounds
+ let resources_of_op (op : operation) (nargs : int) =
+ match op with OEmayundef _ -> [| 0 |] | _ -> resource_bounds
let non_pipelined_resources_of_op (op : operation) (nargs : int) =
match op with
diff --git a/riscV/PrepassSchedulingOracle.ml b/riscV/PrepassSchedulingOracle.ml
index 912e9ffa..53a81095 120000..100644
--- a/riscV/PrepassSchedulingOracle.ml
+++ b/riscV/PrepassSchedulingOracle.ml
@@ -1 +1,485 @@
-../aarch64/PrepassSchedulingOracle.ml \ No newline at end of file
+open AST
+open RTL
+open Maps
+open InstructionScheduler
+open Registers
+open PrepassSchedulingOracleDeps
+
+let use_alias_analysis () = false
+
+let length_of_chunk = function
+| Mint8signed
+| Mint8unsigned -> 1
+| Mint16signed
+| Mint16unsigned -> 2
+| Mint32
+| Mfloat32
+| Many32 -> 4
+| Mint64
+| Mfloat64
+| Many64 -> 8;;
+
+let get_simple_dependencies (opweights : opweights) (seqa : (instruction*Regset.t) array) =
+ let last_reg_reads : int list PTree.t ref = ref PTree.empty
+ and last_reg_write : (int*int) PTree.t ref = ref PTree.empty
+ and last_mem_reads : int list ref = ref []
+ and last_mem_write : int option ref = ref None
+ and last_branch : int option ref = ref None
+ and last_non_pipelined_op : int array = Array.make
+ opweights.nr_non_pipelined_units ( -1 )
+ and latency_constraints : latency_constraint list ref = ref [] in
+ let add_constraint instr_from instr_to latency =
+ assert (instr_from <= instr_to);
+ assert (latency >= 0);
+ if instr_from = instr_to
+ then (if latency = 0
+ then ()
+ else failwith "PrepassSchedulingOracle.get_dependencies: negative self-loop")
+ else
+ latency_constraints :=
+ { instr_from = instr_from;
+ instr_to = instr_to;
+ latency = latency
+ }:: !latency_constraints
+ and get_last_reads reg =
+ match PTree.get reg !last_reg_reads
+ with Some l -> l
+ | None -> [] in
+ let add_input_mem i =
+ if not (use_alias_analysis ())
+ then
+ begin
+ begin
+ (* Read after write *)
+ match !last_mem_write with
+ | None -> ()
+ | Some j -> add_constraint j i 1
+ end;
+ last_mem_reads := i :: !last_mem_reads
+ end
+ and add_output_mem i =
+ if not (use_alias_analysis ())
+ then
+ begin
+ begin
+ (* Write after write *)
+ match !last_mem_write with
+ | None -> ()
+ | Some j -> add_constraint j i 1
+ end;
+ (* Write after read *)
+ List.iter (fun j -> add_constraint j i 0) !last_mem_reads;
+ last_mem_write := Some i;
+ last_mem_reads := []
+ end
+ and add_input_reg i reg =
+ begin
+ (* Read after write *)
+ match PTree.get reg !last_reg_write with
+ | None -> ()
+ | Some (j, latency) -> add_constraint j i latency
+ end;
+ last_reg_reads := PTree.set reg
+ (i :: get_last_reads reg)
+ !last_reg_reads
+ and add_output_reg i latency reg =
+ begin
+ (* Write after write *)
+ match PTree.get reg !last_reg_write with
+ | None -> ()
+ | Some (j, _) -> add_constraint j i 1
+ end;
+ begin
+ (* Write after read *)
+ List.iter (fun j -> add_constraint j i 0) (get_last_reads reg)
+ end;
+ last_reg_write := PTree.set reg (i, latency) !last_reg_write;
+ last_reg_reads := PTree.remove reg !last_reg_reads
+ in
+ let add_input_regs i regs = List.iter (add_input_reg i) regs in
+ let rec add_builtin_res i (res : reg builtin_res) =
+ match res with
+ | BR r -> add_output_reg i 10 r
+ | BR_none -> ()
+ | BR_splitlong (hi, lo) -> add_builtin_res i hi;
+ add_builtin_res i lo in
+ let rec add_builtin_arg i (ba : reg builtin_arg) =
+ match ba with
+ | BA r -> add_input_reg i r
+ | BA_int _ | BA_long _ | BA_float _ | BA_single _ -> ()
+ | BA_loadstack(_,_) -> add_input_mem i
+ | BA_addrstack _ -> ()
+ | BA_loadglobal(_, _, _) -> add_input_mem i
+ | BA_addrglobal _ -> ()
+ | BA_splitlong(hi, lo) -> add_builtin_arg i hi;
+ add_builtin_arg i lo
+ | BA_addptr(a1, a2) -> add_builtin_arg i a1;
+ add_builtin_arg i a2 in
+ let irreversible_action i =
+ match !last_branch with
+ | None -> ()
+ | Some j -> add_constraint j i 1 in
+ let set_branch i =
+ irreversible_action i;
+ last_branch := Some i in
+ let add_non_pipelined_resources i resources =
+ Array.iter2
+ (fun latency last ->
+ if latency >= 0 && last >= 0 then add_constraint last i latency)
+ resources last_non_pipelined_op;
+ Array.iteri (fun rsc latency ->
+ if latency >= 0
+ then last_non_pipelined_op.(rsc) <- i) resources
+ in
+ Array.iteri
+ begin
+ fun i (insn, other_uses) ->
+ List.iter (fun use ->
+ add_input_reg i use)
+ (Regset.elements other_uses);
+
+ match insn with
+ | Inop _ -> ()
+ | Iop(op, inputs, output, _) ->
+ add_non_pipelined_resources i
+ (opweights.non_pipelined_resources_of_op op (List.length inputs));
+ (if Op.is_trapping_op op then irreversible_action i);
+ add_input_regs i inputs;
+ add_output_reg i (opweights.latency_of_op op (List.length inputs)) output
+ | Iload(trap, chunk, addressing, addr_regs, output, _) ->
+ (if trap=TRAP then irreversible_action i);
+ add_input_mem i;
+ add_input_regs i addr_regs;
+ add_output_reg i (opweights.latency_of_load trap chunk addressing (List.length addr_regs)) output
+ | Istore(chunk, addressing, addr_regs, input, _) ->
+ irreversible_action i;
+ add_input_regs i addr_regs;
+ add_input_reg i input;
+ add_output_mem i
+ | Icall(signature, ef, inputs, output, _) ->
+ set_branch i;
+ (match ef with
+ | Datatypes.Coq_inl r -> add_input_reg i r
+ | Datatypes.Coq_inr symbol -> ()
+ );
+ add_input_mem i;
+ add_input_regs i inputs;
+ add_output_reg i (opweights.latency_of_call signature ef) output;
+ add_output_mem i;
+ failwith "Icall"
+ | Itailcall(signature, ef, inputs) ->
+ set_branch i;
+ (match ef with
+ | Datatypes.Coq_inl r -> add_input_reg i r
+ | Datatypes.Coq_inr symbol -> ()
+ );
+ add_input_mem i;
+ add_input_regs i inputs;
+ failwith "Itailcall"
+ | Ibuiltin(ef, builtin_inputs, builtin_output, _) ->
+ set_branch i;
+ add_input_mem i;
+ List.iter (add_builtin_arg i) builtin_inputs;
+ add_builtin_res i builtin_output;
+ add_output_mem i;
+ failwith "Ibuiltin"
+ | Icond(cond, inputs, _, _, _) ->
+ set_branch i;
+ add_input_mem i;
+ add_input_regs i inputs
+ | Ijumptable(input, _) ->
+ set_branch i;
+ add_input_reg i input;
+ failwith "Ijumptable"
+ | Ireturn(Some input) ->
+ set_branch i;
+ add_input_reg i input;
+ failwith "Ireturn"
+ | Ireturn(None) ->
+ set_branch i;
+ failwith "Ireturn none"
+ end seqa;
+ !latency_constraints;;
+
+let resources_of_instruction (opweights : opweights) = function
+ | Inop _ -> Array.map (fun _ -> 0) opweights.pipelined_resource_bounds
+ | Iop(op, inputs, output, _) ->
+ opweights.resources_of_op op (List.length inputs)
+ | Iload(trap, chunk, addressing, addr_regs, output, _) ->
+ opweights.resources_of_load trap chunk addressing (List.length addr_regs)
+ | Istore(chunk, addressing, addr_regs, input, _) ->
+ opweights.resources_of_store chunk addressing (List.length addr_regs)
+ | Icall(signature, ef, inputs, output, _) ->
+ opweights.resources_of_call signature ef
+ | Ibuiltin(ef, builtin_inputs, builtin_output, _) ->
+ opweights.resources_of_builtin ef
+ | Icond(cond, args, _, _ , _) ->
+ opweights.resources_of_cond cond (List.length args)
+ | Itailcall _ | Ijumptable _ | Ireturn _ -> opweights.pipelined_resource_bounds
+
+let print_sequence pp (seqa : instruction array) =
+ Array.iteri (
+ fun i (insn : instruction) ->
+ PrintRTL.print_instruction pp (i, insn)) seqa;;
+
+type unique_id = int
+
+type 'a symbolic_term_node =
+ | STop of Op.operation * 'a list
+ | STinitial_reg of int
+ | STother of int;;
+
+type symbolic_term = {
+ hash_id : unique_id;
+ hash_ct : symbolic_term symbolic_term_node
+ };;
+
+let rec print_term channel term =
+ match term.hash_ct with
+ | STop(op, args) ->
+ PrintOp.print_operation print_term channel (op, args)
+ | STinitial_reg n -> Printf.fprintf channel "x%d" n
+ | STother n -> Printf.fprintf channel "y%d" n;;
+
+type symbolic_term_table = {
+ st_table : (unique_id symbolic_term_node, symbolic_term) Hashtbl.t;
+ mutable st_next_id : unique_id };;
+
+let hash_init () = {
+ st_table = Hashtbl.create 20;
+ st_next_id = 0
+ };;
+
+let ground_to_id = function
+ | STop(op, l) -> STop(op, List.map (fun t -> t.hash_id) l)
+ | STinitial_reg r -> STinitial_reg r
+ | STother i -> STother i;;
+
+let hash_node (table : symbolic_term_table) (term : symbolic_term symbolic_term_node) : symbolic_term =
+ let grounded = ground_to_id term in
+ match Hashtbl.find_opt table.st_table grounded with
+ | Some x -> x
+ | None ->
+ let term' = { hash_id = table.st_next_id;
+ hash_ct = term } in
+ (if table.st_next_id = max_int then failwith "hash: max_int");
+ table.st_next_id <- table.st_next_id + 1;
+ Hashtbl.add table.st_table grounded term';
+ term';;
+
+type access = {
+ base : symbolic_term;
+ offset : int64;
+ length : int
+ };;
+
+let term_equal a b = (a.hash_id = b.hash_id);;
+
+let access_of_addressing get_reg chunk addressing args =
+ match addressing, args with
+ | (Op.Aindexed ofs), [reg] -> Some
+ { base = get_reg reg;
+ offset = Camlcoq.camlint64_of_ptrofs ofs;
+ length = length_of_chunk chunk
+ }
+ | _, _ -> None ;;
+(* TODO: global *)
+
+let symbolic_execution (seqa : instruction array) =
+ let regs = ref PTree.empty
+ and table = hash_init() in
+ let assign reg term = regs := PTree.set reg term !regs
+ and hash term = hash_node table term in
+ let get_reg reg =
+ match PTree.get reg !regs with
+ | None -> hash (STinitial_reg (Camlcoq.P.to_int reg))
+ | Some x -> x in
+ let targets = Array.make (Array.length seqa) None in
+ Array.iteri
+ begin
+ fun i insn ->
+ match insn with
+ | Iop(Op.Omove, [input], output, _) ->
+ assign output (get_reg input)
+ | Iop(op, inputs, output, _) ->
+ assign output (hash (STop(op, List.map get_reg inputs)))
+
+ | Iload(trap, chunk, addressing, args, output, _) ->
+ let access = access_of_addressing get_reg chunk addressing args in
+ targets.(i) <- access;
+ assign output (hash (STother(i)))
+
+ | Icall(_, _, _, output, _)
+ | Ibuiltin(_, _, BR output, _) ->
+ assign output (hash (STother(i)))
+
+ | Istore(chunk, addressing, args, va, _) ->
+ let access = access_of_addressing get_reg chunk addressing args in
+ targets.(i) <- access
+
+ | Inop _ -> ()
+ | Ibuiltin(_, _, BR_none, _) -> ()
+ | Ibuiltin(_, _, BR_splitlong _, _) -> failwith "BR_splitlong"
+
+ | Itailcall (_, _, _)
+ |Icond (_, _, _, _, _)
+ |Ijumptable (_, _)
+ |Ireturn _ -> ()
+ end seqa;
+ targets;;
+
+let print_access channel = function
+ | None -> Printf.fprintf channel "any"
+ | Some x -> Printf.fprintf channel "%a + %Ld" print_term x.base x.offset;;
+
+let print_targets channel seqa =
+ let targets = symbolic_execution seqa in
+ Array.iteri
+ (fun i insn ->
+ match insn with
+ | Iload _ -> Printf.fprintf channel "%d: load %a\n"
+ i print_access targets.(i)
+ | Istore _ -> Printf.fprintf channel "%d: store %a\n"
+ i print_access targets.(i)
+ | _ -> ()
+ ) seqa;;
+
+let may_overlap a0 b0 =
+ match a0, b0 with
+ | (None, _) | (_ , None) -> true
+ | (Some a), (Some b) ->
+ if term_equal a.base b.base
+ then (max a.offset b.offset) <
+ (min (Int64.add (Int64.of_int a.length) a.offset)
+ (Int64.add (Int64.of_int b.length) b.offset))
+ else match a.base.hash_ct, b.base.hash_ct with
+ | STop(Op.Oaddrsymbol(ida, ofsa),[]),
+ STop(Op.Oaddrsymbol(idb, ofsb),[]) ->
+ (ida=idb) &&
+ let ao = Int64.add a.offset (Camlcoq.camlint64_of_ptrofs ofsa)
+ and bo = Int64.add b.offset (Camlcoq.camlint64_of_ptrofs ofsb) in
+ (max ao bo) <
+ (min (Int64.add (Int64.of_int a.length) ao)
+ (Int64.add (Int64.of_int b.length) bo))
+ | STop(Op.Oaddrstack _, []),
+ STop(Op.Oaddrsymbol _, [])
+ | STop(Op.Oaddrsymbol _, []),
+ STop(Op.Oaddrstack _, []) -> false
+ | STop(Op.Oaddrstack(ofsa),[]),
+ STop(Op.Oaddrstack(ofsb),[]) ->
+ let ao = Int64.add a.offset (Camlcoq.camlint64_of_ptrofs ofsa)
+ and bo = Int64.add b.offset (Camlcoq.camlint64_of_ptrofs ofsb) in
+ (max ao bo) <
+ (min (Int64.add (Int64.of_int a.length) ao)
+ (Int64.add (Int64.of_int b.length) bo))
+ | _ -> true;;
+
+(*
+(* TODO suboptimal quadratic algorithm *)
+let get_alias_dependencies seqa =
+ let targets = symbolic_execution seqa
+ and deps = ref [] in
+ let add_constraint instr_from instr_to latency =
+ deps := { instr_from = instr_from;
+ instr_to = instr_to;
+ latency = latency
+ }:: !deps in
+ for i=0 to (Array.length seqa)-1
+ do
+ for j=0 to i-1
+ do
+ match seqa.(j), seqa.(i) with
+ | (Istore _), ((Iload _) | (Istore _)) ->
+ if may_overlap targets.(j) targets.(i)
+ then add_constraint j i 1
+ | (Iload _), (Istore _) ->
+ if may_overlap targets.(j) targets.(i)
+ then add_constraint j i 0
+ | (Istore _ | Iload _), (Icall _ | Ibuiltin _)
+ | (Icall _ | Ibuiltin _), (Icall _ | Ibuiltin _ | Iload _ | Istore _) ->
+ add_constraint j i 1
+ | (Inop _ | Iop _), _
+ | _, (Inop _ | Iop _)
+ | (Iload _), (Iload _) -> ()
+ done
+ done;
+ !deps;;
+ *)
+
+let define_problem (opweights : opweights) (live_entry_regs : Regset.t)
+ (typing : RTLtyping.regenv) reference_counting seqa =
+ let simple_deps = get_simple_dependencies opweights seqa in
+ { max_latency = -1;
+ resource_bounds = opweights.pipelined_resource_bounds;
+ live_regs_entry = live_entry_regs;
+ typing = typing;
+ reference_counting = Some reference_counting;
+ instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa);
+ latency_constraints =
+ (* if (use_alias_analysis ())
+ then (get_alias_dependencies seqa) @ simple_deps
+ else *) simple_deps };;
+
+let zigzag_scheduler problem early_ones =
+ let nr_instructions = get_nr_instructions problem in
+ assert(nr_instructions = (Array.length early_ones));
+ match list_scheduler problem with
+ | Some fwd_schedule ->
+ let fwd_makespan = fwd_schedule.((Array.length fwd_schedule) - 1) in
+ let constraints' = ref problem.latency_constraints in
+ Array.iteri (fun i is_early ->
+ if is_early then
+ constraints' := {
+ instr_from = i;
+ instr_to = nr_instructions ;
+ latency = fwd_makespan - fwd_schedule.(i) } ::!constraints' )
+ early_ones;
+ validated_scheduler reverse_list_scheduler
+ { problem with latency_constraints = !constraints' }
+ | None -> None;;
+
+let prepass_scheduler_by_name name problem early_ones =
+ match name with
+ | "zigzag" -> zigzag_scheduler problem early_ones
+ | _ -> scheduler_by_name name problem
+
+let schedule_sequence (seqa : (instruction*Regset.t) array)
+ (live_regs_entry : Registers.Regset.t)
+ (typing : RTLtyping.regenv)
+ reference =
+ let opweights = OpWeights.get_opweights () in
+ try
+ if (Array.length seqa) <= 1
+ then None
+ else
+ begin
+ let nr_instructions = Array.length seqa in
+ (if !Clflags.option_debug_compcert > 6
+ then Printf.printf "prepass scheduling length = %d\n" (Array.length seqa));
+ let problem = define_problem opweights live_regs_entry
+ typing reference seqa in
+ (if !Clflags.option_debug_compcert > 7
+ then (print_sequence stdout (Array.map fst seqa);
+ print_problem stdout problem));
+ match prepass_scheduler_by_name
+ (!Clflags.option_fprepass_sched)
+ problem
+ (Array.map (fun (ins, _) ->
+ match ins with
+ | Icond _ -> true
+ | _ -> false) seqa) with
+ | None -> Printf.printf "no solution in prepass scheduling\n";
+ None
+ | Some solution ->
+ let positions = Array.init nr_instructions (fun i -> i) in
+ Array.sort (fun i j ->
+ let si = solution.(i) and sj = solution.(j) in
+ if si < sj then -1
+ else if si > sj then 1
+ else i - j) positions;
+ Some positions
+ end
+ with (Failure s) ->
+ Printf.printf "failure in prepass scheduling: %s\n" s;
+ None;;
+
diff --git a/riscV/PrepassSchedulingOracleDeps.ml b/riscV/PrepassSchedulingOracleDeps.ml
index 1e955b85..8d10d406 120000..100644
--- a/riscV/PrepassSchedulingOracleDeps.ml
+++ b/riscV/PrepassSchedulingOracleDeps.ml
@@ -1 +1,17 @@
-../aarch64/PrepassSchedulingOracleDeps.ml \ No newline at end of file
+type called_function = (Registers.reg, AST.ident) Datatypes.sum
+
+type opweights =
+ {
+ pipelined_resource_bounds : int array;
+ nr_non_pipelined_units : int;
+ latency_of_op : Op.operation -> int -> int;
+ resources_of_op : Op.operation -> int -> int array;
+ non_pipelined_resources_of_op : Op.operation -> int -> int array;
+ latency_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int;
+ resources_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int array;
+ resources_of_store : AST.memory_chunk -> Op.addressing -> int -> int array;
+ resources_of_cond : Op.condition -> int -> int array;
+ latency_of_call : AST.signature -> called_function -> int;
+ resources_of_call : AST.signature -> called_function -> int array;
+ resources_of_builtin : AST.external_function -> int array
+ };;
diff --git a/runtime/Makefile b/runtime/Makefile
index 6f70fa87..ff51b7d7 100644
--- a/runtime/Makefile
+++ b/runtime/Makefile
@@ -1,5 +1,7 @@
include ../Makefile.config
+.PRECIOUS: .s
+
CFLAGS=-O1 -Wall
ifeq ($(ARCH),x86)
diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml
index eab0b21a..e3a421a5 100644
--- a/scheduling/InstructionScheduler.ml
+++ b/scheduling/InstructionScheduler.ml
@@ -33,6 +33,10 @@ type latency_constraint = {
type problem = {
max_latency : int;
resource_bounds : int array;
+ live_regs_entry : Registers.Regset.t;
+ typing : RTLtyping.regenv;
+ reference_counting : ((Registers.reg, int * int) Hashtbl.t
+ * ((Registers.reg * bool) list array)) option;
instruction_usages : int array array;
latency_constraints : latency_constraint list;
};;
@@ -118,6 +122,13 @@ let vector_less_equal a b =
true
with Exit -> false;;
+(* let vector_add a b =
+ * assert ((Array.length a) = (Array.length b));
+ * for i=0 to (Array.length a)-1
+ * do
+ * b.(i) <- b.(i) + a.(i)
+ * done;; *)
+
let vector_subtract a b =
assert ((Array.length a) = (Array.length b));
for i=0 to (Array.length a)-1
@@ -257,8 +268,8 @@ let priority_list_scheduler (order : list_scheduler_order)
assert(!time >= 0);
!time
with Exit -> -1
-
in
+
let advance_time() =
begin
(if !current_time < max_time-1
@@ -267,7 +278,8 @@ let priority_list_scheduler (order : list_scheduler_order)
Array.blit problem.resource_bounds 0 current_resources 0
(Array.length current_resources);
ready.(!current_time + 1) <-
- InstrSet.union (ready.(!current_time)) (ready.(!current_time + 1));
+ InstrSet.union (ready.(!current_time))
+ (ready.(!current_time + 1));
ready.(!current_time) <- InstrSet.empty;
end);
incr current_time
@@ -334,6 +346,470 @@ let list_scheduler = priority_list_scheduler CRITICAL_PATH_ORDER;;
(* dummy code for placating ocaml's warnings *)
let _ = fun x -> priority_list_scheduler INSTRUCTION_ORDER x;;
+
+(* A scheduler sensitive to register pressure *)
+let reg_pres_scheduler (problem : problem) : solution option =
+ (* DebugPrint.debug_flag := true; *)
+ let nr_instructions = get_nr_instructions problem in
+
+ if !Clflags.option_debug_compcert > 6 then
+ (Printf.eprintf "\nSCHEDULING_SUPERBLOCK %d\n" nr_instructions;
+ flush stderr);
+
+ let successors = get_successors problem
+ and predecessors = get_predecessors problem
+ and times = Array.make (nr_instructions+1) (-1) in
+ let live_regs_entry = problem.live_regs_entry in
+
+ let available_regs = Array.copy Machregsaux.nr_regs in
+
+ let nr_types_regs = Array.length available_regs in
+
+ let thres = Array.fold_left (min)
+ (max !(Clflags.option_regpres_threshold) 0)
+ Machregsaux.nr_regs
+ in
+
+
+ let regs_thresholds = Array.make nr_types_regs thres in
+ (* placeholder value *)
+
+ let class_r r =
+ Machregsaux.class_of_type (problem.typing r) in
+
+ let live_regs = Hashtbl.create 42 in
+
+ List.iter (fun r -> let classe = Machregsaux.class_of_type
+ (problem.typing r) in
+ available_regs.(classe)
+ <- available_regs.(classe) - 1;
+ Hashtbl.add live_regs r classe)
+ (Registers.Regset.elements live_regs_entry);
+
+ let csr_b = ref false in
+
+ let counts, mentions =
+ match problem.reference_counting with
+ | Some (l, r) -> l, r
+ | None -> assert false
+ in
+
+ let fold_delta i = (fun a (r, b) ->
+ a +
+ if class_r r <> i then 0 else
+ (if b then
+ if (Hashtbl.find counts r = (i, 1))
+ then 1 else 0
+ else
+ match Hashtbl.find_opt live_regs r with
+ | None -> -1
+ | Some t -> 0
+ )) in
+
+ let priorities = critical_paths successors in
+
+ let current_resources = Array.copy problem.resource_bounds in
+
+ let module InstrSet =
+ struct
+ module MSet =
+ Set.Make (struct
+ type t=int
+ let compare x y =
+ match priorities.(y) - priorities.(x) with
+ | 0 -> x - y
+ | z -> z
+ end)
+
+ let empty = MSet.empty
+ let is_empty = MSet.is_empty
+ let add = MSet.add
+ let remove = MSet.remove
+ let union = MSet.union
+ let iter = MSet.iter
+
+ let compare_regs i x y =
+ let pyi = List.fold_left (fold_delta i) 0 mentions.(y) in
+ (* print_int y;
+ * print_string " ";
+ * print_int pyi;
+ * print_newline ();
+ * flush stdout; *)
+ let pxi = List.fold_left (fold_delta i) 0 mentions.(x) in
+ match pyi - pxi with
+ | 0 -> (match priorities.(y) - priorities.(x) with
+ | 0 -> x - y
+ | z -> z)
+ | z -> z
+
+ (** t is the register class *)
+ let sched_CSR t ready usages =
+ (* print_string "looking for max delta";
+ * print_newline ();
+ * flush stdout; *)
+ let result = ref (-1) in
+ iter (fun i ->
+ if vector_less_equal usages.(i) current_resources
+ then if !result = -1 || (compare_regs t !result i > 0)
+ then result := i) ready;
+ !result
+ end
+ in
+
+ let max_time = bound_max_time problem + 5*nr_instructions in
+ let ready = Array.make max_time InstrSet.empty in
+
+ Array.iteri (fun i preds ->
+ if i < nr_instructions && preds = []
+ then ready.(0) <- InstrSet.add i ready.(0)) predecessors;
+
+ let current_time = ref 0
+ and earliest_time i =
+ try
+ let time = ref (-1) in
+ List.iter (fun (j, latency) ->
+ if times.(j) < 0
+ then raise Exit
+ else let t = times.(j) + latency in
+ if t > !time
+ then time := t) predecessors.(i);
+ assert (!time >= 0);
+ !time
+ with Exit -> -1
+ in
+
+ let advance_time () =
+ (if !current_time < max_time-1
+ then (
+ Array.blit problem.resource_bounds 0 current_resources 0
+ (Array.length current_resources);
+ ready.(!current_time + 1) <-
+ InstrSet.union (ready.(!current_time))
+ (ready.(!current_time +1));
+ ready.(!current_time) <- InstrSet.empty));
+ incr current_time
+ in
+
+ (* ALL MENTIONS TO cnt ARE PLACEHOLDERS *)
+ let cnt = ref 0 in
+
+ let attempt_scheduling ready usages =
+ let result = ref (-1) in
+ DebugPrint.debug "\n\nREADY: ";
+ InstrSet.iter (fun i -> DebugPrint.debug "%d " i) ready;
+ DebugPrint.debug "\n\n";
+ try
+ Array.iteri (fun i avlregs ->
+ DebugPrint.debug "avlregs: %d %d\nlive regs: %d\n"
+ i avlregs (Hashtbl.length live_regs);
+ if !cnt < 5 && avlregs <= regs_thresholds.(i)
+ then (
+ csr_b := true;
+ let maybe = InstrSet.sched_CSR i ready usages in
+ DebugPrint.debug "maybe %d\n" maybe;
+ (if maybe >= 0 &&
+ let delta =
+ List.fold_left (fold_delta i) 0 mentions.(maybe) in
+ DebugPrint.debug "delta %d\n" delta;
+ delta > 0
+ then
+ (vector_subtract usages.(maybe) current_resources;
+ result := maybe)
+ else
+ if not !Clflags.option_regpres_wait_window
+ then
+ (InstrSet.iter (fun ins ->
+ if vector_less_equal usages.(ins) current_resources &&
+ List.fold_left (fold_delta i) 0 mentions.(maybe) >= 0
+ then result := ins
+ ) ready;
+ if !result <> -1 then
+ vector_subtract usages.(!result) current_resources;
+ incr cnt)
+ else
+ (incr cnt)
+ );
+ raise Exit)) available_regs;
+ InstrSet.iter (fun i ->
+ if vector_less_equal usages.(i) current_resources
+ then (
+ vector_subtract usages.(i) current_resources;
+ result := i;
+ raise Exit)) ready;
+ -1
+ with Exit ->
+ !result in
+
+ while !current_time < max_time
+ do
+ if (InstrSet.is_empty ready.(!current_time))
+ then advance_time ()
+ else
+ match attempt_scheduling ready.(!current_time)
+ problem.instruction_usages with
+ | -1 -> advance_time()
+ | i -> (assert(times.(i) < 0);
+ (DebugPrint.debug "INSTR ISSUED: %d\n" i;
+ if !csr_b && !Clflags.option_debug_compcert > 6 then
+ (Printf.eprintf "REGPRES: high pres class %d\n" i;
+ flush stderr);
+ csr_b := false;
+ (* if !Clflags.option_regpres_wait_window then *)
+ cnt := 0;
+ List.iter (fun (r,b) ->
+ if b then
+ (match Hashtbl.find_opt counts r with
+ | None -> assert false
+ | Some (t, n) ->
+ Hashtbl.remove counts r;
+ if n = 1 then
+ (Hashtbl.remove live_regs r;
+ available_regs.(t)
+ <- available_regs.(t) + 1))
+ else
+ let t = class_r r in
+ match Hashtbl.find_opt live_regs r with
+ | None -> (Hashtbl.add live_regs r t;
+ available_regs.(t)
+ <- available_regs.(t) - 1)
+ | Some i -> ()
+ ) mentions.(i));
+ times.(i) <- !current_time;
+ ready.(!current_time)
+ <- InstrSet.remove i (ready.(!current_time));
+ List.iter (fun (instr_to, latency) ->
+ if instr_to < nr_instructions then
+ match earliest_time instr_to with
+ | -1 -> ()
+ | to_time ->
+ ((* DebugPrint.debug "TO TIME %d : %d\n" to_time
+ * (Array.length ready); *)
+ ready.(to_time)
+ <- InstrSet.add instr_to ready.(to_time))
+ ) successors.(i);
+ successors.(i) <- []
+ )
+ done;
+
+ try
+ let final_time = ref (-1) in
+ for i = 0 to nr_instructions - 1 do
+ DebugPrint.debug "%d " i;
+ (if times.(i) < 0 then raise Exit);
+ (if !final_time < times.(i) + 1 then final_time := times.(i) + 1)
+ done;
+ List.iter (fun (i, latency) ->
+ let target_time = latency + times.(i) in
+ if target_time > !final_time then
+ final_time := target_time) predecessors.(nr_instructions);
+ times.(nr_instructions) <- !final_time;
+ (* DebugPrint.debug_flag := false; *)
+ Some times
+ with Exit ->
+ DebugPrint.debug "reg_pres_sched failed\n";
+ (* DebugPrint.debug_flag := false; *)
+ None
+
+;;
+
+
+(********************************************************************)
+
+let reg_pres_scheduler_bis (problem : problem) : solution option =
+ (* Printf.printf "\nNEW\n\n"; *)
+ let nr_instructions = get_nr_instructions problem in
+ let successors = get_successors problem
+ and predecessors = get_predecessors problem
+ and times = Array.make (nr_instructions+1) (-1) in
+ let live_regs_entry = problem.live_regs_entry in
+
+ (* let available_regs = Array.copy Machregsaux.nr_regs in *)
+
+ let class_r r =
+ Machregsaux.class_of_type (problem.typing r) in
+
+ let live_regs = Hashtbl.create 42 in
+
+ List.iter (fun r -> let classe = Machregsaux.class_of_type
+ (problem.typing r) in
+ (* available_regs.(classe)
+ * <- available_regs.(classe) - 1; *)
+ Hashtbl.add live_regs r classe)
+ (Registers.Regset.elements live_regs_entry);
+
+
+ let counts, mentions =
+ match problem.reference_counting with
+ | Some (l, r) -> l, r
+ | None -> assert false
+ in
+
+ let fold_delta a (r, b) =
+ a + (if b then
+ match Hashtbl.find_opt counts r with
+ | Some (_, 1) -> 1
+ | _ -> 0
+ else
+ match Hashtbl.find_opt live_regs r with
+ | None -> -1
+ | Some t -> 0
+ ) in
+
+ let priorities = critical_paths successors in
+
+ let current_resources = Array.copy problem.resource_bounds in
+
+ let compare_pres x y =
+ let pdy = List.fold_left (fold_delta) 0 mentions.(y) in
+ let pdx = List.fold_left (fold_delta) 0 mentions.(x) in
+ match pdy - pdx with
+ | 0 -> x - y
+ | z -> z
+ in
+
+ let module InstrSet =
+ Set.Make (struct
+ type t = int
+ let compare x y =
+ match priorities.(y) - priorities.(x) with
+ | 0 -> x - y
+ | z -> z
+ end) in
+
+ let max_time = bound_max_time problem (* + 5*nr_instructions *) in
+ let ready = Array.make max_time InstrSet.empty in
+
+ Array.iteri (fun i preds ->
+ if i < nr_instructions && preds = []
+ then ready.(0) <- InstrSet.add i ready.(0)) predecessors;
+
+ let current_time = ref 0
+ and earliest_time i =
+ try
+ let time = ref (-1) in
+ List.iter (fun (j, latency) ->
+ if times.(j) < 0
+ then raise Exit
+ else let t = times.(j) + latency in
+ if t > !time
+ then time := t) predecessors.(i);
+ assert (!time >= 0);
+ !time
+ with Exit -> -1
+ in
+
+ let advance_time () =
+ (* Printf.printf "ADV\n";
+ * flush stdout; *)
+ (if !current_time < max_time-1
+ then (
+ Array.blit problem.resource_bounds 0 current_resources 0
+ (Array.length current_resources);
+ ready.(!current_time + 1) <-
+ InstrSet.union (ready.(!current_time))
+ (ready.(!current_time +1));
+ ready.(!current_time) <- InstrSet.empty));
+ incr current_time
+ in
+
+
+ let attempt_scheduling ready usages =
+ let result = ref [] in
+ try
+ InstrSet.iter (fun i ->
+ if vector_less_equal usages.(i) current_resources
+ then
+ if !result = [] || priorities.(i) = priorities.(List.hd (!result))
+ then
+ result := i::(!result)
+ else raise Exit
+ ) ready;
+ if !result <> [] then raise Exit;
+ -1
+ with
+ Exit ->
+ let mini = List.fold_left (fun a b ->
+ if a = -1 || compare_pres a b > 0
+ then b else a
+ ) (-1) !result in
+ vector_subtract usages.(mini) current_resources;
+ mini
+ in
+
+ while !current_time < max_time
+ do
+ if (InstrSet.is_empty ready.(!current_time))
+ then advance_time ()
+ else
+ match attempt_scheduling ready.(!current_time)
+ problem.instruction_usages with
+ | -1 -> advance_time()
+ | i -> (
+ DebugPrint.debug "ISSUED: %d\nREADY: " i;
+ InstrSet.iter (fun i -> DebugPrint.debug "%d " i)
+ ready.(!current_time);
+ DebugPrint.debug "\nSUCC: ";
+ List.iter (fun (i, l) -> DebugPrint.debug "%d " i)
+ successors.(i);
+ DebugPrint.debug "\n\n";
+ assert(times.(i) < 0);
+ times.(i) <- !current_time;
+ ready.(!current_time)
+ <- InstrSet.remove i (ready.(!current_time));
+ (List.iter (fun (r,b) ->
+ if b then
+ (match Hashtbl.find_opt counts r with
+ | None -> assert false
+ | Some (t, n) ->
+ Hashtbl.remove counts r;
+ if n = 1 then
+ (Hashtbl.remove live_regs r;
+ (* available_regs.(t)
+ * <- available_regs.(t) + 1 *)))
+ else
+ let t = class_r r in
+ match Hashtbl.find_opt live_regs r with
+ | None -> (Hashtbl.add live_regs r t;
+ (* available_regs.(t)
+ * <- available_regs.(t) - 1 *))
+ | Some i -> ()
+ ) mentions.(i));
+ List.iter (fun (instr_to, latency) ->
+ if instr_to < nr_instructions then
+ match earliest_time instr_to with
+ | -1 -> ()
+ | to_time ->
+ ((* DebugPrint.debug "TO TIME %d : %d\n" to_time
+ * (Array.length ready); *)
+ ready.(to_time)
+ <- InstrSet.add instr_to ready.(to_time))
+ ) successors.(i);
+ successors.(i) <- []
+ )
+ done;
+
+ try
+ let final_time = ref (-1) in
+ for i = 0 to nr_instructions - 1 do
+ (* print_int i;
+ * flush stdout; *)
+ (if times.(i) < 0 then raise Exit);
+ (if !final_time < times.(i) + 1 then final_time := times.(i) + 1)
+ done;
+ List.iter (fun (i, latency) ->
+ let target_time = latency + times.(i) in
+ if target_time > !final_time then
+ final_time := target_time) predecessors.(nr_instructions);
+ times.(nr_instructions) <- !final_time;
+ Some times
+ with Exit ->
+ DebugPrint.debug "reg_pres_sched failed\n";
+ None
+
+;;
+
+(********************************************************************)
+
type bundle = int list;;
let rec extract_deps_to index = function
@@ -438,6 +914,12 @@ let reverse_problem problem =
{
max_latency = problem.max_latency;
resource_bounds = problem.resource_bounds;
+ live_regs_entry = Registers.Regset.empty; (* PLACEHOLDER *)
+ (* Not needed for the revlist sched, and for now we wont bother
+ with creating a reverse scheduler aware of reg press *)
+
+ typing = problem.typing;
+ reference_counting = problem.reference_counting;
instruction_usages = Array.init (nr_instructions + 1)
(fun i ->
if i=0
@@ -1259,5 +1741,7 @@ let scheduler_by_name name =
| "ilp" -> validated_scheduler cascaded_scheduler
| "list" -> validated_scheduler list_scheduler
| "revlist" -> validated_scheduler reverse_list_scheduler
+ | "regpres" -> validated_scheduler reg_pres_scheduler
+ | "regpres_bis" -> validated_scheduler reg_pres_scheduler_bis
| "greedy" -> greedy_scheduler
| s -> failwith ("unknown scheduler: " ^ s);;
diff --git a/scheduling/InstructionScheduler.mli b/scheduling/InstructionScheduler.mli
index fb7af3f6..48c7bc09 100644
--- a/scheduling/InstructionScheduler.mli
+++ b/scheduling/InstructionScheduler.mli
@@ -23,6 +23,16 @@ type problem = {
resource_bounds : int array;
(** An array of number of units available indexed by the kind of resources to be allocated. It can be empty, in which case the problem is scheduling without resource constraints. *)
+ live_regs_entry : Registers.Regset.t;
+ (** The set of live pseudo-registers at entry. *)
+
+ typing : RTLtyping.regenv;
+ (** Register type map. *)
+
+ reference_counting : ((Registers.reg, int * int) Hashtbl.t
+ * ((Registers.reg * bool) list array)) option;
+ (** See RTLpathScheduleraux.reference_counting. *)
+
instruction_usages: int array array;
(** At index {i i} the vector of resources used by instruction number {i i}. It must be the same length as [resource_bounds] *)
@@ -68,6 +78,12 @@ Once a clock tick is full go to the next.
@return [Some solution] when a solution is found, [None] if not. *)
val list_scheduler : problem -> solution option
+(** WIP : Same as list_scheduler, but schedules instructions which decrease
+register pressure when it gets too high. *)
+val reg_pres_scheduler : problem -> solution option
+
+val reg_pres_scheduler_bis : problem -> solution option
+
(** Schedule the problem using the order of instructions without any reordering *)
val greedy_scheduler : problem -> solution option
diff --git a/scheduling/RTLpath.v b/scheduling/RTLpath.v
index a4fce97e..b29a7759 100644
--- a/scheduling/RTLpath.v
+++ b/scheduling/RTLpath.v
@@ -156,7 +156,7 @@ Definition istep (ge: RTL.genv) (i: instruction) (sp: val) (rs: regset) (m: mem)
SOME v <- Mem.loadv chunk m a IN
Some (mk_istate true pc' (rs#dst <- v) m)
| Iload NOTRAP chunk addr args dst pc' =>
- let default_state := mk_istate true pc' rs#dst <- (default_notrap_load_value chunk) m in
+ let default_state := mk_istate true pc' rs#dst <- Vundef m in
match (eval_addressing ge sp addr rs##args) with
| None => Some default_state
| Some a => match (Mem.loadv chunk m a) with
diff --git a/scheduling/RTLpathSE_theory.v b/scheduling/RTLpathSE_theory.v
index aa8db342..2a791feb 100644
--- a/scheduling/RTLpathSE_theory.v
+++ b/scheduling/RTLpathSE_theory.v
@@ -87,11 +87,11 @@ Fixpoint seval_sval (ge: RTL.genv) (sp:val) (sv: sval) (rs0: regset) (m0: mem):
| NOTRAP =>
SOME args <- seval_list_sval ge sp lsv rs0 m0 IN
match (eval_addressing ge sp addr args) with
- | None => Some (default_notrap_load_value chunk)
+ | None => Some Vundef
| Some a =>
SOME m <- seval_smem ge sp sm rs0 m0 IN
match (Mem.loadv chunk m a) with
- | None => Some (default_notrap_load_value chunk)
+ | None => Some Vundef
| Some val => Some val
end
end
diff --git a/scheduling/RTLpathScheduleraux.ml b/scheduling/RTLpathScheduleraux.ml
index aeed39df..f3f09954 100644
--- a/scheduling/RTLpathScheduleraux.ml
+++ b/scheduling/RTLpathScheduleraux.ml
@@ -17,7 +17,7 @@ let print_superblock (sb: superblock) code =
begin
debug "{ instructions = "; print_instructions (Array.to_list insts) code; debug "\n";
debug " liveins = "; print_ptree_regset li; debug "\n";
- debug " output_regs = "; print_regset outs; debug "}"
+ debug " output_regs = "; print_regset outs; debug "\n}"
end
let print_superblocks lsb code =
@@ -72,6 +72,168 @@ let get_superblocks code entry pm typing =
lsb
end
+(** the useful one. Returns a hashtable with bindings of shape
+ ** [(r,(t, n)], where [r] is a pseudo-register (Registers.reg),
+ ** [t] is its class (according to [typing]), and [n] the number of
+ ** times it's referenced as an argument in instructions of [seqa] ;
+ ** and an arrray containg the list of regs referenced by each
+ ** instruction, with a boolean to know whether it's as arg or dest *)
+let reference_counting (seqa : (instruction * Regset.t) array)
+ (out_regs : Registers.Regset.t) (typing : RTLtyping.regenv) :
+ (Registers.reg, int * int) Hashtbl.t *
+ (Registers.reg * bool) list array =
+ let retl = Hashtbl.create 42 in
+ let retr = Array.make (Array.length seqa) [] in
+ (* retr.(i) : (r, b) -> (r', b') -> ...
+ * where b = true if seen as arg, false if seen as dest
+ *)
+ List.iter (fun reg ->
+ Hashtbl.add retl
+ reg (Machregsaux.class_of_type (typing reg), 1)
+ ) (Registers.Regset.elements out_regs);
+ let add_reg reg =
+ match Hashtbl.find_opt retl reg with
+ | Some (t, n) -> Hashtbl.add retl reg (t, n+1)
+ | None -> Hashtbl.add retl reg (Machregsaux.class_of_type
+ (typing reg), 1)
+ in
+ let map_true = List.map (fun r -> r, true) in
+ Array.iteri (fun i (ins, _) ->
+ match ins with
+ | Iop(_,args,dest,_) | Iload(_,_,_,args,dest,_) ->
+ List.iter (add_reg) args;
+ retr.(i) <- (dest, false)::(map_true args)
+ | Icond(_,args,_,_,_) ->
+ List.iter (add_reg) args;
+ retr.(i) <- map_true args
+ | Istore(_,_,args,src,_) ->
+ List.iter (add_reg) args;
+ add_reg src;
+ retr.(i) <- (src, true)::(map_true args)
+ | Icall(_,fn,args,dest,_) ->
+ List.iter (add_reg) args;
+ retr.(i) <- (match fn with
+ | Datatypes.Coq_inl reg ->
+ add_reg reg;
+ (dest,false)::(reg, true)::(map_true args)
+ | _ -> (dest,false)::(map_true args))
+
+ | Itailcall(_,fn,args) ->
+ List.iter (add_reg) args;
+ retr.(i) <- (match fn with
+ | Datatypes.Coq_inl reg ->
+ add_reg reg;
+ (reg, true)::(map_true args)
+ | _ -> map_true args)
+ | Ibuiltin(_,args,dest,_) ->
+ let rec bar = function
+ | AST.BA r -> add_reg r;
+ retr.(i) <- (r, true)::retr.(i)
+ | AST.BA_splitlong (hi, lo) | AST.BA_addptr (hi, lo) ->
+ bar hi; bar lo
+ | _ -> ()
+ in
+ List.iter (bar) args;
+ let rec bad = function
+ | AST.BR r -> retr.(i) <- (r, false)::retr.(i)
+ | AST.BR_splitlong (hi, lo) ->
+ bad hi; bad lo
+ | _ -> ()
+ in
+ bad dest;
+ | Ijumptable (reg,_) | Ireturn (Some reg) ->
+ add_reg reg;
+ retr.(i) <- [reg, true]
+ | _ -> ()
+ ) seqa;
+ (* print_string "mentions\n";
+ * Array.iteri (fun i l ->
+ * print_int i;
+ * print_string ": [";
+ * List.iter (fun (r, b) ->
+ * print_int (Camlcoq.P.to_int r);
+ * print_string ":";
+ * print_string (if b then "a:" else "d");
+ * if b then print_int (snd (Hashtbl.find retl r));
+ * print_string ", "
+ * ) l;
+ * print_string "]\n";
+ * flush stdout;
+ * ) retr; *)
+ retl, retr
+
+
+let get_live_regs_entry (sb : superblock) code =
+ (if !Clflags.option_debug_compcert > 6
+ then debug_flag := true);
+ debug "getting live regs for superblock:\n";
+ print_superblock sb code;
+ debug "\n";
+ let seqa = Array.map (fun i ->
+ (match PTree.get i code with
+ | Some ii -> ii
+ | None -> failwith "RTLpathScheduleraux.get_live_regs_entry"
+ ),
+ (match PTree.get i sb.liveins with
+ | Some s -> s
+ | None -> Regset.empty))
+ sb.instructions in
+ let ret =
+ Array.fold_right (fun (ins, liveins) regset_i ->
+ let regset = Registers.Regset.union liveins regset_i in
+ match ins with
+ | Inop _ -> regset
+ | Iop (_, args, dest, _)
+ | Iload (_, _, _, args, dest, _) ->
+ List.fold_left (fun set reg -> Registers.Regset.add reg set)
+ (Registers.Regset.remove dest regset) args
+ | Istore (_, _, args, src, _) ->
+ List.fold_left (fun set reg -> Registers.Regset.add reg set)
+ (Registers.Regset.add src regset) args
+ | Icall (_, fn, args, dest, _) ->
+ List.fold_left (fun set reg -> Registers.Regset.add reg set)
+ ((match fn with
+ | Datatypes.Coq_inl reg -> (Registers.Regset.add reg)
+ | Datatypes.Coq_inr _ -> (fun x -> x))
+ (Registers.Regset.remove dest regset))
+ args
+ | Itailcall (_, fn, args) ->
+ List.fold_left (fun set reg -> Registers.Regset.add reg set)
+ (match fn with
+ | Datatypes.Coq_inl reg -> (Registers.Regset.add reg regset)
+ | Datatypes.Coq_inr _ -> regset)
+ args
+ | Ibuiltin (_, args, dest, _) ->
+ List.fold_left (fun set arg ->
+ let rec add reg set =
+ match reg with
+ | AST.BA r -> Registers.Regset.add r set
+ | AST.BA_splitlong (hi, lo)
+ | AST.BA_addptr (hi, lo) -> add hi (add lo set)
+ | _ -> set
+ in add arg set)
+ (let rec rem dest set =
+ match dest with
+ | AST.BR r -> Registers.Regset.remove r set
+ | AST.BR_splitlong (hi, lo) -> rem hi (rem lo set)
+ | _ -> set
+ in rem dest regset)
+ args
+ | Icond (_, args, _, _, _) ->
+ List.fold_left (fun set reg ->
+ Registers.Regset.add reg set)
+ regset args
+ | Ijumptable (reg, _)
+ | Ireturn (Some reg) ->
+ Registers.Regset.add reg regset
+ | _ -> regset
+ ) seqa sb.s_output_regs
+ in debug "live in regs: ";
+ print_regset ret;
+ debug "\n";
+ debug_flag := false;
+ ret
+
(* TODO David *)
let schedule_superblock sb code =
if not !Clflags.option_fprepass
@@ -90,15 +252,22 @@ let schedule_superblock sb code =
match predicted_successor ii with
| Some _ -> 0
| None -> 1 in
+ debug "hello\n";
+ let live_regs_entry = get_live_regs_entry sb code in
+ let seqa =
+ Array.map (fun i ->
+ (match PTree.get i code with
+ | Some ii -> ii
+ | None -> failwith "RTLpathScheduleraux.schedule_superblock"),
+ (match PTree.get i sb.liveins with
+ | Some s -> s
+ | None -> Regset.empty))
+ (Array.sub sb.instructions 0 (nr_instr-trailer_length)) in
match PrepassSchedulingOracle.schedule_sequence
- (Array.map (fun i ->
- (match PTree.get i code with
- | Some ii -> ii
- | None -> failwith "RTLpathScheduleraux.schedule_superblock"),
- (match PTree.get i sb.liveins with
- | Some s -> s
- | None -> Regset.empty))
- (Array.sub sb.instructions 0 (nr_instr-trailer_length))) with
+ seqa
+ live_regs_entry
+ sb.typing
+ (reference_counting seqa sb.s_output_regs sb.typing) with
| None -> sb.instructions
| Some order ->
let ins' =
diff --git a/scheduling/postpass_lib/Machblock.v b/scheduling/postpass_lib/Machblock.v
index c8eadbd7..b588cca8 100644
--- a/scheduling/postpass_lib/Machblock.v
+++ b/scheduling/postpass_lib/Machblock.v
@@ -237,13 +237,13 @@ Inductive basic_step (s: list stackframe) (fb: block) (sp: val) (rs: regset) (m:
| exec_MBload_notrap1:
forall addr args rs' chunk dst,
eval_addressing ge sp addr rs##args = None ->
- rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- (default_notrap_load_value chunk)) ->
+ rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- Vundef) ->
basic_step s fb sp rs m (MBload NOTRAP chunk addr args dst) rs' m
| exec_MBload_notrap2:
forall addr args a rs' chunk dst,
eval_addressing ge sp addr rs##args = Some a ->
Mem.loadv chunk m a = None ->
- rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- (default_notrap_load_value chunk)) ->
+ rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- Vundef) ->
basic_step s fb sp rs m (MBload NOTRAP chunk addr args dst) rs' m
| exec_MBstore:
forall chunk addr args src m' a rs',
diff --git a/test/gourdinl/compare_pp.sh b/test/gourdinl/compare_pp.sh
new file mode 100755
index 00000000..09183cf9
--- /dev/null
+++ b/test/gourdinl/compare_pp.sh
@@ -0,0 +1,16 @@
+ffname=$(basename $1)
+fname=${ffname%.*}
+nopp=$fname.nopp.s
+pp=$fname.pp.s
+
+../../ccomp -fno-coalesce-mem -fno-postpass -S $1 -o $nopp
+../../ccomp -fno-coalesce-mem -fpostpass= list -S $1 -o $pp
+sed -i '1,2d' $nopp
+sed -i '1,2d' $pp
+if cmp -s $nopp $pp; then
+ echo "same!"
+else
+ echo "differents!"
+ diff -y $nopp $pp
+fi
+
diff --git a/test/gourdinl/postpass_exp.c b/test/gourdinl/postpass_exp.c
new file mode 100644
index 00000000..522ac2a6
--- /dev/null
+++ b/test/gourdinl/postpass_exp.c
@@ -0,0 +1,5 @@
+int main(int x, int y) {
+ int z = x << 32;
+ y = y - z;
+ return x + y;
+}
diff --git a/test/monniaux/cycles.h b/test/monniaux/cycles.h
index f26060a7..2905938b 100644
--- a/test/monniaux/cycles.h
+++ b/test/monniaux/cycles.h
@@ -6,7 +6,7 @@
typedef uint64_t cycle_t;
#define PRcycle PRId64
-#include <../../kvx-cos/include/hal/cos_registers.h>
+#include "/opt/kalray/accesscore/kvx-cos/include/hal/cos_registers.h"
static inline void cycle_count_config(void)
{
diff --git a/test/monniaux/division/harness.c b/test/monniaux/division/harness.c
new file mode 100644
index 00000000..b6ce674d
--- /dev/null
+++ b/test/monniaux/division/harness.c
@@ -0,0 +1,82 @@
+#include <stdint.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <math.h>
+#include <assert.h>
+#include "../cycles.h"
+
+static uint32_t dm_random_uint32(void) {
+ static uint32_t current=UINT32_C(0xDEADBEEF);
+ current = ((uint64_t) current << 6) % UINT32_C(4294967291);
+ return current;
+}
+
+static uint64_t dm_biased_random_uint32(void) {
+ uint32_t flags = dm_random_uint32();
+ uint32_t r;
+ switch (flags & 15) {
+ case 0:
+ r = dm_random_uint32() & 0xFU;
+ break;
+ case 1:
+ r = dm_random_uint32() & 0xFFU;
+ break;
+ case 2:
+ r = dm_random_uint32() & 0xFFFU;
+ break;
+ case 3:
+ r = dm_random_uint32() & 0xFFFFU;
+ break;
+ case 4:
+ r = dm_random_uint32() & 0xFFFFFU;
+ break;
+ case 5:
+ r = dm_random_uint32() & 0xFFFFFFU;
+ break;
+ case 6:
+ r = dm_random_uint32() & 0xFFFFFFFU;
+ break;
+ case 7:
+ r = dm_random_uint32() & 0x3;
+ break;
+ default:
+ r = dm_random_uint32();
+ }
+ return r;
+}
+
+inline uint32_t native_udiv32(uint32_t x, uint32_t y) {
+ return x/y;
+}
+extern uint32_t my_udiv32(uint32_t x, uint32_t y);
+
+int main() {
+ cycle_t time_me=0, time_native=0;
+ cycle_count_config();
+
+ for(int i=0; i<1000; i++) {
+ uint32_t x = dm_biased_random_uint32();
+ uint32_t y = dm_biased_random_uint32();
+ if (y == 0) continue;
+
+ cycle_t cycle_a, cycle_b, cycle_c;
+
+ uint32_t q1, q2;
+ cycle_a = get_cycle();
+ q1 = native_udiv32(x, y);
+ cycle_b = get_cycle();
+ q2 = my_udiv32(x, y);
+ cycle_c = get_cycle();
+
+ if(q1 != q2) {
+ printf("ERREUR %u %u\n", q1, q2);
+ }
+
+ time_native += cycle_b - cycle_a;
+ time_me += cycle_c - cycle_b;
+ }
+
+ printf("%" PRcycle "\t%" PRcycle "\n", time_native, time_me);
+
+ return 0;
+}
diff --git a/test/monniaux/division/my_udiv32.s b/test/monniaux/division/my_udiv32.s
new file mode 100644
index 00000000..0f4fd127
--- /dev/null
+++ b/test/monniaux/division/my_udiv32.s
@@ -0,0 +1,36 @@
+ .align 8
+ .global my_udiv32
+ .type my_udiv32, @function
+my_udiv32:
+ zxwd $r1 = $r1
+ make $r3 = 0x3ff0000000000000 # 1.0
+ zxwd $r0 = $r0
+ ;;
+ floatud.rn $r5 = $r1, 0
+ ;;
+ floatuw.rn $r2 = $r1, 0
+ ;;
+ finvw $r2 = $r2
+ ;;
+
+ fwidenlwd $r2 = $r2
+ floatud.rn $r4 = $r0, 0
+ ;;
+ ffmsd $r3 = $r2, $r5
+ ;;
+ ffmad $r2 = $r2, $r3
+ ;;
+ fmuld $r2 = $r2, $r4
+ ;;
+ fixedud.rn $r2 = $r2, 0
+ ;;
+ msbfw $r0 = $r2, $r1
+ zxwd $r1 = $r2
+ addw $r2 = $r2, -1
+ ;;
+ cmoved.wltz $r0? $r1 = $r2
+ ;;
+ copyd $r0 = $r1
+ ret
+ ;;
+ .size my_udiv32, .-my_udiv32
diff --git a/test/nardino/scheduling/entry_regs.c b/test/nardino/scheduling/entry_regs.c
new file mode 100644
index 00000000..9e6adacb
--- /dev/null
+++ b/test/nardino/scheduling/entry_regs.c
@@ -0,0 +1,19 @@
+#include <stdio.h>
+
+int f(int n) {
+ if (n > 0)
+ return 42;
+ else
+ return n;
+}
+
+
+int main(int argc, char *argv[]) {
+ int a=1;
+ float b=2.;
+ int c = f(a);
+ a = 3;
+ int d = f(a);
+ printf("%e, %d, %d, %d", b, a, c, d);
+ return 0;
+}
diff --git a/test/nardino/scheduling/spille_backw.c b/test/nardino/scheduling/spille_backw.c
new file mode 100644
index 00000000..1c36ee86
--- /dev/null
+++ b/test/nardino/scheduling/spille_backw.c
@@ -0,0 +1,114 @@
+int f(int k) {
+ int a1 = k;
+ int b1 = 2*a1;
+ int c = a1;
+ int a2 = k+1;
+ int b2 = 2*a2;
+ c += a2;
+ int a3 = k+2;
+ int b3 = 2*a3;
+ c += a3;
+ int a4 = k+3;
+ int b4 = 2*a4;
+ c += a4;
+ int a5 = k+4;
+ int b5 = 2*a5;
+ c += a5;
+ int a6 = k+5;
+ int b6 = 2*a6;
+ c += a6;
+ int a7 = k+6;
+ int b7 = 2*a7;
+ c += a7;
+ int a8 = k+7;
+ int b8 = 2*a8;
+ c += a8;
+ int a9 = k+8;
+ int b9 = 2*a9;
+ c += a9;
+ int a10 = k+9;
+ int b10 = 2*a10;
+ c += a10;
+ int a11 = k+10;
+ int b11 = 2*a11;
+ c += a11;
+ int a12 = k+11;
+ int b12 = 2*a12;
+ c += a12;
+ int a13 = k+12;
+ int b13 = 2*a13;
+ c += a13;
+ int a14 = k+13;
+ int b14 = 2*a14;
+ c += a14;
+ int a15 = k+14;
+ int b15 = 2*a15;
+ c += a15;
+ int a16 = k+15;
+ int b16 = 2*a16;
+ c += a16;
+ int a17 = k+16;
+ int b17 = 2*a17;
+ c += a17;
+ int a18 = k+17;
+ int b18 = 2*a18;
+ c += a18;
+ int a19 = k+18;
+ int b19 = 2*a19;
+ c += a19;
+ int a20 = k+19;
+ int b20 = 2*a20;
+ c += a20;
+ int a21 = k+20;
+ int b21 = 2*a21;
+ c += a21;
+ int a22 = k+21;
+ int b22 = 2*a22;
+ c += a22;
+ int a23 = k+22;
+ int b23 = 2*a23;
+ c += a23;
+ int a24 = k+23;
+ int b24 = 2*a24;
+ c += a24;
+ int a25 = k+24;
+ int b25 = 2*a25;
+ c += a25;
+ int a26 = k+25;
+ int b26 = 2*a26;
+ c += a26;
+ return
+ b13+
+ b12+
+ b11+
+ b10+
+ b9+
+ b8+
+ b7+
+ b6+
+ b5+
+ b4+
+ b3+
+ b2+
+ b1+
+ b14+
+ b15+
+ b16+
+ b17+
+ b18+
+ b19+
+ b20+
+ b21+
+ b22+
+ b23+
+ b23+
+ b24+
+ b25+
+ b26+
+ c;
+}
+
+int main(int argc, char *argv[]) {
+ f(3);
+ return 0;
+}
diff --git a/test/nardino/scheduling/spille_forw.c b/test/nardino/scheduling/spille_forw.c
new file mode 100644
index 00000000..db88588b
--- /dev/null
+++ b/test/nardino/scheduling/spille_forw.c
@@ -0,0 +1,166 @@
+#include <stdio.h>
+
+int f(int n, float * arr) {
+ float a1 = (float) n;
+ float b1 = 2.*a1;
+ float c = a1;
+ float a2 = (float) n+1;
+ float b2 = 2.*a2;
+ c += a2;
+ float a3 = (float) n+2;
+ float b3 = 2.*a3;
+ c += a3;
+ float a4 = (float) n+3;
+ float b4 = 2.*a4;
+ c += a4;
+ float a5 = (float) n+4;
+ float b5 = 2.*a5;
+ c += a5;
+ float a6 = (float) n+5;
+ float b6 = 2.*a6;
+ c += a6;
+ float a7 = (float) n+6;
+ float b7 = 2.*a7;
+ c += a7;
+ float a8 = (float) n+7;
+ float b8 = 2.*a8;
+ c += a8;
+ float a9 = (float) n+8;
+ float b9 = 2.*a9;
+ c += a9;
+ float a10 = (float) n+9;
+ float b10 = 2.*a10;
+ c += a10;
+ float a11 = (float) n+10;
+ float b11 = 2.*a11;
+ c += a11;
+ float a12 = (float) n+11;
+ float b12 = 2.*a12;
+ c += a12;
+ float a13 = (float) n+12;
+ float b13 = 2.*a13;
+ c += a13;
+ float a14 = (float) n+13;
+ float b14 = 2.*a14;
+ c += a14;
+ float a15 = (float) n+14;
+ float b15 = 2.*a15;
+ c += a15;
+ float a16 = (float) n+15;
+ float b16 = 2.*a16;
+ c += a16;
+ float a17 = (float) n+16;
+ float b17 = 2.*a17;
+ c += a17;
+ float a18 = (float) n+17;
+ float b18 = 2.*a18;
+ c += a18;
+ float a19 = (float) n+18;
+ float b19 = 2.*a19;
+ c += a19;
+ float a20 = (float) n+19;
+ float b20 = 2.*a20;
+ c += a20;
+ float a21 = (float) n+20;
+ float b21 = 2.*a21;
+ c += a21;
+ float a22 = (float) n+21;
+ float b22 = 2.*a22;
+ c += a22;
+ float a23 = (float) n+22;
+ float b23 = 2.*a23;
+ c += a23;
+ float a24 = (float) n+23;
+ float b24 = 2.*a24;
+ c += a24;
+ float a25 = (float) n+24;
+ float b25 = 2.*a25;
+ c += a25;
+ float a26 = (float) n+25;
+ float b26 = 2.*a26;
+ c += a26;
+ float a27 = (float) n+26;
+ float b27 = 2.*a27;
+ c += a27;
+ float a28 = (float) n+27;
+ float b28 = 2.*a28;
+ c += a28;
+ float a29 = (float) n+28;
+ float b29 = 2.*a29;
+ c += a29;
+ float a30 = (float) n+29;
+ float b30 = 2.*a30;
+ c += a30;
+ /* arr[0] = a1; */
+ /* arr[1] = a2; */
+ /* arr[2] = a3; */
+ /* arr[3] = a4; */
+ /* arr[4] = a5; */
+ /* arr[5] = a6; */
+ /* arr[6] = a7; */
+ /* arr[7] = a8; */
+ /* arr[8] = a9; */
+ /* arr[9] = a10; */
+ /* arr[10] = a11; */
+ /* arr[11] = a12; */
+ /* arr[12] = a13; */
+ /* arr[13] = a14; */
+ /* arr[14] = a15; */
+ /* arr[15] = a16; */
+ /* arr[16] = a17; */
+ /* arr[17] = a18; */
+ /* arr[18] = a19; */
+ /* arr[19] = a20; */
+ /* arr[20] = a21; */
+ /* arr[21] = a22; */
+ /* arr[22] = a23; */
+ /* arr[23] = a24; */
+ /* arr[24] = a25; */
+ /* arr[25] = a26; */
+ /* arr[26] = a27; */
+ /* arr[27] = a28; */
+ /* arr[28] = a29; */
+ /* arr[29] = a30; */
+ return c +
+ b1+
+ b2+
+ b3+
+ b4+
+ b5+
+ b6+
+ b7+
+ b8+
+ b9+
+ b10+
+ b11+
+ b12+
+ b13+
+ b14+
+ b15+
+ b16+
+ b17+
+ b18+
+ b19+
+ b20+
+ b21+
+ b22+
+ b23+
+ b24+
+ b25+
+ b26+
+ b27+
+ b28+
+ b29+
+ b30;
+}
+
+
+
+
+
+
+int main(int argc, char *argv[]) {
+ float arr[30];
+ f(5, arr);
+ return 0;
+}
diff --git a/x86/ExpansionOracle.ml b/x86/ExpansionOracle.ml
index ee2674bf..3b63b80d 120000..100644
--- a/x86/ExpansionOracle.ml
+++ b/x86/ExpansionOracle.ml
@@ -1 +1,17 @@
-../aarch64/ExpansionOracle.ml \ No newline at end of file
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Léo Gourdin UGA, VERIMAG *)
+(* *)
+(* Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+open RTLpathCommon
+
+let expanse (sb : superblock) code pm = (code, pm)
+
+let find_last_node_reg c = ()
diff --git a/x86/PrepassSchedulingOracle.ml b/x86/PrepassSchedulingOracle.ml
index 7b6a1b14..42a3da23 100644
--- a/x86/PrepassSchedulingOracle.ml
+++ b/x86/PrepassSchedulingOracle.ml
@@ -2,4 +2,5 @@ open RTL
open Registers
(* Do not do anything *)
-let schedule_sequence (seqa : (instruction*Regset.t) array) = None
+let schedule_sequence (seqa : (instruction*Regset.t) array)
+ live_regs_entry typing reference = None
diff --git a/x86/TargetPrinter.ml b/x86/TargetPrinter.ml
index 2000f96a..00e70f65 100644
--- a/x86/TargetPrinter.ml
+++ b/x86/TargetPrinter.ml
@@ -95,9 +95,6 @@ let z oc n = output_string oc (Z.to_string n)
let data_pointer = if Archi.ptr64 then ".quad" else ".long"
-(* The comment deliminiter *)
-let comment = "#"
-
(* Base-2 log of a Caml integer *)
let rec log2 n =
assert (n > 0);
@@ -106,6 +103,7 @@ let rec log2 n =
(* System dependent printer functions *)
module type SYSTEM =
sig
+ val comment: string
val raw_symbol: out_channel -> string -> unit
val symbol: out_channel -> P.t -> unit
val label: out_channel -> int -> unit
@@ -124,6 +122,9 @@ module type SYSTEM =
module ELF_System : SYSTEM =
struct
+ (* The comment delimiter *)
+ let comment = "#"
+
let raw_symbol oc s =
fprintf oc "%s" s
@@ -219,6 +220,10 @@ module ELF_System : SYSTEM =
module MacOS_System : SYSTEM =
struct
+ (* The comment delimiter.
+ `##` instead of `#` to please the Clang assembler. *)
+ let comment = "##"
+
let raw_symbol oc s =
fprintf oc "_%s" s
@@ -280,6 +285,9 @@ module MacOS_System : SYSTEM =
module Cygwin_System : SYSTEM =
struct
+ (* The comment delimiter *)
+ let comment = "#"
+
let symbol_prefix =
if Archi.ptr64 then "" else "_"