diff options
64 files changed, 2908 insertions, 532 deletions
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 9f407912..b3bb418f 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -14,6 +14,8 @@ check-admitted: rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' when: always + - if: '$CI_COMMIT_BRANCH == "kvx-work-ssa"' + when: always - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"' when: always - if: '$CI_COMMIT_BRANCH == "master"' @@ -35,6 +37,8 @@ build_x86_64: rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' when: always + - if: '$CI_COMMIT_BRANCH == "kvx-work-ssa"' + when: always - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"' when: always - if: '$CI_COMMIT_BRANCH == "master"' @@ -58,6 +62,8 @@ build_ia32: rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' when: always + - if: '$CI_COMMIT_BRANCH == "kvx-work-ssa"' + when: always - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"' when: always - if: '$CI_COMMIT_BRANCH == "master"' @@ -76,13 +82,13 @@ build_aarch64: script: - ./config_aarch64.sh - make -j "$NJOBS" - - export LD_LIBRARY_PATH=/usr/aarch64-linux-gnu/lib - - sudo ln -s /usr/aarch64-linux-gnu/lib/ld-linux-aarch64.so.1 /lib - - make -C test CCOMPOPTS='-static' SIMU='qemu-aarch64' EXECUTE='qemu-aarch64' all test - - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='aarch64-linux-gnu-gcc' EXECUTE='qemu-aarch64' CCOMPOPTS='-static' TARGET_CFLAGS='-static' + - make -C test SIMU='qemu-aarch64 -L /usr/aarch64-linux-gnu' EXECUTE='qemu-aarch64 -L /usr/aarch64-linux-gnu' all test + - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='aarch64-linux-gnu-gcc' EXECUTE='qemu-aarch64 -L /usr/aarch64-linux-gnu' CCOMPOPTS='-static' TARGET_CFLAGS='-static' rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' when: always + - if: '$CI_COMMIT_BRANCH == "kvx-work-ssa"' + when: always - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"' when: always - if: '$CI_COMMIT_BRANCH == "master"' @@ -101,10 +107,8 @@ build_arm: script: - ./config_arm.sh - make -j "$NJOBS" - - export LD_LIBRARY_PATH=/usr/arm-linux-gnueabi/lib - - sudo ln -s /usr/arm-linux-gnueabi/lib/ld-linux.so.3 /lib # FIXME: UGLY ! - - make -C test CCOMPOPTS=-static SIMU='qemu-arm' EXECUTE='qemu-arm' all test - - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='arm-linux-gnueabi-gcc' EXECUTE='qemu-arm' CCOMPOPTS='-static' TARGET_CFLAGS='-static' BITS=32 + - make -C test SIMU='qemu-arm -L /usr/arm-linux-gnueabi' EXECUTE='qemu-arm -L /usr/arm-linux-gnueabi' all test + - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='arm-linux-gnueabi-gcc' EXECUTE='qemu-arm -L /usr/arm-linux-gnueabi' CCOMPOPTS='-static' TARGET_CFLAGS='-static' BITS=32 rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' when: always @@ -129,10 +133,8 @@ build_armhf: script: - ./config_armhf.sh - make -j "$NJOBS" - - export LD_LIBRARY_PATH=/usr/arm-linux-gnueabihf/lib - - sudo ln -s /usr/arm-linux-gnueabihf/lib/ld-linux-armhf.so.3 /lib # FIXME: UGLY ! - - make -C test CCOMPOPTS=-static SIMU='qemu-arm' EXECUTE='qemu-arm' all test - - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='arm-linux-gnueabihf-gcc' EXECUTE='qemu-arm' CCOMPOPTS='-static' TARGET_CFLAGS='-static' BITS=32 + - make -C test SIMU='qemu-arm -L /usr/arm-linux-gnueabihf' EXECUTE='qemu-arm -L /usr/arm-linux-gnueabihf' all test + - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='arm-linux-gnueabihf-gcc' EXECUTE='qemu-arm -L /usr/arm-linux-gnueabihf' CCOMPOPTS='-static' TARGET_CFLAGS='-static' BITS=32 rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' when: always @@ -144,47 +146,59 @@ build_armhf: when: always - when: manual -# build_ppc: -# stage: build -# image: coqorg/coq:8.12.2-ocaml-4.11.1-flambda -# before_script: -# - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update -# - sudo apt-get -y install gcc-powerpc-linux-gnu qemu-user -# - eval `opam config env` -# - opam update -# - opam install -y menhir -# script: -# - ./config_ppc.sh -# - make -j "$NJOBS" -# rules: -# - if: '$CI_COMMIT_BRANCH == "kvx-work"' -# when: always -# - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"' -# when: always -# - if: '$CI_COMMIT_BRANCH == "master"' -# when: always -# - when: manual +build_ppc: + stage: build + image: coqorg/coq:8.12.2-ocaml-4.11.1-flambda + before_script: + - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update + - sudo apt-get -y install gcc-powerpc-linux-gnu wget ninja-build libglib2.0-dev libpixman-1-dev + - wget --no-verbose https://download.qemu.org/qemu-6.0.0.tar.xz + - tar xJf qemu-6.0.0.tar.xz + - (cd qemu-6.0.0 && ./configure --target-list=ppc-linux-user && make && sudo make install) + - eval `opam config env` + - opam update + - opam install -y menhir + script: + - ./config_ppc.sh + - make -j "$NJOBS" + - qemu-ppc --version + - make -C test SIMU='qemu-ppc -L /usr/powerpc-linux-gnu -cpu 7400' EXECUTE='qemu-ppc -L /usr/powerpc-linux-gnu -cpu 7400' all test + - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='powerpc-linux-gnu-gcc' EXECUTE='qemu-ppc -L /usr/powerpc-linux-gnu -cpu 7400' BITS=32 + rules: + - if: '$CI_COMMIT_BRANCH == "kvx-work"' + when: always + - if: '$CI_COMMIT_BRANCH == "kvx-work-ssa"' + when: always + - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"' + when: always + - if: '$CI_COMMIT_BRANCH == "master"' + when: always + - when: manual -# build_ppc64: -# stage: build -# image: coqorg/coq:8.12.2-ocaml-4.11.1-flambda -# before_script: -# - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update -# - sudo apt-get -y install gcc-powerpc64-linux-gnu -# - eval `opam config env` -# - opam update -# - opam install -y menhir -# script: -# - ./config_ppc64.sh -# - make -j "$NJOBS" -# rules: -# - if: '$CI_COMMIT_BRANCH == "kvx-work"' -# when: always -# - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"' -# when: always -# - if: '$CI_COMMIT_BRANCH == "master"' -# when: always -# - when: manual +build_ppc64: + stage: build + image: coqorg/coq:8.12.2-ocaml-4.11.1-flambda + before_script: + - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update + - sudo apt-get -y install gcc-powerpc64-linux-gnu + - eval `opam config env` + - opam update + - opam install -y menhir + script: + - ./config_ppc64.sh + - make -j "$NJOBS" + #- make -C test SIMU='qemu-ppc64 -L /usr/powerpc64-linux-gnu -cpu 7400' EXECUTE='qemu-ppc64 -L /usr/powerpc64-linux-gnu -cpu 7400' all test + #- ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='powerpc64-linux-gnu-gcc' EXECUTE='qemu-ppc64 -L /usr/powerpc64-linux-gnu -cpu 7400' BITS=32 + rules: + - if: '$CI_COMMIT_BRANCH == "kvx-work"' + when: always + - if: '$CI_COMMIT_BRANCH == "kvx-work-ssa"' + when: always + - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"' + when: always + - if: '$CI_COMMIT_BRANCH == "master"' + when: always + - when: manual build_rv64: stage: build @@ -198,13 +212,13 @@ build_rv64: script: - ./config_rv64.sh - make -j "$NJOBS" - - export LD_LIBRARY_PATH=/usr/riscv64-linux-gnu/lib - - sudo ln -s /usr/riscv64-linux-gnu/lib/ld-linux-riscv64-lp64d.so.1 /lib - - make -C test CCOMPOPTS=-static SIMU='qemu-riscv64' EXECUTE='qemu-riscv64' all test - - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='riscv64-linux-gnu-gcc' EXECUTE='qemu-riscv64' CCOMPOPTS='-static' TARGET_CFLAGS='-static' + - make -C test SIMU='qemu-riscv64 -L /usr/riscv64-linux-gnu' EXECUTE='qemu-riscv64 -L /usr/riscv64-linux-gnu' all test + - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='riscv64-linux-gnu-gcc' EXECUTE='qemu-riscv64 -L /usr/riscv64-linux-gnu' CCOMPOPTS='-static' TARGET_CFLAGS='-static' rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' when: always + - if: '$CI_COMMIT_BRANCH == "kvx-work-ssa"' + when: always - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"' when: always - if: '$CI_COMMIT_BRANCH == "master"' @@ -144,7 +144,8 @@ BACKEND=\ FirstNop.v FirstNopproof.v \ Allnontrap.v Allnontrapproof.v \ Allocation.v Allocationproof.v \ - Tunneling.v Tunnelingproof.v \ + LTLTunneling.v LTLTunnelingproof.v \ + RTLTunneling.v RTLTunnelingproof.v \ Linear.v Lineartyping.v \ Linearize.v Linearizeproof.v \ CleanupLabels.v CleanupLabelsproof.v \ diff --git a/aarch64/Machregsaux.ml b/aarch64/Machregsaux.ml index 41db3bd4..98e461eb 100644 --- a/aarch64/Machregsaux.ml +++ b/aarch64/Machregsaux.ml @@ -19,3 +19,6 @@ let class_of_type = function | AST.Tint | AST.Tlong -> 0 | AST.Tfloat | AST.Tsingle -> 1 | AST.Tany32 | AST.Tany64 -> assert false + +(* number of available registers per class *) +let nr_regs = [| 29; 32 |] diff --git a/aarch64/Machregsaux.mli b/aarch64/Machregsaux.mli index 01b0f9fd..23ac1c9a 100644 --- a/aarch64/Machregsaux.mli +++ b/aarch64/Machregsaux.mli @@ -15,3 +15,6 @@ val is_scratch_register: string -> bool val class_of_type: AST.typ -> int + +(* Number of registers in each class *) +val nr_regs : int array diff --git a/aarch64/PostpassSchedulingOracle.ml b/aarch64/PostpassSchedulingOracle.ml index cde3e7a7..6f784238 100644 --- a/aarch64/PostpassSchedulingOracle.ml +++ b/aarch64/PostpassSchedulingOracle.ml @@ -507,6 +507,9 @@ let build_problem bb = { max_latency = -1; resource_bounds = opweights.pipelined_resource_bounds; + live_regs_entry = Registers.Regset.empty; (* unused here *) + typing = (fun x -> AST.Tint); (* unused here *) + reference_counting = None; instruction_usages = instruction_usages bb; latency_constraints = latency_constraints bb; } diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml index 1935e785..d7e80cd9 100644 --- a/aarch64/PrepassSchedulingOracle.ml +++ b/aarch64/PrepassSchedulingOracle.ml @@ -1,14 +1,26 @@ +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* Léo Gourdin UGA, VERIMAG *) +(* Nicolas Nardino ENS-Lyon, VERIMAG *) +(* *) +(* *) +(* *************************************************************) + open AST open BTL open Maps open InstructionScheduler open Registers open PrepassSchedulingOracleDeps -open RTLcommonaux - +open PrintBTL +open DebugPrint + let use_alias_analysis () = false -let build_constraints_and_resources (opweights : opweights) insts btl = +let build_constraints_and_resources (opweights : opweights) seqa btl = let last_reg_reads : int list PTree.t ref = ref PTree.empty and last_reg_write : (int * int) PTree.t ref = ref PTree.empty and last_mem_reads : int list ref = ref [] @@ -16,8 +28,7 @@ let build_constraints_and_resources (opweights : opweights) insts btl = and last_branch : int option ref = ref None and last_non_pipelined_op : int array = Array.make opweights.nr_non_pipelined_units (-1) - and latency_constraints : latency_constraint list ref = ref [] - and resources = ref [] in + and latency_constraints : latency_constraint list ref = ref [] in let add_constraint instr_from instr_to latency = assert (instr_from <= instr_to); assert (latency >= 0); @@ -60,7 +71,29 @@ let build_constraints_and_resources (opweights : opweights) insts btl = last_reg_write := PTree.set reg (i, latency) !last_reg_write; last_reg_reads := PTree.remove reg !last_reg_reads in - let add_input_regs i regs = List.iter (add_input_reg i) regs + let add_input_regs i regs = List.iter (add_input_reg i) regs in + let rec add_builtin_res i (res : reg builtin_res) = + match res with + | BR r -> add_output_reg i 10 r + | BR_none -> () + | BR_splitlong (hi, lo) -> + add_builtin_res i hi; + add_builtin_res i lo + in + let rec add_builtin_arg i (ba : reg builtin_arg) = + match ba with + | BA r -> add_input_reg i r + | BA_int _ | BA_long _ | BA_float _ | BA_single _ -> () + | BA_loadstack (_, _) -> add_input_mem i + | BA_addrstack _ -> () + | BA_loadglobal (_, _, _) -> add_input_mem i + | BA_addrglobal _ -> () + | BA_splitlong (hi, lo) -> + add_builtin_arg i hi; + add_builtin_arg i lo + | BA_addptr (a1, a2) -> + add_builtin_arg i a1; + add_builtin_arg i a2 and irreversible_action i = match !last_branch with None -> () | Some j -> add_constraint j i 1 in @@ -77,58 +110,122 @@ let build_constraints_and_resources (opweights : opweights) insts btl = resources in Array.iteri - (fun i inst -> + (fun i (inst, other_uses) -> + List.iter (fun use -> add_input_reg i use) (Regset.elements other_uses); match inst with - | Bnop _ -> - let rs = Array.map (fun _ -> 0) opweights.pipelined_resource_bounds in - resources := rs :: !resources + | Bnop _ -> () | Bop (op, lr, rd, _) -> add_non_pipelined_resources i (opweights.non_pipelined_resources_of_op op (List.length lr)); if Op.is_trapping_op op then irreversible_action i; add_input_regs i lr; - add_output_reg i (opweights.latency_of_op op (List.length lr)) rd; - let rs = opweights.resources_of_op op (List.length lr) in - resources := rs :: !resources + add_output_reg i (opweights.latency_of_op op (List.length lr)) rd | Bload (trap, chk, addr, lr, rd, _) -> if trap = TRAP then irreversible_action i; add_input_mem i; add_input_regs i lr; add_output_reg i (opweights.latency_of_load trap chk addr (List.length lr)) - rd; - let rs = opweights.resources_of_load trap chk addr (List.length lr) in - resources := rs :: !resources + rd | Bstore (chk, addr, lr, src, _) -> irreversible_action i; add_input_regs i lr; add_input_reg i src; - add_output_mem i; - let rs = opweights.resources_of_store chk addr (List.length lr) in - resources := rs :: !resources + add_output_mem i | Bcond (cond, lr, BF (Bgoto s, _), ibnot, _) -> - let live = (get_some @@ PTree.get s btl).input_regs in - add_input_regs i (Regset.elements live); set_branch i; add_input_mem i; - add_input_regs i lr; - let rs = opweights.resources_of_cond cond (List.length lr) in - resources := rs :: !resources + add_input_regs i lr | Bcond (_, _, _, _, _) -> - failwith "get_simple_dependencies: invalid Bcond" - | BF (_, _) -> failwith "get_simple_dependencies: BF" - | Bseq (_, _) -> failwith "get_simple_dependencies: Bseq") - insts; - (!latency_constraints, Array.of_list (List.rev !resources)) + failwith "build_constraints_and_resources: invalid Bcond" + | BF (Bcall (signature, ef, lr, rd, _), _) -> + set_branch i; + (match ef with + | Datatypes.Coq_inl r -> add_input_reg i r + | Datatypes.Coq_inr symbol -> ()); + add_input_mem i; + add_input_regs i lr; + add_output_reg i (opweights.latency_of_call signature ef) rd; + add_output_mem i; + failwith "build_constraints_and_resources: invalid Bcall" + | BF (Btailcall (signature, ef, lr), _) -> + set_branch i; + (match ef with + | Datatypes.Coq_inl r -> add_input_reg i r + | Datatypes.Coq_inr symbol -> ()); + add_input_mem i; + add_input_regs i lr; + failwith "build_constraints_and_resources: invalid Btailcall" + | BF (Bbuiltin (ef, lr, rd, _), _) -> + set_branch i; + add_input_mem i; + List.iter (add_builtin_arg i) lr; + add_builtin_res i rd; + add_output_mem i; + failwith "build_constraints_and_resources: invalid Bbuiltin" + | BF (Bjumptable (lr, _), _) -> + set_branch i; + add_input_reg i lr; + failwith "build_constraints_and_resources: invalid Bjumptable" + | BF (Breturn (Some r), _) -> + set_branch i; + add_input_reg i r; + failwith "build_constraints_and_resources: invalid Breturn Some" + | BF (Breturn None, _) -> + set_branch i; + failwith "build_constraints_and_resources: invalid Breturn None" + | BF (Bgoto _, _) -> + failwith "build_constraints_and_resources: invalid Bgoto" + | Bseq (_, _) -> failwith "build_constraints_and_resources: Bseq") + seqa; + !latency_constraints -let define_problem (opweights : opweights) ibf btl = - let simple_deps, resources = - build_constraints_and_resources opweights ibf btl - in +let resources_of_instruction (opweights : opweights) = function + | Bnop _ -> Array.map (fun _ -> 0) opweights.pipelined_resource_bounds + | Bop (op, inputs, output, _) -> + opweights.resources_of_op op (List.length inputs) + | Bload (trap, chunk, addressing, addr_regs, output, _) -> + opweights.resources_of_load trap chunk addressing (List.length addr_regs) + | Bstore (chunk, addressing, addr_regs, input, _) -> + opweights.resources_of_store chunk addressing (List.length addr_regs) + | BF (Bcall (signature, ef, inputs, output, _), _) -> + opweights.resources_of_call signature ef + | BF (Bbuiltin (ef, builtin_inputs, builtin_output, _), _) -> + opweights.resources_of_builtin ef + | Bcond (cond, args, _, _, _) -> + opweights.resources_of_cond cond (List.length args) + | BF (Btailcall _, _) | BF (Bjumptable _, _) | BF (Breturn _, _) -> + opweights.pipelined_resource_bounds + | BF (Bgoto _, _) | Bseq (_, _) -> + failwith "resources_of_instruction: invalid btl instruction" + +let print_sequence pp seqa = + Array.iteri + (fun i (inst, other_uses) -> + debug "i=%d\n inst = " i; + print_btl_inst pp inst; + debug "\n other_uses="; + print_regset other_uses; + debug "\n") + seqa + +let length_of_chunk = function + | Mint8signed | Mint8unsigned -> 1 + | Mint16signed | Mint16unsigned -> 2 + | Mint32 | Mfloat32 | Many32 -> 4 + | Mint64 | Mfloat64 | Many64 -> 8 + +let define_problem (opweights : opweights) (live_entry_regs : Regset.t) + (typing : RTLtyping.regenv) reference_counting seqa btl = + let simple_deps = build_constraints_and_resources opweights seqa btl in { max_latency = -1; resource_bounds = opweights.pipelined_resource_bounds; - instruction_usages = resources; + live_regs_entry = live_entry_regs; + typing; + reference_counting = Some reference_counting; + instruction_usages = + Array.map (resources_of_instruction opweights) (Array.map fst seqa); latency_constraints = simple_deps; } @@ -154,27 +251,35 @@ let zigzag_scheduler problem early_ones = { problem with latency_constraints = !constraints' } | None -> None -let prepass_scheduler_by_name name problem insts = +let prepass_scheduler_by_name name problem seqa = match name with | "zigzag" -> let early_ones = Array.map - (fun inst -> + (fun (inst, _) -> match inst with Bcond (_, _, _, _, _) -> true | _ -> false) - insts + seqa in zigzag_scheduler problem early_ones | _ -> scheduler_by_name name problem -let schedule_sequence insts btl = +let schedule_sequence seqa btl (live_regs_entry : Registers.Regset.t) + (typing : RTLtyping.regenv) reference = let opweights = OpWeights.get_opweights () in try - if Array.length insts <= 1 then None + if Array.length seqa <= 1 then None else - let nr_instructions = Array.length insts in - let problem = define_problem opweights insts btl in + let nr_instructions = Array.length seqa in + if !Clflags.option_debug_compcert > 6 then + Printf.printf "prepass scheduling length = %d\n" nr_instructions; + let problem = + define_problem opweights live_regs_entry typing reference seqa btl + in + if !Clflags.option_debug_compcert > 7 then ( + print_sequence stdout seqa; + print_problem stdout problem); match - prepass_scheduler_by_name !Clflags.option_fprepass_sched problem insts + prepass_scheduler_by_name !Clflags.option_fprepass_sched problem seqa with | None -> Printf.printf "no solution in prepass scheduling\n"; @@ -190,5 +295,3 @@ let schedule_sequence insts btl = with Failure s -> Printf.printf "failure in prepass scheduling: %s\n" s; None - - diff --git a/backend/CSE2proof.v b/backend/CSE2proof.v index 49dbd409..252240c9 100644 --- a/backend/CSE2proof.v +++ b/backend/CSE2proof.v @@ -1399,7 +1399,7 @@ Proof. 2: discriminate. econstructor; split. { - eapply exec_Iop with (v := (default_notrap_load_value chunk)); eauto. + eapply exec_Iop with (v := Vundef); eauto. simpl. rewrite <- subst_args_ok with (genv := ge) (f := f) (pc := pc) (sp := sp) (m := m) in H0. { @@ -1472,7 +1472,7 @@ Proof. 2: discriminate. econstructor; split. { - eapply exec_Iop with (v := (default_notrap_load_value chunk)); eauto. + eapply exec_Iop with (v := Vundef); eauto. simpl. rewrite <- subst_args_ok with (genv := ge) (f := f) (pc := pc) (sp := sp) (m := m) in H0. { diff --git a/backend/CSE3analysisproof.v b/backend/CSE3analysisproof.v index d53cf604..382c9f4c 100644 --- a/backend/CSE3analysisproof.v +++ b/backend/CSE3analysisproof.v @@ -502,7 +502,7 @@ Section SOUNDNESS. end with | Some dat => v' = dat - | None => v' = default_notrap_load_value chunk + | None => v' = Vundef end end. diff --git a/backend/CSEdomain.v b/backend/CSEdomain.v index f78e1d25..9641d012 100644 --- a/backend/CSEdomain.v +++ b/backend/CSEdomain.v @@ -113,12 +113,12 @@ Inductive rhs_eval_to (valu: valuation) (ge: genv) (sp: val) (m: mem): (* | load_notrap1_eval_to: forall chunk addr vl, eval_addressing ge sp addr (map valu vl) = None -> rhs_eval_to valu ge sp m (Load NOTRAP chunk addr vl) - (default_notrap_load_value chunk) + Vundef | load_notrap2_eval_to: forall chunk addr vl a, eval_addressing ge sp addr (map valu vl) = Some a -> Mem.loadv chunk m a = None -> rhs_eval_to valu ge sp m (Load NOTRAP chunk addr vl) - (default_notrap_load_value chunk) *). + Vundef *). Inductive equation_holds (valu: valuation) (ge: genv) (sp: val) (m: mem): equation -> Prop := diff --git a/backend/CSEproof.v b/backend/CSEproof.v index cf51f5a2..556b44b3 100644 --- a/backend/CSEproof.v +++ b/backend/CSEproof.v @@ -402,7 +402,7 @@ Lemma add_load_holds_none1: forall valu1 ge sp rs m n addr (args: list reg) chunk dst, numbering_holds valu1 ge sp rs m n -> eval_addressing ge sp addr rs##args = None -> - exists valu2, numbering_holds valu2 ge sp (rs#dst <- (default_notrap_load_value chunk)) m (add_load n dst chunk addr args). + exists valu2, numbering_holds valu2 ge sp (rs#dst <- Vundef) m (add_load n dst chunk addr args). Proof. unfold add_load; intros. destruct (valnum_regs n args) as [n1 vl] eqn:VN. @@ -418,7 +418,7 @@ Lemma add_load_holds_none2: numbering_holds valu1 ge sp rs m n -> eval_addressing ge sp addr rs##args = Some a -> Mem.loadv chunk m a = None -> - exists valu2, numbering_holds valu2 ge sp (rs#dst <- (default_notrap_load_value chunk)) m (add_load n dst NOTRAP chunk addr args). + exists valu2, numbering_holds valu2 ge sp (rs#dst <- Vundef) m (add_load n dst NOTRAP chunk addr args). Proof. unfold add_load; intros. destruct (valnum_regs n args) as [n1 vl] eqn:VN. @@ -1210,7 +1210,6 @@ Proof. exists valu1. apply set_unknown_holds. assumption. - unfold default_notrap_load_value. apply set_reg_lessdef; eauto. } { diff --git a/backend/Deadcodeproof.v b/backend/Deadcodeproof.v index b51d6cce..be20af0b 100644 --- a/backend/Deadcodeproof.v +++ b/backend/Deadcodeproof.v @@ -845,7 +845,6 @@ Ltac UseTransfer := eapply match_succ_states; eauto. simpl; auto. apply eagree_update; auto. rewrite is_int_zero_sound by auto. - unfold default_notrap_load_value. constructor. + (* preserved *) exploit eval_addressing_lessdef_none. eapply add_needs_all_lessdef; eauto. eassumption. @@ -878,7 +877,6 @@ Ltac UseTransfer := eapply match_succ_states; eauto. simpl; auto. apply eagree_update; auto. rewrite is_int_zero_sound by auto. - unfold default_notrap_load_value. constructor. + (* preserved *) exploit eval_addressing_lessdef. eapply add_needs_all_lessdef; eauto. eauto. diff --git a/backend/LTL.v b/backend/LTL.v index 3edd60a2..a382ef0e 100644 --- a/backend/LTL.v +++ b/backend/LTL.v @@ -217,14 +217,14 @@ Inductive step: state -> trace -> state -> Prop := E0 (Block s f sp bb rs' m) | exec_Lload_notrap1: forall s f sp chunk addr args dst bb rs m rs', eval_addressing ge sp addr (reglist rs args) = None -> - rs' = Locmap.set (R dst) (default_notrap_load_value chunk) + rs' = Locmap.set (R dst) Vundef (undef_regs (destroyed_by_load chunk addr) rs) -> step (Block s f sp (Lload NOTRAP chunk addr args dst :: bb) rs m) E0 (Block s f sp bb rs' m) | exec_Lload_notrap2: forall s f sp chunk addr args dst bb rs m a rs', eval_addressing ge sp addr (reglist rs args) = Some a -> Mem.loadv chunk m a = None -> - rs' = Locmap.set (R dst) (default_notrap_load_value chunk) + rs' = Locmap.set (R dst) Vundef (undef_regs (destroyed_by_load chunk addr) rs) -> step (Block s f sp (Lload NOTRAP chunk addr args dst :: bb) rs m) E0 (Block s f sp bb rs' m) diff --git a/backend/Tunneling.v b/backend/LTLTunneling.v index c849ea92..4b404724 100644 --- a/backend/Tunneling.v +++ b/backend/LTLTunneling.v @@ -77,7 +77,7 @@ Definition UF := PTree.t (node * Z). (* The oracle returns a map of "nop" node to their target with a distance (ie the number of the "nop" node on the path) to the target. *) Axiom branch_target: LTL.function -> UF. -Extract Constant branch_target => "Tunnelingaux.branch_target". +Extract Constant branch_target => "LTLTunnelingaux.branch_target". Local Open Scope error_monad_scope. diff --git a/backend/LTLTunnelingaux.ml b/backend/LTLTunnelingaux.ml new file mode 100644 index 00000000..c3b8cf82 --- /dev/null +++ b/backend/LTLTunnelingaux.ml @@ -0,0 +1,108 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* *) +(* Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +(* + +This file implements the [branch_target] oracle that identifies "nop" branches in a LTL function, +and computes their target node with the distance (ie the number of cummulated nops) toward this target. + +See [LTLTunneling.v] + +*) + +open Coqlib +open LTL +open Maps +open Camlcoq +open Tunnelinglibs + +module LANG = struct + type code_unit = LTL.bblock + type funct = LTL.coq_function +end + +module OPT = struct + let langname = "LTL" + let limit_tunneling = None + let debug_flag = ref false + let final_dump = false +end + +module Partial = Tunnelinglibs.Tunneling(LANG)(OPT) + +module FUNS = struct + let build_simplified_cfg c acc pc bb = + match bb with + | Lbranch s :: _ -> + let ns = get_node c s in + set_branch c pc ns; + acc + | Lcond (_, _, s1, s2, _) :: _ -> + c.num_rems <- c.num_rems + 1; + let ns1 = get_node c s1 in + let ns2 = get_node c s2 in + let npc = get_node c pc in + npc.inst <- COND(ns1, ns2); + npc::acc + | _ -> acc + + let print_code_unit c println (pc, bb) = + match bb with + | Lbranch s::_ -> (if println then Partial.debug "\n"); Partial.debug "%d:Lbranch %d %s\n" pc (P.to_int s) (string_of_labeli c.nodes pc); false + | Lcond (_, _, s1, s2, _)::_ -> (if println then Partial.debug "\n"); Partial.debug "%d:Lcond (%d,%d) %s\n" pc (P.to_int s1) (P.to_int s2) (string_of_labeli c.nodes pc); false + | _ -> Partial.debug "%d " pc; true + + let fn_code f = f.fn_code + let fn_entrypoint f = f.fn_entrypoint + + + (*************************************************************) + (* Copy-paste of the extracted code of the verifier *) + (* with [raise (BugOnPC (P.to_int pc))] instead of [Error.*] *) + + let check_code_unit td pc bb = + match PTree.get pc td with + | Some p -> + let (tpc, dpc) = p in + let dpc0 = dpc in + (match bb with + | [] -> + raise (BugOnPC (P.to_int pc)) + | i :: _ -> + (match i with + | Lbranch s -> + let (ts, ds) = get td s in + if peq tpc ts + then if zlt ds dpc0 + then () + else raise (BugOnPC (P.to_int pc)) + else raise (BugOnPC (P.to_int pc)) + | Lcond (_, _, s1, s2, _) -> + let (ts1, ds1) = get td s1 in + let (ts2, ds2) = get td s2 in + if peq tpc ts1 + then if peq tpc ts2 + then if zlt ds1 dpc0 + then if zlt ds2 dpc0 + then () + else raise (BugOnPC (P.to_int pc)) + else raise (BugOnPC (P.to_int pc)) + else raise (BugOnPC (P.to_int pc)) + else raise (BugOnPC (P.to_int pc)) + | _ -> + raise (BugOnPC (P.to_int pc)))) + | None -> () +end + +module T = Partial.T(FUNS) +let branch_target = T.branch_target + diff --git a/backend/Tunnelingproof.v b/backend/LTLTunnelingproof.v index 3bc92f75..d36d3c76 100644 --- a/backend/Tunnelingproof.v +++ b/backend/LTLTunnelingproof.v @@ -17,7 +17,7 @@ Require Import Coqlib Maps Errors. Require Import AST Linking. Require Import Values Memory Events Globalenvs Smallstep. Require Import Op Locations LTL. -Require Import Tunneling. +Require Import LTLTunneling. Local Open Scope nat. diff --git a/backend/Linear.v b/backend/Linear.v index 1443f795..cb11f7dc 100644 --- a/backend/Linear.v +++ b/backend/Linear.v @@ -170,7 +170,7 @@ Inductive step: state -> trace -> state -> Prop := forall s f sp chunk addr args dst b rs m rs', eval_addressing ge sp addr (reglist rs args) = None -> rs' = Locmap.set (R dst) - (default_notrap_load_value chunk) + Vundef (undef_regs (destroyed_by_load chunk addr) rs) -> step (State s f sp (Lload NOTRAP chunk addr args dst :: b) rs m) E0 (State s f sp b rs' m) @@ -179,7 +179,7 @@ Inductive step: state -> trace -> state -> Prop := eval_addressing ge sp addr (reglist rs args) = Some a -> Mem.loadv chunk m a = None -> rs' = Locmap.set (R dst) - (default_notrap_load_value chunk) + Vundef (undef_regs (destroyed_by_load chunk addr) rs) -> step (State s f sp (Lload NOTRAP chunk addr args dst :: b) rs m) E0 (State s f sp b rs' m) diff --git a/backend/Lineartyping.v b/backend/Lineartyping.v index 22658fb7..cf903aad 100644 --- a/backend/Lineartyping.v +++ b/backend/Lineartyping.v @@ -338,14 +338,12 @@ Local Opaque mreg_type. simpl in *; InvBooleans. econstructor; eauto. apply wt_setreg. eapply Val.has_subtype; eauto. - unfold default_notrap_load_value. constructor. apply wt_undef_regs; auto. - (* load notrap2 *) simpl in *; InvBooleans. econstructor; eauto. apply wt_setreg. eapply Val.has_subtype; eauto. - unfold default_notrap_load_value. constructor. apply wt_undef_regs; auto. - (* store *) diff --git a/backend/Mach.v b/backend/Mach.v index 1c6fdb18..2cfd738d 100644 --- a/backend/Mach.v +++ b/backend/Mach.v @@ -330,14 +330,14 @@ Inductive step: state -> trace -> state -> Prop := | exec_Mload_notrap1: forall s f sp chunk addr args dst c rs m rs', eval_addressing ge sp addr rs##args = None -> - rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- (default_notrap_load_value chunk)) -> + rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- Vundef) -> step (State s f sp (Mload NOTRAP chunk addr args dst :: c) rs m) E0 (State s f sp c rs' m) | exec_Mload_notrap2: forall s f sp chunk addr args dst c rs m a rs', eval_addressing ge sp addr rs##args = Some a -> Mem.loadv chunk m a = None -> - rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- (default_notrap_load_value chunk)) -> + rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- Vundef) -> step (State s f sp (Mload NOTRAP chunk addr args dst :: c) rs m) E0 (State s f sp c rs' m) | exec_Mstore: diff --git a/backend/RTL.v b/backend/RTL.v index 31b5cf99..fe350adf 100644 --- a/backend/RTL.v +++ b/backend/RTL.v @@ -225,14 +225,14 @@ Inductive step: state -> trace -> state -> Prop := (fn_code f)!pc = Some(Iload NOTRAP chunk addr args dst pc') -> eval_addressing ge sp addr rs##args = None -> step (State s f sp pc rs m) - E0 (State s f sp pc' (rs#dst <- (default_notrap_load_value chunk)) m) + E0 (State s f sp pc' (rs#dst <- Vundef) m) | exec_Iload_notrap2: forall s f sp pc rs m chunk addr args dst pc' a, (fn_code f)!pc = Some(Iload NOTRAP chunk addr args dst pc') -> eval_addressing ge sp addr rs##args = Some a -> Mem.loadv chunk m a = None-> step (State s f sp pc rs m) - E0 (State s f sp pc' (rs#dst <- (default_notrap_load_value chunk)) m) + E0 (State s f sp pc' (rs#dst <- Vundef) m) | exec_Istore: forall s f sp pc rs m chunk addr args src pc' a m', (fn_code f)!pc = Some(Istore chunk addr args src pc') -> diff --git a/backend/RTLTunneling.v b/backend/RTLTunneling.v new file mode 100644 index 00000000..df663f48 --- /dev/null +++ b/backend/RTLTunneling.v @@ -0,0 +1,126 @@ +(* *********************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Xavier Leroy, INRIA Paris-Rocquencourt *) +(* TODO: Proper author information *) +(* *) +(* Copyright Institut National de Recherche en Informatique et en *) +(* Automatique. All rights reserved. This file is distributed *) +(* under the terms of the INRIA Non-Commercial License Agreement. *) +(* *) +(* *********************************************************************) + +(** Branch tunneling for the RTL representation *) + +(* rq: ce code est commenté avec des remarques sur des éléments du code pas évidents, sur lequels j'ai buté, ou qui changent du tunneling LTL. Ils pourront être enlevés ultérieurement *) + +Require Import Coqlib Maps Errors. +Require Import AST. +Require Import RTL. + +(* This is a port of tunneling for LTL. See LTLTunneling.v *) + +Definition UF := PTree.t (node * Z). + +(* The oracle returns a map of "nop" node to their target with a distance (ie the number of the "nop" node on the path) to the target. *) +Axiom branch_target: RTL.function -> UF. +Extract Constant branch_target => "RTLTunnelingaux.branch_target". +(* TODO: add an extraction command to link branch_target with its implementation *) + +Local Open Scope error_monad_scope. + +Definition get (td: UF) (pc: node): node*Z := + match td!pc with (* rq: notation pour `PTree.get pc td` *) + | Some (t,d) => (t,Z.abs d) + | None => (pc,0) + end. + +(* rq: "coerce" la structure d'abre UF en une fonction qui retrouve le représentant canonique de la classe d'un noeud *) +Definition target (td: UF) (pc: node): node := fst (get td pc). +Coercion target: UF >-> Funclass. + +(* we check that the domain of [td] is included in the domain of [c] *) +(* rq: pas de basic block en RTL *) +Definition check_included (td: UF) (c: code): option instruction + := PTree.fold (fun (ok:option instruction) pc _ => if ok then c!pc else None) td (Some (Inop xH)). +(* rq: PTree.fold replie l'arbre avec un parcours préfixe. La fonction qui prend en argument l'accumulateur, le label du noeud et le pointeur (`positive`) vers ce noeud *) + +(* we check the validity of targets and their bound: +* the distance of a "nop" node (w.r.t to the target) must be greater than the one of its parents. +*) +Definition check_instr (td: UF) (pc: node) (i: instruction): res unit := + match td!pc with + | None => OK tt (* rq: le type `unit` est le singleton {`tt`} *) + | Some (tpc, dpc) => + let dpc := Z.abs dpc in + match i with + | Inop s => + let (ts,ds) := get td s in + if peq tpc ts then + if zlt ds dpc then OK tt + else Error (msg "bad distance in Inop") + else Error (msg "invalid skip of Inop") + | Icond _ _ ifso ifnot _ => + let (tso,dso) := get td ifso in + let (tnot,dnot) := get td ifnot in + if peq tpc tso then + if peq tpc tnot then + if zlt dso dpc then + if zlt dnot dpc then OK tt + else Error (msg "bad distance on else branch") + else Error (msg "bad distance on then branch") + else Error (msg "invalid skip of else branch") + else Error (msg "invalid skip of then branch") + | _ => Error (msg "cannot skip this instruction") + end + end. + +Definition check_code (td: UF) (c: code): res unit := + PTree.fold (fun ok pc i => do _ <- ok; check_instr td pc i) c (OK tt). +(* rq: `do _ <- ok; ...` exécute le bloc à droite du ';' si `ok` est un `OK _`, sinon il passe simplement l'erreur *) + +(* The second pass rewrites all LTL instructions, replacing every + * successor [s] of every instruction by [t s], the canonical representative + * of its equivalence class in the union-find data structure. + *) + +(* rq: beaucoup plus de cas que pour LTL, car plein d'instructions ont des successeurs *) +Definition tunnel_instr (t: node -> node) (i: instruction) : instruction := + match i with + | Inop s => Inop (t s) + | Iop op args res s => Iop op args res (t s) + | Iload trap chunk addr args dst s => Iload trap chunk addr args dst (t s) + | Istore chunk addr args src s => Istore chunk addr args src (t s) + | Icall sig ros args res s => Icall sig ros args res (t s) + | Ibuiltin ef args res s => Ibuiltin ef args res (t s) + | Icond cond args ifso ifnot info => + let ifso' := t ifso in + let ifnot' := t ifnot in + if peq ifso' ifnot' + (* rq: si les deux branches se rejoignent, le if ne sert à rien *) + then Inop ifso' + else Icond cond args ifso' ifnot' info + | Ijumptable arg tbl => Ijumptable arg (List.map t tbl) + | _ => i + end. + +Definition tunnel_function (f: RTL.function): res RTL.function := + let td := branch_target f in + let c := fn_code f in + if check_included td c then + do _ <- check_code td c ; OK + (mkfunction (* rq: mkfunction construit un "Record" `RTL.function` *) + (fn_sig f) + (fn_params f) + (fn_stacksize f) + (PTree.map1 (tunnel_instr td) c) (* rq: `map1` est un map sans le pointeur *) + (td (fn_entrypoint f))) + else Error (msg "Some node of the union-find is not in the CFG"). + +Definition tunnel_fundef (f: fundef): res fundef := + transf_partial_fundef tunnel_function f. + +Definition transf_program (p: program): res program := + transform_partial_program tunnel_fundef p. + diff --git a/backend/RTLTunnelingaux.ml b/backend/RTLTunnelingaux.ml new file mode 100644 index 00000000..9333e357 --- /dev/null +++ b/backend/RTLTunnelingaux.ml @@ -0,0 +1,112 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* TODO: Proper author information *) +(* *) +(* Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +(* + +This file implements the [branch_target] oracle that identifies "nop" branches in a RTL function, +and computes their target node with the distance (ie the number of cummulated nops) toward this target. + +See [RTLTunneling.v] + +*) + +open Coqlib +open RTL +open Maps +open Camlcoq +open Tunnelinglibs + +module LANG = struct + type code_unit = RTL.instruction + type funct = RTL.coq_function +end + +module OPT = struct + let langname = "RTL" + let limit_tunneling = None + let debug_flag = ref false + let final_dump = false +end + +module Partial = Tunnelinglibs.Tunneling(LANG)(OPT) + +module FUNS = struct + let build_simplified_cfg c acc pc i = + match i with + | Inop s -> + let ns = get_node c s in + set_branch c pc ns; + incr nopcounter; + acc + | Icond (_, _, s1, s2, _) -> + c.num_rems <- c.num_rems + 1; + let ns1 = get_node c s1 in + let ns2 = get_node c s2 in + let npc = get_node c pc in + npc.inst <- COND(ns1, ns2); + npc::acc + | _ -> acc + + let print_code_unit c println (pc, i) = + match i with + | Inop s -> (if println then Partial.debug "\n"); + Partial.debug "%d:Inop %d %s\n" pc (P.to_int s) (string_of_labeli c.nodes pc); + false + | Icond (_, _, s1, s2, _) -> (if println then Partial.debug "\n"); + Partial.debug "%d:Icond (%d,%d) %s\n" pc (P.to_int s1) (P.to_int s2) (string_of_labeli c.nodes pc); + false + | _ -> Partial.debug "%d " pc; + true + + let fn_code f = f.fn_code + let fn_entrypoint f = f.fn_entrypoint + + + (*************************************************************) + (* Copy-paste of the extracted code of the verifier *) + (* with [raise (BugOnPC (P.to_int pc))] instead of [Error.*] *) + + let check_code_unit td pc i = + match PTree.get pc td with + | Some p -> + let (tpc, dpc) = p in + let dpc0 = dpc in begin + match i with + | Inop s -> + let (ts, ds) = get td s in + if peq tpc ts + then if zlt ds dpc0 + then () + else raise (BugOnPC (P.to_int pc)) + else raise (BugOnPC (P.to_int pc)) + | Icond (_, _, s1, s2, _) -> + let (ts1, ds1) = get td s1 in + let (ts2, ds2) = get td s2 in + if peq tpc ts1 + then if peq tpc ts2 + then if zlt ds1 dpc0 + then if zlt ds2 dpc0 + then () + else raise (BugOnPC (P.to_int pc)) + else raise (BugOnPC (P.to_int pc)) + else raise (BugOnPC (P.to_int pc)) + else raise (BugOnPC (P.to_int pc)) + | _ -> + raise (BugOnPC (P.to_int pc)) end + | None -> () + +end + +module T = Partial.T(FUNS) +let branch_target = T.branch_target + diff --git a/backend/RTLTunnelingproof.v b/backend/RTLTunnelingproof.v new file mode 100644 index 00000000..51ef0f7a --- /dev/null +++ b/backend/RTLTunnelingproof.v @@ -0,0 +1,670 @@ +(* *********************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Xavier Leroy, INRIA Paris-Rocquencourt *) +(* TODO: Proper author information *) +(* *) +(* Copyright Institut National de Recherche en Informatique et en *) +(* Automatique. All rights reserved. This file is distributed *) +(* under the terms of the INRIA Non-Commercial License Agreement. *) +(* *) +(* *********************************************************************) + +(** Correctness proof for the branch tunneling optimization for RTL. *) +(* This is a port of Tunnelingproof.v, the same optimisation for LTL. *) + +Require Import Coqlib Maps Errors. +Require Import AST Linking. +Require Import Values Memory Registers Events Globalenvs Smallstep. +Require Import Op Locations RTL. +Require Import RTLTunneling. +Require Import Conventions1. + +Local Open Scope nat. + +(**) Definition check_included_spec (c:code) (td:UF) (ok: option instruction) := + ok <> None -> forall pc, c!pc = None -> td!pc = None. + +Lemma check_included_correct (td:UF) (c:code): + check_included_spec c td (check_included td c). +Proof. + apply PTree_Properties.fold_rec with (P:=check_included_spec c); unfold check_included_spec. + - intros m m' oi EQ IND N pc. rewrite <- EQ. apply IND. apply N. + - intros N pc. rewrite PTree.gempty. auto. + - intros m oi pc v N S IND. destruct oi. + + intros. rewrite PTree.gsspec. destruct (peq _ _); try congruence. apply IND. congruence. apply H0. + + contradiction. +Qed. + +(**) Inductive target_bounds (target: node -> node) (bound: node -> nat) (pc: node) : option instruction -> Prop := + | TB_default (TB: target pc = pc) oi: + target_bounds target bound pc oi + | TB_nop s + (EQ: target pc = target s) + (DEC: bound s < bound pc): + target_bounds target bound pc (Some (Inop s)) + | TB_cond cond args ifso ifnot info + (EQSO: target pc = target ifso) + (EQNOT: target pc = target ifnot) + (DECSO: bound ifso < bound pc) + (DECNOT: bound ifnot < bound pc): + target_bounds target bound pc (Some (Icond cond args ifso ifnot info)) +. +Local Hint Resolve TB_default: core. + +Lemma target_None (td: UF) (pc: node): td!pc = None -> td pc = pc. +Proof. + unfold target, get. intro EQ. rewrite EQ. auto. +Qed. +Local Hint Resolve target_None Z.abs_nonneg: core. + +Lemma get_nonneg td pc t d: get td pc = (t,d) -> (0 <= d)%Z. +Proof. + unfold get. destruct td!pc as [(tpc,dpc)|]; intro H; inv H; lia. +Qed. +Local Hint Resolve get_nonneg: core. + +(**) Definition bound (td: UF) (pc: node) := Z.to_nat (snd (get td pc)). + + +(* TODO: à réécrire proprement *) +Lemma check_instr_correct (td: UF) (pc: node) (i: instruction): + check_instr td pc i = OK tt -> + target_bounds (target td) (bound td) pc (Some i). +Proof. + unfold check_instr. destruct (td!pc) as [(tpc,dpc)|] eqn:EQ. + assert (DPC: snd (get td pc) = Z.abs dpc). { unfold get. rewrite EQ. auto. } + - destruct i; try congruence. + + destruct (get td n) as (ts,ds) eqn:EQs. + destruct (peq _ _); try congruence. + destruct (zlt _ _); try congruence. intros _. + apply TB_nop. replace (td pc) with tpc. + unfold target. rewrite EQs. auto. + unfold target. unfold get. rewrite EQ. auto. + unfold bound. rewrite DPC. rewrite EQs; simpl. apply Z2Nat.inj_lt; try lia. apply get_nonneg with td n ts. apply EQs. + + destruct (get td n) as (tso,dso) eqn:EQSO. + destruct (get td n0) as (tnot,dnot) eqn:EQNOT. + intro H. + repeat ((destruct (peq _ _) in H || destruct (zlt _ _) in H); try congruence). + apply TB_cond; subst. + * unfold target. replace (fst (get td pc)) with tnot. rewrite EQSO. auto. + unfold get. rewrite EQ. auto. + * unfold target. replace (fst (get td pc)) with tnot. rewrite EQNOT. auto. + unfold get. rewrite EQ. auto. + * unfold bound. rewrite DPC. apply Z2Nat.inj_lt; try lia. apply get_nonneg with td n tnot. rewrite EQSO. auto. rewrite EQSO. auto. + * unfold bound. rewrite DPC. apply Z2Nat.inj_lt; try lia. apply get_nonneg with td n0 tnot. rewrite EQNOT; auto. rewrite EQNOT; auto. + - intros _. apply TB_default. unfold target. unfold get. rewrite EQ. auto. +Qed. + +Definition check_code_spec (td:UF) (c:code) (ok: res unit) := + ok = OK tt -> forall pc i, c!pc = Some i -> target_bounds (target td) (bound td) pc (Some i). + +Lemma check_code_correct (td:UF) c: + check_code_spec td c (check_code td c). +Proof. + unfold check_code. apply PTree_Properties.fold_rec; unfold check_code_spec. + - intros. rewrite <- H in H2. apply H0; auto. + - intros. rewrite PTree.gempty in H0. congruence. + - intros m [[]|e] pc i N S IND; simpl; try congruence. + intros H pc0 i0. rewrite PTree.gsspec. destruct (peq _ _). + subst. intro. inv H0. apply check_instr_correct. apply H. + auto. +Qed. + +Theorem branch_target_bounds: + forall f tf pc, + tunnel_function f = OK tf -> + target_bounds (branch_target f) (bound (branch_target f)) pc (f.(fn_code)!pc). +Proof. +(* ~old + intros t tf pc. unfold tunnel_function. + destruct (check_included _ _) eqn:HI; try congruence. + destruct (check_code _ _) eqn: HC. + - intros _. destruct ((fn_code t) ! pc) eqn:EQ. + + exploit check_code_correct; eauto. + replace tt with u. auto. {destruct u; auto. } + + exploit check_included_correct; eauto. rewrite HI. congruence. + intro. apply TB_default. unfold target. unfold get. rewrite H. debug auto. + - simpl. congruence. +*) + + intros. unfold tunnel_function in H. + destruct (check_included _ _) eqn:EQinc; try congruence. + monadInv H. rename EQ into EQcode. + destruct (_ ! _) eqn:EQ. + - exploit check_code_correct. destruct x. apply EQcode. apply EQ. auto. + - exploit check_included_correct. + rewrite EQinc. congruence. + apply EQ. + intro. apply TB_default. apply target_None. apply H. +Qed. + +(** Preservation of semantics *) + +Definition match_prog (p tp: program) := + match_program (fun _ f tf => tunnel_fundef f = OK tf) eq p tp. + (* rq: `(fun _ ...)` est la fonction pour matcher des fonctions + * `eq` la fonction pour matcher les variables ? (`varinfo` dans la def) + * `p` et `tp` sont les programmes donc on doit dire s'ils match + *) + + (**) Lemma transf_program_match: + forall prog tprog, transf_program prog = OK tprog -> match_prog prog tprog. +Proof. + intros. eapply match_transform_partial_program_contextual; eauto. +Qed. + + +Section PRESERVATION. + + +Variables prog tprog: program. +Hypothesis TRANSL: match_prog prog tprog. (* rq: on suppose que les programmes match *) +Let ge := Genv.globalenv prog. +Let tge := Genv.globalenv tprog. + +(* rq: pour les deux lemmes suivants, j'ai recopié les preuves, mais je ne les comprends pas du tout... D'où vient `exploit` ?? *) +(* rq: `exploit` vient de Coqlib! Mais je comprends pas encore vraiment ce qui se passe... *) +Lemma functions_translated: + forall (v: val) (f: fundef), + Genv.find_funct ge v = Some f -> + exists tf, tunnel_fundef f = OK tf /\ Genv.find_funct tge v = Some tf. +Proof. + intros. + exploit (Genv.find_funct_match TRANSL). apply H. + intros (cu & tf & A & B & C). + (* exists tf. split. apply B. apply A. *) + (* rq: on peut remplacer les `split` et `apply` par `eauto`, et ne pas spécifier le + * `exists`, avec un `eexists` *) + eexists. eauto. + (* rq: Je ne comprends pas à quoi sert le `intuition`... *) + (* repeat eexists; intuition eauto. *) +Qed. + +Lemma function_ptr_translated: + forall v f, + Genv.find_funct_ptr ge v = Some f -> + exists tf, + Genv.find_funct_ptr tge v = Some tf /\ tunnel_fundef f = OK tf. +Proof. + intros. exploit (Genv.find_funct_ptr_match TRANSL). + - apply H. + - intros (cu & tf & A & B & C). exists tf. split. apply A. apply B. +Qed. + +Lemma symbols_preserved s: Genv.find_symbol tge s = Genv.find_symbol ge s. +Proof. + apply (Genv.find_symbol_match TRANSL). + (* rq: ici pas de `exploit` mais un `apply` parce que `Genv.find_symbol_match + * prouve vraiment pile ce qu'on veut *) +Qed. + +Lemma sig_preserved: + forall f tf, tunnel_fundef f = OK tf -> funsig tf = funsig f. +Proof. + intros. destruct f; simpl in H. + - + (* + unfold bind in H. + destruct (tunnel_function _) as [x|] eqn:EQ; try congruence. + inversion H. + unfold tunnel_function in EQ. + destruct (check_included _ _) in EQ; try congruence. + unfold bind in EQ. destruct (check_code _ _) in EQ; try congruence. inversion EQ. auto. + *) + + monadInv H. (* rq: c'est une tactique maison... *) + unfold tunnel_function in EQ. + destruct (check_included _ _) in EQ; try congruence. + monadInv EQ. auto. + - monadInv H. auto. +Qed. + +Lemma senv_preserved: + Senv.equiv ge tge. +Proof. + eapply (Genv.senv_match TRANSL). (* Il y a déjà une preuve de cette propriété très exactement, je ne vais pas réinventer la roue ici *) +Qed. + +(* TODO: vérifier s'il faut faire quelque chose avec le `res` ? *) +Inductive match_stackframes: stackframe -> stackframe -> Prop := + | match_stackframes_intro: + forall res f tf sp pc rs trs + (TF: tunnel_function f = OK tf) + (RS: Registers.regs_lessdef rs trs), + match_stackframes + (Stackframe res f sp pc rs) + (Stackframe res tf sp (branch_target f pc) trs). + +(* rq: `match_states s1 s2` correspond à s1 ~ s2 *) +Inductive match_states: state -> state -> Prop := + | match_states_intro: + forall s ts f tf sp pc rs trs m tm + (STK: list_forall2 match_stackframes s ts) + (TF: tunnel_function f = OK tf) + (RS: Registers.regs_lessdef rs trs) + (MEM: Mem.extends m tm), + match_states + (State s f sp pc rs m) + (State ts tf sp (branch_target f pc) trs tm) + | match_states_call: + forall s ts f tf a ta m tm + (STK: list_forall2 match_stackframes s ts) + (TF: tunnel_fundef f = OK tf) + (ARGS: list_forall2 Val.lessdef a ta) + (MEM: Mem.extends m tm), + match_states + (Callstate s f a m) + (Callstate ts tf ta tm) + | match_states_return: + forall s ts v tv m tm + (STK: list_forall2 match_stackframes s ts) + (VAL: Val.lessdef v tv) + (MEM: Mem.extends m tm), + match_states + (Returnstate s v m) + (Returnstate ts tv tm). + +(* TODO: il faut définir une bonne mesure *) +(* (**) Definition measure (st: state) : nat := + match st with + | State s f sp pc rs m => + match (fn_code f)!pc with + | Some (Inop pc') => (bound (branch_target f) pc') * 2 + 1 + | Some (Icond _ _ ifso ifnot _) => (max + (bound (branch_target f) ifso) + (bound (branch_target f) ifnot) + ) * 2 + 1 + | Some _ => (bound (branch_target f) pc) * 2 + | None => 0 + end + | Callstate s f v m => 0 + | Returnstate s v m => 0 + end. +*) + +Definition measure (st: state): nat := + match st with + | State s f sp pc rs m => bound (branch_target f) pc + | Callstate s f v m => 0 + | Returnstate s v m => 0 + end. + + +(* rq: sans les lemmes définis au-dessus je ne vois pas trop comment j'aurais + * fait... ni comment j'aurais eu l'idée d'en faire des lemmes ? *) +(**) Lemma transf_initial_states: + forall s1: state, initial_state prog s1 -> + exists s2: state, initial_state tprog s2 /\ match_states s1 s2. +Proof. + intros. inversion H as [b f m0 ge0 MEM SYM PTR SIG CALL]. + exploit function_ptr_translated. + - apply PTR. + - intros (tf & TPTR & TUN). + exists (Callstate nil tf nil m0). split. + + apply initial_state_intro with b. + * apply (Genv.init_mem_match TRANSL). apply MEM. + * rewrite (match_program_main TRANSL). + rewrite symbols_preserved. apply SYM. + * apply TPTR. + * rewrite <- SIG. apply sig_preserved. apply TUN. + + apply match_states_call. + * apply list_forall2_nil. + * apply TUN. + * apply list_forall2_nil. + * apply Mem.extends_refl. +Qed. + +(**) Lemma transf_final_states: + forall (s1 : state) + (s2 : state) (r : Integers.Int.int), + match_states s1 s2 -> + final_state s1 r -> + final_state s2 r. +Proof. + (* rq: `inv` au lieu de `inversion` fait beaucoup de nettoyage dans les + * hypothèses, mais je sais pas trop ce que ça fait exactement *) + intros. inv H0. inv H. inv VAL. inversion STK. apply final_state_intro. +Qed. + +Lemma tunnel_function_unfold: + forall f tf pc, + tunnel_function f = OK tf -> + (fn_code tf) ! pc = + option_map (tunnel_instr (branch_target f)) (fn_code f) ! pc. +Proof. + intros f tf pc. + unfold tunnel_function. + destruct (check_included _ _) eqn:EQinc; try congruence. + destruct (check_code _ _) eqn:EQcode; simpl; try congruence. + intro. inv H. simpl. rewrite PTree.gmap1. reflexivity. +Qed. + +Lemma reglist_lessdef: + forall (rs trs: Registers.Regmap.t val) (args: list Registers.reg), + regs_lessdef rs trs -> Val.lessdef_list (rs##args) (trs##args). +Proof. + intros. induction args; simpl; constructor. + apply H. apply IHargs. +Qed. + +Lemma instruction_type_preserved: + forall (f tf:function) (pc:node) (i ti:instruction) + (TF: tunnel_function f = OK tf) + (FATPC: (fn_code f) ! pc = Some i) + (NOTINOP: forall s, i <> Inop s) + (NOTICOND: forall cond args ifso ifnot info, i <> Icond cond args ifso ifnot info) + (TI: ti = tunnel_instr (branch_target f) i), + (fn_code tf) ! (branch_target f pc) = Some ti. +Proof. + intros. + assert ((fn_code tf) ! pc = Some (tunnel_instr (branch_target f) i)) as TFATPC. + rewrite (tunnel_function_unfold f tf pc); eauto. + rewrite FATPC; eauto. + exploit branch_target_bounds; eauto. + intro TB. inversion TB as [BT|s|cond args ifso ifnot info]; try (rewrite FATPC in H; congruence). +Qed. + +(* rq: utilisée pour `tunnel_step_correct` mais je ne comprends pas tout *) +(**) Lemma find_function_translated: + forall ros rs trs fd, + regs_lessdef rs trs -> + find_function ge ros rs = Some fd -> + exists tfd, tunnel_fundef fd = OK tfd /\ find_function tge ros trs = Some tfd. +Proof. + intros. destruct ros; simpl in *. + - (* reg *) + assert (E: trs # r = rs # r). + { exploit Genv.find_funct_inv. apply H0. intros (b & EQ). + generalize (H r) . rewrite EQ. intro LD. inv LD. auto. } + rewrite E. exploit functions_translated; eauto. + - (* ident *) + rewrite symbols_preserved. destruct (Genv.find_symbol ge i); inv H0. + exploit function_ptr_translated; eauto. + intros (tf & X1 & X2). exists tf; intuition. +Qed. + +Lemma list_forall2_lessdef_rs: + forall rs trs args, + regs_lessdef rs trs -> + list_forall2 Val.lessdef rs ## args trs ## args. +Proof. + intros rs trs args LD. + exploit (reglist_lessdef rs trs args). apply LD. + induction args; simpl; intros H; try constructor; inv H. + apply H3. apply IHargs. apply H5. +Qed. + +Lemma fn_stacksize_preserved: + forall f tf + (TF: tunnel_function f = OK tf), + fn_stacksize f = fn_stacksize tf. +Proof. + intros f tf. unfold tunnel_function. + destruct (check_included _ _); try congruence. + intro H. monadInv H. simpl. reflexivity. +Qed. + +Lemma regs_setres_lessdef: + forall res vres tvres rs trs, + regs_lessdef rs trs -> Val.lessdef vres tvres -> + regs_lessdef (regmap_setres res vres rs) (regmap_setres res tvres trs). +Proof. + induction res; intros; simpl; try auto using set_reg_lessdef. +Qed. + +Lemma regmap_optget_lessdef: + forall or rs trs, + regs_lessdef rs trs -> Val.lessdef (regmap_optget or Vundef rs) (regmap_optget or Vundef trs). +Proof. + intros or rs trs RS. + induction or; simpl; auto using set_reg_lessdef. +Qed. + +Lemma tunnel_fundef_Internal: + forall (f: function) (tf: fundef) + (TF: tunnel_fundef (Internal f) = OK tf), + exists (tf': function), tf = (Internal tf') /\ tunnel_function f = OK tf'. +Proof. + intros f tf. + unfold tunnel_fundef. simpl. intro H. monadInv H. exists x. + split. reflexivity. apply EQ. +Qed. + +Lemma tunnel_fundef_External: + forall (ef: external_function) (tf: fundef) + (TF: tunnel_fundef (External ef) = OK tf), + tf = (External ef). +Proof. + intros f tf. + unfold tunnel_fundef. simpl. intro H. monadInv H. reflexivity. +Qed. + + +Lemma fn_entrypoint_preserved: + forall f tf + (TF: tunnel_function f = OK tf), + fn_entrypoint tf = branch_target f (fn_entrypoint f). +Proof. + intros f tf. + unfold tunnel_function. destruct (check_included _ _); try congruence. + intro TF. monadInv TF. simpl. reflexivity. +Qed. + +Lemma init_regs_lessdef: + forall f tf args targs + (ARGS: list_forall2 Val.lessdef args targs) + (TF: tunnel_function f = OK tf), + regs_lessdef (init_regs args (fn_params f)) (init_regs targs (fn_params tf)). +Proof. + assert (regs_lessdef (Regmap.init Vundef) (Regmap.init Vundef)) as Hundef. + { unfold Regmap.init. unfold regs_lessdef. intro. unfold Regmap.get. rewrite PTree.gempty. apply Val.lessdef_undef. } + + intros f tf args targs ARGS. + + unfold tunnel_function. destruct (check_included _ _) eqn:EQinc; try congruence. + intro TF. monadInv TF. simpl. + (* + induction ARGS. + - induction (fn_params f) eqn:EQP; apply Hundef. + - induction (fn_params f) eqn:EQP. + * simpl. apply Hundef. + * simpl. apply set_reg_lessdef. apply H. + *) + + generalize (fn_params f) as l. induction ARGS; induction l; try (simpl; apply Hundef). + simpl. apply set_reg_lessdef; try assumption. apply IHARGS. +Qed. + +Lemma lessdef_forall2_list: + forall args ta, + list_forall2 Val.lessdef args ta -> Val.lessdef_list args ta. +Proof. + intros args ta H. induction H. apply Val.lessdef_list_nil. apply Val.lessdef_list_cons. apply H. apply IHlist_forall2. +Qed. + + +(* `Lemma tunnel_step_correct` correspond au diagramme "option simulation" *) +Lemma tunnel_step_correct: + forall st1 t st2, step ge st1 t st2 -> + forall st1' (MS: match_states st1 st1'), + (exists st2', step tge st1' t st2' /\ match_states st2 st2') + \/ (measure st2 < measure st1 /\ t = E0 /\ match_states st2 st1')%nat. +Proof. + intros st1 t st2 H. induction H; intros; try (inv MS). + - (* Inop *) + exploit branch_target_bounds. apply TF. + rewrite H. intro. inv H0. + + (* TB_default *) + rewrite TB. left. eexists. split. + * apply exec_Inop. rewrite (tunnel_function_unfold f tf pc). rewrite H. simpl. eauto. apply TF. + * constructor; try assumption. + + (* TB_nop *) + simpl. right. repeat split. apply DEC. + rewrite EQ. apply match_states_intro; assumption. + - (* Iop *) + exploit eval_operation_lessdef; try eassumption. + apply reglist_lessdef. apply RS. + intros (tv & EVAL & LD). + left; eexists; split. + + eapply exec_Iop with (v:=tv). + apply instruction_type_preserved with (Iop op args res pc'); (simpl; auto)||congruence. + rewrite <- EVAL. apply eval_operation_preserved. apply symbols_preserved. + + apply match_states_intro; eauto. apply set_reg_lessdef. apply LD. apply RS. + - (* Iload *) + exploit eval_addressing_lessdef; try eassumption. + apply reglist_lessdef. apply RS. + intros (ta & EVAL & LD). + exploit Mem.loadv_extends; try eassumption. + intros (tv & LOAD & LD'). + left. eexists. split. + + eapply exec_Iload. + * exploit instruction_type_preserved; (simpl; eauto)||congruence. + * rewrite <- EVAL. apply eval_addressing_preserved. apply symbols_preserved. + * apply LOAD. + + apply match_states_intro; try assumption. apply set_reg_lessdef. apply LD'. apply RS. + - (* Iload NOTRAP1 *) + exploit eval_addressing_lessdef_none; try eassumption. + apply reglist_lessdef; apply RS. + left. eexists. split. + + eapply exec_Iload_notrap1. + * exploit instruction_type_preserved; (simpl; eauto)||congruence. + * rewrite <- H1. apply eval_addressing_preserved. apply symbols_preserved. + + apply match_states_intro; try assumption. apply set_reg_lessdef. apply Val.lessdef_undef. apply RS. + - (* Iload NOTRAP2 *) + exploit eval_addressing_lessdef; try eassumption. + apply reglist_lessdef; apply RS. + intros (ta & EVAL & LD). + (* TODO: on peut sans doute factoriser ça *) + destruct (Mem.loadv chunk tm ta) eqn:Htload. + + left; eexists; split. + eapply exec_Iload. + * exploit instruction_type_preserved; (simpl; eauto)||congruence. + * rewrite <- EVAL. apply eval_addressing_preserved. apply symbols_preserved. + * apply Htload. + * apply match_states_intro; try assumption. apply set_reg_lessdef; eauto. + + left; eexists; split. + eapply exec_Iload_notrap2. + * exploit instruction_type_preserved; (simpl; eauto)||congruence. + * rewrite <- EVAL. apply eval_addressing_preserved. apply symbols_preserved. + * apply Htload. + * apply match_states_intro; try assumption. apply set_reg_lessdef; eauto. + - (* Lstore *) + exploit eval_addressing_lessdef; try eassumption. + apply reglist_lessdef; apply RS. + intros (ta & EVAL & LD). + exploit Mem.storev_extends; try eassumption. apply RS. + intros (tm' & STORE & MEM'). + left. eexists. split. + + eapply exec_Istore. + * exploit instruction_type_preserved; (simpl; eauto)||congruence. + * rewrite <- EVAL. apply eval_addressing_preserved. apply symbols_preserved. + * rewrite STORE. reflexivity. + + apply match_states_intro; try eassumption. + - (* Icall *) + left. + exploit find_function_translated; try eassumption. + intros (tfd & TFD & FIND). + eexists. split. + + eapply exec_Icall. + * exploit instruction_type_preserved; (simpl; eauto)||congruence. + * apply FIND. + * apply sig_preserved. apply TFD. + + apply match_states_call; try assumption. + * apply list_forall2_cons; try assumption. apply match_stackframes_intro; try assumption. + * apply list_forall2_lessdef_rs. apply RS. + - (* Itailcall *) + exploit find_function_translated; try eassumption. + intros (tfd & TFD & FIND). + exploit Mem.free_parallel_extends; try eassumption. + intros (tm' & FREE & MEM'). + left. eexists. split. + + eapply exec_Itailcall. + * exploit instruction_type_preserved; (simpl; eauto)||congruence. + * apply FIND. + * apply sig_preserved. apply TFD. + * erewrite <- fn_stacksize_preserved; try eassumption. + + apply match_states_call; try assumption. + apply list_forall2_lessdef_rs. apply RS. + - (* Ibuiltin *) + exploit eval_builtin_args_lessdef; try eassumption. apply RS. + intros (vl2 & EVAL & LD). + exploit external_call_mem_extends; try eassumption. + intros (tvres & tm' & EXT & LDRES & MEM' & UNCHGD). + left. eexists. split. + + eapply exec_Ibuiltin. + * exploit instruction_type_preserved; (simpl; eauto)||congruence. + * eapply eval_builtin_args_preserved. eapply symbols_preserved. eapply EVAL. + * eapply external_call_symbols_preserved. eapply senv_preserved. eapply EXT. + + apply match_states_intro; try assumption. apply regs_setres_lessdef; try assumption. + - (* Icond *) + simpl. exploit branch_target_bounds. apply TF. rewrite H. intro. inv H1. + + (* TB_default *) + rewrite TB. + destruct (fn_code tf)!pc as [[]|] eqn:EQ; + assert (tunnel_function f = OK tf) as TF'; auto; + unfold tunnel_function in TF; destruct (check_included _ _) in TF; monadInv TF; + simpl in EQ; rewrite PTree.gmap1 in EQ; rewrite H in EQ; simpl in EQ; destruct (peq _ _) eqn: EQpeq in EQ; try congruence. + * left. eexists. split. + -- eapply exec_Inop. simpl. rewrite PTree.gmap1. rewrite H. simpl. rewrite EQpeq. reflexivity. + -- destruct b. apply match_states_intro; eauto. rewrite e. apply match_states_intro; eauto. + * left. eexists. split. + -- eapply exec_Icond; auto. simpl. rewrite PTree.gmap1. rewrite H. simpl. rewrite EQpeq. reflexivity. eapply eval_condition_lessdef; try eassumption. apply reglist_lessdef. apply RS. + -- destruct b; apply match_states_intro; auto. + + (* TB_cond *) right; repeat split; destruct b; try assumption. + * rewrite EQSO. apply match_states_intro; try assumption. + * rewrite EQNOT. apply match_states_intro; try assumption. + - (* Ijumptable *) + left. eexists. split. + + eapply exec_Ijumptable. + * exploit instruction_type_preserved; (simpl; eauto)||congruence. + * generalize (RS arg). rewrite H0. intro. inv H2. reflexivity. + * rewrite list_nth_z_map. rewrite H1. simpl. reflexivity. + + apply match_states_intro; try eassumption. + - (* Ireturn *) + exploit Mem.free_parallel_extends; try eassumption. + intros (tm' & FREE & MEM'). + left. eexists. split. + + eapply exec_Ireturn. + * exploit instruction_type_preserved; (simpl; eauto)||congruence. + * erewrite <- fn_stacksize_preserved. eapply FREE. eapply TF. + + apply match_states_return; try eassumption. + apply regmap_optget_lessdef. apply RS. + - (* internal function *) + exploit tunnel_fundef_Internal; try eassumption. + intros (tf' & EQ & TF'). subst. + exploit Mem.alloc_extends; try eassumption. reflexivity. reflexivity. + intros (m2' & ALLOC & EXT). + left. eexists. split. + + eapply exec_function_internal. + rewrite <- (fn_stacksize_preserved f tf'). eapply ALLOC. eapply TF'. + + rewrite (fn_entrypoint_preserved f tf'); try eassumption. apply match_states_intro; try eassumption. + apply init_regs_lessdef. apply ARGS. apply TF'. + - (* external function *) + exploit external_call_mem_extends. eapply H. eapply MEM. eapply lessdef_forall2_list. eapply ARGS. + intros (tvres & tm' & EXTCALL & LD & EXT & MEMUNCHGD). + left. eexists. split. + + erewrite (tunnel_fundef_External ef tf); try eassumption. + eapply exec_function_external. eapply external_call_symbols_preserved. eapply senv_preserved. eapply EXTCALL. + + eapply match_states_return; try assumption. + - (* return *) + inv STK. inv H1. + left. eexists. split. + + eapply exec_return. + + eapply match_states_intro; try assumption. + apply set_reg_lessdef; try assumption. +Qed. + + +Theorem transf_program_correct: + forward_simulation (RTL.semantics prog) (RTL.semantics tprog). +Proof. + eapply forward_simulation_opt. + apply senv_preserved. + apply transf_initial_states. + apply transf_final_states. + exact tunnel_step_correct. +Qed. + +End PRESERVATION. diff --git a/backend/RTLcommonaux.ml b/backend/RTLcommonaux.ml index 0e369d04..f7e49aa4 100644 --- a/backend/RTLcommonaux.ml +++ b/backend/RTLcommonaux.ml @@ -6,12 +6,15 @@ open Kildall open Lattice let p2i r = P.to_int r + let i2p i = P.of_int i let get_some = function | None -> failwith "Got None instead of Some _" | Some thing -> thing +let get_ok r = match r with Errors.OK x -> x | _ -> failwith "Did not get OK" + let successors_inst = function | Inop n | Iop (_, _, _, n) @@ -42,49 +45,70 @@ let get_join_points code entry = let reached = ref (PTree.map (fun n i -> false) code) in let reached_twice = ref (PTree.map (fun n i -> false) code) in let rec traverse pc = - if get_some @@ PTree.get pc !reached then begin + if get_some @@ PTree.get pc !reached then ( if not (get_some @@ PTree.get pc !reached_twice) then - reached_twice := PTree.set pc true !reached_twice - end else begin + reached_twice := PTree.set pc true !reached_twice) + else ( reached := PTree.set pc true !reached; - traverse_succs (successors_inst @@ get_some @@ PTree.get pc code) - end + traverse_succs (successors_inst @@ get_some @@ PTree.get pc code)) and traverse_succs = function | [] -> () - | [pc] -> traverse pc - | pc :: l -> traverse pc; traverse_succs l - in traverse entry; !reached_twice + | [ pc ] -> traverse pc + | pc :: l -> + traverse pc; + traverse_succs l + in + traverse entry; + !reached_twice -let transfer f pc after = let open Liveness in +let transfer f pc after = + let open Liveness in match PTree.get pc f.fn_code with - | Some i -> - (match i with - | Inop _ -> after - | Iop (_, args, res, _) -> - reg_list_live args (Regset.remove res after) - | Iload (_, _, _, args, dst, _) -> - reg_list_live args (Regset.remove dst after) - | Istore (_, _, args, src, _) -> - reg_list_live args (Regset.add src after) - | Icall (_, ros, args, res, _) -> - reg_list_live args (reg_sum_live ros (Regset.remove res after)) - | Itailcall (_, ros, args) -> - reg_list_live args (reg_sum_live ros Regset.empty) - | Ibuiltin (_, args, res, _) -> - reg_list_live (AST.params_of_builtin_args args) - (reg_list_dead (AST.params_of_builtin_res res) after) - | Icond (_, args, _, _, _) -> - reg_list_live args after - | Ijumptable (arg, _) -> - Regset.add arg after - | Ireturn optarg -> - reg_option_live optarg Regset.empty) + | Some i -> ( + match i with + | Inop _ -> after + | Iop (_, args, res, _) -> reg_list_live args (Regset.remove res after) + | Iload (_, _, _, args, dst, _) -> + reg_list_live args (Regset.remove dst after) + | Istore (_, _, args, src, _) -> reg_list_live args (Regset.add src after) + | Icall (_, ros, args, res, _) -> + reg_list_live args (reg_sum_live ros (Regset.remove res after)) + | Itailcall (_, ros, args) -> + reg_list_live args (reg_sum_live ros Regset.empty) + | Ibuiltin (_, args, res, _) -> + reg_list_live + (AST.params_of_builtin_args args) + (reg_list_dead (AST.params_of_builtin_res res) after) + | Icond (_, args, _, _, _) -> reg_list_live args after + | Ijumptable (arg, _) -> Regset.add arg after + | Ireturn optarg -> reg_option_live optarg Regset.empty) | None -> Regset.empty -module RegsetLat = LFSet(Regset) - -module DS = Backward_Dataflow_Solver(RegsetLat)(NodeSetBackward) +module RegsetLat = LFSet (Regset) +module DS = Backward_Dataflow_Solver (RegsetLat) (NodeSetBackward) let analyze f = - let liveouts = get_some @@ DS.fixpoint f.fn_code successors_instr (transfer f) in - PTree.map (fun n _ -> let lo = PMap.get n liveouts in transfer f n lo) f.fn_code + let liveouts = + get_some @@ DS.fixpoint f.fn_code successors_instr (transfer f) + in + PTree.map + (fun n _ -> + let lo = PMap.get n liveouts in + transfer f n lo) + f.fn_code + +let get_outputs liveness n last = + let path_last_successors = successors_inst last in + let list_input_regs = + List.map (fun n -> get_some @@ PTree.get n liveness) path_last_successors + in + List.fold_left Regset.union Regset.empty list_input_regs +(* TODO gourdinl to remove, as we do not need por anymore? + let por = match last_instruction with (* see RTLpathLivegen.final_inst_checker *) + | Icall (_, _, _, res, _) -> Regset.remove res outputs + | Ibuiltin (_, _, res, _) -> Liveness.reg_list_dead (AST.params_of_builtin_res res) outputs + | Itailcall (_, _, _) | Ireturn _ -> + assert (outputs = Regset.empty); (* defensive check for performance *) + outputs + | _ -> outputs + in (por, outputs) *) diff --git a/backend/RTLtyping.v b/backend/RTLtyping.v index 15ed6d8a..6048f895 100644 --- a/backend/RTLtyping.v +++ b/backend/RTLtyping.v @@ -858,7 +858,7 @@ Lemma wt_exec_Iload_notrap: forall env f chunk addr args dst s rs, wt_instr f env (Iload NOTRAP chunk addr args dst s) -> wt_regset env rs -> - wt_regset env (rs#dst <- (default_notrap_load_value chunk)). + wt_regset env (rs#dst <- Vundef). Proof. intros. eapply wt_regset_assign; eauto. simpl. trivial. diff --git a/backend/Tailcallproof.v b/backend/Tailcallproof.v index 80a68327..39fc10fb 100644 --- a/backend/Tailcallproof.v +++ b/backend/Tailcallproof.v @@ -440,7 +440,7 @@ Proof. TransfInstr. assert (Val.lessdef_list (rs##args) (rs'##args)). apply regs_lessdef_regs; auto. left. - exists (State s' (transf_function f) (Vptr sp0 Ptrofs.zero) pc' (rs'#dst <- (default_notrap_load_value chunk)) m'); split. + exists (State s' (transf_function f) (Vptr sp0 Ptrofs.zero) pc' (rs'#dst <- Vundef) m'); split. eapply exec_Iload_notrap1. eassumption. eapply eval_addressing_lessdef_none. eassumption. @@ -465,7 +465,7 @@ Proof. exact symbols_preserved. assumption. econstructor; eauto. apply set_reg_lessdef; auto. - + exists (State s' (transf_function f) (Vptr sp0 Ptrofs.zero) pc' (rs'#dst <- (default_notrap_load_value chunk)) m'); split. + + exists (State s' (transf_function f) (Vptr sp0 Ptrofs.zero) pc' (rs'#dst <- Vundef) m'); split. eapply exec_Iload_notrap2. eassumption. erewrite eval_addressing_preserved. eassumption. diff --git a/backend/Tunnelingaux.ml b/backend/Tunnelingaux.ml deleted file mode 100644 index 87e6d303..00000000 --- a/backend/Tunnelingaux.ml +++ /dev/null @@ -1,283 +0,0 @@ -(* *************************************************************) -(* *) -(* The Compcert verified compiler *) -(* *) -(* Sylvain Boulmé Grenoble-INP, VERIMAG *) -(* *) -(* Copyright VERIMAG. All rights reserved. *) -(* This file is distributed under the terms of the INRIA *) -(* Non-Commercial License Agreement. *) -(* *) -(* *************************************************************) - -(* - -This file implements the [branch_target] oracle that identifies "nop" branches in a LTL function, -and computes their target node with the distance (ie the number of cummulated nops) toward this target. - -See [Tunneling.v] - -*) - -open Coqlib -open LTL -open Maps -open Camlcoq - -let limit_tunneling = None (* for debugging: [Some x] limit the number of iterations *) -let debug_flag = ref false -let final_dump = false (* set to true to have a more verbose debugging *) - -let debug fmt = - if !debug_flag then Printf.eprintf fmt - else Printf.ifprintf stderr fmt - -exception BugOnPC of int - -(* type of labels in the cfg *) -type label = int * P.t - -(* instructions under analyzis *) -type simple_inst = (* a simplified view of LTL instructions *) - LBRANCH of node -| LCOND of node * node -| OTHER -and node = { - lab : label; - mutable inst: simple_inst; - mutable link: node; (* link in the union-find: itself for non "nop"-nodes, target of the "nop" otherwise *) - mutable dist: int; - mutable tag: int - } - -(* type of the (simplified) CFG *) -type cfg = { - nodes: (int, node) Hashtbl.t; - mutable rems: node list; (* remaining conditions that may become lbranch or not *) - mutable num_rems: int; - mutable iter_num: int (* number of iterations in elimination of conditions *) - } - -let lab_i (n: node): int = fst n.lab -let lab_p (n: node): P.t = snd n.lab - -let rec target c n = (* inspired from the "find" of union-find algorithm *) - match n.inst with - | LCOND(s1,s2) -> - if n.link != n - then update c n - else if n.tag < c.iter_num then ( - (* we try to change the condition ... *) - n.tag <- c.iter_num; (* ... but at most once by iteration *) - let ts1 = target c s1 in - let ts2 = target c s2 in - if ts1 == ts2 then (n.link <- ts1; ts1) else n - ) else n - | _ -> - if n.link != n - then update c n - else n -and update c n = - let t = target c n.link in - n.link <- t; t - -let get_node c p = - let li = P.to_int p in - try - Hashtbl.find c.nodes li - with - Not_found -> - let rec n = { lab = (li, p); inst = OTHER; link = n ; dist = 0; tag = 0 } in - Hashtbl.add c.nodes li n; - n - -let set_branch c p s = - let li = P.to_int p in - try - let n = Hashtbl.find c.nodes li in - n.inst <- LBRANCH s; - n.link <- target c s - with - Not_found -> - let n = { lab = (li,p); inst = LBRANCH s; link = target c s; dist = 0; tag = 0 } in - Hashtbl.add c.nodes li n - - -(* build [c.nodes] and accumulate in [acc] conditions at beginning of LTL basic-blocks *) -let build_simplified_cfg c acc pc bb = - match bb with - | Lbranch s :: _ -> - let ns = get_node c s in - set_branch c pc ns; - acc - | Lcond (_, _, s1, s2, _) :: _ -> - c.num_rems <- c.num_rems + 1; - let ns1 = get_node c s1 in - let ns2 = get_node c s2 in - let npc = get_node c pc in - npc.inst <- LCOND(ns1, ns2); - npc::acc - | _ -> acc - -(* try to change a condition into a branch -[acc] is the current accumulator of conditions to consider in the next iteration of repeat_change_cond -*) -let try_change_cond c acc pc = - match pc.inst with - | LCOND(s1,s2) -> - let ts1 = target c s1 in - let ts2 = target c s2 in - if ts1 == ts2 then ( - pc.link <- ts1; - c.num_rems <- c.num_rems - 1; - acc - ) else - pc::acc - | _ -> raise (BugOnPC (lab_i pc)) (* LCOND expected *) - -(* repeat [try_change_cond] until no condition is changed into a branch *) -let rec repeat_change_cond c = - c.iter_num <- c.iter_num + 1; - debug "++ Tunneling.branch_target %d: remaining number of conds to consider = %d\n" (c.iter_num) (c.num_rems); - let old = c.num_rems in - c.rems <- List.fold_left (try_change_cond c) [] c.rems; - let curr = c.num_rems in - let continue = - match limit_tunneling with - | Some n -> curr < old && c.iter_num < n - | None -> curr < old - in - if continue - then repeat_change_cond c - - -(* compute the final distance of each nop nodes to its target *) -let undef_dist = -1 -let self_dist = undef_dist-1 -let rec dist n = - if n.dist = undef_dist - then ( - n.dist <- self_dist; (* protection against an unexpected loop in the data-structure *) - n.dist <- - (match n.inst with - | OTHER -> 0 - | LBRANCH p -> 1 + dist p - | LCOND (p1,p2) -> 1 + (max (dist p1) (dist p2))); - n.dist - ) else if n.dist=self_dist then raise (BugOnPC (lab_i n)) - else n.dist - -let final_export f c = - let count = ref 0 in - let filter_nops_init_dist _ n acc = - let tn = target c n in - if tn == n - then ( - n.dist <- 0; (* force [n] to be a base case in the recursion of [dist] *) - acc - ) else ( - n.dist <- undef_dist; (* force [dist] to compute the actual [n.dist] *) - count := !count+1; - n::acc - ) - in - let nops = Hashtbl.fold filter_nops_init_dist c.nodes [] in - let res = List.fold_left (fun acc n -> PTree.set (lab_p n) (lab_p n.link, Z.of_uint (dist n)) acc) PTree.empty nops in - debug "* Tunneling.branch_target: final number of eliminated nops = %d\n" !count; - res - -(*********************************************) -(*** START: printing and debugging functions *) - -let string_of_labeli nodes ipc = - try - let pc = Hashtbl.find nodes ipc in - if pc.link == pc - then Printf.sprintf "(Target@%d)" (dist pc) - else Printf.sprintf "(Nop %d @%d)" (lab_i pc.link) (dist pc) - with - Not_found -> "" - -let print_bblock c println (pc, bb) = - match bb with - | Lbranch s::_ -> (if println then debug "\n"); debug "%d:Lbranch %d %s\n" pc (P.to_int s) (string_of_labeli c.nodes pc); false - | Lcond (_, _, s1, s2, _)::_ -> (if println then debug "\n"); debug "%d:Lcond (%d,%d) %s\n" pc (P.to_int s1) (P.to_int s2) (string_of_labeli c.nodes pc); false - | _ -> debug "%d " pc; true - - -let print_cfg f c = - let a = Array.of_list (PTree.fold (fun acc pc bb -> (P.to_int pc,bb)::acc) f.fn_code []) in - Array.fast_sort (fun (i1,_) (i2,_) -> i2 - i1) a; - let ep = P.to_int f.fn_entrypoint in - debug "entrypoint: %d %s\n" ep (string_of_labeli c.nodes ep); - let println = Array.fold_left (print_bblock c) false a in - (if println then debug "\n");debug "remaining cond:"; - List.iter (fun n -> debug "%d " (lab_i n)) c.rems; - debug "\n" - -(*************************************************************) -(* Copy-paste of the extracted code of the verifier *) -(* with [raise (BugOnPC (P.to_int pc))] instead of [Error.*] *) - -let get td pc = - match PTree.get pc td with - | Some p -> let (t0, d) = p in (t0, d) - | None -> (pc, Z.of_uint 0) - -let check_bblock td pc bb = - match PTree.get pc td with - | Some p -> - let (tpc, dpc) = p in - let dpc0 = dpc in - (match bb with - | [] -> - raise (BugOnPC (P.to_int pc)) - | i :: _ -> - (match i with - | Lbranch s -> - let (ts, ds) = get td s in - if peq tpc ts - then if zlt ds dpc0 - then () - else raise (BugOnPC (P.to_int pc)) - else raise (BugOnPC (P.to_int pc)) - | Lcond (_, _, s1, s2, _) -> - let (ts1, ds1) = get td s1 in - let (ts2, ds2) = get td s2 in - if peq tpc ts1 - then if peq tpc ts2 - then if zlt ds1 dpc0 - then if zlt ds2 dpc0 - then () - else raise (BugOnPC (P.to_int pc)) - else raise (BugOnPC (P.to_int pc)) - else raise (BugOnPC (P.to_int pc)) - else raise (BugOnPC (P.to_int pc)) - | _ -> - raise (BugOnPC (P.to_int pc)))) - | None -> () - -(** val check_code : coq_UF -> code -> unit res **) - -let check_code td c = - PTree.fold (fun _ pc bb -> check_bblock td pc bb) c (()) - -(*** END: copy-paste & debugging functions *******) - -let branch_target f = - debug "* Tunneling.branch_target: starting on a new function\n"; - if limit_tunneling <> None then debug "* WARNING: limit_tunneling <> None\n"; - let c = { nodes = Hashtbl.create 100; rems = []; num_rems = 0; iter_num = 0 } in - c.rems <- PTree.fold (build_simplified_cfg c) f.fn_code []; - repeat_change_cond c; - let res = final_export f c in - if !debug_flag then ( - try - check_code res f.fn_code; - if final_dump then print_cfg f c; - with e -> ( - print_cfg f c; - check_code res f.fn_code - ) - ); - res diff --git a/backend/Tunnelinglibs.ml b/backend/Tunnelinglibs.ml new file mode 100644 index 00000000..7c826ba4 --- /dev/null +++ b/backend/Tunnelinglibs.ml @@ -0,0 +1,272 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* TODO: Proper author information *) +(* *) +(* Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +(* + +This file implements the core functions of the tunneling passes, for both RTL +and LTL, by using a simplified CFG as a transparent interface + +See [LTLTunneling.v] and [RTLTunneling.v] + +*) + +open Maps +open Camlcoq + +(* type of labels in the cfg *) +type label = int * P.t + +(* instructions under analyzis *) +type simple_inst = (* a simplified view of instructions *) + BRANCH of node +| COND of node * node +| OTHER +and node = { + lab: label; + mutable inst: simple_inst; + mutable link: node; (* link in the union-find: itself for non "nop"-nodes, target of the "nop" otherwise *) + mutable dist: int; + mutable tag: int + } + +type positive = P.t +type integer = Z.t + +(* type of the (simplified) CFG *) +type cfg = { + nodes: (int, node) Hashtbl.t; + mutable rems: node list; (* remaining conditions that may become lbranch or not *) + mutable num_rems: int; + mutable iter_num: int (* number of iterations in elimination of conditions *) + } + +exception BugOnPC of int + +(* keeps track of the total number of nops seen, for debugging purposes *) +let nopcounter = ref 0 + +(* General functions that do not require language-specific context, and that + are used for building language-specific functions *) + +let rec target c n = (* inspired from the "find" of union-find algorithm *) + match n.inst with + | COND(s1,s2) -> + if n.link != n + then update c n + else if n.tag < c.iter_num then ( + (* we try to change the condition ... *) + n.tag <- c.iter_num; (* ... but at most once by iteration *) + let ts1 = target c s1 in + let ts2 = target c s2 in + if ts1 == ts2 then (n.link <- ts1; ts1) else n + ) else n + | _ -> + if n.link != n + then update c n + else n +and update c n = + let t = target c n.link in + n.link <- t; t + +let get_node c p = + let li = P.to_int p in + try + Hashtbl.find c.nodes li + with + Not_found -> + let rec n = { lab = (li, p); inst = OTHER; link = n ; dist = 0; tag = 0 } in + Hashtbl.add c.nodes li n; + n + +let set_branch c p s = + let li = P.to_int p in + try + let n = Hashtbl.find c.nodes li in + n.inst <- BRANCH s; + n.link <- target c s + with + Not_found -> + let n = { lab = (li,p); inst = BRANCH s; link = target c s; dist = 0; tag = 0 } in + Hashtbl.add c.nodes li n + +let get td pc = + match PTree.get pc td with + | Some p -> let (t0, d) = p in (t0, d) + | None -> (pc, Z.of_uint 0) + +let lab_i (n: node): int = fst n.lab +let lab_p (n: node): P.t = snd n.lab + +let undef_dist = -1 +let self_dist = undef_dist-1 +let rec dist n = + if n.dist = undef_dist + then ( + n.dist <- self_dist; (* protection against an unexpected loop in the data-structure *) + n.dist <- + (match n.inst with + | OTHER -> 0 + | BRANCH p -> 1 + dist p + | COND (p1,p2) -> 1 + (max (dist p1) (dist p2))); + n.dist + ) else if n.dist=self_dist then raise (BugOnPC (lab_i n)) + else n.dist + +let string_of_labeli nodes ipc = + try + let pc = Hashtbl.find nodes ipc in + if pc.link == pc + then Printf.sprintf "(Target@%d)" (dist pc) + else Printf.sprintf "(Nop %d @%d)" (lab_i pc.link) (dist pc) + with + Not_found -> "" + +(* + * When given the necessary types and options as context, and then some + * language-specific functions that cannot be factorised between LTL and RTL, the + * `Tunneling` functor returns a module containing the corresponding + * `branch_target` function. + *) + +module Tunneling = functor + (* Language-specific types *) + (LANG: sig + type code_unit (* the type of a node of the code cfg (an instruction or a bblock *) + type funct (* type of internal functions *) + end) + + (* Compilation options for debugging *) + (OPT: sig + val langname: string + val limit_tunneling: int option (* for debugging: [Some x] limit the number of iterations *) + val debug_flag: bool ref + val final_dump: bool (* set to true to have a more verbose debugging *) + end) + -> struct + + (* The `debug` function uses values from `OPT`, and is used in functions passed to `F` + so it must be defined between the two *) + let debug fmt = + if !OPT.debug_flag then Printf.eprintf fmt + else Printf.ifprintf stderr fmt + + module T + (* Language-specific functions *) + (FUNS: sig + (* build [c.nodes] and accumulate in [acc] conditions at beginning of LTL basic-blocks *) + val build_simplified_cfg: cfg -> node list -> positive -> LANG.code_unit -> node list + val print_code_unit: cfg -> bool -> int * LANG.code_unit -> bool + val fn_code: LANG.funct -> LANG.code_unit PTree.t + val fn_entrypoint: LANG.funct -> positive + val check_code_unit: (positive * integer) PTree.t -> positive -> LANG.code_unit -> unit + end) + (* only export what's needed *) + : sig val branch_target: LANG.funct -> (positive * integer) PTree.t end + = struct + + (* try to change a condition into a branch [acc] is the current accumulator of + conditions to consider in the next iteration of repeat_change_cond *) + let try_change_cond c acc pc = + match pc.inst with + | COND(s1,s2) -> + let ts1 = target c s1 in + let ts2 = target c s2 in + if ts1 == ts2 then ( + pc.link <- ts1; + c.num_rems <- c.num_rems - 1; + acc + ) else + pc::acc + | _ -> raise (BugOnPC (lab_i pc)) (* COND expected *) + + (* repeat [try_change_cond] until no condition is changed into a branch *) + let rec repeat_change_cond c = + c.iter_num <- c.iter_num + 1; + debug "++ %sTunneling.branch_target %d: remaining number of conds to consider = %d\n" OPT.langname (c.iter_num) (c.num_rems); + let old = c.num_rems in + c.rems <- List.fold_left (try_change_cond c) [] c.rems; + let curr = c.num_rems in + let continue = + match OPT.limit_tunneling with + | Some n -> curr < old && c.iter_num < n + | None -> curr < old + in + if continue + then repeat_change_cond c + + + (*********************************************) + (*** START: printing and debugging functions *) + + let print_cfg (f: LANG.funct) c = + let a = Array.of_list (PTree.fold (fun acc pc cu -> (P.to_int pc,cu)::acc) (FUNS.fn_code f) []) in + Array.fast_sort (fun (i1,_) (i2,_) -> i2 - i1) a; + let ep = P.to_int (FUNS.fn_entrypoint f) in + debug "entrypoint: %d %s\n" ep (string_of_labeli c.nodes ep); + let println = Array.fold_left (FUNS.print_code_unit c) false a in + (if println then debug "\n");debug "remaining cond:"; + List.iter (fun n -> debug "%d " (lab_i n)) c.rems; + debug "\n" + + + (*************************************************************) + (* Copy-paste of the extracted code of the verifier *) + (* with [raise (BugOnPC (P.to_int pc))] instead of [Error.*] *) + + (** val check_code : coq_UF -> code -> unit res **) + + let check_code td c = + PTree.fold (fun _ pc cu -> FUNS.check_code_unit td pc cu) c (()) + + (*** END: copy-paste & debugging functions *******) + + (* compute the final distance of each nop nodes to its target *) + let final_export f c = + let count = ref 0 in + let filter_nops_init_dist _ n acc = + let tn = target c n in + if tn == n + then ( + n.dist <- 0; (* force [n] to be a base case in the recursion of [dist] *) + acc + ) else ( + n.dist <- undef_dist; (* force [dist] to compute the actual [n.dist] *) + count := !count+1; + n::acc + ) + in + let nops = Hashtbl.fold filter_nops_init_dist c.nodes [] in + let res = List.fold_left (fun acc n -> PTree.set (lab_p n) (lab_p n.link, Z.of_uint (dist n)) acc) PTree.empty nops in + debug "* %sTunneling.branch_target: initial number of nops = %d\n" OPT.langname !nopcounter; + debug "* %sTunneling.branch_target: final number of eliminated nops = %d\n" OPT.langname !count; + res + + let branch_target f = + debug "* %sTunneling.branch_target: starting on a new function\n" OPT.langname; + if OPT.limit_tunneling <> None then debug "* WARNING: limit_tunneling <> None\n"; + let c = { nodes = Hashtbl.create 100; rems = []; num_rems = 0; iter_num = 0 } in + c.rems <- PTree.fold (FUNS.build_simplified_cfg c) (FUNS.fn_code f) []; + repeat_change_cond c; + let res = final_export f c in + if !OPT.debug_flag then ( + try + check_code res (FUNS.fn_code f); + if OPT.final_dump then print_cfg f c; + with e -> ( + print_cfg f c; + check_code res (FUNS.fn_code f) + ) + ); + res + end +end diff --git a/backend/ValueAnalysis.v b/backend/ValueAnalysis.v index 561e94c9..e20edff7 100644 --- a/backend/ValueAnalysis.v +++ b/backend/ValueAnalysis.v @@ -1287,13 +1287,11 @@ Proof. eapply sound_succ_state; eauto. simpl; auto. unfold transfer; rewrite H. eauto. apply ematch_update; auto. - unfold default_notrap_load_value. constructor. - (* load notrap2 *) eapply sound_succ_state; eauto. simpl; auto. unfold transfer; rewrite H. eauto. apply ematch_update; auto. - unfold default_notrap_load_value. constructor. - (* store *) exploit eval_static_addressing_sound; eauto with va. intros VMADDR. diff --git a/common/DebugPrint.ml b/common/DebugPrint.ml index d94b31d8..83a485b0 100644 --- a/common/DebugPrint.ml +++ b/common/DebugPrint.ml @@ -133,7 +133,7 @@ let print_instructions insts code = debug "[ "; List.iter ( fun n -> (PrintRTL.print_instruction stdout (P.to_int n, get_some @@ PTree.get n code)) - ) insts; debug "]" + ) insts; debug " ]" end let print_arrayp arr = begin diff --git a/common/Memory.v b/common/Memory.v index bf8ca083..ff17efb0 100644 --- a/common/Memory.v +++ b/common/Memory.v @@ -41,8 +41,6 @@ Require Export Memdata. Require Export Memtype. Require Import Lia. -Definition default_notrap_load_value (chunk : memory_chunk) := Vundef. - (* To avoid useless definitions of inductors in extracted code. *) Local Unset Elimination Schemes. diff --git a/doc/index-kvx.html b/doc/index-kvx.html index 62afb423..daa4cdc4 100644 --- a/doc/index-kvx.html +++ b/doc/index-kvx.html @@ -315,10 +315,10 @@ This IR is generic over the processor, even if currently, only used for KVX. </TR> <TR valign="top"> - <TD>Branch tunneling</TD> + <TD>(LTL) Branch tunneling</TD> <TD>LTL to LTL</TD> - <TD><A HREF="html/compcert.backend.Tunneling.html">Tunneling</A></TD> - <TD><A HREF="html/compcert.backend.Tunnelingproof.html">Tunnelingproof</A></TD> + <TD><A HREF="html/compcert.backend.LTLTunneling.html">LTLTunneling</A></TD> + <TD><A HREF="html/compcert.backend.LTLTunnelingproof.html">LTLTunnelingproof</A></TD> </TR> <TR valign="top"> diff --git a/doc/index.html b/doc/index.html index c3912cb2..34b87924 100644 --- a/doc/index.html +++ b/doc/index.html @@ -270,10 +270,10 @@ code. </TR> <TR valign="top"> - <TD>Branch tunneling</TD> + <TD>(LTL) Branch tunneling</TD> <TD>LTL to LTL</TD> - <TD><A HREF="html/compcert.backend.Tunneling.html">Tunneling</A></TD> - <TD><A HREF="html/compcert.backend.Tunnelingproof.html">Tunnelingproof</A></TD> + <TD><A HREF="html/compcert.backend.LTLTunneling.html">LTLTunneling</A></TD> + <TD><A HREF="html/compcert.backend.LTLTunnelingproof.html">LTLTunnelingproof</A></TD> </TR> <TR valign="top"> diff --git a/driver/Clflags.ml b/driver/Clflags.ml index fa17c2d9..25bd2c78 100644 --- a/driver/Clflags.ml +++ b/driver/Clflags.ml @@ -51,7 +51,7 @@ let option_flooprotate = ref 0 (* rotate the innermost loops to have the conditi let option_mtune = ref "" let option_fprepass = ref true -let option_fprepass_sched = ref "list" +let option_fprepass_sched = ref "regpres" let option_fpostpass = ref true let option_fpostpass_sched = ref "list" @@ -115,4 +115,6 @@ let option_inline_auto_threshold = ref 0 let option_profile_arcs = ref false let option_fbranch_probabilities = ref true let option_debug_compcert = ref 0 +let option_regpres_threshold = ref 2 +let option_regpres_wait_window = ref false let main_function_name = ref "main" diff --git a/driver/Compiler.vexpand b/driver/Compiler.vexpand index 1d218657..9673267d 100644 --- a/driver/Compiler.vexpand +++ b/driver/Compiler.vexpand @@ -306,7 +306,7 @@ EXPAND_RTL_FORWARD_SIMULATIONS eapply compose_forward_simulations. eapply Allocationproof.transf_program_correct; eassumption. eapply compose_forward_simulations. - eapply Tunnelingproof.transf_program_correct; eassumption. + eapply LTLTunnelingproof.transf_program_correct; eassumption. eapply compose_forward_simulations. eapply Linearizeproof.transf_program_correct; eassumption. eapply compose_forward_simulations. diff --git a/driver/Driver.ml b/driver/Driver.ml index 7192ba4b..3f5a4bd9 100644 --- a/driver/Driver.ml +++ b/driver/Driver.ml @@ -210,7 +210,9 @@ Processing options: -mtune= Type of CPU (for scheduling on some architectures) -fprepass Perform prepass scheduling (only on some architectures) [on] -fprepass= <optim> Perform postpass scheduling with the specified optimization [list] - (<optim>=list: list scheduling, <optim>=revlist: reverse list scheduling, <optim>=zigzag: zigzag scheduling, <optim>=ilp: ILP, <optim>=greedy: just packing bundles) + (<optim>=list: list scheduling, <optim>=revlist: reverse list scheduling, <optim>=regpres: list scheduling aware of register pressure, <optim>=regpres_bis: variant of regpres, <optim>=zigzag: zigzag scheduling, <optim>=ilp: ILP, <optim>=greedy: just packing bundles) + -regpres-threshold n With `-fprepass= regpres`, set threshold value for number of free registers before trying to decrease register pressure + -fregpres-wait-window When register pressure is high, use a 5-cycle waiting window instead of scheduling short paths first (default no) -fpostpass Perform postpass scheduling (only for K1 architecture) [on] -fpostpass= <optim> Perform postpass scheduling with the specified optimization [list] (<optim>=list: list scheduling, <optim>=ilp: ILP, <optim>=greedy: just packing bundles) @@ -296,9 +298,9 @@ let num_input_files = ref 0 let cmdline_actions = let f_opt name ref = [Exact("-f" ^ name), Set ref; Exact("-fno-" ^ name), Unset ref] in - let f_opt_str name ref strref = + let f_opt_str name default ref strref = [Exact("-f" ^ name ^ "="), String - (fun s -> (strref := (if s == "" then "list" else s)); ref := true) + (fun s -> (strref := (if s == "" then default else s)); ref := true) ] in let f_str name strref default = [Exact("-f" ^ name ^ "="), String @@ -342,6 +344,7 @@ let cmdline_actions = Exact "-fprofile-use=", String (fun s -> Profilingaux.load_profiling_info s); Exact "-finline-auto-threshold", Integer (fun n -> option_inline_auto_threshold := n); Exact "-debug-compcert", Integer (fun n -> option_debug_compcert := n); + Exact "-regpres-threshold", Integer (fun n -> option_regpres_threshold := n); Exact "-fsmall-data", Integer(fun n -> option_small_data := n); Exact "-fsmall-const", Integer(fun n -> option_small_const := n); Exact "-ffloat-const-prop", Integer(fun n -> option_ffloatconstprop := n); @@ -424,6 +427,7 @@ let cmdline_actions = @ f_opt "redundancy" option_fredundancy @ [ Exact "-mtune", String (fun s -> option_mtune := s) ] @ f_opt "prepass" option_fprepass + @ f_opt "regpres-wait-window" option_regpres_wait_window @ f_opt "postpass" option_fpostpass @ [ Exact "-ftailduplicate", Integer (fun n -> option_ftailduplicate := n) ] @ f_opt "predict" option_fpredict @@ -431,8 +435,8 @@ let cmdline_actions = @ [ Exact "-funrollbody", Integer (fun n -> option_funrollbody := n) ] @ [ Exact "-flooprotate", Integer (fun n -> option_flooprotate := n) ] @ f_opt "tracelinearize" option_ftracelinearize - @ f_opt_str "prepass" option_fprepass option_fprepass_sched - @ f_opt_str "postpass" option_fpostpass option_fpostpass_sched + @ f_opt_str "prepass" "regpress" option_fprepass option_fprepass_sched + @ f_opt_str "postpass" "list" option_fpostpass option_fpostpass_sched @ f_opt "inline" option_finline @ f_opt "inline-functions-called-once" option_finline_functions_called_once @ f_opt "globaladdrtmp" option_fglobaladdrtmp diff --git a/kvx/Asmblockdeps.v b/kvx/Asmblockdeps.v index b6d18c3e..a9786e0a 100644 --- a/kvx/Asmblockdeps.v +++ b/kvx/Asmblockdeps.v @@ -164,17 +164,17 @@ Definition arith_eval (ao: arith_op) (l: list value) := | _, _ => None end. -Definition exec_incorrect_load trap chunk := +Definition exec_incorrect_load trap := match trap with | TRAP => None - | NOTRAP => Some (Val (concrete_default_notrap_load_value chunk)) + | NOTRAP => Some (Val Vundef) end. Definition exec_load_deps_offset (trap: trapping_mode) (chunk: memory_chunk) (m: mem) (v: val) (ofs: offset) := let (ge, fn) := Ge in match (eval_offset ofs) with | OK ptr => match Mem.loadv chunk m (Val.offset_ptr v ptr) with - | None => exec_incorrect_load trap chunk + | None => exec_incorrect_load trap | Some vl => Some (Val vl) end | _ => None @@ -182,13 +182,13 @@ Definition exec_load_deps_offset (trap: trapping_mode) (chunk: memory_chunk) (m: Definition exec_load_deps_reg (trap: trapping_mode) (chunk: memory_chunk) (m: mem) (v vo: val) := match Mem.loadv chunk m (Val.addl v vo) with - | None => exec_incorrect_load trap chunk + | None => exec_incorrect_load trap | Some vl => Some (Val vl) end. Definition exec_load_deps_regxs (trap: trapping_mode) (chunk: memory_chunk) (m: mem) (v vo: val) := match Mem.loadv chunk m (Val.addl v (Val.shll vo (scale_of_chunk chunk))) with - | None => exec_incorrect_load trap chunk + | None => exec_incorrect_load trap | Some vl => Some (Val vl) end. diff --git a/kvx/Asmblockgenproof1.v b/kvx/Asmblockgenproof1.v index a65bd5bc..259c4f9c 100644 --- a/kvx/Asmblockgenproof1.v +++ b/kvx/Asmblockgenproof1.v @@ -1914,7 +1914,7 @@ Lemma transl_load_access2_correct_notrap2: Mem.loadv chunk m v = None -> exists rs', exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m - /\ rs'#rd = concrete_default_notrap_load_value chunk + /\ rs'#rd = Vundef /\ forall r, r <> PC -> r <> RTMP -> r <> rd -> rs'#r = rs#r. Proof. intros until ro; intros ARGS IREGE INSTR TR EV LOAD. @@ -1963,7 +1963,7 @@ Lemma transl_load_access2XS_correct_notrap2: Mem.loadv chunk m v = None -> exists rs', exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m - /\ rs'#rd = concrete_default_notrap_load_value chunk + /\ rs'#rd = Vundef /\ forall r, r <> PC -> r <> RTMP -> r <> rd -> rs'#r = rs#r. Proof. intros until ro; intros ARGS IREGE INSTR TR EV LOAD. @@ -2008,7 +2008,7 @@ Lemma transl_load_access_correct_notrap2: Mem.loadv chunk m v = None -> exists rs', exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m - /\ rs'#rd = concrete_default_notrap_load_value chunk + /\ rs'#rd = Vundef /\ forall r, r <> PC -> r <> RTMP -> r <> rd -> rs'#r = rs#r. Proof. intros until v; intros INSTR TR EV LOAD. @@ -2185,7 +2185,7 @@ Lemma transl_load_correct_notrap2: Mem.loadv chunk m a = None -> exists rs', exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m - /\ rs'#(preg_of dst) = (concrete_default_notrap_load_value chunk) + /\ rs'#(preg_of dst) = Vundef /\ forall r, r <> PC -> r <> RTMP -> r <> preg_of dst -> rs'#r = rs#r. Proof. intros until a; intros TR EV LOAD. destruct addr. diff --git a/kvx/Asmblockprops.v b/kvx/Asmblockprops.v index c3929be5..a732d29b 100644 --- a/kvx/Asmblockprops.v +++ b/kvx/Asmblockprops.v @@ -96,9 +96,9 @@ Theorem exec_basic_instr_pc: Proof. intros. destruct b; try destruct i; try destruct i. all: try (inv H; Simpl). - 1-10: unfold parexec_load_offset in H1; destruct (eval_offset ofs); try discriminate; destruct (Mem.loadv _ _ _); unfold parexec_incorrect_load in *; destruct trap; try discriminate; unfold concrete_default_notrap_load_value in *; inv H1; Simpl; fail. + 1-10: unfold parexec_load_offset in H1; destruct (eval_offset ofs); try discriminate; destruct (Mem.loadv _ _ _); unfold parexec_incorrect_load in *; destruct trap; try discriminate; inv H1; Simpl; fail. - 1-20: unfold parexec_load_reg, parexec_load_regxs in H1; destruct (Mem.loadv _ _ _); unfold parexec_incorrect_load in *; destruct trap; try discriminate; unfold concrete_default_notrap_load_value in *; inv H1; Simpl; fail. + 1-20: unfold parexec_load_reg, parexec_load_regxs in H1; destruct (Mem.loadv _ _ _); unfold parexec_incorrect_load in *; destruct trap; try discriminate; inv H1; Simpl; fail. { (* PLoadQRRO *) unfold parexec_load_q_offset in H1. diff --git a/kvx/Asmvliw.v b/kvx/Asmvliw.v index aa2e0885..45b230e6 100644 --- a/kvx/Asmvliw.v +++ b/kvx/Asmvliw.v @@ -313,7 +313,11 @@ Inductive cf_instruction : Type := . (** *** Loads *) -Definition concrete_default_notrap_load_value (chunk : memory_chunk) := + +(* What follows was the original spec, but is subtly incorrect. + Our definition of the assembly-level memory model is already an abstraction of the real world. + In particular, we consider that a load is incorrect when it points outside of CompCert's visible memory, whereas this memory could be correct at the assembly level. + This means that CompCert would believe an incorrect load would yield 0 whereas it would yield another value. match chunk with | Mint8signed | Mint8unsigned | Mint16signed | Mint16unsigned | Mint32 => Vint Int.zero @@ -321,7 +325,7 @@ Definition concrete_default_notrap_load_value (chunk : memory_chunk) := | Many32 | Many64 => Vundef | Mfloat32 => Vsingle Float32.zero | Mfloat64 => Vfloat Float.zero - end. + end. *) Inductive load_name : Type := | Plb (**r load byte *) @@ -1169,16 +1173,16 @@ Definition eval_offset (ofs: offset) : res ptrofs := OK ofs. (** *** load/store instructions *) -Definition parexec_incorrect_load trap chunk d rsw mw := +Definition parexec_incorrect_load trap d rsw mw := match trap with | TRAP => Stuck - | NOTRAP => Next (rsw#d <- (concrete_default_notrap_load_value chunk)) mw + | NOTRAP => Next (rsw#d <- Vundef) mw end. Definition parexec_load_offset (trap: trapping_mode) (chunk: memory_chunk) (rsr rsw: regset) (mr mw: mem) (d a: ireg) (ofs: offset) := match (eval_offset ofs) with | OK ptr => match Mem.loadv chunk mr (Val.offset_ptr (rsr a) ptr) with - | None => parexec_incorrect_load trap chunk d rsw mw + | None => parexec_incorrect_load trap d rsw mw | Some v => Next (rsw#d <- v) mw end | _ => Stuck @@ -1225,13 +1229,13 @@ Definition parexec_load_o_offset (rsr rsw: regset) (mr mw: mem) (d : gpreg_o) (a Definition parexec_load_reg (trap: trapping_mode) (chunk: memory_chunk) (rsr rsw: regset) (mr mw: mem) (d a ro: ireg) := match Mem.loadv chunk mr (Val.addl (rsr a) (rsr ro)) with - | None => parexec_incorrect_load trap chunk d rsw mw + | None => parexec_incorrect_load trap d rsw mw | Some v => Next (rsw#d <- v) mw end. Definition parexec_load_regxs (trap: trapping_mode) (chunk: memory_chunk) (rsr rsw: regset) (mr mw: mem) (d a ro: ireg) := match Mem.loadv chunk mr (Val.addl (rsr a) (Val.shll (rsr ro) (scale_of_chunk chunk))) with - | None => parexec_incorrect_load trap chunk d rsw mw + | None => parexec_incorrect_load trap d rsw mw | Some v => Next (rsw#d <- v) mw end. diff --git a/kvx/Machregsaux.ml b/kvx/Machregsaux.ml index e3b18181..dbb89727 100644 --- a/kvx/Machregsaux.ml +++ b/kvx/Machregsaux.ml @@ -31,3 +31,5 @@ let class_of_type = function | AST.Tint | AST.Tlong | AST.Tfloat | AST.Tsingle -> 0 | AST.Tany32 | AST.Tany64 -> assert false + +let nr_regs = [| 59 |] diff --git a/kvx/Machregsaux.mli b/kvx/Machregsaux.mli index 01b0f9fd..23ac1c9a 100644 --- a/kvx/Machregsaux.mli +++ b/kvx/Machregsaux.mli @@ -15,3 +15,6 @@ val is_scratch_register: string -> bool val class_of_type: AST.typ -> int + +(* Number of registers in each class *) +val nr_regs : int array diff --git a/kvx/PostpassSchedulingOracle.ml b/kvx/PostpassSchedulingOracle.ml index 2107ce22..5ebad421 100644 --- a/kvx/PostpassSchedulingOracle.ml +++ b/kvx/PostpassSchedulingOracle.ml @@ -787,8 +787,14 @@ let latency_constraints bb = *) let build_problem bb = - { max_latency = -1; resource_bounds = resource_bounds; - instruction_usages = instruction_usages bb; latency_constraints = latency_constraints bb } +{ max_latency = -1; + resource_bounds = resource_bounds; + instruction_usages = instruction_usages bb; + latency_constraints = latency_constraints bb; + live_regs_entry = Registers.Regset.empty; (* unused here *) + typing = (fun x -> AST.Tint); (* unused here *) + reference_counting = None +} let rec find_min_opt (l: int option list) = match l with diff --git a/riscV/Machregsaux.ml b/riscV/Machregsaux.ml index 840943e7..e3e47946 100644 --- a/riscV/Machregsaux.ml +++ b/riscV/Machregsaux.ml @@ -18,3 +18,5 @@ let class_of_type = function | AST.Tint | AST.Tlong -> 0 | AST.Tfloat | AST.Tsingle -> 1 | AST.Tany32 | AST.Tany64 -> assert false + +let nr_regs = [| 26; 32|] diff --git a/riscV/Machregsaux.mli b/riscV/Machregsaux.mli index 01b0f9fd..bb3777bf 100644 --- a/riscV/Machregsaux.mli +++ b/riscV/Machregsaux.mli @@ -15,3 +15,6 @@ val is_scratch_register: string -> bool val class_of_type: AST.typ -> int + +(* Number of registers in each class *) +val nr_regs : int array diff --git a/riscV/OpWeights.ml b/riscV/OpWeights.ml index 0a1d9ad4..a5ece6d5 100644 --- a/riscV/OpWeights.ml +++ b/riscV/OpWeights.ml @@ -66,7 +66,8 @@ module Rocket = struct | OEmayundef _ -> 0 | _ -> 1 - let resources_of_op (op : operation) (nargs : int) = resource_bounds + let resources_of_op (op : operation) (nargs : int) = + match op with OEmayundef _ -> [| 0 |] | _ -> resource_bounds let non_pipelined_resources_of_op (op : operation) (nargs : int) = match op with diff --git a/runtime/Makefile b/runtime/Makefile index 6f70fa87..ff51b7d7 100644 --- a/runtime/Makefile +++ b/runtime/Makefile @@ -1,5 +1,7 @@ include ../Makefile.config +.PRECIOUS: .s + CFLAGS=-O1 -Wall ifeq ($(ARCH),x86) diff --git a/scheduling/BTL.v b/scheduling/BTL.v index 96377943..f832085c 100644 --- a/scheduling/BTL.v +++ b/scheduling/BTL.v @@ -214,7 +214,7 @@ Inductive has_loaded sp rs m chunk addr args v: trapping_mode -> Prop := : has_loaded sp rs m chunk addr args v trap | has_loaded_default (LOAD: forall a, eval_addressing ge sp addr rs##args = Some a -> Mem.loadv chunk m a = None) - (DEFAULT: v = default_notrap_load_value chunk) + (DEFAULT: v = Vundef) : has_loaded sp rs m chunk addr args v NOTRAP . Local Hint Constructors has_loaded: core. @@ -272,12 +272,10 @@ Fixpoint iblock_istep_run sp ib rs m: option outcome := match Mem.loadv chunk m a with | Some v => Some {| _rs := rs#dst <- v; _m:= m; _fin := None |} | None => - let v := default_notrap_load_value chunk in - Some {| _rs := rs#dst <- v; _m:= m; _fin := None |} + Some {| _rs := rs#dst <- Vundef; _m:= m; _fin := None |} end | None => - let v := default_notrap_load_value chunk in - Some {| _rs := rs#dst <- v; _m:= m; _fin := None |} + Some {| _rs := rs#dst <- Vundef; _m:= m; _fin := None |} end | Bstore chunk addr args src _ => SOME a <- eval_addressing ge sp addr rs##args IN diff --git a/scheduling/BTLRenumber.ml b/scheduling/BTLRenumber.ml index 6ff42a27..2f4f9203 100644 --- a/scheduling/BTLRenumber.ml +++ b/scheduling/BTLRenumber.ml @@ -94,7 +94,7 @@ let regenerate_btl_tree btl entry = let ib = renumber_iblock old_ibf.entry in let n = get_inumb_or_next ib in let n_pos = i2p n in - let bi = mk_binfo n in + let bi = mk_binfo n old_ibf.binfo.s_output_regs old_ibf.binfo.typing in let ibf = { entry = ib; input_regs = old_ibf.input_regs; binfo = bi } in if old_n = entry then new_entry := n_pos; dm := PTree.set old_n n_pos !dm; diff --git a/scheduling/BTLScheduleraux.ml b/scheduling/BTLScheduleraux.ml index 00be2aa7..75672243 100644 --- a/scheduling/BTLScheduleraux.ml +++ b/scheduling/BTLScheduleraux.ml @@ -4,7 +4,9 @@ open BTLtypes open Machine open DebugPrint open PrintBTL +open Registers open RTLcommonaux +open BTLcommonaux open ExpansionOracle open PrepassSchedulingOracle @@ -88,11 +90,179 @@ let apply_schedule bseq olast positions = in build_iblock ibl +(** the useful one. Returns a hashtable with bindings of shape + ** [(r,(t, n)], where [r] is a pseudo-register (Registers.reg), + ** [t] is its class (according to [typing]), and [n] the number of + ** times it's referenced as an argument in instructions of [seqa] ; + ** and an arrray containg the list of regs referenced by each + ** instruction, with a boolean to know whether it's as arg or dest *) +let reference_counting (seqa : (iblock * Regset.t) array) (out_regs : Regset.t) + (typing : RTLtyping.regenv) : + (reg, int * int) Hashtbl.t * (reg * bool) list array = + let retl = Hashtbl.create 42 in + let retr = Array.make (Array.length seqa) [] in + (* retr.(i) : (r, b) -> (r', b') -> ... + * where b = true if seen as arg, false if seen as dest + *) + List.iter + (fun reg -> + Hashtbl.add retl reg (Machregsaux.class_of_type (typing reg), 1)) + (Regset.elements out_regs); + let add_reg reg = + match Hashtbl.find_opt retl reg with + | Some (t, n) -> Hashtbl.add retl reg (t, n + 1) + | None -> Hashtbl.add retl reg (Machregsaux.class_of_type (typing reg), 1) + in + let map_true = List.map (fun r -> (r, true)) in + Array.iteri + (fun i (ins, _) -> + match ins with + | Bop (_, args, dest, _) | Bload (_, _, _, args, dest, _) -> + List.iter add_reg args; + retr.(i) <- (dest, false) :: map_true args + | Bcond (_, args, _, _, _) -> + List.iter add_reg args; + retr.(i) <- map_true args + | Bstore (_, _, args, src, _) -> + List.iter add_reg args; + add_reg src; + retr.(i) <- (src, true) :: map_true args + | BF (Bcall (_, fn, args, dest, _), _) -> + List.iter add_reg args; + retr.(i) <- + (match fn with + | Datatypes.Coq_inl reg -> + add_reg reg; + (dest, false) :: (reg, true) :: map_true args + | _ -> (dest, false) :: map_true args) + | BF (Btailcall (_, fn, args), _) -> + List.iter add_reg args; + retr.(i) <- + (match fn with + | Datatypes.Coq_inl reg -> + add_reg reg; + (reg, true) :: map_true args + | _ -> map_true args) + | BF (Bbuiltin (_, args, dest, _), _) -> + let rec bar = function + | AST.BA r -> + add_reg r; + retr.(i) <- (r, true) :: retr.(i) + | AST.BA_splitlong (hi, lo) | AST.BA_addptr (hi, lo) -> + bar hi; + bar lo + | _ -> () + in + List.iter bar args; + let rec bad = function + | AST.BR r -> retr.(i) <- (r, false) :: retr.(i) + | AST.BR_splitlong (hi, lo) -> + bad hi; + bad lo + | _ -> () + in + bad dest + | BF (Bjumptable (reg, _), _) | BF (Breturn (Some reg), _) -> + add_reg reg; + retr.(i) <- [ (reg, true) ] + | _ -> ()) + seqa; + (* print_string "mentions\n"; + * Array.iteri (fun i l -> + * print_int i; + * print_string ": ["; + * List.iter (fun (r, b) -> + * print_int (Camlcoq.P.to_int r); + * print_string ":"; + * print_string (if b then "a:" else "d"); + * if b then print_int (snd (Hashtbl.find retl r)); + * print_string ", " + * ) l; + * print_string "]\n"; + * flush stdout; + * ) retr; *) + (retl, retr) + +let get_live_regs_entry seqa ibf btl = + if !Clflags.option_debug_compcert > 6 then debug_flag := true; + let ret = + Array.fold_right + (fun (ins, liveins) regset_i -> + let regset = Regset.union liveins regset_i in + match ins with + | Bnop _ -> regset + | Bop (_, args, dest, _) | Bload (_, _, _, args, dest, _) -> + List.fold_left + (fun set reg -> Regset.add reg set) + (Regset.remove dest regset) + args + | Bstore (_, _, args, src, _) -> + List.fold_left + (fun set reg -> Regset.add reg set) + (Regset.add src regset) args + | BF (Bcall (_, fn, args, dest, _), _) -> + List.fold_left + (fun set reg -> Regset.add reg set) + ((match fn with + | Datatypes.Coq_inl reg -> Regset.add reg + | Datatypes.Coq_inr _ -> fun x -> x) + (Regset.remove dest regset)) + args + | BF (Btailcall (_, fn, args), _) -> + List.fold_left + (fun set reg -> Regset.add reg set) + (match fn with + | Datatypes.Coq_inl reg -> Regset.add reg regset + | Datatypes.Coq_inr _ -> regset) + args + | BF (Bbuiltin (_, args, dest, _), _) -> + List.fold_left + (fun set arg -> + let rec add reg set = + match reg with + | AST.BA r -> Regset.add r set + | AST.BA_splitlong (hi, lo) | AST.BA_addptr (hi, lo) -> + add hi (add lo set) + | _ -> set + in + add arg set) + (let rec rem dest set = + match dest with + | AST.BR r -> Regset.remove r set + | AST.BR_splitlong (hi, lo) -> rem hi (rem lo set) + | _ -> set + in + rem dest regset) + args + | BF (Bjumptable (reg, _), _) | BF (Breturn (Some reg), _) -> + Regset.add reg regset + | Bcond (_, args, _, _, _) -> + List.fold_left (fun set reg -> Regset.add reg set) regset args + | _ -> regset) + seqa ibf.binfo.s_output_regs + in + debug "live in regs: "; + print_regset ret; + debug "\n"; + debug_flag := false; + ret + let schedule_blk n ibf btl = if not !Clflags.option_fprepass then btl else let bseq, olast = flatten_blk_basics ibf in - match schedule_sequence bseq btl with + let seqa = + Array.mapi + (fun i inst -> (inst, get_liveins inst)) + bseq + in + let live_regs_entry = get_live_regs_entry seqa ibf btl in + let refcount = + reference_counting seqa ibf.binfo.s_output_regs ibf.binfo.typing + in + match + schedule_sequence seqa btl live_regs_entry ibf.binfo.typing refcount + with | Some positions -> let new_ib = apply_schedule bseq olast positions in let new_ibf = @@ -116,29 +286,52 @@ let turn_all_loads_nontrap n ibf btl = in PTree.set n ibf' btl +let compute_liveins n ibf btl = + let rec traverse_rec ib = + match ib with + | Bseq (ib1, ib2) -> + traverse_rec ib1; + traverse_rec ib2 + | BF (Bgoto succ, iinfo) + | BF (Bcall (_, _, _, _, succ), iinfo) + | BF (Bbuiltin (_, _, _, succ), iinfo) -> + let lives = (get_some @@ PTree.get succ btl).input_regs in + iinfo.liveins <- lives + | BF (Bjumptable (_, tbl), iinfo) -> + List.iter + (fun ex -> + let lives = (get_some @@ PTree.get ex btl).input_regs in + iinfo.liveins <- Regset.union iinfo.liveins lives) + tbl + | Bcond (_, _, BF (Bgoto succ, _), Bnop None, iinfo) -> ( + match iinfo.pcond with + | Some predb -> + assert (predb = false); + let lives = (get_some @@ PTree.get succ btl).input_regs in + iinfo.liveins <- lives + | None -> ()) + | _ -> () + in + traverse_rec ibf.entry + let rec do_schedule btl = function | [] -> btl | (n, ibf) :: blks -> + compute_liveins n ibf btl; let code_exp = expanse n ibf btl in let ibf_exp = get_some @@ PTree.get n code_exp in let code_nt = turn_all_loads_nontrap n ibf_exp code_exp in let ibf_nt = get_some @@ PTree.get n code_nt in let btl' = schedule_blk n ibf_nt code_nt in - begin - (*debug_flag := true;*) - print_btl_code stderr code_nt; - print_btl_code stderr btl'; - (*debug_flag := false;*) - do_schedule btl' blks - end + (*debug_flag := true;*) + print_btl_code stderr btl; + print_btl_code stderr btl'; + (*debug_flag := false;*) + do_schedule btl' blks let btl_scheduler f = let btl = f.fn_code in - (*debug_flag := true;*) let elts = PTree.elements btl in find_last_reg elts; let btl' = do_schedule btl elts in - debug "Scheduled BTL Code:\n"; - print_btl_code stderr btl'; - (*debug_flag := false;*) btl' diff --git a/scheduling/BTL_SEtheory.v b/scheduling/BTL_SEtheory.v index e2e6be6b..033ed843 100644 --- a/scheduling/BTL_SEtheory.v +++ b/scheduling/BTL_SEtheory.v @@ -93,11 +93,11 @@ Fixpoint eval_sval ctx (sv: sval): option val := | NOTRAP => SOME args <- eval_list_sval ctx lsv IN match (eval_addressing (cge ctx) (csp ctx) addr args) with - | None => Some (default_notrap_load_value chunk) + | None => Some Vundef | Some a => SOME m <- eval_smem ctx sm IN match (Mem.loadv chunk m a) with - | None => Some (default_notrap_load_value chunk) + | None => Some Vundef | Some val => Some val end end diff --git a/scheduling/BTLcommonaux.ml b/scheduling/BTLcommonaux.ml index 4605d613..577e4828 100644 --- a/scheduling/BTLcommonaux.ml +++ b/scheduling/BTLcommonaux.ml @@ -1,15 +1,22 @@ open Maps open BTL +open Registers open BTLtypes open RTLcommonaux let undef_node = -1 -let mk_iinfo _inumb _pcond = { inumb = _inumb; pcond = _pcond; visited = false } +let mk_iinfo _inumb _pcond = { inumb = _inumb; pcond = _pcond; visited = false; liveins = Regset.empty } -let def_iinfo () = { inumb = undef_node; pcond = None; visited = false } +let def_iinfo () = { inumb = undef_node; pcond = None; visited = false; liveins = Regset.empty } -let mk_binfo _bnumb = { bnumb = _bnumb; visited = false } +let mk_binfo _bnumb _s_output_regs _typing = + { + bnumb = _bnumb; + visited = false; + s_output_regs = _s_output_regs; + typing = _typing; + } let reset_visited_ibf btl = PTree.map @@ -68,3 +75,13 @@ let rec get_inumb_or_next = function iinfo.inumb | Bseq (ib1, _) -> get_inumb_or_next ib1 | _ -> failwith "get_inumb_or_next: Bnop None" + +let get_liveins = function + | BF (_, iinfo) + | Bnop (Some iinfo) + | Bop (_, _, _, iinfo) + | Bload (_, _, _, _, _, iinfo) + | Bstore (_, _, _, _, iinfo) + | Bcond (_, _, _, _, iinfo) -> + iinfo.liveins + | _ -> failwith "get_liveins: invalid iblock" diff --git a/scheduling/BTLtypes.ml b/scheduling/BTLtypes.ml index 3972fd6b..12ca30e8 100644 --- a/scheduling/BTLtypes.ml +++ b/scheduling/BTLtypes.ml @@ -1,7 +1,15 @@ +open Registers + type inst_info = { mutable inumb : int; mutable pcond : bool option; mutable visited : bool; + mutable liveins: Regset.t; } -type block_info = { mutable bnumb : int; mutable visited : bool } +type block_info = { + mutable bnumb : int; + mutable visited : bool; + s_output_regs : Regset.t; + typing : RTLtyping.regenv; +} diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml index eab0b21a..e3a421a5 100644 --- a/scheduling/InstructionScheduler.ml +++ b/scheduling/InstructionScheduler.ml @@ -33,6 +33,10 @@ type latency_constraint = { type problem = { max_latency : int; resource_bounds : int array; + live_regs_entry : Registers.Regset.t; + typing : RTLtyping.regenv; + reference_counting : ((Registers.reg, int * int) Hashtbl.t + * ((Registers.reg * bool) list array)) option; instruction_usages : int array array; latency_constraints : latency_constraint list; };; @@ -118,6 +122,13 @@ let vector_less_equal a b = true with Exit -> false;; +(* let vector_add a b = + * assert ((Array.length a) = (Array.length b)); + * for i=0 to (Array.length a)-1 + * do + * b.(i) <- b.(i) + a.(i) + * done;; *) + let vector_subtract a b = assert ((Array.length a) = (Array.length b)); for i=0 to (Array.length a)-1 @@ -257,8 +268,8 @@ let priority_list_scheduler (order : list_scheduler_order) assert(!time >= 0); !time with Exit -> -1 - in + let advance_time() = begin (if !current_time < max_time-1 @@ -267,7 +278,8 @@ let priority_list_scheduler (order : list_scheduler_order) Array.blit problem.resource_bounds 0 current_resources 0 (Array.length current_resources); ready.(!current_time + 1) <- - InstrSet.union (ready.(!current_time)) (ready.(!current_time + 1)); + InstrSet.union (ready.(!current_time)) + (ready.(!current_time + 1)); ready.(!current_time) <- InstrSet.empty; end); incr current_time @@ -334,6 +346,470 @@ let list_scheduler = priority_list_scheduler CRITICAL_PATH_ORDER;; (* dummy code for placating ocaml's warnings *) let _ = fun x -> priority_list_scheduler INSTRUCTION_ORDER x;; + +(* A scheduler sensitive to register pressure *) +let reg_pres_scheduler (problem : problem) : solution option = + (* DebugPrint.debug_flag := true; *) + let nr_instructions = get_nr_instructions problem in + + if !Clflags.option_debug_compcert > 6 then + (Printf.eprintf "\nSCHEDULING_SUPERBLOCK %d\n" nr_instructions; + flush stderr); + + let successors = get_successors problem + and predecessors = get_predecessors problem + and times = Array.make (nr_instructions+1) (-1) in + let live_regs_entry = problem.live_regs_entry in + + let available_regs = Array.copy Machregsaux.nr_regs in + + let nr_types_regs = Array.length available_regs in + + let thres = Array.fold_left (min) + (max !(Clflags.option_regpres_threshold) 0) + Machregsaux.nr_regs + in + + + let regs_thresholds = Array.make nr_types_regs thres in + (* placeholder value *) + + let class_r r = + Machregsaux.class_of_type (problem.typing r) in + + let live_regs = Hashtbl.create 42 in + + List.iter (fun r -> let classe = Machregsaux.class_of_type + (problem.typing r) in + available_regs.(classe) + <- available_regs.(classe) - 1; + Hashtbl.add live_regs r classe) + (Registers.Regset.elements live_regs_entry); + + let csr_b = ref false in + + let counts, mentions = + match problem.reference_counting with + | Some (l, r) -> l, r + | None -> assert false + in + + let fold_delta i = (fun a (r, b) -> + a + + if class_r r <> i then 0 else + (if b then + if (Hashtbl.find counts r = (i, 1)) + then 1 else 0 + else + match Hashtbl.find_opt live_regs r with + | None -> -1 + | Some t -> 0 + )) in + + let priorities = critical_paths successors in + + let current_resources = Array.copy problem.resource_bounds in + + let module InstrSet = + struct + module MSet = + Set.Make (struct + type t=int + let compare x y = + match priorities.(y) - priorities.(x) with + | 0 -> x - y + | z -> z + end) + + let empty = MSet.empty + let is_empty = MSet.is_empty + let add = MSet.add + let remove = MSet.remove + let union = MSet.union + let iter = MSet.iter + + let compare_regs i x y = + let pyi = List.fold_left (fold_delta i) 0 mentions.(y) in + (* print_int y; + * print_string " "; + * print_int pyi; + * print_newline (); + * flush stdout; *) + let pxi = List.fold_left (fold_delta i) 0 mentions.(x) in + match pyi - pxi with + | 0 -> (match priorities.(y) - priorities.(x) with + | 0 -> x - y + | z -> z) + | z -> z + + (** t is the register class *) + let sched_CSR t ready usages = + (* print_string "looking for max delta"; + * print_newline (); + * flush stdout; *) + let result = ref (-1) in + iter (fun i -> + if vector_less_equal usages.(i) current_resources + then if !result = -1 || (compare_regs t !result i > 0) + then result := i) ready; + !result + end + in + + let max_time = bound_max_time problem + 5*nr_instructions in + let ready = Array.make max_time InstrSet.empty in + + Array.iteri (fun i preds -> + if i < nr_instructions && preds = [] + then ready.(0) <- InstrSet.add i ready.(0)) predecessors; + + let current_time = ref 0 + and earliest_time i = + try + let time = ref (-1) in + List.iter (fun (j, latency) -> + if times.(j) < 0 + then raise Exit + else let t = times.(j) + latency in + if t > !time + then time := t) predecessors.(i); + assert (!time >= 0); + !time + with Exit -> -1 + in + + let advance_time () = + (if !current_time < max_time-1 + then ( + Array.blit problem.resource_bounds 0 current_resources 0 + (Array.length current_resources); + ready.(!current_time + 1) <- + InstrSet.union (ready.(!current_time)) + (ready.(!current_time +1)); + ready.(!current_time) <- InstrSet.empty)); + incr current_time + in + + (* ALL MENTIONS TO cnt ARE PLACEHOLDERS *) + let cnt = ref 0 in + + let attempt_scheduling ready usages = + let result = ref (-1) in + DebugPrint.debug "\n\nREADY: "; + InstrSet.iter (fun i -> DebugPrint.debug "%d " i) ready; + DebugPrint.debug "\n\n"; + try + Array.iteri (fun i avlregs -> + DebugPrint.debug "avlregs: %d %d\nlive regs: %d\n" + i avlregs (Hashtbl.length live_regs); + if !cnt < 5 && avlregs <= regs_thresholds.(i) + then ( + csr_b := true; + let maybe = InstrSet.sched_CSR i ready usages in + DebugPrint.debug "maybe %d\n" maybe; + (if maybe >= 0 && + let delta = + List.fold_left (fold_delta i) 0 mentions.(maybe) in + DebugPrint.debug "delta %d\n" delta; + delta > 0 + then + (vector_subtract usages.(maybe) current_resources; + result := maybe) + else + if not !Clflags.option_regpres_wait_window + then + (InstrSet.iter (fun ins -> + if vector_less_equal usages.(ins) current_resources && + List.fold_left (fold_delta i) 0 mentions.(maybe) >= 0 + then result := ins + ) ready; + if !result <> -1 then + vector_subtract usages.(!result) current_resources; + incr cnt) + else + (incr cnt) + ); + raise Exit)) available_regs; + InstrSet.iter (fun i -> + if vector_less_equal usages.(i) current_resources + then ( + vector_subtract usages.(i) current_resources; + result := i; + raise Exit)) ready; + -1 + with Exit -> + !result in + + while !current_time < max_time + do + if (InstrSet.is_empty ready.(!current_time)) + then advance_time () + else + match attempt_scheduling ready.(!current_time) + problem.instruction_usages with + | -1 -> advance_time() + | i -> (assert(times.(i) < 0); + (DebugPrint.debug "INSTR ISSUED: %d\n" i; + if !csr_b && !Clflags.option_debug_compcert > 6 then + (Printf.eprintf "REGPRES: high pres class %d\n" i; + flush stderr); + csr_b := false; + (* if !Clflags.option_regpres_wait_window then *) + cnt := 0; + List.iter (fun (r,b) -> + if b then + (match Hashtbl.find_opt counts r with + | None -> assert false + | Some (t, n) -> + Hashtbl.remove counts r; + if n = 1 then + (Hashtbl.remove live_regs r; + available_regs.(t) + <- available_regs.(t) + 1)) + else + let t = class_r r in + match Hashtbl.find_opt live_regs r with + | None -> (Hashtbl.add live_regs r t; + available_regs.(t) + <- available_regs.(t) - 1) + | Some i -> () + ) mentions.(i)); + times.(i) <- !current_time; + ready.(!current_time) + <- InstrSet.remove i (ready.(!current_time)); + List.iter (fun (instr_to, latency) -> + if instr_to < nr_instructions then + match earliest_time instr_to with + | -1 -> () + | to_time -> + ((* DebugPrint.debug "TO TIME %d : %d\n" to_time + * (Array.length ready); *) + ready.(to_time) + <- InstrSet.add instr_to ready.(to_time)) + ) successors.(i); + successors.(i) <- [] + ) + done; + + try + let final_time = ref (-1) in + for i = 0 to nr_instructions - 1 do + DebugPrint.debug "%d " i; + (if times.(i) < 0 then raise Exit); + (if !final_time < times.(i) + 1 then final_time := times.(i) + 1) + done; + List.iter (fun (i, latency) -> + let target_time = latency + times.(i) in + if target_time > !final_time then + final_time := target_time) predecessors.(nr_instructions); + times.(nr_instructions) <- !final_time; + (* DebugPrint.debug_flag := false; *) + Some times + with Exit -> + DebugPrint.debug "reg_pres_sched failed\n"; + (* DebugPrint.debug_flag := false; *) + None + +;; + + +(********************************************************************) + +let reg_pres_scheduler_bis (problem : problem) : solution option = + (* Printf.printf "\nNEW\n\n"; *) + let nr_instructions = get_nr_instructions problem in + let successors = get_successors problem + and predecessors = get_predecessors problem + and times = Array.make (nr_instructions+1) (-1) in + let live_regs_entry = problem.live_regs_entry in + + (* let available_regs = Array.copy Machregsaux.nr_regs in *) + + let class_r r = + Machregsaux.class_of_type (problem.typing r) in + + let live_regs = Hashtbl.create 42 in + + List.iter (fun r -> let classe = Machregsaux.class_of_type + (problem.typing r) in + (* available_regs.(classe) + * <- available_regs.(classe) - 1; *) + Hashtbl.add live_regs r classe) + (Registers.Regset.elements live_regs_entry); + + + let counts, mentions = + match problem.reference_counting with + | Some (l, r) -> l, r + | None -> assert false + in + + let fold_delta a (r, b) = + a + (if b then + match Hashtbl.find_opt counts r with + | Some (_, 1) -> 1 + | _ -> 0 + else + match Hashtbl.find_opt live_regs r with + | None -> -1 + | Some t -> 0 + ) in + + let priorities = critical_paths successors in + + let current_resources = Array.copy problem.resource_bounds in + + let compare_pres x y = + let pdy = List.fold_left (fold_delta) 0 mentions.(y) in + let pdx = List.fold_left (fold_delta) 0 mentions.(x) in + match pdy - pdx with + | 0 -> x - y + | z -> z + in + + let module InstrSet = + Set.Make (struct + type t = int + let compare x y = + match priorities.(y) - priorities.(x) with + | 0 -> x - y + | z -> z + end) in + + let max_time = bound_max_time problem (* + 5*nr_instructions *) in + let ready = Array.make max_time InstrSet.empty in + + Array.iteri (fun i preds -> + if i < nr_instructions && preds = [] + then ready.(0) <- InstrSet.add i ready.(0)) predecessors; + + let current_time = ref 0 + and earliest_time i = + try + let time = ref (-1) in + List.iter (fun (j, latency) -> + if times.(j) < 0 + then raise Exit + else let t = times.(j) + latency in + if t > !time + then time := t) predecessors.(i); + assert (!time >= 0); + !time + with Exit -> -1 + in + + let advance_time () = + (* Printf.printf "ADV\n"; + * flush stdout; *) + (if !current_time < max_time-1 + then ( + Array.blit problem.resource_bounds 0 current_resources 0 + (Array.length current_resources); + ready.(!current_time + 1) <- + InstrSet.union (ready.(!current_time)) + (ready.(!current_time +1)); + ready.(!current_time) <- InstrSet.empty)); + incr current_time + in + + + let attempt_scheduling ready usages = + let result = ref [] in + try + InstrSet.iter (fun i -> + if vector_less_equal usages.(i) current_resources + then + if !result = [] || priorities.(i) = priorities.(List.hd (!result)) + then + result := i::(!result) + else raise Exit + ) ready; + if !result <> [] then raise Exit; + -1 + with + Exit -> + let mini = List.fold_left (fun a b -> + if a = -1 || compare_pres a b > 0 + then b else a + ) (-1) !result in + vector_subtract usages.(mini) current_resources; + mini + in + + while !current_time < max_time + do + if (InstrSet.is_empty ready.(!current_time)) + then advance_time () + else + match attempt_scheduling ready.(!current_time) + problem.instruction_usages with + | -1 -> advance_time() + | i -> ( + DebugPrint.debug "ISSUED: %d\nREADY: " i; + InstrSet.iter (fun i -> DebugPrint.debug "%d " i) + ready.(!current_time); + DebugPrint.debug "\nSUCC: "; + List.iter (fun (i, l) -> DebugPrint.debug "%d " i) + successors.(i); + DebugPrint.debug "\n\n"; + assert(times.(i) < 0); + times.(i) <- !current_time; + ready.(!current_time) + <- InstrSet.remove i (ready.(!current_time)); + (List.iter (fun (r,b) -> + if b then + (match Hashtbl.find_opt counts r with + | None -> assert false + | Some (t, n) -> + Hashtbl.remove counts r; + if n = 1 then + (Hashtbl.remove live_regs r; + (* available_regs.(t) + * <- available_regs.(t) + 1 *))) + else + let t = class_r r in + match Hashtbl.find_opt live_regs r with + | None -> (Hashtbl.add live_regs r t; + (* available_regs.(t) + * <- available_regs.(t) - 1 *)) + | Some i -> () + ) mentions.(i)); + List.iter (fun (instr_to, latency) -> + if instr_to < nr_instructions then + match earliest_time instr_to with + | -1 -> () + | to_time -> + ((* DebugPrint.debug "TO TIME %d : %d\n" to_time + * (Array.length ready); *) + ready.(to_time) + <- InstrSet.add instr_to ready.(to_time)) + ) successors.(i); + successors.(i) <- [] + ) + done; + + try + let final_time = ref (-1) in + for i = 0 to nr_instructions - 1 do + (* print_int i; + * flush stdout; *) + (if times.(i) < 0 then raise Exit); + (if !final_time < times.(i) + 1 then final_time := times.(i) + 1) + done; + List.iter (fun (i, latency) -> + let target_time = latency + times.(i) in + if target_time > !final_time then + final_time := target_time) predecessors.(nr_instructions); + times.(nr_instructions) <- !final_time; + Some times + with Exit -> + DebugPrint.debug "reg_pres_sched failed\n"; + None + +;; + +(********************************************************************) + type bundle = int list;; let rec extract_deps_to index = function @@ -438,6 +914,12 @@ let reverse_problem problem = { max_latency = problem.max_latency; resource_bounds = problem.resource_bounds; + live_regs_entry = Registers.Regset.empty; (* PLACEHOLDER *) + (* Not needed for the revlist sched, and for now we wont bother + with creating a reverse scheduler aware of reg press *) + + typing = problem.typing; + reference_counting = problem.reference_counting; instruction_usages = Array.init (nr_instructions + 1) (fun i -> if i=0 @@ -1259,5 +1741,7 @@ let scheduler_by_name name = | "ilp" -> validated_scheduler cascaded_scheduler | "list" -> validated_scheduler list_scheduler | "revlist" -> validated_scheduler reverse_list_scheduler + | "regpres" -> validated_scheduler reg_pres_scheduler + | "regpres_bis" -> validated_scheduler reg_pres_scheduler_bis | "greedy" -> greedy_scheduler | s -> failwith ("unknown scheduler: " ^ s);; diff --git a/scheduling/InstructionScheduler.mli b/scheduling/InstructionScheduler.mli index fb7af3f6..48c7bc09 100644 --- a/scheduling/InstructionScheduler.mli +++ b/scheduling/InstructionScheduler.mli @@ -23,6 +23,16 @@ type problem = { resource_bounds : int array; (** An array of number of units available indexed by the kind of resources to be allocated. It can be empty, in which case the problem is scheduling without resource constraints. *) + live_regs_entry : Registers.Regset.t; + (** The set of live pseudo-registers at entry. *) + + typing : RTLtyping.regenv; + (** Register type map. *) + + reference_counting : ((Registers.reg, int * int) Hashtbl.t + * ((Registers.reg * bool) list array)) option; + (** See RTLpathScheduleraux.reference_counting. *) + instruction_usages: int array array; (** At index {i i} the vector of resources used by instruction number {i i}. It must be the same length as [resource_bounds] *) @@ -68,6 +78,12 @@ Once a clock tick is full go to the next. @return [Some solution] when a solution is found, [None] if not. *) val list_scheduler : problem -> solution option +(** WIP : Same as list_scheduler, but schedules instructions which decrease +register pressure when it gets too high. *) +val reg_pres_scheduler : problem -> solution option + +val reg_pres_scheduler_bis : problem -> solution option + (** Schedule the problem using the order of instructions without any reordering *) val greedy_scheduler : problem -> solution option diff --git a/scheduling/RTLpath.v b/scheduling/RTLpath.v index a4fce97e..b29a7759 100644 --- a/scheduling/RTLpath.v +++ b/scheduling/RTLpath.v @@ -156,7 +156,7 @@ Definition istep (ge: RTL.genv) (i: instruction) (sp: val) (rs: regset) (m: mem) SOME v <- Mem.loadv chunk m a IN Some (mk_istate true pc' (rs#dst <- v) m) | Iload NOTRAP chunk addr args dst pc' => - let default_state := mk_istate true pc' rs#dst <- (default_notrap_load_value chunk) m in + let default_state := mk_istate true pc' rs#dst <- Vundef m in match (eval_addressing ge sp addr rs##args) with | None => Some default_state | Some a => match (Mem.loadv chunk m a) with diff --git a/scheduling/RTLpathSE_theory.v b/scheduling/RTLpathSE_theory.v index aa8db342..2a791feb 100644 --- a/scheduling/RTLpathSE_theory.v +++ b/scheduling/RTLpathSE_theory.v @@ -87,11 +87,11 @@ Fixpoint seval_sval (ge: RTL.genv) (sp:val) (sv: sval) (rs0: regset) (m0: mem): | NOTRAP => SOME args <- seval_list_sval ge sp lsv rs0 m0 IN match (eval_addressing ge sp addr args) with - | None => Some (default_notrap_load_value chunk) + | None => Some Vundef | Some a => SOME m <- seval_smem ge sp sm rs0 m0 IN match (Mem.loadv chunk m a) with - | None => Some (default_notrap_load_value chunk) + | None => Some Vundef | Some val => Some val end end diff --git a/scheduling/RTLpathScheduleraux.ml b/scheduling/RTLpathScheduleraux.ml index 3db25d82..659a8ba7 100644 --- a/scheduling/RTLpathScheduleraux.ml +++ b/scheduling/RTLpathScheduleraux.ml @@ -18,7 +18,7 @@ let print_superblock (sb: superblock) code = begin debug "{ instructions = "; print_instructions (Array.to_list insts) code; debug "\n"; debug " liveins = "; print_ptree_regset li; debug "\n"; - debug " output_regs = "; print_regset outs; debug "}" + debug " output_regs = "; print_regset outs; debug "\n}" end let print_superblocks lsb code = @@ -73,6 +73,168 @@ let get_superblocks code entry pm typing = lsb end +(** the useful one. Returns a hashtable with bindings of shape + ** [(r,(t, n)], where [r] is a pseudo-register (Registers.reg), + ** [t] is its class (according to [typing]), and [n] the number of + ** times it's referenced as an argument in instructions of [seqa] ; + ** and an arrray containg the list of regs referenced by each + ** instruction, with a boolean to know whether it's as arg or dest *) +let reference_counting (seqa : (instruction * Regset.t) array) + (out_regs : Registers.Regset.t) (typing : RTLtyping.regenv) : + (Registers.reg, int * int) Hashtbl.t * + (Registers.reg * bool) list array = + let retl = Hashtbl.create 42 in + let retr = Array.make (Array.length seqa) [] in + (* retr.(i) : (r, b) -> (r', b') -> ... + * where b = true if seen as arg, false if seen as dest + *) + List.iter (fun reg -> + Hashtbl.add retl + reg (Machregsaux.class_of_type (typing reg), 1) + ) (Registers.Regset.elements out_regs); + let add_reg reg = + match Hashtbl.find_opt retl reg with + | Some (t, n) -> Hashtbl.add retl reg (t, n+1) + | None -> Hashtbl.add retl reg (Machregsaux.class_of_type + (typing reg), 1) + in + let map_true = List.map (fun r -> r, true) in + Array.iteri (fun i (ins, _) -> + match ins with + | Iop(_,args,dest,_) | Iload(_,_,_,args,dest,_) -> + List.iter (add_reg) args; + retr.(i) <- (dest, false)::(map_true args) + | Icond(_,args,_,_,_) -> + List.iter (add_reg) args; + retr.(i) <- map_true args + | Istore(_,_,args,src,_) -> + List.iter (add_reg) args; + add_reg src; + retr.(i) <- (src, true)::(map_true args) + | Icall(_,fn,args,dest,_) -> + List.iter (add_reg) args; + retr.(i) <- (match fn with + | Datatypes.Coq_inl reg -> + add_reg reg; + (dest,false)::(reg, true)::(map_true args) + | _ -> (dest,false)::(map_true args)) + + | Itailcall(_,fn,args) -> + List.iter (add_reg) args; + retr.(i) <- (match fn with + | Datatypes.Coq_inl reg -> + add_reg reg; + (reg, true)::(map_true args) + | _ -> map_true args) + | Ibuiltin(_,args,dest,_) -> + let rec bar = function + | AST.BA r -> add_reg r; + retr.(i) <- (r, true)::retr.(i) + | AST.BA_splitlong (hi, lo) | AST.BA_addptr (hi, lo) -> + bar hi; bar lo + | _ -> () + in + List.iter (bar) args; + let rec bad = function + | AST.BR r -> retr.(i) <- (r, false)::retr.(i) + | AST.BR_splitlong (hi, lo) -> + bad hi; bad lo + | _ -> () + in + bad dest; + | Ijumptable (reg,_) | Ireturn (Some reg) -> + add_reg reg; + retr.(i) <- [reg, true] + | _ -> () + ) seqa; + (* print_string "mentions\n"; + * Array.iteri (fun i l -> + * print_int i; + * print_string ": ["; + * List.iter (fun (r, b) -> + * print_int (Camlcoq.P.to_int r); + * print_string ":"; + * print_string (if b then "a:" else "d"); + * if b then print_int (snd (Hashtbl.find retl r)); + * print_string ", " + * ) l; + * print_string "]\n"; + * flush stdout; + * ) retr; *) + retl, retr + + +let get_live_regs_entry (sb : superblock) code = + (if !Clflags.option_debug_compcert > 6 + then debug_flag := true); + debug "getting live regs for superblock:\n"; + print_superblock sb code; + debug "\n"; + let seqa = Array.map (fun i -> + (match PTree.get i code with + | Some ii -> ii + | None -> failwith "RTLpathScheduleraux.get_live_regs_entry" + ), + (match PTree.get i sb.liveins with + | Some s -> s + | None -> Regset.empty)) + sb.instructions in + let ret = + Array.fold_right (fun (ins, liveins) regset_i -> + let regset = Registers.Regset.union liveins regset_i in + match ins with + | Inop _ -> regset + | Iop (_, args, dest, _) + | Iload (_, _, _, args, dest, _) -> + List.fold_left (fun set reg -> Registers.Regset.add reg set) + (Registers.Regset.remove dest regset) args + | Istore (_, _, args, src, _) -> + List.fold_left (fun set reg -> Registers.Regset.add reg set) + (Registers.Regset.add src regset) args + | Icall (_, fn, args, dest, _) -> + List.fold_left (fun set reg -> Registers.Regset.add reg set) + ((match fn with + | Datatypes.Coq_inl reg -> (Registers.Regset.add reg) + | Datatypes.Coq_inr _ -> (fun x -> x)) + (Registers.Regset.remove dest regset)) + args + | Itailcall (_, fn, args) -> + List.fold_left (fun set reg -> Registers.Regset.add reg set) + (match fn with + | Datatypes.Coq_inl reg -> (Registers.Regset.add reg regset) + | Datatypes.Coq_inr _ -> regset) + args + | Ibuiltin (_, args, dest, _) -> + List.fold_left (fun set arg -> + let rec add reg set = + match reg with + | AST.BA r -> Registers.Regset.add r set + | AST.BA_splitlong (hi, lo) + | AST.BA_addptr (hi, lo) -> add hi (add lo set) + | _ -> set + in add arg set) + (let rec rem dest set = + match dest with + | AST.BR r -> Registers.Regset.remove r set + | AST.BR_splitlong (hi, lo) -> rem hi (rem lo set) + | _ -> set + in rem dest regset) + args + | Icond (_, args, _, _, _) -> + List.fold_left (fun set reg -> + Registers.Regset.add reg set) + regset args + | Ijumptable (reg, _) + | Ireturn (Some reg) -> + Registers.Regset.add reg regset + | _ -> regset + ) seqa sb.s_output_regs + in debug "live in regs: "; + print_regset ret; + debug "\n"; + debug_flag := false; + ret + (* TODO David *) let schedule_superblock sb code = if not !Clflags.option_fprepass @@ -91,15 +253,22 @@ let schedule_superblock sb code = match predicted_successor ii with | Some _ -> 0 | None -> 1 in + debug "hello\n"; + let live_regs_entry = get_live_regs_entry sb code in + let seqa = + Array.map (fun i -> + (match PTree.get i code with + | Some ii -> ii + | None -> failwith "RTLpathScheduleraux.schedule_superblock"), + (match PTree.get i sb.liveins with + | Some s -> s + | None -> Regset.empty)) + (Array.sub sb.instructions 0 (nr_instr-trailer_length)) in match PrepassSchedulingOracle.schedule_sequence - (Array.map (fun i -> - (match PTree.get i code with - | Some ii -> ii - | None -> failwith "RTLpathScheduleraux.schedule_superblock"), - (match PTree.get i sb.liveins with - | Some s -> s - | None -> Regset.empty)) - (Array.sub sb.instructions 0 (nr_instr-trailer_length))) with + seqa + live_regs_entry + sb.typing + (reference_counting seqa sb.s_output_regs sb.typing) with | None -> sb.instructions | Some order -> let ins' = diff --git a/scheduling/RTLtoBTLaux.ml b/scheduling/RTLtoBTLaux.ml index d544e87f..3c06f4cb 100644 --- a/scheduling/RTLtoBTLaux.ml +++ b/scheduling/RTLtoBTLaux.ml @@ -45,7 +45,7 @@ let translate_inst (iinfo : BTL.inst_info) inst is_final = in if is_final then encaps_final btli !osucc else btli -let translate_function code entry join_points liveness = +let translate_function code entry join_points liveness (typing : RTLtyping.regenv) = let reached = ref (PTree.map (fun n i -> false) code) in let btl_code = ref PTree.empty in let rec build_btl_tree e = @@ -53,6 +53,7 @@ let translate_function code entry join_points liveness = else ( reached := PTree.set e true !reached; let next_nodes = ref [] in + let last = ref None in let rec build_btl_block n = let inst = get_some @@ PTree.get n code in let psucc = predicted_successor inst in @@ -78,13 +79,16 @@ let translate_function code entry join_points liveness = | None -> debug "BLOCK END.\n"; next_nodes := !next_nodes @ successors_inst inst; + last := Some inst; translate_inst iinfo inst true in let ib = build_btl_block e in let succs = !next_nodes in let inputs = get_some @@ PTree.get e liveness in - let bi = mk_binfo (p2i e) in + let soutput = get_outputs liveness e (get_some @@ !last) in + + let bi = mk_binfo (p2i e) soutput typing in let ibf = { entry = ib; input_regs = inputs; binfo = bi } in btl_code := PTree.set e ibf !btl_code; List.iter build_btl_tree succs) @@ -97,7 +101,8 @@ let rtl2btl (f : RTL.coq_function) = let entry = f.fn_entrypoint in let join_points = get_join_points code entry in let liveness = analyze f in - let btl = translate_function code entry join_points liveness in + let typing = get_ok @@ RTLtyping.type_function f in + let btl = translate_function code entry join_points liveness typing in let dm = PTree.map (fun n i -> n) btl in (*debug_flag := true;*) debug "Entry %d\n" (p2i entry); diff --git a/scheduling/postpass_lib/Machblock.v b/scheduling/postpass_lib/Machblock.v index c8eadbd7..b588cca8 100644 --- a/scheduling/postpass_lib/Machblock.v +++ b/scheduling/postpass_lib/Machblock.v @@ -237,13 +237,13 @@ Inductive basic_step (s: list stackframe) (fb: block) (sp: val) (rs: regset) (m: | exec_MBload_notrap1: forall addr args rs' chunk dst, eval_addressing ge sp addr rs##args = None -> - rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- (default_notrap_load_value chunk)) -> + rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- Vundef) -> basic_step s fb sp rs m (MBload NOTRAP chunk addr args dst) rs' m | exec_MBload_notrap2: forall addr args a rs' chunk dst, eval_addressing ge sp addr rs##args = Some a -> Mem.loadv chunk m a = None -> - rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- (default_notrap_load_value chunk)) -> + rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- Vundef) -> basic_step s fb sp rs m (MBload NOTRAP chunk addr args dst) rs' m | exec_MBstore: forall chunk addr args src m' a rs', diff --git a/test/nardino/scheduling/entry_regs.c b/test/nardino/scheduling/entry_regs.c new file mode 100644 index 00000000..9e6adacb --- /dev/null +++ b/test/nardino/scheduling/entry_regs.c @@ -0,0 +1,19 @@ +#include <stdio.h> + +int f(int n) { + if (n > 0) + return 42; + else + return n; +} + + +int main(int argc, char *argv[]) { + int a=1; + float b=2.; + int c = f(a); + a = 3; + int d = f(a); + printf("%e, %d, %d, %d", b, a, c, d); + return 0; +} diff --git a/test/nardino/scheduling/spille_backw.c b/test/nardino/scheduling/spille_backw.c new file mode 100644 index 00000000..1c36ee86 --- /dev/null +++ b/test/nardino/scheduling/spille_backw.c @@ -0,0 +1,114 @@ +int f(int k) { + int a1 = k; + int b1 = 2*a1; + int c = a1; + int a2 = k+1; + int b2 = 2*a2; + c += a2; + int a3 = k+2; + int b3 = 2*a3; + c += a3; + int a4 = k+3; + int b4 = 2*a4; + c += a4; + int a5 = k+4; + int b5 = 2*a5; + c += a5; + int a6 = k+5; + int b6 = 2*a6; + c += a6; + int a7 = k+6; + int b7 = 2*a7; + c += a7; + int a8 = k+7; + int b8 = 2*a8; + c += a8; + int a9 = k+8; + int b9 = 2*a9; + c += a9; + int a10 = k+9; + int b10 = 2*a10; + c += a10; + int a11 = k+10; + int b11 = 2*a11; + c += a11; + int a12 = k+11; + int b12 = 2*a12; + c += a12; + int a13 = k+12; + int b13 = 2*a13; + c += a13; + int a14 = k+13; + int b14 = 2*a14; + c += a14; + int a15 = k+14; + int b15 = 2*a15; + c += a15; + int a16 = k+15; + int b16 = 2*a16; + c += a16; + int a17 = k+16; + int b17 = 2*a17; + c += a17; + int a18 = k+17; + int b18 = 2*a18; + c += a18; + int a19 = k+18; + int b19 = 2*a19; + c += a19; + int a20 = k+19; + int b20 = 2*a20; + c += a20; + int a21 = k+20; + int b21 = 2*a21; + c += a21; + int a22 = k+21; + int b22 = 2*a22; + c += a22; + int a23 = k+22; + int b23 = 2*a23; + c += a23; + int a24 = k+23; + int b24 = 2*a24; + c += a24; + int a25 = k+24; + int b25 = 2*a25; + c += a25; + int a26 = k+25; + int b26 = 2*a26; + c += a26; + return + b13+ + b12+ + b11+ + b10+ + b9+ + b8+ + b7+ + b6+ + b5+ + b4+ + b3+ + b2+ + b1+ + b14+ + b15+ + b16+ + b17+ + b18+ + b19+ + b20+ + b21+ + b22+ + b23+ + b23+ + b24+ + b25+ + b26+ + c; +} + +int main(int argc, char *argv[]) { + f(3); + return 0; +} diff --git a/test/nardino/scheduling/spille_forw.c b/test/nardino/scheduling/spille_forw.c new file mode 100644 index 00000000..db88588b --- /dev/null +++ b/test/nardino/scheduling/spille_forw.c @@ -0,0 +1,166 @@ +#include <stdio.h> + +int f(int n, float * arr) { + float a1 = (float) n; + float b1 = 2.*a1; + float c = a1; + float a2 = (float) n+1; + float b2 = 2.*a2; + c += a2; + float a3 = (float) n+2; + float b3 = 2.*a3; + c += a3; + float a4 = (float) n+3; + float b4 = 2.*a4; + c += a4; + float a5 = (float) n+4; + float b5 = 2.*a5; + c += a5; + float a6 = (float) n+5; + float b6 = 2.*a6; + c += a6; + float a7 = (float) n+6; + float b7 = 2.*a7; + c += a7; + float a8 = (float) n+7; + float b8 = 2.*a8; + c += a8; + float a9 = (float) n+8; + float b9 = 2.*a9; + c += a9; + float a10 = (float) n+9; + float b10 = 2.*a10; + c += a10; + float a11 = (float) n+10; + float b11 = 2.*a11; + c += a11; + float a12 = (float) n+11; + float b12 = 2.*a12; + c += a12; + float a13 = (float) n+12; + float b13 = 2.*a13; + c += a13; + float a14 = (float) n+13; + float b14 = 2.*a14; + c += a14; + float a15 = (float) n+14; + float b15 = 2.*a15; + c += a15; + float a16 = (float) n+15; + float b16 = 2.*a16; + c += a16; + float a17 = (float) n+16; + float b17 = 2.*a17; + c += a17; + float a18 = (float) n+17; + float b18 = 2.*a18; + c += a18; + float a19 = (float) n+18; + float b19 = 2.*a19; + c += a19; + float a20 = (float) n+19; + float b20 = 2.*a20; + c += a20; + float a21 = (float) n+20; + float b21 = 2.*a21; + c += a21; + float a22 = (float) n+21; + float b22 = 2.*a22; + c += a22; + float a23 = (float) n+22; + float b23 = 2.*a23; + c += a23; + float a24 = (float) n+23; + float b24 = 2.*a24; + c += a24; + float a25 = (float) n+24; + float b25 = 2.*a25; + c += a25; + float a26 = (float) n+25; + float b26 = 2.*a26; + c += a26; + float a27 = (float) n+26; + float b27 = 2.*a27; + c += a27; + float a28 = (float) n+27; + float b28 = 2.*a28; + c += a28; + float a29 = (float) n+28; + float b29 = 2.*a29; + c += a29; + float a30 = (float) n+29; + float b30 = 2.*a30; + c += a30; + /* arr[0] = a1; */ + /* arr[1] = a2; */ + /* arr[2] = a3; */ + /* arr[3] = a4; */ + /* arr[4] = a5; */ + /* arr[5] = a6; */ + /* arr[6] = a7; */ + /* arr[7] = a8; */ + /* arr[8] = a9; */ + /* arr[9] = a10; */ + /* arr[10] = a11; */ + /* arr[11] = a12; */ + /* arr[12] = a13; */ + /* arr[13] = a14; */ + /* arr[14] = a15; */ + /* arr[15] = a16; */ + /* arr[16] = a17; */ + /* arr[17] = a18; */ + /* arr[18] = a19; */ + /* arr[19] = a20; */ + /* arr[20] = a21; */ + /* arr[21] = a22; */ + /* arr[22] = a23; */ + /* arr[23] = a24; */ + /* arr[24] = a25; */ + /* arr[25] = a26; */ + /* arr[26] = a27; */ + /* arr[27] = a28; */ + /* arr[28] = a29; */ + /* arr[29] = a30; */ + return c + + b1+ + b2+ + b3+ + b4+ + b5+ + b6+ + b7+ + b8+ + b9+ + b10+ + b11+ + b12+ + b13+ + b14+ + b15+ + b16+ + b17+ + b18+ + b19+ + b20+ + b21+ + b22+ + b23+ + b24+ + b25+ + b26+ + b27+ + b28+ + b29+ + b30; +} + + + + + + +int main(int argc, char *argv[]) { + float arr[30]; + f(5, arr); + return 0; +} diff --git a/tools/compiler_expand.ml b/tools/compiler_expand.ml index 5c90af6c..45ffc828 100644 --- a/tools/compiler_expand.ml +++ b/tools/compiler_expand.ml @@ -13,6 +13,8 @@ type print_result = Noprint | Print of string;; type when_triggered = Always | Option of string;; type needs_require = Require | NoRequire;; +let rtl_tunneling = PARTIAL, Always, Require, (Some "RTL Branch Tunneling"), "RTLTunneling" + (* FIXME - The gestion of NoRequire is a bit ugly right now. *) let rtl_passes = [| @@ -38,6 +40,7 @@ PARTIAL, (Option "optim_CSE3"), Require, (Some "CSE3"), "CSE3"; TOTAL, (Option "optim_CSE3"), Require, (Some "Kill useless moves after CSE3"), "KillUselessMoves"; TOTAL, (Option "optim_forward_moves"), Require, (Some "Forwarding moves"), "ForwardMoves"; PARTIAL, (Option "optim_redundancy"), Require, (Some "Redundancy elimination"), "Deadcode"; +rtl_tunneling; TOTAL, Always, Require, (Some "Renumbering pre rotate"), "Renumber"; PARTIAL, Always, NoRequire, (Some "Loop Rotate"), "Looprotate"; TOTAL, (Option "optim_move_loop_invariants"), NoRequire, (Some "Renumbering for LICM"), "Renumber"; @@ -46,7 +49,7 @@ TOTAL, (Option "optim_move_loop_invariants"), NoRequire, (Some "Renumbering for PARTIAL, (Option "optim_move_loop_invariants"), NoRequire, (Some "CSE3 for LICM"), "CSE3"; PARTIAL, (Option "optim_move_loop_invariants"), NoRequire, (Some "Redundancy elimination for LICM"), "Deadcode"; TOTAL, (Option "all_loads_nontrap"), Require, None, "Allnontrap"; -PARTIAL, Always, Require, (Some "Unused globals"), "Unusedglob" +PARTIAL, Always, Require, (Some "Unused globals"), "Unusedglob"; |];; let post_rtl_passes = @@ -55,7 +58,7 @@ let post_rtl_passes = PARTIAL, Always, Require, (Some "Prepass scheduling"), "BTL_Scheduler", Noprint; PARTIAL, Always, Require, (Some "Projection to RTL"), "BTLtoRTL", (Print (Printf.sprintf "RTL %d" ((Array.length rtl_passes) + 1))); PARTIAL, Always, Require, (Some "Register allocation"), "Allocation", (Print "LTL 1"); - PARTIAL, Always, Require, (Some "Branch tunneling"), "Tunneling", (Print "LTL 2"); + PARTIAL, Always, Require, (Some "LTL Branch tunneling"), "LTLTunneling", (Print "LTL 2"); PARTIAL, Always, Require, (Some "CFG linearization"), "Linearize", Noprint; TOTAL, Always, Require, (Some "Label cleanup"), "CleanupLabels", Noprint; PARTIAL, (Option "debug"), Require, (Some "Debugging info for local variables"), "Debugvar", Noprint; diff --git a/x86/TargetPrinter.ml b/x86/TargetPrinter.ml index 50c871e4..00e70f65 100644 --- a/x86/TargetPrinter.ml +++ b/x86/TargetPrinter.ml @@ -95,9 +95,6 @@ let z oc n = output_string oc (Z.to_string n) let data_pointer = if Archi.ptr64 then ".quad" else ".long" -(* The comment deliminiter *) -let comment = "##" - (* Base-2 log of a Caml integer *) let rec log2 n = assert (n > 0); @@ -106,6 +103,7 @@ let rec log2 n = (* System dependent printer functions *) module type SYSTEM = sig + val comment: string val raw_symbol: out_channel -> string -> unit val symbol: out_channel -> P.t -> unit val label: out_channel -> int -> unit @@ -124,6 +122,9 @@ module type SYSTEM = module ELF_System : SYSTEM = struct + (* The comment delimiter *) + let comment = "#" + let raw_symbol oc s = fprintf oc "%s" s @@ -219,6 +220,10 @@ module ELF_System : SYSTEM = module MacOS_System : SYSTEM = struct + (* The comment delimiter. + `##` instead of `#` to please the Clang assembler. *) + let comment = "##" + let raw_symbol oc s = fprintf oc "_%s" s @@ -280,6 +285,9 @@ module MacOS_System : SYSTEM = module Cygwin_System : SYSTEM = struct + (* The comment delimiter *) + let comment = "#" + let symbol_prefix = if Archi.ptr64 then "" else "_" |