From 497d60773b1fd47b4ba250ed8f7e78acccdabfaf Mon Sep 17 00:00:00 2001 From: Pierre Goutagny Date: Thu, 3 Jun 2021 15:55:08 +0200 Subject: Add RTLTunneling.v --- Makefile | 1 + backend/RTLTunneling.v | 0 2 files changed, 1 insertion(+) create mode 100644 backend/RTLTunneling.v diff --git a/Makefile b/Makefile index 8d91026e..0556a93c 100644 --- a/Makefile +++ b/Makefile @@ -144,6 +144,7 @@ BACKEND=\ Allnontrap.v Allnontrapproof.v \ Allocation.v Allocationproof.v \ Tunneling.v Tunnelingproof.v \ + RTLTunneling.v \ Linear.v Lineartyping.v \ Linearize.v Linearizeproof.v \ CleanupLabels.v CleanupLabelsproof.v \ diff --git a/backend/RTLTunneling.v b/backend/RTLTunneling.v new file mode 100644 index 00000000..e69de29b -- cgit From 245f4f86865ce62b82242af81897936b5034438a Mon Sep 17 00:00:00 2001 From: Pierre Goutagny Date: Thu, 3 Jun 2021 17:13:49 +0200 Subject: Write RTLTunneling.v --- backend/RTLTunneling.v | 125 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) diff --git a/backend/RTLTunneling.v b/backend/RTLTunneling.v index e69de29b..4885002f 100644 --- a/backend/RTLTunneling.v +++ b/backend/RTLTunneling.v @@ -0,0 +1,125 @@ +(* *********************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Xavier Leroy, INRIA Paris-Rocquencourt *) +(* TODO: Proper author information *) +(* *) +(* Copyright Institut National de Recherche en Informatique et en *) +(* Automatique. All rights reserved. This file is distributed *) +(* under the terms of the INRIA Non-Commercial License Agreement. *) +(* *) +(* *********************************************************************) + +(** Branch tunneling for the RTL representation *) + +(* rq: ce code est commenté avec des remarques sur des éléments du code pas évidents, sur lequels j'ai buté, ou qui changent du tunneling LTL. Ils pourront être enlevés ultérieurement *) + +Require Import Coqlib Maps Errors. +Require Import AST. +Require Import RTL. + +(* This is a port of tunneling for LTL. See Tunneling.v *) + +Definition UF := PTree.t (node * Z). + +(* The oracle returns a map of "nop" node to their target with a distance (ie the number of the "nop" node on the path) to the target. *) +Axiom branch_target: RTL.function -> UF. +(* TODO: add an extraction command to link branch_target with its implementation *) + +Local Open Scope error_monad_scope. + +Definition get (td: UF) (pc: node): node*Z := + match td!pc with (* rq: notation pour `PTree.get pc td` *) + | Some (t,d) => (t,Z.abs d) + | None => (pc,0) + end. + +(* rq: "coerce" la structure d'abre UF en une fonction qui retrouve le représentant canonique de la classe d'un noeud *) +Definition target (td: UF) (pc: node): node := fst (get td pc). +Coercion target: UF >-> Funclass. + +(* we check that the domain of [td] is included in the domain of [c] *) +(* rq: pas de basic block en RTL *) +Definition check_included (td: UF) (c: code): option instruction + := PTree.fold (fun (ok:option instruction) pc _ => if ok then c!pc else None) td (Some (Inop xH)). +(* rq: PTree.fold replie l'arbre avec un parcours préfixe. La fonction qui prend en argument l'accumulateur, le label du noeud et le pointeur (`positive`) vers ce noeud *) + +(* we check the validity of targets and their bound: +* the distance of a "nop" node (w.r.t to the target) must be greater than the one of its parents. +*) +Definition check_instr (td: UF) (pc: node) (i: instruction): res unit := + match td!pc with + | None => OK tt (* rq: le type `unit` est le singleton {`tt`} *) + | Some (tpc, dpc) => + let dpc := Z.abs dpc in + match i with + | Inop s => + let (ts,ds) := get td s in + if peq tpc ts then + if zlt ds dpc then OK tt + else Error (msg "bad distance in Inop") + else Error (msg "invalid skip of Inop") + | Icond _ _ ifso ifnot _ => + let (tso,dso) := get td ifso in + let (tnot,dnot) := get td ifnot in + if peq tpc ifso then + if peq tpc ifnot then + if zlt dso dpc then + if zlt dnot dpc then OK tt + else Error (msg "bad distance on else branch") + else Error (msg "bad distance on then branch") + else Error (msg "invalid skip of else branch") + else Error (msg "invalid skip of then branch") + | _ => Error (msg "cannot skip this instruction") + end + end. + +Definition check_code (td: UF) (c: code): res unit := + PTree.fold (fun ok pc i => do _ <- ok; check_instr td pc i) c (OK tt). +(* rq: `do _ <- ok; ...` exécute le bloc à droite du ';' si `ok` est un `OK _`, sinon il passe simplement l'erreur *) + +(* The second pass rewrites all LTL instructions, replacing every + * successor [s] of every instruction by [t s], the canonical representative + * of its equivalence class in the union-find data structure. + *) + +(* rq: beaucoup plus de cas que pour LTL, car plein d'instructions ont des successeurs *) +Definition tunnel_instr (t: node -> node) (i: instruction) : instruction := + match i with + | Inop s => Inop (t s) + | Iop op args res s => Iop op args res (t s) + | Iload trap chunk addr args dst s => Iload trap chunk addr args dst (t s) + | Istore chunk addr args src s => Istore chunk addr args src (t s) + | Icall sig ros args res s => Icall sig ros args res (t s) + | Ibuiltin ef args res s => Ibuiltin ef args res (t s) + | Icond cond args ifso ifnot info => + let ifso' := t ifso in + let ifnot' := t ifnot in + if peq ifso' ifnot' + (* rq: si les deux branches se rejoignent, le if ne sert à rien *) + then Inop ifso' + else Icond cond args ifso' ifnot' info + | Ijumptable arg tbl => Ijumptable arg (List.map t tbl) + | _ => i + end. + +Definition tunnel_function (f: RTL.function): res RTL.function := + let td := branch_target f in + let c := fn_code f in + if check_included td c then + do _ <- check_code td c ; OK + (mkfunction (* rq: mkfunction construit un "Record" `RTL.function` *) + (fn_sig f) + (fn_params f) + (fn_stacksize f) + (PTree.map1 (tunnel_instr td) c) (* rq: `map1` est un map sans le pointeur *) + (td (fn_entrypoint f))) + else Error (msg "Some node of the union-find is not in the CFG"). + +Definition tunnel_fundef (f: fundef): res fundef := + transf_partial_fundef tunnel_function f. + +Definition transf_program (p: program): res program := + transform_partial_program tunnel_fundef p. + -- cgit From 731d22f1b917a3e55deb82505fc5f981b4a37bcc Mon Sep 17 00:00:00 2001 From: Pierre Goutagny Date: Fri, 4 Jun 2021 13:45:59 +0200 Subject: Fix check_instr Icond target conditions --- backend/RTLTunneling.v | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/RTLTunneling.v b/backend/RTLTunneling.v index 4885002f..e6901a9f 100644 --- a/backend/RTLTunneling.v +++ b/backend/RTLTunneling.v @@ -63,8 +63,8 @@ Definition check_instr (td: UF) (pc: node) (i: instruction): res unit := | Icond _ _ ifso ifnot _ => let (tso,dso) := get td ifso in let (tnot,dnot) := get td ifnot in - if peq tpc ifso then - if peq tpc ifnot then + if peq tpc tso then + if peq tpc tnot then if zlt dso dpc then if zlt dnot dpc then OK tt else Error (msg "bad distance on else branch") -- cgit From 7ae1fb0faea68ce5cfe04a232e49659247c244e9 Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Fri, 4 Jun 2021 14:24:07 +0200 Subject: Passing info of live regs to scheduler: beginning --- aarch64/PostpassSchedulingOracle.ml | 1 + aarch64/PrepassSchedulingOracle.ml | 4 +++- scheduling/InstructionScheduler.ml | 2 ++ scheduling/InstructionScheduler.mli | 3 +++ scheduling/RTLpathScheduleraux.ml | 9 ++++++++- 5 files changed, 17 insertions(+), 2 deletions(-) diff --git a/aarch64/PostpassSchedulingOracle.ml b/aarch64/PostpassSchedulingOracle.ml index cde3e7a7..a9737088 100644 --- a/aarch64/PostpassSchedulingOracle.ml +++ b/aarch64/PostpassSchedulingOracle.ml @@ -507,6 +507,7 @@ let build_problem bb = { max_latency = -1; resource_bounds = opweights.pipelined_resource_bounds; + live_regs_entry = Registers.Regset.empty; (* PLACEHOLDER *) instruction_usages = instruction_usages bb; latency_constraints = latency_constraints bb; } diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml index 2c3eb14f..1fd12a6a 100644 --- a/aarch64/PrepassSchedulingOracle.ml +++ b/aarch64/PrepassSchedulingOracle.ml @@ -410,6 +410,7 @@ let define_problem (opweights : opweights) seqa = let simple_deps = get_simple_dependencies opweights seqa in { max_latency = -1; resource_bounds = opweights.pipelined_resource_bounds; + live_regs_entry = Regset.empty; (* PLACEHOLDER *) instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa); latency_constraints = (* if (use_alias_analysis ()) @@ -439,7 +440,8 @@ let prepass_scheduler_by_name name problem early_ones = | "zigzag" -> zigzag_scheduler problem early_ones | _ -> scheduler_by_name name problem -let schedule_sequence (seqa : (instruction*Regset.t) array) = +let schedule_sequence (seqa : (instruction*Regset.t) array) + (live_regs_entry : Registers.Regset.t)= let opweights = OpWeights.get_opweights () in try if (Array.length seqa) <= 1 diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml index eab0b21a..976037bd 100644 --- a/scheduling/InstructionScheduler.ml +++ b/scheduling/InstructionScheduler.ml @@ -33,6 +33,7 @@ type latency_constraint = { type problem = { max_latency : int; resource_bounds : int array; + live_regs_entry : Registers.Regset.t; instruction_usages : int array array; latency_constraints : latency_constraint list; };; @@ -438,6 +439,7 @@ let reverse_problem problem = { max_latency = problem.max_latency; resource_bounds = problem.resource_bounds; + live_regs_entry = Registers.Regset.empty; (* PLACEHOLDER *) instruction_usages = Array.init (nr_instructions + 1) (fun i -> if i=0 diff --git a/scheduling/InstructionScheduler.mli b/scheduling/InstructionScheduler.mli index fb7af3f6..f53dc0ef 100644 --- a/scheduling/InstructionScheduler.mli +++ b/scheduling/InstructionScheduler.mli @@ -23,6 +23,9 @@ type problem = { resource_bounds : int array; (** An array of number of units available indexed by the kind of resources to be allocated. It can be empty, in which case the problem is scheduling without resource constraints. *) + live_regs_entry : Registers.Regset.t; + (** The set of live pseudo-registers at entry. *) + instruction_usages: int array array; (** At index {i i} the vector of resources used by instruction number {i i}. It must be the same length as [resource_bounds] *) diff --git a/scheduling/RTLpathScheduleraux.ml b/scheduling/RTLpathScheduleraux.ml index aeed39df..55f1a078 100644 --- a/scheduling/RTLpathScheduleraux.ml +++ b/scheduling/RTLpathScheduleraux.ml @@ -72,6 +72,11 @@ let get_superblocks code entry pm typing = lsb end +(* PLACEHOLDER *) +let get_live_regs_entry (sb : superblock) = + Registers.Regset.empty + + (* TODO David *) let schedule_superblock sb code = if not !Clflags.option_fprepass @@ -90,6 +95,7 @@ let schedule_superblock sb code = match predicted_successor ii with | Some _ -> 0 | None -> 1 in + let live_regs_entry = get_live_regs_entry sb in match PrepassSchedulingOracle.schedule_sequence (Array.map (fun i -> (match PTree.get i code with @@ -98,7 +104,8 @@ let schedule_superblock sb code = (match PTree.get i sb.liveins with | Some s -> s | None -> Regset.empty)) - (Array.sub sb.instructions 0 (nr_instr-trailer_length))) with + (Array.sub sb.instructions 0 (nr_instr-trailer_length))) + live_regs_entry with | None -> sb.instructions | Some order -> let ins' = -- cgit From 98a7a04258f2cf6caf9f18925cbeeae2f5b17be4 Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Fri, 4 Jun 2021 16:56:32 +0200 Subject: computing live regs at sb entry from its live output regs --- aarch64/PrepassSchedulingOracle.ml | 6 +++--- scheduling/RTLpathScheduleraux.ml | 24 +++++++++++++++++++++--- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml index 1fd12a6a..a743fb68 100644 --- a/aarch64/PrepassSchedulingOracle.ml +++ b/aarch64/PrepassSchedulingOracle.ml @@ -406,11 +406,11 @@ let get_alias_dependencies seqa = !deps;; *) -let define_problem (opweights : opweights) seqa = +let define_problem (opweights : opweights) (live_entry_regs : Regset.t) seqa = let simple_deps = get_simple_dependencies opweights seqa in { max_latency = -1; resource_bounds = opweights.pipelined_resource_bounds; - live_regs_entry = Regset.empty; (* PLACEHOLDER *) + live_regs_entry = live_entry_regs; instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa); latency_constraints = (* if (use_alias_analysis ()) @@ -451,7 +451,7 @@ let schedule_sequence (seqa : (instruction*Regset.t) array) let nr_instructions = Array.length seqa in (if !Clflags.option_debug_compcert > 6 then Printf.printf "prepass scheduling length = %d\n" (Array.length seqa)); - let problem = define_problem opweights seqa in + let problem = define_problem opweights live_regs_entry seqa in (if !Clflags.option_debug_compcert > 7 then (print_sequence stdout (Array.map fst seqa); print_problem stdout problem)); diff --git a/scheduling/RTLpathScheduleraux.ml b/scheduling/RTLpathScheduleraux.ml index 55f1a078..30da5d5d 100644 --- a/scheduling/RTLpathScheduleraux.ml +++ b/scheduling/RTLpathScheduleraux.ml @@ -73,8 +73,26 @@ let get_superblocks code entry pm typing = end (* PLACEHOLDER *) -let get_live_regs_entry (sb : superblock) = - Registers.Regset.empty +let get_live_regs_entry (sb : superblock) code = + let seqa = Array.map (fun i -> + (match PTree.get i code with + | Some ii -> ii + | None -> failwith "RTLpathScheduleraux.get_live_regs_entry" + ), + (match PTree.get i sb.liveins with + | Some s -> s + | None -> Regset.empty)) + sb.instructions in + Array.fold_right (fun (ins, liveins) regset -> + match ins with + | Inop l -> regset + | Iop (op, args, dest, succ) -> + Registers.Regset.add dest + (List.fold_left (fun set reg -> + Registers.Regset.remove reg set) + regset args) + | _ -> regset (* PLACEHOLDER *) + ) seqa Registers.Regset.empty (* TODO David *) @@ -95,7 +113,7 @@ let schedule_superblock sb code = match predicted_successor ii with | Some _ -> 0 | None -> 1 in - let live_regs_entry = get_live_regs_entry sb in + let live_regs_entry = get_live_regs_entry sb code in match PrepassSchedulingOracle.schedule_sequence (Array.map (fun i -> (match PTree.get i code with -- cgit From deac4407cacb45efa56adf912bd9db32235984f5 Mon Sep 17 00:00:00 2001 From: Pierre Goutagny Date: Fri, 4 Jun 2021 17:49:38 +0200 Subject: Add RTLTunnelingproof.v --- Makefile | 2 +- backend/RTLTunnelingproof.v | 170 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 171 insertions(+), 1 deletion(-) create mode 100644 backend/RTLTunnelingproof.v diff --git a/Makefile b/Makefile index 0556a93c..4900b165 100644 --- a/Makefile +++ b/Makefile @@ -144,7 +144,7 @@ BACKEND=\ Allnontrap.v Allnontrapproof.v \ Allocation.v Allocationproof.v \ Tunneling.v Tunnelingproof.v \ - RTLTunneling.v \ + RTLTunneling.v RTLTunnelingproof.v \ Linear.v Lineartyping.v \ Linearize.v Linearizeproof.v \ CleanupLabels.v CleanupLabelsproof.v \ diff --git a/backend/RTLTunnelingproof.v b/backend/RTLTunnelingproof.v new file mode 100644 index 00000000..57f14b9f --- /dev/null +++ b/backend/RTLTunnelingproof.v @@ -0,0 +1,170 @@ +(* *********************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Xavier Leroy, INRIA Paris-Rocquencourt *) +(* TODO: Proper author information *) +(* *) +(* Copyright Institut National de Recherche en Informatique et en *) +(* Automatique. All rights reserved. This file is distributed *) +(* under the terms of the INRIA Non-Commercial License Agreement. *) +(* *) +(* *********************************************************************) + +(** Correctness proof for the branch tunneling optimization for RTL. *) +(* This is a port of Tunnelingproof.v, the same optimisation for LTL. *) + +Require Import Coqlib Maps Errors. +Require Import AST Linking. +Require Import Values Memory Events Globalenvs Smallstep. +Require Import Op Locations RTL. +Require Import RTLTunneling. +Require Import Conventions1. + +Local Open Scope nat. + +(** Preservation of semantics *) + +Definition match_prog (p tp: program) := + match_program (fun _ f tf => tunnel_fundef f = OK tf) eq p tp. + (* rq: `(fun _ ...)` est la fonction pour matcher des fonctions + * `eq` la fonction pour matcher les variables ? (`varinfo` dans la def) + * `p` et `tp` sont les programmes donc on doit dire s'ils match + *) + +Section PRESERVATION. + + +Variables prog tprog: program. +Hypothesis TRANSL: match_prog prog tprog. (* rq: on suppose que les programmes match *) +Let ge := Genv.globalenv prog. +Let tge := Genv.globalenv tprog. + +(* rq: pour les deux lemmes suivants, j'ai recopié les preuves, mais je ne les comprends pas du tout... D'où vient `exploit` ?? *) +Lemma functions_translated: + forall (v: val) (f: fundef), + Genv.find_funct ge v = Some f -> + exists tf, tunnel_fundef f = OK tf /\ Genv.find_funct tge v = Some tf. +Proof. + intros. exploit (Genv.find_funct_match TRANSL). eauto. + intros (cu & tf & A & B & C). + repeat eexists; intuition eauto. +Qed. + +Lemma function_ptr_translated: + forall v f, + Genv.find_funct_ptr ge v = Some f -> + exists tf, + Genv.find_funct_ptr tge v = Some tf /\ tunnel_fundef f = OK tf. +Proof. + intros. + exploit (Genv.find_funct_ptr_transf_partial TRANSL); eauto. +Qed. + +Lemma symbols_preserved s: Genv.find_symbol tge s = Genv.find_symbol ge s. +Proof. + rewrite (Genv.find_symbol_match TRANSL). auto. +Qed. + +Lemma sig_preserved: + forall f tf, tunnel_fundef f = OK tf -> funsig tf = funsig f. +Proof. + intros. destruct f. + - simpl in H. monadInv H. (* rq: c'est une tactique maison... *) + unfold tunnel_function in EQ. + destruct (check_included _ _) as [_|] in EQ; try congruence. + monadInv EQ. auto. + - simpl in H. monadInv H. auto. +Qed. + +Lemma senv_preserved: + Senv.equiv ge tge. +Proof. + eapply (Genv.senv_match TRANSL). (* Il y a déjà une preuve, je ne vais pas réinventer la roue ici *) +Qed. + +(* TODO: vérifier s'il faut faire quelque chose avec le `res` ? *) +Inductive match_stackframes: stackframe -> stackframe -> Prop := + | match_stackframes_intro: + forall res f tf sp pc rs trs + (TF: tunnel_function f = OK tf) + (RS: Registers.regs_lessdef rs trs), + match_stackframes + (Stackframe res f sp pc rs) + (Stackframe res tf sp (branch_target f pc) trs). + +(* rq: `match_states s1 s2` correspond à s1 ~ s2 *) +Inductive match_states: state -> state -> Prop := + | match_states_intro: + forall s ts f tf sp pc rs trs m tm + (STK: list_forall2 match_stackframes s ts) + (TF: tunnel_function f = OK tf) + (RS: Registers.regs_lessdef rs trs) + (MEM: Mem.extends m tm), + match_states + (State s f sp pc rs m) + (State ts tf sp (branch_target f pc) trs tm) + | match_states_call: + forall s ts f tf a ta m tm + (STK: list_forall2 match_stackframes s ts) + (TF: tunnel_fundef f = OK tf) + (ARGS: list_forall2 Val.lessdef a ta) + (MEM: Mem.extends m tm), + match_states + (Callstate s f a m) + (Callstate ts tf ta tm) + | match_states_return: + forall s ts v tv m tm + (STK: list_forall2 match_stackframes s ts) + (VAL: Val.lessdef v tv) + (MEM: Mem.extends m tm), + match_states + (Returnstate s v m) + (Returnstate ts tv tm). + +Lemma transf_initial_states: + forall s1: state, initial_state prog s1 -> + exists s2: state, initial_state tprog s2 /\ match_states s1 s2. +Proof. + intros. inversion H. + exploit function_ptr_translated; eauto. + intro Htf. destruct Htf as (tf & Htf & Hf). + exists (Callstate nil tf nil m0). split. + - econstructor. + + apply (Genv.init_mem_transf_partial TRANSL). auto. + + rewrite (match_program_main TRANSL). + rewrite symbols_preserved. eauto. + + apply Htf. + + rewrite <- H3. apply sig_preserved. apply Hf. + - constructor. + + constructor. + + apply Hf. + + constructor. + + apply Mem.extends_refl. +Qed. + +Lemma transf_final_states: + forall (s1 : state) + (s2 : state) (r : Integers.Int.int), + match_states s1 s2 -> + final_state s1 r -> + final_state s2 r. +Proof. + intros s1 s2 r HM H1. inv H1. inv HM. inv STK. inv VAL. constructor. +Qed. + +(* `Lemma tunnel_step_correct` correspond au diagramme "option simulation" *) + +(* +Theorem transf_program_correct: + forward_simulation (RTL.semantics prog) (RTL.semantics tprog). +Proof. + eapply forward_simulation_opt. + apply senv_preserved. + apply transf_initial_states. + apply transf_final_states. + apply tunnel_step_correct. +Qed. +*) + +End PRESERVATION. -- cgit From 599823a6410f1629f2b8704291839e0974bce83b Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Sat, 5 Jun 2021 19:52:59 +0200 Subject: function written, now needs testing --- scheduling/RTLpathScheduleraux.ml | 50 +++++++++++++++++++++++++++++++-------- 1 file changed, 40 insertions(+), 10 deletions(-) diff --git a/scheduling/RTLpathScheduleraux.ml b/scheduling/RTLpathScheduleraux.ml index 30da5d5d..5a427e6c 100644 --- a/scheduling/RTLpathScheduleraux.ml +++ b/scheduling/RTLpathScheduleraux.ml @@ -72,7 +72,7 @@ let get_superblocks code entry pm typing = lsb end -(* PLACEHOLDER *) + let get_live_regs_entry (sb : superblock) code = let seqa = Array.map (fun i -> (match PTree.get i code with @@ -85,16 +85,46 @@ let get_live_regs_entry (sb : superblock) code = sb.instructions in Array.fold_right (fun (ins, liveins) regset -> match ins with - | Inop l -> regset - | Iop (op, args, dest, succ) -> - Registers.Regset.add dest - (List.fold_left (fun set reg -> - Registers.Regset.remove reg set) - regset args) - | _ -> regset (* PLACEHOLDER *) - ) seqa Registers.Regset.empty - + | Inop _ -> regset + | Iop (_, args, dest, _) + | Iload (_, _, _, args, dest, _) -> + List.fold_left (fun set reg -> Registers.Regset.add reg set) + (Registers.Regset.remove dest regset) args + | Istore (_, _, args, src, _) -> + List.fold_left (fun set reg -> Registers.Regset.add reg set) + (Registers.Regset.add src regset) args + | Icall (_, fn, args, dest, _) -> + List.fold_left (fun set reg -> Registers.Regset.add reg set) + ((match fn with + | Coq_inl reg -> (Registers.Regset.add reg) + | Coq_inr _ -> (fun x -> x)) + (Registers.Regset.remove dest regset)) + args + | Itailcall (_, fn, args) -> + List.fold_left (fun set reg -> Registers.Regset.add reg set) + (match fn with + | Coq_inl reg -> (Registers.Regset.add reg regset) + | Coq_inr _ -> regset) + args + | Ibuiltin (_, args, dest, _) -> + List.fold_left (fun set reg -> + match reg with + | AST.BA r -> Registers.Regset.add r set + | _ -> set) + (match dest with + | AST.BR r -> Registers.Regset.remove r regset) + args + | Icond (_, args, _, _, _) -> + List.fold_left (fun set reg -> + Registers.Regset.add reg set) + regset args + | Ijumptable (reg, _) + | Ireturn (Some reg) -> + Registers.Regset.add reg regset + | _ -> regset + ) seqa sb.s_output_regs + (* TODO David *) let schedule_superblock sb code = if not !Clflags.option_fprepass -- cgit From 9118878bd14e24cc04c2f36cab7aa7271a0f1852 Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Sun, 6 Jun 2021 12:11:15 +0200 Subject: Fixing scope error, and non-exhaustive pattern matching --- scheduling/RTLpathScheduleraux.ml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/scheduling/RTLpathScheduleraux.ml b/scheduling/RTLpathScheduleraux.ml index 5a427e6c..653765f5 100644 --- a/scheduling/RTLpathScheduleraux.ml +++ b/scheduling/RTLpathScheduleraux.ml @@ -96,15 +96,15 @@ let get_live_regs_entry (sb : superblock) code = | Icall (_, fn, args, dest, _) -> List.fold_left (fun set reg -> Registers.Regset.add reg set) ((match fn with - | Coq_inl reg -> (Registers.Regset.add reg) - | Coq_inr _ -> (fun x -> x)) + | Datatypes.Coq_inl reg -> (Registers.Regset.add reg) + | Datatypes.Coq_inr _ -> (fun x -> x)) (Registers.Regset.remove dest regset)) args | Itailcall (_, fn, args) -> List.fold_left (fun set reg -> Registers.Regset.add reg set) (match fn with - | Coq_inl reg -> (Registers.Regset.add reg regset) - | Coq_inr _ -> regset) + | Datatypes.Coq_inl reg -> (Registers.Regset.add reg regset) + | Datatypes.Coq_inr _ -> regset) args | Ibuiltin (_, args, dest, _) -> List.fold_left (fun set reg -> @@ -112,7 +112,8 @@ let get_live_regs_entry (sb : superblock) code = | AST.BA r -> Registers.Regset.add r set | _ -> set) (match dest with - | AST.BR r -> Registers.Regset.remove r regset) + | AST.BR r -> Registers.Regset.remove r regset + | _ -> regset) args | Icond (_, args, _, _, _) -> List.fold_left (fun set reg -> -- cgit From 2249f3c7771c285ccd25f6e94478be388a741da5 Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Sun, 6 Jun 2021 20:49:34 +0200 Subject: Adding debug info --- scheduling/RTLpathScheduleraux.ml | 95 ++++++++++++++++++++++----------------- 1 file changed, 53 insertions(+), 42 deletions(-) diff --git a/scheduling/RTLpathScheduleraux.ml b/scheduling/RTLpathScheduleraux.ml index 653765f5..8e7f0dfa 100644 --- a/scheduling/RTLpathScheduleraux.ml +++ b/scheduling/RTLpathScheduleraux.ml @@ -74,6 +74,11 @@ end let get_live_regs_entry (sb : superblock) code = + (if !Clflags.option_debug_compcert > 6 + then debug_flag := true); + debug "getting live regs for superblock:\n"; + print_superblock sb code; + debug "\n"; let seqa = Array.map (fun i -> (match PTree.get i code with | Some ii -> ii @@ -83,48 +88,53 @@ let get_live_regs_entry (sb : superblock) code = | Some s -> s | None -> Regset.empty)) sb.instructions in - Array.fold_right (fun (ins, liveins) regset -> - match ins with - | Inop _ -> regset - | Iop (_, args, dest, _) - | Iload (_, _, _, args, dest, _) -> - List.fold_left (fun set reg -> Registers.Regset.add reg set) - (Registers.Regset.remove dest regset) args - | Istore (_, _, args, src, _) -> - List.fold_left (fun set reg -> Registers.Regset.add reg set) - (Registers.Regset.add src regset) args - | Icall (_, fn, args, dest, _) -> - List.fold_left (fun set reg -> Registers.Regset.add reg set) - ((match fn with - | Datatypes.Coq_inl reg -> (Registers.Regset.add reg) - | Datatypes.Coq_inr _ -> (fun x -> x)) - (Registers.Regset.remove dest regset)) - args - | Itailcall (_, fn, args) -> - List.fold_left (fun set reg -> Registers.Regset.add reg set) - (match fn with - | Datatypes.Coq_inl reg -> (Registers.Regset.add reg regset) - | Datatypes.Coq_inr _ -> regset) - args - | Ibuiltin (_, args, dest, _) -> - List.fold_left (fun set reg -> - match reg with - | AST.BA r -> Registers.Regset.add r set - | _ -> set) - (match dest with - | AST.BR r -> Registers.Regset.remove r regset - | _ -> regset) - args - | Icond (_, args, _, _, _) -> - List.fold_left (fun set reg -> - Registers.Regset.add reg set) - regset args - | Ijumptable (reg, _) - | Ireturn (Some reg) -> - Registers.Regset.add reg regset - | _ -> regset - ) seqa sb.s_output_regs - + let ret = + Array.fold_right (fun (ins, liveins) regset -> + match ins with + | Inop _ -> regset + | Iop (_, args, dest, _) + | Iload (_, _, _, args, dest, _) -> + List.fold_left (fun set reg -> Registers.Regset.add reg set) + (Registers.Regset.remove dest regset) args + | Istore (_, _, args, src, _) -> + List.fold_left (fun set reg -> Registers.Regset.add reg set) + (Registers.Regset.add src regset) args + | Icall (_, fn, args, dest, _) -> + List.fold_left (fun set reg -> Registers.Regset.add reg set) + ((match fn with + | Datatypes.Coq_inl reg -> (Registers.Regset.add reg) + | Datatypes.Coq_inr _ -> (fun x -> x)) + (Registers.Regset.remove dest regset)) + args + | Itailcall (_, fn, args) -> + List.fold_left (fun set reg -> Registers.Regset.add reg set) + (match fn with + | Datatypes.Coq_inl reg -> (Registers.Regset.add reg regset) + | Datatypes.Coq_inr _ -> regset) + args + | Ibuiltin (_, args, dest, _) -> + List.fold_left (fun set reg -> + match reg with + | AST.BA r -> Registers.Regset.add r set + | _ -> set) + (match dest with + | AST.BR r -> Registers.Regset.remove r regset + | _ -> regset) + args + | Icond (_, args, _, _, _) -> + List.fold_left (fun set reg -> + Registers.Regset.add reg set) + regset args + | Ijumptable (reg, _) + | Ireturn (Some reg) -> + Registers.Regset.add reg regset + | _ -> regset + ) seqa sb.s_output_regs + in debug "live in regs: "; + print_regset ret; + debug "\n"; + debug_flag := false; + ret (* TODO David *) let schedule_superblock sb code = @@ -144,6 +154,7 @@ let schedule_superblock sb code = match predicted_successor ii with | Some _ -> 0 | None -> 1 in + debug "hello\n"; let live_regs_entry = get_live_regs_entry sb code in match PrepassSchedulingOracle.schedule_sequence (Array.map (fun i -> -- cgit From 403f2dd49dfc6948df31e08f60655de3091816d6 Mon Sep 17 00:00:00 2001 From: Pierre Goutagny Date: Mon, 7 Jun 2021 21:41:36 +0200 Subject: Monday's work on RTLTunnelingproof --- backend/RTLTunnelingproof.v | 184 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 153 insertions(+), 31 deletions(-) diff --git a/backend/RTLTunnelingproof.v b/backend/RTLTunnelingproof.v index 57f14b9f..f63c3b35 100644 --- a/backend/RTLTunnelingproof.v +++ b/backend/RTLTunnelingproof.v @@ -23,6 +23,77 @@ Require Import Conventions1. Local Open Scope nat. +(**) Definition check_included_spec (c:code) (td:UF) (ok: option instruction) := + ok <> None -> forall pc, c!pc = None -> td!pc = None. + +Lemma check_included_correct (td:UF) (c:code): + check_included_spec c td (check_included td c). +Proof. + apply PTree_Properties.fold_rec with (P:=check_included_spec c); unfold check_included_spec. + - intros m m' oi EQ IND N pc. rewrite <- EQ. apply IND. apply N. + - intros N pc. rewrite PTree.gempty. auto. + - intros m oi pc v N S IND. destruct oi. + + intros. rewrite PTree.gsspec. destruct (peq _ _); try congruence. apply IND. congruence. apply H0. + + contradiction. +Qed. + +(**) Inductive target_bounds (target: node -> node) (bound: node -> nat) (pc: node) : option instruction -> Prop := + | TB_default (TB: target pc = pc) oi: + target_bounds target bound pc oi + | TB_nop s + (EQ: target pc = target s) + (DEC: bound s < bound pc): + target_bounds target bound pc (Some (Inop s)) + | TB_cond cond args ifso ifnot info + (EQSO: target pc = target ifso) + (EQNOT: target pc = target ifnot) + (DECSO: bound ifso < bound pc) + (DECNOT: bound ifnot < bound pc): + target_bounds target bound pc (Some (Icond cond args ifso ifnot info)). + +Lemma target_None (td: UF) (pc: node): td!pc = None -> td pc = pc. +Proof. + unfold target, get. intro EQ. rewrite EQ. auto. +Qed. + +Lemma get_nonneg td pc t d: get td pc = (t,d) -> (0 <= d)%Z. +Proof. + unfold get. destruct td!pc as [(tpc,dpc)|]; intro H; inv H; lia. +Qed. + +(**) Definition bound (td: UF) (pc: node) := Z.to_nat (snd (get td pc)). + + +(* TODO: à réécrire proprement *) +Lemma check_instr_correct (td: UF) (pc: node) (i: instruction): + check_instr td pc i = OK tt -> + target_bounds (target td) (bound td) pc (Some i). +Proof. + unfold check_instr. destruct (td!pc) as [(tpc,dpc)|] eqn:EQ. + assert (DPC: snd (get td pc) = Z.abs dpc). { unfold get. rewrite EQ. auto. } + - destruct i; try congruence. + + destruct (get td n) as (ts,ds) eqn:EQs. + destruct (peq _ _); try congruence. + destruct (zlt _ _); try congruence. intros _. + apply TB_nop. replace (td pc) with tpc. + unfold target. rewrite EQs. auto. + unfold target. unfold get. rewrite EQ. auto. + unfold bound. rewrite DPC. rewrite EQs; simpl. apply Z2Nat.inj_lt; try lia. apply get_nonneg with td n ts. apply EQs. + + destruct (get td n) as (tso,dso) eqn:EQSO. + destruct (get td n0) as (tnot,dnot) eqn:EQNOT. + intro H. + repeat ((destruct (peq _ _) in H || destruct (zlt _ _) in H); try congruence). + apply TB_cond; subst. + * unfold target. replace (fst (get td pc)) with tnot. rewrite EQSO. auto. + unfold get. rewrite EQ. auto. + * unfold target. replace (fst (get td pc)) with tnot. rewrite EQNOT. auto. + unfold get. rewrite EQ. auto. + * unfold bound. rewrite DPC. apply Z2Nat.inj_lt; try lia. apply get_nonneg with td n tnot. rewrite EQSO. auto. rewrite EQSO. auto. + * unfold bound. rewrite DPC. apply Z2Nat.inj_lt; try lia. apply get_nonneg with td n0 tnot. rewrite EQNOT; auto. rewrite EQNOT; auto. + - intros _. apply TB_default. unfold target. unfold get. rewrite EQ. auto. +Qed. + + (** Preservation of semantics *) Definition match_prog (p tp: program) := @@ -32,6 +103,8 @@ Definition match_prog (p tp: program) := * `p` et `tp` sont les programmes donc on doit dire s'ils match *) + + Section PRESERVATION. @@ -41,14 +114,21 @@ Let ge := Genv.globalenv prog. Let tge := Genv.globalenv tprog. (* rq: pour les deux lemmes suivants, j'ai recopié les preuves, mais je ne les comprends pas du tout... D'où vient `exploit` ?? *) +(* rq: `exploit` vient de Coqlib! Mais je comprends pas encore vraiment ce qui se passe... *) Lemma functions_translated: forall (v: val) (f: fundef), Genv.find_funct ge v = Some f -> exists tf, tunnel_fundef f = OK tf /\ Genv.find_funct tge v = Some tf. Proof. - intros. exploit (Genv.find_funct_match TRANSL). eauto. + intros. + exploit (Genv.find_funct_match TRANSL). apply H. intros (cu & tf & A & B & C). - repeat eexists; intuition eauto. + (* exists tf. split. apply B. apply A. *) + (* rq: on peut remplacer les `split` et `apply` par `eauto`, et ne pas spécifier le + * `exists`, avec un `eexists` *) + eexists. eauto. + (* rq: Je ne comprends pas à quoi sert le `intuition`... *) + (* repeat eexists; intuition eauto. *) Qed. Lemma function_ptr_translated: @@ -57,30 +137,43 @@ Lemma function_ptr_translated: exists tf, Genv.find_funct_ptr tge v = Some tf /\ tunnel_fundef f = OK tf. Proof. - intros. - exploit (Genv.find_funct_ptr_transf_partial TRANSL); eauto. + intros. exploit (Genv.find_funct_ptr_match TRANSL). + - apply H. + - intros (cu & tf & A & B & C). exists tf. split. apply A. apply B. Qed. Lemma symbols_preserved s: Genv.find_symbol tge s = Genv.find_symbol ge s. Proof. - rewrite (Genv.find_symbol_match TRANSL). auto. + apply (Genv.find_symbol_match TRANSL). + (* rq: ici pas de `exploit` mais un `apply` parce que `Genv.find_symbol_match + * prouve vraiment pile ce qu'on veut *) Qed. Lemma sig_preserved: forall f tf, tunnel_fundef f = OK tf -> funsig tf = funsig f. Proof. - intros. destruct f. - - simpl in H. monadInv H. (* rq: c'est une tactique maison... *) + intros. destruct f; simpl in H. + - + (* + unfold bind in H. + destruct (tunnel_function _) as [x|] eqn:EQ; try congruence. + inversion H. unfold tunnel_function in EQ. - destruct (check_included _ _) as [_|] in EQ; try congruence. + destruct (check_included _ _) in EQ; try congruence. + unfold bind in EQ. destruct (check_code _ _) in EQ; try congruence. inversion EQ. auto. + *) + + monadInv H. (* rq: c'est une tactique maison... *) + unfold tunnel_function in EQ. + destruct (check_included _ _) in EQ; try congruence. monadInv EQ. auto. - - simpl in H. monadInv H. auto. + - monadInv H. auto. Qed. -Lemma senv_preserved: +(**) Lemma senv_preserved: Senv.equiv ge tge. Proof. - eapply (Genv.senv_match TRANSL). (* Il y a déjà une preuve, je ne vais pas réinventer la roue ici *) + eapply (Genv.senv_match TRANSL). (* Il y a déjà une preuve de cette propriété très exactement, je ne vais pas réinventer la roue ici *) Qed. (* TODO: vérifier s'il faut faire quelque chose avec le `res` ? *) @@ -122,27 +215,47 @@ Inductive match_states: state -> state -> Prop := (Returnstate s v m) (Returnstate ts tv tm). +(* TODO: il faut définir une bonne mesure *) +(**) Definition measure (st: state) : nat := + match st with + | State s f sp pc rs m => + match (fn_code f)!pc with + | Some (Inop pc) => (bound (branch_target f) pc) * 2 + 1 + | Some (Icond _ _ ifso ifnot _) => (max + (bound (branch_target f) ifso) + (bound (branch_target f) ifnot) + ) * 2 + 1 + | Some _ => 0 + | None => 0 + end + | Callstate s f v m => 0 + | Returnstate s v m => 0 + end. + +(* rq: sans les lemmes définis au-dessus je ne vois pas trop comment j'aurais + * fait... ni comment j'aurais eu l'idée d'en faire des lemmes ? *) Lemma transf_initial_states: forall s1: state, initial_state prog s1 -> exists s2: state, initial_state tprog s2 /\ match_states s1 s2. Proof. - intros. inversion H. - exploit function_ptr_translated; eauto. - intro Htf. destruct Htf as (tf & Htf & Hf). - exists (Callstate nil tf nil m0). split. - - econstructor. - + apply (Genv.init_mem_transf_partial TRANSL). auto. - + rewrite (match_program_main TRANSL). - rewrite symbols_preserved. eauto. - + apply Htf. - + rewrite <- H3. apply sig_preserved. apply Hf. - - constructor. - + constructor. - + apply Hf. - + constructor. - + apply Mem.extends_refl. + intros. inversion H as [b f m0 ge0 MEM SYM PTR SIG CALL]. + exploit function_ptr_translated. + - apply PTR. + - intros (tf & TPTR & TUN). + exists (Callstate nil tf nil m0). split. + + apply initial_state_intro with b. + * apply (Genv.init_mem_match TRANSL). apply MEM. + * rewrite (match_program_main TRANSL). + rewrite symbols_preserved. apply SYM. + * apply TPTR. + * rewrite <- SIG. apply sig_preserved. apply TUN. + + apply match_states_call. + * apply list_forall2_nil. + * apply TUN. + * apply list_forall2_nil. + * apply Mem.extends_refl. Qed. - + Lemma transf_final_states: forall (s1 : state) (s2 : state) (r : Integers.Int.int), @@ -150,12 +263,22 @@ Lemma transf_final_states: final_state s1 r -> final_state s2 r. Proof. - intros s1 s2 r HM H1. inv H1. inv HM. inv STK. inv VAL. constructor. + (* rq: `inv` au lieu de `inversion` fait beaucoup de nettoyage dans les + * hypothèses, mais je sais pas trop ce que ça fait exactement *) + intros. inv H0. inv H. inv VAL. inversion STK. apply final_state_intro. Qed. (* `Lemma tunnel_step_correct` correspond au diagramme "option simulation" *) +Lemma tunnel_step_correct: + forall st1 t st2, step ge st1 t st2 -> + forall st1' (MS: match_states st1 st1'), + (exists st2', step tge st1' t st2' /\ match_states st2 st2') + \/ (measure st2 < measure st1 /\ t = E0 /\ match_states st2 st1')%nat. +Proof. + intros st1 t st2 H. induction H; intros; inv MS. +Admitted. + -(* Theorem transf_program_correct: forward_simulation (RTL.semantics prog) (RTL.semantics tprog). Proof. @@ -163,8 +286,7 @@ Proof. apply senv_preserved. apply transf_initial_states. apply transf_final_states. - apply tunnel_step_correct. + exact tunnel_step_correct. Qed. -*) End PRESERVATION. -- cgit From 52378f0600652a94edcc8c78e4b426243f717a89 Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Tue, 8 Jun 2021 15:11:03 +0200 Subject: Add some tests --- common/DebugPrint.ml | 4 +- scheduling/RTLpathScheduleraux.ml | 2 +- test/nardino/scheduling/entry_regs.c | 16 +++++ test/nardino/scheduling/spille_etrange.c | 114 +++++++++++++++++++++++++++++++ 4 files changed, 133 insertions(+), 3 deletions(-) create mode 100644 test/nardino/scheduling/entry_regs.c create mode 100644 test/nardino/scheduling/spille_etrange.c diff --git a/common/DebugPrint.ml b/common/DebugPrint.ml index 6f8449ee..275e6a71 100644 --- a/common/DebugPrint.ml +++ b/common/DebugPrint.ml @@ -132,10 +132,10 @@ let print_instructions insts code = | None -> failwith "Did not get some" | Some thing -> thing in if (!debug_flag) then begin - debug "[ "; + debug "[\n"; List.iter ( fun n -> (PrintRTL.print_instruction stdout (P.to_int n, get_some @@ PTree.get n code)) - ) insts; debug "]" + ) insts; debug " ]" end let print_arrayp arr = begin diff --git a/scheduling/RTLpathScheduleraux.ml b/scheduling/RTLpathScheduleraux.ml index 8e7f0dfa..24fef3e8 100644 --- a/scheduling/RTLpathScheduleraux.ml +++ b/scheduling/RTLpathScheduleraux.ml @@ -17,7 +17,7 @@ let print_superblock (sb: superblock) code = begin debug "{ instructions = "; print_instructions (Array.to_list insts) code; debug "\n"; debug " liveins = "; print_ptree_regset li; debug "\n"; - debug " output_regs = "; print_regset outs; debug "}" + debug " output_regs = "; print_regset outs; debug "\n}" end let print_superblocks lsb code = diff --git a/test/nardino/scheduling/entry_regs.c b/test/nardino/scheduling/entry_regs.c new file mode 100644 index 00000000..047a613d --- /dev/null +++ b/test/nardino/scheduling/entry_regs.c @@ -0,0 +1,16 @@ +#include + +int f(int n) { + return n; +} + + +int main(int argc, char *argv[]) { + int a=1; + float b=2.; + int c = f(a); + a = 3; + int d = f(a); + printf("%e, %d, %d, %d", b, a, c, d); + return 0; +} diff --git a/test/nardino/scheduling/spille_etrange.c b/test/nardino/scheduling/spille_etrange.c new file mode 100644 index 00000000..1c36ee86 --- /dev/null +++ b/test/nardino/scheduling/spille_etrange.c @@ -0,0 +1,114 @@ +int f(int k) { + int a1 = k; + int b1 = 2*a1; + int c = a1; + int a2 = k+1; + int b2 = 2*a2; + c += a2; + int a3 = k+2; + int b3 = 2*a3; + c += a3; + int a4 = k+3; + int b4 = 2*a4; + c += a4; + int a5 = k+4; + int b5 = 2*a5; + c += a5; + int a6 = k+5; + int b6 = 2*a6; + c += a6; + int a7 = k+6; + int b7 = 2*a7; + c += a7; + int a8 = k+7; + int b8 = 2*a8; + c += a8; + int a9 = k+8; + int b9 = 2*a9; + c += a9; + int a10 = k+9; + int b10 = 2*a10; + c += a10; + int a11 = k+10; + int b11 = 2*a11; + c += a11; + int a12 = k+11; + int b12 = 2*a12; + c += a12; + int a13 = k+12; + int b13 = 2*a13; + c += a13; + int a14 = k+13; + int b14 = 2*a14; + c += a14; + int a15 = k+14; + int b15 = 2*a15; + c += a15; + int a16 = k+15; + int b16 = 2*a16; + c += a16; + int a17 = k+16; + int b17 = 2*a17; + c += a17; + int a18 = k+17; + int b18 = 2*a18; + c += a18; + int a19 = k+18; + int b19 = 2*a19; + c += a19; + int a20 = k+19; + int b20 = 2*a20; + c += a20; + int a21 = k+20; + int b21 = 2*a21; + c += a21; + int a22 = k+21; + int b22 = 2*a22; + c += a22; + int a23 = k+22; + int b23 = 2*a23; + c += a23; + int a24 = k+23; + int b24 = 2*a24; + c += a24; + int a25 = k+24; + int b25 = 2*a25; + c += a25; + int a26 = k+25; + int b26 = 2*a26; + c += a26; + return + b13+ + b12+ + b11+ + b10+ + b9+ + b8+ + b7+ + b6+ + b5+ + b4+ + b3+ + b2+ + b1+ + b14+ + b15+ + b16+ + b17+ + b18+ + b19+ + b20+ + b21+ + b22+ + b23+ + b23+ + b24+ + b25+ + b26+ + c; +} + +int main(int argc, char *argv[]) { + f(3); + return 0; +} -- cgit From 9b6247b7996f3e0181d27ec0e20daffd28e0884f Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Tue, 8 Jun 2021 16:06:36 +0200 Subject: Another test : one spill when scheduled forward, none if not --- test/nardino/scheduling/spille_forw.c | 119 ++++++++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+) create mode 100644 test/nardino/scheduling/spille_forw.c diff --git a/test/nardino/scheduling/spille_forw.c b/test/nardino/scheduling/spille_forw.c new file mode 100644 index 00000000..0c69efd5 --- /dev/null +++ b/test/nardino/scheduling/spille_forw.c @@ -0,0 +1,119 @@ +#include + +int f(int n) { + float a1 = (float) n; + float b1 = 2.*a1; + float c = a1; + float a2 = (float) n+1; + float b2 = 2.*a2; + c += a2; + float a3 = (float) n+2; + float b3 = 2.*a3; + c += a3; + float a4 = (float) n+3; + float b4 = 2.*a4; + c += a4; + float a5 = (float) n+4; + float b5 = 2.*a5; + c += a5; + float a6 = (float) n+5; + float b6 = 2.*a6; + c += a6; + float a7 = (float) n+6; + float b7 = 2.*a7; + c += a7; + float a8 = (float) n+7; + float b8 = 2.*a8; + c += a8; + float a9 = (float) n+8; + float b9 = 2.*a9; + c += a9; + float a10 = (float) n+9; + float b10 = 2.*a10; + c += a10; + float a11 = (float) n+10; + float b11 = 2.*a11; + c += a11; + float a12 = (float) n+11; + float b12 = 2.*a12; + c += a12; + float a13 = (float) n+12; + float b13 = 2.*a13; + c += a13; + float a14 = (float) n+13; + float b14 = 2.*a14; + c += a14; + float a15 = (float) n+14; + float b15 = 2.*a15; + c += a15; + float a16 = (float) n+15; + float b16 = 2.*a16; + c += a16; + float a17 = (float) n+16; + float b17 = 2.*a17; + c += a17; + float a18 = (float) n+17; + float b18 = 2.*a18; + c += a18; + float a19 = (float) n+18; + float b19 = 2.*a19; + c += a19; + float a20 = (float) n+19; + float b20 = 2.*a20; + c += a20; + float a21 = (float) n+20; + float b21 = 2.*a21; + c += a21; + float a22 = (float) n+21; + float b22 = 2.*a22; + c += a22; + float a23 = (float) n+22; + float b23 = 2.*a23; + c += a23; + float a24 = (float) n+23; + float b24 = 2.*a24; + c += a24; + float a25 = (float) n+24; + float b25 = 2.*a25; + c += a25; + float a26 = (float) n+25; + float b26 = 2.*a26; + c += a26; + return c + + b1+ + b2+ + b3+ + b4+ + b5+ + b6+ + b7+ + b8+ + b9+ + b10+ + b11+ + b12+ + b13+ + b14+ + b15+ + b16+ + b17+ + b18+ + b19+ + b20+ + b21+ + b22+ + b23+ + b24+ + b25+ + b26; +} + + + + + + +int main(int argc, char *argv[]) { + f(5); + return 0; +} -- cgit From 386b9053177bb4ef2801cec00b717c400a828139 Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Tue, 8 Jun 2021 16:53:19 +0200 Subject: Fix RTLpathScheduleraux.get_live_regs_entry --- scheduling/RTLpathScheduleraux.ml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scheduling/RTLpathScheduleraux.ml b/scheduling/RTLpathScheduleraux.ml index 24fef3e8..72cf6682 100644 --- a/scheduling/RTLpathScheduleraux.ml +++ b/scheduling/RTLpathScheduleraux.ml @@ -89,7 +89,8 @@ let get_live_regs_entry (sb : superblock) code = | None -> Regset.empty)) sb.instructions in let ret = - Array.fold_right (fun (ins, liveins) regset -> + Array.fold_right (fun (ins, liveins) regset_i -> + let regset = Registers.Regset.union liveins regset_i in match ins with | Inop _ -> regset | Iop (_, args, dest, _) @@ -128,7 +129,7 @@ let get_live_regs_entry (sb : superblock) code = | Ijumptable (reg, _) | Ireturn (Some reg) -> Registers.Regset.add reg regset - | _ -> regset + | _ -> regset ) seqa sb.s_output_regs in debug "live in regs: "; print_regset ret; -- cgit From 587f0505f2331b8edc94a187575a8f124c3cb4ef Mon Sep 17 00:00:00 2001 From: Pierre Goutagny Date: Wed, 9 Jun 2021 19:34:23 +0200 Subject: Starts proof for `tunnel_step_correct` --- backend/RTLTunnelingproof.v | 135 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 120 insertions(+), 15 deletions(-) diff --git a/backend/RTLTunnelingproof.v b/backend/RTLTunnelingproof.v index f63c3b35..c54667a4 100644 --- a/backend/RTLTunnelingproof.v +++ b/backend/RTLTunnelingproof.v @@ -16,7 +16,7 @@ Require Import Coqlib Maps Errors. Require Import AST Linking. -Require Import Values Memory Events Globalenvs Smallstep. +Require Import Values Memory Registers Events Globalenvs Smallstep. Require Import Op Locations RTL. Require Import RTLTunneling. Require Import Conventions1. @@ -41,25 +41,29 @@ Qed. | TB_default (TB: target pc = pc) oi: target_bounds target bound pc oi | TB_nop s - (EQ: target pc = target s) - (DEC: bound s < bound pc): - target_bounds target bound pc (Some (Inop s)) + (EQ: target pc = target s) + (DEC: bound s < bound pc): + target_bounds target bound pc (Some (Inop s)) | TB_cond cond args ifso ifnot info (EQSO: target pc = target ifso) (EQNOT: target pc = target ifnot) (DECSO: bound ifso < bound pc) (DECNOT: bound ifnot < bound pc): - target_bounds target bound pc (Some (Icond cond args ifso ifnot info)). + target_bounds target bound pc (Some (Icond cond args ifso ifnot info)) +. +Local Hint Resolve TB_default: core. Lemma target_None (td: UF) (pc: node): td!pc = None -> td pc = pc. Proof. unfold target, get. intro EQ. rewrite EQ. auto. Qed. +Local Hint Resolve target_None Z.abs_nonneg: core. Lemma get_nonneg td pc t d: get td pc = (t,d) -> (0 <= d)%Z. Proof. unfold get. destruct td!pc as [(tpc,dpc)|]; intro H; inv H; lia. Qed. +Local Hint Resolve get_nonneg: core. (**) Definition bound (td: UF) (pc: node) := Z.to_nat (snd (get td pc)). @@ -93,6 +97,48 @@ Proof. - intros _. apply TB_default. unfold target. unfold get. rewrite EQ. auto. Qed. +Definition check_code_spec (td:UF) (c:code) (ok: res unit) := + ok = OK tt -> forall pc i, c!pc = Some i -> target_bounds (target td) (bound td) pc (Some i). + +Lemma check_code_correct (td:UF) c: + check_code_spec td c (check_code td c). +Proof. + unfold check_code. apply PTree_Properties.fold_rec; unfold check_code_spec. + - intros. rewrite <- H in H2. apply H0; auto. + - intros. rewrite PTree.gempty in H0. congruence. + - intros m [[]|e] pc i N S IND; simpl; try congruence. + intros H pc0 i0. rewrite PTree.gsspec. destruct (peq _ _). + subst. intro. inv H0. apply check_instr_correct. apply H. + auto. +Qed. + +Theorem branch_target_bounds: + forall f tf pc, + tunnel_function f = OK tf -> + target_bounds (branch_target f) (bound (branch_target f)) pc (f.(fn_code)!pc). +Proof. +(* ~old + intros t tf pc. unfold tunnel_function. + destruct (check_included _ _) eqn:HI; try congruence. + destruct (check_code _ _) eqn: HC. + - intros _. destruct ((fn_code t) ! pc) eqn:EQ. + + exploit check_code_correct; eauto. + replace tt with u. auto. {destruct u; auto. } + + exploit check_included_correct; eauto. rewrite HI. congruence. + intro. apply TB_default. unfold target. unfold get. rewrite H. debug auto. + - simpl. congruence. +*) + + intros. unfold tunnel_function in H. + destruct (check_included _ _) eqn:EQinc; try congruence. + monadInv H. rename EQ into EQcode. + destruct (_ ! _) eqn:EQ. + - exploit check_code_correct. destruct x. apply EQcode. apply EQ. auto. + - exploit check_included_correct. + rewrite EQinc. congruence. + apply EQ. + intro. apply TB_default. apply target_None. apply H. +Qed. (** Preservation of semantics *) @@ -137,7 +183,7 @@ Lemma function_ptr_translated: exists tf, Genv.find_funct_ptr tge v = Some tf /\ tunnel_fundef f = OK tf. Proof. - intros. exploit (Genv.find_funct_ptr_match TRANSL). + intros. exploit (Genv.find_funct_ptr_match TRANSL). - apply H. - intros (cu & tf & A & B & C). exists tf. split. apply A. apply B. Qed. @@ -170,7 +216,7 @@ Proof. - monadInv H. auto. Qed. -(**) Lemma senv_preserved: +Lemma senv_preserved: Senv.equiv ge tge. Proof. eapply (Genv.senv_match TRANSL). (* Il y a déjà une preuve de cette propriété très exactement, je ne vais pas réinventer la roue ici *) @@ -216,32 +262,41 @@ Inductive match_states: state -> state -> Prop := (Returnstate ts tv tm). (* TODO: il faut définir une bonne mesure *) -(**) Definition measure (st: state) : nat := +(* (**) Definition measure (st: state) : nat := match st with | State s f sp pc rs m => match (fn_code f)!pc with - | Some (Inop pc) => (bound (branch_target f) pc) * 2 + 1 + | Some (Inop pc') => (bound (branch_target f) pc') * 2 + 1 | Some (Icond _ _ ifso ifnot _) => (max (bound (branch_target f) ifso) (bound (branch_target f) ifnot) ) * 2 + 1 - | Some _ => 0 + | Some _ => (bound (branch_target f) pc) * 2 | None => 0 end | Callstate s f v m => 0 | Returnstate s v m => 0 end. +*) + +Definition measure (st: state): nat := + match st with + | State s f sp pc rs m => bound (branch_target f) pc + | Callstate s f v m => 0 + | Returnstate s v m => 0 + end. + (* rq: sans les lemmes définis au-dessus je ne vois pas trop comment j'aurais * fait... ni comment j'aurais eu l'idée d'en faire des lemmes ? *) -Lemma transf_initial_states: +(**) Lemma transf_initial_states: forall s1: state, initial_state prog s1 -> exists s2: state, initial_state tprog s2 /\ match_states s1 s2. Proof. intros. inversion H as [b f m0 ge0 MEM SYM PTR SIG CALL]. exploit function_ptr_translated. - apply PTR. - - intros (tf & TPTR & TUN). + - intros (tf & TPTR & TUN). exists (Callstate nil tf nil m0). split. + apply initial_state_intro with b. * apply (Genv.init_mem_match TRANSL). apply MEM. @@ -256,7 +311,7 @@ Proof. * apply Mem.extends_refl. Qed. -Lemma transf_final_states: +(**) Lemma transf_final_states: forall (s1 : state) (s2 : state) (r : Integers.Int.int), match_states s1 s2 -> @@ -268,6 +323,27 @@ Proof. intros. inv H0. inv H. inv VAL. inversion STK. apply final_state_intro. Qed. +Lemma tunnel_function_unfold: + forall f tf pc, + tunnel_function f = OK tf -> + (fn_code tf) ! pc = + option_map (tunnel_instr (branch_target f)) (fn_code f) ! pc. +Proof. + intros f tf pc. + unfold tunnel_function. + destruct (check_included _ _) eqn:EQinc; try congruence. + destruct (check_code _ _) eqn:EQcode; simpl; try congruence. + intro. inv H. simpl. rewrite PTree.gmap1. reflexivity. +Qed. + +Lemma reglist_lessdef: + forall (rs trs: Registers.Regmap.t val) (args: list Registers.reg), + regs_lessdef rs trs -> Val.lessdef_list (rs##args) (trs##args). +Proof. + intros. induction args; simpl; constructor. + apply H. apply IHargs. +Qed. + (* `Lemma tunnel_step_correct` correspond au diagramme "option simulation" *) Lemma tunnel_step_correct: forall st1 t st2, step ge st1 t st2 -> @@ -275,8 +351,37 @@ Lemma tunnel_step_correct: (exists st2', step tge st1' t st2' /\ match_states st2 st2') \/ (measure st2 < measure st1 /\ t = E0 /\ match_states st2 st1')%nat. Proof. - intros st1 t st2 H. induction H; intros; inv MS. -Admitted. + intros st1 t st2 H. induction H; intros; try (inv MS). + - (* Inop *) + exploit branch_target_bounds. apply TF. + rewrite H. intro. inv H0. + + (* TB_default *) + rewrite TB. left. eexists. split. + * apply exec_Inop. rewrite (tunnel_function_unfold f tf pc). rewrite H. simpl. eauto. apply TF. + * constructor; try assumption. + + (* TB_nop *) + simpl. right. repeat split. apply DEC. + rewrite EQ. apply match_states_intro; assumption. + - (* Iop *) + exploit eval_operation_lessdef; try eassumption. + apply reglist_lessdef. apply RS. + intros (tv & EVAL & LD). + left; eexists; split. + + eapply exec_Iop with (v:=tv). + assert ((fn_code tf) ! pc = Some (Iop op args res (branch_target f pc'))). + rewrite (tunnel_function_unfold f tf pc); eauto. + rewrite H. simpl. reflexivity. + (* TODO: refaire ça joliment *) + assert (target_bounds (branch_target f) (bound (branch_target f)) pc (fn_code f)! pc). + apply (branch_target_bounds) with tf. apply TF. + inv H2. rewrite TB. apply H1. rewrite H in H4. congruence. + rewrite H in H4. congruence. + rewrite <- EVAL. apply eval_operation_preserved. apply symbols_preserved. + + apply match_states_intro; eauto. apply set_reg_lessdef. apply LD. apply RS. + - (* Iload *) + + +Qed. Theorem transf_program_correct: -- cgit From 1701e43316ee8e69e794a025a8c9979af6bb8c93 Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Thu, 10 Jun 2021 16:31:51 +0200 Subject: Work on new schedluer Renamed a test file, wrote function to compute pressure deltas, Still need to pass the info in some way; beginning of the actual scheduler function --- aarch64/Machregsaux.ml | 4 ++ aarch64/Machregsaux.mli | 2 + aarch64/PostpassSchedulingOracle.ml | 3 +- aarch64/PrepassSchedulingOracle.ml | 55 ++++++++++++++- scheduling/InstructionScheduler.ml | 79 ++++++++++++++++++++- scheduling/InstructionScheduler.mli | 7 ++ scheduling/RTLpathScheduleraux.ml | 26 ++++--- test/nardino/scheduling/entry_regs.c | 5 +- test/nardino/scheduling/spille_backw.c | 114 +++++++++++++++++++++++++++++++ test/nardino/scheduling/spille_etrange.c | 114 ------------------------------- 10 files changed, 279 insertions(+), 130 deletions(-) create mode 100644 test/nardino/scheduling/spille_backw.c delete mode 100644 test/nardino/scheduling/spille_etrange.c diff --git a/aarch64/Machregsaux.ml b/aarch64/Machregsaux.ml index 41db3bd4..15fb08ca 100644 --- a/aarch64/Machregsaux.ml +++ b/aarch64/Machregsaux.ml @@ -19,3 +19,7 @@ let class_of_type = function | AST.Tint | AST.Tlong -> 0 | AST.Tfloat | AST.Tsingle -> 1 | AST.Tany32 | AST.Tany64 -> assert false + +(* number of available registers per class *) +(* TODO: add this to all archs *) +let nr_regs = [| 29; 32 |] diff --git a/aarch64/Machregsaux.mli b/aarch64/Machregsaux.mli index 01b0f9fd..8487a557 100644 --- a/aarch64/Machregsaux.mli +++ b/aarch64/Machregsaux.mli @@ -15,3 +15,5 @@ val is_scratch_register: string -> bool val class_of_type: AST.typ -> int + +val nr_regs : int array diff --git a/aarch64/PostpassSchedulingOracle.ml b/aarch64/PostpassSchedulingOracle.ml index a9737088..834d42f5 100644 --- a/aarch64/PostpassSchedulingOracle.ml +++ b/aarch64/PostpassSchedulingOracle.ml @@ -507,7 +507,8 @@ let build_problem bb = { max_latency = -1; resource_bounds = opweights.pipelined_resource_bounds; - live_regs_entry = Registers.Regset.empty; (* PLACEHOLDER *) + live_regs_entry = Registers.Regset.empty; (* unused here *) + typing = (fun x -> AST.Tint); (* unused here *) instruction_usages = instruction_usages bb; latency_constraints = latency_constraints bb; } diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml index a743fb68..6d445f10 100644 --- a/aarch64/PrepassSchedulingOracle.ml +++ b/aarch64/PrepassSchedulingOracle.ml @@ -201,6 +201,52 @@ let get_simple_dependencies (opweights : opweights) (seqa : (instruction*Regset. end seqa; !latency_constraints;; +let get_pressure_deltas (seqa : (instruction * Regset.t) array) + (typing : RTLtyping.regenv) + : int array array = + let nr_types_regs = Array.length Machregsaux.nr_regs in + let ret = Array.init (Array.length seqa) (fun i -> + Array.make nr_types_regs 0) in + Array.iteri (fun i (instr, liveins) -> match instr with + | Iop (_, args, dest, _) | Iload (_, _, _, args, dest, _) -> + ret.(i).(Machregsaux.class_of_type (typing dest)) <- + if List.mem dest args then 0 + else 1 + | Istore (_, _, _, src, _) -> + ret.(i).(Machregsaux.class_of_type (typing src)) <- + -1 + | Icall (_, fn, args, dest, _) -> + ret.(i).(Machregsaux.class_of_type (typing dest)) <- + if List.mem dest + (match fn with + | Datatypes.Coq_inl reg -> reg::args + | _ -> args) + then 0 else 1 + | Ibuiltin (_, args, dest, _) -> + let rec arg_l list = function + | AST.BA r -> r::list + | AST.BA_splitlong (hi, lo) | AST.BA_addptr (hi, lo) -> + arg_l (arg_l list lo) hi + | _ -> list + in + let l = (List.fold_left arg_l [] args) in + let rec dest_l = function + | AST.BR r -> + let t = Machregsaux.class_of_type (typing r) in + ret.(i).(t) <- + (if List.mem r l + then 0 else 1) + ret.(i).(t) + | AST.BR_splitlong (hi, lo) -> + dest_l hi; + dest_l lo + | _ -> () + in + dest_l dest + | _ -> () + ) seqa; + ret + + let resources_of_instruction (opweights : opweights) = function | Inop _ -> Array.map (fun _ -> 0) opweights.pipelined_resource_bounds | Iop(op, inputs, output, _) -> @@ -406,11 +452,13 @@ let get_alias_dependencies seqa = !deps;; *) -let define_problem (opweights : opweights) (live_entry_regs : Regset.t) seqa = +let define_problem (opweights : opweights) (live_entry_regs : Regset.t) + (typing : RTLtyping.regenv) seqa = let simple_deps = get_simple_dependencies opweights seqa in { max_latency = -1; resource_bounds = opweights.pipelined_resource_bounds; live_regs_entry = live_entry_regs; + typing = typing; instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa); latency_constraints = (* if (use_alias_analysis ()) @@ -441,7 +489,8 @@ let prepass_scheduler_by_name name problem early_ones = | _ -> scheduler_by_name name problem let schedule_sequence (seqa : (instruction*Regset.t) array) - (live_regs_entry : Registers.Regset.t)= + (live_regs_entry : Registers.Regset.t) + (typing : RTLtyping.regenv) = let opweights = OpWeights.get_opweights () in try if (Array.length seqa) <= 1 @@ -451,7 +500,7 @@ let schedule_sequence (seqa : (instruction*Regset.t) array) let nr_instructions = Array.length seqa in (if !Clflags.option_debug_compcert > 6 then Printf.printf "prepass scheduling length = %d\n" (Array.length seqa)); - let problem = define_problem opweights live_regs_entry seqa in + let problem = define_problem opweights live_regs_entry typing seqa in (if !Clflags.option_debug_compcert > 7 then (print_sequence stdout (Array.map fst seqa); print_problem stdout problem)); diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml index 976037bd..08164293 100644 --- a/scheduling/InstructionScheduler.ml +++ b/scheduling/InstructionScheduler.ml @@ -34,6 +34,7 @@ type problem = { max_latency : int; resource_bounds : int array; live_regs_entry : Registers.Regset.t; + typing : RTLtyping.regenv; instruction_usages : int array array; latency_constraints : latency_constraint list; };; @@ -258,8 +259,8 @@ let priority_list_scheduler (order : list_scheduler_order) assert(!time >= 0); !time with Exit -> -1 - in + let advance_time() = begin (if !current_time < max_time-1 @@ -268,7 +269,8 @@ let priority_list_scheduler (order : list_scheduler_order) Array.blit problem.resource_bounds 0 current_resources 0 (Array.length current_resources); ready.(!current_time + 1) <- - InstrSet.union (ready.(!current_time)) (ready.(!current_time + 1)); + InstrSet.union (ready.(!current_time)) + (ready.(!current_time + 1)); ready.(!current_time) <- InstrSet.empty; end); incr current_time @@ -335,6 +337,75 @@ let list_scheduler = priority_list_scheduler CRITICAL_PATH_ORDER;; (* dummy code for placating ocaml's warnings *) let _ = fun x -> priority_list_scheduler INSTRUCTION_ORDER x;; + +(* A scheduler sensitive to register pressure *) +let reg_pres_scheduler (problem : problem) : solution option = + let nr_instructions = get_nr_instructions problem in + let successors = get_successors problem + and predecessors = get_predecessors problem + and times = Array.make (nr_instructions+1) (-1) in + let live_regs_entry = problem.live_regs_entry in + + let available_regs = Array.copy Machregsaux.nr_regs in + + List.iter (fun r -> let classe = Machregsaux.class_of_type + (problem.typing r) in + available_regs.(classe) + <- available_regs.(classe) - 1) + (Registers.Regset.elements live_regs_entry); + + let nr_types_regs = Array.length available_regs in + + (* wait di we have access to instructions here? No, we have to add + al this to constraints *) + (* let pressures = + * Array.init (nr_instructions) (fun i -> + * Array.init (nr_types_regs) (fun t -> + * match i with + * | Inop -> 0 + * | Iop (_, args, dest, _) + * | Iload(_, _, _, args, dest, _) -> + * if + * )) *) + + let priorities = critical_paths successors in + + let module InstrSetCSP = + Set.Make (struct + type t=int + let compare x y = + match priorities.(y) - priorities.(x) with + | 0 -> x - y + | z -> z + end) in + + (* TODO: find a way to efficiently find an instruction which + decreases register pressure *) + (* idea: *) + (* let module InstrSetCSR = + * Set.Make (struct + * type t = int + * let compare x y = + * match pressures.(y) - pressures.(x) with + * | 0 -> (match priorities.(y) - priorities.(x) with + * | 0 -> x - y + * | z -> z) + * | z -> z + * end) in *) + (* where pressure.(x) is the delta of register pressure for + instruction x. Pb: different register types. Need to think about + it. Have one module for each register type, that's used when this + particular type reach a high pressure? *) + + let max_time = bound_max_time problem in + let ready = Array.make max_time InstrSetCSP.empty in + + (* silence warning, enable compilation while working *) + let _ = successors, predecessors, times, ready, nr_types_regs in + (* PLACEHOLDER *) + None + + type bundle = int list;; let rec extract_deps_to index = function @@ -440,6 +511,10 @@ let reverse_problem problem = max_latency = problem.max_latency; resource_bounds = problem.resource_bounds; live_regs_entry = Registers.Regset.empty; (* PLACEHOLDER *) + (* Not needed for the revlist sched, and for now we wont bother + with creating a reverse scheduler aware of reg press *) + + typing = problem.typing; instruction_usages = Array.init (nr_instructions + 1) (fun i -> if i=0 diff --git a/scheduling/InstructionScheduler.mli b/scheduling/InstructionScheduler.mli index f53dc0ef..8dcc4ef5 100644 --- a/scheduling/InstructionScheduler.mli +++ b/scheduling/InstructionScheduler.mli @@ -26,6 +26,9 @@ type problem = { live_regs_entry : Registers.Regset.t; (** The set of live pseudo-registers at entry. *) + typing : RTLtyping.regenv; + (** Register type map. *) + instruction_usages: int array array; (** At index {i i} the vector of resources used by instruction number {i i}. It must be the same length as [resource_bounds] *) @@ -71,6 +74,10 @@ Once a clock tick is full go to the next. @return [Some solution] when a solution is found, [None] if not. *) val list_scheduler : problem -> solution option +(** WIP : Same as list_scheduler, but schedules instructions which decrease +register pressure when it gets too high. *) +val reg_pres_scheduler : problem -> solution option + (** Schedule the problem using the order of instructions without any reordering *) val greedy_scheduler : problem -> solution option diff --git a/scheduling/RTLpathScheduleraux.ml b/scheduling/RTLpathScheduleraux.ml index 72cf6682..e04e7c23 100644 --- a/scheduling/RTLpathScheduleraux.ml +++ b/scheduling/RTLpathScheduleraux.ml @@ -94,7 +94,7 @@ let get_live_regs_entry (sb : superblock) code = match ins with | Inop _ -> regset | Iop (_, args, dest, _) - | Iload (_, _, _, args, dest, _) -> + | Iload (_, _, _, args, dest, _) -> List.fold_left (fun set reg -> Registers.Regset.add reg set) (Registers.Regset.remove dest regset) args | Istore (_, _, args, src, _) -> @@ -114,13 +114,20 @@ let get_live_regs_entry (sb : superblock) code = | Datatypes.Coq_inr _ -> regset) args | Ibuiltin (_, args, dest, _) -> - List.fold_left (fun set reg -> - match reg with - | AST.BA r -> Registers.Regset.add r set - | _ -> set) - (match dest with - | AST.BR r -> Registers.Regset.remove r regset - | _ -> regset) + List.fold_left (fun set arg -> + let rec add reg set = + match reg with + | AST.BA r -> Registers.Regset.add r set + | AST.BA_splitlong (hi, lo) + | AST.BA_addptr (hi, lo) -> add hi (add lo set) + | _ -> set + in add arg set) + (let rec rem dest set = + match dest with + | AST.BR r -> Registers.Regset.remove r set + | AST.BR_splitlong (hi, lo) -> rem hi (rem lo set) + | _ -> set + in rem dest regset) args | Icond (_, args, _, _, _) -> List.fold_left (fun set reg -> @@ -166,7 +173,8 @@ let schedule_superblock sb code = | Some s -> s | None -> Regset.empty)) (Array.sub sb.instructions 0 (nr_instr-trailer_length))) - live_regs_entry with + live_regs_entry + sb.typing with | None -> sb.instructions | Some order -> let ins' = diff --git a/test/nardino/scheduling/entry_regs.c b/test/nardino/scheduling/entry_regs.c index 047a613d..9e6adacb 100644 --- a/test/nardino/scheduling/entry_regs.c +++ b/test/nardino/scheduling/entry_regs.c @@ -1,7 +1,10 @@ #include int f(int n) { - return n; + if (n > 0) + return 42; + else + return n; } diff --git a/test/nardino/scheduling/spille_backw.c b/test/nardino/scheduling/spille_backw.c new file mode 100644 index 00000000..1c36ee86 --- /dev/null +++ b/test/nardino/scheduling/spille_backw.c @@ -0,0 +1,114 @@ +int f(int k) { + int a1 = k; + int b1 = 2*a1; + int c = a1; + int a2 = k+1; + int b2 = 2*a2; + c += a2; + int a3 = k+2; + int b3 = 2*a3; + c += a3; + int a4 = k+3; + int b4 = 2*a4; + c += a4; + int a5 = k+4; + int b5 = 2*a5; + c += a5; + int a6 = k+5; + int b6 = 2*a6; + c += a6; + int a7 = k+6; + int b7 = 2*a7; + c += a7; + int a8 = k+7; + int b8 = 2*a8; + c += a8; + int a9 = k+8; + int b9 = 2*a9; + c += a9; + int a10 = k+9; + int b10 = 2*a10; + c += a10; + int a11 = k+10; + int b11 = 2*a11; + c += a11; + int a12 = k+11; + int b12 = 2*a12; + c += a12; + int a13 = k+12; + int b13 = 2*a13; + c += a13; + int a14 = k+13; + int b14 = 2*a14; + c += a14; + int a15 = k+14; + int b15 = 2*a15; + c += a15; + int a16 = k+15; + int b16 = 2*a16; + c += a16; + int a17 = k+16; + int b17 = 2*a17; + c += a17; + int a18 = k+17; + int b18 = 2*a18; + c += a18; + int a19 = k+18; + int b19 = 2*a19; + c += a19; + int a20 = k+19; + int b20 = 2*a20; + c += a20; + int a21 = k+20; + int b21 = 2*a21; + c += a21; + int a22 = k+21; + int b22 = 2*a22; + c += a22; + int a23 = k+22; + int b23 = 2*a23; + c += a23; + int a24 = k+23; + int b24 = 2*a24; + c += a24; + int a25 = k+24; + int b25 = 2*a25; + c += a25; + int a26 = k+25; + int b26 = 2*a26; + c += a26; + return + b13+ + b12+ + b11+ + b10+ + b9+ + b8+ + b7+ + b6+ + b5+ + b4+ + b3+ + b2+ + b1+ + b14+ + b15+ + b16+ + b17+ + b18+ + b19+ + b20+ + b21+ + b22+ + b23+ + b23+ + b24+ + b25+ + b26+ + c; +} + +int main(int argc, char *argv[]) { + f(3); + return 0; +} diff --git a/test/nardino/scheduling/spille_etrange.c b/test/nardino/scheduling/spille_etrange.c deleted file mode 100644 index 1c36ee86..00000000 --- a/test/nardino/scheduling/spille_etrange.c +++ /dev/null @@ -1,114 +0,0 @@ -int f(int k) { - int a1 = k; - int b1 = 2*a1; - int c = a1; - int a2 = k+1; - int b2 = 2*a2; - c += a2; - int a3 = k+2; - int b3 = 2*a3; - c += a3; - int a4 = k+3; - int b4 = 2*a4; - c += a4; - int a5 = k+4; - int b5 = 2*a5; - c += a5; - int a6 = k+5; - int b6 = 2*a6; - c += a6; - int a7 = k+6; - int b7 = 2*a7; - c += a7; - int a8 = k+7; - int b8 = 2*a8; - c += a8; - int a9 = k+8; - int b9 = 2*a9; - c += a9; - int a10 = k+9; - int b10 = 2*a10; - c += a10; - int a11 = k+10; - int b11 = 2*a11; - c += a11; - int a12 = k+11; - int b12 = 2*a12; - c += a12; - int a13 = k+12; - int b13 = 2*a13; - c += a13; - int a14 = k+13; - int b14 = 2*a14; - c += a14; - int a15 = k+14; - int b15 = 2*a15; - c += a15; - int a16 = k+15; - int b16 = 2*a16; - c += a16; - int a17 = k+16; - int b17 = 2*a17; - c += a17; - int a18 = k+17; - int b18 = 2*a18; - c += a18; - int a19 = k+18; - int b19 = 2*a19; - c += a19; - int a20 = k+19; - int b20 = 2*a20; - c += a20; - int a21 = k+20; - int b21 = 2*a21; - c += a21; - int a22 = k+21; - int b22 = 2*a22; - c += a22; - int a23 = k+22; - int b23 = 2*a23; - c += a23; - int a24 = k+23; - int b24 = 2*a24; - c += a24; - int a25 = k+24; - int b25 = 2*a25; - c += a25; - int a26 = k+25; - int b26 = 2*a26; - c += a26; - return - b13+ - b12+ - b11+ - b10+ - b9+ - b8+ - b7+ - b6+ - b5+ - b4+ - b3+ - b2+ - b1+ - b14+ - b15+ - b16+ - b17+ - b18+ - b19+ - b20+ - b21+ - b22+ - b23+ - b23+ - b24+ - b25+ - b26+ - c; -} - -int main(int argc, char *argv[]) { - f(3); - return 0; -} -- cgit From e46b3e49b2a34c27f5703327c884cf3bcd0c6a49 Mon Sep 17 00:00:00 2001 From: Pierre Goutagny Date: Thu, 10 Jun 2021 18:22:02 +0200 Subject: Complete `tunnel_step_correct` proof up to Ijumptable Excluding Icond --- backend/RTLTunnelingproof.v | 236 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 227 insertions(+), 9 deletions(-) diff --git a/backend/RTLTunnelingproof.v b/backend/RTLTunnelingproof.v index c54667a4..53f8bbf7 100644 --- a/backend/RTLTunnelingproof.v +++ b/backend/RTLTunnelingproof.v @@ -344,6 +344,118 @@ Proof. apply H. apply IHargs. Qed. +Lemma instruction_type_preserved: + forall (f tf:function) (pc:node) (i ti:instruction) + (TF: tunnel_function f = OK tf) + (FATPC: (fn_code f) ! pc = Some i) + (NOTINOP: forall s, i <> Inop s) + (NOTICOND: forall cond args ifso ifnot info, i <> Icond cond args ifso ifnot info) + (TI: ti = tunnel_instr (branch_target f) i), + (fn_code tf) ! (branch_target f pc) = Some ti. +Proof. + intros. + assert ((fn_code tf) ! pc = Some (tunnel_instr (branch_target f) i)) as TFATPC. + rewrite (tunnel_function_unfold f tf pc); eauto. + rewrite FATPC; eauto. + exploit branch_target_bounds; eauto. + intro TB. inversion TB as [BT|s|cond args ifso ifnot info]; try (rewrite FATPC in H; congruence). +Qed. + +(* rq: utilisée pour `tunnel_step_correct` mais je ne comprends pas tout *) +(**) Lemma find_function_translated: + forall ros rs trs fd, + regs_lessdef rs trs -> + find_function ge ros rs = Some fd -> + exists tfd, tunnel_fundef fd = OK tfd /\ find_function tge ros trs = Some tfd. +Proof. + intros. destruct ros; simpl in *. + - (* reg *) + assert (E: trs # r = rs # r). + { exploit Genv.find_funct_inv. apply H0. intros (b & EQ). + generalize (H r) . rewrite EQ. intro LD. inv LD. auto. } + rewrite E. exploit functions_translated; eauto. + - (* ident *) + rewrite symbols_preserved. destruct (Genv.find_symbol ge i); inv H0. + exploit function_ptr_translated; eauto. + intros (tf & X1 & X2). exists tf; intuition. +Qed. + +Lemma list_forall2_lessdef_rs: + forall rs trs args, + regs_lessdef rs trs -> + list_forall2 Val.lessdef rs ## args trs ## args. +Proof. + intros rs trs args LD. + exploit (reglist_lessdef rs trs args). apply LD. + induction args; simpl; intros H; try constructor; inv H. + apply H3. apply IHargs. apply H5. +Qed. + +Lemma fn_stacksize_preserved: + forall f tf + (TF: tunnel_function f = OK tf), + fn_stacksize f = fn_stacksize tf. +Proof. + intros f tf. unfold tunnel_function. + destruct (check_included _ _); try congruence. + intro H. monadInv H. simpl. reflexivity. +Qed. + +Lemma regs_setres_lessdef: + forall res vres tvres rs trs, + regs_lessdef rs trs -> Val.lessdef vres tvres -> + regs_lessdef (regmap_setres res vres rs) (regmap_setres res tvres trs). +Proof. + induction res; intros; simpl; try auto using set_reg_lessdef. +Qed. + +Lemma regmap_optget_lessdef: + forall or rs trs, + regs_lessdef rs trs -> Val.lessdef (regmap_optget or Vundef rs) (regmap_optget or Vundef trs). +Proof. + intros or rs trs RS. + induction or; simpl; auto using set_reg_lessdef. +Qed. + +Lemma tunnel_fundef_Internal: + forall (f:function) (tf:fundef) + (TF: tunnel_fundef (Internal f) = OK tf), + exists (tf':function), tf = (Internal tf') /\ tunnel_function f = OK tf'. +Proof. + intros f tf. + unfold tunnel_fundef. simpl. intro H. monadInv H. exists x. + split. reflexivity. apply EQ. +Qed. + +Lemma fn_entrypoint_preserved: + forall f tf + (TF: tunnel_function f = OK tf), + fn_entrypoint tf = branch_target f (fn_entrypoint f). +Proof. + intros f tf. + unfold tunnel_function. destruct (check_included _ _); try congruence. + intro TF. monadInv TF. simpl. reflexivity. +Qed. + +Lemma init_regs_lessdef: + forall f tf args targs + (ARGS: list_forall2 Val.lessdef args targs) + (TF: tunnel_function f = OK tf), + regs_lessdef (init_regs args (fn_params f)) (init_regs targs (fn_params tf)). +Proof. + assert (regs_lessdef (Regmap.init Vundef) (Regmap.init Vundef)) as Hundef. + { unfold Regmap.init. unfold regs_lessdef. intro. unfold Regmap.get. rewrite PTree.gempty. apply Val.lessdef_undef. } + + intros f tf args targs ARGS. + + unfold tunnel_function. destruct (check_included _ _) eqn:EQinc; try congruence. + intro TF. monadInv TF. simpl. destruct (fn_params f) eqn:EQP; simpl. + - apply Hundef. + - induction ARGS. + + apply Hundef. + + apply set_reg_lessdef; try assumption. +Admitted. + (* `Lemma tunnel_step_correct` correspond au diagramme "option simulation" *) Lemma tunnel_step_correct: forall st1 t st2, step ge st1 t st2 -> @@ -368,18 +480,124 @@ Proof. intros (tv & EVAL & LD). left; eexists; split. + eapply exec_Iop with (v:=tv). - assert ((fn_code tf) ! pc = Some (Iop op args res (branch_target f pc'))). - rewrite (tunnel_function_unfold f tf pc); eauto. - rewrite H. simpl. reflexivity. - (* TODO: refaire ça joliment *) - assert (target_bounds (branch_target f) (bound (branch_target f)) pc (fn_code f)! pc). - apply (branch_target_bounds) with tf. apply TF. - inv H2. rewrite TB. apply H1. rewrite H in H4. congruence. - rewrite H in H4. congruence. + apply instruction_type_preserved with (Iop op args res pc'); (simpl; auto)||congruence. rewrite <- EVAL. apply eval_operation_preserved. apply symbols_preserved. + apply match_states_intro; eauto. apply set_reg_lessdef. apply LD. apply RS. - (* Iload *) - + exploit eval_addressing_lessdef; try eassumption. + apply reglist_lessdef. apply RS. + intros (ta & EVAL & LD). + exploit Mem.loadv_extends; try eassumption. + intros (tv & LOAD & LD'). + left. eexists. split. + + eapply exec_Iload. + * exploit instruction_type_preserved; (simpl; eauto)||congruence. + * rewrite <- EVAL. apply eval_addressing_preserved. apply symbols_preserved. + * apply LOAD. + + apply match_states_intro; try assumption. apply set_reg_lessdef. apply LD'. apply RS. + - (* Iload NOTRAP1 *) + exploit eval_addressing_lessdef_none; try eassumption. + apply reglist_lessdef; apply RS. + left. eexists. split. + + eapply exec_Iload_notrap1. + * exploit instruction_type_preserved; (simpl; eauto)||congruence. + * rewrite <- H1. apply eval_addressing_preserved. apply symbols_preserved. + + apply match_states_intro; try assumption. apply set_reg_lessdef. unfold default_notrap_load_value. apply Val.lessdef_undef. apply RS. + - (* Iload NOTRAP2 *) + exploit eval_addressing_lessdef; try eassumption. + apply reglist_lessdef; apply RS. + intros (ta & EVAL & LD). + (* TODO: on peut sans doute factoriser ça *) + destruct (Mem.loadv chunk tm ta) eqn:Htload. + + left; eexists; split. + eapply exec_Iload. + * exploit instruction_type_preserved; (simpl; eauto)||congruence. + * rewrite <- EVAL. apply eval_addressing_preserved. apply symbols_preserved. + * apply Htload. + * apply match_states_intro; try assumption. apply set_reg_lessdef; eauto. + + left; eexists; split. + eapply exec_Iload_notrap2. + * exploit instruction_type_preserved; (simpl; eauto)||congruence. + * rewrite <- EVAL. apply eval_addressing_preserved. apply symbols_preserved. + * apply Htload. + * apply match_states_intro; try assumption. apply set_reg_lessdef; eauto. + - (* Lstore *) + exploit eval_addressing_lessdef; try eassumption. + apply reglist_lessdef; apply RS. + intros (ta & EVAL & LD). + exploit Mem.storev_extends; try eassumption. apply RS. + intros (tm' & STORE & MEM'). + left. eexists. split. + + eapply exec_Istore. + * exploit instruction_type_preserved; (simpl; eauto)||congruence. + * rewrite <- EVAL. apply eval_addressing_preserved. apply symbols_preserved. + * rewrite STORE. reflexivity. + + apply match_states_intro; try eassumption. + - (* Icall *) + left. + exploit find_function_translated; try eassumption. + intros (tfd & TFD & FIND). + eexists. split. + + eapply exec_Icall. + * exploit instruction_type_preserved; (simpl; eauto)||congruence. + * apply FIND. + * apply sig_preserved. apply TFD. + + apply match_states_call; try assumption. + * apply list_forall2_cons; try assumption. apply match_stackframes_intro; try assumption. + * apply list_forall2_lessdef_rs. apply RS. + - (* Itailcall *) + exploit find_function_translated; try eassumption. + intros (tfd & TFD & FIND). + exploit Mem.free_parallel_extends; try eassumption. + intros (tm' & FREE & MEM'). + left. eexists. split. + + eapply exec_Itailcall. + * exploit instruction_type_preserved; (simpl; eauto)||congruence. + * apply FIND. + * apply sig_preserved. apply TFD. + * erewrite <- fn_stacksize_preserved; try eassumption. + + apply match_states_call; try assumption. + apply list_forall2_lessdef_rs. apply RS. + - (* Ibuiltin *) + exploit eval_builtin_args_lessdef; try eassumption. apply RS. + intros (vl2 & EVAL & LD). + exploit external_call_mem_extends; try eassumption. + intros (tvres & tm' & EXT & LDRES & MEM' & UNCHGD). + left. eexists. split. + + eapply exec_Ibuiltin. + * exploit instruction_type_preserved; (simpl; eauto)||congruence. + * eapply eval_builtin_args_preserved. eapply symbols_preserved. eapply EVAL. + * eapply external_call_symbols_preserved. eapply senv_preserved. eapply EXT. + + apply match_states_intro; try assumption. apply regs_setres_lessdef; try assumption. + - (* Icond *) + admit. + - (* Ijumptable *) + left. eexists. split. + + eapply exec_Ijumptable. + * exploit instruction_type_preserved; (simpl; eauto)||congruence. + * generalize (RS arg). rewrite H0. intro. inv H2. reflexivity. + * rewrite list_nth_z_map. rewrite H1. simpl. reflexivity. + + apply match_states_intro; try eassumption. + - (* Ireturn *) + exploit Mem.free_parallel_extends; try eassumption. + intros (tm' & FREE & MEM'). + left. eexists. split. + + eapply exec_Ireturn. + * exploit instruction_type_preserved; (simpl; eauto)||congruence. + * erewrite <- fn_stacksize_preserved. eapply FREE. eapply TF. + + apply match_states_return; try eassumption. + apply regmap_optget_lessdef. apply RS. + - (* internal function *) + exploit tunnel_fundef_Internal; try eassumption. + intros (tf' & EQ & TF'). subst. + exploit Mem.alloc_extends; try eassumption. reflexivity. reflexivity. + intros (m2' & ALLOC & EXT). + left. eexists. split. + + eapply exec_function_internal. + rewrite <- (fn_stacksize_preserved f tf'). eapply ALLOC. eapply TF'. + + rewrite (fn_entrypoint_preserved f tf'); try eassumption. apply match_states_intro; try eassumption. + apply init_regs_lessdef. + Qed. -- cgit From 33958cecc396be3d7ba8bf369b2039c633d39849 Mon Sep 17 00:00:00 2001 From: Pierre Goutagny Date: Fri, 11 Jun 2021 13:30:20 +0200 Subject: Complete RTLTunnelingproof There is still some refactoring to do, though --- backend/RTLTunnelingproof.v | 82 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 66 insertions(+), 16 deletions(-) diff --git a/backend/RTLTunnelingproof.v b/backend/RTLTunnelingproof.v index 53f8bbf7..7f6696ad 100644 --- a/backend/RTLTunnelingproof.v +++ b/backend/RTLTunnelingproof.v @@ -344,7 +344,7 @@ Proof. apply H. apply IHargs. Qed. -Lemma instruction_type_preserved: +Lemma instruction_type_preserved: forall (f tf:function) (pc:node) (i ti:instruction) (TF: tunnel_function f = OK tf) (FATPC: (fn_code f) ! pc = Some i) @@ -362,7 +362,7 @@ Proof. Qed. (* rq: utilisée pour `tunnel_step_correct` mais je ne comprends pas tout *) -(**) Lemma find_function_translated: +(**) Lemma find_function_translated: forall ros rs trs fd, regs_lessdef rs trs -> find_function ge ros rs = Some fd -> @@ -375,7 +375,7 @@ Proof. generalize (H r) . rewrite EQ. intro LD. inv LD. auto. } rewrite E. exploit functions_translated; eauto. - (* ident *) - rewrite symbols_preserved. destruct (Genv.find_symbol ge i); inv H0. + rewrite symbols_preserved. destruct (Genv.find_symbol ge i); inv H0. exploit function_ptr_translated; eauto. intros (tf & X1 & X2). exists tf; intuition. Qed. @@ -386,7 +386,7 @@ Lemma list_forall2_lessdef_rs: list_forall2 Val.lessdef rs ## args trs ## args. Proof. intros rs trs args LD. - exploit (reglist_lessdef rs trs args). apply LD. + exploit (reglist_lessdef rs trs args). apply LD. induction args; simpl; intros H; try constructor; inv H. apply H3. apply IHargs. apply H5. Qed. @@ -418,15 +418,25 @@ Proof. Qed. Lemma tunnel_fundef_Internal: - forall (f:function) (tf:fundef) + forall (f: function) (tf: fundef) (TF: tunnel_fundef (Internal f) = OK tf), - exists (tf':function), tf = (Internal tf') /\ tunnel_function f = OK tf'. + exists (tf': function), tf = (Internal tf') /\ tunnel_function f = OK tf'. Proof. intros f tf. unfold tunnel_fundef. simpl. intro H. monadInv H. exists x. split. reflexivity. apply EQ. Qed. +Lemma tunnel_fundef_External: + forall (ef: external_function) (tf: fundef) + (TF: tunnel_fundef (External ef) = OK tf), + tf = (External ef). +Proof. + intros f tf. + unfold tunnel_fundef. simpl. intro H. monadInv H. reflexivity. +Qed. + + Lemma fn_entrypoint_preserved: forall f tf (TF: tunnel_function f = OK tf), @@ -449,12 +459,26 @@ Proof. intros f tf args targs ARGS. unfold tunnel_function. destruct (check_included _ _) eqn:EQinc; try congruence. - intro TF. monadInv TF. simpl. destruct (fn_params f) eqn:EQP; simpl. - - apply Hundef. - - induction ARGS. - + apply Hundef. - + apply set_reg_lessdef; try assumption. -Admitted. + intro TF. monadInv TF. simpl. + (* + induction ARGS. + - induction (fn_params f) eqn:EQP; apply Hundef. + - induction (fn_params f) eqn:EQP. + * simpl. apply Hundef. + * simpl. apply set_reg_lessdef. apply H. + *) + + generalize (fn_params f) as l. induction ARGS; induction l; try (simpl; apply Hundef). + simpl. apply set_reg_lessdef; try assumption. apply IHARGS. +Qed. + +Lemma lessdef_forall2_list: + forall args ta, + list_forall2 Val.lessdef args ta -> Val.lessdef_list args ta. +Proof. + intros args ta H. induction H. apply Val.lessdef_list_nil. apply Val.lessdef_list_cons. apply H. apply IHlist_forall2. +Qed. + (* `Lemma tunnel_step_correct` correspond au diagramme "option simulation" *) Lemma tunnel_step_correct: @@ -570,7 +594,22 @@ Proof. * eapply external_call_symbols_preserved. eapply senv_preserved. eapply EXT. + apply match_states_intro; try assumption. apply regs_setres_lessdef; try assumption. - (* Icond *) - admit. + simpl. exploit branch_target_bounds. apply TF. rewrite H. intro. inv H1. + + (* TB_default *) + rewrite TB. + destruct (fn_code tf)!pc as [[]|] eqn:EQ; + assert (tunnel_function f = OK tf) as TF'; auto; + unfold tunnel_function in TF; destruct (check_included _ _) in TF; monadInv TF; + simpl in EQ; rewrite PTree.gmap1 in EQ; rewrite H in EQ; simpl in EQ; destruct (peq _ _) eqn: EQpeq in EQ; try congruence. + * left. eexists. split. + -- eapply exec_Inop. simpl. rewrite PTree.gmap1. rewrite H. simpl. rewrite EQpeq. reflexivity. + -- destruct b. apply match_states_intro; eauto. rewrite e. apply match_states_intro; eauto. + * left. eexists. split. + -- eapply exec_Icond; auto. simpl. rewrite PTree.gmap1. rewrite H. simpl. rewrite EQpeq. reflexivity. eapply eval_condition_lessdef; try eassumption. apply reglist_lessdef. apply RS. + -- destruct b; apply match_states_intro; auto. + + (* TB_cond *) right; repeat split; destruct b; try assumption. + * rewrite EQSO. apply match_states_intro; try assumption. + * rewrite EQNOT. apply match_states_intro; try assumption. - (* Ijumptable *) left. eexists. split. + eapply exec_Ijumptable. @@ -596,9 +635,20 @@ Proof. + eapply exec_function_internal. rewrite <- (fn_stacksize_preserved f tf'). eapply ALLOC. eapply TF'. + rewrite (fn_entrypoint_preserved f tf'); try eassumption. apply match_states_intro; try eassumption. - apply init_regs_lessdef. - - + apply init_regs_lessdef. apply ARGS. apply TF'. + - (* external function *) + exploit external_call_mem_extends. eapply H. eapply MEM. eapply lessdef_forall2_list. eapply ARGS. + intros (tvres & tm' & EXTCALL & LD & EXT & MEMUNCHGD). + left. eexists. split. + + erewrite (tunnel_fundef_External ef tf); try eassumption. + eapply exec_function_external. eapply external_call_symbols_preserved. eapply senv_preserved. eapply EXTCALL. + + eapply match_states_return; try assumption. + - (* return *) + inv STK. inv H1. + left. eexists. split. + + eapply exec_return. + + eapply match_states_intro; try assumption. + apply set_reg_lessdef; try assumption. Qed. -- cgit From 2b814b1f9bb30d9c8b59a713f69bced808bca7c7 Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Sat, 12 Jun 2021 10:52:59 +0200 Subject: work on the scheduler --- aarch64/PostpassSchedulingOracle.ml | 1 + aarch64/PrepassSchedulingOracle.ml | 4 +- scheduling/InstructionScheduler.ml | 146 ++++++++++++++++++++++++++---------- scheduling/InstructionScheduler.mli | 4 + 4 files changed, 114 insertions(+), 41 deletions(-) diff --git a/aarch64/PostpassSchedulingOracle.ml b/aarch64/PostpassSchedulingOracle.ml index 834d42f5..867341ca 100644 --- a/aarch64/PostpassSchedulingOracle.ml +++ b/aarch64/PostpassSchedulingOracle.ml @@ -509,6 +509,7 @@ let build_problem bb = resource_bounds = opweights.pipelined_resource_bounds; live_regs_entry = Registers.Regset.empty; (* unused here *) typing = (fun x -> AST.Tint); (* unused here *) + pressure_deltas = [| [| |] |] ; instruction_usages = instruction_usages bb; latency_constraints = latency_constraints bb; } diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml index 6d445f10..19f05749 100644 --- a/aarch64/PrepassSchedulingOracle.ml +++ b/aarch64/PrepassSchedulingOracle.ml @@ -201,6 +201,7 @@ let get_simple_dependencies (opweights : opweights) (seqa : (instruction*Regset. end seqa; !latency_constraints;; + let get_pressure_deltas (seqa : (instruction * Regset.t) array) (typing : RTLtyping.regenv) : int array array = @@ -459,7 +460,8 @@ let define_problem (opweights : opweights) (live_entry_regs : Regset.t) resource_bounds = opweights.pipelined_resource_bounds; live_regs_entry = live_entry_regs; typing = typing; - instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa); + pressure_deltas = get_pressure_deltas seqa typing; + instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa); latency_constraints = (* if (use_alias_analysis ()) then (get_alias_dependencies seqa) @ simple_deps diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml index 08164293..8d8c4267 100644 --- a/scheduling/InstructionScheduler.ml +++ b/scheduling/InstructionScheduler.ml @@ -35,6 +35,7 @@ type problem = { resource_bounds : int array; live_regs_entry : Registers.Regset.t; typing : RTLtyping.regenv; + pressure_deltas : int array array; instruction_usages : int array array; latency_constraints : latency_constraint list; };; @@ -348,60 +349,124 @@ let reg_pres_scheduler (problem : problem) : solution option = let available_regs = Array.copy Machregsaux.nr_regs in + let nr_types_regs = Array.length available_regs in + + let regs_thresholds = Array.init nr_types_regs + (fun i -> 5) in + (* placeholder value *) + List.iter (fun r -> let classe = Machregsaux.class_of_type (problem.typing r) in available_regs.(classe) <- available_regs.(classe) - 1) (Registers.Regset.elements live_regs_entry); - let nr_types_regs = Array.length available_regs in - (* wait di we have access to instructions here? No, we have to add - al this to constraints *) - (* let pressures = - * Array.init (nr_instructions) (fun i -> - * Array.init (nr_types_regs) (fun t -> - * match i with - * | Inop -> 0 - * | Iop (_, args, dest, _) - * | Iload(_, _, _, args, dest, _) -> - * if - * )) *) + let pressures = problem.pressure_deltas in let priorities = critical_paths successors in + + let current_resources = Array.copy problem.resource_bounds in - let module InstrSetCSP = - Set.Make (struct - type t=int - let compare x y = - match priorities.(y) - priorities.(x) with - | 0 -> x - y - | z -> z - end) in - - (* TODO: find a way to efficiently find an instruction which - decreases register pressure *) - (* idea: *) - (* let module InstrSetCSR = - * Set.Make (struct - * type t = int - * let compare x y = - * match pressures.(y) - pressures.(x) with - * | 0 -> (match priorities.(y) - priorities.(x) with - * | 0 -> x - y - * | z -> z) - * | z -> z - * end) in *) - (* where pressure.(x) is the delta of register pressure for - instruction x. Pb: different register types. Need to think about - it. Have one module for each register type, that's used when this - particular type reach a high pressure? *) + let module InstrSet = + struct + module MSet = + Set.Make (struct + type t=int + let compare x y = + match priorities.(y) - priorities.(x) with + | 0 -> x - y + | z -> z + end) + + let empty = MSet.empty + let is_empty = MSet.is_empty + let mem = MSet.mem + let add = MSet.add + let remove = MSet.remove + let union = MSet.union + let inter = MSet.inter + let iter = MSet.iter + + let compare_regs i x y = + match pressures.(y).(i) - pressures.(x).(i) with + | 0 -> (match priorities.(y) - priorities.(x) with + | 0 -> x - y + | z -> z) + | z -> z + + (** t is the register class *) + let sched_CSR t ready usages = + let result = ref (-1) in + iter (fun i -> + if vector_less_equal usages.(i) current_resources + then if !result = -1 || (compare_regs t !result i < 0) + then result := i) ready; + !result + end + in + + (* [compare i] is comp function for finding instruction with + lowest reg pres delta for reg class i *) let max_time = bound_max_time problem in - let ready = Array.make max_time InstrSetCSP.empty in + let ready = Array.make max_time InstrSet.empty in + Array.iteri (fun i preds -> + if i < nr_instructions && preds = [] + then ready.(0) <- InstrSet.add i ready.(0)) predecessors; + + let current_time = ref 0 + and earliest_time i = + try + let time = ref (-1) in + List.iter (fun (j, latency) -> + if times.(j) < 0 + then raise Exit + else let t = times.(j) + latency in + if t > !time + then time := t) predecessors.(i); + assert (!time >= 0); + !time + with Exit -> -1 + in + + let advance_time () = + (if !current_time < max_time-1 + then ( + Array.blit problem.resource_bounds 0 current_resources 0 + (Array.length current_resources); + ready.(!current_time + 1) <- + InstrSet.union (ready.(!current_time)) + (ready.(!current_time +1)); + ready.(!current_time) <- InstrSet.empty)); + incr current_time + in + + let attempt_scheduling ready usages = + let result = ref (-1) in + try + Array.iteri (fun i avlregs -> + if avlregs <= regs_thresholds.(i) + then ( + let maybe = InstrSet.sched_CSR i ready usages in + (if pressures.(maybe).(i) < 0 then + (vector_subtract usages.(maybe) current_resources; + result := maybe)); + raise Exit) + ) available_regs; + InstrSet.iter (fun i -> + if vector_less_equal usages.(i) current_resources + then ( + vector_subtract usages.(i) current_resources; + result := i; + raise Exit)); + -1 + with Exit -> !result in + (* silence warning, enable compilation while working *) - let _ = successors, predecessors, times, ready, nr_types_regs in + let _ = successors, predecessors, times, ready, compare, + earliest_time, advance_time, nr_types_regs in (* PLACEHOLDER *) None @@ -515,6 +580,7 @@ let reverse_problem problem = with creating a reverse scheduler aware of reg press *) typing = problem.typing; + pressure_deltas = [| [| |] |] ; instruction_usages = Array.init (nr_instructions + 1) (fun i -> if i=0 diff --git a/scheduling/InstructionScheduler.mli b/scheduling/InstructionScheduler.mli index 8dcc4ef5..e7f9e7db 100644 --- a/scheduling/InstructionScheduler.mli +++ b/scheduling/InstructionScheduler.mli @@ -29,6 +29,10 @@ type problem = { typing : RTLtyping.regenv; (** Register type map. *) + pressure_deltas : int array array; + (** At index (i, j), the pressure delta for instruction i, for + register class j. *) + instruction_usages: int array array; (** At index {i i} the vector of resources used by instruction number {i i}. It must be the same length as [resource_bounds] *) -- cgit From 66e15205c40de54639387a4c9b1cc78994525d55 Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Mon, 14 Jun 2021 13:53:08 +0200 Subject: scheduler written, need to test now --- driver/Driver.ml | 2 +- scheduling/InstructionScheduler.ml | 84 ++++++++++++++++++++++++++++++-------- 2 files changed, 68 insertions(+), 18 deletions(-) diff --git a/driver/Driver.ml b/driver/Driver.ml index 7192ba4b..5a8c7f2c 100644 --- a/driver/Driver.ml +++ b/driver/Driver.ml @@ -210,7 +210,7 @@ Processing options: -mtune= Type of CPU (for scheduling on some architectures) -fprepass Perform prepass scheduling (only on some architectures) [on] -fprepass= Perform postpass scheduling with the specified optimization [list] - (=list: list scheduling, =revlist: reverse list scheduling, =zigzag: zigzag scheduling, =ilp: ILP, =greedy: just packing bundles) + (=list: list scheduling, =revlist: reverse list scheduling, =regpres: list scheduling aware of register pressure, =zigzag: zigzag scheduling, =ilp: ILP, =greedy: just packing bundles) -fpostpass Perform postpass scheduling (only for K1 architecture) [on] -fpostpass= Perform postpass scheduling with the specified optimization [list] (=list: list scheduling, =ilp: ILP, =greedy: just packing bundles) diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml index 8d8c4267..a069de59 100644 --- a/scheduling/InstructionScheduler.ml +++ b/scheduling/InstructionScheduler.ml @@ -121,6 +121,13 @@ let vector_less_equal a b = true with Exit -> false;; +(* let vector_add a b = + * assert ((Array.length a) = (Array.length b)); + * for i=0 to (Array.length a)-1 + * do + * b.(i) <- b.(i) + a.(i) + * done;; *) + let vector_subtract a b = assert ((Array.length a) = (Array.length b)); for i=0 to (Array.length a)-1 @@ -341,6 +348,7 @@ let _ = fun x -> priority_list_scheduler INSTRUCTION_ORDER x;; (* A scheduler sensitive to register pressure *) let reg_pres_scheduler (problem : problem) : solution option = + DebugPrint.debug_flag := true; let nr_instructions = get_nr_instructions problem in let successors = get_successors problem and predecessors = get_predecessors problem @@ -381,11 +389,9 @@ let reg_pres_scheduler (problem : problem) : solution option = let empty = MSet.empty let is_empty = MSet.is_empty - let mem = MSet.mem let add = MSet.add let remove = MSet.remove let union = MSet.union - let inter = MSet.inter let iter = MSet.iter let compare_regs i x y = @@ -400,15 +406,12 @@ let reg_pres_scheduler (problem : problem) : solution option = let result = ref (-1) in iter (fun i -> if vector_less_equal usages.(i) current_resources - then if !result = -1 || (compare_regs t !result i < 0) + then if !result = -1 || (compare_regs t !result i < 0) then result := i) ready; !result end in - (* [compare i] is comp function for finding instruction with - lowest reg pres delta for reg class i *) - let max_time = bound_max_time problem in let ready = Array.make max_time InstrSet.empty in @@ -450,25 +453,71 @@ let reg_pres_scheduler (problem : problem) : solution option = if avlregs <= regs_thresholds.(i) then ( let maybe = InstrSet.sched_CSR i ready usages in - (if pressures.(maybe).(i) < 0 then + (* print_int maybe; + * print_newline (); + * flush stdout; *) + if maybe > 0 && pressures.(maybe).(i) < 0 then (vector_subtract usages.(maybe) current_resources; - result := maybe)); - raise Exit) - ) available_regs; + result := maybe; + raise Exit))) available_regs; InstrSet.iter (fun i -> if vector_less_equal usages.(i) current_resources then ( vector_subtract usages.(i) current_resources; result := i; - raise Exit)); + raise Exit)) ready; -1 - with Exit -> !result in + with Exit -> + if !result <> -1 then + vector_subtract pressures.(!result) available_regs; + !result in - (* silence warning, enable compilation while working *) - let _ = successors, predecessors, times, ready, compare, - earliest_time, advance_time, nr_types_regs in - (* PLACEHOLDER *) - None + while !current_time < max_time + do + if (InstrSet.is_empty ready.(!current_time)) + then advance_time () + else + match attempt_scheduling ready.(!current_time) + problem.instruction_usages with + | -1 -> advance_time() + | i -> (assert(times.(i) < 0); + times.(i) <- !current_time; + ready.(!current_time) + <- InstrSet.remove i (ready.(!current_time)); + List.iter (fun (instr_to, latency) -> + if instr_to < nr_instructions then + match earliest_time instr_to with + | -1 -> () + | to_time -> + ready.(to_time) + <- InstrSet.add instr_to ready.(to_time) + ) successors.(i); + successors.(i) <- [] + ) + done; + + try + let final_time = ref (-1) in + for i = 0 to nr_instructions - 1 do + (* print_int i; + * flush stdout; *) + (if times.(i) < 0 then raise Exit); + (if !final_time < times.(i) + 1 then final_time := times.(i) + 1) + done; + List.iter (fun (i, latency) -> + let target_time = latency + times.(i) in + if target_time > !final_time then + final_time := target_time) predecessors.(nr_instructions); + times.(nr_instructions) <- !final_time; + DebugPrint.debug_flag := false; + Some times + with Exit -> + DebugPrint.debug "reg_pres_sched failed\n"; + DebugPrint.debug_flag := false; + None + +;; + type bundle = int list;; @@ -1402,5 +1451,6 @@ let scheduler_by_name name = | "ilp" -> validated_scheduler cascaded_scheduler | "list" -> validated_scheduler list_scheduler | "revlist" -> validated_scheduler reverse_list_scheduler + | "regpres" -> validated_scheduler reg_pres_scheduler | "greedy" -> greedy_scheduler | s -> failwith ("unknown scheduler: " ^ s);; -- cgit From 3eb3751f84348a20b7ce211fdbf1d01a9c4685a8 Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Mon, 14 Jun 2021 14:46:01 +0200 Subject: One fewer spill with new sched on `test/.../spille_forw.c` --- test/nardino/scheduling/spille_forw.c | 53 +++++++++++++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 3 deletions(-) diff --git a/test/nardino/scheduling/spille_forw.c b/test/nardino/scheduling/spille_forw.c index 0c69efd5..770dfce5 100644 --- a/test/nardino/scheduling/spille_forw.c +++ b/test/nardino/scheduling/spille_forw.c @@ -1,6 +1,6 @@ #include -int f(int n) { +int f(int n, float * arr) { float a1 = (float) n; float b1 = 2.*a1; float c = a1; @@ -79,6 +79,48 @@ int f(int n) { float a26 = (float) n+25; float b26 = 2.*a26; c += a26; + float a27 = (float) n+26; + float b27 = 2.*a27; + c += a27; + float a28 = (float) n+27; + float b28 = 2.*a28; + c += a28; + float a29 = (float) n+28; + float b29 = 2.*a29; + c += a29; + float a30 = (float) n+29; + float b30 = 2.*a30; + c += a30; + arr[0] = a1; + arr[1] = a2; + arr[2] = a3; + arr[3] = a4; + arr[4] = a5; + arr[5] = a6; + arr[6] = a7; + arr[7] = a8; + arr[8] = a9; + arr[9] = a10; + arr[10] = a11; + arr[11] = a12; + arr[12] = a13; + arr[13] = a14; + arr[14] = a15; + arr[15] = a16; + arr[16] = a17; + arr[17] = a18; + arr[18] = a19; + arr[19] = a20; + arr[20] = a21; + arr[21] = a22; + arr[22] = a23; + arr[23] = a24; + arr[24] = a25; + arr[25] = a26; + arr[26] = a27; + arr[27] = a28; + arr[28] = a29; + arr[29] = a30; return c + b1+ b2+ @@ -105,7 +147,11 @@ int f(int n) { b23+ b24+ b25+ - b26; + b26+ + b27+ + b28+ + b29+ + b30; } @@ -114,6 +160,7 @@ int f(int n) { int main(int argc, char *argv[]) { - f(5); + float arr[30]; + f(5, arr); return 0; } -- cgit From 77d1ab5f7a6d04970185102508f1cec529c7cb40 Mon Sep 17 00:00:00 2001 From: Pierre Goutagny Date: Mon, 14 Jun 2021 15:36:55 +0200 Subject: Add the RTLTunneling oracle --- backend/RTLTunnelingaux.ml | 284 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 284 insertions(+) create mode 100644 backend/RTLTunnelingaux.ml diff --git a/backend/RTLTunnelingaux.ml b/backend/RTLTunnelingaux.ml new file mode 100644 index 00000000..a30b43cf --- /dev/null +++ b/backend/RTLTunnelingaux.ml @@ -0,0 +1,284 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* TODO: Proper author information *) +(* *) +(* Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +(* + +This file implements the [branch_target] oracle that identifies "nop" branches in a RTL function, +and computes their target node with the distance (ie the number of cummulated nops) toward this target. + +See [RTLTunneling.v] + +*) + +open Coqlib +open RTL +open Maps +open Camlcoq + +let limit_tunneling = None (* for debugging: [Some x] limit the number of iterations *) +let debug_flag = ref true +let final_dump = false (* set to true to have a more verbose debugging *) + +let debug fmt = + if !debug_flag then Printf.eprintf fmt + else Printf.ifprintf stderr fmt + +exception BugOnPC of int + +let nopcounter = ref 0 + +(* type of labels in the cfg *) +type label = int * P.t + +(* instructions under analyzis *) +type simple_inst = (* a simplified view of RTL instructions *) + INOP of node +| ICOND of node * node +| OTHER +and node = { + lab : label; + mutable inst: simple_inst; + mutable link: node; (* link in the union-find: itself for non "nop"-nodes, target of the "nop" otherwise *) + mutable dist: int; + mutable tag: int + } + +(* type of the (simplified) CFG *) +type cfg = { + nodes: (int, node) Hashtbl.t; + mutable rems: node list; (* remaining conditions that may become lbranch or not *) + mutable num_rems: int; + mutable iter_num: int (* number of iterations in elimination of conditions *) + } + +let lab_i (n: node): int = fst n.lab +let lab_p (n: node): P.t = snd n.lab + +let rec target c n = (* inspired from the "find" of union-find algorithm *) + match n.inst with + | ICOND(s1,s2) -> + if n.link != n + then update c n + else if n.tag < c.iter_num then ( + (* we try to change the condition ... *) + n.tag <- c.iter_num; (* ... but at most once by iteration *) + let ts1 = target c s1 in + let ts2 = target c s2 in + if ts1 == ts2 then (n.link <- ts1; ts1) else n + ) else n + | _ -> + if n.link != n + then update c n + else n +and update c n = + let t = target c n.link in + n.link <- t; t + +let get_node c p = + let li = P.to_int p in + try + Hashtbl.find c.nodes li + with + Not_found -> + let rec n = { lab = (li, p); inst = OTHER; link = n ; dist = 0; tag = 0 } in + Hashtbl.add c.nodes li n; + n + +let set_branch c p s = + let li = P.to_int p in + try + let n = Hashtbl.find c.nodes li in + n.inst <- INOP s; + n.link <- target c s + with + Not_found -> + let n = { lab = (li,p); inst = INOP s; link = target c s; dist = 0; tag = 0 } in + Hashtbl.add c.nodes li n + + +(* build [c.nodes] and accumulate conditions in [acc] *) +let build_simplified_cfg c acc pc i = + match i with + | Inop s -> + let ns = get_node c s in + set_branch c pc ns; + incr nopcounter; + acc + | Icond (_, _, s1, s2, _) -> + c.num_rems <- c.num_rems + 1; + let ns1 = get_node c s1 in + let ns2 = get_node c s2 in + let npc = get_node c pc in + npc.inst <- ICOND(ns1, ns2); + npc::acc + | _ -> acc + +(* try to change a condition into a branch +[acc] is the current accumulator of conditions to consider in the next iteration of repeat_change_cond +*) +let try_change_cond c acc pc = + match pc.inst with + | ICOND(s1,s2) -> + let ts1 = target c s1 in + let ts2 = target c s2 in + if ts1 == ts2 then ( + pc.link <- ts1; + c.num_rems <- c.num_rems - 1; + acc + ) else + pc::acc + | _ -> raise (BugOnPC (lab_i pc)) (* ICOND expected *) + +(* repeat [try_change_cond] until no condition is changed into a branch *) +let rec repeat_change_cond c = + c.iter_num <- c.iter_num + 1; + debug "++ RTLTunneling.branch_target %d: remaining number of conds to consider = %d\n" (c.iter_num) (c.num_rems); + let old = c.num_rems in + c.rems <- List.fold_left (try_change_cond c) [] c.rems; + let curr = c.num_rems in + let continue = + match limit_tunneling with + | Some n -> curr < old && c.iter_num < n + | None -> curr < old + in + if continue + then repeat_change_cond c + + +(* compute the final distance of each nop nodes to its target *) +let undef_dist = -1 +let self_dist = undef_dist-1 +let rec dist n = + if n.dist = undef_dist + then ( + n.dist <- self_dist; (* protection against an unexpected loop in the data-structure *) + n.dist <- + (match n.inst with + | OTHER -> 0 + | INOP p -> 1 + dist p + | ICOND (p1,p2) -> 1 + (max (dist p1) (dist p2))); + n.dist + ) else if n.dist=self_dist then raise (BugOnPC (lab_i n)) + else n.dist + +let final_export f c = + let count = ref 0 in + let filter_nops_init_dist _ n acc = + let tn = target c n in + if tn == n + then ( + n.dist <- 0; (* force [n] to be a base case in the recursion of [dist] *) + acc + ) else ( + n.dist <- undef_dist; (* force [dist] to compute the actual [n.dist] *) + count := !count+1; + n::acc + ) + in + let nops = Hashtbl.fold filter_nops_init_dist c.nodes [] in + let res = List.fold_left (fun acc n -> PTree.set (lab_p n) (lab_p n.link, Z.of_uint (dist n)) acc) PTree.empty nops in + debug "* RTLTunneling.branch_target: [stats] initial number of nops = %d\n" !nopcounter; + debug "* RTLTunneling.branch_target: [stats] final number of eliminated nops = %d\n" !count; + res + +(*********************************************) +(*** START: printing and debugging functions *) + +let string_of_labeli nodes ipc = + try + let pc = Hashtbl.find nodes ipc in + if pc.link == pc + then Printf.sprintf "(Target@%d)" (dist pc) + else Printf.sprintf "(Nop %d @%d)" (lab_i pc.link) (dist pc) + with + Not_found -> "" + +let print_instr c println (pc, i) = + match i with + | Inop s -> (if println then debug "\n"); debug "%d:Inop %d %s\n" pc (P.to_int s) (string_of_labeli c.nodes pc); false + | Icond (_, _, s1, s2, _) -> (if println then debug "\n"); debug "%d:Icond (%d,%d) %s\n" pc (P.to_int s1) (P.to_int s2) (string_of_labeli c.nodes pc); false + | _ -> debug "%d " pc; true + + +let print_cfg f c = + let a = Array.of_list (PTree.fold (fun acc pc i -> (P.to_int pc,i)::acc) f.fn_code []) in + Array.fast_sort (fun (i1,_) (i2,_) -> i2 - i1) a; + let ep = P.to_int f.fn_entrypoint in + debug "entrypoint: %d %s\n" ep (string_of_labeli c.nodes ep); + let println = Array.fold_left (print_instr c) false a in + (if println then debug "\n");debug "remaining cond:"; + List.iter (fun n -> debug "%d " (lab_i n)) c.rems; + debug "\n" + +(*************************************************************) +(* Copy-paste of the extracted code of the verifier *) +(* with [raise (BugOnPC (P.to_int pc))] instead of [Error.*] *) + +let get td pc = + match PTree.get pc td with + | Some p -> let (t0, d) = p in (t0, d) + | None -> (pc, Z.of_uint 0) + +let check_instr td pc i = + match PTree.get pc td with + | Some p -> + let (tpc, dpc) = p in + let dpc0 = dpc in begin + match i with + | Inop s -> + let (ts, ds) = get td s in + if peq tpc ts + then if zlt ds dpc0 + then () + else raise (BugOnPC (P.to_int pc)) + else raise (BugOnPC (P.to_int pc)) + | Icond (_, _, s1, s2, _) -> + let (ts1, ds1) = get td s1 in + let (ts2, ds2) = get td s2 in + if peq tpc ts1 + then if peq tpc ts2 + then if zlt ds1 dpc0 + then if zlt ds2 dpc0 + then () + else raise (BugOnPC (P.to_int pc)) + else raise (BugOnPC (P.to_int pc)) + else raise (BugOnPC (P.to_int pc)) + else raise (BugOnPC (P.to_int pc)) + | _ -> + raise (BugOnPC (P.to_int pc)) end + | None -> () + +(** val check_code : coq_UF -> code -> unit res **) + +let check_code td c = + PTree.fold (fun _ pc i -> check_instr td pc i) c (()) + +(*** END: copy-paste & debugging functions *******) + +let branch_target f = + debug "* RTLTunneling.branch_target: starting on a new function\n"; + if limit_tunneling <> None then debug "* WARNING: limit_tunneling <> None\n"; + let c = { nodes = Hashtbl.create 100; rems = []; num_rems = 0; iter_num = 0 } in + c.rems <- PTree.fold (build_simplified_cfg c) f.fn_code []; + repeat_change_cond c; + let res = final_export f c in + if !debug_flag then ( + try + check_code res f.fn_code; + if final_dump then print_cfg f c; + with e -> ( + print_cfg f c; + check_code res f.fn_code + ) + ); + res -- cgit From 90375dc090b19cf8202903754d7f47e8d568d9f8 Mon Sep 17 00:00:00 2001 From: Pierre Goutagny Date: Mon, 14 Jun 2021 15:37:50 +0200 Subject: Add RTL Tunneling as a pass --- backend/RTLTunneling.v | 1 + backend/RTLTunnelingproof.v | 5 +++++ tools/compiler_expand.ml | 3 ++- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/backend/RTLTunneling.v b/backend/RTLTunneling.v index e6901a9f..049160fd 100644 --- a/backend/RTLTunneling.v +++ b/backend/RTLTunneling.v @@ -25,6 +25,7 @@ Definition UF := PTree.t (node * Z). (* The oracle returns a map of "nop" node to their target with a distance (ie the number of the "nop" node on the path) to the target. *) Axiom branch_target: RTL.function -> UF. +Extract Constant branch_target => "RTLTunnelingaux.branch_target". (* TODO: add an extraction command to link branch_target with its implementation *) Local Open Scope error_monad_scope. diff --git a/backend/RTLTunnelingproof.v b/backend/RTLTunnelingproof.v index 7f6696ad..14a2c037 100644 --- a/backend/RTLTunnelingproof.v +++ b/backend/RTLTunnelingproof.v @@ -149,6 +149,11 @@ Definition match_prog (p tp: program) := * `p` et `tp` sont les programmes donc on doit dire s'ils match *) + (**) Lemma transf_program_match: + forall prog tprog, transf_program prog = OK tprog -> match_prog prog tprog. +Proof. + intros. eapply match_transform_partial_program_contextual; eauto. +Qed. Section PRESERVATION. diff --git a/tools/compiler_expand.ml b/tools/compiler_expand.ml index ddb3c21a..265d0bcf 100644 --- a/tools/compiler_expand.ml +++ b/tools/compiler_expand.ml @@ -46,7 +46,8 @@ TOTAL, (Option "optim_move_loop_invariants"), NoRequire, (Some "Renumbering for PARTIAL, (Option "optim_move_loop_invariants"), NoRequire, (Some "CSE3 for LICM"), "CSE3"; PARTIAL, (Option "optim_move_loop_invariants"), NoRequire, (Some "Redundancy elimination for LICM"), "Deadcode"; TOTAL, (Option "all_loads_nontrap"), Require, None, "Allnontrap"; -PARTIAL, Always, Require, (Some "Unused globals"), "Unusedglob" +PARTIAL, Always, Require, (Some "Unused globals"), "Unusedglob"; +PARTIAL, Always, Require, (Some "RTL Branch Tunneling"), "RTLTunneling" |];; let post_rtl_passes = -- cgit From bff4e6ff0b782619b6fcc18751fa575cbb11de68 Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Mon, 14 Jun 2021 17:39:58 +0200 Subject: was very wrong, fixing --- aarch64/PostpassSchedulingOracle.ml | 2 +- aarch64/PrepassSchedulingOracle.ml | 11 ++++-- scheduling/InstructionScheduler.ml | 7 ++-- scheduling/InstructionScheduler.mli | 6 +-- scheduling/RTLpathScheduleraux.ml | 74 ++++++++++++++++++++++++++++++++----- 5 files changed, 80 insertions(+), 20 deletions(-) diff --git a/aarch64/PostpassSchedulingOracle.ml b/aarch64/PostpassSchedulingOracle.ml index 867341ca..6f784238 100644 --- a/aarch64/PostpassSchedulingOracle.ml +++ b/aarch64/PostpassSchedulingOracle.ml @@ -509,7 +509,7 @@ let build_problem bb = resource_bounds = opweights.pipelined_resource_bounds; live_regs_entry = Registers.Regset.empty; (* unused here *) typing = (fun x -> AST.Tint); (* unused here *) - pressure_deltas = [| [| |] |] ; + reference_counting = None; instruction_usages = instruction_usages bb; latency_constraints = latency_constraints bb; } diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml index 19f05749..fe757c99 100644 --- a/aarch64/PrepassSchedulingOracle.ml +++ b/aarch64/PrepassSchedulingOracle.ml @@ -202,6 +202,7 @@ let get_simple_dependencies (opweights : opweights) (seqa : (instruction*Regset. !latency_constraints;; +(** useless *) let get_pressure_deltas (seqa : (instruction * Regset.t) array) (typing : RTLtyping.regenv) : int array array = @@ -454,13 +455,13 @@ let get_alias_dependencies seqa = *) let define_problem (opweights : opweights) (live_entry_regs : Regset.t) - (typing : RTLtyping.regenv) seqa = + (typing : RTLtyping.regenv) reference_counting seqa = let simple_deps = get_simple_dependencies opweights seqa in { max_latency = -1; resource_bounds = opweights.pipelined_resource_bounds; live_regs_entry = live_entry_regs; typing = typing; - pressure_deltas = get_pressure_deltas seqa typing; + reference_counting = Some reference_counting; instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa); latency_constraints = (* if (use_alias_analysis ()) @@ -492,7 +493,8 @@ let prepass_scheduler_by_name name problem early_ones = let schedule_sequence (seqa : (instruction*Regset.t) array) (live_regs_entry : Registers.Regset.t) - (typing : RTLtyping.regenv) = + (typing : RTLtyping.regenv) + reference = let opweights = OpWeights.get_opweights () in try if (Array.length seqa) <= 1 @@ -502,7 +504,8 @@ let schedule_sequence (seqa : (instruction*Regset.t) array) let nr_instructions = Array.length seqa in (if !Clflags.option_debug_compcert > 6 then Printf.printf "prepass scheduling length = %d\n" (Array.length seqa)); - let problem = define_problem opweights live_regs_entry typing seqa in + let problem = define_problem opweights live_regs_entry + typing reference seqa in (if !Clflags.option_debug_compcert > 7 then (print_sequence stdout (Array.map fst seqa); print_problem stdout problem)); diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml index a069de59..08349f60 100644 --- a/scheduling/InstructionScheduler.ml +++ b/scheduling/InstructionScheduler.ml @@ -35,7 +35,8 @@ type problem = { resource_bounds : int array; live_regs_entry : Registers.Regset.t; typing : RTLtyping.regenv; - pressure_deltas : int array array; + reference_counting : ((Registers.reg, int * int) Hashtbl.t + * (Registers.reg list array)) option; instruction_usages : int array array; latency_constraints : latency_constraint list; };; @@ -370,7 +371,7 @@ let reg_pres_scheduler (problem : problem) : solution option = (Registers.Regset.elements live_regs_entry); - let pressures = problem.pressure_deltas in + let pressures = [| [| |] |] in let priorities = critical_paths successors in @@ -629,7 +630,7 @@ let reverse_problem problem = with creating a reverse scheduler aware of reg press *) typing = problem.typing; - pressure_deltas = [| [| |] |] ; + reference_counting = problem.reference_counting; instruction_usages = Array.init (nr_instructions + 1) (fun i -> if i=0 diff --git a/scheduling/InstructionScheduler.mli b/scheduling/InstructionScheduler.mli index e7f9e7db..9b6f7a3c 100644 --- a/scheduling/InstructionScheduler.mli +++ b/scheduling/InstructionScheduler.mli @@ -29,9 +29,9 @@ type problem = { typing : RTLtyping.regenv; (** Register type map. *) - pressure_deltas : int array array; - (** At index (i, j), the pressure delta for instruction i, for - register class j. *) + reference_counting : ((Registers.reg, int * int) Hashtbl.t + * (Registers.reg list array)) option; + (** See RTLpathScheduleraux.reference_counting. *) instruction_usages: int array array; (** At index {i i} the vector of resources used by instruction number {i i}. It must be the same length as [resource_bounds] *) diff --git a/scheduling/RTLpathScheduleraux.ml b/scheduling/RTLpathScheduleraux.ml index e04e7c23..02e0c769 100644 --- a/scheduling/RTLpathScheduleraux.ml +++ b/scheduling/RTLpathScheduleraux.ml @@ -72,7 +72,60 @@ let get_superblocks code entry pm typing = lsb end +(** the useful one. Returns a hashtable with bindings of form + ** [(r,(t, n)], where [r] is a pseudo-register (Registers.reg), + ** [t] is its class (according to [typing]), and [n] the number of + ** times it's referenced as an argument in instructions of [seqa] ; + ** and an arrray containg the argument regset of each instruction *) +let reference_counting (seqa : (instruction * Regset.t) array) + (out_regs : Registers.Regset.t) (typing : RTLtyping.regenv) : + (Registers.reg, int * int) Hashtbl.t * Registers.reg list array = + let retl = Hashtbl.create 42 in + let retr = Array.make (Array.length seqa) [] in + List.iter (fun reg -> + Hashtbl.add retl + reg (Machregsaux.class_of_type (typing reg), 1) + ) (Registers.Regset.elements out_regs); + let add_reg reg = + match Hashtbl.find_opt retl reg with + | Some (t, n) -> Hashtbl.add retl reg (t, n+1) + | None -> Hashtbl.add retl reg (Machregsaux.class_of_type + (typing reg), 1) + in + Array.iteri (fun i (ins, _) -> + match ins with + | Iop(_,args,_,_) | Iload(_,_,_,args,_,_) + | Icond(_,args,_,_,_) -> + List.iter (add_reg) args; + retr.(i) <- args + | Istore(_,_,args,src,_) -> + List.iter (add_reg) args; + add_reg src; + retr.(i) <- src::args + | Icall(_,fn,args,_,_) | Itailcall(_,fn,args) -> + List.iter (add_reg) args; + retr.(i) <- (match fn with + | Datatypes.Coq_inl reg -> + add_reg reg; + reg::args + | _ -> args) + | Ibuiltin(_,args,_,_) -> + let rec bar = function + | AST.BA r -> add_reg r; + retr.(i) <- r::retr.(i) + | AST.BA_splitlong (hi, lo) | AST.BA_addptr (hi, lo) -> + bar hi; bar lo + | _ -> () + in + List.iter (bar) args + | Ijumptable (reg,_) | Ireturn (Some reg) -> + add_reg reg; + retr.(i) <- [reg] + | _ -> () + ) seqa; + retl, retr + let get_live_regs_entry (sb : superblock) code = (if !Clflags.option_debug_compcert > 6 then debug_flag := true); @@ -164,17 +217,20 @@ let schedule_superblock sb code = | None -> 1 in debug "hello\n"; let live_regs_entry = get_live_regs_entry sb code in + let seqa = + Array.map (fun i -> + (match PTree.get i code with + | Some ii -> ii + | None -> failwith "RTLpathScheduleraux.schedule_superblock"), + (match PTree.get i sb.liveins with + | Some s -> s + | None -> Regset.empty)) + (Array.sub sb.instructions 0 (nr_instr-trailer_length)) in match PrepassSchedulingOracle.schedule_sequence - (Array.map (fun i -> - (match PTree.get i code with - | Some ii -> ii - | None -> failwith "RTLpathScheduleraux.schedule_superblock"), - (match PTree.get i sb.liveins with - | Some s -> s - | None -> Regset.empty)) - (Array.sub sb.instructions 0 (nr_instr-trailer_length))) + seqa live_regs_entry - sb.typing with + sb.typing + (reference_counting seqa sb.s_output_regs sb.typing) with | None -> sb.instructions | Some order -> let ins' = -- cgit From 19464b3992eadf7670acc7231896103ab54885e5 Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Tue, 15 Jun 2021 12:07:43 +0200 Subject: fixing Still need to find what to do when pressure is high but there are no instructions available that decrease it --- scheduling/InstructionScheduler.ml | 83 ++++++++++++++++++++++++++++++----- scheduling/InstructionScheduler.mli | 2 +- scheduling/RTLpathScheduleraux.ml | 47 +++++++++++++++----- test/nardino/scheduling/spille_forw.c | 60 ++++++++++++------------- 4 files changed, 139 insertions(+), 53 deletions(-) diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml index 08349f60..19bfaeb0 100644 --- a/scheduling/InstructionScheduler.ml +++ b/scheduling/InstructionScheduler.ml @@ -36,7 +36,7 @@ type problem = { live_regs_entry : Registers.Regset.t; typing : RTLtyping.regenv; reference_counting : ((Registers.reg, int * int) Hashtbl.t - * (Registers.reg list array)) option; + * ((Registers.reg * bool) list array)) option; instruction_usages : int array array; latency_constraints : latency_constraint list; };; @@ -363,15 +363,37 @@ let reg_pres_scheduler (problem : problem) : solution option = let regs_thresholds = Array.init nr_types_regs (fun i -> 5) in (* placeholder value *) + + let class_r r = + Machregsaux.class_of_type (problem.typing r) in + + let live_regs = Hashtbl.create 42 in List.iter (fun r -> let classe = Machregsaux.class_of_type (problem.typing r) in available_regs.(classe) - <- available_regs.(classe) - 1) + <- available_regs.(classe) - 1; + Hashtbl.add live_regs r classe) (Registers.Regset.elements live_regs_entry); + + let counts, mentions = + match problem.reference_counting with + | Some (l, r) -> l, r + | None -> assert false + in - let pressures = [| [| |] |] in + let fold_delta i = (fun a (r, b) -> + a + + if class_r r <> i then 0 else + (if b then + if (Hashtbl.find counts r = (i, 1)) + then 1 else 0 + else + match Hashtbl.find_opt live_regs r with + | None -> -1 + | Some t -> 0 + )) in let priorities = critical_paths successors in @@ -396,7 +418,14 @@ let reg_pres_scheduler (problem : problem) : solution option = let iter = MSet.iter let compare_regs i x y = - match pressures.(y).(i) - pressures.(x).(i) with + let pyi = List.fold_left (fold_delta i) 0 mentions.(y) in + print_int y; + print_string " "; + print_int pyi; + print_newline (); + flush stdout; + let pxi = List.fold_left (fold_delta i) 0 mentions.(x) in + match pyi - pxi with | 0 -> (match priorities.(y) - priorities.(x) with | 0 -> x - y | z -> z) @@ -404,10 +433,13 @@ let reg_pres_scheduler (problem : problem) : solution option = (** t is the register class *) let sched_CSR t ready usages = + print_string "looking for max delta"; + print_newline (); + flush stdout; let result = ref (-1) in iter (fun i -> if vector_less_equal usages.(i) current_resources - then if !result = -1 || (compare_regs t !result i < 0) + then if !result = -1 || (compare_regs t !result i > 0) then result := i) ready; !result end @@ -451,13 +483,28 @@ let reg_pres_scheduler (problem : problem) : solution option = let result = ref (-1) in try Array.iteri (fun i avlregs -> + print_string "avlregs: "; + print_int i; + print_string " "; + print_int avlregs; + print_newline (); + flush stdout; if avlregs <= regs_thresholds.(i) then ( let maybe = InstrSet.sched_CSR i ready usages in - (* print_int maybe; - * print_newline (); - * flush stdout; *) - if maybe > 0 && pressures.(maybe).(i) < 0 then + print_string "maybe\n"; + print_int maybe; + print_newline (); + flush stdout; + if maybe > 0 && + let delta = + List.fold_left (fold_delta i) 0 mentions.(maybe) in + print_string "delta "; + print_int delta; + print_newline (); + flush stdout; + delta + >= 0 then (vector_subtract usages.(maybe) current_resources; result := maybe; raise Exit))) available_regs; @@ -470,7 +517,23 @@ let reg_pres_scheduler (problem : problem) : solution option = -1 with Exit -> if !result <> -1 then - vector_subtract pressures.(!result) available_regs; + (List.iter (fun (r,b) -> + if b then + match Hashtbl.find_opt counts r with + | None -> assert false + | Some (t, n) -> + Hashtbl.remove counts r; + (if n = 1 then + available_regs.(t) + <- available_regs.(t) + 1) + else + let t = class_r r in + match Hashtbl.find_opt live_regs r with + | None -> (Hashtbl.add live_regs r t; + available_regs.(t) + <- available_regs.(t) - 1) + | Some i -> () + ) mentions.(!result)); !result in while !current_time < max_time diff --git a/scheduling/InstructionScheduler.mli b/scheduling/InstructionScheduler.mli index 9b6f7a3c..b5a5463b 100644 --- a/scheduling/InstructionScheduler.mli +++ b/scheduling/InstructionScheduler.mli @@ -30,7 +30,7 @@ type problem = { (** Register type map. *) reference_counting : ((Registers.reg, int * int) Hashtbl.t - * (Registers.reg list array)) option; + * ((Registers.reg * bool) list array)) option; (** See RTLpathScheduleraux.reference_counting. *) instruction_usages: int array array; diff --git a/scheduling/RTLpathScheduleraux.ml b/scheduling/RTLpathScheduleraux.ml index 02e0c769..9c3ff689 100644 --- a/scheduling/RTLpathScheduleraux.ml +++ b/scheduling/RTLpathScheduleraux.ml @@ -76,12 +76,17 @@ end ** [(r,(t, n)], where [r] is a pseudo-register (Registers.reg), ** [t] is its class (according to [typing]), and [n] the number of ** times it's referenced as an argument in instructions of [seqa] ; - ** and an arrray containg the argument regset of each instruction *) + ** and an arrray containg the list of regs referenced by each + ** instruction, with a boolean to know whether it's as arg or dest *) let reference_counting (seqa : (instruction * Regset.t) array) (out_regs : Registers.Regset.t) (typing : RTLtyping.regenv) : - (Registers.reg, int * int) Hashtbl.t * Registers.reg list array = + (Registers.reg, int * int) Hashtbl.t * + (Registers.reg * bool) list array = let retl = Hashtbl.create 42 in let retr = Array.make (Array.length seqa) [] in + (* retr.(i) : (r, b) -> (r', b') -> ... + where b = true if seen as arg, false if seen as dest + *) List.iter (fun reg -> Hashtbl.add retl reg (Machregsaux.class_of_type (typing reg), 1) @@ -92,35 +97,53 @@ let reference_counting (seqa : (instruction * Regset.t) array) | None -> Hashtbl.add retl reg (Machregsaux.class_of_type (typing reg), 1) in + let map_true = List.map (fun r -> r, true) in Array.iteri (fun i (ins, _) -> match ins with - | Iop(_,args,_,_) | Iload(_,_,_,args,_,_) + | Iop(_,args,dest,_) | Iload(_,_,_,args,dest,_) -> + List.iter (add_reg) args; + retr.(i) <- (dest, false)::(map_true args) | Icond(_,args,_,_,_) -> List.iter (add_reg) args; - retr.(i) <- args + retr.(i) <- map_true args | Istore(_,_,args,src,_) -> List.iter (add_reg) args; add_reg src; - retr.(i) <- src::args - | Icall(_,fn,args,_,_) | Itailcall(_,fn,args) -> + retr.(i) <- (src, true)::(map_true args) + | Icall(_,fn,args,dest,_) -> + List.iter (add_reg) args; + retr.(i) <- (match fn with + | Datatypes.Coq_inl reg -> + add_reg reg; + (dest,false)::(reg, true)::(map_true args) + | _ -> (dest,false)::(map_true args)) + + | Itailcall(_,fn,args) -> List.iter (add_reg) args; retr.(i) <- (match fn with | Datatypes.Coq_inl reg -> add_reg reg; - reg::args - | _ -> args) - | Ibuiltin(_,args,_,_) -> + (reg, true)::(map_true args) + | _ -> map_true args) + | Ibuiltin(_,args,dest,_) -> let rec bar = function | AST.BA r -> add_reg r; - retr.(i) <- r::retr.(i) + retr.(i) <- (r, true)::retr.(i) | AST.BA_splitlong (hi, lo) | AST.BA_addptr (hi, lo) -> bar hi; bar lo | _ -> () in - List.iter (bar) args + List.iter (bar) args; + let rec bad = function + | AST.BR r -> retr.(i) <- (r, false)::retr.(i) + | AST.BR_splitlong (hi, lo) -> + bad hi; bad lo + | _ -> () + in + bad dest; | Ijumptable (reg,_) | Ireturn (Some reg) -> add_reg reg; - retr.(i) <- [reg] + retr.(i) <- [reg, true] | _ -> () ) seqa; retl, retr diff --git a/test/nardino/scheduling/spille_forw.c b/test/nardino/scheduling/spille_forw.c index 770dfce5..db88588b 100644 --- a/test/nardino/scheduling/spille_forw.c +++ b/test/nardino/scheduling/spille_forw.c @@ -91,36 +91,36 @@ int f(int n, float * arr) { float a30 = (float) n+29; float b30 = 2.*a30; c += a30; - arr[0] = a1; - arr[1] = a2; - arr[2] = a3; - arr[3] = a4; - arr[4] = a5; - arr[5] = a6; - arr[6] = a7; - arr[7] = a8; - arr[8] = a9; - arr[9] = a10; - arr[10] = a11; - arr[11] = a12; - arr[12] = a13; - arr[13] = a14; - arr[14] = a15; - arr[15] = a16; - arr[16] = a17; - arr[17] = a18; - arr[18] = a19; - arr[19] = a20; - arr[20] = a21; - arr[21] = a22; - arr[22] = a23; - arr[23] = a24; - arr[24] = a25; - arr[25] = a26; - arr[26] = a27; - arr[27] = a28; - arr[28] = a29; - arr[29] = a30; + /* arr[0] = a1; */ + /* arr[1] = a2; */ + /* arr[2] = a3; */ + /* arr[3] = a4; */ + /* arr[4] = a5; */ + /* arr[5] = a6; */ + /* arr[6] = a7; */ + /* arr[7] = a8; */ + /* arr[8] = a9; */ + /* arr[9] = a10; */ + /* arr[10] = a11; */ + /* arr[11] = a12; */ + /* arr[12] = a13; */ + /* arr[13] = a14; */ + /* arr[14] = a15; */ + /* arr[15] = a16; */ + /* arr[16] = a17; */ + /* arr[17] = a18; */ + /* arr[18] = a19; */ + /* arr[19] = a20; */ + /* arr[20] = a21; */ + /* arr[21] = a22; */ + /* arr[22] = a23; */ + /* arr[23] = a24; */ + /* arr[24] = a25; */ + /* arr[25] = a26; */ + /* arr[26] = a27; */ + /* arr[27] = a28; */ + /* arr[28] = a29; */ + /* arr[29] = a30; */ return c + b1+ b2+ -- cgit From 87c82b6fcf2bf825a8c60fc6a95498aac9f826d4 Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Tue, 15 Jun 2021 14:44:56 +0200 Subject: kinda fixed Spills are definitely reduced, but lots of arbitrary in there: See previous commit: need to determine what to do if pressure is too high but no schedulable instruction can reduce it. For now, advance time for at most 5 cycles, if still no suitable instruction, go back to CSP --- scheduling/InstructionScheduler.ml | 62 ++++++++++++++++++++++++-------------- scheduling/RTLpathScheduleraux.ml | 20 ++++++++++-- 2 files changed, 57 insertions(+), 25 deletions(-) diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml index 19bfaeb0..72222022 100644 --- a/scheduling/InstructionScheduler.ml +++ b/scheduling/InstructionScheduler.ml @@ -478,6 +478,9 @@ let reg_pres_scheduler (problem : problem) : solution option = ready.(!current_time) <- InstrSet.empty)); incr current_time in + + (* ALL MENTIONS TO cnt ARE PLACEHOLDERS *) + let cnt = ref 0 in let attempt_scheduling ready usages = let result = ref (-1) in @@ -488,8 +491,11 @@ let reg_pres_scheduler (problem : problem) : solution option = print_string " "; print_int avlregs; print_newline (); + print_string "live regs: "; + print_int (Hashtbl.length live_regs); + print_newline (); flush stdout; - if avlregs <= regs_thresholds.(i) + if !cnt < 5 && avlregs <= regs_thresholds.(i) then ( let maybe = InstrSet.sched_CSR i ready usages in print_string "maybe\n"; @@ -506,8 +512,9 @@ let reg_pres_scheduler (problem : problem) : solution option = delta >= 0 then (vector_subtract usages.(maybe) current_resources; - result := maybe; - raise Exit))) available_regs; + result := maybe) + else incr cnt; + raise Exit)) available_regs; InstrSet.iter (fun i -> if vector_less_equal usages.(i) current_resources then ( @@ -516,25 +523,7 @@ let reg_pres_scheduler (problem : problem) : solution option = raise Exit)) ready; -1 with Exit -> - if !result <> -1 then - (List.iter (fun (r,b) -> - if b then - match Hashtbl.find_opt counts r with - | None -> assert false - | Some (t, n) -> - Hashtbl.remove counts r; - (if n = 1 then - available_regs.(t) - <- available_regs.(t) + 1) - else - let t = class_r r in - match Hashtbl.find_opt live_regs r with - | None -> (Hashtbl.add live_regs r t; - available_regs.(t) - <- available_regs.(t) - 1) - | Some i -> () - ) mentions.(!result)); - !result in + !result in while !current_time < max_time do @@ -545,6 +534,35 @@ let reg_pres_scheduler (problem : problem) : solution option = problem.instruction_usages with | -1 -> advance_time() | i -> (assert(times.(i) < 0); + (print_string "INSTR ISSUED: "; + print_int i; + print_newline (); + flush stdout; + cnt := 0; + List.iter (fun (r,b) -> + if b then + (match Hashtbl.find_opt counts r with + | None -> assert false + | Some (t, n) -> + Hashtbl.remove counts r; + if n = 1 then + (print_string "yaaaaaaaaaaaas "; + print_int (Camlcoq.P.to_int r); + print_newline (); + Hashtbl.remove live_regs r; + available_regs.(t) + <- available_regs.(t) + 1)) + else + let t = class_r r in + match Hashtbl.find_opt live_regs r with + | None -> (print_string "noooooooooo "; + print_int (Camlcoq.P.to_int r); + print_newline (); + Hashtbl.add live_regs r t; + available_regs.(t) + <- available_regs.(t) - 1) + | Some i -> () + ) mentions.(i)); times.(i) <- !current_time; ready.(!current_time) <- InstrSet.remove i (ready.(!current_time)); diff --git a/scheduling/RTLpathScheduleraux.ml b/scheduling/RTLpathScheduleraux.ml index 9c3ff689..8df3edbc 100644 --- a/scheduling/RTLpathScheduleraux.ml +++ b/scheduling/RTLpathScheduleraux.ml @@ -72,7 +72,7 @@ let get_superblocks code entry pm typing = lsb end -(** the useful one. Returns a hashtable with bindings of form +(** the useful one. Returns a hashtable with bindings of shape ** [(r,(t, n)], where [r] is a pseudo-register (Registers.reg), ** [t] is its class (according to [typing]), and [n] the number of ** times it's referenced as an argument in instructions of [seqa] ; @@ -85,7 +85,7 @@ let reference_counting (seqa : (instruction * Regset.t) array) let retl = Hashtbl.create 42 in let retr = Array.make (Array.length seqa) [] in (* retr.(i) : (r, b) -> (r', b') -> ... - where b = true if seen as arg, false if seen as dest + * where b = true if seen as arg, false if seen as dest *) List.iter (fun reg -> Hashtbl.add retl @@ -118,7 +118,7 @@ let reference_counting (seqa : (instruction * Regset.t) array) (dest,false)::(reg, true)::(map_true args) | _ -> (dest,false)::(map_true args)) - | Itailcall(_,fn,args) -> + | Itailcall(_,fn,args) -> List.iter (add_reg) args; retr.(i) <- (match fn with | Datatypes.Coq_inl reg -> @@ -146,6 +146,20 @@ let reference_counting (seqa : (instruction * Regset.t) array) retr.(i) <- [reg, true] | _ -> () ) seqa; + print_string "mentions\n"; + Array.iteri (fun i l -> + print_int i; + print_string ": ["; + List.iter (fun (r, b) -> + print_int (Camlcoq.P.to_int r); + print_string ":"; + print_string (if b then "a:" else "d"); + if b then print_int (snd (Hashtbl.find retl r)); + print_string ", " + ) l; + print_string "]\n"; + flush stdout; + ) retr; retl, retr -- cgit From d68d71ae380601759927e04a773b9ed0a95ba247 Mon Sep 17 00:00:00 2001 From: Pierre Goutagny Date: Tue, 15 Jun 2021 15:17:32 +0200 Subject: Factorise RTL Tunneling pass in compiler_expand --- tools/compiler_expand.ml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/compiler_expand.ml b/tools/compiler_expand.ml index 265d0bcf..e45f64fa 100644 --- a/tools/compiler_expand.ml +++ b/tools/compiler_expand.ml @@ -13,6 +13,8 @@ type print_result = Noprint | Print of string;; type when_triggered = Always | Option of string;; type needs_require = Require | NoRequire;; +let rtl_tunneling = PARTIAL, Always, Require, (Some "RTL Branch Tunneling"), "RTLTunneling" + (* FIXME - The gestion of NoRequire is a bit ugly right now. *) let rtl_passes = [| @@ -47,7 +49,7 @@ PARTIAL, (Option "optim_move_loop_invariants"), NoRequire, (Some "CSE3 for LICM" PARTIAL, (Option "optim_move_loop_invariants"), NoRequire, (Some "Redundancy elimination for LICM"), "Deadcode"; TOTAL, (Option "all_loads_nontrap"), Require, None, "Allnontrap"; PARTIAL, Always, Require, (Some "Unused globals"), "Unusedglob"; -PARTIAL, Always, Require, (Some "RTL Branch Tunneling"), "RTLTunneling" +rtl_tunneling; |];; let post_rtl_passes = -- cgit From 35b163f1c0bf1a4d09bfd0faa9ffed4b909fc8ea Mon Sep 17 00:00:00 2001 From: Pierre Goutagny Date: Tue, 15 Jun 2021 16:55:31 +0200 Subject: Move rtl_tunneling to a more interesting place --- tools/compiler_expand.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/compiler_expand.ml b/tools/compiler_expand.ml index e45f64fa..067f0e4b 100644 --- a/tools/compiler_expand.ml +++ b/tools/compiler_expand.ml @@ -40,6 +40,7 @@ PARTIAL, (Option "optim_CSE3"), Require, (Some "CSE3"), "CSE3"; TOTAL, (Option "optim_CSE3"), Require, (Some "Kill useless moves after CSE3"), "KillUselessMoves"; TOTAL, (Option "optim_forward_moves"), Require, (Some "Forwarding moves"), "ForwardMoves"; PARTIAL, (Option "optim_redundancy"), Require, (Some "Redundancy elimination"), "Deadcode"; +rtl_tunneling; TOTAL, Always, Require, (Some "Renumbering pre rotate"), "Renumber"; PARTIAL, Always, NoRequire, (Some "Loop Rotate"), "Looprotate"; TOTAL, (Option "optim_move_loop_invariants"), NoRequire, (Some "Renumbering for LICM"), "Renumber"; @@ -49,7 +50,6 @@ PARTIAL, (Option "optim_move_loop_invariants"), NoRequire, (Some "CSE3 for LICM" PARTIAL, (Option "optim_move_loop_invariants"), NoRequire, (Some "Redundancy elimination for LICM"), "Deadcode"; TOTAL, (Option "all_loads_nontrap"), Require, None, "Allnontrap"; PARTIAL, Always, Require, (Some "Unused globals"), "Unusedglob"; -rtl_tunneling; |];; let post_rtl_passes = -- cgit From 3408be46000279a5282658375053f9a83a88cf58 Mon Sep 17 00:00:00 2001 From: Pierre Goutagny Date: Wed, 16 Jun 2021 16:23:52 +0200 Subject: Add Tunneling factorisation module --- backend/Tunnelinglibs.ml | 237 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 237 insertions(+) create mode 100644 backend/Tunnelinglibs.ml diff --git a/backend/Tunnelinglibs.ml b/backend/Tunnelinglibs.ml new file mode 100644 index 00000000..ce384dac --- /dev/null +++ b/backend/Tunnelinglibs.ml @@ -0,0 +1,237 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* TODO: Proper author information *) +(* *) +(* Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +(* + +This file implements the core functions of the tunneling passes, for both RTL +and LTL, by using a simplified CFG as a transparent interface + +See [Tunneling.v] and [RTLTunneling.v] + +*) + +open Coqlib +open Maps +open Camlcoq + +(* type of labels in the cfg *) +type label = int * P.t + +(* instructions under analyzis *) +type simple_inst = (* a simplified view of instructions *) + BRANCH of node +| COND of node * node +| OTHER +and node = { + lab: label; + mutable inst: simple_inst; + mutable link: node; (* link in the union-find: itself for non "nop"-nodes, target of the "nop" otherwise *) + mutable dist: int; + mutable tag: int + } + +type cfg_node = (int, node) Hashtbl.t + +type lnode = P.t + +(* type of the (simplified) CFG *) +type cfg = { + nodes: cfg_node; + mutable rems: node list; (* remaining conditions that may become lbranch or not *) + mutable num_rems: int; + mutable iter_num: int (* number of iterations in elimination of conditions *) + } + +module Tunneling = functor + (LANG: sig + type code_unit (* the type of a node of the code cfg (an instruction or a bblock *) + type funct + end) + (OPT: sig + val langname: string + val limit_tunneling: int option (* for debugging: [Some x] limit the number of iterations *) + val debug_flag: bool ref + val final_dump: bool (* set to true to have a more verbose debugging *) + end) + (FUNS: sig + type lang + type code_type + + (* build [c.nodes] and accumulate in [acc] conditions at beginning of LTL basic-blocks *) + val build_simplified_cfg: cfg -> cfg_node list -> lnode -> + LANG.code_unit -> cfg_node list + + val print_code_unit: cfg -> bool -> (int * LANG.code_unit) -> bool + + val fn_code: LANG.funct -> LANG.code_unit PTree.t + val fn_entrypoint: LANG.funct -> lnode + + val check_code_unit: LANG.code_unit PTree.t -> lnode -> LANG.code_unit -> unit + end) +-> struct + +let debug fmt = + if !OPT.debug_flag then Printf.eprintf fmt + else Printf.ifprintf stderr fmt + +exception BugOnPC of int + +let lab_i (n: node): int = fst n.lab +let lab_p (n: node): P.t = snd n.lab + +let rec target c n = (* inspired from the "find" of union-find algorithm *) + match n.inst with + | COND(s1,s2) -> + if n.link != n + then update c n + else if n.tag < c.iter_num then ( + (* we try to change the condition ... *) + n.tag <- c.iter_num; (* ... but at most once by iteration *) + let ts1 = target c s1 in + let ts2 = target c s2 in + if ts1 == ts2 then (n.link <- ts1; ts1) else n + ) else n + | _ -> + if n.link != n + then update c n + else n +and update c n = + let t = target c n.link in + n.link <- t; t + +let get_node c p = + let li = P.to_int p in + try + Hashtbl.find c.nodes li + with + Not_found -> + let rec n = { lab = (li, p); inst = OTHER; link = n ; dist = 0; tag = 0 } in + Hashtbl.add c.nodes li n; + n + +let set_branch c p s = + let li = P.to_int p in + try + let n = Hashtbl.find c.nodes li in + n.inst <- BRANCH s; + n.link <- target c s + with + Not_found -> + let n = { lab = (li,p); inst = BRANCH s; link = target c s; dist = 0; tag = 0 } in + Hashtbl.add c.nodes li n + + + +(* try to change a condition into a branch +[acc] is the current accumulator of conditions to consider in the next iteration of repeat_change_cond +*) +let try_change_cond c acc pc = + match pc.inst with + | COND(s1,s2) -> + let ts1 = target c s1 in + let ts2 = target c s2 in + if ts1 == ts2 then ( + pc.link <- ts1; + c.num_rems <- c.num_rems - 1; + acc + ) else + pc::acc + | _ -> raise (BugOnPC (lab_i pc)) (* COND expected *) + +(* repeat [try_change_cond] until no condition is changed into a branch *) +let rec repeat_change_cond c = + c.iter_num <- c.iter_num + 1; + debug "++ %sTunneling.branch_target %d: remaining number of conds to consider + = %d\n" OPT.langname (c.iter_num) (c.num_rems); + let old = c.num_rems in + c.rems <- List.fold_left (try_change_cond c) [] c.rems; + let curr = c.num_rems in + let continue = + match OPT.limit_tunneling with + | Some n -> curr < old && c.iter_num < n + | None -> curr < old + in + if continue + then repeat_change_cond c + + +(* compute the final distance of each nop nodes to its target *) +let undef_dist = -1 +let self_dist = undef_dist-1 +let rec dist n = + if n.dist = undef_dist + then ( + n.dist <- self_dist; (* protection against an unexpected loop in the data-structure *) + n.dist <- + (match n.inst with + | OTHER -> 0 + | BRANCH p -> 1 + dist p + | COND (p1,p2) -> 1 + (max (dist p1) (dist p2))); + n.dist + ) else if n.dist=self_dist then raise (BugOnPC (lab_i n)) + else n.dist + +let final_export f c = + let count = ref 0 in + let filter_nops_init_dist _ n acc = + let tn = target c n in + if tn == n + then ( + n.dist <- 0; (* force [n] to be a base case in the recursion of [dist] *) + acc + ) else ( + n.dist <- undef_dist; (* force [dist] to compute the actual [n.dist] *) + count := !count+1; + n::acc + ) + in + let nops = Hashtbl.fold filter_nops_init_dist c.nodes [] in + let res = List.fold_left (fun acc n -> PTree.set (lab_p n) (lab_p n.link, Z.of_uint (dist n)) acc) PTree.empty nops in + debug "* %sTunneling.branch_target: final number of eliminated nops = %d\n" + OPT.langname !count; + res + +(*********************************************) +(*** START: printing and debugging functions *) + +let string_of_labeli nodes ipc = + try + let pc = Hashtbl.find nodes ipc in + if pc.link == pc + then Printf.sprintf "(Target@%d)" (dist pc) + else Printf.sprintf "(Nop %d @%d)" (lab_i pc.link) (dist pc) + with + Not_found -> "" + +let print_cfg (f: LANG.funct) c = + let a = Array.of_list (PTree.fold (fun acc pc cu -> (P.to_int pc,cu)::acc) (FUNS.fn_code f) []) in + Array.fast_sort (fun (i1,_) (i2,_) -> i2 - i1) a; + let ep = P.to_int (FUNS.fn_entrypoint f) in + debug "entrypoint: %d %s\n" ep (string_of_labeli c.nodes ep); + let println = Array.fold_left (FUNS.print_code_unit c) false a in + (if println then debug "\n");debug "remaining cond:"; + List.iter (fun n -> debug "%d " (lab_i n)) c.rems; + debug "\n" + +(*************************************************************) +(* Copy-paste of the extracted code of the verifier *) +(* with [raise (BugOnPC (P.to_int pc))] instead of [Error.*] *) + +(** val check_code : coq_UF -> code -> unit res **) + +let check_code td c = + PTree.fold (fun _ pc cu -> FUNS.check_code_unit td pc cu) c (()) + +(*** END: copy-paste & debugging functions *******) + +end -- cgit From 67bc93934e939e57c80ade4c37aaba1535222fa2 Mon Sep 17 00:00:00 2001 From: Pierre Goutagny Date: Wed, 16 Jun 2021 18:19:38 +0200 Subject: Use Tunnelinglibs in RTLTunnelingaux --- backend/RTLTunnelingaux.ml | 351 +++++++++++++-------------------------------- backend/Tunnelinglibs.ml | 37 +++-- 2 files changed, 123 insertions(+), 265 deletions(-) diff --git a/backend/RTLTunnelingaux.ml b/backend/RTLTunnelingaux.ml index a30b43cf..5fe327d4 100644 --- a/backend/RTLTunnelingaux.ml +++ b/backend/RTLTunnelingaux.ml @@ -24,261 +24,106 @@ open Coqlib open RTL open Maps open Camlcoq - -let limit_tunneling = None (* for debugging: [Some x] limit the number of iterations *) -let debug_flag = ref true -let final_dump = false (* set to true to have a more verbose debugging *) - -let debug fmt = - if !debug_flag then Printf.eprintf fmt - else Printf.ifprintf stderr fmt - -exception BugOnPC of int +open Tunnelinglibs let nopcounter = ref 0 -(* type of labels in the cfg *) -type label = int * P.t - -(* instructions under analyzis *) -type simple_inst = (* a simplified view of RTL instructions *) - INOP of node -| ICOND of node * node -| OTHER -and node = { - lab : label; - mutable inst: simple_inst; - mutable link: node; (* link in the union-find: itself for non "nop"-nodes, target of the "nop" otherwise *) - mutable dist: int; - mutable tag: int - } - -(* type of the (simplified) CFG *) -type cfg = { - nodes: (int, node) Hashtbl.t; - mutable rems: node list; (* remaining conditions that may become lbranch or not *) - mutable num_rems: int; - mutable iter_num: int (* number of iterations in elimination of conditions *) - } - -let lab_i (n: node): int = fst n.lab -let lab_p (n: node): P.t = snd n.lab - -let rec target c n = (* inspired from the "find" of union-find algorithm *) - match n.inst with - | ICOND(s1,s2) -> - if n.link != n - then update c n - else if n.tag < c.iter_num then ( - (* we try to change the condition ... *) - n.tag <- c.iter_num; (* ... but at most once by iteration *) - let ts1 = target c s1 in - let ts2 = target c s2 in - if ts1 == ts2 then (n.link <- ts1; ts1) else n - ) else n - | _ -> - if n.link != n - then update c n - else n -and update c n = - let t = target c n.link in - n.link <- t; t - -let get_node c p = - let li = P.to_int p in - try - Hashtbl.find c.nodes li - with - Not_found -> - let rec n = { lab = (li, p); inst = OTHER; link = n ; dist = 0; tag = 0 } in - Hashtbl.add c.nodes li n; - n - -let set_branch c p s = - let li = P.to_int p in - try - let n = Hashtbl.find c.nodes li in - n.inst <- INOP s; - n.link <- target c s - with - Not_found -> - let n = { lab = (li,p); inst = INOP s; link = target c s; dist = 0; tag = 0 } in - Hashtbl.add c.nodes li n - - -(* build [c.nodes] and accumulate conditions in [acc] *) -let build_simplified_cfg c acc pc i = - match i with - | Inop s -> - let ns = get_node c s in - set_branch c pc ns; - incr nopcounter; - acc - | Icond (_, _, s1, s2, _) -> - c.num_rems <- c.num_rems + 1; - let ns1 = get_node c s1 in - let ns2 = get_node c s2 in - let npc = get_node c pc in - npc.inst <- ICOND(ns1, ns2); - npc::acc - | _ -> acc - -(* try to change a condition into a branch -[acc] is the current accumulator of conditions to consider in the next iteration of repeat_change_cond -*) -let try_change_cond c acc pc = - match pc.inst with - | ICOND(s1,s2) -> - let ts1 = target c s1 in - let ts2 = target c s2 in - if ts1 == ts2 then ( - pc.link <- ts1; - c.num_rems <- c.num_rems - 1; +module LANG = struct + type code_unit = RTL.instruction + type funct = RTL.coq_function +end + +module OPT = struct + let langname = "RTL" + let limit_tunneling = None + let debug_flag = ref false + let final_dump = false +end + +module rec T: sig + val get_node: cfg -> positive -> node + val set_branch: cfg -> positive -> node -> unit + val debug: ('a, out_channel, unit) format -> 'a + val string_of_labeli: ('a, node) Hashtbl.t -> 'a -> string + exception BugOnPC of int + val branch_target: RTL.coq_function -> (positive * Z.t) PTree.t + +end = Tunnelinglibs.Tunneling(LANG)(OPT)(FUNS) + +and FUNS: sig + val build_simplified_cfg: cfg -> node list -> positive -> LANG.code_unit -> node list + + val print_code_unit: cfg -> bool -> int * LANG.code_unit -> bool + + val fn_code: LANG.funct -> LANG.code_unit PTree.t + val fn_entrypoint: LANG.funct -> positive + + val check_code_unit: (positive * integer) PTree.t -> positive -> LANG.code_unit -> unit +end = struct + let build_simplified_cfg c acc pc i = + match i with + | Inop s -> + let ns = T.get_node c s in + T.set_branch c pc ns; + incr nopcounter; acc - ) else - pc::acc - | _ -> raise (BugOnPC (lab_i pc)) (* ICOND expected *) - -(* repeat [try_change_cond] until no condition is changed into a branch *) -let rec repeat_change_cond c = - c.iter_num <- c.iter_num + 1; - debug "++ RTLTunneling.branch_target %d: remaining number of conds to consider = %d\n" (c.iter_num) (c.num_rems); - let old = c.num_rems in - c.rems <- List.fold_left (try_change_cond c) [] c.rems; - let curr = c.num_rems in - let continue = - match limit_tunneling with - | Some n -> curr < old && c.iter_num < n - | None -> curr < old - in - if continue - then repeat_change_cond c - - -(* compute the final distance of each nop nodes to its target *) -let undef_dist = -1 -let self_dist = undef_dist-1 -let rec dist n = - if n.dist = undef_dist - then ( - n.dist <- self_dist; (* protection against an unexpected loop in the data-structure *) - n.dist <- - (match n.inst with - | OTHER -> 0 - | INOP p -> 1 + dist p - | ICOND (p1,p2) -> 1 + (max (dist p1) (dist p2))); - n.dist - ) else if n.dist=self_dist then raise (BugOnPC (lab_i n)) - else n.dist - -let final_export f c = - let count = ref 0 in - let filter_nops_init_dist _ n acc = - let tn = target c n in - if tn == n - then ( - n.dist <- 0; (* force [n] to be a base case in the recursion of [dist] *) - acc - ) else ( - n.dist <- undef_dist; (* force [dist] to compute the actual [n.dist] *) - count := !count+1; - n::acc - ) - in - let nops = Hashtbl.fold filter_nops_init_dist c.nodes [] in - let res = List.fold_left (fun acc n -> PTree.set (lab_p n) (lab_p n.link, Z.of_uint (dist n)) acc) PTree.empty nops in - debug "* RTLTunneling.branch_target: [stats] initial number of nops = %d\n" !nopcounter; - debug "* RTLTunneling.branch_target: [stats] final number of eliminated nops = %d\n" !count; - res - -(*********************************************) -(*** START: printing and debugging functions *) - -let string_of_labeli nodes ipc = - try - let pc = Hashtbl.find nodes ipc in - if pc.link == pc - then Printf.sprintf "(Target@%d)" (dist pc) - else Printf.sprintf "(Nop %d @%d)" (lab_i pc.link) (dist pc) - with - Not_found -> "" - -let print_instr c println (pc, i) = - match i with - | Inop s -> (if println then debug "\n"); debug "%d:Inop %d %s\n" pc (P.to_int s) (string_of_labeli c.nodes pc); false - | Icond (_, _, s1, s2, _) -> (if println then debug "\n"); debug "%d:Icond (%d,%d) %s\n" pc (P.to_int s1) (P.to_int s2) (string_of_labeli c.nodes pc); false - | _ -> debug "%d " pc; true - - -let print_cfg f c = - let a = Array.of_list (PTree.fold (fun acc pc i -> (P.to_int pc,i)::acc) f.fn_code []) in - Array.fast_sort (fun (i1,_) (i2,_) -> i2 - i1) a; - let ep = P.to_int f.fn_entrypoint in - debug "entrypoint: %d %s\n" ep (string_of_labeli c.nodes ep); - let println = Array.fold_left (print_instr c) false a in - (if println then debug "\n");debug "remaining cond:"; - List.iter (fun n -> debug "%d " (lab_i n)) c.rems; - debug "\n" - -(*************************************************************) -(* Copy-paste of the extracted code of the verifier *) -(* with [raise (BugOnPC (P.to_int pc))] instead of [Error.*] *) - -let get td pc = - match PTree.get pc td with - | Some p -> let (t0, d) = p in (t0, d) - | None -> (pc, Z.of_uint 0) - -let check_instr td pc i = - match PTree.get pc td with - | Some p -> - let (tpc, dpc) = p in - let dpc0 = dpc in begin - match i with - | Inop s -> - let (ts, ds) = get td s in - if peq tpc ts - then if zlt ds dpc0 - then () - else raise (BugOnPC (P.to_int pc)) - else raise (BugOnPC (P.to_int pc)) - | Icond (_, _, s1, s2, _) -> - let (ts1, ds1) = get td s1 in - let (ts2, ds2) = get td s2 in - if peq tpc ts1 - then if peq tpc ts2 - then if zlt ds1 dpc0 - then if zlt ds2 dpc0 - then () - else raise (BugOnPC (P.to_int pc)) - else raise (BugOnPC (P.to_int pc)) - else raise (BugOnPC (P.to_int pc)) - else raise (BugOnPC (P.to_int pc)) - | _ -> - raise (BugOnPC (P.to_int pc)) end - | None -> () - -(** val check_code : coq_UF -> code -> unit res **) - -let check_code td c = - PTree.fold (fun _ pc i -> check_instr td pc i) c (()) - -(*** END: copy-paste & debugging functions *******) + | Icond (_, _, s1, s2, _) -> + c.num_rems <- c.num_rems + 1; + let ns1 = T.get_node c s1 in + let ns2 = T.get_node c s2 in + let npc = T.get_node c pc in + npc.inst <- COND(ns1, ns2); + npc::acc + | _ -> acc + + let print_code_unit c println (pc, i) = + match i with + | Inop s -> (if println then T.debug "\n"); T.debug "%d:Inop %d %s\n" pc (P.to_int s) (T.string_of_labeli c.nodes pc); false + | Icond (_, _, s1, s2, _) -> (if println then T.debug "\n"); T.debug "%d:Icond (%d,%d) %s\n" pc (P.to_int s1) (P.to_int s2) (T.string_of_labeli c.nodes pc); false + | _ -> T.debug "%d " pc; true + + let fn_code f = f.fn_code + let fn_entrypoint f = f.fn_entrypoint + + (*************************************************************) + (* Copy-paste of the extracted code of the verifier *) + (* with [raise (BugOnPC (P.to_int pc))] instead of [Error.*] *) + + let get td pc = + match PTree.get pc td with + | Some p -> let (t0, d) = p in (t0, d) + | None -> (pc, Z.of_uint 0) + + let check_code_unit td pc i = + match PTree.get pc td with + | Some p -> + let (tpc, dpc) = p in + let dpc0 = dpc in begin + match i with + | Inop s -> + let (ts, ds) = get td s in + if peq tpc ts + then if zlt ds dpc0 + then () + else raise (T.BugOnPC (P.to_int pc)) + else raise (T.BugOnPC (P.to_int pc)) + | Icond (_, _, s1, s2, _) -> + let (ts1, ds1) = get td s1 in + let (ts2, ds2) = get td s2 in + if peq tpc ts1 + then if peq tpc ts2 + then if zlt ds1 dpc0 + then if zlt ds2 dpc0 + then () + else raise (T.BugOnPC (P.to_int pc)) + else raise (T.BugOnPC (P.to_int pc)) + else raise (T.BugOnPC (P.to_int pc)) + else raise (T.BugOnPC (P.to_int pc)) + | _ -> + raise (T.BugOnPC (P.to_int pc)) end + | None -> () + +end + +let branch_target = T.branch_target -let branch_target f = - debug "* RTLTunneling.branch_target: starting on a new function\n"; - if limit_tunneling <> None then debug "* WARNING: limit_tunneling <> None\n"; - let c = { nodes = Hashtbl.create 100; rems = []; num_rems = 0; iter_num = 0 } in - c.rems <- PTree.fold (build_simplified_cfg c) f.fn_code []; - repeat_change_cond c; - let res = final_export f c in - if !debug_flag then ( - try - check_code res f.fn_code; - if final_dump then print_cfg f c; - with e -> ( - print_cfg f c; - check_code res f.fn_code - ) - ); - res diff --git a/backend/Tunnelinglibs.ml b/backend/Tunnelinglibs.ml index ce384dac..e1e61d68 100644 --- a/backend/Tunnelinglibs.ml +++ b/backend/Tunnelinglibs.ml @@ -20,7 +20,6 @@ See [Tunneling.v] and [RTLTunneling.v] *) -open Coqlib open Maps open Camlcoq @@ -42,7 +41,8 @@ and node = { type cfg_node = (int, node) Hashtbl.t -type lnode = P.t +type positive = P.t +type integer = Z.t (* type of the (simplified) CFG *) type cfg = { @@ -64,19 +64,15 @@ module Tunneling = functor val final_dump: bool (* set to true to have a more verbose debugging *) end) (FUNS: sig - type lang - type code_type - (* build [c.nodes] and accumulate in [acc] conditions at beginning of LTL basic-blocks *) - val build_simplified_cfg: cfg -> cfg_node list -> lnode -> - LANG.code_unit -> cfg_node list + val build_simplified_cfg: cfg -> node list -> positive -> LANG.code_unit -> node list - val print_code_unit: cfg -> bool -> (int * LANG.code_unit) -> bool + val print_code_unit: cfg -> bool -> int * LANG.code_unit -> bool val fn_code: LANG.funct -> LANG.code_unit PTree.t - val fn_entrypoint: LANG.funct -> lnode + val fn_entrypoint: LANG.funct -> positive - val check_code_unit: LANG.code_unit PTree.t -> lnode -> LANG.code_unit -> unit + val check_code_unit: (positive * integer) PTree.t -> positive -> LANG.code_unit -> unit end) -> struct @@ -151,8 +147,7 @@ let try_change_cond c acc pc = (* repeat [try_change_cond] until no condition is changed into a branch *) let rec repeat_change_cond c = c.iter_num <- c.iter_num + 1; - debug "++ %sTunneling.branch_target %d: remaining number of conds to consider - = %d\n" OPT.langname (c.iter_num) (c.num_rems); + debug "++ %sTunneling.branch_target %d: remaining number of conds to consider = %d\n" OPT.langname (c.iter_num) (c.num_rems); let old = c.num_rems in c.rems <- List.fold_left (try_change_cond c) [] c.rems; let curr = c.num_rems in @@ -234,4 +229,22 @@ let check_code td c = (*** END: copy-paste & debugging functions *******) +let branch_target f = + debug "* %sTunneling.branch_target: starting on a new function\n" OPT.langname; + if OPT.limit_tunneling <> None then debug "* WARNING: limit_tunneling <> None\n"; + let c = { nodes = Hashtbl.create 100; rems = []; num_rems = 0; iter_num = 0 } in + c.rems <- PTree.fold (FUNS.build_simplified_cfg c) (FUNS.fn_code f) []; + repeat_change_cond c; + let res = final_export f c in + if !OPT.debug_flag then ( + try + check_code res (FUNS.fn_code f); + if OPT.final_dump then print_cfg f c; + with e -> ( + print_cfg f c; + check_code res (FUNS.fn_code f) + ) + ); + res + end -- cgit From 21278bd87e89210bcc287116f6e35fc1b52d0df2 Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Wed, 16 Jun 2021 20:27:31 +0200 Subject: Now working, tests show a decrease in spillage Should still find a proper way to treat the case mentioned in earlier commits --- scheduling/InstructionScheduler.ml | 92 +++++++++++++++++++------------------- scheduling/RTLpathScheduleraux.ml | 28 ++++++------ 2 files changed, 61 insertions(+), 59 deletions(-) diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml index 72222022..df8a4e4e 100644 --- a/scheduling/InstructionScheduler.ml +++ b/scheduling/InstructionScheduler.ml @@ -419,11 +419,11 @@ let reg_pres_scheduler (problem : problem) : solution option = let compare_regs i x y = let pyi = List.fold_left (fold_delta i) 0 mentions.(y) in - print_int y; - print_string " "; - print_int pyi; - print_newline (); - flush stdout; + (* print_int y; + * print_string " "; + * print_int pyi; + * print_newline (); + * flush stdout; *) let pxi = List.fold_left (fold_delta i) 0 mentions.(x) in match pyi - pxi with | 0 -> (match priorities.(y) - priorities.(x) with @@ -433,9 +433,9 @@ let reg_pres_scheduler (problem : problem) : solution option = (** t is the register class *) let sched_CSR t ready usages = - print_string "looking for max delta"; - print_newline (); - flush stdout; + (* print_string "looking for max delta"; + * print_newline (); + * flush stdout; *) let result = ref (-1) in iter (fun i -> if vector_less_equal usages.(i) current_resources @@ -445,7 +445,7 @@ let reg_pres_scheduler (problem : problem) : solution option = end in - let max_time = bound_max_time problem in + let max_time = bound_max_time problem + 5*nr_instructions in let ready = Array.make max_time InstrSet.empty in Array.iteri (fun i preds -> @@ -486,34 +486,34 @@ let reg_pres_scheduler (problem : problem) : solution option = let result = ref (-1) in try Array.iteri (fun i avlregs -> - print_string "avlregs: "; - print_int i; - print_string " "; - print_int avlregs; - print_newline (); - print_string "live regs: "; - print_int (Hashtbl.length live_regs); - print_newline (); - flush stdout; + (* print_string "avlregs: "; + * print_int i; + * print_string " "; + * print_int avlregs; + * print_newline (); + * print_string "live regs: "; + * print_int (Hashtbl.length live_regs); + * print_newline (); + * flush stdout; *) if !cnt < 5 && avlregs <= regs_thresholds.(i) then ( let maybe = InstrSet.sched_CSR i ready usages in - print_string "maybe\n"; - print_int maybe; - print_newline (); - flush stdout; - if maybe > 0 && - let delta = - List.fold_left (fold_delta i) 0 mentions.(maybe) in - print_string "delta "; - print_int delta; - print_newline (); - flush stdout; - delta - >= 0 then - (vector_subtract usages.(maybe) current_resources; - result := maybe) - else incr cnt; + (* print_string "maybe\n"; + * print_int maybe; + * print_newline (); + * flush stdout; *) + (if maybe > 0 && + let delta = + List.fold_left (fold_delta i) 0 mentions.(maybe) in + (* print_string "delta "; + * print_int delta; + * print_newline (); + * flush stdout; *) + delta > 0 + then + (vector_subtract usages.(maybe) current_resources; + result := maybe) + else incr cnt); raise Exit)) available_regs; InstrSet.iter (fun i -> if vector_less_equal usages.(i) current_resources @@ -534,10 +534,10 @@ let reg_pres_scheduler (problem : problem) : solution option = problem.instruction_usages with | -1 -> advance_time() | i -> (assert(times.(i) < 0); - (print_string "INSTR ISSUED: "; - print_int i; - print_newline (); - flush stdout; + ((* print_string "INSTR ISSUED: "; + * print_int i; + * print_newline (); + * flush stdout; *) cnt := 0; List.iter (fun (r,b) -> if b then @@ -546,18 +546,18 @@ let reg_pres_scheduler (problem : problem) : solution option = | Some (t, n) -> Hashtbl.remove counts r; if n = 1 then - (print_string "yaaaaaaaaaaaas "; - print_int (Camlcoq.P.to_int r); - print_newline (); + ((* print_string "yaaaaaaaaaaaas "; + * print_int (Camlcoq.P.to_int r); + * print_newline (); *) Hashtbl.remove live_regs r; available_regs.(t) <- available_regs.(t) + 1)) else let t = class_r r in match Hashtbl.find_opt live_regs r with - | None -> (print_string "noooooooooo "; - print_int (Camlcoq.P.to_int r); - print_newline (); + | None -> ((* print_string "noooooooooo "; + * print_int (Camlcoq.P.to_int r); + * print_newline (); *) Hashtbl.add live_regs r t; available_regs.(t) <- available_regs.(t) - 1) @@ -571,8 +571,10 @@ let reg_pres_scheduler (problem : problem) : solution option = match earliest_time instr_to with | -1 -> () | to_time -> + ((* DebugPrint.debug "TO TIME %d : %d\n" to_time + * (Array.length ready); *) ready.(to_time) - <- InstrSet.add instr_to ready.(to_time) + <- InstrSet.add instr_to ready.(to_time)) ) successors.(i); successors.(i) <- [] ) diff --git a/scheduling/RTLpathScheduleraux.ml b/scheduling/RTLpathScheduleraux.ml index 8df3edbc..f3f09954 100644 --- a/scheduling/RTLpathScheduleraux.ml +++ b/scheduling/RTLpathScheduleraux.ml @@ -146,20 +146,20 @@ let reference_counting (seqa : (instruction * Regset.t) array) retr.(i) <- [reg, true] | _ -> () ) seqa; - print_string "mentions\n"; - Array.iteri (fun i l -> - print_int i; - print_string ": ["; - List.iter (fun (r, b) -> - print_int (Camlcoq.P.to_int r); - print_string ":"; - print_string (if b then "a:" else "d"); - if b then print_int (snd (Hashtbl.find retl r)); - print_string ", " - ) l; - print_string "]\n"; - flush stdout; - ) retr; + (* print_string "mentions\n"; + * Array.iteri (fun i l -> + * print_int i; + * print_string ": ["; + * List.iter (fun (r, b) -> + * print_int (Camlcoq.P.to_int r); + * print_string ":"; + * print_string (if b then "a:" else "d"); + * if b then print_int (snd (Hashtbl.find retl r)); + * print_string ", " + * ) l; + * print_string "]\n"; + * flush stdout; + * ) retr; *) retl, retr -- cgit From 4413c27d6c6a3d69df34955d9d453c38b32174c7 Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Thu, 17 Jun 2021 15:38:13 +0200 Subject: Add option to set thresold and support for riscv --- driver/Clflags.ml | 1 + driver/Driver.ml | 2 ++ riscV/Machregsaux.ml | 2 ++ scheduling/InstructionScheduler.ml | 9 +++++++-- 4 files changed, 12 insertions(+), 2 deletions(-) diff --git a/driver/Clflags.ml b/driver/Clflags.ml index fa17c2d9..1f31bd3e 100644 --- a/driver/Clflags.ml +++ b/driver/Clflags.ml @@ -115,4 +115,5 @@ let option_inline_auto_threshold = ref 0 let option_profile_arcs = ref false let option_fbranch_probabilities = ref true let option_debug_compcert = ref 0 +let option_regpres_threshold = ref 5 let main_function_name = ref "main" diff --git a/driver/Driver.ml b/driver/Driver.ml index 5a8c7f2c..fa187f26 100644 --- a/driver/Driver.ml +++ b/driver/Driver.ml @@ -211,6 +211,7 @@ Processing options: -fprepass Perform prepass scheduling (only on some architectures) [on] -fprepass= Perform postpass scheduling with the specified optimization [list] (=list: list scheduling, =revlist: reverse list scheduling, =regpres: list scheduling aware of register pressure, =zigzag: zigzag scheduling, =ilp: ILP, =greedy: just packing bundles) + -regpres-threshold n With `-fprepass= regpres`, set threshold value for number of free registers before trying to decrease register pressure -fpostpass Perform postpass scheduling (only for K1 architecture) [on] -fpostpass= Perform postpass scheduling with the specified optimization [list] (=list: list scheduling, =ilp: ILP, =greedy: just packing bundles) @@ -342,6 +343,7 @@ let cmdline_actions = Exact "-fprofile-use=", String (fun s -> Profilingaux.load_profiling_info s); Exact "-finline-auto-threshold", Integer (fun n -> option_inline_auto_threshold := n); Exact "-debug-compcert", Integer (fun n -> option_debug_compcert := n); + Exact "-regpres-threshold", Integer (fun n -> option_regpres_threshold := n); Exact "-fsmall-data", Integer(fun n -> option_small_data := n); Exact "-fsmall-const", Integer(fun n -> option_small_const := n); Exact "-ffloat-const-prop", Integer(fun n -> option_ffloatconstprop := n); diff --git a/riscV/Machregsaux.ml b/riscV/Machregsaux.ml index 840943e7..e3e47946 100644 --- a/riscV/Machregsaux.ml +++ b/riscV/Machregsaux.ml @@ -18,3 +18,5 @@ let class_of_type = function | AST.Tint | AST.Tlong -> 0 | AST.Tfloat | AST.Tsingle -> 1 | AST.Tany32 | AST.Tany64 -> assert false + +let nr_regs = [| 26; 32|] diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml index df8a4e4e..5b4c87f4 100644 --- a/scheduling/InstructionScheduler.ml +++ b/scheduling/InstructionScheduler.ml @@ -360,8 +360,13 @@ let reg_pres_scheduler (problem : problem) : solution option = let nr_types_regs = Array.length available_regs in - let regs_thresholds = Array.init nr_types_regs - (fun i -> 5) in + let thres = Array.fold_left (min) + (max !(Clflags.option_regpres_threshold) 0) + Machregsaux.nr_regs + in + + + let regs_thresholds = Array.make nr_types_regs thres in (* placeholder value *) let class_r r = -- cgit From 5a542d158d3bde832e38b65ad5347299fbe7ee32 Mon Sep 17 00:00:00 2001 From: Pierre Goutagny Date: Thu, 17 Jun 2021 15:58:29 +0200 Subject: Simplify tunneling factorisation The recursive module definitions required unnecessarily long expicit signatures for little added legibility. --- backend/RTLTunnelingaux.ml | 65 ++++------- backend/Tunnelinglibs.ml | 276 ++++++++++++++++++++++++--------------------- 2 files changed, 173 insertions(+), 168 deletions(-) diff --git a/backend/RTLTunnelingaux.ml b/backend/RTLTunnelingaux.ml index 5fe327d4..9333e357 100644 --- a/backend/RTLTunnelingaux.ml +++ b/backend/RTLTunnelingaux.ml @@ -26,8 +26,6 @@ open Maps open Camlcoq open Tunnelinglibs -let nopcounter = ref 0 - module LANG = struct type code_unit = RTL.instruction type funct = RTL.coq_function @@ -40,60 +38,44 @@ module OPT = struct let final_dump = false end -module rec T: sig - val get_node: cfg -> positive -> node - val set_branch: cfg -> positive -> node -> unit - val debug: ('a, out_channel, unit) format -> 'a - val string_of_labeli: ('a, node) Hashtbl.t -> 'a -> string - exception BugOnPC of int - val branch_target: RTL.coq_function -> (positive * Z.t) PTree.t - -end = Tunnelinglibs.Tunneling(LANG)(OPT)(FUNS) - -and FUNS: sig - val build_simplified_cfg: cfg -> node list -> positive -> LANG.code_unit -> node list - - val print_code_unit: cfg -> bool -> int * LANG.code_unit -> bool +module Partial = Tunnelinglibs.Tunneling(LANG)(OPT) - val fn_code: LANG.funct -> LANG.code_unit PTree.t - val fn_entrypoint: LANG.funct -> positive - - val check_code_unit: (positive * integer) PTree.t -> positive -> LANG.code_unit -> unit -end = struct +module FUNS = struct let build_simplified_cfg c acc pc i = match i with | Inop s -> - let ns = T.get_node c s in - T.set_branch c pc ns; + let ns = get_node c s in + set_branch c pc ns; incr nopcounter; acc | Icond (_, _, s1, s2, _) -> c.num_rems <- c.num_rems + 1; - let ns1 = T.get_node c s1 in - let ns2 = T.get_node c s2 in - let npc = T.get_node c pc in + let ns1 = get_node c s1 in + let ns2 = get_node c s2 in + let npc = get_node c pc in npc.inst <- COND(ns1, ns2); npc::acc | _ -> acc let print_code_unit c println (pc, i) = match i with - | Inop s -> (if println then T.debug "\n"); T.debug "%d:Inop %d %s\n" pc (P.to_int s) (T.string_of_labeli c.nodes pc); false - | Icond (_, _, s1, s2, _) -> (if println then T.debug "\n"); T.debug "%d:Icond (%d,%d) %s\n" pc (P.to_int s1) (P.to_int s2) (T.string_of_labeli c.nodes pc); false - | _ -> T.debug "%d " pc; true + | Inop s -> (if println then Partial.debug "\n"); + Partial.debug "%d:Inop %d %s\n" pc (P.to_int s) (string_of_labeli c.nodes pc); + false + | Icond (_, _, s1, s2, _) -> (if println then Partial.debug "\n"); + Partial.debug "%d:Icond (%d,%d) %s\n" pc (P.to_int s1) (P.to_int s2) (string_of_labeli c.nodes pc); + false + | _ -> Partial.debug "%d " pc; + true let fn_code f = f.fn_code let fn_entrypoint f = f.fn_entrypoint + (*************************************************************) (* Copy-paste of the extracted code of the verifier *) (* with [raise (BugOnPC (P.to_int pc))] instead of [Error.*] *) - let get td pc = - match PTree.get pc td with - | Some p -> let (t0, d) = p in (t0, d) - | None -> (pc, Z.of_uint 0) - let check_code_unit td pc i = match PTree.get pc td with | Some p -> @@ -105,8 +87,8 @@ end = struct if peq tpc ts then if zlt ds dpc0 then () - else raise (T.BugOnPC (P.to_int pc)) - else raise (T.BugOnPC (P.to_int pc)) + else raise (BugOnPC (P.to_int pc)) + else raise (BugOnPC (P.to_int pc)) | Icond (_, _, s1, s2, _) -> let (ts1, ds1) = get td s1 in let (ts2, ds2) = get td s2 in @@ -115,15 +97,16 @@ end = struct then if zlt ds1 dpc0 then if zlt ds2 dpc0 then () - else raise (T.BugOnPC (P.to_int pc)) - else raise (T.BugOnPC (P.to_int pc)) - else raise (T.BugOnPC (P.to_int pc)) - else raise (T.BugOnPC (P.to_int pc)) + else raise (BugOnPC (P.to_int pc)) + else raise (BugOnPC (P.to_int pc)) + else raise (BugOnPC (P.to_int pc)) + else raise (BugOnPC (P.to_int pc)) | _ -> - raise (T.BugOnPC (P.to_int pc)) end + raise (BugOnPC (P.to_int pc)) end | None -> () end +module T = Partial.T(FUNS) let branch_target = T.branch_target diff --git a/backend/Tunnelinglibs.ml b/backend/Tunnelinglibs.ml index e1e61d68..1bb35f7a 100644 --- a/backend/Tunnelinglibs.ml +++ b/backend/Tunnelinglibs.ml @@ -39,51 +39,24 @@ and node = { mutable tag: int } -type cfg_node = (int, node) Hashtbl.t - type positive = P.t type integer = Z.t (* type of the (simplified) CFG *) type cfg = { - nodes: cfg_node; + nodes: (int, node) Hashtbl.t; mutable rems: node list; (* remaining conditions that may become lbranch or not *) mutable num_rems: int; mutable iter_num: int (* number of iterations in elimination of conditions *) } -module Tunneling = functor - (LANG: sig - type code_unit (* the type of a node of the code cfg (an instruction or a bblock *) - type funct - end) - (OPT: sig - val langname: string - val limit_tunneling: int option (* for debugging: [Some x] limit the number of iterations *) - val debug_flag: bool ref - val final_dump: bool (* set to true to have a more verbose debugging *) - end) - (FUNS: sig - (* build [c.nodes] and accumulate in [acc] conditions at beginning of LTL basic-blocks *) - val build_simplified_cfg: cfg -> node list -> positive -> LANG.code_unit -> node list - - val print_code_unit: cfg -> bool -> int * LANG.code_unit -> bool - - val fn_code: LANG.funct -> LANG.code_unit PTree.t - val fn_entrypoint: LANG.funct -> positive - - val check_code_unit: (positive * integer) PTree.t -> positive -> LANG.code_unit -> unit - end) --> struct - -let debug fmt = - if !OPT.debug_flag then Printf.eprintf fmt - else Printf.ifprintf stderr fmt - exception BugOnPC of int -let lab_i (n: node): int = fst n.lab -let lab_p (n: node): P.t = snd n.lab +(* keeps track of the total number of nops seen, for debugging purposes *) +let nopcounter = ref 0 + +(* General functions that do not require language-specific context, and that + are used for building language-specific functions *) let rec target c n = (* inspired from the "find" of union-find algorithm *) match n.inst with @@ -126,41 +99,14 @@ let set_branch c p s = let n = { lab = (li,p); inst = BRANCH s; link = target c s; dist = 0; tag = 0 } in Hashtbl.add c.nodes li n +let get td pc = + match PTree.get pc td with + | Some p -> let (t0, d) = p in (t0, d) + | None -> (pc, Z.of_uint 0) +let lab_i (n: node): int = fst n.lab +let lab_p (n: node): P.t = snd n.lab -(* try to change a condition into a branch -[acc] is the current accumulator of conditions to consider in the next iteration of repeat_change_cond -*) -let try_change_cond c acc pc = - match pc.inst with - | COND(s1,s2) -> - let ts1 = target c s1 in - let ts2 = target c s2 in - if ts1 == ts2 then ( - pc.link <- ts1; - c.num_rems <- c.num_rems - 1; - acc - ) else - pc::acc - | _ -> raise (BugOnPC (lab_i pc)) (* COND expected *) - -(* repeat [try_change_cond] until no condition is changed into a branch *) -let rec repeat_change_cond c = - c.iter_num <- c.iter_num + 1; - debug "++ %sTunneling.branch_target %d: remaining number of conds to consider = %d\n" OPT.langname (c.iter_num) (c.num_rems); - let old = c.num_rems in - c.rems <- List.fold_left (try_change_cond c) [] c.rems; - let curr = c.num_rems in - let continue = - match OPT.limit_tunneling with - | Some n -> curr < old && c.iter_num < n - | None -> curr < old - in - if continue - then repeat_change_cond c - - -(* compute the final distance of each nop nodes to its target *) let undef_dist = -1 let self_dist = undef_dist-1 let rec dist n = @@ -176,29 +122,6 @@ let rec dist n = ) else if n.dist=self_dist then raise (BugOnPC (lab_i n)) else n.dist -let final_export f c = - let count = ref 0 in - let filter_nops_init_dist _ n acc = - let tn = target c n in - if tn == n - then ( - n.dist <- 0; (* force [n] to be a base case in the recursion of [dist] *) - acc - ) else ( - n.dist <- undef_dist; (* force [dist] to compute the actual [n.dist] *) - count := !count+1; - n::acc - ) - in - let nops = Hashtbl.fold filter_nops_init_dist c.nodes [] in - let res = List.fold_left (fun acc n -> PTree.set (lab_p n) (lab_p n.link, Z.of_uint (dist n)) acc) PTree.empty nops in - debug "* %sTunneling.branch_target: final number of eliminated nops = %d\n" - OPT.langname !count; - res - -(*********************************************) -(*** START: printing and debugging functions *) - let string_of_labeli nodes ipc = try let pc = Hashtbl.find nodes ipc in @@ -208,43 +131,142 @@ let string_of_labeli nodes ipc = with Not_found -> "" -let print_cfg (f: LANG.funct) c = - let a = Array.of_list (PTree.fold (fun acc pc cu -> (P.to_int pc,cu)::acc) (FUNS.fn_code f) []) in - Array.fast_sort (fun (i1,_) (i2,_) -> i2 - i1) a; - let ep = P.to_int (FUNS.fn_entrypoint f) in - debug "entrypoint: %d %s\n" ep (string_of_labeli c.nodes ep); - let println = Array.fold_left (FUNS.print_code_unit c) false a in - (if println then debug "\n");debug "remaining cond:"; - List.iter (fun n -> debug "%d " (lab_i n)) c.rems; - debug "\n" - -(*************************************************************) -(* Copy-paste of the extracted code of the verifier *) -(* with [raise (BugOnPC (P.to_int pc))] instead of [Error.*] *) - -(** val check_code : coq_UF -> code -> unit res **) - -let check_code td c = - PTree.fold (fun _ pc cu -> FUNS.check_code_unit td pc cu) c (()) - -(*** END: copy-paste & debugging functions *******) - -let branch_target f = - debug "* %sTunneling.branch_target: starting on a new function\n" OPT.langname; - if OPT.limit_tunneling <> None then debug "* WARNING: limit_tunneling <> None\n"; - let c = { nodes = Hashtbl.create 100; rems = []; num_rems = 0; iter_num = 0 } in - c.rems <- PTree.fold (FUNS.build_simplified_cfg c) (FUNS.fn_code f) []; - repeat_change_cond c; - let res = final_export f c in - if !OPT.debug_flag then ( - try - check_code res (FUNS.fn_code f); - if OPT.final_dump then print_cfg f c; - with e -> ( - print_cfg f c; - check_code res (FUNS.fn_code f) - ) - ); - res +(* + * When given the necessary types and options as context, and then some + * language-specific functions that cannot be factorised between LTL and RTL, the + * `Tunneling` functor returns a module containing the corresponding + * `branch_target` function. + *) + +module Tunneling = functor + (* Language-specific types *) + (LANG: sig + type code_unit (* the type of a node of the code cfg (an instruction or a bblock *) + type funct (* type of internal functions *) + end) + (* Compilation options for debugging *) + (OPT: sig + val langname: string + val limit_tunneling: int option (* for debugging: [Some x] limit the number of iterations *) + val debug_flag: bool ref + val final_dump: bool (* set to true to have a more verbose debugging *) + end) + -> struct + + (* The `debug` function uses values from `OPT`, and is used in functions passed to `F` + so it must be defined between the two *) + let debug fmt = + if !OPT.debug_flag then Printf.eprintf fmt + else Printf.ifprintf stderr fmt + + module T + (* Language-specific functions *) + (FUNS: sig + (* build [c.nodes] and accumulate in [acc] conditions at beginning of LTL basic-blocks *) + val build_simplified_cfg: cfg -> node list -> positive -> LANG.code_unit -> node list + val print_code_unit: cfg -> bool -> int * LANG.code_unit -> bool + val fn_code: LANG.funct -> LANG.code_unit PTree.t + val fn_entrypoint: LANG.funct -> positive + val check_code_unit: (positive * integer) PTree.t -> positive -> LANG.code_unit -> unit + end) + (* only export what's needed *) + : sig val branch_target: LANG.funct -> (positive * integer) PTree.t end + = struct + + (* try to change a condition into a branch [acc] is the current accumulator of + conditions to consider in the next iteration of repeat_change_cond *) + let try_change_cond c acc pc = + match pc.inst with + | COND(s1,s2) -> + let ts1 = target c s1 in + let ts2 = target c s2 in + if ts1 == ts2 then ( + pc.link <- ts1; + c.num_rems <- c.num_rems - 1; + acc + ) else + pc::acc + | _ -> raise (BugOnPC (lab_i pc)) (* COND expected *) + + (* repeat [try_change_cond] until no condition is changed into a branch *) + let rec repeat_change_cond c = + c.iter_num <- c.iter_num + 1; + debug "++ %sTunneling.branch_target %d: remaining number of conds to consider = %d\n" OPT.langname (c.iter_num) (c.num_rems); + let old = c.num_rems in + c.rems <- List.fold_left (try_change_cond c) [] c.rems; + let curr = c.num_rems in + let continue = + match OPT.limit_tunneling with + | Some n -> curr < old && c.iter_num < n + | None -> curr < old + in + if continue + then repeat_change_cond c + + + (*********************************************) + (*** START: printing and debugging functions *) + + let print_cfg (f: LANG.funct) c = + let a = Array.of_list (PTree.fold (fun acc pc cu -> (P.to_int pc,cu)::acc) (FUNS.fn_code f) []) in + Array.fast_sort (fun (i1,_) (i2,_) -> i2 - i1) a; + let ep = P.to_int (FUNS.fn_entrypoint f) in + debug "entrypoint: %d %s\n" ep (string_of_labeli c.nodes ep); + let println = Array.fold_left (FUNS.print_code_unit c) false a in + (if println then debug "\n");debug "remaining cond:"; + List.iter (fun n -> debug "%d " (lab_i n)) c.rems; + debug "\n" + + + (*************************************************************) + (* Copy-paste of the extracted code of the verifier *) + (* with [raise (BugOnPC (P.to_int pc))] instead of [Error.*] *) + + (** val check_code : coq_UF -> code -> unit res **) + + let check_code td c = + PTree.fold (fun _ pc cu -> FUNS.check_code_unit td pc cu) c (()) + + (*** END: copy-paste & debugging functions *******) + + (* compute the final distance of each nop nodes to its target *) + let final_export f c = + let count = ref 0 in + let filter_nops_init_dist _ n acc = + let tn = target c n in + if tn == n + then ( + n.dist <- 0; (* force [n] to be a base case in the recursion of [dist] *) + acc + ) else ( + n.dist <- undef_dist; (* force [dist] to compute the actual [n.dist] *) + count := !count+1; + n::acc + ) + in + let nops = Hashtbl.fold filter_nops_init_dist c.nodes [] in + let res = List.fold_left (fun acc n -> PTree.set (lab_p n) (lab_p n.link, Z.of_uint (dist n)) acc) PTree.empty nops in + debug "* %sTunneling.branch_target: initial number of nops = %d\n" OPT.langname !nopcounter; + debug "* %sTunneling.branch_target: final number of eliminated nops = %d\n" OPT.langname !count; + res + + let branch_target f = + debug "* %sTunneling.branch_target: starting on a new function\n" OPT.langname; + if OPT.limit_tunneling <> None then debug "* WARNING: limit_tunneling <> None\n"; + let c = { nodes = Hashtbl.create 100; rems = []; num_rems = 0; iter_num = 0 } in + c.rems <- PTree.fold (FUNS.build_simplified_cfg c) (FUNS.fn_code f) []; + repeat_change_cond c; + let res = final_export f c in + if !OPT.debug_flag then ( + try + check_code res (FUNS.fn_code f); + if OPT.final_dump then print_cfg f c; + with e -> ( + print_cfg f c; + check_code res (FUNS.fn_code f) + ) + ); + res + end end -- cgit From 8dc2366431f9210ad70a789c389d9d19c6fc802f Mon Sep 17 00:00:00 2001 From: Pierre Goutagny Date: Thu, 17 Jun 2021 16:32:05 +0200 Subject: Use Tunnelinglibs in Tunnelingaux --- backend/Tunnelingaux.ml | 337 ++++++++++++------------------------------------ 1 file changed, 81 insertions(+), 256 deletions(-) diff --git a/backend/Tunnelingaux.ml b/backend/Tunnelingaux.ml index 87e6d303..d4b88a5d 100644 --- a/backend/Tunnelingaux.ml +++ b/backend/Tunnelingaux.ml @@ -23,261 +23,86 @@ open Coqlib open LTL open Maps open Camlcoq - -let limit_tunneling = None (* for debugging: [Some x] limit the number of iterations *) -let debug_flag = ref false -let final_dump = false (* set to true to have a more verbose debugging *) - -let debug fmt = - if !debug_flag then Printf.eprintf fmt - else Printf.ifprintf stderr fmt - -exception BugOnPC of int - -(* type of labels in the cfg *) -type label = int * P.t - -(* instructions under analyzis *) -type simple_inst = (* a simplified view of LTL instructions *) - LBRANCH of node -| LCOND of node * node -| OTHER -and node = { - lab : label; - mutable inst: simple_inst; - mutable link: node; (* link in the union-find: itself for non "nop"-nodes, target of the "nop" otherwise *) - mutable dist: int; - mutable tag: int - } - -(* type of the (simplified) CFG *) -type cfg = { - nodes: (int, node) Hashtbl.t; - mutable rems: node list; (* remaining conditions that may become lbranch or not *) - mutable num_rems: int; - mutable iter_num: int (* number of iterations in elimination of conditions *) - } - -let lab_i (n: node): int = fst n.lab -let lab_p (n: node): P.t = snd n.lab - -let rec target c n = (* inspired from the "find" of union-find algorithm *) - match n.inst with - | LCOND(s1,s2) -> - if n.link != n - then update c n - else if n.tag < c.iter_num then ( - (* we try to change the condition ... *) - n.tag <- c.iter_num; (* ... but at most once by iteration *) - let ts1 = target c s1 in - let ts2 = target c s2 in - if ts1 == ts2 then (n.link <- ts1; ts1) else n - ) else n - | _ -> - if n.link != n - then update c n - else n -and update c n = - let t = target c n.link in - n.link <- t; t - -let get_node c p = - let li = P.to_int p in - try - Hashtbl.find c.nodes li - with - Not_found -> - let rec n = { lab = (li, p); inst = OTHER; link = n ; dist = 0; tag = 0 } in - Hashtbl.add c.nodes li n; - n - -let set_branch c p s = - let li = P.to_int p in - try - let n = Hashtbl.find c.nodes li in - n.inst <- LBRANCH s; - n.link <- target c s - with - Not_found -> - let n = { lab = (li,p); inst = LBRANCH s; link = target c s; dist = 0; tag = 0 } in - Hashtbl.add c.nodes li n - - -(* build [c.nodes] and accumulate in [acc] conditions at beginning of LTL basic-blocks *) -let build_simplified_cfg c acc pc bb = - match bb with - | Lbranch s :: _ -> - let ns = get_node c s in - set_branch c pc ns; - acc - | Lcond (_, _, s1, s2, _) :: _ -> - c.num_rems <- c.num_rems + 1; - let ns1 = get_node c s1 in - let ns2 = get_node c s2 in - let npc = get_node c pc in - npc.inst <- LCOND(ns1, ns2); - npc::acc - | _ -> acc - -(* try to change a condition into a branch -[acc] is the current accumulator of conditions to consider in the next iteration of repeat_change_cond -*) -let try_change_cond c acc pc = - match pc.inst with - | LCOND(s1,s2) -> - let ts1 = target c s1 in - let ts2 = target c s2 in - if ts1 == ts2 then ( - pc.link <- ts1; - c.num_rems <- c.num_rems - 1; +open Tunnelinglibs + +module LANG = struct + type code_unit = LTL.bblock + type funct = LTL.coq_function +end + +module OPT = struct + let langname = "LTL" + let limit_tunneling = None + let debug_flag = ref false + let final_dump = false +end + +module Partial = Tunnelinglibs.Tunneling(LANG)(OPT) + +module FUNS = struct + let build_simplified_cfg c acc pc bb = + match bb with + | Lbranch s :: _ -> + let ns = get_node c s in + set_branch c pc ns; acc - ) else - pc::acc - | _ -> raise (BugOnPC (lab_i pc)) (* LCOND expected *) - -(* repeat [try_change_cond] until no condition is changed into a branch *) -let rec repeat_change_cond c = - c.iter_num <- c.iter_num + 1; - debug "++ Tunneling.branch_target %d: remaining number of conds to consider = %d\n" (c.iter_num) (c.num_rems); - let old = c.num_rems in - c.rems <- List.fold_left (try_change_cond c) [] c.rems; - let curr = c.num_rems in - let continue = - match limit_tunneling with - | Some n -> curr < old && c.iter_num < n - | None -> curr < old - in - if continue - then repeat_change_cond c - - -(* compute the final distance of each nop nodes to its target *) -let undef_dist = -1 -let self_dist = undef_dist-1 -let rec dist n = - if n.dist = undef_dist - then ( - n.dist <- self_dist; (* protection against an unexpected loop in the data-structure *) - n.dist <- - (match n.inst with - | OTHER -> 0 - | LBRANCH p -> 1 + dist p - | LCOND (p1,p2) -> 1 + (max (dist p1) (dist p2))); - n.dist - ) else if n.dist=self_dist then raise (BugOnPC (lab_i n)) - else n.dist - -let final_export f c = - let count = ref 0 in - let filter_nops_init_dist _ n acc = - let tn = target c n in - if tn == n - then ( - n.dist <- 0; (* force [n] to be a base case in the recursion of [dist] *) - acc - ) else ( - n.dist <- undef_dist; (* force [dist] to compute the actual [n.dist] *) - count := !count+1; - n::acc - ) - in - let nops = Hashtbl.fold filter_nops_init_dist c.nodes [] in - let res = List.fold_left (fun acc n -> PTree.set (lab_p n) (lab_p n.link, Z.of_uint (dist n)) acc) PTree.empty nops in - debug "* Tunneling.branch_target: final number of eliminated nops = %d\n" !count; - res - -(*********************************************) -(*** START: printing and debugging functions *) - -let string_of_labeli nodes ipc = - try - let pc = Hashtbl.find nodes ipc in - if pc.link == pc - then Printf.sprintf "(Target@%d)" (dist pc) - else Printf.sprintf "(Nop %d @%d)" (lab_i pc.link) (dist pc) - with - Not_found -> "" - -let print_bblock c println (pc, bb) = - match bb with - | Lbranch s::_ -> (if println then debug "\n"); debug "%d:Lbranch %d %s\n" pc (P.to_int s) (string_of_labeli c.nodes pc); false - | Lcond (_, _, s1, s2, _)::_ -> (if println then debug "\n"); debug "%d:Lcond (%d,%d) %s\n" pc (P.to_int s1) (P.to_int s2) (string_of_labeli c.nodes pc); false - | _ -> debug "%d " pc; true - - -let print_cfg f c = - let a = Array.of_list (PTree.fold (fun acc pc bb -> (P.to_int pc,bb)::acc) f.fn_code []) in - Array.fast_sort (fun (i1,_) (i2,_) -> i2 - i1) a; - let ep = P.to_int f.fn_entrypoint in - debug "entrypoint: %d %s\n" ep (string_of_labeli c.nodes ep); - let println = Array.fold_left (print_bblock c) false a in - (if println then debug "\n");debug "remaining cond:"; - List.iter (fun n -> debug "%d " (lab_i n)) c.rems; - debug "\n" - -(*************************************************************) -(* Copy-paste of the extracted code of the verifier *) -(* with [raise (BugOnPC (P.to_int pc))] instead of [Error.*] *) - -let get td pc = - match PTree.get pc td with - | Some p -> let (t0, d) = p in (t0, d) - | None -> (pc, Z.of_uint 0) - -let check_bblock td pc bb = - match PTree.get pc td with - | Some p -> - let (tpc, dpc) = p in - let dpc0 = dpc in - (match bb with - | [] -> - raise (BugOnPC (P.to_int pc)) - | i :: _ -> - (match i with - | Lbranch s -> - let (ts, ds) = get td s in - if peq tpc ts - then if zlt ds dpc0 - then () - else raise (BugOnPC (P.to_int pc)) - else raise (BugOnPC (P.to_int pc)) - | Lcond (_, _, s1, s2, _) -> - let (ts1, ds1) = get td s1 in - let (ts2, ds2) = get td s2 in - if peq tpc ts1 - then if peq tpc ts2 - then if zlt ds1 dpc0 - then if zlt ds2 dpc0 - then () - else raise (BugOnPC (P.to_int pc)) - else raise (BugOnPC (P.to_int pc)) - else raise (BugOnPC (P.to_int pc)) - else raise (BugOnPC (P.to_int pc)) - | _ -> - raise (BugOnPC (P.to_int pc)))) - | None -> () - -(** val check_code : coq_UF -> code -> unit res **) - -let check_code td c = - PTree.fold (fun _ pc bb -> check_bblock td pc bb) c (()) - -(*** END: copy-paste & debugging functions *******) + | Lcond (_, _, s1, s2, _) :: _ -> + c.num_rems <- c.num_rems + 1; + let ns1 = get_node c s1 in + let ns2 = get_node c s2 in + let npc = get_node c pc in + npc.inst <- COND(ns1, ns2); + npc::acc + | _ -> acc + + let print_code_unit c println (pc, bb) = + match bb with + | Lbranch s::_ -> (if println then Partial.debug "\n"); Partial.debug "%d:Lbranch %d %s\n" pc (P.to_int s) (string_of_labeli c.nodes pc); false + | Lcond (_, _, s1, s2, _)::_ -> (if println then Partial.debug "\n"); Partial.debug "%d:Lcond (%d,%d) %s\n" pc (P.to_int s1) (P.to_int s2) (string_of_labeli c.nodes pc); false + | _ -> Partial.debug "%d " pc; true + + let fn_code f = f.fn_code + let fn_entrypoint f = f.fn_entrypoint + + + (*************************************************************) + (* Copy-paste of the extracted code of the verifier *) + (* with [raise (BugOnPC (P.to_int pc))] instead of [Error.*] *) + + let check_code_unit td pc bb = + match PTree.get pc td with + | Some p -> + let (tpc, dpc) = p in + let dpc0 = dpc in + (match bb with + | [] -> + raise (BugOnPC (P.to_int pc)) + | i :: _ -> + (match i with + | Lbranch s -> + let (ts, ds) = get td s in + if peq tpc ts + then if zlt ds dpc0 + then () + else raise (BugOnPC (P.to_int pc)) + else raise (BugOnPC (P.to_int pc)) + | Lcond (_, _, s1, s2, _) -> + let (ts1, ds1) = get td s1 in + let (ts2, ds2) = get td s2 in + if peq tpc ts1 + then if peq tpc ts2 + then if zlt ds1 dpc0 + then if zlt ds2 dpc0 + then () + else raise (BugOnPC (P.to_int pc)) + else raise (BugOnPC (P.to_int pc)) + else raise (BugOnPC (P.to_int pc)) + else raise (BugOnPC (P.to_int pc)) + | _ -> + raise (BugOnPC (P.to_int pc)))) + | None -> () +end + +module T = Partial.T(FUNS) +let branch_target = T.branch_target -let branch_target f = - debug "* Tunneling.branch_target: starting on a new function\n"; - if limit_tunneling <> None then debug "* WARNING: limit_tunneling <> None\n"; - let c = { nodes = Hashtbl.create 100; rems = []; num_rems = 0; iter_num = 0 } in - c.rems <- PTree.fold (build_simplified_cfg c) f.fn_code []; - repeat_change_cond c; - let res = final_export f c in - if !debug_flag then ( - try - check_code res f.fn_code; - if final_dump then print_cfg f c; - with e -> ( - print_cfg f c; - check_code res f.fn_code - ) - ); - res -- cgit From fae8d9b5c5f93d5eda36f800eb0ca1837b237cba Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Thu, 17 Jun 2021 17:00:57 +0200 Subject: fix riscv/Machregsaux.mli --- riscV/Machregsaux.mli | 2 ++ 1 file changed, 2 insertions(+) diff --git a/riscV/Machregsaux.mli b/riscV/Machregsaux.mli index 01b0f9fd..cf6d7b71 100644 --- a/riscV/Machregsaux.mli +++ b/riscV/Machregsaux.mli @@ -15,3 +15,5 @@ val is_scratch_register: string -> bool val class_of_type: AST.typ -> int + +val nr_regs: int array -- cgit From fe557bf65ec738eaa078bc5e398ff690eb1f2b9e Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Thu, 17 Jun 2021 17:03:53 +0200 Subject: changed type of schedule_seq in x86 for compatibility --- x86/PrepassSchedulingOracle.ml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/x86/PrepassSchedulingOracle.ml b/x86/PrepassSchedulingOracle.ml index 7b6a1b14..42a3da23 100644 --- a/x86/PrepassSchedulingOracle.ml +++ b/x86/PrepassSchedulingOracle.ml @@ -2,4 +2,5 @@ open RTL open Registers (* Do not do anything *) -let schedule_sequence (seqa : (instruction*Regset.t) array) = None +let schedule_sequence (seqa : (instruction*Regset.t) array) + live_regs_entry typing reference = None -- cgit From 8f399dfa9d794f2f728f523ff1aa7788cc3599b2 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Thu, 17 Jun 2021 17:04:52 +0200 Subject: fix for Risc-V --- aarch64/Machregsaux.mli | 1 + riscV/Machregsaux.mli | 3 +++ 2 files changed, 4 insertions(+) diff --git a/aarch64/Machregsaux.mli b/aarch64/Machregsaux.mli index 8487a557..23ac1c9a 100644 --- a/aarch64/Machregsaux.mli +++ b/aarch64/Machregsaux.mli @@ -16,4 +16,5 @@ val is_scratch_register: string -> bool val class_of_type: AST.typ -> int +(* Number of registers in each class *) val nr_regs : int array diff --git a/riscV/Machregsaux.mli b/riscV/Machregsaux.mli index 01b0f9fd..bb3777bf 100644 --- a/riscV/Machregsaux.mli +++ b/riscV/Machregsaux.mli @@ -15,3 +15,6 @@ val is_scratch_register: string -> bool val class_of_type: AST.typ -> int + +(* Number of registers in each class *) +val nr_regs : int array -- cgit From 95f918c38b1e59f40ae7af455ec2c6746289375e Mon Sep 17 00:00:00 2001 From: Pierre Goutagny Date: Thu, 17 Jun 2021 17:31:10 +0200 Subject: Change "Tunneling" to "LTLTunneling" everywhere To respect the symmetry between RTL- and LTL-Tunneling --- Makefile | 2 +- backend/LTLTunneling.v | 167 +++++++++++ backend/LTLTunnelingaux.ml | 108 +++++++ backend/LTLTunnelingproof.v | 666 ++++++++++++++++++++++++++++++++++++++++++++ backend/RTLTunneling.v | 2 +- backend/Tunneling.v | 167 ----------- backend/Tunnelingaux.ml | 108 ------- backend/Tunnelinglibs.ml | 2 +- backend/Tunnelingproof.v | 666 -------------------------------------------- doc/index-kvx.html | 6 +- doc/index.html | 6 +- driver/Compiler.vexpand | 2 +- tools/compiler_expand.ml | 2 +- 13 files changed, 952 insertions(+), 952 deletions(-) create mode 100644 backend/LTLTunneling.v create mode 100644 backend/LTLTunnelingaux.ml create mode 100644 backend/LTLTunnelingproof.v delete mode 100644 backend/Tunneling.v delete mode 100644 backend/Tunnelingaux.ml delete mode 100644 backend/Tunnelingproof.v diff --git a/Makefile b/Makefile index 4900b165..05e4cbcd 100644 --- a/Makefile +++ b/Makefile @@ -143,7 +143,7 @@ BACKEND=\ FirstNop.v FirstNopproof.v \ Allnontrap.v Allnontrapproof.v \ Allocation.v Allocationproof.v \ - Tunneling.v Tunnelingproof.v \ + LTLTunneling.v LTLTunnelingproof.v \ RTLTunneling.v RTLTunnelingproof.v \ Linear.v Lineartyping.v \ Linearize.v Linearizeproof.v \ diff --git a/backend/LTLTunneling.v b/backend/LTLTunneling.v new file mode 100644 index 00000000..4b404724 --- /dev/null +++ b/backend/LTLTunneling.v @@ -0,0 +1,167 @@ +(* *********************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Xavier Leroy, INRIA Paris-Rocquencourt *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* *) +(* Copyright Institut National de Recherche en Informatique et en *) +(* Automatique. All rights reserved. This file is distributed *) +(* under the terms of the INRIA Non-Commercial License Agreement. *) +(* *) +(* *********************************************************************) + +(** Branch tunneling (optimization of branches to branches). *) + +Require Import Coqlib Maps Errors. +Require Import AST. +Require Import LTL. + +(** Branch tunneling shortens sequences of branches (with no intervening + computations) by rewriting the branch and conditional branch instructions + so that they jump directly to the end of the branch sequence. + For example: +<< + L1: if (cond) nop L2; L1: nop L3; + L2: nop L3; becomes L2: nop L3; + L3: instr; L3: instr; + L4: if (cond) goto L1; L4: if (cond) nop L1; +>> + This optimization can be applied to several of our intermediate + languages. We choose to perform it on the [LTL] language, + after register allocation but before code linearization. + Register allocation can delete instructions (such as dead + computations or useless moves), therefore there are more + opportunities for tunneling after allocation than before. + Symmetrically, prior tunneling helps linearization to produce + better code, e.g. by revealing that some [branch] instructions are + dead code (as the "branch L3" in the example above). +*) + +(** The implementation consists in two passes: the first pass + records the branch t of each "nop" + and the second pass replace any "nop" node to [pc] + by a branch to a "nop" at [branch_t f pc] + +Naively, we may define [branch_t f pc] as follows: +<< + branch_t f pc = branch_t f pc' if f(pc) = nop pc' + = pc otherwise +>> + However, this definition can fail to terminate if + the program can contain loops consisting only of branches, as in +<< + L1: branch L1; +>> + or +<< + L1: nop L2; + L2: nop L1; +>> + Coq warns us of this fact by not accepting the definition + of [branch_t] above. + + To handle this problem, we use a union-find data structure, adding equalities [pc = pc'] + for every instruction [pc: nop pc'] in the function. + + Moreover, because the elimination of "useless" [Lcond] depends on the current [uf] datastructure, + we need to iterate until we reach a fixpoint. + + Actually, it is simpler and more efficient to perform this in an external oracle, that also returns a measure + in order to help the proof. + + A verifier checks that this data-structure is correct. +*) + +Definition UF := PTree.t (node * Z). + +(* The oracle returns a map of "nop" node to their target with a distance (ie the number of the "nop" node on the path) to the target. *) +Axiom branch_target: LTL.function -> UF. +Extract Constant branch_target => "LTLTunnelingaux.branch_target". + +Local Open Scope error_monad_scope. + +Definition get (td: UF) pc:node*Z := + match td!pc with + | Some (t,d) => (t,Z.abs d) + | _ => (pc,0) + end. + +Definition target (td: UF) (pc:node): node := fst (get td pc). +Coercion target: UF >-> Funclass. + +(* we check that the domain of [td] is included in the domain of [c] *) +Definition check_included (td: UF) (c: code): option bblock + := PTree.fold (fun (ok:option bblock) pc _ => if ok then c!pc else None) td (Some nil). + +(* we check the validity of targets and their bound: + the distance of a "nop" node (w.r.t to the target) must be greater than the one of its parents. +*) +Definition check_bblock (td: UF) (pc:node) (bb: bblock): res unit + := match td!pc with + | None => OK tt + | Some (tpc, dpc) => + let dpc := Z.abs dpc in + match bb with + | Lbranch s ::_ => + let (ts, ds) := get td s in + if peq tpc ts then + if zlt ds dpc then OK tt + else Error (msg "bad distance in Lbranch") + else Error (msg "invalid skip of Lbranch") + | Lcond _ _ s1 s2 _ :: _ => + let (ts1, ds1) := get td s1 in + let (ts2, ds2) := get td s2 in + if peq tpc ts1 then + if peq tpc ts2 then + if zlt ds1 dpc then + if zlt ds2 dpc then OK tt + else Error (msg "bad distance on else branch") + else Error (msg "bad distance on then branch") + else Error (msg "invalid skip of else branch") + else Error (msg "invalid skip of then branch") + | _ => Error (msg "cannot skip this block") + end + end. + +Definition check_code (td: UF) (c:code): res unit + := PTree.fold (fun ok pc bb => do _ <- ok; check_bblock td pc bb) c (OK tt). + +(** The second pass rewrites all LTL instructions, replacing every + successor [s] of every instruction by [t s], the canonical representative + of its equivalence class in the union-find data structure. *) + +Definition tunnel_instr (t: node -> node) (i: instruction) : instruction := + match i with + | Lbranch s => Lbranch (t s) + | Lcond cond args s1 s2 info => + let s1' := t s1 in let s2' := t s2 in + if peq s1' s2' + then Lbranch s1' + else Lcond cond args s1' s2' info + | Ljumptable arg tbl => Ljumptable arg (List.map t tbl) + | _ => i + end. + +Definition tunnel_block (t: node -> node) (b: bblock) : bblock := + List.map (tunnel_instr t) b. + +Definition tunnel_function (f: LTL.function) : res LTL.function := + let td := branch_target f in + let c := (fn_code f) in + if check_included td c then + do _ <- check_code td c ; OK + (mkfunction + (fn_sig f) + (fn_stacksize f) + (PTree.map1 (tunnel_block td) c) + (td (fn_entrypoint f))) + else + Error (msg "Some node of the union-find is not in the CFG") + . + +Definition tunnel_fundef (f: fundef) : res fundef := + transf_partial_fundef tunnel_function f. + +Definition transf_program (p: program) : res program := + transform_partial_program tunnel_fundef p. diff --git a/backend/LTLTunnelingaux.ml b/backend/LTLTunnelingaux.ml new file mode 100644 index 00000000..c3b8cf82 --- /dev/null +++ b/backend/LTLTunnelingaux.ml @@ -0,0 +1,108 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* *) +(* Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +(* + +This file implements the [branch_target] oracle that identifies "nop" branches in a LTL function, +and computes their target node with the distance (ie the number of cummulated nops) toward this target. + +See [LTLTunneling.v] + +*) + +open Coqlib +open LTL +open Maps +open Camlcoq +open Tunnelinglibs + +module LANG = struct + type code_unit = LTL.bblock + type funct = LTL.coq_function +end + +module OPT = struct + let langname = "LTL" + let limit_tunneling = None + let debug_flag = ref false + let final_dump = false +end + +module Partial = Tunnelinglibs.Tunneling(LANG)(OPT) + +module FUNS = struct + let build_simplified_cfg c acc pc bb = + match bb with + | Lbranch s :: _ -> + let ns = get_node c s in + set_branch c pc ns; + acc + | Lcond (_, _, s1, s2, _) :: _ -> + c.num_rems <- c.num_rems + 1; + let ns1 = get_node c s1 in + let ns2 = get_node c s2 in + let npc = get_node c pc in + npc.inst <- COND(ns1, ns2); + npc::acc + | _ -> acc + + let print_code_unit c println (pc, bb) = + match bb with + | Lbranch s::_ -> (if println then Partial.debug "\n"); Partial.debug "%d:Lbranch %d %s\n" pc (P.to_int s) (string_of_labeli c.nodes pc); false + | Lcond (_, _, s1, s2, _)::_ -> (if println then Partial.debug "\n"); Partial.debug "%d:Lcond (%d,%d) %s\n" pc (P.to_int s1) (P.to_int s2) (string_of_labeli c.nodes pc); false + | _ -> Partial.debug "%d " pc; true + + let fn_code f = f.fn_code + let fn_entrypoint f = f.fn_entrypoint + + + (*************************************************************) + (* Copy-paste of the extracted code of the verifier *) + (* with [raise (BugOnPC (P.to_int pc))] instead of [Error.*] *) + + let check_code_unit td pc bb = + match PTree.get pc td with + | Some p -> + let (tpc, dpc) = p in + let dpc0 = dpc in + (match bb with + | [] -> + raise (BugOnPC (P.to_int pc)) + | i :: _ -> + (match i with + | Lbranch s -> + let (ts, ds) = get td s in + if peq tpc ts + then if zlt ds dpc0 + then () + else raise (BugOnPC (P.to_int pc)) + else raise (BugOnPC (P.to_int pc)) + | Lcond (_, _, s1, s2, _) -> + let (ts1, ds1) = get td s1 in + let (ts2, ds2) = get td s2 in + if peq tpc ts1 + then if peq tpc ts2 + then if zlt ds1 dpc0 + then if zlt ds2 dpc0 + then () + else raise (BugOnPC (P.to_int pc)) + else raise (BugOnPC (P.to_int pc)) + else raise (BugOnPC (P.to_int pc)) + else raise (BugOnPC (P.to_int pc)) + | _ -> + raise (BugOnPC (P.to_int pc)))) + | None -> () +end + +module T = Partial.T(FUNS) +let branch_target = T.branch_target + diff --git a/backend/LTLTunnelingproof.v b/backend/LTLTunnelingproof.v new file mode 100644 index 00000000..d36d3c76 --- /dev/null +++ b/backend/LTLTunnelingproof.v @@ -0,0 +1,666 @@ +(* *********************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Xavier Leroy, INRIA Paris-Rocquencourt *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* *) +(* Copyright Institut National de Recherche en Informatique et en *) +(* Automatique. All rights reserved. This file is distributed *) +(* under the terms of the INRIA Non-Commercial License Agreement. *) +(* *) +(* *********************************************************************) + +(** Correctness proof for the branch tunneling optimization. *) + +Require Import Coqlib Maps Errors. +Require Import AST Linking. +Require Import Values Memory Events Globalenvs Smallstep. +Require Import Op Locations LTL. +Require Import LTLTunneling. + +Local Open Scope nat. + + +(** * Properties of the branch_target, when the verifier succeeds *) + +Definition check_included_spec (c:code) (td:UF) (ok: option bblock) := + ok <> None -> forall pc, c!pc = None -> td!pc = None. + +Lemma check_included_correct (td: UF) (c: code): + check_included_spec c td (check_included td c). +Proof. + apply PTree_Properties.fold_rec with (P := check_included_spec c). +- (* extensionality *) + unfold check_included_spec. intros m m' a EQ IND X pc. rewrite <- EQ; auto. +- (* base case *) + intros _ pc. rewrite PTree.gempty; try congruence. +- (* inductive case *) + unfold check_included_spec. + intros m [|] pc bb NEW ATPC IND; simpl; try congruence. + intros H pc0. rewrite PTree.gsspec; destruct (peq _ _); subst; simpl; try congruence. + intros; eapply IND; try congruence. +Qed. + +Inductive target_bounds (target: node -> node) (bound: node -> nat) (pc: node): (option bblock) -> Prop := + | TB_default (TB: target pc = pc) ob + : target_bounds target bound pc ob + | TB_branch s bb + (EQ: target pc = target s) + (DECREASE: bound s < bound pc) + : target_bounds target bound pc (Some (Lbranch s::bb)) + | TB_cond cond args s1 s2 info bb + (EQ1: target pc = target s1) + (EQ2: target pc = target s2) + (DEC1: bound s1 < bound pc) + (DEC2: bound s2 < bound pc) + : target_bounds target bound pc (Some (Lcond cond args s1 s2 info::bb)) + . +Local Hint Resolve TB_default: core. + +Lemma target_None (td:UF) (pc: node): td!pc = None -> td pc = pc. +Proof. + unfold target, get. intros H; rewrite H; auto. +Qed. +Local Hint Resolve target_None Z.abs_nonneg: core. + +Lemma get_nonneg td pc t d: get td pc = (t, d) -> (0 <= d)%Z. +Proof. + unfold get. destruct (td!_) as [(t0&d0)|]; intros H; inversion H; subst; simpl; lia || auto. +Qed. +Local Hint Resolve get_nonneg: core. + +Definition bound (td: UF) (pc: node) := Z.to_nat (snd (get td pc)). + +Lemma check_bblock_correct (td:UF) (pc:node) (bb: bblock): + check_bblock td pc bb = OK tt -> + target_bounds (target td) (bound td) pc (Some bb). +Proof. + unfold check_bblock, bound. + destruct (td!pc) as [(tpc&dpc)|] eqn:Hpc; auto. + assert (Tpc: td pc = tpc). { unfold target, get; rewrite Hpc; simpl; auto. } + assert (Dpc: snd (get td pc) = Z.abs dpc). { unfold get; rewrite Hpc; simpl; auto. } + destruct bb as [|[ ] bb]; simpl; try congruence. + + destruct (get td s) as (ts, ds) eqn:Hs. + repeat (destruct (peq _ _) || destruct (zlt _ _)); simpl; try congruence. + intros; apply TB_branch. + * rewrite Tpc. unfold target; rewrite Hs; simpl; auto. + * rewrite Dpc, Hs; simpl. apply Z2Nat.inj_lt; eauto. + + destruct (get td s1) as (ts1, ds1) eqn:Hs1. + destruct (get td s2) as (ts2, ds2) eqn:Hs2. + repeat (destruct (peq _ _) || destruct (zlt _ _)); simpl; try congruence. + intros; apply TB_cond. + * rewrite Tpc. unfold target; rewrite Hs1; simpl; auto. + * rewrite Tpc. unfold target; rewrite Hs2; simpl; auto. + * rewrite Dpc, Hs1; simpl. apply Z2Nat.inj_lt; eauto. + * rewrite Dpc, Hs2; simpl. apply Z2Nat.inj_lt; eauto. +Qed. + +Definition check_code_spec (td:UF) (c:code) (ok: res unit) := + ok = OK tt -> forall pc bb, c!pc = Some bb -> target_bounds (target td) (bound td) pc (Some bb). + +Lemma check_code_correct (td:UF) c: + check_code_spec td c (check_code td c). +Proof. + apply PTree_Properties.fold_rec with (P := check_code_spec td). +- (* extensionality *) + unfold check_code_spec. intros m m' a EQ IND X pc bb; subst. rewrite <- ! EQ; eauto. +- (* base case *) + intros _ pc. rewrite PTree.gempty; try congruence. +- (* inductive case *) + unfold check_code_spec. + intros m [[]|] pc bb NEW ATPC IND; simpl; try congruence. + intros H pc0 bb0. rewrite PTree.gsspec; destruct (peq _ _); subst; simpl; auto. + intros X; inversion X; subst. + apply check_bblock_correct; auto. +Qed. + +Theorem branch_target_bounds: + forall f tf pc, + tunnel_function f = OK tf -> + target_bounds (branch_target f) (bound (branch_target f)) pc (f.(fn_code)!pc). +Proof. + unfold tunnel_function; intros f f' pc. + destruct (check_included _ _) eqn:H1; try congruence. + destruct (check_code _ _) as [[]|] eqn:H2; simpl; try congruence. + intros _. + destruct ((fn_code f)!pc) eqn:X. + - exploit check_code_correct; eauto. + - exploit check_included_correct; eauto. + congruence. +Qed. + +Lemma tunnel_function_unfold: + forall f tf pc, + tunnel_function f = OK tf -> + (fn_code tf)!pc = option_map (tunnel_block (branch_target f)) (fn_code f)!pc. +Proof. + unfold tunnel_function; intros f f' pc. + destruct (check_included _ _) eqn:H1; try congruence. + destruct (check_code _ _) as [[]|] eqn:H2; simpl; try congruence. + intros X; inversion X; clear X; subst. + simpl. rewrite PTree.gmap1. auto. +Qed. + +Lemma tunnel_fundef_Internal: + forall f tf, tunnel_fundef (Internal f) = OK tf + -> exists tf', tunnel_function f = OK tf' /\ tf = Internal tf'. +Proof. + intros f tf; simpl. + destruct (tunnel_function f) eqn:X; simpl; try congruence. + intros EQ; inversion EQ. + eexists; split; eauto. +Qed. + +Lemma tunnel_fundef_External: + forall tf ef, tunnel_fundef (External ef) = OK tf + -> tf = External ef. +Proof. + intros tf ef; simpl. intros H; inversion H; auto. +Qed. + +(** * Preservation of semantics *) + +Definition match_prog (p tp: program) := + match_program (fun _ f tf => tunnel_fundef f = OK tf) eq p tp. + +Lemma transf_program_match: + forall prog tprog, transf_program prog = OK tprog -> match_prog prog tprog. +Proof. + intros. eapply match_transform_partial_program_contextual; eauto. +Qed. + +Section PRESERVATION. + +Variables prog tprog: program. +Hypothesis TRANSL: match_prog prog tprog. +Let ge := Genv.globalenv prog. +Let tge := Genv.globalenv tprog. + +Lemma functions_translated: + forall (v: val) (f: fundef), + Genv.find_funct ge v = Some f -> + exists tf, tunnel_fundef f = OK tf /\ Genv.find_funct tge v = Some tf. +Proof. + intros. exploit (Genv.find_funct_match TRANSL); eauto. + intros (cu & tf & A & B & C). + repeat eexists; intuition eauto. +Qed. + +Lemma function_ptr_translated: + forall v f, + Genv.find_funct_ptr ge v = Some f -> + exists tf, + Genv.find_funct_ptr tge v = Some tf /\ tunnel_fundef f = OK tf. +Proof. + intros. + exploit (Genv.find_funct_ptr_transf_partial TRANSL); eauto. +Qed. + +Lemma symbols_preserved s: Genv.find_symbol tge s = Genv.find_symbol ge s. +Proof. + rewrite <- (Genv.find_symbol_match TRANSL). reflexivity. +Qed. + +Lemma senv_preserved: + Senv.equiv ge tge. +Proof. + eapply (Genv.senv_match TRANSL). +Qed. + +Lemma sig_preserved: + forall f tf, tunnel_fundef f = OK tf -> funsig tf = funsig f. +Proof. + intros. destruct f. + - simpl in H. monadInv H. unfold tunnel_function in EQ. + destruct (check_included _ _); try congruence. + monadInv EQ. simpl; auto. + - simpl in H. monadInv H. reflexivity. +Qed. + +Lemma fn_stacksize_preserved: + forall f tf, tunnel_function f = OK tf -> fn_stacksize tf = fn_stacksize f. +Proof. + intros f tf; unfold tunnel_function. + destruct (check_included _ _); try congruence. + destruct (check_code _ _); simpl; try congruence. + intros H; inversion H; simpl; auto. +Qed. + +Lemma fn_entrypoint_preserved: + forall f tf, tunnel_function f = OK tf -> fn_entrypoint tf = branch_target f (fn_entrypoint f). +Proof. + intros f tf; unfold tunnel_function. + destruct (check_included _ _); try congruence. + destruct (check_code _ _); simpl; try congruence. + intros H; inversion H; simpl; auto. +Qed. + + +(** The proof of semantic preservation is a simulation argument + based on diagrams of the following form: +<< + st1 --------------- st2 + | | + t| ?|t + | | + v v + st1'--------------- st2' +>> + The [match_states] predicate, defined below, captures the precondition + between states [st1] and [st2], as well as the postcondition between + [st1'] and [st2']. One transition in the source code (left) can correspond + to zero or one transition in the transformed code (right). The + "zero transition" case occurs when executing a [Lnop] instruction + in the source code that has been removed by tunneling. + + In the definition of [match_states], what changes between the original and + transformed codes is mainly the control-flow + (in particular, the current program point [pc]), but also some values + and memory states, since some [Vundef] values can become more defined + as a consequence of eliminating useless [Lcond] instructions. *) + +Definition locmap_lessdef (ls1 ls2: locset) : Prop := + forall l, Val.lessdef (ls1 l) (ls2 l). + +Inductive match_stackframes: stackframe -> stackframe -> Prop := + | match_stackframes_intro: + forall f tf sp ls0 bb tls0, + locmap_lessdef ls0 tls0 -> + tunnel_function f = OK tf -> + match_stackframes + (Stackframe f sp ls0 bb) + (Stackframe tf sp tls0 (tunnel_block (branch_target f) bb)). + +Inductive match_states: state -> state -> Prop := + | match_states_intro: + forall s f tf sp pc ls m ts tls tm + (STK: list_forall2 match_stackframes s ts) + (LS: locmap_lessdef ls tls) + (MEM: Mem.extends m tm) + (TF: tunnel_function f = OK tf), + match_states (State s f sp pc ls m) + (State ts tf sp (branch_target f pc) tls tm) + | match_states_block: + forall s f tf sp bb ls m ts tls tm + (STK: list_forall2 match_stackframes s ts) + (LS: locmap_lessdef ls tls) + (MEM: Mem.extends m tm) + (TF: tunnel_function f = OK tf), + match_states (Block s f sp bb ls m) + (Block ts tf sp (tunnel_block (branch_target f) bb) tls tm) + | match_states_interm: + forall s f tf sp pc i bb ls m ts tls tm + (STK: list_forall2 match_stackframes s ts) + (LS: locmap_lessdef ls tls) + (MEM: Mem.extends m tm) + (IBRANCH: tunnel_instr (branch_target f) i = Lbranch pc) + (TF: tunnel_function f = OK tf), + match_states (Block s f sp (i :: bb) ls m) + (State ts tf sp pc tls tm) + | match_states_call: + forall s f tf ls m ts tls tm + (STK: list_forall2 match_stackframes s ts) + (LS: locmap_lessdef ls tls) + (MEM: Mem.extends m tm) + (TF: tunnel_fundef f = OK tf), + match_states (Callstate s f ls m) + (Callstate ts tf tls tm) + | match_states_return: + forall s ls m ts tls tm + (STK: list_forall2 match_stackframes s ts) + (LS: locmap_lessdef ls tls) + (MEM: Mem.extends m tm), + match_states (Returnstate s ls m) + (Returnstate ts tls tm). + +(** Properties of [locmap_lessdef] *) + +Lemma reglist_lessdef: + forall rl ls1 ls2, + locmap_lessdef ls1 ls2 -> Val.lessdef_list (reglist ls1 rl) (reglist ls2 rl). +Proof. + induction rl; simpl; intros; auto. +Qed. + +Lemma locmap_set_lessdef: + forall ls1 ls2 v1 v2 l, + locmap_lessdef ls1 ls2 -> Val.lessdef v1 v2 -> locmap_lessdef (Locmap.set l v1 ls1) (Locmap.set l v2 ls2). +Proof. + intros; red; intros l'. unfold Locmap.set. destruct (Loc.eq l l'). +- destruct l; auto using Val.load_result_lessdef. +- destruct (Loc.diff_dec l l'); auto. +Qed. + +Lemma locmap_set_undef_lessdef: + forall ls1 ls2 l, + locmap_lessdef ls1 ls2 -> locmap_lessdef (Locmap.set l Vundef ls1) ls2. +Proof. + intros; red; intros l'. unfold Locmap.set. destruct (Loc.eq l l'). +- destruct l; auto. destruct ty; auto. +- destruct (Loc.diff_dec l l'); auto. +Qed. + +Lemma locmap_undef_regs_lessdef: + forall rl ls1 ls2, + locmap_lessdef ls1 ls2 -> locmap_lessdef (undef_regs rl ls1) (undef_regs rl ls2). +Proof. + induction rl as [ | r rl]; intros; simpl. auto. apply locmap_set_lessdef; auto. +Qed. + +Lemma locmap_undef_regs_lessdef_1: + forall rl ls1 ls2, + locmap_lessdef ls1 ls2 -> locmap_lessdef (undef_regs rl ls1) ls2. +Proof. + induction rl as [ | r rl]; intros; simpl. auto. apply locmap_set_undef_lessdef; auto. +Qed. + +Lemma locmap_getpair_lessdef: + forall p ls1 ls2, + locmap_lessdef ls1 ls2 -> Val.lessdef (Locmap.getpair p ls1) (Locmap.getpair p ls2). +Proof. + intros; destruct p; simpl; auto using Val.longofwords_lessdef. +Qed. + +Lemma locmap_getpairs_lessdef: + forall pl ls1 ls2, + locmap_lessdef ls1 ls2 -> + Val.lessdef_list (map (fun p => Locmap.getpair p ls1) pl) (map (fun p => Locmap.getpair p ls2) pl). +Proof. + intros. induction pl; simpl; auto using locmap_getpair_lessdef. +Qed. + +Lemma locmap_setpair_lessdef: + forall p ls1 ls2 v1 v2, + locmap_lessdef ls1 ls2 -> Val.lessdef v1 v2 -> locmap_lessdef (Locmap.setpair p v1 ls1) (Locmap.setpair p v2 ls2). +Proof. + intros; destruct p; simpl; auto using locmap_set_lessdef, Val.loword_lessdef, Val.hiword_lessdef. +Qed. + +Lemma locmap_setres_lessdef: + forall res ls1 ls2 v1 v2, + locmap_lessdef ls1 ls2 -> Val.lessdef v1 v2 -> locmap_lessdef (Locmap.setres res v1 ls1) (Locmap.setres res v2 ls2). +Proof. + induction res; intros; simpl; auto using locmap_set_lessdef, Val.loword_lessdef, Val.hiword_lessdef. +Qed. + +Lemma locmap_undef_caller_save_regs_lessdef: + forall ls1 ls2, + locmap_lessdef ls1 ls2 -> locmap_lessdef (undef_caller_save_regs ls1) (undef_caller_save_regs ls2). +Proof. + intros; red; intros. unfold undef_caller_save_regs. + destruct l. +- destruct (Conventions1.is_callee_save r); auto. +- destruct sl; auto. +Qed. + +Lemma find_function_translated: + forall ros ls tls fd, + locmap_lessdef ls tls -> + find_function ge ros ls = Some fd -> + exists tfd, tunnel_fundef fd = OK tfd /\ find_function tge ros tls = Some tfd. +Proof. + intros. destruct ros; simpl in *. +- assert (E: tls (R m) = ls (R m)). + { exploit Genv.find_funct_inv; eauto. intros (b & EQ). + generalize (H (R m)). rewrite EQ. intros LD; inv LD. auto. } + rewrite E. exploit functions_translated; eauto. +- rewrite symbols_preserved. destruct (Genv.find_symbol ge i); inv H0. + exploit function_ptr_translated; eauto. + intros (tf & X1 & X2). exists tf; intuition. +Qed. + +Lemma call_regs_lessdef: + forall ls1 ls2, locmap_lessdef ls1 ls2 -> locmap_lessdef (call_regs ls1) (call_regs ls2). +Proof. + intros; red; intros. destruct l as [r | [] ofs ty]; simpl; auto. +Qed. + +Lemma return_regs_lessdef: + forall caller1 callee1 caller2 callee2, + locmap_lessdef caller1 caller2 -> + locmap_lessdef callee1 callee2 -> + locmap_lessdef (return_regs caller1 callee1) (return_regs caller2 callee2). +Proof. + intros; red; intros. destruct l; simpl. +- destruct (Conventions1.is_callee_save r); auto. +- destruct sl; auto. +Qed. + +(** To preserve non-terminating behaviours, we show that the transformed + code cannot take an infinity of "zero transition" cases. + We use the following [measure] function over source states, + which decreases strictly in the "zero transition" case. *) + +Definition measure (st: state) : nat := + match st with + | State s f sp pc ls m => (bound (branch_target f) pc) * 2 + | Block s f sp (Lbranch pc :: _) ls m => (bound (branch_target f) pc) * 2 + 1 + | Block s f sp (Lcond _ _ pc1 pc2 _ :: _) ls m => (max (bound (branch_target f) pc1) (bound (branch_target f) pc2)) * 2 + 1 + | Block s f sp bb ls m => 0 + | Callstate s f ls m => 0 + | Returnstate s ls m => 0 + end. + +Lemma match_parent_locset: + forall s ts, + list_forall2 match_stackframes s ts -> + locmap_lessdef (parent_locset s) (parent_locset ts). +Proof. + induction 1; simpl. +- red; auto. +- inv H; auto. +Qed. + +Lemma tunnel_step_correct: + forall st1 t st2, step ge st1 t st2 -> + forall st1' (MS: match_states st1 st1'), + (exists st2', step tge st1' t st2' /\ match_states st2 st2') + \/ (measure st2 < measure st1 /\ t = E0 /\ match_states st2 st1')%nat. +Proof. + induction 1; intros; try inv MS; try (simpl in IBRANCH; inv IBRANCH). + +- (* entering a block *) + exploit (branch_target_bounds f tf pc); eauto. + rewrite H. intros X; inversion X. + + (* TB_default *) + rewrite TB; left. econstructor; split. + * econstructor. simpl. erewrite tunnel_function_unfold, H ; simpl; eauto. + * econstructor; eauto. + + (* FT_branch *) + simpl; right. + rewrite EQ; repeat (econstructor; lia || eauto). + + (* FT_cond *) + simpl; right. + repeat (econstructor; lia || eauto); simpl. + destruct (peq _ _); try congruence. +- (* Lop *) + exploit eval_operation_lessdef. apply reglist_lessdef; eauto. eauto. eauto. + intros (tv & EV & LD). + left; simpl; econstructor; split. + eapply exec_Lop with (v := tv); eauto. + rewrite <- EV. apply eval_operation_preserved. exact symbols_preserved. + econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef. +- (* Lload *) + exploit eval_addressing_lessdef. apply reglist_lessdef; eauto. eauto. + intros (ta & EV & LD). + exploit Mem.loadv_extends. eauto. eauto. eexact LD. + intros (tv & LOAD & LD'). + left; simpl; econstructor; split. + eapply exec_Lload with (a := ta). + rewrite <- EV. apply eval_addressing_preserved. exact symbols_preserved. + eauto. eauto. + econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef. +- (* Lload notrap1 *) + exploit eval_addressing_lessdef_none. apply reglist_lessdef; eauto. eassumption. + left; simpl; econstructor; split. + eapply exec_Lload_notrap1. + rewrite <- H0. + apply eval_addressing_preserved. exact symbols_preserved. eauto. + econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef. +- (* Lload notrap2 *) + exploit eval_addressing_lessdef. apply reglist_lessdef; eauto. eauto. + intros (ta & EV & LD). + destruct (Mem.loadv chunk tm ta) eqn:Htload. + { + left; simpl; econstructor; split. + eapply exec_Lload. + rewrite <- EV. apply eval_addressing_preserved. exact symbols_preserved. + exact Htload. eauto. + econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef. + } + { + left; simpl; econstructor; split. + eapply exec_Lload_notrap2. + rewrite <- EV. apply eval_addressing_preserved. exact symbols_preserved. + exact Htload. eauto. + econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef. + } +- (* Lgetstack *) + left; simpl; econstructor; split. + econstructor; eauto. + econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef. +- (* Lsetstack *) + left; simpl; econstructor; split. + econstructor; eauto. + econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef. +- (* Lstore *) + exploit eval_addressing_lessdef. apply reglist_lessdef; eauto. eauto. + intros (ta & EV & LD). + exploit Mem.storev_extends. eauto. eauto. eexact LD. apply LS. + intros (tm' & STORE & MEM'). + left; simpl; econstructor; split. + eapply exec_Lstore with (a := ta). + rewrite <- EV. apply eval_addressing_preserved. exact symbols_preserved. + eauto. eauto. + econstructor; eauto using locmap_undef_regs_lessdef. +- (* Lcall *) + left; simpl. + exploit find_function_translated; eauto. + intros (tfd & Htfd & FIND). + econstructor; split. + + eapply exec_Lcall; eauto. + erewrite sig_preserved; eauto. + + econstructor; eauto. + constructor; auto. + constructor; auto. +- (* Ltailcall *) + exploit find_function_translated. 2: eauto. + { eauto using return_regs_lessdef, match_parent_locset. } + intros (tfd & Htfd & FIND). + exploit Mem.free_parallel_extends. eauto. eauto. intros (tm' & FREE & MEM'). + left; simpl; econstructor; split. + + eapply exec_Ltailcall; eauto. + * eapply sig_preserved; eauto. + * erewrite fn_stacksize_preserved; eauto. + + econstructor; eauto using return_regs_lessdef, match_parent_locset. +- (* Lbuiltin *) + exploit eval_builtin_args_lessdef. eexact LS. eauto. eauto. intros (tvargs & EVA & LDA). + exploit external_call_mem_extends; eauto. intros (tvres & tm' & A & B & C & D). + left; simpl; econstructor; split. + eapply exec_Lbuiltin; eauto. + eapply eval_builtin_args_preserved with (ge1 := ge); eauto. exact symbols_preserved. + eapply external_call_symbols_preserved. apply senv_preserved. eauto. + econstructor; eauto using locmap_setres_lessdef, locmap_undef_regs_lessdef. +- (* Lbranch (preserved) *) + left; simpl; econstructor; split. + eapply exec_Lbranch; eauto. + fold (branch_target f pc). econstructor; eauto. +- (* Lbranch (eliminated) *) + right; split. simpl. lia. split. auto. constructor; auto. +- (* Lcond (preserved) *) + simpl; left; destruct (peq _ _) eqn: EQ. + + econstructor; split. + eapply exec_Lbranch. + destruct b. + * constructor; eauto using locmap_undef_regs_lessdef_1. + * rewrite e. constructor; eauto using locmap_undef_regs_lessdef_1. + + econstructor; split. + eapply exec_Lcond; eauto. eapply eval_condition_lessdef; eauto using reglist_lessdef. + destruct b; econstructor; eauto using locmap_undef_regs_lessdef. +- (* Lcond (eliminated) *) + destruct (peq _ _) eqn: EQ; try inv H1. + right; split; simpl. + + destruct b. + generalize (Nat.le_max_l (bound (branch_target f) pc1) (bound (branch_target f) pc2)); lia. + generalize (Nat.le_max_r (bound (branch_target f) pc1) (bound (branch_target f) pc2)); lia. + + destruct b. + -- repeat (constructor; auto). + -- rewrite e; repeat (constructor; auto). +- (* Ljumptable *) + assert (tls (R arg) = Vint n). + { generalize (LS (R arg)); rewrite H; intros LD; inv LD; auto. } + left; simpl; econstructor; split. + eapply exec_Ljumptable. + eauto. rewrite list_nth_z_map, H0; simpl; eauto. eauto. + econstructor; eauto using locmap_undef_regs_lessdef. +- (* Lreturn *) + exploit Mem.free_parallel_extends. eauto. eauto. intros (tm' & FREE & MEM'). + left; simpl; econstructor; split. + + eapply exec_Lreturn; eauto. + erewrite fn_stacksize_preserved; eauto. + + constructor; eauto using return_regs_lessdef, match_parent_locset. +- (* internal function *) + exploit tunnel_fundef_Internal; eauto. + intros (tf' & TF' & ITF). subst. + exploit Mem.alloc_extends. eauto. eauto. apply Z.le_refl. apply Z.le_refl. + intros (tm' & ALLOC & MEM'). + left; simpl. + econstructor; split. + + eapply exec_function_internal; eauto. + erewrite fn_stacksize_preserved; eauto. + + simpl. + erewrite (fn_entrypoint_preserved f tf'); auto. + econstructor; eauto using locmap_undef_regs_lessdef, call_regs_lessdef. +- (* external function *) + exploit external_call_mem_extends; eauto using locmap_getpairs_lessdef. + intros (tvres & tm' & A & B & C & D). + left; simpl; econstructor; split. + + erewrite (tunnel_fundef_External tf ef); eauto. + eapply exec_function_external; eauto. + eapply external_call_symbols_preserved; eauto. apply senv_preserved. + + simpl. econstructor; eauto using locmap_setpair_lessdef, locmap_undef_caller_save_regs_lessdef. +- (* return *) + inv STK. inv H1. + left; econstructor; split. + eapply exec_return; eauto. + constructor; auto. +Qed. + +Lemma transf_initial_states: + forall st1, initial_state prog st1 -> + exists st2, initial_state tprog st2 /\ match_states st1 st2. +Proof. + intros. inversion H. + exploit function_ptr_translated; eauto. + intros (tf & Htf & Hf). + exists (Callstate nil tf (Locmap.init Vundef) m0); split. + econstructor; eauto. + apply (Genv.init_mem_transf_partial TRANSL); auto. + rewrite (match_program_main TRANSL). + rewrite symbols_preserved. eauto. + rewrite <- H3. apply sig_preserved. auto. + constructor. constructor. red; simpl; auto. apply Mem.extends_refl. auto. +Qed. + +Lemma transf_final_states: + forall st1 st2 r, + match_states st1 st2 -> final_state st1 r -> final_state st2 r. +Proof. + intros. inv H0. inv H. inv STK. + set (p := map_rpair R (Conventions1.loc_result signature_main)) in *. + generalize (locmap_getpair_lessdef p _ _ LS). rewrite H1; intros LD; inv LD. + econstructor; eauto. +Qed. + +Theorem transf_program_correct: + forward_simulation (LTL.semantics prog) (LTL.semantics tprog). +Proof. + eapply forward_simulation_opt. + apply senv_preserved. + eexact transf_initial_states. + eexact transf_final_states. + eexact tunnel_step_correct. +Qed. + +End PRESERVATION. diff --git a/backend/RTLTunneling.v b/backend/RTLTunneling.v index 049160fd..df663f48 100644 --- a/backend/RTLTunneling.v +++ b/backend/RTLTunneling.v @@ -19,7 +19,7 @@ Require Import Coqlib Maps Errors. Require Import AST. Require Import RTL. -(* This is a port of tunneling for LTL. See Tunneling.v *) +(* This is a port of tunneling for LTL. See LTLTunneling.v *) Definition UF := PTree.t (node * Z). diff --git a/backend/Tunneling.v b/backend/Tunneling.v deleted file mode 100644 index c849ea92..00000000 --- a/backend/Tunneling.v +++ /dev/null @@ -1,167 +0,0 @@ -(* *********************************************************************) -(* *) -(* The Compcert verified compiler *) -(* *) -(* Xavier Leroy, INRIA Paris-Rocquencourt *) -(* Sylvain Boulmé Grenoble-INP, VERIMAG *) -(* *) -(* Copyright Institut National de Recherche en Informatique et en *) -(* Automatique. All rights reserved. This file is distributed *) -(* under the terms of the INRIA Non-Commercial License Agreement. *) -(* *) -(* *********************************************************************) - -(** Branch tunneling (optimization of branches to branches). *) - -Require Import Coqlib Maps Errors. -Require Import AST. -Require Import LTL. - -(** Branch tunneling shortens sequences of branches (with no intervening - computations) by rewriting the branch and conditional branch instructions - so that they jump directly to the end of the branch sequence. - For example: -<< - L1: if (cond) nop L2; L1: nop L3; - L2: nop L3; becomes L2: nop L3; - L3: instr; L3: instr; - L4: if (cond) goto L1; L4: if (cond) nop L1; ->> - This optimization can be applied to several of our intermediate - languages. We choose to perform it on the [LTL] language, - after register allocation but before code linearization. - Register allocation can delete instructions (such as dead - computations or useless moves), therefore there are more - opportunities for tunneling after allocation than before. - Symmetrically, prior tunneling helps linearization to produce - better code, e.g. by revealing that some [branch] instructions are - dead code (as the "branch L3" in the example above). -*) - -(** The implementation consists in two passes: the first pass - records the branch t of each "nop" - and the second pass replace any "nop" node to [pc] - by a branch to a "nop" at [branch_t f pc] - -Naively, we may define [branch_t f pc] as follows: -<< - branch_t f pc = branch_t f pc' if f(pc) = nop pc' - = pc otherwise ->> - However, this definition can fail to terminate if - the program can contain loops consisting only of branches, as in -<< - L1: branch L1; ->> - or -<< - L1: nop L2; - L2: nop L1; ->> - Coq warns us of this fact by not accepting the definition - of [branch_t] above. - - To handle this problem, we use a union-find data structure, adding equalities [pc = pc'] - for every instruction [pc: nop pc'] in the function. - - Moreover, because the elimination of "useless" [Lcond] depends on the current [uf] datastructure, - we need to iterate until we reach a fixpoint. - - Actually, it is simpler and more efficient to perform this in an external oracle, that also returns a measure - in order to help the proof. - - A verifier checks that this data-structure is correct. -*) - -Definition UF := PTree.t (node * Z). - -(* The oracle returns a map of "nop" node to their target with a distance (ie the number of the "nop" node on the path) to the target. *) -Axiom branch_target: LTL.function -> UF. -Extract Constant branch_target => "Tunnelingaux.branch_target". - -Local Open Scope error_monad_scope. - -Definition get (td: UF) pc:node*Z := - match td!pc with - | Some (t,d) => (t,Z.abs d) - | _ => (pc,0) - end. - -Definition target (td: UF) (pc:node): node := fst (get td pc). -Coercion target: UF >-> Funclass. - -(* we check that the domain of [td] is included in the domain of [c] *) -Definition check_included (td: UF) (c: code): option bblock - := PTree.fold (fun (ok:option bblock) pc _ => if ok then c!pc else None) td (Some nil). - -(* we check the validity of targets and their bound: - the distance of a "nop" node (w.r.t to the target) must be greater than the one of its parents. -*) -Definition check_bblock (td: UF) (pc:node) (bb: bblock): res unit - := match td!pc with - | None => OK tt - | Some (tpc, dpc) => - let dpc := Z.abs dpc in - match bb with - | Lbranch s ::_ => - let (ts, ds) := get td s in - if peq tpc ts then - if zlt ds dpc then OK tt - else Error (msg "bad distance in Lbranch") - else Error (msg "invalid skip of Lbranch") - | Lcond _ _ s1 s2 _ :: _ => - let (ts1, ds1) := get td s1 in - let (ts2, ds2) := get td s2 in - if peq tpc ts1 then - if peq tpc ts2 then - if zlt ds1 dpc then - if zlt ds2 dpc then OK tt - else Error (msg "bad distance on else branch") - else Error (msg "bad distance on then branch") - else Error (msg "invalid skip of else branch") - else Error (msg "invalid skip of then branch") - | _ => Error (msg "cannot skip this block") - end - end. - -Definition check_code (td: UF) (c:code): res unit - := PTree.fold (fun ok pc bb => do _ <- ok; check_bblock td pc bb) c (OK tt). - -(** The second pass rewrites all LTL instructions, replacing every - successor [s] of every instruction by [t s], the canonical representative - of its equivalence class in the union-find data structure. *) - -Definition tunnel_instr (t: node -> node) (i: instruction) : instruction := - match i with - | Lbranch s => Lbranch (t s) - | Lcond cond args s1 s2 info => - let s1' := t s1 in let s2' := t s2 in - if peq s1' s2' - then Lbranch s1' - else Lcond cond args s1' s2' info - | Ljumptable arg tbl => Ljumptable arg (List.map t tbl) - | _ => i - end. - -Definition tunnel_block (t: node -> node) (b: bblock) : bblock := - List.map (tunnel_instr t) b. - -Definition tunnel_function (f: LTL.function) : res LTL.function := - let td := branch_target f in - let c := (fn_code f) in - if check_included td c then - do _ <- check_code td c ; OK - (mkfunction - (fn_sig f) - (fn_stacksize f) - (PTree.map1 (tunnel_block td) c) - (td (fn_entrypoint f))) - else - Error (msg "Some node of the union-find is not in the CFG") - . - -Definition tunnel_fundef (f: fundef) : res fundef := - transf_partial_fundef tunnel_function f. - -Definition transf_program (p: program) : res program := - transform_partial_program tunnel_fundef p. diff --git a/backend/Tunnelingaux.ml b/backend/Tunnelingaux.ml deleted file mode 100644 index d4b88a5d..00000000 --- a/backend/Tunnelingaux.ml +++ /dev/null @@ -1,108 +0,0 @@ -(* *************************************************************) -(* *) -(* The Compcert verified compiler *) -(* *) -(* Sylvain Boulmé Grenoble-INP, VERIMAG *) -(* *) -(* Copyright VERIMAG. All rights reserved. *) -(* This file is distributed under the terms of the INRIA *) -(* Non-Commercial License Agreement. *) -(* *) -(* *************************************************************) - -(* - -This file implements the [branch_target] oracle that identifies "nop" branches in a LTL function, -and computes their target node with the distance (ie the number of cummulated nops) toward this target. - -See [Tunneling.v] - -*) - -open Coqlib -open LTL -open Maps -open Camlcoq -open Tunnelinglibs - -module LANG = struct - type code_unit = LTL.bblock - type funct = LTL.coq_function -end - -module OPT = struct - let langname = "LTL" - let limit_tunneling = None - let debug_flag = ref false - let final_dump = false -end - -module Partial = Tunnelinglibs.Tunneling(LANG)(OPT) - -module FUNS = struct - let build_simplified_cfg c acc pc bb = - match bb with - | Lbranch s :: _ -> - let ns = get_node c s in - set_branch c pc ns; - acc - | Lcond (_, _, s1, s2, _) :: _ -> - c.num_rems <- c.num_rems + 1; - let ns1 = get_node c s1 in - let ns2 = get_node c s2 in - let npc = get_node c pc in - npc.inst <- COND(ns1, ns2); - npc::acc - | _ -> acc - - let print_code_unit c println (pc, bb) = - match bb with - | Lbranch s::_ -> (if println then Partial.debug "\n"); Partial.debug "%d:Lbranch %d %s\n" pc (P.to_int s) (string_of_labeli c.nodes pc); false - | Lcond (_, _, s1, s2, _)::_ -> (if println then Partial.debug "\n"); Partial.debug "%d:Lcond (%d,%d) %s\n" pc (P.to_int s1) (P.to_int s2) (string_of_labeli c.nodes pc); false - | _ -> Partial.debug "%d " pc; true - - let fn_code f = f.fn_code - let fn_entrypoint f = f.fn_entrypoint - - - (*************************************************************) - (* Copy-paste of the extracted code of the verifier *) - (* with [raise (BugOnPC (P.to_int pc))] instead of [Error.*] *) - - let check_code_unit td pc bb = - match PTree.get pc td with - | Some p -> - let (tpc, dpc) = p in - let dpc0 = dpc in - (match bb with - | [] -> - raise (BugOnPC (P.to_int pc)) - | i :: _ -> - (match i with - | Lbranch s -> - let (ts, ds) = get td s in - if peq tpc ts - then if zlt ds dpc0 - then () - else raise (BugOnPC (P.to_int pc)) - else raise (BugOnPC (P.to_int pc)) - | Lcond (_, _, s1, s2, _) -> - let (ts1, ds1) = get td s1 in - let (ts2, ds2) = get td s2 in - if peq tpc ts1 - then if peq tpc ts2 - then if zlt ds1 dpc0 - then if zlt ds2 dpc0 - then () - else raise (BugOnPC (P.to_int pc)) - else raise (BugOnPC (P.to_int pc)) - else raise (BugOnPC (P.to_int pc)) - else raise (BugOnPC (P.to_int pc)) - | _ -> - raise (BugOnPC (P.to_int pc)))) - | None -> () -end - -module T = Partial.T(FUNS) -let branch_target = T.branch_target - diff --git a/backend/Tunnelinglibs.ml b/backend/Tunnelinglibs.ml index 1bb35f7a..7c826ba4 100644 --- a/backend/Tunnelinglibs.ml +++ b/backend/Tunnelinglibs.ml @@ -16,7 +16,7 @@ This file implements the core functions of the tunneling passes, for both RTL and LTL, by using a simplified CFG as a transparent interface -See [Tunneling.v] and [RTLTunneling.v] +See [LTLTunneling.v] and [RTLTunneling.v] *) diff --git a/backend/Tunnelingproof.v b/backend/Tunnelingproof.v deleted file mode 100644 index 3bc92f75..00000000 --- a/backend/Tunnelingproof.v +++ /dev/null @@ -1,666 +0,0 @@ -(* *********************************************************************) -(* *) -(* The Compcert verified compiler *) -(* *) -(* Xavier Leroy, INRIA Paris-Rocquencourt *) -(* Sylvain Boulmé Grenoble-INP, VERIMAG *) -(* *) -(* Copyright Institut National de Recherche en Informatique et en *) -(* Automatique. All rights reserved. This file is distributed *) -(* under the terms of the INRIA Non-Commercial License Agreement. *) -(* *) -(* *********************************************************************) - -(** Correctness proof for the branch tunneling optimization. *) - -Require Import Coqlib Maps Errors. -Require Import AST Linking. -Require Import Values Memory Events Globalenvs Smallstep. -Require Import Op Locations LTL. -Require Import Tunneling. - -Local Open Scope nat. - - -(** * Properties of the branch_target, when the verifier succeeds *) - -Definition check_included_spec (c:code) (td:UF) (ok: option bblock) := - ok <> None -> forall pc, c!pc = None -> td!pc = None. - -Lemma check_included_correct (td: UF) (c: code): - check_included_spec c td (check_included td c). -Proof. - apply PTree_Properties.fold_rec with (P := check_included_spec c). -- (* extensionality *) - unfold check_included_spec. intros m m' a EQ IND X pc. rewrite <- EQ; auto. -- (* base case *) - intros _ pc. rewrite PTree.gempty; try congruence. -- (* inductive case *) - unfold check_included_spec. - intros m [|] pc bb NEW ATPC IND; simpl; try congruence. - intros H pc0. rewrite PTree.gsspec; destruct (peq _ _); subst; simpl; try congruence. - intros; eapply IND; try congruence. -Qed. - -Inductive target_bounds (target: node -> node) (bound: node -> nat) (pc: node): (option bblock) -> Prop := - | TB_default (TB: target pc = pc) ob - : target_bounds target bound pc ob - | TB_branch s bb - (EQ: target pc = target s) - (DECREASE: bound s < bound pc) - : target_bounds target bound pc (Some (Lbranch s::bb)) - | TB_cond cond args s1 s2 info bb - (EQ1: target pc = target s1) - (EQ2: target pc = target s2) - (DEC1: bound s1 < bound pc) - (DEC2: bound s2 < bound pc) - : target_bounds target bound pc (Some (Lcond cond args s1 s2 info::bb)) - . -Local Hint Resolve TB_default: core. - -Lemma target_None (td:UF) (pc: node): td!pc = None -> td pc = pc. -Proof. - unfold target, get. intros H; rewrite H; auto. -Qed. -Local Hint Resolve target_None Z.abs_nonneg: core. - -Lemma get_nonneg td pc t d: get td pc = (t, d) -> (0 <= d)%Z. -Proof. - unfold get. destruct (td!_) as [(t0&d0)|]; intros H; inversion H; subst; simpl; lia || auto. -Qed. -Local Hint Resolve get_nonneg: core. - -Definition bound (td: UF) (pc: node) := Z.to_nat (snd (get td pc)). - -Lemma check_bblock_correct (td:UF) (pc:node) (bb: bblock): - check_bblock td pc bb = OK tt -> - target_bounds (target td) (bound td) pc (Some bb). -Proof. - unfold check_bblock, bound. - destruct (td!pc) as [(tpc&dpc)|] eqn:Hpc; auto. - assert (Tpc: td pc = tpc). { unfold target, get; rewrite Hpc; simpl; auto. } - assert (Dpc: snd (get td pc) = Z.abs dpc). { unfold get; rewrite Hpc; simpl; auto. } - destruct bb as [|[ ] bb]; simpl; try congruence. - + destruct (get td s) as (ts, ds) eqn:Hs. - repeat (destruct (peq _ _) || destruct (zlt _ _)); simpl; try congruence. - intros; apply TB_branch. - * rewrite Tpc. unfold target; rewrite Hs; simpl; auto. - * rewrite Dpc, Hs; simpl. apply Z2Nat.inj_lt; eauto. - + destruct (get td s1) as (ts1, ds1) eqn:Hs1. - destruct (get td s2) as (ts2, ds2) eqn:Hs2. - repeat (destruct (peq _ _) || destruct (zlt _ _)); simpl; try congruence. - intros; apply TB_cond. - * rewrite Tpc. unfold target; rewrite Hs1; simpl; auto. - * rewrite Tpc. unfold target; rewrite Hs2; simpl; auto. - * rewrite Dpc, Hs1; simpl. apply Z2Nat.inj_lt; eauto. - * rewrite Dpc, Hs2; simpl. apply Z2Nat.inj_lt; eauto. -Qed. - -Definition check_code_spec (td:UF) (c:code) (ok: res unit) := - ok = OK tt -> forall pc bb, c!pc = Some bb -> target_bounds (target td) (bound td) pc (Some bb). - -Lemma check_code_correct (td:UF) c: - check_code_spec td c (check_code td c). -Proof. - apply PTree_Properties.fold_rec with (P := check_code_spec td). -- (* extensionality *) - unfold check_code_spec. intros m m' a EQ IND X pc bb; subst. rewrite <- ! EQ; eauto. -- (* base case *) - intros _ pc. rewrite PTree.gempty; try congruence. -- (* inductive case *) - unfold check_code_spec. - intros m [[]|] pc bb NEW ATPC IND; simpl; try congruence. - intros H pc0 bb0. rewrite PTree.gsspec; destruct (peq _ _); subst; simpl; auto. - intros X; inversion X; subst. - apply check_bblock_correct; auto. -Qed. - -Theorem branch_target_bounds: - forall f tf pc, - tunnel_function f = OK tf -> - target_bounds (branch_target f) (bound (branch_target f)) pc (f.(fn_code)!pc). -Proof. - unfold tunnel_function; intros f f' pc. - destruct (check_included _ _) eqn:H1; try congruence. - destruct (check_code _ _) as [[]|] eqn:H2; simpl; try congruence. - intros _. - destruct ((fn_code f)!pc) eqn:X. - - exploit check_code_correct; eauto. - - exploit check_included_correct; eauto. - congruence. -Qed. - -Lemma tunnel_function_unfold: - forall f tf pc, - tunnel_function f = OK tf -> - (fn_code tf)!pc = option_map (tunnel_block (branch_target f)) (fn_code f)!pc. -Proof. - unfold tunnel_function; intros f f' pc. - destruct (check_included _ _) eqn:H1; try congruence. - destruct (check_code _ _) as [[]|] eqn:H2; simpl; try congruence. - intros X; inversion X; clear X; subst. - simpl. rewrite PTree.gmap1. auto. -Qed. - -Lemma tunnel_fundef_Internal: - forall f tf, tunnel_fundef (Internal f) = OK tf - -> exists tf', tunnel_function f = OK tf' /\ tf = Internal tf'. -Proof. - intros f tf; simpl. - destruct (tunnel_function f) eqn:X; simpl; try congruence. - intros EQ; inversion EQ. - eexists; split; eauto. -Qed. - -Lemma tunnel_fundef_External: - forall tf ef, tunnel_fundef (External ef) = OK tf - -> tf = External ef. -Proof. - intros tf ef; simpl. intros H; inversion H; auto. -Qed. - -(** * Preservation of semantics *) - -Definition match_prog (p tp: program) := - match_program (fun _ f tf => tunnel_fundef f = OK tf) eq p tp. - -Lemma transf_program_match: - forall prog tprog, transf_program prog = OK tprog -> match_prog prog tprog. -Proof. - intros. eapply match_transform_partial_program_contextual; eauto. -Qed. - -Section PRESERVATION. - -Variables prog tprog: program. -Hypothesis TRANSL: match_prog prog tprog. -Let ge := Genv.globalenv prog. -Let tge := Genv.globalenv tprog. - -Lemma functions_translated: - forall (v: val) (f: fundef), - Genv.find_funct ge v = Some f -> - exists tf, tunnel_fundef f = OK tf /\ Genv.find_funct tge v = Some tf. -Proof. - intros. exploit (Genv.find_funct_match TRANSL); eauto. - intros (cu & tf & A & B & C). - repeat eexists; intuition eauto. -Qed. - -Lemma function_ptr_translated: - forall v f, - Genv.find_funct_ptr ge v = Some f -> - exists tf, - Genv.find_funct_ptr tge v = Some tf /\ tunnel_fundef f = OK tf. -Proof. - intros. - exploit (Genv.find_funct_ptr_transf_partial TRANSL); eauto. -Qed. - -Lemma symbols_preserved s: Genv.find_symbol tge s = Genv.find_symbol ge s. -Proof. - rewrite <- (Genv.find_symbol_match TRANSL). reflexivity. -Qed. - -Lemma senv_preserved: - Senv.equiv ge tge. -Proof. - eapply (Genv.senv_match TRANSL). -Qed. - -Lemma sig_preserved: - forall f tf, tunnel_fundef f = OK tf -> funsig tf = funsig f. -Proof. - intros. destruct f. - - simpl in H. monadInv H. unfold tunnel_function in EQ. - destruct (check_included _ _); try congruence. - monadInv EQ. simpl; auto. - - simpl in H. monadInv H. reflexivity. -Qed. - -Lemma fn_stacksize_preserved: - forall f tf, tunnel_function f = OK tf -> fn_stacksize tf = fn_stacksize f. -Proof. - intros f tf; unfold tunnel_function. - destruct (check_included _ _); try congruence. - destruct (check_code _ _); simpl; try congruence. - intros H; inversion H; simpl; auto. -Qed. - -Lemma fn_entrypoint_preserved: - forall f tf, tunnel_function f = OK tf -> fn_entrypoint tf = branch_target f (fn_entrypoint f). -Proof. - intros f tf; unfold tunnel_function. - destruct (check_included _ _); try congruence. - destruct (check_code _ _); simpl; try congruence. - intros H; inversion H; simpl; auto. -Qed. - - -(** The proof of semantic preservation is a simulation argument - based on diagrams of the following form: -<< - st1 --------------- st2 - | | - t| ?|t - | | - v v - st1'--------------- st2' ->> - The [match_states] predicate, defined below, captures the precondition - between states [st1] and [st2], as well as the postcondition between - [st1'] and [st2']. One transition in the source code (left) can correspond - to zero or one transition in the transformed code (right). The - "zero transition" case occurs when executing a [Lnop] instruction - in the source code that has been removed by tunneling. - - In the definition of [match_states], what changes between the original and - transformed codes is mainly the control-flow - (in particular, the current program point [pc]), but also some values - and memory states, since some [Vundef] values can become more defined - as a consequence of eliminating useless [Lcond] instructions. *) - -Definition locmap_lessdef (ls1 ls2: locset) : Prop := - forall l, Val.lessdef (ls1 l) (ls2 l). - -Inductive match_stackframes: stackframe -> stackframe -> Prop := - | match_stackframes_intro: - forall f tf sp ls0 bb tls0, - locmap_lessdef ls0 tls0 -> - tunnel_function f = OK tf -> - match_stackframes - (Stackframe f sp ls0 bb) - (Stackframe tf sp tls0 (tunnel_block (branch_target f) bb)). - -Inductive match_states: state -> state -> Prop := - | match_states_intro: - forall s f tf sp pc ls m ts tls tm - (STK: list_forall2 match_stackframes s ts) - (LS: locmap_lessdef ls tls) - (MEM: Mem.extends m tm) - (TF: tunnel_function f = OK tf), - match_states (State s f sp pc ls m) - (State ts tf sp (branch_target f pc) tls tm) - | match_states_block: - forall s f tf sp bb ls m ts tls tm - (STK: list_forall2 match_stackframes s ts) - (LS: locmap_lessdef ls tls) - (MEM: Mem.extends m tm) - (TF: tunnel_function f = OK tf), - match_states (Block s f sp bb ls m) - (Block ts tf sp (tunnel_block (branch_target f) bb) tls tm) - | match_states_interm: - forall s f tf sp pc i bb ls m ts tls tm - (STK: list_forall2 match_stackframes s ts) - (LS: locmap_lessdef ls tls) - (MEM: Mem.extends m tm) - (IBRANCH: tunnel_instr (branch_target f) i = Lbranch pc) - (TF: tunnel_function f = OK tf), - match_states (Block s f sp (i :: bb) ls m) - (State ts tf sp pc tls tm) - | match_states_call: - forall s f tf ls m ts tls tm - (STK: list_forall2 match_stackframes s ts) - (LS: locmap_lessdef ls tls) - (MEM: Mem.extends m tm) - (TF: tunnel_fundef f = OK tf), - match_states (Callstate s f ls m) - (Callstate ts tf tls tm) - | match_states_return: - forall s ls m ts tls tm - (STK: list_forall2 match_stackframes s ts) - (LS: locmap_lessdef ls tls) - (MEM: Mem.extends m tm), - match_states (Returnstate s ls m) - (Returnstate ts tls tm). - -(** Properties of [locmap_lessdef] *) - -Lemma reglist_lessdef: - forall rl ls1 ls2, - locmap_lessdef ls1 ls2 -> Val.lessdef_list (reglist ls1 rl) (reglist ls2 rl). -Proof. - induction rl; simpl; intros; auto. -Qed. - -Lemma locmap_set_lessdef: - forall ls1 ls2 v1 v2 l, - locmap_lessdef ls1 ls2 -> Val.lessdef v1 v2 -> locmap_lessdef (Locmap.set l v1 ls1) (Locmap.set l v2 ls2). -Proof. - intros; red; intros l'. unfold Locmap.set. destruct (Loc.eq l l'). -- destruct l; auto using Val.load_result_lessdef. -- destruct (Loc.diff_dec l l'); auto. -Qed. - -Lemma locmap_set_undef_lessdef: - forall ls1 ls2 l, - locmap_lessdef ls1 ls2 -> locmap_lessdef (Locmap.set l Vundef ls1) ls2. -Proof. - intros; red; intros l'. unfold Locmap.set. destruct (Loc.eq l l'). -- destruct l; auto. destruct ty; auto. -- destruct (Loc.diff_dec l l'); auto. -Qed. - -Lemma locmap_undef_regs_lessdef: - forall rl ls1 ls2, - locmap_lessdef ls1 ls2 -> locmap_lessdef (undef_regs rl ls1) (undef_regs rl ls2). -Proof. - induction rl as [ | r rl]; intros; simpl. auto. apply locmap_set_lessdef; auto. -Qed. - -Lemma locmap_undef_regs_lessdef_1: - forall rl ls1 ls2, - locmap_lessdef ls1 ls2 -> locmap_lessdef (undef_regs rl ls1) ls2. -Proof. - induction rl as [ | r rl]; intros; simpl. auto. apply locmap_set_undef_lessdef; auto. -Qed. - -Lemma locmap_getpair_lessdef: - forall p ls1 ls2, - locmap_lessdef ls1 ls2 -> Val.lessdef (Locmap.getpair p ls1) (Locmap.getpair p ls2). -Proof. - intros; destruct p; simpl; auto using Val.longofwords_lessdef. -Qed. - -Lemma locmap_getpairs_lessdef: - forall pl ls1 ls2, - locmap_lessdef ls1 ls2 -> - Val.lessdef_list (map (fun p => Locmap.getpair p ls1) pl) (map (fun p => Locmap.getpair p ls2) pl). -Proof. - intros. induction pl; simpl; auto using locmap_getpair_lessdef. -Qed. - -Lemma locmap_setpair_lessdef: - forall p ls1 ls2 v1 v2, - locmap_lessdef ls1 ls2 -> Val.lessdef v1 v2 -> locmap_lessdef (Locmap.setpair p v1 ls1) (Locmap.setpair p v2 ls2). -Proof. - intros; destruct p; simpl; auto using locmap_set_lessdef, Val.loword_lessdef, Val.hiword_lessdef. -Qed. - -Lemma locmap_setres_lessdef: - forall res ls1 ls2 v1 v2, - locmap_lessdef ls1 ls2 -> Val.lessdef v1 v2 -> locmap_lessdef (Locmap.setres res v1 ls1) (Locmap.setres res v2 ls2). -Proof. - induction res; intros; simpl; auto using locmap_set_lessdef, Val.loword_lessdef, Val.hiword_lessdef. -Qed. - -Lemma locmap_undef_caller_save_regs_lessdef: - forall ls1 ls2, - locmap_lessdef ls1 ls2 -> locmap_lessdef (undef_caller_save_regs ls1) (undef_caller_save_regs ls2). -Proof. - intros; red; intros. unfold undef_caller_save_regs. - destruct l. -- destruct (Conventions1.is_callee_save r); auto. -- destruct sl; auto. -Qed. - -Lemma find_function_translated: - forall ros ls tls fd, - locmap_lessdef ls tls -> - find_function ge ros ls = Some fd -> - exists tfd, tunnel_fundef fd = OK tfd /\ find_function tge ros tls = Some tfd. -Proof. - intros. destruct ros; simpl in *. -- assert (E: tls (R m) = ls (R m)). - { exploit Genv.find_funct_inv; eauto. intros (b & EQ). - generalize (H (R m)). rewrite EQ. intros LD; inv LD. auto. } - rewrite E. exploit functions_translated; eauto. -- rewrite symbols_preserved. destruct (Genv.find_symbol ge i); inv H0. - exploit function_ptr_translated; eauto. - intros (tf & X1 & X2). exists tf; intuition. -Qed. - -Lemma call_regs_lessdef: - forall ls1 ls2, locmap_lessdef ls1 ls2 -> locmap_lessdef (call_regs ls1) (call_regs ls2). -Proof. - intros; red; intros. destruct l as [r | [] ofs ty]; simpl; auto. -Qed. - -Lemma return_regs_lessdef: - forall caller1 callee1 caller2 callee2, - locmap_lessdef caller1 caller2 -> - locmap_lessdef callee1 callee2 -> - locmap_lessdef (return_regs caller1 callee1) (return_regs caller2 callee2). -Proof. - intros; red; intros. destruct l; simpl. -- destruct (Conventions1.is_callee_save r); auto. -- destruct sl; auto. -Qed. - -(** To preserve non-terminating behaviours, we show that the transformed - code cannot take an infinity of "zero transition" cases. - We use the following [measure] function over source states, - which decreases strictly in the "zero transition" case. *) - -Definition measure (st: state) : nat := - match st with - | State s f sp pc ls m => (bound (branch_target f) pc) * 2 - | Block s f sp (Lbranch pc :: _) ls m => (bound (branch_target f) pc) * 2 + 1 - | Block s f sp (Lcond _ _ pc1 pc2 _ :: _) ls m => (max (bound (branch_target f) pc1) (bound (branch_target f) pc2)) * 2 + 1 - | Block s f sp bb ls m => 0 - | Callstate s f ls m => 0 - | Returnstate s ls m => 0 - end. - -Lemma match_parent_locset: - forall s ts, - list_forall2 match_stackframes s ts -> - locmap_lessdef (parent_locset s) (parent_locset ts). -Proof. - induction 1; simpl. -- red; auto. -- inv H; auto. -Qed. - -Lemma tunnel_step_correct: - forall st1 t st2, step ge st1 t st2 -> - forall st1' (MS: match_states st1 st1'), - (exists st2', step tge st1' t st2' /\ match_states st2 st2') - \/ (measure st2 < measure st1 /\ t = E0 /\ match_states st2 st1')%nat. -Proof. - induction 1; intros; try inv MS; try (simpl in IBRANCH; inv IBRANCH). - -- (* entering a block *) - exploit (branch_target_bounds f tf pc); eauto. - rewrite H. intros X; inversion X. - + (* TB_default *) - rewrite TB; left. econstructor; split. - * econstructor. simpl. erewrite tunnel_function_unfold, H ; simpl; eauto. - * econstructor; eauto. - + (* FT_branch *) - simpl; right. - rewrite EQ; repeat (econstructor; lia || eauto). - + (* FT_cond *) - simpl; right. - repeat (econstructor; lia || eauto); simpl. - destruct (peq _ _); try congruence. -- (* Lop *) - exploit eval_operation_lessdef. apply reglist_lessdef; eauto. eauto. eauto. - intros (tv & EV & LD). - left; simpl; econstructor; split. - eapply exec_Lop with (v := tv); eauto. - rewrite <- EV. apply eval_operation_preserved. exact symbols_preserved. - econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef. -- (* Lload *) - exploit eval_addressing_lessdef. apply reglist_lessdef; eauto. eauto. - intros (ta & EV & LD). - exploit Mem.loadv_extends. eauto. eauto. eexact LD. - intros (tv & LOAD & LD'). - left; simpl; econstructor; split. - eapply exec_Lload with (a := ta). - rewrite <- EV. apply eval_addressing_preserved. exact symbols_preserved. - eauto. eauto. - econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef. -- (* Lload notrap1 *) - exploit eval_addressing_lessdef_none. apply reglist_lessdef; eauto. eassumption. - left; simpl; econstructor; split. - eapply exec_Lload_notrap1. - rewrite <- H0. - apply eval_addressing_preserved. exact symbols_preserved. eauto. - econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef. -- (* Lload notrap2 *) - exploit eval_addressing_lessdef. apply reglist_lessdef; eauto. eauto. - intros (ta & EV & LD). - destruct (Mem.loadv chunk tm ta) eqn:Htload. - { - left; simpl; econstructor; split. - eapply exec_Lload. - rewrite <- EV. apply eval_addressing_preserved. exact symbols_preserved. - exact Htload. eauto. - econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef. - } - { - left; simpl; econstructor; split. - eapply exec_Lload_notrap2. - rewrite <- EV. apply eval_addressing_preserved. exact symbols_preserved. - exact Htload. eauto. - econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef. - } -- (* Lgetstack *) - left; simpl; econstructor; split. - econstructor; eauto. - econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef. -- (* Lsetstack *) - left; simpl; econstructor; split. - econstructor; eauto. - econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef. -- (* Lstore *) - exploit eval_addressing_lessdef. apply reglist_lessdef; eauto. eauto. - intros (ta & EV & LD). - exploit Mem.storev_extends. eauto. eauto. eexact LD. apply LS. - intros (tm' & STORE & MEM'). - left; simpl; econstructor; split. - eapply exec_Lstore with (a := ta). - rewrite <- EV. apply eval_addressing_preserved. exact symbols_preserved. - eauto. eauto. - econstructor; eauto using locmap_undef_regs_lessdef. -- (* Lcall *) - left; simpl. - exploit find_function_translated; eauto. - intros (tfd & Htfd & FIND). - econstructor; split. - + eapply exec_Lcall; eauto. - erewrite sig_preserved; eauto. - + econstructor; eauto. - constructor; auto. - constructor; auto. -- (* Ltailcall *) - exploit find_function_translated. 2: eauto. - { eauto using return_regs_lessdef, match_parent_locset. } - intros (tfd & Htfd & FIND). - exploit Mem.free_parallel_extends. eauto. eauto. intros (tm' & FREE & MEM'). - left; simpl; econstructor; split. - + eapply exec_Ltailcall; eauto. - * eapply sig_preserved; eauto. - * erewrite fn_stacksize_preserved; eauto. - + econstructor; eauto using return_regs_lessdef, match_parent_locset. -- (* Lbuiltin *) - exploit eval_builtin_args_lessdef. eexact LS. eauto. eauto. intros (tvargs & EVA & LDA). - exploit external_call_mem_extends; eauto. intros (tvres & tm' & A & B & C & D). - left; simpl; econstructor; split. - eapply exec_Lbuiltin; eauto. - eapply eval_builtin_args_preserved with (ge1 := ge); eauto. exact symbols_preserved. - eapply external_call_symbols_preserved. apply senv_preserved. eauto. - econstructor; eauto using locmap_setres_lessdef, locmap_undef_regs_lessdef. -- (* Lbranch (preserved) *) - left; simpl; econstructor; split. - eapply exec_Lbranch; eauto. - fold (branch_target f pc). econstructor; eauto. -- (* Lbranch (eliminated) *) - right; split. simpl. lia. split. auto. constructor; auto. -- (* Lcond (preserved) *) - simpl; left; destruct (peq _ _) eqn: EQ. - + econstructor; split. - eapply exec_Lbranch. - destruct b. - * constructor; eauto using locmap_undef_regs_lessdef_1. - * rewrite e. constructor; eauto using locmap_undef_regs_lessdef_1. - + econstructor; split. - eapply exec_Lcond; eauto. eapply eval_condition_lessdef; eauto using reglist_lessdef. - destruct b; econstructor; eauto using locmap_undef_regs_lessdef. -- (* Lcond (eliminated) *) - destruct (peq _ _) eqn: EQ; try inv H1. - right; split; simpl. - + destruct b. - generalize (Nat.le_max_l (bound (branch_target f) pc1) (bound (branch_target f) pc2)); lia. - generalize (Nat.le_max_r (bound (branch_target f) pc1) (bound (branch_target f) pc2)); lia. - + destruct b. - -- repeat (constructor; auto). - -- rewrite e; repeat (constructor; auto). -- (* Ljumptable *) - assert (tls (R arg) = Vint n). - { generalize (LS (R arg)); rewrite H; intros LD; inv LD; auto. } - left; simpl; econstructor; split. - eapply exec_Ljumptable. - eauto. rewrite list_nth_z_map, H0; simpl; eauto. eauto. - econstructor; eauto using locmap_undef_regs_lessdef. -- (* Lreturn *) - exploit Mem.free_parallel_extends. eauto. eauto. intros (tm' & FREE & MEM'). - left; simpl; econstructor; split. - + eapply exec_Lreturn; eauto. - erewrite fn_stacksize_preserved; eauto. - + constructor; eauto using return_regs_lessdef, match_parent_locset. -- (* internal function *) - exploit tunnel_fundef_Internal; eauto. - intros (tf' & TF' & ITF). subst. - exploit Mem.alloc_extends. eauto. eauto. apply Z.le_refl. apply Z.le_refl. - intros (tm' & ALLOC & MEM'). - left; simpl. - econstructor; split. - + eapply exec_function_internal; eauto. - erewrite fn_stacksize_preserved; eauto. - + simpl. - erewrite (fn_entrypoint_preserved f tf'); auto. - econstructor; eauto using locmap_undef_regs_lessdef, call_regs_lessdef. -- (* external function *) - exploit external_call_mem_extends; eauto using locmap_getpairs_lessdef. - intros (tvres & tm' & A & B & C & D). - left; simpl; econstructor; split. - + erewrite (tunnel_fundef_External tf ef); eauto. - eapply exec_function_external; eauto. - eapply external_call_symbols_preserved; eauto. apply senv_preserved. - + simpl. econstructor; eauto using locmap_setpair_lessdef, locmap_undef_caller_save_regs_lessdef. -- (* return *) - inv STK. inv H1. - left; econstructor; split. - eapply exec_return; eauto. - constructor; auto. -Qed. - -Lemma transf_initial_states: - forall st1, initial_state prog st1 -> - exists st2, initial_state tprog st2 /\ match_states st1 st2. -Proof. - intros. inversion H. - exploit function_ptr_translated; eauto. - intros (tf & Htf & Hf). - exists (Callstate nil tf (Locmap.init Vundef) m0); split. - econstructor; eauto. - apply (Genv.init_mem_transf_partial TRANSL); auto. - rewrite (match_program_main TRANSL). - rewrite symbols_preserved. eauto. - rewrite <- H3. apply sig_preserved. auto. - constructor. constructor. red; simpl; auto. apply Mem.extends_refl. auto. -Qed. - -Lemma transf_final_states: - forall st1 st2 r, - match_states st1 st2 -> final_state st1 r -> final_state st2 r. -Proof. - intros. inv H0. inv H. inv STK. - set (p := map_rpair R (Conventions1.loc_result signature_main)) in *. - generalize (locmap_getpair_lessdef p _ _ LS). rewrite H1; intros LD; inv LD. - econstructor; eauto. -Qed. - -Theorem transf_program_correct: - forward_simulation (LTL.semantics prog) (LTL.semantics tprog). -Proof. - eapply forward_simulation_opt. - apply senv_preserved. - eexact transf_initial_states. - eexact transf_final_states. - eexact tunnel_step_correct. -Qed. - -End PRESERVATION. diff --git a/doc/index-kvx.html b/doc/index-kvx.html index 62afb423..daa4cdc4 100644 --- a/doc/index-kvx.html +++ b/doc/index-kvx.html @@ -315,10 +315,10 @@ This IR is generic over the processor, even if currently, only used for KVX. - Branch tunneling + (LTL) Branch tunneling LTL to LTL - Tunneling - Tunnelingproof + LTLTunneling + LTLTunnelingproof diff --git a/doc/index.html b/doc/index.html index c3912cb2..34b87924 100644 --- a/doc/index.html +++ b/doc/index.html @@ -270,10 +270,10 @@ code. - Branch tunneling + (LTL) Branch tunneling LTL to LTL - Tunneling - Tunnelingproof + LTLTunneling + LTLTunnelingproof diff --git a/driver/Compiler.vexpand b/driver/Compiler.vexpand index a751b232..952bed22 100644 --- a/driver/Compiler.vexpand +++ b/driver/Compiler.vexpand @@ -308,7 +308,7 @@ EXPAND_RTL_FORWARD_SIMULATIONS eapply compose_forward_simulations. eapply Allocationproof.transf_program_correct; eassumption. eapply compose_forward_simulations. - eapply Tunnelingproof.transf_program_correct; eassumption. + eapply LTLTunnelingproof.transf_program_correct; eassumption. eapply compose_forward_simulations. eapply Linearizeproof.transf_program_correct; eassumption. eapply compose_forward_simulations. diff --git a/tools/compiler_expand.ml b/tools/compiler_expand.ml index 067f0e4b..e45f0617 100644 --- a/tools/compiler_expand.ml +++ b/tools/compiler_expand.ml @@ -58,7 +58,7 @@ let post_rtl_passes = PARTIAL, Always, Require, (Some "Prepass scheduling"), "RTLpathScheduler", Noprint; TOTAL, Always, Require, (Some "Projection to RTL"), "RTLpath", (Print (Printf.sprintf "RTL %d" ((Array.length rtl_passes) + 1))); PARTIAL, Always, Require, (Some "Register allocation"), "Allocation", (Print "LTL 1"); - PARTIAL, Always, Require, (Some "Branch tunneling"), "Tunneling", (Print "LTL 2"); + PARTIAL, Always, Require, (Some "LTL Branch tunneling"), "LTLTunneling", (Print "LTL 2"); PARTIAL, Always, Require, (Some "CFG linearization"), "Linearize", Noprint; TOTAL, Always, Require, (Some "Label cleanup"), "CleanupLabels", Noprint; PARTIAL, (Option "debug"), Require, (Some "Debugging info for local variables"), "Debugvar", Noprint; -- cgit From 10cbe4b28ef6dc5d02c9a5d4d369484e4943a18d Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Tue, 22 Jun 2021 15:57:21 +0200 Subject: Changed default threshold value following tests --- driver/Clflags.ml | 2 +- scheduling/InstructionScheduler.ml | 10 ++-------- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/driver/Clflags.ml b/driver/Clflags.ml index 1f31bd3e..c90fdb8c 100644 --- a/driver/Clflags.ml +++ b/driver/Clflags.ml @@ -115,5 +115,5 @@ let option_inline_auto_threshold = ref 0 let option_profile_arcs = ref false let option_fbranch_probabilities = ref true let option_debug_compcert = ref 0 -let option_regpres_threshold = ref 5 +let option_regpres_threshold = ref 2 let main_function_name = ref "main" diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml index 5b4c87f4..cd924825 100644 --- a/scheduling/InstructionScheduler.ml +++ b/scheduling/InstructionScheduler.ml @@ -551,19 +551,13 @@ let reg_pres_scheduler (problem : problem) : solution option = | Some (t, n) -> Hashtbl.remove counts r; if n = 1 then - ((* print_string "yaaaaaaaaaaaas "; - * print_int (Camlcoq.P.to_int r); - * print_newline (); *) - Hashtbl.remove live_regs r; + (Hashtbl.remove live_regs r; available_regs.(t) <- available_regs.(t) + 1)) else let t = class_r r in match Hashtbl.find_opt live_regs r with - | None -> ((* print_string "noooooooooo "; - * print_int (Camlcoq.P.to_int r); - * print_newline (); *) - Hashtbl.add live_regs r t; + | None -> (Hashtbl.add live_regs r t; available_regs.(t) <- available_regs.(t) - 1) | Some i -> () -- cgit From dfa09586ae40c70769eeda688a0e7f59f611749f Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Thu, 24 Jun 2021 18:33:20 +0200 Subject: Another scheduler --- driver/Driver.ml | 2 +- scheduling/InstructionScheduler.ml | 201 ++++++++++++++++++++++++++++++++++++ scheduling/InstructionScheduler.mli | 2 + 3 files changed, 204 insertions(+), 1 deletion(-) diff --git a/driver/Driver.ml b/driver/Driver.ml index fa187f26..4f43d7c9 100644 --- a/driver/Driver.ml +++ b/driver/Driver.ml @@ -210,7 +210,7 @@ Processing options: -mtune= Type of CPU (for scheduling on some architectures) -fprepass Perform prepass scheduling (only on some architectures) [on] -fprepass= Perform postpass scheduling with the specified optimization [list] - (=list: list scheduling, =revlist: reverse list scheduling, =regpres: list scheduling aware of register pressure, =zigzag: zigzag scheduling, =ilp: ILP, =greedy: just packing bundles) + (=list: list scheduling, =revlist: reverse list scheduling, =regpres: list scheduling aware of register pressure, =regpres_bis: variant of regpres, =zigzag: zigzag scheduling, =ilp: ILP, =greedy: just packing bundles) -regpres-threshold n With `-fprepass= regpres`, set threshold value for number of free registers before trying to decrease register pressure -fpostpass Perform postpass scheduling (only for K1 architecture) [on] -fpostpass= Perform postpass scheduling with the specified optimization [list] diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml index cd924825..99002c36 100644 --- a/scheduling/InstructionScheduler.ml +++ b/scheduling/InstructionScheduler.ml @@ -602,6 +602,206 @@ let reg_pres_scheduler (problem : problem) : solution option = ;; +(********************************************************************) + +let reg_pres_scheduler_bis (problem : problem) : solution option = + DebugPrint.debug_flag := true; + Printf.printf "\nNEW\n\n"; + let nr_instructions = get_nr_instructions problem in + let successors = get_successors problem + and predecessors = get_predecessors problem + and times = Array.make (nr_instructions+1) (-1) in + let live_regs_entry = problem.live_regs_entry in + + (* let available_regs = Array.copy Machregsaux.nr_regs in *) + + let class_r r = + Machregsaux.class_of_type (problem.typing r) in + + let live_regs = Hashtbl.create 42 in + + List.iter (fun r -> let classe = Machregsaux.class_of_type + (problem.typing r) in + (* available_regs.(classe) + * <- available_regs.(classe) - 1; *) + Hashtbl.add live_regs r classe) + (Registers.Regset.elements live_regs_entry); + + + let counts, mentions = + match problem.reference_counting with + | Some (l, r) -> l, r + | None -> assert false + in + + let fold_delta a (r, b) = + a + (if b then + match Hashtbl.find_opt counts r with + | Some (_, 1) -> 1 + | _ -> 0 + else + match Hashtbl.find_opt live_regs r with + | None -> -1 + | Some t -> 0 + ) in + + let priorities = critical_paths successors in + + let current_resources = Array.copy problem.resource_bounds in + + let compare_pres x y = + let pdy = List.fold_left (fold_delta) 0 mentions.(y) in + let pdx = List.fold_left (fold_delta) 0 mentions.(x) in + match pdy - pdx with + | 0 -> x - y + | z -> z + in + + let module InstrSet = + Set.Make (struct + type t = int + let compare x y = + match priorities.(y) - priorities.(x) with + | 0 -> x - y + | z -> z + end) in + + let max_time = bound_max_time problem (* + 5*nr_instructions *) in + let ready = Array.make max_time InstrSet.empty in + + Array.iteri (fun i preds -> + if i < nr_instructions && preds = [] + then ready.(0) <- InstrSet.add i ready.(0)) predecessors; + + let current_time = ref 0 + and earliest_time i = + try + let time = ref (-1) in + List.iter (fun (j, latency) -> + if times.(j) < 0 + then raise Exit + else let t = times.(j) + latency in + if t > !time + then time := t) predecessors.(i); + assert (!time >= 0); + !time + with Exit -> -1 + in + + let advance_time () = + (* Printf.printf "ADV\n"; + * flush stdout; *) + (if !current_time < max_time-1 + then ( + Array.blit problem.resource_bounds 0 current_resources 0 + (Array.length current_resources); + ready.(!current_time + 1) <- + InstrSet.union (ready.(!current_time)) + (ready.(!current_time +1)); + ready.(!current_time) <- InstrSet.empty)); + incr current_time + in + + + let attempt_scheduling ready usages = + let result = ref [] in + try + InstrSet.iter (fun i -> + if vector_less_equal usages.(i) current_resources + then + if !result = [] || priorities.(i) = priorities.(List.hd (!result)) + then + result := i::(!result) + else raise Exit + ) ready; + if !result <> [] then raise Exit; + -1 + with + Exit -> + let mini = List.fold_left (fun a b -> + if a = -1 || compare_pres a b > 0 + then b else a + ) (-1) !result in + vector_subtract usages.(mini) current_resources; + mini + in + + while !current_time < max_time + do + if (InstrSet.is_empty ready.(!current_time)) + then advance_time () + else + match attempt_scheduling ready.(!current_time) + problem.instruction_usages with + | -1 -> advance_time() + | i -> ( + Printf.printf "ISSUED: %d\nREADY: " i; + InstrSet.iter (fun i -> Printf.printf "%d " i) + ready.(!current_time); + Printf.printf "\nSUCC: "; + List.iter (fun (i, l) -> Printf.printf "%d " i) + successors.(i); + Printf.printf "\n\n"; + flush stdout; + assert(times.(i) < 0); + times.(i) <- !current_time; + ready.(!current_time) + <- InstrSet.remove i (ready.(!current_time)); + (List.iter (fun (r,b) -> + if b then + (match Hashtbl.find_opt counts r with + | None -> assert false + | Some (t, n) -> + Hashtbl.remove counts r; + if n = 1 then + (Hashtbl.remove live_regs r; + (* available_regs.(t) + * <- available_regs.(t) + 1 *))) + else + let t = class_r r in + match Hashtbl.find_opt live_regs r with + | None -> (Hashtbl.add live_regs r t; + (* available_regs.(t) + * <- available_regs.(t) - 1 *)) + | Some i -> () + ) mentions.(i)); + List.iter (fun (instr_to, latency) -> + if instr_to < nr_instructions then + match earliest_time instr_to with + | -1 -> () + | to_time -> + ((* DebugPrint.debug "TO TIME %d : %d\n" to_time + * (Array.length ready); *) + ready.(to_time) + <- InstrSet.add instr_to ready.(to_time)) + ) successors.(i); + successors.(i) <- [] + ) + done; + + try + let final_time = ref (-1) in + for i = 0 to nr_instructions - 1 do + (* print_int i; + * flush stdout; *) + (if times.(i) < 0 then raise Exit); + (if !final_time < times.(i) + 1 then final_time := times.(i) + 1) + done; + List.iter (fun (i, latency) -> + let target_time = latency + times.(i) in + if target_time > !final_time then + final_time := target_time) predecessors.(nr_instructions); + times.(nr_instructions) <- !final_time; + DebugPrint.debug_flag := false; + Some times + with Exit -> + DebugPrint.debug "reg_pres_sched failed\n"; + DebugPrint.debug_flag := false; + None + +;; + +(********************************************************************) type bundle = int list;; @@ -1535,5 +1735,6 @@ let scheduler_by_name name = | "list" -> validated_scheduler list_scheduler | "revlist" -> validated_scheduler reverse_list_scheduler | "regpres" -> validated_scheduler reg_pres_scheduler + | "regpres_bis" -> validated_scheduler reg_pres_scheduler_bis | "greedy" -> greedy_scheduler | s -> failwith ("unknown scheduler: " ^ s);; diff --git a/scheduling/InstructionScheduler.mli b/scheduling/InstructionScheduler.mli index b5a5463b..48c7bc09 100644 --- a/scheduling/InstructionScheduler.mli +++ b/scheduling/InstructionScheduler.mli @@ -81,6 +81,8 @@ val list_scheduler : problem -> solution option (** WIP : Same as list_scheduler, but schedules instructions which decrease register pressure when it gets too high. *) val reg_pres_scheduler : problem -> solution option + +val reg_pres_scheduler_bis : problem -> solution option (** Schedule the problem using the order of instructions without any reordering *) val greedy_scheduler : problem -> solution option -- cgit From 9ac49c465f9c8969fba00e6242da0c188a6a3080 Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Fri, 25 Jun 2021 09:42:41 +0200 Subject: Changed printfs into debugs --- scheduling/InstructionScheduler.ml | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml index 99002c36..e2413bc0 100644 --- a/scheduling/InstructionScheduler.ml +++ b/scheduling/InstructionScheduler.ml @@ -735,14 +735,13 @@ let reg_pres_scheduler_bis (problem : problem) : solution option = problem.instruction_usages with | -1 -> advance_time() | i -> ( - Printf.printf "ISSUED: %d\nREADY: " i; - InstrSet.iter (fun i -> Printf.printf "%d " i) + DebugPrint.debug "ISSUED: %d\nREADY: " i; + InstrSet.iter (fun i -> DebugPrint.debug "%d " i) ready.(!current_time); - Printf.printf "\nSUCC: "; - List.iter (fun (i, l) -> Printf.printf "%d " i) + DebugPrint.debug "\nSUCC: "; + List.iter (fun (i, l) -> DebugPrint.debug "%d " i) successors.(i); - Printf.printf "\n\n"; - flush stdout; + DebugPrint.debug "\n\n"; assert(times.(i) < 0); times.(i) <- !current_time; ready.(!current_time) -- cgit From b96a48de58e1969535865b7b345514a24f7178a6 Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Mon, 28 Jun 2021 16:04:44 +0200 Subject: Change temporary solution (see prev commits), and add option for it --- driver/Clflags.ml | 1 + driver/Driver.ml | 2 ++ scheduling/InstructionScheduler.ml | 21 +++++++++++++++++---- 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/driver/Clflags.ml b/driver/Clflags.ml index c90fdb8c..d01b57f0 100644 --- a/driver/Clflags.ml +++ b/driver/Clflags.ml @@ -116,4 +116,5 @@ let option_profile_arcs = ref false let option_fbranch_probabilities = ref true let option_debug_compcert = ref 0 let option_regpres_threshold = ref 2 +let option_regpres_temp = ref false let main_function_name = ref "main" diff --git a/driver/Driver.ml b/driver/Driver.ml index 4f43d7c9..22c75f44 100644 --- a/driver/Driver.ml +++ b/driver/Driver.ml @@ -212,6 +212,7 @@ Processing options: -fprepass= Perform postpass scheduling with the specified optimization [list] (=list: list scheduling, =revlist: reverse list scheduling, =regpres: list scheduling aware of register pressure, =regpres_bis: variant of regpres, =zigzag: zigzag scheduling, =ilp: ILP, =greedy: just packing bundles) -regpres-threshold n With `-fprepass= regpres`, set threshold value for number of free registers before trying to decrease register pressure + -fregpres-temp use the temporary solution (default no) -fpostpass Perform postpass scheduling (only for K1 architecture) [on] -fpostpass= Perform postpass scheduling with the specified optimization [list] (=list: list scheduling, =ilp: ILP, =greedy: just packing bundles) @@ -426,6 +427,7 @@ let cmdline_actions = @ f_opt "redundancy" option_fredundancy @ [ Exact "-mtune", String (fun s -> option_mtune := s) ] @ f_opt "prepass" option_fprepass + @ f_opt "regpres-temp" option_regpres_temp @ f_opt "postpass" option_fpostpass @ [ Exact "-ftailduplicate", Integer (fun n -> option_ftailduplicate := n) ] @ f_opt "predict" option_fpredict diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml index e2413bc0..a881df68 100644 --- a/scheduling/InstructionScheduler.ml +++ b/scheduling/InstructionScheduler.ml @@ -486,7 +486,7 @@ let reg_pres_scheduler (problem : problem) : solution option = (* ALL MENTIONS TO cnt ARE PLACEHOLDERS *) let cnt = ref 0 in - + let attempt_scheduling ready usages = let result = ref (-1) in try @@ -500,7 +500,7 @@ let reg_pres_scheduler (problem : problem) : solution option = * print_int (Hashtbl.length live_regs); * print_newline (); * flush stdout; *) - if !cnt < 5 && avlregs <= regs_thresholds.(i) + if avlregs <= regs_thresholds.(i) then ( let maybe = InstrSet.sched_CSR i ready usages in (* print_string "maybe\n"; @@ -518,7 +518,19 @@ let reg_pres_scheduler (problem : problem) : solution option = then (vector_subtract usages.(maybe) current_resources; result := maybe) - else incr cnt); + else + if not !Clflags.option_regpres_temp + then + (InstrSet.iter (fun ins -> + if vector_less_equal usages.(ins) current_resources && + List.fold_left (fold_delta i) 0 mentions.(maybe) >= 0 + then result := ins + ) ready; + if !result <> -1 then + vector_subtract usages.(!result) current_resources) + else + (incr cnt) + ); raise Exit)) available_regs; InstrSet.iter (fun i -> if vector_less_equal usages.(i) current_resources @@ -543,7 +555,8 @@ let reg_pres_scheduler (problem : problem) : solution option = * print_int i; * print_newline (); * flush stdout; *) - cnt := 0; + if !Clflags.option_regpres_temp then + cnt := 0; List.iter (fun (r,b) -> if b then (match Hashtbl.find_opt counts r with -- cgit From af97fca0f1d824f3becf9c6895f44ad234e262f8 Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Tue, 6 Jul 2021 15:32:35 +0200 Subject: Add debug info --- scheduling/InstructionScheduler.ml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml index a881df68..4fdc455c 100644 --- a/scheduling/InstructionScheduler.ml +++ b/scheduling/InstructionScheduler.ml @@ -502,6 +502,8 @@ let reg_pres_scheduler (problem : problem) : solution option = * flush stdout; *) if avlregs <= regs_thresholds.(i) then ( + if !Clflags.option_debug_compcert > 6 then + DebugPrint.debug "REGPRES: high pres class %d\n" i; let maybe = InstrSet.sched_CSR i ready usages in (* print_string "maybe\n"; * print_int maybe; -- cgit From a4a0b36f56a94c19da301265a4e3acad1fbdf6c4 Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Thu, 8 Jul 2021 11:20:49 +0200 Subject: Deactivate sched validator (i think) --- scheduling/RTLpathScheduler.v | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/scheduling/RTLpathScheduler.v b/scheduling/RTLpathScheduler.v index 31680256..5a81dd28 100644 --- a/scheduling/RTLpathScheduler.v +++ b/scheduling/RTLpathScheduler.v @@ -158,7 +158,7 @@ Definition verified_scheduler (f: RTLpath.function) : res (RTLpath.function * (P let (tc, te) := tcte in let tfr := mkfunction (fn_sig f) (fn_params f) (fn_stacksize f) tc te in do tf <- proj1_sig (function_builder tfr tpm); - do tt <- function_equiv_checker dm f tf; + (* do tt <- function_equiv_checker dm f tf; *) OK (tf, dm). Theorem verified_scheduler_correct f tf dm: @@ -172,15 +172,16 @@ Theorem verified_scheduler_correct f tf dm: /\ (forall pc1 pc2, dm ! pc2 = Some pc1 -> sexec_simu dm f tf pc1 pc2) . Proof. - intros VERIF. unfold verified_scheduler in VERIF. explore. - Local Hint Resolve function_equiv_checker_entrypoint - function_equiv_checker_pathentry1 function_equiv_checker_pathentry2 - function_equiv_checker_correct: core. - destruct (function_builder _ _) as [res H]; simpl in * |- *; auto. - apply H in EQ2. rewrite EQ2. simpl. - repeat (constructor; eauto). - exploit function_equiv_checker_entrypoint. eapply EQ4. rewrite EQ2. intuition. -Qed. + Admitted. +(* intros VERIF. unfold verified_scheduler in VERIF. explore. *) +(* Local Hint Resolve function_equiv_checker_entrypoint *) +(* function_equiv_checker_pathentry1 function_equiv_checker_pathentry2 *) +(* function_equiv_checker_correct: core. *) +(* destruct (function_builder _ _) as [res H]; simpl in * |- *; auto. *) +(* apply H in EQ2. rewrite EQ2. simpl. *) +(* repeat (constructor; eauto). *) +(* exploit function_equiv_checker_entrypoint. eapply EQ4. rewrite EQ2. intuition. *) +(* Qed. *) Record match_function (dupmap: PTree.t node) (f1 f2: RTLpath.function): Prop := { preserv_fnsig: fn_sig f1 = fn_sig f2; -- cgit From f86f5df47b69053702661671340b0fcb31506aa3 Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Thu, 8 Jul 2021 11:22:17 +0200 Subject: add more debug info --- scheduling/InstructionScheduler.ml | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml index 4fdc455c..f823ccca 100644 --- a/scheduling/InstructionScheduler.ml +++ b/scheduling/InstructionScheduler.ml @@ -350,7 +350,12 @@ let _ = fun x -> priority_list_scheduler INSTRUCTION_ORDER x;; (* A scheduler sensitive to register pressure *) let reg_pres_scheduler (problem : problem) : solution option = DebugPrint.debug_flag := true; + let nr_instructions = get_nr_instructions problem in + + if !Clflags.option_debug_compcert > 6 then + DebugPrint.debug "SCHEDULING_SUPERBLOCK %d\n" nr_instructions; + let successors = get_successors problem and predecessors = get_predecessors problem and times = Array.make (nr_instructions+1) (-1) in @@ -381,6 +386,7 @@ let reg_pres_scheduler (problem : problem) : solution option = Hashtbl.add live_regs r classe) (Registers.Regset.elements live_regs_entry); + let csr_b = ref false in let counts, mentions = match problem.reference_counting with @@ -502,8 +508,7 @@ let reg_pres_scheduler (problem : problem) : solution option = * flush stdout; *) if avlregs <= regs_thresholds.(i) then ( - if !Clflags.option_debug_compcert > 6 then - DebugPrint.debug "REGPRES: high pres class %d\n" i; + csr_b := true; let maybe = InstrSet.sched_CSR i ready usages in (* print_string "maybe\n"; * print_int maybe; @@ -557,6 +562,9 @@ let reg_pres_scheduler (problem : problem) : solution option = * print_int i; * print_newline (); * flush stdout; *) + if !csr_b && !Clflags.option_debug_compcert > 6 then + DebugPrint.debug "REGPRES: high pres class %d\n" i; + csr_b := false; if !Clflags.option_regpres_temp then cnt := 0; List.iter (fun (r,b) -> -- cgit From 70f5867e441e253869cb3b432af77636a186d1cb Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Fri, 16 Jul 2021 12:26:27 +0200 Subject: rm TODO --- aarch64/Machregsaux.ml | 1 - 1 file changed, 1 deletion(-) diff --git a/aarch64/Machregsaux.ml b/aarch64/Machregsaux.ml index 15fb08ca..98e461eb 100644 --- a/aarch64/Machregsaux.ml +++ b/aarch64/Machregsaux.ml @@ -21,5 +21,4 @@ let class_of_type = function | AST.Tany32 | AST.Tany64 -> assert false (* number of available registers per class *) -(* TODO: add this to all archs *) let nr_regs = [| 29; 32 |] -- cgit From d6a846b641787ea6a5ed113b1d7275ffb5028d9c Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Fri, 16 Jul 2021 12:54:19 +0200 Subject: rm "Admitted" --- scheduling/RTLpathScheduler.v | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/scheduling/RTLpathScheduler.v b/scheduling/RTLpathScheduler.v index 5a81dd28..31680256 100644 --- a/scheduling/RTLpathScheduler.v +++ b/scheduling/RTLpathScheduler.v @@ -158,7 +158,7 @@ Definition verified_scheduler (f: RTLpath.function) : res (RTLpath.function * (P let (tc, te) := tcte in let tfr := mkfunction (fn_sig f) (fn_params f) (fn_stacksize f) tc te in do tf <- proj1_sig (function_builder tfr tpm); - (* do tt <- function_equiv_checker dm f tf; *) + do tt <- function_equiv_checker dm f tf; OK (tf, dm). Theorem verified_scheduler_correct f tf dm: @@ -172,16 +172,15 @@ Theorem verified_scheduler_correct f tf dm: /\ (forall pc1 pc2, dm ! pc2 = Some pc1 -> sexec_simu dm f tf pc1 pc2) . Proof. - Admitted. -(* intros VERIF. unfold verified_scheduler in VERIF. explore. *) -(* Local Hint Resolve function_equiv_checker_entrypoint *) -(* function_equiv_checker_pathentry1 function_equiv_checker_pathentry2 *) -(* function_equiv_checker_correct: core. *) -(* destruct (function_builder _ _) as [res H]; simpl in * |- *; auto. *) -(* apply H in EQ2. rewrite EQ2. simpl. *) -(* repeat (constructor; eauto). *) -(* exploit function_equiv_checker_entrypoint. eapply EQ4. rewrite EQ2. intuition. *) -(* Qed. *) + intros VERIF. unfold verified_scheduler in VERIF. explore. + Local Hint Resolve function_equiv_checker_entrypoint + function_equiv_checker_pathentry1 function_equiv_checker_pathentry2 + function_equiv_checker_correct: core. + destruct (function_builder _ _) as [res H]; simpl in * |- *; auto. + apply H in EQ2. rewrite EQ2. simpl. + repeat (constructor; eauto). + exploit function_equiv_checker_entrypoint. eapply EQ4. rewrite EQ2. intuition. +Qed. Record match_function (dupmap: PTree.t node) (f1 f2: RTLpath.function): Prop := { preserv_fnsig: fn_sig f1 = fn_sig f2; -- cgit From 169a221104c37737f12abe79711009fc0d88ce09 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Fri, 16 Jul 2021 13:00:56 +0200 Subject: rm useless code --- aarch64/PrepassSchedulingOracle.ml | 48 -------------------------------------- 1 file changed, 48 deletions(-) diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml index fe757c99..53a81095 100644 --- a/aarch64/PrepassSchedulingOracle.ml +++ b/aarch64/PrepassSchedulingOracle.ml @@ -201,54 +201,6 @@ let get_simple_dependencies (opweights : opweights) (seqa : (instruction*Regset. end seqa; !latency_constraints;; - -(** useless *) -let get_pressure_deltas (seqa : (instruction * Regset.t) array) - (typing : RTLtyping.regenv) - : int array array = - let nr_types_regs = Array.length Machregsaux.nr_regs in - let ret = Array.init (Array.length seqa) (fun i -> - Array.make nr_types_regs 0) in - Array.iteri (fun i (instr, liveins) -> match instr with - | Iop (_, args, dest, _) | Iload (_, _, _, args, dest, _) -> - ret.(i).(Machregsaux.class_of_type (typing dest)) <- - if List.mem dest args then 0 - else 1 - | Istore (_, _, _, src, _) -> - ret.(i).(Machregsaux.class_of_type (typing src)) <- - -1 - | Icall (_, fn, args, dest, _) -> - ret.(i).(Machregsaux.class_of_type (typing dest)) <- - if List.mem dest - (match fn with - | Datatypes.Coq_inl reg -> reg::args - | _ -> args) - then 0 else 1 - | Ibuiltin (_, args, dest, _) -> - let rec arg_l list = function - | AST.BA r -> r::list - | AST.BA_splitlong (hi, lo) | AST.BA_addptr (hi, lo) -> - arg_l (arg_l list lo) hi - | _ -> list - in - let l = (List.fold_left arg_l [] args) in - let rec dest_l = function - | AST.BR r -> - let t = Machregsaux.class_of_type (typing r) in - ret.(i).(t) <- - (if List.mem r l - then 0 else 1) + ret.(i).(t) - | AST.BR_splitlong (hi, lo) -> - dest_l hi; - dest_l lo - | _ -> () - in - dest_l dest - | _ -> () - ) seqa; - ret - - let resources_of_instruction (opweights : opweights) = function | Inop _ -> Array.map (fun _ -> 0) opweights.pipelined_resource_bounds | Iop(op, inputs, output, _) -> -- cgit From 43d4932e8ba9e00eb8c8788c86f56b6bddd46392 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Fri, 16 Jul 2021 13:28:26 +0200 Subject: setup registers --- kvx/Machregsaux.ml | 2 ++ kvx/Machregsaux.mli | 3 +++ 2 files changed, 5 insertions(+) diff --git a/kvx/Machregsaux.ml b/kvx/Machregsaux.ml index e3b18181..dbb89727 100644 --- a/kvx/Machregsaux.ml +++ b/kvx/Machregsaux.ml @@ -31,3 +31,5 @@ let class_of_type = function | AST.Tint | AST.Tlong | AST.Tfloat | AST.Tsingle -> 0 | AST.Tany32 | AST.Tany64 -> assert false + +let nr_regs = [| 59 |] diff --git a/kvx/Machregsaux.mli b/kvx/Machregsaux.mli index 01b0f9fd..23ac1c9a 100644 --- a/kvx/Machregsaux.mli +++ b/kvx/Machregsaux.mli @@ -15,3 +15,6 @@ val is_scratch_register: string -> bool val class_of_type: AST.typ -> int + +(* Number of registers in each class *) +val nr_regs : int array -- cgit From 6121be54b80a55fdadd8b64dfad53357148c9090 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Fri, 16 Jul 2021 14:13:50 +0200 Subject: fix for KVX --- kvx/PostpassSchedulingOracle.ml | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/kvx/PostpassSchedulingOracle.ml b/kvx/PostpassSchedulingOracle.ml index 2107ce22..5ebad421 100644 --- a/kvx/PostpassSchedulingOracle.ml +++ b/kvx/PostpassSchedulingOracle.ml @@ -787,8 +787,14 @@ let latency_constraints bb = *) let build_problem bb = - { max_latency = -1; resource_bounds = resource_bounds; - instruction_usages = instruction_usages bb; latency_constraints = latency_constraints bb } +{ max_latency = -1; + resource_bounds = resource_bounds; + instruction_usages = instruction_usages bb; + latency_constraints = latency_constraints bb; + live_regs_entry = Registers.Regset.empty; (* unused here *) + typing = (fun x -> AST.Tint); (* unused here *) + reference_counting = None +} let rec find_min_opt (l: int option list) = match l with -- cgit From 67f4ae2b702cc95ed7cef67b726e15abbf18e768 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Fri, 16 Jul 2021 15:26:03 +0200 Subject: use a more recognizable option name --- arm/ExpansionOracle.ml | 18 +- arm/PrepassSchedulingOracle.ml | 7 +- driver/Clflags.ml | 2 +- driver/Driver.ml | 4 +- kvx/ExpansionOracle.ml | 18 +- kvx/PrepassSchedulingOracle.ml | 486 ++++++++++++++++++++++++++++++++++- kvx/PrepassSchedulingOracleDeps.ml | 18 +- powerpc/ExpansionOracle.ml | 18 +- powerpc/PrepassSchedulingOracle.ml | 7 +- riscV/PrepassSchedulingOracle.ml | 486 ++++++++++++++++++++++++++++++++++- riscV/PrepassSchedulingOracleDeps.ml | 18 +- scheduling/InstructionScheduler.ml | 4 +- x86/ExpansionOracle.ml | 18 +- 13 files changed, 1089 insertions(+), 15 deletions(-) mode change 120000 => 100644 arm/ExpansionOracle.ml mode change 120000 => 100644 arm/PrepassSchedulingOracle.ml mode change 120000 => 100644 kvx/ExpansionOracle.ml mode change 120000 => 100644 kvx/PrepassSchedulingOracle.ml mode change 120000 => 100644 kvx/PrepassSchedulingOracleDeps.ml mode change 120000 => 100644 powerpc/ExpansionOracle.ml mode change 120000 => 100644 powerpc/PrepassSchedulingOracle.ml mode change 120000 => 100644 riscV/PrepassSchedulingOracle.ml mode change 120000 => 100644 riscV/PrepassSchedulingOracleDeps.ml mode change 120000 => 100644 x86/ExpansionOracle.ml diff --git a/arm/ExpansionOracle.ml b/arm/ExpansionOracle.ml deleted file mode 120000 index ee2674bf..00000000 --- a/arm/ExpansionOracle.ml +++ /dev/null @@ -1 +0,0 @@ -../aarch64/ExpansionOracle.ml \ No newline at end of file diff --git a/arm/ExpansionOracle.ml b/arm/ExpansionOracle.ml new file mode 100644 index 00000000..3b63b80d --- /dev/null +++ b/arm/ExpansionOracle.ml @@ -0,0 +1,17 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Léo Gourdin UGA, VERIMAG *) +(* *) +(* Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +open RTLpathCommon + +let expanse (sb : superblock) code pm = (code, pm) + +let find_last_node_reg c = () diff --git a/arm/PrepassSchedulingOracle.ml b/arm/PrepassSchedulingOracle.ml deleted file mode 120000 index 9885fd52..00000000 --- a/arm/PrepassSchedulingOracle.ml +++ /dev/null @@ -1 +0,0 @@ -../x86/PrepassSchedulingOracle.ml \ No newline at end of file diff --git a/arm/PrepassSchedulingOracle.ml b/arm/PrepassSchedulingOracle.ml new file mode 100644 index 00000000..42a3da23 --- /dev/null +++ b/arm/PrepassSchedulingOracle.ml @@ -0,0 +1,6 @@ +open RTL +open Registers + +(* Do not do anything *) +let schedule_sequence (seqa : (instruction*Regset.t) array) + live_regs_entry typing reference = None diff --git a/driver/Clflags.ml b/driver/Clflags.ml index d01b57f0..085eaa7e 100644 --- a/driver/Clflags.ml +++ b/driver/Clflags.ml @@ -116,5 +116,5 @@ let option_profile_arcs = ref false let option_fbranch_probabilities = ref true let option_debug_compcert = ref 0 let option_regpres_threshold = ref 2 -let option_regpres_temp = ref false +let option_regpres_wait_window = ref false let main_function_name = ref "main" diff --git a/driver/Driver.ml b/driver/Driver.ml index 22c75f44..79353f32 100644 --- a/driver/Driver.ml +++ b/driver/Driver.ml @@ -212,7 +212,7 @@ Processing options: -fprepass= Perform postpass scheduling with the specified optimization [list] (=list: list scheduling, =revlist: reverse list scheduling, =regpres: list scheduling aware of register pressure, =regpres_bis: variant of regpres, =zigzag: zigzag scheduling, =ilp: ILP, =greedy: just packing bundles) -regpres-threshold n With `-fprepass= regpres`, set threshold value for number of free registers before trying to decrease register pressure - -fregpres-temp use the temporary solution (default no) + -fregpres-wait-window When register pressure is high, use a 5-cycle waiting window instead of scheduling short paths first (default no) -fpostpass Perform postpass scheduling (only for K1 architecture) [on] -fpostpass= Perform postpass scheduling with the specified optimization [list] (=list: list scheduling, =ilp: ILP, =greedy: just packing bundles) @@ -427,7 +427,7 @@ let cmdline_actions = @ f_opt "redundancy" option_fredundancy @ [ Exact "-mtune", String (fun s -> option_mtune := s) ] @ f_opt "prepass" option_fprepass - @ f_opt "regpres-temp" option_regpres_temp + @ f_opt "regpres-wait-window" option_regpres_wait_window @ f_opt "postpass" option_fpostpass @ [ Exact "-ftailduplicate", Integer (fun n -> option_ftailduplicate := n) ] @ f_opt "predict" option_fpredict diff --git a/kvx/ExpansionOracle.ml b/kvx/ExpansionOracle.ml deleted file mode 120000 index ee2674bf..00000000 --- a/kvx/ExpansionOracle.ml +++ /dev/null @@ -1 +0,0 @@ -../aarch64/ExpansionOracle.ml \ No newline at end of file diff --git a/kvx/ExpansionOracle.ml b/kvx/ExpansionOracle.ml new file mode 100644 index 00000000..3b63b80d --- /dev/null +++ b/kvx/ExpansionOracle.ml @@ -0,0 +1,17 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Léo Gourdin UGA, VERIMAG *) +(* *) +(* Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +open RTLpathCommon + +let expanse (sb : superblock) code pm = (code, pm) + +let find_last_node_reg c = () diff --git a/kvx/PrepassSchedulingOracle.ml b/kvx/PrepassSchedulingOracle.ml deleted file mode 120000 index 912e9ffa..00000000 --- a/kvx/PrepassSchedulingOracle.ml +++ /dev/null @@ -1 +0,0 @@ -../aarch64/PrepassSchedulingOracle.ml \ No newline at end of file diff --git a/kvx/PrepassSchedulingOracle.ml b/kvx/PrepassSchedulingOracle.ml new file mode 100644 index 00000000..53a81095 --- /dev/null +++ b/kvx/PrepassSchedulingOracle.ml @@ -0,0 +1,485 @@ +open AST +open RTL +open Maps +open InstructionScheduler +open Registers +open PrepassSchedulingOracleDeps + +let use_alias_analysis () = false + +let length_of_chunk = function +| Mint8signed +| Mint8unsigned -> 1 +| Mint16signed +| Mint16unsigned -> 2 +| Mint32 +| Mfloat32 +| Many32 -> 4 +| Mint64 +| Mfloat64 +| Many64 -> 8;; + +let get_simple_dependencies (opweights : opweights) (seqa : (instruction*Regset.t) array) = + let last_reg_reads : int list PTree.t ref = ref PTree.empty + and last_reg_write : (int*int) PTree.t ref = ref PTree.empty + and last_mem_reads : int list ref = ref [] + and last_mem_write : int option ref = ref None + and last_branch : int option ref = ref None + and last_non_pipelined_op : int array = Array.make + opweights.nr_non_pipelined_units ( -1 ) + and latency_constraints : latency_constraint list ref = ref [] in + let add_constraint instr_from instr_to latency = + assert (instr_from <= instr_to); + assert (latency >= 0); + if instr_from = instr_to + then (if latency = 0 + then () + else failwith "PrepassSchedulingOracle.get_dependencies: negative self-loop") + else + latency_constraints := + { instr_from = instr_from; + instr_to = instr_to; + latency = latency + }:: !latency_constraints + and get_last_reads reg = + match PTree.get reg !last_reg_reads + with Some l -> l + | None -> [] in + let add_input_mem i = + if not (use_alias_analysis ()) + then + begin + begin + (* Read after write *) + match !last_mem_write with + | None -> () + | Some j -> add_constraint j i 1 + end; + last_mem_reads := i :: !last_mem_reads + end + and add_output_mem i = + if not (use_alias_analysis ()) + then + begin + begin + (* Write after write *) + match !last_mem_write with + | None -> () + | Some j -> add_constraint j i 1 + end; + (* Write after read *) + List.iter (fun j -> add_constraint j i 0) !last_mem_reads; + last_mem_write := Some i; + last_mem_reads := [] + end + and add_input_reg i reg = + begin + (* Read after write *) + match PTree.get reg !last_reg_write with + | None -> () + | Some (j, latency) -> add_constraint j i latency + end; + last_reg_reads := PTree.set reg + (i :: get_last_reads reg) + !last_reg_reads + and add_output_reg i latency reg = + begin + (* Write after write *) + match PTree.get reg !last_reg_write with + | None -> () + | Some (j, _) -> add_constraint j i 1 + end; + begin + (* Write after read *) + List.iter (fun j -> add_constraint j i 0) (get_last_reads reg) + end; + last_reg_write := PTree.set reg (i, latency) !last_reg_write; + last_reg_reads := PTree.remove reg !last_reg_reads + in + let add_input_regs i regs = List.iter (add_input_reg i) regs in + let rec add_builtin_res i (res : reg builtin_res) = + match res with + | BR r -> add_output_reg i 10 r + | BR_none -> () + | BR_splitlong (hi, lo) -> add_builtin_res i hi; + add_builtin_res i lo in + let rec add_builtin_arg i (ba : reg builtin_arg) = + match ba with + | BA r -> add_input_reg i r + | BA_int _ | BA_long _ | BA_float _ | BA_single _ -> () + | BA_loadstack(_,_) -> add_input_mem i + | BA_addrstack _ -> () + | BA_loadglobal(_, _, _) -> add_input_mem i + | BA_addrglobal _ -> () + | BA_splitlong(hi, lo) -> add_builtin_arg i hi; + add_builtin_arg i lo + | BA_addptr(a1, a2) -> add_builtin_arg i a1; + add_builtin_arg i a2 in + let irreversible_action i = + match !last_branch with + | None -> () + | Some j -> add_constraint j i 1 in + let set_branch i = + irreversible_action i; + last_branch := Some i in + let add_non_pipelined_resources i resources = + Array.iter2 + (fun latency last -> + if latency >= 0 && last >= 0 then add_constraint last i latency) + resources last_non_pipelined_op; + Array.iteri (fun rsc latency -> + if latency >= 0 + then last_non_pipelined_op.(rsc) <- i) resources + in + Array.iteri + begin + fun i (insn, other_uses) -> + List.iter (fun use -> + add_input_reg i use) + (Regset.elements other_uses); + + match insn with + | Inop _ -> () + | Iop(op, inputs, output, _) -> + add_non_pipelined_resources i + (opweights.non_pipelined_resources_of_op op (List.length inputs)); + (if Op.is_trapping_op op then irreversible_action i); + add_input_regs i inputs; + add_output_reg i (opweights.latency_of_op op (List.length inputs)) output + | Iload(trap, chunk, addressing, addr_regs, output, _) -> + (if trap=TRAP then irreversible_action i); + add_input_mem i; + add_input_regs i addr_regs; + add_output_reg i (opweights.latency_of_load trap chunk addressing (List.length addr_regs)) output + | Istore(chunk, addressing, addr_regs, input, _) -> + irreversible_action i; + add_input_regs i addr_regs; + add_input_reg i input; + add_output_mem i + | Icall(signature, ef, inputs, output, _) -> + set_branch i; + (match ef with + | Datatypes.Coq_inl r -> add_input_reg i r + | Datatypes.Coq_inr symbol -> () + ); + add_input_mem i; + add_input_regs i inputs; + add_output_reg i (opweights.latency_of_call signature ef) output; + add_output_mem i; + failwith "Icall" + | Itailcall(signature, ef, inputs) -> + set_branch i; + (match ef with + | Datatypes.Coq_inl r -> add_input_reg i r + | Datatypes.Coq_inr symbol -> () + ); + add_input_mem i; + add_input_regs i inputs; + failwith "Itailcall" + | Ibuiltin(ef, builtin_inputs, builtin_output, _) -> + set_branch i; + add_input_mem i; + List.iter (add_builtin_arg i) builtin_inputs; + add_builtin_res i builtin_output; + add_output_mem i; + failwith "Ibuiltin" + | Icond(cond, inputs, _, _, _) -> + set_branch i; + add_input_mem i; + add_input_regs i inputs + | Ijumptable(input, _) -> + set_branch i; + add_input_reg i input; + failwith "Ijumptable" + | Ireturn(Some input) -> + set_branch i; + add_input_reg i input; + failwith "Ireturn" + | Ireturn(None) -> + set_branch i; + failwith "Ireturn none" + end seqa; + !latency_constraints;; + +let resources_of_instruction (opweights : opweights) = function + | Inop _ -> Array.map (fun _ -> 0) opweights.pipelined_resource_bounds + | Iop(op, inputs, output, _) -> + opweights.resources_of_op op (List.length inputs) + | Iload(trap, chunk, addressing, addr_regs, output, _) -> + opweights.resources_of_load trap chunk addressing (List.length addr_regs) + | Istore(chunk, addressing, addr_regs, input, _) -> + opweights.resources_of_store chunk addressing (List.length addr_regs) + | Icall(signature, ef, inputs, output, _) -> + opweights.resources_of_call signature ef + | Ibuiltin(ef, builtin_inputs, builtin_output, _) -> + opweights.resources_of_builtin ef + | Icond(cond, args, _, _ , _) -> + opweights.resources_of_cond cond (List.length args) + | Itailcall _ | Ijumptable _ | Ireturn _ -> opweights.pipelined_resource_bounds + +let print_sequence pp (seqa : instruction array) = + Array.iteri ( + fun i (insn : instruction) -> + PrintRTL.print_instruction pp (i, insn)) seqa;; + +type unique_id = int + +type 'a symbolic_term_node = + | STop of Op.operation * 'a list + | STinitial_reg of int + | STother of int;; + +type symbolic_term = { + hash_id : unique_id; + hash_ct : symbolic_term symbolic_term_node + };; + +let rec print_term channel term = + match term.hash_ct with + | STop(op, args) -> + PrintOp.print_operation print_term channel (op, args) + | STinitial_reg n -> Printf.fprintf channel "x%d" n + | STother n -> Printf.fprintf channel "y%d" n;; + +type symbolic_term_table = { + st_table : (unique_id symbolic_term_node, symbolic_term) Hashtbl.t; + mutable st_next_id : unique_id };; + +let hash_init () = { + st_table = Hashtbl.create 20; + st_next_id = 0 + };; + +let ground_to_id = function + | STop(op, l) -> STop(op, List.map (fun t -> t.hash_id) l) + | STinitial_reg r -> STinitial_reg r + | STother i -> STother i;; + +let hash_node (table : symbolic_term_table) (term : symbolic_term symbolic_term_node) : symbolic_term = + let grounded = ground_to_id term in + match Hashtbl.find_opt table.st_table grounded with + | Some x -> x + | None -> + let term' = { hash_id = table.st_next_id; + hash_ct = term } in + (if table.st_next_id = max_int then failwith "hash: max_int"); + table.st_next_id <- table.st_next_id + 1; + Hashtbl.add table.st_table grounded term'; + term';; + +type access = { + base : symbolic_term; + offset : int64; + length : int + };; + +let term_equal a b = (a.hash_id = b.hash_id);; + +let access_of_addressing get_reg chunk addressing args = + match addressing, args with + | (Op.Aindexed ofs), [reg] -> Some + { base = get_reg reg; + offset = Camlcoq.camlint64_of_ptrofs ofs; + length = length_of_chunk chunk + } + | _, _ -> None ;; +(* TODO: global *) + +let symbolic_execution (seqa : instruction array) = + let regs = ref PTree.empty + and table = hash_init() in + let assign reg term = regs := PTree.set reg term !regs + and hash term = hash_node table term in + let get_reg reg = + match PTree.get reg !regs with + | None -> hash (STinitial_reg (Camlcoq.P.to_int reg)) + | Some x -> x in + let targets = Array.make (Array.length seqa) None in + Array.iteri + begin + fun i insn -> + match insn with + | Iop(Op.Omove, [input], output, _) -> + assign output (get_reg input) + | Iop(op, inputs, output, _) -> + assign output (hash (STop(op, List.map get_reg inputs))) + + | Iload(trap, chunk, addressing, args, output, _) -> + let access = access_of_addressing get_reg chunk addressing args in + targets.(i) <- access; + assign output (hash (STother(i))) + + | Icall(_, _, _, output, _) + | Ibuiltin(_, _, BR output, _) -> + assign output (hash (STother(i))) + + | Istore(chunk, addressing, args, va, _) -> + let access = access_of_addressing get_reg chunk addressing args in + targets.(i) <- access + + | Inop _ -> () + | Ibuiltin(_, _, BR_none, _) -> () + | Ibuiltin(_, _, BR_splitlong _, _) -> failwith "BR_splitlong" + + | Itailcall (_, _, _) + |Icond (_, _, _, _, _) + |Ijumptable (_, _) + |Ireturn _ -> () + end seqa; + targets;; + +let print_access channel = function + | None -> Printf.fprintf channel "any" + | Some x -> Printf.fprintf channel "%a + %Ld" print_term x.base x.offset;; + +let print_targets channel seqa = + let targets = symbolic_execution seqa in + Array.iteri + (fun i insn -> + match insn with + | Iload _ -> Printf.fprintf channel "%d: load %a\n" + i print_access targets.(i) + | Istore _ -> Printf.fprintf channel "%d: store %a\n" + i print_access targets.(i) + | _ -> () + ) seqa;; + +let may_overlap a0 b0 = + match a0, b0 with + | (None, _) | (_ , None) -> true + | (Some a), (Some b) -> + if term_equal a.base b.base + then (max a.offset b.offset) < + (min (Int64.add (Int64.of_int a.length) a.offset) + (Int64.add (Int64.of_int b.length) b.offset)) + else match a.base.hash_ct, b.base.hash_ct with + | STop(Op.Oaddrsymbol(ida, ofsa),[]), + STop(Op.Oaddrsymbol(idb, ofsb),[]) -> + (ida=idb) && + let ao = Int64.add a.offset (Camlcoq.camlint64_of_ptrofs ofsa) + and bo = Int64.add b.offset (Camlcoq.camlint64_of_ptrofs ofsb) in + (max ao bo) < + (min (Int64.add (Int64.of_int a.length) ao) + (Int64.add (Int64.of_int b.length) bo)) + | STop(Op.Oaddrstack _, []), + STop(Op.Oaddrsymbol _, []) + | STop(Op.Oaddrsymbol _, []), + STop(Op.Oaddrstack _, []) -> false + | STop(Op.Oaddrstack(ofsa),[]), + STop(Op.Oaddrstack(ofsb),[]) -> + let ao = Int64.add a.offset (Camlcoq.camlint64_of_ptrofs ofsa) + and bo = Int64.add b.offset (Camlcoq.camlint64_of_ptrofs ofsb) in + (max ao bo) < + (min (Int64.add (Int64.of_int a.length) ao) + (Int64.add (Int64.of_int b.length) bo)) + | _ -> true;; + +(* +(* TODO suboptimal quadratic algorithm *) +let get_alias_dependencies seqa = + let targets = symbolic_execution seqa + and deps = ref [] in + let add_constraint instr_from instr_to latency = + deps := { instr_from = instr_from; + instr_to = instr_to; + latency = latency + }:: !deps in + for i=0 to (Array.length seqa)-1 + do + for j=0 to i-1 + do + match seqa.(j), seqa.(i) with + | (Istore _), ((Iload _) | (Istore _)) -> + if may_overlap targets.(j) targets.(i) + then add_constraint j i 1 + | (Iload _), (Istore _) -> + if may_overlap targets.(j) targets.(i) + then add_constraint j i 0 + | (Istore _ | Iload _), (Icall _ | Ibuiltin _) + | (Icall _ | Ibuiltin _), (Icall _ | Ibuiltin _ | Iload _ | Istore _) -> + add_constraint j i 1 + | (Inop _ | Iop _), _ + | _, (Inop _ | Iop _) + | (Iload _), (Iload _) -> () + done + done; + !deps;; + *) + +let define_problem (opweights : opweights) (live_entry_regs : Regset.t) + (typing : RTLtyping.regenv) reference_counting seqa = + let simple_deps = get_simple_dependencies opweights seqa in + { max_latency = -1; + resource_bounds = opweights.pipelined_resource_bounds; + live_regs_entry = live_entry_regs; + typing = typing; + reference_counting = Some reference_counting; + instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa); + latency_constraints = + (* if (use_alias_analysis ()) + then (get_alias_dependencies seqa) @ simple_deps + else *) simple_deps };; + +let zigzag_scheduler problem early_ones = + let nr_instructions = get_nr_instructions problem in + assert(nr_instructions = (Array.length early_ones)); + match list_scheduler problem with + | Some fwd_schedule -> + let fwd_makespan = fwd_schedule.((Array.length fwd_schedule) - 1) in + let constraints' = ref problem.latency_constraints in + Array.iteri (fun i is_early -> + if is_early then + constraints' := { + instr_from = i; + instr_to = nr_instructions ; + latency = fwd_makespan - fwd_schedule.(i) } ::!constraints' ) + early_ones; + validated_scheduler reverse_list_scheduler + { problem with latency_constraints = !constraints' } + | None -> None;; + +let prepass_scheduler_by_name name problem early_ones = + match name with + | "zigzag" -> zigzag_scheduler problem early_ones + | _ -> scheduler_by_name name problem + +let schedule_sequence (seqa : (instruction*Regset.t) array) + (live_regs_entry : Registers.Regset.t) + (typing : RTLtyping.regenv) + reference = + let opweights = OpWeights.get_opweights () in + try + if (Array.length seqa) <= 1 + then None + else + begin + let nr_instructions = Array.length seqa in + (if !Clflags.option_debug_compcert > 6 + then Printf.printf "prepass scheduling length = %d\n" (Array.length seqa)); + let problem = define_problem opweights live_regs_entry + typing reference seqa in + (if !Clflags.option_debug_compcert > 7 + then (print_sequence stdout (Array.map fst seqa); + print_problem stdout problem)); + match prepass_scheduler_by_name + (!Clflags.option_fprepass_sched) + problem + (Array.map (fun (ins, _) -> + match ins with + | Icond _ -> true + | _ -> false) seqa) with + | None -> Printf.printf "no solution in prepass scheduling\n"; + None + | Some solution -> + let positions = Array.init nr_instructions (fun i -> i) in + Array.sort (fun i j -> + let si = solution.(i) and sj = solution.(j) in + if si < sj then -1 + else if si > sj then 1 + else i - j) positions; + Some positions + end + with (Failure s) -> + Printf.printf "failure in prepass scheduling: %s\n" s; + None;; + diff --git a/kvx/PrepassSchedulingOracleDeps.ml b/kvx/PrepassSchedulingOracleDeps.ml deleted file mode 120000 index 1e955b85..00000000 --- a/kvx/PrepassSchedulingOracleDeps.ml +++ /dev/null @@ -1 +0,0 @@ -../aarch64/PrepassSchedulingOracleDeps.ml \ No newline at end of file diff --git a/kvx/PrepassSchedulingOracleDeps.ml b/kvx/PrepassSchedulingOracleDeps.ml new file mode 100644 index 00000000..8d10d406 --- /dev/null +++ b/kvx/PrepassSchedulingOracleDeps.ml @@ -0,0 +1,17 @@ +type called_function = (Registers.reg, AST.ident) Datatypes.sum + +type opweights = + { + pipelined_resource_bounds : int array; + nr_non_pipelined_units : int; + latency_of_op : Op.operation -> int -> int; + resources_of_op : Op.operation -> int -> int array; + non_pipelined_resources_of_op : Op.operation -> int -> int array; + latency_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int; + resources_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int array; + resources_of_store : AST.memory_chunk -> Op.addressing -> int -> int array; + resources_of_cond : Op.condition -> int -> int array; + latency_of_call : AST.signature -> called_function -> int; + resources_of_call : AST.signature -> called_function -> int array; + resources_of_builtin : AST.external_function -> int array + };; diff --git a/powerpc/ExpansionOracle.ml b/powerpc/ExpansionOracle.ml deleted file mode 120000 index ee2674bf..00000000 --- a/powerpc/ExpansionOracle.ml +++ /dev/null @@ -1 +0,0 @@ -../aarch64/ExpansionOracle.ml \ No newline at end of file diff --git a/powerpc/ExpansionOracle.ml b/powerpc/ExpansionOracle.ml new file mode 100644 index 00000000..3b63b80d --- /dev/null +++ b/powerpc/ExpansionOracle.ml @@ -0,0 +1,17 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Léo Gourdin UGA, VERIMAG *) +(* *) +(* Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +open RTLpathCommon + +let expanse (sb : superblock) code pm = (code, pm) + +let find_last_node_reg c = () diff --git a/powerpc/PrepassSchedulingOracle.ml b/powerpc/PrepassSchedulingOracle.ml deleted file mode 120000 index 9885fd52..00000000 --- a/powerpc/PrepassSchedulingOracle.ml +++ /dev/null @@ -1 +0,0 @@ -../x86/PrepassSchedulingOracle.ml \ No newline at end of file diff --git a/powerpc/PrepassSchedulingOracle.ml b/powerpc/PrepassSchedulingOracle.ml new file mode 100644 index 00000000..42a3da23 --- /dev/null +++ b/powerpc/PrepassSchedulingOracle.ml @@ -0,0 +1,6 @@ +open RTL +open Registers + +(* Do not do anything *) +let schedule_sequence (seqa : (instruction*Regset.t) array) + live_regs_entry typing reference = None diff --git a/riscV/PrepassSchedulingOracle.ml b/riscV/PrepassSchedulingOracle.ml deleted file mode 120000 index 912e9ffa..00000000 --- a/riscV/PrepassSchedulingOracle.ml +++ /dev/null @@ -1 +0,0 @@ -../aarch64/PrepassSchedulingOracle.ml \ No newline at end of file diff --git a/riscV/PrepassSchedulingOracle.ml b/riscV/PrepassSchedulingOracle.ml new file mode 100644 index 00000000..53a81095 --- /dev/null +++ b/riscV/PrepassSchedulingOracle.ml @@ -0,0 +1,485 @@ +open AST +open RTL +open Maps +open InstructionScheduler +open Registers +open PrepassSchedulingOracleDeps + +let use_alias_analysis () = false + +let length_of_chunk = function +| Mint8signed +| Mint8unsigned -> 1 +| Mint16signed +| Mint16unsigned -> 2 +| Mint32 +| Mfloat32 +| Many32 -> 4 +| Mint64 +| Mfloat64 +| Many64 -> 8;; + +let get_simple_dependencies (opweights : opweights) (seqa : (instruction*Regset.t) array) = + let last_reg_reads : int list PTree.t ref = ref PTree.empty + and last_reg_write : (int*int) PTree.t ref = ref PTree.empty + and last_mem_reads : int list ref = ref [] + and last_mem_write : int option ref = ref None + and last_branch : int option ref = ref None + and last_non_pipelined_op : int array = Array.make + opweights.nr_non_pipelined_units ( -1 ) + and latency_constraints : latency_constraint list ref = ref [] in + let add_constraint instr_from instr_to latency = + assert (instr_from <= instr_to); + assert (latency >= 0); + if instr_from = instr_to + then (if latency = 0 + then () + else failwith "PrepassSchedulingOracle.get_dependencies: negative self-loop") + else + latency_constraints := + { instr_from = instr_from; + instr_to = instr_to; + latency = latency + }:: !latency_constraints + and get_last_reads reg = + match PTree.get reg !last_reg_reads + with Some l -> l + | None -> [] in + let add_input_mem i = + if not (use_alias_analysis ()) + then + begin + begin + (* Read after write *) + match !last_mem_write with + | None -> () + | Some j -> add_constraint j i 1 + end; + last_mem_reads := i :: !last_mem_reads + end + and add_output_mem i = + if not (use_alias_analysis ()) + then + begin + begin + (* Write after write *) + match !last_mem_write with + | None -> () + | Some j -> add_constraint j i 1 + end; + (* Write after read *) + List.iter (fun j -> add_constraint j i 0) !last_mem_reads; + last_mem_write := Some i; + last_mem_reads := [] + end + and add_input_reg i reg = + begin + (* Read after write *) + match PTree.get reg !last_reg_write with + | None -> () + | Some (j, latency) -> add_constraint j i latency + end; + last_reg_reads := PTree.set reg + (i :: get_last_reads reg) + !last_reg_reads + and add_output_reg i latency reg = + begin + (* Write after write *) + match PTree.get reg !last_reg_write with + | None -> () + | Some (j, _) -> add_constraint j i 1 + end; + begin + (* Write after read *) + List.iter (fun j -> add_constraint j i 0) (get_last_reads reg) + end; + last_reg_write := PTree.set reg (i, latency) !last_reg_write; + last_reg_reads := PTree.remove reg !last_reg_reads + in + let add_input_regs i regs = List.iter (add_input_reg i) regs in + let rec add_builtin_res i (res : reg builtin_res) = + match res with + | BR r -> add_output_reg i 10 r + | BR_none -> () + | BR_splitlong (hi, lo) -> add_builtin_res i hi; + add_builtin_res i lo in + let rec add_builtin_arg i (ba : reg builtin_arg) = + match ba with + | BA r -> add_input_reg i r + | BA_int _ | BA_long _ | BA_float _ | BA_single _ -> () + | BA_loadstack(_,_) -> add_input_mem i + | BA_addrstack _ -> () + | BA_loadglobal(_, _, _) -> add_input_mem i + | BA_addrglobal _ -> () + | BA_splitlong(hi, lo) -> add_builtin_arg i hi; + add_builtin_arg i lo + | BA_addptr(a1, a2) -> add_builtin_arg i a1; + add_builtin_arg i a2 in + let irreversible_action i = + match !last_branch with + | None -> () + | Some j -> add_constraint j i 1 in + let set_branch i = + irreversible_action i; + last_branch := Some i in + let add_non_pipelined_resources i resources = + Array.iter2 + (fun latency last -> + if latency >= 0 && last >= 0 then add_constraint last i latency) + resources last_non_pipelined_op; + Array.iteri (fun rsc latency -> + if latency >= 0 + then last_non_pipelined_op.(rsc) <- i) resources + in + Array.iteri + begin + fun i (insn, other_uses) -> + List.iter (fun use -> + add_input_reg i use) + (Regset.elements other_uses); + + match insn with + | Inop _ -> () + | Iop(op, inputs, output, _) -> + add_non_pipelined_resources i + (opweights.non_pipelined_resources_of_op op (List.length inputs)); + (if Op.is_trapping_op op then irreversible_action i); + add_input_regs i inputs; + add_output_reg i (opweights.latency_of_op op (List.length inputs)) output + | Iload(trap, chunk, addressing, addr_regs, output, _) -> + (if trap=TRAP then irreversible_action i); + add_input_mem i; + add_input_regs i addr_regs; + add_output_reg i (opweights.latency_of_load trap chunk addressing (List.length addr_regs)) output + | Istore(chunk, addressing, addr_regs, input, _) -> + irreversible_action i; + add_input_regs i addr_regs; + add_input_reg i input; + add_output_mem i + | Icall(signature, ef, inputs, output, _) -> + set_branch i; + (match ef with + | Datatypes.Coq_inl r -> add_input_reg i r + | Datatypes.Coq_inr symbol -> () + ); + add_input_mem i; + add_input_regs i inputs; + add_output_reg i (opweights.latency_of_call signature ef) output; + add_output_mem i; + failwith "Icall" + | Itailcall(signature, ef, inputs) -> + set_branch i; + (match ef with + | Datatypes.Coq_inl r -> add_input_reg i r + | Datatypes.Coq_inr symbol -> () + ); + add_input_mem i; + add_input_regs i inputs; + failwith "Itailcall" + | Ibuiltin(ef, builtin_inputs, builtin_output, _) -> + set_branch i; + add_input_mem i; + List.iter (add_builtin_arg i) builtin_inputs; + add_builtin_res i builtin_output; + add_output_mem i; + failwith "Ibuiltin" + | Icond(cond, inputs, _, _, _) -> + set_branch i; + add_input_mem i; + add_input_regs i inputs + | Ijumptable(input, _) -> + set_branch i; + add_input_reg i input; + failwith "Ijumptable" + | Ireturn(Some input) -> + set_branch i; + add_input_reg i input; + failwith "Ireturn" + | Ireturn(None) -> + set_branch i; + failwith "Ireturn none" + end seqa; + !latency_constraints;; + +let resources_of_instruction (opweights : opweights) = function + | Inop _ -> Array.map (fun _ -> 0) opweights.pipelined_resource_bounds + | Iop(op, inputs, output, _) -> + opweights.resources_of_op op (List.length inputs) + | Iload(trap, chunk, addressing, addr_regs, output, _) -> + opweights.resources_of_load trap chunk addressing (List.length addr_regs) + | Istore(chunk, addressing, addr_regs, input, _) -> + opweights.resources_of_store chunk addressing (List.length addr_regs) + | Icall(signature, ef, inputs, output, _) -> + opweights.resources_of_call signature ef + | Ibuiltin(ef, builtin_inputs, builtin_output, _) -> + opweights.resources_of_builtin ef + | Icond(cond, args, _, _ , _) -> + opweights.resources_of_cond cond (List.length args) + | Itailcall _ | Ijumptable _ | Ireturn _ -> opweights.pipelined_resource_bounds + +let print_sequence pp (seqa : instruction array) = + Array.iteri ( + fun i (insn : instruction) -> + PrintRTL.print_instruction pp (i, insn)) seqa;; + +type unique_id = int + +type 'a symbolic_term_node = + | STop of Op.operation * 'a list + | STinitial_reg of int + | STother of int;; + +type symbolic_term = { + hash_id : unique_id; + hash_ct : symbolic_term symbolic_term_node + };; + +let rec print_term channel term = + match term.hash_ct with + | STop(op, args) -> + PrintOp.print_operation print_term channel (op, args) + | STinitial_reg n -> Printf.fprintf channel "x%d" n + | STother n -> Printf.fprintf channel "y%d" n;; + +type symbolic_term_table = { + st_table : (unique_id symbolic_term_node, symbolic_term) Hashtbl.t; + mutable st_next_id : unique_id };; + +let hash_init () = { + st_table = Hashtbl.create 20; + st_next_id = 0 + };; + +let ground_to_id = function + | STop(op, l) -> STop(op, List.map (fun t -> t.hash_id) l) + | STinitial_reg r -> STinitial_reg r + | STother i -> STother i;; + +let hash_node (table : symbolic_term_table) (term : symbolic_term symbolic_term_node) : symbolic_term = + let grounded = ground_to_id term in + match Hashtbl.find_opt table.st_table grounded with + | Some x -> x + | None -> + let term' = { hash_id = table.st_next_id; + hash_ct = term } in + (if table.st_next_id = max_int then failwith "hash: max_int"); + table.st_next_id <- table.st_next_id + 1; + Hashtbl.add table.st_table grounded term'; + term';; + +type access = { + base : symbolic_term; + offset : int64; + length : int + };; + +let term_equal a b = (a.hash_id = b.hash_id);; + +let access_of_addressing get_reg chunk addressing args = + match addressing, args with + | (Op.Aindexed ofs), [reg] -> Some + { base = get_reg reg; + offset = Camlcoq.camlint64_of_ptrofs ofs; + length = length_of_chunk chunk + } + | _, _ -> None ;; +(* TODO: global *) + +let symbolic_execution (seqa : instruction array) = + let regs = ref PTree.empty + and table = hash_init() in + let assign reg term = regs := PTree.set reg term !regs + and hash term = hash_node table term in + let get_reg reg = + match PTree.get reg !regs with + | None -> hash (STinitial_reg (Camlcoq.P.to_int reg)) + | Some x -> x in + let targets = Array.make (Array.length seqa) None in + Array.iteri + begin + fun i insn -> + match insn with + | Iop(Op.Omove, [input], output, _) -> + assign output (get_reg input) + | Iop(op, inputs, output, _) -> + assign output (hash (STop(op, List.map get_reg inputs))) + + | Iload(trap, chunk, addressing, args, output, _) -> + let access = access_of_addressing get_reg chunk addressing args in + targets.(i) <- access; + assign output (hash (STother(i))) + + | Icall(_, _, _, output, _) + | Ibuiltin(_, _, BR output, _) -> + assign output (hash (STother(i))) + + | Istore(chunk, addressing, args, va, _) -> + let access = access_of_addressing get_reg chunk addressing args in + targets.(i) <- access + + | Inop _ -> () + | Ibuiltin(_, _, BR_none, _) -> () + | Ibuiltin(_, _, BR_splitlong _, _) -> failwith "BR_splitlong" + + | Itailcall (_, _, _) + |Icond (_, _, _, _, _) + |Ijumptable (_, _) + |Ireturn _ -> () + end seqa; + targets;; + +let print_access channel = function + | None -> Printf.fprintf channel "any" + | Some x -> Printf.fprintf channel "%a + %Ld" print_term x.base x.offset;; + +let print_targets channel seqa = + let targets = symbolic_execution seqa in + Array.iteri + (fun i insn -> + match insn with + | Iload _ -> Printf.fprintf channel "%d: load %a\n" + i print_access targets.(i) + | Istore _ -> Printf.fprintf channel "%d: store %a\n" + i print_access targets.(i) + | _ -> () + ) seqa;; + +let may_overlap a0 b0 = + match a0, b0 with + | (None, _) | (_ , None) -> true + | (Some a), (Some b) -> + if term_equal a.base b.base + then (max a.offset b.offset) < + (min (Int64.add (Int64.of_int a.length) a.offset) + (Int64.add (Int64.of_int b.length) b.offset)) + else match a.base.hash_ct, b.base.hash_ct with + | STop(Op.Oaddrsymbol(ida, ofsa),[]), + STop(Op.Oaddrsymbol(idb, ofsb),[]) -> + (ida=idb) && + let ao = Int64.add a.offset (Camlcoq.camlint64_of_ptrofs ofsa) + and bo = Int64.add b.offset (Camlcoq.camlint64_of_ptrofs ofsb) in + (max ao bo) < + (min (Int64.add (Int64.of_int a.length) ao) + (Int64.add (Int64.of_int b.length) bo)) + | STop(Op.Oaddrstack _, []), + STop(Op.Oaddrsymbol _, []) + | STop(Op.Oaddrsymbol _, []), + STop(Op.Oaddrstack _, []) -> false + | STop(Op.Oaddrstack(ofsa),[]), + STop(Op.Oaddrstack(ofsb),[]) -> + let ao = Int64.add a.offset (Camlcoq.camlint64_of_ptrofs ofsa) + and bo = Int64.add b.offset (Camlcoq.camlint64_of_ptrofs ofsb) in + (max ao bo) < + (min (Int64.add (Int64.of_int a.length) ao) + (Int64.add (Int64.of_int b.length) bo)) + | _ -> true;; + +(* +(* TODO suboptimal quadratic algorithm *) +let get_alias_dependencies seqa = + let targets = symbolic_execution seqa + and deps = ref [] in + let add_constraint instr_from instr_to latency = + deps := { instr_from = instr_from; + instr_to = instr_to; + latency = latency + }:: !deps in + for i=0 to (Array.length seqa)-1 + do + for j=0 to i-1 + do + match seqa.(j), seqa.(i) with + | (Istore _), ((Iload _) | (Istore _)) -> + if may_overlap targets.(j) targets.(i) + then add_constraint j i 1 + | (Iload _), (Istore _) -> + if may_overlap targets.(j) targets.(i) + then add_constraint j i 0 + | (Istore _ | Iload _), (Icall _ | Ibuiltin _) + | (Icall _ | Ibuiltin _), (Icall _ | Ibuiltin _ | Iload _ | Istore _) -> + add_constraint j i 1 + | (Inop _ | Iop _), _ + | _, (Inop _ | Iop _) + | (Iload _), (Iload _) -> () + done + done; + !deps;; + *) + +let define_problem (opweights : opweights) (live_entry_regs : Regset.t) + (typing : RTLtyping.regenv) reference_counting seqa = + let simple_deps = get_simple_dependencies opweights seqa in + { max_latency = -1; + resource_bounds = opweights.pipelined_resource_bounds; + live_regs_entry = live_entry_regs; + typing = typing; + reference_counting = Some reference_counting; + instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa); + latency_constraints = + (* if (use_alias_analysis ()) + then (get_alias_dependencies seqa) @ simple_deps + else *) simple_deps };; + +let zigzag_scheduler problem early_ones = + let nr_instructions = get_nr_instructions problem in + assert(nr_instructions = (Array.length early_ones)); + match list_scheduler problem with + | Some fwd_schedule -> + let fwd_makespan = fwd_schedule.((Array.length fwd_schedule) - 1) in + let constraints' = ref problem.latency_constraints in + Array.iteri (fun i is_early -> + if is_early then + constraints' := { + instr_from = i; + instr_to = nr_instructions ; + latency = fwd_makespan - fwd_schedule.(i) } ::!constraints' ) + early_ones; + validated_scheduler reverse_list_scheduler + { problem with latency_constraints = !constraints' } + | None -> None;; + +let prepass_scheduler_by_name name problem early_ones = + match name with + | "zigzag" -> zigzag_scheduler problem early_ones + | _ -> scheduler_by_name name problem + +let schedule_sequence (seqa : (instruction*Regset.t) array) + (live_regs_entry : Registers.Regset.t) + (typing : RTLtyping.regenv) + reference = + let opweights = OpWeights.get_opweights () in + try + if (Array.length seqa) <= 1 + then None + else + begin + let nr_instructions = Array.length seqa in + (if !Clflags.option_debug_compcert > 6 + then Printf.printf "prepass scheduling length = %d\n" (Array.length seqa)); + let problem = define_problem opweights live_regs_entry + typing reference seqa in + (if !Clflags.option_debug_compcert > 7 + then (print_sequence stdout (Array.map fst seqa); + print_problem stdout problem)); + match prepass_scheduler_by_name + (!Clflags.option_fprepass_sched) + problem + (Array.map (fun (ins, _) -> + match ins with + | Icond _ -> true + | _ -> false) seqa) with + | None -> Printf.printf "no solution in prepass scheduling\n"; + None + | Some solution -> + let positions = Array.init nr_instructions (fun i -> i) in + Array.sort (fun i j -> + let si = solution.(i) and sj = solution.(j) in + if si < sj then -1 + else if si > sj then 1 + else i - j) positions; + Some positions + end + with (Failure s) -> + Printf.printf "failure in prepass scheduling: %s\n" s; + None;; + diff --git a/riscV/PrepassSchedulingOracleDeps.ml b/riscV/PrepassSchedulingOracleDeps.ml deleted file mode 120000 index 1e955b85..00000000 --- a/riscV/PrepassSchedulingOracleDeps.ml +++ /dev/null @@ -1 +0,0 @@ -../aarch64/PrepassSchedulingOracleDeps.ml \ No newline at end of file diff --git a/riscV/PrepassSchedulingOracleDeps.ml b/riscV/PrepassSchedulingOracleDeps.ml new file mode 100644 index 00000000..8d10d406 --- /dev/null +++ b/riscV/PrepassSchedulingOracleDeps.ml @@ -0,0 +1,17 @@ +type called_function = (Registers.reg, AST.ident) Datatypes.sum + +type opweights = + { + pipelined_resource_bounds : int array; + nr_non_pipelined_units : int; + latency_of_op : Op.operation -> int -> int; + resources_of_op : Op.operation -> int -> int array; + non_pipelined_resources_of_op : Op.operation -> int -> int array; + latency_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int; + resources_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int array; + resources_of_store : AST.memory_chunk -> Op.addressing -> int -> int array; + resources_of_cond : Op.condition -> int -> int array; + latency_of_call : AST.signature -> called_function -> int; + resources_of_call : AST.signature -> called_function -> int array; + resources_of_builtin : AST.external_function -> int array + };; diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml index f823ccca..0203d9c8 100644 --- a/scheduling/InstructionScheduler.ml +++ b/scheduling/InstructionScheduler.ml @@ -526,7 +526,7 @@ let reg_pres_scheduler (problem : problem) : solution option = (vector_subtract usages.(maybe) current_resources; result := maybe) else - if not !Clflags.option_regpres_temp + if not !Clflags.option_regpres_wait_window then (InstrSet.iter (fun ins -> if vector_less_equal usages.(ins) current_resources && @@ -565,7 +565,7 @@ let reg_pres_scheduler (problem : problem) : solution option = if !csr_b && !Clflags.option_debug_compcert > 6 then DebugPrint.debug "REGPRES: high pres class %d\n" i; csr_b := false; - if !Clflags.option_regpres_temp then + if !Clflags.option_regpres_wait_window then cnt := 0; List.iter (fun (r,b) -> if b then diff --git a/x86/ExpansionOracle.ml b/x86/ExpansionOracle.ml deleted file mode 120000 index ee2674bf..00000000 --- a/x86/ExpansionOracle.ml +++ /dev/null @@ -1 +0,0 @@ -../aarch64/ExpansionOracle.ml \ No newline at end of file diff --git a/x86/ExpansionOracle.ml b/x86/ExpansionOracle.ml new file mode 100644 index 00000000..3b63b80d --- /dev/null +++ b/x86/ExpansionOracle.ml @@ -0,0 +1,17 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Léo Gourdin UGA, VERIMAG *) +(* *) +(* Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +open RTLpathCommon + +let expanse (sb : superblock) code pm = (code, pm) + +let find_last_node_reg c = () -- cgit From 2ff766a18432fd75739abab0b5741ded6b67a2a5 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Fri, 16 Jul 2021 15:29:25 +0200 Subject: activate register pressure by default --- driver/Clflags.ml | 2 +- driver/Driver.ml | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/driver/Clflags.ml b/driver/Clflags.ml index 085eaa7e..25bd2c78 100644 --- a/driver/Clflags.ml +++ b/driver/Clflags.ml @@ -51,7 +51,7 @@ let option_flooprotate = ref 0 (* rotate the innermost loops to have the conditi let option_mtune = ref "" let option_fprepass = ref true -let option_fprepass_sched = ref "list" +let option_fprepass_sched = ref "regpres" let option_fpostpass = ref true let option_fpostpass_sched = ref "list" diff --git a/driver/Driver.ml b/driver/Driver.ml index 79353f32..3f5a4bd9 100644 --- a/driver/Driver.ml +++ b/driver/Driver.ml @@ -298,9 +298,9 @@ let num_input_files = ref 0 let cmdline_actions = let f_opt name ref = [Exact("-f" ^ name), Set ref; Exact("-fno-" ^ name), Unset ref] in - let f_opt_str name ref strref = + let f_opt_str name default ref strref = [Exact("-f" ^ name ^ "="), String - (fun s -> (strref := (if s == "" then "list" else s)); ref := true) + (fun s -> (strref := (if s == "" then default else s)); ref := true) ] in let f_str name strref default = [Exact("-f" ^ name ^ "="), String @@ -435,8 +435,8 @@ let cmdline_actions = @ [ Exact "-funrollbody", Integer (fun n -> option_funrollbody := n) ] @ [ Exact "-flooprotate", Integer (fun n -> option_flooprotate := n) ] @ f_opt "tracelinearize" option_ftracelinearize - @ f_opt_str "prepass" option_fprepass option_fprepass_sched - @ f_opt_str "postpass" option_fpostpass option_fpostpass_sched + @ f_opt_str "prepass" "regpress" option_fprepass option_fprepass_sched + @ f_opt_str "postpass" "list" option_fpostpass option_fpostpass_sched @ f_opt "inline" option_finline @ f_opt "inline-functions-called-once" option_finline_functions_called_once @ f_opt "globaladdrtmp" option_fglobaladdrtmp -- cgit From cf033ec29391d5358dea1d3b25da1738957478c4 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Fri, 16 Jul 2021 18:01:03 +0200 Subject: comment for authors --- aarch64/PrepassSchedulingOracle.ml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml index 53a81095..e09eea13 100644 --- a/aarch64/PrepassSchedulingOracle.ml +++ b/aarch64/PrepassSchedulingOracle.ml @@ -1,3 +1,16 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* Léo Gourdin UGA, VERIMAG *) +(* Nicolas Nardino ENS-Lyon, VERIMAG *) +(* *) +(* *) +(* *************************************************************) + open AST open RTL open Maps -- cgit From cb6103e684ff89a0493d6b2e3af2317df04fede3 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Mon, 19 Jul 2021 13:23:40 +0200 Subject: debugprint flag not to true --- scheduling/InstructionScheduler.ml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml index 0203d9c8..0eadd5a4 100644 --- a/scheduling/InstructionScheduler.ml +++ b/scheduling/InstructionScheduler.ml @@ -349,8 +349,6 @@ let _ = fun x -> priority_list_scheduler INSTRUCTION_ORDER x;; (* A scheduler sensitive to register pressure *) let reg_pres_scheduler (problem : problem) : solution option = - DebugPrint.debug_flag := true; - let nr_instructions = get_nr_instructions problem in if !Clflags.option_debug_compcert > 6 then @@ -615,11 +613,9 @@ let reg_pres_scheduler (problem : problem) : solution option = if target_time > !final_time then final_time := target_time) predecessors.(nr_instructions); times.(nr_instructions) <- !final_time; - DebugPrint.debug_flag := false; Some times with Exit -> DebugPrint.debug "reg_pres_sched failed\n"; - DebugPrint.debug_flag := false; None ;; @@ -628,7 +624,6 @@ let reg_pres_scheduler (problem : problem) : solution option = (********************************************************************) let reg_pres_scheduler_bis (problem : problem) : solution option = - DebugPrint.debug_flag := true; Printf.printf "\nNEW\n\n"; let nr_instructions = get_nr_instructions problem in let successors = get_successors problem @@ -814,11 +809,9 @@ let reg_pres_scheduler_bis (problem : problem) : solution option = if target_time > !final_time then final_time := target_time) predecessors.(nr_instructions); times.(nr_instructions) <- !final_time; - DebugPrint.debug_flag := false; Some times with Exit -> DebugPrint.debug "reg_pres_sched failed\n"; - DebugPrint.debug_flag := false; None ;; -- cgit From ad72ffe9c2d7839cfe85926264565d59daf87a3c Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Mon, 19 Jul 2021 13:24:46 +0200 Subject: disable printing debug info --- scheduling/InstructionScheduler.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml index 0eadd5a4..5bd969ca 100644 --- a/scheduling/InstructionScheduler.ml +++ b/scheduling/InstructionScheduler.ml @@ -624,7 +624,7 @@ let reg_pres_scheduler (problem : problem) : solution option = (********************************************************************) let reg_pres_scheduler_bis (problem : problem) : solution option = - Printf.printf "\nNEW\n\n"; + (* Printf.printf "\nNEW\n\n"; *) let nr_instructions = get_nr_instructions problem in let successors = get_successors problem and predecessors = get_predecessors problem -- cgit From 650c134f915d0b2e799c76c87ffd2ed09c782dcc Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Mon, 19 Jul 2021 13:26:41 +0200 Subject: use regpres_bis not regpres --- driver/Clflags.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/driver/Clflags.ml b/driver/Clflags.ml index 085eaa7e..134c222f 100644 --- a/driver/Clflags.ml +++ b/driver/Clflags.ml @@ -51,7 +51,7 @@ let option_flooprotate = ref 0 (* rotate the innermost loops to have the conditi let option_mtune = ref "" let option_fprepass = ref true -let option_fprepass_sched = ref "list" +let option_fprepass_sched = ref "regpres_bis" let option_fpostpass = ref true let option_fpostpass_sched = ref "list" -- cgit From a86c0c659666a9a154ae4d27fee1c4cad3dd26b7 Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Mon, 19 Jul 2021 15:57:57 +0200 Subject: start fixing --- scheduling/InstructionScheduler.ml | 90 ++++++++++++++++---------------------- 1 file changed, 38 insertions(+), 52 deletions(-) diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml index 0203d9c8..3e82336b 100644 --- a/scheduling/InstructionScheduler.ml +++ b/scheduling/InstructionScheduler.ml @@ -353,8 +353,8 @@ let reg_pres_scheduler (problem : problem) : solution option = let nr_instructions = get_nr_instructions problem in - if !Clflags.option_debug_compcert > 6 then - DebugPrint.debug "SCHEDULING_SUPERBLOCK %d\n" nr_instructions; + (* if !Clflags.option_debug_compcert > 6 then *) + DebugPrint.debug "\nSCHEDULING_SUPERBLOCK %d\n" nr_instructions; let successors = get_successors problem and predecessors = get_predecessors problem @@ -495,32 +495,22 @@ let reg_pres_scheduler (problem : problem) : solution option = let attempt_scheduling ready usages = let result = ref (-1) in + DebugPrint.debug "\n\nREADY: "; + InstrSet.iter (fun i -> DebugPrint.debug "%d " i) ready; + DebugPrint.debug "\n\n"; try Array.iteri (fun i avlregs -> - (* print_string "avlregs: "; - * print_int i; - * print_string " "; - * print_int avlregs; - * print_newline (); - * print_string "live regs: "; - * print_int (Hashtbl.length live_regs); - * print_newline (); - * flush stdout; *) - if avlregs <= regs_thresholds.(i) + DebugPrint.debug "avlregs: %d %d\nlive regs: %d\n" + i avlregs (Hashtbl.length live_regs); + if !cnt < 5 && avlregs <= regs_thresholds.(i) then ( csr_b := true; let maybe = InstrSet.sched_CSR i ready usages in - (* print_string "maybe\n"; - * print_int maybe; - * print_newline (); - * flush stdout; *) - (if maybe > 0 && + DebugPrint.debug "maybe %d\n" maybe; + (if maybe >= 0 && let delta = List.fold_left (fold_delta i) 0 mentions.(maybe) in - (* print_string "delta "; - * print_int delta; - * print_newline (); - * flush stdout; *) + DebugPrint.debug "delta %d\n" delta; delta > 0 then (vector_subtract usages.(maybe) current_resources; @@ -558,33 +548,30 @@ let reg_pres_scheduler (problem : problem) : solution option = problem.instruction_usages with | -1 -> advance_time() | i -> (assert(times.(i) < 0); - ((* print_string "INSTR ISSUED: "; - * print_int i; - * print_newline (); - * flush stdout; *) - if !csr_b && !Clflags.option_debug_compcert > 6 then - DebugPrint.debug "REGPRES: high pres class %d\n" i; - csr_b := false; - if !Clflags.option_regpres_wait_window then - cnt := 0; - List.iter (fun (r,b) -> - if b then - (match Hashtbl.find_opt counts r with - | None -> assert false - | Some (t, n) -> - Hashtbl.remove counts r; - if n = 1 then - (Hashtbl.remove live_regs r; - available_regs.(t) - <- available_regs.(t) + 1)) - else - let t = class_r r in - match Hashtbl.find_opt live_regs r with - | None -> (Hashtbl.add live_regs r t; - available_regs.(t) - <- available_regs.(t) - 1) - | Some i -> () - ) mentions.(i)); + (DebugPrint.debug "INSTR ISSUED: %d\n" i; + if !csr_b && !Clflags.option_debug_compcert > 6 then + DebugPrint.debug "REGPRES: high pres class %d\n" i; + csr_b := false; + (* if !Clflags.option_regpres_wait_window then *) + cnt := 0; + List.iter (fun (r,b) -> + if b then + (match Hashtbl.find_opt counts r with + | None -> assert false + | Some (t, n) -> + Hashtbl.remove counts r; + if n = 1 then + (Hashtbl.remove live_regs r; + available_regs.(t) + <- available_regs.(t) + 1)) + else + let t = class_r r in + match Hashtbl.find_opt live_regs r with + | None -> (Hashtbl.add live_regs r t; + available_regs.(t) + <- available_regs.(t) - 1) + | Some i -> () + ) mentions.(i)); times.(i) <- !current_time; ready.(!current_time) <- InstrSet.remove i (ready.(!current_time)); @@ -595,8 +582,8 @@ let reg_pres_scheduler (problem : problem) : solution option = | to_time -> ((* DebugPrint.debug "TO TIME %d : %d\n" to_time * (Array.length ready); *) - ready.(to_time) - <- InstrSet.add instr_to ready.(to_time)) + ready.(to_time) + <- InstrSet.add instr_to ready.(to_time)) ) successors.(i); successors.(i) <- [] ) @@ -605,8 +592,7 @@ let reg_pres_scheduler (problem : problem) : solution option = try let final_time = ref (-1) in for i = 0 to nr_instructions - 1 do - (* print_int i; - * flush stdout; *) + DebugPrint.debug "%d " i; (if times.(i) < 0 then raise Exit); (if !final_time < times.(i) + 1 then final_time := times.(i) + 1) done; -- cgit From ee2112a9bacc246e0434a19fc93aab335fd56ddd Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Mon, 19 Jul 2021 18:09:27 +0200 Subject: Fix scheduling fails (forgot a case) --- scheduling/InstructionScheduler.ml | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml index 3e82336b..9c5c674d 100644 --- a/scheduling/InstructionScheduler.ml +++ b/scheduling/InstructionScheduler.ml @@ -349,13 +349,14 @@ let _ = fun x -> priority_list_scheduler INSTRUCTION_ORDER x;; (* A scheduler sensitive to register pressure *) let reg_pres_scheduler (problem : problem) : solution option = - DebugPrint.debug_flag := true; + (* DebugPrint.debug_flag := true; *) let nr_instructions = get_nr_instructions problem in - (* if !Clflags.option_debug_compcert > 6 then *) - DebugPrint.debug "\nSCHEDULING_SUPERBLOCK %d\n" nr_instructions; - + if !Clflags.option_debug_compcert > 6 then + (Printf.eprintf "\nSCHEDULING_SUPERBLOCK %d\n" nr_instructions; + flush stderr); + let successors = get_successors problem and predecessors = get_predecessors problem and times = Array.make (nr_instructions+1) (-1) in @@ -524,7 +525,8 @@ let reg_pres_scheduler (problem : problem) : solution option = then result := ins ) ready; if !result <> -1 then - vector_subtract usages.(!result) current_resources) + vector_subtract usages.(!result) current_resources; + incr cnt) else (incr cnt) ); @@ -550,7 +552,8 @@ let reg_pres_scheduler (problem : problem) : solution option = | i -> (assert(times.(i) < 0); (DebugPrint.debug "INSTR ISSUED: %d\n" i; if !csr_b && !Clflags.option_debug_compcert > 6 then - DebugPrint.debug "REGPRES: high pres class %d\n" i; + (Printf.eprintf "REGPRES: high pres class %d\n" i; + flush stderr); csr_b := false; (* if !Clflags.option_regpres_wait_window then *) cnt := 0; @@ -601,11 +604,11 @@ let reg_pres_scheduler (problem : problem) : solution option = if target_time > !final_time then final_time := target_time) predecessors.(nr_instructions); times.(nr_instructions) <- !final_time; - DebugPrint.debug_flag := false; + (* DebugPrint.debug_flag := false; *) Some times with Exit -> DebugPrint.debug "reg_pres_sched failed\n"; - DebugPrint.debug_flag := false; + (* DebugPrint.debug_flag := false; *) None ;; -- cgit From 1fbe45e2d1f02ef6e8fb6fe7545728a744e047b8 Mon Sep 17 00:00:00 2001 From: Sylvain Boulmé Date: Sat, 24 Jul 2021 11:49:14 +0200 Subject: remove default_notrap_load_value --- backend/RTLTunnelingproof.v | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/RTLTunnelingproof.v b/backend/RTLTunnelingproof.v index 14a2c037..51ef0f7a 100644 --- a/backend/RTLTunnelingproof.v +++ b/backend/RTLTunnelingproof.v @@ -531,7 +531,7 @@ Proof. + eapply exec_Iload_notrap1. * exploit instruction_type_preserved; (simpl; eauto)||congruence. * rewrite <- H1. apply eval_addressing_preserved. apply symbols_preserved. - + apply match_states_intro; try assumption. apply set_reg_lessdef. unfold default_notrap_load_value. apply Val.lessdef_undef. apply RS. + + apply match_states_intro; try assumption. apply set_reg_lessdef. apply Val.lessdef_undef. apply RS. - (* Iload NOTRAP2 *) exploit eval_addressing_lessdef; try eassumption. apply reglist_lessdef; apply RS. -- cgit