From 7ae1fb0faea68ce5cfe04a232e49659247c244e9 Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Fri, 4 Jun 2021 14:24:07 +0200 Subject: Passing info of live regs to scheduler: beginning --- aarch64/PostpassSchedulingOracle.ml | 1 + aarch64/PrepassSchedulingOracle.ml | 4 +++- scheduling/InstructionScheduler.ml | 2 ++ scheduling/InstructionScheduler.mli | 3 +++ scheduling/RTLpathScheduleraux.ml | 9 ++++++++- 5 files changed, 17 insertions(+), 2 deletions(-) diff --git a/aarch64/PostpassSchedulingOracle.ml b/aarch64/PostpassSchedulingOracle.ml index cde3e7a7..a9737088 100644 --- a/aarch64/PostpassSchedulingOracle.ml +++ b/aarch64/PostpassSchedulingOracle.ml @@ -507,6 +507,7 @@ let build_problem bb = { max_latency = -1; resource_bounds = opweights.pipelined_resource_bounds; + live_regs_entry = Registers.Regset.empty; (* PLACEHOLDER *) instruction_usages = instruction_usages bb; latency_constraints = latency_constraints bb; } diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml index 2c3eb14f..1fd12a6a 100644 --- a/aarch64/PrepassSchedulingOracle.ml +++ b/aarch64/PrepassSchedulingOracle.ml @@ -410,6 +410,7 @@ let define_problem (opweights : opweights) seqa = let simple_deps = get_simple_dependencies opweights seqa in { max_latency = -1; resource_bounds = opweights.pipelined_resource_bounds; + live_regs_entry = Regset.empty; (* PLACEHOLDER *) instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa); latency_constraints = (* if (use_alias_analysis ()) @@ -439,7 +440,8 @@ let prepass_scheduler_by_name name problem early_ones = | "zigzag" -> zigzag_scheduler problem early_ones | _ -> scheduler_by_name name problem -let schedule_sequence (seqa : (instruction*Regset.t) array) = +let schedule_sequence (seqa : (instruction*Regset.t) array) + (live_regs_entry : Registers.Regset.t)= let opweights = OpWeights.get_opweights () in try if (Array.length seqa) <= 1 diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml index eab0b21a..976037bd 100644 --- a/scheduling/InstructionScheduler.ml +++ b/scheduling/InstructionScheduler.ml @@ -33,6 +33,7 @@ type latency_constraint = { type problem = { max_latency : int; resource_bounds : int array; + live_regs_entry : Registers.Regset.t; instruction_usages : int array array; latency_constraints : latency_constraint list; };; @@ -438,6 +439,7 @@ let reverse_problem problem = { max_latency = problem.max_latency; resource_bounds = problem.resource_bounds; + live_regs_entry = Registers.Regset.empty; (* PLACEHOLDER *) instruction_usages = Array.init (nr_instructions + 1) (fun i -> if i=0 diff --git a/scheduling/InstructionScheduler.mli b/scheduling/InstructionScheduler.mli index fb7af3f6..f53dc0ef 100644 --- a/scheduling/InstructionScheduler.mli +++ b/scheduling/InstructionScheduler.mli @@ -23,6 +23,9 @@ type problem = { resource_bounds : int array; (** An array of number of units available indexed by the kind of resources to be allocated. It can be empty, in which case the problem is scheduling without resource constraints. *) + live_regs_entry : Registers.Regset.t; + (** The set of live pseudo-registers at entry. *) + instruction_usages: int array array; (** At index {i i} the vector of resources used by instruction number {i i}. It must be the same length as [resource_bounds] *) diff --git a/scheduling/RTLpathScheduleraux.ml b/scheduling/RTLpathScheduleraux.ml index aeed39df..55f1a078 100644 --- a/scheduling/RTLpathScheduleraux.ml +++ b/scheduling/RTLpathScheduleraux.ml @@ -72,6 +72,11 @@ let get_superblocks code entry pm typing = lsb end +(* PLACEHOLDER *) +let get_live_regs_entry (sb : superblock) = + Registers.Regset.empty + + (* TODO David *) let schedule_superblock sb code = if not !Clflags.option_fprepass @@ -90,6 +95,7 @@ let schedule_superblock sb code = match predicted_successor ii with | Some _ -> 0 | None -> 1 in + let live_regs_entry = get_live_regs_entry sb in match PrepassSchedulingOracle.schedule_sequence (Array.map (fun i -> (match PTree.get i code with @@ -98,7 +104,8 @@ let schedule_superblock sb code = (match PTree.get i sb.liveins with | Some s -> s | None -> Regset.empty)) - (Array.sub sb.instructions 0 (nr_instr-trailer_length))) with + (Array.sub sb.instructions 0 (nr_instr-trailer_length))) + live_regs_entry with | None -> sb.instructions | Some order -> let ins' = -- cgit From 98a7a04258f2cf6caf9f18925cbeeae2f5b17be4 Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Fri, 4 Jun 2021 16:56:32 +0200 Subject: computing live regs at sb entry from its live output regs --- aarch64/PrepassSchedulingOracle.ml | 6 +++--- scheduling/RTLpathScheduleraux.ml | 24 +++++++++++++++++++++--- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml index 1fd12a6a..a743fb68 100644 --- a/aarch64/PrepassSchedulingOracle.ml +++ b/aarch64/PrepassSchedulingOracle.ml @@ -406,11 +406,11 @@ let get_alias_dependencies seqa = !deps;; *) -let define_problem (opweights : opweights) seqa = +let define_problem (opweights : opweights) (live_entry_regs : Regset.t) seqa = let simple_deps = get_simple_dependencies opweights seqa in { max_latency = -1; resource_bounds = opweights.pipelined_resource_bounds; - live_regs_entry = Regset.empty; (* PLACEHOLDER *) + live_regs_entry = live_entry_regs; instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa); latency_constraints = (* if (use_alias_analysis ()) @@ -451,7 +451,7 @@ let schedule_sequence (seqa : (instruction*Regset.t) array) let nr_instructions = Array.length seqa in (if !Clflags.option_debug_compcert > 6 then Printf.printf "prepass scheduling length = %d\n" (Array.length seqa)); - let problem = define_problem opweights seqa in + let problem = define_problem opweights live_regs_entry seqa in (if !Clflags.option_debug_compcert > 7 then (print_sequence stdout (Array.map fst seqa); print_problem stdout problem)); diff --git a/scheduling/RTLpathScheduleraux.ml b/scheduling/RTLpathScheduleraux.ml index 55f1a078..30da5d5d 100644 --- a/scheduling/RTLpathScheduleraux.ml +++ b/scheduling/RTLpathScheduleraux.ml @@ -73,8 +73,26 @@ let get_superblocks code entry pm typing = end (* PLACEHOLDER *) -let get_live_regs_entry (sb : superblock) = - Registers.Regset.empty +let get_live_regs_entry (sb : superblock) code = + let seqa = Array.map (fun i -> + (match PTree.get i code with + | Some ii -> ii + | None -> failwith "RTLpathScheduleraux.get_live_regs_entry" + ), + (match PTree.get i sb.liveins with + | Some s -> s + | None -> Regset.empty)) + sb.instructions in + Array.fold_right (fun (ins, liveins) regset -> + match ins with + | Inop l -> regset + | Iop (op, args, dest, succ) -> + Registers.Regset.add dest + (List.fold_left (fun set reg -> + Registers.Regset.remove reg set) + regset args) + | _ -> regset (* PLACEHOLDER *) + ) seqa Registers.Regset.empty (* TODO David *) @@ -95,7 +113,7 @@ let schedule_superblock sb code = match predicted_successor ii with | Some _ -> 0 | None -> 1 in - let live_regs_entry = get_live_regs_entry sb in + let live_regs_entry = get_live_regs_entry sb code in match PrepassSchedulingOracle.schedule_sequence (Array.map (fun i -> (match PTree.get i code with -- cgit From 599823a6410f1629f2b8704291839e0974bce83b Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Sat, 5 Jun 2021 19:52:59 +0200 Subject: function written, now needs testing --- scheduling/RTLpathScheduleraux.ml | 50 +++++++++++++++++++++++++++++++-------- 1 file changed, 40 insertions(+), 10 deletions(-) diff --git a/scheduling/RTLpathScheduleraux.ml b/scheduling/RTLpathScheduleraux.ml index 30da5d5d..5a427e6c 100644 --- a/scheduling/RTLpathScheduleraux.ml +++ b/scheduling/RTLpathScheduleraux.ml @@ -72,7 +72,7 @@ let get_superblocks code entry pm typing = lsb end -(* PLACEHOLDER *) + let get_live_regs_entry (sb : superblock) code = let seqa = Array.map (fun i -> (match PTree.get i code with @@ -85,16 +85,46 @@ let get_live_regs_entry (sb : superblock) code = sb.instructions in Array.fold_right (fun (ins, liveins) regset -> match ins with - | Inop l -> regset - | Iop (op, args, dest, succ) -> - Registers.Regset.add dest - (List.fold_left (fun set reg -> - Registers.Regset.remove reg set) - regset args) - | _ -> regset (* PLACEHOLDER *) - ) seqa Registers.Regset.empty - + | Inop _ -> regset + | Iop (_, args, dest, _) + | Iload (_, _, _, args, dest, _) -> + List.fold_left (fun set reg -> Registers.Regset.add reg set) + (Registers.Regset.remove dest regset) args + | Istore (_, _, args, src, _) -> + List.fold_left (fun set reg -> Registers.Regset.add reg set) + (Registers.Regset.add src regset) args + | Icall (_, fn, args, dest, _) -> + List.fold_left (fun set reg -> Registers.Regset.add reg set) + ((match fn with + | Coq_inl reg -> (Registers.Regset.add reg) + | Coq_inr _ -> (fun x -> x)) + (Registers.Regset.remove dest regset)) + args + | Itailcall (_, fn, args) -> + List.fold_left (fun set reg -> Registers.Regset.add reg set) + (match fn with + | Coq_inl reg -> (Registers.Regset.add reg regset) + | Coq_inr _ -> regset) + args + | Ibuiltin (_, args, dest, _) -> + List.fold_left (fun set reg -> + match reg with + | AST.BA r -> Registers.Regset.add r set + | _ -> set) + (match dest with + | AST.BR r -> Registers.Regset.remove r regset) + args + | Icond (_, args, _, _, _) -> + List.fold_left (fun set reg -> + Registers.Regset.add reg set) + regset args + | Ijumptable (reg, _) + | Ireturn (Some reg) -> + Registers.Regset.add reg regset + | _ -> regset + ) seqa sb.s_output_regs + (* TODO David *) let schedule_superblock sb code = if not !Clflags.option_fprepass -- cgit From 9118878bd14e24cc04c2f36cab7aa7271a0f1852 Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Sun, 6 Jun 2021 12:11:15 +0200 Subject: Fixing scope error, and non-exhaustive pattern matching --- scheduling/RTLpathScheduleraux.ml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/scheduling/RTLpathScheduleraux.ml b/scheduling/RTLpathScheduleraux.ml index 5a427e6c..653765f5 100644 --- a/scheduling/RTLpathScheduleraux.ml +++ b/scheduling/RTLpathScheduleraux.ml @@ -96,15 +96,15 @@ let get_live_regs_entry (sb : superblock) code = | Icall (_, fn, args, dest, _) -> List.fold_left (fun set reg -> Registers.Regset.add reg set) ((match fn with - | Coq_inl reg -> (Registers.Regset.add reg) - | Coq_inr _ -> (fun x -> x)) + | Datatypes.Coq_inl reg -> (Registers.Regset.add reg) + | Datatypes.Coq_inr _ -> (fun x -> x)) (Registers.Regset.remove dest regset)) args | Itailcall (_, fn, args) -> List.fold_left (fun set reg -> Registers.Regset.add reg set) (match fn with - | Coq_inl reg -> (Registers.Regset.add reg regset) - | Coq_inr _ -> regset) + | Datatypes.Coq_inl reg -> (Registers.Regset.add reg regset) + | Datatypes.Coq_inr _ -> regset) args | Ibuiltin (_, args, dest, _) -> List.fold_left (fun set reg -> @@ -112,7 +112,8 @@ let get_live_regs_entry (sb : superblock) code = | AST.BA r -> Registers.Regset.add r set | _ -> set) (match dest with - | AST.BR r -> Registers.Regset.remove r regset) + | AST.BR r -> Registers.Regset.remove r regset + | _ -> regset) args | Icond (_, args, _, _, _) -> List.fold_left (fun set reg -> -- cgit From 1cd64f429262ac333021571ef60bea1e1d7fe57a Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Sun, 6 Jun 2021 15:50:24 +0200 Subject: omega -> lia --- scheduling/RTLpath.v | 43 ++++++++++++++++++++------------------- scheduling/RTLpathSE_simu_specs.v | 11 +++++----- 2 files changed, 28 insertions(+), 26 deletions(-) diff --git a/scheduling/RTLpath.v b/scheduling/RTLpath.v index 2f73f1fa..a4fce97e 100644 --- a/scheduling/RTLpath.v +++ b/scheduling/RTLpath.v @@ -26,6 +26,7 @@ Require Import Coqlib Maps. Require Import AST Integers Values Events Memory Globalenvs Smallstep. Require Import Op Registers. Require Import RTL Linking. +Require Import Lia. Notation "'SOME' X <- A 'IN' B" := (match A with Some X => B | None => None end) (at level 200, X ident, A at level 100, B at level 200) @@ -582,8 +583,8 @@ Lemma wellformed_suffix_path c pm path path': exists pc', nth_default_succ c (path-path') pc = Some pc' /\ wellformed_path c pm path' pc'. Proof. induction 1 as [|m]. - + intros. enough (path'-path'=0)%nat as ->; [simpl;eauto|omega]. - + intros pc WF; enough (S m-path'=S (m-path'))%nat as ->; [simpl;eauto|omega]. + + intros. enough (path'-path'=0)%nat as ->; [simpl;eauto|lia]. + + intros pc WF; enough (S m-path'=S (m-path'))%nat as ->; [simpl;eauto|lia]. inversion WF; subst; clear WF; intros; simplify_someHyps. intros; simplify_someHyps; eauto. Qed. @@ -600,9 +601,9 @@ Proof. intros; exploit fn_path_wf; eauto. intro WF. set (ps:=path.(psize)). - exploit (wellformed_suffix_path (fn_code f) (fn_path f) ps O); omega || eauto. + exploit (wellformed_suffix_path (fn_code f) (fn_path f) ps O); lia || eauto. destruct 1 as (pc' & NTH_SUCC & WF'); auto. - assert (ps - 0 = ps)%nat as HH by omega. rewrite HH in NTH_SUCC. clear HH. + assert (ps - 0 = ps)%nat as HH by lia. rewrite HH in NTH_SUCC. clear HH. unfold nth_default_succ_inst. inversion WF'; clear WF'; subst. simplify_someHyps; eauto. Qed. @@ -617,11 +618,11 @@ Lemma internal_node_path path f path0 pc: Proof. intros; exploit fn_path_wf; eauto. set (ps:=path0.(psize)). - intro WF; exploit (wellformed_suffix_path (fn_code f) (fn_path f) ps (ps-path)); eauto. { omega. } + intro WF; exploit (wellformed_suffix_path (fn_code f) (fn_path f) ps (ps-path)); eauto. { lia. } destruct 1 as (pc' & NTH_SUCC & WF'). - assert (ps - (ps - path) = path)%nat as HH by omega. rewrite HH in NTH_SUCC. clear HH. + assert (ps - (ps - path) = path)%nat as HH by lia. rewrite HH in NTH_SUCC. clear HH. unfold nth_default_succ_inst. - inversion WF'; clear WF'; subst. { omega. } + inversion WF'; clear WF'; subst. { lia. } simplify_someHyps; eauto. Qed. @@ -706,7 +707,7 @@ Proof. rewrite CONT. auto. + intros; try_simplify_someHyps; try congruence. eexists. exists i. exists O; simpl. intuition eauto. - omega. + lia. Qed. Lemma isteps_resize ge path0 path1 f sp rs m pc st: @@ -837,15 +838,15 @@ Lemma stuttering path idx stack f sp rs m pc st t s1': RTL.step ge (State stack f sp st.(ipc) st.(irs) st.(imem)) t s1' -> t = E0 /\ match_inst_states idx s1' (State stack f sp pc rs m). Proof. - intros PSTEP PATH BOUND CONT RSTEP; exploit (internal_node_path (path.(psize)-(S idx))); omega || eauto. + intros PSTEP PATH BOUND CONT RSTEP; exploit (internal_node_path (path.(psize)-(S idx))); lia || eauto. intros (i & pc' & Hi & Hpc & DUM). unfold nth_default_succ_inst in Hi. erewrite isteps_normal_exit in Hi; eauto. exploit istep_complete; congruence || eauto. intros (SILENT & st0 & STEP0 & EQ). intuition; subst; unfold match_inst_states; simpl. - intros; refine (State_match _ _ path stack f sp pc rs m _ PATH _ _ _); simpl; omega || eauto. - set (ps:=path.(psize)). enough (ps - idx = S (ps - (S idx)))%nat as ->; try omega. + intros; refine (State_match _ _ path stack f sp pc rs m _ PATH _ _ _); simpl; lia || eauto. + set (ps:=path.(psize)). enough (ps - idx = S (ps - (S idx)))%nat as ->; try lia. erewrite <- isteps_step_right; eauto. Qed. @@ -874,7 +875,7 @@ Proof. destruct (initialize_path (*fn_code f*) (fn_path f) (ipc st0)) as (path0 & Hpath0); eauto. exists (path0.(psize)); intuition eauto. econstructor; eauto. - * enough (path0.(psize)-path0.(psize)=0)%nat as ->; simpl; eauto || omega. + * enough (path0.(psize)-path0.(psize)=0)%nat as ->; simpl; eauto || lia. * simpl; eauto. + generalize Hi; inversion RSTEP; clear RSTEP; subst; (repeat (simplify_someHyp; simpl in * |- * )); try congruence; eauto. - (* Icall *) @@ -897,7 +898,7 @@ Proof. destruct (initialize_path (*fn_code f*) (fn_path f) pc') as (path0 & Hpath0); eauto. exists path0.(psize); intuition eauto. econstructor; eauto. - * enough (path0.(psize)-path0.(psize)=0)%nat as ->; simpl; eauto || omega. + * enough (path0.(psize)-path0.(psize)=0)%nat as ->; simpl; eauto || lia. * simpl; eauto. - (* Ijumptable *) intros; exploit exec_Ijumptable; eauto. @@ -906,7 +907,7 @@ Proof. destruct (initialize_path (*fn_code f*) (fn_path f) pc') as (path0 & Hpath0); eauto. exists path0.(psize); intuition eauto. econstructor; eauto. - * enough (path0.(psize)-path0.(psize)=0)%nat as ->; simpl; eauto || omega. + * enough (path0.(psize)-path0.(psize)=0)%nat as ->; simpl; eauto || lia. * simpl; eauto. - (* Ireturn *) intros; exploit exec_Ireturn; eauto. @@ -933,7 +934,7 @@ Proof. intros PSTEP PATH BOUND RSTEP WF; destruct (st.(icontinue)) eqn: CONT. destruct idx as [ | idx]. + (* path_step on normal_exit *) - assert (path.(psize)-0=path.(psize))%nat as HH by omega. rewrite HH in PSTEP. clear HH. + assert (path.(psize)-0=path.(psize))%nat as HH by lia. rewrite HH in PSTEP. clear HH. exploit normal_exit; eauto. intros (s2' & LSTEP & (idx' & MATCH)). exists idx'; exists s2'; intuition eauto. @@ -942,7 +943,7 @@ Proof. unfold match_states; exists idx; exists (State stack f sp pc rs m); intuition. + (* one or two path_step on early_exit *) - exploit (isteps_resize ge (path.(psize) - idx)%nat path.(psize)); eauto; try omega. + exploit (isteps_resize ge (path.(psize) - idx)%nat path.(psize)); eauto; try lia. clear PSTEP; intros PSTEP. (* TODO for clarification: move the assert below into a separate lemma *) assert (HPATH0: exists path0, (fn_path f)!(ipc st) = Some path0). @@ -952,7 +953,7 @@ Proof. exploit istep_early_exit; eauto. intros (X1 & X2 & EARLY_EXIT). destruct st as [cont pc0 rs0 m0]; simpl in * |- *; intuition subst. - exploit (internal_node_path path0); omega || eauto. + exploit (internal_node_path path0); lia || eauto. intros (i' & pc' & Hi' & Hpc' & ENTRY). unfold nth_default_succ_inst in Hi'. erewrite isteps_normal_exit in Hi'; eauto. @@ -974,8 +975,8 @@ Proof. - simpl; eauto. * (* single step case *) exploit (stuttering path1 ps stack f sp (irs st) (imem st) (ipc st)); simpl; auto. - - { rewrite Hpath1size; enough (S ps-S ps=0)%nat as ->; try omega. simpl; eauto. } - - omega. + - { rewrite Hpath1size; enough (S ps-S ps=0)%nat as ->; try lia. simpl; eauto. } + - lia. - simpl; eauto. - simpl; eauto. - intuition subst. @@ -1000,7 +1001,7 @@ Proof. exists path.(psize). constructor; auto. econstructor; eauto. - set (ps:=path.(psize)). enough (ps-ps=O)%nat as ->; simpl; eauto. - omega. + lia. - simpl; auto. + (* exec_function_external *) destruct f; simpl in H3 |-; inversion H3; subst; clear H3. @@ -1019,7 +1020,7 @@ Proof. exists path.(psize). constructor; auto. econstructor; eauto. - set (ps:=path.(psize)). enough (ps-ps=O)%nat as ->; simpl; eauto. - omega. + lia. - simpl; auto. Qed. diff --git a/scheduling/RTLpathSE_simu_specs.v b/scheduling/RTLpathSE_simu_specs.v index 4bb3e18e..5051d805 100644 --- a/scheduling/RTLpathSE_simu_specs.v +++ b/scheduling/RTLpathSE_simu_specs.v @@ -7,6 +7,7 @@ Require Import RTL RTLpath. Require Import Errors. Require Import RTLpathSE_theory RTLpathLivegenproof. Require Import Axioms. +Require Import Lia. Local Open Scope error_monad_scope. Local Open Scope option_monad_scope. @@ -666,7 +667,7 @@ Proof. induction l2. - intro. destruct l1; auto. apply is_tail_false in H. contradiction. - intros ITAIL. inv ITAIL; auto. - apply IHl2 in H1. clear IHl2. simpl. omega. + apply IHl2 in H1. clear IHl2. simpl. lia. Qed. Lemma is_tail_nth_error {A} (l1 l2: list A) x: @@ -676,14 +677,14 @@ Proof. induction l2. - intro ITAIL. apply is_tail_false in ITAIL. contradiction. - intros ITAIL. assert (length (a::l2) = S (length l2)) by auto. rewrite H. clear H. - assert (forall n n', ((S n) - n' - 1)%nat = (n - n')%nat) by (intros; omega). rewrite H. clear H. + assert (forall n n', ((S n) - n' - 1)%nat = (n - n')%nat) by (intros; lia). rewrite H. clear H. inv ITAIL. - + assert (forall n, (n - n)%nat = 0%nat) by (intro; omega). rewrite H. + + assert (forall n, (n - n)%nat = 0%nat) by (intro; lia). rewrite H. simpl. reflexivity. + exploit IHl2; eauto. intros. clear IHl2. - assert (forall n n', (n > n')%nat -> (n - n')%nat = S (n - n' - 1)%nat) by (intros; omega). + assert (forall n n', (n > n')%nat -> (n - n')%nat = S (n - n' - 1)%nat) by (intros; lia). exploit (is_tail_length (x::l1)); eauto. intro. simpl in H2. - assert ((length l2 > length l1)%nat) by omega. clear H2. + assert ((length l2 > length l1)%nat) by lia. clear H2. rewrite H0; auto. Qed. -- cgit From 2249f3c7771c285ccd25f6e94478be388a741da5 Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Sun, 6 Jun 2021 20:49:34 +0200 Subject: Adding debug info --- scheduling/RTLpathScheduleraux.ml | 95 ++++++++++++++++++++++----------------- 1 file changed, 53 insertions(+), 42 deletions(-) diff --git a/scheduling/RTLpathScheduleraux.ml b/scheduling/RTLpathScheduleraux.ml index 653765f5..8e7f0dfa 100644 --- a/scheduling/RTLpathScheduleraux.ml +++ b/scheduling/RTLpathScheduleraux.ml @@ -74,6 +74,11 @@ end let get_live_regs_entry (sb : superblock) code = + (if !Clflags.option_debug_compcert > 6 + then debug_flag := true); + debug "getting live regs for superblock:\n"; + print_superblock sb code; + debug "\n"; let seqa = Array.map (fun i -> (match PTree.get i code with | Some ii -> ii @@ -83,48 +88,53 @@ let get_live_regs_entry (sb : superblock) code = | Some s -> s | None -> Regset.empty)) sb.instructions in - Array.fold_right (fun (ins, liveins) regset -> - match ins with - | Inop _ -> regset - | Iop (_, args, dest, _) - | Iload (_, _, _, args, dest, _) -> - List.fold_left (fun set reg -> Registers.Regset.add reg set) - (Registers.Regset.remove dest regset) args - | Istore (_, _, args, src, _) -> - List.fold_left (fun set reg -> Registers.Regset.add reg set) - (Registers.Regset.add src regset) args - | Icall (_, fn, args, dest, _) -> - List.fold_left (fun set reg -> Registers.Regset.add reg set) - ((match fn with - | Datatypes.Coq_inl reg -> (Registers.Regset.add reg) - | Datatypes.Coq_inr _ -> (fun x -> x)) - (Registers.Regset.remove dest regset)) - args - | Itailcall (_, fn, args) -> - List.fold_left (fun set reg -> Registers.Regset.add reg set) - (match fn with - | Datatypes.Coq_inl reg -> (Registers.Regset.add reg regset) - | Datatypes.Coq_inr _ -> regset) - args - | Ibuiltin (_, args, dest, _) -> - List.fold_left (fun set reg -> - match reg with - | AST.BA r -> Registers.Regset.add r set - | _ -> set) - (match dest with - | AST.BR r -> Registers.Regset.remove r regset - | _ -> regset) - args - | Icond (_, args, _, _, _) -> - List.fold_left (fun set reg -> - Registers.Regset.add reg set) - regset args - | Ijumptable (reg, _) - | Ireturn (Some reg) -> - Registers.Regset.add reg regset - | _ -> regset - ) seqa sb.s_output_regs - + let ret = + Array.fold_right (fun (ins, liveins) regset -> + match ins with + | Inop _ -> regset + | Iop (_, args, dest, _) + | Iload (_, _, _, args, dest, _) -> + List.fold_left (fun set reg -> Registers.Regset.add reg set) + (Registers.Regset.remove dest regset) args + | Istore (_, _, args, src, _) -> + List.fold_left (fun set reg -> Registers.Regset.add reg set) + (Registers.Regset.add src regset) args + | Icall (_, fn, args, dest, _) -> + List.fold_left (fun set reg -> Registers.Regset.add reg set) + ((match fn with + | Datatypes.Coq_inl reg -> (Registers.Regset.add reg) + | Datatypes.Coq_inr _ -> (fun x -> x)) + (Registers.Regset.remove dest regset)) + args + | Itailcall (_, fn, args) -> + List.fold_left (fun set reg -> Registers.Regset.add reg set) + (match fn with + | Datatypes.Coq_inl reg -> (Registers.Regset.add reg regset) + | Datatypes.Coq_inr _ -> regset) + args + | Ibuiltin (_, args, dest, _) -> + List.fold_left (fun set reg -> + match reg with + | AST.BA r -> Registers.Regset.add r set + | _ -> set) + (match dest with + | AST.BR r -> Registers.Regset.remove r regset + | _ -> regset) + args + | Icond (_, args, _, _, _) -> + List.fold_left (fun set reg -> + Registers.Regset.add reg set) + regset args + | Ijumptable (reg, _) + | Ireturn (Some reg) -> + Registers.Regset.add reg regset + | _ -> regset + ) seqa sb.s_output_regs + in debug "live in regs: "; + print_regset ret; + debug "\n"; + debug_flag := false; + ret (* TODO David *) let schedule_superblock sb code = @@ -144,6 +154,7 @@ let schedule_superblock sb code = match predicted_successor ii with | Some _ -> 0 | None -> 1 in + debug "hello\n"; let live_regs_entry = get_live_regs_entry sb code in match PrepassSchedulingOracle.schedule_sequence (Array.map (fun i -> -- cgit From c21c794ec0abb307ff4d0948e2a504da936ea602 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Mon, 7 Jun 2021 14:42:48 +0200 Subject: timing --- test/monniaux/division/harness.c | 81 ++++++++++++++++++++++++++++++++++++++ test/monniaux/division/my_udiv32.s | 36 +++++++++++++++++ 2 files changed, 117 insertions(+) create mode 100644 test/monniaux/division/harness.c create mode 100644 test/monniaux/division/my_udiv32.s diff --git a/test/monniaux/division/harness.c b/test/monniaux/division/harness.c new file mode 100644 index 00000000..8d7e7d13 --- /dev/null +++ b/test/monniaux/division/harness.c @@ -0,0 +1,81 @@ +#include +#include +#include +#include +#include +#include "../cycles.h" + +static uint32_t dm_random_uint32(void) { + static uint32_t current=UINT32_C(0xDEADBEEF); + current = ((uint64_t) current << 6) % UINT32_C(4294967291); + return current; +} + +static uint64_t dm_biased_random_uint32(void) { + uint32_t flags = dm_random_uint32(); + uint32_t r; + switch (flags & 15) { + case 0: + r = dm_random_uint32() & 0xFU; + break; + case 1: + r = dm_random_uint32() & 0xFFU; + break; + case 2: + r = dm_random_uint32() & 0xFFFU; + break; + case 3: + r = dm_random_uint32() & 0xFFFFU; + break; + case 4: + r = dm_random_uint32() & 0xFFFFFU; + break; + case 5: + r = dm_random_uint32() & 0xFFFFFFU; + break; + case 6: + r = dm_random_uint32() & 0xFFFFFFFU; + break; + case 7: + r = dm_random_uint32() & 0x3; + break; + default: + r = dm_random_uint32(); + } + return r; +} + +inline uint32_t native_udiv32(uint32_t x, uint32_t y) { + return x/y; +} +extern uint32_t my_udiv32(uint32_t x, uint32_t y); + +int main() { + cycle_t time_me=0, time_native=0; + cycle_count_config(); + + for(int i=0; i<1000; i++) { + uint32_t x = dm_biased_random_uint32(); + uint32_t y = dm_biased_random_uint32(); + + cycle_t cycle_a, cycle_b, cycle_c; + + uint32_t q1, q2; + cycle_a = get_cycle(); + q1 = native_udiv32(x, y); + cycle_b = get_cycle(); + q2 = my_udiv32(x, y); + cycle_c = get_cycle(); + + if(q1 != q2) { + printf("%u %u\n", q1, q2); + } + + time_native += cycle_b - cycle_a; + time_me += cycle_c - cycle_b; + } + + printf("%" PRcycle "\t%" PRcycle "\n", time_native, time_me); + + return 0; +} diff --git a/test/monniaux/division/my_udiv32.s b/test/monniaux/division/my_udiv32.s new file mode 100644 index 00000000..0f4fd127 --- /dev/null +++ b/test/monniaux/division/my_udiv32.s @@ -0,0 +1,36 @@ + .align 8 + .global my_udiv32 + .type my_udiv32, @function +my_udiv32: + zxwd $r1 = $r1 + make $r3 = 0x3ff0000000000000 # 1.0 + zxwd $r0 = $r0 + ;; + floatud.rn $r5 = $r1, 0 + ;; + floatuw.rn $r2 = $r1, 0 + ;; + finvw $r2 = $r2 + ;; + + fwidenlwd $r2 = $r2 + floatud.rn $r4 = $r0, 0 + ;; + ffmsd $r3 = $r2, $r5 + ;; + ffmad $r2 = $r2, $r3 + ;; + fmuld $r2 = $r2, $r4 + ;; + fixedud.rn $r2 = $r2, 0 + ;; + msbfw $r0 = $r2, $r1 + zxwd $r1 = $r2 + addw $r2 = $r2, -1 + ;; + cmoved.wltz $r0? $r1 = $r2 + ;; + copyd $r0 = $r1 + ret + ;; + .size my_udiv32, .-my_udiv32 -- cgit From f0124301f874520bfdf76f16e016ffb2e1a8ca37 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Mon, 7 Jun 2021 14:47:57 +0200 Subject: division --- test/monniaux/cycles.h | 2 +- test/monniaux/division/harness.c | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/test/monniaux/cycles.h b/test/monniaux/cycles.h index f26060a7..2905938b 100644 --- a/test/monniaux/cycles.h +++ b/test/monniaux/cycles.h @@ -6,7 +6,7 @@ typedef uint64_t cycle_t; #define PRcycle PRId64 -#include <../../kvx-cos/include/hal/cos_registers.h> +#include "/opt/kalray/accesscore/kvx-cos/include/hal/cos_registers.h" static inline void cycle_count_config(void) { diff --git a/test/monniaux/division/harness.c b/test/monniaux/division/harness.c index 8d7e7d13..b6ce674d 100644 --- a/test/monniaux/division/harness.c +++ b/test/monniaux/division/harness.c @@ -57,7 +57,8 @@ int main() { for(int i=0; i<1000; i++) { uint32_t x = dm_biased_random_uint32(); uint32_t y = dm_biased_random_uint32(); - + if (y == 0) continue; + cycle_t cycle_a, cycle_b, cycle_c; uint32_t q1, q2; @@ -68,7 +69,7 @@ int main() { cycle_c = get_cycle(); if(q1 != q2) { - printf("%u %u\n", q1, q2); + printf("ERREUR %u %u\n", q1, q2); } time_native += cycle_b - cycle_a; -- cgit From a14865049571f157896107ebf0b2f908b1b95cbc Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Mon, 7 Jun 2021 22:45:39 +0200 Subject: coq 8.13.2 --- lib/Integers.v | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/Integers.v b/lib/Integers.v index 3e103ab7..2addc78b 100644 --- a/lib/Integers.v +++ b/lib/Integers.v @@ -3747,8 +3747,7 @@ Proof. unfold lt. rewrite signed_zero. rewrite bits_zero. - destruct (zlt _ _); try lia. - reflexivity. + destruct (zlt _ _); try lia; reflexivity. } change (Z.testbit (unsigned x) (i + 63)) with (testbit x (i + 63)). rewrite bits_zero. -- cgit From dbff5b8a016fe9f6667ea007be3de764a50b620a Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Tue, 8 Jun 2021 00:30:31 +0200 Subject: omega -> lia --- riscV/Asmgenproof1.v | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/riscV/Asmgenproof1.v b/riscV/Asmgenproof1.v index 6abde89f..42ab8375 100644 --- a/riscV/Asmgenproof1.v +++ b/riscV/Asmgenproof1.v @@ -821,18 +821,18 @@ Proof. unfold Val.cmp; destruct (rs#r1); simpl; auto. rewrite B1. unfold Int.lt. rewrite zlt_false. auto. change (Int.signed (Int.repr Int.max_signed)) with Int.max_signed. - generalize (Int.signed_range i); omega. + generalize (Int.signed_range i); lia. * exploit (opimm32_correct Psltw Psltiw (Val.cmp Clt)); eauto. intros (rs1 & A1 & B1 & C1). exists rs1; split. eexact A1. split; auto. rewrite B1. unfold Val.cmp; simpl; destruct (rs#r1); simpl; auto. unfold Int.lt. replace (Int.signed (Int.add n Int.one)) with (Int.signed n + 1). destruct (zlt (Int.signed n) (Int.signed i)). - rewrite zlt_false by omega. auto. - rewrite zlt_true by omega. auto. + rewrite zlt_false by lia. auto. + rewrite zlt_true by lia. auto. rewrite Int.add_signed. symmetry; apply Int.signed_repr. assert (Int.signed n <> Int.max_signed). { red; intros E. elim H1. rewrite <- (Int.repr_signed n). rewrite E. auto. } - generalize (Int.signed_range n); omega. + generalize (Int.signed_range n); lia. + apply DFL. + apply DFL. Qed. @@ -919,18 +919,18 @@ Proof. unfold Val.cmpl; destruct (rs#r1); simpl; auto. rewrite B1. unfold Int64.lt. rewrite zlt_false. auto. change (Int64.signed (Int64.repr Int64.max_signed)) with Int64.max_signed. - generalize (Int64.signed_range i); omega. + generalize (Int64.signed_range i); lia. * exploit (opimm64_correct Psltl Psltil (fun v1 v2 => Val.maketotal (Val.cmpl Clt v1 v2))); eauto. intros (rs1 & A1 & B1 & C1). exists rs1; split. eexact A1. split; auto. rewrite B1. unfold Val.cmpl; simpl; destruct (rs#r1); simpl; auto. unfold Int64.lt. replace (Int64.signed (Int64.add n Int64.one)) with (Int64.signed n + 1). destruct (zlt (Int64.signed n) (Int64.signed i)). - rewrite zlt_false by omega. auto. - rewrite zlt_true by omega. auto. + rewrite zlt_false by lia. auto. + rewrite zlt_true by lia. auto. rewrite Int64.add_signed. symmetry; apply Int64.signed_repr. assert (Int64.signed n <> Int64.max_signed). { red; intros E. elim H1. rewrite <- (Int64.repr_signed n). rewrite E. auto. } - generalize (Int64.signed_range n); omega. + generalize (Int64.signed_range n); lia. + apply DFL. + apply DFL. Qed. -- cgit From 2bf707f9fe5259f869999347a46ec52ab096e030 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Tue, 8 Jun 2021 01:06:09 +0200 Subject: run CI on kvx-work-ssa kvx-work-velus --- .gitlab-ci.yml | 106 +++++++++++++++++++++++++++++++-------------------------- 1 file changed, 57 insertions(+), 49 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index a7811ae3..9f407912 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -14,7 +14,7 @@ check-admitted: rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' when: always - - if: '$CI_COMMIT_BRANCH == "mppa-kvx"' + - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"' when: always - if: '$CI_COMMIT_BRANCH == "master"' when: always @@ -35,7 +35,7 @@ build_x86_64: rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' when: always - - if: '$CI_COMMIT_BRANCH == "mppa-kvx"' + - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"' when: always - if: '$CI_COMMIT_BRANCH == "master"' when: always @@ -58,7 +58,7 @@ build_ia32: rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' when: always - - if: '$CI_COMMIT_BRANCH == "mppa-kvx"' + - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"' when: always - if: '$CI_COMMIT_BRANCH == "master"' when: always @@ -83,7 +83,7 @@ build_aarch64: rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' when: always - - if: '$CI_COMMIT_BRANCH == "mppa-kvx"' + - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"' when: always - if: '$CI_COMMIT_BRANCH == "master"' when: always @@ -108,7 +108,9 @@ build_arm: rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' when: always - - if: '$CI_COMMIT_BRANCH == "mppa-kvx"' + - if: '$CI_COMMIT_BRANCH == "kvx-work-ssa"' + when: always + - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"' when: always - if: '$CI_COMMIT_BRANCH == "master"' when: always @@ -134,53 +136,55 @@ build_armhf: rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' when: always - - if: '$CI_COMMIT_BRANCH == "mppa-kvx"' - when: always - - if: '$CI_COMMIT_BRANCH == "master"' - when: always - - when: manual - -build_ppc: - stage: build - image: coqorg/coq:8.12.2-ocaml-4.11.1-flambda - before_script: - - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update - - sudo apt-get -y install gcc-powerpc-linux-gnu qemu-user - - eval `opam config env` - - opam update - - opam install -y menhir - script: - - ./config_ppc.sh - - make -j "$NJOBS" - rules: - - if: '$CI_COMMIT_BRANCH == "kvx-work"' + - if: '$CI_COMMIT_BRANCH == "kvx-work-ssa"' when: always - - if: '$CI_COMMIT_BRANCH == "mppa-kvx"' + - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"' when: always - if: '$CI_COMMIT_BRANCH == "master"' when: always - when: manual -build_ppc64: - stage: build - image: coqorg/coq:8.12.2-ocaml-4.11.1-flambda - before_script: - - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update - - sudo apt-get -y install gcc-powerpc64-linux-gnu - - eval `opam config env` - - opam update - - opam install -y menhir - script: - - ./config_ppc64.sh - - make -j "$NJOBS" - rules: - - if: '$CI_COMMIT_BRANCH == "kvx-work"' - when: always - - if: '$CI_COMMIT_BRANCH == "mppa-kvx"' - when: always - - if: '$CI_COMMIT_BRANCH == "master"' - when: always - - when: manual +# build_ppc: +# stage: build +# image: coqorg/coq:8.12.2-ocaml-4.11.1-flambda +# before_script: +# - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update +# - sudo apt-get -y install gcc-powerpc-linux-gnu qemu-user +# - eval `opam config env` +# - opam update +# - opam install -y menhir +# script: +# - ./config_ppc.sh +# - make -j "$NJOBS" +# rules: +# - if: '$CI_COMMIT_BRANCH == "kvx-work"' +# when: always +# - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"' +# when: always +# - if: '$CI_COMMIT_BRANCH == "master"' +# when: always +# - when: manual + +# build_ppc64: +# stage: build +# image: coqorg/coq:8.12.2-ocaml-4.11.1-flambda +# before_script: +# - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update +# - sudo apt-get -y install gcc-powerpc64-linux-gnu +# - eval `opam config env` +# - opam update +# - opam install -y menhir +# script: +# - ./config_ppc64.sh +# - make -j "$NJOBS" +# rules: +# - if: '$CI_COMMIT_BRANCH == "kvx-work"' +# when: always +# - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"' +# when: always +# - if: '$CI_COMMIT_BRANCH == "master"' +# when: always +# - when: manual build_rv64: stage: build @@ -201,7 +205,7 @@ build_rv64: rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' when: always - - if: '$CI_COMMIT_BRANCH == "mppa-kvx"' + - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"' when: always - if: '$CI_COMMIT_BRANCH == "master"' when: always @@ -222,7 +226,9 @@ build_rv32: rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' when: always - - if: '$CI_COMMIT_BRANCH == "mppa-kvx"' + - if: '$CI_COMMIT_BRANCH == "kvx-work-ssa"' + when: always + - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"' when: always - if: '$CI_COMMIT_BRANCH == "master"' when: always @@ -249,7 +255,9 @@ build_kvx: rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' when: always - - if: '$CI_COMMIT_BRANCH == "mppa-kvx"' + - if: '$CI_COMMIT_BRANCH == "kvx-work-ssa"' + when: always + - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"' when: always - if: '$CI_COMMIT_BRANCH == "master"' when: always -- cgit From 52378f0600652a94edcc8c78e4b426243f717a89 Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Tue, 8 Jun 2021 15:11:03 +0200 Subject: Add some tests --- common/DebugPrint.ml | 4 +- scheduling/RTLpathScheduleraux.ml | 2 +- test/nardino/scheduling/entry_regs.c | 16 +++++ test/nardino/scheduling/spille_etrange.c | 114 +++++++++++++++++++++++++++++++ 4 files changed, 133 insertions(+), 3 deletions(-) create mode 100644 test/nardino/scheduling/entry_regs.c create mode 100644 test/nardino/scheduling/spille_etrange.c diff --git a/common/DebugPrint.ml b/common/DebugPrint.ml index 6f8449ee..275e6a71 100644 --- a/common/DebugPrint.ml +++ b/common/DebugPrint.ml @@ -132,10 +132,10 @@ let print_instructions insts code = | None -> failwith "Did not get some" | Some thing -> thing in if (!debug_flag) then begin - debug "[ "; + debug "[\n"; List.iter ( fun n -> (PrintRTL.print_instruction stdout (P.to_int n, get_some @@ PTree.get n code)) - ) insts; debug "]" + ) insts; debug " ]" end let print_arrayp arr = begin diff --git a/scheduling/RTLpathScheduleraux.ml b/scheduling/RTLpathScheduleraux.ml index 8e7f0dfa..24fef3e8 100644 --- a/scheduling/RTLpathScheduleraux.ml +++ b/scheduling/RTLpathScheduleraux.ml @@ -17,7 +17,7 @@ let print_superblock (sb: superblock) code = begin debug "{ instructions = "; print_instructions (Array.to_list insts) code; debug "\n"; debug " liveins = "; print_ptree_regset li; debug "\n"; - debug " output_regs = "; print_regset outs; debug "}" + debug " output_regs = "; print_regset outs; debug "\n}" end let print_superblocks lsb code = diff --git a/test/nardino/scheduling/entry_regs.c b/test/nardino/scheduling/entry_regs.c new file mode 100644 index 00000000..047a613d --- /dev/null +++ b/test/nardino/scheduling/entry_regs.c @@ -0,0 +1,16 @@ +#include + +int f(int n) { + return n; +} + + +int main(int argc, char *argv[]) { + int a=1; + float b=2.; + int c = f(a); + a = 3; + int d = f(a); + printf("%e, %d, %d, %d", b, a, c, d); + return 0; +} diff --git a/test/nardino/scheduling/spille_etrange.c b/test/nardino/scheduling/spille_etrange.c new file mode 100644 index 00000000..1c36ee86 --- /dev/null +++ b/test/nardino/scheduling/spille_etrange.c @@ -0,0 +1,114 @@ +int f(int k) { + int a1 = k; + int b1 = 2*a1; + int c = a1; + int a2 = k+1; + int b2 = 2*a2; + c += a2; + int a3 = k+2; + int b3 = 2*a3; + c += a3; + int a4 = k+3; + int b4 = 2*a4; + c += a4; + int a5 = k+4; + int b5 = 2*a5; + c += a5; + int a6 = k+5; + int b6 = 2*a6; + c += a6; + int a7 = k+6; + int b7 = 2*a7; + c += a7; + int a8 = k+7; + int b8 = 2*a8; + c += a8; + int a9 = k+8; + int b9 = 2*a9; + c += a9; + int a10 = k+9; + int b10 = 2*a10; + c += a10; + int a11 = k+10; + int b11 = 2*a11; + c += a11; + int a12 = k+11; + int b12 = 2*a12; + c += a12; + int a13 = k+12; + int b13 = 2*a13; + c += a13; + int a14 = k+13; + int b14 = 2*a14; + c += a14; + int a15 = k+14; + int b15 = 2*a15; + c += a15; + int a16 = k+15; + int b16 = 2*a16; + c += a16; + int a17 = k+16; + int b17 = 2*a17; + c += a17; + int a18 = k+17; + int b18 = 2*a18; + c += a18; + int a19 = k+18; + int b19 = 2*a19; + c += a19; + int a20 = k+19; + int b20 = 2*a20; + c += a20; + int a21 = k+20; + int b21 = 2*a21; + c += a21; + int a22 = k+21; + int b22 = 2*a22; + c += a22; + int a23 = k+22; + int b23 = 2*a23; + c += a23; + int a24 = k+23; + int b24 = 2*a24; + c += a24; + int a25 = k+24; + int b25 = 2*a25; + c += a25; + int a26 = k+25; + int b26 = 2*a26; + c += a26; + return + b13+ + b12+ + b11+ + b10+ + b9+ + b8+ + b7+ + b6+ + b5+ + b4+ + b3+ + b2+ + b1+ + b14+ + b15+ + b16+ + b17+ + b18+ + b19+ + b20+ + b21+ + b22+ + b23+ + b23+ + b24+ + b25+ + b26+ + c; +} + +int main(int argc, char *argv[]) { + f(3); + return 0; +} -- cgit From 9b6247b7996f3e0181d27ec0e20daffd28e0884f Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Tue, 8 Jun 2021 16:06:36 +0200 Subject: Another test : one spill when scheduled forward, none if not --- test/nardino/scheduling/spille_forw.c | 119 ++++++++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+) create mode 100644 test/nardino/scheduling/spille_forw.c diff --git a/test/nardino/scheduling/spille_forw.c b/test/nardino/scheduling/spille_forw.c new file mode 100644 index 00000000..0c69efd5 --- /dev/null +++ b/test/nardino/scheduling/spille_forw.c @@ -0,0 +1,119 @@ +#include + +int f(int n) { + float a1 = (float) n; + float b1 = 2.*a1; + float c = a1; + float a2 = (float) n+1; + float b2 = 2.*a2; + c += a2; + float a3 = (float) n+2; + float b3 = 2.*a3; + c += a3; + float a4 = (float) n+3; + float b4 = 2.*a4; + c += a4; + float a5 = (float) n+4; + float b5 = 2.*a5; + c += a5; + float a6 = (float) n+5; + float b6 = 2.*a6; + c += a6; + float a7 = (float) n+6; + float b7 = 2.*a7; + c += a7; + float a8 = (float) n+7; + float b8 = 2.*a8; + c += a8; + float a9 = (float) n+8; + float b9 = 2.*a9; + c += a9; + float a10 = (float) n+9; + float b10 = 2.*a10; + c += a10; + float a11 = (float) n+10; + float b11 = 2.*a11; + c += a11; + float a12 = (float) n+11; + float b12 = 2.*a12; + c += a12; + float a13 = (float) n+12; + float b13 = 2.*a13; + c += a13; + float a14 = (float) n+13; + float b14 = 2.*a14; + c += a14; + float a15 = (float) n+14; + float b15 = 2.*a15; + c += a15; + float a16 = (float) n+15; + float b16 = 2.*a16; + c += a16; + float a17 = (float) n+16; + float b17 = 2.*a17; + c += a17; + float a18 = (float) n+17; + float b18 = 2.*a18; + c += a18; + float a19 = (float) n+18; + float b19 = 2.*a19; + c += a19; + float a20 = (float) n+19; + float b20 = 2.*a20; + c += a20; + float a21 = (float) n+20; + float b21 = 2.*a21; + c += a21; + float a22 = (float) n+21; + float b22 = 2.*a22; + c += a22; + float a23 = (float) n+22; + float b23 = 2.*a23; + c += a23; + float a24 = (float) n+23; + float b24 = 2.*a24; + c += a24; + float a25 = (float) n+24; + float b25 = 2.*a25; + c += a25; + float a26 = (float) n+25; + float b26 = 2.*a26; + c += a26; + return c + + b1+ + b2+ + b3+ + b4+ + b5+ + b6+ + b7+ + b8+ + b9+ + b10+ + b11+ + b12+ + b13+ + b14+ + b15+ + b16+ + b17+ + b18+ + b19+ + b20+ + b21+ + b22+ + b23+ + b24+ + b25+ + b26; +} + + + + + + +int main(int argc, char *argv[]) { + f(5); + return 0; +} -- cgit From 386b9053177bb4ef2801cec00b717c400a828139 Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Tue, 8 Jun 2021 16:53:19 +0200 Subject: Fix RTLpathScheduleraux.get_live_regs_entry --- scheduling/RTLpathScheduleraux.ml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scheduling/RTLpathScheduleraux.ml b/scheduling/RTLpathScheduleraux.ml index 24fef3e8..72cf6682 100644 --- a/scheduling/RTLpathScheduleraux.ml +++ b/scheduling/RTLpathScheduleraux.ml @@ -89,7 +89,8 @@ let get_live_regs_entry (sb : superblock) code = | None -> Regset.empty)) sb.instructions in let ret = - Array.fold_right (fun (ins, liveins) regset -> + Array.fold_right (fun (ins, liveins) regset_i -> + let regset = Registers.Regset.union liveins regset_i in match ins with | Inop _ -> regset | Iop (_, args, dest, _) @@ -128,7 +129,7 @@ let get_live_regs_entry (sb : superblock) code = | Ijumptable (reg, _) | Ireturn (Some reg) -> Registers.Regset.add reg regset - | _ -> regset + | _ -> regset ) seqa sb.s_output_regs in debug "live in regs: "; print_regset ret; -- cgit From 9fdd2f1d82387950bd72f865920f189d756109d9 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Wed, 9 Jun 2021 10:24:47 +0200 Subject: MacOS compatibility --- config_simple.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config_simple.sh b/config_simple.sh index e2d3844c..52b7d1a6 100755 --- a/config_simple.sh +++ b/config_simple.sh @@ -2,7 +2,7 @@ arch=$1 shift version=`git rev-parse --short HEAD` branch=`git rev-parse --abbrev-ref HEAD` -date=`date -I` +date=`date +%Y-%m-%d` if test "x$CCOMP_INSTALL_PREFIX" = "x" ; then CCOMP_INSTALL_PREFIX=/opt/CompCert ; -- cgit From cfd5e458e8fb8a92db60e90b40c9889e370f116b Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Wed, 9 Jun 2021 13:24:24 +0200 Subject: comment is now ## due to some weird MacOS bug --- x86/TargetPrinter.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x86/TargetPrinter.ml b/x86/TargetPrinter.ml index 2000f96a..50c871e4 100644 --- a/x86/TargetPrinter.ml +++ b/x86/TargetPrinter.ml @@ -96,7 +96,7 @@ let z oc n = output_string oc (Z.to_string n) let data_pointer = if Archi.ptr64 then ".quad" else ".long" (* The comment deliminiter *) -let comment = "#" +let comment = "##" (* Base-2 log of a Caml integer *) let rec log2 n = -- cgit From 2a650bdab340d7a31cdc224f7e7ad1673b248af1 Mon Sep 17 00:00:00 2001 From: Olivier Lebeltel Date: Wed, 9 Jun 2021 13:58:13 +0200 Subject: added config_macos_x86_64.sh --- config_macos_x86_64.sh | 1 + 1 file changed, 1 insertion(+) create mode 100755 config_macos_x86_64.sh diff --git a/config_macos_x86_64.sh b/config_macos_x86_64.sh new file mode 100755 index 00000000..9d5b3f5e --- /dev/null +++ b/config_macos_x86_64.sh @@ -0,0 +1 @@ +exec ./config_simple.sh x86_64-macos "$@" -- cgit From d703ae1ad5e1fcdc63e07b2a50a3e8576a11e61e Mon Sep 17 00:00:00 2001 From: Léo Gourdin Date: Wed, 9 Jun 2021 23:54:07 +0200 Subject: push afadl test example --- test/gourdinl/compare_pp.sh | 16 ++++++++++++++++ test/gourdinl/postpass_exp.c | 5 +++++ 2 files changed, 21 insertions(+) create mode 100755 test/gourdinl/compare_pp.sh create mode 100644 test/gourdinl/postpass_exp.c diff --git a/test/gourdinl/compare_pp.sh b/test/gourdinl/compare_pp.sh new file mode 100755 index 00000000..09183cf9 --- /dev/null +++ b/test/gourdinl/compare_pp.sh @@ -0,0 +1,16 @@ +ffname=$(basename $1) +fname=${ffname%.*} +nopp=$fname.nopp.s +pp=$fname.pp.s + +../../ccomp -fno-coalesce-mem -fno-postpass -S $1 -o $nopp +../../ccomp -fno-coalesce-mem -fpostpass= list -S $1 -o $pp +sed -i '1,2d' $nopp +sed -i '1,2d' $pp +if cmp -s $nopp $pp; then + echo "same!" +else + echo "differents!" + diff -y $nopp $pp +fi + diff --git a/test/gourdinl/postpass_exp.c b/test/gourdinl/postpass_exp.c new file mode 100644 index 00000000..522ac2a6 --- /dev/null +++ b/test/gourdinl/postpass_exp.c @@ -0,0 +1,5 @@ +int main(int x, int y) { + int z = x << 32; + y = y - z; + return x + y; +} -- cgit From 1701e43316ee8e69e794a025a8c9979af6bb8c93 Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Thu, 10 Jun 2021 16:31:51 +0200 Subject: Work on new schedluer Renamed a test file, wrote function to compute pressure deltas, Still need to pass the info in some way; beginning of the actual scheduler function --- aarch64/Machregsaux.ml | 4 ++ aarch64/Machregsaux.mli | 2 + aarch64/PostpassSchedulingOracle.ml | 3 +- aarch64/PrepassSchedulingOracle.ml | 55 ++++++++++++++- scheduling/InstructionScheduler.ml | 79 ++++++++++++++++++++- scheduling/InstructionScheduler.mli | 7 ++ scheduling/RTLpathScheduleraux.ml | 26 ++++--- test/nardino/scheduling/entry_regs.c | 5 +- test/nardino/scheduling/spille_backw.c | 114 +++++++++++++++++++++++++++++++ test/nardino/scheduling/spille_etrange.c | 114 ------------------------------- 10 files changed, 279 insertions(+), 130 deletions(-) create mode 100644 test/nardino/scheduling/spille_backw.c delete mode 100644 test/nardino/scheduling/spille_etrange.c diff --git a/aarch64/Machregsaux.ml b/aarch64/Machregsaux.ml index 41db3bd4..15fb08ca 100644 --- a/aarch64/Machregsaux.ml +++ b/aarch64/Machregsaux.ml @@ -19,3 +19,7 @@ let class_of_type = function | AST.Tint | AST.Tlong -> 0 | AST.Tfloat | AST.Tsingle -> 1 | AST.Tany32 | AST.Tany64 -> assert false + +(* number of available registers per class *) +(* TODO: add this to all archs *) +let nr_regs = [| 29; 32 |] diff --git a/aarch64/Machregsaux.mli b/aarch64/Machregsaux.mli index 01b0f9fd..8487a557 100644 --- a/aarch64/Machregsaux.mli +++ b/aarch64/Machregsaux.mli @@ -15,3 +15,5 @@ val is_scratch_register: string -> bool val class_of_type: AST.typ -> int + +val nr_regs : int array diff --git a/aarch64/PostpassSchedulingOracle.ml b/aarch64/PostpassSchedulingOracle.ml index a9737088..834d42f5 100644 --- a/aarch64/PostpassSchedulingOracle.ml +++ b/aarch64/PostpassSchedulingOracle.ml @@ -507,7 +507,8 @@ let build_problem bb = { max_latency = -1; resource_bounds = opweights.pipelined_resource_bounds; - live_regs_entry = Registers.Regset.empty; (* PLACEHOLDER *) + live_regs_entry = Registers.Regset.empty; (* unused here *) + typing = (fun x -> AST.Tint); (* unused here *) instruction_usages = instruction_usages bb; latency_constraints = latency_constraints bb; } diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml index a743fb68..6d445f10 100644 --- a/aarch64/PrepassSchedulingOracle.ml +++ b/aarch64/PrepassSchedulingOracle.ml @@ -201,6 +201,52 @@ let get_simple_dependencies (opweights : opweights) (seqa : (instruction*Regset. end seqa; !latency_constraints;; +let get_pressure_deltas (seqa : (instruction * Regset.t) array) + (typing : RTLtyping.regenv) + : int array array = + let nr_types_regs = Array.length Machregsaux.nr_regs in + let ret = Array.init (Array.length seqa) (fun i -> + Array.make nr_types_regs 0) in + Array.iteri (fun i (instr, liveins) -> match instr with + | Iop (_, args, dest, _) | Iload (_, _, _, args, dest, _) -> + ret.(i).(Machregsaux.class_of_type (typing dest)) <- + if List.mem dest args then 0 + else 1 + | Istore (_, _, _, src, _) -> + ret.(i).(Machregsaux.class_of_type (typing src)) <- + -1 + | Icall (_, fn, args, dest, _) -> + ret.(i).(Machregsaux.class_of_type (typing dest)) <- + if List.mem dest + (match fn with + | Datatypes.Coq_inl reg -> reg::args + | _ -> args) + then 0 else 1 + | Ibuiltin (_, args, dest, _) -> + let rec arg_l list = function + | AST.BA r -> r::list + | AST.BA_splitlong (hi, lo) | AST.BA_addptr (hi, lo) -> + arg_l (arg_l list lo) hi + | _ -> list + in + let l = (List.fold_left arg_l [] args) in + let rec dest_l = function + | AST.BR r -> + let t = Machregsaux.class_of_type (typing r) in + ret.(i).(t) <- + (if List.mem r l + then 0 else 1) + ret.(i).(t) + | AST.BR_splitlong (hi, lo) -> + dest_l hi; + dest_l lo + | _ -> () + in + dest_l dest + | _ -> () + ) seqa; + ret + + let resources_of_instruction (opweights : opweights) = function | Inop _ -> Array.map (fun _ -> 0) opweights.pipelined_resource_bounds | Iop(op, inputs, output, _) -> @@ -406,11 +452,13 @@ let get_alias_dependencies seqa = !deps;; *) -let define_problem (opweights : opweights) (live_entry_regs : Regset.t) seqa = +let define_problem (opweights : opweights) (live_entry_regs : Regset.t) + (typing : RTLtyping.regenv) seqa = let simple_deps = get_simple_dependencies opweights seqa in { max_latency = -1; resource_bounds = opweights.pipelined_resource_bounds; live_regs_entry = live_entry_regs; + typing = typing; instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa); latency_constraints = (* if (use_alias_analysis ()) @@ -441,7 +489,8 @@ let prepass_scheduler_by_name name problem early_ones = | _ -> scheduler_by_name name problem let schedule_sequence (seqa : (instruction*Regset.t) array) - (live_regs_entry : Registers.Regset.t)= + (live_regs_entry : Registers.Regset.t) + (typing : RTLtyping.regenv) = let opweights = OpWeights.get_opweights () in try if (Array.length seqa) <= 1 @@ -451,7 +500,7 @@ let schedule_sequence (seqa : (instruction*Regset.t) array) let nr_instructions = Array.length seqa in (if !Clflags.option_debug_compcert > 6 then Printf.printf "prepass scheduling length = %d\n" (Array.length seqa)); - let problem = define_problem opweights live_regs_entry seqa in + let problem = define_problem opweights live_regs_entry typing seqa in (if !Clflags.option_debug_compcert > 7 then (print_sequence stdout (Array.map fst seqa); print_problem stdout problem)); diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml index 976037bd..08164293 100644 --- a/scheduling/InstructionScheduler.ml +++ b/scheduling/InstructionScheduler.ml @@ -34,6 +34,7 @@ type problem = { max_latency : int; resource_bounds : int array; live_regs_entry : Registers.Regset.t; + typing : RTLtyping.regenv; instruction_usages : int array array; latency_constraints : latency_constraint list; };; @@ -258,8 +259,8 @@ let priority_list_scheduler (order : list_scheduler_order) assert(!time >= 0); !time with Exit -> -1 - in + let advance_time() = begin (if !current_time < max_time-1 @@ -268,7 +269,8 @@ let priority_list_scheduler (order : list_scheduler_order) Array.blit problem.resource_bounds 0 current_resources 0 (Array.length current_resources); ready.(!current_time + 1) <- - InstrSet.union (ready.(!current_time)) (ready.(!current_time + 1)); + InstrSet.union (ready.(!current_time)) + (ready.(!current_time + 1)); ready.(!current_time) <- InstrSet.empty; end); incr current_time @@ -335,6 +337,75 @@ let list_scheduler = priority_list_scheduler CRITICAL_PATH_ORDER;; (* dummy code for placating ocaml's warnings *) let _ = fun x -> priority_list_scheduler INSTRUCTION_ORDER x;; + +(* A scheduler sensitive to register pressure *) +let reg_pres_scheduler (problem : problem) : solution option = + let nr_instructions = get_nr_instructions problem in + let successors = get_successors problem + and predecessors = get_predecessors problem + and times = Array.make (nr_instructions+1) (-1) in + let live_regs_entry = problem.live_regs_entry in + + let available_regs = Array.copy Machregsaux.nr_regs in + + List.iter (fun r -> let classe = Machregsaux.class_of_type + (problem.typing r) in + available_regs.(classe) + <- available_regs.(classe) - 1) + (Registers.Regset.elements live_regs_entry); + + let nr_types_regs = Array.length available_regs in + + (* wait di we have access to instructions here? No, we have to add + al this to constraints *) + (* let pressures = + * Array.init (nr_instructions) (fun i -> + * Array.init (nr_types_regs) (fun t -> + * match i with + * | Inop -> 0 + * | Iop (_, args, dest, _) + * | Iload(_, _, _, args, dest, _) -> + * if + * )) *) + + let priorities = critical_paths successors in + + let module InstrSetCSP = + Set.Make (struct + type t=int + let compare x y = + match priorities.(y) - priorities.(x) with + | 0 -> x - y + | z -> z + end) in + + (* TODO: find a way to efficiently find an instruction which + decreases register pressure *) + (* idea: *) + (* let module InstrSetCSR = + * Set.Make (struct + * type t = int + * let compare x y = + * match pressures.(y) - pressures.(x) with + * | 0 -> (match priorities.(y) - priorities.(x) with + * | 0 -> x - y + * | z -> z) + * | z -> z + * end) in *) + (* where pressure.(x) is the delta of register pressure for + instruction x. Pb: different register types. Need to think about + it. Have one module for each register type, that's used when this + particular type reach a high pressure? *) + + let max_time = bound_max_time problem in + let ready = Array.make max_time InstrSetCSP.empty in + + (* silence warning, enable compilation while working *) + let _ = successors, predecessors, times, ready, nr_types_regs in + (* PLACEHOLDER *) + None + + type bundle = int list;; let rec extract_deps_to index = function @@ -440,6 +511,10 @@ let reverse_problem problem = max_latency = problem.max_latency; resource_bounds = problem.resource_bounds; live_regs_entry = Registers.Regset.empty; (* PLACEHOLDER *) + (* Not needed for the revlist sched, and for now we wont bother + with creating a reverse scheduler aware of reg press *) + + typing = problem.typing; instruction_usages = Array.init (nr_instructions + 1) (fun i -> if i=0 diff --git a/scheduling/InstructionScheduler.mli b/scheduling/InstructionScheduler.mli index f53dc0ef..8dcc4ef5 100644 --- a/scheduling/InstructionScheduler.mli +++ b/scheduling/InstructionScheduler.mli @@ -26,6 +26,9 @@ type problem = { live_regs_entry : Registers.Regset.t; (** The set of live pseudo-registers at entry. *) + typing : RTLtyping.regenv; + (** Register type map. *) + instruction_usages: int array array; (** At index {i i} the vector of resources used by instruction number {i i}. It must be the same length as [resource_bounds] *) @@ -71,6 +74,10 @@ Once a clock tick is full go to the next. @return [Some solution] when a solution is found, [None] if not. *) val list_scheduler : problem -> solution option +(** WIP : Same as list_scheduler, but schedules instructions which decrease +register pressure when it gets too high. *) +val reg_pres_scheduler : problem -> solution option + (** Schedule the problem using the order of instructions without any reordering *) val greedy_scheduler : problem -> solution option diff --git a/scheduling/RTLpathScheduleraux.ml b/scheduling/RTLpathScheduleraux.ml index 72cf6682..e04e7c23 100644 --- a/scheduling/RTLpathScheduleraux.ml +++ b/scheduling/RTLpathScheduleraux.ml @@ -94,7 +94,7 @@ let get_live_regs_entry (sb : superblock) code = match ins with | Inop _ -> regset | Iop (_, args, dest, _) - | Iload (_, _, _, args, dest, _) -> + | Iload (_, _, _, args, dest, _) -> List.fold_left (fun set reg -> Registers.Regset.add reg set) (Registers.Regset.remove dest regset) args | Istore (_, _, args, src, _) -> @@ -114,13 +114,20 @@ let get_live_regs_entry (sb : superblock) code = | Datatypes.Coq_inr _ -> regset) args | Ibuiltin (_, args, dest, _) -> - List.fold_left (fun set reg -> - match reg with - | AST.BA r -> Registers.Regset.add r set - | _ -> set) - (match dest with - | AST.BR r -> Registers.Regset.remove r regset - | _ -> regset) + List.fold_left (fun set arg -> + let rec add reg set = + match reg with + | AST.BA r -> Registers.Regset.add r set + | AST.BA_splitlong (hi, lo) + | AST.BA_addptr (hi, lo) -> add hi (add lo set) + | _ -> set + in add arg set) + (let rec rem dest set = + match dest with + | AST.BR r -> Registers.Regset.remove r set + | AST.BR_splitlong (hi, lo) -> rem hi (rem lo set) + | _ -> set + in rem dest regset) args | Icond (_, args, _, _, _) -> List.fold_left (fun set reg -> @@ -166,7 +173,8 @@ let schedule_superblock sb code = | Some s -> s | None -> Regset.empty)) (Array.sub sb.instructions 0 (nr_instr-trailer_length))) - live_regs_entry with + live_regs_entry + sb.typing with | None -> sb.instructions | Some order -> let ins' = diff --git a/test/nardino/scheduling/entry_regs.c b/test/nardino/scheduling/entry_regs.c index 047a613d..9e6adacb 100644 --- a/test/nardino/scheduling/entry_regs.c +++ b/test/nardino/scheduling/entry_regs.c @@ -1,7 +1,10 @@ #include int f(int n) { - return n; + if (n > 0) + return 42; + else + return n; } diff --git a/test/nardino/scheduling/spille_backw.c b/test/nardino/scheduling/spille_backw.c new file mode 100644 index 00000000..1c36ee86 --- /dev/null +++ b/test/nardino/scheduling/spille_backw.c @@ -0,0 +1,114 @@ +int f(int k) { + int a1 = k; + int b1 = 2*a1; + int c = a1; + int a2 = k+1; + int b2 = 2*a2; + c += a2; + int a3 = k+2; + int b3 = 2*a3; + c += a3; + int a4 = k+3; + int b4 = 2*a4; + c += a4; + int a5 = k+4; + int b5 = 2*a5; + c += a5; + int a6 = k+5; + int b6 = 2*a6; + c += a6; + int a7 = k+6; + int b7 = 2*a7; + c += a7; + int a8 = k+7; + int b8 = 2*a8; + c += a8; + int a9 = k+8; + int b9 = 2*a9; + c += a9; + int a10 = k+9; + int b10 = 2*a10; + c += a10; + int a11 = k+10; + int b11 = 2*a11; + c += a11; + int a12 = k+11; + int b12 = 2*a12; + c += a12; + int a13 = k+12; + int b13 = 2*a13; + c += a13; + int a14 = k+13; + int b14 = 2*a14; + c += a14; + int a15 = k+14; + int b15 = 2*a15; + c += a15; + int a16 = k+15; + int b16 = 2*a16; + c += a16; + int a17 = k+16; + int b17 = 2*a17; + c += a17; + int a18 = k+17; + int b18 = 2*a18; + c += a18; + int a19 = k+18; + int b19 = 2*a19; + c += a19; + int a20 = k+19; + int b20 = 2*a20; + c += a20; + int a21 = k+20; + int b21 = 2*a21; + c += a21; + int a22 = k+21; + int b22 = 2*a22; + c += a22; + int a23 = k+22; + int b23 = 2*a23; + c += a23; + int a24 = k+23; + int b24 = 2*a24; + c += a24; + int a25 = k+24; + int b25 = 2*a25; + c += a25; + int a26 = k+25; + int b26 = 2*a26; + c += a26; + return + b13+ + b12+ + b11+ + b10+ + b9+ + b8+ + b7+ + b6+ + b5+ + b4+ + b3+ + b2+ + b1+ + b14+ + b15+ + b16+ + b17+ + b18+ + b19+ + b20+ + b21+ + b22+ + b23+ + b23+ + b24+ + b25+ + b26+ + c; +} + +int main(int argc, char *argv[]) { + f(3); + return 0; +} diff --git a/test/nardino/scheduling/spille_etrange.c b/test/nardino/scheduling/spille_etrange.c deleted file mode 100644 index 1c36ee86..00000000 --- a/test/nardino/scheduling/spille_etrange.c +++ /dev/null @@ -1,114 +0,0 @@ -int f(int k) { - int a1 = k; - int b1 = 2*a1; - int c = a1; - int a2 = k+1; - int b2 = 2*a2; - c += a2; - int a3 = k+2; - int b3 = 2*a3; - c += a3; - int a4 = k+3; - int b4 = 2*a4; - c += a4; - int a5 = k+4; - int b5 = 2*a5; - c += a5; - int a6 = k+5; - int b6 = 2*a6; - c += a6; - int a7 = k+6; - int b7 = 2*a7; - c += a7; - int a8 = k+7; - int b8 = 2*a8; - c += a8; - int a9 = k+8; - int b9 = 2*a9; - c += a9; - int a10 = k+9; - int b10 = 2*a10; - c += a10; - int a11 = k+10; - int b11 = 2*a11; - c += a11; - int a12 = k+11; - int b12 = 2*a12; - c += a12; - int a13 = k+12; - int b13 = 2*a13; - c += a13; - int a14 = k+13; - int b14 = 2*a14; - c += a14; - int a15 = k+14; - int b15 = 2*a15; - c += a15; - int a16 = k+15; - int b16 = 2*a16; - c += a16; - int a17 = k+16; - int b17 = 2*a17; - c += a17; - int a18 = k+17; - int b18 = 2*a18; - c += a18; - int a19 = k+18; - int b19 = 2*a19; - c += a19; - int a20 = k+19; - int b20 = 2*a20; - c += a20; - int a21 = k+20; - int b21 = 2*a21; - c += a21; - int a22 = k+21; - int b22 = 2*a22; - c += a22; - int a23 = k+22; - int b23 = 2*a23; - c += a23; - int a24 = k+23; - int b24 = 2*a24; - c += a24; - int a25 = k+24; - int b25 = 2*a25; - c += a25; - int a26 = k+25; - int b26 = 2*a26; - c += a26; - return - b13+ - b12+ - b11+ - b10+ - b9+ - b8+ - b7+ - b6+ - b5+ - b4+ - b3+ - b2+ - b1+ - b14+ - b15+ - b16+ - b17+ - b18+ - b19+ - b20+ - b21+ - b22+ - b23+ - b23+ - b24+ - b25+ - b26+ - c; -} - -int main(int argc, char *argv[]) { - f(3); - return 0; -} -- cgit From a0ad5ff6f9c7603610a7448935b36c9ed22c6435 Mon Sep 17 00:00:00 2001 From: Xavier Leroy Date: Thu, 10 Jun 2021 18:33:58 +0200 Subject: x86 assembly: fix the comment delimiter for macos and make it per-OS As reported in #399, it seems better to use `##` instead of `#` as comment delimiter under macOS. For the time being we keep using `#` for Linux and Cygwin. Closes: #399 --- x86/TargetPrinter.ml | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/x86/TargetPrinter.ml b/x86/TargetPrinter.ml index 1b27ee73..5bc2be1c 100644 --- a/x86/TargetPrinter.ml +++ b/x86/TargetPrinter.ml @@ -95,9 +95,6 @@ let z oc n = output_string oc (Z.to_string n) let data_pointer = if Archi.ptr64 then ".quad" else ".long" -(* The comment deliminiter *) -let comment = "#" - (* Base-2 log of a Caml integer *) let rec log2 n = assert (n > 0); @@ -106,6 +103,7 @@ let rec log2 n = (* System dependent printer functions *) module type SYSTEM = sig + val comment: string val raw_symbol: out_channel -> string -> unit val symbol: out_channel -> P.t -> unit val label: out_channel -> int -> unit @@ -124,6 +122,9 @@ module type SYSTEM = module ELF_System : SYSTEM = struct + (* The comment delimiter *) + let comment = "#" + let raw_symbol oc s = fprintf oc "%s" s @@ -180,6 +181,10 @@ module ELF_System : SYSTEM = module MacOS_System : SYSTEM = struct + (* The comment delimiter. + `##` instead of `#` to please the Clang assembler. *) + let comment = "##" + let raw_symbol oc s = fprintf oc "_%s" s @@ -239,6 +244,9 @@ module MacOS_System : SYSTEM = module Cygwin_System : SYSTEM = struct + (* The comment delimiter *) + let comment = "#" + let symbol_prefix = if Archi.ptr64 then "" else "_" -- cgit From 146320253dffbcb348a1651cf73eea9ff96355ae Mon Sep 17 00:00:00 2001 From: Léo Gourdin Date: Thu, 10 Jun 2021 18:36:57 +0200 Subject: remove filter file --- filter_peeplog.fish | 39 --------------------------------------- 1 file changed, 39 deletions(-) delete mode 100755 filter_peeplog.fish diff --git a/filter_peeplog.fish b/filter_peeplog.fish deleted file mode 100755 index 72a0eaf1..00000000 --- a/filter_peeplog.fish +++ /dev/null @@ -1,39 +0,0 @@ -echo "LDP_CONSEC_PEEP_IMM_INC_ldr32" (cat log | ack "LDP_CONSEC_PEEP_IMM_INC_ldr32" | wc -l) -echo "LDP_CONSEC_PEEP_IMM_INC_ldr64" (cat log | ack "LDP_CONSEC_PEEP_IMM_INC_ldr64" | wc -l) -echo "LDP_CONSEC_PEEP_IMM_DEC_ldr32" (cat log | ack "LDP_CONSEC_PEEP_IMM_DEC_ldr32" | wc -l) -echo "LDP_CONSEC_PEEP_IMM_DEC_ldr64" (cat log | ack "LDP_CONSEC_PEEP_IMM_DEC_ldr64" | wc -l) -echo "LDP_FORW_SPACED_PEEP_IMM_INC_ldr32" (cat log | ack "LDP_FORW_SPACED_PEEP_IMM_INC_ldr32" | wc -l) -echo "LDP_FORW_SPACED_PEEP_IMM_INC_ldr64" (cat log | ack "LDP_FORW_SPACED_PEEP_IMM_INC_ldr64" | wc -l) -echo "LDP_FORW_SPACED_PEEP_IMM_DEC_ldr32" (cat log | ack "LDP_FORW_SPACED_PEEP_IMM_DEC_ldr32" | wc -l) -echo "LDP_FORW_SPACED_PEEP_IMM_DEC_ldr64" (cat log | ack "LDP_FORW_SPACED_PEEP_IMM_DEC_ldr64" | wc -l) -echo "LDP_BACK_SPACED_PEEP_IMM_INC_ldr32" (cat log | ack "LDP_BACK_SPACED_PEEP_IMM_INC_ldr32" | wc -l) -echo "LDP_BACK_SPACED_PEEP_IMM_INC_ldr64" (cat log | ack "LDP_BACK_SPACED_PEEP_IMM_INC_ldr64" | wc -l) -echo "LDP_BACK_SPACED_PEEP_IMM_DEC_ldr32" (cat log | ack "LDP_BACK_SPACED_PEEP_IMM_DEC_ldr32" | wc -l) -echo "LDP_BACK_SPACED_PEEP_IMM_DEC_ldr64" (cat log | ack "LDP_BACK_SPACED_PEEP_IMM_DEC_ldr64" | wc -l) -echo "\n" -echo "LDP_CONSEC_PEEP_IMM_INC_ldr32f" (cat log | ack "LDP_CONSEC_PEEP_IMM_INC_ldr32f" | wc -l) -echo "LDP_CONSEC_PEEP_IMM_INC_ldr64f" (cat log | ack "LDP_CONSEC_PEEP_IMM_INC_ldr64f" | wc -l) -echo "LDP_CONSEC_PEEP_IMM_DEC_ldr32f" (cat log | ack "LDP_CONSEC_PEEP_IMM_DEC_ldr32f" | wc -l) -echo "LDP_CONSEC_PEEP_IMM_DEC_ldr64f" (cat log | ack "LDP_CONSEC_PEEP_IMM_DEC_ldr64f" | wc -l) -echo "LDP_FORW_SPACED_PEEP_IMM_INC_ldr32f" (cat log | ack "LDP_FORW_SPACED_PEEP_IMM_INC_ldr32f" | wc -l) -echo "LDP_FORW_SPACED_PEEP_IMM_INC_ldr64f" (cat log | ack "LDP_FORW_SPACED_PEEP_IMM_INC_ldr64f" | wc -l) -echo "LDP_FORW_SPACED_PEEP_IMM_DEC_ldr32f" (cat log | ack "LDP_FORW_SPACED_PEEP_IMM_DEC_ldr32f" | wc -l) -echo "LDP_FORW_SPACED_PEEP_IMM_DEC_ldr64f" (cat log | ack "LDP_FORW_SPACED_PEEP_IMM_DEC_ldr64f" | wc -l) -echo "LDP_BACK_SPACED_PEEP_IMM_INC_ldr32f" (cat log | ack "LDP_BACK_SPACED_PEEP_IMM_INC_ldr32f" | wc -l) -echo "LDP_BACK_SPACED_PEEP_IMM_INC_ldr64f" (cat log | ack "LDP_BACK_SPACED_PEEP_IMM_INC_ldr64f" | wc -l) -echo "LDP_BACK_SPACED_PEEP_IMM_DEC_ldr32f" (cat log | ack "LDP_BACK_SPACED_PEEP_IMM_DEC_ldr32f" | wc -l) -echo "LDP_BACK_SPACED_PEEP_IMM_DEC_ldr64f" (cat log | ack "LDP_BACK_SPACED_PEEP_IMM_DEC_ldr64f" | wc -l) -echo "\n" -echo "STP_CONSEC_PEEP_IMM_INC_str32" (cat log | ack "STP_CONSEC_PEEP_IMM_INC_str32" | wc -l) -echo "STP_CONSEC_PEEP_IMM_INC_str64" (cat log | ack "STP_CONSEC_PEEP_IMM_INC_str64" | wc -l) -echo "STP_FORW_SPACED_PEEP_IMM_INC_str32" (cat log | ack "STP_FORW_SPACED_PEEP_IMM_INC_str32" | wc -l) -echo "STP_FORW_SPACED_PEEP_IMM_INC_str64" (cat log | ack "STP_FORW_SPACED_PEEP_IMM_INC_str64" | wc -l) -echo "STP_BACK_SPACED_PEEP_IMM_INC_str32" (cat log | ack "STP_BACK_SPACED_PEEP_IMM_INC_str32" | wc -l) -echo "STP_BACK_SPACED_PEEP_IMM_INC_str64" (cat log | ack "STP_BACK_SPACED_PEEP_IMM_INC_str64" | wc -l) -echo "\n" -echo "STP_CONSEC_PEEP_IMM_INC_str32f" (cat log | ack "STP_CONSEC_PEEP_IMM_INC_str32f" | wc -l) -echo "STP_CONSEC_PEEP_IMM_INC_str64f" (cat log | ack "STP_CONSEC_PEEP_IMM_INC_str64f" | wc -l) -echo "STP_FORW_SPACED_PEEP_IMM_INC_str32f" (cat log | ack "STP_FORW_SPACED_PEEP_IMM_INC_str32f" | wc -l) -echo "STP_FORW_SPACED_PEEP_IMM_INC_str64f" (cat log | ack "STP_FORW_SPACED_PEEP_IMM_INC_str64f" | wc -l) -echo "STP_BACK_SPACED_PEEP_IMM_INC_str32f" (cat log | ack "STP_BACK_SPACED_PEEP_IMM_INC_str32f" | wc -l) -echo "STP_BACK_SPACED_PEEP_IMM_INC_str64f" (cat log | ack "STP_BACK_SPACED_PEEP_IMM_INC_str64f" | wc -l) -- cgit From 2c56ce62c6bfd59aabccbb0df47b8247375556b4 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Fri, 11 Jun 2021 13:28:12 +0200 Subject: add PPC to CI and remove ugly hack for qemu linker paths --- .gitlab-ci.yml | 118 ++++++++++++++++++++++++++++++--------------------------- 1 file changed, 62 insertions(+), 56 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 9f407912..9eac61a0 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -14,6 +14,8 @@ check-admitted: rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' when: always + - if: '$CI_COMMIT_BRANCH == "kvx-work-ssa"' + when: always - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"' when: always - if: '$CI_COMMIT_BRANCH == "master"' @@ -35,6 +37,8 @@ build_x86_64: rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' when: always + - if: '$CI_COMMIT_BRANCH == "kvx-work-ssa"' + when: always - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"' when: always - if: '$CI_COMMIT_BRANCH == "master"' @@ -58,6 +62,8 @@ build_ia32: rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' when: always + - if: '$CI_COMMIT_BRANCH == "kvx-work-ssa"' + when: always - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"' when: always - if: '$CI_COMMIT_BRANCH == "master"' @@ -76,13 +82,13 @@ build_aarch64: script: - ./config_aarch64.sh - make -j "$NJOBS" - - export LD_LIBRARY_PATH=/usr/aarch64-linux-gnu/lib - - sudo ln -s /usr/aarch64-linux-gnu/lib/ld-linux-aarch64.so.1 /lib - - make -C test CCOMPOPTS='-static' SIMU='qemu-aarch64' EXECUTE='qemu-aarch64' all test - - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='aarch64-linux-gnu-gcc' EXECUTE='qemu-aarch64' CCOMPOPTS='-static' TARGET_CFLAGS='-static' + - make -C test CCOMPOPTS='-static' SIMU='qemu-aarch64' EXECUTE='qemu-aarch64 -L /usr/aarch64-linux-gnu/lib' all test + - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='aarch64-linux-gnu-gcc' EXECUTE='qemu-aarch64 -L /usr/aarch64-linux-gnu/lib' CCOMPOPTS='-static' TARGET_CFLAGS='-static' rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' when: always + - if: '$CI_COMMIT_BRANCH == "kvx-work-ssa"' + when: always - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"' when: always - if: '$CI_COMMIT_BRANCH == "master"' @@ -101,10 +107,8 @@ build_arm: script: - ./config_arm.sh - make -j "$NJOBS" - - export LD_LIBRARY_PATH=/usr/arm-linux-gnueabi/lib - - sudo ln -s /usr/arm-linux-gnueabi/lib/ld-linux.so.3 /lib # FIXME: UGLY ! - - make -C test CCOMPOPTS=-static SIMU='qemu-arm' EXECUTE='qemu-arm' all test - - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='arm-linux-gnueabi-gcc' EXECUTE='qemu-arm' CCOMPOPTS='-static' TARGET_CFLAGS='-static' BITS=32 + - make -C test CCOMPOPTS=-static SIMU='qemu-arm' EXECUTE='qemu-arm -L /usr/arm-linux-gnueabi' all test + - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='arm-linux-gnueabi-gcc' EXECUTE='qemu-arm -L /usr/arm-linux-gnueabi' CCOMPOPTS='-static' TARGET_CFLAGS='-static' BITS=32 rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' when: always @@ -129,10 +133,8 @@ build_armhf: script: - ./config_armhf.sh - make -j "$NJOBS" - - export LD_LIBRARY_PATH=/usr/arm-linux-gnueabihf/lib - - sudo ln -s /usr/arm-linux-gnueabihf/lib/ld-linux-armhf.so.3 /lib # FIXME: UGLY ! - - make -C test CCOMPOPTS=-static SIMU='qemu-arm' EXECUTE='qemu-arm' all test - - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='arm-linux-gnueabihf-gcc' EXECUTE='qemu-arm' CCOMPOPTS='-static' TARGET_CFLAGS='-static' BITS=32 + - make -C test CCOMPOPTS=-static SIMU='qemu-arm' EXECUTE='qemu-arm -L /usr/arm-linux-gnueabihf' all test + - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='arm-linux-gnueabihf-gcc' EXECUTE='qemu-arm -L /usr/arm-linux-gnueabihf' CCOMPOPTS='-static' TARGET_CFLAGS='-static' BITS=32 rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' when: always @@ -144,47 +146,51 @@ build_armhf: when: always - when: manual -# build_ppc: -# stage: build -# image: coqorg/coq:8.12.2-ocaml-4.11.1-flambda -# before_script: -# - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update -# - sudo apt-get -y install gcc-powerpc-linux-gnu qemu-user -# - eval `opam config env` -# - opam update -# - opam install -y menhir -# script: -# - ./config_ppc.sh -# - make -j "$NJOBS" -# rules: -# - if: '$CI_COMMIT_BRANCH == "kvx-work"' -# when: always -# - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"' -# when: always -# - if: '$CI_COMMIT_BRANCH == "master"' -# when: always -# - when: manual +build_ppc: + stage: build + image: coqorg/coq:8.12.2-ocaml-4.11.1-flambda + before_script: + - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update + - sudo apt-get -y install gcc-powerpc-linux-gnu qemu-user + - eval `opam config env` + - opam update + - opam install -y menhir + script: + - ./config_ppc.sh + - make -j "$NJOBS" + - make -C test CCOMPOPTS=-static SIMU='qemu-ppc' EXECUTE='qemu-ppc -L /usr/powerpc-linux-gnu -cpu 7400' all test + - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='powerpc-linux-gnu-gcc' EXECUTE='qemu-ppc -L /usr/powerpc-linux-gnu -cpu 7400' CCOMPOPTS='-static' TARGET_CFLAGS='-static' BITS=32 + rules: + - if: '$CI_COMMIT_BRANCH == "kvx-work"' + when: always + - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"' + when: always + - if: '$CI_COMMIT_BRANCH == "master"' + when: always + - when: manual -# build_ppc64: -# stage: build -# image: coqorg/coq:8.12.2-ocaml-4.11.1-flambda -# before_script: -# - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update -# - sudo apt-get -y install gcc-powerpc64-linux-gnu -# - eval `opam config env` -# - opam update -# - opam install -y menhir -# script: -# - ./config_ppc64.sh -# - make -j "$NJOBS" -# rules: -# - if: '$CI_COMMIT_BRANCH == "kvx-work"' -# when: always -# - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"' -# when: always -# - if: '$CI_COMMIT_BRANCH == "master"' -# when: always -# - when: manual +build_ppc64: + stage: build + image: coqorg/coq:8.12.2-ocaml-4.11.1-flambda + before_script: + - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update + - sudo apt-get -y install gcc-powerpc64-linux-gnu + - eval `opam config env` + - opam update + - opam install -y menhir + script: + - ./config_ppc64.sh + - make -j "$NJOBS" + - make -C test CCOMPOPTS=-static SIMU='qemu-ppc64' EXECUTE='qemu-ppc64 -L /usr/powerpc64-linux-gnu -cpu 7400' all test + - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='powerpc64-linux-gnu-gcc' EXECUTE='qemu-ppc -L /usr/powerpc64-linux-gnu -cpu 7400' CCOMPOPTS='-static' TARGET_CFLAGS='-static' BITS=32 + rules: + - if: '$CI_COMMIT_BRANCH == "kvx-work"' + when: always + - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"' + when: always + - if: '$CI_COMMIT_BRANCH == "master"' + when: always + - when: manual build_rv64: stage: build @@ -198,13 +204,13 @@ build_rv64: script: - ./config_rv64.sh - make -j "$NJOBS" - - export LD_LIBRARY_PATH=/usr/riscv64-linux-gnu/lib - - sudo ln -s /usr/riscv64-linux-gnu/lib/ld-linux-riscv64-lp64d.so.1 /lib - - make -C test CCOMPOPTS=-static SIMU='qemu-riscv64' EXECUTE='qemu-riscv64' all test - - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='riscv64-linux-gnu-gcc' EXECUTE='qemu-riscv64' CCOMPOPTS='-static' TARGET_CFLAGS='-static' + - make -C test CCOMPOPTS=-static SIMU='qemu-riscv64' EXECUTE='qemu-riscv64 -L /usr/riscv64-linux-gnu' all test + - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='riscv64-linux-gnu-gcc' EXECUTE='qemu-riscv64 -L /usr/riscv64-linux-gnu' CCOMPOPTS='-static' TARGET_CFLAGS='-static' rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' when: always + - if: '$CI_COMMIT_BRANCH == "kvx-work-ssa"' + when: always - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"' when: always - if: '$CI_COMMIT_BRANCH == "master"' -- cgit From 9c91b4daf1632dfe295748fc19ebedf0b93985b7 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Fri, 11 Jun 2021 13:47:25 +0200 Subject: path issues --- .gitlab-ci.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 9eac61a0..b3558971 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -82,7 +82,7 @@ build_aarch64: script: - ./config_aarch64.sh - make -j "$NJOBS" - - make -C test CCOMPOPTS='-static' SIMU='qemu-aarch64' EXECUTE='qemu-aarch64 -L /usr/aarch64-linux-gnu/lib' all test + - make -C test CCOMPOPTS='-static' SIMU='qemu-aarch64 -L /usr/aarch64-linux-gnu/lib' EXECUTE='qemu-aarch64 -L /usr/aarch64-linux-gnu/lib' all test - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='aarch64-linux-gnu-gcc' EXECUTE='qemu-aarch64 -L /usr/aarch64-linux-gnu/lib' CCOMPOPTS='-static' TARGET_CFLAGS='-static' rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' @@ -107,7 +107,7 @@ build_arm: script: - ./config_arm.sh - make -j "$NJOBS" - - make -C test CCOMPOPTS=-static SIMU='qemu-arm' EXECUTE='qemu-arm -L /usr/arm-linux-gnueabi' all test + - make -C test CCOMPOPTS=-static SIMU='qemu-arm -L /usr/arm-linux-gnueabi' EXECUTE='qemu-arm -L /usr/arm-linux-gnueabi' all test - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='arm-linux-gnueabi-gcc' EXECUTE='qemu-arm -L /usr/arm-linux-gnueabi' CCOMPOPTS='-static' TARGET_CFLAGS='-static' BITS=32 rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' @@ -133,7 +133,7 @@ build_armhf: script: - ./config_armhf.sh - make -j "$NJOBS" - - make -C test CCOMPOPTS=-static SIMU='qemu-arm' EXECUTE='qemu-arm -L /usr/arm-linux-gnueabihf' all test + - make -C test CCOMPOPTS=-static SIMU='qemu-arm -L /usr/arm-linux-gnueabihf' EXECUTE='qemu-arm -L /usr/arm-linux-gnueabihf' all test - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='arm-linux-gnueabihf-gcc' EXECUTE='qemu-arm -L /usr/arm-linux-gnueabihf' CCOMPOPTS='-static' TARGET_CFLAGS='-static' BITS=32 rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' @@ -158,7 +158,7 @@ build_ppc: script: - ./config_ppc.sh - make -j "$NJOBS" - - make -C test CCOMPOPTS=-static SIMU='qemu-ppc' EXECUTE='qemu-ppc -L /usr/powerpc-linux-gnu -cpu 7400' all test + - make -C test CCOMPOPTS=-static SIMU='qemu-ppc -L /usr/powerpc-linux-gnu -cpu 7400' EXECUTE='qemu-ppc -L /usr/powerpc-linux-gnu -cpu 7400' all test - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='powerpc-linux-gnu-gcc' EXECUTE='qemu-ppc -L /usr/powerpc-linux-gnu -cpu 7400' CCOMPOPTS='-static' TARGET_CFLAGS='-static' BITS=32 rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' @@ -181,7 +181,7 @@ build_ppc64: script: - ./config_ppc64.sh - make -j "$NJOBS" - - make -C test CCOMPOPTS=-static SIMU='qemu-ppc64' EXECUTE='qemu-ppc64 -L /usr/powerpc64-linux-gnu -cpu 7400' all test + - make -C test CCOMPOPTS=-static SIMU='qemu-ppc64 -L /usr/powerpc64-linux-gnu -cpu 7400' EXECUTE='qemu-ppc64 -L /usr/powerpc64-linux-gnu -cpu 7400' all test - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='powerpc64-linux-gnu-gcc' EXECUTE='qemu-ppc -L /usr/powerpc64-linux-gnu -cpu 7400' CCOMPOPTS='-static' TARGET_CFLAGS='-static' BITS=32 rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' @@ -204,7 +204,7 @@ build_rv64: script: - ./config_rv64.sh - make -j "$NJOBS" - - make -C test CCOMPOPTS=-static SIMU='qemu-riscv64' EXECUTE='qemu-riscv64 -L /usr/riscv64-linux-gnu' all test + - make -C test CCOMPOPTS=-static SIMU='qemu-riscv64 -L /usr/riscv64-linux-gnu' EXECUTE='qemu-riscv64 -L /usr/riscv64-linux-gnu' all test - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='riscv64-linux-gnu-gcc' EXECUTE='qemu-riscv64 -L /usr/riscv64-linux-gnu' CCOMPOPTS='-static' TARGET_CFLAGS='-static' rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' -- cgit From 2affa0523a16284316d88b6d2c32638b0a8dea88 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Fri, 11 Jun 2021 13:50:32 +0200 Subject: don't use -static on ppc --- .gitlab-ci.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index b3558971..cd864546 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -158,8 +158,8 @@ build_ppc: script: - ./config_ppc.sh - make -j "$NJOBS" - - make -C test CCOMPOPTS=-static SIMU='qemu-ppc -L /usr/powerpc-linux-gnu -cpu 7400' EXECUTE='qemu-ppc -L /usr/powerpc-linux-gnu -cpu 7400' all test - - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='powerpc-linux-gnu-gcc' EXECUTE='qemu-ppc -L /usr/powerpc-linux-gnu -cpu 7400' CCOMPOPTS='-static' TARGET_CFLAGS='-static' BITS=32 + - make -C test SIMU='qemu-ppc -L /usr/powerpc-linux-gnu -cpu 7400' EXECUTE='qemu-ppc -L /usr/powerpc-linux-gnu -cpu 7400' all test + - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='powerpc-linux-gnu-gcc' EXECUTE='qemu-ppc -L /usr/powerpc-linux-gnu -cpu 7400' BITS=32 rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' when: always @@ -181,8 +181,8 @@ build_ppc64: script: - ./config_ppc64.sh - make -j "$NJOBS" - - make -C test CCOMPOPTS=-static SIMU='qemu-ppc64 -L /usr/powerpc64-linux-gnu -cpu 7400' EXECUTE='qemu-ppc64 -L /usr/powerpc64-linux-gnu -cpu 7400' all test - - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='powerpc64-linux-gnu-gcc' EXECUTE='qemu-ppc -L /usr/powerpc64-linux-gnu -cpu 7400' CCOMPOPTS='-static' TARGET_CFLAGS='-static' BITS=32 + - make -C test SIMU='qemu-ppc64 -L /usr/powerpc64-linux-gnu -cpu 7400' EXECUTE='qemu-ppc64 -L /usr/powerpc64-linux-gnu -cpu 7400' all test + - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='powerpc64-linux-gnu-gcc' EXECUTE='qemu-ppc -L /usr/powerpc64-linux-gnu -cpu 7400' BITS=32 rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' when: always -- cgit From efe8ddef663f0eb511ae4303afb7f799f31c3a68 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Fri, 11 Jun 2021 14:17:16 +0200 Subject: fix bad paths --- .gitlab-ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index cd864546..969ef169 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -82,8 +82,8 @@ build_aarch64: script: - ./config_aarch64.sh - make -j "$NJOBS" - - make -C test CCOMPOPTS='-static' SIMU='qemu-aarch64 -L /usr/aarch64-linux-gnu/lib' EXECUTE='qemu-aarch64 -L /usr/aarch64-linux-gnu/lib' all test - - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='aarch64-linux-gnu-gcc' EXECUTE='qemu-aarch64 -L /usr/aarch64-linux-gnu/lib' CCOMPOPTS='-static' TARGET_CFLAGS='-static' + - make -C test CCOMPOPTS='-static' SIMU='qemu-aarch64 -L /usr/aarch64-linux-gnu' EXECUTE='qemu-aarch64 -L /usr/aarch64-linux-gnu' all test + - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='aarch64-linux-gnu-gcc' EXECUTE='qemu-aarch64 -L /usr/aarch64-linux-gnu' CCOMPOPTS='-static' TARGET_CFLAGS='-static' rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' when: always -- cgit From f502e3bc8a400b32c19b67052d0b6c96df1d88e4 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Fri, 11 Jun 2021 14:37:16 +0200 Subject: disable PPC64; can't link and don't know why --- .gitlab-ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 969ef169..c9d53c66 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -181,8 +181,8 @@ build_ppc64: script: - ./config_ppc64.sh - make -j "$NJOBS" - - make -C test SIMU='qemu-ppc64 -L /usr/powerpc64-linux-gnu -cpu 7400' EXECUTE='qemu-ppc64 -L /usr/powerpc64-linux-gnu -cpu 7400' all test - - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='powerpc64-linux-gnu-gcc' EXECUTE='qemu-ppc -L /usr/powerpc64-linux-gnu -cpu 7400' BITS=32 + #- make -C test SIMU='qemu-ppc64 -L /usr/powerpc64-linux-gnu -cpu 7400' EXECUTE='qemu-ppc64 -L /usr/powerpc64-linux-gnu -cpu 7400' all test + #- ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='powerpc64-linux-gnu-gcc' EXECUTE='qemu-ppc -L /usr/powerpc64-linux-gnu -cpu 7400' BITS=32 rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' when: always -- cgit From d44976886e34a38b2ae36fd5b0dd86ff466ffaff Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Fri, 11 Jun 2021 15:18:19 +0200 Subject: compile non yarpgen tests without -static; this should work --- .gitlab-ci.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index c9d53c66..a52ad5fb 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -82,7 +82,7 @@ build_aarch64: script: - ./config_aarch64.sh - make -j "$NJOBS" - - make -C test CCOMPOPTS='-static' SIMU='qemu-aarch64 -L /usr/aarch64-linux-gnu' EXECUTE='qemu-aarch64 -L /usr/aarch64-linux-gnu' all test + - make -C test SIMU='qemu-aarch64 -L /usr/aarch64-linux-gnu' EXECUTE='qemu-aarch64 -L /usr/aarch64-linux-gnu' all test - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='aarch64-linux-gnu-gcc' EXECUTE='qemu-aarch64 -L /usr/aarch64-linux-gnu' CCOMPOPTS='-static' TARGET_CFLAGS='-static' rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' @@ -107,7 +107,7 @@ build_arm: script: - ./config_arm.sh - make -j "$NJOBS" - - make -C test CCOMPOPTS=-static SIMU='qemu-arm -L /usr/arm-linux-gnueabi' EXECUTE='qemu-arm -L /usr/arm-linux-gnueabi' all test + - make -C test SIMU='qemu-arm -L /usr/arm-linux-gnueabi' EXECUTE='qemu-arm -L /usr/arm-linux-gnueabi' all test - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='arm-linux-gnueabi-gcc' EXECUTE='qemu-arm -L /usr/arm-linux-gnueabi' CCOMPOPTS='-static' TARGET_CFLAGS='-static' BITS=32 rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' @@ -133,7 +133,7 @@ build_armhf: script: - ./config_armhf.sh - make -j "$NJOBS" - - make -C test CCOMPOPTS=-static SIMU='qemu-arm -L /usr/arm-linux-gnueabihf' EXECUTE='qemu-arm -L /usr/arm-linux-gnueabihf' all test + - make -C test SIMU='qemu-arm -L /usr/arm-linux-gnueabihf' EXECUTE='qemu-arm -L /usr/arm-linux-gnueabihf' all test - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='arm-linux-gnueabihf-gcc' EXECUTE='qemu-arm -L /usr/arm-linux-gnueabihf' CCOMPOPTS='-static' TARGET_CFLAGS='-static' BITS=32 rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' @@ -204,7 +204,7 @@ build_rv64: script: - ./config_rv64.sh - make -j "$NJOBS" - - make -C test CCOMPOPTS=-static SIMU='qemu-riscv64 -L /usr/riscv64-linux-gnu' EXECUTE='qemu-riscv64 -L /usr/riscv64-linux-gnu' all test + - make -C test SIMU='qemu-riscv64 -L /usr/riscv64-linux-gnu' EXECUTE='qemu-riscv64 -L /usr/riscv64-linux-gnu' all test - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='riscv64-linux-gnu-gcc' EXECUTE='qemu-riscv64 -L /usr/riscv64-linux-gnu' CCOMPOPTS='-static' TARGET_CFLAGS='-static' rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' -- cgit From 293455988549527895a5a79a5d862cb54b217d53 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Fri, 11 Jun 2021 15:51:34 +0200 Subject: disable ppc partially --- .gitlab-ci.yml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index a52ad5fb..11d7ab58 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -158,11 +158,13 @@ build_ppc: script: - ./config_ppc.sh - make -j "$NJOBS" - - make -C test SIMU='qemu-ppc -L /usr/powerpc-linux-gnu -cpu 7400' EXECUTE='qemu-ppc -L /usr/powerpc-linux-gnu -cpu 7400' all test + # problems with float on qemu on CI - make -C test SIMU='qemu-ppc -L /usr/powerpc-linux-gnu -cpu 7400' EXECUTE='qemu-ppc -L /usr/powerpc-linux-gnu -cpu 7400' all test - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='powerpc-linux-gnu-gcc' EXECUTE='qemu-ppc -L /usr/powerpc-linux-gnu -cpu 7400' BITS=32 rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' when: always + - if: '$CI_COMMIT_BRANCH == "kvx-work-ssa"' + when: always - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"' when: always - if: '$CI_COMMIT_BRANCH == "master"' @@ -182,10 +184,12 @@ build_ppc64: - ./config_ppc64.sh - make -j "$NJOBS" #- make -C test SIMU='qemu-ppc64 -L /usr/powerpc64-linux-gnu -cpu 7400' EXECUTE='qemu-ppc64 -L /usr/powerpc64-linux-gnu -cpu 7400' all test - #- ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='powerpc64-linux-gnu-gcc' EXECUTE='qemu-ppc -L /usr/powerpc64-linux-gnu -cpu 7400' BITS=32 + #- ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='powerpc64-linux-gnu-gcc' EXECUTE='qemu-ppc64 -L /usr/powerpc64-linux-gnu -cpu 7400' BITS=32 rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' when: always + - if: '$CI_COMMIT_BRANCH == "kvx-work-ssa"' + when: always - if: '$CI_COMMIT_BRANCH == "kvx-work-velus"' when: always - if: '$CI_COMMIT_BRANCH == "master"' -- cgit From f5e95c5647e2c5d3012c613de1c072ca2cbead8d Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Fri, 11 Jun 2021 23:49:56 +0200 Subject: show qemu version --- .gitlab-ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 11d7ab58..8ebca587 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -158,6 +158,7 @@ build_ppc: script: - ./config_ppc.sh - make -j "$NJOBS" + - qemu-ppc --version # problems with float on qemu on CI - make -C test SIMU='qemu-ppc -L /usr/powerpc-linux-gnu -cpu 7400' EXECUTE='qemu-ppc -L /usr/powerpc-linux-gnu -cpu 7400' all test - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='powerpc-linux-gnu-gcc' EXECUTE='qemu-ppc -L /usr/powerpc-linux-gnu -cpu 7400' BITS=32 rules: -- cgit From 8c7a5100478611c8278ccef5e06951c831d07ad8 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Sat, 12 Jun 2021 01:06:34 +0200 Subject: Use qemu-6.0.0 for PPC as the 3.1.0 version shipping with the Debian in the docker has buggy float Squashed commit of the following: commit 54d1983cd8d8551c28109a506a752a971897f4ed Author: David Monniaux Date: Sat Jun 12 00:48:02 2021 +0200 sudo make install commit 49af5c63eff29a49f3cb466a6b6af44570d85352 Author: David Monniaux Date: Sat Jun 12 00:43:17 2021 +0200 pixman commit d78ab98e5751dd3ae0299a3e8c271472ebd8bb63 Author: David Monniaux Date: Sat Jun 12 00:36:30 2021 +0200 libglib commit 0808bf51be42b04c2db4ccc914633407c1309585 Author: David Monniaux Date: Sat Jun 12 00:31:46 2021 +0200 don't show verbose untar commit 972c244c72d9a30fee41dc7cbcc3698a49b6cde6 Author: David Monniaux Date: Sat Jun 12 00:30:32 2021 +0200 ninja-build commit a1c261d01abc1c62ea94d56cfc9cce90887db680 Author: David Monniaux Date: Sat Jun 12 00:28:14 2021 +0200 install ninja commit 92990598283f624d598853851c3edb2650f45b4b Author: David Monniaux Date: Sat Jun 12 00:25:17 2021 +0200 untar commit a225a0dcea26dd8888be535aa1aec4a58007679d Author: David Monniaux Date: Sat Jun 12 00:20:32 2021 +0200 install wget first commit 3b2c30ab6a953bde9d09034d38c5919a9425163d Author: David Monniaux Date: Sat Jun 12 00:17:09 2021 +0200 install recent qemu --- .gitlab-ci.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 8ebca587..b3bb418f 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -151,7 +151,10 @@ build_ppc: image: coqorg/coq:8.12.2-ocaml-4.11.1-flambda before_script: - sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update - - sudo apt-get -y install gcc-powerpc-linux-gnu qemu-user + - sudo apt-get -y install gcc-powerpc-linux-gnu wget ninja-build libglib2.0-dev libpixman-1-dev + - wget --no-verbose https://download.qemu.org/qemu-6.0.0.tar.xz + - tar xJf qemu-6.0.0.tar.xz + - (cd qemu-6.0.0 && ./configure --target-list=ppc-linux-user && make && sudo make install) - eval `opam config env` - opam update - opam install -y menhir @@ -159,7 +162,7 @@ build_ppc: - ./config_ppc.sh - make -j "$NJOBS" - qemu-ppc --version - # problems with float on qemu on CI - make -C test SIMU='qemu-ppc -L /usr/powerpc-linux-gnu -cpu 7400' EXECUTE='qemu-ppc -L /usr/powerpc-linux-gnu -cpu 7400' all test + - make -C test SIMU='qemu-ppc -L /usr/powerpc-linux-gnu -cpu 7400' EXECUTE='qemu-ppc -L /usr/powerpc-linux-gnu -cpu 7400' all test - ulimit -s65536 && make -C test/monniaux/yarpgen TARGET_CC='powerpc-linux-gnu-gcc' EXECUTE='qemu-ppc -L /usr/powerpc-linux-gnu -cpu 7400' BITS=32 rules: - if: '$CI_COMMIT_BRANCH == "kvx-work"' -- cgit From 2b814b1f9bb30d9c8b59a713f69bced808bca7c7 Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Sat, 12 Jun 2021 10:52:59 +0200 Subject: work on the scheduler --- aarch64/PostpassSchedulingOracle.ml | 1 + aarch64/PrepassSchedulingOracle.ml | 4 +- scheduling/InstructionScheduler.ml | 146 ++++++++++++++++++++++++++---------- scheduling/InstructionScheduler.mli | 4 + 4 files changed, 114 insertions(+), 41 deletions(-) diff --git a/aarch64/PostpassSchedulingOracle.ml b/aarch64/PostpassSchedulingOracle.ml index 834d42f5..867341ca 100644 --- a/aarch64/PostpassSchedulingOracle.ml +++ b/aarch64/PostpassSchedulingOracle.ml @@ -509,6 +509,7 @@ let build_problem bb = resource_bounds = opweights.pipelined_resource_bounds; live_regs_entry = Registers.Regset.empty; (* unused here *) typing = (fun x -> AST.Tint); (* unused here *) + pressure_deltas = [| [| |] |] ; instruction_usages = instruction_usages bb; latency_constraints = latency_constraints bb; } diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml index 6d445f10..19f05749 100644 --- a/aarch64/PrepassSchedulingOracle.ml +++ b/aarch64/PrepassSchedulingOracle.ml @@ -201,6 +201,7 @@ let get_simple_dependencies (opweights : opweights) (seqa : (instruction*Regset. end seqa; !latency_constraints;; + let get_pressure_deltas (seqa : (instruction * Regset.t) array) (typing : RTLtyping.regenv) : int array array = @@ -459,7 +460,8 @@ let define_problem (opweights : opweights) (live_entry_regs : Regset.t) resource_bounds = opweights.pipelined_resource_bounds; live_regs_entry = live_entry_regs; typing = typing; - instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa); + pressure_deltas = get_pressure_deltas seqa typing; + instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa); latency_constraints = (* if (use_alias_analysis ()) then (get_alias_dependencies seqa) @ simple_deps diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml index 08164293..8d8c4267 100644 --- a/scheduling/InstructionScheduler.ml +++ b/scheduling/InstructionScheduler.ml @@ -35,6 +35,7 @@ type problem = { resource_bounds : int array; live_regs_entry : Registers.Regset.t; typing : RTLtyping.regenv; + pressure_deltas : int array array; instruction_usages : int array array; latency_constraints : latency_constraint list; };; @@ -348,60 +349,124 @@ let reg_pres_scheduler (problem : problem) : solution option = let available_regs = Array.copy Machregsaux.nr_regs in + let nr_types_regs = Array.length available_regs in + + let regs_thresholds = Array.init nr_types_regs + (fun i -> 5) in + (* placeholder value *) + List.iter (fun r -> let classe = Machregsaux.class_of_type (problem.typing r) in available_regs.(classe) <- available_regs.(classe) - 1) (Registers.Regset.elements live_regs_entry); - let nr_types_regs = Array.length available_regs in - (* wait di we have access to instructions here? No, we have to add - al this to constraints *) - (* let pressures = - * Array.init (nr_instructions) (fun i -> - * Array.init (nr_types_regs) (fun t -> - * match i with - * | Inop -> 0 - * | Iop (_, args, dest, _) - * | Iload(_, _, _, args, dest, _) -> - * if - * )) *) + let pressures = problem.pressure_deltas in let priorities = critical_paths successors in + + let current_resources = Array.copy problem.resource_bounds in - let module InstrSetCSP = - Set.Make (struct - type t=int - let compare x y = - match priorities.(y) - priorities.(x) with - | 0 -> x - y - | z -> z - end) in - - (* TODO: find a way to efficiently find an instruction which - decreases register pressure *) - (* idea: *) - (* let module InstrSetCSR = - * Set.Make (struct - * type t = int - * let compare x y = - * match pressures.(y) - pressures.(x) with - * | 0 -> (match priorities.(y) - priorities.(x) with - * | 0 -> x - y - * | z -> z) - * | z -> z - * end) in *) - (* where pressure.(x) is the delta of register pressure for - instruction x. Pb: different register types. Need to think about - it. Have one module for each register type, that's used when this - particular type reach a high pressure? *) + let module InstrSet = + struct + module MSet = + Set.Make (struct + type t=int + let compare x y = + match priorities.(y) - priorities.(x) with + | 0 -> x - y + | z -> z + end) + + let empty = MSet.empty + let is_empty = MSet.is_empty + let mem = MSet.mem + let add = MSet.add + let remove = MSet.remove + let union = MSet.union + let inter = MSet.inter + let iter = MSet.iter + + let compare_regs i x y = + match pressures.(y).(i) - pressures.(x).(i) with + | 0 -> (match priorities.(y) - priorities.(x) with + | 0 -> x - y + | z -> z) + | z -> z + + (** t is the register class *) + let sched_CSR t ready usages = + let result = ref (-1) in + iter (fun i -> + if vector_less_equal usages.(i) current_resources + then if !result = -1 || (compare_regs t !result i < 0) + then result := i) ready; + !result + end + in + + (* [compare i] is comp function for finding instruction with + lowest reg pres delta for reg class i *) let max_time = bound_max_time problem in - let ready = Array.make max_time InstrSetCSP.empty in + let ready = Array.make max_time InstrSet.empty in + Array.iteri (fun i preds -> + if i < nr_instructions && preds = [] + then ready.(0) <- InstrSet.add i ready.(0)) predecessors; + + let current_time = ref 0 + and earliest_time i = + try + let time = ref (-1) in + List.iter (fun (j, latency) -> + if times.(j) < 0 + then raise Exit + else let t = times.(j) + latency in + if t > !time + then time := t) predecessors.(i); + assert (!time >= 0); + !time + with Exit -> -1 + in + + let advance_time () = + (if !current_time < max_time-1 + then ( + Array.blit problem.resource_bounds 0 current_resources 0 + (Array.length current_resources); + ready.(!current_time + 1) <- + InstrSet.union (ready.(!current_time)) + (ready.(!current_time +1)); + ready.(!current_time) <- InstrSet.empty)); + incr current_time + in + + let attempt_scheduling ready usages = + let result = ref (-1) in + try + Array.iteri (fun i avlregs -> + if avlregs <= regs_thresholds.(i) + then ( + let maybe = InstrSet.sched_CSR i ready usages in + (if pressures.(maybe).(i) < 0 then + (vector_subtract usages.(maybe) current_resources; + result := maybe)); + raise Exit) + ) available_regs; + InstrSet.iter (fun i -> + if vector_less_equal usages.(i) current_resources + then ( + vector_subtract usages.(i) current_resources; + result := i; + raise Exit)); + -1 + with Exit -> !result in + (* silence warning, enable compilation while working *) - let _ = successors, predecessors, times, ready, nr_types_regs in + let _ = successors, predecessors, times, ready, compare, + earliest_time, advance_time, nr_types_regs in (* PLACEHOLDER *) None @@ -515,6 +580,7 @@ let reverse_problem problem = with creating a reverse scheduler aware of reg press *) typing = problem.typing; + pressure_deltas = [| [| |] |] ; instruction_usages = Array.init (nr_instructions + 1) (fun i -> if i=0 diff --git a/scheduling/InstructionScheduler.mli b/scheduling/InstructionScheduler.mli index 8dcc4ef5..e7f9e7db 100644 --- a/scheduling/InstructionScheduler.mli +++ b/scheduling/InstructionScheduler.mli @@ -29,6 +29,10 @@ type problem = { typing : RTLtyping.regenv; (** Register type map. *) + pressure_deltas : int array array; + (** At index (i, j), the pressure delta for instruction i, for + register class j. *) + instruction_usages: int array array; (** At index {i i} the vector of resources used by instruction number {i i}. It must be the same length as [resource_bounds] *) -- cgit From 66e15205c40de54639387a4c9b1cc78994525d55 Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Mon, 14 Jun 2021 13:53:08 +0200 Subject: scheduler written, need to test now --- driver/Driver.ml | 2 +- scheduling/InstructionScheduler.ml | 84 ++++++++++++++++++++++++++++++-------- 2 files changed, 68 insertions(+), 18 deletions(-) diff --git a/driver/Driver.ml b/driver/Driver.ml index 7192ba4b..5a8c7f2c 100644 --- a/driver/Driver.ml +++ b/driver/Driver.ml @@ -210,7 +210,7 @@ Processing options: -mtune= Type of CPU (for scheduling on some architectures) -fprepass Perform prepass scheduling (only on some architectures) [on] -fprepass= Perform postpass scheduling with the specified optimization [list] - (=list: list scheduling, =revlist: reverse list scheduling, =zigzag: zigzag scheduling, =ilp: ILP, =greedy: just packing bundles) + (=list: list scheduling, =revlist: reverse list scheduling, =regpres: list scheduling aware of register pressure, =zigzag: zigzag scheduling, =ilp: ILP, =greedy: just packing bundles) -fpostpass Perform postpass scheduling (only for K1 architecture) [on] -fpostpass= Perform postpass scheduling with the specified optimization [list] (=list: list scheduling, =ilp: ILP, =greedy: just packing bundles) diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml index 8d8c4267..a069de59 100644 --- a/scheduling/InstructionScheduler.ml +++ b/scheduling/InstructionScheduler.ml @@ -121,6 +121,13 @@ let vector_less_equal a b = true with Exit -> false;; +(* let vector_add a b = + * assert ((Array.length a) = (Array.length b)); + * for i=0 to (Array.length a)-1 + * do + * b.(i) <- b.(i) + a.(i) + * done;; *) + let vector_subtract a b = assert ((Array.length a) = (Array.length b)); for i=0 to (Array.length a)-1 @@ -341,6 +348,7 @@ let _ = fun x -> priority_list_scheduler INSTRUCTION_ORDER x;; (* A scheduler sensitive to register pressure *) let reg_pres_scheduler (problem : problem) : solution option = + DebugPrint.debug_flag := true; let nr_instructions = get_nr_instructions problem in let successors = get_successors problem and predecessors = get_predecessors problem @@ -381,11 +389,9 @@ let reg_pres_scheduler (problem : problem) : solution option = let empty = MSet.empty let is_empty = MSet.is_empty - let mem = MSet.mem let add = MSet.add let remove = MSet.remove let union = MSet.union - let inter = MSet.inter let iter = MSet.iter let compare_regs i x y = @@ -400,15 +406,12 @@ let reg_pres_scheduler (problem : problem) : solution option = let result = ref (-1) in iter (fun i -> if vector_less_equal usages.(i) current_resources - then if !result = -1 || (compare_regs t !result i < 0) + then if !result = -1 || (compare_regs t !result i < 0) then result := i) ready; !result end in - (* [compare i] is comp function for finding instruction with - lowest reg pres delta for reg class i *) - let max_time = bound_max_time problem in let ready = Array.make max_time InstrSet.empty in @@ -450,25 +453,71 @@ let reg_pres_scheduler (problem : problem) : solution option = if avlregs <= regs_thresholds.(i) then ( let maybe = InstrSet.sched_CSR i ready usages in - (if pressures.(maybe).(i) < 0 then + (* print_int maybe; + * print_newline (); + * flush stdout; *) + if maybe > 0 && pressures.(maybe).(i) < 0 then (vector_subtract usages.(maybe) current_resources; - result := maybe)); - raise Exit) - ) available_regs; + result := maybe; + raise Exit))) available_regs; InstrSet.iter (fun i -> if vector_less_equal usages.(i) current_resources then ( vector_subtract usages.(i) current_resources; result := i; - raise Exit)); + raise Exit)) ready; -1 - with Exit -> !result in + with Exit -> + if !result <> -1 then + vector_subtract pressures.(!result) available_regs; + !result in - (* silence warning, enable compilation while working *) - let _ = successors, predecessors, times, ready, compare, - earliest_time, advance_time, nr_types_regs in - (* PLACEHOLDER *) - None + while !current_time < max_time + do + if (InstrSet.is_empty ready.(!current_time)) + then advance_time () + else + match attempt_scheduling ready.(!current_time) + problem.instruction_usages with + | -1 -> advance_time() + | i -> (assert(times.(i) < 0); + times.(i) <- !current_time; + ready.(!current_time) + <- InstrSet.remove i (ready.(!current_time)); + List.iter (fun (instr_to, latency) -> + if instr_to < nr_instructions then + match earliest_time instr_to with + | -1 -> () + | to_time -> + ready.(to_time) + <- InstrSet.add instr_to ready.(to_time) + ) successors.(i); + successors.(i) <- [] + ) + done; + + try + let final_time = ref (-1) in + for i = 0 to nr_instructions - 1 do + (* print_int i; + * flush stdout; *) + (if times.(i) < 0 then raise Exit); + (if !final_time < times.(i) + 1 then final_time := times.(i) + 1) + done; + List.iter (fun (i, latency) -> + let target_time = latency + times.(i) in + if target_time > !final_time then + final_time := target_time) predecessors.(nr_instructions); + times.(nr_instructions) <- !final_time; + DebugPrint.debug_flag := false; + Some times + with Exit -> + DebugPrint.debug "reg_pres_sched failed\n"; + DebugPrint.debug_flag := false; + None + +;; + type bundle = int list;; @@ -1402,5 +1451,6 @@ let scheduler_by_name name = | "ilp" -> validated_scheduler cascaded_scheduler | "list" -> validated_scheduler list_scheduler | "revlist" -> validated_scheduler reverse_list_scheduler + | "regpres" -> validated_scheduler reg_pres_scheduler | "greedy" -> greedy_scheduler | s -> failwith ("unknown scheduler: " ^ s);; -- cgit From 3eb3751f84348a20b7ce211fdbf1d01a9c4685a8 Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Mon, 14 Jun 2021 14:46:01 +0200 Subject: One fewer spill with new sched on `test/.../spille_forw.c` --- test/nardino/scheduling/spille_forw.c | 53 +++++++++++++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 3 deletions(-) diff --git a/test/nardino/scheduling/spille_forw.c b/test/nardino/scheduling/spille_forw.c index 0c69efd5..770dfce5 100644 --- a/test/nardino/scheduling/spille_forw.c +++ b/test/nardino/scheduling/spille_forw.c @@ -1,6 +1,6 @@ #include -int f(int n) { +int f(int n, float * arr) { float a1 = (float) n; float b1 = 2.*a1; float c = a1; @@ -79,6 +79,48 @@ int f(int n) { float a26 = (float) n+25; float b26 = 2.*a26; c += a26; + float a27 = (float) n+26; + float b27 = 2.*a27; + c += a27; + float a28 = (float) n+27; + float b28 = 2.*a28; + c += a28; + float a29 = (float) n+28; + float b29 = 2.*a29; + c += a29; + float a30 = (float) n+29; + float b30 = 2.*a30; + c += a30; + arr[0] = a1; + arr[1] = a2; + arr[2] = a3; + arr[3] = a4; + arr[4] = a5; + arr[5] = a6; + arr[6] = a7; + arr[7] = a8; + arr[8] = a9; + arr[9] = a10; + arr[10] = a11; + arr[11] = a12; + arr[12] = a13; + arr[13] = a14; + arr[14] = a15; + arr[15] = a16; + arr[16] = a17; + arr[17] = a18; + arr[18] = a19; + arr[19] = a20; + arr[20] = a21; + arr[21] = a22; + arr[22] = a23; + arr[23] = a24; + arr[24] = a25; + arr[25] = a26; + arr[26] = a27; + arr[27] = a28; + arr[28] = a29; + arr[29] = a30; return c + b1+ b2+ @@ -105,7 +147,11 @@ int f(int n) { b23+ b24+ b25+ - b26; + b26+ + b27+ + b28+ + b29+ + b30; } @@ -114,6 +160,7 @@ int f(int n) { int main(int argc, char *argv[]) { - f(5); + float arr[30]; + f(5, arr); return 0; } -- cgit From bff4e6ff0b782619b6fcc18751fa575cbb11de68 Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Mon, 14 Jun 2021 17:39:58 +0200 Subject: was very wrong, fixing --- aarch64/PostpassSchedulingOracle.ml | 2 +- aarch64/PrepassSchedulingOracle.ml | 11 ++++-- scheduling/InstructionScheduler.ml | 7 ++-- scheduling/InstructionScheduler.mli | 6 +-- scheduling/RTLpathScheduleraux.ml | 74 ++++++++++++++++++++++++++++++++----- 5 files changed, 80 insertions(+), 20 deletions(-) diff --git a/aarch64/PostpassSchedulingOracle.ml b/aarch64/PostpassSchedulingOracle.ml index 867341ca..6f784238 100644 --- a/aarch64/PostpassSchedulingOracle.ml +++ b/aarch64/PostpassSchedulingOracle.ml @@ -509,7 +509,7 @@ let build_problem bb = resource_bounds = opweights.pipelined_resource_bounds; live_regs_entry = Registers.Regset.empty; (* unused here *) typing = (fun x -> AST.Tint); (* unused here *) - pressure_deltas = [| [| |] |] ; + reference_counting = None; instruction_usages = instruction_usages bb; latency_constraints = latency_constraints bb; } diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml index 19f05749..fe757c99 100644 --- a/aarch64/PrepassSchedulingOracle.ml +++ b/aarch64/PrepassSchedulingOracle.ml @@ -202,6 +202,7 @@ let get_simple_dependencies (opweights : opweights) (seqa : (instruction*Regset. !latency_constraints;; +(** useless *) let get_pressure_deltas (seqa : (instruction * Regset.t) array) (typing : RTLtyping.regenv) : int array array = @@ -454,13 +455,13 @@ let get_alias_dependencies seqa = *) let define_problem (opweights : opweights) (live_entry_regs : Regset.t) - (typing : RTLtyping.regenv) seqa = + (typing : RTLtyping.regenv) reference_counting seqa = let simple_deps = get_simple_dependencies opweights seqa in { max_latency = -1; resource_bounds = opweights.pipelined_resource_bounds; live_regs_entry = live_entry_regs; typing = typing; - pressure_deltas = get_pressure_deltas seqa typing; + reference_counting = Some reference_counting; instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa); latency_constraints = (* if (use_alias_analysis ()) @@ -492,7 +493,8 @@ let prepass_scheduler_by_name name problem early_ones = let schedule_sequence (seqa : (instruction*Regset.t) array) (live_regs_entry : Registers.Regset.t) - (typing : RTLtyping.regenv) = + (typing : RTLtyping.regenv) + reference = let opweights = OpWeights.get_opweights () in try if (Array.length seqa) <= 1 @@ -502,7 +504,8 @@ let schedule_sequence (seqa : (instruction*Regset.t) array) let nr_instructions = Array.length seqa in (if !Clflags.option_debug_compcert > 6 then Printf.printf "prepass scheduling length = %d\n" (Array.length seqa)); - let problem = define_problem opweights live_regs_entry typing seqa in + let problem = define_problem opweights live_regs_entry + typing reference seqa in (if !Clflags.option_debug_compcert > 7 then (print_sequence stdout (Array.map fst seqa); print_problem stdout problem)); diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml index a069de59..08349f60 100644 --- a/scheduling/InstructionScheduler.ml +++ b/scheduling/InstructionScheduler.ml @@ -35,7 +35,8 @@ type problem = { resource_bounds : int array; live_regs_entry : Registers.Regset.t; typing : RTLtyping.regenv; - pressure_deltas : int array array; + reference_counting : ((Registers.reg, int * int) Hashtbl.t + * (Registers.reg list array)) option; instruction_usages : int array array; latency_constraints : latency_constraint list; };; @@ -370,7 +371,7 @@ let reg_pres_scheduler (problem : problem) : solution option = (Registers.Regset.elements live_regs_entry); - let pressures = problem.pressure_deltas in + let pressures = [| [| |] |] in let priorities = critical_paths successors in @@ -629,7 +630,7 @@ let reverse_problem problem = with creating a reverse scheduler aware of reg press *) typing = problem.typing; - pressure_deltas = [| [| |] |] ; + reference_counting = problem.reference_counting; instruction_usages = Array.init (nr_instructions + 1) (fun i -> if i=0 diff --git a/scheduling/InstructionScheduler.mli b/scheduling/InstructionScheduler.mli index e7f9e7db..9b6f7a3c 100644 --- a/scheduling/InstructionScheduler.mli +++ b/scheduling/InstructionScheduler.mli @@ -29,9 +29,9 @@ type problem = { typing : RTLtyping.regenv; (** Register type map. *) - pressure_deltas : int array array; - (** At index (i, j), the pressure delta for instruction i, for - register class j. *) + reference_counting : ((Registers.reg, int * int) Hashtbl.t + * (Registers.reg list array)) option; + (** See RTLpathScheduleraux.reference_counting. *) instruction_usages: int array array; (** At index {i i} the vector of resources used by instruction number {i i}. It must be the same length as [resource_bounds] *) diff --git a/scheduling/RTLpathScheduleraux.ml b/scheduling/RTLpathScheduleraux.ml index e04e7c23..02e0c769 100644 --- a/scheduling/RTLpathScheduleraux.ml +++ b/scheduling/RTLpathScheduleraux.ml @@ -72,7 +72,60 @@ let get_superblocks code entry pm typing = lsb end +(** the useful one. Returns a hashtable with bindings of form + ** [(r,(t, n)], where [r] is a pseudo-register (Registers.reg), + ** [t] is its class (according to [typing]), and [n] the number of + ** times it's referenced as an argument in instructions of [seqa] ; + ** and an arrray containg the argument regset of each instruction *) +let reference_counting (seqa : (instruction * Regset.t) array) + (out_regs : Registers.Regset.t) (typing : RTLtyping.regenv) : + (Registers.reg, int * int) Hashtbl.t * Registers.reg list array = + let retl = Hashtbl.create 42 in + let retr = Array.make (Array.length seqa) [] in + List.iter (fun reg -> + Hashtbl.add retl + reg (Machregsaux.class_of_type (typing reg), 1) + ) (Registers.Regset.elements out_regs); + let add_reg reg = + match Hashtbl.find_opt retl reg with + | Some (t, n) -> Hashtbl.add retl reg (t, n+1) + | None -> Hashtbl.add retl reg (Machregsaux.class_of_type + (typing reg), 1) + in + Array.iteri (fun i (ins, _) -> + match ins with + | Iop(_,args,_,_) | Iload(_,_,_,args,_,_) + | Icond(_,args,_,_,_) -> + List.iter (add_reg) args; + retr.(i) <- args + | Istore(_,_,args,src,_) -> + List.iter (add_reg) args; + add_reg src; + retr.(i) <- src::args + | Icall(_,fn,args,_,_) | Itailcall(_,fn,args) -> + List.iter (add_reg) args; + retr.(i) <- (match fn with + | Datatypes.Coq_inl reg -> + add_reg reg; + reg::args + | _ -> args) + | Ibuiltin(_,args,_,_) -> + let rec bar = function + | AST.BA r -> add_reg r; + retr.(i) <- r::retr.(i) + | AST.BA_splitlong (hi, lo) | AST.BA_addptr (hi, lo) -> + bar hi; bar lo + | _ -> () + in + List.iter (bar) args + | Ijumptable (reg,_) | Ireturn (Some reg) -> + add_reg reg; + retr.(i) <- [reg] + | _ -> () + ) seqa; + retl, retr + let get_live_regs_entry (sb : superblock) code = (if !Clflags.option_debug_compcert > 6 then debug_flag := true); @@ -164,17 +217,20 @@ let schedule_superblock sb code = | None -> 1 in debug "hello\n"; let live_regs_entry = get_live_regs_entry sb code in + let seqa = + Array.map (fun i -> + (match PTree.get i code with + | Some ii -> ii + | None -> failwith "RTLpathScheduleraux.schedule_superblock"), + (match PTree.get i sb.liveins with + | Some s -> s + | None -> Regset.empty)) + (Array.sub sb.instructions 0 (nr_instr-trailer_length)) in match PrepassSchedulingOracle.schedule_sequence - (Array.map (fun i -> - (match PTree.get i code with - | Some ii -> ii - | None -> failwith "RTLpathScheduleraux.schedule_superblock"), - (match PTree.get i sb.liveins with - | Some s -> s - | None -> Regset.empty)) - (Array.sub sb.instructions 0 (nr_instr-trailer_length))) + seqa live_regs_entry - sb.typing with + sb.typing + (reference_counting seqa sb.s_output_regs sb.typing) with | None -> sb.instructions | Some order -> let ins' = -- cgit From 19464b3992eadf7670acc7231896103ab54885e5 Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Tue, 15 Jun 2021 12:07:43 +0200 Subject: fixing Still need to find what to do when pressure is high but there are no instructions available that decrease it --- scheduling/InstructionScheduler.ml | 83 ++++++++++++++++++++++++++++++----- scheduling/InstructionScheduler.mli | 2 +- scheduling/RTLpathScheduleraux.ml | 47 +++++++++++++++----- test/nardino/scheduling/spille_forw.c | 60 ++++++++++++------------- 4 files changed, 139 insertions(+), 53 deletions(-) diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml index 08349f60..19bfaeb0 100644 --- a/scheduling/InstructionScheduler.ml +++ b/scheduling/InstructionScheduler.ml @@ -36,7 +36,7 @@ type problem = { live_regs_entry : Registers.Regset.t; typing : RTLtyping.regenv; reference_counting : ((Registers.reg, int * int) Hashtbl.t - * (Registers.reg list array)) option; + * ((Registers.reg * bool) list array)) option; instruction_usages : int array array; latency_constraints : latency_constraint list; };; @@ -363,15 +363,37 @@ let reg_pres_scheduler (problem : problem) : solution option = let regs_thresholds = Array.init nr_types_regs (fun i -> 5) in (* placeholder value *) + + let class_r r = + Machregsaux.class_of_type (problem.typing r) in + + let live_regs = Hashtbl.create 42 in List.iter (fun r -> let classe = Machregsaux.class_of_type (problem.typing r) in available_regs.(classe) - <- available_regs.(classe) - 1) + <- available_regs.(classe) - 1; + Hashtbl.add live_regs r classe) (Registers.Regset.elements live_regs_entry); + + let counts, mentions = + match problem.reference_counting with + | Some (l, r) -> l, r + | None -> assert false + in - let pressures = [| [| |] |] in + let fold_delta i = (fun a (r, b) -> + a + + if class_r r <> i then 0 else + (if b then + if (Hashtbl.find counts r = (i, 1)) + then 1 else 0 + else + match Hashtbl.find_opt live_regs r with + | None -> -1 + | Some t -> 0 + )) in let priorities = critical_paths successors in @@ -396,7 +418,14 @@ let reg_pres_scheduler (problem : problem) : solution option = let iter = MSet.iter let compare_regs i x y = - match pressures.(y).(i) - pressures.(x).(i) with + let pyi = List.fold_left (fold_delta i) 0 mentions.(y) in + print_int y; + print_string " "; + print_int pyi; + print_newline (); + flush stdout; + let pxi = List.fold_left (fold_delta i) 0 mentions.(x) in + match pyi - pxi with | 0 -> (match priorities.(y) - priorities.(x) with | 0 -> x - y | z -> z) @@ -404,10 +433,13 @@ let reg_pres_scheduler (problem : problem) : solution option = (** t is the register class *) let sched_CSR t ready usages = + print_string "looking for max delta"; + print_newline (); + flush stdout; let result = ref (-1) in iter (fun i -> if vector_less_equal usages.(i) current_resources - then if !result = -1 || (compare_regs t !result i < 0) + then if !result = -1 || (compare_regs t !result i > 0) then result := i) ready; !result end @@ -451,13 +483,28 @@ let reg_pres_scheduler (problem : problem) : solution option = let result = ref (-1) in try Array.iteri (fun i avlregs -> + print_string "avlregs: "; + print_int i; + print_string " "; + print_int avlregs; + print_newline (); + flush stdout; if avlregs <= regs_thresholds.(i) then ( let maybe = InstrSet.sched_CSR i ready usages in - (* print_int maybe; - * print_newline (); - * flush stdout; *) - if maybe > 0 && pressures.(maybe).(i) < 0 then + print_string "maybe\n"; + print_int maybe; + print_newline (); + flush stdout; + if maybe > 0 && + let delta = + List.fold_left (fold_delta i) 0 mentions.(maybe) in + print_string "delta "; + print_int delta; + print_newline (); + flush stdout; + delta + >= 0 then (vector_subtract usages.(maybe) current_resources; result := maybe; raise Exit))) available_regs; @@ -470,7 +517,23 @@ let reg_pres_scheduler (problem : problem) : solution option = -1 with Exit -> if !result <> -1 then - vector_subtract pressures.(!result) available_regs; + (List.iter (fun (r,b) -> + if b then + match Hashtbl.find_opt counts r with + | None -> assert false + | Some (t, n) -> + Hashtbl.remove counts r; + (if n = 1 then + available_regs.(t) + <- available_regs.(t) + 1) + else + let t = class_r r in + match Hashtbl.find_opt live_regs r with + | None -> (Hashtbl.add live_regs r t; + available_regs.(t) + <- available_regs.(t) - 1) + | Some i -> () + ) mentions.(!result)); !result in while !current_time < max_time diff --git a/scheduling/InstructionScheduler.mli b/scheduling/InstructionScheduler.mli index 9b6f7a3c..b5a5463b 100644 --- a/scheduling/InstructionScheduler.mli +++ b/scheduling/InstructionScheduler.mli @@ -30,7 +30,7 @@ type problem = { (** Register type map. *) reference_counting : ((Registers.reg, int * int) Hashtbl.t - * (Registers.reg list array)) option; + * ((Registers.reg * bool) list array)) option; (** See RTLpathScheduleraux.reference_counting. *) instruction_usages: int array array; diff --git a/scheduling/RTLpathScheduleraux.ml b/scheduling/RTLpathScheduleraux.ml index 02e0c769..9c3ff689 100644 --- a/scheduling/RTLpathScheduleraux.ml +++ b/scheduling/RTLpathScheduleraux.ml @@ -76,12 +76,17 @@ end ** [(r,(t, n)], where [r] is a pseudo-register (Registers.reg), ** [t] is its class (according to [typing]), and [n] the number of ** times it's referenced as an argument in instructions of [seqa] ; - ** and an arrray containg the argument regset of each instruction *) + ** and an arrray containg the list of regs referenced by each + ** instruction, with a boolean to know whether it's as arg or dest *) let reference_counting (seqa : (instruction * Regset.t) array) (out_regs : Registers.Regset.t) (typing : RTLtyping.regenv) : - (Registers.reg, int * int) Hashtbl.t * Registers.reg list array = + (Registers.reg, int * int) Hashtbl.t * + (Registers.reg * bool) list array = let retl = Hashtbl.create 42 in let retr = Array.make (Array.length seqa) [] in + (* retr.(i) : (r, b) -> (r', b') -> ... + where b = true if seen as arg, false if seen as dest + *) List.iter (fun reg -> Hashtbl.add retl reg (Machregsaux.class_of_type (typing reg), 1) @@ -92,35 +97,53 @@ let reference_counting (seqa : (instruction * Regset.t) array) | None -> Hashtbl.add retl reg (Machregsaux.class_of_type (typing reg), 1) in + let map_true = List.map (fun r -> r, true) in Array.iteri (fun i (ins, _) -> match ins with - | Iop(_,args,_,_) | Iload(_,_,_,args,_,_) + | Iop(_,args,dest,_) | Iload(_,_,_,args,dest,_) -> + List.iter (add_reg) args; + retr.(i) <- (dest, false)::(map_true args) | Icond(_,args,_,_,_) -> List.iter (add_reg) args; - retr.(i) <- args + retr.(i) <- map_true args | Istore(_,_,args,src,_) -> List.iter (add_reg) args; add_reg src; - retr.(i) <- src::args - | Icall(_,fn,args,_,_) | Itailcall(_,fn,args) -> + retr.(i) <- (src, true)::(map_true args) + | Icall(_,fn,args,dest,_) -> + List.iter (add_reg) args; + retr.(i) <- (match fn with + | Datatypes.Coq_inl reg -> + add_reg reg; + (dest,false)::(reg, true)::(map_true args) + | _ -> (dest,false)::(map_true args)) + + | Itailcall(_,fn,args) -> List.iter (add_reg) args; retr.(i) <- (match fn with | Datatypes.Coq_inl reg -> add_reg reg; - reg::args - | _ -> args) - | Ibuiltin(_,args,_,_) -> + (reg, true)::(map_true args) + | _ -> map_true args) + | Ibuiltin(_,args,dest,_) -> let rec bar = function | AST.BA r -> add_reg r; - retr.(i) <- r::retr.(i) + retr.(i) <- (r, true)::retr.(i) | AST.BA_splitlong (hi, lo) | AST.BA_addptr (hi, lo) -> bar hi; bar lo | _ -> () in - List.iter (bar) args + List.iter (bar) args; + let rec bad = function + | AST.BR r -> retr.(i) <- (r, false)::retr.(i) + | AST.BR_splitlong (hi, lo) -> + bad hi; bad lo + | _ -> () + in + bad dest; | Ijumptable (reg,_) | Ireturn (Some reg) -> add_reg reg; - retr.(i) <- [reg] + retr.(i) <- [reg, true] | _ -> () ) seqa; retl, retr diff --git a/test/nardino/scheduling/spille_forw.c b/test/nardino/scheduling/spille_forw.c index 770dfce5..db88588b 100644 --- a/test/nardino/scheduling/spille_forw.c +++ b/test/nardino/scheduling/spille_forw.c @@ -91,36 +91,36 @@ int f(int n, float * arr) { float a30 = (float) n+29; float b30 = 2.*a30; c += a30; - arr[0] = a1; - arr[1] = a2; - arr[2] = a3; - arr[3] = a4; - arr[4] = a5; - arr[5] = a6; - arr[6] = a7; - arr[7] = a8; - arr[8] = a9; - arr[9] = a10; - arr[10] = a11; - arr[11] = a12; - arr[12] = a13; - arr[13] = a14; - arr[14] = a15; - arr[15] = a16; - arr[16] = a17; - arr[17] = a18; - arr[18] = a19; - arr[19] = a20; - arr[20] = a21; - arr[21] = a22; - arr[22] = a23; - arr[23] = a24; - arr[24] = a25; - arr[25] = a26; - arr[26] = a27; - arr[27] = a28; - arr[28] = a29; - arr[29] = a30; + /* arr[0] = a1; */ + /* arr[1] = a2; */ + /* arr[2] = a3; */ + /* arr[3] = a4; */ + /* arr[4] = a5; */ + /* arr[5] = a6; */ + /* arr[6] = a7; */ + /* arr[7] = a8; */ + /* arr[8] = a9; */ + /* arr[9] = a10; */ + /* arr[10] = a11; */ + /* arr[11] = a12; */ + /* arr[12] = a13; */ + /* arr[13] = a14; */ + /* arr[14] = a15; */ + /* arr[15] = a16; */ + /* arr[16] = a17; */ + /* arr[17] = a18; */ + /* arr[18] = a19; */ + /* arr[19] = a20; */ + /* arr[20] = a21; */ + /* arr[21] = a22; */ + /* arr[22] = a23; */ + /* arr[23] = a24; */ + /* arr[24] = a25; */ + /* arr[25] = a26; */ + /* arr[26] = a27; */ + /* arr[27] = a28; */ + /* arr[28] = a29; */ + /* arr[29] = a30; */ return c + b1+ b2+ -- cgit From 87c82b6fcf2bf825a8c60fc6a95498aac9f826d4 Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Tue, 15 Jun 2021 14:44:56 +0200 Subject: kinda fixed Spills are definitely reduced, but lots of arbitrary in there: See previous commit: need to determine what to do if pressure is too high but no schedulable instruction can reduce it. For now, advance time for at most 5 cycles, if still no suitable instruction, go back to CSP --- scheduling/InstructionScheduler.ml | 62 ++++++++++++++++++++++++-------------- scheduling/RTLpathScheduleraux.ml | 20 ++++++++++-- 2 files changed, 57 insertions(+), 25 deletions(-) diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml index 19bfaeb0..72222022 100644 --- a/scheduling/InstructionScheduler.ml +++ b/scheduling/InstructionScheduler.ml @@ -478,6 +478,9 @@ let reg_pres_scheduler (problem : problem) : solution option = ready.(!current_time) <- InstrSet.empty)); incr current_time in + + (* ALL MENTIONS TO cnt ARE PLACEHOLDERS *) + let cnt = ref 0 in let attempt_scheduling ready usages = let result = ref (-1) in @@ -488,8 +491,11 @@ let reg_pres_scheduler (problem : problem) : solution option = print_string " "; print_int avlregs; print_newline (); + print_string "live regs: "; + print_int (Hashtbl.length live_regs); + print_newline (); flush stdout; - if avlregs <= regs_thresholds.(i) + if !cnt < 5 && avlregs <= regs_thresholds.(i) then ( let maybe = InstrSet.sched_CSR i ready usages in print_string "maybe\n"; @@ -506,8 +512,9 @@ let reg_pres_scheduler (problem : problem) : solution option = delta >= 0 then (vector_subtract usages.(maybe) current_resources; - result := maybe; - raise Exit))) available_regs; + result := maybe) + else incr cnt; + raise Exit)) available_regs; InstrSet.iter (fun i -> if vector_less_equal usages.(i) current_resources then ( @@ -516,25 +523,7 @@ let reg_pres_scheduler (problem : problem) : solution option = raise Exit)) ready; -1 with Exit -> - if !result <> -1 then - (List.iter (fun (r,b) -> - if b then - match Hashtbl.find_opt counts r with - | None -> assert false - | Some (t, n) -> - Hashtbl.remove counts r; - (if n = 1 then - available_regs.(t) - <- available_regs.(t) + 1) - else - let t = class_r r in - match Hashtbl.find_opt live_regs r with - | None -> (Hashtbl.add live_regs r t; - available_regs.(t) - <- available_regs.(t) - 1) - | Some i -> () - ) mentions.(!result)); - !result in + !result in while !current_time < max_time do @@ -545,6 +534,35 @@ let reg_pres_scheduler (problem : problem) : solution option = problem.instruction_usages with | -1 -> advance_time() | i -> (assert(times.(i) < 0); + (print_string "INSTR ISSUED: "; + print_int i; + print_newline (); + flush stdout; + cnt := 0; + List.iter (fun (r,b) -> + if b then + (match Hashtbl.find_opt counts r with + | None -> assert false + | Some (t, n) -> + Hashtbl.remove counts r; + if n = 1 then + (print_string "yaaaaaaaaaaaas "; + print_int (Camlcoq.P.to_int r); + print_newline (); + Hashtbl.remove live_regs r; + available_regs.(t) + <- available_regs.(t) + 1)) + else + let t = class_r r in + match Hashtbl.find_opt live_regs r with + | None -> (print_string "noooooooooo "; + print_int (Camlcoq.P.to_int r); + print_newline (); + Hashtbl.add live_regs r t; + available_regs.(t) + <- available_regs.(t) - 1) + | Some i -> () + ) mentions.(i)); times.(i) <- !current_time; ready.(!current_time) <- InstrSet.remove i (ready.(!current_time)); diff --git a/scheduling/RTLpathScheduleraux.ml b/scheduling/RTLpathScheduleraux.ml index 9c3ff689..8df3edbc 100644 --- a/scheduling/RTLpathScheduleraux.ml +++ b/scheduling/RTLpathScheduleraux.ml @@ -72,7 +72,7 @@ let get_superblocks code entry pm typing = lsb end -(** the useful one. Returns a hashtable with bindings of form +(** the useful one. Returns a hashtable with bindings of shape ** [(r,(t, n)], where [r] is a pseudo-register (Registers.reg), ** [t] is its class (according to [typing]), and [n] the number of ** times it's referenced as an argument in instructions of [seqa] ; @@ -85,7 +85,7 @@ let reference_counting (seqa : (instruction * Regset.t) array) let retl = Hashtbl.create 42 in let retr = Array.make (Array.length seqa) [] in (* retr.(i) : (r, b) -> (r', b') -> ... - where b = true if seen as arg, false if seen as dest + * where b = true if seen as arg, false if seen as dest *) List.iter (fun reg -> Hashtbl.add retl @@ -118,7 +118,7 @@ let reference_counting (seqa : (instruction * Regset.t) array) (dest,false)::(reg, true)::(map_true args) | _ -> (dest,false)::(map_true args)) - | Itailcall(_,fn,args) -> + | Itailcall(_,fn,args) -> List.iter (add_reg) args; retr.(i) <- (match fn with | Datatypes.Coq_inl reg -> @@ -146,6 +146,20 @@ let reference_counting (seqa : (instruction * Regset.t) array) retr.(i) <- [reg, true] | _ -> () ) seqa; + print_string "mentions\n"; + Array.iteri (fun i l -> + print_int i; + print_string ": ["; + List.iter (fun (r, b) -> + print_int (Camlcoq.P.to_int r); + print_string ":"; + print_string (if b then "a:" else "d"); + if b then print_int (snd (Hashtbl.find retl r)); + print_string ", " + ) l; + print_string "]\n"; + flush stdout; + ) retr; retl, retr -- cgit From 04b2489d7c2a9b0d203b3d431517367a07bd6b30 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Wed, 16 Jun 2021 11:39:25 +0200 Subject: fix modeling issue (Vundef for load outside of bounds) --- kvx/Asmvliw.v | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/kvx/Asmvliw.v b/kvx/Asmvliw.v index aa2e0885..304e45a8 100644 --- a/kvx/Asmvliw.v +++ b/kvx/Asmvliw.v @@ -313,7 +313,12 @@ Inductive cf_instruction : Type := . (** *** Loads *) -Definition concrete_default_notrap_load_value (chunk : memory_chunk) := +Definition concrete_default_notrap_load_value (chunk : memory_chunk) := Vundef. + +(* What follows was the original spec, but is subtly incorrect. + Our definition of the assembly-level memory model is already an abstraction of the real world. + In particular, we consider that a load is incorrect when it points outside of CompCert's visible memory, whereas this memory could be correct at the assembly level. + This means that CompCert would believe an incorrect load would yield 0 whereas it would yield another value. match chunk with | Mint8signed | Mint8unsigned | Mint16signed | Mint16unsigned | Mint32 => Vint Int.zero @@ -321,7 +326,7 @@ Definition concrete_default_notrap_load_value (chunk : memory_chunk) := | Many32 | Many64 => Vundef | Mfloat32 => Vsingle Float32.zero | Mfloat64 => Vfloat Float.zero - end. + end. *) Inductive load_name : Type := | Plb (**r load byte *) -- cgit From 21278bd87e89210bcc287116f6e35fc1b52d0df2 Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Wed, 16 Jun 2021 20:27:31 +0200 Subject: Now working, tests show a decrease in spillage Should still find a proper way to treat the case mentioned in earlier commits --- scheduling/InstructionScheduler.ml | 92 +++++++++++++++++++------------------- scheduling/RTLpathScheduleraux.ml | 28 ++++++------ 2 files changed, 61 insertions(+), 59 deletions(-) diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml index 72222022..df8a4e4e 100644 --- a/scheduling/InstructionScheduler.ml +++ b/scheduling/InstructionScheduler.ml @@ -419,11 +419,11 @@ let reg_pres_scheduler (problem : problem) : solution option = let compare_regs i x y = let pyi = List.fold_left (fold_delta i) 0 mentions.(y) in - print_int y; - print_string " "; - print_int pyi; - print_newline (); - flush stdout; + (* print_int y; + * print_string " "; + * print_int pyi; + * print_newline (); + * flush stdout; *) let pxi = List.fold_left (fold_delta i) 0 mentions.(x) in match pyi - pxi with | 0 -> (match priorities.(y) - priorities.(x) with @@ -433,9 +433,9 @@ let reg_pres_scheduler (problem : problem) : solution option = (** t is the register class *) let sched_CSR t ready usages = - print_string "looking for max delta"; - print_newline (); - flush stdout; + (* print_string "looking for max delta"; + * print_newline (); + * flush stdout; *) let result = ref (-1) in iter (fun i -> if vector_less_equal usages.(i) current_resources @@ -445,7 +445,7 @@ let reg_pres_scheduler (problem : problem) : solution option = end in - let max_time = bound_max_time problem in + let max_time = bound_max_time problem + 5*nr_instructions in let ready = Array.make max_time InstrSet.empty in Array.iteri (fun i preds -> @@ -486,34 +486,34 @@ let reg_pres_scheduler (problem : problem) : solution option = let result = ref (-1) in try Array.iteri (fun i avlregs -> - print_string "avlregs: "; - print_int i; - print_string " "; - print_int avlregs; - print_newline (); - print_string "live regs: "; - print_int (Hashtbl.length live_regs); - print_newline (); - flush stdout; + (* print_string "avlregs: "; + * print_int i; + * print_string " "; + * print_int avlregs; + * print_newline (); + * print_string "live regs: "; + * print_int (Hashtbl.length live_regs); + * print_newline (); + * flush stdout; *) if !cnt < 5 && avlregs <= regs_thresholds.(i) then ( let maybe = InstrSet.sched_CSR i ready usages in - print_string "maybe\n"; - print_int maybe; - print_newline (); - flush stdout; - if maybe > 0 && - let delta = - List.fold_left (fold_delta i) 0 mentions.(maybe) in - print_string "delta "; - print_int delta; - print_newline (); - flush stdout; - delta - >= 0 then - (vector_subtract usages.(maybe) current_resources; - result := maybe) - else incr cnt; + (* print_string "maybe\n"; + * print_int maybe; + * print_newline (); + * flush stdout; *) + (if maybe > 0 && + let delta = + List.fold_left (fold_delta i) 0 mentions.(maybe) in + (* print_string "delta "; + * print_int delta; + * print_newline (); + * flush stdout; *) + delta > 0 + then + (vector_subtract usages.(maybe) current_resources; + result := maybe) + else incr cnt); raise Exit)) available_regs; InstrSet.iter (fun i -> if vector_less_equal usages.(i) current_resources @@ -534,10 +534,10 @@ let reg_pres_scheduler (problem : problem) : solution option = problem.instruction_usages with | -1 -> advance_time() | i -> (assert(times.(i) < 0); - (print_string "INSTR ISSUED: "; - print_int i; - print_newline (); - flush stdout; + ((* print_string "INSTR ISSUED: "; + * print_int i; + * print_newline (); + * flush stdout; *) cnt := 0; List.iter (fun (r,b) -> if b then @@ -546,18 +546,18 @@ let reg_pres_scheduler (problem : problem) : solution option = | Some (t, n) -> Hashtbl.remove counts r; if n = 1 then - (print_string "yaaaaaaaaaaaas "; - print_int (Camlcoq.P.to_int r); - print_newline (); + ((* print_string "yaaaaaaaaaaaas "; + * print_int (Camlcoq.P.to_int r); + * print_newline (); *) Hashtbl.remove live_regs r; available_regs.(t) <- available_regs.(t) + 1)) else let t = class_r r in match Hashtbl.find_opt live_regs r with - | None -> (print_string "noooooooooo "; - print_int (Camlcoq.P.to_int r); - print_newline (); + | None -> ((* print_string "noooooooooo "; + * print_int (Camlcoq.P.to_int r); + * print_newline (); *) Hashtbl.add live_regs r t; available_regs.(t) <- available_regs.(t) - 1) @@ -571,8 +571,10 @@ let reg_pres_scheduler (problem : problem) : solution option = match earliest_time instr_to with | -1 -> () | to_time -> + ((* DebugPrint.debug "TO TIME %d : %d\n" to_time + * (Array.length ready); *) ready.(to_time) - <- InstrSet.add instr_to ready.(to_time) + <- InstrSet.add instr_to ready.(to_time)) ) successors.(i); successors.(i) <- [] ) diff --git a/scheduling/RTLpathScheduleraux.ml b/scheduling/RTLpathScheduleraux.ml index 8df3edbc..f3f09954 100644 --- a/scheduling/RTLpathScheduleraux.ml +++ b/scheduling/RTLpathScheduleraux.ml @@ -146,20 +146,20 @@ let reference_counting (seqa : (instruction * Regset.t) array) retr.(i) <- [reg, true] | _ -> () ) seqa; - print_string "mentions\n"; - Array.iteri (fun i l -> - print_int i; - print_string ": ["; - List.iter (fun (r, b) -> - print_int (Camlcoq.P.to_int r); - print_string ":"; - print_string (if b then "a:" else "d"); - if b then print_int (snd (Hashtbl.find retl r)); - print_string ", " - ) l; - print_string "]\n"; - flush stdout; - ) retr; + (* print_string "mentions\n"; + * Array.iteri (fun i l -> + * print_int i; + * print_string ": ["; + * List.iter (fun (r, b) -> + * print_int (Camlcoq.P.to_int r); + * print_string ":"; + * print_string (if b then "a:" else "d"); + * if b then print_int (snd (Hashtbl.find retl r)); + * print_string ", " + * ) l; + * print_string "]\n"; + * flush stdout; + * ) retr; *) retl, retr -- cgit From 4413c27d6c6a3d69df34955d9d453c38b32174c7 Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Thu, 17 Jun 2021 15:38:13 +0200 Subject: Add option to set thresold and support for riscv --- driver/Clflags.ml | 1 + driver/Driver.ml | 2 ++ riscV/Machregsaux.ml | 2 ++ scheduling/InstructionScheduler.ml | 9 +++++++-- 4 files changed, 12 insertions(+), 2 deletions(-) diff --git a/driver/Clflags.ml b/driver/Clflags.ml index fa17c2d9..1f31bd3e 100644 --- a/driver/Clflags.ml +++ b/driver/Clflags.ml @@ -115,4 +115,5 @@ let option_inline_auto_threshold = ref 0 let option_profile_arcs = ref false let option_fbranch_probabilities = ref true let option_debug_compcert = ref 0 +let option_regpres_threshold = ref 5 let main_function_name = ref "main" diff --git a/driver/Driver.ml b/driver/Driver.ml index 5a8c7f2c..fa187f26 100644 --- a/driver/Driver.ml +++ b/driver/Driver.ml @@ -211,6 +211,7 @@ Processing options: -fprepass Perform prepass scheduling (only on some architectures) [on] -fprepass= Perform postpass scheduling with the specified optimization [list] (=list: list scheduling, =revlist: reverse list scheduling, =regpres: list scheduling aware of register pressure, =zigzag: zigzag scheduling, =ilp: ILP, =greedy: just packing bundles) + -regpres-threshold n With `-fprepass= regpres`, set threshold value for number of free registers before trying to decrease register pressure -fpostpass Perform postpass scheduling (only for K1 architecture) [on] -fpostpass= Perform postpass scheduling with the specified optimization [list] (=list: list scheduling, =ilp: ILP, =greedy: just packing bundles) @@ -342,6 +343,7 @@ let cmdline_actions = Exact "-fprofile-use=", String (fun s -> Profilingaux.load_profiling_info s); Exact "-finline-auto-threshold", Integer (fun n -> option_inline_auto_threshold := n); Exact "-debug-compcert", Integer (fun n -> option_debug_compcert := n); + Exact "-regpres-threshold", Integer (fun n -> option_regpres_threshold := n); Exact "-fsmall-data", Integer(fun n -> option_small_data := n); Exact "-fsmall-const", Integer(fun n -> option_small_const := n); Exact "-ffloat-const-prop", Integer(fun n -> option_ffloatconstprop := n); diff --git a/riscV/Machregsaux.ml b/riscV/Machregsaux.ml index 840943e7..e3e47946 100644 --- a/riscV/Machregsaux.ml +++ b/riscV/Machregsaux.ml @@ -18,3 +18,5 @@ let class_of_type = function | AST.Tint | AST.Tlong -> 0 | AST.Tfloat | AST.Tsingle -> 1 | AST.Tany32 | AST.Tany64 -> assert false + +let nr_regs = [| 26; 32|] diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml index df8a4e4e..5b4c87f4 100644 --- a/scheduling/InstructionScheduler.ml +++ b/scheduling/InstructionScheduler.ml @@ -360,8 +360,13 @@ let reg_pres_scheduler (problem : problem) : solution option = let nr_types_regs = Array.length available_regs in - let regs_thresholds = Array.init nr_types_regs - (fun i -> 5) in + let thres = Array.fold_left (min) + (max !(Clflags.option_regpres_threshold) 0) + Machregsaux.nr_regs + in + + + let regs_thresholds = Array.make nr_types_regs thres in (* placeholder value *) let class_r r = -- cgit From fae8d9b5c5f93d5eda36f800eb0ca1837b237cba Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Thu, 17 Jun 2021 17:00:57 +0200 Subject: fix riscv/Machregsaux.mli --- riscV/Machregsaux.mli | 2 ++ 1 file changed, 2 insertions(+) diff --git a/riscV/Machregsaux.mli b/riscV/Machregsaux.mli index 01b0f9fd..cf6d7b71 100644 --- a/riscV/Machregsaux.mli +++ b/riscV/Machregsaux.mli @@ -15,3 +15,5 @@ val is_scratch_register: string -> bool val class_of_type: AST.typ -> int + +val nr_regs: int array -- cgit From fe557bf65ec738eaa078bc5e398ff690eb1f2b9e Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Thu, 17 Jun 2021 17:03:53 +0200 Subject: changed type of schedule_seq in x86 for compatibility --- x86/PrepassSchedulingOracle.ml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/x86/PrepassSchedulingOracle.ml b/x86/PrepassSchedulingOracle.ml index 7b6a1b14..42a3da23 100644 --- a/x86/PrepassSchedulingOracle.ml +++ b/x86/PrepassSchedulingOracle.ml @@ -2,4 +2,5 @@ open RTL open Registers (* Do not do anything *) -let schedule_sequence (seqa : (instruction*Regset.t) array) = None +let schedule_sequence (seqa : (instruction*Regset.t) array) + live_regs_entry typing reference = None -- cgit From 8f399dfa9d794f2f728f523ff1aa7788cc3599b2 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Thu, 17 Jun 2021 17:04:52 +0200 Subject: fix for Risc-V --- aarch64/Machregsaux.mli | 1 + riscV/Machregsaux.mli | 3 +++ 2 files changed, 4 insertions(+) diff --git a/aarch64/Machregsaux.mli b/aarch64/Machregsaux.mli index 8487a557..23ac1c9a 100644 --- a/aarch64/Machregsaux.mli +++ b/aarch64/Machregsaux.mli @@ -16,4 +16,5 @@ val is_scratch_register: string -> bool val class_of_type: AST.typ -> int +(* Number of registers in each class *) val nr_regs : int array diff --git a/riscV/Machregsaux.mli b/riscV/Machregsaux.mli index 01b0f9fd..bb3777bf 100644 --- a/riscV/Machregsaux.mli +++ b/riscV/Machregsaux.mli @@ -15,3 +15,6 @@ val is_scratch_register: string -> bool val class_of_type: AST.typ -> int + +(* Number of registers in each class *) +val nr_regs : int array -- cgit From 5798f56b8a8630e43dbed84a824811a5626a1503 Mon Sep 17 00:00:00 2001 From: Cyril SIX Date: Fri, 18 Jun 2021 18:35:50 +0200 Subject: Replacing default notrap load value by Vundef everywhere --- backend/CSE2proof.v | 4 ++-- backend/CSE3analysisproof.v | 2 +- backend/CSEdomain.v | 4 ++-- backend/CSEproof.v | 5 ++--- backend/Deadcodeproof.v | 2 -- backend/LTL.v | 4 ++-- backend/Linear.v | 4 ++-- backend/Lineartyping.v | 2 -- backend/Mach.v | 4 ++-- backend/RTL.v | 4 ++-- backend/RTLtyping.v | 2 +- backend/Tailcallproof.v | 4 ++-- backend/ValueAnalysis.v | 2 -- common/Memory.v | 2 -- kvx/Asmblockdeps.v | 10 +++++----- kvx/Asmblockgenproof1.v | 8 ++++---- kvx/Asmblockprops.v | 4 ++-- kvx/Asmvliw.v | 11 +++++------ scheduling/RTLpath.v | 2 +- scheduling/RTLpathSE_theory.v | 4 ++-- scheduling/postpass_lib/Machblock.v | 4 ++-- 21 files changed, 39 insertions(+), 49 deletions(-) diff --git a/backend/CSE2proof.v b/backend/CSE2proof.v index 49dbd409..252240c9 100644 --- a/backend/CSE2proof.v +++ b/backend/CSE2proof.v @@ -1399,7 +1399,7 @@ Proof. 2: discriminate. econstructor; split. { - eapply exec_Iop with (v := (default_notrap_load_value chunk)); eauto. + eapply exec_Iop with (v := Vundef); eauto. simpl. rewrite <- subst_args_ok with (genv := ge) (f := f) (pc := pc) (sp := sp) (m := m) in H0. { @@ -1472,7 +1472,7 @@ Proof. 2: discriminate. econstructor; split. { - eapply exec_Iop with (v := (default_notrap_load_value chunk)); eauto. + eapply exec_Iop with (v := Vundef); eauto. simpl. rewrite <- subst_args_ok with (genv := ge) (f := f) (pc := pc) (sp := sp) (m := m) in H0. { diff --git a/backend/CSE3analysisproof.v b/backend/CSE3analysisproof.v index d53cf604..382c9f4c 100644 --- a/backend/CSE3analysisproof.v +++ b/backend/CSE3analysisproof.v @@ -502,7 +502,7 @@ Section SOUNDNESS. end with | Some dat => v' = dat - | None => v' = default_notrap_load_value chunk + | None => v' = Vundef end end. diff --git a/backend/CSEdomain.v b/backend/CSEdomain.v index f78e1d25..9641d012 100644 --- a/backend/CSEdomain.v +++ b/backend/CSEdomain.v @@ -113,12 +113,12 @@ Inductive rhs_eval_to (valu: valuation) (ge: genv) (sp: val) (m: mem): (* | load_notrap1_eval_to: forall chunk addr vl, eval_addressing ge sp addr (map valu vl) = None -> rhs_eval_to valu ge sp m (Load NOTRAP chunk addr vl) - (default_notrap_load_value chunk) + Vundef | load_notrap2_eval_to: forall chunk addr vl a, eval_addressing ge sp addr (map valu vl) = Some a -> Mem.loadv chunk m a = None -> rhs_eval_to valu ge sp m (Load NOTRAP chunk addr vl) - (default_notrap_load_value chunk) *). + Vundef *). Inductive equation_holds (valu: valuation) (ge: genv) (sp: val) (m: mem): equation -> Prop := diff --git a/backend/CSEproof.v b/backend/CSEproof.v index cf51f5a2..556b44b3 100644 --- a/backend/CSEproof.v +++ b/backend/CSEproof.v @@ -402,7 +402,7 @@ Lemma add_load_holds_none1: forall valu1 ge sp rs m n addr (args: list reg) chunk dst, numbering_holds valu1 ge sp rs m n -> eval_addressing ge sp addr rs##args = None -> - exists valu2, numbering_holds valu2 ge sp (rs#dst <- (default_notrap_load_value chunk)) m (add_load n dst chunk addr args). + exists valu2, numbering_holds valu2 ge sp (rs#dst <- Vundef) m (add_load n dst chunk addr args). Proof. unfold add_load; intros. destruct (valnum_regs n args) as [n1 vl] eqn:VN. @@ -418,7 +418,7 @@ Lemma add_load_holds_none2: numbering_holds valu1 ge sp rs m n -> eval_addressing ge sp addr rs##args = Some a -> Mem.loadv chunk m a = None -> - exists valu2, numbering_holds valu2 ge sp (rs#dst <- (default_notrap_load_value chunk)) m (add_load n dst NOTRAP chunk addr args). + exists valu2, numbering_holds valu2 ge sp (rs#dst <- Vundef) m (add_load n dst NOTRAP chunk addr args). Proof. unfold add_load; intros. destruct (valnum_regs n args) as [n1 vl] eqn:VN. @@ -1210,7 +1210,6 @@ Proof. exists valu1. apply set_unknown_holds. assumption. - unfold default_notrap_load_value. apply set_reg_lessdef; eauto. } { diff --git a/backend/Deadcodeproof.v b/backend/Deadcodeproof.v index b51d6cce..be20af0b 100644 --- a/backend/Deadcodeproof.v +++ b/backend/Deadcodeproof.v @@ -845,7 +845,6 @@ Ltac UseTransfer := eapply match_succ_states; eauto. simpl; auto. apply eagree_update; auto. rewrite is_int_zero_sound by auto. - unfold default_notrap_load_value. constructor. + (* preserved *) exploit eval_addressing_lessdef_none. eapply add_needs_all_lessdef; eauto. eassumption. @@ -878,7 +877,6 @@ Ltac UseTransfer := eapply match_succ_states; eauto. simpl; auto. apply eagree_update; auto. rewrite is_int_zero_sound by auto. - unfold default_notrap_load_value. constructor. + (* preserved *) exploit eval_addressing_lessdef. eapply add_needs_all_lessdef; eauto. eauto. diff --git a/backend/LTL.v b/backend/LTL.v index 3edd60a2..a382ef0e 100644 --- a/backend/LTL.v +++ b/backend/LTL.v @@ -217,14 +217,14 @@ Inductive step: state -> trace -> state -> Prop := E0 (Block s f sp bb rs' m) | exec_Lload_notrap1: forall s f sp chunk addr args dst bb rs m rs', eval_addressing ge sp addr (reglist rs args) = None -> - rs' = Locmap.set (R dst) (default_notrap_load_value chunk) + rs' = Locmap.set (R dst) Vundef (undef_regs (destroyed_by_load chunk addr) rs) -> step (Block s f sp (Lload NOTRAP chunk addr args dst :: bb) rs m) E0 (Block s f sp bb rs' m) | exec_Lload_notrap2: forall s f sp chunk addr args dst bb rs m a rs', eval_addressing ge sp addr (reglist rs args) = Some a -> Mem.loadv chunk m a = None -> - rs' = Locmap.set (R dst) (default_notrap_load_value chunk) + rs' = Locmap.set (R dst) Vundef (undef_regs (destroyed_by_load chunk addr) rs) -> step (Block s f sp (Lload NOTRAP chunk addr args dst :: bb) rs m) E0 (Block s f sp bb rs' m) diff --git a/backend/Linear.v b/backend/Linear.v index 1443f795..cb11f7dc 100644 --- a/backend/Linear.v +++ b/backend/Linear.v @@ -170,7 +170,7 @@ Inductive step: state -> trace -> state -> Prop := forall s f sp chunk addr args dst b rs m rs', eval_addressing ge sp addr (reglist rs args) = None -> rs' = Locmap.set (R dst) - (default_notrap_load_value chunk) + Vundef (undef_regs (destroyed_by_load chunk addr) rs) -> step (State s f sp (Lload NOTRAP chunk addr args dst :: b) rs m) E0 (State s f sp b rs' m) @@ -179,7 +179,7 @@ Inductive step: state -> trace -> state -> Prop := eval_addressing ge sp addr (reglist rs args) = Some a -> Mem.loadv chunk m a = None -> rs' = Locmap.set (R dst) - (default_notrap_load_value chunk) + Vundef (undef_regs (destroyed_by_load chunk addr) rs) -> step (State s f sp (Lload NOTRAP chunk addr args dst :: b) rs m) E0 (State s f sp b rs' m) diff --git a/backend/Lineartyping.v b/backend/Lineartyping.v index 22658fb7..cf903aad 100644 --- a/backend/Lineartyping.v +++ b/backend/Lineartyping.v @@ -338,14 +338,12 @@ Local Opaque mreg_type. simpl in *; InvBooleans. econstructor; eauto. apply wt_setreg. eapply Val.has_subtype; eauto. - unfold default_notrap_load_value. constructor. apply wt_undef_regs; auto. - (* load notrap2 *) simpl in *; InvBooleans. econstructor; eauto. apply wt_setreg. eapply Val.has_subtype; eauto. - unfold default_notrap_load_value. constructor. apply wt_undef_regs; auto. - (* store *) diff --git a/backend/Mach.v b/backend/Mach.v index 1c6fdb18..2cfd738d 100644 --- a/backend/Mach.v +++ b/backend/Mach.v @@ -330,14 +330,14 @@ Inductive step: state -> trace -> state -> Prop := | exec_Mload_notrap1: forall s f sp chunk addr args dst c rs m rs', eval_addressing ge sp addr rs##args = None -> - rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- (default_notrap_load_value chunk)) -> + rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- Vundef) -> step (State s f sp (Mload NOTRAP chunk addr args dst :: c) rs m) E0 (State s f sp c rs' m) | exec_Mload_notrap2: forall s f sp chunk addr args dst c rs m a rs', eval_addressing ge sp addr rs##args = Some a -> Mem.loadv chunk m a = None -> - rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- (default_notrap_load_value chunk)) -> + rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- Vundef) -> step (State s f sp (Mload NOTRAP chunk addr args dst :: c) rs m) E0 (State s f sp c rs' m) | exec_Mstore: diff --git a/backend/RTL.v b/backend/RTL.v index 31b5cf99..fe350adf 100644 --- a/backend/RTL.v +++ b/backend/RTL.v @@ -225,14 +225,14 @@ Inductive step: state -> trace -> state -> Prop := (fn_code f)!pc = Some(Iload NOTRAP chunk addr args dst pc') -> eval_addressing ge sp addr rs##args = None -> step (State s f sp pc rs m) - E0 (State s f sp pc' (rs#dst <- (default_notrap_load_value chunk)) m) + E0 (State s f sp pc' (rs#dst <- Vundef) m) | exec_Iload_notrap2: forall s f sp pc rs m chunk addr args dst pc' a, (fn_code f)!pc = Some(Iload NOTRAP chunk addr args dst pc') -> eval_addressing ge sp addr rs##args = Some a -> Mem.loadv chunk m a = None-> step (State s f sp pc rs m) - E0 (State s f sp pc' (rs#dst <- (default_notrap_load_value chunk)) m) + E0 (State s f sp pc' (rs#dst <- Vundef) m) | exec_Istore: forall s f sp pc rs m chunk addr args src pc' a m', (fn_code f)!pc = Some(Istore chunk addr args src pc') -> diff --git a/backend/RTLtyping.v b/backend/RTLtyping.v index 15ed6d8a..6048f895 100644 --- a/backend/RTLtyping.v +++ b/backend/RTLtyping.v @@ -858,7 +858,7 @@ Lemma wt_exec_Iload_notrap: forall env f chunk addr args dst s rs, wt_instr f env (Iload NOTRAP chunk addr args dst s) -> wt_regset env rs -> - wt_regset env (rs#dst <- (default_notrap_load_value chunk)). + wt_regset env (rs#dst <- Vundef). Proof. intros. eapply wt_regset_assign; eauto. simpl. trivial. diff --git a/backend/Tailcallproof.v b/backend/Tailcallproof.v index 80a68327..39fc10fb 100644 --- a/backend/Tailcallproof.v +++ b/backend/Tailcallproof.v @@ -440,7 +440,7 @@ Proof. TransfInstr. assert (Val.lessdef_list (rs##args) (rs'##args)). apply regs_lessdef_regs; auto. left. - exists (State s' (transf_function f) (Vptr sp0 Ptrofs.zero) pc' (rs'#dst <- (default_notrap_load_value chunk)) m'); split. + exists (State s' (transf_function f) (Vptr sp0 Ptrofs.zero) pc' (rs'#dst <- Vundef) m'); split. eapply exec_Iload_notrap1. eassumption. eapply eval_addressing_lessdef_none. eassumption. @@ -465,7 +465,7 @@ Proof. exact symbols_preserved. assumption. econstructor; eauto. apply set_reg_lessdef; auto. - + exists (State s' (transf_function f) (Vptr sp0 Ptrofs.zero) pc' (rs'#dst <- (default_notrap_load_value chunk)) m'); split. + + exists (State s' (transf_function f) (Vptr sp0 Ptrofs.zero) pc' (rs'#dst <- Vundef) m'); split. eapply exec_Iload_notrap2. eassumption. erewrite eval_addressing_preserved. eassumption. diff --git a/backend/ValueAnalysis.v b/backend/ValueAnalysis.v index 561e94c9..e20edff7 100644 --- a/backend/ValueAnalysis.v +++ b/backend/ValueAnalysis.v @@ -1287,13 +1287,11 @@ Proof. eapply sound_succ_state; eauto. simpl; auto. unfold transfer; rewrite H. eauto. apply ematch_update; auto. - unfold default_notrap_load_value. constructor. - (* load notrap2 *) eapply sound_succ_state; eauto. simpl; auto. unfold transfer; rewrite H. eauto. apply ematch_update; auto. - unfold default_notrap_load_value. constructor. - (* store *) exploit eval_static_addressing_sound; eauto with va. intros VMADDR. diff --git a/common/Memory.v b/common/Memory.v index bf8ca083..ff17efb0 100644 --- a/common/Memory.v +++ b/common/Memory.v @@ -41,8 +41,6 @@ Require Export Memdata. Require Export Memtype. Require Import Lia. -Definition default_notrap_load_value (chunk : memory_chunk) := Vundef. - (* To avoid useless definitions of inductors in extracted code. *) Local Unset Elimination Schemes. diff --git a/kvx/Asmblockdeps.v b/kvx/Asmblockdeps.v index b6d18c3e..a9786e0a 100644 --- a/kvx/Asmblockdeps.v +++ b/kvx/Asmblockdeps.v @@ -164,17 +164,17 @@ Definition arith_eval (ao: arith_op) (l: list value) := | _, _ => None end. -Definition exec_incorrect_load trap chunk := +Definition exec_incorrect_load trap := match trap with | TRAP => None - | NOTRAP => Some (Val (concrete_default_notrap_load_value chunk)) + | NOTRAP => Some (Val Vundef) end. Definition exec_load_deps_offset (trap: trapping_mode) (chunk: memory_chunk) (m: mem) (v: val) (ofs: offset) := let (ge, fn) := Ge in match (eval_offset ofs) with | OK ptr => match Mem.loadv chunk m (Val.offset_ptr v ptr) with - | None => exec_incorrect_load trap chunk + | None => exec_incorrect_load trap | Some vl => Some (Val vl) end | _ => None @@ -182,13 +182,13 @@ Definition exec_load_deps_offset (trap: trapping_mode) (chunk: memory_chunk) (m: Definition exec_load_deps_reg (trap: trapping_mode) (chunk: memory_chunk) (m: mem) (v vo: val) := match Mem.loadv chunk m (Val.addl v vo) with - | None => exec_incorrect_load trap chunk + | None => exec_incorrect_load trap | Some vl => Some (Val vl) end. Definition exec_load_deps_regxs (trap: trapping_mode) (chunk: memory_chunk) (m: mem) (v vo: val) := match Mem.loadv chunk m (Val.addl v (Val.shll vo (scale_of_chunk chunk))) with - | None => exec_incorrect_load trap chunk + | None => exec_incorrect_load trap | Some vl => Some (Val vl) end. diff --git a/kvx/Asmblockgenproof1.v b/kvx/Asmblockgenproof1.v index a65bd5bc..259c4f9c 100644 --- a/kvx/Asmblockgenproof1.v +++ b/kvx/Asmblockgenproof1.v @@ -1914,7 +1914,7 @@ Lemma transl_load_access2_correct_notrap2: Mem.loadv chunk m v = None -> exists rs', exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m - /\ rs'#rd = concrete_default_notrap_load_value chunk + /\ rs'#rd = Vundef /\ forall r, r <> PC -> r <> RTMP -> r <> rd -> rs'#r = rs#r. Proof. intros until ro; intros ARGS IREGE INSTR TR EV LOAD. @@ -1963,7 +1963,7 @@ Lemma transl_load_access2XS_correct_notrap2: Mem.loadv chunk m v = None -> exists rs', exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m - /\ rs'#rd = concrete_default_notrap_load_value chunk + /\ rs'#rd = Vundef /\ forall r, r <> PC -> r <> RTMP -> r <> rd -> rs'#r = rs#r. Proof. intros until ro; intros ARGS IREGE INSTR TR EV LOAD. @@ -2008,7 +2008,7 @@ Lemma transl_load_access_correct_notrap2: Mem.loadv chunk m v = None -> exists rs', exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m - /\ rs'#rd = concrete_default_notrap_load_value chunk + /\ rs'#rd = Vundef /\ forall r, r <> PC -> r <> RTMP -> r <> rd -> rs'#r = rs#r. Proof. intros until v; intros INSTR TR EV LOAD. @@ -2185,7 +2185,7 @@ Lemma transl_load_correct_notrap2: Mem.loadv chunk m a = None -> exists rs', exec_straight ge (basics_to_code c) rs m (basics_to_code k) rs' m - /\ rs'#(preg_of dst) = (concrete_default_notrap_load_value chunk) + /\ rs'#(preg_of dst) = Vundef /\ forall r, r <> PC -> r <> RTMP -> r <> preg_of dst -> rs'#r = rs#r. Proof. intros until a; intros TR EV LOAD. destruct addr. diff --git a/kvx/Asmblockprops.v b/kvx/Asmblockprops.v index c3929be5..a732d29b 100644 --- a/kvx/Asmblockprops.v +++ b/kvx/Asmblockprops.v @@ -96,9 +96,9 @@ Theorem exec_basic_instr_pc: Proof. intros. destruct b; try destruct i; try destruct i. all: try (inv H; Simpl). - 1-10: unfold parexec_load_offset in H1; destruct (eval_offset ofs); try discriminate; destruct (Mem.loadv _ _ _); unfold parexec_incorrect_load in *; destruct trap; try discriminate; unfold concrete_default_notrap_load_value in *; inv H1; Simpl; fail. + 1-10: unfold parexec_load_offset in H1; destruct (eval_offset ofs); try discriminate; destruct (Mem.loadv _ _ _); unfold parexec_incorrect_load in *; destruct trap; try discriminate; inv H1; Simpl; fail. - 1-20: unfold parexec_load_reg, parexec_load_regxs in H1; destruct (Mem.loadv _ _ _); unfold parexec_incorrect_load in *; destruct trap; try discriminate; unfold concrete_default_notrap_load_value in *; inv H1; Simpl; fail. + 1-20: unfold parexec_load_reg, parexec_load_regxs in H1; destruct (Mem.loadv _ _ _); unfold parexec_incorrect_load in *; destruct trap; try discriminate; inv H1; Simpl; fail. { (* PLoadQRRO *) unfold parexec_load_q_offset in H1. diff --git a/kvx/Asmvliw.v b/kvx/Asmvliw.v index 304e45a8..45b230e6 100644 --- a/kvx/Asmvliw.v +++ b/kvx/Asmvliw.v @@ -313,7 +313,6 @@ Inductive cf_instruction : Type := . (** *** Loads *) -Definition concrete_default_notrap_load_value (chunk : memory_chunk) := Vundef. (* What follows was the original spec, but is subtly incorrect. Our definition of the assembly-level memory model is already an abstraction of the real world. @@ -1174,16 +1173,16 @@ Definition eval_offset (ofs: offset) : res ptrofs := OK ofs. (** *** load/store instructions *) -Definition parexec_incorrect_load trap chunk d rsw mw := +Definition parexec_incorrect_load trap d rsw mw := match trap with | TRAP => Stuck - | NOTRAP => Next (rsw#d <- (concrete_default_notrap_load_value chunk)) mw + | NOTRAP => Next (rsw#d <- Vundef) mw end. Definition parexec_load_offset (trap: trapping_mode) (chunk: memory_chunk) (rsr rsw: regset) (mr mw: mem) (d a: ireg) (ofs: offset) := match (eval_offset ofs) with | OK ptr => match Mem.loadv chunk mr (Val.offset_ptr (rsr a) ptr) with - | None => parexec_incorrect_load trap chunk d rsw mw + | None => parexec_incorrect_load trap d rsw mw | Some v => Next (rsw#d <- v) mw end | _ => Stuck @@ -1230,13 +1229,13 @@ Definition parexec_load_o_offset (rsr rsw: regset) (mr mw: mem) (d : gpreg_o) (a Definition parexec_load_reg (trap: trapping_mode) (chunk: memory_chunk) (rsr rsw: regset) (mr mw: mem) (d a ro: ireg) := match Mem.loadv chunk mr (Val.addl (rsr a) (rsr ro)) with - | None => parexec_incorrect_load trap chunk d rsw mw + | None => parexec_incorrect_load trap d rsw mw | Some v => Next (rsw#d <- v) mw end. Definition parexec_load_regxs (trap: trapping_mode) (chunk: memory_chunk) (rsr rsw: regset) (mr mw: mem) (d a ro: ireg) := match Mem.loadv chunk mr (Val.addl (rsr a) (Val.shll (rsr ro) (scale_of_chunk chunk))) with - | None => parexec_incorrect_load trap chunk d rsw mw + | None => parexec_incorrect_load trap d rsw mw | Some v => Next (rsw#d <- v) mw end. diff --git a/scheduling/RTLpath.v b/scheduling/RTLpath.v index a4fce97e..b29a7759 100644 --- a/scheduling/RTLpath.v +++ b/scheduling/RTLpath.v @@ -156,7 +156,7 @@ Definition istep (ge: RTL.genv) (i: instruction) (sp: val) (rs: regset) (m: mem) SOME v <- Mem.loadv chunk m a IN Some (mk_istate true pc' (rs#dst <- v) m) | Iload NOTRAP chunk addr args dst pc' => - let default_state := mk_istate true pc' rs#dst <- (default_notrap_load_value chunk) m in + let default_state := mk_istate true pc' rs#dst <- Vundef m in match (eval_addressing ge sp addr rs##args) with | None => Some default_state | Some a => match (Mem.loadv chunk m a) with diff --git a/scheduling/RTLpathSE_theory.v b/scheduling/RTLpathSE_theory.v index aa8db342..2a791feb 100644 --- a/scheduling/RTLpathSE_theory.v +++ b/scheduling/RTLpathSE_theory.v @@ -87,11 +87,11 @@ Fixpoint seval_sval (ge: RTL.genv) (sp:val) (sv: sval) (rs0: regset) (m0: mem): | NOTRAP => SOME args <- seval_list_sval ge sp lsv rs0 m0 IN match (eval_addressing ge sp addr args) with - | None => Some (default_notrap_load_value chunk) + | None => Some Vundef | Some a => SOME m <- seval_smem ge sp sm rs0 m0 IN match (Mem.loadv chunk m a) with - | None => Some (default_notrap_load_value chunk) + | None => Some Vundef | Some val => Some val end end diff --git a/scheduling/postpass_lib/Machblock.v b/scheduling/postpass_lib/Machblock.v index c8eadbd7..b588cca8 100644 --- a/scheduling/postpass_lib/Machblock.v +++ b/scheduling/postpass_lib/Machblock.v @@ -237,13 +237,13 @@ Inductive basic_step (s: list stackframe) (fb: block) (sp: val) (rs: regset) (m: | exec_MBload_notrap1: forall addr args rs' chunk dst, eval_addressing ge sp addr rs##args = None -> - rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- (default_notrap_load_value chunk)) -> + rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- Vundef) -> basic_step s fb sp rs m (MBload NOTRAP chunk addr args dst) rs' m | exec_MBload_notrap2: forall addr args a rs' chunk dst, eval_addressing ge sp addr rs##args = Some a -> Mem.loadv chunk m a = None -> - rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- (default_notrap_load_value chunk)) -> + rs' = ((undef_regs (destroyed_by_load chunk addr) rs)#dst <- Vundef) -> basic_step s fb sp rs m (MBload NOTRAP chunk addr args dst) rs' m | exec_MBstore: forall chunk addr args src m' a rs', -- cgit From 10cbe4b28ef6dc5d02c9a5d4d369484e4943a18d Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Tue, 22 Jun 2021 15:57:21 +0200 Subject: Changed default threshold value following tests --- driver/Clflags.ml | 2 +- scheduling/InstructionScheduler.ml | 10 ++-------- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/driver/Clflags.ml b/driver/Clflags.ml index 1f31bd3e..c90fdb8c 100644 --- a/driver/Clflags.ml +++ b/driver/Clflags.ml @@ -115,5 +115,5 @@ let option_inline_auto_threshold = ref 0 let option_profile_arcs = ref false let option_fbranch_probabilities = ref true let option_debug_compcert = ref 0 -let option_regpres_threshold = ref 5 +let option_regpres_threshold = ref 2 let main_function_name = ref "main" diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml index 5b4c87f4..cd924825 100644 --- a/scheduling/InstructionScheduler.ml +++ b/scheduling/InstructionScheduler.ml @@ -551,19 +551,13 @@ let reg_pres_scheduler (problem : problem) : solution option = | Some (t, n) -> Hashtbl.remove counts r; if n = 1 then - ((* print_string "yaaaaaaaaaaaas "; - * print_int (Camlcoq.P.to_int r); - * print_newline (); *) - Hashtbl.remove live_regs r; + (Hashtbl.remove live_regs r; available_regs.(t) <- available_regs.(t) + 1)) else let t = class_r r in match Hashtbl.find_opt live_regs r with - | None -> ((* print_string "noooooooooo "; - * print_int (Camlcoq.P.to_int r); - * print_newline (); *) - Hashtbl.add live_regs r t; + | None -> (Hashtbl.add live_regs r t; available_regs.(t) <- available_regs.(t) - 1) | Some i -> () -- cgit From dfa09586ae40c70769eeda688a0e7f59f611749f Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Thu, 24 Jun 2021 18:33:20 +0200 Subject: Another scheduler --- driver/Driver.ml | 2 +- scheduling/InstructionScheduler.ml | 201 ++++++++++++++++++++++++++++++++++++ scheduling/InstructionScheduler.mli | 2 + 3 files changed, 204 insertions(+), 1 deletion(-) diff --git a/driver/Driver.ml b/driver/Driver.ml index fa187f26..4f43d7c9 100644 --- a/driver/Driver.ml +++ b/driver/Driver.ml @@ -210,7 +210,7 @@ Processing options: -mtune= Type of CPU (for scheduling on some architectures) -fprepass Perform prepass scheduling (only on some architectures) [on] -fprepass= Perform postpass scheduling with the specified optimization [list] - (=list: list scheduling, =revlist: reverse list scheduling, =regpres: list scheduling aware of register pressure, =zigzag: zigzag scheduling, =ilp: ILP, =greedy: just packing bundles) + (=list: list scheduling, =revlist: reverse list scheduling, =regpres: list scheduling aware of register pressure, =regpres_bis: variant of regpres, =zigzag: zigzag scheduling, =ilp: ILP, =greedy: just packing bundles) -regpres-threshold n With `-fprepass= regpres`, set threshold value for number of free registers before trying to decrease register pressure -fpostpass Perform postpass scheduling (only for K1 architecture) [on] -fpostpass= Perform postpass scheduling with the specified optimization [list] diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml index cd924825..99002c36 100644 --- a/scheduling/InstructionScheduler.ml +++ b/scheduling/InstructionScheduler.ml @@ -602,6 +602,206 @@ let reg_pres_scheduler (problem : problem) : solution option = ;; +(********************************************************************) + +let reg_pres_scheduler_bis (problem : problem) : solution option = + DebugPrint.debug_flag := true; + Printf.printf "\nNEW\n\n"; + let nr_instructions = get_nr_instructions problem in + let successors = get_successors problem + and predecessors = get_predecessors problem + and times = Array.make (nr_instructions+1) (-1) in + let live_regs_entry = problem.live_regs_entry in + + (* let available_regs = Array.copy Machregsaux.nr_regs in *) + + let class_r r = + Machregsaux.class_of_type (problem.typing r) in + + let live_regs = Hashtbl.create 42 in + + List.iter (fun r -> let classe = Machregsaux.class_of_type + (problem.typing r) in + (* available_regs.(classe) + * <- available_regs.(classe) - 1; *) + Hashtbl.add live_regs r classe) + (Registers.Regset.elements live_regs_entry); + + + let counts, mentions = + match problem.reference_counting with + | Some (l, r) -> l, r + | None -> assert false + in + + let fold_delta a (r, b) = + a + (if b then + match Hashtbl.find_opt counts r with + | Some (_, 1) -> 1 + | _ -> 0 + else + match Hashtbl.find_opt live_regs r with + | None -> -1 + | Some t -> 0 + ) in + + let priorities = critical_paths successors in + + let current_resources = Array.copy problem.resource_bounds in + + let compare_pres x y = + let pdy = List.fold_left (fold_delta) 0 mentions.(y) in + let pdx = List.fold_left (fold_delta) 0 mentions.(x) in + match pdy - pdx with + | 0 -> x - y + | z -> z + in + + let module InstrSet = + Set.Make (struct + type t = int + let compare x y = + match priorities.(y) - priorities.(x) with + | 0 -> x - y + | z -> z + end) in + + let max_time = bound_max_time problem (* + 5*nr_instructions *) in + let ready = Array.make max_time InstrSet.empty in + + Array.iteri (fun i preds -> + if i < nr_instructions && preds = [] + then ready.(0) <- InstrSet.add i ready.(0)) predecessors; + + let current_time = ref 0 + and earliest_time i = + try + let time = ref (-1) in + List.iter (fun (j, latency) -> + if times.(j) < 0 + then raise Exit + else let t = times.(j) + latency in + if t > !time + then time := t) predecessors.(i); + assert (!time >= 0); + !time + with Exit -> -1 + in + + let advance_time () = + (* Printf.printf "ADV\n"; + * flush stdout; *) + (if !current_time < max_time-1 + then ( + Array.blit problem.resource_bounds 0 current_resources 0 + (Array.length current_resources); + ready.(!current_time + 1) <- + InstrSet.union (ready.(!current_time)) + (ready.(!current_time +1)); + ready.(!current_time) <- InstrSet.empty)); + incr current_time + in + + + let attempt_scheduling ready usages = + let result = ref [] in + try + InstrSet.iter (fun i -> + if vector_less_equal usages.(i) current_resources + then + if !result = [] || priorities.(i) = priorities.(List.hd (!result)) + then + result := i::(!result) + else raise Exit + ) ready; + if !result <> [] then raise Exit; + -1 + with + Exit -> + let mini = List.fold_left (fun a b -> + if a = -1 || compare_pres a b > 0 + then b else a + ) (-1) !result in + vector_subtract usages.(mini) current_resources; + mini + in + + while !current_time < max_time + do + if (InstrSet.is_empty ready.(!current_time)) + then advance_time () + else + match attempt_scheduling ready.(!current_time) + problem.instruction_usages with + | -1 -> advance_time() + | i -> ( + Printf.printf "ISSUED: %d\nREADY: " i; + InstrSet.iter (fun i -> Printf.printf "%d " i) + ready.(!current_time); + Printf.printf "\nSUCC: "; + List.iter (fun (i, l) -> Printf.printf "%d " i) + successors.(i); + Printf.printf "\n\n"; + flush stdout; + assert(times.(i) < 0); + times.(i) <- !current_time; + ready.(!current_time) + <- InstrSet.remove i (ready.(!current_time)); + (List.iter (fun (r,b) -> + if b then + (match Hashtbl.find_opt counts r with + | None -> assert false + | Some (t, n) -> + Hashtbl.remove counts r; + if n = 1 then + (Hashtbl.remove live_regs r; + (* available_regs.(t) + * <- available_regs.(t) + 1 *))) + else + let t = class_r r in + match Hashtbl.find_opt live_regs r with + | None -> (Hashtbl.add live_regs r t; + (* available_regs.(t) + * <- available_regs.(t) - 1 *)) + | Some i -> () + ) mentions.(i)); + List.iter (fun (instr_to, latency) -> + if instr_to < nr_instructions then + match earliest_time instr_to with + | -1 -> () + | to_time -> + ((* DebugPrint.debug "TO TIME %d : %d\n" to_time + * (Array.length ready); *) + ready.(to_time) + <- InstrSet.add instr_to ready.(to_time)) + ) successors.(i); + successors.(i) <- [] + ) + done; + + try + let final_time = ref (-1) in + for i = 0 to nr_instructions - 1 do + (* print_int i; + * flush stdout; *) + (if times.(i) < 0 then raise Exit); + (if !final_time < times.(i) + 1 then final_time := times.(i) + 1) + done; + List.iter (fun (i, latency) -> + let target_time = latency + times.(i) in + if target_time > !final_time then + final_time := target_time) predecessors.(nr_instructions); + times.(nr_instructions) <- !final_time; + DebugPrint.debug_flag := false; + Some times + with Exit -> + DebugPrint.debug "reg_pres_sched failed\n"; + DebugPrint.debug_flag := false; + None + +;; + +(********************************************************************) type bundle = int list;; @@ -1535,5 +1735,6 @@ let scheduler_by_name name = | "list" -> validated_scheduler list_scheduler | "revlist" -> validated_scheduler reverse_list_scheduler | "regpres" -> validated_scheduler reg_pres_scheduler + | "regpres_bis" -> validated_scheduler reg_pres_scheduler_bis | "greedy" -> greedy_scheduler | s -> failwith ("unknown scheduler: " ^ s);; diff --git a/scheduling/InstructionScheduler.mli b/scheduling/InstructionScheduler.mli index b5a5463b..48c7bc09 100644 --- a/scheduling/InstructionScheduler.mli +++ b/scheduling/InstructionScheduler.mli @@ -81,6 +81,8 @@ val list_scheduler : problem -> solution option (** WIP : Same as list_scheduler, but schedules instructions which decrease register pressure when it gets too high. *) val reg_pres_scheduler : problem -> solution option + +val reg_pres_scheduler_bis : problem -> solution option (** Schedule the problem using the order of instructions without any reordering *) val greedy_scheduler : problem -> solution option -- cgit From 9ac49c465f9c8969fba00e6242da0c188a6a3080 Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Fri, 25 Jun 2021 09:42:41 +0200 Subject: Changed printfs into debugs --- scheduling/InstructionScheduler.ml | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml index 99002c36..e2413bc0 100644 --- a/scheduling/InstructionScheduler.ml +++ b/scheduling/InstructionScheduler.ml @@ -735,14 +735,13 @@ let reg_pres_scheduler_bis (problem : problem) : solution option = problem.instruction_usages with | -1 -> advance_time() | i -> ( - Printf.printf "ISSUED: %d\nREADY: " i; - InstrSet.iter (fun i -> Printf.printf "%d " i) + DebugPrint.debug "ISSUED: %d\nREADY: " i; + InstrSet.iter (fun i -> DebugPrint.debug "%d " i) ready.(!current_time); - Printf.printf "\nSUCC: "; - List.iter (fun (i, l) -> Printf.printf "%d " i) + DebugPrint.debug "\nSUCC: "; + List.iter (fun (i, l) -> DebugPrint.debug "%d " i) successors.(i); - Printf.printf "\n\n"; - flush stdout; + DebugPrint.debug "\n\n"; assert(times.(i) < 0); times.(i) <- !current_time; ready.(!current_time) -- cgit From 56498b6437ea8deb89a4e1fadbbfec490b8341aa Mon Sep 17 00:00:00 2001 From: Léo Gourdin Date: Fri, 25 Jun 2021 11:33:54 +0200 Subject: adding mayundef resource_bounds (not changing perfs) --- riscV/OpWeights.ml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/riscV/OpWeights.ml b/riscV/OpWeights.ml index 0a1d9ad4..a5ece6d5 100644 --- a/riscV/OpWeights.ml +++ b/riscV/OpWeights.ml @@ -66,7 +66,8 @@ module Rocket = struct | OEmayundef _ -> 0 | _ -> 1 - let resources_of_op (op : operation) (nargs : int) = resource_bounds + let resources_of_op (op : operation) (nargs : int) = + match op with OEmayundef _ -> [| 0 |] | _ -> resource_bounds let non_pipelined_resources_of_op (op : operation) (nargs : int) = match op with -- cgit From b96a48de58e1969535865b7b345514a24f7178a6 Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Mon, 28 Jun 2021 16:04:44 +0200 Subject: Change temporary solution (see prev commits), and add option for it --- driver/Clflags.ml | 1 + driver/Driver.ml | 2 ++ scheduling/InstructionScheduler.ml | 21 +++++++++++++++++---- 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/driver/Clflags.ml b/driver/Clflags.ml index c90fdb8c..d01b57f0 100644 --- a/driver/Clflags.ml +++ b/driver/Clflags.ml @@ -116,4 +116,5 @@ let option_profile_arcs = ref false let option_fbranch_probabilities = ref true let option_debug_compcert = ref 0 let option_regpres_threshold = ref 2 +let option_regpres_temp = ref false let main_function_name = ref "main" diff --git a/driver/Driver.ml b/driver/Driver.ml index 4f43d7c9..22c75f44 100644 --- a/driver/Driver.ml +++ b/driver/Driver.ml @@ -212,6 +212,7 @@ Processing options: -fprepass= Perform postpass scheduling with the specified optimization [list] (=list: list scheduling, =revlist: reverse list scheduling, =regpres: list scheduling aware of register pressure, =regpres_bis: variant of regpres, =zigzag: zigzag scheduling, =ilp: ILP, =greedy: just packing bundles) -regpres-threshold n With `-fprepass= regpres`, set threshold value for number of free registers before trying to decrease register pressure + -fregpres-temp use the temporary solution (default no) -fpostpass Perform postpass scheduling (only for K1 architecture) [on] -fpostpass= Perform postpass scheduling with the specified optimization [list] (=list: list scheduling, =ilp: ILP, =greedy: just packing bundles) @@ -426,6 +427,7 @@ let cmdline_actions = @ f_opt "redundancy" option_fredundancy @ [ Exact "-mtune", String (fun s -> option_mtune := s) ] @ f_opt "prepass" option_fprepass + @ f_opt "regpres-temp" option_regpres_temp @ f_opt "postpass" option_fpostpass @ [ Exact "-ftailduplicate", Integer (fun n -> option_ftailduplicate := n) ] @ f_opt "predict" option_fpredict diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml index e2413bc0..a881df68 100644 --- a/scheduling/InstructionScheduler.ml +++ b/scheduling/InstructionScheduler.ml @@ -486,7 +486,7 @@ let reg_pres_scheduler (problem : problem) : solution option = (* ALL MENTIONS TO cnt ARE PLACEHOLDERS *) let cnt = ref 0 in - + let attempt_scheduling ready usages = let result = ref (-1) in try @@ -500,7 +500,7 @@ let reg_pres_scheduler (problem : problem) : solution option = * print_int (Hashtbl.length live_regs); * print_newline (); * flush stdout; *) - if !cnt < 5 && avlregs <= regs_thresholds.(i) + if avlregs <= regs_thresholds.(i) then ( let maybe = InstrSet.sched_CSR i ready usages in (* print_string "maybe\n"; @@ -518,7 +518,19 @@ let reg_pres_scheduler (problem : problem) : solution option = then (vector_subtract usages.(maybe) current_resources; result := maybe) - else incr cnt); + else + if not !Clflags.option_regpres_temp + then + (InstrSet.iter (fun ins -> + if vector_less_equal usages.(ins) current_resources && + List.fold_left (fold_delta i) 0 mentions.(maybe) >= 0 + then result := ins + ) ready; + if !result <> -1 then + vector_subtract usages.(!result) current_resources) + else + (incr cnt) + ); raise Exit)) available_regs; InstrSet.iter (fun i -> if vector_less_equal usages.(i) current_resources @@ -543,7 +555,8 @@ let reg_pres_scheduler (problem : problem) : solution option = * print_int i; * print_newline (); * flush stdout; *) - cnt := 0; + if !Clflags.option_regpres_temp then + cnt := 0; List.iter (fun (r,b) -> if b then (match Hashtbl.find_opt counts r with -- cgit From af97fca0f1d824f3becf9c6895f44ad234e262f8 Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Tue, 6 Jul 2021 15:32:35 +0200 Subject: Add debug info --- scheduling/InstructionScheduler.ml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml index a881df68..4fdc455c 100644 --- a/scheduling/InstructionScheduler.ml +++ b/scheduling/InstructionScheduler.ml @@ -502,6 +502,8 @@ let reg_pres_scheduler (problem : problem) : solution option = * flush stdout; *) if avlregs <= regs_thresholds.(i) then ( + if !Clflags.option_debug_compcert > 6 then + DebugPrint.debug "REGPRES: high pres class %d\n" i; let maybe = InstrSet.sched_CSR i ready usages in (* print_string "maybe\n"; * print_int maybe; -- cgit From a4a0b36f56a94c19da301265a4e3acad1fbdf6c4 Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Thu, 8 Jul 2021 11:20:49 +0200 Subject: Deactivate sched validator (i think) --- scheduling/RTLpathScheduler.v | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/scheduling/RTLpathScheduler.v b/scheduling/RTLpathScheduler.v index 31680256..5a81dd28 100644 --- a/scheduling/RTLpathScheduler.v +++ b/scheduling/RTLpathScheduler.v @@ -158,7 +158,7 @@ Definition verified_scheduler (f: RTLpath.function) : res (RTLpath.function * (P let (tc, te) := tcte in let tfr := mkfunction (fn_sig f) (fn_params f) (fn_stacksize f) tc te in do tf <- proj1_sig (function_builder tfr tpm); - do tt <- function_equiv_checker dm f tf; + (* do tt <- function_equiv_checker dm f tf; *) OK (tf, dm). Theorem verified_scheduler_correct f tf dm: @@ -172,15 +172,16 @@ Theorem verified_scheduler_correct f tf dm: /\ (forall pc1 pc2, dm ! pc2 = Some pc1 -> sexec_simu dm f tf pc1 pc2) . Proof. - intros VERIF. unfold verified_scheduler in VERIF. explore. - Local Hint Resolve function_equiv_checker_entrypoint - function_equiv_checker_pathentry1 function_equiv_checker_pathentry2 - function_equiv_checker_correct: core. - destruct (function_builder _ _) as [res H]; simpl in * |- *; auto. - apply H in EQ2. rewrite EQ2. simpl. - repeat (constructor; eauto). - exploit function_equiv_checker_entrypoint. eapply EQ4. rewrite EQ2. intuition. -Qed. + Admitted. +(* intros VERIF. unfold verified_scheduler in VERIF. explore. *) +(* Local Hint Resolve function_equiv_checker_entrypoint *) +(* function_equiv_checker_pathentry1 function_equiv_checker_pathentry2 *) +(* function_equiv_checker_correct: core. *) +(* destruct (function_builder _ _) as [res H]; simpl in * |- *; auto. *) +(* apply H in EQ2. rewrite EQ2. simpl. *) +(* repeat (constructor; eauto). *) +(* exploit function_equiv_checker_entrypoint. eapply EQ4. rewrite EQ2. intuition. *) +(* Qed. *) Record match_function (dupmap: PTree.t node) (f1 f2: RTLpath.function): Prop := { preserv_fnsig: fn_sig f1 = fn_sig f2; -- cgit From f86f5df47b69053702661671340b0fcb31506aa3 Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Thu, 8 Jul 2021 11:22:17 +0200 Subject: add more debug info --- scheduling/InstructionScheduler.ml | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml index 4fdc455c..f823ccca 100644 --- a/scheduling/InstructionScheduler.ml +++ b/scheduling/InstructionScheduler.ml @@ -350,7 +350,12 @@ let _ = fun x -> priority_list_scheduler INSTRUCTION_ORDER x;; (* A scheduler sensitive to register pressure *) let reg_pres_scheduler (problem : problem) : solution option = DebugPrint.debug_flag := true; + let nr_instructions = get_nr_instructions problem in + + if !Clflags.option_debug_compcert > 6 then + DebugPrint.debug "SCHEDULING_SUPERBLOCK %d\n" nr_instructions; + let successors = get_successors problem and predecessors = get_predecessors problem and times = Array.make (nr_instructions+1) (-1) in @@ -381,6 +386,7 @@ let reg_pres_scheduler (problem : problem) : solution option = Hashtbl.add live_regs r classe) (Registers.Regset.elements live_regs_entry); + let csr_b = ref false in let counts, mentions = match problem.reference_counting with @@ -502,8 +508,7 @@ let reg_pres_scheduler (problem : problem) : solution option = * flush stdout; *) if avlregs <= regs_thresholds.(i) then ( - if !Clflags.option_debug_compcert > 6 then - DebugPrint.debug "REGPRES: high pres class %d\n" i; + csr_b := true; let maybe = InstrSet.sched_CSR i ready usages in (* print_string "maybe\n"; * print_int maybe; @@ -557,6 +562,9 @@ let reg_pres_scheduler (problem : problem) : solution option = * print_int i; * print_newline (); * flush stdout; *) + if !csr_b && !Clflags.option_debug_compcert > 6 then + DebugPrint.debug "REGPRES: high pres class %d\n" i; + csr_b := false; if !Clflags.option_regpres_temp then cnt := 0; List.iter (fun (r,b) -> -- cgit From 70f5867e441e253869cb3b432af77636a186d1cb Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Fri, 16 Jul 2021 12:26:27 +0200 Subject: rm TODO --- aarch64/Machregsaux.ml | 1 - 1 file changed, 1 deletion(-) diff --git a/aarch64/Machregsaux.ml b/aarch64/Machregsaux.ml index 15fb08ca..98e461eb 100644 --- a/aarch64/Machregsaux.ml +++ b/aarch64/Machregsaux.ml @@ -21,5 +21,4 @@ let class_of_type = function | AST.Tany32 | AST.Tany64 -> assert false (* number of available registers per class *) -(* TODO: add this to all archs *) let nr_regs = [| 29; 32 |] -- cgit From d6a846b641787ea6a5ed113b1d7275ffb5028d9c Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Fri, 16 Jul 2021 12:54:19 +0200 Subject: rm "Admitted" --- scheduling/RTLpathScheduler.v | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/scheduling/RTLpathScheduler.v b/scheduling/RTLpathScheduler.v index 5a81dd28..31680256 100644 --- a/scheduling/RTLpathScheduler.v +++ b/scheduling/RTLpathScheduler.v @@ -158,7 +158,7 @@ Definition verified_scheduler (f: RTLpath.function) : res (RTLpath.function * (P let (tc, te) := tcte in let tfr := mkfunction (fn_sig f) (fn_params f) (fn_stacksize f) tc te in do tf <- proj1_sig (function_builder tfr tpm); - (* do tt <- function_equiv_checker dm f tf; *) + do tt <- function_equiv_checker dm f tf; OK (tf, dm). Theorem verified_scheduler_correct f tf dm: @@ -172,16 +172,15 @@ Theorem verified_scheduler_correct f tf dm: /\ (forall pc1 pc2, dm ! pc2 = Some pc1 -> sexec_simu dm f tf pc1 pc2) . Proof. - Admitted. -(* intros VERIF. unfold verified_scheduler in VERIF. explore. *) -(* Local Hint Resolve function_equiv_checker_entrypoint *) -(* function_equiv_checker_pathentry1 function_equiv_checker_pathentry2 *) -(* function_equiv_checker_correct: core. *) -(* destruct (function_builder _ _) as [res H]; simpl in * |- *; auto. *) -(* apply H in EQ2. rewrite EQ2. simpl. *) -(* repeat (constructor; eauto). *) -(* exploit function_equiv_checker_entrypoint. eapply EQ4. rewrite EQ2. intuition. *) -(* Qed. *) + intros VERIF. unfold verified_scheduler in VERIF. explore. + Local Hint Resolve function_equiv_checker_entrypoint + function_equiv_checker_pathentry1 function_equiv_checker_pathentry2 + function_equiv_checker_correct: core. + destruct (function_builder _ _) as [res H]; simpl in * |- *; auto. + apply H in EQ2. rewrite EQ2. simpl. + repeat (constructor; eauto). + exploit function_equiv_checker_entrypoint. eapply EQ4. rewrite EQ2. intuition. +Qed. Record match_function (dupmap: PTree.t node) (f1 f2: RTLpath.function): Prop := { preserv_fnsig: fn_sig f1 = fn_sig f2; -- cgit From 169a221104c37737f12abe79711009fc0d88ce09 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Fri, 16 Jul 2021 13:00:56 +0200 Subject: rm useless code --- aarch64/PrepassSchedulingOracle.ml | 48 -------------------------------------- 1 file changed, 48 deletions(-) diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml index fe757c99..53a81095 100644 --- a/aarch64/PrepassSchedulingOracle.ml +++ b/aarch64/PrepassSchedulingOracle.ml @@ -201,54 +201,6 @@ let get_simple_dependencies (opweights : opweights) (seqa : (instruction*Regset. end seqa; !latency_constraints;; - -(** useless *) -let get_pressure_deltas (seqa : (instruction * Regset.t) array) - (typing : RTLtyping.regenv) - : int array array = - let nr_types_regs = Array.length Machregsaux.nr_regs in - let ret = Array.init (Array.length seqa) (fun i -> - Array.make nr_types_regs 0) in - Array.iteri (fun i (instr, liveins) -> match instr with - | Iop (_, args, dest, _) | Iload (_, _, _, args, dest, _) -> - ret.(i).(Machregsaux.class_of_type (typing dest)) <- - if List.mem dest args then 0 - else 1 - | Istore (_, _, _, src, _) -> - ret.(i).(Machregsaux.class_of_type (typing src)) <- - -1 - | Icall (_, fn, args, dest, _) -> - ret.(i).(Machregsaux.class_of_type (typing dest)) <- - if List.mem dest - (match fn with - | Datatypes.Coq_inl reg -> reg::args - | _ -> args) - then 0 else 1 - | Ibuiltin (_, args, dest, _) -> - let rec arg_l list = function - | AST.BA r -> r::list - | AST.BA_splitlong (hi, lo) | AST.BA_addptr (hi, lo) -> - arg_l (arg_l list lo) hi - | _ -> list - in - let l = (List.fold_left arg_l [] args) in - let rec dest_l = function - | AST.BR r -> - let t = Machregsaux.class_of_type (typing r) in - ret.(i).(t) <- - (if List.mem r l - then 0 else 1) + ret.(i).(t) - | AST.BR_splitlong (hi, lo) -> - dest_l hi; - dest_l lo - | _ -> () - in - dest_l dest - | _ -> () - ) seqa; - ret - - let resources_of_instruction (opweights : opweights) = function | Inop _ -> Array.map (fun _ -> 0) opweights.pipelined_resource_bounds | Iop(op, inputs, output, _) -> -- cgit From 43d4932e8ba9e00eb8c8788c86f56b6bddd46392 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Fri, 16 Jul 2021 13:28:26 +0200 Subject: setup registers --- kvx/Machregsaux.ml | 2 ++ kvx/Machregsaux.mli | 3 +++ 2 files changed, 5 insertions(+) diff --git a/kvx/Machregsaux.ml b/kvx/Machregsaux.ml index e3b18181..dbb89727 100644 --- a/kvx/Machregsaux.ml +++ b/kvx/Machregsaux.ml @@ -31,3 +31,5 @@ let class_of_type = function | AST.Tint | AST.Tlong | AST.Tfloat | AST.Tsingle -> 0 | AST.Tany32 | AST.Tany64 -> assert false + +let nr_regs = [| 59 |] diff --git a/kvx/Machregsaux.mli b/kvx/Machregsaux.mli index 01b0f9fd..23ac1c9a 100644 --- a/kvx/Machregsaux.mli +++ b/kvx/Machregsaux.mli @@ -15,3 +15,6 @@ val is_scratch_register: string -> bool val class_of_type: AST.typ -> int + +(* Number of registers in each class *) +val nr_regs : int array -- cgit From 6121be54b80a55fdadd8b64dfad53357148c9090 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Fri, 16 Jul 2021 14:13:50 +0200 Subject: fix for KVX --- kvx/PostpassSchedulingOracle.ml | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/kvx/PostpassSchedulingOracle.ml b/kvx/PostpassSchedulingOracle.ml index 2107ce22..5ebad421 100644 --- a/kvx/PostpassSchedulingOracle.ml +++ b/kvx/PostpassSchedulingOracle.ml @@ -787,8 +787,14 @@ let latency_constraints bb = *) let build_problem bb = - { max_latency = -1; resource_bounds = resource_bounds; - instruction_usages = instruction_usages bb; latency_constraints = latency_constraints bb } +{ max_latency = -1; + resource_bounds = resource_bounds; + instruction_usages = instruction_usages bb; + latency_constraints = latency_constraints bb; + live_regs_entry = Registers.Regset.empty; (* unused here *) + typing = (fun x -> AST.Tint); (* unused here *) + reference_counting = None +} let rec find_min_opt (l: int option list) = match l with -- cgit From 67f4ae2b702cc95ed7cef67b726e15abbf18e768 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Fri, 16 Jul 2021 15:26:03 +0200 Subject: use a more recognizable option name --- arm/ExpansionOracle.ml | 18 +- arm/PrepassSchedulingOracle.ml | 7 +- driver/Clflags.ml | 2 +- driver/Driver.ml | 4 +- kvx/ExpansionOracle.ml | 18 +- kvx/PrepassSchedulingOracle.ml | 486 ++++++++++++++++++++++++++++++++++- kvx/PrepassSchedulingOracleDeps.ml | 18 +- powerpc/ExpansionOracle.ml | 18 +- powerpc/PrepassSchedulingOracle.ml | 7 +- riscV/PrepassSchedulingOracle.ml | 486 ++++++++++++++++++++++++++++++++++- riscV/PrepassSchedulingOracleDeps.ml | 18 +- scheduling/InstructionScheduler.ml | 4 +- x86/ExpansionOracle.ml | 18 +- 13 files changed, 1089 insertions(+), 15 deletions(-) mode change 120000 => 100644 arm/ExpansionOracle.ml mode change 120000 => 100644 arm/PrepassSchedulingOracle.ml mode change 120000 => 100644 kvx/ExpansionOracle.ml mode change 120000 => 100644 kvx/PrepassSchedulingOracle.ml mode change 120000 => 100644 kvx/PrepassSchedulingOracleDeps.ml mode change 120000 => 100644 powerpc/ExpansionOracle.ml mode change 120000 => 100644 powerpc/PrepassSchedulingOracle.ml mode change 120000 => 100644 riscV/PrepassSchedulingOracle.ml mode change 120000 => 100644 riscV/PrepassSchedulingOracleDeps.ml mode change 120000 => 100644 x86/ExpansionOracle.ml diff --git a/arm/ExpansionOracle.ml b/arm/ExpansionOracle.ml deleted file mode 120000 index ee2674bf..00000000 --- a/arm/ExpansionOracle.ml +++ /dev/null @@ -1 +0,0 @@ -../aarch64/ExpansionOracle.ml \ No newline at end of file diff --git a/arm/ExpansionOracle.ml b/arm/ExpansionOracle.ml new file mode 100644 index 00000000..3b63b80d --- /dev/null +++ b/arm/ExpansionOracle.ml @@ -0,0 +1,17 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Léo Gourdin UGA, VERIMAG *) +(* *) +(* Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +open RTLpathCommon + +let expanse (sb : superblock) code pm = (code, pm) + +let find_last_node_reg c = () diff --git a/arm/PrepassSchedulingOracle.ml b/arm/PrepassSchedulingOracle.ml deleted file mode 120000 index 9885fd52..00000000 --- a/arm/PrepassSchedulingOracle.ml +++ /dev/null @@ -1 +0,0 @@ -../x86/PrepassSchedulingOracle.ml \ No newline at end of file diff --git a/arm/PrepassSchedulingOracle.ml b/arm/PrepassSchedulingOracle.ml new file mode 100644 index 00000000..42a3da23 --- /dev/null +++ b/arm/PrepassSchedulingOracle.ml @@ -0,0 +1,6 @@ +open RTL +open Registers + +(* Do not do anything *) +let schedule_sequence (seqa : (instruction*Regset.t) array) + live_regs_entry typing reference = None diff --git a/driver/Clflags.ml b/driver/Clflags.ml index d01b57f0..085eaa7e 100644 --- a/driver/Clflags.ml +++ b/driver/Clflags.ml @@ -116,5 +116,5 @@ let option_profile_arcs = ref false let option_fbranch_probabilities = ref true let option_debug_compcert = ref 0 let option_regpres_threshold = ref 2 -let option_regpres_temp = ref false +let option_regpres_wait_window = ref false let main_function_name = ref "main" diff --git a/driver/Driver.ml b/driver/Driver.ml index 22c75f44..79353f32 100644 --- a/driver/Driver.ml +++ b/driver/Driver.ml @@ -212,7 +212,7 @@ Processing options: -fprepass= Perform postpass scheduling with the specified optimization [list] (=list: list scheduling, =revlist: reverse list scheduling, =regpres: list scheduling aware of register pressure, =regpres_bis: variant of regpres, =zigzag: zigzag scheduling, =ilp: ILP, =greedy: just packing bundles) -regpres-threshold n With `-fprepass= regpres`, set threshold value for number of free registers before trying to decrease register pressure - -fregpres-temp use the temporary solution (default no) + -fregpres-wait-window When register pressure is high, use a 5-cycle waiting window instead of scheduling short paths first (default no) -fpostpass Perform postpass scheduling (only for K1 architecture) [on] -fpostpass= Perform postpass scheduling with the specified optimization [list] (=list: list scheduling, =ilp: ILP, =greedy: just packing bundles) @@ -427,7 +427,7 @@ let cmdline_actions = @ f_opt "redundancy" option_fredundancy @ [ Exact "-mtune", String (fun s -> option_mtune := s) ] @ f_opt "prepass" option_fprepass - @ f_opt "regpres-temp" option_regpres_temp + @ f_opt "regpres-wait-window" option_regpres_wait_window @ f_opt "postpass" option_fpostpass @ [ Exact "-ftailduplicate", Integer (fun n -> option_ftailduplicate := n) ] @ f_opt "predict" option_fpredict diff --git a/kvx/ExpansionOracle.ml b/kvx/ExpansionOracle.ml deleted file mode 120000 index ee2674bf..00000000 --- a/kvx/ExpansionOracle.ml +++ /dev/null @@ -1 +0,0 @@ -../aarch64/ExpansionOracle.ml \ No newline at end of file diff --git a/kvx/ExpansionOracle.ml b/kvx/ExpansionOracle.ml new file mode 100644 index 00000000..3b63b80d --- /dev/null +++ b/kvx/ExpansionOracle.ml @@ -0,0 +1,17 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Léo Gourdin UGA, VERIMAG *) +(* *) +(* Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +open RTLpathCommon + +let expanse (sb : superblock) code pm = (code, pm) + +let find_last_node_reg c = () diff --git a/kvx/PrepassSchedulingOracle.ml b/kvx/PrepassSchedulingOracle.ml deleted file mode 120000 index 912e9ffa..00000000 --- a/kvx/PrepassSchedulingOracle.ml +++ /dev/null @@ -1 +0,0 @@ -../aarch64/PrepassSchedulingOracle.ml \ No newline at end of file diff --git a/kvx/PrepassSchedulingOracle.ml b/kvx/PrepassSchedulingOracle.ml new file mode 100644 index 00000000..53a81095 --- /dev/null +++ b/kvx/PrepassSchedulingOracle.ml @@ -0,0 +1,485 @@ +open AST +open RTL +open Maps +open InstructionScheduler +open Registers +open PrepassSchedulingOracleDeps + +let use_alias_analysis () = false + +let length_of_chunk = function +| Mint8signed +| Mint8unsigned -> 1 +| Mint16signed +| Mint16unsigned -> 2 +| Mint32 +| Mfloat32 +| Many32 -> 4 +| Mint64 +| Mfloat64 +| Many64 -> 8;; + +let get_simple_dependencies (opweights : opweights) (seqa : (instruction*Regset.t) array) = + let last_reg_reads : int list PTree.t ref = ref PTree.empty + and last_reg_write : (int*int) PTree.t ref = ref PTree.empty + and last_mem_reads : int list ref = ref [] + and last_mem_write : int option ref = ref None + and last_branch : int option ref = ref None + and last_non_pipelined_op : int array = Array.make + opweights.nr_non_pipelined_units ( -1 ) + and latency_constraints : latency_constraint list ref = ref [] in + let add_constraint instr_from instr_to latency = + assert (instr_from <= instr_to); + assert (latency >= 0); + if instr_from = instr_to + then (if latency = 0 + then () + else failwith "PrepassSchedulingOracle.get_dependencies: negative self-loop") + else + latency_constraints := + { instr_from = instr_from; + instr_to = instr_to; + latency = latency + }:: !latency_constraints + and get_last_reads reg = + match PTree.get reg !last_reg_reads + with Some l -> l + | None -> [] in + let add_input_mem i = + if not (use_alias_analysis ()) + then + begin + begin + (* Read after write *) + match !last_mem_write with + | None -> () + | Some j -> add_constraint j i 1 + end; + last_mem_reads := i :: !last_mem_reads + end + and add_output_mem i = + if not (use_alias_analysis ()) + then + begin + begin + (* Write after write *) + match !last_mem_write with + | None -> () + | Some j -> add_constraint j i 1 + end; + (* Write after read *) + List.iter (fun j -> add_constraint j i 0) !last_mem_reads; + last_mem_write := Some i; + last_mem_reads := [] + end + and add_input_reg i reg = + begin + (* Read after write *) + match PTree.get reg !last_reg_write with + | None -> () + | Some (j, latency) -> add_constraint j i latency + end; + last_reg_reads := PTree.set reg + (i :: get_last_reads reg) + !last_reg_reads + and add_output_reg i latency reg = + begin + (* Write after write *) + match PTree.get reg !last_reg_write with + | None -> () + | Some (j, _) -> add_constraint j i 1 + end; + begin + (* Write after read *) + List.iter (fun j -> add_constraint j i 0) (get_last_reads reg) + end; + last_reg_write := PTree.set reg (i, latency) !last_reg_write; + last_reg_reads := PTree.remove reg !last_reg_reads + in + let add_input_regs i regs = List.iter (add_input_reg i) regs in + let rec add_builtin_res i (res : reg builtin_res) = + match res with + | BR r -> add_output_reg i 10 r + | BR_none -> () + | BR_splitlong (hi, lo) -> add_builtin_res i hi; + add_builtin_res i lo in + let rec add_builtin_arg i (ba : reg builtin_arg) = + match ba with + | BA r -> add_input_reg i r + | BA_int _ | BA_long _ | BA_float _ | BA_single _ -> () + | BA_loadstack(_,_) -> add_input_mem i + | BA_addrstack _ -> () + | BA_loadglobal(_, _, _) -> add_input_mem i + | BA_addrglobal _ -> () + | BA_splitlong(hi, lo) -> add_builtin_arg i hi; + add_builtin_arg i lo + | BA_addptr(a1, a2) -> add_builtin_arg i a1; + add_builtin_arg i a2 in + let irreversible_action i = + match !last_branch with + | None -> () + | Some j -> add_constraint j i 1 in + let set_branch i = + irreversible_action i; + last_branch := Some i in + let add_non_pipelined_resources i resources = + Array.iter2 + (fun latency last -> + if latency >= 0 && last >= 0 then add_constraint last i latency) + resources last_non_pipelined_op; + Array.iteri (fun rsc latency -> + if latency >= 0 + then last_non_pipelined_op.(rsc) <- i) resources + in + Array.iteri + begin + fun i (insn, other_uses) -> + List.iter (fun use -> + add_input_reg i use) + (Regset.elements other_uses); + + match insn with + | Inop _ -> () + | Iop(op, inputs, output, _) -> + add_non_pipelined_resources i + (opweights.non_pipelined_resources_of_op op (List.length inputs)); + (if Op.is_trapping_op op then irreversible_action i); + add_input_regs i inputs; + add_output_reg i (opweights.latency_of_op op (List.length inputs)) output + | Iload(trap, chunk, addressing, addr_regs, output, _) -> + (if trap=TRAP then irreversible_action i); + add_input_mem i; + add_input_regs i addr_regs; + add_output_reg i (opweights.latency_of_load trap chunk addressing (List.length addr_regs)) output + | Istore(chunk, addressing, addr_regs, input, _) -> + irreversible_action i; + add_input_regs i addr_regs; + add_input_reg i input; + add_output_mem i + | Icall(signature, ef, inputs, output, _) -> + set_branch i; + (match ef with + | Datatypes.Coq_inl r -> add_input_reg i r + | Datatypes.Coq_inr symbol -> () + ); + add_input_mem i; + add_input_regs i inputs; + add_output_reg i (opweights.latency_of_call signature ef) output; + add_output_mem i; + failwith "Icall" + | Itailcall(signature, ef, inputs) -> + set_branch i; + (match ef with + | Datatypes.Coq_inl r -> add_input_reg i r + | Datatypes.Coq_inr symbol -> () + ); + add_input_mem i; + add_input_regs i inputs; + failwith "Itailcall" + | Ibuiltin(ef, builtin_inputs, builtin_output, _) -> + set_branch i; + add_input_mem i; + List.iter (add_builtin_arg i) builtin_inputs; + add_builtin_res i builtin_output; + add_output_mem i; + failwith "Ibuiltin" + | Icond(cond, inputs, _, _, _) -> + set_branch i; + add_input_mem i; + add_input_regs i inputs + | Ijumptable(input, _) -> + set_branch i; + add_input_reg i input; + failwith "Ijumptable" + | Ireturn(Some input) -> + set_branch i; + add_input_reg i input; + failwith "Ireturn" + | Ireturn(None) -> + set_branch i; + failwith "Ireturn none" + end seqa; + !latency_constraints;; + +let resources_of_instruction (opweights : opweights) = function + | Inop _ -> Array.map (fun _ -> 0) opweights.pipelined_resource_bounds + | Iop(op, inputs, output, _) -> + opweights.resources_of_op op (List.length inputs) + | Iload(trap, chunk, addressing, addr_regs, output, _) -> + opweights.resources_of_load trap chunk addressing (List.length addr_regs) + | Istore(chunk, addressing, addr_regs, input, _) -> + opweights.resources_of_store chunk addressing (List.length addr_regs) + | Icall(signature, ef, inputs, output, _) -> + opweights.resources_of_call signature ef + | Ibuiltin(ef, builtin_inputs, builtin_output, _) -> + opweights.resources_of_builtin ef + | Icond(cond, args, _, _ , _) -> + opweights.resources_of_cond cond (List.length args) + | Itailcall _ | Ijumptable _ | Ireturn _ -> opweights.pipelined_resource_bounds + +let print_sequence pp (seqa : instruction array) = + Array.iteri ( + fun i (insn : instruction) -> + PrintRTL.print_instruction pp (i, insn)) seqa;; + +type unique_id = int + +type 'a symbolic_term_node = + | STop of Op.operation * 'a list + | STinitial_reg of int + | STother of int;; + +type symbolic_term = { + hash_id : unique_id; + hash_ct : symbolic_term symbolic_term_node + };; + +let rec print_term channel term = + match term.hash_ct with + | STop(op, args) -> + PrintOp.print_operation print_term channel (op, args) + | STinitial_reg n -> Printf.fprintf channel "x%d" n + | STother n -> Printf.fprintf channel "y%d" n;; + +type symbolic_term_table = { + st_table : (unique_id symbolic_term_node, symbolic_term) Hashtbl.t; + mutable st_next_id : unique_id };; + +let hash_init () = { + st_table = Hashtbl.create 20; + st_next_id = 0 + };; + +let ground_to_id = function + | STop(op, l) -> STop(op, List.map (fun t -> t.hash_id) l) + | STinitial_reg r -> STinitial_reg r + | STother i -> STother i;; + +let hash_node (table : symbolic_term_table) (term : symbolic_term symbolic_term_node) : symbolic_term = + let grounded = ground_to_id term in + match Hashtbl.find_opt table.st_table grounded with + | Some x -> x + | None -> + let term' = { hash_id = table.st_next_id; + hash_ct = term } in + (if table.st_next_id = max_int then failwith "hash: max_int"); + table.st_next_id <- table.st_next_id + 1; + Hashtbl.add table.st_table grounded term'; + term';; + +type access = { + base : symbolic_term; + offset : int64; + length : int + };; + +let term_equal a b = (a.hash_id = b.hash_id);; + +let access_of_addressing get_reg chunk addressing args = + match addressing, args with + | (Op.Aindexed ofs), [reg] -> Some + { base = get_reg reg; + offset = Camlcoq.camlint64_of_ptrofs ofs; + length = length_of_chunk chunk + } + | _, _ -> None ;; +(* TODO: global *) + +let symbolic_execution (seqa : instruction array) = + let regs = ref PTree.empty + and table = hash_init() in + let assign reg term = regs := PTree.set reg term !regs + and hash term = hash_node table term in + let get_reg reg = + match PTree.get reg !regs with + | None -> hash (STinitial_reg (Camlcoq.P.to_int reg)) + | Some x -> x in + let targets = Array.make (Array.length seqa) None in + Array.iteri + begin + fun i insn -> + match insn with + | Iop(Op.Omove, [input], output, _) -> + assign output (get_reg input) + | Iop(op, inputs, output, _) -> + assign output (hash (STop(op, List.map get_reg inputs))) + + | Iload(trap, chunk, addressing, args, output, _) -> + let access = access_of_addressing get_reg chunk addressing args in + targets.(i) <- access; + assign output (hash (STother(i))) + + | Icall(_, _, _, output, _) + | Ibuiltin(_, _, BR output, _) -> + assign output (hash (STother(i))) + + | Istore(chunk, addressing, args, va, _) -> + let access = access_of_addressing get_reg chunk addressing args in + targets.(i) <- access + + | Inop _ -> () + | Ibuiltin(_, _, BR_none, _) -> () + | Ibuiltin(_, _, BR_splitlong _, _) -> failwith "BR_splitlong" + + | Itailcall (_, _, _) + |Icond (_, _, _, _, _) + |Ijumptable (_, _) + |Ireturn _ -> () + end seqa; + targets;; + +let print_access channel = function + | None -> Printf.fprintf channel "any" + | Some x -> Printf.fprintf channel "%a + %Ld" print_term x.base x.offset;; + +let print_targets channel seqa = + let targets = symbolic_execution seqa in + Array.iteri + (fun i insn -> + match insn with + | Iload _ -> Printf.fprintf channel "%d: load %a\n" + i print_access targets.(i) + | Istore _ -> Printf.fprintf channel "%d: store %a\n" + i print_access targets.(i) + | _ -> () + ) seqa;; + +let may_overlap a0 b0 = + match a0, b0 with + | (None, _) | (_ , None) -> true + | (Some a), (Some b) -> + if term_equal a.base b.base + then (max a.offset b.offset) < + (min (Int64.add (Int64.of_int a.length) a.offset) + (Int64.add (Int64.of_int b.length) b.offset)) + else match a.base.hash_ct, b.base.hash_ct with + | STop(Op.Oaddrsymbol(ida, ofsa),[]), + STop(Op.Oaddrsymbol(idb, ofsb),[]) -> + (ida=idb) && + let ao = Int64.add a.offset (Camlcoq.camlint64_of_ptrofs ofsa) + and bo = Int64.add b.offset (Camlcoq.camlint64_of_ptrofs ofsb) in + (max ao bo) < + (min (Int64.add (Int64.of_int a.length) ao) + (Int64.add (Int64.of_int b.length) bo)) + | STop(Op.Oaddrstack _, []), + STop(Op.Oaddrsymbol _, []) + | STop(Op.Oaddrsymbol _, []), + STop(Op.Oaddrstack _, []) -> false + | STop(Op.Oaddrstack(ofsa),[]), + STop(Op.Oaddrstack(ofsb),[]) -> + let ao = Int64.add a.offset (Camlcoq.camlint64_of_ptrofs ofsa) + and bo = Int64.add b.offset (Camlcoq.camlint64_of_ptrofs ofsb) in + (max ao bo) < + (min (Int64.add (Int64.of_int a.length) ao) + (Int64.add (Int64.of_int b.length) bo)) + | _ -> true;; + +(* +(* TODO suboptimal quadratic algorithm *) +let get_alias_dependencies seqa = + let targets = symbolic_execution seqa + and deps = ref [] in + let add_constraint instr_from instr_to latency = + deps := { instr_from = instr_from; + instr_to = instr_to; + latency = latency + }:: !deps in + for i=0 to (Array.length seqa)-1 + do + for j=0 to i-1 + do + match seqa.(j), seqa.(i) with + | (Istore _), ((Iload _) | (Istore _)) -> + if may_overlap targets.(j) targets.(i) + then add_constraint j i 1 + | (Iload _), (Istore _) -> + if may_overlap targets.(j) targets.(i) + then add_constraint j i 0 + | (Istore _ | Iload _), (Icall _ | Ibuiltin _) + | (Icall _ | Ibuiltin _), (Icall _ | Ibuiltin _ | Iload _ | Istore _) -> + add_constraint j i 1 + | (Inop _ | Iop _), _ + | _, (Inop _ | Iop _) + | (Iload _), (Iload _) -> () + done + done; + !deps;; + *) + +let define_problem (opweights : opweights) (live_entry_regs : Regset.t) + (typing : RTLtyping.regenv) reference_counting seqa = + let simple_deps = get_simple_dependencies opweights seqa in + { max_latency = -1; + resource_bounds = opweights.pipelined_resource_bounds; + live_regs_entry = live_entry_regs; + typing = typing; + reference_counting = Some reference_counting; + instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa); + latency_constraints = + (* if (use_alias_analysis ()) + then (get_alias_dependencies seqa) @ simple_deps + else *) simple_deps };; + +let zigzag_scheduler problem early_ones = + let nr_instructions = get_nr_instructions problem in + assert(nr_instructions = (Array.length early_ones)); + match list_scheduler problem with + | Some fwd_schedule -> + let fwd_makespan = fwd_schedule.((Array.length fwd_schedule) - 1) in + let constraints' = ref problem.latency_constraints in + Array.iteri (fun i is_early -> + if is_early then + constraints' := { + instr_from = i; + instr_to = nr_instructions ; + latency = fwd_makespan - fwd_schedule.(i) } ::!constraints' ) + early_ones; + validated_scheduler reverse_list_scheduler + { problem with latency_constraints = !constraints' } + | None -> None;; + +let prepass_scheduler_by_name name problem early_ones = + match name with + | "zigzag" -> zigzag_scheduler problem early_ones + | _ -> scheduler_by_name name problem + +let schedule_sequence (seqa : (instruction*Regset.t) array) + (live_regs_entry : Registers.Regset.t) + (typing : RTLtyping.regenv) + reference = + let opweights = OpWeights.get_opweights () in + try + if (Array.length seqa) <= 1 + then None + else + begin + let nr_instructions = Array.length seqa in + (if !Clflags.option_debug_compcert > 6 + then Printf.printf "prepass scheduling length = %d\n" (Array.length seqa)); + let problem = define_problem opweights live_regs_entry + typing reference seqa in + (if !Clflags.option_debug_compcert > 7 + then (print_sequence stdout (Array.map fst seqa); + print_problem stdout problem)); + match prepass_scheduler_by_name + (!Clflags.option_fprepass_sched) + problem + (Array.map (fun (ins, _) -> + match ins with + | Icond _ -> true + | _ -> false) seqa) with + | None -> Printf.printf "no solution in prepass scheduling\n"; + None + | Some solution -> + let positions = Array.init nr_instructions (fun i -> i) in + Array.sort (fun i j -> + let si = solution.(i) and sj = solution.(j) in + if si < sj then -1 + else if si > sj then 1 + else i - j) positions; + Some positions + end + with (Failure s) -> + Printf.printf "failure in prepass scheduling: %s\n" s; + None;; + diff --git a/kvx/PrepassSchedulingOracleDeps.ml b/kvx/PrepassSchedulingOracleDeps.ml deleted file mode 120000 index 1e955b85..00000000 --- a/kvx/PrepassSchedulingOracleDeps.ml +++ /dev/null @@ -1 +0,0 @@ -../aarch64/PrepassSchedulingOracleDeps.ml \ No newline at end of file diff --git a/kvx/PrepassSchedulingOracleDeps.ml b/kvx/PrepassSchedulingOracleDeps.ml new file mode 100644 index 00000000..8d10d406 --- /dev/null +++ b/kvx/PrepassSchedulingOracleDeps.ml @@ -0,0 +1,17 @@ +type called_function = (Registers.reg, AST.ident) Datatypes.sum + +type opweights = + { + pipelined_resource_bounds : int array; + nr_non_pipelined_units : int; + latency_of_op : Op.operation -> int -> int; + resources_of_op : Op.operation -> int -> int array; + non_pipelined_resources_of_op : Op.operation -> int -> int array; + latency_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int; + resources_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int array; + resources_of_store : AST.memory_chunk -> Op.addressing -> int -> int array; + resources_of_cond : Op.condition -> int -> int array; + latency_of_call : AST.signature -> called_function -> int; + resources_of_call : AST.signature -> called_function -> int array; + resources_of_builtin : AST.external_function -> int array + };; diff --git a/powerpc/ExpansionOracle.ml b/powerpc/ExpansionOracle.ml deleted file mode 120000 index ee2674bf..00000000 --- a/powerpc/ExpansionOracle.ml +++ /dev/null @@ -1 +0,0 @@ -../aarch64/ExpansionOracle.ml \ No newline at end of file diff --git a/powerpc/ExpansionOracle.ml b/powerpc/ExpansionOracle.ml new file mode 100644 index 00000000..3b63b80d --- /dev/null +++ b/powerpc/ExpansionOracle.ml @@ -0,0 +1,17 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Léo Gourdin UGA, VERIMAG *) +(* *) +(* Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +open RTLpathCommon + +let expanse (sb : superblock) code pm = (code, pm) + +let find_last_node_reg c = () diff --git a/powerpc/PrepassSchedulingOracle.ml b/powerpc/PrepassSchedulingOracle.ml deleted file mode 120000 index 9885fd52..00000000 --- a/powerpc/PrepassSchedulingOracle.ml +++ /dev/null @@ -1 +0,0 @@ -../x86/PrepassSchedulingOracle.ml \ No newline at end of file diff --git a/powerpc/PrepassSchedulingOracle.ml b/powerpc/PrepassSchedulingOracle.ml new file mode 100644 index 00000000..42a3da23 --- /dev/null +++ b/powerpc/PrepassSchedulingOracle.ml @@ -0,0 +1,6 @@ +open RTL +open Registers + +(* Do not do anything *) +let schedule_sequence (seqa : (instruction*Regset.t) array) + live_regs_entry typing reference = None diff --git a/riscV/PrepassSchedulingOracle.ml b/riscV/PrepassSchedulingOracle.ml deleted file mode 120000 index 912e9ffa..00000000 --- a/riscV/PrepassSchedulingOracle.ml +++ /dev/null @@ -1 +0,0 @@ -../aarch64/PrepassSchedulingOracle.ml \ No newline at end of file diff --git a/riscV/PrepassSchedulingOracle.ml b/riscV/PrepassSchedulingOracle.ml new file mode 100644 index 00000000..53a81095 --- /dev/null +++ b/riscV/PrepassSchedulingOracle.ml @@ -0,0 +1,485 @@ +open AST +open RTL +open Maps +open InstructionScheduler +open Registers +open PrepassSchedulingOracleDeps + +let use_alias_analysis () = false + +let length_of_chunk = function +| Mint8signed +| Mint8unsigned -> 1 +| Mint16signed +| Mint16unsigned -> 2 +| Mint32 +| Mfloat32 +| Many32 -> 4 +| Mint64 +| Mfloat64 +| Many64 -> 8;; + +let get_simple_dependencies (opweights : opweights) (seqa : (instruction*Regset.t) array) = + let last_reg_reads : int list PTree.t ref = ref PTree.empty + and last_reg_write : (int*int) PTree.t ref = ref PTree.empty + and last_mem_reads : int list ref = ref [] + and last_mem_write : int option ref = ref None + and last_branch : int option ref = ref None + and last_non_pipelined_op : int array = Array.make + opweights.nr_non_pipelined_units ( -1 ) + and latency_constraints : latency_constraint list ref = ref [] in + let add_constraint instr_from instr_to latency = + assert (instr_from <= instr_to); + assert (latency >= 0); + if instr_from = instr_to + then (if latency = 0 + then () + else failwith "PrepassSchedulingOracle.get_dependencies: negative self-loop") + else + latency_constraints := + { instr_from = instr_from; + instr_to = instr_to; + latency = latency + }:: !latency_constraints + and get_last_reads reg = + match PTree.get reg !last_reg_reads + with Some l -> l + | None -> [] in + let add_input_mem i = + if not (use_alias_analysis ()) + then + begin + begin + (* Read after write *) + match !last_mem_write with + | None -> () + | Some j -> add_constraint j i 1 + end; + last_mem_reads := i :: !last_mem_reads + end + and add_output_mem i = + if not (use_alias_analysis ()) + then + begin + begin + (* Write after write *) + match !last_mem_write with + | None -> () + | Some j -> add_constraint j i 1 + end; + (* Write after read *) + List.iter (fun j -> add_constraint j i 0) !last_mem_reads; + last_mem_write := Some i; + last_mem_reads := [] + end + and add_input_reg i reg = + begin + (* Read after write *) + match PTree.get reg !last_reg_write with + | None -> () + | Some (j, latency) -> add_constraint j i latency + end; + last_reg_reads := PTree.set reg + (i :: get_last_reads reg) + !last_reg_reads + and add_output_reg i latency reg = + begin + (* Write after write *) + match PTree.get reg !last_reg_write with + | None -> () + | Some (j, _) -> add_constraint j i 1 + end; + begin + (* Write after read *) + List.iter (fun j -> add_constraint j i 0) (get_last_reads reg) + end; + last_reg_write := PTree.set reg (i, latency) !last_reg_write; + last_reg_reads := PTree.remove reg !last_reg_reads + in + let add_input_regs i regs = List.iter (add_input_reg i) regs in + let rec add_builtin_res i (res : reg builtin_res) = + match res with + | BR r -> add_output_reg i 10 r + | BR_none -> () + | BR_splitlong (hi, lo) -> add_builtin_res i hi; + add_builtin_res i lo in + let rec add_builtin_arg i (ba : reg builtin_arg) = + match ba with + | BA r -> add_input_reg i r + | BA_int _ | BA_long _ | BA_float _ | BA_single _ -> () + | BA_loadstack(_,_) -> add_input_mem i + | BA_addrstack _ -> () + | BA_loadglobal(_, _, _) -> add_input_mem i + | BA_addrglobal _ -> () + | BA_splitlong(hi, lo) -> add_builtin_arg i hi; + add_builtin_arg i lo + | BA_addptr(a1, a2) -> add_builtin_arg i a1; + add_builtin_arg i a2 in + let irreversible_action i = + match !last_branch with + | None -> () + | Some j -> add_constraint j i 1 in + let set_branch i = + irreversible_action i; + last_branch := Some i in + let add_non_pipelined_resources i resources = + Array.iter2 + (fun latency last -> + if latency >= 0 && last >= 0 then add_constraint last i latency) + resources last_non_pipelined_op; + Array.iteri (fun rsc latency -> + if latency >= 0 + then last_non_pipelined_op.(rsc) <- i) resources + in + Array.iteri + begin + fun i (insn, other_uses) -> + List.iter (fun use -> + add_input_reg i use) + (Regset.elements other_uses); + + match insn with + | Inop _ -> () + | Iop(op, inputs, output, _) -> + add_non_pipelined_resources i + (opweights.non_pipelined_resources_of_op op (List.length inputs)); + (if Op.is_trapping_op op then irreversible_action i); + add_input_regs i inputs; + add_output_reg i (opweights.latency_of_op op (List.length inputs)) output + | Iload(trap, chunk, addressing, addr_regs, output, _) -> + (if trap=TRAP then irreversible_action i); + add_input_mem i; + add_input_regs i addr_regs; + add_output_reg i (opweights.latency_of_load trap chunk addressing (List.length addr_regs)) output + | Istore(chunk, addressing, addr_regs, input, _) -> + irreversible_action i; + add_input_regs i addr_regs; + add_input_reg i input; + add_output_mem i + | Icall(signature, ef, inputs, output, _) -> + set_branch i; + (match ef with + | Datatypes.Coq_inl r -> add_input_reg i r + | Datatypes.Coq_inr symbol -> () + ); + add_input_mem i; + add_input_regs i inputs; + add_output_reg i (opweights.latency_of_call signature ef) output; + add_output_mem i; + failwith "Icall" + | Itailcall(signature, ef, inputs) -> + set_branch i; + (match ef with + | Datatypes.Coq_inl r -> add_input_reg i r + | Datatypes.Coq_inr symbol -> () + ); + add_input_mem i; + add_input_regs i inputs; + failwith "Itailcall" + | Ibuiltin(ef, builtin_inputs, builtin_output, _) -> + set_branch i; + add_input_mem i; + List.iter (add_builtin_arg i) builtin_inputs; + add_builtin_res i builtin_output; + add_output_mem i; + failwith "Ibuiltin" + | Icond(cond, inputs, _, _, _) -> + set_branch i; + add_input_mem i; + add_input_regs i inputs + | Ijumptable(input, _) -> + set_branch i; + add_input_reg i input; + failwith "Ijumptable" + | Ireturn(Some input) -> + set_branch i; + add_input_reg i input; + failwith "Ireturn" + | Ireturn(None) -> + set_branch i; + failwith "Ireturn none" + end seqa; + !latency_constraints;; + +let resources_of_instruction (opweights : opweights) = function + | Inop _ -> Array.map (fun _ -> 0) opweights.pipelined_resource_bounds + | Iop(op, inputs, output, _) -> + opweights.resources_of_op op (List.length inputs) + | Iload(trap, chunk, addressing, addr_regs, output, _) -> + opweights.resources_of_load trap chunk addressing (List.length addr_regs) + | Istore(chunk, addressing, addr_regs, input, _) -> + opweights.resources_of_store chunk addressing (List.length addr_regs) + | Icall(signature, ef, inputs, output, _) -> + opweights.resources_of_call signature ef + | Ibuiltin(ef, builtin_inputs, builtin_output, _) -> + opweights.resources_of_builtin ef + | Icond(cond, args, _, _ , _) -> + opweights.resources_of_cond cond (List.length args) + | Itailcall _ | Ijumptable _ | Ireturn _ -> opweights.pipelined_resource_bounds + +let print_sequence pp (seqa : instruction array) = + Array.iteri ( + fun i (insn : instruction) -> + PrintRTL.print_instruction pp (i, insn)) seqa;; + +type unique_id = int + +type 'a symbolic_term_node = + | STop of Op.operation * 'a list + | STinitial_reg of int + | STother of int;; + +type symbolic_term = { + hash_id : unique_id; + hash_ct : symbolic_term symbolic_term_node + };; + +let rec print_term channel term = + match term.hash_ct with + | STop(op, args) -> + PrintOp.print_operation print_term channel (op, args) + | STinitial_reg n -> Printf.fprintf channel "x%d" n + | STother n -> Printf.fprintf channel "y%d" n;; + +type symbolic_term_table = { + st_table : (unique_id symbolic_term_node, symbolic_term) Hashtbl.t; + mutable st_next_id : unique_id };; + +let hash_init () = { + st_table = Hashtbl.create 20; + st_next_id = 0 + };; + +let ground_to_id = function + | STop(op, l) -> STop(op, List.map (fun t -> t.hash_id) l) + | STinitial_reg r -> STinitial_reg r + | STother i -> STother i;; + +let hash_node (table : symbolic_term_table) (term : symbolic_term symbolic_term_node) : symbolic_term = + let grounded = ground_to_id term in + match Hashtbl.find_opt table.st_table grounded with + | Some x -> x + | None -> + let term' = { hash_id = table.st_next_id; + hash_ct = term } in + (if table.st_next_id = max_int then failwith "hash: max_int"); + table.st_next_id <- table.st_next_id + 1; + Hashtbl.add table.st_table grounded term'; + term';; + +type access = { + base : symbolic_term; + offset : int64; + length : int + };; + +let term_equal a b = (a.hash_id = b.hash_id);; + +let access_of_addressing get_reg chunk addressing args = + match addressing, args with + | (Op.Aindexed ofs), [reg] -> Some + { base = get_reg reg; + offset = Camlcoq.camlint64_of_ptrofs ofs; + length = length_of_chunk chunk + } + | _, _ -> None ;; +(* TODO: global *) + +let symbolic_execution (seqa : instruction array) = + let regs = ref PTree.empty + and table = hash_init() in + let assign reg term = regs := PTree.set reg term !regs + and hash term = hash_node table term in + let get_reg reg = + match PTree.get reg !regs with + | None -> hash (STinitial_reg (Camlcoq.P.to_int reg)) + | Some x -> x in + let targets = Array.make (Array.length seqa) None in + Array.iteri + begin + fun i insn -> + match insn with + | Iop(Op.Omove, [input], output, _) -> + assign output (get_reg input) + | Iop(op, inputs, output, _) -> + assign output (hash (STop(op, List.map get_reg inputs))) + + | Iload(trap, chunk, addressing, args, output, _) -> + let access = access_of_addressing get_reg chunk addressing args in + targets.(i) <- access; + assign output (hash (STother(i))) + + | Icall(_, _, _, output, _) + | Ibuiltin(_, _, BR output, _) -> + assign output (hash (STother(i))) + + | Istore(chunk, addressing, args, va, _) -> + let access = access_of_addressing get_reg chunk addressing args in + targets.(i) <- access + + | Inop _ -> () + | Ibuiltin(_, _, BR_none, _) -> () + | Ibuiltin(_, _, BR_splitlong _, _) -> failwith "BR_splitlong" + + | Itailcall (_, _, _) + |Icond (_, _, _, _, _) + |Ijumptable (_, _) + |Ireturn _ -> () + end seqa; + targets;; + +let print_access channel = function + | None -> Printf.fprintf channel "any" + | Some x -> Printf.fprintf channel "%a + %Ld" print_term x.base x.offset;; + +let print_targets channel seqa = + let targets = symbolic_execution seqa in + Array.iteri + (fun i insn -> + match insn with + | Iload _ -> Printf.fprintf channel "%d: load %a\n" + i print_access targets.(i) + | Istore _ -> Printf.fprintf channel "%d: store %a\n" + i print_access targets.(i) + | _ -> () + ) seqa;; + +let may_overlap a0 b0 = + match a0, b0 with + | (None, _) | (_ , None) -> true + | (Some a), (Some b) -> + if term_equal a.base b.base + then (max a.offset b.offset) < + (min (Int64.add (Int64.of_int a.length) a.offset) + (Int64.add (Int64.of_int b.length) b.offset)) + else match a.base.hash_ct, b.base.hash_ct with + | STop(Op.Oaddrsymbol(ida, ofsa),[]), + STop(Op.Oaddrsymbol(idb, ofsb),[]) -> + (ida=idb) && + let ao = Int64.add a.offset (Camlcoq.camlint64_of_ptrofs ofsa) + and bo = Int64.add b.offset (Camlcoq.camlint64_of_ptrofs ofsb) in + (max ao bo) < + (min (Int64.add (Int64.of_int a.length) ao) + (Int64.add (Int64.of_int b.length) bo)) + | STop(Op.Oaddrstack _, []), + STop(Op.Oaddrsymbol _, []) + | STop(Op.Oaddrsymbol _, []), + STop(Op.Oaddrstack _, []) -> false + | STop(Op.Oaddrstack(ofsa),[]), + STop(Op.Oaddrstack(ofsb),[]) -> + let ao = Int64.add a.offset (Camlcoq.camlint64_of_ptrofs ofsa) + and bo = Int64.add b.offset (Camlcoq.camlint64_of_ptrofs ofsb) in + (max ao bo) < + (min (Int64.add (Int64.of_int a.length) ao) + (Int64.add (Int64.of_int b.length) bo)) + | _ -> true;; + +(* +(* TODO suboptimal quadratic algorithm *) +let get_alias_dependencies seqa = + let targets = symbolic_execution seqa + and deps = ref [] in + let add_constraint instr_from instr_to latency = + deps := { instr_from = instr_from; + instr_to = instr_to; + latency = latency + }:: !deps in + for i=0 to (Array.length seqa)-1 + do + for j=0 to i-1 + do + match seqa.(j), seqa.(i) with + | (Istore _), ((Iload _) | (Istore _)) -> + if may_overlap targets.(j) targets.(i) + then add_constraint j i 1 + | (Iload _), (Istore _) -> + if may_overlap targets.(j) targets.(i) + then add_constraint j i 0 + | (Istore _ | Iload _), (Icall _ | Ibuiltin _) + | (Icall _ | Ibuiltin _), (Icall _ | Ibuiltin _ | Iload _ | Istore _) -> + add_constraint j i 1 + | (Inop _ | Iop _), _ + | _, (Inop _ | Iop _) + | (Iload _), (Iload _) -> () + done + done; + !deps;; + *) + +let define_problem (opweights : opweights) (live_entry_regs : Regset.t) + (typing : RTLtyping.regenv) reference_counting seqa = + let simple_deps = get_simple_dependencies opweights seqa in + { max_latency = -1; + resource_bounds = opweights.pipelined_resource_bounds; + live_regs_entry = live_entry_regs; + typing = typing; + reference_counting = Some reference_counting; + instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa); + latency_constraints = + (* if (use_alias_analysis ()) + then (get_alias_dependencies seqa) @ simple_deps + else *) simple_deps };; + +let zigzag_scheduler problem early_ones = + let nr_instructions = get_nr_instructions problem in + assert(nr_instructions = (Array.length early_ones)); + match list_scheduler problem with + | Some fwd_schedule -> + let fwd_makespan = fwd_schedule.((Array.length fwd_schedule) - 1) in + let constraints' = ref problem.latency_constraints in + Array.iteri (fun i is_early -> + if is_early then + constraints' := { + instr_from = i; + instr_to = nr_instructions ; + latency = fwd_makespan - fwd_schedule.(i) } ::!constraints' ) + early_ones; + validated_scheduler reverse_list_scheduler + { problem with latency_constraints = !constraints' } + | None -> None;; + +let prepass_scheduler_by_name name problem early_ones = + match name with + | "zigzag" -> zigzag_scheduler problem early_ones + | _ -> scheduler_by_name name problem + +let schedule_sequence (seqa : (instruction*Regset.t) array) + (live_regs_entry : Registers.Regset.t) + (typing : RTLtyping.regenv) + reference = + let opweights = OpWeights.get_opweights () in + try + if (Array.length seqa) <= 1 + then None + else + begin + let nr_instructions = Array.length seqa in + (if !Clflags.option_debug_compcert > 6 + then Printf.printf "prepass scheduling length = %d\n" (Array.length seqa)); + let problem = define_problem opweights live_regs_entry + typing reference seqa in + (if !Clflags.option_debug_compcert > 7 + then (print_sequence stdout (Array.map fst seqa); + print_problem stdout problem)); + match prepass_scheduler_by_name + (!Clflags.option_fprepass_sched) + problem + (Array.map (fun (ins, _) -> + match ins with + | Icond _ -> true + | _ -> false) seqa) with + | None -> Printf.printf "no solution in prepass scheduling\n"; + None + | Some solution -> + let positions = Array.init nr_instructions (fun i -> i) in + Array.sort (fun i j -> + let si = solution.(i) and sj = solution.(j) in + if si < sj then -1 + else if si > sj then 1 + else i - j) positions; + Some positions + end + with (Failure s) -> + Printf.printf "failure in prepass scheduling: %s\n" s; + None;; + diff --git a/riscV/PrepassSchedulingOracleDeps.ml b/riscV/PrepassSchedulingOracleDeps.ml deleted file mode 120000 index 1e955b85..00000000 --- a/riscV/PrepassSchedulingOracleDeps.ml +++ /dev/null @@ -1 +0,0 @@ -../aarch64/PrepassSchedulingOracleDeps.ml \ No newline at end of file diff --git a/riscV/PrepassSchedulingOracleDeps.ml b/riscV/PrepassSchedulingOracleDeps.ml new file mode 100644 index 00000000..8d10d406 --- /dev/null +++ b/riscV/PrepassSchedulingOracleDeps.ml @@ -0,0 +1,17 @@ +type called_function = (Registers.reg, AST.ident) Datatypes.sum + +type opweights = + { + pipelined_resource_bounds : int array; + nr_non_pipelined_units : int; + latency_of_op : Op.operation -> int -> int; + resources_of_op : Op.operation -> int -> int array; + non_pipelined_resources_of_op : Op.operation -> int -> int array; + latency_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int; + resources_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int array; + resources_of_store : AST.memory_chunk -> Op.addressing -> int -> int array; + resources_of_cond : Op.condition -> int -> int array; + latency_of_call : AST.signature -> called_function -> int; + resources_of_call : AST.signature -> called_function -> int array; + resources_of_builtin : AST.external_function -> int array + };; diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml index f823ccca..0203d9c8 100644 --- a/scheduling/InstructionScheduler.ml +++ b/scheduling/InstructionScheduler.ml @@ -526,7 +526,7 @@ let reg_pres_scheduler (problem : problem) : solution option = (vector_subtract usages.(maybe) current_resources; result := maybe) else - if not !Clflags.option_regpres_temp + if not !Clflags.option_regpres_wait_window then (InstrSet.iter (fun ins -> if vector_less_equal usages.(ins) current_resources && @@ -565,7 +565,7 @@ let reg_pres_scheduler (problem : problem) : solution option = if !csr_b && !Clflags.option_debug_compcert > 6 then DebugPrint.debug "REGPRES: high pres class %d\n" i; csr_b := false; - if !Clflags.option_regpres_temp then + if !Clflags.option_regpres_wait_window then cnt := 0; List.iter (fun (r,b) -> if b then diff --git a/x86/ExpansionOracle.ml b/x86/ExpansionOracle.ml deleted file mode 120000 index ee2674bf..00000000 --- a/x86/ExpansionOracle.ml +++ /dev/null @@ -1 +0,0 @@ -../aarch64/ExpansionOracle.ml \ No newline at end of file diff --git a/x86/ExpansionOracle.ml b/x86/ExpansionOracle.ml new file mode 100644 index 00000000..3b63b80d --- /dev/null +++ b/x86/ExpansionOracle.ml @@ -0,0 +1,17 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Léo Gourdin UGA, VERIMAG *) +(* *) +(* Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +open RTLpathCommon + +let expanse (sb : superblock) code pm = (code, pm) + +let find_last_node_reg c = () -- cgit From 2ff766a18432fd75739abab0b5741ded6b67a2a5 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Fri, 16 Jul 2021 15:29:25 +0200 Subject: activate register pressure by default --- driver/Clflags.ml | 2 +- driver/Driver.ml | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/driver/Clflags.ml b/driver/Clflags.ml index 085eaa7e..25bd2c78 100644 --- a/driver/Clflags.ml +++ b/driver/Clflags.ml @@ -51,7 +51,7 @@ let option_flooprotate = ref 0 (* rotate the innermost loops to have the conditi let option_mtune = ref "" let option_fprepass = ref true -let option_fprepass_sched = ref "list" +let option_fprepass_sched = ref "regpres" let option_fpostpass = ref true let option_fpostpass_sched = ref "list" diff --git a/driver/Driver.ml b/driver/Driver.ml index 79353f32..3f5a4bd9 100644 --- a/driver/Driver.ml +++ b/driver/Driver.ml @@ -298,9 +298,9 @@ let num_input_files = ref 0 let cmdline_actions = let f_opt name ref = [Exact("-f" ^ name), Set ref; Exact("-fno-" ^ name), Unset ref] in - let f_opt_str name ref strref = + let f_opt_str name default ref strref = [Exact("-f" ^ name ^ "="), String - (fun s -> (strref := (if s == "" then "list" else s)); ref := true) + (fun s -> (strref := (if s == "" then default else s)); ref := true) ] in let f_str name strref default = [Exact("-f" ^ name ^ "="), String @@ -435,8 +435,8 @@ let cmdline_actions = @ [ Exact "-funrollbody", Integer (fun n -> option_funrollbody := n) ] @ [ Exact "-flooprotate", Integer (fun n -> option_flooprotate := n) ] @ f_opt "tracelinearize" option_ftracelinearize - @ f_opt_str "prepass" option_fprepass option_fprepass_sched - @ f_opt_str "postpass" option_fpostpass option_fpostpass_sched + @ f_opt_str "prepass" "regpress" option_fprepass option_fprepass_sched + @ f_opt_str "postpass" "list" option_fpostpass option_fpostpass_sched @ f_opt "inline" option_finline @ f_opt "inline-functions-called-once" option_finline_functions_called_once @ f_opt "globaladdrtmp" option_fglobaladdrtmp -- cgit From cf033ec29391d5358dea1d3b25da1738957478c4 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Fri, 16 Jul 2021 18:01:03 +0200 Subject: comment for authors --- aarch64/PrepassSchedulingOracle.ml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml index 53a81095..e09eea13 100644 --- a/aarch64/PrepassSchedulingOracle.ml +++ b/aarch64/PrepassSchedulingOracle.ml @@ -1,3 +1,16 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* Léo Gourdin UGA, VERIMAG *) +(* Nicolas Nardino ENS-Lyon, VERIMAG *) +(* *) +(* *) +(* *************************************************************) + open AST open RTL open Maps -- cgit From 51668ba258e7b79a1b2b129a404b1eb9981e8e3b Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Fri, 16 Jul 2021 18:01:59 +0200 Subject: Make prepass scheduling sensitive to register pressure, by Nicolas Nardino. Squashed commit of the following: commit cf033ec29391d5358dea1d3b25da1738957478c4 Author: David Monniaux Date: Fri Jul 16 18:01:03 2021 +0200 comment for authors commit 2ff766a18432fd75739abab0b5741ded6b67a2a5 Author: David Monniaux Date: Fri Jul 16 15:29:25 2021 +0200 activate register pressure by default commit 67f4ae2b702cc95ed7cef67b726e15abbf18e768 Author: David Monniaux Date: Fri Jul 16 15:26:03 2021 +0200 use a more recognizable option name commit 6121be54b80a55fdadd8b64dfad53357148c9090 Author: David Monniaux Date: Fri Jul 16 14:13:50 2021 +0200 fix for KVX commit 43d4932e8ba9e00eb8c8788c86f56b6bddd46392 Author: David Monniaux Date: Fri Jul 16 13:28:26 2021 +0200 setup registers commit 169a221104c37737f12abe79711009fc0d88ce09 Author: David Monniaux Date: Fri Jul 16 13:00:56 2021 +0200 rm useless code commit d6a846b641787ea6a5ed113b1d7275ffb5028d9c Author: David Monniaux Date: Fri Jul 16 12:54:19 2021 +0200 rm "Admitted" commit fd4d085aa988a6044f89fc17e8422be23bc87f9d Merge: 70f5867e 56498b64 Author: David Monniaux Date: Fri Jul 16 12:30:25 2021 +0200 Merge remote-tracking branch 'origin/kvx-work' into kvx-sched-w-reg-press commit 70f5867e441e253869cb3b432af77636a186d1cb Author: David Monniaux Date: Fri Jul 16 12:26:27 2021 +0200 rm TODO commit f86f5df47b69053702661671340b0fcb31506aa3 Author: nicolas.nardino Date: Thu Jul 8 11:22:17 2021 +0200 add more debug info commit a4a0b36f56a94c19da301265a4e3acad1fbdf6c4 Author: nicolas.nardino Date: Thu Jul 8 11:20:49 2021 +0200 Deactivate sched validator (i think) commit af97fca0f1d824f3becf9c6895f44ad234e262f8 Author: nicolas.nardino Date: Tue Jul 6 15:32:35 2021 +0200 Add debug info commit b96a48de58e1969535865b7b345514a24f7178a6 Author: nicolas.nardino Date: Mon Jun 28 16:04:44 2021 +0200 Change temporary solution (see prev commits), and add option for it commit 9ac49c465f9c8969fba00e6242da0c188a6a3080 Author: nicolas.nardino Date: Fri Jun 25 09:42:41 2021 +0200 Changed printfs into debugs commit dfa09586ae40c70769eeda688a0e7f59f611749f Author: nicolas.nardino Date: Thu Jun 24 18:33:20 2021 +0200 Another scheduler commit c5e8595480604c78260017cc771b0e4195fdd182 Merge: 10cbe4b2 cf2aa686 Author: nicolas.nardino Date: Tue Jun 22 15:58:10 2021 +0200 Merge branch 'kvx-sched-w-reg-press' of gricad-gitlab.univ-grenoble-alpes.fr:sixcy/CompCert into kvx-sched-w-reg-press commit 10cbe4b28ef6dc5d02c9a5d4d369484e4943a18d Author: nicolas.nardino Date: Tue Jun 22 15:57:21 2021 +0200 Changed default threshold value following tests commit cf2aa686bcf9a823562fe977df6dd778d5467985 Merge: eddbce33 fe557bf6 Author: David Monniaux Date: Thu Jun 17 17:05:30 2021 +0200 Merge branch 'kvx-sched-w-reg-press' of gricad-gitlab.univ-grenoble-alpes.fr:sixcy/CompCert into kvx-sched-w-reg-press commit eddbce33e28c49bf7b9e83ebd5dbf6cb0d770090 Merge: 8f399dfa fae8d9b5 Author: David Monniaux Date: Thu Jun 17 17:05:20 2021 +0200 Merge branch 'kvx-sched-w-reg-press' of gricad-gitlab.univ-grenoble-alpes.fr:sixcy/CompCert into kvx-sched-w-reg-press commit 8f399dfa9d794f2f728f523ff1aa7788cc3599b2 Author: David Monniaux Date: Thu Jun 17 17:04:52 2021 +0200 fix for Risc-V commit fe557bf65ec738eaa078bc5e398ff690eb1f2b9e Author: nicolas.nardino Date: Thu Jun 17 17:03:53 2021 +0200 changed type of schedule_seq in x86 for compatibility commit fae8d9b5c5f93d5eda36f800eb0ca1837b237cba Author: nicolas.nardino Date: Thu Jun 17 17:00:57 2021 +0200 fix riscv/Machregsaux.mli commit 9759e94256fd09f4995418b67b7aedbcf84b4b10 Merge: 4413c27d 04b2489d Author: David Monniaux Date: Thu Jun 17 16:52:09 2021 +0200 Merge remote-tracking branch 'origin/kvx-work' into kvx-sched-w-reg-press commit 4413c27d6c6a3d69df34955d9d453c38b32174c7 Author: nicolas.nardino Date: Thu Jun 17 15:38:13 2021 +0200 Add option to set thresold and support for riscv commit 21278bd87e89210bcc287116f6e35fc1b52d0df2 Author: nicolas.nardino Date: Wed Jun 16 20:27:31 2021 +0200 Now working, tests show a decrease in spillage Should still find a proper way to treat the case mentioned in earlier commits commit 87c82b6fcf2bf825a8c60fc6a95498aac9f826d4 Author: nicolas.nardino Date: Tue Jun 15 14:44:56 2021 +0200 kinda fixed Spills are definitely reduced, but lots of arbitrary in there: See previous commit: need to determine what to do if pressure is too high but no schedulable instruction can reduce it. For now, advance time for at most 5 cycles, if still no suitable instruction, go back to CSP commit 19464b3992eadf7670acc7231896103ab54885e5 Author: nicolas.nardino Date: Tue Jun 15 12:07:43 2021 +0200 fixing Still need to find what to do when pressure is high but there are no instructions available that decrease it commit bff4e6ff0b782619b6fcc18751fa575cbb11de68 Author: nicolas.nardino Date: Mon Jun 14 17:39:58 2021 +0200 was very wrong, fixing commit 3eb3751f84348a20b7ce211fdbf1d01a9c4685a8 Author: nicolas.nardino Date: Mon Jun 14 14:46:01 2021 +0200 One fewer spill with new sched on `test/.../spille_forw.c` commit 66e15205c40de54639387a4c9b1cc78994525d55 Author: nicolas.nardino Date: Mon Jun 14 13:53:08 2021 +0200 scheduler written, need to test now commit 2b814b1f9bb30d9c8b59a713f69bced808bca7c7 Author: nicolas.nardino Date: Sat Jun 12 10:52:59 2021 +0200 work on the scheduler commit 1701e43316ee8e69e794a025a8c9979af6bb8c93 Author: nicolas.nardino Date: Thu Jun 10 16:31:51 2021 +0200 Work on new schedluer Renamed a test file, wrote function to compute pressure deltas, Still need to pass the info in some way; beginning of the actual scheduler function commit 386b9053177bb4ef2801cec00b717c400a828139 Author: nicolas.nardino Date: Tue Jun 8 16:53:19 2021 +0200 Fix RTLpathScheduleraux.get_live_regs_entry commit 9b6247b7996f3e0181d27ec0e20daffd28e0884f Author: nicolas.nardino Date: Tue Jun 8 16:06:36 2021 +0200 Another test : one spill when scheduled forward, none if not commit 52378f0600652a94edcc8c78e4b426243f717a89 Author: nicolas.nardino Date: Tue Jun 8 15:11:03 2021 +0200 Add some tests commit 2249f3c7771c285ccd25f6e94478be388a741da5 Author: nicolas.nardino Date: Sun Jun 6 20:49:34 2021 +0200 Adding debug info commit 9118878bd14e24cc04c2f36cab7aa7271a0f1852 Author: nicolas.nardino Date: Sun Jun 6 12:11:15 2021 +0200 Fixing scope error, and non-exhaustive pattern matching commit 599823a6410f1629f2b8704291839e0974bce83b Author: nicolas.nardino Date: Sat Jun 5 19:52:59 2021 +0200 function written, now needs testing commit 98a7a04258f2cf6caf9f18925cbeeae2f5b17be4 Author: nicolas.nardino Date: Fri Jun 4 16:56:32 2021 +0200 computing live regs at sb entry from its live output regs commit 7ae1fb0faea68ce5cfe04a232e49659247c244e9 Author: nicolas.nardino Date: Fri Jun 4 14:24:07 2021 +0200 Passing info of live regs to scheduler: beginning --- aarch64/Machregsaux.ml | 3 + aarch64/Machregsaux.mli | 3 + aarch64/PostpassSchedulingOracle.ml | 3 + aarch64/PrepassSchedulingOracle.ml | 29 +- arm/ExpansionOracle.ml | 18 +- arm/PrepassSchedulingOracle.ml | 7 +- common/DebugPrint.ml | 4 +- driver/Clflags.ml | 4 +- driver/Driver.ml | 14 +- kvx/ExpansionOracle.ml | 18 +- kvx/Machregsaux.ml | 2 + kvx/Machregsaux.mli | 3 + kvx/PostpassSchedulingOracle.ml | 10 +- kvx/PrepassSchedulingOracle.ml | 486 ++++++++++++++++++++++++++++++- kvx/PrepassSchedulingOracleDeps.ml | 18 +- powerpc/ExpansionOracle.ml | 18 +- powerpc/PrepassSchedulingOracle.ml | 7 +- riscV/Machregsaux.ml | 2 + riscV/Machregsaux.mli | 3 + riscV/PrepassSchedulingOracle.ml | 486 ++++++++++++++++++++++++++++++- riscV/PrepassSchedulingOracleDeps.ml | 18 +- scheduling/InstructionScheduler.ml | 503 ++++++++++++++++++++++++++++++++- scheduling/InstructionScheduler.mli | 16 ++ scheduling/RTLpathScheduleraux.ml | 187 +++++++++++- test/nardino/scheduling/entry_regs.c | 19 ++ test/nardino/scheduling/spille_backw.c | 114 ++++++++ test/nardino/scheduling/spille_forw.c | 166 +++++++++++ x86/ExpansionOracle.ml | 18 +- x86/PrepassSchedulingOracle.ml | 3 +- 29 files changed, 2146 insertions(+), 36 deletions(-) mode change 120000 => 100644 arm/ExpansionOracle.ml mode change 120000 => 100644 arm/PrepassSchedulingOracle.ml mode change 120000 => 100644 kvx/ExpansionOracle.ml mode change 120000 => 100644 kvx/PrepassSchedulingOracle.ml mode change 120000 => 100644 kvx/PrepassSchedulingOracleDeps.ml mode change 120000 => 100644 powerpc/ExpansionOracle.ml mode change 120000 => 100644 powerpc/PrepassSchedulingOracle.ml mode change 120000 => 100644 riscV/PrepassSchedulingOracle.ml mode change 120000 => 100644 riscV/PrepassSchedulingOracleDeps.ml create mode 100644 test/nardino/scheduling/entry_regs.c create mode 100644 test/nardino/scheduling/spille_backw.c create mode 100644 test/nardino/scheduling/spille_forw.c mode change 120000 => 100644 x86/ExpansionOracle.ml diff --git a/aarch64/Machregsaux.ml b/aarch64/Machregsaux.ml index 41db3bd4..98e461eb 100644 --- a/aarch64/Machregsaux.ml +++ b/aarch64/Machregsaux.ml @@ -19,3 +19,6 @@ let class_of_type = function | AST.Tint | AST.Tlong -> 0 | AST.Tfloat | AST.Tsingle -> 1 | AST.Tany32 | AST.Tany64 -> assert false + +(* number of available registers per class *) +let nr_regs = [| 29; 32 |] diff --git a/aarch64/Machregsaux.mli b/aarch64/Machregsaux.mli index 01b0f9fd..23ac1c9a 100644 --- a/aarch64/Machregsaux.mli +++ b/aarch64/Machregsaux.mli @@ -15,3 +15,6 @@ val is_scratch_register: string -> bool val class_of_type: AST.typ -> int + +(* Number of registers in each class *) +val nr_regs : int array diff --git a/aarch64/PostpassSchedulingOracle.ml b/aarch64/PostpassSchedulingOracle.ml index cde3e7a7..6f784238 100644 --- a/aarch64/PostpassSchedulingOracle.ml +++ b/aarch64/PostpassSchedulingOracle.ml @@ -507,6 +507,9 @@ let build_problem bb = { max_latency = -1; resource_bounds = opweights.pipelined_resource_bounds; + live_regs_entry = Registers.Regset.empty; (* unused here *) + typing = (fun x -> AST.Tint); (* unused here *) + reference_counting = None; instruction_usages = instruction_usages bb; latency_constraints = latency_constraints bb; } diff --git a/aarch64/PrepassSchedulingOracle.ml b/aarch64/PrepassSchedulingOracle.ml index 2c3eb14f..e09eea13 100644 --- a/aarch64/PrepassSchedulingOracle.ml +++ b/aarch64/PrepassSchedulingOracle.ml @@ -1,3 +1,16 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* Léo Gourdin UGA, VERIMAG *) +(* Nicolas Nardino ENS-Lyon, VERIMAG *) +(* *) +(* *) +(* *************************************************************) + open AST open RTL open Maps @@ -406,11 +419,15 @@ let get_alias_dependencies seqa = !deps;; *) -let define_problem (opweights : opweights) seqa = +let define_problem (opweights : opweights) (live_entry_regs : Regset.t) + (typing : RTLtyping.regenv) reference_counting seqa = let simple_deps = get_simple_dependencies opweights seqa in { max_latency = -1; resource_bounds = opweights.pipelined_resource_bounds; - instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa); + live_regs_entry = live_entry_regs; + typing = typing; + reference_counting = Some reference_counting; + instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa); latency_constraints = (* if (use_alias_analysis ()) then (get_alias_dependencies seqa) @ simple_deps @@ -439,7 +456,10 @@ let prepass_scheduler_by_name name problem early_ones = | "zigzag" -> zigzag_scheduler problem early_ones | _ -> scheduler_by_name name problem -let schedule_sequence (seqa : (instruction*Regset.t) array) = +let schedule_sequence (seqa : (instruction*Regset.t) array) + (live_regs_entry : Registers.Regset.t) + (typing : RTLtyping.regenv) + reference = let opweights = OpWeights.get_opweights () in try if (Array.length seqa) <= 1 @@ -449,7 +469,8 @@ let schedule_sequence (seqa : (instruction*Regset.t) array) = let nr_instructions = Array.length seqa in (if !Clflags.option_debug_compcert > 6 then Printf.printf "prepass scheduling length = %d\n" (Array.length seqa)); - let problem = define_problem opweights seqa in + let problem = define_problem opweights live_regs_entry + typing reference seqa in (if !Clflags.option_debug_compcert > 7 then (print_sequence stdout (Array.map fst seqa); print_problem stdout problem)); diff --git a/arm/ExpansionOracle.ml b/arm/ExpansionOracle.ml deleted file mode 120000 index ee2674bf..00000000 --- a/arm/ExpansionOracle.ml +++ /dev/null @@ -1 +0,0 @@ -../aarch64/ExpansionOracle.ml \ No newline at end of file diff --git a/arm/ExpansionOracle.ml b/arm/ExpansionOracle.ml new file mode 100644 index 00000000..3b63b80d --- /dev/null +++ b/arm/ExpansionOracle.ml @@ -0,0 +1,17 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Léo Gourdin UGA, VERIMAG *) +(* *) +(* Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +open RTLpathCommon + +let expanse (sb : superblock) code pm = (code, pm) + +let find_last_node_reg c = () diff --git a/arm/PrepassSchedulingOracle.ml b/arm/PrepassSchedulingOracle.ml deleted file mode 120000 index 9885fd52..00000000 --- a/arm/PrepassSchedulingOracle.ml +++ /dev/null @@ -1 +0,0 @@ -../x86/PrepassSchedulingOracle.ml \ No newline at end of file diff --git a/arm/PrepassSchedulingOracle.ml b/arm/PrepassSchedulingOracle.ml new file mode 100644 index 00000000..42a3da23 --- /dev/null +++ b/arm/PrepassSchedulingOracle.ml @@ -0,0 +1,6 @@ +open RTL +open Registers + +(* Do not do anything *) +let schedule_sequence (seqa : (instruction*Regset.t) array) + live_regs_entry typing reference = None diff --git a/common/DebugPrint.ml b/common/DebugPrint.ml index 6f8449ee..275e6a71 100644 --- a/common/DebugPrint.ml +++ b/common/DebugPrint.ml @@ -132,10 +132,10 @@ let print_instructions insts code = | None -> failwith "Did not get some" | Some thing -> thing in if (!debug_flag) then begin - debug "[ "; + debug "[\n"; List.iter ( fun n -> (PrintRTL.print_instruction stdout (P.to_int n, get_some @@ PTree.get n code)) - ) insts; debug "]" + ) insts; debug " ]" end let print_arrayp arr = begin diff --git a/driver/Clflags.ml b/driver/Clflags.ml index fa17c2d9..25bd2c78 100644 --- a/driver/Clflags.ml +++ b/driver/Clflags.ml @@ -51,7 +51,7 @@ let option_flooprotate = ref 0 (* rotate the innermost loops to have the conditi let option_mtune = ref "" let option_fprepass = ref true -let option_fprepass_sched = ref "list" +let option_fprepass_sched = ref "regpres" let option_fpostpass = ref true let option_fpostpass_sched = ref "list" @@ -115,4 +115,6 @@ let option_inline_auto_threshold = ref 0 let option_profile_arcs = ref false let option_fbranch_probabilities = ref true let option_debug_compcert = ref 0 +let option_regpres_threshold = ref 2 +let option_regpres_wait_window = ref false let main_function_name = ref "main" diff --git a/driver/Driver.ml b/driver/Driver.ml index 7192ba4b..3f5a4bd9 100644 --- a/driver/Driver.ml +++ b/driver/Driver.ml @@ -210,7 +210,9 @@ Processing options: -mtune= Type of CPU (for scheduling on some architectures) -fprepass Perform prepass scheduling (only on some architectures) [on] -fprepass= Perform postpass scheduling with the specified optimization [list] - (=list: list scheduling, =revlist: reverse list scheduling, =zigzag: zigzag scheduling, =ilp: ILP, =greedy: just packing bundles) + (=list: list scheduling, =revlist: reverse list scheduling, =regpres: list scheduling aware of register pressure, =regpres_bis: variant of regpres, =zigzag: zigzag scheduling, =ilp: ILP, =greedy: just packing bundles) + -regpres-threshold n With `-fprepass= regpres`, set threshold value for number of free registers before trying to decrease register pressure + -fregpres-wait-window When register pressure is high, use a 5-cycle waiting window instead of scheduling short paths first (default no) -fpostpass Perform postpass scheduling (only for K1 architecture) [on] -fpostpass= Perform postpass scheduling with the specified optimization [list] (=list: list scheduling, =ilp: ILP, =greedy: just packing bundles) @@ -296,9 +298,9 @@ let num_input_files = ref 0 let cmdline_actions = let f_opt name ref = [Exact("-f" ^ name), Set ref; Exact("-fno-" ^ name), Unset ref] in - let f_opt_str name ref strref = + let f_opt_str name default ref strref = [Exact("-f" ^ name ^ "="), String - (fun s -> (strref := (if s == "" then "list" else s)); ref := true) + (fun s -> (strref := (if s == "" then default else s)); ref := true) ] in let f_str name strref default = [Exact("-f" ^ name ^ "="), String @@ -342,6 +344,7 @@ let cmdline_actions = Exact "-fprofile-use=", String (fun s -> Profilingaux.load_profiling_info s); Exact "-finline-auto-threshold", Integer (fun n -> option_inline_auto_threshold := n); Exact "-debug-compcert", Integer (fun n -> option_debug_compcert := n); + Exact "-regpres-threshold", Integer (fun n -> option_regpres_threshold := n); Exact "-fsmall-data", Integer(fun n -> option_small_data := n); Exact "-fsmall-const", Integer(fun n -> option_small_const := n); Exact "-ffloat-const-prop", Integer(fun n -> option_ffloatconstprop := n); @@ -424,6 +427,7 @@ let cmdline_actions = @ f_opt "redundancy" option_fredundancy @ [ Exact "-mtune", String (fun s -> option_mtune := s) ] @ f_opt "prepass" option_fprepass + @ f_opt "regpres-wait-window" option_regpres_wait_window @ f_opt "postpass" option_fpostpass @ [ Exact "-ftailduplicate", Integer (fun n -> option_ftailduplicate := n) ] @ f_opt "predict" option_fpredict @@ -431,8 +435,8 @@ let cmdline_actions = @ [ Exact "-funrollbody", Integer (fun n -> option_funrollbody := n) ] @ [ Exact "-flooprotate", Integer (fun n -> option_flooprotate := n) ] @ f_opt "tracelinearize" option_ftracelinearize - @ f_opt_str "prepass" option_fprepass option_fprepass_sched - @ f_opt_str "postpass" option_fpostpass option_fpostpass_sched + @ f_opt_str "prepass" "regpress" option_fprepass option_fprepass_sched + @ f_opt_str "postpass" "list" option_fpostpass option_fpostpass_sched @ f_opt "inline" option_finline @ f_opt "inline-functions-called-once" option_finline_functions_called_once @ f_opt "globaladdrtmp" option_fglobaladdrtmp diff --git a/kvx/ExpansionOracle.ml b/kvx/ExpansionOracle.ml deleted file mode 120000 index ee2674bf..00000000 --- a/kvx/ExpansionOracle.ml +++ /dev/null @@ -1 +0,0 @@ -../aarch64/ExpansionOracle.ml \ No newline at end of file diff --git a/kvx/ExpansionOracle.ml b/kvx/ExpansionOracle.ml new file mode 100644 index 00000000..3b63b80d --- /dev/null +++ b/kvx/ExpansionOracle.ml @@ -0,0 +1,17 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Léo Gourdin UGA, VERIMAG *) +(* *) +(* Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +open RTLpathCommon + +let expanse (sb : superblock) code pm = (code, pm) + +let find_last_node_reg c = () diff --git a/kvx/Machregsaux.ml b/kvx/Machregsaux.ml index e3b18181..dbb89727 100644 --- a/kvx/Machregsaux.ml +++ b/kvx/Machregsaux.ml @@ -31,3 +31,5 @@ let class_of_type = function | AST.Tint | AST.Tlong | AST.Tfloat | AST.Tsingle -> 0 | AST.Tany32 | AST.Tany64 -> assert false + +let nr_regs = [| 59 |] diff --git a/kvx/Machregsaux.mli b/kvx/Machregsaux.mli index 01b0f9fd..23ac1c9a 100644 --- a/kvx/Machregsaux.mli +++ b/kvx/Machregsaux.mli @@ -15,3 +15,6 @@ val is_scratch_register: string -> bool val class_of_type: AST.typ -> int + +(* Number of registers in each class *) +val nr_regs : int array diff --git a/kvx/PostpassSchedulingOracle.ml b/kvx/PostpassSchedulingOracle.ml index 2107ce22..5ebad421 100644 --- a/kvx/PostpassSchedulingOracle.ml +++ b/kvx/PostpassSchedulingOracle.ml @@ -787,8 +787,14 @@ let latency_constraints bb = *) let build_problem bb = - { max_latency = -1; resource_bounds = resource_bounds; - instruction_usages = instruction_usages bb; latency_constraints = latency_constraints bb } +{ max_latency = -1; + resource_bounds = resource_bounds; + instruction_usages = instruction_usages bb; + latency_constraints = latency_constraints bb; + live_regs_entry = Registers.Regset.empty; (* unused here *) + typing = (fun x -> AST.Tint); (* unused here *) + reference_counting = None +} let rec find_min_opt (l: int option list) = match l with diff --git a/kvx/PrepassSchedulingOracle.ml b/kvx/PrepassSchedulingOracle.ml deleted file mode 120000 index 912e9ffa..00000000 --- a/kvx/PrepassSchedulingOracle.ml +++ /dev/null @@ -1 +0,0 @@ -../aarch64/PrepassSchedulingOracle.ml \ No newline at end of file diff --git a/kvx/PrepassSchedulingOracle.ml b/kvx/PrepassSchedulingOracle.ml new file mode 100644 index 00000000..53a81095 --- /dev/null +++ b/kvx/PrepassSchedulingOracle.ml @@ -0,0 +1,485 @@ +open AST +open RTL +open Maps +open InstructionScheduler +open Registers +open PrepassSchedulingOracleDeps + +let use_alias_analysis () = false + +let length_of_chunk = function +| Mint8signed +| Mint8unsigned -> 1 +| Mint16signed +| Mint16unsigned -> 2 +| Mint32 +| Mfloat32 +| Many32 -> 4 +| Mint64 +| Mfloat64 +| Many64 -> 8;; + +let get_simple_dependencies (opweights : opweights) (seqa : (instruction*Regset.t) array) = + let last_reg_reads : int list PTree.t ref = ref PTree.empty + and last_reg_write : (int*int) PTree.t ref = ref PTree.empty + and last_mem_reads : int list ref = ref [] + and last_mem_write : int option ref = ref None + and last_branch : int option ref = ref None + and last_non_pipelined_op : int array = Array.make + opweights.nr_non_pipelined_units ( -1 ) + and latency_constraints : latency_constraint list ref = ref [] in + let add_constraint instr_from instr_to latency = + assert (instr_from <= instr_to); + assert (latency >= 0); + if instr_from = instr_to + then (if latency = 0 + then () + else failwith "PrepassSchedulingOracle.get_dependencies: negative self-loop") + else + latency_constraints := + { instr_from = instr_from; + instr_to = instr_to; + latency = latency + }:: !latency_constraints + and get_last_reads reg = + match PTree.get reg !last_reg_reads + with Some l -> l + | None -> [] in + let add_input_mem i = + if not (use_alias_analysis ()) + then + begin + begin + (* Read after write *) + match !last_mem_write with + | None -> () + | Some j -> add_constraint j i 1 + end; + last_mem_reads := i :: !last_mem_reads + end + and add_output_mem i = + if not (use_alias_analysis ()) + then + begin + begin + (* Write after write *) + match !last_mem_write with + | None -> () + | Some j -> add_constraint j i 1 + end; + (* Write after read *) + List.iter (fun j -> add_constraint j i 0) !last_mem_reads; + last_mem_write := Some i; + last_mem_reads := [] + end + and add_input_reg i reg = + begin + (* Read after write *) + match PTree.get reg !last_reg_write with + | None -> () + | Some (j, latency) -> add_constraint j i latency + end; + last_reg_reads := PTree.set reg + (i :: get_last_reads reg) + !last_reg_reads + and add_output_reg i latency reg = + begin + (* Write after write *) + match PTree.get reg !last_reg_write with + | None -> () + | Some (j, _) -> add_constraint j i 1 + end; + begin + (* Write after read *) + List.iter (fun j -> add_constraint j i 0) (get_last_reads reg) + end; + last_reg_write := PTree.set reg (i, latency) !last_reg_write; + last_reg_reads := PTree.remove reg !last_reg_reads + in + let add_input_regs i regs = List.iter (add_input_reg i) regs in + let rec add_builtin_res i (res : reg builtin_res) = + match res with + | BR r -> add_output_reg i 10 r + | BR_none -> () + | BR_splitlong (hi, lo) -> add_builtin_res i hi; + add_builtin_res i lo in + let rec add_builtin_arg i (ba : reg builtin_arg) = + match ba with + | BA r -> add_input_reg i r + | BA_int _ | BA_long _ | BA_float _ | BA_single _ -> () + | BA_loadstack(_,_) -> add_input_mem i + | BA_addrstack _ -> () + | BA_loadglobal(_, _, _) -> add_input_mem i + | BA_addrglobal _ -> () + | BA_splitlong(hi, lo) -> add_builtin_arg i hi; + add_builtin_arg i lo + | BA_addptr(a1, a2) -> add_builtin_arg i a1; + add_builtin_arg i a2 in + let irreversible_action i = + match !last_branch with + | None -> () + | Some j -> add_constraint j i 1 in + let set_branch i = + irreversible_action i; + last_branch := Some i in + let add_non_pipelined_resources i resources = + Array.iter2 + (fun latency last -> + if latency >= 0 && last >= 0 then add_constraint last i latency) + resources last_non_pipelined_op; + Array.iteri (fun rsc latency -> + if latency >= 0 + then last_non_pipelined_op.(rsc) <- i) resources + in + Array.iteri + begin + fun i (insn, other_uses) -> + List.iter (fun use -> + add_input_reg i use) + (Regset.elements other_uses); + + match insn with + | Inop _ -> () + | Iop(op, inputs, output, _) -> + add_non_pipelined_resources i + (opweights.non_pipelined_resources_of_op op (List.length inputs)); + (if Op.is_trapping_op op then irreversible_action i); + add_input_regs i inputs; + add_output_reg i (opweights.latency_of_op op (List.length inputs)) output + | Iload(trap, chunk, addressing, addr_regs, output, _) -> + (if trap=TRAP then irreversible_action i); + add_input_mem i; + add_input_regs i addr_regs; + add_output_reg i (opweights.latency_of_load trap chunk addressing (List.length addr_regs)) output + | Istore(chunk, addressing, addr_regs, input, _) -> + irreversible_action i; + add_input_regs i addr_regs; + add_input_reg i input; + add_output_mem i + | Icall(signature, ef, inputs, output, _) -> + set_branch i; + (match ef with + | Datatypes.Coq_inl r -> add_input_reg i r + | Datatypes.Coq_inr symbol -> () + ); + add_input_mem i; + add_input_regs i inputs; + add_output_reg i (opweights.latency_of_call signature ef) output; + add_output_mem i; + failwith "Icall" + | Itailcall(signature, ef, inputs) -> + set_branch i; + (match ef with + | Datatypes.Coq_inl r -> add_input_reg i r + | Datatypes.Coq_inr symbol -> () + ); + add_input_mem i; + add_input_regs i inputs; + failwith "Itailcall" + | Ibuiltin(ef, builtin_inputs, builtin_output, _) -> + set_branch i; + add_input_mem i; + List.iter (add_builtin_arg i) builtin_inputs; + add_builtin_res i builtin_output; + add_output_mem i; + failwith "Ibuiltin" + | Icond(cond, inputs, _, _, _) -> + set_branch i; + add_input_mem i; + add_input_regs i inputs + | Ijumptable(input, _) -> + set_branch i; + add_input_reg i input; + failwith "Ijumptable" + | Ireturn(Some input) -> + set_branch i; + add_input_reg i input; + failwith "Ireturn" + | Ireturn(None) -> + set_branch i; + failwith "Ireturn none" + end seqa; + !latency_constraints;; + +let resources_of_instruction (opweights : opweights) = function + | Inop _ -> Array.map (fun _ -> 0) opweights.pipelined_resource_bounds + | Iop(op, inputs, output, _) -> + opweights.resources_of_op op (List.length inputs) + | Iload(trap, chunk, addressing, addr_regs, output, _) -> + opweights.resources_of_load trap chunk addressing (List.length addr_regs) + | Istore(chunk, addressing, addr_regs, input, _) -> + opweights.resources_of_store chunk addressing (List.length addr_regs) + | Icall(signature, ef, inputs, output, _) -> + opweights.resources_of_call signature ef + | Ibuiltin(ef, builtin_inputs, builtin_output, _) -> + opweights.resources_of_builtin ef + | Icond(cond, args, _, _ , _) -> + opweights.resources_of_cond cond (List.length args) + | Itailcall _ | Ijumptable _ | Ireturn _ -> opweights.pipelined_resource_bounds + +let print_sequence pp (seqa : instruction array) = + Array.iteri ( + fun i (insn : instruction) -> + PrintRTL.print_instruction pp (i, insn)) seqa;; + +type unique_id = int + +type 'a symbolic_term_node = + | STop of Op.operation * 'a list + | STinitial_reg of int + | STother of int;; + +type symbolic_term = { + hash_id : unique_id; + hash_ct : symbolic_term symbolic_term_node + };; + +let rec print_term channel term = + match term.hash_ct with + | STop(op, args) -> + PrintOp.print_operation print_term channel (op, args) + | STinitial_reg n -> Printf.fprintf channel "x%d" n + | STother n -> Printf.fprintf channel "y%d" n;; + +type symbolic_term_table = { + st_table : (unique_id symbolic_term_node, symbolic_term) Hashtbl.t; + mutable st_next_id : unique_id };; + +let hash_init () = { + st_table = Hashtbl.create 20; + st_next_id = 0 + };; + +let ground_to_id = function + | STop(op, l) -> STop(op, List.map (fun t -> t.hash_id) l) + | STinitial_reg r -> STinitial_reg r + | STother i -> STother i;; + +let hash_node (table : symbolic_term_table) (term : symbolic_term symbolic_term_node) : symbolic_term = + let grounded = ground_to_id term in + match Hashtbl.find_opt table.st_table grounded with + | Some x -> x + | None -> + let term' = { hash_id = table.st_next_id; + hash_ct = term } in + (if table.st_next_id = max_int then failwith "hash: max_int"); + table.st_next_id <- table.st_next_id + 1; + Hashtbl.add table.st_table grounded term'; + term';; + +type access = { + base : symbolic_term; + offset : int64; + length : int + };; + +let term_equal a b = (a.hash_id = b.hash_id);; + +let access_of_addressing get_reg chunk addressing args = + match addressing, args with + | (Op.Aindexed ofs), [reg] -> Some + { base = get_reg reg; + offset = Camlcoq.camlint64_of_ptrofs ofs; + length = length_of_chunk chunk + } + | _, _ -> None ;; +(* TODO: global *) + +let symbolic_execution (seqa : instruction array) = + let regs = ref PTree.empty + and table = hash_init() in + let assign reg term = regs := PTree.set reg term !regs + and hash term = hash_node table term in + let get_reg reg = + match PTree.get reg !regs with + | None -> hash (STinitial_reg (Camlcoq.P.to_int reg)) + | Some x -> x in + let targets = Array.make (Array.length seqa) None in + Array.iteri + begin + fun i insn -> + match insn with + | Iop(Op.Omove, [input], output, _) -> + assign output (get_reg input) + | Iop(op, inputs, output, _) -> + assign output (hash (STop(op, List.map get_reg inputs))) + + | Iload(trap, chunk, addressing, args, output, _) -> + let access = access_of_addressing get_reg chunk addressing args in + targets.(i) <- access; + assign output (hash (STother(i))) + + | Icall(_, _, _, output, _) + | Ibuiltin(_, _, BR output, _) -> + assign output (hash (STother(i))) + + | Istore(chunk, addressing, args, va, _) -> + let access = access_of_addressing get_reg chunk addressing args in + targets.(i) <- access + + | Inop _ -> () + | Ibuiltin(_, _, BR_none, _) -> () + | Ibuiltin(_, _, BR_splitlong _, _) -> failwith "BR_splitlong" + + | Itailcall (_, _, _) + |Icond (_, _, _, _, _) + |Ijumptable (_, _) + |Ireturn _ -> () + end seqa; + targets;; + +let print_access channel = function + | None -> Printf.fprintf channel "any" + | Some x -> Printf.fprintf channel "%a + %Ld" print_term x.base x.offset;; + +let print_targets channel seqa = + let targets = symbolic_execution seqa in + Array.iteri + (fun i insn -> + match insn with + | Iload _ -> Printf.fprintf channel "%d: load %a\n" + i print_access targets.(i) + | Istore _ -> Printf.fprintf channel "%d: store %a\n" + i print_access targets.(i) + | _ -> () + ) seqa;; + +let may_overlap a0 b0 = + match a0, b0 with + | (None, _) | (_ , None) -> true + | (Some a), (Some b) -> + if term_equal a.base b.base + then (max a.offset b.offset) < + (min (Int64.add (Int64.of_int a.length) a.offset) + (Int64.add (Int64.of_int b.length) b.offset)) + else match a.base.hash_ct, b.base.hash_ct with + | STop(Op.Oaddrsymbol(ida, ofsa),[]), + STop(Op.Oaddrsymbol(idb, ofsb),[]) -> + (ida=idb) && + let ao = Int64.add a.offset (Camlcoq.camlint64_of_ptrofs ofsa) + and bo = Int64.add b.offset (Camlcoq.camlint64_of_ptrofs ofsb) in + (max ao bo) < + (min (Int64.add (Int64.of_int a.length) ao) + (Int64.add (Int64.of_int b.length) bo)) + | STop(Op.Oaddrstack _, []), + STop(Op.Oaddrsymbol _, []) + | STop(Op.Oaddrsymbol _, []), + STop(Op.Oaddrstack _, []) -> false + | STop(Op.Oaddrstack(ofsa),[]), + STop(Op.Oaddrstack(ofsb),[]) -> + let ao = Int64.add a.offset (Camlcoq.camlint64_of_ptrofs ofsa) + and bo = Int64.add b.offset (Camlcoq.camlint64_of_ptrofs ofsb) in + (max ao bo) < + (min (Int64.add (Int64.of_int a.length) ao) + (Int64.add (Int64.of_int b.length) bo)) + | _ -> true;; + +(* +(* TODO suboptimal quadratic algorithm *) +let get_alias_dependencies seqa = + let targets = symbolic_execution seqa + and deps = ref [] in + let add_constraint instr_from instr_to latency = + deps := { instr_from = instr_from; + instr_to = instr_to; + latency = latency + }:: !deps in + for i=0 to (Array.length seqa)-1 + do + for j=0 to i-1 + do + match seqa.(j), seqa.(i) with + | (Istore _), ((Iload _) | (Istore _)) -> + if may_overlap targets.(j) targets.(i) + then add_constraint j i 1 + | (Iload _), (Istore _) -> + if may_overlap targets.(j) targets.(i) + then add_constraint j i 0 + | (Istore _ | Iload _), (Icall _ | Ibuiltin _) + | (Icall _ | Ibuiltin _), (Icall _ | Ibuiltin _ | Iload _ | Istore _) -> + add_constraint j i 1 + | (Inop _ | Iop _), _ + | _, (Inop _ | Iop _) + | (Iload _), (Iload _) -> () + done + done; + !deps;; + *) + +let define_problem (opweights : opweights) (live_entry_regs : Regset.t) + (typing : RTLtyping.regenv) reference_counting seqa = + let simple_deps = get_simple_dependencies opweights seqa in + { max_latency = -1; + resource_bounds = opweights.pipelined_resource_bounds; + live_regs_entry = live_entry_regs; + typing = typing; + reference_counting = Some reference_counting; + instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa); + latency_constraints = + (* if (use_alias_analysis ()) + then (get_alias_dependencies seqa) @ simple_deps + else *) simple_deps };; + +let zigzag_scheduler problem early_ones = + let nr_instructions = get_nr_instructions problem in + assert(nr_instructions = (Array.length early_ones)); + match list_scheduler problem with + | Some fwd_schedule -> + let fwd_makespan = fwd_schedule.((Array.length fwd_schedule) - 1) in + let constraints' = ref problem.latency_constraints in + Array.iteri (fun i is_early -> + if is_early then + constraints' := { + instr_from = i; + instr_to = nr_instructions ; + latency = fwd_makespan - fwd_schedule.(i) } ::!constraints' ) + early_ones; + validated_scheduler reverse_list_scheduler + { problem with latency_constraints = !constraints' } + | None -> None;; + +let prepass_scheduler_by_name name problem early_ones = + match name with + | "zigzag" -> zigzag_scheduler problem early_ones + | _ -> scheduler_by_name name problem + +let schedule_sequence (seqa : (instruction*Regset.t) array) + (live_regs_entry : Registers.Regset.t) + (typing : RTLtyping.regenv) + reference = + let opweights = OpWeights.get_opweights () in + try + if (Array.length seqa) <= 1 + then None + else + begin + let nr_instructions = Array.length seqa in + (if !Clflags.option_debug_compcert > 6 + then Printf.printf "prepass scheduling length = %d\n" (Array.length seqa)); + let problem = define_problem opweights live_regs_entry + typing reference seqa in + (if !Clflags.option_debug_compcert > 7 + then (print_sequence stdout (Array.map fst seqa); + print_problem stdout problem)); + match prepass_scheduler_by_name + (!Clflags.option_fprepass_sched) + problem + (Array.map (fun (ins, _) -> + match ins with + | Icond _ -> true + | _ -> false) seqa) with + | None -> Printf.printf "no solution in prepass scheduling\n"; + None + | Some solution -> + let positions = Array.init nr_instructions (fun i -> i) in + Array.sort (fun i j -> + let si = solution.(i) and sj = solution.(j) in + if si < sj then -1 + else if si > sj then 1 + else i - j) positions; + Some positions + end + with (Failure s) -> + Printf.printf "failure in prepass scheduling: %s\n" s; + None;; + diff --git a/kvx/PrepassSchedulingOracleDeps.ml b/kvx/PrepassSchedulingOracleDeps.ml deleted file mode 120000 index 1e955b85..00000000 --- a/kvx/PrepassSchedulingOracleDeps.ml +++ /dev/null @@ -1 +0,0 @@ -../aarch64/PrepassSchedulingOracleDeps.ml \ No newline at end of file diff --git a/kvx/PrepassSchedulingOracleDeps.ml b/kvx/PrepassSchedulingOracleDeps.ml new file mode 100644 index 00000000..8d10d406 --- /dev/null +++ b/kvx/PrepassSchedulingOracleDeps.ml @@ -0,0 +1,17 @@ +type called_function = (Registers.reg, AST.ident) Datatypes.sum + +type opweights = + { + pipelined_resource_bounds : int array; + nr_non_pipelined_units : int; + latency_of_op : Op.operation -> int -> int; + resources_of_op : Op.operation -> int -> int array; + non_pipelined_resources_of_op : Op.operation -> int -> int array; + latency_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int; + resources_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int array; + resources_of_store : AST.memory_chunk -> Op.addressing -> int -> int array; + resources_of_cond : Op.condition -> int -> int array; + latency_of_call : AST.signature -> called_function -> int; + resources_of_call : AST.signature -> called_function -> int array; + resources_of_builtin : AST.external_function -> int array + };; diff --git a/powerpc/ExpansionOracle.ml b/powerpc/ExpansionOracle.ml deleted file mode 120000 index ee2674bf..00000000 --- a/powerpc/ExpansionOracle.ml +++ /dev/null @@ -1 +0,0 @@ -../aarch64/ExpansionOracle.ml \ No newline at end of file diff --git a/powerpc/ExpansionOracle.ml b/powerpc/ExpansionOracle.ml new file mode 100644 index 00000000..3b63b80d --- /dev/null +++ b/powerpc/ExpansionOracle.ml @@ -0,0 +1,17 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Léo Gourdin UGA, VERIMAG *) +(* *) +(* Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +open RTLpathCommon + +let expanse (sb : superblock) code pm = (code, pm) + +let find_last_node_reg c = () diff --git a/powerpc/PrepassSchedulingOracle.ml b/powerpc/PrepassSchedulingOracle.ml deleted file mode 120000 index 9885fd52..00000000 --- a/powerpc/PrepassSchedulingOracle.ml +++ /dev/null @@ -1 +0,0 @@ -../x86/PrepassSchedulingOracle.ml \ No newline at end of file diff --git a/powerpc/PrepassSchedulingOracle.ml b/powerpc/PrepassSchedulingOracle.ml new file mode 100644 index 00000000..42a3da23 --- /dev/null +++ b/powerpc/PrepassSchedulingOracle.ml @@ -0,0 +1,6 @@ +open RTL +open Registers + +(* Do not do anything *) +let schedule_sequence (seqa : (instruction*Regset.t) array) + live_regs_entry typing reference = None diff --git a/riscV/Machregsaux.ml b/riscV/Machregsaux.ml index 840943e7..e3e47946 100644 --- a/riscV/Machregsaux.ml +++ b/riscV/Machregsaux.ml @@ -18,3 +18,5 @@ let class_of_type = function | AST.Tint | AST.Tlong -> 0 | AST.Tfloat | AST.Tsingle -> 1 | AST.Tany32 | AST.Tany64 -> assert false + +let nr_regs = [| 26; 32|] diff --git a/riscV/Machregsaux.mli b/riscV/Machregsaux.mli index 01b0f9fd..bb3777bf 100644 --- a/riscV/Machregsaux.mli +++ b/riscV/Machregsaux.mli @@ -15,3 +15,6 @@ val is_scratch_register: string -> bool val class_of_type: AST.typ -> int + +(* Number of registers in each class *) +val nr_regs : int array diff --git a/riscV/PrepassSchedulingOracle.ml b/riscV/PrepassSchedulingOracle.ml deleted file mode 120000 index 912e9ffa..00000000 --- a/riscV/PrepassSchedulingOracle.ml +++ /dev/null @@ -1 +0,0 @@ -../aarch64/PrepassSchedulingOracle.ml \ No newline at end of file diff --git a/riscV/PrepassSchedulingOracle.ml b/riscV/PrepassSchedulingOracle.ml new file mode 100644 index 00000000..53a81095 --- /dev/null +++ b/riscV/PrepassSchedulingOracle.ml @@ -0,0 +1,485 @@ +open AST +open RTL +open Maps +open InstructionScheduler +open Registers +open PrepassSchedulingOracleDeps + +let use_alias_analysis () = false + +let length_of_chunk = function +| Mint8signed +| Mint8unsigned -> 1 +| Mint16signed +| Mint16unsigned -> 2 +| Mint32 +| Mfloat32 +| Many32 -> 4 +| Mint64 +| Mfloat64 +| Many64 -> 8;; + +let get_simple_dependencies (opweights : opweights) (seqa : (instruction*Regset.t) array) = + let last_reg_reads : int list PTree.t ref = ref PTree.empty + and last_reg_write : (int*int) PTree.t ref = ref PTree.empty + and last_mem_reads : int list ref = ref [] + and last_mem_write : int option ref = ref None + and last_branch : int option ref = ref None + and last_non_pipelined_op : int array = Array.make + opweights.nr_non_pipelined_units ( -1 ) + and latency_constraints : latency_constraint list ref = ref [] in + let add_constraint instr_from instr_to latency = + assert (instr_from <= instr_to); + assert (latency >= 0); + if instr_from = instr_to + then (if latency = 0 + then () + else failwith "PrepassSchedulingOracle.get_dependencies: negative self-loop") + else + latency_constraints := + { instr_from = instr_from; + instr_to = instr_to; + latency = latency + }:: !latency_constraints + and get_last_reads reg = + match PTree.get reg !last_reg_reads + with Some l -> l + | None -> [] in + let add_input_mem i = + if not (use_alias_analysis ()) + then + begin + begin + (* Read after write *) + match !last_mem_write with + | None -> () + | Some j -> add_constraint j i 1 + end; + last_mem_reads := i :: !last_mem_reads + end + and add_output_mem i = + if not (use_alias_analysis ()) + then + begin + begin + (* Write after write *) + match !last_mem_write with + | None -> () + | Some j -> add_constraint j i 1 + end; + (* Write after read *) + List.iter (fun j -> add_constraint j i 0) !last_mem_reads; + last_mem_write := Some i; + last_mem_reads := [] + end + and add_input_reg i reg = + begin + (* Read after write *) + match PTree.get reg !last_reg_write with + | None -> () + | Some (j, latency) -> add_constraint j i latency + end; + last_reg_reads := PTree.set reg + (i :: get_last_reads reg) + !last_reg_reads + and add_output_reg i latency reg = + begin + (* Write after write *) + match PTree.get reg !last_reg_write with + | None -> () + | Some (j, _) -> add_constraint j i 1 + end; + begin + (* Write after read *) + List.iter (fun j -> add_constraint j i 0) (get_last_reads reg) + end; + last_reg_write := PTree.set reg (i, latency) !last_reg_write; + last_reg_reads := PTree.remove reg !last_reg_reads + in + let add_input_regs i regs = List.iter (add_input_reg i) regs in + let rec add_builtin_res i (res : reg builtin_res) = + match res with + | BR r -> add_output_reg i 10 r + | BR_none -> () + | BR_splitlong (hi, lo) -> add_builtin_res i hi; + add_builtin_res i lo in + let rec add_builtin_arg i (ba : reg builtin_arg) = + match ba with + | BA r -> add_input_reg i r + | BA_int _ | BA_long _ | BA_float _ | BA_single _ -> () + | BA_loadstack(_,_) -> add_input_mem i + | BA_addrstack _ -> () + | BA_loadglobal(_, _, _) -> add_input_mem i + | BA_addrglobal _ -> () + | BA_splitlong(hi, lo) -> add_builtin_arg i hi; + add_builtin_arg i lo + | BA_addptr(a1, a2) -> add_builtin_arg i a1; + add_builtin_arg i a2 in + let irreversible_action i = + match !last_branch with + | None -> () + | Some j -> add_constraint j i 1 in + let set_branch i = + irreversible_action i; + last_branch := Some i in + let add_non_pipelined_resources i resources = + Array.iter2 + (fun latency last -> + if latency >= 0 && last >= 0 then add_constraint last i latency) + resources last_non_pipelined_op; + Array.iteri (fun rsc latency -> + if latency >= 0 + then last_non_pipelined_op.(rsc) <- i) resources + in + Array.iteri + begin + fun i (insn, other_uses) -> + List.iter (fun use -> + add_input_reg i use) + (Regset.elements other_uses); + + match insn with + | Inop _ -> () + | Iop(op, inputs, output, _) -> + add_non_pipelined_resources i + (opweights.non_pipelined_resources_of_op op (List.length inputs)); + (if Op.is_trapping_op op then irreversible_action i); + add_input_regs i inputs; + add_output_reg i (opweights.latency_of_op op (List.length inputs)) output + | Iload(trap, chunk, addressing, addr_regs, output, _) -> + (if trap=TRAP then irreversible_action i); + add_input_mem i; + add_input_regs i addr_regs; + add_output_reg i (opweights.latency_of_load trap chunk addressing (List.length addr_regs)) output + | Istore(chunk, addressing, addr_regs, input, _) -> + irreversible_action i; + add_input_regs i addr_regs; + add_input_reg i input; + add_output_mem i + | Icall(signature, ef, inputs, output, _) -> + set_branch i; + (match ef with + | Datatypes.Coq_inl r -> add_input_reg i r + | Datatypes.Coq_inr symbol -> () + ); + add_input_mem i; + add_input_regs i inputs; + add_output_reg i (opweights.latency_of_call signature ef) output; + add_output_mem i; + failwith "Icall" + | Itailcall(signature, ef, inputs) -> + set_branch i; + (match ef with + | Datatypes.Coq_inl r -> add_input_reg i r + | Datatypes.Coq_inr symbol -> () + ); + add_input_mem i; + add_input_regs i inputs; + failwith "Itailcall" + | Ibuiltin(ef, builtin_inputs, builtin_output, _) -> + set_branch i; + add_input_mem i; + List.iter (add_builtin_arg i) builtin_inputs; + add_builtin_res i builtin_output; + add_output_mem i; + failwith "Ibuiltin" + | Icond(cond, inputs, _, _, _) -> + set_branch i; + add_input_mem i; + add_input_regs i inputs + | Ijumptable(input, _) -> + set_branch i; + add_input_reg i input; + failwith "Ijumptable" + | Ireturn(Some input) -> + set_branch i; + add_input_reg i input; + failwith "Ireturn" + | Ireturn(None) -> + set_branch i; + failwith "Ireturn none" + end seqa; + !latency_constraints;; + +let resources_of_instruction (opweights : opweights) = function + | Inop _ -> Array.map (fun _ -> 0) opweights.pipelined_resource_bounds + | Iop(op, inputs, output, _) -> + opweights.resources_of_op op (List.length inputs) + | Iload(trap, chunk, addressing, addr_regs, output, _) -> + opweights.resources_of_load trap chunk addressing (List.length addr_regs) + | Istore(chunk, addressing, addr_regs, input, _) -> + opweights.resources_of_store chunk addressing (List.length addr_regs) + | Icall(signature, ef, inputs, output, _) -> + opweights.resources_of_call signature ef + | Ibuiltin(ef, builtin_inputs, builtin_output, _) -> + opweights.resources_of_builtin ef + | Icond(cond, args, _, _ , _) -> + opweights.resources_of_cond cond (List.length args) + | Itailcall _ | Ijumptable _ | Ireturn _ -> opweights.pipelined_resource_bounds + +let print_sequence pp (seqa : instruction array) = + Array.iteri ( + fun i (insn : instruction) -> + PrintRTL.print_instruction pp (i, insn)) seqa;; + +type unique_id = int + +type 'a symbolic_term_node = + | STop of Op.operation * 'a list + | STinitial_reg of int + | STother of int;; + +type symbolic_term = { + hash_id : unique_id; + hash_ct : symbolic_term symbolic_term_node + };; + +let rec print_term channel term = + match term.hash_ct with + | STop(op, args) -> + PrintOp.print_operation print_term channel (op, args) + | STinitial_reg n -> Printf.fprintf channel "x%d" n + | STother n -> Printf.fprintf channel "y%d" n;; + +type symbolic_term_table = { + st_table : (unique_id symbolic_term_node, symbolic_term) Hashtbl.t; + mutable st_next_id : unique_id };; + +let hash_init () = { + st_table = Hashtbl.create 20; + st_next_id = 0 + };; + +let ground_to_id = function + | STop(op, l) -> STop(op, List.map (fun t -> t.hash_id) l) + | STinitial_reg r -> STinitial_reg r + | STother i -> STother i;; + +let hash_node (table : symbolic_term_table) (term : symbolic_term symbolic_term_node) : symbolic_term = + let grounded = ground_to_id term in + match Hashtbl.find_opt table.st_table grounded with + | Some x -> x + | None -> + let term' = { hash_id = table.st_next_id; + hash_ct = term } in + (if table.st_next_id = max_int then failwith "hash: max_int"); + table.st_next_id <- table.st_next_id + 1; + Hashtbl.add table.st_table grounded term'; + term';; + +type access = { + base : symbolic_term; + offset : int64; + length : int + };; + +let term_equal a b = (a.hash_id = b.hash_id);; + +let access_of_addressing get_reg chunk addressing args = + match addressing, args with + | (Op.Aindexed ofs), [reg] -> Some + { base = get_reg reg; + offset = Camlcoq.camlint64_of_ptrofs ofs; + length = length_of_chunk chunk + } + | _, _ -> None ;; +(* TODO: global *) + +let symbolic_execution (seqa : instruction array) = + let regs = ref PTree.empty + and table = hash_init() in + let assign reg term = regs := PTree.set reg term !regs + and hash term = hash_node table term in + let get_reg reg = + match PTree.get reg !regs with + | None -> hash (STinitial_reg (Camlcoq.P.to_int reg)) + | Some x -> x in + let targets = Array.make (Array.length seqa) None in + Array.iteri + begin + fun i insn -> + match insn with + | Iop(Op.Omove, [input], output, _) -> + assign output (get_reg input) + | Iop(op, inputs, output, _) -> + assign output (hash (STop(op, List.map get_reg inputs))) + + | Iload(trap, chunk, addressing, args, output, _) -> + let access = access_of_addressing get_reg chunk addressing args in + targets.(i) <- access; + assign output (hash (STother(i))) + + | Icall(_, _, _, output, _) + | Ibuiltin(_, _, BR output, _) -> + assign output (hash (STother(i))) + + | Istore(chunk, addressing, args, va, _) -> + let access = access_of_addressing get_reg chunk addressing args in + targets.(i) <- access + + | Inop _ -> () + | Ibuiltin(_, _, BR_none, _) -> () + | Ibuiltin(_, _, BR_splitlong _, _) -> failwith "BR_splitlong" + + | Itailcall (_, _, _) + |Icond (_, _, _, _, _) + |Ijumptable (_, _) + |Ireturn _ -> () + end seqa; + targets;; + +let print_access channel = function + | None -> Printf.fprintf channel "any" + | Some x -> Printf.fprintf channel "%a + %Ld" print_term x.base x.offset;; + +let print_targets channel seqa = + let targets = symbolic_execution seqa in + Array.iteri + (fun i insn -> + match insn with + | Iload _ -> Printf.fprintf channel "%d: load %a\n" + i print_access targets.(i) + | Istore _ -> Printf.fprintf channel "%d: store %a\n" + i print_access targets.(i) + | _ -> () + ) seqa;; + +let may_overlap a0 b0 = + match a0, b0 with + | (None, _) | (_ , None) -> true + | (Some a), (Some b) -> + if term_equal a.base b.base + then (max a.offset b.offset) < + (min (Int64.add (Int64.of_int a.length) a.offset) + (Int64.add (Int64.of_int b.length) b.offset)) + else match a.base.hash_ct, b.base.hash_ct with + | STop(Op.Oaddrsymbol(ida, ofsa),[]), + STop(Op.Oaddrsymbol(idb, ofsb),[]) -> + (ida=idb) && + let ao = Int64.add a.offset (Camlcoq.camlint64_of_ptrofs ofsa) + and bo = Int64.add b.offset (Camlcoq.camlint64_of_ptrofs ofsb) in + (max ao bo) < + (min (Int64.add (Int64.of_int a.length) ao) + (Int64.add (Int64.of_int b.length) bo)) + | STop(Op.Oaddrstack _, []), + STop(Op.Oaddrsymbol _, []) + | STop(Op.Oaddrsymbol _, []), + STop(Op.Oaddrstack _, []) -> false + | STop(Op.Oaddrstack(ofsa),[]), + STop(Op.Oaddrstack(ofsb),[]) -> + let ao = Int64.add a.offset (Camlcoq.camlint64_of_ptrofs ofsa) + and bo = Int64.add b.offset (Camlcoq.camlint64_of_ptrofs ofsb) in + (max ao bo) < + (min (Int64.add (Int64.of_int a.length) ao) + (Int64.add (Int64.of_int b.length) bo)) + | _ -> true;; + +(* +(* TODO suboptimal quadratic algorithm *) +let get_alias_dependencies seqa = + let targets = symbolic_execution seqa + and deps = ref [] in + let add_constraint instr_from instr_to latency = + deps := { instr_from = instr_from; + instr_to = instr_to; + latency = latency + }:: !deps in + for i=0 to (Array.length seqa)-1 + do + for j=0 to i-1 + do + match seqa.(j), seqa.(i) with + | (Istore _), ((Iload _) | (Istore _)) -> + if may_overlap targets.(j) targets.(i) + then add_constraint j i 1 + | (Iload _), (Istore _) -> + if may_overlap targets.(j) targets.(i) + then add_constraint j i 0 + | (Istore _ | Iload _), (Icall _ | Ibuiltin _) + | (Icall _ | Ibuiltin _), (Icall _ | Ibuiltin _ | Iload _ | Istore _) -> + add_constraint j i 1 + | (Inop _ | Iop _), _ + | _, (Inop _ | Iop _) + | (Iload _), (Iload _) -> () + done + done; + !deps;; + *) + +let define_problem (opweights : opweights) (live_entry_regs : Regset.t) + (typing : RTLtyping.regenv) reference_counting seqa = + let simple_deps = get_simple_dependencies opweights seqa in + { max_latency = -1; + resource_bounds = opweights.pipelined_resource_bounds; + live_regs_entry = live_entry_regs; + typing = typing; + reference_counting = Some reference_counting; + instruction_usages = Array.map (resources_of_instruction opweights) (Array.map fst seqa); + latency_constraints = + (* if (use_alias_analysis ()) + then (get_alias_dependencies seqa) @ simple_deps + else *) simple_deps };; + +let zigzag_scheduler problem early_ones = + let nr_instructions = get_nr_instructions problem in + assert(nr_instructions = (Array.length early_ones)); + match list_scheduler problem with + | Some fwd_schedule -> + let fwd_makespan = fwd_schedule.((Array.length fwd_schedule) - 1) in + let constraints' = ref problem.latency_constraints in + Array.iteri (fun i is_early -> + if is_early then + constraints' := { + instr_from = i; + instr_to = nr_instructions ; + latency = fwd_makespan - fwd_schedule.(i) } ::!constraints' ) + early_ones; + validated_scheduler reverse_list_scheduler + { problem with latency_constraints = !constraints' } + | None -> None;; + +let prepass_scheduler_by_name name problem early_ones = + match name with + | "zigzag" -> zigzag_scheduler problem early_ones + | _ -> scheduler_by_name name problem + +let schedule_sequence (seqa : (instruction*Regset.t) array) + (live_regs_entry : Registers.Regset.t) + (typing : RTLtyping.regenv) + reference = + let opweights = OpWeights.get_opweights () in + try + if (Array.length seqa) <= 1 + then None + else + begin + let nr_instructions = Array.length seqa in + (if !Clflags.option_debug_compcert > 6 + then Printf.printf "prepass scheduling length = %d\n" (Array.length seqa)); + let problem = define_problem opweights live_regs_entry + typing reference seqa in + (if !Clflags.option_debug_compcert > 7 + then (print_sequence stdout (Array.map fst seqa); + print_problem stdout problem)); + match prepass_scheduler_by_name + (!Clflags.option_fprepass_sched) + problem + (Array.map (fun (ins, _) -> + match ins with + | Icond _ -> true + | _ -> false) seqa) with + | None -> Printf.printf "no solution in prepass scheduling\n"; + None + | Some solution -> + let positions = Array.init nr_instructions (fun i -> i) in + Array.sort (fun i j -> + let si = solution.(i) and sj = solution.(j) in + if si < sj then -1 + else if si > sj then 1 + else i - j) positions; + Some positions + end + with (Failure s) -> + Printf.printf "failure in prepass scheduling: %s\n" s; + None;; + diff --git a/riscV/PrepassSchedulingOracleDeps.ml b/riscV/PrepassSchedulingOracleDeps.ml deleted file mode 120000 index 1e955b85..00000000 --- a/riscV/PrepassSchedulingOracleDeps.ml +++ /dev/null @@ -1 +0,0 @@ -../aarch64/PrepassSchedulingOracleDeps.ml \ No newline at end of file diff --git a/riscV/PrepassSchedulingOracleDeps.ml b/riscV/PrepassSchedulingOracleDeps.ml new file mode 100644 index 00000000..8d10d406 --- /dev/null +++ b/riscV/PrepassSchedulingOracleDeps.ml @@ -0,0 +1,17 @@ +type called_function = (Registers.reg, AST.ident) Datatypes.sum + +type opweights = + { + pipelined_resource_bounds : int array; + nr_non_pipelined_units : int; + latency_of_op : Op.operation -> int -> int; + resources_of_op : Op.operation -> int -> int array; + non_pipelined_resources_of_op : Op.operation -> int -> int array; + latency_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int; + resources_of_load : AST.trapping_mode -> AST.memory_chunk -> Op.addressing -> int -> int array; + resources_of_store : AST.memory_chunk -> Op.addressing -> int -> int array; + resources_of_cond : Op.condition -> int -> int array; + latency_of_call : AST.signature -> called_function -> int; + resources_of_call : AST.signature -> called_function -> int array; + resources_of_builtin : AST.external_function -> int array + };; diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml index eab0b21a..0203d9c8 100644 --- a/scheduling/InstructionScheduler.ml +++ b/scheduling/InstructionScheduler.ml @@ -33,6 +33,10 @@ type latency_constraint = { type problem = { max_latency : int; resource_bounds : int array; + live_regs_entry : Registers.Regset.t; + typing : RTLtyping.regenv; + reference_counting : ((Registers.reg, int * int) Hashtbl.t + * ((Registers.reg * bool) list array)) option; instruction_usages : int array array; latency_constraints : latency_constraint list; };; @@ -118,6 +122,13 @@ let vector_less_equal a b = true with Exit -> false;; +(* let vector_add a b = + * assert ((Array.length a) = (Array.length b)); + * for i=0 to (Array.length a)-1 + * do + * b.(i) <- b.(i) + a.(i) + * done;; *) + let vector_subtract a b = assert ((Array.length a) = (Array.length b)); for i=0 to (Array.length a)-1 @@ -257,8 +268,8 @@ let priority_list_scheduler (order : list_scheduler_order) assert(!time >= 0); !time with Exit -> -1 - in + let advance_time() = begin (if !current_time < max_time-1 @@ -267,7 +278,8 @@ let priority_list_scheduler (order : list_scheduler_order) Array.blit problem.resource_bounds 0 current_resources 0 (Array.length current_resources); ready.(!current_time + 1) <- - InstrSet.union (ready.(!current_time)) (ready.(!current_time + 1)); + InstrSet.union (ready.(!current_time)) + (ready.(!current_time + 1)); ready.(!current_time) <- InstrSet.empty; end); incr current_time @@ -334,6 +346,485 @@ let list_scheduler = priority_list_scheduler CRITICAL_PATH_ORDER;; (* dummy code for placating ocaml's warnings *) let _ = fun x -> priority_list_scheduler INSTRUCTION_ORDER x;; + +(* A scheduler sensitive to register pressure *) +let reg_pres_scheduler (problem : problem) : solution option = + DebugPrint.debug_flag := true; + + let nr_instructions = get_nr_instructions problem in + + if !Clflags.option_debug_compcert > 6 then + DebugPrint.debug "SCHEDULING_SUPERBLOCK %d\n" nr_instructions; + + let successors = get_successors problem + and predecessors = get_predecessors problem + and times = Array.make (nr_instructions+1) (-1) in + let live_regs_entry = problem.live_regs_entry in + + let available_regs = Array.copy Machregsaux.nr_regs in + + let nr_types_regs = Array.length available_regs in + + let thres = Array.fold_left (min) + (max !(Clflags.option_regpres_threshold) 0) + Machregsaux.nr_regs + in + + + let regs_thresholds = Array.make nr_types_regs thres in + (* placeholder value *) + + let class_r r = + Machregsaux.class_of_type (problem.typing r) in + + let live_regs = Hashtbl.create 42 in + + List.iter (fun r -> let classe = Machregsaux.class_of_type + (problem.typing r) in + available_regs.(classe) + <- available_regs.(classe) - 1; + Hashtbl.add live_regs r classe) + (Registers.Regset.elements live_regs_entry); + + let csr_b = ref false in + + let counts, mentions = + match problem.reference_counting with + | Some (l, r) -> l, r + | None -> assert false + in + + let fold_delta i = (fun a (r, b) -> + a + + if class_r r <> i then 0 else + (if b then + if (Hashtbl.find counts r = (i, 1)) + then 1 else 0 + else + match Hashtbl.find_opt live_regs r with + | None -> -1 + | Some t -> 0 + )) in + + let priorities = critical_paths successors in + + let current_resources = Array.copy problem.resource_bounds in + + let module InstrSet = + struct + module MSet = + Set.Make (struct + type t=int + let compare x y = + match priorities.(y) - priorities.(x) with + | 0 -> x - y + | z -> z + end) + + let empty = MSet.empty + let is_empty = MSet.is_empty + let add = MSet.add + let remove = MSet.remove + let union = MSet.union + let iter = MSet.iter + + let compare_regs i x y = + let pyi = List.fold_left (fold_delta i) 0 mentions.(y) in + (* print_int y; + * print_string " "; + * print_int pyi; + * print_newline (); + * flush stdout; *) + let pxi = List.fold_left (fold_delta i) 0 mentions.(x) in + match pyi - pxi with + | 0 -> (match priorities.(y) - priorities.(x) with + | 0 -> x - y + | z -> z) + | z -> z + + (** t is the register class *) + let sched_CSR t ready usages = + (* print_string "looking for max delta"; + * print_newline (); + * flush stdout; *) + let result = ref (-1) in + iter (fun i -> + if vector_less_equal usages.(i) current_resources + then if !result = -1 || (compare_regs t !result i > 0) + then result := i) ready; + !result + end + in + + let max_time = bound_max_time problem + 5*nr_instructions in + let ready = Array.make max_time InstrSet.empty in + + Array.iteri (fun i preds -> + if i < nr_instructions && preds = [] + then ready.(0) <- InstrSet.add i ready.(0)) predecessors; + + let current_time = ref 0 + and earliest_time i = + try + let time = ref (-1) in + List.iter (fun (j, latency) -> + if times.(j) < 0 + then raise Exit + else let t = times.(j) + latency in + if t > !time + then time := t) predecessors.(i); + assert (!time >= 0); + !time + with Exit -> -1 + in + + let advance_time () = + (if !current_time < max_time-1 + then ( + Array.blit problem.resource_bounds 0 current_resources 0 + (Array.length current_resources); + ready.(!current_time + 1) <- + InstrSet.union (ready.(!current_time)) + (ready.(!current_time +1)); + ready.(!current_time) <- InstrSet.empty)); + incr current_time + in + + (* ALL MENTIONS TO cnt ARE PLACEHOLDERS *) + let cnt = ref 0 in + + let attempt_scheduling ready usages = + let result = ref (-1) in + try + Array.iteri (fun i avlregs -> + (* print_string "avlregs: "; + * print_int i; + * print_string " "; + * print_int avlregs; + * print_newline (); + * print_string "live regs: "; + * print_int (Hashtbl.length live_regs); + * print_newline (); + * flush stdout; *) + if avlregs <= regs_thresholds.(i) + then ( + csr_b := true; + let maybe = InstrSet.sched_CSR i ready usages in + (* print_string "maybe\n"; + * print_int maybe; + * print_newline (); + * flush stdout; *) + (if maybe > 0 && + let delta = + List.fold_left (fold_delta i) 0 mentions.(maybe) in + (* print_string "delta "; + * print_int delta; + * print_newline (); + * flush stdout; *) + delta > 0 + then + (vector_subtract usages.(maybe) current_resources; + result := maybe) + else + if not !Clflags.option_regpres_wait_window + then + (InstrSet.iter (fun ins -> + if vector_less_equal usages.(ins) current_resources && + List.fold_left (fold_delta i) 0 mentions.(maybe) >= 0 + then result := ins + ) ready; + if !result <> -1 then + vector_subtract usages.(!result) current_resources) + else + (incr cnt) + ); + raise Exit)) available_regs; + InstrSet.iter (fun i -> + if vector_less_equal usages.(i) current_resources + then ( + vector_subtract usages.(i) current_resources; + result := i; + raise Exit)) ready; + -1 + with Exit -> + !result in + + while !current_time < max_time + do + if (InstrSet.is_empty ready.(!current_time)) + then advance_time () + else + match attempt_scheduling ready.(!current_time) + problem.instruction_usages with + | -1 -> advance_time() + | i -> (assert(times.(i) < 0); + ((* print_string "INSTR ISSUED: "; + * print_int i; + * print_newline (); + * flush stdout; *) + if !csr_b && !Clflags.option_debug_compcert > 6 then + DebugPrint.debug "REGPRES: high pres class %d\n" i; + csr_b := false; + if !Clflags.option_regpres_wait_window then + cnt := 0; + List.iter (fun (r,b) -> + if b then + (match Hashtbl.find_opt counts r with + | None -> assert false + | Some (t, n) -> + Hashtbl.remove counts r; + if n = 1 then + (Hashtbl.remove live_regs r; + available_regs.(t) + <- available_regs.(t) + 1)) + else + let t = class_r r in + match Hashtbl.find_opt live_regs r with + | None -> (Hashtbl.add live_regs r t; + available_regs.(t) + <- available_regs.(t) - 1) + | Some i -> () + ) mentions.(i)); + times.(i) <- !current_time; + ready.(!current_time) + <- InstrSet.remove i (ready.(!current_time)); + List.iter (fun (instr_to, latency) -> + if instr_to < nr_instructions then + match earliest_time instr_to with + | -1 -> () + | to_time -> + ((* DebugPrint.debug "TO TIME %d : %d\n" to_time + * (Array.length ready); *) + ready.(to_time) + <- InstrSet.add instr_to ready.(to_time)) + ) successors.(i); + successors.(i) <- [] + ) + done; + + try + let final_time = ref (-1) in + for i = 0 to nr_instructions - 1 do + (* print_int i; + * flush stdout; *) + (if times.(i) < 0 then raise Exit); + (if !final_time < times.(i) + 1 then final_time := times.(i) + 1) + done; + List.iter (fun (i, latency) -> + let target_time = latency + times.(i) in + if target_time > !final_time then + final_time := target_time) predecessors.(nr_instructions); + times.(nr_instructions) <- !final_time; + DebugPrint.debug_flag := false; + Some times + with Exit -> + DebugPrint.debug "reg_pres_sched failed\n"; + DebugPrint.debug_flag := false; + None + +;; + + +(********************************************************************) + +let reg_pres_scheduler_bis (problem : problem) : solution option = + DebugPrint.debug_flag := true; + Printf.printf "\nNEW\n\n"; + let nr_instructions = get_nr_instructions problem in + let successors = get_successors problem + and predecessors = get_predecessors problem + and times = Array.make (nr_instructions+1) (-1) in + let live_regs_entry = problem.live_regs_entry in + + (* let available_regs = Array.copy Machregsaux.nr_regs in *) + + let class_r r = + Machregsaux.class_of_type (problem.typing r) in + + let live_regs = Hashtbl.create 42 in + + List.iter (fun r -> let classe = Machregsaux.class_of_type + (problem.typing r) in + (* available_regs.(classe) + * <- available_regs.(classe) - 1; *) + Hashtbl.add live_regs r classe) + (Registers.Regset.elements live_regs_entry); + + + let counts, mentions = + match problem.reference_counting with + | Some (l, r) -> l, r + | None -> assert false + in + + let fold_delta a (r, b) = + a + (if b then + match Hashtbl.find_opt counts r with + | Some (_, 1) -> 1 + | _ -> 0 + else + match Hashtbl.find_opt live_regs r with + | None -> -1 + | Some t -> 0 + ) in + + let priorities = critical_paths successors in + + let current_resources = Array.copy problem.resource_bounds in + + let compare_pres x y = + let pdy = List.fold_left (fold_delta) 0 mentions.(y) in + let pdx = List.fold_left (fold_delta) 0 mentions.(x) in + match pdy - pdx with + | 0 -> x - y + | z -> z + in + + let module InstrSet = + Set.Make (struct + type t = int + let compare x y = + match priorities.(y) - priorities.(x) with + | 0 -> x - y + | z -> z + end) in + + let max_time = bound_max_time problem (* + 5*nr_instructions *) in + let ready = Array.make max_time InstrSet.empty in + + Array.iteri (fun i preds -> + if i < nr_instructions && preds = [] + then ready.(0) <- InstrSet.add i ready.(0)) predecessors; + + let current_time = ref 0 + and earliest_time i = + try + let time = ref (-1) in + List.iter (fun (j, latency) -> + if times.(j) < 0 + then raise Exit + else let t = times.(j) + latency in + if t > !time + then time := t) predecessors.(i); + assert (!time >= 0); + !time + with Exit -> -1 + in + + let advance_time () = + (* Printf.printf "ADV\n"; + * flush stdout; *) + (if !current_time < max_time-1 + then ( + Array.blit problem.resource_bounds 0 current_resources 0 + (Array.length current_resources); + ready.(!current_time + 1) <- + InstrSet.union (ready.(!current_time)) + (ready.(!current_time +1)); + ready.(!current_time) <- InstrSet.empty)); + incr current_time + in + + + let attempt_scheduling ready usages = + let result = ref [] in + try + InstrSet.iter (fun i -> + if vector_less_equal usages.(i) current_resources + then + if !result = [] || priorities.(i) = priorities.(List.hd (!result)) + then + result := i::(!result) + else raise Exit + ) ready; + if !result <> [] then raise Exit; + -1 + with + Exit -> + let mini = List.fold_left (fun a b -> + if a = -1 || compare_pres a b > 0 + then b else a + ) (-1) !result in + vector_subtract usages.(mini) current_resources; + mini + in + + while !current_time < max_time + do + if (InstrSet.is_empty ready.(!current_time)) + then advance_time () + else + match attempt_scheduling ready.(!current_time) + problem.instruction_usages with + | -1 -> advance_time() + | i -> ( + DebugPrint.debug "ISSUED: %d\nREADY: " i; + InstrSet.iter (fun i -> DebugPrint.debug "%d " i) + ready.(!current_time); + DebugPrint.debug "\nSUCC: "; + List.iter (fun (i, l) -> DebugPrint.debug "%d " i) + successors.(i); + DebugPrint.debug "\n\n"; + assert(times.(i) < 0); + times.(i) <- !current_time; + ready.(!current_time) + <- InstrSet.remove i (ready.(!current_time)); + (List.iter (fun (r,b) -> + if b then + (match Hashtbl.find_opt counts r with + | None -> assert false + | Some (t, n) -> + Hashtbl.remove counts r; + if n = 1 then + (Hashtbl.remove live_regs r; + (* available_regs.(t) + * <- available_regs.(t) + 1 *))) + else + let t = class_r r in + match Hashtbl.find_opt live_regs r with + | None -> (Hashtbl.add live_regs r t; + (* available_regs.(t) + * <- available_regs.(t) - 1 *)) + | Some i -> () + ) mentions.(i)); + List.iter (fun (instr_to, latency) -> + if instr_to < nr_instructions then + match earliest_time instr_to with + | -1 -> () + | to_time -> + ((* DebugPrint.debug "TO TIME %d : %d\n" to_time + * (Array.length ready); *) + ready.(to_time) + <- InstrSet.add instr_to ready.(to_time)) + ) successors.(i); + successors.(i) <- [] + ) + done; + + try + let final_time = ref (-1) in + for i = 0 to nr_instructions - 1 do + (* print_int i; + * flush stdout; *) + (if times.(i) < 0 then raise Exit); + (if !final_time < times.(i) + 1 then final_time := times.(i) + 1) + done; + List.iter (fun (i, latency) -> + let target_time = latency + times.(i) in + if target_time > !final_time then + final_time := target_time) predecessors.(nr_instructions); + times.(nr_instructions) <- !final_time; + DebugPrint.debug_flag := false; + Some times + with Exit -> + DebugPrint.debug "reg_pres_sched failed\n"; + DebugPrint.debug_flag := false; + None + +;; + +(********************************************************************) + type bundle = int list;; let rec extract_deps_to index = function @@ -438,6 +929,12 @@ let reverse_problem problem = { max_latency = problem.max_latency; resource_bounds = problem.resource_bounds; + live_regs_entry = Registers.Regset.empty; (* PLACEHOLDER *) + (* Not needed for the revlist sched, and for now we wont bother + with creating a reverse scheduler aware of reg press *) + + typing = problem.typing; + reference_counting = problem.reference_counting; instruction_usages = Array.init (nr_instructions + 1) (fun i -> if i=0 @@ -1259,5 +1756,7 @@ let scheduler_by_name name = | "ilp" -> validated_scheduler cascaded_scheduler | "list" -> validated_scheduler list_scheduler | "revlist" -> validated_scheduler reverse_list_scheduler + | "regpres" -> validated_scheduler reg_pres_scheduler + | "regpres_bis" -> validated_scheduler reg_pres_scheduler_bis | "greedy" -> greedy_scheduler | s -> failwith ("unknown scheduler: " ^ s);; diff --git a/scheduling/InstructionScheduler.mli b/scheduling/InstructionScheduler.mli index fb7af3f6..48c7bc09 100644 --- a/scheduling/InstructionScheduler.mli +++ b/scheduling/InstructionScheduler.mli @@ -23,6 +23,16 @@ type problem = { resource_bounds : int array; (** An array of number of units available indexed by the kind of resources to be allocated. It can be empty, in which case the problem is scheduling without resource constraints. *) + live_regs_entry : Registers.Regset.t; + (** The set of live pseudo-registers at entry. *) + + typing : RTLtyping.regenv; + (** Register type map. *) + + reference_counting : ((Registers.reg, int * int) Hashtbl.t + * ((Registers.reg * bool) list array)) option; + (** See RTLpathScheduleraux.reference_counting. *) + instruction_usages: int array array; (** At index {i i} the vector of resources used by instruction number {i i}. It must be the same length as [resource_bounds] *) @@ -68,6 +78,12 @@ Once a clock tick is full go to the next. @return [Some solution] when a solution is found, [None] if not. *) val list_scheduler : problem -> solution option +(** WIP : Same as list_scheduler, but schedules instructions which decrease +register pressure when it gets too high. *) +val reg_pres_scheduler : problem -> solution option + +val reg_pres_scheduler_bis : problem -> solution option + (** Schedule the problem using the order of instructions without any reordering *) val greedy_scheduler : problem -> solution option diff --git a/scheduling/RTLpathScheduleraux.ml b/scheduling/RTLpathScheduleraux.ml index aeed39df..f3f09954 100644 --- a/scheduling/RTLpathScheduleraux.ml +++ b/scheduling/RTLpathScheduleraux.ml @@ -17,7 +17,7 @@ let print_superblock (sb: superblock) code = begin debug "{ instructions = "; print_instructions (Array.to_list insts) code; debug "\n"; debug " liveins = "; print_ptree_regset li; debug "\n"; - debug " output_regs = "; print_regset outs; debug "}" + debug " output_regs = "; print_regset outs; debug "\n}" end let print_superblocks lsb code = @@ -72,6 +72,168 @@ let get_superblocks code entry pm typing = lsb end +(** the useful one. Returns a hashtable with bindings of shape + ** [(r,(t, n)], where [r] is a pseudo-register (Registers.reg), + ** [t] is its class (according to [typing]), and [n] the number of + ** times it's referenced as an argument in instructions of [seqa] ; + ** and an arrray containg the list of regs referenced by each + ** instruction, with a boolean to know whether it's as arg or dest *) +let reference_counting (seqa : (instruction * Regset.t) array) + (out_regs : Registers.Regset.t) (typing : RTLtyping.regenv) : + (Registers.reg, int * int) Hashtbl.t * + (Registers.reg * bool) list array = + let retl = Hashtbl.create 42 in + let retr = Array.make (Array.length seqa) [] in + (* retr.(i) : (r, b) -> (r', b') -> ... + * where b = true if seen as arg, false if seen as dest + *) + List.iter (fun reg -> + Hashtbl.add retl + reg (Machregsaux.class_of_type (typing reg), 1) + ) (Registers.Regset.elements out_regs); + let add_reg reg = + match Hashtbl.find_opt retl reg with + | Some (t, n) -> Hashtbl.add retl reg (t, n+1) + | None -> Hashtbl.add retl reg (Machregsaux.class_of_type + (typing reg), 1) + in + let map_true = List.map (fun r -> r, true) in + Array.iteri (fun i (ins, _) -> + match ins with + | Iop(_,args,dest,_) | Iload(_,_,_,args,dest,_) -> + List.iter (add_reg) args; + retr.(i) <- (dest, false)::(map_true args) + | Icond(_,args,_,_,_) -> + List.iter (add_reg) args; + retr.(i) <- map_true args + | Istore(_,_,args,src,_) -> + List.iter (add_reg) args; + add_reg src; + retr.(i) <- (src, true)::(map_true args) + | Icall(_,fn,args,dest,_) -> + List.iter (add_reg) args; + retr.(i) <- (match fn with + | Datatypes.Coq_inl reg -> + add_reg reg; + (dest,false)::(reg, true)::(map_true args) + | _ -> (dest,false)::(map_true args)) + + | Itailcall(_,fn,args) -> + List.iter (add_reg) args; + retr.(i) <- (match fn with + | Datatypes.Coq_inl reg -> + add_reg reg; + (reg, true)::(map_true args) + | _ -> map_true args) + | Ibuiltin(_,args,dest,_) -> + let rec bar = function + | AST.BA r -> add_reg r; + retr.(i) <- (r, true)::retr.(i) + | AST.BA_splitlong (hi, lo) | AST.BA_addptr (hi, lo) -> + bar hi; bar lo + | _ -> () + in + List.iter (bar) args; + let rec bad = function + | AST.BR r -> retr.(i) <- (r, false)::retr.(i) + | AST.BR_splitlong (hi, lo) -> + bad hi; bad lo + | _ -> () + in + bad dest; + | Ijumptable (reg,_) | Ireturn (Some reg) -> + add_reg reg; + retr.(i) <- [reg, true] + | _ -> () + ) seqa; + (* print_string "mentions\n"; + * Array.iteri (fun i l -> + * print_int i; + * print_string ": ["; + * List.iter (fun (r, b) -> + * print_int (Camlcoq.P.to_int r); + * print_string ":"; + * print_string (if b then "a:" else "d"); + * if b then print_int (snd (Hashtbl.find retl r)); + * print_string ", " + * ) l; + * print_string "]\n"; + * flush stdout; + * ) retr; *) + retl, retr + + +let get_live_regs_entry (sb : superblock) code = + (if !Clflags.option_debug_compcert > 6 + then debug_flag := true); + debug "getting live regs for superblock:\n"; + print_superblock sb code; + debug "\n"; + let seqa = Array.map (fun i -> + (match PTree.get i code with + | Some ii -> ii + | None -> failwith "RTLpathScheduleraux.get_live_regs_entry" + ), + (match PTree.get i sb.liveins with + | Some s -> s + | None -> Regset.empty)) + sb.instructions in + let ret = + Array.fold_right (fun (ins, liveins) regset_i -> + let regset = Registers.Regset.union liveins regset_i in + match ins with + | Inop _ -> regset + | Iop (_, args, dest, _) + | Iload (_, _, _, args, dest, _) -> + List.fold_left (fun set reg -> Registers.Regset.add reg set) + (Registers.Regset.remove dest regset) args + | Istore (_, _, args, src, _) -> + List.fold_left (fun set reg -> Registers.Regset.add reg set) + (Registers.Regset.add src regset) args + | Icall (_, fn, args, dest, _) -> + List.fold_left (fun set reg -> Registers.Regset.add reg set) + ((match fn with + | Datatypes.Coq_inl reg -> (Registers.Regset.add reg) + | Datatypes.Coq_inr _ -> (fun x -> x)) + (Registers.Regset.remove dest regset)) + args + | Itailcall (_, fn, args) -> + List.fold_left (fun set reg -> Registers.Regset.add reg set) + (match fn with + | Datatypes.Coq_inl reg -> (Registers.Regset.add reg regset) + | Datatypes.Coq_inr _ -> regset) + args + | Ibuiltin (_, args, dest, _) -> + List.fold_left (fun set arg -> + let rec add reg set = + match reg with + | AST.BA r -> Registers.Regset.add r set + | AST.BA_splitlong (hi, lo) + | AST.BA_addptr (hi, lo) -> add hi (add lo set) + | _ -> set + in add arg set) + (let rec rem dest set = + match dest with + | AST.BR r -> Registers.Regset.remove r set + | AST.BR_splitlong (hi, lo) -> rem hi (rem lo set) + | _ -> set + in rem dest regset) + args + | Icond (_, args, _, _, _) -> + List.fold_left (fun set reg -> + Registers.Regset.add reg set) + regset args + | Ijumptable (reg, _) + | Ireturn (Some reg) -> + Registers.Regset.add reg regset + | _ -> regset + ) seqa sb.s_output_regs + in debug "live in regs: "; + print_regset ret; + debug "\n"; + debug_flag := false; + ret + (* TODO David *) let schedule_superblock sb code = if not !Clflags.option_fprepass @@ -90,15 +252,22 @@ let schedule_superblock sb code = match predicted_successor ii with | Some _ -> 0 | None -> 1 in + debug "hello\n"; + let live_regs_entry = get_live_regs_entry sb code in + let seqa = + Array.map (fun i -> + (match PTree.get i code with + | Some ii -> ii + | None -> failwith "RTLpathScheduleraux.schedule_superblock"), + (match PTree.get i sb.liveins with + | Some s -> s + | None -> Regset.empty)) + (Array.sub sb.instructions 0 (nr_instr-trailer_length)) in match PrepassSchedulingOracle.schedule_sequence - (Array.map (fun i -> - (match PTree.get i code with - | Some ii -> ii - | None -> failwith "RTLpathScheduleraux.schedule_superblock"), - (match PTree.get i sb.liveins with - | Some s -> s - | None -> Regset.empty)) - (Array.sub sb.instructions 0 (nr_instr-trailer_length))) with + seqa + live_regs_entry + sb.typing + (reference_counting seqa sb.s_output_regs sb.typing) with | None -> sb.instructions | Some order -> let ins' = diff --git a/test/nardino/scheduling/entry_regs.c b/test/nardino/scheduling/entry_regs.c new file mode 100644 index 00000000..9e6adacb --- /dev/null +++ b/test/nardino/scheduling/entry_regs.c @@ -0,0 +1,19 @@ +#include + +int f(int n) { + if (n > 0) + return 42; + else + return n; +} + + +int main(int argc, char *argv[]) { + int a=1; + float b=2.; + int c = f(a); + a = 3; + int d = f(a); + printf("%e, %d, %d, %d", b, a, c, d); + return 0; +} diff --git a/test/nardino/scheduling/spille_backw.c b/test/nardino/scheduling/spille_backw.c new file mode 100644 index 00000000..1c36ee86 --- /dev/null +++ b/test/nardino/scheduling/spille_backw.c @@ -0,0 +1,114 @@ +int f(int k) { + int a1 = k; + int b1 = 2*a1; + int c = a1; + int a2 = k+1; + int b2 = 2*a2; + c += a2; + int a3 = k+2; + int b3 = 2*a3; + c += a3; + int a4 = k+3; + int b4 = 2*a4; + c += a4; + int a5 = k+4; + int b5 = 2*a5; + c += a5; + int a6 = k+5; + int b6 = 2*a6; + c += a6; + int a7 = k+6; + int b7 = 2*a7; + c += a7; + int a8 = k+7; + int b8 = 2*a8; + c += a8; + int a9 = k+8; + int b9 = 2*a9; + c += a9; + int a10 = k+9; + int b10 = 2*a10; + c += a10; + int a11 = k+10; + int b11 = 2*a11; + c += a11; + int a12 = k+11; + int b12 = 2*a12; + c += a12; + int a13 = k+12; + int b13 = 2*a13; + c += a13; + int a14 = k+13; + int b14 = 2*a14; + c += a14; + int a15 = k+14; + int b15 = 2*a15; + c += a15; + int a16 = k+15; + int b16 = 2*a16; + c += a16; + int a17 = k+16; + int b17 = 2*a17; + c += a17; + int a18 = k+17; + int b18 = 2*a18; + c += a18; + int a19 = k+18; + int b19 = 2*a19; + c += a19; + int a20 = k+19; + int b20 = 2*a20; + c += a20; + int a21 = k+20; + int b21 = 2*a21; + c += a21; + int a22 = k+21; + int b22 = 2*a22; + c += a22; + int a23 = k+22; + int b23 = 2*a23; + c += a23; + int a24 = k+23; + int b24 = 2*a24; + c += a24; + int a25 = k+24; + int b25 = 2*a25; + c += a25; + int a26 = k+25; + int b26 = 2*a26; + c += a26; + return + b13+ + b12+ + b11+ + b10+ + b9+ + b8+ + b7+ + b6+ + b5+ + b4+ + b3+ + b2+ + b1+ + b14+ + b15+ + b16+ + b17+ + b18+ + b19+ + b20+ + b21+ + b22+ + b23+ + b23+ + b24+ + b25+ + b26+ + c; +} + +int main(int argc, char *argv[]) { + f(3); + return 0; +} diff --git a/test/nardino/scheduling/spille_forw.c b/test/nardino/scheduling/spille_forw.c new file mode 100644 index 00000000..db88588b --- /dev/null +++ b/test/nardino/scheduling/spille_forw.c @@ -0,0 +1,166 @@ +#include + +int f(int n, float * arr) { + float a1 = (float) n; + float b1 = 2.*a1; + float c = a1; + float a2 = (float) n+1; + float b2 = 2.*a2; + c += a2; + float a3 = (float) n+2; + float b3 = 2.*a3; + c += a3; + float a4 = (float) n+3; + float b4 = 2.*a4; + c += a4; + float a5 = (float) n+4; + float b5 = 2.*a5; + c += a5; + float a6 = (float) n+5; + float b6 = 2.*a6; + c += a6; + float a7 = (float) n+6; + float b7 = 2.*a7; + c += a7; + float a8 = (float) n+7; + float b8 = 2.*a8; + c += a8; + float a9 = (float) n+8; + float b9 = 2.*a9; + c += a9; + float a10 = (float) n+9; + float b10 = 2.*a10; + c += a10; + float a11 = (float) n+10; + float b11 = 2.*a11; + c += a11; + float a12 = (float) n+11; + float b12 = 2.*a12; + c += a12; + float a13 = (float) n+12; + float b13 = 2.*a13; + c += a13; + float a14 = (float) n+13; + float b14 = 2.*a14; + c += a14; + float a15 = (float) n+14; + float b15 = 2.*a15; + c += a15; + float a16 = (float) n+15; + float b16 = 2.*a16; + c += a16; + float a17 = (float) n+16; + float b17 = 2.*a17; + c += a17; + float a18 = (float) n+17; + float b18 = 2.*a18; + c += a18; + float a19 = (float) n+18; + float b19 = 2.*a19; + c += a19; + float a20 = (float) n+19; + float b20 = 2.*a20; + c += a20; + float a21 = (float) n+20; + float b21 = 2.*a21; + c += a21; + float a22 = (float) n+21; + float b22 = 2.*a22; + c += a22; + float a23 = (float) n+22; + float b23 = 2.*a23; + c += a23; + float a24 = (float) n+23; + float b24 = 2.*a24; + c += a24; + float a25 = (float) n+24; + float b25 = 2.*a25; + c += a25; + float a26 = (float) n+25; + float b26 = 2.*a26; + c += a26; + float a27 = (float) n+26; + float b27 = 2.*a27; + c += a27; + float a28 = (float) n+27; + float b28 = 2.*a28; + c += a28; + float a29 = (float) n+28; + float b29 = 2.*a29; + c += a29; + float a30 = (float) n+29; + float b30 = 2.*a30; + c += a30; + /* arr[0] = a1; */ + /* arr[1] = a2; */ + /* arr[2] = a3; */ + /* arr[3] = a4; */ + /* arr[4] = a5; */ + /* arr[5] = a6; */ + /* arr[6] = a7; */ + /* arr[7] = a8; */ + /* arr[8] = a9; */ + /* arr[9] = a10; */ + /* arr[10] = a11; */ + /* arr[11] = a12; */ + /* arr[12] = a13; */ + /* arr[13] = a14; */ + /* arr[14] = a15; */ + /* arr[15] = a16; */ + /* arr[16] = a17; */ + /* arr[17] = a18; */ + /* arr[18] = a19; */ + /* arr[19] = a20; */ + /* arr[20] = a21; */ + /* arr[21] = a22; */ + /* arr[22] = a23; */ + /* arr[23] = a24; */ + /* arr[24] = a25; */ + /* arr[25] = a26; */ + /* arr[26] = a27; */ + /* arr[27] = a28; */ + /* arr[28] = a29; */ + /* arr[29] = a30; */ + return c + + b1+ + b2+ + b3+ + b4+ + b5+ + b6+ + b7+ + b8+ + b9+ + b10+ + b11+ + b12+ + b13+ + b14+ + b15+ + b16+ + b17+ + b18+ + b19+ + b20+ + b21+ + b22+ + b23+ + b24+ + b25+ + b26+ + b27+ + b28+ + b29+ + b30; +} + + + + + + +int main(int argc, char *argv[]) { + float arr[30]; + f(5, arr); + return 0; +} diff --git a/x86/ExpansionOracle.ml b/x86/ExpansionOracle.ml deleted file mode 120000 index ee2674bf..00000000 --- a/x86/ExpansionOracle.ml +++ /dev/null @@ -1 +0,0 @@ -../aarch64/ExpansionOracle.ml \ No newline at end of file diff --git a/x86/ExpansionOracle.ml b/x86/ExpansionOracle.ml new file mode 100644 index 00000000..3b63b80d --- /dev/null +++ b/x86/ExpansionOracle.ml @@ -0,0 +1,17 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Léo Gourdin UGA, VERIMAG *) +(* *) +(* Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +open RTLpathCommon + +let expanse (sb : superblock) code pm = (code, pm) + +let find_last_node_reg c = () diff --git a/x86/PrepassSchedulingOracle.ml b/x86/PrepassSchedulingOracle.ml index 7b6a1b14..42a3da23 100644 --- a/x86/PrepassSchedulingOracle.ml +++ b/x86/PrepassSchedulingOracle.ml @@ -2,4 +2,5 @@ open RTL open Registers (* Do not do anything *) -let schedule_sequence (seqa : (instruction*Regset.t) array) = None +let schedule_sequence (seqa : (instruction*Regset.t) array) + live_regs_entry typing reference = None -- cgit From 54d15a204d239c0347c504edec1ad2ab9f50d09a Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Mon, 19 Jul 2021 13:15:11 +0200 Subject: move back to "list" scheduler since regpres is buggy --- driver/Clflags.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/driver/Clflags.ml b/driver/Clflags.ml index 25bd2c78..085eaa7e 100644 --- a/driver/Clflags.ml +++ b/driver/Clflags.ml @@ -51,7 +51,7 @@ let option_flooprotate = ref 0 (* rotate the innermost loops to have the conditi let option_mtune = ref "" let option_fprepass = ref true -let option_fprepass_sched = ref "regpres" +let option_fprepass_sched = ref "list" let option_fpostpass = ref true let option_fpostpass_sched = ref "list" -- cgit From cb6103e684ff89a0493d6b2e3af2317df04fede3 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Mon, 19 Jul 2021 13:23:40 +0200 Subject: debugprint flag not to true --- scheduling/InstructionScheduler.ml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml index 0203d9c8..0eadd5a4 100644 --- a/scheduling/InstructionScheduler.ml +++ b/scheduling/InstructionScheduler.ml @@ -349,8 +349,6 @@ let _ = fun x -> priority_list_scheduler INSTRUCTION_ORDER x;; (* A scheduler sensitive to register pressure *) let reg_pres_scheduler (problem : problem) : solution option = - DebugPrint.debug_flag := true; - let nr_instructions = get_nr_instructions problem in if !Clflags.option_debug_compcert > 6 then @@ -615,11 +613,9 @@ let reg_pres_scheduler (problem : problem) : solution option = if target_time > !final_time then final_time := target_time) predecessors.(nr_instructions); times.(nr_instructions) <- !final_time; - DebugPrint.debug_flag := false; Some times with Exit -> DebugPrint.debug "reg_pres_sched failed\n"; - DebugPrint.debug_flag := false; None ;; @@ -628,7 +624,6 @@ let reg_pres_scheduler (problem : problem) : solution option = (********************************************************************) let reg_pres_scheduler_bis (problem : problem) : solution option = - DebugPrint.debug_flag := true; Printf.printf "\nNEW\n\n"; let nr_instructions = get_nr_instructions problem in let successors = get_successors problem @@ -814,11 +809,9 @@ let reg_pres_scheduler_bis (problem : problem) : solution option = if target_time > !final_time then final_time := target_time) predecessors.(nr_instructions); times.(nr_instructions) <- !final_time; - DebugPrint.debug_flag := false; Some times with Exit -> DebugPrint.debug "reg_pres_sched failed\n"; - DebugPrint.debug_flag := false; None ;; -- cgit From ad72ffe9c2d7839cfe85926264565d59daf87a3c Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Mon, 19 Jul 2021 13:24:46 +0200 Subject: disable printing debug info --- scheduling/InstructionScheduler.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml index 0eadd5a4..5bd969ca 100644 --- a/scheduling/InstructionScheduler.ml +++ b/scheduling/InstructionScheduler.ml @@ -624,7 +624,7 @@ let reg_pres_scheduler (problem : problem) : solution option = (********************************************************************) let reg_pres_scheduler_bis (problem : problem) : solution option = - Printf.printf "\nNEW\n\n"; + (* Printf.printf "\nNEW\n\n"; *) let nr_instructions = get_nr_instructions problem in let successors = get_successors problem and predecessors = get_predecessors problem -- cgit From 650c134f915d0b2e799c76c87ffd2ed09c782dcc Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Mon, 19 Jul 2021 13:26:41 +0200 Subject: use regpres_bis not regpres --- driver/Clflags.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/driver/Clflags.ml b/driver/Clflags.ml index 085eaa7e..134c222f 100644 --- a/driver/Clflags.ml +++ b/driver/Clflags.ml @@ -51,7 +51,7 @@ let option_flooprotate = ref 0 (* rotate the innermost loops to have the conditi let option_mtune = ref "" let option_fprepass = ref true -let option_fprepass_sched = ref "list" +let option_fprepass_sched = ref "regpres_bis" let option_fpostpass = ref true let option_fpostpass_sched = ref "list" -- cgit From a86c0c659666a9a154ae4d27fee1c4cad3dd26b7 Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Mon, 19 Jul 2021 15:57:57 +0200 Subject: start fixing --- scheduling/InstructionScheduler.ml | 90 ++++++++++++++++---------------------- 1 file changed, 38 insertions(+), 52 deletions(-) diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml index 0203d9c8..3e82336b 100644 --- a/scheduling/InstructionScheduler.ml +++ b/scheduling/InstructionScheduler.ml @@ -353,8 +353,8 @@ let reg_pres_scheduler (problem : problem) : solution option = let nr_instructions = get_nr_instructions problem in - if !Clflags.option_debug_compcert > 6 then - DebugPrint.debug "SCHEDULING_SUPERBLOCK %d\n" nr_instructions; + (* if !Clflags.option_debug_compcert > 6 then *) + DebugPrint.debug "\nSCHEDULING_SUPERBLOCK %d\n" nr_instructions; let successors = get_successors problem and predecessors = get_predecessors problem @@ -495,32 +495,22 @@ let reg_pres_scheduler (problem : problem) : solution option = let attempt_scheduling ready usages = let result = ref (-1) in + DebugPrint.debug "\n\nREADY: "; + InstrSet.iter (fun i -> DebugPrint.debug "%d " i) ready; + DebugPrint.debug "\n\n"; try Array.iteri (fun i avlregs -> - (* print_string "avlregs: "; - * print_int i; - * print_string " "; - * print_int avlregs; - * print_newline (); - * print_string "live regs: "; - * print_int (Hashtbl.length live_regs); - * print_newline (); - * flush stdout; *) - if avlregs <= regs_thresholds.(i) + DebugPrint.debug "avlregs: %d %d\nlive regs: %d\n" + i avlregs (Hashtbl.length live_regs); + if !cnt < 5 && avlregs <= regs_thresholds.(i) then ( csr_b := true; let maybe = InstrSet.sched_CSR i ready usages in - (* print_string "maybe\n"; - * print_int maybe; - * print_newline (); - * flush stdout; *) - (if maybe > 0 && + DebugPrint.debug "maybe %d\n" maybe; + (if maybe >= 0 && let delta = List.fold_left (fold_delta i) 0 mentions.(maybe) in - (* print_string "delta "; - * print_int delta; - * print_newline (); - * flush stdout; *) + DebugPrint.debug "delta %d\n" delta; delta > 0 then (vector_subtract usages.(maybe) current_resources; @@ -558,33 +548,30 @@ let reg_pres_scheduler (problem : problem) : solution option = problem.instruction_usages with | -1 -> advance_time() | i -> (assert(times.(i) < 0); - ((* print_string "INSTR ISSUED: "; - * print_int i; - * print_newline (); - * flush stdout; *) - if !csr_b && !Clflags.option_debug_compcert > 6 then - DebugPrint.debug "REGPRES: high pres class %d\n" i; - csr_b := false; - if !Clflags.option_regpres_wait_window then - cnt := 0; - List.iter (fun (r,b) -> - if b then - (match Hashtbl.find_opt counts r with - | None -> assert false - | Some (t, n) -> - Hashtbl.remove counts r; - if n = 1 then - (Hashtbl.remove live_regs r; - available_regs.(t) - <- available_regs.(t) + 1)) - else - let t = class_r r in - match Hashtbl.find_opt live_regs r with - | None -> (Hashtbl.add live_regs r t; - available_regs.(t) - <- available_regs.(t) - 1) - | Some i -> () - ) mentions.(i)); + (DebugPrint.debug "INSTR ISSUED: %d\n" i; + if !csr_b && !Clflags.option_debug_compcert > 6 then + DebugPrint.debug "REGPRES: high pres class %d\n" i; + csr_b := false; + (* if !Clflags.option_regpres_wait_window then *) + cnt := 0; + List.iter (fun (r,b) -> + if b then + (match Hashtbl.find_opt counts r with + | None -> assert false + | Some (t, n) -> + Hashtbl.remove counts r; + if n = 1 then + (Hashtbl.remove live_regs r; + available_regs.(t) + <- available_regs.(t) + 1)) + else + let t = class_r r in + match Hashtbl.find_opt live_regs r with + | None -> (Hashtbl.add live_regs r t; + available_regs.(t) + <- available_regs.(t) - 1) + | Some i -> () + ) mentions.(i)); times.(i) <- !current_time; ready.(!current_time) <- InstrSet.remove i (ready.(!current_time)); @@ -595,8 +582,8 @@ let reg_pres_scheduler (problem : problem) : solution option = | to_time -> ((* DebugPrint.debug "TO TIME %d : %d\n" to_time * (Array.length ready); *) - ready.(to_time) - <- InstrSet.add instr_to ready.(to_time)) + ready.(to_time) + <- InstrSet.add instr_to ready.(to_time)) ) successors.(i); successors.(i) <- [] ) @@ -605,8 +592,7 @@ let reg_pres_scheduler (problem : problem) : solution option = try let final_time = ref (-1) in for i = 0 to nr_instructions - 1 do - (* print_int i; - * flush stdout; *) + DebugPrint.debug "%d " i; (if times.(i) < 0 then raise Exit); (if !final_time < times.(i) + 1 then final_time := times.(i) + 1) done; -- cgit From ee2112a9bacc246e0434a19fc93aab335fd56ddd Mon Sep 17 00:00:00 2001 From: "nicolas.nardino" Date: Mon, 19 Jul 2021 18:09:27 +0200 Subject: Fix scheduling fails (forgot a case) --- scheduling/InstructionScheduler.ml | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/scheduling/InstructionScheduler.ml b/scheduling/InstructionScheduler.ml index 3e82336b..9c5c674d 100644 --- a/scheduling/InstructionScheduler.ml +++ b/scheduling/InstructionScheduler.ml @@ -349,13 +349,14 @@ let _ = fun x -> priority_list_scheduler INSTRUCTION_ORDER x;; (* A scheduler sensitive to register pressure *) let reg_pres_scheduler (problem : problem) : solution option = - DebugPrint.debug_flag := true; + (* DebugPrint.debug_flag := true; *) let nr_instructions = get_nr_instructions problem in - (* if !Clflags.option_debug_compcert > 6 then *) - DebugPrint.debug "\nSCHEDULING_SUPERBLOCK %d\n" nr_instructions; - + if !Clflags.option_debug_compcert > 6 then + (Printf.eprintf "\nSCHEDULING_SUPERBLOCK %d\n" nr_instructions; + flush stderr); + let successors = get_successors problem and predecessors = get_predecessors problem and times = Array.make (nr_instructions+1) (-1) in @@ -524,7 +525,8 @@ let reg_pres_scheduler (problem : problem) : solution option = then result := ins ) ready; if !result <> -1 then - vector_subtract usages.(!result) current_resources) + vector_subtract usages.(!result) current_resources; + incr cnt) else (incr cnt) ); @@ -550,7 +552,8 @@ let reg_pres_scheduler (problem : problem) : solution option = | i -> (assert(times.(i) < 0); (DebugPrint.debug "INSTR ISSUED: %d\n" i; if !csr_b && !Clflags.option_debug_compcert > 6 then - DebugPrint.debug "REGPRES: high pres class %d\n" i; + (Printf.eprintf "REGPRES: high pres class %d\n" i; + flush stderr); csr_b := false; (* if !Clflags.option_regpres_wait_window then *) cnt := 0; @@ -601,11 +604,11 @@ let reg_pres_scheduler (problem : problem) : solution option = if target_time > !final_time then final_time := target_time) predecessors.(nr_instructions); times.(nr_instructions) <- !final_time; - DebugPrint.debug_flag := false; + (* DebugPrint.debug_flag := false; *) Some times with Exit -> DebugPrint.debug "reg_pres_sched failed\n"; - DebugPrint.debug_flag := false; + (* DebugPrint.debug_flag := false; *) None ;; -- cgit