From afbf8601af31b012872a2fae6939be9fd231145f Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Wed, 4 Nov 2020 16:03:14 +0100 Subject: do not print "refining" unless asked --- backend/CSE3analysisaux.ml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/backend/CSE3analysisaux.ml b/backend/CSE3analysisaux.ml index bd4ca20a..e37ef61f 100644 --- a/backend/CSE3analysisaux.ml +++ b/backend/CSE3analysisaux.ml @@ -139,7 +139,8 @@ let refine_invariants if not (RB.beq cur nxt) then begin - Printf.printf "refining CSE3 node %d\n" (P.to_int pc); + (if !Clflags.option_debug_compcert > 4 + then Printf.printf "refining CSE3 node %d\n" (P.to_int pc)); List.iter add_todo (successors pc) end in (List.iter add_todo nodes); -- cgit From 2d01e9626d32bfa384ec8846d361f70205b54f25 Mon Sep 17 00:00:00 2001 From: Sylvain Boulmé Date: Wed, 4 Nov 2020 16:03:19 +0100 Subject: Embed the short video with subtitles... --- README.md | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 77219cc1..21629795 100644 --- a/README.md +++ b/README.md @@ -19,10 +19,19 @@ the [user's manual](http://compcert.inria.fr/man/). ## Verimag-Kalray version This is a special version with additions from Verimag and Kalray : -* A backend for the KVX processor: see [`README_Kalray.md`](README_Kalray.md) for details. +* A backend for the KVX processor: +
+Click-this to expand/collapse a 5-minutes video by C. Six, presenting the postpass scheduling and the KVX backend + +
+See also [`README_Kalray.md`](README_Kalray.md). * Some general-purpose optimization phases (e.g. profiling). * see [`PROFILING.md`](PROFILING.md) for details on the profiling system + The people responsible for this version are * Sylvain Boulmé (Grenoble-INP, Verimag) @@ -31,7 +40,6 @@ The people responsible for this version are ## Papers, docs, etc on this CompCert version -* [a 5-minutes video](http://www-verimag.imag.fr/~boulme/videos/poster-oopsla20.mp4) by C. Six, presenting the postpass scheduling and the KVX backend. * [Certified and Efficient Instruction Scheduling](https://hal.archives-ouvertes.fr/hal-02185883), an OOPSLA'20 paper, by Six, Boulmé and Monniaux. * [the documentation of the KVX backend Coq sources](https://certicompil.gricad-pages.univ-grenoble-alpes.fr/compcert-kvx) -- cgit From 078981909788db86bdcf4e470401b77d9bd0beb3 Mon Sep 17 00:00:00 2001 From: Sylvain Boulmé Date: Wed, 4 Nov 2020 16:08:02 +0100 Subject: Revert "Embed the short video with subtitles..." This reverts commit 2d01e9626d32bfa384ec8846d361f70205b54f25. --- README.md | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 21629795..77219cc1 100644 --- a/README.md +++ b/README.md @@ -19,19 +19,10 @@ the [user's manual](http://compcert.inria.fr/man/). ## Verimag-Kalray version This is a special version with additions from Verimag and Kalray : -* A backend for the KVX processor: -
-Click-this to expand/collapse a 5-minutes video by C. Six, presenting the postpass scheduling and the KVX backend - -
-See also [`README_Kalray.md`](README_Kalray.md). +* A backend for the KVX processor: see [`README_Kalray.md`](README_Kalray.md) for details. * Some general-purpose optimization phases (e.g. profiling). * see [`PROFILING.md`](PROFILING.md) for details on the profiling system - The people responsible for this version are * Sylvain Boulmé (Grenoble-INP, Verimag) @@ -40,6 +31,7 @@ The people responsible for this version are ## Papers, docs, etc on this CompCert version +* [a 5-minutes video](http://www-verimag.imag.fr/~boulme/videos/poster-oopsla20.mp4) by C. Six, presenting the postpass scheduling and the KVX backend. * [Certified and Efficient Instruction Scheduling](https://hal.archives-ouvertes.fr/hal-02185883), an OOPSLA'20 paper, by Six, Boulmé and Monniaux. * [the documentation of the KVX backend Coq sources](https://certicompil.gricad-pages.univ-grenoble-alpes.fr/compcert-kvx) -- cgit From 2ebe7e342cc56486e28c28f6a7a2678f67778229 Mon Sep 17 00:00:00 2001 From: Sylvain Boulmé Date: Wed, 4 Nov 2020 16:11:56 +0100 Subject: youtube link --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 77219cc1..58a7d052 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,8 @@ The people responsible for this version are ## Papers, docs, etc on this CompCert version -* [a 5-minutes video](http://www-verimag.imag.fr/~boulme/videos/poster-oopsla20.mp4) by C. Six, presenting the postpass scheduling and the KVX backend. +* [a 5-minutes video](http://www-verimag.imag.fr/~boulme/videos/poster-oopsla20.mp4) by C. Six, presenting the postpass scheduling and the KVX backend +(also on [YouTube if you need subtitles](https://www.youtube.com/watch?v=RAzMDS9OVSw)). * [Certified and Efficient Instruction Scheduling](https://hal.archives-ouvertes.fr/hal-02185883), an OOPSLA'20 paper, by Six, Boulmé and Monniaux. * [the documentation of the KVX backend Coq sources](https://certicompil.gricad-pages.univ-grenoble-alpes.fr/compcert-kvx) -- cgit From b7c2d3f693c8cef940bb6151c228059ffde0304a Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Wed, 4 Nov 2020 16:42:41 +0100 Subject: move loop rotate down --- tools/compiler_expand.ml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tools/compiler_expand.ml b/tools/compiler_expand.ml index 4e2dbd6a..54946366 100644 --- a/tools/compiler_expand.ml +++ b/tools/compiler_expand.ml @@ -25,11 +25,9 @@ TOTAL, Always, Require, (Some "Renumbering"), "Renumber"; PARTIAL, (Option "optim_CSE"), Require, (Some "CSE"), "CSE"; PARTIAL, Always, NoRequire, (Some "Static Prediction + inverting conditions"), "Staticpredict"; PARTIAL, Always, NoRequire, (Some "Unrolling one iteration out of innermost loops"), "Unrollsingle"; -TOTAL, Always, Require, (Some "Renumbering pre constprop"), "Renumber"; -PARTIAL, Always, NoRequire, (Some "Loop Rotate"), "Looprotate"; -TOTAL, Always, NoRequire, (Some "Renumbering pre constprop"), "Renumber"; +TOTAL, Always, NoRequire, (Some "Renumbering pre unroll"), "Renumber"; PARTIAL, Always, NoRequire, (Some "Unrolling the body of innermost loops"), "Unrollbody"; -TOTAL, Always, NoRequire, (Some "Renumbering pre constprop"), "Renumber"; +TOTAL, Always, NoRequire, (Some "Renumbering pre tail duplication"), "Renumber"; PARTIAL, Always, NoRequire, (Some "Performing tail duplication"), "Tailduplicate"; TOTAL, Always, NoRequire, (Some "Renumbering pre constprop"), "Renumber"; TOTAL, (Option "optim_constprop"), Require, (Some "Constant propagation"), "Constprop"; @@ -39,6 +37,9 @@ PARTIAL, (Option "optim_CSE3"), Require, (Some "CSE3"), "CSE3"; TOTAL, (Option "optim_CSE3"), Require, (Some "Kill useless moves after CSE3"), "KillUselessMoves"; TOTAL, (Option "optim_forward_moves"), Require, (Some "Forwarding moves"), "ForwardMoves"; PARTIAL, (Option "optim_redundancy"), Require, (Some "Redundancy elimination"), "Deadcode"; +TOTAL, Always, Require, (Some "Renumbering pre rotate"), "Renumber"; +PARTIAL, Always, NoRequire, (Some "Loop Rotate"), "Looprotate"; +TOTAL, (Option "optim_move_loop_invariants"), NoRequire, (Some "Renumbering for LICM"), "Renumber"; PARTIAL, (Option "optim_move_loop_invariants"), Require, (Some "LICM"), "LICM"; TOTAL, (Option "optim_move_loop_invariants"), NoRequire, (Some "Renumbering for LICM"), "Renumber"; PARTIAL, (Option "optim_move_loop_invariants"), NoRequire, (Some "CSE3 for LICM"), "CSE3"; -- cgit From 160c4ae21cdc86e26850ed0bdec8d95ca23c57db Mon Sep 17 00:00:00 2001 From: Cyril SIX Date: Wed, 4 Nov 2020 17:35:34 +0100 Subject: Fixing get_loop_headers + alternative get_inner_loops (commented, not active) --- backend/Duplicateaux.ml | 93 +++++++++++++++++++++++++++++++++++++------------ backend/LICMaux.ml | 41 +++++++++++++++++++--- 2 files changed, 107 insertions(+), 27 deletions(-) diff --git a/backend/Duplicateaux.ml b/backend/Duplicateaux.ml index fac0ba76..04b68e25 100644 --- a/backend/Duplicateaux.ml +++ b/backend/Duplicateaux.ml @@ -30,7 +30,9 @@ let get_some = LICMaux.get_some let rtl_successors = LICMaux.rtl_successors (* Get list of nodes following a BFS of the code *) -let bfs code entrypoint = begin +(* Stops when predicate is reached + * Excludes any node given in excluded function *) +let bfs_until code entrypoint (predicate: node->bool) (excluded: node->bool) = begin debug "bfs\n"; let visited = ref (PTree.map (fun n i -> false) code) and bfs_list = ref [] @@ -40,20 +42,24 @@ let bfs code entrypoint = begin Queue.add entrypoint to_visit; while not (Queue.is_empty to_visit) do node := Queue.pop to_visit; - if not (get_some @@ PTree.get !node !visited) then begin + if (not (get_some @@ PTree.get !node !visited)) then begin visited := PTree.set !node true !visited; - match PTree.get !node code with - | None -> failwith "No such node" - | Some i -> - bfs_list := !node :: !bfs_list; - let succ = rtl_successors i in - List.iter (fun n -> Queue.add n to_visit) succ + if not (excluded !node) then begin + match PTree.get !node code with + | None -> failwith "No such node" + | Some i -> + bfs_list := !node :: !bfs_list; + if not (predicate !node) then + let succ = rtl_successors i in List.iter (fun n -> Queue.add n to_visit) succ + end end done; List.rev !bfs_list end end +let bfs code entrypoint = bfs_until code entrypoint (fun _ -> false) (fun _ -> false) + let optbool o = match o with Some _ -> true | None -> false let ptree_get_some n ptree = get_some @@ PTree.get n ptree @@ -81,15 +87,7 @@ end module PSet = Set.Make(PInt) -let print_intlist oc l = - let rec f oc = function - | [] -> () - | n::ln -> (Printf.fprintf oc "%d %a" (P.to_int n) f ln) - in begin - if !debug_flag then begin - Printf.fprintf oc "[%a]" f l - end - end +let print_intlist = LICMaux.print_intlist let print_intset s = let seq = PSet.to_seq s @@ -627,7 +625,7 @@ let invert_iconds code = type innerLoop = { preds: P.t list; - body: HashedSet.PSet.t; + body: P.t list; head: P.t; (* head of the loop *) final: P.t (* the final instruction, which loops back to the head *) } @@ -635,7 +633,7 @@ type innerLoop = { let print_pset = LICMaux.pp_pset let print_inner_loop iloop = - debug "{preds: %a, body: %a}" print_intlist iloop.preds print_pset iloop.body + debug "{preds: %a, body: %a}" print_intlist iloop.preds print_intlist iloop.body let rec print_inner_loops = function | [] -> () @@ -657,6 +655,53 @@ let print_ptree printer pt = let print_pint oc i = if !debug_flag then Printf.fprintf oc "%d" (P.to_int i) else () +let cb_exit_node = function + | Icond (_,_,n1,n2,p) -> begin match p with + | Some true -> Some n2 + | Some false -> Some n1 + | None -> None + end + | _ -> None + +let get_natural_loop code predmap n = + let is_final_node m = + let successors = rtl_successors @@ get_some @@ PTree.get m code in + List.exists (fun s -> (P.to_int s) == (P.to_int n)) successors + in + let excluded_node = cb_exit_node @@ get_some @@ PTree.get n code in + let is_excluded m = match excluded_node with + | None -> false + | Some ex -> P.to_int ex == P.to_int m + in + debug "get_natural_loop for %d\n" (P.to_int n); + let body = bfs_until code n is_final_node is_excluded in + debug "BODY: %a\n" print_intlist body; + let final = List.find is_final_node body in + debug "FINAL: %d\n" (P.to_int final); + let preds = List.filter (fun pred -> List.mem pred body) @@ get_some @@ PTree.get n predmap in + debug "PREDS: %a\n" print_intlist preds; + { preds = preds; body = body; head = n; final = final } + +let rec count_loop_headers is_loop_header = function + | [] -> 0 + | n :: ln -> + let rem = count_loop_headers is_loop_header ln in + if (get_some @@ PTree.get n is_loop_header) then rem + 1 else rem + +(* Alternative code to get inner_loops - use it if we suspect the other function to be bugged *) +(* +let get_inner_loops f code is_loop_header = + let predmap = get_predecessors_rtl code in + let iloops = ref [] in + List.iter (fun (n, ilh) -> if ilh then + let iloop = get_natural_loop code predmap n in + let nb_headers = count_loop_headers is_loop_header iloop.body in + if nb_headers == 1 then (* innermost loop *) + iloops := iloop :: !iloops + ) (PTree.elements is_loop_header); + !iloops + *) + let get_inner_loops f code is_loop_header = let fake_f = { fn_sig = f.fn_sig; fn_params = f.fn_params; fn_stacksize = f.fn_stacksize; fn_code = code; fn_entrypoint = f.fn_entrypoint } in @@ -684,7 +729,7 @@ let get_inner_loops f code is_loop_header = assert (List.length filtered == 1); List.hd filtered end in - { preds = preds; body = body; head = head; final = final } + { preds = preds; body = (HashedSet.PSet.elements body); head = head; final = final } ) (* LICMaux.inner_loops also returns non-inner loops, but with a body of 1 instruction * We remove those to get just the inner loops *) @@ -769,7 +814,7 @@ let rec count_ignore_nops code = function * 3) Links the last instruction of body' into the first instruction of body *) let unroll_inner_loop_single code revmap iloop = - let body = HashedSet.PSet.elements (iloop.body) in + let body = iloop.body in if count_ignore_nops code body > !Clflags.option_funrollsingle then begin debug "Too many nodes in the loop body (%d > %d)" (List.length body) !Clflags.option_funrollsingle; (code, revmap) @@ -806,7 +851,7 @@ let unroll_inner_loops_single f code revmap = * 3) Links the last instruction of body' into the first of body *) let unroll_inner_loop_body code revmap iloop = - let body = HashedSet.PSet.elements (iloop.body) in + let body = iloop.body in let limit = !Clflags.option_funrollbody in if count_ignore_nops code body > limit then begin debug "Too many nodes in the loop body (%d > %d)" (List.length body) limit; @@ -824,7 +869,9 @@ let unroll_inner_loop_body code revmap iloop = let unroll_inner_loops_body f code revmap = let is_loop_header = get_loop_headers code (f.fn_entrypoint) in + (* debug_flag := true; *) let inner_loops = get_inner_loops f code is_loop_header in + debug "Number of loops found: %d\n" (List.length inner_loops); let code' = ref code in let revmap' = ref revmap in begin @@ -832,7 +879,7 @@ let unroll_inner_loops_body f code revmap = List.iter (fun iloop -> let (new_code, new_revmap) = unroll_inner_loop_body !code' !revmap' iloop in code' := new_code; revmap' := new_revmap - ) inner_loops; + ) inner_loops; (* debug_flag := false; *) (!code', !revmap') end diff --git a/backend/LICMaux.ml b/backend/LICMaux.ml index 42b8eeb7..bf6418e8 100644 --- a/backend/LICMaux.ml +++ b/backend/LICMaux.ml @@ -39,6 +39,28 @@ let rtl_successors = function | Icond (_,_,n1,n2,_) -> [n1; n2] | Ijumptable (_,ln) -> ln +let print_ptree_bool oc pt = + if !debug_flag then + let elements = PTree.elements pt in + begin + Printf.fprintf oc "["; + List.iter (fun (n, b) -> + if b then Printf.fprintf oc "%d, " (P.to_int n) + ) elements; + Printf.fprintf oc "]\n" + end + else () + +let print_intlist oc l = + let rec f oc = function + | [] -> () + | n::ln -> (Printf.fprintf oc "%d %a" (P.to_int n) f ln) + in begin + if !debug_flag then begin + Printf.fprintf oc "[%a]" f l + end + end + (** Getting loop branches with a DFS visit : * Each node is either Unvisited, Visited, or Processed * pre-order: node becomes Processed @@ -53,23 +75,34 @@ let get_loop_headers code entrypoint = begin in let rec dfs_visit code = function | [] -> () | node :: ln -> + debug "ENTERING node %d, REM are %a\n" (P.to_int node) print_intlist ln; match (get_some @@ PTree.get node !visited) with - | Visited -> () + | Visited -> begin + debug "\tNode %d is already Visited, skipping\n" (P.to_int node); + dfs_visit code ln + end | Processed -> begin debug "Node %d is a loop header\n" (P.to_int node); is_loop_header := PTree.set node true !is_loop_header; - visited := PTree.set node Visited !visited + visited := PTree.set node Visited !visited; + dfs_visit code ln end | Unvisited -> begin visited := PTree.set node Processed !visited; - match PTree.get node code with + debug "Node %d is Processed\n" (P.to_int node); + (match PTree.get node code with | None -> failwith "No such node" - | Some i -> let next_visits = rtl_successors i in dfs_visit code next_visits; + | Some i -> let next_visits = rtl_successors i in begin + debug "About to visit: %a\n" print_intlist next_visits; + dfs_visit code next_visits + end); + debug "Node %d is Visited!\n" (P.to_int node); visited := PTree.set node Visited !visited; dfs_visit code ln end in begin dfs_visit code [entrypoint]; + debug "LOOP HEADERS: %a\n" print_ptree_bool !is_loop_header; !is_loop_header end end -- cgit