aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Monniaux <david.monniaux@univ-grenoble-alpes.fr>2020-11-05 09:50:15 +0100
committerDavid Monniaux <david.monniaux@univ-grenoble-alpes.fr>2020-11-05 09:50:15 +0100
commit39d14caa044364ea93da7dbc49a699008e505311 (patch)
tree7be8c998cc2289767fa996dc61cd092305bd1002
parent0fe569d24b99a34fb3b9ad6c0cb86876cc86a31d (diff)
parent160c4ae21cdc86e26850ed0bdec8d95ca23c57db (diff)
downloadcompcert-kvx-39d14caa044364ea93da7dbc49a699008e505311.tar.gz
compcert-kvx-39d14caa044364ea93da7dbc49a699008e505311.zip
Merge remote-tracking branch 'origin/kvx-work' into kvx-test-prepass
-rw-r--r--README.md3
-rw-r--r--backend/CSE3analysisaux.ml3
-rw-r--r--backend/Duplicateaux.ml93
-rw-r--r--backend/LICMaux.ml41
-rw-r--r--tools/compiler_expand.ml5
5 files changed, 114 insertions, 31 deletions
diff --git a/README.md b/README.md
index 77219cc1..58a7d052 100644
--- a/README.md
+++ b/README.md
@@ -31,7 +31,8 @@ The people responsible for this version are
## Papers, docs, etc on this CompCert version
-* [a 5-minutes video](http://www-verimag.imag.fr/~boulme/videos/poster-oopsla20.mp4) by C. Six, presenting the postpass scheduling and the KVX backend.
+* [a 5-minutes video](http://www-verimag.imag.fr/~boulme/videos/poster-oopsla20.mp4) by C. Six, presenting the postpass scheduling and the KVX backend
+(also on [YouTube if you need subtitles](https://www.youtube.com/watch?v=RAzMDS9OVSw)).
* [Certified and Efficient Instruction Scheduling](https://hal.archives-ouvertes.fr/hal-02185883), an OOPSLA'20 paper, by Six, Boulmé and Monniaux.
* [the documentation of the KVX backend Coq sources](https://certicompil.gricad-pages.univ-grenoble-alpes.fr/compcert-kvx)
diff --git a/backend/CSE3analysisaux.ml b/backend/CSE3analysisaux.ml
index bd4ca20a..e37ef61f 100644
--- a/backend/CSE3analysisaux.ml
+++ b/backend/CSE3analysisaux.ml
@@ -139,7 +139,8 @@ let refine_invariants
if not (RB.beq cur nxt)
then
begin
- Printf.printf "refining CSE3 node %d\n" (P.to_int pc);
+ (if !Clflags.option_debug_compcert > 4
+ then Printf.printf "refining CSE3 node %d\n" (P.to_int pc));
List.iter add_todo (successors pc)
end in
(List.iter add_todo nodes);
diff --git a/backend/Duplicateaux.ml b/backend/Duplicateaux.ml
index fac0ba76..04b68e25 100644
--- a/backend/Duplicateaux.ml
+++ b/backend/Duplicateaux.ml
@@ -30,7 +30,9 @@ let get_some = LICMaux.get_some
let rtl_successors = LICMaux.rtl_successors
(* Get list of nodes following a BFS of the code *)
-let bfs code entrypoint = begin
+(* Stops when predicate is reached
+ * Excludes any node given in excluded function *)
+let bfs_until code entrypoint (predicate: node->bool) (excluded: node->bool) = begin
debug "bfs\n";
let visited = ref (PTree.map (fun n i -> false) code)
and bfs_list = ref []
@@ -40,20 +42,24 @@ let bfs code entrypoint = begin
Queue.add entrypoint to_visit;
while not (Queue.is_empty to_visit) do
node := Queue.pop to_visit;
- if not (get_some @@ PTree.get !node !visited) then begin
+ if (not (get_some @@ PTree.get !node !visited)) then begin
visited := PTree.set !node true !visited;
- match PTree.get !node code with
- | None -> failwith "No such node"
- | Some i ->
- bfs_list := !node :: !bfs_list;
- let succ = rtl_successors i in
- List.iter (fun n -> Queue.add n to_visit) succ
+ if not (excluded !node) then begin
+ match PTree.get !node code with
+ | None -> failwith "No such node"
+ | Some i ->
+ bfs_list := !node :: !bfs_list;
+ if not (predicate !node) then
+ let succ = rtl_successors i in List.iter (fun n -> Queue.add n to_visit) succ
+ end
end
done;
List.rev !bfs_list
end
end
+let bfs code entrypoint = bfs_until code entrypoint (fun _ -> false) (fun _ -> false)
+
let optbool o = match o with Some _ -> true | None -> false
let ptree_get_some n ptree = get_some @@ PTree.get n ptree
@@ -81,15 +87,7 @@ end
module PSet = Set.Make(PInt)
-let print_intlist oc l =
- let rec f oc = function
- | [] -> ()
- | n::ln -> (Printf.fprintf oc "%d %a" (P.to_int n) f ln)
- in begin
- if !debug_flag then begin
- Printf.fprintf oc "[%a]" f l
- end
- end
+let print_intlist = LICMaux.print_intlist
let print_intset s =
let seq = PSet.to_seq s
@@ -627,7 +625,7 @@ let invert_iconds code =
type innerLoop = {
preds: P.t list;
- body: HashedSet.PSet.t;
+ body: P.t list;
head: P.t; (* head of the loop *)
final: P.t (* the final instruction, which loops back to the head *)
}
@@ -635,7 +633,7 @@ type innerLoop = {
let print_pset = LICMaux.pp_pset
let print_inner_loop iloop =
- debug "{preds: %a, body: %a}" print_intlist iloop.preds print_pset iloop.body
+ debug "{preds: %a, body: %a}" print_intlist iloop.preds print_intlist iloop.body
let rec print_inner_loops = function
| [] -> ()
@@ -657,6 +655,53 @@ let print_ptree printer pt =
let print_pint oc i = if !debug_flag then Printf.fprintf oc "%d" (P.to_int i) else ()
+let cb_exit_node = function
+ | Icond (_,_,n1,n2,p) -> begin match p with
+ | Some true -> Some n2
+ | Some false -> Some n1
+ | None -> None
+ end
+ | _ -> None
+
+let get_natural_loop code predmap n =
+ let is_final_node m =
+ let successors = rtl_successors @@ get_some @@ PTree.get m code in
+ List.exists (fun s -> (P.to_int s) == (P.to_int n)) successors
+ in
+ let excluded_node = cb_exit_node @@ get_some @@ PTree.get n code in
+ let is_excluded m = match excluded_node with
+ | None -> false
+ | Some ex -> P.to_int ex == P.to_int m
+ in
+ debug "get_natural_loop for %d\n" (P.to_int n);
+ let body = bfs_until code n is_final_node is_excluded in
+ debug "BODY: %a\n" print_intlist body;
+ let final = List.find is_final_node body in
+ debug "FINAL: %d\n" (P.to_int final);
+ let preds = List.filter (fun pred -> List.mem pred body) @@ get_some @@ PTree.get n predmap in
+ debug "PREDS: %a\n" print_intlist preds;
+ { preds = preds; body = body; head = n; final = final }
+
+let rec count_loop_headers is_loop_header = function
+ | [] -> 0
+ | n :: ln ->
+ let rem = count_loop_headers is_loop_header ln in
+ if (get_some @@ PTree.get n is_loop_header) then rem + 1 else rem
+
+(* Alternative code to get inner_loops - use it if we suspect the other function to be bugged *)
+(*
+let get_inner_loops f code is_loop_header =
+ let predmap = get_predecessors_rtl code in
+ let iloops = ref [] in
+ List.iter (fun (n, ilh) -> if ilh then
+ let iloop = get_natural_loop code predmap n in
+ let nb_headers = count_loop_headers is_loop_header iloop.body in
+ if nb_headers == 1 then (* innermost loop *)
+ iloops := iloop :: !iloops
+ ) (PTree.elements is_loop_header);
+ !iloops
+ *)
+
let get_inner_loops f code is_loop_header =
let fake_f = { fn_sig = f.fn_sig; fn_params = f.fn_params;
fn_stacksize = f.fn_stacksize; fn_code = code; fn_entrypoint = f.fn_entrypoint } in
@@ -684,7 +729,7 @@ let get_inner_loops f code is_loop_header =
assert (List.length filtered == 1);
List.hd filtered
end in
- { preds = preds; body = body; head = head; final = final }
+ { preds = preds; body = (HashedSet.PSet.elements body); head = head; final = final }
)
(* LICMaux.inner_loops also returns non-inner loops, but with a body of 1 instruction
* We remove those to get just the inner loops *)
@@ -769,7 +814,7 @@ let rec count_ignore_nops code = function
* 3) Links the last instruction of body' into the first instruction of body
*)
let unroll_inner_loop_single code revmap iloop =
- let body = HashedSet.PSet.elements (iloop.body) in
+ let body = iloop.body in
if count_ignore_nops code body > !Clflags.option_funrollsingle then begin
debug "Too many nodes in the loop body (%d > %d)" (List.length body) !Clflags.option_funrollsingle;
(code, revmap)
@@ -806,7 +851,7 @@ let unroll_inner_loops_single f code revmap =
* 3) Links the last instruction of body' into the first of body
*)
let unroll_inner_loop_body code revmap iloop =
- let body = HashedSet.PSet.elements (iloop.body) in
+ let body = iloop.body in
let limit = !Clflags.option_funrollbody in
if count_ignore_nops code body > limit then begin
debug "Too many nodes in the loop body (%d > %d)" (List.length body) limit;
@@ -824,7 +869,9 @@ let unroll_inner_loop_body code revmap iloop =
let unroll_inner_loops_body f code revmap =
let is_loop_header = get_loop_headers code (f.fn_entrypoint) in
+ (* debug_flag := true; *)
let inner_loops = get_inner_loops f code is_loop_header in
+ debug "Number of loops found: %d\n" (List.length inner_loops);
let code' = ref code in
let revmap' = ref revmap in
begin
@@ -832,7 +879,7 @@ let unroll_inner_loops_body f code revmap =
List.iter (fun iloop ->
let (new_code, new_revmap) = unroll_inner_loop_body !code' !revmap' iloop in
code' := new_code; revmap' := new_revmap
- ) inner_loops;
+ ) inner_loops; (* debug_flag := false; *)
(!code', !revmap')
end
diff --git a/backend/LICMaux.ml b/backend/LICMaux.ml
index 3d344123..f166e9e4 100644
--- a/backend/LICMaux.ml
+++ b/backend/LICMaux.ml
@@ -39,6 +39,28 @@ let rtl_successors = function
| Icond (_,_,n1,n2,_) -> [n1; n2]
| Ijumptable (_,ln) -> ln
+let print_ptree_bool oc pt =
+ if !debug_flag then
+ let elements = PTree.elements pt in
+ begin
+ Printf.fprintf oc "[";
+ List.iter (fun (n, b) ->
+ if b then Printf.fprintf oc "%d, " (P.to_int n)
+ ) elements;
+ Printf.fprintf oc "]\n"
+ end
+ else ()
+
+let print_intlist oc l =
+ let rec f oc = function
+ | [] -> ()
+ | n::ln -> (Printf.fprintf oc "%d %a" (P.to_int n) f ln)
+ in begin
+ if !debug_flag then begin
+ Printf.fprintf oc "[%a]" f l
+ end
+ end
+
(** Getting loop branches with a DFS visit :
* Each node is either Unvisited, Visited, or Processed
* pre-order: node becomes Processed
@@ -53,23 +75,34 @@ let get_loop_headers code entrypoint = begin
in let rec dfs_visit code = function
| [] -> ()
| node :: ln ->
+ debug "ENTERING node %d, REM are %a\n" (P.to_int node) print_intlist ln;
match (get_some @@ PTree.get node !visited) with
- | Visited -> ()
+ | Visited -> begin
+ debug "\tNode %d is already Visited, skipping\n" (P.to_int node);
+ dfs_visit code ln
+ end
| Processed -> begin
debug "Node %d is a loop header\n" (P.to_int node);
is_loop_header := PTree.set node true !is_loop_header;
- visited := PTree.set node Visited !visited
+ visited := PTree.set node Visited !visited;
+ dfs_visit code ln
end
| Unvisited -> begin
visited := PTree.set node Processed !visited;
- match PTree.get node code with
+ debug "Node %d is Processed\n" (P.to_int node);
+ (match PTree.get node code with
| None -> failwith "No such node"
- | Some i -> let next_visits = rtl_successors i in dfs_visit code next_visits;
+ | Some i -> let next_visits = rtl_successors i in begin
+ debug "About to visit: %a\n" print_intlist next_visits;
+ dfs_visit code next_visits
+ end);
+ debug "Node %d is Visited!\n" (P.to_int node);
visited := PTree.set node Visited !visited;
dfs_visit code ln
end
in begin
dfs_visit code [entrypoint];
+ debug "LOOP HEADERS: %a\n" print_ptree_bool !is_loop_header;
!is_loop_header
end
end
diff --git a/tools/compiler_expand.ml b/tools/compiler_expand.ml
index d7484628..6af0ec59 100644
--- a/tools/compiler_expand.ml
+++ b/tools/compiler_expand.ml
@@ -25,8 +25,6 @@ TOTAL, Always, Require, (Some "Renumbering"), "Renumber";
PARTIAL, (Option "optim_CSE"), Require, (Some "CSE"), "CSE";
PARTIAL, Always, NoRequire, (Some "Static Prediction + inverting conditions"), "Staticpredict";
PARTIAL, Always, NoRequire, (Some "Unrolling one iteration out of innermost loops"), "Unrollsingle";
-TOTAL, Always, Require, (Some "Renumbering pre rotate"), "Renumber";
-PARTIAL, Always, NoRequire, (Some "Loop Rotate"), "Looprotate";
TOTAL, Always, NoRequire, (Some "Renumbering pre unrolling"), "Renumber";
PARTIAL, Always, NoRequire, (Some "Unrolling the body of innermost loops"), "Unrollbody";
TOTAL, Always, NoRequire, (Some "Renumbering pre tail duplication"), "Renumber";
@@ -39,6 +37,9 @@ PARTIAL, (Option "optim_CSE3"), Require, (Some "CSE3"), "CSE3";
TOTAL, (Option "optim_CSE3"), Require, (Some "Kill useless moves after CSE3"), "KillUselessMoves";
TOTAL, (Option "optim_forward_moves"), Require, (Some "Forwarding moves"), "ForwardMoves";
PARTIAL, (Option "optim_redundancy"), Require, (Some "Redundancy elimination"), "Deadcode";
+TOTAL, Always, Require, (Some "Renumbering pre rotate"), "Renumber";
+PARTIAL, Always, NoRequire, (Some "Loop Rotate"), "Looprotate";
+TOTAL, (Option "optim_move_loop_invariants"), NoRequire, (Some "Renumbering for LICM"), "Renumber";
PARTIAL, (Option "optim_move_loop_invariants"), Require, (Some "LICM"), "LICM";
TOTAL, (Option "optim_move_loop_invariants"), NoRequire, (Some "Renumbering for LICM"), "Renumber";
PARTIAL, (Option "optim_move_loop_invariants"), NoRequire, (Some "CSE3 for LICM"), "CSE3";