aboutsummaryrefslogtreecommitdiffstats
path: root/backend/Duplicateaux.ml
diff options
context:
space:
mode:
authorCyril SIX <cyril.six@kalray.eu>2020-12-11 16:04:21 +0100
committerCyril SIX <cyril.six@kalray.eu>2020-12-11 16:05:29 +0100
commit3b2ca46523f3fd730c9009aecbc575cc2abb15ca (patch)
tree3d7abe1bdfd4acebbe6f8472c6006b8518167342 /backend/Duplicateaux.ml
parent4e14e7e5369d64e9b2853731d8adeff20881bd1f (diff)
downloadcompcert-kvx-3b2ca46523f3fd730c9009aecbc575cc2abb15ca.tar.gz
compcert-kvx-3b2ca46523f3fd730c9009aecbc575cc2abb15ca.zip
Fixing wrong predictions on imbricated loops
Diffstat (limited to 'backend/Duplicateaux.ml')
-rw-r--r--backend/Duplicateaux.ml218
1 files changed, 114 insertions, 104 deletions
diff --git a/backend/Duplicateaux.ml b/backend/Duplicateaux.ml
index a58ab2c2..861df3cd 100644
--- a/backend/Duplicateaux.ml
+++ b/backend/Duplicateaux.ml
@@ -199,23 +199,124 @@ let do_loop2_heuristic loop_info n code cond ifso ifnot is_loop_header =
| Some b -> Some b
end
+(** Innermost loop detection *)
+
+type innerLoop = {
+ preds: P.t list;
+ body: P.t list;
+ head: P.t; (* head of the loop *)
+ finals: P.t list; (* the final instructions, which loops back to the head *)
+ (* There may be more than one ; for instance if there is an if inside the loop with both
+ * branches leading to a goto backedge
+ * Such cases usually happen after a tail-duplication *)
+ sb_final: P.t option; (* if the innerloop wraps a superblock, this is its final instruction *)
+ (* may be None if we predict that we do not loop *)
+}
+
+let print_pset = LICMaux.pp_pset
+
+let print_ptree printer pt =
+ let elements = PTree.elements pt in
+ begin
+ debug "[\n";
+ List.iter (fun (n, elt) ->
+ debug "\t%d: %a\n" (P.to_int n) printer elt
+ ) elements;
+ debug "]\n"
+ end
+
+let rtl_successors_pref = function
+| Itailcall _ | Ireturn _ -> []
+| Icall(_,_,_,_,n) | Ibuiltin(_,_,_,n) | Inop n | Iop (_,_,_,n)
+| Iload (_,_,_,_,_,n) | Istore (_,_,_,_,n) -> [n]
+| Icond (_,_,n1,n2,p) -> (match p with
+ | Some true -> [n1]
+ | Some false -> [n2]
+ | None -> [n1; n2])
+| Ijumptable (_,ln) -> ln
+
+(* Find the last node of a trace (starting at "node"), until a loop is encountered.
+ * If a non-predicted branch is encountered, returns None *)
+let rec find_last_node_before_loop code node trace is_loop_header =
+ let rtl_succ = rtl_successors @@ get_some @@ PTree.get node code in
+ let headers = List.filter (fun n ->
+ get_some @@ PTree.get n is_loop_header && HashedSet.PSet.contains trace n) rtl_succ in
+ match headers with
+ | [] -> (
+ let next_nodes = rtl_successors_pref @@ get_some @@ PTree.get node code in
+ match next_nodes with
+ | [n] -> (
+ (* To prevent getting out of the superblock and loop infinitely when the prediction is false *)
+ if HashedSet.PSet.contains trace n then
+ find_last_node_before_loop code n trace is_loop_header
+ else None
+ )
+ | _ -> None (* May happen when we predict that a loop is not taken *)
+ )
+ | [h] -> Some node
+ | _ -> failwith "Multiple branches leading to a loop"
+
+(* The computation of sb_final requires to already have branch prediction *)
+let get_inner_loops f code is_loop_header =
+ let fake_f = { fn_sig = f.fn_sig; fn_params = f.fn_params;
+ fn_stacksize = f.fn_stacksize; fn_code = code; fn_entrypoint = f.fn_entrypoint } in
+ let (_, predmap, loopmap) = LICMaux.inner_loops fake_f in
+ begin
+ debug "PREDMAP: "; print_ptree print_intlist predmap;
+ debug "LOOPMAP: "; print_ptree print_pset loopmap;
+ List.map (fun (n, body) ->
+ let preds = List.filter (fun p -> not @@ HashedSet.PSet.contains body p)
+ @@ get_some @@ PTree.get n predmap in
+ let head = (* the instruction from body which is a loop header *)
+ let heads = HashedSet.PSet.elements @@ HashedSet.PSet.filter
+ (fun n -> ptree_get_some n is_loop_header) body in
+ begin
+ assert (List.length heads == 1);
+ List.hd heads
+ end in
+ let finals = (* the predecessors from head that are in the body *)
+ let head_preds = ptree_get_some head predmap in
+ let filtered = List.filter (fun n -> HashedSet.PSet.contains body n) head_preds in
+ begin
+ debug "HEAD: %d\n" (P.to_int head);
+ debug "BODY: %a\n" print_pset body;
+ debug "HEADPREDS: %a\n" print_intlist head_preds;
+ filtered
+ end in
+ let sb_final = find_last_node_before_loop code head body is_loop_header in
+ let body = HashedSet.PSet.elements body in
+ { preds = preds; body = body; head = head; finals = finals;
+ sb_final = sb_final; }
+ )
+ (* LICMaux.inner_loops also returns non-inner loops, but with a body of 1 instruction
+ * We remove those to get just the inner loops *)
+ @@ List.filter (fun (n, body) ->
+ let count = List.length @@ HashedSet.PSet.elements body in count != 1
+ ) (PTree.elements loopmap)
+ end
+
+
(* Returns a PTree of either None or Some b where b determines the node following the loop, for a cb instruction *)
(* It uses the fact that loops in CompCert are done by a branch (backedge) instruction followed by a cb *)
-let get_loop_info is_loop_header bfs_order code =
+let get_loop_info f is_loop_header bfs_order code =
debug "GET LOOP INFO\n";
debug "==================================\n";
let loop_info = ref (PTree.map (fun n i -> None) code) in
- let mark_path n =
+ let mark_path n lbody =
let cb_info = ref PTree.empty in
let visited = ref (PTree.map (fun n i -> false) code) in
(* Returns true if there is a path from src to dest (not involving jumptables) *)
(* Mark nodes as visited along the way *)
let explore src dest =
+ debug "Trying to dive a path from %d to %d\n" (P.to_int src) (P.to_int dest);
(* Memoizing the results to avoid exponential blow-up *)
let memory = ref PTree.empty in
let rec explore_rec src =
- if src == dest then true
- else if (get_some @@ PTree.get src !visited) then false
+ debug "explore_rec %d vs %d... " (P.to_int src) (P.to_int dest);
+ if (P.to_int src) == (P.to_int dest) then (debug "FOUND\n"; true)
+ else if (get_some @@ PTree.get src !visited) then (debug "VISITED... :( \n"; false)
+ (* if we went out of the innermost loop *)
+ else if (not @@ List.mem src lbody) then (debug "Out of innermost...\n"; false)
else begin
let inst = get_some @@ PTree.get src code in
visited := PTree.set src true !visited;
@@ -301,18 +402,20 @@ let get_loop_info is_loop_header bfs_order code =
(* | Some _ -> ( debug "already loop info there\n" ) FIXME - we don't know yet whether a branch to a loop head is a backedge or not *)
)
end
- in begin
- List.iter mark_path @@ List.filter (fun n -> get_some @@ PTree.get n is_loop_header) bfs_order;
+ in let iloops = get_inner_loops f code is_loop_header in
+ begin
+ List.iter (fun il -> mark_path il.head il.body) iloops;
+ (* List.iter mark_path @@ List.filter (fun n -> get_some @@ PTree.get n is_loop_header) bfs_order; *)
debug "==================================\n";
!loop_info
end
(* Remark - compared to the original Branch Prediction for Free paper, we don't use the store heuristic *)
-let get_directions code entrypoint = begin
+let get_directions f code entrypoint = begin
debug "get_directions\n";
let bfs_order = bfs code entrypoint in
let is_loop_header = get_loop_headers code entrypoint in
- let loop_info = get_loop_info is_loop_header bfs_order code in
+ let loop_info = get_loop_info f is_loop_header bfs_order code in
let directions = ref (PTree.map (fun n i -> None) code) in (* None <=> no predicted direction *)
begin
(* ptree_printbool is_loop_header; *)
@@ -362,9 +465,9 @@ let rec update_direction_rec directions = function
in PTree.set n (update_direction direction i) (update_direction_rec directions lm)
(* Uses branch prediction to write prediction annotations in Icond *)
-let update_directions code entrypoint = begin
+let update_directions f code entrypoint = begin
debug "Update_directions\n";
- let directions = get_directions code entrypoint
+ let directions = get_directions f code entrypoint
in begin
(* debug "Ifso directions: ";
ptree_printbool directions;
@@ -667,99 +770,6 @@ let invert_iconds code =
* cyclic dependencies between LICMaux and Duplicateaux
*)
-type innerLoop = {
- preds: P.t list;
- body: P.t list;
- head: P.t; (* head of the loop *)
- finals: P.t list; (* the final instructions, which loops back to the head *)
- (* There may be more than one ; for instance if there is an if inside the loop with both
- * branches leading to a goto backedge
- * Such cases usually happen after a tail-duplication *)
- sb_final: P.t option; (* if the innerloop wraps a superblock, this is its final instruction *)
- (* may be None if we predict that we do not loop *)
-}
-
-let print_pset = LICMaux.pp_pset
-
-let print_ptree printer pt =
- let elements = PTree.elements pt in
- begin
- debug "[\n";
- List.iter (fun (n, elt) ->
- debug "\t%d: %a\n" (P.to_int n) printer elt
- ) elements;
- debug "]\n"
- end
-
-let rtl_successors_pref = function
-| Itailcall _ | Ireturn _ -> []
-| Icall(_,_,_,_,n) | Ibuiltin(_,_,_,n) | Inop n | Iop (_,_,_,n)
-| Iload (_,_,_,_,_,n) | Istore (_,_,_,_,n) -> [n]
-| Icond (_,_,n1,n2,p) -> (match p with
- | Some true -> [n1]
- | Some false -> [n2]
- | None -> [n1; n2])
-| Ijumptable (_,ln) -> ln
-
-(* Find the last node of a trace (starting at "node"), until a loop is encountered.
- * If a non-predicted branch is encountered, returns None *)
-let rec find_last_node_before_loop code node trace is_loop_header =
- let rtl_succ = rtl_successors @@ get_some @@ PTree.get node code in
- let headers = List.filter (fun n ->
- get_some @@ PTree.get n is_loop_header && HashedSet.PSet.contains trace n) rtl_succ in
- match headers with
- | [] -> (
- let next_nodes = rtl_successors_pref @@ get_some @@ PTree.get node code in
- match next_nodes with
- | [n] -> (
- (* To prevent getting out of the superblock and loop infinitely when the prediction is false *)
- if HashedSet.PSet.contains trace n then
- find_last_node_before_loop code n trace is_loop_header
- else None
- )
- | _ -> None (* May happen when we predict that a loop is not taken *)
- )
- | [h] -> Some node
- | _ -> failwith "Multiple branches leading to a loop"
-
-(* The computation of sb_final requires to already have branch prediction *)
-let get_inner_loops f code is_loop_header =
- let fake_f = { fn_sig = f.fn_sig; fn_params = f.fn_params;
- fn_stacksize = f.fn_stacksize; fn_code = code; fn_entrypoint = f.fn_entrypoint } in
- let (_, predmap, loopmap) = LICMaux.inner_loops fake_f in
- begin
- debug "PREDMAP: "; print_ptree print_intlist predmap;
- debug "LOOPMAP: "; print_ptree print_pset loopmap;
- List.map (fun (n, body) ->
- let preds = List.filter (fun p -> not @@ HashedSet.PSet.contains body p)
- @@ get_some @@ PTree.get n predmap in
- let head = (* the instruction from body which is a loop header *)
- let heads = HashedSet.PSet.elements @@ HashedSet.PSet.filter
- (fun n -> ptree_get_some n is_loop_header) body in
- begin
- assert (List.length heads == 1);
- List.hd heads
- end in
- let finals = (* the predecessors from head that are in the body *)
- let head_preds = ptree_get_some head predmap in
- let filtered = List.filter (fun n -> HashedSet.PSet.contains body n) head_preds in
- begin
- debug "HEAD: %d\n" (P.to_int head);
- debug "BODY: %a\n" print_pset body;
- debug "HEADPREDS: %a\n" print_intlist head_preds;
- filtered
- end in
- let sb_final = find_last_node_before_loop code head body is_loop_header in
- let body = HashedSet.PSet.elements body in
- { preds = preds; body = body; head = head; finals = finals;
- sb_final = sb_final; }
- )
- (* LICMaux.inner_loops also returns non-inner loops, but with a body of 1 instruction
- * We remove those to get just the inner loops *)
- @@ List.filter (fun (n, body) ->
- let count = List.length @@ HashedSet.PSet.elements body in count != 1
- ) (PTree.elements loopmap)
- end
let print_option_pint oc o =
@@ -1055,7 +1065,7 @@ let static_predict f =
let revmap = make_identity_ptree code in
let code =
if !Clflags.option_fpredict then
- update_directions code entrypoint
+ update_directions f code entrypoint
else code in
let code =
if !Clflags.option_fpredict then