aboutsummaryrefslogtreecommitdiffstats
path: root/backend/Duplicateaux.ml
diff options
context:
space:
mode:
Diffstat (limited to 'backend/Duplicateaux.ml')
-rw-r--r--backend/Duplicateaux.ml356
1 files changed, 246 insertions, 110 deletions
diff --git a/backend/Duplicateaux.ml b/backend/Duplicateaux.ml
index 76b5616b..861df3cd 100644
--- a/backend/Duplicateaux.ml
+++ b/backend/Duplicateaux.ml
@@ -115,16 +115,18 @@ let ptree_printbool pt =
(* Looks ahead (until a branch) to see if a node further down verifies
* the given predicate *)
-let rec look_ahead code node is_loop_header predicate =
+let rec look_ahead_gen (successors: RTL.instruction -> P.t list) code node is_loop_header predicate =
if (predicate node) then true
- else match (rtl_successors @@ get_some @@ PTree.get node code) with
+ else match (successors @@ get_some @@ PTree.get node code) with
| [n] -> if (predicate n) then true
else (
if (get_some @@ PTree.get n is_loop_header) then false
- else look_ahead code n is_loop_header predicate
+ else look_ahead_gen successors code n is_loop_header predicate
)
| _ -> false
+let look_ahead = look_ahead_gen rtl_successors
+
(**
* Heuristics mostly based on the paper Branch Prediction for Free
*)
@@ -183,7 +185,7 @@ let do_loop_heuristic code cond ifso ifnot is_loop_header =
let predicate n = get_some @@ PTree.get n is_loop_header in
let ifso_loop = look_ahead code ifso is_loop_header predicate in
let ifnot_loop = look_ahead code ifnot is_loop_header predicate in
- if ifso_loop && ifnot_loop then None (* TODO - take the innermost loop ? *)
+ if ifso_loop && ifnot_loop then (debug "\t\tLOOP but can't choose which\n"; None) (* TODO - take the innermost loop ? *)
else if ifso_loop then Some true
else if ifnot_loop then Some false
else None
@@ -197,23 +199,165 @@ let do_loop2_heuristic loop_info n code cond ifso ifnot is_loop_header =
| Some b -> Some b
end
+(** Innermost loop detection *)
+
+type innerLoop = {
+ preds: P.t list;
+ body: P.t list;
+ head: P.t; (* head of the loop *)
+ finals: P.t list; (* the final instructions, which loops back to the head *)
+ (* There may be more than one ; for instance if there is an if inside the loop with both
+ * branches leading to a goto backedge
+ * Such cases usually happen after a tail-duplication *)
+ sb_final: P.t option; (* if the innerloop wraps a superblock, this is its final instruction *)
+ (* may be None if we predict that we do not loop *)
+}
+
+let print_pset = LICMaux.pp_pset
+
+let print_ptree printer pt =
+ let elements = PTree.elements pt in
+ begin
+ debug "[\n";
+ List.iter (fun (n, elt) ->
+ debug "\t%d: %a\n" (P.to_int n) printer elt
+ ) elements;
+ debug "]\n"
+ end
+
+let rtl_successors_pref = function
+| Itailcall _ | Ireturn _ -> []
+| Icall(_,_,_,_,n) | Ibuiltin(_,_,_,n) | Inop n | Iop (_,_,_,n)
+| Iload (_,_,_,_,_,n) | Istore (_,_,_,_,n) -> [n]
+| Icond (_,_,n1,n2,p) -> (match p with
+ | Some true -> [n1]
+ | Some false -> [n2]
+ | None -> [n1; n2])
+| Ijumptable (_,ln) -> ln
+
+(* Find the last node of a trace (starting at "node"), until a loop is encountered.
+ * If a non-predicted branch is encountered, returns None *)
+let rec find_last_node_before_loop code node trace is_loop_header =
+ let rtl_succ = rtl_successors @@ get_some @@ PTree.get node code in
+ let headers = List.filter (fun n ->
+ get_some @@ PTree.get n is_loop_header && HashedSet.PSet.contains trace n) rtl_succ in
+ match headers with
+ | [] -> (
+ let next_nodes = rtl_successors_pref @@ get_some @@ PTree.get node code in
+ match next_nodes with
+ | [n] -> (
+ (* To prevent getting out of the superblock and loop infinitely when the prediction is false *)
+ if HashedSet.PSet.contains trace n then
+ find_last_node_before_loop code n trace is_loop_header
+ else None
+ )
+ | _ -> None (* May happen when we predict that a loop is not taken *)
+ )
+ | [h] -> Some node
+ | _ -> failwith "Multiple branches leading to a loop"
+
+(* The computation of sb_final requires to already have branch prediction *)
+let get_inner_loops f code is_loop_header =
+ let fake_f = { fn_sig = f.fn_sig; fn_params = f.fn_params;
+ fn_stacksize = f.fn_stacksize; fn_code = code; fn_entrypoint = f.fn_entrypoint } in
+ let (_, predmap, loopmap) = LICMaux.inner_loops fake_f in
+ begin
+ debug "PREDMAP: "; print_ptree print_intlist predmap;
+ debug "LOOPMAP: "; print_ptree print_pset loopmap;
+ List.map (fun (n, body) ->
+ let preds = List.filter (fun p -> not @@ HashedSet.PSet.contains body p)
+ @@ get_some @@ PTree.get n predmap in
+ let head = (* the instruction from body which is a loop header *)
+ let heads = HashedSet.PSet.elements @@ HashedSet.PSet.filter
+ (fun n -> ptree_get_some n is_loop_header) body in
+ begin
+ assert (List.length heads == 1);
+ List.hd heads
+ end in
+ let finals = (* the predecessors from head that are in the body *)
+ let head_preds = ptree_get_some head predmap in
+ let filtered = List.filter (fun n -> HashedSet.PSet.contains body n) head_preds in
+ begin
+ debug "HEAD: %d\n" (P.to_int head);
+ debug "BODY: %a\n" print_pset body;
+ debug "HEADPREDS: %a\n" print_intlist head_preds;
+ filtered
+ end in
+ let sb_final = find_last_node_before_loop code head body is_loop_header in
+ let body = HashedSet.PSet.elements body in
+ { preds = preds; body = body; head = head; finals = finals;
+ sb_final = sb_final; }
+ )
+ (* LICMaux.inner_loops also returns non-inner loops, but with a body of 1 instruction
+ * We remove those to get just the inner loops *)
+ @@ List.filter (fun (n, body) ->
+ let count = List.length @@ HashedSet.PSet.elements body in count != 1
+ ) (PTree.elements loopmap)
+ end
+
+
(* Returns a PTree of either None or Some b where b determines the node following the loop, for a cb instruction *)
(* It uses the fact that loops in CompCert are done by a branch (backedge) instruction followed by a cb *)
-let get_loop_info is_loop_header bfs_order code =
+let get_loop_info f is_loop_header bfs_order code =
+ debug "GET LOOP INFO\n";
+ debug "==================================\n";
let loop_info = ref (PTree.map (fun n i -> None) code) in
- let mark_path s n =
+ let mark_path n lbody =
+ let cb_info = ref PTree.empty in
let visited = ref (PTree.map (fun n i -> false) code) in
- let rec explore src dest =
- if (get_some @@ PTree.get src !visited) then false
- else if src == dest then true
- else begin
- visited := PTree.set src true !visited;
- match rtl_successors @@ get_some @@ PTree.get src code with
- | [] -> false
- | [s] -> explore s dest
- | [s1; s2] -> (explore s1 dest) || (explore s2 dest)
- | _ -> false
- end
+ (* Returns true if there is a path from src to dest (not involving jumptables) *)
+ (* Mark nodes as visited along the way *)
+ let explore src dest =
+ debug "Trying to dive a path from %d to %d\n" (P.to_int src) (P.to_int dest);
+ (* Memoizing the results to avoid exponential blow-up *)
+ let memory = ref PTree.empty in
+ let rec explore_rec src =
+ debug "explore_rec %d vs %d... " (P.to_int src) (P.to_int dest);
+ if (P.to_int src) == (P.to_int dest) then (debug "FOUND\n"; true)
+ else if (get_some @@ PTree.get src !visited) then (debug "VISITED... :( \n"; false)
+ (* if we went out of the innermost loop *)
+ else if (not @@ List.mem src lbody) then (debug "Out of innermost...\n"; false)
+ else begin
+ let inst = get_some @@ PTree.get src code in
+ visited := PTree.set src true !visited;
+ match rtl_successors inst with
+ | [] -> false
+ | [s] -> explore_wrap s
+ | [s1; s2] -> let snapshot_visited = ref !visited in begin
+ debug "\t\tSplit at %d: either %d or %d\n" (P.to_int src) (P.to_int s1) (P.to_int s2);
+ (* Remembering that we tried the ifso node *)
+ cb_info := PTree.set src true !cb_info;
+ match explore_wrap s1 with
+ | true -> (
+ visited := !snapshot_visited;
+ match explore_wrap s2 with
+ | true -> begin
+ (* Both paths lead to a loop: we cannot predict the CB
+ * (but the explore still succeeds) *)
+ cb_info := PTree.remove src !cb_info;
+ true
+ end
+ | false -> true (* nothing to do, the explore succeeded *)
+ )
+ | false -> begin
+ cb_info := PTree.set src false !cb_info;
+ match explore_wrap s2 with
+ | true -> true
+ | false -> (cb_info := PTree.remove src !cb_info; false)
+ end
+ end
+ | _ -> false
+ end
+ and explore_wrap src = begin
+ match PTree.get src !memory with
+ | Some b -> b
+ | None ->
+ let result = explore_rec src in
+ (memory := PTree.set src result !memory; result)
+ end in explore_wrap src
+ (* Goes forward until a CB is encountered
+ * Returns None if no CB was found, or Some the_cb_node
+ * Marks nodes as visited along the way *)
in let rec advance_to_cb src =
if (get_some @@ PTree.get src !visited) then None
else begin
@@ -225,44 +369,53 @@ let get_loop_info is_loop_header bfs_order code =
| Ijumptable _ | Itailcall _ | Ireturn _ -> None
end
in begin
- debug "Marking path from %d to %d\n" (P.to_int n) (P.to_int s);
- match advance_to_cb s with
- | None -> (debug "Nothing found\n")
- | Some s -> ( debug "Advancing to %d\n" (P.to_int s);
+ debug "Attempting to find natural loop from HEAD %d..\n" (P.to_int n);
+ match advance_to_cb n with
+ | None -> (debug "\tNo CB found\n")
+ | Some s -> ( debug "\tFound a CB! %d\n" (P.to_int s);
match get_some @@ PTree.get s !loop_info with
| None | Some _ -> begin
match get_some @@ PTree.get s code with
- | Icond (_, _, n1, n2, _) ->
+ | Icond (_, _, n1, n2, _) -> (
let b1 = explore n1 n in
let b2 = explore n2 n in
- if (b1 && b2) then (debug "both true\n")
- else if b1 then (debug "true privileged\n"; loop_info := PTree.set s (Some true) !loop_info)
- else if b2 then (debug "false privileged\n"; loop_info := PTree.set s (Some false) !loop_info)
- else (debug "none true\n")
- | _ -> ( debug "not an icond\n" )
+ if (b1 && b2) then
+ debug "\tBoth paths lead back to the head: NONE\n"
+ else if (b1 || b2) then begin
+ if b1 then begin
+ debug "\tTrue path leads to the head: TRUE\n";
+ loop_info := PTree.set s (Some true) !loop_info;
+ end else if b2 then begin
+ debug "\tFalse path leads to the head: FALSE\n";
+ loop_info := PTree.set s (Some false) !loop_info
+ end;
+ debug "\tSetting other CBs encountered..\n";
+ List.iter (fun (cb, dir) ->
+ debug "\t\t%d is %B\n" (P.to_int cb) dir;
+ loop_info := PTree.set cb (Some dir) !loop_info
+ ) (PTree.elements !cb_info)
+ end else
+ debug "\tNo path leads back to the head: NONE\n"
+ )
+ | _ -> failwith "\tNot an Icond\n"
end
(* | Some _ -> ( debug "already loop info there\n" ) FIXME - we don't know yet whether a branch to a loop head is a backedge or not *)
)
end
- in begin
- List.iter (fun n ->
- match get_some @@ PTree.get n code with
- | Inop s | Iop (_,_,_,s) | Iload (_,_,_,_,_,s) | Istore (_,_,_,_,s) | Icall (_,_,_,_,s)
- | Ibuiltin (_, _, _, s) ->
- if get_some @@ PTree.get s is_loop_header then mark_path s n
- | Icond _ -> () (* loop backedges are never Icond in CompCert RTL.3 *)
- | Ijumptable _ -> ()
- | Itailcall _ | Ireturn _ -> ()
- ) bfs_order;
+ in let iloops = get_inner_loops f code is_loop_header in
+ begin
+ List.iter (fun il -> mark_path il.head il.body) iloops;
+ (* List.iter mark_path @@ List.filter (fun n -> get_some @@ PTree.get n is_loop_header) bfs_order; *)
+ debug "==================================\n";
!loop_info
end
(* Remark - compared to the original Branch Prediction for Free paper, we don't use the store heuristic *)
-let get_directions code entrypoint = begin
+let get_directions f code entrypoint = begin
debug "get_directions\n";
let bfs_order = bfs code entrypoint in
let is_loop_header = get_loop_headers code entrypoint in
- let loop_info = get_loop_info is_loop_header bfs_order code in
+ let loop_info = get_loop_info f is_loop_header bfs_order code in
let directions = ref (PTree.map (fun n i -> None) code) in (* None <=> no predicted direction *)
begin
(* ptree_printbool is_loop_header; *)
@@ -312,9 +465,9 @@ let rec update_direction_rec directions = function
in PTree.set n (update_direction direction i) (update_direction_rec directions lm)
(* Uses branch prediction to write prediction annotations in Icond *)
-let update_directions code entrypoint = begin
+let update_directions f code entrypoint = begin
debug "Update_directions\n";
- let directions = get_directions code entrypoint
+ let directions = get_directions f code entrypoint
in begin
(* debug "Ifso directions: ";
ptree_printbool directions;
@@ -545,7 +698,8 @@ let is_a_nop code n =
* preds: mapping node -> predecessors
* ptree: the revmap
* trace: the trace to follow tail duplication on *)
-let tail_duplicate code preds ptree trace =
+let tail_duplicate code preds is_loop_header ptree trace =
+ debug "Tail_duplicate on that trace: %a\n" print_trace trace;
(* next_int: unused integer that can be used for the next duplication *)
let next_int = ref (next_free_pc code)
(* last_node and last_duplicate store resp. the last processed node of the trace, and its duplication *)
@@ -560,7 +714,12 @@ let tail_duplicate code preds ptree trace =
if is_first then (code, ptree) (* first node is never duplicated regardless of its inputs *)
else
let node_preds = ptree_get_some n preds
- in let node_preds_nolast = List.filter (fun e -> e <> get_some !last_node) node_preds
+ in let node_preds_nolast =
+ (* We traverse loop headers without initiating tail duplication
+ * (see case of two imbricated loops) *)
+ if (get_some @@ PTree.get n is_loop_header) then []
+ else List.filter (fun e -> e <> get_some !last_node) node_preds
+ (* in let node_preds_nolast = List.filter (fun e -> not @@ List.mem e t) node_preds_nolast *)
in let final_node_preds = match !last_duplicate with
| None -> node_preds_nolast
| Some n' -> n' :: node_preds_nolast
@@ -581,12 +740,12 @@ let tail_duplicate code preds ptree trace =
in let new_code, new_ptree = f code ptree true trace
in (new_code, new_ptree, !nb_duplicated)
-let superblockify_traces code preds traces ptree =
+let superblockify_traces code preds is_loop_header traces ptree =
let max_nb_duplicated = !Clflags.option_ftailduplicate (* FIXME - should be architecture dependent *)
in let rec f code ptree = function
| [] -> (code, ptree, 0)
| trace :: traces ->
- let new_code, new_ptree, nb_duplicated = tail_duplicate code preds ptree trace
+ let new_code, new_ptree, nb_duplicated = tail_duplicate code preds is_loop_header ptree trace
in if (nb_duplicated < max_nb_duplicated)
then (debug "End duplication\n"; f new_code new_ptree traces)
else (debug "Too many duplicated nodes, aborting tail duplication\n"; (code, ptree, 0))
@@ -611,19 +770,21 @@ let invert_iconds code =
* cyclic dependencies between LICMaux and Duplicateaux
*)
-type innerLoop = {
- preds: P.t list;
- body: P.t list;
- head: P.t; (* head of the loop *)
- finals: P.t list (* the final instructions, which loops back to the head *)
- (* There may be more than one ; for instance if there is an if inside the loop with both
- * branches leading to a goto backedge *)
-}
-let print_pset = LICMaux.pp_pset
+
+let print_option_pint oc o =
+ if !debug_flag then
+ match o with
+ | None -> Printf.fprintf oc "None"
+ | Some n -> Printf.fprintf oc "Some %d" (P.to_int n)
let print_inner_loop iloop =
- debug "{preds: %a, body: %a}" print_intlist iloop.preds print_intlist iloop.body
+ debug "{preds: %a, body: %a, head: %d, finals: %a, sb_final: %a}\n"
+ print_intlist iloop.preds
+ print_intlist iloop.body
+ (P.to_int iloop.head)
+ print_intlist iloop.finals
+ print_option_pint iloop.sb_final
let rec print_inner_loops = function
| [] -> ()
@@ -633,16 +794,6 @@ let rec print_inner_loops = function
print_inner_loops iloops
end
-let print_ptree printer pt =
- let elements = PTree.elements pt in
- begin
- debug "[\n";
- List.iter (fun (n, elt) ->
- debug "\t%d: %a\n" (P.to_int n) printer elt
- ) elements;
- debug "]\n"
- end
-
let print_pint oc i = if !debug_flag then Printf.fprintf oc "%d" (P.to_int i) else ()
let cb_exit_node = function
@@ -692,41 +843,6 @@ let get_inner_loops f code is_loop_header =
!iloops
*)
-let get_inner_loops f code is_loop_header =
- let fake_f = { fn_sig = f.fn_sig; fn_params = f.fn_params;
- fn_stacksize = f.fn_stacksize; fn_code = code; fn_entrypoint = f.fn_entrypoint } in
- let (_, predmap, loopmap) = LICMaux.inner_loops fake_f in
- begin
- debug "PREDMAP: "; print_ptree print_intlist predmap;
- debug "LOOPMAP: "; print_ptree print_pset loopmap;
- List.map (fun (n, body) ->
- let preds = List.filter (fun p -> not @@ HashedSet.PSet.contains body p)
- @@ get_some @@ PTree.get n predmap in
- let head = (* the instruction from body which is a loop header *)
- let heads = HashedSet.PSet.elements @@ HashedSet.PSet.filter
- (fun n -> ptree_get_some n is_loop_header) body in
- begin
- assert (List.length heads == 1);
- List.hd heads
- end in
- let finals = (* the predecessors from head that are in the body *)
- let head_preds = ptree_get_some head predmap in
- let filtered = List.filter (fun n -> HashedSet.PSet.contains body n) head_preds in
- begin
- debug "HEAD: %d\n" (P.to_int head);
- debug "BODY: %a\n" print_pset body;
- debug "HEADPREDS: %a\n" print_intlist head_preds;
- filtered
- end in
- { preds = preds; body = (HashedSet.PSet.elements body); head = head; finals = finals }
- )
- (* LICMaux.inner_loops also returns non-inner loops, but with a body of 1 instruction
- * We remove those to get just the inner loops *)
- @@ List.filter (fun (n, body) ->
- let count = List.length @@ HashedSet.PSet.elements body in count != 1
- ) (PTree.elements loopmap)
- end
-
let rec generate_fwmap ln ln' fwmap =
match ln with
| [] -> begin
@@ -836,31 +952,50 @@ let unroll_inner_loops_single f code revmap =
(!code', !revmap')
end
+let is_some o = match o with Some _ -> true | None -> false
+
+let rec go_through_predicted code start final =
+ if start == final then
+ Some [start]
+ else
+ match rtl_successors_pref @@ get_some @@ PTree.get start code with
+ | [n] -> (
+ match go_through_predicted code n final with
+ | Some ln -> Some (start :: ln)
+ | None -> None
+ )
+ | _ -> None
+
(* Unrolls the body of the inner loop once - duplicating the exit condition as well
* 1) Clones body into body'
- * 2) Links the last instruction of body into the first of body'
+ * 2) Links the last instruction of body (sb_final) into the first of body'
* 3) Links the last instruction of body' into the first of body
*)
let unroll_inner_loop_body code revmap iloop =
+ debug "iloop = "; print_inner_loop iloop;
let body = iloop.body in
let limit = !Clflags.option_funrollbody in
if count_ignore_nops code body > limit then begin
- debug "Too many nodes in the loop body (%d > %d)" (List.length body) limit;
+ debug "Too many nodes in the loop body (%d > %d)\n" (List.length body) limit;
+ (code, revmap)
+ end else if not @@ is_some iloop.sb_final then begin
+ debug "The loop body does not form a superblock OR we have predicted that we do not loop\n";
(code, revmap)
end else
- let (code2, revmap2, dupbody, fwmap) = clone code revmap body in
+ let sb_final = get_some @@ iloop.sb_final in
+ let sb_body = get_some @@ go_through_predicted code iloop.head sb_final in
+ let (code2, revmap2, dupbody, fwmap) = clone code revmap sb_body in
let code' = ref code2 in
let head' = apply_map fwmap (iloop.head) in
- let finals' = apply_map_list fwmap (iloop.finals) in
+ let sb_final' = apply_map fwmap sb_final in
begin
- code' := change_pointers !code' iloop.head head' iloop.finals;
- code' := change_pointers !code' head' iloop.head finals';
+ code' := change_pointers !code' iloop.head head' [sb_final];
+ code' := change_pointers !code' head' iloop.head [sb_final'];
(!code', revmap2)
end
let unroll_inner_loops_body f code revmap =
let is_loop_header = get_loop_headers code (f.fn_entrypoint) in
- (* debug_flag := true; *)
let inner_loops = get_inner_loops f code is_loop_header in
debug "Number of loops found: %d\n" (List.length inner_loops);
let code' = ref code in
@@ -870,7 +1005,7 @@ let unroll_inner_loops_body f code revmap =
List.iter (fun iloop ->
let (new_code, new_revmap) = unroll_inner_loop_body !code' !revmap' iloop in
code' := new_code; revmap' := new_revmap
- ) inner_loops; (* debug_flag := false; *)
+ ) inner_loops;
(!code', !revmap')
end
@@ -930,7 +1065,7 @@ let static_predict f =
let revmap = make_identity_ptree code in
let code =
if !Clflags.option_fpredict then
- update_directions code entrypoint
+ update_directions f code entrypoint
else code in
let code =
if !Clflags.option_fpredict then
@@ -966,6 +1101,7 @@ let tail_duplicate f =
if !Clflags.option_ftailduplicate > 0 then
let traces = select_traces code entrypoint in
let preds = get_predecessors_rtl code in
- superblockify_traces code preds traces revmap
+ let is_loop_header = get_loop_headers code entrypoint in
+ superblockify_traces code preds is_loop_header traces revmap
else (code, revmap) in
((code, entrypoint), revmap)