aboutsummaryrefslogtreecommitdiffstats
path: root/backend/Duplicateaux.ml
diff options
context:
space:
mode:
Diffstat (limited to 'backend/Duplicateaux.ml')
-rw-r--r--backend/Duplicateaux.ml91
1 files changed, 75 insertions, 16 deletions
diff --git a/backend/Duplicateaux.ml b/backend/Duplicateaux.ml
index 76b5616b..c9985dc4 100644
--- a/backend/Duplicateaux.ml
+++ b/backend/Duplicateaux.ml
@@ -115,16 +115,18 @@ let ptree_printbool pt =
(* Looks ahead (until a branch) to see if a node further down verifies
* the given predicate *)
-let rec look_ahead code node is_loop_header predicate =
+let rec look_ahead_gen (successors: RTL.instruction -> P.t list) code node is_loop_header predicate =
if (predicate node) then true
- else match (rtl_successors @@ get_some @@ PTree.get node code) with
+ else match (successors @@ get_some @@ PTree.get node code) with
| [n] -> if (predicate n) then true
else (
if (get_some @@ PTree.get n is_loop_header) then false
- else look_ahead code n is_loop_header predicate
+ else look_ahead_gen successors code n is_loop_header predicate
)
| _ -> false
+let look_ahead = look_ahead_gen rtl_successors
+
(**
* Heuristics mostly based on the paper Branch Prediction for Free
*)
@@ -545,7 +547,8 @@ let is_a_nop code n =
* preds: mapping node -> predecessors
* ptree: the revmap
* trace: the trace to follow tail duplication on *)
-let tail_duplicate code preds ptree trace =
+let tail_duplicate code preds is_loop_header ptree trace =
+ debug "Tail_duplicate on that trace: %a\n" print_trace trace;
(* next_int: unused integer that can be used for the next duplication *)
let next_int = ref (next_free_pc code)
(* last_node and last_duplicate store resp. the last processed node of the trace, and its duplication *)
@@ -560,7 +563,12 @@ let tail_duplicate code preds ptree trace =
if is_first then (code, ptree) (* first node is never duplicated regardless of its inputs *)
else
let node_preds = ptree_get_some n preds
- in let node_preds_nolast = List.filter (fun e -> e <> get_some !last_node) node_preds
+ in let node_preds_nolast =
+ (* We traverse loop headers without initiating tail duplication
+ * (see case of two imbricated loops) *)
+ if (get_some @@ PTree.get n is_loop_header) then []
+ else List.filter (fun e -> e <> get_some !last_node) node_preds
+ (* in let node_preds_nolast = List.filter (fun e -> not @@ List.mem e t) node_preds_nolast *)
in let final_node_preds = match !last_duplicate with
| None -> node_preds_nolast
| Some n' -> n' :: node_preds_nolast
@@ -581,12 +589,12 @@ let tail_duplicate code preds ptree trace =
in let new_code, new_ptree = f code ptree true trace
in (new_code, new_ptree, !nb_duplicated)
-let superblockify_traces code preds traces ptree =
+let superblockify_traces code preds is_loop_header traces ptree =
let max_nb_duplicated = !Clflags.option_ftailduplicate (* FIXME - should be architecture dependent *)
in let rec f code ptree = function
| [] -> (code, ptree, 0)
| trace :: traces ->
- let new_code, new_ptree, nb_duplicated = tail_duplicate code preds ptree trace
+ let new_code, new_ptree, nb_duplicated = tail_duplicate code preds is_loop_header ptree trace
in if (nb_duplicated < max_nb_duplicated)
then (debug "End duplication\n"; f new_code new_ptree traces)
else (debug "Too many duplicated nodes, aborting tail duplication\n"; (code, ptree, 0))
@@ -615,15 +623,29 @@ type innerLoop = {
preds: P.t list;
body: P.t list;
head: P.t; (* head of the loop *)
- finals: P.t list (* the final instructions, which loops back to the head *)
+ finals: P.t list; (* the final instructions, which loops back to the head *)
(* There may be more than one ; for instance if there is an if inside the loop with both
- * branches leading to a goto backedge *)
+ * branches leading to a goto backedge
+ * Such cases usually happen after a tail-duplication *)
+ sb_final: P.t option; (* if the innerloop wraps a superblock, this is its final instruction *)
+ (* may be None if we predict that we do not loop *)
}
let print_pset = LICMaux.pp_pset
+let print_option_pint oc o =
+ if !debug_flag then
+ match o with
+ | None -> Printf.fprintf oc "None"
+ | Some n -> Printf.fprintf oc "Some %d" (P.to_int n)
+
let print_inner_loop iloop =
- debug "{preds: %a, body: %a}" print_intlist iloop.preds print_intlist iloop.body
+ debug "{preds: %a, body: %a, head: %d, finals: %a, sb_final: %a}\n"
+ print_intlist iloop.preds
+ print_intlist iloop.body
+ (P.to_int iloop.head)
+ print_intlist iloop.finals
+ print_option_pint iloop.sb_final
let rec print_inner_loops = function
| [] -> ()
@@ -692,6 +714,34 @@ let get_inner_loops f code is_loop_header =
!iloops
*)
+let rtl_successors_pref = function
+| Itailcall _ | Ireturn _ -> []
+| Icall(_,_,_,_,n) | Ibuiltin(_,_,_,n) | Inop n | Iop (_,_,_,n)
+| Iload (_,_,_,_,_,n) | Istore (_,_,_,_,n) -> [n]
+| Icond (_,_,n1,n2,p) -> (match p with
+ | Some true -> [n1]
+ | Some false -> [n2]
+ | None -> [n1; n2])
+| Ijumptable (_,ln) -> ln
+
+(* Find the last node of a trace (starting at "node"), until a loop is encountered.
+ * If a non-predicted branch is encountered, returns None *)
+let rec find_last_node_before_loop code node trace is_loop_header =
+ let rtl_succ = rtl_successors @@ get_some @@ PTree.get node code in
+ let headers = List.filter (fun n ->
+ get_some @@ PTree.get n is_loop_header && HashedSet.PSet.contains trace n) rtl_succ in
+ match headers with
+ | [] -> (
+ let next_nodes = List.filter (fun n -> HashedSet.PSet.contains trace n)
+ (rtl_successors_pref @@ get_some @@ PTree.get node code) in
+ match next_nodes with
+ | [n] -> find_last_node_before_loop code n trace is_loop_header
+ | _ -> None (* May happen when we predict that a loop is not taken *)
+ )
+ | [h] -> Some node
+ | _ -> failwith "Multiple branches leading to a loop"
+
+(* The computation of sb_final requires to already have branch prediction *)
let get_inner_loops f code is_loop_header =
let fake_f = { fn_sig = f.fn_sig; fn_params = f.fn_params;
fn_stacksize = f.fn_stacksize; fn_code = code; fn_entrypoint = f.fn_entrypoint } in
@@ -718,7 +768,10 @@ let get_inner_loops f code is_loop_header =
debug "HEADPREDS: %a\n" print_intlist head_preds;
filtered
end in
- { preds = preds; body = (HashedSet.PSet.elements body); head = head; finals = finals }
+ let sb_final = find_last_node_before_loop code head body is_loop_header in
+ let body = HashedSet.PSet.elements body in
+ { preds = preds; body = body; head = head; finals = finals;
+ sb_final = sb_final; }
)
(* LICMaux.inner_loops also returns non-inner loops, but with a body of 1 instruction
* We remove those to get just the inner loops *)
@@ -836,31 +889,36 @@ let unroll_inner_loops_single f code revmap =
(!code', !revmap')
end
+let is_some o = match o with Some _ -> true | None -> false
+
(* Unrolls the body of the inner loop once - duplicating the exit condition as well
* 1) Clones body into body'
- * 2) Links the last instruction of body into the first of body'
+ * 2) Links the last instruction of body (sb_final) into the first of body'
* 3) Links the last instruction of body' into the first of body
*)
let unroll_inner_loop_body code revmap iloop =
+ debug "iloop = "; print_inner_loop iloop;
let body = iloop.body in
let limit = !Clflags.option_funrollbody in
if count_ignore_nops code body > limit then begin
debug "Too many nodes in the loop body (%d > %d)" (List.length body) limit;
(code, revmap)
+ end else if not @@ is_some iloop.sb_final then begin
+ debug "The loop body does not form a superblock OR we have predicted that we do not loop";
+ (code, revmap)
end else
let (code2, revmap2, dupbody, fwmap) = clone code revmap body in
let code' = ref code2 in
let head' = apply_map fwmap (iloop.head) in
let finals' = apply_map_list fwmap (iloop.finals) in
begin
- code' := change_pointers !code' iloop.head head' iloop.finals;
+ code' := change_pointers !code' iloop.head head' [get_some @@ iloop.sb_final];
code' := change_pointers !code' head' iloop.head finals';
(!code', revmap2)
end
let unroll_inner_loops_body f code revmap =
let is_loop_header = get_loop_headers code (f.fn_entrypoint) in
- (* debug_flag := true; *)
let inner_loops = get_inner_loops f code is_loop_header in
debug "Number of loops found: %d\n" (List.length inner_loops);
let code' = ref code in
@@ -870,7 +928,7 @@ let unroll_inner_loops_body f code revmap =
List.iter (fun iloop ->
let (new_code, new_revmap) = unroll_inner_loop_body !code' !revmap' iloop in
code' := new_code; revmap' := new_revmap
- ) inner_loops; (* debug_flag := false; *)
+ ) inner_loops;
(!code', !revmap')
end
@@ -966,6 +1024,7 @@ let tail_duplicate f =
if !Clflags.option_ftailduplicate > 0 then
let traces = select_traces code entrypoint in
let preds = get_predecessors_rtl code in
- superblockify_traces code preds traces revmap
+ let is_loop_header = get_loop_headers code entrypoint in
+ superblockify_traces code preds is_loop_header traces revmap
else (code, revmap) in
((code, entrypoint), revmap)