diff options
Diffstat (limited to 'backend/Duplicateaux.ml')
-rw-r--r-- | backend/Duplicateaux.ml | 91 |
1 files changed, 75 insertions, 16 deletions
diff --git a/backend/Duplicateaux.ml b/backend/Duplicateaux.ml index 76b5616b..c9985dc4 100644 --- a/backend/Duplicateaux.ml +++ b/backend/Duplicateaux.ml @@ -115,16 +115,18 @@ let ptree_printbool pt = (* Looks ahead (until a branch) to see if a node further down verifies * the given predicate *) -let rec look_ahead code node is_loop_header predicate = +let rec look_ahead_gen (successors: RTL.instruction -> P.t list) code node is_loop_header predicate = if (predicate node) then true - else match (rtl_successors @@ get_some @@ PTree.get node code) with + else match (successors @@ get_some @@ PTree.get node code) with | [n] -> if (predicate n) then true else ( if (get_some @@ PTree.get n is_loop_header) then false - else look_ahead code n is_loop_header predicate + else look_ahead_gen successors code n is_loop_header predicate ) | _ -> false +let look_ahead = look_ahead_gen rtl_successors + (** * Heuristics mostly based on the paper Branch Prediction for Free *) @@ -545,7 +547,8 @@ let is_a_nop code n = * preds: mapping node -> predecessors * ptree: the revmap * trace: the trace to follow tail duplication on *) -let tail_duplicate code preds ptree trace = +let tail_duplicate code preds is_loop_header ptree trace = + debug "Tail_duplicate on that trace: %a\n" print_trace trace; (* next_int: unused integer that can be used for the next duplication *) let next_int = ref (next_free_pc code) (* last_node and last_duplicate store resp. the last processed node of the trace, and its duplication *) @@ -560,7 +563,12 @@ let tail_duplicate code preds ptree trace = if is_first then (code, ptree) (* first node is never duplicated regardless of its inputs *) else let node_preds = ptree_get_some n preds - in let node_preds_nolast = List.filter (fun e -> e <> get_some !last_node) node_preds + in let node_preds_nolast = + (* We traverse loop headers without initiating tail duplication + * (see case of two imbricated loops) *) + if (get_some @@ PTree.get n is_loop_header) then [] + else List.filter (fun e -> e <> get_some !last_node) node_preds + (* in let node_preds_nolast = List.filter (fun e -> not @@ List.mem e t) node_preds_nolast *) in let final_node_preds = match !last_duplicate with | None -> node_preds_nolast | Some n' -> n' :: node_preds_nolast @@ -581,12 +589,12 @@ let tail_duplicate code preds ptree trace = in let new_code, new_ptree = f code ptree true trace in (new_code, new_ptree, !nb_duplicated) -let superblockify_traces code preds traces ptree = +let superblockify_traces code preds is_loop_header traces ptree = let max_nb_duplicated = !Clflags.option_ftailduplicate (* FIXME - should be architecture dependent *) in let rec f code ptree = function | [] -> (code, ptree, 0) | trace :: traces -> - let new_code, new_ptree, nb_duplicated = tail_duplicate code preds ptree trace + let new_code, new_ptree, nb_duplicated = tail_duplicate code preds is_loop_header ptree trace in if (nb_duplicated < max_nb_duplicated) then (debug "End duplication\n"; f new_code new_ptree traces) else (debug "Too many duplicated nodes, aborting tail duplication\n"; (code, ptree, 0)) @@ -615,15 +623,29 @@ type innerLoop = { preds: P.t list; body: P.t list; head: P.t; (* head of the loop *) - finals: P.t list (* the final instructions, which loops back to the head *) + finals: P.t list; (* the final instructions, which loops back to the head *) (* There may be more than one ; for instance if there is an if inside the loop with both - * branches leading to a goto backedge *) + * branches leading to a goto backedge + * Such cases usually happen after a tail-duplication *) + sb_final: P.t option; (* if the innerloop wraps a superblock, this is its final instruction *) + (* may be None if we predict that we do not loop *) } let print_pset = LICMaux.pp_pset +let print_option_pint oc o = + if !debug_flag then + match o with + | None -> Printf.fprintf oc "None" + | Some n -> Printf.fprintf oc "Some %d" (P.to_int n) + let print_inner_loop iloop = - debug "{preds: %a, body: %a}" print_intlist iloop.preds print_intlist iloop.body + debug "{preds: %a, body: %a, head: %d, finals: %a, sb_final: %a}\n" + print_intlist iloop.preds + print_intlist iloop.body + (P.to_int iloop.head) + print_intlist iloop.finals + print_option_pint iloop.sb_final let rec print_inner_loops = function | [] -> () @@ -692,6 +714,34 @@ let get_inner_loops f code is_loop_header = !iloops *) +let rtl_successors_pref = function +| Itailcall _ | Ireturn _ -> [] +| Icall(_,_,_,_,n) | Ibuiltin(_,_,_,n) | Inop n | Iop (_,_,_,n) +| Iload (_,_,_,_,_,n) | Istore (_,_,_,_,n) -> [n] +| Icond (_,_,n1,n2,p) -> (match p with + | Some true -> [n1] + | Some false -> [n2] + | None -> [n1; n2]) +| Ijumptable (_,ln) -> ln + +(* Find the last node of a trace (starting at "node"), until a loop is encountered. + * If a non-predicted branch is encountered, returns None *) +let rec find_last_node_before_loop code node trace is_loop_header = + let rtl_succ = rtl_successors @@ get_some @@ PTree.get node code in + let headers = List.filter (fun n -> + get_some @@ PTree.get n is_loop_header && HashedSet.PSet.contains trace n) rtl_succ in + match headers with + | [] -> ( + let next_nodes = List.filter (fun n -> HashedSet.PSet.contains trace n) + (rtl_successors_pref @@ get_some @@ PTree.get node code) in + match next_nodes with + | [n] -> find_last_node_before_loop code n trace is_loop_header + | _ -> None (* May happen when we predict that a loop is not taken *) + ) + | [h] -> Some node + | _ -> failwith "Multiple branches leading to a loop" + +(* The computation of sb_final requires to already have branch prediction *) let get_inner_loops f code is_loop_header = let fake_f = { fn_sig = f.fn_sig; fn_params = f.fn_params; fn_stacksize = f.fn_stacksize; fn_code = code; fn_entrypoint = f.fn_entrypoint } in @@ -718,7 +768,10 @@ let get_inner_loops f code is_loop_header = debug "HEADPREDS: %a\n" print_intlist head_preds; filtered end in - { preds = preds; body = (HashedSet.PSet.elements body); head = head; finals = finals } + let sb_final = find_last_node_before_loop code head body is_loop_header in + let body = HashedSet.PSet.elements body in + { preds = preds; body = body; head = head; finals = finals; + sb_final = sb_final; } ) (* LICMaux.inner_loops also returns non-inner loops, but with a body of 1 instruction * We remove those to get just the inner loops *) @@ -836,31 +889,36 @@ let unroll_inner_loops_single f code revmap = (!code', !revmap') end +let is_some o = match o with Some _ -> true | None -> false + (* Unrolls the body of the inner loop once - duplicating the exit condition as well * 1) Clones body into body' - * 2) Links the last instruction of body into the first of body' + * 2) Links the last instruction of body (sb_final) into the first of body' * 3) Links the last instruction of body' into the first of body *) let unroll_inner_loop_body code revmap iloop = + debug "iloop = "; print_inner_loop iloop; let body = iloop.body in let limit = !Clflags.option_funrollbody in if count_ignore_nops code body > limit then begin debug "Too many nodes in the loop body (%d > %d)" (List.length body) limit; (code, revmap) + end else if not @@ is_some iloop.sb_final then begin + debug "The loop body does not form a superblock OR we have predicted that we do not loop"; + (code, revmap) end else let (code2, revmap2, dupbody, fwmap) = clone code revmap body in let code' = ref code2 in let head' = apply_map fwmap (iloop.head) in let finals' = apply_map_list fwmap (iloop.finals) in begin - code' := change_pointers !code' iloop.head head' iloop.finals; + code' := change_pointers !code' iloop.head head' [get_some @@ iloop.sb_final]; code' := change_pointers !code' head' iloop.head finals'; (!code', revmap2) end let unroll_inner_loops_body f code revmap = let is_loop_header = get_loop_headers code (f.fn_entrypoint) in - (* debug_flag := true; *) let inner_loops = get_inner_loops f code is_loop_header in debug "Number of loops found: %d\n" (List.length inner_loops); let code' = ref code in @@ -870,7 +928,7 @@ let unroll_inner_loops_body f code revmap = List.iter (fun iloop -> let (new_code, new_revmap) = unroll_inner_loop_body !code' !revmap' iloop in code' := new_code; revmap' := new_revmap - ) inner_loops; (* debug_flag := false; *) + ) inner_loops; (!code', !revmap') end @@ -966,6 +1024,7 @@ let tail_duplicate f = if !Clflags.option_ftailduplicate > 0 then let traces = select_traces code entrypoint in let preds = get_predecessors_rtl code in - superblockify_traces code preds traces revmap + let is_loop_header = get_loop_headers code entrypoint in + superblockify_traces code preds is_loop_header traces revmap else (code, revmap) in ((code, entrypoint), revmap) |