diff options
Diffstat (limited to 'backend/Duplicateaux.ml')
-rw-r--r-- | backend/Duplicateaux.ml | 356 |
1 files changed, 246 insertions, 110 deletions
diff --git a/backend/Duplicateaux.ml b/backend/Duplicateaux.ml index 76b5616b..861df3cd 100644 --- a/backend/Duplicateaux.ml +++ b/backend/Duplicateaux.ml @@ -115,16 +115,18 @@ let ptree_printbool pt = (* Looks ahead (until a branch) to see if a node further down verifies * the given predicate *) -let rec look_ahead code node is_loop_header predicate = +let rec look_ahead_gen (successors: RTL.instruction -> P.t list) code node is_loop_header predicate = if (predicate node) then true - else match (rtl_successors @@ get_some @@ PTree.get node code) with + else match (successors @@ get_some @@ PTree.get node code) with | [n] -> if (predicate n) then true else ( if (get_some @@ PTree.get n is_loop_header) then false - else look_ahead code n is_loop_header predicate + else look_ahead_gen successors code n is_loop_header predicate ) | _ -> false +let look_ahead = look_ahead_gen rtl_successors + (** * Heuristics mostly based on the paper Branch Prediction for Free *) @@ -183,7 +185,7 @@ let do_loop_heuristic code cond ifso ifnot is_loop_header = let predicate n = get_some @@ PTree.get n is_loop_header in let ifso_loop = look_ahead code ifso is_loop_header predicate in let ifnot_loop = look_ahead code ifnot is_loop_header predicate in - if ifso_loop && ifnot_loop then None (* TODO - take the innermost loop ? *) + if ifso_loop && ifnot_loop then (debug "\t\tLOOP but can't choose which\n"; None) (* TODO - take the innermost loop ? *) else if ifso_loop then Some true else if ifnot_loop then Some false else None @@ -197,23 +199,165 @@ let do_loop2_heuristic loop_info n code cond ifso ifnot is_loop_header = | Some b -> Some b end +(** Innermost loop detection *) + +type innerLoop = { + preds: P.t list; + body: P.t list; + head: P.t; (* head of the loop *) + finals: P.t list; (* the final instructions, which loops back to the head *) + (* There may be more than one ; for instance if there is an if inside the loop with both + * branches leading to a goto backedge + * Such cases usually happen after a tail-duplication *) + sb_final: P.t option; (* if the innerloop wraps a superblock, this is its final instruction *) + (* may be None if we predict that we do not loop *) +} + +let print_pset = LICMaux.pp_pset + +let print_ptree printer pt = + let elements = PTree.elements pt in + begin + debug "[\n"; + List.iter (fun (n, elt) -> + debug "\t%d: %a\n" (P.to_int n) printer elt + ) elements; + debug "]\n" + end + +let rtl_successors_pref = function +| Itailcall _ | Ireturn _ -> [] +| Icall(_,_,_,_,n) | Ibuiltin(_,_,_,n) | Inop n | Iop (_,_,_,n) +| Iload (_,_,_,_,_,n) | Istore (_,_,_,_,n) -> [n] +| Icond (_,_,n1,n2,p) -> (match p with + | Some true -> [n1] + | Some false -> [n2] + | None -> [n1; n2]) +| Ijumptable (_,ln) -> ln + +(* Find the last node of a trace (starting at "node"), until a loop is encountered. + * If a non-predicted branch is encountered, returns None *) +let rec find_last_node_before_loop code node trace is_loop_header = + let rtl_succ = rtl_successors @@ get_some @@ PTree.get node code in + let headers = List.filter (fun n -> + get_some @@ PTree.get n is_loop_header && HashedSet.PSet.contains trace n) rtl_succ in + match headers with + | [] -> ( + let next_nodes = rtl_successors_pref @@ get_some @@ PTree.get node code in + match next_nodes with + | [n] -> ( + (* To prevent getting out of the superblock and loop infinitely when the prediction is false *) + if HashedSet.PSet.contains trace n then + find_last_node_before_loop code n trace is_loop_header + else None + ) + | _ -> None (* May happen when we predict that a loop is not taken *) + ) + | [h] -> Some node + | _ -> failwith "Multiple branches leading to a loop" + +(* The computation of sb_final requires to already have branch prediction *) +let get_inner_loops f code is_loop_header = + let fake_f = { fn_sig = f.fn_sig; fn_params = f.fn_params; + fn_stacksize = f.fn_stacksize; fn_code = code; fn_entrypoint = f.fn_entrypoint } in + let (_, predmap, loopmap) = LICMaux.inner_loops fake_f in + begin + debug "PREDMAP: "; print_ptree print_intlist predmap; + debug "LOOPMAP: "; print_ptree print_pset loopmap; + List.map (fun (n, body) -> + let preds = List.filter (fun p -> not @@ HashedSet.PSet.contains body p) + @@ get_some @@ PTree.get n predmap in + let head = (* the instruction from body which is a loop header *) + let heads = HashedSet.PSet.elements @@ HashedSet.PSet.filter + (fun n -> ptree_get_some n is_loop_header) body in + begin + assert (List.length heads == 1); + List.hd heads + end in + let finals = (* the predecessors from head that are in the body *) + let head_preds = ptree_get_some head predmap in + let filtered = List.filter (fun n -> HashedSet.PSet.contains body n) head_preds in + begin + debug "HEAD: %d\n" (P.to_int head); + debug "BODY: %a\n" print_pset body; + debug "HEADPREDS: %a\n" print_intlist head_preds; + filtered + end in + let sb_final = find_last_node_before_loop code head body is_loop_header in + let body = HashedSet.PSet.elements body in + { preds = preds; body = body; head = head; finals = finals; + sb_final = sb_final; } + ) + (* LICMaux.inner_loops also returns non-inner loops, but with a body of 1 instruction + * We remove those to get just the inner loops *) + @@ List.filter (fun (n, body) -> + let count = List.length @@ HashedSet.PSet.elements body in count != 1 + ) (PTree.elements loopmap) + end + + (* Returns a PTree of either None or Some b where b determines the node following the loop, for a cb instruction *) (* It uses the fact that loops in CompCert are done by a branch (backedge) instruction followed by a cb *) -let get_loop_info is_loop_header bfs_order code = +let get_loop_info f is_loop_header bfs_order code = + debug "GET LOOP INFO\n"; + debug "==================================\n"; let loop_info = ref (PTree.map (fun n i -> None) code) in - let mark_path s n = + let mark_path n lbody = + let cb_info = ref PTree.empty in let visited = ref (PTree.map (fun n i -> false) code) in - let rec explore src dest = - if (get_some @@ PTree.get src !visited) then false - else if src == dest then true - else begin - visited := PTree.set src true !visited; - match rtl_successors @@ get_some @@ PTree.get src code with - | [] -> false - | [s] -> explore s dest - | [s1; s2] -> (explore s1 dest) || (explore s2 dest) - | _ -> false - end + (* Returns true if there is a path from src to dest (not involving jumptables) *) + (* Mark nodes as visited along the way *) + let explore src dest = + debug "Trying to dive a path from %d to %d\n" (P.to_int src) (P.to_int dest); + (* Memoizing the results to avoid exponential blow-up *) + let memory = ref PTree.empty in + let rec explore_rec src = + debug "explore_rec %d vs %d... " (P.to_int src) (P.to_int dest); + if (P.to_int src) == (P.to_int dest) then (debug "FOUND\n"; true) + else if (get_some @@ PTree.get src !visited) then (debug "VISITED... :( \n"; false) + (* if we went out of the innermost loop *) + else if (not @@ List.mem src lbody) then (debug "Out of innermost...\n"; false) + else begin + let inst = get_some @@ PTree.get src code in + visited := PTree.set src true !visited; + match rtl_successors inst with + | [] -> false + | [s] -> explore_wrap s + | [s1; s2] -> let snapshot_visited = ref !visited in begin + debug "\t\tSplit at %d: either %d or %d\n" (P.to_int src) (P.to_int s1) (P.to_int s2); + (* Remembering that we tried the ifso node *) + cb_info := PTree.set src true !cb_info; + match explore_wrap s1 with + | true -> ( + visited := !snapshot_visited; + match explore_wrap s2 with + | true -> begin + (* Both paths lead to a loop: we cannot predict the CB + * (but the explore still succeeds) *) + cb_info := PTree.remove src !cb_info; + true + end + | false -> true (* nothing to do, the explore succeeded *) + ) + | false -> begin + cb_info := PTree.set src false !cb_info; + match explore_wrap s2 with + | true -> true + | false -> (cb_info := PTree.remove src !cb_info; false) + end + end + | _ -> false + end + and explore_wrap src = begin + match PTree.get src !memory with + | Some b -> b + | None -> + let result = explore_rec src in + (memory := PTree.set src result !memory; result) + end in explore_wrap src + (* Goes forward until a CB is encountered + * Returns None if no CB was found, or Some the_cb_node + * Marks nodes as visited along the way *) in let rec advance_to_cb src = if (get_some @@ PTree.get src !visited) then None else begin @@ -225,44 +369,53 @@ let get_loop_info is_loop_header bfs_order code = | Ijumptable _ | Itailcall _ | Ireturn _ -> None end in begin - debug "Marking path from %d to %d\n" (P.to_int n) (P.to_int s); - match advance_to_cb s with - | None -> (debug "Nothing found\n") - | Some s -> ( debug "Advancing to %d\n" (P.to_int s); + debug "Attempting to find natural loop from HEAD %d..\n" (P.to_int n); + match advance_to_cb n with + | None -> (debug "\tNo CB found\n") + | Some s -> ( debug "\tFound a CB! %d\n" (P.to_int s); match get_some @@ PTree.get s !loop_info with | None | Some _ -> begin match get_some @@ PTree.get s code with - | Icond (_, _, n1, n2, _) -> + | Icond (_, _, n1, n2, _) -> ( let b1 = explore n1 n in let b2 = explore n2 n in - if (b1 && b2) then (debug "both true\n") - else if b1 then (debug "true privileged\n"; loop_info := PTree.set s (Some true) !loop_info) - else if b2 then (debug "false privileged\n"; loop_info := PTree.set s (Some false) !loop_info) - else (debug "none true\n") - | _ -> ( debug "not an icond\n" ) + if (b1 && b2) then + debug "\tBoth paths lead back to the head: NONE\n" + else if (b1 || b2) then begin + if b1 then begin + debug "\tTrue path leads to the head: TRUE\n"; + loop_info := PTree.set s (Some true) !loop_info; + end else if b2 then begin + debug "\tFalse path leads to the head: FALSE\n"; + loop_info := PTree.set s (Some false) !loop_info + end; + debug "\tSetting other CBs encountered..\n"; + List.iter (fun (cb, dir) -> + debug "\t\t%d is %B\n" (P.to_int cb) dir; + loop_info := PTree.set cb (Some dir) !loop_info + ) (PTree.elements !cb_info) + end else + debug "\tNo path leads back to the head: NONE\n" + ) + | _ -> failwith "\tNot an Icond\n" end (* | Some _ -> ( debug "already loop info there\n" ) FIXME - we don't know yet whether a branch to a loop head is a backedge or not *) ) end - in begin - List.iter (fun n -> - match get_some @@ PTree.get n code with - | Inop s | Iop (_,_,_,s) | Iload (_,_,_,_,_,s) | Istore (_,_,_,_,s) | Icall (_,_,_,_,s) - | Ibuiltin (_, _, _, s) -> - if get_some @@ PTree.get s is_loop_header then mark_path s n - | Icond _ -> () (* loop backedges are never Icond in CompCert RTL.3 *) - | Ijumptable _ -> () - | Itailcall _ | Ireturn _ -> () - ) bfs_order; + in let iloops = get_inner_loops f code is_loop_header in + begin + List.iter (fun il -> mark_path il.head il.body) iloops; + (* List.iter mark_path @@ List.filter (fun n -> get_some @@ PTree.get n is_loop_header) bfs_order; *) + debug "==================================\n"; !loop_info end (* Remark - compared to the original Branch Prediction for Free paper, we don't use the store heuristic *) -let get_directions code entrypoint = begin +let get_directions f code entrypoint = begin debug "get_directions\n"; let bfs_order = bfs code entrypoint in let is_loop_header = get_loop_headers code entrypoint in - let loop_info = get_loop_info is_loop_header bfs_order code in + let loop_info = get_loop_info f is_loop_header bfs_order code in let directions = ref (PTree.map (fun n i -> None) code) in (* None <=> no predicted direction *) begin (* ptree_printbool is_loop_header; *) @@ -312,9 +465,9 @@ let rec update_direction_rec directions = function in PTree.set n (update_direction direction i) (update_direction_rec directions lm) (* Uses branch prediction to write prediction annotations in Icond *) -let update_directions code entrypoint = begin +let update_directions f code entrypoint = begin debug "Update_directions\n"; - let directions = get_directions code entrypoint + let directions = get_directions f code entrypoint in begin (* debug "Ifso directions: "; ptree_printbool directions; @@ -545,7 +698,8 @@ let is_a_nop code n = * preds: mapping node -> predecessors * ptree: the revmap * trace: the trace to follow tail duplication on *) -let tail_duplicate code preds ptree trace = +let tail_duplicate code preds is_loop_header ptree trace = + debug "Tail_duplicate on that trace: %a\n" print_trace trace; (* next_int: unused integer that can be used for the next duplication *) let next_int = ref (next_free_pc code) (* last_node and last_duplicate store resp. the last processed node of the trace, and its duplication *) @@ -560,7 +714,12 @@ let tail_duplicate code preds ptree trace = if is_first then (code, ptree) (* first node is never duplicated regardless of its inputs *) else let node_preds = ptree_get_some n preds - in let node_preds_nolast = List.filter (fun e -> e <> get_some !last_node) node_preds + in let node_preds_nolast = + (* We traverse loop headers without initiating tail duplication + * (see case of two imbricated loops) *) + if (get_some @@ PTree.get n is_loop_header) then [] + else List.filter (fun e -> e <> get_some !last_node) node_preds + (* in let node_preds_nolast = List.filter (fun e -> not @@ List.mem e t) node_preds_nolast *) in let final_node_preds = match !last_duplicate with | None -> node_preds_nolast | Some n' -> n' :: node_preds_nolast @@ -581,12 +740,12 @@ let tail_duplicate code preds ptree trace = in let new_code, new_ptree = f code ptree true trace in (new_code, new_ptree, !nb_duplicated) -let superblockify_traces code preds traces ptree = +let superblockify_traces code preds is_loop_header traces ptree = let max_nb_duplicated = !Clflags.option_ftailduplicate (* FIXME - should be architecture dependent *) in let rec f code ptree = function | [] -> (code, ptree, 0) | trace :: traces -> - let new_code, new_ptree, nb_duplicated = tail_duplicate code preds ptree trace + let new_code, new_ptree, nb_duplicated = tail_duplicate code preds is_loop_header ptree trace in if (nb_duplicated < max_nb_duplicated) then (debug "End duplication\n"; f new_code new_ptree traces) else (debug "Too many duplicated nodes, aborting tail duplication\n"; (code, ptree, 0)) @@ -611,19 +770,21 @@ let invert_iconds code = * cyclic dependencies between LICMaux and Duplicateaux *) -type innerLoop = { - preds: P.t list; - body: P.t list; - head: P.t; (* head of the loop *) - finals: P.t list (* the final instructions, which loops back to the head *) - (* There may be more than one ; for instance if there is an if inside the loop with both - * branches leading to a goto backedge *) -} -let print_pset = LICMaux.pp_pset + +let print_option_pint oc o = + if !debug_flag then + match o with + | None -> Printf.fprintf oc "None" + | Some n -> Printf.fprintf oc "Some %d" (P.to_int n) let print_inner_loop iloop = - debug "{preds: %a, body: %a}" print_intlist iloop.preds print_intlist iloop.body + debug "{preds: %a, body: %a, head: %d, finals: %a, sb_final: %a}\n" + print_intlist iloop.preds + print_intlist iloop.body + (P.to_int iloop.head) + print_intlist iloop.finals + print_option_pint iloop.sb_final let rec print_inner_loops = function | [] -> () @@ -633,16 +794,6 @@ let rec print_inner_loops = function print_inner_loops iloops end -let print_ptree printer pt = - let elements = PTree.elements pt in - begin - debug "[\n"; - List.iter (fun (n, elt) -> - debug "\t%d: %a\n" (P.to_int n) printer elt - ) elements; - debug "]\n" - end - let print_pint oc i = if !debug_flag then Printf.fprintf oc "%d" (P.to_int i) else () let cb_exit_node = function @@ -692,41 +843,6 @@ let get_inner_loops f code is_loop_header = !iloops *) -let get_inner_loops f code is_loop_header = - let fake_f = { fn_sig = f.fn_sig; fn_params = f.fn_params; - fn_stacksize = f.fn_stacksize; fn_code = code; fn_entrypoint = f.fn_entrypoint } in - let (_, predmap, loopmap) = LICMaux.inner_loops fake_f in - begin - debug "PREDMAP: "; print_ptree print_intlist predmap; - debug "LOOPMAP: "; print_ptree print_pset loopmap; - List.map (fun (n, body) -> - let preds = List.filter (fun p -> not @@ HashedSet.PSet.contains body p) - @@ get_some @@ PTree.get n predmap in - let head = (* the instruction from body which is a loop header *) - let heads = HashedSet.PSet.elements @@ HashedSet.PSet.filter - (fun n -> ptree_get_some n is_loop_header) body in - begin - assert (List.length heads == 1); - List.hd heads - end in - let finals = (* the predecessors from head that are in the body *) - let head_preds = ptree_get_some head predmap in - let filtered = List.filter (fun n -> HashedSet.PSet.contains body n) head_preds in - begin - debug "HEAD: %d\n" (P.to_int head); - debug "BODY: %a\n" print_pset body; - debug "HEADPREDS: %a\n" print_intlist head_preds; - filtered - end in - { preds = preds; body = (HashedSet.PSet.elements body); head = head; finals = finals } - ) - (* LICMaux.inner_loops also returns non-inner loops, but with a body of 1 instruction - * We remove those to get just the inner loops *) - @@ List.filter (fun (n, body) -> - let count = List.length @@ HashedSet.PSet.elements body in count != 1 - ) (PTree.elements loopmap) - end - let rec generate_fwmap ln ln' fwmap = match ln with | [] -> begin @@ -836,31 +952,50 @@ let unroll_inner_loops_single f code revmap = (!code', !revmap') end +let is_some o = match o with Some _ -> true | None -> false + +let rec go_through_predicted code start final = + if start == final then + Some [start] + else + match rtl_successors_pref @@ get_some @@ PTree.get start code with + | [n] -> ( + match go_through_predicted code n final with + | Some ln -> Some (start :: ln) + | None -> None + ) + | _ -> None + (* Unrolls the body of the inner loop once - duplicating the exit condition as well * 1) Clones body into body' - * 2) Links the last instruction of body into the first of body' + * 2) Links the last instruction of body (sb_final) into the first of body' * 3) Links the last instruction of body' into the first of body *) let unroll_inner_loop_body code revmap iloop = + debug "iloop = "; print_inner_loop iloop; let body = iloop.body in let limit = !Clflags.option_funrollbody in if count_ignore_nops code body > limit then begin - debug "Too many nodes in the loop body (%d > %d)" (List.length body) limit; + debug "Too many nodes in the loop body (%d > %d)\n" (List.length body) limit; + (code, revmap) + end else if not @@ is_some iloop.sb_final then begin + debug "The loop body does not form a superblock OR we have predicted that we do not loop\n"; (code, revmap) end else - let (code2, revmap2, dupbody, fwmap) = clone code revmap body in + let sb_final = get_some @@ iloop.sb_final in + let sb_body = get_some @@ go_through_predicted code iloop.head sb_final in + let (code2, revmap2, dupbody, fwmap) = clone code revmap sb_body in let code' = ref code2 in let head' = apply_map fwmap (iloop.head) in - let finals' = apply_map_list fwmap (iloop.finals) in + let sb_final' = apply_map fwmap sb_final in begin - code' := change_pointers !code' iloop.head head' iloop.finals; - code' := change_pointers !code' head' iloop.head finals'; + code' := change_pointers !code' iloop.head head' [sb_final]; + code' := change_pointers !code' head' iloop.head [sb_final']; (!code', revmap2) end let unroll_inner_loops_body f code revmap = let is_loop_header = get_loop_headers code (f.fn_entrypoint) in - (* debug_flag := true; *) let inner_loops = get_inner_loops f code is_loop_header in debug "Number of loops found: %d\n" (List.length inner_loops); let code' = ref code in @@ -870,7 +1005,7 @@ let unroll_inner_loops_body f code revmap = List.iter (fun iloop -> let (new_code, new_revmap) = unroll_inner_loop_body !code' !revmap' iloop in code' := new_code; revmap' := new_revmap - ) inner_loops; (* debug_flag := false; *) + ) inner_loops; (!code', !revmap') end @@ -930,7 +1065,7 @@ let static_predict f = let revmap = make_identity_ptree code in let code = if !Clflags.option_fpredict then - update_directions code entrypoint + update_directions f code entrypoint else code in let code = if !Clflags.option_fpredict then @@ -966,6 +1101,7 @@ let tail_duplicate f = if !Clflags.option_ftailduplicate > 0 then let traces = select_traces code entrypoint in let preds = get_predecessors_rtl code in - superblockify_traces code preds traces revmap + let is_loop_header = get_loop_headers code entrypoint in + superblockify_traces code preds is_loop_header traces revmap else (code, revmap) in ((code, entrypoint), revmap) |