(* *************************************************************) (* *) (* The Compcert verified compiler *) (* *) (* Sylvain Boulmé Grenoble-INP, VERIMAG *) (* David Monniaux CNRS, VERIMAG *) (* Cyril Six Kalray *) (* *) (* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) (* This file is distributed under the terms of the INRIA *) (* Non-Commercial License Agreement. *) (* *) (* *************************************************************) (* Oracle for Duplicate pass. * - Add static prediction information to Icond nodes * - Performs tail duplication on interesting traces to form superblocks * - Unrolls a single iteration of innermost loops * - (TODO: perform partial loop unrolling inside innermost loops) *) open RTL open Maps open Camlcoq open DebugPrint open RTLcommonaux let stats_oc = ref None let set_stats_oc () = try let name = Sys.getenv "COMPCERT_PREDICT_STATS" in let oc = open_out_gen [Open_append; Open_creat; Open_text] 0o666 name in stats_oc := Some oc with Not_found -> () (* number of total CBs *) let stats_nb_total = ref 0 (* we predicted the same thing as the profiling *) let stats_nb_correct_predicts = ref 0 (* we predicted something (say Some true), but the profiling predicted the opposite (say Some false) *) let stats_nb_mispredicts = ref 0 (* we did not predict anything (None) even though the profiling did predict something *) let stats_nb_missed_opportunities = ref 0 (* we predicted something (say Some true) but the profiling preferred not to predict anything (None) *) let stats_nb_overpredict = ref 0 (* heuristic specific counters *) let wrong_opcode = ref 0 let wrong_return = ref 0 let wrong_loop2 = ref 0 let wrong_call = ref 0 let right_opcode = ref 0 let right_return = ref 0 let right_loop2 = ref 0 let right_call = ref 0 let reset_stats () = begin stats_nb_total := 0; stats_nb_correct_predicts := 0; stats_nb_mispredicts := 0; stats_nb_missed_opportunities := 0; stats_nb_overpredict := 0; wrong_opcode := 0; wrong_return := 0; wrong_loop2 := 0; wrong_call := 0; right_opcode := 0; right_return := 0; right_loop2 := 0; right_call := 0; end let incr theref = theref := !theref + 1 let has_some o = match o with Some _ -> true | None -> false let stats_oc_recording () = has_some !stats_oc let write_stats_oc () = match !stats_oc with | None -> () | Some oc -> begin Printf.fprintf oc "%d %d %d %d %d %d %d %d %d %d %d %d %d\n" !stats_nb_total !stats_nb_correct_predicts !stats_nb_mispredicts !stats_nb_missed_opportunities !stats_nb_overpredict !wrong_opcode !wrong_return !wrong_loop2 !wrong_call !right_opcode !right_return !right_loop2 !right_call ; close_out oc end let get_loop_headers = LICMaux.get_loop_headers let rtl_successors = LICMaux.rtl_successors (* Get list of nodes following a BFS of the code *) (* Stops when predicate is reached * Excludes any node given in excluded function *) let bfs_until code entrypoint (predicate: node->bool) (excluded: node->bool) = begin debug "bfs\n"; let visited = ref (PTree.map (fun n i -> false) code) and bfs_list = ref [] and to_visit = Queue.create () and node = ref entrypoint in begin Queue.add entrypoint to_visit; while not (Queue.is_empty to_visit) do node := Queue.pop to_visit; if (not (get_some @@ PTree.get !node !visited)) then begin visited := PTree.set !node true !visited; if not (excluded !node) then begin match PTree.get !node code with | None -> failwith "No such node" | Some i -> bfs_list := !node :: !bfs_list; if not (predicate !node) then let succ = rtl_successors i in List.iter (fun n -> Queue.add n to_visit) succ end end done; List.rev !bfs_list end end let bfs code entrypoint = bfs_until code entrypoint (fun _ -> false) (fun _ -> false) let optbool o = match o with Some _ -> true | None -> false let ptree_get_some n ptree = get_some @@ PTree.get n ptree (* Returns a PTree: node -> list of the predecessors of that node *) let get_predecessors_rtl code = begin debug "get_predecessors_rtl\n"; let preds = ref (PTree.map (fun n i -> []) code) in let process_inst (node, i) = let succ = rtl_successors i in List.iter (fun s -> let previous_preds = ptree_get_some s !preds in if optbool @@ List.find_opt (fun e -> e == node) previous_preds then () else preds := PTree.set s (node::previous_preds) !preds) succ in begin List.iter process_inst (PTree.elements code); !preds end end module PInt = struct type t = P.t let compare x y = compare (P.to_int x) (P.to_int y) end module PSet = Set.Make(PInt) let print_intset s = let seq = PSet.to_seq s in begin if !debug_flag then begin Printf.printf "{"; Seq.iter (fun n -> Printf.printf "%d " (P.to_int n) ) seq; Printf.printf "}" end end (* Looks ahead (until a branch) to see if a node further down verifies * the given predicate *) let rec look_ahead_gen (successors: RTL.instruction -> P.t list) code node is_loop_header predicate = if (predicate node) then true else match (successors @@ get_some @@ PTree.get node code) with | [n] -> if (predicate n) then true else ( if (get_some @@ PTree.get n is_loop_header) then false else look_ahead_gen successors code n is_loop_header predicate ) | _ -> false let look_ahead = look_ahead_gen rtl_successors (** * Heuristics mostly based on the paper Branch Prediction for Free *) let do_call_heuristic code cond ifso ifnot is_loop_header = begin debug "\tCall heuristic..\n"; let predicate n = (function | Icall _ -> true | _ -> false) @@ get_some @@ PTree.get n code in let ifso_call = look_ahead code ifso is_loop_header predicate in let ifnot_call = look_ahead code ifnot is_loop_header predicate in if ifso_call && ifnot_call then None else if ifso_call then Some false else if ifnot_call then Some true else None end let do_opcode_heuristic code cond ifso ifnot is_loop_header = begin debug "\tOpcode heuristic..\n"; DuplicateOpcodeHeuristic.opcode_heuristic code cond ifso ifnot is_loop_header end let do_return_heuristic code cond ifso ifnot is_loop_header = begin debug "\tReturn heuristic..\n"; let predicate n = (function | Ireturn _ -> true | _ -> false) @@ get_some @@ PTree.get n code in let ifso_return = look_ahead code ifso is_loop_header predicate in let ifnot_return = look_ahead code ifnot is_loop_header predicate in if ifso_return && ifnot_return then None else if ifso_return then Some false else if ifnot_return then Some true else None end let do_store_heuristic code cond ifso ifnot is_loop_header = begin debug "\tStore heuristic..\n"; let predicate n = (function | Istore _ -> true | _ -> false) @@ get_some @@ PTree.get n code in let ifso_store = look_ahead code ifso is_loop_header predicate in let ifnot_store = look_ahead code ifnot is_loop_header predicate in if ifso_store && ifnot_store then None else if ifso_store then Some false else if ifnot_store then Some true else None end let do_loop_heuristic code cond ifso ifnot is_loop_header = begin debug "\tLoop heuristic..\n"; let predicate n = get_some @@ PTree.get n is_loop_header in let ifso_loop = look_ahead code ifso is_loop_header predicate in let ifnot_loop = look_ahead code ifnot is_loop_header predicate in if ifso_loop && ifnot_loop then (debug "\t\tLOOP but can't choose which\n"; None) (* TODO - take the innermost loop ? *) else if ifso_loop then Some true else if ifnot_loop then Some false else None end let do_loop2_heuristic loop_info n code cond ifso ifnot is_loop_header = begin debug "\tLoop2 heuristic..\n"; match get_some @@ PTree.get n loop_info with | None -> None | Some b -> Some b end (** Innermost loop detection *) type innerLoop = { preds: P.t list; body: P.t list; head: P.t; (* head of the loop *) finals: P.t list; (* the final instructions, which loops back to the head *) (* There may be more than one ; for instance if there is an if inside the loop with both * branches leading to a goto backedge * Such cases usually happen after a tail-duplication *) sb_final: P.t option; (* if the innerloop wraps a superblock, this is its final instruction *) (* may be None if we predict that we do not loop *) } let print_pset = LICMaux.pp_pset let rtl_successors_pref = function | Itailcall _ | Ireturn _ -> [] | Icall(_,_,_,_,n) | Ibuiltin(_,_,_,n) | Inop n | Iop (_,_,_,n) | Iload (_,_,_,_,_,n) | Istore (_,_,_,_,n) -> [n] | Icond (_,_,n1,n2,p) -> (match p with | Some true -> [n1] | Some false -> [n2] | None -> [n1; n2]) | Ijumptable (_,ln) -> ln (* Find the last node of a trace (starting at "node"), until a loop is encountered. * If a non-predicted branch is encountered, returns None *) let rec find_last_node_before_loop code node trace is_loop_header = let rtl_succ = rtl_successors @@ get_some @@ PTree.get node code in let headers = List.filter (fun n -> get_some @@ PTree.get n is_loop_header && HashedSet.PSet.contains trace n) rtl_succ in match headers with | [] -> ( let next_nodes = rtl_successors_pref @@ get_some @@ PTree.get node code in match next_nodes with | [n] -> ( (* To prevent getting out of the superblock and loop infinitely when the prediction is false *) if HashedSet.PSet.contains trace n then find_last_node_before_loop code n trace is_loop_header else None ) | _ -> None (* May happen when we predict that a loop is not taken *) ) | [h] -> Some node | _ -> failwith "Multiple branches leading to a loop" (* The computation of sb_final requires to already have branch prediction *) let get_inner_loops f code is_loop_header = let fake_f = { fn_sig = f.fn_sig; fn_params = f.fn_params; fn_stacksize = f.fn_stacksize; fn_code = code; fn_entrypoint = f.fn_entrypoint } in let (_, predmap, loopmap) = LICMaux.inner_loops fake_f in begin debug "PREDMAP: "; print_ptree print_intlist predmap; debug "LOOPMAP: "; print_ptree print_pset loopmap; List.map (fun (n, body) -> let preds = List.filter (fun p -> not @@ HashedSet.PSet.contains body p) @@ get_some @@ PTree.get n predmap in let head = (* the instruction from body which is a loop header *) let heads = HashedSet.PSet.elements @@ HashedSet.PSet.filter (fun n -> ptree_get_some n is_loop_header) body in begin assert (List.length heads == 1); List.hd heads end in let finals = (* the predecessors from head that are in the body *) let head_preds = ptree_get_some head predmap in let filtered = List.filter (fun n -> HashedSet.PSet.contains body n) head_preds in begin debug "HEAD: %d\n" (P.to_int head); debug "BODY: %a\n" print_pset body; debug "HEADPREDS: %a\n" print_intlist head_preds; filtered end in let sb_final = find_last_node_before_loop code head body is_loop_header in let body = HashedSet.PSet.elements body in { preds = preds; body = body; head = head; finals = finals; sb_final = sb_final; } ) (* LICMaux.inner_loops also returns non-inner loops, but with a body of 1 instruction * We remove those to get just the inner loops *) @@ List.filter (fun (n, body) -> let count = List.length @@ HashedSet.PSet.elements body in count != 1 ) (PTree.elements loopmap) end let get_loop_bodies code entrypoint = let predecessors = get_predecessors_rtl code in (* Algorithm from Muchnik, Compiler Design & Implementation, Figure 7.21 page 192 *) let natural_loop n m = debug "Natural Loop from %d to %d\n" (P.to_int n) (P.to_int m); let in_body = ref (PTree.map (fun n b -> false) code) in let body = ref [] in let add_to_body n = begin in_body := PTree.set n true !in_body; body := n :: !body end in let rec process_node p = debug " Processing node %d\n" (P.to_int p); List.iter (fun pred -> debug " Looking at predecessor of %d: %d\n" (P.to_int p) (P.to_int pred); let is_in_body = get_some @@ PTree.get pred !in_body in if (not @@ is_in_body) then begin debug " --> adding to body\n"; add_to_body pred; process_node pred end ) (get_some @@ PTree.get p predecessors) in begin add_to_body m; add_to_body n; (if (m != n) then process_node m); !body end in let option_natural_loop n = function | None -> None | Some m -> Some (natural_loop n m) in PTree.map option_natural_loop (LICMaux.get_loop_backedges code entrypoint) (* Returns a PTree of either None or Some b where b determines the node in the loop body, for a cb instruction *) let get_loop_info f is_loop_header bfs_order code = let loop_info = ref (PTree.map (fun n i -> None) code) in let mark_body body = List.iter (fun n -> match get_some @@ PTree.get n code with | Icond (_, _, ifso, ifnot, _) -> begin match PTree.get n !loop_info with | None -> () | Some _ -> let b1 = List.mem ifso body in let b2 = List.mem ifnot body in if (b1 && b2) then () else if (b1 || b2) then begin if b1 then loop_info := PTree.set n (Some true) !loop_info else if b2 then loop_info := PTree.set n (Some false) !loop_info end end | _ -> () ) body in let bodymap = get_loop_bodies code f.fn_entrypoint in List.iter (fun (_,obody) -> match obody with | None -> () | Some body -> mark_body body ) (PTree.elements bodymap); !loop_info (* Remark - compared to the original Branch Prediction for Free paper, we don't use the store heuristic *) let get_directions f code entrypoint = begin debug "get_directions\n"; let bfs_order = bfs code entrypoint in let is_loop_header = get_loop_headers code entrypoint in let loop_info = get_loop_info f is_loop_header bfs_order code in let directions = ref (PTree.map (fun n i -> None) code) in (* None <=> no predicted direction *) begin (* ptree_printbool is_loop_header; *) (* debug "\n"; *) List.iter (fun n -> match (get_some @@ PTree.get n code) with | Icond (cond, lr, ifso, ifnot, pred) -> begin if stats_oc_recording () || not @@ has_some pred then (* debug "Analyzing %d.." (P.to_int n); *) let heuristics = [ do_opcode_heuristic; do_return_heuristic; do_loop2_heuristic loop_info n; (* do_loop_heuristic; *) do_call_heuristic; (* do_store_heuristic *) ] in let preferred = ref None in let current_heuristic = ref 0 in begin debug "Deciding condition for RTL node %d\n" (P.to_int n); List.iter (fun do_heur -> match !preferred with | None -> begin preferred := do_heur code cond ifso ifnot is_loop_header; if stats_oc_recording () then begin (* Getting stats about mispredictions from each heuristic *) (match !preferred, pred with | Some false, Some true | Some true, Some false (* | Some _, None *) (* Uncomment for overpredicts *) -> begin match !current_heuristic with | 0 -> incr wrong_opcode | 1 -> incr wrong_return | 2 -> incr wrong_loop2 | 3 -> incr wrong_call | _ -> failwith "Shouldn't happen" end | Some false, Some false | Some true, Some true -> begin match !current_heuristic with | 0 -> incr right_opcode | 1 -> incr right_return | 2 -> incr right_loop2 | 3 -> incr right_call | _ -> failwith "Shouldn't happen" end | _ -> () ); incr current_heuristic end end | Some _ -> () ) heuristics; directions := PTree.set n !preferred !directions; (match !preferred with | Some false -> debug "\tFALLTHROUGH\n" | Some true -> debug "\tBRANCH\n" | None -> debug "\tUNSURE\n"); debug "---------------------------------------\n" end end | _ -> () ) bfs_order; !directions end end let update_direction direction = function | Icond (cond, lr, n, n', pred) -> begin (* Counting stats from profiling *) if stats_oc_recording () then begin incr stats_nb_total; match pred, direction with | None, None -> incr stats_nb_correct_predicts | None, Some _ -> incr stats_nb_overpredict | Some _, None -> incr stats_nb_missed_opportunities | Some false, Some false -> incr stats_nb_correct_predicts | Some false, Some true -> incr stats_nb_mispredicts | Some true, Some false -> incr stats_nb_mispredicts | Some true, Some true -> incr stats_nb_correct_predicts end; (* only update if there is no prior existing branch prediction *) (match pred with | None -> Icond (cond, lr, n, n', direction) | Some _ -> begin Icond (cond, lr, n, n', pred) end ) end | i -> i (* Uses branch prediction to write prediction annotations in Icond *) let update_directions f code entrypoint = begin debug "Update_directions\n"; let directions = get_directions f code entrypoint in let code' = ref code in begin debug "Get Directions done, now proceeding to update all direction information..\n"; (* debug "Ifso directions: "; ptree_printbool directions; debug "\n"; *) List.iter (fun (n, i) -> let direction = get_some @@ PTree.get n directions in code' := PTree.set n (update_direction direction i) !code' ) (PTree.elements code); !code' end end (** Trace selection *) let rec exists_false_rec = function | [] -> false | m::lm -> let (_, b) = m in if b then exists_false_rec lm else true let exists_false boolmap = exists_false_rec (PTree.elements boolmap) (* DFS using prediction info to guide the exploration *) let dfs code entrypoint = begin debug "dfs\n"; let visited = ref (PTree.map (fun n i -> false) code) in let rec dfs_list code = function | [] -> [] | node :: ln -> if get_some @@ PTree.get node !visited then dfs_list code ln else begin visited := PTree.set node true !visited; let next_nodes = (match get_some @@ PTree.get node code with | Icall(_, _, _, _, n) | Ibuiltin (_, _, _, n) | Iop (_, _, _, n) | Iload (_, _, _, _, _, n) | Istore (_, _, _, _, n) | Inop n -> [n] | Ijumptable (_, ln) -> ln | Itailcall _ | Ireturn _ -> [] | Icond (_, _, n1, n2, info) -> (match info with | Some false -> [n2; n1] | _ -> [n1; n2] ) ) in node :: dfs_list code (next_nodes @ ln) end in dfs_list code [entrypoint] end let rec select_unvisited_node is_visited = function | [] -> failwith "Empty list" | n :: ln -> if not (ptree_get_some n is_visited) then n else select_unvisited_node is_visited ln let best_successor_of node code is_visited = match (PTree.get node code) with | None -> failwith "No such node in the code" | Some i -> let next_node = match i with | Inop n | Iop (_,_,_,n) | Iload (_,_,_,_,_,n) | Istore(_,_,_,_,n) | Icall (_,_,_,_,n) | Ibuiltin (_,_,_,n) -> Some n | Icond (_, _, n1, n2, ob) -> (match ob with None -> None | Some false -> Some n2 | Some true -> Some n1) | _ -> None in match next_node with | None -> None | Some n -> if not (ptree_get_some n is_visited) then Some n else None (* FIXME - could be improved by selecting in priority the predicted paths *) let best_predecessor_of node predecessors code order is_visited = match (PTree.get node predecessors) with | None -> failwith "No predecessor list found" | Some lp -> try Some (List.find (fun n -> if (List.mem n lp) && (not (ptree_get_some n is_visited)) then match ptree_get_some n code with | Icond (_, _, n1, n2, ob) -> (match ob with | None -> false | Some false -> n == n2 | Some true -> n == n1 ) | _ -> true else false ) order) with Not_found -> None let print_trace = print_intlist let print_traces oc traces = let rec f oc = function | [] -> () | t::lt -> Printf.fprintf oc "\n\t%a,\n%a" print_trace t f lt in begin if !debug_flag then Printf.fprintf oc "Traces: {%a}\n" f traces end (* Dumb (but linear) trace selection *) let select_traces_linear code entrypoint = let is_visited = ref (PTree.map (fun n i -> false) code) in let bfs_order = bfs code entrypoint in let rec go_through node = begin is_visited := PTree.set node true !is_visited; let next_node = match (get_some @@ PTree.get node code) with | Icall(_, _, _, _, n) | Ibuiltin (_, _, _, n) | Iop (_, _, _, n) | Iload (_, _, _, _, _, n) | Istore (_, _, _, _, n) | Inop n -> Some n | Ijumptable _ | Itailcall _ | Ireturn _ -> None | Icond (_, _, n1, n2, info) -> (match info with | Some false -> Some n2 | Some true -> Some n1 | None -> None ) in match next_node with | None -> [node] | Some n -> if not (get_some @@ PTree.get n !is_visited) then node :: go_through n else [node] end in let traces = ref [] in begin List.iter (fun n -> if not (get_some @@ PTree.get n !is_visited) then traces := (go_through n) :: !traces ) bfs_order; !traces end (* Algorithm mostly inspired from Chang and Hwu 1988 * "Trace Selection for Compiling Large C Application Programs to Microcode" *) let select_traces_chang code entrypoint = begin debug "select_traces\n"; let order = dfs code entrypoint in let predecessors = get_predecessors_rtl code in let traces = ref [] in let is_visited = ref (PTree.map (fun n i -> false) code) in begin (* mark all nodes visited *) debug "Length: %d\n" (List.length order); while exists_false !is_visited do (* while (there are unvisited nodes) *) let seed = select_unvisited_node !is_visited order in let trace = ref [seed] in let current = ref seed in begin is_visited := PTree.set seed true !is_visited; (* mark seed visited *) let quit_loop = ref false in begin while not !quit_loop do let s = best_successor_of !current code !is_visited in match s with | None -> quit_loop := true (* if (s==0) exit loop *) | Some succ -> begin trace := !trace @ [succ]; is_visited := PTree.set succ true !is_visited; (* mark s visited *) current := succ end done; current := seed; quit_loop := false; while not !quit_loop do let s = best_predecessor_of !current predecessors code order !is_visited in match s with | None -> quit_loop := true (* if (s==0) exit loop *) | Some pred -> begin trace := pred :: !trace; is_visited := PTree.set pred true !is_visited; (* mark s visited *) current := pred end done; traces := !trace :: !traces; end end done; (* debug "DFS: \t"; print_intlist order; debug "\n"; *) debug "Traces: %a" print_traces !traces; !traces end end let select_traces code entrypoint = let length = List.length @@ PTree.elements code in if (length < 5000) then select_traces_chang code entrypoint else select_traces_linear code entrypoint let rec make_identity_ptree_rec = function | [] -> PTree.empty | m::lm -> let (n, _) = m in PTree.set n n (make_identity_ptree_rec lm) let make_identity_ptree code = make_identity_ptree_rec (PTree.elements code) (* Change the pointers of nodes to point to n' instead of n *) let rec change_pointers code n n' = function | [] -> code | node :: nodes -> let new_pred_inst = match ptree_get_some node code with | Icall(a, b, c, d, n0) -> assert (n0 = n); Icall(a, b, c, d, n') | Ibuiltin(a, b, c, n0) -> assert (n0 = n); Ibuiltin(a, b, c, n') | Ijumptable(a, ln) -> assert (optbool @@ List.find_opt (fun e -> e = n) ln); Ijumptable(a, List.map (fun e -> if (e = n) then n' else e) ln) | Icond(a, b, n1, n2, i) -> assert (n1 = n || n2 = n); let n1' = if (n1 = n) then n' else n1 in let n2' = if (n2 = n) then n' else n2 in Icond(a, b, n1', n2', i) | Inop n0 -> assert (n0 = n); Inop n' | Iop (a, b, c, n0) -> assert (n0 = n); Iop (a, b, c, n') | Iload (a, b, c, d, e, n0) -> assert (n0 = n); Iload (a, b, c, d, e, n') | Istore (a, b, c, d, n0) -> assert (n0 = n); Istore (a, b, c, d, n') | Itailcall _ | Ireturn _ -> failwith "That instruction cannot be a predecessor" in let new_code = PTree.set node new_pred_inst code in change_pointers new_code n n' nodes (* parent: parent of n to keep as parent * preds: all the other parents of n * n': the integer which should contain the duplicate of n * returns: new code, new ptree *) let duplicate code ptree parent n preds n' = debug "Duplicating node %d into %d..\n" (P.to_int n) (P.to_int n'); match PTree.get n' code with | Some _ -> failwith "The PTree already has a node n'" | None -> let c' = change_pointers code n n' preds in let new_code = PTree.set n' (ptree_get_some n code) c' and new_ptree = PTree.set n' n ptree in (new_code, new_ptree) let rec maxint = function | [] -> 0 | i :: l -> assert (i >= 0); let m = maxint l in if i > m then i else m let is_empty = function | [] -> true | _ -> false let next_free_pc code = maxint (List.map (fun e -> let (n, _) = e in P.to_int n) (PTree.elements code)) + 1 let is_a_nop code n = match get_some @@ PTree.get n code with | Inop _ -> true | _ -> false (* code: RTL code * preds: mapping node -> predecessors * ptree: the revmap * trace: the trace to follow tail duplication on *) let tail_duplicate code preds is_loop_header ptree trace = debug "Tail_duplicate on that trace: %a\n" print_trace trace; (* next_int: unused integer that can be used for the next duplication *) let next_int = ref (next_free_pc code) (* last_node and last_duplicate store resp. the last processed node of the trace, and its duplication *) in let last_node = ref None in let last_duplicate = ref None in let nb_duplicated = ref 0 (* recursive function on a trace *) in let rec f code ptree is_first = function | [] -> (code, ptree) | n :: t -> let (new_code, new_ptree) = if is_first then (code, ptree) (* first node is never duplicated regardless of its inputs *) else let node_preds = ptree_get_some n preds in let node_preds_nolast = (* We traverse loop headers without initiating tail duplication * (see case of two imbricated loops) *) if (get_some @@ PTree.get n is_loop_header) then [] else List.filter (fun e -> e <> get_some !last_node) node_preds (* in let node_preds_nolast = List.filter (fun e -> not @@ List.mem e t) node_preds_nolast *) in let final_node_preds = match !last_duplicate with | None -> node_preds_nolast | Some n' -> n' :: node_preds_nolast in if not (is_empty final_node_preds) then let n' = !next_int in let (newc, newp) = duplicate code ptree !last_node n final_node_preds (P.of_int n') in begin next_int := !next_int + 1; (if not @@ is_a_nop code n then nb_duplicated := !nb_duplicated + 1); last_duplicate := Some (P.of_int n'); (newc, newp) end else (code, ptree) in begin last_node := Some n; f new_code new_ptree false t end in let new_code, new_ptree = f code ptree true trace in (new_code, new_ptree, !nb_duplicated) let superblockify_traces code preds is_loop_header traces ptree = let max_nb_duplicated = !Clflags.option_ftailduplicate (* FIXME - should be architecture dependent *) in let rec f code ptree = function | [] -> (code, ptree, 0) | trace :: traces -> let new_code, new_ptree, nb_duplicated = tail_duplicate code preds is_loop_header ptree trace in if (nb_duplicated < max_nb_duplicated) then (debug "End duplication\n"; f new_code new_ptree traces) else (debug "Too many duplicated nodes, aborting tail duplication\n"; (code, ptree, 0)) in let new_code, new_ptree, _ = f code ptree traces in (new_code, new_ptree) let invert_iconds code = PTree.map1 (fun i -> match i with | Icond (c, lr, ifso, ifnot, info) -> (match info with | Some true -> begin (* debug "Reversing ifso/ifnot for node %d\n" (P.to_int n); *) Icond (Op.negate_condition c, lr, ifnot, ifso, Some false) end | _ -> i) | _ -> i ) code (** Partial loop unrolling * * The following code seeks innermost loops, and unfolds the first iteration * Most of the code has been moved from LICMaux.ml to Duplicateaux.ml to solve * cyclic dependencies between LICMaux and Duplicateaux *) let print_inner_loop iloop = debug "{preds: %a, body: %a, head: %d, finals: %a, sb_final: %a}\n" print_intlist iloop.preds print_intlist iloop.body (P.to_int iloop.head) print_intlist iloop.finals print_option_pint iloop.sb_final let rec print_inner_loops = function | [] -> () | iloop :: iloops -> begin print_inner_loop iloop; debug "\n"; print_inner_loops iloops end let cb_exit_node = function | Icond (_,_,n1,n2,p) -> begin match p with | Some true -> Some n2 | Some false -> Some n1 | None -> None end | _ -> None (* (* Alternative code to get inner_loops - use it if we suspect the other function to be bugged *) let get_natural_loop code predmap n = let is_final_node m = let successors = rtl_successors @@ get_some @@ PTree.get m code in List.exists (fun s -> (P.to_int s) == (P.to_int n)) successors in let excluded_node = cb_exit_node @@ get_some @@ PTree.get n code in let is_excluded m = match excluded_node with | None -> false | Some ex -> P.to_int ex == P.to_int m in debug "get_natural_loop for %d\n" (P.to_int n); let body = bfs_until code n is_final_node is_excluded in debug "BODY: %a\n" print_intlist body; let final = List.find is_final_node body in debug "FINAL: %d\n" (P.to_int final); let preds = List.filter (fun pred -> List.mem pred body) @@ get_some @@ PTree.get n predmap in debug "PREDS: %a\n" print_intlist preds; { preds = preds; body = body; head = n; final = final } let rec count_loop_headers is_loop_header = function | [] -> 0 | n :: ln -> let rem = count_loop_headers is_loop_header ln in if (get_some @@ PTree.get n is_loop_header) then rem + 1 else rem let get_inner_loops f code is_loop_header = let predmap = get_predecessors_rtl code in let iloops = ref [] in List.iter (fun (n, ilh) -> if ilh then begin let iloop = get_natural_loop code predmap n in let nb_headers = count_loop_headers is_loop_header iloop.body in if nb_headers == 1 then (* innermost loop *) iloops := iloop :: !iloops end ) (PTree.elements is_loop_header); !iloops *) let rec generate_fwmap ln ln' fwmap = match ln with | [] -> begin match ln' with | [] -> fwmap | _ -> failwith "ln and ln' have different lengths" end | n :: ln -> begin match ln' with | n' :: ln' -> generate_fwmap ln ln' (PTree.set n n' fwmap) | _ -> failwith "ln and ln' have different lengths" end let generate_revmap ln ln' revmap = generate_fwmap ln' ln revmap let apply_map fw n = P.of_int @@ ptree_get_some n fw let apply_map_list fw ln = List.map (apply_map fw) ln let apply_map_opt fw n = match PTree.get n fw with | Some n' -> P.of_int n' | None -> n let change_nexts fwmap = function | Icall (a, b, c, d, n) -> Icall (a, b, c, d, apply_map fwmap n) | Ibuiltin (a, b, c, n) -> Ibuiltin (a, b, c, apply_map fwmap n) | Ijumptable (a, ln) -> Ijumptable (a, List.map (apply_map_opt fwmap) ln) | Icond (a, b, n1, n2, i) -> Icond (a, b, apply_map_opt fwmap n1, apply_map_opt fwmap n2, i) | Inop n -> Inop (apply_map fwmap n) | Iop (a, b, c, n) -> Iop (a, b, c, apply_map fwmap n) | Iload (a, b, c, d, e, n) -> Iload (a, b, c, d, e, apply_map fwmap n) | Istore (a, b, c, d, n) -> Istore (a, b, c, d, apply_map fwmap n) | Itailcall (a, b, c) -> Itailcall (a, b, c) | Ireturn o -> Ireturn o (** Clone a list of instructions into free pc indexes * * The list of instructions should be contiguous, and not include any loop. * It is assumed that the first instruction of the list is the head. * Also, the last instruction of the list should be the loop backedge. * * Returns: (code', revmap', ln', fwmap) * code' is the updated code, after cloning * revmap' is the updated revmap * ln' is the list of the new indexes used to reference the cloned instructions * fwmap is a map from ln to ln' *) let clone code revmap ln = begin assert (List.length ln > 0); let head' = next_free_pc code in (* +head' to ensure we never overlap with the existing code *) let ln' = List.map (fun n -> n + head') @@ List.map P.to_int ln in let fwmap = generate_fwmap ln ln' PTree.empty in let revmap' = generate_revmap ln (List.map P.of_int ln') revmap in let code' = ref code in List.iter (fun n -> let instr = get_some @@ PTree.get n code in let instr' = change_nexts fwmap instr in code' := PTree.set (apply_map fwmap n) instr' !code' ) ln; (!code', revmap', ln', fwmap) end let rec count_ignore_nops code = function | [] -> 0 | n::ln -> let inst = get_some @@ PTree.get n code in match inst with | Inop _ -> count_ignore_nops code ln | _ -> 1 + count_ignore_nops code ln (* Unrolls a single interation of the inner loop * 1) Clones the body into body' * 2) Links the preds to the first instruction of body' * 3) Links the last instruction of body' into the first instruction of body *) let unroll_inner_loop_single code revmap iloop = let body = iloop.body in if count_ignore_nops code body > !Clflags.option_funrollsingle then begin debug "Too many nodes in the loop body (%d > %d)" (List.length body) !Clflags.option_funrollsingle; (code, revmap) end else let (code2, revmap2, dupbody, fwmap) = clone code revmap body in let code' = ref code2 in let head' = apply_map fwmap (iloop.head) in let finals' = apply_map_list fwmap (iloop.finals) in begin debug "PREDS: %a\n" print_intlist iloop.preds; debug "IHEAD: %d\n" (P.to_int iloop.head); code' := change_pointers !code' (iloop.head) head' (iloop.preds); code' := change_pointers !code' head' (iloop.head) finals'; (!code', revmap2) end let unroll_inner_loops_single f code revmap = let is_loop_header = get_loop_headers code (f.fn_entrypoint) in let inner_loops = get_inner_loops f code is_loop_header in let code' = ref code in let revmap' = ref revmap in begin print_inner_loops inner_loops; List.iter (fun iloop -> let (new_code, new_revmap) = unroll_inner_loop_single !code' !revmap' iloop in code' := new_code; revmap' := new_revmap ) inner_loops; (!code', !revmap') end let is_some o = match o with Some _ -> true | None -> false let rec go_through_predicted code start final = if start == final then Some [start] else match rtl_successors_pref @@ get_some @@ PTree.get start code with | [n] -> ( match go_through_predicted code n final with | Some ln -> Some (start :: ln) | None -> None ) | _ -> None (* Unrolls the body of the inner loop once - duplicating the exit condition as well * 1) Clones body into body' * 2) Links the last instruction of body (sb_final) into the first of body' * 3) Links the last instruction of body' into the first of body *) let unroll_inner_loop_body code revmap iloop = debug "iloop = "; print_inner_loop iloop; let body = iloop.body in let limit = !Clflags.option_funrollbody in if count_ignore_nops code body > limit then begin debug "Too many nodes in the loop body (%d > %d)\n" (List.length body) limit; (code, revmap) end else if not @@ is_some iloop.sb_final then begin debug "The loop body does not form a superblock OR we have predicted that we do not loop\n"; (code, revmap) end else let sb_final = get_some @@ iloop.sb_final in let sb_body = get_some @@ go_through_predicted code iloop.head sb_final in let (code2, revmap2, dupbody, fwmap) = clone code revmap sb_body in let code' = ref code2 in let head' = apply_map fwmap (iloop.head) in let sb_final' = apply_map fwmap sb_final in begin code' := change_pointers !code' iloop.head head' [sb_final]; code' := change_pointers !code' head' iloop.head [sb_final']; (!code', revmap2) end let unroll_inner_loops_body f code revmap = let is_loop_header = get_loop_headers code (f.fn_entrypoint) in let inner_loops = get_inner_loops f code is_loop_header in debug "Number of loops found: %d\n" (List.length inner_loops); let code' = ref code in let revmap' = ref revmap in begin print_inner_loops inner_loops; List.iter (fun iloop -> let (new_code, new_revmap) = unroll_inner_loop_body !code' !revmap' iloop in code' := new_code; revmap' := new_revmap ) inner_loops; (!code', !revmap') end let extract_upto_icond f code head = let rec extract h = let inst = get_some @@ PTree.get h code in match inst with | Icond _ -> [h] | _ -> ( match rtl_successors inst with | [n] -> h :: (extract n) | _ -> failwith "Found a node with more than one successor??" ) in List.rev @@ extract head let rotate_inner_loop f code revmap iloop = let header = extract_upto_icond f code iloop.head in let limit = !Clflags.option_flooprotate in let nb_duplicated = count_ignore_nops code header in if nb_duplicated > limit then begin debug "Loop Rotate: too many nodes to duplicate (%d > %d)" (List.length header) limit; (code, revmap) end else if nb_duplicated == count_ignore_nops code iloop.body then begin debug "The conditional branch is already at the end! No need to rotate."; (code, revmap) end else let (code2, revmap2, dupheader, fwmap) = clone code revmap header in let code' = ref code2 in let head' = apply_map fwmap iloop.head in begin code' := change_pointers !code' iloop.head head' iloop.preds; (!code', revmap2) end let rotate_inner_loops f code revmap = let is_loop_header = get_loop_headers code (f.fn_entrypoint) in let inner_loops = get_inner_loops f code is_loop_header in let code' = ref code in let revmap' = ref revmap in begin print_inner_loops inner_loops; List.iter (fun iloop -> let (new_code, new_revmap) = rotate_inner_loop f !code' !revmap' iloop in code' := new_code; revmap' := new_revmap ) inner_loops; (!code', !revmap') end let loop_rotate f = let entrypoint = f.fn_entrypoint in let code = f.fn_code in let revmap = make_identity_ptree code in let (code, revmap) = if !Clflags.option_flooprotate > 0 then rotate_inner_loops f code revmap else (code, revmap) in ((code, entrypoint), revmap) let static_predict f = let entrypoint = f.fn_entrypoint in let code = f.fn_code in let revmap = make_identity_ptree code in begin reset_stats (); set_stats_oc (); let code = if !Clflags.option_fpredict then update_directions f code entrypoint else code in write_stats_oc (); let code = if !Clflags.option_fpredict then invert_iconds code else code in ((code, entrypoint), revmap) end let unroll_single f = let entrypoint = f.fn_entrypoint in let code = f.fn_code in let revmap = make_identity_ptree code in let (code, revmap) = if !Clflags.option_funrollsingle > 0 then unroll_inner_loops_single f code revmap else (code, revmap) in ((code, entrypoint), revmap) let unroll_body f = let entrypoint = f.fn_entrypoint in let code = f.fn_code in let revmap = make_identity_ptree code in let (code, revmap) = if !Clflags.option_funrollbody > 0 then unroll_inner_loops_body f code revmap else (code, revmap) in ((code, entrypoint), revmap) let tail_duplicate f = let entrypoint = f.fn_entrypoint in let code = f.fn_code in let revmap = make_identity_ptree code in let (code, revmap) = if !Clflags.option_ftailduplicate > 0 then let traces = select_traces code entrypoint in let preds = get_predecessors_rtl code in let is_loop_header = get_loop_headers code entrypoint in superblockify_traces code preds is_loop_header traces revmap else (code, revmap) in ((code, entrypoint), revmap)