diff options
Diffstat (limited to 'backend/Duplicateaux.ml')
-rw-r--r-- | backend/Duplicateaux.ml | 1137 |
1 files changed, 1137 insertions, 0 deletions
diff --git a/backend/Duplicateaux.ml b/backend/Duplicateaux.ml new file mode 100644 index 00000000..22bee067 --- /dev/null +++ b/backend/Duplicateaux.ml @@ -0,0 +1,1137 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +(* Oracle for Duplicate pass. + * - Add static prediction information to Icond nodes + * - Performs tail duplication on interesting traces to form superblocks + * - Unrolls a single iteration of innermost loops + * - (TODO: perform partial loop unrolling inside innermost loops) + *) + +open RTL +open Maps +open Camlcoq +open DebugPrint +open RTLcommonaux + +let stats_oc = ref None + +let set_stats_oc () = + try + let name = Sys.getenv "COMPCERT_PREDICT_STATS" in + let oc = open_out_gen [Open_append; Open_creat; Open_text] 0o666 name in + stats_oc := Some oc + with Not_found -> () + +(* number of total CBs *) +let stats_nb_total = ref 0 +(* we predicted the same thing as the profiling *) +let stats_nb_correct_predicts = ref 0 +(* we predicted something (say Some true), but the profiling predicted the opposite (say Some false) *) +let stats_nb_mispredicts = ref 0 +(* we did not predict anything (None) even though the profiling did predict something *) +let stats_nb_missed_opportunities = ref 0 +(* we predicted something (say Some true) but the profiling preferred not to predict anything (None) *) +let stats_nb_overpredict = ref 0 + +(* heuristic specific counters *) +let wrong_opcode = ref 0 +let wrong_return = ref 0 +let wrong_loop2 = ref 0 +let wrong_call = ref 0 + +let right_opcode = ref 0 +let right_return = ref 0 +let right_loop2 = ref 0 +let right_call = ref 0 + +let reset_stats () = begin + stats_nb_total := 0; + stats_nb_correct_predicts := 0; + stats_nb_mispredicts := 0; + stats_nb_missed_opportunities := 0; + stats_nb_overpredict := 0; + wrong_opcode := 0; + wrong_return := 0; + wrong_loop2 := 0; + wrong_call := 0; + right_opcode := 0; + right_return := 0; + right_loop2 := 0; + right_call := 0; +end + +let incr theref = theref := !theref + 1 + +let has_some o = match o with Some _ -> true | None -> false + +let stats_oc_recording () = has_some !stats_oc + +let write_stats_oc () = + match !stats_oc with + | None -> () + | Some oc -> begin + Printf.fprintf oc "%d %d %d %d %d %d %d %d %d %d %d %d %d\n" !stats_nb_total + !stats_nb_correct_predicts !stats_nb_mispredicts !stats_nb_missed_opportunities + !stats_nb_overpredict + !wrong_opcode !wrong_return !wrong_loop2 !wrong_call + !right_opcode !right_return !right_loop2 !right_call + ; + close_out oc + end + +let get_loop_headers = LICMaux.get_loop_headers +let rtl_successors = LICMaux.rtl_successors + +(* Get list of nodes following a BFS of the code *) +(* Stops when predicate is reached + * Excludes any node given in excluded function *) +let bfs_until code entrypoint (predicate: node->bool) (excluded: node->bool) = begin + debug "bfs\n"; + let visited = ref (PTree.map (fun n i -> false) code) + and bfs_list = ref [] + and to_visit = Queue.create () + and node = ref entrypoint + in begin + Queue.add entrypoint to_visit; + while not (Queue.is_empty to_visit) do + node := Queue.pop to_visit; + if (not (get_some @@ PTree.get !node !visited)) then begin + visited := PTree.set !node true !visited; + if not (excluded !node) then begin + match PTree.get !node code with + | None -> failwith "No such node" + | Some i -> + bfs_list := !node :: !bfs_list; + if not (predicate !node) then + let succ = rtl_successors i in List.iter (fun n -> Queue.add n to_visit) succ + end + end + done; + List.rev !bfs_list + end +end + +let bfs code entrypoint = bfs_until code entrypoint (fun _ -> false) (fun _ -> false) + +let optbool o = match o with Some _ -> true | None -> false + +let ptree_get_some n ptree = get_some @@ PTree.get n ptree + +(* Returns a PTree: node -> list of the predecessors of that node *) +let get_predecessors_rtl code = begin + debug "get_predecessors_rtl\n"; + let preds = ref (PTree.map (fun n i -> []) code) in + let process_inst (node, i) = + let succ = rtl_successors i + in List.iter (fun s -> + let previous_preds = ptree_get_some s !preds in + if optbool @@ List.find_opt (fun e -> e == node) previous_preds then () + else preds := PTree.set s (node::previous_preds) !preds) succ + in begin + List.iter process_inst (PTree.elements code); + !preds + end +end + +module PInt = struct + type t = P.t + let compare x y = compare (P.to_int x) (P.to_int y) +end + +module PSet = Set.Make(PInt) + +let print_intset s = + let seq = PSet.to_seq s + in begin + if !debug_flag then begin + Printf.printf "{"; + Seq.iter (fun n -> + Printf.printf "%d " (P.to_int n) + ) seq; + Printf.printf "}" + end + end + +(* Looks ahead (until a branch) to see if a node further down verifies + * the given predicate *) +let rec look_ahead_gen (successors: RTL.instruction -> P.t list) code node is_loop_header predicate = + if (predicate node) then true + else match (successors @@ get_some @@ PTree.get node code) with + | [n] -> if (predicate n) then true + else ( + if (get_some @@ PTree.get n is_loop_header) then false + else look_ahead_gen successors code n is_loop_header predicate + ) + | _ -> false + +let look_ahead = look_ahead_gen rtl_successors + +(** + * Heuristics mostly based on the paper Branch Prediction for Free + *) + +let do_call_heuristic code cond ifso ifnot is_loop_header = + begin + debug "\tCall heuristic..\n"; + let predicate n = (function + | Icall _ -> true + | _ -> false) @@ get_some @@ PTree.get n code + in let ifso_call = look_ahead code ifso is_loop_header predicate + in let ifnot_call = look_ahead code ifnot is_loop_header predicate + in if ifso_call && ifnot_call then None + else if ifso_call then Some false + else if ifnot_call then Some true + else None + end + +let do_opcode_heuristic code cond ifso ifnot is_loop_header = + begin + debug "\tOpcode heuristic..\n"; + DuplicateOpcodeHeuristic.opcode_heuristic code cond ifso ifnot is_loop_header + end + +let do_return_heuristic code cond ifso ifnot is_loop_header = + begin + debug "\tReturn heuristic..\n"; + let predicate n = (function + | Ireturn _ -> true + | _ -> false) @@ get_some @@ PTree.get n code + in let ifso_return = look_ahead code ifso is_loop_header predicate + in let ifnot_return = look_ahead code ifnot is_loop_header predicate + in if ifso_return && ifnot_return then None + else if ifso_return then Some false + else if ifnot_return then Some true + else None + end + +let do_store_heuristic code cond ifso ifnot is_loop_header = + begin + debug "\tStore heuristic..\n"; + let predicate n = (function + | Istore _ -> true + | _ -> false) @@ get_some @@ PTree.get n code + in let ifso_store = look_ahead code ifso is_loop_header predicate + in let ifnot_store = look_ahead code ifnot is_loop_header predicate + in if ifso_store && ifnot_store then None + else if ifso_store then Some false + else if ifnot_store then Some true + else None + end + +let do_loop_heuristic code cond ifso ifnot is_loop_header = + begin + debug "\tLoop heuristic..\n"; + let predicate n = get_some @@ PTree.get n is_loop_header in + let ifso_loop = look_ahead code ifso is_loop_header predicate in + let ifnot_loop = look_ahead code ifnot is_loop_header predicate in + if ifso_loop && ifnot_loop then (debug "\t\tLOOP but can't choose which\n"; None) (* TODO - take the innermost loop ? *) + else if ifso_loop then Some true + else if ifnot_loop then Some false + else None + end + +let do_loop2_heuristic loop_info n code cond ifso ifnot is_loop_header = + begin + debug "\tLoop2 heuristic..\n"; + match get_some @@ PTree.get n loop_info with + | None -> None + | Some b -> Some b + end + +(** Innermost loop detection *) + +type innerLoop = { + preds: P.t list; + body: P.t list; + head: P.t; (* head of the loop *) + finals: P.t list; (* the final instructions, which loops back to the head *) + (* There may be more than one ; for instance if there is an if inside the loop with both + * branches leading to a goto backedge + * Such cases usually happen after a tail-duplication *) + sb_final: P.t option; (* if the innerloop wraps a superblock, this is its final instruction *) + (* may be None if we predict that we do not loop *) +} + +let print_pset = LICMaux.pp_pset + +let rtl_successors_pref = function +| Itailcall _ | Ireturn _ -> [] +| Icall(_,_,_,_,n) | Ibuiltin(_,_,_,n) | Inop n | Iop (_,_,_,n) +| Iload (_,_,_,_,_,n) | Istore (_,_,_,_,n) -> [n] +| Icond (_,_,n1,n2,p) -> (match p with + | Some true -> [n1] + | Some false -> [n2] + | None -> [n1; n2]) +| Ijumptable (_,ln) -> ln + +(* Find the last node of a trace (starting at "node"), until a loop is encountered. + * If a non-predicted branch is encountered, returns None *) +let rec find_last_node_before_loop code node trace is_loop_header = + let rtl_succ = rtl_successors @@ get_some @@ PTree.get node code in + let headers = List.filter (fun n -> + get_some @@ PTree.get n is_loop_header && HashedSet.PSet.contains trace n) rtl_succ in + match headers with + | [] -> ( + let next_nodes = rtl_successors_pref @@ get_some @@ PTree.get node code in + match next_nodes with + | [n] -> ( + (* To prevent getting out of the superblock and loop infinitely when the prediction is false *) + if HashedSet.PSet.contains trace n then + find_last_node_before_loop code n trace is_loop_header + else None + ) + | _ -> None (* May happen when we predict that a loop is not taken *) + ) + | [h] -> Some node + | _ -> failwith "Multiple branches leading to a loop" + +(* The computation of sb_final requires to already have branch prediction *) +let get_inner_loops f code is_loop_header = + let fake_f = { fn_sig = f.fn_sig; fn_params = f.fn_params; + fn_stacksize = f.fn_stacksize; fn_code = code; fn_entrypoint = f.fn_entrypoint } in + let (_, predmap, loopmap) = LICMaux.inner_loops fake_f in + begin + debug "PREDMAP: "; print_ptree print_intlist predmap; + debug "LOOPMAP: "; print_ptree print_pset loopmap; + List.map (fun (n, body) -> + let preds = List.filter (fun p -> not @@ HashedSet.PSet.contains body p) + @@ get_some @@ PTree.get n predmap in + let head = (* the instruction from body which is a loop header *) + let heads = HashedSet.PSet.elements @@ HashedSet.PSet.filter + (fun n -> ptree_get_some n is_loop_header) body in + begin + assert (List.length heads == 1); + List.hd heads + end in + let finals = (* the predecessors from head that are in the body *) + let head_preds = ptree_get_some head predmap in + let filtered = List.filter (fun n -> HashedSet.PSet.contains body n) head_preds in + begin + debug "HEAD: %d\n" (P.to_int head); + debug "BODY: %a\n" print_pset body; + debug "HEADPREDS: %a\n" print_intlist head_preds; + filtered + end in + let sb_final = find_last_node_before_loop code head body is_loop_header in + let body = HashedSet.PSet.elements body in + { preds = preds; body = body; head = head; finals = finals; + sb_final = sb_final; } + ) + (* LICMaux.inner_loops also returns non-inner loops, but with a body of 1 instruction + * We remove those to get just the inner loops *) + @@ List.filter (fun (n, body) -> + let count = List.length @@ HashedSet.PSet.elements body in count != 1 + ) (PTree.elements loopmap) + end + +let get_loop_bodies code entrypoint = + let predecessors = get_predecessors_rtl code in + (* Algorithm from Muchnik, Compiler Design & Implementation, Figure 7.21 page 192 *) + let natural_loop n m = + debug "Natural Loop from %d to %d\n" (P.to_int n) (P.to_int m); + let in_body = ref (PTree.map (fun n b -> false) code) in + let body = ref [] in + let add_to_body n = begin + in_body := PTree.set n true !in_body; + body := n :: !body + end + in let rec process_node p = + debug " Processing node %d\n" (P.to_int p); + List.iter (fun pred -> + debug " Looking at predecessor of %d: %d\n" (P.to_int p) (P.to_int pred); + let is_in_body = get_some @@ PTree.get pred !in_body in + if (not @@ is_in_body) then begin + debug " --> adding to body\n"; + add_to_body pred; + process_node pred + end + ) (get_some @@ PTree.get p predecessors) + in begin + add_to_body m; + add_to_body n; + (if (m != n) then process_node m); + !body + end + in let option_natural_loop n = function + | None -> None + | Some m -> Some (natural_loop n m) + in PTree.map option_natural_loop (LICMaux.get_loop_backedges code entrypoint) + +(* Returns a PTree of either None or Some b where b determines the node in the loop body, for a cb instruction *) +let get_loop_info f is_loop_header bfs_order code = + let loop_info = ref (PTree.map (fun n i -> None) code) in + let mark_body body = + List.iter (fun n -> + match get_some @@ PTree.get n code with + | Icond (_, _, ifso, ifnot, _) -> begin + match PTree.get n !loop_info with + | None -> () + | Some _ -> + let b1 = List.mem ifso body in + let b2 = List.mem ifnot body in + if (b1 && b2) then () + else if (b1 || b2) then begin + if b1 then loop_info := PTree.set n (Some true) !loop_info + else if b2 then loop_info := PTree.set n (Some false) !loop_info + end + end + | _ -> () + ) body + in let bodymap = get_loop_bodies code f.fn_entrypoint in + List.iter (fun (_,obody) -> + match obody with + | None -> () + | Some body -> mark_body body + ) (PTree.elements bodymap); + !loop_info + +(* Remark - compared to the original Branch Prediction for Free paper, we don't use the store heuristic *) +let get_directions f code entrypoint = begin + debug "get_directions\n"; + let bfs_order = bfs code entrypoint in + let is_loop_header = get_loop_headers code entrypoint in + let loop_info = get_loop_info f is_loop_header bfs_order code in + let directions = ref (PTree.map (fun n i -> None) code) in (* None <=> no predicted direction *) + begin + (* ptree_printbool is_loop_header; *) + (* debug "\n"; *) + List.iter (fun n -> + match (get_some @@ PTree.get n code) with + | Icond (cond, lr, ifso, ifnot, pred) -> begin + if stats_oc_recording () || not @@ has_some pred then + (* debug "Analyzing %d.." (P.to_int n); *) + let heuristics = [ do_opcode_heuristic; + do_return_heuristic; do_loop2_heuristic loop_info n; (* do_loop_heuristic; *) do_call_heuristic; + (* do_store_heuristic *) ] in + let preferred = ref None in + let current_heuristic = ref 0 in + begin + debug "Deciding condition for RTL node %d\n" (P.to_int n); + List.iter (fun do_heur -> + match !preferred with + | None -> begin + preferred := do_heur code cond ifso ifnot is_loop_header; + if stats_oc_recording () then begin + (* Getting stats about mispredictions from each heuristic *) + (match !preferred, pred with + | Some false, Some true + | Some true, Some false + (* | Some _, None *) (* Uncomment for overpredicts *) + -> begin + match !current_heuristic with + | 0 -> incr wrong_opcode + | 1 -> incr wrong_return + | 2 -> incr wrong_loop2 + | 3 -> incr wrong_call + | _ -> failwith "Shouldn't happen" + end + | Some false, Some false + | Some true, Some true -> begin + match !current_heuristic with + | 0 -> incr right_opcode + | 1 -> incr right_return + | 2 -> incr right_loop2 + | 3 -> incr right_call + | _ -> failwith "Shouldn't happen" + end + | _ -> () + ); + incr current_heuristic + end + end + | Some _ -> () + ) heuristics; + directions := PTree.set n !preferred !directions; + (match !preferred with | Some false -> debug "\tFALLTHROUGH\n" + | Some true -> debug "\tBRANCH\n" + | None -> debug "\tUNSURE\n"); + debug "---------------------------------------\n" + end + end + | _ -> () + ) bfs_order; + !directions + end +end + +let update_direction direction = function +| Icond (cond, lr, n, n', pred) -> begin + (* Counting stats from profiling *) + if stats_oc_recording () then begin + incr stats_nb_total; + match pred, direction with + | None, None -> incr stats_nb_correct_predicts + | None, Some _ -> incr stats_nb_overpredict + | Some _, None -> incr stats_nb_missed_opportunities + | Some false, Some false -> incr stats_nb_correct_predicts + | Some false, Some true -> incr stats_nb_mispredicts + | Some true, Some false -> incr stats_nb_mispredicts + | Some true, Some true -> incr stats_nb_correct_predicts + end; + + (* only update if there is no prior existing branch prediction *) + (match pred with + | None -> Icond (cond, lr, n, n', direction) + | Some _ -> begin + Icond (cond, lr, n, n', pred) + end + ) + end +| i -> i + +(* Uses branch prediction to write prediction annotations in Icond *) +let update_directions f code entrypoint = begin + debug "Update_directions\n"; + let directions = get_directions f code entrypoint in + let code' = ref code in + begin + debug "Get Directions done, now proceeding to update all direction information..\n"; + (* debug "Ifso directions: "; + ptree_printbool directions; + debug "\n"; *) + List.iter (fun (n, i) -> + let direction = get_some @@ PTree.get n directions in + code' := PTree.set n (update_direction direction i) !code' + ) (PTree.elements code); + !code' + end +end + +(** Trace selection *) + +let rec exists_false_rec = function + | [] -> false + | m::lm -> let (_, b) = m in if b then exists_false_rec lm else true + +let exists_false boolmap = exists_false_rec (PTree.elements boolmap) + +(* DFS using prediction info to guide the exploration *) +let dfs code entrypoint = begin + debug "dfs\n"; + let visited = ref (PTree.map (fun n i -> false) code) in + let rec dfs_list code = function + | [] -> [] + | node :: ln -> + if get_some @@ PTree.get node !visited then dfs_list code ln + else begin + visited := PTree.set node true !visited; + let next_nodes = (match get_some @@ PTree.get node code with + | Icall(_, _, _, _, n) | Ibuiltin (_, _, _, n) | Iop (_, _, _, n) + | Iload (_, _, _, _, _, n) | Istore (_, _, _, _, n) | Inop n -> [n] + | Ijumptable (_, ln) -> ln + | Itailcall _ | Ireturn _ -> [] + | Icond (_, _, n1, n2, info) -> (match info with + | Some false -> [n2; n1] + | _ -> [n1; n2] + ) + ) in node :: dfs_list code (next_nodes @ ln) + end + in dfs_list code [entrypoint] +end + +let rec select_unvisited_node is_visited = function +| [] -> failwith "Empty list" +| n :: ln -> if not (ptree_get_some n is_visited) then n else select_unvisited_node is_visited ln + +let best_successor_of node code is_visited = + match (PTree.get node code) with + | None -> failwith "No such node in the code" + | Some i -> + let next_node = match i with + | Inop n | Iop (_,_,_,n) | Iload (_,_,_,_,_,n) | Istore(_,_,_,_,n) + | Icall (_,_,_,_,n) | Ibuiltin (_,_,_,n) -> Some n + | Icond (_, _, n1, n2, ob) -> (match ob with None -> None | Some false -> Some n2 | Some true -> Some n1) + | _ -> None + in match next_node with + | None -> None + | Some n -> if not (ptree_get_some n is_visited) then Some n else None + +(* FIXME - could be improved by selecting in priority the predicted paths *) +let best_predecessor_of node predecessors code order is_visited = + match (PTree.get node predecessors) with + | None -> failwith "No predecessor list found" + | Some lp -> + try Some (List.find (fun n -> + if (List.mem n lp) && (not (ptree_get_some n is_visited)) then + match ptree_get_some n code with + | Icond (_, _, n1, n2, ob) -> (match ob with + | None -> false + | Some false -> n == n2 + | Some true -> n == n1 + ) + | _ -> true + else false + ) order) + with Not_found -> None + +let print_trace = print_intlist + +let print_traces oc traces = + let rec f oc = function + | [] -> () + | t::lt -> Printf.fprintf oc "\n\t%a,\n%a" print_trace t f lt + in begin + if !debug_flag then + Printf.fprintf oc "Traces: {%a}\n" f traces + end + +(* Dumb (but linear) trace selection *) +let select_traces_linear code entrypoint = + let is_visited = ref (PTree.map (fun n i -> false) code) in + let bfs_order = bfs code entrypoint in + let rec go_through node = begin + is_visited := PTree.set node true !is_visited; + let next_node = match (get_some @@ PTree.get node code) with + | Icall(_, _, _, _, n) | Ibuiltin (_, _, _, n) | Iop (_, _, _, n) + | Iload (_, _, _, _, _, n) | Istore (_, _, _, _, n) | Inop n -> Some n + | Ijumptable _ | Itailcall _ | Ireturn _ -> None + | Icond (_, _, n1, n2, info) -> (match info with + | Some false -> Some n2 + | Some true -> Some n1 + | None -> None + ) + in match next_node with + | None -> [node] + | Some n -> + if not (get_some @@ PTree.get n !is_visited) then node :: go_through n + else [node] + end + in let traces = ref [] in begin + List.iter (fun n -> + if not (get_some @@ PTree.get n !is_visited) then + traces := (go_through n) :: !traces + ) bfs_order; + !traces + end + + +(* Algorithm mostly inspired from Chang and Hwu 1988 + * "Trace Selection for Compiling Large C Application Programs to Microcode" *) +let select_traces_chang code entrypoint = begin + debug "select_traces\n"; + let order = dfs code entrypoint in + let predecessors = get_predecessors_rtl code in + let traces = ref [] in + let is_visited = ref (PTree.map (fun n i -> false) code) in begin (* mark all nodes visited *) + debug "Length: %d\n" (List.length order); + while exists_false !is_visited do (* while (there are unvisited nodes) *) + let seed = select_unvisited_node !is_visited order in + let trace = ref [seed] in + let current = ref seed in begin + is_visited := PTree.set seed true !is_visited; (* mark seed visited *) + let quit_loop = ref false in begin + while not !quit_loop do + let s = best_successor_of !current code !is_visited in + match s with + | None -> quit_loop := true (* if (s==0) exit loop *) + | Some succ -> begin + trace := !trace @ [succ]; + is_visited := PTree.set succ true !is_visited; (* mark s visited *) + current := succ + end + done; + current := seed; + quit_loop := false; + while not !quit_loop do + let s = best_predecessor_of !current predecessors code order !is_visited in + match s with + | None -> quit_loop := true (* if (s==0) exit loop *) + | Some pred -> begin + trace := pred :: !trace; + is_visited := PTree.set pred true !is_visited; (* mark s visited *) + current := pred + end + done; + traces := !trace :: !traces; + end + end + done; + (* debug "DFS: \t"; print_intlist order; debug "\n"; *) + debug "Traces: %a" print_traces !traces; + !traces + end +end + +let select_traces code entrypoint = + let length = List.length @@ PTree.elements code in + if (length < 5000) then select_traces_chang code entrypoint + else select_traces_linear code entrypoint + +let rec make_identity_ptree_rec = function +| [] -> PTree.empty +| m::lm -> let (n, _) = m in PTree.set n n (make_identity_ptree_rec lm) + +let make_identity_ptree code = make_identity_ptree_rec (PTree.elements code) + +(* Change the pointers of nodes to point to n' instead of n *) +let rec change_pointers code n n' = function + | [] -> code + | node :: nodes -> + let new_pred_inst = match ptree_get_some node code with + | Icall(a, b, c, d, n0) -> assert (n0 = n); Icall(a, b, c, d, n') + | Ibuiltin(a, b, c, n0) -> assert (n0 = n); Ibuiltin(a, b, c, n') + | Ijumptable(a, ln) -> assert (optbool @@ List.find_opt (fun e -> e = n) ln); + Ijumptable(a, List.map (fun e -> if (e = n) then n' else e) ln) + | Icond(a, b, n1, n2, i) -> assert (n1 = n || n2 = n); + let n1' = if (n1 = n) then n' else n1 + in let n2' = if (n2 = n) then n' else n2 + in Icond(a, b, n1', n2', i) + | Inop n0 -> assert (n0 = n); Inop n' + | Iop (a, b, c, n0) -> assert (n0 = n); Iop (a, b, c, n') + | Iload (a, b, c, d, e, n0) -> assert (n0 = n); Iload (a, b, c, d, e, n') + | Istore (a, b, c, d, n0) -> assert (n0 = n); Istore (a, b, c, d, n') + | Itailcall _ | Ireturn _ -> failwith "That instruction cannot be a predecessor" + in let new_code = PTree.set node new_pred_inst code + in change_pointers new_code n n' nodes + +(* parent: parent of n to keep as parent + * preds: all the other parents of n + * n': the integer which should contain the duplicate of n + * returns: new code, new ptree *) +let duplicate code ptree parent n preds n' = + debug "Duplicating node %d into %d..\n" (P.to_int n) (P.to_int n'); + match PTree.get n' code with + | Some _ -> failwith "The PTree already has a node n'" + | None -> + let c' = change_pointers code n n' preds + in let new_code = PTree.set n' (ptree_get_some n code) c' + and new_ptree = PTree.set n' n ptree + in (new_code, new_ptree) + +let rec maxint = function + | [] -> 0 + | i :: l -> assert (i >= 0); let m = maxint l in if i > m then i else m + +let is_empty = function + | [] -> true + | _ -> false + +let next_free_pc code = maxint (List.map (fun e -> let (n, _) = e in P.to_int n) (PTree.elements code)) + 1 + +let is_a_nop code n = + match get_some @@ PTree.get n code with + | Inop _ -> true + | _ -> false + +(* code: RTL code + * preds: mapping node -> predecessors + * ptree: the revmap + * trace: the trace to follow tail duplication on *) +let tail_duplicate code preds is_loop_header ptree trace = + debug "Tail_duplicate on that trace: %a\n" print_trace trace; + (* next_int: unused integer that can be used for the next duplication *) + let next_int = ref (next_free_pc code) + (* last_node and last_duplicate store resp. the last processed node of the trace, and its duplication *) + in let last_node = ref None + in let last_duplicate = ref None + in let nb_duplicated = ref 0 + (* recursive function on a trace *) + in let rec f code ptree is_first = function + | [] -> (code, ptree) + | n :: t -> + let (new_code, new_ptree) = + if is_first then (code, ptree) (* first node is never duplicated regardless of its inputs *) + else + let node_preds = ptree_get_some n preds + in let node_preds_nolast = + (* We traverse loop headers without initiating tail duplication + * (see case of two imbricated loops) *) + if (get_some @@ PTree.get n is_loop_header) then [] + else List.filter (fun e -> e <> get_some !last_node) node_preds + (* in let node_preds_nolast = List.filter (fun e -> not @@ List.mem e t) node_preds_nolast *) + in let final_node_preds = match !last_duplicate with + | None -> node_preds_nolast + | Some n' -> n' :: node_preds_nolast + in if not (is_empty final_node_preds) then + let n' = !next_int + in let (newc, newp) = duplicate code ptree !last_node n final_node_preds (P.of_int n') + in begin + next_int := !next_int + 1; + (if not @@ is_a_nop code n then nb_duplicated := !nb_duplicated + 1); + last_duplicate := Some (P.of_int n'); + (newc, newp) + end + else (code, ptree) + in begin + last_node := Some n; + f new_code new_ptree false t + end + in let new_code, new_ptree = f code ptree true trace + in (new_code, new_ptree, !nb_duplicated) + +let superblockify_traces code preds is_loop_header traces ptree = + let max_nb_duplicated = !Clflags.option_ftailduplicate (* FIXME - should be architecture dependent *) + in let rec f code ptree = function + | [] -> (code, ptree, 0) + | trace :: traces -> + let new_code, new_ptree, nb_duplicated = tail_duplicate code preds is_loop_header ptree trace + in if (nb_duplicated < max_nb_duplicated) + then (debug "End duplication\n"; f new_code new_ptree traces) + else (debug "Too many duplicated nodes, aborting tail duplication\n"; (code, ptree, 0)) + in let new_code, new_ptree, _ = f code ptree traces + in (new_code, new_ptree) + +let invert_iconds code = + PTree.map1 (fun i -> match i with + | Icond (c, lr, ifso, ifnot, info) -> (match info with + | Some true -> begin + (* debug "Reversing ifso/ifnot for node %d\n" (P.to_int n); *) + Icond (Op.negate_condition c, lr, ifnot, ifso, Some false) + end + | _ -> i) + | _ -> i + ) code + +(** Partial loop unrolling + * + * The following code seeks innermost loops, and unfolds the first iteration + * Most of the code has been moved from LICMaux.ml to Duplicateaux.ml to solve + * cyclic dependencies between LICMaux and Duplicateaux + *) + +let print_inner_loop iloop = + debug "{preds: %a, body: %a, head: %d, finals: %a, sb_final: %a}\n" + print_intlist iloop.preds + print_intlist iloop.body + (P.to_int iloop.head) + print_intlist iloop.finals + print_option_pint iloop.sb_final + +let rec print_inner_loops = function +| [] -> () +| iloop :: iloops -> begin + print_inner_loop iloop; + debug "\n"; + print_inner_loops iloops + end + +let cb_exit_node = function + | Icond (_,_,n1,n2,p) -> begin match p with + | Some true -> Some n2 + | Some false -> Some n1 + | None -> None + end + | _ -> None + + (* +(* Alternative code to get inner_loops - use it if we suspect the other function to be bugged *) +let get_natural_loop code predmap n = + let is_final_node m = + let successors = rtl_successors @@ get_some @@ PTree.get m code in + List.exists (fun s -> (P.to_int s) == (P.to_int n)) successors + in + let excluded_node = cb_exit_node @@ get_some @@ PTree.get n code in + let is_excluded m = match excluded_node with + | None -> false + | Some ex -> P.to_int ex == P.to_int m + in + debug "get_natural_loop for %d\n" (P.to_int n); + let body = bfs_until code n is_final_node is_excluded in + debug "BODY: %a\n" print_intlist body; + let final = List.find is_final_node body in + debug "FINAL: %d\n" (P.to_int final); + let preds = List.filter (fun pred -> List.mem pred body) @@ get_some @@ PTree.get n predmap in + debug "PREDS: %a\n" print_intlist preds; + { preds = preds; body = body; head = n; final = final } + +let rec count_loop_headers is_loop_header = function + | [] -> 0 + | n :: ln -> + let rem = count_loop_headers is_loop_header ln in + if (get_some @@ PTree.get n is_loop_header) then rem + 1 else rem + +let get_inner_loops f code is_loop_header = + let predmap = get_predecessors_rtl code in + let iloops = ref [] in + List.iter (fun (n, ilh) -> if ilh then begin + let iloop = get_natural_loop code predmap n in + let nb_headers = count_loop_headers is_loop_header iloop.body in + if nb_headers == 1 then (* innermost loop *) + iloops := iloop :: !iloops end + ) (PTree.elements is_loop_header); + !iloops + *) + +let rec generate_fwmap ln ln' fwmap = + match ln with + | [] -> begin + match ln' with + | [] -> fwmap + | _ -> failwith "ln and ln' have different lengths" + end + | n :: ln -> begin + match ln' with + | n' :: ln' -> generate_fwmap ln ln' (PTree.set n n' fwmap) + | _ -> failwith "ln and ln' have different lengths" + end + +let generate_revmap ln ln' revmap = generate_fwmap ln' ln revmap + +let apply_map fw n = P.of_int @@ ptree_get_some n fw + +let apply_map_list fw ln = List.map (apply_map fw) ln + +let apply_map_opt fw n = + match PTree.get n fw with + | Some n' -> P.of_int n' + | None -> n + +let change_nexts fwmap = function + | Icall (a, b, c, d, n) -> Icall (a, b, c, d, apply_map fwmap n) + | Ibuiltin (a, b, c, n) -> Ibuiltin (a, b, c, apply_map fwmap n) + | Ijumptable (a, ln) -> Ijumptable (a, List.map (apply_map_opt fwmap) ln) + | Icond (a, b, n1, n2, i) -> Icond (a, b, apply_map_opt fwmap n1, apply_map_opt fwmap n2, i) + | Inop n -> Inop (apply_map fwmap n) + | Iop (a, b, c, n) -> Iop (a, b, c, apply_map fwmap n) + | Iload (a, b, c, d, e, n) -> Iload (a, b, c, d, e, apply_map fwmap n) + | Istore (a, b, c, d, n) -> Istore (a, b, c, d, apply_map fwmap n) + | Itailcall (a, b, c) -> Itailcall (a, b, c) + | Ireturn o -> Ireturn o + +(** Clone a list of instructions into free pc indexes + * + * The list of instructions should be contiguous, and not include any loop. + * It is assumed that the first instruction of the list is the head. + * Also, the last instruction of the list should be the loop backedge. + * + * Returns: (code', revmap', ln', fwmap) + * code' is the updated code, after cloning + * revmap' is the updated revmap + * ln' is the list of the new indexes used to reference the cloned instructions + * fwmap is a map from ln to ln' + *) +let clone code revmap ln = begin + assert (List.length ln > 0); + let head' = next_free_pc code in + (* +head' to ensure we never overlap with the existing code *) + let ln' = List.map (fun n -> n + head') @@ List.map P.to_int ln in + let fwmap = generate_fwmap ln ln' PTree.empty in + let revmap' = generate_revmap ln (List.map P.of_int ln') revmap in + let code' = ref code in + List.iter (fun n -> + let instr = get_some @@ PTree.get n code in + let instr' = change_nexts fwmap instr in + code' := PTree.set (apply_map fwmap n) instr' !code' + ) ln; + (!code', revmap', ln', fwmap) +end + +let rec count_ignore_nops code = function + | [] -> 0 + | n::ln -> + let inst = get_some @@ PTree.get n code in + match inst with + | Inop _ -> count_ignore_nops code ln + | _ -> 1 + count_ignore_nops code ln + +(* Unrolls a single interation of the inner loop + * 1) Clones the body into body' + * 2) Links the preds to the first instruction of body' + * 3) Links the last instruction of body' into the first instruction of body + *) +let unroll_inner_loop_single code revmap iloop = + let body = iloop.body in + if count_ignore_nops code body > !Clflags.option_funrollsingle then begin + debug "Too many nodes in the loop body (%d > %d)" (List.length body) !Clflags.option_funrollsingle; + (code, revmap) + end else + let (code2, revmap2, dupbody, fwmap) = clone code revmap body in + let code' = ref code2 in + let head' = apply_map fwmap (iloop.head) in + let finals' = apply_map_list fwmap (iloop.finals) in + begin + debug "PREDS: %a\n" print_intlist iloop.preds; + debug "IHEAD: %d\n" (P.to_int iloop.head); + code' := change_pointers !code' (iloop.head) head' (iloop.preds); + code' := change_pointers !code' head' (iloop.head) finals'; + (!code', revmap2) + end + +let unroll_inner_loops_single f code revmap = + let is_loop_header = get_loop_headers code (f.fn_entrypoint) in + let inner_loops = get_inner_loops f code is_loop_header in + let code' = ref code in + let revmap' = ref revmap in + begin + print_inner_loops inner_loops; + List.iter (fun iloop -> + let (new_code, new_revmap) = unroll_inner_loop_single !code' !revmap' iloop in + code' := new_code; revmap' := new_revmap + ) inner_loops; + (!code', !revmap') + end + +let is_some o = match o with Some _ -> true | None -> false + +let rec go_through_predicted code start final = + if start == final then + Some [start] + else + match rtl_successors_pref @@ get_some @@ PTree.get start code with + | [n] -> ( + match go_through_predicted code n final with + | Some ln -> Some (start :: ln) + | None -> None + ) + | _ -> None + +(* Unrolls the body of the inner loop once - duplicating the exit condition as well + * 1) Clones body into body' + * 2) Links the last instruction of body (sb_final) into the first of body' + * 3) Links the last instruction of body' into the first of body + *) +let unroll_inner_loop_body code revmap iloop = + debug "iloop = "; print_inner_loop iloop; + let body = iloop.body in + let limit = !Clflags.option_funrollbody in + if count_ignore_nops code body > limit then begin + debug "Too many nodes in the loop body (%d > %d)\n" (List.length body) limit; + (code, revmap) + end else if not @@ is_some iloop.sb_final then begin + debug "The loop body does not form a superblock OR we have predicted that we do not loop\n"; + (code, revmap) + end else + let sb_final = get_some @@ iloop.sb_final in + let sb_body = get_some @@ go_through_predicted code iloop.head sb_final in + let (code2, revmap2, dupbody, fwmap) = clone code revmap sb_body in + let code' = ref code2 in + let head' = apply_map fwmap (iloop.head) in + let sb_final' = apply_map fwmap sb_final in + begin + code' := change_pointers !code' iloop.head head' [sb_final]; + code' := change_pointers !code' head' iloop.head [sb_final']; + (!code', revmap2) + end + +let unroll_inner_loops_body f code revmap = + let is_loop_header = get_loop_headers code (f.fn_entrypoint) in + let inner_loops = get_inner_loops f code is_loop_header in + debug "Number of loops found: %d\n" (List.length inner_loops); + let code' = ref code in + let revmap' = ref revmap in + begin + print_inner_loops inner_loops; + List.iter (fun iloop -> + let (new_code, new_revmap) = unroll_inner_loop_body !code' !revmap' iloop in + code' := new_code; revmap' := new_revmap + ) inner_loops; + (!code', !revmap') + end + +let extract_upto_icond f code head = + let rec extract h = + let inst = get_some @@ PTree.get h code in + match inst with + | Icond _ -> [h] + | _ -> ( match rtl_successors inst with + | [n] -> h :: (extract n) + | _ -> failwith "Found a node with more than one successor??" + ) + in List.rev @@ extract head + +let rotate_inner_loop f code revmap iloop = + let header = extract_upto_icond f code iloop.head in + let limit = !Clflags.option_flooprotate in + let nb_duplicated = count_ignore_nops code header in + if nb_duplicated > limit then begin + debug "Loop Rotate: too many nodes to duplicate (%d > %d)" (List.length header) limit; + (code, revmap) + end else if nb_duplicated == count_ignore_nops code iloop.body then begin + debug "The conditional branch is already at the end! No need to rotate."; + (code, revmap) + end else + let (code2, revmap2, dupheader, fwmap) = clone code revmap header in + let code' = ref code2 in + let head' = apply_map fwmap iloop.head in + begin + code' := change_pointers !code' iloop.head head' iloop.preds; + (!code', revmap2) + end + +let rotate_inner_loops f code revmap = + let is_loop_header = get_loop_headers code (f.fn_entrypoint) in + let inner_loops = get_inner_loops f code is_loop_header in + let code' = ref code in + let revmap' = ref revmap in + begin + print_inner_loops inner_loops; + List.iter (fun iloop -> + let (new_code, new_revmap) = rotate_inner_loop f !code' !revmap' iloop in + code' := new_code; revmap' := new_revmap + ) inner_loops; + (!code', !revmap') + end + +let loop_rotate f = + let entrypoint = f.fn_entrypoint in + let code = f.fn_code in + let revmap = make_identity_ptree code in + let (code, revmap) = + if !Clflags.option_flooprotate > 0 then + rotate_inner_loops f code revmap + else (code, revmap) in + ((code, entrypoint), revmap) + +let static_predict f = + let entrypoint = f.fn_entrypoint in + let code = f.fn_code in + let revmap = make_identity_ptree code in + begin + reset_stats (); + set_stats_oc (); + let code = + if !Clflags.option_fpredict then + update_directions f code entrypoint + else code in + write_stats_oc (); + let code = + if !Clflags.option_fpredict then + invert_iconds code + else code in + ((code, entrypoint), revmap) + end + +let unroll_single f = + let entrypoint = f.fn_entrypoint in + let code = f.fn_code in + let revmap = make_identity_ptree code in + let (code, revmap) = + if !Clflags.option_funrollsingle > 0 then + unroll_inner_loops_single f code revmap + else (code, revmap) in + ((code, entrypoint), revmap) + +let unroll_body f = + let entrypoint = f.fn_entrypoint in + let code = f.fn_code in + let revmap = make_identity_ptree code in + let (code, revmap) = + if !Clflags.option_funrollbody > 0 then + unroll_inner_loops_body f code revmap + else (code, revmap) in + ((code, entrypoint), revmap) + +let tail_duplicate f = + let entrypoint = f.fn_entrypoint in + let code = f.fn_code in + let revmap = make_identity_ptree code in + let (code, revmap) = + if !Clflags.option_ftailduplicate > 0 then + let traces = select_traces code entrypoint in + let preds = get_predecessors_rtl code in + let is_loop_header = get_loop_headers code entrypoint in + superblockify_traces code preds is_loop_header traces revmap + else (code, revmap) in + ((code, entrypoint), revmap) |