aboutsummaryrefslogtreecommitdiffstats
path: root/backend/Duplicateaux.ml
diff options
context:
space:
mode:
Diffstat (limited to 'backend/Duplicateaux.ml')
-rw-r--r--backend/Duplicateaux.ml1137
1 files changed, 1137 insertions, 0 deletions
diff --git a/backend/Duplicateaux.ml b/backend/Duplicateaux.ml
new file mode 100644
index 00000000..22bee067
--- /dev/null
+++ b/backend/Duplicateaux.ml
@@ -0,0 +1,1137 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+(* Oracle for Duplicate pass.
+ * - Add static prediction information to Icond nodes
+ * - Performs tail duplication on interesting traces to form superblocks
+ * - Unrolls a single iteration of innermost loops
+ * - (TODO: perform partial loop unrolling inside innermost loops)
+ *)
+
+open RTL
+open Maps
+open Camlcoq
+open DebugPrint
+open RTLcommonaux
+
+let stats_oc = ref None
+
+let set_stats_oc () =
+ try
+ let name = Sys.getenv "COMPCERT_PREDICT_STATS" in
+ let oc = open_out_gen [Open_append; Open_creat; Open_text] 0o666 name in
+ stats_oc := Some oc
+ with Not_found -> ()
+
+(* number of total CBs *)
+let stats_nb_total = ref 0
+(* we predicted the same thing as the profiling *)
+let stats_nb_correct_predicts = ref 0
+(* we predicted something (say Some true), but the profiling predicted the opposite (say Some false) *)
+let stats_nb_mispredicts = ref 0
+(* we did not predict anything (None) even though the profiling did predict something *)
+let stats_nb_missed_opportunities = ref 0
+(* we predicted something (say Some true) but the profiling preferred not to predict anything (None) *)
+let stats_nb_overpredict = ref 0
+
+(* heuristic specific counters *)
+let wrong_opcode = ref 0
+let wrong_return = ref 0
+let wrong_loop2 = ref 0
+let wrong_call = ref 0
+
+let right_opcode = ref 0
+let right_return = ref 0
+let right_loop2 = ref 0
+let right_call = ref 0
+
+let reset_stats () = begin
+ stats_nb_total := 0;
+ stats_nb_correct_predicts := 0;
+ stats_nb_mispredicts := 0;
+ stats_nb_missed_opportunities := 0;
+ stats_nb_overpredict := 0;
+ wrong_opcode := 0;
+ wrong_return := 0;
+ wrong_loop2 := 0;
+ wrong_call := 0;
+ right_opcode := 0;
+ right_return := 0;
+ right_loop2 := 0;
+ right_call := 0;
+end
+
+let incr theref = theref := !theref + 1
+
+let has_some o = match o with Some _ -> true | None -> false
+
+let stats_oc_recording () = has_some !stats_oc
+
+let write_stats_oc () =
+ match !stats_oc with
+ | None -> ()
+ | Some oc -> begin
+ Printf.fprintf oc "%d %d %d %d %d %d %d %d %d %d %d %d %d\n" !stats_nb_total
+ !stats_nb_correct_predicts !stats_nb_mispredicts !stats_nb_missed_opportunities
+ !stats_nb_overpredict
+ !wrong_opcode !wrong_return !wrong_loop2 !wrong_call
+ !right_opcode !right_return !right_loop2 !right_call
+ ;
+ close_out oc
+ end
+
+let get_loop_headers = LICMaux.get_loop_headers
+let rtl_successors = LICMaux.rtl_successors
+
+(* Get list of nodes following a BFS of the code *)
+(* Stops when predicate is reached
+ * Excludes any node given in excluded function *)
+let bfs_until code entrypoint (predicate: node->bool) (excluded: node->bool) = begin
+ debug "bfs\n";
+ let visited = ref (PTree.map (fun n i -> false) code)
+ and bfs_list = ref []
+ and to_visit = Queue.create ()
+ and node = ref entrypoint
+ in begin
+ Queue.add entrypoint to_visit;
+ while not (Queue.is_empty to_visit) do
+ node := Queue.pop to_visit;
+ if (not (get_some @@ PTree.get !node !visited)) then begin
+ visited := PTree.set !node true !visited;
+ if not (excluded !node) then begin
+ match PTree.get !node code with
+ | None -> failwith "No such node"
+ | Some i ->
+ bfs_list := !node :: !bfs_list;
+ if not (predicate !node) then
+ let succ = rtl_successors i in List.iter (fun n -> Queue.add n to_visit) succ
+ end
+ end
+ done;
+ List.rev !bfs_list
+ end
+end
+
+let bfs code entrypoint = bfs_until code entrypoint (fun _ -> false) (fun _ -> false)
+
+let optbool o = match o with Some _ -> true | None -> false
+
+let ptree_get_some n ptree = get_some @@ PTree.get n ptree
+
+(* Returns a PTree: node -> list of the predecessors of that node *)
+let get_predecessors_rtl code = begin
+ debug "get_predecessors_rtl\n";
+ let preds = ref (PTree.map (fun n i -> []) code) in
+ let process_inst (node, i) =
+ let succ = rtl_successors i
+ in List.iter (fun s ->
+ let previous_preds = ptree_get_some s !preds in
+ if optbool @@ List.find_opt (fun e -> e == node) previous_preds then ()
+ else preds := PTree.set s (node::previous_preds) !preds) succ
+ in begin
+ List.iter process_inst (PTree.elements code);
+ !preds
+ end
+end
+
+module PInt = struct
+ type t = P.t
+ let compare x y = compare (P.to_int x) (P.to_int y)
+end
+
+module PSet = Set.Make(PInt)
+
+let print_intset s =
+ let seq = PSet.to_seq s
+ in begin
+ if !debug_flag then begin
+ Printf.printf "{";
+ Seq.iter (fun n ->
+ Printf.printf "%d " (P.to_int n)
+ ) seq;
+ Printf.printf "}"
+ end
+ end
+
+(* Looks ahead (until a branch) to see if a node further down verifies
+ * the given predicate *)
+let rec look_ahead_gen (successors: RTL.instruction -> P.t list) code node is_loop_header predicate =
+ if (predicate node) then true
+ else match (successors @@ get_some @@ PTree.get node code) with
+ | [n] -> if (predicate n) then true
+ else (
+ if (get_some @@ PTree.get n is_loop_header) then false
+ else look_ahead_gen successors code n is_loop_header predicate
+ )
+ | _ -> false
+
+let look_ahead = look_ahead_gen rtl_successors
+
+(**
+ * Heuristics mostly based on the paper Branch Prediction for Free
+ *)
+
+let do_call_heuristic code cond ifso ifnot is_loop_header =
+ begin
+ debug "\tCall heuristic..\n";
+ let predicate n = (function
+ | Icall _ -> true
+ | _ -> false) @@ get_some @@ PTree.get n code
+ in let ifso_call = look_ahead code ifso is_loop_header predicate
+ in let ifnot_call = look_ahead code ifnot is_loop_header predicate
+ in if ifso_call && ifnot_call then None
+ else if ifso_call then Some false
+ else if ifnot_call then Some true
+ else None
+ end
+
+let do_opcode_heuristic code cond ifso ifnot is_loop_header =
+ begin
+ debug "\tOpcode heuristic..\n";
+ DuplicateOpcodeHeuristic.opcode_heuristic code cond ifso ifnot is_loop_header
+ end
+
+let do_return_heuristic code cond ifso ifnot is_loop_header =
+ begin
+ debug "\tReturn heuristic..\n";
+ let predicate n = (function
+ | Ireturn _ -> true
+ | _ -> false) @@ get_some @@ PTree.get n code
+ in let ifso_return = look_ahead code ifso is_loop_header predicate
+ in let ifnot_return = look_ahead code ifnot is_loop_header predicate
+ in if ifso_return && ifnot_return then None
+ else if ifso_return then Some false
+ else if ifnot_return then Some true
+ else None
+ end
+
+let do_store_heuristic code cond ifso ifnot is_loop_header =
+ begin
+ debug "\tStore heuristic..\n";
+ let predicate n = (function
+ | Istore _ -> true
+ | _ -> false) @@ get_some @@ PTree.get n code
+ in let ifso_store = look_ahead code ifso is_loop_header predicate
+ in let ifnot_store = look_ahead code ifnot is_loop_header predicate
+ in if ifso_store && ifnot_store then None
+ else if ifso_store then Some false
+ else if ifnot_store then Some true
+ else None
+ end
+
+let do_loop_heuristic code cond ifso ifnot is_loop_header =
+ begin
+ debug "\tLoop heuristic..\n";
+ let predicate n = get_some @@ PTree.get n is_loop_header in
+ let ifso_loop = look_ahead code ifso is_loop_header predicate in
+ let ifnot_loop = look_ahead code ifnot is_loop_header predicate in
+ if ifso_loop && ifnot_loop then (debug "\t\tLOOP but can't choose which\n"; None) (* TODO - take the innermost loop ? *)
+ else if ifso_loop then Some true
+ else if ifnot_loop then Some false
+ else None
+ end
+
+let do_loop2_heuristic loop_info n code cond ifso ifnot is_loop_header =
+ begin
+ debug "\tLoop2 heuristic..\n";
+ match get_some @@ PTree.get n loop_info with
+ | None -> None
+ | Some b -> Some b
+ end
+
+(** Innermost loop detection *)
+
+type innerLoop = {
+ preds: P.t list;
+ body: P.t list;
+ head: P.t; (* head of the loop *)
+ finals: P.t list; (* the final instructions, which loops back to the head *)
+ (* There may be more than one ; for instance if there is an if inside the loop with both
+ * branches leading to a goto backedge
+ * Such cases usually happen after a tail-duplication *)
+ sb_final: P.t option; (* if the innerloop wraps a superblock, this is its final instruction *)
+ (* may be None if we predict that we do not loop *)
+}
+
+let print_pset = LICMaux.pp_pset
+
+let rtl_successors_pref = function
+| Itailcall _ | Ireturn _ -> []
+| Icall(_,_,_,_,n) | Ibuiltin(_,_,_,n) | Inop n | Iop (_,_,_,n)
+| Iload (_,_,_,_,_,n) | Istore (_,_,_,_,n) -> [n]
+| Icond (_,_,n1,n2,p) -> (match p with
+ | Some true -> [n1]
+ | Some false -> [n2]
+ | None -> [n1; n2])
+| Ijumptable (_,ln) -> ln
+
+(* Find the last node of a trace (starting at "node"), until a loop is encountered.
+ * If a non-predicted branch is encountered, returns None *)
+let rec find_last_node_before_loop code node trace is_loop_header =
+ let rtl_succ = rtl_successors @@ get_some @@ PTree.get node code in
+ let headers = List.filter (fun n ->
+ get_some @@ PTree.get n is_loop_header && HashedSet.PSet.contains trace n) rtl_succ in
+ match headers with
+ | [] -> (
+ let next_nodes = rtl_successors_pref @@ get_some @@ PTree.get node code in
+ match next_nodes with
+ | [n] -> (
+ (* To prevent getting out of the superblock and loop infinitely when the prediction is false *)
+ if HashedSet.PSet.contains trace n then
+ find_last_node_before_loop code n trace is_loop_header
+ else None
+ )
+ | _ -> None (* May happen when we predict that a loop is not taken *)
+ )
+ | [h] -> Some node
+ | _ -> failwith "Multiple branches leading to a loop"
+
+(* The computation of sb_final requires to already have branch prediction *)
+let get_inner_loops f code is_loop_header =
+ let fake_f = { fn_sig = f.fn_sig; fn_params = f.fn_params;
+ fn_stacksize = f.fn_stacksize; fn_code = code; fn_entrypoint = f.fn_entrypoint } in
+ let (_, predmap, loopmap) = LICMaux.inner_loops fake_f in
+ begin
+ debug "PREDMAP: "; print_ptree print_intlist predmap;
+ debug "LOOPMAP: "; print_ptree print_pset loopmap;
+ List.map (fun (n, body) ->
+ let preds = List.filter (fun p -> not @@ HashedSet.PSet.contains body p)
+ @@ get_some @@ PTree.get n predmap in
+ let head = (* the instruction from body which is a loop header *)
+ let heads = HashedSet.PSet.elements @@ HashedSet.PSet.filter
+ (fun n -> ptree_get_some n is_loop_header) body in
+ begin
+ assert (List.length heads == 1);
+ List.hd heads
+ end in
+ let finals = (* the predecessors from head that are in the body *)
+ let head_preds = ptree_get_some head predmap in
+ let filtered = List.filter (fun n -> HashedSet.PSet.contains body n) head_preds in
+ begin
+ debug "HEAD: %d\n" (P.to_int head);
+ debug "BODY: %a\n" print_pset body;
+ debug "HEADPREDS: %a\n" print_intlist head_preds;
+ filtered
+ end in
+ let sb_final = find_last_node_before_loop code head body is_loop_header in
+ let body = HashedSet.PSet.elements body in
+ { preds = preds; body = body; head = head; finals = finals;
+ sb_final = sb_final; }
+ )
+ (* LICMaux.inner_loops also returns non-inner loops, but with a body of 1 instruction
+ * We remove those to get just the inner loops *)
+ @@ List.filter (fun (n, body) ->
+ let count = List.length @@ HashedSet.PSet.elements body in count != 1
+ ) (PTree.elements loopmap)
+ end
+
+let get_loop_bodies code entrypoint =
+ let predecessors = get_predecessors_rtl code in
+ (* Algorithm from Muchnik, Compiler Design & Implementation, Figure 7.21 page 192 *)
+ let natural_loop n m =
+ debug "Natural Loop from %d to %d\n" (P.to_int n) (P.to_int m);
+ let in_body = ref (PTree.map (fun n b -> false) code) in
+ let body = ref [] in
+ let add_to_body n = begin
+ in_body := PTree.set n true !in_body;
+ body := n :: !body
+ end
+ in let rec process_node p =
+ debug " Processing node %d\n" (P.to_int p);
+ List.iter (fun pred ->
+ debug " Looking at predecessor of %d: %d\n" (P.to_int p) (P.to_int pred);
+ let is_in_body = get_some @@ PTree.get pred !in_body in
+ if (not @@ is_in_body) then begin
+ debug " --> adding to body\n";
+ add_to_body pred;
+ process_node pred
+ end
+ ) (get_some @@ PTree.get p predecessors)
+ in begin
+ add_to_body m;
+ add_to_body n;
+ (if (m != n) then process_node m);
+ !body
+ end
+ in let option_natural_loop n = function
+ | None -> None
+ | Some m -> Some (natural_loop n m)
+ in PTree.map option_natural_loop (LICMaux.get_loop_backedges code entrypoint)
+
+(* Returns a PTree of either None or Some b where b determines the node in the loop body, for a cb instruction *)
+let get_loop_info f is_loop_header bfs_order code =
+ let loop_info = ref (PTree.map (fun n i -> None) code) in
+ let mark_body body =
+ List.iter (fun n ->
+ match get_some @@ PTree.get n code with
+ | Icond (_, _, ifso, ifnot, _) -> begin
+ match PTree.get n !loop_info with
+ | None -> ()
+ | Some _ ->
+ let b1 = List.mem ifso body in
+ let b2 = List.mem ifnot body in
+ if (b1 && b2) then ()
+ else if (b1 || b2) then begin
+ if b1 then loop_info := PTree.set n (Some true) !loop_info
+ else if b2 then loop_info := PTree.set n (Some false) !loop_info
+ end
+ end
+ | _ -> ()
+ ) body
+ in let bodymap = get_loop_bodies code f.fn_entrypoint in
+ List.iter (fun (_,obody) ->
+ match obody with
+ | None -> ()
+ | Some body -> mark_body body
+ ) (PTree.elements bodymap);
+ !loop_info
+
+(* Remark - compared to the original Branch Prediction for Free paper, we don't use the store heuristic *)
+let get_directions f code entrypoint = begin
+ debug "get_directions\n";
+ let bfs_order = bfs code entrypoint in
+ let is_loop_header = get_loop_headers code entrypoint in
+ let loop_info = get_loop_info f is_loop_header bfs_order code in
+ let directions = ref (PTree.map (fun n i -> None) code) in (* None <=> no predicted direction *)
+ begin
+ (* ptree_printbool is_loop_header; *)
+ (* debug "\n"; *)
+ List.iter (fun n ->
+ match (get_some @@ PTree.get n code) with
+ | Icond (cond, lr, ifso, ifnot, pred) -> begin
+ if stats_oc_recording () || not @@ has_some pred then
+ (* debug "Analyzing %d.." (P.to_int n); *)
+ let heuristics = [ do_opcode_heuristic;
+ do_return_heuristic; do_loop2_heuristic loop_info n; (* do_loop_heuristic; *) do_call_heuristic;
+ (* do_store_heuristic *) ] in
+ let preferred = ref None in
+ let current_heuristic = ref 0 in
+ begin
+ debug "Deciding condition for RTL node %d\n" (P.to_int n);
+ List.iter (fun do_heur ->
+ match !preferred with
+ | None -> begin
+ preferred := do_heur code cond ifso ifnot is_loop_header;
+ if stats_oc_recording () then begin
+ (* Getting stats about mispredictions from each heuristic *)
+ (match !preferred, pred with
+ | Some false, Some true
+ | Some true, Some false
+ (* | Some _, None *) (* Uncomment for overpredicts *)
+ -> begin
+ match !current_heuristic with
+ | 0 -> incr wrong_opcode
+ | 1 -> incr wrong_return
+ | 2 -> incr wrong_loop2
+ | 3 -> incr wrong_call
+ | _ -> failwith "Shouldn't happen"
+ end
+ | Some false, Some false
+ | Some true, Some true -> begin
+ match !current_heuristic with
+ | 0 -> incr right_opcode
+ | 1 -> incr right_return
+ | 2 -> incr right_loop2
+ | 3 -> incr right_call
+ | _ -> failwith "Shouldn't happen"
+ end
+ | _ -> ()
+ );
+ incr current_heuristic
+ end
+ end
+ | Some _ -> ()
+ ) heuristics;
+ directions := PTree.set n !preferred !directions;
+ (match !preferred with | Some false -> debug "\tFALLTHROUGH\n"
+ | Some true -> debug "\tBRANCH\n"
+ | None -> debug "\tUNSURE\n");
+ debug "---------------------------------------\n"
+ end
+ end
+ | _ -> ()
+ ) bfs_order;
+ !directions
+ end
+end
+
+let update_direction direction = function
+| Icond (cond, lr, n, n', pred) -> begin
+ (* Counting stats from profiling *)
+ if stats_oc_recording () then begin
+ incr stats_nb_total;
+ match pred, direction with
+ | None, None -> incr stats_nb_correct_predicts
+ | None, Some _ -> incr stats_nb_overpredict
+ | Some _, None -> incr stats_nb_missed_opportunities
+ | Some false, Some false -> incr stats_nb_correct_predicts
+ | Some false, Some true -> incr stats_nb_mispredicts
+ | Some true, Some false -> incr stats_nb_mispredicts
+ | Some true, Some true -> incr stats_nb_correct_predicts
+ end;
+
+ (* only update if there is no prior existing branch prediction *)
+ (match pred with
+ | None -> Icond (cond, lr, n, n', direction)
+ | Some _ -> begin
+ Icond (cond, lr, n, n', pred)
+ end
+ )
+ end
+| i -> i
+
+(* Uses branch prediction to write prediction annotations in Icond *)
+let update_directions f code entrypoint = begin
+ debug "Update_directions\n";
+ let directions = get_directions f code entrypoint in
+ let code' = ref code in
+ begin
+ debug "Get Directions done, now proceeding to update all direction information..\n";
+ (* debug "Ifso directions: ";
+ ptree_printbool directions;
+ debug "\n"; *)
+ List.iter (fun (n, i) ->
+ let direction = get_some @@ PTree.get n directions in
+ code' := PTree.set n (update_direction direction i) !code'
+ ) (PTree.elements code);
+ !code'
+ end
+end
+
+(** Trace selection *)
+
+let rec exists_false_rec = function
+ | [] -> false
+ | m::lm -> let (_, b) = m in if b then exists_false_rec lm else true
+
+let exists_false boolmap = exists_false_rec (PTree.elements boolmap)
+
+(* DFS using prediction info to guide the exploration *)
+let dfs code entrypoint = begin
+ debug "dfs\n";
+ let visited = ref (PTree.map (fun n i -> false) code) in
+ let rec dfs_list code = function
+ | [] -> []
+ | node :: ln ->
+ if get_some @@ PTree.get node !visited then dfs_list code ln
+ else begin
+ visited := PTree.set node true !visited;
+ let next_nodes = (match get_some @@ PTree.get node code with
+ | Icall(_, _, _, _, n) | Ibuiltin (_, _, _, n) | Iop (_, _, _, n)
+ | Iload (_, _, _, _, _, n) | Istore (_, _, _, _, n) | Inop n -> [n]
+ | Ijumptable (_, ln) -> ln
+ | Itailcall _ | Ireturn _ -> []
+ | Icond (_, _, n1, n2, info) -> (match info with
+ | Some false -> [n2; n1]
+ | _ -> [n1; n2]
+ )
+ ) in node :: dfs_list code (next_nodes @ ln)
+ end
+ in dfs_list code [entrypoint]
+end
+
+let rec select_unvisited_node is_visited = function
+| [] -> failwith "Empty list"
+| n :: ln -> if not (ptree_get_some n is_visited) then n else select_unvisited_node is_visited ln
+
+let best_successor_of node code is_visited =
+ match (PTree.get node code) with
+ | None -> failwith "No such node in the code"
+ | Some i ->
+ let next_node = match i with
+ | Inop n | Iop (_,_,_,n) | Iload (_,_,_,_,_,n) | Istore(_,_,_,_,n)
+ | Icall (_,_,_,_,n) | Ibuiltin (_,_,_,n) -> Some n
+ | Icond (_, _, n1, n2, ob) -> (match ob with None -> None | Some false -> Some n2 | Some true -> Some n1)
+ | _ -> None
+ in match next_node with
+ | None -> None
+ | Some n -> if not (ptree_get_some n is_visited) then Some n else None
+
+(* FIXME - could be improved by selecting in priority the predicted paths *)
+let best_predecessor_of node predecessors code order is_visited =
+ match (PTree.get node predecessors) with
+ | None -> failwith "No predecessor list found"
+ | Some lp ->
+ try Some (List.find (fun n ->
+ if (List.mem n lp) && (not (ptree_get_some n is_visited)) then
+ match ptree_get_some n code with
+ | Icond (_, _, n1, n2, ob) -> (match ob with
+ | None -> false
+ | Some false -> n == n2
+ | Some true -> n == n1
+ )
+ | _ -> true
+ else false
+ ) order)
+ with Not_found -> None
+
+let print_trace = print_intlist
+
+let print_traces oc traces =
+ let rec f oc = function
+ | [] -> ()
+ | t::lt -> Printf.fprintf oc "\n\t%a,\n%a" print_trace t f lt
+ in begin
+ if !debug_flag then
+ Printf.fprintf oc "Traces: {%a}\n" f traces
+ end
+
+(* Dumb (but linear) trace selection *)
+let select_traces_linear code entrypoint =
+ let is_visited = ref (PTree.map (fun n i -> false) code) in
+ let bfs_order = bfs code entrypoint in
+ let rec go_through node = begin
+ is_visited := PTree.set node true !is_visited;
+ let next_node = match (get_some @@ PTree.get node code) with
+ | Icall(_, _, _, _, n) | Ibuiltin (_, _, _, n) | Iop (_, _, _, n)
+ | Iload (_, _, _, _, _, n) | Istore (_, _, _, _, n) | Inop n -> Some n
+ | Ijumptable _ | Itailcall _ | Ireturn _ -> None
+ | Icond (_, _, n1, n2, info) -> (match info with
+ | Some false -> Some n2
+ | Some true -> Some n1
+ | None -> None
+ )
+ in match next_node with
+ | None -> [node]
+ | Some n ->
+ if not (get_some @@ PTree.get n !is_visited) then node :: go_through n
+ else [node]
+ end
+ in let traces = ref [] in begin
+ List.iter (fun n ->
+ if not (get_some @@ PTree.get n !is_visited) then
+ traces := (go_through n) :: !traces
+ ) bfs_order;
+ !traces
+ end
+
+
+(* Algorithm mostly inspired from Chang and Hwu 1988
+ * "Trace Selection for Compiling Large C Application Programs to Microcode" *)
+let select_traces_chang code entrypoint = begin
+ debug "select_traces\n";
+ let order = dfs code entrypoint in
+ let predecessors = get_predecessors_rtl code in
+ let traces = ref [] in
+ let is_visited = ref (PTree.map (fun n i -> false) code) in begin (* mark all nodes visited *)
+ debug "Length: %d\n" (List.length order);
+ while exists_false !is_visited do (* while (there are unvisited nodes) *)
+ let seed = select_unvisited_node !is_visited order in
+ let trace = ref [seed] in
+ let current = ref seed in begin
+ is_visited := PTree.set seed true !is_visited; (* mark seed visited *)
+ let quit_loop = ref false in begin
+ while not !quit_loop do
+ let s = best_successor_of !current code !is_visited in
+ match s with
+ | None -> quit_loop := true (* if (s==0) exit loop *)
+ | Some succ -> begin
+ trace := !trace @ [succ];
+ is_visited := PTree.set succ true !is_visited; (* mark s visited *)
+ current := succ
+ end
+ done;
+ current := seed;
+ quit_loop := false;
+ while not !quit_loop do
+ let s = best_predecessor_of !current predecessors code order !is_visited in
+ match s with
+ | None -> quit_loop := true (* if (s==0) exit loop *)
+ | Some pred -> begin
+ trace := pred :: !trace;
+ is_visited := PTree.set pred true !is_visited; (* mark s visited *)
+ current := pred
+ end
+ done;
+ traces := !trace :: !traces;
+ end
+ end
+ done;
+ (* debug "DFS: \t"; print_intlist order; debug "\n"; *)
+ debug "Traces: %a" print_traces !traces;
+ !traces
+ end
+end
+
+let select_traces code entrypoint =
+ let length = List.length @@ PTree.elements code in
+ if (length < 5000) then select_traces_chang code entrypoint
+ else select_traces_linear code entrypoint
+
+let rec make_identity_ptree_rec = function
+| [] -> PTree.empty
+| m::lm -> let (n, _) = m in PTree.set n n (make_identity_ptree_rec lm)
+
+let make_identity_ptree code = make_identity_ptree_rec (PTree.elements code)
+
+(* Change the pointers of nodes to point to n' instead of n *)
+let rec change_pointers code n n' = function
+ | [] -> code
+ | node :: nodes ->
+ let new_pred_inst = match ptree_get_some node code with
+ | Icall(a, b, c, d, n0) -> assert (n0 = n); Icall(a, b, c, d, n')
+ | Ibuiltin(a, b, c, n0) -> assert (n0 = n); Ibuiltin(a, b, c, n')
+ | Ijumptable(a, ln) -> assert (optbool @@ List.find_opt (fun e -> e = n) ln);
+ Ijumptable(a, List.map (fun e -> if (e = n) then n' else e) ln)
+ | Icond(a, b, n1, n2, i) -> assert (n1 = n || n2 = n);
+ let n1' = if (n1 = n) then n' else n1
+ in let n2' = if (n2 = n) then n' else n2
+ in Icond(a, b, n1', n2', i)
+ | Inop n0 -> assert (n0 = n); Inop n'
+ | Iop (a, b, c, n0) -> assert (n0 = n); Iop (a, b, c, n')
+ | Iload (a, b, c, d, e, n0) -> assert (n0 = n); Iload (a, b, c, d, e, n')
+ | Istore (a, b, c, d, n0) -> assert (n0 = n); Istore (a, b, c, d, n')
+ | Itailcall _ | Ireturn _ -> failwith "That instruction cannot be a predecessor"
+ in let new_code = PTree.set node new_pred_inst code
+ in change_pointers new_code n n' nodes
+
+(* parent: parent of n to keep as parent
+ * preds: all the other parents of n
+ * n': the integer which should contain the duplicate of n
+ * returns: new code, new ptree *)
+let duplicate code ptree parent n preds n' =
+ debug "Duplicating node %d into %d..\n" (P.to_int n) (P.to_int n');
+ match PTree.get n' code with
+ | Some _ -> failwith "The PTree already has a node n'"
+ | None ->
+ let c' = change_pointers code n n' preds
+ in let new_code = PTree.set n' (ptree_get_some n code) c'
+ and new_ptree = PTree.set n' n ptree
+ in (new_code, new_ptree)
+
+let rec maxint = function
+ | [] -> 0
+ | i :: l -> assert (i >= 0); let m = maxint l in if i > m then i else m
+
+let is_empty = function
+ | [] -> true
+ | _ -> false
+
+let next_free_pc code = maxint (List.map (fun e -> let (n, _) = e in P.to_int n) (PTree.elements code)) + 1
+
+let is_a_nop code n =
+ match get_some @@ PTree.get n code with
+ | Inop _ -> true
+ | _ -> false
+
+(* code: RTL code
+ * preds: mapping node -> predecessors
+ * ptree: the revmap
+ * trace: the trace to follow tail duplication on *)
+let tail_duplicate code preds is_loop_header ptree trace =
+ debug "Tail_duplicate on that trace: %a\n" print_trace trace;
+ (* next_int: unused integer that can be used for the next duplication *)
+ let next_int = ref (next_free_pc code)
+ (* last_node and last_duplicate store resp. the last processed node of the trace, and its duplication *)
+ in let last_node = ref None
+ in let last_duplicate = ref None
+ in let nb_duplicated = ref 0
+ (* recursive function on a trace *)
+ in let rec f code ptree is_first = function
+ | [] -> (code, ptree)
+ | n :: t ->
+ let (new_code, new_ptree) =
+ if is_first then (code, ptree) (* first node is never duplicated regardless of its inputs *)
+ else
+ let node_preds = ptree_get_some n preds
+ in let node_preds_nolast =
+ (* We traverse loop headers without initiating tail duplication
+ * (see case of two imbricated loops) *)
+ if (get_some @@ PTree.get n is_loop_header) then []
+ else List.filter (fun e -> e <> get_some !last_node) node_preds
+ (* in let node_preds_nolast = List.filter (fun e -> not @@ List.mem e t) node_preds_nolast *)
+ in let final_node_preds = match !last_duplicate with
+ | None -> node_preds_nolast
+ | Some n' -> n' :: node_preds_nolast
+ in if not (is_empty final_node_preds) then
+ let n' = !next_int
+ in let (newc, newp) = duplicate code ptree !last_node n final_node_preds (P.of_int n')
+ in begin
+ next_int := !next_int + 1;
+ (if not @@ is_a_nop code n then nb_duplicated := !nb_duplicated + 1);
+ last_duplicate := Some (P.of_int n');
+ (newc, newp)
+ end
+ else (code, ptree)
+ in begin
+ last_node := Some n;
+ f new_code new_ptree false t
+ end
+ in let new_code, new_ptree = f code ptree true trace
+ in (new_code, new_ptree, !nb_duplicated)
+
+let superblockify_traces code preds is_loop_header traces ptree =
+ let max_nb_duplicated = !Clflags.option_ftailduplicate (* FIXME - should be architecture dependent *)
+ in let rec f code ptree = function
+ | [] -> (code, ptree, 0)
+ | trace :: traces ->
+ let new_code, new_ptree, nb_duplicated = tail_duplicate code preds is_loop_header ptree trace
+ in if (nb_duplicated < max_nb_duplicated)
+ then (debug "End duplication\n"; f new_code new_ptree traces)
+ else (debug "Too many duplicated nodes, aborting tail duplication\n"; (code, ptree, 0))
+ in let new_code, new_ptree, _ = f code ptree traces
+ in (new_code, new_ptree)
+
+let invert_iconds code =
+ PTree.map1 (fun i -> match i with
+ | Icond (c, lr, ifso, ifnot, info) -> (match info with
+ | Some true -> begin
+ (* debug "Reversing ifso/ifnot for node %d\n" (P.to_int n); *)
+ Icond (Op.negate_condition c, lr, ifnot, ifso, Some false)
+ end
+ | _ -> i)
+ | _ -> i
+ ) code
+
+(** Partial loop unrolling
+ *
+ * The following code seeks innermost loops, and unfolds the first iteration
+ * Most of the code has been moved from LICMaux.ml to Duplicateaux.ml to solve
+ * cyclic dependencies between LICMaux and Duplicateaux
+ *)
+
+let print_inner_loop iloop =
+ debug "{preds: %a, body: %a, head: %d, finals: %a, sb_final: %a}\n"
+ print_intlist iloop.preds
+ print_intlist iloop.body
+ (P.to_int iloop.head)
+ print_intlist iloop.finals
+ print_option_pint iloop.sb_final
+
+let rec print_inner_loops = function
+| [] -> ()
+| iloop :: iloops -> begin
+ print_inner_loop iloop;
+ debug "\n";
+ print_inner_loops iloops
+ end
+
+let cb_exit_node = function
+ | Icond (_,_,n1,n2,p) -> begin match p with
+ | Some true -> Some n2
+ | Some false -> Some n1
+ | None -> None
+ end
+ | _ -> None
+
+ (*
+(* Alternative code to get inner_loops - use it if we suspect the other function to be bugged *)
+let get_natural_loop code predmap n =
+ let is_final_node m =
+ let successors = rtl_successors @@ get_some @@ PTree.get m code in
+ List.exists (fun s -> (P.to_int s) == (P.to_int n)) successors
+ in
+ let excluded_node = cb_exit_node @@ get_some @@ PTree.get n code in
+ let is_excluded m = match excluded_node with
+ | None -> false
+ | Some ex -> P.to_int ex == P.to_int m
+ in
+ debug "get_natural_loop for %d\n" (P.to_int n);
+ let body = bfs_until code n is_final_node is_excluded in
+ debug "BODY: %a\n" print_intlist body;
+ let final = List.find is_final_node body in
+ debug "FINAL: %d\n" (P.to_int final);
+ let preds = List.filter (fun pred -> List.mem pred body) @@ get_some @@ PTree.get n predmap in
+ debug "PREDS: %a\n" print_intlist preds;
+ { preds = preds; body = body; head = n; final = final }
+
+let rec count_loop_headers is_loop_header = function
+ | [] -> 0
+ | n :: ln ->
+ let rem = count_loop_headers is_loop_header ln in
+ if (get_some @@ PTree.get n is_loop_header) then rem + 1 else rem
+
+let get_inner_loops f code is_loop_header =
+ let predmap = get_predecessors_rtl code in
+ let iloops = ref [] in
+ List.iter (fun (n, ilh) -> if ilh then begin
+ let iloop = get_natural_loop code predmap n in
+ let nb_headers = count_loop_headers is_loop_header iloop.body in
+ if nb_headers == 1 then (* innermost loop *)
+ iloops := iloop :: !iloops end
+ ) (PTree.elements is_loop_header);
+ !iloops
+ *)
+
+let rec generate_fwmap ln ln' fwmap =
+ match ln with
+ | [] -> begin
+ match ln' with
+ | [] -> fwmap
+ | _ -> failwith "ln and ln' have different lengths"
+ end
+ | n :: ln -> begin
+ match ln' with
+ | n' :: ln' -> generate_fwmap ln ln' (PTree.set n n' fwmap)
+ | _ -> failwith "ln and ln' have different lengths"
+ end
+
+let generate_revmap ln ln' revmap = generate_fwmap ln' ln revmap
+
+let apply_map fw n = P.of_int @@ ptree_get_some n fw
+
+let apply_map_list fw ln = List.map (apply_map fw) ln
+
+let apply_map_opt fw n =
+ match PTree.get n fw with
+ | Some n' -> P.of_int n'
+ | None -> n
+
+let change_nexts fwmap = function
+ | Icall (a, b, c, d, n) -> Icall (a, b, c, d, apply_map fwmap n)
+ | Ibuiltin (a, b, c, n) -> Ibuiltin (a, b, c, apply_map fwmap n)
+ | Ijumptable (a, ln) -> Ijumptable (a, List.map (apply_map_opt fwmap) ln)
+ | Icond (a, b, n1, n2, i) -> Icond (a, b, apply_map_opt fwmap n1, apply_map_opt fwmap n2, i)
+ | Inop n -> Inop (apply_map fwmap n)
+ | Iop (a, b, c, n) -> Iop (a, b, c, apply_map fwmap n)
+ | Iload (a, b, c, d, e, n) -> Iload (a, b, c, d, e, apply_map fwmap n)
+ | Istore (a, b, c, d, n) -> Istore (a, b, c, d, apply_map fwmap n)
+ | Itailcall (a, b, c) -> Itailcall (a, b, c)
+ | Ireturn o -> Ireturn o
+
+(** Clone a list of instructions into free pc indexes
+ *
+ * The list of instructions should be contiguous, and not include any loop.
+ * It is assumed that the first instruction of the list is the head.
+ * Also, the last instruction of the list should be the loop backedge.
+ *
+ * Returns: (code', revmap', ln', fwmap)
+ * code' is the updated code, after cloning
+ * revmap' is the updated revmap
+ * ln' is the list of the new indexes used to reference the cloned instructions
+ * fwmap is a map from ln to ln'
+ *)
+let clone code revmap ln = begin
+ assert (List.length ln > 0);
+ let head' = next_free_pc code in
+ (* +head' to ensure we never overlap with the existing code *)
+ let ln' = List.map (fun n -> n + head') @@ List.map P.to_int ln in
+ let fwmap = generate_fwmap ln ln' PTree.empty in
+ let revmap' = generate_revmap ln (List.map P.of_int ln') revmap in
+ let code' = ref code in
+ List.iter (fun n ->
+ let instr = get_some @@ PTree.get n code in
+ let instr' = change_nexts fwmap instr in
+ code' := PTree.set (apply_map fwmap n) instr' !code'
+ ) ln;
+ (!code', revmap', ln', fwmap)
+end
+
+let rec count_ignore_nops code = function
+ | [] -> 0
+ | n::ln ->
+ let inst = get_some @@ PTree.get n code in
+ match inst with
+ | Inop _ -> count_ignore_nops code ln
+ | _ -> 1 + count_ignore_nops code ln
+
+(* Unrolls a single interation of the inner loop
+ * 1) Clones the body into body'
+ * 2) Links the preds to the first instruction of body'
+ * 3) Links the last instruction of body' into the first instruction of body
+ *)
+let unroll_inner_loop_single code revmap iloop =
+ let body = iloop.body in
+ if count_ignore_nops code body > !Clflags.option_funrollsingle then begin
+ debug "Too many nodes in the loop body (%d > %d)" (List.length body) !Clflags.option_funrollsingle;
+ (code, revmap)
+ end else
+ let (code2, revmap2, dupbody, fwmap) = clone code revmap body in
+ let code' = ref code2 in
+ let head' = apply_map fwmap (iloop.head) in
+ let finals' = apply_map_list fwmap (iloop.finals) in
+ begin
+ debug "PREDS: %a\n" print_intlist iloop.preds;
+ debug "IHEAD: %d\n" (P.to_int iloop.head);
+ code' := change_pointers !code' (iloop.head) head' (iloop.preds);
+ code' := change_pointers !code' head' (iloop.head) finals';
+ (!code', revmap2)
+ end
+
+let unroll_inner_loops_single f code revmap =
+ let is_loop_header = get_loop_headers code (f.fn_entrypoint) in
+ let inner_loops = get_inner_loops f code is_loop_header in
+ let code' = ref code in
+ let revmap' = ref revmap in
+ begin
+ print_inner_loops inner_loops;
+ List.iter (fun iloop ->
+ let (new_code, new_revmap) = unroll_inner_loop_single !code' !revmap' iloop in
+ code' := new_code; revmap' := new_revmap
+ ) inner_loops;
+ (!code', !revmap')
+ end
+
+let is_some o = match o with Some _ -> true | None -> false
+
+let rec go_through_predicted code start final =
+ if start == final then
+ Some [start]
+ else
+ match rtl_successors_pref @@ get_some @@ PTree.get start code with
+ | [n] -> (
+ match go_through_predicted code n final with
+ | Some ln -> Some (start :: ln)
+ | None -> None
+ )
+ | _ -> None
+
+(* Unrolls the body of the inner loop once - duplicating the exit condition as well
+ * 1) Clones body into body'
+ * 2) Links the last instruction of body (sb_final) into the first of body'
+ * 3) Links the last instruction of body' into the first of body
+ *)
+let unroll_inner_loop_body code revmap iloop =
+ debug "iloop = "; print_inner_loop iloop;
+ let body = iloop.body in
+ let limit = !Clflags.option_funrollbody in
+ if count_ignore_nops code body > limit then begin
+ debug "Too many nodes in the loop body (%d > %d)\n" (List.length body) limit;
+ (code, revmap)
+ end else if not @@ is_some iloop.sb_final then begin
+ debug "The loop body does not form a superblock OR we have predicted that we do not loop\n";
+ (code, revmap)
+ end else
+ let sb_final = get_some @@ iloop.sb_final in
+ let sb_body = get_some @@ go_through_predicted code iloop.head sb_final in
+ let (code2, revmap2, dupbody, fwmap) = clone code revmap sb_body in
+ let code' = ref code2 in
+ let head' = apply_map fwmap (iloop.head) in
+ let sb_final' = apply_map fwmap sb_final in
+ begin
+ code' := change_pointers !code' iloop.head head' [sb_final];
+ code' := change_pointers !code' head' iloop.head [sb_final'];
+ (!code', revmap2)
+ end
+
+let unroll_inner_loops_body f code revmap =
+ let is_loop_header = get_loop_headers code (f.fn_entrypoint) in
+ let inner_loops = get_inner_loops f code is_loop_header in
+ debug "Number of loops found: %d\n" (List.length inner_loops);
+ let code' = ref code in
+ let revmap' = ref revmap in
+ begin
+ print_inner_loops inner_loops;
+ List.iter (fun iloop ->
+ let (new_code, new_revmap) = unroll_inner_loop_body !code' !revmap' iloop in
+ code' := new_code; revmap' := new_revmap
+ ) inner_loops;
+ (!code', !revmap')
+ end
+
+let extract_upto_icond f code head =
+ let rec extract h =
+ let inst = get_some @@ PTree.get h code in
+ match inst with
+ | Icond _ -> [h]
+ | _ -> ( match rtl_successors inst with
+ | [n] -> h :: (extract n)
+ | _ -> failwith "Found a node with more than one successor??"
+ )
+ in List.rev @@ extract head
+
+let rotate_inner_loop f code revmap iloop =
+ let header = extract_upto_icond f code iloop.head in
+ let limit = !Clflags.option_flooprotate in
+ let nb_duplicated = count_ignore_nops code header in
+ if nb_duplicated > limit then begin
+ debug "Loop Rotate: too many nodes to duplicate (%d > %d)" (List.length header) limit;
+ (code, revmap)
+ end else if nb_duplicated == count_ignore_nops code iloop.body then begin
+ debug "The conditional branch is already at the end! No need to rotate.";
+ (code, revmap)
+ end else
+ let (code2, revmap2, dupheader, fwmap) = clone code revmap header in
+ let code' = ref code2 in
+ let head' = apply_map fwmap iloop.head in
+ begin
+ code' := change_pointers !code' iloop.head head' iloop.preds;
+ (!code', revmap2)
+ end
+
+let rotate_inner_loops f code revmap =
+ let is_loop_header = get_loop_headers code (f.fn_entrypoint) in
+ let inner_loops = get_inner_loops f code is_loop_header in
+ let code' = ref code in
+ let revmap' = ref revmap in
+ begin
+ print_inner_loops inner_loops;
+ List.iter (fun iloop ->
+ let (new_code, new_revmap) = rotate_inner_loop f !code' !revmap' iloop in
+ code' := new_code; revmap' := new_revmap
+ ) inner_loops;
+ (!code', !revmap')
+ end
+
+let loop_rotate f =
+ let entrypoint = f.fn_entrypoint in
+ let code = f.fn_code in
+ let revmap = make_identity_ptree code in
+ let (code, revmap) =
+ if !Clflags.option_flooprotate > 0 then
+ rotate_inner_loops f code revmap
+ else (code, revmap) in
+ ((code, entrypoint), revmap)
+
+let static_predict f =
+ let entrypoint = f.fn_entrypoint in
+ let code = f.fn_code in
+ let revmap = make_identity_ptree code in
+ begin
+ reset_stats ();
+ set_stats_oc ();
+ let code =
+ if !Clflags.option_fpredict then
+ update_directions f code entrypoint
+ else code in
+ write_stats_oc ();
+ let code =
+ if !Clflags.option_fpredict then
+ invert_iconds code
+ else code in
+ ((code, entrypoint), revmap)
+ end
+
+let unroll_single f =
+ let entrypoint = f.fn_entrypoint in
+ let code = f.fn_code in
+ let revmap = make_identity_ptree code in
+ let (code, revmap) =
+ if !Clflags.option_funrollsingle > 0 then
+ unroll_inner_loops_single f code revmap
+ else (code, revmap) in
+ ((code, entrypoint), revmap)
+
+let unroll_body f =
+ let entrypoint = f.fn_entrypoint in
+ let code = f.fn_code in
+ let revmap = make_identity_ptree code in
+ let (code, revmap) =
+ if !Clflags.option_funrollbody > 0 then
+ unroll_inner_loops_body f code revmap
+ else (code, revmap) in
+ ((code, entrypoint), revmap)
+
+let tail_duplicate f =
+ let entrypoint = f.fn_entrypoint in
+ let code = f.fn_code in
+ let revmap = make_identity_ptree code in
+ let (code, revmap) =
+ if !Clflags.option_ftailduplicate > 0 then
+ let traces = select_traces code entrypoint in
+ let preds = get_predecessors_rtl code in
+ let is_loop_header = get_loop_headers code entrypoint in
+ superblockify_traces code preds is_loop_header traces revmap
+ else (code, revmap) in
+ ((code, entrypoint), revmap)