aboutsummaryrefslogtreecommitdiffstats
path: root/backend/Duplicateaux.ml
diff options
context:
space:
mode:
Diffstat (limited to 'backend/Duplicateaux.ml')
-rw-r--r--backend/Duplicateaux.ml450
1 files changed, 329 insertions, 121 deletions
diff --git a/backend/Duplicateaux.ml b/backend/Duplicateaux.ml
index 1297ec90..eb9f42e0 100644
--- a/backend/Duplicateaux.ml
+++ b/backend/Duplicateaux.ml
@@ -15,6 +15,7 @@
(* Oracle for Duplicate pass.
* - Add static prediction information to Icond nodes
* - Performs tail duplication on interesting traces to form superblocks
+ * - Unrolls a single iteration of innermost loops
* - (TODO: perform partial loop unrolling inside innermost loops)
*)
@@ -22,23 +23,13 @@ open RTL
open Maps
open Camlcoq
-let debug_flag = ref false
-
-let debug fmt =
- if !debug_flag then Printf.eprintf fmt
- else Printf.ifprintf stderr fmt
-
-let get_some = function
-| None -> failwith "Did not get some"
-| Some thing -> thing
-
-let rtl_successors = function
-| Itailcall _ | Ireturn _ -> []
-| Icall(_,_,_,_,n) | Ibuiltin(_,_,_,n) | Inop n | Iop (_,_,_,n)
-| Iload (_,_,_,_,_,n) | Istore (_,_,_,_,n) -> [n]
-| Icond (_,_,n1,n2,_) -> [n1; n2]
-| Ijumptable (_,ln) -> ln
+let debug_flag = LICMaux.debug_flag
+let debug = LICMaux.debug
+let get_loop_headers = LICMaux.get_loop_headers
+let get_some = LICMaux.get_some
+let rtl_successors = LICMaux.rtl_successors
+(* Get list of nodes following a BFS of the code *)
let bfs code entrypoint = begin
debug "bfs\n";
let visited = ref (PTree.map (fun n i -> false) code)
@@ -67,6 +58,7 @@ let optbool o = match o with Some _ -> true | None -> false
let ptree_get_some n ptree = get_some @@ PTree.get n ptree
+(* Returns a PTree: node -> list of the predecessors of that node *)
let get_predecessors_rtl code = begin
debug "get_predecessors_rtl\n";
let preds = ref (PTree.map (fun n i -> []) code) in
@@ -89,15 +81,13 @@ end
module PSet = Set.Make(PInt)
-let print_intlist l =
- let rec f = function
+let print_intlist oc l =
+ let rec f oc = function
| [] -> ()
- | n::ln -> (Printf.printf "%d " (P.to_int n); f ln)
+ | n::ln -> (Printf.fprintf oc "%d %a" (P.to_int n) f ln)
in begin
if !debug_flag then begin
- Printf.printf "[";
- f l;
- Printf.printf "]"
+ Printf.fprintf oc "[%a]" f l
end
end
@@ -113,43 +103,6 @@ let print_intset s =
end
end
-type vstate = Unvisited | Processed | Visited
-
-(** Getting loop branches with a DFS visit :
- * Each node is either Unvisited, Visited, or Processed
- * pre-order: node becomes Processed
- * post-order: node becomes Visited
- *
- * If we come accross an edge to a Processed node, it's a loop!
- *)
-let get_loop_headers code entrypoint = begin
- debug "get_loop_headers\n";
- let visited = ref (PTree.map (fun n i -> Unvisited) code)
- and is_loop_header = ref (PTree.map (fun n i -> false) code)
- in let rec dfs_visit code = function
- | [] -> ()
- | node :: ln ->
- match (get_some @@ PTree.get node !visited) with
- | Visited -> ()
- | Processed -> begin
- debug "Node %d is a loop header\n" (P.to_int node);
- is_loop_header := PTree.set node true !is_loop_header;
- visited := PTree.set node Visited !visited
- end
- | Unvisited -> begin
- visited := PTree.set node Processed !visited;
- match PTree.get node code with
- | None -> failwith "No such node"
- | Some i -> let next_visits = rtl_successors i in dfs_visit code next_visits;
- visited := PTree.set node Visited !visited;
- dfs_visit code ln
- end
- in begin
- dfs_visit code [entrypoint];
- !is_loop_header
- end
-end
-
let ptree_printbool pt =
let elements = PTree.elements pt
in begin
@@ -174,6 +127,10 @@ let rec look_ahead code node is_loop_header predicate =
)
| _ -> false
+(**
+ * Heuristics mostly based on the paper Branch Prediction for Free
+ *)
+
let do_call_heuristic code cond ifso ifnot is_loop_header =
begin
debug "\tCall heuristic..\n";
@@ -302,7 +259,7 @@ let get_loop_info is_loop_header bfs_order code =
!loop_info
end
-(* Remark - compared to the original paper, we don't use the store heuristic *)
+(* Remark - compared to the original Branch Prediction for Free paper, we don't use the store heuristic *)
let get_directions code entrypoint = begin
debug "get_directions\n";
let bfs_order = bfs code entrypoint in
@@ -435,18 +392,29 @@ let best_predecessor_of node predecessors code order is_visited =
) order)
with Not_found -> None
-let print_trace t = print_intlist t
+let print_trace = print_intlist
-let print_traces traces =
- let rec f = function
+let print_traces oc traces =
+ let rec f oc = function
| [] -> ()
- | t::lt -> Printf.printf "\n\t"; print_trace t; Printf.printf ",\n"; f lt
+ | t::lt -> Printf.fprintf oc "\n\t%a,\n%a" print_trace t f lt
in begin
- if !debug_flag then begin
- Printf.printf "Traces: {";
- f traces;
- Printf.printf "}\n";
- end
+ if !debug_flag then
+ Printf.fprintf oc "Traces: {%a}\n" f traces
+ end
+
+(* Adapted from backend/PrintRTL.ml: print_function *)
+let print_code code = let open PrintRTL in let open Printf in
+ if (!debug_flag) then begin
+ fprintf stdout "{\n";
+ let instrs =
+ List.sort
+ (fun (pc1, _) (pc2, _) -> compare pc2 pc1)
+ (List.rev_map
+ (fun (pc, i) -> (P.to_int pc, i))
+ (PTree.elements code)) in
+ List.iter (print_instruction stdout) instrs;
+ fprintf stdout "}"
end
(* Dumb (but linear) trace selection *)
@@ -521,7 +489,7 @@ let select_traces_chang code entrypoint = begin
end
done;
(* debug "DFS: \t"; print_intlist order; debug "\n"; *)
- debug "Traces: "; print_traces !traces;
+ debug "Traces: %a" print_traces !traces;
!traces
end
end
@@ -537,26 +505,26 @@ let rec make_identity_ptree_rec = function
let make_identity_ptree code = make_identity_ptree_rec (PTree.elements code)
-(* Change the pointers of preds nodes to point to n' instead of n *)
+(* Change the pointers of nodes to point to n' instead of n *)
let rec change_pointers code n n' = function
| [] -> code
- | pred :: preds ->
- let new_pred_inst = match ptree_get_some pred code with
- | Icall(a, b, c, d, n0) -> assert (n0 == n); Icall(a, b, c, d, n')
- | Ibuiltin(a, b, c, n0) -> assert (n0 == n); Ibuiltin(a, b, c, n')
- | Ijumptable(a, ln) -> assert (optbool @@ List.find_opt (fun e -> e == n) ln);
- Ijumptable(a, List.map (fun e -> if (e == n) then n' else e) ln)
- | Icond(a, b, n1, n2, i) -> assert (n1 == n || n2 == n);
- let n1' = if (n1 == n) then n' else n1
- in let n2' = if (n2 == n) then n' else n2
+ | node :: nodes ->
+ let new_pred_inst = match ptree_get_some node code with
+ | Icall(a, b, c, d, n0) -> assert (n0 = n); Icall(a, b, c, d, n')
+ | Ibuiltin(a, b, c, n0) -> assert (n0 = n); Ibuiltin(a, b, c, n')
+ | Ijumptable(a, ln) -> assert (optbool @@ List.find_opt (fun e -> e = n) ln);
+ Ijumptable(a, List.map (fun e -> if (e = n) then n' else e) ln)
+ | Icond(a, b, n1, n2, i) -> assert (n1 = n || n2 = n);
+ let n1' = if (n1 = n) then n' else n1
+ in let n2' = if (n2 = n) then n' else n2
in Icond(a, b, n1', n2', i)
- | Inop n0 -> assert (n0 == n); Inop n'
- | Iop (a, b, c, n0) -> assert (n0 == n); Iop (a, b, c, n')
- | Iload (a, b, c, d, e, n0) -> assert (n0 == n); Iload (a, b, c, d, e, n')
- | Istore (a, b, c, d, n0) -> assert (n0 == n); Istore (a, b, c, d, n')
+ | Inop n0 -> assert (n0 = n); Inop n'
+ | Iop (a, b, c, n0) -> assert (n0 = n); Iop (a, b, c, n')
+ | Iload (a, b, c, d, e, n0) -> assert (n0 = n); Iload (a, b, c, d, e, n')
+ | Istore (a, b, c, d, n0) -> assert (n0 = n); Istore (a, b, c, d, n')
| Itailcall _ | Ireturn _ -> failwith "That instruction cannot be a predecessor"
- in let new_code = PTree.set pred new_pred_inst code
- in change_pointers new_code n n' preds
+ in let new_code = PTree.set node new_pred_inst code
+ in change_pointers new_code n n' nodes
(* parent: parent of n to keep as parent
* preds: all the other parents of n
@@ -580,13 +548,20 @@ let is_empty = function
| [] -> true
| _ -> false
+let next_free_pc code = maxint (List.map (fun e -> let (n, _) = e in P.to_int n) (PTree.elements code)) + 1
+
+let is_a_nop code n =
+ match get_some @@ PTree.get n code with
+ | Inop _ -> true
+ | _ -> false
+
(* code: RTL code
* preds: mapping node -> predecessors
* ptree: the revmap
* trace: the trace to follow tail duplication on *)
let tail_duplicate code preds ptree trace =
(* next_int: unused integer that can be used for the next duplication *)
- let next_int = ref (maxint (List.map (fun e -> let (n, _) = e in P.to_int n) (PTree.elements code)) + 1)
+ let next_int = ref (next_free_pc code)
(* last_node and last_duplicate store resp. the last processed node of the trace, and its duplication *)
in let last_node = ref None
in let last_duplicate = ref None
@@ -608,7 +583,7 @@ let tail_duplicate code preds ptree trace =
in let (newc, newp) = duplicate code ptree !last_node n final_node_preds (P.of_int n')
in begin
next_int := !next_int + 1;
- nb_duplicated := !nb_duplicated + 1;
+ (if not @@ is_a_nop code n then nb_duplicated := !nb_duplicated + 1);
last_duplicate := Some (P.of_int n');
(newc, newp)
end
@@ -620,9 +595,8 @@ let tail_duplicate code preds ptree trace =
in let new_code, new_ptree = f code ptree true trace
in (new_code, new_ptree, !nb_duplicated)
-let superblockify_traces code preds traces =
- let max_nb_duplicated = !Clflags.option_fduplicate (* FIXME - should be architecture dependent *)
- in let ptree = make_identity_ptree code
+let superblockify_traces code preds traces ptree =
+ let max_nb_duplicated = !Clflags.option_ftailduplicate (* FIXME - should be architecture dependent *)
in let rec f code ptree = function
| [] -> (code, ptree, 0)
| trace :: traces ->
@@ -633,37 +607,271 @@ let superblockify_traces code preds traces =
in let new_code, new_ptree, _ = f code ptree traces
in (new_code, new_ptree)
-let rec invert_iconds_trace code = function
- | [] -> code
- | n :: ln ->
- let code' = match ptree_get_some n code with
- | Icond (c, lr, ifso, ifnot, info) -> (match info with
- | Some true -> begin
- (* debug "Reversing ifso/ifnot for node %d\n" (P.to_int n); *)
- PTree.set n (Icond (Op.negate_condition c, lr, ifnot, ifso, Some false)) code
- end
- | _ -> code)
- | _ -> code
- in invert_iconds_trace code' ln
+let invert_iconds code =
+ PTree.map1 (fun i -> match i with
+ | Icond (c, lr, ifso, ifnot, info) -> (match info with
+ | Some true -> begin
+ (* debug "Reversing ifso/ifnot for node %d\n" (P.to_int n); *)
+ Icond (Op.negate_condition c, lr, ifnot, ifso, Some false)
+ end
+ | _ -> i)
+ | _ -> i
+ ) code
+
+(** Partial loop unrolling
+ *
+ * The following code seeks innermost loops, and unfolds the first iteration
+ * Most of the code has been moved from LICMaux.ml to Duplicateaux.ml to solve
+ * cyclic dependencies between LICMaux and Duplicateaux
+ *)
-let rec invert_iconds code = function
- | [] -> code
- | t :: ts ->
- let code' = if !Clflags.option_finvertcond then invert_iconds_trace code t
- else code
- in invert_iconds code' ts
+type innerLoop = {
+ preds: P.t list;
+ body: HashedSet.PSet.t;
+ head: P.t; (* head of the loop *)
+ final: P.t (* the final instruction, which loops back to the head *)
+}
+
+let print_pset = LICMaux.pp_pset
+
+let print_inner_loop iloop =
+ debug "{preds: %a, body: %a}" print_intlist iloop.preds print_pset iloop.body
+
+let rec print_inner_loops = function
+| [] -> ()
+| iloop :: iloops -> begin
+ print_inner_loop iloop;
+ debug "\n";
+ print_inner_loops iloops
+ end
+
+let print_ptree printer pt =
+ let elements = PTree.elements pt in
+ begin
+ debug "[\n";
+ List.iter (fun (n, elt) ->
+ debug "\t%d: %a\n" (P.to_int n) printer elt
+ ) elements;
+ debug "]\n"
+ end
+
+let print_pint oc i = if !debug_flag then Printf.fprintf oc "%d" (P.to_int i) else ()
+
+let get_inner_loops f code is_loop_header =
+ let fake_f = { fn_sig = f.fn_sig; fn_params = f.fn_params;
+ fn_stacksize = f.fn_stacksize; fn_code = code; fn_entrypoint = f.fn_entrypoint } in
+ let (_, predmap, loopmap) = LICMaux.inner_loops fake_f in
+ begin
+ debug "PREDMAP: "; print_ptree print_intlist predmap;
+ debug "LOOPMAP: "; print_ptree print_pset loopmap;
+ List.map (fun (n, body) ->
+ let preds = List.filter (fun p -> not @@ HashedSet.PSet.contains body p)
+ @@ get_some @@ PTree.get n predmap in
+ let head = (* the instruction from body which is a loop header *)
+ let heads = HashedSet.PSet.elements @@ HashedSet.PSet.filter
+ (fun n -> ptree_get_some n is_loop_header) body in
+ begin
+ assert (List.length heads == 1);
+ List.hd heads
+ end in
+ let final = (* the predecessors from head that are in the body *)
+ let head_preds = ptree_get_some head predmap in
+ let filtered = List.filter (fun n -> HashedSet.PSet.contains body n) head_preds in
+ begin
+ debug "HEAD: %d\n" (P.to_int head);
+ debug "BODY: %a\n" print_pset body;
+ debug "HEADPREDS: %a\n" print_intlist head_preds;
+ assert (List.length filtered == 1);
+ List.hd filtered
+ end in
+ { preds = preds; body = body; head = head; final = final }
+ )
+ (* LICMaux.inner_loops also returns non-inner loops, but with a body of 1 instruction
+ * We remove those to get just the inner loops *)
+ @@ List.filter (fun (n, body) ->
+ let count = List.length @@ HashedSet.PSet.elements body in count != 1
+ ) (PTree.elements loopmap)
+ end
+
+let rec generate_fwmap ln ln' fwmap =
+ match ln with
+ | [] -> begin
+ match ln' with
+ | [] -> fwmap
+ | _ -> failwith "ln and ln' have different lengths"
+ end
+ | n :: ln -> begin
+ match ln' with
+ | n' :: ln' -> generate_fwmap ln ln' (PTree.set n n' fwmap)
+ | _ -> failwith "ln and ln' have different lengths"
+ end
+
+let generate_revmap ln ln' revmap = generate_fwmap ln' ln revmap
+
+let apply_map fw n = P.of_int @@ ptree_get_some n fw
+
+let apply_map_opt fw n =
+ match PTree.get n fw with
+ | Some n' -> P.of_int n'
+ | None -> n
+
+let change_nexts fwmap = function
+ | Icall (a, b, c, d, n) -> Icall (a, b, c, d, apply_map fwmap n)
+ | Ibuiltin (a, b, c, n) -> Ibuiltin (a, b, c, apply_map fwmap n)
+ | Ijumptable (a, ln) -> Ijumptable (a, List.map (apply_map_opt fwmap) ln)
+ | Icond (a, b, n1, n2, i) -> Icond (a, b, apply_map_opt fwmap n1, apply_map_opt fwmap n2, i)
+ | Inop n -> Inop (apply_map fwmap n)
+ | Iop (a, b, c, n) -> Iop (a, b, c, apply_map fwmap n)
+ | Iload (a, b, c, d, e, n) -> Iload (a, b, c, d, e, apply_map fwmap n)
+ | Istore (a, b, c, d, n) -> Istore (a, b, c, d, apply_map fwmap n)
+ | Itailcall (a, b, c) -> Itailcall (a, b, c)
+ | Ireturn o -> Ireturn o
+
+(** Clone a list of instructions into free pc indexes
+ *
+ * The list of instructions should be contiguous, and not include any loop.
+ * It is assumed that the first instruction of the list is the head.
+ * Also, the last instruction of the list should be the loop backedge.
+ *
+ * Returns: (code', revmap', ln', fwmap)
+ * code' is the updated code, after cloning
+ * revmap' is the updated revmap
+ * ln' is the list of the new indexes used to reference the cloned instructions
+ * fwmap is a map from ln to ln'
+ *)
+let clone code revmap ln = begin
+ assert (List.length ln > 0);
+ let head' = next_free_pc code in
+ (* +head' to ensure we never overlap with the existing code *)
+ let ln' = List.map (fun n -> n + head') @@ List.map P.to_int ln in
+ let fwmap = generate_fwmap ln ln' PTree.empty in
+ let revmap' = generate_revmap ln (List.map P.of_int ln') revmap in
+ let code' = ref code in
+ List.iter (fun n ->
+ let instr = get_some @@ PTree.get n code in
+ let instr' = change_nexts fwmap instr in
+ code' := PTree.set (apply_map fwmap n) instr' !code'
+ ) ln;
+ (!code', revmap', ln', fwmap)
+end
+
+let rec count_ignore_nops code = function
+ | [] -> 0
+ | n::ln ->
+ let inst = get_some @@ PTree.get n code in
+ match inst with
+ | Inop _ -> count_ignore_nops code ln
+ | _ -> 1 + count_ignore_nops code ln
+
+(* Unrolls a single interation of the inner loop
+ * 1) Clones the body into body'
+ * 2) Links the preds to the first instruction of body'
+ * 3) Links the last instruction of body' into the first instruction of body
+ *)
+let unroll_inner_loop_single code revmap iloop =
+ let body = HashedSet.PSet.elements (iloop.body) in
+ if count_ignore_nops code body > !Clflags.option_funrollsingle then begin
+ debug "Too many nodes in the loop body (%d > %d)" (List.length body) !Clflags.option_funrollsingle;
+ (code, revmap)
+ end else
+ let (code2, revmap2, dupbody, fwmap) = clone code revmap body in
+ let code' = ref code2 in
+ let head' = apply_map fwmap (iloop.head) in
+ let final' = apply_map fwmap (iloop.final) in
+ begin
+ debug "PREDS: %a\n" print_intlist iloop.preds;
+ debug "IHEAD: %d\n" (P.to_int iloop.head);
+ code' := change_pointers !code' (iloop.head) head' (iloop.preds);
+ code' := change_pointers !code' head' (iloop.head) [final'];
+ (!code', revmap2)
+ end
+
+let unroll_inner_loops_single f code revmap =
+ let is_loop_header = get_loop_headers code (f.fn_entrypoint) in
+ let inner_loops = get_inner_loops f code is_loop_header in
+ let code' = ref code in
+ let revmap' = ref revmap in
+ begin
+ print_inner_loops inner_loops;
+ List.iter (fun iloop ->
+ let (new_code, new_revmap) = unroll_inner_loop_single !code' !revmap' iloop in
+ code' := new_code; revmap' := new_revmap
+ ) inner_loops;
+ (!code', !revmap')
+ end
+
+(* Unrolls the body of the inner loop once - duplicating the exit condition as well
+ * 1) Clones body into body'
+ * 2) Links the last instruction of body into the first of body'
+ * 3) Links the last instruction of body' into the first of body
+ *)
+let unroll_inner_loop_body code revmap iloop =
+ let body = HashedSet.PSet.elements (iloop.body) in
+ let limit = !Clflags.option_funrollbody in
+ if count_ignore_nops code body > limit then begin
+ debug "Too many nodes in the loop body (%d > %d)" (List.length body) limit;
+ (code, revmap)
+ end else
+ let (code2, revmap2, dupbody, fwmap) = clone code revmap body in
+ let code' = ref code2 in
+ let head' = apply_map fwmap (iloop.head) in
+ let final' = apply_map fwmap (iloop.final) in
+ begin
+ code' := change_pointers !code' iloop.head head' [iloop.final];
+ code' := change_pointers !code' head' iloop.head [final'];
+ (!code', revmap2)
+ end
+
+let unroll_inner_loops_body f code revmap =
+ let is_loop_header = get_loop_headers code (f.fn_entrypoint) in
+ let inner_loops = get_inner_loops f code is_loop_header in
+ let code' = ref code in
+ let revmap' = ref revmap in
+ begin
+ print_inner_loops inner_loops;
+ List.iter (fun iloop ->
+ let (new_code, new_revmap) = unroll_inner_loop_body !code' !revmap' iloop in
+ code' := new_code; revmap' := new_revmap
+ ) inner_loops;
+ (!code', !revmap')
+ end
let duplicate_aux f =
+ (* initializing *)
let entrypoint = f.fn_entrypoint in
- if !Clflags.option_fduplicate < 0 then
- ((f.fn_code, entrypoint), make_identity_ptree f.fn_code)
- else
- let code = update_directions (f.fn_code) entrypoint in
- let traces = select_traces code entrypoint in
- let icond_code = invert_iconds code traces in
- let preds = get_predecessors_rtl icond_code in
- if !Clflags.option_fduplicate >= 1 then
- let (new_code, pTreeId) = ((* print_traces traces; *) superblockify_traces icond_code preds traces) in
- ((new_code, f.fn_entrypoint), pTreeId)
- else
- ((icond_code, entrypoint), make_identity_ptree code)
+ let code = f.fn_code in
+ let revmap = make_identity_ptree code in
+
+ (* static prediction *)
+ let code =
+ if !Clflags.option_fpredict then
+ update_directions code entrypoint
+ else code in
+
+ (* unroll single *)
+ let (code, revmap) =
+ if !Clflags.option_funrollsingle > 0 then
+ unroll_inner_loops_single f code revmap
+ else (code, revmap) in
+
+ (* unroll body *)
+ let (code, revmap) =
+ if !Clflags.option_funrollbody > 0 then
+ unroll_inner_loops_body f code revmap
+ else (code, revmap) in
+
+ (* static prediction bis *)
+ let code =
+ if !Clflags.option_fpredict then
+ invert_iconds code
+ else code in
+
+ (* tail duplication *)
+ let (code, revmap) =
+ if !Clflags.option_ftailduplicate > 0 then
+ let traces = select_traces code entrypoint in
+ let preds = get_predecessors_rtl code in
+ superblockify_traces code preds traces revmap
+ else (code, revmap) in
+
+ ((code, entrypoint), revmap)