aboutsummaryrefslogtreecommitdiffstats
path: root/backend/Duplicateaux.ml
diff options
context:
space:
mode:
Diffstat (limited to 'backend/Duplicateaux.ml')
-rw-r--r--backend/Duplicateaux.ml208
1 files changed, 151 insertions, 57 deletions
diff --git a/backend/Duplicateaux.ml b/backend/Duplicateaux.ml
index eb9f42e0..76b5616b 100644
--- a/backend/Duplicateaux.ml
+++ b/backend/Duplicateaux.ml
@@ -30,7 +30,9 @@ let get_some = LICMaux.get_some
let rtl_successors = LICMaux.rtl_successors
(* Get list of nodes following a BFS of the code *)
-let bfs code entrypoint = begin
+(* Stops when predicate is reached
+ * Excludes any node given in excluded function *)
+let bfs_until code entrypoint (predicate: node->bool) (excluded: node->bool) = begin
debug "bfs\n";
let visited = ref (PTree.map (fun n i -> false) code)
and bfs_list = ref []
@@ -40,20 +42,24 @@ let bfs code entrypoint = begin
Queue.add entrypoint to_visit;
while not (Queue.is_empty to_visit) do
node := Queue.pop to_visit;
- if not (get_some @@ PTree.get !node !visited) then begin
+ if (not (get_some @@ PTree.get !node !visited)) then begin
visited := PTree.set !node true !visited;
- match PTree.get !node code with
- | None -> failwith "No such node"
- | Some i ->
- bfs_list := !node :: !bfs_list;
- let succ = rtl_successors i in
- List.iter (fun n -> Queue.add n to_visit) succ
+ if not (excluded !node) then begin
+ match PTree.get !node code with
+ | None -> failwith "No such node"
+ | Some i ->
+ bfs_list := !node :: !bfs_list;
+ if not (predicate !node) then
+ let succ = rtl_successors i in List.iter (fun n -> Queue.add n to_visit) succ
+ end
end
done;
List.rev !bfs_list
end
end
+let bfs code entrypoint = bfs_until code entrypoint (fun _ -> false) (fun _ -> false)
+
let optbool o = match o with Some _ -> true | None -> false
let ptree_get_some n ptree = get_some @@ PTree.get n ptree
@@ -81,15 +87,7 @@ end
module PSet = Set.Make(PInt)
-let print_intlist oc l =
- let rec f oc = function
- | [] -> ()
- | n::ln -> (Printf.fprintf oc "%d %a" (P.to_int n) f ln)
- in begin
- if !debug_flag then begin
- Printf.fprintf oc "[%a]" f l
- end
- end
+let print_intlist = LICMaux.print_intlist
let print_intset s =
let seq = PSet.to_seq s
@@ -403,19 +401,7 @@ let print_traces oc traces =
Printf.fprintf oc "Traces: {%a}\n" f traces
end
-(* Adapted from backend/PrintRTL.ml: print_function *)
-let print_code code = let open PrintRTL in let open Printf in
- if (!debug_flag) then begin
- fprintf stdout "{\n";
- let instrs =
- List.sort
- (fun (pc1, _) (pc2, _) -> compare pc2 pc1)
- (List.rev_map
- (fun (pc, i) -> (P.to_int pc, i))
- (PTree.elements code)) in
- List.iter (print_instruction stdout) instrs;
- fprintf stdout "}"
- end
+let print_code code = LICMaux.print_code code
(* Dumb (but linear) trace selection *)
let select_traces_linear code entrypoint =
@@ -627,15 +613,17 @@ let invert_iconds code =
type innerLoop = {
preds: P.t list;
- body: HashedSet.PSet.t;
+ body: P.t list;
head: P.t; (* head of the loop *)
- final: P.t (* the final instruction, which loops back to the head *)
+ finals: P.t list (* the final instructions, which loops back to the head *)
+ (* There may be more than one ; for instance if there is an if inside the loop with both
+ * branches leading to a goto backedge *)
}
let print_pset = LICMaux.pp_pset
let print_inner_loop iloop =
- debug "{preds: %a, body: %a}" print_intlist iloop.preds print_pset iloop.body
+ debug "{preds: %a, body: %a}" print_intlist iloop.preds print_intlist iloop.body
let rec print_inner_loops = function
| [] -> ()
@@ -657,6 +645,53 @@ let print_ptree printer pt =
let print_pint oc i = if !debug_flag then Printf.fprintf oc "%d" (P.to_int i) else ()
+let cb_exit_node = function
+ | Icond (_,_,n1,n2,p) -> begin match p with
+ | Some true -> Some n2
+ | Some false -> Some n1
+ | None -> None
+ end
+ | _ -> None
+
+ (*
+(* Alternative code to get inner_loops - use it if we suspect the other function to be bugged *)
+let get_natural_loop code predmap n =
+ let is_final_node m =
+ let successors = rtl_successors @@ get_some @@ PTree.get m code in
+ List.exists (fun s -> (P.to_int s) == (P.to_int n)) successors
+ in
+ let excluded_node = cb_exit_node @@ get_some @@ PTree.get n code in
+ let is_excluded m = match excluded_node with
+ | None -> false
+ | Some ex -> P.to_int ex == P.to_int m
+ in
+ debug "get_natural_loop for %d\n" (P.to_int n);
+ let body = bfs_until code n is_final_node is_excluded in
+ debug "BODY: %a\n" print_intlist body;
+ let final = List.find is_final_node body in
+ debug "FINAL: %d\n" (P.to_int final);
+ let preds = List.filter (fun pred -> List.mem pred body) @@ get_some @@ PTree.get n predmap in
+ debug "PREDS: %a\n" print_intlist preds;
+ { preds = preds; body = body; head = n; final = final }
+
+let rec count_loop_headers is_loop_header = function
+ | [] -> 0
+ | n :: ln ->
+ let rem = count_loop_headers is_loop_header ln in
+ if (get_some @@ PTree.get n is_loop_header) then rem + 1 else rem
+
+let get_inner_loops f code is_loop_header =
+ let predmap = get_predecessors_rtl code in
+ let iloops = ref [] in
+ List.iter (fun (n, ilh) -> if ilh then begin
+ let iloop = get_natural_loop code predmap n in
+ let nb_headers = count_loop_headers is_loop_header iloop.body in
+ if nb_headers == 1 then (* innermost loop *)
+ iloops := iloop :: !iloops end
+ ) (PTree.elements is_loop_header);
+ !iloops
+ *)
+
let get_inner_loops f code is_loop_header =
let fake_f = { fn_sig = f.fn_sig; fn_params = f.fn_params;
fn_stacksize = f.fn_stacksize; fn_code = code; fn_entrypoint = f.fn_entrypoint } in
@@ -674,17 +709,16 @@ let get_inner_loops f code is_loop_header =
assert (List.length heads == 1);
List.hd heads
end in
- let final = (* the predecessors from head that are in the body *)
+ let finals = (* the predecessors from head that are in the body *)
let head_preds = ptree_get_some head predmap in
let filtered = List.filter (fun n -> HashedSet.PSet.contains body n) head_preds in
begin
debug "HEAD: %d\n" (P.to_int head);
debug "BODY: %a\n" print_pset body;
debug "HEADPREDS: %a\n" print_intlist head_preds;
- assert (List.length filtered == 1);
- List.hd filtered
+ filtered
end in
- { preds = preds; body = body; head = head; final = final }
+ { preds = preds; body = (HashedSet.PSet.elements body); head = head; finals = finals }
)
(* LICMaux.inner_loops also returns non-inner loops, but with a body of 1 instruction
* We remove those to get just the inner loops *)
@@ -710,6 +744,8 @@ let generate_revmap ln ln' revmap = generate_fwmap ln' ln revmap
let apply_map fw n = P.of_int @@ ptree_get_some n fw
+let apply_map_list fw ln = List.map (apply_map fw) ln
+
let apply_map_opt fw n =
match PTree.get n fw with
| Some n' -> P.of_int n'
@@ -769,7 +805,7 @@ let rec count_ignore_nops code = function
* 3) Links the last instruction of body' into the first instruction of body
*)
let unroll_inner_loop_single code revmap iloop =
- let body = HashedSet.PSet.elements (iloop.body) in
+ let body = iloop.body in
if count_ignore_nops code body > !Clflags.option_funrollsingle then begin
debug "Too many nodes in the loop body (%d > %d)" (List.length body) !Clflags.option_funrollsingle;
(code, revmap)
@@ -777,12 +813,12 @@ let unroll_inner_loop_single code revmap iloop =
let (code2, revmap2, dupbody, fwmap) = clone code revmap body in
let code' = ref code2 in
let head' = apply_map fwmap (iloop.head) in
- let final' = apply_map fwmap (iloop.final) in
+ let finals' = apply_map_list fwmap (iloop.finals) in
begin
debug "PREDS: %a\n" print_intlist iloop.preds;
debug "IHEAD: %d\n" (P.to_int iloop.head);
code' := change_pointers !code' (iloop.head) head' (iloop.preds);
- code' := change_pointers !code' head' (iloop.head) [final'];
+ code' := change_pointers !code' head' (iloop.head) finals';
(!code', revmap2)
end
@@ -806,7 +842,7 @@ let unroll_inner_loops_single f code revmap =
* 3) Links the last instruction of body' into the first of body
*)
let unroll_inner_loop_body code revmap iloop =
- let body = HashedSet.PSet.elements (iloop.body) in
+ let body = iloop.body in
let limit = !Clflags.option_funrollbody in
if count_ignore_nops code body > limit then begin
debug "Too many nodes in the loop body (%d > %d)" (List.length body) limit;
@@ -815,16 +851,18 @@ let unroll_inner_loop_body code revmap iloop =
let (code2, revmap2, dupbody, fwmap) = clone code revmap body in
let code' = ref code2 in
let head' = apply_map fwmap (iloop.head) in
- let final' = apply_map fwmap (iloop.final) in
+ let finals' = apply_map_list fwmap (iloop.finals) in
begin
- code' := change_pointers !code' iloop.head head' [iloop.final];
- code' := change_pointers !code' head' iloop.head [final'];
+ code' := change_pointers !code' iloop.head head' iloop.finals;
+ code' := change_pointers !code' head' iloop.head finals';
(!code', revmap2)
end
let unroll_inner_loops_body f code revmap =
let is_loop_header = get_loop_headers code (f.fn_entrypoint) in
+ (* debug_flag := true; *)
let inner_loops = get_inner_loops f code is_loop_header in
+ debug "Number of loops found: %d\n" (List.length inner_loops);
let code' = ref code in
let revmap' = ref revmap in
begin
@@ -832,46 +870,102 @@ let unroll_inner_loops_body f code revmap =
List.iter (fun iloop ->
let (new_code, new_revmap) = unroll_inner_loop_body !code' !revmap' iloop in
code' := new_code; revmap' := new_revmap
+ ) inner_loops; (* debug_flag := false; *)
+ (!code', !revmap')
+ end
+
+let extract_upto_icond f code head =
+ let rec extract h =
+ let inst = get_some @@ PTree.get h code in
+ match inst with
+ | Icond _ -> [h]
+ | _ -> ( match rtl_successors inst with
+ | [n] -> h :: (extract n)
+ | _ -> failwith "Found a node with more than one successor??"
+ )
+ in List.rev @@ extract head
+
+let rotate_inner_loop f code revmap iloop =
+ let header = extract_upto_icond f code iloop.head in
+ let limit = !Clflags.option_flooprotate in
+ if count_ignore_nops code header > limit then begin
+ debug "Loop Rotate: too many nodes to duplicate (%d > %d)" (List.length header) limit;
+ (code, revmap)
+ end else
+ let (code2, revmap2, dupheader, fwmap) = clone code revmap header in
+ let code' = ref code2 in
+ let head' = apply_map fwmap iloop.head in
+ begin
+ code' := change_pointers !code' iloop.head head' iloop.preds;
+ (!code', revmap2)
+ end
+
+let rotate_inner_loops f code revmap =
+ let is_loop_header = get_loop_headers code (f.fn_entrypoint) in
+ let inner_loops = get_inner_loops f code is_loop_header in
+ let code' = ref code in
+ let revmap' = ref revmap in
+ begin
+ print_inner_loops inner_loops;
+ List.iter (fun iloop ->
+ let (new_code, new_revmap) = rotate_inner_loop f !code' !revmap' iloop in
+ code' := new_code; revmap' := new_revmap
) inner_loops;
(!code', !revmap')
end
-let duplicate_aux f =
- (* initializing *)
+let loop_rotate f =
let entrypoint = f.fn_entrypoint in
let code = f.fn_code in
let revmap = make_identity_ptree code in
+ let (code, revmap) =
+ if !Clflags.option_flooprotate > 0 then
+ rotate_inner_loops f code revmap
+ else (code, revmap) in
+ ((code, entrypoint), revmap)
- (* static prediction *)
+let static_predict f =
+ let entrypoint = f.fn_entrypoint in
+ let code = f.fn_code in
+ let revmap = make_identity_ptree code in
let code =
if !Clflags.option_fpredict then
update_directions code entrypoint
else code in
+ let code =
+ if !Clflags.option_fpredict then
+ invert_iconds code
+ else code in
+ ((code, entrypoint), revmap)
- (* unroll single *)
+let unroll_single f =
+ let entrypoint = f.fn_entrypoint in
+ let code = f.fn_code in
+ let revmap = make_identity_ptree code in
let (code, revmap) =
if !Clflags.option_funrollsingle > 0 then
unroll_inner_loops_single f code revmap
else (code, revmap) in
+ ((code, entrypoint), revmap)
- (* unroll body *)
+let unroll_body f =
+ let entrypoint = f.fn_entrypoint in
+ let code = f.fn_code in
+ let revmap = make_identity_ptree code in
let (code, revmap) =
if !Clflags.option_funrollbody > 0 then
unroll_inner_loops_body f code revmap
else (code, revmap) in
+ ((code, entrypoint), revmap)
- (* static prediction bis *)
- let code =
- if !Clflags.option_fpredict then
- invert_iconds code
- else code in
-
- (* tail duplication *)
+let tail_duplicate f =
+ let entrypoint = f.fn_entrypoint in
+ let code = f.fn_code in
+ let revmap = make_identity_ptree code in
let (code, revmap) =
if !Clflags.option_ftailduplicate > 0 then
let traces = select_traces code entrypoint in
let preds = get_predecessors_rtl code in
superblockify_traces code preds traces revmap
else (code, revmap) in
-
((code, entrypoint), revmap)