diff options
Diffstat (limited to 'backend/Duplicateaux.ml')
-rw-r--r-- | backend/Duplicateaux.ml | 208 |
1 files changed, 151 insertions, 57 deletions
diff --git a/backend/Duplicateaux.ml b/backend/Duplicateaux.ml index eb9f42e0..76b5616b 100644 --- a/backend/Duplicateaux.ml +++ b/backend/Duplicateaux.ml @@ -30,7 +30,9 @@ let get_some = LICMaux.get_some let rtl_successors = LICMaux.rtl_successors (* Get list of nodes following a BFS of the code *) -let bfs code entrypoint = begin +(* Stops when predicate is reached + * Excludes any node given in excluded function *) +let bfs_until code entrypoint (predicate: node->bool) (excluded: node->bool) = begin debug "bfs\n"; let visited = ref (PTree.map (fun n i -> false) code) and bfs_list = ref [] @@ -40,20 +42,24 @@ let bfs code entrypoint = begin Queue.add entrypoint to_visit; while not (Queue.is_empty to_visit) do node := Queue.pop to_visit; - if not (get_some @@ PTree.get !node !visited) then begin + if (not (get_some @@ PTree.get !node !visited)) then begin visited := PTree.set !node true !visited; - match PTree.get !node code with - | None -> failwith "No such node" - | Some i -> - bfs_list := !node :: !bfs_list; - let succ = rtl_successors i in - List.iter (fun n -> Queue.add n to_visit) succ + if not (excluded !node) then begin + match PTree.get !node code with + | None -> failwith "No such node" + | Some i -> + bfs_list := !node :: !bfs_list; + if not (predicate !node) then + let succ = rtl_successors i in List.iter (fun n -> Queue.add n to_visit) succ + end end done; List.rev !bfs_list end end +let bfs code entrypoint = bfs_until code entrypoint (fun _ -> false) (fun _ -> false) + let optbool o = match o with Some _ -> true | None -> false let ptree_get_some n ptree = get_some @@ PTree.get n ptree @@ -81,15 +87,7 @@ end module PSet = Set.Make(PInt) -let print_intlist oc l = - let rec f oc = function - | [] -> () - | n::ln -> (Printf.fprintf oc "%d %a" (P.to_int n) f ln) - in begin - if !debug_flag then begin - Printf.fprintf oc "[%a]" f l - end - end +let print_intlist = LICMaux.print_intlist let print_intset s = let seq = PSet.to_seq s @@ -403,19 +401,7 @@ let print_traces oc traces = Printf.fprintf oc "Traces: {%a}\n" f traces end -(* Adapted from backend/PrintRTL.ml: print_function *) -let print_code code = let open PrintRTL in let open Printf in - if (!debug_flag) then begin - fprintf stdout "{\n"; - let instrs = - List.sort - (fun (pc1, _) (pc2, _) -> compare pc2 pc1) - (List.rev_map - (fun (pc, i) -> (P.to_int pc, i)) - (PTree.elements code)) in - List.iter (print_instruction stdout) instrs; - fprintf stdout "}" - end +let print_code code = LICMaux.print_code code (* Dumb (but linear) trace selection *) let select_traces_linear code entrypoint = @@ -627,15 +613,17 @@ let invert_iconds code = type innerLoop = { preds: P.t list; - body: HashedSet.PSet.t; + body: P.t list; head: P.t; (* head of the loop *) - final: P.t (* the final instruction, which loops back to the head *) + finals: P.t list (* the final instructions, which loops back to the head *) + (* There may be more than one ; for instance if there is an if inside the loop with both + * branches leading to a goto backedge *) } let print_pset = LICMaux.pp_pset let print_inner_loop iloop = - debug "{preds: %a, body: %a}" print_intlist iloop.preds print_pset iloop.body + debug "{preds: %a, body: %a}" print_intlist iloop.preds print_intlist iloop.body let rec print_inner_loops = function | [] -> () @@ -657,6 +645,53 @@ let print_ptree printer pt = let print_pint oc i = if !debug_flag then Printf.fprintf oc "%d" (P.to_int i) else () +let cb_exit_node = function + | Icond (_,_,n1,n2,p) -> begin match p with + | Some true -> Some n2 + | Some false -> Some n1 + | None -> None + end + | _ -> None + + (* +(* Alternative code to get inner_loops - use it if we suspect the other function to be bugged *) +let get_natural_loop code predmap n = + let is_final_node m = + let successors = rtl_successors @@ get_some @@ PTree.get m code in + List.exists (fun s -> (P.to_int s) == (P.to_int n)) successors + in + let excluded_node = cb_exit_node @@ get_some @@ PTree.get n code in + let is_excluded m = match excluded_node with + | None -> false + | Some ex -> P.to_int ex == P.to_int m + in + debug "get_natural_loop for %d\n" (P.to_int n); + let body = bfs_until code n is_final_node is_excluded in + debug "BODY: %a\n" print_intlist body; + let final = List.find is_final_node body in + debug "FINAL: %d\n" (P.to_int final); + let preds = List.filter (fun pred -> List.mem pred body) @@ get_some @@ PTree.get n predmap in + debug "PREDS: %a\n" print_intlist preds; + { preds = preds; body = body; head = n; final = final } + +let rec count_loop_headers is_loop_header = function + | [] -> 0 + | n :: ln -> + let rem = count_loop_headers is_loop_header ln in + if (get_some @@ PTree.get n is_loop_header) then rem + 1 else rem + +let get_inner_loops f code is_loop_header = + let predmap = get_predecessors_rtl code in + let iloops = ref [] in + List.iter (fun (n, ilh) -> if ilh then begin + let iloop = get_natural_loop code predmap n in + let nb_headers = count_loop_headers is_loop_header iloop.body in + if nb_headers == 1 then (* innermost loop *) + iloops := iloop :: !iloops end + ) (PTree.elements is_loop_header); + !iloops + *) + let get_inner_loops f code is_loop_header = let fake_f = { fn_sig = f.fn_sig; fn_params = f.fn_params; fn_stacksize = f.fn_stacksize; fn_code = code; fn_entrypoint = f.fn_entrypoint } in @@ -674,17 +709,16 @@ let get_inner_loops f code is_loop_header = assert (List.length heads == 1); List.hd heads end in - let final = (* the predecessors from head that are in the body *) + let finals = (* the predecessors from head that are in the body *) let head_preds = ptree_get_some head predmap in let filtered = List.filter (fun n -> HashedSet.PSet.contains body n) head_preds in begin debug "HEAD: %d\n" (P.to_int head); debug "BODY: %a\n" print_pset body; debug "HEADPREDS: %a\n" print_intlist head_preds; - assert (List.length filtered == 1); - List.hd filtered + filtered end in - { preds = preds; body = body; head = head; final = final } + { preds = preds; body = (HashedSet.PSet.elements body); head = head; finals = finals } ) (* LICMaux.inner_loops also returns non-inner loops, but with a body of 1 instruction * We remove those to get just the inner loops *) @@ -710,6 +744,8 @@ let generate_revmap ln ln' revmap = generate_fwmap ln' ln revmap let apply_map fw n = P.of_int @@ ptree_get_some n fw +let apply_map_list fw ln = List.map (apply_map fw) ln + let apply_map_opt fw n = match PTree.get n fw with | Some n' -> P.of_int n' @@ -769,7 +805,7 @@ let rec count_ignore_nops code = function * 3) Links the last instruction of body' into the first instruction of body *) let unroll_inner_loop_single code revmap iloop = - let body = HashedSet.PSet.elements (iloop.body) in + let body = iloop.body in if count_ignore_nops code body > !Clflags.option_funrollsingle then begin debug "Too many nodes in the loop body (%d > %d)" (List.length body) !Clflags.option_funrollsingle; (code, revmap) @@ -777,12 +813,12 @@ let unroll_inner_loop_single code revmap iloop = let (code2, revmap2, dupbody, fwmap) = clone code revmap body in let code' = ref code2 in let head' = apply_map fwmap (iloop.head) in - let final' = apply_map fwmap (iloop.final) in + let finals' = apply_map_list fwmap (iloop.finals) in begin debug "PREDS: %a\n" print_intlist iloop.preds; debug "IHEAD: %d\n" (P.to_int iloop.head); code' := change_pointers !code' (iloop.head) head' (iloop.preds); - code' := change_pointers !code' head' (iloop.head) [final']; + code' := change_pointers !code' head' (iloop.head) finals'; (!code', revmap2) end @@ -806,7 +842,7 @@ let unroll_inner_loops_single f code revmap = * 3) Links the last instruction of body' into the first of body *) let unroll_inner_loop_body code revmap iloop = - let body = HashedSet.PSet.elements (iloop.body) in + let body = iloop.body in let limit = !Clflags.option_funrollbody in if count_ignore_nops code body > limit then begin debug "Too many nodes in the loop body (%d > %d)" (List.length body) limit; @@ -815,16 +851,18 @@ let unroll_inner_loop_body code revmap iloop = let (code2, revmap2, dupbody, fwmap) = clone code revmap body in let code' = ref code2 in let head' = apply_map fwmap (iloop.head) in - let final' = apply_map fwmap (iloop.final) in + let finals' = apply_map_list fwmap (iloop.finals) in begin - code' := change_pointers !code' iloop.head head' [iloop.final]; - code' := change_pointers !code' head' iloop.head [final']; + code' := change_pointers !code' iloop.head head' iloop.finals; + code' := change_pointers !code' head' iloop.head finals'; (!code', revmap2) end let unroll_inner_loops_body f code revmap = let is_loop_header = get_loop_headers code (f.fn_entrypoint) in + (* debug_flag := true; *) let inner_loops = get_inner_loops f code is_loop_header in + debug "Number of loops found: %d\n" (List.length inner_loops); let code' = ref code in let revmap' = ref revmap in begin @@ -832,46 +870,102 @@ let unroll_inner_loops_body f code revmap = List.iter (fun iloop -> let (new_code, new_revmap) = unroll_inner_loop_body !code' !revmap' iloop in code' := new_code; revmap' := new_revmap + ) inner_loops; (* debug_flag := false; *) + (!code', !revmap') + end + +let extract_upto_icond f code head = + let rec extract h = + let inst = get_some @@ PTree.get h code in + match inst with + | Icond _ -> [h] + | _ -> ( match rtl_successors inst with + | [n] -> h :: (extract n) + | _ -> failwith "Found a node with more than one successor??" + ) + in List.rev @@ extract head + +let rotate_inner_loop f code revmap iloop = + let header = extract_upto_icond f code iloop.head in + let limit = !Clflags.option_flooprotate in + if count_ignore_nops code header > limit then begin + debug "Loop Rotate: too many nodes to duplicate (%d > %d)" (List.length header) limit; + (code, revmap) + end else + let (code2, revmap2, dupheader, fwmap) = clone code revmap header in + let code' = ref code2 in + let head' = apply_map fwmap iloop.head in + begin + code' := change_pointers !code' iloop.head head' iloop.preds; + (!code', revmap2) + end + +let rotate_inner_loops f code revmap = + let is_loop_header = get_loop_headers code (f.fn_entrypoint) in + let inner_loops = get_inner_loops f code is_loop_header in + let code' = ref code in + let revmap' = ref revmap in + begin + print_inner_loops inner_loops; + List.iter (fun iloop -> + let (new_code, new_revmap) = rotate_inner_loop f !code' !revmap' iloop in + code' := new_code; revmap' := new_revmap ) inner_loops; (!code', !revmap') end -let duplicate_aux f = - (* initializing *) +let loop_rotate f = let entrypoint = f.fn_entrypoint in let code = f.fn_code in let revmap = make_identity_ptree code in + let (code, revmap) = + if !Clflags.option_flooprotate > 0 then + rotate_inner_loops f code revmap + else (code, revmap) in + ((code, entrypoint), revmap) - (* static prediction *) +let static_predict f = + let entrypoint = f.fn_entrypoint in + let code = f.fn_code in + let revmap = make_identity_ptree code in let code = if !Clflags.option_fpredict then update_directions code entrypoint else code in + let code = + if !Clflags.option_fpredict then + invert_iconds code + else code in + ((code, entrypoint), revmap) - (* unroll single *) +let unroll_single f = + let entrypoint = f.fn_entrypoint in + let code = f.fn_code in + let revmap = make_identity_ptree code in let (code, revmap) = if !Clflags.option_funrollsingle > 0 then unroll_inner_loops_single f code revmap else (code, revmap) in + ((code, entrypoint), revmap) - (* unroll body *) +let unroll_body f = + let entrypoint = f.fn_entrypoint in + let code = f.fn_code in + let revmap = make_identity_ptree code in let (code, revmap) = if !Clflags.option_funrollbody > 0 then unroll_inner_loops_body f code revmap else (code, revmap) in + ((code, entrypoint), revmap) - (* static prediction bis *) - let code = - if !Clflags.option_fpredict then - invert_iconds code - else code in - - (* tail duplication *) +let tail_duplicate f = + let entrypoint = f.fn_entrypoint in + let code = f.fn_code in + let revmap = make_identity_ptree code in let (code, revmap) = if !Clflags.option_ftailduplicate > 0 then let traces = select_traces code entrypoint in let preds = get_predecessors_rtl code in superblockify_traces code preds traces revmap else (code, revmap) in - ((code, entrypoint), revmap) |