diff options
author | Cyril SIX <cyril.six@kalray.eu> | 2020-10-07 16:54:21 +0200 |
---|---|---|
committer | Cyril SIX <cyril.six@kalray.eu> | 2020-10-07 16:54:21 +0200 |
commit | d788824fe0ff49095eb44af7aadd88aafeddc38c (patch) | |
tree | 5f0a6a5d6b8f6c63c2a2369806b5b7da1652c979 /backend/Duplicateaux.ml | |
parent | 3410d085513f045e2215419da85dccd3cc88779a (diff) | |
download | compcert-kvx-d788824fe0ff49095eb44af7aadd88aafeddc38c.tar.gz compcert-kvx-d788824fe0ff49095eb44af7aadd88aafeddc38c.zip |
[EXP] First draft of 1st iteration unrolling
Diffstat (limited to 'backend/Duplicateaux.ml')
-rw-r--r-- | backend/Duplicateaux.ml | 175 |
1 files changed, 102 insertions, 73 deletions
diff --git a/backend/Duplicateaux.ml b/backend/Duplicateaux.ml index b54ac5dc..6b1fc43a 100644 --- a/backend/Duplicateaux.ml +++ b/backend/Duplicateaux.ml @@ -396,6 +396,20 @@ let print_traces oc traces = Printf.fprintf oc "Traces: {%a}\n" f traces end +(* Adapted from backend/PrintRTL.ml: print_function *) +let print_code code = let open PrintRTL in let open Printf in + if (!debug_flag) then begin + fprintf stdout "{\n"; + let instrs = + List.sort + (fun (pc1, _) (pc2, _) -> compare pc2 pc1) + (List.rev_map + (fun (pc, i) -> (P.to_int pc, i)) + (PTree.elements code)) in + List.iter (print_instruction stdout) instrs; + fprintf stdout "}" + end + (* Dumb (but linear) trace selection *) let select_traces_linear code entrypoint = let is_visited = ref (PTree.map (fun n i -> false) code) in @@ -489,18 +503,18 @@ let rec change_pointers code n n' = function | [] -> code | node :: nodes -> let new_pred_inst = match ptree_get_some node code with - | Icall(a, b, c, d, n0) -> assert (n0 == n); Icall(a, b, c, d, n') - | Ibuiltin(a, b, c, n0) -> assert (n0 == n); Ibuiltin(a, b, c, n') - | Ijumptable(a, ln) -> assert (optbool @@ List.find_opt (fun e -> e == n) ln); - Ijumptable(a, List.map (fun e -> if (e == n) then n' else e) ln) - | Icond(a, b, n1, n2, i) -> assert (n1 == n || n2 == n); - let n1' = if (n1 == n) then n' else n1 - in let n2' = if (n2 == n) then n' else n2 + | Icall(a, b, c, d, n0) -> assert (n0 = n); Icall(a, b, c, d, n') + | Ibuiltin(a, b, c, n0) -> assert (n0 = n); Ibuiltin(a, b, c, n') + | Ijumptable(a, ln) -> assert (optbool @@ List.find_opt (fun e -> e = n) ln); + Ijumptable(a, List.map (fun e -> if (e = n) then n' else e) ln) + | Icond(a, b, n1, n2, i) -> assert (n1 = n || n2 = n); + let n1' = if (n1 = n) then n' else n1 + in let n2' = if (n2 = n) then n' else n2 in Icond(a, b, n1', n2', i) - | Inop n0 -> assert (n0 == n); Inop n' - | Iop (a, b, c, n0) -> assert (n0 == n); Iop (a, b, c, n') - | Iload (a, b, c, d, e, n0) -> assert (n0 == n); Iload (a, b, c, d, e, n') - | Istore (a, b, c, d, n0) -> assert (n0 == n); Istore (a, b, c, d, n') + | Inop n0 -> assert (n0 = n); Inop n' + | Iop (a, b, c, n0) -> assert (n0 = n); Iop (a, b, c, n') + | Iload (a, b, c, d, e, n0) -> assert (n0 = n); Iload (a, b, c, d, e, n') + | Istore (a, b, c, d, n0) -> assert (n0 = n); Istore (a, b, c, d, n') | Itailcall _ | Ireturn _ -> failwith "That instruction cannot be a predecessor" in let new_code = PTree.set node new_pred_inst code in change_pointers new_code n n' nodes @@ -611,6 +625,8 @@ let rec invert_iconds code = function type innerLoop = { preds: P.t list; body: HashedSet.PSet.t; + head: P.t; (* head of the loop *) + final: P.t (* the final instruction, which loops back to the head *) } let print_pset = LICMaux.pp_pset @@ -636,47 +652,69 @@ let print_ptree printer pt = debug "]\n" end -let get_inner_loops f = +let print_pint oc i = if !debug_flag then Printf.fprintf oc "%d" (P.to_int i) else () + +let get_inner_loops f is_loop_header = let (_, predmap, loopmap) = LICMaux.inner_loops f in begin debug "PREDMAP: "; print_ptree print_intlist predmap; debug "LOOPMAP: "; print_ptree print_pset loopmap; - let all_loops = List.map (fun (n, body) -> + List.map (fun (n, body) -> let preds = List.filter (fun p -> not @@ HashedSet.PSet.contains body p) @@ get_some @@ PTree.get n predmap in - { preds = preds; body = body } - ) (PTree.elements loopmap) in + let head = (* the instruction from body which is a loop header *) + let heads = HashedSet.PSet.elements @@ HashedSet.PSet.filter + (fun n -> ptree_get_some n is_loop_header) body in + begin + assert (List.length heads == 1); + List.hd heads + end in + let final = (* the predecessors from head that are in the body *) + let head_preds = ptree_get_some head predmap in + let filtered = List.filter (fun n -> HashedSet.PSet.contains body n) head_preds in + begin + debug "HEAD: %d\n" (P.to_int head); + debug "BODY: %a\n" print_pset body; + debug "HEADPREDS: %a\n" print_intlist head_preds; + assert (List.length filtered == 1); + List.hd filtered + end in + { preds = preds; body = body; head = head; final = final } + ) (* LICMaux.inner_loops also returns non-inner loops, but with a body of 1 instruction * We remove those to get just the inner loops *) - List.filter (fun iloop -> - let count = List.length @@ HashedSet.PSet.elements iloop.body in count != 1 - ) all_loops + @@ List.filter (fun (n, body) -> + let count = List.length @@ HashedSet.PSet.elements body in count != 1 + ) (PTree.elements loopmap) end -let generate_fwmap ln ln' = - let rec f ln ln' fwmap = - match ln with - | [] -> begin - match ln' with - | [] -> fwmap - | _ -> failwith "ln and ln' have different lengths" - end - | n :: ln -> begin - match ln' with - | n' :: ln' -> f ln ln' (PTree.set n n' fwmap) - | _ -> failwith "ln and ln' have different lengths" - end - in f ln ln' PTree.empty +let rec generate_fwmap ln ln' fwmap = + match ln with + | [] -> begin + match ln' with + | [] -> fwmap + | _ -> failwith "ln and ln' have different lengths" + end + | n :: ln -> begin + match ln' with + | n' :: ln' -> generate_fwmap ln ln' (PTree.set n n' fwmap) + | _ -> failwith "ln and ln' have different lengths" + end -let generate_revmap ln ln' = generate_fwmap ln' ln +let generate_revmap ln ln' revmap = generate_fwmap ln' ln revmap let apply_map fw n = P.of_int @@ ptree_get_some n fw +let apply_map_opt fw n = + match PTree.get n fw with + | Some n' -> P.of_int n' + | None -> n + let change_nexts fwmap = function | Icall (a, b, c, d, n) -> Icall (a, b, c, d, apply_map fwmap n) | Ibuiltin (a, b, c, n) -> Ibuiltin (a, b, c, apply_map fwmap n) - | Ijumptable (a, ln) -> Ijumptable (a, List.map (apply_map fwmap) ln) - | Icond (a, b, n1, n2, i) -> Icond (a, b, apply_map fwmap n1, apply_map fwmap n2, i) + | Ijumptable (a, ln) -> Ijumptable (a, List.map (apply_map_opt fwmap) ln) + | Icond (a, b, n1, n2, i) -> Icond (a, b, apply_map_opt fwmap n1, apply_map_opt fwmap n2, i) | Inop n -> Inop (apply_map fwmap n) | Iop (a, b, c, n) -> Iop (a, b, c, apply_map fwmap n) | Iload (a, b, c, d, e, n) -> Iload (a, b, c, d, e, apply_map fwmap n) @@ -684,16 +722,6 @@ let change_nexts fwmap = function | Itailcall (a, b, c) -> Itailcall (a, b, c) | Ireturn o -> Ireturn o -(* let change_single_next n' = function - | Icall (a, b, c, d, n) -> Icall (a, b, c, d, n') - | Ibuiltin (a, b, c, n) -> Ibuiltin (a, b, c, n') - | Inop n -> Inop n' - | Iop (a, b, c, n) -> Iop (a, b, c, n') - | Iload (a, b, c, d, e, n) -> Iload (a, b, c, d, e, n') - | Istore (a, b, c, d, n) -> Istore (a, b, c, d, n') - | _ -> failwith "Not an instruction with single successor" - *) - (** Clone a list of instructions into free pc indexes * * The list of instructions should be contiguous, and not include any loop. @@ -709,20 +737,20 @@ let change_nexts fwmap = function let clone code revmap ln = begin assert (List.length ln > 0); let head' = next_free_pc code in - let head = P.to_int @@ List.hd ln in - let ln' = List.map (fun n -> n + (head' - head)) @@ List.map P.to_int ln in - let fwmap = generate_fwmap ln ln' in - let revmap' = generate_revmap ln (List.map P.of_int ln') in + (* +head' to ensure we never overlap with the existing code *) + let ln' = List.map (fun n -> n + head') @@ List.map P.to_int ln in + let fwmap = generate_fwmap ln ln' PTree.empty in + let revmap' = generate_revmap ln (List.map P.of_int ln') revmap in let code' = ref code in List.iter (fun n -> let instr = get_some @@ PTree.get n code in let instr' = change_nexts fwmap instr in code' := PTree.set (apply_map fwmap n) instr' !code' ) ln; - (!code', revmap', ln') + (!code', revmap', ln', fwmap) end -(** Unrolls a single interation of the inner loop +(* Unrolls a single interation of the inner loop * 1) Clones the body into body' * 2) Links the preds to the first instruction of body' * 3) Links the last instruction of body' into the first instruction of body @@ -732,20 +760,23 @@ end * Or we need to explicitly label the head and final instructions of the inner loop *) let unroll_inner_loop_single code revmap iloop = - let (code2, revmap2, dupbody, fwmap) = clone code revmap (iloop.body) in + let body = HashedSet.PSet.elements (iloop.body) in + let (code2, revmap2, dupbody, fwmap) = clone code revmap body in let code' = ref code2 in - let first_n = List.hd (iloop.body) in - let first_n' = List.hd dupbody in - let last_n' = List.hd @@ List.rev dupbody in + let head' = apply_map fwmap (iloop.head) in + let final' = apply_map fwmap (iloop.final) in begin - code' := change_pointers !code' first_n first_n' (iloop.preds); - code' := change_pointers !code' first_n' first_n [last_n']; + debug "PREDS: %a\n" print_intlist iloop.preds; + debug "IHEAD: %d\n" (P.to_int iloop.head); + code' := change_pointers !code' (iloop.head) head' (iloop.preds); + code' := change_pointers !code' head' (iloop.head) [final']; (* code' := PTree.set last_n' (change_single_next first_n @@ ptree_get_some last_n' !code') !code' *) (!code', revmap2) end let unroll_inner_loops_single f revmap = - let inner_loops = get_inner_loops f in + let is_loop_header = get_loop_headers (f.fn_code) (f.fn_entrypoint) in + let inner_loops = get_inner_loops f is_loop_header in let code' = ref f.fn_code in let revmap' = ref revmap in begin @@ -758,19 +789,17 @@ let unroll_inner_loops_single f revmap = end let duplicate_aux f = - begin - let entrypoint = f.fn_entrypoint in - if !Clflags.option_fduplicate < 0 then - ((f.fn_code, entrypoint), make_identity_ptree f.fn_code) + let entrypoint = f.fn_entrypoint in + if !Clflags.option_fduplicate < 0 then + ((f.fn_code, entrypoint), make_identity_ptree f.fn_code) + else + let (code, revmap) = unroll_inner_loops_single f (make_identity_ptree (f.fn_code)) in + let code' = update_directions code entrypoint in + let traces = select_traces code' entrypoint in + let icond_code = invert_iconds code' traces in + let preds = get_predecessors_rtl icond_code in + if !Clflags.option_fduplicate >= 1 then + let (new_code, pTreeId) = superblockify_traces icond_code preds traces revmap in + ((new_code, entrypoint), pTreeId) else - let (code, revmap) = unroll_inner_loops_single f (make_identity_ptree code) in - let code = update_directions code entrypoint in - let traces = select_traces code entrypoint in - let icond_code = invert_iconds code traces in - let preds = get_predecessors_rtl icond_code in - if !Clflags.option_fduplicate >= 1 then - let (new_code, pTreeId) = superblockify_traces icond_code preds traces revmap in - ((new_code, f.fn_entrypoint), pTreeId) - else - ((icond_code, entrypoint), make_identity_ptree code) - end + ((icond_code, entrypoint), revmap) |