aboutsummaryrefslogtreecommitdiffstats
path: root/backend/Duplicateaux.ml
diff options
context:
space:
mode:
authorCyril SIX <cyril.six@kalray.eu>2020-10-07 16:54:21 +0200
committerCyril SIX <cyril.six@kalray.eu>2020-10-07 16:54:21 +0200
commitd788824fe0ff49095eb44af7aadd88aafeddc38c (patch)
tree5f0a6a5d6b8f6c63c2a2369806b5b7da1652c979 /backend/Duplicateaux.ml
parent3410d085513f045e2215419da85dccd3cc88779a (diff)
downloadcompcert-kvx-d788824fe0ff49095eb44af7aadd88aafeddc38c.tar.gz
compcert-kvx-d788824fe0ff49095eb44af7aadd88aafeddc38c.zip
[EXP] First draft of 1st iteration unrolling
Diffstat (limited to 'backend/Duplicateaux.ml')
-rw-r--r--backend/Duplicateaux.ml175
1 files changed, 102 insertions, 73 deletions
diff --git a/backend/Duplicateaux.ml b/backend/Duplicateaux.ml
index b54ac5dc..6b1fc43a 100644
--- a/backend/Duplicateaux.ml
+++ b/backend/Duplicateaux.ml
@@ -396,6 +396,20 @@ let print_traces oc traces =
Printf.fprintf oc "Traces: {%a}\n" f traces
end
+(* Adapted from backend/PrintRTL.ml: print_function *)
+let print_code code = let open PrintRTL in let open Printf in
+ if (!debug_flag) then begin
+ fprintf stdout "{\n";
+ let instrs =
+ List.sort
+ (fun (pc1, _) (pc2, _) -> compare pc2 pc1)
+ (List.rev_map
+ (fun (pc, i) -> (P.to_int pc, i))
+ (PTree.elements code)) in
+ List.iter (print_instruction stdout) instrs;
+ fprintf stdout "}"
+ end
+
(* Dumb (but linear) trace selection *)
let select_traces_linear code entrypoint =
let is_visited = ref (PTree.map (fun n i -> false) code) in
@@ -489,18 +503,18 @@ let rec change_pointers code n n' = function
| [] -> code
| node :: nodes ->
let new_pred_inst = match ptree_get_some node code with
- | Icall(a, b, c, d, n0) -> assert (n0 == n); Icall(a, b, c, d, n')
- | Ibuiltin(a, b, c, n0) -> assert (n0 == n); Ibuiltin(a, b, c, n')
- | Ijumptable(a, ln) -> assert (optbool @@ List.find_opt (fun e -> e == n) ln);
- Ijumptable(a, List.map (fun e -> if (e == n) then n' else e) ln)
- | Icond(a, b, n1, n2, i) -> assert (n1 == n || n2 == n);
- let n1' = if (n1 == n) then n' else n1
- in let n2' = if (n2 == n) then n' else n2
+ | Icall(a, b, c, d, n0) -> assert (n0 = n); Icall(a, b, c, d, n')
+ | Ibuiltin(a, b, c, n0) -> assert (n0 = n); Ibuiltin(a, b, c, n')
+ | Ijumptable(a, ln) -> assert (optbool @@ List.find_opt (fun e -> e = n) ln);
+ Ijumptable(a, List.map (fun e -> if (e = n) then n' else e) ln)
+ | Icond(a, b, n1, n2, i) -> assert (n1 = n || n2 = n);
+ let n1' = if (n1 = n) then n' else n1
+ in let n2' = if (n2 = n) then n' else n2
in Icond(a, b, n1', n2', i)
- | Inop n0 -> assert (n0 == n); Inop n'
- | Iop (a, b, c, n0) -> assert (n0 == n); Iop (a, b, c, n')
- | Iload (a, b, c, d, e, n0) -> assert (n0 == n); Iload (a, b, c, d, e, n')
- | Istore (a, b, c, d, n0) -> assert (n0 == n); Istore (a, b, c, d, n')
+ | Inop n0 -> assert (n0 = n); Inop n'
+ | Iop (a, b, c, n0) -> assert (n0 = n); Iop (a, b, c, n')
+ | Iload (a, b, c, d, e, n0) -> assert (n0 = n); Iload (a, b, c, d, e, n')
+ | Istore (a, b, c, d, n0) -> assert (n0 = n); Istore (a, b, c, d, n')
| Itailcall _ | Ireturn _ -> failwith "That instruction cannot be a predecessor"
in let new_code = PTree.set node new_pred_inst code
in change_pointers new_code n n' nodes
@@ -611,6 +625,8 @@ let rec invert_iconds code = function
type innerLoop = {
preds: P.t list;
body: HashedSet.PSet.t;
+ head: P.t; (* head of the loop *)
+ final: P.t (* the final instruction, which loops back to the head *)
}
let print_pset = LICMaux.pp_pset
@@ -636,47 +652,69 @@ let print_ptree printer pt =
debug "]\n"
end
-let get_inner_loops f =
+let print_pint oc i = if !debug_flag then Printf.fprintf oc "%d" (P.to_int i) else ()
+
+let get_inner_loops f is_loop_header =
let (_, predmap, loopmap) = LICMaux.inner_loops f in
begin
debug "PREDMAP: "; print_ptree print_intlist predmap;
debug "LOOPMAP: "; print_ptree print_pset loopmap;
- let all_loops = List.map (fun (n, body) ->
+ List.map (fun (n, body) ->
let preds = List.filter (fun p -> not @@ HashedSet.PSet.contains body p)
@@ get_some @@ PTree.get n predmap in
- { preds = preds; body = body }
- ) (PTree.elements loopmap) in
+ let head = (* the instruction from body which is a loop header *)
+ let heads = HashedSet.PSet.elements @@ HashedSet.PSet.filter
+ (fun n -> ptree_get_some n is_loop_header) body in
+ begin
+ assert (List.length heads == 1);
+ List.hd heads
+ end in
+ let final = (* the predecessors from head that are in the body *)
+ let head_preds = ptree_get_some head predmap in
+ let filtered = List.filter (fun n -> HashedSet.PSet.contains body n) head_preds in
+ begin
+ debug "HEAD: %d\n" (P.to_int head);
+ debug "BODY: %a\n" print_pset body;
+ debug "HEADPREDS: %a\n" print_intlist head_preds;
+ assert (List.length filtered == 1);
+ List.hd filtered
+ end in
+ { preds = preds; body = body; head = head; final = final }
+ )
(* LICMaux.inner_loops also returns non-inner loops, but with a body of 1 instruction
* We remove those to get just the inner loops *)
- List.filter (fun iloop ->
- let count = List.length @@ HashedSet.PSet.elements iloop.body in count != 1
- ) all_loops
+ @@ List.filter (fun (n, body) ->
+ let count = List.length @@ HashedSet.PSet.elements body in count != 1
+ ) (PTree.elements loopmap)
end
-let generate_fwmap ln ln' =
- let rec f ln ln' fwmap =
- match ln with
- | [] -> begin
- match ln' with
- | [] -> fwmap
- | _ -> failwith "ln and ln' have different lengths"
- end
- | n :: ln -> begin
- match ln' with
- | n' :: ln' -> f ln ln' (PTree.set n n' fwmap)
- | _ -> failwith "ln and ln' have different lengths"
- end
- in f ln ln' PTree.empty
+let rec generate_fwmap ln ln' fwmap =
+ match ln with
+ | [] -> begin
+ match ln' with
+ | [] -> fwmap
+ | _ -> failwith "ln and ln' have different lengths"
+ end
+ | n :: ln -> begin
+ match ln' with
+ | n' :: ln' -> generate_fwmap ln ln' (PTree.set n n' fwmap)
+ | _ -> failwith "ln and ln' have different lengths"
+ end
-let generate_revmap ln ln' = generate_fwmap ln' ln
+let generate_revmap ln ln' revmap = generate_fwmap ln' ln revmap
let apply_map fw n = P.of_int @@ ptree_get_some n fw
+let apply_map_opt fw n =
+ match PTree.get n fw with
+ | Some n' -> P.of_int n'
+ | None -> n
+
let change_nexts fwmap = function
| Icall (a, b, c, d, n) -> Icall (a, b, c, d, apply_map fwmap n)
| Ibuiltin (a, b, c, n) -> Ibuiltin (a, b, c, apply_map fwmap n)
- | Ijumptable (a, ln) -> Ijumptable (a, List.map (apply_map fwmap) ln)
- | Icond (a, b, n1, n2, i) -> Icond (a, b, apply_map fwmap n1, apply_map fwmap n2, i)
+ | Ijumptable (a, ln) -> Ijumptable (a, List.map (apply_map_opt fwmap) ln)
+ | Icond (a, b, n1, n2, i) -> Icond (a, b, apply_map_opt fwmap n1, apply_map_opt fwmap n2, i)
| Inop n -> Inop (apply_map fwmap n)
| Iop (a, b, c, n) -> Iop (a, b, c, apply_map fwmap n)
| Iload (a, b, c, d, e, n) -> Iload (a, b, c, d, e, apply_map fwmap n)
@@ -684,16 +722,6 @@ let change_nexts fwmap = function
| Itailcall (a, b, c) -> Itailcall (a, b, c)
| Ireturn o -> Ireturn o
-(* let change_single_next n' = function
- | Icall (a, b, c, d, n) -> Icall (a, b, c, d, n')
- | Ibuiltin (a, b, c, n) -> Ibuiltin (a, b, c, n')
- | Inop n -> Inop n'
- | Iop (a, b, c, n) -> Iop (a, b, c, n')
- | Iload (a, b, c, d, e, n) -> Iload (a, b, c, d, e, n')
- | Istore (a, b, c, d, n) -> Istore (a, b, c, d, n')
- | _ -> failwith "Not an instruction with single successor"
- *)
-
(** Clone a list of instructions into free pc indexes
*
* The list of instructions should be contiguous, and not include any loop.
@@ -709,20 +737,20 @@ let change_nexts fwmap = function
let clone code revmap ln = begin
assert (List.length ln > 0);
let head' = next_free_pc code in
- let head = P.to_int @@ List.hd ln in
- let ln' = List.map (fun n -> n + (head' - head)) @@ List.map P.to_int ln in
- let fwmap = generate_fwmap ln ln' in
- let revmap' = generate_revmap ln (List.map P.of_int ln') in
+ (* +head' to ensure we never overlap with the existing code *)
+ let ln' = List.map (fun n -> n + head') @@ List.map P.to_int ln in
+ let fwmap = generate_fwmap ln ln' PTree.empty in
+ let revmap' = generate_revmap ln (List.map P.of_int ln') revmap in
let code' = ref code in
List.iter (fun n ->
let instr = get_some @@ PTree.get n code in
let instr' = change_nexts fwmap instr in
code' := PTree.set (apply_map fwmap n) instr' !code'
) ln;
- (!code', revmap', ln')
+ (!code', revmap', ln', fwmap)
end
-(** Unrolls a single interation of the inner loop
+(* Unrolls a single interation of the inner loop
* 1) Clones the body into body'
* 2) Links the preds to the first instruction of body'
* 3) Links the last instruction of body' into the first instruction of body
@@ -732,20 +760,23 @@ end
* Or we need to explicitly label the head and final instructions of the inner loop
*)
let unroll_inner_loop_single code revmap iloop =
- let (code2, revmap2, dupbody, fwmap) = clone code revmap (iloop.body) in
+ let body = HashedSet.PSet.elements (iloop.body) in
+ let (code2, revmap2, dupbody, fwmap) = clone code revmap body in
let code' = ref code2 in
- let first_n = List.hd (iloop.body) in
- let first_n' = List.hd dupbody in
- let last_n' = List.hd @@ List.rev dupbody in
+ let head' = apply_map fwmap (iloop.head) in
+ let final' = apply_map fwmap (iloop.final) in
begin
- code' := change_pointers !code' first_n first_n' (iloop.preds);
- code' := change_pointers !code' first_n' first_n [last_n'];
+ debug "PREDS: %a\n" print_intlist iloop.preds;
+ debug "IHEAD: %d\n" (P.to_int iloop.head);
+ code' := change_pointers !code' (iloop.head) head' (iloop.preds);
+ code' := change_pointers !code' head' (iloop.head) [final'];
(* code' := PTree.set last_n' (change_single_next first_n @@ ptree_get_some last_n' !code') !code' *)
(!code', revmap2)
end
let unroll_inner_loops_single f revmap =
- let inner_loops = get_inner_loops f in
+ let is_loop_header = get_loop_headers (f.fn_code) (f.fn_entrypoint) in
+ let inner_loops = get_inner_loops f is_loop_header in
let code' = ref f.fn_code in
let revmap' = ref revmap in
begin
@@ -758,19 +789,17 @@ let unroll_inner_loops_single f revmap =
end
let duplicate_aux f =
- begin
- let entrypoint = f.fn_entrypoint in
- if !Clflags.option_fduplicate < 0 then
- ((f.fn_code, entrypoint), make_identity_ptree f.fn_code)
+ let entrypoint = f.fn_entrypoint in
+ if !Clflags.option_fduplicate < 0 then
+ ((f.fn_code, entrypoint), make_identity_ptree f.fn_code)
+ else
+ let (code, revmap) = unroll_inner_loops_single f (make_identity_ptree (f.fn_code)) in
+ let code' = update_directions code entrypoint in
+ let traces = select_traces code' entrypoint in
+ let icond_code = invert_iconds code' traces in
+ let preds = get_predecessors_rtl icond_code in
+ if !Clflags.option_fduplicate >= 1 then
+ let (new_code, pTreeId) = superblockify_traces icond_code preds traces revmap in
+ ((new_code, entrypoint), pTreeId)
else
- let (code, revmap) = unroll_inner_loops_single f (make_identity_ptree code) in
- let code = update_directions code entrypoint in
- let traces = select_traces code entrypoint in
- let icond_code = invert_iconds code traces in
- let preds = get_predecessors_rtl icond_code in
- if !Clflags.option_fduplicate >= 1 then
- let (new_code, pTreeId) = superblockify_traces icond_code preds traces revmap in
- ((new_code, f.fn_entrypoint), pTreeId)
- else
- ((icond_code, entrypoint), make_identity_ptree code)
- end
+ ((icond_code, entrypoint), revmap)