aboutsummaryrefslogtreecommitdiffstats
path: root/backend/Duplicateaux.ml
diff options
context:
space:
mode:
authorCyril SIX <cyril.six@kalray.eu>2020-12-02 15:18:40 +0100
committerCyril SIX <cyril.six@kalray.eu>2020-12-02 15:18:40 +0100
commited8aec9f9fb259fe218d2d29efa3c8cc85ef93f1 (patch)
tree8539bd273941f134394e1f195621991b020cb6f0 /backend/Duplicateaux.ml
parent73729e86f09b81e397fe40b31fdc8b0bdf3c164e (diff)
downloadcompcert-kvx-ed8aec9f9fb259fe218d2d29efa3c8cc85ef93f1.tar.gz
compcert-kvx-ed8aec9f9fb259fe218d2d29efa3c8cc85ef93f1.zip
[expensive] Behavior change when the loop has two final instructions
Right now though the compilation time is too high for glpk, I need to figure out why
Diffstat (limited to 'backend/Duplicateaux.ml')
-rw-r--r--backend/Duplicateaux.ml66
1 files changed, 57 insertions, 9 deletions
diff --git a/backend/Duplicateaux.ml b/backend/Duplicateaux.ml
index a9dca583..55414f40 100644
--- a/backend/Duplicateaux.ml
+++ b/backend/Duplicateaux.ml
@@ -619,15 +619,28 @@ type innerLoop = {
preds: P.t list;
body: P.t list;
head: P.t; (* head of the loop *)
- finals: P.t list (* the final instructions, which loops back to the head *)
+ finals: P.t list; (* the final instructions, which loops back to the head *)
(* There may be more than one ; for instance if there is an if inside the loop with both
- * branches leading to a goto backedge *)
+ * branches leading to a goto backedge
+ * Such cases usually happen after a tail-duplication *)
+ sb_final: P.t option; (* if the innerloop wraps a superblock, this is its final instruction *)
}
let print_pset = LICMaux.pp_pset
+let print_option_pint oc o =
+ if !debug_flag then
+ match o with
+ | None -> Printf.fprintf oc "None"
+ | Some n -> Printf.fprintf oc "Some %d" (P.to_int n)
+
let print_inner_loop iloop =
- debug "{preds: %a, body: %a}" print_intlist iloop.preds print_intlist iloop.body
+ debug "{preds: %a, body: %a, head: %d, finals: %a, sb_final: %a}\n"
+ print_intlist iloop.preds
+ print_intlist iloop.body
+ (P.to_int iloop.head)
+ print_intlist iloop.finals
+ print_option_pint iloop.sb_final
let rec print_inner_loops = function
| [] -> ()
@@ -696,6 +709,31 @@ let get_inner_loops f code is_loop_header =
!iloops
*)
+let rtl_successors_pref = function
+| Itailcall _ | Ireturn _ -> []
+| Icall(_,_,_,_,n) | Ibuiltin(_,_,_,n) | Inop n | Iop (_,_,_,n)
+| Iload (_,_,_,_,_,n) | Istore (_,_,_,_,n) -> [n]
+| Icond (_,_,n1,n2,p) -> (match p with
+ | Some true -> [n1]
+ | Some false -> [n2]
+ | None -> [n1; n2])
+| Ijumptable (_,ln) -> ln
+
+(* Find the last node of a trace (starting at "node"), until a loop is encountered.
+ * If a non-predicted branch is encountered, returns None *)
+let rec find_last_node_before_loop code node trace is_loop_header =
+ let rtl_succ = rtl_successors @@ get_some @@ PTree.get node code in
+ let headers = List.filter (fun n ->
+ get_some @@ PTree.get n is_loop_header && List.mem n trace) rtl_succ in
+ match headers with
+ | [] -> (
+ match (rtl_successors_pref @@ get_some @@ PTree.get node code) with
+ | [n] -> find_last_node_before_loop code n trace is_loop_header
+ | _ -> None
+ )
+ | [h] -> Some node
+ | _ -> failwith "Multiple branches leading to a loop"
+
let get_inner_loops f code is_loop_header =
let fake_f = { fn_sig = f.fn_sig; fn_params = f.fn_params;
fn_stacksize = f.fn_stacksize; fn_code = code; fn_entrypoint = f.fn_entrypoint } in
@@ -722,7 +760,10 @@ let get_inner_loops f code is_loop_header =
debug "HEADPREDS: %a\n" print_intlist head_preds;
filtered
end in
- { preds = preds; body = (HashedSet.PSet.elements body); head = head; finals = finals }
+ let body = HashedSet.PSet.elements body in
+ let sb_final = find_last_node_before_loop code head body is_loop_header in
+ { preds = preds; body = body; head = head; finals = finals;
+ sb_final = sb_final; }
)
(* LICMaux.inner_loops also returns non-inner loops, but with a body of 1 instruction
* We remove those to get just the inner loops *)
@@ -840,16 +881,23 @@ let unroll_inner_loops_single f code revmap =
(!code', !revmap')
end
+let is_some o = match o with Some _ -> true | None -> false
+
(* Unrolls the body of the inner loop once - duplicating the exit condition as well
* 1) Clones body into body'
- * 2) Links the last instruction of body into the first of body'
+ * 2) Links the last instruction of body (sb_final) into the first of body'
* 3) Links the last instruction of body' into the first of body
*)
let unroll_inner_loop_body code revmap iloop =
+ debug_flag := true;
+ debug "iloop = "; print_inner_loop iloop;
let body = iloop.body in
let limit = !Clflags.option_funrollbody in
if count_ignore_nops code body > limit then begin
- debug "Too many nodes in the loop body (%d > %d)" (List.length body) limit;
+ debug "Too many nodes in the loop body (%d > %d)" (List.length body) limit; debug_flag := false;
+ (code, revmap)
+ end else if not @@ is_some iloop.sb_final then begin
+ debug "The loop body does not form a superblock"; debug_flag := false;
(code, revmap)
end else
let (code2, revmap2, dupbody, fwmap) = clone code revmap body in
@@ -857,14 +905,14 @@ let unroll_inner_loop_body code revmap iloop =
let head' = apply_map fwmap (iloop.head) in
let finals' = apply_map_list fwmap (iloop.finals) in
begin
- code' := change_pointers !code' iloop.head head' iloop.finals;
+ code' := change_pointers !code' iloop.head head' [get_some @@ iloop.sb_final];
code' := change_pointers !code' head' iloop.head finals';
+ debug_flag := false;
(!code', revmap2)
end
let unroll_inner_loops_body f code revmap =
let is_loop_header = get_loop_headers code (f.fn_entrypoint) in
- (* debug_flag := true; *)
let inner_loops = get_inner_loops f code is_loop_header in
debug "Number of loops found: %d\n" (List.length inner_loops);
let code' = ref code in
@@ -874,7 +922,7 @@ let unroll_inner_loops_body f code revmap =
List.iter (fun iloop ->
let (new_code, new_revmap) = unroll_inner_loop_body !code' !revmap' iloop in
code' := new_code; revmap' := new_revmap
- ) inner_loops; (* debug_flag := false; *)
+ ) inner_loops;
(!code', !revmap')
end