diff options
author | Cyril SIX <cyril.six@kalray.eu> | 2020-12-11 16:04:21 +0100 |
---|---|---|
committer | Cyril SIX <cyril.six@kalray.eu> | 2020-12-11 16:05:29 +0100 |
commit | 3b2ca46523f3fd730c9009aecbc575cc2abb15ca (patch) | |
tree | 3d7abe1bdfd4acebbe6f8472c6006b8518167342 /backend/Duplicateaux.ml | |
parent | 4e14e7e5369d64e9b2853731d8adeff20881bd1f (diff) | |
download | compcert-kvx-3b2ca46523f3fd730c9009aecbc575cc2abb15ca.tar.gz compcert-kvx-3b2ca46523f3fd730c9009aecbc575cc2abb15ca.zip |
Fixing wrong predictions on imbricated loops
Diffstat (limited to 'backend/Duplicateaux.ml')
-rw-r--r-- | backend/Duplicateaux.ml | 218 |
1 files changed, 114 insertions, 104 deletions
diff --git a/backend/Duplicateaux.ml b/backend/Duplicateaux.ml index a58ab2c2..861df3cd 100644 --- a/backend/Duplicateaux.ml +++ b/backend/Duplicateaux.ml @@ -199,23 +199,124 @@ let do_loop2_heuristic loop_info n code cond ifso ifnot is_loop_header = | Some b -> Some b end +(** Innermost loop detection *) + +type innerLoop = { + preds: P.t list; + body: P.t list; + head: P.t; (* head of the loop *) + finals: P.t list; (* the final instructions, which loops back to the head *) + (* There may be more than one ; for instance if there is an if inside the loop with both + * branches leading to a goto backedge + * Such cases usually happen after a tail-duplication *) + sb_final: P.t option; (* if the innerloop wraps a superblock, this is its final instruction *) + (* may be None if we predict that we do not loop *) +} + +let print_pset = LICMaux.pp_pset + +let print_ptree printer pt = + let elements = PTree.elements pt in + begin + debug "[\n"; + List.iter (fun (n, elt) -> + debug "\t%d: %a\n" (P.to_int n) printer elt + ) elements; + debug "]\n" + end + +let rtl_successors_pref = function +| Itailcall _ | Ireturn _ -> [] +| Icall(_,_,_,_,n) | Ibuiltin(_,_,_,n) | Inop n | Iop (_,_,_,n) +| Iload (_,_,_,_,_,n) | Istore (_,_,_,_,n) -> [n] +| Icond (_,_,n1,n2,p) -> (match p with + | Some true -> [n1] + | Some false -> [n2] + | None -> [n1; n2]) +| Ijumptable (_,ln) -> ln + +(* Find the last node of a trace (starting at "node"), until a loop is encountered. + * If a non-predicted branch is encountered, returns None *) +let rec find_last_node_before_loop code node trace is_loop_header = + let rtl_succ = rtl_successors @@ get_some @@ PTree.get node code in + let headers = List.filter (fun n -> + get_some @@ PTree.get n is_loop_header && HashedSet.PSet.contains trace n) rtl_succ in + match headers with + | [] -> ( + let next_nodes = rtl_successors_pref @@ get_some @@ PTree.get node code in + match next_nodes with + | [n] -> ( + (* To prevent getting out of the superblock and loop infinitely when the prediction is false *) + if HashedSet.PSet.contains trace n then + find_last_node_before_loop code n trace is_loop_header + else None + ) + | _ -> None (* May happen when we predict that a loop is not taken *) + ) + | [h] -> Some node + | _ -> failwith "Multiple branches leading to a loop" + +(* The computation of sb_final requires to already have branch prediction *) +let get_inner_loops f code is_loop_header = + let fake_f = { fn_sig = f.fn_sig; fn_params = f.fn_params; + fn_stacksize = f.fn_stacksize; fn_code = code; fn_entrypoint = f.fn_entrypoint } in + let (_, predmap, loopmap) = LICMaux.inner_loops fake_f in + begin + debug "PREDMAP: "; print_ptree print_intlist predmap; + debug "LOOPMAP: "; print_ptree print_pset loopmap; + List.map (fun (n, body) -> + let preds = List.filter (fun p -> not @@ HashedSet.PSet.contains body p) + @@ get_some @@ PTree.get n predmap in + let head = (* the instruction from body which is a loop header *) + let heads = HashedSet.PSet.elements @@ HashedSet.PSet.filter + (fun n -> ptree_get_some n is_loop_header) body in + begin + assert (List.length heads == 1); + List.hd heads + end in + let finals = (* the predecessors from head that are in the body *) + let head_preds = ptree_get_some head predmap in + let filtered = List.filter (fun n -> HashedSet.PSet.contains body n) head_preds in + begin + debug "HEAD: %d\n" (P.to_int head); + debug "BODY: %a\n" print_pset body; + debug "HEADPREDS: %a\n" print_intlist head_preds; + filtered + end in + let sb_final = find_last_node_before_loop code head body is_loop_header in + let body = HashedSet.PSet.elements body in + { preds = preds; body = body; head = head; finals = finals; + sb_final = sb_final; } + ) + (* LICMaux.inner_loops also returns non-inner loops, but with a body of 1 instruction + * We remove those to get just the inner loops *) + @@ List.filter (fun (n, body) -> + let count = List.length @@ HashedSet.PSet.elements body in count != 1 + ) (PTree.elements loopmap) + end + + (* Returns a PTree of either None or Some b where b determines the node following the loop, for a cb instruction *) (* It uses the fact that loops in CompCert are done by a branch (backedge) instruction followed by a cb *) -let get_loop_info is_loop_header bfs_order code = +let get_loop_info f is_loop_header bfs_order code = debug "GET LOOP INFO\n"; debug "==================================\n"; let loop_info = ref (PTree.map (fun n i -> None) code) in - let mark_path n = + let mark_path n lbody = let cb_info = ref PTree.empty in let visited = ref (PTree.map (fun n i -> false) code) in (* Returns true if there is a path from src to dest (not involving jumptables) *) (* Mark nodes as visited along the way *) let explore src dest = + debug "Trying to dive a path from %d to %d\n" (P.to_int src) (P.to_int dest); (* Memoizing the results to avoid exponential blow-up *) let memory = ref PTree.empty in let rec explore_rec src = - if src == dest then true - else if (get_some @@ PTree.get src !visited) then false + debug "explore_rec %d vs %d... " (P.to_int src) (P.to_int dest); + if (P.to_int src) == (P.to_int dest) then (debug "FOUND\n"; true) + else if (get_some @@ PTree.get src !visited) then (debug "VISITED... :( \n"; false) + (* if we went out of the innermost loop *) + else if (not @@ List.mem src lbody) then (debug "Out of innermost...\n"; false) else begin let inst = get_some @@ PTree.get src code in visited := PTree.set src true !visited; @@ -301,18 +402,20 @@ let get_loop_info is_loop_header bfs_order code = (* | Some _ -> ( debug "already loop info there\n" ) FIXME - we don't know yet whether a branch to a loop head is a backedge or not *) ) end - in begin - List.iter mark_path @@ List.filter (fun n -> get_some @@ PTree.get n is_loop_header) bfs_order; + in let iloops = get_inner_loops f code is_loop_header in + begin + List.iter (fun il -> mark_path il.head il.body) iloops; + (* List.iter mark_path @@ List.filter (fun n -> get_some @@ PTree.get n is_loop_header) bfs_order; *) debug "==================================\n"; !loop_info end (* Remark - compared to the original Branch Prediction for Free paper, we don't use the store heuristic *) -let get_directions code entrypoint = begin +let get_directions f code entrypoint = begin debug "get_directions\n"; let bfs_order = bfs code entrypoint in let is_loop_header = get_loop_headers code entrypoint in - let loop_info = get_loop_info is_loop_header bfs_order code in + let loop_info = get_loop_info f is_loop_header bfs_order code in let directions = ref (PTree.map (fun n i -> None) code) in (* None <=> no predicted direction *) begin (* ptree_printbool is_loop_header; *) @@ -362,9 +465,9 @@ let rec update_direction_rec directions = function in PTree.set n (update_direction direction i) (update_direction_rec directions lm) (* Uses branch prediction to write prediction annotations in Icond *) -let update_directions code entrypoint = begin +let update_directions f code entrypoint = begin debug "Update_directions\n"; - let directions = get_directions code entrypoint + let directions = get_directions f code entrypoint in begin (* debug "Ifso directions: "; ptree_printbool directions; @@ -667,99 +770,6 @@ let invert_iconds code = * cyclic dependencies between LICMaux and Duplicateaux *) -type innerLoop = { - preds: P.t list; - body: P.t list; - head: P.t; (* head of the loop *) - finals: P.t list; (* the final instructions, which loops back to the head *) - (* There may be more than one ; for instance if there is an if inside the loop with both - * branches leading to a goto backedge - * Such cases usually happen after a tail-duplication *) - sb_final: P.t option; (* if the innerloop wraps a superblock, this is its final instruction *) - (* may be None if we predict that we do not loop *) -} - -let print_pset = LICMaux.pp_pset - -let print_ptree printer pt = - let elements = PTree.elements pt in - begin - debug "[\n"; - List.iter (fun (n, elt) -> - debug "\t%d: %a\n" (P.to_int n) printer elt - ) elements; - debug "]\n" - end - -let rtl_successors_pref = function -| Itailcall _ | Ireturn _ -> [] -| Icall(_,_,_,_,n) | Ibuiltin(_,_,_,n) | Inop n | Iop (_,_,_,n) -| Iload (_,_,_,_,_,n) | Istore (_,_,_,_,n) -> [n] -| Icond (_,_,n1,n2,p) -> (match p with - | Some true -> [n1] - | Some false -> [n2] - | None -> [n1; n2]) -| Ijumptable (_,ln) -> ln - -(* Find the last node of a trace (starting at "node"), until a loop is encountered. - * If a non-predicted branch is encountered, returns None *) -let rec find_last_node_before_loop code node trace is_loop_header = - let rtl_succ = rtl_successors @@ get_some @@ PTree.get node code in - let headers = List.filter (fun n -> - get_some @@ PTree.get n is_loop_header && HashedSet.PSet.contains trace n) rtl_succ in - match headers with - | [] -> ( - let next_nodes = rtl_successors_pref @@ get_some @@ PTree.get node code in - match next_nodes with - | [n] -> ( - (* To prevent getting out of the superblock and loop infinitely when the prediction is false *) - if HashedSet.PSet.contains trace n then - find_last_node_before_loop code n trace is_loop_header - else None - ) - | _ -> None (* May happen when we predict that a loop is not taken *) - ) - | [h] -> Some node - | _ -> failwith "Multiple branches leading to a loop" - -(* The computation of sb_final requires to already have branch prediction *) -let get_inner_loops f code is_loop_header = - let fake_f = { fn_sig = f.fn_sig; fn_params = f.fn_params; - fn_stacksize = f.fn_stacksize; fn_code = code; fn_entrypoint = f.fn_entrypoint } in - let (_, predmap, loopmap) = LICMaux.inner_loops fake_f in - begin - debug "PREDMAP: "; print_ptree print_intlist predmap; - debug "LOOPMAP: "; print_ptree print_pset loopmap; - List.map (fun (n, body) -> - let preds = List.filter (fun p -> not @@ HashedSet.PSet.contains body p) - @@ get_some @@ PTree.get n predmap in - let head = (* the instruction from body which is a loop header *) - let heads = HashedSet.PSet.elements @@ HashedSet.PSet.filter - (fun n -> ptree_get_some n is_loop_header) body in - begin - assert (List.length heads == 1); - List.hd heads - end in - let finals = (* the predecessors from head that are in the body *) - let head_preds = ptree_get_some head predmap in - let filtered = List.filter (fun n -> HashedSet.PSet.contains body n) head_preds in - begin - debug "HEAD: %d\n" (P.to_int head); - debug "BODY: %a\n" print_pset body; - debug "HEADPREDS: %a\n" print_intlist head_preds; - filtered - end in - let sb_final = find_last_node_before_loop code head body is_loop_header in - let body = HashedSet.PSet.elements body in - { preds = preds; body = body; head = head; finals = finals; - sb_final = sb_final; } - ) - (* LICMaux.inner_loops also returns non-inner loops, but with a body of 1 instruction - * We remove those to get just the inner loops *) - @@ List.filter (fun (n, body) -> - let count = List.length @@ HashedSet.PSet.elements body in count != 1 - ) (PTree.elements loopmap) - end let print_option_pint oc o = @@ -1055,7 +1065,7 @@ let static_predict f = let revmap = make_identity_ptree code in let code = if !Clflags.option_fpredict then - update_directions code entrypoint + update_directions f code entrypoint else code in let code = if !Clflags.option_fpredict then |