From 718a7da96aa18c278cde43fbc77a50135cd71e94 Mon Sep 17 00:00:00 2001 From: Cyril SIX Date: Fri, 4 Dec 2020 12:00:26 +0100 Subject: Less aggressive tail duplication In some cases of two imbricated loops, we would tail-duplicate too much, because of the input trace traversing both loop headers. --- backend/Duplicateaux.ml | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'backend/Duplicateaux.ml') diff --git a/backend/Duplicateaux.ml b/backend/Duplicateaux.ml index 3121f0fa..196d15f2 100644 --- a/backend/Duplicateaux.ml +++ b/backend/Duplicateaux.ml @@ -547,7 +547,7 @@ let is_a_nop code n = * preds: mapping node -> predecessors * ptree: the revmap * trace: the trace to follow tail duplication on *) -let tail_duplicate code preds ptree trace = +let tail_duplicate code preds is_loop_header ptree trace = debug "Tail_duplicate on that trace: %a\n" print_trace trace; (* next_int: unused integer that can be used for the next duplication *) let next_int = ref (next_free_pc code) @@ -563,8 +563,12 @@ let tail_duplicate code preds ptree trace = if is_first then (code, ptree) (* first node is never duplicated regardless of its inputs *) else let node_preds = ptree_get_some n preds - in let node_preds_nolast = List.filter (fun e -> e <> get_some !last_node) node_preds - in let node_preds_nolast = List.filter (fun e -> not @@ List.mem e t) node_preds_nolast + in let node_preds_nolast = + (* We traverse loop headers without initiating tail duplication + * (see case of two imbricated loops) *) + if (get_some @@ PTree.get n is_loop_header) then [] + else List.filter (fun e -> e <> get_some !last_node) node_preds + (* in let node_preds_nolast = List.filter (fun e -> not @@ List.mem e t) node_preds_nolast *) in let final_node_preds = match !last_duplicate with | None -> node_preds_nolast | Some n' -> n' :: node_preds_nolast @@ -585,12 +589,12 @@ let tail_duplicate code preds ptree trace = in let new_code, new_ptree = f code ptree true trace in (new_code, new_ptree, !nb_duplicated) -let superblockify_traces code preds traces ptree = +let superblockify_traces code preds is_loop_header traces ptree = let max_nb_duplicated = !Clflags.option_ftailduplicate (* FIXME - should be architecture dependent *) in let rec f code ptree = function | [] -> (code, ptree, 0) | trace :: traces -> - let new_code, new_ptree, nb_duplicated = tail_duplicate code preds ptree trace + let new_code, new_ptree, nb_duplicated = tail_duplicate code preds is_loop_header ptree trace in if (nb_duplicated < max_nb_duplicated) then (debug "End duplication\n"; f new_code new_ptree traces) else (debug "Too many duplicated nodes, aborting tail duplication\n"; (code, ptree, 0)) @@ -1019,6 +1023,7 @@ let tail_duplicate f = if !Clflags.option_ftailduplicate > 0 then let traces = select_traces code entrypoint in let preds = get_predecessors_rtl code in - superblockify_traces code preds traces revmap + let is_loop_header = get_loop_headers code entrypoint in + superblockify_traces code preds is_loop_header traces revmap else (code, revmap) in ((code, entrypoint), revmap) -- cgit