aboutsummaryrefslogtreecommitdiffstats
path: root/backend/Duplicateaux.ml
diff options
context:
space:
mode:
authorCyril SIX <cyril.six@kalray.eu>2020-12-04 12:00:26 +0100
committerCyril SIX <cyril.six@kalray.eu>2020-12-04 12:00:26 +0100
commit718a7da96aa18c278cde43fbc77a50135cd71e94 (patch)
treea74c97be2d6a65f185b4042576e7379fb1da2a63 /backend/Duplicateaux.ml
parent2bbb2af733b98913a66a196cd985fef61b4eb594 (diff)
downloadcompcert-kvx-718a7da96aa18c278cde43fbc77a50135cd71e94.tar.gz
compcert-kvx-718a7da96aa18c278cde43fbc77a50135cd71e94.zip
Less aggressive tail duplication
In some cases of two imbricated loops, we would tail-duplicate too much, because of the input trace traversing both loop headers.
Diffstat (limited to 'backend/Duplicateaux.ml')
-rw-r--r--backend/Duplicateaux.ml17
1 files changed, 11 insertions, 6 deletions
diff --git a/backend/Duplicateaux.ml b/backend/Duplicateaux.ml
index 3121f0fa..196d15f2 100644
--- a/backend/Duplicateaux.ml
+++ b/backend/Duplicateaux.ml
@@ -547,7 +547,7 @@ let is_a_nop code n =
* preds: mapping node -> predecessors
* ptree: the revmap
* trace: the trace to follow tail duplication on *)
-let tail_duplicate code preds ptree trace =
+let tail_duplicate code preds is_loop_header ptree trace =
debug "Tail_duplicate on that trace: %a\n" print_trace trace;
(* next_int: unused integer that can be used for the next duplication *)
let next_int = ref (next_free_pc code)
@@ -563,8 +563,12 @@ let tail_duplicate code preds ptree trace =
if is_first then (code, ptree) (* first node is never duplicated regardless of its inputs *)
else
let node_preds = ptree_get_some n preds
- in let node_preds_nolast = List.filter (fun e -> e <> get_some !last_node) node_preds
- in let node_preds_nolast = List.filter (fun e -> not @@ List.mem e t) node_preds_nolast
+ in let node_preds_nolast =
+ (* We traverse loop headers without initiating tail duplication
+ * (see case of two imbricated loops) *)
+ if (get_some @@ PTree.get n is_loop_header) then []
+ else List.filter (fun e -> e <> get_some !last_node) node_preds
+ (* in let node_preds_nolast = List.filter (fun e -> not @@ List.mem e t) node_preds_nolast *)
in let final_node_preds = match !last_duplicate with
| None -> node_preds_nolast
| Some n' -> n' :: node_preds_nolast
@@ -585,12 +589,12 @@ let tail_duplicate code preds ptree trace =
in let new_code, new_ptree = f code ptree true trace
in (new_code, new_ptree, !nb_duplicated)
-let superblockify_traces code preds traces ptree =
+let superblockify_traces code preds is_loop_header traces ptree =
let max_nb_duplicated = !Clflags.option_ftailduplicate (* FIXME - should be architecture dependent *)
in let rec f code ptree = function
| [] -> (code, ptree, 0)
| trace :: traces ->
- let new_code, new_ptree, nb_duplicated = tail_duplicate code preds ptree trace
+ let new_code, new_ptree, nb_duplicated = tail_duplicate code preds is_loop_header ptree trace
in if (nb_duplicated < max_nb_duplicated)
then (debug "End duplication\n"; f new_code new_ptree traces)
else (debug "Too many duplicated nodes, aborting tail duplication\n"; (code, ptree, 0))
@@ -1019,6 +1023,7 @@ let tail_duplicate f =
if !Clflags.option_ftailduplicate > 0 then
let traces = select_traces code entrypoint in
let preds = get_predecessors_rtl code in
- superblockify_traces code preds traces revmap
+ let is_loop_header = get_loop_headers code entrypoint in
+ superblockify_traces code preds is_loop_header traces revmap
else (code, revmap) in
((code, entrypoint), revmap)