aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorCyril SIX <cyril.six@kalray.eu>2020-11-03 15:06:45 +0100
committerCyril SIX <cyril.six@kalray.eu>2020-11-03 15:06:45 +0100
commit535a8f8706de231f1bd8a7f0243025d84906b03c (patch)
tree5a4def9024c63d318de6b716bab83523b983961a
parente6612fdfd69037099037def2acba5df553c3b49a (diff)
downloadcompcert-kvx-535a8f8706de231f1bd8a7f0243025d84906b03c.tar.gz
compcert-kvx-535a8f8706de231f1bd8a7f0243025d84906b03c.zip
Loop Rotate with -flooprotate
-rw-r--r--backend/Duplicateaux.ml50
-rw-r--r--backend/Duplicatepasses.v12
-rw-r--r--driver/Clflags.ml1
-rw-r--r--driver/Driver.ml3
-rw-r--r--tools/compiler_expand.ml2
5 files changed, 67 insertions, 1 deletions
diff --git a/backend/Duplicateaux.ml b/backend/Duplicateaux.ml
index 2b13ab5d..fac0ba76 100644
--- a/backend/Duplicateaux.ml
+++ b/backend/Duplicateaux.ml
@@ -836,6 +836,56 @@ let unroll_inner_loops_body f code revmap =
(!code', !revmap')
end
+let extract_upto_icond f code head =
+ let rec extract h =
+ let inst = get_some @@ PTree.get h code in
+ match inst with
+ | Icond _ -> [h]
+ | _ -> ( match rtl_successors inst with
+ | [n] -> h :: (extract n)
+ | _ -> failwith "Found a node with more than one successor??"
+ )
+ in List.rev @@ extract head
+
+let rotate_inner_loop f code revmap iloop =
+ let header = extract_upto_icond f code iloop.head in
+ let limit = !Clflags.option_flooprotate in
+ if count_ignore_nops code header > limit then begin
+ debug "Loop Rotate: too many nodes to duplicate (%d > %d)" (List.length header) limit;
+ (code, revmap)
+ end else
+ let (code2, revmap2, dupheader, fwmap) = clone code revmap header in
+ let code' = ref code2 in
+ let head' = apply_map fwmap iloop.head in
+ begin
+ code' := change_pointers !code' iloop.head head' iloop.preds;
+ (!code', revmap2)
+ end
+
+let rotate_inner_loops f code revmap =
+ let is_loop_header = get_loop_headers code (f.fn_entrypoint) in
+ let inner_loops = get_inner_loops f code is_loop_header in
+ let code' = ref code in
+ let revmap' = ref revmap in
+ begin
+ print_inner_loops inner_loops;
+ List.iter (fun iloop ->
+ let (new_code, new_revmap) = rotate_inner_loop f !code' !revmap' iloop in
+ code' := new_code; revmap' := new_revmap
+ ) inner_loops;
+ (!code', !revmap')
+ end
+
+let loop_rotate f =
+ let entrypoint = f.fn_entrypoint in
+ let code = f.fn_code in
+ let revmap = make_identity_ptree code in
+ let (code, revmap) =
+ if !Clflags.option_flooprotate > 0 then
+ rotate_inner_loops f code revmap
+ else (code, revmap) in
+ ((code, entrypoint), revmap)
+
let static_predict f =
let entrypoint = f.fn_entrypoint in
let code = f.fn_code in
diff --git a/backend/Duplicatepasses.v b/backend/Duplicatepasses.v
index dc96f966..7e58eedf 100644
--- a/backend/Duplicatepasses.v
+++ b/backend/Duplicatepasses.v
@@ -45,4 +45,14 @@ End TailDuplicateOracle.
Module Tailduplicateproof := DuplicateProof TailDuplicateOracle.
-Module Tailduplicate := Tailduplicateproof. \ No newline at end of file
+Module Tailduplicate := Tailduplicateproof.
+
+(** Loop Rotate *)
+
+Module LoopRotateOracle <: DuplicateOracle.
+ Axiom duplicate_aux : function -> code * node * (PTree.t node).
+ Extract Constant duplicate_aux => "Duplicateaux.loop_rotate".
+End LoopRotateOracle.
+
+Module Looprotateproof := DuplicateProof LoopRotateOracle.
+Module Looprotate := Looprotateproof.
diff --git a/driver/Clflags.ml b/driver/Clflags.ml
index 89090b57..d1e7dd7f 100644
--- a/driver/Clflags.ml
+++ b/driver/Clflags.ml
@@ -43,6 +43,7 @@ let option_ftailduplicate = ref 0 (* perform tail duplication for blocks of size
let option_ftracelinearize = ref true (* uses branch prediction information to improve the linearization *)
let option_funrollsingle = ref 0 (* unroll a single iteration of innermost loops of size n *)
let option_funrollbody = ref 0 (* unroll the body of innermost loops of size n *)
+let option_flooprotate = ref 0 (* rotate the innermost loops to have the condition inside the loop body *)
let option_fpostpass = ref true
let option_fpostpass_sched = ref "list"
diff --git a/driver/Driver.ml b/driver/Driver.ml
index 27193ff1..d93578b6 100644
--- a/driver/Driver.ml
+++ b/driver/Driver.ml
@@ -215,6 +215,8 @@ Processing options:
-ftracelinearize Uses branch prediction information to improve the Linearize [on]
-funrollsingle n Unrolls a single iteration of innermost loops of size n (not counting Inops) [0]
-funrollbody n Unrolls once the body of innermost loops of size n (not counting Inops) [0]
+ -flooprotate n Duplicates the header (condition computation part) of innermost loops to perform a loop rotate [0]
+ Doesn't duplicate if the size of that header is strictly greater than n
-fforward-moves Forward moves after CSE
-finline Perform inlining of functions [on]
-finline-functions-called-once Integrate functions only required by their
@@ -426,6 +428,7 @@ let cmdline_actions =
@ f_opt "predict" option_fpredict
@ [ Exact "-funrollsingle", Integer (fun n -> option_funrollsingle := n) ]
@ [ Exact "-funrollbody", Integer (fun n -> option_funrollbody := n) ]
+ @ [ Exact "-flooprotate", Integer (fun n -> option_flooprotate := n) ]
@ f_opt "tracelinearize" option_ftracelinearize
@ f_opt_str "postpass" option_fpostpass option_fpostpass_sched
@ f_opt "inline" option_finline
diff --git a/tools/compiler_expand.ml b/tools/compiler_expand.ml
index c714cdce..4e2dbd6a 100644
--- a/tools/compiler_expand.ml
+++ b/tools/compiler_expand.ml
@@ -26,6 +26,8 @@ PARTIAL, (Option "optim_CSE"), Require, (Some "CSE"), "CSE";
PARTIAL, Always, NoRequire, (Some "Static Prediction + inverting conditions"), "Staticpredict";
PARTIAL, Always, NoRequire, (Some "Unrolling one iteration out of innermost loops"), "Unrollsingle";
TOTAL, Always, Require, (Some "Renumbering pre constprop"), "Renumber";
+PARTIAL, Always, NoRequire, (Some "Loop Rotate"), "Looprotate";
+TOTAL, Always, NoRequire, (Some "Renumbering pre constprop"), "Renumber";
PARTIAL, Always, NoRequire, (Some "Unrolling the body of innermost loops"), "Unrollbody";
TOTAL, Always, NoRequire, (Some "Renumbering pre constprop"), "Renumber";
PARTIAL, Always, NoRequire, (Some "Performing tail duplication"), "Tailduplicate";