diff options
-rw-r--r-- | backend/Duplicateaux.ml | 50 | ||||
-rw-r--r-- | backend/Duplicatepasses.v | 12 | ||||
-rw-r--r-- | driver/Clflags.ml | 1 | ||||
-rw-r--r-- | driver/Driver.ml | 3 | ||||
-rw-r--r-- | tools/compiler_expand.ml | 2 |
5 files changed, 67 insertions, 1 deletions
diff --git a/backend/Duplicateaux.ml b/backend/Duplicateaux.ml index 2b13ab5d..fac0ba76 100644 --- a/backend/Duplicateaux.ml +++ b/backend/Duplicateaux.ml @@ -836,6 +836,56 @@ let unroll_inner_loops_body f code revmap = (!code', !revmap') end +let extract_upto_icond f code head = + let rec extract h = + let inst = get_some @@ PTree.get h code in + match inst with + | Icond _ -> [h] + | _ -> ( match rtl_successors inst with + | [n] -> h :: (extract n) + | _ -> failwith "Found a node with more than one successor??" + ) + in List.rev @@ extract head + +let rotate_inner_loop f code revmap iloop = + let header = extract_upto_icond f code iloop.head in + let limit = !Clflags.option_flooprotate in + if count_ignore_nops code header > limit then begin + debug "Loop Rotate: too many nodes to duplicate (%d > %d)" (List.length header) limit; + (code, revmap) + end else + let (code2, revmap2, dupheader, fwmap) = clone code revmap header in + let code' = ref code2 in + let head' = apply_map fwmap iloop.head in + begin + code' := change_pointers !code' iloop.head head' iloop.preds; + (!code', revmap2) + end + +let rotate_inner_loops f code revmap = + let is_loop_header = get_loop_headers code (f.fn_entrypoint) in + let inner_loops = get_inner_loops f code is_loop_header in + let code' = ref code in + let revmap' = ref revmap in + begin + print_inner_loops inner_loops; + List.iter (fun iloop -> + let (new_code, new_revmap) = rotate_inner_loop f !code' !revmap' iloop in + code' := new_code; revmap' := new_revmap + ) inner_loops; + (!code', !revmap') + end + +let loop_rotate f = + let entrypoint = f.fn_entrypoint in + let code = f.fn_code in + let revmap = make_identity_ptree code in + let (code, revmap) = + if !Clflags.option_flooprotate > 0 then + rotate_inner_loops f code revmap + else (code, revmap) in + ((code, entrypoint), revmap) + let static_predict f = let entrypoint = f.fn_entrypoint in let code = f.fn_code in diff --git a/backend/Duplicatepasses.v b/backend/Duplicatepasses.v index dc96f966..7e58eedf 100644 --- a/backend/Duplicatepasses.v +++ b/backend/Duplicatepasses.v @@ -45,4 +45,14 @@ End TailDuplicateOracle. Module Tailduplicateproof := DuplicateProof TailDuplicateOracle. -Module Tailduplicate := Tailduplicateproof.
\ No newline at end of file +Module Tailduplicate := Tailduplicateproof. + +(** Loop Rotate *) + +Module LoopRotateOracle <: DuplicateOracle. + Axiom duplicate_aux : function -> code * node * (PTree.t node). + Extract Constant duplicate_aux => "Duplicateaux.loop_rotate". +End LoopRotateOracle. + +Module Looprotateproof := DuplicateProof LoopRotateOracle. +Module Looprotate := Looprotateproof. diff --git a/driver/Clflags.ml b/driver/Clflags.ml index 89090b57..d1e7dd7f 100644 --- a/driver/Clflags.ml +++ b/driver/Clflags.ml @@ -43,6 +43,7 @@ let option_ftailduplicate = ref 0 (* perform tail duplication for blocks of size let option_ftracelinearize = ref true (* uses branch prediction information to improve the linearization *) let option_funrollsingle = ref 0 (* unroll a single iteration of innermost loops of size n *) let option_funrollbody = ref 0 (* unroll the body of innermost loops of size n *) +let option_flooprotate = ref 0 (* rotate the innermost loops to have the condition inside the loop body *) let option_fpostpass = ref true let option_fpostpass_sched = ref "list" diff --git a/driver/Driver.ml b/driver/Driver.ml index 27193ff1..d93578b6 100644 --- a/driver/Driver.ml +++ b/driver/Driver.ml @@ -215,6 +215,8 @@ Processing options: -ftracelinearize Uses branch prediction information to improve the Linearize [on] -funrollsingle n Unrolls a single iteration of innermost loops of size n (not counting Inops) [0] -funrollbody n Unrolls once the body of innermost loops of size n (not counting Inops) [0] + -flooprotate n Duplicates the header (condition computation part) of innermost loops to perform a loop rotate [0] + Doesn't duplicate if the size of that header is strictly greater than n -fforward-moves Forward moves after CSE -finline Perform inlining of functions [on] -finline-functions-called-once Integrate functions only required by their @@ -426,6 +428,7 @@ let cmdline_actions = @ f_opt "predict" option_fpredict @ [ Exact "-funrollsingle", Integer (fun n -> option_funrollsingle := n) ] @ [ Exact "-funrollbody", Integer (fun n -> option_funrollbody := n) ] + @ [ Exact "-flooprotate", Integer (fun n -> option_flooprotate := n) ] @ f_opt "tracelinearize" option_ftracelinearize @ f_opt_str "postpass" option_fpostpass option_fpostpass_sched @ f_opt "inline" option_finline diff --git a/tools/compiler_expand.ml b/tools/compiler_expand.ml index c714cdce..4e2dbd6a 100644 --- a/tools/compiler_expand.ml +++ b/tools/compiler_expand.ml @@ -26,6 +26,8 @@ PARTIAL, (Option "optim_CSE"), Require, (Some "CSE"), "CSE"; PARTIAL, Always, NoRequire, (Some "Static Prediction + inverting conditions"), "Staticpredict"; PARTIAL, Always, NoRequire, (Some "Unrolling one iteration out of innermost loops"), "Unrollsingle"; TOTAL, Always, Require, (Some "Renumbering pre constprop"), "Renumber"; +PARTIAL, Always, NoRequire, (Some "Loop Rotate"), "Looprotate"; +TOTAL, Always, NoRequire, (Some "Renumbering pre constprop"), "Renumber"; PARTIAL, Always, NoRequire, (Some "Unrolling the body of innermost loops"), "Unrollbody"; TOTAL, Always, NoRequire, (Some "Renumbering pre constprop"), "Renumber"; PARTIAL, Always, NoRequire, (Some "Performing tail duplication"), "Tailduplicate"; |