diff options
-rw-r--r-- | backend/Duplicateaux.ml | 9 | ||||
-rw-r--r-- | driver/Clflags.ml | 1 | ||||
-rw-r--r-- | driver/Driver.ml | 2 |
3 files changed, 9 insertions, 3 deletions
diff --git a/backend/Duplicateaux.ml b/backend/Duplicateaux.ml index 84dc92ac..eb9f42e0 100644 --- a/backend/Duplicateaux.ml +++ b/backend/Duplicateaux.ml @@ -807,8 +807,9 @@ let unroll_inner_loops_single f code revmap = *) let unroll_inner_loop_body code revmap iloop = let body = HashedSet.PSet.elements (iloop.body) in - if count_ignore_nops code body > 1000 then begin (* FIXME *) - debug "Too many nodes in the loop body (%d > %d)" (List.length body) 1000; + let limit = !Clflags.option_funrollbody in + if count_ignore_nops code body > limit then begin + debug "Too many nodes in the loop body (%d > %d)" (List.length body) limit; (code, revmap) end else let (code2, revmap2, dupbody, fwmap) = clone code revmap body in @@ -855,7 +856,9 @@ let duplicate_aux f = (* unroll body *) let (code, revmap) = - unroll_inner_loops_body f code revmap in + if !Clflags.option_funrollbody > 0 then + unroll_inner_loops_body f code revmap + else (code, revmap) in (* static prediction bis *) let code = diff --git a/driver/Clflags.ml b/driver/Clflags.ml index 8bc7a938..9df58903 100644 --- a/driver/Clflags.ml +++ b/driver/Clflags.ml @@ -39,6 +39,7 @@ let option_fpredict = ref true (* insert static branch prediction information, a let option_ftailduplicate = ref 0 (* perform tail duplication for blocks of size n *) let option_ftracelinearize = ref true (* uses branch prediction information to improve the linearization *) let option_funrollsingle = ref 0 (* unroll a single iteration of innermost loops of size n *) +let option_funrollbody = ref 0 (* unroll the body of innermost loops of size n *) let option_fpostpass = ref true let option_fpostpass_sched = ref "list" diff --git a/driver/Driver.ml b/driver/Driver.ml index 7ab80540..12f50762 100644 --- a/driver/Driver.ml +++ b/driver/Driver.ml @@ -212,6 +212,7 @@ Processing options: -ftailduplicate n Perform tail duplication for RTL code blocks of size n (not counting Inops) [0] -ftracelinearize Uses branch prediction information to improve the Linearize [on] -funrollsingle n Unrolls a single iteration of innermost loops of size n (not counting Inops) [0] + -funrollbody n Unrolls once the body of innermost loops of size n (not counting Inops) [0] -fforward-moves Forward moves after CSE -finline Perform inlining of functions [on] -finline-functions-called-once Integrate functions only required by their @@ -420,6 +421,7 @@ let cmdline_actions = @ [ Exact "-ftailduplicate", Integer (fun n -> option_ftailduplicate := n) ] @ f_opt "predict" option_fpredict @ [ Exact "-funrollsingle", Integer (fun n -> option_funrollsingle := n) ] + @ [ Exact "-funrollbody", Integer (fun n -> option_funrollbody := n) ] @ f_opt "tracelinearize" option_ftracelinearize @ f_opt_str "postpass" option_fpostpass option_fpostpass_sched @ f_opt "inline" option_finline |