diff options
-rw-r--r-- | backend/CSE3analysis.v | 45 | ||||
-rw-r--r-- | backend/CSE3analysisproof.v | 42 | ||||
-rw-r--r-- | backend/Duplicateaux.ml | 50 | ||||
-rw-r--r-- | backend/Duplicatepasses.v | 12 | ||||
-rw-r--r-- | driver/Clflags.ml | 1 | ||||
-rw-r--r-- | driver/Driver.ml | 3 | ||||
-rw-r--r-- | tools/compiler_expand.ml | 4 |
7 files changed, 121 insertions, 36 deletions
diff --git a/backend/CSE3analysis.v b/backend/CSE3analysis.v index 5eb92a82..5ed04bc4 100644 --- a/backend/CSE3analysis.v +++ b/backend/CSE3analysis.v @@ -326,18 +326,21 @@ Section OPERATIONS. | _ => kill_reg dst rel end else - if is_trivial_sym_op op - then kill_reg dst rel - else - let args' := forward_move_l rel args in - match rhs_find op args' rel with - | Some r => - if Compopts.optim_CSE3_glb tt - then RELATION.glb (move r dst rel) - (oper1 dst op args' rel) - else oper1 dst op args' rel - | None => oper1 dst op args' rel - end. + let args' := forward_move_l rel args in + match rhs_find op args rel with + | Some r => + if Compopts.optim_CSE3_glb tt + then RELATION.glb (move r dst rel) + (RELATION.glb + (oper1 dst op args rel) + (oper1 dst op args' rel)) + else RELATION.glb + (oper1 dst op args rel) + (oper1 dst op args' rel) + | None => RELATION.glb + (oper1 dst op args rel) + (oper1 dst op args' rel) + end. Definition clever_kill_store (chunk : memory_chunk) (addr: addressing) (args : list reg) @@ -380,11 +383,21 @@ Section OPERATIONS. end else rel'. - Definition store + Definition store (tenv : typing_env) (chunk : memory_chunk) (addr: addressing) (args : list reg) - (src : reg) (ty: typ) + (src : reg) (rel : RELATION.t) : RELATION.t := - store1 chunk addr (forward_move_l rel args) (forward_move rel src) ty rel. + let args' := forward_move_l rel args in + let src' := forward_move rel src in + let tsrc := tenv src in + let tsrc' := tenv src' in + RELATION.glb + (RELATION.glb + (store1 chunk addr args src tsrc rel) + (store1 chunk addr args' src tsrc rel)) + (RELATION.glb + (store1 chunk addr args src' tsrc' rel) + (store1 chunk addr args' src' tsrc' rel)). Definition kill_builtin_res res rel := match res with @@ -427,7 +440,7 @@ Section OPERATIONS. | Icond _ _ _ _ _ | Ijumptable _ _ => Some rel | Istore chunk addr args src _ => - Some (store chunk addr args src (tenv (forward_move rel src)) rel) + Some (store tenv chunk addr args src rel) | Iop op args dst _ => Some (oper dst (SOp op) args rel) | Iload trap chunk addr args dst _ => Some (oper dst (SLoad chunk addr) args rel) | Icall _ _ _ dst _ => Some (kill_reg dst (kill_mem rel)) diff --git a/backend/CSE3analysisproof.v b/backend/CSE3analysisproof.v index 07a58f23..1e5b88c3 100644 --- a/backend/CSE3analysisproof.v +++ b/backend/CSE3analysisproof.v @@ -853,24 +853,24 @@ Section SOUNDNESS. subst. rewrite <- (forward_move_sound rel rs m r) by auto. apply move_sound; auto. - - destruct (is_trivial_sym_op sop). - { - apply kill_reg_sound; auto. - } - destruct rhs_find as [src |] eqn:RHS_FIND. + - destruct rhs_find as [src |] eqn:RHS_FIND. + destruct (Compopts.optim_CSE3_glb tt). * apply sem_rel_glb; split. - ** pose proof (rhs_find_sound no sop (forward_move_l (ctx:=ctx) rel args) rel src rs m REL RHS_FIND) as SOUND. - eapply forward_move_rhs_sound in RHS. - 2: eassumption. + ** pose proof (rhs_find_sound no sop args rel src rs m REL RHS_FIND) as SOUND. rewrite <- (sem_rhs_det SOUND RHS). apply move_sound; auto. + ** apply sem_rel_glb; split. + *** apply oper1_sound; auto. + *** apply oper1_sound; auto. + apply forward_move_rhs_sound; auto. + * apply sem_rel_glb; split. + ** apply oper1_sound; auto. ** apply oper1_sound; auto. apply forward_move_rhs_sound; auto. - * ** apply oper1_sound; auto. - apply forward_move_rhs_sound; auto. - + apply oper1_sound; auto. - apply forward_move_rhs_sound; auto. + + apply sem_rel_glb; split. + * apply oper1_sound; auto. + * apply oper1_sound; auto. + apply forward_move_rhs_sound; auto. Qed. Hint Resolve oper_sound : cse3. @@ -959,22 +959,28 @@ Section SOUNDNESS. Hint Resolve store1_sound : cse3. + Theorem store_sound: forall no chunk addr args a src rel tenv rs m m', sem_rel rel rs m -> wt_regset tenv rs -> eval_addressing genv sp addr (rs ## args) = Some a -> Mem.storev chunk m a (rs#src) = Some m' -> - sem_rel (store (ctx:=ctx) no chunk addr args src (tenv (forward_move (ctx:=ctx) rel src)) rel) rs m'. + sem_rel (store (ctx:=ctx) no tenv chunk addr args src rel) rs m'. Proof. unfold store. intros until m'. intros REL WT ADDR STORE. - rewrite <- forward_move_l_sound with (rel:=rel) (m:=m) in ADDR by trivial. - rewrite <- forward_move_sound with (rel:=rel) (m:=m) in STORE by trivial. - apply store1_sound with (a := a) (m := m); trivial. - (* rewrite forward_move_sound with (rel:=rel) (m:=m) in STORE by trivial. - assumption. *) + apply sem_rel_glb; split. + - apply sem_rel_glb; split. + * apply store1_sound with (a := a) (m := m); trivial. + * rewrite <- forward_move_l_sound with (rel:=rel) (m:=m) in ADDR by trivial. + apply store1_sound with (a := a) (m := m); trivial. + - rewrite <- forward_move_sound with (rel:=rel) (m:=m) in STORE by trivial. + apply sem_rel_glb; split. + * apply store1_sound with (a := a) (m := m); trivial. + * rewrite <- forward_move_l_sound with (rel:=rel) (m:=m) in ADDR by trivial. + apply store1_sound with (a := a) (m := m); trivial. Qed. Hint Resolve store_sound : cse3. diff --git a/backend/Duplicateaux.ml b/backend/Duplicateaux.ml index 2b13ab5d..fac0ba76 100644 --- a/backend/Duplicateaux.ml +++ b/backend/Duplicateaux.ml @@ -836,6 +836,56 @@ let unroll_inner_loops_body f code revmap = (!code', !revmap') end +let extract_upto_icond f code head = + let rec extract h = + let inst = get_some @@ PTree.get h code in + match inst with + | Icond _ -> [h] + | _ -> ( match rtl_successors inst with + | [n] -> h :: (extract n) + | _ -> failwith "Found a node with more than one successor??" + ) + in List.rev @@ extract head + +let rotate_inner_loop f code revmap iloop = + let header = extract_upto_icond f code iloop.head in + let limit = !Clflags.option_flooprotate in + if count_ignore_nops code header > limit then begin + debug "Loop Rotate: too many nodes to duplicate (%d > %d)" (List.length header) limit; + (code, revmap) + end else + let (code2, revmap2, dupheader, fwmap) = clone code revmap header in + let code' = ref code2 in + let head' = apply_map fwmap iloop.head in + begin + code' := change_pointers !code' iloop.head head' iloop.preds; + (!code', revmap2) + end + +let rotate_inner_loops f code revmap = + let is_loop_header = get_loop_headers code (f.fn_entrypoint) in + let inner_loops = get_inner_loops f code is_loop_header in + let code' = ref code in + let revmap' = ref revmap in + begin + print_inner_loops inner_loops; + List.iter (fun iloop -> + let (new_code, new_revmap) = rotate_inner_loop f !code' !revmap' iloop in + code' := new_code; revmap' := new_revmap + ) inner_loops; + (!code', !revmap') + end + +let loop_rotate f = + let entrypoint = f.fn_entrypoint in + let code = f.fn_code in + let revmap = make_identity_ptree code in + let (code, revmap) = + if !Clflags.option_flooprotate > 0 then + rotate_inner_loops f code revmap + else (code, revmap) in + ((code, entrypoint), revmap) + let static_predict f = let entrypoint = f.fn_entrypoint in let code = f.fn_code in diff --git a/backend/Duplicatepasses.v b/backend/Duplicatepasses.v index dc96f966..7e58eedf 100644 --- a/backend/Duplicatepasses.v +++ b/backend/Duplicatepasses.v @@ -45,4 +45,14 @@ End TailDuplicateOracle. Module Tailduplicateproof := DuplicateProof TailDuplicateOracle. -Module Tailduplicate := Tailduplicateproof.
\ No newline at end of file +Module Tailduplicate := Tailduplicateproof. + +(** Loop Rotate *) + +Module LoopRotateOracle <: DuplicateOracle. + Axiom duplicate_aux : function -> code * node * (PTree.t node). + Extract Constant duplicate_aux => "Duplicateaux.loop_rotate". +End LoopRotateOracle. + +Module Looprotateproof := DuplicateProof LoopRotateOracle. +Module Looprotate := Looprotateproof. diff --git a/driver/Clflags.ml b/driver/Clflags.ml index 68508f2e..bc8a7925 100644 --- a/driver/Clflags.ml +++ b/driver/Clflags.ml @@ -43,6 +43,7 @@ let option_ftailduplicate = ref 0 (* perform tail duplication for blocks of size let option_ftracelinearize = ref true (* uses branch prediction information to improve the linearization *) let option_funrollsingle = ref 0 (* unroll a single iteration of innermost loops of size n *) let option_funrollbody = ref 0 (* unroll the body of innermost loops of size n *) +let option_flooprotate = ref 0 (* rotate the innermost loops to have the condition inside the loop body *) (* Scheduling *) let option_mtune = ref "" diff --git a/driver/Driver.ml b/driver/Driver.ml index ef4acee7..8ceb3a25 100644 --- a/driver/Driver.ml +++ b/driver/Driver.ml @@ -219,6 +219,8 @@ Processing options: -ftracelinearize Uses branch prediction information to improve the Linearize [on] -funrollsingle n Unrolls a single iteration of innermost loops of size n (not counting Inops) [0] -funrollbody n Unrolls once the body of innermost loops of size n (not counting Inops) [0] + -flooprotate n Duplicates the header (condition computation part) of innermost loops to perform a loop rotate [0] + Doesn't duplicate if the size of that header is strictly greater than n -fforward-moves Forward moves after CSE -finline Perform inlining of functions [on] -finline-functions-called-once Integrate functions only required by their @@ -432,6 +434,7 @@ let cmdline_actions = @ f_opt "predict" option_fpredict @ [ Exact "-funrollsingle", Integer (fun n -> option_funrollsingle := n) ] @ [ Exact "-funrollbody", Integer (fun n -> option_funrollbody := n) ] + @ [ Exact "-flooprotate", Integer (fun n -> option_flooprotate := n) ] @ f_opt "tracelinearize" option_ftracelinearize @ f_opt_str "prepass" option_fprepass option_fprepass_sched @ f_opt_str "postpass" option_fpostpass option_fpostpass_sched diff --git a/tools/compiler_expand.ml b/tools/compiler_expand.ml index 35ddfe5e..d7484628 100644 --- a/tools/compiler_expand.ml +++ b/tools/compiler_expand.ml @@ -25,7 +25,9 @@ TOTAL, Always, Require, (Some "Renumbering"), "Renumber"; PARTIAL, (Option "optim_CSE"), Require, (Some "CSE"), "CSE"; PARTIAL, Always, NoRequire, (Some "Static Prediction + inverting conditions"), "Staticpredict"; PARTIAL, Always, NoRequire, (Some "Unrolling one iteration out of innermost loops"), "Unrollsingle"; -TOTAL, Always, Require, (Some "Renumbering pre unrolling"), "Renumber"; +TOTAL, Always, Require, (Some "Renumbering pre rotate"), "Renumber"; +PARTIAL, Always, NoRequire, (Some "Loop Rotate"), "Looprotate"; +TOTAL, Always, NoRequire, (Some "Renumbering pre unrolling"), "Renumber"; PARTIAL, Always, NoRequire, (Some "Unrolling the body of innermost loops"), "Unrollbody"; TOTAL, Always, NoRequire, (Some "Renumbering pre tail duplication"), "Renumber"; PARTIAL, Always, NoRequire, (Some "Performing tail duplication"), "Tailduplicate"; |