diff options
author | David Monniaux <david.monniaux@univ-grenoble-alpes.fr> | 2020-11-03 19:56:28 +0100 |
---|---|---|
committer | David Monniaux <david.monniaux@univ-grenoble-alpes.fr> | 2020-11-03 19:56:28 +0100 |
commit | c7db1353b583b37cf930e9ad7abd97cbf9fb3a65 (patch) | |
tree | cc41dce1e6b1ce9bb71bf37e4289e3c96fa268e4 | |
parent | b8b9c8a4b90c8623b5d728a56382d2282441554d (diff) | |
parent | 025a185487c579f768fb747dd6e91a931a2ae66b (diff) | |
download | compcert-kvx-c7db1353b583b37cf930e9ad7abd97cbf9fb3a65.tar.gz compcert-kvx-c7db1353b583b37cf930e9ad7abd97cbf9fb3a65.zip |
Merge remote-tracking branch 'origin/kvx-work' into kvx-work-ssa
-rw-r--r-- | backend/CSE3analysis.v | 53 | ||||
-rw-r--r-- | backend/CSE3analysisaux.ml | 95 | ||||
-rw-r--r-- | backend/CSE3analysisproof.v | 74 | ||||
-rw-r--r-- | backend/Duplicateaux.ml | 50 | ||||
-rw-r--r-- | backend/Duplicatepasses.v | 12 | ||||
-rw-r--r-- | driver/Clflags.ml | 2 | ||||
-rw-r--r-- | driver/Driver.ml | 5 | ||||
-rw-r--r-- | tools/compiler_expand.ml | 2 |
8 files changed, 241 insertions, 52 deletions
diff --git a/backend/CSE3analysis.v b/backend/CSE3analysis.v index 7316c9a9..5ed04bc4 100644 --- a/backend/CSE3analysis.v +++ b/backend/CSE3analysis.v @@ -282,12 +282,14 @@ Section OPERATIONS. Definition oper2 (dst : reg) (op: sym_op)(args : list reg) (rel : RELATION.t) : RELATION.t := - let rel' := kill_reg dst rel in match eq_find {| eq_lhs := dst; eq_op := op; eq_args:= args |} with - | Some id => PSet.add id rel' - | None => rel' + | Some id => + if PSet.contains rel id + then rel + else PSet.add id (kill_reg dst rel) + | None => kill_reg dst rel end. Definition oper1 (dst : reg) (op: sym_op) (args : list reg) @@ -324,18 +326,21 @@ Section OPERATIONS. | _ => kill_reg dst rel end else - if is_trivial_sym_op op - then kill_reg dst rel - else - let args' := forward_move_l rel args in - match rhs_find op args' rel with - | Some r => - if Compopts.optim_CSE3_glb tt - then RELATION.glb (move r dst rel) - (oper1 dst op args' rel) - else oper1 dst op args' rel - | None => oper1 dst op args' rel - end. + let args' := forward_move_l rel args in + match rhs_find op args rel with + | Some r => + if Compopts.optim_CSE3_glb tt + then RELATION.glb (move r dst rel) + (RELATION.glb + (oper1 dst op args rel) + (oper1 dst op args' rel)) + else RELATION.glb + (oper1 dst op args rel) + (oper1 dst op args' rel) + | None => RELATION.glb + (oper1 dst op args rel) + (oper1 dst op args' rel) + end. Definition clever_kill_store (chunk : memory_chunk) (addr: addressing) (args : list reg) @@ -378,11 +383,21 @@ Section OPERATIONS. end else rel'. - Definition store + Definition store (tenv : typing_env) (chunk : memory_chunk) (addr: addressing) (args : list reg) - (src : reg) (ty: typ) + (src : reg) (rel : RELATION.t) : RELATION.t := - store1 chunk addr (forward_move_l rel args) (forward_move rel src) ty rel. + let args' := forward_move_l rel args in + let src' := forward_move rel src in + let tsrc := tenv src in + let tsrc' := tenv src' in + RELATION.glb + (RELATION.glb + (store1 chunk addr args src tsrc rel) + (store1 chunk addr args' src tsrc rel)) + (RELATION.glb + (store1 chunk addr args src' tsrc' rel) + (store1 chunk addr args' src' tsrc' rel)). Definition kill_builtin_res res rel := match res with @@ -425,7 +440,7 @@ Section OPERATIONS. | Icond _ _ _ _ _ | Ijumptable _ _ => Some rel | Istore chunk addr args src _ => - Some (store chunk addr args src (tenv (forward_move rel src)) rel) + Some (store tenv chunk addr args src rel) | Iop op args dst _ => Some (oper dst (SOp op) args rel) | Iload trap chunk addr args dst _ => Some (oper dst (SLoad chunk addr) args rel) | Icall _ _ _ dst _ => Some (kill_reg dst (kill_mem rel)) diff --git a/backend/CSE3analysisaux.ml b/backend/CSE3analysisaux.ml index 8c83dc2e..6e190d35 100644 --- a/backend/CSE3analysisaux.ml +++ b/backend/CSE3analysisaux.ml @@ -14,7 +14,8 @@ open CSE3analysis open Maps open HashedSet open Camlcoq - +open Coqlib + let flatten_eq eq = ((P.to_int eq.eq_lhs), eq.eq_op, List.map P.to_int eq.eq_args);; @@ -98,10 +99,72 @@ let pp_results f (invariants : RB.t PMap.t) hints chan = let max_pc = P.to_int (RTL.max_pc_function f) in for pc=max_pc downto 1 do - Printf.fprintf chan "%d: %a\n" pc + Printf.fprintf chan "%d: %a\n\n" pc (pp_relation_b hints) (PMap.get (P.of_int pc) invariants) done +module IntSet=Set.Make(struct type t=int let compare = ( - ) end);; + +let rec union_list prev = function + | [] -> prev + | h::t -> union_list (RB.lub prev h) t;; + +let rb_glb (x : RB.t) (y : RB.t) : RB.t = + match x, y with + | None, _ | _, None -> None + | (Some x'), (Some y') -> Some (RELATION.glb x' y');; + +let refine_invariants + (nodes : RTL.node list) + (entrypoint : RTL.node) + (successors : RTL.node -> RTL.node list) + (predecessors : RTL.node -> RTL.node list) + (tfr : RTL.node -> RB.t -> RB.t) (invariants0 : RB.t PMap.t) = + let todo = ref IntSet.empty + and invariants = ref invariants0 in + let add_todo (pc : RTL.node) = + todo := IntSet.add (P.to_int pc) !todo in + let update_node (pc : RTL.node) = + Printf.printf "updating node %d\n" (P.to_int pc); + if not (peq pc entrypoint) + then + let cur = PMap.get pc !invariants in + let nxt = union_list RB.bot + (List.map + (fun pred_pc-> + rb_glb cur + (tfr pred_pc (PMap.get pred_pc !invariants))) + (predecessors pc)) in + if not (RB.beq cur nxt) + then + begin + Printf.printf "refining CSE3 node %d\n" (P.to_int pc); + List.iter add_todo (successors pc) + end in + (List.iter add_todo nodes); + while not (IntSet.is_empty !todo) do + let nxt = IntSet.max_elt !todo in + todo := IntSet.remove nxt !todo; + update_node (P.of_int nxt) + done; + !invariants;; + +let get_default default x ptree = + match PTree.get x ptree with + | None -> default + | Some y -> y;; + +let refine_analysis ctx tenv + (f : RTL.coq_function) (invariants0 : RB.t PMap.t) = + let succ_map = RTL.successors_map f in + let succ_f x = get_default [] x succ_map in + let pred_map = Kildall.make_predecessors f.RTL.fn_code RTL.successors_instr in + let pred_f x = get_default [] x pred_map in + let tfr = apply_instr' ctx tenv f.RTL.fn_code in + refine_invariants + (List.map fst (PTree.elements f.RTL.fn_code)) + f.RTL.fn_entrypoint succ_f pred_f tfr invariants0;; + let preanalysis (tenv : typing_env) (f : RTL.coq_function) = let cur_eq_id = ref 0 and cur_catalog = ref PTree.empty @@ -113,6 +176,7 @@ let preanalysis (tenv : typing_env) (f : RTL.coq_function) = let eq_find_oracle node eq = assert (not (is_trivial eq)); let o = Hashtbl.find_opt eq_table (flatten_eq eq) in + (if o = None then failwith "eq_find_oracle"); (if !Clflags.option_debug_compcert > 5 then Printf.printf "@%d: eq_find %a -> %a\n" (P.to_int node) pp_eq eq (pp_option pp_P) o); @@ -161,21 +225,24 @@ let preanalysis (tenv : typing_env) (f : RTL.coq_function) = pp_eq eq (pp_option pp_P) o); o in - match - internal_analysis - { eq_catalog = (fun eq_id -> PTree.get eq_id !cur_catalog); - eq_find_oracle = mutating_eq_find_oracle; - eq_rhs_oracle = rhs_find_oracle ; - eq_kill_reg = (fun reg -> PMap.get reg !cur_kill_reg); - eq_kill_mem = (fun () -> !cur_kill_mem); - eq_moves = (fun reg -> PMap.get reg !cur_moves) - } tenv f + let ctx = { eq_catalog = (fun eq_id -> PTree.get eq_id !cur_catalog); + eq_find_oracle = mutating_eq_find_oracle; + eq_rhs_oracle = rhs_find_oracle ; + eq_kill_reg = (fun reg -> PMap.get reg !cur_kill_reg); + eq_kill_mem = (fun () -> !cur_kill_mem); + eq_moves = (fun reg -> PMap.get reg !cur_moves) + } in + match internal_analysis ctx tenv f with None -> failwith "CSE3analysisaux analysis failed, try re-running with -fno-cse3" | Some invariants -> - let hints = { hint_eq_catalog = !cur_catalog; + let invariants' = + if ! Clflags.option_fcse3_refine + then refine_analysis ctx tenv f invariants + else invariants + and hints = { hint_eq_catalog = !cur_catalog; hint_eq_find_oracle= eq_find_oracle; hint_eq_rhs_oracle = rhs_find_oracle } in (if !Clflags.option_debug_compcert > 1 - then pp_results f invariants hints stdout); - invariants, hints + then pp_results f invariants' hints stdout); + invariants', hints ;; diff --git a/backend/CSE3analysisproof.v b/backend/CSE3analysisproof.v index 66b199cc..1e5b88c3 100644 --- a/backend/CSE3analysisproof.v +++ b/backend/CSE3analysisproof.v @@ -699,6 +699,27 @@ Section SOUNDNESS. + congruence. Qed. + Lemma arglist_idem_write: + forall { A : Type} args (rs : Regmap.t A) dst, + (rs # dst <- (rs # dst)) ## args = rs ## args. + Proof. + induction args; trivial. + intros. cbn. + f_equal; trivial. + apply Regmap.gsident. + Qed. + + Lemma sem_rhs_idem_write: + forall sop args rs dst m v, + sem_rhs sop args rs m v -> + sem_rhs sop args (rs # dst <- (rs # dst)) m v. + Proof. + intros. + unfold sem_rhs in *. + rewrite arglist_idem_write. + assumption. + Qed. + Theorem oper2_sound: forall no dst sop args rel rs m v, sem_rel rel rs m -> @@ -726,6 +747,17 @@ Section SOUNDNESS. rewrite Regmap.gss. apply sem_rhs_depends_on_args_only; auto. } + intros INi. + destruct (PSet.contains rel e) eqn:CONTAINSe. + { pose proof (REL e {| eq_lhs := dst; eq_op := sop; eq_args := args |} CONTAINSe H) as RELe. + pose proof (REL i eq CONTAINS INi) as RELi. + unfold sem_eq in *. + cbn in RELe. + replace v with (rs # dst) by (eapply sem_rhs_det; eassumption). + rewrite Regmap.gsident. + apply sem_rhs_idem_write. + assumption. + } rewrite PSet.gaddo in CONTAINS by congruence. apply (kill_reg_sound rel rs m dst v REL i eq); auto. Qed. @@ -821,24 +853,24 @@ Section SOUNDNESS. subst. rewrite <- (forward_move_sound rel rs m r) by auto. apply move_sound; auto. - - destruct (is_trivial_sym_op sop). - { - apply kill_reg_sound; auto. - } - destruct rhs_find as [src |] eqn:RHS_FIND. + - destruct rhs_find as [src |] eqn:RHS_FIND. + destruct (Compopts.optim_CSE3_glb tt). * apply sem_rel_glb; split. - ** pose proof (rhs_find_sound no sop (forward_move_l (ctx:=ctx) rel args) rel src rs m REL RHS_FIND) as SOUND. - eapply forward_move_rhs_sound in RHS. - 2: eassumption. + ** pose proof (rhs_find_sound no sop args rel src rs m REL RHS_FIND) as SOUND. rewrite <- (sem_rhs_det SOUND RHS). apply move_sound; auto. + ** apply sem_rel_glb; split. + *** apply oper1_sound; auto. + *** apply oper1_sound; auto. + apply forward_move_rhs_sound; auto. + * apply sem_rel_glb; split. + ** apply oper1_sound; auto. ** apply oper1_sound; auto. apply forward_move_rhs_sound; auto. - * ** apply oper1_sound; auto. - apply forward_move_rhs_sound; auto. - + apply oper1_sound; auto. - apply forward_move_rhs_sound; auto. + + apply sem_rel_glb; split. + * apply oper1_sound; auto. + * apply oper1_sound; auto. + apply forward_move_rhs_sound; auto. Qed. Hint Resolve oper_sound : cse3. @@ -927,22 +959,28 @@ Section SOUNDNESS. Hint Resolve store1_sound : cse3. + Theorem store_sound: forall no chunk addr args a src rel tenv rs m m', sem_rel rel rs m -> wt_regset tenv rs -> eval_addressing genv sp addr (rs ## args) = Some a -> Mem.storev chunk m a (rs#src) = Some m' -> - sem_rel (store (ctx:=ctx) no chunk addr args src (tenv (forward_move (ctx:=ctx) rel src)) rel) rs m'. + sem_rel (store (ctx:=ctx) no tenv chunk addr args src rel) rs m'. Proof. unfold store. intros until m'. intros REL WT ADDR STORE. - rewrite <- forward_move_l_sound with (rel:=rel) (m:=m) in ADDR by trivial. - rewrite <- forward_move_sound with (rel:=rel) (m:=m) in STORE by trivial. - apply store1_sound with (a := a) (m := m); trivial. - (* rewrite forward_move_sound with (rel:=rel) (m:=m) in STORE by trivial. - assumption. *) + apply sem_rel_glb; split. + - apply sem_rel_glb; split. + * apply store1_sound with (a := a) (m := m); trivial. + * rewrite <- forward_move_l_sound with (rel:=rel) (m:=m) in ADDR by trivial. + apply store1_sound with (a := a) (m := m); trivial. + - rewrite <- forward_move_sound with (rel:=rel) (m:=m) in STORE by trivial. + apply sem_rel_glb; split. + * apply store1_sound with (a := a) (m := m); trivial. + * rewrite <- forward_move_l_sound with (rel:=rel) (m:=m) in ADDR by trivial. + apply store1_sound with (a := a) (m := m); trivial. Qed. Hint Resolve store_sound : cse3. diff --git a/backend/Duplicateaux.ml b/backend/Duplicateaux.ml index 2b13ab5d..fac0ba76 100644 --- a/backend/Duplicateaux.ml +++ b/backend/Duplicateaux.ml @@ -836,6 +836,56 @@ let unroll_inner_loops_body f code revmap = (!code', !revmap') end +let extract_upto_icond f code head = + let rec extract h = + let inst = get_some @@ PTree.get h code in + match inst with + | Icond _ -> [h] + | _ -> ( match rtl_successors inst with + | [n] -> h :: (extract n) + | _ -> failwith "Found a node with more than one successor??" + ) + in List.rev @@ extract head + +let rotate_inner_loop f code revmap iloop = + let header = extract_upto_icond f code iloop.head in + let limit = !Clflags.option_flooprotate in + if count_ignore_nops code header > limit then begin + debug "Loop Rotate: too many nodes to duplicate (%d > %d)" (List.length header) limit; + (code, revmap) + end else + let (code2, revmap2, dupheader, fwmap) = clone code revmap header in + let code' = ref code2 in + let head' = apply_map fwmap iloop.head in + begin + code' := change_pointers !code' iloop.head head' iloop.preds; + (!code', revmap2) + end + +let rotate_inner_loops f code revmap = + let is_loop_header = get_loop_headers code (f.fn_entrypoint) in + let inner_loops = get_inner_loops f code is_loop_header in + let code' = ref code in + let revmap' = ref revmap in + begin + print_inner_loops inner_loops; + List.iter (fun iloop -> + let (new_code, new_revmap) = rotate_inner_loop f !code' !revmap' iloop in + code' := new_code; revmap' := new_revmap + ) inner_loops; + (!code', !revmap') + end + +let loop_rotate f = + let entrypoint = f.fn_entrypoint in + let code = f.fn_code in + let revmap = make_identity_ptree code in + let (code, revmap) = + if !Clflags.option_flooprotate > 0 then + rotate_inner_loops f code revmap + else (code, revmap) in + ((code, entrypoint), revmap) + let static_predict f = let entrypoint = f.fn_entrypoint in let code = f.fn_code in diff --git a/backend/Duplicatepasses.v b/backend/Duplicatepasses.v index dc96f966..7e58eedf 100644 --- a/backend/Duplicatepasses.v +++ b/backend/Duplicatepasses.v @@ -45,4 +45,14 @@ End TailDuplicateOracle. Module Tailduplicateproof := DuplicateProof TailDuplicateOracle. -Module Tailduplicate := Tailduplicateproof.
\ No newline at end of file +Module Tailduplicate := Tailduplicateproof. + +(** Loop Rotate *) + +Module LoopRotateOracle <: DuplicateOracle. + Axiom duplicate_aux : function -> code * node * (PTree.t node). + Extract Constant duplicate_aux => "Duplicateaux.loop_rotate". +End LoopRotateOracle. + +Module Looprotateproof := DuplicateProof LoopRotateOracle. +Module Looprotate := Looprotateproof. diff --git a/driver/Clflags.ml b/driver/Clflags.ml index bda0f438..c3f30b88 100644 --- a/driver/Clflags.ml +++ b/driver/Clflags.ml @@ -35,6 +35,7 @@ let option_fcse3_across_calls = ref false let option_fcse3_across_merges = ref true let option_fcse3_glb = ref true let option_fcse3_trivial_ops = ref false +let option_fcse3_refine = ref true let option_fredundancy = ref true (** Options relative to superblock scheduling *) @@ -43,6 +44,7 @@ let option_ftailduplicate = ref 0 (* perform tail duplication for blocks of size let option_ftracelinearize = ref true (* uses branch prediction information to improve the linearization *) let option_funrollsingle = ref 0 (* unroll a single iteration of innermost loops of size n *) let option_funrollbody = ref 0 (* unroll the body of innermost loops of size n *) +let option_flooprotate = ref 0 (* rotate the innermost loops to have the condition inside the loop body *) let option_fpostpass = ref true let option_fpostpass_sched = ref "list" diff --git a/driver/Driver.ml b/driver/Driver.ml index 9aba32f8..cd33e4db 100644 --- a/driver/Driver.ml +++ b/driver/Driver.ml @@ -220,6 +220,7 @@ Processing options: -fcse3-across-merges Propagate CSE3 information across control-flow merges [on] -fcse3-glb Refine CSE3 information using greatest lower bounds [on] -fcse3-trivial-ops Replace trivial operations as well using CSE3 [off] + -fcse3-refine Refine CSE3 invariants by descending iteration [on] -fmove-loop-invariants Perform loop-invariant code motion [off] -fredundancy Perform redundancy elimination [on] -fpostpass Perform postpass scheduling (only for K1 architecture) [on] @@ -231,6 +232,8 @@ Processing options: -ftracelinearize Uses branch prediction information to improve the Linearize [on] -funrollsingle n Unrolls a single iteration of innermost loops of size n (not counting Inops) [0] -funrollbody n Unrolls once the body of innermost loops of size n (not counting Inops) [0] + -flooprotate n Duplicates the header (condition computation part) of innermost loops to perform a loop rotate [0] + Doesn't duplicate if the size of that header is strictly greater than n -fforward-moves Forward moves after CSE -finline Perform inlining of functions [on] -finline-functions-called-once Integrate functions only required by their @@ -458,6 +461,7 @@ let cmdline_actions = @ f_opt "cse3-across-merges" option_fcse3_across_merges @ f_opt "cse3-glb" option_fcse3_glb @ f_opt "cse3-trivial-ops" option_fcse3_trivial_ops + @ f_opt "cse3-refine" option_fcse3_refine @ f_opt "move-loop-invariants" option_fmove_loop_invariants @ f_opt "move-loop-invariants_2" option_fmove_loop_invariants_2 @ f_opt "move-loop-invariants_s" option_fmove_loop_invariants_s @@ -467,6 +471,7 @@ let cmdline_actions = @ f_opt "predict" option_fpredict @ [ Exact "-funrollsingle", Integer (fun n -> option_funrollsingle := n) ] @ [ Exact "-funrollbody", Integer (fun n -> option_funrollbody := n) ] + @ [ Exact "-flooprotate", Integer (fun n -> option_flooprotate := n) ] @ f_opt "tracelinearize" option_ftracelinearize @ f_opt_str "postpass" option_fpostpass option_fpostpass_sched @ f_opt "inline" option_finline diff --git a/tools/compiler_expand.ml b/tools/compiler_expand.ml index 1da8c21a..f0476b4f 100644 --- a/tools/compiler_expand.ml +++ b/tools/compiler_expand.ml @@ -26,6 +26,8 @@ PARTIAL, (Option "optim_CSE"), Require, (Some "CSE"), "CSE"; PARTIAL, Always, NoRequire, (Some "Static Prediction + inverting conditions"), "Staticpredict"; PARTIAL, Always, NoRequire, (Some "Unrolling one iteration out of innermost loops"), "Unrollsingle"; TOTAL, Always, Require, (Some "Renumbering pre constprop"), "Renumber"; +PARTIAL, Always, NoRequire, (Some "Loop Rotate"), "Looprotate"; +TOTAL, Always, NoRequire, (Some "Renumbering pre constprop"), "Renumber"; PARTIAL, Always, NoRequire, (Some "Unrolling the body of innermost loops"), "Unrollbody"; TOTAL, Always, NoRequire, (Some "Renumbering pre constprop"), "Renumber"; PARTIAL, Always, NoRequire, (Some "Performing tail duplication"), "Tailduplicate"; |