From 9cf81f29d1d1b1be3575414fceec2a03378918ed Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Fri, 30 Oct 2020 23:05:43 +0100 Subject: also match Istore --- backend/LICMaux.ml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/backend/LICMaux.ml b/backend/LICMaux.ml index 0ca4418b..42b8eeb7 100644 --- a/backend/LICMaux.ml +++ b/backend/LICMaux.ml @@ -208,7 +208,8 @@ let rewrite_loop_body (last_alloc : reg ref) (List.map (map_reg mapper) args), new_res)); PTree.set res new_res mapper - | Iload(trap, chunk, addr, args, res, pc') + | Iload(_, chunk, addr, args, res, pc') + | Istore(chunk, addr, args, res, pc') when Archi.has_notrap_loads && !Clflags.option_fnontrap_loads -> let new_res = P.succ !last_alloc in -- cgit From 60e28f582b37d4080686992ea114857f75cfe6c2 Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Sat, 31 Oct 2020 09:23:53 +0100 Subject: seems to work better --- backend/CSE3analysis.v | 8 +++++--- backend/CSE3analysisproof.v | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/backend/CSE3analysis.v b/backend/CSE3analysis.v index 7316c9a9..5eb92a82 100644 --- a/backend/CSE3analysis.v +++ b/backend/CSE3analysis.v @@ -282,12 +282,14 @@ Section OPERATIONS. Definition oper2 (dst : reg) (op: sym_op)(args : list reg) (rel : RELATION.t) : RELATION.t := - let rel' := kill_reg dst rel in match eq_find {| eq_lhs := dst; eq_op := op; eq_args:= args |} with - | Some id => PSet.add id rel' - | None => rel' + | Some id => + if PSet.contains rel id + then rel + else PSet.add id (kill_reg dst rel) + | None => kill_reg dst rel end. Definition oper1 (dst : reg) (op: sym_op) (args : list reg) diff --git a/backend/CSE3analysisproof.v b/backend/CSE3analysisproof.v index 66b199cc..07a58f23 100644 --- a/backend/CSE3analysisproof.v +++ b/backend/CSE3analysisproof.v @@ -699,6 +699,27 @@ Section SOUNDNESS. + congruence. Qed. + Lemma arglist_idem_write: + forall { A : Type} args (rs : Regmap.t A) dst, + (rs # dst <- (rs # dst)) ## args = rs ## args. + Proof. + induction args; trivial. + intros. cbn. + f_equal; trivial. + apply Regmap.gsident. + Qed. + + Lemma sem_rhs_idem_write: + forall sop args rs dst m v, + sem_rhs sop args rs m v -> + sem_rhs sop args (rs # dst <- (rs # dst)) m v. + Proof. + intros. + unfold sem_rhs in *. + rewrite arglist_idem_write. + assumption. + Qed. + Theorem oper2_sound: forall no dst sop args rel rs m v, sem_rel rel rs m -> @@ -726,6 +747,17 @@ Section SOUNDNESS. rewrite Regmap.gss. apply sem_rhs_depends_on_args_only; auto. } + intros INi. + destruct (PSet.contains rel e) eqn:CONTAINSe. + { pose proof (REL e {| eq_lhs := dst; eq_op := sop; eq_args := args |} CONTAINSe H) as RELe. + pose proof (REL i eq CONTAINS INi) as RELi. + unfold sem_eq in *. + cbn in RELe. + replace v with (rs # dst) by (eapply sem_rhs_det; eassumption). + rewrite Regmap.gsident. + apply sem_rhs_idem_write. + assumption. + } rewrite PSet.gaddo in CONTAINS by congruence. apply (kill_reg_sound rel rs m dst v REL i eq); auto. Qed. -- cgit From e6612fdfd69037099037def2acba5df553c3b49a Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Sat, 31 Oct 2020 09:58:28 +0100 Subject: refining CSE3 nodes --- backend/CSE3analysisaux.ml | 95 +++++++++++++++++++++++++++++++++++++++------- driver/Clflags.ml | 1 + driver/Driver.ml | 2 + 3 files changed, 84 insertions(+), 14 deletions(-) diff --git a/backend/CSE3analysisaux.ml b/backend/CSE3analysisaux.ml index 8c83dc2e..6e190d35 100644 --- a/backend/CSE3analysisaux.ml +++ b/backend/CSE3analysisaux.ml @@ -14,7 +14,8 @@ open CSE3analysis open Maps open HashedSet open Camlcoq - +open Coqlib + let flatten_eq eq = ((P.to_int eq.eq_lhs), eq.eq_op, List.map P.to_int eq.eq_args);; @@ -98,10 +99,72 @@ let pp_results f (invariants : RB.t PMap.t) hints chan = let max_pc = P.to_int (RTL.max_pc_function f) in for pc=max_pc downto 1 do - Printf.fprintf chan "%d: %a\n" pc + Printf.fprintf chan "%d: %a\n\n" pc (pp_relation_b hints) (PMap.get (P.of_int pc) invariants) done +module IntSet=Set.Make(struct type t=int let compare = ( - ) end);; + +let rec union_list prev = function + | [] -> prev + | h::t -> union_list (RB.lub prev h) t;; + +let rb_glb (x : RB.t) (y : RB.t) : RB.t = + match x, y with + | None, _ | _, None -> None + | (Some x'), (Some y') -> Some (RELATION.glb x' y');; + +let refine_invariants + (nodes : RTL.node list) + (entrypoint : RTL.node) + (successors : RTL.node -> RTL.node list) + (predecessors : RTL.node -> RTL.node list) + (tfr : RTL.node -> RB.t -> RB.t) (invariants0 : RB.t PMap.t) = + let todo = ref IntSet.empty + and invariants = ref invariants0 in + let add_todo (pc : RTL.node) = + todo := IntSet.add (P.to_int pc) !todo in + let update_node (pc : RTL.node) = + Printf.printf "updating node %d\n" (P.to_int pc); + if not (peq pc entrypoint) + then + let cur = PMap.get pc !invariants in + let nxt = union_list RB.bot + (List.map + (fun pred_pc-> + rb_glb cur + (tfr pred_pc (PMap.get pred_pc !invariants))) + (predecessors pc)) in + if not (RB.beq cur nxt) + then + begin + Printf.printf "refining CSE3 node %d\n" (P.to_int pc); + List.iter add_todo (successors pc) + end in + (List.iter add_todo nodes); + while not (IntSet.is_empty !todo) do + let nxt = IntSet.max_elt !todo in + todo := IntSet.remove nxt !todo; + update_node (P.of_int nxt) + done; + !invariants;; + +let get_default default x ptree = + match PTree.get x ptree with + | None -> default + | Some y -> y;; + +let refine_analysis ctx tenv + (f : RTL.coq_function) (invariants0 : RB.t PMap.t) = + let succ_map = RTL.successors_map f in + let succ_f x = get_default [] x succ_map in + let pred_map = Kildall.make_predecessors f.RTL.fn_code RTL.successors_instr in + let pred_f x = get_default [] x pred_map in + let tfr = apply_instr' ctx tenv f.RTL.fn_code in + refine_invariants + (List.map fst (PTree.elements f.RTL.fn_code)) + f.RTL.fn_entrypoint succ_f pred_f tfr invariants0;; + let preanalysis (tenv : typing_env) (f : RTL.coq_function) = let cur_eq_id = ref 0 and cur_catalog = ref PTree.empty @@ -113,6 +176,7 @@ let preanalysis (tenv : typing_env) (f : RTL.coq_function) = let eq_find_oracle node eq = assert (not (is_trivial eq)); let o = Hashtbl.find_opt eq_table (flatten_eq eq) in + (if o = None then failwith "eq_find_oracle"); (if !Clflags.option_debug_compcert > 5 then Printf.printf "@%d: eq_find %a -> %a\n" (P.to_int node) pp_eq eq (pp_option pp_P) o); @@ -161,21 +225,24 @@ let preanalysis (tenv : typing_env) (f : RTL.coq_function) = pp_eq eq (pp_option pp_P) o); o in - match - internal_analysis - { eq_catalog = (fun eq_id -> PTree.get eq_id !cur_catalog); - eq_find_oracle = mutating_eq_find_oracle; - eq_rhs_oracle = rhs_find_oracle ; - eq_kill_reg = (fun reg -> PMap.get reg !cur_kill_reg); - eq_kill_mem = (fun () -> !cur_kill_mem); - eq_moves = (fun reg -> PMap.get reg !cur_moves) - } tenv f + let ctx = { eq_catalog = (fun eq_id -> PTree.get eq_id !cur_catalog); + eq_find_oracle = mutating_eq_find_oracle; + eq_rhs_oracle = rhs_find_oracle ; + eq_kill_reg = (fun reg -> PMap.get reg !cur_kill_reg); + eq_kill_mem = (fun () -> !cur_kill_mem); + eq_moves = (fun reg -> PMap.get reg !cur_moves) + } in + match internal_analysis ctx tenv f with None -> failwith "CSE3analysisaux analysis failed, try re-running with -fno-cse3" | Some invariants -> - let hints = { hint_eq_catalog = !cur_catalog; + let invariants' = + if ! Clflags.option_fcse3_refine + then refine_analysis ctx tenv f invariants + else invariants + and hints = { hint_eq_catalog = !cur_catalog; hint_eq_find_oracle= eq_find_oracle; hint_eq_rhs_oracle = rhs_find_oracle } in (if !Clflags.option_debug_compcert > 1 - then pp_results f invariants hints stdout); - invariants, hints + then pp_results f invariants' hints stdout); + invariants', hints ;; diff --git a/driver/Clflags.ml b/driver/Clflags.ml index fc9e8247..89090b57 100644 --- a/driver/Clflags.ml +++ b/driver/Clflags.ml @@ -34,6 +34,7 @@ let option_fcse3_across_calls = ref false let option_fcse3_across_merges = ref true let option_fcse3_glb = ref true let option_fcse3_trivial_ops = ref false +let option_fcse3_refine = ref true let option_fredundancy = ref true (** Options relative to superblock scheduling *) diff --git a/driver/Driver.ml b/driver/Driver.ml index 5d2c839f..27193ff1 100644 --- a/driver/Driver.ml +++ b/driver/Driver.ml @@ -203,6 +203,7 @@ Processing options: -fcse3-across-merges Propagate CSE3 information across control-flow merges [on] -fcse3-glb Refine CSE3 information using greatest lower bounds [on] -fcse3-trivial-ops Replace trivial operations as well using CSE3 [off] + -fcse3-refine Refine CSE3 invariants by descending iteration [on] -fmove-loop-invariants Perform loop-invariant code motion [off] -fredundancy Perform redundancy elimination [on] -fpostpass Perform postpass scheduling (only for K1 architecture) [on] @@ -417,6 +418,7 @@ let cmdline_actions = @ f_opt "cse3-across-merges" option_fcse3_across_merges @ f_opt "cse3-glb" option_fcse3_glb @ f_opt "cse3-trivial-ops" option_fcse3_trivial_ops + @ f_opt "cse3-refine" option_fcse3_refine @ f_opt "move-loop-invariants" option_fmove_loop_invariants @ f_opt "redundancy" option_fredundancy @ f_opt "postpass" option_fpostpass -- cgit