aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Monniaux <david.monniaux@univ-grenoble-alpes.fr>2020-11-03 19:56:28 +0100
committerDavid Monniaux <david.monniaux@univ-grenoble-alpes.fr>2020-11-03 19:56:28 +0100
commitc7db1353b583b37cf930e9ad7abd97cbf9fb3a65 (patch)
treecc41dce1e6b1ce9bb71bf37e4289e3c96fa268e4
parentb8b9c8a4b90c8623b5d728a56382d2282441554d (diff)
parent025a185487c579f768fb747dd6e91a931a2ae66b (diff)
downloadcompcert-kvx-c7db1353b583b37cf930e9ad7abd97cbf9fb3a65.tar.gz
compcert-kvx-c7db1353b583b37cf930e9ad7abd97cbf9fb3a65.zip
Merge remote-tracking branch 'origin/kvx-work' into kvx-work-ssa
-rw-r--r--backend/CSE3analysis.v53
-rw-r--r--backend/CSE3analysisaux.ml95
-rw-r--r--backend/CSE3analysisproof.v74
-rw-r--r--backend/Duplicateaux.ml50
-rw-r--r--backend/Duplicatepasses.v12
-rw-r--r--driver/Clflags.ml2
-rw-r--r--driver/Driver.ml5
-rw-r--r--tools/compiler_expand.ml2
8 files changed, 241 insertions, 52 deletions
diff --git a/backend/CSE3analysis.v b/backend/CSE3analysis.v
index 7316c9a9..5ed04bc4 100644
--- a/backend/CSE3analysis.v
+++ b/backend/CSE3analysis.v
@@ -282,12 +282,14 @@ Section OPERATIONS.
Definition oper2 (dst : reg) (op: sym_op)(args : list reg)
(rel : RELATION.t) : RELATION.t :=
- let rel' := kill_reg dst rel in
match eq_find {| eq_lhs := dst;
eq_op := op;
eq_args:= args |} with
- | Some id => PSet.add id rel'
- | None => rel'
+ | Some id =>
+ if PSet.contains rel id
+ then rel
+ else PSet.add id (kill_reg dst rel)
+ | None => kill_reg dst rel
end.
Definition oper1 (dst : reg) (op: sym_op) (args : list reg)
@@ -324,18 +326,21 @@ Section OPERATIONS.
| _ => kill_reg dst rel
end
else
- if is_trivial_sym_op op
- then kill_reg dst rel
- else
- let args' := forward_move_l rel args in
- match rhs_find op args' rel with
- | Some r =>
- if Compopts.optim_CSE3_glb tt
- then RELATION.glb (move r dst rel)
- (oper1 dst op args' rel)
- else oper1 dst op args' rel
- | None => oper1 dst op args' rel
- end.
+ let args' := forward_move_l rel args in
+ match rhs_find op args rel with
+ | Some r =>
+ if Compopts.optim_CSE3_glb tt
+ then RELATION.glb (move r dst rel)
+ (RELATION.glb
+ (oper1 dst op args rel)
+ (oper1 dst op args' rel))
+ else RELATION.glb
+ (oper1 dst op args rel)
+ (oper1 dst op args' rel)
+ | None => RELATION.glb
+ (oper1 dst op args rel)
+ (oper1 dst op args' rel)
+ end.
Definition clever_kill_store
(chunk : memory_chunk) (addr: addressing) (args : list reg)
@@ -378,11 +383,21 @@ Section OPERATIONS.
end
else rel'.
- Definition store
+ Definition store (tenv : typing_env)
(chunk : memory_chunk) (addr: addressing) (args : list reg)
- (src : reg) (ty: typ)
+ (src : reg)
(rel : RELATION.t) : RELATION.t :=
- store1 chunk addr (forward_move_l rel args) (forward_move rel src) ty rel.
+ let args' := forward_move_l rel args in
+ let src' := forward_move rel src in
+ let tsrc := tenv src in
+ let tsrc' := tenv src' in
+ RELATION.glb
+ (RELATION.glb
+ (store1 chunk addr args src tsrc rel)
+ (store1 chunk addr args' src tsrc rel))
+ (RELATION.glb
+ (store1 chunk addr args src' tsrc' rel)
+ (store1 chunk addr args' src' tsrc' rel)).
Definition kill_builtin_res res rel :=
match res with
@@ -425,7 +440,7 @@ Section OPERATIONS.
| Icond _ _ _ _ _
| Ijumptable _ _ => Some rel
| Istore chunk addr args src _ =>
- Some (store chunk addr args src (tenv (forward_move rel src)) rel)
+ Some (store tenv chunk addr args src rel)
| Iop op args dst _ => Some (oper dst (SOp op) args rel)
| Iload trap chunk addr args dst _ => Some (oper dst (SLoad chunk addr) args rel)
| Icall _ _ _ dst _ => Some (kill_reg dst (kill_mem rel))
diff --git a/backend/CSE3analysisaux.ml b/backend/CSE3analysisaux.ml
index 8c83dc2e..6e190d35 100644
--- a/backend/CSE3analysisaux.ml
+++ b/backend/CSE3analysisaux.ml
@@ -14,7 +14,8 @@ open CSE3analysis
open Maps
open HashedSet
open Camlcoq
-
+open Coqlib
+
let flatten_eq eq =
((P.to_int eq.eq_lhs), eq.eq_op, List.map P.to_int eq.eq_args);;
@@ -98,10 +99,72 @@ let pp_results f (invariants : RB.t PMap.t) hints chan =
let max_pc = P.to_int (RTL.max_pc_function f) in
for pc=max_pc downto 1
do
- Printf.fprintf chan "%d: %a\n" pc
+ Printf.fprintf chan "%d: %a\n\n" pc
(pp_relation_b hints) (PMap.get (P.of_int pc) invariants)
done
+module IntSet=Set.Make(struct type t=int let compare = ( - ) end);;
+
+let rec union_list prev = function
+ | [] -> prev
+ | h::t -> union_list (RB.lub prev h) t;;
+
+let rb_glb (x : RB.t) (y : RB.t) : RB.t =
+ match x, y with
+ | None, _ | _, None -> None
+ | (Some x'), (Some y') -> Some (RELATION.glb x' y');;
+
+let refine_invariants
+ (nodes : RTL.node list)
+ (entrypoint : RTL.node)
+ (successors : RTL.node -> RTL.node list)
+ (predecessors : RTL.node -> RTL.node list)
+ (tfr : RTL.node -> RB.t -> RB.t) (invariants0 : RB.t PMap.t) =
+ let todo = ref IntSet.empty
+ and invariants = ref invariants0 in
+ let add_todo (pc : RTL.node) =
+ todo := IntSet.add (P.to_int pc) !todo in
+ let update_node (pc : RTL.node) =
+ Printf.printf "updating node %d\n" (P.to_int pc);
+ if not (peq pc entrypoint)
+ then
+ let cur = PMap.get pc !invariants in
+ let nxt = union_list RB.bot
+ (List.map
+ (fun pred_pc->
+ rb_glb cur
+ (tfr pred_pc (PMap.get pred_pc !invariants)))
+ (predecessors pc)) in
+ if not (RB.beq cur nxt)
+ then
+ begin
+ Printf.printf "refining CSE3 node %d\n" (P.to_int pc);
+ List.iter add_todo (successors pc)
+ end in
+ (List.iter add_todo nodes);
+ while not (IntSet.is_empty !todo) do
+ let nxt = IntSet.max_elt !todo in
+ todo := IntSet.remove nxt !todo;
+ update_node (P.of_int nxt)
+ done;
+ !invariants;;
+
+let get_default default x ptree =
+ match PTree.get x ptree with
+ | None -> default
+ | Some y -> y;;
+
+let refine_analysis ctx tenv
+ (f : RTL.coq_function) (invariants0 : RB.t PMap.t) =
+ let succ_map = RTL.successors_map f in
+ let succ_f x = get_default [] x succ_map in
+ let pred_map = Kildall.make_predecessors f.RTL.fn_code RTL.successors_instr in
+ let pred_f x = get_default [] x pred_map in
+ let tfr = apply_instr' ctx tenv f.RTL.fn_code in
+ refine_invariants
+ (List.map fst (PTree.elements f.RTL.fn_code))
+ f.RTL.fn_entrypoint succ_f pred_f tfr invariants0;;
+
let preanalysis (tenv : typing_env) (f : RTL.coq_function) =
let cur_eq_id = ref 0
and cur_catalog = ref PTree.empty
@@ -113,6 +176,7 @@ let preanalysis (tenv : typing_env) (f : RTL.coq_function) =
let eq_find_oracle node eq =
assert (not (is_trivial eq));
let o = Hashtbl.find_opt eq_table (flatten_eq eq) in
+ (if o = None then failwith "eq_find_oracle");
(if !Clflags.option_debug_compcert > 5
then Printf.printf "@%d: eq_find %a -> %a\n" (P.to_int node)
pp_eq eq (pp_option pp_P) o);
@@ -161,21 +225,24 @@ let preanalysis (tenv : typing_env) (f : RTL.coq_function) =
pp_eq eq (pp_option pp_P) o);
o
in
- match
- internal_analysis
- { eq_catalog = (fun eq_id -> PTree.get eq_id !cur_catalog);
- eq_find_oracle = mutating_eq_find_oracle;
- eq_rhs_oracle = rhs_find_oracle ;
- eq_kill_reg = (fun reg -> PMap.get reg !cur_kill_reg);
- eq_kill_mem = (fun () -> !cur_kill_mem);
- eq_moves = (fun reg -> PMap.get reg !cur_moves)
- } tenv f
+ let ctx = { eq_catalog = (fun eq_id -> PTree.get eq_id !cur_catalog);
+ eq_find_oracle = mutating_eq_find_oracle;
+ eq_rhs_oracle = rhs_find_oracle ;
+ eq_kill_reg = (fun reg -> PMap.get reg !cur_kill_reg);
+ eq_kill_mem = (fun () -> !cur_kill_mem);
+ eq_moves = (fun reg -> PMap.get reg !cur_moves)
+ } in
+ match internal_analysis ctx tenv f
with None -> failwith "CSE3analysisaux analysis failed, try re-running with -fno-cse3"
| Some invariants ->
- let hints = { hint_eq_catalog = !cur_catalog;
+ let invariants' =
+ if ! Clflags.option_fcse3_refine
+ then refine_analysis ctx tenv f invariants
+ else invariants
+ and hints = { hint_eq_catalog = !cur_catalog;
hint_eq_find_oracle= eq_find_oracle;
hint_eq_rhs_oracle = rhs_find_oracle } in
(if !Clflags.option_debug_compcert > 1
- then pp_results f invariants hints stdout);
- invariants, hints
+ then pp_results f invariants' hints stdout);
+ invariants', hints
;;
diff --git a/backend/CSE3analysisproof.v b/backend/CSE3analysisproof.v
index 66b199cc..1e5b88c3 100644
--- a/backend/CSE3analysisproof.v
+++ b/backend/CSE3analysisproof.v
@@ -699,6 +699,27 @@ Section SOUNDNESS.
+ congruence.
Qed.
+ Lemma arglist_idem_write:
+ forall { A : Type} args (rs : Regmap.t A) dst,
+ (rs # dst <- (rs # dst)) ## args = rs ## args.
+ Proof.
+ induction args; trivial.
+ intros. cbn.
+ f_equal; trivial.
+ apply Regmap.gsident.
+ Qed.
+
+ Lemma sem_rhs_idem_write:
+ forall sop args rs dst m v,
+ sem_rhs sop args rs m v ->
+ sem_rhs sop args (rs # dst <- (rs # dst)) m v.
+ Proof.
+ intros.
+ unfold sem_rhs in *.
+ rewrite arglist_idem_write.
+ assumption.
+ Qed.
+
Theorem oper2_sound:
forall no dst sop args rel rs m v,
sem_rel rel rs m ->
@@ -726,6 +747,17 @@ Section SOUNDNESS.
rewrite Regmap.gss.
apply sem_rhs_depends_on_args_only; auto.
}
+ intros INi.
+ destruct (PSet.contains rel e) eqn:CONTAINSe.
+ { pose proof (REL e {| eq_lhs := dst; eq_op := sop; eq_args := args |} CONTAINSe H) as RELe.
+ pose proof (REL i eq CONTAINS INi) as RELi.
+ unfold sem_eq in *.
+ cbn in RELe.
+ replace v with (rs # dst) by (eapply sem_rhs_det; eassumption).
+ rewrite Regmap.gsident.
+ apply sem_rhs_idem_write.
+ assumption.
+ }
rewrite PSet.gaddo in CONTAINS by congruence.
apply (kill_reg_sound rel rs m dst v REL i eq); auto.
Qed.
@@ -821,24 +853,24 @@ Section SOUNDNESS.
subst.
rewrite <- (forward_move_sound rel rs m r) by auto.
apply move_sound; auto.
- - destruct (is_trivial_sym_op sop).
- {
- apply kill_reg_sound; auto.
- }
- destruct rhs_find as [src |] eqn:RHS_FIND.
+ - destruct rhs_find as [src |] eqn:RHS_FIND.
+ destruct (Compopts.optim_CSE3_glb tt).
* apply sem_rel_glb; split.
- ** pose proof (rhs_find_sound no sop (forward_move_l (ctx:=ctx) rel args) rel src rs m REL RHS_FIND) as SOUND.
- eapply forward_move_rhs_sound in RHS.
- 2: eassumption.
+ ** pose proof (rhs_find_sound no sop args rel src rs m REL RHS_FIND) as SOUND.
rewrite <- (sem_rhs_det SOUND RHS).
apply move_sound; auto.
+ ** apply sem_rel_glb; split.
+ *** apply oper1_sound; auto.
+ *** apply oper1_sound; auto.
+ apply forward_move_rhs_sound; auto.
+ * apply sem_rel_glb; split.
+ ** apply oper1_sound; auto.
** apply oper1_sound; auto.
apply forward_move_rhs_sound; auto.
- * ** apply oper1_sound; auto.
- apply forward_move_rhs_sound; auto.
- + apply oper1_sound; auto.
- apply forward_move_rhs_sound; auto.
+ + apply sem_rel_glb; split.
+ * apply oper1_sound; auto.
+ * apply oper1_sound; auto.
+ apply forward_move_rhs_sound; auto.
Qed.
Hint Resolve oper_sound : cse3.
@@ -927,22 +959,28 @@ Section SOUNDNESS.
Hint Resolve store1_sound : cse3.
+
Theorem store_sound:
forall no chunk addr args a src rel tenv rs m m',
sem_rel rel rs m ->
wt_regset tenv rs ->
eval_addressing genv sp addr (rs ## args) = Some a ->
Mem.storev chunk m a (rs#src) = Some m' ->
- sem_rel (store (ctx:=ctx) no chunk addr args src (tenv (forward_move (ctx:=ctx) rel src)) rel) rs m'.
+ sem_rel (store (ctx:=ctx) no tenv chunk addr args src rel) rs m'.
Proof.
unfold store.
intros until m'.
intros REL WT ADDR STORE.
- rewrite <- forward_move_l_sound with (rel:=rel) (m:=m) in ADDR by trivial.
- rewrite <- forward_move_sound with (rel:=rel) (m:=m) in STORE by trivial.
- apply store1_sound with (a := a) (m := m); trivial.
- (* rewrite forward_move_sound with (rel:=rel) (m:=m) in STORE by trivial.
- assumption. *)
+ apply sem_rel_glb; split.
+ - apply sem_rel_glb; split.
+ * apply store1_sound with (a := a) (m := m); trivial.
+ * rewrite <- forward_move_l_sound with (rel:=rel) (m:=m) in ADDR by trivial.
+ apply store1_sound with (a := a) (m := m); trivial.
+ - rewrite <- forward_move_sound with (rel:=rel) (m:=m) in STORE by trivial.
+ apply sem_rel_glb; split.
+ * apply store1_sound with (a := a) (m := m); trivial.
+ * rewrite <- forward_move_l_sound with (rel:=rel) (m:=m) in ADDR by trivial.
+ apply store1_sound with (a := a) (m := m); trivial.
Qed.
Hint Resolve store_sound : cse3.
diff --git a/backend/Duplicateaux.ml b/backend/Duplicateaux.ml
index 2b13ab5d..fac0ba76 100644
--- a/backend/Duplicateaux.ml
+++ b/backend/Duplicateaux.ml
@@ -836,6 +836,56 @@ let unroll_inner_loops_body f code revmap =
(!code', !revmap')
end
+let extract_upto_icond f code head =
+ let rec extract h =
+ let inst = get_some @@ PTree.get h code in
+ match inst with
+ | Icond _ -> [h]
+ | _ -> ( match rtl_successors inst with
+ | [n] -> h :: (extract n)
+ | _ -> failwith "Found a node with more than one successor??"
+ )
+ in List.rev @@ extract head
+
+let rotate_inner_loop f code revmap iloop =
+ let header = extract_upto_icond f code iloop.head in
+ let limit = !Clflags.option_flooprotate in
+ if count_ignore_nops code header > limit then begin
+ debug "Loop Rotate: too many nodes to duplicate (%d > %d)" (List.length header) limit;
+ (code, revmap)
+ end else
+ let (code2, revmap2, dupheader, fwmap) = clone code revmap header in
+ let code' = ref code2 in
+ let head' = apply_map fwmap iloop.head in
+ begin
+ code' := change_pointers !code' iloop.head head' iloop.preds;
+ (!code', revmap2)
+ end
+
+let rotate_inner_loops f code revmap =
+ let is_loop_header = get_loop_headers code (f.fn_entrypoint) in
+ let inner_loops = get_inner_loops f code is_loop_header in
+ let code' = ref code in
+ let revmap' = ref revmap in
+ begin
+ print_inner_loops inner_loops;
+ List.iter (fun iloop ->
+ let (new_code, new_revmap) = rotate_inner_loop f !code' !revmap' iloop in
+ code' := new_code; revmap' := new_revmap
+ ) inner_loops;
+ (!code', !revmap')
+ end
+
+let loop_rotate f =
+ let entrypoint = f.fn_entrypoint in
+ let code = f.fn_code in
+ let revmap = make_identity_ptree code in
+ let (code, revmap) =
+ if !Clflags.option_flooprotate > 0 then
+ rotate_inner_loops f code revmap
+ else (code, revmap) in
+ ((code, entrypoint), revmap)
+
let static_predict f =
let entrypoint = f.fn_entrypoint in
let code = f.fn_code in
diff --git a/backend/Duplicatepasses.v b/backend/Duplicatepasses.v
index dc96f966..7e58eedf 100644
--- a/backend/Duplicatepasses.v
+++ b/backend/Duplicatepasses.v
@@ -45,4 +45,14 @@ End TailDuplicateOracle.
Module Tailduplicateproof := DuplicateProof TailDuplicateOracle.
-Module Tailduplicate := Tailduplicateproof. \ No newline at end of file
+Module Tailduplicate := Tailduplicateproof.
+
+(** Loop Rotate *)
+
+Module LoopRotateOracle <: DuplicateOracle.
+ Axiom duplicate_aux : function -> code * node * (PTree.t node).
+ Extract Constant duplicate_aux => "Duplicateaux.loop_rotate".
+End LoopRotateOracle.
+
+Module Looprotateproof := DuplicateProof LoopRotateOracle.
+Module Looprotate := Looprotateproof.
diff --git a/driver/Clflags.ml b/driver/Clflags.ml
index bda0f438..c3f30b88 100644
--- a/driver/Clflags.ml
+++ b/driver/Clflags.ml
@@ -35,6 +35,7 @@ let option_fcse3_across_calls = ref false
let option_fcse3_across_merges = ref true
let option_fcse3_glb = ref true
let option_fcse3_trivial_ops = ref false
+let option_fcse3_refine = ref true
let option_fredundancy = ref true
(** Options relative to superblock scheduling *)
@@ -43,6 +44,7 @@ let option_ftailduplicate = ref 0 (* perform tail duplication for blocks of size
let option_ftracelinearize = ref true (* uses branch prediction information to improve the linearization *)
let option_funrollsingle = ref 0 (* unroll a single iteration of innermost loops of size n *)
let option_funrollbody = ref 0 (* unroll the body of innermost loops of size n *)
+let option_flooprotate = ref 0 (* rotate the innermost loops to have the condition inside the loop body *)
let option_fpostpass = ref true
let option_fpostpass_sched = ref "list"
diff --git a/driver/Driver.ml b/driver/Driver.ml
index 9aba32f8..cd33e4db 100644
--- a/driver/Driver.ml
+++ b/driver/Driver.ml
@@ -220,6 +220,7 @@ Processing options:
-fcse3-across-merges Propagate CSE3 information across control-flow merges [on]
-fcse3-glb Refine CSE3 information using greatest lower bounds [on]
-fcse3-trivial-ops Replace trivial operations as well using CSE3 [off]
+ -fcse3-refine Refine CSE3 invariants by descending iteration [on]
-fmove-loop-invariants Perform loop-invariant code motion [off]
-fredundancy Perform redundancy elimination [on]
-fpostpass Perform postpass scheduling (only for K1 architecture) [on]
@@ -231,6 +232,8 @@ Processing options:
-ftracelinearize Uses branch prediction information to improve the Linearize [on]
-funrollsingle n Unrolls a single iteration of innermost loops of size n (not counting Inops) [0]
-funrollbody n Unrolls once the body of innermost loops of size n (not counting Inops) [0]
+ -flooprotate n Duplicates the header (condition computation part) of innermost loops to perform a loop rotate [0]
+ Doesn't duplicate if the size of that header is strictly greater than n
-fforward-moves Forward moves after CSE
-finline Perform inlining of functions [on]
-finline-functions-called-once Integrate functions only required by their
@@ -458,6 +461,7 @@ let cmdline_actions =
@ f_opt "cse3-across-merges" option_fcse3_across_merges
@ f_opt "cse3-glb" option_fcse3_glb
@ f_opt "cse3-trivial-ops" option_fcse3_trivial_ops
+ @ f_opt "cse3-refine" option_fcse3_refine
@ f_opt "move-loop-invariants" option_fmove_loop_invariants
@ f_opt "move-loop-invariants_2" option_fmove_loop_invariants_2
@ f_opt "move-loop-invariants_s" option_fmove_loop_invariants_s
@@ -467,6 +471,7 @@ let cmdline_actions =
@ f_opt "predict" option_fpredict
@ [ Exact "-funrollsingle", Integer (fun n -> option_funrollsingle := n) ]
@ [ Exact "-funrollbody", Integer (fun n -> option_funrollbody := n) ]
+ @ [ Exact "-flooprotate", Integer (fun n -> option_flooprotate := n) ]
@ f_opt "tracelinearize" option_ftracelinearize
@ f_opt_str "postpass" option_fpostpass option_fpostpass_sched
@ f_opt "inline" option_finline
diff --git a/tools/compiler_expand.ml b/tools/compiler_expand.ml
index 1da8c21a..f0476b4f 100644
--- a/tools/compiler_expand.ml
+++ b/tools/compiler_expand.ml
@@ -26,6 +26,8 @@ PARTIAL, (Option "optim_CSE"), Require, (Some "CSE"), "CSE";
PARTIAL, Always, NoRequire, (Some "Static Prediction + inverting conditions"), "Staticpredict";
PARTIAL, Always, NoRequire, (Some "Unrolling one iteration out of innermost loops"), "Unrollsingle";
TOTAL, Always, Require, (Some "Renumbering pre constprop"), "Renumber";
+PARTIAL, Always, NoRequire, (Some "Loop Rotate"), "Looprotate";
+TOTAL, Always, NoRequire, (Some "Renumbering pre constprop"), "Renumber";
PARTIAL, Always, NoRequire, (Some "Unrolling the body of innermost loops"), "Unrollbody";
TOTAL, Always, NoRequire, (Some "Renumbering pre constprop"), "Renumber";
PARTIAL, Always, NoRequire, (Some "Performing tail duplication"), "Tailduplicate";