aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Monniaux <david.monniaux@univ-grenoble-alpes.fr>2020-11-03 19:58:02 +0100
committerDavid Monniaux <david.monniaux@univ-grenoble-alpes.fr>2020-11-03 19:58:02 +0100
commitbabf0034fed52715ddeefdee4b8d0a365c1247ce (patch)
treee887cb44e2927c879d3bd6a7c5a970d4be5ff437
parent60aa5911a75eaead6503649c879bcc8860df0972 (diff)
parent025a185487c579f768fb747dd6e91a931a2ae66b (diff)
downloadcompcert-kvx-babf0034fed52715ddeefdee4b8d0a365c1247ce.tar.gz
compcert-kvx-babf0034fed52715ddeefdee4b8d0a365c1247ce.zip
Merge remote-tracking branch 'origin/kvx-work' into kvx-test-prepass
-rw-r--r--backend/CSE3analysis.v45
-rw-r--r--backend/CSE3analysisproof.v42
-rw-r--r--backend/Duplicateaux.ml50
-rw-r--r--backend/Duplicatepasses.v12
-rw-r--r--driver/Clflags.ml1
-rw-r--r--driver/Driver.ml3
-rw-r--r--tools/compiler_expand.ml4
7 files changed, 121 insertions, 36 deletions
diff --git a/backend/CSE3analysis.v b/backend/CSE3analysis.v
index 5eb92a82..5ed04bc4 100644
--- a/backend/CSE3analysis.v
+++ b/backend/CSE3analysis.v
@@ -326,18 +326,21 @@ Section OPERATIONS.
| _ => kill_reg dst rel
end
else
- if is_trivial_sym_op op
- then kill_reg dst rel
- else
- let args' := forward_move_l rel args in
- match rhs_find op args' rel with
- | Some r =>
- if Compopts.optim_CSE3_glb tt
- then RELATION.glb (move r dst rel)
- (oper1 dst op args' rel)
- else oper1 dst op args' rel
- | None => oper1 dst op args' rel
- end.
+ let args' := forward_move_l rel args in
+ match rhs_find op args rel with
+ | Some r =>
+ if Compopts.optim_CSE3_glb tt
+ then RELATION.glb (move r dst rel)
+ (RELATION.glb
+ (oper1 dst op args rel)
+ (oper1 dst op args' rel))
+ else RELATION.glb
+ (oper1 dst op args rel)
+ (oper1 dst op args' rel)
+ | None => RELATION.glb
+ (oper1 dst op args rel)
+ (oper1 dst op args' rel)
+ end.
Definition clever_kill_store
(chunk : memory_chunk) (addr: addressing) (args : list reg)
@@ -380,11 +383,21 @@ Section OPERATIONS.
end
else rel'.
- Definition store
+ Definition store (tenv : typing_env)
(chunk : memory_chunk) (addr: addressing) (args : list reg)
- (src : reg) (ty: typ)
+ (src : reg)
(rel : RELATION.t) : RELATION.t :=
- store1 chunk addr (forward_move_l rel args) (forward_move rel src) ty rel.
+ let args' := forward_move_l rel args in
+ let src' := forward_move rel src in
+ let tsrc := tenv src in
+ let tsrc' := tenv src' in
+ RELATION.glb
+ (RELATION.glb
+ (store1 chunk addr args src tsrc rel)
+ (store1 chunk addr args' src tsrc rel))
+ (RELATION.glb
+ (store1 chunk addr args src' tsrc' rel)
+ (store1 chunk addr args' src' tsrc' rel)).
Definition kill_builtin_res res rel :=
match res with
@@ -427,7 +440,7 @@ Section OPERATIONS.
| Icond _ _ _ _ _
| Ijumptable _ _ => Some rel
| Istore chunk addr args src _ =>
- Some (store chunk addr args src (tenv (forward_move rel src)) rel)
+ Some (store tenv chunk addr args src rel)
| Iop op args dst _ => Some (oper dst (SOp op) args rel)
| Iload trap chunk addr args dst _ => Some (oper dst (SLoad chunk addr) args rel)
| Icall _ _ _ dst _ => Some (kill_reg dst (kill_mem rel))
diff --git a/backend/CSE3analysisproof.v b/backend/CSE3analysisproof.v
index 07a58f23..1e5b88c3 100644
--- a/backend/CSE3analysisproof.v
+++ b/backend/CSE3analysisproof.v
@@ -853,24 +853,24 @@ Section SOUNDNESS.
subst.
rewrite <- (forward_move_sound rel rs m r) by auto.
apply move_sound; auto.
- - destruct (is_trivial_sym_op sop).
- {
- apply kill_reg_sound; auto.
- }
- destruct rhs_find as [src |] eqn:RHS_FIND.
+ - destruct rhs_find as [src |] eqn:RHS_FIND.
+ destruct (Compopts.optim_CSE3_glb tt).
* apply sem_rel_glb; split.
- ** pose proof (rhs_find_sound no sop (forward_move_l (ctx:=ctx) rel args) rel src rs m REL RHS_FIND) as SOUND.
- eapply forward_move_rhs_sound in RHS.
- 2: eassumption.
+ ** pose proof (rhs_find_sound no sop args rel src rs m REL RHS_FIND) as SOUND.
rewrite <- (sem_rhs_det SOUND RHS).
apply move_sound; auto.
+ ** apply sem_rel_glb; split.
+ *** apply oper1_sound; auto.
+ *** apply oper1_sound; auto.
+ apply forward_move_rhs_sound; auto.
+ * apply sem_rel_glb; split.
+ ** apply oper1_sound; auto.
** apply oper1_sound; auto.
apply forward_move_rhs_sound; auto.
- * ** apply oper1_sound; auto.
- apply forward_move_rhs_sound; auto.
- + apply oper1_sound; auto.
- apply forward_move_rhs_sound; auto.
+ + apply sem_rel_glb; split.
+ * apply oper1_sound; auto.
+ * apply oper1_sound; auto.
+ apply forward_move_rhs_sound; auto.
Qed.
Hint Resolve oper_sound : cse3.
@@ -959,22 +959,28 @@ Section SOUNDNESS.
Hint Resolve store1_sound : cse3.
+
Theorem store_sound:
forall no chunk addr args a src rel tenv rs m m',
sem_rel rel rs m ->
wt_regset tenv rs ->
eval_addressing genv sp addr (rs ## args) = Some a ->
Mem.storev chunk m a (rs#src) = Some m' ->
- sem_rel (store (ctx:=ctx) no chunk addr args src (tenv (forward_move (ctx:=ctx) rel src)) rel) rs m'.
+ sem_rel (store (ctx:=ctx) no tenv chunk addr args src rel) rs m'.
Proof.
unfold store.
intros until m'.
intros REL WT ADDR STORE.
- rewrite <- forward_move_l_sound with (rel:=rel) (m:=m) in ADDR by trivial.
- rewrite <- forward_move_sound with (rel:=rel) (m:=m) in STORE by trivial.
- apply store1_sound with (a := a) (m := m); trivial.
- (* rewrite forward_move_sound with (rel:=rel) (m:=m) in STORE by trivial.
- assumption. *)
+ apply sem_rel_glb; split.
+ - apply sem_rel_glb; split.
+ * apply store1_sound with (a := a) (m := m); trivial.
+ * rewrite <- forward_move_l_sound with (rel:=rel) (m:=m) in ADDR by trivial.
+ apply store1_sound with (a := a) (m := m); trivial.
+ - rewrite <- forward_move_sound with (rel:=rel) (m:=m) in STORE by trivial.
+ apply sem_rel_glb; split.
+ * apply store1_sound with (a := a) (m := m); trivial.
+ * rewrite <- forward_move_l_sound with (rel:=rel) (m:=m) in ADDR by trivial.
+ apply store1_sound with (a := a) (m := m); trivial.
Qed.
Hint Resolve store_sound : cse3.
diff --git a/backend/Duplicateaux.ml b/backend/Duplicateaux.ml
index 2b13ab5d..fac0ba76 100644
--- a/backend/Duplicateaux.ml
+++ b/backend/Duplicateaux.ml
@@ -836,6 +836,56 @@ let unroll_inner_loops_body f code revmap =
(!code', !revmap')
end
+let extract_upto_icond f code head =
+ let rec extract h =
+ let inst = get_some @@ PTree.get h code in
+ match inst with
+ | Icond _ -> [h]
+ | _ -> ( match rtl_successors inst with
+ | [n] -> h :: (extract n)
+ | _ -> failwith "Found a node with more than one successor??"
+ )
+ in List.rev @@ extract head
+
+let rotate_inner_loop f code revmap iloop =
+ let header = extract_upto_icond f code iloop.head in
+ let limit = !Clflags.option_flooprotate in
+ if count_ignore_nops code header > limit then begin
+ debug "Loop Rotate: too many nodes to duplicate (%d > %d)" (List.length header) limit;
+ (code, revmap)
+ end else
+ let (code2, revmap2, dupheader, fwmap) = clone code revmap header in
+ let code' = ref code2 in
+ let head' = apply_map fwmap iloop.head in
+ begin
+ code' := change_pointers !code' iloop.head head' iloop.preds;
+ (!code', revmap2)
+ end
+
+let rotate_inner_loops f code revmap =
+ let is_loop_header = get_loop_headers code (f.fn_entrypoint) in
+ let inner_loops = get_inner_loops f code is_loop_header in
+ let code' = ref code in
+ let revmap' = ref revmap in
+ begin
+ print_inner_loops inner_loops;
+ List.iter (fun iloop ->
+ let (new_code, new_revmap) = rotate_inner_loop f !code' !revmap' iloop in
+ code' := new_code; revmap' := new_revmap
+ ) inner_loops;
+ (!code', !revmap')
+ end
+
+let loop_rotate f =
+ let entrypoint = f.fn_entrypoint in
+ let code = f.fn_code in
+ let revmap = make_identity_ptree code in
+ let (code, revmap) =
+ if !Clflags.option_flooprotate > 0 then
+ rotate_inner_loops f code revmap
+ else (code, revmap) in
+ ((code, entrypoint), revmap)
+
let static_predict f =
let entrypoint = f.fn_entrypoint in
let code = f.fn_code in
diff --git a/backend/Duplicatepasses.v b/backend/Duplicatepasses.v
index dc96f966..7e58eedf 100644
--- a/backend/Duplicatepasses.v
+++ b/backend/Duplicatepasses.v
@@ -45,4 +45,14 @@ End TailDuplicateOracle.
Module Tailduplicateproof := DuplicateProof TailDuplicateOracle.
-Module Tailduplicate := Tailduplicateproof. \ No newline at end of file
+Module Tailduplicate := Tailduplicateproof.
+
+(** Loop Rotate *)
+
+Module LoopRotateOracle <: DuplicateOracle.
+ Axiom duplicate_aux : function -> code * node * (PTree.t node).
+ Extract Constant duplicate_aux => "Duplicateaux.loop_rotate".
+End LoopRotateOracle.
+
+Module Looprotateproof := DuplicateProof LoopRotateOracle.
+Module Looprotate := Looprotateproof.
diff --git a/driver/Clflags.ml b/driver/Clflags.ml
index 68508f2e..bc8a7925 100644
--- a/driver/Clflags.ml
+++ b/driver/Clflags.ml
@@ -43,6 +43,7 @@ let option_ftailduplicate = ref 0 (* perform tail duplication for blocks of size
let option_ftracelinearize = ref true (* uses branch prediction information to improve the linearization *)
let option_funrollsingle = ref 0 (* unroll a single iteration of innermost loops of size n *)
let option_funrollbody = ref 0 (* unroll the body of innermost loops of size n *)
+let option_flooprotate = ref 0 (* rotate the innermost loops to have the condition inside the loop body *)
(* Scheduling *)
let option_mtune = ref ""
diff --git a/driver/Driver.ml b/driver/Driver.ml
index ef4acee7..8ceb3a25 100644
--- a/driver/Driver.ml
+++ b/driver/Driver.ml
@@ -219,6 +219,8 @@ Processing options:
-ftracelinearize Uses branch prediction information to improve the Linearize [on]
-funrollsingle n Unrolls a single iteration of innermost loops of size n (not counting Inops) [0]
-funrollbody n Unrolls once the body of innermost loops of size n (not counting Inops) [0]
+ -flooprotate n Duplicates the header (condition computation part) of innermost loops to perform a loop rotate [0]
+ Doesn't duplicate if the size of that header is strictly greater than n
-fforward-moves Forward moves after CSE
-finline Perform inlining of functions [on]
-finline-functions-called-once Integrate functions only required by their
@@ -432,6 +434,7 @@ let cmdline_actions =
@ f_opt "predict" option_fpredict
@ [ Exact "-funrollsingle", Integer (fun n -> option_funrollsingle := n) ]
@ [ Exact "-funrollbody", Integer (fun n -> option_funrollbody := n) ]
+ @ [ Exact "-flooprotate", Integer (fun n -> option_flooprotate := n) ]
@ f_opt "tracelinearize" option_ftracelinearize
@ f_opt_str "prepass" option_fprepass option_fprepass_sched
@ f_opt_str "postpass" option_fpostpass option_fpostpass_sched
diff --git a/tools/compiler_expand.ml b/tools/compiler_expand.ml
index 35ddfe5e..d7484628 100644
--- a/tools/compiler_expand.ml
+++ b/tools/compiler_expand.ml
@@ -25,7 +25,9 @@ TOTAL, Always, Require, (Some "Renumbering"), "Renumber";
PARTIAL, (Option "optim_CSE"), Require, (Some "CSE"), "CSE";
PARTIAL, Always, NoRequire, (Some "Static Prediction + inverting conditions"), "Staticpredict";
PARTIAL, Always, NoRequire, (Some "Unrolling one iteration out of innermost loops"), "Unrollsingle";
-TOTAL, Always, Require, (Some "Renumbering pre unrolling"), "Renumber";
+TOTAL, Always, Require, (Some "Renumbering pre rotate"), "Renumber";
+PARTIAL, Always, NoRequire, (Some "Loop Rotate"), "Looprotate";
+TOTAL, Always, NoRequire, (Some "Renumbering pre unrolling"), "Renumber";
PARTIAL, Always, NoRequire, (Some "Unrolling the body of innermost loops"), "Unrollbody";
TOTAL, Always, NoRequire, (Some "Renumbering pre tail duplication"), "Renumber";
PARTIAL, Always, NoRequire, (Some "Performing tail duplication"), "Tailduplicate";