aboutsummaryrefslogtreecommitdiffstats
path: root/backend/Tunneling.v
diff options
context:
space:
mode:
Diffstat (limited to 'backend/Tunneling.v')
-rw-r--r--backend/Tunneling.v135
1 files changed, 111 insertions, 24 deletions
diff --git a/backend/Tunneling.v b/backend/Tunneling.v
index da1ce45a..265e06ba 100644
--- a/backend/Tunneling.v
+++ b/backend/Tunneling.v
@@ -12,6 +12,7 @@
(** Branch tunneling (optimization of branches to branches). *)
+Require Import FunInd.
Require Import Coqlib Maps UnionFind.
Require Import AST.
Require Import LTL.
@@ -21,8 +22,8 @@ Require Import LTL.
so that they jump directly to the end of the branch sequence.
For example:
<<
- L1: nop L2; L1: nop L3;
- L2; nop L3; becomes L2: nop L3;
+ L1: branch L2; L1: branch L3;
+ L2; branch L3; becomes L2: branch L3;
L3: instr; L3: instr;
L4: if (cond) goto L1; L4: if (cond) goto L3;
>>
@@ -33,70 +34,156 @@ Require Import LTL.
computations or useless moves), therefore there are more
opportunities for tunneling after allocation than before.
Symmetrically, prior tunneling helps linearization to produce
- better code, e.g. by revealing that some [nop] instructions are
- dead code (as the "nop L3" in the example above).
+ better code, e.g. by revealing that some [branch] instructions are
+ dead code (as the "branch L3" in the example above).
*)
(** The naive implementation of branch tunneling would replace
any branch to a node [pc] by a branch to the node
[branch_target f pc], defined as follows:
<<
- branch_target f pc = branch_target f pc' if f(pc) = nop pc'
+ branch_target f pc = branch_target f pc' if f(pc) = branch pc'
= pc otherwise
>>
However, this definition can fail to terminate if
the program can contain loops consisting only of branches, as in
<<
- L1: nop L1;
+ L1: branch L1;
>>
or
-<< L1: nop L2;
- L2: nop L1;
+<<
+ L1: branch L2;
+ L2: branch L1;
>>
Coq warns us of this fact by not accepting the definition
of [branch_target] above.
- To handle this problem, we proceed in two passes. The first pass
- populates a union-find data structure, adding equalities [pc = pc']
- for every instruction [pc: nop pc'] in the function. *)
+ To handle this problem, we proceed in two passes:
+
+- The first pass populates a union-find data structure, adding equalities
+ between PCs of blocks that are connected by branches and no other
+ computation.
+
+- The second pass rewrites the code, replacing every branch to a node [pc]
+ by a branch to the canonical representative of the equivalence class of [pc].
+*)
+
+(** * Construction of the union-find data structure *)
Module U := UnionFind.UF(PTree).
-Definition record_goto (uf: U.t) (pc: node) (b: bblock) : U.t :=
+(** We start populating the union-find data structure by adding
+ equalities [pc = pc'] for every block [pc: branch pc'] in the function. *)
+
+Definition record_branch (uf: U.t) (pc: node) (b: bblock) : U.t :=
match b with
| Lbranch s :: _ => U.union uf pc s
| _ => uf
end.
+Definition record_branches (f: LTL.function) : U.t :=
+ PTree.fold record_branch f.(fn_code) U.empty.
+
+(** An additional optimization opportunity comes from conditional branches.
+ Consider a block [pc: cond ifso ifnot]. If the [ifso] case
+ and the [ifnot] case jump to the same block [pc']
+ (modulo intermediate branches), the block can be simplified into
+ [pc: branch pc'], and the equality [pc = pc'] can be added to the
+ union-find data structure. *)
+
+(** In rare cases, the extra equation [pc = pc'] introduced by the
+ simplification of a conditional branch can trigger further simplifications
+ of other conditional branches. We therefore iterate the analysis
+ until no optimizable conditional branch remains. *)
+
+(** The code [c] (first component of the [st] triple) starts identical
+ to the code [fn.(fn_code)] of the current function, but each time
+ conditional branch at [pc] is optimized, we remove the block at
+ [pc] from the code [c]. This guarantees termination of the
+ iteration. *)
+
+Definition record_cond (st: code * U.t * bool) (pc: node) (b: bblock) : code * U.t * bool :=
+ match b with
+ | Lcond cond args s1 s2 :: _ =>
+ let '(c, u, _) := st in
+ if peq (U.repr u s1) (U.repr u s2)
+ then (PTree.remove pc c, U.union u pc s1, true)
+ else st
+ | _ =>
+ st
+ end.
+
+Definition record_conds_1 (cu: code * U.t) : code * U.t * bool :=
+ let (c, u) := cu in PTree.fold record_cond c (c, u, false).
+
+Definition measure_state (cu: code * U.t) : nat :=
+ PTree_Properties.cardinal (fst cu).
+
+Function record_conds (cu: code * U.t) {measure measure_state cu} : U.t :=
+ let (cu', changed) := record_conds_1 cu in
+ if changed then record_conds cu' else snd cu.
+Proof.
+ intros [c0 u0] [c1 u1].
+ set (P := fun (c: code) (s: code * U.t * bool) =>
+ (forall pc, c!pc = None -> (fst (fst s))!pc = c0!pc) /\
+ (PTree_Properties.cardinal (fst (fst s))
+ + (if snd s then 1 else 0)
+ <= PTree_Properties.cardinal c0)%nat).
+ assert (A: P c0 (PTree.fold record_cond c0 (c0, u0, false))).
+ { apply PTree_Properties.fold_rec; unfold P.
+ - intros. destruct H0; split; auto. intros. rewrite <- H in H2. auto.
+ - simpl; split; intros. auto. simpl; lia.
+ - intros cd [[c u] changed] pc b NONE SOME [HR1 HR2]. simpl. split.
+ + intros p EQ. rewrite PTree.gsspec in EQ. destruct (peq p pc); try discriminate.
+ unfold record_cond. destruct b as [ | [] b ]; auto.
+ destruct (peq (U.repr u s1) (U.repr u s2)); auto.
+ simpl. rewrite PTree.gro by auto. auto.
+ + unfold record_cond. destruct b as [ | [] b ]; auto.
+ destruct (peq (U.repr u s1) (U.repr u s2)); auto.
+ simpl in *.
+ assert (SOME': c!pc = Some (Lcond cond args s1 s2 :: b)).
+ { rewrite HR1 by auto. auto. }
+ generalize (PTree_Properties.cardinal_remove SOME').
+ destruct changed; lia.
+ }
+ unfold record_conds_1, measure_state; intros.
+ destruct A as [_ A]. rewrite teq in A. simpl in *.
+ lia.
+Qed.
+
Definition record_gotos (f: LTL.function) : U.t :=
- PTree.fold record_goto f.(fn_code) U.empty.
+ record_conds (f.(fn_code), record_branches f).
+
+(** * Code transformation *)
-(** The second pass rewrites all LTL instructions, replacing every
+(** The code transformation rewrites all LTL instruction, replacing every
successor [s] of every instruction by the canonical representative
- of its equivalence class in the union-find data structure. *)
+ of its equivalence class in the union-find data structure.
+ Additionally, [Lcond] conditional branches are turned into [Lbranch]
+ unconditional branches whenever possible. *)
-Definition tunnel_instr (uf: U.t) (i: instruction) : instruction :=
+Definition tunnel_instr (u: U.t) (i: instruction) : instruction :=
match i with
- | Lbranch s => Lbranch (U.repr uf s)
+ | Lbranch s => Lbranch (U.repr u s)
| Lcond cond args s1 s2 =>
- let s1' := U.repr uf s1 in let s2' := U.repr uf s2 in
+ let s1' := U.repr u s1 in let s2' := U.repr u s2 in
if peq s1' s2'
then Lbranch s1'
else Lcond cond args s1' s2'
- | Ljumptable arg tbl => Ljumptable arg (List.map (U.repr uf) tbl)
+ | Ljumptable arg tbl => Ljumptable arg (List.map (U.repr u) tbl)
| _ => i
end.
-Definition tunnel_block (uf: U.t) (b: bblock) : bblock :=
- List.map (tunnel_instr uf) b.
+Definition tunnel_block (u: U.t) (b: bblock) : bblock :=
+ List.map (tunnel_instr u) b.
Definition tunnel_function (f: LTL.function) : LTL.function :=
- let uf := record_gotos f in
+ let u := record_gotos f in
mkfunction
(fn_sig f)
(fn_stacksize f)
- (PTree.map1 (tunnel_block uf) (fn_code f))
- (U.repr uf (fn_entrypoint f)).
+ (PTree.map1 (tunnel_block u) (fn_code f))
+ (U.repr u (fn_entrypoint f)).
Definition tunnel_fundef (f: LTL.fundef) : LTL.fundef :=
transf_fundef tunnel_function f.