aboutsummaryrefslogtreecommitdiffstats
path: root/backend/LTLTunneling.v
diff options
context:
space:
mode:
Diffstat (limited to 'backend/LTLTunneling.v')
-rw-r--r--backend/LTLTunneling.v167
1 files changed, 167 insertions, 0 deletions
diff --git a/backend/LTLTunneling.v b/backend/LTLTunneling.v
new file mode 100644
index 00000000..4b404724
--- /dev/null
+++ b/backend/LTLTunneling.v
@@ -0,0 +1,167 @@
+(* *********************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Xavier Leroy, INRIA Paris-Rocquencourt *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* *)
+(* Copyright Institut National de Recherche en Informatique et en *)
+(* Automatique. All rights reserved. This file is distributed *)
+(* under the terms of the INRIA Non-Commercial License Agreement. *)
+(* *)
+(* *********************************************************************)
+
+(** Branch tunneling (optimization of branches to branches). *)
+
+Require Import Coqlib Maps Errors.
+Require Import AST.
+Require Import LTL.
+
+(** Branch tunneling shortens sequences of branches (with no intervening
+ computations) by rewriting the branch and conditional branch instructions
+ so that they jump directly to the end of the branch sequence.
+ For example:
+<<
+ L1: if (cond) nop L2; L1: nop L3;
+ L2: nop L3; becomes L2: nop L3;
+ L3: instr; L3: instr;
+ L4: if (cond) goto L1; L4: if (cond) nop L1;
+>>
+ This optimization can be applied to several of our intermediate
+ languages. We choose to perform it on the [LTL] language,
+ after register allocation but before code linearization.
+ Register allocation can delete instructions (such as dead
+ computations or useless moves), therefore there are more
+ opportunities for tunneling after allocation than before.
+ Symmetrically, prior tunneling helps linearization to produce
+ better code, e.g. by revealing that some [branch] instructions are
+ dead code (as the "branch L3" in the example above).
+*)
+
+(** The implementation consists in two passes: the first pass
+ records the branch t of each "nop"
+ and the second pass replace any "nop" node to [pc]
+ by a branch to a "nop" at [branch_t f pc]
+
+Naively, we may define [branch_t f pc] as follows:
+<<
+ branch_t f pc = branch_t f pc' if f(pc) = nop pc'
+ = pc otherwise
+>>
+ However, this definition can fail to terminate if
+ the program can contain loops consisting only of branches, as in
+<<
+ L1: branch L1;
+>>
+ or
+<<
+ L1: nop L2;
+ L2: nop L1;
+>>
+ Coq warns us of this fact by not accepting the definition
+ of [branch_t] above.
+
+ To handle this problem, we use a union-find data structure, adding equalities [pc = pc']
+ for every instruction [pc: nop pc'] in the function.
+
+ Moreover, because the elimination of "useless" [Lcond] depends on the current [uf] datastructure,
+ we need to iterate until we reach a fixpoint.
+
+ Actually, it is simpler and more efficient to perform this in an external oracle, that also returns a measure
+ in order to help the proof.
+
+ A verifier checks that this data-structure is correct.
+*)
+
+Definition UF := PTree.t (node * Z).
+
+(* The oracle returns a map of "nop" node to their target with a distance (ie the number of the "nop" node on the path) to the target. *)
+Axiom branch_target: LTL.function -> UF.
+Extract Constant branch_target => "LTLTunnelingaux.branch_target".
+
+Local Open Scope error_monad_scope.
+
+Definition get (td: UF) pc:node*Z :=
+ match td!pc with
+ | Some (t,d) => (t,Z.abs d)
+ | _ => (pc,0)
+ end.
+
+Definition target (td: UF) (pc:node): node := fst (get td pc).
+Coercion target: UF >-> Funclass.
+
+(* we check that the domain of [td] is included in the domain of [c] *)
+Definition check_included (td: UF) (c: code): option bblock
+ := PTree.fold (fun (ok:option bblock) pc _ => if ok then c!pc else None) td (Some nil).
+
+(* we check the validity of targets and their bound:
+ the distance of a "nop" node (w.r.t to the target) must be greater than the one of its parents.
+*)
+Definition check_bblock (td: UF) (pc:node) (bb: bblock): res unit
+ := match td!pc with
+ | None => OK tt
+ | Some (tpc, dpc) =>
+ let dpc := Z.abs dpc in
+ match bb with
+ | Lbranch s ::_ =>
+ let (ts, ds) := get td s in
+ if peq tpc ts then
+ if zlt ds dpc then OK tt
+ else Error (msg "bad distance in Lbranch")
+ else Error (msg "invalid skip of Lbranch")
+ | Lcond _ _ s1 s2 _ :: _ =>
+ let (ts1, ds1) := get td s1 in
+ let (ts2, ds2) := get td s2 in
+ if peq tpc ts1 then
+ if peq tpc ts2 then
+ if zlt ds1 dpc then
+ if zlt ds2 dpc then OK tt
+ else Error (msg "bad distance on else branch")
+ else Error (msg "bad distance on then branch")
+ else Error (msg "invalid skip of else branch")
+ else Error (msg "invalid skip of then branch")
+ | _ => Error (msg "cannot skip this block")
+ end
+ end.
+
+Definition check_code (td: UF) (c:code): res unit
+ := PTree.fold (fun ok pc bb => do _ <- ok; check_bblock td pc bb) c (OK tt).
+
+(** The second pass rewrites all LTL instructions, replacing every
+ successor [s] of every instruction by [t s], the canonical representative
+ of its equivalence class in the union-find data structure. *)
+
+Definition tunnel_instr (t: node -> node) (i: instruction) : instruction :=
+ match i with
+ | Lbranch s => Lbranch (t s)
+ | Lcond cond args s1 s2 info =>
+ let s1' := t s1 in let s2' := t s2 in
+ if peq s1' s2'
+ then Lbranch s1'
+ else Lcond cond args s1' s2' info
+ | Ljumptable arg tbl => Ljumptable arg (List.map t tbl)
+ | _ => i
+ end.
+
+Definition tunnel_block (t: node -> node) (b: bblock) : bblock :=
+ List.map (tunnel_instr t) b.
+
+Definition tunnel_function (f: LTL.function) : res LTL.function :=
+ let td := branch_target f in
+ let c := (fn_code f) in
+ if check_included td c then
+ do _ <- check_code td c ; OK
+ (mkfunction
+ (fn_sig f)
+ (fn_stacksize f)
+ (PTree.map1 (tunnel_block td) c)
+ (td (fn_entrypoint f)))
+ else
+ Error (msg "Some node of the union-find is not in the CFG")
+ .
+
+Definition tunnel_fundef (f: fundef) : res fundef :=
+ transf_partial_fundef tunnel_function f.
+
+Definition transf_program (p: program) : res program :=
+ transform_partial_program tunnel_fundef p.