(* *********************************************************************) (* *) (* The Compcert verified compiler *) (* *) (* Xavier Leroy, INRIA Paris-Rocquencourt *) (* Sylvain Boulmé Grenoble-INP, VERIMAG *) (* *) (* Copyright Institut National de Recherche en Informatique et en *) (* Automatique. All rights reserved. This file is distributed *) (* under the terms of the INRIA Non-Commercial License Agreement. *) (* *) (* *********************************************************************) (** Branch tunneling (optimization of branches to branches). *) Require Import Coqlib Maps Errors. Require Import AST. Require Import LTL. (** Branch tunneling shortens sequences of branches (with no intervening computations) by rewriting the branch and conditional branch instructions so that they jump directly to the end of the branch sequence. For example: << L1: if (cond) nop L2; L1: nop L3; L2: nop L3; becomes L2: nop L3; L3: instr; L3: instr; L4: if (cond) goto L1; L4: if (cond) nop L1; >> This optimization can be applied to several of our intermediate languages. We choose to perform it on the [LTL] language, after register allocation but before code linearization. Register allocation can delete instructions (such as dead computations or useless moves), therefore there are more opportunities for tunneling after allocation than before. Symmetrically, prior tunneling helps linearization to produce better code, e.g. by revealing that some [branch] instructions are dead code (as the "branch L3" in the example above). *) (** The implementation consists in two passes: the first pass records the branch t of each "nop" and the second pass replace any "nop" node to [pc] by a branch to a "nop" at [branch_t f pc] Naively, we may define [branch_t f pc] as follows: << branch_t f pc = branch_t f pc' if f(pc) = nop pc' = pc otherwise >> However, this definition can fail to terminate if the program can contain loops consisting only of branches, as in << L1: branch L1; >> or << L1: nop L2; L2: nop L1; >> Coq warns us of this fact by not accepting the definition of [branch_t] above. To handle this problem, we use a union-find data structure, adding equalities [pc = pc'] for every instruction [pc: nop pc'] in the function. Moreover, because the elimination of "useless" [Lcond] depends on the current [uf] datastructure, we need to iterate until we reach a fixpoint. Actually, it is simpler and more efficient to perform this in an external oracle, that also returns a measure in order to help the proof. A verifier checks that this data-structure is correct. *) Definition UF := PTree.t (node * Z). (* The oracle returns a map of "nop" node to their target with a distance (ie the number of the "nop" node on the path) to the target. *) Axiom branch_target: LTL.function -> UF. Extract Constant branch_target => "LTLTunnelingaux.branch_target". Local Open Scope error_monad_scope. Definition get (td: UF) pc:node*Z := match td!pc with | Some (t,d) => (t,Z.abs d) | _ => (pc,0) end. Definition target (td: UF) (pc:node): node := fst (get td pc). Coercion target: UF >-> Funclass. (* we check that the domain of [td] is included in the domain of [c] *) Definition check_included (td: UF) (c: code): option bblock := PTree.fold (fun (ok:option bblock) pc _ => if ok then c!pc else None) td (Some nil). (* we check the validity of targets and their bound: the distance of a "nop" node (w.r.t to the target) must be greater than the one of its parents. *) Definition check_bblock (td: UF) (pc:node) (bb: bblock): res unit := match td!pc with | None => OK tt | Some (tpc, dpc) => let dpc := Z.abs dpc in match bb with | Lbranch s ::_ => let (ts, ds) := get td s in if peq tpc ts then if zlt ds dpc then OK tt else Error (msg "bad distance in Lbranch") else Error (msg "invalid skip of Lbranch") | Lcond _ _ s1 s2 _ :: _ => let (ts1, ds1) := get td s1 in let (ts2, ds2) := get td s2 in if peq tpc ts1 then if peq tpc ts2 then if zlt ds1 dpc then if zlt ds2 dpc then OK tt else Error (msg "bad distance on else branch") else Error (msg "bad distance on then branch") else Error (msg "invalid skip of else branch") else Error (msg "invalid skip of then branch") | _ => Error (msg "cannot skip this block") end end. Definition check_code (td: UF) (c:code): res unit := PTree.fold (fun ok pc bb => do _ <- ok; check_bblock td pc bb) c (OK tt). (** The second pass rewrites all LTL instructions, replacing every successor [s] of every instruction by [t s], the canonical representative of its equivalence class in the union-find data structure. *) Definition tunnel_instr (t: node -> node) (i: instruction) : instruction := match i with | Lbranch s => Lbranch (t s) | Lcond cond args s1 s2 info => let s1' := t s1 in let s2' := t s2 in if peq s1' s2' then Lbranch s1' else Lcond cond args s1' s2' info | Ljumptable arg tbl => Ljumptable arg (List.map t tbl) | _ => i end. Definition tunnel_block (t: node -> node) (b: bblock) : bblock := List.map (tunnel_instr t) b. Definition tunnel_function (f: LTL.function) : res LTL.function := let td := branch_target f in let c := (fn_code f) in if check_included td c then do _ <- check_code td c ; OK (mkfunction (fn_sig f) (fn_stacksize f) (PTree.map1 (tunnel_block td) c) (td (fn_entrypoint f))) else Error (msg "Some node of the union-find is not in the CFG") . Definition tunnel_fundef (f: fundef) : res fundef := transf_partial_fundef tunnel_function f. Definition transf_program (p: program) : res program := transform_partial_program tunnel_fundef p.