From 0053b1aa1d26da5d1f995a603b127daf799c2da9 Mon Sep 17 00:00:00 2001 From: xleroy Date: Mon, 21 May 2012 16:26:30 +0000 Subject: Merge of the newmem branch: - Revised memory model with Max and Cur permissions, but without bounds - Constant propagation of 'const' globals - Function inlining at RTL level - (Unprovable) elimination of unreferenced static definitions git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@1899 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e --- backend/Inlining.v | 477 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 477 insertions(+) create mode 100644 backend/Inlining.v (limited to 'backend/Inlining.v') diff --git a/backend/Inlining.v b/backend/Inlining.v new file mode 100644 index 00000000..406447b7 --- /dev/null +++ b/backend/Inlining.v @@ -0,0 +1,477 @@ +(* *********************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Xavier Leroy, INRIA Paris-Rocquencourt *) +(* *) +(* Copyright Institut National de Recherche en Informatique et en *) +(* Automatique. All rights reserved. This file is distributed *) +(* under the terms of the INRIA Non-Commercial License Agreement. *) +(* *) +(* *********************************************************************) + +(** RTL function inlining *) + +Require Import Coqlib. +Require Import Wfsimpl. +Require Import Errors. +Require Import Maps. +Require Import AST. +Require Import Integers. +Require Import Values. +Require Import Op. +Require Import Registers. +Require Import RTL. + +Ltac xomega := unfold Plt, Ple in *; zify; omega. + +(** ** Environment of inlinable functions *) + +(** We maintain a mapping from function names to their definitions. + In this mapping, we only include functions that are eligible for + inlining, as determined by the external heuristic + [should_inline]. *) + +Definition funenv : Type := PTree.t function. + +Definition size_fenv (fenv: funenv) := PTree_Properties.cardinal fenv. + +Parameter should_inline: ident -> function -> bool. + +Definition add_fundef (fenv: funenv) (idf: ident * fundef) : funenv := + match idf with + | (id, External ef) => + PTree.remove id fenv + | (id, Internal f) => + if should_inline id f + then PTree.set id f fenv + else PTree.remove id fenv + end. + +Definition remove_vardef (fenv: funenv) (idv: ident * globvar unit) : funenv := + PTree.remove (fst idv) fenv. + +Definition funenv_program (p: program) : funenv := + List.fold_left remove_vardef p.(prog_vars) + (List.fold_left add_fundef p.(prog_funct) (PTree.empty function)). + +(** Resources used by a function. *) + +(** Maximum PC (node number) in the CFG of a function. All nodes of + the CFG of [f] are between 1 and [max_pc_function f] (inclusive). *) + +Definition max_pc_function (f: function) := + PTree.fold (fun m pc i => Pmax m pc) f.(fn_code) 1%positive. + +(** Maximum pseudo-register defined in a function. All results of + an instruction of [f], as well as all parameters of [f], are between + 1 and [max_def_function] (inclusive). *) + +Definition max_def_instr (i: instruction) := + match i with + | Iop op args res s => res + | Iload chunk addr args dst s => dst + | Icall sg ros args res s => res + | Ibuiltin ef args res s => res + | _ => 1%positive + end. + +Definition max_def_function (f: function) := + Pmax + (PTree.fold (fun m pc i => Pmax m (max_def_instr i)) f.(fn_code) 1%positive) + (List.fold_left (fun m r => Pmax m r) f.(fn_params) 1%positive). + +(** State monad *) + +(** To construct incrementally the CFG of a function after inlining, + we use a state monad similar to that used in module [RTLgen]. + It records the current state of the CFG, plus counters to generate + fresh pseudo-registers and fresh CFG nodes. It also records the + stack size needed for the inlined function. *) + +Record state : Type := mkstate { + st_nextreg: positive; (**r last used pseudo-register *) + st_nextnode: positive; (**r last used CFG node *) + st_code: code; (**r current CFG *) + st_stksize: Z (**r current stack size *) +}. + +(** Monotone evolution of the state. *) + +Inductive sincr (s1 s2: state) : Prop := + Sincr (NEXTREG: Ple s1.(st_nextreg) s2.(st_nextreg)) + (NEXTNODE: Ple s1.(st_nextnode) s2.(st_nextnode)) + (STKSIZE: s1.(st_stksize) <= s2.(st_stksize)). + +Remark sincr_refl: forall s, sincr s s. +Proof. + intros; constructor; xomega. +Qed. + +Lemma sincr_trans: forall s1 s2 s3, sincr s1 s2 -> sincr s2 s3 -> sincr s1 s3. +Proof. + intros. inv H; inv H0. constructor; xomega. +Qed. + +(** Dependently-typed state monad, ensuring that the final state is + greater or equal (in the sense of predicate [sincr] above) than + the initial state. *) + +Inductive res {A: Type} {s: state}: Type := R (x: A) (s': state) (I: sincr s s'). + +Definition mon (A: Type) : Type := forall (s: state), @res A s. + +(** Operations on this monad. *) + +Definition ret {A: Type} (x: A): mon A := + fun s => R x s (sincr_refl s). + +Definition bind {A B: Type} (x: mon A) (f: A -> mon B): mon B := + fun s1 => match x s1 with R vx s2 I1 => + match f vx s2 with R vy s3 I2 => + R vy s3 (sincr_trans s1 s2 s3 I1 I2) + end + end. + +Notation "'do' X <- A ; B" := (bind A (fun X => B)) + (at level 200, X ident, A at level 100, B at level 200). + +Definition initstate := + mkstate 1%positive 1%positive (PTree.empty instruction) 0. + +Program Definition set_instr (pc: node) (i: instruction): mon unit := + fun s => + R tt + (mkstate s.(st_nextreg) s.(st_nextnode) (PTree.set pc i s.(st_code)) s.(st_stksize)) + _. +Next Obligation. + intros; constructor; simpl; xomega. +Qed. + +Program Definition add_instr (i: instruction): mon node := + fun s => + let pc := Psucc s.(st_nextnode) in + R pc + (mkstate s.(st_nextreg) pc (PTree.set pc i s.(st_code)) s.(st_stksize)) + _. +Next Obligation. + intros; constructor; simpl; xomega. +Qed. + +Program Definition reserve_nodes (numnodes: positive): mon positive := + fun s => + R s.(st_nextnode) + (mkstate s.(st_nextreg) (Pplus s.(st_nextnode) numnodes) s.(st_code) s.(st_stksize)) + _. +Next Obligation. + intros; constructor; simpl; xomega. +Qed. + +Program Definition reserve_regs (numregs: positive): mon positive := + fun s => + R s.(st_nextreg) + (mkstate (Pplus s.(st_nextreg) numregs) s.(st_nextnode) s.(st_code) s.(st_stksize)) + _. +Next Obligation. + intros; constructor; simpl; xomega. +Qed. + +Program Definition request_stack (sz: Z): mon unit := + fun s => + R tt + (mkstate s.(st_nextreg) s.(st_nextnode) s.(st_code) (Zmax s.(st_stksize) sz)) + _. +Next Obligation. + intros; constructor; simpl; xomega. +Qed. + +Fixpoint mlist_iter2 {A B: Type} (f: A -> B -> mon unit) (l: list (A*B)): mon unit := + match l with + | nil => ret tt + | (x,y) :: l' => do z <- f x y; mlist_iter2 f l' + end. + +(** ** Inlining contexts *) + +(** A context describes how to insert the CFG for a source function into + the CFG for the function after inlining: +- a source instruction at PC [n] is relocated to PC [n + ctx.(dpc)]; +- all pseudo-registers of this instruction are shifted by [ctx.(dreg)]; +- all stack references are shifted by [ctx.(dstk)]; +- "return" instructions are transformed into "return" or "move" instructions + as governed by [ctx.(retinfo)]. +*) + +Record context: Type := mkcontext { + dpc: positive; (**r offset for PCs *) + dreg: positive; (**r offset for pseudo-regs *) + dstk: Z; (**r offset for stack references *) + mreg: positive; (**r max pseudo-reg number *) + mstk: Z; (**r original stack block size *) + retinfo: option(node * reg) (**r where to branch on return *) + (**r and deposit return value *) +}. + +(** The following functions "shift" (relocate) PCs, registers, operations, etc. *) + +Definition spc (ctx: context) (pc: node) := Pplus pc ctx.(dpc). + +Definition sreg (ctx: context) (r: reg) := Pplus r ctx.(dreg). + +Definition sregs (ctx: context) (rl: list reg) := List.map (sreg ctx) rl. + +Definition sros (ctx: context) (ros: reg + ident) := sum_left_map (sreg ctx) ros. + +Definition sop (ctx: context) (op: operation) := + shift_stack_operation (Int.repr ctx.(dstk)) op. + +Definition saddr (ctx: context) (addr: addressing) := + shift_stack_addressing (Int.repr ctx.(dstk)) addr. + +(** The initial context, used to copy the CFG of a toplevel function. *) + +Definition initcontext (dpc dreg nreg: positive) (sz: Z) := + {| dpc := dpc; + dreg := dreg; + dstk := 0; + mreg := nreg; + mstk := Zmax sz 0; + retinfo := None |}. + +(** The context used to inline a call to another function. *) + +Definition min_alignment (sz: Z) := + if zle sz 1 then 1 + else if zle sz 2 then 2 + else if zle sz 4 then 4 else 8. + +Definition callcontext (ctx: context) + (dpc dreg nreg: positive) (sz: Z) + (retpc: node) (retreg: reg) := + {| dpc := dpc; + dreg := dreg; + dstk := align (ctx.(dstk) + ctx.(mstk)) (min_alignment sz); + mreg := nreg; + mstk := Zmax sz 0; + retinfo := Some (spc ctx retpc, sreg ctx retreg) |}. + +(** The context used to inline a tail call to another function. *) + +Definition tailcontext (ctx: context) (dpc dreg nreg: positive) (sz: Z) := + {| dpc := dpc; + dreg := dreg; + dstk := align ctx.(dstk) (min_alignment sz); + mreg := nreg; + mstk := Zmax sz 0; + retinfo := ctx.(retinfo) |}. + +(** ** Recursive expansion and copying of a CFG *) + +(** Insert "move" instructions to copy the arguments of an inlined + function into its parameters. *) + +Fixpoint add_moves (srcs dsts: list reg) (succ: node): mon node := + match srcs, dsts with + | s1 :: sl, d1 :: dl => + do n <- add_instr (Iop Omove (s1 :: nil) d1 succ); + add_moves sl dl n + | _, _ => + ret succ + end. + +(** To prevent infinite inlining of a recursive function, when we + inline the body of a function [f], this function is removed from the + environment of inlinable functions and therefore becomes ineligible + for inlining. This decreases the size (number of entries) of the + environment and guarantees termination. Inlining is, therefore, + presented as a well-founded recursion over the size of the environment. *) + +Section EXPAND_CFG. + +Variable fenv: funenv. + +(** The [rec] parameter is the recursor: [rec fenv' P ctx f] copies + the body of function [f], with inline expansion within, as governed + by context [ctx]. It can only be called for function environments + [fenv'] strictly smaller than the current environment [fenv]. *) + +Variable rec: forall fenv', (size_fenv fenv' < size_fenv fenv)%nat -> context -> function -> mon unit. + +(** Given a register-or-symbol [ros], can we inline the corresponding call? *) + +Inductive inline_decision (ros: reg + ident) : Type := + | Cannot_inline + | Can_inline (id: ident) (f: function) (P: ros = inr reg id) (Q: fenv!id = Some f). + +Program Definition can_inline (ros: reg + ident): inline_decision ros := + match ros with + | inl r => Cannot_inline _ + | inr id => match fenv!id with Some f => Can_inline _ id f _ _ | None => Cannot_inline _ end + end. + +(** Inlining of a call to function [f]. An appropriate context is + created, then the CFG of [f] is recursively copied, then moves + are inserted to copy the arguments of the call to the parameters of [f]. *) + +Definition inline_function (ctx: context) (id: ident) (f: function) + (P: PTree.get id fenv = Some f) + (args: list reg) + (retpc: node) (retreg: reg) : mon node := + let npc := max_pc_function f in + let nreg := max_def_function f in + do dpc <- reserve_nodes npc; + do dreg <- reserve_regs nreg; + let ctx' := callcontext ctx dpc dreg nreg f.(fn_stacksize) retpc retreg in + do x <- rec (PTree.remove id fenv) (PTree_Properties.cardinal_remove P) ctx' f; + add_moves (sregs ctx args) (sregs ctx' f.(fn_params)) (spc ctx' f.(fn_entrypoint)). + +(** Inlining of a tail call to function [f]. Similar to [inline_function], + but the new context is different. *) + +Definition inline_tail_function (ctx: context) (id: ident) (f: function) + (P: PTree.get id fenv = Some f) + (args: list reg): mon node := + let npc := max_pc_function f in + let nreg := max_def_function f in + do dpc <- reserve_nodes npc; + do dreg <- reserve_regs nreg; + let ctx' := tailcontext ctx dpc dreg nreg f.(fn_stacksize) in + do x <- rec (PTree.remove id fenv) (PTree_Properties.cardinal_remove P) ctx' f; + add_moves (sregs ctx args) (sregs ctx' f.(fn_params)) (spc ctx' f.(fn_entrypoint)). + +(** The instruction generated for a [Ireturn] instruction found in an + inlined function body. *) + +Definition inline_return (ctx: context) (or: option reg) (retinfo: node * reg) := + match retinfo, or with + | (retpc, retreg), Some r => Iop Omove (sreg ctx r :: nil) retreg retpc + | (retpc, retreg), None => Inop retpc + end. + +(** Expansion and copying of an instruction. For most instructions, + its registers and successor PC are shifted as per the context [ctx], + then the instruction is inserted in the final CFG at its final position + [spc ctx pc]. + + [Icall] instructions are either replaced by a "goto" to the expansion + of the called function, or shifted as described above. + + [Itailcall] instructions are similar, with one additional case. If + the [Itailcall] occurs in the body of an inlined function, and + cannot be inlined itself, it must be turned into an [Icall] + instruction that branches to the return point of the inlined + function. + + Finally, [Ireturn] instructions within an inlined function are + turned into a "move" or "goto" that stores the result, if any, + into the destination register, then branches back to the successor + of the inlined call. *) + +Definition expand_instr (ctx: context) (pc: node) (i: instruction): mon unit := + match i with + | Inop s => + set_instr (spc ctx pc) (Inop (spc ctx s)) + | Iop op args res s => + set_instr (spc ctx pc) + (Iop (sop ctx op) (sregs ctx args) (sreg ctx res) (spc ctx s)) + | Iload chunk addr args dst s => + set_instr (spc ctx pc) + (Iload chunk (saddr ctx addr) (sregs ctx args) (sreg ctx dst) (spc ctx s)) + | Istore chunk addr args src s => + set_instr (spc ctx pc) + (Istore chunk (saddr ctx addr) (sregs ctx args) (sreg ctx src) (spc ctx s)) + | Icall sg ros args res s => + match can_inline ros with + | Cannot_inline => + set_instr (spc ctx pc) + (Icall sg (sros ctx ros) (sregs ctx args) (sreg ctx res) (spc ctx s)) + | Can_inline id f P Q => + do n <- inline_function ctx id f Q args s res; + set_instr (spc ctx pc) (Inop n) + end + | Itailcall sg ros args => + match can_inline ros with + | Cannot_inline => + match ctx.(retinfo) with + | None => + set_instr (spc ctx pc) + (Itailcall sg (sros ctx ros) (sregs ctx args)) + | Some(rpc, rreg) => + set_instr (spc ctx pc) + (Icall sg (sros ctx ros) (sregs ctx args) rreg rpc) + end + | Can_inline id f P Q => + do n <- inline_tail_function ctx id f Q args; + set_instr (spc ctx pc) (Inop n) + end + | Ibuiltin ef args res s => + set_instr (spc ctx pc) + (Ibuiltin ef (sregs ctx args) (sreg ctx res) (spc ctx s)) + | Icond cond args s1 s2 => + set_instr (spc ctx pc) + (Icond cond (sregs ctx args) (spc ctx s1) (spc ctx s2)) + | Ijumptable r tbl => + set_instr (spc ctx pc) + (Ijumptable (sreg ctx r) (List.map (spc ctx) tbl)) + | Ireturn or => + match ctx.(retinfo) with + | None => + set_instr (spc ctx pc) (Ireturn (option_map (sreg ctx) or)) + | Some rinfo => + set_instr (spc ctx pc) (inline_return ctx or rinfo) + end + end. + +(** The expansion of a function [f] iteratively expands all its + instructions, after recording how much stack it needs. *) + +Definition expand_cfg_rec (ctx: context) (f: function): mon unit := + do x <- request_stack (ctx.(dstk) + ctx.(mstk)); + mlist_iter2 (expand_instr ctx) (PTree.elements f.(fn_code)). + +End EXPAND_CFG. + +(** Here we "tie the knot" of the recursion, taking the fixpoint + of [expand_cfg_rec]. *) + +Definition expand_cfg := Fixm size_fenv expand_cfg_rec. + +(** Start of the recursion: copy and inline function [f] in the + initial context. *) + +Definition expand_function (fenv: funenv) (f: function): mon context := + let npc := max_pc_function f in + let nreg := max_def_function f in + do dpc <- reserve_nodes npc; + do dreg <- reserve_regs nreg; + let ctx := initcontext dpc dreg nreg f.(fn_stacksize) in + do x <- expand_cfg fenv ctx f; + ret ctx. + +(** ** Inlining in functions and whole programs. *) + +Local Open Scope string_scope. + +(** Inlining can increase the size of the function's stack block. We must + make sure that the new size does not exceed [Int.max_unsigned], otherwise + address computations within the stack would overflow and produce incorrect + results. *) + +Definition transf_function (fenv: funenv) (f: function) : Errors.res function := + let '(R ctx s _) := expand_function fenv f initstate in + if zle s.(st_stksize) Int.max_unsigned then + OK (mkfunction f.(fn_sig) + (sregs ctx f.(fn_params)) + s.(st_stksize) + s.(st_code) + (spc ctx f.(fn_entrypoint))) + else + Error(msg "Inlining: stack too big"). + +Definition transf_fundef (fenv: funenv) (fd: fundef) : Errors.res fundef := + AST.transf_partial_fundef (transf_function fenv) fd. + +Definition transf_program (p: program): Errors.res program := + let fenv := funenv_program p in + AST.transform_partial_program (transf_fundef fenv) p. + -- cgit