aboutsummaryrefslogtreecommitdiffstats
path: root/backend/CSE.v
diff options
context:
space:
mode:
authorxleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>2006-02-09 14:55:48 +0000
committerxleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>2006-02-09 14:55:48 +0000
commit2ae43be7b9d4118335c9d2cef6e098f9b9f807fe (patch)
treebbb5e49ccbf7e3614966571acc317f8d318fecad /backend/CSE.v
downloadcompcert-kvx-2ae43be7b9d4118335c9d2cef6e098f9b9f807fe.tar.gz
compcert-kvx-2ae43be7b9d4118335c9d2cef6e098f9b9f807fe.zip
Initial import of compcert
git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@1 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e
Diffstat (limited to 'backend/CSE.v')
-rw-r--r--backend/CSE.v420
1 files changed, 420 insertions, 0 deletions
diff --git a/backend/CSE.v b/backend/CSE.v
new file mode 100644
index 00000000..243f6dd7
--- /dev/null
+++ b/backend/CSE.v
@@ -0,0 +1,420 @@
+(** Common subexpression elimination over RTL. This optimization
+ proceeds by value numbering over extended basic blocks. *)
+
+Require Import Coqlib.
+Require Import Maps.
+Require Import AST.
+Require Import Integers.
+Require Import Floats.
+Require Import Values.
+Require Import Mem.
+Require Import Globalenvs.
+Require Import Op.
+Require Import Registers.
+Require Import RTL.
+Require Import Kildall.
+
+(** * Value numbering *)
+
+(** The idea behind value numbering algorithms is to associate
+ abstract identifiers (``value numbers'') to the contents of registers
+ at various program points, and record equations between these
+ identifiers. For instance, consider the instruction
+ [r1 = add(r2, r3)] and assume that [r2] and [r3] are mapped
+ to abstract identifiers [x] and [y] respectively at the program
+ point just before this instruction. At the program point just after,
+ we can add the equation [z = add(x, y)] and associate [r1] with [z],
+ where [z] is a fresh abstract identifier. However, if we already
+ knew an equation [u = add(x, y)], we can preferably add no equation
+ and just associate [r1] with [u]. If there exists a register [r4]
+ mapped with [u] at this point, we can then replace the instruction
+ [r1 = add(r2, r3)] by a move instruction [r1 = r4], therefore eliminating
+ a common subexpression and reusing the result of an earlier addition.
+
+ Abstract identifiers / value numbers are represented by positive integers.
+ Equations are of the form [valnum = rhs], where the right-hand sides
+ [rhs] are either arithmetic operations or memory loads. *)
+
+Definition valnum := positive.
+
+Inductive rhs : Set :=
+ | Op: operation -> list valnum -> rhs
+ | Load: memory_chunk -> addressing -> list valnum -> rhs.
+
+Definition eq_valnum: forall (x y: valnum), {x=y}+{x<>y} := peq.
+
+Definition eq_list_valnum (x y: list valnum) : {x=y}+{x<>y}.
+Proof.
+ induction x; intros; case y; intros.
+ left; auto.
+ right; congruence.
+ right; congruence.
+ case (eq_valnum a v); intros.
+ case (IHx l); intros.
+ left; congruence.
+ right; congruence.
+ right; congruence.
+Qed.
+
+Definition eq_rhs (x y: rhs) : {x=y}+{x<>y}.
+Proof.
+ generalize Int.eq_dec; intro.
+ generalize Float.eq_dec; intro.
+ assert (forall (x y: ident), {x=y}+{x<>y}). exact peq.
+ assert (forall (x y: comparison), {x=y}+{x<>y}). decide equality.
+ assert (forall (x y: condition), {x=y}+{x<>y}). decide equality.
+ assert (forall (x y: operation), {x=y}+{x<>y}). decide equality.
+ assert (forall (x y: memory_chunk), {x=y}+{x<>y}). decide equality.
+ assert (forall (x y: addressing), {x=y}+{x<>y}). decide equality.
+ generalize eq_valnum; intro.
+ generalize eq_list_valnum; intro.
+ decide equality.
+Qed.
+
+(** A value numbering is a collection of equations between value numbers
+ plus a partial map from registers to value numbers. Additionally,
+ we maintain the next unused value number, so as to easily generate
+ fresh value numbers. *)
+
+Record numbering : Set := mknumbering {
+ num_next: valnum;
+ num_eqs: list (valnum * rhs);
+ num_reg: PTree.t valnum
+}.
+
+Definition empty_numbering :=
+ mknumbering 1%positive nil (PTree.empty valnum).
+
+(** [valnum_reg n r] returns the value number for the contents of
+ register [r]. If none exists, a fresh value number is returned
+ and associated with register [r]. The possibly updated numbering
+ is also returned. [valnum_regs] is similar, but for a list of
+ registers. *)
+
+Definition valnum_reg (n: numbering) (r: reg) : numbering * valnum :=
+ match PTree.get r n.(num_reg) with
+ | Some v => (n, v)
+ | None => (mknumbering (Psucc n.(num_next))
+ n.(num_eqs)
+ (PTree.set r n.(num_next) n.(num_reg)),
+ n.(num_next))
+ end.
+
+Fixpoint valnum_regs (n: numbering) (rl: list reg)
+ {struct rl} : numbering * list valnum :=
+ match rl with
+ | nil =>
+ (n, nil)
+ | r1 :: rs =>
+ let (n1, v1) := valnum_reg n r1 in
+ let (ns, vs) := valnum_regs n1 rs in
+ (ns, v1 :: vs)
+ end.
+
+(** [find_valnum_rhs rhs eqs] searches the list of equations [eqs]
+ for an equation of the form [vn = rhs] for some value number [vn].
+ If found, [Some vn] is returned, otherwise [None] is returned. *)
+
+Fixpoint find_valnum_rhs (r: rhs) (eqs: list (valnum * rhs))
+ {struct eqs} : option valnum :=
+ match eqs with
+ | nil => None
+ | (v, r') :: eqs1 =>
+ if eq_rhs r r' then Some v else find_valnum_rhs r eqs1
+ end.
+
+(** [add_rhs n rd rhs] updates the value numbering [n] to reflect
+ the computation of the operation or load represented by [rhs]
+ and the storing of the result in register [rd]. If an equation
+ [vn = rhs] is known, register [rd] is set to [vn]. Otherwise,
+ a fresh value number [vn] is generated and associated with [rd],
+ and the equation [vn = rhs] is added. *)
+
+Definition add_rhs (n: numbering) (rd: reg) (rh: rhs) : numbering :=
+ match find_valnum_rhs rh n.(num_eqs) with
+ | Some vres =>
+ mknumbering n.(num_next) n.(num_eqs)
+ (PTree.set rd vres n.(num_reg))
+ | None =>
+ mknumbering (Psucc n.(num_next))
+ ((n.(num_next), rh) :: n.(num_eqs))
+ (PTree.set rd n.(num_next) n.(num_reg))
+ end.
+
+(** [add_op n rd op rs] specializes [add_rhs] for the case of an
+ arithmetic operation. The right-hand side corresponding to [op]
+ and the value numbers for the argument registers [rs] is built
+ and added to [n] as described in [add_rhs].
+
+ If [op] is a move instruction, we simply assign the value number of
+ the source register to the destination register, since we know that
+ the source and destination registers have exactly the same value.
+ This enables more common subexpressions to be recognized. For instance:
+<<
+ z = add(x, y); u = x; v = add(u, y);
+>>
+ Since [u] and [x] have the same value number, the second [add]
+ is recognized as computing the same result as the first [add],
+ and therefore [u] and [z] have the same value number. *)
+
+Definition add_op (n: numbering) (rd: reg) (op: operation) (rs: list reg) :=
+ match is_move_operation op rs with
+ | Some r =>
+ let (n1, v) := valnum_reg n r in
+ mknumbering n1.(num_next) n1.(num_eqs) (PTree.set rd v n1.(num_reg))
+ | None =>
+ let (n1, vs) := valnum_regs n rs in
+ add_rhs n1 rd (Op op vs)
+ end.
+
+(** [add_load n rd chunk addr rs] specializes [add_rhs] for the case of a
+ memory load. The right-hand side corresponding to [chunk], [addr]
+ and the value numbers for the argument registers [rs] is built
+ and added to [n] as described in [add_rhs]. *)
+
+Definition add_load (n: numbering) (rd: reg)
+ (chunk: memory_chunk) (addr: addressing)
+ (rs: list reg) :=
+ let (n1, vs) := valnum_regs n rs in
+ add_rhs n1 rd (Load chunk addr vs).
+
+(** [kill_load n] removes all equations involving memory loads.
+ It is used to reflect the effect of a memory store, which can
+ potentially invalidate all such equations. *)
+
+Fixpoint kill_load_eqs (eqs: list (valnum * rhs)) : list (valnum * rhs) :=
+ match eqs with
+ | nil => nil
+ | (_, Load _ _ _) :: rem => kill_load_eqs rem
+ | v_rh :: rem => v_rh :: kill_load_eqs rem
+ end.
+
+Definition kill_loads (n: numbering) : numbering :=
+ mknumbering n.(num_next)
+ (kill_load_eqs n.(num_eqs))
+ n.(num_reg).
+
+(* [reg_valnum n vn] returns a register that is mapped to value number
+ [vn], or [None] if no such register exists. *)
+
+Definition reg_valnum (n: numbering) (vn: valnum) : option reg :=
+ PTree.fold
+ (fun (res: option reg) (r: reg) (v: valnum) =>
+ if peq v vn then Some r else res)
+ n.(num_reg) None.
+
+(* [find_rhs] and its specializations [find_op] and [find_load]
+ return a register that already holds the result of the given arithmetic
+ operation or memory load, according to the given numbering.
+ [None] is returned if no such register exists. *)
+
+Definition find_rhs (n: numbering) (rh: rhs) : option reg :=
+ match find_valnum_rhs rh n.(num_eqs) with
+ | None => None
+ | Some vres => reg_valnum n vres
+ end.
+
+Definition find_op
+ (n: numbering) (op: operation) (rs: list reg) : option reg :=
+ let (n1, vl) := valnum_regs n rs in
+ find_rhs n1 (Op op vl).
+
+Definition find_load
+ (n: numbering) (chunk: memory_chunk) (addr: addressing) (rs: list reg) : option reg :=
+ let (n1, vl) := valnum_regs n rs in
+ find_rhs n1 (Load chunk addr vl).
+
+(** * The static analysis *)
+
+(** We now define a notion of satisfiability of a numbering. This semantic
+ notion plays a central role in the correctness proof (see [CSEproof]),
+ but is defined here because we need it to define the ordering over
+ numberings used in the static analysis.
+
+ A numbering is satisfiable in a given register environment and memory
+ state if there exists a valuation, mapping value numbers to actual values,
+ that validates both the equations and the association of registers
+ to value numbers. *)
+
+Definition equation_holds
+ (valuation: valnum -> val)
+ (ge: genv) (sp: val) (m: mem)
+ (vres: valnum) (rh: rhs) : Prop :=
+ match rh with
+ | Op op vl =>
+ eval_operation ge sp op (List.map valuation vl) =
+ Some (valuation vres)
+ | Load chunk addr vl =>
+ exists a,
+ eval_addressing ge sp addr (List.map valuation vl) = Some a /\
+ loadv chunk m a = Some (valuation vres)
+ end.
+
+Definition numbering_holds
+ (valuation: valnum -> val)
+ (ge: genv) (sp: val) (rs: regset) (m: mem) (n: numbering) : Prop :=
+ (forall vn rh,
+ In (vn, rh) n.(num_eqs) ->
+ equation_holds valuation ge sp m vn rh)
+ /\ (forall r vn,
+ PTree.get r n.(num_reg) = Some vn -> rs#r = valuation vn).
+
+Definition numbering_satisfiable
+ (ge: genv) (sp: val) (rs: regset) (m: mem) (n: numbering) : Prop :=
+ exists valuation, numbering_holds valuation ge sp rs m n.
+
+Lemma empty_numbering_satisfiable:
+ forall ge sp rs m, numbering_satisfiable ge sp rs m empty_numbering.
+Proof.
+ intros; red.
+ exists (fun (vn: valnum) => Vundef). split; simpl; intros.
+ elim H.
+ rewrite PTree.gempty in H. discriminate.
+Qed.
+
+(** We now equip the type [numbering] with a partial order and a greatest
+ element. The partial order is based on entailment: [n1] is greater
+ than [n2] if [n1] is satisfiable whenever [n2] is. The greatest element
+ is, of course, the empty numbering (no equations). *)
+
+Module Numbering.
+ Definition t := numbering.
+ Definition ge (n1 n2: numbering) : Prop :=
+ forall ge sp rs m,
+ numbering_satisfiable ge sp rs m n2 ->
+ numbering_satisfiable ge sp rs m n1.
+ Definition top := empty_numbering.
+ Lemma top_ge: forall x, ge top x.
+ Proof.
+ intros; red; intros. unfold top. apply empty_numbering_satisfiable.
+ Qed.
+ Lemma refl_ge: forall x, ge x x.
+ Proof.
+ intros; red; auto.
+ Qed.
+End Numbering.
+
+(** We reuse the solver for forward dataflow inequations based on
+ propagation over extended basic blocks defined in library [Kildall]. *)
+
+Module Solver := BBlock_solver(Numbering).
+
+(** The transfer function for the dataflow analysis returns the numbering
+ ``after'' execution of the instruction at [pc], as a function of the
+ numbering ``before''. For [Iop] and [Iload] instructions, we add
+ equations or reuse existing value numbers as described for
+ [add_op] and [add_load]. For [Istore] instructions, we forget
+ all equations involving memory loads. For [Icall] instructions,
+ we could simply associate a fresh, unconstrained by equations value number
+ to the result register. However, it is often undesirable to eliminate
+ common subexpressions across a function call (there is a risk of
+ increasing too much the register pressure across the call), so we
+ just forget all equations and start afresh with an empty numbering.
+ Finally, the remaining instructions modify neither registers nor
+ the memory, so we keep the numbering unchanged. *)
+
+Definition transfer (f: function) (pc: node) (before: numbering) :=
+ match f.(fn_code)!pc with
+ | None => before
+ | Some i =>
+ match i with
+ | Inop s =>
+ before
+ | Iop op args res s =>
+ add_op before res op args
+ | Iload chunk addr args dst s =>
+ add_load before dst chunk addr args
+ | Istore chunk addr args src s =>
+ kill_loads before
+ | Icall sig ros args res s =>
+ empty_numbering
+ | Icond cond args ifso ifnot =>
+ before
+ | Ireturn optarg =>
+ before
+ end
+ end.
+
+(** The static analysis solves the dataflow inequations implied
+ by the [transfer] function using the ``extended basic block'' solver,
+ which produces sub-optimal solutions quickly. The result is
+ a mapping from program points to numberings. In the unlikely
+ case where the solver fails to find a solution, we simply associate
+ empty numberings to all program points, which is semantically correct
+ and effectively deactivates the CSE optimization. *)
+
+Definition analyze (f: RTL.function): PMap.t numbering :=
+ match Solver.fixpoint (successors f) f.(fn_nextpc)
+ (transfer f) f.(fn_entrypoint) with
+ | None => PMap.init empty_numbering
+ | Some res => res
+ end.
+
+(** * Code transformation *)
+
+(** Some operations are so cheap to compute that it is generally not
+ worth reusing their results. These operations are detected by the
+ function below. *)
+
+Definition is_trivial_op (op: operation) : bool :=
+ match op with
+ | Omove => true
+ | Ointconst _ => true
+ | Oaddrsymbol _ _ => true
+ | Oaddrstack _ => true
+ | Oundef => true
+ | _ => false
+ end.
+
+(** The code transformation is performed instruction by instruction.
+ [Iload] instructions and non-trivial [Iop] instructions are turned
+ into move instructions if their result is already available in a
+ register, as indicated by the numbering inferred at that program point. *)
+
+Definition transf_instr (n: numbering) (instr: instruction) :=
+ match instr with
+ | Iop op args res s =>
+ if is_trivial_op op then instr else
+ match find_op n op args with
+ | None => instr
+ | Some r => Iop Omove (r :: nil) res s
+ end
+ | Iload chunk addr args dst s =>
+ match find_load n chunk addr args with
+ | None => instr
+ | Some r => Iop Omove (r :: nil) dst s
+ end
+ | _ =>
+ instr
+ end.
+
+Definition transf_code (approxs: PMap.t numbering) (instrs: code) : code :=
+ PTree.map (fun pc instr => transf_instr approxs!!pc instr) instrs.
+
+Lemma transf_code_wf:
+ forall f approxs,
+ (forall pc, Plt pc f.(fn_nextpc) \/ f.(fn_code)!pc = None) ->
+ (forall pc, Plt pc f.(fn_nextpc)
+ \/ (transf_code approxs f.(fn_code))!pc = None).
+Proof.
+ intros.
+ elim (H pc); intro.
+ left; auto.
+ right. unfold transf_code. rewrite PTree.gmap.
+ unfold option_map; rewrite H0. reflexivity.
+Qed.
+
+Definition transf_function (f: function) : function :=
+ let approxs := analyze f in
+ mkfunction
+ f.(fn_sig)
+ f.(fn_params)
+ f.(fn_stacksize)
+ (transf_code approxs f.(fn_code))
+ f.(fn_entrypoint)
+ f.(fn_nextpc)
+ (transf_code_wf f approxs f.(fn_code_wf)).
+
+Definition transf_program (p: program) : program :=
+ transform_program transf_function p.
+