From 2ae43be7b9d4118335c9d2cef6e098f9b9f807fe Mon Sep 17 00:00:00 2001 From: xleroy Date: Thu, 9 Feb 2006 14:55:48 +0000 Subject: Initial import of compcert git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@1 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e --- backend/AST.v | 216 ++++ backend/Allocation.v | 418 ++++++++ backend/Allocproof.v | 1827 ++++++++++++++++++++++++++++++++ backend/Allocproof_aux.v | 850 +++++++++++++++ backend/Alloctyping.v | 509 +++++++++ backend/Alloctyping_aux.v | 895 ++++++++++++++++ backend/CSE.v | 420 ++++++++ backend/CSEproof.v | 845 +++++++++++++++ backend/Cmconstr.v | 911 ++++++++++++++++ backend/Cmconstrproof.v | 1154 +++++++++++++++++++++ backend/Cminor.v | 348 +++++++ backend/Cminorgen.v | 398 +++++++ backend/Cminorgenproof.v | 2409 ++++++++++++++++++++++++++++++++++++++++++ backend/Coloring.v | 267 +++++ backend/Coloringproof.v | 845 +++++++++++++++ backend/Constprop.v | 1032 ++++++++++++++++++ backend/Constpropproof.v | 883 ++++++++++++++++ backend/Conventions.v | 690 +++++++++++++ backend/Csharpminor.v | 511 +++++++++ backend/Globalenvs.v | 587 +++++++++++ backend/InterfGraph.v | 310 ++++++ backend/Kildall.v | 1231 ++++++++++++++++++++++ backend/LTL.v | 357 +++++++ backend/LTLtyping.v | 93 ++ backend/Linear.v | 203 ++++ backend/Linearize.v | 212 ++++ backend/Linearizeproof.v | 711 +++++++++++++ backend/Linearizetyping.v | 340 ++++++ backend/Lineartyping.v | 254 +++++ backend/Locations.v | 476 +++++++++ backend/Mach.v | 295 ++++++ backend/Machabstr.v | 371 +++++++ backend/Machabstr2mach.v | 1120 ++++++++++++++++++++ backend/Machtyping.v | 367 +++++++ backend/Main.v | 307 ++++++ backend/Mem.v | 2253 ++++++++++++++++++++++++++++++++++++++++ backend/Op.v | 691 +++++++++++++ backend/PPC.v | 775 ++++++++++++++ backend/PPCgen.v | 514 +++++++++ backend/PPCgenproof.v | 1242 ++++++++++++++++++++++ backend/PPCgenproof1.v | 1566 ++++++++++++++++++++++++++++ backend/Parallelmove.v | 2529 +++++++++++++++++++++++++++++++++++++++++++++ backend/RTL.v | 349 +++++++ backend/RTLgen.v | 473 +++++++++ backend/RTLgenproof.v | 1302 +++++++++++++++++++++++ backend/RTLgenproof1.v | 1463 ++++++++++++++++++++++++++ backend/RTLtyping.v | 1277 +++++++++++++++++++++++ backend/Registers.v | 49 + backend/Stacking.v | 226 ++++ backend/Stackingproof.v | 1610 +++++++++++++++++++++++++++++ backend/Stackingtyping.v | 222 ++++ backend/Tunneling.v | 131 +++ backend/Tunnelingproof.v | 311 ++++++ backend/Tunnelingtyping.v | 44 + backend/Values.v | 888 ++++++++++++++++ 55 files changed, 40577 insertions(+) create mode 100644 backend/AST.v create mode 100644 backend/Allocation.v create mode 100644 backend/Allocproof.v create mode 100644 backend/Allocproof_aux.v create mode 100644 backend/Alloctyping.v create mode 100644 backend/Alloctyping_aux.v create mode 100644 backend/CSE.v create mode 100644 backend/CSEproof.v create mode 100644 backend/Cmconstr.v create mode 100644 backend/Cmconstrproof.v create mode 100644 backend/Cminor.v create mode 100644 backend/Cminorgen.v create mode 100644 backend/Cminorgenproof.v create mode 100644 backend/Coloring.v create mode 100644 backend/Coloringproof.v create mode 100644 backend/Constprop.v create mode 100644 backend/Constpropproof.v create mode 100644 backend/Conventions.v create mode 100644 backend/Csharpminor.v create mode 100644 backend/Globalenvs.v create mode 100644 backend/InterfGraph.v create mode 100644 backend/Kildall.v create mode 100644 backend/LTL.v create mode 100644 backend/LTLtyping.v create mode 100644 backend/Linear.v create mode 100644 backend/Linearize.v create mode 100644 backend/Linearizeproof.v create mode 100644 backend/Linearizetyping.v create mode 100644 backend/Lineartyping.v create mode 100644 backend/Locations.v create mode 100644 backend/Mach.v create mode 100644 backend/Machabstr.v create mode 100644 backend/Machabstr2mach.v create mode 100644 backend/Machtyping.v create mode 100644 backend/Main.v create mode 100644 backend/Mem.v create mode 100644 backend/Op.v create mode 100644 backend/PPC.v create mode 100644 backend/PPCgen.v create mode 100644 backend/PPCgenproof.v create mode 100644 backend/PPCgenproof1.v create mode 100644 backend/Parallelmove.v create mode 100644 backend/RTL.v create mode 100644 backend/RTLgen.v create mode 100644 backend/RTLgenproof.v create mode 100644 backend/RTLgenproof1.v create mode 100644 backend/RTLtyping.v create mode 100644 backend/Registers.v create mode 100644 backend/Stacking.v create mode 100644 backend/Stackingproof.v create mode 100644 backend/Stackingtyping.v create mode 100644 backend/Tunneling.v create mode 100644 backend/Tunnelingproof.v create mode 100644 backend/Tunnelingtyping.v create mode 100644 backend/Values.v (limited to 'backend') diff --git a/backend/AST.v b/backend/AST.v new file mode 100644 index 00000000..aae9e860 --- /dev/null +++ b/backend/AST.v @@ -0,0 +1,216 @@ +(** This file defines a number of data types and operations used in + the abstract syntax trees of many of the intermediate languages. *) + +Require Import Coqlib. + +Set Implicit Arguments. + +(** Identifiers (names of local variables, of global symbols and functions, + etc) are represented by the type [positive] of positive integers. *) + +Definition ident := positive. + +Definition ident_eq := peq. + +(** The languages are weakly typed, using only two types: [Tint] for + integers and pointers, and [Tfloat] for floating-point numbers. *) + +Inductive typ : Set := + | Tint : typ + | Tfloat : typ. + +Definition typesize (ty: typ) : Z := + match ty with Tint => 4 | Tfloat => 8 end. + +(** Additionally, function definitions and function calls are annotated + by function signatures indicating the number and types of arguments, + as well as the type of the returned value if any. These signatures + are used in particular to determine appropriate calling conventions + for the function. *) + +Record signature : Set := mksignature { + sig_args: list typ; + sig_res: option typ +}. + +(** Memory accesses (load and store instructions) are annotated by + a ``memory chunk'' indicating the type, size and signedness of the + chunk of memory being accessed. *) + +Inductive memory_chunk : Set := + | Mint8signed : memory_chunk (**r 8-bit signed integer *) + | Mint8unsigned : memory_chunk (**r 8-bit unsigned integer *) + | Mint16signed : memory_chunk (**r 16-bit signed integer *) + | Mint16unsigned : memory_chunk (**r 16-bit unsigned integer *) + | Mint32 : memory_chunk (**r 32-bit integer, or pointer *) + | Mfloat32 : memory_chunk (**r 32-bit single-precision float *) + | Mfloat64 : memory_chunk. (**r 64-bit double-precision float *) + +(** Comparison instructions can perform one of the six following comparisons + between their two operands. *) + +Inductive comparison : Set := + | Ceq : comparison (**r same *) + | Cne : comparison (**r different *) + | Clt : comparison (**r less than *) + | Cle : comparison (**r less than or equal *) + | Cgt : comparison (**r greater than *) + | Cge : comparison. (**r greater than or equal *) + +Definition negate_comparison (c: comparison): comparison := + match c with + | Ceq => Cne + | Cne => Ceq + | Clt => Cge + | Cle => Cgt + | Cgt => Cle + | Cge => Clt + end. + +Definition swap_comparison (c: comparison): comparison := + match c with + | Ceq => Ceq + | Cne => Cne + | Clt => Cgt + | Cle => Cge + | Cgt => Clt + | Cge => Cle + end. + +(** Whole programs consist of: +- a collection of function definitions (name and description); +- the name of the ``main'' function that serves as entry point in the program; +- a collection of global variable declarations (name and size in bytes). + +The type of function descriptions varies among the various intermediate +languages and is taken as parameter to the [program] type. The other parts +of whole programs are common to all languages. *) + +Record program (funct: Set) : Set := mkprogram { + prog_funct: list (ident * funct); + prog_main: ident; + prog_vars: list (ident * Z) +}. + +(** We now define a general iterator over programs that applies a given + code transformation function to all function descriptions and leaves + the other parts of the program unchanged. *) + +Section TRANSF_PROGRAM. + +Variable A B: Set. +Variable transf: A -> B. + +Fixpoint transf_program (l: list (ident * A)) : list (ident * B) := + match l with + | nil => nil + | (id, fn) :: rem => (id, transf fn) :: transf_program rem + end. + +Definition transform_program (p: program A) : program B := + mkprogram + (transf_program p.(prog_funct)) + p.(prog_main) + p.(prog_vars). + +Remark transf_program_functions: + forall fl i tf, + In (i, tf) (transf_program fl) -> + exists f, In (i, f) fl /\ transf f = tf. +Proof. + induction fl; simpl. + tauto. + destruct a. simpl. intros. + elim H; intro. exists a. split. left. congruence. congruence. + generalize (IHfl _ _ H0). intros [f [IN TR]]. + exists f. split. right. auto. auto. +Qed. + +Lemma transform_program_function: + forall p i tf, + In (i, tf) (transform_program p).(prog_funct) -> + exists f, In (i, f) p.(prog_funct) /\ transf f = tf. +Proof. + simpl. intros. eapply transf_program_functions; eauto. +Qed. + +End TRANSF_PROGRAM. + +(** The following is a variant of [transform_program] where the + code transformation function can fail and therefore returns an + option type. *) + +Section TRANSF_PARTIAL_PROGRAM. + +Variable A B: Set. +Variable transf_partial: A -> option B. + +Fixpoint transf_partial_program + (l: list (ident * A)) : option (list (ident * B)) := + match l with + | nil => Some nil + | (id, fn) :: rem => + match transf_partial fn with + | None => None + | Some fn' => + match transf_partial_program rem with + | None => None + | Some res => Some ((id, fn') :: res) + end + end + end. + +Definition transform_partial_program (p: program A) : option (program B) := + match transf_partial_program p.(prog_funct) with + | None => None + | Some fl => Some (mkprogram fl p.(prog_main) p.(prog_vars)) + end. + +Remark transf_partial_program_functions: + forall fl tfl i tf, + transf_partial_program fl = Some tfl -> + In (i, tf) tfl -> + exists f, In (i, f) fl /\ transf_partial f = Some tf. +Proof. + induction fl; simpl. + intros; injection H; intro; subst tfl; contradiction. + case a; intros id fn. intros until tf. + caseEq (transf_partial fn). + intros tfn TFN. + caseEq (transf_partial_program fl). + intros tfl1 TFL1 EQ. injection EQ; intro; clear EQ; subst tfl. + simpl. intros [EQ1|IN1]. + exists fn. intuition congruence. + generalize (IHfl _ _ _ TFL1 IN1). + intros [f [X Y]]. + exists f. intuition congruence. + intros; discriminate. + intros; discriminate. +Qed. + +Lemma transform_partial_program_function: + forall p tp i tf, + transform_partial_program p = Some tp -> + In (i, tf) tp.(prog_funct) -> + exists f, In (i, f) p.(prog_funct) /\ transf_partial f = Some tf. +Proof. + intros until tf. + unfold transform_partial_program. + caseEq (transf_partial_program (prog_funct p)). + intros. apply transf_partial_program_functions with l; auto. + injection H0; intros; subst tp. exact H1. + intros; discriminate. +Qed. + +Lemma transform_partial_program_main: + forall p tp, + transform_partial_program p = Some tp -> + tp.(prog_main) = p.(prog_main). +Proof. + intros p tp. unfold transform_partial_program. + destruct (transf_partial_program (prog_funct p)). + intro EQ; injection EQ; intro EQ1; rewrite <- EQ1; reflexivity. + intro; discriminate. +Qed. + +End TRANSF_PARTIAL_PROGRAM. diff --git a/backend/Allocation.v b/backend/Allocation.v new file mode 100644 index 00000000..30f9dcc6 --- /dev/null +++ b/backend/Allocation.v @@ -0,0 +1,418 @@ +(** Register allocation, spilling, reloading and explicitation of + calling conventions. *) + +Require Import Coqlib. +Require Import Maps. +Require Import AST. +Require Import Integers. +Require Import Values. +Require Import Globalenvs. +Require Import Op. +Require Import Registers. +Require Import RTL. +Require Import RTLtyping. +Require Import Kildall. +Require Import Locations. +Require Import Conventions. +Require Import Coloring. +Require Import Parallelmove. + +(** * Liveness analysis over RTL *) + +(** A register [r] is live at a point [p] if there exists a path + from [p] to some instruction that uses [r] as argument, + and [r] is not redefined along this path. + Liveness can be computed by a backward dataflow analysis. + The analysis operates over sets of (live) pseudo-registers. *) + +Notation reg_live := Regset.add. +Notation reg_dead := Regset.remove. + +Definition reg_option_live (or: option reg) (lv: Regset.t) := + match or with None => lv | Some r => reg_live r lv end. + +Definition reg_sum_live (ros: reg + ident) (lv: Regset.t) := + match ros with inl r => reg_live r lv | inr s => lv end. + +Fixpoint reg_list_live + (rl: list reg) (lv: Regset.t) {struct rl} : Regset.t := + match rl with + | nil => lv + | r1 :: rs => reg_list_live rs (reg_live r1 lv) + end. + +Fixpoint reg_list_dead + (rl: list reg) (lv: Regset.t) {struct rl} : Regset.t := + match rl with + | nil => lv + | r1 :: rs => reg_list_dead rs (reg_dead r1 lv) + end. + +(** Here is the transfer function for the dataflow analysis. + Since this is a backward dataflow analysis, it takes as argument + the abstract register set ``after'' the given instruction, + i.e. the registers that are live after; and it returns as result + the abstract register set ``before'' the given instruction, + i.e. the registers that must be live before. + The general relation between ``live before'' and ``live after'' + an instruction is that a register is live before if either + it is one of the arguments of the instruction, or it is not the result + of the instruction and it is live after. + However, if the result of a side-effect-free instruction is not + live ``after'', the whole instruction will be removed later + (since it computes a useless result), thus its arguments need not + be live ``before''. *) + +Definition transfer + (f: RTL.function) (pc: node) (after: Regset.t) : Regset.t := + match f.(fn_code)!pc with + | None => + after + | Some i => + match i with + | Inop s => + after + | Iop op args res s => + if Regset.mem res after then + reg_list_live args (reg_dead res after) + else + after + | Iload chunk addr args dst s => + if Regset.mem dst after then + reg_list_live args (reg_dead dst after) + else + after + | Istore chunk addr args src s => + reg_list_live args (reg_live src after) + | Icall sig ros args res s => + reg_list_live args + (reg_sum_live ros (reg_dead res after)) + | Icond cond args ifso ifnot => + reg_list_live args after + | Ireturn optarg => + reg_option_live optarg after + end + end. + +(** The liveness analysis is then obtained by instantiating the + general framework for backward dataflow analysis provided by + module [Kildall]. *) + +Module DS := Backward_Dataflow_Solver(Regset). + +Definition analyze (f: RTL.function): option (PMap.t Regset.t) := + DS.fixpoint (successors f) f.(fn_nextpc) (transfer f) nil. + +(** * Spilling and reloading *) + +(** LTL operations, like those of the target processor, operate only + over machine registers, but not over stack slots. Consider + the RTL instruction +<< + r1 <- Iop(Oadd, r1 :: r2 :: nil) +>> + and assume that [r1] and [r2] are assigned to stack locations [S s1] + and [S s2], respectively. The translated LTL code must load these + stack locations into temporary integer registers (this is called + ``reloading''), perform the [Oadd] operation over these temporaries, + leave the result in a temporary, then store the temporary back to + stack location [S s1] (this is called ``spilling''). In other term, + the generated LTL code has the following shape: +<< + IT1 <- Bgetstack s1; + IT2 <- Bgetstack s2; + IT1 <- Bop(Oadd, IT1 :: IT2 :: nil); + Bsetstack s1 IT1; +>> + This section provides functions that assist in choosing appropriate + temporaries and inserting the required spilling and reloading + operations. *) + +(** ** Allocation of temporary registers for reloading and spilling. *) + +(** [reg_for l] returns a machine register appropriate for working + over the location [l]: either the machine register [m] if [l = R m], + or a temporary register of [l]'s type if [l] is a stack slot. *) + +Definition reg_for (l: loc) : mreg := + match l with + | R r => r + | S s => match slot_type s with Tint => IT1 | Tfloat => FT1 end + end. + +(** [regs_for ll] is similar, for a list of locations [ll] of length + at most 3. We ensure that distinct temporaries are used for + different elements of [ll]. *) + +Fixpoint regs_for_rec (locs: list loc) (itmps ftmps: list mreg) + {struct locs} : list mreg := + match locs, itmps, ftmps with + | l1 :: ls, it1 :: its, ft1 :: fts => + match l1 with + | R r => r + | S s => match slot_type s with Tint => it1 | Tfloat => ft1 end + end + :: regs_for_rec ls its fts + | _, _, _ => nil + end. + +Definition regs_for (locs: list loc) := + regs_for_rec locs (IT1 :: IT2 :: IT3 :: nil) (FT1 :: FT2 :: FT3 :: nil). + +(** ** Insertion of LTL reloads, stores and moves *) + +Require Import LTL. + +(** [add_spill src dst k] prepends to [k] the instructions needed + to assign location [dst] the value of machine register [mreg]. *) + +Definition add_spill (src: mreg) (dst: loc) (k: block) := + match dst with + | R rd => if mreg_eq src rd then k else Bop Omove (src :: nil) rd k + | S sl => Bsetstack src sl k + end. + +(** [add_reload src dst k] prepends to [k] the instructions needed + to assign machine register [mreg] the value of the location [src]. *) + +Definition add_reload (src: loc) (dst: mreg) (k: block) := + match src with + | R rs => if mreg_eq rs dst then k else Bop Omove (rs :: nil) dst k + | S sl => Bgetstack sl dst k + end. + +(** [add_reloads] is similar for a list of locations (as sources) + and a list of machine registers (as destinations). *) + +Fixpoint add_reloads (srcs: list loc) (dsts: list mreg) (k: block) + {struct srcs} : block := + match srcs, dsts with + | s1 :: sl, t1 :: tl => + add_reload s1 t1 (add_reloads sl tl k) + | _, _ => + k + end. + +(** [add_move src dst k] prepends to [k] the instructions that copy + the value of location [src] into location [dst]. *) + +Definition add_move (src dst: loc) (k: block) := + if Loc.eq src dst then k else + match src, dst with + | R rs, _ => + add_spill rs dst k + | _, R rd => + add_reload src rd k + | S ss, S sd => + let tmp := + match slot_type ss with Tint => IT1 | Tfloat => FT1 end in + add_reload src tmp (add_spill tmp dst k) + end. + +(** [parallel_move srcs dsts k] is similar, but for a list of + source locations and a list of destination locations of the same + length. This is a parallel move, meaning that arbitrary overlap + between the sources and destinations is permitted. *) + +Fixpoint listsLoc2Moves (src dst : list loc) {struct src} : Moves := + match src, dst with + | nil, _ => nil + | s :: srcs, nil => nil + | s :: srcs, d :: dsts => (s, d) :: listsLoc2Moves srcs dsts + end. + +Definition parallel_move_order (src dst : list loc) := + Parallelmove.P_move (listsLoc2Moves src dst). + +Definition parallel_move (srcs dsts: list loc) (k: block) : block := + List.fold_left + (fun k p => add_move (fst p) (snd p) k) + (parallel_move_order srcs dsts) k. + +(** ** Constructors for LTL instructions *) + +(** The following functions generate LTL instructions operating + over the given locations. Appropriate reloading and spilling operations + are added around the core LTL instruction. *) + +Definition add_op (op: operation) (args: list loc) (res: loc) (s: node) := + match is_move_operation op args with + | Some src => + add_move src res (Bgoto s) + | None => + let rargs := regs_for args in + let rres := reg_for res in + add_reloads args rargs (Bop op rargs rres (add_spill rres res (Bgoto s))) + end. + +Definition add_load (chunk: memory_chunk) (addr: addressing) + (args: list loc) (dst: loc) (s: node) := + let rargs := regs_for args in + let rdst := reg_for dst in + add_reloads args rargs + (Bload chunk addr rargs rdst (add_spill rdst dst (Bgoto s))). + +Definition add_store (chunk: memory_chunk) (addr: addressing) + (args: list loc) (src: loc) (s: node) := + match regs_for (src :: args) with + | nil => Breturn (* never happens *) + | rsrc :: rargs => + add_reloads (src :: args) (rsrc :: rargs) + (Bstore chunk addr rargs rsrc (Bgoto s)) + end. + +(** For function calls, we also add appropriate moves to and from + the canonical locations for function arguments and function results, + as dictated by the calling conventions. *) + +Definition add_call (sig: signature) (ros: loc + ident) + (args: list loc) (res: loc) (s: node) := + let rargs := loc_arguments sig in + let rres := loc_result sig in + match ros with + | inl fn => + (add_reload fn IT3 + (parallel_move args rargs + (Bcall sig (inl _ IT3) (add_spill rres res (Bgoto s))))) + | inr id => + parallel_move args rargs + (Bcall sig (inr _ id) (add_spill rres res (Bgoto s))) + end. + +Definition add_cond (cond: condition) (args: list loc) (ifso ifnot: node) := + let rargs := regs_for args in + add_reloads args rargs (Bcond cond rargs ifso ifnot). + +(** For function returns, we add the appropriate move of the result + to the conventional location for the function result. If the function + returns with no value, we explicitly set the function result register + to the [Vundef] value, for consistency with RTL's semantics. *) + +Definition add_return (sig: signature) (optarg: option loc) := + match optarg with + | Some arg => add_reload arg (loc_result sig) Breturn + | None => Bop Oundef nil (loc_result sig) Breturn + end. + +(** For function entry points, we move from the parameter locations + dictated by the calling convention to the locations of the function + parameters. We also explicitly set to [Vundef] the locations + of pseudo-registers that are live at function entry but are not + parameters, again for consistency with RTL's semantics. *) + +Fixpoint add_undefs (ll: list loc) (b: block) {struct ll} : block := + match ll with + | nil => b + | R r :: ls => Bop Oundef nil r (add_undefs ls b) + | S s :: ls => add_undefs ls b + end. + +Definition add_entry (sig: signature) (params: list loc) (undefs: list loc) + (s: node) := + parallel_move (loc_parameters sig) params (add_undefs undefs (Bgoto s)). + +(** * Translation from RTL to LTL *) + +(** Each [RTL] instruction translates to an [LTL] basic block. + The register assignment [assign] returned by register allocation + is applied to the arguments and results of the RTL + instruction, followed by an invocation of the appropriate [LTL] + constructor function that will deal with spilling, reloading and + calling conventions. In addition, dead instructions are eliminated. + Dead instructions are instructions without side-effects ([Iop] and + [Iload]) whose result register is dead, i.e. whose result value + is never used. *) + +Definition transf_instr + (f: RTL.function) (live: PMap.t Regset.t) (assign: reg -> loc) + (pc: node) (instr: RTL.instruction) : LTL.block := + match instr with + | Inop s => + Bgoto s + | Iop op args res s => + if Regset.mem res live!!pc then + add_op op (List.map assign args) (assign res) s + else + Bgoto s + | Iload chunk addr args dst s => + if Regset.mem dst live!!pc then + add_load chunk addr (List.map assign args) (assign dst) s + else + Bgoto s + | Istore chunk addr args src s => + add_store chunk addr (List.map assign args) (assign src) s + | Icall sig ros args res s => + add_call sig (sum_left_map assign ros) (List.map assign args) + (assign res) s + | Icond cond args ifso ifnot => + add_cond cond (List.map assign args) ifso ifnot + | Ireturn optarg => + add_return (RTL.fn_sig f) (option_map assign optarg) + end. + +Definition transf_entrypoint + (f: RTL.function) (live: PMap.t Regset.t) (assign: reg -> loc) + (newcode: LTL.code) : LTL.code := + let oldentry := RTL.fn_entrypoint f in + let newentry := RTL.fn_nextpc f in + let undefs := + Regset.elements (reg_list_dead (RTL.fn_params f) + (transfer f oldentry live!!oldentry)) in + PTree.set + newentry + (add_entry (RTL.fn_sig f) + (List.map assign (RTL.fn_params f)) + (List.map assign undefs) + oldentry) + newcode. + +Lemma transf_entrypoint_wf: + forall (f: RTL.function) (live: PMap.t Regset.t) (assign: reg -> loc), + let tc1 := PTree.map (transf_instr f live assign) (RTL.fn_code f) in + let tc2 := transf_entrypoint f live assign tc1 in + forall (pc: node), Plt pc (Psucc (RTL.fn_nextpc f)) \/ tc2!pc = None. +Proof. + intros. case (plt pc (Psucc (RTL.fn_nextpc f))); intro. + left. auto. + right. + assert (pc <> RTL.fn_nextpc f). + red; intro. subst pc. elim n. apply Plt_succ. + assert (~ (Plt pc (RTL.fn_nextpc f))). + red; intro. elim n. apply Plt_trans_succ; auto. + unfold tc2. unfold transf_entrypoint. + rewrite PTree.gso; auto. + unfold tc1. rewrite PTree.gmap. + elim (RTL.fn_code_wf f pc); intro. + contradiction. unfold option_map. rewrite H1. auto. +Qed. + +(** The translation of a function performs liveness analysis, + construction and coloring of the inference graph, and per-instruction + transformation as described above. *) + +Definition transf_function (f: RTL.function) : option LTL.function := + match type_rtl_function f with + | None => None + | Some env => + match analyze f with + | None => None + | Some live => + let pc0 := f.(RTL.fn_entrypoint) in + let live0 := transfer f pc0 live!!pc0 in + match regalloc f live live0 env with + | None => None + | Some assign => + Some (LTL.mkfunction + (RTL.fn_sig f) + (RTL.fn_stacksize f) + (transf_entrypoint f live assign + (PTree.map (transf_instr f live assign) (RTL.fn_code f))) + (RTL.fn_nextpc f) + (transf_entrypoint_wf f live assign)) + end + end + end. + +Definition transf_program (p: RTL.program) : option LTL.program := + transform_partial_program transf_function p. + diff --git a/backend/Allocproof.v b/backend/Allocproof.v new file mode 100644 index 00000000..138e6d79 --- /dev/null +++ b/backend/Allocproof.v @@ -0,0 +1,1827 @@ +(** Correctness proof for the [Allocation] pass (translation from + RTL to LTL). *) + +Require Import Relations. +Require Import Coqlib. +Require Import Maps. +Require Import AST. +Require Import Integers. +Require Import Values. +Require Import Mem. +Require Import Globalenvs. +Require Import Op. +Require Import Registers. +Require Import RTL. +Require Import RTLtyping. +Require Import Locations. +Require Import Conventions. +Require Import Coloring. +Require Import Coloringproof. +Require Import Allocation. +Require Import Allocproof_aux. + +(** * Semantic properties of calling conventions *) + +(** The value of a parameter in the called function is the same + as the value of the corresponding argument in the caller function. *) + +Lemma call_regs_param_of_arg: + forall sig ls l, + In l (loc_arguments sig) -> + LTL.call_regs ls (parameter_of_argument l) = ls l. +Proof. + intros. + generalize (loc_arguments_acceptable sig l H). + unfold LTL.call_regs; unfold parameter_of_argument. + unfold loc_argument_acceptable. + destruct l. auto. destruct s; tauto. +Qed. + +(** The return value, stored in the conventional return register, + is correctly passed from the callee back to the caller. *) + +Lemma return_regs_result: + forall sig caller callee, + LTL.return_regs caller callee (R (loc_result sig)) = + callee (R (loc_result sig)). +Proof. + intros. unfold LTL.return_regs. + case (In_dec Loc.eq (R (loc_result sig)) temporaries); intro. + auto. + case (In_dec Loc.eq (R (loc_result sig)) destroyed_at_call); intro. + auto. + elim n0. apply loc_result_acceptable. +Qed. + +(** Acceptable locations that are not destroyed at call keep + their values across a call. *) + +Lemma return_regs_not_destroyed: + forall caller callee l, + Loc.notin l destroyed_at_call -> loc_acceptable l -> + LTL.return_regs caller callee l = caller l. +Proof. + unfold loc_acceptable, LTL.return_regs. + destruct l; auto. + intros. case (In_dec Loc.eq (R m) temporaries); intro. + contradiction. + case (In_dec Loc.eq (R m) destroyed_at_call); intro. + elim (Loc.notin_not_in _ _ H i). + auto. +Qed. + +(** * Correctness condition for the liveness analysis *) + +(** The liveness information computed by the dataflow analysis is + correct in the following sense: all registers live ``before'' + an instruction are live ``after'' all of its predecessors. *) + +Lemma analyze_correct: + forall (f: function) (live: PMap.t Regset.t) (n s: node), + analyze f = Some live -> + f.(fn_code)!n <> None -> + f.(fn_code)!s <> None -> + In s (successors f n) -> + Regset.ge live!!n (transfer f s live!!s). +Proof. + intros. + eapply DS.fixpoint_solution. + unfold analyze in H. eexact H. + elim (fn_code_wf f n); intro. auto. contradiction. + elim (fn_code_wf f s); intro. auto. contradiction. + auto. +Qed. + +Definition live0 (f: RTL.function) (live: PMap.t Regset.t) := + transfer f f.(RTL.fn_entrypoint) live!!(f.(RTL.fn_entrypoint)). + +(** * Properties of allocated locations *) + +(** We list here various properties of the locations [alloc r], + where [r] is an RTL pseudo-register and [alloc] is the register + assignment returned by [regalloc]. *) + +Section REGALLOC_PROPERTIES. + +Variable f: function. +Variable env: regenv. +Variable live: PMap.t Regset.t. +Variable alloc: reg -> loc. +Hypothesis ALLOC: regalloc f live (live0 f live) env = Some alloc. + +Lemma loc_acceptable_noteq_diff: + forall l1 l2, + loc_acceptable l1 -> l1 <> l2 -> Loc.diff l1 l2. +Proof. + unfold loc_acceptable, Loc.diff; destruct l1; destruct l2; + try (destruct s); try (destruct s0); intros; auto; try congruence. + case (zeq z z0); intro. + compare t t0; intro. + subst z0; subst t0; tauto. + tauto. tauto. + contradiction. contradiction. +Qed. + +Lemma regalloc_noteq_diff: + forall r1 l2, + alloc r1 <> l2 -> Loc.diff (alloc r1) l2. +Proof. + intros. apply loc_acceptable_noteq_diff. + eapply regalloc_acceptable; eauto. + auto. +Qed. + +Lemma loc_acceptable_notin_notin: + forall r ll, + loc_acceptable r -> + ~(In r ll) -> Loc.notin r ll. +Proof. + induction ll; simpl; intros. + auto. + split. apply loc_acceptable_noteq_diff. assumption. + apply sym_not_equal. tauto. + apply IHll. assumption. tauto. +Qed. + +Lemma regalloc_notin_notin: + forall r ll, + ~(In (alloc r) ll) -> Loc.notin (alloc r) ll. +Proof. + intros. apply loc_acceptable_notin_notin. + eapply regalloc_acceptable; eauto. auto. +Qed. + +Lemma regalloc_norepet_norepet: + forall rl, + list_norepet (List.map alloc rl) -> + Loc.norepet (List.map alloc rl). +Proof. + induction rl; simpl; intros. + apply Loc.norepet_nil. + inversion H. + apply Loc.norepet_cons. + eapply regalloc_notin_notin; eauto. + auto. +Qed. + +Lemma regalloc_not_temporary: + forall (r: reg), + Loc.notin (alloc r) temporaries. +Proof. + intros. apply temporaries_not_acceptable. + eapply regalloc_acceptable; eauto. +Qed. + +Lemma regalloc_disj_temporaries: + forall (rl: list reg), + Loc.disjoint (List.map alloc rl) temporaries. +Proof. + intros. + apply Loc.notin_disjoint. intros. + generalize (list_in_map_inv _ _ _ H). intros [r [EQ IN]]. + subst x. apply regalloc_not_temporary; auto. +Qed. + +End REGALLOC_PROPERTIES. + +(** * Semantic agreement between RTL registers and LTL locations *) + +Require Import LTL. + +Section AGREE. + +Variable f: RTL.function. +Variable env: regenv. +Variable flive: PMap.t Regset.t. +Variable assign: reg -> loc. +Hypothesis REGALLOC: regalloc f flive (live0 f flive) env = Some assign. + +(** Remember the core of the code transformation performed in module + [Allocation]: every reference to register [r] is replaced by + a reference to location [assign r]. We will shortly prove + the semantic equivalence between the original code and the transformed code. + The key tool to do this is the following relation between + a register set [rs] in the original RTL program and a location set + [ls] in the transformed LTL program. The two sets agree if + they assign identical values to matching registers and locations, + that is, the value of register [r] in [rs] is the same as + the value of location [assign r] in [ls]. However, this equality + needs to hold only for live registers [r]. If [r] is dead at + the current point, its value is never used later, hence the value + of [assign r] can be arbitrary. *) + +Definition agree (live: Regset.t) (rs: regset) (ls: locset) : Prop := + forall (r: reg), Regset.mem r live = true -> ls (assign r) = rs#r. + +(** What follows is a long list of lemmas expressing properties + of the [agree_live_regs] predicate that are useful for the + semantic equivalence proof. First: two register sets that agree + on a given set of live registers also agree on a subset of + those live registers. *) + +Lemma agree_increasing: + forall live1 live2 rs ls, + Regset.ge live1 live2 -> agree live1 rs ls -> + agree live2 rs ls. +Proof. + unfold agree; intros. + apply H0. apply H. auto. +Qed. + +(** Some useful special cases of [agree_increasing]. *) + +Lemma agree_reg_live: + forall r live rs ls, + agree (reg_live r live) rs ls -> agree live rs ls. +Proof. + intros. apply agree_increasing with (reg_live r live). + red; intros. case (Reg.eq r r0); intro. + subst r0. apply Regset.mem_add_same. + rewrite Regset.mem_add_other; auto. auto. +Qed. + +Lemma agree_reg_list_live: + forall rl live rs ls, + agree (reg_list_live rl live) rs ls -> agree live rs ls. +Proof. + induction rl; simpl; intros. + assumption. + apply agree_reg_live with a. apply IHrl. assumption. +Qed. + +Lemma agree_reg_sum_live: + forall ros live rs ls, + agree (reg_sum_live ros live) rs ls -> agree live rs ls. +Proof. + intros. destruct ros; simpl in H. + apply agree_reg_live with r; auto. + auto. +Qed. + +(** Agreement over a set of live registers just extended with [r] + implies equality of the values of [r] and [assign r]. *) + +Lemma agree_eval_reg: + forall r live rs ls, + agree (reg_live r live) rs ls -> ls (assign r) = rs#r. +Proof. + intros. apply H. apply Regset.mem_add_same. +Qed. + +(** Same, for a list of registers. *) + +Lemma agree_eval_regs: + forall rl live rs ls, + agree (reg_list_live rl live) rs ls -> + List.map ls (List.map assign rl) = rs##rl. +Proof. + induction rl; simpl; intros. + reflexivity. + apply (f_equal2 (@cons val)). + apply agree_eval_reg with live. + apply agree_reg_list_live with rl. auto. + eapply IHrl. eexact H. +Qed. + +(** Agreement is insensitive to the current values of the temporary + machine registers. *) + +Lemma agree_exten: + forall live rs ls ls', + agree live rs ls -> + (forall l, Loc.notin l temporaries -> ls' l = ls l) -> + agree live rs ls'. +Proof. + unfold agree; intros. + rewrite H0. apply H. auto. eapply regalloc_not_temporary; eauto. +Qed. + +(** If a register is dead, assigning it an arbitrary value in [rs] + and leaving [ls] unchanged preserves agreement. (This corresponds + to an operation over a dead register in the original program + that is turned into a no-op in the transformed program.) *) + +Lemma agree_assign_dead: + forall live r rs ls v, + Regset.mem r live = false -> + agree live rs ls -> + agree live (rs#r <- v) ls. +Proof. + unfold agree; intros. + case (Reg.eq r r0); intro. + subst r0. congruence. + rewrite Regmap.gso; auto. +Qed. + +(** Setting [r] to value [v] in [rs] + and simultaneously setting [assign r] to value [v] in [ls] + preserves agreement, provided that all live registers except [r] + are mapped to locations other than that of [r]. *) + +Lemma agree_assign_live: + forall live r rs ls ls' v, + (forall s, + Regset.mem s live = true -> s <> r -> assign s <> assign r) -> + ls' (assign r) = v -> + (forall l, Loc.diff l (assign r) -> Loc.notin l temporaries -> ls' l = ls l) -> + agree (reg_dead r live) rs ls -> + agree live (rs#r <- v) ls'. +Proof. + unfold agree; intros. + case (Reg.eq r r0); intro. + subst r0. rewrite Regmap.gss. assumption. + rewrite Regmap.gso; auto. + rewrite H1. apply H2. rewrite Regset.mem_remove_other. auto. + auto. eapply regalloc_noteq_diff. eauto. apply H. auto. auto. + eapply regalloc_not_temporary; eauto. +Qed. + +(** This is a special case of the previous lemma where the value [v] + being stored is not arbitrary, but is the value of + another register [arg]. (This corresponds to a register-register move + instruction.) In this case, the condition can be weakened: + it suffices that all live registers except [arg] and [res] + are mapped to locations other than that of [res]. *) + +Lemma agree_move_live: + forall live arg res rs (ls ls': locset), + (forall r, + Regset.mem r live = true -> r <> res -> r <> arg -> + assign r <> assign res) -> + ls' (assign res) = ls (assign arg) -> + (forall l, Loc.diff l (assign res) -> Loc.notin l temporaries -> ls' l = ls l) -> + agree (reg_live arg (reg_dead res live)) rs ls -> + agree live (rs#res <- (rs#arg)) ls'. +Proof. + unfold agree; intros. + case (Reg.eq res r); intro. + subst r. rewrite Regmap.gss. rewrite H0. apply H2. + apply Regset.mem_add_same. + rewrite Regmap.gso; auto. + case (Loc.eq (assign r) (assign res)); intro. + rewrite e. rewrite H0. + case (Reg.eq arg r); intro. + subst r. apply H2. apply Regset.mem_add_same. + elim (H r); auto. + rewrite H1. apply H2. + case (Reg.eq arg r); intro. subst r. apply Regset.mem_add_same. + rewrite Regset.mem_add_other; auto. + rewrite Regset.mem_remove_other; auto. + eapply regalloc_noteq_diff; eauto. + eapply regalloc_not_temporary; eauto. +Qed. + +(** This complicated lemma states agreement between the states after + a function call, provided that the states before the call agree + and that calling conventions are respected. *) + +Lemma agree_call: + forall live args ros res rs v (ls ls': locset), + (forall r, + Regset.mem r live = true -> + r <> res -> + ~(In (assign r) Conventions.destroyed_at_call)) -> + (forall r, + Regset.mem r live = true -> r <> res -> assign r <> assign res) -> + ls' (assign res) = v -> + (forall l, + Loc.notin l destroyed_at_call -> loc_acceptable l -> Loc.diff l (assign res) -> + ls' l = ls l) -> + agree (reg_list_live args (reg_sum_live ros (reg_dead res live))) rs ls -> + agree live (rs#res <- v) ls'. +Proof. + intros. + assert (agree (reg_dead res live) rs ls). + apply agree_reg_sum_live with ros. + apply agree_reg_list_live with args. assumption. + red; intros. + case (Reg.eq r res); intro. + subst r. rewrite Regmap.gss. assumption. + rewrite Regmap.gso; auto. rewrite H2. apply H4. + rewrite Regset.mem_remove_other; auto. + eapply regalloc_notin_notin; eauto. + eapply regalloc_acceptable; eauto. + eapply regalloc_noteq_diff; eauto. +Qed. + +(** Agreement between the initial register set at RTL function entry + and the location set at LTL function entry. *) + +Lemma agree_init_regs: + forall rl vl ls live, + (forall r1 r2, + In r1 rl -> Regset.mem r2 live = true -> r1 <> r2 -> + assign r1 <> assign r2) -> + List.map ls (List.map assign rl) = vl -> + agree (reg_list_dead rl live) (Regmap.init Vundef) ls -> + agree live (init_regs vl rl) ls. +Proof. + induction rl; simpl; intros. + assumption. + destruct vl. discriminate. + assert (agree (reg_dead a live) (init_regs vl rl) ls). + apply IHrl. intros. apply H. tauto. + case (Reg.eq a r2); intro. subst r2. + rewrite Regset.mem_remove_same in H3. discriminate. + rewrite Regset.mem_remove_other in H3; auto. + auto. congruence. assumption. + red; intros. case (Reg.eq a r); intro. + subst r. rewrite Regmap.gss. congruence. + rewrite Regmap.gso; auto. apply H2. + rewrite Regset.mem_remove_other; auto. +Qed. + +Lemma agree_parameters: + forall vl ls, + let params := f.(RTL.fn_params) in + List.map ls (List.map assign params) = vl -> + (forall r, + Regset.mem r (reg_list_dead params (live0 f flive)) = true -> + ls (assign r) = Vundef) -> + agree (live0 f flive) (init_regs vl params) ls. +Proof. + intros. apply agree_init_regs. + intros. eapply regalloc_correct_3; eauto. + assumption. + red; intros. rewrite Regmap.gi. auto. +Qed. + +End AGREE. + +(** * Correctness of the LTL constructors *) + +(** This section proves theorems that establish the correctness of the + LTL constructor functions such as [add_op]. The theorems are of + the general form ``the generated LTL instructions execute and + modify the location set in the expected way: the result location(s) + contain the expected values and other, non-temporary locations keep + their values''. *) + +Section LTL_CONSTRUCTORS. + +Variable ge: LTL.genv. +Variable sp: val. + +Lemma reg_for_spec: + forall l, + R(reg_for l) = l \/ In (R (reg_for l)) temporaries. +Proof. + intros. unfold reg_for. destruct l. tauto. + case (slot_type s); simpl; tauto. +Qed. + +Lemma add_reload_correct: + forall src dst k rs m, + exists rs', + exec_instrs ge sp (add_reload src dst k) rs m k rs' m /\ + rs' (R dst) = rs src /\ + forall l, Loc.diff (R dst) l -> rs' l = rs l. +Proof. + intros. unfold add_reload. destruct src. + case (mreg_eq m0 dst); intro. + subst dst. exists rs. split. apply exec_refl. tauto. + exists (Locmap.set (R dst) (rs (R m0)) rs). + split. apply exec_one; apply exec_Bop. reflexivity. + split. apply Locmap.gss. + intros; apply Locmap.gso; auto. + exists (Locmap.set (R dst) (rs (S s)) rs). + split. apply exec_one; apply exec_Bgetstack. + split. apply Locmap.gss. + intros; apply Locmap.gso; auto. +Qed. + +Lemma add_spill_correct: + forall src dst k rs m, + exists rs', + exec_instrs ge sp (add_spill src dst k) rs m k rs' m /\ + rs' dst = rs (R src) /\ + forall l, Loc.diff dst l -> rs' l = rs l. +Proof. + intros. unfold add_spill. destruct dst. + case (mreg_eq src m0); intro. + subst src. exists rs. split. apply exec_refl. tauto. + exists (Locmap.set (R m0) (rs (R src)) rs). + split. apply exec_one. apply exec_Bop. reflexivity. + split. apply Locmap.gss. + intros; apply Locmap.gso; auto. + exists (Locmap.set (S s) (rs (R src)) rs). + split. apply exec_one. apply exec_Bsetstack. + split. apply Locmap.gss. + intros; apply Locmap.gso; auto. +Qed. + +Lemma add_reloads_correct_rec: + forall srcs itmps ftmps k rs m, + (List.length srcs <= List.length itmps)%nat -> + (List.length srcs <= List.length ftmps)%nat -> + (forall r, In (R r) srcs -> In r itmps -> False) -> + (forall r, In (R r) srcs -> In r ftmps -> False) -> + list_disjoint itmps ftmps -> + list_norepet itmps -> + list_norepet ftmps -> + exists rs', + exec_instrs ge sp (add_reloads srcs (regs_for_rec srcs itmps ftmps) k) rs m k rs' m /\ + reglist (regs_for_rec srcs itmps ftmps) rs' = map rs srcs /\ + (forall r, ~(In r itmps) -> ~(In r ftmps) -> rs' (R r) = rs (R r)) /\ + (forall s, rs' (S s) = rs (S s)). +Proof. + induction srcs; simpl; intros. + (* base case *) + exists rs. split. apply exec_refl. tauto. + (* inductive case *) + destruct itmps; simpl in H. omegaContradiction. + destruct ftmps; simpl in H0. omegaContradiction. + assert (R1: (length srcs <= length itmps)%nat). omega. + assert (R2: (length srcs <= length ftmps)%nat). omega. + assert (R3: forall r, In (R r) srcs -> In r itmps -> False). + intros. apply H1 with r. tauto. auto with coqlib. + assert (R4: forall r, In (R r) srcs -> In r ftmps -> False). + intros. apply H2 with r. tauto. auto with coqlib. + assert (R5: list_disjoint itmps ftmps). + eapply list_disjoint_cons_left. + eapply list_disjoint_cons_right. eauto. + assert (R6: list_norepet itmps). + inversion H4; auto. + assert (R7: list_norepet ftmps). + inversion H5; auto. + destruct a. + (* a is a register *) + generalize (IHsrcs itmps ftmps k rs m R1 R2 R3 R4 R5 R6 R7). + intros [rs' [EX [RES [OTH1 OTH2]]]]. + exists rs'. split. + unfold add_reload. case (mreg_eq m2 m2); intro; tauto. + split. simpl. apply (f_equal2 (@cons val)). + apply OTH1. + red; intro; apply H1 with m2. tauto. auto with coqlib. + red; intro; apply H2 with m2. tauto. auto with coqlib. + assumption. + split. intros. apply OTH1. simpl in H6; tauto. simpl in H7; tauto. + auto. + (* a is a stack location *) + set (tmp := match slot_type s with Tint => m0 | Tfloat => m1 end). + assert (NI: ~(In tmp itmps)). + unfold tmp; case (slot_type s). + inversion H4; auto. + apply list_disjoint_notin with (m1 :: ftmps). + apply list_disjoint_sym. apply list_disjoint_cons_left with m0. + auto. auto with coqlib. + assert (NF: ~(In tmp ftmps)). + unfold tmp; case (slot_type s). + apply list_disjoint_notin with (m0 :: itmps). + apply list_disjoint_cons_right with m1. + auto. auto with coqlib. + inversion H5; auto. + generalize + (add_reload_correct (S s) tmp + (add_reloads srcs (regs_for_rec srcs itmps ftmps) k) rs m). + intros [rs1 [EX1 [RES1 OTH]]]. + generalize (IHsrcs itmps ftmps k rs1 m R1 R2 R3 R4 R5 R6 R7). + intros [rs' [EX [RES [OTH1 OTH2]]]]. + exists rs'. + split. eapply exec_trans; eauto. + split. simpl. apply (f_equal2 (@cons val)). + rewrite OTH1; auto. + rewrite RES. apply list_map_exten. intros. + symmetry. apply OTH. + destruct x; try exact I. simpl. red; intro; subst m2. + generalize H6; unfold tmp. case (slot_type s). + intro. apply H1 with m0. tauto. auto with coqlib. + intro. apply H2 with m1. tauto. auto with coqlib. + split. intros. simpl in H6; simpl in H7. + rewrite OTH1. apply OTH. + simpl. unfold tmp. case (slot_type s); tauto. + tauto. tauto. + intros. rewrite OTH2. apply OTH. exact I. +Qed. + +Lemma add_reloads_correct: + forall srcs k rs m, + (List.length srcs <= 3)%nat -> + Loc.disjoint srcs temporaries -> + exists rs', + exec_instrs ge sp (add_reloads srcs (regs_for srcs) k) rs m k rs' m /\ + reglist (regs_for srcs) rs' = List.map rs srcs /\ + forall l, Loc.notin l temporaries -> rs' l = rs l. +Proof. + intros. + pose (itmps := IT1 :: IT2 :: IT3 :: nil). + pose (ftmps := FT1 :: FT2 :: FT3 :: nil). + assert (R1: (List.length srcs <= List.length itmps)%nat). + unfold itmps; simpl; assumption. + assert (R2: (List.length srcs <= List.length ftmps)%nat). + unfold ftmps; simpl; assumption. + assert (R3: forall r, In (R r) srcs -> In r itmps -> False). + intros. assert (In (R r) temporaries). + simpl in H2; simpl; intuition congruence. + generalize (H0 _ _ H1 H3). simpl. tauto. + assert (R4: forall r, In (R r) srcs -> In r ftmps -> False). + intros. assert (In (R r) temporaries). + simpl in H2; simpl; intuition congruence. + generalize (H0 _ _ H1 H3). simpl. tauto. + assert (R5: list_disjoint itmps ftmps). + red; intros r1 r2; simpl; intuition congruence. + assert (R6: list_norepet itmps). + unfold itmps. NoRepet. + assert (R7: list_norepet ftmps). + unfold ftmps. NoRepet. + generalize (add_reloads_correct_rec srcs itmps ftmps k rs m + R1 R2 R3 R4 R5 R6 R7). + intros [rs' [EX [RES [OTH1 OTH2]]]]. + exists rs'. split. exact EX. + split. exact RES. + intros. destruct l. apply OTH1. + generalize (Loc.notin_not_in _ _ H1). simpl. intuition congruence. + generalize (Loc.notin_not_in _ _ H1). simpl. intuition congruence. + apply OTH2. +Qed. + +Lemma add_move_correct: + forall src dst k rs m, + exists rs', + exec_instrs ge sp (add_move src dst k) rs m k rs' m /\ + rs' dst = rs src /\ + forall l, Loc.diff l dst -> Loc.diff l (R IT1) -> Loc.diff l (R FT1) -> rs' l = rs l. +Proof. + intros; unfold add_move. + case (Loc.eq src dst); intro. + subst dst. exists rs. split. apply exec_refl. tauto. + destruct src. + (* src is a register *) + generalize (add_spill_correct m0 dst k rs m); intros [rs' [EX [RES OTH]]]. + exists rs'; intuition. apply OTH; apply Loc.diff_sym; auto. + destruct dst. + (* src is a stack slot, dst a register *) + generalize (add_reload_correct (S s) m0 k rs m); intros [rs' [EX [RES OTH]]]. + exists rs'; intuition. apply OTH; apply Loc.diff_sym; auto. + (* src and dst are stack slots *) + set (tmp := match slot_type s with Tint => IT1 | Tfloat => FT1 end). + generalize (add_reload_correct (S s) tmp (add_spill tmp (S s0) k) rs m); + intros [rs1 [EX1 [RES1 OTH1]]]. + generalize (add_spill_correct tmp (S s0) k rs1 m); + intros [rs2 [EX2 [RES2 OTH2]]]. + exists rs2. split. + eapply exec_trans; eauto. + split. congruence. + intros. rewrite OTH2. apply OTH1. + apply Loc.diff_sym. unfold tmp; case (slot_type s); auto. + apply Loc.diff_sym; auto. +Qed. + +Theorem parallel_move_correct: + forall srcs dsts k rs m, + List.length srcs = List.length dsts -> + Loc.no_overlap srcs dsts -> + Loc.norepet dsts -> + Loc.disjoint srcs temporaries -> + Loc.disjoint dsts temporaries -> + exists rs', + exec_instrs ge sp (parallel_move srcs dsts k) rs m k rs' m /\ + List.map rs' dsts = List.map rs srcs /\ + rs' (R IT3) = rs (R IT3) /\ + forall l, Loc.notin l dsts -> Loc.notin l temporaries -> rs' l = rs l. +Proof. + apply (parallel_move_correctX ge sp). + apply add_move_correct. +Qed. + +Lemma add_op_correct: + forall op args res s rs m v, + (List.length args <= 3)%nat -> + Loc.disjoint args temporaries -> + eval_operation ge sp op (List.map rs args) = Some v -> + exists rs', + exec_block ge sp (add_op op args res s) rs m (Cont s) rs' m /\ + rs' res = v /\ + forall l, Loc.diff l res -> Loc.notin l temporaries -> rs' l = rs l. +Proof. + intros. unfold add_op. + caseEq (is_move_operation op args). + (* move *) + intros arg IMO. + generalize (is_move_operation_correct op args IMO). + intros [EQ1 EQ2]. subst op; subst args. + generalize (add_move_correct arg res (Bgoto s) rs m). + intros [rs' [EX [RES OTHER]]]. + exists rs'. split. + apply exec_Bgoto. exact EX. + split. simpl in H1. congruence. + intros. unfold temporaries in H3; simpl in H3. + apply OTHER. assumption. tauto. tauto. + (* other ops *) + intros. + set (rargs := regs_for args). set (rres := reg_for res). + generalize (add_reloads_correct args + (Bop op rargs rres (add_spill rres res (Bgoto s))) + rs m H H0). + intros [rs1 [EX1 [RES1 OTHER1]]]. + pose (rs2 := Locmap.set (R rres) v rs1). + generalize (add_spill_correct rres res (Bgoto s) rs2 m). + intros [rs3 [EX3 [RES3 OTHER3]]]. + exists rs3. + split. apply exec_Bgoto. eapply exec_trans. eexact EX1. + eapply exec_trans; eauto. + apply exec_one. unfold rs2. apply exec_Bop. + unfold rargs. rewrite RES1. auto. + split. rewrite RES3. unfold rs2; apply Locmap.gss. + intros. rewrite OTHER3. unfold rs2. rewrite Locmap.gso. + apply OTHER1. assumption. + apply Loc.diff_sym. unfold rres. elim (reg_for_spec res); intro. + rewrite H5; auto. + eapply Loc.in_notin_diff; eauto. apply Loc.diff_sym; auto. +Qed. + +Lemma add_load_correct: + forall chunk addr args res s rs m a v, + (List.length args <= 2)%nat -> + Loc.disjoint args temporaries -> + eval_addressing ge sp addr (List.map rs args) = Some a -> + loadv chunk m a = Some v -> + exists rs', + exec_block ge sp (add_load chunk addr args res s) rs m (Cont s) rs' m /\ + rs' res = v /\ + forall l, Loc.diff l res -> Loc.notin l temporaries -> rs' l = rs l. +Proof. + intros. unfold add_load. + set (rargs := regs_for args). set (rres := reg_for res). + assert (LL: (List.length args <= 3)%nat). omega. + generalize (add_reloads_correct args + (Bload chunk addr rargs rres (add_spill rres res (Bgoto s))) + rs m LL H0). + intros [rs1 [EX1 [RES1 OTHER1]]]. + pose (rs2 := Locmap.set (R rres) v rs1). + generalize (add_spill_correct rres res (Bgoto s) rs2 m). + intros [rs3 [EX3 [RES3 OTHER3]]]. + exists rs3. + split. apply exec_Bgoto. eapply exec_trans; eauto. + eapply exec_trans; eauto. + apply exec_one. unfold rs2. apply exec_Bload with a. + unfold rargs; rewrite RES1. assumption. assumption. + split. rewrite RES3. unfold rs2; apply Locmap.gss. + intros. rewrite OTHER3. unfold rs2. rewrite Locmap.gso. + apply OTHER1. assumption. + apply Loc.diff_sym. unfold rres. elim (reg_for_spec res); intro. + rewrite H5; auto. + eapply Loc.in_notin_diff; eauto. apply Loc.diff_sym; auto. +Qed. + +Lemma add_store_correct: + forall chunk addr args src s rs m m' a, + (List.length args <= 2)%nat -> + Loc.disjoint args temporaries -> + Loc.notin src temporaries -> + eval_addressing ge sp addr (List.map rs args) = Some a -> + storev chunk m a (rs src) = Some m' -> + exists rs', + exec_block ge sp (add_store chunk addr args src s) rs m (Cont s) rs' m' /\ + forall l, Loc.notin l temporaries -> rs' l = rs l. +Proof. + intros. + assert (LL: (List.length (src :: args) <= 3)%nat). + simpl. omega. + assert (DISJ: Loc.disjoint (src :: args) temporaries). + red; intros. elim H4; intro. subst x1. + eapply Loc.in_notin_diff; eauto. + auto with coqlib. + unfold add_store. caseEq (regs_for (src :: args)). + unfold regs_for; simpl; intro; discriminate. + intros rsrc rargs EQ. + generalize (add_reloads_correct (src :: args) + (Bstore chunk addr rargs rsrc (Bgoto s)) + rs m LL DISJ). + intros [rs1 [EX1 [RES1 OTHER1]]]. + rewrite EQ in RES1. simpl in RES1. injection RES1. + intros RES2 RES3. + exists rs1. + split. apply exec_Bgoto. + eapply exec_trans. rewrite <- EQ. eexact EX1. + apply exec_one. apply exec_Bstore with a. + rewrite RES2. assumption. rewrite RES3. assumption. + exact OTHER1. +Qed. + +Lemma add_cond_correct: + forall cond args ifso ifnot rs m b s, + (List.length args <= 3)%nat -> + Loc.disjoint args temporaries -> + eval_condition cond (List.map rs args) = Some b -> + s = (if b then ifso else ifnot) -> + exists rs', + exec_block ge sp (add_cond cond args ifso ifnot) rs m (Cont s) rs' m /\ + forall l, Loc.notin l temporaries -> rs' l = rs l. +Proof. + intros. unfold add_cond. + set (rargs := regs_for args). + generalize (add_reloads_correct args + (Bcond cond rargs ifso ifnot) + rs m H H0). + intros [rs1 [EX1 [RES1 OTHER1]]]. + fold rargs in EX1. + exists rs1. + split. destruct b; subst s. + eapply exec_Bcond_true. eexact EX1. + unfold rargs; rewrite RES1. assumption. + eapply exec_Bcond_false. eexact EX1. + unfold rargs; rewrite RES1. assumption. + exact OTHER1. +Qed. + +Definition find_function2 (los: loc + ident) (ls: locset) : option function := + match los with + | inl l => Genv.find_funct ge (ls l) + | inr symb => + match Genv.find_symbol ge symb with + | None => None + | Some b => Genv.find_funct_ptr ge b + end + end. + +Lemma add_call_correct: + forall f vargs m vres m' sig los args res s ls + (EXECF: + forall lsi, + List.map lsi (loc_arguments f.(fn_sig)) = vargs -> + exists lso, + exec_function ge f lsi m lso m' + /\ lso (R (loc_result f.(fn_sig))) = vres) + (FIND: find_function2 los ls = Some f) + (SIG: sig = f.(fn_sig)) + (VARGS: List.map ls args = vargs) + (LARGS: List.length args = List.length sig.(sig_args)) + (AARGS: locs_acceptable args) + (RES: loc_acceptable res), + exists ls', + exec_block ge sp (add_call sig los args res s) ls m (Cont s) ls' m' /\ + ls' res = vres /\ + forall l, + Loc.notin l destroyed_at_call -> loc_acceptable l -> Loc.diff l res -> + ls' l = ls l. +Proof. + intros until los. + case los; intro fn; intros; simpl in FIND; rewrite <- SIG in EXECF; unfold add_call. + (* indirect call *) + assert (LEN: List.length args = List.length (loc_arguments sig)). + rewrite LARGS. symmetry. apply loc_arguments_length. + pose (DISJ := locs_acceptable_disj_temporaries args AARGS). + generalize (add_reload_correct fn IT3 + (parallel_move args (loc_arguments sig) + (Bcall sig (inl ident IT3) + (add_spill (loc_result sig) res (Bgoto s)))) + ls m). + intros [ls1 [EX1 [RES1 OTHER1]]]. + generalize + (parallel_move_correct args (loc_arguments sig) + (Bcall sig (inl ident IT3) + (add_spill (loc_result sig) res (Bgoto s))) + ls1 m LEN + (no_overlap_arguments args sig AARGS) + (loc_arguments_norepet sig) + DISJ + (loc_arguments_not_temporaries sig)). + intros [ls2 [EX2 [RES2 [TMP2 OTHER2]]]]. + assert (PARAMS: List.map ls2 (loc_arguments sig) = vargs). + rewrite <- VARGS. rewrite RES2. + apply list_map_exten. intros. symmetry. apply OTHER1. + apply Loc.diff_sym. apply DISJ. auto. simpl; tauto. + generalize (EXECF ls2 PARAMS). + intros [ls3 [EX3 RES3]]. + pose (ls4 := return_regs ls2 ls3). + generalize (add_spill_correct (loc_result sig) res + (Bgoto s) ls4 m'). + intros [ls5 [EX5 [RES5 OTHER5]]]. + exists ls5. + (* Execution *) + split. apply exec_Bgoto. + eapply exec_trans. eexact EX1. + eapply exec_trans. eexact EX2. + eapply exec_trans. apply exec_one. apply exec_Bcall with f. + unfold find_function. rewrite TMP2. rewrite RES1. + assumption. assumption. eexact EX3. + exact EX5. + (* Result *) + split. rewrite RES5. unfold ls4. rewrite return_regs_result. + assumption. + (* Other regs *) + intros. rewrite OTHER5; auto. + unfold ls4; rewrite return_regs_not_destroyed; auto. + rewrite OTHER2. apply OTHER1. + apply Loc.diff_sym. apply Loc.in_notin_diff with temporaries. + apply temporaries_not_acceptable; auto. simpl; tauto. + apply arguments_not_preserved; auto. + apply temporaries_not_acceptable; auto. + apply Loc.diff_sym; auto. + (* direct call *) + assert (LEN: List.length args = List.length (loc_arguments sig)). + rewrite LARGS. symmetry. apply loc_arguments_length. + pose (DISJ := locs_acceptable_disj_temporaries args AARGS). + generalize + (parallel_move_correct args (loc_arguments sig) + (Bcall sig (inr mreg fn) + (add_spill (loc_result sig) res (Bgoto s))) + ls m LEN + (no_overlap_arguments args sig AARGS) + (loc_arguments_norepet sig) + DISJ (loc_arguments_not_temporaries sig)). + intros [ls2 [EX2 [RES2 [TMP2 OTHER2]]]]. + assert (PARAMS: List.map ls2 (loc_arguments sig) = vargs). + rewrite <- VARGS. rewrite RES2. auto. + generalize (EXECF ls2 PARAMS). + intros [ls3 [EX3 RES3]]. + pose (ls4 := return_regs ls2 ls3). + generalize (add_spill_correct (loc_result sig) res + (Bgoto s) ls4 m'). + intros [ls5 [EX5 [RES5 OTHER5]]]. + exists ls5. + (* Execution *) + split. apply exec_Bgoto. + eapply exec_trans. eexact EX2. + eapply exec_trans. apply exec_one. apply exec_Bcall with f. + unfold find_function. assumption. assumption. eexact EX3. + exact EX5. + (* Result *) + split. rewrite RES5. + unfold ls4. rewrite return_regs_result. + assumption. + (* Other regs *) + intros. rewrite OTHER5; auto. + unfold ls4; rewrite return_regs_not_destroyed; auto. + apply OTHER2. + apply arguments_not_preserved; auto. + apply temporaries_not_acceptable; auto. + apply Loc.diff_sym; auto. +Qed. + +Lemma add_undefs_correct: + forall res b ls m, + (forall l, In l res -> loc_acceptable l) -> + (forall ofs ty, In (S (Local ofs ty)) res -> ls (S (Local ofs ty)) = Vundef) -> + exists ls', + exec_instrs ge sp (add_undefs res b) ls m b ls' m /\ + (forall l, In l res -> ls' l = Vundef) /\ + (forall l, Loc.notin l res -> ls' l = ls l). +Proof. + induction res; simpl; intros. + exists ls. split. apply exec_refl. tauto. + assert (ACC: forall l, In l res -> loc_acceptable l). + intros. apply H. tauto. + destruct a. + (* a is a register *) + pose (ls1 := Locmap.set (R m0) Vundef ls). + assert (UNDEFS: forall ofs ty, In (S (Local ofs ty)) res -> ls1 (S (Local ofs ty)) = Vundef). + intros. unfold ls1; rewrite Locmap.gso. auto. red; auto. + generalize (IHres b (Locmap.set (R m0) Vundef ls) m ACC UNDEFS). + intros [ls2 [EX2 [RES2 OTHER2]]]. + exists ls2. split. + eapply exec_trans. apply exec_one. apply exec_Bop. + simpl; reflexivity. exact EX2. + split. intros. case (In_dec Loc.eq l res); intro. + apply RES2; auto. + rewrite OTHER2. elim H1; intro. + subst l. apply Locmap.gss. + contradiction. + apply loc_acceptable_notin_notin; auto. + intros. rewrite OTHER2. apply Locmap.gso. + apply Loc.diff_sym; tauto. tauto. + (* a is a stack location *) + assert (UNDEFS: forall ofs ty, In (S (Local ofs ty)) res -> ls (S (Local ofs ty)) = Vundef). + intros. apply H0. tauto. + generalize (IHres b ls m ACC UNDEFS). + intros [ls2 [EX2 [RES2 OTHER2]]]. + exists ls2. split. assumption. + split. intros. case (In_dec Loc.eq l res); intro. + auto. + rewrite OTHER2. elim H1; intro. + subst l. generalize (H (S s) (in_eq _ _)). + unfold loc_acceptable; destruct s; intuition auto. + contradiction. + apply loc_acceptable_notin_notin; auto. + intros. apply OTHER2. tauto. +Qed. + +Lemma add_entry_correct: + forall sig params undefs s ls m, + List.length params = List.length sig.(sig_args) -> + Loc.norepet params -> + locs_acceptable params -> + Loc.disjoint params undefs -> + locs_acceptable undefs -> + (forall ofs ty, ls (S (Local ofs ty)) = Vundef) -> + exists ls', + exec_block ge sp (add_entry sig params undefs s) ls m (Cont s) ls' m /\ + List.map ls' params = List.map ls (loc_parameters sig) /\ + (forall l, In l undefs -> ls' l = Vundef). +Proof. + intros. + assert (List.length (loc_parameters sig) = List.length params). + unfold loc_parameters. rewrite list_length_map. + rewrite loc_arguments_length. auto. + assert (DISJ: Loc.disjoint params temporaries). + apply locs_acceptable_disj_temporaries; auto. + generalize (parallel_move_correct _ _ (add_undefs undefs (Bgoto s)) + ls m H5 + (no_overlap_parameters _ _ H1) + H0 (loc_parameters_not_temporaries sig) DISJ). + intros [ls1 [EX1 [RES1 [TMP1 OTHER1]]]]. + assert (forall ofs ty, In (S (Local ofs ty)) undefs -> ls1 (S (Local ofs ty)) = Vundef). + intros. rewrite OTHER1. auto. apply Loc.disjoint_notin with undefs. + apply Loc.disjoint_sym. auto. auto. + simpl; tauto. + generalize (add_undefs_correct undefs (Bgoto s) ls1 m H3 H6). + intros [ls2 [EX2 [RES2 OTHER2]]]. + exists ls2. + split. apply exec_Bgoto. unfold add_entry. + eapply exec_trans. eexact EX1. eexact EX2. + split. rewrite <- RES1. apply list_map_exten. + intros. symmetry. apply OTHER2. eapply Loc.disjoint_notin; eauto. + exact RES2. +Qed. + +Lemma add_return_correct: + forall sig optarg ls m, + exists ls', + exec_block ge sp (add_return sig optarg) ls m Return ls' m /\ + match optarg with + | Some arg => ls' (R (loc_result sig)) = ls arg + | None => ls' (R (loc_result sig)) = Vundef + end. +Proof. + intros. unfold add_return. + destruct optarg. + generalize (add_reload_correct l (loc_result sig) Breturn ls m). + intros [ls1 [EX1 [RES1 OTH1]]]. + exists ls1. + split. apply exec_Breturn. assumption. assumption. + exists (Locmap.set (R (loc_result sig)) Vundef ls). + split. apply exec_Breturn. apply exec_one. + apply exec_Bop. reflexivity. apply Locmap.gss. +Qed. + +End LTL_CONSTRUCTORS. + +(** * Exploitation of the typing hypothesis *) + +(** Register allocation is applied to RTL code that passed type inference + (see file [RTLtyping]), and therefore is well-typed in the type system + of [RTLtyping]. We exploit this hypothesis to obtain information on + the number of arguments to operations, addressing modes and conditions. *) + +Remark length_type_of_condition: + forall (c: condition), (List.length (type_of_condition c) <= 3)%nat. +Proof. + destruct c; unfold type_of_condition; simpl; omega. +Qed. + +Remark length_type_of_operation: + forall (op: operation), (List.length (fst (type_of_operation op)) <= 3)%nat. +Proof. + destruct op; unfold type_of_operation; simpl; try omega. + apply length_type_of_condition. +Qed. + +Remark length_type_of_addressing: + forall (addr: addressing), (List.length (type_of_addressing addr) <= 2)%nat. +Proof. + destruct addr; unfold type_of_addressing; simpl; omega. +Qed. + +Lemma length_op_args: + forall (env: regenv) (op: operation) (args: list reg) (res: reg), + (List.map env args, env res) = type_of_operation op -> + (List.length args <= 3)%nat. +Proof. + intros. rewrite <- (list_length_map env). + generalize (length_type_of_operation op). + rewrite <- H. simpl. auto. +Qed. + +Lemma length_addr_args: + forall (env: regenv) (addr: addressing) (args: list reg), + List.map env args = type_of_addressing addr -> + (List.length args <= 2)%nat. +Proof. + intros. rewrite <- (list_length_map env). + rewrite H. apply length_type_of_addressing. +Qed. + +Lemma length_cond_args: + forall (env: regenv) (cond: condition) (args: list reg), + List.map env args = type_of_condition cond -> + (List.length args <= 3)%nat. +Proof. + intros. rewrite <- (list_length_map env). + rewrite H. apply length_type_of_condition. +Qed. + +(** * Preservation of semantics *) + +(** We now show that the LTL code reflecting register allocation has + the same semantics as the original RTL code. We start with + standard properties of translated functions and + global environments in the original and translated code. *) + +Section PRESERVATION. + +Variable prog: RTL.program. +Variable tprog: LTL.program. +Hypothesis TRANSF: transf_program prog = Some tprog. + +Let ge := Genv.globalenv prog. +Let tge := Genv.globalenv tprog. + +Lemma symbols_preserved: + forall (s: ident), Genv.find_symbol tge s = Genv.find_symbol ge s. +Proof. + intro. unfold ge, tge. + apply Genv.find_symbol_transf_partial with transf_function. + exact TRANSF. +Qed. + +Lemma functions_translated: + forall (v: val) (f: RTL.function), + Genv.find_funct ge v = Some f -> + exists tf, + Genv.find_funct tge v = Some tf /\ transf_function f = Some tf. +Proof. + intros. + generalize + (Genv.find_funct_transf_partial transf_function TRANSF H). + case (transf_function f). + intros tf [A B]. exists tf. tauto. + intros [A B]. elim B. reflexivity. +Qed. + +Lemma function_ptr_translated: + forall (b: Values.block) (f: RTL.function), + Genv.find_funct_ptr ge b = Some f -> + exists tf, + Genv.find_funct_ptr tge b = Some tf /\ transf_function f = Some tf. +Proof. + intros. + generalize + (Genv.find_funct_ptr_transf_partial transf_function TRANSF H). + case (transf_function f). + intros tf [A B]. exists tf. tauto. + intros [A B]. elim B. reflexivity. +Qed. + +Lemma sig_function_translated: + forall f tf, + transf_function f = Some tf -> + tf.(LTL.fn_sig) = f.(RTL.fn_sig). +Proof. + intros f tf. unfold transf_function. + destruct (type_rtl_function f). + destruct (analyze f). + destruct (regalloc f t0). + intro EQ; injection EQ; intro EQ1; rewrite <- EQ1; simpl; auto. + intros; discriminate. + intros; discriminate. + intros; discriminate. +Qed. + +Lemma entrypoint_function_translated: + forall f tf, + transf_function f = Some tf -> + tf.(LTL.fn_entrypoint) = f.(RTL.fn_nextpc). +Proof. + intros f tf. unfold transf_function. + destruct (type_rtl_function f). + destruct (analyze f). + destruct (regalloc f t0). + intro EQ; injection EQ; intro EQ1; rewrite <- EQ1; simpl; auto. + intros; discriminate. + intros; discriminate. + intros; discriminate. +Qed. + +(** The proof of semantic preservation is a simulation argument + based on diagrams of the following form: +<< + pc, rs, m ------------------- pc, ls, m + | | + | | + v v + pc', rs', m' ---------------- Cont pc', ls', m' +>> + Hypotheses: the left vertical arrow represents a transition in the + original RTL code. The top horizontal bar expresses agreement between + [rs] and [ls] over the pseudo-registers live before the RTL instruction + at [pc]. + + Conclusions: the right vertical arrow is an [exec_blocks] transition + in the LTL code generated by translation of the current function. + The bottom horizontal bar expresses agreement between [rs'] and [ls'] + over the pseudo-registers live after the RTL instruction at [pc] + (which implies agreement over the pseudo-registers live before + the instruction at [pc']). + + We capture these diagrams in the following propositions parameterized + by the transition in the original RTL code (the left arrow). +*) + +Definition exec_instr_prop + (c: RTL.code) (sp: val) + (pc: node) (rs: regset) (m: mem) + (pc': node) (rs': regset) (m': mem) : Prop := + forall f env live assign ls + (CF: c = f.(RTL.fn_code)) + (WT: wt_function env f) + (ASG: regalloc f live (live0 f live) env = Some assign) + (AG: agree assign (transfer f pc live!!pc) rs ls), + let tc := PTree.map (transf_instr f live assign) c in + exists ls', + exec_blocks tge tc sp pc ls m (Cont pc') ls' m' /\ + agree assign live!!pc rs' ls'. + +Definition exec_instrs_prop + (c: RTL.code) (sp: val) + (pc: node) (rs: regset) (m: mem) + (pc': node) (rs': regset) (m': mem) : Prop := + forall f env live assign ls, + forall (CF: c = f.(RTL.fn_code)) + (WT: wt_function env f) + (ANL: analyze f = Some live) + (ASG: regalloc f live (live0 f live) env = Some assign) + (AG: agree assign (transfer f pc live!!pc) rs ls) + (VALIDPC': c!pc' <> None), + let tc := PTree.map (transf_instr f live assign) c in + exists ls', + exec_blocks tge tc sp pc ls m (Cont pc') ls' m' /\ + agree assign (transfer f pc' live!!pc') rs' ls'. + +Definition exec_function_prop + (f: RTL.function) (args: list val) (m: mem) + (res: val) (m': mem) : Prop := + forall ls tf, + transf_function f = Some tf -> + List.map ls (Conventions.loc_arguments tf.(fn_sig)) = args -> + exists ls', + LTL.exec_function tge tf ls m ls' m' /\ + ls' (R (Conventions.loc_result tf.(fn_sig))) = res. + +(** The simulation proof is by structural induction over the RTL evaluation + derivation. We prove each case of the proof as a separate lemma. + There is one lemma for each RTL evaluation rule. Each lemma concludes + one of the [exec_*_prop] predicates, and takes the induction hypotheses + (if any) as hypotheses also expressed with the [exec_*_prop] predicates. +*) + +Ltac CleanupHyps := + match goal with + | H1: (PTree.get _ _ = Some _), + H2: (_ = RTL.fn_code _), + H3: (agree _ (transfer _ _ _) _ _) |- _ => + unfold transfer in H3; rewrite <- H2 in H3; rewrite H1 in H3; + simpl in H3; + CleanupHyps + | H1: (PTree.get _ _ = Some _), + H2: (_ = RTL.fn_code _), + H3: (wt_function _ _) |- _ => + let H := fresh in + let R := fresh "WTI" in ( + generalize (wt_instrs _ _ H3); intro H; + rewrite <- H2 in H; generalize (H _ _ H1); + intro R; clear H; clear H3); + CleanupHyps + | _ => idtac + end. + +Ltac CleanupGoal := + match goal with + | H1: (PTree.get _ _ = Some _) |- _ => + eapply exec_blocks_one; + [rewrite PTree.gmap; rewrite H1; + unfold option_map; unfold transf_instr; reflexivity + |idtac] + end. + +Lemma transl_Inop_correct: + forall (c : PTree.t instruction) (sp: val) (pc : positive) + (rs : regset) (m : mem) (pc' : RTL.node), + c ! pc = Some (Inop pc') -> + exec_instr_prop c sp pc rs m pc' rs m. +Proof. + intros; red; intros; CleanupHyps. + exists ls. split. + CleanupGoal. apply exec_Bgoto. apply exec_refl. + assumption. +Qed. + +Lemma transl_Iop_correct: + forall (c : PTree.t instruction) (sp: val) (pc : positive) + (rs : Regmap.t val) (m : mem) (op : operation) (args : list reg) + (res : reg) (pc' : RTL.node) (v: val), + c ! pc = Some (Iop op args res pc') -> + eval_operation ge sp op (rs ## args) = Some v -> + exec_instr_prop c sp pc rs m pc' (rs # res <- v) m. +Proof. + intros; red; intros; CleanupHyps. + caseEq (Regset.mem res live!!pc); intro LV; + rewrite LV in AG. + assert (LL: (List.length (List.map assign args) <= 3)%nat). + rewrite list_length_map. + inversion WTI. simpl; omega. simpl; omega. + eapply length_op_args. eauto. + assert (DISJ: Loc.disjoint (List.map assign args) temporaries). + eapply regalloc_disj_temporaries; eauto. + assert (eval_operation tge sp op (map ls (map assign args)) = Some v). + replace (map ls (map assign args)) with rs##args. + rewrite (eval_operation_preserved symbols_preserved). assumption. + symmetry. eapply agree_eval_regs; eauto. + generalize (add_op_correct tge sp op + (List.map assign args) (assign res) + pc' ls m v LL DISJ H1). + intros [ls' [EX [RES OTHER]]]. + exists ls'. split. + CleanupGoal. rewrite LV. exact EX. + rewrite CF in H. + generalize (regalloc_correct_1 f env live _ _ _ _ ASG H). + unfold correct_alloc_instr. + caseEq (is_move_operation op args). + (* Special case for moves *) + intros arg IMO CORR. + generalize (is_move_operation_correct _ _ IMO). + intros [EQ1 EQ2]. subst op; subst args. + injection H0; intro. rewrite <- H2. + apply agree_move_live with f env live ls; auto. + rewrite RES. rewrite <- H2. symmetry. eapply agree_eval_reg. + simpl in AG. eexact AG. + (* Not a move *) + intros INMO CORR. + apply agree_assign_live with f env live ls; auto. + eapply agree_reg_list_live; eauto. + (* Result is not live, instruction turned into a nop *) + exists ls. split. + CleanupGoal. rewrite LV. + apply exec_Bgoto; apply exec_refl. + apply agree_assign_dead; assumption. +Qed. + +Lemma transl_Iload_correct: + forall (c : PTree.t instruction) (sp: val) (pc : positive) + (rs : Regmap.t val) (m : mem) (chunk : memory_chunk) + (addr : addressing) (args : list reg) (dst : reg) (pc' : RTL.node) + (a v : val), + c ! pc = Some (Iload chunk addr args dst pc') -> + eval_addressing ge sp addr rs ## args = Some a -> + loadv chunk m a = Some v -> + exec_instr_prop c sp pc rs m pc' rs # dst <- v m. +Proof. + intros; red; intros; CleanupHyps. + caseEq (Regset.mem dst live!!pc); intro LV; + rewrite LV in AG. + (* dst is live *) + assert (LL: (List.length (List.map assign args) <= 2)%nat). + rewrite list_length_map. + inversion WTI. + eapply length_addr_args. eauto. + assert (DISJ: Loc.disjoint (List.map assign args) temporaries). + eapply regalloc_disj_temporaries; eauto. + assert (EADDR: + eval_addressing tge sp addr (map ls (map assign args)) = Some a). + rewrite <- H0. + replace (rs##args) with (map ls (map assign args)). + apply eval_addressing_preserved. exact symbols_preserved. + eapply agree_eval_regs; eauto. + generalize (add_load_correct tge sp chunk addr + (List.map assign args) (assign dst) + pc' ls m _ _ LL DISJ EADDR H1). + intros [ls' [EX [RES OTHER]]]. + exists ls'. split. CleanupGoal. rewrite LV. exact EX. + rewrite CF in H. + generalize (regalloc_correct_1 f env live _ _ _ _ ASG H). + unfold correct_alloc_instr. intro CORR. + eapply agree_assign_live; eauto. + eapply agree_reg_list_live; eauto. + (* dst is dead *) + exists ls. split. + CleanupGoal. rewrite LV. + apply exec_Bgoto; apply exec_refl. + apply agree_assign_dead; auto. +Qed. + +Lemma transl_Istore_correct: + forall (c : PTree.t instruction) (sp: val) (pc : positive) + (rs : Regmap.t val) (m : mem) (chunk : memory_chunk) + (addr : addressing) (args : list reg) (src : reg) (pc' : RTL.node) + (a : val) (m' : mem), + c ! pc = Some (Istore chunk addr args src pc') -> + eval_addressing ge sp addr rs ## args = Some a -> + storev chunk m a rs # src = Some m' -> + exec_instr_prop c sp pc rs m pc' rs m'. +Proof. + intros; red; intros; CleanupHyps. + assert (LL: (List.length (List.map assign args) <= 2)%nat). + rewrite list_length_map. + inversion WTI. + eapply length_addr_args. eauto. + assert (DISJ: Loc.disjoint (List.map assign args) temporaries). + eapply regalloc_disj_temporaries; eauto. + assert (SRC: Loc.notin (assign src) temporaries). + eapply regalloc_not_temporary; eauto. + assert (EADDR: + eval_addressing tge sp addr (map ls (map assign args)) = Some a). + rewrite <- H0. + replace (rs##args) with (map ls (map assign args)). + apply eval_addressing_preserved. exact symbols_preserved. + eapply agree_eval_regs; eauto. + assert (ESRC: ls (assign src) = rs#src). + eapply agree_eval_reg. eapply agree_reg_list_live. eauto. + rewrite <- ESRC in H1. + generalize (add_store_correct tge sp chunk addr + (List.map assign args) (assign src) + pc' ls m m' a LL DISJ SRC EADDR H1). + intros [ls' [EX RES]]. + exists ls'. split. CleanupGoal. exact EX. + rewrite CF in H. + generalize (regalloc_correct_1 f env live _ _ _ _ ASG H). + unfold correct_alloc_instr. intro CORR. + eapply agree_exten. eauto. + eapply agree_reg_live. eapply agree_reg_list_live. eauto. + assumption. +Qed. + +Lemma transl_Icall_correct: + forall (c : PTree.t instruction) (sp: val) (pc : positive) + (rs : regset) (m : mem) (sig : signature) (ros : reg + ident) + (args : list reg) (res : reg) (pc' : RTL.node) + (f : RTL.function) (vres : val) (m' : mem), + c ! pc = Some (Icall sig ros args res pc') -> + RTL.find_function ge ros rs = Some f -> + sig = RTL.fn_sig f -> + RTL.exec_function ge f (rs##args) m vres m' -> + exec_function_prop f (rs##args) m vres m' -> + exec_instr_prop c sp pc rs m pc' (rs#res <- vres) m'. +Proof. + intros; red; intros; CleanupHyps. + set (los := sum_left_map assign ros). + assert (FIND: exists tf, + find_function2 tge los ls = Some tf /\ + transf_function f = Some tf). + unfold los. destruct ros; simpl; simpl in H0. + apply functions_translated. + replace (ls (assign r)) with rs#r. assumption. + simpl in AG. symmetry; eapply agree_eval_reg. + eapply agree_reg_list_live; eauto. + rewrite symbols_preserved. destruct (Genv.find_symbol ge i). + apply function_ptr_translated. auto. + discriminate. + elim FIND; intros tf [AFIND TRF]; clear FIND. + assert (ASIG: sig = fn_sig tf). + rewrite (sig_function_translated _ _ TRF). auto. + generalize (fun ls => H3 ls tf TRF); intro AEXECF. + assert (AVARGS: List.map ls (List.map assign args) = rs##args). + eapply agree_eval_regs; eauto. + assert (ALARGS: List.length (List.map assign args) = + List.length sig.(sig_args)). + inversion WTI. rewrite <- H10. + repeat rewrite list_length_map. auto. + assert (AACCEPT: locs_acceptable (List.map assign args)). + eapply regsalloc_acceptable; eauto. + rewrite CF in H. + generalize (regalloc_correct_1 f0 env live _ _ _ _ ASG H). + unfold correct_alloc_instr. intros [CORR1 CORR2]. + assert (ARES: loc_acceptable (assign res)). + eapply regalloc_acceptable; eauto. + generalize (add_call_correct tge sp tf _ _ _ _ _ _ _ _ pc' _ + AEXECF AFIND ASIG AVARGS ALARGS + AACCEPT ARES). + intros [ls' [EX [RES OTHER]]]. + exists ls'. + split. rewrite CF. CleanupGoal. exact EX. + simpl. eapply agree_call; eauto. +Qed. + +Lemma transl_Icond_true_correct: + forall (c : PTree.t instruction) (sp: val) (pc : positive) + (rs : Regmap.t val) (m : mem) (cond : condition) (args : list reg) + (ifso ifnot : RTL.node), + c ! pc = Some (Icond cond args ifso ifnot) -> + eval_condition cond rs ## args = Some true -> + exec_instr_prop c sp pc rs m ifso rs m. +Proof. + intros; red; intros; CleanupHyps. + assert (LL: (List.length (map assign args) <= 3)%nat). + rewrite list_length_map. inversion WTI. + eapply length_cond_args. eauto. + assert (DISJ: Loc.disjoint (map assign args) temporaries). + eapply regalloc_disj_temporaries; eauto. + assert (COND: eval_condition cond (map ls (map assign args)) = Some true). + replace (map ls (map assign args)) with rs##args. assumption. + symmetry. eapply agree_eval_regs; eauto. + generalize (add_cond_correct tge sp _ _ _ ifnot _ m _ _ + LL DISJ COND (refl_equal ifso)). + intros [ls' [EX OTHER]]. + exists ls'. split. + CleanupGoal. assumption. + eapply agree_exten. eauto. eapply agree_reg_list_live. eauto. + assumption. +Qed. + +Lemma transl_Icond_false_correct: + forall (c : PTree.t instruction) (sp: val) (pc : positive) + (rs : Regmap.t val) (m : mem) (cond : condition) (args : list reg) + (ifso ifnot : RTL.node), + c ! pc = Some (Icond cond args ifso ifnot) -> + eval_condition cond rs ## args = Some false -> + exec_instr_prop c sp pc rs m ifnot rs m. +Proof. + intros; red; intros; CleanupHyps. + assert (LL: (List.length (map assign args) <= 3)%nat). + rewrite list_length_map. inversion WTI. + eapply length_cond_args. eauto. + assert (DISJ: Loc.disjoint (map assign args) temporaries). + eapply regalloc_disj_temporaries; eauto. + assert (COND: eval_condition cond (map ls (map assign args)) = Some false). + replace (map ls (map assign args)) with rs##args. assumption. + symmetry. eapply agree_eval_regs; eauto. + generalize (add_cond_correct tge sp _ _ ifso _ _ m _ _ + LL DISJ COND (refl_equal ifnot)). + intros [ls' [EX OTHER]]. + exists ls'. split. + CleanupGoal. assumption. + eapply agree_exten. eauto. eapply agree_reg_list_live. eauto. + assumption. +Qed. + +Lemma transl_refl_correct: + forall (c : RTL.code) (sp: val) (pc : RTL.node) (rs : regset) + (m : mem), exec_instrs_prop c sp pc rs m pc rs m. +Proof. + intros; red; intros. + exists ls. split. apply exec_blocks_refl. assumption. +Qed. + +Lemma transl_one_correct: + forall (c : RTL.code) (sp: val) (pc : RTL.node) (rs : regset) + (m : mem) (pc' : RTL.node) (rs' : regset) (m' : mem), + RTL.exec_instr ge c sp pc rs m pc' rs' m' -> + exec_instr_prop c sp pc rs m pc' rs' m' -> + exec_instrs_prop c sp pc rs m pc' rs' m'. +Proof. + intros; red; intros. + generalize (H0 f env live assign ls CF WT ASG AG). + intros [ls' [EX AG']]. + exists ls'. split. + exact EX. + apply agree_increasing with live!!pc. + apply analyze_correct. auto. + rewrite <- CF. eapply exec_instr_present; eauto. + rewrite <- CF. auto. + eapply RTL.successors_correct. + rewrite <- CF. eexact H. exact AG'. +Qed. + +Lemma transl_trans_correct: + forall (c : RTL.code) (sp: val) (pc1 : RTL.node) (rs1 : regset) + (m1 : mem) (pc2 : RTL.node) (rs2 : regset) (m2 : mem) + (pc3 : RTL.node) (rs3 : regset) (m3 : mem), + RTL.exec_instrs ge c sp pc1 rs1 m1 pc2 rs2 m2 -> + exec_instrs_prop c sp pc1 rs1 m1 pc2 rs2 m2 -> + RTL.exec_instrs ge c sp pc2 rs2 m2 pc3 rs3 m3 -> + exec_instrs_prop c sp pc2 rs2 m2 pc3 rs3 m3 -> + exec_instrs_prop c sp pc1 rs1 m1 pc3 rs3 m3. +Proof. + intros; red; intros. + assert (VALIDPC2: c!pc2 <> None). + eapply exec_instrs_present; eauto. + generalize (H0 f env live assign ls CF WT ANL ASG AG VALIDPC2). + intros [ls1 [EX1 AG1]]. + generalize (H2 f env live assign ls1 CF WT ANL ASG AG1 VALIDPC'). + intros [ls2 [EX2 AG2]]. + exists ls2. split. + eapply exec_blocks_trans. eexact EX1. exact EX2. + exact AG2. +Qed. + +Remark regset_mem_reg_list_dead: + forall rl r live, + Regset.mem r (reg_list_dead rl live) = true -> + ~(In r rl) /\ Regset.mem r live = true. +Proof. + induction rl; simpl; intros. + tauto. + elim (IHrl r (reg_dead a live) H). intros. + assert (a <> r). red; intro; subst r. + rewrite Regset.mem_remove_same in H1. discriminate. + rewrite Regset.mem_remove_other in H1; auto. + tauto. +Qed. + +Lemma transf_entrypoint_correct: + forall f env live assign c ls args sp m, + wt_function env f -> + regalloc f live (live0 f live) env = Some assign -> + c!(RTL.fn_nextpc f) = None -> + List.map ls (loc_parameters (RTL.fn_sig f)) = args -> + (forall ofs ty, ls (S (Local ofs ty)) = Vundef) -> + let tc := transf_entrypoint f live assign c in + exists ls', + exec_blocks tge tc sp (RTL.fn_nextpc f) ls m + (Cont (RTL.fn_entrypoint f)) ls' m /\ + agree assign (transfer f (RTL.fn_entrypoint f) live!!(RTL.fn_entrypoint f)) + (init_regs args (RTL.fn_params f)) ls'. +Proof. + intros until m. + unfold transf_entrypoint. + set (oldentry := RTL.fn_entrypoint f). + set (newentry := RTL.fn_nextpc f). + set (params := RTL.fn_params f). + set (undefs := Regset.elements (reg_list_dead params (transfer f oldentry live!!oldentry))). + intros. + + assert (A1: List.length (List.map assign params) = + List.length (RTL.fn_sig f).(sig_args)). + rewrite <- (wt_params _ _ H). + repeat (rewrite list_length_map). auto. + assert (A2: Loc.norepet (List.map assign (RTL.fn_params f))). + eapply regalloc_norepet_norepet; eauto. + eapply regalloc_correct_2; eauto. + eapply wt_norepet; eauto. + assert (A3: locs_acceptable (List.map assign (RTL.fn_params f))). + eapply regsalloc_acceptable; eauto. + assert (A4: Loc.disjoint + (List.map assign (RTL.fn_params f)) + (List.map assign undefs)). + red. intros ap au INAP INAU. + generalize (list_in_map_inv _ _ _ INAP). + intros [p [AP INP]]. clear INAP; subst ap. + generalize (list_in_map_inv _ _ _ INAU). + intros [u [AU INU]]. clear INAU; subst au. + generalize (Regset.elements_complete _ _ INU). intro. + generalize (regset_mem_reg_list_dead _ _ _ H4). + intros [A B]. + eapply regalloc_noteq_diff; eauto. + eapply regalloc_correct_3; eauto. + red; intro; subst u. elim (A INP). + assert (A5: forall l, In l (List.map assign undefs) -> loc_acceptable l). + intros. + generalize (list_in_map_inv _ _ _ H4). + intros [r [AR INR]]. clear H4; subst l. + eapply regalloc_acceptable; eauto. + generalize (add_entry_correct + tge sp (RTL.fn_sig f) + (List.map assign (RTL.fn_params f)) + (List.map assign undefs) + oldentry ls m A1 A2 A3 A4 A5 H3). + intros [ls1 [EX1 [PARAMS1 UNDEFS1]]]. + exists ls1. + split. eapply exec_blocks_one. + rewrite PTree.gss. reflexivity. + assumption. + change (transfer f oldentry live!!oldentry) + with (live0 f live). + unfold params; eapply agree_parameters; eauto. + change Regset.elt with reg in PARAMS1. + rewrite PARAMS1. assumption. + fold oldentry; fold params. intros. + apply UNDEFS1. apply in_map. + unfold undefs; apply Regset.elements_correct; auto. +Qed. + +Lemma transl_function_correct: + forall (f : RTL.function) (m m1 : mem) (stk : Values.block) + (args : list val) (pc : RTL.node) (rs : regset) (m2 : mem) + (or : option reg) (vres : val), + alloc m 0 (RTL.fn_stacksize f) = (m1, stk) -> + RTL.exec_instrs ge (RTL.fn_code f) (Vptr stk Int.zero) + (RTL.fn_entrypoint f) (init_regs args (fn_params f)) m1 pc rs m2 -> + exec_instrs_prop (RTL.fn_code f) (Vptr stk Int.zero) + (RTL.fn_entrypoint f) (init_regs args (fn_params f)) m1 pc rs m2 -> + (RTL.fn_code f) ! pc = Some (Ireturn or) -> + vres = regmap_optget or Vundef rs -> + exec_function_prop f args m vres (free m2 stk). +Proof. + intros; red; intros until tf. + unfold transf_function. + caseEq (type_rtl_function f). + intros env TRF. + caseEq (analyze f). + intros live ANL. + change (transfer f (RTL.fn_entrypoint f) live!!(RTL.fn_entrypoint f)) + with (live0 f live). + caseEq (regalloc f live (live0 f live) env). + intros alloc ASG. + set (tc1 := PTree.map (transf_instr f live alloc) (RTL.fn_code f)). + set (tc2 := transf_entrypoint f live alloc tc1). + intro EQ; injection EQ; intro TF; clear EQ. intro VARGS. + generalize (type_rtl_function_correct _ _ TRF); intro WTF. + assert (NEWINSTR: tc1!(RTL.fn_nextpc f) = None). + unfold tc1; rewrite PTree.gmap. unfold option_map. + elim (RTL.fn_code_wf f (fn_nextpc f)); intro. + elim (Plt_ne _ _ H4). auto. + rewrite H4. auto. + pose (ls1 := call_regs ls). + assert (VARGS1: List.map ls1 (loc_parameters (RTL.fn_sig f)) = args). + replace (RTL.fn_sig f) with (fn_sig tf). + rewrite <- VARGS. unfold loc_parameters. + rewrite list_map_compose. apply list_map_exten. + intros. symmetry. unfold ls1. eapply call_regs_param_of_arg; eauto. + rewrite <- TF; reflexivity. + assert (VUNDEFS: forall (ofs : Z) (ty : typ), ls1 (S (Local ofs ty)) = Vundef). + intros. reflexivity. + generalize (transf_entrypoint_correct f env live alloc + tc1 ls1 args (Vptr stk Int.zero) m1 + WTF ASG NEWINSTR VARGS1 VUNDEFS). + fold tc2. intros [ls2 [EX2 AGREE2]]. + assert (VALIDPC: f.(RTL.fn_code)!pc <> None). congruence. + generalize (H1 f env live alloc ls2 + (refl_equal _) WTF ANL ASG AGREE2 VALIDPC). + fold tc1. intros [ls3 [EX3 AGREE3]]. + generalize (add_return_correct tge (Vptr stk Int.zero) (RTL.fn_sig f) + (option_map alloc or) ls3 m2). + intros [ls4 [EX4 RES4]]. + exists ls4. + (* Execution *) + split. apply exec_funct with m1. + rewrite <- TF; simpl; assumption. + rewrite <- TF; simpl. fold ls1. + eapply exec_blocks_trans. eexact EX2. + apply exec_blocks_extends with tc1. + intro p. unfold tc2. unfold transf_entrypoint. + case (peq p (fn_nextpc f)); intro. + subst p. right. unfold tc1; rewrite PTree.gmap. + elim (RTL.fn_code_wf f (fn_nextpc f)); intro. + elim (Plt_ne _ _ H4); auto. rewrite H4; reflexivity. + left; apply PTree.gso; auto. + eapply exec_blocks_trans. eexact EX3. + eapply exec_blocks_one. + unfold tc1. rewrite PTree.gmap. rewrite H2. simpl. reflexivity. + exact EX4. + destruct or. + simpl in RES4. simpl in H3. + rewrite H3. rewrite <- TF; simpl. rewrite RES4. + eapply agree_eval_reg; eauto. + unfold transfer in AGREE3; rewrite H2 in AGREE3. + unfold reg_option_live in AGREE3. eexact AGREE3. + simpl in RES4. simpl in H3. + rewrite <- TF; simpl. congruence. + intros; discriminate. + intros; discriminate. + intros; discriminate. +Qed. + +(** The correctness of the code transformation is obtained by + structural induction (and case analysis on the last rule used) + on the RTL evaluation derivation. + This is a 3-way mutual induction, using [exec_instr_prop], + [exec_instrs_prop] and [exec_function_prop] as the induction + hypotheses, and the lemmas above as cases for the induction. *) + +Scheme exec_instr_ind_3 := Minimality for RTL.exec_instr Sort Prop + with exec_instrs_ind_3 := Minimality for RTL.exec_instrs Sort Prop + with exec_function_ind_3 := Minimality for RTL.exec_function Sort Prop. + +Theorem transl_function_correctness: + forall f args m res m', + RTL.exec_function ge f args m res m' -> + exec_function_prop f args m res m'. +Proof + (exec_function_ind_3 ge + exec_instr_prop + exec_instrs_prop + exec_function_prop + + transl_Inop_correct + transl_Iop_correct + transl_Iload_correct + transl_Istore_correct + transl_Icall_correct + transl_Icond_true_correct + transl_Icond_false_correct + + transl_refl_correct + transl_one_correct + transl_trans_correct + + transl_function_correct). + +(** The semantic equivalence between the original and transformed programs + follows easily. *) + +Theorem transl_program_correct: + forall (r: val), + RTL.exec_program prog r -> LTL.exec_program tprog r. +Proof. + intros r [b [f [m [FIND1 [FIND2 [SIG EXEC]]]]]]. + generalize (function_ptr_translated _ _ FIND2). + intros [tf [TFIND TRF]]. + assert (SIG2: tf.(fn_sig) = mksignature nil (Some Tint)). + rewrite <- SIG. apply sig_function_translated; auto. + assert (VPARAMS: map (Locmap.init Vundef) (loc_arguments (fn_sig tf)) = nil). + rewrite SIG2. reflexivity. + generalize (transl_function_correctness _ _ _ _ _ EXEC + (Locmap.init Vundef) tf TRF VPARAMS). + intros [ls' [TEXEC RES]]. + red. exists b; exists tf; exists ls'; exists m. + split. rewrite symbols_preserved. + rewrite (transform_partial_program_main _ _ TRANSF). + assumption. + split. assumption. + split. assumption. + split. replace (Genv.init_mem tprog) with (Genv.init_mem prog). + assumption. symmetry. + exact (Genv.init_mem_transf_partial _ _ TRANSF). + assumption. +Qed. + +End PRESERVATION. diff --git a/backend/Allocproof_aux.v b/backend/Allocproof_aux.v new file mode 100644 index 00000000..d1b213e2 --- /dev/null +++ b/backend/Allocproof_aux.v @@ -0,0 +1,850 @@ +(** Correctness results for the [parallel_move] function defined in + file [Allocation]. + + The present file, contributed by Laurence Rideau, glues the general + results over the parallel move algorithm (see file [Parallelmove]) + with the correctness proof for register allocation (file [Allocproof]). +*) + +Require Import Coqlib. +Require Import Values. +Require Import Parallelmove. +Require Import Allocation. +Require Import LTL. +Require Import Locations. +Require Import Conventions. + +Section parallel_move_correction. +Variable ge : LTL.genv. +Variable sp : val. +Hypothesis + add_move_correct : + forall src dst k rs m, + (exists rs' , + exec_instrs ge sp (add_move src dst k) rs m k rs' m /\ + (rs' dst = rs src /\ + (forall l, + Loc.diff l dst -> + Loc.diff l (R IT1) -> Loc.diff l (R FT1) -> rs' l = rs l)) ). + +Lemma exec_instr_update: + forall a1 a2 k e m, + (exists rs' , + exec_instrs ge sp (add_move a1 a2 k) e m k rs' m /\ + (rs' a2 = update e a2 (e a1) a2 /\ + (forall l, + Loc.diff l a2 -> + Loc.diff l (R IT1) -> Loc.diff l (R FT1) -> rs' l = (update e a2 (e a1)) l)) + ). +Proof. +intros; destruct (add_move_correct a1 a2 k e m) as [rs' [Hf [R1 R2]]]. +exists rs'; (repeat split); auto. +generalize (get_update_id e a2); unfold get, Locmap.get; intros H; rewrite H; + auto. +intros l H0; generalize (get_update_diff e a2); unfold get, Locmap.get; + intros H; rewrite H; auto. +apply Loc.diff_sym; assumption. +Qed. + +Lemma map_inv: + forall (A B : Set) (f f' : A -> B) l, + map f l = map f' l -> forall x, In x l -> f x = f' x. +Proof. +induction l; simpl; intros Hmap x Hin. +elim Hin. +inversion Hmap. +elim Hin; intros H; [subst a | apply IHl]; auto. +Qed. + +Fixpoint reswellFormed (p : Moves) : Prop := + match p with + nil => True + | (s, d) :: l => Loc.notin s (getdst l) /\ reswellFormed l + end. + +Definition temporaries1 := R IT1 :: (R FT1 :: nil). + +Lemma no_overlap_list_pop: + forall p m, no_overlap_list (m :: p) -> no_overlap_list p. +Proof. +induction p; unfold no_overlap_list, no_overlap; destruct m as [m1 m2]; simpl; + auto. +Qed. + +Lemma exec_instrs_pmov: + forall p k rs m, + no_overlap_list p -> + Loc.disjoint (getdst p) temporaries1 -> + Loc.disjoint (getsrc p) temporaries1 -> + (exists rs' , + exec_instrs + ge sp + (fold_left + (fun (k0 : block) => fun (p0 : loc * loc) => add_move (fst p0) (snd p0) k0) + p k) rs m k rs' m /\ + (forall l, + (forall r, In r (getdst p) -> r = l \/ Loc.diff r l) -> + Loc.diff l (Locations.R IT1) -> + Loc.diff l (Locations.R FT1) -> rs' l = (sexec p rs) l) ). +Proof. +induction p; intros k rs m. +simpl; exists rs; (repeat split); auto; apply exec_refl. +destruct a as [a1 a2]; simpl; intros Hno_O Hdisd Hdiss; + elim (IHp (add_move a1 a2 k) rs m); + [idtac | apply no_overlap_list_pop with (a1, a2) | + apply (Loc.disjoint_cons_left a2) | apply (Loc.disjoint_cons_left a1)]; + (try assumption). +intros rs' Hexec; + destruct (add_move_correct a1 a2 k rs' m) as [rs'' [Hexec2 [R1 R2]]]. +exists rs''; elim Hexec; intros; (repeat split). +apply exec_trans with ( b2 := add_move a1 a2 k ) ( rs2 := rs' ) ( m2 := m ); + auto. +intros l Heqd Hdi Hdf; case (Loc.eq a2 l); intro. +subst l; generalize get_update_id; unfold get, Locmap.get; intros Hgui; + rewrite Hgui; rewrite R1. +apply H0; auto. +unfold no_overlap_list, no_overlap in Hno_O |-; simpl in Hno_O |-; intros; + generalize (Hno_O a1). +intros H8; elim H8 with ( s := r ); + [intros H9; left | intros H9; right; apply Loc.diff_sym | left | right]; auto. +unfold Loc.disjoint in Hdiss |-; apply Hdiss; simpl; left; trivial. +apply Hdiss; simpl; [left | right; left]; auto. +elim (Heqd a2); + [intros H7; absurd (a2 = l); auto | intros H7; auto | left; trivial]. +generalize get_update_diff; unfold get, Locmap.get; intros H6; rewrite H6; auto. +rewrite R2; auto. +apply Loc.diff_sym; auto. +Qed. + +Definition p_move := + fun (l : list (loc * loc)) => + fun (k : block) => + fold_left + (fun (k0 : block) => fun (p : loc * loc) => add_move (fst p) (snd p) k0) + (P_move l) k. + +Lemma norepet_SD: forall p, Loc.norepet (getdst p) -> simpleDest p. +Proof. +induction p; (simpl; auto). +destruct a as [a1 a2]. +intro; inversion H. +split. +apply notindst_nW; auto. +apply IHp; auto. +Qed. + +Theorem SDone_stepf: + forall S1, StateDone (stepf S1) = nil -> StateDone S1 = nil. +Proof. +destruct S1 as [[t b] d]; destruct t. +destruct b; auto. +destruct m as [m1 m2]; simpl. +destruct b. +simpl; intro; discriminate. +case (Loc.eq m2 (fst (last (m :: b)))); simpl; intros; discriminate. +destruct m as [a1 a2]; simpl. +destruct b. +case (Loc.eq a1 a2); simpl; intros; auto. +destruct m as [m1 m2]; case (Loc.eq a1 m2); intro; (try (simpl; auto; fail)). +case (split_move t m2). +(repeat destruct p); simpl; intros; auto. +destruct b; (try (simpl; intro; discriminate)); auto. +case (Loc.eq m2 (fst (last (m :: b)))); intro; simpl; intro; discriminate. +Qed. + +Theorem SDone_Pmov: forall S1, StateDone (Pmov S1) = nil -> StateDone S1 = nil. +Proof. +intros S1; elim S1 using (well_founded_ind (Wf_nat.well_founded_ltof _ mesure)). +clear S1; intros S1; intros Hrec. +destruct S1 as [[t b] d]. +rewrite Pmov_equation. +destruct t. +destruct b; auto. +intro; assert (StateDone (stepf (nil, m :: b, d)) = nil); + [apply Hrec; auto; apply stepf1_dec | apply SDone_stepf]; auto. +intro; assert (StateDone (stepf (m :: t, b, d)) = nil); + [apply Hrec; auto; apply stepf1_dec | apply SDone_stepf]; auto. +Qed. + +Lemma no_overlap_temp: forall r s, In s temporaries -> r = s \/ Loc.diff r s. +Proof. +intros r s H; case (Loc.eq r s); intros e; [left | right]; (try assumption). +unfold Loc.diff; destruct r. +destruct s; trivial. +red; intro; elim e; rewrite H0; auto. +destruct s0; destruct s; trivial; + (elim H; [intros H1 | intros [H1|[H1|[H1|[H1|[H1|H1]]]]]]; (try discriminate); + inversion H1). +Qed. + +Lemma getsrcdst_app: + forall l1 l2, + getdst l1 ++ getdst l2 = getsrc l1 ++ getsrc l2 -> + getdst l1 = getsrc l1 /\ getdst l2 = getsrc l2. +Proof. +induction l1; simpl; auto. +destruct a as [a1 a2]; intros; inversion H. +subst a1; inversion H; + (elim IHl1 with ( l2 := l2 ); + [intros H0 H3; (try clear IHl1); (try exact H0) | idtac]; auto). +rewrite H0; auto. +Qed. + +Lemma In_norepet: + forall r x l, Loc.norepet l -> In x l -> In r l -> r = x \/ Loc.diff r x. +Proof. +induction l; simpl; intros. +elim H1. +inversion H. +subst hd. +elim H1; intros H2; clear H1; elim H0; intros H1. +rewrite <- H1; rewrite <- H2; auto. +subst a; right; apply Loc.in_notin_diff with l; auto. +subst a; right; apply Loc.diff_sym; apply Loc.in_notin_diff with l; auto. +apply IHl; auto. +Qed. + +Definition no_overlap_stateD (S : State) := + no_overlap + (getsrc (StateToMove S ++ (StateBeing S ++ StateDone S))) + (getdst (StateToMove S ++ (StateBeing S ++ StateDone S))). + +Ltac +incl_tac_rec := +(auto with datatypes; fail) + || + (apply in_cons; incl_tac_rec) + || + (apply in_or_app; left; incl_tac_rec) + || + (apply in_or_app; right; incl_tac_rec) + || + (apply incl_appl; incl_tac_rec) || + (apply incl_appr; incl_tac_rec) || (apply incl_tl; incl_tac_rec). + +Ltac incl_tac := (repeat (apply incl_cons || apply incl_app)); incl_tac_rec. + +Ltac +in_tac := +match goal with +| |- In ?x ?L1 => +match goal with +| H:In x ?L2 |- _ => +let H1 := fresh "H" in +(assert (H1: incl L2 L1); [incl_tac | apply (H1 x)]); auto; fail +| _ => fail end end. + +Lemma in_cons_noteq: + forall (A : Set) (a b : A) (l : list A), In a (b :: l) -> a <> b -> In a l. +Proof. +intros A a b l; simpl; intros. +elim H; intros H1; (try assumption). +absurd (a = b); auto. +Qed. + +Lemma no_overlapD_inv: + forall S1 S2, step S1 S2 -> no_overlap_stateD S1 -> no_overlap_stateD S2. +Proof. +intros S1 S2 STEP; inversion STEP; unfold no_overlap_stateD, no_overlap; simpl; + auto; (repeat (rewrite getsrc_app; rewrite getdst_app; simpl)); intros. +apply H1; in_tac. +destruct m as [m1 m2]; apply H1; in_tac. +apply H1; in_tac. +case (Loc.eq r (T r0)); intros e. +elim (no_overlap_temp s0 r); + [intro; left; auto | intro; right; apply Loc.diff_sym; auto | unfold T in e |-]. +destruct (Loc.type r0); simpl; [right; left | right; right; right; right; left]; + auto. +case (Loc.eq s0 (T r0)); intros es. +apply (no_overlap_temp r s0); unfold T in es |-; destruct (Loc.type r0); simpl; + [right; left | right; right; right; right; left]; auto. +apply H1; apply in_cons_noteq with ( b := T r0 ); auto; in_tac. +apply H3; in_tac. +Qed. + +Lemma no_overlapD_invpp: + forall S1 S2, stepp S1 S2 -> no_overlap_stateD S1 -> no_overlap_stateD S2. +Proof. +intros; induction H as [r|r1 r2 r3 H H1 HrecH]; auto. +apply HrecH; auto. +apply no_overlapD_inv with r1; auto. +Qed. + +Lemma no_overlapD_invf: + forall S1, stepInv S1 -> no_overlap_stateD S1 -> no_overlap_stateD (stepf S1). +Proof. +intros; destruct S1 as [[t1 b1] d1]. +destruct t1; destruct b1; auto. +set (S1:=(nil (A:=Move), m :: b1, d1)). +apply (no_overlapD_invpp S1); [apply dstep_step; auto | assumption]. +apply f2ind; [unfold S1 | idtac | idtac]; auto. +generalize H0; clear H0; unfold no_overlap_stateD; destruct m as [m1 m2]. +intros; apply no_overlap_noOverlap. +unfold no_overlap_state; simpl. +generalize H0; clear H0; unfold no_overlap; (repeat rewrite getdst_app); + (repeat rewrite getsrc_app); simpl; intros. +apply H0. +elim H1; intros H4; [left; assumption | right; in_tac]. +elim H2; intros H4; [left; assumption | right; in_tac]. +set (S1:=(m :: t1, nil (A:=Move), d1)). +apply (no_overlapD_invpp S1); [apply dstep_step; auto | assumption]. +apply f2ind; [unfold S1 | idtac | idtac]; auto. +generalize H0; clear H0; unfold no_overlap_stateD; destruct m as [m1 m2]. +intros; apply no_overlap_noOverlap. +unfold no_overlap_state; simpl. +generalize H0; clear H0; unfold no_overlap; (repeat rewrite getdst_app); + (repeat rewrite getsrc_app); simpl; (repeat rewrite app_nil); intros. +apply H0. +elim H1; intros H4; [left; assumption | right; (try in_tac)]. +elim H2; intros H4; [left; assumption | right; in_tac]. +set (S1:=(m :: t1, m0 :: b1, d1)). +apply (no_overlapD_invpp S1); [apply dstep_step; auto | assumption]. +apply f2ind; [unfold S1 | idtac | idtac]; auto. +generalize H0; clear H0; unfold no_overlap_stateD; destruct m as [m1 m2]. +intros; apply no_overlap_noOverlap. +unfold no_overlap_state; simpl. +generalize H0; clear H0; unfold no_overlap; (repeat rewrite getdst_app); + (repeat rewrite getsrc_app); destruct m0; simpl; intros. +apply H0. +elim H1; intros H4; [left; assumption | right; in_tac]. +elim H2; intros H4; [left; assumption | right; in_tac]. +Qed. + +Lemma no_overlapD_res: + forall S1, stepInv S1 -> no_overlap_stateD S1 -> no_overlap_stateD (Pmov S1). +Proof. +intros S1; elim S1 using (well_founded_ind (Wf_nat.well_founded_ltof _ mesure)). +clear S1; intros S1 Hrec. +destruct S1 as [[t b] d]. +rewrite Pmov_equation. +destruct t; auto. +destruct b; auto. +intros; apply Hrec. +apply stepf1_dec; auto. +apply (dstep_inv (nil, m :: b, d)); auto. +apply f2ind'; auto. +apply no_overlap_noOverlap. +generalize H0; unfold no_overlap_stateD, no_overlap_state, no_overlap; simpl. +destruct m; (repeat rewrite getdst_app); (repeat rewrite getsrc_app). +intros H1 r1 H2 s H3; (try assumption). +apply H1; in_tac. +apply no_overlapD_invf; auto. +intros; apply Hrec. +apply stepf1_dec; auto. +apply (dstep_inv (m :: t, b, d)); auto. +apply f2ind'; auto. +apply no_overlap_noOverlap. +generalize H0; destruct m; + unfold no_overlap_stateD, no_overlap_state, no_overlap; simpl; + (repeat (rewrite getdst_app; simpl)); (repeat (rewrite getsrc_app; simpl)). +simpl; intros H1 r1 H2 s H3; (try assumption). +apply H1. +elim H2; intros H4; [left; (try assumption) | right; in_tac]. +elim H3; intros H4; [left; (try assumption) | right; in_tac]. +apply no_overlapD_invf; auto. +Qed. + +Definition temporaries1_3 := R IT1 :: (R FT1 :: (R IT3 :: nil)). + +Definition temporaries2 := R IT2 :: (R FT2 :: nil). + +Definition no_tmp13_state (S1 : State) := + Loc.disjoint (getsrc (StateDone S1)) temporaries1_3 /\ + Loc.disjoint (getdst (StateDone S1)) temporaries1_3. + +Definition Done_well_formed (S1 S2 : State) := + forall x, + (In x (getsrc (StateDone S2)) -> + In x temporaries2 \/ In x (getsrc (StateToMove S1 ++ StateBeing S1))) /\ + (In x (getdst (StateDone S2)) -> + In x temporaries2 \/ In x (getdst (StateToMove S1 ++ StateBeing S1))). + +Lemma Done_notmp3_inv: + forall S1 S2, + step S1 S2 -> + (forall x, + In x (getdst (StateToMove S1 ++ (StateBeing S1 ++ StateDone S1))) -> + Loc.diff x (R IT3)) -> + forall x, + In x (getdst (StateToMove S2 ++ (StateBeing S2 ++ StateDone S2))) -> + Loc.diff x (R IT3). +Proof. +intros S1 S2 STEP; inversion STEP; (try (simpl; trivial; fail)); + simpl StateDone; simpl StateToMove; simpl StateBeing; simpl getdst; + (repeat (rewrite getdst_app; simpl)); intros. +apply H1; in_tac. +destruct m; apply H1; in_tac. +apply H1; in_tac. +case (Loc.eq x (T r0)); intros e. +unfold T in e |-; destruct (Loc.type r0); rewrite e; simpl; red; intro; + discriminate. +apply H1; apply in_cons_noteq with ( b := T r0 ); auto; in_tac. +apply H3; in_tac. +Qed. + +Lemma Done_notmp3_invpp: + forall S1 S2, + stepp S1 S2 -> + (forall x, + In x (getdst (StateToMove S1 ++ (StateBeing S1 ++ StateDone S1))) -> + Loc.diff x (R IT3)) -> + forall x, + In x (getdst (StateToMove S2 ++ (StateBeing S2 ++ StateDone S2))) -> + Loc.diff x (R IT3). +Proof. +intros S1 S2 H H0; (try assumption). +induction H as [r|r1 r2 r3 H1 H2 Hrec]; auto. +apply Hrec; auto. +apply Done_notmp3_inv with r1; auto. +Qed. + +Lemma Done_notmp3_invf: + forall S1, + stepInv S1 -> + (forall x, + In x (getdst (StateToMove S1 ++ (StateBeing S1 ++ StateDone S1))) -> + Loc.diff x (R IT3)) -> + forall x, + In + x + (getdst + (StateToMove (stepf S1) ++ (StateBeing (stepf S1) ++ StateDone (stepf S1)))) -> + Loc.diff x (R IT3). +Proof. +intros S1 H H0; (try assumption). +generalize H; unfold stepInv; intros [Hpath [HSD [HnoO [Hnotmp HnotmpL]]]]. +destruct S1 as [[t1 b1] d1]; set (S1:=(t1, b1, d1)); destruct t1; destruct b1; + auto; apply (Done_notmp3_invpp S1); auto; apply dstep_step; auto; apply f2ind; + unfold S1; auto. +Qed. + +Lemma Done_notmp3_res: + forall S1, + stepInv S1 -> + (forall x, + In x (getdst (StateToMove S1 ++ (StateBeing S1 ++ StateDone S1))) -> + Loc.diff x (R IT3)) -> + forall x, + In + x + (getdst + (StateToMove (Pmov S1) ++ (StateBeing (Pmov S1) ++ StateDone (Pmov S1)))) -> + Loc.diff x (R IT3). +Proof. +intros S1; elim S1 using (well_founded_ind (Wf_nat.well_founded_ltof _ mesure)). +clear S1; intros S1 Hrec. +destruct S1 as [[t b] d]; set (S1:=(t, b, d)). +unfold S1; rewrite Pmov_equation. +intros H; generalize H; intros [Hpath [HSD [HnoO [Htmp HtmpL]]]]. +destruct t; [destruct b; auto | idtac]; + (intro; apply Hrec; + [apply stepf1_dec | apply (dstep_inv S1); auto; apply f2ind' + | apply Done_notmp3_invf]; auto). +Qed. + +Lemma Done_notmp1_inv: + forall S1 S2, + step S1 S2 -> + (forall x, + In x (getdst (StateToMove S1 ++ (StateBeing S1 ++ StateDone S1))) -> + Loc.diff x (R IT1) /\ Loc.diff x (R FT1)) -> + forall x, + In x (getdst (StateToMove S2 ++ (StateBeing S2 ++ StateDone S2))) -> + Loc.diff x (R IT1) /\ Loc.diff x (R FT1). +Proof. +intros S1 S2 STEP; inversion STEP; (try (simpl; trivial; fail)); + (repeat (rewrite getdst_app; simpl)); intros. +apply H1; in_tac. +destruct m; apply H1; in_tac. +apply H1; in_tac. +case (Loc.eq x (T r0)); intro. +rewrite e; unfold T; case (Loc.type r0); simpl; split; red; intro; discriminate. +apply H1; apply in_cons_noteq with ( b := T r0 ); (try assumption); in_tac. +apply H3; in_tac. +Qed. + +Lemma Done_notmp1_invpp: + forall S1 S2, + stepp S1 S2 -> + (forall x, + In x (getdst (StateToMove S1 ++ (StateBeing S1 ++ StateDone S1))) -> + Loc.diff x (R IT1) /\ Loc.diff x (R FT1)) -> + forall x, + In x (getdst (StateToMove S2 ++ (StateBeing S2 ++ StateDone S2))) -> + Loc.diff x (R IT1) /\ Loc.diff x (R FT1). +Proof. +intros S1 S2 H H0; (try assumption). +induction H as [r|r1 r2 r3 H1 H2 Hrec]; auto. +apply Hrec; auto. +apply Done_notmp1_inv with r1; auto. +Qed. + +Lemma Done_notmp1_invf: + forall S1, + stepInv S1 -> + (forall x, + In x (getdst (StateToMove S1 ++ (StateBeing S1 ++ StateDone S1))) -> + Loc.diff x (R IT1) /\ Loc.diff x (R FT1)) -> + forall x, + In + x + (getdst + (StateToMove (stepf S1) ++ (StateBeing (stepf S1) ++ StateDone (stepf S1)))) -> + Loc.diff x (R IT1) /\ Loc.diff x (R FT1). +Proof. +intros S1 H H0; (try assumption). +generalize H; unfold stepInv; intros [Hpath [HSD [HnoO [Hnotmp HnotmpL]]]]. +destruct S1 as [[t1 b1] d1]; set (S1:=(t1, b1, d1)); destruct t1; destruct b1; + auto; apply (Done_notmp1_invpp S1); auto; apply dstep_step; auto; apply f2ind; + unfold S1; auto. +Qed. + +Lemma Done_notmp1_res: + forall S1, + stepInv S1 -> + (forall x, + In x (getdst (StateToMove S1 ++ (StateBeing S1 ++ StateDone S1))) -> + Loc.diff x (R IT1) /\ Loc.diff x (R FT1)) -> + forall x, + In + x + (getdst + (StateToMove (Pmov S1) ++ (StateBeing (Pmov S1) ++ StateDone (Pmov S1)))) -> + Loc.diff x (R IT1) /\ Loc.diff x (R FT1). +Proof. +intros S1; elim S1 using (well_founded_ind (Wf_nat.well_founded_ltof _ mesure)). +clear S1; intros S1 Hrec. +destruct S1 as [[t b] d]; set (S1:=(t, b, d)). +intros H; generalize H; intros [Hpath [HSD [HnoO [Htmp HtmpL]]]]. +unfold S1; rewrite Pmov_equation. +destruct t; [destruct b; auto | idtac]; + (intro; apply Hrec; + [apply stepf1_dec | apply (dstep_inv S1); auto; apply f2ind' + | apply Done_notmp1_invf]; auto). +Qed. + +Lemma Done_notmp1src_inv: + forall S1 S2, + step S1 S2 -> + (forall x, + In x (getsrc (StateToMove S1 ++ (StateBeing S1 ++ StateDone S1))) -> + Loc.diff x (R IT1) /\ Loc.diff x (R FT1)) -> + forall x, + In x (getsrc (StateToMove S2 ++ (StateBeing S2 ++ StateDone S2))) -> + Loc.diff x (R IT1) /\ Loc.diff x (R FT1). +Proof. +intros S1 S2 STEP; inversion STEP; (try (simpl; trivial; fail)); + (repeat (rewrite getsrc_app; simpl)); intros. +apply H1; in_tac. +destruct m; apply H1; in_tac. +apply H1; in_tac. +case (Loc.eq x (T r0)); intro. +rewrite e; unfold T; case (Loc.type r0); simpl; split; red; intro; discriminate. +apply H1; apply in_cons_noteq with ( b := T r0 ); (try assumption); in_tac. +apply H3; in_tac. +Qed. + +Lemma Done_notmp1src_invpp: + forall S1 S2, + stepp S1 S2 -> + (forall x, + In x (getsrc (StateToMove S1 ++ (StateBeing S1 ++ StateDone S1))) -> + Loc.diff x (R IT1) /\ Loc.diff x (R FT1)) -> + forall x, + In x (getsrc (StateToMove S2 ++ (StateBeing S2 ++ StateDone S2))) -> + Loc.diff x (R IT1) /\ Loc.diff x (R FT1). +Proof. +intros S1 S2 H H0; (try assumption). +induction H as [r|r1 r2 r3 H1 H2 Hrec]; auto. +apply Hrec; auto. +apply Done_notmp1src_inv with r1; auto. +Qed. + +Lemma Done_notmp1src_invf: + forall S1, + stepInv S1 -> + (forall x, + In x (getsrc (StateToMove S1 ++ (StateBeing S1 ++ StateDone S1))) -> + Loc.diff x (R IT1) /\ Loc.diff x (R FT1)) -> + forall x, + In + x + (getsrc + (StateToMove (stepf S1) ++ (StateBeing (stepf S1) ++ StateDone (stepf S1)))) -> + Loc.diff x (R IT1) /\ Loc.diff x (R FT1). +Proof. +intros S1 H H0. +generalize H; unfold stepInv; intros [Hpath [HSD [HnoO [Hnotmp HnotmpL]]]]. +destruct S1 as [[t1 b1] d1]; set (S1:=(t1, b1, d1)); destruct t1; destruct b1; + auto; apply (Done_notmp1src_invpp S1); auto; apply dstep_step; auto; + apply f2ind; unfold S1; auto. +Qed. + +Lemma Done_notmp1src_res: + forall S1, + stepInv S1 -> + (forall x, + In x (getsrc (StateToMove S1 ++ (StateBeing S1 ++ StateDone S1))) -> + Loc.diff x (R IT1) /\ Loc.diff x (R FT1)) -> + forall x, + In + x + (getsrc + (StateToMove (Pmov S1) ++ (StateBeing (Pmov S1) ++ StateDone (Pmov S1)))) -> + Loc.diff x (R IT1) /\ Loc.diff x (R FT1). +Proof. +intros S1; elim S1 using (well_founded_ind (Wf_nat.well_founded_ltof _ mesure)). +clear S1; intros S1 Hrec. +destruct S1 as [[t b] d]; set (S1:=(t, b, d)). +intros H; generalize H; intros [Hpath [HSD [HnoO [Htmp HtmpL]]]]. +unfold S1; rewrite Pmov_equation. +destruct t; [destruct b; auto | idtac]; + (intro; apply Hrec; + [apply stepf1_dec | apply (dstep_inv S1); auto; apply f2ind' + | apply Done_notmp1src_invf]; auto). +Qed. + +Lemma dst_tmp2_step: + forall S1 S2, + step S1 S2 -> + forall x, + In x (getdst (StateToMove S2 ++ (StateBeing S2 ++ StateDone S2))) -> + In x temporaries2 \/ + In x (getdst (StateToMove S1 ++ (StateBeing S1 ++ StateDone S1))). +Proof. +intros S1 S2 STEP; inversion STEP; (repeat (rewrite getdst_app; simpl)); intros; + (try (right; in_tac)). +destruct m; right; in_tac. +case (Loc.eq x (T r0)); intro. +rewrite e; unfold T; case (Loc.type r0); left; [left | right; left]; trivial. +right; apply in_cons_noteq with ( b := T r0 ); auto; in_tac. +Qed. + +Lemma dst_tmp2_stepp: + forall S1 S2, + stepp S1 S2 -> + forall x, + In x (getdst (StateToMove S2 ++ (StateBeing S2 ++ StateDone S2))) -> + In x temporaries2 \/ + In x (getdst (StateToMove S1 ++ (StateBeing S1 ++ StateDone S1))). +Proof. +intros S1 S2 H. +induction H as [r|r1 r2 r3 H1 H2 Hrec]; auto. +intros. +elim Hrec with ( x := x ); + [intros H0; (try clear Hrec); (try exact H0) | intros H0; (try clear Hrec) + | try clear Hrec]; auto. +generalize (dst_tmp2_step r1 r2); auto. +Qed. + +Lemma dst_tmp2_stepf: + forall S1, + stepInv S1 -> + forall x, + In + x + (getdst + (StateToMove (stepf S1) ++ (StateBeing (stepf S1) ++ StateDone (stepf S1)))) -> + In x temporaries2 \/ + In x (getdst (StateToMove S1 ++ (StateBeing S1 ++ StateDone S1))). +Proof. +intros S1 H H0. +generalize H; unfold stepInv; intros [Hpath [HSD [HnoO [Hnotmp HnotmpL]]]]. +destruct S1 as [[t1 b1] d1]; set (S1:=(t1, b1, d1)); destruct t1; destruct b1; + auto; apply (dst_tmp2_stepp S1); auto; apply dstep_step; auto; apply f2ind; + unfold S1; auto. +Qed. + +Lemma dst_tmp2_res: + forall S1, + stepInv S1 -> + forall x, + In + x + (getdst + (StateToMove (Pmov S1) ++ (StateBeing (Pmov S1) ++ StateDone (Pmov S1)))) -> + In x temporaries2 \/ + In x (getdst (StateToMove S1 ++ (StateBeing S1 ++ StateDone S1))). +Proof. +intros S1; elim S1 using (well_founded_ind (Wf_nat.well_founded_ltof _ mesure)). +clear S1; intros S1 Hrec. +destruct S1 as [[t b] d]; set (S1:=(t, b, d)). +intros H; generalize H; intros [Hpath [HSD [HnoO [Htmp HtmpL]]]]. +unfold S1; rewrite Pmov_equation; intros. +destruct t; auto. +destruct b; auto. +elim Hrec with ( y := stepf S1 ) ( x := x ); + [intros H1 | intros H1 | try clear Hrec | try clear Hrec | try assumption]. +left; (try assumption). +apply dst_tmp2_stepf; auto. +apply stepf1_dec; auto. +apply (dstep_inv S1); auto; unfold S1; apply f2ind'; auto. +elim Hrec with ( y := stepf S1 ) ( x := x ); + [intro; left; (try assumption) | intros H1; apply dst_tmp2_stepf; auto | + apply stepf1_dec; auto | + apply (dstep_inv S1); auto; unfold S1; apply f2ind'; auto | try assumption]. +Qed. + +Lemma getdst_lists2moves: + forall srcs dsts, + length srcs = length dsts -> + getsrc (listsLoc2Moves srcs dsts) = srcs /\ + getdst (listsLoc2Moves srcs dsts) = dsts. +Proof. +induction srcs; intros dsts; destruct dsts; simpl; auto; intro; + (try discriminate). +inversion H. +elim IHsrcs with ( dsts := dsts ); auto; intros. +rewrite H2; rewrite H0; auto. +Qed. + +Lemma stepInv_pnilnil: + forall p, + simpleDest p -> + Loc.disjoint (getsrc p) temporaries -> + Loc.disjoint (getdst p) temporaries -> + no_overlap_list p -> stepInv (p, nil, nil). +Proof. +unfold stepInv; simpl; (repeat split); auto. +rewrite app_nil; auto. +generalize (no_overlap_noOverlap (p, nil, nil)). +simpl; (intros H3; apply H3). +generalize H2; unfold no_overlap_state, no_overlap_list; simpl; intro. +rewrite app_nil; auto. +apply disjoint_tmp__noTmp; auto. +Qed. + +Lemma noO_list_pnilnil: + forall (p : Moves), + simpleDest p -> + Loc.disjoint (getsrc p) temporaries -> + Loc.disjoint (getdst p) temporaries -> + no_overlap_list p -> no_overlap_list (StateDone (Pmov (p, nil, nil))). +Proof. +intros; generalize (no_overlapD_res (p, nil, nil)); + unfold no_overlap_stateD, no_overlap_list. +rewrite STM_Pmov; rewrite SB_Pmov; simpl; rewrite app_nil; intro. +apply H3; auto. +apply stepInv_pnilnil; auto. +Qed. + +Lemma dis_srctmp1_pnilnil: + forall (p : Moves), + simpleDest p -> + Loc.disjoint (getsrc p) temporaries -> + Loc.disjoint (getdst p) temporaries -> + no_overlap_list p -> + Loc.disjoint (getsrc (StateDone (Pmov (p, nil, nil)))) temporaries1. +Proof. +intros; generalize (Done_notmp1src_res (p, nil, nil)); simpl. +rewrite STM_Pmov; rewrite SB_Pmov; simpl; rewrite app_nil; intro. +unfold temporaries1. +apply Loc.notin_disjoint; auto. +simpl; intros. +assert (HsI: stepInv (p, nil, nil)); (try apply stepInv_pnilnil); auto. +elim H3 with x; (try assumption). +intros; (repeat split); auto. +intros; split; + (apply Loc.in_notin_diff with ( ll := temporaries ); + [apply Loc.disjoint_notin with (getsrc p) | simpl]; auto). +Qed. + +Lemma dis_dsttmp1_pnilnil: + forall (p : Moves), + simpleDest p -> + Loc.disjoint (getsrc p) temporaries -> + Loc.disjoint (getdst p) temporaries -> + no_overlap_list p -> + Loc.disjoint (getdst (StateDone (Pmov (p, nil, nil)))) temporaries1. +Proof. +intros; generalize (Done_notmp1_res (p, nil, nil)); simpl. +rewrite STM_Pmov; rewrite SB_Pmov; simpl; rewrite app_nil; intro. +unfold temporaries1. +apply Loc.notin_disjoint; auto. +simpl; intros. +assert (HsI: stepInv (p, nil, nil)); (try apply stepInv_pnilnil); auto. +elim H3 with x; (try assumption). +intros; (repeat split); auto. +intros; split; + (apply Loc.in_notin_diff with ( ll := temporaries ); + [apply Loc.disjoint_notin with (getdst p) | simpl]; auto). +Qed. + +Lemma parallel_move_correct': + forall p k rs m, + Loc.norepet (getdst p) -> + no_overlap_list p -> + Loc.disjoint (getsrc p) temporaries -> + Loc.disjoint (getdst p) temporaries -> + (exists rs' , + exec_instrs ge sp (p_move p k) rs m k rs' m /\ + (List.map rs' (getdst p) = List.map rs (getsrc p) /\ + (rs' (R IT3) = rs (R IT3) /\ + (forall l, + Loc.notin l (getdst p) -> Loc.notin l temporaries -> rs' l = rs l))) + ). +Proof. +unfold p_move, P_move. +intros p k rs m Hnorepet HnoOverlap Hdisjointsrc Hdisjointdst. +assert (HsD: simpleDest p); (try (apply norepet_SD; assumption)). +assert (HsI: stepInv (p, nil, nil)); (try (apply stepInv_pnilnil; assumption)). +generalize HsI; unfold stepInv; simpl StateToMove; simpl StateBeing; + (repeat rewrite app_nil); intros [_ [_ [HnoO [Hnotmp _]]]]. +elim (exec_instrs_pmov (StateDone (Pmov (p, nil, nil))) k rs m); auto; + (try apply noO_list_pnilnil); (try apply dis_dsttmp1_pnilnil); + (try apply dis_srctmp1_pnilnil); auto. +intros rs' [Hexec R]; exists rs'; (repeat split); auto. +rewrite <- (Fpmov_correct_map p rs); auto. +apply list_map_exten; intros; rewrite <- R; auto; + (try + (apply Loc.in_notin_diff with ( ll := temporaries ); + [apply Loc.disjoint_notin with (getdst p) | simpl]; auto)). +generalize (dst_tmp2_res (p, nil, nil)); intros. +elim H0 with ( x := r ); + [intros H2; right | + simpl; rewrite app_nil; intros H2; apply In_norepet with (getdst p); auto | + try assumption | rewrite STM_Pmov; rewrite SB_Pmov; auto]. +apply Loc.diff_sym; apply Loc.in_notin_diff with ( ll := temporaries ); + (try assumption). +apply Loc.disjoint_notin with (getdst p); auto. +generalize H2; unfold temporaries, temporaries2; intros; in_tac. +rewrite <- (Fpmov_correct_IT3 p rs); auto; rewrite <- R; + (try (simpl; intro; discriminate)); auto. +intros r H; right; apply (Done_notmp3_res (p, nil, nil)); auto; + (try (rewrite STM_Pmov; rewrite SB_Pmov; auto)). +simpl; rewrite app_nil; intros; apply Loc.in_notin_diff with temporaries; auto. +apply Loc.disjoint_notin with (getdst p); auto. +simpl; right; right; left; trivial. +intros; rewrite <- (Fpmov_correct_ext p rs); auto; rewrite <- R; auto; + (try (apply Loc.in_notin_diff with temporaries; simpl; auto)). +intros r H1; right; generalize (dst_tmp2_res (p, nil, nil)); intros. +elim H2 with ( x := r ); + [intros H3 | simpl; rewrite app_nil; intros H3 | assumption + | rewrite STM_Pmov; rewrite SB_Pmov; auto]. +apply Loc.diff_sym; apply Loc.in_notin_diff with temporaries; auto. +generalize H3; unfold temporaries, temporaries2; intros; in_tac. +apply Loc.diff_sym; apply Loc.in_notin_diff with ( ll := getdst p ); auto. +Qed. + +Lemma parallel_move_correctX: + forall srcs dsts k rs m, + List.length srcs = List.length dsts -> + no_overlap srcs dsts -> + Loc.norepet dsts -> + Loc.disjoint srcs temporaries -> + Loc.disjoint dsts temporaries -> + (exists rs' , + exec_instrs ge sp (parallel_move srcs dsts k) rs m k rs' m /\ + (List.map rs' dsts = List.map rs srcs /\ + (rs' (R IT3) = rs (R IT3) /\ + (forall l, Loc.notin l dsts -> Loc.notin l temporaries -> rs' l = rs l))) ). +Proof. +intros; unfold parallel_move, parallel_move_order; + generalize (parallel_move_correct' (listsLoc2Moves srcs dsts) k rs m). +elim (getdst_lists2moves srcs dsts); auto. +intros heq1 heq2; rewrite heq1; rewrite heq2; unfold p_move. +intros H4; apply H4; auto. +unfold no_overlap_list; rewrite heq1; rewrite heq2; auto. +Qed. + +End parallel_move_correction. diff --git a/backend/Alloctyping.v b/backend/Alloctyping.v new file mode 100644 index 00000000..39e53eef --- /dev/null +++ b/backend/Alloctyping.v @@ -0,0 +1,509 @@ +(** Preservation of typing during register allocation. *) + +Require Import Coqlib. +Require Import Maps. +Require Import AST. +Require Import Op. +Require Import Registers. +Require Import RTL. +Require Import Locations. +Require Import LTL. +Require Import Coloring. +Require Import Coloringproof. +Require Import Allocation. +Require Import Allocproof. +Require Import RTLtyping. +Require Import LTLtyping. +Require Import Conventions. +Require Import Alloctyping_aux. + +(** This file proves that register allocation (the translation from + RTL to LTL defined in file [Allocation]) preserves typing: + given a well-typed RTL input, it produces LTL code that is + well-typed. *) + +Section TYPING_FUNCTION. + +Variable f: RTL.function. +Variable env: regenv. +Variable live: PMap.t Regset.t. +Variable alloc: reg -> loc. +Variable tf: LTL.function. + +Hypothesis TYPE_RTL: type_rtl_function f = Some env. +Hypothesis ALLOC: regalloc f live (live0 f live) env = Some alloc. +Hypothesis TRANSL: transf_function f = Some tf. + +Lemma wt_rtl_function: RTLtyping.wt_function env f. +Proof. + apply type_rtl_function_correct; auto. +Qed. + +Lemma alloc_type: forall r, Loc.type (alloc r) = env r. +Proof. + intro. eapply regalloc_preserves_types; eauto. +Qed. + +Lemma alloc_types: + forall rl, List.map Loc.type (List.map alloc rl) = List.map env rl. +Proof. + intros. rewrite list_map_compose. apply list_map_exten. + intros. symmetry. apply alloc_type. +Qed. + +(** [loc_read_ok l] states whether location [l] is well-formed in an + argument context (for reading). *) + +Definition loc_read_ok (l: loc) : Prop := + match l with R r => True | S s => slot_bounded tf s end. + +(** [loc_write_ok l] states whether location [l] is well-formed in a + result context (for writing). *) + +Definition loc_write_ok (l: loc) : Prop := + match l with + | R r => True + | S (Incoming _ _) => False + | S s => slot_bounded tf s end. + +Definition locs_read_ok (ll: list loc) : Prop := + forall l, In l ll -> loc_read_ok l. + +Definition locs_write_ok (ll: list loc) : Prop := + forall l, In l ll -> loc_write_ok l. + +Remark loc_write_ok_read_ok: + forall l, loc_write_ok l -> loc_read_ok l. +Proof. + destruct l; simpl. auto. + destruct s; tauto. +Qed. +Hint Resolve loc_write_ok_read_ok: allocty. + +Remark locs_write_ok_read_ok: + forall ll, locs_write_ok ll -> locs_read_ok ll. +Proof. + unfold locs_write_ok, locs_read_ok. auto with allocty. +Qed. +Hint Resolve locs_write_ok_read_ok: allocty. + +Lemma alloc_write_ok: + forall r, loc_write_ok (alloc r). +Proof. + intros. generalize (regalloc_acceptable _ _ _ _ _ r ALLOC). + destruct (alloc r); simpl. auto. + destruct s; try contradiction. simpl. omega. +Qed. +Hint Resolve alloc_write_ok: allocty. + +Lemma allocs_write_ok: + forall rl, locs_write_ok (List.map alloc rl). +Proof. + intros; red; intros. + generalize (list_in_map_inv _ _ _ H). intros [r [EQ IN]]. + subst l. auto with allocty. +Qed. +Hint Resolve allocs_write_ok: allocty. + +(** * Typing LTL constructors *) + +(** We show that the LTL constructor functions defined in [Allocation] + generate well-typed LTL basic blocks, given sufficient typing + and well-formedness hypotheses over the locations involved. *) + +Lemma wt_add_reload: + forall src dst k, + loc_read_ok src -> + Loc.type src = mreg_type dst -> + wt_block tf k -> + wt_block tf (add_reload src dst k). +Proof. + intros. unfold add_reload. destruct src. + case (mreg_eq m dst); intro. auto. apply wt_Bopmove. exact H0. auto. + apply wt_Bgetstack. exact H0. exact H. auto. +Qed. + +Lemma wt_add_spill: + forall src dst k, + loc_write_ok dst -> + mreg_type src = Loc.type dst -> + wt_block tf k -> + wt_block tf (add_spill src dst k). +Proof. + intros. unfold add_spill. destruct dst. + case (mreg_eq src m); intro. auto. + apply wt_Bopmove. exact H0. auto. + apply wt_Bsetstack. generalize H. simpl. destruct s; auto. + symmetry. exact H0. generalize H. simpl. destruct s; auto. contradiction. + auto. +Qed. + +Lemma wt_add_reloads: + forall srcs dsts k, + locs_read_ok srcs -> + List.map Loc.type srcs = List.map mreg_type dsts -> + wt_block tf k -> + wt_block tf (add_reloads srcs dsts k). +Proof. + induction srcs; intros. + destruct dsts. simpl; auto. simpl in H0; discriminate. + destruct dsts; simpl in H0. discriminate. simpl. + apply wt_add_reload. apply H; apply in_eq. congruence. + apply IHsrcs. red; intros; apply H; apply in_cons; auto. + congruence. auto. +Qed. + +Lemma wt_reg_for: + forall l, mreg_type (reg_for l) = Loc.type l. +Proof. + intros. destruct l; simpl. auto. + case (slot_type s); reflexivity. +Qed. +Hint Resolve wt_reg_for: allocty. + +Lemma wt_regs_for_rec: + forall locs itmps ftmps, + (List.length locs <= List.length itmps)%nat -> + (List.length locs <= List.length ftmps)%nat -> + (forall r, In r itmps -> mreg_type r = Tint) -> + (forall r, In r ftmps -> mreg_type r = Tfloat) -> + List.map mreg_type (regs_for_rec locs itmps ftmps) = List.map Loc.type locs. +Proof. + induction locs; intros. + simpl. auto. + destruct itmps; simpl in H. omegaContradiction. + destruct ftmps; simpl in H0. omegaContradiction. + simpl. apply (f_equal2 (@cons typ)). + destruct a. reflexivity. simpl. case (slot_type s). + apply H1; apply in_eq. apply H2; apply in_eq. + apply IHlocs. omega. omega. + intros; apply H1; apply in_cons; auto. + intros; apply H2; apply in_cons; auto. +Qed. + +Lemma wt_regs_for: + forall locs, + (List.length locs <= 3)%nat -> + List.map mreg_type (regs_for locs) = List.map Loc.type locs. +Proof. + intros. unfold regs_for. apply wt_regs_for_rec. + simpl. auto. simpl. auto. + simpl; intros; intuition; subst r; reflexivity. + simpl; intros; intuition; subst r; reflexivity. +Qed. +Hint Resolve wt_regs_for: allocty. + +Lemma wt_add_move: + forall src dst b, + loc_read_ok src -> loc_write_ok dst -> + Loc.type src = Loc.type dst -> + wt_block tf b -> + wt_block tf (add_move src dst b). +Proof. + intros. unfold add_move. + case (Loc.eq src dst); intro. + auto. + destruct src. apply wt_add_spill; auto. + destruct dst. apply wt_add_reload; auto. + set (tmp := match slot_type s with Tint => IT1 | Tfloat => FT1 end). + apply wt_add_reload. auto. + simpl. unfold tmp. case (slot_type s); reflexivity. + apply wt_add_spill. auto. + simpl. simpl in H1. rewrite <- H1. unfold tmp. case (slot_type s); reflexivity. + auto. +Qed. + +Theorem wt_parallel_move: + forall srcs dsts b, + List.map Loc.type srcs = List.map Loc.type dsts -> + locs_read_ok srcs -> + locs_write_ok dsts -> wt_block tf b -> wt_block tf (parallel_move srcs dsts b). +Proof. + unfold locs_read_ok, locs_write_ok. + apply wt_parallel_moveX; simpl; auto. + exact wt_add_move. +Qed. + +Lemma wt_add_op_move: + forall src res s, + Loc.type src = Loc.type res -> + loc_read_ok src -> loc_write_ok res -> + wt_block tf (add_op Omove (src :: nil) res s). +Proof. + intros. unfold add_op. simpl. + apply wt_add_move. auto. auto. auto. constructor. +Qed. + +Lemma wt_add_op_undef: + forall res s, + loc_write_ok res -> + wt_block tf (add_op Oundef nil res s). +Proof. + intros. unfold add_op. simpl. + apply wt_Bopundef. apply wt_add_spill. auto. auto with allocty. + constructor. +Qed. + +Lemma wt_add_op_others: + forall op args res s, + op <> Omove -> op <> Oundef -> + (List.map Loc.type args, Loc.type res) = type_of_operation op -> + locs_read_ok args -> + loc_write_ok res -> + wt_block tf (add_op op args res s). +Proof. + intros. unfold add_op. + caseEq (is_move_operation op args). intros. + generalize (is_move_operation_correct op args H4). tauto. + intro. assert ((List.length args <= 3)%nat). + replace (length args) with (length (fst (type_of_operation op))). + apply Allocproof.length_type_of_operation. + rewrite <- H1. simpl. apply list_length_map. + generalize (wt_regs_for args H5); intro. + generalize (wt_reg_for res); intro. + apply wt_add_reloads. auto. auto. + apply wt_Bop. auto. auto. congruence. + apply wt_add_spill. auto. auto. constructor. +Qed. + +Lemma wt_add_load: + forall chunk addr args dst s, + List.map Loc.type args = type_of_addressing addr -> + Loc.type dst = type_of_chunk chunk -> + locs_read_ok args -> + loc_write_ok dst -> + wt_block tf (add_load chunk addr args dst s). +Proof. + intros. unfold add_load. + assert ((List.length args <= 2)%nat). + replace (length args) with (length (type_of_addressing addr)). + apply Allocproof.length_type_of_addressing. + rewrite <- H. apply list_length_map. + assert ((List.length args <= 3)%nat). omega. + generalize (wt_regs_for args H4); intro. + generalize (wt_reg_for dst); intro. + apply wt_add_reloads. auto. auto. + apply wt_Bload. congruence. congruence. + apply wt_add_spill. auto. auto. constructor. +Qed. + +Lemma wt_add_store: + forall chunk addr args src s, + List.map Loc.type args = type_of_addressing addr -> + Loc.type src = type_of_chunk chunk -> + locs_read_ok args -> + loc_read_ok src -> + wt_block tf (add_store chunk addr args src s). +Proof. + intros. unfold add_store. + assert ((List.length args <= 2)%nat). + replace (length args) with (length (type_of_addressing addr)). + apply Allocproof.length_type_of_addressing. + rewrite <- H. apply list_length_map. + assert ((List.length (src :: args) <= 3)%nat). simpl. omega. + generalize (wt_regs_for (src :: args) H4); intro. + caseEq (regs_for (src :: args)). + intro. constructor. + intros rsrc rargs EQ. rewrite EQ in H5. simpl in H5. + apply wt_add_reloads. + red; intros. elim H6; intro. subst l; auto. auto. + simpl. congruence. + apply wt_Bstore. congruence. congruence. constructor. +Qed. + +Lemma wt_add_call: + forall sig los args res s, + match los with inl l => Loc.type l = Tint | inr s => True end -> + List.map Loc.type args = sig.(sig_args) -> + Loc.type res = match sig.(sig_res) with None => Tint | Some ty => ty end -> + locs_read_ok args -> + match los with inl l => loc_read_ok l | inr s => True end -> + loc_write_ok res -> + wt_block tf (add_call sig los args res s). +Proof. + intros. + assert (locs_write_ok (loc_arguments sig)). + red; intros. generalize (loc_arguments_acceptable sig l H5). + destruct l; simpl. auto. + destruct s0; try contradiction. simpl. omega. + unfold add_call. destruct los. + apply wt_add_reload. auto. simpl; congruence. + apply wt_parallel_move. rewrite loc_arguments_type. auto. + auto. auto. + apply wt_Bcall. reflexivity. + apply wt_add_spill. auto. + rewrite loc_result_type. auto. constructor. + apply wt_parallel_move. rewrite loc_arguments_type. auto. + auto. auto. + apply wt_Bcall. auto. + apply wt_add_spill. auto. + rewrite loc_result_type. auto. constructor. +Qed. + +Lemma wt_add_cond: + forall cond args ifso ifnot, + List.map Loc.type args = type_of_condition cond -> + locs_read_ok args -> + wt_block tf (add_cond cond args ifso ifnot). +Proof. + intros. + assert ((List.length args) <= 3)%nat. + replace (length args) with (length (type_of_condition cond)). + apply Allocproof.length_type_of_condition. + rewrite <- H. apply list_length_map. + generalize (wt_regs_for args H1). intro. + unfold add_cond. apply wt_add_reloads. + auto. auto. + apply wt_Bcond. congruence. +Qed. + +Lemma wt_add_return: + forall sig optarg, + option_map Loc.type optarg = sig.(sig_res) -> + match optarg with None => True | Some arg => loc_read_ok arg end -> + wt_block tf (add_return sig optarg). +Proof. + intros. unfold add_return. destruct optarg. + apply wt_add_reload. auto. rewrite loc_result_type. + simpl in H. destruct (sig_res sig). congruence. discriminate. + constructor. + apply wt_Bopundef. constructor. +Qed. + +Lemma wt_add_undefs: + forall ll b, + wt_block tf b -> wt_block tf (add_undefs ll b). +Proof. + induction ll; intros. + simpl. auto. + simpl. destruct a. apply wt_Bopundef. auto. auto. +Qed. + +Lemma wt_add_entry: + forall params undefs s, + List.map Loc.type params = sig_args (RTL.fn_sig f) -> + locs_write_ok params -> + wt_block tf (add_entry (RTL.fn_sig f) params undefs s). +Proof. + set (sig := RTL.fn_sig f). + assert (sig = tf.(fn_sig)). + unfold sig. symmetry. apply Allocproof.sig_function_translated. auto. + assert (locs_read_ok (loc_parameters sig)). + red; unfold loc_parameters. intros. + generalize (list_in_map_inv _ _ _ H0). intros [l1 [EQ IN]]. + subst l. generalize (loc_arguments_acceptable _ _ IN). + destruct l1. simpl. auto. + destruct s; try contradiction. + simpl; intros. split. omega. rewrite <- H. + apply loc_arguments_bounded. auto. + intros. unfold add_entry. + apply wt_parallel_move. rewrite loc_parameters_type. auto. + auto. auto. + apply wt_add_undefs. constructor. +Qed. + +(** * Type preservation during translation from RTL to LTL *) + +Lemma wt_transf_instr: + forall pc instr, + RTLtyping.wt_instr env f instr -> + wt_block tf (transf_instr f live alloc pc instr). +Proof. + intros. inversion H; simpl. + (* nop *) + constructor. + (* move *) + case (Regset.mem r live!!pc). + apply wt_add_op_move; auto with allocty. + repeat rewrite alloc_type. auto. constructor. + (* undef *) + case (Regset.mem r live!!pc). + apply wt_add_op_undef; auto with allocty. + constructor. + (* other ops *) + case (Regset.mem res live!!pc). + apply wt_add_op_others; auto with allocty. + rewrite alloc_types. rewrite alloc_type. auto. + constructor. + (* load *) + case (Regset.mem dst live!!pc). + apply wt_add_load; auto with allocty. + rewrite alloc_types. auto. rewrite alloc_type. auto. + constructor. + (* store *) + apply wt_add_store; auto with allocty. + rewrite alloc_types. auto. rewrite alloc_type. auto. + (* call *) + apply wt_add_call. + destruct ros; simpl. rewrite alloc_type; auto. auto. + rewrite alloc_types; auto. + rewrite alloc_type. auto. + auto with allocty. + destruct ros; simpl; auto with allocty. + auto with allocty. + (* cond *) + apply wt_add_cond. rewrite alloc_types; auto. auto with allocty. + (* return *) + apply wt_add_return. + destruct optres; simpl. rewrite alloc_type. exact H0. exact H0. + destruct optres; simpl; auto with allocty. +Qed. + +Lemma wt_transf_instrs: + let c := PTree.map (transf_instr f live alloc) (RTL.fn_code f) in + forall pc b, c!pc = Some b -> wt_block tf b. +Proof. + intros until b. + unfold c. rewrite PTree.gmap. caseEq (RTL.fn_code f)!pc. + intros instr EQ. simpl. intros. injection H; intro; subst b. + apply wt_transf_instr. eapply RTLtyping.wt_instrs; eauto. + apply wt_rtl_function. + simpl; intros; discriminate. +Qed. + +Lemma wt_transf_entrypoint: + let c := transf_entrypoint f live alloc + (PTree.map (transf_instr f live alloc) (RTL.fn_code f)) in + (forall pc b, c!pc = Some b -> wt_block tf b). +Proof. + simpl. unfold transf_entrypoint. + intros pc b. rewrite PTree.gsspec. + case (peq pc (fn_nextpc f)); intros. + injection H; intro; subst b. + apply wt_add_entry. + rewrite alloc_types. eapply RTLtyping.wt_params. apply wt_rtl_function. + auto with allocty. + apply wt_transf_instrs with pc; auto. +Qed. + +End TYPING_FUNCTION. + +Lemma wt_transf_function: + forall f tf, + transf_function f = Some tf -> wt_function tf. +Proof. + intros. generalize H; unfold transf_function. + caseEq (type_rtl_function f). intros env TYP. + caseEq (analyze f). intros live ANL. + change (transfer f (RTL.fn_entrypoint f) + live!!(RTL.fn_entrypoint f)) + with (live0 f live). + caseEq (regalloc f live (live0 f live) env). + intros alloc ALLOC. + intro EQ; injection EQ; intro; subst tf. + red. simpl. intros. eapply wt_transf_entrypoint; eauto. + intros; discriminate. + intros; discriminate. + intros; discriminate. +Qed. + +Lemma program_typing_preserved: + forall (p: RTL.program) (tp: LTL.program), + transf_program p = Some tp -> + LTLtyping.wt_program tp. +Proof. + intros; red; intros. + generalize (transform_partial_program_function transf_function p i f H H0). + intros [f0 [IN TRANSF]]. + apply wt_transf_function with f0; auto. +Qed. diff --git a/backend/Alloctyping_aux.v b/backend/Alloctyping_aux.v new file mode 100644 index 00000000..0013c240 --- /dev/null +++ b/backend/Alloctyping_aux.v @@ -0,0 +1,895 @@ +(** Type preservation for parallel move compilation. *) + +(** This file, contributed by Laurence Rideau, shows that the parallel + move compilation algorithm (file [Parallelmove]) generates a well-typed + sequence of LTL instructions, provided the original problem is well-typed: + the types of source and destination locations match pairwise. *) + +Require Import Coqlib. +Require Import Locations. +Require Import LTL. +Require Import Allocation. +Require Import LTLtyping. +Require Import Allocproof_aux. +Require Import Parallelmove. +Require Import Inclusion. + +Section wt_move_correction. +Variable tf : LTL.function. +Variable loc_read_ok : loc -> Prop. +Hypothesis loc_read_ok_R : forall r, loc_read_ok (R r). +Variable loc_write_ok : loc -> Prop. +Hypothesis loc_write_ok_R : forall r, loc_write_ok (R r). + +Let locs_read_ok (ll : list loc) : Prop := + forall l, In l ll -> loc_read_ok l. + +Let locs_write_ok (ll : list loc) : Prop := + forall l, In l ll -> loc_write_ok l. + +Hypothesis + wt_add_move : + forall (src dst : loc) (b : LTL.block), + loc_read_ok src -> + loc_write_ok dst -> + Loc.type src = Loc.type dst -> + wt_block tf b -> wt_block tf (add_move src dst b). + +Lemma in_move__in_srcdst: + forall m p, In m p -> In (fst m) (getsrc p) /\ In (snd m) (getdst p). +Proof. +intros; induction p. +inversion H. +destruct a as [a1 a2]; destruct m as [m1 m2]; simpl. +elim H; intro. +inversion H0. +subst a2; subst a1. +split; [left; trivial | left; trivial]. +split; right; (elim IHp; simpl; intros; auto). +Qed. + +Lemma T_type: forall r, Loc.type r = Loc.type (T r). +Proof. +intro; unfold T. +case (Loc.type r); auto. +Qed. + +Theorem incl_nil: forall A (l : list A), incl nil l. +Proof. +intros A l a; simpl; intros H; case H. +Qed. +Hint Resolve incl_nil :datatypes. + +Lemma split_move_incl: + forall (l t1 t2 : Moves) (s d : Reg), + split_move l s = Some (t1, d, t2) -> incl t1 l /\ incl t2 l. +Proof. +induction l. +simpl; (intros; discriminate). +intros t1 t2 s d; destruct a as [a1 a2]; simpl. +case (Loc.eq a1 s); intro. +intros. +inversion H. +split; auto. +apply incl_nil. +apply incl_tl; apply incl_refl; auto. +caseEq (split_move l s); intro; (try (intros; discriminate)). +destruct p as [[p1 p2] p3]. +intros. +inversion H0. +elim (IHl p1 p3 s p2); intros; auto. +subst p3. +split; auto. +generalize H1; unfold incl; simpl. +intros H4 a [H7|H6]; [try exact H7 | idtac]. +left; (try assumption). +right; apply H4; auto. +apply incl_tl; auto. +Qed. + +Lemma in_split_move: + forall (l t1 t2 : Moves) (s d : Reg), + split_move l s = Some (t1, d, t2) -> In (s, d) l. +Proof. +induction l. +simpl; intros; discriminate. +intros t1 t2 s d; simpl. +destruct a as [a1 a2]. +case (Loc.eq a1 s). +intros. +inversion H. +subst a1; left; auto. +intro; caseEq (split_move l s); (intros; (try discriminate)). +destruct p as [[p1 p2] p3]. +right; inversion H0. +subst p2. +apply (IHl p1 p3); auto. +Qed. + +Lemma move_types_stepf: + forall S1, + (forall x1 x2, + In (x1, x2) (StateToMove S1 ++ (StateBeing S1 ++ StateDone S1)) -> + Loc.type x1 = Loc.type x2) -> + forall x1 x2, + In + (x1, x2) + (StateToMove (stepf S1) ++ (StateBeing (stepf S1) ++ StateDone (stepf S1))) -> + Loc.type x1 = Loc.type x2. +Proof. +intros S1 H x1 x2. +destruct S1 as [[t1 b1] d1]; set (S1:=(t1, b1, d1)); destruct t1; destruct b1; + auto; simpl StateToMove in H |-; simpl StateBeing in H |-; + simpl StateDone in H |-; simpl app in H |-. +intro; + elim + (in_app_or + (StateToMove (stepf S1)) (StateBeing (stepf S1) ++ StateDone (stepf S1)) + (x1, x2)); auto. +assert (StateToMove (stepf S1) = nil). +simpl stepf. +destruct m as [s d]. +case (Loc.eq d (fst (last b1))); case b1; simpl; auto. +rewrite H1; intros H2; inversion H2. +intro; elim (in_app_or (StateBeing (stepf S1)) (StateDone (stepf S1)) (x1, x2)); + auto. +assert + (StateBeing (stepf S1) = nil \/ + (StateBeing (stepf S1) = b1 \/ StateBeing (stepf S1) = replace_last_s b1)). +simpl stepf. +destruct m as [s d]. +case (Loc.eq d (fst (last b1))); case b1; simpl; auto. +elim H2; [intros H3; (try clear H2); (try exact H3) | intros H3; (try clear H2)]. +rewrite H3; intros H4; inversion H4. +elim H3; [intros H2; (try clear H3); (try exact H2) | intros H2; (try clear H3)]. +rewrite H2; intros H4. +apply H; (try in_tac). +rewrite H2; intros H4. +caseEq b1; intro; simpl; auto. +rewrite H3 in H4; simpl in H4 |-; inversion H4. +intros l H5; rewrite H5 in H4. +generalize (app_rewriter _ l m0). +intros [y [r H3]]; (try exact H3). +rewrite H3 in H4. +destruct y. +rewrite last_replace in H4. +elim (in_app_or r ((T r0, r1) :: nil) (x1, x2)); auto. +intro; apply H. +rewrite H5. +rewrite H3; in_tac. +intros H6; inversion H6. +inversion H7. +rewrite <- T_type. +rewrite <- H10; apply H. +rewrite H5; rewrite H3; (try in_tac). +assert (In (r0, r1) ((r0, r1) :: nil)); [simpl; auto | in_tac]. +inversion H7. +intro. +destruct m as [s d]. +assert + (StateDone (stepf S1) = (s, d) :: d1 \/ + StateDone (stepf S1) = (s, d) :: ((d, T d) :: d1)). +simpl. +case (Loc.eq d (fst (last b1))); case b1; simpl; auto. +elim H3; [intros H4; (try clear H3); (try exact H4) | intros H4; (try clear H3)]. +apply H; (try in_tac). +rewrite H4 in H2; in_tac. +rewrite H4 in H2. +simpl in H2 |-. +elim H2; [intros H3; apply H | intros H3; elim H3; intros; [idtac | apply H]]; + (try in_tac). +simpl; left; auto. +inversion H5; apply T_type. +intro; + elim + (in_app_or + (StateToMove (stepf S1)) (StateBeing (stepf S1) ++ StateDone (stepf S1)) + (x1, x2)); auto. +simpl stepf. +destruct m as [s d]. +case (Loc.eq s d); simpl; intros; apply H; in_tac. +intro; elim (in_app_or (StateBeing (stepf S1)) (StateDone (stepf S1)) (x1, x2)); + auto. +simpl stepf. +destruct m as [s d]. +case (Loc.eq s d); intros; apply H; (try in_tac). +inversion H2. +simpl stepf. +destruct m as [s d]. +case (Loc.eq s d); intros; apply H; (try in_tac). +simpl in H2 |-; in_tac. +simpl in H2 |-; in_tac. +intro; + elim + (in_app_or + (StateToMove (stepf S1)) (StateBeing (stepf S1) ++ StateDone (stepf S1)) + (x1, x2)); auto. +simpl stepf. +destruct m as [s d]. +destruct m0 as [s0 d0]. +case (Loc.eq s d0); [simpl; intros; apply H; in_tac | idtac]. +caseEq (split_move t1 d0); intro. +destruct p as [[t2 b2] d2]. +intros Hsplit Hd; simpl StateToMove; intro. +elim (split_move_incl t1 t2 d2 d0 b2 Hsplit); auto. +intros; apply H. +assert (In (x1, x2) ((s, d) :: (t1 ++ t1))). +generalize H1; simpl; intros. +elim H4; [intros H5; left; (try exact H5) | intros H5; right]. +elim (in_app_or t2 d2 (x1, x2)); auto; intro; apply in_or_app; left. +unfold incl in H2 |-. +apply H2; auto. +unfold incl in H3 |-; apply H3; auto. +in_tac. +intro; case (Loc.eq d0 (fst (last b1))); case b1; auto; simpl StateToMove; + intros; apply H; in_tac. +intro; elim (in_app_or (StateBeing (stepf S1)) (StateDone (stepf S1)) (x1, x2)); + auto. +simpl stepf. +destruct m as [s d]. +destruct m0 as [s0 d0]. +case (Loc.eq s d0). +intros e; rewrite <- e; simpl StateBeing. +rewrite <- e in H. +intro; apply H; in_tac. +caseEq (split_move t1 d0); intro. +destruct p as [[t2 b2] d2]. +simpl StateBeing. +intros. +apply H. +generalize (in_split_move t1 t2 d2 d0 b2 H2). +intros. +elim H3; intros. +rewrite <- H5. +in_tac. +in_tac. +caseEq b1. +simpl; intros e n F; elim F. +intros m l H3 H4. +case (Loc.eq d0 (fst (last (m :: l)))). +generalize (app_rewriter Move l m). +intros [y [r H5]]; rewrite H5. +simpl StateBeing. +destruct y as [y1 y2]; generalize (last_replace r y1 y2). +simpl; intros heq H6. +unfold Move in heq |-; unfold Move. +rewrite heq. +intro. +elim (in_app_or r ((T y1, y2) :: nil) (x1, x2)); auto. +intro; apply H. +rewrite H3; rewrite H5; in_tac. +simpl; intros [H8|H8]; inversion H8. +rewrite <- T_type. +apply H. +rewrite H3; rewrite H5. +rewrite <- H11; assert (In (y1, y2) ((y1, y2) :: nil)); auto. +simpl; auto. +in_tac. +simpl StateBeing; intros. +apply H; rewrite H3; (try in_tac). +simpl stepf. +destruct m as [s d]. +destruct m0 as [s0 d0]. +case (Loc.eq s d0); [simpl; intros; apply H; in_tac | idtac]. +caseEq (split_move t1 d0); intro. +destruct p as [[t2 b2] d2]. +intros Hsplit Hd; simpl StateDone; intro. +apply H; (try in_tac). +case (Loc.eq d0 (fst (last b1))); case b1; simpl StateDone; intros; + (try (apply H; in_tac)). +elim H3; intros. +apply H. +assert (In (x1, x2) ((s0, d0) :: nil)); auto. +rewrite H4; auto. +simpl; left; auto. +in_tac. +elim H4; intros. +inversion H5; apply T_type. +apply H; in_tac. +Qed. + +Lemma move_types_res: + forall S1, + (forall x1 x2, + In (x1, x2) (StateToMove S1 ++ (StateBeing S1 ++ StateDone S1)) -> + Loc.type x1 = Loc.type x2) -> + forall x1 x2, + In + (x1, x2) + (StateToMove (Pmov S1) ++ (StateBeing (Pmov S1) ++ StateDone (Pmov S1))) -> + Loc.type x1 = Loc.type x2. +Proof. +intros S1; elim S1 using (well_founded_ind (Wf_nat.well_founded_ltof _ mesure)). +clear S1; intros S1 Hrec. +destruct S1 as [[t b] d]; set (S1:=(t, b, d)). +unfold S1; rewrite Pmov_equation; intros. +destruct t; auto. +destruct b; auto. +apply (Hrec (stepf S1)). +apply stepf1_dec; auto. +apply move_types_stepf; auto. +unfold S1; auto. +apply (Hrec (stepf S1)). +apply stepf1_dec; auto. +apply move_types_stepf; auto. +unfold S1; auto. +Qed. + +Lemma srcdst_tmp2_stepf: + forall S1 x1 x2, + In + (x1, x2) + (StateToMove (stepf S1) ++ (StateBeing (stepf S1) ++ StateDone (stepf S1))) -> + (In x1 temporaries2 \/ + In x1 (getsrc (StateToMove S1 ++ (StateBeing S1 ++ StateDone S1)))) /\ + (In x2 temporaries2 \/ + In x2 (getdst (StateToMove S1 ++ (StateBeing S1 ++ StateDone S1)))). +Proof. +intros S1 x1 x2 H. +(repeat rewrite getsrc_app); (repeat rewrite getdst_app). +destruct S1 as [[t1 b1] d1]; set (S1:=(t1, b1, d1)); destruct t1; destruct b1; + auto. +simpl in H |-. +elim (in_move__in_srcdst (x1, x2) d1); intros; auto. +elim + (in_app_or + (StateToMove (stepf S1)) (StateBeing (stepf S1) ++ StateDone (stepf S1)) + (x1, x2)); auto. +assert (StateToMove (stepf S1) = nil). +simpl stepf. +destruct m as [s d]. +case (Loc.eq d (fst (last b1))); case b1; simpl; auto. +rewrite H0; intros H2; inversion H2. +intro; elim (in_app_or (StateBeing (stepf S1)) (StateDone (stepf S1)) (x1, x2)); + auto. +simpl stepf. +destruct m as [s d]. +caseEq b1. +simpl. +intros h1 h2; inversion h2. +intros m l heq; generalize (app_rewriter _ l m). +intros [y [r H3]]; (try exact H3). +rewrite H3. +destruct y. +rewrite last_app; simpl fst. +case (Loc.eq d r0). +intros heqd. +rewrite last_replace. +simpl. +intro; elim (in_app_or r ((T r0, r1) :: nil) (x1, x2)); auto. +rewrite heq; rewrite H3. +rewrite getsrc_app; simpl; rewrite getdst_app; simpl. +intro; elim (in_move__in_srcdst (x1, x2) r); auto; simpl; intros; split; right; + right; in_tac. +intro. +inversion H2; inversion H4. +split. +unfold T; case (Loc.type r0); left; [left | right]; auto. +right; right; (try assumption). +rewrite heq; rewrite H3. +rewrite H7; simpl. +rewrite getdst_app; simpl. +assert (In x2 (x2 :: nil)); simpl; auto. +in_tac. +simpl StateBeing. +intros; elim (in_move__in_srcdst (x1, x2) (r ++ ((r0, r1) :: nil))); auto; + intros; split; right; right. +unfold snd in H4 |-; unfold fst in H2 |-; rewrite heq; rewrite H3; (try in_tac). +unfold snd in H4 |-; unfold fst in H2 |-; rewrite heq; rewrite H3; (try in_tac). +simpl stepf. +destruct m as [s d]. +caseEq b1; intro. +simpl StateDone; intro. +unfold S1, StateToMove, StateBeing. +simpl app. +elim (in_move__in_srcdst (x1, x2) ((s, d) :: d1)); auto; intros; split; right. +simpl snd in H4 |-; simpl fst in H3 |-; simpl getdst in H4 |-; + simpl getsrc in H3 |-; (try in_tac). +simpl snd in H4 |-; simpl fst in H3 |-; simpl getdst in H4 |-; + simpl getsrc in H3 |-; (try in_tac). +intros; generalize (app_rewriter _ l m). +intros [y [r H4]]. +generalize H2; rewrite H4; rewrite last_app. +destruct y as [y1 y2]. +simpl fst. +case (Loc.eq d y1). +simpl StateDone; intros. +elim H3; [intros H6; inversion H6; (try exact H6) | intros H6; (try clear H5)]. +simpl; split; right; left; auto. +elim H6; [intros H5; inversion H5; (try exact H5) | intros H5; (try clear H6)]. +split; [right; simpl; right | left]. +rewrite H1; rewrite H4; rewrite getsrc_app; simpl getsrc. +rewrite <- e; rewrite H8; assert (In x1 (x1 :: nil)); simpl; auto; (try in_tac). +unfold T; case (Loc.type x1); simpl; auto. +elim (in_move__in_srcdst (x1, x2) d1); auto; intros; split; right; right; + (try in_tac). +intro; simpl StateDone. +unfold S1, StateToMove, StateBeing, StateDone. +simpl getsrc; simpl app; (try in_tac). +intro; elim (in_move__in_srcdst (x1, x2) ((s, d) :: d1)); + (auto; (simpl fst; simpl snd; simpl getsrc; simpl getdst); intros); + (split; right; (try in_tac)). +unfold S1, StateToMove, StateBeing, StateDone. +elim + (in_app_or + (StateToMove (stepf S1)) (StateBeing (stepf S1) ++ StateDone (stepf S1)) + (x1, x2)); auto. +simpl stepf. +destruct m as [s d]. +case (Loc.eq s d). +simpl StateToMove. +intros; elim (in_move__in_srcdst (x1, x2) t1); auto; + (repeat (rewrite getsrc_app; simpl getsrc)); + (repeat (rewrite getdst_app; simpl getdst)); simpl fst; simpl snd; intros; + split; right; simpl; right; (try in_tac). +simpl StateToMove. +intros; elim (in_move__in_srcdst (x1, x2) t1); auto; + (repeat (rewrite getsrc_app; simpl getsrc)); + (repeat (rewrite getdst_app; simpl getdst)); simpl fst; simpl snd; intros; + split; right; simpl; right; (try in_tac). +intro; elim (in_app_or (StateBeing (stepf S1)) (StateDone (stepf S1)) (x1, x2)); + auto. +simpl stepf. +destruct m as [s d]. +case (Loc.eq s d). +simpl StateBeing; intros h1 h2; inversion h2. +simpl StateBeing; intros h1 h2. +elim (in_move__in_srcdst (x1, x2) ((s, d) :: nil)); auto; simpl fst; simpl snd; + simpl; intros; split; right; (try in_tac). +elim H1; [intros H3; left; (try exact H3) | intros H3; inversion H3]. +elim H2; [intros H3; left; (try exact H3) | intros H3; inversion H3]. +simpl stepf. +destruct m as [s d]. +case (Loc.eq s d). +simpl StateDone; intros h1 h2. +elim (in_move__in_srcdst (x1, x2) d1); auto; simpl fst; simpl snd; simpl; + intros; split; right; right; (try in_tac). +simpl StateDone; intros h1 h2. +elim (in_move__in_srcdst (x1, x2) d1); auto; simpl fst; simpl snd; simpl; + intros; split; right; right; (try in_tac). +elim + (in_app_or + (StateToMove (stepf S1)) (StateBeing (stepf S1) ++ StateDone (stepf S1)) + (x1, x2)); auto. +simpl stepf. +destruct m as [s d]. +destruct m0 as [s0 d0]. +case (Loc.eq s d0). +unfold S1, StateToMove, StateBeing, StateDone. +simpl app at 1. +intros; elim (in_move__in_srcdst (x1, x2) t1); + (auto; simpl; intros; (split; right; right; (try in_tac))). +intro; caseEq (split_move t1 d0); intro. +destruct p as [[t2 b2] d2]. +intros Hsplit; unfold S1, StateToMove, StateBeing, StateDone; intro. +elim (split_move_incl t1 t2 d2 d0 b2 Hsplit); auto. +intros. +assert (In (x1, x2) ((s, d) :: (t1 ++ t1))). +generalize H0; simpl; intros. +elim H3; [intros H5; left; (try exact H5) | intros H5; right]. +elim (in_app_or t2 d2 (x1, x2)); auto; intro; apply in_or_app; left. +unfold incl in H1 |-. +apply H1; auto. +unfold incl in H2 |-; apply H2; auto. +split; right. +elim (in_move__in_srcdst (x1, x2) ((s, d) :: (t1 ++ t1))); + (auto; simpl; intros; (try in_tac)). +elim H4; [intros H6; (try clear H4); (try exact H6) | intros H6; (try clear H4)]. +left; (try assumption). +right; (try in_tac). +rewrite getsrc_app in H6; (try in_tac). +elim (in_move__in_srcdst (x1, x2) ((s, d) :: (t1 ++ t1))); + (auto; simpl; intros; (try in_tac)). +elim H5; [intros H6; (try clear H5); (try exact H6) | intros H6; (try clear H5)]. +left; (try assumption). +right; rewrite getdst_app in H6; (try in_tac). +caseEq b1; intro. +unfold S1, StateToMove, StateBeing, StateDone. +intro; elim (in_move__in_srcdst (x1, x2) ((s, d) :: t1)); (auto; intros). +simpl snd in H4 |-; simpl fst in H3 |-; split; right; (try in_tac). +intros l heq; generalize (app_rewriter _ l m). +intros [y [r H1]]; rewrite H1. +destruct y as [y1 y2]. +rewrite last_app; simpl fst. +case (Loc.eq d0 y1). +unfold S1, StateToMove, StateBeing, StateDone. +intros; elim (in_move__in_srcdst (x1, x2) ((s, d) :: t1)); auto; intros. +simpl snd in H4 |-; simpl fst in H3 |-; (split; right; (try in_tac)). +unfold S1, StateToMove, StateBeing, StateDone. +intros; elim (in_move__in_srcdst (x1, x2) ((s, d) :: t1)); auto; intros. +simpl snd in H4 |-; simpl fst in H3 |-; (split; right; (try in_tac)). +intro; elim (in_app_or (StateBeing (stepf S1)) (StateDone (stepf S1)) (x1, x2)); + auto. +simpl stepf. +destruct m as [s d]. +destruct m0 as [s0 d0]. +case (Loc.eq s d0). +intros e; rewrite <- e; simpl StateBeing. +unfold S1, StateToMove, StateBeing, StateDone. +intros; elim (in_move__in_srcdst (x1, x2) ((s, d) :: ((s0, s) :: b1))); auto; + simpl; intros. +split; right; (try in_tac). +elim H2; [intros H4; left; (try exact H4) | intros H4; (try clear H2)]. +elim H4; [intros H2; right; (try exact H2) | intros H2; (try clear H4)]. +assert (In x1 (s0 :: nil)); auto; (try in_tac). +simpl; auto. +right; (try in_tac). +elim H3; [intros H4; left; (try exact H4) | intros H4; (try clear H3)]. +elim H4; [intros H3; right; (try exact H3) | intros H3; (try clear H4)]. +rewrite <- e; (try in_tac). +assert (In x2 (s :: nil)); [simpl; auto | try in_tac]. +right; (try in_tac). +intro; caseEq (split_move t1 d0); intro. +destruct p as [[t2 b2] d2]. +simpl StateBeing. +intros. +generalize (in_split_move t1 t2 d2 d0 b2 H1). +intros. +split; right; elim H2; intros. +rewrite H4 in H3; elim (in_move__in_srcdst (x1, x2) t1); auto; intros. +simpl snd in H6 |-; simpl fst in H5 |-; (try in_tac). +unfold S1, StateToMove, StateBeing, StateDone. +simpl getsrc; (try in_tac). +elim (in_move__in_srcdst (x1, x2) ((s0, d0) :: b1)); (auto; intros). +simpl snd in H6 |-; simpl fst in H5 |-; (try in_tac). +unfold S1, StateToMove, StateBeing, StateDone. +simpl. +simpl in H5 |-. +elim H5; [intros H7; (try clear H5); (try exact H7) | intros H7; (try clear H5)]. +assert (In x1 (s0 :: nil)); simpl; auto. +right; in_tac. +right; in_tac. +inversion H4. +simpl. +subst b2. +rewrite H4 in H3. +elim (in_move__in_srcdst (x1, x2) t1); (auto; intros). +simpl snd in H7 |-. +right; in_tac. +unfold S1, StateToMove, StateBeing, StateDone. +elim (in_move__in_srcdst (x1, x2) ((s0, d0) :: b1)); auto; intros. +simpl snd in H6 |-; (try in_tac). +apply + (in_or_app (getdst ((s, d) :: t1)) (getdst ((s0, d0) :: b1) ++ getdst d1) x2); + right; (try in_tac). +caseEq b1. +intros h1 h2; inversion h2. +intros m l heq. +generalize (app_rewriter _ l m); intros [y [r H2]]; rewrite H2. +destruct y as [y1 y2]. +rewrite last_app; simpl fst. +case (Loc.eq d0 y1). +unfold S1, StateToMove, StateBeing, StateDone. +generalize (last_replace r y1 y2). +unfold Move; intros H3 H6. +rewrite H3. +intro. +elim (in_app_or r ((T y1, y2) :: nil) (x1, x2)); auto. +intro. +rewrite heq; rewrite H2; (split; right). +elim (in_move__in_srcdst (x1, x2) r); auto; simpl fst; simpl snd; intros; + (try in_tac). +simpl. +rewrite getsrc_app; (right; (try in_tac)). +elim (in_move__in_srcdst (x1, x2) r); auto; simpl fst; simpl snd; intros; + (try in_tac). +simpl. +rewrite getdst_app; right; (try in_tac). +intros h; inversion h; inversion H5. +split; [left; simpl; auto | right]. +unfold T; case (Loc.type y1); auto. +subst y2. +rewrite heq; rewrite H2. +simpl. +rewrite getdst_app; simpl. +assert (In x2 (x2 :: nil)); [simpl; auto | right; (try in_tac)]. +unfold S1, StateToMove, StateBeing, StateDone. +intro; rewrite heq; rewrite H2; (split; right). +intros; elim (in_move__in_srcdst (x1, x2) (r ++ ((y1, y2) :: nil))); auto; + intros. +simpl snd in H5 |-; simpl fst in H4 |-. +simpl. +right; (try in_tac). +apply in_or_app; right; simpl; right; (try in_tac). +elim (in_move__in_srcdst (x1, x2) (r ++ ((y1, y2) :: nil))); auto; intros. +simpl snd in H5 |-. +simpl. +right; (try in_tac). +apply in_or_app; right; simpl; right; (try in_tac). +simpl stepf. +destruct m as [s d]. +destruct m0 as [s0 d0]. +case (Loc.eq s d0). +unfold S1, StateToMove, StateBeing, StateDone. +intros; elim (in_move__in_srcdst (x1, x2) d1); auto; intros. +simpl in H3 |-; simpl in H2 |-. +split; right; (try in_tac). +intro; caseEq (split_move t1 d0); intro. +destruct p as [[t2 b2] d2]. +simpl StateDone. +unfold S1, StateToMove, StateBeing, StateDone. +intros; elim (in_move__in_srcdst (x1, x2) d1); auto; intros. +simpl in H3 |-; simpl in H4 |-. +split; right; (try in_tac). +caseEq b1. +unfold S1, StateToMove, StateBeing, StateDone. +intros; elim (in_move__in_srcdst (x1, x2) ((s0, d0) :: d1)); auto; intros. +simpl in H5 |-; simpl in H4 |-; split; right; (try in_tac). +simpl. +elim H4; [intros H6; right; (try exact H6) | intros H6; (try clear H4)]. +assert (In x1 (x1 :: nil)); [simpl; auto | rewrite H6; (try in_tac)]. +right; (try in_tac). +elim H5; [intros H6; right; simpl; (try exact H6) | intros H6; (try clear H5)]. +assert (In x2 (x2 :: nil)); [simpl; auto | rewrite H6; (try in_tac)]. +try in_tac. +intros m l heq. +generalize (app_rewriter _ l m); intros [y [r H2]]; rewrite H2. +destruct y as [y1 y2]. +rewrite last_app; simpl fst. +case (Loc.eq d0 y1). +unfold S1, StateToMove, StateBeing, StateDone. +unfold S1, StateToMove, StateBeing, StateDone. +intros. +elim H3; intros. +inversion H4. +simpl; split; right; auto. +right; apply in_or_app; right; simpl; auto. +right; apply in_or_app; right; simpl; auto. +elim H4; intros. +inversion H5. +simpl; split; [right | left]. +rewrite heq; rewrite H2; simpl. +rewrite <- e; rewrite H7. +rewrite getsrc_app; simpl. +right; assert (In x1 (x1 :: nil)); [simpl; auto | try in_tac]. +unfold T; case (Loc.type x1); auto. +elim (in_move__in_srcdst (x1, x2) d1); (auto; intros). +simpl snd in H7 |-; simpl fst in H6 |-; split; right; (try in_tac). +unfold S1, StateToMove, StateBeing, StateDone. +intros; elim (in_move__in_srcdst (x1, x2) ((s0, d0) :: d1)); + (auto; simpl; intros). +split; right. +elim H4; [intros H6; right; (try exact H6) | intros H6; (try clear H4)]. +apply in_or_app; right; simpl; auto. +right; (try in_tac). +elim H5; [intros H6; right; (try exact H6) | intros H6; (try clear H5)]. +apply in_or_app; right; simpl; auto. +right; (try in_tac). +Qed. + +Lemma getsrc_f: forall s l, In s (getsrc l) -> (exists d , In (s, d) l ). +Proof. +induction l; simpl getsrc. +simpl; (intros h; elim h). +intros; destruct a as [a1 a2]. +simpl in H |-. +elim H; [intros H0; (try clear H); (try exact H0) | intros H0; (try clear H)]. +subst a1. +exists a2; simpl; auto. +simpl. +elim IHl; [intros d H; (try clear IHl); (try exact H) | idtac]; auto. +exists d; [right; (try assumption)]. +Qed. + +Lemma incl_src: forall l1 l2, incl l1 l2 -> incl (getsrc l1) (getsrc l2). +Proof. +intros. +unfold incl in H |-. +unfold incl. +intros a H0; (try assumption). +generalize (getsrc_f a). +intros H1; elim H1 with ( l := l1 ); + [intros d H2; (try clear H1); (try exact H2) | idtac]; auto. +assert (In (a, d) l2). +apply H; auto. +elim (in_move__in_srcdst (a, d) l2); auto. +Qed. + +Lemma getdst_f: forall d l, In d (getdst l) -> (exists s , In (s, d) l ). +Proof. +induction l; simpl getdst. +simpl; (intros h; elim h). +intros; destruct a as [a1 a2]. +simpl in H |-. +elim H; [intros H0; (try clear H); (try exact H0) | intros H0; (try clear H)]. +subst a2. +exists a1; simpl; auto. +simpl. +elim IHl; [intros s H; (try clear IHl); (try exact H) | idtac]; auto. +exists s; [right; (try assumption)]. +Qed. + +Lemma incl_dst: forall l1 l2, incl l1 l2 -> incl (getdst l1) (getdst l2). +Proof. +intros. +unfold incl in H |-. +unfold incl. +intros a H0; (try assumption). +generalize (getdst_f a). +intros H1; elim H1 with ( l := l1 ); + [intros d H2; (try clear H1); (try exact H2) | idtac]; auto. +assert (In (d, a) l2). +apply H; auto. +elim (in_move__in_srcdst (d, a) l2); auto. +Qed. + +Lemma src_tmp2_res: + forall S1 x1 x2, + In + (x1, x2) + (StateToMove (Pmov S1) ++ (StateBeing (Pmov S1) ++ StateDone (Pmov S1))) -> + (In x1 temporaries2 \/ + In x1 (getsrc (StateToMove S1 ++ (StateBeing S1 ++ StateDone S1)))) /\ + (In x2 temporaries2 \/ + In x2 (getdst (StateToMove S1 ++ (StateBeing S1 ++ StateDone S1)))). +Proof. +intros S1; elim S1 using (well_founded_ind (Wf_nat.well_founded_ltof _ mesure)). +clear S1; intros S1 Hrec. +destruct S1 as [[t b] d]; set (S1:=(t, b, d)). +unfold S1; rewrite Pmov_equation; intros. +destruct t. +destruct b. +apply srcdst_tmp2_stepf; auto. +elim Hrec with ( y := stepf S1 ) ( x1 := x1 ) ( x2 := x2 ); + [idtac | apply stepf1_dec; auto | auto]. +intros. +elim H1; [intros H2; (try clear H1); (try exact H2) | intros H2; (try clear H1)]. +elim H0; [intros H1; (try clear H0); (try exact H1) | intros H1; (try clear H0)]. +split; [left; (try assumption) | idtac]. +left; (try assumption). +elim (getsrc_f x1) with ( 1 := H1 ); intros x3 H3. +split; auto. +elim srcdst_tmp2_stepf with ( 1 := H3 ); auto. +elim H0; [intros H1; (try clear H0); (try exact H1) | intros H1; (try clear H0)]. +elim (getdst_f x2) with ( 1 := H2 ); intros x3 H3. +split; auto. +elim srcdst_tmp2_stepf with ( 1 := H3 ); auto. +elim (getsrc_f x1) with ( 1 := H1 ); intros x3 H3. +elim srcdst_tmp2_stepf with ( 1 := H3 ); auto. +clear H3. +elim (getdst_f x2) with ( 1 := H2 ); intros x4 H3. +elim srcdst_tmp2_stepf with ( 1 := H3 ); auto. +elim Hrec with ( y := stepf S1 ) ( x1 := x1 ) ( x2 := x2 ); + [idtac | apply stepf1_dec; auto | auto]. +intros. +elim H1; [intros H2; (try clear H1); (try exact H2) | intros H2; (try clear H1)]. +elim H0; [intros H1; (try clear H0); (try exact H1) | intros H1; (try clear H0)]. +split; [left; (try assumption) | idtac]. +left; (try assumption). +elim (getsrc_f x1) with ( 1 := H1 ); intros x3 H3. +split; auto. +elim srcdst_tmp2_stepf with ( 1 := H3 ); auto. +elim H0; [intros H1; (try clear H0); (try exact H1) | intros H1; (try clear H0)]. +elim (getdst_f x2) with ( 1 := H2 ); intros x3 H3. +split; auto. +elim srcdst_tmp2_stepf with ( 1 := H3 ); auto. +elim (getsrc_f x1) with ( 1 := H1 ); intros x3 H3. +elim srcdst_tmp2_stepf with ( 1 := H3 ); auto. +clear H3. +elim (getdst_f x2) with ( 1 := H2 ); intros x4 H3. +elim srcdst_tmp2_stepf with ( 1 := H3 ); auto. +Qed. + +Lemma wt_add_moves: + forall p b, + List.map Loc.type (getsrc p) = List.map Loc.type (getdst p) -> + locs_read_ok (getsrc p) -> + locs_write_ok (getdst p) -> + wt_block tf b -> + wt_block + tf + (fold_left + (fun (k0 : LTL.block) => + fun (p0 : loc * loc) => add_move (fst p0) (snd p0) k0) p b). +Proof. +induction p. +intros; simpl; auto. +intros; destruct a as [a1 a2]; simpl. +apply IHp; auto. +inversion H; auto. +simpl in H0 |-. +unfold locs_read_ok in H0 |-. +simpl in H0 |-. +unfold locs_read_ok; auto. +generalize H1; unfold locs_write_ok; simpl; auto. +apply wt_add_move; (try assumption). +simpl in H0 |-. +unfold locs_read_ok in H0 |-. +apply H0. +simpl; left; trivial. +unfold locs_write_ok in H1 |-; apply H1. +simpl; left; trivial. +inversion H; auto. +Qed. + +Lemma map_f_getsrc_getdst: + forall (b : Set) (f : Reg -> b) p, + map f (getsrc p) = map f (getdst p) -> + forall x1 x2, In (x1, x2) p -> f x1 = f x2. +Proof. +intros b f0 p; induction p; simpl; auto. +intros; contradiction. +destruct a. +simpl. +intros heq; injection heq. +intros h1 h2. +intros x1 x2 [H3|H3]. +injection H3. +intros; subst; auto. +apply IHp; auto. +Qed. + +Lemma wt_parallel_move': + forall p b, + List.map Loc.type (getsrc p) = List.map Loc.type (getdst p) -> + locs_read_ok (getsrc p) -> + locs_write_ok (getdst p) -> wt_block tf b -> wt_block tf (p_move p b). +Proof. +unfold p_move. +unfold P_move. +intros; apply wt_add_moves; auto. +rewrite getsrc_map; rewrite getdst_map. +rewrite list_map_compose. +rewrite list_map_compose. +apply list_map_exten. +generalize (move_types_res (p, nil, nil)); auto. +destruct x as [x1 x2]; simpl; intros; auto. +symmetry; apply H3. +simpl. +rewrite app_nil. +apply map_f_getsrc_getdst; auto. +in_tac. +unfold locs_read_ok. +intros l H3. +elim getsrc_f with ( 1 := H3 ); intros x3 H4. +elim (src_tmp2_res (p, nil, nil) l x3). +simpl. +rewrite app_nil. +intros [[H'|[H'|H']]|H'] _. +subst l; hnf; auto. +subst l; hnf; auto. +contradiction. +apply H0; auto. +in_tac. +intros l H3. +elim getdst_f with ( 1 := H3 ); intros x3 H4. +elim (src_tmp2_res (p, nil, nil) x3 l). +simpl. +rewrite app_nil. +intros _ [[H'|[H'|H']]|H']. +subst l; hnf; auto. +subst l; hnf; auto. +contradiction. +apply H1; auto. +in_tac. +Qed. + +Theorem wt_parallel_moveX: + forall srcs dsts b, + List.map Loc.type srcs = List.map Loc.type dsts -> + locs_read_ok srcs -> + locs_write_ok dsts -> wt_block tf b -> wt_block tf (parallel_move srcs dsts b). +Proof. +unfold parallel_move, parallel_move_order, P_move. +intros. +generalize (wt_parallel_move' (listsLoc2Moves srcs dsts)); intros H'. +unfold p_move, P_move in H' |-. +apply H'; auto. +elim (getdst_lists2moves srcs dsts); auto. +unfold Allocation.listsLoc2Moves, listsLoc2Moves. +intros heq1 heq2; rewrite heq1; rewrite heq2; auto. +repeat rewrite <- (list_length_map Loc.type). +rewrite H; auto. +elim (getdst_lists2moves srcs dsts); auto. +unfold Allocation.listsLoc2Moves, listsLoc2Moves. +intros heq1 heq2; rewrite heq1; auto. +repeat rewrite <- (list_length_map Loc.type). +rewrite H; auto. +elim (getdst_lists2moves srcs dsts); auto. +unfold Allocation.listsLoc2Moves, listsLoc2Moves. +intros heq1 heq2; rewrite heq2; auto. +repeat rewrite <- (list_length_map Loc.type). +rewrite H; auto. +Qed. + +End wt_move_correction. diff --git a/backend/CSE.v b/backend/CSE.v new file mode 100644 index 00000000..243f6dd7 --- /dev/null +++ b/backend/CSE.v @@ -0,0 +1,420 @@ +(** Common subexpression elimination over RTL. This optimization + proceeds by value numbering over extended basic blocks. *) + +Require Import Coqlib. +Require Import Maps. +Require Import AST. +Require Import Integers. +Require Import Floats. +Require Import Values. +Require Import Mem. +Require Import Globalenvs. +Require Import Op. +Require Import Registers. +Require Import RTL. +Require Import Kildall. + +(** * Value numbering *) + +(** The idea behind value numbering algorithms is to associate + abstract identifiers (``value numbers'') to the contents of registers + at various program points, and record equations between these + identifiers. For instance, consider the instruction + [r1 = add(r2, r3)] and assume that [r2] and [r3] are mapped + to abstract identifiers [x] and [y] respectively at the program + point just before this instruction. At the program point just after, + we can add the equation [z = add(x, y)] and associate [r1] with [z], + where [z] is a fresh abstract identifier. However, if we already + knew an equation [u = add(x, y)], we can preferably add no equation + and just associate [r1] with [u]. If there exists a register [r4] + mapped with [u] at this point, we can then replace the instruction + [r1 = add(r2, r3)] by a move instruction [r1 = r4], therefore eliminating + a common subexpression and reusing the result of an earlier addition. + + Abstract identifiers / value numbers are represented by positive integers. + Equations are of the form [valnum = rhs], where the right-hand sides + [rhs] are either arithmetic operations or memory loads. *) + +Definition valnum := positive. + +Inductive rhs : Set := + | Op: operation -> list valnum -> rhs + | Load: memory_chunk -> addressing -> list valnum -> rhs. + +Definition eq_valnum: forall (x y: valnum), {x=y}+{x<>y} := peq. + +Definition eq_list_valnum (x y: list valnum) : {x=y}+{x<>y}. +Proof. + induction x; intros; case y; intros. + left; auto. + right; congruence. + right; congruence. + case (eq_valnum a v); intros. + case (IHx l); intros. + left; congruence. + right; congruence. + right; congruence. +Qed. + +Definition eq_rhs (x y: rhs) : {x=y}+{x<>y}. +Proof. + generalize Int.eq_dec; intro. + generalize Float.eq_dec; intro. + assert (forall (x y: ident), {x=y}+{x<>y}). exact peq. + assert (forall (x y: comparison), {x=y}+{x<>y}). decide equality. + assert (forall (x y: condition), {x=y}+{x<>y}). decide equality. + assert (forall (x y: operation), {x=y}+{x<>y}). decide equality. + assert (forall (x y: memory_chunk), {x=y}+{x<>y}). decide equality. + assert (forall (x y: addressing), {x=y}+{x<>y}). decide equality. + generalize eq_valnum; intro. + generalize eq_list_valnum; intro. + decide equality. +Qed. + +(** A value numbering is a collection of equations between value numbers + plus a partial map from registers to value numbers. Additionally, + we maintain the next unused value number, so as to easily generate + fresh value numbers. *) + +Record numbering : Set := mknumbering { + num_next: valnum; + num_eqs: list (valnum * rhs); + num_reg: PTree.t valnum +}. + +Definition empty_numbering := + mknumbering 1%positive nil (PTree.empty valnum). + +(** [valnum_reg n r] returns the value number for the contents of + register [r]. If none exists, a fresh value number is returned + and associated with register [r]. The possibly updated numbering + is also returned. [valnum_regs] is similar, but for a list of + registers. *) + +Definition valnum_reg (n: numbering) (r: reg) : numbering * valnum := + match PTree.get r n.(num_reg) with + | Some v => (n, v) + | None => (mknumbering (Psucc n.(num_next)) + n.(num_eqs) + (PTree.set r n.(num_next) n.(num_reg)), + n.(num_next)) + end. + +Fixpoint valnum_regs (n: numbering) (rl: list reg) + {struct rl} : numbering * list valnum := + match rl with + | nil => + (n, nil) + | r1 :: rs => + let (n1, v1) := valnum_reg n r1 in + let (ns, vs) := valnum_regs n1 rs in + (ns, v1 :: vs) + end. + +(** [find_valnum_rhs rhs eqs] searches the list of equations [eqs] + for an equation of the form [vn = rhs] for some value number [vn]. + If found, [Some vn] is returned, otherwise [None] is returned. *) + +Fixpoint find_valnum_rhs (r: rhs) (eqs: list (valnum * rhs)) + {struct eqs} : option valnum := + match eqs with + | nil => None + | (v, r') :: eqs1 => + if eq_rhs r r' then Some v else find_valnum_rhs r eqs1 + end. + +(** [add_rhs n rd rhs] updates the value numbering [n] to reflect + the computation of the operation or load represented by [rhs] + and the storing of the result in register [rd]. If an equation + [vn = rhs] is known, register [rd] is set to [vn]. Otherwise, + a fresh value number [vn] is generated and associated with [rd], + and the equation [vn = rhs] is added. *) + +Definition add_rhs (n: numbering) (rd: reg) (rh: rhs) : numbering := + match find_valnum_rhs rh n.(num_eqs) with + | Some vres => + mknumbering n.(num_next) n.(num_eqs) + (PTree.set rd vres n.(num_reg)) + | None => + mknumbering (Psucc n.(num_next)) + ((n.(num_next), rh) :: n.(num_eqs)) + (PTree.set rd n.(num_next) n.(num_reg)) + end. + +(** [add_op n rd op rs] specializes [add_rhs] for the case of an + arithmetic operation. The right-hand side corresponding to [op] + and the value numbers for the argument registers [rs] is built + and added to [n] as described in [add_rhs]. + + If [op] is a move instruction, we simply assign the value number of + the source register to the destination register, since we know that + the source and destination registers have exactly the same value. + This enables more common subexpressions to be recognized. For instance: +<< + z = add(x, y); u = x; v = add(u, y); +>> + Since [u] and [x] have the same value number, the second [add] + is recognized as computing the same result as the first [add], + and therefore [u] and [z] have the same value number. *) + +Definition add_op (n: numbering) (rd: reg) (op: operation) (rs: list reg) := + match is_move_operation op rs with + | Some r => + let (n1, v) := valnum_reg n r in + mknumbering n1.(num_next) n1.(num_eqs) (PTree.set rd v n1.(num_reg)) + | None => + let (n1, vs) := valnum_regs n rs in + add_rhs n1 rd (Op op vs) + end. + +(** [add_load n rd chunk addr rs] specializes [add_rhs] for the case of a + memory load. The right-hand side corresponding to [chunk], [addr] + and the value numbers for the argument registers [rs] is built + and added to [n] as described in [add_rhs]. *) + +Definition add_load (n: numbering) (rd: reg) + (chunk: memory_chunk) (addr: addressing) + (rs: list reg) := + let (n1, vs) := valnum_regs n rs in + add_rhs n1 rd (Load chunk addr vs). + +(** [kill_load n] removes all equations involving memory loads. + It is used to reflect the effect of a memory store, which can + potentially invalidate all such equations. *) + +Fixpoint kill_load_eqs (eqs: list (valnum * rhs)) : list (valnum * rhs) := + match eqs with + | nil => nil + | (_, Load _ _ _) :: rem => kill_load_eqs rem + | v_rh :: rem => v_rh :: kill_load_eqs rem + end. + +Definition kill_loads (n: numbering) : numbering := + mknumbering n.(num_next) + (kill_load_eqs n.(num_eqs)) + n.(num_reg). + +(* [reg_valnum n vn] returns a register that is mapped to value number + [vn], or [None] if no such register exists. *) + +Definition reg_valnum (n: numbering) (vn: valnum) : option reg := + PTree.fold + (fun (res: option reg) (r: reg) (v: valnum) => + if peq v vn then Some r else res) + n.(num_reg) None. + +(* [find_rhs] and its specializations [find_op] and [find_load] + return a register that already holds the result of the given arithmetic + operation or memory load, according to the given numbering. + [None] is returned if no such register exists. *) + +Definition find_rhs (n: numbering) (rh: rhs) : option reg := + match find_valnum_rhs rh n.(num_eqs) with + | None => None + | Some vres => reg_valnum n vres + end. + +Definition find_op + (n: numbering) (op: operation) (rs: list reg) : option reg := + let (n1, vl) := valnum_regs n rs in + find_rhs n1 (Op op vl). + +Definition find_load + (n: numbering) (chunk: memory_chunk) (addr: addressing) (rs: list reg) : option reg := + let (n1, vl) := valnum_regs n rs in + find_rhs n1 (Load chunk addr vl). + +(** * The static analysis *) + +(** We now define a notion of satisfiability of a numbering. This semantic + notion plays a central role in the correctness proof (see [CSEproof]), + but is defined here because we need it to define the ordering over + numberings used in the static analysis. + + A numbering is satisfiable in a given register environment and memory + state if there exists a valuation, mapping value numbers to actual values, + that validates both the equations and the association of registers + to value numbers. *) + +Definition equation_holds + (valuation: valnum -> val) + (ge: genv) (sp: val) (m: mem) + (vres: valnum) (rh: rhs) : Prop := + match rh with + | Op op vl => + eval_operation ge sp op (List.map valuation vl) = + Some (valuation vres) + | Load chunk addr vl => + exists a, + eval_addressing ge sp addr (List.map valuation vl) = Some a /\ + loadv chunk m a = Some (valuation vres) + end. + +Definition numbering_holds + (valuation: valnum -> val) + (ge: genv) (sp: val) (rs: regset) (m: mem) (n: numbering) : Prop := + (forall vn rh, + In (vn, rh) n.(num_eqs) -> + equation_holds valuation ge sp m vn rh) + /\ (forall r vn, + PTree.get r n.(num_reg) = Some vn -> rs#r = valuation vn). + +Definition numbering_satisfiable + (ge: genv) (sp: val) (rs: regset) (m: mem) (n: numbering) : Prop := + exists valuation, numbering_holds valuation ge sp rs m n. + +Lemma empty_numbering_satisfiable: + forall ge sp rs m, numbering_satisfiable ge sp rs m empty_numbering. +Proof. + intros; red. + exists (fun (vn: valnum) => Vundef). split; simpl; intros. + elim H. + rewrite PTree.gempty in H. discriminate. +Qed. + +(** We now equip the type [numbering] with a partial order and a greatest + element. The partial order is based on entailment: [n1] is greater + than [n2] if [n1] is satisfiable whenever [n2] is. The greatest element + is, of course, the empty numbering (no equations). *) + +Module Numbering. + Definition t := numbering. + Definition ge (n1 n2: numbering) : Prop := + forall ge sp rs m, + numbering_satisfiable ge sp rs m n2 -> + numbering_satisfiable ge sp rs m n1. + Definition top := empty_numbering. + Lemma top_ge: forall x, ge top x. + Proof. + intros; red; intros. unfold top. apply empty_numbering_satisfiable. + Qed. + Lemma refl_ge: forall x, ge x x. + Proof. + intros; red; auto. + Qed. +End Numbering. + +(** We reuse the solver for forward dataflow inequations based on + propagation over extended basic blocks defined in library [Kildall]. *) + +Module Solver := BBlock_solver(Numbering). + +(** The transfer function for the dataflow analysis returns the numbering + ``after'' execution of the instruction at [pc], as a function of the + numbering ``before''. For [Iop] and [Iload] instructions, we add + equations or reuse existing value numbers as described for + [add_op] and [add_load]. For [Istore] instructions, we forget + all equations involving memory loads. For [Icall] instructions, + we could simply associate a fresh, unconstrained by equations value number + to the result register. However, it is often undesirable to eliminate + common subexpressions across a function call (there is a risk of + increasing too much the register pressure across the call), so we + just forget all equations and start afresh with an empty numbering. + Finally, the remaining instructions modify neither registers nor + the memory, so we keep the numbering unchanged. *) + +Definition transfer (f: function) (pc: node) (before: numbering) := + match f.(fn_code)!pc with + | None => before + | Some i => + match i with + | Inop s => + before + | Iop op args res s => + add_op before res op args + | Iload chunk addr args dst s => + add_load before dst chunk addr args + | Istore chunk addr args src s => + kill_loads before + | Icall sig ros args res s => + empty_numbering + | Icond cond args ifso ifnot => + before + | Ireturn optarg => + before + end + end. + +(** The static analysis solves the dataflow inequations implied + by the [transfer] function using the ``extended basic block'' solver, + which produces sub-optimal solutions quickly. The result is + a mapping from program points to numberings. In the unlikely + case where the solver fails to find a solution, we simply associate + empty numberings to all program points, which is semantically correct + and effectively deactivates the CSE optimization. *) + +Definition analyze (f: RTL.function): PMap.t numbering := + match Solver.fixpoint (successors f) f.(fn_nextpc) + (transfer f) f.(fn_entrypoint) with + | None => PMap.init empty_numbering + | Some res => res + end. + +(** * Code transformation *) + +(** Some operations are so cheap to compute that it is generally not + worth reusing their results. These operations are detected by the + function below. *) + +Definition is_trivial_op (op: operation) : bool := + match op with + | Omove => true + | Ointconst _ => true + | Oaddrsymbol _ _ => true + | Oaddrstack _ => true + | Oundef => true + | _ => false + end. + +(** The code transformation is performed instruction by instruction. + [Iload] instructions and non-trivial [Iop] instructions are turned + into move instructions if their result is already available in a + register, as indicated by the numbering inferred at that program point. *) + +Definition transf_instr (n: numbering) (instr: instruction) := + match instr with + | Iop op args res s => + if is_trivial_op op then instr else + match find_op n op args with + | None => instr + | Some r => Iop Omove (r :: nil) res s + end + | Iload chunk addr args dst s => + match find_load n chunk addr args with + | None => instr + | Some r => Iop Omove (r :: nil) dst s + end + | _ => + instr + end. + +Definition transf_code (approxs: PMap.t numbering) (instrs: code) : code := + PTree.map (fun pc instr => transf_instr approxs!!pc instr) instrs. + +Lemma transf_code_wf: + forall f approxs, + (forall pc, Plt pc f.(fn_nextpc) \/ f.(fn_code)!pc = None) -> + (forall pc, Plt pc f.(fn_nextpc) + \/ (transf_code approxs f.(fn_code))!pc = None). +Proof. + intros. + elim (H pc); intro. + left; auto. + right. unfold transf_code. rewrite PTree.gmap. + unfold option_map; rewrite H0. reflexivity. +Qed. + +Definition transf_function (f: function) : function := + let approxs := analyze f in + mkfunction + f.(fn_sig) + f.(fn_params) + f.(fn_stacksize) + (transf_code approxs f.(fn_code)) + f.(fn_entrypoint) + f.(fn_nextpc) + (transf_code_wf f approxs f.(fn_code_wf)). + +Definition transf_program (p: program) : program := + transform_program transf_function p. + diff --git a/backend/CSEproof.v b/backend/CSEproof.v new file mode 100644 index 00000000..db8a973b --- /dev/null +++ b/backend/CSEproof.v @@ -0,0 +1,845 @@ +(** Correctness proof for common subexpression elimination. *) + +Require Import Coqlib. +Require Import Maps. +Require Import AST. +Require Import Integers. +Require Import Floats. +Require Import Values. +Require Import Mem. +Require Import Globalenvs. +Require Import Op. +Require Import Registers. +Require Import RTL. +Require Import Kildall. +Require Import CSE. + +(** * Semantic properties of value numberings *) + +(** ** Well-formedness of numberings *) + +(** A numbering is well-formed if all registers mentioned in equations + are less than the ``next'' register number given in the numbering. + This guarantees that the next register is fresh with respect to + the equations. *) + +Definition wf_rhs (next: valnum) (rh: rhs) : Prop := + match rh with + | Op op vl => forall v, In v vl -> Plt v next + | Load chunk addr vl => forall v, In v vl -> Plt v next + end. + +Definition wf_equation (next: valnum) (vr: valnum) (rh: rhs) : Prop := + Plt vr next /\ wf_rhs next rh. + +Definition wf_numbering (n: numbering) : Prop := + (forall v rh, + In (v, rh) n.(num_eqs) -> wf_equation n.(num_next) v rh) +/\ + (forall r v, + PTree.get r n.(num_reg) = Some v -> Plt v n.(num_next)). + +Lemma wf_empty_numbering: + wf_numbering empty_numbering. +Proof. + unfold empty_numbering; split; simpl; intros. + elim H. + rewrite PTree.gempty in H. congruence. +Qed. + +Lemma wf_rhs_increasing: + forall next1 next2 rh, + Ple next1 next2 -> + wf_rhs next1 rh -> wf_rhs next2 rh. +Proof. + intros; destruct rh; simpl; intros; apply Plt_Ple_trans with next1; auto. +Qed. + +Lemma wf_equation_increasing: + forall next1 next2 vr rh, + Ple next1 next2 -> + wf_equation next1 vr rh -> wf_equation next2 vr rh. +Proof. + intros. elim H0; intros. split. + apply Plt_Ple_trans with next1; auto. + apply wf_rhs_increasing with next1; auto. +Qed. + +(** We now show that all operations over numberings + preserve well-formedness. *) + +Lemma wf_valnum_reg: + forall n r n' v, + wf_numbering n -> + valnum_reg n r = (n', v) -> + wf_numbering n' /\ Plt v n'.(num_next) /\ Ple n.(num_next) n'.(num_next). +Proof. + intros until v. intros WF. inversion WF. + generalize (H0 r v). + unfold valnum_reg. destruct ((num_reg n)!r). + intros. replace n' with n. split. auto. + split. apply H1. congruence. + apply Ple_refl. + congruence. + intros. inversion H2. simpl. split. + split; simpl; intros. + apply wf_equation_increasing with (num_next n). apply Ple_succ. auto. + rewrite PTree.gsspec in H3. destruct (peq r0 r). + replace v0 with (num_next n). apply Plt_succ. congruence. + apply Plt_trans_succ; eauto. + split. apply Plt_succ. apply Ple_succ. +Qed. + +Lemma wf_valnum_regs: + forall rl n n' vl, + wf_numbering n -> + valnum_regs n rl = (n', vl) -> + wf_numbering n' /\ + (forall v, In v vl -> Plt v n'.(num_next)) /\ + Ple n.(num_next) n'.(num_next). +Proof. + induction rl; intros. + simpl in H0. inversion H0. subst n'; subst vl. + simpl. intuition. + simpl in H0. + caseEq (valnum_reg n a). intros n1 v1 EQ1. + caseEq (valnum_regs n1 rl). intros ns vs EQS. + rewrite EQ1 in H0; rewrite EQS in H0; simpl in H0. + inversion H0. subst n'; subst vl. + generalize (wf_valnum_reg _ _ _ _ H EQ1); intros [A1 [B1 C1]]. + generalize (IHrl _ _ _ A1 EQS); intros [As [Bs Cs]]. + split. auto. + split. simpl; intros. elim H1; intro. + subst v. eapply Plt_Ple_trans; eauto. + auto. + eapply Ple_trans; eauto. +Qed. + +Lemma find_valnum_rhs_correct: + forall rh vn eqs, + find_valnum_rhs rh eqs = Some vn -> In (vn, rh) eqs. +Proof. + induction eqs; simpl. + congruence. + case a; intros v r'. case (eq_rhs rh r'); intro. + intro. left. congruence. + intro. right. auto. +Qed. + +Lemma wf_add_rhs: + forall n rd rh, + wf_numbering n -> + wf_rhs n.(num_next) rh -> + wf_numbering (add_rhs n rd rh). +Proof. + intros. inversion H. unfold add_rhs. + caseEq (find_valnum_rhs rh n.(num_eqs)); intros. + split; simpl. assumption. + intros r v0. rewrite PTree.gsspec. case (peq r rd); intros. + inversion H4. subst v0. + elim (H1 v rh (find_valnum_rhs_correct _ _ _ H3)). auto. + eauto. + split; simpl. + intros v rh0 [A1|A2]. inversion A1. subst rh0. + split. apply Plt_succ. apply wf_rhs_increasing with n.(num_next). + apply Ple_succ. auto. + apply wf_equation_increasing with n.(num_next). apply Ple_succ. auto. + intros r v. rewrite PTree.gsspec. case (peq r rd); intro. + intro. inversion H4. apply Plt_succ. + intro. apply Plt_trans_succ. eauto. +Qed. + +Lemma wf_add_op: + forall n rd op rs, + wf_numbering n -> + wf_numbering (add_op n rd op rs). +Proof. + intros. unfold add_op. + case (is_move_operation op rs). + intro r. caseEq (valnum_reg n r); intros n' v EQ. + destruct (wf_valnum_reg _ _ _ _ H EQ) as [[A1 A2] [B C]]. + split; simpl. assumption. intros until v0. rewrite PTree.gsspec. + case (peq r0 rd); intros. replace v0 with v. auto. congruence. + eauto. + caseEq (valnum_regs n rs). intros n' vl EQ. + generalize (wf_valnum_regs _ _ _ _ H EQ). intros [A [B C]]. + apply wf_add_rhs; auto. +Qed. + +Lemma wf_add_load: + forall n rd chunk addr rs, + wf_numbering n -> + wf_numbering (add_load n rd chunk addr rs). +Proof. + intros. unfold add_load. + caseEq (valnum_regs n rs). intros n' vl EQ. + generalize (wf_valnum_regs _ _ _ _ H EQ). intros [A [B C]]. + apply wf_add_rhs; auto. +Qed. + +Lemma kill_load_eqs_incl: + forall eqs, List.incl (kill_load_eqs eqs) eqs. +Proof. + induction eqs; simpl; intros. + apply incl_refl. + destruct a. destruct r. apply incl_same_head; auto. + auto. + apply incl_tl. auto. +Qed. + +Lemma wf_kill_loads: + forall n, wf_numbering n -> wf_numbering (kill_loads n). +Proof. + intros. inversion H. unfold kill_loads; split; simpl; intros. + apply H0. apply kill_load_eqs_incl. auto. + eauto. +Qed. + +Lemma wf_transfer: + forall f pc n, wf_numbering n -> wf_numbering (transfer f pc n). +Proof. + intros. unfold transfer. + destruct (f.(fn_code)!pc); auto. + destruct i; auto. + apply wf_add_op; auto. + apply wf_add_load; auto. + apply wf_kill_loads; auto. + apply wf_empty_numbering. +Qed. + +(** As a consequence, the numberings computed by the static analysis + are well formed. *) + +Theorem wf_analyze: + forall f pc, wf_numbering (analyze f)!!pc. +Proof. + unfold analyze; intros. + caseEq (Solver.fixpoint (successors f) (fn_nextpc f) + (transfer f) (fn_entrypoint f)). + intros approx EQ. + eapply Solver.fixpoint_invariant with (P := wf_numbering); eauto. + exact wf_empty_numbering. + exact (wf_transfer f). + intro. rewrite PMap.gi. apply wf_empty_numbering. +Qed. + +(** ** Properties of satisfiability of numberings *) + +Module ValnumEq. + Definition t := valnum. + Definition eq := peq. +End ValnumEq. + +Module VMap := EMap(ValnumEq). + +Section SATISFIABILITY. + +Variable ge: genv. +Variable sp: val. +Variable m: mem. + +(** Agremment between two mappings from value numbers to values + up to a given value number. *) + +Definition valu_agree (valu1 valu2: valnum -> val) (upto: valnum) : Prop := + forall v, Plt v upto -> valu2 v = valu1 v. + +Lemma valu_agree_refl: + forall valu upto, valu_agree valu valu upto. +Proof. + intros; red; auto. +Qed. + +Lemma valu_agree_trans: + forall valu1 valu2 valu3 upto12 upto23, + valu_agree valu1 valu2 upto12 -> + valu_agree valu2 valu3 upto23 -> + Ple upto12 upto23 -> + valu_agree valu1 valu3 upto12. +Proof. + intros; red; intros. transitivity (valu2 v). + apply H0. apply Plt_Ple_trans with upto12; auto. + apply H; auto. +Qed. + +Lemma valu_agree_list: + forall valu1 valu2 upto vl, + valu_agree valu1 valu2 upto -> + (forall v, In v vl -> Plt v upto) -> + map valu2 vl = map valu1 vl. +Proof. + intros. apply list_map_exten. intros. symmetry. apply H. auto. +Qed. + +(** The [numbering_holds] predicate (defined in file [CSE]) is + extensional with respect to [valu_agree]. *) + +Lemma numbering_holds_exten: + forall valu1 valu2 n rs, + valu_agree valu1 valu2 n.(num_next) -> + wf_numbering n -> + numbering_holds valu1 ge sp rs m n -> + numbering_holds valu2 ge sp rs m n. +Proof. + intros. inversion H0. inversion H1. split; intros. + generalize (H2 _ _ H6). intro WFEQ. + generalize (H4 _ _ H6). + unfold equation_holds; destruct rh. + elim WFEQ; intros. + rewrite (valu_agree_list valu1 valu2 n.(num_next)). + rewrite H. auto. auto. auto. exact H8. + elim WFEQ; intros. + rewrite (valu_agree_list valu1 valu2 n.(num_next)). + rewrite H. auto. auto. auto. exact H8. + rewrite H. auto. eauto. +Qed. + +(** [valnum_reg] and [valnum_regs] preserve the [numbering_holds] predicate. + Moreover, it is always the case that the returned value number has + the value of the given register in the final assignment of values to + value numbers. *) + +Lemma valnum_reg_holds: + forall valu1 rs n r n' v, + wf_numbering n -> + numbering_holds valu1 ge sp rs m n -> + valnum_reg n r = (n', v) -> + exists valu2, + numbering_holds valu2 ge sp rs m n' /\ + valu2 v = rs#r /\ + valu_agree valu1 valu2 n.(num_next). +Proof. + intros until v. unfold valnum_reg. + caseEq (n.(num_reg)!r). + (* Register already has a value number *) + intros. inversion H2. subst n'; subst v0. + inversion H1. + exists valu1. split. auto. + split. symmetry. auto. + apply valu_agree_refl. + (* Register gets a fresh value number *) + intros. inversion H2. subst n'. subst v. inversion H1. + set (valu2 := VMap.set n.(num_next) rs#r valu1). + assert (AG: valu_agree valu1 valu2 n.(num_next)). + red; intros. unfold valu2. apply VMap.gso. + auto with coqlib. + elim (numbering_holds_exten _ _ _ _ AG H0 H1); intros. + exists valu2. + split. split; simpl; intros. auto. + unfold valu2, VMap.set, ValnumEq.eq. + rewrite PTree.gsspec in H7. destruct (peq r0 r). + inversion H7. rewrite peq_true. congruence. + case (peq vn (num_next n)); intro. + inversion H0. generalize (H9 _ _ H7). rewrite e. intro. + elim (Plt_strict _ H10). + auto. + split. unfold valu2. apply VMap.gss. + auto. +Qed. + +Lemma valnum_regs_holds: + forall rs rl valu1 n n' vl, + wf_numbering n -> + numbering_holds valu1 ge sp rs m n -> + valnum_regs n rl = (n', vl) -> + exists valu2, + numbering_holds valu2 ge sp rs m n' /\ + List.map valu2 vl = rs##rl /\ + valu_agree valu1 valu2 n.(num_next). +Proof. + induction rl; simpl; intros. + (* base case *) + inversion H1; subst n'; subst vl. + exists valu1. split. auto. split. reflexivity. apply valu_agree_refl. + (* inductive case *) + caseEq (valnum_reg n a); intros n1 v1 EQ1. + caseEq (valnum_regs n1 rl); intros ns vs EQs. + rewrite EQ1 in H1; rewrite EQs in H1. inversion H1. subst vl; subst n'. + generalize (valnum_reg_holds _ _ _ _ _ _ H H0 EQ1). + intros [valu2 [A [B C]]]. + generalize (wf_valnum_reg _ _ _ _ H EQ1). intros [D [E F]]. + generalize (IHrl _ _ _ _ D A EQs). + intros [valu3 [P [Q R]]]. + exists valu3. + split. auto. + split. simpl. rewrite R. congruence. auto. + eapply valu_agree_trans; eauto. +Qed. + +(** A reformulation of the [equation_holds] predicate in terms + of the value to which a right-hand side of an equation evaluates. *) + +Definition rhs_evals_to + (valu: valnum -> val) (rh: rhs) (v: val) : Prop := + match rh with + | Op op vl => + eval_operation ge sp op (List.map valu vl) = Some v + | Load chunk addr vl => + exists a, + eval_addressing ge sp addr (List.map valu vl) = Some a /\ + loadv chunk m a = Some v + end. + +Lemma equation_evals_to_holds_1: + forall valu rh v vres, + rhs_evals_to valu rh v -> + equation_holds valu ge sp m vres rh -> + valu vres = v. +Proof. + intros until vres. unfold rhs_evals_to, equation_holds. + destruct rh. congruence. + intros [a1 [A1 B1]] [a2 [A2 B2]]. congruence. +Qed. + +Lemma equation_evals_to_holds_2: + forall valu rh v vres, + wf_rhs vres rh -> + rhs_evals_to valu rh v -> + equation_holds (VMap.set vres v valu) ge sp m vres rh. +Proof. + intros until vres. unfold wf_rhs, rhs_evals_to, equation_holds. + rewrite VMap.gss. + assert (forall vl: list valnum, + (forall v, In v vl -> Plt v vres) -> + map (VMap.set vres v valu) vl = map valu vl). + intros. apply list_map_exten. intros. + symmetry. apply VMap.gso. apply Plt_ne. auto. + destruct rh; intros; rewrite H; auto. +Qed. + +(** The numbering obtained by adding an equation [rd = rhs] is satisfiable + in a concrete register set where [rd] is set to the value of [rhs]. *) + +Lemma add_rhs_satisfiable: + forall n rh valu1 rs rd v, + wf_numbering n -> + wf_rhs n.(num_next) rh -> + numbering_holds valu1 ge sp rs m n -> + rhs_evals_to valu1 rh v -> + numbering_satisfiable ge sp (rs#rd <- v) m (add_rhs n rd rh). +Proof. + intros. unfold add_rhs. + caseEq (find_valnum_rhs rh n.(num_eqs)). + (* RHS found *) + intros vres FINDVN. inversion H1. + exists valu1. split; simpl; intros. + auto. + rewrite Regmap.gsspec. + rewrite PTree.gsspec in H5. + destruct (peq r rd). + symmetry. eapply equation_evals_to_holds_1; eauto. + apply H3. apply find_valnum_rhs_correct. congruence. + auto. + (* RHS not found *) + intro FINDVN. + set (valu2 := VMap.set n.(num_next) v valu1). + assert (AG: valu_agree valu1 valu2 n.(num_next)). + red; intros. unfold valu2. apply VMap.gso. + auto with coqlib. + elim (numbering_holds_exten _ _ _ _ AG H H1); intros. + exists valu2. split; simpl; intros. + elim H5; intro. + inversion H6; subst vn; subst rh0. + unfold valu2. eapply equation_evals_to_holds_2; eauto. + auto. + rewrite Regmap.gsspec. rewrite PTree.gsspec in H5. destruct (peq r rd). + unfold valu2. inversion H5. symmetry. apply VMap.gss. + auto. +Qed. + +(** [add_op] returns a numbering that is satisfiable in the register + set after execution of the corresponding [Iop] instruction. *) + +Lemma add_op_satisfiable: + forall n rs op args dst v, + wf_numbering n -> + numbering_satisfiable ge sp rs m n -> + eval_operation ge sp op rs##args = Some v -> + numbering_satisfiable ge sp (rs#dst <- v) m (add_op n dst op args). +Proof. + intros. inversion H0. + unfold add_op. + caseEq (@is_move_operation reg op args). + intros arg EQ. + destruct (is_move_operation_correct _ _ EQ) as [A B]. subst op args. + caseEq (valnum_reg n arg). intros n1 v1 VL. + generalize (valnum_reg_holds _ _ _ _ _ _ H H2 VL). intros [valu2 [A [B C]]]. + generalize (wf_valnum_reg _ _ _ _ H VL). intros [D [E F]]. + elim A; intros. exists valu2; split; simpl; intros. + auto. rewrite Regmap.gsspec. rewrite PTree.gsspec in H5. + destruct (peq r dst). simpl in H1. congruence. auto. + intro NEQ. caseEq (valnum_regs n args). intros n1 vl VRL. + generalize (valnum_regs_holds _ _ _ _ _ _ H H2 VRL). intros [valu2 [A [B C]]]. + generalize (wf_valnum_regs _ _ _ _ H VRL). intros [D [E F]]. + apply add_rhs_satisfiable with valu2; auto. + simpl. congruence. +Qed. + +(** [add_load] returns a numbering that is satisfiable in the register + set after execution of the corresponding [Iload] instruction. *) + +Lemma add_load_satisfiable: + forall n rs chunk addr args dst a v, + wf_numbering n -> + numbering_satisfiable ge sp rs m n -> + eval_addressing ge sp addr rs##args = Some a -> + loadv chunk m a = Some v -> + numbering_satisfiable ge sp + (rs#dst <- v) + m (add_load n dst chunk addr args). +Proof. + intros. inversion H0. + unfold add_load. + caseEq (valnum_regs n args). intros n1 vl VRL. + generalize (valnum_regs_holds _ _ _ _ _ _ H H3 VRL). intros [valu2 [A [B C]]]. + generalize (wf_valnum_regs _ _ _ _ H VRL). intros [D [E F]]. + apply add_rhs_satisfiable with valu2; auto. + simpl. exists a; split; congruence. +Qed. + +(** [kill_load] preserves satisfiability. Moreover, the resulting numbering + is satisfiable in any concrete memory state. *) + +Lemma kill_load_eqs_ops: + forall v rhs eqs, + In (v, rhs) (kill_load_eqs eqs) -> + match rhs with Op _ _ => True | Load _ _ _ => False end. +Proof. + induction eqs; simpl; intros. + elim H. + destruct a. destruct r. + elim H; intros. inversion H0; subst v0; subst rhs. auto. + apply IHeqs. auto. + apply IHeqs. auto. +Qed. + +Lemma kill_load_satisfiable: + forall n rs m', + numbering_satisfiable ge sp rs m n -> + numbering_satisfiable ge sp rs m' (kill_loads n). +Proof. + intros. inversion H. inversion H0. + generalize (kill_load_eqs_incl n.(num_eqs)). intro. + exists x. split; intros. + generalize (H1 _ _ (H3 _ H4)). + generalize (kill_load_eqs_ops _ _ _ H4). + destruct rh; simpl. auto. tauto. + apply H2. assumption. +Qed. + +(** Correctness of [reg_valnum]: if it returns a register [r], + that register correctly maps back to the given value number. *) + +Lemma reg_valnum_correct: + forall n v r, reg_valnum n v = Some r -> n.(num_reg)!r = Some v. +Proof. + intros until r. unfold reg_valnum. rewrite PTree.fold_spec. + assert(forall l acc0, + List.fold_left + (fun (acc: option reg) (p: reg * valnum) => + if peq (snd p) v then Some (fst p) else acc) + l acc0 = Some r -> + In (r, v) l \/ acc0 = Some r). + induction l; simpl. + intros. tauto. + case a; simpl; intros r1 v1 acc0 FL. + generalize (IHl _ FL). + case (peq v1 v); intro. + subst v1. intros [A|B]. tauto. inversion B; subst r1. tauto. + tauto. + intro. elim (H _ _ H0); intro. + apply PTree.elements_complete; auto. + discriminate. +Qed. + +(** Correctness of [find_op] and [find_load]: if successful and in a + satisfiable numbering, the returned register does contain the + result value of the operation or memory load. *) + +Lemma find_rhs_correct: + forall valu rs n rh r, + numbering_holds valu ge sp rs m n -> + find_rhs n rh = Some r -> + rhs_evals_to valu rh rs#r. +Proof. + intros until r. intros NH. + unfold find_rhs. + caseEq (find_valnum_rhs rh n.(num_eqs)); intros. + generalize (find_valnum_rhs_correct _ _ _ H); intro. + generalize (reg_valnum_correct _ _ _ H0); intro. + inversion NH. + generalize (H3 _ _ H1). rewrite (H4 _ _ H2). + destruct rh; simpl; auto. + discriminate. +Qed. + +Lemma find_op_correct: + forall rs n op args r, + wf_numbering n -> + numbering_satisfiable ge sp rs m n -> + find_op n op args = Some r -> + eval_operation ge sp op rs##args = Some rs#r. +Proof. + intros until r. intros WF [valu NH]. + unfold find_op. caseEq (valnum_regs n args). intros n' vl VR FIND. + generalize (valnum_regs_holds _ _ _ _ _ _ WF NH VR). + intros [valu2 [NH2 [EQ AG]]]. + rewrite <- EQ. + change (rhs_evals_to valu2 (Op op vl) rs#r). + eapply find_rhs_correct; eauto. +Qed. + +Lemma find_load_correct: + forall rs n chunk addr args r, + wf_numbering n -> + numbering_satisfiable ge sp rs m n -> + find_load n chunk addr args = Some r -> + exists a, + eval_addressing ge sp addr rs##args = Some a /\ + loadv chunk m a = Some rs#r. +Proof. + intros until r. intros WF [valu NH]. + unfold find_load. caseEq (valnum_regs n args). intros n' vl VR FIND. + generalize (valnum_regs_holds _ _ _ _ _ _ WF NH VR). + intros [valu2 [NH2 [EQ AG]]]. + rewrite <- EQ. + change (rhs_evals_to valu2 (Load chunk addr vl) rs#r). + eapply find_rhs_correct; eauto. +Qed. + +End SATISFIABILITY. + +(** The transfer function preserves satisfiability of numberings. *) + +Lemma transfer_correct: + forall ge c sp pc rs m pc' rs' m' f n, + exec_instr ge c sp pc rs m pc' rs' m' -> + c = f.(fn_code) -> + wf_numbering n -> + numbering_satisfiable ge sp rs m n -> + numbering_satisfiable ge sp rs' m' (transfer f pc n). +Proof. + induction 1; intros; subst c; unfold transfer; rewrite H; auto. + (* Iop *) + eapply add_op_satisfiable; eauto. + (* Iload *) + eapply add_load_satisfiable; eauto. + (* Istore *) + eapply kill_load_satisfiable; eauto. + (* Icall *) + apply empty_numbering_satisfiable. +Qed. + +(** The numberings associated to each instruction by the static analysis + are inductively satisfiable, in the following sense: the numbering + at the function entry point is satisfiable, and for any RTL execution + from [pc] to [pc'], satisfiability at [pc] implies + satisfiability at [pc']. *) + +Theorem analysis_correct_1: + forall ge c sp pc rs m pc' rs' m' f, + exec_instr ge c sp pc rs m pc' rs' m' -> + c = f.(fn_code) -> + numbering_satisfiable ge sp rs m (analyze f)!!pc -> + numbering_satisfiable ge sp rs' m' (analyze f)!!pc'. +Proof. + intros until f. intros EXEC CODE. + generalize (wf_analyze f pc). + unfold analyze. + caseEq (Solver.fixpoint (successors f) (fn_nextpc f) + (transfer f) (fn_entrypoint f)). + intros res FIXPOINT WF NS. + assert (numbering_satisfiable ge sp rs' m' (transfer f pc res!!pc)). + eapply transfer_correct; eauto. + assert (Numbering.ge res!!pc' (transfer f pc res!!pc)). + eapply Solver.fixpoint_solution; eauto. + elim (fn_code_wf f pc); intro. auto. + rewrite <- CODE in H0. + elim (exec_instr_present _ _ _ _ _ _ _ _ _ EXEC H0). + rewrite CODE in EXEC. eapply successors_correct; eauto. + apply H0. auto. + intros. rewrite PMap.gi. apply empty_numbering_satisfiable. +Qed. + +Theorem analysis_correct_N: + forall ge c sp pc rs m pc' rs' m' f, + exec_instrs ge c sp pc rs m pc' rs' m' -> + c = f.(fn_code) -> + numbering_satisfiable ge sp rs m (analyze f)!!pc -> + numbering_satisfiable ge sp rs' m' (analyze f)!!pc'. +Proof. + induction 1; intros. + assumption. + eapply analysis_correct_1; eauto. + eauto. +Qed. + +Theorem analysis_correct_entry: + forall ge sp rs m f, + numbering_satisfiable ge sp rs m (analyze f)!!(f.(fn_entrypoint)). +Proof. + intros. + replace ((analyze f)!!(f.(fn_entrypoint))) + with empty_numbering. + apply empty_numbering_satisfiable. + unfold analyze. + caseEq (Solver.fixpoint (successors f) (fn_nextpc f) + (transfer f) (fn_entrypoint f)). + intros res FIXPOINT. + symmetry. change empty_numbering with Solver.L.top. + eapply Solver.fixpoint_entry; eauto. + intros. symmetry. apply PMap.gi. +Qed. + +(** * Semantic preservation *) + +Section PRESERVATION. + +Variable prog: program. +Let tprog := transf_program prog. +Let ge := Genv.globalenv prog. +Let tge := Genv.globalenv tprog. + +Lemma symbols_preserved: + forall (s: ident), Genv.find_symbol tge s = Genv.find_symbol ge s. +Proof (Genv.find_symbol_transf transf_function prog). + +Lemma functions_translated: + forall (v: val) (f: RTL.function), + Genv.find_funct ge v = Some f -> + Genv.find_funct tge v = Some (transf_function f). +Proof (@Genv.find_funct_transf _ _ transf_function prog). + +Lemma funct_ptr_translated: + forall (b: block) (f: RTL.function), + Genv.find_funct_ptr ge b = Some f -> + Genv.find_funct_ptr tge b = Some (transf_function f). +Proof (@Genv.find_funct_ptr_transf _ _ transf_function prog). + +(** The proof of semantic preservation is a simulation argument using + diagrams of the following form: +<< + pc, rs, m ------------------------ pc, rs, m + | | + | | + v v + pc', rs', m' --------------------- pc', rs', m' +>> + Left: RTL execution in the original program. Right: RTL execution in + the optimized program. Precondition (top): the numbering at [pc] + (returned by the static analysis) is satisfiable. Postcondition: none. +*) + +Definition exec_instr_prop + (c: code) (sp: val) + (pc: node) (rs: regset) (m: mem) + (pc': node) (rs': regset) (m': mem) : Prop := + forall f + (CF: c = f.(RTL.fn_code)) + (SAT: numbering_satisfiable ge sp rs m (analyze f)!!pc), + exec_instr tge (transf_code (analyze f) c) sp pc rs m pc' rs' m'. + +Definition exec_instrs_prop + (c: code) (sp: val) + (pc: node) (rs: regset) (m: mem) + (pc': node) (rs': regset) (m': mem) : Prop := + forall f + (CF: c = f.(RTL.fn_code)) + (SAT: numbering_satisfiable ge sp rs m (analyze f)!!pc), + exec_instrs tge (transf_code (analyze f) c) sp pc rs m pc' rs' m'. + +Definition exec_function_prop + (f: RTL.function) (args: list val) (m: mem) + (res: val) (m': mem) : Prop := + exec_function tge (transf_function f) args m res m'. + +Ltac TransfInstr := + match goal with + | H1: (PTree.get ?pc ?c = Some ?instr), f: function |- _ => + cut ((transf_code (analyze f) c)!pc = Some(transf_instr (analyze f)!!pc instr)); + [ simpl + | unfold transf_code; rewrite PTree.gmap; + unfold option_map; rewrite H1; reflexivity ] + end. + +(** The proof of simulation is by structural induction on the evaluation + derivation for the source program. *) + +Lemma transf_function_correct: + forall f args m res m', + exec_function ge f args m res m' -> + exec_function_prop f args m res m'. +Proof. + apply (exec_function_ind_3 ge + exec_instr_prop exec_instrs_prop exec_function_prop); + intros; red; intros; try TransfInstr. + (* Inop *) + intro; apply exec_Inop; auto. + (* Iop *) + assert (eval_operation tge sp op rs##args = Some v). + rewrite <- H0. apply eval_operation_preserved. exact symbols_preserved. + case (is_trivial_op op). + intro. eapply exec_Iop'; eauto. + caseEq (find_op (analyze f)!!pc op args). intros r FIND CODE. + eapply exec_Iop'; eauto. simpl. + assert (eval_operation ge sp op rs##args = Some rs#r). + eapply find_op_correct; eauto. + eapply wf_analyze; eauto. + congruence. + intros. eapply exec_Iop'; eauto. + (* Iload *) + assert (eval_addressing tge sp addr rs##args = Some a). + rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved. + caseEq (find_load (analyze f)!!pc chunk addr args). intros r FIND CODE. + eapply exec_Iop'; eauto. simpl. + assert (exists a, eval_addressing ge sp addr rs##args = Some a + /\ loadv chunk m a = Some rs#r). + eapply find_load_correct; eauto. + eapply wf_analyze; eauto. + elim H3; intros a' [A B]. + congruence. + intros. eapply exec_Iload'; eauto. + (* Istore *) + assert (eval_addressing tge sp addr rs##args = Some a). + rewrite <- H0. apply eval_addressing_preserved. exact symbols_preserved. + intro; eapply exec_Istore; eauto. + (* Icall *) + assert (find_function tge ros rs = Some (transf_function f)). + destruct ros; simpl in H0; simpl. + apply functions_translated; auto. + rewrite symbols_preserved. destruct (Genv.find_symbol ge i). + apply funct_ptr_translated; auto. discriminate. + intro; eapply exec_Icall with (f := transf_function f); eauto. + (* Icond true *) + intro; eapply exec_Icond_true; eauto. + (* Icond false *) + intro; eapply exec_Icond_false; eauto. + (* refl *) + apply exec_refl. + (* one *) + apply exec_one; auto. + (* trans *) + eapply exec_trans; eauto. apply H2; auto. + eapply analysis_correct_N; eauto. + (* function *) + intro. unfold transf_function; eapply exec_funct; simpl; eauto. + eapply H1; eauto. eapply analysis_correct_entry; eauto. +Qed. + +Theorem transf_program_correct: + forall (r: val), + exec_program prog r -> exec_program tprog r. +Proof. + intros r [fptr [f [m [FINDS [FINDF [SIG EXEC]]]]]]. + red. exists fptr; exists (transf_function f); exists m. + split. change (prog_main tprog) with (prog_main prog). + rewrite symbols_preserved. assumption. + split. apply funct_ptr_translated; auto. + split. unfold transf_function. + rewrite <- SIG. destruct (analyze f); reflexivity. + apply transf_function_correct. + unfold tprog, transf_program. rewrite Genv.init_mem_transf. + exact EXEC. +Qed. + +End PRESERVATION. diff --git a/backend/Cmconstr.v b/backend/Cmconstr.v new file mode 100644 index 00000000..cd50e38e --- /dev/null +++ b/backend/Cmconstr.v @@ -0,0 +1,911 @@ +(** Smart constructors for Cminor. This library provides functions + for building Cminor expressions and statements, especially expressions + consisting of operator applications. These functions examine their + arguments to choose cheaper forms of operators whenever possible. + + For instance, [add e1 e2] will return a Cminor expression semantically + equivalent to [Eop Oadd (e1 ::: e2 ::: Enil)], but will use a + [Oaddimm] operator if one of the arguments is an integer constant, + or suppress the addition altogether if one of the arguments is the + null integer. In passing, we perform operator reassociation + ([(e + c1) * c2] becomes [(e * c2) + (c1 * c2)]) and a small amount + of constant propagation. + + In more general terms, the purpose of the smart constructors is twofold: +- Perform instruction selection (for operators, loads, stores and + conditional expressions); +- Abstract over processor dependencies in operators and addressing modes, + providing Cminor providers with processor-independent ways of constructing + Cminor terms. +*) + +Require Import Coqlib. +Require Import Compare_dec. +Require Import Maps. +Require Import AST. +Require Import Integers. +Require Import Floats. +Require Import Values. +Require Import Mem. +Require Import Op. +Require Import Globalenvs. +Require Import Cminor. + +Infix ":::" := Econs (at level 60, right associativity) : cminor_scope. + +Open Scope cminor_scope. + +(** * Lifting of let-bound variables *) + +(** Some of the smart constructors, as well as the Cminor producers, + generate [Elet] constructs to share the evaluation of a subexpression. + Owing to the use of de Bruijn indices for let-bound variables, + we need to shift de Bruijn indices when an expression [b] is put + in a [Elet a b] context. *) + +Fixpoint lift_expr (p: nat) (a: expr) {struct a}: expr := + match a with + | Evar id => Evar id + | Eassign id b => Eassign id (lift_expr p b) + | Eop op bl => Eop op (lift_exprlist p bl) + | Eload chunk addr bl => Eload chunk addr (lift_exprlist p bl) + | Estore chunk addr bl c => + Estore chunk addr (lift_exprlist p bl) (lift_expr p c) + | Ecall sig b cl => Ecall sig (lift_expr p b) (lift_exprlist p cl) + | Econdition b c d => + Econdition (lift_condexpr p b) (lift_expr p c) (lift_expr p d) + | Elet b c => Elet (lift_expr p b) (lift_expr (S p) c) + | Eletvar n => + if le_gt_dec p n then Eletvar (S n) else Eletvar n + end + +with lift_condexpr (p: nat) (a: condexpr) {struct a}: condexpr := + match a with + | CEtrue => CEtrue + | CEfalse => CEfalse + | CEcond cond bl => CEcond cond (lift_exprlist p bl) + | CEcondition b c d => + CEcondition (lift_condexpr p b) (lift_condexpr p c) (lift_condexpr p d) + end + +with lift_exprlist (p: nat) (a: exprlist) {struct a}: exprlist := + match a with + | Enil => Enil + | Econs b cl => Econs (lift_expr p b) (lift_exprlist p cl) + end. + +Definition lift (a: expr): expr := lift_expr O a. + +(** * Smart constructors for operators *) + +Definition negint (e: expr) := Eop (Osubimm Int.zero) (e ::: Enil). +Definition negfloat (e: expr) := Eop Onegf (e ::: Enil). +Definition absfloat (e: expr) := Eop Oabsf (e ::: Enil). +Definition intoffloat (e: expr) := Eop Ointoffloat (e ::: Enil). +Definition floatofint (e: expr) := Eop Ofloatofint (e ::: Enil). +Definition floatofintu (e: expr) := Eop Ofloatofintu (e ::: Enil). + +(** ** Integer logical negation *) + +(** The natural way to write smart constructors is by pattern-matching + on their arguments, recognizing cases where cheaper operators + or combined operators are applicable. For instance, integer logical + negation has three special cases (not-and, not-or and not-xor), + along with a default case that uses not-or over its arguments and itself. + This is written naively as follows: +<< +Definition notint (e: expr) := + match e with + | Eop Oand (t1:::t2:::Enil) => Eop Onand (t1:::t2:::Enil) + | Eop Oor (t1:::t2:::Enil) => Eop Onor (t1:::t2:::Enil) + | Eop Oxor (t1:::t2:::Enil) => Eop Onxor (t1:::t2:::Enil) + | _ => Elet(e, Eop Onor (Eletvar O ::: Eletvar O ::: Enil) + end. +>> + However, Coq expands complex pattern-matchings like the above into + elementary matchings over all constructors of an inductive type, + resulting in much duplication of the final catch-all case. + Such duplications generate huge executable code and duplicate + cases in the correctness proofs. + + To limit this duplication, we use the following trick due to + Yves Bertot. We first define a dependent inductive type that + characterizes the expressions that match each of the 4 cases of interest. +*) + +Inductive notint_cases: forall (e: expr), Set := + | notint_case1: + forall (t1: expr) (t2: expr), + notint_cases (Eop Oand (t1:::t2:::Enil)) + | notint_case2: + forall (t1: expr) (t2: expr), + notint_cases (Eop Oor (t1:::t2:::Enil)) + | notint_case3: + forall (t1: expr) (t2: expr), + notint_cases (Eop Oxor (t1:::t2:::Enil)) + | notint_default: + forall (e: expr), + notint_cases e. + +(** We then define a classification function that takes an expression + and return in which case it falls. Note that the catch-all case + [notint_default] does not state that it is mutually exclusive with + the first three, more specific cases. The classification function + nonetheless chooses the specific cases in preference to the catch-all + case. *) + +Definition notint_match (e: expr) := + match e as z1 return notint_cases z1 with + | Eop Oand (t1:::t2:::Enil) => + notint_case1 t1 t2 + | Eop Oor (t1:::t2:::Enil) => + notint_case2 t1 t2 + | Eop Oxor (t1:::t2:::Enil) => + notint_case3 t1 t2 + | e => + notint_default e + end. + +(** Finally, the [notint] function we need is defined by a 4-case match + over the result of the classification function. Thus, no duplication + of the right-hand sides of this match occur, and the proof has only + 4 cases to consider (it proceeds by case over [notint_match e]). + Since the default case is not obviously exclusive with the three + specific cases, it is important that its right-hand side is + semantically correct for all possible values of [e], which is the + case here and for all other smart constructors. *) + +Definition notint (e: expr) := + match notint_match e with + | notint_case1 t1 t2 => + Eop Onand (t1:::t2:::Enil) + | notint_case2 t1 t2 => + Eop Onor (t1:::t2:::Enil) + | notint_case3 t1 t2 => + Eop Onxor (t1:::t2:::Enil) + | notint_default e => + Elet e (Eop Onor (Eletvar O ::: Eletvar O ::: Enil)) + end. + +(** This programming pattern will be applied systematically for the + other smart constructors in this file. *) + +(** ** Boolean negation *) + +Definition notbool_base (e: expr) := + Eop (Ocmp (Ccompuimm Ceq Int.zero)) (e ::: Enil). + +Fixpoint notbool (e: expr) {struct e} : expr := + match e with + | Eop (Ointconst n) Enil => + Eop (Ointconst (if Int.eq n Int.zero then Int.one else Int.zero)) Enil + | Eop (Ocmp cond) args => + Eop (Ocmp (negate_condition cond)) args + | Econdition e1 e2 e3 => + Econdition e1 (notbool e2) (notbool e3) + | _ => + notbool_base e + end. + +(** ** Truncations and sign extensions *) + +Definition cast8signed (e: expr) := Eop Ocast8signed (e ::: Enil). + +Definition cast8unsigned (e: expr) := + Eop (Orolm Int.zero (Int.repr 255)) (e ::: Enil). +Definition cast16signed (e: expr) := + Eop Ocast16signed (e ::: Enil). +Definition cast16unsigned (e: expr) := + Eop (Orolm Int.zero (Int.repr 65535)) (e ::: Enil). +Definition singleoffloat (e: expr) := + Eop Osingleoffloat (e ::: Enil). + +(** ** Integer addition and pointer addition *) + +(* +Definition addimm (n: int) (e: expr) := + if Int.eq n Int.zero then e else + match e with + | Eop (Ointconst m) Enil => Eop (Ointconst(Int.add n m)) Enil + | Eop (Oaddrsymbol s m) Enil => Eop (Oaddrsymbol s (Int.add n m)) Enil + | Eop (Oaddrstack m) Enil => Eop (Oaddrstack (Int.add n m)) Enil + | Eop (Oaddimm m) (t ::: Enil) => Eop (Oaddimm(Int.add n m)) (t ::: Enil) + | _ => Eop (Oaddimm n) (e ::: Enil) + end. +*) + +(** Addition of an integer constant. *) + +Inductive addimm_cases: forall (e: expr), Set := + | addimm_case1: + forall (m: int), + addimm_cases (Eop (Ointconst m) Enil) + | addimm_case2: + forall (s: ident) (m: int), + addimm_cases (Eop (Oaddrsymbol s m) Enil) + | addimm_case3: + forall (m: int), + addimm_cases (Eop (Oaddrstack m) Enil) + | addimm_case4: + forall (m: int) (t: expr), + addimm_cases (Eop (Oaddimm m) (t ::: Enil)) + | addimm_default: + forall (e: expr), + addimm_cases e. + +Definition addimm_match (e: expr) := + match e as z1 return addimm_cases z1 with + | Eop (Ointconst m) Enil => + addimm_case1 m + | Eop (Oaddrsymbol s m) Enil => + addimm_case2 s m + | Eop (Oaddrstack m) Enil => + addimm_case3 m + | Eop (Oaddimm m) (t ::: Enil) => + addimm_case4 m t + | e => + addimm_default e + end. + +Definition addimm (n: int) (e: expr) := + if Int.eq n Int.zero then e else + match addimm_match e with + | addimm_case1 m => + Eop (Ointconst(Int.add n m)) Enil + | addimm_case2 s m => + Eop (Oaddrsymbol s (Int.add n m)) Enil + | addimm_case3 m => + Eop (Oaddrstack (Int.add n m)) Enil + | addimm_case4 m t => + Eop (Oaddimm(Int.add n m)) (t ::: Enil) + | addimm_default e => + Eop (Oaddimm n) (e ::: Enil) + end. + +(** Addition of two integer or pointer expressions. *) + +(* +Definition add (e1: expr) (e2: expr) := + match e1, e2 with + | Eop (Ointconst n1) Enil, t2 => addimm n1 t2 + | Eop (Oaddimm n1) (t1:::Enil), Eop (Oaddimm n2) (t2:::Enil) => addimm (Int.add n1 n2) (Eop Oadd (t1:::t2:::Enil)) + | Eop(Oaddimm n1) (t1:::Enil)), t2 => addimm n1 (Eop Oadd (t1:::t2:::Enil)) + | t1, Eop (Ointconst n2) Enil => addimm n2 t1 + | t1, Eop (Oaddimm n2) (t2:::Enil) => addimm n2 (Eop Oadd (t1:::t2:::Enil)) + | _, _ => Eop Oadd (e1:::e2:::Enil) + end. +*) + +Inductive add_cases: forall (e1: expr) (e2: expr), Set := + | add_case1: + forall (n1: int) (t2: expr), + add_cases (Eop (Ointconst n1) Enil) (t2) + | add_case2: + forall (n1: int) (t1: expr) (n2: int) (t2: expr), + add_cases (Eop (Oaddimm n1) (t1:::Enil)) (Eop (Oaddimm n2) (t2:::Enil)) + | add_case3: + forall (n1: int) (t1: expr) (t2: expr), + add_cases (Eop(Oaddimm n1) (t1:::Enil)) (t2) + | add_case4: + forall (t1: expr) (n2: int), + add_cases (t1) (Eop (Ointconst n2) Enil) + | add_case5: + forall (t1: expr) (n2: int) (t2: expr), + add_cases (t1) (Eop (Oaddimm n2) (t2:::Enil)) + | add_default: + forall (e1: expr) (e2: expr), + add_cases e1 e2. + +Definition add_match_aux (e1: expr) (e2: expr) := + match e2 as z2 return add_cases e1 z2 with + | Eop (Ointconst n2) Enil => + add_case4 e1 n2 + | Eop (Oaddimm n2) (t2:::Enil) => + add_case5 e1 n2 t2 + | e2 => + add_default e1 e2 + end. + +Definition add_match (e1: expr) (e2: expr) := + match e1 as z1, e2 as z2 return add_cases z1 z2 with + | Eop (Ointconst n1) Enil, t2 => + add_case1 n1 t2 + | Eop (Oaddimm n1) (t1:::Enil), Eop (Oaddimm n2) (t2:::Enil) => + add_case2 n1 t1 n2 t2 + | Eop(Oaddimm n1) (t1:::Enil), t2 => + add_case3 n1 t1 t2 + | e1, e2 => + add_match_aux e1 e2 + end. + +Definition add (e1: expr) (e2: expr) := + match add_match e1 e2 with + | add_case1 n1 t2 => + addimm n1 t2 + | add_case2 n1 t1 n2 t2 => + addimm (Int.add n1 n2) (Eop Oadd (t1:::t2:::Enil)) + | add_case3 n1 t1 t2 => + addimm n1 (Eop Oadd (t1:::t2:::Enil)) + | add_case4 t1 n2 => + addimm n2 t1 + | add_case5 t1 n2 t2 => + addimm n2 (Eop Oadd (t1:::t2:::Enil)) + | add_default e1 e2 => + Eop Oadd (e1:::e2:::Enil) + end. + +(** ** Integer and pointer subtraction *) + +(* +Definition sub (e1: expr) (e2: expr) := + match e1, e2 with + | t1, Eop (Ointconst n2) Enil => addimm (Int.neg n2) t1 + | Eop (Oaddimm n1) (t1:::Enil), Eop (Oaddimm n2) (t2:::Enil) => addimm +(intsub n1 n2) (Eop Osub (t1:::t2:::Enil)) + | Eop (Oaddimm n1) (t1:::Enil), t2 => addimm n1 (Eop Osub (t1:::t2:::Rni +l)) + | t1, Eop (Oaddimm n2) (t2:::Enil) => addimm (Int.neg n2) (Eop Osub (t1::: +:t2:::Enil)) + | _, _ => Eop Osub (e1:::e2:::Enil) + end. +*) + +Inductive sub_cases: forall (e1: expr) (e2: expr), Set := + | sub_case1: + forall (t1: expr) (n2: int), + sub_cases (t1) (Eop (Ointconst n2) Enil) + | sub_case2: + forall (n1: int) (t1: expr) (n2: int) (t2: expr), + sub_cases (Eop (Oaddimm n1) (t1:::Enil)) (Eop (Oaddimm n2) (t2:::Enil)) + | sub_case3: + forall (n1: int) (t1: expr) (t2: expr), + sub_cases (Eop (Oaddimm n1) (t1:::Enil)) (t2) + | sub_case4: + forall (t1: expr) (n2: int) (t2: expr), + sub_cases (t1) (Eop (Oaddimm n2) (t2:::Enil)) + | sub_default: + forall (e1: expr) (e2: expr), + sub_cases e1 e2. + +Definition sub_match_aux (e1: expr) (e2: expr) := + match e1 as z1 return sub_cases z1 e2 with + | Eop (Oaddimm n1) (t1:::Enil) => + sub_case3 n1 t1 e2 + | e1 => + sub_default e1 e2 + end. + +Definition sub_match (e1: expr) (e2: expr) := + match e2 as z2, e1 as z1 return sub_cases z1 z2 with + | Eop (Ointconst n2) Enil, t1 => + sub_case1 t1 n2 + | Eop (Oaddimm n2) (t2:::Enil), Eop (Oaddimm n1) (t1:::Enil) => + sub_case2 n1 t1 n2 t2 + | Eop (Oaddimm n2) (t2:::Enil), t1 => + sub_case4 t1 n2 t2 + | e2, e1 => + sub_match_aux e1 e2 + end. + +Definition sub (e1: expr) (e2: expr) := + match sub_match e1 e2 with + | sub_case1 t1 n2 => + addimm (Int.neg n2) t1 + | sub_case2 n1 t1 n2 t2 => + addimm (Int.sub n1 n2) (Eop Osub (t1:::t2:::Enil)) + | sub_case3 n1 t1 t2 => + addimm n1 (Eop Osub (t1:::t2:::Enil)) + | sub_case4 t1 n2 t2 => + addimm (Int.neg n2) (Eop Osub (t1:::t2:::Enil)) + | sub_default e1 e2 => + Eop Osub (e1:::e2:::Enil) + end. + +(** ** Rotates and immediate shifts *) + +(* +Definition rolm (e1: expr) := + match e1 with + | Eop (Ointconst n1) Enil => + Eop (Ointconst(Int.and (Int.rol n1 amount2) mask2)) Enil + | Eop (Orolm amount1 mask1) (t1:::Enil) => + let amount := Int.and (Int.add amount1 amount2) Ox1Fl in + let mask := Int.and (Int.rol mask1 amount2) mask2 in + if Int.is_rlw_mask mask + then Eop (Orolm amount mask) (t1:::Enil) + else Eop (Orolm amount2 mask2) (e1:::Enil) + | _ => Eop (Orolm amount2 mask2) (e1:::Enil) + end +*) + +Inductive rolm_cases: forall (e1: expr), Set := + | rolm_case1: + forall (n1: int), + rolm_cases (Eop (Ointconst n1) Enil) + | rolm_case2: + forall (amount1: int) (mask1: int) (t1: expr), + rolm_cases (Eop (Orolm amount1 mask1) (t1:::Enil)) + | rolm_default: + forall (e1: expr), + rolm_cases e1. + +Definition rolm_match (e1: expr) := + match e1 as z1 return rolm_cases z1 with + | Eop (Ointconst n1) Enil => + rolm_case1 n1 + | Eop (Orolm amount1 mask1) (t1:::Enil) => + rolm_case2 amount1 mask1 t1 + | e1 => + rolm_default e1 + end. + +Definition rolm (e1: expr) (amount2 mask2: int) := + match rolm_match e1 with + | rolm_case1 n1 => + Eop (Ointconst(Int.and (Int.rol n1 amount2) mask2)) Enil + | rolm_case2 amount1 mask1 t1 => + let amount := Int.and (Int.add amount1 amount2) (Int.repr 31) in + let mask := Int.and (Int.rol mask1 amount2) mask2 in + if Int.is_rlw_mask mask + then Eop (Orolm amount mask) (t1:::Enil) + else Eop (Orolm amount2 mask2) (e1:::Enil) + | rolm_default e1 => + Eop (Orolm amount2 mask2) (e1:::Enil) + end. + +Definition shlimm (e1: expr) (n2: int) := + if Int.eq n2 Int.zero then + e1 + else if Int.ltu n2 (Int.repr 32) then + rolm e1 n2 (Int.shl Int.mone n2) + else + Eop Oshl (e1:::Eop (Ointconst n2) Enil:::Enil). + +Definition shruimm (e1: expr) (n2: int) := + if Int.eq n2 Int.zero then + e1 + else if Int.ltu n2 (Int.repr 32) then + rolm e1 (Int.sub (Int.repr 32) n2) (Int.shru Int.mone n2) + else + Eop Oshru (e1:::Eop (Ointconst n2) Enil:::Enil). + +(** ** Integer multiply *) + +Definition mulimm_base (n1: int) (e2: expr) := + match Int.one_bits n1 with + | i :: nil => + shlimm e2 i + | i :: j :: nil => + Elet e2 + (Eop Oadd (shlimm (Eletvar 0) i ::: + shlimm (Eletvar 0) j ::: Enil)) + | _ => + Eop (Omulimm n1) (e2:::Enil) + end. + +(* +Definition mulimm (n1: int) (e2: expr) := + if Int.eq n1 Int.zero then + Elet e2 (Eop (Ointconst Int.zero) Enil) + else if Int.eq n1 Int.one then + e2 + else match e2 with + | Eop (Ointconst n2) Enil => Eop (Ointconst(intmul n1 n2)) Enil + | Eop (Oaddimm n2) (t2:::Enil) => addimm (intmul n1 n2) (mulimm_base n1 t2) + | _ => mulimm_base n1 e2 + end. +*) + +Inductive mulimm_cases: forall (e2: expr), Set := + | mulimm_case1: + forall (n2: int), + mulimm_cases (Eop (Ointconst n2) Enil) + | mulimm_case2: + forall (n2: int) (t2: expr), + mulimm_cases (Eop (Oaddimm n2) (t2:::Enil)) + | mulimm_default: + forall (e2: expr), + mulimm_cases e2. + +Definition mulimm_match (e2: expr) := + match e2 as z1 return mulimm_cases z1 with + | Eop (Ointconst n2) Enil => + mulimm_case1 n2 + | Eop (Oaddimm n2) (t2:::Enil) => + mulimm_case2 n2 t2 + | e2 => + mulimm_default e2 + end. + +Definition mulimm (n1: int) (e2: expr) := + if Int.eq n1 Int.zero then + Elet e2 (Eop (Ointconst Int.zero) Enil) + else if Int.eq n1 Int.one then + e2 + else match mulimm_match e2 with + | mulimm_case1 n2 => + Eop (Ointconst(Int.mul n1 n2)) Enil + | mulimm_case2 n2 t2 => + addimm (Int.mul n1 n2) (mulimm_base n1 t2) + | mulimm_default e2 => + mulimm_base n1 e2 + end. + +(* +Definition mul (e1: expr) (e2: expr) := + match e1, e2 with + | Eop (Ointconst n1) Enil, t2 => mulimm n1 t2 + | t1, Eop (Ointconst n2) Enil => mulimm n2 t1 + | _, _ => Eop Omul (e1:::e2:::Enil) + end. +*) + +Inductive mul_cases: forall (e1: expr) (e2: expr), Set := + | mul_case1: + forall (n1: int) (t2: expr), + mul_cases (Eop (Ointconst n1) Enil) (t2) + | mul_case2: + forall (t1: expr) (n2: int), + mul_cases (t1) (Eop (Ointconst n2) Enil) + | mul_default: + forall (e1: expr) (e2: expr), + mul_cases e1 e2. + +Definition mul_match_aux (e1: expr) (e2: expr) := + match e2 as z2 return mul_cases e1 z2 with + | Eop (Ointconst n2) Enil => + mul_case2 e1 n2 + | e2 => + mul_default e1 e2 + end. + +Definition mul_match (e1: expr) (e2: expr) := + match e1 as z1 return mul_cases z1 e2 with + | Eop (Ointconst n1) Enil => + mul_case1 n1 e2 + | e1 => + mul_match_aux e1 e2 + end. + +Definition mul (e1: expr) (e2: expr) := + match mul_match e1 e2 with + | mul_case1 n1 t2 => + mulimm n1 t2 + | mul_case2 t1 n2 => + mulimm n2 t1 + | mul_default e1 e2 => + Eop Omul (e1:::e2:::Enil) + end. + +(** ** Integer division and modulus *) + +Definition divs (e1: expr) (e2: expr) := Eop Odiv (e1:::e2:::Enil). + +Definition mod_aux (divop: operation) (e1 e2: expr) := + Elet e1 + (Elet (lift e2) + (Eop Osub (Eletvar 1 ::: + Eop Omul (Eop divop (Eletvar 1 ::: Eletvar 0 ::: Enil) ::: + Eletvar 0 ::: + Enil) ::: + Enil))). + +Definition mods := mod_aux Odiv. + +Inductive divu_cases: forall (e2: expr), Set := + | divu_case1: + forall (n2: int), + divu_cases (Eop (Ointconst n2) Enil) + | divu_default: + forall (e2: expr), + divu_cases e2. + +Definition divu_match (e2: expr) := + match e2 as z1 return divu_cases z1 with + | Eop (Ointconst n2) Enil => + divu_case1 n2 + | e2 => + divu_default e2 + end. + +Definition divu (e1: expr) (e2: expr) := + match divu_match e2 with + | divu_case1 n2 => + match Int.is_power2 n2 with + | Some l2 => shruimm e1 l2 + | None => Eop Odivu (e1:::e2:::Enil) + end + | divu_default e2 => + Eop Odivu (e1:::e2:::Enil) + end. + +Definition modu (e1: expr) (e2: expr) := + match divu_match e2 with + | divu_case1 n2 => + match Int.is_power2 n2 with + | Some l2 => rolm e1 Int.zero (Int.sub n2 Int.one) + | None => mod_aux Odivu e1 e2 + end + | divu_default e2 => + mod_aux Odivu e1 e2 + end. + +(** ** Bitwise and, or, xor *) + +Definition andimm (n1: int) (e2: expr) := + if Int.is_rlw_mask n1 + then rolm e2 Int.zero n1 + else Eop (Oandimm n1) (e2:::Enil). + +Definition and (e1: expr) (e2: expr) := + match mul_match e1 e2 with + | mul_case1 n1 t2 => + andimm n1 t2 + | mul_case2 t1 n2 => + andimm n2 t1 + | mul_default e1 e2 => + Eop Oand (e1:::e2:::Enil) + end. + +Definition same_expr_pure (e1 e2: expr) := + match e1, e2 with + | Evar v1, Evar v2 => if ident_eq v1 v2 then true else false + | _, _ => false + end. + +Inductive or_cases: forall (e1: expr) (e2: expr), Set := + | or_case1: + forall (amount1: int) (mask1: int) (t1: expr) + (amount2: int) (mask2: int) (t2: expr), + or_cases (Eop (Orolm amount1 mask1) (t1:::Enil)) + (Eop (Orolm amount2 mask2) (t2:::Enil)) + | or_default: + forall (e1: expr) (e2: expr), + or_cases e1 e2. + +Definition or_match (e1: expr) (e2: expr) := + match e1 as z1, e2 as z2 return or_cases z1 z2 with + | Eop (Orolm amount1 mask1) (t1:::Enil), + Eop (Orolm amount2 mask2) (t2:::Enil) => + or_case1 amount1 mask1 t1 amount2 mask2 t2 + | e1, e2 => + or_default e1 e2 + end. + +Definition or (e1: expr) (e2: expr) := + match or_match e1 e2 with + | or_case1 amount1 mask1 t1 amount2 mask2 t2 => + if Int.eq amount1 amount2 + && Int.is_rlw_mask (Int.or mask1 mask2) + && same_expr_pure t1 t2 + then Eop (Orolm amount1 (Int.or mask1 mask2)) (t1:::Enil) + else Eop Oor (e1:::e2:::Enil) + | or_default e1 e2 => + Eop Oor (e1:::e2:::Enil) + end. + +Definition xor (e1 e2: expr) := Eop Oxor (e1:::e2:::Enil). + +(** ** General shifts *) + +Inductive shift_cases: forall (e1: expr), Set := + | shift_case1: + forall (n2: int), + shift_cases (Eop (Ointconst n2) Enil) + | shift_default: + forall (e1: expr), + shift_cases e1. + +Definition shift_match (e1: expr) := + match e1 as z1 return shift_cases z1 with + | Eop (Ointconst n2) Enil => + shift_case1 n2 + | e1 => + shift_default e1 + end. + +Definition shl (e1: expr) (e2: expr) := + match shift_match e2 with + | shift_case1 n2 => + shlimm e1 n2 + | shift_default e2 => + Eop Oshl (e1:::e2:::Enil) + end. + +Definition shr (e1 e2: expr) := + Eop Oshr (e1:::e2:::Enil). + +Definition shru (e1: expr) (e2: expr) := + match shift_match e2 with + | shift_case1 n2 => + shruimm e1 n2 + | shift_default e2 => + Eop Oshru (e1:::e2:::Enil) + end. + +(** ** Floating-point arithmetic *) + +(* +Definition addf (e1: expr) (e2: expr) := + match e1, e2 with + | Eop Omulf (t1:::t2:::Enil), t3 => Eop Omuladdf (t1:::t2:::t3:::Enil) + | t1, Eop Omulf (t2:::t3:::Enil) => Elet t1 (Eop Omuladdf (t2:::t3:::Rvar 0:::Enil)) + | _, _ => Eop Oaddf (e1:::e2:::Enil) + end. +*) + +Inductive addf_cases: forall (e1: expr) (e2: expr), Set := + | addf_case1: + forall (t1: expr) (t2: expr) (t3: expr), + addf_cases (Eop Omulf (t1:::t2:::Enil)) (t3) + | addf_case2: + forall (t1: expr) (t2: expr) (t3: expr), + addf_cases (t1) (Eop Omulf (t2:::t3:::Enil)) + | addf_default: + forall (e1: expr) (e2: expr), + addf_cases e1 e2. + +Definition addf_match_aux (e1: expr) (e2: expr) := + match e2 as z2 return addf_cases e1 z2 with + | Eop Omulf (t2:::t3:::Enil) => + addf_case2 e1 t2 t3 + | e2 => + addf_default e1 e2 + end. + +Definition addf_match (e1: expr) (e2: expr) := + match e1 as z1 return addf_cases z1 e2 with + | Eop Omulf (t1:::t2:::Enil) => + addf_case1 t1 t2 e2 + | e1 => + addf_match_aux e1 e2 + end. + +Definition addf (e1: expr) (e2: expr) := + match addf_match e1 e2 with + | addf_case1 t1 t2 t3 => + Eop Omuladdf (t1:::t2:::t3:::Enil) + | addf_case2 t1 t2 t3 => + Elet t1 (Eop Omuladdf (lift t2:::lift t3:::Eletvar 0:::Enil)) + | addf_default e1 e2 => + Eop Oaddf (e1:::e2:::Enil) + end. + +(* +Definition subf (e1: expr) (e2: expr) := + match e1, e2 with + | Eop Omulfloat (t1:::t2:::Enil), t3 => Eop Omulsubf (t1:::t2:::t3:::Enil) + | _, _ => Eop Osubf (e1:::e2:::Enil) + end. +*) + +Inductive subf_cases: forall (e1: expr) (e2: expr), Set := + | subf_case1: + forall (t1: expr) (t2: expr) (t3: expr), + subf_cases (Eop Omulf (t1:::t2:::Enil)) (t3) + | subf_default: + forall (e1: expr) (e2: expr), + subf_cases e1 e2. + +Definition subf_match (e1: expr) (e2: expr) := + match e1 as z1 return subf_cases z1 e2 with + | Eop Omulf (t1:::t2:::Enil) => + subf_case1 t1 t2 e2 + | e1 => + subf_default e1 e2 + end. + +Definition subf (e1: expr) (e2: expr) := + match subf_match e1 e2 with + | subf_case1 t1 t2 t3 => + Eop Omulsubf (t1:::t2:::t3:::Enil) + | subf_default e1 e2 => + Eop Osubf (e1:::e2:::Enil) + end. + +Definition mulf (e1 e2: expr) := Eop Omulf (e1:::e2:::Enil). +Definition divf (e1 e2: expr) := Eop Odivf (e1:::e2:::Enil). + +(** ** Comparisons and conditional expressions *) + +Definition cmp (c: comparison) (e1 e2: expr) := + Eop (Ocmp (Ccomp c)) (e1:::e2:::Enil). +Definition cmpu (c: comparison) (e1 e2: expr) := + Eop (Ocmp (Ccompu c)) (e1:::e2:::Enil). +Definition cmpf (c: comparison) (e1 e2: expr) := + Eop (Ocmp (Ccompf c)) (e1:::e2:::Enil). + +Fixpoint condexpr_of_expr (e: expr) : condexpr := + match e with + | Eop (Ointconst n) Enil => + if Int.eq n Int.zero then CEfalse else CEtrue + | Eop (Ocmp c) el => CEcond c el + | Econdition e1 e2 e3 => + CEcondition e1 (condexpr_of_expr e2) (condexpr_of_expr e3) + | e => CEcond (Ccompuimm Cne Int.zero) (e:::Enil) + end. + +Definition conditionalexpr (e1 e2 e3: expr) : expr := + Econdition (condexpr_of_expr e1) e2 e3. + +(** ** Recognition of addressing modes for load and store operations *) + +(* +Definition addressing (e: expr) := + match e with + | Eop (Oaddrsymbol s n) Enil => (Aglobal s n, Enil) + | Eop (Oaddrstack n) Enil => (Ainstack n, Enil) + | Eop Oadd (Eop (Oaddrsymbol s n) Enil) e2 => (Abased(s, n), e2:::Enil) + | Eop (Oaddimm n) (e1:::Enil) => (Aindexed n, e1:::Enil) + | Eop Oadd (e1:::e2:::Enil) => (Aindexed2, e1:::e2:::Enil) + | _ => (Aindexed Int.zero, e:::Enil) + end. +*) + +Inductive addressing_cases: forall (e: expr), Set := + | addressing_case1: + forall (s: ident) (n: int), + addressing_cases (Eop (Oaddrsymbol s n) Enil) + | addressing_case2: + forall (n: int), + addressing_cases (Eop (Oaddrstack n) Enil) + | addressing_case3: + forall (s: ident) (n: int) (e2: expr), + addressing_cases + (Eop Oadd (Eop (Oaddrsymbol s n) Enil:::e2:::Enil)) + | addressing_case4: + forall (n: int) (e1: expr), + addressing_cases (Eop (Oaddimm n) (e1:::Enil)) + | addressing_case5: + forall (e1: expr) (e2: expr), + addressing_cases (Eop Oadd (e1:::e2:::Enil)) + | addressing_default: + forall (e: expr), + addressing_cases e. + +Definition addressing_match (e: expr) := + match e as z1 return addressing_cases z1 with + | Eop (Oaddrsymbol s n) Enil => + addressing_case1 s n + | Eop (Oaddrstack n) Enil => + addressing_case2 n + | Eop Oadd (Eop (Oaddrsymbol s n) Enil:::e2:::Enil) => + addressing_case3 s n e2 + | Eop (Oaddimm n) (e1:::Enil) => + addressing_case4 n e1 + | Eop Oadd (e1:::e2:::Enil) => + addressing_case5 e1 e2 + | e => + addressing_default e + end. + +Definition addressing (e: expr) := + match addressing_match e with + | addressing_case1 s n => + (Aglobal s n, Enil) + | addressing_case2 n => + (Ainstack n, Enil) + | addressing_case3 s n e2 => + (Abased s n, e2:::Enil) + | addressing_case4 n e1 => + (Aindexed n, e1:::Enil) + | addressing_case5 e1 e2 => + (Aindexed2, e1:::e2:::Enil) + | addressing_default e => + (Aindexed Int.zero, e:::Enil) + end. + +Definition load (chunk: memory_chunk) (e1: expr) := + match addressing e1 with + | (mode, args) => Eload chunk mode args + end. + +Definition store (chunk: memory_chunk) (e1 e2: expr) := + match addressing e1 with + | (mode, args) => Estore chunk mode args e2 + end. + +(** ** If-then-else statement *) + +Definition ifthenelse (e: expr) (ifso ifnot: stmtlist) : stmt := + Sifthenelse (condexpr_of_expr e) ifso ifnot. diff --git a/backend/Cmconstrproof.v b/backend/Cmconstrproof.v new file mode 100644 index 00000000..19b7473d --- /dev/null +++ b/backend/Cmconstrproof.v @@ -0,0 +1,1154 @@ +(** Correctness of the Cminor smart constructors. This file states + evaluation rules for the smart constructors, for instance that [add + a b] evaluates to [Vint(Int.add i j)] if [a] evaluates to [Vint i] + and [b] to [Vint j]. It then proves that these rules are + admissible, that is, satisfied for all possible choices of [a] and + [b]. The Cminor producer can then use these evaluation rules + (theorems) to reason about the execution of terms produced by the + smart constructors. +*) + +Require Import Coqlib. +Require Import Compare_dec. +Require Import Maps. +Require Import AST. +Require Import Integers. +Require Import Floats. +Require Import Values. +Require Import Mem. +Require Import Op. +Require Import Globalenvs. +Require Import Cminor. +Require Import Cmconstr. + +Section CMCONSTR. + +Variable ge: Cminor.genv. + +(** * Lifting of let-bound variables *) + +Inductive insert_lenv: letenv -> nat -> val -> letenv -> Prop := + | insert_lenv_0: + forall le v, + insert_lenv le O v (v :: le) + | insert_lenv_S: + forall le p w le' v, + insert_lenv le p w le' -> + insert_lenv (v :: le) (S p) w (v :: le'). + +Lemma insert_lenv_lookup1: + forall le p w le', + insert_lenv le p w le' -> + forall n v, + nth_error le n = Some v -> (p > n)%nat -> + nth_error le' n = Some v. +Proof. + induction 1; intros. + omegaContradiction. + destruct n; simpl; simpl in H0. auto. + apply IHinsert_lenv. auto. omega. +Qed. + +Lemma insert_lenv_lookup2: + forall le p w le', + insert_lenv le p w le' -> + forall n v, + nth_error le n = Some v -> (p <= n)%nat -> + nth_error le' (S n) = Some v. +Proof. + induction 1; intros. + simpl. assumption. + simpl. destruct n. omegaContradiction. + apply IHinsert_lenv. exact H0. omega. +Qed. + +Scheme eval_expr_ind_3 := Minimality for eval_expr Sort Prop + with eval_condexpr_ind_3 := Minimality for eval_condexpr Sort Prop + with eval_exprlist_ind_3 := Minimality for eval_exprlist Sort Prop. + +Hint Resolve eval_Evar eval_Eassign eval_Eop eval_Eload eval_Estore + eval_Ecall eval_Econdition + eval_Elet eval_Eletvar + eval_CEtrue eval_CEfalse eval_CEcond + eval_CEcondition eval_Enil eval_Econs: evalexpr. + +Lemma eval_lift_expr: + forall w sp le e1 m1 a e2 m2 v, + eval_expr ge sp le e1 m1 a e2 m2 v -> + forall p le', insert_lenv le p w le' -> + eval_expr ge sp le' e1 m1 (lift_expr p a) e2 m2 v. +Proof. + intros w. + apply (eval_expr_ind_3 ge + (fun sp le e1 m1 a e2 m2 v => + forall p le', insert_lenv le p w le' -> + eval_expr ge sp le' e1 m1 (lift_expr p a) e2 m2 v) + (fun sp le e1 m1 a e2 m2 vb => + forall p le', insert_lenv le p w le' -> + eval_condexpr ge sp le' e1 m1 (lift_condexpr p a) e2 m2 vb) + (fun sp le e1 m1 al e2 m2 vl => + forall p le', insert_lenv le p w le' -> + eval_exprlist ge sp le' e1 m1 (lift_exprlist p al) e2 m2 vl)); + simpl; intros; eauto with evalexpr. + + destruct v1; eapply eval_Econdition; + eauto with evalexpr; simpl; eauto with evalexpr. + + eapply eval_Elet. eauto. apply H2. apply insert_lenv_S; auto. + + case (le_gt_dec p n); intro. + apply eval_Eletvar. eapply insert_lenv_lookup2; eauto. + apply eval_Eletvar. eapply insert_lenv_lookup1; eauto. + + destruct vb1; eapply eval_CEcondition; + eauto with evalexpr; simpl; eauto with evalexpr. +Qed. + +Lemma eval_lift: + forall sp le e1 m1 a e2 m2 v w, + eval_expr ge sp le e1 m1 a e2 m2 v -> + eval_expr ge sp (w::le) e1 m1 (lift a) e2 m2 v. +Proof. + intros. unfold lift. eapply eval_lift_expr. + eexact H. apply insert_lenv_0. +Qed. +Hint Resolve eval_lift: evalexpr. + +(** * Useful lemmas and tactics *) + +Ltac EvalOp := eapply eval_Eop; eauto with evalexpr. + +Ltac TrivialOp cstr := + unfold cstr; intros; EvalOp. + +(** The following are trivial lemmas and custom tactics that help + perform backward (inversion) and forward reasoning over the evaluation + of operator applications. *) + +Lemma inv_eval_Eop_0: + forall sp le e1 m1 op e2 m2 v, + eval_expr ge sp le e1 m1 (Eop op Enil) e2 m2 v -> + e2 = e1 /\ m2 = m1 /\ eval_operation ge sp op nil = Some v. +Proof. + intros. inversion H. inversion H6. + intuition. congruence. +Qed. + +Lemma inv_eval_Eop_1: + forall sp le e1 m1 op a1 e2 m2 v, + eval_expr ge sp le e1 m1 (Eop op (a1 ::: Enil)) e2 m2 v -> + exists v1, + eval_expr ge sp le e1 m1 a1 e2 m2 v1 /\ + eval_operation ge sp op (v1 :: nil) = Some v. +Proof. + intros. + inversion H. inversion H6. inversion H21. + subst e4; subst m4. subst e6; subst m6. + exists v1; intuition. congruence. +Qed. + +Lemma inv_eval_Eop_2: + forall sp le e1 m1 op a1 a2 e3 m3 v, + eval_expr ge sp le e1 m1 (Eop op (a1 ::: a2 ::: Enil)) e3 m3 v -> + exists e2, exists m2, exists v1, exists v2, + eval_expr ge sp le e1 m1 a1 e2 m2 v1 /\ + eval_expr ge sp le e2 m2 a2 e3 m3 v2 /\ + eval_operation ge sp op (v1 :: v2 :: nil) = Some v. +Proof. + intros. + inversion H. inversion H6. inversion H21. inversion H32. + subst e7; subst m7. subst e9; subst m9. + exists e4; exists m4; exists v1; exists v2. intuition. congruence. +Qed. + +Ltac SimplEval := + match goal with + | [ |- (eval_expr _ ?sp ?le ?e1 ?m1 (Eop ?op Enil) ?e2 ?m2 ?v) -> _] => + intro XX1; + generalize (inv_eval_Eop_0 sp le e1 m1 op e2 m2 v XX1); + clear XX1; + intros [XX1 [XX2 XX3]]; + subst e2; subst m2; simpl in XX3; + try (simplify_eq XX3; clear XX3; + let EQ := fresh "EQ" in (intro EQ; rewrite EQ)) + | [ |- (eval_expr _ ?sp ?le ?e1 ?m1 (Eop ?op (?a1 ::: Enil)) ?e2 ?m2 ?v) -> _] => + intro XX1; + generalize (inv_eval_Eop_1 sp le e1 m1 op a1 e2 m2 v XX1); + clear XX1; + let v1 := fresh "v" in let EV := fresh "EV" in + let EQ := fresh "EQ" in + (intros [v1 [EV EQ]]; simpl in EQ) + | [ |- (eval_expr _ ?sp ?le ?e1 ?m1 (Eop ?op (?a1 ::: ?a2 ::: Enil)) ?e2 ?m2 ?v) -> _] => + intro XX1; + generalize (inv_eval_Eop_2 sp le e1 m1 op a1 a2 e2 m2 v XX1); + clear XX1; + let e := fresh "e" in let m := fresh "m" in + let v1 := fresh "v" in let v2 := fresh "v" in + let EV1 := fresh "EV" in let EV2 := fresh "EV" in + let EQ := fresh "EQ" in + (intros [e [m [v1 [v2 [EV1 [EV2 EQ]]]]]]; simpl in EQ) + | _ => idtac + end. + +Ltac InvEval H := + generalize H; SimplEval; clear H. + +(** ** Admissible evaluation rules for the smart constructors *) + +(** All proofs follow a common pattern: +- Reasoning by case over the result of the classification functions + (such as [add_match] for integer addition), gathering additional + information on the shape of the argument expressions in the non-default + cases. +- Inversion of the evaluations of the arguments, exploiting the additional + information thus gathered. +- Equational reasoning over the arithmetic operations performed, + using the lemmas from the [Int] and [Float] modules. +- Construction of an evaluation derivation for the expression returned + by the smart constructor. +*) + +Theorem eval_negint: + forall sp le e1 m1 a e2 m2 x, + eval_expr ge sp le e1 m1 a e2 m2 (Vint x) -> + eval_expr ge sp le e1 m1 (negint a) e2 m2 (Vint (Int.neg x)). +Proof. + TrivialOp negint. +Qed. + +Theorem eval_negfloat: + forall sp le e1 m1 a e2 m2 x, + eval_expr ge sp le e1 m1 a e2 m2 (Vfloat x) -> + eval_expr ge sp le e1 m1 (negfloat a) e2 m2 (Vfloat (Float.neg x)). +Proof. + TrivialOp negfloat. +Qed. + +Theorem eval_absfloat: + forall sp le e1 m1 a e2 m2 x, + eval_expr ge sp le e1 m1 a e2 m2 (Vfloat x) -> + eval_expr ge sp le e1 m1 (absfloat a) e2 m2 (Vfloat (Float.abs x)). +Proof. + TrivialOp absfloat. +Qed. + +Theorem eval_intoffloat: + forall sp le e1 m1 a e2 m2 x, + eval_expr ge sp le e1 m1 a e2 m2 (Vfloat x) -> + eval_expr ge sp le e1 m1 (intoffloat a) e2 m2 (Vint (Float.intoffloat x)). +Proof. + TrivialOp intoffloat. +Qed. + +Theorem eval_floatofint: + forall sp le e1 m1 a e2 m2 x, + eval_expr ge sp le e1 m1 a e2 m2 (Vint x) -> + eval_expr ge sp le e1 m1 (floatofint a) e2 m2 (Vfloat (Float.floatofint x)). +Proof. + TrivialOp floatofint. +Qed. + +Theorem eval_floatofintu: + forall sp le e1 m1 a e2 m2 x, + eval_expr ge sp le e1 m1 a e2 m2 (Vint x) -> + eval_expr ge sp le e1 m1 (floatofintu a) e2 m2 (Vfloat (Float.floatofintu x)). +Proof. + TrivialOp floatofintu. +Qed. + +Theorem eval_notint: + forall sp le e1 m1 a e2 m2 x, + eval_expr ge sp le e1 m1 a e2 m2 (Vint x) -> + eval_expr ge sp le e1 m1 (notint a) e2 m2 (Vint (Int.not x)). +Proof. + unfold notint; intros until x; case (notint_match a); intros. + InvEval H. FuncInv. EvalOp. simpl. congruence. + InvEval H. FuncInv. EvalOp. simpl. congruence. + InvEval H. FuncInv. EvalOp. simpl. congruence. + eapply eval_Elet. eexact H. + eapply eval_Eop. + eapply eval_Econs. apply eval_Eletvar. simpl. reflexivity. + eapply eval_Econs. apply eval_Eletvar. simpl. reflexivity. + apply eval_Enil. + simpl. rewrite Int.or_idem. auto. +Qed. + +Lemma eval_notbool_base: + forall sp le e1 m1 a e2 m2 v b, + eval_expr ge sp le e1 m1 a e2 m2 v -> + Val.bool_of_val v b -> + eval_expr ge sp le e1 m1 (notbool_base a) e2 m2 (Val.of_bool (negb b)). +Proof. + TrivialOp notbool_base. simpl. + inversion H0. + rewrite Int.eq_false; auto. + rewrite Int.eq_true; auto. + reflexivity. +Qed. + +Hint Resolve Val.bool_of_true_val Val.bool_of_false_val + Val.bool_of_true_val_inv Val.bool_of_false_val_inv: valboolof. + +Theorem eval_notbool: + forall a sp le e1 m1 e2 m2 v b, + eval_expr ge sp le e1 m1 a e2 m2 v -> + Val.bool_of_val v b -> + eval_expr ge sp le e1 m1 (notbool a) e2 m2 (Val.of_bool (negb b)). +Proof. + assert (N1: forall v b, Val.is_false v -> Val.bool_of_val v b -> Val.is_true (Val.of_bool (negb b))). + intros. inversion H0; simpl; auto; subst v; simpl in H. + congruence. apply Int.one_not_zero. contradiction. + assert (N2: forall v b, Val.is_true v -> Val.bool_of_val v b -> Val.is_false (Val.of_bool (negb b))). + intros. inversion H0; simpl; auto; subst v; simpl in H. + congruence. + + induction a; simpl; intros; try (eapply eval_notbool_base; eauto). + destruct o; try (eapply eval_notbool_base; eauto). + + destruct e. inversion H. inversion H7. subst vl. + simpl in H11. inversion H11. subst v0; subst v. + inversion H0. rewrite Int.eq_false; auto. + simpl; eauto with evalexpr. + rewrite Int.eq_true; simpl; eauto with evalexpr. + eapply eval_notbool_base; eauto. + + inversion H. subst sp0 le0 e0 m op al e3 m0 v0. + simpl in H11. eapply eval_Eop; eauto. + simpl. caseEq (eval_condition c vl); intros. + rewrite H1 in H11. + assert (b0 = b). + destruct b0; inversion H11; subst v; inversion H0; auto. + subst b0. rewrite (Op.eval_negate_condition _ _ H1). + destruct b; reflexivity. + rewrite H1 in H11; discriminate. + + inversion H; eauto 10 with evalexpr valboolof. + inversion H; eauto 10 with evalexpr valboolof. + + inversion H. eapply eval_Econdition. eexact H11. + destruct v1; eauto. +Qed. + +Theorem eval_cast8signed: + forall sp le e1 m1 a e2 m2 x, + eval_expr ge sp le e1 m1 a e2 m2 (Vint x) -> + eval_expr ge sp le e1 m1 (cast8signed a) e2 m2 (Vint (Int.cast8signed x)). +Proof. TrivialOp cast8signed. Qed. + +Theorem eval_cast8unsigned: + forall sp le e1 m1 a e2 m2 x, + eval_expr ge sp le e1 m1 a e2 m2 (Vint x) -> + eval_expr ge sp le e1 m1 (cast8unsigned a) e2 m2 (Vint (Int.cast8unsigned x)). +Proof. + TrivialOp cast8unsigned. simpl. + rewrite Int.rolm_zero. rewrite Int.cast8unsigned_and. auto. +Qed. + +Theorem eval_cast16signed: + forall sp le e1 m1 a e2 m2 x, + eval_expr ge sp le e1 m1 a e2 m2 (Vint x) -> + eval_expr ge sp le e1 m1 (cast16signed a) e2 m2 (Vint (Int.cast16signed x)). +Proof. TrivialOp cast16signed. Qed. + +Theorem eval_cast16unsigned: + forall sp le e1 m1 a e2 m2 x, + eval_expr ge sp le e1 m1 a e2 m2 (Vint x) -> + eval_expr ge sp le e1 m1 (cast16unsigned a) e2 m2 (Vint (Int.cast16unsigned x)). +Proof. + TrivialOp cast16unsigned. simpl. + rewrite Int.rolm_zero. rewrite Int.cast16unsigned_and. auto. +Qed. + +Theorem eval_singleoffloat: + forall sp le e1 m1 a e2 m2 x, + eval_expr ge sp le e1 m1 a e2 m2 (Vfloat x) -> + eval_expr ge sp le e1 m1 (singleoffloat a) e2 m2 (Vfloat (Float.singleoffloat x)). +Proof. + TrivialOp singleoffloat. +Qed. + +Theorem eval_addimm: + forall sp le e1 m1 n a e2 m2 x, + eval_expr ge sp le e1 m1 a e2 m2 (Vint x) -> + eval_expr ge sp le e1 m1 (addimm n a) e2 m2 (Vint (Int.add x n)). +Proof. + unfold addimm; intros until x. + generalize (Int.eq_spec n Int.zero). case (Int.eq n Int.zero); intro. + subst n. rewrite Int.add_zero. auto. + case (addimm_match a); intros. + InvEval H0. EvalOp. simpl. rewrite Int.add_commut. auto. + InvEval H0. destruct (Genv.find_symbol ge s); discriminate. + InvEval H0. + destruct sp; simpl in XX3; discriminate. + InvEval H0. FuncInv. EvalOp. simpl. subst x. + rewrite Int.add_assoc. decEq; decEq; decEq. apply Int.add_commut. + EvalOp. +Qed. + +Theorem eval_addimm_ptr: + forall sp le e1 m1 n a e2 m2 b ofs, + eval_expr ge sp le e1 m1 a e2 m2 (Vptr b ofs) -> + eval_expr ge sp le e1 m1 (addimm n a) e2 m2 (Vptr b (Int.add ofs n)). +Proof. + unfold addimm; intros until ofs. + generalize (Int.eq_spec n Int.zero). case (Int.eq n Int.zero); intro. + subst n. rewrite Int.add_zero. auto. + case (addimm_match a); intros. + InvEval H0. + InvEval H0. EvalOp. simpl. + destruct (Genv.find_symbol ge s). + rewrite Int.add_commut. congruence. + discriminate. + InvEval H0. destruct sp; simpl in XX3; try discriminate. + inversion XX3. EvalOp. simpl. decEq. decEq. + rewrite Int.add_assoc. decEq. apply Int.add_commut. + InvEval H0. FuncInv. subst b0; subst ofs. EvalOp. simpl. + rewrite (Int.add_commut n m). rewrite Int.add_assoc. auto. + EvalOp. +Qed. + +Theorem eval_add: + forall sp le e1 m1 a e2 m2 x b e3 m3 y, + eval_expr ge sp le e1 m1 a e2 m2 (Vint x) -> + eval_expr ge sp le e2 m2 b e3 m3 (Vint y) -> + eval_expr ge sp le e1 m1 (add a b) e3 m3 (Vint (Int.add x y)). +Proof. + intros until y. unfold add; case (add_match a b); intros. + InvEval H. rewrite Int.add_commut. apply eval_addimm. assumption. + InvEval H. FuncInv. InvEval H0. FuncInv. + replace (Int.add x y) with (Int.add (Int.add i i0) (Int.add n1 n2)). + apply eval_addimm. EvalOp. + subst x; subst y. + repeat rewrite Int.add_assoc. decEq. apply Int.add_permut. + InvEval H. FuncInv. + replace (Int.add x y) with (Int.add (Int.add i y) n1). + apply eval_addimm. EvalOp. + subst x. repeat rewrite Int.add_assoc. decEq. apply Int.add_commut. + InvEval H0. FuncInv. + apply eval_addimm. auto. + InvEval H0. FuncInv. + replace (Int.add x y) with (Int.add (Int.add x i) n2). + apply eval_addimm. EvalOp. + subst y. rewrite Int.add_assoc. auto. + EvalOp. +Qed. + +Theorem eval_add_ptr: + forall sp le e1 m1 a e2 m2 p x b e3 m3 y, + eval_expr ge sp le e1 m1 a e2 m2 (Vptr p x) -> + eval_expr ge sp le e2 m2 b e3 m3 (Vint y) -> + eval_expr ge sp le e1 m1 (add a b) e3 m3 (Vptr p (Int.add x y)). +Proof. + intros until y. unfold add; case (add_match a b); intros. + InvEval H. + InvEval H. FuncInv. InvEval H0. FuncInv. + replace (Int.add x y) with (Int.add (Int.add i i0) (Int.add n1 n2)). + apply eval_addimm_ptr. subst b0. EvalOp. + subst x; subst y. + repeat rewrite Int.add_assoc. decEq. apply Int.add_permut. + InvEval H. FuncInv. + replace (Int.add x y) with (Int.add (Int.add i y) n1). + apply eval_addimm_ptr. subst b0. EvalOp. + subst x. repeat rewrite Int.add_assoc. decEq. apply Int.add_commut. + InvEval H0. apply eval_addimm_ptr. auto. + InvEval H0. FuncInv. + replace (Int.add x y) with (Int.add (Int.add x i) n2). + apply eval_addimm_ptr. EvalOp. + subst y. rewrite Int.add_assoc. auto. + EvalOp. +Qed. + +Theorem eval_add_ptr_2: + forall sp le e1 m1 a e2 m2 p x b e3 m3 y, + eval_expr ge sp le e1 m1 a e2 m2 (Vint x) -> + eval_expr ge sp le e2 m2 b e3 m3 (Vptr p y) -> + eval_expr ge sp le e1 m1 (add a b) e3 m3 (Vptr p (Int.add y x)). +Proof. + intros until y. unfold add; case (add_match a b); intros. + InvEval H. + apply eval_addimm_ptr. auto. + InvEval H. FuncInv. InvEval H0. FuncInv. + replace (Int.add y x) with (Int.add (Int.add i0 i) (Int.add n1 n2)). + apply eval_addimm_ptr. subst b0. EvalOp. + subst x; subst y. + repeat rewrite Int.add_assoc. decEq. + rewrite (Int.add_commut n1 n2). apply Int.add_permut. + InvEval H. FuncInv. + replace (Int.add y x) with (Int.add (Int.add y i) n1). + apply eval_addimm_ptr. EvalOp. + subst x. repeat rewrite Int.add_assoc. auto. + InvEval H0. + InvEval H0. FuncInv. + replace (Int.add y x) with (Int.add (Int.add i x) n2). + apply eval_addimm_ptr. EvalOp. subst b0; reflexivity. + subst y. repeat rewrite Int.add_assoc. decEq. apply Int.add_commut. + EvalOp. +Qed. + +Theorem eval_sub: + forall sp le e1 m1 a e2 m2 x b e3 m3 y, + eval_expr ge sp le e1 m1 a e2 m2 (Vint x) -> + eval_expr ge sp le e2 m2 b e3 m3 (Vint y) -> + eval_expr ge sp le e1 m1 (sub a b) e3 m3 (Vint (Int.sub x y)). +Proof. + intros until y. + unfold sub; case (sub_match a b); intros. + InvEval H0. rewrite Int.sub_add_opp. + apply eval_addimm. assumption. + InvEval H. FuncInv. InvEval H0. FuncInv. + replace (Int.sub x y) with (Int.add (Int.sub i i0) (Int.sub n1 n2)). + apply eval_addimm. EvalOp. + subst x; subst y. + repeat rewrite Int.sub_add_opp. + repeat rewrite Int.add_assoc. decEq. + rewrite Int.add_permut. decEq. symmetry. apply Int.neg_add_distr. + InvEval H. FuncInv. + replace (Int.sub x y) with (Int.add (Int.sub i y) n1). + apply eval_addimm. EvalOp. + subst x. rewrite Int.sub_add_l. auto. + InvEval H0. FuncInv. + replace (Int.sub x y) with (Int.add (Int.sub x i) (Int.neg n2)). + apply eval_addimm. EvalOp. + subst y. rewrite (Int.add_commut i n2). symmetry. apply Int.sub_add_r. + EvalOp. +Qed. + +Theorem eval_sub_ptr_int: + forall sp le e1 m1 a e2 m2 p x b e3 m3 y, + eval_expr ge sp le e1 m1 a e2 m2 (Vptr p x) -> + eval_expr ge sp le e2 m2 b e3 m3 (Vint y) -> + eval_expr ge sp le e1 m1 (sub a b) e3 m3 (Vptr p (Int.sub x y)). +Proof. + intros until y. + unfold sub; case (sub_match a b); intros. + InvEval H0. rewrite Int.sub_add_opp. + apply eval_addimm_ptr. assumption. + InvEval H. FuncInv. InvEval H0. FuncInv. + subst b0. + replace (Int.sub x y) with (Int.add (Int.sub i i0) (Int.sub n1 n2)). + apply eval_addimm_ptr. EvalOp. + subst x; subst y. + repeat rewrite Int.sub_add_opp. + repeat rewrite Int.add_assoc. decEq. + rewrite Int.add_permut. decEq. symmetry. apply Int.neg_add_distr. + InvEval H. FuncInv. subst b0. + replace (Int.sub x y) with (Int.add (Int.sub i y) n1). + apply eval_addimm_ptr. EvalOp. + subst x. rewrite Int.sub_add_l. auto. + InvEval H0. FuncInv. + replace (Int.sub x y) with (Int.add (Int.sub x i) (Int.neg n2)). + apply eval_addimm_ptr. EvalOp. + subst y. rewrite (Int.add_commut i n2). symmetry. apply Int.sub_add_r. + EvalOp. +Qed. + +Theorem eval_sub_ptr_ptr: + forall sp le e1 m1 a e2 m2 p x b e3 m3 y, + eval_expr ge sp le e1 m1 a e2 m2 (Vptr p x) -> + eval_expr ge sp le e2 m2 b e3 m3 (Vptr p y) -> + eval_expr ge sp le e1 m1 (sub a b) e3 m3 (Vint (Int.sub x y)). +Proof. + intros until y. + unfold sub; case (sub_match a b); intros. + InvEval H0. + InvEval H. FuncInv. InvEval H0. FuncInv. + replace (Int.sub x y) with (Int.add (Int.sub i i0) (Int.sub n1 n2)). + apply eval_addimm. EvalOp. + simpl; unfold eq_block. subst b0; subst b1; rewrite zeq_true. auto. + subst x; subst y. + repeat rewrite Int.sub_add_opp. + repeat rewrite Int.add_assoc. decEq. + rewrite Int.add_permut. decEq. symmetry. apply Int.neg_add_distr. + InvEval H. FuncInv. subst b0. + replace (Int.sub x y) with (Int.add (Int.sub i y) n1). + apply eval_addimm. EvalOp. + simpl. unfold eq_block. rewrite zeq_true. auto. + subst x. rewrite Int.sub_add_l. auto. + InvEval H0. FuncInv. subst b0. + replace (Int.sub x y) with (Int.add (Int.sub x i) (Int.neg n2)). + apply eval_addimm. EvalOp. + simpl. unfold eq_block. rewrite zeq_true. auto. + subst y. rewrite (Int.add_commut i n2). symmetry. apply Int.sub_add_r. + EvalOp. simpl. unfold eq_block. rewrite zeq_true. auto. +Qed. + +Lemma eval_rolm: + forall sp le e1 m1 a amount mask e2 m2 x, + eval_expr ge sp le e1 m1 a e2 m2 (Vint x) -> + eval_expr ge sp le e1 m1 (rolm a amount mask) e2 m2 (Vint (Int.rolm x amount mask)). +Proof. + intros until x. unfold rolm; case (rolm_match a); intros. + InvEval H. eauto with evalexpr. + case (Int.is_rlw_mask (Int.and (Int.rol mask1 amount) mask)). + InvEval H. FuncInv. EvalOp. simpl. subst x. + decEq. decEq. + replace (Int.and (Int.add amount1 amount) (Int.repr 31)) + with (Int.modu (Int.add amount1 amount) (Int.repr 32)). + symmetry. apply Int.rolm_rolm. + change (Int.repr 31) with (Int.sub (Int.repr 32) Int.one). + apply Int.modu_and with (Int.repr 5). reflexivity. + EvalOp. + EvalOp. +Qed. + +Theorem eval_shlimm: + forall sp le e1 m1 a n e2 m2 x, + eval_expr ge sp le e1 m1 a e2 m2 (Vint x) -> + Int.ltu n (Int.repr 32) = true -> + eval_expr ge sp le e1 m1 (shlimm a n) e2 m2 (Vint (Int.shl x n)). +Proof. + intros. unfold shlimm. + generalize (Int.eq_spec n Int.zero); case (Int.eq n Int.zero); intro. + subst n. rewrite Int.shl_zero. auto. + rewrite H0. + replace (Int.shl x n) with (Int.rolm x n (Int.shl Int.mone n)). + apply eval_rolm. auto. symmetry. apply Int.shl_rolm. exact H0. +Qed. + +Theorem eval_shruimm: + forall sp le e1 m1 a n e2 m2 x, + eval_expr ge sp le e1 m1 a e2 m2 (Vint x) -> + Int.ltu n (Int.repr 32) = true -> + eval_expr ge sp le e1 m1 (shruimm a n) e2 m2 (Vint (Int.shru x n)). +Proof. + intros. unfold shruimm. + generalize (Int.eq_spec n Int.zero); case (Int.eq n Int.zero); intro. + subst n. rewrite Int.shru_zero. auto. + rewrite H0. + replace (Int.shru x n) with (Int.rolm x (Int.sub (Int.repr 32) n) (Int.shru Int.mone n)). + apply eval_rolm. auto. symmetry. apply Int.shru_rolm. exact H0. +Qed. + +Lemma eval_mulimm_base: + forall sp le e1 m1 a n e2 m2 x, + eval_expr ge sp le e1 m1 a e2 m2 (Vint x) -> + eval_expr ge sp le e1 m1 (mulimm_base n a) e2 m2 (Vint (Int.mul x n)). +Proof. + intros; unfold mulimm_base. + generalize (Int.one_bits_decomp n). + generalize (Int.one_bits_range n). + change (Z_of_nat wordsize) with 32. + destruct (Int.one_bits n). + intros. EvalOp. + destruct l. + intros. rewrite H1. simpl. + rewrite Int.add_zero. rewrite <- Int.shl_mul. + apply eval_shlimm. auto. auto with coqlib. + destruct l. + intros. apply eval_Elet with e2 m2 (Vint x). auto. + rewrite H1. simpl. rewrite Int.add_zero. + rewrite Int.mul_add_distr_r. + rewrite <- Int.shl_mul. + rewrite <- Int.shl_mul. + EvalOp. eapply eval_Econs. + apply eval_shlimm. apply eval_Eletvar. simpl. reflexivity. + auto with coqlib. + eapply eval_Econs. + apply eval_shlimm. apply eval_Eletvar. simpl. reflexivity. + auto with coqlib. + auto with evalexpr. + reflexivity. + intros. EvalOp. +Qed. + +Theorem eval_mulimm: + forall sp le e1 m1 a n e2 m2 x, + eval_expr ge sp le e1 m1 a e2 m2 (Vint x) -> + eval_expr ge sp le e1 m1 (mulimm n a) e2 m2 (Vint (Int.mul x n)). +Proof. + intros until x; unfold mulimm. + generalize (Int.eq_spec n Int.zero); case (Int.eq n Int.zero); intro. + subst n. rewrite Int.mul_zero. eauto with evalexpr. + generalize (Int.eq_spec n Int.one); case (Int.eq n Int.one); intro. + subst n. rewrite Int.mul_one. auto. + case (mulimm_match a); intros. + InvEval H1. EvalOp. rewrite Int.mul_commut. reflexivity. + InvEval H1. FuncInv. + replace (Int.mul x n) with (Int.add (Int.mul i n) (Int.mul n n2)). + apply eval_addimm. apply eval_mulimm_base. auto. + subst x. rewrite Int.mul_add_distr_l. decEq. apply Int.mul_commut. + apply eval_mulimm_base. assumption. +Qed. + +Theorem eval_mul: + forall sp le e1 m1 a e2 m2 x b e3 m3 y, + eval_expr ge sp le e1 m1 a e2 m2 (Vint x) -> + eval_expr ge sp le e2 m2 b e3 m3 (Vint y) -> + eval_expr ge sp le e1 m1 (mul a b) e3 m3 (Vint (Int.mul x y)). +Proof. + intros until y. + unfold mul; case (mul_match a b); intros. + InvEval H. rewrite Int.mul_commut. apply eval_mulimm. auto. + InvEval H0. apply eval_mulimm. auto. + EvalOp. +Qed. + +Theorem eval_divs: + forall sp le e1 m1 a e2 m2 x b e3 m3 y, + eval_expr ge sp le e1 m1 a e2 m2 (Vint x) -> + eval_expr ge sp le e2 m2 b e3 m3 (Vint y) -> + y <> Int.zero -> + eval_expr ge sp le e1 m1 (divs a b) e3 m3 (Vint (Int.divs x y)). +Proof. + TrivialOp divs. simpl. + predSpec Int.eq Int.eq_spec y Int.zero. contradiction. auto. +Qed. + +Lemma eval_mod_aux: + forall divop semdivop, + (forall sp x y, + y <> Int.zero -> + eval_operation ge sp divop (Vint x :: Vint y :: nil) = + Some (Vint (semdivop x y))) -> + forall sp le e1 m1 a e2 m2 x b e3 m3 y, + eval_expr ge sp le e1 m1 a e2 m2 (Vint x) -> + eval_expr ge sp le e2 m2 b e3 m3 (Vint y) -> + y <> Int.zero -> + eval_expr ge sp le e1 m1 (mod_aux divop a b) e3 m3 + (Vint (Int.sub x (Int.mul (semdivop x y) y))). +Proof. + intros; unfold mod_aux. + eapply eval_Elet. eexact H0. eapply eval_Elet. + apply eval_lift. eexact H1. + eapply eval_Eop. eapply eval_Econs. + eapply eval_Eletvar. simpl; reflexivity. + eapply eval_Econs. eapply eval_Eop. + eapply eval_Econs. eapply eval_Eop. + eapply eval_Econs. apply eval_Eletvar. simpl; reflexivity. + eapply eval_Econs. apply eval_Eletvar. simpl; reflexivity. + apply eval_Enil. apply H. assumption. + eapply eval_Econs. apply eval_Eletvar. simpl; reflexivity. + apply eval_Enil. simpl; reflexivity. apply eval_Enil. + reflexivity. +Qed. + +Theorem eval_mods: + forall sp le e1 m1 a e2 m2 x b e3 m3 y, + eval_expr ge sp le e1 m1 a e2 m2 (Vint x) -> + eval_expr ge sp le e2 m2 b e3 m3 (Vint y) -> + y <> Int.zero -> + eval_expr ge sp le e1 m1 (mods a b) e3 m3 (Vint (Int.mods x y)). +Proof. + intros; unfold mods. + rewrite Int.mods_divs. + eapply eval_mod_aux; eauto. + intros. simpl. predSpec Int.eq Int.eq_spec y0 Int.zero. + contradiction. auto. +Qed. + +Lemma eval_divu_base: + forall sp le e1 m1 a e2 m2 x b e3 m3 y, + eval_expr ge sp le e1 m1 a e2 m2 (Vint x) -> + eval_expr ge sp le e2 m2 b e3 m3 (Vint y) -> + y <> Int.zero -> + eval_expr ge sp le e1 m1 (Eop Odivu (a ::: b ::: Enil)) e3 m3 (Vint (Int.divu x y)). +Proof. + intros. EvalOp. simpl. + predSpec Int.eq Int.eq_spec y Int.zero. contradiction. auto. +Qed. + +Theorem eval_divu: + forall sp le e1 m1 a e2 m2 x b e3 m3 y, + eval_expr ge sp le e1 m1 a e2 m2 (Vint x) -> + eval_expr ge sp le e2 m2 b e3 m3 (Vint y) -> + y <> Int.zero -> + eval_expr ge sp le e1 m1 (divu a b) e3 m3 (Vint (Int.divu x y)). +Proof. + intros until y. + unfold divu; case (divu_match b); intros. + InvEval H0. caseEq (Int.is_power2 y). + intros. rewrite (Int.divu_pow2 x y i H0). + apply eval_shruimm. auto. + apply Int.is_power2_range with y. auto. + intros. subst n2. eapply eval_divu_base. eexact H. EvalOp. auto. + eapply eval_divu_base; eauto. +Qed. + +Theorem eval_modu: + forall sp le e1 m1 a e2 m2 x b e3 m3 y, + eval_expr ge sp le e1 m1 a e2 m2 (Vint x) -> + eval_expr ge sp le e2 m2 b e3 m3 (Vint y) -> + y <> Int.zero -> + eval_expr ge sp le e1 m1 (modu a b) e3 m3 (Vint (Int.modu x y)). +Proof. + intros until y; unfold modu; case (divu_match b); intros. + InvEval H0. caseEq (Int.is_power2 y). + intros. rewrite (Int.modu_and x y i H0). + rewrite <- Int.rolm_zero. apply eval_rolm. auto. + intro. rewrite Int.modu_divu. eapply eval_mod_aux. + intros. simpl. predSpec Int.eq Int.eq_spec y0 Int.zero. + contradiction. auto. + eexact H. EvalOp. auto. auto. + rewrite Int.modu_divu. eapply eval_mod_aux. + intros. simpl. predSpec Int.eq Int.eq_spec y0 Int.zero. + contradiction. auto. + eexact H. eexact H0. auto. auto. +Qed. + +Theorem eval_andimm: + forall sp le e1 m1 n a e2 m2 x, + eval_expr ge sp le e1 m1 a e2 m2 (Vint x) -> + eval_expr ge sp le e1 m1 (andimm n a) e2 m2 (Vint (Int.and x n)). +Proof. + intros. unfold andimm. case (Int.is_rlw_mask n). + rewrite <- Int.rolm_zero. apply eval_rolm; auto. + EvalOp. +Qed. + +Theorem eval_and: + forall sp le e1 m1 a e2 m2 x b e3 m3 y, + eval_expr ge sp le e1 m1 a e2 m2 (Vint x) -> + eval_expr ge sp le e2 m2 b e3 m3 (Vint y) -> + eval_expr ge sp le e1 m1 (and a b) e3 m3 (Vint (Int.and x y)). +Proof. + intros until y; unfold and; case (mul_match a b); intros. + InvEval H. rewrite Int.and_commut. apply eval_andimm; auto. + InvEval H0. apply eval_andimm; auto. + EvalOp. +Qed. + +Remark eval_same_expr_pure: + forall a1 a2 sp le e1 m1 e2 m2 v1 e3 m3 v2, + same_expr_pure a1 a2 = true -> + eval_expr ge sp le e1 m1 a1 e2 m2 v1 -> + eval_expr ge sp le e2 m2 a2 e3 m3 v2 -> + a2 = a1 /\ v2 = v1 /\ e2 = e1 /\ m2 = m1. +Proof. + intros until v1. + destruct a1; simpl; try (intros; discriminate). + destruct a2; simpl; try (intros; discriminate). + case (ident_eq i i0); intros. + subst i0. inversion H0. inversion H1. + assert (v2 = v1). congruence. tauto. + discriminate. +Qed. + +Lemma eval_or: + forall sp le e1 m1 a e2 m2 x b e3 m3 y, + eval_expr ge sp le e1 m1 a e2 m2 (Vint x) -> + eval_expr ge sp le e2 m2 b e3 m3 (Vint y) -> + eval_expr ge sp le e1 m1 (or a b) e3 m3 (Vint (Int.or x y)). +Proof. + intros until y; unfold or; case (or_match a b); intros. + generalize (Int.eq_spec amount1 amount2); case (Int.eq amount1 amount2); intro. + case (Int.is_rlw_mask (Int.or mask1 mask2)). + caseEq (same_expr_pure t1 t2); intro. + simpl. InvEval H. FuncInv. InvEval H0. FuncInv. + generalize (eval_same_expr_pure _ _ _ _ _ _ _ _ _ _ _ _ H2 EV EV0). + intros [EQ1 [EQ2 [EQ3 EQ4]]]. + injection EQ2; intro EQ5. + subst t2; subst e2; subst m2; subst i0. + EvalOp. simpl. subst x; subst y; subst amount2. + rewrite Int.or_rolm. auto. + simpl. EvalOp. + simpl. EvalOp. + simpl. EvalOp. + EvalOp. +Qed. + +Theorem eval_xor: + forall sp le e1 m1 a e2 m2 x b e3 m3 y, + eval_expr ge sp le e1 m1 a e2 m2 (Vint x) -> + eval_expr ge sp le e2 m2 b e3 m3 (Vint y) -> + eval_expr ge sp le e1 m1 (xor a b) e3 m3 (Vint (Int.xor x y)). +Proof. TrivialOp xor. Qed. + +Theorem eval_shl: + forall sp le e1 m1 a e2 m2 x b e3 m3 y, + eval_expr ge sp le e1 m1 a e2 m2 (Vint x) -> + eval_expr ge sp le e2 m2 b e3 m3 (Vint y) -> + Int.ltu y (Int.repr 32) = true -> + eval_expr ge sp le e1 m1 (shl a b) e3 m3 (Vint (Int.shl x y)). +Proof. + intros until y; unfold shl; case (shift_match b); intros. + InvEval H0. apply eval_shlimm; auto. + EvalOp. simpl. rewrite H1. auto. +Qed. + +Theorem eval_shr: + forall sp le e1 m1 a e2 m2 x b e3 m3 y, + eval_expr ge sp le e1 m1 a e2 m2 (Vint x) -> + eval_expr ge sp le e2 m2 b e3 m3 (Vint y) -> + Int.ltu y (Int.repr 32) = true -> + eval_expr ge sp le e1 m1 (shr a b) e3 m3 (Vint (Int.shr x y)). +Proof. + TrivialOp shr. simpl. rewrite H1. auto. +Qed. + +Theorem eval_shru: + forall sp le e1 m1 a e2 m2 x b e3 m3 y, + eval_expr ge sp le e1 m1 a e2 m2 (Vint x) -> + eval_expr ge sp le e2 m2 b e3 m3 (Vint y) -> + Int.ltu y (Int.repr 32) = true -> + eval_expr ge sp le e1 m1 (shru a b) e3 m3 (Vint (Int.shru x y)). +Proof. + intros until y; unfold shru; case (shift_match b); intros. + InvEval H0. apply eval_shruimm; auto. + EvalOp. simpl. rewrite H1. auto. +Qed. + +Theorem eval_addf: + forall sp le e1 m1 a e2 m2 x b e3 m3 y, + eval_expr ge sp le e1 m1 a e2 m2 (Vfloat x) -> + eval_expr ge sp le e2 m2 b e3 m3 (Vfloat y) -> + eval_expr ge sp le e1 m1 (addf a b) e3 m3 (Vfloat (Float.add x y)). +Proof. + intros until y; unfold addf; case (addf_match a b); intros. + InvEval H. FuncInv. EvalOp. simpl. subst x. reflexivity. + InvEval H0. FuncInv. eapply eval_Elet. eexact H. EvalOp. + eapply eval_Econs. apply eval_lift. eexact EV. + eapply eval_Econs. apply eval_lift. eexact EV0. + eapply eval_Econs. apply eval_Eletvar. simpl; reflexivity. + apply eval_Enil. + subst y. rewrite Float.addf_commut. reflexivity. + EvalOp. +Qed. + +Theorem eval_subf: + forall sp le e1 m1 a e2 m2 x b e3 m3 y, + eval_expr ge sp le e1 m1 a e2 m2 (Vfloat x) -> + eval_expr ge sp le e2 m2 b e3 m3 (Vfloat y) -> + eval_expr ge sp le e1 m1 (subf a b) e3 m3 (Vfloat (Float.sub x y)). +Proof. + intros until y; unfold subf; case (subf_match a b); intros. + InvEval H. FuncInv. EvalOp. subst x. reflexivity. + EvalOp. +Qed. + +Theorem eval_mulf: + forall sp le e1 m1 a e2 m2 x b e3 m3 y, + eval_expr ge sp le e1 m1 a e2 m2 (Vfloat x) -> + eval_expr ge sp le e2 m2 b e3 m3 (Vfloat y) -> + eval_expr ge sp le e1 m1 (mulf a b) e3 m3 (Vfloat (Float.mul x y)). +Proof. TrivialOp mulf. Qed. + +Theorem eval_divf: + forall sp le e1 m1 a e2 m2 x b e3 m3 y, + eval_expr ge sp le e1 m1 a e2 m2 (Vfloat x) -> + eval_expr ge sp le e2 m2 b e3 m3 (Vfloat y) -> + eval_expr ge sp le e1 m1 (divf a b) e3 m3 (Vfloat (Float.div x y)). +Proof. TrivialOp divf. Qed. + +Theorem eval_cmp: + forall sp le c e1 m1 a e2 m2 x b e3 m3 y, + eval_expr ge sp le e1 m1 a e2 m2 (Vint x) -> + eval_expr ge sp le e2 m2 b e3 m3 (Vint y) -> + eval_expr ge sp le e1 m1 (cmp c a b) e3 m3 (Val.of_bool (Int.cmp c x y)). +Proof. + TrivialOp cmp. + simpl. case (Int.cmp c x y); auto. +Qed. + +Theorem eval_cmp_null_r: + forall sp le c e1 m1 a e2 m2 p x b e3 m3 v, + eval_expr ge sp le e1 m1 a e2 m2 (Vptr p x) -> + eval_expr ge sp le e2 m2 b e3 m3 (Vint Int.zero) -> + (c = Ceq /\ v = Vfalse) \/ (c = Cne /\ v = Vtrue) -> + eval_expr ge sp le e1 m1 (cmp c a b) e3 m3 v. +Proof. + TrivialOp cmp. + simpl. elim H1; intros [EQ1 EQ2]; subst c; subst v; reflexivity. +Qed. + +Theorem eval_cmp_null_l: + forall sp le c e1 m1 a e2 m2 p x b e3 m3 v, + eval_expr ge sp le e1 m1 a e2 m2 (Vint Int.zero) -> + eval_expr ge sp le e2 m2 b e3 m3 (Vptr p x) -> + (c = Ceq /\ v = Vfalse) \/ (c = Cne /\ v = Vtrue) -> + eval_expr ge sp le e1 m1 (cmp c a b) e3 m3 v. +Proof. + TrivialOp cmp. + simpl. elim H1; intros [EQ1 EQ2]; subst c; subst v; reflexivity. +Qed. + +Theorem eval_cmp_ptr: + forall sp le c e1 m1 a e2 m2 p x b e3 m3 y, + eval_expr ge sp le e1 m1 a e2 m2 (Vptr p x) -> + eval_expr ge sp le e2 m2 b e3 m3 (Vptr p y) -> + eval_expr ge sp le e1 m1 (cmp c a b) e3 m3 (Val.of_bool (Int.cmp c x y)). +Proof. + TrivialOp cmp. + simpl. unfold eq_block. rewrite zeq_true. + case (Int.cmp c x y); auto. +Qed. + +Theorem eval_cmpu: + forall sp le c e1 m1 a e2 m2 x b e3 m3 y, + eval_expr ge sp le e1 m1 a e2 m2 (Vint x) -> + eval_expr ge sp le e2 m2 b e3 m3 (Vint y) -> + eval_expr ge sp le e1 m1 (cmpu c a b) e3 m3 (Val.of_bool (Int.cmpu c x y)). +Proof. + TrivialOp cmpu. + simpl. case (Int.cmpu c x y); auto. +Qed. + +Theorem eval_cmpf: + forall sp le c e1 m1 a e2 m2 x b e3 m3 y, + eval_expr ge sp le e1 m1 a e2 m2 (Vfloat x) -> + eval_expr ge sp le e2 m2 b e3 m3 (Vfloat y) -> + eval_expr ge sp le e1 m1 (cmpf c a b) e3 m3 (Val.of_bool (Float.cmp c x y)). +Proof. + TrivialOp cmpf. + simpl. case (Float.cmp c x y); auto. +Qed. + +Lemma eval_base_condition_of_expr: + forall sp le a e1 m1 e2 m2 v (b: bool), + eval_expr ge sp le e1 m1 a e2 m2 v -> + Val.bool_of_val v b -> + eval_condexpr ge sp le e1 m1 + (CEcond (Ccompuimm Cne Int.zero) (a ::: Enil)) + e2 m2 b. +Proof. + intros. + eapply eval_CEcond. eauto with evalexpr. + inversion H0; simpl. rewrite Int.eq_false; auto. auto. auto. +Qed. + +Lemma eval_condition_of_expr: + forall a sp le e1 m1 e2 m2 v (b: bool), + eval_expr ge sp le e1 m1 a e2 m2 v -> + Val.bool_of_val v b -> + eval_condexpr ge sp le e1 m1 (condexpr_of_expr a) e2 m2 b. +Proof. + induction a; simpl; intros; + try (eapply eval_base_condition_of_expr; eauto; fail). + destruct o; try (eapply eval_base_condition_of_expr; eauto; fail). + + destruct e. inversion H. subst sp0 le0 e m op al e0 m0 v0. + inversion H7. subst sp0 le0 e m e1 m1 vl. + simpl in H11. inversion H11; subst v. + inversion H0. + rewrite Int.eq_false; auto. constructor. + subst i; rewrite Int.eq_true. constructor. + eapply eval_base_condition_of_expr; eauto. + + inversion H. eapply eval_CEcond; eauto. simpl in H11. + destruct (eval_condition c vl); try discriminate. + destruct b0; inversion H11; subst v0; subst v; inversion H0; congruence. + + inversion H. + destruct v1; eauto with evalexpr. +Qed. + +Theorem eval_conditionalexpr_true: + forall sp le e1 m1 a1 e2 m2 v1 a2 e3 m3 v2 a3, + eval_expr ge sp le e1 m1 a1 e2 m2 v1 -> + Val.is_true v1 -> + eval_expr ge sp le e2 m2 a2 e3 m3 v2 -> + eval_expr ge sp le e1 m1 (conditionalexpr a1 a2 a3) e3 m3 v2. +Proof. + intros; unfold conditionalexpr. + apply eval_Econdition with e2 m2 true; auto. + eapply eval_condition_of_expr; eauto with valboolof. +Qed. + +Theorem eval_conditionalexpr_false: + forall sp le e1 m1 a1 e2 m2 v1 a2 e3 m3 v2 a3, + eval_expr ge sp le e1 m1 a1 e2 m2 v1 -> + Val.is_false v1 -> + eval_expr ge sp le e2 m2 a3 e3 m3 v2 -> + eval_expr ge sp le e1 m1 (conditionalexpr a1 a2 a3) e3 m3 v2. +Proof. + intros; unfold conditionalexpr. + apply eval_Econdition with e2 m2 false; auto. + eapply eval_condition_of_expr; eauto with valboolof. +Qed. + +Lemma eval_addressing: + forall sp le e1 m1 a e2 m2 v b ofs, + eval_expr ge sp le e1 m1 a e2 m2 v -> + v = Vptr b ofs -> + match addressing a with (mode, args) => + exists vl, + eval_exprlist ge sp le e1 m1 args e2 m2 vl /\ + eval_addressing ge sp mode vl = Some v + end. +Proof. + intros until v. unfold addressing; case (addressing_match a); intros. + InvEval H. exists (@nil val). split. eauto with evalexpr. + simpl. auto. + InvEval H. exists (@nil val). split. eauto with evalexpr. + simpl. auto. + InvEval H. FuncInv. + congruence. + InvEval EV. + exists (Vint i0 :: nil). split. eauto with evalexpr. + simpl. subst v. destruct (Genv.find_symbol ge s). congruence. + discriminate. + InvEval H. FuncInv. + destruct (Genv.find_symbol ge s); congruence. + InvEval EV. + exists (Vint i0 :: nil). split. eauto with evalexpr. + simpl. destruct (Genv.find_symbol ge s). congruence. + discriminate. + InvEval H. FuncInv. + congruence. + exists (Vptr b0 i :: nil). split. eauto with evalexpr. + simpl. congruence. + InvEval H. FuncInv. + congruence. + exists (Vint i :: Vptr b0 i0 :: nil). + split. eauto with evalexpr. simpl. + rewrite Int.add_commut. congruence. + exists (Vptr b0 i :: Vint i0 :: nil). + split. eauto with evalexpr. simpl. congruence. + exists (v :: nil). split. eauto with evalexpr. + subst v. simpl. rewrite Int.add_zero. auto. +Qed. + +Theorem eval_load: + forall sp le e1 m1 a e2 m2 v chunk v', + eval_expr ge sp le e1 m1 a e2 m2 v -> + Mem.loadv chunk m2 v = Some v' -> + eval_expr ge sp le e1 m1 (load chunk a) e2 m2 v'. +Proof. + intros. generalize H0; destruct v; simpl; intro; try discriminate. + unfold load. + generalize (eval_addressing _ _ _ _ _ _ _ _ _ _ H (refl_equal _)). + destruct (addressing a). intros [vl [EV EQ]]. + eapply eval_Eload; eauto. +Qed. + +Theorem eval_store: + forall sp le e1 m1 a1 e2 m2 v1 a2 e3 m3 v2 chunk m4, + eval_expr ge sp le e1 m1 a1 e2 m2 v1 -> + eval_expr ge sp le e2 m2 a2 e3 m3 v2 -> + Mem.storev chunk m3 v1 v2 = Some m4 -> + eval_expr ge sp le e1 m1 (store chunk a1 a2) e3 m4 v2. +Proof. + intros. generalize H1; destruct v1; simpl; intro; try discriminate. + unfold store. + generalize (eval_addressing _ _ _ _ _ _ _ _ _ _ H (refl_equal _)). + destruct (addressing a1). intros [vl [EV EQ]]. + eapply eval_Estore; eauto. +Qed. + +Theorem exec_ifthenelse_true: + forall sp e1 m1 a e2 m2 v ifso ifnot e3 m3 out, + eval_expr ge sp nil e1 m1 a e2 m2 v -> + Val.is_true v -> + exec_stmtlist ge sp e2 m2 ifso e3 m3 out -> + exec_stmt ge sp e1 m1 (ifthenelse a ifso ifnot) e3 m3 out. +Proof. + intros. unfold ifthenelse. + apply exec_Sifthenelse with e2 m2 true. + eapply eval_condition_of_expr; eauto with valboolof. + auto. +Qed. + +Theorem exec_ifthenelse_false: + forall sp e1 m1 a e2 m2 v ifso ifnot e3 m3 out, + eval_expr ge sp nil e1 m1 a e2 m2 v -> + Val.is_false v -> + exec_stmtlist ge sp e2 m2 ifnot e3 m3 out -> + exec_stmt ge sp e1 m1 (ifthenelse a ifso ifnot) e3 m3 out. +Proof. + intros. unfold ifthenelse. + apply exec_Sifthenelse with e2 m2 false. + eapply eval_condition_of_expr; eauto with valboolof. + auto. +Qed. + +End CMCONSTR. + diff --git a/backend/Cminor.v b/backend/Cminor.v new file mode 100644 index 00000000..f08d927e --- /dev/null +++ b/backend/Cminor.v @@ -0,0 +1,348 @@ +(** Abstract syntax and semantics for the Cminor language. *) + +Require Import Coqlib. +Require Import Maps. +Require Import AST. +Require Import Integers. +Require Import Floats. +Require Import Values. +Require Import Mem. +Require Import Op. +Require Import Globalenvs. + +(** * Abstract syntax *) + +(** Cminor is a low-level imperative language structured in expressions, + statements, functions and programs. Expressions include + reading and writing local variables, reading and writing store locations, + arithmetic operations, function calls, and conditional expressions + (similar to [e1 ? e2 : e3] in C). The [Elet] and [Eletvar] constructs + enable sharing the computations of subexpressions. De Bruijn notation + is used: [Eletvar n] refers to the value bound by then [n+1]-th enclosing + [Elet] construct. + + A variant [condexpr] of [expr] is used to represent expressions + which are evaluated for their boolean value only and not their exact value. +*) + +Inductive expr : Set := + | Evar : ident -> expr + | Eassign : ident -> expr -> expr + | Eop : operation -> exprlist -> expr + | Eload : memory_chunk -> addressing -> exprlist -> expr + | Estore : memory_chunk -> addressing -> exprlist -> expr -> expr + | Ecall : signature -> expr -> exprlist -> expr + | Econdition : condexpr -> expr -> expr -> expr + | Elet : expr -> expr -> expr + | Eletvar : nat -> expr + +with condexpr : Set := + | CEtrue: condexpr + | CEfalse: condexpr + | CEcond: condition -> exprlist -> condexpr + | CEcondition : condexpr -> condexpr -> condexpr -> condexpr + +with exprlist : Set := + | Enil: exprlist + | Econs: expr -> exprlist -> exprlist. + +(** Statements include expression evaluation, an if/then/else conditional, + infinite loops, blocks and early block exits, and early function returns. + [Sexit n] terminates prematurely the execution of the [n+1] enclosing + [Sblock] statements. *) + +Inductive stmt : Set := + | Sexpr: expr -> stmt + | Sifthenelse: condexpr -> stmtlist -> stmtlist -> stmt + | Sloop: stmtlist -> stmt + | Sblock: stmtlist -> stmt + | Sexit: nat -> stmt + | Sreturn: option expr -> stmt + +with stmtlist : Set := + | Snil: stmtlist + | Scons: stmt -> stmtlist -> stmtlist. + +(** Functions are composed of a signature, a list of parameter names, + a list of local variables, and a list of statements representing + the function body. Each function can allocate a memory block of + size [fn_stackspace] on entrance. This block will be deallocated + automatically before the function returns. Pointers into this block + can be taken with the [Oaddrstack] operator. *) + +Record function : Set := mkfunction { + fn_sig: signature; + fn_params: list ident; + fn_vars: list ident; + fn_stackspace: Z; + fn_body: stmtlist +}. + +Definition program := AST.program function. + +(** * Operational semantics *) + +(** The operational semantics for Cminor is given in big-step operational + style. Expressions evaluate to values, and statements evaluate to + ``outcomes'' indicating how execution should proceed afterwards. *) + +Inductive outcome: Set := + | Out_normal: outcome (**r continue in sequence *) + | Out_exit: nat -> outcome (**r terminate [n+1] enclosing blocks *) + | Out_return: option val -> outcome. (**r return immediately to caller *) + +Definition outcome_result_value + (out: outcome) (ot: option typ) (v: val) : Prop := + match out, ot with + | Out_normal, None => v = Vundef + | Out_return None, None => v = Vundef + | Out_return (Some v'), Some ty => v = v' + | _, _ => False + end. + +Definition outcome_block (out: outcome) : outcome := + match out with + | Out_normal => Out_normal + | Out_exit O => Out_normal + | Out_exit (S n) => Out_exit n + | Out_return optv => Out_return optv + end. + +(** Three kinds of evaluation environments are involved: +- [genv]: global environments, define symbols and functions; +- [env]: local environments, map local variables to values; +- [lenv]: let environments, map de Bruijn indices to values. +*) + +Definition genv := Genv.t function. +Definition env := PTree.t val. +Definition letenv := list val. + +(** The following functions build the initial local environment at + function entry, binding parameters to the provided arguments and + initializing local variables to [Vundef]. *) + +Fixpoint set_params (vl: list val) (il: list ident) {struct il} : env := + match il, vl with + | i1 :: is, v1 :: vs => PTree.set i1 v1 (set_params vs is) + | i1 :: is, nil => PTree.set i1 Vundef (set_params nil is) + | _, _ => PTree.empty val + end. + +Fixpoint set_locals (il: list ident) (e: env) {struct il} : env := + match il with + | nil => e + | i1 :: is => PTree.set i1 Vundef (set_locals is e) + end. + +Section RELSEM. + +Variable ge: genv. + +(** Evaluation of an expression: [eval_expr ge sp le e m a e' m' v] + states that expression [a], in initial local environment [e] and + memory state [m], evaluates to value [v]. [e'] and [m'] are the final + local environment and memory state, respectively, reflecting variable + assignments and memory stores possibly performed by [a]. [ge] and + [le] are the global environment and let environment respectively, and + are unchanged during evaluation. [sp] is the pointer to the memory + block allocated for this function (stack frame). +*) + +Inductive eval_expr: + val -> letenv -> + env -> mem -> expr -> + env -> mem -> val -> Prop := + | eval_Evar: + forall sp le e m id v, + PTree.get id e = Some v -> + eval_expr sp le e m (Evar id) e m v + | eval_Eassign: + forall sp le e m id a e1 m1 v, + eval_expr sp le e m a e1 m1 v -> + eval_expr sp le e m (Eassign id a) (PTree.set id v e1) m1 v + | eval_Eop: + forall sp le e m op al e1 m1 vl v, + eval_exprlist sp le e m al e1 m1 vl -> + eval_operation ge sp op vl = Some v -> + eval_expr sp le e m (Eop op al) e1 m1 v + | eval_Eload: + forall sp le e m chunk addr al e1 m1 v vl a, + eval_exprlist sp le e m al e1 m1 vl -> + eval_addressing ge sp addr vl = Some a -> + Mem.loadv chunk m1 a = Some v -> + eval_expr sp le e m (Eload chunk addr al) e1 m1 v + | eval_Estore: + forall sp le e m chunk addr al b e1 m1 vl e2 m2 m3 v a, + eval_exprlist sp le e m al e1 m1 vl -> + eval_expr sp le e1 m1 b e2 m2 v -> + eval_addressing ge sp addr vl = Some a -> + Mem.storev chunk m2 a v = Some m3 -> + eval_expr sp le e m (Estore chunk addr al b) e2 m3 v + | eval_Ecall: + forall sp le e m sig a bl e1 e2 m1 m2 m3 vf vargs vres f, + eval_expr sp le e m a e1 m1 vf -> + eval_exprlist sp le e1 m1 bl e2 m2 vargs -> + Genv.find_funct ge vf = Some f -> + f.(fn_sig) = sig -> + eval_funcall m2 f vargs m3 vres -> + eval_expr sp le e m (Ecall sig a bl) e2 m3 vres + | eval_Econdition: + forall sp le e m a b c e1 m1 v1 e2 m2 v2, + eval_condexpr sp le e m a e1 m1 v1 -> + eval_expr sp le e1 m1 (if v1 then b else c) e2 m2 v2 -> + eval_expr sp le e m (Econdition a b c) e2 m2 v2 + | eval_Elet: + forall sp le e m a b e1 m1 v1 e2 m2 v2, + eval_expr sp le e m a e1 m1 v1 -> + eval_expr sp (v1::le) e1 m1 b e2 m2 v2 -> + eval_expr sp le e m (Elet a b) e2 m2 v2 + | eval_Eletvar: + forall sp le e m n v, + nth_error le n = Some v -> + eval_expr sp le e m (Eletvar n) e m v + +(** Evaluation of a condition expression: + [eval_condexpr ge sp le e m a e' m' b] + states that condition expression [a] evaluates to the boolean value [b]. + The other parameters are as in [eval_expr]. +*) + +with eval_condexpr: + val -> letenv -> + env -> mem -> condexpr -> + env -> mem -> bool -> Prop := + | eval_CEtrue: + forall sp le e m, + eval_condexpr sp le e m CEtrue e m true + | eval_CEfalse: + forall sp le e m, + eval_condexpr sp le e m CEfalse e m false + | eval_CEcond: + forall sp le e m cond al e1 m1 vl b, + eval_exprlist sp le e m al e1 m1 vl -> + eval_condition cond vl = Some b -> + eval_condexpr sp le e m (CEcond cond al) e1 m1 b + | eval_CEcondition: + forall sp le e m a b c e1 m1 vb1 e2 m2 vb2, + eval_condexpr sp le e m a e1 m1 vb1 -> + eval_condexpr sp le e1 m1 (if vb1 then b else c) e2 m2 vb2 -> + eval_condexpr sp le e m (CEcondition a b c) e2 m2 vb2 + +(** Evaluation of a list of expressions: + [eval_exprlist ge sp le al m a e' m' vl] + states that the list [al] of expressions evaluate + to the list [vl] of values. + The other parameters are as in [eval_expr]. +*) + +with eval_exprlist: + val -> letenv -> + env -> mem -> exprlist -> + env -> mem -> list val -> Prop := + | eval_Enil: + forall sp le e m, + eval_exprlist sp le e m Enil e m nil + | eval_Econs: + forall sp le e m a bl e1 m1 v e2 m2 vl, + eval_expr sp le e m a e1 m1 v -> + eval_exprlist sp le e1 m1 bl e2 m2 vl -> + eval_exprlist sp le e m (Econs a bl) e2 m2 (v :: vl) + +(** Evaluation of a function invocation: [eval_funcall ge m f args m' res] + means that the function [f], applied to the arguments [args] in + memory state [m], returns the value [res] in modified memory state [m']. +*) + +with eval_funcall: + mem -> function -> list val -> + mem -> val -> Prop := + | eval_funcall_intro: + forall m f vargs m1 sp e e2 m2 out vres, + Mem.alloc m 0 f.(fn_stackspace) = (m1, sp) -> + set_locals f.(fn_vars) (set_params vargs f.(fn_params)) = e -> + exec_stmtlist (Vptr sp Int.zero) e m1 f.(fn_body) e2 m2 out -> + outcome_result_value out f.(fn_sig).(sig_res) vres -> + eval_funcall m f vargs (Mem.free m2 sp) vres + +(** Execution of a statement: [exec_stmt ge sp e m s e' m' out] + means that statement [s] executes with outcome [out]. + The other parameters are as in [eval_expr]. *) + +with exec_stmt: + val -> + env -> mem -> stmt -> + env -> mem -> outcome -> Prop := + | exec_Sexpr: + forall sp e m a e1 m1 v, + eval_expr sp nil e m a e1 m1 v -> + exec_stmt sp e m (Sexpr a) e1 m1 Out_normal + | exec_Sifthenelse: + forall sp e m a sl1 sl2 e1 m1 v1 e2 m2 out, + eval_condexpr sp nil e m a e1 m1 v1 -> + exec_stmtlist sp e1 m1 (if v1 then sl1 else sl2) e2 m2 out -> + exec_stmt sp e m (Sifthenelse a sl1 sl2) e2 m2 out + | exec_Sloop_loop: + forall sp e m sl e1 m1 e2 m2 out, + exec_stmtlist sp e m sl e1 m1 Out_normal -> + exec_stmt sp e1 m1 (Sloop sl) e2 m2 out -> + exec_stmt sp e m (Sloop sl) e2 m2 out + | exec_Sloop_stop: + forall sp e m sl e1 m1 out, + exec_stmtlist sp e m sl e1 m1 out -> + out <> Out_normal -> + exec_stmt sp e m (Sloop sl) e1 m1 out + | exec_Sblock: + forall sp e m sl e1 m1 out, + exec_stmtlist sp e m sl e1 m1 out -> + exec_stmt sp e m (Sblock sl) e1 m1 (outcome_block out) + | exec_Sexit: + forall sp e m n, + exec_stmt sp e m (Sexit n) e m (Out_exit n) + | exec_Sreturn_none: + forall sp e m, + exec_stmt sp e m (Sreturn None) e m (Out_return None) + | exec_Sreturn_some: + forall sp e m a e1 m1 v, + eval_expr sp nil e m a e1 m1 v -> + exec_stmt sp e m (Sreturn (Some a)) e1 m1 (Out_return (Some v)) + +(** Execution of a list of statements: [exec_stmtlist ge sp e m sl e' m' out] + means that the list [sl] of statements executes sequentially + with outcome [out]. Execution stops at the first statement that + leads an outcome different from [Out_normal]. + The other parameters are as in [eval_expr]. *) + +with exec_stmtlist: + val -> + env -> mem -> stmtlist -> + env -> mem -> outcome -> Prop := + | exec_Snil: + forall sp e m, + exec_stmtlist sp e m Snil e m Out_normal + | exec_Scons_continue: + forall sp e m s sl e1 m1 e2 m2 out, + exec_stmt sp e m s e1 m1 Out_normal -> + exec_stmtlist sp e1 m1 sl e2 m2 out -> + exec_stmtlist sp e m (Scons s sl) e2 m2 out + | exec_Scons_stop: + forall sp e m s sl e1 m1 out, + exec_stmt sp e m s e1 m1 out -> + out <> Out_normal -> + exec_stmtlist sp e m (Scons s sl) e1 m1 out. + +End RELSEM. + +(** Execution of a whole program: [exec_program p r] + holds if the application of [p]'s main function to no arguments + in the initial memory state for [p] eventually returns value [r]. *) + +Definition exec_program (p: program) (r: val) : Prop := + let ge := Genv.globalenv p in + let m0 := Genv.init_mem p in + exists b, exists f, exists m, + Genv.find_symbol ge p.(prog_main) = Some b /\ + Genv.find_funct_ptr ge b = Some f /\ + f.(fn_sig) = mksignature nil (Some Tint) /\ + eval_funcall ge m0 f nil m r. + diff --git a/backend/Cminorgen.v b/backend/Cminorgen.v new file mode 100644 index 00000000..6af5260e --- /dev/null +++ b/backend/Cminorgen.v @@ -0,0 +1,398 @@ +(** Translation from Csharpminor to Cminor. *) + +Require Import Coqlib. +Require Import Maps. +Require Import Sets. +Require Import AST. +Require Import Integers. +Require Mem. +Require Import Csharpminor. +Require Import Op. +Require Import Cminor. +Require Cmconstr. + +(** The main task in translating Csharpminor to Cminor is to explicitly + stack-allocate local variables whose address is taken: these local + variables become sub-blocks of the Cminor stack data block for the + current function. Taking the address of such a variable becomes + a [Oaddrstack] operation with the corresponding offset. Accessing + or assigning such a variable becomes a load or store operation at + that address. Only scalar local variables whose address is never + taken in the Csharpminor code can be mapped to Cminor local + variable, since the latter do not reside in memory. + + Other tasks performed during the translation to Cminor: +- Transformation of Csharpminor's standard set of operators and + trivial addressing modes to Cminor's processor-dependent operators + and addressing modes. This is done using the optimizing Cminor + constructor functions provided in file [Cmconstr], therefore performing + instruction selection on the fly. +- Insertion of truncation, zero- and sign-extension operations when + assigning to a Csharpminor local variable of ``small'' type + (e.g. [Mfloat32] or [Mint8signed]). This is necessary to preserve + the ``normalize at assignment-time'' semantics of Csharpminor. +*) + +(** Translation of operators. *) + +Definition make_op (op: Csharpminor.operation) (el: exprlist): option expr := + match el with + | Enil => + match op with + | Csharpminor.Ointconst n => Some(Eop (Ointconst n) Enil) + | Csharpminor.Ofloatconst n => Some(Eop (Ofloatconst n) Enil) + | _ => None + end + | Econs e1 Enil => + match op with + | Csharpminor.Ocast8unsigned => Some(Cmconstr.cast8unsigned e1) + | Csharpminor.Ocast8signed => Some(Cmconstr.cast8signed e1) + | Csharpminor.Ocast16unsigned => Some(Cmconstr.cast16unsigned e1) + | Csharpminor.Ocast16signed => Some(Cmconstr.cast16signed e1) + | Csharpminor.Onotint => Some(Cmconstr.notint e1) + | Csharpminor.Onegf => Some(Cmconstr.negfloat e1) + | Csharpminor.Oabsf => Some(Cmconstr.absfloat e1) + | Csharpminor.Osingleoffloat => Some(Cmconstr.singleoffloat e1) + | Csharpminor.Ointoffloat => Some(Cmconstr.intoffloat e1) + | Csharpminor.Ofloatofint => Some(Cmconstr.floatofint e1) + | Csharpminor.Ofloatofintu => Some(Cmconstr.floatofintu e1) + | _ => None + end + | Econs e1 (Econs e2 Enil) => + match op with + | Csharpminor.Oadd => Some(Cmconstr.add e1 e2) + | Csharpminor.Osub => Some(Cmconstr.sub e1 e2) + | Csharpminor.Omul => Some(Cmconstr.mul e1 e2) + | Csharpminor.Odiv => Some(Cmconstr.divs e1 e2) + | Csharpminor.Odivu => Some(Cmconstr.divu e1 e2) + | Csharpminor.Omod => Some(Cmconstr.mods e1 e2) + | Csharpminor.Omodu => Some(Cmconstr.modu e1 e2) + | Csharpminor.Oand => Some(Cmconstr.and e1 e2) + | Csharpminor.Oor => Some(Cmconstr.or e1 e2) + | Csharpminor.Oxor => Some(Cmconstr.xor e1 e2) + | Csharpminor.Oshl => Some(Cmconstr.shl e1 e2) + | Csharpminor.Oshr => Some(Cmconstr.shr e1 e2) + | Csharpminor.Oshru => Some(Cmconstr.shru e1 e2) + | Csharpminor.Oaddf => Some(Cmconstr.addf e1 e2) + | Csharpminor.Osubf => Some(Cmconstr.subf e1 e2) + | Csharpminor.Omulf => Some(Cmconstr.mulf e1 e2) + | Csharpminor.Odivf => Some(Cmconstr.divf e1 e2) + | Csharpminor.Ocmp c => Some(Cmconstr.cmp c e1 e2) + | Csharpminor.Ocmpu c => Some(Cmconstr.cmpu c e1 e2) + | Csharpminor.Ocmpf c => Some(Cmconstr.cmpf c e1 e2) + | _ => None + end + | _ => None + end. + +(** [make_cast chunk e] returns a Cminor expression that normalizes + the value of Cminor expression [e] as prescribed by the memory chunk + [chunk]. For instance, 8-bit sign extension is performed if + [chunk] is [Mint8signed]. *) + +Definition make_cast (chunk: memory_chunk) (e: expr): expr := + match chunk with + | Mint8signed => Cmconstr.cast8signed e + | Mint8unsigned => Cmconstr.cast8unsigned e + | Mint16signed => Cmconstr.cast16signed e + | Mint16unsigned => Cmconstr.cast16unsigned e + | Mint32 => e + | Mfloat32 => Cmconstr.singleoffloat e + | Mfloat64 => e + end. + +Definition make_load (chunk: memory_chunk) (e: expr): expr := + Cmconstr.load chunk e. + +(** In Csharpminor, the value of a store expression is the stored data + normalized to the memory chunk. In Cminor, it is the stored data. + For the translation, we could normalize before storing. However, + the memory model performs automatic normalization of the stored + data. It is therefore correct to store the data as is, then + normalize the result value of the store expression. This is more + efficient in general because often the result value is ignored: + the normalization code will therefore be eliminated later as dead + code. *) + +Definition make_store (chunk: memory_chunk) (e1 e2: expr): expr := + make_cast chunk (Cmconstr.store chunk e1 e2). + +Definition make_stackaddr (ofs: Z): expr := + Eop (Oaddrstack (Int.repr ofs)) Enil. + +(** Compile-time information attached to each Csharpminor + variable: global variables, local variables, function parameters. + [Var_local] denotes a scalar local variable whose address is not + taken; it will be translated to a Cminor local variable of the + same name. [Var_stack_scalar] and [Var_stack_array] denote + local variables that are stored as sub-blocks of the Cminor stack + data block. [Var_global] denotes a global variable, stored in + the global symbol with the same name. *) + +Inductive var_info: Set := + | Var_local: memory_chunk -> var_info + | Var_stack_scalar: memory_chunk -> Z -> var_info + | Var_stack_array: Z -> var_info + | Var_global: var_info. + +Definition compilenv := PMap.t var_info. + +(** Generation of Cminor code corresponding to accesses to Csharpminor + local variables: reads, assignments, and taking the address of. *) + +Definition var_get (cenv: compilenv) (id: ident): option expr := + match PMap.get id cenv with + | Var_local chunk => Some(Evar id) + | Var_stack_scalar chunk ofs => Some(make_load chunk (make_stackaddr ofs)) + | Var_stack_array ofs => None + | Var_global => None + end. + +Definition var_set (cenv: compilenv) (id: ident) (rhs: expr): option expr := + match PMap.get id cenv with + | Var_local chunk => Some(Eassign id (make_cast chunk rhs)) + | Var_stack_scalar chunk ofs => + Some(make_store chunk (make_stackaddr ofs) rhs) + | Var_stack_array ofs => None + | Var_global => None + end. + +Definition var_addr (cenv: compilenv) (id: ident): option expr := + match PMap.get id cenv with + | Var_local chunk => None + | Var_stack_scalar chunk ofs => Some (make_stackaddr ofs) + | Var_stack_array ofs => Some (make_stackaddr ofs) + | Var_global => Some (Eop (Oaddrsymbol id Int.zero) Enil) + end. + +(** The translation from Csharpminor to Cminor can fail, which we + encode by returning option types ([None] denotes error). + Propagation of errors is done in monadic style, using the following + [bind] monadic composition operator, and a [do] notation inspired + by Haskell's. *) + +Definition bind (A B: Set) (a: option A) (b: A -> option B): option B := + match a with + | None => None + | Some x => b x + end. + +Notation "'do' X <- A ; B" := (bind _ _ A (fun X => B)) + (at level 200, X ident, A at level 100, B at level 200). + +(** Translation of expressions. All the hard work is done by the + [make_*] and [var_*] functions defined above. *) + +Fixpoint transl_expr (cenv: compilenv) (e: Csharpminor.expr) + {struct e}: option expr := + match e with + | Csharpminor.Evar id => var_get cenv id + | Csharpminor.Eaddrof id => var_addr cenv id + | Csharpminor.Eassign id e => + do te <- transl_expr cenv e; var_set cenv id te + | Csharpminor.Eop op el => + do tel <- transl_exprlist cenv el; make_op op tel + | Csharpminor.Eload chunk e => + do te <- transl_expr cenv e; Some (make_load chunk te) + | Csharpminor.Estore chunk e1 e2 => + do te1 <- transl_expr cenv e1; + do te2 <- transl_expr cenv e2; + Some (make_store chunk te1 te2) + | Csharpminor.Ecall sig e el => + do te <- transl_expr cenv e; + do tel <- transl_exprlist cenv el; + Some (Ecall sig te tel) + | Csharpminor.Econdition e1 e2 e3 => + do te1 <- transl_expr cenv e1; + do te2 <- transl_expr cenv e2; + do te3 <- transl_expr cenv e3; + Some (Cmconstr.conditionalexpr te1 te2 te3) + | Csharpminor.Elet e1 e2 => + do te1 <- transl_expr cenv e1; + do te2 <- transl_expr cenv e2; + Some (Elet te1 te2) + | Csharpminor.Eletvar n => + Some (Eletvar n) + end + +with transl_exprlist (cenv: compilenv) (el: Csharpminor.exprlist) + {struct el}: option exprlist := + match el with + | Csharpminor.Enil => + Some Enil + | Csharpminor.Econs e1 e2 => + do te1 <- transl_expr cenv e1; + do te2 <- transl_exprlist cenv e2; + Some (Econs te1 te2) + end. + +(** Translation of statements. Entirely straightforward. *) + +Fixpoint transl_stmt (cenv: compilenv) (s: Csharpminor.stmt) + {struct s}: option stmt := + match s with + | Csharpminor.Sexpr e => + do te <- transl_expr cenv e; Some(Sexpr te) + | Csharpminor.Sifthenelse e s1 s2 => + do te <- transl_expr cenv e; + do ts1 <- transl_stmtlist cenv s1; + do ts2 <- transl_stmtlist cenv s2; + Some (Cmconstr.ifthenelse te ts1 ts2) + | Csharpminor.Sloop s => + do ts <- transl_stmtlist cenv s; + Some (Sloop ts) + | Csharpminor.Sblock s => + do ts <- transl_stmtlist cenv s; + Some (Sblock ts) + | Csharpminor.Sexit n => + Some (Sexit n) + | Csharpminor.Sreturn None => + Some (Sreturn None) + | Csharpminor.Sreturn (Some e) => + do te <- transl_expr cenv e; + Some (Sreturn (Some te)) + end + +with transl_stmtlist (cenv: compilenv) (s: Csharpminor.stmtlist) + {struct s}: option stmtlist := + match s with + | Csharpminor.Snil => Some Snil + | Csharpminor.Scons s1 s2 => + do ts1 <- transl_stmt cenv s1; + do ts2 <- transl_stmtlist cenv s2; + Some (Scons ts1 ts2) + end. + +(** Computation of the set of variables whose address is taken in + a piece of Csharpminor code. *) + +Module Identset := MakeSet(PTree). + +Fixpoint addr_taken_expr (e: Csharpminor.expr): Identset.t := + match e with + | Csharpminor.Evar id => Identset.empty + | Csharpminor.Eaddrof id => Identset.add id Identset.empty + | Csharpminor.Eassign id e => addr_taken_expr e + | Csharpminor.Eop op el => addr_taken_exprlist el + | Csharpminor.Eload chunk e => addr_taken_expr e + | Csharpminor.Estore chunk e1 e2 => + Identset.union (addr_taken_expr e1) (addr_taken_expr e2) + | Csharpminor.Ecall sig e el => + Identset.union (addr_taken_expr e) (addr_taken_exprlist el) + | Csharpminor.Econdition e1 e2 e3 => + Identset.union (addr_taken_expr e1) + (Identset.union (addr_taken_expr e2) (addr_taken_expr e3)) + | Csharpminor.Elet e1 e2 => + Identset.union (addr_taken_expr e1) (addr_taken_expr e2) + | Csharpminor.Eletvar n => Identset.empty + end + +with addr_taken_exprlist (e: Csharpminor.exprlist): Identset.t := + match e with + | Csharpminor.Enil => Identset.empty + | Csharpminor.Econs e1 e2 => + Identset.union (addr_taken_expr e1) (addr_taken_exprlist e2) + end. + +Fixpoint addr_taken_stmt (s: Csharpminor.stmt): Identset.t := + match s with + | Csharpminor.Sexpr e => addr_taken_expr e + | Csharpminor.Sifthenelse e s1 s2 => + Identset.union (addr_taken_expr e) + (Identset.union (addr_taken_stmtlist s1) (addr_taken_stmtlist s2)) + | Csharpminor.Sloop s => addr_taken_stmtlist s + | Csharpminor.Sblock s => addr_taken_stmtlist s + | Csharpminor.Sexit n => Identset.empty + | Csharpminor.Sreturn None => Identset.empty + | Csharpminor.Sreturn (Some e) => addr_taken_expr e + end + +with addr_taken_stmtlist (s: Csharpminor.stmtlist): Identset.t := + match s with + | Csharpminor.Snil => Identset.empty + | Csharpminor.Scons s1 s2 => + Identset.union (addr_taken_stmt s1) (addr_taken_stmtlist s2) + end. + +(** Layout of the Cminor stack data block and construction of the + compilation environment. Csharpminor local variables that are + arrays or whose address is taken are allocated a slot in the Cminor + stack data. While this is not important for correctness, sufficient + padding is inserted to ensure adequate alignment of addresses. *) + +Definition assign_variable + (atk: Identset.t) + (id_lv: ident * local_variable) + (cenv_stacksize: compilenv * Z) : compilenv * Z := + let (cenv, stacksize) := cenv_stacksize in + match id_lv with + | (id, LVarray sz) => + let ofs := align stacksize 8 in + (PMap.set id (Var_stack_array ofs) cenv, ofs + Zmax 0 sz) + | (id, LVscalar chunk) => + if Identset.mem id atk then + let sz := Mem.size_chunk chunk in + let ofs := align stacksize sz in + (PMap.set id (Var_stack_scalar chunk ofs) cenv, ofs + sz) + else + (PMap.set id (Var_local chunk) cenv, stacksize) + end. + +Fixpoint assign_variables + (atk: Identset.t) + (id_lv_list: list (ident * local_variable)) + (cenv_stacksize: compilenv * Z) + {struct id_lv_list}: compilenv * Z := + match id_lv_list with + | nil => cenv_stacksize + | id_lv :: rem => + assign_variables atk rem (assign_variable atk id_lv cenv_stacksize) + end. + +Definition build_compilenv (f: Csharpminor.function) : compilenv * Z := + assign_variables + (addr_taken_stmtlist f.(Csharpminor.fn_body)) + (fn_variables f) + (PMap.init Var_global, 0). + +(** Function parameters whose address is taken must be stored in their + stack slots at function entry. (Cminor passes these parameters in + local variables.) *) + +Fixpoint store_parameters + (cenv: compilenv) (params: list (ident * memory_chunk)) + {struct params} : stmtlist := + match params with + | nil => Snil + | (id, chunk) :: rem => + match PMap.get id cenv with + | Var_local chunk => + Scons (Sexpr (Eassign id (make_cast chunk (Evar id)))) + (store_parameters cenv rem) + | Var_stack_scalar chunk ofs => + Scons (Sexpr (make_store chunk (make_stackaddr ofs) (Evar id))) + (store_parameters cenv rem) + | _ => + Snil (* should never happen *) + end + end. + +(** Translation of a Csharpminor function. We must check that the + required Cminor stack block is no bigger than [Int.max_signed], + otherwise address computations within the stack block could + overflow machine arithmetic and lead to incorrect code. *) + +Definition transl_function (f: Csharpminor.function): option function := + let (cenv, stacksize) := build_compilenv f in + if zle stacksize Int.max_signed then + do tbody <- transl_stmtlist cenv f.(Csharpminor.fn_body); + Some (mkfunction + (Csharpminor.fn_sig f) + (Csharpminor.fn_params_names f) + (Csharpminor.fn_vars_names f) + stacksize + (Scons (Sblock (store_parameters cenv f.(Csharpminor.fn_params))) tbody)) + else None. + +Definition transl_program (p: Csharpminor.program) : option program := + transform_partial_program transl_function p. diff --git a/backend/Cminorgenproof.v b/backend/Cminorgenproof.v new file mode 100644 index 00000000..b7c78843 --- /dev/null +++ b/backend/Cminorgenproof.v @@ -0,0 +1,2409 @@ +(** Correctness proof for Cminor generation. *) + +Require Import Coqlib. +Require Import Maps. +Require Import Sets. +Require Import AST. +Require Import Integers. +Require Import Floats. +Require Import Values. +Require Import Mem. +Require Import Globalenvs. +Require Import Csharpminor. +Require Import Op. +Require Import Cminor. +Require Cmconstr. +Require Import Cminorgen. +Require Import Cmconstrproof. + +Section TRANSLATION. + +Variable prog: Csharpminor.program. +Variable tprog: program. +Hypothesis TRANSL: transl_program prog = Some tprog. +Let ge : Csharpminor.genv := Genv.globalenv prog. +Let tge: genv := Genv.globalenv tprog. + +Lemma symbols_preserved: + forall (s: ident), Genv.find_symbol tge s = Genv.find_symbol ge s. +Proof. + intro. unfold ge, tge. + apply Genv.find_symbol_transf_partial with transl_function. + exact TRANSL. +Qed. + +Lemma function_ptr_translated: + forall (b: block) (f: Csharpminor.function), + Genv.find_funct_ptr ge b = Some f -> + exists tf, + Genv.find_funct_ptr tge b = Some tf /\ transl_function f = Some tf. +Proof. + intros. + generalize + (Genv.find_funct_ptr_transf_partial transl_function TRANSL H). + case (transl_function f). + intros tf [A B]. exists tf. tauto. + intros [A B]. elim B. reflexivity. +Qed. + +Lemma functions_translated: + forall (v: val) (f: Csharpminor.function), + Genv.find_funct ge v = Some f -> + exists tf, + Genv.find_funct tge v = Some tf /\ transl_function f = Some tf. +Proof. + intros. + generalize + (Genv.find_funct_transf_partial transl_function TRANSL H). + case (transl_function f). + intros tf [A B]. exists tf. tauto. + intros [A B]. elim B. reflexivity. +Qed. + +(** * Correspondence between Csharpminor's and Cminor's environments and memory states *) + +(** In Csharpminor, every variable is stored in a separate memory block. + In the corresponding Cminor code, some of these variables reside in + the local variable environment; others are sub-blocks of the stack data + block. We capture these changes in memory via a memory injection [f]: +- [f b = None] means that the Csharpminor block [b] no longer exist + in the execution of the generated Cminor code. This corresponds to + a Csharpminor local variable translated to a Cminor local variable. +- [f b = Some(b', ofs)] means that Csharpminor block [b] corresponds + to a sub-block of Cminor block [b] at offset [ofs]. + + A memory injection [f] defines a relation [val_inject f] between + values and a relation [mem_inject f] between memory states. + These relations, defined in file [Memory], will be used intensively + in our proof of simulation between Csharpminor and Cminor executions. + + In the remainder of this section, we define relations between + Csharpminor and Cminor environments and call stacks. + + Global environments match if the memory injection [f] leaves unchanged + the references to global symbols and functions. *) + +Record match_globalenvs (f: meminj) : Prop := + mk_match_globalenvs { + mg_symbols: + forall id b, + Genv.find_symbol ge id = Some b -> + f b = Some (b, 0) /\ Genv.find_symbol tge id = Some b; + mg_functions: + forall b, b < 0 -> f b = Some(b, 0) + }. + +(** Matching for a Csharpminor variable [id]. +- If this variable is mapped to a Cminor local variable, the + corresponding Csharpminor memory block [b] must no longer exist in + Cminor ([f b = None]). Moreover, the content of block [b] must + match the value of [id] found in the Cminor local environment [te]. +- If this variable is mapped to a sub-block of the Cminor stack data + at offset [ofs], the address of this variable in Csharpminor [Vptr b + Int.zero] must match the address of the sub-block [Vptr sp (Int.repr + ofs)]. +*) + +Inductive match_var (f: meminj) (id: ident) + (e: Csharpminor.env) (m: mem) (te: env) (sp: block) : + var_info -> Prop := + | match_var_local: + forall chunk b v v', + PTree.get id e = Some (b, LVscalar chunk) -> + Mem.load chunk m b 0 = Some v -> + f b = None -> + PTree.get id te = Some v' -> + val_inject f v v' -> + match_var f id e m te sp (Var_local chunk) + | match_var_stack_scalar: + forall chunk ofs b, + PTree.get id e = Some (b, LVscalar chunk) -> + val_inject f (Vptr b Int.zero) (Vptr sp (Int.repr ofs)) -> + match_var f id e m te sp (Var_stack_scalar chunk ofs) + | match_var_stack_array: + forall ofs sz b, + PTree.get id e = Some (b, LVarray sz) -> + val_inject f (Vptr b Int.zero) (Vptr sp (Int.repr ofs)) -> + match_var f id e m te sp (Var_stack_array ofs) + | match_var_global: + PTree.get id e = None -> + match_var f id e m te sp (Var_global). + +(** Matching between a Csharpminor environment [e] and a Cminor + environment [te]. The [lo] and [hi] parameters delimit the range + of addresses for the blocks referenced from [te]. *) + +Record match_env (f: meminj) (cenv: compilenv) + (e: Csharpminor.env) (m: mem) (te: env) (sp: block) + (lo hi: Z) : Prop := + mk_match_env { + +(** Each variable mentioned in the compilation environment must match + as defined above. *) + me_vars: + forall id, match_var f id e m te sp (PMap.get id cenv); + +(** The range [lo, hi] must not be empty. *) + me_low_high: + lo <= hi; + +(** Every block appearing in the Csharpminor environment [e] must be + in the range [lo, hi]. *) + me_bounded: + forall id b lv, PTree.get id e = Some(b, lv) -> lo <= b < hi; + +(** Distinct Csharpminor local variables must be mapped to distinct blocks. *) + me_inj: + forall id1 b1 lv1 id2 b2 lv2, + PTree.get id1 e = Some(b1, lv1) -> + PTree.get id2 e = Some(b2, lv2) -> + id1 <> id2 -> b1 <> b2; + +(** All blocks mapped to sub-blocks of the Cminor stack data must be in + the range [lo, hi]. *) + me_inv: + forall b delta, + f b = Some(sp, delta) -> lo <= b < hi; + +(** All Csharpminor blocks below [lo] (i.e. allocated before the blocks + referenced from [e]) must map to blocks that are below [sp] + (i.e. allocated before the stack data for the current Cminor function). *) + me_incr: + forall b tb delta, + f b = Some(tb, delta) -> b < lo -> tb < sp + }. + +(** Call stacks represent abstractly the execution state of the current + Csharpminor and Cminor functions, as well as the states of the + calling functions. A call stack is a list of frames, each frame + collecting information on the current execution state of a Csharpminor + function and its Cminor translation. *) + +Record frame : Set := + mkframe { + fr_cenv: compilenv; + fr_e: Csharpminor.env; + fr_te: env; + fr_sp: block; + fr_low: Z; + fr_high: Z + }. + +Definition callstack : Set := list frame. + +(** Matching of call stacks imply matching of environments for each of + the frames, plus matching of the global environments, plus disjointness + conditions over the memory blocks allocated for local variables + and Cminor stack data. *) + +Inductive match_callstack: meminj -> callstack -> Z -> Z -> mem -> Prop := + | mcs_nil: + forall f bound tbound m, + match_globalenvs f -> + match_callstack f nil bound tbound m + | mcs_cons: + forall f cenv e te sp lo hi cs bound tbound m, + hi <= bound -> + sp < tbound -> + match_env f cenv e m te sp lo hi -> + match_callstack f cs lo sp m -> + match_callstack f (mkframe cenv e te sp lo hi :: cs) bound tbound m. + +(** The remainder of this section is devoted to showing preservation + of the [match_callstack] invariant under various assignments and memory + stores. First: preservation by memory stores to ``mapped'' blocks + (block that have a counterpart in the Cminor execution). *) + +Lemma match_env_store_mapped: + forall f cenv e m1 m2 te sp lo hi chunk b ofs v, + f b <> None -> + store chunk m1 b ofs v = Some m2 -> + match_env f cenv e m1 te sp lo hi -> + match_env f cenv e m2 te sp lo hi. +Proof. + intros. inversion H1. constructor; auto. + (* vars *) + intros. generalize (me_vars0 id); intro. + inversion H2; econstructor; eauto. + rewrite <- H5. eapply load_store_other; eauto. + left. congruence. +Qed. + +Lemma match_callstack_mapped: + forall f cs bound tbound m1, + match_callstack f cs bound tbound m1 -> + forall chunk b ofs v m2, + f b <> None -> + store chunk m1 b ofs v = Some m2 -> + match_callstack f cs bound tbound m2. +Proof. + induction 1; intros; econstructor; eauto. + eapply match_env_store_mapped; eauto. +Qed. + +(** Preservation by assignment to a Csharpminor variable that is + translated to a Cminor local variable. The value being assigned + must be normalized with respect to the memory chunk of the variable, + in the following sense. *) + +Definition val_normalized (chunk: memory_chunk) (v: val) : Prop := + exists v0, v = Val.load_result chunk v0. + +Lemma load_result_idem: + forall chunk v, + Val.load_result chunk (Val.load_result chunk v) = + Val.load_result chunk v. +Proof. + destruct chunk; destruct v; simpl; auto. + rewrite Int.cast8_signed_idem; auto. + rewrite Int.cast8_unsigned_idem; auto. + rewrite Int.cast16_signed_idem; auto. + rewrite Int.cast16_unsigned_idem; auto. + rewrite Float.singleoffloat_idem; auto. +Qed. + +Lemma load_result_normalized: + forall chunk v, + val_normalized chunk v -> Val.load_result chunk v = v. +Proof. + intros chunk v [v0 EQ]. rewrite EQ. apply load_result_idem. +Qed. + +Lemma match_env_store_local: + forall f cenv e m1 m2 te sp lo hi id b chunk v tv, + e!id = Some(b, LVscalar chunk) -> + val_inject f v tv -> + val_normalized chunk tv -> + store chunk m1 b 0 v = Some m2 -> + match_env f cenv e m1 te sp lo hi -> + match_env f cenv e m2 (PTree.set id tv te) sp lo hi. +Proof. + intros. inversion H3. constructor; auto. + intros. generalize (me_vars0 id0); intro. + inversion H4. + (* var_local *) + case (peq id id0); intro. + (* the stored variable *) + subst id0. + change Csharpminor.local_variable with local_variable in H6. + rewrite H in H6. injection H6; clear H6; intros; subst b0 chunk0. + econstructor. eauto. + eapply load_store_same; eauto. auto. + rewrite PTree.gss. reflexivity. + replace tv with (Val.load_result chunk tv). + apply Mem.load_result_inject. constructor; auto. + apply load_result_normalized; auto. + (* a different variable *) + econstructor; eauto. + rewrite <- H7. eapply load_store_other; eauto. + rewrite PTree.gso; auto. + (* var_stack_scalar *) + econstructor; eauto. + (* var_stack_array *) + econstructor; eauto. + (* var_global *) + econstructor; eauto. +Qed. + +Lemma match_env_store_above: + forall f cenv e m1 m2 te sp lo hi chunk b v, + store chunk m1 b 0 v = Some m2 -> + hi <= b -> + match_env f cenv e m1 te sp lo hi -> + match_env f cenv e m2 te sp lo hi. +Proof. + intros. inversion H1; constructor; auto. + intros. generalize (me_vars0 id); intro. + inversion H2; econstructor; eauto. + rewrite <- H5. eapply load_store_other; eauto. + left. generalize (me_bounded0 _ _ _ H4). unfold block in *. omega. +Qed. + +Lemma match_callstack_store_above: + forall f cs bound tbound m1, + match_callstack f cs bound tbound m1 -> + forall chunk b v m2, + store chunk m1 b 0 v = Some m2 -> + bound <= b -> + match_callstack f cs bound tbound m2. +Proof. + induction 1; intros; econstructor; eauto. + eapply match_env_store_above with (b := b); eauto. omega. + eapply IHmatch_callstack; eauto. + inversion H1. omega. +Qed. + +Lemma match_callstack_store_local: + forall f cenv e te sp lo hi cs bound tbound m1 m2 id b chunk v tv, + e!id = Some(b, LVscalar chunk) -> + val_inject f v tv -> + val_normalized chunk tv -> + store chunk m1 b 0 v = Some m2 -> + match_callstack f (mkframe cenv e te sp lo hi :: cs) bound tbound m1 -> + match_callstack f (mkframe cenv e (PTree.set id tv te) sp lo hi :: cs) bound tbound m2. +Proof. + intros. inversion H3. constructor; auto. + eapply match_env_store_local; eauto. + eapply match_callstack_store_above; eauto. + inversion H17. + generalize (me_bounded0 _ _ _ H). omega. +Qed. + +(** A variant of [match_callstack_store_local] where the Cminor environment + [te] already associates to [id] a value that matches the assigned value. + In this case, [match_callstack] is preserved even if no assignment + takes place on the Cminor side. *) + +Lemma match_env_extensional: + forall f cenv e m te1 sp lo hi, + match_env f cenv e m te1 sp lo hi -> + forall te2, + (forall id, te2!id = te1!id) -> + match_env f cenv e m te2 sp lo hi. +Proof. + induction 1; intros; econstructor; eauto. + intros. generalize (me_vars0 id); intro. + inversion H0; econstructor; eauto. + rewrite H. auto. +Qed. + +Lemma match_callstack_store_local_unchanged: + forall f cenv e te sp lo hi cs bound tbound m1 m2 id b chunk v tv, + e!id = Some(b, LVscalar chunk) -> + val_inject f v tv -> + val_normalized chunk tv -> + store chunk m1 b 0 v = Some m2 -> + te!id = Some tv -> + match_callstack f (mkframe cenv e te sp lo hi :: cs) bound tbound m1 -> + match_callstack f (mkframe cenv e te sp lo hi :: cs) bound tbound m2. +Proof. + intros. inversion H4. constructor; auto. + apply match_env_extensional with (PTree.set id tv te). + eapply match_env_store_local; eauto. + intros. rewrite PTree.gsspec. + case (peq id0 id); intros. congruence. auto. + eapply match_callstack_store_above; eauto. + inversion H18. + generalize (me_bounded0 _ _ _ H). omega. +Qed. + +(** Preservation of [match_callstack] by freeing all blocks allocated + for local variables at function entry (on the Csharpminor side). *) + +Lemma match_callstack_incr_bound: + forall f cs bound tbound m, + match_callstack f cs bound tbound m -> + forall bound' tbound', + bound <= bound' -> tbound <= tbound' -> + match_callstack f cs bound' tbound' m. +Proof. + intros. inversion H; constructor; auto. omega. omega. +Qed. + +Lemma load_freelist: + forall fbl chunk m b ofs, + (forall b', In b' fbl -> b' <> b) -> + load chunk (free_list m fbl) b ofs = load chunk m b ofs. +Proof. + induction fbl; simpl; intros. + auto. + rewrite load_free. apply IHfbl. + intros. apply H. tauto. + apply sym_not_equal. apply H. tauto. +Qed. + +Lemma match_env_freelist: + forall f cenv e m te sp lo hi fbl, + match_env f cenv e m te sp lo hi -> + (forall b, In b fbl -> hi <= b) -> + match_env f cenv e (free_list m fbl) te sp lo hi. +Proof. + intros. inversion H. econstructor; eauto. + intros. generalize (me_vars0 id); intro. + inversion H1; econstructor; eauto. + rewrite <- H4. apply load_freelist. + intros. generalize (H0 _ H8); intro. + generalize (me_bounded0 _ _ _ H3). unfold block; omega. +Qed. + +Lemma match_callstack_freelist_rec: + forall f cs bound tbound m, + match_callstack f cs bound tbound m -> + forall fbl, + (forall b, In b fbl -> bound <= b) -> + match_callstack f cs bound tbound (free_list m fbl). +Proof. + induction 1; intros; constructor; auto. + eapply match_env_freelist; eauto. + intros. generalize (H3 _ H4). omega. + apply IHmatch_callstack. intros. + generalize (H3 _ H4). inversion H1. omega. +Qed. + +Lemma match_callstack_freelist: + forall f cenv e te sp lo hi cs bound tbound m fbl, + (forall b, In b fbl -> lo <= b) -> + match_callstack f (mkframe cenv e te sp lo hi :: cs) bound tbound m -> + match_callstack f cs bound tbound (free_list m fbl). +Proof. + intros. inversion H0. inversion H14. + apply match_callstack_incr_bound with lo sp. + apply match_callstack_freelist_rec. auto. + assumption. + omega. omega. +Qed. + +(** Preservation of [match_callstack] when allocating a block for + a local variable on the Csharpminor side. *) + +Lemma load_from_alloc_is_undef: + forall m1 chunk m2 b, + alloc m1 0 (size_chunk chunk) = (m2, b) -> + load chunk m2 b 0 = Some Vundef. +Proof. + intros. + assert (valid_block m2 b). eapply valid_new_block; eauto. + assert (low_bound m2 b <= 0). + generalize (low_bound_alloc _ _ b _ _ _ H). rewrite zeq_true. omega. + assert (0 + size_chunk chunk <= high_bound m2 b). + generalize (high_bound_alloc _ _ b _ _ _ H). rewrite zeq_true. omega. + elim (load_in_bounds _ _ _ _ H0 H1 H2). intros v LOAD. + assert (v = Vundef). eapply load_alloc_same; eauto. + congruence. +Qed. + +Lemma match_env_alloc_same: + forall m1 lv m2 b info f1 cenv1 e1 te sp lo id data tv, + alloc m1 0 (sizeof lv) = (m2, b) -> + match info with + | Var_local chunk => data = None /\ lv = LVscalar chunk + | Var_stack_scalar chunk pos => data = Some(sp, pos) /\ lv = LVscalar chunk + | Var_stack_array pos => data = Some(sp, pos) /\ exists sz, lv = LVarray sz + | Var_global => False + end -> + match_env f1 cenv1 e1 m1 te sp lo m1.(nextblock) -> + te!id = Some tv -> + let f2 := extend_inject b data f1 in + let cenv2 := PMap.set id info cenv1 in + let e2 := PTree.set id (b, lv) e1 in + inject_incr f1 f2 -> + match_env f2 cenv2 e2 m2 te sp lo m2.(nextblock). +Proof. + intros. + assert (b = m1.(nextblock)). + injection H; intros. auto. + assert (m2.(nextblock) = Zsucc m1.(nextblock)). + injection H; intros. rewrite <- H6; reflexivity. + inversion H1. constructor. + (* me_vars *) + intros. unfold cenv2. rewrite PMap.gsspec. case (peq id0 id); intro. + (* same var *) + subst id0. destruct info. + (* info = Var_local chunk *) + elim H0; intros. + apply match_var_local with b Vundef tv. + unfold e2; rewrite PTree.gss. congruence. + eapply load_from_alloc_is_undef; eauto. + rewrite H7 in H. unfold sizeof in H. eauto. + unfold f2, extend_inject, eq_block. rewrite zeq_true. auto. + auto. + constructor. + (* info = Var_stack_scalar chunk ofs *) + elim H0; intros. + apply match_var_stack_scalar with b. + unfold e2; rewrite PTree.gss. congruence. + eapply val_inject_ptr. + unfold f2, extend_inject, eq_block. rewrite zeq_true. eauto. + rewrite Int.add_commut. rewrite Int.add_zero. auto. + (* info = Var_stack_array z *) + elim H0; intros A [sz B]. + apply match_var_stack_array with sz b. + unfold e2; rewrite PTree.gss. congruence. + eapply val_inject_ptr. + unfold f2, extend_inject, eq_block. rewrite zeq_true. eauto. + rewrite Int.add_commut. rewrite Int.add_zero. auto. + (* info = Var_global *) + contradiction. + (* other vars *) + generalize (me_vars0 id0); intros. + inversion H6; econstructor; eauto. + unfold e2; rewrite PTree.gso; auto. + unfold f2, extend_inject, eq_block; rewrite zeq_false; auto. + generalize (me_bounded0 _ _ _ H8). unfold block in *; omega. + unfold e2; rewrite PTree.gso; eauto. + unfold e2; rewrite PTree.gso; eauto. + unfold e2; rewrite PTree.gso; eauto. + (* lo <= hi *) + unfold block in *; omega. + (* me_bounded *) + intros until lv0. unfold e2; rewrite PTree.gsspec. + case (peq id0 id); intros. + subst id0. inversion H6. subst b0. unfold block in *; omega. + generalize (me_bounded0 _ _ _ H6). rewrite H5. omega. + (* me_inj *) + intros until lv2. unfold e2; repeat rewrite PTree.gsspec. + case (peq id1 id); case (peq id2 id); intros. + congruence. + inversion H6. subst b1. rewrite H4. + generalize (me_bounded0 _ _ _ H7). unfold block; omega. + inversion H7. subst b2. rewrite H4. + generalize (me_bounded0 _ _ _ H6). unfold block; omega. + eauto. + (* me_inv *) + intros until delta. unfold f2, extend_inject, eq_block. + case (zeq b0 b); intros. + subst b0. rewrite H4; rewrite H5. omega. + generalize (me_inv0 _ _ H6). rewrite H5. omega. + (* me_incr *) + intros until delta. unfold f2, extend_inject, eq_block. + case (zeq b0 b); intros. + subst b0. unfold block in *; omegaContradiction. + eauto. +Qed. + +Lemma match_env_alloc_other: + forall f1 cenv e m1 m2 te sp lo hi chunk b data, + alloc m1 0 (sizeof chunk) = (m2, b) -> + match data with None => True | Some (b', delta') => sp < b' end -> + hi <= m1.(nextblock) -> + match_env f1 cenv e m1 te sp lo hi -> + let f2 := extend_inject b data f1 in + inject_incr f1 f2 -> + match_env f2 cenv e m2 te sp lo hi. +Proof. + intros. + assert (b = m1.(nextblock)). injection H; auto. + rewrite <- H4 in H1. + inversion H2. constructor; auto. + (* me_vars *) + intros. generalize (me_vars0 id); intro. + inversion H5; econstructor; eauto. + unfold f2, extend_inject, eq_block. rewrite zeq_false. auto. + generalize (me_bounded0 _ _ _ H7). unfold block in *; omega. + (* me_bounded *) + intros until delta. unfold f2, extend_inject, eq_block. + case (zeq b0 b); intros. rewrite H5 in H0. omegaContradiction. + eauto. + (* me_incr *) + intros until delta. unfold f2, extend_inject, eq_block. + case (zeq b0 b); intros. subst b0. omegaContradiction. + eauto. +Qed. + +Lemma match_callstack_alloc_other: + forall f1 cs bound tbound m1, + match_callstack f1 cs bound tbound m1 -> + forall lv m2 b data, + alloc m1 0 (sizeof lv) = (m2, b) -> + match data with None => True | Some (b', delta') => tbound <= b' end -> + bound <= m1.(nextblock) -> + let f2 := extend_inject b data f1 in + inject_incr f1 f2 -> + match_callstack f2 cs bound tbound m2. +Proof. + induction 1; intros. + constructor. + inversion H. constructor. + intros. elim (mg_symbols0 _ _ H4); intros. + split; auto. elim (H3 b0); intros; congruence. + intros. generalize (mg_functions0 _ H4). elim (H3 b0); congruence. + constructor. auto. auto. + unfold f2; eapply match_env_alloc_other; eauto. + destruct data; auto. destruct p. omega. omega. + unfold f2; eapply IHmatch_callstack; eauto. + destruct data; auto. destruct p. omega. + inversion H1; omega. +Qed. + +Lemma match_callstack_alloc_left: + forall m1 lv m2 b info f1 cenv1 e1 te sp lo id data cs tv tbound, + alloc m1 0 (sizeof lv) = (m2, b) -> + match info with + | Var_local chunk => data = None /\ lv = LVscalar chunk + | Var_stack_scalar chunk pos => data = Some(sp, pos) /\ lv = LVscalar chunk + | Var_stack_array pos => data = Some(sp, pos) /\ exists sz, lv = LVarray sz + | Var_global => False + end -> + match_callstack f1 (mkframe cenv1 e1 te sp lo m1.(nextblock) :: cs) m1.(nextblock) tbound m1 -> + te!id = Some tv -> + let f2 := extend_inject b data f1 in + let cenv2 := PMap.set id info cenv1 in + let e2 := PTree.set id (b, lv) e1 in + inject_incr f1 f2 -> + match_callstack f2 (mkframe cenv2 e2 te sp lo m2.(nextblock) :: cs) m2.(nextblock) tbound m2. +Proof. + intros. inversion H1. constructor. omega. auto. + unfold f2, cenv2, e2. eapply match_env_alloc_same; eauto. + unfold f2; eapply match_callstack_alloc_other; eauto. + destruct info. + elim H0; intros. rewrite H19. auto. + elim H0; intros. rewrite H19. omega. + elim H0; intros. rewrite H19. omega. + contradiction. + inversion H17; omega. +Qed. + +Lemma match_callstack_alloc_right: + forall f cs bound tm1 m tm2 lo hi b, + alloc tm1 lo hi = (tm2, b) -> + match_callstack f cs bound tm1.(nextblock) m -> + match_callstack f cs bound tm2.(nextblock) m. +Proof. + intros. eapply match_callstack_incr_bound; eauto. omega. + injection H; intros. rewrite <- H2; simpl. omega. +Qed. + +(** [match_callstack] implies [match_globalenvs]. *) + +Lemma match_callstack_match_globalenvs: + forall f cs bound tbound m, + match_callstack f cs bound tbound m -> + match_globalenvs f. +Proof. + induction 1; eauto. +Qed. + +(** * Correctness of Cminor construction functions *) + +Hint Resolve eval_negint eval_negfloat eval_absfloat eval_intoffloat + eval_floatofint eval_floatofintu eval_notint eval_notbool + eval_cast8signed eval_cast8unsigned eval_cast16signed + eval_cast16unsigned eval_singleoffloat eval_add eval_add_ptr + eval_add_ptr_2 eval_sub eval_sub_ptr_int eval_sub_ptr_ptr + eval_mul eval_divs eval_mods eval_divu eval_modu + eval_and eval_or eval_xor eval_shl eval_shr eval_shru + eval_addf eval_subf eval_mulf eval_divf + eval_cmp eval_cmp_null_r eval_cmp_null_l eval_cmp_ptr + eval_cmpu eval_cmpf: evalexpr. + +Remark val_inject_val_of_bool: + forall f b, val_inject f (Val.of_bool b) (Val.of_bool b). +Proof. + intros; destruct b; unfold Val.of_bool, Vtrue, Vfalse; constructor. +Qed. + +Ltac TrivialOp := + match goal with + | [ |- exists x, _ /\ val_inject _ (Vint ?x) _ ] => + exists (Vint x); split; + [eauto with evalexpr | constructor] + | [ |- exists x, _ /\ val_inject _ (Vfloat ?x) _ ] => + exists (Vfloat x); split; + [eauto with evalexpr | constructor] + | [ |- exists x, _ /\ val_inject _ (Val.of_bool ?x) _ ] => + exists (Val.of_bool x); split; + [eauto with evalexpr | apply val_inject_val_of_bool] + | _ => idtac + end. + +Remark eval_compare_null_inv: + forall c i v, + Csharpminor.eval_compare_null c i = Some v -> + i = Int.zero /\ (c = Ceq /\ v = Vfalse \/ c = Cne /\ v = Vtrue). +Proof. + intros until v. unfold Csharpminor.eval_compare_null. + predSpec Int.eq Int.eq_spec i Int.zero. + case c; intro EQ; simplify_eq EQ; intro; subst v; tauto. + congruence. +Qed. + +(** Correctness of [make_op]. The generated Cminor code evaluates + to a value that matches the result value of the Csharpminor operation, + provided arguments match pairwise ([val_list_inject f] hypothesis). *) + +Lemma make_op_correct: + forall al a op vl m2 v sp le te1 tm1 te2 tm2 tvl f, + make_op op al = Some a -> + Csharpminor.eval_operation op vl m2 = Some v -> + eval_exprlist tge (Vptr sp Int.zero) le te1 tm1 al te2 tm2 tvl -> + val_list_inject f vl tvl -> + mem_inject f m2 tm2 -> + exists tv, + eval_expr tge (Vptr sp Int.zero) le te1 tm1 a te2 tm2 tv + /\ val_inject f v tv. +Proof. + intros. + destruct al as [ | a1 al]; + [idtac | destruct al as [ | a2 al]; + [idtac | destruct al as [ | a3 al]]]; + simpl in H; try discriminate. + (* Constant operators *) + inversion H1. subst sp0 le0 e m te2 tm1 tvl. + inversion H2. subst vl. + destruct op; simplify_eq H; intro; subst a; + simpl in H0; injection H0; intro; subst v. + (* intconst *) + TrivialOp. econstructor. constructor. reflexivity. + (* floatconst *) + TrivialOp. econstructor. constructor. reflexivity. + (* Unary operators *) + inversion H1. subst sp0 le0 e m a0 bl e2 m0 tvl. + inversion H14. subst sp0 le0 e m e1 m1 vl0. + inversion H2. subst vl v' vl'. inversion H8. subst vl0. + destruct op; simplify_eq H; intro; subst a; + simpl in H0; destruct v1; simplify_eq H0; intro; subst v; + inversion H7; subst v0; + TrivialOp. + (* Binary operations *) + inversion H1. subst sp0 le0 e m a0 bl e2 m0 tvl. + inversion H14. subst sp0 le0 e m a0 bl e2 m3 vl0. + inversion H16. subst sp0 le0 e m e0 m0 vl1. + inversion H2. subst vl v' vl'. + inversion H8. subst vl0 v' vl'. + inversion H12. subst vl. + destruct op; simplify_eq H; intro; subst a; + simpl in H0; destruct v2; destruct v3; simplify_eq H0; intro; try subst v; + inversion H7; inversion H9; subst v0; subst v1; + TrivialOp. + (* add int ptr *) + exists (Vptr b2 (Int.add ofs2 i)); split. + eauto with evalexpr. apply val_inject_ptr with x. auto. + subst ofs2. repeat rewrite Int.add_assoc. decEq. apply Int.add_commut. + (* add ptr int *) + exists (Vptr b2 (Int.add ofs2 i0)); split. + eauto with evalexpr. apply val_inject_ptr with x. auto. + subst ofs2. repeat rewrite Int.add_assoc. decEq. apply Int.add_commut. + (* sub ptr int *) + exists (Vptr b2 (Int.sub ofs2 i0)); split. + eauto with evalexpr. apply val_inject_ptr with x. auto. + subst ofs2. apply Int.sub_add_l. + (* sub ptr ptr *) + destruct (eq_block b b0); simplify_eq H0; intro; subst v; subst b0. + assert (b4 = b2). congruence. subst b4. + exists (Vint (Int.sub ofs2 ofs3)); split. eauto with evalexpr. + subst ofs2 ofs3. replace x0 with x. rewrite Int.sub_shifted. constructor. + congruence. + (* divs *) + generalize (Int.eq_spec i0 Int.zero); destruct (Int.eq i0 Int.zero); intro; + simplify_eq H0; intro; subst v. TrivialOp. + (* divu *) + generalize (Int.eq_spec i0 Int.zero); destruct (Int.eq i0 Int.zero); intro; + simplify_eq H0; intro; subst v. TrivialOp. + (* mods *) + generalize (Int.eq_spec i0 Int.zero); destruct (Int.eq i0 Int.zero); intro; + simplify_eq H0; intro; subst v. TrivialOp. + (* modu *) + generalize (Int.eq_spec i0 Int.zero); destruct (Int.eq i0 Int.zero); intro; + simplify_eq H0; intro; subst v. TrivialOp. + (* shl *) + caseEq (Int.ltu i0 (Int.repr 32)); intro EQ; rewrite EQ in H0; + simplify_eq H0; intro; subst v. TrivialOp. + (* shr *) + caseEq (Int.ltu i0 (Int.repr 32)); intro EQ; rewrite EQ in H0; + simplify_eq H0; intro; subst v. TrivialOp. + (* shru *) + caseEq (Int.ltu i0 (Int.repr 32)); intro EQ; rewrite EQ in H0; + simplify_eq H0; intro; subst v. TrivialOp. + (* cmp int ptr *) + elim (eval_compare_null_inv _ _ _ H0); intros; subst i1 i. + exists v; split. eauto with evalexpr. + elim H18; intros [A B]; subst v; unfold Vtrue, Vfalse; constructor. + (* cmp ptr int *) + elim (eval_compare_null_inv _ _ _ H0); intros; subst i1 i0. + exists v; split. eauto with evalexpr. + elim H18; intros [A B]; subst v; unfold Vtrue, Vfalse; constructor. + (* cmp ptr ptr *) + caseEq (valid_pointer m2 b (Int.signed i) && valid_pointer m2 b0 (Int.signed i0)); + intro EQ; rewrite EQ in H0; try discriminate. + destruct (eq_block b b0); simplify_eq H0; intro; subst v b0. + assert (b4 = b2); [congruence|subst b4]. + assert (x0 = x); [congruence|subst x0]. + elim (andb_prop _ _ EQ); intros. + exists (Val.of_bool (Int.cmp c ofs2 ofs3)); split. + eauto with evalexpr. + subst ofs2 ofs3. rewrite Int.translate_cmp. + apply val_inject_val_of_bool. + eapply valid_pointer_inject_no_overflow; eauto. + eapply valid_pointer_inject_no_overflow; eauto. +Qed. + +(** Correctness of [make_cast]. Note that the resulting Cminor value is + normalized according to the given memory chunk. *) + +Lemma make_cast_correct: + forall f sp le te1 tm1 a te2 tm2 v chunk v' tv, + eval_expr tge (Vptr sp Int.zero) le te1 tm1 a te2 tm2 tv -> + cast chunk v = Some v' -> + val_inject f v tv -> + exists tv', + eval_expr tge (Vptr sp Int.zero) le + te1 tm1 (make_cast chunk a) + te2 tm2 tv' + /\ val_inject f v' tv' + /\ val_normalized chunk tv'. +Proof. + intros. destruct chunk; destruct v; simplify_eq H0; intro; subst v'; simpl; + inversion H1; subst tv. + + exists (Vint (Int.cast8signed i)). + split. apply eval_cast8signed; auto. + split. constructor. exists (Vint i); reflexivity. + + exists (Vint (Int.cast8unsigned i)). + split. apply eval_cast8unsigned; auto. + split. constructor. exists (Vint i); reflexivity. + + exists (Vint (Int.cast16signed i)). + split. apply eval_cast16signed; auto. + split. constructor. exists (Vint i); reflexivity. + + exists (Vint (Int.cast16unsigned i)). + split. apply eval_cast16unsigned; auto. + split. constructor. exists (Vint i); reflexivity. + + exists (Vint i). + split. auto. split. auto. exists (Vint i); reflexivity. + + exists (Vptr b2 ofs2). + split. auto. split. auto. exists (Vptr b2 ofs2); reflexivity. + + exists (Vfloat (Float.singleoffloat f0)). + split. apply eval_singleoffloat; auto. + split. constructor. exists (Vfloat f0); reflexivity. + + exists (Vfloat f0). + split. auto. split. auto. exists (Vfloat f0); reflexivity. +Qed. + +Lemma make_stackaddr_correct: + forall sp le te tm ofs, + eval_expr tge (Vptr sp Int.zero) le + te tm (make_stackaddr ofs) + te tm (Vptr sp (Int.repr ofs)). +Proof. + intros; unfold make_stackaddr. + eapply eval_Eop. econstructor. simpl. decEq. decEq. + rewrite Int.add_commut. apply Int.add_zero. +Qed. + +(** Correctness of [make_load] and [make_store]. *) + +Lemma make_load_correct: + forall sp le te1 tm1 a te2 tm2 va chunk v, + eval_expr tge (Vptr sp Int.zero) le te1 tm1 a te2 tm2 va -> + Mem.loadv chunk tm2 va = Some v -> + eval_expr tge (Vptr sp Int.zero) le + te1 tm1 (make_load chunk a) + te2 tm2 v. +Proof. + intros; unfold make_load. + eapply eval_load; eauto. +Qed. + +Lemma val_content_inject_cast: + forall f chunk v1 v2 tv1, + cast chunk v1 = Some v2 -> + val_inject f v1 tv1 -> + val_content_inject f (mem_chunk chunk) v2 tv1. +Proof. + intros. destruct chunk; destruct v1; simplify_eq H; intro; subst v2; + inversion H0; simpl. + apply val_content_inject_8. apply Int.cast8_unsigned_signed. + apply val_content_inject_8. apply Int.cast8_unsigned_idem. + apply val_content_inject_16. apply Int.cast16_unsigned_signed. + apply val_content_inject_16. apply Int.cast16_unsigned_idem. + constructor; constructor. + constructor; econstructor; eauto. + apply val_content_inject_32. apply Float.singleoffloat_idem. + constructor; constructor. +Qed. + +Lemma make_store_correct: + forall f sp le te1 tm1 addr te2 tm2 tvaddr rhs te3 tm3 tvrhs + chunk vrhs v m3 vaddr m4, + eval_expr tge (Vptr sp Int.zero) le + te1 tm1 addr te2 tm2 tvaddr -> + eval_expr tge (Vptr sp Int.zero) le + te2 tm2 rhs te3 tm3 tvrhs -> + cast chunk vrhs = Some v -> + Mem.storev chunk m3 vaddr v = Some m4 -> + mem_inject f m3 tm3 -> + val_inject f vaddr tvaddr -> + val_inject f vrhs tvrhs -> + exists tm4, exists tv, + eval_expr tge (Vptr sp Int.zero) le + te1 tm1 (make_store chunk addr rhs) + te3 tm4 tv + /\ mem_inject f m4 tm4 + /\ val_inject f v tv + /\ nextblock tm4 = nextblock tm3. +Proof. + intros. unfold make_store. + assert (val_content_inject f (mem_chunk chunk) v tvrhs). + eapply val_content_inject_cast; eauto. + elim (storev_mapped_inject_1 _ _ _ _ _ _ _ _ _ H3 H2 H4 H6). + intros tm4 [STORE MEMINJ]. + generalize (eval_store _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ H H0 STORE). + intro EVALSTORE. + elim (make_cast_correct _ _ _ _ _ _ _ _ _ _ _ _ EVALSTORE H1 H5). + intros tv [EVALCAST [VALINJ VALNORM]]. + exists tm4; exists tv. intuition. + unfold storev in STORE; destruct tvaddr; try discriminate. + generalize (store_inv _ _ _ _ _ _ STORE). simpl. tauto. +Qed. + +(** Correctness of the variable accessors [var_get], [var_set] + and [var_addr]. *) + +Lemma var_get_correct: + forall cenv id a f e te sp lo hi m cs tm b chunk v le, + var_get cenv id = Some a -> + match_callstack f (mkframe cenv e te sp lo hi :: cs) m.(nextblock) tm.(nextblock) m -> + mem_inject f m tm -> + e!id = Some(b, LVscalar chunk) -> + load chunk m b 0 = Some v -> + exists tv, + eval_expr tge (Vptr sp Int.zero) le te tm a te tm tv /\ + val_inject f v tv. +Proof. + unfold var_get; intros. + assert (match_var f id e m te sp cenv!!id). + inversion H0. inversion H17. auto. + caseEq (cenv!!id); intros; rewrite H5 in H; simplify_eq H; clear H; intros; subst a. + (* var_local *) + rewrite H5 in H4. inversion H4. + exists v'; split. + apply eval_Evar. auto. + replace v with v0. auto. congruence. + (* var_stack_scalar *) + rewrite H5 in H4. inversion H4. + inversion H8. subst b1 b2 ofs1 ofs2. + assert (b0 = b). congruence. subst b0. + assert (m0 = chunk). congruence. subst m0. + assert (loadv chunk m (Vptr b Int.zero) = Some v). assumption. + generalize (loadv_inject _ _ _ _ _ _ _ H1 H H8). + subst chunk0. + intros [tv [LOAD INJ]]. + exists tv; split. + eapply make_load_correct; eauto. eapply make_stackaddr_correct; eauto. + auto. +Qed. + +Lemma var_addr_local_correct: + forall cenv id a f e te sp lo hi m cs tm b lv le, + var_addr cenv id = Some a -> + match_callstack f (mkframe cenv e te sp lo hi :: cs) m.(nextblock) tm.(nextblock) m -> + e!id = Some(b, lv) -> + exists tv, + eval_expr tge (Vptr sp Int.zero) le te tm a te tm tv /\ + val_inject f (Vptr b Int.zero) tv. +Proof. + unfold var_addr; intros. + assert (match_var f id e m te sp cenv!!id). + inversion H0. inversion H15. auto. + caseEq (cenv!!id); intros; rewrite H3 in H; simplify_eq H; clear H; intros; subst a; + rewrite H3 in H2; inversion H2. + (* var_stack_scalar *) + exists (Vptr sp (Int.repr z)); split. + eapply make_stackaddr_correct. + replace b with b0. auto. congruence. + (* var_stack_array *) + exists (Vptr sp (Int.repr z)); split. + eapply make_stackaddr_correct. + replace b with b0. auto. congruence. + (* var_global *) + congruence. +Qed. + +Lemma var_addr_global_correct: + forall cenv id a f e te sp lo hi m cs tm b le, + var_addr cenv id = Some a -> + match_callstack f (mkframe cenv e te sp lo hi :: cs) m.(nextblock) tm.(nextblock) m -> + e!id = None -> + Genv.find_symbol ge id = Some b -> + exists tv, + eval_expr tge (Vptr sp Int.zero) le te tm a te tm tv /\ + val_inject f (Vptr b Int.zero) tv. +Proof. + unfold var_addr; intros. + assert (match_var f id e m te sp cenv!!id). + inversion H0. inversion H16. auto. + destruct (cenv!!id); inversion H3; try congruence. + injection H; intro; subst a. + (* var_global *) + generalize (match_callstack_match_globalenvs _ _ _ _ _ H0); intro. + inversion H5. + elim (mg_symbols0 _ _ H2); intros. + exists (Vptr b Int.zero); split. + eapply eval_Eop. econstructor. simpl. rewrite H7. auto. + econstructor. eauto. reflexivity. +Qed. + +Lemma var_set_correct: + forall cenv id rhs a f e te2 sp lo hi m2 cs tm2 le te1 tm1 vrhs b chunk v1 v2 m3, + var_set cenv id rhs = Some a -> + match_callstack f (mkframe cenv e te2 sp lo hi :: cs) m2.(nextblock) tm2.(nextblock) m2 -> + eval_expr tge (Vptr sp Int.zero) le te1 tm1 rhs te2 tm2 vrhs -> + val_inject f v1 vrhs -> + mem_inject f m2 tm2 -> + e!id = Some(b, LVscalar chunk) -> + cast chunk v1 = Some v2 -> + store chunk m2 b 0 v2 = Some m3 -> + exists te3, exists tm3, exists tv, + eval_expr tge (Vptr sp Int.zero) le te1 tm1 a te3 tm3 tv /\ + val_inject f v2 tv /\ + mem_inject f m3 tm3 /\ + match_callstack f (mkframe cenv e te3 sp lo hi :: cs) m3.(nextblock) tm3.(nextblock) m3. +Proof. + unfold var_set; intros. + assert (NEXTBLOCK: nextblock m3 = nextblock m2). + generalize (store_inv _ _ _ _ _ _ H6). simpl. tauto. + inversion H0. subst f0 cenv0 e0 te sp0 lo0 hi0 cs0 bound tbound m. + inversion H20. + caseEq (cenv!!id); intros; rewrite H7 in H; simplify_eq H; clear H; intros; subst a. + (* var_local *) + generalize (me_vars0 id); intro. rewrite H7 in H. inversion H. + subst chunk0. + assert (b0 = b). congruence. subst b0. + assert (m = chunk). congruence. subst m. + elim (make_cast_correct _ _ _ _ _ _ _ _ _ _ _ _ H1 H5 H2). + intros tv [EVAL [INJ NORM]]. + exists (PTree.set id tv te2); exists tm2; exists tv. + split. eapply eval_Eassign. auto. + split. auto. + split. eapply store_unmapped_inject; eauto. + rewrite NEXTBLOCK. eapply match_callstack_store_local; eauto. + (* var_stack_scalar *) + generalize (me_vars0 id); intro. rewrite H7 in H. inversion H. + subst chunk0 z. + assert (b0 = b). congruence. subst b0. + assert (m = chunk). congruence. subst m. + assert (storev chunk m2 (Vptr b Int.zero) v2 = Some m3). assumption. + generalize (make_stackaddr_correct sp le te1 tm1 ofs). intro EVALSTACKADDR. + generalize (make_store_correct _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + EVALSTACKADDR H1 H5 H8 H3 H11 H2). + intros [tm3 [tv [EVAL [MEMINJ [VALINJ TNEXTBLOCK]]]]]. + exists te2; exists tm3; exists tv. + split. auto. split. auto. split. auto. + rewrite NEXTBLOCK; rewrite TNEXTBLOCK. + eapply match_callstack_mapped; eauto. + inversion H11; congruence. +Qed. + +(** * Correctness of stack allocation of local variables *) + +(** This section shows the correctness of the translation of Csharpminor + local variables, either as Cminor local variables or as sub-blocks + of the Cminor stack data. This is the most difficult part of the proof. *) + +Remark assign_variables_incr: + forall atk vars cenv sz cenv' sz', + assign_variables atk vars (cenv, sz) = (cenv', sz') -> sz <= sz'. +Proof. + induction vars; intros until sz'; simpl. + intro. replace sz' with sz. omega. congruence. + destruct a. destruct l. case (Identset.mem i atk); intros. + generalize (IHvars _ _ _ _ H). + generalize (size_chunk_pos m). intro. + generalize (align_le sz (size_chunk m) H0). omega. + eauto. + intro. generalize (IHvars _ _ _ _ H). + assert (8 > 0). omega. generalize (align_le sz 8 H0). + assert (0 <= Zmax 0 z). apply Zmax_bound_l. omega. + omega. +Qed. + +Lemma match_callstack_alloc_variables_rec: + forall tm sp cenv' sz' te lo cs atk, + valid_block tm sp -> + low_bound tm sp = 0 -> + high_bound tm sp = sz' -> + sz' <= Int.max_signed -> + forall e m vars e' m' lb, + alloc_variables e m vars e' m' lb -> + forall f cenv sz, + assign_variables atk vars (cenv, sz) = (cenv', sz') -> + match_callstack f (mkframe cenv e te sp lo m.(nextblock) :: cs) + m.(nextblock) tm.(nextblock) m -> + mem_inject f m tm -> + 0 <= sz -> + (forall b delta, f b = Some(sp, delta) -> high_bound m b + delta <= sz) -> + (forall id lv, In (id, lv) vars -> te!id <> None) -> + exists f', + inject_incr f f' + /\ mem_inject f' m' tm + /\ match_callstack f' (mkframe cenv' e' te sp lo m'.(nextblock) :: cs) + m'.(nextblock) tm.(nextblock) m'. +Proof. + intros until atk. intros VB LB HB NOOV. + induction 1. + (* base case *) + intros. simpl in H. inversion H; subst cenv sz. + exists f. split. apply inject_incr_refl. split. auto. auto. + (* inductive case *) + intros until sz. + change (assign_variables atk ((id, lv) :: vars) (cenv, sz)) + with (assign_variables atk vars (assign_variable atk (id, lv) (cenv, sz))). + caseEq (assign_variable atk (id, lv) (cenv, sz)). + intros cenv1 sz1 ASV1 ASVS MATCH MINJ SZPOS BOUND DEFINED. + assert (DEFINED1: forall id0 lv0, In (id0, lv0) vars -> te!id0 <> None). + intros. eapply DEFINED. simpl. right. eauto. + assert (exists tv, te!id = Some tv). + assert (te!id <> None). eapply DEFINED. simpl; left; auto. + destruct (te!id). exists v; auto. congruence. + elim H1; intros tv TEID; clear H1. + generalize ASV1. unfold assign_variable. + caseEq lv. + (* 1. lv = LVscalar chunk *) + intros chunk LV. case (Identset.mem id atk). + (* 1.1 info = Var_stack_scalar chunk ... *) + set (ofs := align sz (size_chunk chunk)). + intro EQ; injection EQ; intros; clear EQ. + set (f1 := extend_inject b1 (Some (sp, ofs)) f). + generalize (size_chunk_pos chunk); intro SIZEPOS. + generalize (align_le sz (size_chunk chunk) SIZEPOS). fold ofs. intro SZOFS. + assert (mem_inject f1 m1 tm /\ inject_incr f f1). + assert (Int.min_signed < 0). compute; auto. + generalize (assign_variables_incr _ _ _ _ _ _ ASVS). intro. + unfold f1; eapply alloc_mapped_inject; eauto. + omega. omega. omega. omega. unfold sizeof; rewrite LV. omega. + intros. left. generalize (BOUND _ _ H5). omega. + elim H3; intros MINJ1 INCR1; clear H3. + assert (MATCH1: match_callstack f1 + (mkframe cenv1 (PTree.set id (b1, lv) e) te sp lo (nextblock m1) :: cs) + (nextblock m1) (nextblock tm) m1). + unfold f1; rewrite <- H2; eapply match_callstack_alloc_left; eauto. + assert (SZ1POS: 0 <= sz1). rewrite <- H1. omega. + assert (BOUND1: forall b delta, f1 b = Some(sp, delta) -> + high_bound m1 b + delta <= sz1). + intros until delta; unfold f1, extend_inject, eq_block. + rewrite (high_bound_alloc _ _ b _ _ _ H). + case (zeq b b1); intros. + inversion H3. unfold sizeof; rewrite LV. omega. + generalize (BOUND _ _ H3). omega. + generalize (IHalloc_variables _ _ _ ASVS MATCH1 MINJ1 SZ1POS BOUND1 DEFINED1). + intros [f' [INCR2 [MINJ2 MATCH2]]]. + exists f'; intuition. eapply inject_incr_trans; eauto. + (* 1.2 info = Var_local chunk *) + intro EQ; injection EQ; intros; clear EQ. subst sz1. + generalize (alloc_unmapped_inject _ _ _ _ _ _ _ MINJ H). + set (f1 := extend_inject b1 None f). intros [MINJ1 INCR1]. + assert (MATCH1: match_callstack f1 + (mkframe cenv1 (PTree.set id (b1, lv) e) te sp lo (nextblock m1) :: cs) + (nextblock m1) (nextblock tm) m1). + unfold f1; rewrite <- H2; eapply match_callstack_alloc_left; eauto. + assert (BOUND1: forall b delta, f1 b = Some(sp, delta) -> + high_bound m1 b + delta <= sz). + intros until delta; unfold f1, extend_inject, eq_block. + rewrite (high_bound_alloc _ _ b _ _ _ H). + case (zeq b b1); intros. discriminate. + eapply BOUND; eauto. + generalize (IHalloc_variables _ _ _ ASVS MATCH1 MINJ1 SZPOS BOUND1 DEFINED1). + intros [f' [INCR2 [MINJ2 MATCH2]]]. + exists f'; intuition. eapply inject_incr_trans; eauto. + (* 2. lv = LVarray dim, info = Var_stack_array *) + intros dim LV EQ. injection EQ; clear EQ; intros. + assert (0 <= Zmax 0 dim). apply Zmax1. + assert (8 > 0). omega. + generalize (align_le sz 8 H4). intro. + set (ofs := align sz 8) in *. + set (f1 := extend_inject b1 (Some (sp, ofs)) f). + assert (mem_inject f1 m1 tm /\ inject_incr f f1). + assert (Int.min_signed < 0). compute; auto. + generalize (assign_variables_incr _ _ _ _ _ _ ASVS). intro. + unfold f1; eapply alloc_mapped_inject; eauto. + omega. omega. omega. omega. unfold sizeof; rewrite LV. omega. + intros. left. generalize (BOUND _ _ H8). omega. + elim H6; intros MINJ1 INCR1; clear H6. + assert (MATCH1: match_callstack f1 + (mkframe cenv1 (PTree.set id (b1, lv) e) te sp lo (nextblock m1) :: cs) + (nextblock m1) (nextblock tm) m1). + unfold f1; rewrite <- H2; eapply match_callstack_alloc_left; eauto. + assert (SZ1POS: 0 <= sz1). rewrite <- H1. omega. + assert (BOUND1: forall b delta, f1 b = Some(sp, delta) -> + high_bound m1 b + delta <= sz1). + intros until delta; unfold f1, extend_inject, eq_block. + rewrite (high_bound_alloc _ _ b _ _ _ H). + case (zeq b b1); intros. + inversion H6. unfold sizeof; rewrite LV. omega. + generalize (BOUND _ _ H6). omega. + generalize (IHalloc_variables _ _ _ ASVS MATCH1 MINJ1 SZ1POS BOUND1 DEFINED1). + intros [f' [INCR2 [MINJ2 MATCH2]]]. + exists f'; intuition. eapply inject_incr_trans; eauto. +Qed. + +Lemma set_params_defined: + forall params args id, + In id params -> (set_params args params)!id <> None. +Proof. + induction params; simpl; intros. + elim H. + destruct args. + rewrite PTree.gsspec. case (peq id a); intro. + congruence. eapply IHparams. elim H; intro. congruence. auto. + rewrite PTree.gsspec. case (peq id a); intro. + congruence. eapply IHparams. elim H; intro. congruence. auto. +Qed. + +Lemma set_locals_defined: + forall e vars id, + In id vars \/ e!id <> None -> (set_locals vars e)!id <> None. +Proof. + induction vars; simpl; intros. + tauto. + rewrite PTree.gsspec. case (peq id a); intro. + congruence. + apply IHvars. assert (a <> id). congruence. tauto. +Qed. + +Lemma set_locals_params_defined: + forall args params vars id, + In id (params ++ vars) -> + (set_locals vars (set_params args params))!id <> None. +Proof. + intros. apply set_locals_defined. + elim (in_app_or _ _ _ H); intro. + right. apply set_params_defined; auto. + left; auto. +Qed. + +(** Preservation of [match_callstack] by simultaneous allocation + of Csharpminor local variables and of the Cminor stack data block. *) + +Lemma match_callstack_alloc_variables: + forall fn cenv sz m e m' lb tm tm' sp f cs targs, + build_compilenv fn = (cenv, sz) -> + sz <= Int.max_signed -> + alloc_variables Csharpminor.empty_env m (fn_variables fn) e m' lb -> + Mem.alloc tm 0 sz = (tm', sp) -> + match_callstack f cs m.(nextblock) tm.(nextblock) m -> + mem_inject f m tm -> + let tparams := List.map (@fst ident memory_chunk) fn.(Csharpminor.fn_params) in + let tvars := List.map (@fst ident local_variable) fn.(Csharpminor.fn_vars) in + let te := set_locals tvars (set_params targs tparams) in + exists f', + inject_incr f f' + /\ mem_inject f' m' tm' + /\ match_callstack f' (mkframe cenv e te sp m.(nextblock) m'.(nextblock) :: cs) + m'.(nextblock) tm'.(nextblock) m'. +Proof. + intros. + assert (SP: sp = nextblock tm). injection H2; auto. + unfold build_compilenv in H. + eapply match_callstack_alloc_variables_rec with (sz' := sz); eauto. + eapply valid_new_block; eauto. + rewrite (low_bound_alloc _ _ sp _ _ _ H2). apply zeq_true. + rewrite (high_bound_alloc _ _ sp _ _ _ H2). apply zeq_true. + (* match_callstack *) + constructor. omega. change (valid_block tm' sp). eapply valid_new_block; eauto. + constructor. + (* me_vars *) + intros. rewrite PMap.gi. constructor. + unfold Csharpminor.empty_env. apply PTree.gempty. + (* me_low_high *) + omega. + (* me_bounded *) + intros until lv. unfold Csharpminor.empty_env. rewrite PTree.gempty. congruence. + (* me_inj *) + intros until lv2. unfold Csharpminor.empty_env; rewrite PTree.gempty; congruence. + (* me_inv *) + intros. elim (mi_mappedblocks _ _ _ H4 _ _ _ H5); intros. + elim (fresh_block_alloc _ _ _ _ _ H2 H6). + (* me_incr *) + intros. elim (mi_mappedblocks _ _ _ H4 _ _ _ H5); intros. + rewrite SP; auto. + rewrite SP; auto. + eapply alloc_right_inject; eauto. + omega. + intros. elim (mi_mappedblocks _ _ _ H4 _ _ _ H5); intros. + unfold block in SP; omegaContradiction. + (* defined *) + intros. unfold te. apply set_locals_params_defined. + unfold tparams, tvars. unfold fn_variables in H5. + change Csharpminor.fn_params with Csharpminor.fn_params in H5. + change Csharpminor.fn_vars with Csharpminor.fn_vars in H5. + elim (in_app_or _ _ _ H5); intros. + elim (list_in_map_inv _ _ _ H6). intros x [A B]. + apply in_or_app; left. inversion A. apply List.in_map. auto. + apply in_or_app; right. + change id with (fst (id, lv)). apply List.in_map; auto. +Qed. + +(** Characterization of the range of addresses for the blocks allocated + to hold Csharpminor local variables. *) + +Lemma alloc_variables_nextblock_incr: + forall e1 m1 vars e2 m2 lb, + alloc_variables e1 m1 vars e2 m2 lb -> + nextblock m1 <= nextblock m2. +Proof. + induction 1; intros. + omega. + inversion H; subst m1; simpl in IHalloc_variables. omega. +Qed. + +Lemma alloc_variables_list_block: + forall e1 m1 vars e2 m2 lb, + alloc_variables e1 m1 vars e2 m2 lb -> + forall b, m1.(nextblock) <= b < m2.(nextblock) <-> In b lb. +Proof. + induction 1; intros. + simpl; split; intro. omega. contradiction. + elim (IHalloc_variables b); intros A B. + assert (nextblock m = b1). injection H; intros. auto. + assert (nextblock m1 = Zsucc (nextblock m)). + injection H; intros; subst m1; reflexivity. + simpl; split; intro. + assert (nextblock m = b \/ nextblock m1 <= b < nextblock m2). + unfold block; rewrite H2; omega. + elim H4; intro. left; congruence. right; auto. + elim H3; intro. subst b b1. + generalize (alloc_variables_nextblock_incr _ _ _ _ _ _ H0). + rewrite H2. omega. + generalize (B H4). rewrite H2. omega. +Qed. + +(** Correctness of the code generated by [store_parameters] + to store in memory the values of parameters that are stack-allocated. *) + +Inductive vars_vals_match: + meminj -> list (ident * memory_chunk) -> list val -> env -> Prop := + | vars_vals_nil: + forall f te, + vars_vals_match f nil nil te + | vars_vals_cons: + forall f te id chunk vars v vals tv, + te!id = Some tv -> + val_inject f v tv -> + vars_vals_match f vars vals te -> + vars_vals_match f ((id, chunk) :: vars) (v :: vals) te. + +Lemma vars_vals_match_extensional: + forall f vars vals te, + vars_vals_match f vars vals te -> + forall te', + (forall id lv, In (id, lv) vars -> te'!id = te!id) -> + vars_vals_match f vars vals te'. +Proof. + induction 1; intros. + constructor. + econstructor; eauto. rewrite <- H. eapply H2. left. reflexivity. + apply IHvars_vals_match. intros. eapply H2; eauto. right. eauto. +Qed. + +Lemma store_parameters_correct: + forall e m1 params vl m2, + bind_parameters e m1 params vl m2 -> + forall f te1 cenv sp lo hi cs tm1, + vars_vals_match f params vl te1 -> + list_norepet (List.map (@fst ident memory_chunk) params) -> + mem_inject f m1 tm1 -> + match_callstack f (mkframe cenv e te1 sp lo hi :: cs) m1.(nextblock) tm1.(nextblock) m1 -> + exists te2, exists tm2, + exec_stmtlist tge (Vptr sp Int.zero) + te1 tm1 (store_parameters cenv params) + te2 tm2 Out_normal + /\ mem_inject f m2 tm2 + /\ match_callstack f (mkframe cenv e te2 sp lo hi :: cs) m2.(nextblock) tm2.(nextblock) m2. +Proof. + induction 1. + (* base case *) + intros; simpl. exists te1; exists tm1. split. constructor. tauto. + (* inductive case *) + intros until tm1. intros VVM NOREPET MINJ MATCH. simpl. + inversion VVM. subst f0 id0 chunk0 vars v vals te. + inversion MATCH. subst f0 cenv0 e0 te sp0 lo0 hi0 cs0 bound tbound m0. + inversion H19. + inversion NOREPET. subst hd tl. + assert (NEXT: nextblock m1 = nextblock m). + generalize (store_inv _ _ _ _ _ _ H1). simpl; tauto. + generalize (me_vars0 id); intro MV. inversion MV. + (* cenv!!id = Var_local chunk *) + change Csharpminor.local_variable with local_variable in H4. + rewrite H in H4. injection H4; clear H4; intros; subst b0 chunk0. + assert (v' = tv). congruence. subst v'. + assert (eval_expr tge (Vptr sp Int.zero) nil te1 tm1 (Evar id) te1 tm1 tv). + constructor. auto. + generalize (make_cast_correct _ _ _ _ _ _ _ _ _ _ _ _ + H4 H0 H11). + intros [tv' [EVAL1 [VINJ1 VNORM]]]. + set (te2 := PTree.set id tv' te1). + assert (VVM2: vars_vals_match f params vl te2). + apply vars_vals_match_extensional with te1; auto. + intros. unfold te2; apply PTree.gso. red; intro; subst id0. + elim H5. change id with (fst (id, lv)). apply List.in_map; auto. + generalize (store_unmapped_inject _ _ _ _ _ _ _ _ MINJ H1 H8); intro MINJ2. + generalize (match_callstack_store_local _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + H VINJ1 VNORM H1 MATCH); + fold te2; rewrite <- NEXT; intro MATCH2. + destruct (IHbind_parameters _ _ _ _ _ _ _ _ VVM2 H6 MINJ2 MATCH2) + as [te3 [tm3 [EXEC3 [MINJ3 MATCH3]]]]. + exists te3; exists tm3. + (* execution *) + split. apply exec_Scons_continue with te2 tm1. + econstructor. unfold te2. constructor. assumption. + assumption. + (* meminj & match_callstack *) + tauto. + + (* cenv!!id = Var_stack_scalar *) + change Csharpminor.local_variable with local_variable in H4. + rewrite H in H4. injection H4; clear H4; intros; subst b0 chunk0. + pose (EVAL1 := make_stackaddr_correct sp nil te1 tm1 ofs). + assert (EVAL2: eval_expr tge (Vptr sp Int.zero) nil te1 tm1 (Evar id) te1 tm1 tv). + constructor. auto. + destruct (make_store_correct _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + (Vptr b Int.zero) _ + EVAL1 EVAL2 H0 H1 MINJ H7 H11) + as [tm2 [tv' [EVAL3 [MINJ2 [VINJ NEXT1]]]]]. + assert (f b <> None). inversion H7. congruence. + generalize (match_callstack_mapped _ _ _ _ _ MATCH _ _ _ _ _ H4 H1). + rewrite <- NEXT; rewrite <- NEXT1; intro MATCH2. + destruct (IHbind_parameters _ _ _ _ _ _ _ _ + H12 H6 MINJ2 MATCH2) as [te3 [tm3 [EVAL4 [MINJ3 MATCH3]]]]. + exists te3; exists tm3. + (* execution *) + split. apply exec_Scons_continue with te1 tm2. + econstructor. eauto. + assumption. + (* meminj & match_callstack *) + tauto. + + (* Impossible cases on cenv!!id *) + change Csharpminor.local_variable with local_variable in H4. + congruence. + change Csharpminor.local_variable with local_variable in H4. + congruence. +Qed. + +Lemma vars_vals_match_holds_1: + forall f params args targs, + list_norepet (List.map (@fst ident memory_chunk) params) -> + List.length params = List.length args -> + val_list_inject f args targs -> + vars_vals_match f params args + (set_params targs (List.map (@fst ident memory_chunk) params)). +Proof. + induction params; destruct args; simpl; intros; try discriminate. + constructor. + inversion H1. subst v0 vl targs. + inversion H. subst hd tl. + destruct a as [id chunk]. econstructor. + simpl. rewrite PTree.gss. reflexivity. + auto. + apply vars_vals_match_extensional + with (set_params vl' (map (@fst ident memory_chunk) params)). + eapply IHparams; eauto. + intros. simpl. apply PTree.gso. red; intro; subst id0. + elim H5. change (fst (id, chunk)) with (fst (id, lv)). + apply List.in_map; auto. +Qed. + +Lemma vars_vals_match_holds: + forall f params args targs, + List.length params = List.length args -> + val_list_inject f args targs -> + forall vars, + list_norepet (List.map (@fst ident local_variable) vars + ++ List.map (@fst ident memory_chunk) params) -> + vars_vals_match f params args + (set_locals (List.map (@fst ident local_variable) vars) + (set_params targs (List.map (@fst ident memory_chunk) params))). +Proof. + induction vars; simpl; intros. + eapply vars_vals_match_holds_1; eauto. + inversion H1. subst hd tl. + eapply vars_vals_match_extensional; eauto. + intros. apply PTree.gso. red; intro; subst id; elim H4. + apply in_or_app. right. change (fst a) with (fst (fst a, lv)). + apply List.in_map; auto. +Qed. + +Lemma bind_parameters_length: + forall e m1 params args m2, + bind_parameters e m1 params args m2 -> + List.length params = List.length args. +Proof. + induction 1; simpl; eauto. +Qed. + +(** The final result in this section: the behaviour of function entry + in the generated Cminor code (allocate stack data block and store + parameters whose address is taken) simulates what happens at function + entry in the original Csharpminor (allocate one block per local variable + and initialize the blocks corresponding to function parameters). *) + +Lemma function_entry_ok: + forall fn m e m1 lb vargs m2 f cs tm cenv sz tm1 sp tvargs, + alloc_variables empty_env m (fn_variables fn) e m1 lb -> + bind_parameters e m1 fn.(Csharpminor.fn_params) vargs m2 -> + match_callstack f cs m.(nextblock) tm.(nextblock) m -> + build_compilenv fn = (cenv, sz) -> + sz <= Int.max_signed -> + Mem.alloc tm 0 sz = (tm1, sp) -> + let te := + set_locals (fn_vars_names fn) (set_params tvargs (fn_params_names fn)) in + val_list_inject f vargs tvargs -> + mem_inject f m tm -> + list_norepet (fn_params_names fn ++ fn_vars_names fn) -> + exists f2, exists te2, exists tm2, + exec_stmtlist tge (Vptr sp Int.zero) + te tm1 (store_parameters cenv fn.(Csharpminor.fn_params)) + te2 tm2 Out_normal + /\ mem_inject f2 m2 tm2 + /\ inject_incr f f2 + /\ match_callstack f2 + (mkframe cenv e te2 sp m.(nextblock) m1.(nextblock) :: cs) + m2.(nextblock) tm2.(nextblock) m2 + /\ (forall b, m.(nextblock) <= b < m1.(nextblock) <-> In b lb). +Proof. + intros. + generalize (bind_parameters_length _ _ _ _ _ H0); intro LEN1. + destruct (match_callstack_alloc_variables _ _ _ _ _ _ _ _ _ _ _ _ tvargs + H2 H3 H H4 H1 H6) + as [f1 [INCR1 [MINJ1 MATCH1]]]. + fold te in MATCH1. + assert (VLI: val_list_inject f1 vargs tvargs). + eapply val_list_inject_incr; eauto. + generalize (vars_vals_match_holds _ _ _ _ LEN1 VLI _ + (list_norepet_append_commut _ _ H7)). + fold te. intro VVM. + assert (NOREPET: list_norepet (List.map (@fst ident memory_chunk) fn.(Csharpminor.fn_params))). + unfold fn_params_names in H7. + eapply list_norepet_append_left; eauto. + destruct (store_parameters_correct _ _ _ _ _ H0 _ _ _ _ _ _ _ _ + VVM NOREPET MINJ1 MATCH1) + as [te2 [tm2 [EXEC [MINJ2 MATCH2]]]]. + exists f1; exists te2; exists tm2. + split. auto. split. auto. split. auto. split. auto. + intros; eapply alloc_variables_list_block; eauto. +Qed. + +(** * Semantic preservation for the translation *) + +(** These tactics simplify hypotheses of the form [f ... = Some x]. *) + +Ltac monadSimpl1 := + match goal with + | [ |- (bind _ _ ?F ?G = Some ?X) -> _ ] => + unfold bind at 1; + generalize (refl_equal F); + pattern F at -1 in |- *; + case F; + [ (let EQ := fresh "EQ" in + (intro; intro EQ; + try monadSimpl1)) + | intros; discriminate ] + | [ |- (None = Some _) -> _ ] => + intro; discriminate + | [ |- (Some _ = Some _) -> _ ] => + let h := fresh "H" in + (intro h; injection h; intro; clear h) + end. + +Ltac monadSimpl := + match goal with + | [ |- (bind _ _ ?F ?G = Some ?X) -> _ ] => monadSimpl1 + | [ |- (None = Some _) -> _ ] => monadSimpl1 + | [ |- (Some _ = Some _) -> _ ] => monadSimpl1 + | [ |- (?F _ _ _ _ _ _ _ = Some _) -> _ ] => simpl F; monadSimpl1 + | [ |- (?F _ _ _ _ _ _ = Some _) -> _ ] => simpl F; monadSimpl1 + | [ |- (?F _ _ _ _ _ = Some _) -> _ ] => simpl F; monadSimpl1 + | [ |- (?F _ _ _ _ = Some _) -> _ ] => simpl F; monadSimpl1 + | [ |- (?F _ _ _ = Some _) -> _ ] => simpl F; monadSimpl1 + | [ |- (?F _ _ = Some _) -> _ ] => simpl F; monadSimpl1 + | [ |- (?F _ = Some _) -> _ ] => simpl F; monadSimpl1 + end. + +Ltac monadInv H := + generalize H; monadSimpl. + +(** The proof of semantic preservation uses simulation diagrams of the + following form: +<< + le, e, m1, a --------------- tle, sp, te1, tm1, ta + | | + | | + v v + le, e, m2, v --------------- tle, sp, te2, tm2, tv +>> + where [ta] is the Cminor expression obtained by translating the + Csharpminor expression [a]. The left vertical arrow is an evaluation + of a Csharpminor expression. The right vertical arrow is an evaluation + of a Cminor expression. The precondition (top vertical bar) + includes a [mem_inject] relation between the memory states [m1] and [tm1], + a [val_list_inject] relation between the let environments [le] and [tle], + and a [match_callstack] relation for any callstack having + [e], [te1], [sp] as top frame. The postcondition (bottom vertical bar) + is the existence of a memory injection [f2] that extends the injection + [f1] we started with, preserves the [match_callstack] relation for + the transformed callstack at the final state, and validates a + [val_inject] relation between the result values [v] and [tv]. + + We capture these diagrams by the following predicates, parameterized + over the Csharpminor executions, which will serve as induction + hypotheses in the proof of simulation. *) + +Definition eval_expr_prop + (le: Csharpminor.letenv) (e: Csharpminor.env) (m1: mem) (a: Csharpminor.expr) (m2: mem) (v: val) : Prop := + forall cenv ta f1 tle te1 tm1 sp lo hi cs + (TR: transl_expr cenv a = Some ta) + (LINJ: val_list_inject f1 le tle) + (MINJ: mem_inject f1 m1 tm1) + (MATCH: match_callstack f1 + (mkframe cenv e te1 sp lo hi :: cs) + m1.(nextblock) tm1.(nextblock) m1), + exists f2, exists te2, exists tm2, exists tv, + eval_expr tge (Vptr sp Int.zero) tle te1 tm1 ta te2 tm2 tv + /\ val_inject f2 v tv + /\ mem_inject f2 m2 tm2 + /\ inject_incr f1 f2 + /\ match_callstack f2 + (mkframe cenv e te2 sp lo hi :: cs) + m2.(nextblock) tm2.(nextblock) m2. + +Definition eval_exprlist_prop + (le: Csharpminor.letenv) (e: Csharpminor.env) (m1: mem) (al: Csharpminor.exprlist) (m2: mem) (vl: list val) : Prop := + forall cenv tal f1 tle te1 tm1 sp lo hi cs + (TR: transl_exprlist cenv al = Some tal) + (LINJ: val_list_inject f1 le tle) + (MINJ: mem_inject f1 m1 tm1) + (MATCH: match_callstack f1 + (mkframe cenv e te1 sp lo hi :: cs) + m1.(nextblock) tm1.(nextblock) m1), + exists f2, exists te2, exists tm2, exists tvl, + eval_exprlist tge (Vptr sp Int.zero) tle te1 tm1 tal te2 tm2 tvl + /\ val_list_inject f2 vl tvl + /\ mem_inject f2 m2 tm2 + /\ inject_incr f1 f2 + /\ match_callstack f2 + (mkframe cenv e te2 sp lo hi :: cs) + m2.(nextblock) tm2.(nextblock) m2. + +Definition eval_funcall_prop + (m1: mem) (fn: Csharpminor.function) (args: list val) (m2: mem) (res: val) : Prop := + forall tfn f1 tm1 cs targs + (TR: transl_function fn = Some tfn) + (MINJ: mem_inject f1 m1 tm1) + (MATCH: match_callstack f1 cs m1.(nextblock) tm1.(nextblock) m1) + (ARGSINJ: val_list_inject f1 args targs), + exists f2, exists tm2, exists tres, + eval_funcall tge tm1 tfn targs tm2 tres + /\ val_inject f2 res tres + /\ mem_inject f2 m2 tm2 + /\ inject_incr f1 f2 + /\ match_callstack f2 cs m2.(nextblock) tm2.(nextblock) m2. + +Inductive outcome_inject (f: meminj) : Csharpminor.outcome -> outcome -> Prop := + | outcome_inject_normal: + outcome_inject f Csharpminor.Out_normal Out_normal + | outcome_inject_exit: + forall n, outcome_inject f (Csharpminor.Out_exit n) (Out_exit n) + | outcome_inject_return_none: + outcome_inject f (Csharpminor.Out_return None) (Out_return None) + | outcome_inject_return_some: + forall v1 v2, + val_inject f v1 v2 -> + outcome_inject f (Csharpminor.Out_return (Some v1)) (Out_return (Some v2)). + +Definition exec_stmt_prop + (e: Csharpminor.env) (m1: mem) (s: Csharpminor.stmt) (m2: mem) (out: Csharpminor.outcome): Prop := + forall cenv ts f1 te1 tm1 sp lo hi cs + (TR: transl_stmt cenv s = Some ts) + (MINJ: mem_inject f1 m1 tm1) + (MATCH: match_callstack f1 + (mkframe cenv e te1 sp lo hi :: cs) + m1.(nextblock) tm1.(nextblock) m1), + exists f2, exists te2, exists tm2, exists tout, + exec_stmt tge (Vptr sp Int.zero) te1 tm1 ts te2 tm2 tout + /\ outcome_inject f2 out tout + /\ mem_inject f2 m2 tm2 + /\ inject_incr f1 f2 + /\ match_callstack f2 + (mkframe cenv e te2 sp lo hi :: cs) + m2.(nextblock) tm2.(nextblock) m2. + +Definition exec_stmtlist_prop + (e: Csharpminor.env) (m1: mem) (s: Csharpminor.stmtlist) (m2: mem) (out: Csharpminor.outcome): Prop := + forall cenv ts f1 te1 tm1 sp lo hi cs + (TR: transl_stmtlist cenv s = Some ts) + (MINJ: mem_inject f1 m1 tm1) + (MATCH: match_callstack f1 + (mkframe cenv e te1 sp lo hi :: cs) + m1.(nextblock) tm1.(nextblock) m1), + exists f2, exists te2, exists tm2, exists tout, + exec_stmtlist tge (Vptr sp Int.zero) te1 tm1 ts te2 tm2 tout + /\ outcome_inject f2 out tout + /\ mem_inject f2 m2 tm2 + /\ inject_incr f1 f2 + /\ match_callstack f2 + (mkframe cenv e te2 sp lo hi :: cs) + m2.(nextblock) tm2.(nextblock) m2. + +(** There are as many cases in the inductive proof as there are evaluation + rules in the Csharpminor semantics. We treat each case as a separate + lemma. *) + +Lemma transl_expr_Evar_correct: + forall (le : Csharpminor.letenv) + (e : PTree.t (block * local_variable)) (m : mem) (id : positive) + (b : block) (chunk : memory_chunk) (v : val), + e ! id = Some (b, LVscalar chunk) -> + load chunk m b 0 = Some v -> + eval_expr_prop le e m (Csharpminor.Evar id) m v. +Proof. + intros; red; intros. unfold transl_expr in TR. + generalize (var_get_correct _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ tle + TR MATCH MINJ H H0). + intros [tv [EVAL VINJ]]. + exists f1; exists te1; exists tm1; exists tv; intuition. +Qed. + +Lemma transl_expr_Eassign_correct: + forall (le : Csharpminor.letenv) (e : Csharpminor.env) (m : mem) + (id : positive) (a : Csharpminor.expr) (m1 : mem) (b : block) + (chunk : memory_chunk) (v1 v2 : val) (m2 : mem), + Csharpminor.eval_expr ge le e m a m1 v1 -> + eval_expr_prop le e m a m1 v1 -> + e ! id = Some (b, LVscalar chunk) -> + cast chunk v1 = Some v2 -> + store chunk m1 b 0 v2 = Some m2 -> + eval_expr_prop le e m (Csharpminor.Eassign id a) m2 v2. +Proof. + intros; red; intros. monadInv TR; intro EQ0. + generalize (H0 _ _ _ _ _ _ _ _ _ _ EQ LINJ MINJ MATCH). + intros [f2 [te2 [tm2 [tv1 [EVAL1 [VINJ1 [MINJ1 [INCR12 MATCH1]]]]]]]]. + generalize (var_set_correct _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + EQ0 MATCH1 EVAL1 VINJ1 MINJ1 H1 H2 H3). + intros [te3 [tm3 [tv2 [EVAL2 [VINJ2 [MINJ2 MATCH2]]]]]]. + exists f2; exists te3; exists tm3; exists tv2. tauto. +Qed. + +Lemma transl_expr_Eaddrof_local_correct: + forall (le : Csharpminor.letenv) + (e : PTree.t (block * local_variable)) (m : mem) (id : positive) + (b : block) (lv : local_variable), + e ! id = Some (b, lv) -> + eval_expr_prop le e m (Eaddrof id) m (Vptr b Int.zero). +Proof. + intros; red; intros. simpl in TR. + generalize (var_addr_local_correct _ _ _ _ _ _ _ _ _ _ _ _ _ _ tle + TR MATCH H). + intros [tv [EVAL VINJ]]. + exists f1; exists te1; exists tm1; exists tv. intuition. +Qed. + +Lemma transl_expr_Eaddrof_global_correct: + forall (le : Csharpminor.letenv) + (e : PTree.t (block * local_variable)) (m : mem) (id : positive) + (b : block), + e ! id = None -> + Genv.find_symbol ge id = Some b -> + eval_expr_prop le e m (Eaddrof id) m (Vptr b Int.zero). +Proof. + intros; red; intros. simpl in TR. + generalize (var_addr_global_correct _ _ _ _ _ _ _ _ _ _ _ _ _ tle + TR MATCH H H0). + intros [tv [EVAL VINJ]]. + exists f1; exists te1; exists tm1; exists tv. intuition. +Qed. + +Lemma transl_expr_Eop_correct: + forall (le : Csharpminor.letenv) (e : Csharpminor.env) (m : mem) + (op : Csharpminor.operation) (al : Csharpminor.exprlist) (m1 : mem) + (vl : list val) (v : val), + Csharpminor.eval_exprlist ge le e m al m1 vl -> + eval_exprlist_prop le e m al m1 vl -> + Csharpminor.eval_operation op vl m1 = Some v -> + eval_expr_prop le e m (Csharpminor.Eop op al) m1 v. +Proof. + intros; red; intros. monadInv TR; intro EQ0. + generalize (H0 _ _ _ _ _ _ _ _ _ _ EQ LINJ MINJ MATCH). + intros [f2 [te2 [tm2 [tvl [EVAL1 [VINJ1 [MINJ1 [INCR1 MATCH1]]]]]]]]. + generalize (make_op_correct _ _ _ _ _ _ _ _ _ _ _ _ _ _ + EQ0 H1 EVAL1 VINJ1 MINJ1). + intros [tv [EVAL2 VINJ2]]. + exists f2; exists te2; exists tm2; exists tv. intuition. +Qed. + +Lemma transl_expr_Eload_correct: + forall (le : Csharpminor.letenv) (e : Csharpminor.env) (m : mem) + (chunk : memory_chunk) (a : Csharpminor.expr) (m1 : mem) + (v1 v : val), + Csharpminor.eval_expr ge le e m a m1 v1 -> + eval_expr_prop le e m a m1 v1 -> + loadv chunk m1 v1 = Some v -> + eval_expr_prop le e m (Csharpminor.Eload chunk a) m1 v. +Proof. + intros; red; intros. + monadInv TR. + destruct (H0 _ _ _ _ _ _ _ _ _ _ EQ LINJ MINJ MATCH) + as [f2 [te2 [tm2 [tv1 [EVAL [VINJ1 [MINJ2 [INCR MATCH2]]]]]]]]. + destruct (loadv_inject _ _ _ _ _ _ _ MINJ2 H1 VINJ1) + as [tv [TLOAD VINJ]]. + exists f2; exists te2; exists tm2; exists tv. + intuition. + subst ta. eapply make_load_correct; eauto. +Qed. + +Lemma transl_expr_Estore_correct: + forall (le : Csharpminor.letenv) (e : Csharpminor.env) (m : mem) + (chunk : memory_chunk) (a b : Csharpminor.expr) (m1 : mem) + (v1 : val) (m2 : mem) (v2 : val) (m3 : mem) (v3 : val), + Csharpminor.eval_expr ge le e m a m1 v1 -> + eval_expr_prop le e m a m1 v1 -> + Csharpminor.eval_expr ge le e m1 b m2 v2 -> + eval_expr_prop le e m1 b m2 v2 -> + cast chunk v2 = Some v3 -> + storev chunk m2 v1 v3 = Some m3 -> + eval_expr_prop le e m (Csharpminor.Estore chunk a b) m3 v3. +Proof. + intros; red; intros. monadInv TR. + destruct (H0 _ _ _ _ _ _ _ _ _ _ EQ LINJ MINJ MATCH) + as [f2 [te2 [tm2 [tv1 [EVAL1 [VINJ1 [MINJ2 [INCR2 MATCH2]]]]]]]]. + assert (LINJ2: val_list_inject f2 le tle). eapply val_list_inject_incr; eauto. + destruct (H2 _ _ _ _ _ _ _ _ _ _ EQ0 LINJ2 MINJ2 MATCH2) + as [f3 [te3 [tm3 [tv2 [EVAL2 [VINJ2 [MINJ3 [INCR3 MATCH3]]]]]]]]. + assert (VINJ1': val_inject f3 v1 tv1). eapply val_inject_incr; eauto. + destruct (make_store_correct _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + EVAL1 EVAL2 H3 H4 MINJ3 VINJ1' VINJ2) + as [tm4 [tv [EVAL [MINJ4 [VINJ4 NEXTBLOCK]]]]]. + exists f3; exists te3; exists tm4; exists tv. + rewrite <- H6. intuition. + eapply inject_incr_trans; eauto. + assert (val_inject f3 v1 tv1). eapply val_inject_incr; eauto. + unfold storev in H4; destruct v1; try discriminate. + inversion H5. + rewrite NEXTBLOCK. replace (nextblock m3) with (nextblock m2). + eapply match_callstack_mapped; eauto. congruence. + generalize (store_inv _ _ _ _ _ _ H4). simpl; symmetry; tauto. +Qed. + +Lemma sig_transl_function: + forall f tf, transl_function f = Some tf -> tf.(fn_sig) = f.(Csharpminor.fn_sig). +Proof. + intros f tf. unfold transl_function. + destruct (build_compilenv f). + case (zle z Int.max_signed); intros. + monadInv H. subst tf; reflexivity. + congruence. +Qed. + +Lemma transl_expr_Ecall_correct: + forall (le : Csharpminor.letenv) (e : Csharpminor.env) (m : mem) + (sig : signature) (a : Csharpminor.expr) (bl : Csharpminor.exprlist) + (m1 m2 m3 : mem) (vf : val) (vargs : list val) (vres : val) + (f : Csharpminor.function), + Csharpminor.eval_expr ge le e m a m1 vf -> + eval_expr_prop le e m a m1 vf -> + Csharpminor.eval_exprlist ge le e m1 bl m2 vargs -> + eval_exprlist_prop le e m1 bl m2 vargs -> + Genv.find_funct ge vf = Some f -> + Csharpminor.fn_sig f = sig -> + Csharpminor.eval_funcall ge m2 f vargs m3 vres -> + eval_funcall_prop m2 f vargs m3 vres -> + eval_expr_prop le e m (Csharpminor.Ecall sig a bl) m3 vres. +Proof. + intros;red;intros. monadInv TR. subst ta. + generalize (H0 _ _ _ _ _ _ _ _ _ _ EQ LINJ MINJ MATCH). + intros [f2 [te2 [tm2 [tv1 [EVAL1 [VINJ1 [MINJ1 [INCR1 MATCH1]]]]]]]]. + assert (LINJ1: val_list_inject f2 le tle). eapply val_list_inject_incr; eauto. + generalize (H2 _ _ _ _ _ _ _ _ _ _ EQ0 LINJ1 MINJ1 MATCH1). + intros [f3 [te3 [tm3 [tv2 [EVAL2 [VINJ2 [MINJ2 [INCR2 MATCH2]]]]]]]]. + assert (tv1 = vf). + elim (Genv.find_funct_inv H3). intros bf VF. rewrite VF in H3. + rewrite Genv.find_funct_find_funct_ptr in H3. + generalize (Genv.find_funct_ptr_inv H3). intro. + assert (match_globalenvs f2). eapply match_callstack_match_globalenvs; eauto. + generalize (mg_functions _ H8 _ H7). intro. + rewrite VF in VINJ1. inversion VINJ1. subst vf. + decEq. congruence. + subst ofs2. replace x with 0. reflexivity. congruence. + subst tv1. elim (functions_translated _ _ H3). intros tf [FIND TRF]. + generalize (H6 _ _ _ _ _ TRF MINJ2 MATCH2 VINJ2). + intros [f4 [tm4 [tres [EVAL3 [VINJ3 [MINJ3 [INCR3 MATCH3]]]]]]]. + exists f4; exists te3; exists tm4; exists tres. intuition. + eapply eval_Ecall; eauto. rewrite <- H4. apply sig_transl_function; auto. + apply inject_incr_trans with f2; auto. + apply inject_incr_trans with f3; auto. +Qed. + +Lemma transl_expr_Econdition_true_correct: + forall (le : Csharpminor.letenv) (e : Csharpminor.env) (m : mem) + (a b c : Csharpminor.expr) (m1 : mem) (v1 : val) (m2 : mem) + (v2 : val), + Csharpminor.eval_expr ge le e m a m1 v1 -> + eval_expr_prop le e m a m1 v1 -> + Val.is_true v1 -> + Csharpminor.eval_expr ge le e m1 b m2 v2 -> + eval_expr_prop le e m1 b m2 v2 -> + eval_expr_prop le e m (Csharpminor.Econdition a b c) m2 v2. +Proof. + intros; red; intros. monadInv TR. + destruct (H0 _ _ _ _ _ _ _ _ _ _ EQ LINJ MINJ MATCH) + as [f2 [te2 [tm2 [tv1 [EVAL1 [VINJ1 [MINJ1 [INCR1 MATCH1]]]]]]]]. + assert (LINJ1: val_list_inject f2 le tle). eapply val_list_inject_incr; eauto. + destruct (H3 _ _ _ _ _ _ _ _ _ _ EQ0 LINJ1 MINJ1 MATCH1) + as [f3 [te3 [tm3 [tv2 [EVAL2 [VINJ2 [MINJ2 [INCR2 MATCH2]]]]]]]]. + exists f3; exists te3; exists tm3; exists tv2. + intuition. + rewrite <- H5. eapply eval_conditionalexpr_true; eauto. + inversion VINJ1; subst v1 tv1; simpl in H1; simpl; contradiction || auto. + eapply inject_incr_trans; eauto. +Qed. + +Lemma transl_expr_Econdition_false_correct: + forall (le : Csharpminor.letenv) (e : Csharpminor.env) (m : mem) + (a b c : Csharpminor.expr) (m1 : mem) (v1 : val) (m2 : mem) + (v2 : val), + Csharpminor.eval_expr ge le e m a m1 v1 -> + eval_expr_prop le e m a m1 v1 -> + Val.is_false v1 -> + Csharpminor.eval_expr ge le e m1 c m2 v2 -> + eval_expr_prop le e m1 c m2 v2 -> + eval_expr_prop le e m (Csharpminor.Econdition a b c) m2 v2. +Proof. + intros; red; intros. monadInv TR. + destruct (H0 _ _ _ _ _ _ _ _ _ _ EQ LINJ MINJ MATCH) + as [f2 [te2 [tm2 [tv1 [EVAL1 [VINJ1 [MINJ1 [INCR1 MATCH1]]]]]]]]. + assert (LINJ1: val_list_inject f2 le tle). eapply val_list_inject_incr; eauto. + destruct (H3 _ _ _ _ _ _ _ _ _ _ EQ1 LINJ1 MINJ1 MATCH1) + as [f3 [te3 [tm3 [tv2 [EVAL2 [VINJ2 [MINJ2 [INCR2 MATCH2]]]]]]]]. + exists f3; exists te3; exists tm3; exists tv2. + intuition. + rewrite <- H5. eapply eval_conditionalexpr_false; eauto. + inversion VINJ1; subst v1 tv1; simpl in H1; simpl; contradiction || auto. + eapply inject_incr_trans; eauto. +Qed. + +Lemma transl_expr_Elet_correct: + forall (le : Csharpminor.letenv) (e : Csharpminor.env) (m : mem) + (a b : Csharpminor.expr) (m1 : mem) (v1 : val) (m2 : mem) (v2 : val), + Csharpminor.eval_expr ge le e m a m1 v1 -> + eval_expr_prop le e m a m1 v1 -> + Csharpminor.eval_expr ge (v1 :: le) e m1 b m2 v2 -> + eval_expr_prop (v1 :: le) e m1 b m2 v2 -> + eval_expr_prop le e m (Csharpminor.Elet a b) m2 v2. +Proof. + intros; red; intros. monadInv TR. + destruct (H0 _ _ _ _ _ _ _ _ _ _ EQ LINJ MINJ MATCH) + as [f2 [te2 [tm2 [tv1 [EVAL1 [VINJ1 [MINJ1 [INCR1 MATCH1]]]]]]]]. + assert (LINJ1: val_list_inject f2 (v1 :: le) (tv1 :: tle)). + constructor. auto. eapply val_list_inject_incr; eauto. + destruct (H2 _ _ _ _ _ _ _ _ _ _ EQ0 LINJ1 MINJ1 MATCH1) + as [f3 [te3 [tm3 [tv2 [EVAL2 [VINJ2 [MINJ2 [INCR2 MATCH2]]]]]]]]. + exists f3; exists te3; exists tm3; exists tv2. + intuition. + subst ta; eapply eval_Elet; eauto. + eapply inject_incr_trans; eauto. +Qed. + +Remark val_list_inject_nth: + forall f l1 l2, val_list_inject f l1 l2 -> + forall n v1, nth_error l1 n = Some v1 -> + exists v2, nth_error l2 n = Some v2 /\ val_inject f v1 v2. +Proof. + induction 1; destruct n; simpl; intros. + discriminate. discriminate. + injection H1; intros; subst v. exists v'; split; auto. + eauto. +Qed. + +Lemma transl_expr_Eletvar_correct: + forall (le : list val) (e : Csharpminor.env) (m : mem) (n : nat) + (v : val), + nth_error le n = Some v -> + eval_expr_prop le e m (Csharpminor.Eletvar n) m v. +Proof. + intros; red; intros. monadInv TR. + destruct (val_list_inject_nth _ _ _ LINJ _ _ H) + as [tv [A B]]. + exists f1; exists te1; exists tm1; exists tv. + intuition. + subst ta. eapply eval_Eletvar; auto. +Qed. + +Lemma transl_exprlist_Enil_correct: + forall (le : Csharpminor.letenv) (e : Csharpminor.env) (m : mem), + eval_exprlist_prop le e m Csharpminor.Enil m nil. +Proof. + intros; red; intros. monadInv TR. + exists f1; exists te1; exists tm1; exists (@nil val). + intuition. subst tal; constructor. +Qed. + +Lemma transl_exprlist_Econs_correct: + forall (le : Csharpminor.letenv) (e : Csharpminor.env) (m : mem) + (a : Csharpminor.expr) (bl : Csharpminor.exprlist) (m1 : mem) + (v : val) (m2 : mem) (vl : list val), + Csharpminor.eval_expr ge le e m a m1 v -> + eval_expr_prop le e m a m1 v -> + Csharpminor.eval_exprlist ge le e m1 bl m2 vl -> + eval_exprlist_prop le e m1 bl m2 vl -> + eval_exprlist_prop le e m (Csharpminor.Econs a bl) m2 (v :: vl). +Proof. + intros; red; intros. monadInv TR. + destruct (H0 _ _ _ _ _ _ _ _ _ _ EQ LINJ MINJ MATCH) + as [f2 [te2 [tm2 [tv1 [EVAL1 [VINJ1 [MINJ2 [INCR2 MATCH2]]]]]]]]. + assert (LINJ2: val_list_inject f2 le tle). eapply val_list_inject_incr; eauto. + destruct (H2 _ _ _ _ _ _ _ _ _ _ EQ0 LINJ2 MINJ2 MATCH2) + as [f3 [te3 [tm3 [tv2 [EVAL2 [VINJ2 [MINJ3 [INCR3 MATCH3]]]]]]]]. + assert (VINJ1': val_inject f3 v tv1). eapply val_inject_incr; eauto. + exists f3; exists te3; exists tm3; exists (tv1 :: tv2). + intuition. subst tal; econstructor; eauto. + eapply inject_incr_trans; eauto. +Qed. + +Lemma transl_funcall_correct: + forall (m : mem) (f : Csharpminor.function) (vargs : list val) + (e : Csharpminor.env) (m1 : mem) (lb : list block) (m2 m3 : mem) + (out : Csharpminor.outcome) (vres : val), + list_norepet (fn_params_names f ++ fn_vars_names f) -> + alloc_variables empty_env m (fn_variables f) e m1 lb -> + bind_parameters e m1 (Csharpminor.fn_params f) vargs m2 -> + Csharpminor.exec_stmtlist ge e m2 (Csharpminor.fn_body f) m3 out -> + exec_stmtlist_prop e m2 (Csharpminor.fn_body f) m3 out -> + Csharpminor.outcome_result_value out (sig_res (Csharpminor.fn_sig f)) vres -> + eval_funcall_prop m f vargs (free_list m3 lb) vres. +Proof. + intros; red. intros tfn f1 tm; intros. + unfold transl_function in TR. + caseEq (build_compilenv f); intros cenv stacksize CENV. + rewrite CENV in TR. + destruct (zle stacksize Int.max_signed); try discriminate. + monadInv TR. clear TR. + caseEq (alloc tm 0 stacksize). intros tm1 sp ALLOC. + destruct (function_entry_ok _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + H0 H1 MATCH CENV z ALLOC ARGSINJ MINJ H) + as [f2 [te2 [tm2 [STOREPARAM [MINJ2 [INCR12 [MATCH2 BLOCKS]]]]]]]. + destruct (H3 _ _ _ _ _ _ _ _ _ EQ MINJ2 MATCH2) + as [f3 [te3 [tm3 [tout [EXECBODY [OUTINJ [MINJ3 [INCR23 MATCH3]]]]]]]]. + assert (exists tvres, + outcome_result_value tout f.(Csharpminor.fn_sig).(sig_res) tvres /\ + val_inject f3 vres tvres). + generalize H4. unfold Csharpminor.outcome_result_value, outcome_result_value. + inversion OUTINJ. + destruct (sig_res (Csharpminor.fn_sig f)); intro. contradiction. + exists Vundef; split. auto. subst vres; constructor. + tauto. + destruct (sig_res (Csharpminor.fn_sig f)); intro. contradiction. + exists Vundef; split. auto. subst vres; constructor. + destruct (sig_res (Csharpminor.fn_sig f)); intro. + exists v2; split. auto. subst vres; auto. + contradiction. + elim H5; clear H5; intros tvres [TOUT VINJRES]. + exists f3; exists (Mem.free tm3 sp); exists tvres. + (* execution *) + split. rewrite <- H6; econstructor; simpl; eauto. + apply exec_Scons_continue with te2 tm2. + change Out_normal with (outcome_block Out_normal). + apply exec_Sblock. exact STOREPARAM. + eexact EXECBODY. + (* val_inject *) + split. assumption. + (* mem_inject *) + split. apply free_inject; auto. + intros. elim (BLOCKS b1); intros B1 B2. apply B1. inversion MATCH3. inversion H20. + eapply me_inv0. eauto. + (* inject_incr *) + split. eapply inject_incr_trans; eauto. + (* match_callstack *) + assert (forall bl mm, nextblock (free_list mm bl) = nextblock mm). + induction bl; intros. reflexivity. simpl. auto. + unfold free; simpl nextblock. rewrite H5. + eapply match_callstack_freelist; eauto. + intros. elim (BLOCKS b); intros B1 B2. generalize (B2 H7). omega. +Qed. + +Lemma transl_stmt_Sexpr_correct: + forall (e : Csharpminor.env) (m : mem) (a : Csharpminor.expr) + (m1 : mem) (v : val), + Csharpminor.eval_expr ge nil e m a m1 v -> + eval_expr_prop nil e m a m1 v -> + exec_stmt_prop e m (Csharpminor.Sexpr a) m1 Csharpminor.Out_normal. +Proof. + intros; red; intros. monadInv TR. + destruct (H0 _ _ _ _ _ _ _ _ _ _ EQ (val_nil_inject f1) MINJ MATCH) + as [f2 [te2 [tm2 [tv1 [EVAL1 [VINJ1 [MINJ2 [INCR2 MATCH2]]]]]]]]. + exists f2; exists te2; exists tm2; exists Out_normal. + intuition. subst ts. econstructor; eauto. + constructor. +Qed. + +Lemma transl_stmt_Sifthenelse_true_correct: + forall (e : Csharpminor.env) (m : mem) (a : Csharpminor.expr) + (sl1 sl2 : Csharpminor.stmtlist) (m1 : mem) (v1 : val) (m2 : mem) + (out : Csharpminor.outcome), + Csharpminor.eval_expr ge nil e m a m1 v1 -> + eval_expr_prop nil e m a m1 v1 -> + Val.is_true v1 -> + Csharpminor.exec_stmtlist ge e m1 sl1 m2 out -> + exec_stmtlist_prop e m1 sl1 m2 out -> + exec_stmt_prop e m (Csharpminor.Sifthenelse a sl1 sl2) m2 out. +Proof. + intros; red; intros. monadInv TR. + destruct (H0 _ _ _ _ _ _ _ _ _ _ EQ (val_nil_inject f1) MINJ MATCH) + as [f2 [te2 [tm2 [tv1 [EVAL1 [VINJ1 [MINJ2 [INCR2 MATCH2]]]]]]]]. + destruct (H3 _ _ _ _ _ _ _ _ _ EQ0 MINJ2 MATCH2) + as [f3 [te3 [tm3 [tout [EVAL2 [OINJ [MINJ3 [INCR3 MATCH3]]]]]]]]. + exists f3; exists te3; exists tm3; exists tout. + intuition. + subst ts. eapply exec_ifthenelse_true; eauto. + inversion VINJ1; subst v1 tv1; simpl in H1; simpl; contradiction || auto. + eapply inject_incr_trans; eauto. +Qed. + +Lemma transl_stmt_Sifthenelse_false_correct: + forall (e : Csharpminor.env) (m : mem) (a : Csharpminor.expr) + (sl1 sl2 : Csharpminor.stmtlist) (m1 : mem) (v1 : val) (m2 : mem) + (out : Csharpminor.outcome), + Csharpminor.eval_expr ge nil e m a m1 v1 -> + eval_expr_prop nil e m a m1 v1 -> + Val.is_false v1 -> + Csharpminor.exec_stmtlist ge e m1 sl2 m2 out -> + exec_stmtlist_prop e m1 sl2 m2 out -> + exec_stmt_prop e m (Csharpminor.Sifthenelse a sl1 sl2) m2 out. +Proof. + intros; red; intros. monadInv TR. + destruct (H0 _ _ _ _ _ _ _ _ _ _ EQ (val_nil_inject f1) MINJ MATCH) + as [f2 [te2 [tm2 [tv1 [EVAL1 [VINJ1 [MINJ2 [INCR2 MATCH2]]]]]]]]. + destruct (H3 _ _ _ _ _ _ _ _ _ EQ1 MINJ2 MATCH2) + as [f3 [te3 [tm3 [tout [EVAL2 [OINJ [MINJ3 [INCR3 MATCH3]]]]]]]]. + exists f3; exists te3; exists tm3; exists tout. + intuition. + subst ts. eapply exec_ifthenelse_false; eauto. + inversion VINJ1; subst v1 tv1; simpl in H1; simpl; contradiction || auto. + eapply inject_incr_trans; eauto. +Qed. + +Lemma transl_stmt_Sloop_loop_correct: + forall (e : Csharpminor.env) (m : mem) (sl : Csharpminor.stmtlist) + (m1 m2 : mem) (out : Csharpminor.outcome), + Csharpminor.exec_stmtlist ge e m sl m1 Csharpminor.Out_normal -> + exec_stmtlist_prop e m sl m1 Csharpminor.Out_normal -> + Csharpminor.exec_stmt ge e m1 (Csharpminor.Sloop sl) m2 out -> + exec_stmt_prop e m1 (Csharpminor.Sloop sl) m2 out -> + exec_stmt_prop e m (Csharpminor.Sloop sl) m2 out. +Proof. + intros; red; intros. monadInv TR. + destruct (H0 _ _ _ _ _ _ _ _ _ EQ MINJ MATCH) + as [f2 [te2 [tm2 [tout1 [EVAL1 [OINJ1 [MINJ2 [INCR2 MATCH2]]]]]]]]. + destruct (H2 _ _ _ _ _ _ _ _ _ TR MINJ2 MATCH2) + as [f3 [te3 [tm3 [tout2 [EVAL2 [OINJ2 [MINJ3 [INCR3 MATCH3]]]]]]]]. + exists f3; exists te3; exists tm3; exists tout2. + intuition. + subst ts. eapply exec_Sloop_loop; eauto. + inversion OINJ1; subst tout1; eauto. + eapply inject_incr_trans; eauto. +Qed. + + +Lemma transl_stmt_Sloop_exit_correct: + forall (e : Csharpminor.env) (m : mem) (sl : Csharpminor.stmtlist) + (m1 : mem) (out : Csharpminor.outcome), + Csharpminor.exec_stmtlist ge e m sl m1 out -> + exec_stmtlist_prop e m sl m1 out -> + out <> Csharpminor.Out_normal -> + exec_stmt_prop e m (Csharpminor.Sloop sl) m1 out. +Proof. + intros; red; intros. monadInv TR. + destruct (H0 _ _ _ _ _ _ _ _ _ EQ MINJ MATCH) + as [f2 [te2 [tm2 [tout1 [EVAL1 [OINJ1 [MINJ2 [INCR2 MATCH2]]]]]]]]. + exists f2; exists te2; exists tm2; exists tout1. + intuition. subst ts; eapply exec_Sloop_stop; eauto. + inversion OINJ1; subst out tout1; congruence. +Qed. + +Lemma transl_stmt_Sblock_correct: + forall (e : Csharpminor.env) (m : mem) (sl : Csharpminor.stmtlist) + (m1 : mem) (out : Csharpminor.outcome), + Csharpminor.exec_stmtlist ge e m sl m1 out -> + exec_stmtlist_prop e m sl m1 out -> + exec_stmt_prop e m (Csharpminor.Sblock sl) m1 + (Csharpminor.outcome_block out). +Proof. + intros; red; intros. monadInv TR. + destruct (H0 _ _ _ _ _ _ _ _ _ EQ MINJ MATCH) + as [f2 [te2 [tm2 [tout1 [EVAL1 [OINJ1 [MINJ2 [INCR2 MATCH2]]]]]]]]. + exists f2; exists te2; exists tm2; exists (outcome_block tout1). + intuition. subst ts; eapply exec_Sblock; eauto. + inversion OINJ1; subst out tout1; simpl. + constructor. + destruct n; constructor. + constructor. + constructor; auto. +Qed. + +Lemma transl_stmt_Sexit_correct: + forall (e : Csharpminor.env) (m : mem) (n : nat), + exec_stmt_prop e m (Csharpminor.Sexit n) m (Csharpminor.Out_exit n). +Proof. + intros; red; intros. monadInv TR. + exists f1; exists te1; exists tm1; exists (Out_exit n). + intuition. subst ts; constructor. constructor. +Qed. + +Lemma transl_stmt_Sreturn_none_correct: + forall (e : Csharpminor.env) (m : mem), + exec_stmt_prop e m (Csharpminor.Sreturn None) m + (Csharpminor.Out_return None). +Proof. + intros; red; intros. monadInv TR. + exists f1; exists te1; exists tm1; exists (Out_return None). + intuition. subst ts; constructor. constructor. +Qed. + +Lemma transl_stmt_Sreturn_some_correct: + forall (e : Csharpminor.env) (m : mem) (a : Csharpminor.expr) + (m1 : mem) (v : val), + Csharpminor.eval_expr ge nil e m a m1 v -> + eval_expr_prop nil e m a m1 v -> + exec_stmt_prop e m (Csharpminor.Sreturn (Some a)) m1 + (Csharpminor.Out_return (Some v)). +Proof. + intros; red; intros; monadInv TR. + destruct (H0 _ _ _ _ _ _ _ _ _ _ EQ (val_nil_inject f1) MINJ MATCH) + as [f2 [te2 [tm2 [tv1 [EVAL [VINJ1 [MINJ2 [INCR MATCH2]]]]]]]]. + exists f2; exists te2; exists tm2; exists (Out_return (Some tv1)). + intuition. subst ts; econstructor; eauto. constructor; auto. +Qed. + +Lemma transl_stmtlist_Snil_correct: + forall (e : Csharpminor.env) (m : mem), + exec_stmtlist_prop e m Csharpminor.Snil m Csharpminor.Out_normal. +Proof. + intros; red; intros; monadInv TR. + exists f1; exists te1; exists tm1; exists Out_normal. + intuition. subst ts; constructor. constructor. +Qed. + +Lemma transl_stmtlist_Scons_2_correct: + forall (e : Csharpminor.env) (m : mem) (s : Csharpminor.stmt) + (sl : Csharpminor.stmtlist) (m1 m2 : mem) (out : Csharpminor.outcome), + Csharpminor.exec_stmt ge e m s m1 Csharpminor.Out_normal -> + exec_stmt_prop e m s m1 Csharpminor.Out_normal -> + Csharpminor.exec_stmtlist ge e m1 sl m2 out -> + exec_stmtlist_prop e m1 sl m2 out -> + exec_stmtlist_prop e m (Csharpminor.Scons s sl) m2 out. +Proof. + intros; red; intros; monadInv TR. + destruct (H0 _ _ _ _ _ _ _ _ _ EQ MINJ MATCH) + as [f2 [te2 [tm2 [tout1 [EXEC1 [OINJ1 [MINJ2 [INCR2 MATCH2]]]]]]]]. + destruct (H2 _ _ _ _ _ _ _ _ _ EQ0 MINJ2 MATCH2) + as [f3 [te3 [tm3 [tout2 [EXEC2 [OINJ2 [MINJ3 [INCR3 MATCH3]]]]]]]]. + exists f3; exists te3; exists tm3; exists tout2. + intuition. subst ts; eapply exec_Scons_continue; eauto. + inversion OINJ1. subst tout1. auto. + eapply inject_incr_trans; eauto. +Qed. + +Lemma transl_stmtlist_Scons_1_correct: + forall (e : Csharpminor.env) (m : mem) (s : Csharpminor.stmt) + (sl : Csharpminor.stmtlist) (m1 : mem) (out : Csharpminor.outcome), + Csharpminor.exec_stmt ge e m s m1 out -> + exec_stmt_prop e m s m1 out -> + out <> Csharpminor.Out_normal -> + exec_stmtlist_prop e m (Csharpminor.Scons s sl) m1 out. +Proof. + intros; red; intros; monadInv TR. + destruct (H0 _ _ _ _ _ _ _ _ _ EQ MINJ MATCH) + as [f2 [te2 [tm2 [tout1 [EXEC1 [OINJ1 [MINJ2 [INCR2 MATCH2]]]]]]]]. + exists f2; exists te2; exists tm2; exists tout1. + intuition. subst ts; eapply exec_Scons_stop; eauto. + inversion OINJ1; subst out tout1; congruence. +Qed. + +(** We conclude by an induction over the structure of the Csharpminor + evaluation derivation, using the lemmas above for each case. *) + +Lemma transl_function_correct: + forall m1 f vargs m2 vres, + Csharpminor.eval_funcall ge m1 f vargs m2 vres -> + eval_funcall_prop m1 f vargs m2 vres. +Proof + (eval_funcall_ind5 ge + eval_expr_prop + eval_exprlist_prop + eval_funcall_prop + exec_stmt_prop + exec_stmtlist_prop + + transl_expr_Evar_correct + transl_expr_Eassign_correct + transl_expr_Eaddrof_local_correct + transl_expr_Eaddrof_global_correct + transl_expr_Eop_correct + transl_expr_Eload_correct + transl_expr_Estore_correct + transl_expr_Ecall_correct + transl_expr_Econdition_true_correct + transl_expr_Econdition_false_correct + transl_expr_Elet_correct + transl_expr_Eletvar_correct + transl_exprlist_Enil_correct + transl_exprlist_Econs_correct + transl_funcall_correct + transl_stmt_Sexpr_correct + transl_stmt_Sifthenelse_true_correct + transl_stmt_Sifthenelse_false_correct + transl_stmt_Sloop_loop_correct + transl_stmt_Sloop_exit_correct + transl_stmt_Sblock_correct + transl_stmt_Sexit_correct + transl_stmt_Sreturn_none_correct + transl_stmt_Sreturn_some_correct + transl_stmtlist_Snil_correct + transl_stmtlist_Scons_2_correct + transl_stmtlist_Scons_1_correct). + +(** The [match_globalenvs] relation holds for the global environments + of the source program and the transformed program. *) + +Lemma match_globalenvs_init: + let m := Genv.init_mem prog in + let tm := Genv.init_mem tprog in + let f := fun b => if zlt b m.(nextblock) then Some(b, 0) else None in + match_globalenvs f. +Proof. + intros. constructor. + (* symbols *) + intros. split. + unfold f. rewrite zlt_true. auto. unfold m. + eapply Genv.find_symbol_inv; eauto. + rewrite <- H. apply symbols_preserved. + intros. unfold f. rewrite zlt_true. auto. + generalize (nextblock_pos m). omega. +Qed. + +(** The correctness of the translation of a whole Csharpminor program + follows. *) + +Theorem transl_program_correct: + forall n, + Csharpminor.exec_program prog (Vint n) -> + exec_program tprog (Vint n). +Proof. + intros n [b [fn [m [FINDS [FINDF [SIG EVAL]]]]]]. + elim (function_ptr_translated _ _ FINDF). intros tfn [TFIND TR]. + set (m0 := Genv.init_mem prog) in *. + set (f := fun b => if zlt b m0.(nextblock) then Some(b, 0) else None). + assert (MINJ0: mem_inject f m0 m0). + unfold f; constructor; intros. + apply zlt_false; auto. + destruct (zlt b0 (nextblock m0)); try discriminate. + inversion H; subst b' delta. + split. auto. + constructor. compute. split; congruence. left; auto. + intros; omega. + generalize (Genv.initmem_undef prog b0). fold m0. intros [lo [hi EQ]]. + rewrite EQ. simpl. constructor. + destruct (zlt b1 (nextblock m0)); try discriminate. + destruct (zlt b2 (nextblock m0)); try discriminate. + left; congruence. + assert (MATCH0: match_callstack f nil m0.(nextblock) m0.(nextblock) m0). + constructor. unfold f; apply match_globalenvs_init. + fold ge in EVAL. + destruct (transl_function_correct _ _ _ _ _ EVAL + _ _ _ _ _ TR MINJ0 MATCH0 (val_nil_inject f)) + as [f1 [tm1 [tres [TEVAL [VINJ [MINJ1 [INCR MATCH1]]]]]]]. + exists b; exists tfn; exists tm1. + split. fold tge. rewrite <- FINDS. + replace (prog_main prog) with (prog_main tprog). fold ge. apply symbols_preserved. + apply transform_partial_program_main with transl_function. assumption. + split. assumption. + split. rewrite <- SIG. apply sig_transl_function; auto. + rewrite (Genv.init_mem_transf_partial transl_function _ TRANSL). + inversion VINJ; subst tres. assumption. +Qed. + +End TRANSLATION. diff --git a/backend/Coloring.v b/backend/Coloring.v new file mode 100644 index 00000000..1a34a124 --- /dev/null +++ b/backend/Coloring.v @@ -0,0 +1,267 @@ +(** Construction and coloring of the interference graph. *) + +Require Import Coqlib. +Require Import Maps. +Require Import AST. +Require Import Op. +Require Import Registers. +Require Import RTL. +Require Import RTLtyping. +Require Import Locations. +Require Import Conventions. +Require Import InterfGraph. + +(** * Construction of the interference graph *) + +(** Two registers interfere if there exists a program point where + they are both simultaneously live, and it is possible that they + contain different values at this program point. Consequently, + two registers that do not interfere can be merged into one register + while preserving the program behavior: there is no program point + where this merged register would have to hold two different values + (for the two original registers), so to speak. + + The simplified algorithm for constructing the interference graph + from the results of the liveness analysis is as follows: +<< + start with empty interference graph + for each parameter p and register r live at the function entry point: + add conflict edge p <-> r + for each instruction I in function: + let L be the live registers "after" I + if I is a "move" instruction dst <- src, and dst is live: + add conflict edges dst <-> r for each r in L \ {dst, src} + else if I is an instruction with result dst, and dst is live: + add conflict edges dst <-> r for each r in L \ {dst}; + if I is a "call" instruction dst <- f(args), + add conflict edges between all pseudo-registers in L \ {dst} + and all caller-save machine registers + done +>> + Notice that edges are added only when a register becomes live. + A register becomes live either if it is the result of an operation + (and is live afterwards), or if we are at the function entrance + and the register is a function parameter. For two registers to + be simultaneously live at some program point, it must be the case + that one becomes live at a point where the other is already live. + Hence, it suffices to add interference edges between registers + that become live at some instruction and registers that are already + live at this instruction. + + Notice also the special treatment of ``move'' instructions: + since the destination register of the ``move'' is assigned the same value + as the source register, it is semantically correct to assign + the destination and the source registers to the same register, + even if the source register remains live afterwards. + (This is even desirable, since the ``move'' instruction can then + be eliminated.) Thus, no interference is added between the + source and the destination of a ``move'' instruction. + + Finally, for ``call'' instructions, we must make sure that + pseudo-registers live across the instruction are allocated to + callee-save machine register or to stack slots, but never to + caller-save machine registers (these lose their values across + the call). We therefore add the corresponding conflict edges + between pseudo-registers live across and caller-save machine + registers (pairwise). + + The full algorithm is similar to the simplified algorithm above, + but records preference edges in addition to conflict edges. + Preference edges guide the graph coloring algorithm by telling it + that better code will be obtained eventually if it is possible + to allocate certain pseudo-registers to the same location or to + a given machine register. Preference edges are added: +- between the destination and source pseudo-registers of a ``move'' + instruction; +- between the arguments of a ``call'' instruction and the locations + of the arguments as dictated by the calling conventions; +- between the result of a ``call'' instruction and the location + of the result as dictated by the calling conventions. +*) + +Definition add_interf_live + (filter: reg -> bool) (res: reg) (live: Regset.t) (g: graph): graph := + Regset.fold + (fun g r => if filter r then add_interf r res g else g) live g. + +Definition add_interf_op + (res: reg) (live: Regset.t) (g: graph): graph := + add_interf_live + (fun r => if Reg.eq r res then false else true) + res live g. + +Definition add_interf_move + (arg res: reg) (live: Regset.t) (g: graph): graph := + add_interf_live + (fun r => + if Reg.eq r res then false else + if Reg.eq r arg then false else true) + res live g. + +Definition add_interf_call + (live: Regset.t) (destroyed: list mreg) (g: graph): graph := + List.fold_left + (fun g mr => Regset.fold (fun g r => add_interf_mreg r mr g) live g) + destroyed g. + +Fixpoint add_prefs_call + (args: list reg) (locs: list loc) (g: graph) {struct args} : graph := + match args, locs with + | a1 :: al, l1 :: ll => + add_prefs_call al ll + (match l1 with R mr => add_pref_mreg a1 mr g | _ => g end) + | _, _ => g + end. + +Definition add_interf_entry + (params: list reg) (live: Regset.t) (g: graph): graph := + List.fold_left (fun g r => add_interf_op r live g) params g. + +Fixpoint add_interf_params + (params: list reg) (g: graph) {struct params}: graph := + match params with + | nil => g + | p1 :: pl => + add_interf_params pl + (List.fold_left + (fun g r => if Reg.eq r p1 then g else add_interf r p1 g) + pl g) + end. + +Definition add_edges_instr + (sig: signature) (i: instruction) (live: Regset.t) (g: graph) : graph := + match i with + | Iop op args res s => + if Regset.mem res live then + match is_move_operation op args with + | Some arg => + add_pref arg res (add_interf_move arg res live g) + | None => + add_interf_op res live g + end + else g + | Iload chunk addr args dst s => + if Regset.mem dst live + then add_interf_op dst live g + else g + | Icall sig ros args res s => + add_prefs_call args (loc_arguments sig) + (add_pref_mreg res (loc_result sig) + (add_interf_op res live + (add_interf_call + (Regset.remove res live) destroyed_at_call_regs g))) + | Ireturn (Some r) => + add_pref_mreg r (loc_result sig) g + | _ => g + end. + +Definition add_edges_instrs (f: function) (live: PMap.t Regset.t) : graph := + PTree.fold + (fun g pc i => add_edges_instr f.(fn_sig) i live!!pc g) + f.(fn_code) + empty_graph. + +Definition interf_graph (f: function) (live: PMap.t Regset.t) (live0: Regset.t) := + add_prefs_call f.(fn_params) (loc_parameters f.(fn_sig)) + (add_interf_params f.(fn_params) + (add_interf_entry f.(fn_params) live0 + (add_edges_instrs f live))). + +(** * Graph coloring *) + +(** The actual coloring of the graph is performed by a function written + directly in Caml, and not proved correct in any way. This function + takes as argument the [RTL] function, the interference graph for + this function, an assignment of types to [RTL] pseudo-registers, + and the set of all [RTL] pseudo-registers mentioned in the + interference graph. It returns the coloring as a function from + pseudo-registers to locations. *) + +Parameter graph_coloring: + function -> graph -> regenv -> Regset.t -> (reg -> loc). + +(** To ensure that the result of [graph_coloring] is a correct coloring, + we check a posteriori its result using the following Coq functions. + Let [coloring] be the function [reg -> loc] returned by [graph_coloring]. + The three properties checked are: +- [coloring r1 <> coloring r2] if there is a conflict edge between + [r1] and [r2] in the interference graph. +- [coloring r1 <> R m2] if there is a conflict edge between pseudo-register + [r1] and machine register [m2] in the interference graph. +- For all [r] mentioned in the interference graph, + the location [coloring r] is acceptable and has the same type as [r]. +*) + +Definition check_coloring_1 (g: graph) (coloring: reg -> loc) := + SetRegReg.for_all + (fun r1r2 => + if Loc.eq (coloring (fst r1r2)) (coloring (snd r1r2)) then false else true) + g.(interf_reg_reg). + +Definition check_coloring_2 (g: graph) (coloring: reg -> loc) := + SetRegMreg.for_all + (fun r1mr2 => + if Loc.eq (coloring (fst r1mr2)) (R (snd r1mr2)) then false else true) + g.(interf_reg_mreg). + +Definition same_typ (t1 t2: typ) := + match t1, t2 with + | Tint, Tint => true + | Tfloat, Tfloat => true + | _, _ => false + end. + +Definition loc_is_acceptable (l: loc) := + match l with + | R r => + if In_dec Loc.eq l temporaries then false else true + | S (Local ofs ty) => + if zlt ofs 0 then false else true + | _ => + false + end. + +Definition check_coloring_3 (rs: Regset.t) (env: regenv) (coloring: reg -> loc) := + Regset.for_all + (fun r => + let l := coloring r in + andb (loc_is_acceptable l) (same_typ (env r) (Loc.type l))) + rs. + +Definition check_coloring + (g: graph) (env: regenv) (rs: Regset.t) (coloring: reg -> loc) := + andb (check_coloring_1 g coloring) + (andb (check_coloring_2 g coloring) + (check_coloring_3 rs env coloring)). + +(** To preserve decidability of checking, the checks + (especially the third one) are performed for the pseudo-registers + mentioned in the interference graph. To facilitate the proofs, + it is convenient to ensure that the properties hold for all + pseudo-registers. To this end, we ``clip'' the candidate coloring + returned by [graph_coloring]: the final coloring behave identically + over pseudo-registers mentioned in the interference graph, + but returns a dummy machine register of the correct type otherwise. *) + +Definition alloc_of_coloring (coloring: reg -> loc) (env: regenv) (rs: Regset.t) := + fun r => + if Regset.mem r rs + then coloring r + else match env r with Tint => R R3 | Tfloat => R F1 end. + +(** * Coloring of the interference graph *) + +(** The following function combines the phases described above: + construction of the interference graph, coloring by untrusted + Caml code, checking of the candidate coloring returned, + and adjustment of this coloring. If the coloring candidate is + incorrect, [None] is returned, causing register allocation to fail. *) + +Definition regalloc + (f: function) (live: PMap.t Regset.t) (live0: Regset.t) (env: regenv) := + let g := interf_graph f live live0 in + let rs := all_interf_regs g in + let coloring := graph_coloring f g env rs in + if check_coloring g env rs coloring + then Some (alloc_of_coloring coloring env rs) + else None. diff --git a/backend/Coloringproof.v b/backend/Coloringproof.v new file mode 100644 index 00000000..39b208ec --- /dev/null +++ b/backend/Coloringproof.v @@ -0,0 +1,845 @@ +(** Correctness of graph coloring. *) + +Require Import Coqlib. +Require Import Maps. +Require Import AST. +Require Import Op. +Require Import Registers. +Require Import RTL. +Require Import RTLtyping. +Require Import Locations. +Require Import Conventions. +Require Import InterfGraph. +Require Import Coloring. + +(** * Correctness of the interference graph *) + +(** We show that the interference graph built by [interf_graph] + is correct in the sense that it contains all conflict edges + that we need. + + Many boring lemmas on the auxiliary functions used to construct + the interference graph follow. The lemmas are of two kinds: + the ``increasing'' lemmas show that the auxiliary functions only add + edges to the interference graph, but do not remove existing edges; + and the ``correct'' lemmas show that the auxiliary functions + correctly add the edges that we'd like them to add. *) + +Lemma graph_incl_refl: + forall g, graph_incl g g. +Proof. + intros; split; auto. +Qed. + +Lemma add_interf_live_incl_aux: + forall (filter: reg -> bool) res live g, + graph_incl g + (List.fold_left + (fun g r => if filter r then add_interf r res g else g) + live g). +Proof. + induction live; simpl; intros. + apply graph_incl_refl. + apply graph_incl_trans with (if filter a then add_interf a res g else g). + case (filter a). + apply add_interf_incl. + apply graph_incl_refl. + apply IHlive. +Qed. + +Lemma add_interf_live_incl: + forall (filter: reg -> bool) res live g, + graph_incl g (add_interf_live filter res live g). +Proof. + intros. unfold add_interf_live. rewrite Regset.fold_spec. + apply add_interf_live_incl_aux. +Qed. + +Lemma add_interf_live_correct_aux: + forall filter res live g r, + In r live -> filter r = true -> + interfere r res + (List.fold_left + (fun g r => if filter r then add_interf r res g else g) + live g). +Proof. + induction live; simpl; intros. + contradiction. + elim H; intros. + subst a. rewrite H0. + generalize (add_interf_live_incl_aux filter res live (add_interf r res g)). + intros [A B]. + apply A. apply add_interf_correct. + apply IHlive; auto. +Qed. + +Lemma add_interf_live_correct: + forall filter res live g r, + Regset.mem r live = true -> + filter r = true -> + interfere r res (add_interf_live filter res live g). +Proof. + intros. unfold add_interf_live. rewrite Regset.fold_spec. + apply add_interf_live_correct_aux; auto. + apply Regset.elements_correct. auto. +Qed. + +Lemma add_interf_op_incl: + forall res live g, + graph_incl g (add_interf_op res live g). +Proof. + intros; unfold add_interf_op. apply add_interf_live_incl. +Qed. + +Lemma add_interf_op_correct: + forall res live g r, + Regset.mem r live = true -> + r <> res -> + interfere r res (add_interf_op res live g). +Proof. + intros. unfold add_interf_op. + apply add_interf_live_correct. + auto. + case (Reg.eq r res); intro. + contradiction. auto. +Qed. + +Lemma add_interf_move_incl: + forall arg res live g, + graph_incl g (add_interf_move arg res live g). +Proof. + intros; unfold add_interf_move. apply add_interf_live_incl. +Qed. + +Lemma add_interf_move_correct: + forall arg res live g r, + Regset.mem r live = true -> + r <> arg -> r <> res -> + interfere r res (add_interf_move arg res live g). +Proof. + intros. unfold add_interf_move. + apply add_interf_live_correct. + auto. + case (Reg.eq r res); intro. + contradiction. + case (Reg.eq r arg); intro. + contradiction. + auto. +Qed. + +Lemma add_interf_call_incl_aux_1: + forall mr live g, + graph_incl g + (List.fold_left (fun g r => add_interf_mreg r mr g) live g). +Proof. + induction live; simpl; intros. + apply graph_incl_refl. + apply graph_incl_trans with (add_interf_mreg a mr g). + apply add_interf_mreg_incl. + auto. +Qed. + +Lemma add_interf_call_incl_aux_2: + forall mr live g, + graph_incl g + (Regset.fold (fun g r => add_interf_mreg r mr g) live g). +Proof. + intros. rewrite Regset.fold_spec. apply add_interf_call_incl_aux_1. +Qed. + +Lemma add_interf_call_incl: + forall live destroyed g, + graph_incl g (add_interf_call live destroyed g). +Proof. + induction destroyed; simpl; intros. + apply graph_incl_refl. + eapply graph_incl_trans; [idtac|apply IHdestroyed]. + apply add_interf_call_incl_aux_2. +Qed. + +Lemma interfere_incl: + forall r1 r2 g1 g2, + graph_incl g1 g2 -> + interfere r1 r2 g1 -> + interfere r1 r2 g2. +Proof. + unfold graph_incl; intros. elim H; auto. +Qed. + +Lemma interfere_mreg_incl: + forall r1 r2 g1 g2, + graph_incl g1 g2 -> + interfere_mreg r1 r2 g1 -> + interfere_mreg r1 r2 g2. +Proof. + unfold graph_incl; intros. elim H; auto. +Qed. + +Lemma add_interf_call_correct_aux_1: + forall mr live g r, + In r live -> + interfere_mreg r mr + (List.fold_left (fun g r => add_interf_mreg r mr g) live g). +Proof. + induction live; simpl; intros. + elim H. + elim H; intros. + subst a. eapply interfere_mreg_incl. + apply add_interf_call_incl_aux_1. + apply add_interf_mreg_correct. + apply IHlive; auto. +Qed. + +Lemma add_interf_call_correct_aux_2: + forall mr live g r, + Regset.mem r live = true -> + interfere_mreg r mr + (Regset.fold (fun g r => add_interf_mreg r mr g) live g). +Proof. + intros. rewrite Regset.fold_spec. apply add_interf_call_correct_aux_1. + apply Regset.elements_correct; auto. +Qed. + +Lemma add_interf_call_correct: + forall live destroyed g r mr, + Regset.mem r live = true -> + In mr destroyed -> + interfere_mreg r mr (add_interf_call live destroyed g). +Proof. + induction destroyed; simpl; intros. + elim H0. + elim H0; intros. + subst a. eapply interfere_mreg_incl. + apply add_interf_call_incl. + apply add_interf_call_correct_aux_2; auto. + apply IHdestroyed; auto. +Qed. + +Lemma add_prefs_call_incl: + forall args locs g, + graph_incl g (add_prefs_call args locs g). +Proof. + induction args; destruct locs; simpl; intros; + try apply graph_incl_refl. + destruct l. + eapply graph_incl_trans; [idtac|eauto]. + apply add_pref_mreg_incl. + auto. +Qed. + +Lemma add_interf_entry_incl: + forall params live g, + graph_incl g (add_interf_entry params live g). +Proof. + unfold add_interf_entry; induction params; simpl; intros. + apply graph_incl_refl. + eapply graph_incl_trans; [idtac|eauto]. + apply add_interf_op_incl. +Qed. + +Lemma add_interf_entry_correct: + forall params live g r1 r2, + In r1 params -> + Regset.mem r2 live = true -> + r1 <> r2 -> + interfere r1 r2 (add_interf_entry params live g). +Proof. + unfold add_interf_entry; induction params; simpl; intros. + elim H. + elim H; intro. + subst a. apply interfere_incl with (add_interf_op r1 live g). + exact (add_interf_entry_incl _ _ _). + apply interfere_sym. apply add_interf_op_correct; auto. + auto. +Qed. + +Lemma add_interf_params_incl_aux: + forall p1 pl g, + graph_incl g + (List.fold_left + (fun g r => if Reg.eq r p1 then g else add_interf r p1 g) + pl g). +Proof. + induction pl; simpl; intros. + apply graph_incl_refl. + eapply graph_incl_trans; [idtac|eauto]. + case (Reg.eq a p1); intro. + apply graph_incl_refl. apply add_interf_incl. +Qed. + +Lemma add_interf_params_incl: + forall pl g, + graph_incl g (add_interf_params pl g). +Proof. + induction pl; simpl; intros. + apply graph_incl_refl. + eapply graph_incl_trans; [idtac|eauto]. + apply add_interf_params_incl_aux. +Qed. + +Lemma add_interf_params_correct_aux: + forall p1 pl g p2, + In p2 pl -> + p1 <> p2 -> + interfere p1 p2 + (List.fold_left + (fun g r => if Reg.eq r p1 then g else add_interf r p1 g) + pl g). +Proof. + induction pl; simpl; intros. + elim H. + elim H; intro; clear H. + subst a. apply interfere_sym. eapply interfere_incl. + apply add_interf_params_incl_aux. + case (Reg.eq p2 p1); intro. + congruence. apply add_interf_correct. + auto. +Qed. + +Lemma add_interf_params_correct: + forall pl g r1 r2, + In r1 pl -> In r2 pl -> r1 <> r2 -> + interfere r1 r2 (add_interf_params pl g). +Proof. + induction pl; simpl; intros. + elim H. + elim H; intro; clear H; elim H0; intro; clear H0. + congruence. + subst a. eapply interfere_incl. apply add_interf_params_incl. + apply add_interf_params_correct_aux; auto. + subst a. apply interfere_sym. + eapply interfere_incl. apply add_interf_params_incl. + apply add_interf_params_correct_aux; auto. + auto. +Qed. + +Lemma add_edges_instr_incl: + forall sig instr live g, + graph_incl g (add_edges_instr sig instr live g). +Proof. + intros. destruct instr; unfold add_edges_instr; + try apply graph_incl_refl. + case (Regset.mem r live). + destruct (is_move_operation o l). + eapply graph_incl_trans; [idtac|apply add_pref_incl]. + apply add_interf_move_incl. + apply add_interf_op_incl. + apply graph_incl_refl. + case (Regset.mem r live). + apply add_interf_op_incl. + apply graph_incl_refl. + eapply graph_incl_trans; [idtac|apply add_prefs_call_incl]. + eapply graph_incl_trans; [idtac|apply add_pref_mreg_incl]. + eapply graph_incl_trans; [idtac|apply add_interf_op_incl]. + apply add_interf_call_incl. + destruct o. + apply add_pref_mreg_incl. + apply graph_incl_refl. +Qed. + +(** The proposition below states that graph [g] contains + all the conflict edges expected for instruction [instr]. *) + +Definition correct_interf_instr + (live: Regset.t) (instr: instruction) (g: graph) : Prop := + match instr with + | Iop op args res s => + match is_move_operation op args with + | Some arg => + forall r, + Regset.mem res live = true -> + Regset.mem r live = true -> + r <> res -> r <> arg -> interfere r res g + | None => + forall r, + Regset.mem res live = true -> + Regset.mem r live = true -> + r <> res -> interfere r res g + end + | Iload chunk addr args res s => + forall r, + Regset.mem res live = true -> + Regset.mem r live = true -> + r <> res -> interfere r res g + | Icall sig ros args res s => + (forall r mr, + Regset.mem r live = true -> + In mr destroyed_at_call_regs -> + r <> res -> + interfere_mreg r mr g) + /\ (forall r, + Regset.mem r live = true -> + r <> res -> interfere r res g) + | _ => + True + end. + +Lemma correct_interf_instr_incl: + forall live instr g1 g2, + graph_incl g1 g2 -> + correct_interf_instr live instr g1 -> + correct_interf_instr live instr g2. +Proof. + intros until g2. intro. + unfold correct_interf_instr; destruct instr; auto. + destruct (is_move_operation o l). + intros. eapply interfere_incl; eauto. + intros. eapply interfere_incl; eauto. + intros. eapply interfere_incl; eauto. + intros [A B]. split. + intros. eapply interfere_mreg_incl; eauto. + intros. eapply interfere_incl; eauto. +Qed. + +Lemma add_edges_instr_correct: + forall sig instr live g, + correct_interf_instr live instr (add_edges_instr sig instr live g). +Proof. + intros. + destruct instr; unfold add_edges_instr; unfold correct_interf_instr; auto. + destruct (is_move_operation o l); intros. + rewrite H. eapply interfere_incl. + apply add_pref_incl. apply add_interf_move_correct; auto. + rewrite H. apply add_interf_op_correct; auto. + + intros. rewrite H. apply add_interf_op_correct; auto. + + split; intros. + apply interfere_mreg_incl with + (add_interf_call (Regset.remove r live) destroyed_at_call_regs g). + eapply graph_incl_trans; [idtac|apply add_prefs_call_incl]. + eapply graph_incl_trans; [idtac|apply add_pref_mreg_incl]. + apply add_interf_op_incl. + apply add_interf_call_correct; auto. + rewrite Regset.mem_remove_other; auto. + + eapply interfere_incl. + eapply graph_incl_trans; [idtac|apply add_prefs_call_incl]. + apply add_pref_mreg_incl. + apply add_interf_op_correct; auto. +Qed. + +Lemma add_edges_instrs_incl_aux: + forall sig live instrs g, + graph_incl g + (fold_left + (fun (a : graph) (p : positive * instruction) => + add_edges_instr sig (snd p) live !! (fst p) a) + instrs g). +Proof. + induction instrs; simpl; intros. + apply graph_incl_refl. + eapply graph_incl_trans; [idtac|eauto]. + apply add_edges_instr_incl. +Qed. + +Lemma add_edges_instrs_correct_aux: + forall sig live instrs g pc i, + In (pc, i) instrs -> + correct_interf_instr live!!pc i + (fold_left + (fun (a : graph) (p : positive * instruction) => + add_edges_instr sig (snd p) live !! (fst p) a) + instrs g). +Proof. + induction instrs; simpl; intros. + elim H. + elim H; intro. + subst a; simpl. eapply correct_interf_instr_incl. + apply add_edges_instrs_incl_aux. + apply add_edges_instr_correct. + auto. +Qed. + +Lemma add_edges_instrs_correct: + forall f live pc i, + f.(fn_code)!pc = Some i -> + correct_interf_instr live!!pc i (add_edges_instrs f live). +Proof. + intros. + unfold add_edges_instrs. + rewrite PTree.fold_spec. + apply add_edges_instrs_correct_aux. + apply PTree.elements_correct. auto. +Qed. + +(** Here are the three correctness properties of the generated + inference graph. First, it contains the conflict edges + needed by every instruction of the function. *) + +Lemma interf_graph_correct_1: + forall f live live0 pc i, + f.(fn_code)!pc = Some i -> + correct_interf_instr live!!pc i (interf_graph f live live0). +Proof. + intros. unfold interf_graph. + apply correct_interf_instr_incl with (add_edges_instrs f live). + eapply graph_incl_trans; [idtac|apply add_prefs_call_incl]. + eapply graph_incl_trans; [idtac|apply add_interf_params_incl]. + apply add_interf_entry_incl. + apply add_edges_instrs_correct; auto. +Qed. + +(** Second, function parameters conflict pairwise. *) + +Lemma interf_graph_correct_2: + forall f live live0 r1 r2, + In r1 f.(fn_params) -> + In r2 f.(fn_params) -> + r1 <> r2 -> + interfere r1 r2 (interf_graph f live live0). +Proof. + intros. unfold interf_graph. + eapply interfere_incl. + apply add_prefs_call_incl. + apply add_interf_params_correct; auto. +Qed. + +(** Third, function parameters conflict pairwise with pseudo-registers + live at function entry. If the function never uses a pseudo-register + before it is defined, pseudo-registers live at function entry + are a subset of the function parameters and therefore this condition + is implied by [interf_graph_correct_3]. However, we prefer not + to make this assumption. *) + +Lemma interf_graph_correct_3: + forall f live live0 r1 r2, + In r1 f.(fn_params) -> + Regset.mem r2 live0 = true -> + r1 <> r2 -> + interfere r1 r2 (interf_graph f live live0). +Proof. + intros. unfold interf_graph. + eapply interfere_incl. + eapply graph_incl_trans; [idtac|apply add_prefs_call_incl]. + apply add_interf_params_incl. + apply add_interf_entry_correct; auto. +Qed. + +(** * Correctness of the a priori checks over the result of graph coloring *) + +(** We now show that the checks performed over the candidate coloring + returned by [graph_coloring] are correct: candidate colorings that + pass these checks are indeed correct colorings. *) + +Section CORRECT_COLORING. + +Variable g: graph. +Variable env: regenv. +Variable allregs: Regset.t. +Variable coloring: reg -> loc. + +Lemma check_coloring_1_correct: + forall r1 r2, + check_coloring_1 g coloring = true -> + SetRegReg.In (r1, r2) g.(interf_reg_reg) -> + coloring r1 <> coloring r2. +Proof. + unfold check_coloring_1. intros. + assert (FSetInterface.compat_bool OrderedRegReg.eq + (fun r1r2 => if Loc.eq (coloring (fst r1r2)) (coloring (snd r1r2)) + then false else true)). + red. unfold OrderedRegReg.eq. unfold OrderedReg.eq. + intros x y [EQ1 EQ2]. rewrite EQ1; rewrite EQ2; auto. + generalize (SetRegReg.for_all_2 H1 H H0). + simpl. case (Loc.eq (coloring r1) (coloring r2)); intro. + intro; discriminate. auto. +Qed. + +Lemma check_coloring_2_correct: + forall r1 mr2, + check_coloring_2 g coloring = true -> + SetRegMreg.In (r1, mr2) g.(interf_reg_mreg) -> + coloring r1 <> R mr2. +Proof. + unfold check_coloring_2. intros. + assert (FSetInterface.compat_bool OrderedRegMreg.eq + (fun r1r2 => if Loc.eq (coloring (fst r1r2)) (R (snd r1r2)) + then false else true)). + red. unfold OrderedRegMreg.eq. unfold OrderedReg.eq. + intros x y [EQ1 EQ2]. rewrite EQ1; rewrite EQ2; auto. + generalize (SetRegMreg.for_all_2 H1 H H0). + simpl. case (Loc.eq (coloring r1) (R mr2)); intro. + intro; discriminate. auto. +Qed. + +Lemma same_typ_correct: + forall t1 t2, same_typ t1 t2 = true -> t1 = t2. +Proof. + destruct t1; destruct t2; simpl; congruence. +Qed. + +Lemma loc_is_acceptable_correct: + forall l, loc_is_acceptable l = true -> loc_acceptable l. +Proof. + destruct l; unfold loc_is_acceptable, loc_acceptable. + case (In_dec Loc.eq (R m) temporaries); intro. + intro; discriminate. auto. + destruct s. + case (zlt z 0); intro. intro; discriminate. auto. + intro; discriminate. + intro; discriminate. +Qed. + +Lemma check_coloring_3_correct: + forall r, + check_coloring_3 allregs env coloring = true -> + Regset.mem r allregs = true -> + loc_acceptable (coloring r) /\ env r = Loc.type (coloring r). +Proof. + unfold check_coloring_3; intros. + generalize (Regset.for_all_spec _ H H0). intro. + elim (andb_prop _ _ H1); intros. + split. apply loc_is_acceptable_correct; auto. + apply same_typ_correct; auto. +Qed. + +End CORRECT_COLORING. + +(** * Correctness of clipping *) + +(** We then show the correctness of the ``clipped'' coloring + returned by [alloc_of_coloring] applied to a candidate coloring + that passes the a posteriori checks. *) + +Section ALLOC_OF_COLORING. + +Variable g: graph. +Variable env: regenv. +Let allregs := all_interf_regs g. +Variable coloring: reg -> loc. +Let alloc := alloc_of_coloring coloring env allregs. + +Lemma alloc_of_coloring_correct_1: + forall r1 r2, + check_coloring g env allregs coloring = true -> + SetRegReg.In (r1, r2) g.(interf_reg_reg) -> + alloc r1 <> alloc r2. +Proof. + unfold check_coloring, alloc, alloc_of_coloring; intros. + elim (andb_prop _ _ H); intros. + generalize (all_interf_regs_correct_1 _ _ _ H0). + intros [A B]. + unfold allregs. rewrite A; rewrite B. + eapply check_coloring_1_correct; eauto. +Qed. + +Lemma alloc_of_coloring_correct_2: + forall r1 mr2, + check_coloring g env allregs coloring = true -> + SetRegMreg.In (r1, mr2) g.(interf_reg_mreg) -> + alloc r1 <> R mr2. +Proof. + unfold check_coloring, alloc, alloc_of_coloring; intros. + elim (andb_prop _ _ H); intros. + elim (andb_prop _ _ H2); intros. + generalize (all_interf_regs_correct_2 _ _ _ H0). intros. + unfold allregs. rewrite H5. + eapply check_coloring_2_correct; eauto. +Qed. + +Lemma alloc_of_coloring_correct_3: + forall r, + check_coloring g env allregs coloring = true -> + loc_acceptable (alloc r). +Proof. + unfold check_coloring, alloc, alloc_of_coloring; intros. + elim (andb_prop _ _ H); intros. + elim (andb_prop _ _ H1); intros. + caseEq (Regset.mem r allregs); intro. + generalize (check_coloring_3_correct _ _ _ r H3 H4). tauto. + case (env r); simpl; intuition congruence. +Qed. + +Lemma alloc_of_coloring_correct_4: + forall r, + check_coloring g env allregs coloring = true -> + env r = Loc.type (alloc r). +Proof. + unfold check_coloring, alloc, alloc_of_coloring; intros. + elim (andb_prop _ _ H); intros. + elim (andb_prop _ _ H1); intros. + caseEq (Regset.mem r allregs); intro. + generalize (check_coloring_3_correct _ _ _ r H3 H4). tauto. + case (env r); reflexivity. +Qed. + +End ALLOC_OF_COLORING. + +(** * Correctness of the whole graph coloring algorithm *) + +(** Combining results from the previous sections, we now summarize + the correctness properties of the assignment (of locations to + registers) returned by [regalloc]. *) + +Definition correct_alloc_instr + (live: PMap.t Regset.t) (alloc: reg -> loc) + (pc: node) (instr: instruction) : Prop := + match instr with + | Iop op args res s => + match is_move_operation op args with + | Some arg => + forall r, + Regset.mem res live!!pc = true -> + Regset.mem r live!!pc = true -> + r <> res -> r <> arg -> alloc r <> alloc res + | None => + forall r, + Regset.mem res live!!pc = true -> + Regset.mem r live!!pc = true -> + r <> res -> alloc r <> alloc res + end + | Iload chunk addr args res s => + forall r, + Regset.mem res live!!pc = true -> + Regset.mem r live!!pc = true -> + r <> res -> alloc r <> alloc res + | Icall sig ros args res s => + (forall r, + Regset.mem r live!!pc = true -> + r <> res -> + ~(In (alloc r) Conventions.destroyed_at_call)) + /\ (forall r, + Regset.mem r live!!pc = true -> + r <> res -> alloc r <> alloc res) + | _ => + True + end. + +Section REGALLOC_PROPERTIES. + +Variable f: function. +Variable env: regenv. +Variable live: PMap.t Regset.t. +Variable live0: Regset.t. +Variable alloc: reg -> loc. + +Let g := interf_graph f live live0. +Let allregs := all_interf_regs g. +Let coloring := graph_coloring f g env allregs. + +Lemma regalloc_ok: + regalloc f live live0 env = Some alloc -> + check_coloring g env allregs coloring = true /\ + alloc = alloc_of_coloring coloring env allregs. +Proof. + unfold regalloc, coloring, allregs, g. + case (check_coloring (interf_graph f live live0) env). + intro EQ; injection EQ; intro; clear EQ. + split. auto. auto. + intro; discriminate. +Qed. + +Lemma regalloc_acceptable: + forall r, + regalloc f live live0 env = Some alloc -> + loc_acceptable (alloc r). +Proof. + intros. elim (regalloc_ok H); intros. + rewrite H1. unfold allregs. apply alloc_of_coloring_correct_3. + exact H0. +Qed. + +Lemma regsalloc_acceptable: + forall rl, + regalloc f live live0 env = Some alloc -> + locs_acceptable (List.map alloc rl). +Proof. + intros; red; intros. + elim (list_in_map_inv _ _ _ H0). intros r [EQ IN]. + subst l. apply regalloc_acceptable. auto. +Qed. + +Lemma regalloc_preserves_types: + forall r, + regalloc f live live0 env = Some alloc -> + Loc.type (alloc r) = env r. +Proof. + intros. elim (regalloc_ok H); intros. + rewrite H1. unfold allregs. symmetry. + apply alloc_of_coloring_correct_4. + exact H0. +Qed. + +Lemma correct_interf_alloc_instr: + forall pc instr, + (forall r1 r2, interfere r1 r2 g -> alloc r1 <> alloc r2) -> + (forall r1 mr2, interfere_mreg r1 mr2 g -> alloc r1 <> R mr2) -> + correct_interf_instr live!!pc instr g -> + correct_alloc_instr live alloc pc instr. +Proof. + intros pc instr ALL1 ALL2. + unfold correct_interf_instr, correct_alloc_instr; + destruct instr; auto. + destruct (is_move_operation o l); auto. + intros [A B]. split. + intros; red; intros. + unfold destroyed_at_call in H1. + generalize (list_in_map_inv R _ _ H1). + intros [mr [EQ IN]]. + generalize (A r0 mr H IN H0). intro. + generalize (ALL2 _ _ H2). contradiction. + auto. +Qed. + +Lemma regalloc_correct_1: + forall pc instr, + regalloc f live live0 env = Some alloc -> + f.(fn_code)!pc = Some instr -> + correct_alloc_instr live alloc pc instr. +Proof. + intros. elim (regalloc_ok H); intros. + apply correct_interf_alloc_instr. + intros. rewrite H2. unfold allregs. red in H3. + elim (ordered_pair_charact r1 r2); intro. + apply alloc_of_coloring_correct_1. auto. rewrite H4 in H3; auto. + apply sym_not_equal. + apply alloc_of_coloring_correct_1. auto. rewrite H4 in H3; auto. + intros. rewrite H2. unfold allregs. + apply alloc_of_coloring_correct_2. auto. exact H3. + unfold g. apply interf_graph_correct_1. auto. +Qed. + +Lemma regalloc_correct_2: + regalloc f live live0 env = Some alloc -> + list_norepet f.(fn_params) -> + list_norepet (List.map alloc f.(fn_params)). +Proof. + intros. elim (regalloc_ok H); intros. + apply list_map_norepet; auto. + intros. rewrite H2. unfold allregs. + elim (ordered_pair_charact x y); intro. + apply alloc_of_coloring_correct_1. auto. + change OrderedReg.t with reg. rewrite <- H6. + change (interfere x y g). unfold g. + apply interf_graph_correct_2; auto. + apply sym_not_equal. + apply alloc_of_coloring_correct_1. auto. + change OrderedReg.t with reg. rewrite <- H6. + change (interfere x y g). unfold g. + apply interf_graph_correct_2; auto. +Qed. + +Lemma regalloc_correct_3: + forall r1 r2, + regalloc f live live0 env = Some alloc -> + In r1 f.(fn_params) -> + Regset.mem r2 live0 = true -> + r1 <> r2 -> + alloc r1 <> alloc r2. +Proof. + intros. elim (regalloc_ok H); intros. + rewrite H4; unfold allregs. + elim (ordered_pair_charact r1 r2); intro. + apply alloc_of_coloring_correct_1. auto. + change OrderedReg.t with reg. rewrite <- H5. + change (interfere r1 r2 g). unfold g. + apply interf_graph_correct_3; auto. + apply sym_not_equal. + apply alloc_of_coloring_correct_1. auto. + change OrderedReg.t with reg. rewrite <- H5. + change (interfere r1 r2 g). unfold g. + apply interf_graph_correct_3; auto. +Qed. + +End REGALLOC_PROPERTIES. diff --git a/backend/Constprop.v b/backend/Constprop.v new file mode 100644 index 00000000..b1c5a2bb --- /dev/null +++ b/backend/Constprop.v @@ -0,0 +1,1032 @@ +(** Constant propagation over RTL. This is the first of the two + optimizations performed at RTL level. It proceeds by a standard + dataflow analysis and the corresponding code transformation. *) + +Require Import Coqlib. +Require Import Maps. +Require Import AST. +Require Import Integers. +Require Import Floats. +Require Import Values. +Require Import Globalenvs. +Require Import Op. +Require Import Registers. +Require Import RTL. +Require Import Lattice. +Require Import Kildall. + +(** * Static analysis *) + +(** To each pseudo-register at each program point, the static analysis + associates a compile-time approximation taken from the following set. *) + +Inductive approx : Set := + | Novalue: approx (** No value possible, code is unreachable. *) + | Unknown: approx (** All values are possible, + no compile-time information is available. *) + | I: int -> approx (** A known integer value. *) + | F: float -> approx (** A known floating-point value. *) + | S: ident -> int -> approx. + (** The value is the address of the given global + symbol plus the given integer offset. *) + +(** We equip this set of approximations with a semi-lattice structure. + The ordering is inclusion between the sets of values denoted by + the approximations. *) + +Module Approx <: SEMILATTICE_WITH_TOP. + Definition t := approx. + Lemma eq: forall (x y: t), {x=y} + {x<>y}. + Proof. + decide equality. + apply Int.eq_dec. + apply Float.eq_dec. + apply Int.eq_dec. + apply ident_eq. + Qed. + Definition ge (x y: t) : Prop := + x = Unknown \/ y = Novalue \/ x = y. + Lemma ge_refl: forall x, ge x x. + Proof. + unfold ge; tauto. + Qed. + Lemma ge_trans: forall x y z, ge x y -> ge y z -> ge x z. + Proof. + unfold ge; intuition congruence. + Qed. + Definition bot := Novalue. + Definition top := Unknown. + Lemma ge_bot: forall x, ge x bot. + Proof. + unfold ge, bot; tauto. + Qed. + Lemma ge_top: forall x, ge top x. + Proof. + unfold ge, bot; tauto. + Qed. + Definition lub (x y: t) : t := + if eq x y then x else + match x, y with + | Novalue, _ => y + | _, Novalue => x + | _, _ => Unknown + end. + Lemma lub_commut: forall x y, lub x y = lub y x. + Proof. + unfold lub; intros. + case (eq x y); case (eq y x); intros; try congruence. + destruct x; destruct y; auto. + Qed. + Lemma ge_lub_left: forall x y, ge (lub x y) x. + Proof. + unfold lub; intros. + case (eq x y); intro. + apply ge_refl. + destruct x; destruct y; unfold ge; tauto. + Qed. +End Approx. + +Module D := LPMap Approx. + +(** We now define the abstract interpretations of conditions and operators + over this set of approximations. For instance, the abstract interpretation + of the operator [Oaddf] applied to two expressions [a] and [b] is + [F(Float.add f g)] if [a] and [b] have static approximations [Vfloat f] + and [Vfloat g] respectively, and [Unknown] otherwise. + + The static approximations are defined by large pattern-matchings over + the approximations of the results. We write these matchings in the + indirect style described in file [Cmconstr] to avoid excessive + duplication of cases in proofs. *) + +(* +Definition eval_static_condition (cond: condition) (vl: list approx) := + match cond, vl with + | Ccomp c, I n1 :: I n2 :: nil => Some(Int.cmp c n1 n2) + | Ccompu c, I n1 :: I n2 :: nil => Some(Int.cmpu c n1 n2) + | Ccompimm c n, I n1 :: nil => Some(Int.cmp c n1 n) + | Ccompuimm c n, I n1 :: nil => Some(Int.cmpu c n1 n) + | Ccompf c, F n1 :: F n2 :: nil => Some(Float.cmp c n1 n2) + | Cnotcompf c, F n1 :: F n2 :: nil => Some(negb(Float.cmp c n1 n2)) + | Cmaskzero n, I n1 :: nil => Some(Int.eq (Int.and n1 n) Int.zero) + | Cmasknotzero n, n1::nil => Some(negb(Int.eq (Int.and n1 n) Int.zero)) + | _, _ => None + end. +*) + +Inductive eval_static_condition_cases: forall (cond: condition) (vl: list approx), Set := + | eval_static_condition_case1: + forall c n1 n2, + eval_static_condition_cases (Ccomp c) (I n1 :: I n2 :: nil) + | eval_static_condition_case2: + forall c n1 n2, + eval_static_condition_cases (Ccompu c) (I n1 :: I n2 :: nil) + | eval_static_condition_case3: + forall c n n1, + eval_static_condition_cases (Ccompimm c n) (I n1 :: nil) + | eval_static_condition_case4: + forall c n n1, + eval_static_condition_cases (Ccompuimm c n) (I n1 :: nil) + | eval_static_condition_case5: + forall c n1 n2, + eval_static_condition_cases (Ccompf c) (F n1 :: F n2 :: nil) + | eval_static_condition_case6: + forall c n1 n2, + eval_static_condition_cases (Cnotcompf c) (F n1 :: F n2 :: nil) + | eval_static_condition_case7: + forall n n1, + eval_static_condition_cases (Cmaskzero n) (I n1 :: nil) + | eval_static_condition_case8: + forall n n1, + eval_static_condition_cases (Cmasknotzero n) (I n1 :: nil) + | eval_static_condition_default: + forall (cond: condition) (vl: list approx), + eval_static_condition_cases cond vl. + +Definition eval_static_condition_match (cond: condition) (vl: list approx) := + match cond as z1, vl as z2 return eval_static_condition_cases z1 z2 with + | Ccomp c, I n1 :: I n2 :: nil => + eval_static_condition_case1 c n1 n2 + | Ccompu c, I n1 :: I n2 :: nil => + eval_static_condition_case2 c n1 n2 + | Ccompimm c n, I n1 :: nil => + eval_static_condition_case3 c n n1 + | Ccompuimm c n, I n1 :: nil => + eval_static_condition_case4 c n n1 + | Ccompf c, F n1 :: F n2 :: nil => + eval_static_condition_case5 c n1 n2 + | Cnotcompf c, F n1 :: F n2 :: nil => + eval_static_condition_case6 c n1 n2 + | Cmaskzero n, I n1 :: nil => + eval_static_condition_case7 n n1 + | Cmasknotzero n, I n1 :: nil => + eval_static_condition_case8 n n1 + | cond, vl => + eval_static_condition_default cond vl + end. + +Definition eval_static_condition (cond: condition) (vl: list approx) := + match eval_static_condition_match cond vl with + | eval_static_condition_case1 c n1 n2 => + Some(Int.cmp c n1 n2) + | eval_static_condition_case2 c n1 n2 => + Some(Int.cmpu c n1 n2) + | eval_static_condition_case3 c n n1 => + Some(Int.cmp c n1 n) + | eval_static_condition_case4 c n n1 => + Some(Int.cmpu c n1 n) + | eval_static_condition_case5 c n1 n2 => + Some(Float.cmp c n1 n2) + | eval_static_condition_case6 c n1 n2 => + Some(negb(Float.cmp c n1 n2)) + | eval_static_condition_case7 n n1 => + Some(Int.eq (Int.and n1 n) Int.zero) + | eval_static_condition_case8 n n1 => + Some(negb(Int.eq (Int.and n1 n) Int.zero)) + | eval_static_condition_default cond vl => + None + end. + +(* +Definition eval_static_operation (op: operation) (vl: list approx) := + match op, vl with + | Omove, v1::nil => v1 + | Ointconst n, nil => I n + | Ofloatconst n, nil => F n + | Oaddrsymbol s n, nil => S s n + | Ocast8signed, I n1 :: nil => I(Int.cast8signed n) + | Ocast16signed, I n1 :: nil => I(Int.cast16signed n) + | Oadd, I n1 :: I n2 :: nil => I(Int.add n1 n2) + | Oadd, S s1 n1 :: I n2 :: nil => S s1 (Int.add n1 n2) + | Oaddimm n, I n1 :: nil => I (Int.add n1 n) + | Oaddimm n, S s1 n1 :: nil => S s1 (Int.add n1 n) + | Osub, I n1 :: I n2 :: nil => I(Int.sub n1 n2) + | Osub, S s1 n1 :: I n2 :: nil => S s1 (Int.sub n1 n2) + | Osubimm n, I n1 :: nil => I (Int.sub n n1) + | Omul, I n1 :: I n2 :: nil => I(Int.mul n1 n2) + | Omulimm n, I n1 :: nil => I(Int.mul n1 n) + | Odiv, I n1 :: I n2 :: nil => if Int.eq n2 Int.zero then Unknown else I(Int.divs n1 n2) + | Odivu, I n1 :: I n2 :: nil => if Int.eq n2 Int.zero then Unknown else I(Int.divu n1 n2) + | Oand, I n1 :: I n2 :: nil => I(Int.and n1 n2) + | Oandimm n, I n1 :: nil => I(Int.and n1 n) + | Oor, I n1 :: I n2 :: nil => I(Int.or n1 n2) + | Oorimm n, I n1 :: nil => I(Int.or n1 n) + | Oxor, I n1 :: I n2 :: nil => I(Int.xor n1 n2) + | Oxorimm n, I n1 :: nil => I(Int.xor n1 n) + | Onand, I n1 :: I n2 :: nil => I(Int.xor (Int.and n1 n2) Int.mone) + | Onor, I n1 :: I n2 :: nil => I(Int.xor (Int.or n1 n2) Int.mone) + | Onxor, I n1 :: I n2 :: nil => I(Int.xor (Int.xor n1 n2) Int.mone) + | Oshl, I n1 :: I n2 :: nil => if Int.ltu n2 (Int.repr 32) then I(Int.shl n1 n2) else Unknown + | Oshr, I n1 :: I n2 :: nil => if Int.ltu n2 (Int.repr 32) then I(Int.shr n1 n2) else Unknown + | Oshrimm n, I n1 :: nil => if Int.ltu n (Int.repr 32) then I(Int.shr n1 n) else Unknown + | Oshrximm n, I n1 :: nil => if Int.ltu n (Int.repr 32) then I(Int.shrx n1 n) else Unknown + | Oshru, I n1 :: I n2 :: nil => if Int.ltu n2 (Int.repr 32) then I(Int.shru n1 n2) else Unknown + | Orolm amount mask, I n1 :: nil => I(Int.rolm n1 amount mask) + | Onegf, F n1 :: nil => F(Float.neg n1) + | Oabsf, F n1 :: nil => F(Float.abs n1) + | Oaddf, F n1 :: F n2 :: nil => F(Float.add n1 n2) + | Osubf, F n1 :: F n2 :: nil => F(Float.sub n1 n2) + | Omulf, F n1 :: F n2 :: nil => F(Float.mul n1 n2) + | Odivf, F n1 :: F n2 :: nil => F(Float.div n1 n2) + | Omuladdf, F n1 :: F n2 :: F n3 :: nil => F(Float.add (Float.mul n1 n2) n3) + | Omulsubf, F n1 :: F n2 :: F n3 :: nil => F(Float.sub (Float.mul n1 n2) n3) + | Osingleoffloat, F n1 :: nil => F(Float.singleoffloat n1) + | Ointoffloat, F n1 :: nil => I(Float.intoffloat n1) + | Ofloatofint, I n1 :: nil => F(Float.floatofint n1) + | Ofloatofintu, I n1 :: nil => F(Float.floatofintu n1) + | Ocmp c, vl => + match eval_static_condition c vl with + | None => Unknown + | Some b => I(if b then Int.one else Int.zero) + end + | _, _ => Unknown + end. +*) + +Inductive eval_static_operation_cases: forall (op: operation) (vl: list approx), Set := + | eval_static_operation_case1: + forall v1, + eval_static_operation_cases (Omove) (v1::nil) + | eval_static_operation_case2: + forall n, + eval_static_operation_cases (Ointconst n) (nil) + | eval_static_operation_case3: + forall n, + eval_static_operation_cases (Ofloatconst n) (nil) + | eval_static_operation_case4: + forall s n, + eval_static_operation_cases (Oaddrsymbol s n) (nil) + | eval_static_operation_case6: + forall n1, + eval_static_operation_cases (Ocast8signed) (I n1 :: nil) + | eval_static_operation_case7: + forall n1, + eval_static_operation_cases (Ocast16signed) (I n1 :: nil) + | eval_static_operation_case8: + forall n1 n2, + eval_static_operation_cases (Oadd) (I n1 :: I n2 :: nil) + | eval_static_operation_case9: + forall s1 n1 n2, + eval_static_operation_cases (Oadd) (S s1 n1 :: I n2 :: nil) + | eval_static_operation_case11: + forall n n1, + eval_static_operation_cases (Oaddimm n) (I n1 :: nil) + | eval_static_operation_case12: + forall n s1 n1, + eval_static_operation_cases (Oaddimm n) (S s1 n1 :: nil) + | eval_static_operation_case13: + forall n1 n2, + eval_static_operation_cases (Osub) (I n1 :: I n2 :: nil) + | eval_static_operation_case14: + forall s1 n1 n2, + eval_static_operation_cases (Osub) (S s1 n1 :: I n2 :: nil) + | eval_static_operation_case15: + forall n n1, + eval_static_operation_cases (Osubimm n) (I n1 :: nil) + | eval_static_operation_case16: + forall n1 n2, + eval_static_operation_cases (Omul) (I n1 :: I n2 :: nil) + | eval_static_operation_case17: + forall n n1, + eval_static_operation_cases (Omulimm n) (I n1 :: nil) + | eval_static_operation_case18: + forall n1 n2, + eval_static_operation_cases (Odiv) (I n1 :: I n2 :: nil) + | eval_static_operation_case19: + forall n1 n2, + eval_static_operation_cases (Odivu) (I n1 :: I n2 :: nil) + | eval_static_operation_case20: + forall n1 n2, + eval_static_operation_cases (Oand) (I n1 :: I n2 :: nil) + | eval_static_operation_case21: + forall n n1, + eval_static_operation_cases (Oandimm n) (I n1 :: nil) + | eval_static_operation_case22: + forall n1 n2, + eval_static_operation_cases (Oor) (I n1 :: I n2 :: nil) + | eval_static_operation_case23: + forall n n1, + eval_static_operation_cases (Oorimm n) (I n1 :: nil) + | eval_static_operation_case24: + forall n1 n2, + eval_static_operation_cases (Oxor) (I n1 :: I n2 :: nil) + | eval_static_operation_case25: + forall n n1, + eval_static_operation_cases (Oxorimm n) (I n1 :: nil) + | eval_static_operation_case26: + forall n1 n2, + eval_static_operation_cases (Onand) (I n1 :: I n2 :: nil) + | eval_static_operation_case27: + forall n1 n2, + eval_static_operation_cases (Onor) (I n1 :: I n2 :: nil) + | eval_static_operation_case28: + forall n1 n2, + eval_static_operation_cases (Onxor) (I n1 :: I n2 :: nil) + | eval_static_operation_case29: + forall n1 n2, + eval_static_operation_cases (Oshl) (I n1 :: I n2 :: nil) + | eval_static_operation_case30: + forall n1 n2, + eval_static_operation_cases (Oshr) (I n1 :: I n2 :: nil) + | eval_static_operation_case31: + forall n n1, + eval_static_operation_cases (Oshrimm n) (I n1 :: nil) + | eval_static_operation_case32: + forall n n1, + eval_static_operation_cases (Oshrximm n) (I n1 :: nil) + | eval_static_operation_case33: + forall n1 n2, + eval_static_operation_cases (Oshru) (I n1 :: I n2 :: nil) + | eval_static_operation_case34: + forall amount mask n1, + eval_static_operation_cases (Orolm amount mask) (I n1 :: nil) + | eval_static_operation_case35: + forall n1, + eval_static_operation_cases (Onegf) (F n1 :: nil) + | eval_static_operation_case36: + forall n1, + eval_static_operation_cases (Oabsf) (F n1 :: nil) + | eval_static_operation_case37: + forall n1 n2, + eval_static_operation_cases (Oaddf) (F n1 :: F n2 :: nil) + | eval_static_operation_case38: + forall n1 n2, + eval_static_operation_cases (Osubf) (F n1 :: F n2 :: nil) + | eval_static_operation_case39: + forall n1 n2, + eval_static_operation_cases (Omulf) (F n1 :: F n2 :: nil) + | eval_static_operation_case40: + forall n1 n2, + eval_static_operation_cases (Odivf) (F n1 :: F n2 :: nil) + | eval_static_operation_case41: + forall n1 n2 n3, + eval_static_operation_cases (Omuladdf) (F n1 :: F n2 :: F n3 :: nil) + | eval_static_operation_case42: + forall n1 n2 n3, + eval_static_operation_cases (Omulsubf) (F n1 :: F n2 :: F n3 :: nil) + | eval_static_operation_case43: + forall n1, + eval_static_operation_cases (Osingleoffloat) (F n1 :: nil) + | eval_static_operation_case44: + forall n1, + eval_static_operation_cases (Ointoffloat) (F n1 :: nil) + | eval_static_operation_case45: + forall n1, + eval_static_operation_cases (Ofloatofint) (I n1 :: nil) + | eval_static_operation_case46: + forall n1, + eval_static_operation_cases (Ofloatofintu) (I n1 :: nil) + | eval_static_operation_case47: + forall c vl, + eval_static_operation_cases (Ocmp c) (vl) + | eval_static_operation_default: + forall (op: operation) (vl: list approx), + eval_static_operation_cases op vl. + +Definition eval_static_operation_match (op: operation) (vl: list approx) := + match op as z1, vl as z2 return eval_static_operation_cases z1 z2 with + | Omove, v1::nil => + eval_static_operation_case1 v1 + | Ointconst n, nil => + eval_static_operation_case2 n + | Ofloatconst n, nil => + eval_static_operation_case3 n + | Oaddrsymbol s n, nil => + eval_static_operation_case4 s n + | Ocast8signed, I n1 :: nil => + eval_static_operation_case6 n1 + | Ocast16signed, I n1 :: nil => + eval_static_operation_case7 n1 + | Oadd, I n1 :: I n2 :: nil => + eval_static_operation_case8 n1 n2 + | Oadd, S s1 n1 :: I n2 :: nil => + eval_static_operation_case9 s1 n1 n2 + | Oaddimm n, I n1 :: nil => + eval_static_operation_case11 n n1 + | Oaddimm n, S s1 n1 :: nil => + eval_static_operation_case12 n s1 n1 + | Osub, I n1 :: I n2 :: nil => + eval_static_operation_case13 n1 n2 + | Osub, S s1 n1 :: I n2 :: nil => + eval_static_operation_case14 s1 n1 n2 + | Osubimm n, I n1 :: nil => + eval_static_operation_case15 n n1 + | Omul, I n1 :: I n2 :: nil => + eval_static_operation_case16 n1 n2 + | Omulimm n, I n1 :: nil => + eval_static_operation_case17 n n1 + | Odiv, I n1 :: I n2 :: nil => + eval_static_operation_case18 n1 n2 + | Odivu, I n1 :: I n2 :: nil => + eval_static_operation_case19 n1 n2 + | Oand, I n1 :: I n2 :: nil => + eval_static_operation_case20 n1 n2 + | Oandimm n, I n1 :: nil => + eval_static_operation_case21 n n1 + | Oor, I n1 :: I n2 :: nil => + eval_static_operation_case22 n1 n2 + | Oorimm n, I n1 :: nil => + eval_static_operation_case23 n n1 + | Oxor, I n1 :: I n2 :: nil => + eval_static_operation_case24 n1 n2 + | Oxorimm n, I n1 :: nil => + eval_static_operation_case25 n n1 + | Onand, I n1 :: I n2 :: nil => + eval_static_operation_case26 n1 n2 + | Onor, I n1 :: I n2 :: nil => + eval_static_operation_case27 n1 n2 + | Onxor, I n1 :: I n2 :: nil => + eval_static_operation_case28 n1 n2 + | Oshl, I n1 :: I n2 :: nil => + eval_static_operation_case29 n1 n2 + | Oshr, I n1 :: I n2 :: nil => + eval_static_operation_case30 n1 n2 + | Oshrimm n, I n1 :: nil => + eval_static_operation_case31 n n1 + | Oshrximm n, I n1 :: nil => + eval_static_operation_case32 n n1 + | Oshru, I n1 :: I n2 :: nil => + eval_static_operation_case33 n1 n2 + | Orolm amount mask, I n1 :: nil => + eval_static_operation_case34 amount mask n1 + | Onegf, F n1 :: nil => + eval_static_operation_case35 n1 + | Oabsf, F n1 :: nil => + eval_static_operation_case36 n1 + | Oaddf, F n1 :: F n2 :: nil => + eval_static_operation_case37 n1 n2 + | Osubf, F n1 :: F n2 :: nil => + eval_static_operation_case38 n1 n2 + | Omulf, F n1 :: F n2 :: nil => + eval_static_operation_case39 n1 n2 + | Odivf, F n1 :: F n2 :: nil => + eval_static_operation_case40 n1 n2 + | Omuladdf, F n1 :: F n2 :: F n3 :: nil => + eval_static_operation_case41 n1 n2 n3 + | Omulsubf, F n1 :: F n2 :: F n3 :: nil => + eval_static_operation_case42 n1 n2 n3 + | Osingleoffloat, F n1 :: nil => + eval_static_operation_case43 n1 + | Ointoffloat, F n1 :: nil => + eval_static_operation_case44 n1 + | Ofloatofint, I n1 :: nil => + eval_static_operation_case45 n1 + | Ofloatofintu, I n1 :: nil => + eval_static_operation_case46 n1 + | Ocmp c, vl => + eval_static_operation_case47 c vl + | op, vl => + eval_static_operation_default op vl + end. + +Definition eval_static_operation (op: operation) (vl: list approx) := + match eval_static_operation_match op vl with + | eval_static_operation_case1 v1 => + v1 + | eval_static_operation_case2 n => + I n + | eval_static_operation_case3 n => + F n + | eval_static_operation_case4 s n => + S s n + | eval_static_operation_case6 n1 => + I(Int.cast8signed n1) + | eval_static_operation_case7 n1 => + I(Int.cast16signed n1) + | eval_static_operation_case8 n1 n2 => + I(Int.add n1 n2) + | eval_static_operation_case9 s1 n1 n2 => + S s1 (Int.add n1 n2) + | eval_static_operation_case11 n n1 => + I (Int.add n1 n) + | eval_static_operation_case12 n s1 n1 => + S s1 (Int.add n1 n) + | eval_static_operation_case13 n1 n2 => + I(Int.sub n1 n2) + | eval_static_operation_case14 s1 n1 n2 => + S s1 (Int.sub n1 n2) + | eval_static_operation_case15 n n1 => + I (Int.sub n n1) + | eval_static_operation_case16 n1 n2 => + I(Int.mul n1 n2) + | eval_static_operation_case17 n n1 => + I(Int.mul n1 n) + | eval_static_operation_case18 n1 n2 => + if Int.eq n2 Int.zero then Unknown else I(Int.divs n1 n2) + | eval_static_operation_case19 n1 n2 => + if Int.eq n2 Int.zero then Unknown else I(Int.divu n1 n2) + | eval_static_operation_case20 n1 n2 => + I(Int.and n1 n2) + | eval_static_operation_case21 n n1 => + I(Int.and n1 n) + | eval_static_operation_case22 n1 n2 => + I(Int.or n1 n2) + | eval_static_operation_case23 n n1 => + I(Int.or n1 n) + | eval_static_operation_case24 n1 n2 => + I(Int.xor n1 n2) + | eval_static_operation_case25 n n1 => + I(Int.xor n1 n) + | eval_static_operation_case26 n1 n2 => + I(Int.xor (Int.and n1 n2) Int.mone) + | eval_static_operation_case27 n1 n2 => + I(Int.xor (Int.or n1 n2) Int.mone) + | eval_static_operation_case28 n1 n2 => + I(Int.xor (Int.xor n1 n2) Int.mone) + | eval_static_operation_case29 n1 n2 => + if Int.ltu n2 (Int.repr 32) then I(Int.shl n1 n2) else Unknown + | eval_static_operation_case30 n1 n2 => + if Int.ltu n2 (Int.repr 32) then I(Int.shr n1 n2) else Unknown + | eval_static_operation_case31 n n1 => + if Int.ltu n (Int.repr 32) then I(Int.shr n1 n) else Unknown + | eval_static_operation_case32 n n1 => + if Int.ltu n (Int.repr 32) then I(Int.shrx n1 n) else Unknown + | eval_static_operation_case33 n1 n2 => + if Int.ltu n2 (Int.repr 32) then I(Int.shru n1 n2) else Unknown + | eval_static_operation_case34 amount mask n1 => + I(Int.rolm n1 amount mask) + | eval_static_operation_case35 n1 => + F(Float.neg n1) + | eval_static_operation_case36 n1 => + F(Float.abs n1) + | eval_static_operation_case37 n1 n2 => + F(Float.add n1 n2) + | eval_static_operation_case38 n1 n2 => + F(Float.sub n1 n2) + | eval_static_operation_case39 n1 n2 => + F(Float.mul n1 n2) + | eval_static_operation_case40 n1 n2 => + F(Float.div n1 n2) + | eval_static_operation_case41 n1 n2 n3 => + F(Float.add (Float.mul n1 n2) n3) + | eval_static_operation_case42 n1 n2 n3 => + F(Float.sub (Float.mul n1 n2) n3) + | eval_static_operation_case43 n1 => + F(Float.singleoffloat n1) + | eval_static_operation_case44 n1 => + I(Float.intoffloat n1) + | eval_static_operation_case45 n1 => + F(Float.floatofint n1) + | eval_static_operation_case46 n1 => + F(Float.floatofintu n1) + | eval_static_operation_case47 c vl => + match eval_static_condition c vl with + | None => Unknown + | Some b => I(if b then Int.one else Int.zero) + end + | eval_static_operation_default op vl => + Unknown + end. + +(** The transfer function for the dataflow analysis is straightforward: + for [Iop] instructions, we set the approximation of the destination + register to the result of executing abstractly the operation; + for [Iload] and [Icall], we set the approximation of the destination + to [Unknown]. *) + +Definition approx_regs (rl: list reg) (approx: D.t) := + List.map (fun r => D.get r approx) rl. + +Definition transfer (f: function) (pc: node) (before: D.t) := + match f.(fn_code)!pc with + | None => before + | Some i => + match i with + | Inop s => + before + | Iop op args res s => + let a := eval_static_operation op (approx_regs args before) in + D.set res a before + | Iload chunk addr args dst s => + D.set dst Unknown before + | Istore chunk addr args src s => + before + | Icall sig ros args res s => + D.set res Unknown before + | Icond cond args ifso ifnot => + before + | Ireturn optarg => + before + end + end. + +(** The static analysis itself is then an instantiation of Kildall's + generic solver for forward dataflow inequations. [analyze f] + returns a mapping from program points to mappings of pseudo-registers + to approximations. It can fail to reach a fixpoint in a reasonable + number of iterations, in which case [None] is returned. *) + +Module DS := Dataflow_Solver D. + +Definition analyze (f: RTL.function): option (PMap.t D.t) := + DS.fixpoint (successors f) f.(fn_nextpc) (transfer f) + ((f.(fn_entrypoint), D.top) :: nil). + +(** * Code transformation *) + +(** ** Operator strength reduction *) + +(** We now define auxiliary functions for strength reduction of + operators and addressing modes: replacing an operator with a cheaper + one if some of its arguments are statically known. These are again + large pattern-matchings expressed in indirect style. *) + +Section STRENGTH_REDUCTION. + +Variable approx: D.t. + +Definition intval (r: reg) : option int := + match D.get r approx with I n => Some n | _ => None end. + +Inductive cond_strength_reduction_cases: condition -> list reg -> Set := + | csr_case1: + forall c r1 r2, + cond_strength_reduction_cases (Ccomp c) (r1 :: r2 :: nil) + | csr_case2: + forall c r1 r2, + cond_strength_reduction_cases (Ccompu c) (r1 :: r2 :: nil) + | csr_default: + forall c rl, + cond_strength_reduction_cases c rl. + +Definition cond_strength_reduction_match (cond: condition) (rl: list reg) := + match cond as x, rl as y return cond_strength_reduction_cases x y with + | Ccomp c, r1 :: r2 :: nil => + csr_case1 c r1 r2 + | Ccompu c, r1 :: r2 :: nil => + csr_case2 c r1 r2 + | cond, rl => + csr_default cond rl + end. + +Definition cond_strength_reduction + (cond: condition) (args: list reg) : condition * list reg := + match cond_strength_reduction_match cond args with + | csr_case1 c r1 r2 => + match intval r1, intval r2 with + | Some n, _ => + (Ccompimm (swap_comparison c) n, r2 :: nil) + | _, Some n => + (Ccompimm c n, r1 :: nil) + | _, _ => + (cond, args) + end + | csr_case2 c r1 r2 => + match intval r1, intval r2 with + | Some n, _ => + (Ccompuimm (swap_comparison c) n, r2 :: nil) + | _, Some n => + (Ccompuimm c n, r1 :: nil) + | _, _ => + (cond, args) + end + | csr_default cond args => + (cond, args) + end. + +Definition make_addimm (n: int) (r: reg) := + if Int.eq n Int.zero + then (Omove, r :: nil) + else (Oaddimm n, r :: nil). + +Definition make_shlimm (n: int) (r: reg) := + if Int.eq n Int.zero + then (Omove, r :: nil) + else (Orolm n (Int.shl Int.mone n), r :: nil). + +Definition make_shrimm (n: int) (r: reg) := + if Int.eq n Int.zero + then (Omove, r :: nil) + else (Oshrimm n, r :: nil). + +Definition make_shruimm (n: int) (r: reg) := + if Int.eq n Int.zero + then (Omove, r :: nil) + else (Orolm (Int.sub (Int.repr 32) n) (Int.shru Int.mone n), r :: nil). + +Definition make_mulimm (n: int) (r: reg) := + if Int.eq n Int.zero then + (Ointconst Int.zero, nil) + else if Int.eq n Int.one then + (Omove, r :: nil) + else + match Int.is_power2 n with + | Some l => make_shlimm l r + | None => (Omulimm n, r :: nil) + end. + +Definition make_andimm (n: int) (r: reg) := + if Int.eq n Int.zero + then (Ointconst Int.zero, nil) + else if Int.eq n Int.mone then (Omove, r :: nil) + else (Oandimm n, r :: nil). + +Definition make_orimm (n: int) (r: reg) := + if Int.eq n Int.zero then (Omove, r :: nil) + else if Int.eq n Int.mone then (Ointconst Int.mone, nil) + else (Oorimm n, r :: nil). + +Definition make_xorimm (n: int) (r: reg) := + if Int.eq n Int.zero + then (Omove, r :: nil) + else (Oxorimm n, r :: nil). + +Inductive op_strength_reduction_cases: operation -> list reg -> Set := + | op_strength_reduction_case1: + forall (r1: reg) (r2: reg), + op_strength_reduction_cases Oadd (r1 :: r2 :: nil) + | op_strength_reduction_case2: + forall (r1: reg) (r2: reg), + op_strength_reduction_cases Osub (r1 :: r2 :: nil) + | op_strength_reduction_case3: + forall (r1: reg) (r2: reg), + op_strength_reduction_cases Omul (r1 :: r2 :: nil) + | op_strength_reduction_case4: + forall (r1: reg) (r2: reg), + op_strength_reduction_cases Odiv (r1 :: r2 :: nil) + | op_strength_reduction_case5: + forall (r1: reg) (r2: reg), + op_strength_reduction_cases Odivu (r1 :: r2 :: nil) + | op_strength_reduction_case6: + forall (r1: reg) (r2: reg), + op_strength_reduction_cases Oand (r1 :: r2 :: nil) + | op_strength_reduction_case7: + forall (r1: reg) (r2: reg), + op_strength_reduction_cases Oor (r1 :: r2 :: nil) + | op_strength_reduction_case8: + forall (r1: reg) (r2: reg), + op_strength_reduction_cases Oxor (r1 :: r2 :: nil) + | op_strength_reduction_case9: + forall (r1: reg) (r2: reg), + op_strength_reduction_cases Oshl (r1 :: r2 :: nil) + | op_strength_reduction_case10: + forall (r1: reg) (r2: reg), + op_strength_reduction_cases Oshr (r1 :: r2 :: nil) + | op_strength_reduction_case11: + forall (r1: reg) (r2: reg), + op_strength_reduction_cases Oshru (r1 :: r2 :: nil) + | op_strength_reduction_case12: + forall (c: condition) (rl: list reg), + op_strength_reduction_cases (Ocmp c) rl + | op_strength_reduction_default: + forall (op: operation) (args: list reg), + op_strength_reduction_cases op args. + +Definition op_strength_reduction_match (op: operation) (args: list reg) := + match op as z1, args as z2 return op_strength_reduction_cases z1 z2 with + | Oadd, r1 :: r2 :: nil => + op_strength_reduction_case1 r1 r2 + | Osub, r1 :: r2 :: nil => + op_strength_reduction_case2 r1 r2 + | Omul, r1 :: r2 :: nil => + op_strength_reduction_case3 r1 r2 + | Odiv, r1 :: r2 :: nil => + op_strength_reduction_case4 r1 r2 + | Odivu, r1 :: r2 :: nil => + op_strength_reduction_case5 r1 r2 + | Oand, r1 :: r2 :: nil => + op_strength_reduction_case6 r1 r2 + | Oor, r1 :: r2 :: nil => + op_strength_reduction_case7 r1 r2 + | Oxor, r1 :: r2 :: nil => + op_strength_reduction_case8 r1 r2 + | Oshl, r1 :: r2 :: nil => + op_strength_reduction_case9 r1 r2 + | Oshr, r1 :: r2 :: nil => + op_strength_reduction_case10 r1 r2 + | Oshru, r1 :: r2 :: nil => + op_strength_reduction_case11 r1 r2 + | Ocmp c, rl => + op_strength_reduction_case12 c rl + | op, args => + op_strength_reduction_default op args + end. + +Definition op_strength_reduction (op: operation) (args: list reg) := + match op_strength_reduction_match op args with + | op_strength_reduction_case1 r1 r2 => (* Oadd *) + match intval r1, intval r2 with + | Some n, _ => make_addimm n r2 + | _, Some n => make_addimm n r1 + | _, _ => (op, args) + end + | op_strength_reduction_case2 r1 r2 => (* Osub *) + match intval r1, intval r2 with + | Some n, _ => (Osubimm n, r2 :: nil) + | _, Some n => make_addimm (Int.neg n) r1 + | _, _ => (op, args) + end + | op_strength_reduction_case3 r1 r2 => (* Omul *) + match intval r1, intval r2 with + | Some n, _ => make_mulimm n r2 + | _, Some n => make_mulimm n r1 + | _, _ => (op, args) + end + | op_strength_reduction_case4 r1 r2 => (* Odiv *) + match intval r2 with + | Some n => + match Int.is_power2 n with + | Some l => (Oshrximm l, r1 :: nil) + | None => (op, args) + end + | None => + (op, args) + end + | op_strength_reduction_case5 r1 r2 => (* Odivu *) + match intval r2 with + | Some n => + match Int.is_power2 n with + | Some l => make_shruimm l r1 + | None => (op, args) + end + | None => + (op, args) + end + | op_strength_reduction_case6 r1 r2 => (* Oand *) + match intval r1, intval r2 with + | Some n, _ => make_andimm n r2 + | _, Some n => make_andimm n r1 + | _, _ => (op, args) + end + | op_strength_reduction_case7 r1 r2 => (* Oor *) + match intval r1, intval r2 with + | Some n, _ => make_orimm n r2 + | _, Some n => make_orimm n r1 + | _, _ => (op, args) + end + | op_strength_reduction_case8 r1 r2 => (* Oxor *) + match intval r1, intval r2 with + | Some n, _ => make_xorimm n r2 + | _, Some n => make_xorimm n r1 + | _, _ => (op, args) + end + | op_strength_reduction_case9 r1 r2 => (* Oshl *) + match intval r2 with + | Some n => + if Int.ltu n (Int.repr 32) + then make_shlimm n r1 + else (op, args) + | _ => (op, args) + end + | op_strength_reduction_case10 r1 r2 => (* Oshr *) + match intval r2 with + | Some n => + if Int.ltu n (Int.repr 32) + then make_shrimm n r1 + else (op, args) + | _ => (op, args) + end + | op_strength_reduction_case11 r1 r2 => (* Oshru *) + match intval r2 with + | Some n => + if Int.ltu n (Int.repr 32) + then make_shruimm n r1 + else (op, args) + | _ => (op, args) + end + | op_strength_reduction_case12 c args => (* Ocmp *) + let (c', args') := cond_strength_reduction c args in + (Ocmp c', args') + | op_strength_reduction_default op args => (* default *) + (op, args) + end. + +Inductive addr_strength_reduction_cases: forall (addr: addressing) (args: list reg), Set := + | addr_strength_reduction_case1: + forall (r1: reg) (r2: reg), + addr_strength_reduction_cases (Aindexed2) (r1 :: r2 :: nil) + | addr_strength_reduction_case2: + forall (symb: ident) (ofs: int) (r1: reg), + addr_strength_reduction_cases (Abased symb ofs) (r1 :: nil) + | addr_strength_reduction_case3: + forall n r1, + addr_strength_reduction_cases (Aindexed n) (r1 :: nil) + | addr_strength_reduction_default: + forall (addr: addressing) (args: list reg), + addr_strength_reduction_cases addr args. + +Definition addr_strength_reduction_match (addr: addressing) (args: list reg) := + match addr as z1, args as z2 return addr_strength_reduction_cases z1 z2 with + | Aindexed2, r1 :: r2 :: nil => + addr_strength_reduction_case1 r1 r2 + | Abased symb ofs, r1 :: nil => + addr_strength_reduction_case2 symb ofs r1 + | Aindexed n, r1 :: nil => + addr_strength_reduction_case3 n r1 + | addr, args => + addr_strength_reduction_default addr args + end. + +Definition addr_strength_reduction (addr: addressing) (args: list reg) := + match addr_strength_reduction_match addr args with + | addr_strength_reduction_case1 r1 r2 => (* Aindexed2 *) + match D.get r1 approx, D.get r2 approx with + | S symb n1, I n2 => (Aglobal symb (Int.add n1 n2), nil) + | S symb n1, _ => (Abased symb n1, r2 :: nil) + | I n1, S symb n2 => (Aglobal symb (Int.add n1 n2), nil) + | I n1, _ => (Aindexed n1, r2 :: nil) + | _, S symb n2 => (Abased symb n2, r1 :: nil) + | _, I n2 => (Aindexed n2, r1 :: nil) + | _, _ => (addr, args) + end + | addr_strength_reduction_case2 symb ofs r1 => (* Abased *) + match intval r1 with + | Some n => (Aglobal symb (Int.add ofs n), nil) + | _ => (addr, args) + end + | addr_strength_reduction_case3 n r1 => (* Aindexed *) + match D.get r1 approx with + | S symb ofs => (Aglobal symb (Int.add ofs n), nil) + | _ => (addr, args) + end + | addr_strength_reduction_default addr args => (* default *) + (addr, args) + end. + +End STRENGTH_REDUCTION. + +(** ** Code transformation *) + +(** The code transformation proceeds instruction by instruction. + Operators whose arguments are all statically known are turned + into ``load integer constant'', ``load float constant'' or + ``load symbol address'' operations. Operators for which some + but not all arguments are known are subject to strength reduction, + and similarly for the addressing modes of load and store instructions. + Other instructions are unchanged. *) + +Definition transf_instr (approx: D.t) (instr: instruction) := + match instr with + | Iop op args res s => + match eval_static_operation op (approx_regs args approx) with + | I n => + Iop (Ointconst n) nil res s + | F n => + Iop (Ofloatconst n) nil res s + | S symb ofs => + Iop (Oaddrsymbol symb ofs) nil res s + | _ => + let (op', args') := op_strength_reduction approx op args in + Iop op' args' res s + end + | Iload chunk addr args dst s => + let (addr', args') := addr_strength_reduction approx addr args in + Iload chunk addr' args' dst s + | Istore chunk addr args src s => + let (addr', args') := addr_strength_reduction approx addr args in + Istore chunk addr' args' src s + | Icall sig ros args res s => + let ros' := + match ros with + | inl r => + match D.get r approx with + | S symb ofs => if Int.eq ofs Int.zero then inr _ symb else ros + | _ => ros + end + | inr s => ros + end in + Icall sig ros' args res s + | Icond cond args s1 s2 => + match eval_static_condition cond (approx_regs args approx) with + | Some b => + if b then Inop s1 else Inop s2 + | None => + let (cond', args') := cond_strength_reduction approx cond args in + Icond cond' args' s1 s2 + end + | _ => + instr + end. + +Definition transf_code (approxs: PMap.t D.t) (instrs: code) : code := + PTree.map (fun pc instr => transf_instr approxs!!pc instr) instrs. + +Lemma transf_code_wf: + forall f approxs, + (forall pc, Plt pc f.(fn_nextpc) \/ f.(fn_code)!pc = None) -> + (forall pc, Plt pc f.(fn_nextpc) + \/ (transf_code approxs f.(fn_code))!pc = None). +Proof. + intros. + elim (H pc); intro. + left; auto. + right. unfold transf_code. rewrite PTree.gmap. + unfold option_map; rewrite H0. reflexivity. +Qed. + +Definition transf_function (f: function) : function := + match analyze f with + | None => f + | Some approxs => + mkfunction + f.(fn_sig) + f.(fn_params) + f.(fn_stacksize) + (transf_code approxs f.(fn_code)) + f.(fn_entrypoint) + f.(fn_nextpc) + (transf_code_wf f approxs f.(fn_code_wf)) + end. + +Definition transf_program (p: program) : program := + transform_program transf_function p. diff --git a/backend/Constpropproof.v b/backend/Constpropproof.v new file mode 100644 index 00000000..080aa74d --- /dev/null +++ b/backend/Constpropproof.v @@ -0,0 +1,883 @@ +(** Correctness proof for constant propagation. *) + +Require Import Coqlib. +Require Import Maps. +Require Import AST. +Require Import Integers. +Require Import Floats. +Require Import Values. +Require Import Mem. +Require Import Globalenvs. +Require Import Op. +Require Import Registers. +Require Import RTL. +Require Import Lattice. +Require Import Kildall. +Require Import Constprop. + +(** * Correctness of the static analysis *) + +Section ANALYSIS. + +Variable ge: genv. + +(** We first show that the dataflow analysis is correct with respect + to the dynamic semantics: the approximations (sets of values) + of a register at a program point predicted by the static analysis + are a superset of the values actually encountered during concrete + executions. We formalize this correspondence between run-time values and + compile-time approximations by the following predicate. *) + +Definition val_match_approx (a: approx) (v: val) : Prop := + match a with + | Unknown => True + | I p => v = Vint p + | F p => v = Vfloat p + | S symb ofs => exists b, Genv.find_symbol ge symb = Some b /\ v = Vptr b ofs + | _ => False + end. + +Definition regs_match_approx (a: D.t) (rs: regset) : Prop := + forall r, val_match_approx (D.get r a) rs#r. + +Lemma val_match_approx_increasing: + forall a1 a2 v, + Approx.ge a1 a2 -> val_match_approx a2 v -> val_match_approx a1 v. +Proof. + intros until v. + intros [A|[B|C]]. + subst a1. simpl. auto. + subst a2. simpl. tauto. + subst a2. auto. +Qed. + +Lemma regs_match_approx_increasing: + forall a1 a2 rs, + D.ge a1 a2 -> regs_match_approx a2 rs -> regs_match_approx a1 rs. +Proof. + unfold D.ge, regs_match_approx. intros. + apply val_match_approx_increasing with (D.get r a2); auto. +Qed. + +Lemma regs_match_approx_update: + forall ra rs a v r, + val_match_approx a v -> + regs_match_approx ra rs -> + regs_match_approx (D.set r a ra) (rs#r <- v). +Proof. + intros; red; intros. rewrite Regmap.gsspec. + case (peq r0 r); intro. + subst r0. rewrite D.gss. auto. + rewrite D.gso; auto. +Qed. + +Inductive val_list_match_approx: list approx -> list val -> Prop := + | vlma_nil: + val_list_match_approx nil nil + | vlma_cons: + forall a al v vl, + val_match_approx a v -> + val_list_match_approx al vl -> + val_list_match_approx (a :: al) (v :: vl). + +Lemma approx_regs_val_list: + forall ra rs rl, + regs_match_approx ra rs -> + val_list_match_approx (approx_regs rl ra) rs##rl. +Proof. + induction rl; simpl; intros. + constructor. + constructor. apply H. auto. +Qed. + +Ltac SimplVMA := + match goal with + | H: (val_match_approx (I _) ?v) |- _ => + simpl in H; (try subst v); SimplVMA + | H: (val_match_approx (F _) ?v) |- _ => + simpl in H; (try subst v); SimplVMA + | H: (val_match_approx (S _ _) ?v) |- _ => + simpl in H; + (try (elim H; + let b := fresh "b" in let A := fresh in let B := fresh in + (intros b [A B]; subst v; clear H))); + SimplVMA + | _ => + idtac + end. + +Ltac InvVLMA := + match goal with + | H: (val_list_match_approx nil ?vl) |- _ => + inversion H + | H: (val_list_match_approx (?a :: ?al) ?vl) |- _ => + inversion H; SimplVMA; InvVLMA + | _ => + idtac + end. + +(** We then show that [eval_static_operation] is a correct abstract + interpretations of [eval_operation]: if the concrete arguments match + the given approximations, the concrete results match the + approximations returned by [eval_static_operation]. *) + +Lemma eval_static_condition_correct: + forall cond al vl b, + val_list_match_approx al vl -> + eval_static_condition cond al = Some b -> + eval_condition cond vl = Some b. +Proof. + intros until b. + unfold eval_static_condition. + case (eval_static_condition_match cond al); intros; + InvVLMA; simpl; congruence. +Qed. + +Lemma eval_static_operation_correct: + forall op sp al vl v, + val_list_match_approx al vl -> + eval_operation ge sp op vl = Some v -> + val_match_approx (eval_static_operation op al) v. +Proof. + intros until v. + unfold eval_static_operation. + case (eval_static_operation_match op al); intros; + InvVLMA; simpl in *; FuncInv; try congruence. + + replace v with v0. auto. congruence. + + destruct (Genv.find_symbol ge s). exists b. intuition congruence. + congruence. + + exists b. split. auto. congruence. + exists b. split. auto. congruence. + exists b. split. auto. congruence. + + replace n2 with i0. destruct (Int.eq i0 Int.zero). + discriminate. injection H0; intro; subst v. simpl. congruence. congruence. + + replace n2 with i0. destruct (Int.eq i0 Int.zero). + discriminate. injection H0; intro; subst v. simpl. congruence. congruence. + + subst v. unfold Int.not. congruence. + subst v. unfold Int.not. congruence. + subst v. unfold Int.not. congruence. + + replace n2 with i0. destruct (Int.ltu i0 (Int.repr 32)). + injection H0; intro; subst v. simpl. congruence. discriminate. congruence. + + replace n2 with i0. destruct (Int.ltu i0 (Int.repr 32)). + injection H0; intro; subst v. simpl. congruence. discriminate. congruence. + + destruct (Int.ltu n (Int.repr 32)). + injection H0; intro; subst v. simpl. congruence. discriminate. + + destruct (Int.ltu n (Int.repr 32)). + injection H0; intro; subst v. simpl. congruence. discriminate. + + replace n2 with i0. destruct (Int.ltu i0 (Int.repr 32)). + injection H0; intro; subst v. simpl. congruence. discriminate. congruence. + + caseEq (eval_static_condition c vl0). + intros. generalize (eval_static_condition_correct _ _ _ _ H H1). + intro. rewrite H2 in H0. + destruct b; injection H0; intro; subst v; simpl; auto. + intros; simpl; auto. + + auto. +Qed. + +(** The correctness of the transfer function follows: if the register + state before a transition matches the static approximations at that + program point, the register state after that transition matches + the static approximation returned by the transfer function. *) + +Lemma transfer_correct: + forall f c sp pc rs m pc' rs' m' ra, + exec_instr ge c sp pc rs m pc' rs' m' -> + c = f.(fn_code) -> + regs_match_approx ra rs -> + regs_match_approx (transfer f pc ra) rs'. +Proof. + induction 1; intros; subst c; unfold transfer; rewrite H; auto. + (* Iop *) + apply regs_match_approx_update. + apply eval_static_operation_correct with sp rs##args. + eapply approx_regs_val_list. auto. auto. auto. + (* Iload *) + apply regs_match_approx_update; auto. simpl; auto. + (* Icall *) + apply regs_match_approx_update; auto. simpl; auto. +Qed. + +(** The correctness of the static analysis follows from the results + above and the fact that the result of the static analysis is + a solution of the forward dataflow inequations. *) + +Lemma analyze_correct_1: + forall f approxs, + analyze f = Some approxs -> + forall c sp pc rs m pc' rs' m', + exec_instr ge c sp pc rs m pc' rs' m' -> + c = f.(fn_code) -> + regs_match_approx approxs!!pc rs -> + regs_match_approx approxs!!pc' rs'. +Proof. + intros. + apply regs_match_approx_increasing with (transfer f pc approxs!!pc). + eapply DS.fixpoint_solution. + unfold analyze in H. eexact H. + elim (fn_code_wf f pc); intro. auto. + generalize (exec_instr_present _ _ _ _ _ _ _ _ _ H0). + rewrite H1. intro. contradiction. + eapply successors_correct. rewrite <- H1. eauto. + eapply transfer_correct; eauto. +Qed. + +Lemma analyze_correct_2: + forall f approxs, + analyze f = Some approxs -> + forall c sp pc rs m pc' rs' m', + exec_instrs ge c sp pc rs m pc' rs' m' -> + c = f.(fn_code) -> + regs_match_approx approxs!!pc rs -> + regs_match_approx approxs!!pc' rs'. +Proof. + intros f approxs ANL. induction 1. + auto. + intros. eapply analyze_correct_1; eauto. + eauto. +Qed. + +Lemma analyze_correct_3: + forall f approxs rs, + analyze f = Some approxs -> + regs_match_approx approxs!!(f.(fn_entrypoint)) rs. +Proof. + intros. + apply regs_match_approx_increasing with D.top. + eapply DS.fixpoint_entry. unfold analyze in H; eexact H. + auto with coqlib. + red; intros. rewrite D.get_top. simpl; auto. +Qed. + +(** * Correctness of strength reduction *) + +(** We now show that strength reduction over operators and addressing + modes preserve semantics: the strength-reduced operations and + addressings evaluate to the same values as the original ones if the + actual arguments match the static approximations used for strength + reduction. *) + +Section STRENGTH_REDUCTION. + +Variable approx: D.t. +Variable sp: val. +Variable rs: regset. +Hypothesis MATCH: regs_match_approx approx rs. + +Lemma intval_correct: + forall r n, + intval approx r = Some n -> rs#r = Vint n. +Proof. + intros until n. + unfold intval. caseEq (D.get r approx); intros; try discriminate. + generalize (MATCH r). unfold val_match_approx. rewrite H. + congruence. +Qed. + +Lemma cond_strength_reduction_correct: + forall cond args, + let (cond', args') := cond_strength_reduction approx cond args in + eval_condition cond' rs##args' = eval_condition cond rs##args. +Proof. + intros. unfold cond_strength_reduction. + case (cond_strength_reduction_match cond args); intros. + caseEq (intval approx r1); intros. + simpl. rewrite (intval_correct _ _ H). + destruct (rs#r2); auto. rewrite Int.swap_cmp. auto. + destruct c; reflexivity. + caseEq (intval approx r2); intros. + simpl. rewrite (intval_correct _ _ H0). auto. + auto. + caseEq (intval approx r1); intros. + simpl. rewrite (intval_correct _ _ H). + destruct (rs#r2); auto. rewrite Int.swap_cmpu. auto. + destruct c; reflexivity. + caseEq (intval approx r2); intros. + simpl. rewrite (intval_correct _ _ H0). auto. + auto. + auto. +Qed. + +Lemma make_addimm_correct: + forall n r v, + let (op, args) := make_addimm n r in + eval_operation ge sp Oadd (rs#r :: Vint n :: nil) = Some v -> + eval_operation ge sp op rs##args = Some v. +Proof. + intros; unfold make_addimm. + generalize (Int.eq_spec n Int.zero); case (Int.eq n Int.zero); intros. + subst n. simpl in *. FuncInv. rewrite Int.add_zero in H. congruence. + rewrite Int.add_zero in H. congruence. + exact H0. +Qed. + +Lemma make_shlimm_correct: + forall n r v, + let (op, args) := make_shlimm n r in + eval_operation ge sp Oshl (rs#r :: Vint n :: nil) = Some v -> + eval_operation ge sp op rs##args = Some v. +Proof. + intros; unfold make_shlimm. + generalize (Int.eq_spec n Int.zero); case (Int.eq n Int.zero); intros. + subst n. simpl in *. FuncInv. rewrite Int.shl_zero in H. congruence. + simpl in *. FuncInv. caseEq (Int.ltu n (Int.repr 32)); intros. + rewrite H1 in H0. rewrite Int.shl_rolm in H0. auto. exact H1. + rewrite H1 in H0. discriminate. +Qed. + +Lemma make_shrimm_correct: + forall n r v, + let (op, args) := make_shrimm n r in + eval_operation ge sp Oshr (rs#r :: Vint n :: nil) = Some v -> + eval_operation ge sp op rs##args = Some v. +Proof. + intros; unfold make_shrimm. + generalize (Int.eq_spec n Int.zero); case (Int.eq n Int.zero); intros. + subst n. simpl in *. FuncInv. rewrite Int.shr_zero in H. congruence. + assumption. +Qed. + +Lemma make_shruimm_correct: + forall n r v, + let (op, args) := make_shruimm n r in + eval_operation ge sp Oshru (rs#r :: Vint n :: nil) = Some v -> + eval_operation ge sp op rs##args = Some v. +Proof. + intros; unfold make_shruimm. + generalize (Int.eq_spec n Int.zero); case (Int.eq n Int.zero); intros. + subst n. simpl in *. FuncInv. rewrite Int.shru_zero in H. congruence. + simpl in *. FuncInv. caseEq (Int.ltu n (Int.repr 32)); intros. + rewrite H1 in H0. rewrite Int.shru_rolm in H0. auto. exact H1. + rewrite H1 in H0. discriminate. +Qed. + +Lemma make_mulimm_correct: + forall n r v, + let (op, args) := make_mulimm n r in + eval_operation ge sp Omul (rs#r :: Vint n :: nil) = Some v -> + eval_operation ge sp op rs##args = Some v. +Proof. + intros; unfold make_mulimm. + generalize (Int.eq_spec n Int.zero); case (Int.eq n Int.zero); intros. + subst n. simpl in H0. FuncInv. rewrite Int.mul_zero in H. simpl. congruence. + generalize (Int.eq_spec n Int.one); case (Int.eq n Int.one); intros. + subst n. simpl in H1. simpl. FuncInv. rewrite Int.mul_one in H0. congruence. + caseEq (Int.is_power2 n); intros. + replace (eval_operation ge sp Omul (rs # r :: Vint n :: nil)) + with (eval_operation ge sp Oshl (rs # r :: Vint i :: nil)). + apply make_shlimm_correct. + simpl. generalize (Int.is_power2_range _ _ H1). + change (Z_of_nat wordsize) with 32. intro. rewrite H2. + destruct rs#r; auto. rewrite (Int.mul_pow2 i0 _ _ H1). auto. + exact H2. +Qed. + +Lemma make_andimm_correct: + forall n r v, + let (op, args) := make_andimm n r in + eval_operation ge sp Oand (rs#r :: Vint n :: nil) = Some v -> + eval_operation ge sp op rs##args = Some v. +Proof. + intros; unfold make_andimm. + generalize (Int.eq_spec n Int.zero); case (Int.eq n Int.zero); intros. + subst n. simpl in *. FuncInv. rewrite Int.and_zero in H. congruence. + generalize (Int.eq_spec n Int.mone); case (Int.eq n Int.mone); intros. + subst n. simpl in *. FuncInv. rewrite Int.and_mone in H0. congruence. + exact H1. +Qed. + +Lemma make_orimm_correct: + forall n r v, + let (op, args) := make_orimm n r in + eval_operation ge sp Oor (rs#r :: Vint n :: nil) = Some v -> + eval_operation ge sp op rs##args = Some v. +Proof. + intros; unfold make_orimm. + generalize (Int.eq_spec n Int.zero); case (Int.eq n Int.zero); intros. + subst n. simpl in *. FuncInv. rewrite Int.or_zero in H. congruence. + generalize (Int.eq_spec n Int.mone); case (Int.eq n Int.mone); intros. + subst n. simpl in *. FuncInv. rewrite Int.or_mone in H0. congruence. + exact H1. +Qed. + +Lemma make_xorimm_correct: + forall n r v, + let (op, args) := make_xorimm n r in + eval_operation ge sp Oxor (rs#r :: Vint n :: nil) = Some v -> + eval_operation ge sp op rs##args = Some v. +Proof. + intros; unfold make_xorimm. + generalize (Int.eq_spec n Int.zero); case (Int.eq n Int.zero); intros. + subst n. simpl in *. FuncInv. rewrite Int.xor_zero in H. congruence. + exact H0. +Qed. + +Lemma op_strength_reduction_correct: + forall op args v, + let (op', args') := op_strength_reduction approx op args in + eval_operation ge sp op rs##args = Some v -> + eval_operation ge sp op' rs##args' = Some v. +Proof. + intros; unfold op_strength_reduction; + case (op_strength_reduction_match op args); intros; simpl List.map. + (* Oadd *) + caseEq (intval approx r1); intros. + rewrite (intval_correct _ _ H). + replace (eval_operation ge sp Oadd (Vint i :: rs # r2 :: nil)) + with (eval_operation ge sp Oadd (rs # r2 :: Vint i :: nil)). + apply make_addimm_correct. + simpl. destruct rs#r2; auto. rewrite Int.add_commut; auto. + caseEq (intval approx r2); intros. + rewrite (intval_correct _ _ H0). apply make_addimm_correct. + assumption. + (* Osub *) + caseEq (intval approx r1); intros. + rewrite (intval_correct _ _ H) in H0. assumption. + caseEq (intval approx r2); intros. + rewrite (intval_correct _ _ H0). + replace (eval_operation ge sp Osub (rs # r1 :: Vint i :: nil)) + with (eval_operation ge sp Oadd (rs # r1 :: Vint (Int.neg i) :: nil)). + apply make_addimm_correct. + simpl. destruct rs#r1; auto; rewrite Int.sub_add_opp; auto. + assumption. + (* Omul *) + caseEq (intval approx r1); intros. + rewrite (intval_correct _ _ H). + replace (eval_operation ge sp Omul (Vint i :: rs # r2 :: nil)) + with (eval_operation ge sp Omul (rs # r2 :: Vint i :: nil)). + apply make_mulimm_correct. + simpl. destruct rs#r2; auto. rewrite Int.mul_commut; auto. + caseEq (intval approx r2); intros. + rewrite (intval_correct _ _ H0). apply make_mulimm_correct. + assumption. + (* Odiv *) + caseEq (intval approx r2); intros. + caseEq (Int.is_power2 i); intros. + rewrite (intval_correct _ _ H) in H1. + simpl in *; FuncInv. destruct (Int.eq i Int.zero). congruence. + change 32 with (Z_of_nat wordsize). + rewrite (Int.is_power2_range _ _ H0). + rewrite (Int.divs_pow2 i1 _ _ H0) in H1. auto. + assumption. + assumption. + (* Odivu *) + caseEq (intval approx r2); intros. + caseEq (Int.is_power2 i); intros. + rewrite (intval_correct _ _ H). + replace (eval_operation ge sp Odivu (rs # r1 :: Vint i :: nil)) + with (eval_operation ge sp Oshru (rs # r1 :: Vint i0 :: nil)). + apply make_shruimm_correct. + simpl. destruct rs#r1; auto. + change 32 with (Z_of_nat wordsize). + rewrite (Int.is_power2_range _ _ H0). + generalize (Int.eq_spec i Int.zero); case (Int.eq i Int.zero); intros. + subst i. discriminate. + rewrite (Int.divu_pow2 i1 _ _ H0). auto. + assumption. + assumption. + (* Oand *) + caseEq (intval approx r1); intros. + rewrite (intval_correct _ _ H). + replace (eval_operation ge sp Oand (Vint i :: rs # r2 :: nil)) + with (eval_operation ge sp Oand (rs # r2 :: Vint i :: nil)). + apply make_andimm_correct. + simpl. destruct rs#r2; auto. rewrite Int.and_commut; auto. + caseEq (intval approx r2); intros. + rewrite (intval_correct _ _ H0). apply make_andimm_correct. + assumption. + (* Oor *) + caseEq (intval approx r1); intros. + rewrite (intval_correct _ _ H). + replace (eval_operation ge sp Oor (Vint i :: rs # r2 :: nil)) + with (eval_operation ge sp Oor (rs # r2 :: Vint i :: nil)). + apply make_orimm_correct. + simpl. destruct rs#r2; auto. rewrite Int.or_commut; auto. + caseEq (intval approx r2); intros. + rewrite (intval_correct _ _ H0). apply make_orimm_correct. + assumption. + (* Oxor *) + caseEq (intval approx r1); intros. + rewrite (intval_correct _ _ H). + replace (eval_operation ge sp Oxor (Vint i :: rs # r2 :: nil)) + with (eval_operation ge sp Oxor (rs # r2 :: Vint i :: nil)). + apply make_xorimm_correct. + simpl. destruct rs#r2; auto. rewrite Int.xor_commut; auto. + caseEq (intval approx r2); intros. + rewrite (intval_correct _ _ H0). apply make_xorimm_correct. + assumption. + (* Oshl *) + caseEq (intval approx r2); intros. + caseEq (Int.ltu i (Int.repr 32)); intros. + rewrite (intval_correct _ _ H). apply make_shlimm_correct. + assumption. + assumption. + (* Oshr *) + caseEq (intval approx r2); intros. + caseEq (Int.ltu i (Int.repr 32)); intros. + rewrite (intval_correct _ _ H). apply make_shrimm_correct. + assumption. + assumption. + (* Oshru *) + caseEq (intval approx r2); intros. + caseEq (Int.ltu i (Int.repr 32)); intros. + rewrite (intval_correct _ _ H). apply make_shruimm_correct. + assumption. + assumption. + (* Ocmp *) + generalize (cond_strength_reduction_correct c rl). + destruct (cond_strength_reduction approx c rl). + simpl. intro. rewrite H. auto. + (* default *) + assumption. +Qed. + +Ltac KnownApprox := + match goal with + | MATCH: (regs_match_approx ?approx ?rs), + H: (D.get ?r ?approx = ?a) |- _ => + generalize (MATCH r); rewrite H; intro; clear H; KnownApprox + | _ => idtac + end. + +Lemma addr_strength_reduction_correct: + forall addr args, + let (addr', args') := addr_strength_reduction approx addr args in + eval_addressing ge sp addr' rs##args' = eval_addressing ge sp addr rs##args. +Proof. + intros. + + (* Useful lemmas *) + assert (A0: forall r1 r2, + eval_addressing ge sp Aindexed2 (rs ## (r1 :: r2 :: nil)) = + eval_addressing ge sp Aindexed2 (rs ## (r2 :: r1 :: nil))). + intros. simpl. destruct (rs#r1); destruct (rs#r2); auto; + rewrite Int.add_commut; auto. + + assert (A1: forall r1 r2 n, + val_match_approx (I n) rs#r2 -> + eval_addressing ge sp (Aindexed n) (rs ## (r1 :: nil)) = + eval_addressing ge sp Aindexed2 (rs ## (r1 :: r2 :: nil))). + intros; simpl in *. rewrite H. auto. + + assert (A2: forall r1 r2 n, + val_match_approx (I n) rs#r1 -> + eval_addressing ge sp (Aindexed n) (rs ## (r2 :: nil)) = + eval_addressing ge sp Aindexed2 (rs ## (r1 :: r2 :: nil))). + intros. rewrite A0. apply A1. auto. + + assert (A3: forall r1 r2 id ofs, + val_match_approx (S id ofs) rs#r1 -> + eval_addressing ge sp (Abased id ofs) (rs ## (r2 :: nil)) = + eval_addressing ge sp Aindexed2 (rs ## (r1 :: r2 :: nil))). + intros. elim H. intros b [A B]. simpl. rewrite A; rewrite B. auto. + + assert (A4: forall r1 r2 id ofs, + val_match_approx (S id ofs) rs#r2 -> + eval_addressing ge sp (Abased id ofs) (rs ## (r1 :: nil)) = + eval_addressing ge sp Aindexed2 (rs ## (r1 :: r2 :: nil))). + intros. rewrite A0. apply A3. auto. + + assert (A5: forall r1 r2 id ofs n, + val_match_approx (S id ofs) rs#r1 -> + val_match_approx (I n) rs#r2 -> + eval_addressing ge sp (Aglobal id (Int.add ofs n)) nil = + eval_addressing ge sp Aindexed2 (rs ## (r1 :: r2 :: nil))). + intros. elim H. intros b [A B]. simpl. rewrite A; rewrite B. + simpl in H0. rewrite H0. auto. + + unfold addr_strength_reduction; + case (addr_strength_reduction_match addr args); intros. + + (* Aindexed2 *) + caseEq (D.get r1 approx); intros; + caseEq (D.get r2 approx); intros; + try reflexivity; KnownApprox; auto. + rewrite A0. rewrite Int.add_commut. apply A5; auto. + + (* Abased *) + caseEq (intval approx r1); intros. + simpl; rewrite (intval_correct _ _ H). auto. + auto. + + (* Aindexed *) + caseEq (D.get r1 approx); intros; auto. + simpl; KnownApprox. + elim H0. intros b [A B]. rewrite A; rewrite B. auto. + + (* default *) + reflexivity. +Qed. + +End STRENGTH_REDUCTION. + +End ANALYSIS. + +(** * Correctness of the code transformation *) + +(** We now show that the transformed code after constant propagation + has the same semantics as the original code. *) + +Section PRESERVATION. + +Variable prog: program. +Let tprog := transf_program prog. +Let ge := Genv.globalenv prog. +Let tge := Genv.globalenv tprog. + +Lemma symbols_preserved: + forall (s: ident), Genv.find_symbol tge s = Genv.find_symbol ge s. +Proof (Genv.find_symbol_transf transf_function prog). + +Lemma functions_translated: + forall (v: val) (f: RTL.function), + Genv.find_funct ge v = Some f -> + Genv.find_funct tge v = Some (transf_function f). +Proof (@Genv.find_funct_transf _ _ transf_function prog). + +Lemma function_ptr_translated: + forall (v: block) (f: RTL.function), + Genv.find_funct_ptr ge v = Some f -> + Genv.find_funct_ptr tge v = Some (transf_function f). +Proof (@Genv.find_funct_ptr_transf _ _ transf_function prog). + +(** The proof of semantic preservation is a simulation argument + based on diagrams of the following form: +<< + pc, rs, m ------------------- pc, rs, m + | | + | | + v v + pc', rs', m' ---------------- pc', rs', m' +>> + The left vertical arrow represents a transition in the + original RTL code. The top horizontal bar expresses that the values + of registers [rs] match their compile-time approximations at point [pc]. + These two parts of the diagram are the hypotheses. In conclusions, + we want to prove the other two parts: the right vertical arrow, + which is a transition in the transformed RTL code, and the bottom + horizontal bar, which means that [rs'] matches the compile-time + approximations at [pc']. + + To help express those diagrams, we define the following propositions + parameterized by the transition in the original RTL code (left arrow) + and summarizing the three other arrows of the diagram. *) + +Definition exec_instr_prop + (c: code) (sp: val) + (pc: node) (rs: regset) (m: mem) + (pc': node) (rs': regset) (m': mem) : Prop := + exec_instr tge c sp pc rs m pc' rs' m' /\ + forall f approxs + (CF: c = f.(RTL.fn_code)) + (ANL: analyze f = Some approxs) + (MATCH: regs_match_approx ge approxs!!pc rs), + exec_instr tge (transf_code approxs c) sp pc rs m pc' rs' m'. + +Definition exec_instrs_prop + (c: code) (sp: val) + (pc: node) (rs: regset) (m: mem) + (pc': node) (rs': regset) (m': mem) : Prop := + exec_instrs tge c sp pc rs m pc' rs' m' /\ + forall f approxs + (CF: c = f.(RTL.fn_code)) + (ANL: analyze f = Some approxs) + (MATCH: regs_match_approx ge approxs!!pc rs), + exec_instrs tge (transf_code approxs c) sp pc rs m pc' rs' m'. + +Definition exec_function_prop + (f: RTL.function) (args: list val) (m: mem) + (res: val) (m': mem) : Prop := + exec_function tge (transf_function f) args m res m'. + +Ltac TransfInstr := + match goal with + | H1: (PTree.get ?pc ?c = Some ?instr), + H2: (analyze ?f = Some ?approxs) |- _ => + cut ((transf_code approxs c)!pc = Some(transf_instr approxs!!pc instr)); + [ simpl + | unfold transf_code; rewrite PTree.gmap; + unfold option_map; rewrite H1; reflexivity ] + end. + +(** The predicates above serve as induction hypotheses in the proof of + simulation, which proceeds by induction over the + evaluation derivation of the original code. *) + +Lemma transf_funct_correct: + forall f args m res m', + exec_function ge f args m res m' -> + exec_function_prop f args m res m'. +Proof. + apply (exec_function_ind_3 ge + exec_instr_prop exec_instrs_prop exec_function_prop); + intros; red. + (* Inop *) + split; [idtac| intros; TransfInstr]. + apply exec_Inop; auto. + intros; apply exec_Inop; auto. + (* Iop *) + split; [idtac| intros; TransfInstr]. + apply exec_Iop with op args. auto. + rewrite (eval_operation_preserved symbols_preserved). auto. + caseEq (op_strength_reduction approxs!!pc op args); + intros op' args' OSR. + assert (eval_operation tge sp op' rs##args' = Some v). + rewrite (eval_operation_preserved symbols_preserved). + generalize (op_strength_reduction_correct ge approxs!!pc sp rs + MATCH op args v). + rewrite OSR; simpl. auto. + generalize (eval_static_operation_correct ge op sp + (approx_regs args approxs!!pc) rs##args v + (approx_regs_val_list _ _ _ args MATCH) H0). + case (eval_static_operation op (approx_regs args approxs!!pc)); intros; + simpl in H1; + eapply exec_Iop; eauto; simpl. + simpl in H2; congruence. + simpl in H2; congruence. + elim H2; intros b [A B]. rewrite symbols_preserved. + rewrite A; rewrite B; auto. + (* Iload *) + split; [idtac| intros; TransfInstr]. + eapply exec_Iload; eauto. + rewrite (eval_addressing_preserved symbols_preserved). auto. + caseEq (addr_strength_reduction approxs!!pc addr args); + intros addr' args' ASR. + assert (eval_addressing tge sp addr' rs##args' = Some a). + rewrite (eval_addressing_preserved symbols_preserved). + generalize (addr_strength_reduction_correct ge approxs!!pc sp rs + MATCH addr args). + rewrite ASR; simpl. congruence. + intro. eapply exec_Iload; eauto. + (* Istore *) + split; [idtac| intros; TransfInstr]. + eapply exec_Istore; eauto. + rewrite (eval_addressing_preserved symbols_preserved). auto. + caseEq (addr_strength_reduction approxs!!pc addr args); + intros addr' args' ASR. + assert (eval_addressing tge sp addr' rs##args' = Some a). + rewrite (eval_addressing_preserved symbols_preserved). + generalize (addr_strength_reduction_correct ge approxs!!pc sp rs + MATCH addr args). + rewrite ASR; simpl. congruence. + intro. eapply exec_Istore; eauto. + (* Icall *) + assert (find_function tge ros rs = Some (transf_function f)). + generalize H0; unfold find_function; destruct ros. + apply functions_translated. + rewrite symbols_preserved. destruct (Genv.find_symbol ge i). + apply function_ptr_translated. congruence. + assert (sig = fn_sig (transf_function f)). + rewrite H1. unfold transf_function. destruct (analyze f); reflexivity. + split; [idtac| intros; TransfInstr]. + eapply exec_Icall; eauto. + set (ros' := + match ros with + | inl r => + match D.get r approxs !! pc with + | Novalue => ros + | Unknown => ros + | I _ => ros + | F _ => ros + | S symb ofs => if Int.eq ofs Int.zero then inr reg symb else ros + end + | inr _ => ros + end). + intros; eapply exec_Icall; eauto. + unfold ros'; destruct ros; auto. + caseEq (D.get r approxs!!pc); intros; auto. + generalize (Int.eq_spec i0 Int.zero); case (Int.eq i0 Int.zero); intros; auto. + generalize (MATCH r). rewrite H7. intros [b [A B]]. + rewrite <- symbols_preserved in A. + generalize H4. simpl. rewrite A. rewrite B. subst i0. + rewrite Genv.find_funct_find_funct_ptr. auto. + + (* Icond, true *) + split; [idtac| intros; TransfInstr]. + eapply exec_Icond_true; eauto. + caseEq (cond_strength_reduction approxs!!pc cond args); + intros cond' args' CSR. + assert (eval_condition cond' rs##args' = Some true). + generalize (cond_strength_reduction_correct + ge approxs!!pc rs MATCH cond args). + rewrite CSR. intro. congruence. + caseEq (eval_static_condition cond (approx_regs args approxs!!pc)). + intros b ESC. + generalize (eval_static_condition_correct ge cond _ _ _ + (approx_regs_val_list _ _ _ args MATCH) ESC); intro. + replace b with true. intro; eapply exec_Inop; eauto. congruence. + intros. eapply exec_Icond_true; eauto. + + (* Icond, false *) + split; [idtac| intros; TransfInstr]. + eapply exec_Icond_false; eauto. + caseEq (cond_strength_reduction approxs!!pc cond args); + intros cond' args' CSR. + assert (eval_condition cond' rs##args' = Some false). + generalize (cond_strength_reduction_correct + ge approxs!!pc rs MATCH cond args). + rewrite CSR. intro. congruence. + caseEq (eval_static_condition cond (approx_regs args approxs!!pc)). + intros b ESC. + generalize (eval_static_condition_correct ge cond _ _ _ + (approx_regs_val_list _ _ _ args MATCH) ESC); intro. + replace b with false. intro; eapply exec_Inop; eauto. congruence. + intros. eapply exec_Icond_false; eauto. + + (* refl *) + split. apply exec_refl. intros. apply exec_refl. + (* one *) + elim H0; intros. + split. apply exec_one; auto. + intros. apply exec_one. eapply H2; eauto. + (* trans *) + elim H0; intros. elim H2; intros. + split. + apply exec_trans with pc2 rs2 m2; auto. + intros; apply exec_trans with pc2 rs2 m2. + eapply H4; eauto. eapply H6; eauto. + eapply analyze_correct_2; eauto. + + (* function *) + elim H1; intros. + unfold transf_function. + caseEq (analyze f). + intros approxs ANL. + eapply exec_funct; simpl; eauto. + eapply H5. reflexivity. auto. + apply analyze_correct_3; auto. + TransfInstr; auto. + intros. eapply exec_funct; eauto. +Qed. + +(** The preservation of the observable behavior of the program then + follows. *) + +Theorem transf_program_correct: + forall (r: val), + exec_program prog r -> exec_program tprog r. +Proof. + intros r [b [f [m [SYMB [FIND [SIG EXEC]]]]]]. + red. exists b. exists (transf_function f). exists m. + split. change (prog_main tprog) with (prog_main prog). + rewrite symbols_preserved. auto. + split. apply function_ptr_translated; auto. + split. unfold transf_function. + rewrite <- SIG. destruct (analyze f); reflexivity. + apply transf_funct_correct. + unfold tprog, transf_program. rewrite Genv.init_mem_transf. + exact EXEC. +Qed. + +End PRESERVATION. diff --git a/backend/Conventions.v b/backend/Conventions.v new file mode 100644 index 00000000..99cc9338 --- /dev/null +++ b/backend/Conventions.v @@ -0,0 +1,690 @@ +(** Function calling conventions and other conventions regarding the use of + machine registers and stack slots. *) + +Require Import Coqlib. +Require Import AST. +Require Import Locations. + +(** * Classification of machine registers *) + +(** Machine registers (type [mreg] in module [Locations]) are divided in + the following groups: +- Temporaries used for spilling, reloading, and parallel move operations. +- Allocatable registers, that can be assigned to RTL pseudo-registers. + These are further divided into: +-- Callee-save registers, whose value is preserved across a function call. +-- Caller-save registers that can be modified during a function call. + + We follow the PowerPC application binary interface (ABI) in our choice + of callee- and caller-save registers. +*) + +Definition destroyed_at_call_regs := + R3 :: R4 :: R5 :: R6 :: R7 :: R8 :: R9 :: R10 :: + F1 :: F2 :: F3 :: F4 :: F5 :: F6 :: F7 :: F8 :: F9 :: F10 :: nil. + +Definition destroyed_at_call := + List.map R destroyed_at_call_regs. + +Definition int_callee_save_regs := + R13 :: R14 :: R15 :: R16 :: R17 :: R18 :: R19 :: R20 :: R21 :: R22 :: + R23 :: R24 :: R25 :: R26 :: R27 :: R28 :: R29 :: R30 :: R31 :: nil. + +Definition float_callee_save_regs := + F14 :: F15 :: F16 :: F17 :: F18 :: F19 :: F20 :: F21 :: F22 :: + F23 :: F24 :: F25 :: F26 :: F27 :: F28 :: F29 :: F30 :: F31 :: nil. + +Definition temporaries := + R IT1 :: R IT2 :: R IT3 :: R FT1 :: R FT2 :: R FT3 :: nil. + +(** The [index_int_callee_save] and [index_float_callee_save] associate + a unique positive integer to callee-save registers. This integer is + used in [Stacking] to determine where to save these registers in + the activation record if they are used by the current function. *) + +Definition index_int_callee_save (r: mreg) := + match r with + | R13 => 0 | R14 => 1 | R15 => 2 | R16 => 3 + | R17 => 4 | R18 => 5 | R19 => 6 | R20 => 7 + | R21 => 8 | R22 => 9 | R23 => 10 | R24 => 11 + | R25 => 12 | R26 => 13 | R27 => 14 | R28 => 15 + | R29 => 16 | R30 => 17 | R31 => 18 | _ => -1 + end. + +Definition index_float_callee_save (r: mreg) := + match r with + | F14 => 0 | F15 => 1 | F16 => 2 | F17 => 3 + | F18 => 4 | F19 => 5 | F20 => 6 | F21 => 7 + | F22 => 8 | F23 => 9 | F24 => 10 | F25 => 11 + | F26 => 12 | F27 => 13 | F28 => 14 | F29 => 15 + | F30 => 16 | F31 => 17 | _ => -1 + end. + +Ltac ElimOrEq := + match goal with + | |- (?x = ?y) \/ _ -> _ => + let H := fresh in + (intro H; elim H; clear H; + [intro H; rewrite <- H; clear H | ElimOrEq]) + | |- False -> _ => + let H := fresh in (intro H; contradiction) + end. + +Ltac OrEq := + match goal with + | |- (?x = ?x) \/ _ => left; reflexivity + | |- (?x = ?y) \/ _ => right; OrEq + | |- False => fail + end. + +Ltac NotOrEq := + match goal with + | |- (?x = ?y) \/ _ -> False => + let H := fresh in ( + intro H; elim H; clear H; [intro; discriminate | NotOrEq]) + | |- False -> False => + contradiction + end. + +Lemma index_int_callee_save_pos: + forall r, In r int_callee_save_regs -> index_int_callee_save r >= 0. +Proof. + intro r. simpl; ElimOrEq; unfold index_int_callee_save; omega. +Qed. + +Lemma index_float_callee_save_pos: + forall r, In r float_callee_save_regs -> index_float_callee_save r >= 0. +Proof. + intro r. simpl; ElimOrEq; unfold index_float_callee_save; omega. +Qed. + +Lemma index_int_callee_save_pos2: + forall r, index_int_callee_save r >= 0 -> In r int_callee_save_regs. +Proof. + destruct r; simpl; intro; omegaContradiction || OrEq. +Qed. + +Lemma index_float_callee_save_pos2: + forall r, index_float_callee_save r >= 0 -> In r float_callee_save_regs. +Proof. + destruct r; simpl; intro; omegaContradiction || OrEq. +Qed. + +Lemma index_int_callee_save_inj: + forall r1 r2, + In r1 int_callee_save_regs -> + In r2 int_callee_save_regs -> + r1 <> r2 -> + index_int_callee_save r1 <> index_int_callee_save r2. +Proof. + intros r1 r2. + simpl; ElimOrEq; ElimOrEq; unfold index_int_callee_save; + intros; congruence. +Qed. + +Lemma index_float_callee_save_inj: + forall r1 r2, + In r1 float_callee_save_regs -> + In r2 float_callee_save_regs -> + r1 <> r2 -> + index_float_callee_save r1 <> index_float_callee_save r2. +Proof. + intros r1 r2. + simpl; ElimOrEq; ElimOrEq; unfold index_float_callee_save; + intros; congruence. +Qed. + +(** The following lemmas show that + (temporaries, destroyed at call, integer callee-save, float callee-save) + is a partition of the set of machine registers. *) + +Lemma int_float_callee_save_disjoint: + list_disjoint int_callee_save_regs float_callee_save_regs. +Proof. + red; intros r1 r2. simpl; ElimOrEq; ElimOrEq; discriminate. +Qed. + +Lemma register_classification: + forall r, + (In (R r) temporaries \/ In (R r) destroyed_at_call) \/ + (In r int_callee_save_regs \/ In r float_callee_save_regs). +Proof. + destruct r; + try (left; left; simpl; OrEq); + try (left; right; simpl; OrEq); + try (right; left; simpl; OrEq); + try (right; right; simpl; OrEq). +Qed. + +Lemma int_callee_save_not_destroyed: + forall r, + In (R r) temporaries \/ In (R r) destroyed_at_call -> + ~(In r int_callee_save_regs). +Proof. + intros; red; intros. elim H. + generalize H0. simpl; ElimOrEq; NotOrEq. + generalize H0. simpl; ElimOrEq; NotOrEq. +Qed. + +Lemma float_callee_save_not_destroyed: + forall r, + In (R r) temporaries \/ In (R r) destroyed_at_call -> + ~(In r float_callee_save_regs). +Proof. + intros; red; intros. elim H. + generalize H0. simpl; ElimOrEq; NotOrEq. + generalize H0. simpl; ElimOrEq; NotOrEq. +Qed. + +Lemma int_callee_save_type: + forall r, In r int_callee_save_regs -> mreg_type r = Tint. +Proof. + intro. simpl; ElimOrEq; reflexivity. +Qed. + +Lemma float_callee_save_type: + forall r, In r float_callee_save_regs -> mreg_type r = Tfloat. +Proof. + intro. simpl; ElimOrEq; reflexivity. +Qed. + +Ltac NoRepet := + match goal with + | |- list_norepet nil => + apply list_norepet_nil + | |- list_norepet (?a :: ?b) => + apply list_norepet_cons; [simpl; intuition discriminate | NoRepet] + end. + +Lemma int_callee_save_norepet: + list_norepet int_callee_save_regs. +Proof. + unfold int_callee_save_regs; NoRepet. +Qed. + +Lemma float_callee_save_norepet: + list_norepet float_callee_save_regs. +Proof. + unfold float_callee_save_regs; NoRepet. +Qed. + +(** * Acceptable locations for register allocation *) + +(** The following predicate describes the locations that can be assigned + to an RTL pseudo-register during register allocation: a non-temporary + machine register or a [Local] stack slot are acceptable. *) + +Definition loc_acceptable (l: loc) : Prop := + match l with + | R r => ~(In l temporaries) + | S (Local ofs ty) => ofs >= 0 + | S (Incoming _ _) => False + | S (Outgoing _ _) => False + end. + +Definition locs_acceptable (ll: list loc) : Prop := + forall l, In l ll -> loc_acceptable l. + +Lemma temporaries_not_acceptable: + forall l, loc_acceptable l -> Loc.notin l temporaries. +Proof. + unfold loc_acceptable; destruct l. + simpl. intuition congruence. + destruct s; try contradiction. + intro. simpl. tauto. +Qed. +Hint Resolve temporaries_not_acceptable: locs. + +Lemma locs_acceptable_disj_temporaries: + forall ll, locs_acceptable ll -> Loc.disjoint ll temporaries. +Proof. + intros. apply Loc.notin_disjoint. intros. + apply temporaries_not_acceptable. auto. +Qed. + +(** * Function calling conventions *) + +(** The functions in this section determine the locations (machine registers + and stack slots) used to communicate arguments and results between the + caller and the callee during function calls. These locations are functions + of the signature of the function and of the call instruction. + Agreement between the caller and the callee on the locations to use + is guaranteed by our dynamic semantics for Cminor and RTL, which demand + that the signature of the call instruction is identical to that of the + called function. + + Calling conventions are largely arbitrary: they must respect the properties + proved in this section (such as no overlapping between the locations + of function arguments), but this leaves much liberty in choosing actual + locations. To ensure binary interoperability of code generated by our + compiler with libraries compiled by another PowerPC compiler, we + implement the standard conventions defined in the PowerPC application + binary interface. *) + +(** ** Location of function result *) + +(** The result value of a function is passed back to the caller in + registers [R3] or [F1], depending on the type of the returned value. + We treat a function without result as a function with one integer result. *) + +Definition loc_result (s: signature) : mreg := + match s.(sig_res) with + | None => R3 + | Some Tint => R3 + | Some Tfloat => F1 + end. + +(** The result location has the type stated in the signature. *) + +Lemma loc_result_type: + forall sig, + mreg_type (loc_result sig) = + match sig.(sig_res) with None => Tint | Some ty => ty end. +Proof. + intros; unfold loc_result. + destruct (sig_res sig). + destruct t; reflexivity. + reflexivity. +Qed. + +(** The result location is a caller-save register. *) + +Lemma loc_result_acceptable: + forall (s: signature), In (R (loc_result s)) destroyed_at_call. +Proof. + intros; unfold loc_result. + destruct (sig_res s). + destruct t; simpl; OrEq. + simpl; OrEq. +Qed. + +(** ** Location of function arguments *) + +(** The PowerPC ABI states the following convention for passing arguments + to a function: +- The first 8 integer arguments are passed in registers [R3] to [R10]. +- The first 10 float arguments are passed in registers [F1] to [F10]. +- Each float argument passed in a float register ``consumes'' two + integer arguments. +- Extra arguments are passed on the stack, in [Outgoing] slots, consecutively + assigned (1 word for an integer argument, 2 words for a float), + starting at word offset 6 (the bottom 24 bytes of the stack are reserved + for other purposes). +- Stack space is reserved (as unused [Outgoing] slots) for the arguments + that are passed in registers. + +These conventions are somewhat baroque, but they are mandated by the ABI. +*) + +Definition drop1 (x: list mreg) := + match x with nil => nil | hd :: tl => tl end. +Definition drop2 (x: list mreg) := + match x with nil => nil | hd :: nil => nil | hd1 :: hd2 :: tl => tl end. + +Fixpoint loc_arguments_rec + (tyl: list typ) (iregl: list mreg) (fregl: list mreg) + (ofs: Z) {struct tyl} : list loc := + match tyl with + | nil => nil + | Tint :: tys => + match iregl with + | nil => S (Outgoing ofs Tint) + | ireg :: _ => R ireg + end :: + loc_arguments_rec tys (drop1 iregl) fregl (ofs + 1) + | Tfloat :: tys => + match fregl with + | nil => S (Outgoing ofs Tfloat) + | freg :: _ => R freg + end :: + loc_arguments_rec tys (drop2 iregl) (drop1 fregl) (ofs + 2) + end. + +Definition int_param_regs := + R3 :: R4 :: R5 :: R6 :: R7 :: R8 :: R9 :: R10 :: nil. +Definition float_param_regs := + F1 :: F2 :: F3 :: F4 :: F5 :: F6 :: F7 :: F8 :: F9 :: F10 :: nil. + +(** [loc_arguments s] returns the list of locations where to store arguments + when calling a function with signature [s]. *) + +Definition loc_arguments (s: signature) : list loc := + loc_arguments_rec s.(sig_args) int_param_regs float_param_regs 6. + +(** [size_arguments s] returns the number of [Outgoing] slots used + to call a function with signature [s]. *) + +Fixpoint size_arguments_rec (tyl: list typ) : Z := + match tyl with + | nil => 6 + | Tint :: tys => 1 + size_arguments_rec tys + | Tfloat :: tys => 2 + size_arguments_rec tys + end. + +Definition size_arguments (s: signature) : Z := + size_arguments_rec s.(sig_args). + +(** Argument locations are either non-temporary registers or [Outgoing] + stack slots at offset 6 or more. *) + +Definition loc_argument_acceptable (l: loc) : Prop := + match l with + | R r => ~(In l temporaries) + | S (Outgoing ofs ty) => ofs >= 6 + | _ => False + end. + +Remark drop1_incl: + forall x l, In x (drop1 l) -> In x l. +Proof. + intros. destruct l. elim H. simpl in H. auto with coqlib. +Qed. +Remark drop2_incl: + forall x l, In x (drop2 l) -> In x l. +Proof. + intros. destruct l. elim H. destruct l. elim H. + simpl in H. auto with coqlib. +Qed. + +Remark loc_arguments_rec_charact: + forall tyl iregl fregl ofs l, + In l (loc_arguments_rec tyl iregl fregl ofs) -> + match l with + | R r => In r iregl \/ In r fregl + | S (Outgoing ofs' ty) => ofs' >= ofs + | S _ => False + end. +Proof. + induction tyl; simpl loc_arguments_rec; intros. + elim H. + destruct a; elim H; intros. + destruct iregl; subst l. omega. left; auto with coqlib. + generalize (IHtyl _ _ _ _ H0). + destruct l. intros [A|B]. left; apply drop1_incl; auto. tauto. + destruct s; try contradiction. omega. + destruct fregl; subst l. omega. right; auto with coqlib. + generalize (IHtyl _ _ _ _ H0). + destruct l. intros [A|B]. left; apply drop2_incl; auto. + right; apply drop1_incl; auto. + destruct s; try contradiction. omega. +Qed. + +Lemma loc_arguments_acceptable: + forall (s: signature) (r: loc), + In r (loc_arguments s) -> loc_argument_acceptable r. +Proof. + unfold loc_arguments; intros. + generalize (loc_arguments_rec_charact _ _ _ _ _ H). + destruct r. + intro H0; elim H0. simpl. unfold not. ElimOrEq; NotOrEq. + simpl. unfold not. ElimOrEq; NotOrEq. + destruct s0; try contradiction. + simpl. auto. +Qed. +Hint Resolve loc_arguments_acceptable: locs. + +(** Arguments are parwise disjoint (in the sense of [Loc.norepet]). *) + +Remark drop1_norepet: + forall l, list_norepet l -> list_norepet (drop1 l). +Proof. + intros. destruct l; simpl. constructor. inversion H. auto. +Qed. +Remark drop2_norepet: + forall l, list_norepet l -> list_norepet (drop2 l). +Proof. + intros. destruct l; simpl. constructor. + destruct l; simpl. constructor. inversion H. inversion H3. auto. +Qed. + +Remark loc_arguments_rec_notin_reg: + forall tyl iregl fregl ofs r, + ~(In r iregl) -> ~(In r fregl) -> + Loc.notin (R r) (loc_arguments_rec tyl iregl fregl ofs). +Proof. + induction tyl; simpl; intros. + auto. + destruct a; simpl; split. + destruct iregl. auto. red; intro; subst m. apply H. auto with coqlib. + apply IHtyl. red; intro. apply H. apply drop1_incl; auto. auto. + destruct fregl. auto. red; intro; subst m. apply H0. auto with coqlib. + apply IHtyl. red; intro. apply H. apply drop2_incl; auto. + red; intro. apply H0. apply drop1_incl; auto. +Qed. + +Remark loc_arguments_rec_notin_local: + forall tyl iregl fregl ofs ofs0 ty0, + Loc.notin (S (Local ofs0 ty0)) (loc_arguments_rec tyl iregl fregl ofs). +Proof. + induction tyl; simpl; intros. + auto. + destruct a; simpl; split. + destruct iregl. auto. auto. + apply IHtyl. + destruct fregl. auto. auto. + apply IHtyl. +Qed. + +Remark loc_arguments_rec_notin_outgoing: + forall tyl iregl fregl ofs ofs0 ty0, + ofs0 + typesize ty0 <= ofs -> + Loc.notin (S (Outgoing ofs0 ty0)) (loc_arguments_rec tyl iregl fregl ofs). +Proof. + induction tyl; simpl; intros. + auto. + destruct a; simpl; split. + destruct iregl. omega. auto. + apply IHtyl. omega. + destruct fregl. omega. auto. + apply IHtyl. omega. +Qed. + +Lemma loc_arguments_norepet: + forall (s: signature), Loc.norepet (loc_arguments s). +Proof. + assert (forall tyl iregl fregl ofs, + list_norepet iregl -> + list_norepet fregl -> + list_disjoint iregl fregl -> + Loc.norepet (loc_arguments_rec tyl iregl fregl ofs)). + induction tyl; simpl; intros. + constructor. + destruct a; constructor. + destruct iregl. + apply loc_arguments_rec_notin_outgoing. simpl; omega. + apply loc_arguments_rec_notin_reg. simpl. inversion H. auto. + apply list_disjoint_notin with (m :: iregl); auto with coqlib. + apply IHtyl. apply drop1_norepet; auto. auto. + red; intros. apply H1. apply drop1_incl; auto. auto. + destruct fregl. + apply loc_arguments_rec_notin_outgoing. simpl; omega. + apply loc_arguments_rec_notin_reg. simpl. + red; intro. apply (H1 m m). apply drop2_incl; auto. + auto with coqlib. auto. + simpl. inversion H0. auto. + apply IHtyl. apply drop2_norepet; auto. apply drop1_norepet; auto. + red; intros. apply H1. apply drop2_incl; auto. apply drop1_incl; auto. + + intro. unfold loc_arguments. apply H. + unfold int_param_regs. NoRepet. + unfold float_param_regs. NoRepet. + red; intros x y; simpl. ElimOrEq; ElimOrEq; discriminate. +Qed. + +(** The offsets of [Outgoing] arguments are below [size_arguments s]. *) + +Lemma loc_arguments_bounded: + forall (s: signature) (ofs: Z) (ty: typ), + In (S (Outgoing ofs ty)) (loc_arguments s) -> + ofs + typesize ty <= size_arguments s. +Proof. + intros. + assert (forall tyl, size_arguments_rec tyl >= 6). + induction tyl; unfold size_arguments_rec; fold size_arguments_rec; intros. + omega. + destruct a; omega. + assert (forall tyl iregl fregl ofs0, + In (S (Outgoing ofs ty)) (loc_arguments_rec tyl iregl fregl ofs0) -> + ofs + typesize ty <= size_arguments_rec tyl + ofs0 - 6). + induction tyl; simpl loc_arguments_rec; intros. + elim H1. + unfold size_arguments_rec; fold size_arguments_rec. + destruct a. + elim H1; intro. destruct iregl; simplify_eq H2; intros. + subst ty; subst ofs. generalize (H0 tyl). simpl typesize. omega. + generalize (IHtyl _ _ _ H2). omega. + elim H1; intro. destruct fregl; simplify_eq H2; intros. + subst ty; subst ofs. generalize (H0 tyl). simpl typesize. omega. + generalize (IHtyl _ _ _ H2). omega. + replace (size_arguments s) with (size_arguments s + 6 - 6). + unfold size_arguments. eapply H1. unfold loc_arguments in H. eauto. + omega. +Qed. + +(** Temporary registers do not overlap with argument locations. *) + +Lemma loc_arguments_not_temporaries: + forall sig, Loc.disjoint (loc_arguments sig) temporaries. +Proof. + intros; red; intros x1 x2 H. + generalize (loc_arguments_rec_charact _ _ _ _ _ H). + destruct x1. + intro H0; elim H0; simpl; (ElimOrEq; ElimOrEq; congruence). + destruct s; try contradiction. intro. + simpl; ElimOrEq; auto. +Qed. +Hint Resolve loc_arguments_not_temporaries: locs. + +(** Callee-save registers do not overlap with argument locations. *) + +Lemma arguments_not_preserved: + forall sig l, + Loc.notin l destroyed_at_call -> loc_acceptable l -> + Loc.notin l (loc_arguments sig). +Proof. + intros. unfold loc_arguments. destruct l. + apply loc_arguments_rec_notin_reg. + generalize (Loc.notin_not_in _ _ H). intro; red; intro. + apply H1. generalize H2. simpl. ElimOrEq; OrEq. + generalize (Loc.notin_not_in _ _ H). intro; red; intro. + apply H1. generalize H2. simpl. ElimOrEq; OrEq. + destruct s; simpl in H0; try contradiction. + apply loc_arguments_rec_notin_local. +Qed. +Hint Resolve arguments_not_preserved: locs. + +(** Argument locations agree in number with the function signature. *) + +Lemma loc_arguments_length: + forall sig, + List.length (loc_arguments sig) = List.length sig.(sig_args). +Proof. + assert (forall tyl iregl fregl ofs, + List.length (loc_arguments_rec tyl iregl fregl ofs) = List.length tyl). + induction tyl; simpl; intros. + auto. + destruct a; simpl; decEq; auto. + intros. unfold loc_arguments. auto. +Qed. + +(** Argument locations agree in types with the function signature. *) + +Lemma loc_arguments_type: + forall sig, List.map Loc.type (loc_arguments sig) = sig.(sig_args). +Proof. + assert (forall tyl iregl fregl ofs, + (forall r, In r iregl -> mreg_type r = Tint) -> + (forall r, In r fregl -> mreg_type r = Tfloat) -> + List.map Loc.type (loc_arguments_rec tyl iregl fregl ofs) = tyl). + induction tyl; simpl; intros. + auto. + destruct a; simpl; apply (f_equal2 (@cons typ)). + destruct iregl. reflexivity. simpl. apply H. auto with coqlib. + apply IHtyl. + intros. apply H. apply drop1_incl. auto. auto. + destruct fregl. reflexivity. simpl. apply H0. auto with coqlib. + apply IHtyl. + intros. apply H. apply drop2_incl. auto. + intros. apply H0. apply drop1_incl. auto. + + intros. unfold loc_arguments. apply H. + intro; simpl. ElimOrEq; reflexivity. + intro; simpl. ElimOrEq; reflexivity. +Qed. + +(** There is no partial overlap between an argument location and an + acceptable location: they are either identical or disjoint. *) + +Lemma no_overlap_arguments: + forall args sg, + locs_acceptable args -> + Loc.no_overlap args (loc_arguments sg). +Proof. + unfold Loc.no_overlap; intros. + generalize (H r H0). + generalize (loc_arguments_acceptable _ _ H1). + destruct s; destruct r; simpl. + intros. case (mreg_eq m0 m); intro. left; congruence. tauto. + intros. right; destruct s; auto. + intros. right. auto. + destruct s; try tauto. destruct s0; tauto. +Qed. + +(** ** Location of function parameters *) + +(** A function finds the values of its parameter in the same locations + where its caller stored them, except that the stack-allocated arguments, + viewed as [Outgoing] slots by the caller, are accessed via [Incoming] + slots (at the same offsets and types) in the callee. *) + +Definition parameter_of_argument (l: loc) : loc := + match l with + | S (Outgoing n ty) => S (Incoming n ty) + | _ => l + end. + +Definition loc_parameters (s: signature) := + List.map parameter_of_argument (loc_arguments s). + +Lemma loc_parameters_type: + forall sig, List.map Loc.type (loc_parameters sig) = sig.(sig_args). +Proof. + intros. unfold loc_parameters. + rewrite list_map_compose. + rewrite <- loc_arguments_type. + apply list_map_exten. + intros. destruct x; simpl. auto. + destruct s; reflexivity. +Qed. + +Lemma loc_parameters_not_temporaries: + forall sig, Loc.disjoint (loc_parameters sig) temporaries. +Proof. + intro; red; intros. + unfold loc_parameters in H. + elim (list_in_map_inv _ _ _ H). intros y [EQ IN]. + generalize (loc_arguments_not_temporaries sig y x2 IN H0). + subst x1. destruct x2. + destruct y; simpl. auto. destruct s; auto. + byContradiction. generalize H0. simpl. NotOrEq. +Qed. + +Lemma no_overlap_parameters: + forall params sg, + locs_acceptable params -> + Loc.no_overlap (loc_parameters sg) params. +Proof. + unfold Loc.no_overlap; intros. + unfold loc_parameters in H0. + elim (list_in_map_inv _ _ _ H0). intros t [EQ IN]. + rewrite EQ. + generalize (loc_arguments_acceptable _ _ IN). + generalize (H s H1). + destruct s; destruct t; simpl. + intros. case (mreg_eq m0 m); intro. left; congruence. tauto. + intros. right; destruct s; simpl; auto. + intros; right; auto. + destruct s; try tauto. destruct s0; try tauto. + intros; simpl. tauto. +Qed. + diff --git a/backend/Csharpminor.v b/backend/Csharpminor.v new file mode 100644 index 00000000..858d9454 --- /dev/null +++ b/backend/Csharpminor.v @@ -0,0 +1,511 @@ +(** Abstract syntax and semantics for the Csharpminor language. *) + +Require Import Coqlib. +Require Import Maps. +Require Import AST. +Require Import Integers. +Require Import Floats. +Require Import Values. +Require Import Mem. +Require Import Globalenvs. + +(** Abstract syntax *) + +(** Cminor is a low-level imperative language structured in expressions, + statements, functions and programs. Expressions include + reading and writing local variables, reading and writing store locations, + arithmetic operations, function calls, and conditional expressions + (similar to [e1 ? e2 : e3] in C). The [Elet] and [Eletvar] constructs + enable sharing the computations of subexpressions. De Bruijn notation + is used: [Eletvar n] refers to the value bound by then [n+1]-th enclosing + [Elet] construct. + + Unlike in Cminor (the next intermediate language of the back-end), + Csharpminor local variables reside in memory, and their address can + be taken using [Eaddrof] expressions. + + Another difference with Cminor is that Csharpminor is entirely + processor-neutral. In particular, Csharpminor uses a standard set of + operations: it does not reflect processor-specific operations nor + addressing modes. *) + +Inductive operation : Set := + | Ointconst: int -> operation (**r integer constant *) + | Ofloatconst: float -> operation (**r floating-point constant *) + | Ocast8unsigned: operation (**r 8-bit zero extension *) + | Ocast8signed: operation (**r 8-bit sign extension *) + | Ocast16unsigned: operation (**r 16-bit zero extension *) + | Ocast16signed: operation (**r 16-bit sign extension *) + | Onotint: operation (**r bitwise complement *) + | Oadd: operation (**r integer addition *) + | Osub: operation (**r integer subtraction *) + | Omul: operation (**r integer multiplication *) + | Odiv: operation (**r integer signed division *) + | Odivu: operation (**r integer unsigned division *) + | Omod: operation (**r integer signed modulus *) + | Omodu: operation (**r integer unsigned modulus *) + | Oand: operation (**r bitwise ``and'' *) + | Oor: operation (**r bitwise ``or'' *) + | Oxor: operation (**r bitwise ``xor'' *) + | Oshl: operation (**r left shift *) + | Oshr: operation (**r right signed shift *) + | Oshru: operation (**r right unsigned shift *) + | Onegf: operation (**r float opposite *) + | Oabsf: operation (**r float absolute value *) + | Oaddf: operation (**r float addition *) + | Osubf: operation (**r float subtraction *) + | Omulf: operation (**r float multiplication *) + | Odivf: operation (**r float division *) + | Osingleoffloat: operation (**r float truncation *) + | Ointoffloat: operation (**r integer to float *) + | Ofloatofint: operation (**r float to signed integer *) + | Ofloatofintu: operation (**r float to unsigned integer *) + | Ocmp: comparison -> operation (**r integer signed comparison *) + | Ocmpu: comparison -> operation (**r integer unsigned comparison *) + | Ocmpf: comparison -> operation. (**r float comparison *) + +Inductive expr : Set := + | Evar : ident -> expr (**r reading a scalar variable *) + | Eaddrof : ident -> expr (**r taking the address of a variable *) + | Eassign : ident -> expr -> expr (**r assignment to a scalar variable *) + | Eop : operation -> exprlist -> expr (**r arithmetic operation *) + | Eload : memory_chunk -> expr -> expr (**r memory read *) + | Estore : memory_chunk -> expr -> expr -> expr (**r memory write *) + | Ecall : signature -> expr -> exprlist -> expr (**r function call *) + | Econdition : expr -> expr -> expr -> expr (**r conditional expression *) + | Elet : expr -> expr -> expr (**r let binding *) + | Eletvar : nat -> expr (**r reference to a let-bound variable *) + +with exprlist : Set := + | Enil: exprlist + | Econs: expr -> exprlist -> exprlist. + +(** Statements include expression evaluation, an if/then/else conditional, + infinite loops, blocks and early block exits, and early function returns. + [Sexit n] terminates prematurely the execution of the [n+1] enclosing + [Sblock] statements. *) + +Inductive stmt : Set := + | Sexpr: expr -> stmt + | Sifthenelse: expr -> stmtlist -> stmtlist -> stmt + | Sloop: stmtlist -> stmt + | Sblock: stmtlist -> stmt + | Sexit: nat -> stmt + | Sreturn: option expr -> stmt + +with stmtlist : Set := + | Snil: stmtlist + | Scons: stmt -> stmtlist -> stmtlist. + +(** The local variables of a function can be either scalar variables + (whose type, size and signedness are given by a [memory_chunk] + or array variables (of the indicated sizes). The only operation + permitted on an array variable is taking its address. *) + +Inductive local_variable : Set := + | LVscalar: memory_chunk -> local_variable + | LVarray: Z -> local_variable. + +(** Functions are composed of a signature, a list of parameter names + with associated memory chunks (parameters must be scalar), a list of + local variables with associated [local_variable] description, and a + list of statements representing the function body. *) + +Record function : Set := mkfunction { + fn_sig: signature; + fn_params: list (ident * memory_chunk); + fn_vars: list (ident * local_variable); + fn_body: stmtlist +}. + +Definition program := AST.program function. + +(** * Operational semantics *) + +(** The operational semantics for Csharpminor is given in big-step operational + style. Expressions evaluate to values, and statements evaluate to + ``outcomes'' indicating how execution should proceed afterwards. *) + +Inductive outcome: Set := + | Out_normal: outcome (**r continue in sequence *) + | Out_exit: nat -> outcome (**r terminate [n+1] enclosing blocks *) + | Out_return: option val -> outcome. (**r return immediately to caller *) + +Definition outcome_result_value + (out: outcome) (ot: option typ) (v: val) : Prop := + match out, ot with + | Out_normal, None => v = Vundef + | Out_return None, None => v = Vundef + | Out_return (Some v'), Some ty => v = v' + | _, _ => False + end. + +Definition outcome_block (out: outcome) : outcome := + match out with + | Out_normal => Out_normal + | Out_exit O => Out_normal + | Out_exit (S n) => Out_exit n + | Out_return optv => Out_return optv + end. + +(** Three kinds of evaluation environments are involved: +- [genv]: global environments, define symbols and functions; +- [env]: local environments, map local variables to memory blocks; +- [lenv]: let environments, map de Bruijn indices to values. +*) +Definition genv := Genv.t function. +Definition env := PTree.t (block * local_variable). +Definition empty_env : env := PTree.empty (block * local_variable). +Definition letenv := list val. + +Definition sizeof (lv: local_variable) : Z := + match lv with + | LVscalar chunk => size_chunk chunk + | LVarray sz => Zmax 0 sz + end. + +Definition fn_variables (f: function) := + List.map + (fun id_chunk => (fst id_chunk, LVscalar (snd id_chunk))) f.(fn_params) + ++ f.(fn_vars). + +Definition fn_params_names (f: function) := + List.map (@fst ident memory_chunk) f.(fn_params). + +Definition fn_vars_names (f: function) := + List.map (@fst ident local_variable) f.(fn_vars). + + +(** Evaluation of operator applications. *) + +Definition eval_compare_null (c: comparison) (n: int) : option val := + if Int.eq n Int.zero + then match c with Ceq => Some Vfalse | Cne => Some Vtrue | _ => None end + else None. + +Definition eval_operation (op: operation) (vl: list val) (m: mem): option val := + match op, vl with + | Ointconst n, nil => Some (Vint n) + | Ofloatconst n, nil => Some (Vfloat n) + | Ocast8unsigned, Vint n1 :: nil => Some (Vint (Int.cast8unsigned n1)) + | Ocast8signed, Vint n1 :: nil => Some (Vint (Int.cast8signed n1)) + | Ocast16unsigned, Vint n1 :: nil => Some (Vint (Int.cast16unsigned n1)) + | Ocast16signed, Vint n1 :: nil => Some (Vint (Int.cast16signed n1)) + | Onotint, Vint n1 :: nil => Some (Vint (Int.not n1)) + | Oadd, Vint n1 :: Vint n2 :: nil => Some (Vint (Int.add n1 n2)) + | Oadd, Vint n1 :: Vptr b2 n2 :: nil => Some (Vptr b2 (Int.add n2 n1)) + | Oadd, Vptr b1 n1 :: Vint n2 :: nil => Some (Vptr b1 (Int.add n1 n2)) + | Osub, Vint n1 :: Vint n2 :: nil => Some (Vint (Int.sub n1 n2)) + | Osub, Vptr b1 n1 :: Vint n2 :: nil => Some (Vptr b1 (Int.sub n1 n2)) + | Osub, Vptr b1 n1 :: Vptr b2 n2 :: nil => + if eq_block b1 b2 then Some (Vint (Int.sub n1 n2)) else None + | Omul, Vint n1 :: Vint n2 :: nil => Some (Vint (Int.mul n1 n2)) + | Odiv, Vint n1 :: Vint n2 :: nil => + if Int.eq n2 Int.zero then None else Some (Vint (Int.divs n1 n2)) + | Odivu, Vint n1 :: Vint n2 :: nil => + if Int.eq n2 Int.zero then None else Some (Vint (Int.divu n1 n2)) + | Omod, Vint n1 :: Vint n2 :: nil => + if Int.eq n2 Int.zero then None else Some (Vint (Int.mods n1 n2)) + | Omodu, Vint n1 :: Vint n2 :: nil => + if Int.eq n2 Int.zero then None else Some (Vint (Int.modu n1 n2)) + | Oand, Vint n1 :: Vint n2 :: nil => Some (Vint (Int.and n1 n2)) + | Oor, Vint n1 :: Vint n2 :: nil => Some (Vint (Int.or n1 n2)) + | Oxor, Vint n1 :: Vint n2 :: nil => Some (Vint (Int.xor n1 n2)) + | Oshl, Vint n1 :: Vint n2 :: nil => + if Int.ltu n2 (Int.repr 32) then Some (Vint (Int.shl n1 n2)) else None + | Oshr, Vint n1 :: Vint n2 :: nil => + if Int.ltu n2 (Int.repr 32) then Some (Vint (Int.shr n1 n2)) else None + | Oshru, Vint n1 :: Vint n2 :: nil => + if Int.ltu n2 (Int.repr 32) then Some (Vint (Int.shru n1 n2)) else None + | Onegf, Vfloat f1 :: nil => Some (Vfloat (Float.neg f1)) + | Oabsf, Vfloat f1 :: nil => Some (Vfloat (Float.abs f1)) + | Oaddf, Vfloat f1 :: Vfloat f2 :: nil => Some (Vfloat (Float.add f1 f2)) + | Osubf, Vfloat f1 :: Vfloat f2 :: nil => Some (Vfloat (Float.sub f1 f2)) + | Omulf, Vfloat f1 :: Vfloat f2 :: nil => Some (Vfloat (Float.mul f1 f2)) + | Odivf, Vfloat f1 :: Vfloat f2 :: nil => Some (Vfloat (Float.div f1 f2)) + | Osingleoffloat, Vfloat f1 :: nil => + Some (Vfloat (Float.singleoffloat f1)) + | Ointoffloat, Vfloat f1 :: nil => + Some (Vint (Float.intoffloat f1)) + | Ofloatofint, Vint n1 :: nil => + Some (Vfloat (Float.floatofint n1)) + | Ofloatofintu, Vint n1 :: nil => + Some (Vfloat (Float.floatofintu n1)) + | Ocmp c, Vint n1 :: Vint n2 :: nil => + Some (Val.of_bool(Int.cmp c n1 n2)) + | Ocmp c, Vptr b1 n1 :: Vptr b2 n2 :: nil => + if valid_pointer m b1 (Int.signed n1) + && valid_pointer m b2 (Int.signed n2) then + if eq_block b1 b2 then Some(Val.of_bool(Int.cmp c n1 n2)) else None + else + None + | Ocmp c, Vptr b1 n1 :: Vint n2 :: nil => eval_compare_null c n2 + | Ocmp c, Vint n1 :: Vptr b2 n2 :: nil => eval_compare_null c n1 + | Ocmpu c, Vint n1 :: Vint n2 :: nil => + Some (Val.of_bool(Int.cmpu c n1 n2)) + | Ocmpf c, Vfloat f1 :: Vfloat f2 :: nil => + Some (Val.of_bool (Float.cmp c f1 f2)) + | _, _ => None + end. + +(** ``Casting'' a value to a memory chunk. The value is truncated and + zero- or sign-extended as dictated by the memory chunk. *) + +Definition cast (chunk: memory_chunk) (v: val) : option val := + match chunk, v with + | Mint8signed, Vint n => Some (Vint (Int.cast8signed n)) + | Mint8unsigned, Vint n => Some (Vint (Int.cast8unsigned n)) + | Mint16signed, Vint n => Some (Vint (Int.cast16signed n)) + | Mint16unsigned, Vint n => Some (Vint (Int.cast16unsigned n)) + | Mint32, Vint n => Some(Vint n) + | Mint32, Vptr b ofs => Some(Vptr b ofs) + | Mfloat32, Vfloat f => Some(Vfloat(Float.singleoffloat f)) + | Mfloat64, Vfloat f => Some(Vfloat f) + | _, _ => None + end. + +(** Allocation of local variables at function entry. Each variable is + bound to the reference to a fresh block of the appropriate size. *) + +Inductive alloc_variables: env -> mem -> + list (ident * local_variable) -> + env -> mem -> list block -> Prop := + | alloc_variables_nil: + forall e m, + alloc_variables e m nil e m nil + | alloc_variables_cons: + forall e m id lv vars m1 b1 m2 e2 lb, + Mem.alloc m 0 (sizeof lv) = (m1, b1) -> + alloc_variables (PTree.set id (b1, lv) e) m1 vars e2 m2 lb -> + alloc_variables e m ((id, lv) :: vars) e2 m2 (b1 :: lb). + +(** Initialization of local variables that are parameters. The value + of the corresponding argument is stored into the memory block + bound to the parameter. *) + +Inductive bind_parameters: env -> + mem -> list (ident * memory_chunk) -> list val -> + mem -> Prop := + | bind_parameters_nil: + forall e m, + bind_parameters e m nil nil m + | bind_parameters_cons: + forall e m id chunk params v1 v2 vl b m1 m2, + PTree.get id e = Some(b, LVscalar chunk) -> + cast chunk v1 = Some v2 -> + Mem.store chunk m b 0 v2 = Some m1 -> + bind_parameters e m1 params vl m2 -> + bind_parameters e m ((id, chunk) :: params) (v1 :: vl) m2. + +Section RELSEM. + +Variable ge: genv. + +(** Evaluation of an expression: [eval_expr ge le e m a m' v] states + that expression [a], in initial memory state [m], evaluates to value + [v]. [m'] is the final memory state, respectively, reflecting + memory stores possibly performed by [a]. [ge], [e] and [le] are the + global environment, local environment and let environment + respectively. They do not change during evaluation. *) + +Inductive eval_expr: + letenv -> env -> + mem -> expr -> mem -> val -> Prop := + | eval_Evar: + forall le e m id b chunk v, + PTree.get id e = Some (b, LVscalar chunk) -> + Mem.load chunk m b 0 = Some v -> + eval_expr le e m (Evar id) m v + | eval_Eassign: + forall le e m id a m1 b chunk v1 v2 m2, + eval_expr le e m a m1 v1 -> + PTree.get id e = Some (b, LVscalar chunk) -> + cast chunk v1 = Some v2 -> + Mem.store chunk m1 b 0 v2 = Some m2 -> + eval_expr le e m (Eassign id a) m2 v2 + | eval_Eaddrof_local: + forall le e m id b lv, + PTree.get id e = Some (b, lv) -> + eval_expr le e m (Eaddrof id) m (Vptr b Int.zero) + | eval_Eaddrof_global: + forall le e m id b, + PTree.get id e = None -> + Genv.find_symbol ge id = Some b -> + eval_expr le e m (Eaddrof id) m (Vptr b Int.zero) + | eval_Eop: + forall le e m op al m1 vl v, + eval_exprlist le e m al m1 vl -> + eval_operation op vl m1 = Some v -> + eval_expr le e m (Eop op al) m1 v + | eval_Eload: + forall le e m chunk a m1 v1 v, + eval_expr le e m a m1 v1 -> + Mem.loadv chunk m1 v1 = Some v -> + eval_expr le e m (Eload chunk a) m1 v + | eval_Estore: + forall le e m chunk a b m1 v1 m2 v2 m3 v3, + eval_expr le e m a m1 v1 -> + eval_expr le e m1 b m2 v2 -> + cast chunk v2 = Some v3 -> + Mem.storev chunk m2 v1 v3 = Some m3 -> + eval_expr le e m (Estore chunk a b) m3 v3 + | eval_Ecall: + forall le e m sig a bl m1 m2 m3 vf vargs vres f, + eval_expr le e m a m1 vf -> + eval_exprlist le e m1 bl m2 vargs -> + Genv.find_funct ge vf = Some f -> + f.(fn_sig) = sig -> + eval_funcall m2 f vargs m3 vres -> + eval_expr le e m (Ecall sig a bl) m3 vres + | eval_Econdition_true: + forall le e m a b c m1 v1 m2 v2, + eval_expr le e m a m1 v1 -> + Val.is_true v1 -> + eval_expr le e m1 b m2 v2 -> + eval_expr le e m (Econdition a b c) m2 v2 + | eval_Econdition_false: + forall le e m a b c m1 v1 m2 v2, + eval_expr le e m a m1 v1 -> + Val.is_false v1 -> + eval_expr le e m1 c m2 v2 -> + eval_expr le e m (Econdition a b c) m2 v2 + | eval_Elet: + forall le e m a b m1 v1 m2 v2, + eval_expr le e m a m1 v1 -> + eval_expr (v1::le) e m1 b m2 v2 -> + eval_expr le e m (Elet a b) m2 v2 + | eval_Eletvar: + forall le e m n v, + nth_error le n = Some v -> + eval_expr le e m (Eletvar n) m v + +(** Evaluation of a list of expressions: + [eval_exprlist ge le al m a m' vl] + states that the list [al] of expressions evaluate + to the list [vl] of values. + The other parameters are as in [eval_expr]. +*) + +with eval_exprlist: + letenv -> env -> + mem -> exprlist -> + mem -> list val -> Prop := + | eval_Enil: + forall le e m, + eval_exprlist le e m Enil m nil + | eval_Econs: + forall le e m a bl m1 v m2 vl, + eval_expr le e m a m1 v -> + eval_exprlist le e m1 bl m2 vl -> + eval_exprlist le e m (Econs a bl) m2 (v :: vl) + +(** Evaluation of a function invocation: [eval_funcall ge m f args m' res] + means that the function [f], applied to the arguments [args] in + memory state [m], returns the value [res] in modified memory state [m']. +*) +with eval_funcall: + mem -> function -> list val -> + mem -> val -> Prop := + | eval_funcall_intro: + forall m f vargs e m1 lb m2 m3 out vres, + list_norepet (fn_params_names f ++ fn_vars_names f) -> + alloc_variables empty_env m (fn_variables f) e m1 lb -> + bind_parameters e m1 f.(fn_params) vargs m2 -> + exec_stmtlist e m2 f.(fn_body) m3 out -> + outcome_result_value out f.(fn_sig).(sig_res) vres -> + eval_funcall m f vargs (Mem.free_list m3 lb) vres + +(** Execution of a statement: [exec_stmt ge e m s m' out] + means that statement [s] executes with outcome [out]. + The other parameters are as in [eval_expr]. *) + +with exec_stmt: + env -> + mem -> stmt -> + mem -> outcome -> Prop := + | exec_Sexpr: + forall e m a m1 v, + eval_expr nil e m a m1 v -> + exec_stmt e m (Sexpr a) m1 Out_normal + | exec_Sifthenelse_true: + forall e m a sl1 sl2 m1 v1 m2 out, + eval_expr nil e m a m1 v1 -> + Val.is_true v1 -> + exec_stmtlist e m1 sl1 m2 out -> + exec_stmt e m (Sifthenelse a sl1 sl2) m2 out + | exec_Sifthenelse_false: + forall e m a sl1 sl2 m1 v1 m2 out, + eval_expr nil e m a m1 v1 -> + Val.is_false v1 -> + exec_stmtlist e m1 sl2 m2 out -> + exec_stmt e m (Sifthenelse a sl1 sl2) m2 out + | exec_Sloop_loop: + forall e m sl m1 m2 out, + exec_stmtlist e m sl m1 Out_normal -> + exec_stmt e m1 (Sloop sl) m2 out -> + exec_stmt e m (Sloop sl) m2 out + | exec_Sloop_stop: + forall e m sl m1 out, + exec_stmtlist e m sl m1 out -> + out <> Out_normal -> + exec_stmt e m (Sloop sl) m1 out + | exec_Sblock: + forall e m sl m1 out, + exec_stmtlist e m sl m1 out -> + exec_stmt e m (Sblock sl) m1 (outcome_block out) + | exec_Sexit: + forall e m n, + exec_stmt e m (Sexit n) m (Out_exit n) + | exec_Sreturn_none: + forall e m, + exec_stmt e m (Sreturn None) m (Out_return None) + | exec_Sreturn_some: + forall e m a m1 v, + eval_expr nil e m a m1 v -> + exec_stmt e m (Sreturn (Some a)) m1 (Out_return (Some v)) + +(** Execution of a list of statements: [exec_stmtlist ge e m sl m' out] + means that the list [sl] of statements executes sequentially + with outcome [out]. Execution stops at the first statement that + leads an outcome different from [Out_normal]. + The other parameters are as in [eval_expr]. *) + +with exec_stmtlist: + env -> + mem -> stmtlist -> + mem -> outcome -> Prop := + | exec_Snil: + forall e m, + exec_stmtlist e m Snil m Out_normal + | exec_Scons_continue: + forall e m s sl m1 m2 out, + exec_stmt e m s m1 Out_normal -> + exec_stmtlist e m1 sl m2 out -> + exec_stmtlist e m (Scons s sl) m2 out + | exec_Scons_stop: + forall e m s sl m1 out, + exec_stmt e m s m1 out -> + out <> Out_normal -> + exec_stmtlist e m (Scons s sl) m1 out. + +Scheme eval_expr_ind5 := Minimality for eval_expr Sort Prop + with eval_exprlist_ind5 := Minimality for eval_exprlist Sort Prop + with eval_funcall_ind5 := Minimality for eval_funcall Sort Prop + with exec_stmt_ind5 := Minimality for exec_stmt Sort Prop + with exec_stmtlist_ind5 := Minimality for exec_stmtlist Sort Prop. + +End RELSEM. + +(** Execution of a whole program: [exec_program p r] + holds if the application of [p]'s main function to no arguments + in the initial memory state for [p] eventually returns value [r]. *) + +Definition exec_program (p: program) (r: val) : Prop := + let ge := Genv.globalenv p in + let m0 := Genv.init_mem p in + exists b, exists f, exists m, + Genv.find_symbol ge p.(prog_main) = Some b /\ + Genv.find_funct_ptr ge b = Some f /\ + f.(fn_sig) = mksignature nil (Some Tint) /\ + eval_funcall ge m0 f nil m r. + diff --git a/backend/Globalenvs.v b/backend/Globalenvs.v new file mode 100644 index 00000000..55afc353 --- /dev/null +++ b/backend/Globalenvs.v @@ -0,0 +1,587 @@ +(** Global environments are a component of the dynamic semantics of + all languages involved in the compiler. A global environment + maps symbol names (names of functions and of global variables) + to the corresponding memory addresses. It also maps memory addresses + of functions to the corresponding function descriptions. + + Global environments, along with the initial memory state at the beginning + of program execution, are built from the program of interest, as follows: +- A distinct memory address is assigned to each function of the program. + These function addresses use negative numbers to distinguish them from + addresses of memory blocks. The associations of function name to function + address and function address to function description are recorded in + the global environment. +- For each global variable, a memory block is allocated and associated to + the name of the variable. + + These operations reflect (at a high level of abstraction) what takes + place during program linking and program loading in a real operating + system. *) + +Require Import Coqlib. +Require Import Maps. +Require Import AST. +Require Import Integers. +Require Import Values. +Require Import Mem. + +Set Implicit Arguments. + +Module Type GENV. + +(** ** Types and operations *) + + Variable t: Set -> Set. + (** The type of global environments. The parameter [F] is the type + of function descriptions. *) + + Variable globalenv: forall (F: Set), program F -> t F. + (** Return the global environment for the given program. *) + + Variable init_mem: forall (F: Set), program F -> mem. + (** Return the initial memory state for the given program. *) + + Variable find_funct_ptr: forall (F: Set), t F -> block -> option F. + (** Return the function description associated with the given address, + if any. *) + + Variable find_funct: forall (F: Set), t F -> val -> option F. + (** Same as [find_funct_ptr] but the function address is given as + a value, which must be a pointer with offset 0. *) + + Variable find_symbol: forall (F: Set), t F -> ident -> option block. + (** Return the address of the given global symbol, if any. *) + +(** ** Properties of the operations. *) + + Hypothesis find_funct_inv: + forall (F: Set) (ge: t F) (v: val) (f: F), + find_funct ge v = Some f -> exists b, v = Vptr b Int.zero. + Hypothesis find_funct_find_funct_ptr: + forall (F: Set) (ge: t F) (b: block), + find_funct ge (Vptr b Int.zero) = find_funct_ptr ge b. + Hypothesis find_funct_ptr_prop: + forall (F: Set) (P: F -> Prop) (p: program F) (b: block) (f: F), + (forall id f, In (id, f) (prog_funct p) -> P f) -> + find_funct_ptr (globalenv p) b = Some f -> + P f. + Hypothesis find_funct_prop: + forall (F: Set) (P: F -> Prop) (p: program F) (v: val) (f: F), + (forall id f, In (id, f) (prog_funct p) -> P f) -> + find_funct (globalenv p) v = Some f -> + P f. + Hypothesis initmem_nullptr: + forall (F: Set) (p: program F), + let m := init_mem p in + valid_block m nullptr /\ + m.(blocks) nullptr = empty_block 0 0. + Hypothesis initmem_undef: + forall (F: Set) (p: program F) (b: block), + exists lo, exists hi, + (init_mem p).(blocks) b = empty_block lo hi. + Hypothesis find_funct_ptr_inv: + forall (F: Set) (p: program F) (b: block) (f: F), + find_funct_ptr (globalenv p) b = Some f -> b < 0. + Hypothesis find_symbol_inv: + forall (F: Set) (p: program F) (id: ident) (b: block), + find_symbol (globalenv p) id = Some b -> b < nextblock (init_mem p). + +(** Commutation properties between program transformations + and operations over global environments. *) + + Hypothesis find_funct_ptr_transf: + forall (A B: Set) (transf: A -> B) (p: program A) (b: block) (f: A), + find_funct_ptr (globalenv p) b = Some f -> + find_funct_ptr (globalenv (transform_program transf p)) b = Some (transf f). + Hypothesis find_funct_transf: + forall (A B: Set) (transf: A -> B) (p: program A) (v: val) (f: A), + find_funct (globalenv p) v = Some f -> + find_funct (globalenv (transform_program transf p)) v = Some (transf f). + Hypothesis find_symbol_transf: + forall (A B: Set) (transf: A -> B) (p: program A) (s: ident), + find_symbol (globalenv (transform_program transf p)) s = + find_symbol (globalenv p) s. + Hypothesis init_mem_transf: + forall (A B: Set) (transf: A -> B) (p: program A), + init_mem (transform_program transf p) = init_mem p. + +(** Commutation properties between partial program transformations + and operations over global environments. *) + + Hypothesis find_funct_ptr_transf_partial: + forall (A B: Set) (transf: A -> option B) + (p: program A) (p': program B), + transform_partial_program transf p = Some p' -> + forall (b: block) (f: A), + find_funct_ptr (globalenv p) b = Some f -> + find_funct_ptr (globalenv p') b = transf f /\ transf f <> None. + Hypothesis find_funct_transf_partial: + forall (A B: Set) (transf: A -> option B) + (p: program A) (p': program B), + transform_partial_program transf p = Some p' -> + forall (v: val) (f: A), + find_funct (globalenv p) v = Some f -> + find_funct (globalenv p') v = transf f /\ transf f <> None. + Hypothesis find_symbol_transf_partial: + forall (A B: Set) (transf: A -> option B) + (p: program A) (p': program B), + transform_partial_program transf p = Some p' -> + forall (s: ident), + find_symbol (globalenv p') s = find_symbol (globalenv p) s. + Hypothesis init_mem_transf_partial: + forall (A B: Set) (transf: A -> option B) + (p: program A) (p': program B), + transform_partial_program transf p = Some p' -> + init_mem p' = init_mem p. +End GENV. + +(** The rest of this library is a straightforward implementation of + the module signature above. *) + +Module Genv: GENV. + +Section GENV. + +Variable funct: Set. (* The type of functions *) + +Record genv : Set := mkgenv { + functions: ZMap.t (option funct); (* mapping function ptr -> function *) + nextfunction: Z; + symbols: PTree.t block (* mapping symbol -> block *) +}. + +Definition t := genv. + +Definition add_funct (name_fun: (ident * funct)) (g: genv) : genv := + let b := g.(nextfunction) in + mkgenv (ZMap.set b (Some (snd name_fun)) g.(functions)) + (Zpred b) + (PTree.set (fst name_fun) b g.(symbols)). + +Definition add_symbol (name: ident) (b: block) (g: genv) : genv := + mkgenv g.(functions) + g.(nextfunction) + (PTree.set name b g.(symbols)). + +Definition find_funct_ptr (g: genv) (b: block) : option funct := + ZMap.get b g.(functions). + +Definition find_funct (g: genv) (v: val) : option funct := + match v with + | Vptr b ofs => + if Int.eq ofs Int.zero then find_funct_ptr g b else None + | _ => + None + end. + +Definition find_symbol (g: genv) (symb: ident) : option block := + PTree.get symb g.(symbols). + +Lemma find_funct_inv: + forall (ge: t) (v: val) (f: funct), + find_funct ge v = Some f -> exists b, v = Vptr b Int.zero. +Proof. + intros until f. unfold find_funct. destruct v; try (intros; discriminate). + generalize (Int.eq_spec i Int.zero). case (Int.eq i Int.zero); intros. + exists b. congruence. + discriminate. +Qed. + +Lemma find_funct_find_funct_ptr: + forall (ge: t) (b: block), + find_funct ge (Vptr b Int.zero) = find_funct_ptr ge b. +Proof. + intros. simpl. + generalize (Int.eq_spec Int.zero Int.zero). + case (Int.eq Int.zero Int.zero); intros. + auto. tauto. +Qed. + +(* Construct environment and initial memory store *) + +Definition empty : genv := + mkgenv (ZMap.init None) (-1) (PTree.empty block). + +Definition add_functs (init: genv) (fns: list (ident * funct)) : genv := + List.fold_right add_funct init fns. + +Definition add_globals + (init: genv * mem) (vars: list (ident * Z)) : genv * mem := + List.fold_right + (fun (id_sz: ident * Z) (g_st: genv * mem) => + let (id, sz) := id_sz in + let (g, st) := g_st in + let (st', b) := Mem.alloc st 0 sz in + (add_symbol id b g, st')) + init vars. + +Definition globalenv_initmem (p: program funct) : (genv * mem) := + add_globals + (add_functs empty p.(prog_funct), Mem.empty) + p.(prog_vars). + +Definition globalenv (p: program funct) : genv := + fst (globalenv_initmem p). +Definition init_mem (p: program funct) : mem := + snd (globalenv_initmem p). + +Lemma functions_globalenv: + forall (p: program funct), + functions (globalenv p) = functions (add_functs empty p.(prog_funct)). +Proof. + assert (forall (init: genv * mem) (vars: list (ident * Z)), + functions (fst (add_globals init vars)) = functions (fst init)). + induction vars; simpl. + reflexivity. + destruct a. destruct (add_globals init vars). + simpl. exact IHvars. + + unfold add_globals; simpl. + intros. unfold globalenv; unfold globalenv_initmem. + rewrite H. reflexivity. +Qed. + +Lemma initmem_nullptr: + forall (p: program funct), + let m := init_mem p in + valid_block m nullptr /\ + m.(blocks) nullptr = mkblock 0 0 (fun y => Undef) (undef_undef_outside 0 0). +Proof. + assert + (forall (init: genv * mem), + let m1 := snd init in + 0 < m1.(nextblock) -> + m1.(blocks) nullptr = mkblock 0 0 (fun y => Undef) (undef_undef_outside 0 0) -> + forall (vars: list (ident * Z)), + let m2 := snd (add_globals init vars) in + 0 < m2.(nextblock) /\ + m2.(blocks) nullptr = mkblock 0 0 (fun y => Undef) (undef_undef_outside 0 0)). + induction vars; simpl; intros. + tauto. + destruct a. + caseEq (add_globals init vars). intros g m2 EQ. + rewrite EQ in IHvars. simpl in IHvars. elim IHvars; intros. + simpl. split. omega. + rewrite update_o. auto. apply sym_not_equal. apply Zlt_not_eq. exact H1. + + intro. unfold init_mem. unfold globalenv_initmem. + unfold valid_block. apply H. simpl. omega. reflexivity. +Qed. + +Lemma initmem_undef: + forall (p: program funct) (b: block), + exists lo, exists hi, + (init_mem p).(blocks) b = empty_block lo hi. +Proof. + assert (forall g0 vars g1 m b, + add_globals (g0, Mem.empty) vars = (g1, m) -> + exists lo, exists hi, + m.(blocks) b = empty_block lo hi). + induction vars; simpl. + intros. inversion H. unfold Mem.empty; simpl. + exists 0; exists 0. auto. + destruct a. caseEq (add_globals (g0, Mem.empty) vars). intros g1 m1 EQ. + intros g m b EQ1. injection EQ1; intros EQ2 EQ3; clear EQ1. + rewrite <- EQ2; simpl. unfold update. + case (zeq b (nextblock m1)); intro. + exists 0; exists z; auto. + eauto. + intros. caseEq (globalenv_initmem p). + intros g m EQ. unfold init_mem; rewrite EQ; simpl. + unfold globalenv_initmem in EQ. eauto. +Qed. + +Remark nextfunction_add_functs_neg: + forall fns, nextfunction (add_functs empty fns) < 0. +Proof. + induction fns; simpl; intros. omega. unfold Zpred. omega. +Qed. + +Theorem find_funct_ptr_inv: + forall (p: program funct) (b: block) (f: funct), + find_funct_ptr (globalenv p) b = Some f -> b < 0. +Proof. + intros until f. + assert (forall fns, ZMap.get b (functions (add_functs empty fns)) = Some f -> b < 0). + induction fns; simpl. + rewrite ZMap.gi. congruence. + rewrite ZMap.gsspec. case (ZIndexed.eq b (nextfunction (add_functs empty fns))); intro. + intro. rewrite e. apply nextfunction_add_functs_neg. + auto. + unfold find_funct_ptr. rewrite functions_globalenv. + intros. eauto. +Qed. + +Theorem find_symbol_inv: + forall (p: program funct) (id: ident) (b: block), + find_symbol (globalenv p) id = Some b -> b < nextblock (init_mem p). +Proof. + assert (forall fns s b, + (symbols (add_functs empty fns)) ! s = Some b -> b < 0). + induction fns; simpl; intros until b. + rewrite PTree.gempty. congruence. + rewrite PTree.gsspec. destruct a; simpl. case (peq s i); intro. + intro EQ; inversion EQ. apply nextfunction_add_functs_neg. + eauto. + assert (forall fns vars g m s b, + add_globals (add_functs empty fns, Mem.empty) vars = (g, m) -> + (symbols g)!s = Some b -> + b < nextblock m). + induction vars; simpl; intros until b. + intros. inversion H0. subst g m. simpl. + generalize (H fns s b H1). omega. + destruct a. caseEq (add_globals (add_functs empty fns, Mem.empty) vars). + intros g1 m1 ADG EQ. inversion EQ; subst g m; clear EQ. + unfold add_symbol; simpl. rewrite PTree.gsspec. case (peq s i); intro. + intro EQ; inversion EQ. omega. + intro. generalize (IHvars _ _ _ _ ADG H0). omega. + intros until b. unfold find_symbol, globalenv, init_mem, globalenv_initmem; simpl. + caseEq (add_globals (add_functs empty (prog_funct p), Mem.empty) + (prog_vars p)); intros g m EQ. + simpl; intros. eauto. +Qed. + +End GENV. + +(* Invariants on functions *) +Lemma find_funct_ptr_prop: + forall (F: Set) (P: F -> Prop) (p: program F) (b: block) (f: F), + (forall id f, In (id, f) (prog_funct p) -> P f) -> + find_funct_ptr (globalenv p) b = Some f -> + P f. +Proof. + intros until f. + unfold find_funct_ptr. rewrite functions_globalenv. + generalize (prog_funct p). induction l; simpl. + rewrite ZMap.gi. intros; discriminate. + rewrite ZMap.gsspec. + case (ZIndexed.eq b (nextfunction (add_functs (empty F) l))); intros. + apply H with (fst a). left. destruct a. simpl in *. congruence. + apply IHl. intros. apply H with id. right. auto. auto. +Qed. + +Lemma find_funct_prop: + forall (F: Set) (P: F -> Prop) (p: program F) (v: val) (f: F), + (forall id f, In (id, f) (prog_funct p) -> P f) -> + find_funct (globalenv p) v = Some f -> + P f. +Proof. + intros until f. unfold find_funct. + destruct v; try (intros; discriminate). + case (Int.eq i Int.zero); [idtac | intros; discriminate]. + intros. eapply find_funct_ptr_prop; eauto. +Qed. + +(* Global environments and program transformations. *) + +Section TRANSF_PROGRAM_PARTIAL. + +Variable A B: Set. +Variable transf: A -> option B. +Variable p: program A. +Variable p': program B. +Hypothesis transf_OK: transform_partial_program transf p = Some p'. + +Lemma add_functs_transf: + forall (fns: list (ident * A)) (tfns: list (ident * B)), + transf_partial_program transf fns = Some tfns -> + let r := add_functs (empty A) fns in + let tr := add_functs (empty B) tfns in + nextfunction tr = nextfunction r /\ + symbols tr = symbols r /\ + forall (b: block) (f: A), + ZMap.get b (functions r) = Some f -> + ZMap.get b (functions tr) = transf f /\ transf f <> None. +Proof. + induction fns; simpl. + + intros; injection H; intro; subst tfns. + simpl. split. reflexivity. split. reflexivity. + intros b f; repeat (rewrite ZMap.gi). intros; discriminate. + + intro tfns. destruct a. caseEq (transf a). intros a' TA. + caseEq (transf_partial_program transf fns). intros l TPP EQ. + injection EQ; intro; subst tfns. + clear EQ. simpl. + generalize (IHfns l TPP). + intros [HR1 [HR2 HR3]]. + rewrite HR1. rewrite HR2. + split. reflexivity. + split. reflexivity. + intros b f. + case (zeq b (nextfunction (add_functs (empty A) fns))); intro. + subst b. repeat (rewrite ZMap.gss). + intro EQ; injection EQ; intro; subst f; clear EQ. + rewrite TA. split. auto. discriminate. + repeat (rewrite ZMap.gso; auto). + + intros; discriminate. + intros; discriminate. +Qed. + +Lemma mem_add_globals_transf: + forall (g1: genv A) (g2: genv B) (m: mem) (vars: list (ident * Z)), + snd (add_globals (g1, m) vars) = snd (add_globals (g2, m) vars). +Proof. + induction vars; simpl. + reflexivity. + destruct a. destruct (add_globals (g1, m) vars). + destruct (add_globals (g2, m) vars). + simpl in IHvars. subst m1. reflexivity. +Qed. + +Lemma symbols_add_globals_transf: + forall (g1: genv A) (g2: genv B) (m: mem), + symbols g1 = symbols g2 -> + forall (vars: list (ident * Z)), + symbols (fst (add_globals (g1, m) vars)) = + symbols (fst (add_globals (g2, m) vars)). +Proof. + induction vars; simpl. + assumption. + generalize (mem_add_globals_transf g1 g2 m vars); intro. + destruct a. destruct (add_globals (g1, m) vars). + destruct (add_globals (g2, m) vars). + simpl. simpl in IHvars. simpl in H0. + rewrite H0; rewrite IHvars. reflexivity. +Qed. + +Lemma prog_funct_transf_OK: + transf_partial_program transf p.(prog_funct) = Some p'.(prog_funct). +Proof. + generalize transf_OK; unfold transform_partial_program. + case (transf_partial_program transf (prog_funct p)); simpl; intros. + injection transf_OK0; intros; subst p'. reflexivity. + discriminate. +Qed. + +Theorem find_funct_ptr_transf_partial: + forall (b: block) (f: A), + find_funct_ptr (globalenv p) b = Some f -> + find_funct_ptr (globalenv p') b = transf f /\ transf f <> None. +Proof. + intros until f. + generalize (add_functs_transf p.(prog_funct) prog_funct_transf_OK). + intros [X [Y Z]]. + unfold find_funct_ptr. + repeat (rewrite functions_globalenv). + apply Z. +Qed. + +Theorem find_funct_transf_partial: + forall (v: val) (f: A), + find_funct (globalenv p) v = Some f -> + find_funct (globalenv p') v = transf f /\ transf f <> None. +Proof. + intros until f. unfold find_funct. + case v; try (intros; discriminate). + intros b ofs. + case (Int.eq ofs Int.zero); try (intros; discriminate). + apply find_funct_ptr_transf_partial. +Qed. + +Lemma symbols_init_transf: + symbols (globalenv p') = symbols (globalenv p). +Proof. + unfold globalenv. unfold globalenv_initmem. + generalize (add_functs_transf p.(prog_funct) prog_funct_transf_OK). + intros [X [Y Z]]. + generalize transf_OK. + unfold transform_partial_program. + case (transf_partial_program transf (prog_funct p)). + intros. injection transf_OK0; intro; subst p'; simpl. + symmetry. apply symbols_add_globals_transf. + symmetry. exact Y. + intros; discriminate. +Qed. + +Theorem find_symbol_transf_partial: + forall (s: ident), + find_symbol (globalenv p') s = find_symbol (globalenv p) s. +Proof. + intros. unfold find_symbol. + rewrite symbols_init_transf. auto. +Qed. + +Theorem init_mem_transf_partial: + init_mem p' = init_mem p. +Proof. + unfold init_mem. unfold globalenv_initmem. + generalize transf_OK. + unfold transform_partial_program. + case (transf_partial_program transf (prog_funct p)). + intros. injection transf_OK0; intro; subst p'; simpl. + symmetry. apply mem_add_globals_transf. + intros; discriminate. +Qed. + +End TRANSF_PROGRAM_PARTIAL. + +Section TRANSF_PROGRAM. + +Variable A B: Set. +Variable transf: A -> B. +Variable p: program A. +Let tp := transform_program transf p. + +Definition transf_partial (x: A) : option B := Some (transf x). + +Lemma transf_program_transf_partial_program: + forall (fns: list (ident * A)), + transf_partial_program transf_partial fns = + Some (transf_program transf fns). +Proof. + induction fns; simpl. + reflexivity. + destruct a. rewrite IHfns. reflexivity. +Qed. + +Lemma transform_program_transform_partial_program: + transform_partial_program transf_partial p = Some tp. +Proof. + unfold tp. unfold transform_partial_program, transform_program. + rewrite transf_program_transf_partial_program. + reflexivity. +Qed. + +Theorem find_funct_ptr_transf: + forall (b: block) (f: A), + find_funct_ptr (globalenv p) b = Some f -> + find_funct_ptr (globalenv tp) b = Some (transf f). +Proof. + intros. + generalize (find_funct_ptr_transf_partial transf_partial p + transform_program_transform_partial_program). + intros. elim (H0 b f H). intros. exact H1. +Qed. + +Theorem find_funct_transf: + forall (v: val) (f: A), + find_funct (globalenv p) v = Some f -> + find_funct (globalenv tp) v = Some (transf f). +Proof. + intros. + generalize (find_funct_transf_partial transf_partial p + transform_program_transform_partial_program). + intros. elim (H0 v f H). intros. exact H1. +Qed. + +Theorem find_symbol_transf: + forall (s: ident), + find_symbol (globalenv tp) s = find_symbol (globalenv p) s. +Proof. + intros. + apply find_symbol_transf_partial with transf_partial. + apply transform_program_transform_partial_program. +Qed. + +Theorem init_mem_transf: + init_mem tp = init_mem p. +Proof. + apply init_mem_transf_partial with transf_partial. + apply transform_program_transform_partial_program. +Qed. + +End TRANSF_PROGRAM. + +End Genv. diff --git a/backend/InterfGraph.v b/backend/InterfGraph.v new file mode 100644 index 00000000..37248f58 --- /dev/null +++ b/backend/InterfGraph.v @@ -0,0 +1,310 @@ +(** Representation of interference graphs for register allocation. *) + +Require Import Coqlib. +Require Import FSet. +Require Import Maps. +Require Import Ordered. +Require Import Registers. +Require Import Locations. + +(** Interference graphs are undirected graphs with two kinds of nodes: +- RTL pseudo-registers; +- Machine registers. + +and four kind of edges: +- Conflict edges between two pseudo-registers. + (Meaning: these two pseudo-registers must not be assigned the same + location.) +- Conflict edges between a pseudo-register and a machine register + (Meaning: this pseudo-register must not be assigned this machine + register.) +- Preference edges between two pseudo-registers. + (Meaning: the generated code would be more efficient if those two + pseudo-registers were assigned the same location, but if this is not + possible, the generated code will still be correct.) +- Preference edges between a pseudo-register and a machine register + (Meaning: the generated code would be more efficient if this + pseudo-register was assigned this machine register, but if this is not + possible, the generated code will still be correct.) + +A graph is represented by four finite sets of edges (one of each kind +above). An edge is represented by a pair of two pseudo-registers or +a pair (pseudo-register, machine register). +In the case of two pseudo-registers ([r1], [r2]), we adopt the convention +that [r1] <= [r2], so as to reflect the undirected nature of the edge. +*) + +Module OrderedReg <: OrderedType with Definition t := reg := OrderedPositive. +Module OrderedRegReg := OrderedPair(OrderedReg)(OrderedReg). +Module OrderedMreg := OrderedIndexed(IndexedMreg). +Module OrderedRegMreg := OrderedPair(OrderedReg)(OrderedMreg). + +Module SetDepRegReg := FSetAVL.Make(OrderedRegReg). +Module SetRegReg := NodepOfDep(SetDepRegReg). +Module SetDepRegMreg := FSetAVL.Make(OrderedRegMreg). +Module SetRegMreg := NodepOfDep(SetDepRegMreg). + +Record graph: Set := mkgraph { + interf_reg_reg: SetRegReg.t; + interf_reg_mreg: SetRegMreg.t; + pref_reg_reg: SetRegReg.t; + pref_reg_mreg: SetRegMreg.t +}. + +Definition empty_graph := + mkgraph SetRegReg.empty SetRegMreg.empty + SetRegReg.empty SetRegMreg.empty. + +(** The following functions add a new edge (if not already present) + to the given graph. *) + +Definition ordered_pair (x y: reg) := + if plt x y then (x, y) else (y, x). + +Definition add_interf (x y: reg) (g: graph) := + mkgraph (SetRegReg.add (ordered_pair x y) g.(interf_reg_reg)) + g.(interf_reg_mreg) + g.(pref_reg_reg) + g.(pref_reg_mreg). + +Definition add_interf_mreg (x: reg) (y: mreg) (g: graph) := + mkgraph g.(interf_reg_reg) + (SetRegMreg.add (x, y) g.(interf_reg_mreg)) + g.(pref_reg_reg) + g.(pref_reg_mreg). + +Definition add_pref (x y: reg) (g: graph) := + mkgraph g.(interf_reg_reg) + g.(interf_reg_mreg) + (SetRegReg.add (ordered_pair x y) g.(pref_reg_reg)) + g.(pref_reg_mreg). + +Definition add_pref_mreg (x: reg) (y: mreg) (g: graph) := + mkgraph g.(interf_reg_reg) + g.(interf_reg_mreg) + g.(pref_reg_reg) + (SetRegMreg.add (x, y) g.(pref_reg_mreg)). + +(** [interfere x y g] holds iff there is a conflict edge in [g] + between the two pseudo-registers [x] and [y]. *) + +Definition interfere (x y: reg) (g: graph) : Prop := + SetRegReg.In (ordered_pair x y) g.(interf_reg_reg). + +(** [interfere_mreg x y g] holds iff there is a conflict edge in [g] + between the pseudo-register [x] and the machine register [y]. *) + +Definition interfere_mreg (x: reg) (y: mreg) (g: graph) : Prop := + SetRegMreg.In (x, y) g.(interf_reg_mreg). + +Lemma ordered_pair_charact: + forall x y, + ordered_pair x y = (x, y) \/ ordered_pair x y = (y, x). +Proof. + unfold ordered_pair; intros. + case (plt x y); intro; tauto. +Qed. + +Lemma ordered_pair_sym: + forall x y, ordered_pair y x = ordered_pair x y. +Proof. + unfold ordered_pair; intros. + case (plt x y); intro. + case (plt y x); intro. + unfold Plt in *; omegaContradiction. + auto. + case (plt y x); intro. + auto. + assert (Zpos x = Zpos y). unfold Plt in *. omega. + congruence. +Qed. + +Lemma interfere_sym: + forall x y g, interfere x y g -> interfere y x g. +Proof. + unfold interfere; intros. + rewrite ordered_pair_sym. auto. +Qed. + +(** [graph_incl g1 g2] holds if [g2] contains all the conflict edges of [g1] + and possibly more. *) + +Definition graph_incl (g1 g2: graph) : Prop := + (forall x y, interfere x y g1 -> interfere x y g2) /\ + (forall x y, interfere_mreg x y g1 -> interfere_mreg x y g2). + +Lemma graph_incl_trans: + forall g1 g2 g3, graph_incl g1 g2 -> graph_incl g2 g3 -> graph_incl g1 g3. +Proof. + unfold graph_incl; intros. + elim H0; elim H; intros. + split; eauto. +Qed. + +(** We show that the [add_] functions correctly record the desired + conflicts, and preserve whatever conflict edges were already present. *) + +Lemma add_interf_correct: + forall x y g, + interfere x y (add_interf x y g). +Proof. + intros. unfold interfere, add_interf; simpl. + apply SetRegReg.add_1. red. apply OrderedRegReg.eq_refl. +Qed. + +Lemma add_interf_incl: + forall a b g, graph_incl g (add_interf a b g). +Proof. + intros. split; intros. + unfold add_interf, interfere; simpl. + apply SetRegReg.add_2. exact H. + exact H. +Qed. + +Lemma add_interf_mreg_correct: + forall x y g, + interfere_mreg x y (add_interf_mreg x y g). +Proof. + intros. unfold interfere_mreg, add_interf_mreg; simpl. + apply SetRegMreg.add_1. red. apply OrderedRegMreg.eq_refl. +Qed. + +Lemma add_interf_mreg_incl: + forall a b g, graph_incl g (add_interf_mreg a b g). +Proof. + intros. split; intros. + exact H. + unfold add_interf_mreg, interfere_mreg; simpl. + apply SetRegMreg.add_2. exact H. +Qed. + +Lemma add_pref_incl: + forall a b g, graph_incl g (add_pref a b g). +Proof. + intros. split; intros. + exact H. + exact H. +Qed. + +Lemma add_pref_mreg_incl: + forall a b g, graph_incl g (add_pref_mreg a b g). +Proof. + intros. split; intros. + exact H. + exact H. +Qed. + +(** [all_interf_regs g] returns the set of pseudo-registers that + are nodes of [g]. *) + +Definition all_interf_regs (g: graph) : Regset.t := + SetRegReg.fold + (fun r1r2 u => Regset.add (fst r1r2) (Regset.add (snd r1r2) u)) + g.(interf_reg_reg) + (SetRegMreg.fold + (fun r1m2 u => Regset.add (fst r1m2) u) + g.(interf_reg_mreg) + Regset.empty). + +Lemma mem_add_tail: + forall r r' u, + Regset.mem r u = true -> Regset.mem r (Regset.add r' u) = true. +Proof. + intros. case (Reg.eq r r'); intro. + subst r'. apply Regset.mem_add_same. + rewrite Regset.mem_add_other; auto. +Qed. + +Lemma all_interf_regs_correct_aux_1: + forall l u r, + Regset.mem r u = true -> + Regset.mem r + (List.fold_right + (fun r1r2 u => Regset.add (fst r1r2) (Regset.add (snd r1r2) u)) + u l) = true. +Proof. + induction l; simpl; intros. + auto. + apply mem_add_tail. apply mem_add_tail. auto. +Qed. + +Lemma all_interf_regs_correct_aux_2: + forall l u r1 r2, + InList OrderedRegReg.eq (r1, r2) l -> + let u' := + List.fold_right + (fun r1r2 u => Regset.add (fst r1r2) (Regset.add (snd r1r2) u)) + u l in + Regset.mem r1 u' = true /\ Regset.mem r2 u' = true. +Proof. + induction l; simpl; intros. + inversion H. + inversion H. elim H1. simpl. unfold OrderedReg.eq. + intros; subst r1; subst r2. + split. apply Regset.mem_add_same. + apply mem_add_tail. apply Regset.mem_add_same. + generalize (IHl u r1 r2 H1). intros [A B]. + split; repeat rewrite mem_add_tail; auto. +Qed. + +Lemma all_interf_regs_correct_aux_3: + forall l u r1 r2, + InList OrderedRegMreg.eq (r1, r2) l -> + let u' := + List.fold_right + (fun r1r2 u => Regset.add (fst r1r2) u) + u l in + Regset.mem r1 u' = true. +Proof. + induction l; simpl; intros. + inversion H. + inversion H. elim H1. simpl. unfold OrderedReg.eq. + intros; subst r1. + apply Regset.mem_add_same. + apply mem_add_tail. apply IHl with r2. auto. +Qed. + +Lemma all_interf_regs_correct_1: + forall r1 r2 g, + SetRegReg.In (r1, r2) g.(interf_reg_reg) -> + Regset.mem r1 (all_interf_regs g) = true /\ + Regset.mem r2 (all_interf_regs g) = true. +Proof. + intros. unfold all_interf_regs. + generalize (SetRegReg.fold_1 + g.(interf_reg_reg) + (SetRegMreg.fold + (fun (r1m2 : SetDepRegMreg.elt) (u : Regset.t) => + Regset.add (fst r1m2) u) (interf_reg_mreg g) Regset.empty) + (fun (r1r2 : SetDepRegReg.elt) (u : Regset.t) => + Regset.add (fst r1r2) (Regset.add (snd r1r2) u))). + intros [l [UN [INEQ EQ]]]. + rewrite EQ. apply all_interf_regs_correct_aux_2. + elim (INEQ (r1, r2)); intros. auto. +Qed. + +Lemma all_interf_regs_correct_2: + forall r1 mr2 g, + SetRegMreg.In (r1, mr2) g.(interf_reg_mreg) -> + Regset.mem r1 (all_interf_regs g) = true. +Proof. + intros. unfold all_interf_regs. + generalize (SetRegReg.fold_1 + g.(interf_reg_reg) + (SetRegMreg.fold + (fun (r1m2 : SetDepRegMreg.elt) (u : Regset.t) => + Regset.add (fst r1m2) u) (interf_reg_mreg g) Regset.empty) + (fun (r1r2 : SetDepRegReg.elt) (u : Regset.t) => + Regset.add (fst r1r2) (Regset.add (snd r1r2) u))). + intros [l [UN [INEQ EQ]]]. + rewrite EQ. apply all_interf_regs_correct_aux_1. + generalize (SetRegMreg.fold_1 + g.(interf_reg_mreg) + Regset.empty + (fun (r1r2 : SetDepRegMreg.elt) (u : Regset.t) => + Regset.add (fst r1r2) u)). + change (PTree.t unit) with Regset.t. + intros [l' [UN' [INEQ' EQ']]]. + rewrite EQ'. apply all_interf_regs_correct_aux_3 with mr2. + elim (INEQ' (r1, mr2)); intros. auto. +Qed. diff --git a/backend/Kildall.v b/backend/Kildall.v new file mode 100644 index 00000000..10b2e1d9 --- /dev/null +++ b/backend/Kildall.v @@ -0,0 +1,1231 @@ +(** Solvers for dataflow inequations. *) + +Require Import Coqlib. +Require Import Maps. +Require Import Lattice. + +(** A forward dataflow problem is a set of inequations of the form +- [X(s) >= transf n X(n)] + if program point [s] is a successor of program point [n] +- [X(n) >= a] + if [(n, a)] belongs to a given list of (program points, approximations). + +The unknowns are the [X(n)], indexed by program points (e.g. nodes in the +CFG graph of a RTL function). They range over a given ordered set that +represents static approximations of the program state at each point. +The [transf] function is the abstract transfer function: it computes an +approximation [transf n X(n)] of the program state after executing instruction +at point [n], as a function of the approximation [X(n)] of the program state +before executing that instruction. + +Symmetrically, a backward dataflow problem is a set of inequations of the form +- [X(n) >= transf s X(s)] + if program point [s] is a successor of program point [n] +- [X(n) >= a] + if [(n, a)] belongs to a given list of (program points, approximations). + +The only difference with a forward dataflow problem is that the transfer +function [transf] now computes the approximation before a program point [s] +from the approximation [X(s)] after point [s]. + +This file defines three solvers for dataflow problems. The first two +solve (optimally) forward and backward problems using Kildall's worklist +algorithm. They assume that the unknowns range over a semi-lattice, that is, +an ordered type equipped with a least upper bound operation. + +The last solver corresponds to propagation over extended basic blocks: +it returns approximate solutions of forward problems where the unknowns +range over any ordered type having a greatest element [top]. It simply +sets [X(n) = top] for all merge points [n], that is, program points having +several predecessors. This solver is useful when least upper bounds of +approximations do not exist or are too expensive to compute. *) + +(** * Bounded iteration *) + +(** The three solvers proceed iteratively, increasing the value of one of + the unknowns [X(n)] at each iteration until a solution is reached. + This section defines the general form of iteration used. *) + +Section BOUNDED_ITERATION. + +Variables A B: Set. +Variable step: A -> B + A. + +(** The [step] parameter represents one step of the iteration. From a + current iteration state [a: A], it either returns a value of type [B], + meaning that iteration is over and that this [B] value is the final + result of the iteration, or a value [a' : A] which is the next state + of the iteration. + + The naive way to define the iteration is: +<< +Fixpoint iterate (a: A) : B := + match step a with + | inl b => b + | inr a' => iterate a' + end. +>> + However, this is a general recursion, not guaranteed to terminate, + and therefore not expressible in Coq. The standard way to work around + this difficulty is to use Noetherian recursion (Coq module [Wf]). + This requires that we equip the type [A] with a well-founded ordering [<] + (no infinite ascending chains) and we demand that [step] satisfies + [step a = inr a' -> a < a']. For the types [A] that are of interest to us + in this development, it is however very painful to define adequate + well-founded orderings, even though we know our iterations always + terminate. + + Instead, we choose to bound the number of iterations by an arbitrary + constant. [iterate] then becomes a function that can fail, + of type [A -> option B]. The [None] result denotes failure to reach + a result in the number of iterations prescribed, or, in other terms, + failure to find a solution to the dataflow problem. The compiler + passes that exploit dataflow analysis (the [Constprop], [CSE] and + [Allocation] passes) will, in this case, either fail ([Allocation]) + or turn off the optimization pass ([Constprop] and [CSE]). + + Since we know (informally) that our computations terminate, we can + take a very large constant as the maximal number of iterations. + Failure will therefore never happen in practice, but of + course our proofs also cover the failure case and show that + nothing bad happens in this hypothetical case either. *) + +Definition num_iterations := 1000000000000%positive. + +(** The simple definition of bounded iteration is: +<< +Fixpoint iterate (niter: nat) (a: A) {struct niter} : option B := + match niter with + | O => None + | S niter' => + match step a with + | inl b => b + | inr a' => iterate niter' a' + end + end. +>> + This function is structural recursive over the parameter [niter] + (number of iterations), represented here as a Peano integer (type [nat]). + However, we want to use very large values of [niter]. As Peano integers, + these values would be much too large to fit in memory. Therefore, + we must express iteration counts as a binary integer (type [positive]). + However, Peano induction over type [positive] is not structural recursion, + so we cannot define [iterate] as a Coq fixpoint and must use + Noetherian recursion instead. *) + +Definition iter_step (x: positive) + (next: forall y, Plt y x -> A -> option B) + (s: A) : option B := + match peq x xH with + | left EQ => None + | right NOTEQ => + match step s with + | inl res => Some res + | inr s' => next (Ppred x) (Ppred_Plt x NOTEQ) s' + end + end. + +Definition iterate: positive -> A -> option B := + Fix Plt_wf (fun _ => A -> option B) iter_step. + +(** We then prove the expected unrolling equations for [iterate]. *) + +Remark unroll_iterate: + forall x, iterate x = iter_step x (fun y _ => iterate y). +Proof. + unfold iterate; apply (Fix_eq Plt_wf (fun _ => A -> option B) iter_step). + intros. unfold iter_step. apply extensionality. intro s. + case (peq x xH); intro. auto. + rewrite H. auto. +Qed. + +Lemma iterate_base: + forall s, iterate 1%positive s = None. +Proof. + intro; rewrite unroll_iterate; unfold iter_step. + case (peq 1 1); congruence. +Qed. + +Lemma iterate_step: + forall x s, + iterate (Psucc x) s = + match step s with + | inl res => Some res + | inr s' => iterate x s' + end. +Proof. + intro; rewrite unroll_iterate; unfold iter_step; intros. + case (peq (Psucc x) 1); intro. + destruct x; simpl in e; discriminate. + rewrite Ppred_succ. auto. +Qed. + +End BOUNDED_ITERATION. + +(** * Solving forward dataflow problems using Kildall's algorithm *) + +(** A forward dataflow solver has the following generic interface. + Unknowns range over the type [L.t], which is equipped with + semi-lattice operations (see file [Lattice]). *) + +Module Type DATAFLOW_SOLVER. + + Declare Module L: SEMILATTICE. + + Variable fixpoint: + (positive -> list positive) -> + positive -> + (positive -> L.t -> L.t) -> + list (positive * L.t) -> + option (PMap.t L.t). + + (** [fixpoint successors topnode transf entrypoints] is the solver. + It returns either an error or a mapping from program points to + values of type [L.t] representing the solution. [successors] + is a function returning the list of successors of the given program + point. [topnode] is the maximal number of nodes considered. + [transf] is the transfer function, and [entrypoints] the additional + constraints imposed on the solution. *) + + Hypothesis fixpoint_solution: + forall successors topnode transf entrypoints res n s, + fixpoint successors topnode transf entrypoints = Some res -> + Plt n topnode -> In s (successors n) -> + L.ge res!!s (transf n res!!n). + + (** The [fixpoint_solution] theorem shows that the returned solution, + if any, satisfies the dataflow inequations. *) + + Hypothesis fixpoint_entry: + forall successors topnode transf entrypoints res n v, + fixpoint successors topnode transf entrypoints = Some res -> + In (n, v) entrypoints -> + L.ge res!!n v. + + (** The [fixpoint_entry] theorem shows that the returned solution, + if any, satisfies the additional constraints expressed + by [entrypoints]. *) + +End DATAFLOW_SOLVER. + +(** We now define a generic solver that works over + any semi-lattice structure. *) + +Module Dataflow_Solver (LAT: SEMILATTICE): + DATAFLOW_SOLVER with Module L := LAT. + +Module L := LAT. + +Section Kildall. + +Variable successors: positive -> list positive. +Variable topnode: positive. +Variable transf: positive -> L.t -> L.t. +Variable entrypoints: list (positive * L.t). + +(** The state of the iteration has two components: +- A mapping from program points to values of type [L.t] representing + the candidate solution found so far. +- A worklist of program points that remain to be considered. +*) + +Record state : Set := + mkstate { st_in: PMap.t L.t; st_wrk: list positive }. + +(** Kildall's algorithm, in pseudo-code, is as follows: +<< + while st_wrk is not empty, do + extract a node n from st_wrk + compute out = transf n st_in[n] + for each successor s of n: + compute in = lub st_in[s] out + if in <> st_in[s]: + st_in[s] := in + st_wrk := st_wrk union {n} + end if + end for + end while + return st_in +>> + +The initial state is built as follows: +- The initial mapping sets all program points to [L.bot], except + those mentioned in the [entrypoints] list, for which we take + the associated approximation as initial value. Since a program + point can be mentioned several times in [entrypoints], with different + approximations, we actually take the l.u.b. of these approximations. +- The initial worklist contains all the program points up to [topnode]. *) + +Fixpoint start_state_in (ep: list (positive * L.t)) : PMap.t L.t := + match ep with + | nil => + PMap.init L.bot + | (n, v) :: rem => + let m := start_state_in rem in PMap.set n (L.lub m!!n v) m + end. + +Definition start_state_wrk := + positive_rec (list positive) nil (@cons positive) topnode. + +Definition start_state := + mkstate (start_state_in entrypoints) start_state_wrk. + +(** The worklist is actually treated as a set: it is not useful + (and detrimental to performance) to put the same point twice in the + worklist. The following function adds a point to the worklist if + it was not already there. *) + +Definition add_to_worklist (n: positive) (wrk: list positive) := + if List.In_dec peq n wrk then wrk else n :: wrk. + +(** [propagate_succ] corresponds, in the pseudocode, + to the body of the [for] loop iterating over all successors. *) + +Definition propagate_succ (s: state) (out: L.t) (n: positive) := + let oldl := s.(st_in)!!n in + let newl := L.lub oldl out in + if L.eq oldl newl + then s + else mkstate (PMap.set n newl s.(st_in)) (add_to_worklist n s.(st_wrk)). + +(** [propagate_succ_list] corresponds, in the pseudocode, + to the [for] loop iterating over all successors. *) + +Fixpoint propagate_succ_list (s: state) (out: L.t) (succs: list positive) + {struct succs} : state := + match succs with + | nil => s + | n :: rem => propagate_succ_list (propagate_succ s out n) out rem + end. + +(** [step] corresponds to the body of the outer [while] loop in the + pseudocode. *) + +Definition step (s: state) : PMap.t L.t + state := + match s.(st_wrk) with + | nil => + inl _ s.(st_in) + | n :: rem => + inr _ (propagate_succ_list + (mkstate s.(st_in) rem) + (transf n s.(st_in)!!n) + (successors n)) + end. + +(** The whole fixpoint computation is the iteration of [step] from + the start state. *) + +Definition fixpoint : option (PMap.t L.t) := + iterate _ _ step num_iterations start_state. + +(** ** Monotonicity properties *) + +(** We first show that the [st_in] part of the state evolves monotonically: + at each step, the values of the [st_in[n]] either remain the same or + increase with respect to the [L.ge] ordering. *) + +Definition in_incr (in1 in2: PMap.t L.t) : Prop := + forall n, L.ge in2!!n in1!!n. + +Lemma in_incr_refl: + forall in1, in_incr in1 in1. +Proof. + unfold in_incr; intros. apply L.ge_refl. +Qed. + +Lemma in_incr_trans: + forall in1 in2 in3, in_incr in1 in2 -> in_incr in2 in3 -> in_incr in1 in3. +Proof. + unfold in_incr; intros. apply L.ge_trans with in2!!n; auto. +Qed. + +Lemma propagate_succ_incr: + forall st out n, + in_incr st.(st_in) (propagate_succ st out n).(st_in). +Proof. + unfold in_incr, propagate_succ; simpl; intros. + case (L.eq st.(st_in)!!n (L.lub st.(st_in)!!n out)); intro. + apply L.ge_refl. + simpl. case (peq n n0); intro. + subst n0. rewrite PMap.gss. apply L.ge_lub_left. + rewrite PMap.gso; auto. apply L.ge_refl. +Qed. + +Lemma propagate_succ_list_incr: + forall out succs st, + in_incr st.(st_in) (propagate_succ_list st out succs).(st_in). +Proof. + induction succs; simpl; intros. + apply in_incr_refl. + apply in_incr_trans with (propagate_succ st out a).(st_in). + apply propagate_succ_incr. auto. +Qed. + +Lemma iterate_incr: + forall n st res, + iterate _ _ step n st = Some res -> + in_incr st.(st_in) res. +Proof. + intro n; pattern n. apply positive_Peano_ind; intros until res. + rewrite iterate_base. congruence. + rewrite iterate_step. unfold step. + destruct st.(st_wrk); intros. + injection H0; intro; subst res. + red; intros; apply L.ge_refl. + apply in_incr_trans with + (propagate_succ_list (mkstate (st_in st) l) + (transf p (st_in st)!!p) + (successors p)).(st_in). + change (st_in st) with (st_in (mkstate (st_in st) l)). + apply propagate_succ_list_incr. + apply H. auto. +Qed. + +Lemma fixpoint_incr: + forall res, + fixpoint = Some res -> in_incr (start_state_in entrypoints) res. +Proof. + unfold fixpoint; intros. + change (start_state_in entrypoints) with start_state.(st_in). + apply iterate_incr with num_iterations; auto. +Qed. + +(** ** Correctness invariant *) + +(** The following invariant is preserved at each iteration of Kildall's + algorithm: for all program points [n] below [topnode], either + [n] is in the worklist, or the inequations associated with [n] + ([st_in[s] >= transf n st_in[n]] for all successors [s] of [n]) + hold. In other terms, the worklist contains all nodes that do not + yet satisfy their inequations. *) + +Definition good_state (st: state) : Prop := + forall n, + Plt n topnode -> + In n st.(st_wrk) \/ + (forall s, In s (successors n) -> + L.ge st.(st_in)!!s (transf n st.(st_in)!!n)). + +(** We show that the start state satisfies the invariant, and that + the [step] function preserves it. *) + +Lemma start_state_good: + good_state start_state. +Proof. + unfold good_state, start_state; intros. + left; simpl. unfold start_state_wrk. + generalize H. pattern topnode. apply positive_Peano_ind. + intro. compute in H0. destruct n; discriminate. + intros. rewrite positive_rec_succ. + elim (Plt_succ_inv _ _ H1); intro. + auto with coqlib. + subst x; auto with coqlib. +Qed. + +Lemma add_to_worklist_1: + forall n wkl, In n (add_to_worklist n wkl). +Proof. + intros. unfold add_to_worklist. + case (In_dec peq n wkl); auto with coqlib. +Qed. + +Lemma add_to_worklist_2: + forall n wkl n', In n' wkl -> In n' (add_to_worklist n wkl). +Proof. + intros. unfold add_to_worklist. + case (In_dec peq n wkl); auto with coqlib. +Qed. + +Lemma propagate_succ_charact: + forall st out n, + let st' := propagate_succ st out n in + L.ge st'.(st_in)!!n out /\ + (forall s, n <> s -> st'.(st_in)!!s = st.(st_in)!!s). +Proof. + unfold propagate_succ; intros; simpl. + case (L.eq (st_in st) !! n (L.lub (st_in st) !! n out)); intro. + split. rewrite e. rewrite L.lub_commut. apply L.ge_lub_left. + auto. + simpl. split. + rewrite PMap.gss. rewrite L.lub_commut. apply L.ge_lub_left. + intros. rewrite PMap.gso; auto. +Qed. + +Lemma propagate_succ_list_charact: + forall out succs st, + let st' := propagate_succ_list st out succs in + forall s, + (In s succs -> L.ge st'.(st_in)!!s out) /\ + (~(In s succs) -> st'.(st_in)!!s = st.(st_in)!!s). +Proof. + induction succs; simpl; intros. + tauto. + generalize (IHsuccs (propagate_succ st out a) s). intros [A B]. + generalize (propagate_succ_charact st out a). intros [C D]. + split; intros. + elim H; intro. + subst s. + apply L.ge_trans with (propagate_succ st out a).(st_in)!!a. + apply propagate_succ_list_incr. assumption. + apply A. auto. + transitivity (propagate_succ st out a).(st_in)!!s. + apply B. tauto. + apply D. tauto. +Qed. + +Lemma propagate_succ_incr_worklist: + forall st out n, + incl st.(st_wrk) (propagate_succ st out n).(st_wrk). +Proof. + intros. unfold propagate_succ. + case (L.eq (st_in st) !! n (L.lub (st_in st) !! n out)); intro. + apply incl_refl. + simpl. red; intros. apply add_to_worklist_2; auto. +Qed. + +Lemma propagate_succ_list_incr_worklist: + forall out succs st, + incl st.(st_wrk) (propagate_succ_list st out succs).(st_wrk). +Proof. + induction succs; simpl; intros. + apply incl_refl. + apply incl_tran with (propagate_succ st out a).(st_wrk). + apply propagate_succ_incr_worklist. + auto. +Qed. + +Lemma propagate_succ_records_changes: + forall st out n s, + let st' := propagate_succ st out n in + In s st'.(st_wrk) \/ st'.(st_in)!!s = st.(st_in)!!s. +Proof. + simpl. intros. unfold propagate_succ. + case (L.eq (st_in st) !! n (L.lub (st_in st) !! n out)); intro. + right; auto. + case (peq s n); intro. + subst s. left. simpl. apply add_to_worklist_1. + right. simpl. apply PMap.gso. auto. +Qed. + +Lemma propagate_succ_list_records_changes: + forall out succs st s, + let st' := propagate_succ_list st out succs in + In s st'.(st_wrk) \/ st'.(st_in)!!s = st.(st_in)!!s. +Proof. + induction succs; simpl; intros. + right; auto. + elim (propagate_succ_records_changes st out a s); intro. + left. apply propagate_succ_list_incr_worklist. auto. + rewrite <- H. auto. +Qed. + +Lemma step_state_good: + forall st n rem, + st.(st_wrk) = n :: rem -> + good_state st -> + good_state (propagate_succ_list (mkstate st.(st_in) rem) + (transf n st.(st_in)!!n) + (successors n)). +Proof. + unfold good_state. intros st n rem WKL GOOD x LTx. + set (out := transf n st.(st_in)!!n). + rewrite WKL in GOOD. + elim (propagate_succ_list_records_changes + out (successors n) (mkstate st.(st_in) rem) x). + intro; left; auto. + simpl; intro EQ. rewrite EQ. + (* Case 1: x = n *) + case (peq x n); intro. + subst x. fold out. + right; intros. + elim (propagate_succ_list_charact out (successors n) + (mkstate st.(st_in) rem) s); intros. + auto. + (* Case 2: x <> n *) + elim (GOOD x LTx); intro. + (* Case 2.1: x was already in worklist, still is *) + left. apply propagate_succ_list_incr_worklist. + simpl. elim H; intro. elim n0; auto. auto. + (* Case 2.2: x was not in worklist *) + right; intros. + case (In_dec peq s (successors n)); intro. + (* Case 2.2.1: s is a successor of n, it may have increased *) + apply L.ge_trans with st.(st_in)!!s. + change st.(st_in)!!s with (mkstate st.(st_in) rem).(st_in)!!s. + apply propagate_succ_list_incr. + auto. + (* Case 2.2.2: s is not a successor of n, it did not change *) + elim (propagate_succ_list_charact out (successors n) + (mkstate st.(st_in) rem) s); intros. + rewrite H2. simpl. auto. auto. +Qed. + +(** ** Correctness of the solution returned by [iterate]. *) + +(** As a consequence of the [good_state] invariant, the result of + [iterate], if defined, is a solution of the dataflow inequations, + since [st_wrk] is empty when [iterate] terminates. *) + +Lemma iterate_solution: + forall niter st res n s, + good_state st -> + iterate _ _ step niter st = Some res -> + Plt n topnode -> In s (successors n) -> + L.ge res!!s (transf n res!!n). +Proof. + intro niter; pattern niter; apply positive_Peano_ind; intros until s. + rewrite iterate_base. congruence. + intro GS. rewrite iterate_step. + unfold step; caseEq (st.(st_wrk)). + intros. injection H1; intros; subst res. + elim (GS n H2); intro. + rewrite H0 in H4. elim H4. + auto. + intros. apply H with + (propagate_succ_list (mkstate st.(st_in) l) + (transf p st.(st_in)!!p) (successors p)). + apply step_state_good; auto. + auto. auto. auto. +Qed. + +Theorem fixpoint_solution: + forall res n s, + fixpoint = Some res -> + Plt n topnode -> In s (successors n) -> + L.ge res!!s (transf n res!!n). +Proof. + unfold fixpoint. intros. + apply iterate_solution with num_iterations start_state. + apply start_state_good. + auto. auto. auto. +Qed. + +(** As a consequence of the monotonicity property, the result of + [fixpoint], if defined, is pointwise greater than or equal the + initial mapping. Therefore, it satisfies the additional constraints + stated in [entrypoints]. *) + +Lemma start_state_in_entry: + forall ep n v, + In (n, v) ep -> + L.ge (start_state_in ep)!!n v. +Proof. + induction ep; simpl; intros. + elim H. + elim H; intros. + subst a. rewrite PMap.gss. rewrite L.lub_commut. apply L.ge_lub_left. + destruct a. rewrite PMap.gsspec. case (peq n p); intro. + subst p. apply L.ge_trans with (start_state_in ep)!!n. + apply L.ge_lub_left. auto. + auto. +Qed. + +Theorem fixpoint_entry: + forall res n v, + fixpoint = Some res -> + In (n, v) entrypoints -> + L.ge res!!n v. +Proof. + intros. + apply L.ge_trans with (start_state_in entrypoints)!!n. + apply fixpoint_incr. auto. + apply start_state_in_entry. auto. +Qed. + +End Kildall. + +End Dataflow_Solver. + +(** * Solving backward dataflow problems using Kildall's algorithm *) + +(** A backward dataflow problem on a given flow graph is a forward + dataflow program on the reversed flow graph, where predecessors replace + successors. We exploit this observation to cheaply derive a backward + solver from the forward solver. *) + +(** ** Construction of the predecessor relation *) + +Section Predecessor. + +Variable successors: positive -> list positive. +Variable topnode: positive. + +Fixpoint add_successors (pred: PMap.t (list positive)) + (from: positive) (tolist: list positive) + {struct tolist} : PMap.t (list positive) := + match tolist with + | nil => pred + | to :: rem => + add_successors (PMap.set to (from :: pred!!to) pred) from rem + end. + +Lemma add_successors_correct: + forall tolist from pred n s, + In n pred!!s \/ (n = from /\ In s tolist) -> + In n (add_successors pred from tolist)!!s. +Proof. + induction tolist; simpl; intros. + tauto. + apply IHtolist. + rewrite PMap.gsspec. case (peq s a); intro. + subst a. elim H; intro. left; auto with coqlib. + elim H0; intros. subst n. left; auto with coqlib. + intuition. elim n0; auto. +Qed. + +Definition make_predecessors : PMap.t (list positive) := + positive_rec (PMap.t (list positive)) (PMap.init nil) + (fun n pred => add_successors pred n (successors n)) + topnode. + +Lemma make_predecessors_correct: + forall n s, + Plt n topnode -> + In s (successors n) -> + In n make_predecessors!!s. +Proof. + unfold make_predecessors. pattern topnode. + apply positive_Peano_ind; intros. + compute in H. destruct n; discriminate. + rewrite positive_rec_succ. + apply add_successors_correct. + elim (Plt_succ_inv _ _ H0); intro. + left; auto. + right. subst x. tauto. +Qed. + +End Predecessor. + +(** ** Solving backward dataflow problems *) + +(** The interface to a backward dataflow solver is as follows. *) + +Module Type BACKWARD_DATAFLOW_SOLVER. + + Declare Module L: SEMILATTICE. + + Variable fixpoint: + (positive -> list positive) -> + positive -> + (positive -> L.t -> L.t) -> + list (positive * L.t) -> + option (PMap.t L.t). + + Hypothesis fixpoint_solution: + forall successors topnode transf entrypoints res n s, + fixpoint successors topnode transf entrypoints = Some res -> + Plt n topnode -> Plt s topnode -> In s (successors n) -> + L.ge res!!n (transf s res!!s). + + Hypothesis fixpoint_entry: + forall successors topnode transf entrypoints res n v, + fixpoint successors topnode transf entrypoints = Some res -> + In (n, v) entrypoints -> + L.ge res!!n v. + +End BACKWARD_DATAFLOW_SOLVER. + +(** We construct a generic backward dataflow solver, working over any + semi-lattice structure, by applying the forward dataflow solver + with the predecessor relation instead of the successor relation. *) + +Module Backward_Dataflow_Solver (LAT: SEMILATTICE): + BACKWARD_DATAFLOW_SOLVER with Module L := LAT. + +Module L := LAT. + +Module DS := Dataflow_Solver L. + +Section Kildall. + +Variable successors: positive -> list positive. +Variable topnode: positive. +Variable transf: positive -> L.t -> L.t. +Variable entrypoints: list (positive * L.t). + +Definition fixpoint := + let pred := make_predecessors successors topnode in + DS.fixpoint (fun s => pred!!s) topnode transf entrypoints. + +Theorem fixpoint_solution: + forall res n s, + fixpoint = Some res -> + Plt n topnode -> Plt s topnode -> + In s (successors n) -> + L.ge res!!n (transf s res!!s). +Proof. + intros. apply DS.fixpoint_solution with + (fun s => (make_predecessors successors topnode)!!s) topnode entrypoints. + exact H. + assumption. + apply make_predecessors_correct; auto. +Qed. + +Theorem fixpoint_entry: + forall res n v, + fixpoint = Some res -> + In (n, v) entrypoints -> + L.ge res!!n v. +Proof. + intros. apply DS.fixpoint_entry with + (fun s => (make_predecessors successors topnode)!!s) topnode transf entrypoints. + exact H. auto. +Qed. + +End Kildall. + +End Backward_Dataflow_Solver. + +(** * Analysis on extended basic blocks *) + +(** We now define an approximate solver for forward dataflow problems + that proceeds by forward propagation over extended basic blocks. + In other terms, program points with multiple predecessors are mapped + to [L.top] (the greatest, or coarsest, approximation) and the other + program points are mapped to [transf p X[p]] where [p] is their unique + predecessor. + + This analysis applies to any type of approximations equipped with + an ordering and a greatest element. *) + +Module Type ORDERED_TYPE_WITH_TOP. + + Variable t: Set. + Variable ge: t -> t -> Prop. + Variable top: t. + Hypothesis top_ge: forall x, ge top x. + Hypothesis refl_ge: forall x, ge x x. + +End ORDERED_TYPE_WITH_TOP. + +(** The interface of the solver is similar to that of Kildall's forward + solver. We provide one additional theorem [fixpoint_invariant] + stating that any property preserved by the [transf] function + holds for the returned solution. *) + +Module Type BBLOCK_SOLVER. + + Declare Module L: ORDERED_TYPE_WITH_TOP. + + Variable fixpoint: + (positive -> list positive) -> + positive -> + (positive -> L.t -> L.t) -> + positive -> + option (PMap.t L.t). + + Hypothesis fixpoint_solution: + forall successors topnode transf entrypoint res n s, + fixpoint successors topnode transf entrypoint = Some res -> + Plt n topnode -> In s (successors n) -> + L.ge res!!s (transf n res!!n). + + Hypothesis fixpoint_entry: + forall successors topnode transf entrypoint res, + fixpoint successors topnode transf entrypoint = Some res -> + res!!entrypoint = L.top. + + Hypothesis fixpoint_invariant: + forall successors topnode transf entrypoint + (P: L.t -> Prop), + P L.top -> + (forall pc x, P x -> P (transf pc x)) -> + forall res pc, + fixpoint successors topnode transf entrypoint = Some res -> + P res!!pc. + +End BBLOCK_SOLVER. + +(** The implementation of the ``extended basic block'' solver is a + functor parameterized by any ordered type with a top element. *) + +Module BBlock_solver(LAT: ORDERED_TYPE_WITH_TOP): + BBLOCK_SOLVER with Module L := LAT. + +Module L := LAT. + +Section Solver. + +Variable successors: positive -> list positive. +Variable topnode: positive. +Variable transf: positive -> L.t -> L.t. +Variable entrypoint: positive. +Variable P: L.t -> Prop. +Hypothesis Ptop: P L.top. +Hypothesis Ptransf: forall pc x, P x -> P (transf pc x). + +Definition bbmap := positive -> bool. +Definition result := PMap.t L.t. + +(** As in Kildall's solver, the state of the iteration has two components: +- A mapping from program points to values of type [L.t] representing + the candidate solution found so far. +- A worklist of program points that remain to be considered. +*) + +Record state : Set := mkstate + { st_in: result; st_wrk: list positive }. + +(** The ``extended basic block'' algorithm, in pseudo-code, is as follows: +<< + st_wrk := the set of all points n having multiple predecessors + st_in := the mapping n -> L.top + + while st_wrk is not empty, do + extract a node n from st_wrk + compute out = transf n st_in[n] + for each successor s of n: + compute in = lub st_in[s] out + if s has only one predecessor (namely, n): + st_in[s] := in + st_wrk := st_wrk union {s} + end if + end for + end while + return st_in +>> +**) + +Fixpoint propagate_successors + (bb: bbmap) (succs: list positive) (l: L.t) (st: state) + {struct succs} : state := + match succs with + | nil => st + | s1 :: sl => + if bb s1 then + propagate_successors bb sl l st + else + propagate_successors bb sl l + (mkstate (PMap.set s1 l st.(st_in)) + (s1 :: st.(st_wrk))) + end. + +Definition step (bb: bbmap) (st: state) : result + state := + match st.(st_wrk) with + | nil => inl _ st.(st_in) + | pc :: rem => + if plt pc topnode then + inr _ (propagate_successors + bb (successors pc) + (transf pc st.(st_in)!!pc) + (mkstate st.(st_in) rem)) + else + inr _ (mkstate st.(st_in) rem) + end. + +(** Recognition of program points that have more than one predecessor. *) + +Definition is_basic_block_head + (preds: PMap.t (list positive)) (pc: positive) : bool := + match preds!!pc with + | nil => true + | s :: nil => + if peq s pc then true else + if peq pc entrypoint then true else false + | _ :: _ :: _ => true + end. + +Definition basic_block_map : bbmap := + is_basic_block_head (make_predecessors successors topnode). + +Definition basic_block_list (bb: bbmap) : list positive := + positive_rec (list positive) nil + (fun pc l => if bb pc then pc :: l else l) topnode. + +(** The computation of the approximate solution. *) + +Definition fixpoint : option result := + let bb := basic_block_map in + iterate _ _ (step bb) num_iterations + (mkstate (PMap.init L.top) (basic_block_list bb)). + +(** ** Properties of predecessors and multiple-predecessors nodes *) + +Definition predecessors := make_predecessors successors topnode. + +Lemma predecessors_correct: + forall n s, + Plt n topnode -> In s (successors n) -> In n predecessors!!s. +Proof. + intros. unfold predecessors. eapply make_predecessors_correct; eauto. +Qed. + +Lemma multiple_predecessors: + forall s n1 n2, + Plt n1 topnode -> In s (successors n1) -> + Plt n2 topnode -> In s (successors n2) -> + n1 <> n2 -> + basic_block_map s = true. +Proof. + intros. + assert (In n1 predecessors!!s). apply predecessors_correct; auto. + assert (In n2 predecessors!!s). apply predecessors_correct; auto. + unfold basic_block_map, is_basic_block_head. + fold predecessors. + destruct (predecessors!!s). + auto. + destruct l. + simpl in H4. simpl in H5. intuition congruence. + auto. +Qed. + +Lemma no_self_loop: + forall n, + Plt n topnode -> In n (successors n) -> basic_block_map n = true. +Proof. + intros. unfold basic_block_map, is_basic_block_head. + fold predecessors. + generalize (predecessors_correct n n H H0). intro. + destruct (predecessors!!n). auto. + destruct l. replace n with p. apply peq_true. simpl in H1. tauto. + auto. +Qed. + +(** ** Correctness invariant *) + +(** The invariant over the state is as follows: +- Points with several predecessors are mapped to [L.top] +- Points not in the worklist satisfy their inequations + (as in Kildall's algorithm). +*) + +Definition state_invariant (st: state) : Prop := + (forall n, basic_block_map n = true -> st.(st_in)!!n = L.top) +/\ + (forall n, Plt n topnode -> + In n st.(st_wrk) \/ + (forall s, In s (successors n) -> + L.ge st.(st_in)!!s (transf n st.(st_in)!!n))). + +Lemma propagate_successors_charact1: + forall bb succs l st, + incl st.(st_wrk) + (propagate_successors bb succs l st).(st_wrk). +Proof. + induction succs; simpl; intros. + apply incl_refl. + case (bb a). + auto. + apply incl_tran with (a :: st_wrk st). + apply incl_tl. apply incl_refl. + set (st1 := (mkstate (PMap.set a l (st_in st)) (a :: st_wrk st))). + change (a :: st_wrk st) with (st_wrk st1). + auto. +Qed. + +Lemma propagate_successors_charact2: + forall bb succs l st n, + let st' := propagate_successors bb succs l st in + (In n succs -> bb n = false -> In n st'.(st_wrk) /\ st'.(st_in)!!n = l) +/\ (~In n succs \/ bb n = true -> st'.(st_in)!!n = st.(st_in)!!n). +Proof. + induction succs; simpl; intros. + (* Base case *) + split. tauto. auto. + (* Inductive case *) + caseEq (bb a); intro. + elim (IHsuccs l st n); intros A B. + split; intros. apply A; auto. + elim H0; intro. subst a. congruence. auto. + apply B. tauto. + set (st1 := mkstate (PMap.set a l (st_in st)) (a :: st_wrk st)). + elim (IHsuccs l st1 n); intros A B. + split; intros. + elim H0; intros. + subst n. split. + apply propagate_successors_charact1. simpl. tauto. + case (In_dec peq a succs); intro. + elim (A i H1); auto. + rewrite B. unfold st1; simpl. apply PMap.gss. tauto. + apply A; auto. + rewrite B. unfold st1; simpl. apply PMap.gso. + red; intro; subst n. elim H0; intro. tauto. congruence. + tauto. +Qed. + +Lemma propagate_successors_invariant: + forall pc res rem, + Plt pc topnode -> + state_invariant (mkstate res (pc :: rem)) -> + state_invariant + (propagate_successors basic_block_map (successors pc) + (transf pc res!!pc) + (mkstate res rem)). +Proof. + intros until rem. intros PC [INV1 INV2]. simpl in INV1. simpl in INV2. + set (l := transf pc res!!pc). + generalize (propagate_successors_charact1 basic_block_map + (successors pc) l (mkstate res rem)). + generalize (propagate_successors_charact2 basic_block_map + (successors pc) l (mkstate res rem)). + set (st1 := propagate_successors basic_block_map + (successors pc) l (mkstate res rem)). + intros A B. + (* First part: BB entries remain at top *) + split; intros. + elim (A n); intros C D. rewrite D. simpl. apply INV1. auto. tauto. + (* Second part: monotonicity *) + (* Case 1: n = pc *) + case (peq pc n); intros. + subst n. right; intros. + elim (A s); intros C D. + replace (st1.(st_in)!!pc) with res!!pc. fold l. + caseEq (basic_block_map s); intro. + rewrite D. simpl. rewrite INV1. apply L.top_ge. auto. tauto. + elim (C H0 H1); intros. rewrite H3. apply L.refl_ge. + elim (A pc); intros E F. rewrite F. reflexivity. + case (In_dec peq pc (successors pc)); intro. + right. apply no_self_loop; auto. + left; auto. + (* Case 2: n <> pc *) + elim (INV2 n H); intro. + (* Case 2.1: n was already in worklist, still is *) + left. apply B. simpl. simpl in H0. tauto. + (* Case 2.2: n was not in worklist *) + assert (INV3: forall s, In s (successors n) -> st1.(st_in)!!s = res!!s). + (* Amazingly, successors of n do not change. The only way + they could change is if they were successors of pc as well, + but that gives them two different predecessors, so + they are basic block heads, and thus do not change! *) + intros. elim (A s); intros C D. rewrite D. reflexivity. + case (In_dec peq s (successors pc)); intro. + right. apply multiple_predecessors with n pc; auto. + left; auto. + case (In_dec peq n (successors pc)); intro. + (* Case 2.2.1: n is a successor of pc. Either it is in the + worklist or it did not change *) + caseEq (basic_block_map n); intro. + right; intros. + elim (A n); intros C D. rewrite D. simpl. + rewrite INV3; auto. + tauto. + left. elim (A n); intros C D. elim (C i H1); intros. auto. + (* Case 2.2.2: n is not a successor of pc. It did not change. *) + right; intros. + elim (A n); intros C D. rewrite D. simpl. + rewrite INV3; auto. + tauto. +Qed. + +Lemma discard_top_worklist_invariant: + forall pc res rem, + ~(Plt pc topnode) -> + state_invariant (mkstate res (pc :: rem)) -> + state_invariant (mkstate res rem). +Proof. + intros; red; intros. + elim H0; simpl; intros A B. + split. assumption. + intros. elim (B n H1); intros. + left. elim H2; intro. subst n. contradiction. auto. + right. assumption. +Qed. + +Lemma analyze_invariant: + forall res count st, + iterate _ _ (step basic_block_map) count st = Some res -> + state_invariant st -> + state_invariant (mkstate res nil). +Proof. + intros until count. pattern count. + apply positive_Peano_ind; intros until st. + rewrite iterate_base. congruence. + rewrite iterate_step. unfold step at 1. case st; intros r w; simpl. + case w. + intros. replace res with r. auto. congruence. + intros pc wl. case (plt pc topnode); intros. + eapply H. eauto. apply propagate_successors_invariant; auto. + eapply H. eauto. eapply discard_top_worklist_invariant; eauto. +Qed. + +Lemma initial_state_invariant: + state_invariant (mkstate (PMap.init L.top) (basic_block_list basic_block_map)). +Proof. + split; simpl; intros. + apply PMap.gi. + right. intros. repeat rewrite PMap.gi. apply L.top_ge. +Qed. + +(** ** Correctness of the returned solution *) + +Theorem fixpoint_solution: + forall res n s, + fixpoint = Some res -> + Plt n topnode -> In s (successors n) -> + L.ge res!!s (transf n res!!n). +Proof. + unfold fixpoint. + intros. + assert (state_invariant (mkstate res nil)). + eapply analyze_invariant; eauto. apply initial_state_invariant. + elim H2; simpl; intros. + elim (H4 n H0); intros. + contradiction. + auto. +Qed. + +Theorem fixpoint_entry: + forall res, + fixpoint = Some res -> + res!!entrypoint = L.top. +Proof. + unfold fixpoint. + intros. + assert (state_invariant (mkstate res nil)). + eapply analyze_invariant; eauto. apply initial_state_invariant. + elim H0; simpl; intros. + apply H1. unfold basic_block_map, is_basic_block_head. + fold predecessors. + destruct (predecessors!!entrypoint). auto. + destruct l. destruct (peq p entrypoint). auto. apply peq_true. + auto. +Qed. + +(** ** Preservation of a property over solutions *) + +Definition Pstate (st: state) : Prop := + forall pc, P st.(st_in)!!pc. + +Lemma propagate_successors_P: + forall bb l, + P l -> + forall succs st, + Pstate st -> + Pstate (propagate_successors bb succs l st). +Proof. + induction succs; simpl; intros. + auto. + case (bb a). auto. + apply IHsuccs. red; simpl; intros. + rewrite PMap.gsspec. case (peq pc a); intro. + auto. apply H0. +Qed. + +Lemma analyze_P: + forall bb res count st, + iterate _ _ (step bb) count st = Some res -> + Pstate st -> + (forall pc, P res!!pc). +Proof. + intros until count; pattern count; apply positive_Peano_ind; intros until st. + rewrite iterate_base. congruence. + rewrite iterate_step; unfold step at 1; destruct st.(st_wrk). + intros. inversion H0. apply H1. + destruct (plt p topnode). + intros. eapply H. eauto. + apply propagate_successors_P. apply Ptransf. apply H1. + red; intro; simpl. apply H1. + intros. eauto. +Qed. + +Theorem fixpoint_invariant: + forall res pc, fixpoint = Some res -> P res!!pc. +Proof. + intros. unfold fixpoint in H. eapply analyze_P; eauto. + red; intro; simpl. rewrite PMap.gi. apply Ptop. +Qed. + +End Solver. + +End BBlock_solver. + diff --git a/backend/LTL.v b/backend/LTL.v new file mode 100644 index 00000000..2c36cba9 --- /dev/null +++ b/backend/LTL.v @@ -0,0 +1,357 @@ +(** The LTL intermediate language: abstract syntax and semantics. + + LTL (``Location Transfer Language'') is the target language + for register allocation and the source language for linearization. *) + +Require Import Relations. +Require Import Coqlib. +Require Import Maps. +Require Import AST. +Require Import Integers. +Require Import Values. +Require Import Mem. +Require Import Globalenvs. +Require Import Op. +Require Import Locations. +Require Conventions. + +(** * Abstract syntax *) + +(** LTL is close to RTL, but uses locations instead of pseudo-registers, + and basic blocks instead of single instructions as nodes of its + control-flow graph. *) + +Definition node := positive. + +(** A basic block is a sequence of instructions terminated by + a [Bgoto], [Bcond] or [Breturn] instruction. (This invariant + is enforced by the following inductive type definition.) + The instructions behave like the similarly-named instructions + of RTL. They take machine registers (type [mreg]) as arguments + and results. Two new instructions are added: [Bgetstack] + and [Bsetstack], which are ``move'' instructions between + a machine register and a stack slot. *) + +Inductive block: Set := + | Bgetstack: slot -> mreg -> block -> block + | Bsetstack: mreg -> slot -> block -> block + | Bop: operation -> list mreg -> mreg -> block -> block + | Bload: memory_chunk -> addressing -> list mreg -> mreg -> block -> block + | Bstore: memory_chunk -> addressing -> list mreg -> mreg -> block -> block + | Bcall: signature -> mreg + ident -> block -> block + | Bgoto: node -> block + | Bcond: condition -> list mreg -> node -> node -> block + | Breturn: block. + +Definition code: Set := PTree.t block. + +(** Unlike in RTL, parameter passing (passing values of the arguments + to a function call to the parameters of the called function) is done + via conventional locations (machine registers and stack slots). + Consequently, the [Bcall] instruction has no list of argument registers, + and function descriptions have no list of parameter registers. *) + +Record function: Set := mkfunction { + fn_sig: signature; + fn_stacksize: Z; + fn_code: code; + fn_entrypoint: node; + fn_code_wf: + forall (pc: node), Plt pc (Psucc fn_entrypoint) \/ fn_code!pc = None +}. + +Definition program := AST.program function. + +(** * Operational semantics *) + +Definition genv := Genv.t function. +Definition locset := Locmap.t. + +Section RELSEM. + +(** Calling conventions are reflected at the level of location sets + (environments mapping locations to values) by the following two + functions. + + [call_regs caller] returns the location set at function entry, + as a function of the location set [caller] of the calling function. +- Machine registers have the same values as in the caller. +- Incoming stack slots (used for parameter passing) have the same + values as the corresponding outgoing stack slots (used for argument + passing) in the caller. +- Local and outgoing stack slots are initialized to undefined values. +*) + +Definition call_regs (caller: locset) : locset := + fun (l: loc) => + match l with + | R r => caller (R r) + | S (Local ofs ty) => Vundef + | S (Incoming ofs ty) => caller (S (Outgoing ofs ty)) + | S (Outgoing ofs ty) => Vundef + end. + +(** [return_regs caller callee] returns the location set after + a call instruction, as a function of the location set [caller] + of the caller before the call instruction and of the location + set [callee] of the callee at the return instruction. +- Callee-save machine registers have the same values as in the caller + before the call. +- Caller-save and temporary machine registers have the same values + as in the callee at the return point. +- Stack slots have the same values as in the caller before the call. +*) + +Definition return_regs (caller callee: locset) : locset := + fun (l: loc) => + match l with + | R r => + if In_dec Loc.eq (R r) Conventions.temporaries then + callee (R r) + else if In_dec Loc.eq (R r) Conventions.destroyed_at_call then + callee (R r) + else + caller (R r) + | S s => caller (S s) + end. + +Variable ge: genv. + +Definition find_function (ros: mreg + ident) (rs: locset) : option function := + match ros with + | inl r => Genv.find_funct ge (rs (R r)) + | inr symb => + match Genv.find_symbol ge symb with + | None => None + | Some b => Genv.find_funct_ptr ge b + end + end. + +Definition reglist (rl: list mreg) (rs: locset) : list val := + List.map (fun r => rs (R r)) rl. + +(** The dynamic semantics of LTL, like that of RTL, is a combination + of small-step transition semantics and big-step semantics. + Function calls are treated in big-step style so that they appear + as a single transition in the caller function. Other instructions + are treated in purely small-step style, as a single transition. + + The introduction of basic blocks increases the number of inductive + predicates needed to express the semantics: +- [exec_instr ge sp b ls m b' ls' m'] is the execution of the first + instruction of block [b]. [b'] is the remainder of the block. +- [exec_instrs ge sp b ls m b' ls' m'] is similar, but executes + zero, one or several instructions at the beginning of block [b]. +- [exec_block ge sp b ls m out ls' m'] executes all instructions + of block [b]. The outcome [out] is either [Cont s], indicating + that the block terminates by branching to block labeled [s], + or [Return], indicating that the block terminates by returning + from the current function. +- [exec_blocks ge code sp pc ls m out ls' m'] executes a sequence + of zero, one or several blocks, starting at the block labeled [pc]. + [code] is the control-flow graph for the current function. + The outcome [out] indicates how the last block in this sequence + terminates: by branching to another block or by returning from the + function. +- [exec_function ge f ls m ls' m'] executes the body of function [f], + from its entry point to the first [Lreturn] instruction encountered. + + In all these predicates, [ls] and [ls'] are the location sets + (values of locations) at the beginning and end of the transitions, + respectively. +*) + +Inductive outcome: Set := + | Cont: node -> outcome + | Return: outcome. + +Inductive exec_instr: val -> + block -> locset -> mem -> + block -> locset -> mem -> Prop := + | exec_Bgetstack: + forall sp sl r b rs m, + exec_instr sp (Bgetstack sl r b) rs m + b (Locmap.set (R r) (rs (S sl)) rs) m + | exec_Bsetstack: + forall sp r sl b rs m, + exec_instr sp (Bsetstack r sl b) rs m + b (Locmap.set (S sl) (rs (R r)) rs) m + | exec_Bop: + forall sp op args res b rs m v, + eval_operation ge sp op (reglist args rs) = Some v -> + exec_instr sp (Bop op args res b) rs m + b (Locmap.set (R res) v rs) m + | exec_Bload: + forall sp chunk addr args dst b rs m a v, + eval_addressing ge sp addr (reglist args rs) = Some a -> + loadv chunk m a = Some v -> + exec_instr sp (Bload chunk addr args dst b) rs m + b (Locmap.set (R dst) v rs) m + | exec_Bstore: + forall sp chunk addr args src b rs m m' a, + eval_addressing ge sp addr (reglist args rs) = Some a -> + storev chunk m a (rs (R src)) = Some m' -> + exec_instr sp (Bstore chunk addr args src b) rs m + b rs m' + | exec_Bcall: + forall sp sig ros b rs m f rs' m', + find_function ros rs = Some f -> + sig = f.(fn_sig) -> + exec_function f rs m rs' m' -> + exec_instr sp (Bcall sig ros b) rs m + b (return_regs rs rs') m' + +with exec_instrs: val -> + block -> locset -> mem -> + block -> locset -> mem -> Prop := + | exec_refl: + forall sp b rs m, + exec_instrs sp b rs m b rs m + | exec_one: + forall sp b1 rs1 m1 b2 rs2 m2, + exec_instr sp b1 rs1 m1 b2 rs2 m2 -> + exec_instrs sp b1 rs1 m1 b2 rs2 m2 + | exec_trans: + forall sp b1 rs1 m1 b2 rs2 m2 b3 rs3 m3, + exec_instrs sp b1 rs1 m1 b2 rs2 m2 -> + exec_instrs sp b2 rs2 m2 b3 rs3 m3 -> + exec_instrs sp b1 rs1 m1 b3 rs3 m3 + +with exec_block: val -> + block -> locset -> mem -> + outcome -> locset -> mem -> Prop := + | exec_Bgoto: + forall sp b rs m s rs' m', + exec_instrs sp b rs m (Bgoto s) rs' m' -> + exec_block sp b rs m (Cont s) rs' m' + | exec_Bcond_true: + forall sp b rs m cond args ifso ifnot rs' m', + exec_instrs sp b rs m (Bcond cond args ifso ifnot) rs' m' -> + eval_condition cond (reglist args rs') = Some true -> + exec_block sp b rs m (Cont ifso) rs' m' + | exec_Bcond_false: + forall sp b rs m cond args ifso ifnot rs' m', + exec_instrs sp b rs m (Bcond cond args ifso ifnot) rs' m' -> + eval_condition cond (reglist args rs') = Some false -> + exec_block sp b rs m (Cont ifnot) rs' m' + | exec_Breturn: + forall sp b rs m rs' m', + exec_instrs sp b rs m Breturn rs' m' -> + exec_block sp b rs m Return rs' m' + +with exec_blocks: code -> val -> + node -> locset -> mem -> + outcome -> locset -> mem -> Prop := + | exec_blocks_refl: + forall c sp pc rs m, + exec_blocks c sp pc rs m (Cont pc) rs m + | exec_blocks_one: + forall c sp pc b m rs out rs' m', + c!pc = Some b -> + exec_block sp b rs m out rs' m' -> + exec_blocks c sp pc rs m out rs' m' + | exec_blocks_trans: + forall c sp pc1 rs1 m1 pc2 rs2 m2 out rs3 m3, + exec_blocks c sp pc1 rs1 m1 (Cont pc2) rs2 m2 -> + exec_blocks c sp pc2 rs2 m2 out rs3 m3 -> + exec_blocks c sp pc1 rs1 m1 out rs3 m3 + +with exec_function: function -> locset -> mem -> + locset -> mem -> Prop := + | exec_funct: + forall f rs m m1 stk rs2 m2, + alloc m 0 f.(fn_stacksize) = (m1, stk) -> + exec_blocks f.(fn_code) (Vptr stk Int.zero) + f.(fn_entrypoint) (call_regs rs) m1 Return rs2 m2 -> + exec_function f rs m rs2 (free m2 stk). + +End RELSEM. + +(** Execution of a whole program boils down to invoking its main + function. The result of the program is the return value of the + main function, to be found in the machine register dictated + by the calling conventions. *) + +Definition exec_program (p: program) (r: val) : Prop := + let ge := Genv.globalenv p in + let m0 := Genv.init_mem p in + exists b, exists f, exists rs, exists m, + Genv.find_symbol ge p.(prog_main) = Some b /\ + Genv.find_funct_ptr ge b = Some f /\ + f.(fn_sig) = mksignature nil (Some Tint) /\ + exec_function ge f (Locmap.init Vundef) m0 rs m /\ + rs (R (Conventions.loc_result f.(fn_sig))) = r. + +(** We remark that the [exec_blocks] relation is stable if + the control-flow graph is extended by adding new basic blocks + at previously unused graph nodes. *) + +Section EXEC_BLOCKS_EXTENDS. + +Variable ge: genv. +Variable c1 c2: code. +Hypothesis EXT: forall pc, c2!pc = c1!pc \/ c1!pc = None. + +Lemma exec_blocks_extends: + forall sp pc1 rs1 m1 out rs2 m2, + exec_blocks ge c1 sp pc1 rs1 m1 out rs2 m2 -> + exec_blocks ge c2 sp pc1 rs1 m1 out rs2 m2. +Proof. + induction 1. + apply exec_blocks_refl. + apply exec_blocks_one with b. + elim (EXT pc); intro; congruence. assumption. + eapply exec_blocks_trans; eauto. +Qed. + +End EXEC_BLOCKS_EXTENDS. + +(** * Operations over LTL *) + +(** Computation of the possible successors of a basic block. + This is used for dataflow analyses. *) + +Fixpoint successors_aux (b: block) : list node := + match b with + | Bgetstack s r b => successors_aux b + | Bsetstack r s b => successors_aux b + | Bop op args res b => successors_aux b + | Bload chunk addr args dst b => successors_aux b + | Bstore chunk addr args src b => successors_aux b + | Bcall sig ros b => successors_aux b + | Bgoto n => n :: nil + | Bcond cond args ifso ifnot => ifso :: ifnot :: nil + | Breturn => nil + end. + +Definition successors (f: function) (pc: node) : list node := + match f.(fn_code)!pc with + | None => nil + | Some b => successors_aux b + end. + +Lemma successors_aux_invariant: + forall ge sp b rs m b' rs' m', + exec_instrs ge sp b rs m b' rs' m' -> + successors_aux b = successors_aux b'. +Proof. + induction 1; simpl; intros. + reflexivity. + inversion H; reflexivity. + transitivity (successors_aux b2); auto. +Qed. + +Lemma successors_correct: + forall ge f sp pc rs m pc' rs' m' b, + f.(fn_code)!pc = Some b -> + exec_block ge sp b rs m (Cont pc') rs' m' -> + In pc' (successors f pc). +Proof. + intros. unfold successors. rewrite H. inversion H0. + rewrite (successors_aux_invariant _ _ _ _ _ _ _ _ H6); simpl. + tauto. + rewrite (successors_aux_invariant _ _ _ _ _ _ _ _ H2); simpl. + tauto. + rewrite (successors_aux_invariant _ _ _ _ _ _ _ _ H2); simpl. + tauto. +Qed. diff --git a/backend/LTLtyping.v b/backend/LTLtyping.v new file mode 100644 index 00000000..3f13ac3c --- /dev/null +++ b/backend/LTLtyping.v @@ -0,0 +1,93 @@ +(** Typing rules for LTL. *) + +Require Import Coqlib. +Require Import Maps. +Require Import AST. +Require Import Op. +Require Import RTL. +Require Import Locations. +Require Import LTL. +Require Import Conventions. + +(** The following predicates define a type system for LTL similar to that + of [RTL] (see file [RTLtyping]): it statically guarantees that operations + and addressing modes are applied to the right number of arguments + and that the arguments are of the correct types. Moreover, it also + enforces some correctness conditions on the offsets of stack slots + accessed through [Bgetstack] and [Bsetstack] LTL instructions. *) + +Section WT_BLOCK. + +Variable funct: function. + +Definition slot_bounded (s: slot) := + match s with + | Local ofs ty => 0 <= ofs + | Outgoing ofs ty => 6 <= ofs + | Incoming ofs ty => 6 <= ofs /\ ofs + typesize ty <= size_arguments funct.(fn_sig) + end. + +Inductive wt_block : block -> Prop := + | wt_Bgetstack: + forall s r b, + slot_type s = mreg_type r -> + slot_bounded s -> + wt_block b -> + wt_block (Bgetstack s r b) + | wt_Bsetstack: + forall r s b, + match s with Incoming _ _ => False | _ => True end -> + slot_type s = mreg_type r -> + slot_bounded s -> + wt_block b -> + wt_block (Bsetstack r s b) + | wt_Bopmove: + forall r1 r b, + mreg_type r1 = mreg_type r -> + wt_block b -> + wt_block (Bop Omove (r1 :: nil) r b) + | wt_Bopundef: + forall r b, + wt_block b -> + wt_block (Bop Oundef nil r b) + | wt_Bop: + forall op args res b, + op <> Omove -> op <> Oundef -> + (List.map mreg_type args, mreg_type res) = type_of_operation op -> + wt_block b -> + wt_block (Bop op args res b) + | wt_Bload: + forall chunk addr args dst b, + List.map mreg_type args = type_of_addressing addr -> + mreg_type dst = type_of_chunk chunk -> + wt_block b -> + wt_block (Bload chunk addr args dst b) + | wt_Bstore: + forall chunk addr args src b, + List.map mreg_type args = type_of_addressing addr -> + mreg_type src = type_of_chunk chunk -> + wt_block b -> + wt_block (Bstore chunk addr args src b) + | wt_Bcall: + forall sig ros b, + match ros with inl r => mreg_type r = Tint | _ => True end -> + wt_block b -> + wt_block (Bcall sig ros b) + | wt_Bgoto: + forall lbl, + wt_block (Bgoto lbl) + | wt_Bcond: + forall cond args ifso ifnot, + List.map mreg_type args = type_of_condition cond -> + wt_block (Bcond cond args ifso ifnot) + | wt_Breturn: + wt_block (Breturn). + +End WT_BLOCK. + +Definition wt_function (f: function) : Prop := + forall pc b, f.(fn_code)!pc = Some b -> wt_block f b. + +Definition wt_program (p: program) : Prop := + forall i f, In (i, f) (prog_funct p) -> wt_function f. + diff --git a/backend/Linear.v b/backend/Linear.v new file mode 100644 index 00000000..f4ed0454 --- /dev/null +++ b/backend/Linear.v @@ -0,0 +1,203 @@ +(** The Linear intermediate language: abstract syntax and semantcs *) + +(** The Linear language is a variant of LTL where control-flow is not + expressed as a graph of basic blocks, but as a linear list of + instructions with explicit labels and ``goto'' instructions. *) + +Require Import Relations. +Require Import Coqlib. +Require Import Maps. +Require Import AST. +Require Import Integers. +Require Import Values. +Require Import Mem. +Require Import Globalenvs. +Require Import Op. +Require Import Locations. +Require Import LTL. +Require Conventions. + +(** * Abstract syntax *) + +Definition label := positive. + +(** Linear instructions are similar to their LTL counterpart: + arguments and results are machine registers, except for the + [Lgetstack] and [Lsetstack] instructions which are register-stack moves. + Except the last three, these instructions continue in sequence + with the next instruction in the linear list of instructions. + Unconditional branches [Lgoto] and conditional branches [Lcond] + transfer control to the given code label. Labels are explicitly + inserted in the instruction list as pseudo-instructions [Llabel]. *) + +Inductive instruction: Set := + | Lgetstack: slot -> mreg -> instruction + | Lsetstack: mreg -> slot -> instruction + | Lop: operation -> list mreg -> mreg -> instruction + | Lload: memory_chunk -> addressing -> list mreg -> mreg -> instruction + | Lstore: memory_chunk -> addressing -> list mreg -> mreg -> instruction + | Lcall: signature -> mreg + ident -> instruction + | Llabel: label -> instruction + | Lgoto: label -> instruction + | Lcond: condition -> list mreg -> label -> instruction + | Lreturn: instruction. + +Definition code: Set := list instruction. + +Record function: Set := mkfunction { + fn_sig: signature; + fn_stacksize: Z; + fn_code: code +}. + +Definition program := AST.program function. + +Definition genv := Genv.t function. +Definition locset := Locmap.t. + +(** * Operational semantics *) + +(** Looking up labels in the instruction list. *) + +Definition is_label (lbl: label) (instr: instruction) : bool := + match instr with + | Llabel lbl' => if peq lbl lbl' then true else false + | _ => false + end. + +Lemma is_label_correct: + forall lbl instr, + if is_label lbl instr then instr = Llabel lbl else instr <> Llabel lbl. +Proof. + intros. destruct instr; simpl; try discriminate. + case (peq lbl l); intro; congruence. +Qed. + +(** [find_label lbl c] returns a list of instruction, suffix of the + code [c], that immediately follows the [Llabel lbl] pseudo-instruction. + If the label [lbl] is multiply-defined, the first occurrence is + retained. If the label [lbl] is not defined, [None] is returned. *) + +Fixpoint find_label (lbl: label) (c: code) {struct c} : option code := + match c with + | nil => None + | i1 :: il => if is_label lbl i1 then Some il else find_label lbl il + end. + +Section RELSEM. + +Variable ge: genv. + +Definition find_function (ros: mreg + ident) (rs: locset) : option function := + match ros with + | inl r => Genv.find_funct ge (rs (R r)) + | inr symb => + match Genv.find_symbol ge symb with + | None => None + | Some b => Genv.find_funct_ptr ge b + end + end. + +(** [exec_instr ge f sp c ls m c' ls' m'] represents the execution + of the first instruction in the code sequence [c]. [ls] and [m] + are the initial location set and memory state, respectively. + [c'] is the current code sequence after execution of the instruction: + it is the tail of [c] if the instruction falls through. + [ls'] and [m'] are the final location state and memory state. *) + +Inductive exec_instr: function -> val -> + code -> locset -> mem -> + code -> locset -> mem -> Prop := + | exec_Lgetstack: + forall f sp sl r b rs m, + exec_instr f sp (Lgetstack sl r :: b) rs m + b (Locmap.set (R r) (rs (S sl)) rs) m + | exec_Lsetstack: + forall f sp r sl b rs m, + exec_instr f sp (Lsetstack r sl :: b) rs m + b (Locmap.set (S sl) (rs (R r)) rs) m + | exec_Lop: + forall f sp op args res b rs m v, + eval_operation ge sp op (reglist args rs) = Some v -> + exec_instr f sp (Lop op args res :: b) rs m + b (Locmap.set (R res) v rs) m + | exec_Lload: + forall f sp chunk addr args dst b rs m a v, + eval_addressing ge sp addr (reglist args rs) = Some a -> + loadv chunk m a = Some v -> + exec_instr f sp (Lload chunk addr args dst :: b) rs m + b (Locmap.set (R dst) v rs) m + | exec_Lstore: + forall f sp chunk addr args src b rs m m' a, + eval_addressing ge sp addr (reglist args rs) = Some a -> + storev chunk m a (rs (R src)) = Some m' -> + exec_instr f sp (Lstore chunk addr args src :: b) rs m + b rs m' + | exec_Lcall: + forall f sp sig ros b rs m f' rs' m', + find_function ros rs = Some f' -> + sig = f'.(fn_sig) -> + exec_function f' rs m rs' m' -> + exec_instr f sp (Lcall sig ros :: b) rs m + b (return_regs rs rs') m' + | exec_Llabel: + forall f sp lbl b rs m, + exec_instr f sp (Llabel lbl :: b) rs m + b rs m + | exec_Lgoto: + forall f sp lbl b rs m b', + find_label lbl f.(fn_code) = Some b' -> + exec_instr f sp (Lgoto lbl :: b) rs m + b' rs m + | exec_Lcond_true: + forall f sp cond args lbl b rs m b', + eval_condition cond (reglist args rs) = Some true -> + find_label lbl f.(fn_code) = Some b' -> + exec_instr f sp (Lcond cond args lbl :: b) rs m + b' rs m + | exec_Lcond_false: + forall f sp cond args lbl b rs m, + eval_condition cond (reglist args rs) = Some false -> + exec_instr f sp (Lcond cond args lbl :: b) rs m + b rs m + +with exec_instrs: function -> val -> + code -> locset -> mem -> + code -> locset -> mem -> Prop := + | exec_refl: + forall f sp b rs m, + exec_instrs f sp b rs m b rs m + | exec_one: + forall f sp b1 rs1 m1 b2 rs2 m2, + exec_instr f sp b1 rs1 m1 b2 rs2 m2 -> + exec_instrs f sp b1 rs1 m1 b2 rs2 m2 + | exec_trans: + forall f sp b1 rs1 m1 b2 rs2 m2 b3 rs3 m3, + exec_instrs f sp b1 rs1 m1 b2 rs2 m2 -> + exec_instrs f sp b2 rs2 m2 b3 rs3 m3 -> + exec_instrs f sp b1 rs1 m1 b3 rs3 m3 + +with exec_function: function -> locset -> mem -> locset -> mem -> Prop := + | exec_funct: + forall f rs m m1 stk rs2 m2 b, + alloc m 0 f.(fn_stacksize) = (m1, stk) -> + exec_instrs f (Vptr stk Int.zero) + f.(fn_code) (call_regs rs) m1 (Lreturn :: b) rs2 m2 -> + exec_function f rs m rs2 (free m2 stk). + +Scheme exec_instr_ind3 := Minimality for exec_instr Sort Prop + with exec_instrs_ind3 := Minimality for exec_instrs Sort Prop + with exec_function_ind3 := Minimality for exec_function Sort Prop. + +End RELSEM. + +Definition exec_program (p: program) (r: val) : Prop := + let ge := Genv.globalenv p in + let m0 := Genv.init_mem p in + exists b, exists f, exists rs, exists m, + Genv.find_symbol ge p.(prog_main) = Some b /\ + Genv.find_funct_ptr ge b = Some f /\ + f.(fn_sig) = mksignature nil (Some Tint) /\ + exec_function ge f (Locmap.init Vundef) m0 rs m /\ + rs (R (Conventions.loc_result f.(fn_sig))) = r. + diff --git a/backend/Linearize.v b/backend/Linearize.v new file mode 100644 index 00000000..af70b0fd --- /dev/null +++ b/backend/Linearize.v @@ -0,0 +1,212 @@ +(** Linearization of the control-flow graph: + translation from LTL to Linear *) + +Require Import Coqlib. +Require Import Maps. +Require Import Sets. +Require Import AST. +Require Import Values. +Require Import Globalenvs. +Require Import Op. +Require Import Locations. +Require Import LTL. +Require Import Linear. +Require Import Kildall. +Require Import Lattice. + +(** To translate from LTL to Linear, we must layout the basic blocks + of the LTL control-flow graph in some linear order, and insert + explicit branches and conditional branches to make sure that + each basic block jumps to its successors as prescribed by the + LTL control-flow graph. However, branches are not necessary + if the fall-through behaviour of Linear instructions already + implements the desired flow of control. For instance, + consider the two LTL basic blocks +<< + L1: Bop op args res (Bgoto L2) + L2: ... +>> + If the blocks [L1] and [L2] are laid out consecutively in the Linear + code, we can generate the following Linear code: +<< + L1: Lop op args res + L2: ... +>> + However, if this is not possible, an explicit [Lgoto] is needed: +<< + L1: Lop op args res + Lgoto L2 + ... + L2: ... +>> + The main challenge in code linearization is therefore to pick a + ``good'' order for the basic blocks that exploits well the + fall-through behavior. Many clever trace picking heuristics + have been developed for this purpose. + + In this file, we present linearization in a way that clearly + separates the heuristic part (choosing an order for the basic blocks) + from the actual code transformation parts. We proceed in three passes: +- Choosing an order for the basic blocks. This returns an enumeration + of CFG nodes stating that the basic blocks must be laid out in + the order shown in the list. +- Generate naive Linear code where each basic block branches explicitly + to its successors, even if one of these successors is the next basic + block. +- Simplify the naive Linear code, removing unconditional branches to + a label that immediately follows: +<< + ... ... + Igoto L1; becomes L1: ... + L1: ... +>> + The beauty of this approach is that correct code is generated + under surprisingly weak hypotheses on the enumeration of + CFG nodes: it suffices that every reachable basic block occurs + exactly once in the enumeration. While the enumeration heuristic we use + is quite trivial, it is easy to implement more sophisticated + trace picking heuristics: as long as they satisfy the property above, + we do not need to re-do the proof of semantic preservation. + The enumeration could even be performed by external, untrusted + Caml code, then verified (for the property above) by certified Coq code. +*) + +(** * Determination of the order of basic blocks *) + +(** We first compute a mapping from CFG nodes to booleans, + indicating whether a CFG basic block is reachable or not. + This computation is a trivial forward dataflow analysis + where the transfer function is the identity: the successors + of a reachable block are reachable, by the very + definition of reachability. *) + +Module DS := Dataflow_Solver(LBoolean). + +Definition reachable_aux (f: LTL.function) : option (PMap.t bool) := + DS.fixpoint + (successors f) + (Psucc f.(fn_entrypoint)) + (fun pc r => r) + ((f.(fn_entrypoint), true) :: nil). + +Definition reachable (f: LTL.function) : PMap.t bool := + match reachable_aux f with + | None => PMap.init true + | Some rs => rs + end. + +(** We then enumerate the nodes of reachable basic blocks. The heuristic + we take is trivial: nodes are enumerated in decreasing numerical + order. Remember that CFG nodes are positive integers, and that + the [RTLgen] pass allocated fresh nodes 1, 2, 3, ..., but generated + instructions in roughly reverse control-flow order: often, + a basic block at label [n] has [n-1] as its only successor. + Therefore, by enumerating reachable nodes in decreasing order, + we recover a reasonable layout of basic blocks that globally respects + the structure of the original Cminor code. *) + +Definition enumerate (f: LTL.function) : list node := + let reach := reachable f in + positive_rec (list node) nil + (fun pc nodes => if reach!!pc then pc :: nodes else nodes) + (Psucc f.(fn_entrypoint)). + +(** * Translation from LTL to Linear *) + +(** We now flatten the structure of the CFG graph, laying out + basic blocks consecutively in the order computed by [enumerate], + and inserting a branch at the end of every basic block. + + For blocks ending in a conditional branch [Bcond cond args s1 s2], + we have two possible translations: +<< + Lcond cond args s1; or Lcond (not cond) args s2; + Lgoto s2 Lgoto s1 +>> + We favour the first translation if [s2] is the label of the + next instruction, and the second if [s1] is the label of the + next instruction, thus exhibiting more opportunities for + fall-through optimization later. *) + +Fixpoint starts_with (lbl: label) (k: code) {struct k} : bool := + match k with + | Llabel lbl' :: k' => if peq lbl lbl' then true else starts_with lbl k' + | _ => false + end. + +Fixpoint linearize_block (b: block) (k: code) {struct b} : code := + match b with + | Bgetstack s r b => + Lgetstack s r :: linearize_block b k + | Bsetstack r s b => + Lsetstack r s :: linearize_block b k + | Bop op args res b => + Lop op args res :: linearize_block b k + | Bload chunk addr args dst b => + Lload chunk addr args dst :: linearize_block b k + | Bstore chunk addr args src b => + Lstore chunk addr args src :: linearize_block b k + | Bcall sig ros b => + Lcall sig ros :: linearize_block b k + | Bgoto s => + Lgoto s :: k + | Bcond cond args s1 s2 => + if starts_with s1 k then + Lcond (negate_condition cond) args s2 :: Lgoto s1 :: k + else + Lcond cond args s1 :: Lgoto s2 :: k + | Breturn => + Lreturn :: k + end. + +(* Linearize a function body according to an enumeration of its + nodes. *) + +Fixpoint linearize_body (f: LTL.function) (enum: list node) + {struct enum} : code := + match enum with + | nil => nil + | pc :: rem => + match f.(LTL.fn_code)!pc with + | None => linearize_body f rem + | Some b => Llabel pc :: linearize_block b (linearize_body f rem) + end + end. + +Definition linearize_function (f: LTL.function) : Linear.function := + mkfunction + (LTL.fn_sig f) + (LTL.fn_stacksize f) + (linearize_body f (enumerate f)). + +(** * Cleanup of the linearized code *) + +(** We now eliminate [Lgoto] instructions that branch to an + immediately following label: these are redundant, as the fall-through + behaviour obtained by removing the [Lgoto] instruction is + semantically equivalent. *) + +Fixpoint cleanup_code (c: code) {struct c} : code := + match c with + | nil => nil + | Lgoto lbl :: c' => + if starts_with lbl c' + then cleanup_code c' + else Lgoto lbl :: cleanup_code c' + | i :: c' => + i :: cleanup_code c' + end. + +Definition cleanup_function (f: Linear.function) : Linear.function := + mkfunction + (fn_sig f) + (fn_stacksize f) + (cleanup_code f.(fn_code)). + +(** * Entry points for code linearization *) + +Definition transf_function (f: LTL.function) : Linear.function := + cleanup_function (linearize_function f). + +Definition transf_program (p: LTL.program) : Linear.program := + transform_program transf_function p. diff --git a/backend/Linearizeproof.v b/backend/Linearizeproof.v new file mode 100644 index 00000000..b80acb4d --- /dev/null +++ b/backend/Linearizeproof.v @@ -0,0 +1,711 @@ +(** Correctness proof for code linearization *) + +Require Import Coqlib. +Require Import Maps. +Require Import AST. +Require Import Integers. +Require Import Values. +Require Import Mem. +Require Import Globalenvs. +Require Import Op. +Require Import Locations. +Require Import LTL. +Require Import Linear. +Require Import Linearize. +Require Import Lattice. + +Section LINEARIZATION. + +Variable prog: LTL.program. +Let tprog := transf_program prog. + +Let ge := Genv.globalenv prog. +Let tge := Genv.globalenv tprog. + +Lemma functions_translated: + forall v f, + Genv.find_funct ge v = Some f -> + Genv.find_funct tge v = Some (transf_function f). +Proof (@Genv.find_funct_transf _ _ transf_function prog). + +Lemma function_ptr_translated: + forall v f, + Genv.find_funct_ptr ge v = Some f -> + Genv.find_funct_ptr tge v = Some (transf_function f). +Proof (@Genv.find_funct_ptr_transf _ _ transf_function prog). + +Lemma symbols_preserved: + forall id, + Genv.find_symbol tge id = Genv.find_symbol ge id. +Proof (@Genv.find_symbol_transf _ _ transf_function prog). + +(** * Correctness of reachability analysis *) + +(** The entry point of the function is reachable. *) + +Lemma reachable_entrypoint: + forall f, (reachable f)!!(f.(fn_entrypoint)) = true. +Proof. + intros. unfold reachable. + caseEq (reachable_aux f). + unfold reachable_aux; intros reach A. + assert (LBoolean.ge reach!!(f.(fn_entrypoint)) true). + eapply DS.fixpoint_entry. eexact A. auto with coqlib. + unfold LBoolean.ge in H. tauto. + intros. apply PMap.gi. +Qed. + +(** The successors of a reachable basic block are reachable. *) + +Lemma reachable_successors: + forall f pc pc', + f.(LTL.fn_code)!pc <> None -> + In pc' (successors f pc) -> + (reachable f)!!pc = true -> + (reachable f)!!pc' = true. +Proof. + intro f. unfold reachable. + caseEq (reachable_aux f). + unfold reachable_aux. intro reach; intros. + assert (LBoolean.ge reach!!pc' reach!!pc). + change (reach!!pc) with ((fun pc r => r) pc (reach!!pc)). + eapply DS.fixpoint_solution. eexact H. + elim (fn_code_wf f pc); intro. auto. contradiction. + auto. + elim H3; intro. congruence. auto. + intros. apply PMap.gi. +Qed. + +(* If we have a valid LTL transition from [pc] to [pc'], and + [pc] is reachable, then [pc'] is reachable. *) + +Lemma reachable_correct_1: + forall f sp pc rs m pc' rs' m' b, + f.(LTL.fn_code)!pc = Some b -> + exec_block ge sp b rs m (Cont pc') rs' m' -> + (reachable f)!!pc = true -> + (reachable f)!!pc' = true. +Proof. + intros. apply reachable_successors with pc; auto. + congruence. + eapply successors_correct; eauto. +Qed. + +Lemma reachable_correct_2: + forall c sp pc rs m out rs' m', + exec_blocks ge c sp pc rs m out rs' m' -> + forall f pc', + c = f.(LTL.fn_code) -> + out = Cont pc' -> + (reachable f)!!pc = true -> + (reachable f)!!pc' = true. +Proof. + induction 1; intros. + congruence. + eapply reachable_correct_1. rewrite <- H1; eauto. + subst out; eauto. auto. + auto. +Qed. + +(** * Properties of node enumeration *) + +(** An enumeration of CFG nodes is correct if the following conditions hold: +- All nodes for reachable basic blocks must be in the list. +- The function entry point is the first element in the list. +- The list is without repetition (so that no code duplication occurs). + +We prove that our [enumerate] function satisfies all three. *) + +Lemma enumerate_complete: + forall f pc i, + f.(LTL.fn_code)!pc = Some i -> + (reachable f)!!pc = true -> + In pc (enumerate f). +Proof. + intros. + assert (forall p, + Plt p (Psucc f.(fn_entrypoint)) -> + (reachable f)!!p = true -> + In p (enumerate f)). + unfold enumerate. pattern (Psucc (fn_entrypoint f)). + apply positive_Peano_ind. + intros. compute in H1. destruct p; discriminate. + intros. rewrite positive_rec_succ. elim (Plt_succ_inv _ _ H2); intro. + case (reachable f)!!x. apply in_cons. auto. auto. + subst x. rewrite H3. apply in_eq. + elim (LTL.fn_code_wf f pc); intro. auto. congruence. +Qed. + +Lemma enumerate_head: + forall f, exists l, enumerate f = f.(LTL.fn_entrypoint) :: l. +Proof. + intro. unfold enumerate. rewrite positive_rec_succ. + rewrite reachable_entrypoint. + exists (positive_rec (list node) nil + (fun (pc : positive) (nodes : list node) => + if (reachable f) !! pc then pc :: nodes else nodes) + (fn_entrypoint f) ). + auto. +Qed. + +Lemma enumerate_norepet: + forall f, list_norepet (enumerate f). +Proof. + intro. + unfold enumerate. pattern (Psucc (fn_entrypoint f)). + apply positive_Peano_ind. + rewrite positive_rec_base. constructor. + intros. rewrite positive_rec_succ. + case (reachable f)!!x. auto. + constructor. + assert (forall y, + In y (positive_rec + (list node) nil + (fun (pc : positive) (nodes : list node) => + if (reachable f) !! pc then pc :: nodes else nodes) x) -> + Plt y x). + pattern x. apply positive_Peano_ind. + rewrite positive_rec_base. intros. elim H0. + intros until y. rewrite positive_rec_succ. + case (reachable f)!!x0. + simpl. intros [A|B]. + subst x0. apply Plt_succ. + apply Plt_trans_succ. auto. + intro. apply Plt_trans_succ. auto. + red; intro. generalize (H0 x H1). exact (Plt_strict x). auto. + auto. +Qed. + +(** * Correctness of the cleanup pass *) + +(** If labels are globally unique and the Linear code [c] contains + a subsequence [Llabel lbl :: c1], [find_label lbl c] returns [c1]. +*) + +Fixpoint unique_labels (c: code) : Prop := + match c with + | nil => True + | Llabel lbl :: c => ~(In (Llabel lbl) c) /\ unique_labels c + | i :: c => unique_labels c + end. + +Inductive is_tail: code -> code -> Prop := + | is_tail_refl: + forall c, is_tail c c + | is_tail_cons: + forall i c1 c2, is_tail c1 c2 -> is_tail c1 (i :: c2). + +Lemma is_tail_in: + forall i c1 c2, is_tail (i :: c1) c2 -> In i c2. +Proof. + induction c2; simpl; intros. + inversion H. + inversion H. tauto. right; auto. +Qed. + +Lemma is_tail_cons_left: + forall i c1 c2, is_tail (i :: c1) c2 -> is_tail c1 c2. +Proof. + induction c2; intros; inversion H. + constructor. constructor. constructor. auto. +Qed. + +Lemma find_label_unique: + forall lbl c1 c2 c3, + is_tail (Llabel lbl :: c1) c2 -> + unique_labels c2 -> + find_label lbl c2 = Some c3 -> + c1 = c3. +Proof. + induction c2. + simpl; intros; discriminate. + intros c3 TAIL UNIQ. simpl. + generalize (is_label_correct lbl a). case (is_label lbl a); intro ISLBL. + subst a. intro. inversion TAIL. congruence. + elim UNIQ; intros. elim H4. apply is_tail_in with c1; auto. + inversion TAIL. congruence. apply IHc2. auto. + destruct a; simpl in UNIQ; tauto. +Qed. + +(** Correctness of the [starts_with] test. *) + +Lemma starts_with_correct: + forall lbl c1 c2 c3 f sp ls m, + is_tail c1 c2 -> + unique_labels c2 -> + starts_with lbl c1 = true -> + find_label lbl c2 = Some c3 -> + exec_instrs tge f sp (cleanup_code c1) ls m + (cleanup_code c3) ls m. +Proof. + induction c1. + simpl; intros; discriminate. + simpl starts_with. destruct a; try (intros; discriminate). + intros. apply exec_trans with (cleanup_code c1) ls m. + simpl. + constructor. constructor. + destruct (peq lbl l). + subst l. replace c3 with c1. constructor. + apply find_label_unique with lbl c2; auto. + apply IHc1 with c2; auto. eapply is_tail_cons_left; eauto. +Qed. + +(** Code cleanup preserves the labeling of the code. *) + +Lemma find_label_cleanup_code: + forall lbl c c', + find_label lbl c = Some c' -> + find_label lbl (cleanup_code c) = Some (cleanup_code c'). +Proof. + induction c. + simpl. intros; discriminate. + generalize (is_label_correct lbl a). + simpl find_label. case (is_label lbl a); intro. + subst a. intros. injection H; intros. subst c'. + simpl. rewrite peq_true. auto. + intros. destruct a; auto. + simpl. rewrite peq_false. auto. + congruence. + case (starts_with l c). auto. simpl. auto. +Qed. + +(** One transition in the original Linear code corresponds to zero + or one transitions in the cleaned-up code. *) + +Lemma cleanup_code_correct_1: + forall f sp c1 ls1 m1 c2 ls2 m2, + exec_instr tge f sp c1 ls1 m1 c2 ls2 m2 -> + is_tail c1 f.(fn_code) -> + unique_labels f.(fn_code) -> + exec_instrs tge (cleanup_function f) sp + (cleanup_code c1) ls1 m1 + (cleanup_code c2) ls2 m2. +Proof. + induction 1; simpl; intros; + try (apply exec_one; econstructor; eauto; fail). + (* goto *) + caseEq (starts_with lbl b); intro SW. + eapply starts_with_correct; eauto. + eapply is_tail_cons_left; eauto. + constructor. constructor. + unfold cleanup_function; simpl. + apply find_label_cleanup_code. auto. + (* cond, taken *) + constructor. apply exec_Lcond_true. auto. + unfold cleanup_function; simpl. + apply find_label_cleanup_code. auto. + (* cond, not taken *) + constructor. apply exec_Lcond_false. auto. +Qed. + +Lemma is_tail_find_label: + forall lbl c2 c1, + find_label lbl c1 = Some c2 -> is_tail c2 c1. +Proof. + induction c1; simpl. + intros; discriminate. + case (is_label lbl a). intro. injection H; intro. subst c2. + constructor. constructor. + intro. constructor. auto. +Qed. + +Lemma is_tail_exec_instr: + forall f sp c1 ls1 m1 c2 ls2 m2, + exec_instr tge f sp c1 ls1 m1 c2 ls2 m2 -> + is_tail c1 f.(fn_code) -> is_tail c2 f.(fn_code). +Proof. + induction 1; intros; + try (eapply is_tail_cons_left; eauto; fail). + eapply is_tail_find_label; eauto. + eapply is_tail_find_label; eauto. +Qed. + +Lemma is_tail_exec_instrs: + forall f sp c1 ls1 m1 c2 ls2 m2, + exec_instrs tge f sp c1 ls1 m1 c2 ls2 m2 -> + is_tail c1 f.(fn_code) -> is_tail c2 f.(fn_code). +Proof. + induction 1; intros. + auto. + eapply is_tail_exec_instr; eauto. + auto. +Qed. + +(** Zero, one or several transitions in the original Linear code correspond + to zero, one or several transitions in the cleaned-up code. *) + +Lemma cleanup_code_correct_2: + forall f sp c1 ls1 m1 c2 ls2 m2, + exec_instrs tge f sp c1 ls1 m1 c2 ls2 m2 -> + is_tail c1 f.(fn_code) -> + unique_labels f.(fn_code) -> + exec_instrs tge (cleanup_function f) sp + (cleanup_code c1) ls1 m1 + (cleanup_code c2) ls2 m2. +Proof. + induction 1; simpl; intros. + apply exec_refl. + eapply cleanup_code_correct_1; eauto. + apply exec_trans with (cleanup_code b2) rs2 m2. + auto. + apply IHexec_instrs2; auto. + eapply is_tail_exec_instrs; eauto. +Qed. + +Lemma cleanup_function_correct: + forall f ls1 m1 ls2 m2, + exec_function tge f ls1 m1 ls2 m2 -> + unique_labels f.(fn_code) -> + exec_function tge (cleanup_function f) ls1 m1 ls2 m2. +Proof. + induction 1; intro. + generalize (cleanup_code_correct_2 _ _ _ _ _ _ _ _ H0 (is_tail_refl _) H1). + simpl. intro. + econstructor; eauto. +Qed. + +(** * Properties of linearized code *) + +(** We now state useful properties of the Linear code generated by + the naive LTL-to-Linear translation. *) + +(** Connection between [find_label] and linearization. *) + +Lemma find_label_lin_block: + forall lbl k b, + find_label lbl (linearize_block b k) = find_label lbl k. +Proof. + induction b; simpl; auto. + case (starts_with n k); reflexivity. +Qed. + +Lemma find_label_lin_rec: + forall f enum pc b, + In pc enum -> + f.(LTL.fn_code)!pc = Some b -> + exists k, + find_label pc (linearize_body f enum) = Some (linearize_block b k). +Proof. + induction enum; intros. + elim H. + case (peq a pc); intro. + subst a. exists (linearize_body f enum). + simpl. rewrite H0. simpl. rewrite peq_true. auto. + assert (In pc enum). simpl in H. tauto. + elim (IHenum pc b H1 H0). intros k FIND. + exists k. simpl. destruct (LTL.fn_code f)!a. + simpl. rewrite peq_false. rewrite find_label_lin_block. auto. auto. + auto. +Qed. + +Lemma find_label_lin: + forall f pc b, + f.(LTL.fn_code)!pc = Some b -> + (reachable f)!!pc = true -> + exists k, + find_label pc (fn_code (linearize_function f)) = Some (linearize_block b k). +Proof. + intros. unfold linearize_function; simpl. apply find_label_lin_rec. + eapply enumerate_complete; eauto. auto. +Qed. + +(** Unique label property for linearized code. *) + +Lemma label_in_lin_block: + forall lbl k b, + In (Llabel lbl) (linearize_block b k) -> In (Llabel lbl) k. +Proof. + induction b; simpl; try (intuition congruence). + case (starts_with n k); simpl; intuition congruence. +Qed. + +Lemma label_in_lin_rec: + forall f lbl enum, + In (Llabel lbl) (linearize_body f enum) -> In lbl enum. +Proof. + induction enum. + simpl; auto. + simpl. destruct (LTL.fn_code f)!a. + simpl. intros [A|B]. left; congruence. + right. apply IHenum. eapply label_in_lin_block; eauto. + intro; right; auto. +Qed. + +Lemma unique_labels_lin_block: + forall k b, + unique_labels k -> unique_labels (linearize_block b k). +Proof. + induction b; simpl; auto. + case (starts_with n k); simpl; auto. +Qed. + +Lemma unique_labels_lin_rec: + forall f enum, + list_norepet enum -> + unique_labels (linearize_body f enum). +Proof. + induction enum. + simpl; auto. + intro. simpl. destruct (LTL.fn_code f)!a. + simpl. split. red. intro. inversion H. elim H3. + apply label_in_lin_rec with f. + apply label_in_lin_block with b. auto. + apply unique_labels_lin_block. apply IHenum. inversion H; auto. + apply IHenum. inversion H; auto. +Qed. + +Lemma unique_labels_lin_function: + forall f, + unique_labels (fn_code (linearize_function f)). +Proof. + intros. unfold linearize_function; simpl. + apply unique_labels_lin_rec. apply enumerate_norepet. +Qed. + +(** * Correctness of linearization *) + +(** The outcome of an LTL [exec_block] or [exec_blocks] execution + is valid if it corresponds to a reachable, existing basic block. + All outcomes occurring in an LTL program execution are actually + valid, but we will prove that along with the main simulation proof. *) + +Definition valid_outcome (f: LTL.function) (out: outcome) := + match out with + | Cont s => + (reachable f)!!s = true /\ exists b, f.(LTL.fn_code)!s = Some b + | Return => True + end. + +(** Auxiliary lemma used to establish the [valid_outcome] property. *) + +Lemma exec_blocks_valid_outcome: + forall c sp pc ls1 m1 out ls2 m2, + exec_blocks ge c sp pc ls1 m1 out ls2 m2 -> + forall f, + c = f.(LTL.fn_code) -> + (reachable f)!!pc = true -> + valid_outcome f out -> + valid_outcome f (Cont pc). +Proof. + induction 1. + auto. + intros. simpl. split. auto. exists b. congruence. + intros. apply IHexec_blocks1. auto. auto. + apply IHexec_blocks2. auto. + eapply reachable_correct_2. eexact H. auto. auto. auto. + auto. +Qed. + +(** Correspondence between an LTL outcome and a fragment of generated + Linear code. *) + +Inductive cont_for_outcome: LTL.function -> outcome -> code -> Prop := + | co_return: + forall f k, + cont_for_outcome f Return (Lreturn :: k) + | co_goto: + forall f s k, + find_label s (linearize_function f).(fn_code) = Some k -> + cont_for_outcome f (Cont s) k. + +(** The simulation properties used in the inductive proof. + They are parameterized by an LTL execution, and state + that a matching execution is possible in the generated Linear code. *) + +Definition exec_instr_prop + (sp: val) (b1: block) (ls1: locset) (m1: mem) + (b2: block) (ls2: locset) (m2: mem) : Prop := + forall f k, + exec_instr tge (linearize_function f) sp + (linearize_block b1 k) ls1 m1 + (linearize_block b2 k) ls2 m2. + +Definition exec_instrs_prop + (sp: val) (b1: block) (ls1: locset) (m1: mem) + (b2: block) (ls2: locset) (m2: mem) : Prop := + forall f k, + exec_instrs tge (linearize_function f) sp + (linearize_block b1 k) ls1 m1 + (linearize_block b2 k) ls2 m2. + +Definition exec_block_prop + (sp: val) (b: block) (ls1: locset) (m1: mem) + (out: outcome) (ls2: locset) (m2: mem): Prop := + forall f k, + valid_outcome f out -> + exists k', + exec_instrs tge (linearize_function f) sp + (linearize_block b k) ls1 m1 + k' ls2 m2 + /\ cont_for_outcome f out k'. + +Definition exec_blocks_prop + (c: LTL.code) (sp: val) (pc: node) (ls1: locset) (m1: mem) + (out: outcome) (ls2: locset) (m2: mem): Prop := + forall f k, + c = f.(LTL.fn_code) -> + (reachable f)!!pc = true -> + find_label pc (fn_code (linearize_function f)) = Some k -> + valid_outcome f out -> + exists k', + exec_instrs tge (linearize_function f) sp + k ls1 m1 + k' ls2 m2 + /\ cont_for_outcome f out k'. + +Definition exec_function_prop + (f: LTL.function) (ls1: locset) (m1: mem) (ls2: locset) (m2: mem): Prop := + exec_function tge (transf_function f) ls1 m1 ls2 m2. + +Scheme exec_instr_ind5 := Minimality for LTL.exec_instr Sort Prop + with exec_instrs_ind5 := Minimality for LTL.exec_instrs Sort Prop + with exec_block_ind5 := Minimality for LTL.exec_block Sort Prop + with exec_blocks_ind5 := Minimality for LTL.exec_blocks Sort Prop + with exec_function_ind5 := Minimality for LTL.exec_function Sort Prop. + +(** The obligatory proof by structural induction on the LTL evaluation + derivation. *) + +Lemma transf_function_correct: + forall f ls1 m1 ls2 m2, + LTL.exec_function ge f ls1 m1 ls2 m2 -> + exec_function_prop f ls1 m1 ls2 m2. +Proof. + apply (exec_function_ind5 ge + exec_instr_prop + exec_instrs_prop + exec_block_prop + exec_blocks_prop + exec_function_prop); + intros; red; intros; simpl. + (* getstack *) + constructor. + (* setstack *) + constructor. + (* op *) + constructor. rewrite <- H. apply eval_operation_preserved. + exact symbols_preserved. + (* load *) + apply exec_Lload with a. + rewrite <- H. apply eval_addressing_preserved. + exact symbols_preserved. + auto. + (* store *) + apply exec_Lstore with a. + rewrite <- H. apply eval_addressing_preserved. + exact symbols_preserved. + auto. + (* call *) + apply exec_Lcall with (transf_function f). + generalize H. destruct ros; simpl. + intro; apply functions_translated; auto. + rewrite symbols_preserved. destruct (Genv.find_symbol ge i). + intro; apply function_ptr_translated; auto. congruence. + rewrite H0; reflexivity. + apply H2. + (* instr_refl *) + apply exec_refl. + (* instr_one *) + apply exec_one. apply H0. + (* instr_trans *) + apply exec_trans with (linearize_block b2 k) rs2 m2. + apply H0. apply H2. + (* goto *) + elim H1. intros REACH [b2 AT2]. + generalize (H0 f k). simpl. intro. + elim (find_label_lin f s b2 AT2 REACH). intros k2 FIND. + exists (linearize_block b2 k2). + split. + eapply exec_trans. eexact H2. constructor. constructor. auto. + constructor. auto. + (* cond, true *) + elim H2. intros REACH [b2 AT2]. + elim (find_label_lin f ifso b2 AT2 REACH). intros k2 FIND. + exists (linearize_block b2 k2). + split. + generalize (H0 f k). simpl. + case (starts_with ifso k); intro. + eapply exec_trans. eexact H3. + eapply exec_trans. apply exec_one. apply exec_Lcond_false. + change false with (negb true). apply eval_negate_condition. auto. + apply exec_one. apply exec_Lgoto. auto. + eapply exec_trans. eexact H3. + apply exec_one. apply exec_Lcond_true. + auto. auto. + constructor; auto. + (* cond, false *) + elim H2. intros REACH [b2 AT2]. + elim (find_label_lin f ifnot b2 AT2 REACH). intros k2 FIND. + exists (linearize_block b2 k2). + split. + generalize (H0 f k). simpl. + case (starts_with ifso k); intro. + eapply exec_trans. eexact H3. + apply exec_one. apply exec_Lcond_true. + change true with (negb false). apply eval_negate_condition. auto. + auto. + eapply exec_trans. eexact H3. + eapply exec_trans. apply exec_one. apply exec_Lcond_false. auto. + apply exec_one. apply exec_Lgoto. auto. + constructor; auto. + (* return *) + exists (Lreturn :: k). split. + exact (H0 f k). constructor. + (* refl blocks *) + exists k. split. apply exec_refl. constructor. auto. + (* one blocks *) + subst c. elim (find_label_lin f pc b H H3). intros k' FIND. + assert (k = linearize_block b k'). congruence. subst k. + elim (H1 f k' H5). intros k'' [EXEC CFO]. + exists k''. tauto. + (* trans blocks *) + assert ((reachable f)!!pc2 = true). + eapply reachable_correct_2. eexact H. auto. auto. auto. + assert (valid_outcome f (Cont pc2)). + eapply exec_blocks_valid_outcome; eauto. + generalize (H0 f k H3 H4 H5 H8). intros [k1 [EX1 CFO2]]. + inversion CFO2. + generalize (H2 f k1 H3 H7 H11 H6). intros [k2 [EX2 CFO3]]. + exists k2. split. eapply exec_trans; eauto. auto. + (* function -- TA-DA! *) + assert (REACH0: (reachable f)!!(fn_entrypoint f) = true). + apply reachable_entrypoint. + assert (VO2: valid_outcome f Return). simpl; auto. + assert (VO1: valid_outcome f (Cont (fn_entrypoint f))). + eapply exec_blocks_valid_outcome; eauto. + assert (exists k, fn_code (linearize_function f) = Llabel f.(fn_entrypoint) :: k). + unfold linearize_function; simpl. + elim (enumerate_head f). intros tl EN. rewrite EN. + simpl. elim VO1. intros REACH [b EQ]. rewrite EQ. + exists (linearize_block b (linearize_body f tl)). auto. + elim H2; intros k EQ. + assert (find_label (fn_entrypoint f) (fn_code (linearize_function f)) = + Some k). + rewrite EQ. simpl. rewrite peq_true. auto. + generalize (H1 f k (refl_equal _) REACH0 H3 VO2). + intros [k' [EX CO]]. + inversion CO. subst k'. + unfold transf_function. apply cleanup_function_correct. + econstructor. eauto. rewrite EQ. eapply exec_trans. + apply exec_one. constructor. eauto. + apply unique_labels_lin_function. +Qed. + +End LINEARIZATION. + +Theorem transf_program_correct: + forall (p: LTL.program) (r: val), + LTL.exec_program p r -> + Linear.exec_program (transf_program p) r. +Proof. + intros p r [b [f [ls [m [FIND1 [FIND2 [SIG [EX RES]]]]]]]]. + red. exists b; exists (transf_function f); exists ls; exists m. + split. change (prog_main (transf_program p)) with (prog_main p). + rewrite symbols_preserved; auto. + split. apply function_ptr_translated. auto. + split. auto. + split. apply transf_function_correct. + unfold transf_program. rewrite Genv.init_mem_transf. auto. + exact RES. +Qed. + diff --git a/backend/Linearizetyping.v b/backend/Linearizetyping.v new file mode 100644 index 00000000..6cebca8d --- /dev/null +++ b/backend/Linearizetyping.v @@ -0,0 +1,340 @@ +(** Type preservation for the Linearize pass *) + +Require Import Coqlib. +Require Import Maps. +Require Import AST. +Require Import Op. +Require Import Locations. +Require Import LTL. +Require Import Linear. +Require Import Linearize. +Require Import LTLtyping. +Require Import Lineartyping. +Require Import Conventions. + +(** * Validity of resource bounds *) + +(** In this section, we show that the resource bounds computed by + [function_bounds] are valid: all references to callee-save registers + and stack slots in the code of the function are within those bounds. *) + +Section BOUNDS. + +Variable f: Linear.function. +Let b := function_bounds f. + +Lemma max_over_list_bound: + forall (A: Set) (valu: A -> Z) (l: list A) (x: A), + In x l -> valu x <= max_over_list A valu l. +Proof. + intros until x. unfold max_over_list. + assert (forall c z, + let f := fold_left (fun x y => Zmax x (valu y)) c z in + z <= f /\ (In x c -> valu x <= f)). + induction c; simpl; intros. + split. omega. tauto. + elim (IHc (Zmax z (valu a))); intros. + split. apply Zle_trans with (Zmax z (valu a)). apply Zmax1. auto. + intro H1; elim H1; intro. + subst a. apply Zle_trans with (Zmax z (valu x)). + apply Zmax2. auto. auto. + intro. elim (H l 0); intros. auto. +Qed. + +Lemma max_over_instrs_bound: + forall (valu: instruction -> Z) i, + In i f.(fn_code) -> valu i <= max_over_instrs f valu. +Proof. + intros. unfold max_over_instrs. apply max_over_list_bound; auto. +Qed. + +Lemma max_over_regs_of_funct_bound: + forall (valu: mreg -> Z) i r, + In i f.(fn_code) -> In r (regs_of_instr i) -> + valu r <= max_over_regs_of_funct f valu. +Proof. + intros. unfold max_over_regs_of_funct. + apply Zle_trans with (max_over_regs_of_instr valu i). + unfold max_over_regs_of_instr. apply max_over_list_bound. auto. + apply max_over_instrs_bound. auto. +Qed. + +Lemma max_over_slots_of_funct_bound: + forall (valu: slot -> Z) i s, + In i f.(fn_code) -> In s (slots_of_instr i) -> + valu s <= max_over_slots_of_funct f valu. +Proof. + intros. unfold max_over_slots_of_funct. + apply Zle_trans with (max_over_slots_of_instr valu i). + unfold max_over_slots_of_instr. apply max_over_list_bound. auto. + apply max_over_instrs_bound. auto. +Qed. + +Lemma int_callee_save_bound: + forall i r, + In i f.(fn_code) -> In r (regs_of_instr i) -> + index_int_callee_save r < bound_int_callee_save b. +Proof. + intros. apply Zlt_le_trans with (int_callee_save r). + unfold int_callee_save. omega. + unfold b, function_bounds, bound_int_callee_save. + eapply max_over_regs_of_funct_bound; eauto. +Qed. + +Lemma float_callee_save_bound: + forall i r, + In i f.(fn_code) -> In r (regs_of_instr i) -> + index_float_callee_save r < bound_float_callee_save b. +Proof. + intros. apply Zlt_le_trans with (float_callee_save r). + unfold float_callee_save. omega. + unfold b, function_bounds, bound_float_callee_save. + eapply max_over_regs_of_funct_bound; eauto. +Qed. + +Lemma int_local_slot_bound: + forall i ofs, + In i f.(fn_code) -> In (Local ofs Tint) (slots_of_instr i) -> + ofs < bound_int_local b. +Proof. + intros. apply Zlt_le_trans with (int_local (Local ofs Tint)). + unfold int_local. omega. + unfold b, function_bounds, bound_int_local. + eapply max_over_slots_of_funct_bound; eauto. +Qed. + +Lemma float_local_slot_bound: + forall i ofs, + In i f.(fn_code) -> In (Local ofs Tfloat) (slots_of_instr i) -> + ofs < bound_float_local b. +Proof. + intros. apply Zlt_le_trans with (float_local (Local ofs Tfloat)). + unfold float_local. omega. + unfold b, function_bounds, bound_float_local. + eapply max_over_slots_of_funct_bound; eauto. +Qed. + +Lemma outgoing_slot_bound: + forall i ofs ty, + In i f.(fn_code) -> In (Outgoing ofs ty) (slots_of_instr i) -> + ofs + typesize ty <= bound_outgoing b. +Proof. + intros. change (ofs + typesize ty) with (outgoing_slot (Outgoing ofs ty)). + unfold b, function_bounds, bound_outgoing. + apply Zmax_bound_r. apply Zmax_bound_r. + eapply max_over_slots_of_funct_bound; eauto. +Qed. + +Lemma size_arguments_bound: + forall sig ros, + In (Lcall sig ros) f.(fn_code) -> + size_arguments sig <= bound_outgoing b. +Proof. + intros. change (size_arguments sig) with (outgoing_space (Lcall sig ros)). + unfold b, function_bounds, bound_outgoing. + apply Zmax_bound_r. apply Zmax_bound_l. + apply max_over_instrs_bound; auto. +Qed. + +End BOUNDS. + +(** Consequently, all machine registers or stack slots mentioned by one + of the instructions of function [f] are within bounds. *) + +Lemma mreg_is_bounded: + forall f i r, + In i f.(fn_code) -> In r (regs_of_instr i) -> + mreg_bounded f r. +Proof. + intros. unfold mreg_bounded. + case (mreg_type r). + eapply int_callee_save_bound; eauto. + eapply float_callee_save_bound; eauto. +Qed. + +Lemma slot_is_bounded: + forall f i s, + In i (transf_function f).(fn_code) -> In s (slots_of_instr i) -> + LTLtyping.slot_bounded f s -> + slot_bounded (transf_function f) s. +Proof. + intros. unfold slot_bounded. + destruct s. + destruct t. + split. exact H1. eapply int_local_slot_bound; eauto. + split. exact H1. eapply float_local_slot_bound; eauto. + unfold linearize_function; simpl. exact H1. + split. exact H1. eapply outgoing_slot_bound; eauto. +Qed. + +(** * Conservation property of the cleanup pass *) + +(** We show that the cleanup pass only deletes some [Lgoto] instructions: + all other instructions present in the output of naive linearization + are in the cleaned-up code, and all instructions in the cleaned-up code + are in the output of naive linearization. *) + +Lemma cleanup_code_conservation: + forall i, + match i with Lgoto _ => False | _ => True end -> + forall c, + In i c -> In i (cleanup_code c). +Proof. + induction c; simpl. + auto. + intro. + assert (In i (a :: cleanup_code c)). + elim H0; intro. subst i. auto with coqlib. + apply in_cons. auto. + destruct a; auto. + assert (In i (cleanup_code c)). + elim H1; intro. subst i. contradiction. auto. + case (starts_with l c). auto. apply in_cons; auto. +Qed. + +Lemma cleanup_function_conservation: + forall f i, + In i (linearize_function f).(fn_code) -> + match i with Lgoto _ => False | _ => True end -> + In i (transf_function f).(fn_code). +Proof. + intros. unfold transf_function. unfold cleanup_function. + simpl. simpl in H0. eapply cleanup_code_conservation; eauto. +Qed. + +Lemma cleanup_code_conservation_2: + forall i c, In i (cleanup_code c) -> In i c. +Proof. + induction c; simpl. + auto. + assert (In i (a :: cleanup_code c) -> a = i \/ In i c). + intro. elim H; tauto. + destruct a; auto. + case (starts_with l c). auto. auto. +Qed. + +Lemma cleanup_function_conservation_2: + forall f i, + In i (transf_function f).(fn_code) -> + In i (linearize_function f).(fn_code). +Proof. + simpl. intros. eapply cleanup_code_conservation_2; eauto. +Qed. + +(** * Type preservation for the linearization pass *) + +Lemma linearize_block_incl: + forall k b, incl k (linearize_block b k). +Proof. + induction b; simpl; auto with coqlib. + case (starts_with n k); auto with coqlib. +Qed. + +Lemma wt_linearize_block: + forall f k, + (forall i, In i k -> wt_instr (transf_function f) i) -> + forall b i, + wt_block f b -> + incl (linearize_block b k) (linearize_function f).(fn_code) -> + In i (linearize_block b k) -> wt_instr (transf_function f) i. +Proof. + induction b; simpl; intros; + try (generalize (cleanup_function_conservation + _ _ (H1 _ (in_eq _ _)) I)); + inversion H0; + try (elim H2; intro; [subst i|eauto with coqlib]); + intros. + (* getstack *) + constructor. auto. eapply slot_is_bounded; eauto. + simpl; auto with coqlib. + eapply mreg_is_bounded; eauto. + simpl; auto with coqlib. + (* setstack *) + constructor. auto. auto. + eapply slot_is_bounded; eauto. + simpl; auto with coqlib. + (* move *) + subst o; subst l. constructor. auto. + eapply mreg_is_bounded; eauto. + simpl; auto with coqlib. + (* undef *) + subst o; subst l. constructor. + eapply mreg_is_bounded; eauto. + simpl; auto with coqlib. + (* other ops *) + constructor; auto. + eapply mreg_is_bounded; eauto. + simpl; auto with coqlib. + (* load *) + constructor; auto. + eapply mreg_is_bounded; eauto. + simpl; auto with coqlib. + (* store *) + constructor; auto. + (* call *) + constructor; auto. + eapply size_arguments_bound; eauto. + (* goto *) + constructor. + (* cond *) + destruct (starts_with n k). + elim H2; intro. + subst i. constructor. + rewrite H4. destruct c; reflexivity. + elim H8; intro. + subst i. constructor. + eauto with coqlib. + elim H2; intro. + subst i. constructor. auto. + elim H8; intro. + subst i. constructor. + eauto with coqlib. + (* return *) + constructor. +Qed. + +Lemma wt_linearize_body: + forall f, + LTLtyping.wt_function f -> + forall enum i, + incl (linearize_body f enum) (linearize_function f).(fn_code) -> + In i (linearize_body f enum) -> wt_instr (transf_function f) i. +Proof. + induction enum. + simpl; intros; contradiction. + intro i. simpl. + caseEq (LTL.fn_code f)!a. intros b EQ INCL IN. + elim IN; intro. subst i; constructor. + apply wt_linearize_block with (linearize_body f enum) b. + intros; apply IHenum. + apply incl_tran with (linearize_block b (linearize_body f enum)). + apply linearize_block_incl. + eauto with coqlib. + auto. + eapply H; eauto. + eauto with coqlib. auto. + auto. +Qed. + +Lemma wt_transf_function: + forall f, + LTLtyping.wt_function f -> + wt_function (transf_function f). +Proof. + intros; red; intros. + apply wt_linearize_body with (enumerate f); auto. + simpl. apply incl_refl. + apply cleanup_function_conservation_2; auto. +Qed. + +Lemma program_typing_preserved: + forall (p: LTL.program), + LTLtyping.wt_program p -> + Lineartyping.wt_program (transf_program p). +Proof. + intros; red; intros. + generalize (transform_program_function transf_function p i f H0). + intros [f0 [IN TR]]. + subst f. apply wt_transf_function; auto. + apply (H i f0 IN). +Qed. diff --git a/backend/Lineartyping.v b/backend/Lineartyping.v new file mode 100644 index 00000000..0b13b40a --- /dev/null +++ b/backend/Lineartyping.v @@ -0,0 +1,254 @@ +(** Typing rules and computation of stack bounds for Linear. *) + +Require Import Coqlib. +Require Import Maps. +Require Import AST. +Require Import Op. +Require Import RTL. +Require Import Locations. +Require Import Linear. +Require Import Conventions. + +(** * Resource bounds for a function *) + +(** The [bounds] record capture how many local and outgoing stack slots + and callee-save registers are used by a function. *) + +Record bounds : Set := mkbounds { + bound_int_local: Z; + bound_float_local: Z; + bound_int_callee_save: Z; + bound_float_callee_save: Z; + bound_outgoing: Z +}. + +(** The resource bounds for a function are computed by a linear scan + of its instructions. *) + +Section BOUNDS. + +Variable f: function. + +Definition regs_of_instr (i: instruction) : list mreg := + match i with + | Lgetstack s r => r :: nil + | Lsetstack r s => r :: nil + | Lop op args res => res :: args + | Lload chunk addr args dst => dst :: args + | Lstore chunk addr args src => src :: args + | Lcall sig (inl fn) => fn :: nil + | Lcall sig (inr _) => nil + | Llabel lbl => nil + | Lgoto lbl => nil + | Lcond cond args lbl => args + | Lreturn => nil + end. + +Definition slots_of_instr (i: instruction) : list slot := + match i with + | Lgetstack s r => s :: nil + | Lsetstack r s => s :: nil + | _ => nil + end. + +Definition max_over_list (A: Set) (valu: A -> Z) (l: list A) : Z := + List.fold_left (fun m l => Zmax m (valu l)) l 0. + +Definition max_over_instrs (valu: instruction -> Z) : Z := + max_over_list instruction valu f.(fn_code). + +Definition max_over_regs_of_instr (valu: mreg -> Z) (i: instruction) : Z := + max_over_list mreg valu (regs_of_instr i). + +Definition max_over_slots_of_instr (valu: slot -> Z) (i: instruction) : Z := + max_over_list slot valu (slots_of_instr i). + +Definition max_over_regs_of_funct (valu: mreg -> Z) : Z := + max_over_instrs (max_over_regs_of_instr valu). + +Definition max_over_slots_of_funct (valu: slot -> Z) : Z := + max_over_instrs (max_over_slots_of_instr valu). + +Definition int_callee_save (r: mreg) := 1 + index_int_callee_save r. + +Definition float_callee_save (r: mreg) := 1 + index_float_callee_save r. + +Definition int_local (s: slot) := + match s with Local ofs Tint => 1 + ofs | _ => 0 end. + +Definition float_local (s: slot) := + match s with Local ofs Tfloat => 1 + ofs | _ => 0 end. + +Definition outgoing_slot (s: slot) := + match s with Outgoing ofs ty => ofs + typesize ty | _ => 0 end. + +Definition outgoing_space (i: instruction) := + match i with Lcall sig _ => size_arguments sig | _ => 0 end. + +Definition function_bounds := + mkbounds + (max_over_slots_of_funct int_local) + (max_over_slots_of_funct float_local) + (max_over_regs_of_funct int_callee_save) + (max_over_regs_of_funct float_callee_save) + (Zmax 6 + (Zmax (max_over_instrs outgoing_space) + (max_over_slots_of_funct outgoing_slot))). + +(** We show that bounds computed by [function_bounds] are all positive + or null, and moreiver [bound_outgoing] is greater or equal to 6. + These properties are used later to reason about the layout of + the activation record. *) + +Lemma max_over_list_pos: + forall (A: Set) (valu: A -> Z) (l: list A), + max_over_list A valu l >= 0. +Proof. + intros until valu. unfold max_over_list. + assert (forall l z, fold_left (fun x y => Zmax x (valu y)) l z >= z). + induction l; simpl; intros. + omega. apply Zge_trans with (Zmax z (valu a)). + auto. apply Zle_ge. apply Zmax1. auto. +Qed. + +Lemma max_over_slots_of_funct_pos: + forall (valu: slot -> Z), max_over_slots_of_funct valu >= 0. +Proof. + intros. unfold max_over_slots_of_funct. + unfold max_over_instrs. apply max_over_list_pos. +Qed. + +Lemma max_over_regs_of_funct_pos: + forall (valu: mreg -> Z), max_over_regs_of_funct valu >= 0. +Proof. + intros. unfold max_over_regs_of_funct. + unfold max_over_instrs. apply max_over_list_pos. +Qed. + +Lemma bound_int_local_pos: + bound_int_local function_bounds >= 0. +Proof. + simpl. apply max_over_slots_of_funct_pos. +Qed. + +Lemma bound_float_local_pos: + bound_float_local function_bounds >= 0. +Proof. + simpl. apply max_over_slots_of_funct_pos. +Qed. + +Lemma bound_int_callee_save_pos: + bound_int_callee_save function_bounds >= 0. +Proof. + simpl. apply max_over_regs_of_funct_pos. +Qed. + +Lemma bound_float_callee_save_pos: + bound_float_callee_save function_bounds >= 0. +Proof. + simpl. apply max_over_regs_of_funct_pos. +Qed. + +Lemma bound_outgoing_pos: + bound_outgoing function_bounds >= 6. +Proof. + simpl. apply Zle_ge. apply Zmax_bound_l. omega. +Qed. + +End BOUNDS. + +(** * Typing rules for Linear *) + +(** The typing rules for Linear are similar to those for LTL: we check + that instructions receive the right number of arguments, + and that the types of the argument and result registers agree + with what the instruction expects. Moreover, we state that references + to callee-save registers and to stack slots are within the bounds + computed by [function_bounds]. This is true by construction of + [function_bounds], and is proved in [Linearizetyping], but it + is convenient to integrate this property within the typing judgement. +*) + +Section WT_INSTR. + +Variable funct: function. +Let b := function_bounds funct. + +Definition mreg_bounded (r: mreg) := + match mreg_type r with + | Tint => index_int_callee_save r < bound_int_callee_save b + | Tfloat => index_float_callee_save r < bound_float_callee_save b + end. + +Definition slot_bounded (s: slot) := + match s with + | Local ofs Tint => 0 <= ofs < bound_int_local b + | Local ofs Tfloat => 0 <= ofs < bound_float_local b + | Outgoing ofs ty => 6 <= ofs /\ ofs + typesize ty <= bound_outgoing b + | Incoming ofs ty => 6 <= ofs /\ ofs + typesize ty <= size_arguments funct.(fn_sig) + end. + +Inductive wt_instr : instruction -> Prop := + | wt_Lgetstack: + forall s r, + slot_type s = mreg_type r -> + slot_bounded s -> mreg_bounded r -> + wt_instr (Lgetstack s r) + | wt_Lsetstack: + forall s r, + match s with Incoming _ _ => False | _ => True end -> + slot_type s = mreg_type r -> + slot_bounded s -> + wt_instr (Lsetstack r s) + | wt_Lopmove: + forall r1 r, + mreg_type r1 = mreg_type r -> + mreg_bounded r -> + wt_instr (Lop Omove (r1 :: nil) r) + | wt_Lopundef: + forall r, + mreg_bounded r -> + wt_instr (Lop Oundef nil r) + | wt_Lop: + forall op args res, + op <> Omove -> op <> Oundef -> + (List.map mreg_type args, mreg_type res) = type_of_operation op -> + mreg_bounded res -> + wt_instr (Lop op args res) + | wt_Lload: + forall chunk addr args dst, + List.map mreg_type args = type_of_addressing addr -> + mreg_type dst = type_of_chunk chunk -> + mreg_bounded dst -> + wt_instr (Lload chunk addr args dst) + | wt_Lstore: + forall chunk addr args src, + List.map mreg_type args = type_of_addressing addr -> + mreg_type src = type_of_chunk chunk -> + wt_instr (Lstore chunk addr args src) + | wt_Lcall: + forall sig ros, + size_arguments sig <= bound_outgoing b -> + match ros with inl r => mreg_type r = Tint | _ => True end -> + wt_instr (Lcall sig ros) + | wt_Llabel: + forall lbl, + wt_instr (Llabel lbl) + | wt_Lgoto: + forall lbl, + wt_instr (Lgoto lbl) + | wt_Lcond: + forall cond args lbl, + List.map mreg_type args = type_of_condition cond -> + wt_instr (Lcond cond args lbl) + | wt_Lreturn: + wt_instr (Lreturn). + +End WT_INSTR. + +Definition wt_function (f: function) : Prop := + forall instr, In instr f.(fn_code) -> wt_instr f instr. + +Definition wt_program (p: program) : Prop := + forall i f, In (i, f) (prog_funct p) -> wt_function f. + diff --git a/backend/Locations.v b/backend/Locations.v new file mode 100644 index 00000000..c97855ea --- /dev/null +++ b/backend/Locations.v @@ -0,0 +1,476 @@ +(** Locations are a refinement of RTL pseudo-registers, used to reflect + the results of register allocation (file [Allocation]). *) + +Require Import Coqlib. +Require Import Maps. +Require Import AST. +Require Import Values. + +(** * Representation of locations *) + +(** A location is either a processor register or (an abstract designation of) + a slot in the activation record of the current function. *) + +(** ** Machine registers *) + +(** The following type defines the machine registers that can be referenced + as locations. These include: +- Integer registers that can be allocated to RTL pseudo-registers ([Rxx]). +- Floating-point registers that can be allocated to RTL pseudo-registers + ([Fxx]). +- Three integer registers, not allocatable, reserved as temporaries for + spilling and reloading ([ITx]). +- Three float registers, not allocatable, reserved as temporaries for + spilling and reloading ([FTx]). + + The type [mreg] does not include special-purpose machine registers + such as the stack pointer and the condition codes. *) + +Inductive mreg: Set := + (** Allocatable integer regs *) + | R3: mreg | R4: mreg | R5: mreg | R6: mreg + | R7: mreg | R8: mreg | R9: mreg | R10: mreg + | R13: mreg | R14: mreg | R15: mreg | R16: mreg + | R17: mreg | R18: mreg | R19: mreg | R20: mreg + | R21: mreg | R22: mreg | R23: mreg | R24: mreg + | R25: mreg | R26: mreg | R27: mreg | R28: mreg + | R29: mreg | R30: mreg | R31: mreg + (** Allocatable float regs *) + | F1: mreg | F2: mreg | F3: mreg | F4: mreg + | F5: mreg | F6: mreg | F7: mreg | F8: mreg + | F9: mreg | F10: mreg | F14: mreg | F15: mreg + | F16: mreg | F17: mreg | F18: mreg | F19: mreg + | F20: mreg | F21: mreg | F22: mreg | F23: mreg + | F24: mreg | F25: mreg | F26: mreg | F27: mreg + | F28: mreg | F29: mreg | F30: mreg | F31: mreg + (** Integer temporaries *) + | IT1: mreg (* R11 *) | IT2: mreg (* R12 *) | IT3: mreg (* R0 *) + (** Float temporaries *) + | FT1: mreg (* F11 *) | FT2: mreg (* F12 *) | FT3: mreg (* F0 *). + +Lemma mreg_eq: forall (r1 r2: mreg), {r1 = r2} + {r1 <> r2}. +Proof. decide equality. Qed. + +Definition mreg_type (r: mreg): typ := + match r with + | R3 => Tint | R4 => Tint | R5 => Tint | R6 => Tint + | R7 => Tint | R8 => Tint | R9 => Tint | R10 => Tint + | R13 => Tint | R14 => Tint | R15 => Tint | R16 => Tint + | R17 => Tint | R18 => Tint | R19 => Tint | R20 => Tint + | R21 => Tint | R22 => Tint | R23 => Tint | R24 => Tint + | R25 => Tint | R26 => Tint | R27 => Tint | R28 => Tint + | R29 => Tint | R30 => Tint | R31 => Tint + | F1 => Tfloat | F2 => Tfloat | F3 => Tfloat | F4 => Tfloat + | F5 => Tfloat | F6 => Tfloat | F7 => Tfloat | F8 => Tfloat + | F9 => Tfloat | F10 => Tfloat | F14 => Tfloat | F15 => Tfloat + | F16 => Tfloat | F17 => Tfloat | F18 => Tfloat | F19 => Tfloat + | F20 => Tfloat | F21 => Tfloat | F22 => Tfloat | F23 => Tfloat + | F24 => Tfloat | F25 => Tfloat | F26 => Tfloat | F27 => Tfloat + | F28 => Tfloat | F29 => Tfloat | F30 => Tfloat | F31 => Tfloat + | IT1 => Tint | IT2 => Tint | IT3 => Tint + | FT1 => Tfloat | FT2 => Tfloat | FT3 => Tfloat + end. + +Open Scope positive_scope. + +Module IndexedMreg <: INDEXED_TYPE. + Definition t := mreg. + Definition eq := mreg_eq. + Definition index (r: mreg): positive := + match r with + | R3 => 1 | R4 => 2 | R5 => 3 | R6 => 4 + | R7 => 5 | R8 => 6 | R9 => 7 | R10 => 8 + | R13 => 9 | R14 => 10 | R15 => 11 | R16 => 12 + | R17 => 13 | R18 => 14 | R19 => 15 | R20 => 16 + | R21 => 17 | R22 => 18 | R23 => 19 | R24 => 20 + | R25 => 21 | R26 => 22 | R27 => 23 | R28 => 24 + | R29 => 25 | R30 => 26 | R31 => 27 + | F1 => 28 | F2 => 29 | F3 => 30 | F4 => 31 + | F5 => 32 | F6 => 33 | F7 => 34 | F8 => 35 + | F9 => 36 | F10 => 37 | F14 => 38 | F15 => 39 + | F16 => 40 | F17 => 41 | F18 => 42 | F19 => 43 + | F20 => 44 | F21 => 45 | F22 => 46 | F23 => 47 + | F24 => 48 | F25 => 49 | F26 => 50 | F27 => 51 + | F28 => 52 | F29 => 53 | F30 => 54 | F31 => 55 + | IT1 => 56 | IT2 => 57 | IT3 => 58 + | FT1 => 59 | FT2 => 60 | FT3 => 61 + end. + Lemma index_inj: + forall r1 r2, index r1 = index r2 -> r1 = r2. + Proof. + destruct r1; destruct r2; simpl; intro; discriminate || reflexivity. + Qed. +End IndexedMreg. + +(** ** Slots in activation records *) + +(** A slot in an activation record is designated abstractly by a kind, + a type and an integer offset. Three kinds are considered: +- [Local]: these are the slots used by register allocation for + pseudo-registers that cannot be assigned a hardware register. +- [Incoming]: used to store the parameters of the current function + that cannot reside in hardware registers, as determined by the + calling conventions. +- [Outgoing]: used to store arguments to called functions that + cannot reside in hardware registers, as determined by the + calling conventions. *) + +Inductive slot: Set := + | Local: Z -> typ -> slot + | Incoming: Z -> typ -> slot + | Outgoing: Z -> typ -> slot. + +(** Morally, the [Incoming] slots of a function are the [Outgoing] +slots of its caller function. + +The type of a slot indicates how it will be accessed later once mapped to +actual memory locations inside a memory-allocated activation record: +as 32-bit integers/pointers (type [Tint]) or as 64-bit floats (type [Tfloat]). + +The offset of a slot, combined with its type and its kind, identifies +uniquely the slot and will determine later where it resides within the +memory-allocated activation record. Offsets are always positive. + +Conceptually, slots will be mapped to four non-overlapping memory areas +within activation records: +- The area for [Local] slots of type [Tint]. The offset is interpreted + as a 4-byte word index. +- The area for [Local] slots of type [Tfloat]. The offset is interpreted + as a 8-byte word index. Thus, two [Local] slots always refer either + to the same memory chunk (if they have the same types and offsets) + or to non-overlapping memory chunks (if the types or offsets differ). +- The area for [Outgoing] slots. The offset is a 4-byte word index. + Unlike [Local] slots, the PowerPC calling conventions demand that + integer and float [Outgoing] slots reside in the same memory area. + Therefore, [Outgoing Tint 0] and [Outgoing Tfloat 0] refer to + overlapping memory chunks and cannot be used simultaneously: one will + lose its value when the other is assigned. We will reflect this + overlapping behaviour in the environments mapping locations to values + defined later in this file. +- The area for [Incoming] slots. Same structure as the [Outgoing] slots. +*) + +Definition slot_type (s: slot): typ := + match s with + | Local ofs ty => ty + | Incoming ofs ty => ty + | Outgoing ofs ty => ty + end. + +Lemma slot_eq: forall (p q: slot), {p = q} + {p <> q}. +Proof. + assert (typ_eq: forall (t1 t2: typ), {t1 = t2} + {t1 <> t2}). + decide equality. + generalize zeq; intro. + decide equality. +Qed. + +Open Scope Z_scope. + +Definition typesize (ty: typ) : Z := + match ty with Tint => 1 | Tfloat => 2 end. + +Lemma typesize_pos: + forall (ty: typ), typesize ty > 0. +Proof. + destruct ty; compute; auto. +Qed. + +(** ** Locations *) + +(** Locations are just the disjoint union of machine registers and + activation record slots. *) + +Inductive loc : Set := + | R: mreg -> loc + | S: slot -> loc. + +Module Loc. + + Definition type (l: loc) : typ := + match l with + | R r => mreg_type r + | S s => slot_type s + end. + + Lemma eq: forall (p q: loc), {p = q} + {p <> q}. + Proof. + decide equality. apply mreg_eq. apply slot_eq. + Qed. + +(** As mentioned previously, two locations can be different (in the sense + of the [<>] mathematical disequality), yet denote + overlapping memory chunks within the activation record. + Given two locations, three cases are possible: +- They are equal (in the sense of the [=] equality) +- They are different and non-overlapping. +- They are different but overlapping. + + The second case (different and non-overlapping) is characterized + by the following [Loc.diff] predicate. +*) + Definition diff (l1 l2: loc) : Prop := + match l1, l2 with + | R r1, R r2 => r1 <> r2 + | S (Local d1 t1), S (Local d2 t2) => + d1 <> d2 \/ t1 <> t2 + | S (Incoming d1 t1), S (Incoming d2 t2) => + d1 + typesize t1 <= d2 \/ d2 + typesize t2 <= d1 + | S (Outgoing d1 t1), S (Outgoing d2 t2) => + d1 + typesize t1 <= d2 \/ d2 + typesize t2 <= d1 + | _, _ => + True + end. + + Lemma same_not_diff: + forall l, ~(diff l l). + Proof. + destruct l; unfold diff; try tauto. + destruct s. + tauto. + generalize (typesize_pos t); omega. + generalize (typesize_pos t); omega. + Qed. + + Lemma diff_not_eq: + forall l1 l2, diff l1 l2 -> l1 <> l2. + Proof. + unfold not; intros. subst l2. elim (same_not_diff l1 H). + Qed. + + Lemma diff_sym: + forall l1 l2, diff l1 l2 -> diff l2 l1. + Proof. + destruct l1; destruct l2; unfold diff; auto. + destruct s; auto. + destruct s; destruct s0; intuition auto. + Qed. + +(** [Loc.overlap l1 l2] returns [false] if [l1] and [l2] are different and + non-overlapping, and [true] otherwise: either [l1 = l2] or they partially + overlap. *) + + Definition overlap_aux (t1: typ) (d1 d2: Z) : bool := + if zeq d1 d2 then true else + match t1 with + | Tint => false + | Tfloat => if zeq (d1 + 1) d2 then true else false + end. + + Definition overlap (l1 l2: loc) : bool := + match l1, l2 with + | S (Incoming d1 t1), S (Incoming d2 t2) => + overlap_aux t1 d1 d2 || overlap_aux t2 d2 d1 + | S (Outgoing d1 t1), S (Outgoing d2 t2) => + overlap_aux t1 d1 d2 || overlap_aux t2 d2 d1 + | _, _ => false + end. + + Lemma overlap_aux_true_1: + forall d1 t1 d2 t2, + overlap_aux t1 d1 d2 = true -> + ~(d1 + typesize t1 <= d2 \/ d2 + typesize t2 <= d1). + Proof. + intros until t2. + generalize (typesize_pos t1); intro. + generalize (typesize_pos t2); intro. + unfold overlap_aux. + case (zeq d1 d2). + intros. omega. + case t1. intros; discriminate. + case (zeq (d1 + 1) d2); intros. + subst d2. simpl. omega. + discriminate. + Qed. + + Lemma overlap_aux_true_2: + forall d1 t1 d2 t2, + overlap_aux t2 d2 d1 = true -> + ~(d1 + typesize t1 <= d2 \/ d2 + typesize t2 <= d1). + Proof. + intros. generalize (overlap_aux_true_1 d2 t2 d1 t1 H). + tauto. + Qed. + + Lemma overlap_not_diff: + forall l1 l2, overlap l1 l2 = true -> ~(diff l1 l2). + Proof. + unfold overlap, diff; destruct l1; destruct l2; intros; try discriminate. + destruct s; discriminate. + destruct s; destruct s0; try discriminate. + elim (orb_true_elim _ _ H); intro. + apply overlap_aux_true_1; auto. + apply overlap_aux_true_2; auto. + elim (orb_true_elim _ _ H); intro. + apply overlap_aux_true_1; auto. + apply overlap_aux_true_2; auto. + Qed. + + Lemma overlap_aux_false_1: + forall t1 d1 t2 d2, + overlap_aux t1 d1 d2 || overlap_aux t2 d2 d1 = false -> + d1 + typesize t1 <= d2 \/ d2 + typesize t2 <= d1. + Proof. + intros until d2. intro OV. + generalize (orb_false_elim _ _ OV). intro OV'. elim OV'. + unfold overlap_aux. + case (zeq d1 d2); intro. + intros; discriminate. + case (zeq d2 d1); intro. + intros; discriminate. + case t1; case t2; simpl. + intros; omega. + case (zeq (d2 + 1) d1); intros. discriminate. omega. + case (zeq (d1 + 1) d2); intros. discriminate. omega. + case (zeq (d1 + 1) d2); intros H1 H2. discriminate. + case (zeq (d2 + 1) d1); intros. discriminate. omega. + Qed. + + Lemma non_overlap_diff: + forall l1 l2, l1 <> l2 -> overlap l1 l2 = false -> diff l1 l2. + Proof. + intros. unfold diff; destruct l1; destruct l2. + congruence. + auto. + destruct s; auto. + destruct s; destruct s0; auto. + case (zeq z z0); intro. + compare t t0; intro. + congruence. tauto. tauto. + apply overlap_aux_false_1. exact H0. + apply overlap_aux_false_1. exact H0. + Qed. + +(** We now redefine some standard notions over lists, using the [Loc.diff] + predicate instead of standard disequality [<>]. + + [Loc.notin l ll] holds if the location [l] is different from all locations + in the list [ll]. *) + + Fixpoint notin (l: loc) (ll: list loc) {struct ll} : Prop := + match ll with + | nil => True + | l1 :: ls => diff l l1 /\ notin l ls + end. + + Lemma notin_not_in: + forall l ll, notin l ll -> ~(In l ll). + Proof. + unfold not; induction ll; simpl; intros. + auto. + elim H; intros. elim H0; intro. + subst l. exact (same_not_diff a H1). + auto. + Qed. + +(** [Loc.disjoint l1 l2] is true if the locations in list [l1] + are different from all locations in list [l2]. *) + + Definition disjoint (l1 l2: list loc) : Prop := + forall x1 x2, In x1 l1 -> In x2 l2 -> diff x1 x2. + + Lemma disjoint_cons_left: + forall a l1 l2, + disjoint (a :: l1) l2 -> disjoint l1 l2. + Proof. + unfold disjoint; intros. auto with coqlib. + Qed. + Lemma disjoint_cons_right: + forall a l1 l2, + disjoint l1 (a :: l2) -> disjoint l1 l2. + Proof. + unfold disjoint; intros. auto with coqlib. + Qed. + + Lemma disjoint_sym: + forall l1 l2, disjoint l1 l2 -> disjoint l2 l1. + Proof. + unfold disjoint; intros. apply diff_sym; auto. + Qed. + + Lemma in_notin_diff: + forall l1 l2 ll, notin l1 ll -> In l2 ll -> diff l1 l2. + Proof. + induction ll; simpl; intros. + elim H0. + elim H0; intro. subst a. tauto. apply IHll; tauto. + Qed. + + Lemma notin_disjoint: + forall l1 l2, + (forall x, In x l1 -> notin x l2) -> disjoint l1 l2. + Proof. + unfold disjoint; induction l1; intros. + elim H0. + elim H0; intro. + subst x1. eapply in_notin_diff. apply H. auto with coqlib. auto. + eapply IHl1; eauto. intros. apply H. auto with coqlib. + Qed. + + Lemma disjoint_notin: + forall l1 l2 x, disjoint l1 l2 -> In x l1 -> notin x l2. + Proof. + unfold disjoint; induction l2; simpl; intros. + auto. + split. apply H. auto. tauto. + apply IHl2. intros. apply H. auto. tauto. auto. + Qed. + +(** [Loc.norepet ll] holds if the locations in list [ll] are pairwise + different. *) + + Inductive norepet : list loc -> Prop := + | norepet_nil: + norepet nil + | norepet_cons: + forall hd tl, notin hd tl -> norepet tl -> norepet (hd :: tl). + + Definition no_overlap (l1 l2 : list loc) := + forall r, In r l1 -> forall s, In s l2 -> r = s \/ Loc.diff r s. + +End Loc. + +(** * Mappings from locations to values *) + +(** The [Locmap] module defines mappings from locations to values, + used as evaluation environments for the semantics of the [LTL] + and [Linear] intermediate languages. *) + +Set Implicit Arguments. + +Module Locmap. + + Definition t := loc -> val. + + Definition init (x: val) : t := fun (_: loc) => x. + + Definition get (l: loc) (m: t) : val := m l. + + (** The [set] operation over location mappings reflects the overlapping + properties of locations: changing the value of a location [l] + invalidates (sets to [Vundef]) the locations that partially overlap + with [l]. In other terms, the result of [set l v m] + maps location [l] to value [v], locations that overlap with [l] + to [Vundef], and locations that are different (and non-overlapping) + from [l] to their previous values in [m]. This is apparent in the + ``good variables'' properties [Locmap.gss] and [Locmap.gso]. *) + + Definition set (l: loc) (v: val) (m: t) : t := + fun (p: loc) => + if Loc.eq l p then v else if Loc.overlap l p then Vundef else m p. + + Lemma gss: forall l v m, (set l v m) l = v. + Proof. + intros. unfold set. case (Loc.eq l l); tauto. + Qed. + + Lemma gso: forall l v m p, Loc.diff l p -> (set l v m) p = m p. + Proof. + intros. unfold set. case (Loc.eq l p); intro. + subst p. elim (Loc.same_not_diff _ H). + caseEq (Loc.overlap l p); intro. + elim (Loc.overlap_not_diff _ _ H0 H). + auto. + Qed. + +End Locmap. diff --git a/backend/Mach.v b/backend/Mach.v new file mode 100644 index 00000000..f9537985 --- /dev/null +++ b/backend/Mach.v @@ -0,0 +1,295 @@ +(** The Mach intermediate language: abstract syntax and semantics. + + Mach is the last intermediate language before generation of assembly + code. +*) + +Require Import Coqlib. +Require Import Maps. +Require Import AST. +Require Import Integers. +Require Import Values. +Require Import Mem. +Require Import Globalenvs. +Require Import Op. +Require Import Locations. + +(** * Abstract syntax *) + +(** Like Linear, the Mach language is organized as lists of instructions + operating over machine registers, with default fall-through behaviour + and explicit labels and branch instructions. + + The main difference with Linear lies in the instructions used to + access the activation record. Mach has three such instructions: + [Mgetstack] and [Msetstack] to read and write within the activation + record for the current function, at a given word offset and with a + given type; and [Mgetparam], to read within the activation record of + the caller. + + These instructions implement a more concrete view of the activation + record than the the [Bgetstack] and [Bsetstack] instructions of + Linear: actual offsets are used instead of abstract stack slots; the + distinction between the caller's frame and the callee's frame is + made explicit. *) + +Definition label := positive. + +Inductive instruction: Set := + | Mgetstack: int -> typ -> mreg -> instruction + | Msetstack: mreg -> int -> typ -> instruction + | Mgetparam: int -> typ -> mreg -> instruction + | Mop: operation -> list mreg -> mreg -> instruction + | Mload: memory_chunk -> addressing -> list mreg -> mreg -> instruction + | Mstore: memory_chunk -> addressing -> list mreg -> mreg -> instruction + | Mcall: signature -> mreg + ident -> instruction + | Mlabel: label -> instruction + | Mgoto: label -> instruction + | Mcond: condition -> list mreg -> label -> instruction + | Mreturn: instruction. + +Definition code := list instruction. + +Record function: Set := mkfunction + { fn_sig: signature; + fn_code: code; + fn_stacksize: Z; + fn_framesize: Z }. + +Definition program := AST.program function. + +Definition genv := Genv.t function. + +(** * Dynamic semantics *) + +Module RegEq. + Definition t := mreg. + Definition eq := mreg_eq. +End RegEq. + +Module Regmap := EMap(RegEq). + +Definition regset := Regmap.t val. + +Notation "a ## b" := (List.map a b) (at level 1). +Notation "a # b <- c" := (Regmap.set b c a) (at level 1, b at next level). + +Definition is_label (lbl: label) (instr: instruction) : bool := + match instr with + | Mlabel lbl' => if peq lbl lbl' then true else false + | _ => false + end. + +Lemma is_label_correct: + forall lbl instr, + if is_label lbl instr then instr = Mlabel lbl else instr <> Mlabel lbl. +Proof. + intros. destruct instr; simpl; try discriminate. + case (peq lbl l); intro; congruence. +Qed. + +Fixpoint find_label (lbl: label) (c: code) {struct c} : option code := + match c with + | nil => None + | i1 :: il => if is_label lbl i1 then Some il else find_label lbl il + end. + +(** The three stack-related Mach instructions are interpreted as + memory accesses relative to the stack pointer. More precisely: +- [Mgetstack ofs ty r] is a memory load at offset [ofs * 4] relative + to the stack pointer. +- [Msetstack r ofs ty] is a memory store at offset [ofs * 4] relative + to the stack pointer. +- [Mgetparam ofs ty r] is a memory load at offset [ofs * 4] + relative to the pointer found at offset 0 from the stack pointer. + The semantics maintain a linked structure of activation records, + with the current record containing a pointer to the record of the + caller function at offset 0. *) + +Definition chunk_of_type (ty: typ) := + match ty with Tint => Mint32 | Tfloat => Mfloat64 end. + +Definition load_stack (m: mem) (sp: val) (ty: typ) (ofs: int) := + Mem.loadv (chunk_of_type ty) m (Val.add sp (Vint ofs)). + +Definition store_stack (m: mem) (sp: val) (ty: typ) (ofs: int) (v: val) := + Mem.storev (chunk_of_type ty) m (Val.add sp (Vint ofs)) v. + +Definition align_16_top (lo hi: Z) := + Zmax 0 (((hi - lo + 15) / 16) * 16 + lo). + +Section RELSEM. + +Variable ge: genv. + +Definition find_function (ros: mreg + ident) (rs: regset) : option function := + match ros with + | inl r => Genv.find_funct ge (rs r) + | inr symb => + match Genv.find_symbol ge symb with + | None => None + | Some b => Genv.find_funct_ptr ge b + end + end. + +(** [exec_instr ge f sp c rs m c' rs' m'] reflects the execution of + the first instruction in the current instruction sequence [c]. + [c'] is the current instruction sequence after this execution. + [rs] and [rs'] map machine registers to values, respectively + before and after instruction execution. [m] and [m'] are the + memory states before and after. *) + +Inductive exec_instr: + function -> val -> + code -> regset -> mem -> + code -> regset -> mem -> Prop := + | exec_Mlabel: + forall f sp lbl c rs m, + exec_instr f sp + (Mlabel lbl :: c) rs m + c rs m + | exec_Mgetstack: + forall f sp ofs ty dst c rs m v, + load_stack m sp ty ofs = Some v -> + exec_instr f sp + (Mgetstack ofs ty dst :: c) rs m + c (rs#dst <- v) m + | exec_Msetstack: + forall f sp src ofs ty c rs m m', + store_stack m sp ty ofs (rs src) = Some m' -> + exec_instr f sp + (Msetstack src ofs ty :: c) rs m + c rs m' + | exec_Mgetparam: + forall f sp parent ofs ty dst c rs m v, + load_stack m sp Tint (Int.repr 0) = Some parent -> + load_stack m parent ty ofs = Some v -> + exec_instr f sp + (Mgetparam ofs ty dst :: c) rs m + c (rs#dst <- v) m + | exec_Mop: + forall f sp op args res c rs m v, + eval_operation ge sp op rs##args = Some v -> + exec_instr f sp + (Mop op args res :: c) rs m + c (rs#res <- v) m + | exec_Mload: + forall f sp chunk addr args dst c rs m a v, + eval_addressing ge sp addr rs##args = Some a -> + Mem.loadv chunk m a = Some v -> + exec_instr f sp + (Mload chunk addr args dst :: c) rs m + c (rs#dst <- v) m + | exec_Mstore: + forall f sp chunk addr args src c rs m m' a, + eval_addressing ge sp addr rs##args = Some a -> + Mem.storev chunk m a (rs src) = Some m' -> + exec_instr f sp + (Mstore chunk addr args src :: c) rs m + c rs m' + | exec_Mcall: + forall f sp sig ros c rs m f' rs' m', + find_function ros rs = Some f' -> + exec_function f' sp rs m rs' m' -> + exec_instr f sp + (Mcall sig ros :: c) rs m + c rs' m' + | exec_Mgoto: + forall f sp lbl c rs m c', + find_label lbl f.(fn_code) = Some c' -> + exec_instr f sp + (Mgoto lbl :: c) rs m + c' rs m + | exec_Mcond_true: + forall f sp cond args lbl c rs m c', + eval_condition cond rs##args = Some true -> + find_label lbl f.(fn_code) = Some c' -> + exec_instr f sp + (Mcond cond args lbl :: c) rs m + c' rs m + | exec_Mcond_false: + forall f sp cond args lbl c rs m, + eval_condition cond rs##args = Some false -> + exec_instr f sp + (Mcond cond args lbl :: c) rs m + c rs m + +with exec_instrs: + function -> val -> + code -> regset -> mem -> + code -> regset -> mem -> Prop := + | exec_refl: + forall f sp c rs m, + exec_instrs f sp c rs m c rs m + | exec_one: + forall f sp c rs m c' rs' m', + exec_instr f sp c rs m c' rs' m' -> + exec_instrs f sp c rs m c' rs' m' + | exec_trans: + forall f sp c1 rs1 m1 c2 rs2 m2 c3 rs3 m3, + exec_instrs f sp c1 rs1 m1 c2 rs2 m2 -> + exec_instrs f sp c2 rs2 m2 c3 rs3 m3 -> + exec_instrs f sp c1 rs1 m1 c3 rs3 m3 + +(** In addition to reserving the word at offset 0 in the activation + record for maintaining the linking of activation records, + we need to reserve the word at offset 4 to store the return address + into the caller. However, the return address (a pointer within + the code of the caller) is not precisely known at this point: + it will be determined only after the final translation to PowerPC + assembly code. Therefore, we simply reserve that word in the strongest + sense of the word ``reserve'': we make sure that whatever pointer + is stored there at function entry keeps the same value until the + final return instruction, and that the return value and final memory + state are the same regardless of the return address. + This is captured in the evaluation rule [exec_function] + that quantifies universally over all possible values of the return + address, and pass this value to [exec_function_body]. In other + terms, the inference rule [exec_function] has an infinity of + premises, one for each possible return address. Such infinitely + branching inference rules are uncommon in operational semantics, + but cause no difficulties in Coq. *) + +with exec_function_body: + function -> val -> val -> + regset -> mem -> regset -> mem -> Prop := + | exec_funct_body: + forall f parent ra rs m rs' m1 m2 m3 m4 stk c, + Mem.alloc m (- f.(fn_framesize)) + (align_16_top (- f.(fn_framesize)) f.(fn_stacksize)) + = (m1, stk) -> + let sp := Vptr stk (Int.repr (-f.(fn_framesize))) in + store_stack m1 sp Tint (Int.repr 0) parent = Some m2 -> + store_stack m2 sp Tint (Int.repr 4) ra = Some m3 -> + exec_instrs f sp + f.(fn_code) rs m3 + (Mreturn :: c) rs' m4 -> + load_stack m4 sp Tint (Int.repr 0) = Some parent -> + load_stack m4 sp Tint (Int.repr 4) = Some ra -> + exec_function_body f parent ra rs m rs' (Mem.free m4 stk) + +with exec_function: + function -> val -> regset -> mem -> regset -> mem -> Prop := + | exec_funct: + forall f parent rs m rs' m', + (forall ra, + Val.has_type ra Tint -> + exec_function_body f parent ra rs m rs' m') -> + exec_function f parent rs m rs' m'. + +Scheme exec_instr_ind4 := Minimality for exec_instr Sort Prop + with exec_instrs_ind4 := Minimality for exec_instrs Sort Prop + with exec_function_body_ind4 := Minimality for exec_function_body Sort Prop + with exec_function_ind4 := Minimality for exec_function Sort Prop. + +End RELSEM. + +Definition exec_program (p: program) (r: val) : Prop := + let ge := Genv.globalenv p in + let m0 := Genv.init_mem p in + exists b, exists f, exists rs, exists m, + Genv.find_symbol ge p.(prog_main) = Some b /\ + Genv.find_funct_ptr ge b = Some f /\ + exec_function ge f (Vptr Mem.nullptr Int.zero) (Regmap.init Vundef) m0 rs m /\ + rs R3 = r. + diff --git a/backend/Machabstr.v b/backend/Machabstr.v new file mode 100644 index 00000000..25458dcc --- /dev/null +++ b/backend/Machabstr.v @@ -0,0 +1,371 @@ +(** Alternate semantics for the Mach intermediate language. *) + +Require Import Coqlib. +Require Import Maps. +Require Import AST. +Require Import Mem. +Require Import Integers. +Require Import Values. +Require Import Mem. +Require Import Globalenvs. +Require Import Op. +Require Import Locations. +Require Conventions. +Require Import Mach. + +(** This file defines an alternate semantics for the Mach intermediate + language, which differ from the standard semantics given in file [Mach] + as follows: the stack access instructions [Mgetstack], [Msetstack] + and [Mgetparam] are interpreted not as memory accesses, but as + accesses in a frame environment, not resident in memory. The evaluation + relations take two such frame environments as parameters and results, + one for the current function and one for its caller. + + Not having the frame data in memory facilitates the proof of + the [Stacking] pass, which shows that the generated code executes + correctly with the alternate semantics. In file [Machabstr2mach], + we show an implication from this alternate semantics to + the standard semantics, thus establishing that the [Stacking] pass + generates code that behaves correctly against the standard [Mach] + semantics as well. *) + +(** * Structure of abstract stack frames *) + +(** A frame has the same structure as the contents of a memory block. *) + +Definition frame := block_contents. + +Definition empty_frame := empty_block 0 0. + +Definition mem_type (ty: typ) := + match ty with Tint => Size32 | Tfloat => Size64 end. + +(** [get_slot fr ty ofs v] holds if the frame [fr] contains value [v] + with type [ty] at word offset [ofs]. *) + +Inductive get_slot: frame -> typ -> Z -> val -> Prop := + | get_slot_intro: + forall fr ty ofs v, + 0 <= ofs -> + fr.(low) + ofs + 4 * typesize ty <= 0 -> + v = load_contents (mem_type ty) fr.(contents) (fr.(low) + ofs) -> + get_slot fr ty ofs v. + +Remark size_mem_type: + forall ty, size_mem (mem_type ty) = 4 * typesize ty. +Proof. + destruct ty; reflexivity. +Qed. + +Remark set_slot_undef_outside: + forall fr ty ofs v, + fr.(high) = 0 -> + 0 <= ofs -> + fr.(low) + ofs + 4 * typesize ty <= 0 -> + (forall x, x < fr.(low) \/ x >= fr.(high) -> fr.(contents) x = Undef) -> + (forall x, x < fr.(low) \/ x >= fr.(high) -> + store_contents (mem_type ty) fr.(contents) (fr.(low) + ofs) v x = Undef). +Proof. + intros. apply store_contents_undef_outside with fr.(low) fr.(high). + rewrite <- size_mem_type in H1. omega. assumption. assumption. +Qed. + +(** [set_slot fr ty ofs v fr'] holds if frame [fr'] is obtained from + frame [fr] by storing value [v] with type [ty] at word offset [ofs]. *) + +Inductive set_slot: frame -> typ -> Z -> val -> frame -> Prop := + | set_slot_intro: + forall fr ty ofs v + (A: fr.(high) = 0) + (B: 0 <= ofs) + (C: fr.(low) + ofs + 4 * typesize ty <= 0), + set_slot fr ty ofs v + (mkblock fr.(low) fr.(high) + (store_contents (mem_type ty) fr.(contents) (fr.(low) + ofs) v) + (set_slot_undef_outside fr ty ofs v A B C fr.(undef_outside))). + +Definition init_frame (f: function) := + empty_block (- f.(fn_framesize)) 0. + +Section RELSEM. + +Variable ge: genv. + +(** Execution of one instruction has the form + [exec_instr ge f sp parent c rs fr m c' rs' fr' m'], + where [parent] is the caller's frame (read-only) + and [fr], [fr'] are the current frame, before and after execution + of one instruction. The other parameters are as in the Mach semantics. *) + +Inductive exec_instr: + function -> val -> frame -> + code -> regset -> frame -> mem -> + code -> regset -> frame -> mem -> Prop := + | exec_Mlabel: + forall f sp parent lbl c rs fr m, + exec_instr f sp parent + (Mlabel lbl :: c) rs fr m + c rs fr m + | exec_Mgetstack: + forall f sp parent ofs ty dst c rs fr m v, + get_slot fr ty (Int.signed ofs) v -> + exec_instr f sp parent + (Mgetstack ofs ty dst :: c) rs fr m + c (rs#dst <- v) fr m + | exec_Msetstack: + forall f sp parent src ofs ty c rs fr m fr', + set_slot fr ty (Int.signed ofs) (rs src) fr' -> + exec_instr f sp parent + (Msetstack src ofs ty :: c) rs fr m + c rs fr' m + | exec_Mgetparam: + forall f sp parent ofs ty dst c rs fr m v, + get_slot parent ty (Int.signed ofs) v -> + exec_instr f sp parent + (Mgetparam ofs ty dst :: c) rs fr m + c (rs#dst <- v) fr m + | exec_Mop: + forall f sp parent op args res c rs fr m v, + eval_operation ge sp op rs##args = Some v -> + exec_instr f sp parent + (Mop op args res :: c) rs fr m + c (rs#res <- v) fr m + | exec_Mload: + forall f sp parent chunk addr args dst c rs fr m a v, + eval_addressing ge sp addr rs##args = Some a -> + Mem.loadv chunk m a = Some v -> + exec_instr f sp parent + (Mload chunk addr args dst :: c) rs fr m + c (rs#dst <- v) fr m + | exec_Mstore: + forall f sp parent chunk addr args src c rs fr m m' a, + eval_addressing ge sp addr rs##args = Some a -> + Mem.storev chunk m a (rs src) = Some m' -> + exec_instr f sp parent + (Mstore chunk addr args src :: c) rs fr m + c rs fr m' + | exec_Mcall: + forall f sp parent sig ros c rs fr m f' rs' m', + find_function ge ros rs = Some f' -> + exec_function f' fr rs m rs' m' -> + exec_instr f sp parent + (Mcall sig ros :: c) rs fr m + c rs' fr m' + | exec_Mgoto: + forall f sp parent lbl c rs fr m c', + find_label lbl f.(fn_code) = Some c' -> + exec_instr f sp parent + (Mgoto lbl :: c) rs fr m + c' rs fr m + | exec_Mcond_true: + forall f sp parent cond args lbl c rs fr m c', + eval_condition cond rs##args = Some true -> + find_label lbl f.(fn_code) = Some c' -> + exec_instr f sp parent + (Mcond cond args lbl :: c) rs fr m + c' rs fr m + | exec_Mcond_false: + forall f sp parent cond args lbl c rs fr m, + eval_condition cond rs##args = Some false -> + exec_instr f sp parent + (Mcond cond args lbl :: c) rs fr m + c rs fr m + +with exec_instrs: + function -> val -> frame -> + code -> regset -> frame -> mem -> + code -> regset -> frame -> mem -> Prop := + | exec_refl: + forall f sp parent c rs fr m, + exec_instrs f sp parent c rs fr m c rs fr m + | exec_one: + forall f sp parent c rs fr m c' rs' fr' m', + exec_instr f sp parent c rs fr m c' rs' fr' m' -> + exec_instrs f sp parent c rs fr m c' rs' fr' m' + | exec_trans: + forall f sp parent c1 rs1 fr1 m1 c2 rs2 fr2 m2 c3 rs3 fr3 m3, + exec_instrs f sp parent c1 rs1 fr1 m1 c2 rs2 fr2 m2 -> + exec_instrs f sp parent c2 rs2 fr2 m2 c3 rs3 fr3 m3 -> + exec_instrs f sp parent c1 rs1 fr1 m1 c3 rs3 fr3 m3 + +with exec_function_body: + function -> frame -> val -> val -> + regset -> mem -> regset -> mem -> Prop := + | exec_funct_body: + forall f parent link ra rs m rs' m1 m2 stk fr1 fr2 fr3 c, + Mem.alloc m 0 f.(fn_stacksize) = (m1, stk) -> + set_slot (init_frame f) Tint 0 link fr1 -> + set_slot fr1 Tint 4 ra fr2 -> + exec_instrs f (Vptr stk (Int.repr (-f.(fn_framesize)))) parent + f.(fn_code) rs fr2 m1 + (Mreturn :: c) rs' fr3 m2 -> + exec_function_body f parent link ra rs m rs' (Mem.free m2 stk) + +with exec_function: + function -> frame -> regset -> mem -> regset -> mem -> Prop := + | exec_funct: + forall f parent rs m rs' m', + (forall link ra, + Val.has_type link Tint -> + Val.has_type ra Tint -> + exec_function_body f parent link ra rs m rs' m') -> + exec_function f parent rs m rs' m'. + +Scheme exec_instr_ind4 := Minimality for exec_instr Sort Prop + with exec_instrs_ind4 := Minimality for exec_instrs Sort Prop + with exec_function_body_ind4 := Minimality for exec_function_body Sort Prop + with exec_function_ind4 := Minimality for exec_function Sort Prop. + +(** Ugly mutual induction principle over evaluation derivations. + Coq is not able to generate it directly, even though it is + an immediate consequence of the 4 induction principles generated + by the [Scheme] command above. *) + +Lemma exec_mutual_induction: + forall (P + P0 : function -> + val -> + frame -> + code -> + regset -> + frame -> mem -> code -> regset -> frame -> mem -> Prop) + (P1 : function -> + frame -> val -> val -> regset -> mem -> regset -> mem -> Prop) + (P2 : function -> frame -> regset -> mem -> regset -> mem -> Prop), + (forall (f : function) (sp : val) (parent : frame) (lbl : label) + (c : list instruction) (rs : regset) (fr : frame) (m : mem), + P f sp parent (Mlabel lbl :: c) rs fr m c rs fr m) -> + (forall (f : function) (sp : val) (parent : frame) (ofs : int) + (ty : typ) (dst : mreg) (c : list instruction) (rs : regset) + (fr : frame) (m : mem) (v : val), + get_slot fr ty (Int.signed ofs) v -> + P f sp parent (Mgetstack ofs ty dst :: c) rs fr m c rs # dst <- v fr + m) -> + (forall (f : function) (sp : val) (parent : frame) (src : mreg) + (ofs : int) (ty : typ) (c : list instruction) (rs : mreg -> val) + (fr : frame) (m : mem) (fr' : frame), + set_slot fr ty (Int.signed ofs) (rs src) fr' -> + P f sp parent (Msetstack src ofs ty :: c) rs fr m c rs fr' m) -> + (forall (f : function) (sp : val) (parent : frame) (ofs : int) + (ty : typ) (dst : mreg) (c : list instruction) (rs : regset) + (fr : frame) (m : mem) (v : val), + get_slot parent ty (Int.signed ofs) v -> + P f sp parent (Mgetparam ofs ty dst :: c) rs fr m c rs # dst <- v fr + m) -> + (forall (f : function) (sp : val) (parent : frame) (op : operation) + (args : list mreg) (res : mreg) (c : list instruction) + (rs : mreg -> val) (fr : frame) (m : mem) (v : val), + eval_operation ge sp op rs ## args = Some v -> + P f sp parent (Mop op args res :: c) rs fr m c rs # res <- v fr m) -> + (forall (f : function) (sp : val) (parent : frame) + (chunk : memory_chunk) (addr : addressing) (args : list mreg) + (dst : mreg) (c : list instruction) (rs : mreg -> val) (fr : frame) + (m : mem) (a v : val), + eval_addressing ge sp addr rs ## args = Some a -> + loadv chunk m a = Some v -> + P f sp parent (Mload chunk addr args dst :: c) rs fr m c + rs # dst <- v fr m) -> + (forall (f : function) (sp : val) (parent : frame) + (chunk : memory_chunk) (addr : addressing) (args : list mreg) + (src : mreg) (c : list instruction) (rs : mreg -> val) (fr : frame) + (m m' : mem) (a : val), + eval_addressing ge sp addr rs ## args = Some a -> + storev chunk m a (rs src) = Some m' -> + P f sp parent (Mstore chunk addr args src :: c) rs fr m c rs fr m') -> + (forall (f : function) (sp : val) (parent : frame) (sig : signature) + (ros : mreg + ident) (c : list instruction) (rs : regset) + (fr : frame) (m : mem) (f' : function) (rs' : regset) (m' : mem), + find_function ge ros rs = Some f' -> + exec_function f' fr rs m rs' m' -> + P2 f' fr rs m rs' m' -> + P f sp parent (Mcall sig ros :: c) rs fr m c rs' fr m') -> + (forall (f : function) (sp : val) (parent : frame) (lbl : label) + (c : list instruction) (rs : regset) (fr : frame) (m : mem) + (c' : code), + find_label lbl (fn_code f) = Some c' -> + P f sp parent (Mgoto lbl :: c) rs fr m c' rs fr m) -> + (forall (f : function) (sp : val) (parent : frame) + (cond : condition) (args : list mreg) (lbl : label) + (c : list instruction) (rs : mreg -> val) (fr : frame) (m : mem) + (c' : code), + eval_condition cond rs ## args = Some true -> + find_label lbl (fn_code f) = Some c' -> + P f sp parent (Mcond cond args lbl :: c) rs fr m c' rs fr m) -> + (forall (f : function) (sp : val) (parent : frame) + (cond : condition) (args : list mreg) (lbl : label) + (c : list instruction) (rs : mreg -> val) (fr : frame) (m : mem), + eval_condition cond rs ## args = Some false -> + P f sp parent (Mcond cond args lbl :: c) rs fr m c rs fr m) -> + (forall (f : function) (sp : val) (parent : frame) (c : code) + (rs : regset) (fr : frame) (m : mem), + P0 f sp parent c rs fr m c rs fr m) -> + (forall (f : function) (sp : val) (parent : frame) (c : code) + (rs : regset) (fr : frame) (m : mem) (c' : code) (rs' : regset) + (fr' : frame) (m' : mem), + exec_instr f sp parent c rs fr m c' rs' fr' m' -> + P f sp parent c rs fr m c' rs' fr' m' -> + P0 f sp parent c rs fr m c' rs' fr' m') -> + (forall (f : function) (sp : val) (parent : frame) (c1 : code) + (rs1 : regset) (fr1 : frame) (m1 : mem) (c2 : code) (rs2 : regset) + (fr2 : frame) (m2 : mem) (c3 : code) (rs3 : regset) (fr3 : frame) + (m3 : mem), + exec_instrs f sp parent c1 rs1 fr1 m1 c2 rs2 fr2 m2 -> + P0 f sp parent c1 rs1 fr1 m1 c2 rs2 fr2 m2 -> + exec_instrs f sp parent c2 rs2 fr2 m2 c3 rs3 fr3 m3 -> + P0 f sp parent c2 rs2 fr2 m2 c3 rs3 fr3 m3 -> + P0 f sp parent c1 rs1 fr1 m1 c3 rs3 fr3 m3) -> + (forall (f : function) (parent : frame) (link ra : val) (rs : regset) + (m : mem) (rs' : regset) (m1 m2 : mem) (stk : block) + (fr1 fr2 fr3 : frame) (c : list instruction), + alloc m 0 (fn_stacksize f) = (m1, stk) -> + set_slot (init_frame f) Tint 0 link fr1 -> + set_slot fr1 Tint 4 ra fr2 -> + exec_instrs f (Vptr stk (Int.repr (-f.(fn_framesize)))) parent (fn_code f) rs fr2 m1 (Mreturn :: c) rs' fr3 + m2 -> + P0 f (Vptr stk (Int.repr (-f.(fn_framesize)))) parent (fn_code f) rs fr2 m1 (Mreturn :: c) rs' fr3 m2 -> + P1 f parent link ra rs m rs' (free m2 stk)) -> + (forall (f : function) (parent : frame) (rs : regset) (m : mem) + (rs' : regset) (m' : mem), + (forall link ra : val, + Val.has_type link Tint -> + Val.has_type ra Tint -> + exec_function_body f parent link ra rs m rs' m') -> + (forall link ra : val, + Val.has_type link Tint -> + Val.has_type ra Tint -> P1 f parent link ra rs m rs' m') -> + P2 f parent rs m rs' m') -> + (forall (f15 : function) (sp : val) (f16 : frame) (c : code) + (r : regset) (f17 : frame) (m : mem) (c0 : code) (r0 : regset) + (f18 : frame) (m0 : mem), + exec_instr f15 sp f16 c r f17 m c0 r0 f18 m0 -> + P f15 sp f16 c r f17 m c0 r0 f18 m0) + /\ (forall (f15 : function) (sp : val) (f16 : frame) (c : code) + (r : regset) (f17 : frame) (m : mem) (c0 : code) (r0 : regset) + (f18 : frame) (m0 : mem), + exec_instrs f15 sp f16 c r f17 m c0 r0 f18 m0 -> + P0 f15 sp f16 c r f17 m c0 r0 f18 m0) + /\ (forall (f15 : function) (f16 : frame) (v1 v2 : val) (r : regset) (m : mem) + (r0 : regset) (m0 : mem), + exec_function_body f15 f16 v1 v2 r m r0 m0 -> P1 f15 f16 v1 v2 r m r0 m0) + /\ (forall (f15 : function) (f16 : frame) (r : regset) (m : mem) + (r0 : regset) (m0 : mem), + exec_function f15 f16 r m r0 m0 -> P2 f15 f16 r m r0 m0). +Proof. + intros. split. apply (exec_instr_ind4 P P0 P1 P2); assumption. + split. apply (exec_instrs_ind4 P P0 P1 P2); assumption. + split. apply (exec_function_body_ind4 P P0 P1 P2); assumption. + apply (exec_function_ind4 P P0 P1 P2); assumption. +Qed. + +End RELSEM. + +Definition exec_program (p: program) (r: val) : Prop := + let ge := Genv.globalenv p in + let m0 := Genv.init_mem p in + exists b, exists f, exists rs, exists m, + Genv.find_symbol ge p.(prog_main) = Some b /\ + Genv.find_funct_ptr ge b = Some f /\ + f.(fn_sig) = mksignature nil (Some Tint) /\ + exec_function ge f empty_frame (Regmap.init Vundef) m0 rs m /\ + rs (Conventions.loc_result f.(fn_sig)) = r. + diff --git a/backend/Machabstr2mach.v b/backend/Machabstr2mach.v new file mode 100644 index 00000000..8549cefc --- /dev/null +++ b/backend/Machabstr2mach.v @@ -0,0 +1,1120 @@ +(** Simulation between the two semantics for the Mach language. *) + +Require Import Coqlib. +Require Import Maps. +Require Import AST. +Require Import Integers. +Require Import Values. +Require Import Mem. +Require Import Globalenvs. +Require Import Op. +Require Import Locations. +Require Import Machabstr. +Require Import Mach. +Require Import Machtyping. +Require Import Stackingproof. + +(** Two semantics were defined for the Mach intermediate language: +- The concrete semantics (file [Mach]), where the whole activation + record resides in memory and the [Mgetstack], [Msetstack] and + [Mgetparent] are interpreted as [sp]-relative memory accesses. +- The abstract semantics (file [Machabstr]), where the activation + record is split in two parts: the Cminor stack data, resident in + memory, and the frame information, residing not in memory but + in additional evaluation environments. + + In this file, we show a simulation result between these + semantics: if a program executes to some result [r] in the + abstract semantics, it also executes to the same result in + the concrete semantics. This result bridges the correctness proof + in file [Stackingproof] (which uses the abstract Mach semantics + as output) and that in file [PPCgenproof] (which uses the concrete + Mach semantics as input). +*) + +Remark align_16_top_ge: + forall lo hi, + hi <= align_16_top lo hi. +Proof. + intros. unfold align_16_top. apply Zmax_bound_r. + assert (forall x, x <= (x + 15) / 16 * 16). + intro. assert (16 > 0). omega. + generalize (Z_div_mod_eq (x + 15) 16 H). intro. + replace ((x + 15) / 16 * 16) with ((x + 15) - (x + 15) mod 16). + generalize (Z_mod_lt (x + 15) 16 H). omega. + rewrite Zmult_comm. omega. + generalize (H (hi - lo)). omega. +Qed. + +Remark align_16_top_pos: + forall lo hi, + 0 <= align_16_top lo hi. +Proof. + intros. unfold align_16_top. apply Zmax_bound_l. omega. +Qed. + +Remark size_mem_pos: + forall sz, size_mem sz > 0. +Proof. + destruct sz; simpl; compute; auto. +Qed. + +Remark size_type_chunk: + forall ty, size_chunk (chunk_of_type ty) = 4 * typesize ty. +Proof. + destruct ty; reflexivity. +Qed. + +Remark mem_chunk_type: + forall ty, mem_chunk (chunk_of_type ty) = mem_type ty. +Proof. + destruct ty; reflexivity. +Qed. + +Remark size_mem_type: + forall ty, size_mem (mem_type ty) = 4 * typesize ty. +Proof. + destruct ty; reflexivity. +Qed. + +(** * Agreement between frames and memory-resident activation records *) + +(** ** Agreement for one frame *) + +(** The core idea of the simulation proof is that for all active + functions, the memory-allocated activation record, in the concrete + semantics, contains the same data as the Cminor stack block + (at positive offsets) and the frame of the function (at negative + offsets) in the abstract semantics. + + This intuition (activation record = Cminor stack data + frame) + is formalized by the following predicate [frame_match fr sp base mm ms]. + [fr] is a frame and [mm] the current memory state in the abstract + semantics. [ms] is the current memory state in the concrete semantics. + The stack pointer is [Vptr sp base] in both semantics. *) + +Inductive frame_match: frame -> block -> int -> mem -> mem -> Prop := + frame_match_intro: + forall fr sp base mm ms, + valid_block ms sp -> + low_bound mm sp = 0 -> + low_bound ms sp = fr.(low) -> + high_bound ms sp = align_16_top fr.(low) (high_bound mm sp) -> + fr.(low) <= 0 -> + Int.min_signed <= fr.(low) -> + base = Int.repr fr.(low) -> + block_contents_agree fr.(low) 0 fr (ms.(blocks) sp) -> + block_contents_agree 0 (high_bound ms sp) + (mm.(blocks) sp) (ms.(blocks) sp) -> + frame_match fr sp base mm ms. + +(** [frame_match], while presented as a relation for convenience, + is actually a function: it fully determines the contents of the + activation record [ms.(blocks) sp]. *) + +Lemma frame_match_exten: + forall fr sp base mm ms1 ms2, + frame_match fr sp base mm ms1 -> + frame_match fr sp base mm ms2 -> + ms1.(blocks) sp = ms2.(blocks) sp. +Proof. + intros. inversion H. inversion H0. + unfold low_bound, high_bound in *. + apply block_contents_exten. + congruence. + congruence. + hnf; intros. + elim H29. rewrite H3; rewrite H4; intros. + case (zlt ofs 0); intro. + assert (low fr <= ofs < 0). tauto. + transitivity (contents fr ofs). + symmetry. apply H8; auto. + apply H22; auto. + transitivity (contents (blocks mm sp) ofs). + symmetry. apply H9. rewrite H4. omega. + apply H23. rewrite H18. omega. +Qed. + +(** The following two innocuous-looking lemmas are the key results + showing that [sp]-relative memory accesses in the concrete + semantics behave like the direct frame accesses in the abstract + semantics. First, a value [v] that has type [ty] is preserved + when stored in memory with chunk [chunk_of_type ty], then read + back with the same chunk. The typing hypothesis is crucial here: + for instance, a float value reads back as [Vundef] when stored + and load with chunk [Mint32]. *) + +Lemma load_result_ty: + forall v ty, + Val.has_type v ty -> Val.load_result (chunk_of_type ty) v = v. +Proof. + destruct v; destruct ty; simpl; contradiction || reflexivity. +Qed. + +(** Second, computations of [sp]-relative offsets using machine + arithmetic (as done in the concrete semantics) never overflows + and behaves identically to the offset computations using exact + arithmetic (as done in the abstract semantics). *) + +Lemma int_add_no_overflow: + forall x y, + Int.min_signed <= Int.signed x + Int.signed y <= Int.max_signed -> + Int.signed (Int.add x y) = Int.signed x + Int.signed y. +Proof. + intros. rewrite Int.add_signed. rewrite Int.signed_repr. auto. auto. +Qed. + +(** Given matching frames and activation records, loading from the + activation record (in the concrete semantics) behaves identically + to reading the corresponding slot from the frame + (in the abstract semantics). *) + +Lemma frame_match_get_slot: + forall fr sp base mm ms ty ofs v, + frame_match fr sp base mm ms -> + get_slot fr ty (Int.signed ofs) v -> + Val.has_type v ty -> + load_stack ms (Vptr sp base) ty ofs = Some v. +Proof. + intros. inversion H. inversion H0. subst v. + unfold load_stack, Val.add, loadv. + assert (Int.signed base = low fr). + rewrite H8. apply Int.signed_repr. + split. auto. apply Zle_trans with 0. auto. compute; congruence. + assert (Int.signed (Int.add base ofs) = low fr + Int.signed ofs). + rewrite int_add_no_overflow. rewrite H18. auto. + rewrite H18. split. omega. apply Zle_trans with 0. + generalize (typesize_pos ty). omega. compute. congruence. + rewrite H23. + assert (BND1: low_bound ms sp <= low fr + Int.signed ofs). + omega. + assert (BND2: (low fr + Int.signed ofs) + size_chunk (chunk_of_type ty) <= high_bound ms sp). + rewrite size_type_chunk. apply Zle_trans with 0. + assumption. rewrite H5. apply align_16_top_pos. + generalize (load_in_bounds (chunk_of_type ty) ms sp (low fr + Int.signed ofs) H2 BND1 BND2). + intros [v' LOAD]. + generalize (load_inv _ _ _ _ _ LOAD). + intros [A [B [C D]]]. + rewrite LOAD. rewrite <- D. + decEq. rewrite mem_chunk_type. + rewrite <- size_mem_type in H17. + assert (low fr <= low fr + Int.signed ofs). omega. + generalize (load_contentmap_agree _ _ _ _ _ _ H9 H24 H17). + intro. rewrite H25. + apply load_result_ty. + assumption. +Qed. + +(** Loads from [sp], corresponding to accesses to Cminor stack data + in the abstract semantics, give the same results in the concrete + semantics. This is because the offset from [sp] must be positive or + null for the original load to succeed, and because the part + of the activation record at positive offsets matches the Cminor + stack data block. *) + +Lemma frame_match_load: + forall fr sp base mm ms chunk ofs v, + frame_match fr sp base mm ms -> + load chunk mm sp ofs = Some v -> + load chunk ms sp ofs = Some v. +Proof. + intros. inversion H. + generalize (load_inv _ _ _ _ _ H0). intros [A [B [C D]]]. + change (low (blocks mm sp)) with (low_bound mm sp) in B. + change (high (blocks mm sp)) with (high_bound mm sp) in C. + unfold load. rewrite zlt_true; auto. + rewrite in_bounds_holds. + rewrite <- D. decEq. decEq. eapply load_contentmap_agree. + red in H9. eexact H9. + omega. + unfold size_chunk in C. rewrite H4. + apply Zle_trans with (high_bound mm sp). auto. + apply align_16_top_ge. + change (low (blocks ms sp)) with (low_bound ms sp). + rewrite H3. omega. + change (high (blocks ms sp)) with (high_bound ms sp). + rewrite H4. apply Zle_trans with (high_bound mm sp). auto. + apply align_16_top_ge. +Qed. + +(** Assigning a value to a frame slot (in the abstract semantics) + corresponds to storing this value in the activation record + (in the concrete semantics). Moreover, agreement between frames + and activation records is preserved. *) + +Lemma frame_match_set_slot: + forall fr sp base mm ms ty ofs v fr', + frame_match fr sp base mm ms -> + set_slot fr ty (Int.signed ofs) v fr' -> + exists ms', + store_stack ms (Vptr sp base) ty ofs v = Some ms' /\ + frame_match fr' sp base mm ms'. +Proof. + intros. inversion H. inversion H0. subst ty0. + unfold store_stack, Val.add, storev. + assert (Int.signed base = low fr). + rewrite H7. apply Int.signed_repr. + split. auto. apply Zle_trans with 0. auto. compute; congruence. + assert (Int.signed (Int.add base ofs) = low fr + Int.signed ofs). + rewrite int_add_no_overflow. rewrite H16. auto. + rewrite H16. split. omega. apply Zle_trans with 0. + generalize (typesize_pos ty). omega. compute. congruence. + rewrite H20. + assert (BND1: low_bound ms sp <= low fr + Int.signed ofs). + omega. + assert (BND2: (low fr + Int.signed ofs) + size_chunk (chunk_of_type ty) <= high_bound ms sp). + rewrite size_type_chunk. rewrite H4. + apply Zle_trans with 0. subst ofs0. auto. apply align_16_top_pos. + generalize (store_in_bounds _ _ _ _ v H1 BND1 BND2). + intros [ms' STORE]. + generalize (store_inv _ _ _ _ _ _ STORE). intros [P [Q [R [S [x T]]]]]. + generalize (low_bound_store _ _ _ _ sp _ _ STORE). intro LB. + generalize (high_bound_store _ _ _ _ sp _ _ STORE). intro HB. + exists ms'. + split. exact STORE. + apply frame_match_intro; auto. + eapply valid_block_store; eauto. + rewrite LB. auto. + rewrite HB. auto. + red. rewrite T; rewrite update_s; simpl. + rewrite mem_chunk_type. + subst ofs0. eapply store_contentmap_agree; eauto. + rewrite HB; rewrite T; rewrite update_s. + red. simpl. apply store_contentmap_outside_agree. + assumption. left. rewrite mem_chunk_type. + rewrite size_mem_type. subst ofs0. auto. +Qed. + +(** Agreement is preserved by stores within blocks other than the + one pointed to by [sp]. *) + +Lemma frame_match_store_stack_other: + forall fr sp base mm ms sp' base' ty ofs v ms', + frame_match fr sp base mm ms -> + store_stack ms (Vptr sp' base') ty ofs v = Some ms' -> + sp <> sp' -> + frame_match fr sp base mm ms'. +Proof. + unfold store_stack, Val.add, storev. intros. inversion H. + generalize (store_inv _ _ _ _ _ _ H0). intros [P [Q [R [S [x T]]]]]. + generalize (low_bound_store _ _ _ _ sp _ _ H0). intro LB. + generalize (high_bound_store _ _ _ _ sp _ _ H0). intro HB. + apply frame_match_intro; auto. + eapply valid_block_store; eauto. + rewrite LB; auto. + rewrite HB; auto. + rewrite T; rewrite update_o; auto. + rewrite HB; rewrite T; rewrite update_o; auto. +Qed. + +(** Stores relative to [sp], corresponding to accesses to Cminor stack data + in the abstract semantics, give the same results in the concrete + semantics. Moreover, agreement between frames and activation + records is preserved. *) + +Lemma frame_match_store_ok: + forall fr sp base mm ms chunk ofs v mm', + frame_match fr sp base mm ms -> + store chunk mm sp ofs v = Some mm' -> + exists ms', store chunk ms sp ofs v = Some ms'. +Proof. + intros. inversion H. + generalize (store_inv _ _ _ _ _ _ H0). intros [P [Q [R [S [x T]]]]]. + change (low (blocks mm sp)) with (low_bound mm sp) in Q. + change (high (blocks mm sp)) with (high_bound mm sp) in R. + apply store_in_bounds. + auto. + omega. + apply Zle_trans with (high_bound mm sp). + auto. rewrite H4. apply align_16_top_ge. +Qed. + +Lemma frame_match_store: + forall fr sp base mm ms chunk b ofs v mm' ms', + frame_match fr sp base mm ms -> + store chunk mm b ofs v = Some mm' -> + store chunk ms b ofs v = Some ms' -> + frame_match fr sp base mm' ms'. +Proof. + intros. inversion H. + generalize (store_inv _ _ _ _ _ _ H1). intros [A [B [C [D [x1 E]]]]]. + generalize (store_inv _ _ _ _ _ _ H0). intros [I [J [K [L [x2 M]]]]]. + generalize (low_bound_store _ _ _ _ sp _ _ H0). intro LBm. + generalize (low_bound_store _ _ _ _ sp _ _ H1). intro LBs. + generalize (high_bound_store _ _ _ _ sp _ _ H0). intro HBm. + generalize (high_bound_store _ _ _ _ sp _ _ H1). intro HBs. + apply frame_match_intro; auto. + eapply valid_block_store; eauto. + congruence. congruence. congruence. + rewrite E. unfold update. case (zeq sp b); intro. + subst b. red; simpl. + apply store_contentmap_outside_agree; auto. + right. unfold low_bound in H3. omega. + assumption. + rewrite HBs; rewrite E; rewrite M; unfold update. + case (zeq sp b); intro. + subst b. red; simpl. + apply store_contentmap_agree; auto. + assumption. +Qed. + +(** Memory allocation of the Cminor stack data block (in the abstract + semantics) and of the whole activation record (in the concrete + semantics) return memory states that agree according to [frame_match]. + Moreover, [frame_match] relations over already allocated blocks + remain true. *) + +Lemma frame_match_new: + forall mm ms mm' ms' sp sp' f, + mm.(nextblock) = ms.(nextblock) -> + alloc mm 0 f.(fn_stacksize) = (mm', sp) -> + alloc ms (- f.(fn_framesize)) (align_16_top (- f.(fn_framesize)) f.(fn_stacksize)) = (ms', sp') -> + f.(fn_framesize) >= 0 -> + f.(fn_framesize) <= -Int.min_signed -> + frame_match (init_frame f) sp (Int.repr (-f.(fn_framesize))) mm' ms'. +Proof. + intros. + injection H0; intros. injection H1; intros. + assert (sp = sp'). congruence. rewrite <- H8 in H6. subst sp'. + generalize (low_bound_alloc _ _ sp _ _ _ H0). rewrite zeq_true. intro LBm. + generalize (low_bound_alloc _ _ sp _ _ _ H1). rewrite zeq_true. intro LBs. + generalize (high_bound_alloc _ _ sp _ _ _ H0). rewrite zeq_true. intro HBm. + generalize (high_bound_alloc _ _ sp _ _ _ H1). rewrite zeq_true. intro HBs. + apply frame_match_intro; auto. + eapply valid_new_block; eauto. + simpl. congruence. + simpl. omega. + simpl. omega. + rewrite <- H7. simpl. rewrite H6; simpl. rewrite update_s. + hnf; simpl; auto. + rewrite HBs; rewrite <- H5; simpl; rewrite H4; rewrite <- H7; simpl; rewrite H6; simpl; + repeat (rewrite update_s). + hnf; simpl; auto. +Qed. + +Lemma frame_match_alloc: + forall mm ms fr sp base lom him los his mm' ms' bm bs, + mm.(nextblock) = ms.(nextblock) -> + frame_match fr sp base mm ms -> + alloc mm lom him = (mm', bm) -> + alloc ms los his = (ms', bs) -> + frame_match fr sp base mm' ms'. +Proof. + intros. inversion H0. + assert (sp <> bm). + apply valid_not_valid_diff with mm. + red. rewrite H. exact H3. + eapply fresh_block_alloc; eauto. + assert (sp <> bs). + apply valid_not_valid_diff with ms. auto. + eapply fresh_block_alloc; eauto. + generalize (low_bound_alloc _ _ sp _ _ _ H1). + rewrite zeq_false; auto; intro LBm. + generalize (low_bound_alloc _ _ sp _ _ _ H2). + rewrite zeq_false; auto; intro LBs. + generalize (high_bound_alloc _ _ sp _ _ _ H1). + rewrite zeq_false; auto; intro HBm. + generalize (high_bound_alloc _ _ sp _ _ _ H2). + rewrite zeq_false; auto; intro HBs. + apply frame_match_intro. + eapply valid_block_alloc; eauto. + congruence. congruence. congruence. auto. auto. auto. + injection H2; intros. rewrite <- H20; simpl; rewrite H19; simpl. + rewrite update_o; auto. + rewrite HBs; + injection H2; intros. rewrite <- H20; simpl; rewrite H19; simpl. + injection H1; intros. rewrite <- H22; simpl; rewrite H21; simpl. + repeat (rewrite update_o; auto). +Qed. + +(** [frame_match] relations are preserved by freeing a block + other than the one pointed to by [sp]. *) + +Lemma frame_match_free: + forall fr sp base mm ms b, + frame_match fr sp base mm ms -> + sp <> b -> + frame_match fr sp base (free mm b) (free ms b). +Proof. + intros. inversion H. + generalize (low_bound_free mm _ _ H0); intro LBm. + generalize (low_bound_free ms _ _ H0); intro LBs. + generalize (high_bound_free mm _ _ H0); intro HBm. + generalize (high_bound_free ms _ _ H0); intro HBs. + apply frame_match_intro; auto. + congruence. congruence. congruence. + unfold free; simpl. rewrite update_o; auto. + rewrite HBs. + unfold free; simpl. repeat (rewrite update_o; auto). +Qed. + +(** ** Agreement for all the frames in a call stack *) + +(** We need to reason about all the frames and activation records + active at any given time during the executions: not just + about those for the currently executing function, but also + those for the callers. These collections of + active frames are called ``call stacks''. They are lists + of triples representing a frame and a stack pointer + (reference and offset) in the abstract semantics. *) + +Definition callstack : Set := list (frame * block * int). + +(** The correct linking of frames (each frame contains a pointer + to the frame of its caller at the lowest offset) is captured + by the following predicate. *) + +Inductive callstack_linked: callstack -> Prop := + | callstack_linked_nil: + callstack_linked nil + | callstack_linked_one: + forall fr1 sp1 base1, + callstack_linked ((fr1, sp1, base1) :: nil) + | callstack_linked_cons: + forall fr1 sp1 base1 fr2 base2 sp2 cs, + get_slot fr1 Tint 0 (Vptr sp2 base2) -> + callstack_linked ((fr2, sp2, base2) :: cs) -> + callstack_linked ((fr1, sp1, base1) :: (fr2, sp2, base2) :: cs). + +(** [callstack_dom cs b] (read: call stack [cs] is ``dominated'' + by block reference [b]) means that the stack pointers in [cs] + strictly decrease and are all below [b]. This ensures that + the stack block for a function was allocated after that for its + callers. A consequence is that no two active functions share + the same stack block. *) + +Inductive callstack_dom: callstack -> block -> Prop := + | callstack_dom_nil: + forall b, callstack_dom nil b + | callstack_dom_cons: + forall fr sp base cs b, + sp < b -> + callstack_dom cs sp -> + callstack_dom ((fr, sp, base) :: cs) b. + +Lemma callstack_dom_less: + forall cs b, callstack_dom cs b -> + forall fr sp base, In (fr, sp, base) cs -> sp < b. +Proof. + induction 1. simpl. tauto. + simpl. intros fr0 sp0 base0 [A|B]. + injection A; intros; subst fr0; subst sp0; subst base0. auto. + apply Zlt_trans with sp. eapply IHcallstack_dom; eauto. auto. +Qed. + +Lemma callstack_dom_diff: + forall cs b, callstack_dom cs b -> + forall fr sp base, In (fr, sp, base) cs -> sp <> b. +Proof. + intros. apply Zlt_not_eq. eapply callstack_dom_less; eauto. +Qed. + +Lemma callstack_dom_incr: + forall cs b, callstack_dom cs b -> + forall b', b < b' -> callstack_dom cs b'. +Proof. + induction 1; intros. + apply callstack_dom_nil. + apply callstack_dom_cons. omega. auto. +Qed. + +(** Every block reference is either ``in'' a call stack (that is, + refers to the stack block of one of the calls) or ``not in'' + a call stack (that is, differs from all the stack blocks). *) + +Inductive notin_callstack: block -> callstack -> Prop := + | notin_callstack_nil: + forall b, notin_callstack b nil + | notin_callstack_cons: + forall b fr sp base cs, + b <> sp -> + notin_callstack b cs -> + notin_callstack b ((fr, sp, base) :: cs). + +Lemma in_or_notin_callstack: + forall b cs, + (exists fr, exists base, In (fr, b, base) cs) \/ notin_callstack b cs. +Proof. + induction cs. + right; constructor. + elim IHcs. + intros [fr [base IN]]. left. exists fr; exists base; auto with coqlib. + intro NOTIN. destruct a. destruct p. case (eq_block b b0); intro. + left. exists f; exists i. subst b0. auto with coqlib. + right. apply notin_callstack_cons; auto. +Qed. + +(** [callstack_invariant cs mm ms] relates the memory state [mm] + from the abstract semantics with the memory state [ms] from the + concrete semantics, relative to the current call stack [cs]. + Five conditions are enforced: +- All frames in [cs] agree with the corresponding activation records + (in the sense of [frame_match]). +- The frames in the call stack are properly linked. +- Memory blocks that are not activation records for active function + calls are exactly identical in [mm] and [ms]. +- The allocation pointers are the same in [mm] and [ms]. +- The call stack [cs] is ``dominated'' by this allocation pointer, + implying that all activation records are valid, allocated blocks, + pairwise disjoint, and they were allocated in the order implied + by [cs]. *) + +Record callstack_invariant (cs: callstack) (mm ms: mem) : Prop := + mk_callstack_invariant { + cs_frame: + forall fr sp base, + In (fr, sp, base) cs -> frame_match fr sp base mm ms; + cs_linked: + callstack_linked cs; + cs_others: + forall b, notin_callstack b cs -> + mm.(blocks) b = ms.(blocks) b; + cs_next: + mm.(nextblock) = ms.(nextblock); + cs_dom: + callstack_dom cs ms.(nextblock) + }. + +(** Again, while [callstack_invariant] is presented as a relation, + it actually fully determines the concrete memory state [ms] + from the abstract memory state [mm] and the call stack [cs]. *) + +Lemma callstack_exten: + forall cs mm ms1 ms2, + callstack_invariant cs mm ms1 -> + callstack_invariant cs mm ms2 -> + ms1 = ms2. +Proof. + intros. inversion H; inversion H0. + apply mem_exten. + congruence. + intros. elim (in_or_notin_callstack b cs). + intros [fr [base FM]]. apply frame_match_exten with fr base mm; auto. + intro. transitivity (blocks mm b). + symmetry. auto. auto. +Qed. + +(** The following properties of [callstack_invariant] are the + building blocks for the proof of simulation. First, a [get_slot] + operation in the abstract semantics corresponds to a [sp]-relative + memory load in the concrete semantics. *) + +Lemma callstack_get_slot: + forall fr sp base cs mm ms ty ofs v, + callstack_invariant ((fr, sp, base) :: cs) mm ms -> + get_slot fr ty (Int.signed ofs) v -> + Val.has_type v ty -> + load_stack ms (Vptr sp base) ty ofs = Some v. +Proof. + intros. inversion H. + apply frame_match_get_slot with fr mm. + apply cs_frame0. apply in_eq. + auto. auto. +Qed. + +(** Similarly, a [get_parent] operation corresponds to loading + the back-link from the current activation record, then loading + from this back-link. *) + +Lemma callstack_get_parent: + forall fr1 sp1 base1 fr2 sp2 base2 cs mm ms ty ofs v, + callstack_invariant ((fr1, sp1, base1) :: (fr2, sp2, base2) :: cs) mm ms -> + get_slot fr2 ty (Int.signed ofs) v -> + Val.has_type v ty -> + load_stack ms (Vptr sp1 base1) Tint (Int.repr 0) = Some (Vptr sp2 base2) /\ + load_stack ms (Vptr sp2 base2) ty ofs = Some v. +Proof. + intros. inversion H. split. + inversion cs_linked0. + apply frame_match_get_slot with fr1 mm. + apply cs_frame0. auto with coqlib. + rewrite Int.signed_repr. auto. compute. intuition congruence. + exact I. + apply frame_match_get_slot with fr2 mm. + apply cs_frame0. auto with coqlib. + auto. auto. +Qed. + +(** A memory load valid in the abstract semantics can also be performed + in the concrete semantics. *) + +Lemma callstack_load: + forall cs chunk mm ms a v, + callstack_invariant cs mm ms -> + loadv chunk mm a = Some v -> + loadv chunk ms a = Some v. +Proof. + unfold loadv. intros. destruct a; try discriminate. + inversion H. + elim (in_or_notin_callstack b cs). + intros [fr [base IN]]. apply frame_match_load with fr base mm; auto. + intro. rewrite <- H0. unfold load. + rewrite (cs_others0 b H1). rewrite cs_next0. reflexivity. +Qed. + +(** A [set_slot] operation in the abstract semantics corresponds + to a [sp]-relative memory store in the concrete semantics. + Moreover, the property [callstack_invariant] still holds for + the final call stacks and memory states. *) + +Lemma callstack_set_slot: + forall fr sp base cs mm ms ty ofs v fr', + callstack_invariant ((fr, sp, base) :: cs) mm ms -> + set_slot fr ty (Int.signed ofs) v fr' -> + 4 <= Int.signed ofs -> + exists ms', + store_stack ms (Vptr sp base) ty ofs v = Some ms' /\ + callstack_invariant ((fr', sp, base) :: cs) mm ms'. +Proof. + intros. inversion H. + assert (frame_match fr sp base mm ms). apply cs_frame0. apply in_eq. + generalize (frame_match_set_slot _ _ _ _ _ _ _ _ _ H2 H0). + intros [ms' [SS FM]]. + generalize (store_inv _ _ _ _ _ _ SS). intros [A [B [C [D [x E]]]]]. + exists ms'. + split. auto. + constructor. + (* cs_frame *) + intros. elim H3; intros. + injection H4; intros; clear H4. + subst fr0; subst sp0; subst base0. auto. + apply frame_match_store_stack_other with ms sp base ty ofs v. + apply cs_frame0. auto with coqlib. auto. + apply callstack_dom_diff with cs fr0 base0. inversion cs_dom0; auto. auto. + (* cs_linked *) + inversion cs_linked0. apply callstack_linked_one. + apply callstack_linked_cons. + eapply slot_gso; eauto. + auto. + (* cs_others *) + intros. inversion H3. + rewrite E; simpl. rewrite update_o; auto. apply cs_others0. + constructor; auto. + (* cs_next *) + congruence. + (* cs_dom *) + inversion cs_dom0. constructor. rewrite D; auto. auto. +Qed. + +(** A memory store in the abstract semantics can also be performed + in the concrete semantics. Moreover, the property + [callstack_invariant] is preserved. *) + +Lemma callstack_store_aux: + forall cs mm ms chunk b ofs v mm' ms', + callstack_invariant cs mm ms -> + store chunk mm b ofs v = Some mm' -> + store chunk ms b ofs v = Some ms' -> + callstack_invariant cs mm' ms'. +Proof. + intros. inversion H. + generalize (store_inv _ _ _ _ _ _ H0). intros [A [B [C [D [x E]]]]]. + generalize (store_inv _ _ _ _ _ _ H1). intros [P [Q [R [S [y T]]]]]. + constructor; auto. + (* cs_frame *) + intros. eapply frame_match_store; eauto. + (* cs_others *) + intros. generalize (cs_others0 b0 H2); intro. + rewrite E; rewrite T; unfold update. + case (zeq b0 b); intro. + subst b0. + generalize x; generalize y. rewrite H3. + intros. replace y0 with x0. reflexivity. apply proof_irrelevance. + auto. + (* cs_nextblock *) + congruence. + (* cs_dom *) + rewrite S. auto. +Qed. + +Lemma callstack_store_ok: + forall cs mm ms chunk b ofs v mm', + callstack_invariant cs mm ms -> + store chunk mm b ofs v = Some mm' -> + exists ms', store chunk ms b ofs v = Some ms'. +Proof. + intros. inversion H. + elim (in_or_notin_callstack b cs). + intros [fr [base IN]]. + apply frame_match_store_ok with fr base mm mm'; auto. + intro. generalize (cs_others0 b H1). intro. + generalize (store_inv _ _ _ _ _ _ H0). + rewrite cs_next0; rewrite H2. intros [A [B [C [D [x E]]]]]. + apply store_in_bounds; auto. +Qed. + +Lemma callstack_store: + forall cs mm ms chunk a v mm', + callstack_invariant cs mm ms -> + storev chunk mm a v = Some mm' -> + exists ms', + storev chunk ms a v = Some ms' /\ + callstack_invariant cs mm' ms'. +Proof. + unfold storev; intros. destruct a; try discriminate. + generalize (callstack_store_ok _ _ _ _ _ _ _ _ H H0). + intros [ms' STORE]. + exists ms'. split. auto. eapply callstack_store_aux; eauto. +Qed. + +(** At function entry, a new frame is pushed on the call stack, + and memory blocks are allocated in both semantics. Moreover, + the back link to the caller's activation record is set up + in the concrete semantics. All this preserves [callstack_invariant]. *) + +Lemma callstack_function_entry: + forall fr0 sp0 base0 cs mm ms f fr mm' sp ms' sp', + callstack_invariant ((fr0, sp0, base0) :: cs) mm ms -> + alloc mm 0 f.(fn_stacksize) = (mm', sp) -> + alloc ms (- f.(fn_framesize)) (align_16_top (- f.(fn_framesize)) f.(fn_stacksize)) = (ms', sp') -> + f.(fn_framesize) >= 0 -> + f.(fn_framesize) <= -Int.min_signed -> + set_slot (init_frame f) Tint 0 (Vptr sp0 base0) fr -> + let base := Int.repr (-f.(fn_framesize)) in + exists ms'', + store_stack ms' (Vptr sp base) Tint (Int.repr 0) (Vptr sp0 base0) = Some ms'' + /\ callstack_invariant ((fr, sp, base) :: (fr0, sp0, base0) :: cs) mm' ms'' + /\ sp' = sp. +Proof. + assert (ZERO: 0 = Int.signed (Int.repr 0)). + rewrite Int.signed_repr. auto. compute; intuition congruence. + intros. inversion H. + injection H0; intros. injection H1; intros. + assert (sp' = sp). congruence. rewrite H9 in H7. subst sp'. + assert (frame_match (init_frame f) sp base mm' ms'). + unfold base. eapply frame_match_new; eauto. + rewrite ZERO in H4. + generalize (frame_match_set_slot _ _ _ _ _ _ _ _ _ H9 H4). + intros [ms'' [SS FM]]. + generalize (store_inv _ _ _ _ _ _ SS). + intros [A [B [C [D [x E]]]]]. + exists ms''. split; auto. split. + constructor. + (* cs_frame *) + intros. elim H10; intro. + injection H11; intros; clear H11. + subst fr1; subst sp1; subst base1. auto. + eapply frame_match_store_stack_other; eauto. + eapply frame_match_alloc; [idtac|idtac|eexact H0|eexact H1]. + congruence. eapply cs_frame; eauto with coqlib. + rewrite <- H7. eapply callstack_dom_diff; eauto with coqlib. + (* cs_linked *) + constructor. rewrite ZERO. eapply slot_gss; eauto. auto. + (* cs_others *) + intros. inversion H10. + rewrite E; rewrite update_o; auto. + rewrite <- H6; rewrite <- H8; simpl; rewrite H5; rewrite H7; simpl. + repeat (rewrite update_o; auto). + (* cs_next *) + rewrite D. rewrite <- H6; rewrite <- H8; simpl. congruence. + (* cs_dom *) + constructor. rewrite D; auto. rewrite <- H7. auto. + auto. +Qed. + +(** At function return, the memory blocks corresponding to Cminor + stack data and activation record for the function are freed. + This preserves [callstack_invariant]. *) + +Lemma callstack_function_return: + forall fr sp base cs mm ms, + callstack_invariant ((fr, sp, base) :: cs) mm ms -> + callstack_invariant cs (free mm sp) (free ms sp). +Proof. + intros. inversion H. inversion cs_dom0. + constructor. + (* cs_frame *) + intros. apply frame_match_free. apply cs_frame0; auto with coqlib. + apply callstack_dom_diff with cs fr1 base1. auto. auto. + (* cs_linked *) + inversion cs_linked0. apply callstack_linked_nil. auto. + (* cs_others *) + intros. + unfold free; simpl; unfold update. + case (zeq b0 sp); intro. + auto. + apply cs_others0. apply notin_callstack_cons; auto. + (* cs_nextblock *) + simpl. auto. + (* cs_dom *) + simpl. apply callstack_dom_incr with sp; auto. +Qed. + +(** Finally, [callstack_invariant] holds for the initial memory states + in the two semantics. *) + +Lemma callstack_init: + forall (p: program), + callstack_invariant ((empty_frame, nullptr, Int.zero) :: nil) + (Genv.init_mem p) (Genv.init_mem p). +Proof. + intros. + generalize (Genv.initmem_nullptr p). intros [A B]. + constructor. + (* cs_frame *) + intros. elim H; intro. + injection H0; intros; subst fr; subst sp; subst base. + constructor. + assumption. + unfold low_bound. rewrite B. reflexivity. + unfold low_bound, empty_frame. rewrite B. reflexivity. + unfold high_bound. rewrite B. reflexivity. + simpl. omega. + simpl. compute. intuition congruence. + reflexivity. + rewrite B. unfold empty_frame. simpl. hnf. auto. + rewrite B. hnf. auto. + elim H0. + (* cs_linked *) + apply callstack_linked_one. + (* cs_others *) + auto. + (* cs_nextblock *) + reflexivity. + (* cs_dom *) + constructor. exact A. constructor. +Qed. + +(** * The proof of simulation *) + +Section SIMULATION. + +Variable p: program. +Hypothesis wt_p: wt_program p. +Let ge := Genv.globalenv p. + +(** The proof of simulation relies on diagrams of the following form: +<< + sp, parent, c, rs, fr, mm ----------- sp, c, rs, ms + | | + | | + v v + sp, parent, c', rs', fr', mm' -------- sp, c', rs', ms' +>> + The left vertical arrow is a transition in the abstract semantics. + The right vertical arrow is a transition in the concrete semantics. + The precondition (top horizontal line) is the appropriate + [callstack_invariant] property between the initial memory states + [mm] and [ms] and any call stack with [fr] as top frame and + [parent] as second frame. In addition, well-typedness conditions + over the code [c], the register [rs] and the frame [fr] are demanded. + The postcondition (bottom horizontal line) is [callstack_invariant] + between the final memory states [mm'] and [ms'] and the final + call stack. +*) + +Definition exec_instr_prop + (f: function) (sp: val) (parent: frame) + (c: code) (rs: regset) (fr: frame) (mm: mem) + (c': code) (rs': regset) (fr': frame) (mm': mem) : Prop := + forall ms stk base pstk pbase cs + (WTF: wt_function f) + (INCL: incl c f.(fn_code)) + (WTRS: wt_regset rs) + (WTFR: wt_frame fr) + (WTPA: wt_frame parent) + (CSI: callstack_invariant ((fr, stk, base) :: (parent, pstk, pbase) :: cs) mm ms) + (SP: sp = Vptr stk base), + exists ms', + exec_instr ge f sp c rs ms c' rs' ms' /\ + callstack_invariant ((fr', stk, base) :: (parent, pstk, pbase) :: cs) mm' ms'. + +Definition exec_instrs_prop + (f: function) (sp: val) (parent: frame) + (c: code) (rs: regset) (fr: frame) (mm: mem) + (c': code) (rs': regset) (fr': frame) (mm': mem) : Prop := + forall ms stk base pstk pbase cs + (WTF: wt_function f) + (INCL: incl c f.(fn_code)) + (WTRS: wt_regset rs) + (WTFR: wt_frame fr) + (WTPA: wt_frame parent) + (CSI: callstack_invariant ((fr, stk, base) :: (parent, pstk, pbase) :: cs) mm ms) + (SP: sp = Vptr stk base), + exists ms', + exec_instrs ge f sp c rs ms c' rs' ms' /\ + callstack_invariant ((fr', stk, base) :: (parent, pstk, pbase) :: cs) mm' ms'. + +Definition exec_function_body_prop + (f: function) (parent: frame) (link ra: val) + (rs: regset) (mm: mem) + (rs': regset) (mm': mem) : Prop := + forall ms pstk pbase cs + (WTF: wt_function f) + (WTRS: wt_regset rs) + (WTPA: wt_frame parent) + (WTRA: Val.has_type ra Tint) + (LINK: link = Vptr pstk pbase) + (CSI: callstack_invariant ((parent, pstk, pbase) :: cs) mm ms), + exists ms', + exec_function_body ge f (Vptr pstk pbase) ra rs ms rs' ms' /\ + callstack_invariant ((parent, pstk, pbase) :: cs) mm' ms'. + +Definition exec_function_prop + (f: function) (parent: frame) + (rs: regset) (mm: mem) + (rs': regset) (mm': mem) : Prop := + forall ms pstk pbase cs + (WTF: wt_function f) + (WTRS: wt_regset rs) + (WTPA: wt_frame parent) + (CSI: callstack_invariant ((parent, pstk, pbase) :: cs) mm ms), + exists ms', + exec_function ge f (Vptr pstk pbase) rs ms rs' ms' /\ + callstack_invariant ((parent, pstk, pbase) :: cs) mm' ms'. + +Lemma exec_function_equiv: + forall f parent rs mm rs' mm', + Machabstr.exec_function ge f parent rs mm rs' mm' -> + exec_function_prop f parent rs mm rs' mm'. +Proof. + apply (Machabstr.exec_function_ind4 ge + exec_instr_prop + exec_instrs_prop + exec_function_body_prop + exec_function_prop); + intros; red; intros. + + (* Mlabel *) + exists ms. split. constructor. auto. + (* Mgetstack *) + exists ms. split. + constructor. rewrite SP. eapply callstack_get_slot; eauto. + apply wt_get_slot with fr (Int.signed ofs); auto. + auto. + (* Msetstack *) + generalize (wt_function_instrs f WTF _ (INCL _ (in_eq _ _))). + intro WTI. inversion WTI. + assert (4 <= Int.signed ofs). omega. + generalize (callstack_set_slot _ _ _ _ _ _ _ _ _ _ CSI H H5). + intros [ms' [STO CSI']]. + exists ms'. split. constructor. rewrite SP. auto. auto. + (* Mgetparam *) + exists ms. split. + assert (WTV: Val.has_type v ty). eapply wt_get_slot; eauto. + generalize (callstack_get_parent _ _ _ _ _ _ _ _ _ _ _ _ + CSI H WTV). + intros [L1 L2]. + eapply exec_Mgetparam. rewrite SP; eexact L1. eexact L2. + auto. + (* Mop *) + exists ms. split. constructor. auto. auto. + (* Mload *) + exists ms. split. econstructor. eauto. eapply callstack_load; eauto. + auto. + (* Mstore *) + generalize (callstack_store _ _ _ _ _ _ _ CSI H0). + intros [ms' [STO CSI']]. + exists ms'. split. econstructor. eauto. auto. + auto. + (* Mcall *) + red in H1. + assert (WTF': wt_function f'). + destruct ros; simpl in H. + apply (Genv.find_funct_prop wt_function wt_p H). + destruct (Genv.find_symbol ge i); try discriminate. + apply (Genv.find_funct_ptr_prop wt_function wt_p H). + generalize (H1 _ _ _ _ WTF' WTRS WTFR CSI). + intros [ms' [EXF CSI']]. + exists ms'. split. apply exec_Mcall with f'; auto. + rewrite SP. auto. + auto. + (* Mgoto *) + exists ms. split. constructor; auto. auto. + (* Mcond *) + exists ms. split. apply exec_Mcond_true; auto. auto. + exists ms. split. apply exec_Mcond_false; auto. auto. + + (* refl *) + exists ms. split. apply exec_refl. auto. + (* one *) + generalize (H0 _ _ _ _ _ _ WTF INCL WTRS WTFR WTPA CSI SP). + intros [ms' [EX CSI']]. + exists ms'. split. apply exec_one; auto. auto. + (* trans *) + generalize (subject_reduction_instrs p wt_p + _ _ _ _ _ _ _ _ _ _ _ H WTF INCL WTRS WTFR WTPA). + intros [INCL2 [WTRS2 WTFR2]]. + generalize (H0 _ _ _ _ _ _ WTF INCL WTRS WTFR WTPA CSI SP). + intros [ms1 [EX1 CSI1]]. + generalize (H2 _ _ _ _ _ _ WTF INCL2 WTRS2 WTFR2 WTPA CSI1 SP). + intros [ms2 [EX2 CSI2]]. + exists ms2. split. eapply exec_trans; eauto. auto. + + (* function body *) + caseEq (alloc ms (- f.(fn_framesize)) + (align_16_top (- f.(fn_framesize)) f.(fn_stacksize))). + intros ms1 stk1 ALL. + subst link. + assert (FS: f.(fn_framesize) >= 0). + generalize (wt_function_framesize f WTF). omega. + generalize (callstack_function_entry _ _ _ _ _ _ _ _ _ _ _ _ + CSI H ALL FS + (wt_function_no_overflow f WTF) H0). + intros [ms2 [STORELINK [CSI2 EQ]]]. + subst stk1. + assert (ZERO: Int.signed (Int.repr 0) = 0). reflexivity. + assert (FOUR: Int.signed (Int.repr 4) = 4). reflexivity. + assert (BND: 4 <= Int.signed (Int.repr 4)). + rewrite FOUR; omega. + rewrite <- FOUR in H1. + generalize (callstack_set_slot _ _ _ _ _ _ _ _ _ _ + CSI2 H1 BND). + intros [ms3 [STORERA CSI3]]. + assert (WTFR2: wt_frame fr2). + eapply wt_set_slot; eauto. eapply wt_set_slot; eauto. + red. intros. simpl. auto. + exact I. + red in H3. + generalize (H3 _ _ _ _ _ _ WTF (incl_refl _) WTRS WTFR2 WTPA + CSI3 (refl_equal _)). + intros [ms4 [EXEC CSI4]]. + generalize (exec_instrs_link_invariant _ _ _ _ _ _ _ _ _ _ _ _ + H2 WTF (incl_refl _)). + intros [INCL LINKINV]. + exists (free ms4 stk). split. + eapply exec_funct_body; eauto. + eapply callstack_get_slot. eexact CSI4. + apply LINKINV. rewrite ZERO. omega. + eapply slot_gso; eauto. rewrite ZERO. eapply slot_gss; eauto. + exact I. + eapply callstack_get_slot. eexact CSI4. + apply LINKINV. rewrite FOUR. omega. eapply slot_gss; eauto. auto. + eapply callstack_function_return; eauto. + + (* function *) + generalize (H0 (Vptr pstk pbase) Vzero I I + ms pstk pbase cs WTF WTRS WTPA I (refl_equal _) CSI). + intros [ms' [EXEC CSI']]. + exists ms'. split. constructor. intros. + generalize (H0 (Vptr pstk pbase) ra I H1 + ms pstk pbase cs WTF WTRS WTPA H1 (refl_equal _) CSI). + intros [ms1 [EXEC1 CSI1]]. + rewrite (callstack_exten _ _ _ _ CSI' CSI1). auto. + auto. +Qed. + +End SIMULATION. + +Theorem exec_program_equiv: + forall p r, + wt_program p -> + Machabstr.exec_program p r -> + Mach.exec_program p r. +Proof. + intros p r WTP [fptr [f [rs [mm [FINDPTR [FINDF [SIG [EXEC RES]]]]]]]]. + assert (WTF: wt_function f). + apply (Genv.find_funct_ptr_prop wt_function WTP FINDF). + assert (WTRS: wt_regset (Regmap.init Vundef)). + red; intros. rewrite Regmap.gi; simpl; auto. + assert (WTFR: wt_frame empty_frame). + red; intros. simpl. auto. + generalize (exec_function_equiv p WTP + f empty_frame + (Regmap.init Vundef) (Genv.init_mem p) rs mm + EXEC (Genv.init_mem p) nullptr Int.zero nil + WTF WTRS WTFR (callstack_init p)). + intros [ms' [EXEC' CSI]]. + red. exists fptr; exists f; exists rs; exists ms'. + intuition. rewrite SIG in RES. exact RES. +Qed. diff --git a/backend/Machtyping.v b/backend/Machtyping.v new file mode 100644 index 00000000..987269ba --- /dev/null +++ b/backend/Machtyping.v @@ -0,0 +1,367 @@ +(** Type system for the Mach intermediate language. *) + +Require Import Coqlib. +Require Import Maps. +Require Import AST. +Require Import Mem. +Require Import Integers. +Require Import Values. +Require Import Mem. +Require Import Globalenvs. +Require Import Op. +Require Import Locations. +Require Conventions. +Require Import Mach. + +(** * Typing rules *) + +Inductive wt_instr : instruction -> Prop := + | wt_Mlabel: + forall lbl, + wt_instr (Mlabel lbl) + | wt_Mgetstack: + forall ofs ty r, + mreg_type r = ty -> + wt_instr (Mgetstack ofs ty r) + | wt_Msetstack: + forall ofs ty r, + mreg_type r = ty -> 24 <= Int.signed ofs -> + wt_instr (Msetstack r ofs ty) + | wt_Mgetparam: + forall ofs ty r, + mreg_type r = ty -> + wt_instr (Mgetparam ofs ty r) + | wt_Mopmove: + forall r1 r, + mreg_type r1 = mreg_type r -> + wt_instr (Mop Omove (r1 :: nil) r) + | wt_Mopundef: + forall r, + wt_instr (Mop Oundef nil r) + | wt_Mop: + forall op args res, + op <> Omove -> op <> Oundef -> + (List.map mreg_type args, mreg_type res) = type_of_operation op -> + wt_instr (Mop op args res) + | wt_Mload: + forall chunk addr args dst, + List.map mreg_type args = type_of_addressing addr -> + mreg_type dst = type_of_chunk chunk -> + wt_instr (Mload chunk addr args dst) + | wt_Mstore: + forall chunk addr args src, + List.map mreg_type args = type_of_addressing addr -> + mreg_type src = type_of_chunk chunk -> + wt_instr (Mstore chunk addr args src) + | wt_Mcall: + forall sig ros, + match ros with inl r => mreg_type r = Tint | inr s => True end -> + wt_instr (Mcall sig ros) + | wt_Mgoto: + forall lbl, + wt_instr (Mgoto lbl) + | wt_Mcond: + forall cond args lbl, + List.map mreg_type args = type_of_condition cond -> + wt_instr (Mcond cond args lbl) + | wt_Mreturn: + wt_instr Mreturn. + +Record wt_function (f: function) : Prop := mk_wt_function { + wt_function_instrs: + forall instr, In instr f.(fn_code) -> wt_instr instr; + wt_function_stacksize: + f.(fn_stacksize) >= 0; + wt_function_framesize: + f.(fn_framesize) >= 24; + wt_function_no_overflow: + f.(fn_framesize) <= -Int.min_signed +}. + +Definition wt_program (p: program) : Prop := + forall i f, In (i, f) (prog_funct p) -> wt_function f. + +(** * Type soundness *) + +Require Import Machabstr. + +(** We show a weak type soundness result for the alternate semantics + of Mach: for a well-typed Mach program, if a transition is taken + from a state where registers hold values of their static types, + registers in the final state hold values of their static types + as well. This is a progress theorem for our type system. + It is used in the proof of implcation from the abstract Mach + semantics to the concrete Mach semantics (file [Machabstr2mach]). +*) + +Definition wt_regset (rs: regset) : Prop := + forall r, Val.has_type (rs r) (mreg_type r). + +Definition wt_content (c: content) : Prop := + match c with + | Datum32 v => Val.has_type v Tint + | Datum64 v => Val.has_type v Tfloat + | _ => True + end. + +Definition wt_frame (fr: frame) : Prop := + forall ofs, wt_content (fr.(contents) ofs). + +Lemma wt_setreg: + forall (rs: regset) (r: mreg) (v: val), + Val.has_type v (mreg_type r) -> + wt_regset rs -> wt_regset (rs#r <- v). +Proof. + intros; red; intros. unfold Regmap.set. + case (RegEq.eq r0 r); intro. + subst r0; assumption. + apply H0. +Qed. + +Lemma wt_get_slot: + forall fr ty ofs v, + get_slot fr ty ofs v -> + wt_frame fr -> + Val.has_type v ty. +Proof. + induction 1; intro. subst v. + set (pos := low fr + ofs). + case ty; simpl. + unfold getN. case (check_cont 3 (pos + 1) (contents fr)). + generalize (H2 pos). unfold wt_content. + destruct (contents fr pos); simpl; tauto. + simpl; auto. + unfold getN. case (check_cont 7 (pos + 1) (contents fr)). + generalize (H2 pos). unfold wt_content. + destruct (contents fr pos); simpl; tauto. + simpl; auto. +Qed. + +Lemma wt_set_slot: + forall fr ty ofs v fr', + set_slot fr ty ofs v fr' -> + wt_frame fr -> + Val.has_type v ty -> + wt_frame fr'. +Proof. + intros. induction H. + set (i := low fr + ofs). + red; intro j; simpl. + assert (forall n ofs c, + let c' := set_cont n ofs c in + forall k, c' k = c k \/ c' k = Cont). + induction n; simpl; intros. + left; auto. + unfold update. case (zeq k ofs0); intro. + right; auto. + apply IHn. + destruct ty; simpl; unfold store_contents; unfold setN; + unfold update; case (zeq j i); intro. + red. assumption. + elim (H 3%nat (i + 1) (contents fr) j); intro; rewrite H2. + apply H0. red; auto. + red. assumption. + elim (H 7%nat (i + 1) (contents fr) j); intro; rewrite H2. + apply H0. red; auto. +Qed. + +Lemma wt_init_frame: + forall f, + wt_frame (init_frame f). +Proof. + intros. unfold init_frame. + red; intros. simpl. exact I. +Qed. + +Lemma incl_find_label: + forall lbl c c', find_label lbl c = Some c' -> incl c' c. +Proof. + induction c; simpl. + intros; discriminate. + case (is_label lbl a); intros. + injection H; intro; subst c'; apply incl_tl; apply incl_refl. + apply incl_tl; auto. +Qed. + +Section SUBJECT_REDUCTION. + +Variable p: program. +Hypothesis wt_p: wt_program p. +Let ge := Genv.globalenv p. + +Definition exec_instr_prop + (f: function) (sp: val) (parent: frame) + (c1: code) (rs1: regset) (fr1: frame) (m1: mem) + (c2: code) (rs2: regset) (fr2: frame) (m2: mem) := + forall (WTF: wt_function f) + (INCL: incl c1 f.(fn_code)) + (WTRS: wt_regset rs1) + (WTFR: wt_frame fr1) + (WTPA: wt_frame parent), + incl c2 f.(fn_code) /\ wt_regset rs2 /\ wt_frame fr2. +Definition exec_function_body_prop + (f: function) (parent: frame) (link ra: val) + (rs1: regset) (m1: mem) (rs2: regset) (m2: mem) := + forall (WTF: wt_function f) + (WTRS: wt_regset rs1) + (WTPA: wt_frame parent) + (WTLINK: Val.has_type link Tint) + (WTRA: Val.has_type ra Tint), + wt_regset rs2. +Definition exec_function_prop + (f: function) (parent: frame) + (rs1: regset) (m1: mem) (rs2: regset) (m2: mem) := + forall (WTF: wt_function f) + (WTRS: wt_regset rs1) + (WTPA: wt_frame parent), + wt_regset rs2. + +Lemma subject_reduction: + (forall f sp parent c1 rs1 fr1 m1 c2 rs2 fr2 m2, + exec_instr ge f sp parent c1 rs1 fr1 m1 c2 rs2 fr2 m2 -> + exec_instr_prop f sp parent c1 rs1 fr1 m1 c2 rs2 fr2 m2) +/\ (forall f sp parent c1 rs1 fr1 m1 c2 rs2 fr2 m2, + exec_instrs ge f sp parent c1 rs1 fr1 m1 c2 rs2 fr2 m2 -> + exec_instr_prop f sp parent c1 rs1 fr1 m1 c2 rs2 fr2 m2) +/\ (forall f parent link ra rs1 m1 rs2 m2, + exec_function_body ge f parent link ra rs1 m1 rs2 m2 -> + exec_function_body_prop f parent link ra rs1 m1 rs2 m2) +/\ (forall f parent rs1 m1 rs2 m2, + exec_function ge f parent rs1 m1 rs2 m2 -> + exec_function_prop f parent rs1 m1 rs2 m2). +Proof. + apply exec_mutual_induction; red; intros; + try (generalize (wt_function_instrs _ WTF _ (INCL _ (in_eq _ _))); intro; + intuition eauto with coqlib). + + apply wt_setreg; auto. + inversion H0. rewrite H2. apply wt_get_slot with fr (Int.signed ofs); auto. + + inversion H0. eapply wt_set_slot; eauto. + rewrite <- H3. apply WTRS. + + inversion H0. apply wt_setreg; auto. + rewrite H2. apply wt_get_slot with parent (Int.signed ofs); auto. + + apply wt_setreg; auto. inversion H0. + simpl. subst args; subst op. simpl in H. + rewrite <- H2. replace v with (rs r1). apply WTRS. congruence. + subst args; subst op. simpl in H. + replace v with Vundef. simpl; auto. congruence. + replace (mreg_type res) with (snd (type_of_operation op)). + apply type_of_operation_sound with function ge rs##args sp; auto. + rewrite <- H6; reflexivity. + + apply wt_setreg; auto. inversion H1. rewrite H7. + eapply type_of_chunk_correct; eauto. + + apply H1; auto. + destruct ros; simpl in H. + apply (Genv.find_funct_prop wt_function wt_p H). + destruct (Genv.find_symbol ge i). + apply (Genv.find_funct_ptr_prop wt_function wt_p H). + discriminate. + + apply incl_find_label with lbl; auto. + apply incl_find_label with lbl; auto. + + tauto. + apply H0; auto. + generalize (H0 WTF INCL WTRS WTFR WTPA). intros [A [B C]]. + apply H2; auto. + + assert (WTFR2: wt_frame fr2). + eapply wt_set_slot; eauto. + eapply wt_set_slot; eauto. + apply wt_init_frame. + generalize (H3 WTF (incl_refl _) WTRS WTFR2 WTPA). + tauto. + + apply (H0 Vzero Vzero). exact I. exact I. auto. auto. auto. + exact I. exact I. +Qed. + +Lemma subject_reduction_instr: + forall f sp parent c1 rs1 fr1 m1 c2 rs2 fr2 m2, + exec_instr ge f sp parent c1 rs1 fr1 m1 c2 rs2 fr2 m2 -> + exec_instr_prop f sp parent c1 rs1 fr1 m1 c2 rs2 fr2 m2. +Proof (proj1 subject_reduction). + +Lemma subject_reduction_instrs: + forall f sp parent c1 rs1 fr1 m1 c2 rs2 fr2 m2, + exec_instrs ge f sp parent c1 rs1 fr1 m1 c2 rs2 fr2 m2 -> + exec_instr_prop f sp parent c1 rs1 fr1 m1 c2 rs2 fr2 m2. +Proof (proj1 (proj2 subject_reduction)). + +Lemma subject_reduction_function: + forall f parent rs1 m1 rs2 m2, + exec_function ge f parent rs1 m1 rs2 m2 -> + exec_function_prop f parent rs1 m1 rs2 m2. +Proof (proj2 (proj2 (proj2 subject_reduction))). + +End SUBJECT_REDUCTION. + +(** * Preservation of the reserved frame slots during execution *) + +(** We now show another result useful for the proof of implication + between the two Mach semantics: well-typed functions do not + change the values of the back link and return address fields + of the frame slot (at offsets 0 and 4) during their execution. + Actually, we show that the whole reserved part of the frame + (between offsets 0 and 24) does not change. *) + +Definition link_invariant (fr1 fr2: frame) : Prop := + forall pos v, + 0 <= pos < 20 -> + get_slot fr1 Tint pos v -> get_slot fr2 Tint pos v. + +Remark link_invariant_refl: + forall fr, link_invariant fr fr. +Proof. + intros; red; auto. +Qed. + +Lemma set_slot_link_invariant: + forall fr ty ofs v fr', + set_slot fr ty ofs v fr' -> + 24 <= ofs -> + link_invariant fr fr'. +Proof. + induction 1. intros; red; intros. + inversion H1. constructor. auto. exact H3. + simpl contents. simpl low. symmetry. rewrite H4. + apply load_store_contents_other. simpl. simpl in H3. + omega. +Qed. + +Lemma exec_instr_link_invariant: + forall ge f sp parent c1 rs1 fr1 m1 c2 rs2 fr2 m2, + exec_instr ge f sp parent c1 rs1 fr1 m1 c2 rs2 fr2 m2 -> + wt_function f -> + incl c1 f.(fn_code) -> + incl c2 f.(fn_code) /\ link_invariant fr1 fr2. +Proof. + induction 1; intros; + (split; [eauto with coqlib | try (apply link_invariant_refl)]). + eapply set_slot_link_invariant; eauto. + generalize (wt_function_instrs _ H0 _ (H1 _ (in_eq _ _))); intro. + inversion H2. auto. + eapply incl_find_label; eauto. + eapply incl_find_label; eauto. +Qed. + +Lemma exec_instrs_link_invariant: + forall ge f sp parent c1 rs1 fr1 m1 c2 rs2 fr2 m2, + exec_instrs ge f sp parent c1 rs1 fr1 m1 c2 rs2 fr2 m2 -> + wt_function f -> + incl c1 f.(fn_code) -> + incl c2 f.(fn_code) /\ link_invariant fr1 fr2. +Proof. + induction 1; intros. + split. auto. apply link_invariant_refl. + eapply exec_instr_link_invariant; eauto. + generalize (IHexec_instrs1 H1 H2); intros [A B]. + generalize (IHexec_instrs2 H1 A); intros [C D]. + split. auto. red; auto. +Qed. + diff --git a/backend/Main.v b/backend/Main.v new file mode 100644 index 00000000..6647e269 --- /dev/null +++ b/backend/Main.v @@ -0,0 +1,307 @@ +(** The compiler back-end and its proof of semantic preservation *) + +(** Libraries. *) +Require Import Coqlib. +Require Import Maps. +Require Import AST. +Require Import Values. +(** Languages (syntax and semantics). *) +Require Csharpminor. +Require Cminor. +Require RTL. +Require LTL. +Require Linear. +Require Mach. +Require PPC. +(** Translation passes. *) +Require Cminorgen. +Require RTLgen. +Require Constprop. +Require CSE. +Require Allocation. +Require Tunneling. +Require Linearize. +Require Stacking. +Require PPCgen. +(** Type systems. *) +Require RTLtyping. +Require LTLtyping. +Require Lineartyping. +Require Machtyping. +(** Proofs of semantic preservation and typing preservation. *) +Require Cminorgenproof. +Require RTLgenproof. +Require Constpropproof. +Require CSEproof. +Require Allocproof. +Require Alloctyping. +Require Tunnelingproof. +Require Tunnelingtyping. +Require Linearizeproof. +Require Linearizetyping. +Require Stackingproof. +Require Stackingtyping. +Require Machabstr2mach. +Require PPCgenproof. + +(** * Composing the translation passes *) + +(** We first define useful monadic composition operators, + along with funny (but convenient) notations. *) + +Definition apply_total (A B: Set) (x: option A) (f: A -> B) : option B := + match x with None => None | Some x1 => Some (f x1) end. + +Definition apply_partial (A B: Set) + (x: option A) (f: A -> option B) : option B := + match x with None => None | Some x1 => f x1 end. + +Notation "a @@@ b" := + (apply_partial _ _ a b) (at level 50, left associativity). +Notation "a @@ b" := + (apply_total _ _ a b) (at level 50, left associativity). + +(** We define two translation functions for whole programs: one starting with + a Csharpminor program, the other with a Cminor program. Both + translations produce PPC programs ready for pretty-printing and + assembling. Some front-ends will prefer to generate Csharpminor + (e.g. a C front-end) while some others (e.g. an ML compiler) might + find it more convenient to generate Cminor directly, bypassing + Csharpminor. + + There are two ways to compose the compiler passes. The first translates + every function from the Cminor program from Cminor to RTL, then to LTL, etc, + all the way to PPC, and iterates this transformation for every function. + The second translates the whole Cminor program to a RTL program, then to + an LTL program, etc. We follow the first approach because it has lower + memory requirements. + + The translation of a Cminor function to a PPC function is as follows. *) + +Definition transf_cminor_function (f: Cminor.function) : option PPC.code := + Some f + @@@ RTLgen.transl_function + @@ Constprop.transf_function + @@ CSE.transf_function + @@@ Allocation.transf_function + @@ Tunneling.tunnel_function + @@ Linearize.transf_function + @@@ Stacking.transf_function + @@@ PPCgen.transf_function. + +(** And here is the translation for Csharpminor functions. *) + +Definition transf_csharpminor_function (f: Csharpminor.function) : option PPC.code := + Some f + @@@ Cminorgen.transl_function + @@@ transf_cminor_function. + +(** The corresponding translations for whole program follow. *) + +Definition transf_cminor_program (p: Cminor.program) : option PPC.program := + transform_partial_program transf_cminor_function p. + +Definition transf_csharpminor_program (p: Csharpminor.program) : option PPC.program := + transform_partial_program transf_csharpminor_function p. + +(** * Equivalence with whole program transformations *) + +(** To prove semantic preservation for the whole compiler, it is easier to reason + over the second way to compose the compiler pass: the one that translate + whole programs through each compiler pass. We now define this second translation + and prove that it produces the same PPC programs as the first translation. *) + +Definition transf_cminor_program2 (p: Cminor.program) : option PPC.program := + Some p + @@@ RTLgen.transl_program + @@ Constprop.transf_program + @@ CSE.transf_program + @@@ Allocation.transf_program + @@ Tunneling.tunnel_program + @@ Linearize.transf_program + @@@ Stacking.transf_program + @@@ PPCgen.transf_program. + +Definition transf_csharpminor_program2 (p: Csharpminor.program) : option PPC.program := + Some p + @@@ Cminorgen.transl_program + @@@ transf_cminor_program2. + +Lemma transf_partial_program_compose: + forall (A B C: Set) + (f1: A -> option B) (f2: B -> option C) + (p: list (ident * A)), + transf_partial_program f1 p @@@ transf_partial_program f2 = + transf_partial_program (fun f => f1 f @@@ f2) p. +Proof. + induction p. simpl. auto. + simpl. destruct a. destruct (f1 a). + simpl. simpl in IHp. destruct (transf_partial_program f1 p). + simpl. simpl in IHp. destruct (f2 b). + destruct (transf_partial_program f2 l). + rewrite <- IHp. auto. + rewrite <- IHp. auto. + auto. + simpl. rewrite <- IHp. simpl. destruct (f2 b); auto. + simpl. auto. +Qed. + +Lemma transform_partial_program_compose: + forall (A B C: Set) + (f1: A -> option B) (f2: B -> option C) + (p: program A), + transform_partial_program f1 p @@@ + (fun p' => transform_partial_program f2 p') = + transform_partial_program (fun f => f1 f @@@ f2) p. +Proof. + unfold transform_partial_program; intros. + rewrite <- transf_partial_program_compose. simpl. + destruct (transf_partial_program f1 (prog_funct p)); simpl. + auto. auto. +Qed. + +Lemma transf_program_partial_total: + forall (A B: Set) (f: A -> B) (l: list (ident * A)), + Some (AST.transf_program f l) = + AST.transf_partial_program (fun x => Some (f x)) l. +Proof. + induction l. simpl. auto. + simpl. destruct a. rewrite <- IHl. auto. +Qed. + +Lemma transform_program_partial_total: + forall (A B: Set) (f: A -> B) (p: program A), + Some (transform_program f p) = + transform_partial_program (fun x => Some (f x)) p. +Proof. + intros. unfold transform_program, transform_partial_program. + rewrite <- transf_program_partial_total. auto. +Qed. + +Lemma apply_total_transf_program: + forall (A B: Set) (f: A -> B) (x: option (program A)), + x @@ (fun p => transform_program f p) = + x @@@ (fun p => transform_partial_program (fun x => Some (f x)) p). +Proof. + intros. unfold apply_total, apply_partial. + destruct x. apply transform_program_partial_total. auto. +Qed. + +Lemma transf_cminor_program_equiv: + forall p, transf_cminor_program2 p = transf_cminor_program p. +Proof. + intro. unfold transf_cminor_program, transf_cminor_program2, transf_cminor_function. + simpl. + unfold RTLgen.transl_program, + Constprop.transf_program, RTL.program. + rewrite apply_total_transf_program. + rewrite transform_partial_program_compose. + unfold CSE.transf_program, RTL.program. + rewrite apply_total_transf_program. + rewrite transform_partial_program_compose. + unfold Allocation.transf_program, + LTL.program, RTL.program. + rewrite transform_partial_program_compose. + unfold Tunneling.tunnel_program, LTL.program. + rewrite apply_total_transf_program. + rewrite transform_partial_program_compose. + unfold Linearize.transf_program, LTL.program, Linear.program. + rewrite apply_total_transf_program. + rewrite transform_partial_program_compose. + unfold Stacking.transf_program, Linear.program, Mach.program. + rewrite transform_partial_program_compose. + unfold PPCgen.transf_program, Mach.program, PPC.program. + rewrite transform_partial_program_compose. + reflexivity. +Qed. + +Lemma transf_csharpminor_program_equiv: + forall p, transf_csharpminor_program2 p = transf_csharpminor_program p. +Proof. + intros. + unfold transf_csharpminor_program2, transf_csharpminor_program, transf_csharpminor_function. + simpl. + replace transf_cminor_program2 with transf_cminor_program. + unfold transf_cminor_program, Cminorgen.transl_program, Cminor.program, PPC.program. + apply transform_partial_program_compose. + symmetry. apply extensionality. exact transf_cminor_program_equiv. +Qed. + +(** * Semantic preservation *) + +(** We prove that the [transf_program2] translations preserve semantics. The proof + composes the semantic preservation results for each pass. *) + +Lemma transf_cminor_program2_correct: + forall p tp n, + transf_cminor_program2 p = Some tp -> + Cminor.exec_program p (Vint n) -> + PPC.exec_program tp (Vint n). +Proof. + intros until n. + unfold transf_cminor_program2. + simpl. caseEq (RTLgen.transl_program p). intros p1 EQ1. + simpl. set (p2 := CSE.transf_program (Constprop.transf_program p1)). + caseEq (Allocation.transf_program p2). intros p3 EQ3. + simpl. set (p4 := Tunneling.tunnel_program p3). + set (p5 := Linearize.transf_program p4). + caseEq (Stacking.transf_program p5). intros p6 EQ6. + simpl. intros EQTP EXEC. + assert (WT3 : LTLtyping.wt_program p3). + apply Alloctyping.program_typing_preserved with p2. auto. + assert (WT4 : LTLtyping.wt_program p4). + unfold p4. apply Tunnelingtyping.program_typing_preserved. auto. + assert (WT5 : Lineartyping.wt_program p5). + unfold p5. apply Linearizetyping.program_typing_preserved. auto. + assert (WT6: Machtyping.wt_program p6). + apply Stackingtyping.program_typing_preserved with p5. auto. auto. + apply PPCgenproof.transf_program_correct with p6; auto. + apply Machabstr2mach.exec_program_equiv; auto. + apply Stackingproof.transl_program_correct with p5; auto. + unfold p5; apply Linearizeproof.transf_program_correct. + unfold p4; apply Tunnelingproof.transf_program_correct. + apply Allocproof.transl_program_correct with p2; auto. + unfold p2; apply CSEproof.transf_program_correct; + apply Constpropproof.transf_program_correct. + apply RTLgenproof.transl_program_correct with p; auto. + simpl; intros; discriminate. + simpl; intros; discriminate. + simpl; intros; discriminate. +Qed. + +Lemma transf_csharpminor_program2_correct: + forall p tp n, + transf_csharpminor_program2 p = Some tp -> + Csharpminor.exec_program p (Vint n) -> + PPC.exec_program tp (Vint n). +Proof. + intros until n; unfold transf_csharpminor_program2; simpl. + caseEq (Cminorgen.transl_program p). + simpl; intros p1 EQ1 EQ2 EXEC. + apply transf_cminor_program2_correct with p1. auto. + apply Cminorgenproof.transl_program_correct with p. auto. + assumption. + simpl; intros; discriminate. +Qed. + +(** It follows that the whole compiler is semantic-preserving. *) + +Theorem transf_cminor_program_correct: + forall p tp n, + transf_cminor_program p = Some tp -> + Cminor.exec_program p (Vint n) -> + PPC.exec_program tp (Vint n). +Proof. + intros. apply transf_cminor_program2_correct with p. + rewrite transf_cminor_program_equiv. assumption. assumption. +Qed. + +Theorem transf_csharpminor_program_correct: + forall p tp n, + transf_csharpminor_program p = Some tp -> + Csharpminor.exec_program p (Vint n) -> + PPC.exec_program tp (Vint n). +Proof. + intros. apply transf_csharpminor_program2_correct with p. + rewrite transf_csharpminor_program_equiv. assumption. assumption. +Qed. diff --git a/backend/Mem.v b/backend/Mem.v new file mode 100644 index 00000000..26d4c499 --- /dev/null +++ b/backend/Mem.v @@ -0,0 +1,2253 @@ +(** This file develops the memory model that is used in the dynamic + semantics of all the languages of the compiler back-end. + It defines a type [mem] of memory states, the following 4 basic + operations over memory states, and their properties: +- [alloc]: allocate a fresh memory block; +- [free]: invalidate a memory block; +- [load]: read a memory chunk at a given address; +- [store]: store a memory chunk at a given address. +*) + +Require Import Coqlib. +Require Import Maps. +Require Import AST. +Require Import Integers. +Require Import Floats. +Require Import Values. + +(** * Structure of memory states *) + +(** A memory state is organized in several disjoint blocks. Each block + has a low and a high bound that defines its size. Each block map + byte offsets to the contents of this byte. *) + +Definition update (A: Set) (x: Z) (v: A) (f: Z -> A) : Z -> A := + fun y => if zeq y x then v else f y. + +Implicit Arguments update [A]. + +Lemma update_s: + forall (A: Set) (x: Z) (v: A) (f: Z -> A), + update x v f x = v. +Proof. + intros; unfold update. apply zeq_true. +Qed. + +Lemma update_o: + forall (A: Set) (x: Z) (v: A) (f: Z -> A) (y: Z), + x <> y -> update x v f y = f y. +Proof. + intros; unfold update. apply zeq_false; auto. +Qed. + +(** The possible contents of a byte-sized memory cell. To give intuitions, + a 4-byte value [v] stored at offset [d] will be represented by + the content [Datum32 v] at offset [d] and [Cont] at offsets [d+1], + [d+2] and [d+3]. The [Cont] contents enable detecting future writes + that would overlap partially the 4-byte value. *) + +Inductive content : Set := + | Undef: content (**r undefined contents *) + | Datum8: val -> content (**r a value that fits in 1 byte *) + | Datum16: val -> content (**r first byte of a 2-byte value *) + | Datum32: val -> content (**r first byte of a 4-byte value *) + | Datum64: val -> content (**r first byte of a 8-byte value *) + | Cont: content. (**r continuation bytes for a multi-byte value *) + +Definition contentmap : Set := Z -> content. + +(** A memory block comprises the dimensions of the block (low and high bounds) + plus a mapping from byte offsets to contents. For technical reasons, + we also carry around a proof that the mapping is equal to [Undef] + outside the range of allowed byte offsets. *) + +Record block_contents : Set := mkblock { + low: Z; + high: Z; + contents: contentmap; + undef_outside: forall ofs, ofs < low \/ ofs >= high -> contents ofs = Undef +}. + +(** A memory state is a mapping from block addresses (represented by [Z] + integers) to blocks. We also maintain the address of the next + unallocated block, and a proof that this address is positive. *) + +Record mem : Set := mkmem { + blocks: Z -> block_contents; + nextblock: block; + nextblock_pos: nextblock > 0 +}. + +(** * Operations on memory stores *) + +(** Memory reads and writes are performed by quantities called memory chunks, + encoding the type, size and signedness of the chunk being addressed. + It is useful to extract only the size information as given by the + following [memory_size] type. *) + +Inductive memory_size : Set := + | Size8: memory_size + | Size16: memory_size + | Size32: memory_size + | Size64: memory_size. + +Definition size_mem (sz: memory_size) := + match sz with + | Size8 => 1 + | Size16 => 2 + | Size32 => 4 + | Size64 => 8 + end. + +Definition mem_chunk (chunk: memory_chunk) := + match chunk with + | Mint8signed => Size8 + | Mint8unsigned => Size8 + | Mint16signed => Size16 + | Mint16unsigned => Size16 + | Mint32 => Size32 + | Mfloat32 => Size32 + | Mfloat64 => Size64 + end. + +Definition size_chunk (chunk: memory_chunk) := size_mem (mem_chunk chunk). + +(** The initial store. *) + +Remark one_pos: 1 > 0. +Proof. omega. Qed. + +Remark undef_undef_outside: + forall lo hi ofs, ofs < lo \/ ofs >= hi -> (fun y => Undef) ofs = Undef. +Proof. + auto. +Qed. + +Definition empty_block (lo hi: Z) : block_contents := + mkblock lo hi (fun y => Undef) (undef_undef_outside lo hi). + +Definition empty: mem := + mkmem (fun x => empty_block 0 0) 1 one_pos. + +Definition nullptr: block := 0. + +(** Allocation of a fresh block with the given bounds. Return an updated + memory state and the address of the fresh block, which initially contains + undefined cells. Note that allocation never fails: we model an + infinite memory. *) + +Remark succ_nextblock_pos: + forall m, Zsucc m.(nextblock) > 0. +Proof. intro. generalize (nextblock_pos m). omega. Qed. + +Definition alloc (m: mem) (lo hi: Z) := + (mkmem (update m.(nextblock) + (empty_block lo hi) + m.(blocks)) + (Zsucc m.(nextblock)) + (succ_nextblock_pos m), + m.(nextblock)). + +(** Freeing a block. Return the updated memory state where the given + block address has been invalidated: future reads and writes to this + address will fail. Note that we make no attempt to return the block + to an allocation pool: the given block address will never be allocated + later. *) + +Definition free (m: mem) (b: block) := + mkmem (update b + (empty_block 0 0) + m.(blocks)) + m.(nextblock) + m.(nextblock_pos). + +(** Freeing of a list of blocks. *) + +Definition free_list (m:mem) (l:list block) := + List.fold_right (fun b m => free m b) m l. + +(** Return the low and high bounds for the given block address. + Those bounds are 0 for freed or not yet allocated address. *) + +Definition low_bound (m: mem) (b: block) := + low (m.(blocks) b). +Definition high_bound (m: mem) (b: block) := + high (m.(blocks) b). + +(** A block address is valid if it was previously allocated. It remains valid + even after being freed. *) + +Definition valid_block (m: mem) (b: block) := + b < m.(nextblock). + +(** Reading and writing [N] adjacent locations in a [contentmap]. + + We define two functions and prove some of their properties: + - [getN n ofs m] returns the datum at offset [ofs] in block contents [m] + after checking that the contents of offsets [ofs+1] to [ofs+n+1] + are [Cont]. + - [setN n ofs v m] updates the block contents [m], storing the content [v] + at offset [ofs] and the content [Cont] at offsets [ofs+1] to [ofs+n+1]. + *) + +Fixpoint check_cont (n: nat) (p: Z) (m: contentmap) {struct n} : bool := + match n with + | O => true + | S n1 => + match m p with + | Cont => check_cont n1 (p + 1) m + | _ => false + end + end. + +Definition getN (n: nat) (p: Z) (m: contentmap) : content := + if check_cont n (p + 1) m then m p else Undef. + +Fixpoint set_cont (n: nat) (p: Z) (m: contentmap) {struct n} : contentmap := + match n with + | O => m + | S n1 => update p Cont (set_cont n1 (p + 1) m) + end. + +Definition setN (n: nat) (p: Z) (v: content) (m: contentmap) : contentmap := + update p v (set_cont n (p + 1) m). + +Lemma check_cont_true: + forall n p m, + (forall q, p <= q < p + Z_of_nat n -> m q = Cont) -> + check_cont n p m = true. +Proof. + induction n; intros. + reflexivity. + simpl. rewrite H. apply IHn. + intros. apply H. rewrite inj_S. omega. + rewrite inj_S. omega. +Qed. + +Hint Resolve check_cont_true. + +Lemma check_cont_inv: + forall n p m, + check_cont n p m = true -> + (forall q, p <= q < p + Z_of_nat n -> m q = Cont). +Proof. + induction n; intros until m. + unfold Z_of_nat. intros. omegaContradiction. + unfold check_cont; fold check_cont. + caseEq (m p); intros; try discriminate. + assert (p = q \/ p + 1 <= q < (p + 1) + Z_of_nat n). + rewrite inj_S in H1. omega. + elim H2; intro. + subst q. auto. + apply IHn with (p + 1); auto. +Qed. + +Hint Resolve check_cont_inv. + +Lemma check_cont_false: + forall n p q m, + p <= q < p + Z_of_nat n -> + m q <> Cont -> + check_cont n p m = false. +Proof. + intros. caseEq (check_cont n p m); intro. + generalize (check_cont_inv _ _ _ H1 q H). intro. contradiction. + auto. +Qed. + +Hint Resolve check_cont_false. + +Lemma set_cont_inside: + forall n p m q, + p <= q < p + Z_of_nat n -> + (set_cont n p m) q = Cont. +Proof. + induction n; intros. + unfold Z_of_nat in H. omegaContradiction. + simpl. + assert (p = q \/ p + 1 <= q < (p + 1) + Z_of_nat n). + rewrite inj_S in H. omega. + elim H0; intro. + subst q. apply update_s. + rewrite update_o. apply IHn. auto. + red; intro; subst q. omega. +Qed. + +Hint Resolve set_cont_inside. + +Lemma set_cont_outside: + forall n p m q, + q < p \/ p + Z_of_nat n <= q -> + (set_cont n p m) q = m q. +Proof. + induction n; intros. + simpl. auto. + simpl. rewrite inj_S in H. + rewrite update_o. apply IHn. omega. omega. +Qed. + +Hint Resolve set_cont_outside. + +Lemma getN_setN_same: + forall n p v m, + getN n p (setN n p v m) = v. +Proof. + intros. unfold getN, setN. + rewrite check_cont_true. apply update_s. + intros. rewrite update_o. apply set_cont_inside. auto. + omega. +Qed. + +Hint Resolve getN_setN_same. + +Lemma getN_setN_other: + forall n1 n2 p1 p2 v m, + p1 + Z_of_nat n1 < p2 \/ p2 + Z_of_nat n2 < p1 -> + getN n2 p2 (setN n1 p1 v m) = getN n2 p2 m. +Proof. + intros. unfold getN, setN. + caseEq (check_cont n2 (p2 + 1) m); intro. + rewrite check_cont_true. rewrite update_o. + apply set_cont_outside. omega. omega. + intros. rewrite update_o. rewrite set_cont_outside. + eapply check_cont_inv. eauto. auto. + omega. omega. + caseEq (check_cont n2 (p2 + 1) (update p1 v (set_cont n1 (p1 + 1) m))); intros. + assert (check_cont n2 (p2 + 1) m = true). + apply check_cont_true. intros. + generalize (check_cont_inv _ _ _ H1 q H2). + rewrite update_o. rewrite set_cont_outside. auto. omega. omega. + rewrite H0 in H2; discriminate. + auto. +Qed. + +Hint Resolve getN_setN_other. + +Lemma getN_setN_overlap: + forall n1 n2 p1 p2 v m, + p1 <> p2 -> + p1 + Z_of_nat n1 >= p2 -> p2 + Z_of_nat n2 >= p1 -> + v <> Cont -> + getN n2 p2 (setN n1 p1 v m) = Cont \/ + getN n2 p2 (setN n1 p1 v m) = Undef. +Proof. + intros. unfold getN. + caseEq (check_cont n2 (p2 + 1) (setN n1 p1 v m)); intro. + case (zlt p2 p1); intro. + assert (p2 + 1 <= p1 < p2 + 1 + Z_of_nat n2). omega. + generalize (check_cont_inv _ _ _ H3 p1 H4). + unfold setN. rewrite update_s. intro. contradiction. + left. unfold setN. rewrite update_o. + apply set_cont_inside. omega. auto. + right; auto. +Qed. + +Hint Resolve getN_setN_overlap. + +Lemma getN_setN_mismatch: + forall n1 n2 p v m, + getN n2 p (setN n1 p v m) = v \/ getN n2 p (setN n1 p v m) = Undef. +Proof. + intros. unfold getN. + caseEq (check_cont n2 (p + 1) (setN n1 p v m)); intro. + left. unfold setN. apply update_s. + right. auto. +Qed. + +Hint Resolve getN_setN_mismatch. + +Lemma getN_init: + forall n p, + getN n p (fun y => Undef) = Undef. +Proof. + intros. unfold getN. + case (check_cont n (p + 1) (fun y => Undef)); auto. +Qed. + +Hint Resolve getN_init. + +(** The following function checks whether accessing the given memory chunk + at the given offset in the given block respects the bounds of the block. *) + +Definition in_bounds (chunk: memory_chunk) (ofs: Z) (c: block_contents) : + {c.(low) <= ofs /\ ofs + size_chunk chunk <= c.(high)} + + {c.(low) > ofs \/ ofs + size_chunk chunk > c.(high)} := + match zle c.(low) ofs, zle (ofs + size_chunk chunk) c.(high) with + | left P1, left P2 => left _ (conj P1 P2) + | left P1, right P2 => right _ (or_intror _ P2) + | right P1, _ => right _ (or_introl _ P1) + end. + +Lemma in_bounds_holds: + forall (chunk: memory_chunk) (ofs: Z) (c: block_contents) + (A: Set) (a b: A), + c.(low) <= ofs -> ofs + size_chunk chunk <= c.(high) -> + (if in_bounds chunk ofs c then a else b) = a. +Proof. + intros. case (in_bounds chunk ofs c); intro. + auto. + omegaContradiction. +Qed. + +Lemma in_bounds_exten: + forall (chunk: memory_chunk) (ofs: Z) (c: block_contents) (x: contentmap) P, + in_bounds chunk ofs (mkblock (low c) (high c) x P) = + in_bounds chunk ofs c. +Proof. + intros; reflexivity. +Qed. + +Hint Resolve in_bounds_holds in_bounds_exten. + +(** [valid_pointer] holds if the given block address is valid and the + given offset falls within the bounds of the corresponding block. *) + +Definition valid_pointer (m: mem) (b: block) (ofs: Z) : bool := + if zlt b m.(nextblock) then + (let c := m.(blocks) b in + if zle c.(low) ofs then if zlt ofs c.(high) then true else false + else false) + else false. + +(** Read a quantity of size [sz] at offset [ofs] in block contents [c]. + Return [Vundef] if the requested size does not match that of the + current contents, or if the following offsets do not contain [Cont]. + The first check captures a size mismatch between the read and the + latest write at this offset. The second check captures partial overwriting + of the latest write at this offset by a more recent write at a nearby + offset. *) + +Definition load_contents (sz: memory_size) (c: contentmap) (ofs: Z) : val := + match sz with + | Size8 => + match getN 0%nat ofs c with + | Datum8 n => n + | _ => Vundef + end + | Size16 => + match getN 1%nat ofs c with + | Datum16 n => n + | _ => Vundef + end + | Size32 => + match getN 3%nat ofs c with + | Datum32 n => n + | _ => Vundef + end + | Size64 => + match getN 7%nat ofs c with + | Datum64 n => n + | _ => Vundef + end + end. + +(** [load chunk m b ofs] perform a read in memory state [m], at address + [b] and offset [ofs]. [None] is returned if the address is invalid + or the memory access is out of bounds. *) + +Definition load (chunk: memory_chunk) (m: mem) (b: block) (ofs: Z) + : option val := + if zlt b m.(nextblock) then + (let c := m.(blocks) b in + if in_bounds chunk ofs c + then Some (Val.load_result chunk + (load_contents (mem_chunk chunk) c.(contents) ofs)) + else None) + else + None. + +(** [loadv chunk m addr] is similar, but the address and offset are given + as a single value [addr], which must be a pointer value. *) + +Definition loadv (chunk: memory_chunk) (m: mem) (addr: val) : option val := + match addr with + | Vptr b ofs => load chunk m b (Int.signed ofs) + | _ => None + end. + +Theorem load_in_bounds: + forall (chunk: memory_chunk) (m: mem) (b: block) (ofs: Z), + valid_block m b -> + low_bound m b <= ofs -> + ofs + size_chunk chunk <= high_bound m b -> + exists v, load chunk m b ofs = Some v. +Proof. + intros. unfold load. rewrite zlt_true; auto. + rewrite in_bounds_holds. + exists (Val.load_result chunk + (load_contents (mem_chunk chunk) + (contents (m.(blocks) b)) + ofs)). + auto. + exact H0. exact H1. +Qed. + +Lemma load_inv: + forall (chunk: memory_chunk) (m: mem) (b: block) (ofs: Z) (v: val), + load chunk m b ofs = Some v -> + let c := m.(blocks) b in + b < m.(nextblock) /\ + c.(low) <= ofs /\ + ofs + size_chunk chunk <= c.(high) /\ + Val.load_result chunk (load_contents (mem_chunk chunk) c.(contents) ofs) = v. +Proof. + intros until v; unfold load. + case (zlt b (nextblock m)); intro. + set (c := m.(blocks) b). + case (in_bounds chunk ofs c). + intuition congruence. + intros; discriminate. + intros; discriminate. +Qed. +Hint Resolve load_in_bounds load_inv. + +(* Write the value [v] with size [sz] at offset [ofs] in block contents [c]. + Return updated block contents. [Cont] contents are stored at the following + offsets. *) + +Definition store_contents (sz: memory_size) (c: contentmap) + (ofs: Z) (v: val) : contentmap := + match sz with + | Size8 => + setN 0%nat ofs (Datum8 v) c + | Size16 => + setN 1%nat ofs (Datum16 v) c + | Size32 => + setN 3%nat ofs (Datum32 v) c + | Size64 => + setN 7%nat ofs (Datum64 v) c + end. + +Remark store_contents_undef_outside: + forall sz c ofs v lo hi, + lo <= ofs /\ ofs + size_mem sz <= hi -> + (forall x, x < lo \/ x >= hi -> c x = Undef) -> + (forall x, x < lo \/ x >= hi -> + store_contents sz c ofs v x = Undef). +Proof. + intros until hi; intros [LO HI] UO. + assert (forall n d x, + ofs + (1 + Z_of_nat n) <= hi -> + x < lo \/ x >= hi -> + setN n ofs d c x = Undef). + intros. unfold setN. rewrite update_o. + transitivity (c x). apply set_cont_outside. omega. + apply UO. omega. omega. + unfold store_contents; destruct sz; unfold size_mem in HI; + intros; apply H; auto; simpl Z_of_nat; auto. +Qed. + +Definition unchecked_store + (chunk: memory_chunk) (m: mem) (b: block) + (ofs: Z) (v: val) + (P: (m.(blocks) b).(low) <= ofs /\ + ofs + size_chunk chunk <= (m.(blocks) b).(high)) : mem := + let c := m.(blocks) b in + mkmem + (update b + (mkblock c.(low) c.(high) + (store_contents (mem_chunk chunk) c.(contents) ofs v) + (store_contents_undef_outside (mem_chunk chunk) c.(contents) + ofs v _ _ P c.(undef_outside))) + m.(blocks)) + m.(nextblock) + m.(nextblock_pos). + +(** [store chunk m b ofs v] perform a write in memory state [m]. + Value [v] is stored at address [b] and offset [ofs]. + Return the updated memory store, or [None] if the address is invalid + or the memory access is out of bounds. *) + +Definition store (chunk: memory_chunk) (m: mem) (b: block) + (ofs: Z) (v: val) : option mem := + if zlt b m.(nextblock) then + match in_bounds chunk ofs (m.(blocks) b) with + | left P => Some(unchecked_store chunk m b ofs v P) + | right _ => None + end + else + None. + +(** [storev chunk m addr v] is similar, but the address and offset are given + as a single value [addr], which must be a pointer value. *) + +Definition storev (chunk: memory_chunk) (m: mem) (addr v: val) : option mem := + match addr with + | Vptr b ofs => store chunk m b (Int.signed ofs) v + | _ => None + end. + +Theorem store_in_bounds: + forall (chunk: memory_chunk) (m: mem) (b: block) (ofs: Z) (v: val), + valid_block m b -> + low_bound m b <= ofs -> + ofs + size_chunk chunk <= high_bound m b -> + exists m', store chunk m b ofs v = Some m'. +Proof. + intros. unfold store. + rewrite zlt_true; auto. + case (in_bounds chunk ofs (blocks m b)); intro P. + exists (unchecked_store chunk m b ofs v P). reflexivity. + unfold low_bound in H0. unfold high_bound in H1. omegaContradiction. +Qed. + +Lemma store_inv: + forall (chunk: memory_chunk) (m m': mem) (b: block) (ofs: Z) (v: val), + store chunk m b ofs v = Some m' -> + let c := m.(blocks) b in + b < m.(nextblock) /\ + c.(low) <= ofs /\ + ofs + size_chunk chunk <= c.(high) /\ + m'.(nextblock) = m.(nextblock) /\ + exists P, m'.(blocks) = + update b (mkblock c.(low) c.(high) + (store_contents (mem_chunk chunk) c.(contents) ofs v) P) + m.(blocks). +Proof. + intros until v; unfold store. + case (zlt b (nextblock m)); intro. + set (c := m.(blocks) b). + case (in_bounds chunk ofs c). + intros; injection H; intro; subst m'. simpl. + intuition. fold c. + exists (store_contents_undef_outside (mem_chunk chunk) + (contents c) ofs v (low c) (high c) a (undef_outside c)). + auto. + intros; discriminate. + intros; discriminate. +Qed. + +Hint Resolve store_in_bounds store_inv. + +(** * Properties of the memory operations *) + +(** ** Properties related to block validity *) + +Lemma valid_not_valid_diff: + forall m b b', valid_block m b -> ~(valid_block m b') -> b <> b'. +Proof. + intros; red; intros. subst b'. contradiction. +Qed. + +Theorem fresh_block_alloc: + forall (m1 m2: mem) (lo hi: Z) (b: block), + alloc m1 lo hi = (m2, b) -> ~(valid_block m1 b). +Proof. + intros. injection H; intros; subst b. + unfold valid_block. omega. +Qed. + +Theorem valid_new_block: + forall (m1 m2: mem) (lo hi: Z) (b: block), + alloc m1 lo hi = (m2, b) -> valid_block m2 b. +Proof. + unfold alloc, valid_block; intros. + injection H; intros. subst b; subst m2; simpl. omega. +Qed. + +Theorem valid_block_alloc: + forall (m1 m2: mem) (lo hi: Z) (b b': block), + alloc m1 lo hi = (m2, b') -> + valid_block m1 b -> valid_block m2 b. +Proof. + unfold alloc, valid_block; intros. + injection H; intros. subst m2; simpl. omega. +Qed. + +Theorem valid_block_store: + forall (chunk: memory_chunk) (m1 m2: mem) (b b': block) (ofs: Z) (v: val), + store chunk m1 b' ofs v = Some m2 -> + valid_block m1 b -> valid_block m2 b. +Proof. + intros. generalize (store_inv _ _ _ _ _ _ H). + intros [A [B [C [D [P E]]]]]. + red. rewrite D. exact H0. +Qed. + +Theorem valid_block_free: + forall (m: mem) (b b': block), + valid_block m b -> valid_block (free m b') b. +Proof. + unfold valid_block, free; intros. + simpl. auto. +Qed. + +(** ** Properties related to [alloc] *) + +Theorem load_alloc_other: + forall (chunk: memory_chunk) (m1 m2: mem) + (b b': block) (ofs lo hi: Z) (v: val), + alloc m1 lo hi = (m2, b') -> + load chunk m1 b ofs = Some v -> + load chunk m2 b ofs = Some v. +Proof. + unfold alloc; intros. + injection H; intros; subst m2; clear H. + generalize (load_inv _ _ _ _ _ H0). + intros (A, (B, (C, D))). + unfold load; simpl. + rewrite zlt_true. + repeat (rewrite update_o). + rewrite in_bounds_holds. congruence. auto. auto. + omega. omega. +Qed. + +Lemma load_contents_init: + forall (sz: memory_size) (ofs: Z), + load_contents sz (fun y => Undef) ofs = Vundef. +Proof. + intros. destruct sz; reflexivity. +Qed. + +Theorem load_alloc_same: + forall (chunk: memory_chunk) (m1 m2: mem) + (b b': block) (ofs lo hi: Z) (v: val), + alloc m1 lo hi = (m2, b') -> + load chunk m2 b' ofs = Some v -> + v = Vundef. +Proof. + unfold alloc; intros. + injection H; intros; subst m2; clear H. + generalize (load_inv _ _ _ _ _ H0). + simpl. rewrite H1. rewrite update_s. simpl. intros (A, (B, (C, D))). + rewrite <- D. rewrite load_contents_init. + destruct chunk; reflexivity. +Qed. + +Theorem low_bound_alloc: + forall (m1 m2: mem) (b b': block) (lo hi: Z), + alloc m1 lo hi = (m2, b') -> + low_bound m2 b = if zeq b b' then lo else low_bound m1 b. +Proof. + unfold alloc; intros. + injection H; intros; subst m2; clear H. + unfold low_bound; simpl. + unfold update. + subst b'. + case (zeq b (nextblock m1)); reflexivity. +Qed. + +Theorem high_bound_alloc: + forall (m1 m2: mem) (b b': block) (lo hi: Z), + alloc m1 lo hi = (m2, b') -> + high_bound m2 b = if zeq b b' then hi else high_bound m1 b. +Proof. + unfold alloc; intros. + injection H; intros; subst m2; clear H. + unfold high_bound; simpl. + unfold update. + subst b'. + case (zeq b (nextblock m1)); reflexivity. +Qed. + +Theorem store_alloc: + forall (chunk: memory_chunk) (m1 m2: mem) (b: block) (ofs lo hi: Z) (v: val), + alloc m1 lo hi = (m2, b) -> + lo <= ofs -> ofs + size_chunk chunk <= hi -> + exists m2', store chunk m2 b ofs v = Some m2'. +Proof. + unfold alloc; intros. + injection H; intros. + assert (A: b < m2.(nextblock)). + subst m2; subst b; simpl; omega. + assert (B: low_bound m2 b <= ofs). + subst m2; subst b. unfold low_bound; simpl. rewrite update_s. + simpl. assumption. + assert (C: ofs + size_chunk chunk <= high_bound m2 b). + subst m2; subst b. unfold high_bound; simpl. rewrite update_s. + simpl. assumption. + exact (store_in_bounds chunk m2 b ofs v A B C). +Qed. + +Hint Resolve store_alloc high_bound_alloc low_bound_alloc load_alloc_same +load_contents_init load_alloc_other. + +(** ** Properties related to [free] *) + +Theorem load_free: + forall (chunk: memory_chunk) (m: mem) (b bf: block) (ofs: Z), + b <> bf -> + load chunk (free m bf) b ofs = load chunk m b ofs. +Proof. + intros. unfold free, load; simpl. + case (zlt b (nextblock m)). + repeat (rewrite update_o; auto). + reflexivity. +Qed. + +Theorem low_bound_free: + forall (m: mem) (b bf: block), + b <> bf -> + low_bound (free m bf) b = low_bound m b. +Proof. + intros. unfold free, low_bound; simpl. + rewrite update_o; auto. +Qed. + +Theorem high_bound_free: + forall (m: mem) (b bf: block), + b <> bf -> + high_bound (free m bf) b = high_bound m b. +Proof. + intros. unfold free, high_bound; simpl. + rewrite update_o; auto. +Qed. +Hint Resolve load_free low_bound_free high_bound_free. + +(** ** Properties related to [store] *) + +Lemma store_is_in_bounds: + forall chunk m1 b ofs v m2, + store chunk m1 b ofs v = Some m2 -> + low_bound m1 b <= ofs /\ ofs + size_chunk chunk <= high_bound m1 b. +Proof. + intros. generalize (store_inv _ _ _ _ _ _ H). + intros [A [B [C [P D]]]]. + unfold low_bound, high_bound. tauto. +Qed. + +Lemma load_store_contents_same: + forall (sz: memory_size) (c: contentmap) (ofs: Z) (v: val), + load_contents sz (store_contents sz c ofs v) ofs = v. +Proof. + intros until v. + unfold load_contents, store_contents in |- *; case sz; + rewrite getN_setN_same; reflexivity. +Qed. + +Theorem load_store_same: + forall (chunk: memory_chunk) (m1 m2: mem) (b: block) (ofs: Z) (v: val), + store chunk m1 b ofs v = Some m2 -> + load chunk m2 b ofs = Some (Val.load_result chunk v). +Proof. + intros. + generalize (store_inv _ _ _ _ _ _ H). + intros (A, (B, (C, (D, (P, E))))). + unfold load. rewrite D. + rewrite zlt_true; auto. rewrite E. + repeat (rewrite update_s). simpl. + rewrite in_bounds_exten. rewrite in_bounds_holds; auto. + rewrite load_store_contents_same; auto. +Qed. + +Lemma load_store_contents_other: + forall (sz1 sz2: memory_size) (c: contentmap) + (ofs1 ofs2: Z) (v: val), + ofs2 + size_mem sz2 <= ofs1 \/ ofs1 + size_mem sz1 <= ofs2 -> + load_contents sz2 (store_contents sz1 c ofs1 v) ofs2 = + load_contents sz2 c ofs2. +Proof. + intros until v. + unfold size_mem, store_contents, load_contents; + case sz1; case sz2; intros; + (rewrite getN_setN_other; + [reflexivity | simpl Z_of_nat; omega]). +Qed. + +Theorem load_store_other: + forall (chunk1 chunk2: memory_chunk) (m1 m2: mem) + (b1 b2: block) (ofs1 ofs2: Z) (v: val), + store chunk1 m1 b1 ofs1 v = Some m2 -> + b1 <> b2 + \/ ofs2 + size_chunk chunk2 <= ofs1 + \/ ofs1 + size_chunk chunk1 <= ofs2 -> + load chunk2 m2 b2 ofs2 = load chunk2 m1 b2 ofs2. +Proof. + intros. + generalize (store_inv _ _ _ _ _ _ H). + intros (A, (B, (C, (D, (P, E))))). + unfold load. rewrite D. + case (zlt b2 (nextblock m1)); intro. + rewrite E; unfold update; case (zeq b2 b1); intro; simpl. + subst b2. rewrite in_bounds_exten. + rewrite load_store_contents_other. auto. + tauto. + reflexivity. + reflexivity. +Qed. + +Ltac LSCO := + match goal with + | |- (match getN ?sz2 ?ofs2 (setN ?sz1 ?ofs1 ?v ?c) with + | Undef => _ + | Datum8 _ => _ + | Datum16 _ => _ + | Datum32 _ => _ + | Datum64 _ => _ + | Cont => _ + end = _) => + elim (getN_setN_overlap sz1 sz2 ofs1 ofs2 v c); + [ let H := fresh in (intro H; rewrite H; reflexivity) + | let H := fresh in (intro H; rewrite H; reflexivity) + | assumption + | simpl Z_of_nat; omega + | simpl Z_of_nat; omega + | discriminate ] + end. + +Lemma load_store_contents_overlap: + forall (sz1 sz2: memory_size) (c: contentmap) + (ofs1 ofs2: Z) (v: val), + ofs1 <> ofs2 -> + ofs2 + size_mem sz2 > ofs1 -> ofs1 + size_mem sz1 > ofs2 -> + load_contents sz2 (store_contents sz1 c ofs1 v) ofs2 = Vundef. +Proof. + intros. + destruct sz1; destruct sz2; simpl in H0; simpl in H1; simpl; LSCO. +Qed. + +Ltac LSCM := + match goal with + | H:(?x <> ?x) |- _ => + elim H; reflexivity + | |- (match getN ?sz2 ?ofs (setN ?sz1 ?ofs ?v ?c) with + | Undef => _ + | Datum8 _ => _ + | Datum16 _ => _ + | Datum32 _ => _ + | Datum64 _ => _ + | Cont => _ + end = _) => + elim (getN_setN_mismatch sz1 sz2 ofs v c); + [ let H := fresh in (intro H; rewrite H; reflexivity) + | let H := fresh in (intro H; rewrite H; reflexivity) ] + end. + +Lemma load_store_contents_mismatch: + forall (sz1 sz2: memory_size) (c: contentmap) + (ofs: Z) (v: val), + sz1 <> sz2 -> + load_contents sz2 (store_contents sz1 c ofs v) ofs = Vundef. +Proof. + intros. + destruct sz1; destruct sz2; simpl; LSCM. +Qed. + +Theorem low_bound_store: + forall (chunk: memory_chunk) (m1 m2: mem) (b b': block) (ofs: Z) (v: val), + store chunk m1 b ofs v = Some m2 -> + low_bound m2 b' = low_bound m1 b'. +Proof. + intros. + generalize (store_inv _ _ _ _ _ _ H). + intros (A, (B, (C, (D, (P, E))))). + unfold low_bound. rewrite E; unfold update. + case (zeq b' b); intro. + subst b'. reflexivity. + reflexivity. +Qed. + +Theorem high_bound_store: + forall (chunk: memory_chunk) (m1 m2: mem) (b b': block) (ofs: Z) (v: val), + store chunk m1 b ofs v = Some m2 -> + high_bound m2 b' = high_bound m1 b'. +Proof. + intros. + generalize (store_inv _ _ _ _ _ _ H). + intros (A, (B, (C, (D, (P, E))))). + unfold high_bound. rewrite E; unfold update. + case (zeq b' b); intro. + subst b'. reflexivity. + reflexivity. +Qed. + +Hint Resolve high_bound_store low_bound_store load_store_contents_mismatch + load_store_contents_overlap load_store_other store_is_in_bounds + load_store_contents_same load_store_same load_store_contents_other. + +(** * Agreement between memory blocks. *) + +(** Two memory blocks [c1] and [c2] agree on a range [lo] to [hi] + if they associate the same contents to byte offsets in the range + [lo] (included) to [hi] (excluded). *) + +Definition contentmap_agree (lo hi: Z) (c1 c2: contentmap) := + forall (ofs: Z), + lo <= ofs < hi -> c1 ofs = c2 ofs. + +Definition block_contents_agree (lo hi: Z) (c1 c2: block_contents) := + contentmap_agree lo hi c1.(contents) c2.(contents). + +Definition block_agree (b: block) (lo hi: Z) (m1 m2: mem) := + block_contents_agree lo hi + (m1.(blocks) b) (m2.(blocks) b). + +Theorem block_agree_refl: + forall (m: mem) (b: block) (lo hi: Z), + block_agree b lo hi m m. +Proof. + intros. hnf. auto. +Qed. + +Theorem block_agree_sym: + forall (m1 m2: mem) (b: block) (lo hi: Z), + block_agree b lo hi m1 m2 -> + block_agree b lo hi m2 m1. +Proof. + intros. hnf. intros. symmetry. apply H. auto. +Qed. + +Theorem block_agree_trans: + forall (m1 m2 m3: mem) (b: block) (lo hi: Z), + block_agree b lo hi m1 m2 -> + block_agree b lo hi m2 m3 -> + block_agree b lo hi m1 m3. +Proof. + intros. hnf. intros. + transitivity (contents (blocks m2 b) ofs). + apply H; auto. apply H0; auto. +Qed. + +Lemma check_cont_agree: + forall (c1 c2: contentmap) (lo hi: Z), + contentmap_agree lo hi c1 c2 -> + forall (n: nat) (ofs: Z), + lo <= ofs -> ofs + Z_of_nat n <= hi -> + check_cont n ofs c2 = check_cont n ofs c1. +Proof. + induction n; intros; simpl. + auto. + rewrite inj_S in H1. + rewrite H. case (c2 ofs); intros; auto. + apply IHn; omega. + omega. +Qed. + +Lemma getN_agree: + forall (c1 c2: contentmap) (lo hi: Z), + contentmap_agree lo hi c1 c2 -> + forall (n: nat) (ofs: Z), + lo <= ofs -> ofs + Z_of_nat n < hi -> + getN n ofs c2 = getN n ofs c1. +Proof. + intros. unfold getN. + rewrite (check_cont_agree c1 c2 lo hi H n (ofs + 1)). + case (check_cont n (ofs + 1) c1). + symmetry. apply H. omega. auto. + omega. omega. +Qed. + +Lemma load_contentmap_agree: + forall (sz: memory_size) (c1 c2: contentmap) (lo hi ofs: Z), + contentmap_agree lo hi c1 c2 -> + lo <= ofs -> ofs + size_mem sz <= hi -> + load_contents sz c2 ofs = load_contents sz c1 ofs. +Proof. + intros sz c1 c2 lo hi ofs EX LO. + unfold load_contents, size_mem; case sz; intro HI; + rewrite (getN_agree c1 c2 lo hi EX); auto; simpl Z_of_nat; omega. +Qed. + +Lemma set_cont_agree: + forall (lo hi: Z) (n: nat) (c1 c2: contentmap) (ofs: Z), + contentmap_agree lo hi c1 c2 -> + contentmap_agree lo hi (set_cont n ofs c1) (set_cont n ofs c2). +Proof. + induction n; simpl; intros. + auto. + red. intros p B. + case (zeq p ofs); intro. + subst p. repeat (rewrite update_s). reflexivity. + repeat (rewrite update_o). apply IHn. assumption. + assumption. auto. auto. +Qed. + +Lemma setN_agree: + forall (lo hi: Z) (n: nat) (c1 c2: contentmap) (ofs: Z) (v: content), + contentmap_agree lo hi c1 c2 -> + contentmap_agree lo hi (setN n ofs v c1) (setN n ofs v c2). +Proof. + intros. unfold setN. red; intros p B. + case (zeq p ofs); intro. + subst p. repeat (rewrite update_s). reflexivity. + repeat (rewrite update_o; auto). + exact (set_cont_agree lo hi n c1 c2 (ofs + 1) H p B). +Qed. + +Lemma store_contentmap_agree: + forall (sz: memory_size) (c1 c2: contentmap) (lo hi ofs: Z) (v: val), + contentmap_agree lo hi c1 c2 -> + contentmap_agree lo hi + (store_contents sz c1 ofs v) (store_contents sz c2 ofs v). +Proof. + intros. unfold store_contents; case sz; apply setN_agree; auto. +Qed. + +Lemma set_cont_outside_agree: + forall (lo hi: Z) (n: nat) (c1 c2: contentmap) (ofs: Z), + contentmap_agree lo hi c1 c2 -> + ofs + Z_of_nat n <= lo \/ hi <= ofs -> + contentmap_agree lo hi c1 (set_cont n ofs c2). +Proof. + induction n; intros; simpl. + auto. + red; intros p R. rewrite inj_S in H0. + unfold update. case (zeq p ofs); intro. + subst p. omegaContradiction. + apply IHn. auto. omega. auto. +Qed. + +Lemma setN_outside_agree: + forall (lo hi: Z) (n: nat) (c1 c2: contentmap) (ofs: Z) (v: content), + contentmap_agree lo hi c1 c2 -> + ofs + Z_of_nat n < lo \/ hi <= ofs -> + contentmap_agree lo hi c1 (setN n ofs v c2). +Proof. + intros. unfold setN. red; intros p R. + unfold update. case (zeq p ofs); intro. + omegaContradiction. + apply (set_cont_outside_agree lo hi n c1 c2 (ofs + 1) H). + omega. auto. +Qed. + +Lemma store_contentmap_outside_agree: + forall (sz: memory_size) (c1 c2: contentmap) (lo hi ofs: Z) (v: val), + contentmap_agree lo hi c1 c2 -> + ofs + size_mem sz <= lo \/ hi <= ofs -> + contentmap_agree lo hi c1 (store_contents sz c2 ofs v). +Proof. + intros until v. + unfold store_contents; case sz; simpl; intros; + apply setN_outside_agree; auto; simpl Z_of_nat; omega. +Qed. + +Theorem load_agree: + forall (chunk: memory_chunk) (m1 m2: mem) + (b: block) (lo hi: Z) (ofs: Z) (v1 v2: val), + block_agree b lo hi m1 m2 -> + lo <= ofs -> ofs + size_chunk chunk <= hi -> + load chunk m1 b ofs = Some v1 -> + load chunk m2 b ofs = Some v2 -> + v1 = v2. +Proof. + intros. + generalize (load_inv _ _ _ _ _ H2). intros [K [L [M N]]]. + generalize (load_inv _ _ _ _ _ H3). intros [P [Q [R S]]]. + subst v1; subst v2. symmetry. + decEq. eapply load_contentmap_agree. + apply H. auto. auto. +Qed. + +Theorem store_agree: + forall (chunk: memory_chunk) (m1 m2 m1' m2': mem) + (b: block) (lo hi: Z) + (b': block) (ofs: Z) (v: val), + block_agree b lo hi m1 m2 -> + store chunk m1 b' ofs v = Some m1' -> + store chunk m2 b' ofs v = Some m2' -> + block_agree b lo hi m1' m2'. +Proof. + intros. + generalize (store_inv _ _ _ _ _ _ H0). + intros [I [J [K [L [x M]]]]]. + generalize (store_inv _ _ _ _ _ _ H1). + intros [P [Q [R [S [y T]]]]]. + red. red. + rewrite M; rewrite T; unfold update. + case (zeq b b'); intro; simpl. + subst b'. apply store_contentmap_agree. assumption. + apply H. +Qed. + +Theorem store_outside_agree: + forall (chunk: memory_chunk) (m1 m2 m2': mem) + (b: block) (lo hi: Z) + (b': block) (ofs: Z) (v: val), + block_agree b lo hi m1 m2 -> + b <> b' \/ ofs + size_chunk chunk <= lo \/ hi <= ofs -> + store chunk m2 b' ofs v = Some m2' -> + block_agree b lo hi m1 m2'. +Proof. + intros. + generalize (store_inv _ _ _ _ _ _ H1). + intros [I [J [K [L [x M]]]]]. + red. red. rewrite M; unfold update; + case (zeq b b'); intro; simpl. + subst b'. apply store_contentmap_outside_agree. + assumption. + elim H0; intro. tauto. exact H2. + apply H. +Qed. + +(** * Store extensions *) + +(** A store [m2] extends a store [m1] if [m2] can be obtained from [m1] + by increasing the sizes of the memory blocks of [m1] (decreasing + the low bounds, increasing the high bounds), while still keeping the + same contents for block offsets that are valid in [m1]. + This notion is used in the proof of semantic equivalence in + module [Machenv]. *) + +Definition block_contents_extends (c1 c2: block_contents) := + c2.(low) <= c1.(low) /\ c1.(high) <= c2.(high) /\ + contentmap_agree c1.(low) c1.(high) c1.(contents) c2.(contents). + +Definition extends (m1 m2: mem) := + m1.(nextblock) = m2.(nextblock) /\ + forall (b: block), + b < m1.(nextblock) -> + block_contents_extends (m1.(blocks) b) (m2.(blocks) b). + +Theorem extends_refl: + forall (m: mem), extends m m. +Proof. + intro. red. split. + reflexivity. + intros. red. + split. omega. split. omega. + red. intros. reflexivity. +Qed. + +Theorem alloc_extends: + forall (m1 m2 m1' m2': mem) (lo1 hi1 lo2 hi2: Z) (b1 b2: block), + extends m1 m2 -> + lo2 <= lo1 -> hi1 <= hi2 -> + alloc m1 lo1 hi1 = (m1', b1) -> + alloc m2 lo2 hi2 = (m2', b2) -> + b1 = b2 /\ extends m1' m2'. +Proof. + unfold extends, alloc; intros. + injection H2; intros; subst m1'; subst b1. + injection H3; intros; subst m2'; subst b2. + simpl. intuition. + rewrite <- H4. case (zeq b (nextblock m1)); intro. + subst b. repeat rewrite update_s. + red; simpl. intuition. + red; intros; reflexivity. + repeat rewrite update_o. apply H5. omega. + auto. auto. +Qed. + +Theorem free_extends: + forall (m1 m2: mem) (b: block), + extends m1 m2 -> + extends (free m1 b) (free m2 b). +Proof. + unfold extends, free; intros. + simpl. intuition. + red; intros; simpl. + case (zeq b0 b); intro. + subst b0; repeat (rewrite update_s). + simpl. split. omega. split. omega. + red. intros. omegaContradiction. + repeat (rewrite update_o). + exact (H1 b0 H). + auto. auto. +Qed. + +Theorem load_extends: + forall (chunk: memory_chunk) (m1 m2: mem) (b: block) (ofs: Z) (v: val), + extends m1 m2 -> + load chunk m1 b ofs = Some v -> + load chunk m2 b ofs = Some v. +Proof. + intros sz m1 m2 b ofs v (X, Y) L. + generalize (load_inv _ _ _ _ _ L). + intros (A, (B, (C, D))). + unfold load. rewrite <- X. rewrite zlt_true; auto. + generalize (Y b A); intros [M [P Q]]. + rewrite in_bounds_holds. + rewrite <- D. + decEq. decEq. + apply load_contentmap_agree with + (lo := low((blocks m1) b)) + (hi := high((blocks m1) b)); auto. + omega. omega. +Qed. + +Theorem store_within_extends: + forall (chunk: memory_chunk) (m1 m2 m1': mem) (b: block) (ofs: Z) (v: val), + extends m1 m2 -> + store chunk m1 b ofs v = Some m1' -> + exists m2', store chunk m2 b ofs v = Some m2' /\ extends m1' m2'. +Proof. + intros sz m1 m2 m1' b ofs v (X, Y) STORE. + generalize (store_inv _ _ _ _ _ _ STORE). + intros (A, (B, (C, (D, (x, E))))). + generalize (Y b A); intros [M [P Q]]. + unfold store. rewrite <- X. rewrite zlt_true; auto. + case (in_bounds sz ofs (blocks m2 b)); intro. + exists (unchecked_store sz m2 b ofs v a). + split. auto. + split. + unfold unchecked_store; simpl. congruence. + intros b' LT. + unfold block_contents_extends, unchecked_store, block_contents_agree. + rewrite E; unfold update; simpl. + case (zeq b' b); intro; simpl. + subst b'. split. omega. split. omega. + apply store_contentmap_agree. auto. + apply Y. rewrite <- D. auto. + omegaContradiction. +Qed. + +Theorem store_outside_extends: + forall (chunk: memory_chunk) (m1 m2 m2': mem) (b: block) (ofs: Z) (v: val), + extends m1 m2 -> + ofs + size_chunk chunk <= low_bound m1 b \/ high_bound m1 b <= ofs -> + store chunk m2 b ofs v = Some m2' -> + extends m1 m2'. +Proof. + intros sz m1 m2 m2' b ofs v (X, Y) BOUNDS STORE. + generalize (store_inv _ _ _ _ _ _ STORE). + intros (A, (B, (C, (D, (x, E))))). + split. + congruence. + intros b' LT. + rewrite E; unfold update; case (zeq b' b); intro. + subst b'. generalize (Y b LT). intros [M [P Q]]. + red; simpl. split. omega. split. omega. + apply store_contentmap_outside_agree. + assumption. exact BOUNDS. + auto. +Qed. + +(** * Memory extensionality properties *) + +Lemma block_contents_exten: + forall (c1 c2: block_contents), + c1.(low) = c2.(low) -> + c1.(high) = c2.(high) -> + block_contents_agree c1.(low) c1.(high) c1 c2 -> + c1 = c2. +Proof. + intros. caseEq c1; intros lo1 hi1 m1 UO1 EQ1. subst c1. + caseEq c2; intros lo2 hi2 m2 UO2 EQ2. subst c2. + simpl in *. subst lo2 hi2. + assert (m1 = m2). + unfold contentmap. apply extensionality. intro. + case (zlt x lo1); intro. + rewrite UO1. rewrite UO2. auto. tauto. tauto. + case (zlt x hi1); intro. + apply H1. omega. + rewrite UO1. rewrite UO2. auto. tauto. tauto. + subst m2. + assert (UO1 = UO2). + apply proof_irrelevance. + subst UO2. reflexivity. +Qed. + +Theorem mem_exten: + forall m1 m2, + m1.(nextblock) = m2.(nextblock) -> + (forall b, m1.(blocks) b = m2.(blocks) b) -> + m1 = m2. +Proof. + intros. destruct m1 as [map1 nb1 nextpos1]. destruct m2 as [map2 nb2 nextpos2]. + simpl in *. subst nb2. + assert (map1 = map2). apply extensionality. assumption. + assert (nextpos1 = nextpos2). apply proof_irrelevance. + congruence. +Qed. + +(** * Store injections *) + +(** A memory injection [f] is a function from addresses to either [None] + or [Some] of an address and an offset. It defines a correspondence + between the blocks of two memory states [m1] and [m2]: +- if [f b = None], the block [b] of [m1] has no equivalent in [m2]; +- if [f b = Some(b', ofs)], the block [b] of [m2] corresponds to + a sub-block at offset [ofs] of the block [b'] in [m2]. +*) + +Definition meminj := (block -> option (block * Z))%type. + +Section MEM_INJECT. + +Variable f: meminj. + +(** A memory injection defines a relation between values that is the + identity relation, except for pointer values which are shifted + as prescribed by the memory injection. *) + +Inductive val_inject: val -> val -> Prop := + | val_inject_int: + forall i, val_inject (Vint i) (Vint i) + | val_inject_float: + forall f, val_inject (Vfloat f) (Vfloat f) + | val_inject_ptr: + forall b1 ofs1 b2 ofs2 x, + f b1 = Some (b2, x) -> + ofs2 = Int.add ofs1 (Int.repr x) -> + val_inject (Vptr b1 ofs1) (Vptr b2 ofs2) + | val_inject_undef: forall v, + val_inject Vundef v. + +Hint Resolve val_inject_int val_inject_float val_inject_ptr +val_inject_undef. + +Inductive val_list_inject: list val -> list val-> Prop:= + | val_nil_inject : + val_list_inject nil nil + | val_cons_inject : forall v v' vl vl' , + val_inject v v' -> val_list_inject vl vl'-> + val_list_inject (v::vl) (v':: vl'). + +Inductive val_content_inject: memory_size -> val -> val -> Prop := + | val_content_inject_base: + forall sz v1 v2, + val_inject v1 v2 -> + val_content_inject sz v1 v2 + | val_content_inject_8: + forall n1 n2, + Int.cast8unsigned n1 = Int.cast8unsigned n2 -> + val_content_inject Size8 (Vint n1) (Vint n2) + | val_content_inject_16: + forall n1 n2, + Int.cast16unsigned n1 = Int.cast16unsigned n2 -> + val_content_inject Size16 (Vint n1) (Vint n2) + | val_content_inject_32: + forall f1 f2, + Float.singleoffloat f1 = Float.singleoffloat f2 -> + val_content_inject Size32 (Vfloat f1) (Vfloat f2). + +Hint Resolve val_content_inject_base. + +Inductive content_inject: content -> content -> Prop := + | content_inject_undef: + forall c, + content_inject Undef c + | content_inject_datum8: + forall v1 v2, + val_content_inject Size8 v1 v2 -> + content_inject (Datum8 v1) (Datum8 v2) + | content_inject_datum16: + forall v1 v2, + val_content_inject Size16 v1 v2 -> + content_inject (Datum16 v1) (Datum16 v2) + | content_inject_datum32: + forall v1 v2, + val_content_inject Size32 v1 v2 -> + content_inject (Datum32 v1) (Datum32 v2) + | content_inject_datum64: + forall v1 v2, + val_content_inject Size64 v1 v2 -> + content_inject (Datum64 v1) (Datum64 v2) + | content_inject_cont: + content_inject Cont Cont. + +Hint Resolve content_inject_undef content_inject_datum8 +content_inject_datum16 content_inject_datum32 content_inject_datum64 +content_inject_cont. + +Definition contentmap_inject (c1 c2: contentmap) (lo hi delta: Z) : Prop := + forall x, lo <= x < hi -> + content_inject (c1 x) (c2 (x + delta)). + +(** A block contents [c1] injects into another block content [c2] at + offset [delta] if the contents of [c1] at all valid offsets + correspond to the contents of [c2] at the same offsets shifted by [delta]. + Some additional conditions are imposed to guard against arithmetic + overflow in address computations. *) + +Record block_contents_inject (c1 c2: block_contents) (delta: Z) : Prop := + mk_block_contents_inject { + bci_range1: Int.min_signed <= delta <= Int.max_signed; + bci_range2: delta = 0 \/ + (Int.min_signed <= c2.(low) /\ c2.(high) <= Int.max_signed); + bci_lowhigh:forall x, c1.(low) <= x < c1.(high) -> + c2.(low) <= x+delta < c2.(high) ; + bci_contents: + contentmap_inject c1.(contents) c2.(contents) c1.(low) c1.(high) delta + }. + +(** A memory state [m1] injects into another memory state [m2] via the + memory injection [f] if the following conditions hold: +- unallocated blocks in [m1] must be mapped to [None] by [f]; +- if [f b = Some(b', delta)], [b'] must be valid in [m2] and + the contents of [b] in [m1] must inject into the contents of [b'] in [m2] + with offset [delta]; +- distinct blocks in [m1] cannot be mapped to overlapping sub-blocks in [m2]. +*) + +Record mem_inject (m1 m2: mem) : Prop := + mk_mem_inject { + mi_freeblocks: + forall b, b >= m1.(nextblock) -> f b = None; + mi_mappedblocks: + forall b b' delta, + f b = Some(b', delta) -> + b' < m2.(nextblock) /\ + block_contents_inject (m1.(blocks) b) + (m2.(blocks) b') + delta; + mi_no_overlap: + forall b1 b2 b1' b2' delta1 delta2, + b1 <> b2 -> + f b1 = Some (b1', delta1) -> + f b2 = Some (b2', delta2) -> + b1' <> b2' + \/ (forall x1 x2, low_bound m1 b1 <= x1 < high_bound m1 b1 -> + low_bound m1 b2 <= x2 < high_bound m1 b2 -> + x1+delta1 <> x2+delta2) + }. + +(** The following lemmas establish the absence of machine integer overflow + during address computations. *) + +Lemma size_mem_pos: forall sz, size_mem sz > 0. +Proof. destruct sz; simpl; omega. Qed. + +Lemma size_chunk_pos: forall chunk, size_chunk chunk > 0. +Proof. intros. unfold size_chunk. apply size_mem_pos. Qed. + +Lemma address_inject: + forall m1 m2 chunk b1 ofs1 b2 ofs2 x, + mem_inject m1 m2 -> + (m1.(blocks) b1).(low) <= Int.signed ofs1 -> + Int.signed ofs1 + size_chunk chunk <= (m1.(blocks) b1).(high) -> + f b1 = Some (b2, x) -> + ofs2 = Int.add ofs1 (Int.repr x) -> + Int.signed ofs2 = Int.signed ofs1 + x. +Proof. + intros. + generalize (size_chunk_pos chunk). intro. + generalize (mi_mappedblocks m1 m2 H _ _ _ H2). intros [C D]. + inversion D. + elim bci_range4 ; intro. + (**x=0**) + subst x . rewrite Zplus_0_r. rewrite Int.add_zero in H3. + subst ofs2 ; auto . + (**x<>0**) + rewrite H3. rewrite Int.add_signed. repeat rewrite Int.signed_repr. + auto. auto. + assert (low (blocks m1 b1) <= Int.signed ofs1 < high (blocks m1 b1)). + omega. + generalize (bci_lowhigh0 (Int.signed ofs1) H6). omega. + auto. +Qed. + +Lemma valid_pointer_inject_no_overflow: + forall m1 m2 b ofs b' x, + mem_inject m1 m2 -> + valid_pointer m1 b (Int.signed ofs) = true -> + f b = Some(b', x) -> + Int.min_signed <= Int.signed ofs + Int.signed (Int.repr x) <= Int.max_signed. +Proof. + intros. unfold valid_pointer in H0. + destruct (zlt b (nextblock m1)); try discriminate. + destruct (zle (low (blocks m1 b)) (Int.signed ofs)); try discriminate. + destruct (zlt (Int.signed ofs) (high (blocks m1 b))); try discriminate. + inversion H. generalize (mi_mappedblocks0 _ _ _ H1). + intros [A B]. inversion B. + elim bci_range4 ; intro. + (**x=0**) + rewrite Int.signed_repr ; auto. + subst x . rewrite Zplus_0_r. apply Int.signed_range . + (**x<>0**) + generalize (bci_lowhigh0 _ (conj z0 z1)). intro. + rewrite Int.signed_repr. omega. auto. +Qed. + +(** Relation between injections and loads. *) + +Lemma check_cont_inject: + forall c1 c2 lo hi delta, + contentmap_inject c1 c2 lo hi delta -> + forall n p, + lo <= p -> p + Z_of_nat n <= hi -> + check_cont n p c1 = true -> + check_cont n (p + delta) c2 = true. +Proof. + induction n. + intros. simpl. reflexivity. + simpl check_cont. rewrite inj_S. intros p H0 H1. + assert (lo <= p < hi). omega. + generalize (H p H2). intro. inversion H3; intros; try discriminate. + replace (p + delta + 1) with ((p + 1) + delta). + apply IHn. omega. omega. auto. + omega. +Qed. + +Hint Resolve check_cont_inject. + +Lemma getN_inject: + forall c1 c2 lo hi delta, + contentmap_inject c1 c2 lo hi delta -> + forall n p, + lo <= p -> p + Z_of_nat n < hi -> + content_inject (getN n p c1) (getN n (p + delta) c2). +Proof. + intros. simpl in H1. + assert (lo <= p < hi). omega. + unfold getN. + caseEq (check_cont n (p + 1) c1); intro. + replace (check_cont n (p + delta + 1) c2) with true. + apply H. assumption. + symmetry. replace (p + delta + 1) with ((p + 1) + delta). + eapply check_cont_inject; eauto. + omega. omega. omega. + constructor. +Qed. + +Hint Resolve getN_inject. + +Definition ztonat (z:Z): nat:= +match z with +| Z0 => O +| Zpos p => (nat_of_P p) +| Zneg p =>O +end. + +Lemma load_contents_inject: + forall sz c1 c2 lo hi delta p, + contentmap_inject c1 c2 lo hi delta -> + lo <= p -> p + size_mem sz <= hi -> + val_content_inject sz (load_contents sz c1 p) (load_contents sz c2 (p + delta)). +Proof. +intros. +assert (content_inject (getN (ztonat (size_mem sz)-1) p c1) +(getN (ztonat(size_mem sz)-1) (p + delta) c2)). +induction sz; assert (lo<= p< hi); simpl in H1; try omega; +apply getN_inject with lo hi; try assumption; simpl ; try omega. +induction sz ; simpl; inversion H2; auto. +Qed. + +Hint Resolve load_contents_inject. + +Lemma load_result_inject: + forall chunk v1 v2, + val_content_inject (mem_chunk chunk) v1 v2 -> + val_inject (Val.load_result chunk v1) (Val.load_result chunk v2). +Proof. +intros. +destruct chunk; simpl in H; inversion H; simpl; auto; +try (inversion H0; simpl; auto; fail). +replace (Int.cast8signed n2) with (Int.cast8signed n1). constructor. +apply Int.cast8_signed_equal_if_unsigned_equal; auto. +rewrite H0; constructor. +replace (Int.cast16signed n2) with (Int.cast16signed n1). constructor. +apply Int.cast16_signed_equal_if_unsigned_equal; auto. +rewrite H0; constructor. +inversion H0; simpl; auto. +apply val_inject_ptr with x; auto. +Qed. + +Lemma in_bounds_inject: + forall chunk c1 c2 delta p, + block_contents_inject c1 c2 delta -> + c1.(low) <= p /\ p + size_chunk chunk <= c1.(high) -> + c2.(low) <= p + delta /\ (p + delta) + size_chunk chunk <= c2.(high). +Proof. + intros. + inversion H. + generalize (size_chunk_pos chunk); intro. + assert (low c1 <= p + size_chunk chunk - 1 < high c1). + omega. + generalize (bci_lowhigh0 _ H2). intro. + assert (low c1 <= p < high c1). + omega. + generalize (bci_lowhigh0 _ H4). intro. + omega. +Qed. + +Lemma block_cont_val: +forall c1 c2 delta p sz, +block_contents_inject c1 c2 delta -> +c1.(low) <= p -> p + size_mem sz <= c1.(high) -> + val_content_inject sz (load_contents sz c1.(contents) p) + (load_contents sz c2.(contents) (p + delta)). +Proof. +intros. +inversion H . +apply load_contents_inject with c1.(low) c1.(high) ;assumption. +Qed. + +Lemma load_inject: + forall m1 m2 chunk b1 ofs b2 delta v1, + mem_inject m1 m2 -> + load chunk m1 b1 ofs = Some v1 -> + f b1 = Some (b2, delta) -> + exists v2, load chunk m2 b2 (ofs + delta) = Some v2 /\ val_inject v1 v2. +Proof. + intros. + generalize (load_inv _ _ _ _ _ H0). intros [A [B [C D]]]. + inversion H. + generalize (mi_mappedblocks0 _ _ _ H1). intros [U V]. + inversion V. + exists (Val.load_result chunk (load_contents (mem_chunk chunk) + (m2.(blocks) b2).(contents) (ofs+delta))). + split. + unfold load. + generalize (size_chunk_pos chunk); intro. + rewrite zlt_true. rewrite in_bounds_holds. auto. + assert (low (blocks m1 b1) <= ofs < high (blocks m1 b1)). + omega. + generalize (bci_lowhigh0 _ H3). tauto. + assert (low (blocks m1 b1) <= ofs + size_chunk chunk - 1 < high(blocks m1 b1)). + omega. + generalize (bci_lowhigh0 _ H3). omega. + assumption. + rewrite <- D. apply load_result_inject. + eapply load_contents_inject; eauto. +Qed. + +Lemma loadv_inject: + forall m1 m2 chunk a1 a2 v1, + mem_inject m1 m2 -> + loadv chunk m1 a1 = Some v1 -> + val_inject a1 a2 -> + exists v2, loadv chunk m2 a2 = Some v2 /\ val_inject v1 v2. +Proof. + intros. + induction H1 ; simpl in H0 ; try discriminate H0. + simpl. + replace (Int.signed ofs2) with (Int.signed ofs1 + x). + apply load_inject with m1 b1 ; assumption. + symmetry. generalize (load_inv _ _ _ _ _ H0). intros [A [B [C D]]]. + apply address_inject with m1 m2 chunk b1 b2; auto. +Qed. + +(** Relation between injections and stores. *) + +Lemma set_cont_inject: + forall c1 c2 lo hi delta, + contentmap_inject c1 c2 lo hi delta -> + forall n p, + lo <= p -> p + Z_of_nat n <= hi -> + contentmap_inject (set_cont n p c1) (set_cont n (p + delta) c2) lo hi delta. +Proof. +induction n. intros. simpl. assumption. +intros. simpl. unfold contentmap_inject. +intros p2 Hp2. unfold update. +case (zeq p2 p); intro. +subst p2. rewrite zeq_true. constructor. +rewrite zeq_false. replace (p + delta + 1) with ((p + 1) + delta). +apply IHn. omega. rewrite inj_S in H1. omega. assumption. +omega. omega. +Qed. + +Lemma setN_inject: + forall c1 c2 lo hi delta n p v1 v2, + contentmap_inject c1 c2 lo hi delta -> + content_inject v1 v2 -> + lo <= p -> p + Z_of_nat n < hi -> + contentmap_inject (setN n p v1 c1) (setN n (p + delta) v2 c2) + lo hi delta. +Proof. + intros. unfold setN. red; intros. + unfold update. + case (zeq x p); intro. + subst p. rewrite zeq_true. assumption. + rewrite zeq_false. + replace (p + delta + 1) with ((p + 1) + delta). + apply set_cont_inject with lo hi. + assumption. omega. omega. assumption. omega. + omega. +Qed. + +Lemma store_contents_inject: + forall c1 c2 lo hi delta sz p v1 v2, + contentmap_inject c1 c2 lo hi delta -> + val_content_inject sz v1 v2 -> + lo <= p -> p + size_mem sz <= hi -> + contentmap_inject (store_contents sz c1 p v1) + (store_contents sz c2 (p + delta) v2) + lo hi delta. +Proof. + intros. + destruct sz; simpl in *; apply setN_inject; auto; simpl; omega. +Qed. + +Lemma set_cont_outside1 : + forall n p m q , + (forall x , p <= x < p + Z_of_nat n -> x <> q) -> + (set_cont n p m) q = m q. +Proof. + induction n; intros; simpl. + auto. + rewrite inj_S in H. rewrite update_o. apply IHn. + intros. apply H. omega. + apply H. omega. +Qed. + +Lemma set_cont_outside_inject: + forall c1 c2 lo hi delta, + contentmap_inject c1 c2 lo hi delta -> + forall n p, + (forall x1 x2, p <= x1 < p + Z_of_nat n -> + lo <= x2 < hi -> + x1 <> x2 + delta) -> + contentmap_inject c1 (set_cont n p c2) lo hi delta. +Proof. + unfold contentmap_inject. intros. + rewrite set_cont_outside1. apply H. assumption. + intros. apply H0. auto. auto. +Qed. + +Lemma setN_outside_inject: + forall n c1 c2 lo hi delta p v, + contentmap_inject c1 c2 lo hi delta -> + (forall x1 x2, p <= x1 < p + Z_of_nat (S n) -> + lo <= x2 < hi -> + x1 <> x2 + delta) -> + contentmap_inject c1 (setN n p v c2) lo hi delta. +Proof. + intros. unfold setN. red; intros. rewrite update_o. + apply set_cont_outside_inject with lo hi. auto. + intros. apply H0. rewrite inj_S. omega. auto. auto. + apply H0. rewrite inj_S. omega. auto. +Qed. + +Lemma store_contents_outside_inject: + forall c1 c2 lo hi delta sz p v, + contentmap_inject c1 c2 lo hi delta -> + (forall x1 x2, p <= x1 < p + size_mem sz -> + lo <= x2 < hi -> + x1 <> x2 + delta)-> + contentmap_inject c1 (store_contents sz c2 p v) lo hi delta. +Proof. + intros c1 c2 lo hi delta sz. generalize (size_mem_pos sz) . intro . + induction sz ;intros ;simpl ; apply setN_outside_inject ; assumption . +Qed. + +Lemma store_mapped_inject_1: + forall chunk m1 b1 ofs v1 n1 m2 b2 delta v2, + mem_inject m1 m2 -> + store chunk m1 b1 ofs v1 = Some n1 -> + f b1 = Some (b2, delta) -> + val_content_inject (mem_chunk chunk) v1 v2 -> + exists n2, + store chunk m2 b2 (ofs + delta) v2 = Some n2 + /\ mem_inject n1 n2. +Proof. +intros. +generalize (size_chunk_pos chunk); intro SIZEPOS. +generalize (store_inv _ _ _ _ _ _ H0). +intros [A [B [C [D [P E]]]]]. +inversion H. +generalize (mi_mappedblocks0 _ _ _ H1). intros [U V]. +inversion V. +generalize (in_bounds_inject _ _ _ _ _ V (conj B C)). intro BND. +exists (unchecked_store chunk m2 b2 (ofs+delta) v2 BND). +split. unfold store. +rewrite zlt_true; auto. +case (in_bounds chunk (ofs + delta) (blocks m2 b2)); intro. +assert (a = BND). apply proof_irrelevance. congruence. +omegaContradiction. +constructor. +intros. apply mi_freeblocks0. rewrite <- D. assumption. +intros. generalize (mi_mappedblocks0 b b' delta0 H3). +intros [W Y]. split. simpl. auto. +rewrite E; simpl. unfold update. +(* Cas 1: memes blocs b = b1 b'= b2 *) +case (zeq b b1); intro. +subst b. assert (b'= b2). congruence. subst b'. +assert (delta0 = delta). congruence. subst delta0. +rewrite zeq_true. inversion Y. constructor; simpl; auto. +apply store_contents_inject; auto. +(* Cas 2: blocs differents dans m1 mais mappes sur le meme bloc de m2 *) +case (zeq b' b2); intro. +subst b'. +inversion Y. constructor; simpl; auto. +generalize (store_contents_outside_inject _ _ _ _ _ (mem_chunk chunk) + (ofs+delta) v2 bci_contents1). +intros. +apply H4. +elim (mi_no_overlap0 b b1 b2 b2 delta0 delta n H3 H1). +tauto. +unfold high_bound, low_bound. intros. +apply sym_not_equal. replace x1 with ((x1 - delta) + delta). +apply H5. assumption. unfold size_chunk in C. omega. omega. +(* Cas 3: blocs differents dans m1 et dans m2 *) +auto. + +unfold high_bound, low_bound. +rewrite E; simpl; intros. +unfold high_bound, low_bound in mi_no_overlap0. +unfold update. +case (zeq b0 b1). +intro. subst b1. simpl. +rewrite zeq_false; auto. +intro. case (zeq b3 b1); intro. +subst b3. simpl. auto. +auto. +Qed. + +Lemma store_mapped_inject: + forall chunk m1 b1 ofs v1 n1 m2 b2 delta v2, + mem_inject m1 m2 -> + store chunk m1 b1 ofs v1 = Some n1 -> + f b1 = Some (b2, delta) -> + val_inject v1 v2 -> + exists n2, + store chunk m2 b2 (ofs + delta) v2 = Some n2 + /\ mem_inject n1 n2. +Proof. + intros. eapply store_mapped_inject_1; eauto. +Qed. + +Lemma store_unmapped_inject: + forall chunk m1 b1 ofs v1 n1 m2, + mem_inject m1 m2 -> + store chunk m1 b1 ofs v1 = Some n1 -> + f b1 = None -> + mem_inject n1 m2. +Proof. +intros. +inversion H. +generalize (store_inv _ _ _ _ _ _ H0). +intros [A [B [C [D [P E]]]]]. +constructor. +rewrite D. assumption. +intros. elim (mi_mappedblocks0 _ _ _ H2); intros. +split. auto. +rewrite E; unfold update. +rewrite zeq_false. assumption. +congruence. +intros. +assert (forall b, low_bound n1 b = low_bound m1 b). + intros. unfold low_bound; rewrite E; unfold update. + case (zeq b b1); intros. subst b1; reflexivity. reflexivity. +assert (forall b, high_bound n1 b = high_bound m1 b). + intros. unfold high_bound; rewrite E; unfold update. + case (zeq b b1); intros. subst b1; reflexivity. reflexivity. +repeat rewrite H5. repeat rewrite H6. auto. +Qed. + +Lemma storev_mapped_inject_1: + forall chunk m1 a1 v1 n1 m2 a2 v2, + mem_inject m1 m2 -> + storev chunk m1 a1 v1 = Some n1 -> + val_inject a1 a2 -> + val_content_inject (mem_chunk chunk) v1 v2 -> + exists n2, + storev chunk m2 a2 v2 = Some n2 /\ mem_inject n1 n2. +Proof. + intros. destruct a1; simpl in H0; try discriminate. + inversion H1. subst b. + simpl. replace (Int.signed ofs2) with (Int.signed i + x). + eapply store_mapped_inject_1; eauto. + symmetry. generalize (store_inv _ _ _ _ _ _ H0). intros [A [B [C [D [P E]]]]]. + apply address_inject with m1 m2 chunk b1 b2; auto. +Qed. + +Lemma storev_mapped_inject: + forall chunk m1 a1 v1 n1 m2 a2 v2, + mem_inject m1 m2 -> + storev chunk m1 a1 v1 = Some n1 -> + val_inject a1 a2 -> + val_inject v1 v2 -> + exists n2, + storev chunk m2 a2 v2 = Some n2 /\ mem_inject n1 n2. +Proof. + intros. eapply storev_mapped_inject_1; eauto. +Qed. + +(** Relation between injections and [free] *) + +Lemma free_first_inject : + forall m1 m2 b, + mem_inject m1 m2 -> + mem_inject (free m1 b) m2. +Proof. + intros. inversion H. constructor . auto. + simpl. intros. + generalize (mi_mappedblocks0 b0 b' delta H0). + intros [A B] . split. assumption . unfold update. + case (zeq b0 b); intro. inversion B. constructor; simpl; auto. + intros. omega. + unfold contentmap_inject. + intros. omegaContradiction. + auto. intros. + unfold free . unfold low_bound , high_bound. simpl. unfold update. + case (zeq b1 b);intro. simpl. + right. intros. omegaContradiction. + case (zeq b2 b);intro. simpl. + right . intros. omegaContradiction. + unfold low_bound, high_bound in mi_no_overlap0. auto. +Qed. + +Lemma free_first_list_inject : + forall l m1 m2, + mem_inject m1 m2 -> + mem_inject (free_list m1 l) m2. +Proof. + induction l; simpl; intros. + auto. + apply free_first_inject. auto. +Qed. + +Lemma free_snd_inject: + forall m1 m2 b, + (forall b1 delta, f b1 = Some(b, delta) -> + low_bound m1 b1 >= high_bound m1 b1) -> + mem_inject m1 m2 -> + mem_inject m1 (free m2 b). +Proof. + intros. inversion H0. constructor. auto. + simpl. intros. + generalize (mi_mappedblocks0 b0 b' delta H1). + intros [A B] . split. assumption . + inversion B. unfold update. + case (zeq b' b); intro. + subst b'. generalize (H _ _ H1). unfold low_bound, high_bound; intro. + constructor. auto. elim bci_range4 ; intro. + (**delta=0**) + left ; auto . + (** delta<>0**) + right . + simpl. compute. split; intro; congruence. + intros. omegaContradiction. + red; intros. omegaContradiction. + auto. + auto. +Qed. + +Lemma bounds_free_block: + forall m1 b m1' , free m1 b = m1' -> + low_bound m1' b >= high_bound m1' b. +Proof. + intros. unfold free in H. + rewrite<- H . unfold low_bound , high_bound . + simpl . rewrite update_s. simpl. omega. +Qed. + +Lemma free_empty_bounds: + forall l m1 , + let m1' := free_list m1 l in + (forall b, In b l -> low_bound m1' b >= high_bound m1' b). +Proof. + induction l . intros . inversion H. + intros. + generalize (in_inv H). + intro . elim H0. intro . subst b. simpl in m1' . + generalize ( bounds_free_block + (fold_right (fun (b : block) (m : mem) => free m b) m1 l) a m1') . + intros. apply H1. auto . intros. simpl in m1'. unfold m1' . + unfold low_bound , high_bound . simpl . + unfold update; case (zeq b a); intro; simpl. + omega . + unfold low_bound , high_bound in IHl . auto. +Qed. + +Lemma free_inject: + forall m1 m2 l b, + (forall b1 delta, f b1 = Some(b, delta) -> In b1 l) -> + mem_inject m1 m2 -> + mem_inject (free_list m1 l) (free m2 b). +Proof. + intros. apply free_snd_inject. + intros. apply free_empty_bounds. apply H with delta. auto. + apply free_first_list_inject. auto. +Qed. + +End MEM_INJECT. + +Hint Resolve val_inject_int val_inject_float val_inject_ptr val_inject_undef. +Hint Resolve val_nil_inject val_cons_inject. + +(** Monotonicity properties of memory injections. *) + +Definition inject_incr (f1 f2: meminj) : Prop := + forall b, f1 b = f2 b \/ f1 b = None. + +Lemma inject_incr_refl : + forall f , inject_incr f f . +Proof. unfold inject_incr . intros. left . auto . Qed. + +Lemma inject_incr_trans : + forall f1 f2 f3 , + inject_incr f1 f2 -> inject_incr f2 f3 -> inject_incr f1 f3 . +Proof . + unfold inject_incr . intros . + generalize (H b) . intro . generalize (H0 b) . intro . + elim H1 ; elim H2 ; intros. + rewrite H3 in H4 . left . auto . + rewrite H3 in H4 . right . auto . + right ; auto . + right ; auto . +Qed. + +Lemma val_inject_incr: + forall f1 f2 v v', + inject_incr f1 f2 -> + val_inject f1 v v' -> + val_inject f2 v v'. +Proof. + intros. inversion H0. + constructor. + constructor. + elim (H b1); intro. + apply val_inject_ptr with x. congruence. auto. + congruence. + constructor. +Qed. + +Lemma val_list_inject_incr: + forall f1 f2 vl vl' , + inject_incr f1 f2 -> val_list_inject f1 vl vl' -> + val_list_inject f2 vl vl'. +Proof. + induction vl ; intros; inversion H0. auto . + subst a . generalize (val_inject_incr f1 f2 v v' H H3) . + intro . auto . +Qed. + +Hint Resolve inject_incr_refl val_inject_incr val_list_inject_incr. + +Section MEM_INJECT_INCR. + +Variable f1 f2: meminj. +Hypothesis INCR: inject_incr f1 f2. + +Lemma val_content_inject_incr: + forall chunk v v', + val_content_inject f1 chunk v v' -> + val_content_inject f2 chunk v v'. +Proof. + intros. inversion H. + apply val_content_inject_base. eapply val_inject_incr; eauto. + apply val_content_inject_8; auto. + apply val_content_inject_16; auto. + apply val_content_inject_32; auto. +Qed. + +Lemma content_inject_incr: + forall c c', content_inject f1 c c' -> content_inject f2 c c'. +Proof. + induction 1; constructor; eapply val_content_inject_incr; eauto. +Qed. + +Lemma contentmap_inject_incr: + forall c c' lo hi delta, + contentmap_inject f1 c c' lo hi delta -> + contentmap_inject f2 c c' lo hi delta. +Proof. + unfold contentmap_inject; intros. + apply content_inject_incr. auto. +Qed. + +Lemma block_contents_inject_incr: + forall c c' delta, + block_contents_inject f1 c c' delta -> + block_contents_inject f2 c c' delta. +Proof. + intros. inversion H. constructor; auto. + apply contentmap_inject_incr; auto. +Qed. + +End MEM_INJECT_INCR. + +(** Properties of injections and allocations. *) + +Definition extend_inject + (b: block) (x: option (block * Z)) (f: meminj) : meminj := + fun b' => if eq_block b' b then x else f b'. + +Lemma extend_inject_incr: + forall f b x, + f b = None -> + inject_incr f (extend_inject b x f). +Proof. + intros; red; intros. unfold extend_inject. + case (eq_block b0 b); intro. subst b0. right; auto. left; auto. +Qed. + +Lemma alloc_right_inject: + forall f m1 m2 lo hi m2' b, + mem_inject f m1 m2 -> + alloc m2 lo hi = (m2', b) -> + mem_inject f m1 m2'. +Proof. + intros. unfold alloc in H0. injection H0; intros A B; clear H0. + inversion H. + constructor. + assumption. + intros. generalize (mi_mappedblocks0 _ _ _ H0). intros [C D]. + rewrite <- B; simpl. split. omega. + rewrite update_o. auto. omega. + assumption. +Qed. + +Lemma alloc_unmapped_inject: + forall f m1 m2 lo hi m1' b, + mem_inject f m1 m2 -> + alloc m1 lo hi = (m1', b) -> + mem_inject (extend_inject b None f) m1' m2 /\ + inject_incr f (extend_inject b None f). +Proof. + intros. unfold alloc in H0. injection H0; intros; clear H0. + inversion H. + assert (inject_incr f (extend_inject b None f)). + apply extend_inject_incr. apply mi_freeblocks0. rewrite H1. omega. + split; auto. + constructor. + rewrite <- H2; simpl; intros. unfold extend_inject. + case (eq_block b0 b); intro. auto. apply mi_freeblocks0. omega. + intros until delta. unfold extend_inject at 1. case (eq_block b0 b); intro. + intros; discriminate. + intros. rewrite <- H2; simpl. rewrite H1. + rewrite update_o; auto. + elim (mi_mappedblocks0 _ _ _ H3). intros A B. + split. auto. apply block_contents_inject_incr with f. auto. auto. + intros until delta2. unfold extend_inject. + case (eq_block b1 b); intro. congruence. + case (eq_block b2 b); intro. congruence. + rewrite <- H2. unfold low_bound, high_bound; simpl. rewrite H1. + repeat rewrite update_o; auto. + exact (mi_no_overlap0 b1 b2 b1' b2' delta1 delta2). +Qed. + +Lemma alloc_mapped_inject: + forall f m1 m2 lo hi m1' b b' ofs, + mem_inject f m1 m2 -> + alloc m1 lo hi = (m1', b) -> + valid_block m2 b' -> + Int.min_signed <= ofs <= Int.max_signed -> + Int.min_signed <= low_bound m2 b' -> + high_bound m2 b' <= Int.max_signed -> + low_bound m2 b' <= lo + ofs -> + hi + ofs <= high_bound m2 b' -> + (forall b0 ofs0, + f b0 = Some (b', ofs0) -> + high_bound m1 b0 + ofs0 <= lo + ofs \/ + hi + ofs <= low_bound m1 b0 + ofs0) -> + mem_inject (extend_inject b (Some (b', ofs)) f) m1' m2 /\ + inject_incr f (extend_inject b (Some (b', ofs)) f). +Proof. + intros. + generalize (fun b' => low_bound_alloc _ _ b' _ _ _ H0). + intro LOW. + generalize (fun b' => high_bound_alloc _ _ b' _ _ _ H0). + intro HIGH. + unfold alloc in H0. injection H0; intros A B; clear H0. + inversion H. + (* inject_incr *) + assert (inject_incr f (extend_inject b (Some (b', ofs)) f)). + apply extend_inject_incr. apply mi_freeblocks0. rewrite A. omega. + split; auto. + constructor. + (* mi_freeblocks *) + rewrite <- B; simpl; intros. unfold extend_inject. + case (eq_block b0 b); intro. unfold block in *. omegaContradiction. + apply mi_freeblocks0. omega. + (* mi_mappedblocks *) + intros until delta. unfold extend_inject at 1. + case (eq_block b0 b); intro. + intros. subst b0. inversion H8. subst b'0; subst delta. + split. assumption. + rewrite <- B; simpl. rewrite A. rewrite update_s. + constructor; auto. + unfold empty_block. simpl. intros. unfold low_bound in H5. unfold high_bound in H6. omega. + simpl. red; intros. constructor. + intros. + generalize (mi_mappedblocks0 _ _ _ H8). intros [C D]. + split. auto. + rewrite <- B; simpl; rewrite A; rewrite update_o; auto. + apply block_contents_inject_incr with f; auto. + (* no overlap *) + intros until delta2. unfold extend_inject. + repeat rewrite LOW. repeat rewrite HIGH. unfold eq_block. + case (zeq b1 b); case (zeq b2 b); intros. + congruence. + inversion H9. subst b1'; subst delta1. + case (eq_block b' b2'); intro. + subst b2'. generalize (H7 _ _ H10). intro. + right. intros. omega. left. auto. + inversion H10. subst b2'; subst delta2. + case (eq_block b' b1'); intro. + subst b1'. generalize (H7 _ _ H9). intro. + right. intros. omega. left. auto. + apply mi_no_overlap0; auto. +Qed. diff --git a/backend/Op.v b/backend/Op.v new file mode 100644 index 00000000..e0dcfa46 --- /dev/null +++ b/backend/Op.v @@ -0,0 +1,691 @@ +(** Operators and addressing modes. The abstract syntax and dynamic + semantics for the Cminor, RTL, LTL and Mach languages depend on the + following types, defined in this library: +- [condition]: boolean conditions for conditional branches; +- [operation]: arithmetic and logical operations; +- [addressing]: addressing modes for load and store operations. + + These types are PowerPC-specific and correspond roughly to what the + processor can compute in one instruction. In other terms, these + types reflect the state of the program after instruction selection. + For a processor-independent set of operations, see the abstract + syntax and dynamic semantics of the Csharpminor input language. +*) + +Require Import Coqlib. +Require Import AST. +Require Import Integers. +Require Import Floats. +Require Import Values. +Require Import Mem. +Require Import Globalenvs. + +Set Implicit Arguments. + +(** Conditions (boolean-valued operators). *) + +Inductive condition : Set := + | Ccomp: comparison -> condition (**r signed integer comparison *) + | Ccompu: comparison -> condition (**r unsigned integer comparison *) + | Ccompimm: comparison -> int -> condition (**r signed integer comparison with a constant *) + | Ccompuimm: comparison -> int -> condition (**r unsigned integer comparison with a constant *) + | Ccompf: comparison -> condition (**r floating-point comparison *) + | Cnotcompf: comparison -> condition (**r negation of a floating-point comparison *) + | Cmaskzero: int -> condition (**r test [(arg & constant) == 0] *) + | Cmasknotzero: int -> condition. (**r test [(arg & constant) != 0] *) + +(** Arithmetic and logical operations. In the descriptions, [rd] is the + result of the operation and [r1], [r2], etc, are the arguments. *) + +Inductive operation : Set := + | Omove: operation (**r [rd = r1] *) + | Ointconst: int -> operation (**r [rd] is set to the given integer constant *) + | Ofloatconst: float -> operation (**r [rd] is set to the given float constant *) + | Oaddrsymbol: ident -> int -> operation (**r [rd] is set to the the address of the symbol plus the offset *) + | Oaddrstack: int -> operation (**r [rd] is set to the stack pointer plus the given offset *) + | Oundef: operation (**r set [rd] to undefined value *) +(*c Integer arithmetic: *) + | Ocast8signed: operation (**r [rd] is 8-bit sign extension of [r1] *) + | Ocast16signed: operation (**r [rd] is 16-bit sign extension of [r1] *) + | Oadd: operation (**r [rd = r1 + r2] *) + | Oaddimm: int -> operation (**r [rd = r1 + n] *) + | Osub: operation (**r [rd = r1 - r2] *) + | Osubimm: int -> operation (**r [rd = n - r1] *) + | Omul: operation (**r [rd = r1 * r2] *) + | Omulimm: int -> operation (**r [rd = r1 * n] *) + | Odiv: operation (**r [rd = r1 / r2] (signed) *) + | Odivu: operation (**r [rd = r1 / r2] (unsigned) *) + | Oand: operation (**r [rd = r1 & r2] *) + | Oandimm: int -> operation (**r [rd = r1 & n] *) + | Oor: operation (**r [rd = r1 | r2] *) + | Oorimm: int -> operation (**r [rd = r1 | n] *) + | Oxor: operation (**r [rd = r1 ^ r2] *) + | Oxorimm: int -> operation (**r [rd = r1 ^ n] *) + | Onand: operation (**r [rd = ~(r1 & r2)] *) + | Onor: operation (**r [rd = ~(r1 | r2)] *) + | Onxor: operation (**r [rd = ~(r1 ^ r2)] *) + | Oshl: operation (**r [rd = r1 << r2] *) + | Oshr: operation (**r [rd = r1 >> r2] (signed) *) + | Oshrimm: int -> operation (**r [rd = r1 >> n] (signed) *) + | Oshrximm: int -> operation (**r [rd = r1 / 2^n] (signed) *) + | Oshru: operation (**r [rd = r1 >> r2] (unsigned) *) + | Orolm: int -> int -> operation (**r rotate left and mask *) +(*c Floating-point arithmetic: *) + | Onegf: operation (**r [rd = - r1] *) + | Oabsf: operation (**r [rd = abs(r1)] *) + | Oaddf: operation (**r [rd = r1 + r2] *) + | Osubf: operation (**r [rd = r1 - r2] *) + | Omulf: operation (**r [rd = r1 * r2] *) + | Odivf: operation (**r [rd = r1 / r2] *) + | Omuladdf: operation (**r [rd = r1 * r2 + r3] *) + | Omulsubf: operation (**r [rd = r1 * r2 - r3] *) + | Osingleoffloat: operation (**r [rd] is [r1] truncated to single-precision float *) +(*c Conversions between int and float: *) + | Ointoffloat: operation (**r [rd = int_of_float(r1)] *) + | Ofloatofint: operation (**r [rd = float_of_signed_int(r1)] *) + | Ofloatofintu: operation (**r [rd = float_of_unsigned_int(r1)] *) +(*c Boolean tests: *) + | Ocmp: condition -> operation. (**r [rd = 1] if condition holds, [rd = 0] otherwise. *) + +(** Addressing modes. [r1], [r2], etc, are the arguments to the + addressing. *) + +Inductive addressing: Set := + | Aindexed: int -> addressing (**r Address is [r1 + offset] *) + | Aindexed2: addressing (**r Address is [r1 + r2] *) + | Aglobal: ident -> int -> addressing (**r Address is [symbol + offset] *) + | Abased: ident -> int -> addressing (**r Address is [symbol + offset + r1] *) + | Ainstack: int -> addressing. (**r Address is [stack_pointer + offset] *) + +(** Evaluation of conditions, operators and addressing modes applied + to lists of values. Return [None] when the computation is undefined: + wrong number of arguments, arguments of the wrong types, undefined + operations such as division by zero. [eval_condition] returns a boolean, + [eval_operation] and [eval_addressing] return a value. *) + +Definition eval_compare_null (c: comparison) (n: int) : option bool := + if Int.eq n Int.zero + then match c with Ceq => Some false | Cne => Some true | _ => None end + else None. + +Definition eval_condition (cond: condition) (vl: list val) : option bool := + match cond, vl with + | Ccomp c, Vint n1 :: Vint n2 :: nil => + Some (Int.cmp c n1 n2) + | Ccomp c, Vptr b1 n1 :: Vptr b2 n2 :: nil => + if eq_block b1 b2 then Some (Int.cmp c n1 n2) else None + | Ccomp c, Vptr b1 n1 :: Vint n2 :: nil => + eval_compare_null c n2 + | Ccomp c, Vint n1 :: Vptr b2 n2 :: nil => + eval_compare_null c n1 + | Ccompu c, Vint n1 :: Vint n2 :: nil => + Some (Int.cmpu c n1 n2) + | Ccompu c, Vptr b1 n1 :: Vptr b2 n2 :: nil => + if eq_block b1 b2 then Some (Int.cmpu c n1 n2) else None + | Ccompu c, Vptr b1 n1 :: Vint n2 :: nil => + eval_compare_null c n2 + | Ccompu c, Vint n1 :: Vptr b2 n2 :: nil => + eval_compare_null c n1 + | Ccompimm c n, Vint n1 :: nil => + Some (Int.cmp c n1 n) + | Ccompimm c n, Vptr b1 n1 :: nil => + eval_compare_null c n + | Ccompuimm c n, Vint n1 :: nil => + Some (Int.cmpu c n1 n) + | Ccompuimm c n, Vptr b1 n1 :: nil => + eval_compare_null c n + | Ccompf c, Vfloat f1 :: Vfloat f2 :: nil => + Some (Float.cmp c f1 f2) + | Cnotcompf c, Vfloat f1 :: Vfloat f2 :: nil => + Some (negb (Float.cmp c f1 f2)) + | Cmaskzero n, Vint n1 :: nil => + Some (Int.eq (Int.and n1 n) Int.zero) + | Cmasknotzero n, Vint n1 :: nil => + Some (negb (Int.eq (Int.and n1 n) Int.zero)) + | _, _ => + None + end. + +Definition offset_sp (sp: val) (delta: int) : option val := + match sp with + | Vptr b n => Some (Vptr b (Int.add n delta)) + | _ => None + end. + +Definition eval_operation + (F: Set) (genv: Genv.t F) (sp: val) + (op: operation) (vl: list val) : option val := + match op, vl with + | Omove, v1::nil => Some v1 + | Ointconst n, nil => Some (Vint n) + | Ofloatconst n, nil => Some (Vfloat n) + | Oaddrsymbol s ofs, nil => + match Genv.find_symbol genv s with + | None => None + | Some b => Some (Vptr b ofs) + end + | Oaddrstack ofs, nil => offset_sp sp ofs + | Oundef, nil => Some Vundef + | Ocast8signed, Vint n1 :: nil => Some (Vint (Int.cast8signed n1)) + | Ocast16signed, Vint n1 :: nil => Some (Vint (Int.cast16signed n1)) + | Oadd, Vint n1 :: Vint n2 :: nil => Some (Vint (Int.add n1 n2)) + | Oadd, Vint n1 :: Vptr b2 n2 :: nil => Some (Vptr b2 (Int.add n2 n1)) + | Oadd, Vptr b1 n1 :: Vint n2 :: nil => Some (Vptr b1 (Int.add n1 n2)) + | Oaddimm n, Vint n1 :: nil => Some (Vint (Int.add n1 n)) + | Oaddimm n, Vptr b1 n1 :: nil => Some (Vptr b1 (Int.add n1 n)) + | Osub, Vint n1 :: Vint n2 :: nil => Some (Vint (Int.sub n1 n2)) + | Osub, Vptr b1 n1 :: Vint n2 :: nil => Some (Vptr b1 (Int.sub n1 n2)) + | Osub, Vptr b1 n1 :: Vptr b2 n2 :: nil => + if eq_block b1 b2 then Some (Vint (Int.sub n1 n2)) else None + | Osubimm n, Vint n1 :: nil => Some (Vint (Int.sub n n1)) + | Omul, Vint n1 :: Vint n2 :: nil => Some (Vint (Int.mul n1 n2)) + | Omulimm n, Vint n1 :: nil => Some (Vint (Int.mul n1 n)) + | Odiv, Vint n1 :: Vint n2 :: nil => + if Int.eq n2 Int.zero then None else Some (Vint (Int.divs n1 n2)) + | Odivu, Vint n1 :: Vint n2 :: nil => + if Int.eq n2 Int.zero then None else Some (Vint (Int.divu n1 n2)) + | Oand, Vint n1 :: Vint n2 :: nil => Some (Vint (Int.and n1 n2)) + | Oandimm n, Vint n1 :: nil => Some (Vint (Int.and n1 n)) + | Oor, Vint n1 :: Vint n2 :: nil => Some (Vint (Int.or n1 n2)) + | Oorimm n, Vint n1 :: nil => Some (Vint (Int.or n1 n)) + | Oxor, Vint n1 :: Vint n2 :: nil => Some (Vint (Int.xor n1 n2)) + | Oxorimm n, Vint n1 :: nil => Some (Vint (Int.xor n1 n)) + | Onand, Vint n1 :: Vint n2 :: nil => Some (Vint (Int.not (Int.and n1 n2))) + | Onor, Vint n1 :: Vint n2 :: nil => Some (Vint (Int.not (Int.or n1 n2))) + | Onxor, Vint n1 :: Vint n2 :: nil => Some (Vint (Int.not (Int.xor n1 n2))) + | Oshl, Vint n1 :: Vint n2 :: nil => + if Int.ltu n2 (Int.repr 32) then Some (Vint (Int.shl n1 n2)) else None + | Oshr, Vint n1 :: Vint n2 :: nil => + if Int.ltu n2 (Int.repr 32) then Some (Vint (Int.shr n1 n2)) else None + | Oshrimm n, Vint n1 :: nil => + if Int.ltu n (Int.repr 32) then Some (Vint (Int.shr n1 n)) else None + | Oshrximm n, Vint n1 :: nil => + if Int.ltu n (Int.repr 32) then Some (Vint (Int.shrx n1 n)) else None + | Oshru, Vint n1 :: Vint n2 :: nil => + if Int.ltu n2 (Int.repr 32) then Some (Vint (Int.shru n1 n2)) else None + | Orolm amount mask, Vint n1 :: nil => + Some (Vint (Int.rolm n1 amount mask)) + | Onegf, Vfloat f1 :: nil => Some (Vfloat (Float.neg f1)) + | Oabsf, Vfloat f1 :: nil => Some (Vfloat (Float.abs f1)) + | Oaddf, Vfloat f1 :: Vfloat f2 :: nil => Some (Vfloat (Float.add f1 f2)) + | Osubf, Vfloat f1 :: Vfloat f2 :: nil => Some (Vfloat (Float.sub f1 f2)) + | Omulf, Vfloat f1 :: Vfloat f2 :: nil => Some (Vfloat (Float.mul f1 f2)) + | Odivf, Vfloat f1 :: Vfloat f2 :: nil => Some (Vfloat (Float.div f1 f2)) + | Omuladdf, Vfloat f1 :: Vfloat f2 :: Vfloat f3 :: nil => + Some (Vfloat (Float.add (Float.mul f1 f2) f3)) + | Omulsubf, Vfloat f1 :: Vfloat f2 :: Vfloat f3 :: nil => + Some (Vfloat (Float.sub (Float.mul f1 f2) f3)) + | Osingleoffloat, Vfloat f1 :: nil => + Some (Vfloat (Float.singleoffloat f1)) + | Ointoffloat, Vfloat f1 :: nil => + Some (Vint (Float.intoffloat f1)) + | Ofloatofint, Vint n1 :: nil => + Some (Vfloat (Float.floatofint n1)) + | Ofloatofintu, Vint n1 :: nil => + Some (Vfloat (Float.floatofintu n1)) + | Ocmp c, _ => + match eval_condition c vl with + | None => None + | Some false => Some Vfalse + | Some true => Some Vtrue + end + | _, _ => None + end. + +Definition eval_addressing + (F: Set) (genv: Genv.t F) (sp: val) + (addr: addressing) (vl: list val) : option val := + match addr, vl with + | Aindexed n, Vptr b1 n1 :: nil => + Some (Vptr b1 (Int.add n1 n)) + | Aindexed2, Vptr b1 n1 :: Vint n2 :: nil => + Some (Vptr b1 (Int.add n1 n2)) + | Aindexed2, Vint n1 :: Vptr b2 n2 :: nil => + Some (Vptr b2 (Int.add n1 n2)) + | Aglobal s ofs, nil => + match Genv.find_symbol genv s with + | None => None + | Some b => Some (Vptr b ofs) + end + | Abased s ofs, Vint n1 :: nil => + match Genv.find_symbol genv s with + | None => None + | Some b => Some (Vptr b (Int.add ofs n1)) + end + | Ainstack ofs, nil => + offset_sp sp ofs + | _, _ => None + end. + +Definition negate_condition (cond: condition): condition := + match cond with + | Ccomp c => Ccomp(negate_comparison c) + | Ccompu c => Ccompu(negate_comparison c) + | Ccompimm c n => Ccompimm (negate_comparison c) n + | Ccompuimm c n => Ccompuimm (negate_comparison c) n + | Ccompf c => Cnotcompf c + | Cnotcompf c => Ccompf c + | Cmaskzero n => Cmasknotzero n + | Cmasknotzero n => Cmaskzero n + end. + +Ltac FuncInv := + match goal with + | H: (match ?x with nil => _ | _ :: _ => _ end = Some _) |- _ => + destruct x; simpl in H; try discriminate; FuncInv + | H: (match ?v with Vundef => _ | Vint _ => _ | Vfloat _ => _ | Vptr _ _ => _ end = Some _) |- _ => + destruct v; simpl in H; try discriminate; FuncInv + | H: (Some _ = Some _) |- _ => + injection H; intros; clear H; FuncInv + | _ => + idtac + end. + +Remark eval_negate_compare_null: + forall c n b, + eval_compare_null c n = Some b -> + eval_compare_null (negate_comparison c) n = Some (negb b). +Proof. + intros until b. unfold eval_compare_null. + case (Int.eq n Int.zero). + destruct c; intro EQ; simplify_eq EQ; intros; subst b; reflexivity. + intro; discriminate. +Qed. + +Lemma eval_negate_condition: + forall (cond: condition) (vl: list val) (b: bool), + eval_condition cond vl = Some b -> + eval_condition (negate_condition cond) vl = Some (negb b). +Proof. + intros. + destruct cond; simpl in H; FuncInv; try subst b; simpl. + rewrite Int.negate_cmp. auto. + apply eval_negate_compare_null; auto. + apply eval_negate_compare_null; auto. + destruct (eq_block b0 b1). rewrite Int.negate_cmp. congruence. + discriminate. + rewrite Int.negate_cmpu. auto. + apply eval_negate_compare_null; auto. + apply eval_negate_compare_null; auto. + destruct (eq_block b0 b1). rewrite Int.negate_cmpu. congruence. + discriminate. + rewrite Int.negate_cmp. auto. + apply eval_negate_compare_null; auto. + rewrite Int.negate_cmpu. auto. + apply eval_negate_compare_null; auto. + auto. + rewrite negb_elim. auto. + auto. + rewrite negb_elim. auto. +Qed. + +(** [eval_operation] and [eval_addressing] depend on a global environment + for resolving references to global symbols. We show that they give + the same results if a global environment is replaced by another that + assigns the same addresses to the same symbols. *) + +Section GENV_TRANSF. + +Variable F1 F2: Set. +Variable ge1: Genv.t F1. +Variable ge2: Genv.t F2. +Hypothesis agree_on_symbols: + forall (s: ident), Genv.find_symbol ge2 s = Genv.find_symbol ge1 s. + +Lemma eval_operation_preserved: + forall sp op vl, + eval_operation ge2 sp op vl = eval_operation ge1 sp op vl. +Proof. + intros. + unfold eval_operation; destruct op; try rewrite agree_on_symbols; + reflexivity. +Qed. + +Lemma eval_addressing_preserved: + forall sp addr vl, + eval_addressing ge2 sp addr vl = eval_addressing ge1 sp addr vl. +Proof. + intros. + unfold eval_addressing; destruct addr; try rewrite agree_on_symbols; + reflexivity. +Qed. + +End GENV_TRANSF. + +(** Recognition of move operations. *) + +Definition is_move_operation + (A: Set) (op: operation) (args: list A) : option A := + match op, args with + | Omove, arg :: nil => Some arg + | _, _ => None + end. + +Lemma is_move_operation_correct: + forall (A: Set) (op: operation) (args: list A) (a: A), + is_move_operation op args = Some a -> + op = Omove /\ args = a :: nil. +Proof. + intros until a. unfold is_move_operation; destruct op; + try (intros; discriminate). + destruct args. intros; discriminate. + destruct args. intros. intuition congruence. + intros; discriminate. +Qed. + +(** Static typing of conditions, operators and addressing modes. *) + +Definition type_of_condition (c: condition) : list typ := + match c with + | Ccomp _ => Tint :: Tint :: nil + | Ccompu _ => Tint :: Tint :: nil + | Ccompimm _ _ => Tint :: nil + | Ccompuimm _ _ => Tint :: nil + | Ccompf _ => Tfloat :: Tfloat :: nil + | Cnotcompf _ => Tfloat :: Tfloat :: nil + | Cmaskzero _ => Tint :: nil + | Cmasknotzero _ => Tint :: nil + end. + +Definition type_of_operation (op: operation) : list typ * typ := + match op with + | Omove => (nil, Tint) (* treated specially *) + | Ointconst _ => (nil, Tint) + | Ofloatconst _ => (nil, Tfloat) + | Oaddrsymbol _ _ => (nil, Tint) + | Oaddrstack _ => (nil, Tint) + | Oundef => (nil, Tint) (* treated specially *) + | Ocast8signed => (Tint :: nil, Tint) + | Ocast16signed => (Tint :: nil, Tint) + | Oadd => (Tint :: Tint :: nil, Tint) + | Oaddimm _ => (Tint :: nil, Tint) + | Osub => (Tint :: Tint :: nil, Tint) + | Osubimm _ => (Tint :: nil, Tint) + | Omul => (Tint :: Tint :: nil, Tint) + | Omulimm _ => (Tint :: nil, Tint) + | Odiv => (Tint :: Tint :: nil, Tint) + | Odivu => (Tint :: Tint :: nil, Tint) + | Oand => (Tint :: Tint :: nil, Tint) + | Oandimm _ => (Tint :: nil, Tint) + | Oor => (Tint :: Tint :: nil, Tint) + | Oorimm _ => (Tint :: nil, Tint) + | Oxor => (Tint :: Tint :: nil, Tint) + | Oxorimm _ => (Tint :: nil, Tint) + | Onand => (Tint :: Tint :: nil, Tint) + | Onor => (Tint :: Tint :: nil, Tint) + | Onxor => (Tint :: Tint :: nil, Tint) + | Oshl => (Tint :: Tint :: nil, Tint) + | Oshr => (Tint :: Tint :: nil, Tint) + | Oshrimm _ => (Tint :: nil, Tint) + | Oshrximm _ => (Tint :: nil, Tint) + | Oshru => (Tint :: Tint :: nil, Tint) + | Orolm _ _ => (Tint :: nil, Tint) + | Onegf => (Tfloat :: nil, Tfloat) + | Oabsf => (Tfloat :: nil, Tfloat) + | Oaddf => (Tfloat :: Tfloat :: nil, Tfloat) + | Osubf => (Tfloat :: Tfloat :: nil, Tfloat) + | Omulf => (Tfloat :: Tfloat :: nil, Tfloat) + | Odivf => (Tfloat :: Tfloat :: nil, Tfloat) + | Omuladdf => (Tfloat :: Tfloat :: Tfloat :: nil, Tfloat) + | Omulsubf => (Tfloat :: Tfloat :: Tfloat :: nil, Tfloat) + | Osingleoffloat => (Tfloat :: nil, Tfloat) + | Ointoffloat => (Tfloat :: nil, Tint) + | Ofloatofint => (Tint :: nil, Tfloat) + | Ofloatofintu => (Tint :: nil, Tfloat) + | Ocmp c => (type_of_condition c, Tint) + end. + +Definition type_of_addressing (addr: addressing) : list typ := + match addr with + | Aindexed _ => Tint :: nil + | Aindexed2 => Tint :: Tint :: nil + | Aglobal _ _ => nil + | Abased _ _ => Tint :: nil + | Ainstack _ => nil + end. + +Definition type_of_chunk (c: memory_chunk) : typ := + match c with + | Mint8signed => Tint + | Mint8unsigned => Tint + | Mint16signed => Tint + | Mint16unsigned => Tint + | Mint32 => Tint + | Mfloat32 => Tfloat + | Mfloat64 => Tfloat + end. + +(** Weak type soundness results for [eval_operation]: + the result values, when defined, are always of the type predicted + by [type_of_operation]. *) + +Section SOUNDNESS. + +Variable A: Set. +Variable genv: Genv.t A. + +Lemma type_of_operation_sound: + forall op vl sp v, + op <> Omove -> op <> Oundef -> + eval_operation genv sp op vl = Some v -> + Val.has_type v (snd (type_of_operation op)). +Proof. + intros. + destruct op; simpl in H1; FuncInv; try subst v; try exact I. + congruence. + destruct (Genv.find_symbol genv i); simplify_eq H1; intro; subst v; exact I. + simpl. unfold offset_sp in H1. destruct sp; try discriminate. + inversion H1. exact I. + destruct (eq_block b b0). injection H1; intro; subst v; exact I. + discriminate. + destruct (Int.eq i0 Int.zero). discriminate. + injection H1; intro; subst v; exact I. + destruct (Int.eq i0 Int.zero). discriminate. + injection H1; intro; subst v; exact I. + destruct (Int.ltu i0 (Int.repr 32)). + injection H1; intro; subst v; exact I. discriminate. + destruct (Int.ltu i0 (Int.repr 32)). + injection H1; intro; subst v; exact I. discriminate. + destruct (Int.ltu i (Int.repr 32)). + injection H1; intro; subst v; exact I. discriminate. + destruct (Int.ltu i (Int.repr 32)). + injection H1; intro; subst v; exact I. discriminate. + destruct (Int.ltu i0 (Int.repr 32)). + injection H1; intro; subst v; exact I. discriminate. + destruct (eval_condition c vl). + destruct b; injection H1; intro; subst v; exact I. + discriminate. +Qed. + +Lemma type_of_chunk_correct: + forall chunk m addr v, + Mem.loadv chunk m addr = Some v -> + Val.has_type v (type_of_chunk chunk). +Proof. + intro chunk. + assert (forall v, Val.has_type (Val.load_result chunk v) (type_of_chunk chunk)). + destruct v; destruct chunk; exact I. + intros until v. unfold Mem.loadv. + destruct addr; intros; try discriminate. + generalize (Mem.load_inv _ _ _ _ _ H0). + intros [X [Y [Z W]]]. subst v. apply H. +Qed. + +End SOUNDNESS. + +(** Alternate definition of [eval_condition], [eval_op], [eval_addressing] + as total functions that return [Vundef] when not applicable + (instead of [None]). Used in the proof of [PPCgen]. *) + +Section EVAL_OP_TOTAL. + +Variable F: Set. +Variable genv: Genv.t F. + +Definition find_symbol_offset (id: ident) (ofs: int) : val := + match Genv.find_symbol genv id with + | Some b => Vptr b ofs + | None => Vundef + end. + +Definition eval_condition_total (cond: condition) (vl: list val) : val := + match cond, vl with + | Ccomp c, v1::v2::nil => Val.cmp c v1 v2 + | Ccompu c, v1::v2::nil => Val.cmpu c v1 v2 + | Ccompimm c n, v1::nil => Val.cmp c v1 (Vint n) + | Ccompuimm c n, v1::nil => Val.cmpu c v1 (Vint n) + | Ccompf c, v1::v2::nil => Val.cmpf c v1 v2 + | Cnotcompf c, v1::v2::nil => Val.notbool(Val.cmpf c v1 v2) + | Cmaskzero n, v1::nil => Val.notbool (Val.and v1 (Vint n)) + | Cmasknotzero n, v1::nil => Val.notbool(Val.notbool (Val.and v1 (Vint n))) + | _, _ => Vundef + end. + +Definition eval_operation_total (sp: val) (op: operation) (vl: list val) : val := + match op, vl with + | Omove, v1::nil => v1 + | Ointconst n, nil => Vint n + | Ofloatconst n, nil => Vfloat n + | Oaddrsymbol s ofs, nil => find_symbol_offset s ofs + | Oaddrstack ofs, nil => Val.add sp (Vint ofs) + | Oundef, nil => Vundef + | Ocast8signed, v1::nil => Val.cast8signed v1 + | Ocast16signed, v1::nil => Val.cast16signed v1 + | Oadd, v1::v2::nil => Val.add v1 v2 + | Oaddimm n, v1::nil => Val.add v1 (Vint n) + | Osub, v1::v2::nil => Val.sub v1 v2 + | Osubimm n, v1::nil => Val.sub (Vint n) v1 + | Omul, v1::v2::nil => Val.mul v1 v2 + | Omulimm n, v1::nil => Val.mul v1 (Vint n) + | Odiv, v1::v2::nil => Val.divs v1 v2 + | Odivu, v1::v2::nil => Val.divu v1 v2 + | Oand, v1::v2::nil => Val.and v1 v2 + | Oandimm n, v1::nil => Val.and v1 (Vint n) + | Oor, v1::v2::nil => Val.or v1 v2 + | Oorimm n, v1::nil => Val.or v1 (Vint n) + | Oxor, v1::v2::nil => Val.xor v1 v2 + | Oxorimm n, v1::nil => Val.xor v1 (Vint n) + | Onand, v1::v2::nil => Val.notint(Val.and v1 v2) + | Onor, v1::v2::nil => Val.notint(Val.or v1 v2) + | Onxor, v1::v2::nil => Val.notint(Val.xor v1 v2) + | Oshl, v1::v2::nil => Val.shl v1 v2 + | Oshr, v1::v2::nil => Val.shr v1 v2 + | Oshrimm n, v1::nil => Val.shr v1 (Vint n) + | Oshrximm n, v1::nil => Val.shrx v1 (Vint n) + | Oshru, v1::v2::nil => Val.shru v1 v2 + | Orolm amount mask, v1::nil => Val.rolm v1 amount mask + | Onegf, v1::nil => Val.negf v1 + | Oabsf, v1::nil => Val.absf v1 + | Oaddf, v1::v2::nil => Val.addf v1 v2 + | Osubf, v1::v2::nil => Val.subf v1 v2 + | Omulf, v1::v2::nil => Val.mulf v1 v2 + | Odivf, v1::v2::nil => Val.divf v1 v2 + | Omuladdf, v1::v2::v3::nil => Val.addf (Val.mulf v1 v2) v3 + | Omulsubf, v1::v2::v3::nil => Val.subf (Val.mulf v1 v2) v3 + | Osingleoffloat, v1::nil => Val.singleoffloat v1 + | Ointoffloat, v1::nil => Val.intoffloat v1 + | Ofloatofint, v1::nil => Val.floatofint v1 + | Ofloatofintu, v1::nil => Val.floatofintu v1 + | Ocmp c, _ => eval_condition_total c vl + | _, _ => Vundef + end. + +Definition eval_addressing_total + (sp: val) (addr: addressing) (vl: list val) : val := + match addr, vl with + | Aindexed n, v1::nil => Val.add v1 (Vint n) + | Aindexed2, v1::v2::nil => Val.add v1 v2 + | Aglobal s ofs, nil => find_symbol_offset s ofs + | Abased s ofs, v1::nil => Val.add (find_symbol_offset s ofs) v1 + | Ainstack ofs, nil => Val.add sp (Vint ofs) + | _, _ => Vundef + end. + +Lemma eval_compare_null_weaken: + forall c i b, + eval_compare_null c i = Some b -> + (if Int.eq i Int.zero then Val.cmp_mismatch c else Vundef) = Val.of_bool b. +Proof. + unfold eval_compare_null. intros. + destruct (Int.eq i Int.zero); try discriminate. + destruct c; try discriminate; inversion H; reflexivity. +Qed. + +Lemma eval_condition_weaken: + forall c vl b, + eval_condition c vl = Some b -> + eval_condition_total c vl = Val.of_bool b. +Proof. + intros. + unfold eval_condition in H; destruct c; FuncInv; + try subst b; try reflexivity; simpl; + try (apply eval_compare_null_weaken; auto). + unfold eq_block in H. destruct (zeq b0 b1); congruence. + unfold eq_block in H. destruct (zeq b0 b1); congruence. + symmetry. apply Val.notbool_negb_1. + symmetry. apply Val.notbool_negb_1. +Qed. + +Lemma eval_operation_weaken: + forall sp op vl v, + eval_operation genv sp op vl = Some v -> + eval_operation_total sp op vl = v. +Proof. + intros. + unfold eval_operation in H; destruct op; FuncInv; + try subst v; try reflexivity; simpl. + unfold find_symbol_offset. + destruct (Genv.find_symbol genv i); try discriminate. + congruence. + unfold offset_sp in H. + destruct sp; try discriminate. simpl. congruence. + unfold eq_block in H. destruct (zeq b b0); congruence. + destruct (Int.eq i0 Int.zero); congruence. + destruct (Int.eq i0 Int.zero); congruence. + destruct (Int.ltu i0 (Int.repr 32)); congruence. + destruct (Int.ltu i0 (Int.repr 32)); congruence. + destruct (Int.ltu i (Int.repr 32)); congruence. + destruct (Int.ltu i (Int.repr 32)); congruence. + destruct (Int.ltu i0 (Int.repr 32)); congruence. + caseEq (eval_condition c vl); intros; rewrite H0 in H. + replace v with (Val.of_bool b). + apply eval_condition_weaken; auto. + destruct b; simpl; congruence. + discriminate. +Qed. + +Lemma eval_addressing_weaken: + forall sp addr vl v, + eval_addressing genv sp addr vl = Some v -> + eval_addressing_total sp addr vl = v. +Proof. + intros. + unfold eval_addressing in H; destruct addr; FuncInv; + try subst v; simpl; try reflexivity. + decEq. apply Int.add_commut. + unfold find_symbol_offset. + destruct (Genv.find_symbol genv i); congruence. + unfold find_symbol_offset. + destruct (Genv.find_symbol genv i); try congruence. + inversion H. reflexivity. + unfold offset_sp in H. destruct sp; simpl; congruence. +Qed. + +Lemma eval_condition_total_is_bool: + forall cond vl, Val.is_bool (eval_condition_total cond vl). +Proof. + intros; destruct cond; + destruct vl; try apply Val.undef_is_bool; + destruct vl; try apply Val.undef_is_bool; + try (destruct vl; try apply Val.undef_is_bool); simpl. + apply Val.cmp_is_bool. + apply Val.cmpu_is_bool. + apply Val.cmp_is_bool. + apply Val.cmpu_is_bool. + apply Val.cmpf_is_bool. + apply Val.notbool_is_bool. + apply Val.notbool_is_bool. + apply Val.notbool_is_bool. +Qed. + +End EVAL_OP_TOTAL. diff --git a/backend/PPC.v b/backend/PPC.v new file mode 100644 index 00000000..64bd90a8 --- /dev/null +++ b/backend/PPC.v @@ -0,0 +1,775 @@ +(** Abstract syntax and semantics for PowerPC assembly language *) + +Require Import Coqlib. +Require Import Maps. +Require Import AST. +Require Import Integers. +Require Import Floats. +Require Import Values. +Require Import Mem. +Require Import Globalenvs. + +(** * Abstract syntax *) + +(** Integer registers, floating-point registers. *) + +Inductive ireg: Set := + | GPR0: ireg | GPR1: ireg | GPR2: ireg | GPR3: ireg + | GPR4: ireg | GPR5: ireg | GPR6: ireg | GPR7: ireg + | GPR8: ireg | GPR9: ireg | GPR10: ireg | GPR11: ireg + | GPR12: ireg | GPR13: ireg | GPR14: ireg | GPR15: ireg + | GPR16: ireg | GPR17: ireg | GPR18: ireg | GPR19: ireg + | GPR20: ireg | GPR21: ireg | GPR22: ireg | GPR23: ireg + | GPR24: ireg | GPR25: ireg | GPR26: ireg | GPR27: ireg + | GPR28: ireg | GPR29: ireg | GPR30: ireg | GPR31: ireg. + +Inductive freg: Set := + | FPR0: freg | FPR1: freg | FPR2: freg | FPR3: freg + | FPR4: freg | FPR5: freg | FPR6: freg | FPR7: freg + | FPR8: freg | FPR9: freg | FPR10: freg | FPR11: freg + | FPR12: freg | FPR13: freg | FPR14: freg | FPR15: freg + | FPR16: freg | FPR17: freg | FPR18: freg | FPR19: freg + | FPR20: freg | FPR21: freg | FPR22: freg | FPR23: freg + | FPR24: freg | FPR25: freg | FPR26: freg | FPR27: freg + | FPR28: freg | FPR29: freg | FPR30: freg | FPR31: freg. + +Lemma ireg_eq: forall (x y: ireg), {x=y} + {x<>y}. +Proof. decide equality. Defined. + +Lemma freg_eq: forall (x y: freg), {x=y} + {x<>y}. +Proof. decide equality. Defined. + +(** Symbolic constants. Immediate operands to an arithmetic instruction + or an indexed memory access can be either integer literals + or the low or high 16 bits of a symbolic reference (the address + of a symbol plus a displacement). These symbolic references are + resolved later by the linker. +*) + +Inductive constant: Set := + | Cint: int -> constant + | Csymbol_low_signed: ident -> int -> constant + | Csymbol_high_signed: ident -> int -> constant + | Csymbol_low_unsigned: ident -> int -> constant + | Csymbol_high_unsigned: ident -> int -> constant. + +(** A note on constants: while immediate operands to PowerPC + instructions must be representable in 16 bits (with + sign extension or left shift by 16 positions for some instructions), + we do not attempt to capture these restrictions in the + abstract syntax nor in the semantics. The assembler will + emit an error if immediate operands exceed the representable + range. Of course, our PPC generator (file [PPCgen]) is + careful to respect this range. *) + +(** Bits in the condition register. We are only interested in the + first 4 bits. *) + +Inductive crbit: Set := + | CRbit_0: crbit + | CRbit_1: crbit + | CRbit_2: crbit + | CRbit_3: crbit. + +(** The instruction set. Most instructions correspond exactly to + actual instructions of the PowerPC processor. See the PowerPC + reference manuals for more details. Some instructions, + described below, are pseudo-instructions: they expand to + canned instruction sequences during the printing of the assembly + code. *) + +Definition label := positive. + +Inductive instruction : Set := + | Padd: ireg -> ireg -> ireg -> instruction (**r integer addition *) + | Paddi: ireg -> ireg -> constant -> instruction (**r add immediate *) + | Paddis: ireg -> ireg -> constant -> instruction (**r add immediate high *) + | Paddze: ireg -> ireg -> instruction (**r add Carry bit *) + | Pallocframe: Z -> Z -> instruction (**r allocate new stack frame *) + | Pand_: ireg -> ireg -> ireg -> instruction (**r bitwise and *) + | Pandc: ireg -> ireg -> ireg -> instruction (**r bitwise and-complement *) + | Pandi_: ireg -> ireg -> constant -> instruction (**r and immediate and set conditions *) + | Pandis_: ireg -> ireg -> constant -> instruction (**r and immediate high and set conditions *) + | Pb: label -> instruction (**r unconditional branch *) + | Pbctr: instruction (**r branch to contents of register CTR *) + | Pbctrl: instruction (**r branch to contents of CTR and link *) + | Pbf: crbit -> label -> instruction (**r branch if false *) + | Pbl: ident -> instruction (**r branch and link *) + | Pblr: instruction (**r branch to contents: register LR *) + | Pbt: crbit -> label -> instruction (**r branch if true *) + | Pcmplw: ireg -> ireg -> instruction (**r unsigned integer comparison *) + | Pcmplwi: ireg -> constant -> instruction (**r same, with immediate argument *) + | Pcmpw: ireg -> ireg -> instruction (**r signed integer comparison *) + | Pcmpwi: ireg -> constant -> instruction (**r same, with immediate argument *) + | Pcror: crbit -> crbit -> crbit -> instruction (**r or between condition bits *) + | Pdivw: ireg -> ireg -> ireg -> instruction (**r signed division *) + | Pdivwu: ireg -> ireg -> ireg -> instruction (**r unsigned division *) + | Peqv: ireg -> ireg -> ireg -> instruction (**r bitwise not-xor *) + | Pextsb: ireg -> ireg -> instruction (**r 8-bit sign extension *) + | Pextsh: ireg -> ireg -> instruction (**r 16-bit sign extension *) + | Pfreeframe: instruction (**r deallocate stack frame and restore previous frame *) + | Pfabs: freg -> freg -> instruction (**r float absolute value *) + | Pfadd: freg -> freg -> freg -> instruction (**r float addition *) + | Pfcmpu: freg -> freg -> instruction (**r float comparison *) + | Pfcti: ireg -> freg -> instruction (**r float-to-int conversion *) + | Pfdiv: freg -> freg -> freg -> instruction (**r float division *) + | Pfmadd: freg -> freg -> freg -> freg -> instruction (**r float multiply-add *) + | Pfmr: freg -> freg -> instruction (**r float move *) + | Pfmsub: freg -> freg -> freg -> freg -> instruction (**r float multiply-sub *) + | Pfmul: freg -> freg -> freg -> instruction (**r float multiply *) + | Pfneg: freg -> freg -> instruction (**r float negation *) + | Pfrsp: freg -> freg -> instruction (**r float round to single precision *) + | Pfsub: freg -> freg -> freg -> instruction (**r float subtraction *) + | Pictf: freg -> ireg -> instruction (**r int-to-float conversion *) + | Piuctf: freg -> ireg -> instruction (**r unsigned int-to-float conversion *) + | Plbz: ireg -> constant -> ireg -> instruction (**r load 8-bit unsigned int *) + | Plbzx: ireg -> ireg -> ireg -> instruction (**r same, with 2 index regs *) + | Plfd: freg -> constant -> ireg -> instruction (**r load 64-bit float *) + | Plfdx: freg -> ireg -> ireg -> instruction (**r same, with 2 index regs *) + | Plfs: freg -> constant -> ireg -> instruction (**r load 32-bit float *) + | Plfsx: freg -> ireg -> ireg -> instruction (**r same, with 2 index regs *) + | Plha: ireg -> constant -> ireg -> instruction (**r load 16-bit signed int *) + | Plhax: ireg -> ireg -> ireg -> instruction (**r same, with 2 index regs *) + | Plhz: ireg -> constant -> ireg -> instruction (**r load 16-bit unsigned int *) + | Plhzx: ireg -> ireg -> ireg -> instruction (**r same, with 2 index regs *) + | Plfi: freg -> float -> instruction (**r load float constant *) + | Plwz: ireg -> constant -> ireg -> instruction (**r load 32-bit int *) + | Plwzx: ireg -> ireg -> ireg -> instruction (**r same, with 2 index regs *) + | Pmfcrbit: ireg -> crbit -> instruction (**r move condition bit to reg *) + | Pmflr: ireg -> instruction (**r move LR to reg *) + | Pmr: ireg -> ireg -> instruction (**r integer move *) + | Pmtctr: ireg -> instruction (**r move ireg to CTR *) + | Pmtlr: ireg -> instruction (**r move ireg to LR *) + | Pmulli: ireg -> ireg -> constant -> instruction (**r integer multiply immediate *) + | Pmullw: ireg -> ireg -> ireg -> instruction (**r integer multiply *) + | Pnand: ireg -> ireg -> ireg -> instruction (**r bitwise not-and *) + | Pnor: ireg -> ireg -> ireg -> instruction (**r bitwise not-or *) + | Por: ireg -> ireg -> ireg -> instruction (**r bitwise or *) + | Porc: ireg -> ireg -> ireg -> instruction (**r bitwise or-complement *) + | Pori: ireg -> ireg -> constant -> instruction (**r or with immediate *) + | Poris: ireg -> ireg -> constant -> instruction (**r or with immediate high *) + | Prlwinm: ireg -> ireg -> int -> int -> instruction (**r rotate and mask *) + | Pslw: ireg -> ireg -> ireg -> instruction (**r shift left *) + | Psraw: ireg -> ireg -> ireg -> instruction (**r shift right signed *) + | Psrawi: ireg -> ireg -> int -> instruction (**r shift right signed immediate *) + | Psrw: ireg -> ireg -> ireg -> instruction (**r shift right unsigned *) + | Pstb: ireg -> constant -> ireg -> instruction (**r store 8-bit int *) + | Pstbx: ireg -> ireg -> ireg -> instruction (**r same, with 2 index regs *) + | Pstfd: freg -> constant -> ireg -> instruction (**r store 64-bit float *) + | Pstfdx: freg -> ireg -> ireg -> instruction (**r same, with 2 index regs *) + | Pstfs: freg -> constant -> ireg -> instruction (**r store 32-bit float *) + | Pstfsx: freg -> ireg -> ireg -> instruction (**r same, with 2 index regs *) + | Psth: ireg -> constant -> ireg -> instruction (**r store 16-bit int *) + | Psthx: ireg -> ireg -> ireg -> instruction (**r same, with 2 index regs *) + | Pstw: ireg -> constant -> ireg -> instruction (**r store 32-bit int *) + | Pstwx: ireg -> ireg -> ireg -> instruction (**r same, with 2 index regs *) + | Psubfc: ireg -> ireg -> ireg -> instruction (**r reversed integer subtraction *) + | Psubfic: ireg -> ireg -> constant -> instruction (**r integer subtraction from immediate *) + | Pxor: ireg -> ireg -> ireg -> instruction (**r bitwise xor *) + | Pxori: ireg -> ireg -> constant -> instruction (**r bitwise xor with immediate *) + | Pxoris: ireg -> ireg -> constant -> instruction (**r bitwise xor with immediate high *) + | Piundef: ireg -> instruction (**r set int reg to [Vundef] *) + | Pfundef: freg -> instruction (**r set float reg to [Vundef] *) + | Plabel: label -> instruction. (**r define a code label *) + +(** The pseudo-instructions are the following: + +- [Plabel]: define a code label at the current program point + +- [Plfi]: load a floating-point constant in a float register. + Expands to a float load [lfd] from an address in the constant data section + initialized with the floating-point constant: +<< + addis r2, 0, ha16(lbl) + lfd rdst, lo16(lbl)(r2) + .const_data +lbl: .double floatcst + .text +>> + Initialized data in the constant data section are not modeled here, + which is why we use a pseudo-instruction for this purpose. + +- [Pfcti]: convert a float to an integer. This requires a transfer + via memory of a 32-bit integer from a float register to an int register, + which our memory model cannot express. Expands to: +<< + fctiwz f13, rsrc + stfdu f13, -8(r1) + lwz rdst, 4(r1) + addi r1, r1, 8 +>> + +- [Pictf]: convert a signed integer to a float. This requires complicated + bit-level manipulations of IEEE floats through mixed float and integer + arithmetic over a memory word, which our memory model and axiomatization + of floats cannot express. Expands to: +<< + addis r2, 0, 0x4330 + stwu r2, -8(r1) + addis r2, rsrc, 0x8000 + stw r2, 4(r1) + addis r2, 0, ha16(lbl) + lfd f13, lo16(lbl)(r2) + lfd rdst, 0(r1) + addi r1, r1, 8 + fsub rdst, rdst, f13 + .const_data +lbl: .long 0x43300000, 0x80000000 + .text +>> + (Don't worry if you do not understand this instruction sequence: intimate + knowledge of IEEE float arithmetic is necessary.) + +- [Piuctf]: convert an unsigned integer to a float. The expansion is close + to that [Pictf], and equally obscure. +<< + addis r2, 0, 0x4330 + stwu r2, -8(r1) + stw rsrc, 4(r1) + addis r2, 0, ha16(lbl) + lfd f13, lo16(lbl)(r2) + lfd rdst, 0(r1) + addi r1, r1, 8 + fsub rdst, rdst, f13 + .const_data +lbl: .long 0x43300000, 0x00000000 + .text +>> + +- [Pallocframe lo hi]: in the formal semantics, this pseudo-instruction + allocates a memory block with bounds [lo] and [hi], stores the value + of register [r1] (the stack pointer, by convention) at the bottom + of this block, and sets [r1] to a pointer to the bottom of this + block. In the printed PowerPC assembly code, this allocation + is just a store-decrement of register [r1]: +<< + stwu r1, (lo - hi)(r1) +>> + This cannot be expressed in our memory model, which does not reflect + the fact that stack frames are adjacent and allocated/freed + following a stack discipline. + +- [Pfreeframe]: in the formal semantics, this pseudo-instruction + reads the bottom word of the block pointed by [r1] (the stack pointer), + frees this block, and sets [r1] to the value of the bottom word. + In the printed PowerPC assembly code, this freeing + is just a load of register [r1] relative to [r1] itself: +<< + lwz r1, 0(r1) +>> + Again, our memory model cannot comprehend that this operation + frees (logically) the current stack frame. + +- [Piundef] and [Pfundef]: set an integer or float register (respectively) + to the [Vundef] value. Expand to nothing, as the PowerPC processor has + no notion of ``undefined value''. These two pseudo-instructions are used + to ensure that the generated PowerPC code computes exactly the same values + as predicted by the semantics of Cminor, which explicitly set uninitialized + local variables to the [Vundef] value. A general property of our semantics, + not yet formally proved, is that they are monotone with respect to the + partial ordering [Vundef <= v] over values. Consequently, if a program + evaluates to a non-[Vundef] result [r] from an initial state that contains + [Vundef] values, it will also evaluate to [r] if arbitrary values are put + in the initial state instead of the [Vundef] values. This justifies + treating [Piundef] and [Pfundef] as no-operations, leaving in the target + register whatever value was already there instead of setting it to [Vundef]. + The formal proof of this result remains to be done, however. +*) + +Definition code := list instruction. +Definition program := AST.program code. + +(** * Operational semantics *) + +(** The PowerPC has a great many registers, some general-purpose, some very + specific. We model only the following registers: *) + +Inductive preg: Set := + | IR: ireg -> preg (**r integer registers *) + | FR: freg -> preg (**r float registers *) + | PC: preg (**r program counter *) + | LR: preg (**r link register (return address) *) + | CTR: preg (**r count register, used for some branches *) + | CARRY: preg (**r carry bit of the status register *) + | CR0_0: preg (**r bit 0 of the condition register *) + | CR0_1: preg (**r bit 1 of the condition register *) + | CR0_2: preg (**r bit 2 of the condition register *) + | CR0_3: preg. (**r bit 3 of the condition register *) + +Coercion IR: ireg >-> preg. +Coercion FR: freg >-> preg. + +Lemma preg_eq: forall (x y: preg), {x=y} + {x<>y}. +Proof. decide equality. apply ireg_eq. apply freg_eq. Defined. + +Module PregEq. + Definition t := preg. + Definition eq := preg_eq. +End PregEq. + +Module Pregmap := EMap(PregEq). + +(** The semantics operates over a single mapping from registers + (type [preg]) to values. We maintain (but do not enforce) + the convention that integer registers are mapped to values of + type [Tint], float registers to values of type [Tfloat], + and boolean registers ([CARRY], [CR0_0], etc) to either + [Vzero] or [Vone]. *) + +Definition regset := Pregmap.t val. +Definition genv := Genv.t code. + +Notation "a # b" := (a b) (at level 1, only parsing). +Notation "a # b <- c" := (Pregmap.set b c a) (at level 1, b at next level). + +Section RELSEM. + +(** Looking up instructions in a code sequence by position. *) + +Fixpoint find_instr (pos: Z) (c: code) {struct c} : option instruction := + match c with + | nil => None + | i :: il => if zeq pos 0 then Some i else find_instr (pos - 1) il + end. + +(** Position corresponding to a label *) + +Definition is_label (lbl: label) (instr: instruction) : bool := + match instr with + | Plabel lbl' => if peq lbl lbl' then true else false + | _ => false + end. + +Lemma is_label_correct: + forall lbl instr, + if is_label lbl instr then instr = Plabel lbl else instr <> Plabel lbl. +Proof. + intros. destruct instr; simpl; try discriminate. + case (peq lbl l); intro; congruence. +Qed. + +Fixpoint label_pos (lbl: label) (pos: Z) (c: code) {struct c} : option Z := + match c with + | nil => None + | instr :: c' => + if is_label lbl instr then Some (pos + 1) else label_pos lbl (pos + 1) c' + end. + +(** Some PowerPC instructions treat register GPR0 as the integer literal 0 + when that register is used in argument position. *) + +Definition gpr_or_zero (rs: regset) (r: ireg) := + if ireg_eq r GPR0 then Vzero else rs#r. + +Variable ge: genv. + +(** The four functions below axiomatize how the linker processes + symbolic references [symbol + offset] and splits their + actual values into two 16-bit halves, the lower half + being either signed or unsigned. *) + +Parameter low_half_signed: val -> val. +Parameter high_half_signed: val -> val. +Parameter low_half_unsigned: val -> val. +Parameter high_half_unsigned: val -> val. + +(** The fundamental property of these operations is that their + results can be recombined by addition or logical ``or'', + and this recombination rebuilds the original value. *) + +Axiom low_high_half_signed: + forall v, Val.add (low_half_signed v) (high_half_signed v) = v. +Axiom low_high_half_unsigned: + forall v, Val.or (low_half_unsigned v) (high_half_unsigned v) = v. + +(** The other axioms we take is that the results of + the [low_half] and [high_half] functions are of type [Tint], + i.e. either integers, pointers or undefined values. *) + +Axiom low_half_signed_type: + forall v, Val.has_type (low_half_signed v) Tint. +Axiom high_half_signed_type: + forall v, Val.has_type (high_half_signed v) Tint. +Axiom low_half_unsigned_type: + forall v, Val.has_type (low_half_unsigned v) Tint. +Axiom high_half_unsigned_type: + forall v, Val.has_type (high_half_unsigned v) Tint. + +(** Armed with the [low_half] and [high_half] functions, + we can define the evaluation of a symbolic constant. + Note that for [const_high], integer constants + are shifted left by 16 bits, but not symbol addresses: + we assume (as in the [low_high_half] axioms above) + that the results of [high_half] are already shifted + (their 16 low bits are equal to 0). *) + +Definition symbol_offset (id: ident) (ofs: int) : val := + match Genv.find_symbol ge id with + | Some b => Vptr b ofs + | None => Vundef + end. + +Definition const_low (c: constant) := + match c with + | Cint n => Vint n + | Csymbol_low_signed id ofs => low_half_signed (symbol_offset id ofs) + | Csymbol_high_signed id ofs => Vundef + | Csymbol_low_unsigned id ofs => low_half_unsigned (symbol_offset id ofs) + | Csymbol_high_unsigned id ofs => Vundef + end. + +Definition const_high (c: constant) := + match c with + | Cint n => Vint (Int.shl n (Int.repr 16)) + | Csymbol_low_signed id ofs => Vundef + | Csymbol_high_signed id ofs => high_half_signed (symbol_offset id ofs) + | Csymbol_low_unsigned id ofs => Vundef + | Csymbol_high_unsigned id ofs => high_half_unsigned (symbol_offset id ofs) + end. + +(** The semantics is purely small-step and defined as a function + from the current state (a register set + a memory state) + to either [OK rs' m'] where [rs'] and [m'] are the updated register + set and memory state after execution of the instruction at [rs#PC], + or [Error] if the processor is stuck. *) + +Inductive outcome: Set := + | OK: regset -> mem -> outcome + | Error: outcome. + +(** Manipulations over the [PC] register: continuing with the next + instruction ([nextinstr]) or branching to a label ([goto_label]). *) + +Definition nextinstr (rs: regset) := + rs#PC <- (Val.add rs#PC Vone). + +Definition goto_label (c: code) (lbl: label) (rs: regset) (m: mem) := + match label_pos lbl 0 c with + | None => Error + | Some pos => + match rs#PC with + | Vptr b ofs => OK (rs#PC <- (Vptr b (Int.repr pos))) m + | _ => Error + end + end. + +(** Auxiliaries for memory accesses, in two forms: one operand + (plus constant offset) or two operands. *) + +Definition load1 (chunk: memory_chunk) (rd: preg) + (cst: constant) (r1: ireg) (rs: regset) (m: mem) := + match Mem.loadv chunk m (Val.add (gpr_or_zero rs r1) (const_low cst)) with + | None => Error + | Some v => OK (nextinstr (rs#rd <- v)) m + end. + +Definition load2 (chunk: memory_chunk) (rd: preg) (r1 r2: ireg) + (rs: regset) (m: mem) := + match Mem.loadv chunk m (Val.add rs#r1 rs#r2) with + | None => Error + | Some v => OK (nextinstr (rs#rd <- v)) m + end. + +Definition store1 (chunk: memory_chunk) (r: preg) + (cst: constant) (r1: ireg) (rs: regset) (m: mem) := + match Mem.storev chunk m (Val.add (gpr_or_zero rs r1) (const_low cst)) (rs#r) with + | None => Error + | Some m' => OK (nextinstr rs) m' + end. + +Definition store2 (chunk: memory_chunk) (r: preg) (r1 r2: ireg) + (rs: regset) (m: mem) := + match Mem.storev chunk m (Val.add rs#r1 rs#r2) (rs#r) with + | None => Error + | Some m' => OK (nextinstr rs) m' + end. + +(** Operations over condition bits. *) + +Definition reg_of_crbit (bit: crbit) := + match bit with + | CRbit_0 => CR0_0 + | CRbit_1 => CR0_1 + | CRbit_2 => CR0_2 + | CRbit_3 => CR0_3 + end. + +Definition compare_sint (rs: regset) (v1 v2: val) := + rs#CR0_0 <- (Val.cmp Clt v1 v2) + #CR0_1 <- (Val.cmp Cgt v1 v2) + #CR0_2 <- (Val.cmp Ceq v1 v2) + #CR0_3 <- Vundef. + +Definition compare_uint (rs: regset) (v1 v2: val) := + rs#CR0_0 <- (Val.cmpu Clt v1 v2) + #CR0_1 <- (Val.cmpu Cgt v1 v2) + #CR0_2 <- (Val.cmpu Ceq v1 v2) + #CR0_3 <- Vundef. + +Definition compare_float (rs: regset) (v1 v2: val) := + rs#CR0_0 <- (Val.cmpf Clt v1 v2) + #CR0_1 <- (Val.cmpf Cgt v1 v2) + #CR0_2 <- (Val.cmpf Ceq v1 v2) + #CR0_3 <- Vundef. + +Definition val_cond_reg (rs: regset) := + Val.or (Val.shl rs#CR0_0 (Vint (Int.repr 31))) + (Val.or (Val.shl rs#CR0_1 (Vint (Int.repr 30))) + (Val.or (Val.shl rs#CR0_2 (Vint (Int.repr 29))) + (Val.shl rs#CR0_3 (Vint (Int.repr 28))))). + +(** Execution of a single instruction [i] in initial state + [rs] and [m]. Return updated state. For instructions + that correspond to actual PowerPC instructions, the cases are + straightforward transliterations of the informal descriptions + given in the PowerPC reference manuals. For pseudo-instructions, + refer to the informal descriptions given above. Note that + we set to [Vundef] the registers used as temporaries by the + expansions of the pseudo-instructions, so that the PPC code + we generate cannot use those registers to hold values that + must survive the execution of the pseudo-instruction. +*) + +Definition exec_instr (c: code) (i: instruction) (rs: regset) (m: mem) : outcome := + match i with + | Padd rd r1 r2 => + OK (nextinstr (rs#rd <- (Val.add rs#r1 rs#r2))) m + | Paddi rd r1 cst => + OK (nextinstr (rs#rd <- (Val.add (gpr_or_zero rs r1) (const_low cst)))) m + | Paddis rd r1 cst => + OK (nextinstr (rs#rd <- (Val.add (gpr_or_zero rs r1) (const_high cst)))) m + | Paddze rd r1 => + OK (nextinstr (rs#rd <- (Val.add rs#r1 rs#CARRY))) m + | Pallocframe lo hi => + let (m1, stk) := Mem.alloc m lo hi in + let sp := Vptr stk (Int.repr lo) in + match Mem.storev Mint32 m1 sp rs#GPR1 with + | None => Error + | Some m2 => OK (nextinstr (rs#GPR1 <- sp #GPR2 <- Vundef)) m2 + end + | Pand_ rd r1 r2 => + let v := Val.and rs#r1 rs#r2 in + OK (nextinstr (compare_sint (rs#rd <- v) v Vzero)) m + | Pandc rd r1 r2 => + OK (nextinstr (rs#rd <- (Val.and rs#r1 (Val.notint rs#r2)))) m + | Pandi_ rd r1 cst => + let v := Val.and rs#r1 (const_low cst) in + OK (nextinstr (compare_sint (rs#rd <- v) v Vzero)) m + | Pandis_ rd r1 cst => + let v := Val.and rs#r1 (const_high cst) in + OK (nextinstr (compare_sint (rs#rd <- v) v Vzero)) m + | Pb lbl => + goto_label c lbl rs m + | Pbctr => + OK (rs#PC <- (rs#CTR)) m + | Pbctrl => + OK (rs#LR <- (Val.add rs#PC Vone) #PC <- (rs#CTR)) m + | Pbf bit lbl => + match rs#(reg_of_crbit bit) with + | Vint n => if Int.eq n Int.zero then goto_label c lbl rs m else OK (nextinstr rs) m + | _ => Error + end + | Pbl ident => + OK (rs#LR <- (Val.add rs#PC Vone) #PC <- (symbol_offset ident Int.zero)) m + | Pblr => + OK (rs#PC <- (rs#LR)) m + | Pbt bit lbl => + match rs#(reg_of_crbit bit) with + | Vint n => if Int.eq n Int.zero then OK (nextinstr rs) m else goto_label c lbl rs m + | _ => Error + end + | Pcmplw r1 r2 => + OK (nextinstr (compare_uint rs rs#r1 rs#r2)) m + | Pcmplwi r1 cst => + OK (nextinstr (compare_uint rs rs#r1 (const_low cst))) m + | Pcmpw r1 r2 => + OK (nextinstr (compare_sint rs rs#r1 rs#r2)) m + | Pcmpwi r1 cst => + OK (nextinstr (compare_sint rs rs#r1 (const_low cst))) m + | Pcror bd b1 b2 => + OK (nextinstr (rs#(reg_of_crbit bd) <- (Val.or rs#(reg_of_crbit b1) rs#(reg_of_crbit b2)))) m + | Pdivw rd r1 r2 => + OK (nextinstr (rs#rd <- (Val.divs rs#r1 rs#r2))) m + | Pdivwu rd r1 r2 => + OK (nextinstr (rs#rd <- (Val.divu rs#r1 rs#r2))) m + | Peqv rd r1 r2 => + OK (nextinstr (rs#rd <- (Val.notint (Val.xor rs#r1 rs#r2)))) m + | Pextsb rd r1 => + OK (nextinstr (rs#rd <- (Val.cast8signed rs#r1))) m + | Pextsh rd r1 => + OK (nextinstr (rs#rd <- (Val.cast16signed rs#r1))) m + | Pfreeframe => + match Mem.loadv Mint32 m rs#GPR1 with + | None => Error + | Some v => + match rs#GPR1 with + | Vptr stk ofs => OK (nextinstr (rs#GPR1 <- v)) (Mem.free m stk) + | _ => Error + end + end + | Pfabs rd r1 => + OK (nextinstr (rs#rd <- (Val.absf rs#r1))) m + | Pfadd rd r1 r2 => + OK (nextinstr (rs#rd <- (Val.addf rs#r1 rs#r2))) m + | Pfcmpu r1 r2 => + OK (nextinstr (compare_float rs rs#r1 rs#r2)) m + | Pfcti rd r1 => + OK (nextinstr (rs#rd <- (Val.intoffloat rs#r1) #FPR13 <- Vundef)) m + | Pfdiv rd r1 r2 => + OK (nextinstr (rs#rd <- (Val.divf rs#r1 rs#r2))) m + | Pfmadd rd r1 r2 r3 => + OK (nextinstr (rs#rd <- (Val.addf (Val.mulf rs#r1 rs#r2) rs#r3))) m + | Pfmr rd r1 => + OK (nextinstr (rs#rd <- (rs#r1))) m + | Pfmsub rd r1 r2 r3 => + OK (nextinstr (rs#rd <- (Val.subf (Val.mulf rs#r1 rs#r2) rs#r3))) m + | Pfmul rd r1 r2 => + OK (nextinstr (rs#rd <- (Val.mulf rs#r1 rs#r2))) m + | Pfneg rd r1 => + OK (nextinstr (rs#rd <- (Val.negf rs#r1))) m + | Pfrsp rd r1 => + OK (nextinstr (rs#rd <- (Val.singleoffloat rs#r1))) m + | Pfsub rd r1 r2 => + OK (nextinstr (rs#rd <- (Val.subf rs#r1 rs#r2))) m + | Pictf rd r1 => + OK (nextinstr (rs#rd <- (Val.floatofint rs#r1) #GPR2 <- Vundef #FPR13 <- Vundef)) m + | Piuctf rd r1 => + OK (nextinstr (rs#rd <- (Val.floatofintu rs#r1) #GPR2 <- Vundef #FPR13 <- Vundef)) m + | Plbz rd cst r1 => + load1 Mint8unsigned rd cst r1 rs m + | Plbzx rd r1 r2 => + load2 Mint8unsigned rd r1 r2 rs m + | Plfd rd cst r1 => + load1 Mfloat64 rd cst r1 rs m + | Plfdx rd r1 r2 => + load2 Mfloat64 rd r1 r2 rs m + | Plfs rd cst r1 => + load1 Mfloat32 rd cst r1 rs m + | Plfsx rd r1 r2 => + load2 Mfloat32 rd r1 r2 rs m + | Plha rd cst r1 => + load1 Mint16signed rd cst r1 rs m + | Plhax rd r1 r2 => + load2 Mint16signed rd r1 r2 rs m + | Plhz rd cst r1 => + load1 Mint16unsigned rd cst r1 rs m + | Plhzx rd r1 r2 => + load2 Mint16unsigned rd r1 r2 rs m + | Plfi rd f => + OK (nextinstr (rs#rd <- (Vfloat f) #GPR2 <- Vundef)) m + | Plwz rd cst r1 => + load1 Mint32 rd cst r1 rs m + | Plwzx rd r1 r2 => + load2 Mint32 rd r1 r2 rs m + | Pmfcrbit rd bit => + OK (nextinstr (rs#rd <- (rs#(reg_of_crbit bit)))) m + | Pmflr rd => + OK (nextinstr (rs#rd <- (rs#LR))) m + | Pmr rd r1 => + OK (nextinstr (rs#rd <- (rs#r1))) m + | Pmtctr r1 => + OK (nextinstr (rs#CTR <- (rs#r1))) m + | Pmtlr r1 => + OK (nextinstr (rs#LR <- (rs#r1))) m + | Pmulli rd r1 cst => + OK (nextinstr (rs#rd <- (Val.mul rs#r1 (const_low cst)))) m + | Pmullw rd r1 r2 => + OK (nextinstr (rs#rd <- (Val.mul rs#r1 rs#r2))) m + | Pnand rd r1 r2 => + OK (nextinstr (rs#rd <- (Val.notint (Val.and rs#r1 rs#r2)))) m + | Pnor rd r1 r2 => + OK (nextinstr (rs#rd <- (Val.notint (Val.or rs#r1 rs#r2)))) m + | Por rd r1 r2 => + OK (nextinstr (rs#rd <- (Val.or rs#r1 rs#r2))) m + | Porc rd r1 r2 => + OK (nextinstr (rs#rd <- (Val.or rs#r1 (Val.notint rs#r2)))) m + | Pori rd r1 cst => + OK (nextinstr (rs#rd <- (Val.or rs#r1 (const_low cst)))) m + | Poris rd r1 cst => + OK (nextinstr (rs#rd <- (Val.or rs#r1 (const_high cst)))) m + | Prlwinm rd r1 amount mask => + OK (nextinstr (rs#rd <- (Val.rolm rs#r1 amount mask))) m + | Pslw rd r1 r2 => + OK (nextinstr (rs#rd <- (Val.shl rs#r1 rs#r2))) m + | Psraw rd r1 r2 => + OK (nextinstr (rs#rd <- (Val.shr rs#r1 rs#r2) #CARRY <- (Val.shr_carry rs#r1 rs#r2))) m + | Psrawi rd r1 n => + OK (nextinstr (rs#rd <- (Val.shr rs#r1 (Vint n)) #CARRY <- (Val.shr_carry rs#r1 (Vint n)))) m + | Psrw rd r1 r2 => + OK (nextinstr (rs#rd <- (Val.shru rs#r1 rs#r2))) m + | Pstb rd cst r1 => + store1 Mint8unsigned rd cst r1 rs m + | Pstbx rd r1 r2 => + store2 Mint8unsigned rd r1 r2 rs m + | Pstfd rd cst r1 => + store1 Mfloat64 rd cst r1 rs m + | Pstfdx rd r1 r2 => + store2 Mfloat64 rd r1 r2 rs m + | Pstfs rd cst r1 => + store1 Mfloat32 rd cst r1 rs m + | Pstfsx rd r1 r2 => + store2 Mfloat32 rd r1 r2 rs m + | Psth rd cst r1 => + store1 Mint16unsigned rd cst r1 rs m + | Psthx rd r1 r2 => + store2 Mint16unsigned rd r1 r2 rs m + | Pstw rd cst r1 => + store1 Mint32 rd cst r1 rs m + | Pstwx rd r1 r2 => + store2 Mint32 rd r1 r2 rs m + | Psubfc rd r1 r2 => + OK (nextinstr (rs#rd <- (Val.sub rs#r2 rs#r1) #CARRY <- Vundef)) m + | Psubfic rd r1 cst => + OK (nextinstr (rs#rd <- (Val.sub (const_low cst) rs#r1) #CARRY <- Vundef)) m + | Pxor rd r1 r2 => + OK (nextinstr (rs#rd <- (Val.xor rs#r1 rs#r2))) m + | Pxori rd r1 cst => + OK (nextinstr (rs#rd <- (Val.xor rs#r1 (const_low cst)))) m + | Pxoris rd r1 cst => + OK (nextinstr (rs#rd <- (Val.xor rs#r1 (const_high cst)))) m + | Piundef rd => + OK (nextinstr (rs#rd <- Vundef)) m + | Pfundef rd => + OK (nextinstr (rs#rd <- Vundef)) m + | Plabel lbl => + OK (nextinstr rs) m + end. + +(** Execution of the instruction at [rs#PC]. *) + +Inductive exec_step: regset -> mem -> regset -> mem -> Prop := + | exec_step_intro: + forall b ofs c i rs m rs' m', + rs PC = Vptr b ofs -> + Genv.find_funct_ptr ge b = Some c -> + find_instr (Int.unsigned ofs) c = Some i -> + exec_instr c i rs m = OK rs' m' -> + exec_step rs m rs' m'. + +(** Execution of zero, one or several instructions starting at [rs#PC]. *) + +Inductive exec_steps: regset -> mem -> regset -> mem -> Prop := + | exec_refl: + forall rs m, + exec_steps rs m rs m + | exec_one: + forall rs m rs' m', + exec_step rs m rs' m' -> + exec_steps rs m rs' m' + | exec_trans: + forall rs1 m1 rs2 m2 rs3 m3, + exec_steps rs1 m1 rs2 m2 -> + exec_steps rs2 m2 rs3 m3 -> + exec_steps rs1 m1 rs3 m3. + +End RELSEM. + +Definition exec_program (p: program) (r: val) : Prop := + let ge := Genv.globalenv p in + let m0 := Genv.init_mem p in + let rs0 := + (Pregmap.init Vundef) # PC <- (symbol_offset ge p.(prog_main) Int.zero) + # LR <- Vzero + # GPR1 <- (Vptr Mem.nullptr Int.zero) in + exists rs, exists m, + exec_steps ge rs0 m0 rs m /\ rs#PC = Vzero /\ rs#GPR3 = r. diff --git a/backend/PPCgen.v b/backend/PPCgen.v new file mode 100644 index 00000000..dc8ed40f --- /dev/null +++ b/backend/PPCgen.v @@ -0,0 +1,514 @@ +(** Translation from Mach to PPC. *) + +Require Import Coqlib. +Require Import Maps. +Require Import AST. +Require Import Integers. +Require Import Floats. +Require Import Values. +Require Import Mem. +Require Import Globalenvs. +Require Import Op. +Require Import Locations. +Require Import Mach. +Require Import PPC. + +(** Translation of the LTL/Linear/Mach view of machine registers + to the PPC view. PPC has two different types for registers + (integer and float) while LTL et al have only one. The + [ireg_of] and [freg_of] are therefore partial in principle. + To keep things simpler, we make them return nonsensical + results when applied to a LTL register of the wrong type. + The proof in [PPCgenproof] will show that this never happens. + + Note that no LTL register maps to [GPR2] nor [FPR13]. + These two registers are reserved as temporaries, to be used + by the generated PPC code. *) + +Definition ireg_of (r: mreg) : ireg := + match r with + | R3 => GPR3 | R4 => GPR4 | R5 => GPR5 | R6 => GPR6 + | R7 => GPR7 | R8 => GPR8 | R9 => GPR9 | R10 => GPR10 + | R13 => GPR13 | R14 => GPR14 | R15 => GPR15 | R16 => GPR16 + | R17 => GPR17 | R18 => GPR18 | R19 => GPR19 | R20 => GPR20 + | R21 => GPR21 | R22 => GPR22 | R23 => GPR23 | R24 => GPR24 + | R25 => GPR25 | R26 => GPR26 | R27 => GPR27 | R28 => GPR28 + | R29 => GPR29 | R30 => GPR30 | R31 => GPR31 + | IT1 => GPR11 | IT2 => GPR12 | IT3 => GPR0 + | _ => GPR0 (* should not happen *) + end. + +Definition freg_of (r: mreg) : freg := + match r with + | F1 => FPR1 | F2 => FPR2 | F3 => FPR3 | F4 => FPR4 + | F5 => FPR5 | F6 => FPR6 | F7 => FPR7 | F8 => FPR8 + | F9 => FPR9 | F10 => FPR10 | F14 => FPR14 | F15 => FPR15 + | F16 => FPR16 | F17 => FPR17 | F18 => FPR18 | F19 => FPR19 + | F20 => FPR20 | F21 => FPR21 | F22 => FPR22 | F23 => FPR23 + | F24 => FPR24 | F25 => FPR25 | F26 => FPR26 | F27 => FPR27 + | F28 => FPR28 | F29 => FPR29 | F30 => FPR30 | F31 => FPR31 + | FT1 => FPR0 | FT2 => FPR11 | FT3 => FPR12 + | _ => FPR0 (* should not happen *) + end. + +(** Decomposition of integer constants. As noted in file [PPC], + immediate arguments to PowerPC instructions must fit into 16 bits, + and are interpreted after zero extension, sign extension, or + left shift by 16 bits, depending on the instruction. Integer + constants that do not fit must be synthesized using two + processor instructions. The following functions decompose + arbitrary 32-bit integers into two 16-bit halves (high and low + halves). They satisfy the following properties: +- [low_u n] is an unsigned 16-bit integer; +- [low_s n] is a signed 16-bit integer; +- [(high_u n) << 16 | low_u n] equals [n]; +- [(high_s n) << 16 + low_s n] equals [n]. +*) + +Definition low_u (n: int) := Int.and n (Int.repr 65535). +Definition high_u (n: int) := Int.shru n (Int.repr 16). +Definition low_s (n: int) := Int.cast16signed n. +Definition high_s (n: int) := Int.shru (Int.sub n (low_s n)) (Int.repr 16). + +(** Smart constructors for arithmetic operations involving + a 32-bit integer constant. Depending on whether the + constant fits in 16 bits or not, one or several instructions + are generated as required to perform the operation + and prepended to the given instruction sequence [k]. *) + +Definition loadimm (r: ireg) (n: int) (k: code) := + if Int.eq (high_s n) Int.zero then + Paddi r GPR0 (Cint n) :: k + else if Int.eq (low_s n) Int.zero then + Paddis r GPR0 (Cint (high_s n)) :: k + else + Paddis r GPR0 (Cint (high_u n)) :: + Pori r r (Cint (low_u n)) :: k. + +Definition addimm_1 (r1 r2: ireg) (n: int) (k: code) := + if Int.eq (high_s n) Int.zero then + Paddi r1 r2 (Cint n) :: k + else if Int.eq (low_s n) Int.zero then + Paddis r1 r2 (Cint (high_s n)) :: k + else + Paddis r1 r2 (Cint (high_s n)) :: + Paddi r1 r1 (Cint (low_s n)) :: k. + +Definition addimm_2 (r1 r2: ireg) (n: int) (k: code) := + loadimm GPR2 n (Padd r1 r2 GPR2 :: k). + +Definition addimm (r1 r2: ireg) (n: int) (k: code) := + if ireg_eq r1 GPR0 then + addimm_2 r1 r2 n k + else if ireg_eq r2 GPR0 then + addimm_2 r1 r2 n k + else + addimm_1 r1 r2 n k. + +Definition andimm (r1 r2: ireg) (n: int) (k: code) := + if Int.eq (high_u n) Int.zero then + Pandi_ r1 r2 (Cint n) :: k + else if Int.eq (low_u n) Int.zero then + Pandis_ r1 r2 (Cint (high_u n)) :: k + else + loadimm GPR2 n (Pand_ r1 r2 GPR2 :: k). + +Definition orimm (r1 r2: ireg) (n: int) (k: code) := + if Int.eq (high_u n) Int.zero then + Pori r1 r2 (Cint n) :: k + else if Int.eq (low_u n) Int.zero then + Poris r1 r2 (Cint (high_u n)) :: k + else + Poris r1 r2 (Cint (high_u n)) :: + Pori r1 r1 (Cint (low_u n)) :: k. + +Definition xorimm (r1 r2: ireg) (n: int) (k: code) := + if Int.eq (high_u n) Int.zero then + Pxori r1 r2 (Cint n) :: k + else if Int.eq (low_u n) Int.zero then + Pxoris r1 r2 (Cint (high_u n)) :: k + else + Pxoris r1 r2 (Cint (high_u n)) :: + Pxori r1 r1 (Cint (low_u n)) :: k. + +(** Smart constructors for indexed loads and stores, + where the address is the contents of a register plus + an integer literal. *) + +Definition loadind_aux (base: ireg) (ofs: int) (ty: typ) (dst: mreg) := + match ty with + | Tint => Plwz (ireg_of dst) (Cint ofs) base + | Tfloat => Plfd (freg_of dst) (Cint ofs) base + end. + +Definition loadind (base: ireg) (ofs: int) (ty: typ) (dst: mreg) (k: code) := + if Int.eq (high_s ofs) Int.zero then + loadind_aux base ofs ty dst :: k + else + Paddis GPR2 base (Cint (high_s ofs)) :: + loadind_aux GPR2 (low_s ofs) ty dst :: k. + +Definition storeind_aux (src: mreg) (base: ireg) (ofs: int) (ty: typ) := + match ty with + | Tint => Pstw (ireg_of src) (Cint ofs) base + | Tfloat => Pstfd (freg_of src) (Cint ofs) base + end. + +Definition storeind (src: mreg) (base: ireg) (ofs: int) (ty: typ) (k: code) := + if Int.eq (high_s ofs) Int.zero then + storeind_aux src base ofs ty :: k + else + Paddis GPR2 base (Cint (high_s ofs)) :: + storeind_aux src GPR2 (low_s ofs) ty :: k. + +(** Constructor for a floating-point comparison. The PowerPC has + a single [fcmpu] instruction to compare floats, which sets + bits 0, 1 and 2 of the condition register to reflect ``less'', + ``greater'' and ``equal'' conditions, respectively. + The ``less or equal'' and ``greater or equal'' conditions must be + synthesized by a [cror] instruction that computes the logical ``or'' + of the corresponding two conditions. *) + +Definition floatcomp (cmp: comparison) (r1 r2: freg) (k: code) := + Pfcmpu r1 r2 :: + match cmp with + | Cle => Pcror CRbit_3 CRbit_2 CRbit_0 :: k + | Cge => Pcror CRbit_3 CRbit_2 CRbit_1 :: k + | _ => k + end. + +(** Translation of a condition. Prepends to [k] the instructions + that evaluate the condition and leave its boolean result in one of + the bits of the condition register. The bit in question is + determined by the [crbit_for_cond] function. *) + +Definition transl_cond + (cond: condition) (args: list mreg) (k: code) := + match cond, args with + | Ccomp c, a1 :: a2 :: nil => + Pcmpw (ireg_of a1) (ireg_of a2) :: k + | Ccompu c, a1 :: a2 :: nil => + Pcmplw (ireg_of a1) (ireg_of a2) :: k + | Ccompimm c n, a1 :: nil => + if Int.eq (high_s n) Int.zero then + Pcmpwi (ireg_of a1) (Cint n) :: k + else + loadimm GPR2 n (Pcmpw (ireg_of a1) GPR2 :: k) + | Ccompuimm c n, a1 :: nil => + if Int.eq (high_u n) Int.zero then + Pcmplwi (ireg_of a1) (Cint n) :: k + else + loadimm GPR2 n (Pcmplw (ireg_of a1) GPR2 :: k) + | Ccompf cmp, a1 :: a2 :: nil => + floatcomp cmp (freg_of a1) (freg_of a2) k + | Cnotcompf cmp, a1 :: a2 :: nil => + floatcomp cmp (freg_of a1) (freg_of a2) k + | Cmaskzero n, a1 :: nil => + andimm GPR2 (ireg_of a1) n k + | Cmasknotzero n, a1 :: nil => + andimm GPR2 (ireg_of a1) n k + | _, _ => + k (**r never happens for well-typed code *) + end. + +(* CRbit_0 = Less + CRbit_1 = Greater + CRbit_2 = Equal + CRbit_3 = Other *) + +Definition crbit_for_icmp (cmp: comparison) := + match cmp with + | Ceq => (CRbit_2, true) + | Cne => (CRbit_2, false) + | Clt => (CRbit_0, true) + | Cle => (CRbit_1, false) + | Cgt => (CRbit_1, true) + | Cge => (CRbit_0, false) + end. + +Definition crbit_for_fcmp (cmp: comparison) := + match cmp with + | Ceq => (CRbit_2, true) + | Cne => (CRbit_2, false) + | Clt => (CRbit_0, true) + | Cle => (CRbit_3, true) + | Cgt => (CRbit_1, true) + | Cge => (CRbit_3, true) + end. + +Definition crbit_for_cond (cond: condition) := + match cond with + | Ccomp cmp => crbit_for_icmp cmp + | Ccompu cmp => crbit_for_icmp cmp + | Ccompimm cmp n => crbit_for_icmp cmp + | Ccompuimm cmp n => crbit_for_icmp cmp + | Ccompf cmp => crbit_for_fcmp cmp + | Cnotcompf cmp => let p := crbit_for_fcmp cmp in (fst p, negb (snd p)) + | Cmaskzero n => (CRbit_2, true) + | Cmasknotzero n => (CRbit_2, false) + end. + +(** Translation of the arithmetic operation [r <- op(args)]. + The corresponding instructions are prepended to [k]. *) + +Definition transl_op + (op: operation) (args: list mreg) (r: mreg) (k: code) := + match op, args with + | Omove, a1 :: nil => + match mreg_type a1 with + | Tint => Pmr (ireg_of r) (ireg_of a1) :: k + | Tfloat => Pfmr (freg_of r) (freg_of a1) :: k + end + | Ointconst n, nil => + loadimm (ireg_of r) n k + | Ofloatconst f, nil => + Plfi (freg_of r) f :: k + | Oaddrsymbol s ofs, nil => + Paddis (ireg_of r) GPR0 (Csymbol_high_unsigned s ofs) :: + Pori (ireg_of r) (ireg_of r) (Csymbol_low_unsigned s ofs) :: k + | Oaddrstack n, nil => + addimm (ireg_of r) GPR1 n k + | Oundef, nil => + match mreg_type r with + | Tint => Piundef (ireg_of r) :: k + | Tfloat => Pfundef (freg_of r) :: k + end + | Ocast8signed, a1 :: nil => + Pextsb (ireg_of r) (ireg_of a1) :: k + | Ocast16signed, a1 :: nil => + Pextsh (ireg_of r) (ireg_of a1) :: k + | Oadd, a1 :: a2 :: nil => + Padd (ireg_of r) (ireg_of a1) (ireg_of a2) :: k + | Oaddimm n, a1 :: nil => + addimm (ireg_of r) (ireg_of a1) n k + | Osub, a1 :: a2 :: nil => + Psubfc (ireg_of r) (ireg_of a2) (ireg_of a1) :: k + | Osubimm n, a1 :: nil => + if Int.eq (high_s n) Int.zero then + Psubfic (ireg_of r) (ireg_of a1) (Cint n) :: k + else + loadimm GPR2 n (Psubfc (ireg_of r) (ireg_of a1) GPR2 :: k) + | Omul, a1 :: a2 :: nil => + Pmullw (ireg_of r) (ireg_of a1) (ireg_of a2) :: k + | Omulimm n, a1 :: nil => + if Int.eq (high_s n) Int.zero then + Pmulli (ireg_of r) (ireg_of a1) (Cint n) :: k + else + loadimm GPR2 n (Pmullw (ireg_of r) (ireg_of a1) GPR2 :: k) + | Odiv, a1 :: a2 :: nil => + Pdivw (ireg_of r) (ireg_of a1) (ireg_of a2) :: k + | Odivu, a1 :: a2 :: nil => + Pdivwu (ireg_of r) (ireg_of a1) (ireg_of a2) :: k + | Oand, a1 :: a2 :: nil => + Pand_ (ireg_of r) (ireg_of a1) (ireg_of a2) :: k + | Oandimm n, a1 :: nil => + andimm (ireg_of r) (ireg_of a1) n k + | Oor, a1 :: a2 :: nil => + Por (ireg_of r) (ireg_of a1) (ireg_of a2) :: k + | Oorimm n, a1 :: nil => + orimm (ireg_of r) (ireg_of a1) n k + | Oxor, a1 :: a2 :: nil => + Pxor (ireg_of r) (ireg_of a1) (ireg_of a2) :: k + | Oxorimm n, a1 :: nil => + xorimm (ireg_of r) (ireg_of a1) n k + | Onand, a1 :: a2 :: nil => + Pnand (ireg_of r) (ireg_of a1) (ireg_of a2) :: k + | Onor, a1 :: a2 :: nil => + Pnor (ireg_of r) (ireg_of a1) (ireg_of a2) :: k + | Onxor, a1 :: a2 :: nil => + Peqv (ireg_of r) (ireg_of a1) (ireg_of a2) :: k + | Oshl, a1 :: a2 :: nil => + Pslw (ireg_of r) (ireg_of a1) (ireg_of a2) :: k + | Oshr, a1 :: a2 :: nil => + Psraw (ireg_of r) (ireg_of a1) (ireg_of a2) :: k + | Oshrimm n, a1 :: nil => + Psrawi (ireg_of r) (ireg_of a1) n :: k + | Oshrximm n, a1 :: nil => + Psrawi (ireg_of r) (ireg_of a1) n :: + Paddze (ireg_of r) (ireg_of r) :: k + | Oshru, a1 :: a2 :: nil => + Psrw (ireg_of r) (ireg_of a1) (ireg_of a2) :: k + | Orolm amount mask, a1 :: nil => + Prlwinm (ireg_of r) (ireg_of a1) amount mask :: k + | Onegf, a1 :: nil => + Pfneg (freg_of r) (freg_of a1) :: k + | Oabsf, a1 :: nil => + Pfabs (freg_of r) (freg_of a1) :: k + | Oaddf, a1 :: a2 :: nil => + Pfadd (freg_of r) (freg_of a1) (freg_of a2) :: k + | Osubf, a1 :: a2 :: nil => + Pfsub (freg_of r) (freg_of a1) (freg_of a2) :: k + | Omulf, a1 :: a2 :: nil => + Pfmul (freg_of r) (freg_of a1) (freg_of a2) :: k + | Odivf, a1 :: a2 :: nil => + Pfdiv (freg_of r) (freg_of a1) (freg_of a2) :: k + | Omuladdf, a1 :: a2 :: a3 :: nil => + Pfmadd (freg_of r) (freg_of a1) (freg_of a2) (freg_of a3) :: k + | Omulsubf, a1 :: a2 :: a3 :: nil => + Pfmsub (freg_of r) (freg_of a1) (freg_of a2) (freg_of a3) :: k + | Osingleoffloat, a1 :: nil => + Pfrsp (freg_of r) (freg_of a1) :: k + | Ointoffloat, a1 :: nil => + Pfcti (ireg_of r) (freg_of a1) :: k + | Ofloatofint, a1 :: nil => + Pictf (freg_of r) (ireg_of a1) :: k + | Ofloatofintu, a1 :: nil => + Piuctf (freg_of r) (ireg_of a1) :: k + | Ocmp cmp, _ => + let p := crbit_for_cond cmp in + transl_cond cmp args + (Pmfcrbit (ireg_of r) (fst p) :: + if snd p + then k + else Pxori (ireg_of r) (ireg_of r) (Cint Int.one) :: k) + | _, _ => + k (**r never happens for well-typed code *) + end. + +(** Common code to translate [Mload] and [Mstore] instructions. *) + +Definition transl_load_store + (mk1: constant -> ireg -> instruction) + (mk2: ireg -> ireg -> instruction) + (addr: addressing) (args: list mreg) (k: code) := + match addr, args with + | Aindexed ofs, a1 :: nil => + if ireg_eq (ireg_of a1) GPR0 then + Pmr GPR2 (ireg_of a1) :: + Paddis GPR2 GPR2 (Cint (high_s ofs)) :: + mk1 (Cint (low_s ofs)) GPR2 :: k + else if Int.eq (high_s ofs) Int.zero then + mk1 (Cint ofs) (ireg_of a1) :: k + else + Paddis GPR2 (ireg_of a1) (Cint (high_s ofs)) :: + mk1 (Cint (low_s ofs)) GPR2 :: k + | Aindexed2, a1 :: a2 :: nil => + mk2 (ireg_of a1) (ireg_of a2) :: k + | Aglobal symb ofs, nil => + Paddis GPR2 GPR0 (Csymbol_high_signed symb ofs) :: + mk1 (Csymbol_low_signed symb ofs) GPR2 :: k + | Abased symb ofs, a1 :: nil => + if ireg_eq (ireg_of a1) GPR0 then + Pmr GPR2 (ireg_of a1) :: + Paddis GPR2 GPR2 (Csymbol_high_signed symb ofs) :: + mk1 (Csymbol_low_signed symb ofs) GPR2 :: k + else + Paddis GPR2 (ireg_of a1) (Csymbol_high_signed symb ofs) :: + mk1 (Csymbol_low_signed symb ofs) GPR2 :: k + | Ainstack ofs, nil => + if Int.eq (high_s ofs) Int.zero then + mk1 (Cint ofs) GPR1 :: k + else + Paddis GPR2 GPR1 (Cint (high_s ofs)) :: + mk1 (Cint (low_s ofs)) GPR2 :: k + | _, _ => + (* should not happen *) k + end. + +(** Translation of a Mach instruction. *) + +Definition transl_instr (i: Mach.instruction) (k: code) := + match i with + | Mgetstack ofs ty dst => + loadind GPR1 ofs ty dst k + | Msetstack src ofs ty => + storeind src GPR1 ofs ty k + | Mgetparam ofs ty dst => + Plwz GPR2 (Cint (Int.repr 0)) GPR1 :: loadind GPR2 ofs ty dst k + | Mop op args res => + transl_op op args res k + | Mload chunk addr args dst => + match chunk with + | Mint8signed => + transl_load_store + (Plbz (ireg_of dst)) (Plbzx (ireg_of dst)) addr args + (Pextsb (ireg_of dst) (ireg_of dst) :: k) + | Mint8unsigned => + transl_load_store + (Plbz (ireg_of dst)) (Plbzx (ireg_of dst)) addr args k + | Mint16signed => + transl_load_store + (Plha (ireg_of dst)) (Plhax (ireg_of dst)) addr args k + | Mint16unsigned => + transl_load_store + (Plhz (ireg_of dst)) (Plhzx (ireg_of dst)) addr args k + | Mint32 => + transl_load_store + (Plwz (ireg_of dst)) (Plwzx (ireg_of dst)) addr args k + | Mfloat32 => + transl_load_store + (Plfs (freg_of dst)) (Plfsx (freg_of dst)) addr args k + | Mfloat64 => + transl_load_store + (Plfd (freg_of dst)) (Plfdx (freg_of dst)) addr args k + end + | Mstore chunk addr args src => + match chunk with + | Mint8signed => + transl_load_store + (Pstb (ireg_of src)) (Pstbx (ireg_of src)) addr args k + | Mint8unsigned => + transl_load_store + (Pstb (ireg_of src)) (Pstbx (ireg_of src)) addr args k + | Mint16signed => + transl_load_store + (Psth (ireg_of src)) (Psthx (ireg_of src)) addr args k + | Mint16unsigned => + transl_load_store + (Psth (ireg_of src)) (Psthx (ireg_of src)) addr args k + | Mint32 => + transl_load_store + (Pstw (ireg_of src)) (Pstwx (ireg_of src)) addr args k + | Mfloat32 => + transl_load_store + (Pstfs (freg_of src)) (Pstfsx (freg_of src)) addr args k + | Mfloat64 => + transl_load_store + (Pstfd (freg_of src)) (Pstfdx (freg_of src)) addr args k + end + | Mcall sig (inl r) => + Pmtctr (ireg_of r) :: Pbctrl :: k + | Mcall sig (inr symb) => + Pbl symb :: k + | Mlabel lbl => + Plabel lbl :: k + | Mgoto lbl => + Pb lbl :: k + | Mcond cond args lbl => + let p := crbit_for_cond cond in + transl_cond cond args + (if (snd p) then Pbt (fst p) lbl :: k else Pbf (fst p) lbl :: k) + | Mreturn => + Plwz GPR2 (Cint (Int.repr 4)) GPR1 :: + Pmtlr GPR2 :: Pfreeframe :: Pblr :: k + end. + +Definition transl_code (il: list Mach.instruction) := + List.fold_right transl_instr nil il. + +(** Translation of a whole function. Note that we must check + that the generated code contains less than [2^32] instructions, + otherwise the offset part of the [PC] code pointer could wrap + around, leading to incorrect executions. *) + +Definition transl_function (f: Mach.function) := + Pallocframe (- f.(fn_framesize)) + (align_16_top (-f.(fn_framesize)) f.(fn_stacksize)) :: + Pmflr GPR2 :: + Pstw GPR2 (Cint (Int.repr 4)) GPR1 :: + transl_code f.(fn_code). + +Fixpoint code_size (c: code) : Z := + match c with + | nil => 0 + | instr :: c' => code_size c' + 1 + end. + +Definition transf_function (f: Mach.function) := + let c := transl_function f in + if zlt Int.max_unsigned (code_size c) + then None + else Some c. + +Definition transf_program (p: Mach.program) : option PPC.program := + transform_partial_program transf_function p. diff --git a/backend/PPCgenproof.v b/backend/PPCgenproof.v new file mode 100644 index 00000000..d89046fd --- /dev/null +++ b/backend/PPCgenproof.v @@ -0,0 +1,1242 @@ +(** Correctness proof for PPC generation: main proof. *) + +Require Import Coqlib. +Require Import Maps. +Require Import AST. +Require Import Integers. +Require Import Floats. +Require Import Values. +Require Import Mem. +Require Import Globalenvs. +Require Import Op. +Require Import Locations. +Require Import Mach. +Require Import Machtyping. +Require Import PPC. +Require Import PPCgen. +Require Import PPCgenproof1. + +Section PRESERVATION. + +Variable prog: Mach.program. +Variable tprog: PPC.program. +Hypothesis TRANSF: transf_program prog = Some tprog. + +Let ge := Genv.globalenv prog. +Let tge := Genv.globalenv tprog. + +Lemma symbols_preserved: + forall id, Genv.find_symbol tge id = Genv.find_symbol ge id. +Proof. + intros. unfold ge, tge. + apply Genv.find_symbol_transf_partial with transf_function. + exact TRANSF. +Qed. + +Lemma functions_translated: + forall f b, + Genv.find_funct_ptr ge b = Some f -> + Genv.find_funct_ptr tge b = Some (transl_function f). +Proof. + intros. + generalize (Genv.find_funct_ptr_transf_partial + transf_function TRANSF H). + intros [A B]. + unfold tge. change code with (list instruction). rewrite A. + generalize B. unfold transf_function. + case (zlt Int.max_unsigned (code_size (transl_function f))); intro. + tauto. auto. +Qed. + +Lemma functions_translated_2: + forall f v, + Genv.find_funct ge v = Some f -> + Genv.find_funct tge v = Some (transl_function f). +Proof. + intros. + generalize (Genv.find_funct_transf_partial + transf_function TRANSF H). + intros [A B]. + unfold tge. change code with (list instruction). rewrite A. + generalize B. unfold transf_function. + case (zlt Int.max_unsigned (code_size (transl_function f))); intro. + tauto. auto. +Qed. + +Lemma functions_translated_no_overflow: + forall b f, + Genv.find_funct_ptr ge b = Some f -> + code_size (transl_function f) <= Int.max_unsigned. +Proof. + intros. + generalize (Genv.find_funct_ptr_transf_partial + transf_function TRANSF H). + intros [A B]. + generalize B. + unfold transf_function. + case (zlt Int.max_unsigned (code_size (transl_function f))); intro. + tauto. intro. omega. +Qed. + +(** * Properties of control flow *) + +Lemma find_instr_in: + forall c pos i, + find_instr pos c = Some i -> In i c. +Proof. + induction c; simpl. intros; discriminate. + intros until i. case (zeq pos 0); intros. + left; congruence. right; eauto. +Qed. + +(** The ``code tail'' of an instruction list [c] is the list of instructions + starting at PC [pos]. *) + +Fixpoint code_tail (pos: Z) (c: code) {struct c} : code := + match c with + | nil => nil + | i :: il => if zeq pos 0 then c else code_tail (pos - 1) il + end. +Proof. + +Lemma find_instr_tail: + forall c pos, + find_instr pos c = + match code_tail pos c with nil => None | i1 :: il => Some i1 end. +Proof. + induction c; simpl; intros. + auto. + case (zeq pos 0); auto. +Qed. + +Remark code_size_pos: + forall fn, code_size fn >= 0. +Proof. + induction fn; simpl; omega. +Qed. + +Remark code_tail_bounds: + forall fn ofs i c, + code_tail ofs fn = i :: c -> 0 <= ofs < code_size fn. +Proof. + induction fn; simpl. + intros; discriminate. + intros until c. case (zeq ofs 0); intros. + generalize (code_size_pos fn). omega. + generalize (IHfn _ _ _ H). omega. +Qed. + +Remark code_tail_unfold: + forall ofs i c, + ofs >= 0 -> + code_tail (ofs + 1) (i :: c) = code_tail ofs c. +Proof. + intros. simpl. case (zeq (ofs + 1) 0); intros. + omegaContradiction. + replace (ofs + 1 - 1) with ofs. auto. omega. +Qed. + +Remark code_tail_zero: + forall fn, code_tail 0 fn = fn. +Proof. + intros. destruct fn; simpl. auto. auto. +Qed. + +Lemma code_tail_next: + forall fn ofs i c, + code_tail ofs fn = i :: c -> + code_tail (ofs + 1) fn = c. +Proof. + induction fn. + simpl; intros; discriminate. + intros until c. case (zeq ofs 0); intro. + subst ofs. intros. rewrite code_tail_zero in H. injection H. + intros. subst c. rewrite code_tail_unfold. apply code_tail_zero. + omega. + intro; generalize (code_tail_bounds _ _ _ _ H); intros [A B]. + assert (ofs = (ofs - 1) + 1). omega. + rewrite H0 in H. rewrite code_tail_unfold in H. + rewrite code_tail_unfold. rewrite H0. eauto. + omega. omega. +Qed. + +Lemma code_tail_next_int: + forall fn ofs i c, + code_size fn <= Int.max_unsigned -> + code_tail (Int.unsigned ofs) fn = i :: c -> + code_tail (Int.unsigned (Int.add ofs Int.one)) fn = c. +Proof. + intros. rewrite Int.add_unsigned. unfold Int.one. + repeat rewrite Int.unsigned_repr. apply code_tail_next with i; auto. + compute; intuition congruence. + generalize (code_tail_bounds _ _ _ _ H0). omega. + compute; intuition congruence. +Qed. + +(** [transl_code_at_pc pc fn c] holds if the code pointer [pc] points + within the PPC code generated by translating Mach function [fn], + and [c] is the tail of the generated code at the position corresponding + to the code pointer [pc]. *) + +Inductive transl_code_at_pc: val -> Mach.function -> Mach.code -> Prop := + transl_code_at_pc_intro: + forall b ofs f c, + Genv.find_funct_ptr ge b = Some f -> + code_tail (Int.unsigned ofs) (transl_function f) = transl_code c -> + transl_code_at_pc (Vptr b ofs) f c. + +(** The following lemmas show that straight-line executions + (predicate [exec_straight]) correspond to correct PPC executions + (predicate [exec_steps]) under adequate [transl_code_at_pc] hypotheses. *) + +Lemma exec_straight_steps_1: + forall fn c rs m c' rs' m', + exec_straight tge fn c rs m c' rs' m' -> + code_size fn <= Int.max_unsigned -> + forall b ofs, + rs#PC = Vptr b ofs -> + Genv.find_funct_ptr tge b = Some fn -> + code_tail (Int.unsigned ofs) fn = c -> + exec_steps tge rs m rs' m'. +Proof. + induction 1. + intros. apply exec_refl. + intros. apply exec_trans with rs2 m2. + apply exec_one; econstructor; eauto. + rewrite find_instr_tail. rewrite H5. auto. + apply IHexec_straight with b (Int.add ofs Int.one). + auto. rewrite H0. rewrite H3. reflexivity. + auto. + apply code_tail_next_int with i; auto. +Qed. + +Lemma exec_straight_steps_2: + forall fn c rs m c' rs' m', + exec_straight tge fn c rs m c' rs' m' -> + code_size fn <= Int.max_unsigned -> + forall b ofs, + rs#PC = Vptr b ofs -> + Genv.find_funct_ptr tge b = Some fn -> + code_tail (Int.unsigned ofs) fn = c -> + exists ofs', + rs'#PC = Vptr b ofs' + /\ code_tail (Int.unsigned ofs') fn = c'. +Proof. + induction 1; intros. + exists ofs. split. auto. auto. + apply IHexec_straight with (Int.add ofs Int.one). + auto. rewrite H0. rewrite H3. reflexivity. auto. + apply code_tail_next_int with i; auto. +Qed. + +Lemma exec_straight_steps: + forall f c c' rs m rs' m', + transl_code_at_pc (rs PC) f c -> + exec_straight tge (transl_function f) + (transl_code c) rs m (transl_code c') rs' m' -> + exec_steps tge rs m rs' m' /\ transl_code_at_pc (rs' PC) f c'. +Proof. + intros. inversion H. + generalize (functions_translated_no_overflow _ _ H2). intro. + generalize (functions_translated _ _ H2). intro. + split. eapply exec_straight_steps_1; eauto. + generalize (exec_straight_steps_2 _ _ _ _ _ _ _ + H0 H6 _ _ (sym_equal H1) H7 H3). + intros [ofs' [PC' CT']]. + rewrite PC'. constructor; auto. +Qed. + +(** The [find_label] function returns the code tail starting at the + given label. A connection with [code_tail] is then established. *) + +Fixpoint find_label (lbl: label) (c: code) {struct c} : option code := + match c with + | nil => None + | instr :: c' => + if is_label lbl instr then Some c' else find_label lbl c' + end. + +Lemma label_pos_code_tail: + forall lbl c pos c', + find_label lbl c = Some c' -> + exists pos', + label_pos lbl pos c = Some pos' + /\ code_tail (pos' - pos) c = c' + /\ pos < pos' <= pos + code_size c. +Proof. + induction c. + simpl; intros. discriminate. + simpl; intros until c'. + case (is_label lbl a). + intro EQ; injection EQ; intro; subst c'. + exists (pos + 1). split. auto. split. + rewrite zeq_false. replace (pos + 1 - pos - 1) with 0. + apply code_tail_zero. omega. omega. + generalize (code_size_pos c). omega. + intros. generalize (IHc (pos + 1) c' H). intros [pos' [A [B C]]]. + exists pos'. split. auto. split. + rewrite zeq_false. replace (pos' - pos - 1) with (pos' - (pos + 1)). + auto. omega. omega. omega. +Qed. + +(** The following lemmas show that the translation from Mach to PPC + preserves labels, in the sense that the following diagram commutes: +<< + translation + Mach code ------------------------ PPC instr sequence + | | + | Mach.find_label lbl find_label lbl | + | | + v v + Mach code tail ------------------- PPC instr seq tail + translation +>> + The proof demands many boring lemmas showing that PPC constructor + functions do not introduce new labels. +*) + +Section TRANSL_LABEL. + +Variable lbl: label. + +Remark loadimm_label: + forall r n k, find_label lbl (loadimm r n k) = find_label lbl k. +Proof. + intros. unfold loadimm. + case (Int.eq (high_s n) Int.zero). reflexivity. + case (Int.eq (low_s n) Int.zero). reflexivity. + reflexivity. +Qed. +Hint Rewrite loadimm_label: labels. + +Remark addimm_1_label: + forall r1 r2 n k, find_label lbl (addimm_1 r1 r2 n k) = find_label lbl k. +Proof. + intros; unfold addimm_1. + case (Int.eq (high_s n) Int.zero). reflexivity. + case (Int.eq (low_s n) Int.zero). reflexivity. reflexivity. +Qed. +Remark addimm_2_label: + forall r1 r2 n k, find_label lbl (addimm_2 r1 r2 n k) = find_label lbl k. +Proof. + intros; unfold addimm_2. autorewrite with labels. reflexivity. +Qed. +Remark addimm_label: + forall r1 r2 n k, find_label lbl (addimm r1 r2 n k) = find_label lbl k. +Proof. + intros; unfold addimm. + case (ireg_eq r1 GPR0); intro. apply addimm_2_label. + case (ireg_eq r2 GPR0); intro. apply addimm_2_label. + apply addimm_1_label. +Qed. +Hint Rewrite addimm_label: labels. + +Remark andimm_label: + forall r1 r2 n k, find_label lbl (andimm r1 r2 n k) = find_label lbl k. +Proof. + intros; unfold andimm. + case (Int.eq (high_u n) Int.zero). reflexivity. + case (Int.eq (low_u n) Int.zero). reflexivity. + autorewrite with labels. reflexivity. +Qed. +Hint Rewrite andimm_label: labels. + +Remark orimm_label: + forall r1 r2 n k, find_label lbl (orimm r1 r2 n k) = find_label lbl k. +Proof. + intros; unfold orimm. + case (Int.eq (high_u n) Int.zero). reflexivity. + case (Int.eq (low_u n) Int.zero). reflexivity. reflexivity. +Qed. +Hint Rewrite orimm_label: labels. + +Remark xorimm_label: + forall r1 r2 n k, find_label lbl (xorimm r1 r2 n k) = find_label lbl k. +Proof. + intros; unfold xorimm. + case (Int.eq (high_u n) Int.zero). reflexivity. + case (Int.eq (low_u n) Int.zero). reflexivity. reflexivity. +Qed. +Hint Rewrite xorimm_label: labels. + +Remark loadind_aux_label: + forall base ofs ty dst k, find_label lbl (loadind_aux base ofs ty dst :: k) = find_label lbl k. +Proof. + intros; unfold loadind_aux. + case ty; reflexivity. +Qed. +Remark loadind_label: + forall base ofs ty dst k, find_label lbl (loadind base ofs ty dst k) = find_label lbl k. +Proof. + intros; unfold loadind. + case (Int.eq (high_s ofs) Int.zero). apply loadind_aux_label. + transitivity (find_label lbl (loadind_aux GPR2 (low_s ofs) ty dst :: k)). + reflexivity. apply loadind_aux_label. +Qed. +Hint Rewrite loadind_label: labels. +Remark storeind_aux_label: + forall base ofs ty dst k, find_label lbl (storeind_aux base ofs ty dst :: k) = find_label lbl k. +Proof. + intros; unfold storeind_aux. + case dst; reflexivity. +Qed. +Remark storeind_label: + forall base ofs ty src k, find_label lbl (storeind base src ofs ty k) = find_label lbl k. +Proof. + intros; unfold storeind. + case (Int.eq (high_s ofs) Int.zero). apply storeind_aux_label. + transitivity (find_label lbl (storeind_aux base GPR2 (low_s ofs) ty :: k)). + reflexivity. apply storeind_aux_label. +Qed. +Hint Rewrite storeind_label: labels. +Remark floatcomp_label: + forall cmp r1 r2 k, find_label lbl (floatcomp cmp r1 r2 k) = find_label lbl k. +Proof. + intros; unfold floatcomp. destruct cmp; reflexivity. +Qed. + +Remark transl_cond_label: + forall cond args k, find_label lbl (transl_cond cond args k) = find_label lbl k. +Proof. + intros; unfold transl_cond. + destruct cond; (destruct args; + [try reflexivity | destruct args; + [try reflexivity | destruct args; try reflexivity]]). + case (Int.eq (high_s i) Int.zero). reflexivity. + autorewrite with labels; reflexivity. + case (Int.eq (high_u i) Int.zero). reflexivity. + autorewrite with labels; reflexivity. + apply floatcomp_label. apply floatcomp_label. + apply andimm_label. apply andimm_label. +Qed. +Hint Rewrite transl_cond_label: labels. +Remark transl_op_label: + forall op args r k, find_label lbl (transl_op op args r k) = find_label lbl k. +Proof. + intros; unfold transl_op; + destruct op; destruct args; try (destruct args); try (destruct args); try (destruct args); + try reflexivity; autorewrite with labels; try reflexivity. + case (mreg_type m); reflexivity. + case (mreg_type r); reflexivity. + case (Int.eq (high_s i) Int.zero); autorewrite with labels; reflexivity. + case (Int.eq (high_s i) Int.zero); autorewrite with labels; reflexivity. + case (snd (crbit_for_cond c)); reflexivity. + case (snd (crbit_for_cond c)); reflexivity. + case (snd (crbit_for_cond c)); reflexivity. + case (snd (crbit_for_cond c)); reflexivity. + case (snd (crbit_for_cond c)); reflexivity. +Qed. +Hint Rewrite transl_op_label: labels. + +Remark transl_load_store_label: + forall (mk1: constant -> ireg -> instruction) (mk2: ireg -> ireg -> instruction) + addr args k, + (forall c r, is_label lbl (mk1 c r) = false) -> + (forall r1 r2, is_label lbl (mk2 r1 r2) = false) -> + find_label lbl (transl_load_store mk1 mk2 addr args k) = find_label lbl k. +Proof. + intros; unfold transl_load_store. + destruct addr; destruct args; try (destruct args); try (destruct args); + try reflexivity. + case (ireg_eq (ireg_of m) GPR0); intro. + simpl. rewrite H. auto. + case (Int.eq (high_s i) Int.zero). simpl; rewrite H; auto. + simpl; rewrite H; auto. + simpl; rewrite H0; auto. + simpl; rewrite H; auto. + case (ireg_eq (ireg_of m) GPR0); intro; simpl; rewrite H; auto. + case (Int.eq (high_s i) Int.zero); simpl; rewrite H; auto. +Qed. +Hint Rewrite transl_load_store_label: labels. + +Lemma transl_instr_label: + forall i k, + find_label lbl (transl_instr i k) = + if Mach.is_label lbl i then Some k else find_label lbl k. +Proof. + intros. generalize (Mach.is_label_correct lbl i). + case (Mach.is_label lbl i); intro. + subst i. simpl. rewrite peq_true. auto. + destruct i; simpl; autorewrite with labels; try reflexivity. + destruct m; rewrite transl_load_store_label; intros; reflexivity. + destruct m; rewrite transl_load_store_label; intros; reflexivity. + destruct s0; reflexivity. + rewrite peq_false. auto. congruence. + case (snd (crbit_for_cond c)); reflexivity. +Qed. + +Lemma transl_code_label: + forall c, + find_label lbl (transl_code c) = + option_map transl_code (Mach.find_label lbl c). +Proof. + induction c; simpl; intros. + auto. rewrite transl_instr_label. + case (Mach.is_label lbl a). reflexivity. + auto. +Qed. + +Lemma transl_find_label: + forall f, + find_label lbl (transl_function f) = + option_map transl_code (Mach.find_label lbl f.(fn_code)). +Proof. + intros. unfold transl_function. simpl. apply transl_code_label. +Qed. + +End TRANSL_LABEL. + +(** A valid branch in a piece of Mach code translates to a valid ``go to'' + transition in the generated PPC code. *) + +Lemma find_label_goto_label: + forall f lbl rs m c' b ofs, + Genv.find_funct_ptr ge b = Some f -> + rs PC = Vptr b ofs -> + Mach.find_label lbl f.(fn_code) = Some c' -> + exists rs', + goto_label (transl_function f) lbl rs m = OK rs' m + /\ transl_code_at_pc (rs' PC) f c' + /\ forall r, r <> PC -> rs'#r = rs#r. +Proof. + intros. + generalize (transl_find_label lbl f). + rewrite H1; simpl. intro. + generalize (label_pos_code_tail lbl (transl_function f) 0 + (transl_code c') H2). + intros [pos' [A [B C]]]. + exists (rs#PC <- (Vptr b (Int.repr pos'))). + split. unfold goto_label. rewrite A. rewrite H0. auto. + split. rewrite Pregmap.gss. constructor. auto. + rewrite Int.unsigned_repr. replace (pos' - 0) with pos' in B. + auto. omega. + generalize (functions_translated_no_overflow _ _ H). + omega. + intros. apply Pregmap.gso; auto. +Qed. + +(** * Memory properties *) + +(** The PowerPC has no instruction for ``load 8-bit signed integer''. + We show that it can be synthesized as a ``load 8-bit unsigned integer'' + followed by a sign extension. *) + +Lemma loadv_8_signed_unsigned: + forall m a, + Mem.loadv Mint8signed m a = + option_map Val.cast8signed (Mem.loadv Mint8unsigned m a). +Proof. + intros. unfold Mem.loadv. destruct a; try reflexivity. + unfold load. case (zlt b (nextblock m)); intro. + change (in_bounds Mint8unsigned (Int.signed i) (blocks m b)) + with (in_bounds Mint8signed (Int.signed i) (blocks m b)). + case (in_bounds Mint8signed (Int.signed i) (blocks m b)). + change (mem_chunk Mint8unsigned) with (mem_chunk Mint8signed). + set (v := (load_contents (mem_chunk Mint8signed) + (contents (blocks m b)) (Int.signed i))). + unfold Val.load_result. destruct v; try reflexivity. + simpl. rewrite Int.cast8_signed_unsigned. auto. + reflexivity. reflexivity. +Qed. + +(** Similarly, we show that signed 8- and 16-bit stores can be performed + like unsigned stores. *) + +Lemma storev_8_signed_unsigned: + forall m a v, + Mem.storev Mint8signed m a v = Mem.storev Mint8unsigned m a v. +Proof. + intros. reflexivity. +Qed. + +Lemma storev_16_signed_unsigned: + forall m a v, + Mem.storev Mint16signed m a v = Mem.storev Mint16unsigned m a v. +Proof. + intros. reflexivity. +Qed. + +(** * Proof of semantic preservation *) + +(** The invariants for the inductive proof of simulation are as follows. + The simulation diagrams are of the form: +<< + c1, ms1, m1 --------------------- rs1, m1 + | | + | | + v v + c2, ms2, m2 --------------------- rs2, m2 +>> + Left: execution of one Mach instruction. Right: execution of zero, one + or several instructions. Precondition (top): agreement between + the Mach register set [ms1] and the PPC register set [rs1]; moreover, + [rs1 PC] points to the translation of code [c1]. Postcondition (bottom): + similar. +*) + +Definition exec_instr_prop + (f: Mach.function) (sp: val) + (c1: Mach.code) (ms1: Mach.regset) (m1: mem) + (c2: Mach.code) (ms2: Mach.regset) (m2: mem) := + forall rs1 + (WTF: wt_function f) + (INCL: incl c1 f.(fn_code)) + (AT: transl_code_at_pc (rs1 PC) f c1) + (AG: agree ms1 sp rs1), + exists rs2, + agree ms2 sp rs2 + /\ exec_steps tge rs1 m1 rs2 m2 + /\ transl_code_at_pc (rs2 PC) f c2. + +Definition exec_function_body_prop + (f: Mach.function) (parent: val) (ra: val) + (ms1: Mach.regset) (m1: mem) + (ms2: Mach.regset) (m2: mem) := + forall rs1 + (WTRA: Val.has_type ra Tint) + (RALR: rs1 LR = ra) + (WTF: wt_function f) + (AT: Genv.find_funct ge (rs1 PC) = Some f) + (AG: agree ms1 parent rs1), + exists rs2, + agree ms2 parent rs2 + /\ exec_steps tge rs1 m1 rs2 m2 + /\ rs2 PC = rs1 LR. + +Definition exec_function_prop + (f: Mach.function) (parent: val) + (ms1: Mach.regset) (m1: mem) + (ms2: Mach.regset) (m2: mem) := + forall rs1 + (WTF: wt_function f) + (AT: Genv.find_funct ge (rs1 PC) = Some f) + (AG: agree ms1 parent rs1) + (WTRA: Val.has_type (rs1 LR) Tint), + exists rs2, + agree ms2 parent rs2 + /\ exec_steps tge rs1 m1 rs2 m2 + /\ rs2 PC = rs1 LR. + +(** We show each case of the inductive proof of simulation as a separate + lemma. *) + +Lemma exec_Mlabel_prop: + forall (f : function) (sp : val) (lbl : Mach.label) + (c : list Mach.instruction) (rs : Mach.regset) (m : mem), + exec_instr_prop f sp (Mlabel lbl :: c) rs m c rs m. +Proof. + intros; red; intros. + assert (exec_straight tge (transl_function f) + (transl_code (Mlabel lbl :: c)) rs1 m + (transl_code c) (nextinstr rs1) m). + simpl. apply exec_straight_one. reflexivity. reflexivity. + exists (nextinstr rs1). split. apply agree_nextinstr; auto. + eapply exec_straight_steps; eauto. +Qed. + +Lemma exec_Mgetstack_prop: + forall (f : function) (sp : val) (ofs : int) (ty : typ) (dst : mreg) + (c : list Mach.instruction) (ms : Mach.regset) (m : mem) (v : val), + load_stack m sp ty ofs = Some v -> + exec_instr_prop f sp (Mgetstack ofs ty dst :: c) ms m c (Regmap.set dst v ms) m. +Proof. + intros; red; intros. + unfold load_stack in H. + generalize (wt_function_instrs _ WTF _ (INCL _ (in_eq _ _))). + intro WTI. inversion WTI. + rewrite (sp_val _ _ _ AG) in H. + assert (NOTE: GPR1 <> GPR0). congruence. + generalize (loadind_correct tge (transl_function f) GPR1 ofs ty + dst (transl_code c) rs1 m v H H1 NOTE). + intros [rs2 [EX [RES OTH]]]. + exists rs2. split. + apply agree_exten_2 with (rs1#(preg_of dst) <- v). + auto with ppcgen. + intros. case (preg_eq r0 (preg_of dst)); intro. + subst r0. rewrite Pregmap.gss. auto. + rewrite Pregmap.gso; auto. + eapply exec_straight_steps; eauto. +Qed. + +Lemma exec_Msetstack_prop: + forall (f : function) (sp : val) (src : mreg) (ofs : int) (ty : typ) + (c : list Mach.instruction) (ms : mreg -> val) (m m' : mem), + store_stack m sp ty ofs (ms src) = Some m' -> + exec_instr_prop f sp (Msetstack src ofs ty :: c) ms m c ms m'. +Proof. + intros; red; intros. + unfold store_stack in H. + generalize (wt_function_instrs _ WTF _ (INCL _ (in_eq _ _))). + intro WTI. inversion WTI. + rewrite (sp_val _ _ _ AG) in H. + rewrite (preg_val ms sp rs1) in H; auto. + assert (NOTE: GPR1 <> GPR0). congruence. + generalize (storeind_correct tge (transl_function f) GPR1 ofs ty + src (transl_code c) rs1 m m' H H2 NOTE). + intros [rs2 [EX OTH]]. + exists rs2. split. + apply agree_exten_2 with rs1; auto. + eapply exec_straight_steps; eauto. +Qed. + +Lemma exec_Mgetparam_prop: + forall (f : function) (sp parent : val) (ofs : int) (ty : typ) + (dst : mreg) (c : list Mach.instruction) (ms : Mach.regset) + (m : mem) (v : val), + load_stack m sp Tint (Int.repr 0) = Some parent -> + load_stack m parent ty ofs = Some v -> + exec_instr_prop f sp (Mgetparam ofs ty dst :: c) ms m c (Regmap.set dst v ms) m. +Proof. + intros; red; intros. + set (rs2 := nextinstr (rs1#GPR2 <- parent)). + assert (EX1: exec_straight tge (transl_function f) + (transl_code (Mgetparam ofs ty dst :: c)) rs1 m + (loadind GPR2 ofs ty dst (transl_code c)) rs2 m). + simpl. apply exec_straight_one. + simpl. unfold load1. rewrite gpr_or_zero_not_zero; auto with ppcgen. + unfold const_low. rewrite <- (sp_val ms sp rs1); auto. + unfold load_stack in H. simpl chunk_of_type in H. + rewrite H. reflexivity. reflexivity. + generalize (wt_function_instrs _ WTF _ (INCL _ (in_eq _ _))). + intro WTI. inversion WTI. + unfold load_stack in H0. change parent with rs2#GPR2 in H0. + assert (NOTE: GPR2 <> GPR0). congruence. + generalize (loadind_correct tge (transl_function f) GPR2 ofs ty + dst (transl_code c) rs2 m v H0 H2 NOTE). + intros [rs3 [EX2 [RES OTH]]]. + exists rs3. split. + apply agree_exten_2 with (rs2#(preg_of dst) <- v). + unfold rs2; auto with ppcgen. + intros. case (preg_eq r0 (preg_of dst)); intro. + subst r0. rewrite Pregmap.gss. auto. + rewrite Pregmap.gso; auto. + eapply exec_straight_steps; eauto. + eapply exec_straight_trans; eauto. +Qed. + +Lemma exec_straight_exec_prop: + forall f sp c1 rs1 m1 c2 m2 ms', + transl_code_at_pc (rs1 PC) f c1 -> + (exists rs2, + exec_straight tge (transl_function f) + (transl_code c1) rs1 m1 + (transl_code c2) rs2 m2 + /\ agree ms' sp rs2) -> + (exists rs2, + agree ms' sp rs2 + /\ exec_steps tge rs1 m1 rs2 m2 + /\ transl_code_at_pc (rs2 PC) f c2). +Proof. + intros until ms'. intros TRANS1 [rs2 [EX AG]]. + exists rs2. split. assumption. + eapply exec_straight_steps; eauto. +Qed. + +Lemma exec_Mop_prop: + forall (f : function) (sp : val) (op : operation) (args : list mreg) + (res : mreg) (c : list Mach.instruction) (ms: Mach.regset) + (m : mem) (v: val), + eval_operation ge sp op ms ## args = Some v -> + exec_instr_prop f sp (Mop op args res :: c) ms m c (Regmap.set res v ms) m. +Proof. + intros; red; intros. + generalize (wt_function_instrs _ WTF _ (INCL _ (in_eq _ _))). + intro WTI. + eapply exec_straight_exec_prop; eauto. + simpl. eapply transl_op_correct; auto. + rewrite <- H. apply eval_operation_preserved. exact symbols_preserved. +Qed. + +Lemma exec_Mload_prop: + forall (f : function) (sp : val) (chunk : memory_chunk) + (addr : addressing) (args : list mreg) (dst : mreg) + (c : list Mach.instruction) (ms: Mach.regset) (m : mem) + (a v : val), + eval_addressing ge sp addr ms ## args = Some a -> + loadv chunk m a = Some v -> + exec_instr_prop f sp (Mload chunk addr args dst :: c) ms m c (Regmap.set dst v ms) m. +Proof. + intros; red; intros. + generalize (wt_function_instrs _ WTF _ (INCL _ (in_eq _ _))). + intro WTI; inversion WTI. + assert (eval_addressing tge sp addr ms##args = Some a). + rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved. + eapply exec_straight_exec_prop; eauto. + destruct chunk; simpl; simpl in H6; + (* all cases but Mint8signed *) + try (eapply transl_load_correct; eauto; + intros; simpl; unfold preg_of; rewrite H6; auto). + (* Mint8signed *) + generalize (loadv_8_signed_unsigned m a). + rewrite H0. + caseEq (loadv Mint8unsigned m a); + [idtac | simpl;intros;discriminate]. + intros v' LOAD' EQ. simpl in EQ. injection EQ. intro EQ1. clear EQ. + assert (X1: forall (cst : constant) (r1 : ireg) (rs1 : regset), + exec_instr tge (transl_function f) (Plbz (ireg_of dst) cst r1) rs1 m = + load1 tge Mint8unsigned (preg_of dst) cst r1 rs1 m). + intros. unfold preg_of; rewrite H6. reflexivity. + assert (X2: forall (r1 r2 : ireg) (rs1 : regset), + exec_instr tge (transl_function f) (Plbzx (ireg_of dst) r1 r2) rs1 m = + load2 Mint8unsigned (preg_of dst) r1 r2 rs1 m). + intros. unfold preg_of; rewrite H6. reflexivity. + generalize (transl_load_correct tge (transl_function f) + (Plbz (ireg_of dst)) (Plbzx (ireg_of dst)) + Mint8unsigned addr args + (Pextsb (ireg_of dst) (ireg_of dst) :: transl_code c) + ms sp rs1 m dst a v' + X1 X2 AG H3 H7 LOAD'). + intros [rs2 [EX1 AG1]]. + exists (nextinstr (rs2#(ireg_of dst) <- v)). + split. eapply exec_straight_trans. eexact EX1. + apply exec_straight_one. simpl. + rewrite <- (ireg_val _ _ _ dst AG1);auto. rewrite Regmap.gss. + rewrite EQ1. reflexivity. reflexivity. + eauto with ppcgen. +Qed. + +Lemma exec_Mstore_prop: + forall (f : function) (sp : val) (chunk : memory_chunk) + (addr : addressing) (args : list mreg) (src : mreg) + (c : list Mach.instruction) (ms: Mach.regset) (m m' : mem) + (a : val), + eval_addressing ge sp addr ms ## args = Some a -> + storev chunk m a (ms src) = Some m' -> + exec_instr_prop f sp (Mstore chunk addr args src :: c) ms m c ms m'. +Proof. + intros; red; intros. + generalize (wt_function_instrs _ WTF _ (INCL _ (in_eq _ _))). + intro WTI; inversion WTI. + rewrite <- (eval_addressing_preserved symbols_preserved) in H. + eapply exec_straight_exec_prop; eauto. + destruct chunk; simpl; simpl in H6; + try (rewrite storev_8_signed_unsigned in H); + try (rewrite storev_16_signed_unsigned in H); + simpl; eapply transl_store_correct; eauto; + intros; unfold preg_of; rewrite H6; reflexivity. +Qed. + +Hypothesis wt_prog: wt_program prog. + +Lemma exec_Mcall_prop: + forall (f : function) (sp : val) (sig : signature) + (mos : mreg + ident) (c : list Mach.instruction) (ms : Mach.regset) + (m : mem) (f' : function) (ms' : Mach.regset) (m' : mem), + find_function ge mos ms = Some f' -> + exec_function ge f' sp ms m ms' m' -> + exec_function_prop f' sp ms m ms' m' -> + exec_instr_prop f sp (Mcall sig mos :: c) ms m c ms' m'. +Proof. + intros; red; intros. + generalize (wt_function_instrs _ WTF _ (INCL _ (in_eq _ _))). + intro WTI. inversion WTI. + inversion AT. + assert (WTF': wt_function f'). + destruct mos; simpl in H. + apply (Genv.find_funct_prop wt_function wt_prog H). + destruct (Genv.find_symbol ge i); try discriminate. + apply (Genv.find_funct_ptr_prop wt_function wt_prog H). + assert (NOOV: code_size (transl_function f) <= Int.max_unsigned). + eapply functions_translated_no_overflow; eauto. + destruct mos; simpl in H; simpl transl_code in H7. + (* Indirect call *) + generalize (code_tail_next_int _ _ _ _ NOOV H7). intro CT1. + generalize (code_tail_next_int _ _ _ _ NOOV CT1). intro CT2. + set (rs2 := nextinstr (rs1#CTR <- (ms m0))). + set (rs3 := rs2 #LR <- (Val.add rs2#PC Vone) #PC <- (ms m0)). + assert (TFIND: Genv.find_funct ge (rs3#PC) = Some f'). + unfold rs3. rewrite Pregmap.gss. auto. + assert (AG3: agree ms sp rs3). + unfold rs3, rs2; auto 8 with ppcgen. + assert (WTRA: Val.has_type rs3#LR Tint). + change rs3#LR with (Val.add (Val.add rs1#PC Vone) Vone). + rewrite <- H5. exact I. + generalize (H1 rs3 WTF' TFIND AG3 WTRA). + intros [rs4 [AG4 [EXF' PC4]]]. + exists rs4. split. auto. split. + apply exec_trans with rs2 m. apply exec_one. econstructor. + eauto. apply functions_translated. eexact H6. + rewrite find_instr_tail. rewrite H7. reflexivity. + simpl. rewrite <- (ireg_val ms sp rs1); auto. + apply exec_trans with rs3 m. apply exec_one. econstructor. + unfold rs2, nextinstr. rewrite Pregmap.gss. + rewrite Pregmap.gso. rewrite <- H5. simpl. reflexivity. + discriminate. apply functions_translated. eexact H6. + rewrite find_instr_tail. rewrite CT1. reflexivity. + simpl. replace (rs2 CTR) with (ms m0). reflexivity. + unfold rs2. rewrite nextinstr_inv. rewrite Pregmap.gss. + auto. discriminate. + exact EXF'. + rewrite PC4. unfold rs3. rewrite Pregmap.gso. rewrite Pregmap.gss. + unfold rs2, nextinstr. rewrite Pregmap.gss. rewrite Pregmap.gso. + rewrite <- H5. simpl. constructor. auto. auto. + discriminate. discriminate. + (* Direct call *) + caseEq (Genv.find_symbol ge i). intros fblock FINDS. + rewrite FINDS in H. + generalize (code_tail_next_int _ _ _ _ NOOV H7). intro CT1. + set (rs2 := rs1 #LR <- (Val.add rs1#PC Vone) #PC <- (symbol_offset tge i Int.zero)). + assert (TFIND: Genv.find_funct ge (rs2#PC) = Some f'). + unfold rs2. rewrite Pregmap.gss. + unfold symbol_offset. rewrite symbols_preserved. + rewrite FINDS. + rewrite Genv.find_funct_find_funct_ptr. assumption. + assert (AG2: agree ms sp rs2). + unfold rs2; auto 8 with ppcgen. + assert (WTRA: Val.has_type rs2#LR Tint). + change rs2#LR with (Val.add rs1#PC Vone). + rewrite <- H5. exact I. + generalize (H1 rs2 WTF' TFIND AG2 WTRA). + intros [rs3 [AG3 [EXF' PC3]]]. + exists rs3. split. auto. split. + apply exec_trans with rs2 m. apply exec_one. econstructor. + eauto. apply functions_translated. eexact H6. + rewrite find_instr_tail. rewrite H7. reflexivity. + simpl. reflexivity. + exact EXF'. + rewrite PC3. unfold rs2. rewrite Pregmap.gso. rewrite Pregmap.gss. + rewrite <- H5. simpl. constructor. auto. auto. + discriminate. + intro FINDS. rewrite FINDS in H. discriminate. +Qed. + +Lemma exec_Mgoto_prop: + forall (f : function) (sp : val) (lbl : Mach.label) + (c : list Mach.instruction) (ms : Mach.regset) (m : mem) + (c' : Mach.code), + Mach.find_label lbl (fn_code f) = Some c' -> + exec_instr_prop f sp (Mgoto lbl :: c) ms m c' ms m. +Proof. + intros; red; intros. + inversion AT. + generalize (find_label_goto_label f lbl rs1 m _ _ _ H1 (sym_equal H0) H). + intros [rs2 [GOTO [AT2 INV]]]. + exists rs2. split. apply agree_exten_2 with rs1; auto. + split. inversion AT. apply exec_one. econstructor; eauto. + apply functions_translated; eauto. + rewrite find_instr_tail. rewrite H7. simpl. reflexivity. + simpl. rewrite GOTO. auto. auto. +Qed. + +Lemma exec_Mcond_true_prop: + forall (f : function) (sp : val) (cond : condition) + (args : list mreg) (lbl : Mach.label) (c : list Mach.instruction) + (ms: Mach.regset) (m : mem) (c' : Mach.code), + eval_condition cond ms ## args = Some true -> + Mach.find_label lbl (fn_code f) = Some c' -> + exec_instr_prop f sp (Mcond cond args lbl :: c) ms m c' ms m. +Proof. + intros; red; intros. + generalize (wt_function_instrs _ WTF _ (INCL _ (in_eq _ _))). + intro WTI. inversion WTI. + pose (k1 := + if snd (crbit_for_cond cond) + then Pbt (fst (crbit_for_cond cond)) lbl :: transl_code c + else Pbf (fst (crbit_for_cond cond)) lbl :: transl_code c). + generalize (transl_cond_correct tge (transl_function f) + cond args k1 ms sp rs1 m true H2 AG H). + simpl. intros [rs2 [EX [RES AG2]]]. + inversion AT. + generalize (functions_translated _ _ H6); intro FN. + generalize (functions_translated_no_overflow _ _ H6); intro NOOV. + simpl in H7. + generalize (exec_straight_steps_2 _ _ _ _ _ _ _ EX + NOOV _ _ (sym_equal H5) FN H7). + intros [ofs' [PC2 CT2]]. + generalize (find_label_goto_label f lbl rs2 m _ _ _ H6 PC2 H0). + intros [rs3 [GOTO [AT3 INV3]]]. + exists rs3. split. + apply agree_exten_2 with rs2; auto. + split. eapply exec_trans. + eapply exec_straight_steps_1; eauto. + caseEq (snd (crbit_for_cond cond)); intro ISSET; rewrite ISSET in RES. + apply exec_one. econstructor; eauto. + rewrite find_instr_tail. rewrite CT2. unfold k1. rewrite ISSET. reflexivity. + simpl. rewrite RES. simpl. auto. + apply exec_one. econstructor; eauto. + rewrite find_instr_tail. rewrite CT2. unfold k1. rewrite ISSET. reflexivity. + simpl. rewrite RES. simpl. auto. + auto. +Qed. + +Lemma exec_Mcond_false_prop: + forall (f : function) (sp : val) (cond : condition) + (args : list mreg) (lbl : Mach.label) (c : list Mach.instruction) + (ms : Mach.regset) (m : mem), + eval_condition cond ms ## args = Some false -> + exec_instr_prop f sp (Mcond cond args lbl :: c) ms m c ms m. +Proof. + intros; red; intros. + generalize (wt_function_instrs _ WTF _ (INCL _ (in_eq _ _))). + intro WTI. inversion WTI. + pose (k1 := + if snd (crbit_for_cond cond) + then Pbt (fst (crbit_for_cond cond)) lbl :: transl_code c + else Pbf (fst (crbit_for_cond cond)) lbl :: transl_code c). + generalize (transl_cond_correct tge (transl_function f) + cond args k1 ms sp rs1 m false H1 AG H). + simpl. intros [rs2 [EX [RES AG2]]]. + exists (nextinstr rs2). + split. auto with ppcgen. + eapply exec_straight_steps; eauto. + eapply exec_straight_trans. eexact EX. + caseEq (snd (crbit_for_cond cond)); intro ISSET; rewrite ISSET in RES. + unfold k1; rewrite ISSET; apply exec_straight_one. + simpl. rewrite RES. reflexivity. + reflexivity. + unfold k1; rewrite ISSET; apply exec_straight_one. + simpl. rewrite RES. reflexivity. + reflexivity. +Qed. + +Lemma exec_instr_incl: + forall f sp c rs m c' rs' m', + Mach.exec_instr ge f sp c rs m c' rs' m' -> + incl c f.(fn_code) -> incl c' f.(fn_code). +Proof. + induction 1; intros; eauto with coqlib. + eapply incl_find_label; eauto. + eapply incl_find_label; eauto. +Qed. + +Lemma exec_instrs_incl: + forall f sp c rs m c' rs' m', + Mach.exec_instrs ge f sp c rs m c' rs' m' -> + incl c f.(fn_code) -> incl c' f.(fn_code). +Proof. + induction 1; intros. + auto. + eapply exec_instr_incl; eauto. + eauto. +Qed. + +Lemma exec_refl_prop: + forall (f : function) (sp : val) (c : Mach.code) (ms : Mach.regset) + (m : mem), exec_instr_prop f sp c ms m c ms m. +Proof. + intros; red; intros. + exists rs1. split. auto. split. apply exec_refl. auto. +Qed. + +Lemma exec_one_prop: + forall (f : function) (sp : val) (c : Mach.code) (ms : Mach.regset) + (m : mem) (c' : Mach.code) (ms' : Mach.regset) (m' : mem), + Mach.exec_instr ge f sp c ms m c' ms' m' -> + exec_instr_prop f sp c ms m c' ms' m' -> + exec_instr_prop f sp c ms m c' ms' m'. +Proof. + auto. +Qed. + +Lemma exec_trans_prop: + forall (f : function) (sp : val) (c1 : Mach.code) (ms1 : Mach.regset) + (m1 : mem) (c2 : Mach.code) (ms2 : Mach.regset) (m2 : mem) + (c3 : Mach.code) (ms3 : Mach.regset) (m3 : mem), + exec_instrs ge f sp c1 ms1 m1 c2 ms2 m2 -> + exec_instr_prop f sp c1 ms1 m1 c2 ms2 m2 -> + exec_instrs ge f sp c2 ms2 m2 c3 ms3 m3 -> + exec_instr_prop f sp c2 ms2 m2 c3 ms3 m3 -> + exec_instr_prop f sp c1 ms1 m1 c3 ms3 m3. +Proof. + intros; red; intros. + generalize (H0 rs1 WTF INCL AT AG). + intros [rs2 [AG2 [EX2 AT2]]]. + generalize (exec_instrs_incl _ _ _ _ _ _ _ _ H INCL). intro INCL2. + generalize (H2 rs2 WTF INCL2 AT2 AG2). + intros [rs3 [AG3 [EX3 AT3]]]. + exists rs3. split. auto. split. eapply exec_trans; eauto. auto. +Qed. + +Lemma exec_function_body_prop_: + forall (f : function) (parent ra : val) (ms : Mach.regset) (m : mem) + (ms' : Mach.regset) (m1 m2 m3 m4 : mem) (stk : block) + (c : list Mach.instruction), + alloc m (- fn_framesize f) + (align_16_top (- fn_framesize f) (fn_stacksize f)) = (m1, stk) -> + let sp := Vptr stk (Int.repr (- fn_framesize f)) in + store_stack m1 sp Tint (Int.repr 0) parent = Some m2 -> + store_stack m2 sp Tint (Int.repr 4) ra = Some m3 -> + exec_instrs ge f sp (fn_code f) ms m3 (Mreturn :: c) ms' m4 -> + exec_instr_prop f sp (fn_code f) ms m3 (Mreturn :: c) ms' m4 -> + load_stack m4 sp Tint (Int.repr 0) = Some parent -> + load_stack m4 sp Tint (Int.repr 4) = Some ra -> + exec_function_body_prop f parent ra ms m ms' (free m4 stk). +Proof. + intros; red; intros. + generalize (Genv.find_funct_inv AT). intros [b EQPC]. + generalize AT. rewrite EQPC. rewrite Genv.find_funct_find_funct_ptr. intro FN. + generalize (functions_translated_no_overflow _ _ FN); intro NOOV. + set (rs2 := nextinstr (rs1#GPR1 <- sp #GPR2 <- Vundef)). + set (rs3 := nextinstr (rs2#GPR2 <- ra)). + set (rs4 := nextinstr rs3). + assert (exec_straight tge (transl_function f) + (transl_function f) rs1 m + (transl_code (fn_code f)) rs4 m3). + unfold transl_function at 2. + apply exec_straight_three with rs2 m2 rs3 m2. + unfold exec_instr. rewrite H. fold sp. + generalize H0. unfold store_stack. change (Vint (Int.repr 0)) with Vzero. + replace (Val.add sp Vzero) with sp. simpl chunk_of_type. + rewrite (sp_val _ _ _ AG). intro. rewrite H6. clear H6. + reflexivity. unfold sp. simpl. rewrite Int.add_zero. reflexivity. + simpl. replace (rs2 LR) with ra. reflexivity. + simpl. unfold store1. rewrite gpr_or_zero_not_zero. + unfold const_low. replace (rs3 GPR1) with sp. replace (rs3 GPR2) with ra. + unfold store_stack in H1. simpl chunk_of_type in H1. rewrite H1. reflexivity. + reflexivity. reflexivity. discriminate. + reflexivity. reflexivity. reflexivity. + assert (AT2: transl_code_at_pc rs4#PC f f.(fn_code)). + change (rs4 PC) with (Val.add (Val.add (Val.add (rs1 PC) Vone) Vone) Vone). + rewrite EQPC. simpl. constructor. auto. + eapply code_tail_next_int; auto. + eapply code_tail_next_int; auto. + eapply code_tail_next_int; auto. + unfold Int.zero. rewrite Int.unsigned_repr. + rewrite code_tail_zero. unfold transl_function. reflexivity. + compute. intuition congruence. + assert (AG2: agree ms sp rs2). + split. reflexivity. + intros. unfold rs2. rewrite nextinstr_inv. + repeat (rewrite Pregmap.gso). elim AG; auto. + auto with ppcgen. auto with ppcgen. auto with ppcgen. + assert (AG4: agree ms sp rs4). + unfold rs4, rs3; auto with ppcgen. + generalize (H3 rs4 WTF (incl_refl _) AT2 AG4). + intros [rs5 [AG5 [EXB AT5]]]. + set (rs6 := nextinstr (rs5#GPR2 <- ra)). + set (rs7 := nextinstr (rs6#LR <- ra)). + set (rs8 := nextinstr (rs7#GPR1 <- parent)). + set (rs9 := rs8#PC <- ra). + assert (exec_straight tge (transl_function f) + (transl_code (Mreturn :: c)) rs5 m4 + (Pblr :: transl_code c) rs8 (free m4 stk)). + simpl. apply exec_straight_three with rs6 m4 rs7 m4. + simpl. unfold load1. rewrite gpr_or_zero_not_zero. unfold const_low. + unfold load_stack in H5. simpl in H5. + rewrite <- (sp_val _ _ _ AG5). simpl. rewrite H5. + reflexivity. discriminate. + unfold rs7. change ra with rs6#GPR2. reflexivity. + unfold exec_instr. generalize H4. unfold load_stack. + replace (Val.add sp (Vint (Int.repr 0))) with sp. + simpl chunk_of_type. intro. change rs7#GPR1 with rs5#GPR1. + rewrite <- (sp_val _ _ _ AG5). rewrite H7. + unfold sp. reflexivity. + unfold sp. simpl. rewrite Int.add_zero. reflexivity. + reflexivity. reflexivity. reflexivity. + exists rs9. split. + (* agreement *) + assert (AG7: agree ms' sp rs7). + unfold rs7, rs6; auto 10 with ppcgen. + assert (AG8: agree ms' parent rs8). + split. reflexivity. intros. unfold rs8. + rewrite nextinstr_inv. rewrite Pregmap.gso. + elim AG7; auto. auto with ppcgen. auto with ppcgen. + unfold rs9; auto with ppcgen. + (* execution *) + split. apply exec_trans with rs4 m3. + eapply exec_straight_steps_1; eauto. + apply functions_translated; auto. + apply exec_trans with rs5 m4. assumption. + inversion AT5. + apply exec_trans with rs8 (free m4 stk). + eapply exec_straight_steps_1; eauto. + apply functions_translated; auto. + apply exec_one. econstructor. + change rs8#PC with (Val.add (Val.add (Val.add rs5#PC Vone) Vone) Vone). + rewrite <- H8. simpl. reflexivity. + apply functions_translated; eauto. + assert (code_tail (Int.unsigned (Int.add (Int.add (Int.add ofs Int.one) Int.one) Int.one)) + (transl_function f) = Pblr :: transl_code c). + eapply code_tail_next_int; auto. + eapply code_tail_next_int; auto. + eapply code_tail_next_int; auto. + rewrite H10. simpl. reflexivity. + rewrite find_instr_tail. rewrite H13. + reflexivity. + reflexivity. + (* LR preservation *) + change rs9#PC with ra. auto. +Qed. + +Lemma exec_function_prop_: + forall (f : function) (parent : val) (ms : Mach.regset) (m : mem) + (ms' : Mach.regset) (m' : mem), + (forall ra : val, + Val.has_type ra Tint -> + exec_function_body ge f parent ra ms m ms' m') -> + (forall ra : val, Val.has_type ra Tint -> + exec_function_body_prop f parent ra ms m ms' m') -> + exec_function_prop f parent ms m ms' m'. +Proof. + intros; red; intros. + apply (H0 rs1#LR WTRA rs1 WTRA (refl_equal _) WTF AT AG). +Qed. + +(** We then conclude by induction on the structure of the Mach +execution derivation. *) + +Theorem transf_function_correct: + forall f parent ms m ms' m', + Mach.exec_function ge f parent ms m ms' m' -> + exec_function_prop f parent ms m ms' m'. +Proof + (Mach.exec_function_ind4 ge + exec_instr_prop exec_instr_prop + exec_function_body_prop exec_function_prop + + exec_Mlabel_prop + exec_Mgetstack_prop + exec_Msetstack_prop + exec_Mgetparam_prop + exec_Mop_prop + exec_Mload_prop + exec_Mstore_prop + exec_Mcall_prop + exec_Mgoto_prop + exec_Mcond_true_prop + exec_Mcond_false_prop + exec_refl_prop + exec_one_prop + exec_trans_prop + exec_function_body_prop_ + exec_function_prop_). + +End PRESERVATION. + +Theorem transf_program_correct: + forall (p: Mach.program) (tp: PPC.program) (r: val), + wt_program p -> + transf_program p = Some tp -> + Mach.exec_program p r -> + PPC.exec_program tp r. +Proof. + intros. + destruct H1 as [fptr [f [ms [m [FINDS [FINDF [EX RES]]]]]]]. + assert (WTF: wt_function f). + apply (Genv.find_funct_ptr_prop wt_function H FINDF). + set (ge := Genv.globalenv p) in *. + set (ms0 := Regmap.init Vundef) in *. + set (tge := Genv.globalenv tp). + set (rs0 := + (Pregmap.init Vundef) # PC <- (symbol_offset tge tp.(prog_main) Int.zero) + # LR <- Vzero + # GPR1 <- (Vptr Mem.nullptr Int.zero)). + assert (AT: Genv.find_funct ge (rs0 PC) = Some f). + change (rs0 PC) with (symbol_offset tge tp.(prog_main) Int.zero). + rewrite (transform_partial_program_main _ _ H0). + unfold symbol_offset. rewrite (symbols_preserved p tp H0). + fold ge. rewrite FINDS. + rewrite Genv.find_funct_find_funct_ptr. exact FINDF. + assert (AG: agree ms0 (Vptr Mem.nullptr Int.zero) rs0). + split. reflexivity. intros. unfold rs0. + repeat (rewrite Pregmap.gso; auto with ppcgen). + assert (WTRA: Val.has_type (rs0 LR) Tint). + exact I. + generalize (transf_function_correct p tp H0 H + _ _ _ _ _ _ EX rs0 WTF AT AG WTRA). + intros [rs [AG' [EX' RPC]]]. + red. exists rs; exists m. + split. rewrite (Genv.init_mem_transf_partial _ _ H0). exact EX'. + split. rewrite RPC. reflexivity. rewrite <- RES. + change (IR GPR3) with (preg_of R3). elim AG'; auto. +Qed. diff --git a/backend/PPCgenproof1.v b/backend/PPCgenproof1.v new file mode 100644 index 00000000..30eb3368 --- /dev/null +++ b/backend/PPCgenproof1.v @@ -0,0 +1,1566 @@ +(** Correctness proof for PPC generation: auxiliary results. *) + +Require Import Coqlib. +Require Import Maps. +Require Import AST. +Require Import Integers. +Require Import Floats. +Require Import Values. +Require Import Mem. +Require Import Globalenvs. +Require Import Op. +Require Import Locations. +Require Import Mach. +Require Import Machtyping. +Require Import PPC. +Require Import PPCgen. + +(** * Properties of low half/high half decomposition *) + +Lemma high_half_signed_zero: + forall v, Val.add (high_half_signed v) Vzero = high_half_signed v. +Proof. + intros. generalize (high_half_signed_type v). + rewrite Val.add_commut. + case (high_half_signed v); simpl; intros; try contradiction. + auto. + rewrite Int.add_commut; rewrite Int.add_zero; auto. + rewrite Int.add_zero; auto. +Qed. + +Lemma high_half_unsigned_zero: + forall v, Val.add (high_half_unsigned v) Vzero = high_half_unsigned v. +Proof. + intros. generalize (high_half_unsigned_type v). + rewrite Val.add_commut. + case (high_half_unsigned v); simpl; intros; try contradiction. + auto. + rewrite Int.add_commut; rewrite Int.add_zero; auto. + rewrite Int.add_zero; auto. +Qed. + +Lemma low_high_u: + forall n, Int.or (Int.shl (high_u n) (Int.repr 16)) (low_u n) = n. +Proof. + intros. unfold high_u, low_u. + rewrite Int.shl_rolm. rewrite Int.shru_rolm. + rewrite Int.rolm_rolm. + change (Int.modu (Int.add (Int.sub (Int.repr (Z_of_nat wordsize)) (Int.repr 16)) + (Int.repr 16)) + (Int.repr (Z_of_nat wordsize))) + with (Int.zero). + rewrite Int.rolm_zero. rewrite <- Int.and_or_distrib. + exact (Int.and_mone n). + reflexivity. reflexivity. +Qed. + +Lemma low_high_u_xor: + forall n, Int.xor (Int.shl (high_u n) (Int.repr 16)) (low_u n) = n. +Proof. + intros. unfold high_u, low_u. + rewrite Int.shl_rolm. rewrite Int.shru_rolm. + rewrite Int.rolm_rolm. + change (Int.modu (Int.add (Int.sub (Int.repr (Z_of_nat wordsize)) (Int.repr 16)) + (Int.repr 16)) + (Int.repr (Z_of_nat wordsize))) + with (Int.zero). + rewrite Int.rolm_zero. rewrite <- Int.and_xor_distrib. + exact (Int.and_mone n). + reflexivity. reflexivity. +Qed. + +Lemma low_high_s: + forall n, Int.add (Int.shl (high_s n) (Int.repr 16)) (low_s n) = n. +Proof. + intros. rewrite Int.shl_mul_two_p. + unfold high_s. + rewrite <- (Int.divu_pow2 (Int.sub n (low_s n)) (Int.repr 65536) (Int.repr 16)). + change (two_p (Int.unsigned (Int.repr 16))) with 65536. + + assert (forall x y, y > 0 -> (x - x mod y) mod y = 0). + intros. apply Zmod_unique with (x / y). + generalize (Z_div_mod_eq x y H). intro. rewrite Zmult_comm. omega. + omega. + + assert (Int.modu (Int.sub n (low_s n)) (Int.repr 65536) = Int.zero). + unfold Int.modu, Int.zero. decEq. + change (Int.unsigned (Int.repr 65536)) with 65536. + unfold Int.sub. + assert (forall a b, Int.eqm a b -> b mod 65536 = 0 -> a mod 65536 = 0). + intros a b [k EQ] H1. rewrite EQ. + change modulus with (65536 * 65536). + rewrite Zmult_assoc. rewrite Zplus_comm. rewrite Z_mod_plus. auto. + omega. + eapply H0. apply Int.eqm_sym. apply Int.eqm_unsigned_repr. + unfold low_s. unfold Int.cast16signed. + set (N := Int.unsigned n). + case (zlt (N mod 65536) 32768); intro. + apply H0 with (N - N mod 65536). auto with ints. + apply H. omega. + apply H0 with (N - (N mod 65536 - 65536)). auto with ints. + replace (N - (N mod 65536 - 65536)) + with ((N - N mod 65536) + 1 * 65536). + rewrite Z_mod_plus. apply H. omega. omega. ring. + + assert (Int.repr 65536 <> Int.zero). compute. congruence. + generalize (Int.modu_divu_Euclid (Int.sub n (low_s n)) (Int.repr 65536) H1). + rewrite H0. rewrite Int.add_zero. intro. rewrite <- H2. + rewrite Int.sub_add_opp. rewrite Int.add_assoc. + replace (Int.add (Int.neg (low_s n)) (low_s n)) with Int.zero. + apply Int.add_zero. symmetry. rewrite Int.add_commut. + rewrite <- Int.sub_add_opp. apply Int.sub_idem. + + reflexivity. +Qed. + +(** * Correspondence between Mach registers and PPC registers *) + +Hint Extern 2 (_ <> _) => discriminate: ppcgen. + +(** Mapping from Mach registers to PPC registers. *) + +Definition preg_of (r: mreg) := + match mreg_type r with + | Tint => IR (ireg_of r) + | Tfloat => FR (freg_of r) + end. + +Lemma preg_of_injective: + forall r1 r2, preg_of r1 = preg_of r2 -> r1 = r2. +Proof. + destruct r1; destruct r2; simpl; intros; reflexivity || discriminate. +Qed. + +(** Characterization of PPC registers that correspond to Mach registers. *) + +Definition is_data_reg (r: preg) : Prop := + match r with + | IR GPR2 => False + | FR FPR13 => False + | PC => False | LR => False | CTR => False + | CR0_0 => False | CR0_1 => False | CR0_2 => False | CR0_3 => False + | CARRY => False + | _ => True + end. + +Lemma ireg_of_is_data_reg: + forall (r: mreg), is_data_reg (ireg_of r). +Proof. + destruct r; exact I. +Qed. + +Lemma freg_of_is_data_reg: + forall (r: mreg), is_data_reg (ireg_of r). +Proof. + destruct r; exact I. +Qed. + +Lemma preg_of_is_data_reg: + forall (r: mreg), is_data_reg (preg_of r). +Proof. + destruct r; exact I. +Qed. + +Lemma ireg_of_not_GPR1: + forall r, ireg_of r <> GPR1. +Proof. + intro. case r; discriminate. +Qed. +Lemma ireg_of_not_GPR2: + forall r, ireg_of r <> GPR2. +Proof. + intro. case r; discriminate. +Qed. +Lemma freg_of_not_FPR13: + forall r, freg_of r <> FPR13. +Proof. + intro. case r; discriminate. +Qed. +Hint Resolve ireg_of_not_GPR1 ireg_of_not_GPR2 freg_of_not_FPR13: ppcgen. + +Lemma preg_of_not: + forall r1 r2, ~(is_data_reg r2) -> preg_of r1 <> r2. +Proof. + intros; red; intro. subst r2. elim H. apply preg_of_is_data_reg. +Qed. +Hint Resolve preg_of_not: ppcgen. + +Lemma preg_of_not_GPR1: + forall r, preg_of r <> GPR1. +Proof. + intro. case r; discriminate. +Qed. +Hint Resolve preg_of_not_GPR1: ppcgen. + +(** Agreement between Mach register sets and PPC register sets. *) + +Definition agree (ms: Mach.regset) (sp: val) (rs: PPC.regset) := + rs#GPR1 = sp /\ forall r: mreg, ms r = rs#(preg_of r). + +Lemma preg_val: + forall ms sp rs r, + agree ms sp rs -> ms r = rs#(preg_of r). +Proof. + intros. elim H. auto. +Qed. + +Lemma ireg_val: + forall ms sp rs r, + agree ms sp rs -> + mreg_type r = Tint -> + ms r = rs#(ireg_of r). +Proof. + intros. elim H; intros. + generalize (H2 r). unfold preg_of. rewrite H0. auto. +Qed. + +Lemma freg_val: + forall ms sp rs r, + agree ms sp rs -> + mreg_type r = Tfloat -> + ms r = rs#(freg_of r). +Proof. + intros. elim H; intros. + generalize (H2 r). unfold preg_of. rewrite H0. auto. +Qed. + +Lemma sp_val: + forall ms sp rs, + agree ms sp rs -> + sp = rs#GPR1. +Proof. + intros. elim H; auto. +Qed. + +Lemma agree_exten_1: + forall ms sp rs rs', + agree ms sp rs -> + (forall r, is_data_reg r -> rs'#r = rs#r) -> + agree ms sp rs'. +Proof. + unfold agree; intros. elim H; intros. + split. rewrite H0. auto. exact I. + intros. rewrite H0. auto. apply preg_of_is_data_reg. +Qed. + +Lemma agree_exten_2: + forall ms sp rs rs', + agree ms sp rs -> + (forall r, + r <> IR GPR2 -> r <> FR FPR13 -> + r <> PC -> r <> LR -> r <> CTR -> + r <> CR0_0 -> r <> CR0_1 -> r <> CR0_2 -> r <> CR0_3 -> + r <> CARRY -> + rs'#r = rs#r) -> + agree ms sp rs'. +Proof. + intros. apply agree_exten_1 with rs. auto. + intros. apply H0; (red; intro; subst r; elim H1). +Qed. + +(** Preservation of register agreement under various assignments. *) + +Lemma agree_set_mreg: + forall ms sp rs r v, + agree ms sp rs -> + agree (Regmap.set r v ms) sp (rs#(preg_of r) <- v). +Proof. + unfold agree; intros. elim H; intros; clear H. + split. rewrite Pregmap.gso. auto. apply sym_not_eq. apply preg_of_not_GPR1. + intros. unfold Regmap.set. case (RegEq.eq r0 r); intro. + subst r0. rewrite Pregmap.gss. auto. + rewrite Pregmap.gso. auto. red; intro. + elim n. apply preg_of_injective; auto. +Qed. +Hint Resolve agree_set_mreg: ppcgen. + +Lemma agree_set_mireg: + forall ms sp rs r v, + agree ms sp (rs#(preg_of r) <- v) -> + mreg_type r = Tint -> + agree ms sp (rs#(ireg_of r) <- v). +Proof. + intros. unfold preg_of in H. rewrite H0 in H. auto. +Qed. +Hint Resolve agree_set_mireg: ppcgen. + +Lemma agree_set_mfreg: + forall ms sp rs r v, + agree ms sp (rs#(preg_of r) <- v) -> + mreg_type r = Tfloat -> + agree ms sp (rs#(freg_of r) <- v). +Proof. + intros. unfold preg_of in H. rewrite H0 in H. auto. +Qed. +Hint Resolve agree_set_mfreg: ppcgen. + +Lemma agree_set_other: + forall ms sp rs r v, + agree ms sp rs -> + ~(is_data_reg r) -> + agree ms sp (rs#r <- v). +Proof. + intros. apply agree_exten_1 with rs. + auto. intros. apply Pregmap.gso. red; intro; subst r0; contradiction. +Qed. +Hint Resolve agree_set_other: ppcgen. + +Lemma agree_nextinstr: + forall ms sp rs, + agree ms sp rs -> agree ms sp (nextinstr rs). +Proof. + intros. unfold nextinstr. apply agree_set_other. auto. auto. +Qed. +Hint Resolve agree_nextinstr: ppcgen. + +Lemma agree_set_mireg_twice: + forall ms sp rs r v v', + agree ms sp rs -> + mreg_type r = Tint -> + agree (Regmap.set r v ms) sp (rs #(ireg_of r) <- v' #(ireg_of r) <- v). +Proof. + intros. replace (IR (ireg_of r)) with (preg_of r). elim H; intros. + split. repeat (rewrite Pregmap.gso; auto with ppcgen). + intros. case (mreg_eq r r0); intro. + subst r0. rewrite Regmap.gss. rewrite Pregmap.gss. auto. + assert (preg_of r <> preg_of r0). + red; intro. elim n. apply preg_of_injective. auto. + rewrite Regmap.gso; auto. + repeat (rewrite Pregmap.gso; auto). + unfold preg_of. rewrite H0. auto. +Qed. +Hint Resolve agree_set_mireg_twice: ppcgen. + +Lemma agree_set_twice_mireg: + forall ms sp rs r v v', + agree (Regmap.set r v' ms) sp rs -> + mreg_type r = Tint -> + agree (Regmap.set r v ms) sp (rs#(ireg_of r) <- v). +Proof. + intros. elim H; intros. + split. rewrite Pregmap.gso. auto. + generalize (ireg_of_not_GPR1 r); congruence. + intros. generalize (H2 r0). + case (mreg_eq r0 r); intro. + subst r0. repeat rewrite Regmap.gss. unfold preg_of; rewrite H0. + rewrite Pregmap.gss. auto. + repeat rewrite Regmap.gso; auto. + rewrite Pregmap.gso. auto. + replace (IR (ireg_of r)) with (preg_of r). + red; intros. elim n. apply preg_of_injective; auto. + unfold preg_of. rewrite H0. auto. +Qed. +Hint Resolve agree_set_twice_mireg: ppcgen. + +Lemma agree_set_commut: + forall ms sp rs r1 r2 v1 v2, + r1 <> r2 -> + agree ms sp ((rs#r2 <- v2)#r1 <- v1) -> + agree ms sp ((rs#r1 <- v1)#r2 <- v2). +Proof. + intros. apply agree_exten_1 with ((rs#r2 <- v2)#r1 <- v1). auto. + intros. + case (preg_eq r r1); intro. + subst r1. rewrite Pregmap.gss. rewrite Pregmap.gso. rewrite Pregmap.gss. + auto. auto. + case (preg_eq r r2); intro. + subst r2. rewrite Pregmap.gss. rewrite Pregmap.gso. rewrite Pregmap.gss. + auto. auto. + repeat (rewrite Pregmap.gso; auto). +Qed. +Hint Resolve agree_set_commut: ppcgen. + +Lemma agree_nextinstr_commut: + forall ms sp rs r v, + agree ms sp (rs#r <- v) -> + r <> PC -> + agree ms sp ((nextinstr rs)#r <- v). +Proof. + intros. unfold nextinstr. apply agree_set_commut. auto. + apply agree_set_other. auto. auto. +Qed. +Hint Resolve agree_nextinstr_commut: ppcgen. + +Lemma agree_set_mireg_exten: + forall ms sp rs r v (rs': regset), + agree ms sp rs -> + mreg_type r = Tint -> + rs'#(ireg_of r) = v -> + (forall r', + r' <> IR GPR2 -> r' <> FR FPR13 -> + r' <> PC -> r' <> LR -> r' <> CTR -> + r' <> CR0_0 -> r' <> CR0_1 -> r' <> CR0_2 -> r' <> CR0_3 -> + r' <> CARRY -> + r' <> IR (ireg_of r) -> rs'#r' = rs#r') -> + agree (Regmap.set r v ms) sp rs'. +Proof. + intros. apply agree_exten_2 with (rs#(ireg_of r) <- v). + auto with ppcgen. + intros. unfold Pregmap.set. case (PregEq.eq r0 (ireg_of r)); intro. + subst r0. auto. apply H2; auto. +Qed. + +(** Useful properties of the PC and GPR0 registers. *) + +Lemma nextinstr_inv: + forall r rs, r <> PC -> (nextinstr rs)#r = rs#r. +Proof. + intros. unfold nextinstr. apply Pregmap.gso. auto. +Qed. +Hint Resolve nextinstr_inv: ppcgen. + +Lemma nextinstr_set_preg: + forall rs m v, + (nextinstr (rs#(preg_of m) <- v))#PC = Val.add rs#PC Vone. +Proof. + intros. unfold nextinstr. rewrite Pregmap.gss. + rewrite Pregmap.gso. auto. apply sym_not_eq. auto with ppcgen. +Qed. +Hint Resolve nextinstr_set_preg: ppcgen. + +Lemma gpr_or_zero_not_zero: + forall rs r, r <> GPR0 -> gpr_or_zero rs r = rs#r. +Proof. + intros. unfold gpr_or_zero. case (ireg_eq r GPR0); tauto. +Qed. +Lemma gpr_or_zero_zero: + forall rs, gpr_or_zero rs GPR0 = Vzero. +Proof. + intros. reflexivity. +Qed. +Hint Resolve gpr_or_zero_not_zero gpr_or_zero_zero: ppcgen. + +(** * Execution of straight-line code *) + +Section STRAIGHTLINE. + +Variable ge: genv. +Variable fn: code. + +(** Straight-line code is composed of PPC instructions that execute + in sequence (no branches, no function calls and returns). + The following inductive predicate relates the machine states + before and after executing a straight-line sequence of instructions. + Instructions are taken from the first list instead of being fetched + from memory. *) + +Inductive exec_straight: code -> regset -> mem -> + code -> regset -> mem -> Prop := + | exec_straight_refl: + forall c rs m, + exec_straight c rs m c rs m + | exec_straight_step: + forall i c rs1 m1 rs2 m2 c' rs3 m3, + exec_instr ge fn i rs1 m1 = OK rs2 m2 -> + rs2#PC = Val.add rs1#PC Vone -> + exec_straight c rs2 m2 c' rs3 m3 -> + exec_straight (i :: c) rs1 m1 c' rs3 m3. + +Lemma exec_straight_trans: + forall c1 rs1 m1 c2 rs2 m2 c3 rs3 m3, + exec_straight c1 rs1 m1 c2 rs2 m2 -> + exec_straight c2 rs2 m2 c3 rs3 m3 -> + exec_straight c1 rs1 m1 c3 rs3 m3. +Proof. + induction 1. auto. + intro. apply exec_straight_step with rs2 m2; auto. +Qed. + +Lemma exec_straight_one: + forall i1 c rs1 m1 rs2 m2, + exec_instr ge fn i1 rs1 m1 = OK rs2 m2 -> + rs2#PC = Val.add rs1#PC Vone -> + exec_straight (i1 :: c) rs1 m1 c rs2 m2. +Proof. + intros. apply exec_straight_step with rs2 m2. auto. auto. + apply exec_straight_refl. +Qed. + +Lemma exec_straight_two: + forall i1 i2 c rs1 m1 rs2 m2 rs3 m3, + exec_instr ge fn i1 rs1 m1 = OK rs2 m2 -> + exec_instr ge fn i2 rs2 m2 = OK rs3 m3 -> + rs2#PC = Val.add rs1#PC Vone -> + rs3#PC = Val.add rs2#PC Vone -> + exec_straight (i1 :: i2 :: c) rs1 m1 c rs3 m3. +Proof. + intros. apply exec_straight_step with rs2 m2; auto. + apply exec_straight_one; auto. +Qed. + +Lemma exec_straight_three: + forall i1 i2 i3 c rs1 m1 rs2 m2 rs3 m3 rs4 m4, + exec_instr ge fn i1 rs1 m1 = OK rs2 m2 -> + exec_instr ge fn i2 rs2 m2 = OK rs3 m3 -> + exec_instr ge fn i3 rs3 m3 = OK rs4 m4 -> + rs2#PC = Val.add rs1#PC Vone -> + rs3#PC = Val.add rs2#PC Vone -> + rs4#PC = Val.add rs3#PC Vone -> + exec_straight (i1 :: i2 :: i3 :: c) rs1 m1 c rs4 m4. +Proof. + intros. apply exec_straight_step with rs2 m2; auto. + eapply exec_straight_two; eauto. +Qed. + +(** * Correctness of PowerPC constructor functions *) + +(** Properties of comparisons. *) + +Lemma compare_float_spec: + forall rs v1 v2, + let rs1 := nextinstr (compare_float rs v1 v2) in + rs1#CR0_0 = Val.cmpf Clt v1 v2 + /\ rs1#CR0_1 = Val.cmpf Cgt v1 v2 + /\ rs1#CR0_2 = Val.cmpf Ceq v1 v2 + /\ forall r', r' <> PC -> r' <> CR0_0 -> r' <> CR0_1 -> + r' <> CR0_2 -> r' <> CR0_3 -> rs1#r' = rs#r'. +Proof. + intros. unfold rs1. + split. reflexivity. + split. reflexivity. + split. reflexivity. + intros. rewrite nextinstr_inv; auto. + unfold compare_float. repeat (rewrite Pregmap.gso; auto). +Qed. + +Lemma compare_sint_spec: + forall rs v1 v2, + let rs1 := nextinstr (compare_sint rs v1 v2) in + rs1#CR0_0 = Val.cmp Clt v1 v2 + /\ rs1#CR0_1 = Val.cmp Cgt v1 v2 + /\ rs1#CR0_2 = Val.cmp Ceq v1 v2 + /\ forall r', r' <> PC -> r' <> CR0_0 -> r' <> CR0_1 -> + r' <> CR0_2 -> r' <> CR0_3 -> rs1#r' = rs#r'. +Proof. + intros. unfold rs1. + split. reflexivity. + split. reflexivity. + split. reflexivity. + intros. rewrite nextinstr_inv; auto. + unfold compare_sint. repeat (rewrite Pregmap.gso; auto). +Qed. + +Lemma compare_uint_spec: + forall rs v1 v2, + let rs1 := nextinstr (compare_uint rs v1 v2) in + rs1#CR0_0 = Val.cmpu Clt v1 v2 + /\ rs1#CR0_1 = Val.cmpu Cgt v1 v2 + /\ rs1#CR0_2 = Val.cmpu Ceq v1 v2 + /\ forall r', r' <> PC -> r' <> CR0_0 -> r' <> CR0_1 -> + r' <> CR0_2 -> r' <> CR0_3 -> rs1#r' = rs#r'. +Proof. + intros. unfold rs1. + split. reflexivity. + split. reflexivity. + split. reflexivity. + intros. rewrite nextinstr_inv; auto. + unfold compare_uint. repeat (rewrite Pregmap.gso; auto). +Qed. + +(** Loading a constant. *) + +Lemma loadimm_correct: + forall r n k rs m, + exists rs', + exec_straight (loadimm r n k) rs m k rs' m + /\ rs'#r = Vint n + /\ forall r': preg, r' <> r -> r' <> PC -> rs'#r' = rs#r'. +Proof. + intros. unfold loadimm. + case (Int.eq (high_s n) Int.zero). + (* addi *) + exists (nextinstr (rs#r <- (Vint n))). + split. apply exec_straight_one. + simpl. rewrite Int.add_commut. rewrite Int.add_zero. reflexivity. + reflexivity. + split. rewrite nextinstr_inv; auto with ppcgen. + apply Pregmap.gss. + intros. rewrite nextinstr_inv; auto. apply Pregmap.gso; auto. + (* addis *) + generalize (Int.eq_spec (low_s n) Int.zero); case (Int.eq (low_s n) Int.zero); intro. + exists (nextinstr (rs#r <- (Vint n))). + split. apply exec_straight_one. + simpl. rewrite Int.add_commut. + rewrite <- H. rewrite low_high_s. reflexivity. + reflexivity. + split. rewrite nextinstr_inv; auto with ppcgen. apply Pregmap.gss. + intros. rewrite nextinstr_inv; auto. apply Pregmap.gso; auto. + (* addis + ori *) + pose (rs1 := nextinstr (rs#r <- (Vint (Int.shl (high_u n) (Int.repr 16))))). + exists (nextinstr (rs1#r <- (Vint n))). + split. eapply exec_straight_two. + simpl. rewrite Int.add_commut. rewrite Int.add_zero. reflexivity. + simpl. rewrite nextinstr_inv; auto with ppcgen. rewrite Pregmap.gss. + unfold Val.or. rewrite low_high_u. reflexivity. + reflexivity. reflexivity. + split. rewrite nextinstr_inv; auto with ppcgen. apply Pregmap.gss. + intros. rewrite nextinstr_inv; auto. rewrite Pregmap.gso; auto. + unfold rs1. rewrite nextinstr_inv; auto. rewrite Pregmap.gso; auto. +Qed. + +(** Add integer immediate. *) + +Lemma addimm_1_correct: + forall r1 r2 n k rs m, + r1 <> GPR0 -> + r2 <> GPR0 -> + exists rs', + exec_straight (addimm_1 r1 r2 n k) rs m k rs' m + /\ rs'#r1 = Val.add rs#r2 (Vint n) + /\ forall r': preg, r' <> r1 -> r' <> PC -> rs'#r' = rs#r'. +Proof. + intros. unfold addimm_1. + (* addi *) + case (Int.eq (high_s n) Int.zero). + exists (nextinstr (rs#r1 <- (Val.add rs#r2 (Vint n)))). + split. apply exec_straight_one. + simpl. rewrite gpr_or_zero_not_zero; auto. + reflexivity. + split. rewrite nextinstr_inv; auto with ppcgen. apply Pregmap.gss. + intros. rewrite nextinstr_inv; auto. apply Pregmap.gso; auto. + (* addis *) + generalize (Int.eq_spec (low_s n) Int.zero); case (Int.eq (low_s n) Int.zero); intro. + exists (nextinstr (rs#r1 <- (Val.add rs#r2 (Vint n)))). + split. apply exec_straight_one. + simpl. rewrite gpr_or_zero_not_zero; auto. + generalize (low_high_s n). rewrite H1. rewrite Int.add_zero. intro. + rewrite H2. auto. + reflexivity. + split. rewrite nextinstr_inv; auto with ppcgen. apply Pregmap.gss. + intros. rewrite nextinstr_inv; auto. apply Pregmap.gso; auto. + (* addis + addi *) + pose (rs1 := nextinstr (rs#r1 <- (Val.add rs#r2 (Vint (Int.shl (high_s n) (Int.repr 16)))))). + exists (nextinstr (rs1#r1 <- (Val.add rs#r2 (Vint n)))). + split. apply exec_straight_two with rs1 m. + simpl. rewrite gpr_or_zero_not_zero; auto. + simpl. rewrite gpr_or_zero_not_zero; auto. + unfold rs1 at 1. rewrite nextinstr_inv; auto with ppcgen. rewrite Pregmap.gss. + rewrite Val.add_assoc. simpl. rewrite low_high_s. auto. + reflexivity. reflexivity. + split. rewrite nextinstr_inv; auto with ppcgen. apply Pregmap.gss. + intros. rewrite nextinstr_inv; auto. rewrite Pregmap.gso; auto. + unfold rs1. rewrite nextinstr_inv; auto. apply Pregmap.gso; auto. +Qed. + +Lemma addimm_2_correct: + forall r1 r2 n k rs m, + r2 <> GPR2 -> + exists rs', + exec_straight (addimm_2 r1 r2 n k) rs m k rs' m + /\ rs'#r1 = Val.add rs#r2 (Vint n) + /\ forall r': preg, r' <> r1 -> r' <> GPR2 -> r' <> PC -> rs'#r' = rs#r'. +Proof. + intros. unfold addimm_2. + generalize (loadimm_correct GPR2 n (Padd r1 r2 GPR2 :: k) rs m). + intros [rs1 [EX [RES OTHER]]]. + exists (nextinstr (rs1#r1 <- (Val.add rs#r2 (Vint n)))). + split. eapply exec_straight_trans. eexact EX. + apply exec_straight_one. simpl. rewrite RES. rewrite OTHER. + auto. congruence. discriminate. + reflexivity. + split. rewrite nextinstr_inv; auto with ppcgen. apply Pregmap.gss. + intros. rewrite nextinstr_inv; auto. rewrite Pregmap.gso; auto. +Qed. + +Lemma addimm_correct: + forall r1 r2 n k rs m, + r2 <> GPR2 -> + exists rs', + exec_straight (addimm r1 r2 n k) rs m k rs' m + /\ rs'#r1 = Val.add rs#r2 (Vint n) + /\ forall r': preg, r' <> r1 -> r' <> GPR2 -> r' <> PC -> rs'#r' = rs#r'. +Proof. + intros. unfold addimm. + case (ireg_eq r1 GPR0); intro. + apply addimm_2_correct; auto. + case (ireg_eq r2 GPR0); intro. + apply addimm_2_correct; auto. + generalize (addimm_1_correct r1 r2 n k rs m n0 n1). + intros [rs' [EX [RES OTH]]]. exists rs'. intuition. +Qed. + +(** And integer immediate. *) + +Lemma andimm_correct: + forall r1 r2 n k (rs : regset) m, + r2 <> GPR2 -> + let v := Val.and rs#r2 (Vint n) in + exists rs', + exec_straight (andimm r1 r2 n k) rs m k rs' m + /\ rs'#r1 = v + /\ rs'#CR0_2 = Val.cmp Ceq v Vzero + /\ forall r': preg, + r' <> r1 -> r' <> GPR2 -> r' <> PC -> + r' <> CR0_0 -> r' <> CR0_1 -> r' <> CR0_2 -> r' <> CR0_3 -> + rs'#r' = rs#r'. +Proof. + intros. unfold andimm. + case (Int.eq (high_u n) Int.zero). + (* andi *) + exists (nextinstr (compare_sint (rs#r1 <- v) v Vzero)). + generalize (compare_sint_spec (rs#r1 <- v) v Vzero). + intros [A [B [C D]]]. + split. apply exec_straight_one. reflexivity. reflexivity. + split. rewrite D; try discriminate. apply Pregmap.gss. + split. auto. + intros. rewrite D; auto. apply Pregmap.gso; auto. + (* andis *) + generalize (Int.eq_spec (low_u n) Int.zero); + case (Int.eq (low_u n) Int.zero); intro. + exists (nextinstr (compare_sint (rs#r1 <- v) v Vzero)). + generalize (compare_sint_spec (rs#r1 <- v) v Vzero). + intros [A [B [C D]]]. + split. apply exec_straight_one. simpl. + generalize (low_high_u n). rewrite H0. rewrite Int.or_zero. + intro. rewrite H1. reflexivity. reflexivity. + split. rewrite D; try discriminate. apply Pregmap.gss. + split. auto. + intros. rewrite D; auto. apply Pregmap.gso; auto. + (* loadimm + and *) + generalize (loadimm_correct GPR2 n (Pand_ r1 r2 GPR2 :: k) rs m). + intros [rs1 [EX1 [RES1 OTHER1]]]. + exists (nextinstr (compare_sint (rs1#r1 <- v) v Vzero)). + generalize (compare_sint_spec (rs1#r1 <- v) v Vzero). + intros [A [B [C D]]]. + split. eapply exec_straight_trans. eexact EX1. + apply exec_straight_one. simpl. rewrite RES1. + rewrite (OTHER1 r2). reflexivity. congruence. congruence. + reflexivity. + split. rewrite D; try discriminate. apply Pregmap.gss. + split. auto. + intros. rewrite D; auto. rewrite Pregmap.gso; auto. +Qed. + +(** Or integer immediate. *) + +Lemma orimm_correct: + forall r1 (r2: ireg) n k (rs : regset) m, + let v := Val.or rs#r2 (Vint n) in + exists rs', + exec_straight (orimm r1 r2 n k) rs m k rs' m + /\ rs'#r1 = v + /\ forall r': preg, r' <> r1 -> r' <> PC -> rs'#r' = rs#r'. +Proof. + intros. unfold orimm. + case (Int.eq (high_u n) Int.zero). + (* ori *) + exists (nextinstr (rs#r1 <- v)). + split. apply exec_straight_one. reflexivity. reflexivity. + split. rewrite nextinstr_inv; auto with ppcgen. apply Pregmap.gss. + intros. rewrite nextinstr_inv; auto. apply Pregmap.gso; auto. + (* oris *) + generalize (Int.eq_spec (low_u n) Int.zero); + case (Int.eq (low_u n) Int.zero); intro. + exists (nextinstr (rs#r1 <- v)). + split. apply exec_straight_one. simpl. + generalize (low_high_u n). rewrite H. rewrite Int.or_zero. + intro. rewrite H0. reflexivity. reflexivity. + split. rewrite nextinstr_inv; auto with ppcgen. apply Pregmap.gss. + intros. rewrite nextinstr_inv; auto. apply Pregmap.gso; auto. + (* oris + ori *) + pose (rs1 := nextinstr (rs#r1 <- (Val.or rs#r2 (Vint (Int.shl (high_u n) (Int.repr 16)))))). + exists (nextinstr (rs1#r1 <- v)). + split. apply exec_straight_two with rs1 m. + reflexivity. simpl. unfold rs1 at 1. + rewrite nextinstr_inv; auto with ppcgen. + rewrite Pregmap.gss. rewrite Val.or_assoc. simpl. + rewrite low_high_u. reflexivity. reflexivity. reflexivity. + split. rewrite nextinstr_inv; auto with ppcgen. apply Pregmap.gss. + intros. rewrite nextinstr_inv; auto. rewrite Pregmap.gso; auto. + unfold rs1. rewrite nextinstr_inv; auto. rewrite Pregmap.gso; auto. +Qed. + +(** Xor integer immediate. *) + +Lemma xorimm_correct: + forall r1 (r2: ireg) n k (rs : regset) m, + let v := Val.xor rs#r2 (Vint n) in + exists rs', + exec_straight (xorimm r1 r2 n k) rs m k rs' m + /\ rs'#r1 = v + /\ forall r': preg, r' <> r1 -> r' <> PC -> rs'#r' = rs#r'. +Proof. + intros. unfold xorimm. + case (Int.eq (high_u n) Int.zero). + (* xori *) + exists (nextinstr (rs#r1 <- v)). + split. apply exec_straight_one. reflexivity. reflexivity. + split. rewrite nextinstr_inv; auto with ppcgen. apply Pregmap.gss. + intros. rewrite nextinstr_inv; auto. apply Pregmap.gso; auto. + (* xoris *) + generalize (Int.eq_spec (low_u n) Int.zero); + case (Int.eq (low_u n) Int.zero); intro. + exists (nextinstr (rs#r1 <- v)). + split. apply exec_straight_one. simpl. + generalize (low_high_u_xor n). rewrite H. rewrite Int.xor_zero. + intro. rewrite H0. reflexivity. reflexivity. + split. rewrite nextinstr_inv; auto with ppcgen. apply Pregmap.gss. + intros. rewrite nextinstr_inv; auto. apply Pregmap.gso; auto. + (* xoris + xori *) + pose (rs1 := nextinstr (rs#r1 <- (Val.xor rs#r2 (Vint (Int.shl (high_u n) (Int.repr 16)))))). + exists (nextinstr (rs1#r1 <- v)). + split. apply exec_straight_two with rs1 m. + reflexivity. simpl. unfold rs1 at 1. + rewrite nextinstr_inv; try discriminate. + rewrite Pregmap.gss. rewrite Val.xor_assoc. simpl. + rewrite low_high_u_xor. reflexivity. reflexivity. reflexivity. + split. rewrite nextinstr_inv; auto with ppcgen. + apply Pregmap.gss. + intros. rewrite nextinstr_inv; auto. rewrite Pregmap.gso; auto. + unfold rs1. rewrite nextinstr_inv; auto. rewrite Pregmap.gso; auto. +Qed. + +(** Indexed memory loads. *) + +Lemma loadind_aux_correct: + forall (base: ireg) ofs ty dst (rs: regset) m v, + Mem.loadv (chunk_of_type ty) m (Val.add rs#base (Vint ofs)) = Some v -> + mreg_type dst = ty -> + base <> GPR0 -> + exec_instr ge fn (loadind_aux base ofs ty dst) rs m = + OK (nextinstr (rs#(preg_of dst) <- v)) m. +Proof. + intros. unfold loadind_aux. unfold preg_of. rewrite H0. destruct ty. + simpl. unfold load1. rewrite gpr_or_zero_not_zero; auto. + unfold const_low. simpl in H. rewrite H. auto. + simpl. unfold load1. rewrite gpr_or_zero_not_zero; auto. + unfold const_low. simpl in H. rewrite H. auto. +Qed. + +Lemma loadind_correct: + forall (base: ireg) ofs ty dst k (rs: regset) m v, + Mem.loadv (chunk_of_type ty) m (Val.add rs#base (Vint ofs)) = Some v -> + mreg_type dst = ty -> + base <> GPR0 -> + exists rs', + exec_straight (loadind base ofs ty dst k) rs m k rs' m + /\ rs'#(preg_of dst) = v + /\ forall r, r <> PC -> r <> GPR2 -> r <> preg_of dst -> rs'#r = rs#r. +Proof. + intros. unfold loadind. + assert (preg_of dst <> PC). + unfold preg_of. case (mreg_type dst); discriminate. + (* short offset *) + case (Int.eq (high_s ofs) Int.zero). + exists (nextinstr (rs#(preg_of dst) <- v)). + split. apply exec_straight_one. apply loadind_aux_correct; auto. + unfold nextinstr. rewrite Pregmap.gss. rewrite Pregmap.gso. auto. auto. + split. rewrite nextinstr_inv; auto. apply Pregmap.gss. + intros. rewrite nextinstr_inv; auto. apply Pregmap.gso; auto. + (* long offset *) + pose (rs1 := nextinstr (rs#GPR2 <- (Val.add rs#base (Vint (Int.shl (high_s ofs) (Int.repr 16)))))). + exists (nextinstr (rs1#(preg_of dst) <- v)). + split. apply exec_straight_two with rs1 m. + simpl. rewrite gpr_or_zero_not_zero; auto. + apply loadind_aux_correct. + unfold rs1. rewrite nextinstr_inv; auto with ppcgen. rewrite Pregmap.gss. + rewrite Val.add_assoc. simpl. rewrite low_high_s. assumption. + auto. discriminate. reflexivity. + unfold nextinstr. rewrite Pregmap.gss. rewrite Pregmap.gso. auto. auto. + split. rewrite nextinstr_inv; auto. apply Pregmap.gss. + intros. rewrite nextinstr_inv; auto. rewrite Pregmap.gso; auto. + unfold rs1. rewrite nextinstr_inv; auto. rewrite Pregmap.gso; auto. +Qed. + +(** Indexed memory stores. *) + +Lemma storeind_aux_correct: + forall (base: ireg) ofs ty src (rs: regset) m m', + Mem.storev (chunk_of_type ty) m (Val.add rs#base (Vint ofs)) (rs#(preg_of src)) = Some m' -> + mreg_type src = ty -> + base <> GPR0 -> + exec_instr ge fn (storeind_aux src base ofs ty) rs m = + OK (nextinstr rs) m'. +Proof. + intros. unfold storeind_aux. unfold preg_of in H. rewrite H0 in H. destruct ty. + simpl. unfold store1. rewrite gpr_or_zero_not_zero; auto. + unfold const_low. simpl in H. rewrite H. auto. + simpl. unfold store1. rewrite gpr_or_zero_not_zero; auto. + unfold const_low. simpl in H. rewrite H. auto. +Qed. + +Lemma storeind_correct: + forall (base: ireg) ofs ty src k (rs: regset) m m', + Mem.storev (chunk_of_type ty) m (Val.add rs#base (Vint ofs)) (rs#(preg_of src)) = Some m' -> + mreg_type src = ty -> + base <> GPR0 -> + exists rs', + exec_straight (storeind src base ofs ty k) rs m k rs' m' + /\ forall r, r <> PC -> r <> GPR2 -> rs'#r = rs#r. +Proof. + intros. unfold storeind. + (* short offset *) + case (Int.eq (high_s ofs) Int.zero). + exists (nextinstr rs). + split. apply exec_straight_one. apply storeind_aux_correct; auto. + reflexivity. + intros. rewrite nextinstr_inv; auto. + (* long offset *) + pose (rs1 := nextinstr (rs#GPR2 <- (Val.add rs#base (Vint (Int.shl (high_s ofs) (Int.repr 16)))))). + exists (nextinstr rs1). + split. apply exec_straight_two with rs1 m. + simpl. rewrite gpr_or_zero_not_zero; auto. + apply storeind_aux_correct; auto with ppcgen. + unfold rs1. rewrite nextinstr_inv; auto with ppcgen. rewrite Pregmap.gss. + rewrite nextinstr_inv; auto with ppcgen. + rewrite Pregmap.gso; auto with ppcgen. + rewrite Val.add_assoc. simpl. rewrite low_high_s. assumption. + reflexivity. reflexivity. + intros. rewrite nextinstr_inv; auto. + unfold rs1. rewrite nextinstr_inv; auto. rewrite Pregmap.gso; auto. +Qed. + +(** Float comparisons. *) + +Lemma floatcomp_correct: + forall cmp (r1 r2: freg) k rs m, + exists rs', + exec_straight (floatcomp cmp r1 r2 k) rs m k rs' m + /\ rs'#(reg_of_crbit (fst (crbit_for_fcmp cmp))) = + (if snd (crbit_for_fcmp cmp) + then Val.cmpf cmp rs#r1 rs#r2 + else Val.notbool (Val.cmpf cmp rs#r1 rs#r2)) + /\ forall r', + r' <> PC -> r' <> CR0_0 -> r' <> CR0_1 -> + r' <> CR0_2 -> r' <> CR0_3 -> rs'#r' = rs#r'. +Proof. + intros. + generalize (compare_float_spec rs rs#r1 rs#r2). + intros [A [B [C D]]]. + set (rs1 := nextinstr (compare_float rs rs#r1 rs#r2)) in *. + assert ((cmp = Ceq \/ cmp = Cne \/ cmp = Clt \/ cmp = Cgt) + \/ (cmp = Cle \/ cmp = Cge)). + case cmp; tauto. + unfold floatcomp. elim H; intro; clear H. + exists rs1. + split. generalize H0; intros [EQ|[EQ|[EQ|EQ]]]; subst cmp; + apply exec_straight_one; reflexivity. + split. + generalize H0; intros [EQ|[EQ|[EQ|EQ]]]; subst cmp; simpl; auto. + rewrite Val.negate_cmpf_eq. auto. + auto. + (* two instrs *) + exists (nextinstr (rs1#CR0_3 <- (Val.cmpf cmp rs#r1 rs#r2))). + split. elim H0; intro; subst cmp. + apply exec_straight_two with rs1 m. + reflexivity. simpl. + rewrite C; rewrite A. rewrite Val.or_commut. rewrite <- Val.cmpf_le. + reflexivity. reflexivity. reflexivity. + apply exec_straight_two with rs1 m. + reflexivity. simpl. + rewrite C; rewrite B. rewrite Val.or_commut. rewrite <- Val.cmpf_ge. + reflexivity. reflexivity. reflexivity. + split. elim H0; intro; subst cmp; simpl. + reflexivity. + reflexivity. + intros. rewrite nextinstr_inv; auto. rewrite Pregmap.gso; auto. +Qed. + +Ltac TypeInv := + match goal with + | H: (List.map ?f ?x = nil) |- _ => + destruct x; [clear H | simpl in H; discriminate] + | H: (List.map ?f ?x = ?hd :: ?tl) |- _ => + destruct x; simpl in H; + [ discriminate | + injection H; clear H; let T := fresh "T" in ( + intros H T; TypeInv) ] + | _ => idtac + end. + +(** Translation of conditions. *) + +Lemma transl_cond_correct_aux: + forall cond args k ms sp rs m, + map mreg_type args = type_of_condition cond -> + agree ms sp rs -> + exists rs', + exec_straight (transl_cond cond args k) rs m k rs' m + /\ rs'#(reg_of_crbit (fst (crbit_for_cond cond))) = + (if snd (crbit_for_cond cond) + then eval_condition_total cond (map ms args) + else Val.notbool (eval_condition_total cond (map ms args))) + /\ agree ms sp rs'. +Proof. + intros. destruct cond; simpl in H; TypeInv. + (* Ccomp *) + simpl. + generalize (compare_sint_spec rs ms#m0 ms#m1). + intros [A [B [C D]]]. + exists (nextinstr (compare_sint rs ms#m0 ms#m1)). + split. apply exec_straight_one. simpl. + repeat (rewrite <- (ireg_val ms sp rs); auto). + reflexivity. + split. + case c; simpl; auto; rewrite <- Val.negate_cmp; simpl; auto. + apply agree_exten_2 with rs; auto. + (* Ccompu *) + simpl. + generalize (compare_uint_spec rs ms#m0 ms#m1). + intros [A [B [C D]]]. + exists (nextinstr (compare_uint rs ms#m0 ms#m1)). + split. apply exec_straight_one. simpl. + repeat (rewrite <- (ireg_val ms sp rs); auto). + reflexivity. + split. + case c; simpl; auto; rewrite <- Val.negate_cmpu; simpl; auto. + apply agree_exten_2 with rs; auto. + (* Ccompimm *) + simpl. + case (Int.eq (high_s i) Int.zero). + generalize (compare_sint_spec rs ms#m0 (Vint i)). + intros [A [B [C D]]]. + exists (nextinstr (compare_sint rs ms#m0 (Vint i))). + split. apply exec_straight_one. simpl. + repeat (rewrite <- (ireg_val ms sp rs); auto). + reflexivity. + split. + case c; simpl; auto; rewrite <- Val.negate_cmp; simpl; auto. + apply agree_exten_2 with rs; auto. + generalize (loadimm_correct GPR2 i (Pcmpw (ireg_of m0) GPR2 :: k) rs m). + intros [rs1 [EX1 [RES1 OTH1]]]. + assert (agree ms sp rs1). apply agree_exten_2 with rs; auto. + generalize (compare_sint_spec rs1 ms#m0 (Vint i)). + intros [A [B [C D]]]. + exists (nextinstr (compare_sint rs1 ms#m0 (Vint i))). + split. eapply exec_straight_trans. eexact EX1. + apply exec_straight_one. simpl. + repeat (rewrite <- (ireg_val ms sp rs1); auto). rewrite RES1. + reflexivity. reflexivity. + split. + case c; simpl; auto; rewrite <- Val.negate_cmp; simpl; auto. + apply agree_exten_2 with rs1; auto. + (* Ccompuimm *) + simpl. + case (Int.eq (high_u i) Int.zero). + generalize (compare_uint_spec rs ms#m0 (Vint i)). + intros [A [B [C D]]]. + exists (nextinstr (compare_uint rs ms#m0 (Vint i))). + split. apply exec_straight_one. simpl. + repeat (rewrite <- (ireg_val ms sp rs); auto). + reflexivity. + split. + case c; simpl; auto; rewrite <- Val.negate_cmpu; simpl; auto. + apply agree_exten_2 with rs; auto. + generalize (loadimm_correct GPR2 i (Pcmplw (ireg_of m0) GPR2 :: k) rs m). + intros [rs1 [EX1 [RES1 OTH1]]]. + assert (agree ms sp rs1). apply agree_exten_2 with rs; auto. + generalize (compare_uint_spec rs1 ms#m0 (Vint i)). + intros [A [B [C D]]]. + exists (nextinstr (compare_uint rs1 ms#m0 (Vint i))). + split. eapply exec_straight_trans. eexact EX1. + apply exec_straight_one. simpl. + repeat (rewrite <- (ireg_val ms sp rs1); auto). rewrite RES1. + reflexivity. reflexivity. + split. + case c; simpl; auto; rewrite <- Val.negate_cmpu; simpl; auto. + apply agree_exten_2 with rs1; auto. + (* Ccompf *) + simpl. + generalize (floatcomp_correct c (freg_of m0) (freg_of m1) k rs m). + intros [rs' [EX [RES OTH]]]. + exists rs'. split. auto. + split. rewrite RES. repeat (rewrite <- (freg_val ms sp rs); auto). + apply agree_exten_2 with rs; auto. + (* Cnotcompf *) + simpl. + generalize (floatcomp_correct c (freg_of m0) (freg_of m1) k rs m). + intros [rs' [EX [RES OTH]]]. + exists rs'. split. auto. + split. rewrite RES. repeat (rewrite <- (freg_val ms sp rs); auto). + assert (forall v1 v2, Val.notbool (Val.notbool (Val.cmpf c v1 v2)) = Val.cmpf c v1 v2). + intros v1 v2; unfold Val.cmpf; destruct v1; destruct v2; auto. + apply Val.notbool_idem2. + rewrite H. + generalize RES. case (snd (crbit_for_fcmp c)); simpl; auto. + apply agree_exten_2 with rs; auto. + (* Cmaskzero *) + simpl. + generalize (andimm_correct GPR2 (ireg_of m0) i k rs m (ireg_of_not_GPR2 m0)). + intros [rs' [A [B [C D]]]]. + exists rs'. split. assumption. + split. rewrite C. rewrite <- (ireg_val ms sp rs); auto. + apply agree_exten_2 with rs; auto. + (* Cmasknotzero *) + simpl. + generalize (andimm_correct GPR2 (ireg_of m0) i k rs m (ireg_of_not_GPR2 m0)). + intros [rs' [A [B [C D]]]]. + exists rs'. split. assumption. + split. rewrite C. rewrite <- (ireg_val ms sp rs); auto. + rewrite Val.notbool_idem3. reflexivity. + apply agree_exten_2 with rs; auto. +Qed. + +Lemma transl_cond_correct: + forall cond args k ms sp rs m b, + map mreg_type args = type_of_condition cond -> + agree ms sp rs -> + eval_condition cond (map ms args) = Some b -> + exists rs', + exec_straight (transl_cond cond args k) rs m k rs' m + /\ rs'#(reg_of_crbit (fst (crbit_for_cond cond))) = + (if snd (crbit_for_cond cond) + then Val.of_bool b + else Val.notbool (Val.of_bool b)) + /\ agree ms sp rs'. +Proof. + intros. rewrite <- (eval_condition_weaken _ _ H1). + apply transl_cond_correct_aux; auto. +Qed. + +(** Translation of arithmetic operations. *) + +Ltac TranslOpSimpl := + match goal with + | |- exists rs' : regset, + exec_straight ?c ?rs ?m ?k rs' ?m /\ + agree (Regmap.set ?res ?v ?ms) ?sp rs' => + (exists (nextinstr (rs#(ireg_of res) <- v)); + split; + [ apply exec_straight_one; + [ repeat (rewrite (ireg_val ms sp rs); auto); reflexivity + | reflexivity ] + | auto with ppcgen ]) + || + (exists (nextinstr (rs#(freg_of res) <- v)); + split; + [ apply exec_straight_one; + [ repeat (rewrite (freg_val ms sp rs); auto); reflexivity + | reflexivity ] + | auto with ppcgen ]) + end. + +Lemma transl_op_correct: + forall op args res k ms sp rs m v, + wt_instr (Mop op args res) -> + agree ms sp rs -> + eval_operation ge sp op (map ms args) = Some v -> + exists rs', + exec_straight (transl_op op args res k) rs m k rs' m + /\ agree (Regmap.set res v ms) sp rs'. +Proof. + intros. rewrite <- (eval_operation_weaken _ _ _ _ H1). clear H1; clear v. + inversion H. + (* Omove *) + simpl. exists (nextinstr (rs#(preg_of res) <- (ms r1))). + split. caseEq (mreg_type r1); intro. + apply exec_straight_one. simpl. rewrite (ireg_val ms sp rs); auto. + simpl. unfold preg_of. rewrite <- H2. rewrite H5. reflexivity. + auto with ppcgen. + apply exec_straight_one. simpl. rewrite (freg_val ms sp rs); auto. + simpl. unfold preg_of. rewrite <- H2. rewrite H5. reflexivity. + auto with ppcgen. + auto with ppcgen. + (* Oundef *) + simpl. exists (nextinstr (rs#(preg_of res) <- Vundef)). + split. caseEq (mreg_type res); intro. + apply exec_straight_one. simpl. + unfold preg_of; rewrite H1. reflexivity. + auto with ppcgen. + apply exec_straight_one. simpl. + unfold preg_of; rewrite H1. reflexivity. + auto with ppcgen. + auto with ppcgen. + (* Other instructions *) + clear H1; clear H2; clear H3. + destruct op; simpl in H6; injection H6; clear H6; intros; + TypeInv; simpl; try (TranslOpSimpl). + (* Omove again *) + congruence. + (* Ointconst *) + generalize (loadimm_correct (ireg_of res) i k rs m). + intros [rs' [A [B C]]]. + exists rs'. split. auto. + apply agree_set_mireg_exten with rs; auto. + (* Ofloatconst *) + exists (nextinstr (rs#(freg_of res) <- (Vfloat f) #GPR2 <- Vundef)). + split. apply exec_straight_one. reflexivity. reflexivity. + auto with ppcgen. + (* Oaddrsymbol *) + set (v := find_symbol_offset ge i i0). + pose (rs1 := nextinstr (rs#(ireg_of res) <- (high_half_unsigned v))). + exists (nextinstr (rs1#(ireg_of res) <- v)). + split. apply exec_straight_two with rs1 m. + unfold exec_instr. rewrite gpr_or_zero_zero. + unfold const_high. rewrite Val.add_commut. + rewrite high_half_unsigned_zero. reflexivity. + simpl. unfold rs1 at 1. rewrite nextinstr_inv; auto with ppcgen. + rewrite Pregmap.gss. + fold v. rewrite Val.or_commut. rewrite low_high_half_unsigned. + reflexivity. reflexivity. reflexivity. + unfold rs1. auto with ppcgen. + (* Oaddrstack *) + assert (GPR1 <> GPR2). discriminate. + generalize (addimm_correct (ireg_of res) GPR1 i k rs m H2). + intros [rs' [EX [RES OTH]]]. + exists rs'. split. auto. + apply agree_set_mireg_exten with rs; auto. + rewrite (sp_val ms sp rs). auto. auto. + (* Oundef again *) + congruence. + (* Oaddimm *) + generalize (addimm_correct (ireg_of res) (ireg_of m0) i k rs m + (ireg_of_not_GPR2 m0)). + intros [rs' [A [B C]]]. + exists rs'. split. auto. + apply agree_set_mireg_exten with rs; auto. + rewrite (ireg_val ms sp rs); auto. + (* Osub *) + exists (nextinstr (rs#(ireg_of res) <- (Val.sub (ms m0) (ms m1)) #CARRY <- Vundef)). + split. apply exec_straight_one. repeat (rewrite (ireg_val ms sp rs); auto). + simpl. reflexivity. auto with ppcgen. + (* Osubimm *) + case (Int.eq (high_s i) Int.zero). + exists (nextinstr (rs#(ireg_of res) <- (Val.sub (Vint i) (ms m0)) #CARRY <- Vundef)). + split. apply exec_straight_one. rewrite (ireg_val ms sp rs); auto. + reflexivity. simpl. auto with ppcgen. + generalize (loadimm_correct GPR2 i (Psubfc (ireg_of res) (ireg_of m0) GPR2 :: k) rs m). + intros [rs1 [EX [RES OTH]]]. + assert (agree ms sp rs1). apply agree_exten_2 with rs; auto. + exists (nextinstr (rs1#(ireg_of res) <- (Val.sub (Vint i) (ms m0)) #CARRY <- Vundef)). + split. eapply exec_straight_trans. eexact EX. + apply exec_straight_one. repeat (rewrite (ireg_val ms sp rs); auto). + simpl. rewrite RES. rewrite OTH. reflexivity. + generalize (ireg_of_not_GPR2 m0); congruence. + discriminate. + reflexivity. simpl; auto with ppcgen. + (* Omulimm *) + case (Int.eq (high_s i) Int.zero). + exists (nextinstr (rs#(ireg_of res) <- (Val.mul (ms m0) (Vint i)))). + split. apply exec_straight_one. rewrite (ireg_val ms sp rs); auto. + reflexivity. auto with ppcgen. + generalize (loadimm_correct GPR2 i (Pmullw (ireg_of res) (ireg_of m0) GPR2 :: k) rs m). + intros [rs1 [EX [RES OTH]]]. + assert (agree ms sp rs1). apply agree_exten_2 with rs; auto. + exists (nextinstr (rs1#(ireg_of res) <- (Val.mul (ms m0) (Vint i)))). + split. eapply exec_straight_trans. eexact EX. + apply exec_straight_one. repeat (rewrite (ireg_val ms sp rs); auto). + simpl. rewrite RES. rewrite OTH. reflexivity. + generalize (ireg_of_not_GPR2 m0); congruence. + discriminate. + reflexivity. simpl; auto with ppcgen. + (* Oand *) + pose (v := Val.and (ms m0) (ms m1)). + pose (rs1 := rs#(ireg_of res) <- v). + generalize (compare_sint_spec rs1 v Vzero). + intros [A [B [C D]]]. + exists (nextinstr (compare_sint rs1 v Vzero)). + split. apply exec_straight_one. + unfold rs1, v. repeat (rewrite (ireg_val ms sp rs); auto). + reflexivity. + apply agree_exten_2 with rs1. unfold rs1, v; auto with ppcgen. + auto. + (* Oandimm *) + generalize (andimm_correct (ireg_of res) (ireg_of m0) i k rs m + (ireg_of_not_GPR2 m0)). + intros [rs' [A [B [C D]]]]. + exists rs'. split. auto. apply agree_set_mireg_exten with rs; auto. + rewrite (ireg_val ms sp rs); auto. + (* Oorimm *) + generalize (orimm_correct (ireg_of res) (ireg_of m0) i k rs m). + intros [rs' [A [B C]]]. + exists rs'. split. auto. apply agree_set_mireg_exten with rs; auto. + rewrite (ireg_val ms sp rs); auto. + (* Oxorimm *) + generalize (xorimm_correct (ireg_of res) (ireg_of m0) i k rs m). + intros [rs' [A [B C]]]. + exists rs'. split. auto. apply agree_set_mireg_exten with rs; auto. + rewrite (ireg_val ms sp rs); auto. + (* Oshr *) + exists (nextinstr (rs#(ireg_of res) <- (Val.shr (ms m0) (ms m1)) #CARRY <- (Val.shr_carry (ms m0) (ms m1)))). + split. apply exec_straight_one. repeat (rewrite (ireg_val ms sp rs); auto). + reflexivity. auto with ppcgen. + (* Oshrimm *) + exists (nextinstr (rs#(ireg_of res) <- (Val.shr (ms m0) (Vint i)) #CARRY <- (Val.shr_carry (ms m0) (Vint i)))). + split. apply exec_straight_one. repeat (rewrite (ireg_val ms sp rs); auto). + reflexivity. auto with ppcgen. + (* Oxhrximm *) + pose (rs1 := nextinstr (rs#(ireg_of res) <- (Val.shr (ms m0) (Vint i)) #CARRY <- (Val.shr_carry (ms m0) (Vint i)))). + exists (nextinstr (rs1#(ireg_of res) <- (Val.shrx (ms m0) (Vint i)))). + split. apply exec_straight_two with rs1 m. + unfold rs1; rewrite (ireg_val ms sp rs); auto. + simpl; unfold rs1; repeat rewrite <- (ireg_val ms sp rs); auto. + repeat (rewrite nextinstr_inv; try discriminate). + repeat rewrite Pregmap.gss. decEq. decEq. + apply (f_equal3 (@Pregmap.set val)); auto. + rewrite Pregmap.gso. rewrite Pregmap.gss. apply Val.shrx_carry. + discriminate. reflexivity. reflexivity. + apply agree_exten_2 with (rs#(ireg_of res) <- (Val.shrx (ms m0) (Vint i))). + auto with ppcgen. + intros. rewrite nextinstr_inv; auto. + case (preg_eq (ireg_of res) r); intro. + subst r. repeat rewrite Pregmap.gss. auto. + repeat rewrite Pregmap.gso; auto. + unfold rs1. rewrite nextinstr_inv; auto. + repeat rewrite Pregmap.gso; auto. + (* Ointoffloat *) + exists (nextinstr (rs#(ireg_of res) <- (Val.intoffloat (ms m0)) #FPR13 <- Vundef)). + split. apply exec_straight_one. + repeat (rewrite (freg_val ms sp rs); auto). + reflexivity. auto with ppcgen. + (* Ofloatofint *) + exists (nextinstr (rs#(freg_of res) <- (Val.floatofint (ms m0)) #GPR2 <- Vundef #FPR13 <- Vundef)). + split. apply exec_straight_one. + repeat (rewrite (ireg_val ms sp rs); auto). + reflexivity. auto 10 with ppcgen. + (* Ofloatofintu *) + exists (nextinstr (rs#(freg_of res) <- (Val.floatofintu (ms m0)) #GPR2 <- Vundef #FPR13 <- Vundef)). + split. apply exec_straight_one. + repeat (rewrite (ireg_val ms sp rs); auto). + reflexivity. auto 10 with ppcgen. + (* Ocmp *) + set (bit := fst (crbit_for_cond c)). + set (isset := snd (crbit_for_cond c)). + set (k1 := + Pmfcrbit (ireg_of res) bit :: + (if isset + then k + else Pxori (ireg_of res) (ireg_of res) (Cint Int.one) :: k)). + generalize (transl_cond_correct_aux c args k1 ms sp rs m H2 H0). + fold bit; fold isset. + intros [rs1 [EX1 [RES1 AG1]]]. + set (rs2 := nextinstr (rs1#(ireg_of res) <- (rs1#(reg_of_crbit bit)))). + destruct isset. + exists rs2. + split. apply exec_straight_trans with k1 rs1 m. assumption. + unfold k1. apply exec_straight_one. + reflexivity. reflexivity. + unfold rs2. rewrite RES1. auto with ppcgen. + exists (nextinstr (rs2#(ireg_of res) <- (eval_condition_total c ms##args))). + split. apply exec_straight_trans with k1 rs1 m. assumption. + unfold k1. apply exec_straight_two with rs2 m. + reflexivity. simpl. + replace (Val.xor (rs2 (ireg_of res)) (Vint Int.one)) + with (eval_condition_total c ms##args). + reflexivity. + unfold rs2. rewrite nextinstr_inv; auto with ppcgen. rewrite Pregmap.gss. + rewrite RES1. apply Val.notbool_xor. apply eval_condition_total_is_bool. + reflexivity. reflexivity. + unfold rs2. auto with ppcgen. +Qed. + +Lemma transl_load_store_correct: + forall (mk1: constant -> ireg -> instruction) (mk2: ireg -> ireg -> instruction) + addr args k ms sp rs m ms' m', + (forall cst (r1: ireg) (rs1: regset) k, + eval_addressing_total ge sp addr (map ms args) = Val.add rs1#r1 (const_low ge cst) -> + agree ms sp rs1 -> + r1 <> GPR0 -> + exists rs', + exec_straight (mk1 cst r1 :: k) rs1 m k rs' m' /\ + agree ms' sp rs') -> + (forall (r1 r2: ireg) (rs1: regset) k, + eval_addressing_total ge sp addr (map ms args) = Val.add rs1#r1 rs1#r2 -> + agree ms sp rs1 -> + exists rs', + exec_straight (mk2 r1 r2 :: k) rs1 m k rs' m' /\ + agree ms' sp rs') -> + agree ms sp rs -> + map mreg_type args = type_of_addressing addr -> + exists rs', + exec_straight (transl_load_store mk1 mk2 addr args k) rs m + k rs' m' + /\ agree ms' sp rs'. +Proof. + intros. destruct addr; simpl in H2; TypeInv; simpl. + (* Aindexed *) + case (ireg_eq (ireg_of t) GPR0); intro. + (* Aindexed from GPR0 *) + set (rs1 := nextinstr (rs#GPR2 <- (ms t))). + set (rs2 := nextinstr (rs1#GPR2 <- (Val.add (ms t) (Vint (Int.shl (high_s i) (Int.repr 16)))))). + assert (ADDR: eval_addressing_total ge sp (Aindexed i) ms##(t :: nil) = + Val.add rs2#GPR2 (const_low ge (Cint (low_s i)))). + simpl. unfold rs2. rewrite nextinstr_inv. rewrite Pregmap.gss. + rewrite Val.add_assoc. simpl. rewrite low_high_s. auto. + discriminate. + assert (AG: agree ms sp rs2). unfold rs2, rs1; auto 6 with ppcgen. + assert (NOT0: GPR2 <> GPR0). discriminate. + generalize (H _ _ _ k ADDR AG NOT0). + intros [rs' [EX' AG']]. + exists rs'. split. + apply exec_straight_trans with (mk1 (Cint (low_s i)) GPR2 :: k) rs2 m. + apply exec_straight_two with rs1 m. + unfold rs1. rewrite (ireg_val ms sp rs); auto. + unfold rs2. replace (ms t) with (rs1#GPR2). auto. + unfold rs1. rewrite nextinstr_inv. apply Pregmap.gss. discriminate. + reflexivity. reflexivity. + assumption. assumption. + (* Aindexed short *) + case (Int.eq (high_s i) Int.zero). + assert (ADDR: eval_addressing_total ge sp (Aindexed i) ms##(t :: nil) = + Val.add rs#(ireg_of t) (const_low ge (Cint i))). + simpl. rewrite (ireg_val ms sp rs); auto. + generalize (H _ _ _ k ADDR H1 n). intros [rs' [EX' AG']]. + exists rs'. split. auto. auto. + (* Aindexed long *) + set (rs1 := nextinstr (rs#GPR2 <- (Val.add (ms t) (Vint (Int.shl (high_s i) (Int.repr 16)))))). + assert (ADDR: eval_addressing_total ge sp (Aindexed i) ms##(t :: nil) = + Val.add rs1#GPR2 (const_low ge (Cint (low_s i)))). + simpl. unfold rs1. rewrite nextinstr_inv. rewrite Pregmap.gss. + rewrite Val.add_assoc. simpl. rewrite low_high_s. auto. + discriminate. + assert (AG: agree ms sp rs1). unfold rs1; auto with ppcgen. + assert (NOT0: GPR2 <> GPR0). discriminate. + generalize (H _ _ _ k ADDR AG NOT0). intros [rs' [EX' AG']]. + exists rs'. split. apply exec_straight_step with rs1 m. + simpl. rewrite gpr_or_zero_not_zero; auto. + rewrite <- (ireg_val ms sp rs); auto. reflexivity. + assumption. assumption. + (* Aindexed2 *) + apply H0. + simpl. repeat (rewrite (ireg_val ms sp rs); auto). auto. + (* Aglobal *) + set (rs1 := nextinstr (rs#GPR2 <- (const_high ge (Csymbol_high_signed i i0)))). + assert (ADDR: eval_addressing_total ge sp (Aglobal i i0) ms##nil = + Val.add rs1#GPR2 (const_low ge (Csymbol_low_signed i i0))). + simpl. unfold rs1. rewrite nextinstr_inv. rewrite Pregmap.gss. + unfold const_high, const_low. + set (v := symbol_offset ge i i0). + symmetry. rewrite Val.add_commut. apply low_high_half_signed. + discriminate. + assert (AG: agree ms sp rs1). unfold rs1; auto with ppcgen. + assert (NOT0: GPR2 <> GPR0). discriminate. + generalize (H _ _ _ k ADDR AG NOT0). intros [rs' [EX' AG']]. + exists rs'. split. apply exec_straight_step with rs1 m. + unfold exec_instr. rewrite gpr_or_zero_zero. + rewrite Val.add_commut. unfold const_high. + rewrite high_half_signed_zero. + reflexivity. reflexivity. + assumption. assumption. + (* Abased *) + assert (COMMON: + forall (rs1: regset) r, + r <> GPR0 -> + ms t = rs1#r -> + agree ms sp rs1 -> + exists rs', + exec_straight + (Paddis GPR2 r (Csymbol_high_signed i i0) + :: mk1 (Csymbol_low_signed i i0) GPR2 :: k) rs1 m k rs' m' + /\ agree ms' sp rs'). + intros. + set (rs2 := nextinstr (rs1#GPR2 <- (Val.add (ms t) (const_high ge (Csymbol_high_signed i i0))))). + assert (ADDR: eval_addressing_total ge sp (Abased i i0) ms##(t::nil) = + Val.add rs2#GPR2 (const_low ge (Csymbol_low_signed i i0))). + simpl. unfold rs2. rewrite nextinstr_inv. rewrite Pregmap.gss. + unfold const_high. + set (v := symbol_offset ge i i0). + rewrite Val.add_assoc. + rewrite (Val.add_commut (high_half_signed v)). + rewrite low_high_half_signed. apply Val.add_commut. + discriminate. + assert (AG: agree ms sp rs2). unfold rs2; auto with ppcgen. + assert (NOT0: GPR2 <> GPR0). discriminate. + generalize (H _ _ _ k ADDR AG NOT0). intros [rs' [EX' AG']]. + exists rs'. split. apply exec_straight_step with rs2 m. + unfold exec_instr. rewrite gpr_or_zero_not_zero; auto. + rewrite <- H3. reflexivity. reflexivity. + assumption. assumption. + case (ireg_eq (ireg_of t) GPR0); intro. + set (rs1 := nextinstr (rs#GPR2 <- (ms t))). + assert (R1: GPR2 <> GPR0). discriminate. + assert (R2: ms t = rs1 GPR2). + unfold rs1. rewrite nextinstr_inv. rewrite Pregmap.gss; auto. + discriminate. + assert (R3: agree ms sp rs1). unfold rs1; auto with ppcgen. + generalize (COMMON rs1 GPR2 R1 R2 R3). intros [rs' [EX' AG']]. + exists rs'. split. + apply exec_straight_step with rs1 m. + unfold rs1. rewrite (ireg_val ms sp rs); auto. reflexivity. + assumption. assumption. + apply COMMON; auto. eapply ireg_val; eauto. + (* Ainstack *) + case (Int.eq (high_s i) Int.zero). + apply H. simpl. rewrite (sp_val ms sp rs); auto. auto. + discriminate. + set (rs1 := nextinstr (rs#GPR2 <- (Val.add sp (Vint (Int.shl (high_s i) (Int.repr 16)))))). + assert (ADDR: eval_addressing_total ge sp (Ainstack i) ms##nil = + Val.add rs1#GPR2 (const_low ge (Cint (low_s i)))). + simpl. unfold rs1. rewrite nextinstr_inv. rewrite Pregmap.gss. + rewrite Val.add_assoc. decEq. simpl. rewrite low_high_s. auto. + discriminate. + assert (AG: agree ms sp rs1). unfold rs1; auto with ppcgen. + assert (NOT0: GPR2 <> GPR0). discriminate. + generalize (H _ _ _ k ADDR AG NOT0). intros [rs' [EX' AG']]. + exists rs'. split. apply exec_straight_step with rs1 m. + simpl. rewrite gpr_or_zero_not_zero. + unfold rs1. rewrite (sp_val ms sp rs). reflexivity. + auto. discriminate. reflexivity. assumption. assumption. +Qed. + +(** Translation of memory loads. *) + +Lemma transl_load_correct: + forall (mk1: constant -> ireg -> instruction) (mk2: ireg -> ireg -> instruction) + chunk addr args k ms sp rs m dst a v, + (forall cst (r1: ireg) (rs1: regset), + exec_instr ge fn (mk1 cst r1) rs1 m = + load1 ge chunk (preg_of dst) cst r1 rs1 m) -> + (forall (r1 r2: ireg) (rs1: regset), + exec_instr ge fn (mk2 r1 r2) rs1 m = + load2 chunk (preg_of dst) r1 r2 rs1 m) -> + agree ms sp rs -> + map mreg_type args = type_of_addressing addr -> + eval_addressing ge sp addr (map ms args) = Some a -> + Mem.loadv chunk m a = Some v -> + exists rs', + exec_straight (transl_load_store mk1 mk2 addr args k) rs m + k rs' m + /\ agree (Regmap.set dst v ms) sp rs'. +Proof. + intros. apply transl_load_store_correct with ms. + intros. exists (nextinstr (rs1#(preg_of dst) <- v)). + split. apply exec_straight_one. rewrite H. + unfold load1. rewrite gpr_or_zero_not_zero; auto. + rewrite <- (eval_addressing_weaken _ _ _ _ H3) in H4. + rewrite H5 in H4. rewrite H4. auto. + auto with ppcgen. auto with ppcgen. + intros. exists (nextinstr (rs1#(preg_of dst) <- v)). + split. apply exec_straight_one. rewrite H0. + unfold load2. + rewrite <- (eval_addressing_weaken _ _ _ _ H3) in H4. + rewrite H5 in H4. rewrite H4. auto. + auto with ppcgen. auto with ppcgen. + auto. auto. +Qed. + +(** Translation of memory stores. *) + +Lemma transl_store_correct: + forall (mk1: constant -> ireg -> instruction) (mk2: ireg -> ireg -> instruction) + chunk addr args k ms sp rs m src a m', + (forall cst (r1: ireg) (rs1: regset), + exec_instr ge fn (mk1 cst r1) rs1 m = + store1 ge chunk (preg_of src) cst r1 rs1 m) -> + (forall (r1 r2: ireg) (rs1: regset), + exec_instr ge fn (mk2 r1 r2) rs1 m = + store2 chunk (preg_of src) r1 r2 rs1 m) -> + agree ms sp rs -> + map mreg_type args = type_of_addressing addr -> + eval_addressing ge sp addr (map ms args) = Some a -> + Mem.storev chunk m a (ms src) = Some m' -> + exists rs', + exec_straight (transl_load_store mk1 mk2 addr args k) rs m + k rs' m' + /\ agree ms sp rs'. +Proof. + intros. apply transl_load_store_correct with ms. + intros. exists (nextinstr rs1). + split. apply exec_straight_one. rewrite H. + unfold store1. rewrite gpr_or_zero_not_zero; auto. + rewrite <- (eval_addressing_weaken _ _ _ _ H3) in H4. + rewrite H5 in H4. elim H6; intros. rewrite H9 in H4. + rewrite H4. auto. + auto with ppcgen. auto with ppcgen. + intros. exists (nextinstr rs1). + split. apply exec_straight_one. rewrite H0. + unfold store2. + rewrite <- (eval_addressing_weaken _ _ _ _ H3) in H4. + rewrite H5 in H4. elim H6; intros. rewrite H8 in H4. + rewrite H4. auto. + auto with ppcgen. auto with ppcgen. + auto. auto. +Qed. + +End STRAIGHTLINE. + diff --git a/backend/Parallelmove.v b/backend/Parallelmove.v new file mode 100644 index 00000000..f95416eb --- /dev/null +++ b/backend/Parallelmove.v @@ -0,0 +1,2529 @@ +(** Translation of parallel moves into sequences of individual moves *) + +(** The ``parallel move'' problem, also known as ``parallel assignment'', + is the following. We are given a list of (source, destination) pairs + of locations. The goal is to find a sequence of elementary + moves ([loc <- loc] assignments) such that, at the end of the sequence, + location [dst] contains the value of location [src] at the beginning of + the sequence, for each ([src], [dst]) pairs in the given problem. + Moreover, other locations should keep their values, except one register + of each type, which can be used as temporaries in the generated sequences. + + The parallel move problem is trivial if the sources and destinations do + not overlap. For instance, +<< + (R1, R2) <- (R3, R4) becomes R1 <- R3; R2 <- R4 +>> + However, arbitrary overlap is allowed between sources and destinations. + This requires some care in ordering the individual moves, as in +<< + (R1, R2) <- (R3, R1) becomes R2 <- R1; R1 <- R3 +>> + Worse, cycles (permutations) can require the use of temporaries, as in +<< + (R1, R2, R3) <- (R2, R3, R1) becomes T <- R1; R1 <- R2; R2 <- R3; R3 <- T; +>> + An amazing fact is that for any parallel move problem, at most one temporary + (or in our case one integer temporary and one float temporary) is needed. + + The development in this section was contributed by Laurence Rideau and + Bernard Serpette. It is described in their paper + ``Coq à la conquête des moulins'', JFLA 2005, + ## + http://www-sop.inria.fr/lemme/Laurence.Rideau/RideauSerpetteJFLA05.ps + ## +*) + +Require Omega. +Require Import Wf_nat. +Require Import Conventions. +Require Import Coqlib. +Require Import Bool_nat. +Require Import TheoryList. +Require Import Bool. +Require Import Arith. +Require Import Peano_dec. +Require Import EqNat. +Require Import Values. +Require Import LTL. +Require Import EqNat. +Require Import Locations. +Require Import AST. + +Section pmov. + +Ltac caseEq name := generalize (refl_equal name); pattern name at -1; case name. +Hint Resolve beq_nat_eq . + +Lemma neq_is_neq: forall (x y : nat), x <> y -> beq_nat x y = false. +Proof. +unfold not; intros. +caseEq (beq_nat x y); auto. +intro. +elim H; auto. +Qed. +Hint Resolve neq_is_neq . + +Lemma app_nil: forall (A : Set) (l : list A), l ++ nil = l. +Proof. +intros A l; induction l as [|a l Hrecl]; auto; simpl; rewrite Hrecl; auto. +Qed. + +Lemma app_cons: + forall (A : Set) (l1 l2 : list A) (a : A), (a :: l1) ++ l2 = a :: (l1 ++ l2). +Proof. +auto. +Qed. + +Lemma app_app: + forall (A : Set) (l1 l2 l3 : list A), l1 ++ (l2 ++ l3) = (l1 ++ l2) ++ l3. +Proof. +intros A l1; induction l1 as [|a l1 Hrecl1]; simpl; auto; intros; + rewrite Hrecl1; auto. +Qed. + +Lemma app_rewrite: + forall (A : Set) (l : list A) (x : A), + (exists y : A , exists r : list A , l ++ (x :: nil) = y :: r ). +Proof. +intros A l x; induction l as [|a l Hrecl]. +exists x; exists (nil (A:=A)); auto. +inversion Hrecl; inversion H. +exists a; exists (l ++ (x :: nil)); auto. +Qed. + +Lemma app_rewrite2: + forall (A : Set) (l l2 : list A) (x : A), + (exists y : A , exists r : list A , l ++ (x :: l2) = y :: r ). +Proof. +intros A l l2 x; induction l as [|a l Hrecl]. +exists x; exists l2; auto. +inversion Hrecl; inversion H. +exists a; exists (l ++ (x :: l2)); auto. +Qed. + +Lemma app_rewriter: + forall (A : Set) (l : list A) (x : A), + (exists y : A , exists r : list A , x :: l = r ++ (y :: nil) ). +Proof. +intros A l x; induction l as [|a l Hrecl]. +exists x; exists (nil (A:=A)); auto. +inversion Hrecl; inversion H. +generalize H0; case x1; simpl; intros; inversion H1. +exists a; exists (x0 :: nil); simpl; auto. +exists x0; exists (a0 :: (a :: l0)); simpl; auto. +Qed. +Hint Resolve app_rewriter . + +Definition Reg := loc. + +Definition T := + fun (r : loc) => + match Loc.type r with Tint => R IT2 | Tfloat => R FT2 end. + +Definition notemporary := fun (r : loc) => forall x, Loc.diff r (T x). + +Definition Move := (Reg * Reg)%type. + +Definition Moves := list Move. + +Definition State := ((Moves * Moves) * Moves)%type. + +Definition StateToMove (r : State) : Moves := + match r with ((t, b), l) => t end. + +Definition StateBeing (r : State) : Moves := + match r with ((t, b), l) => b end. + +Definition StateDone (r : State) : Moves := + match r with ((t, b), l) => l end. + +Fixpoint noRead (p : Moves) (r : Reg) {struct p} : Prop := + match p with + nil => True + | (s, d) :: l => Loc.diff s r /\ noRead l r + end. + +Lemma noRead_app: + forall (l1 l2 : Moves) (r : Reg), + noRead l1 r -> noRead l2 r -> noRead (l1 ++ l2) r. +Proof. +intros; induction l1 as [|a l1 Hrecl1]; simpl; auto. +destruct a. +elim H; intros; split; auto. +Qed. + +Inductive step : State -> State -> Prop := + step_nop: + forall (r : Reg) (t1 t2 l : Moves), + step (t1 ++ ((r, r) :: t2), nil, l) (t1 ++ t2, nil, l) + | step_start: + forall (t1 t2 l : Moves) (m : Move), + step (t1 ++ (m :: t2), nil, l) (t1 ++ t2, m :: nil, l) + | step_push: + forall (t1 t2 b l : Moves) (s d r : Reg), + step + (t1 ++ ((d, r) :: t2), (s, d) :: b, l) + (t1 ++ t2, (d, r) :: ((s, d) :: b), l) + | step_loop: + forall (t b l : Moves) (s d r0 r0ounon : Reg), + step + (t, (s, r0ounon) :: (b ++ ((r0, d) :: nil)), l) + (t, (s, r0ounon) :: (b ++ ((T r0, d) :: nil)), (r0, T r0) :: l) + | step_pop: + forall (t b l : Moves) (s0 d0 sn dn : Reg), + noRead t dn -> + Loc.diff dn s0 -> + step + (t, (sn, dn) :: (b ++ ((s0, d0) :: nil)), l) + (t, b ++ ((s0, d0) :: nil), (sn, dn) :: l) + | step_last: + forall (t l : Moves) (s d : Reg), + noRead t d -> step (t, (s, d) :: nil, l) (t, nil, (s, d) :: l) . +Hint Resolve step_nop step_start step_push step_loop step_pop step_last . + +Fixpoint path (l : Moves) : Prop := + match l with + nil => True + | (s, d) :: l => + match l with + nil => True + | (ss, dd) :: l2 => s = dd /\ path l + end + end. + +Lemma path_pop: forall (m : Move) (l : Moves), path (m :: l) -> path l. +Proof. +simpl; intros m l; destruct m as [ms md]; case l; auto. +intros m0; destruct m0; intros; inversion H; auto. +Qed. + +Fixpoint noWrite (p : Moves) (r : Reg) {struct p} : Prop := + match p with + nil => True + | (s, d) :: l => Loc.diff d r /\ noWrite l r + end. + +Lemma noWrite_pop: + forall (p1 p2 : Moves) (m : Move) (r : Reg), + noWrite (p1 ++ (m :: p2)) r -> noWrite (p1 ++ p2) r. +Proof. +intros; induction p1 as [|a p1 Hrecp1]. +generalize H; simpl; case m; intros; inversion H0; auto. +generalize H; rewrite app_cons; rewrite app_cons. +simpl; case a; intros. +inversion H0; split; auto. +Qed. + +Lemma noWrite_in: + forall (p1 p2 : Moves) (r0 r1 r2 : Reg), + noWrite (p1 ++ ((r1, r2) :: p2)) r0 -> Loc.diff r0 r2. +Proof. +intros; induction p1 as [|a p1 Hrecp1]; simpl; auto. +generalize H; simpl; intros; inversion H0; auto. +apply Loc.diff_sym; auto. +generalize H; rewrite app_cons; simpl; case a; intros. +apply Hrecp1; inversion H0; auto. +Qed. + +Lemma noWrite_swap: + forall (p : Moves) (m1 m2 : Move) (r : Reg), + noWrite (m1 :: (m2 :: p)) r -> noWrite (m2 :: (m1 :: p)) r. +Proof. +intros p m1 m2 r; simpl; case m1; case m2. +intros; inversion H; inversion H1; split; auto. +Qed. + +Lemma noWrite_movFront: + forall (p1 p2 : Moves) (m : Move) (r0 : Reg), + noWrite (p1 ++ (m :: p2)) r0 -> noWrite (m :: (p1 ++ p2)) r0. +Proof. +intros p1 p2 m r0; induction p1 as [|a p1 Hrecp1]; auto. +case a; intros r1 r2; rewrite app_cons; rewrite app_cons. +intros; apply noWrite_swap; rewrite <- app_cons. +simpl in H |-; inversion H; unfold noWrite; fold noWrite; auto. +Qed. + +Lemma noWrite_insert: + forall (p1 p2 : Moves) (m : Move) (r0 : Reg), + noWrite (m :: (p1 ++ p2)) r0 -> noWrite (p1 ++ (m :: p2)) r0. +Proof. +intros p1 p2 m r0; induction p1 as [|a p1 Hrecp1]. +simpl; auto. +destruct a; simpl. +destruct m. +intros [H1 [H2 H3]]; split; auto. +apply Hrecp1. +simpl; auto. +Qed. + +Lemma noWrite_tmpLast: + forall (t : Moves) (r s d : Reg), + noWrite (t ++ ((s, d) :: nil)) r -> + forall (x : Reg), noWrite (t ++ ((x, d) :: nil)) r. +Proof. +intros; induction t as [|a t Hrect]. +simpl; auto. +generalize H; simpl; case a; intros; inversion H0; split; auto. +Qed. + +Fixpoint simpleDest (p : Moves) : Prop := + match p with + nil => True + | (s, d) :: l => noWrite l d /\ simpleDest l + end. + +Lemma simpleDest_Pop: + forall (m : Move) (l1 l2 : Moves), + simpleDest (l1 ++ (m :: l2)) -> simpleDest (l1 ++ l2). +Proof. +intros; induction l1 as [|a l1 Hrecl1]. +generalize H; simpl; case m; intros; inversion H0; auto. +generalize H; rewrite app_cons; rewrite app_cons. +simpl; case a; intros; inversion H0; split; auto. +apply (noWrite_pop l1 l2 m); auto. +Qed. + +Lemma simpleDest_pop: + forall (m : Move) (l : Moves), simpleDest (m :: l) -> simpleDest l. +Proof. +intros m l; simpl; case m; intros _ r [X Y]; auto. +Qed. + +Lemma simpleDest_right: + forall (l1 l2 : Moves), simpleDest (l1 ++ l2) -> simpleDest l2. +Proof. +intros l1; induction l1 as [|a l1 Hrecl1]; auto. +intros l2; rewrite app_cons; intros; apply Hrecl1. +apply (simpleDest_pop a); auto. +Qed. + +Lemma simpleDest_swap: + forall (m1 m2 : Move) (l : Moves), + simpleDest (m1 :: (m2 :: l)) -> simpleDest (m2 :: (m1 :: l)). +Proof. +intros m1 m2 l; simpl; case m1; case m2. +intros _ r0 _ r2 [[X Y] [Z U]]; auto. +(repeat split); auto. +apply Loc.diff_sym; auto. +Qed. + +Lemma simpleDest_pop2: + forall (m1 m2 : Move) (l : Moves), + simpleDest (m1 :: (m2 :: l)) -> simpleDest (m1 :: l). +Proof. +intros; apply (simpleDest_pop m2); apply simpleDest_swap; auto. +Qed. + +Lemma simpleDest_movFront: + forall (p1 p2 : Moves) (m : Move), + simpleDest (p1 ++ (m :: p2)) -> simpleDest (m :: (p1 ++ p2)). +Proof. +intros p1 p2 m; induction p1 as [|a p1 Hrecp1]. +simpl; auto. +rewrite app_cons; rewrite app_cons. +case a; intros; simpl in H |-; inversion H. +apply simpleDest_swap; simpl; auto. +destruct m. +cut (noWrite ((r1, r2) :: (p1 ++ p2)) r0). +cut (simpleDest ((r1, r2) :: (p1 ++ p2))). +intro; (repeat split); elim H3; elim H2; intros; auto. +apply Hrecp1; auto. +apply noWrite_movFront; auto. +Qed. + +Lemma simpleDest_insert: + forall (p1 p2 : Moves) (m : Move), + simpleDest (m :: (p1 ++ p2)) -> simpleDest (p1 ++ (m :: p2)). +Proof. +intros p1 p2 m; induction p1 as [|a p1 Hrecp1]. +simpl; auto. +rewrite app_cons; intros. +simpl. +destruct a as [a1 a2]. +split. +destruct m; simpl in H |-. +apply noWrite_insert. +simpl; split; elim H; intros [H1 H2] [H3 H4]; auto. +apply Loc.diff_sym; auto. +apply Hrecp1. +apply simpleDest_pop2 with (a1, a2); auto. +Qed. + +Lemma simpleDest_movBack: + forall (p1 p2 : Moves) (m : Move), + simpleDest (p1 ++ (m :: p2)) -> simpleDest ((p1 ++ p2) ++ (m :: nil)). +Proof. +intros. +apply (simpleDest_insert (p1 ++ p2) nil m). +rewrite app_nil; apply simpleDest_movFront; auto. +Qed. + +Lemma simpleDest_swap_app: + forall (t1 t2 t3 : Moves) (m : Move), + simpleDest (t1 ++ (m :: (t2 ++ t3))) -> simpleDest ((t1 ++ t2) ++ (m :: t3)). +Proof. +intros. +apply (simpleDest_insert (t1 ++ t2) t3 m). +rewrite <- app_app. +apply simpleDest_movFront; auto. +Qed. + +Lemma simpleDest_tmpLast: + forall (t : Moves) (s d : Reg), + simpleDest (t ++ ((s, d) :: nil)) -> + forall (r : Reg), simpleDest (t ++ ((r, d) :: nil)). +Proof. +intros t s d; induction t as [|a t Hrect]. +simpl; auto. +simpl; case a; intros; inversion H; split; auto. +apply (noWrite_tmpLast t r0 s); auto. +Qed. + +Fixpoint noTmp (b : Moves) : Prop := + match b with + nil => True + | (s, d) :: l => + (forall r, Loc.diff s (T r)) /\ + ((forall r, Loc.diff d (T r)) /\ noTmp l) + end. + +Fixpoint noTmpLast (b : Moves) : Prop := + match b with + nil => True + | (s, d) :: nil => forall r, Loc.diff d (T r) + | (s, d) :: l => + (forall r, Loc.diff s (T r)) /\ + ((forall r, Loc.diff d (T r)) /\ noTmpLast l) + end. + +Lemma noTmp_app: + forall (l1 l2 : Moves) (m : Move), + noTmp l1 -> noTmpLast (m :: l2) -> noTmpLast (l1 ++ (m :: l2)). +Proof. +intros. +induction l1 as [|a l1 Hrecl1]. +simpl; auto. +simpl. +caseEq (l1 ++ (m :: l2)); intro. +destruct a. +elim H; intros; auto. +inversion H; auto. +elim H3; auto. +intros; destruct a as [a1 a2]. +elim H; intros H2 [H3 H4]; auto. +(repeat split); auto. +rewrite H1 in Hrecl1; apply Hrecl1; auto. +Qed. + +Lemma noTmpLast_popBack: + forall (t : Moves) (m : Move), noTmpLast (t ++ (m :: nil)) -> noTmp t. +Proof. +intros. +induction t as [|a t Hrect]. +simpl; auto. +destruct a as [a1 a2]. +rewrite app_cons in H. +simpl. +simpl in H |-. +generalize H; caseEq (t ++ (m :: nil)); intros. +destruct t; inversion H0. +elim H1. +intros H2 [H3 H4]; (repeat split); auto. +rewrite <- H0 in H4. +apply Hrect; auto. +Qed. + +Fixpoint getsrc (p : Moves) : list Reg := + match p with + nil => nil + | (s, d) :: l => s :: getsrc l + end. + +Fixpoint getdst (p : Moves) : list Reg := + match p with + nil => nil + | (s, d) :: l => d :: getdst l + end. + +Fixpoint noOverlap_aux (r : Reg) (l : list Reg) {struct l} : Prop := + match l with + nil => True + | b :: m => (b = r \/ Loc.diff b r) /\ noOverlap_aux r m + end. + +Definition noOverlap (p : Moves) : Prop := + forall l, In l (getsrc p) -> noOverlap_aux l (getdst p). + +Definition stepInv (r : State) : Prop := + path (StateBeing r) /\ + (simpleDest (StateToMove r ++ StateBeing r) /\ + (noOverlap (StateToMove r ++ StateBeing r) /\ + (noTmp (StateToMove r) /\ noTmpLast (StateBeing r)))). + +Definition Value := val. + +Definition Env := locset. + +Definition get (e : Env) (r : Reg) := Locmap.get r e. + +Definition update (e : Env) (r : Reg) (v : Value) : Env := Locmap.set r v e. + +Fixpoint sexec (p : Moves) (e : Env) {struct p} : Env := + match p with + nil => e + | (s, d) :: l => let e' := sexec l e in + update e' d (get e' s) + end. + +Fixpoint pexec (p : Moves) (e : Env) {struct p} : Env := + match p with + nil => e + | (s, d) :: l => update (pexec l e) d (get e s) + end. + +Lemma get_update: + forall (e : Env) (r1 r2 : Reg) (v : Value), + get (update e r1 v) r2 = + (if Loc.eq r1 r2 then v else if Loc.overlap r1 r2 then Vundef else get e r2). +Proof. +intros. +unfold update, get, Locmap.get, Locmap.set; trivial. +Qed. + +Lemma get_update_id: + forall (e : Env) (r1 : Reg) (v : Value), get (update e r1 v) r1 = v. +Proof. +intros e r1 v; rewrite (get_update e r1 r1); auto. +case (Loc.eq r1 r1); auto. +intros H; elim H; trivial. +Qed. + +Lemma get_update_diff: + forall (e : Env) (r1 r2 : Reg) (v : Value), + Loc.diff r1 r2 -> get (update e r1 v) r2 = get e r2. +Proof. +intros; unfold update, get, Locmap.get, Locmap.set. +case (Loc.eq r1 r2); intro. +absurd (r1 = r2); [apply Loc.diff_not_eq; trivial | trivial]. +caseEq (Loc.overlap r1 r2); intro; trivial. +absurd (Loc.diff r1 r2); [apply Loc.overlap_not_diff; assumption | assumption]. +Qed. + +Lemma get_update_ndiff: + forall (e : Env) (r1 r2 : Reg) (v : Value), + r1 <> r2 -> not (Loc.diff r1 r2) -> get (update e r1 v) r2 = Vundef. +Proof. +intros; unfold update, get, Locmap.get, Locmap.set. +case (Loc.eq r1 r2); intro. +absurd (r1 = r2); assumption. +caseEq (Loc.overlap r1 r2); intro; trivial. +absurd (Loc.diff r1 r2); (try assumption). +apply Loc.non_overlap_diff; assumption. +Qed. + +Lemma pexec_swap: + forall (m1 m2 : Move) (t : Moves), + simpleDest (m1 :: (m2 :: t)) -> + forall (e : Env) (r : Reg), + get (pexec (m1 :: (m2 :: t)) e) r = get (pexec (m2 :: (m1 :: t)) e) r. +Proof. +intros; destruct m1 as [m1s m1d]; destruct m2 as [m2s m2d]. +generalize H; simpl; intros [[NEQ NW] [NW2 HSD]]; clear H. +case (Loc.eq m1d r); case (Loc.eq m2d r); intros. +absurd (m1d = m2d); + [apply Loc.diff_not_eq; apply Loc.diff_sym; assumption | + rewrite e0; rewrite e1; trivial]. +caseEq (Loc.overlap m2d r); intro. +absurd (Loc.diff m2d m1d); [apply Loc.overlap_not_diff; rewrite e0 | idtac]; + (try assumption). +subst m1d; rewrite get_update_id; rewrite get_update_diff; + (try rewrite get_update_id); auto. +caseEq (Loc.overlap m1d r); intro. +absurd (Loc.diff m1d m2d); + [apply Loc.overlap_not_diff; rewrite e0 | apply Loc.diff_sym]; assumption. +subst m2d; (repeat rewrite get_update_id); rewrite get_update_diff; + [rewrite get_update_id; trivial | apply Loc.diff_sym; trivial]. +caseEq (Loc.overlap m1d r); caseEq (Loc.overlap m2d r); intros. +(repeat rewrite get_update_ndiff); + (try (apply Loc.overlap_not_diff; assumption)); trivial. +assert (~ Loc.diff m1d r); + [apply Loc.overlap_not_diff; assumption | + intros; rewrite get_update_ndiff; auto]. +rewrite get_update_diff; + [rewrite get_update_ndiff; auto | apply Loc.non_overlap_diff; auto]. +cut (~ Loc.diff m2d r); [idtac | apply Loc.overlap_not_diff; auto]. +cut (Loc.diff m1d r); [idtac | apply Loc.non_overlap_diff; auto]. +intros; rewrite get_update_diff; auto. +(repeat rewrite get_update_ndiff); auto. +cut (Loc.diff m1d r); [idtac | apply Loc.non_overlap_diff; auto]. +cut (Loc.diff m2d r); [idtac | apply Loc.non_overlap_diff; auto]. +intros; (repeat rewrite get_update_diff); auto. +Qed. + +Lemma pexec_add: + forall (t1 t2 : Moves) (r : Reg) (e : Env), + get (pexec t1 e) r = get (pexec t2 e) r -> + forall (a : Move), get (pexec (a :: t1) e) r = get (pexec (a :: t2) e) r. +Proof. +intros. +case a. +simpl. +intros a1 a2. +unfold get, update, Locmap.set, Locmap.get. +case (Loc.eq a2 r); case (Loc.overlap a2 r); auto. +Qed. + +Lemma pexec_movBack: + forall (t1 t2 : Moves) (m : Move), + simpleDest (m :: (t1 ++ t2)) -> + forall (e : Env) (r : Reg), + get (pexec (m :: (t1 ++ t2)) e) r = get (pexec (t1 ++ (m :: t2)) e) r. +Proof. +intros t1 t2 m; induction t1 as [|a t1 Hrect1]. +simpl; auto. +rewrite app_cons. +intros; rewrite pexec_swap; auto; rewrite app_cons; auto. +apply pexec_add. +apply Hrect1. +apply (simpleDest_pop2 m a); auto. +Qed. + +Lemma pexec_movFront: + forall (t1 t2 : Moves) (m : Move), + simpleDest (t1 ++ (m :: t2)) -> + forall (e : Env) (r : Reg), + get (pexec (t1 ++ (m :: t2)) e) r = get (pexec (m :: (t1 ++ t2)) e) r. +Proof. +intros; rewrite <- pexec_movBack; eauto. +apply simpleDest_movFront; auto. +Qed. + +Lemma pexec_mov: + forall (t1 t2 t3 : Moves) (m : Move), + simpleDest ((t1 ++ (m :: t2)) ++ t3) -> + forall (e : Env) (r : Reg), + get (pexec ((t1 ++ (m :: t2)) ++ t3) e) r = + get (pexec ((t1 ++ t2) ++ (m :: t3)) e) r. +Proof. +intros t1 t2 t3 m. +rewrite <- app_app. +rewrite app_cons. +intros. +rewrite pexec_movFront; auto. +cut (simpleDest (m :: (t1 ++ (t2 ++ t3)))). +rewrite app_app. +rewrite <- pexec_movFront; auto. +apply simpleDest_swap_app; auto. +apply simpleDest_movFront; auto. +Qed. + +Definition diff_dec: + forall (x y : Reg), ({ Loc.diff x y }) + ({ not (Loc.diff x y) }). +intros. +case (Loc.eq x y). +intros heq; right. +red; intro; absurd (x = y); auto. +apply Loc.diff_not_eq; auto. +intro; caseEq (Loc.overlap x y). +intro; right. +apply Loc.overlap_not_diff; auto. +intro; left; apply Loc.non_overlap_diff; auto. +Defined. + +Lemma get_pexec_id_noWrite: + forall (t : Moves) (d : Reg), + noWrite t d -> + forall (e : Env) (v : Value), v = get (pexec t (update e d v)) d. +Proof. +intros. +induction t as [|a t Hrect]. +simpl. +rewrite get_update_id; auto. +generalize H; destruct a as [a1 a2]; simpl; intros [NEQ R]. +rewrite get_update_diff; auto. +Qed. + +Lemma pexec_nop: + forall (t : Moves) (r : Reg) (e : Env) (x : Reg), + Loc.diff r x -> get (pexec ((r, r) :: t) e) x = get (pexec t e) x. +Proof. +intros. +simpl. +rewrite get_update_diff; auto. +Qed. + +Lemma sD_nW: forall t r s, simpleDest ((s, r) :: t) -> noWrite t r. +Proof. +induction t. +simpl; auto. +simpl. +destruct a. +intros r1 r2 H; split; [try assumption | idtac]. +elim H; + [intros H0 H1; elim H0; [intros H2 H3; (try clear H0 H); (try exact H2)]]. +elim H; + [intros H0 H1; elim H0; [intros H2 H3; (try clear H0 H); (try exact H3)]]. +Qed. + +Lemma sD_pexec: + forall (t : Moves) (s d : Reg), + simpleDest ((s, d) :: t) -> forall (e : Env), get (pexec t e) d = get e d. +Proof. +intros. +induction t as [|a t Hrect]; simpl; auto. +destruct a as [a1 a2]. +simpl in H |-; elim H; intros [H0 H1] [H2 H3]; clear H. +case (Loc.eq a2 d); intro. +absurd (a2 = d); [apply Loc.diff_not_eq | trivial]; assumption. +rewrite get_update_diff; (try assumption). +apply Hrect. +simpl; (split; assumption). +Qed. + +Lemma noOverlap_nil: noOverlap nil. +Proof. +unfold noOverlap, noOverlap_aux, getsrc, getdst; trivial. +Qed. + +Lemma getsrc_add: + forall (m : Move) (l1 l2 : Moves) (l : Reg), + In l (getsrc (l1 ++ l2)) -> In l (getsrc (l1 ++ (m :: l2))). +Proof. +intros m l1 l2 l; destruct m; induction l1; simpl; auto. +destruct a; simpl; intros. +elim H; intros H0; [left | right]; auto. +Qed. + +Lemma getdst_add: + forall (r1 r2 : Reg) (l1 l2 : Moves), + getdst (l1 ++ ((r1, r2) :: l2)) = getdst l1 ++ (r2 :: getdst l2). +Proof. +intros r1 r2 l1 l2; induction l1; simpl; auto. +destruct a; simpl; rewrite IHl1; auto. +Qed. + +Lemma getdst_app: + forall (l1 l2 : Moves), getdst (l1 ++ l2) = getdst l1 ++ getdst l2. +Proof. +intros; induction l1; simpl; auto. +destruct a; simpl; rewrite IHl1; auto. +Qed. + +Lemma noOverlap_auxpop: + forall (x r : Reg) (l : list Reg), + noOverlap_aux x (r :: l) -> noOverlap_aux x l. +Proof. +induction l; simpl; auto. +intros [H1 [H2 H3]]; split; auto. +Qed. + +Lemma noOverlap_auxPop: + forall (x r : Reg) (l1 l2 : list Reg), + noOverlap_aux x (l1 ++ (r :: l2)) -> noOverlap_aux x (l1 ++ l2). +Proof. +intros x r l1 l2; (try assumption). +induction l1 as [|a l1 Hrecl1]; simpl app. +intro; apply (noOverlap_auxpop x r); auto. +(repeat rewrite app_cons); simpl. +intros [H1 H2]; split; auto. +Qed. + +Lemma noOverlap_pop: + forall (m : Move) (l : Moves), noOverlap (m :: l) -> noOverlap l. +Proof. +induction l. +intro; apply noOverlap_nil. +unfold noOverlap; simpl; destruct m; destruct a; simpl; intros. +elim (H l0); intros; (try assumption). +elim H0; intros H1; right; [left | right]; assumption. +Qed. + +Lemma noOverlap_Pop: + forall (m : Move) (l1 l2 : Moves), + noOverlap (l1 ++ (m :: l2)) -> noOverlap (l1 ++ l2). +Proof. +intros m l1 l2; induction l1 as [|a l1 Hrecl1]; simpl. +simpl; apply noOverlap_pop. +(repeat rewrite app_cons); unfold noOverlap; destruct a; simpl. +intros H l H0; split. +elim (H l); [intros H1 H2 | idtac]; auto. +elim H0; [intros H3; left | intros H3; right; apply getsrc_add]; auto. +unfold noOverlap in Hrecl1 |-. +elim H0; intros H1; clear H0. +destruct m; rewrite getdst_app; apply noOverlap_auxPop with ( r := r2 ). +rewrite getdst_add in H. +elim H with ( l := l ); [intros H0 H2; (try clear H); (try exact H2) | idtac]. +left; (try assumption). +apply Hrecl1 with ( l := l ); auto. +intros l0 H0; (try assumption). +elim H with ( l := l0 ); [intros H2 H3; (try clear H); (try exact H3) | idtac]; + auto. +Qed. + +Lemma noOverlap_right: + forall (l1 l2 : Moves), noOverlap (l1 ++ l2) -> noOverlap l2. +Proof. +intros l1; induction l1 as [|a l1 Hrecl1]; auto. +intros l2; rewrite app_cons; intros; apply Hrecl1. +apply (noOverlap_pop a); auto. +Qed. + +Lemma pexec_update: + forall t e d r v, + Loc.diff d r -> + noRead t d -> get (pexec t (update e d v)) r = get (pexec t e) r. +Proof. +induction t; simpl. +intros; rewrite get_update_diff; auto. +destruct a as [a1 a2]; intros; case (Loc.eq a2 r); intro. +subst a2; (repeat rewrite get_update_id). +rewrite get_update_diff; auto; apply Loc.diff_sym; elim H0; auto. +case (diff_dec a2 r); intro. +(repeat rewrite get_update_diff); auto. +apply IHt; auto. +elim H0; auto. +(repeat rewrite get_update_ndiff); auto. +Qed. + +Lemma pexec_push: + forall (t l : Moves) (s d : Reg), + noRead t d -> + simpleDest ((s, d) :: t) -> + forall (e : Env) (r : Reg), + r = d \/ Loc.diff d r -> + get (pexec ((s, d) :: t) (sexec l e)) r = + get (pexec t (sexec ((s, d) :: l) e)) r. +Proof. +intros; simpl. +elim H1; intros e1. +rewrite e1; rewrite get_update_id; auto. +rewrite (sD_pexec t s d); auto; rewrite get_update_id; auto. +rewrite pexec_update; auto. +rewrite get_update_diff; auto. +Qed. + +Definition exec (s : State) (e : Env) := + pexec (StateToMove s ++ StateBeing s) (sexec (StateDone s) e). + +Definition sameEnv (e1 e2 : Env) := + forall (r : Reg), notemporary r -> get e1 r = get e2 r. + +Definition NoOverlap (r : Reg) (s : State) := + noOverlap ((r, r) :: (StateToMove s ++ StateBeing s)). + +Lemma noOverlapaux_swap2: + forall (l1 l2 : list Reg) (m l : Reg), + noOverlap_aux l (l1 ++ (m :: l2)) -> noOverlap_aux l (m :: (l1 ++ l2)). +Proof. +intros l1 l2 m l; induction l1; simpl noOverlap_aux; auto. +intros; elim H; intros H0 H1; (repeat split); auto. +simpl in IHl1 |-. +elim IHl1; [intros H2 H3; (try exact H2) | idtac]; auto. +apply (noOverlap_auxpop l m). +apply IHl1; auto. +Qed. + +Lemma noTmp_noReadTmp: forall t, noTmp t -> forall s, noRead t (T s). +Proof. +induction t; simpl; auto. +destruct a as [a1 a2]; intros. +split; [idtac | apply IHt]; elim H; intros H1 [H2 H3]; auto. +Qed. + +Lemma noRead_by_path: + forall (b t : Moves) (r0 r1 r7 r8 : Reg), + simpleDest ((r7, r8) :: (b ++ ((r0, r1) :: nil))) -> + path (b ++ ((r0, r1) :: nil)) -> Loc.diff r8 r0 -> noRead b r8. +Proof. +intros; induction b as [|a b Hrecb]; simpl; auto. +destruct a as [a1 a2]; generalize H H0; rewrite app_cons; intros; split. +simpl in H3 |-; caseEq (b ++ ((r0, r1) :: nil)); intro. +destruct b; inversion H4. +intros l H4. +rewrite H4 in H3. +destruct m. +rewrite H4 in H2; simpl in H2 |-. +elim H3; [intros H5 H6; (try clear H3); (try exact H5)]. +rewrite H5. +elim H2; intros [H3 [H7 H8]] [H9 [H10 H11]]; (try assumption). +apply Hrecb. +apply (simpleDest_pop (a1, a2)); apply simpleDest_swap; auto. +apply (path_pop (a1, a2)); auto. +Qed. + +Lemma noOverlap_swap: + forall (m1 m2 : Move) (l : Moves), + noOverlap (m1 :: (m2 :: l)) -> noOverlap (m2 :: (m1 :: l)). +Proof. +intros m1 m2 l; simpl; destruct m1 as [m1s m1d]; destruct m2 as [m2s m2d]. +unfold noOverlap; simpl; intros. +assert (m1s = l0 \/ (m2s = l0 \/ In l0 (getsrc l))). +elim H0; [intros H1 | intros [H1|H2]]. +right; left; (try assumption). +left; (try assumption). +right; right; (try assumption). +(repeat split); + (elim (H l0); [intros H2 H3; elim H3; [intros H4 H5] | idtac]; auto). +Qed. + +Lemma getsrc_add1: + forall (r1 r2 : Reg) (l1 l2 : Moves), + getsrc (l1 ++ ((r1, r2) :: l2)) = getsrc l1 ++ (r1 :: getsrc l2). +Proof. +intros r1 r2 l1 l2; induction l1; simpl; auto. +destruct a; simpl; rewrite IHl1; auto. +Qed. + +Lemma getsrc_app: + forall (l1 l2 : Moves), getsrc (l1 ++ l2) = getsrc l1 ++ getsrc l2. +Proof. +intros; induction l1; simpl; auto. +destruct a; simpl; rewrite IHl1; auto. +Qed. + +Lemma Ingetsrc_swap: + forall (m : Move) (l1 l2 : Moves) (l : Reg), + In l (getsrc (m :: (l1 ++ l2))) -> In l (getsrc (l1 ++ (m :: l2))). +Proof. +intros; destruct m as [m1 m2]; simpl; auto. +simpl in H |-. +elim H; intros H0; auto. +rewrite H0; rewrite getsrc_add1; auto. +apply (in_or_app (getsrc l1) (l :: getsrc l2)); auto. +right; apply in_eq; auto. +apply getsrc_add; auto. +Qed. + +Lemma noOverlap_movFront: + forall (p1 p2 : Moves) (m : Move), + noOverlap (p1 ++ (m :: p2)) -> noOverlap (m :: (p1 ++ p2)). +Proof. +intros p1 p2 m; unfold noOverlap. +destruct m; rewrite getdst_add; simpl getdst; rewrite getdst_app; intros. +apply noOverlapaux_swap2. +apply (H l); apply Ingetsrc_swap; auto. +Qed. + +Lemma step_inv_loop_aux: + forall (t l : Moves) (s d : Reg), + simpleDest (t ++ ((s, d) :: nil)) -> + noTmp t -> + forall (e : Env) (r : Reg), + notemporary r -> + d = r \/ Loc.diff d r -> + get (pexec (t ++ ((s, d) :: nil)) (sexec l e)) r = + get (pexec (t ++ ((T s, d) :: nil)) (sexec ((s, T s) :: l) e)) r. +Proof. +intros; (repeat rewrite pexec_movFront); auto. +(repeat rewrite app_nil); simpl; elim H2; intros e1. +subst d; (repeat rewrite get_update_id); auto. +(repeat rewrite get_update_diff); auto. +rewrite pexec_update; auto. +apply Loc.diff_sym; unfold notemporary in H1 |-; auto. +apply noTmp_noReadTmp; auto. +apply (simpleDest_tmpLast t s); auto. +Qed. + +Lemma step_inv_loop: + forall (t l : Moves) (s d : Reg), + simpleDest (t ++ ((s, d) :: nil)) -> + noTmpLast (t ++ ((s, d) :: nil)) -> + forall (e : Env) (r : Reg), + notemporary r -> + d = r \/ Loc.diff d r -> + get (pexec (t ++ ((s, d) :: nil)) (sexec l e)) r = + get (pexec (t ++ ((T s, d) :: nil)) (sexec ((s, T s) :: l) e)) r. +Proof. +intros; apply step_inv_loop_aux; auto. +apply (noTmpLast_popBack t (s, d)); auto. +Qed. + +Definition sameExec (s1 s2 : State) := + forall (e : Env) (r : Reg), + (let A := + getdst + ((StateToMove s1 ++ StateBeing s1) ++ (StateToMove s2 ++ StateBeing s2)) + in + notemporary r -> + (forall x, In x A -> r = x \/ Loc.diff r x) -> + get (exec s1 e) r = get (exec s2 e) r). + +Lemma get_noWrite: + forall (t : Moves) (d : Reg), + noWrite t d -> forall (e : Env), get e d = get (pexec t e) d. +Proof. +intros; induction t as [|a t Hrect]; simpl; auto. +generalize H; destruct a as [a1 a2]; simpl; intros [NEQ R]. +unfold get, Locmap.get, update, Locmap.set. +case (Loc.eq a2 d); intro; auto. +absurd (a2 = d); auto; apply Loc.diff_not_eq; (try assumption). +caseEq (Loc.overlap a2 d); intro. +absurd (Loc.diff a2 d); auto; apply Loc.overlap_not_diff; auto. +unfold get, Locmap.get in Hrect |-; apply Hrect; auto. +Qed. + +Lemma step_sameExec: + forall (r1 r2 : State), step r1 r2 -> stepInv r1 -> sameExec r1 r2. +Proof. +intros r1 r2 STEP; inversion STEP; + unfold stepInv, sameExec, NoOverlap, exec, StateToMove, StateBeing, StateDone; + (repeat rewrite app_nil); intros [P [SD [NO [TT TB]]]]; intros. +rewrite pexec_movFront; simpl; auto. +case (Loc.eq r r0); intros e0. +subst r0; rewrite get_update_id; apply get_noWrite; apply sD_nW with r; + apply simpleDest_movFront; auto. +elim H2 with ( x := r ); + [intros H3; absurd (r = r0); auto | + intros H3; rewrite get_update_diff; auto; apply Loc.diff_sym; auto | idtac]. +(repeat (rewrite getdst_app; simpl)); apply in_or_app; left; apply in_or_app; + right; simpl; auto. +(repeat rewrite pexec_movFront); auto. +rewrite app_nil; auto. +apply simpleDest_movBack; auto. +apply pexec_mov; auto. +repeat (rewrite <- app_cons; rewrite app_app). +apply step_inv_loop; auto. +repeat (rewrite <- app_app; rewrite app_cons; auto). +repeat (rewrite <- app_app; rewrite app_cons; auto). +apply noTmp_app; auto. +elim H2 with ( x := d ); + [intros H3; left; auto | intros H3; right; apply Loc.diff_sym; auto + | try clear H2]. +repeat (rewrite getdst_app; simpl). +apply in_or_app; left; apply in_or_app; right; simpl; right; apply in_or_app; + right; simpl; left; trivial. +rewrite pexec_movFront; auto; apply pexec_push; auto. +apply noRead_app; auto. +apply noRead_app. +apply (noRead_by_path b b s0 d0 sn dn); auto. +apply (simpleDest_right t); auto. +apply (path_pop (sn, dn)); auto. +simpl; split; [apply Loc.diff_sym | idtac]; auto. +apply simpleDest_movFront; auto. +elim H4 with ( x := dn ); [intros H5 | intros H5 | try clear H4]. +left; (try assumption). +right; apply Loc.diff_sym; (try assumption). +repeat (rewrite getdst_app; simpl). +apply in_or_app; left; apply in_or_app; right; simpl; left; trivial. +rewrite pexec_movFront; auto. +rewrite app_nil; auto. +apply pexec_push; auto. +rewrite <- (app_nil _ t). +apply simpleDest_movFront; auto. +elim (H3 d); (try intros H4). +left; (try assumption). +right; apply Loc.diff_sym; (try assumption). +(repeat rewrite getdst_app); simpl; apply in_or_app; left; apply in_or_app; + right; simpl; left; trivial. +Qed. + +Lemma path_tmpLast: + forall (s d : Reg) (l : Moves), + path (l ++ ((s, d) :: nil)) -> path (l ++ ((T s, d) :: nil)). +Proof. +intros; induction l as [|a l Hrecl]. +simpl; auto. +generalize H; (repeat rewrite app_cons). +case a; generalize Hrecl; case l; intros; auto. +destruct m; intros. +inversion H0; split; auto. +Qed. + +Lemma step_inv_path: + forall (r1 r2 : State), step r1 r2 -> stepInv r1 -> path (StateBeing r2). +Proof. +intros r1 r2 STEP; inversion_clear STEP; unfold stepInv; + unfold stepInv, sameExec, sameEnv, exec, StateToMove, StateBeing, StateDone; + intros [P [SD [TT TB]]]; (try (simpl; auto; fail)). +simpl; case m; auto. +generalize P; rewrite <- app_cons; rewrite <- app_cons. +apply (path_tmpLast r0). +generalize P; apply path_pop. +Qed. + +Lemma step_inv_simpleDest: + forall (r1 r2 : State), + step r1 r2 -> stepInv r1 -> simpleDest (StateToMove r2 ++ StateBeing r2). +Proof. +intros r1 r2 STEP; inversion_clear STEP; unfold stepInv; + unfold stepInv, sameExec, sameEnv, exec, StateToMove, StateBeing, StateDone; + (repeat rewrite app_nil); intros [P [SD [TT TB]]]. +apply (simpleDest_Pop (r, r)); assumption. +apply simpleDest_movBack; assumption. +apply simpleDest_insert; rewrite <- app_app; apply simpleDest_movFront. +rewrite <- app_cons; rewrite app_app; auto. +generalize SD; (repeat rewrite <- app_cons); (repeat rewrite app_app). +generalize (simpleDest_tmpLast (t ++ ((s, r0ounon) :: b)) r0 d); auto. +generalize SD; apply simpleDest_Pop. +rewrite <- (app_nil _ t); generalize SD; apply simpleDest_Pop. +Qed. + +Lemma noTmp_pop: + forall (m : Move) (l1 l2 : Moves), noTmp (l1 ++ (m :: l2)) -> noTmp (l1 ++ l2). +Proof. +intros; induction l1 as [|a l1 Hrecl1]; generalize H. +simpl; case m; intros; inversion H0; inversion H2; auto. +rewrite app_cons; rewrite app_cons; simpl; case a. +intros; inversion H0; inversion H2; auto. +Qed. + +Lemma step_inv_noTmp: + forall (r1 r2 : State), step r1 r2 -> stepInv r1 -> noTmp (StateToMove r2). +Proof. +intros r1 r2 STEP; inversion_clear STEP; unfold stepInv; + unfold stepInv, sameExec, sameEnv, exec, StateToMove, StateBeing, StateDone; + intros [P [SD [NO [TT TB]]]]; generalize TT; (try apply noTmp_pop); auto. +Qed. + +Lemma noTmp_noTmpLast: forall (l : Moves), noTmp l -> noTmpLast l. +Proof. +intros; induction l as [|a l Hrecl]; (try (simpl; auto; fail)). +generalize H; simpl; case a; generalize Hrecl; case l; + (intros; inversion H0; inversion H2; auto). +Qed. + +Lemma noTmpLast_pop: + forall (m : Move) (l : Moves), noTmpLast (m :: l) -> noTmpLast l. +Proof. +intros m l; simpl; case m; case l. +simpl; auto. +intros; inversion H; inversion H1; auto. +Qed. + +Lemma noTmpLast_Pop: + forall (m : Move) (l1 l2 : Moves), + noTmpLast (l1 ++ (m :: l2)) -> noTmpLast (l1 ++ l2). +Proof. +intros; induction l1 as [|a l1 Hrecl1]; generalize H. +simpl; case m; case l2. +simpl; auto. +intros. +elim H0; [intros H1 H2; elim H2; [intros H3 H4; (try exact H4)]]. +(repeat rewrite app_cons); simpl; case a. +generalize Hrecl1; case l1. +simpl; case m; case l2; intros; inversion H0; inversion H2; auto. +intros m0 l R r r0; rewrite app_cons; rewrite app_cons. +intros; inversion H0; inversion H2; auto. +Qed. + +Lemma noTmpLast_push: + forall (m : Move) (t1 t2 t3 : Moves), + noTmp (t1 ++ (m :: t2)) -> noTmpLast t3 -> noTmpLast (m :: t3). +Proof. +intros; induction t1 as [|a t1 Hrect1]; generalize H. +simpl; case m; intros r r0 [N1 [N2 NT]]; generalize H0; case t3; auto. +rewrite app_cons; intros; apply Hrect1. +generalize H1. +simpl; case m; case a; intros; inversion H2; inversion H4; auto. +Qed. + +Lemma noTmpLast_tmpLast: + forall (s d : Reg) (l : Moves), + noTmpLast (l ++ ((s, d) :: nil)) -> noTmpLast (l ++ ((T s, d) :: nil)). +Proof. +intros; induction l as [|a l Hrecl]. +simpl; auto. +generalize H; rewrite app_cons; rewrite app_cons; simpl. +case a; generalize Hrecl; case l. +simpl; auto. +intros m l0 REC r r0; generalize REC; rewrite app_cons; rewrite app_cons. +case m; intros; inversion H0; inversion H2; split; auto. +Qed. + +Lemma step_inv_noTmpLast: + forall (r1 r2 : State), step r1 r2 -> stepInv r1 -> noTmpLast (StateBeing r2). +Proof. +intros r1 r2 STEP; inversion_clear STEP; unfold stepInv; + unfold stepInv, sameExec, sameEnv, exec, StateToMove, StateBeing, StateDone; + intros [P [SD [NO [TT TB]]]]; auto. +apply (noTmpLast_push m t1 t2); auto. +apply (noTmpLast_push (d, r) t1 t2); auto. +generalize TB; rewrite <- app_cons; rewrite <- app_cons; apply noTmpLast_tmpLast. +apply (noTmpLast_pop (sn, dn)); auto. +Qed. + +Lemma noOverlapaux_insert: + forall (l1 l2 : list Reg) (r x : Reg), + noOverlap_aux x (r :: (l1 ++ l2)) -> noOverlap_aux x (l1 ++ (r :: l2)). +Proof. +simpl; intros; induction l1; simpl; split. +elim H; [intros H0 H1; (try exact H0)]. +elim H; [intros H0 H1; (try exact H1)]. +simpl in H |-. +elim H; + [intros H0 H1; elim H1; [intros H2 H3; (try clear H1 H); (try exact H2)]]. +apply IHl1. +split. +elim H; [intros H0 H1; (try exact H0)]. +rewrite app_cons in H. +apply noOverlap_auxpop with ( r := a ). +elim H; [intros H0 H1; (try exact H1)]. +Qed. + +Lemma Ingetsrc_swap2: + forall (m : Move) (l1 l2 : Moves) (l : Reg), + In l (getsrc (l1 ++ (m :: l2))) -> In l (getsrc (m :: (l1 ++ l2))). +Proof. +intros; destruct m as [m1 m2]; simpl; auto. +induction l1; simpl. +simpl in H |-; auto. +destruct a; simpl. +simpl in H |-. +elim H; [intros H0 | intros H0; (try exact H0)]. +right; left; (try assumption). +elim IHl1; intros; auto. +Qed. + +Lemma noOverlap_insert: + forall (p1 p2 : Moves) (m : Move), + noOverlap (m :: (p1 ++ p2)) -> noOverlap (p1 ++ (m :: p2)). +Proof. +unfold noOverlap; destruct m; rewrite getdst_add; simpl getdst; + rewrite getdst_app. +intros. +apply noOverlapaux_insert. +generalize (H l); intros H1; lapply H1; + [intros H2; (try clear H1); (try exact H2) | idtac]. +simpl getsrc. +generalize (Ingetsrc_swap2 (r, r0)); simpl; (intros; auto). +Qed. + +Lemma noOverlap_movBack: + forall (p1 p2 : Moves) (m : Move), + noOverlap (p1 ++ (m :: p2)) -> noOverlap ((p1 ++ p2) ++ (m :: nil)). +Proof. +intros. +apply (noOverlap_insert (p1 ++ p2) nil m). +rewrite app_nil; apply noOverlap_movFront; auto. +Qed. + +Lemma noOverlap_movBack0: + forall (t : Moves) (s d : Reg), + noOverlap ((s, d) :: t) -> noOverlap (t ++ ((s, d) :: nil)). +Proof. +intros t s d H; (try assumption). +apply noOverlap_insert. +rewrite app_nil; auto. +Qed. + +Lemma noOverlap_Front0: + forall (t : Moves) (s d : Reg), + noOverlap (t ++ ((s, d) :: nil)) -> noOverlap ((s, d) :: t). +Proof. +intros t s d H; (try assumption). +cut ((s, d) :: t = (s, d) :: (t ++ nil)). +intros e; rewrite e. +apply noOverlap_movFront; auto. +rewrite app_nil; auto. +Qed. + +Lemma noTmpL_diff: + forall (t : Moves) (s d : Reg), + noTmpLast (t ++ ((s, d) :: nil)) -> notemporary d. +Proof. +intros t s d; unfold notemporary; induction t; (try (simpl; intros; auto; fail)). +rewrite app_cons. +intros; apply IHt. +apply (noTmpLast_pop a); auto. +Qed. + +Lemma noOverlap_aux_app: + forall l1 l2 (r : Reg), + noOverlap_aux r l1 -> noOverlap_aux r l2 -> noOverlap_aux r (l1 ++ l2). +Proof. +induction l1; simpl; auto. +intros; split. +elim H; [intros H1 H2; (try clear H); (try exact H1)]. +apply IHl1; auto. +elim H; [intros H1 H2; (try clear H); (try exact H2)]. +Qed. + +Lemma noTmP_noOverlap_aux: + forall t (r : Reg), noTmp t -> noOverlap_aux (T r) (getdst t). +Proof. +induction t; simpl; auto. +destruct a; simpl; (intros; split). +elim H; intros; elim H1; intros. +right; apply H2. +apply IHt; auto. +elim H; + [intros H0 H1; elim H1; [intros H2 H3; (try clear H1 H); (try exact H3)]]. +Qed. + +Lemma noTmp_append: forall l1 l2, noTmp l1 -> noTmp l2 -> noTmp (l1 ++ l2). +Proof. +induction l1; simpl; auto. +destruct a. +intros l2 [H1 [H2 H3]] H4. +(repeat split); auto. +Qed. + +Lemma step_inv_noOverlap: + forall (r1 r2 : State), + step r1 r2 -> stepInv r1 -> noOverlap (StateToMove r2 ++ StateBeing r2). +Proof. +intros r1 r2 STEP; inversion_clear STEP; unfold stepInv; + unfold stepInv, sameExec, sameEnv, exec, StateToMove, StateBeing, StateDone; + (repeat rewrite app_nil); intros [P [SD [NO [TT TB]]]]; + (try (generalize NO; apply noOverlap_Pop; auto; fail)). +apply noOverlap_movBack; auto. +apply noOverlap_insert; rewrite <- app_app; apply noOverlap_movFront; + rewrite <- app_cons; rewrite app_app; auto. +generalize NO; (repeat rewrite <- app_cons); (repeat rewrite app_app); + (clear NO; intros NO); apply noOverlap_movBack0. +assert (noOverlap ((r0, d) :: (t ++ ((s, r0ounon) :: b)))); + [apply noOverlap_Front0; auto | idtac]. +generalize H; unfold noOverlap; simpl; clear H; intros. +elim H0; intros; [idtac | apply (H l0); (right; (try assumption))]. +split; [right; (try assumption) | idtac]. +generalize TB; simpl; caseEq (b ++ ((r0, d) :: nil)); intro. +elim (app_eq_nil b ((r0, d) :: nil)); intros; auto; inversion H4. +subst l0; intros; rewrite <- H1 in TB0. +elim TB0; [intros H2 H3; elim H3; [intros H4 H5; (try clear H3 TB0)]]. +generalize (noTmpL_diff b r0 d); unfold notemporary; intro; apply H3; auto. +rewrite <- H1; apply noTmP_noOverlap_aux; apply noTmp_append; auto; + rewrite <- app_cons in TB; apply noTmpLast_popBack with (r0, d); auto. +rewrite <- (app_nil _ t); apply (noOverlap_Pop (s, d)); assumption. +Qed. + +Lemma step_inv: forall (r1 r2 : State), step r1 r2 -> stepInv r1 -> stepInv r2. +Proof. +intros; unfold stepInv; (repeat split). +apply (step_inv_path r1 r2); auto. +apply (step_inv_simpleDest r1 r2); auto. +apply (step_inv_noOverlap r1 r2); auto. +apply (step_inv_noTmp r1 r2); auto. +apply (step_inv_noTmpLast r1 r2); auto. +Qed. + +Definition step_NF (r : State) : Prop := ~ (exists s : State , step r s ). + +Inductive stepp : State -> State -> Prop := + stepp_refl: forall (r : State), stepp r r + | stepp_trans: + forall (r1 r2 r3 : State), step r1 r2 -> stepp r2 r3 -> stepp r1 r3 . +Hint Resolve stepp_refl stepp_trans . + +Lemma stepp_transitive: + forall (r1 r2 r3 : State), stepp r1 r2 -> stepp r2 r3 -> stepp r1 r3. +Proof. +intros; induction H as [r|r1 r2 r0 H H1 HrecH]; eauto. +Qed. + +Lemma step_stepp: forall (s1 s2 : State), step s1 s2 -> stepp s1 s2. +Proof. +eauto. +Qed. + +Lemma stepp_inv: + forall (r1 r2 : State), stepp r1 r2 -> stepInv r1 -> stepInv r2. +Proof. +intros; induction H as [r|r1 r2 r3 H H1 HrecH]; auto. +apply HrecH; auto. +apply (step_inv r1 r2); auto. +Qed. + +Lemma noTmpLast_lastnoTmp: + forall l s d, noTmpLast (l ++ ((s, d) :: nil)) -> notemporary d. +Proof. +induction l. +simpl. +intros; unfold notemporary; auto. +destruct a as [a1 a2]; intros. +change (noTmpLast ((a1, a2) :: (l ++ ((s, d) :: nil)))) in H |-. +apply IHl with s. +apply noTmpLast_pop with (a1, a2); auto. +Qed. + +Lemma step_inv_NoOverlap: + forall (s1 s2 : State) r, + step s1 s2 -> notemporary r -> stepInv s1 -> NoOverlap r s1 -> NoOverlap r s2. +Proof. +intros s1 s2 r STEP notempr; inversion_clear STEP; unfold stepInv; + unfold stepInv, sameExec, sameEnv, exec, StateToMove, StateBeing, StateDone; + intros [P [SD [NO [TT TB]]]]; unfold NoOverlap; simpl. +simpl; (repeat rewrite app_nil); simpl; (repeat rewrite <- app_cons); intro; + apply noOverlap_Pop with ( m := (r0, r0) ); auto. +(repeat rewrite app_nil); simpl; rewrite app_ass; (repeat rewrite <- app_cons); + intro; rewrite ass_app; apply noOverlap_movBack; auto. +simpl; (repeat (rewrite app_ass; simpl)); (repeat rewrite <- app_cons); intro. +rewrite ass_app; apply noOverlap_insert; rewrite app_ass; + apply noOverlap_movFront; auto. +simpl; (repeat rewrite <- app_cons); intro; rewrite ass_app; + apply noOverlap_movBack0; auto. +generalize H; (repeat (rewrite app_ass; simpl)); intro. +assert (noOverlap ((r0, d) :: (((r, r) :: t) ++ ((s, r0ounon) :: b)))); + [apply noOverlap_Front0 | idtac]; auto. +generalize H0; (repeat (rewrite app_ass; simpl)); auto. +generalize H1; unfold noOverlap; simpl; intros. +elim H3; intros H4; clear H3. +split. +right; assert (notemporary d). +change (noTmpLast (((s, r0ounon) :: b) ++ ((r0, d) :: nil))) in TB |-; + apply (noTmpLast_lastnoTmp ((s, r0ounon) :: b) r0); auto. +rewrite <- H4; unfold notemporary in H3 |-; apply H3. +split. +right; rewrite <- H4; unfold notemporary in notempr |-; apply notempr. +rewrite <- H4; apply noTmP_noOverlap_aux; auto. +apply noTmp_append; auto. +change (noTmpLast (((s, r0ounon) :: b) ++ ((r0, d) :: nil))) in TB |-; + apply noTmpLast_popBack with ( m := (r0, d) ); auto. +apply (H2 l0). +elim H4; intros H3; right; [left | right]; assumption. +intro; + change (noOverlap (((r, r) :: t) ++ ((sn, dn) :: (b ++ ((s0, d0) :: nil))))) in + H1 |-. +change (noOverlap (((r, r) :: t) ++ (b ++ ((s0, d0) :: nil)))); + apply (noOverlap_Pop (sn, dn)); auto. +(repeat rewrite <- app_cons); apply noOverlap_Pop. +Qed. + +Lemma step_inv_getdst: + forall (s1 s2 : State) r, + step s1 s2 -> + In r (getdst (StateToMove s2 ++ StateBeing s2)) -> + In r (getdst (StateToMove s1 ++ StateBeing s1)). +Proof. +intros s1 s2 r STEP; inversion_clear STEP; + unfold StateToMove, StateBeing, StateDone. +(repeat rewrite getdst_app); simpl; (repeat rewrite app_nil); intro; + apply in_or_app. +elim (in_app_or (getdst t1) (getdst t2) r); auto. +intro; right; simpl; right; assumption. +(repeat rewrite getdst_app); destruct m as [m1 m2]; simpl; + (repeat rewrite app_nil); intro; apply in_or_app. +elim (in_app_or (getdst t1 ++ getdst t2) (m2 :: nil) r); auto; intro. +elim (in_app_or (getdst t1) (getdst t2) r); auto; intro. +right; simpl; right; assumption. +elim H0; intros H1; [right; simpl; left; (try assumption) | inversion H1]. +(repeat rewrite getdst_app); simpl; (repeat rewrite app_nil); intro; + apply in_or_app. +elim (in_app_or (getdst t1 ++ getdst t2) (r0 :: (d :: getdst b)) r); auto; + intro. +elim (in_app_or (getdst t1) (getdst t2) r); auto; intro. +left; apply in_or_app; left; assumption. +left; apply in_or_app; right; simpl; right; assumption. +elim H0; intro. +left; apply in_or_app; right; simpl; left; trivial. +elim H1; intro. +right; (simpl; left; trivial). +right; simpl; right; assumption. +(repeat (rewrite getdst_app; simpl)); trivial. +(repeat (rewrite getdst_app; simpl)); intro. +elim (in_app_or (getdst t) (getdst b ++ (d0 :: nil)) r); auto; intro; + apply in_or_app; auto. +elim (in_app_or (getdst b) (d0 :: nil) r); auto; intro. +right; simpl; right; apply in_or_app; auto. +elim H3; intro. +right; simpl; right; apply in_or_app; right; simpl; auto. +inversion H4. +rewrite app_nil; (repeat (rewrite getdst_app; simpl)); intro. +apply in_or_app; left; assumption. +Qed. + +Lemma stepp_sameExec: + forall (r1 r2 : State), stepp r1 r2 -> stepInv r1 -> sameExec r1 r2. +Proof. +intros; induction H as [r|r1 r2 r3 H H1 HrecH]. +unfold sameExec; intros; auto. +cut (sameExec r1 r2); [idtac | apply (step_sameExec r1); auto]. +unfold sameExec; unfold sameExec in HrecH |-; intros. +rewrite H2; auto. +rewrite HrecH; auto. +apply (step_inv r1); auto. +intros x H5; apply H4. +generalize H5; (repeat rewrite getdst_app); intros; apply in_or_app. +elim + (in_app_or + (getdst (StateToMove r2) ++ getdst (StateBeing r2)) + (getdst (StateToMove r3) ++ getdst (StateBeing r3)) x); auto; intro. +generalize (step_inv_getdst r1 r2 x); (repeat rewrite getdst_app); intro. +left; apply H8; auto. +intros x H5; apply H4. +generalize H5; (repeat rewrite getdst_app); intros; apply in_or_app. +elim + (in_app_or + (getdst (StateToMove r1) ++ getdst (StateBeing r1)) + (getdst (StateToMove r2) ++ getdst (StateBeing r2)) x); auto; intro. +generalize (step_inv_getdst r1 r2 x); (repeat rewrite getdst_app); intro. +left; apply H8; auto. +Qed. + +Inductive dstep : State -> State -> Prop := + dstep_nop: + forall (r : Reg) (t l : Moves), dstep ((r, r) :: t, nil, l) (t, nil, l) + | dstep_start: + forall (t l : Moves) (s d : Reg), + s <> d -> dstep ((s, d) :: t, nil, l) (t, (s, d) :: nil, l) + | dstep_push: + forall (t1 t2 b l : Moves) (s d r : Reg), + noRead t1 d -> + dstep + (t1 ++ ((d, r) :: t2), (s, d) :: b, l) + (t1 ++ t2, (d, r) :: ((s, d) :: b), l) + | dstep_pop_loop: + forall (t b l : Moves) (s d r0 : Reg), + noRead t r0 -> + dstep + (t, (s, r0) :: (b ++ ((r0, d) :: nil)), l) + (t, b ++ ((T r0, d) :: nil), (s, r0) :: ((r0, T r0) :: l)) + | dstep_pop: + forall (t b l : Moves) (s0 d0 sn dn : Reg), + noRead t dn -> + Loc.diff dn s0 -> + dstep + (t, (sn, dn) :: (b ++ ((s0, d0) :: nil)), l) + (t, b ++ ((s0, d0) :: nil), (sn, dn) :: l) + | dstep_last: + forall (t l : Moves) (s d : Reg), + noRead t d -> dstep (t, (s, d) :: nil, l) (t, nil, (s, d) :: l) . +Hint Resolve dstep_nop dstep_start dstep_push . +Hint Resolve dstep_pop_loop dstep_pop dstep_last . + +Lemma dstep_step: + forall (r1 r2 : State), dstep r1 r2 -> stepInv r1 -> stepp r1 r2. +Proof. +intros r1 r2 DS; inversion_clear DS; intros SI; eauto. +change (stepp (nil ++ ((r, r) :: t), nil, l) (t, nil, l)); apply step_stepp; + apply (step_nop r nil t). +change (stepp (nil ++ ((s, d) :: t), nil, l) (t, (s, d) :: nil, l)); + apply step_stepp; apply (step_start nil t l). +apply + (stepp_trans + (t, (s, r0) :: (b ++ ((r0, d) :: nil)), l) + (t, (s, r0) :: (b ++ ((T r0, d) :: nil)), (r0, T r0) :: l) + (t, b ++ ((T r0, d) :: nil), (s, r0) :: ((r0, T r0) :: l))); auto. +apply step_stepp; apply step_pop; auto. +unfold stepInv in SI |-; generalize SI; intros [X [Y [Z [U V]]]]. +generalize V; unfold StateBeing, noTmpLast. +case (b ++ ((r0, d) :: nil)); auto. +intros m l0 [R1 [OK PP]]; auto. +Qed. + +Lemma dstep_inv: + forall (r1 r2 : State), dstep r1 r2 -> stepInv r1 -> stepInv r2. +Proof. +intros. +apply (stepp_inv r1 r2); auto. +apply dstep_step; auto. +Qed. + +Inductive dstepp : State -> State -> Prop := + dstepp_refl: forall (r : State), dstepp r r + | dstepp_trans: + forall (r1 r2 r3 : State), dstep r1 r2 -> dstepp r2 r3 -> dstepp r1 r3 . +Hint Resolve dstepp_refl dstepp_trans . + +Lemma dstepp_stepp: + forall (s1 s2 : State), stepInv s1 -> dstepp s1 s2 -> stepp s1 s2. +Proof. +intros; induction H0 as [r|r1 r2 r3 H0 H1 HrecH0]; auto. +apply (stepp_transitive r1 r2 r3); auto. +apply dstep_step; auto. +apply HrecH0; auto. +apply (dstep_inv r1 r2); auto. +Qed. + +Lemma dstepp_sameExec: + forall (r1 r2 : State), dstepp r1 r2 -> stepInv r1 -> sameExec r1 r2. +Proof. +intros; apply stepp_sameExec; auto. +apply dstepp_stepp; auto. +Qed. + +End pmov. + +Fixpoint split_move' (m : Moves) (r : Reg) {struct m} : + option ((Moves * Reg) * Moves) := + match m with + (s, d) :: tail => + match diff_dec s r with + right _ => Some (nil, d, tail) + | left _ => + match split_move' tail r with + Some ((t1, r2, t2)) => Some ((s, d) :: t1, r2, t2) + | None => None + end + end + | nil => None + end. + +Fixpoint split_move (m : Moves) (r : Reg) {struct m} : + option ((Moves * Reg) * Moves) := + match m with + (s, d) :: tail => + match Loc.eq s r with + left _ => Some (nil, d, tail) + | right _ => + match split_move tail r with + Some ((t1, r2, t2)) => Some ((s, d) :: t1, r2, t2) + | None => None + end + end + | nil => None + end. + +Definition def : Move := (R IT1, R IT1). + +Fixpoint last (M : Moves) : Move := + match M with nil => def + | m :: nil => m + | m :: tail => last tail end. + +Fixpoint head_but_last (M : Moves) : Moves := + match M with + nil => nil + | m' :: nil => nil + | m' :: tail => m' :: head_but_last tail + end. + +Fixpoint replace_last_s (M : Moves) : Moves := + match M with + nil => nil + | m :: nil => + match m with (s, d) => (T s, d) :: nil end + | m :: tail => m :: replace_last_s tail + end. + +Ltac CaseEq name := generalize (refl_equal name); pattern name at -1; case name. + +Definition stepf' (S1 : State) : State := + match S1 with + (nil, nil, _) => S1 + | ((s, d) :: tl, nil, l) => + match diff_dec s d with + right _ => (tl, nil, l) + | left _ => (tl, (s, d) :: nil, l) + end + | (t, (s, d) :: b, l) => + match split_move t d with + Some ((t1, r, t2)) => + (t1 ++ t2, (d, r) :: ((s, d) :: b), l) + | None => + match b with + nil => (t, nil, (s, d) :: l) + | _ => + match diff_dec d (fst (last b)) with + right _ => + (t, replace_last_s b, (s, d) :: ((d, T d) :: l)) + | left _ => (t, b, (s, d) :: l) + end + end + end + end. + +Definition stepf (S1 : State) : State := + match S1 with + (nil, nil, _) => S1 + | ((s, d) :: tl, nil, l) => + match Loc.eq s d with + left _ => (tl, nil, l) + | right _ => (tl, (s, d) :: nil, l) + end + | (t, (s, d) :: b, l) => + match split_move t d with + Some ((t1, r, t2)) => + (t1 ++ t2, (d, r) :: ((s, d) :: b), l) + | None => + match b with + nil => (t, nil, (s, d) :: l) + | _ => + match Loc.eq d (fst (last b)) with + left _ => + (t, replace_last_s b, (s, d) :: ((d, T d) :: l)) + | right _ => (t, b, (s, d) :: l) + end + end + end + end. + +Lemma rebuild_l: + forall (l : Moves) (m : Move), + m :: l = head_but_last (m :: l) ++ (last (m :: l) :: nil). +Proof. +induction l; simpl; auto. +intros m; rewrite (IHl a); auto. +Qed. + +Lemma splitSome: + forall (l t1 t2 : Moves) (s d r : Reg), + noOverlap (l ++ ((r, s) :: nil)) -> + split_move l s = Some (t1, d, t2) -> noRead t1 s. +Proof. +induction l; simpl. +intros; discriminate. +destruct a as [a1 a2]. +intros t1 t2 s d r Hno; case (Loc.eq a1 s). +intros e H1; inversion H1. +simpl; auto. +CaseEq (split_move l s). +intros; (repeat destruct p). +inversion H0; auto. +simpl; split; auto. +change (noOverlap (((a1, a2) :: l) ++ ((r, s) :: nil))) in Hno |-. +assert (noOverlap ((r, s) :: ((a1, a2) :: l))). +apply noOverlap_Front0; auto. +unfold noOverlap in H1 |-; simpl in H1 |-. +elim H1 with ( l0 := a1 ); + [intros H5 H6; (try clear H1); (try exact H5) | idtac]. +elim H5; [intros H1; (try clear H5); (try exact H1) | intros H1; (try clear H5)]. +absurd (a1 = s); auto. +apply Loc.diff_sym; auto. +right; left; trivial. +apply (IHl m0 m s r0 r); auto. +apply (noOverlap_pop (a1, a2)); auto. +intros; discriminate. +Qed. + +Lemma unsplit_move: + forall (l t1 t2 : Moves) (s d r : Reg), + noOverlap (l ++ ((r, s) :: nil)) -> + split_move l s = Some (t1, d, t2) -> l = t1 ++ ((s, d) :: t2). +Proof. +induction l. +simpl; intros; discriminate. +intros t1 t2 s d r HnoO; destruct a as [a1 a2]; simpl; case (diff_dec a1 s); + intro. +case (Loc.eq a1 s); intro. +absurd (Loc.diff a1 s); auto. +rewrite e; apply Loc.same_not_diff. +CaseEq (split_move l s); intros; (try discriminate). +(repeat destruct p); inversion H0. +rewrite app_cons; subst t2; subst d; rewrite (IHl m0 m s r0 r); auto. +apply (noOverlap_pop (a1, a2)); auto. +case (Loc.eq a1 s); intros e H; inversion H; simpl. +rewrite e; auto. +cut (noOverlap_aux a1 (getdst ((r, s) :: nil))). +intros [[H5|H4] H0]; [try exact H5 | idtac]. +absurd (s = a1); auto. +absurd (Loc.diff a1 s); auto; apply Loc.diff_sym; auto. +generalize HnoO; rewrite app_cons; intro. +assert (noOverlap (l ++ ((a1, a2) :: ((r, s) :: nil)))); + (try (apply noOverlap_insert; assumption)). +assert (noOverlap ((a1, a2) :: ((r, s) :: nil))). +apply (noOverlap_right l); auto. +generalize H2; unfold noOverlap; simpl. +intros H5; elim (H5 a1); [idtac | left; trivial]. +intros H6 [[H7|H8] H9]. +absurd (s = a1); auto. +split; [right; (try assumption) | auto]. +Qed. + +Lemma cons_replace: + forall (a : Move) (l : Moves), + l <> nil -> replace_last_s (a :: l) = a :: replace_last_s l. +Proof. +intros; simpl. +CaseEq l. +intro; contradiction. +intros m l0 H0; auto. +Qed. + +Lemma last_replace: + forall (l : Moves) (s d : Reg), + replace_last_s (l ++ ((s, d) :: nil)) = l ++ ((T s, d) :: nil). +Proof. +induction l; (try (simpl; auto; fail)). +intros; (repeat rewrite <- app_comm_cons). +rewrite cons_replace. +rewrite IHl; auto. +red; intro. +elim (app_eq_nil l ((s, d) :: nil)); auto; intros; discriminate. +Qed. + +Lemma last_app: forall (l : Moves) (m : Move), last (l ++ (m :: nil)) = m. +Proof. +induction l; simpl; auto. +intros m; CaseEq (l ++ (m :: nil)). +intro; elim (app_eq_nil l (m :: nil)); auto; intros; discriminate. +intros m0 l0 H; (rewrite <- H; apply IHl). +Qed. + +Lemma last_cons: + forall (l : Moves) (m m0 : Move), last (m0 :: (m :: l)) = last (m :: l). +Proof. +intros; simpl; auto. +Qed. + +Lemma stepf_popLoop: + forall (t b l : Moves) (s d r0 : Reg), + split_move t d = None -> + stepf (t, (s, d) :: (b ++ ((d, r0) :: nil)), l) = + (t, b ++ ((T d, r0) :: nil), (s, d) :: ((d, T d) :: l)). +Proof. +intros; simpl; rewrite H; CaseEq (b ++ ((d, r0) :: nil)); intros. +destruct b; discriminate. +rewrite <- H0; rewrite last_app; simpl; rewrite last_replace. +case (Loc.eq d d); intro; intuition. +destruct t; (try destruct m0); simpl; auto. +Qed. + +Lemma stepf_pop: + forall (t b l : Moves) (s d r r0 : Reg), + split_move t d = None -> + d <> r -> + stepf (t, (s, d) :: (b ++ ((r, r0) :: nil)), l) = + (t, b ++ ((r, r0) :: nil), (s, d) :: l). +Proof. +intros; simpl; rewrite H; CaseEq (b ++ ((r, r0) :: nil)); intros. +destruct b; discriminate. +rewrite <- H1; rewrite last_app; simpl. +case (Loc.eq d r); intro. +absurd (d = r); auto. +destruct t; (try destruct m0); simpl; auto. +Qed. + +Lemma noOverlap_head: + forall l1 l2 m, noOverlap (l1 ++ (m :: l2)) -> noOverlap (l1 ++ (m :: nil)). +Proof. +induction l2; simpl; auto. +intros; apply IHl2. +cut (l1 ++ (m :: (a :: l2)) = (l1 ++ (m :: nil)) ++ (a :: l2)); + [idtac | rewrite app_ass; auto]. +intros e; rewrite e in H. +cut (l1 ++ (m :: l2) = (l1 ++ (m :: nil)) ++ l2); + [idtac | rewrite app_ass; auto]. +intros e'; rewrite e'; auto. +apply noOverlap_Pop with a; auto. +Qed. + +Lemma splitNone: + forall (l : Moves) (s d : Reg), + split_move l d = None -> noOverlap (l ++ ((s, d) :: nil)) -> noRead l d. +Proof. +induction l; intros s d; simpl; auto. +destruct a as [a1 a2]; case (Loc.eq a1 d); intro; (try (intro; discriminate)). +CaseEq (split_move l d); intros. +(repeat destruct p); discriminate. +split; (try assumption). +change (noOverlap (((a1, a2) :: l) ++ ((s, d) :: nil))) in H1 |-. +assert (noOverlap ((s, d) :: ((a1, a2) :: l))). +apply noOverlap_Front0; auto. +assert (noOverlap ((a1, a2) :: ((s, d) :: l))). +apply noOverlap_swap; auto. +unfold noOverlap in H3 |-; simpl in H3 |-. +elim H3 with ( l0 := a1 ); + [intros H5 H6; (try clear H1); (try exact H5) | idtac]. +elim H6; + [intros H1 H4; elim H1; + [intros H7; (try clear H1 H6); (try exact H7) | intros H7; (try clear H1 H6)]]. +absurd (a1 = d); auto. +apply Loc.diff_sym; auto. +left; trivial. +apply IHl with s; auto. +apply noOverlap_pop with (a1, a2); auto. +Qed. + +Lemma noO_diff: + forall l1 l2 s d r r0, + noOverlap (l1 ++ ((s, d) :: (l2 ++ ((r, r0) :: nil)))) -> + r = d \/ Loc.diff d r. +Proof. +intros. +assert (noOverlap ((s, d) :: (l2 ++ ((r, r0) :: nil)))); auto. +apply (noOverlap_right l1); auto. +assert (noOverlap ((l2 ++ ((r, r0) :: nil)) ++ ((s, d) :: nil))); auto. +apply (noOverlap_movBack0 (l2 ++ ((r, r0) :: nil))); auto. +assert + ((l2 ++ ((r, r0) :: nil)) ++ ((s, d) :: nil) = + l2 ++ (((r, r0) :: nil) ++ ((s, d) :: nil))); auto. +rewrite app_ass; auto. +rewrite H2 in H1. +simpl in H1 |-. +assert (noOverlap ((r, r0) :: ((s, d) :: nil))); auto. +apply (noOverlap_right l2); auto. +unfold noOverlap in H3 |-. +generalize (H3 r); simpl. +intros H4; elim H4; intros; [idtac | left; trivial]. +elim H6; intros [H9|H9] H10; [left | right]; auto. +Qed. + +Lemma f2ind: + forall (S1 S2 : State), + (forall (l : Moves), (S1 <> (nil, nil, l))) -> + noOverlap (StateToMove S1 ++ StateBeing S1) -> stepf S1 = S2 -> dstep S1 S2. +Proof. +intros S1 S2 Hneq HnoO; destruct S1 as [[t b] l]; destruct b. +destruct t. +elim (Hneq l); auto. +destruct m; simpl; case (Loc.eq r r0). +intros. +rewrite e; rewrite <- H; apply dstep_nop. +intros n H; rewrite <- H; generalize (dstep_start t l r r0); auto. +intros H; rewrite <- H; destruct m as [s d]. +CaseEq (split_move t d). +intros p H0; destruct p as [[t1 s0] t2]; simpl; rewrite H0; destruct t; simpl. +simpl in H0 |-; discriminate. +rewrite (unsplit_move (m :: t) t1 t2 d s0 s); auto. +destruct m; generalize dstep_push; intros H1; apply H1. +unfold StateToMove, StateBeing in HnoO |-. +apply (splitSome ((r, r0) :: t) t1 t2 d s0 s); auto. +apply noOverlap_head with b; auto. +unfold StateToMove, StateBeing in HnoO |-. +apply noOverlap_head with b; auto. +intros H0; destruct b. +simpl. +rewrite H0. +destruct t; (try destruct m); generalize dstep_last; intros H1; apply H1. +simpl; auto. +unfold StateToMove, StateBeing in HnoO |-. +apply splitNone with s; auto. +unfold StateToMove, StateBeing in HnoO |-. +generalize HnoO; clear HnoO; rewrite (rebuild_l b m); intros HnoO. +destruct (last (m :: b)). +case (Loc.eq d r). +intros e; rewrite <- e. +CaseEq (head_but_last (m :: b)); intros; [simpl | idtac]; + (try + (destruct t; (try destruct m0); rewrite H0; + (case (Loc.eq d d); intros h; (try (elim h; auto))))). +generalize (dstep_pop_loop nil nil); simpl; intros H3; apply H3; auto. +generalize (dstep_pop_loop ((r1, r2) :: t) nil); unfold T; simpl app; + intros H3; apply H3; clear H3; apply splitNone with s; (try assumption). +apply noOverlap_head with (head_but_last (m :: b) ++ ((r, r0) :: nil)); auto. +rewrite stepf_popLoop; auto. +generalize (dstep_pop_loop t (m0 :: l0)); simpl; intros H3; apply H3; clear H3; + apply splitNone with s; (try assumption). +apply noOverlap_head with (head_but_last (m :: b) ++ ((r, r0) :: nil)); auto. +intro; assert (Loc.diff d r). +assert (r = d \/ Loc.diff d r). +apply (noO_diff t (head_but_last (m :: b)) s d r r0); auto. +elim H1; [intros H2; absurd (d = r); auto | intros H2; auto]. +rewrite stepf_pop; auto. +generalize (dstep_pop t (head_but_last (m :: b))); intros H3; apply H3; auto. +clear H3; apply splitNone with s; (try assumption). +apply noOverlap_head with (head_but_last (m :: b) ++ ((r, r0) :: nil)); auto. +Qed. + +Lemma f2ind': + forall (S1 : State), + (forall (l : Moves), (S1 <> (nil, nil, l))) -> + noOverlap (StateToMove S1 ++ StateBeing S1) -> dstep S1 (stepf S1). +Proof. +intros S1 H noO; apply f2ind; auto. +Qed. + +Lemma appcons_length: + forall (l1 l2 : Moves) (m : Move), + length (l1 ++ (m :: l2)) = (length (l1 ++ l2) + 1%nat)%nat. +Proof. +induction l1; simpl; intros; [omega | idtac]. +rewrite IHl1; omega. +Qed. + +Definition mesure (S0 : State) : nat := + let (p, _) := S0 in let (t, b) := p in (2 * length t + length b)%nat. + +Lemma step_dec0: + forall (t1 t2 b1 b2 : Moves) (l1 l2 : Moves), + dstep (t1, b1, l1) (t2, b2, l2) -> + (2 * length t2 + length b2 < 2 * length t1 + length b1)%nat. +Proof. +intros t1 t2 b1 b2 l1 l2 H; inversion H; simpl; (try omega). +rewrite appcons_length; omega. +cut (length (b ++ ((T r0, d) :: nil)) = length (b ++ ((r0, d) :: nil))); + (try omega). +induction b; simpl; auto. +(repeat rewrite appcons_length); auto. +Qed. + +Lemma step_dec: + forall (S1 S2 : State), dstep S1 S2 -> (mesure S2 < mesure S1)%nat. +Proof. +unfold mesure; destruct S1 as [[t1 b1] l1]; destruct S2 as [[t2 b2] l2]. +intro; apply (step_dec0 t1 t2 b1 b2 l1 l2); trivial. +Qed. + +Lemma stepf_dec0: + forall (S1 S2 : State), + (forall (l : Moves), (S1 <> (nil, nil, l))) /\ + (S2 = stepf S1 /\ noOverlap (StateToMove S1 ++ StateBeing S1)) -> + (mesure S2 < mesure S1)%nat. +Proof. +intros S1 S2 [H1 [H2 H3]]; apply step_dec. +apply f2ind; trivial. +rewrite H2; reflexivity. +Qed. + +Lemma stepf_dec: + forall (S1 S2 : State), + S2 = stepf S1 /\ + ((forall (l : Moves), (S1 <> (nil, nil, l))) /\ + noOverlap (StateToMove S1 ++ StateBeing S1)) -> ltof _ mesure S2 S1. +Proof. +unfold ltof. +intros S1 S2 [H1 [H2 H3]]; apply step_dec. +apply f2ind; trivial. +rewrite H1; reflexivity. +Qed. + +Lemma replace_last_id: + forall l m m0, replace_last_s (m :: (m0 :: l)) = m :: replace_last_s (m0 :: l). +Proof. +intros; case l; simpl. +destruct m0; simpl; auto. +intros; case l0; auto. +Qed. + +Lemma length_replace: forall l, length (replace_last_s l) = length l. +Proof. +induction l; simpl; auto. +destruct l; destruct a; simpl; auto. +Qed. + +Lemma length_app: + forall (A : Set) (l1 l2 : list A), + (length (l1 ++ l2) = length l1 + length l2)%nat. +Proof. +intros A l1 l2; (try assumption). +induction l1; simpl; auto. +Qed. + +Lemma split_length: + forall (l t1 t2 : Moves) (s d : Reg), + split_move l s = Some (t1, d, t2) -> + (length l = (length t1 + length t2) + 1)%nat. +Proof. +induction l. +intros; discriminate. +intros t1 t2 s d; destruct a as [r r0]; simpl; case (Loc.eq r s); intro. +intros H; inversion H. +simpl; omega. +CaseEq (split_move l s); (try (intros; discriminate)). +(repeat destruct p); intros H H0; inversion H0. +rewrite H2; rewrite (IHl m0 m s r1); auto. +rewrite H4; rewrite <- H2; simpl; omega. +Qed. + +Lemma stepf_dec0': + forall (S1 : State), + (forall (l : Moves), (S1 <> (nil, nil, l))) -> + (mesure (stepf S1) < mesure S1)%nat. +Proof. +intros S1 H. +unfold mesure; destruct S1 as [[t1 b1] l1]. +destruct t1. +destruct b1. +generalize (H l1); intros H1; elim H1; auto. +destruct m; simpl. +destruct b1. +simpl; auto. +case (Loc.eq r0 (fst (last (m :: b1)))). +intros; rewrite length_replace; simpl; omega. +simpl; case b1; intros; simpl; omega. +destruct m. +destruct b1. +simpl. +case (Loc.eq r r0); intros; simpl; omega. +destruct m; simpl; case (Loc.eq r r2). +intros; simpl; omega. +CaseEq (split_move t1 r2); intros. +destruct p; destruct p; simpl. +rewrite (split_length t1 m0 m r2 r3); auto. +rewrite length_app; auto. +omega. +destruct b1. +simpl; omega. +case (Loc.eq r2 (fst (last (m :: b1)))); intros. +rewrite length_replace; simpl; omega. +simpl; omega. +Qed. + +Lemma stepf1_dec: + forall (S1 S2 : State), + (forall (l : Moves), (S1 <> (nil, nil, l))) -> + S2 = stepf S1 -> ltof _ mesure S2 S1. +Proof. +unfold ltof; intros S1 S2 H H0; rewrite H0. +apply stepf_dec0'; (try assumption). +Qed. + +Lemma disc1: + forall (a : Move) (l1 l2 l3 l4 : list Move), + ((a :: l1, l2, l3) <> (nil, nil, l4)). +Proof. +intros; discriminate. +Qed. + +Lemma disc2: + forall (a : Move) (l1 l2 l3 l4 : list Move), + ((l1, a :: l2, l3) <> (nil, nil, l4)). +Proof. +intros; discriminate. +Qed. +Hint Resolve disc1 disc2 . + +Lemma sameExec_reflexive: forall (r : State), sameExec r r. +Proof. +intros r; unfold sameExec, sameEnv, exec. +destruct r as [[t b] d]; trivial. +Qed. + +Definition base_case_Pmov_dec: + forall (s : State), + ({ exists l : list Move , s = (nil, nil, l) }) + + ({ forall l, (s <> (nil, nil, l)) }). +Proof. +destruct s as [[[|x tl] [|y tl']] l]; (try (right; intro; discriminate)). +left; exists l; auto. +Defined. + +Definition Pmov := + Fix + (well_founded_ltof _ mesure) (fun _ => State) + (fun (S1 : State) => + fun (Pmov : forall x, ltof _ mesure x S1 -> State) => + match base_case_Pmov_dec S1 with + left h => S1 + | right h => Pmov (stepf S1) (stepf_dec0' S1 h) end). + +Theorem Pmov_equation: forall S1, Pmov S1 = match S1 with + ((nil, nil), _) => S1 + | _ => Pmov (stepf S1) + end. +Proof. +intros S1; unfold Pmov at 1; + rewrite (Fix_eq + (well_founded_ltof _ mesure) (fun _ => State) + (fun (S1 : State) => + fun (Pmov : forall x, ltof _ mesure x S1 -> State) => + match base_case_Pmov_dec S1 with + left h => S1 + | right h => Pmov (stepf S1) (stepf_dec0' S1 h) end)). +fold Pmov. +destruct S1 as [[[|x tl] [|y tl']] l]; + match goal with + | |- match ?a with left _ => _ | right _ => _ end = _ => case a end; + (try (intros [l0 Heq]; discriminate Heq)); auto. +intros H; elim (H l); auto. +intros x f g Hfg_ext. +match goal with +| |- match ?a with left _ => _ | right _ => _ end = _ => case a end; auto. +Qed. + +Lemma sameExec_transitive: + forall (r1 r2 r3 : State), + (forall r, + In r (getdst (StateToMove r2 ++ StateBeing r2)) -> + In r (getdst (StateToMove r1 ++ StateBeing r1))) -> + (forall r, + In r (getdst (StateToMove r3 ++ StateBeing r3)) -> + In r (getdst (StateToMove r2 ++ StateBeing r2))) -> + sameExec r1 r2 -> sameExec r2 r3 -> sameExec r1 r3. +Proof. +intros r1 r2 r3; unfold sameExec, exec; (repeat rewrite getdst_app). +destruct r1 as [[t1 b1] d1]; destruct r2 as [[t2 b2] d2]; + destruct r3 as [[t3 b3] d3]; simpl. +intros Hin; intros. +rewrite H0; auto. +rewrite H1; auto. +intros. +apply (H3 x). +apply in_or_app; auto. +elim (in_app_or (getdst t2 ++ getdst b2) (getdst t3 ++ getdst b3) x); auto. +intros. +apply (H3 x). +apply in_or_app; auto. +elim (in_app_or (getdst t1 ++ getdst b1) (getdst t2 ++ getdst b2) x); auto. +Qed. + +Lemma dstep_inv_getdst: + forall (s1 s2 : State) r, + dstep s1 s2 -> + In r (getdst (StateToMove s2 ++ StateBeing s2)) -> + In r (getdst (StateToMove s1 ++ StateBeing s1)). +intros s1 s2 r STEP; inversion_clear STEP; + unfold StateToMove, StateBeing, StateDone; (repeat rewrite app_nil); + (repeat (rewrite getdst_app; simpl)); intro; auto. +Proof. +right; (try assumption). +elim (in_app_or (getdst t) (d :: nil) r); auto; (simpl; intros [H1|H1]); + [left; assumption | inversion H1]. +elim (in_app_or (getdst t1 ++ getdst t2) (r0 :: (d :: getdst b)) r); auto; + (simpl; intros). +elim (in_app_or (getdst t1) (getdst t2) r); auto; (simpl; intros). +apply in_or_app; left; apply in_or_app; left; assumption. +apply in_or_app; left; apply in_or_app; right; simpl; right; assumption. +elim H1; [intros H2 | intros [H2|H2]]. +apply in_or_app; left; apply in_or_app; right; simpl; left; auto. +apply in_or_app; right; simpl; left; auto. +apply in_or_app; right; simpl; right; assumption. +elim (in_app_or (getdst t) (getdst b ++ (d :: nil)) r); auto; (simpl; intros). +apply in_or_app; left; assumption. +elim (in_app_or (getdst b) (d :: nil) r); auto; (simpl; intros). +apply in_or_app; right; simpl; right; apply in_or_app; left; assumption. +elim H2; [intros H3 | intros H3; inversion H3]. +apply in_or_app; right; simpl; right; apply in_or_app; right; simpl; auto. +elim (in_app_or (getdst t) (getdst b ++ (d0 :: nil)) r); auto; (simpl; intros). +apply in_or_app; left; assumption. +elim (in_app_or (getdst b) (d0 :: nil) r); auto; simpl; + [intros H3 | intros [H3|H3]; [idtac | inversion H3]]. +apply in_or_app; right; simpl; right; apply in_or_app; left; assumption. +apply in_or_app; right; simpl; right; apply in_or_app; right; simpl; auto. +apply in_or_app; left; assumption. +Qed. + +Theorem STM_Pmov: forall (S1 : State), StateToMove (Pmov S1) = nil. +Proof. +intros S1; elim S1 using (well_founded_ind (Wf_nat.well_founded_ltof _ mesure)). +clear S1; intros S1; intros Hrec; destruct S1 as [[t b] d]; + rewrite Pmov_equation; destruct t. +destruct b; auto. +apply Hrec; apply stepf1_dec; auto. +apply Hrec; apply stepf1_dec; auto. +Qed. + +Theorem SB_Pmov: forall (S1 : State), StateBeing (Pmov S1) = nil. +Proof. +intros S1; elim S1 using (well_founded_ind (Wf_nat.well_founded_ltof _ mesure)). +clear S1; intros S1; intros Hrec; destruct S1 as [[t b] d]; + rewrite Pmov_equation; destruct t. +destruct b; auto. +apply Hrec; apply stepf1_dec; auto. +apply Hrec; apply stepf1_dec; auto. +Qed. + +Theorem Fpmov_correct: + forall (S1 : State), stepInv S1 -> sameExec S1 (Pmov S1). +Proof. +intros S1; elim S1 using (well_founded_ind (Wf_nat.well_founded_ltof _ mesure)). +clear S1; intros S1; intros Hrec Hinv; rewrite Pmov_equation; + destruct S1 as [[t b] d]. +assert + (forall (r : Reg) S1, + In r (getdst (StateToMove (Pmov (stepf S1)) ++ StateBeing (Pmov (stepf S1)))) -> + In r (getdst (StateToMove (stepf S1) ++ StateBeing (stepf S1)))). +intros r S1; rewrite (STM_Pmov (stepf S1)); rewrite SB_Pmov; simpl; intros. +inversion H. +destruct t. +destruct b. +apply sameExec_reflexive. +set (S1:=(nil (A:=Move), m :: b, d)). +assert (dstep S1 (stepf S1)); (try apply f2ind); unfold S1; auto. +elim Hinv; intros Hpath [SD [NO NT]]; assumption. +apply sameExec_transitive with (stepf S1); auto. +intros r; apply dstep_inv_getdst; auto. +apply dstepp_sameExec; auto; apply dstepp_trans with (stepf S1); auto. +apply dstepp_refl; auto. +apply Hrec; auto. +unfold ltof; apply step_dec; assumption. +apply (dstep_inv S1); assumption. +set (S1:=(m :: t, b, d)). +assert (dstep S1 (stepf S1)); (try apply f2ind); unfold S1; auto. +elim Hinv; intros Hpath [SD [NO NT]]; assumption. +apply sameExec_transitive with (stepf S1); auto. +intros r; apply dstep_inv_getdst; auto. +apply dstepp_sameExec; auto; apply dstepp_trans with (stepf S1); auto. +apply dstepp_refl; auto. +apply Hrec; auto. +unfold ltof; apply step_dec; assumption. +apply (dstep_inv S1); assumption. +Qed. + +Definition P_move := fun (p : Moves) => StateDone (Pmov (p, nil, nil)). + +Definition Sexec := sexec. + +Definition Get := get. + +Fixpoint listsLoc2Moves (src dst : list loc) {struct src} : Moves := + match src with + nil => nil + | s :: srcs => + match dst with + nil => nil + | d :: dsts => (s, d) :: listsLoc2Moves srcs dsts + end + end. + +Definition no_overlap (l1 l2 : list loc) := + forall r, In r l1 -> forall s, In s l2 -> r = s \/ Loc.diff r s. + +Definition no_overlap_state (S : State) := + no_overlap + (getsrc (StateToMove S ++ StateBeing S)) + (getdst (StateToMove S ++ StateBeing S)). + +Definition no_overlap_list := fun l => no_overlap (getsrc l) (getdst l). + +Lemma Indst_noOverlap_aux: + forall l1 l, + (forall (s : Reg), In s (getdst l1) -> l = s \/ Loc.diff l s) -> + noOverlap_aux l (getdst l1). +Proof. +intros; induction l1; simpl; auto. +destruct a as [a1 a2]; simpl; split. +elim (H a2); (try intros H0). +left; auto. +right; apply Loc.diff_sym; auto. +simpl; left; trivial. +apply IHl1; intros. +apply H; simpl; right; (try assumption). +Qed. + +Lemma no_overlap_noOverlap: + forall r, no_overlap_state r -> noOverlap (StateToMove r ++ StateBeing r). +Proof. +intros r; unfold noOverlap, no_overlap_state. +set (l1:=StateToMove r ++ StateBeing r). +unfold no_overlap; intros H l H0. +apply Indst_noOverlap_aux; intros; apply H; auto. +Qed. + +Theorem Fpmov_correctMoves: + forall p e r, + simpleDest p -> + no_overlap_list p -> + noTmp p -> + notemporary r -> + (forall (x : Reg), In x (getdst p) -> r = x \/ Loc.diff r x) -> + get (pexec p e) r = get (sexec (StateDone (Pmov (p, nil, nil))) e) r. +Proof. +intros p e r SD no_O notmp notempo. +generalize (Fpmov_correct (p, nil, nil)); unfold sameExec, exec; simpl; + rewrite SB_Pmov; rewrite STM_Pmov; simpl. +(repeat rewrite app_nil); intro. +apply H; auto. +unfold stepInv; simpl; (repeat split); (try (rewrite app_nil; assumption)); auto. +generalize (no_overlap_noOverlap (p, nil, nil)); simpl; intros; auto. +apply H0; auto; unfold no_overlap_list in H0 |-. +unfold no_overlap_state; simpl; (repeat rewrite app_nil); auto. +Qed. + +Theorem Fpmov_correct1: + forall (p : Moves) (e : Env) (r : Reg), + simpleDest p -> + no_overlap_list p -> + noTmp p -> + notemporary r -> + (forall (x : Reg), In x (getdst p) -> r = x \/ Loc.diff r x) -> + noWrite p r -> get e r = get (sexec (StateDone (Pmov (p, nil, nil))) e) r. +Proof. +intros p e r Hsd Hno_O HnoTmp Hrnotempo Hrno_Overlap Hnw. +rewrite <- (Fpmov_correctMoves p e); (try assumption). +destruct p; auto. +destruct m as [m1 m2]; simpl; case (Loc.eq m2 r); intros. +elim Hnw; intros; absurd (Loc.diff m2 r); auto. +rewrite e0; apply Loc.same_not_diff. +elim Hnw; intros H1 H2. +rewrite get_update_diff; (try assumption). +apply get_noWrite; (try assumption). +Qed. + +Lemma In_SD_diff: + forall (s d a1 a2 : Reg) (p : Moves), + In (s, d) p -> simpleDest ((a1, a2) :: p) -> Loc.diff a2 d. +Proof. +intros; induction p. +inversion H. +elim H; auto. +intro; subst a; elim H0; intros H1 H2; elim H1; intros; apply Loc.diff_sym; + assumption. +intro; apply IHp; auto. +apply simpleDest_pop2 with a; (try assumption). +Qed. + +Theorem pexec_correct: + forall (e : Env) (m : Move) (p : Moves), + In m p -> simpleDest p -> (let (s, d) := m in get (pexec p e) d = get e s). +Proof. +induction p; intros. +elim H. +destruct m. +elim (in_inv H); intro. +rewrite H1; simpl; rewrite get_update_id; auto. +destruct a as [a1 a2]; simpl. +rewrite get_update_diff. +apply IHp; auto. +apply (simpleDest_pop (a1, a2)); (try assumption). +apply (In_SD_diff r) with ( p := p ) ( a1 := a1 ); auto. +Qed. + +Lemma In_noTmp_notempo: + forall (s d : Reg) (p : Moves), In (s, d) p -> noTmp p -> notemporary d. +Proof. +intros; unfold notemporary; induction p. +inversion H. +elim H; intro. +subst a; elim H0; intros H1 [H3 H2]; (try assumption). +intro; apply IHp; auto. +destruct a; elim H0; intros _ [H2 H3]; (try assumption). +Qed. + +Lemma In_Indst: forall s d p, In (s, d) p -> In d (getdst p). +Proof. +intros; induction p; auto. +destruct a; simpl. +elim H; intro. +left; inversion H0; trivial. +right; apply IHp; auto. +Qed. + +Lemma In_SD_diff': + forall (d a1 a2 : Reg) (p : Moves), + In d (getdst p) -> simpleDest ((a1, a2) :: p) -> Loc.diff a2 d. +Proof. +intros d a1 a2 p H H0; induction p. +inversion H. +destruct a; elim H. +elim H0; simpl; intros. +subst r0. +elim H1; intros H3 H4; apply Loc.diff_sym; assumption. +intro; apply IHp; (try assumption). +apply simpleDest_pop2 with (r, r0); (try assumption). +Qed. + +Lemma In_SD_no_o: + forall (s d : Reg) (p : Moves), + In (s, d) p -> + simpleDest p -> forall (x : Reg), In x (getdst p) -> d = x \/ Loc.diff d x. +Proof. +intros s d p Hin Hsd; induction p. +inversion Hin. +destruct a as [a1 a2]; elim Hin; intros. +inversion H; subst d; subst s. +elim H0; intros H1; [left | right]; (try assumption). +apply (In_SD_diff' x a1 a2 p); auto. +elim H0. +intro; subst x. +right; apply Loc.diff_sym; apply (In_SD_diff s d a1 a2 p); auto. +intro; apply IHp; auto. +apply (simpleDest_pop (a1, a2)); assumption. +Qed. + +Lemma getdst_map: forall p, getdst p = map (fun x => snd x) p. +Proof. +induction p. +simpl; auto. +destruct a; simpl. +rewrite IHp; auto. +Qed. + +Lemma getsrc_map: forall p, getsrc p = map (fun x => fst x) p. +Proof. +induction p. +simpl; auto. +destruct a; simpl. +rewrite IHp; auto. +Qed. + +Theorem Fpmov_correct2: + forall (p : Moves) (e : Env) (m : Move), + In m p -> + simpleDest p -> + no_overlap_list p -> + noTmp p -> + (let (s, d) := m in get (sexec (StateDone (Pmov (p, nil, nil))) e) d = get e s). +Proof. +intros p e m Hin Hsd Hno_O HnoTmp; destruct m as [s d]; + generalize (Fpmov_correctMoves p e); intros. +rewrite <- H; auto. +apply pexec_correct with ( m := (s, d) ); auto. +apply (In_noTmp_notempo s d p); auto. +apply (In_SD_no_o s d p Hin Hsd). +Qed. + +Lemma notindst_nW: forall a p, Loc.notin a (getdst p) -> noWrite p a. +Proof. +induction p; simpl; auto. +destruct a0 as [a1 a2]. +simpl. +intros H; elim H; intro; split. +apply Loc.diff_sym; (try assumption). +apply IHp; auto. +Qed. + +Lemma disjoint_tmp__noTmp: + forall p, + Loc.disjoint (getsrc p) temporaries -> + Loc.disjoint (getdst p) temporaries -> noTmp p. +Proof. +induction p; simpl; auto. +destruct a as [a1 a2]; simpl getsrc; simpl getdst; unfold Loc.disjoint; intros; + (repeat split). +intro; unfold T; case (Loc.type r); apply H; (try (left; trivial; fail)). +right; left; trivial. +right; right; right; right; left; trivial. +intro; unfold T; case (Loc.type r); apply H0; (try (left; trivial; fail)). +right; left; trivial. +right; right; right; right; left; trivial. +apply IHp. +apply Loc.disjoint_cons_left with a1; auto. +apply Loc.disjoint_cons_left with a2; auto. +Qed. + +Theorem Fpmov_correct_IT3: + forall p rs, + simpleDest p -> + no_overlap_list p -> + Loc.disjoint (getsrc p) temporaries -> + Loc.disjoint (getdst p) temporaries -> + (sexec (StateDone (Pmov (p, nil, nil))) rs) (R IT3) = rs (R IT3). +Proof. +intros p rs Hsd Hno_O Hdistmpsrc Hdistmpdst. +generalize (Fpmov_correctMoves p rs); unfold get, Locmap.get; intros H2. +rewrite <- H2; auto. +generalize (get_noWrite p (R IT3)); unfold get, Locmap.get; intros. +rewrite <- H; auto. +apply notindst_nW. +apply (Loc.disjoint_notin temporaries). +apply Loc.disjoint_sym; auto. +right; right; left; trivial. +apply disjoint_tmp__noTmp; auto. +unfold notemporary, T. +intros x; case (Loc.type x); simpl; intro; discriminate. +intros x H; right; apply Loc.in_notin_diff with (getdst p); auto. +apply Loc.disjoint_notin with temporaries; auto. +apply Loc.disjoint_sym; auto. +right; right; left; trivial. +Qed. + +Theorem Fpmov_correct_map: + forall p rs, + simpleDest p -> + no_overlap_list p -> + Loc.disjoint (getsrc p) temporaries -> + Loc.disjoint (getdst p) temporaries -> + List.map (sexec (StateDone (Pmov (p, nil, nil))) rs) (getdst p) = + List.map rs (getsrc p). +Proof. +intros; rewrite getsrc_map; rewrite getdst_map; rewrite list_map_compose; + rewrite list_map_compose; apply list_map_exten; intros. +generalize (Fpmov_correct2 p rs x). +destruct x; simpl. +unfold get, Locmap.get; intros; auto. +rewrite H4; auto. +apply disjoint_tmp__noTmp; auto. +Qed. + +Theorem Fpmov_correct_ext: + forall p rs, + simpleDest p -> + no_overlap_list p -> + Loc.disjoint (getsrc p) temporaries -> + Loc.disjoint (getdst p) temporaries -> + forall l, + Loc.notin l (getdst p) -> + Loc.notin l temporaries -> + (sexec (StateDone (Pmov (p, nil, nil))) rs) l = rs l. +Proof. +intros; generalize (Fpmov_correct1 p rs l); unfold get, Locmap.get; intros. +rewrite <- H5; auto. +apply disjoint_tmp__noTmp; auto. +unfold notemporary; simpl in H4 |-; unfold T; intros x; case (Loc.type x). +elim H4; + [intros H6 H7; elim H7; [intros H8 H9; (try clear H7 H4); (try exact H8)]]. +elim H4; + [intros H6 H7; elim H7; + [intros H8 H9; elim H9; + [intros H10 H11; elim H11; + [intros H12 H13; elim H13; + [intros H14 H15; (try clear H13 H11 H9 H7 H4); (try exact H14)]]]]]. +unfold no_overlap_list, no_overlap in H0 |-; intros. +case (Loc.eq l x). +intros e; left; (try assumption). +intros n; right; (try assumption). +apply Loc.in_notin_diff with (getdst p); auto. +apply notindst_nW; auto. +Qed. diff --git a/backend/RTL.v b/backend/RTL.v new file mode 100644 index 00000000..ac9a4159 --- /dev/null +++ b/backend/RTL.v @@ -0,0 +1,349 @@ +(** The RTL intermediate language: abstract syntax and semantics. + + RTL (``Register Transfer Language'' is the first intermediate language + after Cminor. +*) + +Require Import Relations. +Require Import Coqlib. +Require Import Maps. +Require Import AST. +Require Import Integers. +Require Import Values. +Require Import Mem. +Require Import Globalenvs. +Require Import Op. +Require Import Registers. + +(** * Abstract syntax *) + +(** RTL is organized as instructions, functions and programs. + Instructions correspond roughly to elementary instructions of the + target processor, but take their arguments and leave their results + in pseudo-registers (also called temporaries in textbooks). + Infinitely many pseudo-registers are available, and each function + has its own set of pseudo-registers, unaffected by function calls. + + Instructions are organized as a control-flow graph: a function is + a finite map from ``nodes'' (abstract program points) to instructions, + and each instruction lists explicitly the nodes of its successors. +*) + +Definition node := positive. + +Inductive instruction: Set := + | Inop: node -> instruction + (** No operation -- just branch to the successor. *) + | Iop: operation -> list reg -> reg -> node -> instruction + (** [Iop op args dest succ] performs the arithmetic operation [op] + over the values of registers [args], stores the result in [dest], + and branches to [succ]. *) + | Iload: memory_chunk -> addressing -> list reg -> reg -> node -> instruction + (** [Iload chunk addr args dest succ] loads a [chunk] quantity from + the address determined by the addressing mode [addr] and the + values of the [args] registers, stores the quantity just read + into [dest], and branches to [succ]. *) + | Istore: memory_chunk -> addressing -> list reg -> reg -> node -> instruction + (** [Istore chunk addr args src succ] stores the value of register + [src] in the [chunk] quantity at the + the address determined by the addressing mode [addr] and the + values of the [args] registers, then branches to [succ]. *) + | Icall: signature -> reg + ident -> list reg -> reg -> node -> instruction + (** [Icall sig fn args dest succ] invokes the function determined by + [fn] (either a function pointer found in a register or a + function name), giving it the values of registers [args] + as arguments. It stores the return value in [dest] and branches + to [succ]. *) + | Icond: condition -> list reg -> node -> node -> instruction + (** [Icond cond args ifso ifnot] evaluates the boolean condition + [cond] over the values of registers [args]. If the condition + is true, it transitions to [ifso]. If the condition is false, + it transitions to [ifnot]. *) + | Ireturn: option reg -> instruction. + (** [Ireturn] terminates the execution of the current function + (it has no successor). It returns the value of the given + register, or [Vundef] if none is given. *) + +Definition code: Set := PTree.t instruction. + +Record function: Set := mkfunction { + fn_sig: signature; + fn_params: list reg; + fn_stacksize: Z; + fn_code: code; + fn_entrypoint: node; + fn_nextpc: node; + fn_code_wf: forall (pc: node), Plt pc fn_nextpc \/ fn_code!pc = None +}. + +(** A function description comprises a control-flow graph (CFG) [fn_code] + (a partial finite mapping from nodes to instructions). As in Cminor, + [fn_sig] is the function signature and [fn_stacksize] the number of bytes + for its stack-allocated activation record. [fn_params] is the list + of registers that are bound to the values of arguments at call time. + [fn_entrypoint] is the node of the first instruction of the function + in the CFG. [fn_code_wf] asserts that all instructions of the function + have nodes no greater than [fn_nextpc]. *) + +Definition program := AST.program function. + +(** * Operational semantics *) + +Definition genv := Genv.t function. +Definition regset := Regmap.t val. + +Fixpoint init_regs (vl: list val) (rl: list reg) {struct rl} : regset := + match rl, vl with + | r1 :: rs, v1 :: vs => Regmap.set r1 v1 (init_regs vs rs) + | _, _ => Regmap.init Vundef + end. + +Section RELSEM. + +Variable ge: genv. + +Definition find_function (ros: reg + ident) (rs: regset) : option function := + match ros with + | inl r => Genv.find_funct ge rs#r + | inr symb => + match Genv.find_symbol ge symb with + | None => None + | Some b => Genv.find_funct_ptr ge b + end + end. + +(** The dynamic semantics of RTL is a combination of small-step (transition) + semantics and big-step semantics. Execution of an instruction performs + a single transition to the next instruction (small-step), except if + the instruction is a function call. In this case, the whole body of + the called function is executed at once and the transition terminates + on the instruction immediately following the call in the caller function. + Such ``mixed-step'' semantics is convenient for reasoning over + intra-procedural analyses and transformations. It also dispenses us + from making the call stack explicit in the semantics. + + The semantics is organized in three mutually inductive predicates. + The first is [exec_instr ge c sp pc rs m pc' rs' m']. [ge] is the + global environment (see module [Genv]), [c] the CFG for the current + function, and [sp] the pointer to the stack block for its + current activation (as in Cminor). [pc], [rs] and [m] is the + initial state of the transition: program point (CFG node) [pc], + register state (mapping of pseudo-registers to values) [rs], + and memory state [m]. The final state is [pc'], [rs'] and [m']. *) + +Inductive exec_instr: code -> val -> + node -> regset -> mem -> + node -> regset -> mem -> Prop := + | exec_Inop: + forall c sp pc rs m pc', + c!pc = Some(Inop pc') -> + exec_instr c sp pc rs m pc' rs m + | exec_Iop: + forall c sp pc rs m op args res pc' v, + c!pc = Some(Iop op args res pc') -> + eval_operation ge sp op rs##args = Some v -> + exec_instr c sp pc rs m pc' (rs#res <- v) m + | exec_Iload: + forall c sp pc rs m chunk addr args dst pc' a v, + c!pc = Some(Iload chunk addr args dst pc') -> + eval_addressing ge sp addr rs##args = Some a -> + Mem.loadv chunk m a = Some v -> + exec_instr c sp pc rs m pc' (rs#dst <- v) m + | exec_Istore: + forall c sp pc rs m chunk addr args src pc' a m', + c!pc = Some(Istore chunk addr args src pc') -> + eval_addressing ge sp addr rs##args = Some a -> + Mem.storev chunk m a rs#src = Some m' -> + exec_instr c sp pc rs m pc' rs m' + | exec_Icall: + forall c sp pc rs m sig ros args res pc' f vres m', + c!pc = Some(Icall sig ros args res pc') -> + find_function ros rs = Some f -> + sig = f.(fn_sig) -> + exec_function f rs##args m vres m' -> + exec_instr c sp pc rs m pc' (rs#res <- vres) m' + | exec_Icond_true: + forall c sp pc rs m cond args ifso ifnot, + c!pc = Some(Icond cond args ifso ifnot) -> + eval_condition cond rs##args = Some true -> + exec_instr c sp pc rs m ifso rs m + | exec_Icond_false: + forall c sp pc rs m cond args ifso ifnot, + c!pc = Some(Icond cond args ifso ifnot) -> + eval_condition cond rs##args = Some false -> + exec_instr c sp pc rs m ifnot rs m + +(** [exec_instrs ge c sp pc rs m pc' rs' m'] is the reflexive + transitive closure of [exec_instr]. It corresponds to the execution + of zero, one or finitely many transitions. *) + +with exec_instrs: code -> val -> + node -> regset -> mem -> + node -> regset -> mem -> Prop := + | exec_refl: + forall c sp pc rs m, + exec_instrs c sp pc rs m pc rs m + | exec_one: + forall c sp pc rs m pc' rs' m', + exec_instr c sp pc rs m pc' rs' m' -> + exec_instrs c sp pc rs m pc' rs' m' + | exec_trans: + forall c sp pc1 rs1 m1 pc2 rs2 m2 pc3 rs3 m3, + exec_instrs c sp pc1 rs1 m1 pc2 rs2 m2 -> + exec_instrs c sp pc2 rs2 m2 pc3 rs3 m3 -> + exec_instrs c sp pc1 rs1 m1 pc3 rs3 m3 + +(** [exec_function ge f args m res m'] executes a function application. + [f] is the called function, [args] the values of its arguments, + and [m] the memory state at the beginning of the call. [res] is + the returned value: the value of [r] if the function terminates with + a [Ireturn (Some r)], or [Vundef] if it terminates with [Ireturn None]. + Evaluation proceeds by executing transitions from the function's entry + point to the first [Ireturn] instruction encountered. It is preceeded + by the allocation of the stack activation block and the binding + of register parameters to the provided arguments. + (Non-parameter registers are initialized to [Vundef].) Before returning, + the stack activation block is freed. *) + +with exec_function: function -> list val -> mem -> + val -> mem -> Prop := + | exec_funct: + forall f m m1 stk args pc rs m2 or vres, + Mem.alloc m 0 f.(fn_stacksize) = (m1, stk) -> + exec_instrs f.(fn_code) (Vptr stk Int.zero) + f.(fn_entrypoint) (init_regs args f.(fn_params)) m1 + pc rs m2 -> + f.(fn_code)!pc = Some(Ireturn or) -> + vres = regmap_optget or Vundef rs -> + exec_function f args m vres (Mem.free m2 stk). + +Scheme exec_instr_ind_3 := Minimality for exec_instr Sort Prop + with exec_instrs_ind_3 := Minimality for exec_instrs Sort Prop + with exec_function_ind_3 := Minimality for exec_function Sort Prop. + +(** Some derived execution rules. *) + +Lemma exec_step: + forall c sp pc1 rs1 m1 pc2 rs2 m2 pc3 rs3 m3, + exec_instr c sp pc1 rs1 m1 pc2 rs2 m2 -> + exec_instrs c sp pc2 rs2 m2 pc3 rs3 m3 -> + exec_instrs c sp pc1 rs1 m1 pc3 rs3 m3. +Proof. + intros. eapply exec_trans. apply exec_one. eauto. eauto. +Qed. + +Lemma exec_Iop': + forall c sp pc rs m op args res pc' rs' v, + c!pc = Some(Iop op args res pc') -> + eval_operation ge sp op rs##args = Some v -> + rs' = (rs#res <- v) -> + exec_instr c sp pc rs m pc' rs' m. +Proof. + intros. subst rs'. eapply exec_Iop; eauto. +Qed. + +Lemma exec_Iload': + forall c sp pc rs m chunk addr args dst pc' rs' a v, + c!pc = Some(Iload chunk addr args dst pc') -> + eval_addressing ge sp addr rs##args = Some a -> + Mem.loadv chunk m a = Some v -> + rs' = (rs#dst <- v) -> + exec_instr c sp pc rs m pc' rs' m. +Proof. + intros. subst rs'. eapply exec_Iload; eauto. +Qed. + +(** If a transition can take place from [pc], the instruction at [pc] + is defined in the CFG. *) + +Lemma exec_instr_present: + forall c sp pc rs m pc' rs' m', + exec_instr c sp pc rs m pc' rs' m' -> + c!pc <> None. +Proof. + induction 1; congruence. +Qed. + +Lemma exec_instrs_present: + forall c sp pc rs m pc' rs' m', + exec_instrs c sp pc rs m pc' rs' m' -> + c!pc' <> None -> c!pc <> None. +Proof. + induction 1; intros. + auto. + eapply exec_instr_present; eauto. + eauto. +Qed. + +End RELSEM. + +(** Execution of whole programs. As in Cminor, we call the ``main'' function + with no arguments and observe its return value. *) + +Definition exec_program (p: program) (r: val) : Prop := + let ge := Genv.globalenv p in + let m0 := Genv.init_mem p in + exists b, exists f, exists m, + Genv.find_symbol ge p.(prog_main) = Some b /\ + Genv.find_funct_ptr ge b = Some f /\ + f.(fn_sig) = mksignature nil (Some Tint) /\ + exec_function ge f nil m0 r m. + +(** * Operations on RTL abstract syntax *) + +(** Computation of the possible successors of an instruction. + This is used in particular for dataflow analyses. *) + +Definition successors (f: function) (pc: node) : list node := + match f.(fn_code)!pc with + | None => nil + | Some i => + match i with + | Inop s => s :: nil + | Iop op args res s => s :: nil + | Iload chunk addr args dst s => s :: nil + | Istore chunk addr args src s => s :: nil + | Icall sig ros args res s => s :: nil + | Icond cond args ifso ifnot => ifso :: ifnot :: nil + | Ireturn optarg => nil + end + end. + +Lemma successors_correct: + forall ge f sp pc rs m pc' rs' m', + exec_instr ge f.(fn_code) sp pc rs m pc' rs' m' -> + In pc' (successors f pc). +Proof. + intros ge f. unfold successors. generalize (fn_code f). + induction 1; rewrite H; simpl; tauto. +Qed. + +(** Transformation of a RTL function instruction by instruction. + This applies a given transformation function to all instructions + of a function and constructs a transformed function from that. *) + +Section TRANSF. + +Variable transf: node -> instruction -> instruction. + +Lemma transf_code_wf: + forall (c: code) (nextpc: node), + (forall (pc: node), Plt pc nextpc \/ c!pc = None) -> + (forall (pc: node), Plt pc nextpc \/ (PTree.map transf c)!pc = None). +Proof. + intros. elim (H pc); intro. + left; assumption. + right. rewrite PTree.gmap. rewrite H0. reflexivity. +Qed. + +Definition transf_function (f: function) : function := + mkfunction + f.(fn_sig) + f.(fn_params) + f.(fn_stacksize) + (PTree.map transf f.(fn_code)) + f.(fn_entrypoint) + f.(fn_nextpc) + (transf_code_wf f.(fn_code) f.(fn_nextpc) f.(fn_code_wf)). + +End TRANSF. diff --git a/backend/RTLgen.v b/backend/RTLgen.v new file mode 100644 index 00000000..9dc9660d --- /dev/null +++ b/backend/RTLgen.v @@ -0,0 +1,473 @@ +(** Translation from Cminor to RTL. *) + +Require Import Coqlib. +Require Import Maps. +Require Import AST. +Require Import Integers. +Require Import Values. +Require Import Op. +Require Import Registers. +Require Import Cminor. +Require Import RTL. + +(** * Mutated variables *) + +(** The following functions compute the list of local Cminor variables + possibly modified during the evaluation of the given expression. + It is used in the [alloc_reg] function to avoid unnecessary + register-to-register moves in the generated RTL. *) + +Fixpoint mutated_expr (a: expr) : list ident := + match a with + | Evar id => nil + | Eassign id b => id :: mutated_expr b + | Eop op bl => mutated_exprlist bl + | Eload _ _ bl => mutated_exprlist bl + | Estore _ _ bl c => mutated_exprlist bl ++ mutated_expr c + | Ecall _ b cl => mutated_expr b ++ mutated_exprlist cl + | Econdition b c d => mutated_condexpr b ++ mutated_expr c ++ mutated_expr d + | Elet b c => mutated_expr b ++ mutated_expr c + | Eletvar n => nil + end + +with mutated_condexpr (a: condexpr) : list ident := + match a with + | CEtrue => nil + | CEfalse => nil + | CEcond cond bl => mutated_exprlist bl + | CEcondition b c d => + mutated_condexpr b ++ mutated_condexpr c ++ mutated_condexpr d + end + +with mutated_exprlist (l: exprlist) : list ident := + match l with + | Enil => nil + | Econs a bl => mutated_expr a ++ mutated_exprlist bl + end. + +(** * Translation environments and state *) + +(** The translation functions are parameterized by the following + compile-time environment, which maps Cminor local variables and + let-bound variables to RTL registers. The mapping for local variables + is computed from the Cminor variable declarations at the beginning of + the translation of a function, and does not change afterwards. + The mapping for let-bound variables is initially empty and updated + during translation of expressions, when crossing a [Elet] binding. *) + +Record mapping: Set := mkmapping { + map_vars: PTree.t reg; + map_letvars: list reg +}. + +(** The translation functions modify a global state, comprising the + current state of the control-flow graph for the function being translated, + as well as sources of fresh RTL registers and fresh CFG nodes. *) + +Record state: Set := mkstate { + st_nextreg: positive; + st_nextnode: positive; + st_code: code; + st_wf: forall (pc: positive), Plt pc st_nextnode \/ st_code!pc = None +}. + +(** ** The state and error monad *) + +(** The translation functions can fail to produce RTL code, for instance + if a non-declared variable is referenced. They must also modify + the global state, adding new nodes to the control-flow graph and + generating fresh temporary registers. In a language like ML or Java, + we would use exceptions to report errors and mutable data structures + to modify the global state. These luxuries are not available in Coq, + however. Instead, we use a monadic encoding of the translation: + translation functions take the current global state as argument, + and return either [Error] to denote an error, or [OK r s] to denote + success. [s] is the modified state, and [r] the result value of the + translation function. + + We now define this monadic encoding -- the ``state and error'' monad -- + as well as convenient syntax to express monadic computations. *) + +Set Implicit Arguments. + +Inductive res (A: Set) : Set := + | Error: res A + | OK: A -> state -> res A. + +Definition mon (A: Set) : Set := state -> res A. + +Definition ret (A: Set) (x: A) : mon A := fun (s: state) => OK x s. + +Definition error (A: Set) : mon A := fun (s: state) => Error A. + +Definition bind (A B: Set) (f: mon A) (g: A -> mon B) : mon B := + fun (s: state) => + match f s with + | Error => Error B + | OK a s' => g a s' + end. + +Definition bind2 (A B C: Set) (f: mon (A * B)) (g: A -> B -> mon C) : mon C := + bind f (fun xy => g (fst xy) (snd xy)). + +Notation "'do' X <- A ; B" := (bind A (fun X => B)) + (at level 200, X ident, A at level 100, B at level 200). +Notation "'do' ( X , Y ) <- A ; B" := (bind2 A (fun X Y => B)) + (at level 200, X ident, Y ident, A at level 100, B at level 200). + +(** ** Operations on state *) + +(** The initial state (empty CFG). *) + +Lemma init_state_wf: + forall pc, Plt pc 1%positive \/ (PTree.empty instruction)!pc = None. +Proof. intros; right; apply PTree.gempty. Qed. + +Definition init_state : state := + mkstate 1%positive 1%positive (PTree.empty instruction) init_state_wf. + +(** Adding a node with the given instruction to the CFG. Return the + label of the new node. *) + +Lemma add_instr_wf: + forall s i pc, + let n := s.(st_nextnode) in + Plt pc (Psucc n) \/ (PTree.set n i s.(st_code))!pc = None. +Proof. + intros. case (peq pc n); intro. + subst pc; left; apply Plt_succ. + rewrite PTree.gso; auto. + elim (st_wf s pc); intro. + left. apply Plt_trans_succ. exact H. + right; assumption. +Qed. + +Definition add_instr (i: instruction) : mon node := + fun s => + let n := s.(st_nextnode) in + OK n + (mkstate s.(st_nextreg) + (Psucc n) + (PTree.set n i s.(st_code)) + (add_instr_wf s i)). + +(** [add_instr] can be decomposed in two steps: reserving a fresh + CFG node, and filling it later with an instruction. This is needed + to compile loops. *) + +Lemma reserve_instr_wf: + forall s pc, + Plt pc (Psucc s.(st_nextnode)) \/ s.(st_code)!pc = None. +Proof. + intros. elim (st_wf s pc); intro. + left; apply Plt_trans_succ; auto. + right; auto. +Qed. + +Definition reserve_instr : mon node := + fun s => + let n := s.(st_nextnode) in + OK n (mkstate s.(st_nextreg) (Psucc n) s.(st_code) + (reserve_instr_wf s)). + +Lemma update_instr_wf: + forall s n i, + Plt n s.(st_nextnode) -> + forall pc, + Plt pc s.(st_nextnode) \/ (PTree.set n i s.(st_code))!pc = None. +Proof. + intros. + case (peq pc n); intro. + subst pc; left; assumption. + rewrite PTree.gso; auto. exact (st_wf s pc). +Qed. + +Definition update_instr (n: node) (i: instruction) : mon unit := + fun s => + match plt n s.(st_nextnode) with + | left PEQ => + OK tt (mkstate s.(st_nextreg) s.(st_nextnode) + (PTree.set n i s.(st_code)) + (@update_instr_wf s n i PEQ)) + | right _ => + Error unit + end. + +(** Generate a fresh RTL register. *) + +Definition new_reg : mon reg := + fun s => + OK s.(st_nextreg) + (mkstate (Psucc s.(st_nextreg)) + s.(st_nextnode) s.(st_code) s.(st_wf)). + +(** ** Operations on mappings *) + +Definition init_mapping : mapping := + mkmapping (PTree.empty reg) nil. + +Definition add_var (map: mapping) (name: ident) : mon (reg * mapping) := + do r <- new_reg; + ret (r, mkmapping (PTree.set name r map.(map_vars)) + map.(map_letvars)). + +Fixpoint add_vars (map: mapping) (names: list ident) + {struct names} : mon (list reg * mapping) := + match names with + | nil => ret (nil, map) + | n1 :: nl => + do (rl, map1) <- add_vars map nl; + do (r1, map2) <- add_var map1 n1; + ret (r1 :: rl, map2) + end. + +Definition find_var (map: mapping) (name: ident) : mon reg := + match PTree.get name map.(map_vars) with + | None => error reg + | Some r => ret r + end. + +Definition add_letvar (map: mapping) (r: reg) : mapping := + mkmapping map.(map_vars) (r :: map.(map_letvars)). + +Definition find_letvar (map: mapping) (idx: nat) : mon reg := + match List.nth_error map.(map_letvars) idx with + | None => error reg + | Some r => ret r + end. + +(** ** Optimized temporary generation *) + +(** [alloc_reg map mut a] returns the RTL register where the evaluation + of expression [a] should leave its result -- the ``target register'' + for evaluating [a]. In general, this is a + fresh temporary register. Exception: if [a] is a let-bound variable + or a non-mutated local variable, we return the RTL register associated + with that variable instead. Returning a fresh temporary in all cases + would be semantically correct, but would generate less efficient + RTL code. *) + +Definition alloc_reg (map: mapping) (mut: list ident) (a: expr) : mon reg := + match a with + | Evar id => + if In_dec ident_eq id mut + then new_reg + else find_var map id + | Eletvar n => + find_letvar map n + | _ => + new_reg + end. + +(** [alloc_regs] is similar, but for a list of expressions. *) + +Fixpoint alloc_regs (map: mapping) (mut:list ident) (al: exprlist) + {struct al}: mon (list reg) := + match al with + | Enil => + ret nil + | Econs a bl => + do rl <- alloc_regs map mut bl; + do r <- alloc_reg map mut a; + ret (r :: rl) + end. + +(** * RTL generation **) + +(** Insertion of a register-to-register move instruction. *) + +Definition add_move (rs rd: reg) (nd: node) : mon node := + if Reg.eq rs rd + then ret nd + else add_instr (Iop Omove (rs::nil) rd nd). + +(** Translation of an expression. [transl_expr map mut a rd nd] + enriches the current CFG with the RTL instructions necessary + to compute the value of Cminor expression [a], leave its result + in register [rd], and branch to node [nd]. It returns the node + of the first instruction in this sequence. [map] is the compile-time + translation environment, and [mut] is an over-approximation of + the set of local variables possibly modified during + the evaluation of [a]. *) + +Fixpoint transl_expr (map: mapping) (mut: list ident) + (a: expr) (rd: reg) (nd: node) + {struct a}: mon node := + match a with + | Evar v => + do r <- find_var map v; add_move r rd nd + | Eassign v b => + do r <- find_var map v; + do no <- add_move rd r nd; transl_expr map mut b rd no + | Eop op al => + do rl <- alloc_regs map mut al; + do no <- add_instr (Iop op rl rd nd); + transl_exprlist map mut al rl no + | Eload chunk addr al => + do rl <- alloc_regs map mut al; + do no <- add_instr (Iload chunk addr rl rd nd); + transl_exprlist map mut al rl no + | Estore chunk addr al b => + do rl <- alloc_regs map mut al; + do no <- add_instr (Istore chunk addr rl rd nd); + do ns <- transl_expr map mut b rd no; + transl_exprlist map mut al rl ns + | Ecall sig b cl => + do rf <- alloc_reg map mut b; + do rargs <- alloc_regs map mut cl; + do n1 <- add_instr (Icall sig (inl _ rf) rargs rd nd); + do n2 <- transl_exprlist map mut cl rargs n1; + transl_expr map mut b rf n2 + | Econdition b c d => + do nfalse <- transl_expr map mut d rd nd; + do ntrue <- transl_expr map mut c rd nd; + transl_condition map mut b ntrue nfalse + | Elet b c => + do r <- new_reg; + do nc <- transl_expr (add_letvar map r) mut c rd nd; + transl_expr map mut b r nc + | Eletvar n => + do r <- find_letvar map n; add_move r rd nd + end + +(** Translation of a conditional expression. Similar to [transl_expr], + but the expression is evaluated for its truth value, and the generated + code branches to one of two possible continuation nodes [ntrue] or + [nfalse] depending on the truth value of [a]. *) + +with transl_condition (map: mapping) (mut: list ident) + (a: condexpr) (ntrue nfalse: node) + {struct a}: mon node := + match a with + | CEtrue => + ret ntrue + | CEfalse => + ret nfalse + | CEcond cond bl => + do rl <- alloc_regs map mut bl; + do nt <- add_instr (Icond cond rl ntrue nfalse); + transl_exprlist map mut bl rl nt + | CEcondition b c d => + do nd <- transl_condition map mut d ntrue nfalse; + do nc <- transl_condition map mut c ntrue nfalse; + transl_condition map mut b nc nd + end + +(** Translation of a list of expressions. The expressions are evaluated + left-to-right, and their values left in the given list of registers. *) + +with transl_exprlist (map: mapping) (mut: list ident) + (al: exprlist) (rl: list reg) (nd: node) + {struct al} : mon node := + match al, rl with + | Enil, nil => + ret nd + | Econs b bs, r :: rs => + do no <- transl_exprlist map mut bs rs nd; transl_expr map mut b r no + | _, _ => + error node + end. + +(** Auxiliary for branch prediction. When compiling an if/then/else + statement, we have a choice between translating the ``then'' branch + first or the ``else'' branch first. Linearization of RTL control-flow + graph, performed later, will exploit this choice as a hint about + which branch is most frequently executed. However, this choice has + no impact on program correctness. We delegate the choice to an + external heuristic (written in OCaml), declared below. *) + +Parameter more_likely: condexpr -> stmtlist -> stmtlist -> bool. + +(** Translation of statements. [transl_stmt map s nd nexits nret rret] + enriches the current CFG with the RTL instructions necessary to + execute the Cminor statement [s], and returns the node of the first + instruction in this sequence. The generated instructions continue + at node [nd] if the statement terminates normally, at node [nret] + if it terminates by early return, and at the [n]-th node in the list + [nlist] if it terminates by an [exit n] construct. [rret] is the + register where the return value of the function must be stored, if any. *) + +Fixpoint transl_stmt (map: mapping) (s: stmt) (nd: node) + (nexits: list node) (nret: node) (rret: option reg) + {struct s} : mon node := + match s with + | Sexpr a => + let mut := mutated_expr a in + do r <- alloc_reg map mut a; transl_expr map mut a r nd + | Sifthenelse a strue sfalse => + let mut := mutated_condexpr a in + (if more_likely a strue sfalse then + do nfalse <- transl_stmtlist map sfalse nd nexits nret rret; + do ntrue <- transl_stmtlist map strue nd nexits nret rret; + transl_condition map mut a ntrue nfalse + else + do ntrue <- transl_stmtlist map strue nd nexits nret rret; + do nfalse <- transl_stmtlist map sfalse nd nexits nret rret; + transl_condition map mut a ntrue nfalse) + | Sloop sbody => + do nloop <- reserve_instr; + do nbody <- transl_stmtlist map sbody nloop nexits nret rret; + do x <- update_instr nloop (Inop nbody); + ret nbody + | Sblock sbody => + transl_stmtlist map sbody nd (nd :: nexits) nret rret + | Sexit n => + match nth_error nexits n with + | None => error node + | Some ne => ret ne + end + | Sreturn opt_a => + match opt_a, rret with + | None, None => ret nret + | Some a, Some r => transl_expr map (mutated_expr a) a r nret + | _, _ => error node + end + end + +(** Translation of lists of statements. *) + +with transl_stmtlist (map: mapping) (sl: stmtlist) (nd: node) + (nexits: list node) (nret: node) (rret: option reg) + {struct sl} : mon node := + match sl with + | Snil => ret nd + | Scons s1 ss => + do ns <- transl_stmtlist map ss nd nexits nret rret; + transl_stmt map s1 ns nexits nret rret + end. + +(** Translation of a Cminor function. *) + +Definition ret_reg (sig: signature) (rd: reg) : option reg := + match sig.(sig_res) with + | None => None + | Some ty => Some rd + end. + +Definition transl_fun (f: Cminor.function) : mon (node * list reg) := + do (rparams, map1) <- add_vars init_mapping f.(Cminor.fn_params); + do (rvars, map2) <- add_vars map1 f.(Cminor.fn_vars); + do rret <- new_reg; + let orret := ret_reg f.(Cminor.fn_sig) rret in + do nret <- add_instr (Ireturn orret); + do nentry <- transl_stmtlist map2 f.(Cminor.fn_body) nret nil nret orret; + ret (nentry, rparams). + +Definition transl_function (f: Cminor.function) : option RTL.function := + match transl_fun f init_state with + | Error => None + | OK (nentry, rparams) s => + Some (RTL.mkfunction + f.(Cminor.fn_sig) + rparams + f.(Cminor.fn_stackspace) + s.(st_code) + nentry + s.(st_nextnode) + s.(st_wf)) + end. + +(** Translation of a whole program. *) + +Definition transl_program (p: Cminor.program) : option RTL.program := + transform_partial_program transl_function p. diff --git a/backend/RTLgenproof.v b/backend/RTLgenproof.v new file mode 100644 index 00000000..98462d28 --- /dev/null +++ b/backend/RTLgenproof.v @@ -0,0 +1,1302 @@ +(** Correctness proof for RTL generation: main proof. *) + +Require Import Coqlib. +Require Import Maps. +Require Import AST. +Require Import Integers. +Require Import Values. +Require Import Mem. +Require Import Globalenvs. +Require Import Op. +Require Import Registers. +Require Import Cminor. +Require Import RTL. +Require Import RTLgen. +Require Import RTLgenproof1. + +Section CORRECTNESS. + +Variable prog: Cminor.program. +Variable tprog: RTL.program. +Hypothesis TRANSL: transl_program prog = Some tprog. + +Let ge : Cminor.genv := Genv.globalenv prog. +Let tge : RTL.genv := Genv.globalenv tprog. + +(** Relationship between the global environments for the original + Cminor program and the generated RTL program. *) + +Lemma symbols_preserved: + forall (s: ident), Genv.find_symbol tge s = Genv.find_symbol ge s. +Proof. + intro. unfold ge, tge. + apply Genv.find_symbol_transf_partial with transl_function. + exact TRANSL. +Qed. + +Lemma function_ptr_translated: + forall (b: block) (f: Cminor.function), + Genv.find_funct_ptr ge b = Some f -> + exists tf, + Genv.find_funct_ptr tge b = Some tf /\ transl_function f = Some tf. +Proof. + intros. + generalize + (Genv.find_funct_ptr_transf_partial transl_function TRANSL H). + case (transl_function f). + intros tf [A B]. exists tf. tauto. + intros [A B]. elim B. reflexivity. +Qed. + +Lemma functions_translated: + forall (v: val) (f: Cminor.function), + Genv.find_funct ge v = Some f -> + exists tf, + Genv.find_funct tge v = Some tf /\ transl_function f = Some tf. +Proof. + intros. + generalize + (Genv.find_funct_transf_partial transl_function TRANSL H). + case (transl_function f). + intros tf [A B]. exists tf. tauto. + intros [A B]. elim B. reflexivity. +Qed. + +(** Correctness of the code generated by [add_move]. *) + +Lemma add_move_correct: + forall r1 r2 sp nd s ns s' rs m, + add_move r1 r2 nd s = OK ns s' -> + exists rs', + exec_instrs tge s'.(st_code) sp ns rs m nd rs' m /\ + rs'#r2 = rs#r1 /\ + (forall r, r <> r2 -> rs'#r = rs#r). +Proof. + intros until m. + unfold add_move. case (Reg.eq r1 r2); intro. + monadSimpl. subst s'; subst r2; subst ns. + exists rs. split. apply exec_refl. split. auto. auto. + intro. exists (rs#r2 <- (rs#r1)). + split. apply exec_one. eapply exec_Iop. eauto with rtlg. + reflexivity. + split. apply Regmap.gss. + intros. apply Regmap.gso; auto. +Qed. + +(** The proof of semantic preservation for the translation of expressions + is a simulation argument based on diagrams of the following form: +<< + I /\ P + e, m, a ------------- ns, rs, m + || | + || |* + || | + \/ v + e', m', v ----------- nd, rs', m' + I /\ Q +>> + where [transl_expr map mut a rd nd s = OK ns s']. + The left vertical arrow represents an evaluation of the expression [a] + (assumption). The right vertical arrow represents the execution of + zero, one or several instructions in the generated RTL flow graph + found in the final state [s'] (conclusion). + + The invariant [I] is the agreement between Cminor environments and + RTL register environment, as captured by [match_envs]. + + The preconditions [P] include the well-formedness of the compilation + environment [mut] and the validity of [rd] as a target register. + + The postconditions [Q] state that in the final register environment + [rs'], register [rd] contains value [v], and most other registers + valid in [s] are unchanged w.r.t. the initial register environment + [rs]. (See below for a precise specification of ``most other + registers''.) + + We formalize this simulation property by the following predicate + parameterized by the Cminor evaluation (left arrow). *) + +Definition transl_expr_correct + (sp: val) (le: letenv) (e: env) (m: mem) (a: expr) + (e': env) (m': mem) (v: val) : Prop := + forall map mut rd nd s ns s' rs + (MWF: map_wf map s) + (TE: transl_expr map mut a rd nd s = OK ns s') + (ME: match_env map e le rs) + (MUT: incl (mutated_expr a) mut) + (TRG: target_reg_ok s map mut a rd), + exists rs', + exec_instrs tge s'.(st_code) sp ns rs m nd rs' m' + /\ match_env map e' le rs' + /\ rs'#rd = v + /\ (forall r, + reg_valid r s -> ~(mutated_reg map mut r) -> + reg_in_map map r \/ r <> rd -> + rs'#r = rs#r). + +(** The simulation properties for lists of expressions and for + conditional expressions are similar. *) + +Definition transl_exprlist_correct + (sp: val) (le: letenv) (e: env) (m: mem) (al: exprlist) + (e': env) (m': mem) (vl: list val) : Prop := + forall map mut rl nd s ns s' rs + (MWF: map_wf map s) + (TE: transl_exprlist map mut al rl nd s = OK ns s') + (ME: match_env map e le rs) + (MUT: incl (mutated_exprlist al) mut) + (TRG: target_regs_ok s map mut al rl), + exists rs', + exec_instrs tge s'.(st_code) sp ns rs m nd rs' m' + /\ match_env map e' le rs' + /\ rs'##rl = vl + /\ (forall r, + reg_valid r s -> ~(mutated_reg map mut r) -> + reg_in_map map r \/ ~(In r rl) -> + rs'#r = rs#r). + +Definition transl_condition_correct + (sp: val) (le: letenv) (e: env) (m: mem) (a: condexpr) + (e': env) (m': mem) (vb: bool) : Prop := + forall map mut ntrue nfalse s ns s' rs + (MWF: map_wf map s) + (TE: transl_condition map mut a ntrue nfalse s = OK ns s') + (ME: match_env map e le rs) + (MUT: incl (mutated_condexpr a) mut), + exists rs', + exec_instrs tge s'.(st_code) sp ns rs m (if vb then ntrue else nfalse) rs' m' + /\ match_env map e' le rs' + /\ (forall r, + reg_valid r s -> ~(mutated_reg map mut r) -> + rs'#r = rs#r). + +(** For statements, we define the following auxiliary predicates + relating the outcome of the Cminor execution with the final node + and value of the return register in the RTL execution. *) + +Definition outcome_node + (out: outcome) + (ncont: node) (nexits: list node) (nret: node) (nd: node) : Prop := + match out with + | Out_normal => ncont = nd + | Out_exit n => nth_error nexits n = Some nd + | Out_return _ => nret = nd + end. + +Definition match_return_reg + (rs: regset) (rret: option reg) (v: val) : Prop := + match rret with + | None => True + | Some r => rs#r = v + end. + +Definition match_return_outcome + (out: outcome) (rret: option reg) (rs: regset) : Prop := + match out with + | Out_normal => True + | Out_exit n => True + | Out_return optv => + match rret, optv with + | None, None => True + | Some r, Some v => rs#r = v + | _, _ => False + end + end. + +(** The simulation diagram for the translation of statements and + lists of statements is of the following form: +<< + I /\ P + e, m, a ------------- ns, rs, m + || | + || |* + || | + \/ v + e', m', out --------- nd, rs', m' + I /\ Q +>> + where [transl_stmt map a ncont nexits nret rret s = OK ns s']. + The left vertical arrow represents an execution of the statement [a] + (assumption). The right vertical arrow represents the execution of + zero, one or several instructions in the generated RTL flow graph + found in the final state [s'] (conclusion). + + The invariant [I] is the agreement between Cminor environments and + RTL register environment, as captured by [match_envs]. + + The preconditions [P] include the well-formedness of the compilation + environment [mut] and the agreement between the outcome [out] + and the end node [nd]. + + The postcondition [Q] states agreement between the outcome [out] + and the value of the return register [rret]. *) + +Definition transl_stmt_correct + (sp: val) (e: env) (m: mem) (a: stmt) + (e': env) (m': mem) (out: outcome) : Prop := + forall map ncont nexits nret rret s ns s' nd rs + (MWF: map_wf map s) + (TE: transl_stmt map a ncont nexits nret rret s = OK ns s') + (ME: match_env map e nil rs) + (OUT: outcome_node out ncont nexits nret nd) + (RRG: return_reg_ok s map rret), + exists rs', + exec_instrs tge s'.(st_code) sp ns rs m nd rs' m' + /\ match_env map e' nil rs' + /\ match_return_outcome out rret rs'. + +Definition transl_stmtlist_correct + (sp: val) (e: env) (m: mem) (al: stmtlist) + (e': env) (m': mem) (out: outcome) : Prop := + forall map ncont nexits nret rret s ns s' nd rs + (MWF: map_wf map s) + (TE: transl_stmtlist map al ncont nexits nret rret s = OK ns s') + (ME: match_env map e nil rs) + (OUT: outcome_node out ncont nexits nret nd) + (RRG: return_reg_ok s map rret), + exists rs', + exec_instrs tge s'.(st_code) sp ns rs m nd rs' m' + /\ match_env map e' nil rs' + /\ match_return_outcome out rret rs'. + +(** Finally, the correctness condition for the translation of functions + is that the translated RTL function, when applied to the same arguments + as the original Cminor function, returns the same value and performs + the same modifications on the memory state. *) + +Definition transl_function_correct + (m: mem) (f: Cminor.function) (vargs: list val) + (m':mem) (vres: val) : Prop := + forall tf + (TE: transl_function f = Some tf), + exec_function tge tf vargs m vres m'. + +(** The correctness of the translation is a huge induction over + the Cminor evaluation derivation for the source program. To keep + the proof manageable, we put each case of the proof in a separate + lemma. There is one lemma for each Cminor evaluation rule. + It takes as hypotheses the premises of the Cminor evaluation rule, + plus the induction hypotheses, that is, the [transl_expr_correct], etc, + corresponding to the evaluations of sub-expressions or sub-statements. *) + +Lemma transl_expr_Evar_correct: + forall (sp: val) (le: letenv) (e: env) (m: mem) (id: ident) (v: val), + e!id = Some v -> + transl_expr_correct sp le e m (Evar id) e m v. +Proof. + intros; red; intros. monadInv TE. intro. + generalize EQ; unfold find_var. caseEq (map_vars map)!id. + intros r' MV; monadSimpl. subst s0; subst r'. + generalize (add_move_correct _ _ sp _ _ _ _ rs m TE0). + intros [rs1 [EX1 [RES1 OTHER1]]]. + exists rs1. +(* Exec *) + split. assumption. +(* Match-env *) + split. inversion TRG. + (* Optimized case rd = r *) + rewrite MV in H5; injection H5; intro; subst r. + apply match_env_exten with rs. + intros. case (Reg.eq r rd); intro. + subst r; assumption. apply OTHER1; assumption. + assumption. + (* General case rd is temp *) + apply match_env_invariant with rs. + assumption. intros. apply OTHER1. red; intro; subst r1. contradiction. +(* Result value *) + split. rewrite RES1. eauto with rtlg. +(* Other regs *) + intros. case (Reg.eq rd r0); intro. + subst r0. inversion TRG. + rewrite MV in H8; injection H8; intro; subst r. apply RES1. + byContradiction. tauto. + apply OTHER1; auto. + + intro; monadSimpl. +Qed. + +Lemma transl_expr_Eop_correct: + forall (sp: val) (le : letenv) (e : env) (m : mem) (op : operation) + (al : exprlist) (e1 : env) (m1 : mem) (vl : list val) + (v: val), + eval_exprlist ge sp le e m al e1 m1 vl -> + transl_exprlist_correct sp le e m al e1 m1 vl -> + eval_operation ge sp op vl = Some v -> + transl_expr_correct sp le e m (Eop op al) e1 m1 v. +Proof. + intros until v. intros EEL TEL EOP. red; intros. + simpl in TE. monadInv TE. intro EQ1. + simpl in MUT. + assert (TRG': target_regs_ok s1 map mut al l); eauto with rtlg. + assert (MWF': map_wf map s1). eauto with rtlg. + generalize (TEL _ _ _ _ _ _ _ _ MWF' EQ1 ME MUT TRG'). + intros [rs1 [EX1 [ME1 [RR1 RO1]]]]. + exists (rs1#rd <- v). +(* Exec *) + split. eapply exec_trans. eexact EX1. + apply exec_instrs_incr with s1. eauto with rtlg. + apply exec_one; eapply exec_Iop. eauto with rtlg. + subst vl. + rewrite (@eval_operation_preserved Cminor.function RTL.function ge tge). + eexact EOP. + exact symbols_preserved. +(* Match-env *) + split. inversion TRG. eauto with rtlg. +(* Result reg *) + split. apply Regmap.gss. +(* Other regs *) + intros. rewrite Regmap.gso. + apply RO1. eauto with rtlg. assumption. + case (In_dec Reg.eq r l); intro. + left. elim (alloc_regs_fresh_or_in_map _ _ _ _ _ _ MWF EQ r i); intro. + assumption. byContradiction; eauto with rtlg. + right; assumption. + red; intro; subst r. + elim H1; intro. inversion TRG. contradiction. + tauto. +Qed. + +Lemma transl_expr_Eassign_correct: + forall (sp: val) (le : letenv) (e : env) (m : mem) + (id : ident) (a : expr) (e1 : env) (m1 : mem) + (v : val), + eval_expr ge sp le e m a e1 m1 v -> + transl_expr_correct sp le e m a e1 m1 v -> + transl_expr_correct sp le e m (Eassign id a) (PTree.set id v e1) m1 v. +Proof. + intros; red; intros. + simpl in TE. monadInv TE. intro EQ1. + simpl in MUT. + assert (MWF0: map_wf map s1). eauto with rtlg. + assert (MUTa: incl (mutated_expr a) mut). + red. intros. apply MUT. simpl. tauto. + assert (TRGa: target_reg_ok s1 map mut a rd). + inversion TRG. apply target_reg_other; eauto with rtlg. + generalize (H0 _ _ _ _ _ _ _ _ MWF0 EQ1 ME MUTa TRGa). + intros [rs1 [EX1 [ME1 [RES1 OTHER1]]]]. + generalize (add_move_correct _ _ sp _ _ _ _ rs1 m1 EQ0). + intros [rs2 [EX2 [RES2 OTHER2]]]. + exists rs2. +(* Exec *) + split. eapply exec_trans. eexact EX1. + apply exec_instrs_incr with s1. eauto with rtlg. + exact EX2. +(* Match-env *) + split. + apply match_env_update_var with rs1 r s s0; auto. + congruence. +(* Result *) + split. case (Reg.eq rd r); intro. + subst r. congruence. + rewrite OTHER2; auto. +(* Other regs *) + intros. transitivity (rs1#r0). + apply OTHER2. red; intro; subst r0. + apply H2. red. exists id. split. apply MUT. red; tauto. + generalize EQ; unfold find_var. + destruct ((map_vars map)!id); monadSimpl. congruence. + apply OTHER1. eauto with rtlg. assumption. assumption. +Qed. + +Lemma transl_expr_Eload_correct: + forall (sp: val) (le : letenv) (e : env) (m : mem) + (chunk : memory_chunk) (addr : addressing) + (al : exprlist) (e1 : env) (m1 : mem) (v : val) + (vl : list val) (a: val), + eval_exprlist ge sp le e m al e1 m1 vl -> + transl_exprlist_correct sp le e m al e1 m1 vl -> + eval_addressing ge sp addr vl = Some a -> + Mem.loadv chunk m1 a = Some v -> + transl_expr_correct sp le e m (Eload chunk addr al) e1 m1 v. +Proof. + intros; red; intros. simpl in TE. monadInv TE. intro EQ1. clear TE. + simpl in MUT. + assert (MWF1: map_wf map s1). eauto with rtlg. + assert (TRG1: target_regs_ok s1 map mut al l). eauto with rtlg. + generalize (H0 _ _ _ _ _ _ _ _ MWF1 EQ1 ME MUT TRG1). + intros [rs1 [EX1 [ME1 [RES1 OTHER1]]]]. + exists (rs1#rd <- v). +(* Exec *) + split. eapply exec_trans. eexact EX1. + apply exec_instrs_incr with s1. eauto with rtlg. + apply exec_one. eapply exec_Iload. eauto with rtlg. + rewrite RES1. rewrite (@eval_addressing_preserved _ _ ge tge). + eexact H1. exact symbols_preserved. assumption. +(* Match-env *) + split. eapply match_env_update_temp. assumption. inversion TRG. assumption. +(* Result *) + split. apply Regmap.gss. +(* Other regs *) + intros. rewrite Regmap.gso. apply OTHER1. + eauto with rtlg. assumption. + case (In_dec Reg.eq r l); intro. + elim (alloc_regs_fresh_or_in_map _ _ _ _ _ _ MWF EQ r i); intro. + tauto. byContradiction. eauto with rtlg. + tauto. + red; intro; subst r. inversion TRG. tauto. +Qed. + +Lemma transl_expr_Estore_correct: + forall (sp: val) (le : letenv) (e : env) (m : mem) + (chunk : memory_chunk) (addr : addressing) + (al : exprlist) (b : expr) (e1 : env) (m1 : mem) + (vl : list val) (e2 : env) (m2 m3 : mem) + (v : val) (a: val), + eval_exprlist ge sp le e m al e1 m1 vl -> + transl_exprlist_correct sp le e m al e1 m1 vl -> + eval_expr ge sp le e1 m1 b e2 m2 v -> + transl_expr_correct sp le e1 m1 b e2 m2 v -> + eval_addressing ge sp addr vl = Some a -> + Mem.storev chunk m2 a v = Some m3 -> + transl_expr_correct sp le e m (Estore chunk addr al b) e2 m3 v. +Proof. + intros; red; intros. simpl in TE; monadInv TE. intro EQ2; clear TE. + simpl in MUT. + assert (MWF2: map_wf map s2). + apply map_wf_incr with s. + apply state_incr_trans2 with s0 s1; eauto with rtlg. + assumption. + assert (MUT2: incl (mutated_exprlist al) mut). eauto with coqlib. + assert (TRG2: target_regs_ok s2 map mut al l). + apply target_regs_ok_incr with s0; eauto with rtlg. + generalize (H0 _ _ _ _ _ _ _ _ MWF2 EQ2 ME MUT2 TRG2). + intros [rs1 [EX1 [ME1 [RES1 OTHER1]]]]. + assert (MWF1: map_wf map s1). eauto with rtlg. + assert (MUT1: incl (mutated_expr b) mut). eauto with coqlib. + assert (TRG1: target_reg_ok s1 map mut b rd). + inversion TRG. apply target_reg_other; eauto with rtlg. + generalize (H2 _ _ _ _ _ _ _ _ MWF1 EQ1 ME1 MUT1 TRG1). + intros [rs2 [EX2 [ME2 [RES2 OTHER2]]]]. + exists rs2. +(* Exec *) + split. eapply exec_trans. eexact EX1. + apply exec_instrs_incr with s2. eauto with rtlg. + eapply exec_trans. eexact EX2. + apply exec_instrs_incr with s1. eauto with rtlg. + apply exec_one. eapply exec_Istore. eauto with rtlg. + assert (rs2##l = rs1##l). + apply list_map_exten. intros r' IN. symmetry. apply OTHER2. + eauto with rtlg. eauto with rtlg. + elim (alloc_regs_fresh_or_in_map _ _ _ _ _ _ MWF EQ r' IN); intro. + tauto. right. apply sym_not_equal. + apply valid_fresh_different with s. inversion TRG; assumption. + assumption. + rewrite H5; rewrite RES1. + rewrite (@eval_addressing_preserved _ _ ge tge). + eexact H3. exact symbols_preserved. + rewrite RES2. assumption. +(* Match-env *) + split. assumption. +(* Result *) + split. assumption. +(* Other regs *) + intro r'; intros. transitivity (rs1#r'). + apply OTHER2. apply reg_valid_incr with s; eauto with rtlg. + assumption. assumption. + apply OTHER1. apply reg_valid_incr with s. + apply state_incr_trans2 with s0 s1; eauto with rtlg. assumption. + assumption. case (In_dec Reg.eq r' l); intro. + elim (alloc_regs_fresh_or_in_map _ _ _ _ _ _ MWF EQ r' i); intro. + tauto. byContradiction; eauto with rtlg. tauto. +Qed. + +Lemma transl_expr_Ecall_correct: + forall (sp: val) (le : letenv) (e : env) (m : mem) + (sig : signature) (a : expr) (bl : exprlist) + (e1 e2 : env) (m1 m2 m3 : mem) (vf : val) + (vargs : list val) (vres : val) (f : Cminor.function), + eval_expr ge sp le e m a e1 m1 vf -> + transl_expr_correct sp le e m a e1 m1 vf -> + eval_exprlist ge sp le e1 m1 bl e2 m2 vargs -> + transl_exprlist_correct sp le e1 m1 bl e2 m2 vargs -> + Genv.find_funct ge vf = Some f -> + Cminor.fn_sig f = sig -> + eval_funcall ge m2 f vargs m3 vres -> + transl_function_correct m2 f vargs m3 vres -> + transl_expr_correct sp le e m (Ecall sig a bl) e2 m3 vres. +Proof. + intros. red; simpl; intros. + monadInv TE. intro EQ3. clear TE. + assert (MUTa: incl (mutated_expr a) mut). eauto with coqlib. + assert (MUTbl: incl (mutated_exprlist bl) mut). eauto with coqlib. + assert (MWFa: map_wf map s3). + apply map_wf_incr with s. + apply state_incr_trans3 with s0 s1 s2; eauto with rtlg. + assumption. + assert (TRGr: target_reg_ok s3 map mut a r). + apply target_reg_ok_incr with s0. + apply state_incr_trans2 with s1 s2; eauto with rtlg. + eauto with rtlg. + generalize (H0 _ _ _ _ _ _ _ _ MWFa EQ3 ME MUTa TRGr). + intros [rs1 [EX1 [ME1 [RES1 OTHER1]]]]. + clear MUTa MWFa TRGr. + assert (MWFbl: map_wf map s2). + apply map_wf_incr with s. + apply state_incr_trans2 with s0 s1; eauto with rtlg. + assumption. + assert (TRGl: target_regs_ok s2 map mut bl l). + apply target_regs_ok_incr with s1; eauto with rtlg. + generalize (H2 _ _ _ _ _ _ _ _ MWFbl EQ2 ME1 MUTbl TRGl). + intros [rs2 [EX2 [ME2 [RES2 OTHER2]]]]. + clear MUTbl MWFbl TRGl. + + generalize (functions_translated vf f H3). intros [tf [TFIND TF]]. + generalize (H6 tf TF). intro EXF. + + assert (EX3: exec_instrs tge (st_code s2) sp n rs2 m2 + nd (rs2#rd <- vres) m3). + apply exec_one. eapply exec_Icall. + eauto with rtlg. simpl. replace (rs2#r) with vf. eexact TFIND. + rewrite <- RES1. symmetry. apply OTHER2. + apply reg_valid_incr with s0; eauto with rtlg. + apply target_reg_not_mutated with s0 a. + eauto with rtlg. eauto with rtlg. + assert (MWFs0: map_wf map s0). eauto with rtlg. + case (In_dec Reg.eq r l); intro. + elim (alloc_regs_fresh_or_in_map _ _ _ _ _ _ MWFs0 EQ0 r i); intro. + tauto. byContradiction. apply valid_fresh_absurd with r s0. + eauto with rtlg. assumption. + tauto. + generalize TF. unfold transl_function. + destruct (transl_fun f init_state). + intro; discriminate. destruct p. intros. injection TF0. intro. + rewrite <- H7; simpl. auto. + rewrite RES2. assumption. + + exists (rs2#rd <- vres). +(* Exec *) + split. eapply exec_trans. eexact EX1. + apply exec_instrs_incr with s3. eauto with rtlg. + eapply exec_trans. eexact EX2. + apply exec_instrs_incr with s2. eauto with rtlg. + exact EX3. +(* Match env *) + split. apply match_env_update_temp. assumption. + inversion TRG. assumption. +(* Result *) + split. apply Regmap.gss. +(* Other regs *) + intros. + rewrite Regmap.gso. transitivity (rs1#r0). + apply OTHER2. + apply reg_valid_incr with s. + apply state_incr_trans2 with s0 s1; eauto with rtlg. + assumption. + assumption. + assert (MWFs0: map_wf map s0). eauto with rtlg. + case (In_dec Reg.eq r0 l); intro. + elim (alloc_regs_fresh_or_in_map _ _ _ _ _ _ MWFs0 EQ0 r0 i); intro. + tauto. byContradiction. apply valid_fresh_absurd with r0 s0. + eauto with rtlg. assumption. + tauto. + apply OTHER1. + apply reg_valid_incr with s. + apply state_incr_trans3 with s0 s1 s2; eauto with rtlg. + assumption. + assumption. + case (Reg.eq r0 r); intro. + subst r0. + elim (alloc_reg_fresh_or_in_map _ _ _ _ _ _ MWF EQ); intro. + tauto. byContradiction; eauto with rtlg. + tauto. + red; intro; subst r0. + inversion TRG. tauto. +Qed. + +Lemma transl_expr_Econdition_correct: + forall (sp: val) (le : letenv) (e : env) (m : mem) + (a : condexpr) (b c : expr) (e1 : env) (m1 : mem) + (v1 : bool) (e2 : env) (m2 : mem) (v2 : val), + eval_condexpr ge sp le e m a e1 m1 v1 -> + transl_condition_correct sp le e m a e1 m1 v1 -> + eval_expr ge sp le e1 m1 (if v1 then b else c) e2 m2 v2 -> + transl_expr_correct sp le e1 m1 (if v1 then b else c) e2 m2 v2 -> + transl_expr_correct sp le e m (Econdition a b c) e2 m2 v2. +Proof. + intros; red; intros. simpl in TE; monadInv TE. intro EQ1; clear TE. + simpl in MUT. + + assert (MWF1: map_wf map s1). + apply map_wf_incr with s. eauto with rtlg. assumption. + assert (MUT1: incl (mutated_condexpr a) mut). eauto with coqlib. + generalize (H0 _ _ _ _ _ _ _ _ MWF1 EQ1 ME MUT1). + intros [rs1 [EX1 [ME1 OTHER1]]]. + destruct v1. + + assert (MWF0: map_wf map s0). eauto with rtlg. + assert (MUT0: incl (mutated_expr b) mut). eauto with coqlib. + assert (TRG0: target_reg_ok s0 map mut b rd). + inversion TRG. apply target_reg_other; eauto with rtlg. + generalize (H2 _ _ _ _ _ _ _ _ MWF0 EQ0 ME1 MUT0 TRG0). + intros [rs2 [EX2 [ME2 [RES2 OTHER2]]]]. + exists rs2. +(* Exec *) + split. eapply exec_trans. eexact EX1. + apply exec_instrs_incr with s1. eauto with rtlg. + exact EX2. +(* Match-env *) + split. assumption. +(* Result value *) + split. assumption. +(* Other regs *) + intros. transitivity (rs1#r). + apply OTHER2; auto. eauto with rtlg. + apply OTHER1; auto. apply reg_valid_incr with s. + apply state_incr_trans with s0; eauto with rtlg. assumption. + + assert (MUTc: incl (mutated_expr c) mut). eauto with coqlib. + assert (TRGc: target_reg_ok s map mut c rd). + inversion TRG. apply target_reg_other; auto. + generalize (H2 _ _ _ _ _ _ _ _ MWF EQ ME1 MUTc TRGc). + intros [rs2 [EX2 [ME2 [RES2 OTHER2]]]]. + exists rs2. +(* Exec *) + split. eapply exec_trans. eexact EX1. + apply exec_instrs_incr with s0. eauto with rtlg. + exact EX2. +(* Match-env *) + split. assumption. +(* Result value *) + split. assumption. +(* Other regs *) + intros. transitivity (rs1#r). + apply OTHER2; auto. eauto with rtlg. + apply OTHER1; auto. apply reg_valid_incr with s. + apply state_incr_trans2 with s0 s1; eauto with rtlg. assumption. +Qed. + +Lemma transl_expr_Elet_correct: + forall (sp: val) (le : letenv) (e : env) (m : mem) + (a b : expr) (e1 : env) (m1 : mem) (v1 : val) + (e2 : env) (m2 : mem) (v2 : val), + eval_expr ge sp le e m a e1 m1 v1 -> + transl_expr_correct sp le e m a e1 m1 v1 -> + eval_expr ge sp (v1 :: le) e1 m1 b e2 m2 v2 -> + transl_expr_correct sp (v1 :: le) e1 m1 b e2 m2 v2 -> + transl_expr_correct sp le e m (Elet a b) e2 m2 v2. +Proof. + intros; red; intros. + simpl in TE; monadInv TE. intro EQ1. + simpl in MUT. + assert (MWF1: map_wf map s1). eauto with rtlg. + assert (MUT1: incl (mutated_expr a) mut). eauto with coqlib. + assert (TRG1: target_reg_ok s1 map mut a r). + eapply target_reg_other; eauto with rtlg. + generalize (H0 _ _ _ _ _ _ _ _ MWF1 EQ1 ME MUT1 TRG1). + intros [rs1 [EX1 [ME1 [RES1 OTHER1]]]]. + assert (MWF2: map_wf (add_letvar map r) s0). + apply add_letvar_wf; eauto with rtlg. + assert (MUT2: incl (mutated_expr b) mut). eauto with coqlib. + assert (ME2: match_env (add_letvar map r) e1 (v1 :: le) rs1). + apply match_env_letvar; assumption. + assert (TRG2: target_reg_ok s0 (add_letvar map r) mut b rd). + inversion TRG. apply target_reg_other. + unfold reg_in_map, add_letvar; simpl. red; intro. + elim H10; intro. apply H3. left. assumption. + elim H11; intro. apply valid_fresh_absurd with rd s. + assumption. rewrite <- H12. eauto with rtlg. + apply H3. right. assumption. + eauto with rtlg. + generalize (H2 _ _ _ _ _ _ _ _ MWF2 EQ0 ME2 MUT2 TRG2). + intros [rs2 [EX2 [ME3 [RES2 OTHER2]]]]. + exists rs2. +(* Exec *) + split. eapply exec_trans. eexact EX1. + apply exec_instrs_incr with s1. eauto with rtlg. exact EX2. +(* Match-env *) + split. apply mk_match_env. exact (me_vars _ _ _ _ ME3). + generalize (me_letvars _ _ _ _ ME3). + unfold add_letvar; simpl. intro X; injection X; auto. +(* Result *) + split. assumption. +(* Other regs *) + intros. transitivity (rs1#r0). + apply OTHER2. eauto with rtlg. + unfold mutated_reg. unfold add_letvar; simpl. assumption. + elim H5; intro. left. + unfold reg_in_map, add_letvar; simpl. + unfold reg_in_map in H6; tauto. + tauto. + apply OTHER1. eauto with rtlg. + assumption. + right. red; intro. apply valid_fresh_absurd with r0 s. + assumption. rewrite H6. eauto with rtlg. +Qed. + +Lemma transl_expr_Eletvar_correct: + forall (sp: val) (le : list val) (e : env) + (m : mem) (n : nat) (v : val), + nth_error le n = Some v -> + transl_expr_correct sp le e m (Eletvar n) e m v. +Proof. + intros; red; intros. + simpl in TE; monadInv TE. intro EQ1. + generalize EQ. unfold find_letvar. + caseEq (nth_error (map_letvars map) n). + intros r0 NE; monadSimpl. subst s0; subst r0. + generalize (add_move_correct _ _ sp _ _ _ _ rs m EQ1). + intros [rs1 [EX1 [RES1 OTHER1]]]. + exists rs1. +(* Exec *) + split. exact EX1. +(* Match-env *) + split. inversion TRG. + assert (r = rd). congruence. + subst r. apply match_env_exten with rs. + intros. case (Reg.eq r rd); intro. subst r; auto. auto. auto. + apply match_env_invariant with rs. auto. + intros. apply OTHER1. red;intro;subst r1. contradiction. +(* Result *) + split. rewrite RES1. + generalize H. rewrite <- (me_letvars _ _ _ _ ME). + change positive with reg. + rewrite list_map_nth. rewrite NE. simpl. congruence. +(* Other regs *) + intros. inversion TRG. + assert (r = rd). congruence. subst r. + case (Reg.eq r0 rd); intro. subst r0; auto. auto. + apply OTHER1. red; intro; subst r0. tauto. + + intro; monadSimpl. +Qed. + +Lemma transl_condition_CEtrue_correct: + forall (sp: val) (le : letenv) (e : env) (m : mem), + transl_condition_correct sp le e m CEtrue e m true. +Proof. + intros; red; intros. simpl in TE; monadInv TE. subst ns. + exists rs. split. apply exec_refl. split. auto. auto. +Qed. + +Lemma transl_condition_CEfalse_correct: + forall (sp: val) (le : letenv) (e : env) (m : mem), + transl_condition_correct sp le e m CEfalse e m false. +Proof. + intros; red; intros. simpl in TE; monadInv TE. subst ns. + exists rs. split. apply exec_refl. split. auto. auto. +Qed. + +Lemma transl_condition_CEcond_correct: + forall (sp: val) (le : letenv) (e : env) (m : mem) + (cond : condition) (al : exprlist) (e1 : env) + (m1 : mem) (vl : list val) (b : bool), + eval_exprlist ge sp le e m al e1 m1 vl -> + transl_exprlist_correct sp le e m al e1 m1 vl -> + eval_condition cond vl = Some b -> + transl_condition_correct sp le e m (CEcond cond al) e1 m1 b. +Proof. + intros; red; intros. simpl in TE; monadInv TE. intro EQ1; clear TE. + assert (MWF1: map_wf map s1). eauto with rtlg. + simpl in MUT. + assert (TRG: target_regs_ok s1 map mut al l). + eauto with rtlg. + generalize (H0 _ _ _ _ _ _ _ _ MWF1 EQ1 ME MUT TRG). + intros [rs1 [EX1 [ME1 [RES1 OTHER1]]]]. + exists rs1. +(* Exec *) + split. eapply exec_trans. eexact EX1. + apply exec_instrs_incr with s1. eauto with rtlg. + apply exec_one. + destruct b. + eapply exec_Icond_true. eauto with rtlg. + rewrite RES1. assumption. + eapply exec_Icond_false. eauto with rtlg. + rewrite RES1. assumption. +(* Match-env *) + split. assumption. +(* Regs *) + intros. apply OTHER1. eauto with rtlg. assumption. + case (In_dec Reg.eq r l); intro. + elim (alloc_regs_fresh_or_in_map _ _ _ _ _ _ MWF EQ r i); intro. + tauto. byContradiction; eauto with rtlg. + tauto. +Qed. + +Lemma transl_condition_CEcondition_correct: + forall (sp: val) (le : letenv) (e : env) (m : mem) + (a b c : condexpr) (e1 : env) (m1 : mem) + (vb1 : bool) (e2 : env) (m2 : mem) (vb2 : bool), + eval_condexpr ge sp le e m a e1 m1 vb1 -> + transl_condition_correct sp le e m a e1 m1 vb1 -> + eval_condexpr ge sp le e1 m1 (if vb1 then b else c) e2 m2 vb2 -> + transl_condition_correct sp le e1 m1 (if vb1 then b else c) e2 m2 vb2 -> + transl_condition_correct sp le e m (CEcondition a b c) e2 m2 vb2. +Proof. + intros; red; intros. simpl in TE; monadInv TE. intro EQ1; clear TE. + simpl in MUT. + assert (MWF1: map_wf map s1). eauto with rtlg. + assert (MUTa: incl (mutated_condexpr a) mut). eauto with coqlib. + generalize (H0 _ _ _ _ _ _ _ _ MWF1 EQ1 ME MUTa). + intros [rs1 [EX1 [ME1 OTHER1]]]. + destruct vb1. + + assert (MWF0: map_wf map s0). eauto with rtlg. + assert (MUTb: incl (mutated_condexpr b) mut). eauto with coqlib. + generalize (H2 _ _ _ _ _ _ _ _ MWF0 EQ0 ME1 MUTb). + intros [rs2 [EX2 [ME2 OTHER2]]]. + exists rs2. + split. eapply exec_trans. eexact EX1. + apply exec_instrs_incr with s1. eauto with rtlg. + exact EX2. + split. assumption. + intros. transitivity (rs1#r). + apply OTHER2; eauto with rtlg. + apply OTHER1; eauto with rtlg. + + assert (MUTc: incl (mutated_condexpr c) mut). eauto with coqlib. + generalize (H2 _ _ _ _ _ _ _ _ MWF EQ ME1 MUTc). + intros [rs2 [EX2 [ME2 OTHER2]]]. + exists rs2. + split. eapply exec_trans. eexact EX1. + apply exec_instrs_incr with s0. eauto with rtlg. + exact EX2. + split. assumption. + intros. transitivity (rs1#r). + apply OTHER2; eauto with rtlg. + apply OTHER1; eauto with rtlg. +Qed. + +Lemma transl_exprlist_Enil_correct: + forall (sp: val) (le : letenv) (e : env) (m : mem), + transl_exprlist_correct sp le e m Enil e m nil. +Proof. + intros; red; intros. + generalize TE. simpl. destruct rl; monadSimpl. + subst s'; subst ns. exists rs. + split. apply exec_refl. + split. assumption. + split. reflexivity. + intros. reflexivity. +Qed. + +Lemma transl_exprlist_Econs_correct: + forall (sp: val) (le : letenv) (e : env) (m : mem) + (a : expr) (bl : exprlist) (e1 : env) (m1 : mem) + (v : val) (e2 : env) (m2 : mem) (vl : list val), + eval_expr ge sp le e m a e1 m1 v -> + transl_expr_correct sp le e m a e1 m1 v -> + eval_exprlist ge sp le e1 m1 bl e2 m2 vl -> + transl_exprlist_correct sp le e1 m1 bl e2 m2 vl -> + transl_exprlist_correct sp le e m (Econs a bl) e2 m2 (v :: vl). +Proof. + intros. red. intros. + inversion TRG. + rewrite <- H10 in TE. simpl in TE. monadInv TE. intro EQ1. + simpl in MUT. + assert (MUT1: incl (mutated_expr a) mut); eauto with coqlib. + assert (MUT2: incl (mutated_exprlist bl) mut); eauto with coqlib. + assert (MWF1: map_wf map s1); eauto with rtlg. + assert (TRG1: target_reg_ok s1 map mut a r); eauto with rtlg. + generalize (H0 _ _ _ _ _ _ _ _ MWF1 EQ1 ME MUT1 TRG1). + intros [rs1 [EX1 [ME1 [RES1 OTHER1]]]]. + generalize (H2 _ _ _ _ _ _ _ _ MWF EQ ME1 MUT2 H11). + intros [rs2 [EX2 [ME2 [RES2 OTHER2]]]]. + exists rs2. +(* Exec *) + split. eapply exec_trans. eexact EX1. + apply exec_instrs_incr with s1. eauto with rtlg. assumption. +(* Match-env *) + split. assumption. +(* Results *) + split. simpl. rewrite RES2. rewrite OTHER2. rewrite RES1. + reflexivity. + eauto with rtlg. + eauto with rtlg. + assumption. +(* Other regs *) + simpl. intros. + transitivity (rs1#r0). + apply OTHER2; auto. tauto. + apply OTHER1; auto. eauto with rtlg. + elim H14; intro. left; assumption. right; apply sym_not_equal; tauto. +Qed. + +Lemma transl_funcall_correct: + forall (m : mem) (f : Cminor.function) (vargs : list val) + (m1 : mem) (sp : block) (e e2 : env) (m2 : mem) + (out : outcome) (vres : val), + Mem.alloc m 0 (fn_stackspace f) = (m1, sp) -> + set_locals (Cminor.fn_vars f) (set_params vargs (Cminor.fn_params f)) = e -> + exec_stmtlist ge (Vptr sp Int.zero) e m1 (fn_body f) e2 m2 out -> + transl_stmtlist_correct (Vptr sp Int.zero) e m1 (fn_body f) e2 m2 out -> + outcome_result_value out f.(Cminor.fn_sig).(sig_res) vres -> + transl_function_correct m f vargs (Mem.free m2 sp) vres. +Proof. + intros; red; intros. + generalize TE. unfold transl_function. + caseEq (transl_fun f init_state). + intros; discriminate. + intros ns s. unfold transl_fun. monadSimpl. + subst ns. intros EQ4. injection EQ4; intro TF; clear EQ4. + subst s4. + + pose (rs := init_regs vargs x). + assert (ME: match_env y0 e nil rs). + rewrite <- H0. unfold rs. + eapply match_init_env_init_reg; eauto. + + assert (OUT: outcome_node out n nil n n). + red. generalize H3. unfold outcome_result_value. + destruct out; contradiction || auto. + + assert (MWF1: map_wf y0 s1). + eapply add_vars_wf. eexact EQ0. + eapply add_vars_wf. eexact EQ. + apply init_mapping_wf. + + assert (MWF: map_wf y0 s3). + apply map_wf_incr with s1. apply state_incr_trans with s2; eauto with rtlg. + assumption. + + assert (RRG: return_reg_ok s3 y0 (ret_reg (Cminor.fn_sig f) r)). + apply return_reg_ok_incr with s2. eauto with rtlg. + apply new_reg_return_ok with s1; assumption. + + generalize (H2 _ _ _ _ _ _ _ _ _ rs MWF EQ3 ME OUT RRG). + intros [rs1 [EX1 [ME1 MR1]]]. + + apply exec_funct with m1 n rs1 (ret_reg (Cminor.fn_sig f) r). + rewrite <- TF; simpl; assumption. + rewrite <- TF; simpl. exact EX1. + rewrite <- TF; simpl. apply instr_at_incr with s3. + eauto with rtlg. discriminate. eauto with rtlg. + generalize MR1. unfold match_return_outcome. + generalize H3. unfold outcome_result_value. + unfold ret_reg; destruct (sig_res (Cminor.fn_sig f)). + unfold regmap_optget. destruct out; try contradiction. + destruct o; try contradiction. intros; congruence. + unfold regmap_optget. destruct out; contradiction||auto. + destruct o; contradiction||auto. +Qed. + +Lemma transl_stmt_Sexpr_correct: + forall (sp: val) (e : env) (m : mem) (a : expr) + (e1 : env) (m1 : mem) (v : val), + eval_expr ge sp nil e m a e1 m1 v -> + transl_expr_correct sp nil e m a e1 m1 v -> + transl_stmt_correct sp e m (Sexpr a) e1 m1 Out_normal. +Proof. + intros; red; intros. + simpl in OUT. subst nd. + unfold transl_stmt in TE. monadInv TE. intro EQ1. + assert (MWF0: map_wf map s0). eauto with rtlg. + assert (TRG: target_reg_ok s0 map (mutated_expr a) a r). eauto with rtlg. + generalize (H0 _ _ _ _ _ _ _ _ MWF0 EQ1 ME (incl_refl (mutated_expr a)) TRG). + intros [rs1 [EX1 [ME1 [RES1 OTHER1]]]]. + exists rs1; simpl; tauto. +Qed. + +Lemma transl_stmt_Sifthenelse_correct: + forall (sp: val) (e : env) (m : mem) (a : condexpr) + (sl1 sl2 : stmtlist) (e1 : env) (m1 : mem) + (v1 : bool) (e2 : env) (m2 : mem) (out : outcome), + eval_condexpr ge sp nil e m a e1 m1 v1 -> + transl_condition_correct sp nil e m a e1 m1 v1 -> + exec_stmtlist ge sp e1 m1 (if v1 then sl1 else sl2) e2 m2 out -> + transl_stmtlist_correct sp e1 m1 (if v1 then sl1 else sl2) e2 m2 out -> + transl_stmt_correct sp e m (Sifthenelse a sl1 sl2) e2 m2 out. +Proof. + intros; red; intros until rs; intro MWF. + simpl. case (more_likely a sl1 sl2); monadSimpl; intro EQ2; intros. + assert (MWF1: map_wf map s1). eauto with rtlg. + generalize (H0 _ _ _ _ _ _ _ rs MWF1 EQ2 ME (incl_refl _)). + intros [rs1 [EX1 [ME1 OTHER1]]]. + destruct v1. + assert (MWF0: map_wf map s0). eauto with rtlg. + assert (RRG0: return_reg_ok s0 map rret). eauto with rtlg. + generalize (H2 _ _ _ _ _ _ _ _ _ _ MWF0 EQ0 ME1 OUT RRG0). + intros [rs2 [EX2 [ME2 MRE2]]]. + exists rs2. split. + eapply exec_trans. eexact EX1. apply exec_instrs_incr with s1. + eauto with rtlg. exact EX2. + tauto. + generalize (H2 _ _ _ _ _ _ _ _ _ _ MWF EQ ME1 OUT RRG). + intros [rs2 [EX2 [ME2 MRE2]]]. + exists rs2. split. + eapply exec_trans. eexact EX1. apply exec_instrs_incr with s0. + eauto with rtlg. exact EX2. + tauto. + + assert (MWF1: map_wf map s1). eauto with rtlg. + generalize (H0 _ _ _ _ _ _ _ rs MWF1 EQ2 ME (incl_refl _)). + intros [rs1 [EX1 [ME1 OTHER1]]]. + destruct v1. + generalize (H2 _ _ _ _ _ _ _ _ _ _ MWF EQ ME1 OUT RRG). + intros [rs2 [EX2 [ME2 MRE2]]]. + exists rs2. split. + eapply exec_trans. eexact EX1. apply exec_instrs_incr with s0. + eauto with rtlg. exact EX2. + tauto. + assert (MWF0: map_wf map s0). eauto with rtlg. + assert (RRG0: return_reg_ok s0 map rret). eauto with rtlg. + generalize (H2 _ _ _ _ _ _ _ _ _ _ MWF0 EQ0 ME1 OUT RRG0). + intros [rs2 [EX2 [ME2 MRE2]]]. + exists rs2. split. + eapply exec_trans. eexact EX1. apply exec_instrs_incr with s1. + eauto with rtlg. exact EX2. + tauto. +Qed. + +Lemma transl_stmt_Sloop_loop_correct: + forall (sp: val) (e : env) (m : mem) (sl : stmtlist) + (e1 : env) (m1 : mem) (e2 : env) (m2 : mem) + (out : outcome), + exec_stmtlist ge sp e m sl e1 m1 Out_normal -> + transl_stmtlist_correct sp e m sl e1 m1 Out_normal -> + exec_stmt ge sp e1 m1 (Sloop sl) e2 m2 out -> + transl_stmt_correct sp e1 m1 (Sloop sl) e2 m2 out -> + transl_stmt_correct sp e m (Sloop sl) e2 m2 out. +Proof. + intros; red; intros. + generalize TE. simpl. monadSimpl. subst s2; subst n0. intros. + assert (MWF0: map_wf map s0). apply map_wf_incr with s. + eapply reserve_instr_incr; eauto. + assumption. + assert (OUT0: outcome_node Out_normal n nexits nret n). + unfold outcome_node. auto. + assert (RRG0: return_reg_ok s0 map rret). + apply return_reg_ok_incr with s. + eapply reserve_instr_incr; eauto. + assumption. + generalize (H0 _ _ _ _ _ _ _ _ _ _ MWF0 EQ0 ME OUT0 RRG0). + intros [rs1 [EX1 [ME1 MR1]]]. + generalize (H2 _ _ _ _ _ _ _ _ _ _ MWF TE ME1 OUT RRG). + intros [rs2 [EX2 [ME2 MR2]]]. + exists rs2. + split. eapply exec_trans. + apply exec_instrs_extends with s1. + eapply update_instr_extends. + eexact EQ. eauto with rtlg. eexact EQ1. eexact EX1. + apply exec_trans with ns rs1 m1. + apply exec_one. apply exec_Inop. + generalize EQ1. unfold update_instr. + case (plt n (st_nextnode s1)); intro; monadSimpl. + rewrite <- H4. simpl. apply PTree.gss. + exact EX2. + tauto. +Qed. + +Lemma transl_stmt_Sloop_stop_correct: + forall (sp: val) (e : env) (m : mem) (sl : stmtlist) + (e1 : env) (m1 : mem) (out : outcome), + exec_stmtlist ge sp e m sl e1 m1 out -> + transl_stmtlist_correct sp e m sl e1 m1 out -> + out <> Out_normal -> + transl_stmt_correct sp e m (Sloop sl) e1 m1 out. +Proof. + intros; red; intros. + simpl in TE. monadInv TE. subst s2; subst n0. + assert (MWF0: map_wf map s0). apply map_wf_incr with s. + eapply reserve_instr_incr; eauto. assumption. + assert (OUT0: outcome_node out n nexits nret nd). + generalize OUT. unfold outcome_node. + destruct out; auto. elim H1; auto. + assert (RRG0: return_reg_ok s0 map rret). + apply return_reg_ok_incr with s. + eapply reserve_instr_incr; eauto. + assumption. + generalize (H0 _ _ _ _ _ _ _ _ _ _ MWF0 EQ0 ME OUT0 RRG0). + intros [rs1 [EX1 [ME1 MR1]]]. + exists rs1. split. + apply exec_instrs_extends with s1. + eapply update_instr_extends. + eexact EQ. eauto with rtlg. eexact EQ1. eexact EX1. + tauto. +Qed. + +Lemma transl_stmt_Sblock_correct: + forall (sp: val) (e : env) (m : mem) (sl : stmtlist) + (e1 : env) (m1 : mem) (out : outcome), + exec_stmtlist ge sp e m sl e1 m1 out -> + transl_stmtlist_correct sp e m sl e1 m1 out -> + transl_stmt_correct sp e m (Sblock sl) e1 m1 (outcome_block out). +Proof. + intros; red; intros. simpl in TE. + assert (OUT': outcome_node out ncont (ncont :: nexits) nret nd). + generalize OUT. unfold outcome_node, outcome_block. + destruct out. + auto. + destruct n. simpl. intro; unfold value; congruence. + simpl. auto. + auto. + generalize (H0 _ _ _ _ _ _ _ _ _ _ MWF TE ME OUT' RRG). + intros [rs1 [EX1 [ME1 MR1]]]. + exists rs1. + split. assumption. + split. assumption. + generalize MR1. unfold match_return_outcome, outcome_block. + destruct out; auto. + destruct n; simpl; auto. +Qed. + +Lemma transl_stmt_Sexit_correct: + forall (sp: val) (e : env) (m : mem) (n : nat), + transl_stmt_correct sp e m (Sexit n) e m (Out_exit n). +Proof. + intros; red; intros. + generalize TE. simpl. caseEq (nth_error nexits n). + intros n' NE. monadSimpl. subst n'. + simpl in OUT. assert (ns = nd). congruence. subst ns. + exists rs. intuition. apply exec_refl. + intro. monadSimpl. +Qed. + +Lemma transl_stmt_Sreturn_none_correct: + forall (sp: val) (e : env) (m : mem), + transl_stmt_correct sp e m (Sreturn None) e m (Out_return None). +Proof. + intros; red; intros. generalize TE. simpl. + destruct rret; monadSimpl. + simpl in OUT. subst ns; subst nret. + exists rs. intuition. apply exec_refl. +Qed. + +Lemma transl_stmt_Sreturn_some_correct: + forall (sp: val) (e : env) (m : mem) (a : expr) + (e1 : env) (m1 : mem) (v : val), + eval_expr ge sp nil e m a e1 m1 v -> + transl_expr_correct sp nil e m a e1 m1 v -> + transl_stmt_correct sp e m (Sreturn (Some a)) e1 m1 (Out_return (Some v)). +Proof. + intros; red; intros. generalize TE; simpl. + destruct rret. intro EQ. + assert (TRG: target_reg_ok s map (mutated_expr a) a r). + inversion RRG. apply target_reg_other; auto. + generalize (H0 _ _ _ _ _ _ _ _ MWF EQ ME (incl_refl _) TRG). + intros [rs1 [EX1 [ME1 [RES1 OTHER1]]]]. + simpl in OUT. subst nd. exists rs1. tauto. + + monadSimpl. +Qed. + +Lemma transl_stmtlist_Snil_correct: + forall (sp: val) (e : env) (m : mem), + transl_stmtlist_correct sp e m Snil e m Out_normal. +Proof. + intros; red; intros. simpl in TE. monadInv TE. + subst s'; subst ns. + simpl in OUT. subst ncont. + exists rs. simpl. intuition. apply exec_refl. +Qed. + +Lemma transl_stmtlist_Scons_continue_correct: + forall (sp: val) (e : env) (m : mem) (s : stmt) + (sl : stmtlist) (e1 : env) (m1 : mem) (e2 : env) + (m2 : mem) (out : outcome), + exec_stmt ge sp e m s e1 m1 Out_normal -> + transl_stmt_correct sp e m s e1 m1 Out_normal -> + exec_stmtlist ge sp e1 m1 sl e2 m2 out -> + transl_stmtlist_correct sp e1 m1 sl e2 m2 out -> + transl_stmtlist_correct sp e m (Scons s sl) e2 m2 out. +Proof. + intros; red; intros. simpl in TE; monadInv TE. intro EQ0. + assert (MWF1: map_wf map s1). eauto with rtlg. + assert (OUTs: outcome_node Out_normal n nexits nret n). + simpl. auto. + assert (RRG1: return_reg_ok s1 map rret). eauto with rtlg. + generalize (H0 _ _ _ _ _ _ _ _ _ _ MWF1 EQ0 ME OUTs RRG1). + intros [rs1 [EX1 [ME1 MR1]]]. + generalize (H2 _ _ _ _ _ _ _ _ _ _ MWF EQ ME1 OUT RRG). + intros [rs2 [EX2 [ME2 MR2]]]. + exists rs2. +(* Exec *) + split. eapply exec_trans. eexact EX1. + apply exec_instrs_incr with s1. eauto with rtlg. + exact EX2. +(* Match-env + return *) + tauto. +Qed. + +Lemma transl_stmtlist_Scons_stop_correct: + forall (sp: val) (e : env) (m : mem) (s : stmt) + (sl : stmtlist) (e1 : env) (m1 : mem) (out : outcome), + exec_stmt ge sp e m s e1 m1 out -> + transl_stmt_correct sp e m s e1 m1 out -> + out <> Out_normal -> + transl_stmtlist_correct sp e m (Scons s sl) e1 m1 out. +Proof. + intros; red; intros. + simpl in TE; monadInv TE. intro EQ0; clear TE. + assert (MWF1: map_wf map s1). eauto with rtlg. + assert (OUTs: outcome_node out n nexits nret nd). + destruct out; simpl; auto. tauto. + assert (RRG1: return_reg_ok s1 map rret). eauto with rtlg. + exact (H0 _ _ _ _ _ _ _ _ _ _ MWF1 EQ0 ME OUTs RRG1). +Qed. + +(** The correctness of the translation then follows by application + of the mutual induction principle for Cminor evaluation derivations + to the lemmas above. *) + +Scheme eval_expr_ind_6 := Minimality for eval_expr Sort Prop + with eval_condexpr_ind_6 := Minimality for eval_condexpr Sort Prop + with eval_exprlist_ind_6 := Minimality for eval_exprlist Sort Prop + with eval_funcall_ind_6 := Minimality for eval_funcall Sort Prop + with exec_stmt_ind_6 := Minimality for exec_stmt Sort Prop + with exec_stmtlist_ind_6 := Minimality for exec_stmtlist Sort Prop. + +Theorem transl_function_correctness: + forall m f vargs m' vres, + eval_funcall ge m f vargs m' vres -> + transl_function_correct m f vargs m' vres. +Proof + (eval_funcall_ind_6 ge + transl_expr_correct + transl_condition_correct + transl_exprlist_correct + transl_function_correct + transl_stmt_correct + transl_stmtlist_correct + + transl_expr_Evar_correct + transl_expr_Eassign_correct + transl_expr_Eop_correct + transl_expr_Eload_correct + transl_expr_Estore_correct + transl_expr_Ecall_correct + transl_expr_Econdition_correct + transl_expr_Elet_correct + transl_expr_Eletvar_correct + transl_condition_CEtrue_correct + transl_condition_CEfalse_correct + transl_condition_CEcond_correct + transl_condition_CEcondition_correct + transl_exprlist_Enil_correct + transl_exprlist_Econs_correct + transl_funcall_correct + transl_stmt_Sexpr_correct + transl_stmt_Sifthenelse_correct + transl_stmt_Sloop_loop_correct + transl_stmt_Sloop_stop_correct + transl_stmt_Sblock_correct + transl_stmt_Sexit_correct + transl_stmt_Sreturn_none_correct + transl_stmt_Sreturn_some_correct + transl_stmtlist_Snil_correct + transl_stmtlist_Scons_continue_correct + transl_stmtlist_Scons_stop_correct). + +Theorem transl_program_correct: + forall (r: val), + Cminor.exec_program prog r -> + RTL.exec_program tprog r. +Proof. + intros r [b [f [m [SYMB [FUNC [SIG EVAL]]]]]]. + generalize (function_ptr_translated _ _ FUNC). + intros [tf [TFIND TRANSLF]]. + red; exists b; exists tf; exists m. + split. rewrite symbols_preserved. + replace (prog_main tprog) with (prog_main prog). + assumption. + symmetry; apply transform_partial_program_main with transl_function. + exact TRANSL. + split. exact TFIND. + split. generalize TRANSLF. unfold transl_function. + destruct (transl_fun f init_state). + intro; discriminate. destruct p; intro EQ; injection EQ; intro EQ1. + rewrite <- EQ1. simpl. congruence. + rewrite (Genv.init_mem_transf_partial transl_function prog TRANSL). + exact (transl_function_correctness _ _ _ _ _ EVAL _ TRANSLF). +Qed. + +End CORRECTNESS. diff --git a/backend/RTLgenproof1.v b/backend/RTLgenproof1.v new file mode 100644 index 00000000..4a8ad43f --- /dev/null +++ b/backend/RTLgenproof1.v @@ -0,0 +1,1463 @@ +(** Correctness proof for RTL generation: auxiliary results. *) + +Require Import Coqlib. +Require Import Maps. +Require Import AST. +Require Import Integers. +Require Import Values. +Require Import Mem. +Require Import Globalenvs. +Require Import Op. +Require Import Registers. +Require Import Cminor. +Require Import RTL. +Require Import RTLgen. + +(** * Reasoning about monadic computations *) + +(** The tactics below simplify hypotheses of the form [f ... = OK x s] + where [f] is a monadic computation. For instance, the hypothesis + [(do x <- a; b) s = OK y s'] will generate the additional witnesses + [x], [s1] and the additional hypotheses + [a s = OK x s1] and [b x s1 = OK y s'], reflecting the fact that + both monadic computations [a] and [b] succeeded. +*) + +Ltac monadSimpl1 := + match goal with + | [ |- (bind ?F ?G ?S = OK ?X ?S') -> _ ] => + unfold bind at 1; + generalize (refl_equal (F S)); + pattern (F S) at -1 in |- *; + case (F S); + [ intro; intro; discriminate + | (let s := fresh "s" in + (let EQ := fresh "EQ" in + (intro; intros s EQ; + try monadSimpl1))) ] + | [ |- (bind2 ?F ?G ?S = OK ?X ?S') -> _ ] => + unfold bind2 at 1; unfold bind at 1; + generalize (refl_equal (F S)); + pattern (F S) at -1 in |- *; + case (F S); + [ intro; intro; discriminate + | let xy := fresh "xy" in + (let x := fresh "x" in + (let y := fresh "y" in + (let s := fresh "s" in + (let EQ := fresh "EQ" in + (intros xy s EQ; destruct xy as [x y]; simpl; + try monadSimpl1))))) ] + | [ |- (error _ _ = OK _ _) -> _ ] => + unfold error; monadSimpl1 + | [ |- (ret _ _ = OK _ _) -> _ ] => + unfold ret; monadSimpl1 + | [ |- (Error _ = OK _ _) -> _ ] => + intro; discriminate + | [ |- (OK _ _ = OK _ _) -> _ ] => + let h := fresh "H" in + (intro h; injection h; intro; intro; clear h) + end. + +Ltac monadSimpl := + match goal with + | [ |- (bind ?F ?G ?S = OK ?X ?S') -> _ ] => monadSimpl1 + | [ |- (bind2 ?F ?G ?S = OK ?X ?S') -> _ ] => monadSimpl1 + | [ |- (error _ _ = OK _ _) -> _ ] => monadSimpl1 + | [ |- (ret _ _ = OK _ _) -> _ ] => monadSimpl1 + | [ |- (Error _ = OK _ _) -> _ ] => monadSimpl1 + | [ |- (OK _ _ = OK _ _) -> _ ] => monadSimpl1 + | [ |- (?F _ _ _ _ _ _ _ = OK _ _) -> _ ] => unfold F; monadSimpl1 + | [ |- (?F _ _ _ _ _ _ = OK _ _) -> _ ] => unfold F; monadSimpl1 + | [ |- (?F _ _ _ _ _ = OK _ _) -> _ ] => unfold F; monadSimpl1 + | [ |- (?F _ _ _ _ = OK _ _) -> _ ] => unfold F; monadSimpl1 + | [ |- (?F _ _ _ = OK _ _) -> _ ] => unfold F; monadSimpl1 + | [ |- (?F _ _ = OK _ _) -> _ ] => unfold F; monadSimpl1 + | [ |- (?F _ = OK _ _) -> _ ] => unfold F; monadSimpl1 + end. + +Ltac monadInv H := + generalize H; monadSimpl. + +(** * Monotonicity properties of the state *) + +(** Operations over the global state satisfy a crucial monotonicity property: + nodes are only added to the CFG, but never removed nor their instructions + changed; similarly, fresh nodes and fresh registers are only consumed, + but never reused. This property is captured by the following predicate + over states, which we show is a partial order. *) + +Inductive state_incr: state -> state -> Prop := + state_incr_intro: + forall (s1 s2: state), + Ple s1.(st_nextnode) s2.(st_nextnode) -> + Ple s1.(st_nextreg) s2.(st_nextreg) -> + (forall pc, Plt pc s1.(st_nextnode) -> s2.(st_code)!pc = s1.(st_code)!pc) -> + state_incr s1 s2. + +Lemma instr_at_incr: + forall s1 s2 n i, + s1.(st_code)!n = i -> i <> None -> state_incr s1 s2 -> + s2.(st_code)!n = i. +Proof. + intros. inversion H1. + rewrite <- H. apply H4. elim (st_wf s1 n); intro. + assumption. elim H0. congruence. +Qed. + +Lemma state_incr_refl: + forall s, state_incr s s. +Proof. + intros. apply state_incr_intro. + apply Ple_refl. apply Ple_refl. intros; auto. +Qed. +Hint Resolve state_incr_refl: rtlg. + +Lemma state_incr_trans: + forall s1 s2 s3, state_incr s1 s2 -> state_incr s2 s3 -> state_incr s1 s3. +Proof. + intros. inversion H. inversion H0. apply state_incr_intro. + apply Ple_trans with (st_nextnode s2); assumption. + apply Ple_trans with (st_nextreg s2); assumption. + intros. transitivity (s2.(st_code)!pc). + apply H8. apply Plt_Ple_trans with s1.(st_nextnode); auto. + apply H3; auto. +Qed. +Hint Resolve state_incr_trans: rtlg. + +Lemma state_incr_trans2: + forall s1 s2 s3 s4, + state_incr s1 s2 -> state_incr s2 s3 -> state_incr s3 s4 -> + state_incr s1 s4. +Proof. + intros; eauto with rtlg. +Qed. + +Lemma state_incr_trans3: + forall s1 s2 s3 s4 s5, + state_incr s1 s2 -> state_incr s2 s3 -> state_incr s3 s4 -> state_incr s4 s5 -> + state_incr s1 s5. +Proof. + intros; eauto with rtlg. +Qed. + +Lemma state_incr_trans4: + forall s1 s2 s3 s4 s5 s6, + state_incr s1 s2 -> state_incr s2 s3 -> state_incr s3 s4 -> + state_incr s4 s5 -> state_incr s5 s6 -> + state_incr s1 s6. +Proof. + intros; eauto with rtlg. +Qed. + +Lemma state_incr_trans5: + forall s1 s2 s3 s4 s5 s6 s7, + state_incr s1 s2 -> state_incr s2 s3 -> state_incr s3 s4 -> + state_incr s4 s5 -> state_incr s5 s6 -> state_incr s6 s7 -> + state_incr s1 s7. +Proof. + intros; eauto 6 with rtlg. +Qed. + +Lemma state_incr_trans6: + forall s1 s2 s3 s4 s5 s6 s7 s8, + state_incr s1 s2 -> state_incr s2 s3 -> state_incr s3 s4 -> + state_incr s4 s5 -> state_incr s5 s6 -> state_incr s6 s7 -> + state_incr s7 s8 -> state_incr s1 s8. +Proof. + intros; eauto 7 with rtlg. +Qed. + +(** The following relation between two states is a weaker version of + [state_incr]. It permits changing the contents at an already reserved + graph node, but only from [None] to [Some i]. *) + +Definition state_extends (s1 s2: state): Prop := + forall pc, + s1.(st_code)!pc = None \/ s2.(st_code)!pc = s1.(st_code)!pc. + +Lemma state_incr_extends: + forall s1 s2, + state_incr s1 s2 -> state_extends s1 s2. +Proof. + unfold state_extends; intros. inversion H. + case (plt pc s1.(st_nextnode)); intro. + right; apply H2; auto. + left. elim (s1.(st_wf) pc); intro. + elim (n H5). auto. +Qed. +Hint Resolve state_incr_extends. + +(** A crucial property of states is the following: if an RTL execution + is possible (does not get stuck) in the CFG of a given state [s1] + the same execution is possible and leads to the same results in + the CFG of any state [s2] that extends [s1] in the sense of the + [state_extends] predicate. *) + +Section EXEC_INSTR_EXTENDS. + +Variable s1 s2: state. +Hypothesis EXT: state_extends s1 s2. + +Lemma exec_instr_not_halt: + forall ge c sp pc rs m pc' rs' m', + exec_instr ge c sp pc rs m pc' rs' m' -> c!pc <> None. +Proof. + induction 1; rewrite H; discriminate. +Qed. + +Lemma exec_instr_in_s2: + forall ge sp pc rs m pc' rs' m', + exec_instr ge s1.(st_code) sp pc rs m pc' rs' m' -> + s2.(st_code)!pc = s1.(st_code)!pc. +Proof. + intros. + elim (EXT pc); intro. + elim (exec_instr_not_halt _ _ _ _ _ _ _ _ _ H H0). + assumption. +Qed. + +Lemma exec_instr_extends_rec: + forall ge c sp pc rs m pc' rs' m', + exec_instr ge c sp pc rs m pc' rs' m' -> + forall c', c!pc = c'!pc -> + exec_instr ge c' sp pc rs m pc' rs' m'. +Proof. + induction 1; intros. + apply exec_Inop. congruence. + apply exec_Iop with op args. congruence. auto. + apply exec_Iload with chunk addr args a. congruence. auto. auto. + apply exec_Istore with chunk addr args src a. + congruence. auto. auto. + apply exec_Icall with sig ros args f; auto. congruence. + apply exec_Icond_true with cond args ifnot; auto. congruence. + apply exec_Icond_false with cond args ifso; auto. congruence. +Qed. + +Lemma exec_instr_extends: + forall ge sp pc rs m pc' rs' m', + exec_instr ge s1.(st_code) sp pc rs m pc' rs' m' -> + exec_instr ge s2.(st_code) sp pc rs m pc' rs' m'. +Proof. + intros. + apply exec_instr_extends_rec with (st_code s1). + assumption. + symmetry. eapply exec_instr_in_s2. eexact H. +Qed. + +Lemma exec_instrs_extends_rec: + forall ge c sp pc rs m pc' rs' m', + exec_instrs ge c sp pc rs m pc' rs' m' -> + c = s1.(st_code) -> + exec_instrs ge s2.(st_code) sp pc rs m pc' rs' m'. +Proof. + induction 1; intros. + apply exec_refl. + apply exec_one. apply exec_instr_extends; auto. rewrite <- H0; auto. + apply exec_trans with pc2 rs2 m2; auto. +Qed. + +Lemma exec_instrs_extends: + forall ge sp pc rs m pc' rs' m', + exec_instrs ge s1.(st_code) sp pc rs m pc' rs' m' -> + exec_instrs ge s2.(st_code) sp pc rs m pc' rs' m'. +Proof. + intros. + apply exec_instrs_extends_rec with (st_code s1); auto. +Qed. + +End EXEC_INSTR_EXTENDS. + +(** Since [state_incr s1 s2] implies [state_extends s1 s2], we also have + that any RTL execution possible in the CFG of [s1] is also possible + in the CFG of [s2], provided that [state_incr s1 s2]. + In particular, any RTL execution that is possible in a partially + constructed CFG remains possible in the final CFG obtained at + the end of the translation of the current function. *) + +Section EXEC_INSTR_INCR. + +Variable s1 s2: state. +Hypothesis INCR: state_incr s1 s2. + +Lemma exec_instr_incr: + forall ge sp pc rs m pc' rs' m', + exec_instr ge s1.(st_code) sp pc rs m pc' rs' m' -> + exec_instr ge s2.(st_code) sp pc rs m pc' rs' m'. +Proof. + intros. + apply exec_instr_extends with s1. + apply state_incr_extends; auto. + auto. +Qed. + +Lemma exec_instrs_incr: + forall ge sp pc rs m pc' rs' m', + exec_instrs ge s1.(st_code) sp pc rs m pc' rs' m' -> + exec_instrs ge s2.(st_code) sp pc rs m pc' rs' m'. +Proof. + intros. + apply exec_instrs_extends with s1. + apply state_incr_extends; auto. + auto. +Qed. + +End EXEC_INSTR_INCR. + +(** * Validity and freshness of registers *) + +(** An RTL pseudo-register is valid in a given state if it was created + earlier, that is, it is less than the next fresh register of the state. + Otherwise, the pseudo-register is said to be fresh. *) + +Definition reg_valid (r: reg) (s: state) : Prop := + Plt r s.(st_nextreg). + +Definition reg_fresh (r: reg) (s: state) : Prop := + ~(Plt r s.(st_nextreg)). + +Lemma valid_fresh_absurd: + forall r s, reg_valid r s -> reg_fresh r s -> False. +Proof. + intros r s. unfold reg_valid, reg_fresh; case r; tauto. +Qed. +Hint Resolve valid_fresh_absurd: rtlg. + +Lemma valid_fresh_different: + forall r1 r2 s, reg_valid r1 s -> reg_fresh r2 s -> r1 <> r2. +Proof. + unfold not; intros. subst r2. eauto with rtlg. +Qed. +Hint Resolve valid_fresh_different: rtlg. + +Lemma reg_valid_incr: + forall r s1 s2, state_incr s1 s2 -> reg_valid r s1 -> reg_valid r s2. +Proof. + intros r s1 s2 INCR. + inversion INCR. + unfold reg_valid. intros; apply Plt_Ple_trans with (st_nextreg s1); auto. +Qed. +Hint Resolve reg_valid_incr: rtlg. + +Lemma reg_fresh_decr: + forall r s1 s2, state_incr s1 s2 -> reg_fresh r s2 -> reg_fresh r s1. +Proof. + intros r s1 s2 INCR. inversion INCR. + unfold reg_fresh; unfold not; intros. + apply H4. apply Plt_Ple_trans with (st_nextreg s1); auto. +Qed. +Hint Resolve reg_fresh_decr: rtlg. + +(** * Well-formedness of compilation environments *) + +(** A compilation environment (mapping) is well-formed in a given state if + the following properties hold: +- The registers associated with Cminor local variables and let-bound variables + are valid in the state. +- Two distinct Cminor local variables are mapped to distinct pseudo-registers. +- A Cminor local variable and a let-bound variable are mapped to + distinct pseudo-registers. +*) + +Record map_wf (m: mapping) (s: state) : Prop := + mk_map_wf { + map_wf_var_valid: + (forall id r, m.(map_vars)!id = Some r -> reg_valid r s); + map_wf_letvar_valid: + (forall r, In r m.(map_letvars) -> reg_valid r s); + map_wf_inj: + (forall id1 id2 r, + m.(map_vars)!id1 = Some r -> m.(map_vars)!id2 = Some r -> id1 = id2); + map_wf_disj: + (forall id r, + m.(map_vars)!id = Some r -> In r m.(map_letvars) -> False) + }. +Hint Resolve map_wf_var_valid + map_wf_letvar_valid + map_wf_inj map_wf_disj: rtlg. + +Lemma map_wf_incr: + forall s1 s2 m, + state_incr s1 s2 -> map_wf m s1 -> map_wf m s2. +Proof. + intros. apply mk_map_wf; intros; eauto with rtlg. +Qed. +Hint Resolve map_wf_incr: rtlg. + +(** A register is ``in'' a mapping if it is associated with a Cminor + local or let-bound variable. *) + +Definition reg_in_map (m: mapping) (r: reg) : Prop := + (exists id, m.(map_vars)!id = Some r) \/ In r m.(map_letvars). + +Lemma reg_in_map_valid: + forall m s r, + map_wf m s -> reg_in_map m r -> reg_valid r s. +Proof. + intros. elim H0. + intros [id EQ]. eauto with rtlg. + intro IN. eauto with rtlg. +Qed. +Hint Resolve reg_in_map_valid: rtlg. + +(** A register is mutated if it is associated with a Cminor local variable + that belongs to the given set of mutated variables. *) + +Definition mutated_reg (map: mapping) (mut: list ident) (r: reg) : Prop := + exists id, In id mut /\ map.(map_vars)!id = Some r. + +Lemma mutated_reg_in_map: + forall map mut r, mutated_reg map mut r -> reg_in_map map r. +Proof. + intros. elim H. intros id [IN EQ]. + left. exists id; auto. +Qed. +Hint Resolve mutated_reg_in_map: rtlg. + +(** * Properties of basic operations over the state *) + +(** Properties of [add_instr]. *) + +Lemma add_instr_incr: + forall s1 s2 i n, + add_instr i s1 = OK n s2 -> state_incr s1 s2. +Proof. + intros until n; monadSimpl. + subst s2; apply state_incr_intro; simpl. + apply Ple_succ. + apply Ple_refl. + intros. apply PTree.gso; apply Plt_ne; auto. +Qed. +Hint Resolve add_instr_incr: rtlg. + +Lemma add_instr_at: + forall s1 s2 i n, + add_instr i s1 = OK n s2 -> s2.(st_code)!n = Some i. +Proof. + intros until n; monadSimpl. + subst n; subst s2; simpl. apply PTree.gss. +Qed. +Hint Resolve add_instr_at. + +(** Properties of [reserve_instr] and [update_instr]. *) + +Lemma reserve_instr_incr: + forall s1 s2 n, + reserve_instr s1 = OK n s2 -> state_incr s1 s2. +Proof. + intros until n; monadSimpl. subst s2. + apply state_incr_intro; simpl. + apply Ple_succ. + apply Ple_refl. + auto. +Qed. + +Lemma update_instr_incr: + forall s1 s2 s3 s4 i n t, + reserve_instr s1 = OK n s2 -> + state_incr s2 s3 -> + update_instr n i s3 = OK t s4 -> + state_incr s1 s4. +Proof. + intros. + monadInv H. + generalize H1; unfold update_instr. + case (plt n (st_nextnode s3)); intro. + monadSimpl. inversion H0. + subst s4; apply state_incr_intro; simpl. + apply Plt_Ple. apply Plt_Ple_trans with (st_nextnode s2). + subst s2; simpl; apply Plt_succ. assumption. + rewrite <- H3 in H7; simpl in H7. assumption. + intros. rewrite PTree.gso. + rewrite <- H3 in H8; simpl in H8. apply H8. + apply Plt_trans_succ; assumption. + subst n; apply Plt_ne; assumption. + intros; discriminate. +Qed. + +Lemma update_instr_extends: + forall s1 s2 s3 s4 i n t, + reserve_instr s1 = OK n s2 -> + state_incr s2 s3 -> + update_instr n i s3 = OK t s4 -> + state_extends s3 s4. +Proof. + intros. + monadInv H. + red; intros. + case (peq pc n); intro. + subst pc. left. inversion H0. rewrite H6. + rewrite <- H3; simpl. + elim (s1.(st_wf) n); intro. + rewrite <- H4 in H9. elim (Plt_strict _ H9). + auto. + rewrite <- H4. rewrite <- H3; simpl. apply Plt_succ. + generalize H1; unfold update_instr. + case (plt n s3.(st_nextnode)); intro; monadSimpl. + right; rewrite <- H5; simpl. apply PTree.gso; auto. +Qed. + +(** Properties of [new_reg]. *) + +Lemma new_reg_incr: + forall s1 s2 r, new_reg s1 = OK r s2 -> state_incr s1 s2. +Proof. + intros until r. monadSimpl. + subst s2; apply state_incr_intro; simpl. + apply Ple_refl. apply Ple_succ. auto. +Qed. +Hint Resolve new_reg_incr: rtlg. + +Lemma new_reg_valid: + forall s1 s2 r, + new_reg s1 = OK r s2 -> reg_valid r s2. +Proof. + intros until r. monadSimpl. subst s2; subst r. + unfold reg_valid; unfold reg_valid; simpl. + apply Plt_succ. +Qed. +Hint Resolve new_reg_valid: rtlg. + +Lemma new_reg_fresh: + forall s1 s2 r, + new_reg s1 = OK r s2 -> reg_fresh r s1. +Proof. + intros until r. monadSimpl. subst s2; subst r. + unfold reg_fresh; simpl. + exact (Plt_strict _). +Qed. +Hint Resolve new_reg_fresh: rtlg. + +Lemma new_reg_not_in_map: + forall s1 s2 m r, + new_reg s1 = OK r s2 -> map_wf m s1 -> ~(reg_in_map m r). +Proof. + unfold not; intros; eauto with rtlg. +Qed. +Hint Resolve new_reg_not_in_map: rtlg. + +Lemma new_reg_not_mutated: + forall s1 s2 m mut r, + new_reg s1 = OK r s2 -> map_wf m s1 -> ~(mutated_reg m mut r). +Proof. + unfold not; intros. + generalize (mutated_reg_in_map _ _ _ H1); intro. + exact (new_reg_not_in_map _ _ _ _ H H0 H2). +Qed. +Hint Resolve new_reg_not_mutated: rtlg. + +(** * Properties of operations over compilation environments *) + +Lemma init_mapping_wf: + forall s, map_wf init_mapping s. +Proof. + intro. unfold init_mapping; apply mk_map_wf; simpl; intros. + rewrite PTree.gempty in H; discriminate. + contradiction. + rewrite PTree.gempty in H; discriminate. + tauto. +Qed. + +(** Properties of [find_var]. *) + +Lemma find_var_incr: + forall s1 s2 map name r, + find_var map name s1 = OK r s2 -> state_incr s1 s2. +Proof. + intros until r. unfold find_var. + case (map_vars map)!name. + intro; monadSimpl. subst s2; auto with rtlg. + monadSimpl. +Qed. +Hint Resolve find_var_incr: rtlg. + +Lemma find_var_in_map: + forall s1 s2 map name r, + find_var map name s1 = OK r s2 -> map_wf map s1 -> reg_in_map map r. +Proof. + intros until r. unfold find_var; caseEq (map.(map_vars)!name). + intros r0 eq. monadSimpl; intros. subst r0. + left. exists name; auto. + intro; monadSimpl. +Qed. +Hint Resolve find_var_in_map: rtlg. + +Lemma find_var_valid: + forall s1 s2 map name r, + find_var map name s1 = OK r s2 -> map_wf map s1 -> reg_valid r s1. +Proof. + eauto with rtlg. +Qed. +Hint Resolve find_var_valid: rtlg. + +Lemma find_var_not_mutated: + forall s1 s2 map name r mut, + find_var map name s1 = OK r s2 -> + map_wf map s1 -> + ~(In name mut) -> + ~(mutated_reg map mut r). +Proof. + intros until mut. unfold find_var; caseEq (map.(map_vars)!name). + intros r0 EQ. monadSimpl; intros; subst r0. + red; unfold mutated_reg; intros [id [IN EQ2]]. + assert (name = id). eauto with rtlg. + subst id. contradiction. + intro; monadSimpl. +Qed. +Hint Resolve find_var_not_mutated: rtlg. + +(** Properties of [find_letvar]. *) + +Lemma find_letvar_incr: + forall s1 s2 map idx r, + find_letvar map idx s1 = OK r s2 -> state_incr s1 s2. +Proof. + intros until r. unfold find_letvar. + case (nth_error (map_letvars map) idx). + intro; monadSimpl. subst s2; auto with rtlg. + monadSimpl. +Qed. +Hint Resolve find_letvar_incr: rtlg. + +Lemma find_letvar_in_map: + forall s1 s2 map idx r, + find_letvar map idx s1 = OK r s2 -> map_wf map s1 -> reg_in_map map r. +Proof. + intros until r. unfold find_letvar. + caseEq (nth_error (map_letvars map) idx). + intros r0 EQ; monadSimpl. intros. right. + subst r0; apply nth_error_in with idx; auto. + intro; monadSimpl. +Qed. +Hint Resolve find_letvar_in_map: rtlg. + +Lemma find_letvar_valid: + forall s1 s2 map idx r, + find_letvar map idx s1 = OK r s2 -> map_wf map s1 -> reg_valid r s1. +Proof. + eauto with rtlg. +Qed. +Hint Resolve find_letvar_valid: rtlg. + +Lemma find_letvar_not_mutated: + forall s1 s2 map idx mut r, + find_letvar map idx s1 = OK r s2 -> + map_wf map s1 -> + ~(mutated_reg map mut r). +Proof. + intros until r. unfold find_letvar. + caseEq (nth_error (map_letvars map) idx). + intros r' NTH. monadSimpl. unfold not; unfold mutated_reg. + intros MWF (id, (IN, MV)). subst r'. eauto with rtlg coqlib. + intro; monadSimpl. +Qed. +Hint Resolve find_letvar_not_mutated: rtlg. + +(** Properties of [add_var]. *) + +Lemma add_var_valid: + forall s1 s2 map1 map2 name r, + add_var map1 name s1 = OK (r, map2) s2 -> reg_valid r s2. +Proof. + intros until r. monadSimpl. intro. subst r0; subst s. + eauto with rtlg. +Qed. + +Lemma add_var_incr: + forall s1 s2 map name rm, + add_var map name s1 = OK rm s2 -> state_incr s1 s2. +Proof. + intros until rm; monadSimpl. subst s2. eauto with rtlg. +Qed. +Hint Resolve add_var_incr: rtlg. + +Lemma add_var_wf: + forall s1 s2 map name r map', + add_var map name s1 = OK (r,map') s2 -> map_wf map s1 -> map_wf map' s2. +Proof. + intros until map'; monadSimpl; intros. + subst r0; subst s; subst map'; apply mk_map_wf; simpl. + + intros id r'. rewrite PTree.gsspec. + case (peq id name); intros. + injection H; intros; subst r'. eauto with rtlg. + eauto with rtlg. + eauto with rtlg. + + intros id1 id2 r'. + repeat (rewrite PTree.gsspec). + case (peq id1 name); case (peq id2 name); intros. + congruence. + rewrite <- H in H0. byContradiction; eauto with rtlg. + rewrite <- H0 in H. byContradiction; eauto with rtlg. + eauto with rtlg. + + intros id r'. case (peq id name); intro. + subst id. rewrite PTree.gss. intro E; injection E; intro; subst r'. + intro; eauto with rtlg. + + rewrite PTree.gso; auto. eauto with rtlg. +Qed. +Hint Resolve add_var_wf: rtlg. + +Lemma add_var_find: + forall s1 s2 map name r map', + add_var map name s1 = OK (r,map') s2 -> map'.(map_vars)!name = Some r. +Proof. + intros until map'. + monadSimpl. + intro; subst r0. + subst map'; simpl in |- *. + apply PTree.gss. +Qed. + +Lemma add_vars_incr: + forall names s1 s2 map r, + add_vars map names s1 = OK r s2 -> state_incr s1 s2. +Proof. + induction names; simpl. + intros until r; monadSimpl; intros. subst s2; eauto with rtlg. + intros until r; monadSimpl; intros. + subst s0; eauto with rtlg. +Qed. + +Lemma add_vars_valid: + forall namel s1 s2 map1 map2 rl, + add_vars map1 namel s1 = OK (rl, map2) s2 -> + forall r, In r rl -> reg_valid r s2. +Proof. + induction namel; simpl; intros. + monadInv H. intro. subst rl. elim H0. + monadInv H. intro EQ1. subst rl; subst s0; subst y0. elim H0. + intro; subst r. eapply add_var_valid. eexact EQ0. + intro. apply reg_valid_incr with s. eauto with rtlg. + eauto. +Qed. + +Lemma add_vars_wf: + forall names s1 s2 map map' rl, + add_vars map names s1 = OK (rl,map') s2 -> + map_wf map s1 -> map_wf map' s2. +Proof. + induction names; simpl. + intros until rl; monadSimpl; intros. + subst s2; subst map'; assumption. + intros until rl; monadSimpl; intros. subst y0; subst s0. + eapply add_var_wf. eexact EQ0. + eapply IHnames. eexact EQ. auto. +Qed. +Hint Resolve add_vars_wf: rtlg. + +Lemma add_var_letenv: + forall map1 i s1 r map2 s2, + add_var map1 i s1 = OK (r, map2) s2 -> + map2.(map_letvars) = map1.(map_letvars). +Proof. + intros until s2. monadSimpl. intro. subst map2; reflexivity. +Qed. + +(** Properties of [add_letvar]. *) + +Lemma add_letvar_wf: + forall map s r, + map_wf map s -> + reg_valid r s -> + ~(reg_in_map map r) -> + map_wf (add_letvar map r) s. +Proof. + intros. unfold add_letvar; apply mk_map_wf; simpl. + exact (map_wf_var_valid map s H). + intros r' [EQ| IN]. + subst r'; assumption. + eapply map_wf_letvar_valid; eauto. + exact (map_wf_inj map s H). + intros. elim H3; intro. + subst r0. apply H1. left. exists id; auto. + eapply map_wf_disj; eauto. +Qed. + +(** * Properties of [alloc_reg] and [alloc_regs] *) + +Lemma alloc_reg_incr: + forall a s1 s2 map mut r, + alloc_reg map mut a s1 = OK r s2 -> state_incr s1 s2. +Proof. + intros until r. unfold alloc_reg. + case a; eauto with rtlg. + intro i; case (In_dec ident_eq i mut); eauto with rtlg. +Qed. +Hint Resolve alloc_reg_incr: rtlg. + +Lemma alloc_reg_valid: + forall a s1 s2 map mut r, + map_wf map s1 -> + alloc_reg map mut a s1 = OK r s2 -> reg_valid r s2. +Proof. + intros until r. unfold alloc_reg. + case a; eauto with rtlg. + intro i; case (In_dec ident_eq i mut); eauto with rtlg. +Qed. +Hint Resolve alloc_reg_valid: rtlg. + +Lemma alloc_reg_fresh_or_in_map: + forall map mut a s r s', + map_wf map s -> + alloc_reg map mut a s = OK r s' -> + reg_in_map map r \/ reg_fresh r s. +Proof. + intros until s'. unfold alloc_reg. + destruct a; try (right; eauto with rtlg; fail). + case (In_dec ident_eq i mut); intros. + right; eauto with rtlg. + left; eauto with rtlg. + intros; left; eauto with rtlg. +Qed. + +Lemma add_vars_letenv: + forall il map1 s1 rl map2 s2, + add_vars map1 il s1 = OK (rl, map2) s2 -> + map2.(map_letvars) = map1.(map_letvars). +Proof. + induction il; simpl; intros. + monadInv H. intro. subst map2; reflexivity. + monadInv H. intro EQ1. transitivity (map_letvars y). + subst y0. eapply add_var_letenv; eauto. + eauto. +Qed. + +(** A register is an adequate target for holding the value of an + expression if +- either the register is associated with a Cminor let-bound variable + or a Cminor local variable that is not modified; +- or the register is valid and not associated with any Cminor variable. *) + +Inductive target_reg_ok: state -> mapping -> list ident -> expr -> reg -> Prop := + | target_reg_immut_var: + forall s map mut id r, + ~(In id mut) -> map.(map_vars)!id = Some r -> + target_reg_ok s map mut (Evar id) r + | target_reg_letvar: + forall s map mut idx r, + nth_error map.(map_letvars) idx = Some r -> + target_reg_ok s map mut (Eletvar idx) r + | target_reg_other: + forall s map mut a r, + ~(reg_in_map map r) -> + reg_valid r s -> + target_reg_ok s map mut a r. + +Lemma target_reg_ok_incr: + forall s1 s2 map mut e r, + state_incr s1 s2 -> + target_reg_ok s1 map mut e r -> + target_reg_ok s2 map mut e r. +Proof. + intros. inversion H0. + apply target_reg_immut_var; auto. + apply target_reg_letvar; auto. + apply target_reg_other; eauto with rtlg. +Qed. +Hint Resolve target_reg_ok_incr: rtlg. + +Lemma target_reg_valid: + forall s map mut e r, + map_wf map s -> + target_reg_ok s map mut e r -> + reg_valid r s. +Proof. + intros. inversion H0; eauto with rtlg coqlib. +Qed. +Hint Resolve target_reg_valid: rtlg. + +Lemma target_reg_not_mutated: + forall s map mut e r, + map_wf map s -> + target_reg_ok s map mut e r -> + ~(mutated_reg map mut r). +Proof. + unfold not; unfold mutated_reg; intros until r. + intros MWF TRG [id [IN MV]]. + inversion TRG. + assert (id = id0); eauto with rtlg. subst id0. contradiction. + assert (In r (map_letvars map)). eauto with coqlib. eauto with rtlg. + apply H. red. left; exists id; assumption. +Qed. +Hint Resolve target_reg_not_mutated: rtlg. + +Lemma alloc_reg_target_ok: + forall a s1 s2 map mut r, + map_wf map s1 -> + alloc_reg map mut a s1 = OK r s2 -> + target_reg_ok s2 map mut a r. +Proof. + intros until r; intro MWF. unfold alloc_reg. + case a; intros; try (eapply target_reg_other; eauto with rtlg; fail). + generalize H; case (In_dec ident_eq i mut); intros. + apply target_reg_other; eauto with rtlg. + apply target_reg_immut_var; auto. + generalize H0; unfold find_var. + case (map_vars map)!i. + intro. monadSimpl. congruence. + monadSimpl. + apply target_reg_letvar. + generalize H. unfold find_letvar. + case (nth_error (map_letvars map) n). + intro; monadSimpl; congruence. + monadSimpl. +Qed. +Hint Resolve alloc_reg_target_ok: rtlg. + +Lemma alloc_regs_incr: + forall al s1 s2 map mut rl, + alloc_regs map mut al s1 = OK rl s2 -> state_incr s1 s2. +Proof. + induction al; simpl; intros. + monadInv H. subst s2. eauto with rtlg. + monadInv H. subst s2. eauto with rtlg. +Qed. +Hint Resolve alloc_regs_incr: rtlg. + +Lemma alloc_regs_valid: + forall al s1 s2 map mut rl, + map_wf map s1 -> + alloc_regs map mut al s1 = OK rl s2 -> + forall r, In r rl -> reg_valid r s2. +Proof. + induction al; simpl; intros. + monadInv H0. subst rl. elim H1. + monadInv H0. subst rl; subst s0. + elim H1; intro. + subst r0. eauto with rtlg. + eauto with rtlg. +Qed. +Hint Resolve alloc_regs_valid: rtlg. + +Lemma alloc_regs_fresh_or_in_map: + forall map mut al s rl s', + map_wf map s -> + alloc_regs map mut al s = OK rl s' -> + forall r, In r rl -> reg_in_map map r \/ reg_fresh r s. +Proof. + induction al; simpl; intros. + monadInv H0. subst rl. elim H1. + monadInv H0. subst rl. elim (in_inv H1); intro. + subst r. + assert (MWF: map_wf map s0). eauto with rtlg. + elim (alloc_reg_fresh_or_in_map map mut e s0 r0 s1 MWF EQ0); intro. + left; assumption. right; eauto with rtlg. + eauto with rtlg. +Qed. + +Inductive target_regs_ok: state -> mapping -> list ident -> exprlist -> list reg -> Prop := + | target_regs_nil: + forall s map mut, + target_regs_ok s map mut Enil nil + | target_regs_cons: + forall s map mut a r al rl, + reg_in_map map r \/ ~(In r rl) -> + target_reg_ok s map mut a r -> + target_regs_ok s map mut al rl -> + target_regs_ok s map mut (Econs a al) (r :: rl). + +Lemma target_regs_ok_incr: + forall s1 map mut al rl, + target_regs_ok s1 map mut al rl -> + forall s2, + state_incr s1 s2 -> + target_regs_ok s2 map mut al rl. +Proof. + induction 1; intros. + apply target_regs_nil. + apply target_regs_cons; eauto with rtlg. +Qed. +Hint Resolve target_regs_ok_incr: rtlg. + +Lemma target_regs_valid: + forall s map mut al rl, + target_regs_ok s map mut al rl -> + map_wf map s -> + forall r, In r rl -> reg_valid r s. +Proof. + induction 1; simpl; intros. + contradiction. + elim H3; intro. + subst r0. eauto with rtlg. + auto. +Qed. +Hint Resolve target_regs_valid: rtlg. + +Lemma target_regs_not_mutated: + forall s map mut el rl, + target_regs_ok s map mut el rl -> + map_wf map s -> + forall r, In r rl -> ~(mutated_reg map mut r). +Proof. + induction 1; simpl; intros. + contradiction. + elim H3; intro. subst r0. eauto with rtlg. + auto. +Qed. +Hint Resolve target_regs_not_mutated: rtlg. + +Lemma alloc_regs_target_ok: + forall al s1 s2 map mut rl, + map_wf map s1 -> + alloc_regs map mut al s1 = OK rl s2 -> + target_regs_ok s2 map mut al rl. +Proof. + induction al; simpl; intros. + monadInv H0. subst rl; apply target_regs_nil. + monadInv H0. subst s0; subst rl. + apply target_regs_cons; eauto 6 with rtlg. + assert (MWF: map_wf map s). eauto with rtlg. + elim (alloc_reg_fresh_or_in_map map mut e s r s2 MWF EQ0); intro. + left; assumption. right; red; intro; eauto with rtlg. +Qed. +Hint Resolve alloc_regs_target_ok: rtlg. + +(** The following predicate is a variant of [target_reg_ok] used + to characterize registers that are adequate for holding the return + value of a function. *) + +Inductive return_reg_ok: state -> mapping -> option reg -> Prop := + | return_reg_ok_none: + forall s map, + return_reg_ok s map None + | return_reg_ok_some: + forall s map r, + ~(reg_in_map map r) -> reg_valid r s -> + return_reg_ok s map (Some r). + +Lemma return_reg_ok_incr: + forall s1 s2 map or, + state_incr s1 s2 -> + return_reg_ok s1 map or -> + return_reg_ok s2 map or. +Proof. + intros. inversion H0; constructor. + assumption. eauto with rtlg. +Qed. +Hint Resolve return_reg_ok_incr: rtlg. + +Lemma new_reg_return_ok: + forall s1 r s2 map sig, + new_reg s1 = OK r s2 -> + map_wf map s1 -> + return_reg_ok s2 map (ret_reg sig r). +Proof. + intros. unfold ret_reg. destruct (sig_res sig); constructor. + eauto with rtlg. eauto with rtlg. +Qed. + +(** * Correspondence between Cminor environments and RTL register sets *) + +(** An RTL register environment matches a Cminor local environment and + let-environment if the value of every local or let-bound variable in + the Cminor environments is identical to the value of the + corresponding pseudo-register in the RTL register environment. *) + +Record match_env + (map: mapping) (e: Cminor.env) (le: Cminor.letenv) (rs: regset) : Prop := + mk_match_env { + me_vars: + (forall id v, + e!id = Some v -> exists r, map.(map_vars)!id = Some r /\ rs#r = v); + me_letvars: + rs##(map.(map_letvars)) = le + }. + +Lemma match_env_find_reg: + forall map id s1 s2 r e le rs v, + find_var map id s1 = OK r s2 -> + match_env map e le rs -> + e!id = Some v -> + rs#r = v. +Proof. + intros until v. + unfold find_var. caseEq (map.(map_vars)!id). + intros r' EQ. monadSimpl. subst r'. intros. + generalize (me_vars _ _ _ _ H _ _ H1). intros [r' [EQ' RS]]. + rewrite EQ' in EQ; injection EQ; intro; subst r'. + assumption. + intro; monadSimpl. +Qed. +Hint Resolve match_env_find_reg: rtlg. + +Lemma match_env_invariant: + forall map e le rs rs', + match_env map e le rs -> + (forall r, (reg_in_map map r) -> rs'#r = rs#r) -> + match_env map e le rs'. +Proof. + intros. apply mk_match_env. + intros id v' E. + generalize (me_vars _ _ _ _ H _ _ E). intros (r', (M, R)). + exists r'. split. auto. rewrite <- R. apply H0. + left. exists id. auto. + transitivity rs ## (map_letvars map). + apply list_map_exten. intros. + symmetry. apply H0. right. auto. + exact (me_letvars _ _ _ _ H). +Qed. + +(** Matching between environments is preserved when an unmapped register + (not corresponding to any Cminor variable) is assigned in the RTL + execution. *) + +Lemma match_env_update_temp: + forall map e le rs r v, + match_env map e le rs -> + ~(reg_in_map map r) -> + match_env map e le (rs#r <- v). +Proof. + intros. apply match_env_invariant with rs; auto. + intros. case (Reg.eq r r0); intro. + subst r0; contradiction. + apply Regmap.gso; auto. +Qed. +Hint Resolve match_env_update_temp: rtlg. + +(** Matching between environments is preserved by simultaneous + assignment to a Cminor local variable (in the Cminor environments) + and to the corresponding RTL pseudo-register (in the RTL register + environment). *) + +Lemma match_env_update_var: + forall map e le rs rs' id r v s s', + map_wf map s -> + find_var map id s = OK r s' -> + match_env map e le rs -> + rs'#r = v -> + (forall x, x <> r -> rs'#x = rs#x) -> + match_env map (PTree.set id v e) le rs'. +Proof. + intros until s'; intro MWF. + unfold find_var in |- *. caseEq (map_vars map)!id. + intros. monadInv H0. subst r0. apply mk_match_env. + intros id' v' E. case (peq id' id); intros. + subst id'. rewrite PTree.gss in E. injection E; intro; subst v'. + exists r. split. auto. auto. + rewrite PTree.gso in E; auto. + elim (me_vars _ _ _ _ H1 _ _ E). intros r' (M, R). + exists r'. split. assumption. rewrite <- R; apply H3; auto. + red in |- *; intro. subst r'. apply n. eauto with rtlg. + transitivity rs ## (map_letvars map). + apply list_map_exten. intros. symmetry. apply H3. + red in |- *; intro. subst x. eauto with rtlg. + exact (me_letvars _ _ _ _ H1). + intro; monadSimpl. +Qed. + +Lemma match_env_letvar: + forall map e le rs r v, + match_env map e le rs -> + rs#r = v -> + match_env (add_letvar map r) e (v :: le) rs. +Proof. + intros. unfold add_letvar in |- *; apply mk_match_env; simpl in |- *. + exact (me_vars _ _ _ _ H). + rewrite H0. rewrite (me_letvars _ _ _ _ H). auto. +Qed. + +Lemma match_env_exten: + forall map e le rs1 rs2, + (forall r, rs2#r = rs1#r) -> + match_env map e le rs1 -> + match_env map e le rs2. +Proof. + intros. apply mk_match_env. + intros. generalize (me_vars _ _ _ _ H0 _ _ H1). intros (r, (M1, M2)). + exists r. split. assumption. subst v. apply H. + transitivity rs1 ## (map_letvars map). + apply list_map_exten. intros. symmetry in |- *. apply H. + exact (me_letvars _ _ _ _ H0). +Qed. + +Lemma match_env_empty: + forall map, + map.(map_letvars) = nil -> + match_env map (PTree.empty val) nil (Regmap.init Vundef). +Proof. + intros. apply mk_match_env. + intros. rewrite PTree.gempty in H0. discriminate. + rewrite H. reflexivity. +Qed. + +(** The assignment of function arguments to local variables (on the Cminor + side) and pseudo-registers (on the RTL side) preserves matching + between environments. *) + +Lemma match_set_params_init_regs: + forall il rl s1 map2 s2 vl, + add_vars init_mapping il s1 = OK (rl, map2) s2 -> + match_env map2 (set_params vl il) nil (init_regs vl rl) + /\ (forall r, reg_fresh r s2 -> (init_regs vl rl)#r = Vundef). +Proof. + induction il; simpl in |- *; intros. + + monadInv H. intro; subst rl; simpl in |- *. + split. apply match_env_empty. subst map2; auto. + intros. apply Regmap.gi. + + monadInv H. intro EQ1; subst s0; subst y0; subst rl. clear H. + monadInv EQ0. intro EQ2. subst x0; subst s0. simpl. + + assert (LV : map_letvars map2 = nil). + transitivity (map_letvars y). + eapply add_var_letenv; eauto. + transitivity (map_letvars init_mapping). + eapply add_vars_letenv; eauto. + reflexivity. + + destruct vl. + (* vl = nil *) + generalize (IHil _ _ _ _ nil EQ). intros [ME UNDEF]. split. + constructor. intros id v. subst map2. simpl. + repeat rewrite PTree.gsspec; case (peq id a); intros. + exists r; split. auto. rewrite Regmap.gi. congruence. + destruct (me_vars _ _ _ _ ME id v H) as (r', (MV, IR)). + exists r'. split. auto. + replace (init_regs nil x) with (Regmap.init Vundef) in IR. auto. + destruct x; reflexivity. + rewrite LV; reflexivity. + intros. apply Regmap.gi. + (* vl = v :: vl *) + generalize (IHil _ _ _ _ vl EQ). intros [ME UNDEF]. split. + constructor. intros id v1. subst map2. simpl. + repeat rewrite PTree.gsspec; case (peq id a); intros. + exists r; split. auto. rewrite Regmap.gss. congruence. + destruct (me_vars _ _ _ _ ME id v1 H) as (r', (MV, IR)). + exists r'. split. auto. rewrite Regmap.gso. auto. + apply valid_fresh_different with s. + assert (MWF : map_wf y s). + eapply add_vars_wf; eauto. apply init_mapping_wf. + eauto with rtlg. eauto with rtlg. + rewrite LV; reflexivity. + intros. rewrite Regmap.gso. apply UNDEF. eauto with rtlg. + apply sym_not_equal. eauto with rtlg. +Qed. + +Lemma match_set_locals: + forall map1 s1, + map_wf map1 s1 -> + forall il rl map2 s2 e le rs, + match_env map1 e le rs -> + (forall r, reg_fresh r s1 -> rs#r = Vundef) -> + add_vars map1 il s1 = OK (rl, map2) s2 -> + match_env map2 (set_locals il e) le rs. +Proof. + induction il; simpl in *; intros. + + monadInv H2. intros; subst map2; auto. + + monadInv H2. intros. subst s0; subst y0. + assert (match_env y (set_locals il e) le rs). + eapply IHil; eauto. + monadInv EQ0. intro. subst s0; subst x0. rewrite <- H7. + constructor. + intros id v. simpl. repeat rewrite PTree.gsspec. + case (peq id a); intros. + exists r. split. auto. injection H5; intro; subst v. + apply H1. apply reg_fresh_decr with s. + eapply add_vars_incr; eauto. + eauto with rtlg. + eapply me_vars; eauto. + simpl. eapply me_letvars; eauto. +Qed. + +Lemma match_init_env_init_reg: + forall params s0 rparams map1 s1 vars rvars map2 s2 vparams, + add_vars init_mapping params s0 = OK (rparams, map1) s1 -> + add_vars map1 vars s1 = OK (rvars, map2) s2 -> + match_env map2 (set_locals vars (set_params vparams params)) + nil (init_regs vparams rparams). +Proof. + intros. + generalize (match_set_params_init_regs _ _ _ _ _ vparams H). + intros [A B]. + eapply match_set_locals; eauto. + eapply add_vars_wf; eauto. apply init_mapping_wf. +Qed. + +(** * Monotonicity properties of the state for the translation functions *) + +(** We show that the translation functions modify the state monotonically + (in the sense of the [state_incr] relation). *) + +Lemma add_move_incr: + forall r1 r2 nd s ns s', + add_move r1 r2 nd s = OK ns s' -> + state_incr s s'. +Proof. + intros until s'. unfold add_move. + case (Reg.eq r1 r2); intro. + monadSimpl. subst s'; auto with rtlg. + intro; eauto with rtlg. +Qed. +Hint Resolve add_move_incr: rtlg. + +Scheme expr_ind3 := Induction for expr Sort Prop + with condexpr_ind3 := Induction for condexpr Sort Prop + with exprlist_ind3 := Induction for exprlist Sort Prop. + +Lemma expr_condexpr_exprlist_ind: +forall (P : expr -> Prop) (P0 : condexpr -> Prop) + (P1 : exprlist -> Prop), + (forall i : ident, P (Evar i)) -> + (forall (i : ident) (e : expr), P e -> P (Eassign i e)) -> + (forall (o : operation) (e : exprlist), P1 e -> P (Eop o e)) -> + (forall (m : memory_chunk) (a : addressing) (e : exprlist), + P1 e -> P (Eload m a e)) -> + (forall (m : memory_chunk) (a : addressing) (e : exprlist), + P1 e -> forall e0 : expr, P e0 -> P (Estore m a e e0)) -> + (forall (s : signature) (e : expr), + P e -> forall e0 : exprlist, P1 e0 -> P (Ecall s e e0)) -> + (forall c : condexpr, + P0 c -> + forall e : expr, + P e -> forall e0 : expr, P e0 -> P (Econdition c e e0)) -> + (forall e : expr, P e -> forall e0 : expr, P e0 -> P (Elet e e0)) -> + (forall n : nat, P (Eletvar n)) -> + P0 CEtrue -> + P0 CEfalse -> + (forall (c : condition) (e : exprlist), P1 e -> P0 (CEcond c e)) -> + (forall c : condexpr, + P0 c -> + forall c0 : condexpr, + P0 c0 -> forall c1 : condexpr, P0 c1 -> P0 (CEcondition c c0 c1)) -> + P1 Enil -> + (forall e : expr, + P e -> forall e0 : exprlist, P1 e0 -> P1 (Econs e e0)) -> + (forall e : expr, P e) /\ + (forall ce : condexpr, P0 ce) /\ + (forall el : exprlist, P1 el). +Proof. + intros. split. apply (expr_ind3 P P0 P1); assumption. + split. apply (condexpr_ind3 P P0 P1); assumption. + apply (exprlist_ind3 P P0 P1); assumption. +Qed. + +Definition transl_expr_incr_pred (a: expr) : Prop := + forall map mut rd nd s ns s', + transl_expr map mut a rd nd s = OK ns s' -> state_incr s s'. +Definition transl_condition_incr_pred (c: condexpr) : Prop := + forall map mut ntrue nfalse s ns s', + transl_condition map mut c ntrue nfalse s = OK ns s' -> state_incr s s'. +Definition transl_exprlist_incr_pred (al: exprlist) : Prop := + forall map mut rl nd s ns s', + transl_exprlist map mut al rl nd s = OK ns s' -> state_incr s s'. + +Lemma transl_expr_condition_exprlist_incr: + (forall a, transl_expr_incr_pred a) /\ + (forall c, transl_condition_incr_pred c) /\ + (forall al, transl_exprlist_incr_pred al). +Proof. + apply expr_condexpr_exprlist_ind; + unfold transl_expr_incr_pred, + transl_condition_incr_pred, + transl_exprlist_incr_pred; + simpl; intros; + try (monadInv H); try (monadInv H0); + try (monadInv H1); try (monadInv H2); + eauto 6 with rtlg. + + intro EQ2. + apply state_incr_trans3 with s0 s1 s2; eauto with rtlg. + + intro EQ4. + apply state_incr_trans4 with s1 s2 s3 s4; eauto with rtlg. + + subst s'; auto with rtlg. + subst s'; auto with rtlg. + destruct rl; monadInv H. subst s'; auto with rtlg. + destruct rl; monadInv H1. eauto with rtlg. +Qed. + +Lemma transl_expr_incr: + forall a map mut rd nd s ns s', + transl_expr map mut a rd nd s = OK ns s' -> state_incr s s'. +Proof (proj1 transl_expr_condition_exprlist_incr). + +Lemma transl_condition_incr: + forall a map mut ntrue nfalse s ns s', + transl_condition map mut a ntrue nfalse s = OK ns s' -> state_incr s s'. +Proof (proj1 (proj2 transl_expr_condition_exprlist_incr)). + +Lemma transl_exprlist_incr: + forall al map mut rl nd s ns s', + transl_exprlist map mut al rl nd s = OK ns s' -> state_incr s s'. +Proof (proj2 (proj2 transl_expr_condition_exprlist_incr)). + +Hint Resolve transl_expr_incr transl_condition_incr transl_exprlist_incr: rtlg. + +Scheme stmt_ind2 := Induction for stmt Sort Prop + with stmtlist_ind2 := Induction for stmtlist Sort Prop. + +Lemma stmt_stmtlist_ind: +forall (P : stmt -> Prop) (P0 : stmtlist -> Prop), + (forall e : expr, P (Sexpr e)) -> + (forall (c : condexpr) (s : stmtlist), + P0 s -> forall s0 : stmtlist, P0 s0 -> P (Sifthenelse c s s0)) -> + (forall s : stmtlist, P0 s -> P (Sloop s)) -> + (forall s : stmtlist, P0 s -> P (Sblock s)) -> + (forall n : nat, P (Sexit n)) -> + (forall o : option expr, P (Sreturn o)) -> + P0 Snil -> + (forall s : stmt, + P s -> forall s0 : stmtlist, P0 s0 -> P0 (Scons s s0)) -> + (forall s : stmt, P s) /\ + (forall sl : stmtlist, P0 sl). +Proof. + intros. split. apply (stmt_ind2 P P0); assumption. + apply (stmtlist_ind2 P P0); assumption. +Qed. + +Definition transl_stmt_incr_pred (a: stmt) : Prop := + forall map nd nexits nret rret s ns s', + transl_stmt map a nd nexits nret rret s = OK ns s' -> + state_incr s s'. +Definition transl_stmtlist_incr_pred (al: stmtlist) : Prop := + forall map nd nexits nret rret s ns s', + transl_stmtlist map al nd nexits nret rret s = OK ns s' -> + state_incr s s'. + +Lemma transl_stmt_stmtlist_incr: + (forall a, transl_stmt_incr_pred a) /\ + (forall al, transl_stmtlist_incr_pred al). +Proof. + apply stmt_stmtlist_ind; + unfold transl_stmt_incr_pred, + transl_stmtlist_incr_pred; + simpl; intros; + try (monadInv H); try (monadInv H0); + try (monadInv H1); try (monadInv H2); + eauto 6 with rtlg. + + generalize H1. case (more_likely c s s0); monadSimpl; eauto 6 with rtlg. + + subst s'. apply update_instr_incr with s1 s2 (Inop n0) n u; + eauto with rtlg. + + generalize H; destruct (nth_error nexits n); monadSimpl. + subst s'; auto with rtlg. + + generalize H. destruct o; destruct rret; try monadSimpl. + eauto with rtlg. + subst s'; auto with rtlg. + subst s'; auto with rtlg. +Qed. + +Lemma transl_stmt_incr: + forall a map nd nexits nret rret s ns s', + transl_stmt map a nd nexits nret rret s = OK ns s' -> + state_incr s s'. +Proof (proj1 transl_stmt_stmtlist_incr). + +Lemma transl_stmtlist_incr: + forall al map nd nexits nret rret s ns s', + transl_stmtlist map al nd nexits nret rret s = OK ns s' -> + state_incr s s'. +Proof (proj2 transl_stmt_stmtlist_incr). + +Hint Resolve transl_stmt_incr transl_stmtlist_incr: rtlg. + diff --git a/backend/RTLtyping.v b/backend/RTLtyping.v new file mode 100644 index 00000000..d15dbb88 --- /dev/null +++ b/backend/RTLtyping.v @@ -0,0 +1,1277 @@ +(** Typing rules and a type inference algorithm for RTL. *) + +Require Import Coqlib. +Require Import Maps. +Require Import AST. +Require Import Op. +Require Import Registers. +Require Import RTL. +Require Import union_find. + +(** * The type system *) + +(** Like Cminor and all intermediate languages, RTL can be equipped with + a simple type system that statically guarantees that operations + and addressing modes are applied to the right number of arguments + and that the arguments are of the correct types. The type algebra + is trivial, consisting of the two types [Tint] (for integers and pointers) + and [Tfloat] (for floats). + + Additionally, we impose that each pseudo-register has the same type + throughout the function. This requirement helps with register allocation, + enabling each pseudo-register to be mapped to a single hardware register + or stack location of the correct type. + + + The typing judgement for instructions is of the form [wt_instr f env instr], + where [f] is the current function (used to type-check [Ireturn] + instructions) and [env] is a typing environment associating types to + pseudo-registers. Since pseudo-registers have unique types throughout + the function, the typing environment does not change during type-checking + of individual instructions. One point to note is that we have two + polymorphic operators, [Omove] and [Oundef], which can work both + over integers and floats. +*) + +Definition regenv := reg -> typ. + +Section WT_INSTR. + +Variable env: regenv. +Variable funct: function. + +Inductive wt_instr : instruction -> Prop := + | wt_Inop: + forall s, + wt_instr (Inop s) + | wt_Iopmove: + forall r1 r s, + env r1 = env r -> + wt_instr (Iop Omove (r1 :: nil) r s) + | wt_Iopundef: + forall r s, + wt_instr (Iop Oundef nil r s) + | wt_Iop: + forall op args res s, + op <> Omove -> op <> Oundef -> + (List.map env args, env res) = type_of_operation op -> + wt_instr (Iop op args res s) + | wt_Iload: + forall chunk addr args dst s, + List.map env args = type_of_addressing addr -> + env dst = type_of_chunk chunk -> + wt_instr (Iload chunk addr args dst s) + | wt_Istore: + forall chunk addr args src s, + List.map env args = type_of_addressing addr -> + env src = type_of_chunk chunk -> + wt_instr (Istore chunk addr args src s) + | wt_Icall: + forall sig ros args res s, + match ros with inl r => env r = Tint | inr s => True end -> + List.map env args = sig.(sig_args) -> + env res = match sig.(sig_res) with None => Tint | Some ty => ty end -> + wt_instr (Icall sig ros args res s) + | wt_Icond: + forall cond args s1 s2, + List.map env args = type_of_condition cond -> + wt_instr (Icond cond args s1 s2) + | wt_Ireturn: + forall optres, + option_map env optres = funct.(fn_sig).(sig_res) -> + wt_instr (Ireturn optres). + +End WT_INSTR. + +(** A function [f] is well-typed w.r.t. a typing environment [env], + written [wt_function env f], if all instructions are well-typed, + parameters agree in types with the function signature, and + names of parameters are pairwise distinct. *) + +Record wt_function (env: regenv) (f: function) : Prop := + mk_wt_function { + wt_params: + List.map env f.(fn_params) = f.(fn_sig).(sig_args); + wt_norepet: + list_norepet f.(fn_params); + wt_instrs: + forall pc instr, f.(fn_code)!pc = Some instr -> wt_instr env f instr + }. + +(** * Type inference *) + +(** There are several ways to ensure that RTL code is well-typed and + to obtain the typing environment (type assignment for pseudo-registers) + needed for register allocation. One is to start with well-typed Cminor + code and show type preservation for RTL generation and RTL optimizations. + Another is to start with untyped RTL and run a type inference algorithm + that reconstructs the typing environment, determining the type of + each pseudo-register from its uses in the code. We follow the second + approach. + + The algorithm and its correctness proof in this section were + contributed by Damien Doligez. *) + +(** ** Type inference algorithm *) + +Set Implicit Arguments. + +(** Type inference for RTL is similar to that for simply-typed + lambda-calculus: we use type variables to represent the types + of registers that have not yet been determined to be [Tint] or [Tfloat] + based on their uses. We need exactly one type variable per pseudo-register, + therefore type variables can be conveniently equated with registers. + The type of a register during inference is therefore either + [tTy t] (with [t = Tint] or [t = Tfloat]) for a known type, + or [tReg r] to mean ``the same type as that of register [r]''. *) + +Inductive myT : Set := + | tTy : typ -> myT + | tReg : reg -> myT. + +(** The algorithm proceeds by unification of the currently inferred + type for a pseudo-register with the type dictated by its uses. + Unification builds on a ``union-find'' data structure representing + equivalence classes of types (see module [Union_find]). +*) + +Module myreg. + Definition T := myT. + Definition eq : forall (x y : T), {x=y}+{x<>y}. + Proof. + destruct x; destruct y; auto; + try (apply right; discriminate); try (apply left; discriminate). + destruct t; destruct t0; + try (apply right; congruence); try (apply left; congruence). + elim (peq r r0); intros. + rewrite a; apply left; apply refl_equal. + apply right; congruence. + Defined. +End myreg. + +Module mymap. + Definition elt := myreg.T. + Definition encode (t : myreg.T) : positive := + match t with + | tTy Tint => xH + | tTy Tfloat => xI xH + | tReg r => xO r + end. + Definition decode (p : positive) : elt := + match p with + | xH => tTy Tint + | xI _ => tTy Tfloat + | xO r => tReg r + end. + + Lemma encode_decode : forall x : myreg.T, decode (encode x) = x. + Proof. + destruct x; try destruct t; simpl; auto. + Qed. + + Lemma encode_injective : + forall (x y : myreg.T), encode x = encode y -> x = y. + Proof. + intros. + unfold encode in H. destruct x; destruct y; try congruence; + try destruct t; try destruct t0; congruence. + Qed. + + Definition T := PTree.t positive. + Definition empty := PTree.empty positive. + Definition get (adr : elt) (t : T) := + option_map decode (PTree.get (encode adr) t). + Definition add (adr dat : elt) (t : T) := + PTree.set (encode adr) (encode dat) t. + + Theorem get_empty : forall (x : elt), get x empty = None. + Proof. + intro. + unfold get. unfold empty. + rewrite PTree.gempty. + simpl; auto. + Qed. + Theorem get_add_1 : + forall (x y : elt) (m : T), get x (add x y m) = Some y. + Proof. + intros. + unfold add. unfold get. + rewrite PTree.gss. + simpl; rewrite encode_decode; auto. + Qed. + Theorem get_add_2 : + forall (x y z : elt) (m : T), z <> x -> get z (add x y m) = get z m. + Proof. + intros. + unfold get. unfold add. + rewrite PTree.gso; auto. + intro. apply H. apply encode_injective. auto. + Qed. +End mymap. + +Module Uf := Unionfind (myreg) (mymap). + +Definition error := Uf.identify Uf.empty (tTy Tint) (tTy Tfloat). + +Fixpoint fold2 (A B : Set) (f : Uf.T -> A -> B -> Uf.T) + (init : Uf.T) (la : list A) (lb : list B) {struct la} + : Uf.T := + match la, lb with + | ha::ta, hb::tb => fold2 f (f init ha hb) ta tb + | nil, nil => init + | _, _ => error + end. + +Definition option_fold2 (A B : Set) (f : Uf.T -> A -> B -> Uf.T) + (init : Uf.T) (oa : option A) (ob : option B) + : Uf.T := + match oa, ob with + | Some a, Some b => f init a b + | None, None => init + | _, _ => error + end. + +Definition teq (ty1 ty2 : typ) : bool := + match ty1, ty2 with + | Tint, Tint => true + | Tfloat, Tfloat => true + | _, _ => false + end. + +Definition type_rtl_arg (u : Uf.T) (r : reg) (t : typ) := + Uf.identify u (tReg r) (tTy t). + +Definition type_rtl_ros (u : Uf.T) (ros : reg+ident) := + match ros with + | inl r => Uf.identify u (tReg r) (tTy Tint) + | inr s => u + end. + +Definition type_of_sig_res (sig : signature) := + match sig.(sig_res) with None => Tint | Some ty => ty end. + +(** This is the core type inference function. The [u] argument is + the current substitution / equivalence classes between types. + An updated set of equivalence classes, reflecting unifications + possibly performed during the type-checking of [i], is returned. + Note that [type_rtl_instr] never fails explicitly. However, + in case of type error (e.g. applying the [Oadd] integer operation + to float registers), the equivalence relation returned will + put [tTy Tint] and [tTy Tfloat] in the same equivalence class. + This fact will propagate through type inference for other instructions, + and be detected at the end of type inference, indicating a typing failure. +*) + +Definition type_rtl_instr (rtyp : option typ) + (u : Uf.T) (_ : positive) (i : instruction) := + match i with + | Inop _ => u + | Iop Omove (r1 :: nil) r0 _ => Uf.identify u (tReg r0) (tReg r1) + | Iop Omove _ _ _ => error + | Iop Oundef nil _ _ => u + | Iop Oundef _ _ _ => error + | Iop op args r0 _ => + let (argtyp, restyp) := type_of_operation op in + let u1 := type_rtl_arg u r0 restyp in + fold2 type_rtl_arg u1 args argtyp + | Iload chunk addr args r0 _ => + let u1 := type_rtl_arg u r0 (type_of_chunk chunk) in + fold2 type_rtl_arg u1 args (type_of_addressing addr) + | Istore chunk addr args r0 _ => + let u1 := type_rtl_arg u r0 (type_of_chunk chunk) in + fold2 type_rtl_arg u1 args (type_of_addressing addr) + | Icall sign ros args r0 _ => + let u1 := type_rtl_ros u ros in + let u2 := type_rtl_arg u1 r0 (type_of_sig_res sign) in + fold2 type_rtl_arg u2 args sign.(sig_args) + | Icond cond args _ _ => + fold2 type_rtl_arg u args (type_of_condition cond) + | Ireturn o => option_fold2 type_rtl_arg u o rtyp + end. + +Definition mk_env (u : Uf.T) (r : reg) := + if myreg.eq (Uf.repr u (tReg r)) (Uf.repr u (tTy Tfloat)) + then Tfloat + else Tint. + +Fixpoint member (x : reg) (l : list reg) {struct l} : bool := + match l with + | nil => false + | y :: rest => if peq x y then true else member x rest + end. + +Fixpoint repet (l : list reg) : bool := + match l with + | nil => false + | x :: rest => member x rest || repet rest + end. + +Definition type_rtl_function (f : function) := + let u1 := PTree.fold (type_rtl_instr f.(fn_sig).(sig_res)) + f.(fn_code) Uf.empty in + let u2 := fold2 type_rtl_arg u1 f.(fn_params) f.(fn_sig).(sig_args) in + if repet f.(fn_params) then + None + else + if myreg.eq (Uf.repr u2 (tTy Tint)) (Uf.repr u2 (tTy Tfloat)) + then None + else Some (mk_env u2). + +Unset Implicit Arguments. + +(** ** Correctness proof for type inference *) + +(** General properties of the type equivalence relation. *) + +Definition consistent (u : Uf.T) := + Uf.repr u (tTy Tint) <> Uf.repr u (tTy Tfloat). + +Lemma consistent_not_eq : forall (u : Uf.T) (A : Type) (x y : A), + consistent u -> + (if myreg.eq (Uf.repr u (tTy Tint)) (Uf.repr u (tTy Tfloat)) then x else y) + = y. + Proof. + intros. + unfold consistent in H. + destruct (myreg.eq (Uf.repr u (tTy Tint)) (Uf.repr u (tTy Tfloat))); + congruence. + Qed. + +Lemma equal_eq : forall (t : myT) (A : Type) (x y : A), + (if myreg.eq t t then x else y) = x. + Proof. + intros. + destruct (myreg.eq t t); congruence. + Qed. + +Lemma error_inconsistent : forall (A : Prop), consistent error -> A. + Proof. + intros. + absurd (consistent error); auto. + intro. + unfold error in H. unfold consistent in H. + rewrite Uf.sameclass_identify_1 in H. + congruence. + Qed. + +Lemma teq_correct : forall (t1 t2 : typ), teq t1 t2 = true -> t1 = t2. + Proof. + intros; destruct t1; destruct t2; try simpl in H; congruence. + Qed. + +Definition included (u1 u2 : Uf.T) : Prop := + forall (e1 e2: myT), + Uf.repr u1 e1 = Uf.repr u1 e2 -> Uf.repr u2 e1 = Uf.repr u2 e2. + +Lemma included_refl : + forall (e : Uf.T), included e e. + Proof. + unfold included. auto. + Qed. + +Lemma included_trans : + forall (e1 e2 e3 : Uf.T), + included e3 e2 -> included e2 e1 -> included e3 e1. + Proof. + unfold included. auto. + Qed. + +Lemma included_consistent : + forall (u1 u2 : Uf.T), + included u1 u2 -> consistent u2 -> consistent u1. + Proof. + unfold consistent. unfold included. + intros. + intro. apply H0. apply H. + auto. + Qed. + +Lemma included_identify : + forall (u : Uf.T) (t1 t2 : myT), included u (Uf.identify u t1 t2). + Proof. + unfold included. + intros. + apply Uf.sameclass_identify_2; auto. + Qed. + +Lemma type_arg_correct_1 : + forall (u : Uf.T) (r : reg) (t : typ), + consistent (type_rtl_arg u r t) + -> Uf.repr (type_rtl_arg u r t) (tReg r) + = Uf.repr (type_rtl_arg u r t) (tTy t). + Proof. + intros. + unfold type_rtl_arg. + rewrite Uf.sameclass_identify_1. + auto. + Qed. + +Lemma type_arg_correct : + forall (u : Uf.T) (r : reg) (t : typ), + consistent (type_rtl_arg u r t) -> mk_env (type_rtl_arg u r t) r = t. + Proof. + intros. + unfold mk_env. + rewrite type_arg_correct_1. + destruct t. + apply consistent_not_eq; auto. + destruct (myreg.eq (Uf.repr (type_rtl_arg u r Tfloat) (tTy Tfloat))); + congruence. + auto. + Qed. + +Lemma type_arg_included : + forall (u : Uf.T) (r : reg) (t : typ), included u (type_rtl_arg u r t). + Proof. + intros. + unfold type_rtl_arg. + apply included_identify. + Qed. + +Lemma type_arg_extends : + forall (u : Uf.T) (r : reg) (t : typ), + consistent (type_rtl_arg u r t) -> consistent u. + Proof. + intros. + apply included_consistent with (u2 := type_rtl_arg u r t). + apply type_arg_included. + auto. + Qed. + +Lemma type_args_included : + forall (l1 : list reg) (l2 : list typ) (u : Uf.T), + consistent (fold2 type_rtl_arg u l1 l2) + -> included u (fold2 type_rtl_arg u l1 l2). + Proof. + induction l1; intros; destruct l2. + simpl in H; simpl; apply included_refl. + simpl in H. apply error_inconsistent. auto. + simpl in H. apply error_inconsistent. auto. + simpl. + simpl in H. + apply included_trans with (e2 := type_rtl_arg u a t). + apply type_arg_included. + apply IHl1. + auto. + Qed. + +Lemma type_args_extends : + forall (l1 : list reg) (l2 : list typ) (u : Uf.T), + consistent (fold2 type_rtl_arg u l1 l2) -> consistent u. + Proof. + intros. + apply (included_consistent _ _ (type_args_included l1 l2 u H)). + auto. + Qed. + +Lemma type_args_correct : + forall (l1 : list reg) (l2 : list typ) (u : Uf.T), + consistent (fold2 type_rtl_arg u l1 l2) + -> map (mk_env (fold2 type_rtl_arg u l1 l2)) l1 = l2. + Proof. + induction l1. + intros. + destruct l2. + unfold map; simpl; auto. + simpl in H; apply error_inconsistent; auto. + intros. + destruct l2. + simpl in H; apply error_inconsistent; auto. + simpl. + simpl in H. + rewrite (IHl1 l2 (type_rtl_arg u a t) H). + unfold mk_env. + destruct t. + rewrite (type_args_included _ _ _ H (tReg a) (tTy Tint)). + rewrite consistent_not_eq; auto. + apply type_arg_correct_1. + apply type_args_extends with (l1 := l1) (l2 := l2); auto. + rewrite (type_args_included _ _ _ H (tReg a) (tTy Tfloat)). + rewrite equal_eq; auto. + apply type_arg_correct_1. + apply type_args_extends with (l1 := l1) (l2 := l2); auto. + Qed. + +(** Correctness of [wt_params]. *) + +Lemma type_rtl_function_params : + forall (f: function) (env: regenv), + type_rtl_function f = Some env + -> List.map env f.(fn_params) = f.(fn_sig).(sig_args). + Proof. + destruct f; unfold type_rtl_function; simpl. + destruct (repet fn_params); simpl; intros; try congruence. + pose (u := PTree.fold (type_rtl_instr (sig_res fn_sig)) fn_code Uf.empty). + fold u in H. + cut (consistent (fold2 type_rtl_arg u fn_params (sig_args fn_sig))). + intro. + pose (R := Uf.repr (fold2 type_rtl_arg u fn_params (sig_args fn_sig))). + fold R in H. + destruct (myreg.eq (R (tTy Tint)) (R (tTy Tfloat))). + congruence. + injection H. + intro. + rewrite <- H1. + apply type_args_correct. + auto. + intro. + rewrite H0 in H. + rewrite equal_eq in H. + congruence. + Qed. + +(** Correctness of [wt_norepet]. *) + +Lemma member_correct : + forall (l : list reg) (a : reg), member a l = false -> ~In a l. + Proof. + induction l; simpl; intros; try tauto. + destruct (peq a0 a); simpl; try congruence. + intro. destruct H0; try congruence. + generalize H0; apply IHl; auto. + Qed. + +Lemma repet_correct : + forall (l : list reg), repet l = false -> list_norepet l. + Proof. + induction l; simpl; intros. + exact (list_norepet_nil reg). + elim (orb_false_elim (member a l) (repet l) H); intros. + apply list_norepet_cons. + apply member_correct; auto. + apply IHl; auto. + Qed. + +Lemma type_rtl_function_norepet : + forall (f: function) (env: regenv), + type_rtl_function f = Some env + -> list_norepet f.(fn_params). + Proof. + destruct f; unfold type_rtl_function; simpl. + intros. cut (repet fn_params = false). + intro. apply repet_correct. auto. + destruct (repet fn_params); congruence. + Qed. + +(** Correctness of [wt_instrs]. *) + +Lemma step1 : + forall (f : function) (env : regenv), + type_rtl_function f = Some env + -> exists u2 : Uf.T, + included (PTree.fold (type_rtl_instr f.(fn_sig).(sig_res)) + f.(fn_code) Uf.empty) + u2 + /\ env = mk_env u2 + /\ consistent u2. + Proof. + intros f env. + pose (u1 := (PTree.fold (type_rtl_instr f.(fn_sig).(sig_res)) + f.(fn_code) Uf.empty)). + fold u1. + unfold type_rtl_function. + intros. + destruct (repet f.(fn_params)). + congruence. + fold u1 in H. + pose (u2 := (fold2 type_rtl_arg u1 f.(fn_params) f.(fn_sig).(sig_args))). + fold u2 in H. + exists u2. + caseEq (myreg.eq (Uf.repr u2 (tTy Tint)) (Uf.repr u2 (tTy Tfloat))). + intros. + rewrite e in H. + rewrite equal_eq in H. + congruence. + intros. + rewrite consistent_not_eq in H. + apply conj. + unfold u2. + apply type_args_included. + auto. + apply conj; auto. + congruence. + auto. + Qed. + +Lemma let_fold_args_res : + forall (u : Uf.T) (l : list reg) (r : reg) (e : list typ * typ), + (let (argtyp, restyp) := e in + fold2 type_rtl_arg (type_rtl_arg u r restyp) l argtyp) + = fold2 type_rtl_arg (type_rtl_arg u r (snd e)) l (fst e). + Proof. + intros. rewrite (surjective_pairing e). simpl. auto. + Qed. + +Lemma type_args_res_included : + forall (l1 : list reg) (l2 : list typ) (u : Uf.T) (r : reg) (t : typ), + consistent (fold2 type_rtl_arg (type_rtl_arg u r t) l1 l2) + -> included u (fold2 type_rtl_arg (type_rtl_arg u r t) l1 l2). + Proof. + intros. + apply included_trans with (e2 := type_rtl_arg u r t). + apply type_arg_included. + apply type_args_included; auto. + Qed. + +Lemma type_args_res_ros_included : + forall (l1 : list reg) (l2 : list typ) (u : Uf.T) (r : reg) (t : typ) + (ros : reg+ident), + consistent (fold2 type_rtl_arg (type_rtl_arg (type_rtl_ros u ros) r t) l1 l2) + -> included u (fold2 type_rtl_arg (type_rtl_arg (type_rtl_ros u ros) r t) l1 l2). +Proof. + intros. + apply included_trans with (e2 := type_rtl_ros u ros). + unfold type_rtl_ros; destruct ros. + apply included_identify. + apply included_refl. + apply type_args_res_included; auto. +Qed. + +Lemma type_instr_included : + forall (p : positive) (i : instruction) (u : Uf.T) (res_ty : option typ), + consistent (type_rtl_instr res_ty u p i) + -> included u (type_rtl_instr res_ty u p i). + Proof. + intros. + destruct i; simpl; simpl in H; try apply type_args_res_included; auto. + apply included_refl; auto. + destruct o; simpl; simpl in H; try apply type_args_res_included; auto. + destruct l; simpl; simpl in H; auto. + apply error_inconsistent; auto. + destruct l; simpl; simpl in H; auto. + apply included_identify. + apply error_inconsistent; auto. + destruct l; simpl; simpl in H; auto. + apply included_refl. + apply error_inconsistent; auto. + apply type_args_res_ros_included; auto. + apply type_args_included; auto. + destruct res_ty; destruct o; simpl; simpl in H; + try (apply error_inconsistent; auto; fail). + apply type_arg_included. + apply included_refl. + Qed. + +Lemma type_instrs_extends : + forall (l : list (positive * instruction)) (u : Uf.T) (res_ty : option typ), + consistent + (fold_left (fun v p => type_rtl_instr res_ty v (fst p) (snd p)) l u) + -> consistent u. +Proof. + induction l; simpl; intros. + auto. + apply included_consistent + with (u2 := (type_rtl_instr res_ty u (fst a) (snd a))). + apply type_instr_included. + apply IHl with (res_ty := res_ty); auto. + apply IHl with (res_ty := res_ty); auto. +Qed. + +Lemma type_instrs_included : + forall (l : list (positive * instruction)) (u : Uf.T) (res_ty : option typ), + consistent + (fold_left (fun v p => type_rtl_instr res_ty v (fst p) (snd p)) l u) + -> included u + (fold_left (fun v p => type_rtl_instr res_ty v (fst p) (snd p)) l u). + Proof. + induction l; simpl; intros. + apply included_refl; auto. + apply included_trans with (e2 := (type_rtl_instr res_ty u (fst a) (snd a))). + apply type_instr_included. + apply type_instrs_extends with (res_ty := res_ty) (l := l); auto. + apply IHl; auto. + Qed. + +Lemma step2 : + forall (res_ty : option typ) (c : code) (u0 : Uf.T), + consistent (PTree.fold (type_rtl_instr res_ty) c u0) -> + forall (pc : positive) (i : instruction), + c!pc = Some i + -> exists u : Uf.T, + consistent (type_rtl_instr res_ty u pc i) + /\ included (type_rtl_instr res_ty u pc i) + (PTree.fold (type_rtl_instr res_ty) c u0). + Proof. + intros. + rewrite PTree.fold_spec. + rewrite PTree.fold_spec in H. + pose (H1 := PTree.elements_correct _ _ H0). + generalize H. clear H. + generalize u0. clear u0. + generalize H1. clear H1. + induction (PTree.elements c). + intros. + absurd (In (pc, i) nil). + apply in_nil. + auto. + intros. + simpl in H. + elim H1. + intro. + rewrite H2 in H. + simpl in H. + rewrite H2. simpl. + exists u0. + apply conj. + apply type_instrs_extends with (res_ty := res_ty) (l := l). + auto. + apply type_instrs_included. + auto. + intro. + simpl. + apply IHl. + auto. + auto. + Qed. + +Definition mapped (u : Uf.T) (r : reg) := + Uf.repr u (tReg r) = Uf.repr u (tTy Tfloat) + \/ Uf.repr u (tReg r) = Uf.repr u (tTy Tint). + +Definition definite (u : Uf.T) (i : instruction) := + match i with + | Inop _ => True + | Iop Omove (r1 :: nil) r0 _ => Uf.repr u (tReg r1) = Uf.repr u (tReg r0) + | Iop Oundef _ _ _ => True + | Iop _ args r0 _ => + mapped u r0 /\ forall r : reg, In r args -> mapped u r + | Iload _ _ args r0 _ => + mapped u r0 /\ forall r : reg, In r args -> mapped u r + | Istore _ _ args r0 _ => + mapped u r0 /\ forall r : reg, In r args -> mapped u r + | Icall _ ros args r0 _ => + match ros with inl r => mapped u r | _ => True end + /\ mapped u r0 /\ forall r : reg, In r args -> mapped u r + | Icond _ args _ _ => + forall r : reg, In r args -> mapped u r + | Ireturn None => True + | Ireturn (Some r) => mapped u r + end. + +Lemma type_arg_complete : + forall (u : Uf.T) (r : reg) (t : typ), + mapped (type_rtl_arg u r t) r. +Proof. + intros. + unfold type_rtl_arg. + unfold mapped. + destruct t. + right; apply Uf.sameclass_identify_1. + left; apply Uf.sameclass_identify_1. +Qed. + +Lemma type_arg_mapped : + forall (u : Uf.T) (r r0 : reg) (t : typ), + mapped u r0 -> mapped (type_rtl_arg u r t) r0. +Proof. + unfold mapped. + unfold type_rtl_arg. + intros. + elim H; intros. + left; apply Uf.sameclass_identify_2; auto. + right; apply Uf.sameclass_identify_2; auto. +Qed. + +Lemma type_args_mapped : + forall (lr : list reg) (lt : list typ) (u : Uf.T) (r : reg), + consistent (fold2 type_rtl_arg u lr lt) -> + mapped u r -> + mapped (fold2 type_rtl_arg u lr lt) r. +Proof. + induction lr; simpl; intros. + destruct lt; simpl; auto; try (apply error_inconsistent; auto; fail). + destruct lt; simpl; auto; try (apply error_inconsistent; auto; fail). + apply IHlr. + auto. + apply type_arg_mapped; auto. +Qed. + +Lemma type_args_complete : + forall (lr : list reg) (lt : list typ) (u : Uf.T), + consistent (fold2 type_rtl_arg u lr lt) + -> forall r, (In r lr -> mapped (fold2 type_rtl_arg u lr lt) r). +Proof. + induction lr; simpl; intros. + destruct lt; simpl; try tauto. + destruct lt; simpl. + apply error_inconsistent; auto. + elim H0; intros. + rewrite H1. + rewrite H1 in H. + apply type_args_mapped; auto. + apply type_arg_complete. + apply IHlr; auto. +Qed. + +Lemma type_res_complete : + forall (u : Uf.T) (lr : list reg) (lt : list typ) (r : reg) (t : typ), + consistent (fold2 type_rtl_arg (type_rtl_arg u r t) lr lt) + -> mapped (fold2 type_rtl_arg (type_rtl_arg u r t) lr lt) r. +Proof. + intros. + apply type_args_mapped; auto. + apply type_arg_complete. +Qed. + +Lemma type_args_res_complete : + forall (u : Uf.T) (lr : list reg) (lt : list typ) (r : reg) (t : typ), + consistent (fold2 type_rtl_arg (type_rtl_arg u r t) lr lt) + -> mapped (fold2 type_rtl_arg (type_rtl_arg u r t) lr lt) r + /\ forall rr, (In rr lr -> mapped (fold2 type_rtl_arg (type_rtl_arg u r t) + lr lt) + rr). +Proof. + intros. + apply conj. + apply type_res_complete; auto. + apply type_args_complete; auto. +Qed. + +Lemma type_ros_complete : + forall (u : Uf.T) (lr : list reg) (lt : list typ) (r r1 : reg) (t : typ), + consistent (fold2 type_rtl_arg (type_rtl_arg + (type_rtl_ros u (inl ident r1)) r t) lr lt) + -> + mapped (fold2 type_rtl_arg (type_rtl_arg + (type_rtl_ros u (inl ident r1)) r t) lr lt) r1. +Proof. + intros. + apply type_args_mapped; auto. + apply type_arg_mapped. + unfold type_rtl_ros. + unfold mapped. + right. + apply Uf.sameclass_identify_1; auto. +Qed. + +Lemma type_res_correct : + forall (u : Uf.T) (lr : list reg) (lt : list typ) (r : reg) (t : typ), + consistent (fold2 type_rtl_arg (type_rtl_arg u r t) lr lt) + -> mk_env (fold2 type_rtl_arg (type_rtl_arg u r t) lr lt) r = t. +Proof. + intros. + unfold mk_env. + rewrite (type_args_included _ _ _ H (tReg r) (tTy t)). + destruct t. + apply consistent_not_eq; auto. + apply equal_eq; auto. + unfold type_rtl_arg; apply Uf.sameclass_identify_1; auto. +Qed. + +Lemma type_ros_correct : + forall (u : Uf.T) (lr : list reg) (lt : list typ) (r r1 : reg) (t : typ), + consistent (fold2 type_rtl_arg (type_rtl_arg + (type_rtl_ros u (inl ident r1)) r t) lr lt) + -> + mk_env (fold2 type_rtl_arg (type_rtl_arg + (type_rtl_ros u (inl ident r1)) r t) lr lt) r1 + = Tint. +Proof. + intros. + unfold mk_env. + rewrite (type_args_included _ _ _ H (tReg r1) (tTy Tint)). + apply consistent_not_eq; auto. + rewrite (type_arg_included (type_rtl_ros u (inl ident r1)) r t (tReg r1) (tTy Tint)). + auto. + simpl. + apply Uf.sameclass_identify_1; auto. +Qed. + +Lemma step3 : + forall (u : Uf.T) (f : function) (c : code) (i : instruction) (pc : positive), + c!pc = Some i -> + consistent (type_rtl_instr f.(fn_sig).(sig_res) u pc i) + -> wt_instr (mk_env (type_rtl_instr f.(fn_sig).(sig_res) u pc i)) f i + /\ definite (type_rtl_instr f.(fn_sig).(sig_res) u pc i) i. + Proof. + Opaque type_rtl_arg. + intros. + destruct i; simpl in H0; simpl. + (* Inop *) + apply conj; auto. apply wt_Inop. + (* Iop *) + destruct o; + try (apply conj; [ + apply wt_Iop; try congruence; simpl; + rewrite (type_args_correct _ _ _ H0); + rewrite (type_res_correct _ _ _ _ _ H0); + auto + |apply (type_args_res_complete _ _ _ _ _ H0)]). + (* Omove *) + destruct l; [apply error_inconsistent; auto | idtac]. + destruct l; [idtac | apply error_inconsistent; auto]. + apply conj. + apply wt_Iopmove. + simpl. + unfold mk_env. + rewrite Uf.sameclass_identify_1. + congruence. + simpl. + rewrite Uf.sameclass_identify_1; congruence. + (* Oundef *) + destruct l; [idtac | apply error_inconsistent; auto]. + apply conj. apply wt_Iopundef. + unfold definite. auto. + (* Iload *) + apply conj. + apply wt_Iload. + rewrite (type_args_correct _ _ _ H0); auto. + rewrite (type_res_correct _ _ _ _ _ H0); auto. + simpl; apply (type_args_res_complete _ _ _ _ _ H0). + (* IStore *) + apply conj. + apply wt_Istore. + rewrite (type_args_correct _ _ _ H0); auto. + rewrite (type_res_correct _ _ _ _ _ H0); auto. + simpl; apply (type_args_res_complete _ _ _ _ _ H0). + (* Icall *) + apply conj. + apply wt_Icall. + destruct s0; auto. apply type_ros_correct. auto. + apply type_args_correct. auto. + fold (type_of_sig_res s). apply type_res_correct. auto. + destruct s0. + apply conj. + apply type_ros_complete. auto. + apply type_args_res_complete. auto. + apply conj; auto. + apply type_args_res_complete. auto. + (* Icond *) + apply conj. + apply wt_Icond. + apply (type_args_correct _ _ _ H0). + simpl; apply (type_args_complete _ _ _ H0). + (* Ireturn *) + destruct o; simpl. + apply conj. + apply wt_Ireturn. + destruct f.(fn_sig).(sig_res); simpl; simpl in H0. + rewrite type_arg_correct; auto. + apply error_inconsistent; auto. + destruct f.(fn_sig).(sig_res); simpl; simpl in H0. + apply type_arg_complete. + apply error_inconsistent; auto. + apply conj; auto. apply wt_Ireturn. + destruct f.(fn_sig).(sig_res); simpl; simpl in H0. + apply error_inconsistent; auto. + congruence. + Transparent type_rtl_arg. + Qed. + +Lemma mapped_included_consistent : + forall (u1 u2 : Uf.T) (r : reg), + mapped u1 r -> + included u1 u2 -> + consistent u2 -> + mk_env u2 r = mk_env u1 r. +Proof. + intros. + unfold mk_env. + unfold mapped in H. + elim H; intros; rewrite H2; rewrite (H0 _ _ H2). + rewrite equal_eq; rewrite equal_eq; auto. + rewrite (consistent_not_eq u2). + rewrite (consistent_not_eq u1). + auto. + apply included_consistent with (u2 := u2). + auto. + auto. + auto. +Qed. + +Lemma mapped_list_included : + forall (u1 u2 : Uf.T) (lr : list reg), + (forall r, In r lr -> mapped u1 r) -> + included u1 u2 -> + consistent u2 -> + map (mk_env u2) lr = map (mk_env u1) lr. +Proof. + induction lr; simpl; intros. + auto. + rewrite (mapped_included_consistent u1 u2 a). + rewrite IHlr; auto. + apply (H a); intros. + left; auto. + auto. + auto. +Qed. + +Lemma included_mapped : + forall (u1 u2 : Uf.T) (r : reg), + included u1 u2 -> + mapped u1 r -> + mapped u2 r. +Proof. + unfold mapped. + intros. + elim H0; intros. + left; rewrite (H _ _ H1); auto. + right; rewrite (H _ _ H1); auto. +Qed. + +Lemma included_mapped_forall : + forall (u1 u2 : Uf.T) (r : reg) (l : list reg), + included u1 u2 -> + mapped u1 r /\ (forall r, In r l -> mapped u1 r) -> + mapped u2 r /\ (forall r, In r l -> mapped u2 r). +Proof. + intros. + elim H0; intros. + apply conj. + apply (included_mapped _ _ r H); auto. + intros. + apply (included_mapped _ _ r0 H). + apply H2; auto. +Qed. + +Lemma definite_included : + forall (u1 u2 : Uf.T) (i : instruction), + included u1 u2 -> definite u1 i -> definite u2 i. +Proof. + unfold definite. + intros. + destruct i; try apply (included_mapped_forall _ _ _ _ H H0); auto. + destruct o; try apply (included_mapped_forall _ _ _ _ H H0); auto. + destruct l; auto. + apply (included_mapped_forall _ _ _ _ H H0). + destruct l; auto. + apply (included_mapped_forall _ _ _ _ H H0). + destruct s0; auto. + elim H0; intros. + apply conj. + apply (included_mapped _ _ _ H H1). + apply (included_mapped_forall _ _ _ _ H H2). + elim H0; intros. + apply conj; auto. + apply (included_mapped_forall _ _ _ _ H H2). + intros. + apply (included_mapped _ _ _ H (H0 r H1)). + destruct o; auto. + apply (included_mapped _ _ _ H H0). +Qed. + +Lemma step4 : + forall (f : function) (u1 u3 : Uf.T) (i : instruction), + included u3 u1 -> + wt_instr (mk_env u3) f i -> + definite u3 i -> + consistent u1 -> + wt_instr (mk_env u1) f i. + Proof. + intros f u1 u3 i H1 H H0 X. + destruct H; try simpl in H0; try (elim H0; intros). + apply wt_Inop. + apply wt_Iopmove. unfold mk_env. rewrite (H1 _ _ H0). auto. + apply wt_Iopundef. + apply wt_Iop; auto. + destruct op; try congruence; simpl; simpl in H3; + simpl in H0; elim H0; intros; rewrite (mapped_included_consistent _ _ _ H4 H1 X); + rewrite (mapped_list_included _ _ _ H5 H1); auto. + apply wt_Iload. + rewrite (mapped_list_included _ _ _ H4 H1); auto. + rewrite (mapped_included_consistent _ _ _ H3 H1 X). auto. + apply wt_Istore. + rewrite (mapped_list_included _ _ _ H4 H1); auto. + rewrite (mapped_included_consistent _ _ _ H3 H1 X). auto. + elim H5; intros; destruct ros; apply wt_Icall. + rewrite (mapped_included_consistent _ _ _ H4 H1 X); auto. + rewrite (mapped_list_included _ _ _ H7 H1); auto. + rewrite (mapped_included_consistent _ _ _ H6 H1 X); auto. + auto. + rewrite (mapped_list_included _ _ _ H7 H1); auto. + rewrite (mapped_included_consistent _ _ _ H6 H1 X); auto. + apply wt_Icond. rewrite (mapped_list_included _ _ _ H0 H1); auto. + apply wt_Ireturn. + destruct optres; destruct f.(fn_sig).(sig_res); + simpl in H; simpl; try congruence. + rewrite (mapped_included_consistent _ _ _ H0 H1 X); auto. + Qed. + +Lemma type_rtl_function_instrs : + forall (f: function) (env: regenv), + type_rtl_function f = Some env + -> forall pc i, f.(fn_code)!pc = Some i -> wt_instr env f i. + Proof. + intros. + elim (step1 _ _ H). + intros. + elim H1. + intros. + elim H3. + intros. + rewrite H4. + elim (step2 _ _ _ (included_consistent _ _ H2 H5) _ _ H0). + intros. + elim H6. intros. + elim (step3 x0 f _ _ _ H0); auto. intros. + apply (step4 f _ _ i H2); auto. + apply (step4 _ _ _ _ H8 H9 H10). + apply (included_consistent _ _ H2); auto. + apply (definite_included _ _ _ H8 H10); auto. + Qed. + +(** Combining the sub-proofs. *) + +Theorem type_rtl_function_correct: + forall (f: function) (env: regenv), + type_rtl_function f = Some env -> wt_function env f. + Proof. + intros. + exact (mk_wt_function env f (type_rtl_function_params f _ H) + (type_rtl_function_norepet f _ H) + (type_rtl_function_instrs f _ H)). + Qed. + +Definition wt_program (p: program) : Prop := + forall i f, In (i, f) (prog_funct p) -> type_rtl_function f <> None. + +(** * Type preservation during evaluation *) + +(** The type system for RTL is not sound in that it does not guarantee + progress: well-typed instructions such as [Icall] can fail because + of run-time type tests (such as the equality between calle and caller's + signatures). However, the type system guarantees a type preservation + property: if the execution does not fail because of a failed run-time + test, the result values and register states match the static + typing assumptions. This preservation property will be useful + later for the proof of semantic equivalence between [Machabstr] and [Mach]. + Even though we do not need it for [RTL], we show preservation for [RTL] + here, as a warm-up exercise and because some of the lemmas will be + useful later. *) + +Require Import Globalenvs. +Require Import Values. +Require Import Mem. +Require Import Integers. + +Definition wt_regset (env: regenv) (rs: regset) : Prop := + forall r, Val.has_type (rs#r) (env r). + +Lemma wt_regset_assign: + forall env rs v r, + wt_regset env rs -> + Val.has_type v (env r) -> + wt_regset env (rs#r <- v). +Proof. + intros; red; intros. + rewrite Regmap.gsspec. + case (peq r0 r); intro. + subst r0. assumption. + apply H. +Qed. + +Lemma wt_regset_list: + forall env rs, + wt_regset env rs -> + forall rl, Val.has_type_list (rs##rl) (List.map env rl). +Proof. + induction rl; simpl. + auto. + split. apply H. apply IHrl. +Qed. + +Lemma wt_init_regs: + forall env rl args, + Val.has_type_list args (List.map env rl) -> + wt_regset env (init_regs args rl). +Proof. + induction rl; destruct args; simpl; intuition. + red; intros. rewrite Regmap.gi. simpl; auto. + apply wt_regset_assign; auto. +Qed. + +Section SUBJECT_REDUCTION. + +Variable p: program. + +Hypothesis wt_p: wt_program p. + +Let ge := Genv.globalenv p. + +Definition exec_instr_subject_reduction + (c: code) (sp: val) + (pc: node) (rs: regset) (m: mem) + (pc': node) (rs': regset) (m': mem) : Prop := + forall env f + (CODE: c = fn_code f) + (WT_FN: wt_function env f) + (WT_RS: wt_regset env rs), + wt_regset env rs'. + +Definition exec_function_subject_reduction + (f: function) (args: list val) (m: mem) (res: val) (m': mem) : Prop := + forall env, + wt_function env f -> + Val.has_type_list args f.(fn_sig).(sig_args) -> + Val.has_type res + (match f.(fn_sig).(sig_res) with None => Tint | Some ty => ty end). + +Lemma subject_reduction: + forall f args m res m', + exec_function ge f args m res m' -> + exec_function_subject_reduction f args m res m'. +Proof. + apply (exec_function_ind_3 ge + exec_instr_subject_reduction + exec_instr_subject_reduction + exec_function_subject_reduction); + intros; red; intros; + try (rewrite CODE in H; + generalize (wt_instrs env _ WT_FN pc _ H); + intro WT_INSTR; + inversion WT_INSTR). + + assumption. + + apply wt_regset_assign. auto. + subst op. subst args. simpl in H0. injection H0; intro. + subst v. rewrite <- H2. apply WT_RS. + + apply wt_regset_assign. auto. + subst op; subst args; simpl in H0. injection H0; intro; subst v. + simpl; auto. + + apply wt_regset_assign. auto. + replace (env res) with (snd (type_of_operation op)). + apply type_of_operation_sound with function ge rs##args sp; auto. + rewrite <- H7. reflexivity. + + apply wt_regset_assign. auto. rewrite H8. + eapply type_of_chunk_correct; eauto. + + assumption. + + apply wt_regset_assign. auto. rewrite H11. rewrite H1. + assert (type_rtl_function f <> None). + destruct ros; simpl in H0. + pattern f. apply Genv.find_funct_prop with function p (rs#r). + exact wt_p. exact H0. + caseEq (Genv.find_symbol ge i); intros; rewrite H12 in H0. + pattern f. apply Genv.find_funct_ptr_prop with function p b. + exact wt_p. exact H0. + discriminate. + assert (exists env1, wt_function env1 f). + caseEq (type_rtl_function f); intros; try congruence. + exists t. apply type_rtl_function_correct; auto. + elim H13; intros env1 WT_FN1. + eapply H3. eexact WT_FN1. rewrite <- H1. rewrite <- H10. + apply wt_regset_list; auto. + + assumption. + assumption. + assumption. + eauto. + eauto. + + assert (WT_INIT: wt_regset env (init_regs args (fn_params f))). + apply wt_init_regs. rewrite (wt_params env f H4). assumption. + generalize (H1 env f (refl_equal (fn_code f)) H4 WT_INIT). + intro WT_RS. + generalize (wt_instrs env _ H4 pc _ H2). + intro WT_INSTR; inversion WT_INSTR. + destruct or; simpl in H3; simpl in H7; rewrite <- H7. + rewrite H3. apply WT_RS. + rewrite H3. simpl; auto. +Qed. + +End SUBJECT_REDUCTION. diff --git a/backend/Registers.v b/backend/Registers.v new file mode 100644 index 00000000..30935893 --- /dev/null +++ b/backend/Registers.v @@ -0,0 +1,49 @@ +(** Pseudo-registers and register states. + + This library defines the type of pseudo-registers used in the RTL + intermediate language, and of mappings from pseudo-registers to + values as used in the dynamic semantics of RTL. *) + +Require Import Bool. +Require Import Coqlib. +Require Import AST. +Require Import Maps. +Require Import Sets. + +Definition reg: Set := positive. + +Module Reg. + +Definition eq := peq. + +Definition typenv := PMap.t typ. + +End Reg. + +(** Mappings from registers to some type. *) + +Module Regmap := PMap. + +Set Implicit Arguments. + +Definition regmap_optget + (A: Set) (or: option reg) (dfl: A) (rs: Regmap.t A) : A := + match or with + | None => dfl + | Some r => Regmap.get r rs + end. + +Definition regmap_optset + (A: Set) (or: option reg) (v: A) (rs: Regmap.t A) : Regmap.t A := + match or with + | None => rs + | Some r => Regmap.set r v rs + end. + +Notation "a # b" := (Regmap.get b a) (at level 1). +Notation "a ## b" := (List.map (fun r => Regmap.get r a) b) (at level 1). +Notation "a # b <- c" := (Regmap.set b c a) (at level 1, b at next level). + +(** Sets of registers *) + +Module Regset := MakeSet(PTree). diff --git a/backend/Stacking.v b/backend/Stacking.v new file mode 100644 index 00000000..1f0c4542 --- /dev/null +++ b/backend/Stacking.v @@ -0,0 +1,226 @@ +(** Layout of activation records; translation from Linear to Mach. *) + +Require Import Coqlib. +Require Import Maps. +Require Import AST. +Require Import Integers. +Require Import Op. +Require Import RTL. +Require Import Locations. +Require Import Linear. +Require Import Lineartyping. +Require Import Mach. +Require Import Conventions. + +(** * Layout of activation records *) + +(** The general shape of activation records is as follows, + from bottom (lowest offsets) to top: +- 24 reserved bytes. The first 4 bytes hold the back pointer to the + activation record of the caller. The next 4 bytes will be used + to store the return address. The remaining 16 bytes are unused + but reserved in accordance with the PowerPC application binary interface. +- Space for outgoing arguments to function calls. +- Local stack slots of integer type. +- Saved values of integer callee-save registers used by the function. +- One word of padding, if necessary to align the following data + on a 8-byte boundary. +- Local stack slots of float type. +- Saved values of float callee-save registers used by the function. +- Space for the stack-allocated data declared in Cminor. + +To facilitate some of the proofs, the Cminor stack-allocated data +starts at offset 0; the preceding areas in the activation record +therefore have negative offsets. This part (with negative offsets) +is called the ``frame'' (see the [Machabstr] semantics for the Mach +language), by opposition with the ``Cminor stack data'' which is the part +with positive offsets. + +The [frame_env] compilation environment records the positions of +the boundaries between areas in the frame part. +*) + +Record frame_env : Set := mk_frame_env { + fe_size: Z; + fe_ofs_int_local: Z; + fe_ofs_int_callee_save: Z; + fe_num_int_callee_save: Z; + fe_ofs_float_local: Z; + fe_ofs_float_callee_save: Z; + fe_num_float_callee_save: Z +}. + +(** Computation of the frame environment from the bounds of the current + function. *) + +Definition make_env (b: bounds) := + let oil := 4 * b.(bound_outgoing) in (* integer locals *) + let oics := oil + 4 * b.(bound_int_local) in (* integer callee-saves *) + let oendi := oics + 4 * b.(bound_int_callee_save) in + let ofl := align oendi 8 in (* float locals *) + let ofcs := ofl + 8 * b.(bound_float_local) in (* float callee-saves *) + let sz := ofcs + 8 * b.(bound_float_callee_save) in (* total frame size *) + mk_frame_env sz oil oics b.(bound_int_callee_save) + ofl ofcs b.(bound_float_callee_save). + +(** Computation the frame offset for the given component of the frame. + The component is expressed by the following [frame_index] sum type. *) + +Inductive frame_index: Set := + | FI_local: Z -> typ -> frame_index + | FI_arg: Z -> typ -> frame_index + | FI_saved_int: Z -> frame_index + | FI_saved_float: Z -> frame_index. + +Definition offset_of_index (fe: frame_env) (idx: frame_index) := + match idx with + | FI_local x Tint => fe.(fe_ofs_int_local) + 4 * x + | FI_local x Tfloat => fe.(fe_ofs_float_local) + 8 * x + | FI_arg x ty => 4 * x + | FI_saved_int x => fe.(fe_ofs_int_callee_save) + 4 * x + | FI_saved_float x => fe.(fe_ofs_float_callee_save) + 8 * x + end. + +(** * Saving and restoring callee-save registers *) + +(** [save_callee_save fe k] adds before [k] the instructions that + store in the frame the values of callee-save registers used by the + current function. *) + +Definition save_int_callee_save (fe: frame_env) (cs: mreg) (k: Mach.code) := + let i := index_int_callee_save cs in + if zlt i fe.(fe_num_int_callee_save) + then Msetstack cs (Int.repr (offset_of_index fe (FI_saved_int i))) Tint :: k + else k. + +Definition save_float_callee_save (fe: frame_env) (cs: mreg) (k: Mach.code) := + let i := index_float_callee_save cs in + if zlt i fe.(fe_num_float_callee_save) + then Msetstack cs (Int.repr (offset_of_index fe (FI_saved_float i))) Tfloat :: k + else k. + +Definition save_callee_save (fe: frame_env) (k: Mach.code) := + List.fold_right (save_int_callee_save fe) + (List.fold_right (save_float_callee_save fe) k float_callee_save_regs) + int_callee_save_regs. + +(** [restore_callee_save fe k] adds before [k] the instructions that + re-load from the frame the values of callee-save registers used by the + current function, restoring these registers to their initial values. *) + +Definition restore_int_callee_save (fe: frame_env) (cs: mreg) (k: Mach.code) := + let i := index_int_callee_save cs in + if zlt i fe.(fe_num_int_callee_save) + then Mgetstack (Int.repr (offset_of_index fe (FI_saved_int i))) Tint cs :: k + else k. + +Definition restore_float_callee_save (fe: frame_env) (cs: mreg) (k: Mach.code) := + let i := index_float_callee_save cs in + if zlt i fe.(fe_num_float_callee_save) + then Mgetstack (Int.repr (offset_of_index fe (FI_saved_float i))) Tfloat cs :: k + else k. + +Definition restore_callee_save (fe: frame_env) (k: Mach.code) := + List.fold_right (restore_int_callee_save fe) + (List.fold_right (restore_float_callee_save fe) k float_callee_save_regs) + int_callee_save_regs. + +(** * Code transformation. *) + +(** Translation of operations and addressing mode. + In Linear, the stack pointer has offset 0, i.e. points to the + beginning of the Cminor stack data block. In Mach, the stack + pointer points to the bottom of the activation record, + at offset [- fe.(fe_size)] where [fe] is the frame environment. + Operations and addressing mode that are relative to the stack pointer + must therefore be offset by [fe.(fe_size)] to preserve their + behaviour. *) + +Definition transl_op (fe: frame_env) (op: operation) := + match op with + | Oaddrstack ofs => Oaddrstack (Int.add (Int.repr fe.(fe_size)) ofs) + | _ => op + end. + +Definition transl_addr (fe: frame_env) (addr: addressing) := + match addr with + | Ainstack ofs => Ainstack (Int.add (Int.repr fe.(fe_size)) ofs) + | _ => addr + end. + +(** Translation of a Linear instruction. Prepends the corresponding + Mach instructions to the given list of instructions. + [Lgetstack] and [Lsetstack] moves between registers and stack slots + are turned into [Mgetstack], [Mgetparent] or [Msetstack] instructions + at offsets determined by the frame environment. + Instructions and addressing modes are modified as described previously. + Code to restore the values of callee-save registers is inserted + before the function returns. *) + +Definition transl_instr + (fe: frame_env) (i: Linear.instruction) (k: Mach.code) : Mach.code := + match i with + | Lgetstack s r => + match s with + | Local ofs ty => + Mgetstack (Int.repr (offset_of_index fe (FI_local ofs ty))) ty r :: k + | Incoming ofs ty => + Mgetparam (Int.repr (offset_of_index fe (FI_arg ofs ty))) ty r :: k + | Outgoing ofs ty => + Mgetstack (Int.repr (offset_of_index fe (FI_arg ofs ty))) ty r :: k + end + | Lsetstack r s => + match s with + | Local ofs ty => + Msetstack r (Int.repr (offset_of_index fe (FI_local ofs ty))) ty :: k + | Incoming ofs ty => + k (* should not happen *) + | Outgoing ofs ty => + Msetstack r (Int.repr (offset_of_index fe (FI_arg ofs ty))) ty :: k + end + | Lop op args res => + Mop (transl_op fe op) args res :: k + | Lload chunk addr args dst => + Mload chunk (transl_addr fe addr) args dst :: k + | Lstore chunk addr args src => + Mstore chunk (transl_addr fe addr) args src :: k + | Lcall sig ros => + Mcall sig ros :: k + | Llabel lbl => + Mlabel lbl :: k + | Lgoto lbl => + Mgoto lbl :: k + | Lcond cond args lbl => + Mcond cond args lbl :: k + | Lreturn => + restore_callee_save fe (Mreturn :: k) + end. + +(** Translation of a function. Code that saves the values of used + callee-save registers is inserted at function entry, followed + by the translation of the function body. + + Subtle point: the compiler must check that the frame is no + larger than [- Int.min_signed] bytes, otherwise arithmetic overflows + could occur during frame accesses using signed machine integers as + offsets. *) + +Definition transl_code + (fe: frame_env) (il: list Linear.instruction) : Mach.code := + List.fold_right (transl_instr fe) nil il. + +Definition transl_body (f: Linear.function) (fe: frame_env) := + save_callee_save fe (transl_code fe f.(Linear.fn_code)). + +Definition transf_function (f: Linear.function) : option Mach.function := + let fe := make_env (function_bounds f) in + if zlt f.(Linear.fn_stacksize) 0 then None else + if zlt (- Int.min_signed) fe.(fe_size) then None else + Some (Mach.mkfunction + f.(Linear.fn_sig) + (transl_body f fe) + f.(Linear.fn_stacksize) + fe.(fe_size)). + +Definition transf_program (p: Linear.program) : option Mach.program := + transform_partial_program transf_function p. diff --git a/backend/Stackingproof.v b/backend/Stackingproof.v new file mode 100644 index 00000000..002ca8d5 --- /dev/null +++ b/backend/Stackingproof.v @@ -0,0 +1,1610 @@ +(** Correctness proof for the translation from Linear to Mach. *) + +(** This file proves semantic preservation for the [Stacking] pass. + For the target language Mach, we use the alternate semantics + given in file [Machabstr], where a part of the activation record + is not resident in memory. Combined with the semantic equivalence + result between the two Mach semantics (see file [Machabstr2mach]), + the proof in this file also shows semantic preservation with + respect to the standard Mach semantics. *) + +Require Import Coqlib. +Require Import Maps. +Require Import AST. +Require Import Integers. +Require Import Values. +Require Import Op. +Require Import Mem. +Require Import Globalenvs. +Require Import Locations. +Require Import Mach. +Require Import Machabstr. +Require Import Linear. +Require Import Lineartyping. +Require Import Conventions. +Require Import Stacking. + +(** * Properties of frames and frame accesses *) + +(** ``Good variable'' properties for frame accesses. *) + +Lemma get_slot_ok: + forall fr ty ofs, + 0 <= ofs -> fr.(low) + ofs + 4 * typesize ty <= 0 -> + exists v, get_slot fr ty ofs v. +Proof. + intros. exists (load_contents (mem_type ty) fr.(contents) (fr.(low) + ofs)). + constructor; auto. +Qed. + +Lemma set_slot_ok: + forall fr ty ofs v, + fr.(high) = 0 -> 0 <= ofs -> fr.(low) + ofs + 4 * typesize ty <= 0 -> + exists fr', set_slot fr ty ofs v fr'. +Proof. + intros. + exists (mkblock fr.(low) fr.(high) + (store_contents (mem_type ty) fr.(contents) (fr.(low) + ofs) v) + (set_slot_undef_outside fr ty ofs v H H0 H1 fr.(undef_outside))). + constructor; auto. +Qed. + +Lemma slot_gss: + forall fr1 ty ofs v fr2, + set_slot fr1 ty ofs v fr2 -> + get_slot fr2 ty ofs v. +Proof. + intros. induction H. + constructor. + auto. simpl. auto. + simpl. symmetry. apply load_store_contents_same. +Qed. + +Lemma slot_gso: + forall fr1 ty ofs v fr2 ty' ofs' v', + set_slot fr1 ty ofs v fr2 -> + get_slot fr1 ty' ofs' v' -> + ofs' + 4 * typesize ty' <= ofs \/ ofs + 4 * typesize ty <= ofs' -> + get_slot fr2 ty' ofs' v'. +Proof. + intros. induction H; inversion H0. + constructor. + auto. simpl low. auto. + simpl. rewrite H3. symmetry. apply load_store_contents_other. + repeat (rewrite size_mem_type). omega. +Qed. + +Lemma slot_gi: + forall f ofs ty, + 0 <= ofs -> (init_frame f).(low) + ofs + 4 * typesize ty <= 0 -> + get_slot (init_frame f) ty ofs Vundef. +Proof. + intros. constructor. + auto. auto. + symmetry. apply load_contents_init. +Qed. + +Section PRESERVATION. + +Variable prog: Linear.program. +Variable tprog: Mach.program. +Hypothesis TRANSF: transf_program prog = Some tprog. +Let ge := Genv.globalenv prog. +Let tge := Genv.globalenv tprog. + +Section FRAME_PROPERTIES. + +Variable f: Linear.function. +Let b := function_bounds f. +Let fe := make_env b. +Variable tf: Mach.function. +Hypothesis TRANSF_F: transf_function f = Some tf. + +Lemma unfold_transf_function: + tf = Mach.mkfunction + f.(Linear.fn_sig) + (transl_body f fe) + f.(Linear.fn_stacksize) + fe.(fe_size). +Proof. + generalize TRANSF_F. unfold transf_function. + case (zlt (fn_stacksize f) 0). intros; discriminate. + case (zlt (- Int.min_signed) (fe_size (make_env (function_bounds f)))). + intros; discriminate. + intros. unfold fe. unfold b. congruence. +Qed. + +Lemma size_no_overflow: fe.(fe_size) <= -Int.min_signed. +Proof. + generalize TRANSF_F. unfold transf_function. + case (zlt (fn_stacksize f) 0). intros; discriminate. + case (zlt (- Int.min_signed) (fe_size (make_env (function_bounds f)))). + intros; discriminate. + intros. unfold fe, b. omega. +Qed. + +(** A frame index is valid if it lies within the resource bounds + of the current function. *) + +Definition index_valid (idx: frame_index) := + match idx with + | FI_local x Tint => 0 <= x < b.(bound_int_local) + | FI_local x Tfloat => 0 <= x < b.(bound_float_local) + | FI_arg x ty => 6 <= x /\ x + typesize ty <= b.(bound_outgoing) + | FI_saved_int x => 0 <= x < b.(bound_int_callee_save) + | FI_saved_float x => 0 <= x < b.(bound_float_callee_save) + end. + +Definition type_of_index (idx: frame_index) := + match idx with + | FI_local x ty => ty + | FI_arg x ty => ty + | FI_saved_int x => Tint + | FI_saved_float x => Tfloat + end. + +(** Non-overlap between the memory areas corresponding to two + frame indices. *) + +Definition index_diff (idx1 idx2: frame_index) : Prop := + match idx1, idx2 with + | FI_local x1 ty1, FI_local x2 ty2 => + x1 <> x2 \/ ty1 <> ty2 + | FI_arg x1 ty1, FI_arg x2 ty2 => + x1 + typesize ty1 <= x2 \/ x2 + typesize ty2 <= x1 + | FI_saved_int x1, FI_saved_int x2 => x1 <> x2 + | FI_saved_float x1, FI_saved_float x2 => x1 <> x2 + | _, _ => True + end. + +Remark align_float_part: + 4 * bound_outgoing b + 4 * bound_int_local b + 4 * bound_int_callee_save b <= + align (4 * bound_outgoing b + 4 * bound_int_local b + 4 * bound_int_callee_save b) 8. +Proof. + apply align_le. omega. +Qed. + +Ltac AddPosProps := + assert (bound_int_local b >= 0); + [unfold b; apply bound_int_local_pos | + assert (bound_float_local b >= 0); + [unfold b; apply bound_float_local_pos | + assert (bound_int_callee_save b >= 0); + [unfold b; apply bound_int_callee_save_pos | + assert (bound_float_callee_save b >= 0); + [unfold b; apply bound_float_callee_save_pos | + assert (bound_outgoing b >= 6); + [unfold b; apply bound_outgoing_pos | + generalize align_float_part; intro]]]]]. + +Lemma size_pos: fe.(fe_size) >= 24. +Proof. + AddPosProps. + unfold fe, make_env, fe_size. omega. +Qed. + +Opaque function_bounds. + +Lemma offset_of_index_disj: + forall idx1 idx2, + index_valid idx1 -> index_valid idx2 -> + index_diff idx1 idx2 -> + offset_of_index fe idx1 + 4 * typesize (type_of_index idx1) <= offset_of_index fe idx2 \/ + offset_of_index fe idx2 + 4 * typesize (type_of_index idx2) <= offset_of_index fe idx1. +Proof. + AddPosProps. + intros. + destruct idx1; destruct idx2; + try (destruct t); try (destruct t0); + unfold offset_of_index, fe, make_env, + fe_size, fe_ofs_int_local, fe_ofs_int_callee_save, + fe_ofs_float_local, fe_ofs_float_callee_save, + type_of_index, typesize; + simpl in H5; simpl in H6; simpl in H7; + try omega. + assert (z <> z0). intuition auto. omega. + assert (z <> z0). intuition auto. omega. +Qed. + +(** The following lemmas give sufficient conditions for indices + of various kinds to be valid. *) + +Lemma index_local_valid: + forall ofs ty, + slot_bounded f (Local ofs ty) -> + index_valid (FI_local ofs ty). +Proof. + unfold slot_bounded, index_valid. auto. +Qed. + +Lemma index_arg_valid: + forall ofs ty, + slot_bounded f (Outgoing ofs ty) -> + index_valid (FI_arg ofs ty). +Proof. + unfold slot_bounded, index_valid, b. auto. +Qed. + +Lemma index_saved_int_valid: + forall r, + In r int_callee_save_regs -> + index_int_callee_save r < b.(bound_int_callee_save) -> + index_valid (FI_saved_int (index_int_callee_save r)). +Proof. + intros. red. split. + apply Zge_le. apply index_int_callee_save_pos; auto. + auto. +Qed. + +Lemma index_saved_float_valid: + forall r, + In r float_callee_save_regs -> + index_float_callee_save r < b.(bound_float_callee_save) -> + index_valid (FI_saved_float (index_float_callee_save r)). +Proof. + intros. red. split. + apply Zge_le. apply index_float_callee_save_pos; auto. + auto. +Qed. + +Hint Resolve index_local_valid index_arg_valid + index_saved_int_valid index_saved_float_valid: stacking. + +(** The offset of a valid index lies within the bounds of the frame. *) + +Lemma offset_of_index_valid: + forall idx, + index_valid idx -> + 24 <= offset_of_index fe idx /\ + offset_of_index fe idx + 4 * typesize (type_of_index idx) <= fe.(fe_size). +Proof. + AddPosProps. + intros. + destruct idx; try destruct t; + unfold offset_of_index, fe, make_env, + fe_size, fe_ofs_int_local, fe_ofs_int_callee_save, + fe_ofs_float_local, fe_ofs_float_callee_save, + type_of_index, typesize; + simpl in H5; + omega. +Qed. + +(** Offsets for valid index are representable as signed machine integers + without loss of precision. *) + +Lemma offset_of_index_no_overflow: + forall idx, + index_valid idx -> + Int.signed (Int.repr (offset_of_index fe idx)) = offset_of_index fe idx. +Proof. + intros. + generalize (offset_of_index_valid idx H). intros [A B]. +(* omega falls flat on its face... *) + apply Int.signed_repr. + split. apply Zle_trans with 24. compute; intro; discriminate. + auto. + assert (offset_of_index fe idx < fe_size fe). + generalize (typesize_pos (type_of_index idx)); intro. omega. + apply Zlt_succ_le. + change (Zsucc Int.max_signed) with (- Int.min_signed). + generalize size_no_overflow. omega. +Qed. + +(** Admissible evaluation rules for the [Mgetstack] and [Msetstack] + instructions, in case the offset is computed with [offset_of_index]. *) + +Lemma exec_Mgetstack': + forall sp parent idx ty c rs fr dst m v, + index_valid idx -> + get_slot fr ty (offset_of_index fe idx) v -> + Machabstr.exec_instrs tge tf sp parent + (Mgetstack (Int.repr (offset_of_index fe idx)) ty dst :: c) rs fr m + c (rs#dst <- v) fr m. +Proof. + intros. apply Machabstr.exec_one. apply Machabstr.exec_Mgetstack. + rewrite offset_of_index_no_overflow. auto. auto. +Qed. + +Lemma exec_Msetstack': + forall sp parent idx ty c rs fr src m fr', + index_valid idx -> + set_slot fr ty (offset_of_index fe idx) (rs src) fr' -> + Machabstr.exec_instrs tge tf sp parent + (Msetstack src (Int.repr (offset_of_index fe idx)) ty :: c) rs fr m + c rs fr' m. +Proof. + intros. apply Machabstr.exec_one. apply Machabstr.exec_Msetstack. + rewrite offset_of_index_no_overflow. auto. auto. +Qed. + +(** An alternate characterization of the [get_slot] and [set_slot] + operations in terms of the following [index_val] frame access + function. *) + +Definition index_val (idx: frame_index) (fr: frame) := + load_contents (mem_type (type_of_index idx)) + fr.(contents) + (fr.(low) + offset_of_index fe idx). + +Lemma get_slot_index: + forall fr idx ty v, + index_valid idx -> + fr.(low) = - fe.(fe_size) -> + ty = type_of_index idx -> + v = index_val idx fr -> + get_slot fr ty (offset_of_index fe idx) v. +Proof. + intros. subst v; subst ty. + generalize (offset_of_index_valid idx H); intros [A B]. + unfold index_val. apply get_slot_intro. omega. + rewrite H0. omega. auto. +Qed. + +Lemma set_slot_index: + forall fr idx v, + index_valid idx -> + fr.(high) = 0 -> + fr.(low) = - fe.(fe_size) -> + exists fr', set_slot fr (type_of_index idx) (offset_of_index fe idx) v fr'. +Proof. + intros. + generalize (offset_of_index_valid idx H); intros [A B]. + apply set_slot_ok. auto. omega. rewrite H1; omega. +Qed. + +(** Alternate ``good variable'' properties for [get_slot] and [set_slot]. *) +Lemma slot_iss: + forall fr idx v fr', + set_slot fr (type_of_index idx) (offset_of_index fe idx) v fr' -> + index_val idx fr' = v. +Proof. + intros. inversion H. subst ofs ty. + unfold index_val; simpl. apply load_store_contents_same. +Qed. + +Lemma slot_iso: + forall fr idx v fr' idx', + set_slot fr (type_of_index idx) (offset_of_index fe idx) v fr' -> + index_diff idx idx' -> + index_valid idx -> index_valid idx' -> + index_val idx' fr' = index_val idx' fr. +Proof. + intros. generalize (offset_of_index_disj idx idx' H1 H2 H0). intro. + unfold index_val. inversion H. subst ofs ty. simpl. + apply load_store_contents_other. + repeat rewrite size_mem_type. omega. +Qed. + +(** * Agreement between location sets and Mach environments *) + +(** The following [agree] predicate expresses semantic agreement between + a location set on the Linear side and, on the Mach side, + a register set, plus the current and parent frames, plus the register + set [rs0] at function entry. *) + +Record agree (ls: locset) (rs: regset) (fr parent: frame) (rs0: regset) : Prop := + mk_agree { + (** Machine registers have the same values on the Linear and Mach sides. *) + agree_reg: + forall r, ls (R r) = rs r; + + (** Machine registers outside the bounds of the current function + have the same values they had at function entry. In other terms, + these registers are never assigned. *) + agree_unused_reg: + forall r, ~(mreg_bounded f r) -> rs r = rs0 r; + + (** The bounds of the current frame are [[- fe.(fe_size), 0]]. *) + agree_high: + fr.(high) = 0; + agree_size: + fr.(low) = - fe.(fe_size); + + (** Local and outgoing stack slots (on the Linear side) have + the same values as the one loaded from the current Mach frame + at the corresponding offsets. *) + + agree_locals: + forall ofs ty, + slot_bounded f (Local ofs ty) -> + ls (S (Local ofs ty)) = index_val (FI_local ofs ty) fr; + agree_outgoing: + forall ofs ty, + slot_bounded f (Outgoing ofs ty) -> + ls (S (Outgoing ofs ty)) = index_val (FI_arg ofs ty) fr; + + (** Incoming stack slots (on the Linear side) have + the same values as the one loaded from the parent Mach frame + at the corresponding offsets. *) + agree_incoming: + forall ofs ty, + slot_bounded f (Incoming ofs ty) -> + get_slot parent ty (Int.signed (Int.repr (4 * ofs))) (ls (S (Incoming ofs ty))); + + (** The areas of the frame reserved for saving used callee-save + registers always contain the values that those registers had + on function entry. *) + agree_saved_int: + forall r, + In r int_callee_save_regs -> + index_int_callee_save r < b.(bound_int_callee_save) -> + index_val (FI_saved_int (index_int_callee_save r)) fr = rs0 r; + agree_saved_float: + forall r, + In r float_callee_save_regs -> + index_float_callee_save r < b.(bound_float_callee_save) -> + index_val (FI_saved_float (index_float_callee_save r)) fr = rs0 r + }. + +Hint Resolve agree_reg agree_unused_reg agree_size agree_high + agree_locals agree_outgoing agree_incoming + agree_saved_int agree_saved_float: stacking. + +(** Values of registers and register lists. *) + +Lemma agree_eval_reg: + forall ls rs fr parent rs0 r, + agree ls rs fr parent rs0 -> rs r = ls (R r). +Proof. + intros. symmetry. eauto with stacking. +Qed. + +Lemma agree_eval_regs: + forall ls rs fr parent rs0 rl, + agree ls rs fr parent rs0 -> rs##rl = LTL.reglist rl ls. +Proof. + induction rl; simpl; intros. + auto. apply (f_equal2 (@cons val)). + eapply agree_eval_reg; eauto. + auto. +Qed. + +Hint Resolve agree_eval_reg agree_eval_regs: stacking. + +(** Preservation of agreement under various assignments: + of machine registers, of local slots, of outgoing slots. *) + +Lemma agree_set_reg: + forall ls rs fr parent rs0 r v, + agree ls rs fr parent rs0 -> + mreg_bounded f r -> + agree (Locmap.set (R r) v ls) (Regmap.set r v rs) fr parent rs0. +Proof. + intros. constructor; eauto with stacking. + intros. case (mreg_eq r r0); intro. + subst r0. rewrite Locmap.gss; rewrite Regmap.gss; auto. + rewrite Locmap.gso. rewrite Regmap.gso. eauto with stacking. + auto. red. auto. + intros. rewrite Regmap.gso. eauto with stacking. + red; intro; subst r0. contradiction. + intros. rewrite Locmap.gso. eauto with stacking. red. auto. + intros. rewrite Locmap.gso. eauto with stacking. red. auto. + intros. rewrite Locmap.gso. eauto with stacking. red. auto. +Qed. + +Lemma agree_set_local: + forall ls rs fr parent rs0 v ofs ty, + agree ls rs fr parent rs0 -> + slot_bounded f (Local ofs ty) -> + exists fr', + set_slot fr ty (offset_of_index fe (FI_local ofs ty)) v fr' /\ + agree (Locmap.set (S (Local ofs ty)) v ls) rs fr' parent rs0. +Proof. + intros. + generalize (set_slot_index fr _ v (index_local_valid _ _ H0) + (agree_high _ _ _ _ _ H) + (agree_size _ _ _ _ _ H)). + intros [fr' SET]. + exists fr'. split. auto. constructor; eauto with stacking. + (* agree_reg *) + intros. rewrite Locmap.gso. eauto with stacking. red; auto. + (* agree_high *) + inversion SET. simpl high. eauto with stacking. + (* agree_size *) + inversion SET. simpl low. eauto with stacking. + (* agree_local *) + intros. case (slot_eq (Local ofs ty) (Local ofs0 ty0)); intro. + rewrite <- e. rewrite Locmap.gss. + replace (FI_local ofs0 ty0) with (FI_local ofs ty). + symmetry. eapply slot_iss; eauto. congruence. + assert (ofs <> ofs0 \/ ty <> ty0). + case (zeq ofs ofs0); intro. compare ty ty0; intro. + congruence. tauto. tauto. + rewrite Locmap.gso. transitivity (index_val (FI_local ofs0 ty0) fr). + eauto with stacking. symmetry. eapply slot_iso; eauto. + simpl. auto. + (* agree_outgoing *) + intros. rewrite Locmap.gso. transitivity (index_val (FI_arg ofs0 ty0) fr). + eauto with stacking. symmetry. eapply slot_iso; eauto. + red; auto. red; auto. + (* agree_incoming *) + intros. rewrite Locmap.gso. eauto with stacking. red. auto. + (* agree_saved_int *) + intros. rewrite <- (agree_saved_int _ _ _ _ _ H r H1 H2). + eapply slot_iso; eauto with stacking. red; auto. + (* agree_saved_float *) + intros. rewrite <- (agree_saved_float _ _ _ _ _ H r H1 H2). + eapply slot_iso; eauto with stacking. red; auto. +Qed. + +Lemma agree_set_outgoing: + forall ls rs fr parent rs0 v ofs ty, + agree ls rs fr parent rs0 -> + slot_bounded f (Outgoing ofs ty) -> + exists fr', + set_slot fr ty (offset_of_index fe (FI_arg ofs ty)) v fr' /\ + agree (Locmap.set (S (Outgoing ofs ty)) v ls) rs fr' parent rs0. +Proof. + intros. + generalize (set_slot_index fr _ v (index_arg_valid _ _ H0) + (agree_high _ _ _ _ _ H) + (agree_size _ _ _ _ _ H)). + intros [fr' SET]. + exists fr'. split. exact SET. constructor; eauto with stacking. + (* agree_reg *) + intros. rewrite Locmap.gso. eauto with stacking. red; auto. + (* agree_high *) + inversion SET. simpl high. eauto with stacking. + (* agree_size *) + inversion SET. simpl low. eauto with stacking. + (* agree_local *) + intros. rewrite Locmap.gso. + transitivity (index_val (FI_local ofs0 ty0) fr). + eauto with stacking. symmetry. eapply slot_iso; eauto. + red; auto. red; auto. + (* agree_outgoing *) + intros. unfold Locmap.set. + case (Loc.eq (S (Outgoing ofs ty)) (S (Outgoing ofs0 ty0))); intro. + (* same location *) + replace ofs0 with ofs. replace ty0 with ty. + symmetry. eapply slot_iss; eauto. + congruence. congruence. + (* overlapping locations *) + caseEq (Loc.overlap (S (Outgoing ofs ty)) (S (Outgoing ofs0 ty0))); intros. + inversion SET. subst ofs1 ty1. + unfold index_val, type_of_index, offset_of_index. + set (ofs4 := 4 * ofs). set (ofs04 := 4 * ofs0). simpl. + unfold ofs4, ofs04. symmetry. + case (zeq ofs ofs0); intro. + subst ofs0. apply load_store_contents_mismatch. + destruct ty; destruct ty0; simpl; congruence. + generalize (Loc.overlap_not_diff _ _ H2). intro. simpl in H4. + apply load_store_contents_overlap. + omega. + rewrite size_mem_type. omega. + rewrite size_mem_type. omega. + (* different locations *) + transitivity (index_val (FI_arg ofs0 ty0) fr). + eauto with stacking. + symmetry. eapply slot_iso; eauto. + simpl. eapply Loc.overlap_aux_false_1; eauto. + (* agree_incoming *) + intros. rewrite Locmap.gso. eauto with stacking. red. auto. + (* saved ints *) + intros. rewrite <- (agree_saved_int _ _ _ _ _ H r H1 H2). + eapply slot_iso; eauto with stacking. red; auto. + (* saved floats *) + intros. rewrite <- (agree_saved_float _ _ _ _ _ H r H1 H2). + eapply slot_iso; eauto with stacking. red; auto. +Qed. + +Lemma agree_return_regs: + forall ls rs fr parent rs0 ls' rs', + agree ls rs fr parent rs0 -> + (forall r, + In (R r) temporaries \/ In (R r) destroyed_at_call -> + rs' r = ls' (R r)) -> + (forall r, + In r int_callee_save_regs \/ In r float_callee_save_regs -> + rs' r = rs r) -> + agree (LTL.return_regs ls ls') rs' fr parent rs0. +Proof. + intros. constructor; unfold LTL.return_regs; eauto with stacking. + (* agree_reg *) + intros. case (In_dec Loc.eq (R r) temporaries); intro. + symmetry. apply H0. tauto. + case (In_dec Loc.eq (R r) destroyed_at_call); intro. + symmetry. apply H0. tauto. + rewrite H1. eauto with stacking. + generalize (register_classification r); tauto. + (* agree_unused_reg *) + intros. rewrite H1. eauto with stacking. + generalize H2; unfold mreg_bounded; case (mreg_type r); intro. + left. apply index_int_callee_save_pos2. + generalize (bound_int_callee_save_pos f); intro. omega. + right. apply index_float_callee_save_pos2. + generalize (bound_float_callee_save_pos f); intro. omega. +Qed. + +(** * Correctness of saving and restoring of callee-save registers *) + +(** The following lemmas show the correctness of the register saving + code generated by [save_callee_save]: after this code has executed, + the register save areas of the current frame do contain the + values of the callee-save registers used by the function. *) + +Lemma save_int_callee_save_correct_rec: + forall l k sp parent rs fr m, + incl l int_callee_save_regs -> + list_norepet l -> + fr.(high) = 0 -> + fr.(low) = -fe.(fe_size) -> + exists fr', + Machabstr.exec_instrs tge tf sp parent + (List.fold_right (save_int_callee_save fe) k l) rs fr m + k rs fr' m + /\ fr'.(high) = 0 + /\ fr'.(low) = -fe.(fe_size) + /\ (forall r, + In r l -> index_int_callee_save r < bound_int_callee_save b -> + index_val (FI_saved_int (index_int_callee_save r)) fr' = rs r) + /\ (forall idx, + index_valid idx -> + (forall r, + In r l -> index_int_callee_save r < bound_int_callee_save b -> + idx <> FI_saved_int (index_int_callee_save r)) -> + index_val idx fr' = index_val idx fr). +Proof. + induction l. + (* base case *) + intros. simpl fold_right. exists fr. + split. apply Machabstr.exec_refl. split. auto. split. auto. + split. intros. elim H3. auto. + (* inductive case *) + intros. simpl fold_right. + set (k1 := fold_right (save_int_callee_save fe) k l) in *. + assert (R1: incl l int_callee_save_regs). eauto with coqlib. + assert (R2: list_norepet l). inversion H0; auto. + unfold save_int_callee_save. + case (zlt (index_int_callee_save a) (fe_num_int_callee_save fe)); + intro; + unfold fe_num_int_callee_save, fe, make_env in z. + (* a store takes place *) + assert (IDX: index_valid (FI_saved_int (index_int_callee_save a))). + apply index_saved_int_valid. eauto with coqlib. auto. + generalize (set_slot_index _ _ (rs a) IDX H1 H2). + intros [fr1 SET]. + assert (R3: high fr1 = 0). inversion SET. simpl high. auto. + assert (R4: low fr1 = -fe_size fe). inversion SET. simpl low. auto. + generalize (IHl k sp parent rs fr1 m R1 R2 R3 R4). + intros [fr' [A [B [C [D E]]]]]. + exists fr'. + split. eapply Machabstr.exec_trans. apply exec_Msetstack'; eauto with stacking. + exact A. + split. auto. + split. auto. + split. intros. elim H3; intros. subst r. + rewrite E. eapply slot_iss; eauto. auto. + intros. decEq. apply index_int_callee_save_inj; auto with coqlib. + inversion H0. red; intro; subst r; contradiction. + apply D; auto. + intros. transitivity (index_val idx fr1). + apply E; auto. + intros. apply H4; eauto with coqlib. + eapply slot_iso; eauto. + destruct idx; simpl; auto. + generalize (H4 a (in_eq _ _) z). congruence. + (* no store takes place *) + generalize (IHl k sp parent rs fr m R1 R2 H1 H2). + intros [fr' [A [B [C [D E]]]]]. + exists fr'. split. exact A. split. exact B. split. exact C. + split. intros. elim H3; intros. subst r. omegaContradiction. + apply D; auto. + intros. apply E; auto. + intros. apply H4; auto with coqlib. +Qed. + +Lemma save_int_callee_save_correct: + forall k sp parent rs fr m, + fr.(high) = 0 -> + fr.(low) = -fe.(fe_size) -> + exists fr', + Machabstr.exec_instrs tge tf sp parent + (List.fold_right (save_int_callee_save fe) k int_callee_save_regs) rs fr m + k rs fr' m + /\ fr'.(high) = 0 + /\ fr'.(low) = -fe.(fe_size) + /\ (forall r, + In r int_callee_save_regs -> + index_int_callee_save r < bound_int_callee_save b -> + index_val (FI_saved_int (index_int_callee_save r)) fr' = rs r) + /\ (forall idx, + index_valid idx -> + match idx with FI_saved_int _ => False | _ => True end -> + index_val idx fr' = index_val idx fr). +Proof. + intros. + generalize (save_int_callee_save_correct_rec + int_callee_save_regs k sp parent rs fr m + (incl_refl _) int_callee_save_norepet H H0). + intros [fr' [A [B [C [D E]]]]]. + exists fr'. + split. assumption. split. assumption. split. assumption. + split. apply D. intros. apply E. auto. + intros. red; intros; subst idx. contradiction. +Qed. + +Lemma save_float_callee_save_correct_rec: + forall l k sp parent rs fr m, + incl l float_callee_save_regs -> + list_norepet l -> + fr.(high) = 0 -> + fr.(low) = -fe.(fe_size) -> + exists fr', + Machabstr.exec_instrs tge tf sp parent + (List.fold_right (save_float_callee_save fe) k l) rs fr m + k rs fr' m + /\ fr'.(high) = 0 + /\ fr'.(low) = -fe.(fe_size) + /\ (forall r, + In r l -> index_float_callee_save r < bound_float_callee_save b -> + index_val (FI_saved_float (index_float_callee_save r)) fr' = rs r) + /\ (forall idx, + index_valid idx -> + (forall r, + In r l -> index_float_callee_save r < bound_float_callee_save b -> + idx <> FI_saved_float (index_float_callee_save r)) -> + index_val idx fr' = index_val idx fr). +Proof. + induction l. + (* base case *) + intros. simpl fold_right. exists fr. + split. apply Machabstr.exec_refl. split. auto. split. auto. + split. intros. elim H3. auto. + (* inductive case *) + intros. simpl fold_right. + set (k1 := fold_right (save_float_callee_save fe) k l) in *. + assert (R1: incl l float_callee_save_regs). eauto with coqlib. + assert (R2: list_norepet l). inversion H0; auto. + unfold save_float_callee_save. + case (zlt (index_float_callee_save a) (fe_num_float_callee_save fe)); + intro; + unfold fe_num_float_callee_save, fe, make_env in z. + (* a store takes place *) + assert (IDX: index_valid (FI_saved_float (index_float_callee_save a))). + apply index_saved_float_valid. eauto with coqlib. auto. + generalize (set_slot_index _ _ (rs a) IDX H1 H2). + intros [fr1 SET]. + assert (R3: high fr1 = 0). inversion SET. simpl high. auto. + assert (R4: low fr1 = -fe_size fe). inversion SET. simpl low. auto. + generalize (IHl k sp parent rs fr1 m R1 R2 R3 R4). + intros [fr' [A [B [C [D E]]]]]. + exists fr'. + split. eapply Machabstr.exec_trans. apply exec_Msetstack'; eauto with stacking. + exact A. + split. auto. + split. auto. + split. intros. elim H3; intros. subst r. + rewrite E. eapply slot_iss; eauto. auto. + intros. decEq. apply index_float_callee_save_inj; auto with coqlib. + inversion H0. red; intro; subst r; contradiction. + apply D; auto. + intros. transitivity (index_val idx fr1). + apply E; auto. + intros. apply H4; eauto with coqlib. + eapply slot_iso; eauto. + destruct idx; simpl; auto. + generalize (H4 a (in_eq _ _) z). congruence. + (* no store takes place *) + generalize (IHl k sp parent rs fr m R1 R2 H1 H2). + intros [fr' [A [B [C [D E]]]]]. + exists fr'. split. exact A. split. exact B. split. exact C. + split. intros. elim H3; intros. subst r. omegaContradiction. + apply D; auto. + intros. apply E; auto. + intros. apply H4; auto with coqlib. +Qed. + +Lemma save_float_callee_save_correct: + forall k sp parent rs fr m, + fr.(high) = 0 -> + fr.(low) = -fe.(fe_size) -> + exists fr', + Machabstr.exec_instrs tge tf sp parent + (List.fold_right (save_float_callee_save fe) k float_callee_save_regs) rs fr m + k rs fr' m + /\ fr'.(high) = 0 + /\ fr'.(low) = -fe.(fe_size) + /\ (forall r, + In r float_callee_save_regs -> + index_float_callee_save r < bound_float_callee_save b -> + index_val (FI_saved_float (index_float_callee_save r)) fr' = rs r) + /\ (forall idx, + index_valid idx -> + match idx with FI_saved_float _ => False | _ => True end -> + index_val idx fr' = index_val idx fr). +Proof. + intros. + generalize (save_float_callee_save_correct_rec + float_callee_save_regs k sp parent rs fr m + (incl_refl _) float_callee_save_norepet H H0). + intros [fr' [A [B [C [D E]]]]]. + exists fr'. split. assumption. split. assumption. split. assumption. + split. apply D. intros. apply E. auto. + intros. red; intros; subst idx. contradiction. +Qed. + +Lemma index_val_init_frame: + forall idx, + index_valid idx -> + index_val idx (init_frame tf) = Vundef. +Proof. + intros. unfold index_val, init_frame. simpl contents. + apply load_contents_init. +Qed. + +Lemma save_callee_save_correct: + forall sp parent k rs fr m ls, + (forall r, rs r = ls (R r)) -> + (forall ofs ty, + 6 <= ofs -> + ofs + typesize ty <= size_arguments f.(fn_sig) -> + get_slot parent ty (Int.signed (Int.repr (4 * ofs))) (ls (S (Outgoing ofs ty)))) -> + high fr = 0 -> + low fr = -fe.(fe_size) -> + (forall idx, index_valid idx -> index_val idx fr = Vundef) -> + exists fr', + Machabstr.exec_instrs tge tf sp parent + (save_callee_save fe k) rs fr m + k rs fr' m + /\ agree (LTL.call_regs ls) rs fr' parent rs. +Proof. + intros. unfold save_callee_save. + generalize (save_int_callee_save_correct + (fold_right (save_float_callee_save fe) k float_callee_save_regs) + sp parent rs fr m H1 H2). + intros [fr1 [A1 [B1 [C1 [D1 E1]]]]]. + generalize (save_float_callee_save_correct k sp parent rs fr1 m B1 C1). + intros [fr2 [A2 [B2 [C2 [D2 E2]]]]]. + exists fr2. + split. eapply Machabstr.exec_trans. eexact A1. eexact A2. + constructor; unfold LTL.call_regs; auto. + (* agree_local *) + intros. rewrite E2; auto with stacking. + rewrite E1; auto with stacking. + symmetry. auto with stacking. + (* agree_outgoing *) + intros. rewrite E2; auto with stacking. + rewrite E1; auto with stacking. + symmetry. auto with stacking. + (* agree_incoming *) + intros. simpl in H4. apply H0. tauto. tauto. + (* agree_saved_int *) + intros. rewrite E2; auto with stacking. +Qed. + +(** The following lemmas show the correctness of the register reloading + code generated by [reload_callee_save]: after this code has executed, + all callee-save registers contain the same values they had at + function entry. *) + +Lemma restore_int_callee_save_correct_rec: + forall sp parent k fr m rs0 l ls rs, + incl l int_callee_save_regs -> + list_norepet l -> + agree ls rs fr parent rs0 -> + exists ls', exists rs', + Machabstr.exec_instrs tge tf sp parent + (List.fold_right (restore_int_callee_save fe) k l) rs fr m + k rs' fr m + /\ (forall r, In r l -> rs' r = rs0 r) + /\ (forall r, ~(In r l) -> rs' r = rs r) + /\ agree ls' rs' fr parent rs0. +Proof. + induction l. + (* base case *) + intros. simpl fold_right. exists ls. exists rs. + split. apply Machabstr.exec_refl. + split. intros. elim H2. + split. auto. auto. + (* inductive case *) + intros. simpl fold_right. + set (k1 := fold_right (restore_int_callee_save fe) k l) in *. + assert (R0: In a int_callee_save_regs). apply H; auto with coqlib. + assert (R1: incl l int_callee_save_regs). eauto with coqlib. + assert (R2: list_norepet l). inversion H0; auto. + unfold restore_int_callee_save. + case (zlt (index_int_callee_save a) (fe_num_int_callee_save fe)); + intro; + unfold fe_num_int_callee_save, fe, make_env in z. + set (ls1 := Locmap.set (R a) (rs0 a) ls). + set (rs1 := Regmap.set a (rs0 a) rs). + assert (R3: agree ls1 rs1 fr parent rs0). + unfold ls1, rs1. apply agree_set_reg. auto. + red. rewrite int_callee_save_type. exact z. + apply H. auto with coqlib. + generalize (IHl ls1 rs1 R1 R2 R3). + intros [ls' [rs' [A [B [C D]]]]]. + exists ls'. exists rs'. + split. apply Machabstr.exec_trans with k1 rs1 fr m. + unfold rs1; apply exec_Mgetstack'; eauto with stacking. + apply get_slot_index; eauto with stacking. + symmetry. eauto with stacking. + eauto with stacking. + split. intros. elim H2; intros. + subst r. rewrite C. unfold rs1. apply Regmap.gss. inversion H0; auto. + auto. + split. intros. simpl in H2. rewrite C. unfold rs1. apply Regmap.gso. + apply sym_not_eq; tauto. tauto. + assumption. + (* no load takes place *) + generalize (IHl ls rs R1 R2 H1). + intros [ls' [rs' [A [B [C D]]]]]. + exists ls'; exists rs'. split. assumption. + split. intros. elim H2; intros. + subst r. apply (agree_unused_reg _ _ _ _ _ D). + unfold mreg_bounded. rewrite int_callee_save_type; auto. + auto. + split. intros. simpl in H2. apply C. tauto. + assumption. +Qed. + +Lemma restore_int_callee_save_correct: + forall sp parent k fr m rs0 ls rs, + agree ls rs fr parent rs0 -> + exists ls', exists rs', + Machabstr.exec_instrs tge tf sp parent + (List.fold_right (restore_int_callee_save fe) k int_callee_save_regs) rs fr m + k rs' fr m + /\ (forall r, In r int_callee_save_regs -> rs' r = rs0 r) + /\ (forall r, ~(In r int_callee_save_regs) -> rs' r = rs r) + /\ agree ls' rs' fr parent rs0. +Proof. + intros. apply restore_int_callee_save_correct_rec with ls. + apply incl_refl. apply int_callee_save_norepet. auto. +Qed. + +Lemma restore_float_callee_save_correct_rec: + forall sp parent k fr m rs0 l ls rs, + incl l float_callee_save_regs -> + list_norepet l -> + agree ls rs fr parent rs0 -> + exists ls', exists rs', + Machabstr.exec_instrs tge tf sp parent + (List.fold_right (restore_float_callee_save fe) k l) rs fr m + k rs' fr m + /\ (forall r, In r l -> rs' r = rs0 r) + /\ (forall r, ~(In r l) -> rs' r = rs r) + /\ agree ls' rs' fr parent rs0. +Proof. + induction l. + (* base case *) + intros. simpl fold_right. exists ls. exists rs. + split. apply Machabstr.exec_refl. + split. intros. elim H2. + split. auto. auto. + (* inductive case *) + intros. simpl fold_right. + set (k1 := fold_right (restore_float_callee_save fe) k l) in *. + assert (R0: In a float_callee_save_regs). apply H; auto with coqlib. + assert (R1: incl l float_callee_save_regs). eauto with coqlib. + assert (R2: list_norepet l). inversion H0; auto. + unfold restore_float_callee_save. + case (zlt (index_float_callee_save a) (fe_num_float_callee_save fe)); + intro; + unfold fe_num_float_callee_save, fe, make_env in z. + set (ls1 := Locmap.set (R a) (rs0 a) ls). + set (rs1 := Regmap.set a (rs0 a) rs). + assert (R3: agree ls1 rs1 fr parent rs0). + unfold ls1, rs1. apply agree_set_reg. auto. + red. rewrite float_callee_save_type. exact z. + apply H. auto with coqlib. + generalize (IHl ls1 rs1 R1 R2 R3). + intros [ls' [rs' [A [B [C D]]]]]. + exists ls'. exists rs'. + split. apply Machabstr.exec_trans with k1 rs1 fr m. + unfold rs1; apply exec_Mgetstack'; eauto with stacking. + apply get_slot_index; eauto with stacking. + symmetry. eauto with stacking. + exact A. + split. intros. elim H2; intros. + subst r. rewrite C. unfold rs1. apply Regmap.gss. inversion H0; auto. + auto. + split. intros. simpl in H2. rewrite C. unfold rs1. apply Regmap.gso. + apply sym_not_eq; tauto. tauto. + assumption. + (* no load takes place *) + generalize (IHl ls rs R1 R2 H1). + intros [ls' [rs' [A [B [C D]]]]]. + exists ls'; exists rs'. split. assumption. + split. intros. elim H2; intros. + subst r. apply (agree_unused_reg _ _ _ _ _ D). + unfold mreg_bounded. rewrite float_callee_save_type; auto. + auto. + split. intros. simpl in H2. apply C. tauto. + assumption. +Qed. + +Lemma restore_float_callee_save_correct: + forall sp parent k fr m rs0 ls rs, + agree ls rs fr parent rs0 -> + exists ls', exists rs', + Machabstr.exec_instrs tge tf sp parent + (List.fold_right (restore_float_callee_save fe) k float_callee_save_regs) rs fr m + k rs' fr m + /\ (forall r, In r float_callee_save_regs -> rs' r = rs0 r) + /\ (forall r, ~(In r float_callee_save_regs) -> rs' r = rs r) + /\ agree ls' rs' fr parent rs0. +Proof. + intros. apply restore_float_callee_save_correct_rec with ls. + apply incl_refl. apply float_callee_save_norepet. auto. +Qed. + +Lemma restore_callee_save_correct: + forall sp parent k fr m rs0 ls rs, + agree ls rs fr parent rs0 -> + exists rs', + Machabstr.exec_instrs tge tf sp parent + (restore_callee_save fe k) rs fr m + k rs' fr m + /\ (forall r, + In r int_callee_save_regs \/ In r float_callee_save_regs -> + rs' r = rs0 r) + /\ (forall r, + ~(In r int_callee_save_regs) -> + ~(In r float_callee_save_regs) -> + rs' r = rs r). +Proof. + intros. unfold restore_callee_save. + generalize (restore_int_callee_save_correct sp parent + (fold_right (restore_float_callee_save fe) k float_callee_save_regs) + fr m rs0 ls rs H). + intros [ls1 [rs1 [A [B [C D]]]]]. + generalize (restore_float_callee_save_correct sp parent + k fr m rs0 ls1 rs1 D). + intros [ls2 [rs2 [P [Q [R S]]]]]. + exists rs2. split. eapply Machabstr.exec_trans. eexact A. exact P. + split. intros. elim H0; intros. + rewrite R. apply B. auto. apply list_disjoint_notin with int_callee_save_regs. + apply int_float_callee_save_disjoint. auto. + apply Q. auto. + intros. rewrite R. apply C. auto. auto. +Qed. + +End FRAME_PROPERTIES. + +(** * Semantic preservation *) + +(** Preservation of code labels through the translation. *) + +Section LABELS. + +Remark find_label_fold_right: + forall (A: Set) (fn: A -> Mach.code -> Mach.code) lbl, + (forall x k, Mach.find_label lbl (fn x k) = Mach.find_label lbl k) -> forall (args: list A) k, + Mach.find_label lbl (List.fold_right fn k args) = Mach.find_label lbl k. +Proof. + induction args; simpl. auto. + intros. rewrite H. auto. +Qed. + +Remark find_label_save_callee_save: + forall fe lbl k, + Mach.find_label lbl (save_callee_save fe k) = Mach.find_label lbl k. +Proof. + intros. unfold save_callee_save. + repeat rewrite find_label_fold_right. reflexivity. + intros. unfold save_float_callee_save. + case (zlt (index_float_callee_save x) (fe_num_float_callee_save fe)); + intro; reflexivity. + intros. unfold save_int_callee_save. + case (zlt (index_int_callee_save x) (fe_num_int_callee_save fe)); + intro; reflexivity. +Qed. + +Remark find_label_restore_callee_save: + forall fe lbl k, + Mach.find_label lbl (restore_callee_save fe k) = Mach.find_label lbl k. +Proof. + intros. unfold restore_callee_save. + repeat rewrite find_label_fold_right. reflexivity. + intros. unfold restore_float_callee_save. + case (zlt (index_float_callee_save x) (fe_num_float_callee_save fe)); + intro; reflexivity. + intros. unfold restore_int_callee_save. + case (zlt (index_int_callee_save x) (fe_num_int_callee_save fe)); + intro; reflexivity. +Qed. + +Lemma find_label_transl_code: + forall fe lbl c, + Mach.find_label lbl (transl_code fe c) = + option_map (transl_code fe) (Linear.find_label lbl c). +Proof. + induction c; simpl; intros. + auto. + destruct a; unfold transl_instr; auto. + destruct s; simpl; auto. + destruct s; simpl; auto. + simpl. case (peq lbl l); intro. reflexivity. auto. + rewrite find_label_restore_callee_save. auto. +Qed. + +Lemma transl_find_label: + forall f tf lbl c, + transf_function f = Some tf -> + Linear.find_label lbl f.(Linear.fn_code) = Some c -> + Mach.find_label lbl tf.(Mach.fn_code) = + Some (transl_code (make_env (function_bounds f)) c). +Proof. + intros. rewrite (unfold_transf_function _ _ H). simpl. + unfold transl_body. rewrite find_label_save_callee_save. + rewrite find_label_transl_code. rewrite H0. reflexivity. +Qed. + +End LABELS. + +(** Code inclusion property for Linear executions. *) + +Lemma find_label_incl: + forall lbl c c', + Linear.find_label lbl c = Some c' -> incl c' c. +Proof. + induction c; simpl. + intros; discriminate. + intro c'. case (is_label lbl a); intros. + injection H; intro; subst c'. red; intros; auto with coqlib. + apply incl_tl. auto. +Qed. + +Lemma exec_instr_incl: + forall f sp c1 ls1 m1 c2 ls2 m2, + Linear.exec_instr ge f sp c1 ls1 m1 c2 ls2 m2 -> + incl c1 f.(fn_code) -> + incl c2 f.(fn_code). +Proof. + induction 1; intros; eauto with coqlib. + eapply find_label_incl; eauto. + eapply find_label_incl; eauto. +Qed. + +Lemma exec_instrs_incl: + forall f sp c1 ls1 m1 c2 ls2 m2, + Linear.exec_instrs ge f sp c1 ls1 m1 c2 ls2 m2 -> + incl c1 f.(fn_code) -> + incl c2 f.(fn_code). +Proof. + induction 1; intros; auto. + eapply exec_instr_incl; eauto. +Qed. + +(** Preservation / translation of global symbols and functions. *) + +Lemma symbols_preserved: + forall id, Genv.find_symbol tge id = Genv.find_symbol ge id. +Proof. + intros. unfold ge, tge. + apply Genv.find_symbol_transf_partial with transf_function. + exact TRANSF. +Qed. + +Lemma functions_translated: + forall f v, + Genv.find_funct ge v = Some f -> + exists tf, + Genv.find_funct tge v = Some tf /\ transf_function f = Some tf. +Proof. + intros. + generalize (Genv.find_funct_transf_partial transf_function TRANSF H). + case (transf_function f). + intros tf [A B]. exists tf. tauto. + intros. tauto. +Qed. + +Lemma function_ptr_translated: + forall f v, + Genv.find_funct_ptr ge v = Some f -> + exists tf, + Genv.find_funct_ptr tge v = Some tf /\ transf_function f = Some tf. +Proof. + intros. + generalize (Genv.find_funct_ptr_transf_partial transf_function TRANSF H). + case (transf_function f). + intros tf [A B]. exists tf. tauto. + intros. tauto. +Qed. + +(** Correctness of stack pointer relocation in operations and + addressing modes. *) + +Definition shift_sp (tf: Mach.function) (sp: val) := + Val.add sp (Vint (Int.repr (-tf.(fn_framesize)))). + +Remark shift_offset_sp: + forall f tf sp n v, + transf_function f = Some tf -> + offset_sp sp n = Some v -> + offset_sp (shift_sp tf sp) + (Int.add (Int.repr (fe_size (make_env (function_bounds f)))) n) = Some v. +Proof. + intros. destruct sp; try discriminate. + unfold offset_sp in *. + unfold shift_sp. + rewrite (unfold_transf_function _ _ H). unfold fn_framesize. + unfold Val.add. rewrite <- Int.neg_repr. + set (p := Int.repr (fe_size (make_env (function_bounds f)))). + inversion H0. decEq. decEq. + rewrite Int.add_assoc. decEq. + rewrite <- Int.add_assoc. + rewrite (Int.add_commut (Int.neg p) p). rewrite Int.add_neg_zero. + rewrite Int.add_commut. apply Int.add_zero. +Qed. + +Lemma shift_eval_operation: + forall f tf sp op args v, + transf_function f = Some tf -> + eval_operation ge sp op args = Some v -> + eval_operation tge (shift_sp tf sp) + (transl_op (make_env (function_bounds f)) op) args = + Some v. +Proof. + intros until v. destruct op; intros; auto. + simpl in *. rewrite symbols_preserved. auto. + destruct args; auto. unfold eval_operation in *. unfold transl_op. + apply shift_offset_sp; auto. +Qed. + +Lemma shift_eval_addressing: + forall f tf sp addr args v, + transf_function f = Some tf -> + eval_addressing ge sp addr args = Some v -> + eval_addressing tge (shift_sp tf sp) + (transl_addr (make_env (function_bounds f)) addr) args = + Some v. +Proof. + intros. destruct addr; auto. + simpl. rewrite symbols_preserved. auto. + simpl. rewrite symbols_preserved. auto. + unfold transl_addr, eval_addressing in *. + destruct args; try discriminate. + apply shift_offset_sp; auto. +Qed. + +(** The proof of semantic preservation relies on simulation diagrams + of the following form: +<< + c, ls, m ------------------- T(c), rs, fr, m + | | + | | + v v + c', ls', m' ---------------- T(c'), rs', fr', m' +>> + The left vertical arrow represents a transition in the + original Linear code. The top horizontal bar captures three preconditions: +- Agreement between [ls] on the Linear side and [rs], [fr], [rs0] + on the Mach side. +- Inclusion between [c] and the code of the function [f] being + translated. +- Well-typedness of [f]. + + In conclusion, we want to prove the existence of [rs'], [fr'], [m'] + that satisfies the right arrow (zero, one or several transitions in + the generated Mach code) and the postcondition (agreement between + [ls'] and [rs'], [fr'], [rs0]). + + As usual, we capture these diagrams as predicates parameterized + by the transition in the original Linear code. *) + +Definition exec_instr_prop + (f: function) (sp: val) + (c: code) (ls: locset) (m: mem) + (c': code) (ls': locset) (m': mem) := + forall tf rs fr parent rs0 + (TRANSL: transf_function f = Some tf) + (WTF: wt_function f) + (AG: agree f ls rs fr parent rs0) + (INCL: incl c f.(fn_code)), + exists rs', exists fr', + Machabstr.exec_instrs tge tf (shift_sp tf sp) parent + (transl_code (make_env (function_bounds f)) c) rs fr m + (transl_code (make_env (function_bounds f)) c') rs' fr' m' + /\ agree f ls' rs' fr' parent rs0. + +(** The simulation property for function calls has different preconditions + (a slightly weaker notion of agreement between [ls] and the initial + register values [rs] and caller's frame [parent]) and different + postconditions (preservation of callee-save registers). *) + +Definition exec_function_prop + (f: function) + (ls: locset) (m: mem) + (ls': locset) (m': mem) := + forall tf rs parent + (TRANSL: transf_function f = Some tf) + (WTF: wt_function f) + (AG1: forall r, rs r = ls (R r)) + (AG2: forall ofs ty, + 6 <= ofs -> + ofs + typesize ty <= size_arguments f.(fn_sig) -> + get_slot parent ty (Int.signed (Int.repr (4 * ofs))) (ls (S (Outgoing ofs ty)))), + exists rs', + Machabstr.exec_function tge tf parent rs m rs' m' + /\ (forall r, + In (R r) temporaries \/ In (R r) destroyed_at_call -> + rs' r = ls' (R r)) + /\ (forall r, + In r int_callee_save_regs \/ In r float_callee_save_regs -> + rs' r = rs r). + +Hypothesis wt_prog: wt_program prog. + +Lemma transf_function_correct: + forall f ls m ls' m', + Linear.exec_function ge f ls m ls' m' -> + exec_function_prop f ls m ls' m'. +Proof. + assert (RED: forall f i c, + transl_code (make_env (function_bounds f)) (i :: c) = + transl_instr (make_env (function_bounds f)) i + (transl_code (make_env (function_bounds f)) c)). + intros. reflexivity. + apply (Linear.exec_function_ind3 ge exec_instr_prop + exec_instr_prop exec_function_prop); + intros; red; intros; + try rewrite RED; + try (generalize (WTF _ (INCL _ (in_eq _ _))); intro WTI); + unfold transl_instr. + + (* Lgetstack *) + inversion WTI. exists (rs0#r <- (rs (S sl))); exists fr. + split. destruct sl. + (* Lgetstack, local *) + apply exec_Mgetstack'; auto. + apply get_slot_index. apply index_local_valid. auto. + eapply agree_size; eauto. reflexivity. + eapply agree_locals; eauto. + (* Lgetstack, incoming *) + apply Machabstr.exec_one; constructor. + unfold offset_of_index. eapply agree_incoming; eauto. + (* Lgetstack, outgoing *) + apply exec_Mgetstack'; auto. + apply get_slot_index. apply index_arg_valid. auto. + eapply agree_size; eauto. reflexivity. + eapply agree_outgoing; eauto. + (* Lgetstack, common *) + apply agree_set_reg; auto. + + (* Lsetstack *) + inversion WTI. destruct sl. + (* Lsetstack, local *) + generalize (agree_set_local _ _ _ _ _ _ (rs0 r) _ _ AG H3). + intros [fr' [SET AG']]. + exists rs0; exists fr'. split. + apply exec_Msetstack'; auto. + replace (rs (R r)) with (rs0 r). auto. + symmetry. eapply agree_reg; eauto. + (* Lsetstack, incoming *) + contradiction. + (* Lsetstack, outgoing *) + generalize (agree_set_outgoing _ _ _ _ _ _ (rs0 r) _ _ AG H3). + intros [fr' [SET AG']]. + exists rs0; exists fr'. split. + apply exec_Msetstack'; auto. + replace (rs (R r)) with (rs0 r). auto. + symmetry. eapply agree_reg; eauto. + + (* Lop *) + assert (mreg_bounded f res). inversion WTI; auto. + exists (rs0#res <- v); exists fr. split. + apply Machabstr.exec_one. constructor. + apply shift_eval_operation. auto. + change mreg with RegEq.t. + rewrite (agree_eval_regs _ _ _ _ _ _ args AG). auto. + apply agree_set_reg; auto. + + (* Lload *) + inversion WTI. exists (rs0#dst <- v); exists fr. split. + apply Machabstr.exec_one; econstructor. + apply shift_eval_addressing; auto. + change mreg with RegEq.t. + rewrite (agree_eval_regs _ _ _ _ _ _ args AG). eauto. + auto. + apply agree_set_reg; auto. + + (* Lstore *) + exists rs0; exists fr. split. + apply Machabstr.exec_one; econstructor. + apply shift_eval_addressing; auto. + change mreg with RegEq.t. + rewrite (agree_eval_regs _ _ _ _ _ _ args AG). eauto. + rewrite (agree_eval_reg _ _ _ _ _ _ src AG). auto. + auto. + + (* Lcall *) + inversion WTI. + assert (WTF': wt_function f'). + destruct ros; simpl in H. + apply (Genv.find_funct_prop wt_function wt_prog H). + destruct (Genv.find_symbol ge i); try discriminate. + apply (Genv.find_funct_ptr_prop wt_function wt_prog H). + assert (TR: exists tf', Mach.find_function tge ros rs0 = Some tf' + /\ transf_function f' = Some tf'). + destruct ros; simpl in H; simpl. + eapply functions_translated. + rewrite (agree_eval_reg _ _ _ _ _ _ m0 AG). auto. + rewrite symbols_preserved. + destruct (Genv.find_symbol ge i); try discriminate. + apply function_ptr_translated; auto. + elim TR; intros tf' [FIND' TRANSL']; clear TR. + assert (AG1: forall r, rs0 r = rs (R r)). + intro. symmetry. eapply agree_reg; eauto. + assert (AG2: forall ofs ty, + 6 <= ofs -> + ofs + typesize ty <= size_arguments f'.(fn_sig) -> + get_slot fr ty (Int.signed (Int.repr (4 * ofs))) (rs (S (Outgoing ofs ty)))). + intros. + assert (slot_bounded f (Outgoing ofs ty)). + red. rewrite <- H0 in H8. omega. + change (4 * ofs) with (offset_of_index (make_env (function_bounds f)) (FI_arg ofs ty)). + rewrite (offset_of_index_no_overflow f tf); auto. + apply get_slot_index. apply index_arg_valid. auto. + eapply agree_size; eauto. reflexivity. + eapply agree_outgoing; eauto. + generalize (H2 tf' rs0 fr TRANSL' WTF' AG1 AG2). + intros [rs2 [EXF [REGS1 REGS2]]]. + exists rs2; exists fr. split. + apply Machabstr.exec_one; apply Machabstr.exec_Mcall with tf'; auto. + apply agree_return_regs with rs0; auto. + + (* Llabel *) + exists rs0; exists fr. split. + apply Machabstr.exec_one; apply Machabstr.exec_Mlabel. + auto. + + (* Lgoto *) + exists rs0; exists fr. split. + apply Machabstr.exec_one; apply Machabstr.exec_Mgoto. + apply transl_find_label; auto. + auto. + + (* Lcond, true *) + exists rs0; exists fr. split. + apply Machabstr.exec_one; apply Machabstr.exec_Mcond_true. + rewrite <- (agree_eval_regs _ _ _ _ _ _ args AG) in H; auto. + apply transl_find_label; auto. + auto. + + (* Lcond, false *) + exists rs0; exists fr. split. + apply Machabstr.exec_one; apply Machabstr.exec_Mcond_false. + rewrite <- (agree_eval_regs _ _ _ _ _ _ args AG) in H; auto. + auto. + + (* refl *) + exists rs0; exists fr. split. apply Machabstr.exec_refl. auto. + + (* one *) + apply H0; auto. + + (* trans *) + generalize (H0 tf rs fr parent rs0 TRANSL WTF AG INCL). + intros [rs' [fr' [A B]]]. + assert (INCL': incl b2 (fn_code f)). eapply exec_instrs_incl; eauto. + generalize (H2 tf rs' fr' parent rs0 TRANSL WTF B INCL'). + intros [rs'' [fr'' [C D]]]. + exists rs''; exists fr''. split. + eapply Machabstr.exec_trans. eexact A. eexact C. + auto. + + (* function *) + assert (X: forall link ra, + exists rs' : regset, + Machabstr.exec_function_body tge tf parent link ra rs0 m rs' (free m2 stk) /\ + (forall r : mreg, + In (R r) temporaries \/ In (R r) destroyed_at_call -> rs' r = rs2 (R r)) /\ + (forall r : mreg, + In r int_callee_save_regs \/ In r float_callee_save_regs -> rs' r = rs0 r)). + intros. + set (sp := Vptr stk Int.zero) in *. + set (tsp := shift_sp tf sp). + set (fe := make_env (function_bounds f)). + assert (low (init_frame tf) = -fe.(fe_size)). + simpl low. rewrite (unfold_transf_function _ _ TRANSL). + reflexivity. + assert (exists fr1, set_slot (init_frame tf) Tint 0 link fr1). + apply set_slot_ok. reflexivity. omega. rewrite H2. generalize (size_pos f). + unfold fe. simpl typesize. omega. + elim H3; intros fr1 SET1; clear H3. + assert (high fr1 = 0). + inversion SET1. reflexivity. + assert (low fr1 = -fe.(fe_size)). + inversion SET1. rewrite <- H2. reflexivity. + assert (exists fr2, set_slot fr1 Tint 4 ra fr2). + apply set_slot_ok. auto. omega. rewrite H4. generalize (size_pos f). + unfold fe. simpl typesize. omega. + elim H5; intros fr2 SET2; clear H5. + assert (high fr2 = 0). + inversion SET2. simpl. auto. + assert (low fr2 = -fe.(fe_size)). + inversion SET2. rewrite <- H4. reflexivity. + assert (UNDEF: forall idx, index_valid f idx -> index_val f idx fr2 = Vundef). + intros. + assert (get_slot fr2 (type_of_index idx) (offset_of_index fe idx) Vundef). + generalize (offset_of_index_valid _ _ H7). fold fe. intros [XX YY]. + eapply slot_gso; eauto. + eapply slot_gso; eauto. + apply slot_gi. omega. omega. + simpl typesize. omega. simpl typesize. omega. + inversion H8. symmetry. exact H11. + generalize (save_callee_save_correct f tf TRANSL + tsp parent + (transl_code (make_env (function_bounds f)) f.(fn_code)) + rs0 fr2 m1 rs AG1 AG2 H5 H6 UNDEF). + intros [fr [EXP AG]]. + generalize (H1 tf rs0 fr parent rs0 TRANSL WTF AG (incl_refl _)). + intros [rs' [fr' [EXB AG']]]. + generalize (restore_callee_save_correct f tf TRANSL tsp parent + (Mreturn :: transl_code (make_env (function_bounds f)) b) + fr' m2 rs0 rs2 rs' AG'). + intros [rs'' [EXX [REGS1 REGS2]]]. + exists rs''. split. eapply Machabstr.exec_funct_body. + rewrite (unfold_transf_function f tf TRANSL); eexact H. + eexact SET1. eexact SET2. + replace (Mach.fn_code tf) with + (transl_body f (make_env (function_bounds f))). + replace (Vptr stk (Int.repr (- fn_framesize tf))) with tsp. + eapply Machabstr.exec_trans. eexact EXP. + eapply Machabstr.exec_trans. eexact EXB. eexact EXX. + unfold tsp, shift_sp, sp. unfold Val.add. + rewrite Int.add_commut. rewrite Int.add_zero. auto. + rewrite (unfold_transf_function f tf TRANSL). simpl. auto. + split. intros. rewrite REGS2. symmetry; eapply agree_reg; eauto. + apply int_callee_save_not_destroyed; auto. + apply float_callee_save_not_destroyed; auto. + auto. + generalize (X Vzero Vzero). intros [rs' [EX [REGS1 REGS2]]]. + exists rs'. split. + constructor. intros. + generalize (X link ra). intros [rs'' [EX' [REGS1' REGS2']]]. + assert (rs' = rs''). + apply (@Regmap.exten val). intro r. + elim (register_classification r); intro. + rewrite REGS1'. apply REGS1. auto. auto. + rewrite REGS2'. apply REGS2. auto. auto. + rewrite H4. auto. + split; auto. +Qed. + +End PRESERVATION. + +Theorem transl_program_correct: + forall (p: Linear.program) (tp: Mach.program) (r: val), + wt_program p -> + transf_program p = Some tp -> + Linear.exec_program p r -> + Machabstr.exec_program tp r. +Proof. + intros p tp r WTP TRANSF + [fptr [f [ls' [m [FINDSYMB [FINDFUNC [SIG [EXEC RES]]]]]]]]. + assert (WTF: wt_function f). + apply (Genv.find_funct_ptr_prop wt_function WTP FINDFUNC). + set (ls := Locmap.init Vundef) in *. + set (rs := Regmap.init Vundef). + set (fr := empty_frame). + assert (AG1: forall r, rs r = ls (R r)). + intros; reflexivity. + assert (AG2: forall ofs ty, + 6 <= ofs -> + ofs + typesize ty <= size_arguments f.(fn_sig) -> + get_slot fr ty (Int.signed (Int.repr (4 * ofs))) (ls (S (Outgoing ofs ty)))). + rewrite SIG. unfold size_arguments, sig_args, size_arguments_rec. + intros. generalize (typesize_pos ty). + intro. omegaContradiction. + generalize (function_ptr_translated p tp TRANSF f _ FINDFUNC). + intros [tf [TFIND TRANSL]]. + generalize (transf_function_correct p tp TRANSF WTP _ _ _ _ _ EXEC + tf rs fr TRANSL WTF AG1 AG2). + intros [rs' [A [B C]]]. + red. exists fptr; exists tf; exists rs'; exists m. + split. rewrite (symbols_preserved p tp TRANSF). + rewrite (transform_partial_program_main transf_function p TRANSF). + assumption. + split. assumption. + split. rewrite (unfold_transf_function f tf TRANSL); simpl. + assumption. + split. replace (Genv.init_mem tp) with (Genv.init_mem p). + exact A. symmetry. apply Genv.init_mem_transf_partial with transf_function. + exact TRANSF. + rewrite <- RES. rewrite (unfold_transf_function f tf TRANSL); simpl. + apply B. right. apply loc_result_acceptable. +Qed. diff --git a/backend/Stackingtyping.v b/backend/Stackingtyping.v new file mode 100644 index 00000000..85d19229 --- /dev/null +++ b/backend/Stackingtyping.v @@ -0,0 +1,222 @@ +(** Type preservation for the [Stacking] pass. *) + +Require Import Coqlib. +Require Import Maps. +Require Import Integers. +Require Import AST. +Require Import Op. +Require Import Locations. +Require Import Conventions. +Require Import Linear. +Require Import Lineartyping. +Require Import Mach. +Require Import Machtyping. +Require Import Stacking. +Require Import Stackingproof. + +(** We show that the Mach code generated by the [Stacking] pass + is well-typed if the original Linear code is. *) + +Definition wt_instrs (k: Mach.code) : Prop := + forall i, In i k -> wt_instr i. + +Lemma wt_instrs_cons: + forall i k, + wt_instr i -> wt_instrs k -> wt_instrs (i :: k). +Proof. + unfold wt_instrs; intros. elim H1; intro. + subst i0; auto. auto. +Qed. + +Section TRANSL_FUNCTION. + +Variable f: Linear.function. +Let fe := make_env (function_bounds f). +Variable tf: Mach.function. +Hypothesis TRANSF_F: transf_function f = Some tf. + +Lemma wt_Msetstack': + forall idx ty r, + mreg_type r = ty -> index_valid f idx -> + wt_instr (Msetstack r (Int.repr (offset_of_index fe idx)) ty). +Proof. + intros. constructor. auto. + unfold fe. rewrite (offset_of_index_no_overflow f tf TRANSF_F); auto. + generalize (offset_of_index_valid f idx H0). tauto. +Qed. + +Lemma wt_fold_right: + forall (A: Set) (f: A -> code -> code) (k: code) (l: list A), + (forall x k', In x l -> wt_instrs k' -> wt_instrs (f x k')) -> + wt_instrs k -> + wt_instrs (List.fold_right f k l). +Proof. + induction l; intros; simpl. + auto. + apply H. apply in_eq. apply IHl. + intros. apply H. auto with coqlib. auto. + auto. +Qed. + +Lemma wt_save_int_callee_save: + forall cs k, + In cs int_callee_save_regs -> wt_instrs k -> + wt_instrs (save_int_callee_save fe cs k). +Proof. + intros. unfold save_int_callee_save. + case (zlt (index_int_callee_save cs) (fe_num_int_callee_save fe)); intro. + apply wt_instrs_cons; auto. + apply wt_Msetstack'. apply int_callee_save_type; auto. + apply index_saved_int_valid. auto. exact z. + auto. +Qed. + +Lemma wt_save_float_callee_save: + forall cs k, + In cs float_callee_save_regs -> wt_instrs k -> + wt_instrs (save_float_callee_save fe cs k). +Proof. + intros. unfold save_float_callee_save. + case (zlt (index_float_callee_save cs) (fe_num_float_callee_save fe)); intro. + apply wt_instrs_cons; auto. + apply wt_Msetstack'. apply float_callee_save_type; auto. + apply index_saved_float_valid. auto. exact z. + auto. +Qed. + +Lemma wt_restore_int_callee_save: + forall cs k, + In cs int_callee_save_regs -> wt_instrs k -> + wt_instrs (restore_int_callee_save fe cs k). +Proof. + intros. unfold restore_int_callee_save. + case (zlt (index_int_callee_save cs) (fe_num_int_callee_save fe)); intro. + apply wt_instrs_cons; auto. + constructor. apply int_callee_save_type; auto. + auto. +Qed. + +Lemma wt_restore_float_callee_save: + forall cs k, + In cs float_callee_save_regs -> wt_instrs k -> + wt_instrs (restore_float_callee_save fe cs k). +Proof. + intros. unfold restore_float_callee_save. + case (zlt (index_float_callee_save cs) (fe_num_float_callee_save fe)); intro. + apply wt_instrs_cons; auto. + constructor. apply float_callee_save_type; auto. + auto. +Qed. + +Lemma wt_save_callee_save: + forall k, + wt_instrs k -> wt_instrs (save_callee_save fe k). +Proof. + intros. unfold save_callee_save. + apply wt_fold_right. exact wt_save_int_callee_save. + apply wt_fold_right. exact wt_save_float_callee_save. + auto. +Qed. + +Lemma wt_restore_callee_save: + forall k, + wt_instrs k -> wt_instrs (restore_callee_save fe k). +Proof. + intros. unfold restore_callee_save. + apply wt_fold_right. exact wt_restore_int_callee_save. + apply wt_fold_right. exact wt_restore_float_callee_save. + auto. +Qed. + +Lemma wt_transl_instr: + forall instr k, + Lineartyping.wt_instr f instr -> + wt_instrs k -> + wt_instrs (transl_instr fe instr k). +Proof. + intros. destruct instr; unfold transl_instr; inversion H. + (* getstack *) + destruct s; simpl in H3; apply wt_instrs_cons; auto; + constructor; auto. + (* setstack *) + destruct s; simpl in H3; simpl in H4. + apply wt_instrs_cons; auto. apply wt_Msetstack'. auto. + apply index_local_valid. auto. + auto. + apply wt_instrs_cons; auto. apply wt_Msetstack'. auto. + apply index_arg_valid. auto. + (* op, move *) + simpl. apply wt_instrs_cons. constructor; auto. auto. + (* op, undef *) + simpl. apply wt_instrs_cons. constructor. auto. + (* op, others *) + apply wt_instrs_cons; auto. + constructor. + destruct o; simpl; congruence. + destruct o; simpl; congruence. + rewrite H6. destruct o; reflexivity || congruence. + (* load *) + apply wt_instrs_cons; auto. + constructor; auto. + rewrite H4. destruct a; reflexivity. + (* store *) + apply wt_instrs_cons; auto. + constructor; auto. + rewrite H3. destruct a; reflexivity. + (* call *) + apply wt_instrs_cons; auto. + constructor; auto. + (* label *) + apply wt_instrs_cons; auto. + constructor. + (* goto *) + apply wt_instrs_cons; auto. + constructor; auto. + (* cond *) + apply wt_instrs_cons; auto. + constructor; auto. + (* return *) + apply wt_restore_callee_save. apply wt_instrs_cons. constructor. auto. +Qed. + +End TRANSL_FUNCTION. + +Lemma wt_transf_function: + forall f tf, + transf_function f = Some tf -> + Lineartyping.wt_function f -> + wt_function tf. +Proof. + intros. + generalize H; unfold transf_function. + case (zlt (Linear.fn_stacksize f) 0); intro. + intros; discriminate. + case (zlt (- Int.min_signed) (fe_size (make_env (function_bounds f)))); intro. + intros; discriminate. intro EQ. + generalize (unfold_transf_function f tf H); intro. + assert (fn_framesize tf = fe_size (make_env (function_bounds f))). + subst tf; reflexivity. + constructor. + change (wt_instrs (fn_code tf)). + rewrite H1; simpl; unfold transl_body. + apply wt_save_callee_save with tf; auto. + unfold transl_code. apply wt_fold_right. + intros. eapply wt_transl_instr; eauto. + red; intros. elim H3. + subst tf; simpl; auto. + rewrite H2. eapply size_pos; eauto. + rewrite H2. eapply size_no_overflow; eauto. +Qed. + +Lemma program_typing_preserved: + forall (p: Linear.program) (tp: Mach.program), + transf_program p = Some tp -> + Lineartyping.wt_program p -> + Machtyping.wt_program tp. +Proof. + intros; red; intros. + generalize (transform_partial_program_function transf_function p i f H H1). + intros [f0 [IN TRANSF]]. + apply wt_transf_function with f0; auto. + eapply H0; eauto. +Qed. diff --git a/backend/Tunneling.v b/backend/Tunneling.v new file mode 100644 index 00000000..9c3e82c4 --- /dev/null +++ b/backend/Tunneling.v @@ -0,0 +1,131 @@ +(** Branch tunneling (optimization of branches to branches). *) + +Require Import Coqlib. +Require Import Maps. +Require Import AST. +Require Import Values. +Require Import Globalenvs. +Require Import Op. +Require Import Locations. +Require Import LTL. + +(** Branch tunneling shortens sequences of branches (with no intervening + computations) by rewriting the branch and conditional branch instructions + so that they jump directly to the end of the branch sequence. + For example: +<< + L1: goto L2; L1: goto L3; + L2; goto L3; becomes L2: goto L3; + L3: instr; L3: instr; + L4: if (cond) goto L1; L4: if (cond) goto L3; +>> + This optimization can be applied to several of our intermediate + languages. We choose to perform it on the [LTL] language, + after register allocation but before code linearization. + Register allocation can delete instructions (such as dead + computations or useless moves), therefore there are more + opportunities for tunneling after allocation than before. + Symmetrically, prior tunneling helps linearization to produce + better code, e.g. by revealing that some [goto] instructions are + dead code (as the "goto L3" in the example above). +*) + +Definition is_goto_block (b: option block) : option node := + match b with Some (Bgoto s) => Some s | _ => None end. + +(** [branch_target f pc] returns the node of the CFG that is at + the end of the branch sequence starting at [pc]. If the instruction + at [pc] is not a [goto], [pc] itself is returned. + The naive definition of [branch_target] is: +<< + branch_target f pc = branch_target f pc' if f(pc) = goto pc' + = pc otherwise +>> + However, this definition can fail to terminate if + the program can contain loops consisting only of branches, as in +<< + L1: goto L1; +>> + or +<< L1: goto L2; + L2: goto L1; +>> + Coq warns us of this fact by not accepting the definition + of [branch_target] above. + + The proper way to handle this problem is to detect [goto] cycles + in the control-flow graph. For simplicity, we just bound arbitrarily + the number of iterations performed by [branch_target], + never chasing more than 10 [goto] instructions. (This many + consecutive branches is rarely, if even, encountered.) + + For a sequence of more than 10 [goto] instructions, we can return + (as branch target) any of the labels of the [goto] instructions. + This is semantically correct in any case. However, the proof + is simpler if we return the label of the first [goto] in the sequence. +*) + +Fixpoint branch_target_rec (f: LTL.function) (pc: node) (count: nat) + {struct count} : option node := + match count with + | Datatypes.O => None + | Datatypes.S count' => + match is_goto_block f.(fn_code)!pc with + | Some s => branch_target_rec f s count' + | None => Some pc + end + end. + +Definition branch_target (f: LTL.function) (pc: node) := + match branch_target_rec f pc 10%nat with + | Some pc' => pc' + | None => pc + end. + +(** The tunneling optimization simply rewrites all LTL basic blocks, + replacing the destinations of the [Bgoto] and [Bcond] instructions + with their final target, as computed by [branch_target]. *) + +Fixpoint tunnel_block (f: LTL.function) (b: block) {struct b} : block := + match b with + | Bgetstack s r b => + Bgetstack s r (tunnel_block f b) + | Bsetstack r s b => + Bsetstack r s (tunnel_block f b) + | Bop op args res b => + Bop op args res (tunnel_block f b) + | Bload chunk addr args dst b => + Bload chunk addr args dst (tunnel_block f b) + | Bstore chunk addr args src b => + Bstore chunk addr args src (tunnel_block f b) + | Bcall sig ros b => + Bcall sig ros (tunnel_block f b) + | Bgoto s => + Bgoto (branch_target f s) + | Bcond cond args s1 s2 => + Bcond cond args (branch_target f s1) (branch_target f s2) + | Breturn => + Breturn + end. + +Lemma wf_tunneled_code: + forall (f: LTL.function), + let tc := PTree.map (fun pc b => tunnel_block f b) (fn_code f) in + forall (pc: node), Plt pc (Psucc (fn_entrypoint f)) \/ tc!pc = None. +Proof. + intros. elim (fn_code_wf f pc); intro. + left; auto. right; unfold tc. + rewrite PTree.gmap; unfold option_map; rewrite H; auto. +Qed. + +Definition tunnel_function (f: LTL.function) : LTL.function := + mkfunction + (fn_sig f) + (fn_stacksize f) + (PTree.map (fun pc b => tunnel_block f b) (fn_code f)) + (fn_entrypoint f) + (wf_tunneled_code f). + +Definition tunnel_program (p: LTL.program) : LTL.program := + transform_program tunnel_function p. + diff --git a/backend/Tunnelingproof.v b/backend/Tunnelingproof.v new file mode 100644 index 00000000..111d1d83 --- /dev/null +++ b/backend/Tunnelingproof.v @@ -0,0 +1,311 @@ +(** Correctness proof for the branch tunneling optimization. *) + +Require Import Coqlib. +Require Import Maps. +Require Import AST. +Require Import Values. +Require Import Mem. +Require Import Globalenvs. +Require Import Op. +Require Import Locations. +Require Import LTL. +Require Import Tunneling. + +(** * Properties of branch target computation *) + +Lemma is_goto_block_correct: + forall b s, + is_goto_block b = Some s -> b = Some (Bgoto s). +Proof. + unfold is_goto_block; intros. + destruct b. destruct b; discriminate || congruence. + discriminate. +Qed. + +Lemma branch_target_rec_1: + forall f pc n, + branch_target_rec f pc n = Some pc + \/ branch_target_rec f pc n = None + \/ exists pc', f.(fn_code)!pc = Some(Bgoto pc'). +Proof. + intros. destruct n; simpl. + right; left; auto. + caseEq (is_goto_block f.(fn_code)!pc); intros. + right; right. exists n0. apply is_goto_block_correct; auto. + left; auto. +Qed. + +Lemma branch_target_rec_2: + forall f n pc1 pc2 pc3, + f.(fn_code)!pc1 = Some (Bgoto pc2) -> + branch_target_rec f pc1 n = Some pc3 -> + branch_target_rec f pc2 n = Some pc3. +Proof. + induction n. + simpl. intros; discriminate. + intros pc1 pc2 pc3 ATpc1 H. simpl in H. + unfold is_goto_block in H; rewrite ATpc1 in H. + simpl. caseEq (is_goto_block f.(fn_code)!pc2); intros. + apply IHn with pc2. apply is_goto_block_correct; auto. auto. + destruct n; simpl in H. discriminate. rewrite H0 in H. auto. +Qed. + +(** The following lemma captures the property of [branch_target] + on which the proof of semantic preservation relies. *) + +Lemma branch_target_characterization: + forall f pc, + branch_target f pc = pc \/ + (exists pc', f.(fn_code)!pc = Some(Bgoto pc') + /\ branch_target f pc' = branch_target f pc). +Proof. + intros. unfold branch_target. + generalize (branch_target_rec_1 f pc 10%nat). + intros [A|[A|[pc' AT]]]. + rewrite A. left; auto. + rewrite A. left; auto. + caseEq (branch_target_rec f pc 10%nat). intros pcx BT. + right. exists pc'. split. auto. + rewrite (branch_target_rec_2 _ _ _ _ _ AT BT). auto. + intro. left; auto. +Qed. + +(** * Preservation of semantics *) + +Section PRESERVATION. + +Variable p: program. +Let tp := tunnel_program p. +Let ge := Genv.globalenv p. +Let tge := Genv.globalenv tp. + +Lemma functions_translated: + forall v f, + Genv.find_funct ge v = Some f -> + Genv.find_funct tge v = Some (tunnel_function f). +Proof (@Genv.find_funct_transf _ _ tunnel_function p). + +Lemma function_ptr_translated: + forall v f, + Genv.find_funct_ptr ge v = Some f -> + Genv.find_funct_ptr tge v = Some (tunnel_function f). +Proof (@Genv.find_funct_ptr_transf _ _ tunnel_function p). + +Lemma symbols_preserved: + forall id, + Genv.find_symbol tge id = Genv.find_symbol ge id. +Proof (@Genv.find_symbol_transf _ _ tunnel_function p). + +(** These are inversion lemmas, characterizing what happens in the LTL + semantics when executing [Bgoto] instructions or basic blocks. *) + +Lemma exec_instrs_Bgoto_inv: + forall sp b1 ls1 m1 b2 ls2 m2, + exec_instrs ge sp b1 ls1 m1 b2 ls2 m2 -> + forall s1, + b1 = Bgoto s1 -> b2 = b1 /\ ls2 = ls1 /\ m2 = m1. +Proof. + induction 1. + intros. tauto. + intros. subst b1. inversion H. + intros. generalize (IHexec_instrs1 s1 H1). intros [A [B C]]. + subst b2; subst rs2; subst m2. eauto. +Qed. + +Lemma exec_block_Bgoto_inv: + forall sp s ls1 m1 out ls2 m2, + exec_block ge sp (Bgoto s) ls1 m1 out ls2 m2 -> + out = Cont s /\ ls2 = ls1 /\ m2 = m1. +Proof. + intros. inversion H; + generalize (exec_instrs_Bgoto_inv _ _ _ _ _ _ _ H0 s (refl_equal _)); + intros [A [B C]]. + split. congruence. tauto. + discriminate. + discriminate. + discriminate. +Qed. + +Lemma exec_blocks_Bgoto_inv: + forall c sp pc1 ls1 m1 out ls2 m2 s, + exec_blocks ge c sp pc1 ls1 m1 out ls2 m2 -> + c!pc1 = Some (Bgoto s) -> + (out = Cont pc1 /\ ls2 = ls1 /\ m2 = m1) + \/ exec_blocks ge c sp s ls1 m1 out ls2 m2. +Proof. + induction 1; intros. + left; tauto. + assert (b = Bgoto s). congruence. subst b. + generalize (exec_block_Bgoto_inv _ _ _ _ _ _ _ H0). + intros [A [B C]]. subst out; subst rs'; subst m'. + right. apply exec_blocks_refl. + elim (IHexec_blocks1 H1). + intros [A [B C]]. + assert (pc2 = pc1). congruence. subst rs2; subst m2; subst pc2. + apply IHexec_blocks2; auto. + intro. right. apply exec_blocks_trans with pc2 rs2 m2; auto. +Qed. + +(** The following [exec_*_prop] predicates state the correctness + of the tunneling transformation: for each LTL execution + in the original code (of an instruction, a sequence of instructions, + a basic block, a sequence of basic blocks, etc), there exists + a similar LTL execution in the tunneled code. + + Note that only the control-flow is changed: the values of locations + and the memory states are identical in the original and transformed + codes. *) + +Definition tunnel_outcome (f: function) (out: outcome) := + match out with + | Cont pc => Cont (branch_target f pc) + | Return => Return + end. + +Definition exec_instr_prop + (sp: val) (b1: block) (ls1: locset) (m1: mem) + (b2: block) (ls2: locset) (m2: mem) : Prop := + forall f, + exec_instr tge sp (tunnel_block f b1) ls1 m1 + (tunnel_block f b2) ls2 m2. + +Definition exec_instrs_prop + (sp: val) (b1: block) (ls1: locset) (m1: mem) + (b2: block) (ls2: locset) (m2: mem) : Prop := + forall f, + exec_instrs tge sp (tunnel_block f b1) ls1 m1 + (tunnel_block f b2) ls2 m2. + +Definition exec_block_prop + (sp: val) (b1: block) (ls1: locset) (m1: mem) + (out: outcome) (ls2: locset) (m2: mem) : Prop := + forall f, + exec_block tge sp (tunnel_block f b1) ls1 m1 + (tunnel_outcome f out) ls2 m2. + +Definition tunneled_code (f: function) := + PTree.map (fun pc b => tunnel_block f b) (fn_code f). + +Definition exec_blocks_prop + (c: code) (sp: val) + (pc: node) (ls1: locset) (m1: mem) + (out: outcome) (ls2: locset) (m2: mem) : Prop := + forall f, + f.(fn_code) = c -> + exec_blocks tge (tunneled_code f) sp + (branch_target f pc) ls1 m1 + (tunnel_outcome f out) ls2 m2. + +Definition exec_function_prop + (f: function) (ls1: locset) (m1: mem) + (ls2: locset) (m2: mem) : Prop := + exec_function tge (tunnel_function f) ls1 m1 ls2 m2. + +Scheme exec_instr_ind5 := Minimality for LTL.exec_instr Sort Prop + with exec_instrs_ind5 := Minimality for LTL.exec_instrs Sort Prop + with exec_block_ind5 := Minimality for LTL.exec_block Sort Prop + with exec_blocks_ind5 := Minimality for LTL.exec_blocks Sort Prop + with exec_function_ind5 := Minimality for LTL.exec_function Sort Prop. + +(** The proof of semantic preservation is a structural induction + over the LTL evaluation derivation of the original program, + using the [exec_*_prop] predicates above as induction hypotheses. *) + +Lemma tunnel_function_correct: + forall f ls1 m1 ls2 m2, + exec_function ge f ls1 m1 ls2 m2 -> + exec_function_prop f ls1 m1 ls2 m2. +Proof. + apply (exec_function_ind5 ge + exec_instr_prop + exec_instrs_prop + exec_block_prop + exec_blocks_prop + exec_function_prop); + intros; red; intros; simpl. + (* getstack *) + constructor. + (* setstack *) + constructor. + (* op *) + constructor. rewrite <- H. apply eval_operation_preserved. + exact symbols_preserved. + (* load *) + apply exec_Bload with a. rewrite <- H. + apply eval_addressing_preserved. exact symbols_preserved. + auto. + (* store *) + apply exec_Bstore with a. rewrite <- H. + apply eval_addressing_preserved. exact symbols_preserved. + auto. + (* call *) + apply exec_Bcall with (tunnel_function f). + generalize H; unfold find_function; destruct ros. + intro. apply functions_translated; auto. + rewrite symbols_preserved. destruct (Genv.find_symbol ge i). + intro. apply function_ptr_translated; auto. congruence. + rewrite H0; reflexivity. + apply H2. + (* instr_refl *) + apply exec_refl. + (* instr_one *) + apply exec_one. apply H0. + (* instr_trans *) + apply exec_trans with (tunnel_block f b2) rs2 m2; auto. + (* goto *) + apply exec_Bgoto. red in H0. simpl in H0. apply H0. + (* cond, true *) + eapply exec_Bcond_true. red in H0. simpl in H0. apply H0. auto. + (* cond, false *) + eapply exec_Bcond_false. red in H0. simpl in H0. apply H0. auto. + (* return *) + apply exec_Breturn. red in H0. simpl in H0. apply H0. + (* block_refl *) + apply exec_blocks_refl. + (* block_one *) + red in H1. + elim (branch_target_characterization f pc). + intro. rewrite H3. apply exec_blocks_one with (tunnel_block f b). + unfold tunneled_code. rewrite PTree.gmap. rewrite H2; rewrite H. + reflexivity. apply H1. + intros [pc' [ATpc BTS]]. + assert (b = Bgoto pc'). congruence. subst b. + generalize (exec_block_Bgoto_inv _ _ _ _ _ _ _ H0). + intros [A [B C]]. subst out; subst rs'; subst m'. + simpl. rewrite BTS. apply exec_blocks_refl. + (* blocks_trans *) + apply exec_blocks_trans with (branch_target f pc2) rs2 m2. + exact (H0 f H3). exact (H2 f H3). + (* function *) + econstructor. eexact H. + change (fn_code (tunnel_function f)) with (tunneled_code f). + simpl. + elim (branch_target_characterization f (fn_entrypoint f)). + intro BT. rewrite <- BT. exact (H1 f (refl_equal _)). + intros [pc [ATpc BT]]. + apply exec_blocks_trans with + (branch_target f (fn_entrypoint f)) (call_regs rs) m1. + eapply exec_blocks_one. + unfold tunneled_code. rewrite PTree.gmap. rewrite ATpc. + simpl. reflexivity. + apply exec_Bgoto. rewrite BT. apply exec_refl. + exact (H1 f (refl_equal _)). +Qed. + +End PRESERVATION. + +Theorem transf_program_correct: + forall (p: program) (r: val), + exec_program p r -> + exec_program (tunnel_program p) r. +Proof. + intros p r [b [f [ls [m [FIND1 [FIND2 [SIG [EX RES]]]]]]]]. + red. exists b; exists (tunnel_function f); exists ls; exists m. + split. change (prog_main (tunnel_program p)) with (prog_main p). + rewrite <- FIND1. apply symbols_preserved. + split. apply function_ptr_translated. assumption. + split. rewrite <- SIG. reflexivity. + split. apply tunnel_function_correct. + unfold tunnel_program. rewrite Genv.init_mem_transf. auto. + exact RES. +Qed. diff --git a/backend/Tunnelingtyping.v b/backend/Tunnelingtyping.v new file mode 100644 index 00000000..29b74f12 --- /dev/null +++ b/backend/Tunnelingtyping.v @@ -0,0 +1,44 @@ +(** Type preservation for the Tunneling pass *) + +Require Import Coqlib. +Require Import Maps. +Require Import AST. +Require Import Values. +Require Import Mem. +Require Import Globalenvs. +Require Import Op. +Require Import Locations. +Require Import LTL. +Require Import LTLtyping. +Require Import Tunneling. + +(** Tunneling preserves typing. *) + +Lemma wt_tunnel_block: + forall f b, + wt_block f b -> + wt_block (tunnel_function f) (tunnel_block f b). +Proof. + induction 1; simpl; econstructor; eauto. +Qed. + +Lemma wt_tunnel_function: + forall f, wt_function f -> wt_function (tunnel_function f). +Proof. + unfold wt_function; intros until b. + simpl. rewrite PTree.gmap. unfold option_map. + caseEq (fn_code f)!pc. intros b0 AT EQ. + injection EQ; intros; subst b. + apply wt_tunnel_block. eauto. + intros; discriminate. +Qed. + +Lemma program_typing_preserved: + forall (p: LTL.program), + wt_program p -> wt_program (tunnel_program p). +Proof. + intros; red; intros. + generalize (transform_program_function tunnel_function p i f H0). + intros [f0 [IN TRANSF]]. + subst f. apply wt_tunnel_function. eauto. +Qed. diff --git a/backend/Values.v b/backend/Values.v new file mode 100644 index 00000000..aa59e045 --- /dev/null +++ b/backend/Values.v @@ -0,0 +1,888 @@ +(** This module defines the type of values that is used in the dynamic + semantics of all our intermediate languages. *) + +Require Import Coqlib. +Require Import AST. +Require Import Integers. +Require Import Floats. + +Definition block : Set := Z. +Definition eq_block := zeq. + +(** A value is either: +- a machine integer; +- a floating-point number; +- a pointer: a pair of a memory address and an integer offset with respect + to this address; +- the [Vundef] value denoting an arbitrary bit pattern, such as the + value of an uninitialized variable. +*) + +Inductive val: Set := + | Vundef: val + | Vint: int -> val + | Vfloat: float -> val + | Vptr: block -> int -> val. + +Definition Vzero: val := Vint Int.zero. +Definition Vone: val := Vint Int.one. +Definition Vmone: val := Vint Int.mone. + +Definition Vtrue: val := Vint Int.one. +Definition Vfalse: val := Vint Int.zero. + +(** The module [Val] defines a number of arithmetic and logical operations + over type [val]. Most of these operations are straightforward extensions + of the corresponding integer or floating-point operations. *) + +Module Val. + +Definition of_bool (b: bool): val := if b then Vtrue else Vfalse. + +Definition has_type (v: val) (t: typ) : Prop := + match v, t with + | Vundef, _ => True + | Vint _, Tint => True + | Vfloat _, Tfloat => True + | Vptr _ _, Tint => True + | _, _ => False + end. + +Fixpoint has_type_list (vl: list val) (tl: list typ) {struct vl} : Prop := + match vl, tl with + | nil, nil => True + | v1 :: vs, t1 :: ts => has_type v1 t1 /\ has_type_list vs ts + | _, _ => False + end. + +(** Truth values. Pointers and non-zero integers are treated as [True]. + The integer 0 (also used to represent the null pointer) is [False]. + [Vundef] and floats are neither true nor false. *) + +Definition is_true (v: val) : Prop := + match v with + | Vint n => n <> Int.zero + | Vptr b ofs => True + | _ => False + end. + +Definition is_false (v: val) : Prop := + match v with + | Vint n => n = Int.zero + | _ => False + end. + +Inductive bool_of_val: val -> bool -> Prop := + | bool_of_val_int_true: + forall n, n <> Int.zero -> bool_of_val (Vint n) true + | bool_of_val_int_false: + bool_of_val (Vint Int.zero) false + | bool_of_val_ptr: + forall b ofs, bool_of_val (Vptr b ofs) true. + +Definition neg (v: val) : val := + match v with + | Vint n => Vint (Int.neg n) + | _ => Vundef + end. + +Definition negf (v: val) : val := + match v with + | Vfloat f => Vfloat (Float.neg f) + | _ => Vundef + end. + +Definition absf (v: val) : val := + match v with + | Vfloat f => Vfloat (Float.abs f) + | _ => Vundef + end. + +Definition intoffloat (v: val) : val := + match v with + | Vfloat f => Vint (Float.intoffloat f) + | _ => Vundef + end. + +Definition floatofint (v: val) : val := + match v with + | Vint n => Vfloat (Float.floatofint n) + | _ => Vundef + end. + +Definition floatofintu (v: val) : val := + match v with + | Vint n => Vfloat (Float.floatofintu n) + | _ => Vundef + end. + +Definition notint (v: val) : val := + match v with + | Vint n => Vint (Int.xor n Int.mone) + | _ => Vundef + end. + +Definition notbool (v: val) : val := + match v with + | Vint n => of_bool (Int.eq n Int.zero) + | Vptr b ofs => Vfalse + | _ => Vundef + end. + +Definition cast8signed (v: val) : val := + match v with + | Vint n => Vint(Int.cast8signed n) + | _ => Vundef + end. + +Definition cast8unsigned (v: val) : val := + match v with + | Vint n => Vint(Int.cast8unsigned n) + | _ => Vundef + end. + +Definition cast16signed (v: val) : val := + match v with + | Vint n => Vint(Int.cast16signed n) + | _ => Vundef + end. + +Definition cast16unsigned (v: val) : val := + match v with + | Vint n => Vint(Int.cast16unsigned n) + | _ => Vundef + end. + +Definition singleoffloat (v: val) : val := + match v with + | Vfloat f => Vfloat(Float.singleoffloat f) + | _ => Vundef + end. + +Definition add (v1 v2: val): val := + match v1, v2 with + | Vint n1, Vint n2 => Vint(Int.add n1 n2) + | Vptr b1 ofs1, Vint n2 => Vptr b1 (Int.add ofs1 n2) + | Vint n1, Vptr b2 ofs2 => Vptr b2 (Int.add ofs2 n1) + | _, _ => Vundef + end. + +Definition sub (v1 v2: val): val := + match v1, v2 with + | Vint n1, Vint n2 => Vint(Int.sub n1 n2) + | Vptr b1 ofs1, Vint n2 => Vptr b1 (Int.sub ofs1 n2) + | Vptr b1 ofs1, Vptr b2 ofs2 => + if zeq b1 b2 then Vint(Int.sub ofs1 ofs2) else Vundef + | _, _ => Vundef + end. + +Definition mul (v1 v2: val): val := + match v1, v2 with + | Vint n1, Vint n2 => Vint(Int.mul n1 n2) + | _, _ => Vundef + end. + +Definition divs (v1 v2: val): val := + match v1, v2 with + | Vint n1, Vint n2 => + if Int.eq n2 Int.zero then Vundef else Vint(Int.divs n1 n2) + | _, _ => Vundef + end. + +Definition mods (v1 v2: val): val := + match v1, v2 with + | Vint n1, Vint n2 => + if Int.eq n2 Int.zero then Vundef else Vint(Int.mods n1 n2) + | _, _ => Vundef + end. + +Definition divu (v1 v2: val): val := + match v1, v2 with + | Vint n1, Vint n2 => + if Int.eq n2 Int.zero then Vundef else Vint(Int.divu n1 n2) + | _, _ => Vundef + end. + +Definition modu (v1 v2: val): val := + match v1, v2 with + | Vint n1, Vint n2 => + if Int.eq n2 Int.zero then Vundef else Vint(Int.modu n1 n2) + | _, _ => Vundef + end. + +Definition and (v1 v2: val): val := + match v1, v2 with + | Vint n1, Vint n2 => Vint(Int.and n1 n2) + | _, _ => Vundef + end. + +Definition or (v1 v2: val): val := + match v1, v2 with + | Vint n1, Vint n2 => Vint(Int.or n1 n2) + | _, _ => Vundef + end. + +Definition xor (v1 v2: val): val := + match v1, v2 with + | Vint n1, Vint n2 => Vint(Int.xor n1 n2) + | _, _ => Vundef + end. + +Definition shl (v1 v2: val): val := + match v1, v2 with + | Vint n1, Vint n2 => + if Int.ltu n2 (Int.repr 32) + then Vint(Int.shl n1 n2) + else Vundef + | _, _ => Vundef + end. + +Definition shr (v1 v2: val): val := + match v1, v2 with + | Vint n1, Vint n2 => + if Int.ltu n2 (Int.repr 32) + then Vint(Int.shr n1 n2) + else Vundef + | _, _ => Vundef + end. + +Definition shr_carry (v1 v2: val): val := + match v1, v2 with + | Vint n1, Vint n2 => + if Int.ltu n2 (Int.repr 32) + then Vint(Int.shr_carry n1 n2) + else Vundef + | _, _ => Vundef + end. + +Definition shrx (v1 v2: val): val := + match v1, v2 with + | Vint n1, Vint n2 => + if Int.ltu n2 (Int.repr 32) + then Vint(Int.shrx n1 n2) + else Vundef + | _, _ => Vundef + end. + +Definition shru (v1 v2: val): val := + match v1, v2 with + | Vint n1, Vint n2 => + if Int.ltu n2 (Int.repr 32) + then Vint(Int.shru n1 n2) + else Vundef + | _, _ => Vundef + end. + +Definition rolm (v: val) (amount mask: int): val := + match v with + | Vint n => Vint(Int.rolm n amount mask) + | _ => Vundef + end. + +Definition addf (v1 v2: val): val := + match v1, v2 with + | Vfloat f1, Vfloat f2 => Vfloat(Float.add f1 f2) + | _, _ => Vundef + end. + +Definition subf (v1 v2: val): val := + match v1, v2 with + | Vfloat f1, Vfloat f2 => Vfloat(Float.sub f1 f2) + | _, _ => Vundef + end. + +Definition mulf (v1 v2: val): val := + match v1, v2 with + | Vfloat f1, Vfloat f2 => Vfloat(Float.mul f1 f2) + | _, _ => Vundef + end. + +Definition divf (v1 v2: val): val := + match v1, v2 with + | Vfloat f1, Vfloat f2 => Vfloat(Float.div f1 f2) + | _, _ => Vundef + end. + +Definition cmp_mismatch (c: comparison): val := + match c with + | Ceq => Vfalse + | Cne => Vtrue + | _ => Vundef + end. + +Definition cmp (c: comparison) (v1 v2: val): val := + match v1, v2 with + | Vint n1, Vint n2 => of_bool (Int.cmp c n1 n2) + | Vint n1, Vptr b2 ofs2 => + if Int.eq n1 Int.zero then cmp_mismatch c else Vundef + | Vptr b1 ofs1, Vptr b2 ofs2 => + if zeq b1 b2 + then of_bool (Int.cmp c ofs1 ofs2) + else cmp_mismatch c + | Vptr b1 ofs1, Vint n2 => + if Int.eq n2 Int.zero then cmp_mismatch c else Vundef + | _, _ => Vundef + end. + +Definition cmpu (c: comparison) (v1 v2: val): val := + match v1, v2 with + | Vint n1, Vint n2 => + of_bool (Int.cmpu c n1 n2) + | Vint n1, Vptr b2 ofs2 => + if Int.eq n1 Int.zero then cmp_mismatch c else Vundef + | Vptr b1 ofs1, Vptr b2 ofs2 => + if zeq b1 b2 + then of_bool (Int.cmpu c ofs1 ofs2) + else cmp_mismatch c + | Vptr b1 ofs1, Vint n2 => + if Int.eq n2 Int.zero then cmp_mismatch c else Vundef + | _, _ => Vundef + end. + +Definition cmpf (c: comparison) (v1 v2: val): val := + match v1, v2 with + | Vfloat f1, Vfloat f2 => of_bool (Float.cmp c f1 f2) + | _, _ => Vundef + end. + +(** [load_result] is used in the memory model (library [Mem]) + to post-process the results of a memory read. For instance, + consider storing the integer value [0xFFF] on 1 byte at a + given address, and reading it back. If it is read back with + chunk [Mint8unsigned], zero-extension must be performed, resulting + in [0xFF]. If it is read back as a [Mint8signed], sign-extension + is performed and [0xFFFFFFFF] is returned. Type mismatches + (e.g. reading back a float as a [Mint32]) read back as [Vundef]. *) + +Definition load_result (chunk: memory_chunk) (v: val) := + match chunk, v with + | Mint8signed, Vint n => Vint (Int.cast8signed n) + | Mint8unsigned, Vint n => Vint (Int.cast8unsigned n) + | Mint16signed, Vint n => Vint (Int.cast16signed n) + | Mint16unsigned, Vint n => Vint (Int.cast16unsigned n) + | Mint32, Vint n => Vint n + | Mint32, Vptr b ofs => Vptr b ofs + | Mfloat32, Vfloat f => Vfloat(Float.singleoffloat f) + | Mfloat64, Vfloat f => Vfloat f + | _, _ => Vundef + end. + +(** Theorems on arithmetic operations. *) + +Theorem cast8unsigned_and: + forall x, cast8unsigned x = and x (Vint(Int.repr 255)). +Proof. + destruct x; simpl; auto. decEq. apply Int.cast8unsigned_and. +Qed. + +Theorem cast16unsigned_and: + forall x, cast16unsigned x = and x (Vint(Int.repr 65535)). +Proof. + destruct x; simpl; auto. decEq. apply Int.cast16unsigned_and. +Qed. + +Theorem istrue_not_isfalse: + forall v, is_false v -> is_true (notbool v). +Proof. + destruct v; simpl; try contradiction. + intros. subst i. simpl. discriminate. +Qed. + +Theorem isfalse_not_istrue: + forall v, is_true v -> is_false (notbool v). +Proof. + destruct v; simpl; try contradiction. + intros. generalize (Int.eq_spec i Int.zero). + case (Int.eq i Int.zero); intro. + contradiction. simpl. auto. + auto. +Qed. + +Theorem bool_of_true_val: + forall v, is_true v -> bool_of_val v true. +Proof. + intro. destruct v; simpl; intros; try contradiction. + constructor; auto. constructor. +Qed. + +Theorem bool_of_true_val2: + forall v, bool_of_val v true -> is_true v. +Proof. + intros. inversion H; simpl; auto. +Qed. + +Theorem bool_of_true_val_inv: + forall v b, is_true v -> bool_of_val v b -> b = true. +Proof. + intros. inversion H0; subst v b; simpl in H; auto. +Qed. + +Theorem bool_of_false_val: + forall v, is_false v -> bool_of_val v false. +Proof. + intro. destruct v; simpl; intros; try contradiction. + subst i; constructor. +Qed. + +Theorem bool_of_false_val2: + forall v, bool_of_val v false -> is_false v. +Proof. + intros. inversion H; simpl; auto. +Qed. + +Theorem bool_of_false_val_inv: + forall v b, is_false v -> bool_of_val v b -> b = false. +Proof. + intros. inversion H0; subst v b; simpl in H. + congruence. auto. contradiction. +Qed. + +Theorem notbool_negb_1: + forall b, of_bool (negb b) = notbool (of_bool b). +Proof. + destruct b; reflexivity. +Qed. + +Theorem notbool_negb_2: + forall b, of_bool b = notbool (of_bool (negb b)). +Proof. + destruct b; reflexivity. +Qed. + +Theorem notbool_idem2: + forall b, notbool(notbool(of_bool b)) = of_bool b. +Proof. + destruct b; reflexivity. +Qed. + +Theorem notbool_idem3: + forall x, notbool(notbool(notbool x)) = notbool x. +Proof. + destruct x; simpl; auto. + case (Int.eq i Int.zero); reflexivity. +Qed. + +Theorem add_commut: forall x y, add x y = add y x. +Proof. + destruct x; destruct y; simpl; auto. + decEq. apply Int.add_commut. +Qed. + +Theorem add_assoc: forall x y z, add (add x y) z = add x (add y z). +Proof. + destruct x; destruct y; destruct z; simpl; auto. + rewrite Int.add_assoc; auto. + rewrite Int.add_assoc; auto. + decEq. decEq. apply Int.add_commut. + decEq. rewrite Int.add_commut. rewrite <- Int.add_assoc. + decEq. apply Int.add_commut. + decEq. rewrite Int.add_assoc. auto. +Qed. + +Theorem add_permut: forall x y z, add x (add y z) = add y (add x z). +Proof. + intros. rewrite (add_commut y z). rewrite <- add_assoc. apply add_commut. +Qed. + +Theorem add_permut_4: + forall x y z t, add (add x y) (add z t) = add (add x z) (add y t). +Proof. + intros. rewrite add_permut. rewrite add_assoc. + rewrite add_permut. symmetry. apply add_assoc. +Qed. + +Theorem neg_zero: neg Vzero = Vzero. +Proof. + reflexivity. +Qed. + +Theorem neg_add_distr: forall x y, neg(add x y) = add (neg x) (neg y). +Proof. + destruct x; destruct y; simpl; auto. decEq. apply Int.neg_add_distr. +Qed. + +Theorem sub_zero_r: forall x, sub Vzero x = neg x. +Proof. + destruct x; simpl; auto. +Qed. + +Theorem sub_add_opp: forall x y, sub x (Vint y) = add x (Vint (Int.neg y)). +Proof. + destruct x; intro y; simpl; auto; rewrite Int.sub_add_opp; auto. +Qed. + +Theorem sub_add_l: + forall v1 v2 i, sub (add v1 (Vint i)) v2 = add (sub v1 v2) (Vint i). +Proof. + destruct v1; destruct v2; intros; simpl; auto. + rewrite Int.sub_add_l. auto. + rewrite Int.sub_add_l. auto. + case (zeq b b0); intro. rewrite Int.sub_add_l. auto. reflexivity. +Qed. + +Theorem sub_add_r: + forall v1 v2 i, sub v1 (add v2 (Vint i)) = add (sub v1 v2) (Vint (Int.neg i)). +Proof. + destruct v1; destruct v2; intros; simpl; auto. + rewrite Int.sub_add_r. auto. + repeat rewrite Int.sub_add_opp. decEq. + repeat rewrite Int.add_assoc. decEq. apply Int.add_commut. + decEq. repeat rewrite Int.sub_add_opp. + rewrite Int.add_assoc. decEq. apply Int.neg_add_distr. + case (zeq b b0); intro. simpl. decEq. + repeat rewrite Int.sub_add_opp. rewrite Int.add_assoc. decEq. + apply Int.neg_add_distr. + reflexivity. +Qed. + +Theorem mul_commut: forall x y, mul x y = mul y x. +Proof. + destruct x; destruct y; simpl; auto. decEq. apply Int.mul_commut. +Qed. + +Theorem mul_assoc: forall x y z, mul (mul x y) z = mul x (mul y z). +Proof. + destruct x; destruct y; destruct z; simpl; auto. + decEq. apply Int.mul_assoc. +Qed. + +Theorem mul_add_distr_l: + forall x y z, mul (add x y) z = add (mul x z) (mul y z). +Proof. + destruct x; destruct y; destruct z; simpl; auto. + decEq. apply Int.mul_add_distr_l. +Qed. + + +Theorem mul_add_distr_r: + forall x y z, mul x (add y z) = add (mul x y) (mul x z). +Proof. + destruct x; destruct y; destruct z; simpl; auto. + decEq. apply Int.mul_add_distr_r. +Qed. + +Theorem mul_pow2: + forall x n logn, + Int.is_power2 n = Some logn -> + mul x (Vint n) = shl x (Vint logn). +Proof. + intros; destruct x; simpl; auto. + change 32 with (Z_of_nat wordsize). + rewrite (Int.is_power2_range _ _ H). decEq. apply Int.mul_pow2. auto. +Qed. + +Theorem mods_divs: + forall x y, mods x y = sub x (mul (divs x y) y). +Proof. + destruct x; destruct y; simpl; auto. + case (Int.eq i0 Int.zero); simpl. auto. decEq. apply Int.mods_divs. +Qed. + +Theorem modu_divu: + forall x y, modu x y = sub x (mul (divu x y) y). +Proof. + destruct x; destruct y; simpl; auto. + generalize (Int.eq_spec i0 Int.zero); + case (Int.eq i0 Int.zero); simpl. auto. + intro. decEq. apply Int.modu_divu. auto. +Qed. + +Theorem divs_pow2: + forall x n logn, + Int.is_power2 n = Some logn -> + divs x (Vint n) = shrx x (Vint logn). +Proof. + intros; destruct x; simpl; auto. + change 32 with (Z_of_nat wordsize). + rewrite (Int.is_power2_range _ _ H). + generalize (Int.eq_spec n Int.zero); + case (Int.eq n Int.zero); intro. + subst n. compute in H. discriminate. + decEq. apply Int.divs_pow2. auto. +Qed. + +Theorem divu_pow2: + forall x n logn, + Int.is_power2 n = Some logn -> + divu x (Vint n) = shru x (Vint logn). +Proof. + intros; destruct x; simpl; auto. + change 32 with (Z_of_nat wordsize). + rewrite (Int.is_power2_range _ _ H). + generalize (Int.eq_spec n Int.zero); + case (Int.eq n Int.zero); intro. + subst n. compute in H. discriminate. + decEq. apply Int.divu_pow2. auto. +Qed. + +Theorem modu_pow2: + forall x n logn, + Int.is_power2 n = Some logn -> + modu x (Vint n) = and x (Vint (Int.sub n Int.one)). +Proof. + intros; destruct x; simpl; auto. + generalize (Int.eq_spec n Int.zero); + case (Int.eq n Int.zero); intro. + subst n. compute in H. discriminate. + decEq. eapply Int.modu_and; eauto. +Qed. + +Theorem and_commut: forall x y, and x y = and y x. +Proof. + destruct x; destruct y; simpl; auto. decEq. apply Int.and_commut. +Qed. + +Theorem and_assoc: forall x y z, and (and x y) z = and x (and y z). +Proof. + destruct x; destruct y; destruct z; simpl; auto. + decEq. apply Int.and_assoc. +Qed. + +Theorem or_commut: forall x y, or x y = or y x. +Proof. + destruct x; destruct y; simpl; auto. decEq. apply Int.or_commut. +Qed. + +Theorem or_assoc: forall x y z, or (or x y) z = or x (or y z). +Proof. + destruct x; destruct y; destruct z; simpl; auto. + decEq. apply Int.or_assoc. +Qed. + +Theorem xor_commut: forall x y, xor x y = xor y x. +Proof. + destruct x; destruct y; simpl; auto. decEq. apply Int.xor_commut. +Qed. + +Theorem xor_assoc: forall x y z, xor (xor x y) z = xor x (xor y z). +Proof. + destruct x; destruct y; destruct z; simpl; auto. + decEq. apply Int.xor_assoc. +Qed. + +Theorem shl_mul: forall x y, Val.mul x (Val.shl Vone y) = Val.shl x y. +Proof. + destruct x; destruct y; simpl; auto. + case (Int.ltu i0 (Int.repr 32)); auto. + decEq. symmetry. apply Int.shl_mul. +Qed. + +Theorem shl_rolm: + forall x n, + Int.ltu n (Int.repr 32) = true -> + shl x (Vint n) = rolm x n (Int.shl Int.mone n). +Proof. + intros; destruct x; simpl; auto. + rewrite H. decEq. apply Int.shl_rolm. exact H. +Qed. + +Theorem shru_rolm: + forall x n, + Int.ltu n (Int.repr 32) = true -> + shru x (Vint n) = rolm x (Int.sub (Int.repr 32) n) (Int.shru Int.mone n). +Proof. + intros; destruct x; simpl; auto. + rewrite H. decEq. apply Int.shru_rolm. exact H. +Qed. + +Theorem shrx_carry: + forall x y, + add (shr x y) (shr_carry x y) = shrx x y. +Proof. + destruct x; destruct y; simpl; auto. + case (Int.ltu i0 (Int.repr 32)); auto. + simpl. decEq. apply Int.shrx_carry. +Qed. + +Theorem or_rolm: + forall x n m1 m2, + or (rolm x n m1) (rolm x n m2) = rolm x n (Int.or m1 m2). +Proof. + intros; destruct x; simpl; auto. + decEq. apply Int.or_rolm. +Qed. + +Theorem rolm_rolm: + forall x n1 m1 n2 m2, + rolm (rolm x n1 m1) n2 m2 = + rolm x (Int.and (Int.add n1 n2) (Int.repr 31)) + (Int.and (Int.rol m1 n2) m2). +Proof. + intros; destruct x; simpl; auto. + decEq. + replace (Int.and (Int.add n1 n2) (Int.repr 31)) + with (Int.modu (Int.add n1 n2) (Int.repr 32)). + apply Int.rolm_rolm. + change (Int.repr 31) with (Int.sub (Int.repr 32) Int.one). + apply Int.modu_and with (Int.repr 5). reflexivity. +Qed. + +Theorem rolm_zero: + forall x m, + rolm x Int.zero m = and x (Vint m). +Proof. + intros; destruct x; simpl; auto. decEq. apply Int.rolm_zero. +Qed. + +Theorem addf_commut: forall x y, addf x y = addf y x. +Proof. + destruct x; destruct y; simpl; auto. decEq. apply Float.addf_commut. +Qed. + +Lemma negate_cmp_mismatch: + forall c, + cmp_mismatch (negate_comparison c) = notbool(cmp_mismatch c). +Proof. + destruct c; reflexivity. +Qed. + +Theorem negate_cmp: + forall c x y, + cmp (negate_comparison c) x y = notbool (cmp c x y). +Proof. + destruct x; destruct y; simpl; auto. + rewrite Int.negate_cmp. apply notbool_negb_1. + case (Int.eq i Int.zero). apply negate_cmp_mismatch. reflexivity. + case (Int.eq i0 Int.zero). apply negate_cmp_mismatch. reflexivity. + case (zeq b b0); intro. + rewrite Int.negate_cmp. apply notbool_negb_1. + apply negate_cmp_mismatch. +Qed. + +Theorem negate_cmpu: + forall c x y, + cmpu (negate_comparison c) x y = notbool (cmpu c x y). +Proof. + destruct x; destruct y; simpl; auto. + rewrite Int.negate_cmpu. apply notbool_negb_1. + case (Int.eq i Int.zero). apply negate_cmp_mismatch. reflexivity. + case (Int.eq i0 Int.zero). apply negate_cmp_mismatch. reflexivity. + case (zeq b b0); intro. + rewrite Int.negate_cmpu. apply notbool_negb_1. + apply negate_cmp_mismatch. +Qed. + +Lemma swap_cmp_mismatch: + forall c, cmp_mismatch (swap_comparison c) = cmp_mismatch c. +Proof. + destruct c; reflexivity. +Qed. + +Theorem swap_cmp: + forall c x y, + cmp (swap_comparison c) x y = cmp c y x. +Proof. + destruct x; destruct y; simpl; auto. + rewrite Int.swap_cmp. auto. + case (Int.eq i Int.zero). apply swap_cmp_mismatch. auto. + case (Int.eq i0 Int.zero). apply swap_cmp_mismatch. auto. + case (zeq b b0); intro. + subst b0. rewrite zeq_true. rewrite Int.swap_cmp. auto. + rewrite zeq_false. apply swap_cmp_mismatch. auto. +Qed. + +Theorem swap_cmpu: + forall c x y, + cmpu (swap_comparison c) x y = cmpu c y x. +Proof. + destruct x; destruct y; simpl; auto. + rewrite Int.swap_cmpu. auto. + case (Int.eq i Int.zero). apply swap_cmp_mismatch. auto. + case (Int.eq i0 Int.zero). apply swap_cmp_mismatch. auto. + case (zeq b b0); intro. + subst b0. rewrite zeq_true. rewrite Int.swap_cmpu. auto. + rewrite zeq_false. apply swap_cmp_mismatch. auto. +Qed. + +Theorem negate_cmpf_eq: + forall v1 v2, notbool (cmpf Cne v1 v2) = cmpf Ceq v1 v2. +Proof. + destruct v1; destruct v2; simpl; auto. + rewrite Float.cmp_ne_eq. rewrite notbool_negb_1. + apply notbool_idem2. +Qed. + +Lemma or_of_bool: + forall b1 b2, or (of_bool b1) (of_bool b2) = of_bool (b1 || b2). +Proof. + destruct b1; destruct b2; reflexivity. +Qed. + +Theorem cmpf_le: + forall v1 v2, cmpf Cle v1 v2 = or (cmpf Clt v1 v2) (cmpf Ceq v1 v2). +Proof. + destruct v1; destruct v2; simpl; auto. + rewrite or_of_bool. decEq. apply Float.cmp_le_lt_eq. +Qed. + +Theorem cmpf_ge: + forall v1 v2, cmpf Cge v1 v2 = or (cmpf Cgt v1 v2) (cmpf Ceq v1 v2). +Proof. + destruct v1; destruct v2; simpl; auto. + rewrite or_of_bool. decEq. apply Float.cmp_ge_gt_eq. +Qed. + +Definition is_bool (v: val) := + v = Vundef \/ v = Vtrue \/ v = Vfalse. + +Lemma of_bool_is_bool: + forall b, is_bool (of_bool b). +Proof. + destruct b; unfold is_bool; simpl; tauto. +Qed. + +Lemma undef_is_bool: is_bool Vundef. +Proof. + unfold is_bool; tauto. +Qed. + +Lemma cmp_mismatch_is_bool: + forall c, is_bool (cmp_mismatch c). +Proof. + destruct c; simpl; unfold is_bool; tauto. +Qed. + +Lemma cmp_is_bool: + forall c v1 v2, is_bool (cmp c v1 v2). +Proof. + destruct v1; destruct v2; simpl; try apply undef_is_bool. + apply of_bool_is_bool. + case (Int.eq i Int.zero). apply cmp_mismatch_is_bool. apply undef_is_bool. + case (Int.eq i0 Int.zero). apply cmp_mismatch_is_bool. apply undef_is_bool. + case (zeq b b0); intro. apply of_bool_is_bool. apply cmp_mismatch_is_bool. +Qed. + +Lemma cmpu_is_bool: + forall c v1 v2, is_bool (cmpu c v1 v2). +Proof. + destruct v1; destruct v2; simpl; try apply undef_is_bool. + apply of_bool_is_bool. + case (Int.eq i Int.zero). apply cmp_mismatch_is_bool. apply undef_is_bool. + case (Int.eq i0 Int.zero). apply cmp_mismatch_is_bool. apply undef_is_bool. + case (zeq b b0); intro. apply of_bool_is_bool. apply cmp_mismatch_is_bool. +Qed. + +Lemma cmpf_is_bool: + forall c v1 v2, is_bool (cmpf c v1 v2). +Proof. + destruct v1; destruct v2; simpl; + apply undef_is_bool || apply of_bool_is_bool. +Qed. + +Lemma notbool_is_bool: + forall v, is_bool (notbool v). +Proof. + destruct v; simpl. + apply undef_is_bool. apply of_bool_is_bool. + apply undef_is_bool. unfold is_bool; tauto. +Qed. + +Lemma notbool_xor: + forall v, is_bool v -> v = xor (notbool v) Vone. +Proof. + intros. elim H; intro. + subst v. reflexivity. + elim H0; intro; subst v; reflexivity. +Qed. + +End Val. -- cgit