From 95f918c38b1e59f40ae7af455ec2c6746289375e Mon Sep 17 00:00:00 2001 From: Pierre Goutagny Date: Thu, 17 Jun 2021 17:31:10 +0200 Subject: Change "Tunneling" to "LTLTunneling" everywhere To respect the symmetry between RTL- and LTL-Tunneling --- backend/LTLTunneling.v | 167 +++++++++++ backend/LTLTunnelingaux.ml | 108 +++++++ backend/LTLTunnelingproof.v | 666 ++++++++++++++++++++++++++++++++++++++++++++ backend/RTLTunneling.v | 2 +- backend/Tunneling.v | 167 ----------- backend/Tunnelingaux.ml | 108 ------- backend/Tunnelinglibs.ml | 2 +- backend/Tunnelingproof.v | 666 -------------------------------------------- 8 files changed, 943 insertions(+), 943 deletions(-) create mode 100644 backend/LTLTunneling.v create mode 100644 backend/LTLTunnelingaux.ml create mode 100644 backend/LTLTunnelingproof.v delete mode 100644 backend/Tunneling.v delete mode 100644 backend/Tunnelingaux.ml delete mode 100644 backend/Tunnelingproof.v (limited to 'backend') diff --git a/backend/LTLTunneling.v b/backend/LTLTunneling.v new file mode 100644 index 00000000..4b404724 --- /dev/null +++ b/backend/LTLTunneling.v @@ -0,0 +1,167 @@ +(* *********************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Xavier Leroy, INRIA Paris-Rocquencourt *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* *) +(* Copyright Institut National de Recherche en Informatique et en *) +(* Automatique. All rights reserved. This file is distributed *) +(* under the terms of the INRIA Non-Commercial License Agreement. *) +(* *) +(* *********************************************************************) + +(** Branch tunneling (optimization of branches to branches). *) + +Require Import Coqlib Maps Errors. +Require Import AST. +Require Import LTL. + +(** Branch tunneling shortens sequences of branches (with no intervening + computations) by rewriting the branch and conditional branch instructions + so that they jump directly to the end of the branch sequence. + For example: +<< + L1: if (cond) nop L2; L1: nop L3; + L2: nop L3; becomes L2: nop L3; + L3: instr; L3: instr; + L4: if (cond) goto L1; L4: if (cond) nop L1; +>> + This optimization can be applied to several of our intermediate + languages. We choose to perform it on the [LTL] language, + after register allocation but before code linearization. + Register allocation can delete instructions (such as dead + computations or useless moves), therefore there are more + opportunities for tunneling after allocation than before. + Symmetrically, prior tunneling helps linearization to produce + better code, e.g. by revealing that some [branch] instructions are + dead code (as the "branch L3" in the example above). +*) + +(** The implementation consists in two passes: the first pass + records the branch t of each "nop" + and the second pass replace any "nop" node to [pc] + by a branch to a "nop" at [branch_t f pc] + +Naively, we may define [branch_t f pc] as follows: +<< + branch_t f pc = branch_t f pc' if f(pc) = nop pc' + = pc otherwise +>> + However, this definition can fail to terminate if + the program can contain loops consisting only of branches, as in +<< + L1: branch L1; +>> + or +<< + L1: nop L2; + L2: nop L1; +>> + Coq warns us of this fact by not accepting the definition + of [branch_t] above. + + To handle this problem, we use a union-find data structure, adding equalities [pc = pc'] + for every instruction [pc: nop pc'] in the function. + + Moreover, because the elimination of "useless" [Lcond] depends on the current [uf] datastructure, + we need to iterate until we reach a fixpoint. + + Actually, it is simpler and more efficient to perform this in an external oracle, that also returns a measure + in order to help the proof. + + A verifier checks that this data-structure is correct. +*) + +Definition UF := PTree.t (node * Z). + +(* The oracle returns a map of "nop" node to their target with a distance (ie the number of the "nop" node on the path) to the target. *) +Axiom branch_target: LTL.function -> UF. +Extract Constant branch_target => "LTLTunnelingaux.branch_target". + +Local Open Scope error_monad_scope. + +Definition get (td: UF) pc:node*Z := + match td!pc with + | Some (t,d) => (t,Z.abs d) + | _ => (pc,0) + end. + +Definition target (td: UF) (pc:node): node := fst (get td pc). +Coercion target: UF >-> Funclass. + +(* we check that the domain of [td] is included in the domain of [c] *) +Definition check_included (td: UF) (c: code): option bblock + := PTree.fold (fun (ok:option bblock) pc _ => if ok then c!pc else None) td (Some nil). + +(* we check the validity of targets and their bound: + the distance of a "nop" node (w.r.t to the target) must be greater than the one of its parents. +*) +Definition check_bblock (td: UF) (pc:node) (bb: bblock): res unit + := match td!pc with + | None => OK tt + | Some (tpc, dpc) => + let dpc := Z.abs dpc in + match bb with + | Lbranch s ::_ => + let (ts, ds) := get td s in + if peq tpc ts then + if zlt ds dpc then OK tt + else Error (msg "bad distance in Lbranch") + else Error (msg "invalid skip of Lbranch") + | Lcond _ _ s1 s2 _ :: _ => + let (ts1, ds1) := get td s1 in + let (ts2, ds2) := get td s2 in + if peq tpc ts1 then + if peq tpc ts2 then + if zlt ds1 dpc then + if zlt ds2 dpc then OK tt + else Error (msg "bad distance on else branch") + else Error (msg "bad distance on then branch") + else Error (msg "invalid skip of else branch") + else Error (msg "invalid skip of then branch") + | _ => Error (msg "cannot skip this block") + end + end. + +Definition check_code (td: UF) (c:code): res unit + := PTree.fold (fun ok pc bb => do _ <- ok; check_bblock td pc bb) c (OK tt). + +(** The second pass rewrites all LTL instructions, replacing every + successor [s] of every instruction by [t s], the canonical representative + of its equivalence class in the union-find data structure. *) + +Definition tunnel_instr (t: node -> node) (i: instruction) : instruction := + match i with + | Lbranch s => Lbranch (t s) + | Lcond cond args s1 s2 info => + let s1' := t s1 in let s2' := t s2 in + if peq s1' s2' + then Lbranch s1' + else Lcond cond args s1' s2' info + | Ljumptable arg tbl => Ljumptable arg (List.map t tbl) + | _ => i + end. + +Definition tunnel_block (t: node -> node) (b: bblock) : bblock := + List.map (tunnel_instr t) b. + +Definition tunnel_function (f: LTL.function) : res LTL.function := + let td := branch_target f in + let c := (fn_code f) in + if check_included td c then + do _ <- check_code td c ; OK + (mkfunction + (fn_sig f) + (fn_stacksize f) + (PTree.map1 (tunnel_block td) c) + (td (fn_entrypoint f))) + else + Error (msg "Some node of the union-find is not in the CFG") + . + +Definition tunnel_fundef (f: fundef) : res fundef := + transf_partial_fundef tunnel_function f. + +Definition transf_program (p: program) : res program := + transform_partial_program tunnel_fundef p. diff --git a/backend/LTLTunnelingaux.ml b/backend/LTLTunnelingaux.ml new file mode 100644 index 00000000..c3b8cf82 --- /dev/null +++ b/backend/LTLTunnelingaux.ml @@ -0,0 +1,108 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* *) +(* Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +(* + +This file implements the [branch_target] oracle that identifies "nop" branches in a LTL function, +and computes their target node with the distance (ie the number of cummulated nops) toward this target. + +See [LTLTunneling.v] + +*) + +open Coqlib +open LTL +open Maps +open Camlcoq +open Tunnelinglibs + +module LANG = struct + type code_unit = LTL.bblock + type funct = LTL.coq_function +end + +module OPT = struct + let langname = "LTL" + let limit_tunneling = None + let debug_flag = ref false + let final_dump = false +end + +module Partial = Tunnelinglibs.Tunneling(LANG)(OPT) + +module FUNS = struct + let build_simplified_cfg c acc pc bb = + match bb with + | Lbranch s :: _ -> + let ns = get_node c s in + set_branch c pc ns; + acc + | Lcond (_, _, s1, s2, _) :: _ -> + c.num_rems <- c.num_rems + 1; + let ns1 = get_node c s1 in + let ns2 = get_node c s2 in + let npc = get_node c pc in + npc.inst <- COND(ns1, ns2); + npc::acc + | _ -> acc + + let print_code_unit c println (pc, bb) = + match bb with + | Lbranch s::_ -> (if println then Partial.debug "\n"); Partial.debug "%d:Lbranch %d %s\n" pc (P.to_int s) (string_of_labeli c.nodes pc); false + | Lcond (_, _, s1, s2, _)::_ -> (if println then Partial.debug "\n"); Partial.debug "%d:Lcond (%d,%d) %s\n" pc (P.to_int s1) (P.to_int s2) (string_of_labeli c.nodes pc); false + | _ -> Partial.debug "%d " pc; true + + let fn_code f = f.fn_code + let fn_entrypoint f = f.fn_entrypoint + + + (*************************************************************) + (* Copy-paste of the extracted code of the verifier *) + (* with [raise (BugOnPC (P.to_int pc))] instead of [Error.*] *) + + let check_code_unit td pc bb = + match PTree.get pc td with + | Some p -> + let (tpc, dpc) = p in + let dpc0 = dpc in + (match bb with + | [] -> + raise (BugOnPC (P.to_int pc)) + | i :: _ -> + (match i with + | Lbranch s -> + let (ts, ds) = get td s in + if peq tpc ts + then if zlt ds dpc0 + then () + else raise (BugOnPC (P.to_int pc)) + else raise (BugOnPC (P.to_int pc)) + | Lcond (_, _, s1, s2, _) -> + let (ts1, ds1) = get td s1 in + let (ts2, ds2) = get td s2 in + if peq tpc ts1 + then if peq tpc ts2 + then if zlt ds1 dpc0 + then if zlt ds2 dpc0 + then () + else raise (BugOnPC (P.to_int pc)) + else raise (BugOnPC (P.to_int pc)) + else raise (BugOnPC (P.to_int pc)) + else raise (BugOnPC (P.to_int pc)) + | _ -> + raise (BugOnPC (P.to_int pc)))) + | None -> () +end + +module T = Partial.T(FUNS) +let branch_target = T.branch_target + diff --git a/backend/LTLTunnelingproof.v b/backend/LTLTunnelingproof.v new file mode 100644 index 00000000..d36d3c76 --- /dev/null +++ b/backend/LTLTunnelingproof.v @@ -0,0 +1,666 @@ +(* *********************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Xavier Leroy, INRIA Paris-Rocquencourt *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* *) +(* Copyright Institut National de Recherche en Informatique et en *) +(* Automatique. All rights reserved. This file is distributed *) +(* under the terms of the INRIA Non-Commercial License Agreement. *) +(* *) +(* *********************************************************************) + +(** Correctness proof for the branch tunneling optimization. *) + +Require Import Coqlib Maps Errors. +Require Import AST Linking. +Require Import Values Memory Events Globalenvs Smallstep. +Require Import Op Locations LTL. +Require Import LTLTunneling. + +Local Open Scope nat. + + +(** * Properties of the branch_target, when the verifier succeeds *) + +Definition check_included_spec (c:code) (td:UF) (ok: option bblock) := + ok <> None -> forall pc, c!pc = None -> td!pc = None. + +Lemma check_included_correct (td: UF) (c: code): + check_included_spec c td (check_included td c). +Proof. + apply PTree_Properties.fold_rec with (P := check_included_spec c). +- (* extensionality *) + unfold check_included_spec. intros m m' a EQ IND X pc. rewrite <- EQ; auto. +- (* base case *) + intros _ pc. rewrite PTree.gempty; try congruence. +- (* inductive case *) + unfold check_included_spec. + intros m [|] pc bb NEW ATPC IND; simpl; try congruence. + intros H pc0. rewrite PTree.gsspec; destruct (peq _ _); subst; simpl; try congruence. + intros; eapply IND; try congruence. +Qed. + +Inductive target_bounds (target: node -> node) (bound: node -> nat) (pc: node): (option bblock) -> Prop := + | TB_default (TB: target pc = pc) ob + : target_bounds target bound pc ob + | TB_branch s bb + (EQ: target pc = target s) + (DECREASE: bound s < bound pc) + : target_bounds target bound pc (Some (Lbranch s::bb)) + | TB_cond cond args s1 s2 info bb + (EQ1: target pc = target s1) + (EQ2: target pc = target s2) + (DEC1: bound s1 < bound pc) + (DEC2: bound s2 < bound pc) + : target_bounds target bound pc (Some (Lcond cond args s1 s2 info::bb)) + . +Local Hint Resolve TB_default: core. + +Lemma target_None (td:UF) (pc: node): td!pc = None -> td pc = pc. +Proof. + unfold target, get. intros H; rewrite H; auto. +Qed. +Local Hint Resolve target_None Z.abs_nonneg: core. + +Lemma get_nonneg td pc t d: get td pc = (t, d) -> (0 <= d)%Z. +Proof. + unfold get. destruct (td!_) as [(t0&d0)|]; intros H; inversion H; subst; simpl; lia || auto. +Qed. +Local Hint Resolve get_nonneg: core. + +Definition bound (td: UF) (pc: node) := Z.to_nat (snd (get td pc)). + +Lemma check_bblock_correct (td:UF) (pc:node) (bb: bblock): + check_bblock td pc bb = OK tt -> + target_bounds (target td) (bound td) pc (Some bb). +Proof. + unfold check_bblock, bound. + destruct (td!pc) as [(tpc&dpc)|] eqn:Hpc; auto. + assert (Tpc: td pc = tpc). { unfold target, get; rewrite Hpc; simpl; auto. } + assert (Dpc: snd (get td pc) = Z.abs dpc). { unfold get; rewrite Hpc; simpl; auto. } + destruct bb as [|[ ] bb]; simpl; try congruence. + + destruct (get td s) as (ts, ds) eqn:Hs. + repeat (destruct (peq _ _) || destruct (zlt _ _)); simpl; try congruence. + intros; apply TB_branch. + * rewrite Tpc. unfold target; rewrite Hs; simpl; auto. + * rewrite Dpc, Hs; simpl. apply Z2Nat.inj_lt; eauto. + + destruct (get td s1) as (ts1, ds1) eqn:Hs1. + destruct (get td s2) as (ts2, ds2) eqn:Hs2. + repeat (destruct (peq _ _) || destruct (zlt _ _)); simpl; try congruence. + intros; apply TB_cond. + * rewrite Tpc. unfold target; rewrite Hs1; simpl; auto. + * rewrite Tpc. unfold target; rewrite Hs2; simpl; auto. + * rewrite Dpc, Hs1; simpl. apply Z2Nat.inj_lt; eauto. + * rewrite Dpc, Hs2; simpl. apply Z2Nat.inj_lt; eauto. +Qed. + +Definition check_code_spec (td:UF) (c:code) (ok: res unit) := + ok = OK tt -> forall pc bb, c!pc = Some bb -> target_bounds (target td) (bound td) pc (Some bb). + +Lemma check_code_correct (td:UF) c: + check_code_spec td c (check_code td c). +Proof. + apply PTree_Properties.fold_rec with (P := check_code_spec td). +- (* extensionality *) + unfold check_code_spec. intros m m' a EQ IND X pc bb; subst. rewrite <- ! EQ; eauto. +- (* base case *) + intros _ pc. rewrite PTree.gempty; try congruence. +- (* inductive case *) + unfold check_code_spec. + intros m [[]|] pc bb NEW ATPC IND; simpl; try congruence. + intros H pc0 bb0. rewrite PTree.gsspec; destruct (peq _ _); subst; simpl; auto. + intros X; inversion X; subst. + apply check_bblock_correct; auto. +Qed. + +Theorem branch_target_bounds: + forall f tf pc, + tunnel_function f = OK tf -> + target_bounds (branch_target f) (bound (branch_target f)) pc (f.(fn_code)!pc). +Proof. + unfold tunnel_function; intros f f' pc. + destruct (check_included _ _) eqn:H1; try congruence. + destruct (check_code _ _) as [[]|] eqn:H2; simpl; try congruence. + intros _. + destruct ((fn_code f)!pc) eqn:X. + - exploit check_code_correct; eauto. + - exploit check_included_correct; eauto. + congruence. +Qed. + +Lemma tunnel_function_unfold: + forall f tf pc, + tunnel_function f = OK tf -> + (fn_code tf)!pc = option_map (tunnel_block (branch_target f)) (fn_code f)!pc. +Proof. + unfold tunnel_function; intros f f' pc. + destruct (check_included _ _) eqn:H1; try congruence. + destruct (check_code _ _) as [[]|] eqn:H2; simpl; try congruence. + intros X; inversion X; clear X; subst. + simpl. rewrite PTree.gmap1. auto. +Qed. + +Lemma tunnel_fundef_Internal: + forall f tf, tunnel_fundef (Internal f) = OK tf + -> exists tf', tunnel_function f = OK tf' /\ tf = Internal tf'. +Proof. + intros f tf; simpl. + destruct (tunnel_function f) eqn:X; simpl; try congruence. + intros EQ; inversion EQ. + eexists; split; eauto. +Qed. + +Lemma tunnel_fundef_External: + forall tf ef, tunnel_fundef (External ef) = OK tf + -> tf = External ef. +Proof. + intros tf ef; simpl. intros H; inversion H; auto. +Qed. + +(** * Preservation of semantics *) + +Definition match_prog (p tp: program) := + match_program (fun _ f tf => tunnel_fundef f = OK tf) eq p tp. + +Lemma transf_program_match: + forall prog tprog, transf_program prog = OK tprog -> match_prog prog tprog. +Proof. + intros. eapply match_transform_partial_program_contextual; eauto. +Qed. + +Section PRESERVATION. + +Variables prog tprog: program. +Hypothesis TRANSL: match_prog prog tprog. +Let ge := Genv.globalenv prog. +Let tge := Genv.globalenv tprog. + +Lemma functions_translated: + forall (v: val) (f: fundef), + Genv.find_funct ge v = Some f -> + exists tf, tunnel_fundef f = OK tf /\ Genv.find_funct tge v = Some tf. +Proof. + intros. exploit (Genv.find_funct_match TRANSL); eauto. + intros (cu & tf & A & B & C). + repeat eexists; intuition eauto. +Qed. + +Lemma function_ptr_translated: + forall v f, + Genv.find_funct_ptr ge v = Some f -> + exists tf, + Genv.find_funct_ptr tge v = Some tf /\ tunnel_fundef f = OK tf. +Proof. + intros. + exploit (Genv.find_funct_ptr_transf_partial TRANSL); eauto. +Qed. + +Lemma symbols_preserved s: Genv.find_symbol tge s = Genv.find_symbol ge s. +Proof. + rewrite <- (Genv.find_symbol_match TRANSL). reflexivity. +Qed. + +Lemma senv_preserved: + Senv.equiv ge tge. +Proof. + eapply (Genv.senv_match TRANSL). +Qed. + +Lemma sig_preserved: + forall f tf, tunnel_fundef f = OK tf -> funsig tf = funsig f. +Proof. + intros. destruct f. + - simpl in H. monadInv H. unfold tunnel_function in EQ. + destruct (check_included _ _); try congruence. + monadInv EQ. simpl; auto. + - simpl in H. monadInv H. reflexivity. +Qed. + +Lemma fn_stacksize_preserved: + forall f tf, tunnel_function f = OK tf -> fn_stacksize tf = fn_stacksize f. +Proof. + intros f tf; unfold tunnel_function. + destruct (check_included _ _); try congruence. + destruct (check_code _ _); simpl; try congruence. + intros H; inversion H; simpl; auto. +Qed. + +Lemma fn_entrypoint_preserved: + forall f tf, tunnel_function f = OK tf -> fn_entrypoint tf = branch_target f (fn_entrypoint f). +Proof. + intros f tf; unfold tunnel_function. + destruct (check_included _ _); try congruence. + destruct (check_code _ _); simpl; try congruence. + intros H; inversion H; simpl; auto. +Qed. + + +(** The proof of semantic preservation is a simulation argument + based on diagrams of the following form: +<< + st1 --------------- st2 + | | + t| ?|t + | | + v v + st1'--------------- st2' +>> + The [match_states] predicate, defined below, captures the precondition + between states [st1] and [st2], as well as the postcondition between + [st1'] and [st2']. One transition in the source code (left) can correspond + to zero or one transition in the transformed code (right). The + "zero transition" case occurs when executing a [Lnop] instruction + in the source code that has been removed by tunneling. + + In the definition of [match_states], what changes between the original and + transformed codes is mainly the control-flow + (in particular, the current program point [pc]), but also some values + and memory states, since some [Vundef] values can become more defined + as a consequence of eliminating useless [Lcond] instructions. *) + +Definition locmap_lessdef (ls1 ls2: locset) : Prop := + forall l, Val.lessdef (ls1 l) (ls2 l). + +Inductive match_stackframes: stackframe -> stackframe -> Prop := + | match_stackframes_intro: + forall f tf sp ls0 bb tls0, + locmap_lessdef ls0 tls0 -> + tunnel_function f = OK tf -> + match_stackframes + (Stackframe f sp ls0 bb) + (Stackframe tf sp tls0 (tunnel_block (branch_target f) bb)). + +Inductive match_states: state -> state -> Prop := + | match_states_intro: + forall s f tf sp pc ls m ts tls tm + (STK: list_forall2 match_stackframes s ts) + (LS: locmap_lessdef ls tls) + (MEM: Mem.extends m tm) + (TF: tunnel_function f = OK tf), + match_states (State s f sp pc ls m) + (State ts tf sp (branch_target f pc) tls tm) + | match_states_block: + forall s f tf sp bb ls m ts tls tm + (STK: list_forall2 match_stackframes s ts) + (LS: locmap_lessdef ls tls) + (MEM: Mem.extends m tm) + (TF: tunnel_function f = OK tf), + match_states (Block s f sp bb ls m) + (Block ts tf sp (tunnel_block (branch_target f) bb) tls tm) + | match_states_interm: + forall s f tf sp pc i bb ls m ts tls tm + (STK: list_forall2 match_stackframes s ts) + (LS: locmap_lessdef ls tls) + (MEM: Mem.extends m tm) + (IBRANCH: tunnel_instr (branch_target f) i = Lbranch pc) + (TF: tunnel_function f = OK tf), + match_states (Block s f sp (i :: bb) ls m) + (State ts tf sp pc tls tm) + | match_states_call: + forall s f tf ls m ts tls tm + (STK: list_forall2 match_stackframes s ts) + (LS: locmap_lessdef ls tls) + (MEM: Mem.extends m tm) + (TF: tunnel_fundef f = OK tf), + match_states (Callstate s f ls m) + (Callstate ts tf tls tm) + | match_states_return: + forall s ls m ts tls tm + (STK: list_forall2 match_stackframes s ts) + (LS: locmap_lessdef ls tls) + (MEM: Mem.extends m tm), + match_states (Returnstate s ls m) + (Returnstate ts tls tm). + +(** Properties of [locmap_lessdef] *) + +Lemma reglist_lessdef: + forall rl ls1 ls2, + locmap_lessdef ls1 ls2 -> Val.lessdef_list (reglist ls1 rl) (reglist ls2 rl). +Proof. + induction rl; simpl; intros; auto. +Qed. + +Lemma locmap_set_lessdef: + forall ls1 ls2 v1 v2 l, + locmap_lessdef ls1 ls2 -> Val.lessdef v1 v2 -> locmap_lessdef (Locmap.set l v1 ls1) (Locmap.set l v2 ls2). +Proof. + intros; red; intros l'. unfold Locmap.set. destruct (Loc.eq l l'). +- destruct l; auto using Val.load_result_lessdef. +- destruct (Loc.diff_dec l l'); auto. +Qed. + +Lemma locmap_set_undef_lessdef: + forall ls1 ls2 l, + locmap_lessdef ls1 ls2 -> locmap_lessdef (Locmap.set l Vundef ls1) ls2. +Proof. + intros; red; intros l'. unfold Locmap.set. destruct (Loc.eq l l'). +- destruct l; auto. destruct ty; auto. +- destruct (Loc.diff_dec l l'); auto. +Qed. + +Lemma locmap_undef_regs_lessdef: + forall rl ls1 ls2, + locmap_lessdef ls1 ls2 -> locmap_lessdef (undef_regs rl ls1) (undef_regs rl ls2). +Proof. + induction rl as [ | r rl]; intros; simpl. auto. apply locmap_set_lessdef; auto. +Qed. + +Lemma locmap_undef_regs_lessdef_1: + forall rl ls1 ls2, + locmap_lessdef ls1 ls2 -> locmap_lessdef (undef_regs rl ls1) ls2. +Proof. + induction rl as [ | r rl]; intros; simpl. auto. apply locmap_set_undef_lessdef; auto. +Qed. + +Lemma locmap_getpair_lessdef: + forall p ls1 ls2, + locmap_lessdef ls1 ls2 -> Val.lessdef (Locmap.getpair p ls1) (Locmap.getpair p ls2). +Proof. + intros; destruct p; simpl; auto using Val.longofwords_lessdef. +Qed. + +Lemma locmap_getpairs_lessdef: + forall pl ls1 ls2, + locmap_lessdef ls1 ls2 -> + Val.lessdef_list (map (fun p => Locmap.getpair p ls1) pl) (map (fun p => Locmap.getpair p ls2) pl). +Proof. + intros. induction pl; simpl; auto using locmap_getpair_lessdef. +Qed. + +Lemma locmap_setpair_lessdef: + forall p ls1 ls2 v1 v2, + locmap_lessdef ls1 ls2 -> Val.lessdef v1 v2 -> locmap_lessdef (Locmap.setpair p v1 ls1) (Locmap.setpair p v2 ls2). +Proof. + intros; destruct p; simpl; auto using locmap_set_lessdef, Val.loword_lessdef, Val.hiword_lessdef. +Qed. + +Lemma locmap_setres_lessdef: + forall res ls1 ls2 v1 v2, + locmap_lessdef ls1 ls2 -> Val.lessdef v1 v2 -> locmap_lessdef (Locmap.setres res v1 ls1) (Locmap.setres res v2 ls2). +Proof. + induction res; intros; simpl; auto using locmap_set_lessdef, Val.loword_lessdef, Val.hiword_lessdef. +Qed. + +Lemma locmap_undef_caller_save_regs_lessdef: + forall ls1 ls2, + locmap_lessdef ls1 ls2 -> locmap_lessdef (undef_caller_save_regs ls1) (undef_caller_save_regs ls2). +Proof. + intros; red; intros. unfold undef_caller_save_regs. + destruct l. +- destruct (Conventions1.is_callee_save r); auto. +- destruct sl; auto. +Qed. + +Lemma find_function_translated: + forall ros ls tls fd, + locmap_lessdef ls tls -> + find_function ge ros ls = Some fd -> + exists tfd, tunnel_fundef fd = OK tfd /\ find_function tge ros tls = Some tfd. +Proof. + intros. destruct ros; simpl in *. +- assert (E: tls (R m) = ls (R m)). + { exploit Genv.find_funct_inv; eauto. intros (b & EQ). + generalize (H (R m)). rewrite EQ. intros LD; inv LD. auto. } + rewrite E. exploit functions_translated; eauto. +- rewrite symbols_preserved. destruct (Genv.find_symbol ge i); inv H0. + exploit function_ptr_translated; eauto. + intros (tf & X1 & X2). exists tf; intuition. +Qed. + +Lemma call_regs_lessdef: + forall ls1 ls2, locmap_lessdef ls1 ls2 -> locmap_lessdef (call_regs ls1) (call_regs ls2). +Proof. + intros; red; intros. destruct l as [r | [] ofs ty]; simpl; auto. +Qed. + +Lemma return_regs_lessdef: + forall caller1 callee1 caller2 callee2, + locmap_lessdef caller1 caller2 -> + locmap_lessdef callee1 callee2 -> + locmap_lessdef (return_regs caller1 callee1) (return_regs caller2 callee2). +Proof. + intros; red; intros. destruct l; simpl. +- destruct (Conventions1.is_callee_save r); auto. +- destruct sl; auto. +Qed. + +(** To preserve non-terminating behaviours, we show that the transformed + code cannot take an infinity of "zero transition" cases. + We use the following [measure] function over source states, + which decreases strictly in the "zero transition" case. *) + +Definition measure (st: state) : nat := + match st with + | State s f sp pc ls m => (bound (branch_target f) pc) * 2 + | Block s f sp (Lbranch pc :: _) ls m => (bound (branch_target f) pc) * 2 + 1 + | Block s f sp (Lcond _ _ pc1 pc2 _ :: _) ls m => (max (bound (branch_target f) pc1) (bound (branch_target f) pc2)) * 2 + 1 + | Block s f sp bb ls m => 0 + | Callstate s f ls m => 0 + | Returnstate s ls m => 0 + end. + +Lemma match_parent_locset: + forall s ts, + list_forall2 match_stackframes s ts -> + locmap_lessdef (parent_locset s) (parent_locset ts). +Proof. + induction 1; simpl. +- red; auto. +- inv H; auto. +Qed. + +Lemma tunnel_step_correct: + forall st1 t st2, step ge st1 t st2 -> + forall st1' (MS: match_states st1 st1'), + (exists st2', step tge st1' t st2' /\ match_states st2 st2') + \/ (measure st2 < measure st1 /\ t = E0 /\ match_states st2 st1')%nat. +Proof. + induction 1; intros; try inv MS; try (simpl in IBRANCH; inv IBRANCH). + +- (* entering a block *) + exploit (branch_target_bounds f tf pc); eauto. + rewrite H. intros X; inversion X. + + (* TB_default *) + rewrite TB; left. econstructor; split. + * econstructor. simpl. erewrite tunnel_function_unfold, H ; simpl; eauto. + * econstructor; eauto. + + (* FT_branch *) + simpl; right. + rewrite EQ; repeat (econstructor; lia || eauto). + + (* FT_cond *) + simpl; right. + repeat (econstructor; lia || eauto); simpl. + destruct (peq _ _); try congruence. +- (* Lop *) + exploit eval_operation_lessdef. apply reglist_lessdef; eauto. eauto. eauto. + intros (tv & EV & LD). + left; simpl; econstructor; split. + eapply exec_Lop with (v := tv); eauto. + rewrite <- EV. apply eval_operation_preserved. exact symbols_preserved. + econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef. +- (* Lload *) + exploit eval_addressing_lessdef. apply reglist_lessdef; eauto. eauto. + intros (ta & EV & LD). + exploit Mem.loadv_extends. eauto. eauto. eexact LD. + intros (tv & LOAD & LD'). + left; simpl; econstructor; split. + eapply exec_Lload with (a := ta). + rewrite <- EV. apply eval_addressing_preserved. exact symbols_preserved. + eauto. eauto. + econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef. +- (* Lload notrap1 *) + exploit eval_addressing_lessdef_none. apply reglist_lessdef; eauto. eassumption. + left; simpl; econstructor; split. + eapply exec_Lload_notrap1. + rewrite <- H0. + apply eval_addressing_preserved. exact symbols_preserved. eauto. + econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef. +- (* Lload notrap2 *) + exploit eval_addressing_lessdef. apply reglist_lessdef; eauto. eauto. + intros (ta & EV & LD). + destruct (Mem.loadv chunk tm ta) eqn:Htload. + { + left; simpl; econstructor; split. + eapply exec_Lload. + rewrite <- EV. apply eval_addressing_preserved. exact symbols_preserved. + exact Htload. eauto. + econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef. + } + { + left; simpl; econstructor; split. + eapply exec_Lload_notrap2. + rewrite <- EV. apply eval_addressing_preserved. exact symbols_preserved. + exact Htload. eauto. + econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef. + } +- (* Lgetstack *) + left; simpl; econstructor; split. + econstructor; eauto. + econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef. +- (* Lsetstack *) + left; simpl; econstructor; split. + econstructor; eauto. + econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef. +- (* Lstore *) + exploit eval_addressing_lessdef. apply reglist_lessdef; eauto. eauto. + intros (ta & EV & LD). + exploit Mem.storev_extends. eauto. eauto. eexact LD. apply LS. + intros (tm' & STORE & MEM'). + left; simpl; econstructor; split. + eapply exec_Lstore with (a := ta). + rewrite <- EV. apply eval_addressing_preserved. exact symbols_preserved. + eauto. eauto. + econstructor; eauto using locmap_undef_regs_lessdef. +- (* Lcall *) + left; simpl. + exploit find_function_translated; eauto. + intros (tfd & Htfd & FIND). + econstructor; split. + + eapply exec_Lcall; eauto. + erewrite sig_preserved; eauto. + + econstructor; eauto. + constructor; auto. + constructor; auto. +- (* Ltailcall *) + exploit find_function_translated. 2: eauto. + { eauto using return_regs_lessdef, match_parent_locset. } + intros (tfd & Htfd & FIND). + exploit Mem.free_parallel_extends. eauto. eauto. intros (tm' & FREE & MEM'). + left; simpl; econstructor; split. + + eapply exec_Ltailcall; eauto. + * eapply sig_preserved; eauto. + * erewrite fn_stacksize_preserved; eauto. + + econstructor; eauto using return_regs_lessdef, match_parent_locset. +- (* Lbuiltin *) + exploit eval_builtin_args_lessdef. eexact LS. eauto. eauto. intros (tvargs & EVA & LDA). + exploit external_call_mem_extends; eauto. intros (tvres & tm' & A & B & C & D). + left; simpl; econstructor; split. + eapply exec_Lbuiltin; eauto. + eapply eval_builtin_args_preserved with (ge1 := ge); eauto. exact symbols_preserved. + eapply external_call_symbols_preserved. apply senv_preserved. eauto. + econstructor; eauto using locmap_setres_lessdef, locmap_undef_regs_lessdef. +- (* Lbranch (preserved) *) + left; simpl; econstructor; split. + eapply exec_Lbranch; eauto. + fold (branch_target f pc). econstructor; eauto. +- (* Lbranch (eliminated) *) + right; split. simpl. lia. split. auto. constructor; auto. +- (* Lcond (preserved) *) + simpl; left; destruct (peq _ _) eqn: EQ. + + econstructor; split. + eapply exec_Lbranch. + destruct b. + * constructor; eauto using locmap_undef_regs_lessdef_1. + * rewrite e. constructor; eauto using locmap_undef_regs_lessdef_1. + + econstructor; split. + eapply exec_Lcond; eauto. eapply eval_condition_lessdef; eauto using reglist_lessdef. + destruct b; econstructor; eauto using locmap_undef_regs_lessdef. +- (* Lcond (eliminated) *) + destruct (peq _ _) eqn: EQ; try inv H1. + right; split; simpl. + + destruct b. + generalize (Nat.le_max_l (bound (branch_target f) pc1) (bound (branch_target f) pc2)); lia. + generalize (Nat.le_max_r (bound (branch_target f) pc1) (bound (branch_target f) pc2)); lia. + + destruct b. + -- repeat (constructor; auto). + -- rewrite e; repeat (constructor; auto). +- (* Ljumptable *) + assert (tls (R arg) = Vint n). + { generalize (LS (R arg)); rewrite H; intros LD; inv LD; auto. } + left; simpl; econstructor; split. + eapply exec_Ljumptable. + eauto. rewrite list_nth_z_map, H0; simpl; eauto. eauto. + econstructor; eauto using locmap_undef_regs_lessdef. +- (* Lreturn *) + exploit Mem.free_parallel_extends. eauto. eauto. intros (tm' & FREE & MEM'). + left; simpl; econstructor; split. + + eapply exec_Lreturn; eauto. + erewrite fn_stacksize_preserved; eauto. + + constructor; eauto using return_regs_lessdef, match_parent_locset. +- (* internal function *) + exploit tunnel_fundef_Internal; eauto. + intros (tf' & TF' & ITF). subst. + exploit Mem.alloc_extends. eauto. eauto. apply Z.le_refl. apply Z.le_refl. + intros (tm' & ALLOC & MEM'). + left; simpl. + econstructor; split. + + eapply exec_function_internal; eauto. + erewrite fn_stacksize_preserved; eauto. + + simpl. + erewrite (fn_entrypoint_preserved f tf'); auto. + econstructor; eauto using locmap_undef_regs_lessdef, call_regs_lessdef. +- (* external function *) + exploit external_call_mem_extends; eauto using locmap_getpairs_lessdef. + intros (tvres & tm' & A & B & C & D). + left; simpl; econstructor; split. + + erewrite (tunnel_fundef_External tf ef); eauto. + eapply exec_function_external; eauto. + eapply external_call_symbols_preserved; eauto. apply senv_preserved. + + simpl. econstructor; eauto using locmap_setpair_lessdef, locmap_undef_caller_save_regs_lessdef. +- (* return *) + inv STK. inv H1. + left; econstructor; split. + eapply exec_return; eauto. + constructor; auto. +Qed. + +Lemma transf_initial_states: + forall st1, initial_state prog st1 -> + exists st2, initial_state tprog st2 /\ match_states st1 st2. +Proof. + intros. inversion H. + exploit function_ptr_translated; eauto. + intros (tf & Htf & Hf). + exists (Callstate nil tf (Locmap.init Vundef) m0); split. + econstructor; eauto. + apply (Genv.init_mem_transf_partial TRANSL); auto. + rewrite (match_program_main TRANSL). + rewrite symbols_preserved. eauto. + rewrite <- H3. apply sig_preserved. auto. + constructor. constructor. red; simpl; auto. apply Mem.extends_refl. auto. +Qed. + +Lemma transf_final_states: + forall st1 st2 r, + match_states st1 st2 -> final_state st1 r -> final_state st2 r. +Proof. + intros. inv H0. inv H. inv STK. + set (p := map_rpair R (Conventions1.loc_result signature_main)) in *. + generalize (locmap_getpair_lessdef p _ _ LS). rewrite H1; intros LD; inv LD. + econstructor; eauto. +Qed. + +Theorem transf_program_correct: + forward_simulation (LTL.semantics prog) (LTL.semantics tprog). +Proof. + eapply forward_simulation_opt. + apply senv_preserved. + eexact transf_initial_states. + eexact transf_final_states. + eexact tunnel_step_correct. +Qed. + +End PRESERVATION. diff --git a/backend/RTLTunneling.v b/backend/RTLTunneling.v index 049160fd..df663f48 100644 --- a/backend/RTLTunneling.v +++ b/backend/RTLTunneling.v @@ -19,7 +19,7 @@ Require Import Coqlib Maps Errors. Require Import AST. Require Import RTL. -(* This is a port of tunneling for LTL. See Tunneling.v *) +(* This is a port of tunneling for LTL. See LTLTunneling.v *) Definition UF := PTree.t (node * Z). diff --git a/backend/Tunneling.v b/backend/Tunneling.v deleted file mode 100644 index c849ea92..00000000 --- a/backend/Tunneling.v +++ /dev/null @@ -1,167 +0,0 @@ -(* *********************************************************************) -(* *) -(* The Compcert verified compiler *) -(* *) -(* Xavier Leroy, INRIA Paris-Rocquencourt *) -(* Sylvain Boulmé Grenoble-INP, VERIMAG *) -(* *) -(* Copyright Institut National de Recherche en Informatique et en *) -(* Automatique. All rights reserved. This file is distributed *) -(* under the terms of the INRIA Non-Commercial License Agreement. *) -(* *) -(* *********************************************************************) - -(** Branch tunneling (optimization of branches to branches). *) - -Require Import Coqlib Maps Errors. -Require Import AST. -Require Import LTL. - -(** Branch tunneling shortens sequences of branches (with no intervening - computations) by rewriting the branch and conditional branch instructions - so that they jump directly to the end of the branch sequence. - For example: -<< - L1: if (cond) nop L2; L1: nop L3; - L2: nop L3; becomes L2: nop L3; - L3: instr; L3: instr; - L4: if (cond) goto L1; L4: if (cond) nop L1; ->> - This optimization can be applied to several of our intermediate - languages. We choose to perform it on the [LTL] language, - after register allocation but before code linearization. - Register allocation can delete instructions (such as dead - computations or useless moves), therefore there are more - opportunities for tunneling after allocation than before. - Symmetrically, prior tunneling helps linearization to produce - better code, e.g. by revealing that some [branch] instructions are - dead code (as the "branch L3" in the example above). -*) - -(** The implementation consists in two passes: the first pass - records the branch t of each "nop" - and the second pass replace any "nop" node to [pc] - by a branch to a "nop" at [branch_t f pc] - -Naively, we may define [branch_t f pc] as follows: -<< - branch_t f pc = branch_t f pc' if f(pc) = nop pc' - = pc otherwise ->> - However, this definition can fail to terminate if - the program can contain loops consisting only of branches, as in -<< - L1: branch L1; ->> - or -<< - L1: nop L2; - L2: nop L1; ->> - Coq warns us of this fact by not accepting the definition - of [branch_t] above. - - To handle this problem, we use a union-find data structure, adding equalities [pc = pc'] - for every instruction [pc: nop pc'] in the function. - - Moreover, because the elimination of "useless" [Lcond] depends on the current [uf] datastructure, - we need to iterate until we reach a fixpoint. - - Actually, it is simpler and more efficient to perform this in an external oracle, that also returns a measure - in order to help the proof. - - A verifier checks that this data-structure is correct. -*) - -Definition UF := PTree.t (node * Z). - -(* The oracle returns a map of "nop" node to their target with a distance (ie the number of the "nop" node on the path) to the target. *) -Axiom branch_target: LTL.function -> UF. -Extract Constant branch_target => "Tunnelingaux.branch_target". - -Local Open Scope error_monad_scope. - -Definition get (td: UF) pc:node*Z := - match td!pc with - | Some (t,d) => (t,Z.abs d) - | _ => (pc,0) - end. - -Definition target (td: UF) (pc:node): node := fst (get td pc). -Coercion target: UF >-> Funclass. - -(* we check that the domain of [td] is included in the domain of [c] *) -Definition check_included (td: UF) (c: code): option bblock - := PTree.fold (fun (ok:option bblock) pc _ => if ok then c!pc else None) td (Some nil). - -(* we check the validity of targets and their bound: - the distance of a "nop" node (w.r.t to the target) must be greater than the one of its parents. -*) -Definition check_bblock (td: UF) (pc:node) (bb: bblock): res unit - := match td!pc with - | None => OK tt - | Some (tpc, dpc) => - let dpc := Z.abs dpc in - match bb with - | Lbranch s ::_ => - let (ts, ds) := get td s in - if peq tpc ts then - if zlt ds dpc then OK tt - else Error (msg "bad distance in Lbranch") - else Error (msg "invalid skip of Lbranch") - | Lcond _ _ s1 s2 _ :: _ => - let (ts1, ds1) := get td s1 in - let (ts2, ds2) := get td s2 in - if peq tpc ts1 then - if peq tpc ts2 then - if zlt ds1 dpc then - if zlt ds2 dpc then OK tt - else Error (msg "bad distance on else branch") - else Error (msg "bad distance on then branch") - else Error (msg "invalid skip of else branch") - else Error (msg "invalid skip of then branch") - | _ => Error (msg "cannot skip this block") - end - end. - -Definition check_code (td: UF) (c:code): res unit - := PTree.fold (fun ok pc bb => do _ <- ok; check_bblock td pc bb) c (OK tt). - -(** The second pass rewrites all LTL instructions, replacing every - successor [s] of every instruction by [t s], the canonical representative - of its equivalence class in the union-find data structure. *) - -Definition tunnel_instr (t: node -> node) (i: instruction) : instruction := - match i with - | Lbranch s => Lbranch (t s) - | Lcond cond args s1 s2 info => - let s1' := t s1 in let s2' := t s2 in - if peq s1' s2' - then Lbranch s1' - else Lcond cond args s1' s2' info - | Ljumptable arg tbl => Ljumptable arg (List.map t tbl) - | _ => i - end. - -Definition tunnel_block (t: node -> node) (b: bblock) : bblock := - List.map (tunnel_instr t) b. - -Definition tunnel_function (f: LTL.function) : res LTL.function := - let td := branch_target f in - let c := (fn_code f) in - if check_included td c then - do _ <- check_code td c ; OK - (mkfunction - (fn_sig f) - (fn_stacksize f) - (PTree.map1 (tunnel_block td) c) - (td (fn_entrypoint f))) - else - Error (msg "Some node of the union-find is not in the CFG") - . - -Definition tunnel_fundef (f: fundef) : res fundef := - transf_partial_fundef tunnel_function f. - -Definition transf_program (p: program) : res program := - transform_partial_program tunnel_fundef p. diff --git a/backend/Tunnelingaux.ml b/backend/Tunnelingaux.ml deleted file mode 100644 index d4b88a5d..00000000 --- a/backend/Tunnelingaux.ml +++ /dev/null @@ -1,108 +0,0 @@ -(* *************************************************************) -(* *) -(* The Compcert verified compiler *) -(* *) -(* Sylvain Boulmé Grenoble-INP, VERIMAG *) -(* *) -(* Copyright VERIMAG. All rights reserved. *) -(* This file is distributed under the terms of the INRIA *) -(* Non-Commercial License Agreement. *) -(* *) -(* *************************************************************) - -(* - -This file implements the [branch_target] oracle that identifies "nop" branches in a LTL function, -and computes their target node with the distance (ie the number of cummulated nops) toward this target. - -See [Tunneling.v] - -*) - -open Coqlib -open LTL -open Maps -open Camlcoq -open Tunnelinglibs - -module LANG = struct - type code_unit = LTL.bblock - type funct = LTL.coq_function -end - -module OPT = struct - let langname = "LTL" - let limit_tunneling = None - let debug_flag = ref false - let final_dump = false -end - -module Partial = Tunnelinglibs.Tunneling(LANG)(OPT) - -module FUNS = struct - let build_simplified_cfg c acc pc bb = - match bb with - | Lbranch s :: _ -> - let ns = get_node c s in - set_branch c pc ns; - acc - | Lcond (_, _, s1, s2, _) :: _ -> - c.num_rems <- c.num_rems + 1; - let ns1 = get_node c s1 in - let ns2 = get_node c s2 in - let npc = get_node c pc in - npc.inst <- COND(ns1, ns2); - npc::acc - | _ -> acc - - let print_code_unit c println (pc, bb) = - match bb with - | Lbranch s::_ -> (if println then Partial.debug "\n"); Partial.debug "%d:Lbranch %d %s\n" pc (P.to_int s) (string_of_labeli c.nodes pc); false - | Lcond (_, _, s1, s2, _)::_ -> (if println then Partial.debug "\n"); Partial.debug "%d:Lcond (%d,%d) %s\n" pc (P.to_int s1) (P.to_int s2) (string_of_labeli c.nodes pc); false - | _ -> Partial.debug "%d " pc; true - - let fn_code f = f.fn_code - let fn_entrypoint f = f.fn_entrypoint - - - (*************************************************************) - (* Copy-paste of the extracted code of the verifier *) - (* with [raise (BugOnPC (P.to_int pc))] instead of [Error.*] *) - - let check_code_unit td pc bb = - match PTree.get pc td with - | Some p -> - let (tpc, dpc) = p in - let dpc0 = dpc in - (match bb with - | [] -> - raise (BugOnPC (P.to_int pc)) - | i :: _ -> - (match i with - | Lbranch s -> - let (ts, ds) = get td s in - if peq tpc ts - then if zlt ds dpc0 - then () - else raise (BugOnPC (P.to_int pc)) - else raise (BugOnPC (P.to_int pc)) - | Lcond (_, _, s1, s2, _) -> - let (ts1, ds1) = get td s1 in - let (ts2, ds2) = get td s2 in - if peq tpc ts1 - then if peq tpc ts2 - then if zlt ds1 dpc0 - then if zlt ds2 dpc0 - then () - else raise (BugOnPC (P.to_int pc)) - else raise (BugOnPC (P.to_int pc)) - else raise (BugOnPC (P.to_int pc)) - else raise (BugOnPC (P.to_int pc)) - | _ -> - raise (BugOnPC (P.to_int pc)))) - | None -> () -end - -module T = Partial.T(FUNS) -let branch_target = T.branch_target - diff --git a/backend/Tunnelinglibs.ml b/backend/Tunnelinglibs.ml index 1bb35f7a..7c826ba4 100644 --- a/backend/Tunnelinglibs.ml +++ b/backend/Tunnelinglibs.ml @@ -16,7 +16,7 @@ This file implements the core functions of the tunneling passes, for both RTL and LTL, by using a simplified CFG as a transparent interface -See [Tunneling.v] and [RTLTunneling.v] +See [LTLTunneling.v] and [RTLTunneling.v] *) diff --git a/backend/Tunnelingproof.v b/backend/Tunnelingproof.v deleted file mode 100644 index 3bc92f75..00000000 --- a/backend/Tunnelingproof.v +++ /dev/null @@ -1,666 +0,0 @@ -(* *********************************************************************) -(* *) -(* The Compcert verified compiler *) -(* *) -(* Xavier Leroy, INRIA Paris-Rocquencourt *) -(* Sylvain Boulmé Grenoble-INP, VERIMAG *) -(* *) -(* Copyright Institut National de Recherche en Informatique et en *) -(* Automatique. All rights reserved. This file is distributed *) -(* under the terms of the INRIA Non-Commercial License Agreement. *) -(* *) -(* *********************************************************************) - -(** Correctness proof for the branch tunneling optimization. *) - -Require Import Coqlib Maps Errors. -Require Import AST Linking. -Require Import Values Memory Events Globalenvs Smallstep. -Require Import Op Locations LTL. -Require Import Tunneling. - -Local Open Scope nat. - - -(** * Properties of the branch_target, when the verifier succeeds *) - -Definition check_included_spec (c:code) (td:UF) (ok: option bblock) := - ok <> None -> forall pc, c!pc = None -> td!pc = None. - -Lemma check_included_correct (td: UF) (c: code): - check_included_spec c td (check_included td c). -Proof. - apply PTree_Properties.fold_rec with (P := check_included_spec c). -- (* extensionality *) - unfold check_included_spec. intros m m' a EQ IND X pc. rewrite <- EQ; auto. -- (* base case *) - intros _ pc. rewrite PTree.gempty; try congruence. -- (* inductive case *) - unfold check_included_spec. - intros m [|] pc bb NEW ATPC IND; simpl; try congruence. - intros H pc0. rewrite PTree.gsspec; destruct (peq _ _); subst; simpl; try congruence. - intros; eapply IND; try congruence. -Qed. - -Inductive target_bounds (target: node -> node) (bound: node -> nat) (pc: node): (option bblock) -> Prop := - | TB_default (TB: target pc = pc) ob - : target_bounds target bound pc ob - | TB_branch s bb - (EQ: target pc = target s) - (DECREASE: bound s < bound pc) - : target_bounds target bound pc (Some (Lbranch s::bb)) - | TB_cond cond args s1 s2 info bb - (EQ1: target pc = target s1) - (EQ2: target pc = target s2) - (DEC1: bound s1 < bound pc) - (DEC2: bound s2 < bound pc) - : target_bounds target bound pc (Some (Lcond cond args s1 s2 info::bb)) - . -Local Hint Resolve TB_default: core. - -Lemma target_None (td:UF) (pc: node): td!pc = None -> td pc = pc. -Proof. - unfold target, get. intros H; rewrite H; auto. -Qed. -Local Hint Resolve target_None Z.abs_nonneg: core. - -Lemma get_nonneg td pc t d: get td pc = (t, d) -> (0 <= d)%Z. -Proof. - unfold get. destruct (td!_) as [(t0&d0)|]; intros H; inversion H; subst; simpl; lia || auto. -Qed. -Local Hint Resolve get_nonneg: core. - -Definition bound (td: UF) (pc: node) := Z.to_nat (snd (get td pc)). - -Lemma check_bblock_correct (td:UF) (pc:node) (bb: bblock): - check_bblock td pc bb = OK tt -> - target_bounds (target td) (bound td) pc (Some bb). -Proof. - unfold check_bblock, bound. - destruct (td!pc) as [(tpc&dpc)|] eqn:Hpc; auto. - assert (Tpc: td pc = tpc). { unfold target, get; rewrite Hpc; simpl; auto. } - assert (Dpc: snd (get td pc) = Z.abs dpc). { unfold get; rewrite Hpc; simpl; auto. } - destruct bb as [|[ ] bb]; simpl; try congruence. - + destruct (get td s) as (ts, ds) eqn:Hs. - repeat (destruct (peq _ _) || destruct (zlt _ _)); simpl; try congruence. - intros; apply TB_branch. - * rewrite Tpc. unfold target; rewrite Hs; simpl; auto. - * rewrite Dpc, Hs; simpl. apply Z2Nat.inj_lt; eauto. - + destruct (get td s1) as (ts1, ds1) eqn:Hs1. - destruct (get td s2) as (ts2, ds2) eqn:Hs2. - repeat (destruct (peq _ _) || destruct (zlt _ _)); simpl; try congruence. - intros; apply TB_cond. - * rewrite Tpc. unfold target; rewrite Hs1; simpl; auto. - * rewrite Tpc. unfold target; rewrite Hs2; simpl; auto. - * rewrite Dpc, Hs1; simpl. apply Z2Nat.inj_lt; eauto. - * rewrite Dpc, Hs2; simpl. apply Z2Nat.inj_lt; eauto. -Qed. - -Definition check_code_spec (td:UF) (c:code) (ok: res unit) := - ok = OK tt -> forall pc bb, c!pc = Some bb -> target_bounds (target td) (bound td) pc (Some bb). - -Lemma check_code_correct (td:UF) c: - check_code_spec td c (check_code td c). -Proof. - apply PTree_Properties.fold_rec with (P := check_code_spec td). -- (* extensionality *) - unfold check_code_spec. intros m m' a EQ IND X pc bb; subst. rewrite <- ! EQ; eauto. -- (* base case *) - intros _ pc. rewrite PTree.gempty; try congruence. -- (* inductive case *) - unfold check_code_spec. - intros m [[]|] pc bb NEW ATPC IND; simpl; try congruence. - intros H pc0 bb0. rewrite PTree.gsspec; destruct (peq _ _); subst; simpl; auto. - intros X; inversion X; subst. - apply check_bblock_correct; auto. -Qed. - -Theorem branch_target_bounds: - forall f tf pc, - tunnel_function f = OK tf -> - target_bounds (branch_target f) (bound (branch_target f)) pc (f.(fn_code)!pc). -Proof. - unfold tunnel_function; intros f f' pc. - destruct (check_included _ _) eqn:H1; try congruence. - destruct (check_code _ _) as [[]|] eqn:H2; simpl; try congruence. - intros _. - destruct ((fn_code f)!pc) eqn:X. - - exploit check_code_correct; eauto. - - exploit check_included_correct; eauto. - congruence. -Qed. - -Lemma tunnel_function_unfold: - forall f tf pc, - tunnel_function f = OK tf -> - (fn_code tf)!pc = option_map (tunnel_block (branch_target f)) (fn_code f)!pc. -Proof. - unfold tunnel_function; intros f f' pc. - destruct (check_included _ _) eqn:H1; try congruence. - destruct (check_code _ _) as [[]|] eqn:H2; simpl; try congruence. - intros X; inversion X; clear X; subst. - simpl. rewrite PTree.gmap1. auto. -Qed. - -Lemma tunnel_fundef_Internal: - forall f tf, tunnel_fundef (Internal f) = OK tf - -> exists tf', tunnel_function f = OK tf' /\ tf = Internal tf'. -Proof. - intros f tf; simpl. - destruct (tunnel_function f) eqn:X; simpl; try congruence. - intros EQ; inversion EQ. - eexists; split; eauto. -Qed. - -Lemma tunnel_fundef_External: - forall tf ef, tunnel_fundef (External ef) = OK tf - -> tf = External ef. -Proof. - intros tf ef; simpl. intros H; inversion H; auto. -Qed. - -(** * Preservation of semantics *) - -Definition match_prog (p tp: program) := - match_program (fun _ f tf => tunnel_fundef f = OK tf) eq p tp. - -Lemma transf_program_match: - forall prog tprog, transf_program prog = OK tprog -> match_prog prog tprog. -Proof. - intros. eapply match_transform_partial_program_contextual; eauto. -Qed. - -Section PRESERVATION. - -Variables prog tprog: program. -Hypothesis TRANSL: match_prog prog tprog. -Let ge := Genv.globalenv prog. -Let tge := Genv.globalenv tprog. - -Lemma functions_translated: - forall (v: val) (f: fundef), - Genv.find_funct ge v = Some f -> - exists tf, tunnel_fundef f = OK tf /\ Genv.find_funct tge v = Some tf. -Proof. - intros. exploit (Genv.find_funct_match TRANSL); eauto. - intros (cu & tf & A & B & C). - repeat eexists; intuition eauto. -Qed. - -Lemma function_ptr_translated: - forall v f, - Genv.find_funct_ptr ge v = Some f -> - exists tf, - Genv.find_funct_ptr tge v = Some tf /\ tunnel_fundef f = OK tf. -Proof. - intros. - exploit (Genv.find_funct_ptr_transf_partial TRANSL); eauto. -Qed. - -Lemma symbols_preserved s: Genv.find_symbol tge s = Genv.find_symbol ge s. -Proof. - rewrite <- (Genv.find_symbol_match TRANSL). reflexivity. -Qed. - -Lemma senv_preserved: - Senv.equiv ge tge. -Proof. - eapply (Genv.senv_match TRANSL). -Qed. - -Lemma sig_preserved: - forall f tf, tunnel_fundef f = OK tf -> funsig tf = funsig f. -Proof. - intros. destruct f. - - simpl in H. monadInv H. unfold tunnel_function in EQ. - destruct (check_included _ _); try congruence. - monadInv EQ. simpl; auto. - - simpl in H. monadInv H. reflexivity. -Qed. - -Lemma fn_stacksize_preserved: - forall f tf, tunnel_function f = OK tf -> fn_stacksize tf = fn_stacksize f. -Proof. - intros f tf; unfold tunnel_function. - destruct (check_included _ _); try congruence. - destruct (check_code _ _); simpl; try congruence. - intros H; inversion H; simpl; auto. -Qed. - -Lemma fn_entrypoint_preserved: - forall f tf, tunnel_function f = OK tf -> fn_entrypoint tf = branch_target f (fn_entrypoint f). -Proof. - intros f tf; unfold tunnel_function. - destruct (check_included _ _); try congruence. - destruct (check_code _ _); simpl; try congruence. - intros H; inversion H; simpl; auto. -Qed. - - -(** The proof of semantic preservation is a simulation argument - based on diagrams of the following form: -<< - st1 --------------- st2 - | | - t| ?|t - | | - v v - st1'--------------- st2' ->> - The [match_states] predicate, defined below, captures the precondition - between states [st1] and [st2], as well as the postcondition between - [st1'] and [st2']. One transition in the source code (left) can correspond - to zero or one transition in the transformed code (right). The - "zero transition" case occurs when executing a [Lnop] instruction - in the source code that has been removed by tunneling. - - In the definition of [match_states], what changes between the original and - transformed codes is mainly the control-flow - (in particular, the current program point [pc]), but also some values - and memory states, since some [Vundef] values can become more defined - as a consequence of eliminating useless [Lcond] instructions. *) - -Definition locmap_lessdef (ls1 ls2: locset) : Prop := - forall l, Val.lessdef (ls1 l) (ls2 l). - -Inductive match_stackframes: stackframe -> stackframe -> Prop := - | match_stackframes_intro: - forall f tf sp ls0 bb tls0, - locmap_lessdef ls0 tls0 -> - tunnel_function f = OK tf -> - match_stackframes - (Stackframe f sp ls0 bb) - (Stackframe tf sp tls0 (tunnel_block (branch_target f) bb)). - -Inductive match_states: state -> state -> Prop := - | match_states_intro: - forall s f tf sp pc ls m ts tls tm - (STK: list_forall2 match_stackframes s ts) - (LS: locmap_lessdef ls tls) - (MEM: Mem.extends m tm) - (TF: tunnel_function f = OK tf), - match_states (State s f sp pc ls m) - (State ts tf sp (branch_target f pc) tls tm) - | match_states_block: - forall s f tf sp bb ls m ts tls tm - (STK: list_forall2 match_stackframes s ts) - (LS: locmap_lessdef ls tls) - (MEM: Mem.extends m tm) - (TF: tunnel_function f = OK tf), - match_states (Block s f sp bb ls m) - (Block ts tf sp (tunnel_block (branch_target f) bb) tls tm) - | match_states_interm: - forall s f tf sp pc i bb ls m ts tls tm - (STK: list_forall2 match_stackframes s ts) - (LS: locmap_lessdef ls tls) - (MEM: Mem.extends m tm) - (IBRANCH: tunnel_instr (branch_target f) i = Lbranch pc) - (TF: tunnel_function f = OK tf), - match_states (Block s f sp (i :: bb) ls m) - (State ts tf sp pc tls tm) - | match_states_call: - forall s f tf ls m ts tls tm - (STK: list_forall2 match_stackframes s ts) - (LS: locmap_lessdef ls tls) - (MEM: Mem.extends m tm) - (TF: tunnel_fundef f = OK tf), - match_states (Callstate s f ls m) - (Callstate ts tf tls tm) - | match_states_return: - forall s ls m ts tls tm - (STK: list_forall2 match_stackframes s ts) - (LS: locmap_lessdef ls tls) - (MEM: Mem.extends m tm), - match_states (Returnstate s ls m) - (Returnstate ts tls tm). - -(** Properties of [locmap_lessdef] *) - -Lemma reglist_lessdef: - forall rl ls1 ls2, - locmap_lessdef ls1 ls2 -> Val.lessdef_list (reglist ls1 rl) (reglist ls2 rl). -Proof. - induction rl; simpl; intros; auto. -Qed. - -Lemma locmap_set_lessdef: - forall ls1 ls2 v1 v2 l, - locmap_lessdef ls1 ls2 -> Val.lessdef v1 v2 -> locmap_lessdef (Locmap.set l v1 ls1) (Locmap.set l v2 ls2). -Proof. - intros; red; intros l'. unfold Locmap.set. destruct (Loc.eq l l'). -- destruct l; auto using Val.load_result_lessdef. -- destruct (Loc.diff_dec l l'); auto. -Qed. - -Lemma locmap_set_undef_lessdef: - forall ls1 ls2 l, - locmap_lessdef ls1 ls2 -> locmap_lessdef (Locmap.set l Vundef ls1) ls2. -Proof. - intros; red; intros l'. unfold Locmap.set. destruct (Loc.eq l l'). -- destruct l; auto. destruct ty; auto. -- destruct (Loc.diff_dec l l'); auto. -Qed. - -Lemma locmap_undef_regs_lessdef: - forall rl ls1 ls2, - locmap_lessdef ls1 ls2 -> locmap_lessdef (undef_regs rl ls1) (undef_regs rl ls2). -Proof. - induction rl as [ | r rl]; intros; simpl. auto. apply locmap_set_lessdef; auto. -Qed. - -Lemma locmap_undef_regs_lessdef_1: - forall rl ls1 ls2, - locmap_lessdef ls1 ls2 -> locmap_lessdef (undef_regs rl ls1) ls2. -Proof. - induction rl as [ | r rl]; intros; simpl. auto. apply locmap_set_undef_lessdef; auto. -Qed. - -Lemma locmap_getpair_lessdef: - forall p ls1 ls2, - locmap_lessdef ls1 ls2 -> Val.lessdef (Locmap.getpair p ls1) (Locmap.getpair p ls2). -Proof. - intros; destruct p; simpl; auto using Val.longofwords_lessdef. -Qed. - -Lemma locmap_getpairs_lessdef: - forall pl ls1 ls2, - locmap_lessdef ls1 ls2 -> - Val.lessdef_list (map (fun p => Locmap.getpair p ls1) pl) (map (fun p => Locmap.getpair p ls2) pl). -Proof. - intros. induction pl; simpl; auto using locmap_getpair_lessdef. -Qed. - -Lemma locmap_setpair_lessdef: - forall p ls1 ls2 v1 v2, - locmap_lessdef ls1 ls2 -> Val.lessdef v1 v2 -> locmap_lessdef (Locmap.setpair p v1 ls1) (Locmap.setpair p v2 ls2). -Proof. - intros; destruct p; simpl; auto using locmap_set_lessdef, Val.loword_lessdef, Val.hiword_lessdef. -Qed. - -Lemma locmap_setres_lessdef: - forall res ls1 ls2 v1 v2, - locmap_lessdef ls1 ls2 -> Val.lessdef v1 v2 -> locmap_lessdef (Locmap.setres res v1 ls1) (Locmap.setres res v2 ls2). -Proof. - induction res; intros; simpl; auto using locmap_set_lessdef, Val.loword_lessdef, Val.hiword_lessdef. -Qed. - -Lemma locmap_undef_caller_save_regs_lessdef: - forall ls1 ls2, - locmap_lessdef ls1 ls2 -> locmap_lessdef (undef_caller_save_regs ls1) (undef_caller_save_regs ls2). -Proof. - intros; red; intros. unfold undef_caller_save_regs. - destruct l. -- destruct (Conventions1.is_callee_save r); auto. -- destruct sl; auto. -Qed. - -Lemma find_function_translated: - forall ros ls tls fd, - locmap_lessdef ls tls -> - find_function ge ros ls = Some fd -> - exists tfd, tunnel_fundef fd = OK tfd /\ find_function tge ros tls = Some tfd. -Proof. - intros. destruct ros; simpl in *. -- assert (E: tls (R m) = ls (R m)). - { exploit Genv.find_funct_inv; eauto. intros (b & EQ). - generalize (H (R m)). rewrite EQ. intros LD; inv LD. auto. } - rewrite E. exploit functions_translated; eauto. -- rewrite symbols_preserved. destruct (Genv.find_symbol ge i); inv H0. - exploit function_ptr_translated; eauto. - intros (tf & X1 & X2). exists tf; intuition. -Qed. - -Lemma call_regs_lessdef: - forall ls1 ls2, locmap_lessdef ls1 ls2 -> locmap_lessdef (call_regs ls1) (call_regs ls2). -Proof. - intros; red; intros. destruct l as [r | [] ofs ty]; simpl; auto. -Qed. - -Lemma return_regs_lessdef: - forall caller1 callee1 caller2 callee2, - locmap_lessdef caller1 caller2 -> - locmap_lessdef callee1 callee2 -> - locmap_lessdef (return_regs caller1 callee1) (return_regs caller2 callee2). -Proof. - intros; red; intros. destruct l; simpl. -- destruct (Conventions1.is_callee_save r); auto. -- destruct sl; auto. -Qed. - -(** To preserve non-terminating behaviours, we show that the transformed - code cannot take an infinity of "zero transition" cases. - We use the following [measure] function over source states, - which decreases strictly in the "zero transition" case. *) - -Definition measure (st: state) : nat := - match st with - | State s f sp pc ls m => (bound (branch_target f) pc) * 2 - | Block s f sp (Lbranch pc :: _) ls m => (bound (branch_target f) pc) * 2 + 1 - | Block s f sp (Lcond _ _ pc1 pc2 _ :: _) ls m => (max (bound (branch_target f) pc1) (bound (branch_target f) pc2)) * 2 + 1 - | Block s f sp bb ls m => 0 - | Callstate s f ls m => 0 - | Returnstate s ls m => 0 - end. - -Lemma match_parent_locset: - forall s ts, - list_forall2 match_stackframes s ts -> - locmap_lessdef (parent_locset s) (parent_locset ts). -Proof. - induction 1; simpl. -- red; auto. -- inv H; auto. -Qed. - -Lemma tunnel_step_correct: - forall st1 t st2, step ge st1 t st2 -> - forall st1' (MS: match_states st1 st1'), - (exists st2', step tge st1' t st2' /\ match_states st2 st2') - \/ (measure st2 < measure st1 /\ t = E0 /\ match_states st2 st1')%nat. -Proof. - induction 1; intros; try inv MS; try (simpl in IBRANCH; inv IBRANCH). - -- (* entering a block *) - exploit (branch_target_bounds f tf pc); eauto. - rewrite H. intros X; inversion X. - + (* TB_default *) - rewrite TB; left. econstructor; split. - * econstructor. simpl. erewrite tunnel_function_unfold, H ; simpl; eauto. - * econstructor; eauto. - + (* FT_branch *) - simpl; right. - rewrite EQ; repeat (econstructor; lia || eauto). - + (* FT_cond *) - simpl; right. - repeat (econstructor; lia || eauto); simpl. - destruct (peq _ _); try congruence. -- (* Lop *) - exploit eval_operation_lessdef. apply reglist_lessdef; eauto. eauto. eauto. - intros (tv & EV & LD). - left; simpl; econstructor; split. - eapply exec_Lop with (v := tv); eauto. - rewrite <- EV. apply eval_operation_preserved. exact symbols_preserved. - econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef. -- (* Lload *) - exploit eval_addressing_lessdef. apply reglist_lessdef; eauto. eauto. - intros (ta & EV & LD). - exploit Mem.loadv_extends. eauto. eauto. eexact LD. - intros (tv & LOAD & LD'). - left; simpl; econstructor; split. - eapply exec_Lload with (a := ta). - rewrite <- EV. apply eval_addressing_preserved. exact symbols_preserved. - eauto. eauto. - econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef. -- (* Lload notrap1 *) - exploit eval_addressing_lessdef_none. apply reglist_lessdef; eauto. eassumption. - left; simpl; econstructor; split. - eapply exec_Lload_notrap1. - rewrite <- H0. - apply eval_addressing_preserved. exact symbols_preserved. eauto. - econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef. -- (* Lload notrap2 *) - exploit eval_addressing_lessdef. apply reglist_lessdef; eauto. eauto. - intros (ta & EV & LD). - destruct (Mem.loadv chunk tm ta) eqn:Htload. - { - left; simpl; econstructor; split. - eapply exec_Lload. - rewrite <- EV. apply eval_addressing_preserved. exact symbols_preserved. - exact Htload. eauto. - econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef. - } - { - left; simpl; econstructor; split. - eapply exec_Lload_notrap2. - rewrite <- EV. apply eval_addressing_preserved. exact symbols_preserved. - exact Htload. eauto. - econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef. - } -- (* Lgetstack *) - left; simpl; econstructor; split. - econstructor; eauto. - econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef. -- (* Lsetstack *) - left; simpl; econstructor; split. - econstructor; eauto. - econstructor; eauto using locmap_set_lessdef, locmap_undef_regs_lessdef. -- (* Lstore *) - exploit eval_addressing_lessdef. apply reglist_lessdef; eauto. eauto. - intros (ta & EV & LD). - exploit Mem.storev_extends. eauto. eauto. eexact LD. apply LS. - intros (tm' & STORE & MEM'). - left; simpl; econstructor; split. - eapply exec_Lstore with (a := ta). - rewrite <- EV. apply eval_addressing_preserved. exact symbols_preserved. - eauto. eauto. - econstructor; eauto using locmap_undef_regs_lessdef. -- (* Lcall *) - left; simpl. - exploit find_function_translated; eauto. - intros (tfd & Htfd & FIND). - econstructor; split. - + eapply exec_Lcall; eauto. - erewrite sig_preserved; eauto. - + econstructor; eauto. - constructor; auto. - constructor; auto. -- (* Ltailcall *) - exploit find_function_translated. 2: eauto. - { eauto using return_regs_lessdef, match_parent_locset. } - intros (tfd & Htfd & FIND). - exploit Mem.free_parallel_extends. eauto. eauto. intros (tm' & FREE & MEM'). - left; simpl; econstructor; split. - + eapply exec_Ltailcall; eauto. - * eapply sig_preserved; eauto. - * erewrite fn_stacksize_preserved; eauto. - + econstructor; eauto using return_regs_lessdef, match_parent_locset. -- (* Lbuiltin *) - exploit eval_builtin_args_lessdef. eexact LS. eauto. eauto. intros (tvargs & EVA & LDA). - exploit external_call_mem_extends; eauto. intros (tvres & tm' & A & B & C & D). - left; simpl; econstructor; split. - eapply exec_Lbuiltin; eauto. - eapply eval_builtin_args_preserved with (ge1 := ge); eauto. exact symbols_preserved. - eapply external_call_symbols_preserved. apply senv_preserved. eauto. - econstructor; eauto using locmap_setres_lessdef, locmap_undef_regs_lessdef. -- (* Lbranch (preserved) *) - left; simpl; econstructor; split. - eapply exec_Lbranch; eauto. - fold (branch_target f pc). econstructor; eauto. -- (* Lbranch (eliminated) *) - right; split. simpl. lia. split. auto. constructor; auto. -- (* Lcond (preserved) *) - simpl; left; destruct (peq _ _) eqn: EQ. - + econstructor; split. - eapply exec_Lbranch. - destruct b. - * constructor; eauto using locmap_undef_regs_lessdef_1. - * rewrite e. constructor; eauto using locmap_undef_regs_lessdef_1. - + econstructor; split. - eapply exec_Lcond; eauto. eapply eval_condition_lessdef; eauto using reglist_lessdef. - destruct b; econstructor; eauto using locmap_undef_regs_lessdef. -- (* Lcond (eliminated) *) - destruct (peq _ _) eqn: EQ; try inv H1. - right; split; simpl. - + destruct b. - generalize (Nat.le_max_l (bound (branch_target f) pc1) (bound (branch_target f) pc2)); lia. - generalize (Nat.le_max_r (bound (branch_target f) pc1) (bound (branch_target f) pc2)); lia. - + destruct b. - -- repeat (constructor; auto). - -- rewrite e; repeat (constructor; auto). -- (* Ljumptable *) - assert (tls (R arg) = Vint n). - { generalize (LS (R arg)); rewrite H; intros LD; inv LD; auto. } - left; simpl; econstructor; split. - eapply exec_Ljumptable. - eauto. rewrite list_nth_z_map, H0; simpl; eauto. eauto. - econstructor; eauto using locmap_undef_regs_lessdef. -- (* Lreturn *) - exploit Mem.free_parallel_extends. eauto. eauto. intros (tm' & FREE & MEM'). - left; simpl; econstructor; split. - + eapply exec_Lreturn; eauto. - erewrite fn_stacksize_preserved; eauto. - + constructor; eauto using return_regs_lessdef, match_parent_locset. -- (* internal function *) - exploit tunnel_fundef_Internal; eauto. - intros (tf' & TF' & ITF). subst. - exploit Mem.alloc_extends. eauto. eauto. apply Z.le_refl. apply Z.le_refl. - intros (tm' & ALLOC & MEM'). - left; simpl. - econstructor; split. - + eapply exec_function_internal; eauto. - erewrite fn_stacksize_preserved; eauto. - + simpl. - erewrite (fn_entrypoint_preserved f tf'); auto. - econstructor; eauto using locmap_undef_regs_lessdef, call_regs_lessdef. -- (* external function *) - exploit external_call_mem_extends; eauto using locmap_getpairs_lessdef. - intros (tvres & tm' & A & B & C & D). - left; simpl; econstructor; split. - + erewrite (tunnel_fundef_External tf ef); eauto. - eapply exec_function_external; eauto. - eapply external_call_symbols_preserved; eauto. apply senv_preserved. - + simpl. econstructor; eauto using locmap_setpair_lessdef, locmap_undef_caller_save_regs_lessdef. -- (* return *) - inv STK. inv H1. - left; econstructor; split. - eapply exec_return; eauto. - constructor; auto. -Qed. - -Lemma transf_initial_states: - forall st1, initial_state prog st1 -> - exists st2, initial_state tprog st2 /\ match_states st1 st2. -Proof. - intros. inversion H. - exploit function_ptr_translated; eauto. - intros (tf & Htf & Hf). - exists (Callstate nil tf (Locmap.init Vundef) m0); split. - econstructor; eauto. - apply (Genv.init_mem_transf_partial TRANSL); auto. - rewrite (match_program_main TRANSL). - rewrite symbols_preserved. eauto. - rewrite <- H3. apply sig_preserved. auto. - constructor. constructor. red; simpl; auto. apply Mem.extends_refl. auto. -Qed. - -Lemma transf_final_states: - forall st1 st2 r, - match_states st1 st2 -> final_state st1 r -> final_state st2 r. -Proof. - intros. inv H0. inv H. inv STK. - set (p := map_rpair R (Conventions1.loc_result signature_main)) in *. - generalize (locmap_getpair_lessdef p _ _ LS). rewrite H1; intros LD; inv LD. - econstructor; eauto. -Qed. - -Theorem transf_program_correct: - forward_simulation (LTL.semantics prog) (LTL.semantics tprog). -Proof. - eapply forward_simulation_opt. - apply senv_preserved. - eexact transf_initial_states. - eexact transf_final_states. - eexact tunnel_step_correct. -Qed. - -End PRESERVATION. -- cgit