aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.gitlab-ci.yml2
-rw-r--r--backend/PrintLTL.ml4
-rw-r--r--backend/Tunneling.v138
-rw-r--r--backend/Tunnelingaux.ml283
-rw-r--r--backend/Tunnelingproof.v504
-rw-r--r--driver/Compiler.vexpand2
-rw-r--r--lib/UnionFind.v63
-rw-r--r--tools/compiler_expand.ml2
8 files changed, 728 insertions, 270 deletions
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 10008017..c503c394 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -227,7 +227,7 @@ build_kvx:
- sudo apt-get -o Acquire::Check-Valid-Until=false -o Acquire::Check-Date=false update
- sudo apt-get -y install sshpass openssh-client libzip4 lttng-tools liblttng-ctl-dev liblttng-ust-dev babeltrace
- ./.download_from_Kalray.sh
- - rm -f download/*dkms*.deb download/*eclipse*.deb download/*llvm*.deb download/*board-mgmt* download/*oce-host* download/*pocl*
+ - rm -f download/*dkms*.deb download/*eclipse*.deb download/*llvm*.deb download/*board-mgmt* download/*oce-host* download/*pocl* download/*flash-util* download/*barebox*
- sudo dpkg -i download/*.deb
- rm -rf download
- eval `opam config env`
diff --git a/backend/PrintLTL.ml b/backend/PrintLTL.ml
index d8f2ac12..8259297b 100644
--- a/backend/PrintLTL.ml
+++ b/backend/PrintLTL.ml
@@ -133,10 +133,10 @@ let print_program pp (prog: LTL.program) =
let destination : string option ref = ref None
-let print_if prog =
+let print_if passno prog =
match !destination with
| None -> ()
| Some f ->
- let oc = open_out f in
+ let oc = open_out (f ^ "." ^ Z.to_string passno) in
print_program oc prog;
close_out oc
diff --git a/backend/Tunneling.v b/backend/Tunneling.v
index 78458582..269ebb6f 100644
--- a/backend/Tunneling.v
+++ b/backend/Tunneling.v
@@ -3,6 +3,7 @@
(* The Compcert verified compiler *)
(* *)
(* Xavier Leroy, INRIA Paris-Rocquencourt *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
(* *)
(* Copyright Institut National de Recherche en Informatique et en *)
(* Automatique. All rights reserved. This file is distributed *)
@@ -12,7 +13,7 @@
(** Branch tunneling (optimization of branches to branches). *)
-Require Import Coqlib Maps UnionFind.
+Require Import Coqlib Maps Errors.
Require Import AST.
Require Import LTL.
@@ -21,10 +22,10 @@ Require Import LTL.
so that they jump directly to the end of the branch sequence.
For example:
<<
- L1: nop L2; L1: nop L3;
- L2; nop L3; becomes L2: nop L3;
+ L1: if (cond) nop L2; L1: nop L3;
+ L2: nop L3; becomes L2: nop L3;
L3: instr; L3: instr;
- L4: if (cond) goto L1; L4: if (cond) goto L3;
+ L4: if (cond) goto L1; L4: if (cond) nop L1;
>>
This optimization can be applied to several of our intermediate
languages. We choose to perform it on the [LTL] language,
@@ -37,11 +38,14 @@ Require Import LTL.
dead code (as the "nop L3" in the example above).
*)
-(** The naive implementation of branch tunneling would replace
- any branch to a node [pc] by a branch to the node
- [branch_target f pc], defined as follows:
+(** The implementation consists in two passes: the first pass
+ records the branch t of each "nop"
+ and the second pass replaces any "nop" node to [pc]
+ by a branch to a "nop" at [branch_t f pc]
+
+Naively, we may define [branch_t f pc] as follows:
<<
- branch_target f pc = branch_target f pc' if f(pc) = nop pc'
+ branch_t f pc = branch_t f pc' if f(pc) = nop pc'
= pc otherwise
>>
However, this definition can fail to terminate if
@@ -50,56 +54,114 @@ Require Import LTL.
L1: nop L1;
>>
or
-<< L1: nop L2;
+<<
+ L1: nop L2;
L2: nop L1;
>>
Coq warns us of this fact by not accepting the definition
- of [branch_target] above.
+ of [branch_t] above.
+
+ To handle this problem, we use a union-find data structure, adding equalities [pc = pc']
+ for every instruction [pc: nop pc'] in the function.
+
+ Moreover, because the elimination of "useless" [Lcond] depends on the current [uf] datastructure,
+ we need to iterate until we reach a fixpoint.
+
+ Actually, it is simpler and more efficient to perform this in an external oracle, that also returns a measure
+ in order to help the proof.
+
+ A verifier checks that this data-structure is correct.
+*)
+
+Definition UF := PTree.t (node * Z).
- To handle this problem, we proceed in two passes. The first pass
- populates a union-find data structure, adding equalities [pc = pc']
- for every instruction [pc: nop pc'] in the function. *)
+(* The oracle returns a map from each "nop" node to its target, together with a distance (i.e. the number of "nop" nodes on the path) to that target. *)
+Axiom branch_target: LTL.function -> UF.
+Extract Constant branch_target => "Tunnelingaux.branch_target".
-Module U := UnionFind.UF(PTree).
+Local Open Scope error_monad_scope.
-Definition record_goto (uf: U.t) (pc: node) (b: bblock) : U.t :=
- match b with
- | Lbranch s :: _ => U.union uf pc s
- | _ => uf
+Definition get (td: UF) pc:node*Z :=
+ match td!pc with
+ | Some (t,d) => (t,Z.abs d)
+ | _ => (pc,0)
end.
-Definition record_gotos (f: LTL.function) : U.t :=
- PTree.fold record_goto f.(fn_code) U.empty.
+Definition target (td: UF) (pc:node): node := fst (get td pc).
+Coercion target: UF >-> Funclass.
+
+(* we check that the domain of [td] is included in the domain of [c] *)
+Definition check_included (td: UF) (c: code): option bblock
+ := PTree.fold (fun (ok:option bblock) pc _ => if ok then c!pc else None) td (Some nil).
+
+(* we check the validity of targets and their bound:
+ the distance of a "nop" node (w.r.t. the target) must be strictly greater than that of each of its successors.
+*)
+Definition check_bblock (td: UF) (pc:node) (bb: bblock): res unit
+ := match td!pc with
+ | None => OK tt
+ | Some (tpc, dpc) =>
+ let dpc := Z.abs dpc in
+ match bb with
+ | Lbranch s ::_ =>
+ let (ts, ds) := get td s in
+ if peq tpc ts then
+ if zlt ds dpc then OK tt
+ else Error (msg "bad distance in Lbranch")
+ else Error (msg "invalid skip of Lbranch")
+ | Lcond _ _ s1 s2 _ :: _ =>
+ let (ts1, ds1) := get td s1 in
+ let (ts2, ds2) := get td s2 in
+ if peq tpc ts1 then
+ if peq tpc ts2 then
+ if zlt ds1 dpc then
+ if zlt ds2 dpc then OK tt
+ else Error (msg "bad distance on else branch")
+ else Error (msg "bad distance on then branch")
+ else Error (msg "invalid skip of else branch")
+ else Error (msg "invalid skip of then branch")
+ | _ => Error (msg "cannot skip this block")
+ end
+ end.
+
+Definition check_code (td: UF) (c:code): res unit
+ := PTree.fold (fun ok pc bb => do _ <- ok; check_bblock td pc bb) c (OK tt).
(** The second pass rewrites all LTL instructions, replacing every
- successor [s] of every instruction by the canonical representative
+ successor [s] of every instruction by [t s], the canonical representative
of its equivalence class in the union-find data structure. *)
-Definition tunnel_instr (uf: U.t) (i: instruction) : instruction :=
+Definition tunnel_instr (t: node -> node) (i: instruction) : instruction :=
match i with
- | Lbranch s => Lbranch (U.repr uf s)
+ | Lbranch s => Lbranch (t s)
| Lcond cond args s1 s2 info =>
- let s1' := U.repr uf s1 in let s2' := U.repr uf s2 in
+ let s1' := t s1 in let s2' := t s2 in
if peq s1' s2'
then Lbranch s1'
else Lcond cond args s1' s2' info
- | Ljumptable arg tbl => Ljumptable arg (List.map (U.repr uf) tbl)
+ | Ljumptable arg tbl => Ljumptable arg (List.map t tbl)
| _ => i
end.
-Definition tunnel_block (uf: U.t) (b: bblock) : bblock :=
- List.map (tunnel_instr uf) b.
+Definition tunnel_block (t: node -> node) (b: bblock) : bblock :=
+ List.map (tunnel_instr t) b.
-Definition tunnel_function (f: LTL.function) : LTL.function :=
- let uf := record_gotos f in
- mkfunction
- (fn_sig f)
- (fn_stacksize f)
- (PTree.map1 (tunnel_block uf) (fn_code f))
- (U.repr uf (fn_entrypoint f)).
+Definition tunnel_function (f: LTL.function) : res LTL.function :=
+ let td := branch_target f in
+ let c := (fn_code f) in
+ if check_included td c then
+ do _ <- check_code td c ; OK
+ (mkfunction
+ (fn_sig f)
+ (fn_stacksize f)
+ (PTree.map1 (tunnel_block td) c)
+ (td (fn_entrypoint f)))
+ else
+ Error (msg "Some node of the union-find is not in the CFG")
+ .
-Definition tunnel_fundef (f: LTL.fundef) : LTL.fundef :=
- transf_fundef tunnel_function f.
+Definition tunnel_fundef (f: fundef) : res fundef :=
+ transf_partial_fundef tunnel_function f.
-Definition transf_program (p: LTL.program) : LTL.program :=
- transform_program tunnel_fundef p.
+Definition transf_program (p: program) : res program :=
+ transform_partial_program tunnel_fundef p.
diff --git a/backend/Tunnelingaux.ml b/backend/Tunnelingaux.ml
new file mode 100644
index 00000000..af89adea
--- /dev/null
+++ b/backend/Tunnelingaux.ml
@@ -0,0 +1,283 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* *)
+(* Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+(*
+
+This file implements the [branch_target] oracle that identifies "nop" branches in an LTL function,
+and computes their target node with the distance (i.e. the number of cumulated nops) toward this target.
+
+See [Tunneling.v]
+
+*)
+
+open Coqlib
+open LTL
+open Maps
+open Camlcoq
+
+let limit_tunneling = None (* for debugging: [Some x] limit the number of iterations *)
+let debug_flag = ref false
+let final_dump = false (* set to true to have a more verbose debugging *)
+
+let debug fmt =
+ if !debug_flag then Printf.eprintf fmt
+ else Printf.ifprintf stderr fmt
+
+exception BugOnPC of int
+
+(* type of labels in the cfg *)
+type label = int * P.t
+
+(* instructions under analysis *)
+type simple_inst = (* a simplified view of LTL instructions *)
+ LBRANCH of node
+| LCOND of node * node
+| OTHER
+and node = {
+ lab : label;
+ mutable inst: simple_inst;
+ mutable link: node; (* link in the union-find: itself for non "nop"-nodes, target of the "nop" otherwise *)
+ mutable dist: int;
+ mutable tag: int
+ }
+
+(* type of the (simplified) CFG *)
+type cfg = {
+ nodes: (int, node) Hashtbl.t;
+ mutable rems: node list; (* remaining conditions that may become lbranch or not *)
+ mutable num_rems: int;
+ mutable iter_num: int (* number of iterations in elimination of conditions *)
+ }
+
+let lab_i (n: node): int = fst n.lab
+let lab_p (n: node): P.t = snd n.lab
+
+let rec target c n = (* inspired from the "find" of union-find algorithm *)
+ match n.inst with
+ | LCOND(s1,s2) ->
+ if n.link != n
+ then update c n
+ else if n.tag < c.iter_num then (
+ (* we try to change the condition ... *)
+ n.tag <- c.iter_num; (* ... but at most once by iteration *)
+ let ts1 = target c s1 in
+ let ts2 = target c s2 in
+ if ts1 == ts2 then (n.link <- ts1; ts1) else n
+ ) else n
+ | _ ->
+ if n.link != n
+ then update c n
+ else n
+and update c n =
+ let t = target c n.link in
+ n.link <- t; t
+
+let get_node c p =
+ let li = P.to_int p in
+ try
+ Hashtbl.find c.nodes li
+ with
+ Not_found ->
+ let rec n = { lab = (li, p); inst = OTHER; link = n ; dist = 0; tag = 0 } in
+ Hashtbl.add c.nodes li n;
+ n
+
+let set_branch c p s =
+ let li = P.to_int p in
+ try
+ let n = Hashtbl.find c.nodes li in
+ n.inst <- LBRANCH s;
+ n.link <- target c s
+ with
+ Not_found ->
+ let n = { lab = (li,p); inst = LBRANCH s; link = target c s; dist = 0; tag = 0 } in
+ Hashtbl.add c.nodes li n
+
+
+(* build [c.nodes] and accumulate in [acc] conditions at beginning of LTL basic-blocks *)
+let build_simplified_cfg c acc pc bb =
+ match bb with
+ | Lbranch s :: _ ->
+ let ns = get_node c s in
+ set_branch c pc ns;
+ acc
+ | Lcond (_, _, s1, s2, _) :: _ ->
+ c.num_rems <- c.num_rems + 1;
+ let ns1 = get_node c s1 in
+ let ns2 = get_node c s2 in
+ let npc = get_node c pc in
+ npc.inst <- LCOND(ns1, ns2);
+ npc::acc
+ | _ -> acc
+
+(* try to change a condition into a branch
+[acc] is the current accumulator of conditions to consider in the next iteration of repeat_change_cond
+*)
+let try_change_cond c acc pc =
+ match pc.inst with
+ | LCOND(s1,s2) ->
+ let ts1 = target c s1 in
+ let ts2 = target c s2 in
+ if ts1 == ts2 then (
+ pc.link <- ts1;
+ c.num_rems <- c.num_rems - 1;
+ acc
+ ) else
+ pc::acc
+ | _ -> raise (BugOnPC (lab_i pc)) (* LCOND expected *)
+
+(* repeat [try_change_cond] until no condition is changed into a branch *)
+let rec repeat_change_cond c =
+ c.iter_num <- c.iter_num + 1;
+ debug "++ Tunneling.branch_target %d: remaining number of conds to consider = %d\n" (c.iter_num) (c.num_rems);
+ let old = c.num_rems in
+ c.rems <- List.fold_left (try_change_cond c) [] c.rems;
+ let curr = c.num_rems in
+ let continue =
+ match limit_tunneling with
+ | Some n -> curr < old && c.iter_num < n
+ | None -> curr < old
+ in
+ if continue
+ then repeat_change_cond c
+
+
+(* compute the final distance of each nop node to its target *)
+let undef_dist = -1
+let self_dist = undef_dist-1
+let rec dist n =
+ if n.dist = undef_dist
+ then (
+ n.dist <- self_dist; (* protection against an unexpected loop in the data-structure *)
+ n.dist <-
+ (match n.inst with
+ | OTHER -> 0
+ | LBRANCH p -> 1 + dist p
+ | LCOND (p1,p2) -> 1 + (max (dist p1) (dist p2)));
+ n.dist
+ ) else if n.dist=self_dist then raise (BugOnPC (lab_i n))
+ else n.dist
+
+let final_export f c =
+ let count = ref 0 in
+ let filter_nops_init_dist _ n acc =
+ let tn = target c n in
+ if tn == n
+ then (
+ n.dist <- 0; (* force [n] to be a base case in the recursion of [dist] *)
+ acc
+ ) else (
+ n.dist <- undef_dist; (* force [dist] to compute the actual [n.dist] *)
+ count := !count+1;
+ (tn, n)::acc
+ )
+ in
+ let nops = Hashtbl.fold filter_nops_init_dist c.nodes [] in
+ let res = List.fold_left (fun acc (tn,n) -> PTree.set (lab_p n) (lab_p tn, Z.of_uint (dist n)) acc) PTree.empty nops in
+ debug "* Tunneling.branch_target: final number of eliminated nops = %d\n" !count;
+ res
+
+(*********************************************)
+(*** START: printing and debugging functions *)
+
+let string_of_labeli nodes ipc =
+ try
+ let pc = Hashtbl.find nodes ipc in
+ if pc.link == pc
+ then Printf.sprintf "(Target@%d)" (dist pc)
+ else Printf.sprintf "(Nop %d @%d)" (lab_i pc.link) (dist pc)
+ with
+ Not_found -> ""
+
+let print_bblock c println (pc, bb) =
+ match bb with
+ | Lbranch s::_ -> (if println then debug "\n"); debug "%d:Lbranch %d %s\n" pc (P.to_int s) (string_of_labeli c.nodes pc); false
+ | Lcond (_, _, s1, s2, _)::_ -> (if println then debug "\n"); debug "%d:Lcond (%d,%d) %s\n" pc (P.to_int s1) (P.to_int s2) (string_of_labeli c.nodes pc); false
+ | _ -> debug "%d " pc; true
+
+
+let print_cfg f c =
+ let a = Array.of_list (PTree.fold (fun acc pc bb -> (P.to_int pc,bb)::acc) f.fn_code []) in
+ Array.fast_sort (fun (i1,_) (i2,_) -> i2 - i1) a;
+ let ep = P.to_int f.fn_entrypoint in
+ debug "entrypoint: %d %s\n" ep (string_of_labeli c.nodes ep);
+ let println = Array.fold_left (print_bblock c) false a in
+ (if println then debug "\n");debug "remaining cond:";
+ List.iter (fun n -> debug "%d " (lab_i n)) c.rems;
+ debug "\n"
+
+(*************************************************************)
+(* Copy-paste of the extracted code of the verifier *)
+(* with [raise (BugOnPC (P.to_int pc))] instead of [Error.*] *)
+
+let get td pc =
+ match PTree.get pc td with
+ | Some p -> let (t0, d) = p in (t0, d)
+ | None -> (pc, Z.of_uint 0)
+
+let check_bblock td pc bb =
+ match PTree.get pc td with
+ | Some p ->
+ let (tpc, dpc) = p in
+ let dpc0 = dpc in
+ (match bb with
+ | [] ->
+ raise (BugOnPC (P.to_int pc))
+ | i :: _ ->
+ (match i with
+ | Lbranch s ->
+ let (ts, ds) = get td s in
+ if peq tpc ts
+ then if zlt ds dpc0
+ then ()
+ else raise (BugOnPC (P.to_int pc))
+ else raise (BugOnPC (P.to_int pc))
+ | Lcond (_, _, s1, s2, _) ->
+ let (ts1, ds1) = get td s1 in
+ let (ts2, ds2) = get td s2 in
+ if peq tpc ts1
+ then if peq tpc ts2
+ then if zlt ds1 dpc0
+ then if zlt ds2 dpc0
+ then ()
+ else raise (BugOnPC (P.to_int pc))
+ else raise (BugOnPC (P.to_int pc))
+ else raise (BugOnPC (P.to_int pc))
+ else raise (BugOnPC (P.to_int pc))
+ | _ ->
+ raise (BugOnPC (P.to_int pc))))
+ | None -> ()
+
+(** val check_code : coq_UF -> code -> unit res **)
+
+let check_code td c =
+ PTree.fold (fun _ pc bb -> check_bblock td pc bb) c (())
+
+(*** END: copy-paste & debugging functions *******)
+
+let branch_target f =
+ debug "* Tunneling.branch_target: starting on a new function\n";
+ if limit_tunneling <> None then debug "* WARNING: limit_tunneling <> None\n";
+ let c = { nodes = Hashtbl.create 100; rems = []; num_rems = 0; iter_num = 0 } in
+ c.rems <- PTree.fold (build_simplified_cfg c) f.fn_code [];
+ repeat_change_cond c;
+ let res = final_export f c in
+ if !debug_flag then (
+ try
+ check_code res f.fn_code;
+ if final_dump then print_cfg f c;
+ with e -> (
+ print_cfg f c;
+ check_code res f.fn_code
+ )
+ );
+ res
diff --git a/backend/Tunnelingproof.v b/backend/Tunnelingproof.v
index cdf6c800..126b7b87 100644
--- a/backend/Tunnelingproof.v
+++ b/backend/Tunnelingproof.v
@@ -3,6 +3,7 @@
(* The Compcert verified compiler *)
(* *)
(* Xavier Leroy, INRIA Paris-Rocquencourt *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
(* *)
(* Copyright Institut National de Recherche en Informatique et en *)
(* Automatique. All rights reserved. This file is distributed *)
@@ -12,131 +13,163 @@
(** Correctness proof for the branch tunneling optimization. *)
-Require Import Coqlib Maps UnionFind.
+Require Import Coqlib Maps Errors.
Require Import AST Linking.
Require Import Values Memory Events Globalenvs Smallstep.
Require Import Op Locations LTL.
Require Import Tunneling.
-Definition match_prog (p tp: program) :=
- match_program (fun ctx f tf => tf = tunnel_fundef f) eq p tp.
+Local Open Scope nat.
-Lemma transf_program_match:
- forall p, match_prog p (transf_program p).
+
+(** * Properties of the branch_target, when the verifier succeeds *)
+
+Definition check_included_spec (c:code) (td:UF) (ok: option bblock) :=
+ ok <> None -> forall pc, c!pc = None -> td!pc = None.
+
+Lemma check_included_correct (td: UF) (c: code):
+ check_included_spec c td (check_included td c).
+Proof.
+ apply PTree_Properties.fold_rec with (P := check_included_spec c).
+- (* extensionality *)
+ unfold check_included_spec. intros m m' a EQ IND X pc. rewrite <- EQ; auto.
+- (* base case *)
+ intros _ pc. rewrite PTree.gempty; try congruence.
+- (* inductive case *)
+ unfold check_included_spec.
+ intros m [|] pc bb NEW ATPC IND; simpl; try congruence.
+ intros H pc0. rewrite PTree.gsspec; destruct (peq _ _); subst; simpl; try congruence.
+ intros; eapply IND; try congruence.
+Qed.
+
+Inductive target_bounds (target: node -> node) (bound: node -> nat) (pc: node): (option bblock) -> Prop :=
+ | TB_default (TB: target pc = pc) ob
+ : target_bounds target bound pc ob
+ | TB_branch s bb
+ (EQ: target pc = target s)
+ (DECREASE: bound s < bound pc)
+ : target_bounds target bound pc (Some (Lbranch s::bb))
+ | TB_cond cond args s1 s2 info bb
+ (EQ1: target pc = target s1)
+ (EQ2: target pc = target s2)
+ (DEC1: bound s1 < bound pc)
+ (DEC2: bound s2 < bound pc)
+ : target_bounds target bound pc (Some (Lcond cond args s1 s2 info::bb))
+ .
+Local Hint Resolve TB_default: core.
+
+Lemma target_None (td:UF) (pc: node): td!pc = None -> td pc = pc.
Proof.
- intros. eapply match_transform_program; eauto.
+ unfold target, get. intros H; rewrite H; auto.
Qed.
+Local Hint Resolve target_None Z.abs_nonneg: core.
-(** * Properties of the branch map computed using union-find. *)
+Lemma get_nonneg td pc t d: get td pc = (t, d) -> (0 <= d)%Z.
+Proof.
+ unfold get. destruct (td!_) as [(t0&d0)|]; intros H; inversion H; subst; simpl; omega || auto.
+Qed.
+Local Hint Resolve get_nonneg: core.
-(** A variant of [record_goto] that also incrementally computes a measure [f: node -> nat]
- counting the number of [Lnop] instructions starting at a given [pc] that were eliminated. *)
+Definition bound (td: UF) (pc: node) := Z.to_nat (snd (get td pc)).
-Definition measure_edge (u: U.t) (pc s: node) (f: node -> nat) : node -> nat :=
- fun x => if peq (U.repr u s) pc then f x
- else if peq (U.repr u x) pc then (f x + f s + 1)%nat
- else f x.
+Lemma check_bblock_correct (td:UF) (pc:node) (bb: bblock):
+ check_bblock td pc bb = OK tt ->
+ target_bounds (target td) (bound td) pc (Some bb).
+Proof.
+ unfold check_bblock, bound.
+ destruct (td!pc) as [(tpc&dpc)|] eqn:Hpc; auto.
+ assert (Tpc: td pc = tpc). { unfold target, get; rewrite Hpc; simpl; auto. }
+ assert (Dpc: snd (get td pc) = Z.abs dpc). { unfold get; rewrite Hpc; simpl; auto. }
+ destruct bb as [|[ ] bb]; simpl; try congruence.
+ + destruct (get td s) as (ts, ds) eqn:Hs.
+ repeat (destruct (peq _ _) || destruct (zlt _ _)); simpl; try congruence.
+ intros; apply TB_branch.
+ * rewrite Tpc. unfold target; rewrite Hs; simpl; auto.
+ * rewrite Dpc, Hs; simpl. apply Z2Nat.inj_lt; eauto.
+ + destruct (get td s1) as (ts1, ds1) eqn:Hs1.
+ destruct (get td s2) as (ts2, ds2) eqn:Hs2.
+ repeat (destruct (peq _ _) || destruct (zlt _ _)); simpl; try congruence.
+ intros; apply TB_cond.
+ * rewrite Tpc. unfold target; rewrite Hs1; simpl; auto.
+ * rewrite Tpc. unfold target; rewrite Hs2; simpl; auto.
+ * rewrite Dpc, Hs1; simpl. apply Z2Nat.inj_lt; eauto.
+ * rewrite Dpc, Hs2; simpl. apply Z2Nat.inj_lt; eauto.
+Qed.
-Definition record_goto' (uf: U.t * (node -> nat)) (pc: node) (b: bblock) : U.t * (node -> nat) :=
- match b with
- | Lbranch s :: b' => let (u, f) := uf in (U.union u pc s, measure_edge u pc s f)
- | _ => uf
- end.
+Definition check_code_spec (td:UF) (c:code) (ok: res unit) :=
+ ok = OK tt -> forall pc bb, c!pc = Some bb -> target_bounds (target td) (bound td) pc (Some bb).
-Definition branch_map_correct (c: code) (uf: U.t * (node -> nat)): Prop :=
- forall pc,
- match c!pc with
- | Some(Lbranch s :: b) =>
- U.repr (fst uf) pc = pc \/ (U.repr (fst uf) pc = U.repr (fst uf) s /\ snd uf s < snd uf pc)%nat
- | _ =>
- U.repr (fst uf) pc = pc
- end.
+Lemma check_code_correct (td:UF) c:
+ check_code_spec td c (check_code td c).
+Proof.
+ apply PTree_Properties.fold_rec with (P := check_code_spec td).
+- (* extensionality *)
+ unfold check_code_spec. intros m m' a EQ IND X pc bb; subst. rewrite <- ! EQ; eauto.
+- (* base case *)
+ intros _ pc. rewrite PTree.gempty; try congruence.
+- (* inductive case *)
+ unfold check_code_spec.
+ intros m [[]|] pc bb NEW ATPC IND; simpl; try congruence.
+ intros H pc0 bb0. rewrite PTree.gsspec; destruct (peq _ _); subst; simpl; auto.
+ intros X; inversion X; subst.
+ apply check_bblock_correct; auto.
+Qed.
-Lemma record_gotos'_correct:
- forall c,
- branch_map_correct c (PTree.fold record_goto' c (U.empty, fun (x: node) => O)).
+Theorem branch_target_bounds:
+ forall f tf pc,
+ tunnel_function f = OK tf ->
+ target_bounds (branch_target f) (bound (branch_target f)) pc (f.(fn_code)!pc).
Proof.
- intros.
- apply PTree_Properties.fold_rec with (P := fun c uf => branch_map_correct c uf).
+ unfold tunnel_function; intros f f' pc.
+ destruct (check_included _ _) eqn:H1; try congruence.
+ destruct (check_code _ _) as [[]|] eqn:H2; simpl; try congruence.
+ intros _.
+ destruct ((fn_code f)!pc) eqn:X.
+ - exploit check_code_correct; eauto.
+ - exploit check_included_correct; eauto.
+ congruence.
+Qed.
-- (* extensionality *)
- intros. red; intros. rewrite <- H. apply H0.
+Lemma tunnel_function_unfold:
+ forall f tf pc,
+ tunnel_function f = OK tf ->
+ (fn_code tf)!pc = option_map (tunnel_block (branch_target f)) (fn_code f)!pc.
+Proof.
+ unfold tunnel_function; intros f f' pc.
+ destruct (check_included _ _) eqn:H1; try congruence.
+ destruct (check_code _ _) as [[]|] eqn:H2; simpl; try congruence.
+ intros X; inversion X; clear X; subst.
+ simpl. rewrite PTree.gmap1. auto.
+Qed.
-- (* base case *)
- red; intros; simpl. rewrite PTree.gempty. apply U.repr_empty.
+Lemma tunnel_fundef_Internal:
+ forall f tf, tunnel_fundef (Internal f) = OK tf
+ -> exists tf', tunnel_function f = OK tf' /\ tf = Internal tf'.
+Proof.
+ intros f tf; simpl.
+ destruct (tunnel_function f) eqn:X; simpl; try congruence.
+ intros EQ; inversion EQ.
+ eexists; split; eauto.
+Qed.
-- (* inductive case *)
- intros m uf pc bb; intros. destruct uf as [u f].
- assert (PC: U.repr u pc = pc).
- generalize (H1 pc). rewrite H. auto.
- assert (record_goto' (u, f) pc bb = (u, f)
- \/ exists s, exists bb', bb = Lbranch s :: bb' /\ record_goto' (u, f) pc bb = (U.union u pc s, measure_edge u pc s f)).
- unfold record_goto'; simpl. destruct bb; auto. destruct i; auto. right. exists s; exists bb; auto.
- destruct H2 as [B | [s [bb' [EQ B]]]].
-
-+ (* u and f are unchanged *)
- rewrite B.
- red. intro pc'. simpl. rewrite PTree.gsspec. destruct (peq pc' pc). subst pc'.
- destruct bb; auto. destruct i; auto.
- apply H1.
-
-+ (* b is Lbranch s, u becomes union u pc s, f becomes measure_edge u pc s f *)
- rewrite B.
- red. intro pc'. simpl. rewrite PTree.gsspec. destruct (peq pc' pc). subst pc'. rewrite EQ.
-
-* (* The new instruction *)
- rewrite (U.repr_union_2 u pc s); auto. rewrite U.repr_union_3.
- unfold measure_edge. destruct (peq (U.repr u s) pc). auto. right. split. auto.
- rewrite PC. rewrite peq_true. omega.
-
-* (* An old instruction *)
- assert (U.repr u pc' = pc' -> U.repr (U.union u pc s) pc' = pc').
- { intro. rewrite <- H2 at 2. apply U.repr_union_1. congruence. }
- generalize (H1 pc'). simpl. destruct (m!pc'); auto. destruct b; auto. destruct i; auto.
- intros [P | [P Q]]. left; auto. right.
- split. apply U.sameclass_union_2. auto.
- unfold measure_edge. destruct (peq (U.repr u s) pc). auto.
- rewrite P. destruct (peq (U.repr u s0) pc). omega. auto.
-Qed.
-
-Definition record_gotos' (f: function) :=
- PTree.fold record_goto' f.(fn_code) (U.empty, fun (x: node) => O).
-
-Lemma record_gotos_gotos':
- forall f, fst (record_gotos' f) = record_gotos f.
-Proof.
- intros. unfold record_gotos', record_gotos.
- repeat rewrite PTree.fold_spec.
- generalize (PTree.elements (fn_code f)) (U.empty) (fun _ : node => O).
- induction l; intros; simpl.
- auto.
- unfold record_goto' at 2. unfold record_goto at 2.
- destruct (snd a). apply IHl. destruct i; apply IHl.
-Qed.
-
-Definition branch_target (f: function) (pc: node) : node :=
- U.repr (record_gotos f) pc.
-
-Definition count_gotos (f: function) (pc: node) : nat :=
- snd (record_gotos' f) pc.
-
-Theorem record_gotos_correct:
- forall f pc,
- match f.(fn_code)!pc with
- | Some(Lbranch s :: b) =>
- branch_target f pc = pc \/
- (branch_target f pc = branch_target f s /\ count_gotos f s < count_gotos f pc)%nat
- | _ => branch_target f pc = pc
- end.
+Lemma tunnel_fundef_External:
+ forall tf ef, tunnel_fundef (External ef) = OK tf
+ -> tf = External ef.
Proof.
- intros.
- generalize (record_gotos'_correct f.(fn_code) pc). simpl.
- fold (record_gotos' f). unfold branch_map_correct, branch_target, count_gotos.
- rewrite record_gotos_gotos'. auto.
+ intros tf ef; simpl. intros H; inversion H; auto.
Qed.
(** * Preservation of semantics *)
+Definition match_prog (p tp: program) :=
+ match_program (fun _ f tf => tunnel_fundef f = OK tf) eq p tp.
+
+Lemma transf_program_match:
+ forall prog tprog, transf_program prog = OK tprog -> match_prog prog tprog.
+Proof.
+ intros. eapply match_transform_partial_program_contextual; eauto.
+Qed.
+
Section PRESERVATION.
Variables prog tprog: program.
@@ -145,32 +178,65 @@ Let ge := Genv.globalenv prog.
Let tge := Genv.globalenv tprog.
Lemma functions_translated:
- forall v f,
+ forall (v: val) (f: fundef),
Genv.find_funct ge v = Some f ->
- Genv.find_funct tge v = Some (tunnel_fundef f).
-Proof (Genv.find_funct_transf TRANSL).
+ exists tf, tunnel_fundef f = OK tf /\ Genv.find_funct tge v = Some tf.
+Proof.
+ intros. exploit (Genv.find_funct_match TRANSL); eauto.
+ intros (cu & tf & A & B & C).
+ repeat eexists; intuition eauto.
+Qed.
Lemma function_ptr_translated:
forall v f,
Genv.find_funct_ptr ge v = Some f ->
- Genv.find_funct_ptr tge v = Some (tunnel_fundef f).
-Proof (Genv.find_funct_ptr_transf TRANSL).
+ exists tf,
+ Genv.find_funct_ptr tge v = Some tf /\ tunnel_fundef f = OK tf.
+Proof.
+ intros.
+ exploit (Genv.find_funct_ptr_transf_partial TRANSL); eauto.
+Qed.
-Lemma symbols_preserved:
- forall id,
- Genv.find_symbol tge id = Genv.find_symbol ge id.
-Proof (Genv.find_symbol_transf TRANSL).
+Lemma symbols_preserved s: Genv.find_symbol tge s = Genv.find_symbol ge s.
+Proof.
+ rewrite <- (Genv.find_symbol_match TRANSL). reflexivity.
+Qed.
Lemma senv_preserved:
Senv.equiv ge tge.
-Proof (Genv.senv_transf TRANSL).
+Proof.
+ eapply (Genv.senv_match TRANSL).
+Qed.
Lemma sig_preserved:
- forall f, funsig (tunnel_fundef f) = funsig f.
+ forall f tf, tunnel_fundef f = OK tf -> funsig tf = funsig f.
Proof.
- destruct f; reflexivity.
+ intros. destruct f.
+ - simpl in H. monadInv H. unfold tunnel_function in EQ.
+ destruct (check_included _ _); try congruence.
+ monadInv EQ. simpl; auto.
+ - simpl in H. monadInv H. reflexivity.
Qed.
+Lemma fn_stacksize_preserved:
+ forall f tf, tunnel_function f = OK tf -> fn_stacksize tf = fn_stacksize f.
+Proof.
+ intros f tf; unfold tunnel_function.
+ destruct (check_included _ _); try congruence.
+ destruct (check_code _ _); simpl; try congruence.
+ intros H; inversion H; simpl; auto.
+Qed.
+
+Lemma fn_entrypoint_preserved:
+ forall f tf, tunnel_function f = OK tf -> fn_entrypoint tf = branch_target f (fn_entrypoint f).
+Proof.
+ intros f tf; unfold tunnel_function.
+ destruct (check_included _ _); try congruence.
+ destruct (check_code _ _); simpl; try congruence.
+ intros H; inversion H; simpl; auto.
+Qed.
+
+
(** The proof of semantic preservation is a simulation argument
based on diagrams of the following form:
<<
@@ -185,7 +251,7 @@ Qed.
between states [st1] and [st2], as well as the postcondition between
[st1'] and [st2']. One transition in the source code (left) can correspond
to zero or one transition in the transformed code (right). The
- "zero transition" case occurs when executing a [Lgoto] instruction
+ "zero transition" case occurs when executing a [Lnop] instruction
in the source code that has been removed by tunneling.
In the definition of [match_states], what changes between the original and
@@ -194,52 +260,52 @@ Qed.
and memory states, since some [Vundef] values can become more defined
as a consequence of eliminating useless [Lcond] instructions. *)
-Definition tunneled_block (f: function) (b: bblock) :=
- tunnel_block (record_gotos f) b.
-
-Definition tunneled_code (f: function) :=
- PTree.map1 (tunneled_block f) (fn_code f).
-
Definition locmap_lessdef (ls1 ls2: locset) : Prop :=
forall l, Val.lessdef (ls1 l) (ls2 l).
Inductive match_stackframes: stackframe -> stackframe -> Prop :=
| match_stackframes_intro:
- forall f sp ls0 bb tls0,
+ forall f tf sp ls0 bb tls0,
locmap_lessdef ls0 tls0 ->
+ tunnel_function f = OK tf ->
match_stackframes
(Stackframe f sp ls0 bb)
- (Stackframe (tunnel_function f) sp tls0 (tunneled_block f bb)).
+ (Stackframe tf sp tls0 (tunnel_block (branch_target f) bb)).
Inductive match_states: state -> state -> Prop :=
| match_states_intro:
- forall s f sp pc ls m ts tls tm
+ forall s f tf sp pc ls m ts tls tm
(STK: list_forall2 match_stackframes s ts)
(LS: locmap_lessdef ls tls)
- (MEM: Mem.extends m tm),
+ (MEM: Mem.extends m tm)
+ (TF: tunnel_function f = OK tf),
match_states (State s f sp pc ls m)
- (State ts (tunnel_function f) sp (branch_target f pc) tls tm)
+ (State ts tf sp (branch_target f pc) tls tm)
| match_states_block:
- forall s f sp bb ls m ts tls tm
+ forall s f tf sp bb ls m ts tls tm
(STK: list_forall2 match_stackframes s ts)
(LS: locmap_lessdef ls tls)
- (MEM: Mem.extends m tm),
+ (MEM: Mem.extends m tm)
+ (TF: tunnel_function f = OK tf),
match_states (Block s f sp bb ls m)
- (Block ts (tunnel_function f) sp (tunneled_block f bb) tls tm)
+ (Block ts tf sp (tunnel_block (branch_target f) bb) tls tm)
| match_states_interm:
- forall s f sp pc bb ls m ts tls tm
+ forall s f tf sp pc i bb ls m ts tls tm
(STK: list_forall2 match_stackframes s ts)
(LS: locmap_lessdef ls tls)
- (MEM: Mem.extends m tm),
- match_states (Block s f sp (Lbranch pc :: bb) ls m)
- (State ts (tunnel_function f) sp (branch_target f pc) tls tm)
+ (MEM: Mem.extends m tm)
+ (IBRANCH: tunnel_instr (branch_target f) i = Lbranch pc)
+ (TF: tunnel_function f = OK tf),
+ match_states (Block s f sp (i :: bb) ls m)
+ (State ts tf sp pc tls tm)
| match_states_call:
- forall s f ls m ts tls tm
+ forall s f tf ls m ts tls tm
(STK: list_forall2 match_stackframes s ts)
(LS: locmap_lessdef ls tls)
- (MEM: Mem.extends m tm),
+ (MEM: Mem.extends m tm)
+ (TF: tunnel_fundef f = OK tf),
match_states (Callstate s f ls m)
- (Callstate ts (tunnel_fundef f) tls tm)
+ (Callstate ts tf tls tm)
| match_states_return:
forall s ls m ts tls tm
(STK: list_forall2 match_stackframes s ts)
@@ -289,22 +355,6 @@ Proof.
induction rl as [ | r rl]; intros; simpl. auto. apply locmap_set_undef_lessdef; auto.
Qed.
-(*
-Lemma locmap_undef_lessdef:
- forall ll ls1 ls2,
- locmap_lessdef ls1 ls2 -> locmap_lessdef (Locmap.undef ll ls1) (Locmap.undef ll ls2).
-Proof.
- induction ll as [ | l ll]; intros; simpl. auto. apply IHll. apply locmap_set_lessdef; auto.
-Qed.
-
-Lemma locmap_undef_lessdef_1:
- forall ll ls1 ls2,
- locmap_lessdef ls1 ls2 -> locmap_lessdef (Locmap.undef ll ls1) ls2.
-Proof.
- induction ll as [ | l ll]; intros; simpl. auto. apply IHll. apply locmap_set_undef_lessdef; auto.
-Qed.
-*)
-
Lemma locmap_getpair_lessdef:
forall p ls1 ls2,
locmap_lessdef ls1 ls2 -> Val.lessdef (Locmap.getpair p ls1) (Locmap.getpair p ls2).
@@ -348,15 +398,16 @@ Lemma find_function_translated:
forall ros ls tls fd,
locmap_lessdef ls tls ->
find_function ge ros ls = Some fd ->
- find_function tge ros tls = Some (tunnel_fundef fd).
+ exists tfd, tunnel_fundef fd = OK tfd /\ find_function tge ros tls = Some tfd.
Proof.
intros. destruct ros; simpl in *.
- assert (E: tls (R m) = ls (R m)).
{ exploit Genv.find_funct_inv; eauto. intros (b & EQ).
generalize (H (R m)). rewrite EQ. intros LD; inv LD. auto. }
- rewrite E. apply functions_translated; auto.
+ rewrite E. exploit functions_translated; eauto.
- rewrite symbols_preserved. destruct (Genv.find_symbol ge i); inv H0.
- apply function_ptr_translated; auto.
+ exploit function_ptr_translated; eauto.
+ intros (tf & X1 & X2). exists tf; intuition.
Qed.
Lemma call_regs_lessdef:
@@ -383,11 +434,12 @@ Qed.
Definition measure (st: state) : nat :=
match st with
- | State s f sp pc ls m => (count_gotos f pc * 2)%nat
- | Block s f sp (Lbranch pc :: _) ls m => (count_gotos f pc * 2 + 1)%nat
- | Block s f sp bb ls m => 0%nat
- | Callstate s f ls m => 0%nat
- | Returnstate s ls m => 0%nat
+ | State s f sp pc ls m => (bound (branch_target f) pc) * 2 (* stuttering measure compared in [tunnel_step_correct] ([measure st2 < measure st1]); even value: twice the bound of [pc] *)
+ | Block s f sp (Lbranch pc :: _) ls m => (bound (branch_target f) pc) * 2 + 1 (* odd value: strictly above the [State] measure at the branch target [pc] *)
+ | Block s f sp (Lcond _ _ pc1 pc2 _ :: _) ls m => (max (bound (branch_target f) pc1) (bound (branch_target f) pc2)) * 2 + 1 (* strictly above the [State] measure at either successor [pc1] or [pc2] *)
+ | Block s f sp bb ls m => 0 (* any block not starting with [Lbranch] or [Lcond] *)
+ | Callstate s f ls m => 0
+ | Returnstate s ls m => 0
end.
Lemma match_parent_locset:
@@ -406,24 +458,23 @@ Lemma tunnel_step_correct:
(exists st2', step tge st1' t st2' /\ match_states st2 st2')
\/ (measure st2 < measure st1 /\ t = E0 /\ match_states st2 st1')%nat.
Proof.
- induction 1; intros; try inv MS.
+ induction 1; intros; try inv MS; try (simpl in IBRANCH; inv IBRANCH).
- (* entering a block *)
- assert (DEFAULT: branch_target f pc = pc ->
- (exists st2' : state,
- step tge (State ts (tunnel_function f) sp (branch_target f pc) tls tm) E0 st2'
- /\ match_states (Block s f sp bb rs m) st2')).
- { intros. rewrite H0. econstructor; split.
- econstructor. simpl. rewrite PTree.gmap1. rewrite H. simpl. eauto.
- econstructor; eauto. }
-
- generalize (record_gotos_correct f pc). rewrite H.
- destruct bb; auto. destruct i; auto.
- intros [A | [B C]]. auto.
- right. split. simpl. omega.
- split. auto.
- rewrite B. econstructor; eauto.
-
+ exploit (branch_target_bounds f tf pc); eauto.
+ rewrite H. intros X; inversion X.
+ + (* TB_default *)
+ rewrite TB; left. econstructor; split.
+ * econstructor. simpl. erewrite tunnel_function_unfold, H ; simpl; eauto.
+ * econstructor; eauto.
+ + (* TB_branch *)
+ simpl; right.
+ rewrite EQ; repeat (econstructor; omega || eauto).
+ + (* TB_cond *)
+ simpl; right.
+ repeat (econstructor; omega || eauto); simpl.
+ apply Nat.max_case; omega.
+ destruct (peq _ _); try congruence.
- (* Lop *)
exploit eval_operation_lessdef. apply reglist_lessdef; eauto. eauto. eauto.
intros (tv & EV & LD).
@@ -485,20 +536,25 @@ Proof.
eauto. eauto.
econstructor; eauto using locmap_undef_regs_lessdef.
- (* Lcall *)
- left; simpl; econstructor; split.
- eapply exec_Lcall with (fd := tunnel_fundef fd); eauto.
- eapply find_function_translated; eauto.
- rewrite sig_preserved. auto.
- econstructor; eauto.
- constructor; auto.
- constructor; auto.
+ left; simpl.
+ exploit find_function_translated; eauto.
+ intros (tfd & Htfd & FIND).
+ econstructor; split.
+ + eapply exec_Lcall; eauto.
+ erewrite sig_preserved; eauto.
+ + econstructor; eauto.
+ constructor; auto.
+ constructor; auto.
- (* Ltailcall *)
- exploit Mem.free_parallel_extends. eauto. eauto. intros (tm' & FREE & MEM').
+ exploit find_function_translated. 2: eauto.
+ { eauto using return_regs_lessdef, match_parent_locset. }
+ intros (tfd & Htfd & FIND).
+ exploit Mem.free_parallel_extends. eauto. eauto. intros (tm' & FREE & MEM').
left; simpl; econstructor; split.
- eapply exec_Ltailcall with (fd := tunnel_fundef fd); eauto.
- eapply find_function_translated; eauto using return_regs_lessdef, match_parent_locset.
- apply sig_preserved.
- econstructor; eauto using return_regs_lessdef, match_parent_locset.
+ + eapply exec_Ltailcall; eauto.
+ * eapply sig_preserved; eauto.
+ * erewrite fn_stacksize_preserved; eauto.
+ + econstructor; eauto using return_regs_lessdef, match_parent_locset.
- (* Lbuiltin *)
exploit eval_builtin_args_lessdef. eexact LS. eauto. eauto. intros (tvargs & EVA & LDA).
exploit external_call_mem_extends; eauto. intros (tvres & tm' & A & B & C & D).
@@ -513,45 +569,58 @@ Proof.
fold (branch_target f pc). econstructor; eauto.
- (* Lbranch (eliminated) *)
right; split. simpl. omega. split. auto. constructor; auto.
-
-- (* Lcond *)
- simpl tunneled_block.
- set (s1 := U.repr (record_gotos f) pc1). set (s2 := U.repr (record_gotos f) pc2).
- destruct (peq s1 s2).
-+ left; econstructor; split.
- eapply exec_Lbranch.
- destruct b.
-* constructor; eauto using locmap_undef_regs_lessdef_1.
-* rewrite e. constructor; eauto using locmap_undef_regs_lessdef_1.
-+ left; econstructor; split.
- eapply exec_Lcond; eauto. eapply eval_condition_lessdef; eauto using reglist_lessdef.
- destruct b; econstructor; eauto using locmap_undef_regs_lessdef.
-
+- (* Lcond (preserved) *)
+ simpl; left; destruct (peq _ _) eqn: EQ.
+ + econstructor; split.
+ eapply exec_Lbranch.
+ destruct b.
+ * constructor; eauto using locmap_undef_regs_lessdef_1.
+ * rewrite e. constructor; eauto using locmap_undef_regs_lessdef_1.
+ + econstructor; split.
+ eapply exec_Lcond; eauto. eapply eval_condition_lessdef; eauto using reglist_lessdef.
+ destruct b; econstructor; eauto using locmap_undef_regs_lessdef.
+- (* Lcond (eliminated) *)
+ destruct (peq _ _) eqn: EQ; try inv H1.
+ right; split; simpl.
+ + destruct b.
+ generalize (Nat.le_max_l (bound (branch_target f) pc1) (bound (branch_target f) pc2)); omega.
+ generalize (Nat.le_max_r (bound (branch_target f) pc1) (bound (branch_target f) pc2)); omega.
+ + destruct b.
+ -- repeat (constructor; auto).
+ -- rewrite e; repeat (constructor; auto).
- (* Ljumptable *)
assert (tls (R arg) = Vint n).
{ generalize (LS (R arg)); rewrite H; intros LD; inv LD; auto. }
left; simpl; econstructor; split.
eapply exec_Ljumptable.
- eauto. rewrite list_nth_z_map. change U.elt with node. rewrite H0. reflexivity. eauto.
+ eauto. rewrite list_nth_z_map, H0; simpl; eauto. eauto.
econstructor; eauto using locmap_undef_regs_lessdef.
- (* Lreturn *)
exploit Mem.free_parallel_extends. eauto. eauto. intros (tm' & FREE & MEM').
left; simpl; econstructor; split.
- eapply exec_Lreturn; eauto.
- constructor; eauto using return_regs_lessdef, match_parent_locset.
+ + eapply exec_Lreturn; eauto.
+ erewrite fn_stacksize_preserved; eauto.
+ + constructor; eauto using return_regs_lessdef, match_parent_locset.
- (* internal function *)
+ exploit tunnel_fundef_Internal; eauto.
+ intros (tf' & TF' & ITF). subst.
exploit Mem.alloc_extends. eauto. eauto. apply Z.le_refl. apply Z.le_refl.
- intros (tm' & ALLOC & MEM').
- left; simpl; econstructor; split.
- eapply exec_function_internal; eauto.
- simpl. econstructor; eauto using locmap_undef_regs_lessdef, call_regs_lessdef.
+ intros (tm' & ALLOC & MEM').
+ left; simpl.
+ econstructor; split.
+ + eapply exec_function_internal; eauto.
+ erewrite fn_stacksize_preserved; eauto.
+ + simpl.
+ erewrite (fn_entrypoint_preserved f tf'); auto.
+ econstructor; eauto using locmap_undef_regs_lessdef, call_regs_lessdef.
- (* external function *)
exploit external_call_mem_extends; eauto using locmap_getpairs_lessdef.
intros (tvres & tm' & A & B & C & D).
left; simpl; econstructor; split.
- eapply exec_function_external; eauto.
- eapply external_call_symbols_preserved; eauto. apply senv_preserved.
- simpl. econstructor; eauto using locmap_setpair_lessdef, locmap_undef_caller_save_regs_lessdef.
+ + erewrite (tunnel_fundef_External tf ef); eauto.
+ eapply exec_function_external; eauto.
+ eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+ + simpl. econstructor; eauto using locmap_setpair_lessdef, locmap_undef_caller_save_regs_lessdef.
- (* return *)
inv STK. inv H1.
left; econstructor; split.
@@ -564,14 +633,15 @@ Lemma transf_initial_states:
exists st2, initial_state tprog st2 /\ match_states st1 st2.
Proof.
intros. inversion H.
- exists (Callstate nil (tunnel_fundef f) (Locmap.init Vundef) m0); split.
+ exploit function_ptr_translated; eauto.
+ intros (tf & Htf & Hf).
+ exists (Callstate nil tf (Locmap.init Vundef) m0); split.
econstructor; eauto.
- apply (Genv.init_mem_transf TRANSL); auto.
+ apply (Genv.init_mem_transf_partial TRANSL); auto.
rewrite (match_program_main TRANSL).
rewrite symbols_preserved. eauto.
- apply function_ptr_translated; auto.
- rewrite <- H3. apply sig_preserved.
- constructor. constructor. red; simpl; auto. apply Mem.extends_refl.
+ rewrite <- H3. apply sig_preserved. auto.
+ constructor. constructor. red; simpl; auto. apply Mem.extends_refl. auto.
Qed.
Lemma transf_final_states:
diff --git a/driver/Compiler.vexpand b/driver/Compiler.vexpand
index 80db9097..a751b232 100644
--- a/driver/Compiler.vexpand
+++ b/driver/Compiler.vexpand
@@ -54,7 +54,7 @@ Require Import Compopts.
Parameter print_Clight: Clight.program -> unit.
Parameter print_Cminor: Cminor.program -> unit.
Parameter print_RTL: Z -> RTL.program -> unit.
-Parameter print_LTL: LTL.program -> unit.
+Parameter print_LTL: Z -> LTL.program -> unit.
Parameter print_Mach: Mach.program -> unit.
Local Open Scope string_scope.
diff --git a/lib/UnionFind.v b/lib/UnionFind.v
index 20bb91cd..bd1b763b 100644
--- a/lib/UnionFind.v
+++ b/lib/UnionFind.v
@@ -124,6 +124,15 @@ Module Type UNIONFIND.
pathlen uf x + pathlen uf b + 1
else
pathlen uf x.
+ Axiom pathlen_union: (* path length of an arbitrary element [x] after [union uf a b], expressed with [pathlen uf] and [repr uf] *)
+ forall uf a b x,
+ pathlen (union uf a b) x =
+ if elt_eq (repr uf a) (repr uf b) then
+ pathlen uf x (* [a] and [b] already have the same representative: length unchanged *)
+ else if elt_eq (repr uf x) (repr uf a) then
+ (pathlen uf x)+1 (* [x] has the same representative as [a]: one more step *)
+ else
+ (pathlen uf x). (* every other element keeps its length *)
Axiom pathlen_gt_merge:
forall uf a b x y,
repr uf x = repr uf y ->
@@ -531,6 +540,7 @@ Qed.
End PATHLEN.
+
(* Path length and merge *)
Lemma pathlen_merge:
@@ -549,16 +559,49 @@ Proof.
set (uf' := identify uf (repr uf a) b (repr_res_none uf a) (not_eq_sym n)).
pattern x. apply (well_founded_ind (mwf uf')); intros.
rewrite (pathlen_unroll uf'). destruct (M.get x0 (m uf')) as [x'|] eqn:G.
- rewrite H; auto. simpl in G. rewrite M.gsspec in G.
- destruct (M.elt_eq x0 (repr uf a)). rewrite e. rewrite repr_canonical. rewrite dec_eq_true.
- inversion G. subst x'. rewrite dec_eq_false; auto.
- replace (pathlen uf (repr uf a)) with 0. omega.
- symmetry. apply pathlen_none. apply repr_res_none.
- rewrite (repr_unroll uf x0), (pathlen_unroll uf x0); rewrite G.
- destruct (M.elt_eq (repr uf x') (repr uf a)); omega.
- simpl in G. rewrite M.gsspec in G. destruct (M.elt_eq x0 (repr uf a)); try discriminate.
- rewrite (repr_none uf x0) by auto. rewrite dec_eq_false; auto.
- symmetry. apply pathlen_zero; auto. apply repr_none; auto.
+ + rewrite H; auto. clear H. simpl in G. rewrite M.gsspec in G.
+ destruct (M.elt_eq x0 (repr uf a)).
+ - rewrite e, repr_canonical, dec_eq_true.
+ inversion G. subst x'. rewrite dec_eq_false; auto.
+ replace (pathlen uf (repr uf a)) with 0; try omega.
+ symmetry. apply pathlen_none. apply repr_res_none.
+ - rewrite (repr_unroll uf x0), (pathlen_unroll uf x0), G.
+ destruct (M.elt_eq (repr uf x') (repr uf a)); omega.
+ + clear H; simpl in G. rewrite M.gsspec in G. destruct (M.elt_eq x0 (repr uf a)); try discriminate.
+ rewrite (repr_none uf x0) by auto. rewrite dec_eq_false; auto.
+ symmetry. apply pathlen_zero; auto. apply repr_none; auto.
+Qed.
+
+Lemma pathlen_union: (* path length of an arbitrary element [x] after [union uf a b], expressed with [pathlen uf] and [repr uf] *)
+ forall uf a b x,
+ pathlen (union uf a b) x =
+ if M.elt_eq (repr uf a) (repr uf b) then
+ pathlen uf x (* [a] and [b] already have the same representative: length unchanged *)
+ else if M.elt_eq (repr uf x) (repr uf a) then
+ (pathlen uf x)+1 (* [x] has the same representative as [a]: one more step *)
+ else
+ (pathlen uf x). (* every other element keeps its length *)
+Proof.
+ intros. unfold union.
+ destruct (M.elt_eq (repr uf a) (repr uf b)).
+ auto. (* case [repr uf a = repr uf b] *)
+ set (uf' := identify uf _ _ _ _). (* case [repr uf a <> repr uf b]: name the structure built by [identify] *)
+ assert (LENa: pathlen uf (repr uf a) = 0). (* used below when [x0] is [repr uf a] *)
+ { apply pathlen_none. apply repr_res_none. }
+ pattern x. apply (well_founded_ind (mwf uf')); intros. (* well-founded induction on [x] along [mwf uf'] *)
+ rewrite (pathlen_unroll uf'). destruct (M.get x0 (m uf')) as [x'|] eqn:G.
+ + rewrite H; auto. clear H. simpl in G. rewrite M.gsspec in G. (* [M.get x0 (m uf') = Some x']: apply the induction hypothesis to [x'] *)
+ destruct (M.elt_eq x0 (repr uf a)).
+ - inversion G; clear G. subst. (* sub-case [x0 = repr uf a] *)
+ rewrite !repr_canonical, dec_eq_true.
+ rewrite dec_eq_false; auto.
+ rewrite LENa. rewrite (pathlen_none uf (repr uf b)); try omega.
+ apply repr_res_none.
+ - rewrite (repr_unroll uf x0), G, ! (pathlen_some _ _ _ G). (* sub-case [x0 <> repr uf a]: [G] now speaks about [m uf] *)
+ destruct (M.elt_eq _ _); auto.
+ + clear H. simpl in G. rewrite M.gsspec in G. (* [M.get x0 (m uf') = None] *)
+ destruct (M.elt_eq _ (repr uf a)); try discriminate.
+ rewrite (repr_none _ _ G), !(pathlen_none _ _ G), dec_eq_false; auto.
Qed.
Lemma pathlen_gt_merge:
diff --git a/tools/compiler_expand.ml b/tools/compiler_expand.ml
index 6af0ec59..a6ae9b1f 100644
--- a/tools/compiler_expand.ml
+++ b/tools/compiler_expand.ml
@@ -54,7 +54,7 @@ let post_rtl_passes =
PARTIAL, Always, Require, (Some "Prepass scheduling"), "RTLpathScheduler", Noprint;
TOTAL, Always, Require, (Some "Projection to RTL"), "RTLpath", (Print (Printf.sprintf "RTL %d" ((Array.length rtl_passes) + 1)));
PARTIAL, Always, Require, (Some "Register allocation"), "Allocation", (Print "LTL");
- TOTAL, Always, Require, (Some "Branch tunneling"), "Tunneling", Noprint;
+ PARTIAL, Always, Require, (Some "Branch tunneling"), "Tunneling", (Print "LTL 2");
PARTIAL, Always, Require, (Some "CFG linearization"), "Linearize", Noprint;
TOTAL, Always, Require, (Some "Label cleanup"), "CleanupLabels", Noprint;
PARTIAL, (Option "debug"), Require, (Some "Debugging info for local variables"), "Debugvar", Noprint;