aboutsummaryrefslogtreecommitdiffstats
path: root/backend
diff options
context:
space:
mode:
authorXavier Leroy <xavier.leroy@college-de-france.fr>2020-12-15 19:29:29 +0100
committerXavier Leroy <xavierleroy@users.noreply.github.com>2021-01-13 14:45:05 +0100
commit7f152e2f27d82f0d502ee919e1576edefcd44cf5 (patch)
tree36cf28dd969faccc7635b8a259860ab62db1604d /backend
parente16f5d1de08bb0c3ac561c2ea6a98f49ed8f49d5 (diff)
downloadcompcert-7f152e2f27d82f0d502ee919e1576edefcd44cf5.tar.gz
compcert-7f152e2f27d82f0d502ee919e1576edefcd44cf5.zip
Improve branch tunneling
The previous branch tunneling was missing optimization opportunities introduced by the optimization of conditional branches. For example: L1: instr; branch L2 L2: if cond then branch L3 else branch L4 L3: branch L4 L4: ... was transformed into L1: instr; branch L2 L2: branch L4 L3: branch L4 L4: ... missing a tunneling opportunity (branch L2 -> branch L4). This commit improves branch tunneling so that the expected code is produced: L1: instr; branch L4 L2: branch L4 L3: branch L4 L4: ... To this end, additional equalities are introduced in the union-find data structure corresponding to optimizable conditional branches. In rare cases these additional equalities trigger new opportunities for optimizing conditional branches. Hence we iterate the analysis until no optimizable conditional branch remains.
Diffstat (limited to 'backend')
-rw-r--r--backend/Tunneling.v135
-rw-r--r--backend/Tunnelingproof.v253
2 files changed, 328 insertions, 60 deletions
diff --git a/backend/Tunneling.v b/backend/Tunneling.v
index da1ce45a..265e06ba 100644
--- a/backend/Tunneling.v
+++ b/backend/Tunneling.v
@@ -12,6 +12,7 @@
(** Branch tunneling (optimization of branches to branches). *)
+Require Import FunInd.
Require Import Coqlib Maps UnionFind.
Require Import AST.
Require Import LTL.
@@ -21,8 +22,8 @@ Require Import LTL.
so that they jump directly to the end of the branch sequence.
For example:
<<
- L1: nop L2; L1: nop L3;
- L2; nop L3; becomes L2: nop L3;
+ L1: branch L2; L1: branch L3;
+ L2; branch L3; becomes L2: branch L3;
L3: instr; L3: instr;
L4: if (cond) goto L1; L4: if (cond) goto L3;
>>
@@ -33,70 +34,156 @@ Require Import LTL.
computations or useless moves), therefore there are more
opportunities for tunneling after allocation than before.
Symmetrically, prior tunneling helps linearization to produce
- better code, e.g. by revealing that some [nop] instructions are
- dead code (as the "nop L3" in the example above).
+ better code, e.g. by revealing that some [branch] instructions are
+ dead code (as the "branch L3" in the example above).
*)
(** The naive implementation of branch tunneling would replace
any branch to a node [pc] by a branch to the node
[branch_target f pc], defined as follows:
<<
- branch_target f pc = branch_target f pc' if f(pc) = nop pc'
+ branch_target f pc = branch_target f pc' if f(pc) = branch pc'
= pc otherwise
>>
However, this definition can fail to terminate if
the program can contain loops consisting only of branches, as in
<<
- L1: nop L1;
+ L1: branch L1;
>>
or
-<< L1: nop L2;
- L2: nop L1;
+<<
+ L1: branch L2;
+ L2: branch L1;
>>
Coq warns us of this fact by not accepting the definition
of [branch_target] above.
- To handle this problem, we proceed in two passes. The first pass
- populates a union-find data structure, adding equalities [pc = pc']
- for every instruction [pc: nop pc'] in the function. *)
+ To handle this problem, we proceed in two passes:
+
+- The first pass populates a union-find data structure, adding equalities
+ between PCs of blocks that are connected by branches and no other
+ computation.
+
+- The second pass rewrites the code, replacing every branch to a node [pc]
+ by a branch to the canonical representative of the equivalence class of [pc].
+*)
+
+(** * Construction of the union-find data structure *)
Module U := UnionFind.UF(PTree).
-Definition record_goto (uf: U.t) (pc: node) (b: bblock) : U.t :=
+(** We start populating the union-find data structure by adding
+ equalities [pc = pc'] for every block [pc: branch pc'] in the function. *)
+
+Definition record_branch (uf: U.t) (pc: node) (b: bblock) : U.t :=
match b with
| Lbranch s :: _ => U.union uf pc s
| _ => uf
end.
+Definition record_branches (f: LTL.function) : U.t :=
+ PTree.fold record_branch f.(fn_code) U.empty.
+
+(** An additional optimization opportunity comes from conditional branches.
+ Consider a block [pc: cond ifso ifnot]. If the [ifso] case
+ and the [ifnot] case jump to the same block [pc']
+ (modulo intermediate branches), the block can be simplified into
+ [pc: branch pc'], and the equality [pc = pc'] can be added to the
+ union-find data structure. *)
+
+(** In rare cases, the extra equation [pc = pc'] introduced by the
+ simplification of a conditional branch can trigger further simplifications
+ of other conditional branches. We therefore iterate the analysis
+ until no optimizable conditional branch remains. *)
+
+(** The code [c] (first component of the [st] triple) starts identical
+ to the code [fn.(fn_code)] of the current function, but each time
+ conditional branch at [pc] is optimized, we remove the block at
+ [pc] from the code [c]. This guarantees termination of the
+ iteration. *)
+
+Definition record_cond (st: code * U.t * bool) (pc: node) (b: bblock) : code * U.t * bool :=
+ match b with
+ | Lcond cond args s1 s2 :: _ =>
+ let '(c, u, _) := st in
+ if peq (U.repr u s1) (U.repr u s2)
+ then (PTree.remove pc c, U.union u pc s1, true)
+ else st
+ | _ =>
+ st
+ end.
+
+Definition record_conds_1 (cu: code * U.t) : code * U.t * bool :=
+ let (c, u) := cu in PTree.fold record_cond c (c, u, false).
+
+Definition measure_state (cu: code * U.t) : nat :=
+ PTree_Properties.cardinal (fst cu).
+
+Function record_conds (cu: code * U.t) {measure measure_state cu} : U.t :=
+ let (cu', changed) := record_conds_1 cu in
+ if changed then record_conds cu' else snd cu.
+Proof.
+ intros [c0 u0] [c1 u1].
+ set (P := fun (c: code) (s: code * U.t * bool) =>
+ (forall pc, c!pc = None -> (fst (fst s))!pc = c0!pc) /\
+ (PTree_Properties.cardinal (fst (fst s))
+ + (if snd s then 1 else 0)
+ <= PTree_Properties.cardinal c0)%nat).
+ assert (A: P c0 (PTree.fold record_cond c0 (c0, u0, false))).
+ { apply PTree_Properties.fold_rec; unfold P.
+ - intros. destruct H0; split; auto. intros. rewrite <- H in H2. auto.
+ - simpl; split; intros. auto. simpl; lia.
+ - intros cd [[c u] changed] pc b NONE SOME [HR1 HR2]. simpl. split.
+ + intros p EQ. rewrite PTree.gsspec in EQ. destruct (peq p pc); try discriminate.
+ unfold record_cond. destruct b as [ | [] b ]; auto.
+ destruct (peq (U.repr u s1) (U.repr u s2)); auto.
+ simpl. rewrite PTree.gro by auto. auto.
+ + unfold record_cond. destruct b as [ | [] b ]; auto.
+ destruct (peq (U.repr u s1) (U.repr u s2)); auto.
+ simpl in *.
+ assert (SOME': c!pc = Some (Lcond cond args s1 s2 :: b)).
+ { rewrite HR1 by auto. auto. }
+ generalize (PTree_Properties.cardinal_remove SOME').
+ destruct changed; lia.
+ }
+ unfold record_conds_1, measure_state; intros.
+ destruct A as [_ A]. rewrite teq in A. simpl in *.
+ lia.
+Qed.
+
Definition record_gotos (f: LTL.function) : U.t :=
- PTree.fold record_goto f.(fn_code) U.empty.
+ record_conds (f.(fn_code), record_branches f).
+
+(** * Code transformation *)
-(** The second pass rewrites all LTL instructions, replacing every
+(** The code transformation rewrites all LTL instruction, replacing every
successor [s] of every instruction by the canonical representative
- of its equivalence class in the union-find data structure. *)
+ of its equivalence class in the union-find data structure.
+ Additionally, [Lcond] conditional branches are turned into [Lbranch]
+ unconditional branches whenever possible. *)
-Definition tunnel_instr (uf: U.t) (i: instruction) : instruction :=
+Definition tunnel_instr (u: U.t) (i: instruction) : instruction :=
match i with
- | Lbranch s => Lbranch (U.repr uf s)
+ | Lbranch s => Lbranch (U.repr u s)
| Lcond cond args s1 s2 =>
- let s1' := U.repr uf s1 in let s2' := U.repr uf s2 in
+ let s1' := U.repr u s1 in let s2' := U.repr u s2 in
if peq s1' s2'
then Lbranch s1'
else Lcond cond args s1' s2'
- | Ljumptable arg tbl => Ljumptable arg (List.map (U.repr uf) tbl)
+ | Ljumptable arg tbl => Ljumptable arg (List.map (U.repr u) tbl)
| _ => i
end.
-Definition tunnel_block (uf: U.t) (b: bblock) : bblock :=
- List.map (tunnel_instr uf) b.
+Definition tunnel_block (u: U.t) (b: bblock) : bblock :=
+ List.map (tunnel_instr u) b.
Definition tunnel_function (f: LTL.function) : LTL.function :=
- let uf := record_gotos f in
+ let u := record_gotos f in
mkfunction
(fn_sig f)
(fn_stacksize f)
- (PTree.map1 (tunnel_block uf) (fn_code f))
- (U.repr uf (fn_entrypoint f)).
+ (PTree.map1 (tunnel_block u) (fn_code f))
+ (U.repr u (fn_entrypoint f)).
Definition tunnel_fundef (f: LTL.fundef) : LTL.fundef :=
transf_fundef tunnel_function f.
diff --git a/backend/Tunnelingproof.v b/backend/Tunnelingproof.v
index d514c16f..68913fc9 100644
--- a/backend/Tunnelingproof.v
+++ b/backend/Tunnelingproof.v
@@ -12,6 +12,7 @@
(** Correctness proof for the branch tunneling optimization. *)
+Require Import FunInd.
Require Import Coqlib Maps UnionFind.
Require Import AST Linking.
Require Import Values Memory Events Globalenvs Smallstep.
@@ -29,12 +30,21 @@ Qed.
(** * Properties of the branch map computed using union-find. *)
-Definition measure_edge (u: U.t) (pc s: node) (f: node -> nat) : node -> nat :=
+Section BRANCH_MAP_CORRECT.
+
+Variable fn: LTL.function.
+
+Definition measure_branch (u: U.t) (pc s: node) (f: node -> nat) : node -> nat :=
fun x => if peq (U.repr u s) pc then f x
else if peq (U.repr u x) pc then (f x + f s + 1)%nat
else f x.
-Definition branch_map_correct (c: code) (u: U.t) (f: node -> nat): Prop :=
+Definition measure_cond (u: U.t) (pc s1 s2: node) (f: node -> nat) : node -> nat :=
+ fun x => if peq (U.repr u s1) pc then f x
+ else if peq (U.repr u x) pc then (f x + Nat.max (f s1) (f s2) + 1)%nat
+ else f x.
+
+Definition branch_map_correct_1 (c: code) (u: U.t) (f: node -> nat): Prop :=
forall pc,
match c!pc with
| Some(Lbranch s :: b) =>
@@ -43,59 +53,209 @@ Definition branch_map_correct (c: code) (u: U.t) (f: node -> nat): Prop :=
U.repr u pc = pc
end.
-Lemma record_gotos_correct_1:
- forall fn, { f | branch_map_correct fn.(fn_code) (record_gotos fn) f }.
+Lemma record_branch_correct:
+ forall c u f pc b,
+ branch_map_correct_1 (PTree.remove pc c) u f ->
+ c!pc = Some b ->
+ { f' | branch_map_correct_1 c (record_branch u pc b) f' }.
Proof.
- intros.
- unfold record_gotos. apply PTree_Properties.fold_ind.
-
-- (* base case *)
- intros m EMPTY. exists (fun _ => O).
- red; intros. rewrite EMPTY. apply U.repr_empty.
-
-- (* inductive case *)
- intros m u pc bb GET1 GET2 [f BMC].
+ intros c u f pc b BMC GET1.
assert (PC: U.repr u pc = pc).
{ specialize (BMC pc). rewrite PTree.grs in BMC. auto. }
- assert (DFL: { f | branch_map_correct m u f }).
+ assert (DFL: { f | branch_map_correct_1 c u f }).
{ exists f. intros p. destruct (peq p pc).
- - subst p. rewrite GET1. destruct bb as [ | [] bb ]; auto.
+ - subst p. rewrite GET1. destruct b as [ | [] b ]; auto.
- specialize (BMC p). rewrite PTree.gro in BMC by auto. exact BMC.
}
- unfold record_goto. destruct bb as [ | [] bb ]; auto.
- exists (measure_edge u pc s f). intros p. destruct (peq p pc).
-+ subst p. rewrite GET1. unfold measure_edge.
+ unfold record_branch. destruct b as [ | [] b ]; auto.
+ exists (measure_branch u pc s f). intros p. destruct (peq p pc).
++ subst p. rewrite GET1. unfold measure_branch.
rewrite (U.repr_union_2 u pc s); auto. rewrite U.repr_union_3.
destruct (peq (U.repr u s) pc); auto. rewrite PC, peq_true. right; split; auto. lia.
+ specialize (BMC p). rewrite PTree.gro in BMC by auto.
assert (U.repr u p = p -> U.repr (U.union u pc s) p = p).
{ intro. rewrite <- H at 2. apply U.repr_union_1. congruence. }
- destruct (m!p) as [ [ | [] b ] | ]; auto.
+ destruct (c!p) as [ [ | [] _ ] | ]; auto.
destruct BMC as [A | [A B]]. auto.
right; split. apply U.sameclass_union_2; auto.
- unfold measure_edge. destruct (peq (U.repr u s) pc). auto.
+ unfold measure_branch. destruct (peq (U.repr u s) pc). auto.
rewrite A. destruct (peq (U.repr u s0) pc); lia.
Qed.
-Definition branch_target (f: function) (pc: node) : node :=
- U.repr (record_gotos f) pc.
+Lemma record_branches_correct:
+ { f | branch_map_correct_1 fn.(fn_code) (record_branches fn) f }.
+Proof.
+ unfold record_branches. apply PTree_Properties.fold_ind.
+- (* base case *)
+ intros m EMPTY. exists (fun _ => O).
+ red; intros. rewrite EMPTY. apply U.repr_empty.
+- (* inductive case *)
+ intros m u pc bb GET1 GET2 [f BMC]. eapply record_branch_correct; eauto.
+Qed.
+
+Definition branch_map_correct_2 (c: code) (u: U.t) (f: node -> nat): Prop :=
+ forall pc,
+ match fn.(fn_code)!pc with
+ | Some(Lbranch s :: b) =>
+ U.repr u pc = pc \/ (U.repr u pc = U.repr u s /\ f s < f pc)%nat
+ | Some(Lcond cond args s1 s2 :: b) =>
+ U.repr u pc = pc \/ (c!pc = None /\ U.repr u pc = U.repr u s1 /\ U.repr u pc = U.repr u s2 /\ f s1 < f pc /\ f s2 < f pc)%nat
+ | _ =>
+ U.repr u pc = pc
+ end.
-Definition count_gotos (f: function) (pc: node) : nat :=
- proj1_sig (record_gotos_correct_1 f) pc.
+Lemma record_cond_correct:
+ forall c u changed f pc b,
+ branch_map_correct_2 c u f ->
+ fn.(fn_code)!pc = Some b ->
+ c!pc <> None ->
+ let '(c1, u1, _) := record_cond (c, u, changed) pc b in
+ { f' | branch_map_correct_2 c1 u1 f' }.
+Proof.
+ intros c u changed f pc b BMC GET1 GET2.
+ assert (DFL: { f' | branch_map_correct_2 c u f' }).
+ { exists f; auto. }
+ unfold record_cond. destruct b as [ | [] b ]; auto.
+ destruct (peq (U.repr u s1) (U.repr u s2)); auto.
+ exists (measure_cond u pc s1 s2 f).
+ assert (PC: U.repr u pc = pc).
+ { specialize (BMC pc). rewrite GET1 in BMC. intuition congruence. }
+ intro p. destruct (peq p pc).
+- subst p. rewrite GET1. unfold measure_cond.
+ rewrite U.repr_union_2 by auto. rewrite <- e, PC, peq_true.
+ destruct (peq (U.repr u s1) pc); auto.
+ right; repeat split.
+ + apply PTree.grs.
+ + rewrite U.repr_union_3. auto.
+ + rewrite U.repr_union_1 by congruence. auto.
+ + lia.
+ + lia.
+- assert (P: U.repr u p = p -> U.repr (U.union u pc s1) p = p).
+ { intros. rewrite U.repr_union_1 by congruence. auto. }
+ specialize (BMC p). destruct (fn_code fn)!p as [ [ | [] bb ] | ]; auto.
+ + destruct BMC as [A | (A & B)]; auto. right; split.
+ * apply U.sameclass_union_2; auto.
+ * unfold measure_cond. rewrite <- A.
+ destruct (peq (U.repr u s1) pc). auto.
+ destruct (peq (U.repr u p) pc); lia.
+ + destruct BMC as [A | (A & B & C & D & E)]; auto. right; split; [ | split; [ | split]].
+ * rewrite PTree.gro by auto. auto.
+ * apply U.sameclass_union_2; auto.
+ * apply U.sameclass_union_2; auto.
+ * unfold measure_cond. rewrite <- B, <- C.
+ destruct (peq (U.repr u s1) pc). auto.
+ destruct (peq (U.repr u p) pc); lia.
+Qed.
+
+Definition code_compat (c: code) : Prop :=
+ forall pc b, c!pc = Some b -> fn.(fn_code)!pc = Some b.
+
+Definition code_invariant (c0 c1 c2: code) : Prop :=
+ forall pc, c0!pc = None -> c1!pc = c2!pc.
+
+Lemma record_conds_1_correct:
+ forall c u f,
+ branch_map_correct_2 c u f ->
+ code_compat c ->
+ let '(c', u', _) := record_conds_1 (c, u) in
+ (code_compat c' * { f' | branch_map_correct_2 c' u' f' })%type.
+Proof.
+ intros c0 u0 f0 BMC0 COMPAT0.
+ unfold record_conds_1.
+ set (x := PTree.fold record_cond c0 (c0, u0, false)).
+ set (P := fun (cd: code) (cuc: code * U.t * bool) =>
+ (code_compat (fst (fst cuc)) *
+ code_invariant cd (fst (fst cuc)) c0 *
+ { f | branch_map_correct_2 (fst (fst cuc)) (snd (fst cuc)) f })%type).
+ assert (REC: P c0 x).
+ { unfold x; apply PTree_Properties.fold_ind.
+ - intros cd EMPTY. split; [split|]; simpl.
+ + auto.
+ + red; auto.
+ + exists f0; auto.
+ - intros cd [[c u] changed] pc b GET1 GET2 [[COMPAT INV] [f BMC]]. simpl in *.
+ split; [split|].
+ + unfold record_cond; destruct b as [ | [] b]; simpl; auto.
+ destruct (peq (U.repr u s1) (U.repr u s2)); simpl; auto.
+ red; intros. rewrite PTree.grspec in H. destruct (PTree.elt_eq pc0 pc). discriminate. auto.
+ + assert (DFL: code_invariant cd c c0).
+ { intros p GET. apply INV. rewrite PTree.gro by congruence. auto. }
+ unfold record_cond; destruct b as [ | [] b]; simpl; auto.
+ destruct (peq (U.repr u s1) (U.repr u s2)); simpl; auto.
+ intros p GET. rewrite PTree.gro by congruence. apply INV. rewrite PTree.gro by congruence. auto.
+ + assert (GET3: c!pc = Some b).
+ { rewrite <- GET2. apply INV. apply PTree.grs. }
+ assert (X: fn.(fn_code)!pc = Some b) by auto.
+ assert (Y: c!pc <> None) by congruence.
+ generalize (record_cond_correct c u changed f pc b BMC X Y).
+ destruct (record_cond (c, u, changed) pc b) as [[c1 u1] changed1]; simpl.
+ auto.
+ }
+ destruct x as [[c1 u1] changed1]; destruct REC as [[COMPAT1 INV1] BMC1]; auto.
+Qed.
+
+Definition branch_map_correct (u: U.t) (f: node -> nat): Prop :=
+ forall pc,
+ match fn.(fn_code)!pc with
+ | Some(Lbranch s :: b) =>
+ U.repr u pc = pc \/ (U.repr u pc = U.repr u s /\ f s < f pc)%nat
+ | Some(Lcond cond args s1 s2 :: b) =>
+ U.repr u pc = pc \/ (U.repr u pc = U.repr u s1 /\ U.repr u pc = U.repr u s2 /\ f s1 < f pc /\ f s2 < f pc)%nat
+ | _ =>
+ U.repr u pc = pc
+ end.
+
+Lemma record_conds_correct:
+ forall cu,
+ { f | branch_map_correct_2 (fst cu) (snd cu) f } ->
+ code_compat (fst cu) ->
+ { f | branch_map_correct (record_conds cu) f }.
+Proof.
+ intros cu0. functional induction (record_conds cu0); intros.
+- destruct cu as [c u], cu' as [c' u'], H as [f BMC].
+ generalize (record_conds_1_correct c u f BMC H0).
+ rewrite e. intros [U V]. apply IHt; auto.
+- destruct cu as [c u], H as [f BMC].
+ exists f. intros pc. specialize (BMC pc); simpl in *.
+ destruct (fn_code fn)!pc as [ [ | [] b ] | ]; tauto.
+Qed.
+
+Lemma record_gotos_correct_1:
+ { f | branch_map_correct (record_gotos fn) f }.
+Proof.
+ apply record_conds_correct; simpl.
+- destruct record_branches_correct as [f BMC].
+ exists f. intros pc. specialize (BMC pc); simpl in *.
+ destruct (fn_code fn)!pc as [ [ | [] b ] | ]; auto.
+- red; auto.
+Qed.
+
+Definition branch_target (pc: node) : node :=
+ U.repr (record_gotos fn) pc.
+
+Definition count_gotos (pc: node) : nat :=
+ proj1_sig record_gotos_correct_1 pc.
Theorem record_gotos_correct:
- forall f pc,
- match f.(fn_code)!pc with
+ forall pc,
+ match fn.(fn_code)!pc with
| Some(Lbranch s :: b) =>
- branch_target f pc = pc \/
- (branch_target f pc = branch_target f s /\ count_gotos f s < count_gotos f pc)%nat
- | _ => branch_target f pc = pc
+ branch_target pc = pc \/
+ (branch_target pc = branch_target s /\ count_gotos s < count_gotos pc)%nat
+ | Some(Lcond cond args s1 s2 :: b) =>
+ branch_target pc = pc \/
+ (branch_target pc = branch_target s1 /\ branch_target pc = branch_target s2
+ /\ count_gotos s1 < count_gotos pc /\ count_gotos s2 < count_gotos pc)%nat
+ | _ =>
+ branch_target pc = pc
end.
Proof.
- intros. unfold count_gotos. destruct (record_gotos_correct_1 f) as [m P]; simpl.
+ intros. unfold count_gotos. destruct record_gotos_correct_1 as [f P]; simpl.
apply P.
Qed.
+End BRANCH_MAP_CORRECT.
+
(** * Preservation of semantics *)
Section PRESERVATION.
@@ -187,13 +347,21 @@ Inductive match_states: state -> state -> Prop :=
(MEM: Mem.extends m tm),
match_states (Block s f sp bb ls m)
(Block ts (tunnel_function f) sp (tunneled_block f bb) tls tm)
- | match_states_interm:
+ | match_states_interm_branch:
forall s f sp pc bb ls m ts tls tm
(STK: list_forall2 match_stackframes s ts)
(LS: locmap_lessdef ls tls)
(MEM: Mem.extends m tm),
match_states (Block s f sp (Lbranch pc :: bb) ls m)
(State ts (tunnel_function f) sp (branch_target f pc) tls tm)
+ | match_states_interm_cond:
+ forall s f sp cond args pc1 pc2 bb ls m ts tls tm
+ (STK: list_forall2 match_stackframes s ts)
+ (LS: locmap_lessdef ls tls)
+ (MEM: Mem.extends m tm)
+ (SAME: branch_target f pc1 = branch_target f pc2),
+ match_states (Block s f sp (Lcond cond args pc1 pc2 :: bb) ls m)
+ (State ts (tunnel_function f) sp (branch_target f pc1) tls tm)
| match_states_call:
forall s f ls m ts tls tm
(STK: list_forall2 match_stackframes s ts)
@@ -346,6 +514,7 @@ Definition measure (st: state) : nat :=
match st with
| State s f sp pc ls m => (count_gotos f pc * 2)%nat
| Block s f sp (Lbranch pc :: _) ls m => (count_gotos f pc * 2 + 1)%nat
+ | Block s f sp (Lcond _ _ pc1 pc2 :: _) ls m => (Nat.max (count_gotos f pc1) (count_gotos f pc2) * 2 + 1)%nat
| Block s f sp bb ls m => 0%nat
| Callstate s f ls m => 0%nat
| Returnstate s ls m => 0%nat
@@ -380,10 +549,16 @@ Proof.
generalize (record_gotos_correct f pc). rewrite H.
destruct bb; auto. destruct i; auto.
++ (* Lbranch *)
intros [A | [B C]]. auto.
right. split. simpl. lia.
split. auto.
rewrite B. econstructor; eauto.
++ (* Lcond *)
+ intros [A | (B & C & D & E)]. auto.
+ right. split. simpl. lia.
+ split. auto.
+ rewrite B. econstructor; eauto. congruence.
- (* Lop *)
exploit eval_operation_lessdef. apply reglist_lessdef; eauto. eauto. eauto.
@@ -450,18 +625,24 @@ Proof.
- (* Lbranch (eliminated) *)
right; split. simpl. lia. split. auto. constructor; auto.
-- (* Lcond *)
+- (* Lcond (preserved) *)
simpl tunneled_block.
set (s1 := U.repr (record_gotos f) pc1). set (s2 := U.repr (record_gotos f) pc2).
destruct (peq s1 s2).
+ left; econstructor; split.
- eapply exec_Lbranch.
- destruct b.
-* constructor; eauto using locmap_undef_regs_lessdef_1.
-* rewrite e. constructor; eauto using locmap_undef_regs_lessdef_1.
+ eapply exec_Lbranch.
+ set (pc := if b then pc1 else pc2).
+ replace s1 with (branch_target f pc) by (unfold pc; destruct b; auto).
+ constructor; eauto using locmap_undef_regs_lessdef_1.
+ left; econstructor; split.
eapply exec_Lcond; eauto. eapply eval_condition_lessdef; eauto using reglist_lessdef.
destruct b; econstructor; eauto using locmap_undef_regs_lessdef.
+- (* Lcond (eliminated) *)
+ right; split. simpl. destruct b; lia.
+ split. auto.
+ set (pc := if b then pc1 else pc2).
+ replace (branch_target f pc1) with (branch_target f pc) by (unfold pc; destruct b; auto).
+ econstructor; eauto.
- (* Ljumptable *)
assert (tls (R arg) = Vint n).