aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorXavier Leroy <xavier.leroy@college-de-france.fr>2019-05-31 19:15:19 +0200
committerXavier Leroy <xavierleroy@users.noreply.github.com>2019-06-06 10:58:13 +0200
commit8e3a73448c5ddfa4be3871d7f4fd80281a7549f4 (patch)
tree1d2faaa30db1d4c8d7e236f7ddfbd145d778e6e3
parent0025c4db576dbc174946b7adb83d4e0b81ce4b5f (diff)
downloadcompcert-8e3a73448c5ddfa4be3871d7f4fd80281a7549f4.tar.gz
compcert-8e3a73448c5ddfa4be3871d7f4fd80281a7549f4.zip
If-conversion optimization
Extends the instruction selection pass with an if-conversion optimization: some if/then/else statements are converted into "select" operations, which in turn can be compiled down to branchless instruction sequences if the target architecture supports them. The statements that are converted are of the form if (cond) { x = a1; } else { x = a2; } if (cond) { x = a1; } if (cond) { /*skip*/; } else { x = a2; } where a1, a2 are "safe" expressions, containing no operations that can fail at run-time, such as memory loads or integer divisions. A heuristic in backend/Selectionaux.ml controls when the optimization occurs, depending on command-line flags and the complexity of the "then" and "else" branches.
-rw-r--r--backend/Cminor.v64
-rw-r--r--backend/Selection.v93
-rw-r--r--backend/Selectionaux.ml113
-rw-r--r--backend/Selectionproof.v386
-rw-r--r--driver/Clflags.ml2
-rw-r--r--driver/Driver.ml10
-rw-r--r--extraction/extraction.v1
-rw-r--r--test/regression/Makefile2
-rw-r--r--test/regression/Results/ifconv26
-rw-r--r--test/regression/ifconv.c129
10 files changed, 751 insertions, 75 deletions
diff --git a/backend/Cminor.v b/backend/Cminor.v
index 11941da3..ca01ad50 100644
--- a/backend/Cminor.v
+++ b/backend/Cminor.v
@@ -591,6 +591,70 @@ Proof.
red; intros; inv H; simpl; try omega; eapply external_call_trace_length; eauto.
Qed.
+(** This semantics is determinate. *)
+
+Lemma eval_expr_determ:
+ forall ge sp e m a v, eval_expr ge sp e m a v ->
+ forall v', eval_expr ge sp e m a v' -> v' = v.
+Proof.
+ induction 1; intros v' E'; inv E'.
+- congruence.
+- congruence.
+- assert (v0 = v1) by eauto. congruence.
+- assert (v0 = v1) by eauto. assert (v3 = v2) by eauto. congruence.
+- assert (vaddr0 = vaddr) by eauto. congruence.
+Qed.
+
+Lemma eval_exprlist_determ:
+ forall ge sp e m al vl, eval_exprlist ge sp e m al vl ->
+ forall vl', eval_exprlist ge sp e m al vl' -> vl' = vl.
+Proof.
+ induction 1; intros vl' E'; inv E'.
+ - auto.
+ - f_equal; eauto using eval_expr_determ.
+Qed.
+
+Ltac Determ :=
+ try congruence;
+ match goal with
+ | [ |- match_traces _ E0 E0 /\ (_ -> _) ] =>
+ split; [constructor|intros _; Determ]
+ | [ H: is_call_cont ?k |- _ ] =>
+ contradiction || (clear H; Determ)
+ | [ H1: eval_expr _ _ _ _ ?a ?v1, H2: eval_expr _ _ _ _ ?a ?v2 |- _ ] =>
+ assert (v1 = v2) by (eapply eval_expr_determ; eauto);
+ clear H1 H2; Determ
+ | [ H1: eval_exprlist _ _ _ _ ?a ?v1, H2: eval_exprlist _ _ _ _ ?a ?v2 |- _ ] =>
+ assert (v1 = v2) by (eapply eval_exprlist_determ; eauto);
+ clear H1 H2; Determ
+ | _ => idtac
+ end.
+
+Lemma semantics_determinate:
+ forall (p: program), determinate (semantics p).
+Proof.
+ intros. constructor; set (ge := Genv.globalenv p); simpl; intros.
+- (* determ *)
+ inv H; inv H0; Determ.
+ + subst vargs0. exploit external_call_determ. eexact H2. eexact H13.
+ intros (A & B). split; intros; auto.
+ apply B in H; destruct H; congruence.
+ + subst v0. assert (b0 = b) by (inv H2; inv H13; auto). subst b0; auto.
+ + assert (n0 = n) by (inv H2; inv H14; auto). subst n0; auto.
+ + exploit external_call_determ. eexact H1. eexact H7.
+ intros (A & B). split; intros; auto.
+ apply B in H; destruct H; congruence.
+- (* single event *)
+ red; simpl. destruct 1; simpl; try omega;
+ eapply external_call_trace_length; eauto.
+- (* initial states *)
+ inv H; inv H0. unfold ge0, ge1 in *. congruence.
+- (* nostep final state *)
+ red; intros; red; intros. inv H; inv H0.
+- (* final states *)
+ inv H; inv H0; auto.
+Qed.
+
(** * Alternate operational semantics (big-step) *)
(** We now define another semantics for Cminor without [goto] that follows
diff --git a/backend/Selection.v b/backend/Selection.v
index 4520cb0c..4154659c 100644
--- a/backend/Selection.v
+++ b/backend/Selection.v
@@ -26,7 +26,7 @@ Require String.
Require Import Coqlib Maps.
Require Import AST Errors Integers Globalenvs Switch.
Require Cminor.
-Require Import Op CminorSel.
+Require Import Op CminorSel Cminortyping.
Require Import SelectOp SplitLong SelectLong SelectDiv.
Require Machregs.
@@ -43,6 +43,12 @@ Function condexpr_of_expr (e: expr) : condexpr :=
| _ => CEcond (Ccompuimm Cne Int.zero) (e ::: Enil)
end.
+Function condition_of_expr (e: expr) : condition * exprlist :=
+ match e with
+ | Eop (Ocmp c) el => (c, el)
+ | _ => (Ccompuimm Cne Int.zero, e ::: Enil)
+ end.
+
(** Conversion of loads and stores *)
Definition load (chunk: memory_chunk) (e1: expr) :=
@@ -173,6 +179,10 @@ Fixpoint sel_exprlist (al: list Cminor.expr) : exprlist :=
| a :: bl => Econs (sel_expr a) (sel_exprlist bl)
end.
+Definition sel_select_opt (ty: typ) (arg1 arg2 arg3: Cminor.expr) : option expr :=
+ let (cond, args) := condition_of_expr (sel_expr arg1) in
+ SelectOp.select ty cond args (sel_expr arg2) (sel_expr arg3).
+
(** Recognition of immediate calls and calls to built-in functions
that should be inlined *)
@@ -267,9 +277,63 @@ Definition sel_switch_long :=
(fun arg ofs => subl arg (longconst (Int64.repr ofs)))
lowlong.
+(** "If conversion": conversion of certain if-then-else statements
+ into branchless conditional move instructions. *)
+
+(** Recognition of "then" and "else" statements that support if-conversion.
+ Basically we are interested in assignments to local variables [id = e].
+ However the front-end may have put [skip] statements around these
+ assignments. *)
+
+Inductive stmt_class : Type :=
+ | SCskip
+ | SCassign (id: ident) (a: Cminor.expr)
+ | SCother.
+
+Function classify_stmt (s: Cminor.stmt) : stmt_class :=
+ match s with
+ | Cminor.Sskip => SCskip
+ | Cminor.Sassign id a => SCassign id a
+ | Cminor.Sseq Cminor.Sskip s => classify_stmt s
+ | Cminor.Sseq s Cminor.Sskip => classify_stmt s
+ | _ => SCother
+ end.
+
+(** External heuristic to limit the amount of if-conversion performed.
+ Arguments are: the condition, the "then" and the "else" expressions,
+ and the type at which selection is done. *)
+
+Parameter if_conversion_heuristic:
+ Cminor.expr -> Cminor.expr -> Cminor.expr -> AST.typ -> bool.
+
+Definition if_conversion_base
+ (ki: known_idents) (env: typenv)
+ (cond: Cminor.expr) (id: ident) (ifso ifnot: Cminor.expr) : option stmt :=
+ let ty := env id in
+ if is_known ki id
+ && safe_expr ki ifso && safe_expr ki ifnot
+ && if_conversion_heuristic cond ifso ifnot ty
+ then option_map
+ (fun sel => Sassign id sel)
+ (sel_select_opt ty cond ifso ifnot)
+ else None.
+
+Definition if_conversion
+ (ki: known_idents) (env: typenv)
+ (cond: Cminor.expr) (ifso ifnot: Cminor.stmt) : option stmt :=
+ match classify_stmt ifso, classify_stmt ifnot with
+ | SCskip, SCassign id a =>
+ if_conversion_base ki env cond id (Cminor.Evar id) a
+ | SCassign id a, SCskip =>
+ if_conversion_base ki env cond id a (Cminor.Evar id)
+ | SCassign id1 a1, SCassign id2 a2 =>
+ if ident_eq id1 id2 then if_conversion_base ki env cond id1 a1 a2 else None
+ | _, _ => None
+ end.
+
(** Conversion from Cminor statements to Cminorsel statements. *)
-Fixpoint sel_stmt (s: Cminor.stmt) : res stmt :=
+Fixpoint sel_stmt (ki: known_idents) (env: typenv) (s: Cminor.stmt) : res stmt :=
match s with
| Cminor.Sskip => OK Sskip
| Cminor.Sassign id e => OK (Sassign id (sel_expr e))
@@ -291,15 +355,19 @@ Fixpoint sel_stmt (s: Cminor.stmt) : res stmt :=
| _ => Stailcall sg (inl _ (sel_expr fn)) (sel_exprlist args)
end)
| Cminor.Sseq s1 s2 =>
- do s1' <- sel_stmt s1; do s2' <- sel_stmt s2;
+ do s1' <- sel_stmt ki env s1; do s2' <- sel_stmt ki env s2;
OK (Sseq s1' s2')
| Cminor.Sifthenelse e ifso ifnot =>
- do ifso' <- sel_stmt ifso; do ifnot' <- sel_stmt ifnot;
- OK (Sifthenelse (condexpr_of_expr (sel_expr e)) ifso' ifnot')
+ match if_conversion ki env e ifso ifnot with
+ | Some s => OK s
+ | None =>
+ do ifso' <- sel_stmt ki env ifso; do ifnot' <- sel_stmt ki env ifnot;
+ OK (Sifthenelse (condexpr_of_expr (sel_expr e)) ifso' ifnot')
+ end
| Cminor.Sloop body =>
- do body' <- sel_stmt body; OK (Sloop body')
+ do body' <- sel_stmt ki env body; OK (Sloop body')
| Cminor.Sblock body =>
- do body' <- sel_stmt body; OK (Sblock body')
+ do body' <- sel_stmt ki env body; OK (Sblock body')
| Cminor.Sexit n => OK (Sexit n)
| Cminor.Sswitch false e cases dfl =>
let t := compile_switch Int.modulus dfl cases in
@@ -314,7 +382,7 @@ Fixpoint sel_stmt (s: Cminor.stmt) : res stmt :=
| Cminor.Sreturn None => OK (Sreturn None)
| Cminor.Sreturn (Some e) => OK (Sreturn (Some (sel_expr e)))
| Cminor.Slabel lbl body =>
- do body' <- sel_stmt body; OK (Slabel lbl body')
+ do body' <- sel_stmt ki env body; OK (Slabel lbl body')
| Cminor.Sgoto lbl => OK (Sgoto lbl)
end.
@@ -322,8 +390,15 @@ End SELECTION.
(** Conversion of functions. *)
+Definition known_id (f: Cminor.function) : known_idents :=
+ let add (ki: known_idents) (id: ident) := PTree.set id tt ki in
+ List.fold_left add f.(Cminor.fn_vars)
+ (List.fold_left add f.(Cminor.fn_params) (PTree.empty unit)).
+
Definition sel_function (dm: PTree.t globdef) (hf: helper_functions) (f: Cminor.function) : res function :=
- do body' <- sel_stmt dm f.(Cminor.fn_body);
+ let ki := known_id f in
+ do env <- Cminortyping.type_function f;
+ do body' <- sel_stmt dm ki env f.(Cminor.fn_body);
OK (mkfunction
f.(Cminor.fn_sig)
f.(Cminor.fn_params)
diff --git a/backend/Selectionaux.ml b/backend/Selectionaux.ml
new file mode 100644
index 00000000..de8127c6
--- /dev/null
+++ b/backend/Selectionaux.ml
@@ -0,0 +1,113 @@
+(* *********************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Xavier Leroy, Collège de France and Inria Paris *)
+(* *)
+(* Copyright Institut National de Recherche en Informatique et en *)
+(* Automatique. All rights reserved. This file is distributed *)
+(* under the terms of the INRIA Non-Commercial License Agreement. *)
+(* *)
+(* *********************************************************************)
+
+open AST
+open Cminor
+
+(* Heuristics to guide if conversion *)
+
+(* Estimate a cost for evaluating a safe expression.
+ Unsafe operators need not be estimated.
+ Basic integer operations (add, and, ...) have cost 1 by convention.
+ The other costs are rough estimates. *)
+
+let cost_unop = function
+ | Ocast8unsigned | Ocast8signed
+ | Ocast16unsigned | Ocast16signed
+ | Onegint | Onotint -> 1
+ | Onegf | Oabsf -> 1
+ | Onegfs | Oabsfs -> 1
+ | Osingleoffloat | Ofloatofsingle -> 2
+ | Ointoffloat | Ointuoffloat
+ | Ofloatofint | Ofloatofintu
+ | Ointofsingle | Ointuofsingle
+ | Osingleofint | Osingleofintu -> assert false
+ | Onegl | Onotl -> if Archi.splitlong then 2 else 1
+ | Ointoflong | Olongofint | Olongofintu -> 1
+ | Olongoffloat | Olonguoffloat
+ | Ofloatoflong | Ofloatoflongu
+ | Olongofsingle | Olonguofsingle
+ | Osingleoflong | Osingleoflongu -> assert false
+
+let cost_binop = function
+ | Oadd | Osub -> 1
+ | Omul -> 2
+ | Odiv | Odivu | Omod | Omodu -> assert false
+ | Oand | Oor | Oxor | Oshl | Oshr | Oshru -> 1
+ | Oaddf | Osubf | Omulf -> 2
+ | Odivf -> 10
+ | Oaddfs| Osubfs| Omulfs -> 2
+ | Odivfs -> 10
+ | Oaddl | Osubl -> if Archi.splitlong then 3 else 1
+ | Omull -> if Archi.splitlong then 6 else 2
+ | Odivl | Odivlu | Omodl | Omodlu -> assert false
+ | Oandl | Oorl | Oxorl -> if Archi.splitlong then 2 else 1
+ | Oshll | Oshrl | Oshrlu -> if Archi.splitlong then 4 else 1
+ | Ocmp _ | Ocmpu _ -> 2
+ | Ocmpf _ | Ocmpfs _ -> 2
+ | Ocmpl _ | Ocmplu _ -> assert false
+
+let rec cost_expr = function
+ | Evar _ -> 0
+ | Econst _ -> 1
+ | Eunop(op, e1) -> cost_unop op + cost_expr e1
+ | Ebinop(op, e1, e2) -> cost_binop op + cost_expr e1 + cost_expr e2
+ | Eload(_, e1) -> assert false
+
+(* Does the target architecture support an efficient "conditional move"
+ at the given type? *)
+
+let fast_cmove ty =
+ match Configuration.arch, Configuration.model with
+ | "arm", _ ->
+ (match ty with Tint | Tfloat | Tsingle -> true | _ -> false)
+ | "powerpc", "e5500" ->
+ (match ty with Tint -> true | Tlong -> true | _ -> false)
+ | "powerpc", _ -> false
+ | "riscV", _ -> false
+ | "x86", _ ->
+ (match ty with Tint -> true | Tlong -> Archi.ptr64 | _ -> false)
+ | _, _ ->
+ assert false
+
+(* The if-conversion heuristic depend on the
+ -fif-conversion and -ffavor-branchless flags.
+
+With [-fno-if-conversion] or [-0O], if-conversion is turned off entirely.
+With [-ffavor-branchless], if-conversion is performed whenever semantically
+correct, regardless of how much it could cost.
+Otherwise (and by default), optimization is performed when it seems beneficial.
+
+If-conversion seems beneficial if:
+- the target architecture supports an efficient "conditional move" instruction
+ (not an emulation that takes several instructions)
+- the total cost the "then" and "else" branches is not too high
+- the cost difference between the "then" and "else" branches is low enough.
+
+Intuition: on a modern processor, the "then" and the "else" branches
+can generally be computed in parallel, there is enough ILP for that.
+So, the bad case is if the most taken branch is much cheaper than the
+other branch. Another bad case is if both branches are big: since the
+code for one branch precedes entirely the code for the other branch,
+if the first branch contains a lot of instructions,
+dynamic reordering of instructions will not look ahead far enough
+to execute instructions from the other branch in parallel with
+instructions from the first branch.
+*)
+
+let if_conversion_heuristic cond ifso ifnot ty =
+ if not !Clflags.option_fifconversion then false else
+ if !Clflags.option_ffavor_branchless then true else
+ if not (fast_cmove ty) then false else
+ let c1 = cost_expr ifso and c2 = cost_expr ifnot in
+ c1 + c2 <= 24 && abs (c1 - c2) <= 8
+
diff --git a/backend/Selectionproof.v b/backend/Selectionproof.v
index 621459d0..50875086 100644
--- a/backend/Selectionproof.v
+++ b/backend/Selectionproof.v
@@ -15,7 +15,7 @@
Require Import FunInd.
Require Import Coqlib Maps.
Require Import AST Linking Errors Integers Values Memory Events Globalenvs Smallstep.
-Require Import Switch Cminor Op CminorSel.
+Require Import Switch Cminor Op CminorSel Cminortyping.
Require Import SelectOp SelectDiv SplitLong SelectLong Selection.
Require Import SelectOpproof SelectDivproof SplitLongproof SelectLongproof.
@@ -119,6 +119,16 @@ Let ge := Genv.globalenv prog.
Let tge := Genv.globalenv tprog.
Hypothesis TRANSF: match_prog prog tprog.
+Lemma wt_prog : wt_program prog.
+Proof.
+ red; intros. destruct TRANSF as [A _].
+ exploit list_forall2_in_left; eauto.
+ intros ((i' & gd') & B & (C & D)). simpl in *. inv D.
+ destruct H2 as (hf & P & Q). destruct f; monadInv Q.
+- monadInv EQ. econstructor; apply type_function_sound; eauto.
+- constructor.
+Qed.
+
Lemma symbols_preserved:
forall (s: ident), Genv.find_symbol tge s = Genv.find_symbol ge s.
Proof (Genv.find_symbol_match TRANSF).
@@ -202,6 +212,25 @@ Proof.
simpl. inv H0. auto.
Qed.
+Lemma eval_condition_of_expr:
+ forall a le v b,
+ eval_expr tge sp e m le a v ->
+ Val.bool_of_val v b ->
+ let (cond, al) := condition_of_expr a in
+ exists vl,
+ eval_exprlist tge sp e m le al vl
+ /\ eval_condition cond vl m = Some b.
+Proof.
+ intros until a. functional induction (condition_of_expr a); intros.
+(* compare *)
+ inv H. simpl in H6. inv H6. apply Val.bool_of_val_of_optbool in H0.
+ exists vl; auto.
+(* default *)
+ exists (v :: nil); split.
+ econstructor. auto. constructor.
+ simpl. inv H0. auto.
+Qed.
+
Lemma eval_load:
forall le a v chunk v',
eval_expr tge sp e m le a v ->
@@ -698,6 +727,29 @@ Proof.
exists (v1' :: vl'); split; auto. constructor; eauto.
Qed.
+Lemma sel_select_opt_correct:
+ forall ty cond a1 a2 a sp e m vcond v1 v2 b e' m' le,
+ sel_select_opt ty cond a1 a2 = Some a ->
+ Cminor.eval_expr ge sp e m cond vcond ->
+ Cminor.eval_expr ge sp e m a1 v1 ->
+ Cminor.eval_expr ge sp e m a2 v2 ->
+ Val.bool_of_val vcond b ->
+ env_lessdef e e' -> Mem.extends m m' ->
+ exists v', eval_expr tge sp e' m' le a v' /\ Val.lessdef (Val.select (Some b) v1 v2 ty) v'.
+Proof.
+ unfold sel_select_opt; intros.
+ destruct (condition_of_expr (sel_expr cond)) as [cnd args] eqn:C.
+ exploit sel_expr_correct. eexact H0. eauto. eauto. intros (vcond' & EVC & LDC).
+ exploit sel_expr_correct. eexact H1. eauto. eauto. intros (v1' & EV1 & LD1).
+ exploit sel_expr_correct. eexact H2. eauto. eauto. intros (v2' & EV2 & LD2).
+ assert (Val.bool_of_val vcond' b) by (inv H3; inv LDC; constructor).
+ exploit eval_condition_of_expr. eexact EVC. eauto. rewrite C. intros (vargs' & EVARGS & EVCOND).
+ exploit eval_select; eauto. intros (v' & X & Y).
+ exists v'; split; eauto.
+ eapply Val.lessdef_trans; [|eexact Y].
+ apply Val.select_lessdef; auto.
+Qed.
+
Lemma sel_builtin_arg_correct:
forall sp e e' m m' a v c,
env_lessdef e e' -> Mem.extends m m' ->
@@ -741,37 +793,174 @@ Proof.
intros. destruct oid; simpl; auto. apply set_var_lessdef; auto.
Qed.
+(** If-conversion *)
+
+Lemma classify_stmt_sound_1:
+ forall f sp e m s k,
+ classify_stmt s = SCskip ->
+ star Cminor.step ge (Cminor.State f s k sp e m) E0 (Cminor.State f Cminor.Sskip k sp e m).
+Proof.
+ intros until s; functional induction (classify_stmt s); intros; try discriminate.
+ - apply star_refl.
+ - eapply star_trans; eauto. eapply star_two. constructor. constructor.
+ traceEq. traceEq.
+ - eapply star_left. constructor.
+ eapply star_right. eauto. constructor.
+ traceEq. traceEq.
+Qed.
+
+Lemma classify_stmt_sound_2:
+ forall f sp e m a id v,
+ Cminor.eval_expr ge sp e m a v ->
+ forall s k,
+ classify_stmt s = SCassign id a ->
+ star Cminor.step ge (Cminor.State f s k sp e m) E0 (Cminor.State f Cminor.Sskip k sp (PTree.set id v e) m).
+Proof.
+ intros until s; functional induction (classify_stmt s); intros; try discriminate.
+ - inv H0. apply star_one. constructor; auto.
+ - eapply star_trans; eauto. eapply star_two. constructor. constructor.
+ traceEq. traceEq.
+ - eapply star_left. constructor.
+ eapply star_right. eauto. constructor.
+ traceEq. traceEq.
+Qed.
+
+Lemma classify_stmt_wt:
+ forall env tyret id a s,
+ classify_stmt s = SCassign id a ->
+ wt_stmt env tyret s ->
+ wt_expr env a (env id).
+Proof.
+ intros until s; functional induction (classify_stmt s); intros CL WT;
+ try discriminate.
+- inv CL; inv WT; auto.
+- inv WT; eauto.
+- inv WT; eauto.
+Qed.
+
+Lemma eval_select_safe_exprs:
+ forall a1 a2 f env ty e e' m m' sp cond vb b id s,
+ safe_expr (known_id f) a1 = true ->
+ safe_expr (known_id f) a2 = true ->
+ option_map (fun sel => Sassign id sel) (sel_select_opt ty cond a1 a2) = Some s ->
+ Cminor.eval_expr ge sp e m cond vb -> Val.bool_of_val vb b ->
+ wt_expr env a1 ty ->
+ wt_expr env a2 ty ->
+ def_env f e -> wt_env env e ->
+ Cminor.eval_expr ge sp e m cond vb -> Val.bool_of_val vb b ->
+ env_lessdef e e' -> Mem.extends m m' ->
+ exists a' v1 v2 v',
+ s = Sassign id a'
+ /\ Cminor.eval_expr ge sp e m a1 v1
+ /\ Cminor.eval_expr ge sp e m a2 v2
+ /\ eval_expr tge sp e' m' nil a' v'
+ /\ Val.lessdef (if b then v1 else v2) v'.
+Proof.
+ intros.
+ destruct (sel_select_opt ty cond a1 a2) as [a'|] eqn:SSO; simpl in H1; inv H1.
+ destruct (eval_safe_expr ge f sp e m a1) as (v1 & EV1); auto.
+ destruct (eval_safe_expr ge f sp e m a2) as (v2 & EV2); auto.
+ assert (TY1: Val.has_type v1 ty) by (eapply wt_eval_expr; eauto).
+ assert (TY2: Val.has_type v2 ty) by (eapply wt_eval_expr; eauto).
+ exploit sel_select_opt_correct; eauto. intros (v' & EV' & LD).
+ exists a', v1, v2, v'; intuition eauto.
+ apply Val.lessdef_trans with (Val.select (Some b) v1 v2 ty).
+ simpl. rewrite Val.normalize_idem; auto. destruct b; auto.
+ assumption.
+Qed.
+
+Lemma if_conversion_correct:
+ forall f env tyret cond ifso ifnot s vb b k f' k' sp e m e' m',
+ if_conversion (known_id f) env cond ifso ifnot = Some s ->
+ def_env f e -> wt_env env e ->
+ wt_stmt env tyret ifso ->
+ wt_stmt env tyret ifnot ->
+ Cminor.eval_expr ge sp e m cond vb -> Val.bool_of_val vb b ->
+ env_lessdef e e' -> Mem.extends m m' ->
+ let s0 := if b then ifso else ifnot in
+ exists e1 e1',
+ step tge (State f' s k' sp e' m') E0 (State f' Sskip k' sp e1' m')
+ /\ star Cminor.step ge (Cminor.State f s0 k sp e m) E0 (Cminor.State f Cminor.Sskip k sp e1 m)
+ /\ env_lessdef e1 e1'.
+Proof.
+ unfold if_conversion; intros until m'; intros IFC DE WTE WT1 WT2 EVC BOV ELD MEXT.
+ set (s0 := if b then ifso else ifnot). set (ki := known_id f) in *.
+ destruct (classify_stmt ifso) eqn:IFSO; try discriminate;
+ destruct (classify_stmt ifnot) eqn:IFNOT; try discriminate;
+ unfold if_conversion_base in IFC.
+- destruct (is_known ki id && safe_expr ki (Cminor.Evar id) && safe_expr ki a
+ && if_conversion_heuristic cond (Cminor.Evar id) a (env id)) eqn:B; inv IFC.
+ InvBooleans.
+ exploit (eval_select_safe_exprs (Cminor.Evar id) a); eauto.
+ constructor. eapply classify_stmt_wt; eauto.
+ intros (a' & v1 & v2 & v' & A & B & C & D & E).
+ exists (PTree.set id (if b then v1 else v2) e), (PTree.set id v' e').
+ split. subst s. constructor; auto.
+ split. unfold s0; destruct b.
+ rewrite PTree.gsident by (inv B; auto). apply classify_stmt_sound_1; auto.
+ eapply classify_stmt_sound_2; eauto.
+ apply set_var_lessdef; auto.
+- destruct (is_known ki id && safe_expr ki a && safe_expr ki (Cminor.Evar id)
+ && if_conversion_heuristic cond a (Cminor.Evar id) (env id)) eqn:B; inv IFC.
+ InvBooleans.
+ exploit (eval_select_safe_exprs a (Cminor.Evar id)); eauto.
+ eapply classify_stmt_wt; eauto. constructor.
+ intros (a' & v1 & v2 & v' & A & B & C & D & E).
+ exists (PTree.set id (if b then v1 else v2) e), (PTree.set id v' e').
+ split. subst s. constructor; auto.
+ split. unfold s0; destruct b.
+ eapply classify_stmt_sound_2; eauto.
+ rewrite PTree.gsident by (inv C; auto). apply classify_stmt_sound_1; auto.
+ apply set_var_lessdef; auto.
+- destruct (ident_eq id id0); try discriminate. subst id0.
+ destruct (is_known ki id && safe_expr ki a && safe_expr ki a0
+ && if_conversion_heuristic cond a a0 (env id)) eqn:B; inv IFC.
+ InvBooleans.
+ exploit (eval_select_safe_exprs a a0); eauto.
+ eapply classify_stmt_wt; eauto. eapply classify_stmt_wt; eauto.
+ intros (a' & v1 & v2 & v' & A & B & C & D & E).
+ exists (PTree.set id (if b then v1 else v2) e), (PTree.set id v' e').
+ split. subst s. constructor; auto.
+ split. unfold s0; destruct b; eapply classify_stmt_sound_2; eauto.
+ apply set_var_lessdef; auto.
+Qed.
+
End EXPRESSIONS.
(** Semantic preservation for functions and statements. *)
-Inductive match_cont: Cminor.program -> helper_functions -> Cminor.cont -> CminorSel.cont -> Prop :=
- | match_cont_stop: forall cunit hf,
- match_cont cunit hf Cminor.Kstop Kstop
- | match_cont_seq: forall cunit hf s s' k k',
- sel_stmt (prog_defmap cunit) s = OK s' ->
- match_cont cunit hf k k' ->
- match_cont cunit hf (Cminor.Kseq s k) (Kseq s' k')
- | match_cont_block: forall cunit hf k k',
- match_cont cunit hf k k' ->
- match_cont cunit hf (Cminor.Kblock k) (Kblock k')
- | match_cont_call: forall cunit' hf' cunit hf id f sp e k f' e' k',
+Inductive match_cont: Cminor.program -> helper_functions -> known_idents -> typenv -> Cminor.cont -> CminorSel.cont -> Prop :=
+ | match_cont_seq: forall cunit hf ki env s s' k k',
+ sel_stmt (prog_defmap cunit) ki env s = OK s' ->
+ match_cont cunit hf ki env k k' ->
+ match_cont cunit hf ki env (Cminor.Kseq s k) (Kseq s' k')
+ | match_cont_block: forall cunit hf ki env k k',
+ match_cont cunit hf ki env k k' ->
+ match_cont cunit hf ki env (Cminor.Kblock k) (Kblock k')
+ | match_cont_other: forall cunit hf ki env k k',
+ match_call_cont k k' ->
+ match_cont cunit hf ki env k k'
+
+with match_call_cont: Cminor.cont -> CminorSel.cont -> Prop :=
+ | match_cont_stop:
+ match_call_cont Cminor.Kstop Kstop
+ | match_cont_call: forall cunit hf env id f sp e k f' e' k',
linkorder cunit prog ->
helper_functions_declared cunit hf ->
sel_function (prog_defmap cunit) hf f = OK f' ->
- match_cont cunit hf k k' -> env_lessdef e e' ->
- match_cont cunit' hf' (Cminor.Kcall id f sp e k) (Kcall id f' sp e' k').
-
-Definition match_call_cont (k: Cminor.cont) (k': CminorSel.cont) : Prop :=
- forall cunit hf, match_cont cunit hf k k'.
+ type_function f = OK env ->
+ match_cont cunit hf (known_id f) env k k' ->
+ env_lessdef e e' ->
+ match_call_cont (Cminor.Kcall id f sp e k) (Kcall id f' sp e' k').
Inductive match_states: Cminor.state -> CminorSel.state -> Prop :=
- | match_state: forall cunit hf f f' s k s' k' sp e m e' m'
+ | match_state: forall cunit hf f f' s k s' k' sp e m e' m' env
(LINK: linkorder cunit prog)
(HF: helper_functions_declared cunit hf)
(TF: sel_function (prog_defmap cunit) hf f = OK f')
- (TS: sel_stmt (prog_defmap cunit) s = OK s')
- (MC: match_cont cunit hf k k')
+ (TYF: type_function f = OK env)
+ (TS: sel_stmt (prog_defmap cunit) (known_id f) env s = OK s')
+ (MC: match_cont cunit hf (known_id f) env k k')
(LD: env_lessdef e e')
(ME: Mem.extends m m'),
match_states
@@ -793,11 +982,12 @@ Inductive match_states: Cminor.state -> CminorSel.state -> Prop :=
match_states
(Cminor.Returnstate v k m)
(Returnstate v' k' m')
- | match_builtin_1: forall cunit hf ef args args' optid f sp e k m al f' e' k' m'
+ | match_builtin_1: forall cunit hf ef args args' optid f sp e k m al f' e' k' m' env
(LINK: linkorder cunit prog)
(HF: helper_functions_declared cunit hf)
(TF: sel_function (prog_defmap cunit) hf f = OK f')
- (MC: match_cont cunit hf k k')
+ (TYF: type_function f = OK env)
+ (MC: match_cont cunit hf (known_id f) env k k')
(LDA: Val.lessdef_list args args')
(LDE: env_lessdef e e')
(ME: Mem.extends m m')
@@ -805,11 +995,12 @@ Inductive match_states: Cminor.state -> CminorSel.state -> Prop :=
match_states
(Cminor.Callstate (External ef) args (Cminor.Kcall optid f sp e k) m)
(State f' (Sbuiltin (sel_builtin_res optid) ef al) k' sp e' m')
- | match_builtin_2: forall cunit hf v v' optid f sp e k m f' e' m' k'
+ | match_builtin_2: forall cunit hf v v' optid f sp e k m f' e' m' k' env
(LINK: linkorder cunit prog)
(HF: helper_functions_declared cunit hf)
(TF: sel_function (prog_defmap cunit) hf f = OK f')
- (MC: match_cont cunit hf k k')
+ (TYF: type_function f = OK env)
+ (MC: match_cont cunit hf (known_id f) env k k')
(LDV: Val.lessdef v v')
(LDE: env_lessdef e e')
(ME: Mem.extends m m'),
@@ -818,23 +1009,23 @@ Inductive match_states: Cminor.state -> CminorSel.state -> Prop :=
(State f' Sskip k' sp (set_builtin_res (sel_builtin_res optid) v' e') m').
Remark call_cont_commut:
- forall cunit hf k k', match_cont cunit hf k k' -> match_call_cont (Cminor.call_cont k) (call_cont k').
+ forall cunit hf ki env k k',
+ match_cont cunit hf ki env k k' -> match_call_cont (Cminor.call_cont k) (call_cont k').
Proof.
- induction 1; simpl; auto; red; intros.
-- constructor.
-- eapply match_cont_call with (hf := hf); eauto.
+ induction 1; simpl; auto. inversion H; subst; auto.
Qed.
Remark match_is_call_cont:
- forall cunit hf k k', match_cont cunit hf k k' -> Cminor.is_call_cont k -> match_call_cont k k'.
+ forall cunit hf ki env k k',
+ match_cont cunit ki env hf k k' -> Cminor.is_call_cont k ->
+ match_call_cont k k' /\ is_call_cont k'.
Proof.
- destruct 1; intros; try contradiction; red; intros.
-- constructor.
-- eapply match_cont_call with (hf := hf); eauto.
+ destruct 1; intros; try contradiction. split; auto. inv H; auto.
Qed.
+(*
Remark match_call_cont_cont:
- forall k k', match_call_cont k k' -> exists cunit hf, match_cont cunit hf k k'.
+ forall k k', match_call_cont k k' -> exists cunit hf ki env, match_cont cunit hf ki env k k'.
Proof.
intros. simple refine (let cunit : Cminor.program := _ in _).
econstructor. apply nil. apply nil. apply xH.
@@ -842,14 +1033,58 @@ Proof.
econstructor; apply xH.
exists cunit, hf; auto.
Qed.
+*)
+
+Definition nolabel (s: Cminor.stmt) : Prop :=
+ forall lbl k, Cminor.find_label lbl s k = None.
+Definition nolabel' (s: stmt) : Prop :=
+ forall lbl k, find_label lbl s k = None.
+
+Lemma classify_stmt_nolabel:
+ forall s, classify_stmt s <> SCother -> nolabel s.
+Proof.
+ intros s. functional induction (classify_stmt s); intros.
+- red; auto.
+- red; auto.
+- apply IHs0 in H. red; intros; simpl. apply H.
+- apply IHs0 in H. red; intros; simpl. rewrite H; auto.
+- congruence.
+Qed.
+
+Lemma if_conversion_base_nolabel: forall (hf: helper_functions) ki env a id a1 a2 s,
+ if_conversion_base ki env a id a1 a2 = Some s ->
+ nolabel' s.
+Proof.
+ unfold if_conversion_base; intros.
+ destruct (is_known ki id && safe_expr ki a1 && safe_expr ki a2 &&
+ if_conversion_heuristic a a1 a2 (env id)); try discriminate.
+ destruct (sel_select_opt (env id) a a1 a2); inv H.
+ red; auto.
+Qed.
+
+Lemma if_conversion_nolabel: forall (hf: helper_functions) ki env a s1 s2 s,
+ if_conversion ki env a s1 s2 = Some s ->
+ nolabel s1 /\ nolabel s2 /\ nolabel' s.
+Proof.
+ unfold if_conversion; intros.
+ Ltac conclude :=
+ split; [apply classify_stmt_nolabel;congruence
+ |split; [apply classify_stmt_nolabel;congruence
+ |eapply if_conversion_base_nolabel; eauto]].
+ destruct (classify_stmt s1) eqn:C1; try discriminate;
+ destruct (classify_stmt s2) eqn:C2; try discriminate.
+ conclude.
+ conclude.
+ destruct (ident_eq id id0). conclude. discriminate.
+Qed.
Remark find_label_commut:
- forall cunit hf lbl s k s' k',
- match_cont cunit hf k k' ->
- sel_stmt (prog_defmap cunit) s = OK s' ->
+ forall cunit hf ki env lbl s k s' k',
+ match_cont cunit hf ki env k k' ->
+ sel_stmt (prog_defmap cunit) ki env s = OK s' ->
match Cminor.find_label lbl s k, find_label lbl s' k' with
| None, None => True
- | Some(s1, k1), Some(s1', k1') => sel_stmt (prog_defmap cunit) s1 = OK s1' /\ match_cont cunit hf k1 k1'
+ | Some(s1, k1), Some(s1', k1') => sel_stmt (prog_defmap cunit) ki env s1 = OK s1' /\ match_cont cunit hf ki env k1 k1'
| _, _ => False
end.
Proof.
@@ -866,6 +1101,10 @@ Proof.
destruct (find_label lbl x (Kseq x0 k')) as [[sy ky] | ];
intuition. apply IHs2; auto.
(* ifthenelse *)
+ destruct (if_conversion ki env e s1 s2) as [s|] eqn:IFC.
+ inv SE. exploit if_conversion_nolabel; eauto. intros (A & B & C).
+ rewrite A, B, C. auto.
+ monadInv SE; simpl.
exploit (IHs1 k); eauto.
destruct (Cminor.find_label lbl s1 k) as [[sx kx] | ];
destruct (find_label lbl x k') as [[sy ky] | ];
@@ -895,20 +1134,22 @@ Definition measure (s: Cminor.state) : nat :=
Lemma sel_step_correct:
forall S1 t S2, Cminor.step ge S1 t S2 ->
- forall T1, match_states S1 T1 ->
+ forall T1, match_states S1 T1 -> wt_state S1 ->
(exists T2, step tge T1 t T2 /\ match_states S2 T2)
- \/ (measure S2 < measure S1 /\ t = E0 /\ match_states S2 T1)%nat.
+ \/ (measure S2 < measure S1 /\ t = E0 /\ match_states S2 T1)%nat
+ \/ (exists S3 T2, star Cminor.step ge S2 E0 S3 /\ step tge T1 t T2 /\ match_states S3 T2).
Proof.
- induction 1; intros T1 ME; inv ME; try (monadInv TS).
+ induction 1; intros T1 ME WTS; inv ME; try (monadInv TS).
- (* skip seq *)
inv MC. left; econstructor; split. econstructor. econstructor; eauto.
+ inv H.
- (* skip block *)
inv MC. left; econstructor; split. econstructor. econstructor; eauto.
+ inv H.
- (* skip call *)
exploit Mem.free_parallel_extends; eauto. intros [m2' [A B]].
left; econstructor; split.
- econstructor. inv MC; simpl in H; simpl; auto.
- eauto.
+ econstructor. eapply match_is_call_cont; eauto.
erewrite stackspace_function_translated; eauto.
econstructor; eauto. eapply match_is_call_cont; eauto.
- (* assign *)
@@ -934,7 +1175,7 @@ Proof.
econstructor; eauto. econstructor; eauto.
eapply sig_function_translated; eauto.
eapply match_callstate with (cunit := cunit'); eauto.
- red; intros. eapply match_cont_call with (cunit := cunit) (hf := hf); eauto.
+ eapply match_cont_call with (cunit := cunit) (hf := hf); eauto.
+ (* direct *)
intros [b [U V]].
exploit sel_exprlist_correct; eauto. intros [vargs' [C D]].
@@ -944,11 +1185,11 @@ Proof.
subst vf. econstructor; eauto. rewrite symbols_preserved; eauto.
eapply sig_function_translated; eauto.
eapply match_callstate with (cunit := cunit'); eauto.
- red; intros; eapply match_cont_call with (cunit := cunit) (hf := hf); eauto.
+ eapply match_cont_call with (cunit := cunit) (hf := hf); eauto.
+ (* turned into Sbuiltin *)
intros EQ. subst fd.
exploit sel_builtin_args_correct; eauto. intros [vargs' [C D]].
- right; split. simpl. omega. split. auto.
+ right; left; split. simpl. omega. split. auto.
econstructor; eauto.
- (* Stailcall *)
exploit Mem.free_parallel_extends; eauto. intros [m2' [P Q]].
@@ -977,7 +1218,13 @@ Proof.
constructor.
econstructor; eauto. constructor; auto.
- (* Sifthenelse *)
- exploit sel_expr_correct; eauto. intros [v' [A B]].
+ simpl in TS. destruct (if_conversion (known_id f) env a s1 s2) as [s|] eqn:IFC; monadInv TS.
++ inv WTS. inv WT_FN. assert (env0 = env) by congruence. subst env0. inv WT_STMT.
+ exploit if_conversion_correct; eauto.
+ set (s0 := if b then s1 else s2). intros (e1 & e1' & A & B & C).
+ right; right. econstructor; econstructor.
+ split. eexact B. split. eexact A. econstructor; eauto.
++ exploit sel_expr_correct; eauto. intros [v' [A B]].
assert (Val.bool_of_val v' b). inv B. auto. inv H0.
left; exists (State f' (if b then x else x0) k' sp e' m'); split.
econstructor; eauto. eapply eval_condexpr_of_expr; eauto.
@@ -989,10 +1236,13 @@ Proof.
left; econstructor; split. constructor. econstructor; eauto. constructor; auto.
- (* Sexit seq *)
inv MC. left; econstructor; split. constructor. econstructor; eauto.
+ inv H.
- (* Sexit0 block *)
inv MC. left; econstructor; split. constructor. econstructor; eauto.
+ inv H.
- (* SexitS block *)
inv MC. left; econstructor; split. constructor. econstructor; eauto.
+ inv H.
- (* Sswitch *)
inv H0; simpl in TS.
+ set (ct := compile_switch Int.modulus default cases) in *.
@@ -1023,10 +1273,10 @@ Proof.
- (* Slabel *)
left; econstructor; split. constructor. econstructor; eauto.
- (* Sgoto *)
- assert (sel_stmt (prog_defmap cunit) (Cminor.fn_body f) = OK (fn_body f')).
- { monadInv TF; simpl; auto. }
- exploit (find_label_commut cunit hf lbl (Cminor.fn_body f) (Cminor.call_cont k)).
- eapply call_cont_commut; eauto. eauto.
+ assert (sel_stmt (prog_defmap cunit) (known_id f) env (Cminor.fn_body f) = OK (fn_body f')).
+ { monadInv TF; simpl. congruence. }
+ exploit (find_label_commut cunit hf (known_id f) env lbl (Cminor.fn_body f) (Cminor.call_cont k)).
+ apply match_cont_other. eapply call_cont_commut; eauto. eauto.
rewrite H.
destruct (find_label lbl (fn_body f') (call_cont k'0))
as [[s'' k'']|] eqn:?; intros; try contradiction.
@@ -1035,13 +1285,15 @@ Proof.
econstructor; eauto.
econstructor; eauto.
- (* internal function *)
- destruct TF as (hf & HF & TF). specialize (MC cunit hf).
+ destruct TF as (hf & HF & TF).
monadInv TF. generalize EQ; intros TF; monadInv TF.
exploit Mem.alloc_extends. eauto. eauto. apply Z.le_refl. apply Z.le_refl.
intros [m2' [A B]].
left; econstructor; split.
econstructor; simpl; eauto.
- econstructor; simpl; eauto. apply set_locals_lessdef. apply set_params_lessdef; auto.
+ econstructor; simpl; eauto.
+ apply match_cont_other; auto.
+ apply set_locals_lessdef. apply set_params_lessdef; auto.
- (* external call *)
destruct TF as (hf & HF & TF).
monadInv TF.
@@ -1057,13 +1309,12 @@ Proof.
econstructor. eauto. eapply external_call_symbols_preserved; eauto. apply senv_preserved.
econstructor; eauto.
- (* return *)
- apply match_call_cont_cont in MC. destruct MC as (cunit0 & hf0 & MC).
inv MC.
left; econstructor; split.
econstructor.
econstructor; eauto. destruct optid; simpl; auto. apply set_var_lessdef; auto.
- (* return of an external call turned into a Sbuiltin *)
- right; split. simpl; omega. split. auto. econstructor; eauto.
+ right; left; split. simpl; omega. split. auto. econstructor; eauto.
apply sel_builtin_res_correct; auto.
Qed.
@@ -1079,26 +1330,35 @@ Proof.
rewrite (match_program_main TRANSF). fold tge. rewrite symbols_preserved. eauto.
eexact A.
rewrite <- H2. eapply sig_function_translated; eauto.
- econstructor; eauto. red; intros; constructor. apply Mem.extends_refl.
+ econstructor; eauto. constructor. apply Mem.extends_refl.
Qed.
Lemma sel_final_states:
forall S R r,
match_states S R -> Cminor.final_state S r -> final_state R r.
Proof.
- intros. inv H0. inv H.
- apply match_call_cont_cont in MC. destruct MC as (cunit0 & hf0 & MC).
- inv MC. inv LD. constructor.
+ intros. inv H0. inv H. inv MC. inv LD. constructor.
Qed.
Theorem transf_program_correct:
forward_simulation (Cminor.semantics prog) (CminorSel.semantics tprog).
Proof.
- apply forward_simulation_opt with (match_states := match_states) (measure := measure).
- apply senv_preserved.
- apply sel_initial_states; auto.
- apply sel_final_states; auto.
- apply sel_step_correct; auto.
+ set (MS := fun S T => match_states S T /\ wt_state S).
+ apply forward_simulation_determ_star with (match_states := MS) (measure := measure).
+- apply Cminor.semantics_determinate.
+- apply senv_preserved.
+- intros. exploit sel_initial_states; eauto. intros (T & P & Q).
+ exists T; split; auto; split; auto. eapply wt_initial_state. eexact wt_prog. auto.
+- intros. destruct H. eapply sel_final_states; eauto.
+- intros S1 t S2 A T1 [B C].
+ assert (wt_state S2) by (eapply subject_reduction; eauto using wt_prog).
+ unfold MS.
+ exploit sel_step_correct; eauto.
+ intros [(T2 & D & E) | [(D & E & F) | (S3 & T2 & D & E & F)]].
++ exists S2, T2. intuition auto using star_refl, plus_one.
++ subst t. exists S2, T1. intuition auto using star_refl.
++ assert (wt_state S3) by (eapply subject_reduction_star; eauto using wt_prog).
+ exists S3, T2. intuition auto using plus_one.
Qed.
End PRESERVATION.
diff --git a/driver/Clflags.ml b/driver/Clflags.ml
index fc12863d..d27871ef 100644
--- a/driver/Clflags.ml
+++ b/driver/Clflags.ml
@@ -27,6 +27,8 @@ let option_ftailcalls = ref true
let option_fconstprop = ref true
let option_fcse = ref true
let option_fredundancy = ref true
+let option_fifconversion = ref true
+let option_ffavor_branchless = ref false
let option_falignfunctions = ref (None: int option)
let option_falignbranchtargets = ref 0
let option_faligncondbranchs = ref 0
diff --git a/driver/Driver.ml b/driver/Driver.ml
index 50f14d13..84392ef6 100644
--- a/driver/Driver.ml
+++ b/driver/Driver.ml
@@ -196,6 +196,9 @@ Processing options:
-finline Perform inlining of functions [on]
-finline-functions-called-once Integrate functions only required by their
single caller [on]
+ -fif-conversion Perform if-conversion (generation of conditional moves) [on]
+ -ffavor-branchless Favor the generation of branch-free instruction sequences,
+ even when possibly more costly than the default [off]
Code generation options: (use -fno-<opt> to turn off -f<opt>)
-ffpu Use FP registers for some integer operations [on]
-fsmall-data <n> Set maximal size <n> for allocation in small data area
@@ -250,7 +253,8 @@ let dump_mnemonics destfile =
exit 0
let optimization_options = [
- option_ftailcalls; option_fconstprop; option_fcse; option_fredundancy; option_finline_functions_called_once;
+ option_ftailcalls; option_fifconversion; option_fconstprop; option_fcse;
+ option_fredundancy; option_finline_functions_called_once;
]
let set_all opts () = List.iter (fun r -> r := true) opts
@@ -301,7 +305,8 @@ let cmdline_actions =
Exact "-Os", Set option_Osize;
Exact "-fsmall-data", Integer(fun n -> option_small_data := n);
Exact "-fsmall-const", Integer(fun n -> option_small_const := n);
- Exact "-ffloat-const-prop", Integer(fun n -> option_ffloatconstprop := n);
+ Exact "-ffloat-const-prop", Integer(fun n -> option_ffloatconstprop := n);
+ Exact "-ffavor-branchless", Set option_ffavor_branchless;
Exact "-falign-functions", Integer(fun n -> check_align n; option_falignfunctions := Some n);
Exact "-falign-branch-targets", Integer(fun n -> check_align n; option_falignbranchtargets := n);
Exact "-falign-cond-branches", Integer(fun n -> check_align n; option_faligncondbranchs := n);] @
@@ -364,6 +369,7 @@ let cmdline_actions =
(* Optimization options *)
(* -f options: come in -f and -fno- variants *)
@ f_opt "tailcalls" option_ftailcalls
+ @ f_opt "if-conversion" option_fifconversion
@ f_opt "const-prop" option_fconstprop
@ f_opt "cse" option_fcse
@ f_opt "redundancy" option_fredundancy
diff --git a/extraction/extraction.v b/extraction/extraction.v
index 15a64d89..c0286f7b 100644
--- a/extraction/extraction.v
+++ b/extraction/extraction.v
@@ -72,6 +72,7 @@ Extract Constant Iteration.GenIter.iterate =>
(* Selection *)
Extract Constant Selection.compile_switch => "Switchaux.compile_switch".
+Extract Constant Selection.if_conversion_heuristic => "Selectionaux.if_conversion_heuristic".
(* RTLgen *)
Extract Constant RTLgen.more_likely => "RTLgenaux.more_likely".
diff --git a/test/regression/Makefile b/test/regression/Makefile
index 760ee570..e5b0655e 100644
--- a/test/regression/Makefile
+++ b/test/regression/Makefile
@@ -16,7 +16,7 @@ TESTS=int32 int64 floats floats-basics \
funct3 expr5 struct7 struct8 struct11 struct12 casts1 casts2 char1 \
sizeof1 sizeof2 binops bool for1 for2 switch switch2 compound \
decl1 interop1 bitfields9 ptrs3 \
- parsing krfun
+ parsing krfun ifconv
# Can run, but only in compiled mode, and have reference output in Results
diff --git a/test/regression/Results/ifconv b/test/regression/Results/ifconv
new file mode 100644
index 00000000..38019fe6
--- /dev/null
+++ b/test/regression/Results/ifconv
@@ -0,0 +1,26 @@
+test1(0,1,12,34) = 12
+test1(1,0,45,67) = 67
+test2(0,1,12,34) = 12
+test2(1,0,45,67) = 67
+test3(0,1,12,34) = 12
+test3(1,0,45,67) = 67
+test4(0,1,12,34) = 12
+test4(1,0,45,67) = 67
+test5(0,1,12) = 13
+test5(1,0,45) = 44
+test6(NULL) = 0
+test6(&i) = 1244
+test7(1,0) = -1
+test7(-100,4) = -25
+test8(0) = 0
+test8(1) = -72
+ltest1(-1, 0, 123LL, 456LL) = 124
+ltest1(1, 0, 123LL, 456LL) = 114
+dmax(0.0, 3.14) = 3.140000
+dmax(1.0, -2.718) = 1.000000
+dabs(1.0) = 1.000000
+dabs(-2.718) = 2.718000
+smin(0.0, 3.14) = 0.000000
+smin(1.0, -2.718) = -2.718000
+sdoz(1.0, 0.5) = 0.500000
+sdoz(0.0, 3.14) = 0.000000
diff --git a/test/regression/ifconv.c b/test/regression/ifconv.c
new file mode 100644
index 00000000..dcbf43e5
--- /dev/null
+++ b/test/regression/ifconv.c
@@ -0,0 +1,129 @@
+#include <stdio.h>
+
+/* Several equivalent forms that should be turned into cmov */
+
+int test1(int x, int y, int a, int b)
+{
+ return x < y ? a : b;
+}
+
+int test2(int x, int y, int a, int b)
+{
+ int r;
+ if (x < y) { r = a; } else { r = b; }
+ return r;
+}
+
+int test3(int x, int y, int a, int b)
+{
+ int r = b;
+ if (x < y) { r = a; }
+ return r;
+}
+
+int test4(int x, int y, int a, int b)
+{
+ int r = a;
+ if (x < y) { /*skip*/; } else { r = b; }
+ return r;
+}
+
+/* A more advanced example */
+
+int test5(int x, int y, int a)
+{
+ return x < y ? a + 1 : a - 1;
+}
+
+/* Unsafe operations should not be turned into cmov */
+
+int test6(int * p)
+{
+ return p == NULL ? 0 : *p + 10;
+}
+
+int test7(int a, int b)
+{
+ return b == 0 ? -1 : a / b;
+}
+
+/* Very large operations should not be turned into cmov */
+
+int test8(int a)
+{
+ return a == 0 ? 0 : a*a*a*a - 2*a*a*a + 10*a*a + 42*a - 123;
+}
+
+/* Some examples with 64-bit integers */
+
+long long ltest1(int x, int y, long long a, long long b)
+{
+ return x < y ? a + 1 : b >> 2;
+}
+
+/* Some examples with floating-point */
+
+double dmax(double x, double y)
+{
+ return x >= y ? x : y;
+}
+
+double dabs(double x)
+{
+ return x < 0.0 ? -x : x;
+}
+
+float smin(float x, float y)
+{
+ return x <= y ? x : y;
+}
+
+float sdoz(float x, float y)
+{
+ return x >= y ? x - y : 0.0f;
+}
+
+/* Test harness */
+
+#define TESTI(call) printf(#call " = %d\n", call)
+#define TESTL(call) printf(#call " = %lld\n", call)
+#define TESTF(call) printf(#call " = %f\n", call)
+
+
+int main()
+{
+ int i = 1234;
+ TESTI(test1(0,1,12,34));
+ TESTI(test1(1,0,45,67));
+ TESTI(test2(0,1,12,34));
+ TESTI(test2(1,0,45,67));
+ TESTI(test3(0,1,12,34));
+ TESTI(test3(1,0,45,67));
+ TESTI(test4(0,1,12,34));
+ TESTI(test4(1,0,45,67));
+ TESTI(test5(0,1,12));
+ TESTI(test5(1,0,45));
+ TESTI(test6(NULL));
+ TESTI(test6(&i));
+ TESTI(test7(1,0));
+ TESTI(test7(-100,4));
+ TESTI(test8(0));
+ TESTI(test8(1));
+
+ TESTL(ltest1(-1, 0, 123LL, 456LL));
+ TESTL(ltest1(1, 0, 123LL, 456LL));
+
+ TESTF(dmax(0.0, 3.14));
+ TESTF(dmax(1.0, -2.718));
+
+ TESTF(dabs(1.0));
+ TESTF(dabs(-2.718));
+
+ TESTF(smin(0.0, 3.14));
+ TESTF(smin(1.0, -2.718));
+
+ TESTF(sdoz(1.0, 0.5));
+ TESTF(sdoz(0.0, 3.14));
+
+ return 0;
+}