diff options
-rw-r--r-- | backend/Cminor.v | 64 | ||||
-rw-r--r-- | backend/Selection.v | 93 | ||||
-rw-r--r-- | backend/Selectionaux.ml | 113 | ||||
-rw-r--r-- | backend/Selectionproof.v | 386 | ||||
-rw-r--r-- | driver/Clflags.ml | 2 | ||||
-rw-r--r-- | driver/Driver.ml | 10 | ||||
-rw-r--r-- | extraction/extraction.v | 1 | ||||
-rw-r--r-- | test/regression/Makefile | 2 | ||||
-rw-r--r-- | test/regression/Results/ifconv | 26 | ||||
-rw-r--r-- | test/regression/ifconv.c | 129 |
10 files changed, 751 insertions, 75 deletions
diff --git a/backend/Cminor.v b/backend/Cminor.v index 11941da3..ca01ad50 100644 --- a/backend/Cminor.v +++ b/backend/Cminor.v @@ -591,6 +591,70 @@ Proof. red; intros; inv H; simpl; try omega; eapply external_call_trace_length; eauto. Qed. +(** This semantics is determinate. *) + +Lemma eval_expr_determ: + forall ge sp e m a v, eval_expr ge sp e m a v -> + forall v', eval_expr ge sp e m a v' -> v' = v. +Proof. + induction 1; intros v' E'; inv E'. +- congruence. +- congruence. +- assert (v0 = v1) by eauto. congruence. +- assert (v0 = v1) by eauto. assert (v3 = v2) by eauto. congruence. +- assert (vaddr0 = vaddr) by eauto. congruence. +Qed. + +Lemma eval_exprlist_determ: + forall ge sp e m al vl, eval_exprlist ge sp e m al vl -> + forall vl', eval_exprlist ge sp e m al vl' -> vl' = vl. +Proof. + induction 1; intros vl' E'; inv E'. + - auto. + - f_equal; eauto using eval_expr_determ. +Qed. + +Ltac Determ := + try congruence; + match goal with + | [ |- match_traces _ E0 E0 /\ (_ -> _) ] => + split; [constructor|intros _; Determ] + | [ H: is_call_cont ?k |- _ ] => + contradiction || (clear H; Determ) + | [ H1: eval_expr _ _ _ _ ?a ?v1, H2: eval_expr _ _ _ _ ?a ?v2 |- _ ] => + assert (v1 = v2) by (eapply eval_expr_determ; eauto); + clear H1 H2; Determ + | [ H1: eval_exprlist _ _ _ _ ?a ?v1, H2: eval_exprlist _ _ _ _ ?a ?v2 |- _ ] => + assert (v1 = v2) by (eapply eval_exprlist_determ; eauto); + clear H1 H2; Determ + | _ => idtac + end. + +Lemma semantics_determinate: + forall (p: program), determinate (semantics p). +Proof. + intros. constructor; set (ge := Genv.globalenv p); simpl; intros. +- (* determ *) + inv H; inv H0; Determ. + + subst vargs0. exploit external_call_determ. eexact H2. eexact H13. + intros (A & B). split; intros; auto. + apply B in H; destruct H; congruence. + + subst v0. assert (b0 = b) by (inv H2; inv H13; auto). subst b0; auto. + + assert (n0 = n) by (inv H2; inv H14; auto). subst n0; auto. + + exploit external_call_determ. eexact H1. eexact H7. + intros (A & B). split; intros; auto. + apply B in H; destruct H; congruence. +- (* single event *) + red; simpl. destruct 1; simpl; try omega; + eapply external_call_trace_length; eauto. +- (* initial states *) + inv H; inv H0. unfold ge0, ge1 in *. congruence. +- (* nostep final state *) + red; intros; red; intros. inv H; inv H0. +- (* final states *) + inv H; inv H0; auto. +Qed. + (** * Alternate operational semantics (big-step) *) (** We now define another semantics for Cminor without [goto] that follows diff --git a/backend/Selection.v b/backend/Selection.v index 4520cb0c..4154659c 100644 --- a/backend/Selection.v +++ b/backend/Selection.v @@ -26,7 +26,7 @@ Require String. Require Import Coqlib Maps. Require Import AST Errors Integers Globalenvs Switch. Require Cminor. -Require Import Op CminorSel. +Require Import Op CminorSel Cminortyping. Require Import SelectOp SplitLong SelectLong SelectDiv. Require Machregs. @@ -43,6 +43,12 @@ Function condexpr_of_expr (e: expr) : condexpr := | _ => CEcond (Ccompuimm Cne Int.zero) (e ::: Enil) end. +Function condition_of_expr (e: expr) : condition * exprlist := + match e with + | Eop (Ocmp c) el => (c, el) + | _ => (Ccompuimm Cne Int.zero, e ::: Enil) + end. + (** Conversion of loads and stores *) Definition load (chunk: memory_chunk) (e1: expr) := @@ -173,6 +179,10 @@ Fixpoint sel_exprlist (al: list Cminor.expr) : exprlist := | a :: bl => Econs (sel_expr a) (sel_exprlist bl) end. +Definition sel_select_opt (ty: typ) (arg1 arg2 arg3: Cminor.expr) : option expr := + let (cond, args) := condition_of_expr (sel_expr arg1) in + SelectOp.select ty cond args (sel_expr arg2) (sel_expr arg3). + (** Recognition of immediate calls and calls to built-in functions that should be inlined *) @@ -267,9 +277,63 @@ Definition sel_switch_long := (fun arg ofs => subl arg (longconst (Int64.repr ofs))) lowlong. +(** "If conversion": conversion of certain if-then-else statements + into branchless conditional move instructions. *) + +(** Recognition of "then" and "else" statements that support if-conversion. + Basically we are interested in assignments to local variables [id = e]. + However the front-end may have put [skip] statements around these + assignments. *) + +Inductive stmt_class : Type := + | SCskip + | SCassign (id: ident) (a: Cminor.expr) + | SCother. + +Function classify_stmt (s: Cminor.stmt) : stmt_class := + match s with + | Cminor.Sskip => SCskip + | Cminor.Sassign id a => SCassign id a + | Cminor.Sseq Cminor.Sskip s => classify_stmt s + | Cminor.Sseq s Cminor.Sskip => classify_stmt s + | _ => SCother + end. + +(** External heuristic to limit the amount of if-conversion performed. + Arguments are: the condition, the "then" and the "else" expressions, + and the type at which selection is done. *) + +Parameter if_conversion_heuristic: + Cminor.expr -> Cminor.expr -> Cminor.expr -> AST.typ -> bool. + +Definition if_conversion_base + (ki: known_idents) (env: typenv) + (cond: Cminor.expr) (id: ident) (ifso ifnot: Cminor.expr) : option stmt := + let ty := env id in + if is_known ki id + && safe_expr ki ifso && safe_expr ki ifnot + && if_conversion_heuristic cond ifso ifnot ty + then option_map + (fun sel => Sassign id sel) + (sel_select_opt ty cond ifso ifnot) + else None. + +Definition if_conversion + (ki: known_idents) (env: typenv) + (cond: Cminor.expr) (ifso ifnot: Cminor.stmt) : option stmt := + match classify_stmt ifso, classify_stmt ifnot with + | SCskip, SCassign id a => + if_conversion_base ki env cond id (Cminor.Evar id) a + | SCassign id a, SCskip => + if_conversion_base ki env cond id a (Cminor.Evar id) + | SCassign id1 a1, SCassign id2 a2 => + if ident_eq id1 id2 then if_conversion_base ki env cond id1 a1 a2 else None + | _, _ => None + end. + (** Conversion from Cminor statements to Cminorsel statements. *) -Fixpoint sel_stmt (s: Cminor.stmt) : res stmt := +Fixpoint sel_stmt (ki: known_idents) (env: typenv) (s: Cminor.stmt) : res stmt := match s with | Cminor.Sskip => OK Sskip | Cminor.Sassign id e => OK (Sassign id (sel_expr e)) @@ -291,15 +355,19 @@ Fixpoint sel_stmt (s: Cminor.stmt) : res stmt := | _ => Stailcall sg (inl _ (sel_expr fn)) (sel_exprlist args) end) | Cminor.Sseq s1 s2 => - do s1' <- sel_stmt s1; do s2' <- sel_stmt s2; + do s1' <- sel_stmt ki env s1; do s2' <- sel_stmt ki env s2; OK (Sseq s1' s2') | Cminor.Sifthenelse e ifso ifnot => - do ifso' <- sel_stmt ifso; do ifnot' <- sel_stmt ifnot; - OK (Sifthenelse (condexpr_of_expr (sel_expr e)) ifso' ifnot') + match if_conversion ki env e ifso ifnot with + | Some s => OK s + | None => + do ifso' <- sel_stmt ki env ifso; do ifnot' <- sel_stmt ki env ifnot; + OK (Sifthenelse (condexpr_of_expr (sel_expr e)) ifso' ifnot') + end | Cminor.Sloop body => - do body' <- sel_stmt body; OK (Sloop body') + do body' <- sel_stmt ki env body; OK (Sloop body') | Cminor.Sblock body => - do body' <- sel_stmt body; OK (Sblock body') + do body' <- sel_stmt ki env body; OK (Sblock body') | Cminor.Sexit n => OK (Sexit n) | Cminor.Sswitch false e cases dfl => let t := compile_switch Int.modulus dfl cases in @@ -314,7 +382,7 @@ Fixpoint sel_stmt (s: Cminor.stmt) : res stmt := | Cminor.Sreturn None => OK (Sreturn None) | Cminor.Sreturn (Some e) => OK (Sreturn (Some (sel_expr e))) | Cminor.Slabel lbl body => - do body' <- sel_stmt body; OK (Slabel lbl body') + do body' <- sel_stmt ki env body; OK (Slabel lbl body') | Cminor.Sgoto lbl => OK (Sgoto lbl) end. @@ -322,8 +390,15 @@ End SELECTION. (** Conversion of functions. *) +Definition known_id (f: Cminor.function) : known_idents := + let add (ki: known_idents) (id: ident) := PTree.set id tt ki in + List.fold_left add f.(Cminor.fn_vars) + (List.fold_left add f.(Cminor.fn_params) (PTree.empty unit)). + Definition sel_function (dm: PTree.t globdef) (hf: helper_functions) (f: Cminor.function) : res function := - do body' <- sel_stmt dm f.(Cminor.fn_body); + let ki := known_id f in + do env <- Cminortyping.type_function f; + do body' <- sel_stmt dm ki env f.(Cminor.fn_body); OK (mkfunction f.(Cminor.fn_sig) f.(Cminor.fn_params) diff --git a/backend/Selectionaux.ml b/backend/Selectionaux.ml new file mode 100644 index 00000000..de8127c6 --- /dev/null +++ b/backend/Selectionaux.ml @@ -0,0 +1,113 @@ +(* *********************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Xavier Leroy, Collège de France and Inria Paris *) +(* *) +(* Copyright Institut National de Recherche en Informatique et en *) +(* Automatique. All rights reserved. This file is distributed *) +(* under the terms of the INRIA Non-Commercial License Agreement. *) +(* *) +(* *********************************************************************) + +open AST +open Cminor + +(* Heuristics to guide if conversion *) + +(* Estimate a cost for evaluating a safe expression. + Unsafe operators need not be estimated. + Basic integer operations (add, and, ...) have cost 1 by convention. + The other costs are rough estimates. *) + +let cost_unop = function + | Ocast8unsigned | Ocast8signed + | Ocast16unsigned | Ocast16signed + | Onegint | Onotint -> 1 + | Onegf | Oabsf -> 1 + | Onegfs | Oabsfs -> 1 + | Osingleoffloat | Ofloatofsingle -> 2 + | Ointoffloat | Ointuoffloat + | Ofloatofint | Ofloatofintu + | Ointofsingle | Ointuofsingle + | Osingleofint | Osingleofintu -> assert false + | Onegl | Onotl -> if Archi.splitlong then 2 else 1 + | Ointoflong | Olongofint | Olongofintu -> 1 + | Olongoffloat | Olonguoffloat + | Ofloatoflong | Ofloatoflongu + | Olongofsingle | Olonguofsingle + | Osingleoflong | Osingleoflongu -> assert false + +let cost_binop = function + | Oadd | Osub -> 1 + | Omul -> 2 + | Odiv | Odivu | Omod | Omodu -> assert false + | Oand | Oor | Oxor | Oshl | Oshr | Oshru -> 1 + | Oaddf | Osubf | Omulf -> 2 + | Odivf -> 10 + | Oaddfs| Osubfs| Omulfs -> 2 + | Odivfs -> 10 + | Oaddl | Osubl -> if Archi.splitlong then 3 else 1 + | Omull -> if Archi.splitlong then 6 else 2 + | Odivl | Odivlu | Omodl | Omodlu -> assert false + | Oandl | Oorl | Oxorl -> if Archi.splitlong then 2 else 1 + | Oshll | Oshrl | Oshrlu -> if Archi.splitlong then 4 else 1 + | Ocmp _ | Ocmpu _ -> 2 + | Ocmpf _ | Ocmpfs _ -> 2 + | Ocmpl _ | Ocmplu _ -> assert false + +let rec cost_expr = function + | Evar _ -> 0 + | Econst _ -> 1 + | Eunop(op, e1) -> cost_unop op + cost_expr e1 + | Ebinop(op, e1, e2) -> cost_binop op + cost_expr e1 + cost_expr e2 + | Eload(_, e1) -> assert false + +(* Does the target architecture support an efficient "conditional move" + at the given type? *) + +let fast_cmove ty = + match Configuration.arch, Configuration.model with + | "arm", _ -> + (match ty with Tint | Tfloat | Tsingle -> true | _ -> false) + | "powerpc", "e5500" -> + (match ty with Tint -> true | Tlong -> true | _ -> false) + | "powerpc", _ -> false + | "riscV", _ -> false + | "x86", _ -> + (match ty with Tint -> true | Tlong -> Archi.ptr64 | _ -> false) + | _, _ -> + assert false + +(* The if-conversion heuristic depend on the + -fif-conversion and -ffavor-branchless flags. + +With [-fno-if-conversion] or [-0O], if-conversion is turned off entirely. +With [-ffavor-branchless], if-conversion is performed whenever semantically +correct, regardless of how much it could cost. +Otherwise (and by default), optimization is performed when it seems beneficial. + +If-conversion seems beneficial if: +- the target architecture supports an efficient "conditional move" instruction + (not an emulation that takes several instructions) +- the total cost the "then" and "else" branches is not too high +- the cost difference between the "then" and "else" branches is low enough. + +Intuition: on a modern processor, the "then" and the "else" branches +can generally be computed in parallel, there is enough ILP for that. +So, the bad case is if the most taken branch is much cheaper than the +other branch. Another bad case is if both branches are big: since the +code for one branch precedes entirely the code for the other branch, +if the first branch contains a lot of instructions, +dynamic reordering of instructions will not look ahead far enough +to execute instructions from the other branch in parallel with +instructions from the first branch. +*) + +let if_conversion_heuristic cond ifso ifnot ty = + if not !Clflags.option_fifconversion then false else + if !Clflags.option_ffavor_branchless then true else + if not (fast_cmove ty) then false else + let c1 = cost_expr ifso and c2 = cost_expr ifnot in + c1 + c2 <= 24 && abs (c1 - c2) <= 8 + diff --git a/backend/Selectionproof.v b/backend/Selectionproof.v index 621459d0..50875086 100644 --- a/backend/Selectionproof.v +++ b/backend/Selectionproof.v @@ -15,7 +15,7 @@ Require Import FunInd. Require Import Coqlib Maps. Require Import AST Linking Errors Integers Values Memory Events Globalenvs Smallstep. -Require Import Switch Cminor Op CminorSel. +Require Import Switch Cminor Op CminorSel Cminortyping. Require Import SelectOp SelectDiv SplitLong SelectLong Selection. Require Import SelectOpproof SelectDivproof SplitLongproof SelectLongproof. @@ -119,6 +119,16 @@ Let ge := Genv.globalenv prog. Let tge := Genv.globalenv tprog. Hypothesis TRANSF: match_prog prog tprog. +Lemma wt_prog : wt_program prog. +Proof. + red; intros. destruct TRANSF as [A _]. + exploit list_forall2_in_left; eauto. + intros ((i' & gd') & B & (C & D)). simpl in *. inv D. + destruct H2 as (hf & P & Q). destruct f; monadInv Q. +- monadInv EQ. econstructor; apply type_function_sound; eauto. +- constructor. +Qed. + Lemma symbols_preserved: forall (s: ident), Genv.find_symbol tge s = Genv.find_symbol ge s. Proof (Genv.find_symbol_match TRANSF). @@ -202,6 +212,25 @@ Proof. simpl. inv H0. auto. Qed. +Lemma eval_condition_of_expr: + forall a le v b, + eval_expr tge sp e m le a v -> + Val.bool_of_val v b -> + let (cond, al) := condition_of_expr a in + exists vl, + eval_exprlist tge sp e m le al vl + /\ eval_condition cond vl m = Some b. +Proof. + intros until a. functional induction (condition_of_expr a); intros. +(* compare *) + inv H. simpl in H6. inv H6. apply Val.bool_of_val_of_optbool in H0. + exists vl; auto. +(* default *) + exists (v :: nil); split. + econstructor. auto. constructor. + simpl. inv H0. auto. +Qed. + Lemma eval_load: forall le a v chunk v', eval_expr tge sp e m le a v -> @@ -698,6 +727,29 @@ Proof. exists (v1' :: vl'); split; auto. constructor; eauto. Qed. +Lemma sel_select_opt_correct: + forall ty cond a1 a2 a sp e m vcond v1 v2 b e' m' le, + sel_select_opt ty cond a1 a2 = Some a -> + Cminor.eval_expr ge sp e m cond vcond -> + Cminor.eval_expr ge sp e m a1 v1 -> + Cminor.eval_expr ge sp e m a2 v2 -> + Val.bool_of_val vcond b -> + env_lessdef e e' -> Mem.extends m m' -> + exists v', eval_expr tge sp e' m' le a v' /\ Val.lessdef (Val.select (Some b) v1 v2 ty) v'. +Proof. + unfold sel_select_opt; intros. + destruct (condition_of_expr (sel_expr cond)) as [cnd args] eqn:C. + exploit sel_expr_correct. eexact H0. eauto. eauto. intros (vcond' & EVC & LDC). + exploit sel_expr_correct. eexact H1. eauto. eauto. intros (v1' & EV1 & LD1). + exploit sel_expr_correct. eexact H2. eauto. eauto. intros (v2' & EV2 & LD2). + assert (Val.bool_of_val vcond' b) by (inv H3; inv LDC; constructor). + exploit eval_condition_of_expr. eexact EVC. eauto. rewrite C. intros (vargs' & EVARGS & EVCOND). + exploit eval_select; eauto. intros (v' & X & Y). + exists v'; split; eauto. + eapply Val.lessdef_trans; [|eexact Y]. + apply Val.select_lessdef; auto. +Qed. + Lemma sel_builtin_arg_correct: forall sp e e' m m' a v c, env_lessdef e e' -> Mem.extends m m' -> @@ -741,37 +793,174 @@ Proof. intros. destruct oid; simpl; auto. apply set_var_lessdef; auto. Qed. +(** If-conversion *) + +Lemma classify_stmt_sound_1: + forall f sp e m s k, + classify_stmt s = SCskip -> + star Cminor.step ge (Cminor.State f s k sp e m) E0 (Cminor.State f Cminor.Sskip k sp e m). +Proof. + intros until s; functional induction (classify_stmt s); intros; try discriminate. + - apply star_refl. + - eapply star_trans; eauto. eapply star_two. constructor. constructor. + traceEq. traceEq. + - eapply star_left. constructor. + eapply star_right. eauto. constructor. + traceEq. traceEq. +Qed. + +Lemma classify_stmt_sound_2: + forall f sp e m a id v, + Cminor.eval_expr ge sp e m a v -> + forall s k, + classify_stmt s = SCassign id a -> + star Cminor.step ge (Cminor.State f s k sp e m) E0 (Cminor.State f Cminor.Sskip k sp (PTree.set id v e) m). +Proof. + intros until s; functional induction (classify_stmt s); intros; try discriminate. + - inv H0. apply star_one. constructor; auto. + - eapply star_trans; eauto. eapply star_two. constructor. constructor. + traceEq. traceEq. + - eapply star_left. constructor. + eapply star_right. eauto. constructor. + traceEq. traceEq. +Qed. + +Lemma classify_stmt_wt: + forall env tyret id a s, + classify_stmt s = SCassign id a -> + wt_stmt env tyret s -> + wt_expr env a (env id). +Proof. + intros until s; functional induction (classify_stmt s); intros CL WT; + try discriminate. +- inv CL; inv WT; auto. +- inv WT; eauto. +- inv WT; eauto. +Qed. + +Lemma eval_select_safe_exprs: + forall a1 a2 f env ty e e' m m' sp cond vb b id s, + safe_expr (known_id f) a1 = true -> + safe_expr (known_id f) a2 = true -> + option_map (fun sel => Sassign id sel) (sel_select_opt ty cond a1 a2) = Some s -> + Cminor.eval_expr ge sp e m cond vb -> Val.bool_of_val vb b -> + wt_expr env a1 ty -> + wt_expr env a2 ty -> + def_env f e -> wt_env env e -> + Cminor.eval_expr ge sp e m cond vb -> Val.bool_of_val vb b -> + env_lessdef e e' -> Mem.extends m m' -> + exists a' v1 v2 v', + s = Sassign id a' + /\ Cminor.eval_expr ge sp e m a1 v1 + /\ Cminor.eval_expr ge sp e m a2 v2 + /\ eval_expr tge sp e' m' nil a' v' + /\ Val.lessdef (if b then v1 else v2) v'. +Proof. + intros. + destruct (sel_select_opt ty cond a1 a2) as [a'|] eqn:SSO; simpl in H1; inv H1. + destruct (eval_safe_expr ge f sp e m a1) as (v1 & EV1); auto. + destruct (eval_safe_expr ge f sp e m a2) as (v2 & EV2); auto. + assert (TY1: Val.has_type v1 ty) by (eapply wt_eval_expr; eauto). + assert (TY2: Val.has_type v2 ty) by (eapply wt_eval_expr; eauto). + exploit sel_select_opt_correct; eauto. intros (v' & EV' & LD). + exists a', v1, v2, v'; intuition eauto. + apply Val.lessdef_trans with (Val.select (Some b) v1 v2 ty). + simpl. rewrite Val.normalize_idem; auto. destruct b; auto. + assumption. +Qed. + +Lemma if_conversion_correct: + forall f env tyret cond ifso ifnot s vb b k f' k' sp e m e' m', + if_conversion (known_id f) env cond ifso ifnot = Some s -> + def_env f e -> wt_env env e -> + wt_stmt env tyret ifso -> + wt_stmt env tyret ifnot -> + Cminor.eval_expr ge sp e m cond vb -> Val.bool_of_val vb b -> + env_lessdef e e' -> Mem.extends m m' -> + let s0 := if b then ifso else ifnot in + exists e1 e1', + step tge (State f' s k' sp e' m') E0 (State f' Sskip k' sp e1' m') + /\ star Cminor.step ge (Cminor.State f s0 k sp e m) E0 (Cminor.State f Cminor.Sskip k sp e1 m) + /\ env_lessdef e1 e1'. +Proof. + unfold if_conversion; intros until m'; intros IFC DE WTE WT1 WT2 EVC BOV ELD MEXT. + set (s0 := if b then ifso else ifnot). set (ki := known_id f) in *. + destruct (classify_stmt ifso) eqn:IFSO; try discriminate; + destruct (classify_stmt ifnot) eqn:IFNOT; try discriminate; + unfold if_conversion_base in IFC. +- destruct (is_known ki id && safe_expr ki (Cminor.Evar id) && safe_expr ki a + && if_conversion_heuristic cond (Cminor.Evar id) a (env id)) eqn:B; inv IFC. + InvBooleans. + exploit (eval_select_safe_exprs (Cminor.Evar id) a); eauto. + constructor. eapply classify_stmt_wt; eauto. + intros (a' & v1 & v2 & v' & A & B & C & D & E). + exists (PTree.set id (if b then v1 else v2) e), (PTree.set id v' e'). + split. subst s. constructor; auto. + split. unfold s0; destruct b. + rewrite PTree.gsident by (inv B; auto). apply classify_stmt_sound_1; auto. + eapply classify_stmt_sound_2; eauto. + apply set_var_lessdef; auto. +- destruct (is_known ki id && safe_expr ki a && safe_expr ki (Cminor.Evar id) + && if_conversion_heuristic cond a (Cminor.Evar id) (env id)) eqn:B; inv IFC. + InvBooleans. + exploit (eval_select_safe_exprs a (Cminor.Evar id)); eauto. + eapply classify_stmt_wt; eauto. constructor. + intros (a' & v1 & v2 & v' & A & B & C & D & E). + exists (PTree.set id (if b then v1 else v2) e), (PTree.set id v' e'). + split. subst s. constructor; auto. + split. unfold s0; destruct b. + eapply classify_stmt_sound_2; eauto. + rewrite PTree.gsident by (inv C; auto). apply classify_stmt_sound_1; auto. + apply set_var_lessdef; auto. +- destruct (ident_eq id id0); try discriminate. subst id0. + destruct (is_known ki id && safe_expr ki a && safe_expr ki a0 + && if_conversion_heuristic cond a a0 (env id)) eqn:B; inv IFC. + InvBooleans. + exploit (eval_select_safe_exprs a a0); eauto. + eapply classify_stmt_wt; eauto. eapply classify_stmt_wt; eauto. + intros (a' & v1 & v2 & v' & A & B & C & D & E). + exists (PTree.set id (if b then v1 else v2) e), (PTree.set id v' e'). + split. subst s. constructor; auto. + split. unfold s0; destruct b; eapply classify_stmt_sound_2; eauto. + apply set_var_lessdef; auto. +Qed. + End EXPRESSIONS. (** Semantic preservation for functions and statements. *) -Inductive match_cont: Cminor.program -> helper_functions -> Cminor.cont -> CminorSel.cont -> Prop := - | match_cont_stop: forall cunit hf, - match_cont cunit hf Cminor.Kstop Kstop - | match_cont_seq: forall cunit hf s s' k k', - sel_stmt (prog_defmap cunit) s = OK s' -> - match_cont cunit hf k k' -> - match_cont cunit hf (Cminor.Kseq s k) (Kseq s' k') - | match_cont_block: forall cunit hf k k', - match_cont cunit hf k k' -> - match_cont cunit hf (Cminor.Kblock k) (Kblock k') - | match_cont_call: forall cunit' hf' cunit hf id f sp e k f' e' k', +Inductive match_cont: Cminor.program -> helper_functions -> known_idents -> typenv -> Cminor.cont -> CminorSel.cont -> Prop := + | match_cont_seq: forall cunit hf ki env s s' k k', + sel_stmt (prog_defmap cunit) ki env s = OK s' -> + match_cont cunit hf ki env k k' -> + match_cont cunit hf ki env (Cminor.Kseq s k) (Kseq s' k') + | match_cont_block: forall cunit hf ki env k k', + match_cont cunit hf ki env k k' -> + match_cont cunit hf ki env (Cminor.Kblock k) (Kblock k') + | match_cont_other: forall cunit hf ki env k k', + match_call_cont k k' -> + match_cont cunit hf ki env k k' + +with match_call_cont: Cminor.cont -> CminorSel.cont -> Prop := + | match_cont_stop: + match_call_cont Cminor.Kstop Kstop + | match_cont_call: forall cunit hf env id f sp e k f' e' k', linkorder cunit prog -> helper_functions_declared cunit hf -> sel_function (prog_defmap cunit) hf f = OK f' -> - match_cont cunit hf k k' -> env_lessdef e e' -> - match_cont cunit' hf' (Cminor.Kcall id f sp e k) (Kcall id f' sp e' k'). - -Definition match_call_cont (k: Cminor.cont) (k': CminorSel.cont) : Prop := - forall cunit hf, match_cont cunit hf k k'. + type_function f = OK env -> + match_cont cunit hf (known_id f) env k k' -> + env_lessdef e e' -> + match_call_cont (Cminor.Kcall id f sp e k) (Kcall id f' sp e' k'). Inductive match_states: Cminor.state -> CminorSel.state -> Prop := - | match_state: forall cunit hf f f' s k s' k' sp e m e' m' + | match_state: forall cunit hf f f' s k s' k' sp e m e' m' env (LINK: linkorder cunit prog) (HF: helper_functions_declared cunit hf) (TF: sel_function (prog_defmap cunit) hf f = OK f') - (TS: sel_stmt (prog_defmap cunit) s = OK s') - (MC: match_cont cunit hf k k') + (TYF: type_function f = OK env) + (TS: sel_stmt (prog_defmap cunit) (known_id f) env s = OK s') + (MC: match_cont cunit hf (known_id f) env k k') (LD: env_lessdef e e') (ME: Mem.extends m m'), match_states @@ -793,11 +982,12 @@ Inductive match_states: Cminor.state -> CminorSel.state -> Prop := match_states (Cminor.Returnstate v k m) (Returnstate v' k' m') - | match_builtin_1: forall cunit hf ef args args' optid f sp e k m al f' e' k' m' + | match_builtin_1: forall cunit hf ef args args' optid f sp e k m al f' e' k' m' env (LINK: linkorder cunit prog) (HF: helper_functions_declared cunit hf) (TF: sel_function (prog_defmap cunit) hf f = OK f') - (MC: match_cont cunit hf k k') + (TYF: type_function f = OK env) + (MC: match_cont cunit hf (known_id f) env k k') (LDA: Val.lessdef_list args args') (LDE: env_lessdef e e') (ME: Mem.extends m m') @@ -805,11 +995,12 @@ Inductive match_states: Cminor.state -> CminorSel.state -> Prop := match_states (Cminor.Callstate (External ef) args (Cminor.Kcall optid f sp e k) m) (State f' (Sbuiltin (sel_builtin_res optid) ef al) k' sp e' m') - | match_builtin_2: forall cunit hf v v' optid f sp e k m f' e' m' k' + | match_builtin_2: forall cunit hf v v' optid f sp e k m f' e' m' k' env (LINK: linkorder cunit prog) (HF: helper_functions_declared cunit hf) (TF: sel_function (prog_defmap cunit) hf f = OK f') - (MC: match_cont cunit hf k k') + (TYF: type_function f = OK env) + (MC: match_cont cunit hf (known_id f) env k k') (LDV: Val.lessdef v v') (LDE: env_lessdef e e') (ME: Mem.extends m m'), @@ -818,23 +1009,23 @@ Inductive match_states: Cminor.state -> CminorSel.state -> Prop := (State f' Sskip k' sp (set_builtin_res (sel_builtin_res optid) v' e') m'). Remark call_cont_commut: - forall cunit hf k k', match_cont cunit hf k k' -> match_call_cont (Cminor.call_cont k) (call_cont k'). + forall cunit hf ki env k k', + match_cont cunit hf ki env k k' -> match_call_cont (Cminor.call_cont k) (call_cont k'). Proof. - induction 1; simpl; auto; red; intros. -- constructor. -- eapply match_cont_call with (hf := hf); eauto. + induction 1; simpl; auto. inversion H; subst; auto. Qed. Remark match_is_call_cont: - forall cunit hf k k', match_cont cunit hf k k' -> Cminor.is_call_cont k -> match_call_cont k k'. + forall cunit hf ki env k k', + match_cont cunit ki env hf k k' -> Cminor.is_call_cont k -> + match_call_cont k k' /\ is_call_cont k'. Proof. - destruct 1; intros; try contradiction; red; intros. -- constructor. -- eapply match_cont_call with (hf := hf); eauto. + destruct 1; intros; try contradiction. split; auto. inv H; auto. Qed. +(* Remark match_call_cont_cont: - forall k k', match_call_cont k k' -> exists cunit hf, match_cont cunit hf k k'. + forall k k', match_call_cont k k' -> exists cunit hf ki env, match_cont cunit hf ki env k k'. Proof. intros. simple refine (let cunit : Cminor.program := _ in _). econstructor. apply nil. apply nil. apply xH. @@ -842,14 +1033,58 @@ Proof. econstructor; apply xH. exists cunit, hf; auto. Qed. +*) + +Definition nolabel (s: Cminor.stmt) : Prop := + forall lbl k, Cminor.find_label lbl s k = None. +Definition nolabel' (s: stmt) : Prop := + forall lbl k, find_label lbl s k = None. + +Lemma classify_stmt_nolabel: + forall s, classify_stmt s <> SCother -> nolabel s. +Proof. + intros s. functional induction (classify_stmt s); intros. +- red; auto. +- red; auto. +- apply IHs0 in H. red; intros; simpl. apply H. +- apply IHs0 in H. red; intros; simpl. rewrite H; auto. +- congruence. +Qed. + +Lemma if_conversion_base_nolabel: forall (hf: helper_functions) ki env a id a1 a2 s, + if_conversion_base ki env a id a1 a2 = Some s -> + nolabel' s. +Proof. + unfold if_conversion_base; intros. + destruct (is_known ki id && safe_expr ki a1 && safe_expr ki a2 && + if_conversion_heuristic a a1 a2 (env id)); try discriminate. + destruct (sel_select_opt (env id) a a1 a2); inv H. + red; auto. +Qed. + +Lemma if_conversion_nolabel: forall (hf: helper_functions) ki env a s1 s2 s, + if_conversion ki env a s1 s2 = Some s -> + nolabel s1 /\ nolabel s2 /\ nolabel' s. +Proof. + unfold if_conversion; intros. + Ltac conclude := + split; [apply classify_stmt_nolabel;congruence + |split; [apply classify_stmt_nolabel;congruence + |eapply if_conversion_base_nolabel; eauto]]. + destruct (classify_stmt s1) eqn:C1; try discriminate; + destruct (classify_stmt s2) eqn:C2; try discriminate. + conclude. + conclude. + destruct (ident_eq id id0). conclude. discriminate. +Qed. Remark find_label_commut: - forall cunit hf lbl s k s' k', - match_cont cunit hf k k' -> - sel_stmt (prog_defmap cunit) s = OK s' -> + forall cunit hf ki env lbl s k s' k', + match_cont cunit hf ki env k k' -> + sel_stmt (prog_defmap cunit) ki env s = OK s' -> match Cminor.find_label lbl s k, find_label lbl s' k' with | None, None => True - | Some(s1, k1), Some(s1', k1') => sel_stmt (prog_defmap cunit) s1 = OK s1' /\ match_cont cunit hf k1 k1' + | Some(s1, k1), Some(s1', k1') => sel_stmt (prog_defmap cunit) ki env s1 = OK s1' /\ match_cont cunit hf ki env k1 k1' | _, _ => False end. Proof. @@ -866,6 +1101,10 @@ Proof. destruct (find_label lbl x (Kseq x0 k')) as [[sy ky] | ]; intuition. apply IHs2; auto. (* ifthenelse *) + destruct (if_conversion ki env e s1 s2) as [s|] eqn:IFC. + inv SE. exploit if_conversion_nolabel; eauto. intros (A & B & C). + rewrite A, B, C. auto. + monadInv SE; simpl. exploit (IHs1 k); eauto. destruct (Cminor.find_label lbl s1 k) as [[sx kx] | ]; destruct (find_label lbl x k') as [[sy ky] | ]; @@ -895,20 +1134,22 @@ Definition measure (s: Cminor.state) : nat := Lemma sel_step_correct: forall S1 t S2, Cminor.step ge S1 t S2 -> - forall T1, match_states S1 T1 -> + forall T1, match_states S1 T1 -> wt_state S1 -> (exists T2, step tge T1 t T2 /\ match_states S2 T2) - \/ (measure S2 < measure S1 /\ t = E0 /\ match_states S2 T1)%nat. + \/ (measure S2 < measure S1 /\ t = E0 /\ match_states S2 T1)%nat + \/ (exists S3 T2, star Cminor.step ge S2 E0 S3 /\ step tge T1 t T2 /\ match_states S3 T2). Proof. - induction 1; intros T1 ME; inv ME; try (monadInv TS). + induction 1; intros T1 ME WTS; inv ME; try (monadInv TS). - (* skip seq *) inv MC. left; econstructor; split. econstructor. econstructor; eauto. + inv H. - (* skip block *) inv MC. left; econstructor; split. econstructor. econstructor; eauto. + inv H. - (* skip call *) exploit Mem.free_parallel_extends; eauto. intros [m2' [A B]]. left; econstructor; split. - econstructor. inv MC; simpl in H; simpl; auto. - eauto. + econstructor. eapply match_is_call_cont; eauto. erewrite stackspace_function_translated; eauto. econstructor; eauto. eapply match_is_call_cont; eauto. - (* assign *) @@ -934,7 +1175,7 @@ Proof. econstructor; eauto. econstructor; eauto. eapply sig_function_translated; eauto. eapply match_callstate with (cunit := cunit'); eauto. - red; intros. eapply match_cont_call with (cunit := cunit) (hf := hf); eauto. + eapply match_cont_call with (cunit := cunit) (hf := hf); eauto. + (* direct *) intros [b [U V]]. exploit sel_exprlist_correct; eauto. intros [vargs' [C D]]. @@ -944,11 +1185,11 @@ Proof. subst vf. econstructor; eauto. rewrite symbols_preserved; eauto. eapply sig_function_translated; eauto. eapply match_callstate with (cunit := cunit'); eauto. - red; intros; eapply match_cont_call with (cunit := cunit) (hf := hf); eauto. + eapply match_cont_call with (cunit := cunit) (hf := hf); eauto. + (* turned into Sbuiltin *) intros EQ. subst fd. exploit sel_builtin_args_correct; eauto. intros [vargs' [C D]]. - right; split. simpl. omega. split. auto. + right; left; split. simpl. omega. split. auto. econstructor; eauto. - (* Stailcall *) exploit Mem.free_parallel_extends; eauto. intros [m2' [P Q]]. @@ -977,7 +1218,13 @@ Proof. constructor. econstructor; eauto. constructor; auto. - (* Sifthenelse *) - exploit sel_expr_correct; eauto. intros [v' [A B]]. + simpl in TS. destruct (if_conversion (known_id f) env a s1 s2) as [s|] eqn:IFC; monadInv TS. ++ inv WTS. inv WT_FN. assert (env0 = env) by congruence. subst env0. inv WT_STMT. + exploit if_conversion_correct; eauto. + set (s0 := if b then s1 else s2). intros (e1 & e1' & A & B & C). + right; right. econstructor; econstructor. + split. eexact B. split. eexact A. econstructor; eauto. ++ exploit sel_expr_correct; eauto. intros [v' [A B]]. assert (Val.bool_of_val v' b). inv B. auto. inv H0. left; exists (State f' (if b then x else x0) k' sp e' m'); split. econstructor; eauto. eapply eval_condexpr_of_expr; eauto. @@ -989,10 +1236,13 @@ Proof. left; econstructor; split. constructor. econstructor; eauto. constructor; auto. - (* Sexit seq *) inv MC. left; econstructor; split. constructor. econstructor; eauto. + inv H. - (* Sexit0 block *) inv MC. left; econstructor; split. constructor. econstructor; eauto. + inv H. - (* SexitS block *) inv MC. left; econstructor; split. constructor. econstructor; eauto. + inv H. - (* Sswitch *) inv H0; simpl in TS. + set (ct := compile_switch Int.modulus default cases) in *. @@ -1023,10 +1273,10 @@ Proof. - (* Slabel *) left; econstructor; split. constructor. econstructor; eauto. - (* Sgoto *) - assert (sel_stmt (prog_defmap cunit) (Cminor.fn_body f) = OK (fn_body f')). - { monadInv TF; simpl; auto. } - exploit (find_label_commut cunit hf lbl (Cminor.fn_body f) (Cminor.call_cont k)). - eapply call_cont_commut; eauto. eauto. + assert (sel_stmt (prog_defmap cunit) (known_id f) env (Cminor.fn_body f) = OK (fn_body f')). + { monadInv TF; simpl. congruence. } + exploit (find_label_commut cunit hf (known_id f) env lbl (Cminor.fn_body f) (Cminor.call_cont k)). + apply match_cont_other. eapply call_cont_commut; eauto. eauto. rewrite H. destruct (find_label lbl (fn_body f') (call_cont k'0)) as [[s'' k'']|] eqn:?; intros; try contradiction. @@ -1035,13 +1285,15 @@ Proof. econstructor; eauto. econstructor; eauto. - (* internal function *) - destruct TF as (hf & HF & TF). specialize (MC cunit hf). + destruct TF as (hf & HF & TF). monadInv TF. generalize EQ; intros TF; monadInv TF. exploit Mem.alloc_extends. eauto. eauto. apply Z.le_refl. apply Z.le_refl. intros [m2' [A B]]. left; econstructor; split. econstructor; simpl; eauto. - econstructor; simpl; eauto. apply set_locals_lessdef. apply set_params_lessdef; auto. + econstructor; simpl; eauto. + apply match_cont_other; auto. + apply set_locals_lessdef. apply set_params_lessdef; auto. - (* external call *) destruct TF as (hf & HF & TF). monadInv TF. @@ -1057,13 +1309,12 @@ Proof. econstructor. eauto. eapply external_call_symbols_preserved; eauto. apply senv_preserved. econstructor; eauto. - (* return *) - apply match_call_cont_cont in MC. destruct MC as (cunit0 & hf0 & MC). inv MC. left; econstructor; split. econstructor. econstructor; eauto. destruct optid; simpl; auto. apply set_var_lessdef; auto. - (* return of an external call turned into a Sbuiltin *) - right; split. simpl; omega. split. auto. econstructor; eauto. + right; left; split. simpl; omega. split. auto. econstructor; eauto. apply sel_builtin_res_correct; auto. Qed. @@ -1079,26 +1330,35 @@ Proof. rewrite (match_program_main TRANSF). fold tge. rewrite symbols_preserved. eauto. eexact A. rewrite <- H2. eapply sig_function_translated; eauto. - econstructor; eauto. red; intros; constructor. apply Mem.extends_refl. + econstructor; eauto. constructor. apply Mem.extends_refl. Qed. Lemma sel_final_states: forall S R r, match_states S R -> Cminor.final_state S r -> final_state R r. Proof. - intros. inv H0. inv H. - apply match_call_cont_cont in MC. destruct MC as (cunit0 & hf0 & MC). - inv MC. inv LD. constructor. + intros. inv H0. inv H. inv MC. inv LD. constructor. Qed. Theorem transf_program_correct: forward_simulation (Cminor.semantics prog) (CminorSel.semantics tprog). Proof. - apply forward_simulation_opt with (match_states := match_states) (measure := measure). - apply senv_preserved. - apply sel_initial_states; auto. - apply sel_final_states; auto. - apply sel_step_correct; auto. + set (MS := fun S T => match_states S T /\ wt_state S). + apply forward_simulation_determ_star with (match_states := MS) (measure := measure). +- apply Cminor.semantics_determinate. +- apply senv_preserved. +- intros. exploit sel_initial_states; eauto. intros (T & P & Q). + exists T; split; auto; split; auto. eapply wt_initial_state. eexact wt_prog. auto. +- intros. destruct H. eapply sel_final_states; eauto. +- intros S1 t S2 A T1 [B C]. + assert (wt_state S2) by (eapply subject_reduction; eauto using wt_prog). + unfold MS. + exploit sel_step_correct; eauto. + intros [(T2 & D & E) | [(D & E & F) | (S3 & T2 & D & E & F)]]. ++ exists S2, T2. intuition auto using star_refl, plus_one. ++ subst t. exists S2, T1. intuition auto using star_refl. ++ assert (wt_state S3) by (eapply subject_reduction_star; eauto using wt_prog). + exists S3, T2. intuition auto using plus_one. Qed. End PRESERVATION. diff --git a/driver/Clflags.ml b/driver/Clflags.ml index fc12863d..d27871ef 100644 --- a/driver/Clflags.ml +++ b/driver/Clflags.ml @@ -27,6 +27,8 @@ let option_ftailcalls = ref true let option_fconstprop = ref true let option_fcse = ref true let option_fredundancy = ref true +let option_fifconversion = ref true +let option_ffavor_branchless = ref false let option_falignfunctions = ref (None: int option) let option_falignbranchtargets = ref 0 let option_faligncondbranchs = ref 0 diff --git a/driver/Driver.ml b/driver/Driver.ml index 50f14d13..84392ef6 100644 --- a/driver/Driver.ml +++ b/driver/Driver.ml @@ -196,6 +196,9 @@ Processing options: -finline Perform inlining of functions [on] -finline-functions-called-once Integrate functions only required by their single caller [on] + -fif-conversion Perform if-conversion (generation of conditional moves) [on] + -ffavor-branchless Favor the generation of branch-free instruction sequences, + even when possibly more costly than the default [off] Code generation options: (use -fno-<opt> to turn off -f<opt>) -ffpu Use FP registers for some integer operations [on] -fsmall-data <n> Set maximal size <n> for allocation in small data area @@ -250,7 +253,8 @@ let dump_mnemonics destfile = exit 0 let optimization_options = [ - option_ftailcalls; option_fconstprop; option_fcse; option_fredundancy; option_finline_functions_called_once; + option_ftailcalls; option_fifconversion; option_fconstprop; option_fcse; + option_fredundancy; option_finline_functions_called_once; ] let set_all opts () = List.iter (fun r -> r := true) opts @@ -301,7 +305,8 @@ let cmdline_actions = Exact "-Os", Set option_Osize; Exact "-fsmall-data", Integer(fun n -> option_small_data := n); Exact "-fsmall-const", Integer(fun n -> option_small_const := n); - Exact "-ffloat-const-prop", Integer(fun n -> option_ffloatconstprop := n); + Exact "-ffloat-const-prop", Integer(fun n -> option_ffloatconstprop := n); + Exact "-ffavor-branchless", Set option_ffavor_branchless; Exact "-falign-functions", Integer(fun n -> check_align n; option_falignfunctions := Some n); Exact "-falign-branch-targets", Integer(fun n -> check_align n; option_falignbranchtargets := n); Exact "-falign-cond-branches", Integer(fun n -> check_align n; option_faligncondbranchs := n);] @ @@ -364,6 +369,7 @@ let cmdline_actions = (* Optimization options *) (* -f options: come in -f and -fno- variants *) @ f_opt "tailcalls" option_ftailcalls + @ f_opt "if-conversion" option_fifconversion @ f_opt "const-prop" option_fconstprop @ f_opt "cse" option_fcse @ f_opt "redundancy" option_fredundancy diff --git a/extraction/extraction.v b/extraction/extraction.v index 15a64d89..c0286f7b 100644 --- a/extraction/extraction.v +++ b/extraction/extraction.v @@ -72,6 +72,7 @@ Extract Constant Iteration.GenIter.iterate => (* Selection *) Extract Constant Selection.compile_switch => "Switchaux.compile_switch". +Extract Constant Selection.if_conversion_heuristic => "Selectionaux.if_conversion_heuristic". (* RTLgen *) Extract Constant RTLgen.more_likely => "RTLgenaux.more_likely". diff --git a/test/regression/Makefile b/test/regression/Makefile index 760ee570..e5b0655e 100644 --- a/test/regression/Makefile +++ b/test/regression/Makefile @@ -16,7 +16,7 @@ TESTS=int32 int64 floats floats-basics \ funct3 expr5 struct7 struct8 struct11 struct12 casts1 casts2 char1 \ sizeof1 sizeof2 binops bool for1 for2 switch switch2 compound \ decl1 interop1 bitfields9 ptrs3 \ - parsing krfun + parsing krfun ifconv # Can run, but only in compiled mode, and have reference output in Results diff --git a/test/regression/Results/ifconv b/test/regression/Results/ifconv new file mode 100644 index 00000000..38019fe6 --- /dev/null +++ b/test/regression/Results/ifconv @@ -0,0 +1,26 @@ +test1(0,1,12,34) = 12 +test1(1,0,45,67) = 67 +test2(0,1,12,34) = 12 +test2(1,0,45,67) = 67 +test3(0,1,12,34) = 12 +test3(1,0,45,67) = 67 +test4(0,1,12,34) = 12 +test4(1,0,45,67) = 67 +test5(0,1,12) = 13 +test5(1,0,45) = 44 +test6(NULL) = 0 +test6(&i) = 1244 +test7(1,0) = -1 +test7(-100,4) = -25 +test8(0) = 0 +test8(1) = -72 +ltest1(-1, 0, 123LL, 456LL) = 124 +ltest1(1, 0, 123LL, 456LL) = 114 +dmax(0.0, 3.14) = 3.140000 +dmax(1.0, -2.718) = 1.000000 +dabs(1.0) = 1.000000 +dabs(-2.718) = 2.718000 +smin(0.0, 3.14) = 0.000000 +smin(1.0, -2.718) = -2.718000 +sdoz(1.0, 0.5) = 0.500000 +sdoz(0.0, 3.14) = 0.000000 diff --git a/test/regression/ifconv.c b/test/regression/ifconv.c new file mode 100644 index 00000000..dcbf43e5 --- /dev/null +++ b/test/regression/ifconv.c @@ -0,0 +1,129 @@ +#include <stdio.h> + +/* Several equivalent forms that should be turned into cmov */ + +int test1(int x, int y, int a, int b) +{ + return x < y ? a : b; +} + +int test2(int x, int y, int a, int b) +{ + int r; + if (x < y) { r = a; } else { r = b; } + return r; +} + +int test3(int x, int y, int a, int b) +{ + int r = b; + if (x < y) { r = a; } + return r; +} + +int test4(int x, int y, int a, int b) +{ + int r = a; + if (x < y) { /*skip*/; } else { r = b; } + return r; +} + +/* A more advanced example */ + +int test5(int x, int y, int a) +{ + return x < y ? a + 1 : a - 1; +} + +/* Unsafe operations should not be turned into cmov */ + +int test6(int * p) +{ + return p == NULL ? 0 : *p + 10; +} + +int test7(int a, int b) +{ + return b == 0 ? -1 : a / b; +} + +/* Very large operations should not be turned into cmov */ + +int test8(int a) +{ + return a == 0 ? 0 : a*a*a*a - 2*a*a*a + 10*a*a + 42*a - 123; +} + +/* Some examples with 64-bit integers */ + +long long ltest1(int x, int y, long long a, long long b) +{ + return x < y ? a + 1 : b >> 2; +} + +/* Some examples with floating-point */ + +double dmax(double x, double y) +{ + return x >= y ? x : y; +} + +double dabs(double x) +{ + return x < 0.0 ? -x : x; +} + +float smin(float x, float y) +{ + return x <= y ? x : y; +} + +float sdoz(float x, float y) +{ + return x >= y ? x - y : 0.0f; +} + +/* Test harness */ + +#define TESTI(call) printf(#call " = %d\n", call) +#define TESTL(call) printf(#call " = %lld\n", call) +#define TESTF(call) printf(#call " = %f\n", call) + + +int main() +{ + int i = 1234; + TESTI(test1(0,1,12,34)); + TESTI(test1(1,0,45,67)); + TESTI(test2(0,1,12,34)); + TESTI(test2(1,0,45,67)); + TESTI(test3(0,1,12,34)); + TESTI(test3(1,0,45,67)); + TESTI(test4(0,1,12,34)); + TESTI(test4(1,0,45,67)); + TESTI(test5(0,1,12)); + TESTI(test5(1,0,45)); + TESTI(test6(NULL)); + TESTI(test6(&i)); + TESTI(test7(1,0)); + TESTI(test7(-100,4)); + TESTI(test8(0)); + TESTI(test8(1)); + + TESTL(ltest1(-1, 0, 123LL, 456LL)); + TESTL(ltest1(1, 0, 123LL, 456LL)); + + TESTF(dmax(0.0, 3.14)); + TESTF(dmax(1.0, -2.718)); + + TESTF(dabs(1.0)); + TESTF(dabs(-2.718)); + + TESTF(smin(0.0, 3.14)); + TESTF(smin(1.0, -2.718)); + + TESTF(sdoz(1.0, 0.5)); + TESTF(sdoz(0.0, 3.14)); + + return 0; +} |