From 265fa07b34a813ba9d8249ddad82d71e6002c10d Mon Sep 17 00:00:00 2001 From: xleroy Date: Thu, 2 Sep 2010 12:42:19 +0000 Subject: Merge of the reuse-temps branch: - Reload temporaries are marked as destroyed (set to Vundef) across operations in the semantics of LTL, LTLin, Linear and Mach, allowing Asmgen to reuse them. - Added IA32 port. - Cleaned up float conversions and axiomatization of floats. git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@1499 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e --- backend/Allocproof.v | 32 +++++-- backend/CSE.v | 19 ++-- backend/Coloring.v | 2 +- backend/Coloringaux.ml | 93 ++++++++++++-------- backend/Coloringproof.v | 4 +- backend/LTL.v | 16 ++-- backend/LTLin.v | 12 +-- backend/Linear.v | 30 +++++-- backend/Locations.v | 23 +++++ backend/Mach.v | 16 ++++ backend/Machabstr.v | 16 ++-- backend/Machabstr2concr.v | 39 +++++++-- backend/Machconcr.v | 16 ++-- backend/Machtyping.v | 88 ++++++++++++++----- backend/RTLgen.v | 35 +++++++- backend/RTLgenproof.v | 17 +++- backend/RTLgenspec.v | 76 ++++++++++++++-- backend/RTLtypingaux.ml | 2 + backend/Reloadproof.v | 214 +++++++++++++++++++++++++++++----------------- backend/Reloadtyping.v | 3 +- backend/Selection.v | 4 +- backend/Stacking.v | 10 +-- backend/Stackingproof.v | 143 +++++++++++++++++++++---------- backend/Stackingtyping.v | 2 +- 24 files changed, 637 insertions(+), 275 deletions(-) (limited to 'backend') diff --git a/backend/Allocproof.v b/backend/Allocproof.v index d06c26fa..5a5e4c4d 100644 --- a/backend/Allocproof.v +++ b/backend/Allocproof.v @@ -235,6 +235,14 @@ Proof. rewrite H0. apply H. auto. eapply regalloc_not_temporary; eauto. Qed. +Lemma agree_undef_temps: + forall live rs ls, + agree live rs ls -> agree live rs (undef_temps ls). +Proof. + intros. apply agree_exten with ls; auto. + intros. apply Locmap.guo; auto. +Qed. + (** If a register is dead, assigning it an arbitrary value in [rs] and leaving [ls] unchanged preserves agreement. (This corresponds to an operation over a dead register in the original program @@ -603,7 +611,11 @@ Proof. (* sub-case: non-redundant move *) econstructor; split. eapply exec_Lop; eauto. simpl. eauto. MatchStates. - rewrite <- H1. eapply agree_move_live; eauto. + rewrite <- H1. set (ls1 := undef_temps ls). + replace (ls (assign arg)) with (ls1 (assign arg)). + eapply agree_move_live; eauto. + unfold ls1. eapply agree_undef_temps; eauto. + unfold ls1. simpl. apply Locmap.guo. eapply regalloc_not_temporary; eauto. (* Not a move *) intros INMO CORR CODE. assert (eval_operation tge sp op (map ls (map assign args)) = Some v). @@ -612,6 +624,7 @@ Proof. eapply agree_eval_regs; eauto. econstructor; split. eapply exec_Lop; eauto. MatchStates. apply agree_assign_live with f env live; auto. + eapply agree_undef_temps; eauto. eapply agree_reg_list_live; eauto. (* Result is not live, instruction turned into a nop *) intro CODE. econstructor; split. eapply exec_Lnop; eauto. @@ -633,6 +646,7 @@ Proof. unfold correct_alloc_instr. intro CORR. MatchStates. eapply agree_assign_live; eauto. + eapply agree_undef_temps; eauto. eapply agree_reg_list_live; eauto. (* dst is dead *) econstructor; split. @@ -650,7 +664,9 @@ Proof. econstructor; split. eapply exec_Lstore; eauto. TranslInstr. rewrite <- ESRC. eauto. - MatchStates. eapply agree_reg_live. eapply agree_reg_list_live. eauto. + MatchStates. + eapply agree_undef_temps; eauto. + eapply agree_reg_live. eapply agree_reg_list_live. eauto. (* Icall *) exploit transl_find_function; eauto. intros [tf [TFIND TF]]. @@ -695,20 +711,26 @@ Proof. eapply agree_eval_regs; eauto. econstructor; split. eapply exec_Lcond_true; eauto. TranslInstr. - MatchStates. eapply agree_reg_list_live. eauto. + MatchStates. + eapply agree_undef_temps; eauto. + eapply agree_reg_list_live. eauto. (* Icond, false *) assert (COND: eval_condition cond (map ls (map assign args)) = Some false). replace (map ls (map assign args)) with (rs##args). auto. eapply agree_eval_regs; eauto. econstructor; split. eapply exec_Lcond_false; eauto. TranslInstr. - MatchStates. eapply agree_reg_list_live. eauto. + MatchStates. + eapply agree_undef_temps; eauto. + eapply agree_reg_list_live. eauto. (* Ijumptable *) assert (rs#arg = ls (assign arg)). apply AG. apply Regset.add_1. auto. econstructor; split. eapply exec_Ljumptable; eauto. TranslInstr. congruence. - MatchStates. eapply list_nth_z_in; eauto. eapply agree_reg_live; eauto. + MatchStates. eapply list_nth_z_in; eauto. + eapply agree_undef_temps; eauto. + eapply agree_reg_live; eauto. (* Ireturn *) econstructor; split. diff --git a/backend/CSE.v b/backend/CSE.v index dab8fc31..4347c334 100644 --- a/backend/CSE.v +++ b/backend/CSE.v @@ -375,23 +375,14 @@ Definition analyze (f: RTL.function): PMap.t numbering := (** * Code transformation *) -(** Some operations are so cheap to compute that it is generally not - worth reusing their results. These operations are detected by the - function below. *) - -Definition is_trivial_op (op: operation) : bool := - match op with - | Omove => true - | Ointconst _ => true - | Oaddrsymbol _ _ => true - | Oaddrstack _ => true - | _ => false - end. - (** The code transformation is performed instruction by instruction. [Iload] instructions and non-trivial [Iop] instructions are turned into move instructions if their result is already available in a - register, as indicated by the numbering inferred at that program point. *) + register, as indicated by the numbering inferred at that program point. + + Some operations are so cheap to compute that it is generally not + worth reusing their results. These operations are detected by the + function [is_trivial_op] in module [Op]. *) Definition transf_instr (n: numbering) (instr: instruction) := match instr with diff --git a/backend/Coloring.v b/backend/Coloring.v index 28626cba..6d34e2cc 100644 --- a/backend/Coloring.v +++ b/backend/Coloring.v @@ -282,7 +282,7 @@ Definition alloc_of_coloring (coloring: reg -> loc) (env: regenv) (rs: Regset.t) fun r => if Regset.mem r rs then coloring r - else match env r with Tint => R R3 | Tfloat => R F1 end. + else match env r with Tint => R dummy_int_reg | Tfloat => R dummy_float_reg end. (** * Coloring of the interference graph *) diff --git a/backend/Coloringaux.ml b/backend/Coloringaux.ml index 922506f0..04209726 100644 --- a/backend/Coloringaux.ml +++ b/backend/Coloringaux.ml @@ -41,6 +41,7 @@ type node = typ: typ; (*r its type *) regname: reg option; (*r the RTL register it comes from *) regclass: int; (*r identifier of register class *) + mutable accesses: int; (*r number of defs and uses *) mutable spillcost: float; (*r estimated cost of spilling *) mutable adjlist: node list; (*r all nodes it interferes with *) mutable degree: int; (*r number of adjacent nodes *) @@ -109,7 +110,7 @@ module DLinkNode = struct let make state = let rec empty = { ident = 0; typ = Tint; regname = None; regclass = 0; - adjlist = []; degree = 0; spillcost = 0.0; + adjlist = []; degree = 0; accesses = 0; spillcost = 0.0; movelist = []; alias = None; color = None; nstate = state; nprev = empty; nnext = empty } in empty @@ -196,6 +197,8 @@ let num_available_registers = Array.make num_register_classes 0 let reserved_registers = ref ([]: mreg list) +let allocatable_registers = ref ([]: mreg list) + let rec remove_reserved = function | [] -> [] | hd :: tl -> @@ -204,14 +207,17 @@ let rec remove_reserved = function else hd :: remove_reserved tl let init_regs() = - caller_save_registers.(0) <- - Array.of_list (remove_reserved int_caller_save_regs); - caller_save_registers.(1) <- - Array.of_list (remove_reserved float_caller_save_regs); - callee_save_registers.(0) <- - Array.of_list (remove_reserved int_callee_save_regs); - callee_save_registers.(1) <- - Array.of_list (remove_reserved float_callee_save_regs); + let int_caller_save = remove_reserved int_caller_save_regs + and float_caller_save = remove_reserved float_caller_save_regs + and int_callee_save = remove_reserved int_callee_save_regs + and float_callee_save = remove_reserved float_callee_save_regs in + allocatable_registers := + List.flatten [int_caller_save; float_caller_save; + int_callee_save; float_callee_save]; + caller_save_registers.(0) <- Array.of_list int_caller_save; + caller_save_registers.(1) <- Array.of_list float_caller_save; + callee_save_registers.(0) <- Array.of_list int_callee_save; + callee_save_registers.(1) <- Array.of_list float_callee_save; for i = 0 to num_register_classes - 1 do num_available_registers.(i) <- Array.length caller_save_registers.(i) @@ -365,9 +371,10 @@ let checkInvariants () = let nodeOfReg r typenv spillcosts = let ty = typenv r in incr nextRegIdent; + let (acc, cost) = spillcosts r in { ident = !nextRegIdent; typ = ty; regname = Some r; regclass = class_of_type ty; - spillcost = float(spillcosts r); + accesses = acc; spillcost = float cost; adjlist = []; degree = 0; movelist = []; alias = None; color = None; nstate = Initial; @@ -378,7 +385,7 @@ let nodeOfMreg mr = incr nextRegIdent; { ident = !nextRegIdent; typ = ty; regname = None; regclass = class_of_type ty; - spillcost = 0.0; + accesses = 0; spillcost = 0.0; adjlist = []; degree = 0; movelist = []; alias = None; color = Some (R mr); nstate = Colored; @@ -426,7 +433,9 @@ let build g typenv spillcosts = g.pref_reg_reg (); SetRegMreg.fold (fun (Coq_pair(r1, mr2)) () -> - add_move (find_reg_node r1) (find_mreg_node mr2)) + let r1' = find_reg_node r1 in + if List.mem mr2 !allocatable_registers then + add_move r1' (find_mreg_node mr2)) g.pref_reg_mreg (); (* Initial partition of nodes into spill / freeze / simplify *) Hashtbl.iter @@ -568,14 +577,14 @@ let canCoalesceGeorge u v = so George's criterion is safe in this case. *) -let thresholdGeorge = 2.0 (* = 1 def + 1 use *) +let thresholdGeorge = 2 (* = 1 def + 1 use *) let canCoalesce u v = if u.nstate = Colored then canCoalesceGeorge u v else canCoalesceBriggs u v - || (v.spillcost <= thresholdGeorge && canCoalesceGeorge u v) - || (u.spillcost <= thresholdGeorge && canCoalesceGeorge v u) + || (v.accesses <= thresholdGeorge && canCoalesceGeorge u v) + || (u.accesses <= thresholdGeorge && canCoalesceGeorge v u) (* Update worklists after a move was processed *) @@ -652,7 +661,12 @@ let freeze () = (* Chaitin's cost measure *) -let spillCost n = n.spillcost /. float n.degree +let spillCost n = +(*i + Printf.printf "spillCost %s: uses = %.0f degree = %d cost = %f\n" + (name_of_node n) n.spillcost n.degree (n.spillcost /. float n.degree); +*) + n.spillcost /. float n.degree (* Spill a node *) @@ -778,35 +792,40 @@ let location_of_node n = | None -> assert false | Some loc -> loc -(* Estimate spilling costs. Currently, just count the number of accesses - to each pseudoregister. To do: take loops into account. *) +(* Estimate spilling costs and counts the number of defs and uses. + Currently, we charge 10 for each access and 1 for each move. + To do: take loops into account. *) let spill_costs f = - let costs = ref (PTree.empty : int PTree.t) in + let costs = ref (PMap.init (0,0)) in let cost r = - match PTree.get r !costs with None -> 0 | Some n -> n in - let incr r = - costs := PTree.set r (1 + cost r) !costs in - let incr_list rl = - List.iter incr rl in - let incr_ros ros = - match ros with Coq_inl r -> incr r | Coq_inr _ -> () in + PMap.get r !costs in + let charge amount r = + let (n, c) = cost r in + costs := PMap.set r (n + 1, c + amount) !costs in + let charge_list amount rl = + List.iter (charge amount) rl in + let charge_ros amount ros = + match ros with Coq_inl r -> charge amount r | Coq_inr _ -> () in let process_instr () pc i = match i with | Inop _ -> () - | Iop(op, args, res, _) -> incr_list args; incr res - | Iload(chunk, addr, args, dst, _) -> incr_list args; incr dst - | Istore(chunk, addr, args, src, _) -> incr_list args; incr src - | Icall(sg, ros, args, res, _) -> incr_ros ros; incr_list args; incr res - | Itailcall(sg, ros, args) -> incr_ros ros; incr_list args - | Ibuiltin(ef, args, res, _) -> incr_list args; incr res - | Icond(cond, args, _, _) -> incr_list args - | Ijumptable(arg, _) -> incr arg - | Ireturn(Some r) -> incr r + | Iop(Op.Omove, arg::nil, res, _) -> charge 1 arg; charge 1 res + | Iop(op, args, res, _) -> charge_list 10 args; charge 10 res + | Iload(chunk, addr, args, dst, _) -> charge_list 10 args; charge 10 dst + | Istore(chunk, addr, args, src, _) -> charge_list 10 args; charge 10 src + | Icall(sg, ros, args, res, _) -> + charge_ros 10 ros; charge_list 1 args; charge 1 res + | Itailcall(sg, ros, args) -> + charge_ros 10 ros; charge_list 1 args + | Ibuiltin(ef, args, res, _) -> charge_list 10 args; charge 10 res + | Icond(cond, args, _, _) -> charge_list 10 args + | Ijumptable(arg, _) -> charge 10 arg + | Ireturn(Some r) -> charge 1 r | Ireturn None -> () in - incr_list f.fn_params; + charge_list 1 f.fn_params; PTree.fold process_instr f.fn_code (); - (* Result is cost function reg -> integer cost *) + (* Result is cost function reg -> (num accesses, integer cost *) cost (* This is the entry point for graph coloring. *) diff --git a/backend/Coloringproof.v b/backend/Coloringproof.v index 5f035b40..bb97c87a 100644 --- a/backend/Coloringproof.v +++ b/backend/Coloringproof.v @@ -719,7 +719,9 @@ Proof. elim (andb_prop _ _ H1); intros. caseEq (Regset.mem r allregs); intro. generalize (check_coloring_3_correct _ _ _ r H3 H4). tauto. - case (env r); simpl; intuition congruence. + case (env r); simpl. + unfold dummy_int_reg. intuition congruence. + unfold dummy_float_reg. intuition congruence. Qed. Lemma alloc_of_coloring_correct_4: diff --git a/backend/LTL.v b/backend/LTL.v index e1222a52..2eff67cd 100644 --- a/backend/LTL.v +++ b/backend/LTL.v @@ -101,6 +101,10 @@ Definition postcall_locs (ls: locset) : locset := | S s => ls (S s) end. +(** Temporaries destroyed across instructions *) + +Definition undef_temps (ls: locset) := Locmap.undef temporaries ls. + (** LTL execution states. *) Inductive stackframe : Type := @@ -166,21 +170,21 @@ Inductive step: state -> trace -> state -> Prop := (fn_code f)!pc = Some(Lop op args res pc') -> eval_operation ge sp op (map rs args) = Some v -> step (State s f sp pc rs m) - E0 (State s f sp pc' (Locmap.set res v rs) m) + E0 (State s f sp pc' (Locmap.set res v (undef_temps rs)) m) | exec_Lload: forall s f sp pc rs m chunk addr args dst pc' a v, (fn_code f)!pc = Some(Lload chunk addr args dst pc') -> eval_addressing ge sp addr (map rs args) = Some a -> Mem.loadv chunk m a = Some v -> step (State s f sp pc rs m) - E0 (State s f sp pc' (Locmap.set dst v rs) m) + E0 (State s f sp pc' (Locmap.set dst v (undef_temps rs)) m) | exec_Lstore: forall s f sp pc rs m chunk addr args src pc' a m', (fn_code f)!pc = Some(Lstore chunk addr args src pc') -> eval_addressing ge sp addr (map rs args) = Some a -> Mem.storev chunk m a (rs src) = Some m' -> step (State s f sp pc rs m) - E0 (State s f sp pc' rs m') + E0 (State s f sp pc' (undef_temps rs) m') | exec_Lcall: forall s f sp pc rs m sig ros args res pc' f', (fn_code f)!pc = Some(Lcall sig ros args res pc') -> @@ -208,20 +212,20 @@ Inductive step: state -> trace -> state -> Prop := (fn_code f)!pc = Some(Lcond cond args ifso ifnot) -> eval_condition cond (map rs args) = Some true -> step (State s f sp pc rs m) - E0 (State s f sp ifso rs m) + E0 (State s f sp ifso (undef_temps rs) m) | exec_Lcond_false: forall s f sp pc rs m cond args ifso ifnot, (fn_code f)!pc = Some(Lcond cond args ifso ifnot) -> eval_condition cond (map rs args) = Some false -> step (State s f sp pc rs m) - E0 (State s f sp ifnot rs m) + E0 (State s f sp ifnot (undef_temps rs) m) | exec_Ljumptable: forall s f sp pc rs m arg tbl n pc', (fn_code f)!pc = Some(Ljumptable arg tbl) -> rs arg = Vint n -> list_nth_z tbl (Int.signed n) = Some pc' -> step (State s f sp pc rs m) - E0 (State s f sp pc' rs m) + E0 (State s f sp pc' (undef_temps rs) m) | exec_Lreturn: forall s f stk pc rs m or m', (fn_code f)!pc = Some(Lreturn or) -> diff --git a/backend/LTLin.v b/backend/LTLin.v index ee4cb943..d6c5fa71 100644 --- a/backend/LTLin.v +++ b/backend/LTLin.v @@ -160,19 +160,19 @@ Inductive step: state -> trace -> state -> Prop := forall s f sp op args res b rs m v, eval_operation ge sp op (map rs args) = Some v -> step (State s f sp (Lop op args res :: b) rs m) - E0 (State s f sp b (Locmap.set res v rs) m) + E0 (State s f sp b (Locmap.set res v (undef_temps rs)) m) | exec_Lload: forall s f sp chunk addr args dst b rs m a v, eval_addressing ge sp addr (map rs args) = Some a -> Mem.loadv chunk m a = Some v -> step (State s f sp (Lload chunk addr args dst :: b) rs m) - E0 (State s f sp b (Locmap.set dst v rs) m) + E0 (State s f sp b (Locmap.set dst v (undef_temps rs)) m) | exec_Lstore: forall s f sp chunk addr args src b rs m m' a, eval_addressing ge sp addr (map rs args) = Some a -> Mem.storev chunk m a (rs src) = Some m' -> step (State s f sp (Lstore chunk addr args src :: b) rs m) - E0 (State s f sp b rs m') + E0 (State s f sp b (undef_temps rs) m') | exec_Lcall: forall s f sp sig ros args res b rs m f', find_function ros rs = Some f' -> @@ -206,19 +206,19 @@ Inductive step: state -> trace -> state -> Prop := eval_condition cond (map rs args) = Some true -> find_label lbl f.(fn_code) = Some b' -> step (State s f sp (Lcond cond args lbl :: b) rs m) - E0 (State s f sp b' rs m) + E0 (State s f sp b' (undef_temps rs) m) | exec_Lcond_false: forall s f sp cond args lbl b rs m, eval_condition cond (map rs args) = Some false -> step (State s f sp (Lcond cond args lbl :: b) rs m) - E0 (State s f sp b rs m) + E0 (State s f sp b (undef_temps rs) m) | exec_Ljumptable: forall s f sp arg tbl b rs m n lbl b', rs arg = Vint n -> list_nth_z tbl (Int.signed n) = Some lbl -> find_label lbl f.(fn_code) = Some b' -> step (State s f sp (Ljumptable arg tbl :: b) rs m) - E0 (State s f sp b' rs m) + E0 (State s f sp b' (undef_temps rs) m) | exec_Lreturn: forall s f stk rs m or b m', Mem.free m stk 0 f.(fn_stacksize) = Some m' -> diff --git a/backend/Linear.v b/backend/Linear.v index 0f44206d..40f7e416 100644 --- a/backend/Linear.v +++ b/backend/Linear.v @@ -163,6 +163,20 @@ Definition return_regs (caller callee: locset) : locset := | S s => caller (S s) end. +(** Temporaries destroyed across operations *) + +Definition undef_op (op: operation) (rs: locset) := + match op with + | Omove => rs + | _ => undef_temps rs + end. + +Definition undef_getstack (s: slot) (rs: locset) := + match s with + | Incoming _ _ => Locmap.set (R IT1) Vundef rs + | _ => rs + end. + (** Linear execution states. *) Inductive stackframe: Type := @@ -241,7 +255,7 @@ Inductive step: state -> trace -> state -> Prop := | exec_Lgetstack: forall s f sp sl r b rs m, step (State s f sp (Lgetstack sl r :: b) rs m) - E0 (State s f sp b (Locmap.set (R r) (rs (S sl)) rs) m) + E0 (State s f sp b (Locmap.set (R r) (rs (S sl)) (undef_getstack sl rs)) m) | exec_Lsetstack: forall s f sp r sl b rs m, step (State s f sp (Lsetstack r sl :: b) rs m) @@ -250,19 +264,19 @@ Inductive step: state -> trace -> state -> Prop := forall s f sp op args res b rs m v, eval_operation ge sp op (reglist rs args) = Some v -> step (State s f sp (Lop op args res :: b) rs m) - E0 (State s f sp b (Locmap.set (R res) v rs) m) + E0 (State s f sp b (Locmap.set (R res) v (undef_op op rs)) m) | exec_Lload: forall s f sp chunk addr args dst b rs m a v, eval_addressing ge sp addr (reglist rs args) = Some a -> Mem.loadv chunk m a = Some v -> step (State s f sp (Lload chunk addr args dst :: b) rs m) - E0 (State s f sp b (Locmap.set (R dst) v rs) m) + E0 (State s f sp b (Locmap.set (R dst) v (undef_temps rs)) m) | exec_Lstore: forall s f sp chunk addr args src b rs m m' a, eval_addressing ge sp addr (reglist rs args) = Some a -> Mem.storev chunk m a (rs (R src)) = Some m' -> step (State s f sp (Lstore chunk addr args src :: b) rs m) - E0 (State s f sp b rs m') + E0 (State s f sp b (undef_temps rs) m') | exec_Lcall: forall s f sp sig ros b rs m f', find_function ros rs = Some f' -> @@ -280,7 +294,7 @@ Inductive step: state -> trace -> state -> Prop := forall s f sp rs m ef args res b t v m', external_call ef ge (reglist rs args) m t v m' -> step (State s f sp (Lbuiltin ef args res :: b) rs m) - t (State s f sp b (Locmap.set (R res) v rs) m') + t (State s f sp b (Locmap.set (R res) v (undef_temps rs)) m') | exec_Llabel: forall s f sp lbl b rs m, step (State s f sp (Llabel lbl :: b) rs m) @@ -295,19 +309,19 @@ Inductive step: state -> trace -> state -> Prop := eval_condition cond (reglist rs args) = Some true -> find_label lbl f.(fn_code) = Some b' -> step (State s f sp (Lcond cond args lbl :: b) rs m) - E0 (State s f sp b' rs m) + E0 (State s f sp b' (undef_temps rs) m) | exec_Lcond_false: forall s f sp cond args lbl b rs m, eval_condition cond (reglist rs args) = Some false -> step (State s f sp (Lcond cond args lbl :: b) rs m) - E0 (State s f sp b rs m) + E0 (State s f sp b (undef_temps rs) m) | exec_Ljumptable: forall s f sp arg tbl b rs m n lbl b', rs (R arg) = Vint n -> list_nth_z tbl (Int.signed n) = Some lbl -> find_label lbl f.(fn_code) = Some b' -> step (State s f sp (Ljumptable arg tbl :: b) rs m) - E0 (State s f sp b' rs m) + E0 (State s f sp b' (undef_temps rs) m) | exec_Lreturn: forall s f stk b rs m m', Mem.free m stk 0 f.(fn_stacksize) = Some m' -> diff --git a/backend/Locations.v b/backend/Locations.v index c2fda9c2..1270e1d3 100644 --- a/backend/Locations.v +++ b/backend/Locations.v @@ -423,4 +423,27 @@ Module Locmap. auto. Qed. + Fixpoint undef (ll: list loc) (m: t) {struct ll} : t := + match ll with + | nil => m + | l1 :: ll' => undef ll' (set l1 Vundef m) + end. + + Lemma guo: forall ll l m, Loc.notin l ll -> (undef ll m) l = m l. + Proof. + induction ll; simpl; intros. auto. + destruct H. rewrite IHll; auto. apply gso. apply Loc.diff_sym; auto. + Qed. + + Lemma gus: forall ll l m, In l ll -> (undef ll m) l = Vundef. + Proof. + assert (P: forall ll l m, m l = Vundef -> (undef ll m) l = Vundef). + induction ll; simpl; intros. auto. apply IHll. + unfold set. destruct (Loc.eq a l); auto. + destruct (Loc.overlap a l); auto. + induction ll; simpl; intros. contradiction. + destruct H. apply P. subst a. apply gss. + auto. + Qed. + End Locmap. diff --git a/backend/Mach.v b/backend/Mach.v index 2ec312e4..c6a692a1 100644 --- a/backend/Mach.v +++ b/backend/Mach.v @@ -27,6 +27,7 @@ Require Import Events. Require Import Globalenvs. Require Import Op. Require Import Locations. +Require Import Conventions. (** * Abstract syntax *) @@ -101,6 +102,21 @@ Definition regset := Regmap.t val. Notation "a ## b" := (List.map a b) (at level 1). Notation "a # b <- c" := (Regmap.set b c a) (at level 1, b at next level). +Fixpoint undef_regs (rl: list mreg) (rs: regset) {struct rl} : regset := + match rl with + | nil => rs + | r1 :: rl' => undef_regs rl' (Regmap.set r1 Vundef rs) + end. + +Definition undef_temps (rs: regset) := + undef_regs (int_temporaries ++ float_temporaries) rs. + +Definition undef_op (op: operation) (rs: regset) := + match op with + | Omove => rs + | _ => undef_temps rs + end. + Definition is_label (lbl: label) (instr: instruction) : bool := match instr with | Mlabel lbl' => if peq lbl lbl' then true else false diff --git a/backend/Machabstr.v b/backend/Machabstr.v index 291a4682..23ca895f 100644 --- a/backend/Machabstr.v +++ b/backend/Machabstr.v @@ -238,24 +238,24 @@ Inductive step: state -> trace -> state -> Prop := forall s f sp ofs ty dst c rs fr m v, get_slot (parent_function s) (parent_frame s) ty (Int.signed ofs) v -> step (State s f sp (Mgetparam ofs ty dst :: c) rs fr m) - E0 (State s f sp c (rs#dst <- v) fr m) + E0 (State s f sp c (rs # IT1 <- Vundef # dst <- v) fr m) | exec_Mop: forall s f sp op args res c rs fr m v, eval_operation ge sp op rs##args = Some v -> step (State s f sp (Mop op args res :: c) rs fr m) - E0 (State s f sp c (rs#res <- v) fr m) + E0 (State s f sp c ((undef_op op rs)#res <- v) fr m) | exec_Mload: forall s f sp chunk addr args dst c rs fr m a v, eval_addressing ge sp addr rs##args = Some a -> Mem.loadv chunk m a = Some v -> step (State s f sp (Mload chunk addr args dst :: c) rs fr m) - E0 (State s f sp c (rs#dst <- v) fr m) + E0 (State s f sp c ((undef_temps rs)#dst <- v) fr m) | exec_Mstore: forall s f sp chunk addr args src c rs fr m m' a, eval_addressing ge sp addr rs##args = Some a -> Mem.storev chunk m a (rs src) = Some m' -> step (State s f sp (Mstore chunk addr args src :: c) rs fr m) - E0 (State s f sp c rs fr m') + E0 (State s f sp c (undef_temps rs) fr m') | exec_Mcall: forall s f sp sig ros c rs fr m f', find_function ros rs = Some f' -> @@ -271,7 +271,7 @@ Inductive step: state -> trace -> state -> Prop := forall s f sp rs fr m ef args res b t v m', external_call ef ge rs##args m t v m' -> step (State s f sp (Mbuiltin ef args res :: b) rs fr m) - t (State s f sp b (rs#res <- v) fr m') + t (State s f sp b ((undef_temps rs)#res <- v) fr m') | exec_Mgoto: forall s f sp lbl c rs fr m c', find_label lbl f.(fn_code) = Some c' -> @@ -282,19 +282,19 @@ Inductive step: state -> trace -> state -> Prop := eval_condition cond rs##args = Some true -> find_label lbl f.(fn_code) = Some c' -> step (State s f sp (Mcond cond args lbl :: c) rs fr m) - E0 (State s f sp c' rs fr m) + E0 (State s f sp c' (undef_temps rs) fr m) | exec_Mcond_false: forall s f sp cond args lbl c rs fr m, eval_condition cond rs##args = Some false -> step (State s f sp (Mcond cond args lbl :: c) rs fr m) - E0 (State s f sp c rs fr m) + E0 (State s f sp c (undef_temps rs) fr m) | exec_Mjumptable: forall s f sp arg tbl c rs fr m n lbl c', rs arg = Vint n -> list_nth_z tbl (Int.signed n) = Some lbl -> find_label lbl f.(fn_code) = Some c' -> step (State s f sp (Mjumptable arg tbl :: c) rs fr m) - E0 (State s f sp c' rs fr m) + E0 (State s f sp c' (undef_temps rs) fr m) | exec_Mreturn: forall s f stk soff c rs fr m m', Mem.free m stk 0 f.(fn_stacksize) = Some m' -> diff --git a/backend/Machabstr2concr.v b/backend/Machabstr2concr.v index 1a97dda5..fa7f5803 100644 --- a/backend/Machabstr2concr.v +++ b/backend/Machabstr2concr.v @@ -618,6 +618,23 @@ Proof. destruct (RegEq.eq r0 r); auto. Qed. +Lemma regset_lessdef_undef_temps: + forall rs1 rs2, + regset_lessdef rs1 rs2 -> regset_lessdef (undef_temps rs1) (undef_temps rs2). +Proof. + unfold undef_temps. + generalize (int_temporaries ++ float_temporaries). + induction l; simpl; intros. auto. apply IHl. apply regset_lessdef_set; auto. +Qed. + +Lemma regset_lessdef_undef_op: + forall op rs1 rs2, + regset_lessdef rs1 rs2 -> regset_lessdef (undef_op op rs1) (undef_op op rs2). +Proof. + intros. set (D := regset_lessdef_undef_temps _ _ H). + destruct op; simpl; auto. +Qed. + Lemma regset_lessdef_find_function_ptr: forall ge ros rs1 rs2 fb, find_function_ptr ge ros rs1 = Some fb -> @@ -965,40 +982,44 @@ Proof. (* Mgetparam *) assert (WTF: wt_function f) by (inv WTS; auto). exploit match_stacks_get_parent; eauto. intros [v' [A B]]. - exists (State ts fb (Vptr sp0 base) c (trs#dst <- v') ms); split. + exists (State ts fb (Vptr sp0 base) c (trs # IT1 <- Vundef # dst <- v') ms); split. eapply exec_Mgetparam; eauto. eapply frame_match_load_link; eauto. eapply match_stacks_parent_sp_pointer; eauto. - econstructor; eauto with coqlib. apply regset_lessdef_set; eauto. + econstructor; eauto with coqlib. + apply regset_lessdef_set; eauto. apply regset_lessdef_set; eauto. (* Mop *) exploit eval_operation_lessdef. 2: eauto. eapply regset_lessdef_list; eauto. intros [v' [A B]]. - exists (State ts fb (Vptr sp0 base) c (trs#res <- v') ms); split. + exists (State ts fb (Vptr sp0 base) c ((undef_op op trs)#res <- v') ms); split. apply exec_Mop; auto. econstructor; eauto with coqlib. apply regset_lessdef_set; eauto. + apply regset_lessdef_undef_op; auto. (* Mload *) exploit eval_addressing_lessdef. 2: eauto. eapply regset_lessdef_list; eauto. intros [a' [A B]]. exploit Mem.loadv_extends. eauto. eauto. eexact B. intros [v' [C D]]. - exists (State ts fb (Vptr sp0 base) c (trs#dst <- v') ms); split. + exists (State ts fb (Vptr sp0 base) c ((undef_temps trs)#dst <- v') ms); split. eapply exec_Mload; eauto. econstructor; eauto with coqlib. apply regset_lessdef_set; eauto. + apply regset_lessdef_undef_temps; auto. (* Mstore *) exploit eval_addressing_lessdef. 2: eauto. eapply regset_lessdef_list; eauto. intros [a' [A B]]. exploit Mem.storev_extends. eauto. eauto. eexact B. apply RLD. intros [ms' [C D]]. - exists (State ts fb (Vptr sp0 base) c trs ms'); split. + exists (State ts fb (Vptr sp0 base) c (undef_temps trs) ms'); split. eapply exec_Mstore; eauto. destruct a; simpl in H0; try congruence. inv B. simpl in C. econstructor; eauto with coqlib. eapply match_stacks_store. eauto. eexact H0. eexact C. eapply frame_match_store; eauto. + apply regset_lessdef_undef_temps; auto. (* Mcall *) exploit find_function_find_function_ptr; eauto. @@ -1032,7 +1053,7 @@ Proof. econstructor; eauto with coqlib. eapply match_stacks_external_call; eauto. eapply frame_match_external_call; eauto. - apply regset_lessdef_set; eauto. + apply regset_lessdef_set; eauto. apply regset_lessdef_undef_temps; auto. (* Mgoto *) econstructor; split. @@ -1043,17 +1064,17 @@ Proof. econstructor; split. eapply exec_Mcond_true; eauto. eapply eval_condition_lessdef; eauto. apply regset_lessdef_list; auto. - econstructor; eauto. + econstructor; eauto. apply regset_lessdef_undef_temps; auto. econstructor; split. eapply exec_Mcond_false; eauto. eapply eval_condition_lessdef; eauto. apply regset_lessdef_list; auto. - econstructor; eauto. + econstructor; eauto. apply regset_lessdef_undef_temps; auto. (* Mjumptable *) econstructor; split. eapply exec_Mjumptable; eauto. generalize (RLD arg); intro LD. rewrite H in LD. inv LD. auto. - econstructor; eauto. + econstructor; eauto. apply regset_lessdef_undef_temps; auto. (* Mreturn *) assert (WTF: wt_function f) by (inv WTS; auto). diff --git a/backend/Machconcr.v b/backend/Machconcr.v index b736c8f7..5a98dd95 100644 --- a/backend/Machconcr.v +++ b/backend/Machconcr.v @@ -152,24 +152,24 @@ Inductive step: state -> trace -> state -> Prop := load_stack m sp Tint f.(fn_link_ofs) = Some parent -> load_stack m parent ty ofs = Some v -> step (State s fb sp (Mgetparam ofs ty dst :: c) rs m) - E0 (State s fb sp c (rs#dst <- v) m) + E0 (State s fb sp c (rs # IT1 <- Vundef # dst <- v) m) | exec_Mop: forall s f sp op args res c rs m v, eval_operation ge sp op rs##args = Some v -> step (State s f sp (Mop op args res :: c) rs m) - E0 (State s f sp c (rs#res <- v) m) + E0 (State s f sp c ((undef_op op rs)#res <- v) m) | exec_Mload: forall s f sp chunk addr args dst c rs m a v, eval_addressing ge sp addr rs##args = Some a -> Mem.loadv chunk m a = Some v -> step (State s f sp (Mload chunk addr args dst :: c) rs m) - E0 (State s f sp c (rs#dst <- v) m) + E0 (State s f sp c ((undef_temps rs)#dst <- v) m) | exec_Mstore: forall s f sp chunk addr args src c rs m m' a, eval_addressing ge sp addr rs##args = Some a -> Mem.storev chunk m a (rs src) = Some m' -> step (State s f sp (Mstore chunk addr args src :: c) rs m) - E0 (State s f sp c rs m') + E0 (State s f sp c (undef_temps rs) m') | exec_Mcall: forall s fb sp sig ros c rs m f f' ra, find_function_ptr ge ros rs = Some f' -> @@ -191,7 +191,7 @@ Inductive step: state -> trace -> state -> Prop := forall s f sp rs m ef args res b t v m', external_call ef ge rs##args m t v m' -> step (State s f sp (Mbuiltin ef args res :: b) rs m) - t (State s f sp b (rs#res <- v) m') + t (State s f sp b ((undef_temps rs)#res <- v) m') | exec_Mgoto: forall s fb f sp lbl c rs m c', Genv.find_funct_ptr ge fb = Some (Internal f) -> @@ -204,12 +204,12 @@ Inductive step: state -> trace -> state -> Prop := Genv.find_funct_ptr ge fb = Some (Internal f) -> find_label lbl f.(fn_code) = Some c' -> step (State s fb sp (Mcond cond args lbl :: c) rs m) - E0 (State s fb sp c' rs m) + E0 (State s fb sp c' (undef_temps rs) m) | exec_Mcond_false: forall s f sp cond args lbl c rs m, eval_condition cond rs##args = Some false -> step (State s f sp (Mcond cond args lbl :: c) rs m) - E0 (State s f sp c rs m) + E0 (State s f sp c (undef_temps rs) m) | exec_Mjumptable: forall s fb f sp arg tbl c rs m n lbl c', rs arg = Vint n -> @@ -217,7 +217,7 @@ Inductive step: state -> trace -> state -> Prop := Genv.find_funct_ptr ge fb = Some (Internal f) -> find_label lbl f.(fn_code) = Some c' -> step (State s fb sp (Mjumptable arg tbl :: c) rs m) - E0 (State s fb sp c' rs m) + E0 (State s fb sp c' (undef_temps rs) m) | exec_Mreturn: forall s fb stk soff c rs m f m', Genv.find_funct_ptr ge fb = Some (Internal f) -> diff --git a/backend/Machtyping.v b/backend/Machtyping.v index 7013e296..93ac00cd 100644 --- a/backend/Machtyping.v +++ b/backend/Machtyping.v @@ -156,6 +156,30 @@ Proof. apply H0. Qed. +Lemma wt_undef_temps: + forall rs, wt_regset rs -> wt_regset (undef_temps rs). +Proof. + unfold undef_temps. + generalize (int_temporaries ++ float_temporaries). + induction l; simpl; intros. auto. + apply IHl. red; intros. unfold Regmap.set. + destruct (RegEq.eq r a). constructor. auto. +Qed. + +Lemma wt_undef_op: + forall op rs, wt_regset rs -> wt_regset (undef_op op rs). +Proof. + intros. set (W := wt_undef_temps rs H). + destruct op; simpl; auto. +Qed. + +Lemma wt_undef_getparam: + forall rs, wt_regset rs -> wt_regset (rs#IT1 <- Vundef). +Proof. + intros; red; intros. unfold Regmap.set. + destruct (RegEq.eq r IT1). constructor. auto. +Qed. + Lemma wt_get_slot: forall f fr ty ofs v, get_slot f fr ty ofs v -> @@ -237,31 +261,41 @@ Lemma subject_reduction: forall (WTS: wt_state s1), wt_state s2. Proof. induction 1; intros; inv WTS; - try (generalize (wt_function_instrs _ WTF _ (is_tail_in TAIL)); intro; + try (generalize (wt_function_instrs _ WTF _ (is_tail_in TAIL)); intro WTI; eapply wt_state_intro; eauto with coqlib). - apply wt_setreg; auto. - inversion H0. rewrite H2. eapply wt_get_slot; eauto. + apply wt_setreg; auto. inv WTI. eapply wt_get_slot; eauto. - inversion H0. eapply wt_set_slot; eauto. - rewrite <- H2. apply WTRS. + eapply wt_set_slot; eauto. inv WTI; auto. + assert (mreg_type dst = ty). + inv WTI; auto. assert (wt_frame (parent_frame s)). destruct s; simpl. apply wt_empty_frame. generalize (STK s (in_eq _ _)); intro. inv H1. auto. - inversion H0. apply wt_setreg; auto. - rewrite H3. eapply wt_get_slot; eauto. - - apply wt_setreg; auto. inv H0. - simpl in H. - rewrite <- H2. replace v with (rs r1). apply WTRS. congruence. - replace (mreg_type res) with (snd (type_of_operation op)). - apply type_of_operation_sound with fundef unit ge rs##args sp; auto. - rewrite <- H5; reflexivity. - - apply wt_setreg; auto. inversion H1. rewrite H7. - eapply type_of_chunk_correct; eauto. - + apply wt_setreg; auto. + rewrite H0. eapply wt_get_slot; eauto. + apply wt_undef_getparam; auto. + +(* op *) + apply wt_setreg; auto. + inv WTI. + (* move *) + simpl in H. inv H. rewrite <- H1. apply WTRS. + (* not move *) + replace (mreg_type res) with (snd (type_of_operation op)). + apply type_of_operation_sound with fundef unit ge rs##args sp; auto. + rewrite <- H4; reflexivity. + apply wt_undef_op; auto. + +(* load *) + apply wt_setreg; auto. inv WTI. rewrite H6. eapply type_of_chunk_correct; eauto. + apply wt_undef_temps; auto. + +(* store *) + apply wt_undef_temps; auto. + +(* call *) assert (WTFD: wt_fundef f'). destruct ros; simpl in H. apply (Genv.find_funct_prop wt_fundef _ _ wt_p H). @@ -271,6 +305,7 @@ Proof. intros. elim H0; intro. subst s0. econstructor; eauto with coqlib. auto. +(* tailcall *) assert (WTFD: wt_fundef f'). destruct ros; simpl in H. apply (Genv.find_funct_prop wt_fundef _ _ wt_p H). @@ -278,17 +313,27 @@ Proof. apply (Genv.find_funct_ptr_prop wt_fundef _ _ wt_p H). econstructor; eauto. - inv H0. apply wt_setreg; auto. rewrite H5. eapply external_call_well_typed; eauto. +(* extcall *) + apply wt_setreg; auto. + inv WTI. rewrite H4. eapply external_call_well_typed; eauto. + apply wt_undef_temps; auto. +(* goto *) apply is_tail_find_label with lbl; congruence. - apply is_tail_find_label with lbl; congruence. - apply is_tail_find_label with lbl; congruence. +(* cond *) + apply is_tail_find_label with lbl; congruence. apply wt_undef_temps; auto. + apply wt_undef_temps; auto. +(* jumptable *) + apply is_tail_find_label with lbl; congruence. apply wt_undef_temps; auto. +(* return *) econstructor; eauto. +(* internal function *) econstructor; eauto with coqlib. inv H5; auto. exact I. apply wt_empty_frame. +(* external function *) econstructor; eauto. apply wt_setreg; auto. generalize (external_call_well_typed _ _ _ _ _ _ _ H). unfold proj_sig_res, loc_result. @@ -296,6 +341,7 @@ Proof. destruct t0; simpl; auto. simpl; auto. +(* returnstate *) generalize (H1 _ (in_eq _ _)); intro. inv H. econstructor; eauto. eauto with coqlib. diff --git a/backend/RTLgen.v b/backend/RTLgen.v index b728829f..e9184494 100644 --- a/backend/RTLgen.v +++ b/backend/RTLgen.v @@ -363,6 +363,18 @@ Fixpoint alloc_regs (map: mapping) (al: exprlist) ret (r :: rl) end. +(** A variant of [alloc_regs] for two-address instructions: + reuse the result register as destination for the first argument. *) + +Definition alloc_regs_2addr (map: mapping) (al: exprlist) (rd: reg) + : mon (list reg) := + match al with + | Enil => + ret nil + | Econs a bl => + do rl <- alloc_regs map bl; ret (rd :: rl) + end. + (** [alloc_optreg] is used for function calls. If a destination is specified for the call, it is returned. Otherwise, a fresh register is returned. *) @@ -395,9 +407,11 @@ Fixpoint transl_expr (map: mapping) (a: expr) (rd: reg) (nd: node) | Evar v => do r <- find_var map v; add_move r rd nd | Eop op al => - do rl <- alloc_regs map al; + do rl <- if two_address_op op + then alloc_regs_2addr map al rd + else alloc_regs map al; do no <- add_instr (Iop op rl rd nd); - transl_exprlist map al rl no + transl_exprlist map al rl no | Eload chunk addr al => do rl <- alloc_regs map al; do no <- add_instr (Iload chunk addr rl rd nd); @@ -502,6 +516,16 @@ Fixpoint transl_switch (r: reg) (nexits: list node) (t: comptree) add_instr (Iop op (r :: nil) rt n3) end. +(** Detect a two-address operator at the top of an expression. *) + +Fixpoint expr_is_2addr_op (e: expr) : bool := + match e with + | Eop op _ => two_address_op op + | Econdition e1 e2 e3 => expr_is_2addr_op e2 || expr_is_2addr_op e3 + | Elet e1 e2 => expr_is_2addr_op e2 + | _ => false + end. + (** Translation of statements. [transl_stmt map s nd nexits nret rret] enriches the current CFG with the RTL instructions necessary to execute the CminorSel statement [s], and returns the node of the first @@ -521,7 +545,12 @@ Fixpoint transl_stmt (map: mapping) (s: stmt) (nd: node) ret nd | Sassign v b => do r <- find_var map v; - transl_expr map b r nd + if expr_is_2addr_op b then + do rd <- new_reg; + do n1 <- add_move rd r nd; + transl_expr map b rd n1 + else + transl_expr map b r nd | Sstore chunk addr al b => do rl <- alloc_regs map al; do r <- alloc_reg map b; diff --git a/backend/RTLgenproof.v b/backend/RTLgenproof.v index 12a8e2be..b49b671a 100644 --- a/backend/RTLgenproof.v +++ b/backend/RTLgenproof.v @@ -587,7 +587,8 @@ Lemma transl_expr_Eop_correct: eval_operation ge sp op vargs = Some v -> transl_expr_prop le (Eop op args) v. Proof. - intros; red; intros. inv TE. + intros; red; intros. inv TE. +(* normal case *) exploit H0; eauto. intros [rs1 [EX1 [ME1 [RR1 RO1]]]]. exists (rs1#rd <- v). (* Exec *) @@ -1116,12 +1117,24 @@ Proof. constructor; auto. (* assign *) - inv TS. + inv TS. + (* optimized translation (not 2 addr) *) exploit transl_expr_correct; eauto. intros [rs' [A [B [C D]]]]. econstructor; split. right; split. eauto. Lt_state. econstructor; eauto. constructor. + (* alternate translation (2 addr) *) + exploit transl_expr_correct; eauto. + intros [rs' [A [B [C D]]]]. + exploit tr_move_correct; eauto. + intros [rs'' [P [Q R]]]. + econstructor; split. + right; split. eapply star_trans. eexact A. eexact P. traceEq. Lt_state. + econstructor; eauto. constructor. + simpl in B. apply match_env_invariant with (rs'#r <- v). + apply match_env_update_var; auto. + intros. rewrite Regmap.gsspec. destruct (peq r0 r). congruence. auto. (* store *) inv TS. diff --git a/backend/RTLgenspec.v b/backend/RTLgenspec.v index 22a1e79d..44e7c418 100644 --- a/backend/RTLgenspec.v +++ b/backend/RTLgenspec.v @@ -504,6 +504,32 @@ Proof. right; eauto with rtlg. Qed. +Lemma alloc_regs_2addr_valid: + forall al rd s1 s2 map rl i, + map_valid map s1 -> + reg_valid rd s1 -> + alloc_regs_2addr map al rd s1 = OK rl s2 i -> + regs_valid rl s2. +Proof. + unfold alloc_regs_2addr; intros. + destruct al; monadInv H1. + apply regs_valid_nil. + apply regs_valid_cons. eauto with rtlg. eauto with rtlg. +Qed. +Hint Resolve alloc_regs_2addr_valid: rtlg. + +Lemma alloc_regs_2addr_fresh_or_in_map: + forall map al rd s rl s' i, + map_valid map s -> + alloc_regs_2addr map al rd s = OK rl s' i -> + forall r, In r rl -> r = rd \/ reg_in_map map r \/ reg_fresh r s. +Proof. + unfold alloc_regs_2addr; intros. + destruct al; monadInv H0. + elim H1. + simpl in H1; destruct H1. auto. right. eapply alloc_regs_fresh_or_in_map; eauto. +Qed. + (** A register is an adequate target for holding the value of an expression if - either the register is associated with a Cminor let-bound variable @@ -606,7 +632,24 @@ Proof. apply regs_valid_cons; eauto with rtlg. Qed. -Hint Resolve new_reg_target_ok alloc_reg_target_ok alloc_regs_target_ok: rtlg. +Lemma alloc_regs_2addr_target_ok: + forall map al rd pr s1 rl s2 i, + map_valid map s1 -> + regs_valid pr s1 -> + reg_valid rd s1 -> + ~(reg_in_map map rd) -> ~In rd pr -> + alloc_regs_2addr map al rd s1 = OK rl s2 i -> + target_regs_ok map pr al rl. +Proof. + unfold alloc_regs_2addr; intros. destruct al; monadInv H4. + constructor. + constructor. constructor; auto. + eapply alloc_regs_target_ok; eauto. + apply regs_valid_cons; auto. +Qed. + +Hint Resolve new_reg_target_ok alloc_reg_target_ok + alloc_regs_target_ok alloc_regs_2addr_target_ok: rtlg. (** The following predicate is a variant of [target_reg_ok] used to characterize registers that are adequate for holding the return @@ -804,9 +847,14 @@ Inductive tr_stmt (c: code) (map: mapping): | tr_Sskip: forall ns nexits ngoto nret rret, tr_stmt c map Sskip ns ns nexits ngoto nret rret | tr_Sassign: forall id a ns nd nexits ngoto nret rret r, - map.(map_vars)!id = Some r -> + map.(map_vars)!id = Some r -> expr_is_2addr_op a = false -> tr_expr c map nil a ns nd r (Some id) -> tr_stmt c map (Sassign id a) ns nd nexits ngoto nret rret + | tr_Sassign_2: forall id a ns n1 nd nexits ngoto nret rret rd r, + map.(map_vars)!id = Some r -> + tr_expr c map nil a ns n1 rd None -> + tr_move c n1 rd nd r -> + tr_stmt c map (Sassign id a) ns nd nexits ngoto nret rret | tr_Sstore: forall chunk addr al b ns nd nexits ngoto nret rret rd n1 rl n2, tr_exprlist c map nil al ns n1 rl -> tr_expr c map rl b n1 n2 rd None -> @@ -970,7 +1018,9 @@ Proof. inv OK. left; split; congruence. right; eauto with rtlg. eapply add_move_charact; eauto. (* Eop *) - inv OK. + inv OK. destruct (two_address_op o). + econstructor; eauto with rtlg. + eapply transl_exprlist_charact; eauto with rtlg. econstructor; eauto with rtlg. eapply transl_exprlist_charact; eauto with rtlg. (* Eload *) @@ -1047,21 +1097,25 @@ Lemma transl_expr_assign_charact: forall id a map rd nd s ns s' INCR (TR: transl_expr map a rd nd s = OK ns s' INCR) (WF: map_valid map s) - (OK: reg_map_ok map rd (Some id)), + (OK: reg_map_ok map rd (Some id)) + (NOT2ADDR: expr_is_2addr_op a = false), tr_expr s'.(st_code) map nil a ns nd rd (Some id). Proof. +Opaque two_address_op. induction a; intros; monadInv TR; saturateTrans. (* Evar *) generalize EQ; unfold find_var. caseEq (map_vars map)!i; intros; inv EQ1. econstructor; eauto. eapply add_move_charact; eauto. (* Eop *) - econstructor; eauto with rtlg. + simpl in NOT2ADDR. rewrite NOT2ADDR in EQ. + econstructor; eauto with rtlg. eapply transl_exprlist_charact; eauto with rtlg. (* Eload *) econstructor; eauto with rtlg. eapply transl_exprlist_charact; eauto with rtlg. (* Econdition *) + simpl in NOT2ADDR. destruct (orb_false_elim _ _ NOT2ADDR). econstructor; eauto with rtlg. eapply transl_condexpr_charact; eauto with rtlg. apply tr_expr_incr with s1; auto. @@ -1069,6 +1123,7 @@ Proof. apply tr_expr_incr with s0; auto. eapply IHa2; eauto 2 with rtlg. (* Elet *) + simpl in NOT2ADDR. econstructor. eapply new_reg_not_in_map; eauto with rtlg. eapply transl_expr_charact; eauto 3 with rtlg. apply tr_expr_incr with s1; auto. @@ -1112,8 +1167,9 @@ Proof. intros s1 s2 EXT. generalize tr_expr_incr tr_condition_incr tr_exprlist_incr; intros I1 I2 I3. pose (AT := fun pc i => instr_at_incr s1 s2 pc i EXT). - induction 1; econstructor; eauto. - eapply tr_switch_incr; eauto. + induction 1; try (econstructor; eauto; fail). + eapply tr_Sassign_2; eauto. eapply tr_move_incr; eauto. + econstructor; eauto. eapply tr_switch_incr; eauto. Qed. Lemma transl_exit_charact: @@ -1181,7 +1237,11 @@ Proof. constructor. (* Sassign *) revert EQ. unfold find_var. case_eq (map_vars map)!i; intros; monadInv EQ. - econstructor. eauto. + remember (expr_is_2addr_op e) as is2a. destruct is2a. + monadInv EQ0. eapply tr_Sassign_2; eauto. + eapply transl_expr_charact; eauto with rtlg. + apply tr_move_incr with s1; auto. eapply add_move_charact; eauto. + eapply tr_Sassign; eauto. eapply transl_expr_assign_charact; eauto with rtlg. constructor. auto. (* Sstore *) diff --git a/backend/RTLtypingaux.ml b/backend/RTLtypingaux.ml index 657c4daa..7549ff49 100644 --- a/backend/RTLtypingaux.ml +++ b/backend/RTLtypingaux.ml @@ -45,6 +45,8 @@ let type_instr retty (Coq_pair(pc, i)) = | Iop(Omove, _, _, _) -> () | Iop(op, args, res, _) -> + if two_address_op op && List.length args >= 1 && List.hd args <> res + then raise (Type_error "two-address constraint violation"); let (Coq_pair(targs, tres)) = type_of_operation op in set_types args targs; set_type res tres | Iload(chunk, addr, args, dst, _) -> diff --git a/backend/Reloadproof.v b/backend/Reloadproof.v index 286a266d..a3ed3037 100644 --- a/backend/Reloadproof.v +++ b/backend/Reloadproof.v @@ -98,19 +98,10 @@ Lemma enough_temporaries_op_args: Proof. intros. apply arity_ok_enough. replace (map Loc.type args) with (fst (type_of_operation op)). - destruct op; try (destruct c); compute; reflexivity. + destruct op; try (destruct c); try (destruct a); compute; reflexivity. rewrite <- H. auto. Qed. -Lemma enough_temporaries_cond: - forall (cond: condition) (args: list loc), - List.map Loc.type args = type_of_condition cond -> - enough_temporaries args = true. -Proof. - intros. apply arity_ok_enough. rewrite H. - destruct cond; compute; reflexivity. -Qed. - Lemma enough_temporaries_addr: forall (addr: addressing) (args: list loc), List.map Loc.type args = type_of_addressing addr -> @@ -120,6 +111,15 @@ Proof. destruct addr; compute; reflexivity. Qed. +Lemma enough_temporaries_cond: + forall (cond: condition) (args: list loc), + List.map Loc.type args = type_of_condition cond -> + enough_temporaries args = true. +Proof. + intros. apply arity_ok_enough. rewrite H. + destruct cond; compute; reflexivity. +Qed. + Lemma arity_ok_rec_length: forall tys itmps ftmps, (length tys <= length itmps)%nat -> @@ -225,7 +225,10 @@ Lemma add_reload_correct: star step ge (State stk f sp (add_reload src dst k) rs m) E0 (State stk f sp k rs' m) /\ rs' (R dst) = rs src /\ - forall l, Loc.diff (R dst) l -> rs' l = rs l. + forall l, + Loc.diff (R dst) l -> + loc_acceptable src \/ Loc.diff (R IT1) l -> + rs' l = rs l. Proof. intros. unfold add_reload. destruct src. case (mreg_eq m0 dst); intro. @@ -234,29 +237,40 @@ Proof. split. apply star_one; apply exec_Lop. reflexivity. split. apply Locmap.gss. intros; apply Locmap.gso; auto. - exists (Locmap.set (R dst) (rs (S s)) rs). + exists (Locmap.set (R dst) (rs (S s)) (undef_getstack s rs)). split. apply star_one; apply exec_Lgetstack. - split. apply Locmap.gss. - intros; apply Locmap.gso; auto. + split. apply Locmap.gss. + intros. rewrite Locmap.gso; auto. + destruct s; unfold undef_getstack; unfold loc_acceptable in H0; auto. + apply Locmap.gso. tauto. Qed. Lemma add_reload_correct_2: forall src k rs m, + loc_acceptable src -> exists rs', star step ge (State stk f sp (add_reload src (reg_for src) k) rs m) E0 (State stk f sp k rs' m) /\ rs' (R (reg_for src)) = rs src /\ - (forall l, Loc.diff (R (reg_for src)) l -> rs' l = rs l) /\ - (forall l, Loc.notin l temporaries -> rs' l = rs l). + (forall l, Loc.notin l temporaries -> rs' l = rs l) /\ + rs' (R IT2) = rs (R IT2). Proof. - intros. destruct (reg_for_spec src). - set (rf := reg_for src) in *. - unfold add_reload. rewrite <- H. rewrite dec_eq_true. - exists rs. split. constructor. auto. - destruct (add_reload_correct src (reg_for src) k rs m) - as [rs' [A [B C]]]. - exists rs'; intuition. - apply C. apply Loc.diff_sym. eapply Loc.in_notin_diff; eauto. + intros. unfold reg_for, add_reload; destruct src. + rewrite dec_eq_true. exists rs; split. constructor. auto. + set (t := match slot_type s with + | Tint => IT1 + | Tfloat => FT1 + end). + exists (Locmap.set (R t) (rs (S s)) (undef_getstack s rs)). + split. apply star_one; apply exec_Lgetstack. + split. apply Locmap.gss. + split. intros. rewrite Locmap.gso; auto. + destruct s; unfold undef_getstack; unfold loc_acceptable in H; auto. + apply Locmap.gso. tauto. + apply Loc.diff_sym. simpl in H0; unfold t; destruct (slot_type s); tauto. + rewrite Locmap.gso. unfold undef_getstack. destruct s; auto. + apply Locmap.gso. red; congruence. + unfold t; destruct (slot_type s); red; congruence. Qed. Lemma add_spill_correct: @@ -282,6 +296,7 @@ Qed. Lemma add_reloads_correct_rec: forall srcs itmps ftmps k rs m, + locs_acceptable srcs -> enough_temporaries_rec srcs itmps ftmps = true -> (forall r, In (R r) srcs -> In r itmps -> False) -> (forall r, In (R r) srcs -> In r ftmps -> False) -> @@ -300,6 +315,8 @@ Proof. (* base case *) exists rs. split. apply star_refl. tauto. (* inductive case *) + assert (ACC1: loc_acceptable a) by (auto with coqlib). + assert (ACC2: locs_acceptable srcs) by (red; auto with coqlib). destruct a as [r | s]. (* a is a register *) simpl add_reload. rewrite dec_eq_true. @@ -311,41 +328,42 @@ Proof. (* a is a stack slot *) destruct (slot_type s). (* ... of integer type *) - destruct itmps as [ | it1 itmps ]. discriminate. inv H3. + destruct itmps as [ | it1 itmps ]. discriminate. inv H4. destruct (add_reload_correct (S s) it1 (add_reloads srcs (regs_for_rec srcs itmps ftmps) k) rs m) as [rs1 [A [B C]]]. exploit IHsrcs; eauto. - intros. apply H0 with r. tauto. simpl. tauto. eapply list_disjoint_cons_left; eauto. + intros. apply H1 with r. tauto. simpl. tauto. eapply list_disjoint_cons_left; eauto. intros [rs' [P [Q [T U]]]]. exists rs'. split. eapply star_trans; eauto. split. simpl. decEq. rewrite <- B. apply T. auto. eapply list_disjoint_notin; eauto with coqlib. rewrite Q. apply list_map_exten. intros. symmetry. apply C. - simpl. destruct x; auto. red; intro; subst m0. apply H0 with it1; auto with coqlib. + simpl. destruct x; auto. red; intro; subst m0. apply H1 with it1; auto with coqlib. + auto. split. simpl. intros. transitivity (rs1 (R r)). - apply T; tauto. apply C. simpl. tauto. - intros. transitivity (rs1 (S s0)). auto. apply C. simpl. auto. + apply T; tauto. apply C. simpl. tauto. auto. + intros. transitivity (rs1 (S s0)). auto. apply C. simpl. auto. auto. (* ... of float type *) - destruct ftmps as [ | ft1 ftmps ]. discriminate. inv H4. + destruct ftmps as [ | ft1 ftmps ]. discriminate. inv H5. destruct (add_reload_correct (S s) ft1 (add_reloads srcs (regs_for_rec srcs itmps ftmps) k) rs m) as [rs1 [A [B C]]]. exploit IHsrcs; eauto. - intros. apply H1 with r. tauto. simpl. tauto. eapply list_disjoint_cons_right; eauto. + intros. apply H2 with r. tauto. simpl. tauto. eapply list_disjoint_cons_right; eauto. intros [rs' [P [Q [T U]]]]. exists rs'. split. eapply star_trans; eauto. split. simpl. decEq. rewrite <- B. apply T; auto. eapply list_disjoint_notin; eauto. apply list_disjoint_sym. eauto. auto with coqlib. rewrite Q. apply list_map_exten. intros. symmetry. apply C. - simpl. destruct x; auto. red; intro; subst m0. apply H1 with ft1; auto with coqlib. + simpl. destruct x; auto. red; intro; subst m0. apply H2 with ft1; auto with coqlib. auto. split. simpl. intros. transitivity (rs1 (R r)). - apply T; tauto. apply C. simpl. tauto. - intros. transitivity (rs1 (S s0)). auto. apply C. simpl. auto. + apply T; tauto. apply C. simpl. tauto. auto. + intros. transitivity (rs1 (S s0)). auto. apply C. simpl. auto. auto. Qed. Lemma add_reloads_correct: forall srcs k rs m, enough_temporaries srcs = true -> - Loc.disjoint srcs temporaries -> + locs_acceptable srcs -> exists rs', star step ge (State stk f sp (add_reloads srcs (regs_for srcs) k) rs m) E0 (State stk f sp k rs' m) /\ @@ -355,10 +373,10 @@ Proof. intros. unfold enough_temporaries in H. exploit add_reloads_correct_rec; eauto. - intros. exploit (H0 (R r) (R r)); auto. - simpl in H2. simpl. intuition congruence. - intros. exploit (H0 (R r) (R r)); auto. - simpl in H2. simpl. intuition congruence. + intros. generalize (H0 _ H1). unfold loc_acceptable. generalize H2. + simpl. intuition congruence. + intros. generalize (H0 _ H1). unfold loc_acceptable. generalize H2. + simpl. intuition congruence. red; intros r1 r2; simpl. intuition congruence. unfold int_temporaries. NoRepet. unfold float_temporaries. NoRepet. @@ -389,7 +407,7 @@ Proof. destruct dst. (* src is a stack slot, dst a register *) generalize (add_reload_correct (S s) m0 k rs m); intros [rs' [EX [RES OTH]]]. - exists rs'; intuition. apply OTH; apply Loc.diff_sym; auto. + exists rs'; intuition. apply OTH. apply Loc.diff_sym; auto. right; apply Loc.diff_sym; auto. (* src and dst are stack slots *) set (tmp := match slot_type s with Tint => IT1 | Tfloat => FT1 end). generalize (add_reload_correct (S s) tmp (add_spill tmp (S s0) k) rs m); @@ -401,6 +419,7 @@ Proof. split. congruence. intros. rewrite OTH2. apply OTH1. apply Loc.diff_sym. unfold tmp; case (slot_type s); auto. + right. apply Loc.diff_sym; auto. apply Loc.diff_sym; auto. Qed. @@ -535,6 +554,37 @@ Proof. apply temporaries_not_acceptable; auto. Qed. +Remark undef_temps_others: + forall rs l, + Loc.notin l temporaries -> LTL.undef_temps rs l = rs l. +Proof. + intros. apply Locmap.guo; auto. +Qed. + +Remark undef_op_others: + forall op rs l, + Loc.notin l temporaries -> undef_op op rs l = rs l. +Proof. + intros. generalize (undef_temps_others rs l H); intro. + destruct op; simpl; auto. +Qed. + +Lemma agree_undef_temps: + forall rs1 rs2, + agree rs1 rs2 -> agree (LTL.undef_temps rs1) rs2. +Proof. + intros; red; intros. rewrite undef_temps_others; auto. + apply Conventions.temporaries_not_acceptable. auto. +Qed. + +Lemma agree_undef_temps2: + forall rs1 rs2, + agree rs1 rs2 -> agree (LTL.undef_temps rs1) (LTL.undef_temps rs2). +Proof. + intros. apply agree_exten with rs2. apply agree_undef_temps; auto. + intros. apply undef_temps_others; auto. +Qed. + Lemma agree_set: forall rs1 rs2 rs2' l v, loc_acceptable l -> @@ -550,6 +600,16 @@ Proof. apply temporaries_not_acceptable; auto. Qed. +Lemma agree_set2: + forall rs1 rs2 rs2' l v, + loc_acceptable l -> + Val.lessdef v (rs2' l) -> + (forall l', Loc.diff l l' -> Loc.notin l' temporaries -> rs2' l' = rs2 l') -> + agree rs1 rs2 -> agree (Locmap.set l v (LTL.undef_temps rs1)) rs2'. +Proof. + intros. eapply agree_set; eauto. apply agree_undef_temps; auto. +Qed. + Lemma agree_find_funct: forall (ge: Linear.genv) rs ls r f, Genv.find_funct ge (rs r) = Some f -> @@ -938,11 +998,11 @@ Proof. inv A. right. split. omega. split. auto. rewrite H1. rewrite H1. econstructor; eauto with coqlib. - apply agree_set with ls2; auto. + apply agree_set2 with ls2; auto. rewrite B. simpl in H; inversion H. auto. left; econstructor; split. eapply plus_left; eauto. econstructor; eauto with coqlib. - apply agree_set with ls; auto. + apply agree_set2 with ls; auto. rewrite B. simpl in H; inversion H. auto. intros. apply C. apply Loc.diff_sym; auto. simpl in H7; tauto. simpl in H7; tauto. @@ -953,8 +1013,7 @@ Proof. auto. rewrite H0. exploit add_reloads_correct. - eapply enough_temporaries_op_args; eauto. - apply locs_acceptable_disj_temporaries; auto. + eapply enough_temporaries_op_args; eauto. auto. intros [ls2 [A [B C]]]. instantiate (1 := ls) in B. assert (exists tv, eval_operation tge sp op (reglist ls2 (regs_for args)) = Some tv /\ Val.lessdef v tv). @@ -969,16 +1028,15 @@ Proof. eapply plus_left. eapply exec_Lop with (v := tv); eauto. eexact D. eauto. traceEq. econstructor; eauto with coqlib. - apply agree_set with ls; auto. + apply agree_set2 with ls; auto. rewrite E. rewrite Locmap.gss. auto. - intros. rewrite F; auto. rewrite Locmap.gso. auto. + intros. rewrite F; auto. rewrite Locmap.gso. rewrite undef_op_others; auto. apply reg_for_diff; auto. (* Lload *) ExploitWT; inv WTI. exploit add_reloads_correct. - eapply enough_temporaries_addr; eauto. - apply locs_acceptable_disj_temporaries; auto. + eapply enough_temporaries_addr; eauto. auto. intros [ls2 [A [B C]]]. assert (exists ta, eval_addressing tge sp addr (reglist ls2 (regs_for args)) = Some ta /\ Val.lessdef a ta). @@ -994,9 +1052,9 @@ Proof. eapply plus_left. eapply exec_Lload; eauto. eauto. auto. traceEq. econstructor; eauto with coqlib. - apply agree_set with ls; auto. + apply agree_set2 with ls; auto. rewrite E. rewrite Locmap.gss. auto. - intros. rewrite F; auto. rewrite Locmap.gso. auto. + intros. rewrite F; auto. rewrite Locmap.gso. rewrite undef_temps_others; auto. apply reg_for_diff; auto. (* Lstore *) @@ -1004,8 +1062,7 @@ Proof. caseEq (enough_temporaries (src :: args)); intro ENOUGH. destruct (regs_for_cons src args) as [rsrc [rargs EQ]]. rewrite EQ. exploit add_reloads_correct. - eauto. apply locs_acceptable_disj_temporaries; auto. - red; intros. elim H1; intro; auto. subst l; auto. + eauto. red; simpl; intros. destruct H1. congruence. auto. intros [ls2 [A [B C]]]. rewrite EQ in A. rewrite EQ in B. injection B. intros D E. simpl in B. @@ -1024,7 +1081,7 @@ Proof. eapply exec_Lstore with (a := ta); eauto. traceEq. econstructor; eauto with coqlib. - apply agree_exten with ls; auto. + apply agree_undef_temps2. apply agree_exten with ls; auto. (* not enough temporaries *) destruct (add_reloads_correct tge s' (transf_function f) sp args (Lop (op_for_binary_addressing addr) (regs_for args) IT2 @@ -1032,25 +1089,24 @@ Proof. (Lstore chunk (Aindexed Int.zero) (IT2 :: nil) (reg_for src) :: transf_code f b)) ls tm) as [ls2 [A [B C]]]. - eapply enough_temporaries_addr; eauto. - apply locs_acceptable_disj_temporaries; auto. + eapply enough_temporaries_addr; eauto. auto. assert (exists ta, eval_addressing tge sp addr (reglist ls2 (regs_for args)) = Some ta /\ Val.lessdef a ta). apply eval_addressing_lessdef with (map rs args). rewrite B. eapply agree_locs; eauto. rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved. destruct H1 as [ta [P Q]]. - set (ls3 := Locmap.set (R IT2) ta ls2). + set (ls3 := Locmap.set (R IT2) ta (undef_op (op_for_binary_addressing addr) ls2)). destruct (add_reload_correct_2 tge s' (transf_function f) sp src (Lstore chunk (Aindexed Int.zero) (IT2 :: nil) (reg_for src) :: transf_code f b) - ls3 tm) + ls3 tm H8) as [ls4 [D [E [F G]]]]. + assert (NT: Loc.notin src temporaries) by (apply temporaries_not_acceptable; auto). assert (X: Val.lessdef (rs src) (ls4 (R (reg_for src)))). - rewrite E. unfold ls3. rewrite Locmap.gso. eapply agree_loc; eauto. - eapply agree_exten; eauto. - apply Loc.diff_sym. apply loc_acceptable_noteq_diff. auto. - red; intros; subst src. simpl in H8. intuition congruence. + rewrite E. unfold ls3. rewrite Locmap.gso. + rewrite undef_op_others; auto. rewrite C; auto. + apply Loc.diff_sym. simpl in NT; tauto. exploit Mem.storev_extends. eexact MMD. eauto. eexact Q. eexact X. intros [tm2 [Y Z]]. left; econstructor; split. @@ -1060,14 +1116,14 @@ Proof. rewrite <- B; auto. eapply star_right. eauto. eapply exec_Lstore with (a := ta); eauto. - simpl reglist. rewrite F. unfold ls3. rewrite Locmap.gss. simpl. + simpl reglist. rewrite G. unfold ls3. rewrite Locmap.gss. simpl. destruct ta; simpl in Y; try discriminate. rewrite Int.add_zero. auto. - simpl. apply reg_for_not_IT2; auto. reflexivity. reflexivity. traceEq. econstructor; eauto with coqlib. - apply agree_exten with ls; auto. - intros. rewrite G; auto. unfold ls3. rewrite Locmap.gso. auto. - simpl. destruct l; auto. simpl in H1. intuition congruence. + apply agree_undef_temps2. apply agree_exten with ls; auto. + intros. rewrite F; auto. unfold ls3. rewrite Locmap.gso. + rewrite undef_op_others; auto. + apply Loc.diff_sym. simpl in H1; tauto. (* Lcall *) ExploitWT. inversion WTI. subst ros0 args0 res0. rewrite <- H0. @@ -1086,13 +1142,13 @@ Proof. destruct (add_reload_correct_2 tge s' (transf_function f) sp fn (Lcall sig (inl ident (reg_for fn)) :: add_spill (loc_result sig) res (transf_code f b)) - ls2 tm) + ls2 tm OK2) as [ls3 [D [E [F G]]]]. assert (ARGS: Val.lessdef_list (map rs args) (map ls3 (loc_arguments sig))). replace (map ls3 (loc_arguments sig)) with (map ls2 (loc_arguments sig)). rewrite B. apply agree_locs; auto. - apply list_map_exten; intros. apply G. + apply list_map_exten; intros. apply F. apply Loc.disjoint_notin with (loc_arguments sig). apply loc_arguments_not_temporaries. auto. left; econstructor; split. @@ -1108,7 +1164,7 @@ Proof. econstructor; eauto with coqlib. rewrite H0. auto. apply agree_postcall_call with ls sig; auto. - intros. rewrite G; auto. congruence. + intros. rewrite F; auto. congruence. (* direct call *) rewrite <- H0 in H4. destruct (parallel_move_arguments_correct tge s' (transf_function f) sp @@ -1158,6 +1214,7 @@ Proof. apply list_map_exten; intros. apply F. apply Loc.diff_sym. apply (loc_arguments_not_temporaries sig x (R IT1)); simpl; auto. + auto. left; econstructor; split. eapply star_plus_trans. eexact A. eapply plus_right. eexact D. eapply exec_Ltailcall; eauto. @@ -1197,8 +1254,7 @@ Proof. (* Lbuiltin *) ExploitWT; inv WTI. exploit add_reloads_correct. - instantiate (1 := args). apply arity_ok_enough. rewrite H3. auto. - apply locs_acceptable_disj_temporaries; auto. + instantiate (1 := args). apply arity_ok_enough. rewrite H3. auto. auto. intros [ls2 [A [B C]]]. exploit external_call_mem_extends; eauto. apply agree_locs; eauto. @@ -1214,7 +1270,7 @@ Proof. econstructor; eauto with coqlib. apply agree_set with ls; auto. rewrite E. rewrite Locmap.gss. auto. - intros. rewrite F; auto. rewrite Locmap.gso. auto. + intros. rewrite F; auto. rewrite Locmap.gso. rewrite undef_temps_others; auto. apply reg_for_diff; auto. (* Llabel *) @@ -1231,8 +1287,7 @@ Proof. (* Lcond true *) ExploitWT; inv WTI. exploit add_reloads_correct. - eapply enough_temporaries_cond; eauto. - apply locs_acceptable_disj_temporaries; auto. + eapply enough_temporaries_cond; eauto. auto. intros [ls2 [A [B C]]]. left; econstructor; split. eapply plus_right. eauto. eapply exec_Lcond_true; eauto. @@ -1241,14 +1296,13 @@ Proof. apply find_label_transf_function; eauto. traceEq. econstructor; eauto. - apply agree_exten with ls; auto. + apply agree_undef_temps2. apply agree_exten with ls; auto. eapply LTLin.find_label_is_tail; eauto. (* Lcond false *) ExploitWT; inv WTI. exploit add_reloads_correct. - eapply enough_temporaries_cond; eauto. - apply locs_acceptable_disj_temporaries; auto. + eapply enough_temporaries_cond; eauto. auto. intros [ls2 [A [B C]]]. left; econstructor; split. eapply plus_right. eauto. eapply exec_Lcond_false; eauto. @@ -1256,11 +1310,11 @@ Proof. eapply agree_locs; eauto. traceEq. econstructor; eauto with coqlib. - apply agree_exten with ls; auto. + apply agree_undef_temps2. apply agree_exten with ls; auto. (* Ljumptable *) ExploitWT; inv WTI. - exploit add_reload_correct_2. + exploit add_reload_correct_2; eauto. intros [ls2 [A [B [C D]]]]. left; econstructor; split. eapply plus_right. eauto. eapply exec_Ljumptable; eauto. @@ -1269,7 +1323,7 @@ Proof. apply find_label_transf_function; eauto. traceEq. econstructor; eauto with coqlib. - apply agree_exten with ls; auto. + apply agree_undef_temps2. apply agree_exten with ls; auto. eapply LTLin.find_label_is_tail; eauto. (* Lreturn *) diff --git a/backend/Reloadtyping.v b/backend/Reloadtyping.v index 1bb462dc..60be59b9 100644 --- a/backend/Reloadtyping.v +++ b/backend/Reloadtyping.v @@ -197,7 +197,8 @@ Proof. destruct a. simpl. decEq. eauto. caseEq (slot_type s); intro SLOTTYPE; rewrite SLOTTYPE in H. - destruct itmps. discriminate. simpl. decEq. + destruct itmps. discriminate. + simpl. decEq. rewrite SLOTTYPE. auto with coqlib. apply IHlocs; auto with coqlib. destruct ftmps. discriminate. simpl. decEq. diff --git a/backend/Selection.v b/backend/Selection.v index ebdad8a2..68fb9ba1 100644 --- a/backend/Selection.v +++ b/backend/Selection.v @@ -101,8 +101,8 @@ Definition sel_constant (cst: Cminor.constant) : expr := match cst with | Cminor.Ointconst n => Eop (Ointconst n) Enil | Cminor.Ofloatconst f => Eop (Ofloatconst f) Enil - | Cminor.Oaddrsymbol id ofs => Eop (Oaddrsymbol id ofs) Enil - | Cminor.Oaddrstack ofs => Eop (Oaddrstack ofs) Enil + | Cminor.Oaddrsymbol id ofs => addrsymbol id ofs + | Cminor.Oaddrstack ofs => addrstack ofs end. Definition sel_unop (op: Cminor.unary_operation) (arg: expr) : expr := diff --git a/backend/Stacking.v b/backend/Stacking.v index f289793e..2ea08beb 100644 --- a/backend/Stacking.v +++ b/backend/Stacking.v @@ -127,16 +127,10 @@ Definition restore_callee_save (fe: frame_env) (k: Mach.code) := behaviour. *) Definition transl_op (fe: frame_env) (op: operation) := - match op with - | Oaddrstack ofs => Oaddrstack (Int.add (Int.repr fe.(fe_size)) ofs) - | _ => op - end. + shift_stack_operation (Int.repr fe.(fe_size)) op. Definition transl_addr (fe: frame_env) (addr: addressing) := - match addr with - | Ainstack ofs => Ainstack (Int.add (Int.repr fe.(fe_size)) ofs) - | _ => addr - end. + shift_stack_addressing (Int.repr fe.(fe_size)) addr. (** Translation of a Linear instruction. Prepends the corresponding Mach instructions to the given list of instructions. diff --git a/backend/Stackingproof.v b/backend/Stackingproof.v index 68d179a6..5b06c71a 100644 --- a/backend/Stackingproof.v +++ b/backend/Stackingproof.v @@ -32,6 +32,7 @@ Require Import Events. Require Import Globalenvs. Require Import Smallstep. Require Import Locations. +Require LTL. Require Import Linear. Require Import Lineartyping. Require Import Mach. @@ -575,6 +576,58 @@ Proof. intros. rewrite get_set_index_val_other; eauto with stacking. red; auto. Qed. +Lemma agree_undef_regs: + forall rl ls ls0 rs fr cs, + agree ls ls0 rs fr cs -> + (forall r, In r rl -> In (R r) temporaries) -> + agree (Locmap.undef (List.map R rl) ls) ls0 (undef_regs rl rs) fr cs. +Proof. + induction rl; intros; simpl. + auto. + eapply IHrl; eauto. + apply agree_set_reg; auto with coqlib. + assert (In (R a) temporaries) by auto with coqlib. + red. destruct (mreg_type a). + destruct (zlt (index_int_callee_save a) 0). + generalize (bound_int_callee_save_pos b). omega. + elim (int_callee_save_not_destroyed a). auto. apply index_int_callee_save_pos2; auto. + destruct (zlt (index_float_callee_save a) 0). + generalize (bound_float_callee_save_pos b). omega. + elim (float_callee_save_not_destroyed a). auto. apply index_float_callee_save_pos2; auto. + intros. apply H0. auto with coqlib. +Qed. + +Lemma agree_undef_temps: + forall ls ls0 rs fr cs, + agree ls ls0 rs fr cs -> + agree (LTL.undef_temps ls) ls0 (Mach.undef_temps rs) fr cs. +Proof. + intros. unfold undef_temps, LTL.undef_temps. + change temporaries with (List.map R (int_temporaries ++ float_temporaries)). + apply agree_undef_regs; auto. + intros. + change temporaries with (List.map R (int_temporaries ++ float_temporaries)). + apply List.in_map. auto. +Qed. + +Lemma agree_undef_op: + forall op env ls ls0 rs fr cs, + agree ls ls0 rs fr cs -> + agree (Linear.undef_op op ls) ls0 (Mach.undef_op (transl_op env op) rs) fr cs. +Proof. + intros. exploit agree_undef_temps; eauto. intro. + destruct op; simpl; auto. +Qed. + +Lemma agree_undef_getparam: + forall ls ls0 rs fr cs, + agree ls ls0 rs fr cs -> + agree (Locmap.set (R IT1) Vundef ls) ls0 (rs#IT1 <- Vundef) fr cs. +Proof. + intros. exploit (agree_undef_regs (IT1 :: nil)); eauto. + simpl; intros. intuition congruence. +Qed. + Lemma agree_return_regs: forall ls ls0 rs fr cs rs', agree ls ls0 rs fr cs -> @@ -831,9 +884,10 @@ Proof. intros until idx. destruct idx; simpl; auto. congruence. apply incl_refl. apply float_callee_save_norepet. eauto. - intros [fr' [A [B C]]]. - exists fr'; intuition. unfold save_callee_save_float; eauto. - apply C. auto. intros; subst idx. auto. + intros [fr' [A [B C]]]. + exists fr'. split. unfold save_callee_save_float; eauto. + split. auto. + intros. apply C. auto. intros; subst. red; intros; subst idx. contradiction. Qed. Lemma save_callee_save_correct: @@ -1216,24 +1270,15 @@ Qed. Definition shift_sp (tf: Mach.function) (sp: val) := Val.add sp (Vint (Int.repr (-tf.(fn_framesize)))). -Remark shift_offset_sp: - forall f tf sp n v, +Remark shift_sp_eq: + forall f tf sp, transf_function f = OK tf -> - offset_sp sp n = Some v -> - offset_sp (shift_sp tf sp) - (Int.add (Int.repr (fe_size (make_env (function_bounds f)))) n) = Some v. + shift_sp tf sp = Val.sub sp (Vint (Int.repr (fe_size (make_env (function_bounds f))))). Proof. - intros. destruct sp; try discriminate. - unfold offset_sp in *. - unfold shift_sp. - rewrite (unfold_transf_function _ _ H). unfold fn_framesize. - unfold Val.add. rewrite <- Int.neg_repr. - set (p := Int.repr (fe_size (make_env (function_bounds f)))). - inversion H0. decEq. decEq. - rewrite Int.add_assoc. decEq. - rewrite <- Int.add_assoc. - rewrite (Int.add_commut (Int.neg p) p). rewrite Int.add_neg_zero. - rewrite Int.add_commut. apply Int.add_zero. + intros. unfold shift_sp. + replace (fe_size (make_env (function_bounds f))) with (fn_framesize tf). + rewrite <- Int.neg_repr. destruct sp; simpl; auto; rewrite Int.sub_add_opp; auto. + rewrite (unfold_transf_function _ _ H). auto. Qed. Lemma shift_eval_operation: @@ -1243,10 +1288,10 @@ Lemma shift_eval_operation: eval_operation tge (shift_sp tf sp) (transl_op (make_env (function_bounds f)) op) args = Some v. Proof. - intros until v. destruct op; intros; auto. - simpl in *. rewrite symbols_preserved. auto. - destruct args; auto. unfold eval_operation in *. unfold transl_op. - apply shift_offset_sp; auto. + intros. rewrite <- H0. rewrite (shift_sp_eq f tf sp H). unfold transl_op. + rewrite (eval_operation_preserved ge tge). + apply shift_stack_eval_operation. + exact symbols_preserved. Qed. Lemma shift_eval_addressing: @@ -1257,11 +1302,10 @@ Lemma shift_eval_addressing: (transl_addr (make_env (function_bounds f)) addr) args = Some v. Proof. - intros. - unfold transl_addr, eval_addressing in *; - destruct addr; try (rewrite symbols_preserved); auto. - destruct args; try discriminate. - apply shift_offset_sp; auto. + intros. rewrite <- H0. rewrite (shift_sp_eq f tf sp H). unfold transl_addr. + rewrite (eval_addressing_preserved ge tge). + apply shift_stack_eval_addressing. + exact symbols_preserved. Qed. (** Preservation of the arguments to an external call. *) @@ -1389,23 +1433,29 @@ Proof. intro BOUND; simpl in BOUND); unfold transl_instr. (* Lgetstack *) - inv WTI. destruct BOUND. - exists (State ts tf (shift_sp tf sp) (transl_code (make_env (function_bounds f)) b) - (rs0#r <- (rs (S sl))) fr m). - split. destruct sl. + inv WTI. destruct BOUND. unfold undef_getstack; destruct sl. (* Lgetstack, local *) + exists (State ts tf (shift_sp tf sp) (transl_code (make_env (function_bounds f)) b) + (rs0#r <- (rs (S (Local z t)))) fr m); split. apply plus_one. apply exec_Mgetstack. apply get_slot_index. auto. apply index_local_valid. auto. congruence. congruence. auto. eapply agree_locals; eauto. + econstructor; eauto with coqlib. + apply agree_set_reg; auto. (* Lgetstack, incoming *) - apply plus_one; apply exec_Mgetparam. + exists (State ts tf (shift_sp tf sp) (transl_code (make_env (function_bounds f)) b) + (rs0 # IT1 <- Vundef # r <- (rs (S (Incoming z t)))) fr m); split. + apply plus_one. apply exec_Mgetparam. change (get_parent_slot ts z t (rs (S (Incoming z t)))). eapply agree_incoming; eauto. + econstructor; eauto with coqlib. + apply agree_set_reg; auto. apply agree_undef_getparam; auto. (* Lgetstack, outgoing *) - apply plus_one; apply exec_Mgetstack. - apply get_slot_index. auto. apply index_arg_valid. auto. congruence. congruence. auto. + exists (State ts tf (shift_sp tf sp) (transl_code (make_env (function_bounds f)) b) + (rs0#r <- (rs (S (Outgoing z t)))) fr m); split. + apply plus_one. apply exec_Mgetstack. + apply get_slot_index. auto. apply index_arg_valid. auto. congruence. congruence. auto. eapply agree_outgoing; eauto. - (* Lgetstack, common *) econstructor; eauto with coqlib. apply agree_set_reg; auto. @@ -1432,22 +1482,23 @@ Proof. symmetry. eapply agree_reg; eauto. (* Lop *) - exists (State ts tf (shift_sp tf sp) (transl_code (make_env (function_bounds f)) b) (rs0#res <- v) fr m); split. + set (op' := transl_op (make_env (function_bounds f)) op). + exists (State ts tf (shift_sp tf sp) (transl_code (make_env (function_bounds f)) b) ((undef_op op' rs0)#res <- v) fr m); split. apply plus_one. apply exec_Mop. apply shift_eval_operation. auto. change mreg with RegEq.t. rewrite (agree_eval_regs _ _ _ _ _ _ _ args AG). auto. econstructor; eauto with coqlib. - apply agree_set_reg; auto. + apply agree_set_reg; auto. apply agree_undef_op; auto. (* Lload *) - exists (State ts tf (shift_sp tf sp) (transl_code (make_env (function_bounds f)) b) (rs0#dst <- v) fr m); split. + exists (State ts tf (shift_sp tf sp) (transl_code (make_env (function_bounds f)) b) ((undef_temps rs0)#dst <- v) fr m); split. apply plus_one; eapply exec_Mload; eauto. apply shift_eval_addressing; auto. change mreg with RegEq.t. rewrite (agree_eval_regs _ _ _ _ _ _ _ args AG). eauto. econstructor; eauto with coqlib. - apply agree_set_reg; auto. + apply agree_set_reg; auto. apply agree_undef_temps; auto. (* Lstore *) econstructor; split. @@ -1456,7 +1507,7 @@ Proof. change mreg with RegEq.t. rewrite (agree_eval_regs _ _ _ _ _ _ _ args AG). eauto. rewrite (agree_eval_reg _ _ _ _ _ _ _ src AG). eauto. - econstructor; eauto with coqlib. + econstructor; eauto with coqlib. apply agree_undef_temps; auto. (* Lcall *) assert (WTF': wt_fundef f'). eapply find_function_well_typed; eauto. @@ -1504,14 +1555,14 @@ Proof. apply agree_callee_save_return_regs. (* Lbuiltin *) - exists (State ts tf (shift_sp tf sp) (transl_code (make_env (function_bounds f)) b) (rs0#res <- v) fr m'); split. + econstructor; split. apply plus_one. apply exec_Mbuiltin. change mreg with RegEq.t. rewrite (agree_eval_regs _ _ _ _ _ _ _ args AG). eapply external_call_symbols_preserved; eauto. exact symbols_preserved. exact varinfo_preserved. econstructor; eauto with coqlib. - apply agree_set_reg; auto. + apply agree_set_reg; auto. apply agree_undef_temps; auto. (* Llabel *) econstructor; split. @@ -1530,14 +1581,14 @@ Proof. apply plus_one; apply exec_Mcond_true. rewrite <- (agree_eval_regs _ _ _ _ _ _ _ args AG) in H; eauto. apply transl_find_label; eauto. - econstructor; eauto. + econstructor; eauto. apply agree_undef_temps; auto. eapply find_label_incl; eauto. (* Lcond, false *) econstructor; split. apply plus_one; apply exec_Mcond_false. rewrite <- (agree_eval_regs _ _ _ _ _ _ _ args AG) in H; auto. - econstructor; eauto with coqlib. + econstructor; eauto with coqlib. apply agree_undef_temps; auto. (* Ljumptable *) econstructor; split. @@ -1545,7 +1596,7 @@ Proof. rewrite <- (agree_eval_reg _ _ _ _ _ _ _ arg AG) in H; eauto. eauto. apply transl_find_label; eauto. - econstructor; eauto. + econstructor; eauto. apply agree_undef_temps; auto. eapply find_label_incl; eauto. (* Lreturn *) diff --git a/backend/Stackingtyping.v b/backend/Stackingtyping.v index 6ef86690..b42dbbb4 100644 --- a/backend/Stackingtyping.v +++ b/backend/Stackingtyping.v @@ -160,7 +160,7 @@ Proof. apply wt_instrs_cons; auto. constructor. destruct o; simpl; congruence. - rewrite H6. destruct o; reflexivity || congruence. + rewrite H6. symmetry. apply type_shift_stack_operation. (* load *) apply wt_instrs_cons; auto. constructor; auto. -- cgit