aboutsummaryrefslogtreecommitdiffstats
path: root/backend
diff options
context:
space:
mode:
authorxleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>2010-09-02 12:42:19 +0000
committerxleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>2010-09-02 12:42:19 +0000
commit265fa07b34a813ba9d8249ddad82d71e6002c10d (patch)
tree45831b1793c7920b10969fc7cf6316c202d78e91 /backend
parent94470fb6a652cb993982269fcb7a0e8319b54488 (diff)
downloadcompcert-kvx-265fa07b34a813ba9d8249ddad82d71e6002c10d.tar.gz
compcert-kvx-265fa07b34a813ba9d8249ddad82d71e6002c10d.zip
Merge of the reuse-temps branch:
- Reload temporaries are marked as destroyed (set to Vundef) across operations in the semantics of LTL, LTLin, Linear and Mach, allowing Asmgen to reuse them. - Added IA32 port. - Cleaned up float conversions and axiomatization of floats. git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@1499 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e
Diffstat (limited to 'backend')
-rw-r--r--backend/Allocproof.v32
-rw-r--r--backend/CSE.v19
-rw-r--r--backend/Coloring.v2
-rw-r--r--backend/Coloringaux.ml93
-rw-r--r--backend/Coloringproof.v4
-rw-r--r--backend/LTL.v16
-rw-r--r--backend/LTLin.v12
-rw-r--r--backend/Linear.v30
-rw-r--r--backend/Locations.v23
-rw-r--r--backend/Mach.v16
-rw-r--r--backend/Machabstr.v16
-rw-r--r--backend/Machabstr2concr.v39
-rw-r--r--backend/Machconcr.v16
-rw-r--r--backend/Machtyping.v88
-rw-r--r--backend/RTLgen.v35
-rw-r--r--backend/RTLgenproof.v17
-rw-r--r--backend/RTLgenspec.v76
-rw-r--r--backend/RTLtypingaux.ml2
-rw-r--r--backend/Reloadproof.v214
-rw-r--r--backend/Reloadtyping.v3
-rw-r--r--backend/Selection.v4
-rw-r--r--backend/Stacking.v10
-rw-r--r--backend/Stackingproof.v143
-rw-r--r--backend/Stackingtyping.v2
24 files changed, 637 insertions, 275 deletions
diff --git a/backend/Allocproof.v b/backend/Allocproof.v
index d06c26fa..5a5e4c4d 100644
--- a/backend/Allocproof.v
+++ b/backend/Allocproof.v
@@ -235,6 +235,14 @@ Proof.
rewrite H0. apply H. auto. eapply regalloc_not_temporary; eauto.
Qed.
+Lemma agree_undef_temps:
+ forall live rs ls,
+ agree live rs ls -> agree live rs (undef_temps ls).
+Proof.
+ intros. apply agree_exten with ls; auto.
+ intros. apply Locmap.guo; auto.
+Qed.
+
(** If a register is dead, assigning it an arbitrary value in [rs]
and leaving [ls] unchanged preserves agreement. (This corresponds
to an operation over a dead register in the original program
@@ -603,7 +611,11 @@ Proof.
(* sub-case: non-redundant move *)
econstructor; split. eapply exec_Lop; eauto. simpl. eauto.
MatchStates.
- rewrite <- H1. eapply agree_move_live; eauto.
+ rewrite <- H1. set (ls1 := undef_temps ls).
+ replace (ls (assign arg)) with (ls1 (assign arg)).
+ eapply agree_move_live; eauto.
+ unfold ls1. eapply agree_undef_temps; eauto.
+ unfold ls1. simpl. apply Locmap.guo. eapply regalloc_not_temporary; eauto.
(* Not a move *)
intros INMO CORR CODE.
assert (eval_operation tge sp op (map ls (map assign args)) = Some v).
@@ -612,6 +624,7 @@ Proof.
eapply agree_eval_regs; eauto.
econstructor; split. eapply exec_Lop; eauto. MatchStates.
apply agree_assign_live with f env live; auto.
+ eapply agree_undef_temps; eauto.
eapply agree_reg_list_live; eauto.
(* Result is not live, instruction turned into a nop *)
intro CODE. econstructor; split. eapply exec_Lnop; eauto.
@@ -633,6 +646,7 @@ Proof.
unfold correct_alloc_instr. intro CORR.
MatchStates.
eapply agree_assign_live; eauto.
+ eapply agree_undef_temps; eauto.
eapply agree_reg_list_live; eauto.
(* dst is dead *)
econstructor; split.
@@ -650,7 +664,9 @@ Proof.
econstructor; split.
eapply exec_Lstore; eauto. TranslInstr.
rewrite <- ESRC. eauto.
- MatchStates. eapply agree_reg_live. eapply agree_reg_list_live. eauto.
+ MatchStates.
+ eapply agree_undef_temps; eauto.
+ eapply agree_reg_live. eapply agree_reg_list_live. eauto.
(* Icall *)
exploit transl_find_function; eauto. intros [tf [TFIND TF]].
@@ -695,20 +711,26 @@ Proof.
eapply agree_eval_regs; eauto.
econstructor; split.
eapply exec_Lcond_true; eauto. TranslInstr.
- MatchStates. eapply agree_reg_list_live. eauto.
+ MatchStates.
+ eapply agree_undef_temps; eauto.
+ eapply agree_reg_list_live. eauto.
(* Icond, false *)
assert (COND: eval_condition cond (map ls (map assign args)) = Some false).
replace (map ls (map assign args)) with (rs##args). auto.
eapply agree_eval_regs; eauto.
econstructor; split.
eapply exec_Lcond_false; eauto. TranslInstr.
- MatchStates. eapply agree_reg_list_live. eauto.
+ MatchStates.
+ eapply agree_undef_temps; eauto.
+ eapply agree_reg_list_live. eauto.
(* Ijumptable *)
assert (rs#arg = ls (assign arg)). apply AG. apply Regset.add_1. auto.
econstructor; split.
eapply exec_Ljumptable; eauto. TranslInstr. congruence.
- MatchStates. eapply list_nth_z_in; eauto. eapply agree_reg_live; eauto.
+ MatchStates. eapply list_nth_z_in; eauto.
+ eapply agree_undef_temps; eauto.
+ eapply agree_reg_live; eauto.
(* Ireturn *)
econstructor; split.
diff --git a/backend/CSE.v b/backend/CSE.v
index dab8fc31..4347c334 100644
--- a/backend/CSE.v
+++ b/backend/CSE.v
@@ -375,23 +375,14 @@ Definition analyze (f: RTL.function): PMap.t numbering :=
(** * Code transformation *)
-(** Some operations are so cheap to compute that it is generally not
- worth reusing their results. These operations are detected by the
- function below. *)
-
-Definition is_trivial_op (op: operation) : bool :=
- match op with
- | Omove => true
- | Ointconst _ => true
- | Oaddrsymbol _ _ => true
- | Oaddrstack _ => true
- | _ => false
- end.
-
(** The code transformation is performed instruction by instruction.
[Iload] instructions and non-trivial [Iop] instructions are turned
into move instructions if their result is already available in a
- register, as indicated by the numbering inferred at that program point. *)
+ register, as indicated by the numbering inferred at that program point.
+
+ Some operations are so cheap to compute that it is generally not
+ worth reusing their results. These operations are detected by the
+ function [is_trivial_op] in module [Op]. *)
Definition transf_instr (n: numbering) (instr: instruction) :=
match instr with
diff --git a/backend/Coloring.v b/backend/Coloring.v
index 28626cba..6d34e2cc 100644
--- a/backend/Coloring.v
+++ b/backend/Coloring.v
@@ -282,7 +282,7 @@ Definition alloc_of_coloring (coloring: reg -> loc) (env: regenv) (rs: Regset.t)
fun r =>
if Regset.mem r rs
then coloring r
- else match env r with Tint => R R3 | Tfloat => R F1 end.
+ else match env r with Tint => R dummy_int_reg | Tfloat => R dummy_float_reg end.
(** * Coloring of the interference graph *)
diff --git a/backend/Coloringaux.ml b/backend/Coloringaux.ml
index 922506f0..04209726 100644
--- a/backend/Coloringaux.ml
+++ b/backend/Coloringaux.ml
@@ -41,6 +41,7 @@ type node =
typ: typ; (*r its type *)
regname: reg option; (*r the RTL register it comes from *)
regclass: int; (*r identifier of register class *)
+ mutable accesses: int; (*r number of defs and uses *)
mutable spillcost: float; (*r estimated cost of spilling *)
mutable adjlist: node list; (*r all nodes it interferes with *)
mutable degree: int; (*r number of adjacent nodes *)
@@ -109,7 +110,7 @@ module DLinkNode = struct
let make state =
let rec empty =
{ ident = 0; typ = Tint; regname = None; regclass = 0;
- adjlist = []; degree = 0; spillcost = 0.0;
+ adjlist = []; degree = 0; accesses = 0; spillcost = 0.0;
movelist = []; alias = None; color = None;
nstate = state; nprev = empty; nnext = empty }
in empty
@@ -196,6 +197,8 @@ let num_available_registers = Array.make num_register_classes 0
let reserved_registers = ref ([]: mreg list)
+let allocatable_registers = ref ([]: mreg list)
+
let rec remove_reserved = function
| [] -> []
| hd :: tl ->
@@ -204,14 +207,17 @@ let rec remove_reserved = function
else hd :: remove_reserved tl
let init_regs() =
- caller_save_registers.(0) <-
- Array.of_list (remove_reserved int_caller_save_regs);
- caller_save_registers.(1) <-
- Array.of_list (remove_reserved float_caller_save_regs);
- callee_save_registers.(0) <-
- Array.of_list (remove_reserved int_callee_save_regs);
- callee_save_registers.(1) <-
- Array.of_list (remove_reserved float_callee_save_regs);
+ let int_caller_save = remove_reserved int_caller_save_regs
+ and float_caller_save = remove_reserved float_caller_save_regs
+ and int_callee_save = remove_reserved int_callee_save_regs
+ and float_callee_save = remove_reserved float_callee_save_regs in
+ allocatable_registers :=
+ List.flatten [int_caller_save; float_caller_save;
+ int_callee_save; float_callee_save];
+ caller_save_registers.(0) <- Array.of_list int_caller_save;
+ caller_save_registers.(1) <- Array.of_list float_caller_save;
+ callee_save_registers.(0) <- Array.of_list int_callee_save;
+ callee_save_registers.(1) <- Array.of_list float_callee_save;
for i = 0 to num_register_classes - 1 do
num_available_registers.(i) <-
Array.length caller_save_registers.(i)
@@ -365,9 +371,10 @@ let checkInvariants () =
let nodeOfReg r typenv spillcosts =
let ty = typenv r in
incr nextRegIdent;
+ let (acc, cost) = spillcosts r in
{ ident = !nextRegIdent; typ = ty;
regname = Some r; regclass = class_of_type ty;
- spillcost = float(spillcosts r);
+ accesses = acc; spillcost = float cost;
adjlist = []; degree = 0; movelist = []; alias = None;
color = None;
nstate = Initial;
@@ -378,7 +385,7 @@ let nodeOfMreg mr =
incr nextRegIdent;
{ ident = !nextRegIdent; typ = ty;
regname = None; regclass = class_of_type ty;
- spillcost = 0.0;
+ accesses = 0; spillcost = 0.0;
adjlist = []; degree = 0; movelist = []; alias = None;
color = Some (R mr);
nstate = Colored;
@@ -426,7 +433,9 @@ let build g typenv spillcosts =
g.pref_reg_reg ();
SetRegMreg.fold
(fun (Coq_pair(r1, mr2)) () ->
- add_move (find_reg_node r1) (find_mreg_node mr2))
+ let r1' = find_reg_node r1 in
+ if List.mem mr2 !allocatable_registers then
+ add_move r1' (find_mreg_node mr2))
g.pref_reg_mreg ();
(* Initial partition of nodes into spill / freeze / simplify *)
Hashtbl.iter
@@ -568,14 +577,14 @@ let canCoalesceGeorge u v =
so George's criterion is safe in this case.
*)
-let thresholdGeorge = 2.0 (* = 1 def + 1 use *)
+let thresholdGeorge = 2 (* = 1 def + 1 use *)
let canCoalesce u v =
if u.nstate = Colored
then canCoalesceGeorge u v
else canCoalesceBriggs u v
- || (v.spillcost <= thresholdGeorge && canCoalesceGeorge u v)
- || (u.spillcost <= thresholdGeorge && canCoalesceGeorge v u)
+ || (v.accesses <= thresholdGeorge && canCoalesceGeorge u v)
+ || (u.accesses <= thresholdGeorge && canCoalesceGeorge v u)
(* Update worklists after a move was processed *)
@@ -652,7 +661,12 @@ let freeze () =
(* Chaitin's cost measure *)
-let spillCost n = n.spillcost /. float n.degree
+let spillCost n =
+(*i
+ Printf.printf "spillCost %s: uses = %.0f degree = %d cost = %f\n"
+ (name_of_node n) n.spillcost n.degree (n.spillcost /. float n.degree);
+*)
+ n.spillcost /. float n.degree
(* Spill a node *)
@@ -778,35 +792,40 @@ let location_of_node n =
| None -> assert false
| Some loc -> loc
-(* Estimate spilling costs. Currently, just count the number of accesses
- to each pseudoregister. To do: take loops into account. *)
+(* Estimate spilling costs and counts the number of defs and uses.
+ Currently, we charge 10 for each access and 1 for each move.
+ To do: take loops into account. *)
let spill_costs f =
- let costs = ref (PTree.empty : int PTree.t) in
+ let costs = ref (PMap.init (0,0)) in
let cost r =
- match PTree.get r !costs with None -> 0 | Some n -> n in
- let incr r =
- costs := PTree.set r (1 + cost r) !costs in
- let incr_list rl =
- List.iter incr rl in
- let incr_ros ros =
- match ros with Coq_inl r -> incr r | Coq_inr _ -> () in
+ PMap.get r !costs in
+ let charge amount r =
+ let (n, c) = cost r in
+ costs := PMap.set r (n + 1, c + amount) !costs in
+ let charge_list amount rl =
+ List.iter (charge amount) rl in
+ let charge_ros amount ros =
+ match ros with Coq_inl r -> charge amount r | Coq_inr _ -> () in
let process_instr () pc i =
match i with
| Inop _ -> ()
- | Iop(op, args, res, _) -> incr_list args; incr res
- | Iload(chunk, addr, args, dst, _) -> incr_list args; incr dst
- | Istore(chunk, addr, args, src, _) -> incr_list args; incr src
- | Icall(sg, ros, args, res, _) -> incr_ros ros; incr_list args; incr res
- | Itailcall(sg, ros, args) -> incr_ros ros; incr_list args
- | Ibuiltin(ef, args, res, _) -> incr_list args; incr res
- | Icond(cond, args, _, _) -> incr_list args
- | Ijumptable(arg, _) -> incr arg
- | Ireturn(Some r) -> incr r
+ | Iop(Op.Omove, arg::nil, res, _) -> charge 1 arg; charge 1 res
+ | Iop(op, args, res, _) -> charge_list 10 args; charge 10 res
+ | Iload(chunk, addr, args, dst, _) -> charge_list 10 args; charge 10 dst
+ | Istore(chunk, addr, args, src, _) -> charge_list 10 args; charge 10 src
+ | Icall(sg, ros, args, res, _) ->
+ charge_ros 10 ros; charge_list 1 args; charge 1 res
+ | Itailcall(sg, ros, args) ->
+ charge_ros 10 ros; charge_list 1 args
+ | Ibuiltin(ef, args, res, _) -> charge_list 10 args; charge 10 res
+ | Icond(cond, args, _, _) -> charge_list 10 args
+ | Ijumptable(arg, _) -> charge 10 arg
+ | Ireturn(Some r) -> charge 1 r
| Ireturn None -> () in
- incr_list f.fn_params;
+ charge_list 1 f.fn_params;
PTree.fold process_instr f.fn_code ();
- (* Result is cost function reg -> integer cost *)
+ (* Result is cost function reg -> (num accesses, integer cost *)
cost
(* This is the entry point for graph coloring. *)
diff --git a/backend/Coloringproof.v b/backend/Coloringproof.v
index 5f035b40..bb97c87a 100644
--- a/backend/Coloringproof.v
+++ b/backend/Coloringproof.v
@@ -719,7 +719,9 @@ Proof.
elim (andb_prop _ _ H1); intros.
caseEq (Regset.mem r allregs); intro.
generalize (check_coloring_3_correct _ _ _ r H3 H4). tauto.
- case (env r); simpl; intuition congruence.
+ case (env r); simpl.
+ unfold dummy_int_reg. intuition congruence.
+ unfold dummy_float_reg. intuition congruence.
Qed.
Lemma alloc_of_coloring_correct_4:
diff --git a/backend/LTL.v b/backend/LTL.v
index e1222a52..2eff67cd 100644
--- a/backend/LTL.v
+++ b/backend/LTL.v
@@ -101,6 +101,10 @@ Definition postcall_locs (ls: locset) : locset :=
| S s => ls (S s)
end.
+(** Temporaries destroyed across instructions *)
+
+Definition undef_temps (ls: locset) := Locmap.undef temporaries ls.
+
(** LTL execution states. *)
Inductive stackframe : Type :=
@@ -166,21 +170,21 @@ Inductive step: state -> trace -> state -> Prop :=
(fn_code f)!pc = Some(Lop op args res pc') ->
eval_operation ge sp op (map rs args) = Some v ->
step (State s f sp pc rs m)
- E0 (State s f sp pc' (Locmap.set res v rs) m)
+ E0 (State s f sp pc' (Locmap.set res v (undef_temps rs)) m)
| exec_Lload:
forall s f sp pc rs m chunk addr args dst pc' a v,
(fn_code f)!pc = Some(Lload chunk addr args dst pc') ->
eval_addressing ge sp addr (map rs args) = Some a ->
Mem.loadv chunk m a = Some v ->
step (State s f sp pc rs m)
- E0 (State s f sp pc' (Locmap.set dst v rs) m)
+ E0 (State s f sp pc' (Locmap.set dst v (undef_temps rs)) m)
| exec_Lstore:
forall s f sp pc rs m chunk addr args src pc' a m',
(fn_code f)!pc = Some(Lstore chunk addr args src pc') ->
eval_addressing ge sp addr (map rs args) = Some a ->
Mem.storev chunk m a (rs src) = Some m' ->
step (State s f sp pc rs m)
- E0 (State s f sp pc' rs m')
+ E0 (State s f sp pc' (undef_temps rs) m')
| exec_Lcall:
forall s f sp pc rs m sig ros args res pc' f',
(fn_code f)!pc = Some(Lcall sig ros args res pc') ->
@@ -208,20 +212,20 @@ Inductive step: state -> trace -> state -> Prop :=
(fn_code f)!pc = Some(Lcond cond args ifso ifnot) ->
eval_condition cond (map rs args) = Some true ->
step (State s f sp pc rs m)
- E0 (State s f sp ifso rs m)
+ E0 (State s f sp ifso (undef_temps rs) m)
| exec_Lcond_false:
forall s f sp pc rs m cond args ifso ifnot,
(fn_code f)!pc = Some(Lcond cond args ifso ifnot) ->
eval_condition cond (map rs args) = Some false ->
step (State s f sp pc rs m)
- E0 (State s f sp ifnot rs m)
+ E0 (State s f sp ifnot (undef_temps rs) m)
| exec_Ljumptable:
forall s f sp pc rs m arg tbl n pc',
(fn_code f)!pc = Some(Ljumptable arg tbl) ->
rs arg = Vint n ->
list_nth_z tbl (Int.signed n) = Some pc' ->
step (State s f sp pc rs m)
- E0 (State s f sp pc' rs m)
+ E0 (State s f sp pc' (undef_temps rs) m)
| exec_Lreturn:
forall s f stk pc rs m or m',
(fn_code f)!pc = Some(Lreturn or) ->
diff --git a/backend/LTLin.v b/backend/LTLin.v
index ee4cb943..d6c5fa71 100644
--- a/backend/LTLin.v
+++ b/backend/LTLin.v
@@ -160,19 +160,19 @@ Inductive step: state -> trace -> state -> Prop :=
forall s f sp op args res b rs m v,
eval_operation ge sp op (map rs args) = Some v ->
step (State s f sp (Lop op args res :: b) rs m)
- E0 (State s f sp b (Locmap.set res v rs) m)
+ E0 (State s f sp b (Locmap.set res v (undef_temps rs)) m)
| exec_Lload:
forall s f sp chunk addr args dst b rs m a v,
eval_addressing ge sp addr (map rs args) = Some a ->
Mem.loadv chunk m a = Some v ->
step (State s f sp (Lload chunk addr args dst :: b) rs m)
- E0 (State s f sp b (Locmap.set dst v rs) m)
+ E0 (State s f sp b (Locmap.set dst v (undef_temps rs)) m)
| exec_Lstore:
forall s f sp chunk addr args src b rs m m' a,
eval_addressing ge sp addr (map rs args) = Some a ->
Mem.storev chunk m a (rs src) = Some m' ->
step (State s f sp (Lstore chunk addr args src :: b) rs m)
- E0 (State s f sp b rs m')
+ E0 (State s f sp b (undef_temps rs) m')
| exec_Lcall:
forall s f sp sig ros args res b rs m f',
find_function ros rs = Some f' ->
@@ -206,19 +206,19 @@ Inductive step: state -> trace -> state -> Prop :=
eval_condition cond (map rs args) = Some true ->
find_label lbl f.(fn_code) = Some b' ->
step (State s f sp (Lcond cond args lbl :: b) rs m)
- E0 (State s f sp b' rs m)
+ E0 (State s f sp b' (undef_temps rs) m)
| exec_Lcond_false:
forall s f sp cond args lbl b rs m,
eval_condition cond (map rs args) = Some false ->
step (State s f sp (Lcond cond args lbl :: b) rs m)
- E0 (State s f sp b rs m)
+ E0 (State s f sp b (undef_temps rs) m)
| exec_Ljumptable:
forall s f sp arg tbl b rs m n lbl b',
rs arg = Vint n ->
list_nth_z tbl (Int.signed n) = Some lbl ->
find_label lbl f.(fn_code) = Some b' ->
step (State s f sp (Ljumptable arg tbl :: b) rs m)
- E0 (State s f sp b' rs m)
+ E0 (State s f sp b' (undef_temps rs) m)
| exec_Lreturn:
forall s f stk rs m or b m',
Mem.free m stk 0 f.(fn_stacksize) = Some m' ->
diff --git a/backend/Linear.v b/backend/Linear.v
index 0f44206d..40f7e416 100644
--- a/backend/Linear.v
+++ b/backend/Linear.v
@@ -163,6 +163,20 @@ Definition return_regs (caller callee: locset) : locset :=
| S s => caller (S s)
end.
+(** Temporaries destroyed across operations *)
+
+Definition undef_op (op: operation) (rs: locset) :=
+ match op with
+ | Omove => rs
+ | _ => undef_temps rs
+ end.
+
+Definition undef_getstack (s: slot) (rs: locset) :=
+ match s with
+ | Incoming _ _ => Locmap.set (R IT1) Vundef rs
+ | _ => rs
+ end.
+
(** Linear execution states. *)
Inductive stackframe: Type :=
@@ -241,7 +255,7 @@ Inductive step: state -> trace -> state -> Prop :=
| exec_Lgetstack:
forall s f sp sl r b rs m,
step (State s f sp (Lgetstack sl r :: b) rs m)
- E0 (State s f sp b (Locmap.set (R r) (rs (S sl)) rs) m)
+ E0 (State s f sp b (Locmap.set (R r) (rs (S sl)) (undef_getstack sl rs)) m)
| exec_Lsetstack:
forall s f sp r sl b rs m,
step (State s f sp (Lsetstack r sl :: b) rs m)
@@ -250,19 +264,19 @@ Inductive step: state -> trace -> state -> Prop :=
forall s f sp op args res b rs m v,
eval_operation ge sp op (reglist rs args) = Some v ->
step (State s f sp (Lop op args res :: b) rs m)
- E0 (State s f sp b (Locmap.set (R res) v rs) m)
+ E0 (State s f sp b (Locmap.set (R res) v (undef_op op rs)) m)
| exec_Lload:
forall s f sp chunk addr args dst b rs m a v,
eval_addressing ge sp addr (reglist rs args) = Some a ->
Mem.loadv chunk m a = Some v ->
step (State s f sp (Lload chunk addr args dst :: b) rs m)
- E0 (State s f sp b (Locmap.set (R dst) v rs) m)
+ E0 (State s f sp b (Locmap.set (R dst) v (undef_temps rs)) m)
| exec_Lstore:
forall s f sp chunk addr args src b rs m m' a,
eval_addressing ge sp addr (reglist rs args) = Some a ->
Mem.storev chunk m a (rs (R src)) = Some m' ->
step (State s f sp (Lstore chunk addr args src :: b) rs m)
- E0 (State s f sp b rs m')
+ E0 (State s f sp b (undef_temps rs) m')
| exec_Lcall:
forall s f sp sig ros b rs m f',
find_function ros rs = Some f' ->
@@ -280,7 +294,7 @@ Inductive step: state -> trace -> state -> Prop :=
forall s f sp rs m ef args res b t v m',
external_call ef ge (reglist rs args) m t v m' ->
step (State s f sp (Lbuiltin ef args res :: b) rs m)
- t (State s f sp b (Locmap.set (R res) v rs) m')
+ t (State s f sp b (Locmap.set (R res) v (undef_temps rs)) m')
| exec_Llabel:
forall s f sp lbl b rs m,
step (State s f sp (Llabel lbl :: b) rs m)
@@ -295,19 +309,19 @@ Inductive step: state -> trace -> state -> Prop :=
eval_condition cond (reglist rs args) = Some true ->
find_label lbl f.(fn_code) = Some b' ->
step (State s f sp (Lcond cond args lbl :: b) rs m)
- E0 (State s f sp b' rs m)
+ E0 (State s f sp b' (undef_temps rs) m)
| exec_Lcond_false:
forall s f sp cond args lbl b rs m,
eval_condition cond (reglist rs args) = Some false ->
step (State s f sp (Lcond cond args lbl :: b) rs m)
- E0 (State s f sp b rs m)
+ E0 (State s f sp b (undef_temps rs) m)
| exec_Ljumptable:
forall s f sp arg tbl b rs m n lbl b',
rs (R arg) = Vint n ->
list_nth_z tbl (Int.signed n) = Some lbl ->
find_label lbl f.(fn_code) = Some b' ->
step (State s f sp (Ljumptable arg tbl :: b) rs m)
- E0 (State s f sp b' rs m)
+ E0 (State s f sp b' (undef_temps rs) m)
| exec_Lreturn:
forall s f stk b rs m m',
Mem.free m stk 0 f.(fn_stacksize) = Some m' ->
diff --git a/backend/Locations.v b/backend/Locations.v
index c2fda9c2..1270e1d3 100644
--- a/backend/Locations.v
+++ b/backend/Locations.v
@@ -423,4 +423,27 @@ Module Locmap.
auto.
Qed.
+ Fixpoint undef (ll: list loc) (m: t) {struct ll} : t :=
+ match ll with
+ | nil => m
+ | l1 :: ll' => undef ll' (set l1 Vundef m)
+ end.
+
+ Lemma guo: forall ll l m, Loc.notin l ll -> (undef ll m) l = m l.
+ Proof.
+ induction ll; simpl; intros. auto.
+ destruct H. rewrite IHll; auto. apply gso. apply Loc.diff_sym; auto.
+ Qed.
+
+ Lemma gus: forall ll l m, In l ll -> (undef ll m) l = Vundef.
+ Proof.
+ assert (P: forall ll l m, m l = Vundef -> (undef ll m) l = Vundef).
+ induction ll; simpl; intros. auto. apply IHll.
+ unfold set. destruct (Loc.eq a l); auto.
+ destruct (Loc.overlap a l); auto.
+ induction ll; simpl; intros. contradiction.
+ destruct H. apply P. subst a. apply gss.
+ auto.
+ Qed.
+
End Locmap.
diff --git a/backend/Mach.v b/backend/Mach.v
index 2ec312e4..c6a692a1 100644
--- a/backend/Mach.v
+++ b/backend/Mach.v
@@ -27,6 +27,7 @@ Require Import Events.
Require Import Globalenvs.
Require Import Op.
Require Import Locations.
+Require Import Conventions.
(** * Abstract syntax *)
@@ -101,6 +102,21 @@ Definition regset := Regmap.t val.
Notation "a ## b" := (List.map a b) (at level 1).
Notation "a # b <- c" := (Regmap.set b c a) (at level 1, b at next level).
+Fixpoint undef_regs (rl: list mreg) (rs: regset) {struct rl} : regset :=
+ match rl with
+ | nil => rs
+ | r1 :: rl' => undef_regs rl' (Regmap.set r1 Vundef rs)
+ end.
+
+Definition undef_temps (rs: regset) :=
+ undef_regs (int_temporaries ++ float_temporaries) rs.
+
+Definition undef_op (op: operation) (rs: regset) :=
+ match op with
+ | Omove => rs
+ | _ => undef_temps rs
+ end.
+
Definition is_label (lbl: label) (instr: instruction) : bool :=
match instr with
| Mlabel lbl' => if peq lbl lbl' then true else false
diff --git a/backend/Machabstr.v b/backend/Machabstr.v
index 291a4682..23ca895f 100644
--- a/backend/Machabstr.v
+++ b/backend/Machabstr.v
@@ -238,24 +238,24 @@ Inductive step: state -> trace -> state -> Prop :=
forall s f sp ofs ty dst c rs fr m v,
get_slot (parent_function s) (parent_frame s) ty (Int.signed ofs) v ->
step (State s f sp (Mgetparam ofs ty dst :: c) rs fr m)
- E0 (State s f sp c (rs#dst <- v) fr m)
+ E0 (State s f sp c (rs # IT1 <- Vundef # dst <- v) fr m)
| exec_Mop:
forall s f sp op args res c rs fr m v,
eval_operation ge sp op rs##args = Some v ->
step (State s f sp (Mop op args res :: c) rs fr m)
- E0 (State s f sp c (rs#res <- v) fr m)
+ E0 (State s f sp c ((undef_op op rs)#res <- v) fr m)
| exec_Mload:
forall s f sp chunk addr args dst c rs fr m a v,
eval_addressing ge sp addr rs##args = Some a ->
Mem.loadv chunk m a = Some v ->
step (State s f sp (Mload chunk addr args dst :: c) rs fr m)
- E0 (State s f sp c (rs#dst <- v) fr m)
+ E0 (State s f sp c ((undef_temps rs)#dst <- v) fr m)
| exec_Mstore:
forall s f sp chunk addr args src c rs fr m m' a,
eval_addressing ge sp addr rs##args = Some a ->
Mem.storev chunk m a (rs src) = Some m' ->
step (State s f sp (Mstore chunk addr args src :: c) rs fr m)
- E0 (State s f sp c rs fr m')
+ E0 (State s f sp c (undef_temps rs) fr m')
| exec_Mcall:
forall s f sp sig ros c rs fr m f',
find_function ros rs = Some f' ->
@@ -271,7 +271,7 @@ Inductive step: state -> trace -> state -> Prop :=
forall s f sp rs fr m ef args res b t v m',
external_call ef ge rs##args m t v m' ->
step (State s f sp (Mbuiltin ef args res :: b) rs fr m)
- t (State s f sp b (rs#res <- v) fr m')
+ t (State s f sp b ((undef_temps rs)#res <- v) fr m')
| exec_Mgoto:
forall s f sp lbl c rs fr m c',
find_label lbl f.(fn_code) = Some c' ->
@@ -282,19 +282,19 @@ Inductive step: state -> trace -> state -> Prop :=
eval_condition cond rs##args = Some true ->
find_label lbl f.(fn_code) = Some c' ->
step (State s f sp (Mcond cond args lbl :: c) rs fr m)
- E0 (State s f sp c' rs fr m)
+ E0 (State s f sp c' (undef_temps rs) fr m)
| exec_Mcond_false:
forall s f sp cond args lbl c rs fr m,
eval_condition cond rs##args = Some false ->
step (State s f sp (Mcond cond args lbl :: c) rs fr m)
- E0 (State s f sp c rs fr m)
+ E0 (State s f sp c (undef_temps rs) fr m)
| exec_Mjumptable:
forall s f sp arg tbl c rs fr m n lbl c',
rs arg = Vint n ->
list_nth_z tbl (Int.signed n) = Some lbl ->
find_label lbl f.(fn_code) = Some c' ->
step (State s f sp (Mjumptable arg tbl :: c) rs fr m)
- E0 (State s f sp c' rs fr m)
+ E0 (State s f sp c' (undef_temps rs) fr m)
| exec_Mreturn:
forall s f stk soff c rs fr m m',
Mem.free m stk 0 f.(fn_stacksize) = Some m' ->
diff --git a/backend/Machabstr2concr.v b/backend/Machabstr2concr.v
index 1a97dda5..fa7f5803 100644
--- a/backend/Machabstr2concr.v
+++ b/backend/Machabstr2concr.v
@@ -618,6 +618,23 @@ Proof.
destruct (RegEq.eq r0 r); auto.
Qed.
+Lemma regset_lessdef_undef_temps:
+ forall rs1 rs2,
+ regset_lessdef rs1 rs2 -> regset_lessdef (undef_temps rs1) (undef_temps rs2).
+Proof.
+ unfold undef_temps.
+ generalize (int_temporaries ++ float_temporaries).
+ induction l; simpl; intros. auto. apply IHl. apply regset_lessdef_set; auto.
+Qed.
+
+Lemma regset_lessdef_undef_op:
+ forall op rs1 rs2,
+ regset_lessdef rs1 rs2 -> regset_lessdef (undef_op op rs1) (undef_op op rs2).
+Proof.
+ intros. set (D := regset_lessdef_undef_temps _ _ H).
+ destruct op; simpl; auto.
+Qed.
+
Lemma regset_lessdef_find_function_ptr:
forall ge ros rs1 rs2 fb,
find_function_ptr ge ros rs1 = Some fb ->
@@ -965,40 +982,44 @@ Proof.
(* Mgetparam *)
assert (WTF: wt_function f) by (inv WTS; auto).
exploit match_stacks_get_parent; eauto. intros [v' [A B]].
- exists (State ts fb (Vptr sp0 base) c (trs#dst <- v') ms); split.
+ exists (State ts fb (Vptr sp0 base) c (trs # IT1 <- Vundef # dst <- v') ms); split.
eapply exec_Mgetparam; eauto.
eapply frame_match_load_link; eauto.
eapply match_stacks_parent_sp_pointer; eauto.
- econstructor; eauto with coqlib. apply regset_lessdef_set; eauto.
+ econstructor; eauto with coqlib.
+ apply regset_lessdef_set; eauto. apply regset_lessdef_set; eauto.
(* Mop *)
exploit eval_operation_lessdef. 2: eauto.
eapply regset_lessdef_list; eauto.
intros [v' [A B]].
- exists (State ts fb (Vptr sp0 base) c (trs#res <- v') ms); split.
+ exists (State ts fb (Vptr sp0 base) c ((undef_op op trs)#res <- v') ms); split.
apply exec_Mop; auto.
econstructor; eauto with coqlib. apply regset_lessdef_set; eauto.
+ apply regset_lessdef_undef_op; auto.
(* Mload *)
exploit eval_addressing_lessdef. 2: eauto. eapply regset_lessdef_list; eauto.
intros [a' [A B]].
exploit Mem.loadv_extends. eauto. eauto. eexact B.
intros [v' [C D]].
- exists (State ts fb (Vptr sp0 base) c (trs#dst <- v') ms); split.
+ exists (State ts fb (Vptr sp0 base) c ((undef_temps trs)#dst <- v') ms); split.
eapply exec_Mload; eauto.
econstructor; eauto with coqlib. apply regset_lessdef_set; eauto.
+ apply regset_lessdef_undef_temps; auto.
(* Mstore *)
exploit eval_addressing_lessdef. 2: eauto. eapply regset_lessdef_list; eauto.
intros [a' [A B]].
exploit Mem.storev_extends. eauto. eauto. eexact B. apply RLD.
intros [ms' [C D]].
- exists (State ts fb (Vptr sp0 base) c trs ms'); split.
+ exists (State ts fb (Vptr sp0 base) c (undef_temps trs) ms'); split.
eapply exec_Mstore; eauto.
destruct a; simpl in H0; try congruence. inv B. simpl in C.
econstructor; eauto with coqlib.
eapply match_stacks_store. eauto. eexact H0. eexact C.
eapply frame_match_store; eauto.
+ apply regset_lessdef_undef_temps; auto.
(* Mcall *)
exploit find_function_find_function_ptr; eauto.
@@ -1032,7 +1053,7 @@ Proof.
econstructor; eauto with coqlib.
eapply match_stacks_external_call; eauto.
eapply frame_match_external_call; eauto.
- apply regset_lessdef_set; eauto.
+ apply regset_lessdef_set; eauto. apply regset_lessdef_undef_temps; auto.
(* Mgoto *)
econstructor; split.
@@ -1043,17 +1064,17 @@ Proof.
econstructor; split.
eapply exec_Mcond_true; eauto.
eapply eval_condition_lessdef; eauto. apply regset_lessdef_list; auto.
- econstructor; eauto.
+ econstructor; eauto. apply regset_lessdef_undef_temps; auto.
econstructor; split.
eapply exec_Mcond_false; eauto.
eapply eval_condition_lessdef; eauto. apply regset_lessdef_list; auto.
- econstructor; eauto.
+ econstructor; eauto. apply regset_lessdef_undef_temps; auto.
(* Mjumptable *)
econstructor; split.
eapply exec_Mjumptable; eauto.
generalize (RLD arg); intro LD. rewrite H in LD. inv LD. auto.
- econstructor; eauto.
+ econstructor; eauto. apply regset_lessdef_undef_temps; auto.
(* Mreturn *)
assert (WTF: wt_function f) by (inv WTS; auto).
diff --git a/backend/Machconcr.v b/backend/Machconcr.v
index b736c8f7..5a98dd95 100644
--- a/backend/Machconcr.v
+++ b/backend/Machconcr.v
@@ -152,24 +152,24 @@ Inductive step: state -> trace -> state -> Prop :=
load_stack m sp Tint f.(fn_link_ofs) = Some parent ->
load_stack m parent ty ofs = Some v ->
step (State s fb sp (Mgetparam ofs ty dst :: c) rs m)
- E0 (State s fb sp c (rs#dst <- v) m)
+ E0 (State s fb sp c (rs # IT1 <- Vundef # dst <- v) m)
| exec_Mop:
forall s f sp op args res c rs m v,
eval_operation ge sp op rs##args = Some v ->
step (State s f sp (Mop op args res :: c) rs m)
- E0 (State s f sp c (rs#res <- v) m)
+ E0 (State s f sp c ((undef_op op rs)#res <- v) m)
| exec_Mload:
forall s f sp chunk addr args dst c rs m a v,
eval_addressing ge sp addr rs##args = Some a ->
Mem.loadv chunk m a = Some v ->
step (State s f sp (Mload chunk addr args dst :: c) rs m)
- E0 (State s f sp c (rs#dst <- v) m)
+ E0 (State s f sp c ((undef_temps rs)#dst <- v) m)
| exec_Mstore:
forall s f sp chunk addr args src c rs m m' a,
eval_addressing ge sp addr rs##args = Some a ->
Mem.storev chunk m a (rs src) = Some m' ->
step (State s f sp (Mstore chunk addr args src :: c) rs m)
- E0 (State s f sp c rs m')
+ E0 (State s f sp c (undef_temps rs) m')
| exec_Mcall:
forall s fb sp sig ros c rs m f f' ra,
find_function_ptr ge ros rs = Some f' ->
@@ -191,7 +191,7 @@ Inductive step: state -> trace -> state -> Prop :=
forall s f sp rs m ef args res b t v m',
external_call ef ge rs##args m t v m' ->
step (State s f sp (Mbuiltin ef args res :: b) rs m)
- t (State s f sp b (rs#res <- v) m')
+ t (State s f sp b ((undef_temps rs)#res <- v) m')
| exec_Mgoto:
forall s fb f sp lbl c rs m c',
Genv.find_funct_ptr ge fb = Some (Internal f) ->
@@ -204,12 +204,12 @@ Inductive step: state -> trace -> state -> Prop :=
Genv.find_funct_ptr ge fb = Some (Internal f) ->
find_label lbl f.(fn_code) = Some c' ->
step (State s fb sp (Mcond cond args lbl :: c) rs m)
- E0 (State s fb sp c' rs m)
+ E0 (State s fb sp c' (undef_temps rs) m)
| exec_Mcond_false:
forall s f sp cond args lbl c rs m,
eval_condition cond rs##args = Some false ->
step (State s f sp (Mcond cond args lbl :: c) rs m)
- E0 (State s f sp c rs m)
+ E0 (State s f sp c (undef_temps rs) m)
| exec_Mjumptable:
forall s fb f sp arg tbl c rs m n lbl c',
rs arg = Vint n ->
@@ -217,7 +217,7 @@ Inductive step: state -> trace -> state -> Prop :=
Genv.find_funct_ptr ge fb = Some (Internal f) ->
find_label lbl f.(fn_code) = Some c' ->
step (State s fb sp (Mjumptable arg tbl :: c) rs m)
- E0 (State s fb sp c' rs m)
+ E0 (State s fb sp c' (undef_temps rs) m)
| exec_Mreturn:
forall s fb stk soff c rs m f m',
Genv.find_funct_ptr ge fb = Some (Internal f) ->
diff --git a/backend/Machtyping.v b/backend/Machtyping.v
index 7013e296..93ac00cd 100644
--- a/backend/Machtyping.v
+++ b/backend/Machtyping.v
@@ -156,6 +156,30 @@ Proof.
apply H0.
Qed.
+Lemma wt_undef_temps:
+ forall rs, wt_regset rs -> wt_regset (undef_temps rs).
+Proof.
+ unfold undef_temps.
+ generalize (int_temporaries ++ float_temporaries).
+ induction l; simpl; intros. auto.
+ apply IHl. red; intros. unfold Regmap.set.
+ destruct (RegEq.eq r a). constructor. auto.
+Qed.
+
+Lemma wt_undef_op:
+ forall op rs, wt_regset rs -> wt_regset (undef_op op rs).
+Proof.
+ intros. set (W := wt_undef_temps rs H).
+ destruct op; simpl; auto.
+Qed.
+
+Lemma wt_undef_getparam:
+ forall rs, wt_regset rs -> wt_regset (rs#IT1 <- Vundef).
+Proof.
+ intros; red; intros. unfold Regmap.set.
+ destruct (RegEq.eq r IT1). constructor. auto.
+Qed.
+
Lemma wt_get_slot:
forall f fr ty ofs v,
get_slot f fr ty ofs v ->
@@ -237,31 +261,41 @@ Lemma subject_reduction:
forall (WTS: wt_state s1), wt_state s2.
Proof.
induction 1; intros; inv WTS;
- try (generalize (wt_function_instrs _ WTF _ (is_tail_in TAIL)); intro;
+ try (generalize (wt_function_instrs _ WTF _ (is_tail_in TAIL)); intro WTI;
eapply wt_state_intro; eauto with coqlib).
- apply wt_setreg; auto.
- inversion H0. rewrite H2. eapply wt_get_slot; eauto.
+ apply wt_setreg; auto. inv WTI. eapply wt_get_slot; eauto.
- inversion H0. eapply wt_set_slot; eauto.
- rewrite <- H2. apply WTRS.
+ eapply wt_set_slot; eauto. inv WTI; auto.
+ assert (mreg_type dst = ty).
+ inv WTI; auto.
assert (wt_frame (parent_frame s)).
destruct s; simpl. apply wt_empty_frame.
generalize (STK s (in_eq _ _)); intro. inv H1. auto.
- inversion H0. apply wt_setreg; auto.
- rewrite H3. eapply wt_get_slot; eauto.
-
- apply wt_setreg; auto. inv H0.
- simpl in H.
- rewrite <- H2. replace v with (rs r1). apply WTRS. congruence.
- replace (mreg_type res) with (snd (type_of_operation op)).
- apply type_of_operation_sound with fundef unit ge rs##args sp; auto.
- rewrite <- H5; reflexivity.
-
- apply wt_setreg; auto. inversion H1. rewrite H7.
- eapply type_of_chunk_correct; eauto.
-
+ apply wt_setreg; auto.
+ rewrite H0. eapply wt_get_slot; eauto.
+ apply wt_undef_getparam; auto.
+
+(* op *)
+ apply wt_setreg; auto.
+ inv WTI.
+ (* move *)
+ simpl in H. inv H. rewrite <- H1. apply WTRS.
+ (* not move *)
+ replace (mreg_type res) with (snd (type_of_operation op)).
+ apply type_of_operation_sound with fundef unit ge rs##args sp; auto.
+ rewrite <- H4; reflexivity.
+ apply wt_undef_op; auto.
+
+(* load *)
+ apply wt_setreg; auto. inv WTI. rewrite H6. eapply type_of_chunk_correct; eauto.
+ apply wt_undef_temps; auto.
+
+(* store *)
+ apply wt_undef_temps; auto.
+
+(* call *)
assert (WTFD: wt_fundef f').
destruct ros; simpl in H.
apply (Genv.find_funct_prop wt_fundef _ _ wt_p H).
@@ -271,6 +305,7 @@ Proof.
intros. elim H0; intro. subst s0. econstructor; eauto with coqlib.
auto.
+(* tailcall *)
assert (WTFD: wt_fundef f').
destruct ros; simpl in H.
apply (Genv.find_funct_prop wt_fundef _ _ wt_p H).
@@ -278,17 +313,27 @@ Proof.
apply (Genv.find_funct_ptr_prop wt_fundef _ _ wt_p H).
econstructor; eauto.
- inv H0. apply wt_setreg; auto. rewrite H5. eapply external_call_well_typed; eauto.
+(* extcall *)
+ apply wt_setreg; auto.
+ inv WTI. rewrite H4. eapply external_call_well_typed; eauto.
+ apply wt_undef_temps; auto.
+(* goto *)
apply is_tail_find_label with lbl; congruence.
- apply is_tail_find_label with lbl; congruence.
- apply is_tail_find_label with lbl; congruence.
+(* cond *)
+ apply is_tail_find_label with lbl; congruence. apply wt_undef_temps; auto.
+ apply wt_undef_temps; auto.
+(* jumptable *)
+ apply is_tail_find_label with lbl; congruence. apply wt_undef_temps; auto.
+(* return *)
econstructor; eauto.
+(* internal function *)
econstructor; eauto with coqlib. inv H5; auto. exact I.
apply wt_empty_frame.
+(* external function *)
econstructor; eauto. apply wt_setreg; auto.
generalize (external_call_well_typed _ _ _ _ _ _ _ H).
unfold proj_sig_res, loc_result.
@@ -296,6 +341,7 @@ Proof.
destruct t0; simpl; auto.
simpl; auto.
+(* returnstate *)
generalize (H1 _ (in_eq _ _)); intro. inv H.
econstructor; eauto.
eauto with coqlib.
diff --git a/backend/RTLgen.v b/backend/RTLgen.v
index b728829f..e9184494 100644
--- a/backend/RTLgen.v
+++ b/backend/RTLgen.v
@@ -363,6 +363,18 @@ Fixpoint alloc_regs (map: mapping) (al: exprlist)
ret (r :: rl)
end.
+(** A variant of [alloc_regs] for two-address instructions:
+ reuse the result register as destination for the first argument. *)
+
+Definition alloc_regs_2addr (map: mapping) (al: exprlist) (rd: reg)
+ : mon (list reg) :=
+ match al with
+ | Enil =>
+ ret nil
+ | Econs a bl =>
+ do rl <- alloc_regs map bl; ret (rd :: rl)
+ end.
+
(** [alloc_optreg] is used for function calls. If a destination is
specified for the call, it is returned. Otherwise, a fresh
register is returned. *)
@@ -395,9 +407,11 @@ Fixpoint transl_expr (map: mapping) (a: expr) (rd: reg) (nd: node)
| Evar v =>
do r <- find_var map v; add_move r rd nd
| Eop op al =>
- do rl <- alloc_regs map al;
+ do rl <- if two_address_op op
+ then alloc_regs_2addr map al rd
+ else alloc_regs map al;
do no <- add_instr (Iop op rl rd nd);
- transl_exprlist map al rl no
+ transl_exprlist map al rl no
| Eload chunk addr al =>
do rl <- alloc_regs map al;
do no <- add_instr (Iload chunk addr rl rd nd);
@@ -502,6 +516,16 @@ Fixpoint transl_switch (r: reg) (nexits: list node) (t: comptree)
add_instr (Iop op (r :: nil) rt n3)
end.
+(** Detect a two-address operator at the top of an expression. *)
+
+Fixpoint expr_is_2addr_op (e: expr) : bool :=
+ match e with
+ | Eop op _ => two_address_op op
+ | Econdition e1 e2 e3 => expr_is_2addr_op e2 || expr_is_2addr_op e3
+ | Elet e1 e2 => expr_is_2addr_op e2
+ | _ => false
+ end.
+
(** Translation of statements. [transl_stmt map s nd nexits nret rret]
enriches the current CFG with the RTL instructions necessary to
execute the CminorSel statement [s], and returns the node of the first
@@ -521,7 +545,12 @@ Fixpoint transl_stmt (map: mapping) (s: stmt) (nd: node)
ret nd
| Sassign v b =>
do r <- find_var map v;
- transl_expr map b r nd
+ if expr_is_2addr_op b then
+ do rd <- new_reg;
+ do n1 <- add_move rd r nd;
+ transl_expr map b rd n1
+ else
+ transl_expr map b r nd
| Sstore chunk addr al b =>
do rl <- alloc_regs map al;
do r <- alloc_reg map b;
diff --git a/backend/RTLgenproof.v b/backend/RTLgenproof.v
index 12a8e2be..b49b671a 100644
--- a/backend/RTLgenproof.v
+++ b/backend/RTLgenproof.v
@@ -587,7 +587,8 @@ Lemma transl_expr_Eop_correct:
eval_operation ge sp op vargs = Some v ->
transl_expr_prop le (Eop op args) v.
Proof.
- intros; red; intros. inv TE.
+ intros; red; intros. inv TE.
+(* normal case *)
exploit H0; eauto. intros [rs1 [EX1 [ME1 [RR1 RO1]]]].
exists (rs1#rd <- v).
(* Exec *)
@@ -1116,12 +1117,24 @@ Proof.
constructor; auto.
(* assign *)
- inv TS.
+ inv TS.
+ (* optimized translation (not 2 addr) *)
exploit transl_expr_correct; eauto.
intros [rs' [A [B [C D]]]].
econstructor; split.
right; split. eauto. Lt_state.
econstructor; eauto. constructor.
+ (* alternate translation (2 addr) *)
+ exploit transl_expr_correct; eauto.
+ intros [rs' [A [B [C D]]]].
+ exploit tr_move_correct; eauto.
+ intros [rs'' [P [Q R]]].
+ econstructor; split.
+ right; split. eapply star_trans. eexact A. eexact P. traceEq. Lt_state.
+ econstructor; eauto. constructor.
+ simpl in B. apply match_env_invariant with (rs'#r <- v).
+ apply match_env_update_var; auto.
+ intros. rewrite Regmap.gsspec. destruct (peq r0 r). congruence. auto.
(* store *)
inv TS.
diff --git a/backend/RTLgenspec.v b/backend/RTLgenspec.v
index 22a1e79d..44e7c418 100644
--- a/backend/RTLgenspec.v
+++ b/backend/RTLgenspec.v
@@ -504,6 +504,32 @@ Proof.
right; eauto with rtlg.
Qed.
+Lemma alloc_regs_2addr_valid:
+ forall al rd s1 s2 map rl i,
+ map_valid map s1 ->
+ reg_valid rd s1 ->
+ alloc_regs_2addr map al rd s1 = OK rl s2 i ->
+ regs_valid rl s2.
+Proof.
+ unfold alloc_regs_2addr; intros.
+ destruct al; monadInv H1.
+ apply regs_valid_nil.
+ apply regs_valid_cons. eauto with rtlg. eauto with rtlg.
+Qed.
+Hint Resolve alloc_regs_2addr_valid: rtlg.
+
+Lemma alloc_regs_2addr_fresh_or_in_map:
+ forall map al rd s rl s' i,
+ map_valid map s ->
+ alloc_regs_2addr map al rd s = OK rl s' i ->
+ forall r, In r rl -> r = rd \/ reg_in_map map r \/ reg_fresh r s.
+Proof.
+ unfold alloc_regs_2addr; intros.
+ destruct al; monadInv H0.
+ elim H1.
+ simpl in H1; destruct H1. auto. right. eapply alloc_regs_fresh_or_in_map; eauto.
+Qed.
+
(** A register is an adequate target for holding the value of an
expression if
- either the register is associated with a Cminor let-bound variable
@@ -606,7 +632,24 @@ Proof.
apply regs_valid_cons; eauto with rtlg.
Qed.
-Hint Resolve new_reg_target_ok alloc_reg_target_ok alloc_regs_target_ok: rtlg.
+Lemma alloc_regs_2addr_target_ok:
+ forall map al rd pr s1 rl s2 i,
+ map_valid map s1 ->
+ regs_valid pr s1 ->
+ reg_valid rd s1 ->
+ ~(reg_in_map map rd) -> ~In rd pr ->
+ alloc_regs_2addr map al rd s1 = OK rl s2 i ->
+ target_regs_ok map pr al rl.
+Proof.
+ unfold alloc_regs_2addr; intros. destruct al; monadInv H4.
+ constructor.
+ constructor. constructor; auto.
+ eapply alloc_regs_target_ok; eauto.
+ apply regs_valid_cons; auto.
+Qed.
+
+Hint Resolve new_reg_target_ok alloc_reg_target_ok
+ alloc_regs_target_ok alloc_regs_2addr_target_ok: rtlg.
(** The following predicate is a variant of [target_reg_ok] used
to characterize registers that are adequate for holding the return
@@ -804,9 +847,14 @@ Inductive tr_stmt (c: code) (map: mapping):
| tr_Sskip: forall ns nexits ngoto nret rret,
tr_stmt c map Sskip ns ns nexits ngoto nret rret
| tr_Sassign: forall id a ns nd nexits ngoto nret rret r,
- map.(map_vars)!id = Some r ->
+ map.(map_vars)!id = Some r -> expr_is_2addr_op a = false ->
tr_expr c map nil a ns nd r (Some id) ->
tr_stmt c map (Sassign id a) ns nd nexits ngoto nret rret
+ | tr_Sassign_2: forall id a ns n1 nd nexits ngoto nret rret rd r,
+ map.(map_vars)!id = Some r ->
+ tr_expr c map nil a ns n1 rd None ->
+ tr_move c n1 rd nd r ->
+ tr_stmt c map (Sassign id a) ns nd nexits ngoto nret rret
| tr_Sstore: forall chunk addr al b ns nd nexits ngoto nret rret rd n1 rl n2,
tr_exprlist c map nil al ns n1 rl ->
tr_expr c map rl b n1 n2 rd None ->
@@ -970,7 +1018,9 @@ Proof.
inv OK. left; split; congruence. right; eauto with rtlg.
eapply add_move_charact; eauto.
(* Eop *)
- inv OK.
+ inv OK. destruct (two_address_op o).
+ econstructor; eauto with rtlg.
+ eapply transl_exprlist_charact; eauto with rtlg.
econstructor; eauto with rtlg.
eapply transl_exprlist_charact; eauto with rtlg.
(* Eload *)
@@ -1047,21 +1097,25 @@ Lemma transl_expr_assign_charact:
forall id a map rd nd s ns s' INCR
(TR: transl_expr map a rd nd s = OK ns s' INCR)
(WF: map_valid map s)
- (OK: reg_map_ok map rd (Some id)),
+ (OK: reg_map_ok map rd (Some id))
+ (NOT2ADDR: expr_is_2addr_op a = false),
tr_expr s'.(st_code) map nil a ns nd rd (Some id).
Proof.
+Opaque two_address_op.
induction a; intros; monadInv TR; saturateTrans.
(* Evar *)
generalize EQ; unfold find_var. caseEq (map_vars map)!i; intros; inv EQ1.
econstructor; eauto.
eapply add_move_charact; eauto.
(* Eop *)
- econstructor; eauto with rtlg.
+ simpl in NOT2ADDR. rewrite NOT2ADDR in EQ.
+ econstructor; eauto with rtlg.
eapply transl_exprlist_charact; eauto with rtlg.
(* Eload *)
econstructor; eauto with rtlg.
eapply transl_exprlist_charact; eauto with rtlg.
(* Econdition *)
+ simpl in NOT2ADDR. destruct (orb_false_elim _ _ NOT2ADDR).
econstructor; eauto with rtlg.
eapply transl_condexpr_charact; eauto with rtlg.
apply tr_expr_incr with s1; auto.
@@ -1069,6 +1123,7 @@ Proof.
apply tr_expr_incr with s0; auto.
eapply IHa2; eauto 2 with rtlg.
(* Elet *)
+ simpl in NOT2ADDR.
econstructor. eapply new_reg_not_in_map; eauto with rtlg.
eapply transl_expr_charact; eauto 3 with rtlg.
apply tr_expr_incr with s1; auto.
@@ -1112,8 +1167,9 @@ Proof.
intros s1 s2 EXT.
generalize tr_expr_incr tr_condition_incr tr_exprlist_incr; intros I1 I2 I3.
pose (AT := fun pc i => instr_at_incr s1 s2 pc i EXT).
- induction 1; econstructor; eauto.
- eapply tr_switch_incr; eauto.
+ induction 1; try (econstructor; eauto; fail).
+ eapply tr_Sassign_2; eauto. eapply tr_move_incr; eauto.
+ econstructor; eauto. eapply tr_switch_incr; eauto.
Qed.
Lemma transl_exit_charact:
@@ -1181,7 +1237,11 @@ Proof.
constructor.
(* Sassign *)
revert EQ. unfold find_var. case_eq (map_vars map)!i; intros; monadInv EQ.
- econstructor. eauto.
+ remember (expr_is_2addr_op e) as is2a. destruct is2a.
+ monadInv EQ0. eapply tr_Sassign_2; eauto.
+ eapply transl_expr_charact; eauto with rtlg.
+ apply tr_move_incr with s1; auto. eapply add_move_charact; eauto.
+ eapply tr_Sassign; eauto.
eapply transl_expr_assign_charact; eauto with rtlg.
constructor. auto.
(* Sstore *)
diff --git a/backend/RTLtypingaux.ml b/backend/RTLtypingaux.ml
index 657c4daa..7549ff49 100644
--- a/backend/RTLtypingaux.ml
+++ b/backend/RTLtypingaux.ml
@@ -45,6 +45,8 @@ let type_instr retty (Coq_pair(pc, i)) =
| Iop(Omove, _, _, _) ->
()
| Iop(op, args, res, _) ->
+ if two_address_op op && List.length args >= 1 && List.hd args <> res
+ then raise (Type_error "two-address constraint violation");
let (Coq_pair(targs, tres)) = type_of_operation op in
set_types args targs; set_type res tres
| Iload(chunk, addr, args, dst, _) ->
diff --git a/backend/Reloadproof.v b/backend/Reloadproof.v
index 286a266d..a3ed3037 100644
--- a/backend/Reloadproof.v
+++ b/backend/Reloadproof.v
@@ -98,19 +98,10 @@ Lemma enough_temporaries_op_args:
Proof.
intros. apply arity_ok_enough.
replace (map Loc.type args) with (fst (type_of_operation op)).
- destruct op; try (destruct c); compute; reflexivity.
+ destruct op; try (destruct c); try (destruct a); compute; reflexivity.
rewrite <- H. auto.
Qed.
-Lemma enough_temporaries_cond:
- forall (cond: condition) (args: list loc),
- List.map Loc.type args = type_of_condition cond ->
- enough_temporaries args = true.
-Proof.
- intros. apply arity_ok_enough. rewrite H.
- destruct cond; compute; reflexivity.
-Qed.
-
Lemma enough_temporaries_addr:
forall (addr: addressing) (args: list loc),
List.map Loc.type args = type_of_addressing addr ->
@@ -120,6 +111,15 @@ Proof.
destruct addr; compute; reflexivity.
Qed.
+Lemma enough_temporaries_cond:
+ forall (cond: condition) (args: list loc),
+ List.map Loc.type args = type_of_condition cond ->
+ enough_temporaries args = true.
+Proof.
+ intros. apply arity_ok_enough. rewrite H.
+ destruct cond; compute; reflexivity.
+Qed.
+
Lemma arity_ok_rec_length:
forall tys itmps ftmps,
(length tys <= length itmps)%nat ->
@@ -225,7 +225,10 @@ Lemma add_reload_correct:
star step ge (State stk f sp (add_reload src dst k) rs m)
E0 (State stk f sp k rs' m) /\
rs' (R dst) = rs src /\
- forall l, Loc.diff (R dst) l -> rs' l = rs l.
+ forall l,
+ Loc.diff (R dst) l ->
+ loc_acceptable src \/ Loc.diff (R IT1) l ->
+ rs' l = rs l.
Proof.
intros. unfold add_reload. destruct src.
case (mreg_eq m0 dst); intro.
@@ -234,29 +237,40 @@ Proof.
split. apply star_one; apply exec_Lop. reflexivity.
split. apply Locmap.gss.
intros; apply Locmap.gso; auto.
- exists (Locmap.set (R dst) (rs (S s)) rs).
+ exists (Locmap.set (R dst) (rs (S s)) (undef_getstack s rs)).
split. apply star_one; apply exec_Lgetstack.
- split. apply Locmap.gss.
- intros; apply Locmap.gso; auto.
+ split. apply Locmap.gss.
+ intros. rewrite Locmap.gso; auto.
+ destruct s; unfold undef_getstack; unfold loc_acceptable in H0; auto.
+ apply Locmap.gso. tauto.
Qed.
Lemma add_reload_correct_2:
forall src k rs m,
+ loc_acceptable src ->
exists rs',
star step ge (State stk f sp (add_reload src (reg_for src) k) rs m)
E0 (State stk f sp k rs' m) /\
rs' (R (reg_for src)) = rs src /\
- (forall l, Loc.diff (R (reg_for src)) l -> rs' l = rs l) /\
- (forall l, Loc.notin l temporaries -> rs' l = rs l).
+ (forall l, Loc.notin l temporaries -> rs' l = rs l) /\
+ rs' (R IT2) = rs (R IT2).
Proof.
- intros. destruct (reg_for_spec src).
- set (rf := reg_for src) in *.
- unfold add_reload. rewrite <- H. rewrite dec_eq_true.
- exists rs. split. constructor. auto.
- destruct (add_reload_correct src (reg_for src) k rs m)
- as [rs' [A [B C]]].
- exists rs'; intuition.
- apply C. apply Loc.diff_sym. eapply Loc.in_notin_diff; eauto.
+ intros. unfold reg_for, add_reload; destruct src.
+ rewrite dec_eq_true. exists rs; split. constructor. auto.
+ set (t := match slot_type s with
+ | Tint => IT1
+ | Tfloat => FT1
+ end).
+ exists (Locmap.set (R t) (rs (S s)) (undef_getstack s rs)).
+ split. apply star_one; apply exec_Lgetstack.
+ split. apply Locmap.gss.
+ split. intros. rewrite Locmap.gso; auto.
+ destruct s; unfold undef_getstack; unfold loc_acceptable in H; auto.
+ apply Locmap.gso. tauto.
+ apply Loc.diff_sym. simpl in H0; unfold t; destruct (slot_type s); tauto.
+ rewrite Locmap.gso. unfold undef_getstack. destruct s; auto.
+ apply Locmap.gso. red; congruence.
+ unfold t; destruct (slot_type s); red; congruence.
Qed.
Lemma add_spill_correct:
@@ -282,6 +296,7 @@ Qed.
Lemma add_reloads_correct_rec:
forall srcs itmps ftmps k rs m,
+ locs_acceptable srcs ->
enough_temporaries_rec srcs itmps ftmps = true ->
(forall r, In (R r) srcs -> In r itmps -> False) ->
(forall r, In (R r) srcs -> In r ftmps -> False) ->
@@ -300,6 +315,8 @@ Proof.
(* base case *)
exists rs. split. apply star_refl. tauto.
(* inductive case *)
+ assert (ACC1: loc_acceptable a) by (auto with coqlib).
+ assert (ACC2: locs_acceptable srcs) by (red; auto with coqlib).
destruct a as [r | s].
(* a is a register *)
simpl add_reload. rewrite dec_eq_true.
@@ -311,41 +328,42 @@ Proof.
(* a is a stack slot *)
destruct (slot_type s).
(* ... of integer type *)
- destruct itmps as [ | it1 itmps ]. discriminate. inv H3.
+ destruct itmps as [ | it1 itmps ]. discriminate. inv H4.
destruct (add_reload_correct (S s) it1 (add_reloads srcs (regs_for_rec srcs itmps ftmps) k) rs m)
as [rs1 [A [B C]]].
exploit IHsrcs; eauto.
- intros. apply H0 with r. tauto. simpl. tauto. eapply list_disjoint_cons_left; eauto.
+ intros. apply H1 with r. tauto. simpl. tauto. eapply list_disjoint_cons_left; eauto.
intros [rs' [P [Q [T U]]]].
exists rs'. split. eapply star_trans; eauto.
split. simpl. decEq. rewrite <- B. apply T. auto.
eapply list_disjoint_notin; eauto with coqlib.
rewrite Q. apply list_map_exten. intros. symmetry. apply C.
- simpl. destruct x; auto. red; intro; subst m0. apply H0 with it1; auto with coqlib.
+ simpl. destruct x; auto. red; intro; subst m0. apply H1 with it1; auto with coqlib.
+ auto.
split. simpl. intros. transitivity (rs1 (R r)).
- apply T; tauto. apply C. simpl. tauto.
- intros. transitivity (rs1 (S s0)). auto. apply C. simpl. auto.
+ apply T; tauto. apply C. simpl. tauto. auto.
+ intros. transitivity (rs1 (S s0)). auto. apply C. simpl. auto. auto.
(* ... of float type *)
- destruct ftmps as [ | ft1 ftmps ]. discriminate. inv H4.
+ destruct ftmps as [ | ft1 ftmps ]. discriminate. inv H5.
destruct (add_reload_correct (S s) ft1 (add_reloads srcs (regs_for_rec srcs itmps ftmps) k) rs m)
as [rs1 [A [B C]]].
exploit IHsrcs; eauto.
- intros. apply H1 with r. tauto. simpl. tauto. eapply list_disjoint_cons_right; eauto.
+ intros. apply H2 with r. tauto. simpl. tauto. eapply list_disjoint_cons_right; eauto.
intros [rs' [P [Q [T U]]]].
exists rs'. split. eapply star_trans; eauto.
split. simpl. decEq. rewrite <- B. apply T; auto.
eapply list_disjoint_notin; eauto. apply list_disjoint_sym. eauto. auto with coqlib.
rewrite Q. apply list_map_exten. intros. symmetry. apply C.
- simpl. destruct x; auto. red; intro; subst m0. apply H1 with ft1; auto with coqlib.
+ simpl. destruct x; auto. red; intro; subst m0. apply H2 with ft1; auto with coqlib. auto.
split. simpl. intros. transitivity (rs1 (R r)).
- apply T; tauto. apply C. simpl. tauto.
- intros. transitivity (rs1 (S s0)). auto. apply C. simpl. auto.
+ apply T; tauto. apply C. simpl. tauto. auto.
+ intros. transitivity (rs1 (S s0)). auto. apply C. simpl. auto. auto.
Qed.
Lemma add_reloads_correct:
forall srcs k rs m,
enough_temporaries srcs = true ->
- Loc.disjoint srcs temporaries ->
+ locs_acceptable srcs ->
exists rs',
star step ge (State stk f sp (add_reloads srcs (regs_for srcs) k) rs m)
E0 (State stk f sp k rs' m) /\
@@ -355,10 +373,10 @@ Proof.
intros.
unfold enough_temporaries in H.
exploit add_reloads_correct_rec; eauto.
- intros. exploit (H0 (R r) (R r)); auto.
- simpl in H2. simpl. intuition congruence.
- intros. exploit (H0 (R r) (R r)); auto.
- simpl in H2. simpl. intuition congruence.
+ intros. generalize (H0 _ H1). unfold loc_acceptable. generalize H2.
+ simpl. intuition congruence.
+ intros. generalize (H0 _ H1). unfold loc_acceptable. generalize H2.
+ simpl. intuition congruence.
red; intros r1 r2; simpl. intuition congruence.
unfold int_temporaries. NoRepet.
unfold float_temporaries. NoRepet.
@@ -389,7 +407,7 @@ Proof.
destruct dst.
(* src is a stack slot, dst a register *)
generalize (add_reload_correct (S s) m0 k rs m); intros [rs' [EX [RES OTH]]].
- exists rs'; intuition. apply OTH; apply Loc.diff_sym; auto.
+ exists rs'; intuition. apply OTH. apply Loc.diff_sym; auto. right; apply Loc.diff_sym; auto.
(* src and dst are stack slots *)
set (tmp := match slot_type s with Tint => IT1 | Tfloat => FT1 end).
generalize (add_reload_correct (S s) tmp (add_spill tmp (S s0) k) rs m);
@@ -401,6 +419,7 @@ Proof.
split. congruence.
intros. rewrite OTH2. apply OTH1.
apply Loc.diff_sym. unfold tmp; case (slot_type s); auto.
+ right. apply Loc.diff_sym; auto.
apply Loc.diff_sym; auto.
Qed.
@@ -535,6 +554,37 @@ Proof.
apply temporaries_not_acceptable; auto.
Qed.
+Remark undef_temps_others:
+ forall rs l,
+ Loc.notin l temporaries -> LTL.undef_temps rs l = rs l.
+Proof.
+ intros. apply Locmap.guo; auto.
+Qed.
+
+Remark undef_op_others:
+ forall op rs l,
+ Loc.notin l temporaries -> undef_op op rs l = rs l.
+Proof.
+ intros. generalize (undef_temps_others rs l H); intro.
+ destruct op; simpl; auto.
+Qed.
+
+Lemma agree_undef_temps:
+ forall rs1 rs2,
+ agree rs1 rs2 -> agree (LTL.undef_temps rs1) rs2.
+Proof.
+ intros; red; intros. rewrite undef_temps_others; auto.
+ apply Conventions.temporaries_not_acceptable. auto.
+Qed.
+
+Lemma agree_undef_temps2:
+ forall rs1 rs2,
+ agree rs1 rs2 -> agree (LTL.undef_temps rs1) (LTL.undef_temps rs2).
+Proof.
+ intros. apply agree_exten with rs2. apply agree_undef_temps; auto.
+ intros. apply undef_temps_others; auto.
+Qed.
+
Lemma agree_set:
forall rs1 rs2 rs2' l v,
loc_acceptable l ->
@@ -550,6 +600,16 @@ Proof.
apply temporaries_not_acceptable; auto.
Qed.
+Lemma agree_set2:
+ forall rs1 rs2 rs2' l v,
+ loc_acceptable l ->
+ Val.lessdef v (rs2' l) ->
+ (forall l', Loc.diff l l' -> Loc.notin l' temporaries -> rs2' l' = rs2 l') ->
+ agree rs1 rs2 -> agree (Locmap.set l v (LTL.undef_temps rs1)) rs2'.
+Proof.
+ intros. eapply agree_set; eauto. apply agree_undef_temps; auto.
+Qed.
+
Lemma agree_find_funct:
forall (ge: Linear.genv) rs ls r f,
Genv.find_funct ge (rs r) = Some f ->
@@ -938,11 +998,11 @@ Proof.
inv A.
right. split. omega. split. auto.
rewrite H1. rewrite H1. econstructor; eauto with coqlib.
- apply agree_set with ls2; auto.
+ apply agree_set2 with ls2; auto.
rewrite B. simpl in H; inversion H. auto.
left; econstructor; split. eapply plus_left; eauto.
econstructor; eauto with coqlib.
- apply agree_set with ls; auto.
+ apply agree_set2 with ls; auto.
rewrite B. simpl in H; inversion H. auto.
intros. apply C. apply Loc.diff_sym; auto.
simpl in H7; tauto. simpl in H7; tauto.
@@ -953,8 +1013,7 @@ Proof.
auto.
rewrite H0.
exploit add_reloads_correct.
- eapply enough_temporaries_op_args; eauto.
- apply locs_acceptable_disj_temporaries; auto.
+ eapply enough_temporaries_op_args; eauto. auto.
intros [ls2 [A [B C]]]. instantiate (1 := ls) in B.
assert (exists tv, eval_operation tge sp op (reglist ls2 (regs_for args)) = Some tv
/\ Val.lessdef v tv).
@@ -969,16 +1028,15 @@ Proof.
eapply plus_left. eapply exec_Lop with (v := tv); eauto.
eexact D. eauto. traceEq.
econstructor; eauto with coqlib.
- apply agree_set with ls; auto.
+ apply agree_set2 with ls; auto.
rewrite E. rewrite Locmap.gss. auto.
- intros. rewrite F; auto. rewrite Locmap.gso. auto.
+ intros. rewrite F; auto. rewrite Locmap.gso. rewrite undef_op_others; auto.
apply reg_for_diff; auto.
(* Lload *)
ExploitWT; inv WTI.
exploit add_reloads_correct.
- eapply enough_temporaries_addr; eauto.
- apply locs_acceptable_disj_temporaries; auto.
+ eapply enough_temporaries_addr; eauto. auto.
intros [ls2 [A [B C]]].
assert (exists ta, eval_addressing tge sp addr (reglist ls2 (regs_for args)) = Some ta
/\ Val.lessdef a ta).
@@ -994,9 +1052,9 @@ Proof.
eapply plus_left. eapply exec_Lload; eauto.
eauto. auto. traceEq.
econstructor; eauto with coqlib.
- apply agree_set with ls; auto.
+ apply agree_set2 with ls; auto.
rewrite E. rewrite Locmap.gss. auto.
- intros. rewrite F; auto. rewrite Locmap.gso. auto.
+ intros. rewrite F; auto. rewrite Locmap.gso. rewrite undef_temps_others; auto.
apply reg_for_diff; auto.
(* Lstore *)
@@ -1004,8 +1062,7 @@ Proof.
caseEq (enough_temporaries (src :: args)); intro ENOUGH.
destruct (regs_for_cons src args) as [rsrc [rargs EQ]]. rewrite EQ.
exploit add_reloads_correct.
- eauto. apply locs_acceptable_disj_temporaries; auto.
- red; intros. elim H1; intro; auto. subst l; auto.
+ eauto. red; simpl; intros. destruct H1. congruence. auto.
intros [ls2 [A [B C]]]. rewrite EQ in A. rewrite EQ in B.
injection B. intros D E.
simpl in B.
@@ -1024,7 +1081,7 @@ Proof.
eapply exec_Lstore with (a := ta); eauto.
traceEq.
econstructor; eauto with coqlib.
- apply agree_exten with ls; auto.
+ apply agree_undef_temps2. apply agree_exten with ls; auto.
(* not enough temporaries *)
destruct (add_reloads_correct tge s' (transf_function f) sp args
(Lop (op_for_binary_addressing addr) (regs_for args) IT2
@@ -1032,25 +1089,24 @@ Proof.
(Lstore chunk (Aindexed Int.zero) (IT2 :: nil) (reg_for src)
:: transf_code f b)) ls tm)
as [ls2 [A [B C]]].
- eapply enough_temporaries_addr; eauto.
- apply locs_acceptable_disj_temporaries; auto.
+ eapply enough_temporaries_addr; eauto. auto.
assert (exists ta, eval_addressing tge sp addr (reglist ls2 (regs_for args)) = Some ta
/\ Val.lessdef a ta).
apply eval_addressing_lessdef with (map rs args).
rewrite B. eapply agree_locs; eauto.
rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved.
destruct H1 as [ta [P Q]].
- set (ls3 := Locmap.set (R IT2) ta ls2).
+ set (ls3 := Locmap.set (R IT2) ta (undef_op (op_for_binary_addressing addr) ls2)).
destruct (add_reload_correct_2 tge s' (transf_function f) sp src
(Lstore chunk (Aindexed Int.zero) (IT2 :: nil) (reg_for src)
:: transf_code f b)
- ls3 tm)
+ ls3 tm H8)
as [ls4 [D [E [F G]]]].
+ assert (NT: Loc.notin src temporaries) by (apply temporaries_not_acceptable; auto).
assert (X: Val.lessdef (rs src) (ls4 (R (reg_for src)))).
- rewrite E. unfold ls3. rewrite Locmap.gso. eapply agree_loc; eauto.
- eapply agree_exten; eauto.
- apply Loc.diff_sym. apply loc_acceptable_noteq_diff. auto.
- red; intros; subst src. simpl in H8. intuition congruence.
+ rewrite E. unfold ls3. rewrite Locmap.gso.
+ rewrite undef_op_others; auto. rewrite C; auto.
+ apply Loc.diff_sym. simpl in NT; tauto.
exploit Mem.storev_extends. eexact MMD. eauto. eexact Q. eexact X.
intros [tm2 [Y Z]].
left; econstructor; split.
@@ -1060,14 +1116,14 @@ Proof.
rewrite <- B; auto.
eapply star_right. eauto.
eapply exec_Lstore with (a := ta); eauto.
- simpl reglist. rewrite F. unfold ls3. rewrite Locmap.gss. simpl.
+ simpl reglist. rewrite G. unfold ls3. rewrite Locmap.gss. simpl.
destruct ta; simpl in Y; try discriminate. rewrite Int.add_zero. auto.
- simpl. apply reg_for_not_IT2; auto.
reflexivity. reflexivity. traceEq.
econstructor; eauto with coqlib.
- apply agree_exten with ls; auto.
- intros. rewrite G; auto. unfold ls3. rewrite Locmap.gso. auto.
- simpl. destruct l; auto. simpl in H1. intuition congruence.
+ apply agree_undef_temps2. apply agree_exten with ls; auto.
+ intros. rewrite F; auto. unfold ls3. rewrite Locmap.gso.
+ rewrite undef_op_others; auto.
+ apply Loc.diff_sym. simpl in H1; tauto.
(* Lcall *)
ExploitWT. inversion WTI. subst ros0 args0 res0. rewrite <- H0.
@@ -1086,13 +1142,13 @@ Proof.
destruct (add_reload_correct_2 tge s' (transf_function f) sp fn
(Lcall sig (inl ident (reg_for fn))
:: add_spill (loc_result sig) res (transf_code f b))
- ls2 tm)
+ ls2 tm OK2)
as [ls3 [D [E [F G]]]].
assert (ARGS: Val.lessdef_list (map rs args)
(map ls3 (loc_arguments sig))).
replace (map ls3 (loc_arguments sig)) with (map ls2 (loc_arguments sig)).
rewrite B. apply agree_locs; auto.
- apply list_map_exten; intros. apply G.
+ apply list_map_exten; intros. apply F.
apply Loc.disjoint_notin with (loc_arguments sig).
apply loc_arguments_not_temporaries. auto.
left; econstructor; split.
@@ -1108,7 +1164,7 @@ Proof.
econstructor; eauto with coqlib.
rewrite H0. auto.
apply agree_postcall_call with ls sig; auto.
- intros. rewrite G; auto. congruence.
+ intros. rewrite F; auto. congruence.
(* direct call *)
rewrite <- H0 in H4.
destruct (parallel_move_arguments_correct tge s' (transf_function f) sp
@@ -1158,6 +1214,7 @@ Proof.
apply list_map_exten; intros. apply F.
apply Loc.diff_sym.
apply (loc_arguments_not_temporaries sig x (R IT1)); simpl; auto.
+ auto.
left; econstructor; split.
eapply star_plus_trans. eexact A. eapply plus_right. eexact D.
eapply exec_Ltailcall; eauto.
@@ -1197,8 +1254,7 @@ Proof.
(* Lbuiltin *)
ExploitWT; inv WTI.
exploit add_reloads_correct.
- instantiate (1 := args). apply arity_ok_enough. rewrite H3. auto.
- apply locs_acceptable_disj_temporaries; auto.
+ instantiate (1 := args). apply arity_ok_enough. rewrite H3. auto. auto.
intros [ls2 [A [B C]]].
exploit external_call_mem_extends; eauto.
apply agree_locs; eauto.
@@ -1214,7 +1270,7 @@ Proof.
econstructor; eauto with coqlib.
apply agree_set with ls; auto.
rewrite E. rewrite Locmap.gss. auto.
- intros. rewrite F; auto. rewrite Locmap.gso. auto.
+ intros. rewrite F; auto. rewrite Locmap.gso. rewrite undef_temps_others; auto.
apply reg_for_diff; auto.
(* Llabel *)
@@ -1231,8 +1287,7 @@ Proof.
(* Lcond true *)
ExploitWT; inv WTI.
exploit add_reloads_correct.
- eapply enough_temporaries_cond; eauto.
- apply locs_acceptable_disj_temporaries; auto.
+ eapply enough_temporaries_cond; eauto. auto.
intros [ls2 [A [B C]]].
left; econstructor; split.
eapply plus_right. eauto. eapply exec_Lcond_true; eauto.
@@ -1241,14 +1296,13 @@ Proof.
apply find_label_transf_function; eauto.
traceEq.
econstructor; eauto.
- apply agree_exten with ls; auto.
+ apply agree_undef_temps2. apply agree_exten with ls; auto.
eapply LTLin.find_label_is_tail; eauto.
(* Lcond false *)
ExploitWT; inv WTI.
exploit add_reloads_correct.
- eapply enough_temporaries_cond; eauto.
- apply locs_acceptable_disj_temporaries; auto.
+ eapply enough_temporaries_cond; eauto. auto.
intros [ls2 [A [B C]]].
left; econstructor; split.
eapply plus_right. eauto. eapply exec_Lcond_false; eauto.
@@ -1256,11 +1310,11 @@ Proof.
eapply agree_locs; eauto.
traceEq.
econstructor; eauto with coqlib.
- apply agree_exten with ls; auto.
+ apply agree_undef_temps2. apply agree_exten with ls; auto.
(* Ljumptable *)
ExploitWT; inv WTI.
- exploit add_reload_correct_2.
+ exploit add_reload_correct_2; eauto.
intros [ls2 [A [B [C D]]]].
left; econstructor; split.
eapply plus_right. eauto. eapply exec_Ljumptable; eauto.
@@ -1269,7 +1323,7 @@ Proof.
apply find_label_transf_function; eauto.
traceEq.
econstructor; eauto with coqlib.
- apply agree_exten with ls; auto.
+ apply agree_undef_temps2. apply agree_exten with ls; auto.
eapply LTLin.find_label_is_tail; eauto.
(* Lreturn *)
diff --git a/backend/Reloadtyping.v b/backend/Reloadtyping.v
index 1bb462dc..60be59b9 100644
--- a/backend/Reloadtyping.v
+++ b/backend/Reloadtyping.v
@@ -197,7 +197,8 @@ Proof.
destruct a.
simpl. decEq. eauto.
caseEq (slot_type s); intro SLOTTYPE; rewrite SLOTTYPE in H.
- destruct itmps. discriminate. simpl. decEq.
+ destruct itmps. discriminate.
+ simpl. decEq.
rewrite SLOTTYPE. auto with coqlib.
apply IHlocs; auto with coqlib.
destruct ftmps. discriminate. simpl. decEq.
diff --git a/backend/Selection.v b/backend/Selection.v
index ebdad8a2..68fb9ba1 100644
--- a/backend/Selection.v
+++ b/backend/Selection.v
@@ -101,8 +101,8 @@ Definition sel_constant (cst: Cminor.constant) : expr :=
match cst with
| Cminor.Ointconst n => Eop (Ointconst n) Enil
| Cminor.Ofloatconst f => Eop (Ofloatconst f) Enil
- | Cminor.Oaddrsymbol id ofs => Eop (Oaddrsymbol id ofs) Enil
- | Cminor.Oaddrstack ofs => Eop (Oaddrstack ofs) Enil
+ | Cminor.Oaddrsymbol id ofs => addrsymbol id ofs
+ | Cminor.Oaddrstack ofs => addrstack ofs
end.
Definition sel_unop (op: Cminor.unary_operation) (arg: expr) : expr :=
diff --git a/backend/Stacking.v b/backend/Stacking.v
index f289793e..2ea08beb 100644
--- a/backend/Stacking.v
+++ b/backend/Stacking.v
@@ -127,16 +127,10 @@ Definition restore_callee_save (fe: frame_env) (k: Mach.code) :=
behaviour. *)
Definition transl_op (fe: frame_env) (op: operation) :=
- match op with
- | Oaddrstack ofs => Oaddrstack (Int.add (Int.repr fe.(fe_size)) ofs)
- | _ => op
- end.
+ shift_stack_operation (Int.repr fe.(fe_size)) op.
Definition transl_addr (fe: frame_env) (addr: addressing) :=
- match addr with
- | Ainstack ofs => Ainstack (Int.add (Int.repr fe.(fe_size)) ofs)
- | _ => addr
- end.
+ shift_stack_addressing (Int.repr fe.(fe_size)) addr.
(** Translation of a Linear instruction. Prepends the corresponding
Mach instructions to the given list of instructions.
diff --git a/backend/Stackingproof.v b/backend/Stackingproof.v
index 68d179a6..5b06c71a 100644
--- a/backend/Stackingproof.v
+++ b/backend/Stackingproof.v
@@ -32,6 +32,7 @@ Require Import Events.
Require Import Globalenvs.
Require Import Smallstep.
Require Import Locations.
+Require LTL.
Require Import Linear.
Require Import Lineartyping.
Require Import Mach.
@@ -575,6 +576,58 @@ Proof.
intros. rewrite get_set_index_val_other; eauto with stacking. red; auto.
Qed.
+Lemma agree_undef_regs:
+ forall rl ls ls0 rs fr cs,
+ agree ls ls0 rs fr cs ->
+ (forall r, In r rl -> In (R r) temporaries) ->
+ agree (Locmap.undef (List.map R rl) ls) ls0 (undef_regs rl rs) fr cs.
+Proof.
+ induction rl; intros; simpl.
+ auto.
+ eapply IHrl; eauto.
+ apply agree_set_reg; auto with coqlib.
+ assert (In (R a) temporaries) by auto with coqlib.
+ red. destruct (mreg_type a).
+ destruct (zlt (index_int_callee_save a) 0).
+ generalize (bound_int_callee_save_pos b). omega.
+ elim (int_callee_save_not_destroyed a). auto. apply index_int_callee_save_pos2; auto.
+ destruct (zlt (index_float_callee_save a) 0).
+ generalize (bound_float_callee_save_pos b). omega.
+ elim (float_callee_save_not_destroyed a). auto. apply index_float_callee_save_pos2; auto.
+ intros. apply H0. auto with coqlib.
+Qed.
+
+Lemma agree_undef_temps:
+ forall ls ls0 rs fr cs,
+ agree ls ls0 rs fr cs ->
+ agree (LTL.undef_temps ls) ls0 (Mach.undef_temps rs) fr cs.
+Proof.
+ intros. unfold undef_temps, LTL.undef_temps.
+ change temporaries with (List.map R (int_temporaries ++ float_temporaries)).
+ apply agree_undef_regs; auto.
+ intros.
+ change temporaries with (List.map R (int_temporaries ++ float_temporaries)).
+ apply List.in_map. auto.
+Qed.
+
+Lemma agree_undef_op:
+ forall op env ls ls0 rs fr cs,
+ agree ls ls0 rs fr cs ->
+ agree (Linear.undef_op op ls) ls0 (Mach.undef_op (transl_op env op) rs) fr cs.
+Proof.
+ intros. exploit agree_undef_temps; eauto. intro.
+ destruct op; simpl; auto.
+Qed.
+
+Lemma agree_undef_getparam:
+ forall ls ls0 rs fr cs,
+ agree ls ls0 rs fr cs ->
+ agree (Locmap.set (R IT1) Vundef ls) ls0 (rs#IT1 <- Vundef) fr cs.
+Proof.
+ intros. exploit (agree_undef_regs (IT1 :: nil)); eauto.
+ simpl; intros. intuition congruence.
+Qed.
+
Lemma agree_return_regs:
forall ls ls0 rs fr cs rs',
agree ls ls0 rs fr cs ->
@@ -831,9 +884,10 @@ Proof.
intros until idx. destruct idx; simpl; auto. congruence.
apply incl_refl.
apply float_callee_save_norepet. eauto.
- intros [fr' [A [B C]]].
- exists fr'; intuition. unfold save_callee_save_float; eauto.
- apply C. auto. intros; subst idx. auto.
+ intros [fr' [A [B C]]].
+ exists fr'. split. unfold save_callee_save_float; eauto.
+ split. auto.
+ intros. apply C. auto. intros; subst. red; intros; subst idx. contradiction.
Qed.
Lemma save_callee_save_correct:
@@ -1216,24 +1270,15 @@ Qed.
Definition shift_sp (tf: Mach.function) (sp: val) :=
Val.add sp (Vint (Int.repr (-tf.(fn_framesize)))).
-Remark shift_offset_sp:
- forall f tf sp n v,
+Remark shift_sp_eq:
+ forall f tf sp,
transf_function f = OK tf ->
- offset_sp sp n = Some v ->
- offset_sp (shift_sp tf sp)
- (Int.add (Int.repr (fe_size (make_env (function_bounds f)))) n) = Some v.
+ shift_sp tf sp = Val.sub sp (Vint (Int.repr (fe_size (make_env (function_bounds f))))).
Proof.
- intros. destruct sp; try discriminate.
- unfold offset_sp in *.
- unfold shift_sp.
- rewrite (unfold_transf_function _ _ H). unfold fn_framesize.
- unfold Val.add. rewrite <- Int.neg_repr.
- set (p := Int.repr (fe_size (make_env (function_bounds f)))).
- inversion H0. decEq. decEq.
- rewrite Int.add_assoc. decEq.
- rewrite <- Int.add_assoc.
- rewrite (Int.add_commut (Int.neg p) p). rewrite Int.add_neg_zero.
- rewrite Int.add_commut. apply Int.add_zero.
+ intros. unfold shift_sp.
+ replace (fe_size (make_env (function_bounds f))) with (fn_framesize tf).
+ rewrite <- Int.neg_repr. destruct sp; simpl; auto; rewrite Int.sub_add_opp; auto.
+ rewrite (unfold_transf_function _ _ H). auto.
Qed.
Lemma shift_eval_operation:
@@ -1243,10 +1288,10 @@ Lemma shift_eval_operation:
eval_operation tge (shift_sp tf sp)
(transl_op (make_env (function_bounds f)) op) args = Some v.
Proof.
- intros until v. destruct op; intros; auto.
- simpl in *. rewrite symbols_preserved. auto.
- destruct args; auto. unfold eval_operation in *. unfold transl_op.
- apply shift_offset_sp; auto.
+ intros. rewrite <- H0. rewrite (shift_sp_eq f tf sp H). unfold transl_op.
+ rewrite (eval_operation_preserved ge tge).
+ apply shift_stack_eval_operation.
+ exact symbols_preserved.
Qed.
Lemma shift_eval_addressing:
@@ -1257,11 +1302,10 @@ Lemma shift_eval_addressing:
(transl_addr (make_env (function_bounds f)) addr) args =
Some v.
Proof.
- intros.
- unfold transl_addr, eval_addressing in *;
- destruct addr; try (rewrite symbols_preserved); auto.
- destruct args; try discriminate.
- apply shift_offset_sp; auto.
+ intros. rewrite <- H0. rewrite (shift_sp_eq f tf sp H). unfold transl_addr.
+ rewrite (eval_addressing_preserved ge tge).
+ apply shift_stack_eval_addressing.
+ exact symbols_preserved.
Qed.
(** Preservation of the arguments to an external call. *)
@@ -1389,23 +1433,29 @@ Proof.
intro BOUND; simpl in BOUND);
unfold transl_instr.
(* Lgetstack *)
- inv WTI. destruct BOUND.
- exists (State ts tf (shift_sp tf sp) (transl_code (make_env (function_bounds f)) b)
- (rs0#r <- (rs (S sl))) fr m).
- split. destruct sl.
+ inv WTI. destruct BOUND. unfold undef_getstack; destruct sl.
(* Lgetstack, local *)
+ exists (State ts tf (shift_sp tf sp) (transl_code (make_env (function_bounds f)) b)
+ (rs0#r <- (rs (S (Local z t)))) fr m); split.
apply plus_one. apply exec_Mgetstack.
apply get_slot_index. auto. apply index_local_valid. auto. congruence. congruence. auto.
eapply agree_locals; eauto.
+ econstructor; eauto with coqlib.
+ apply agree_set_reg; auto.
(* Lgetstack, incoming *)
- apply plus_one; apply exec_Mgetparam.
+ exists (State ts tf (shift_sp tf sp) (transl_code (make_env (function_bounds f)) b)
+ (rs0 # IT1 <- Vundef # r <- (rs (S (Incoming z t)))) fr m); split.
+ apply plus_one. apply exec_Mgetparam.
change (get_parent_slot ts z t (rs (S (Incoming z t)))).
eapply agree_incoming; eauto.
+ econstructor; eauto with coqlib.
+ apply agree_set_reg; auto. apply agree_undef_getparam; auto.
(* Lgetstack, outgoing *)
- apply plus_one; apply exec_Mgetstack.
- apply get_slot_index. auto. apply index_arg_valid. auto. congruence. congruence. auto.
+ exists (State ts tf (shift_sp tf sp) (transl_code (make_env (function_bounds f)) b)
+ (rs0#r <- (rs (S (Outgoing z t)))) fr m); split.
+ apply plus_one. apply exec_Mgetstack.
+ apply get_slot_index. auto. apply index_arg_valid. auto. congruence. congruence. auto.
eapply agree_outgoing; eauto.
- (* Lgetstack, common *)
econstructor; eauto with coqlib.
apply agree_set_reg; auto.
@@ -1432,22 +1482,23 @@ Proof.
symmetry. eapply agree_reg; eauto.
(* Lop *)
- exists (State ts tf (shift_sp tf sp) (transl_code (make_env (function_bounds f)) b) (rs0#res <- v) fr m); split.
+ set (op' := transl_op (make_env (function_bounds f)) op).
+ exists (State ts tf (shift_sp tf sp) (transl_code (make_env (function_bounds f)) b) ((undef_op op' rs0)#res <- v) fr m); split.
apply plus_one. apply exec_Mop.
apply shift_eval_operation. auto.
change mreg with RegEq.t.
rewrite (agree_eval_regs _ _ _ _ _ _ _ args AG). auto.
econstructor; eauto with coqlib.
- apply agree_set_reg; auto.
+ apply agree_set_reg; auto. apply agree_undef_op; auto.
(* Lload *)
- exists (State ts tf (shift_sp tf sp) (transl_code (make_env (function_bounds f)) b) (rs0#dst <- v) fr m); split.
+ exists (State ts tf (shift_sp tf sp) (transl_code (make_env (function_bounds f)) b) ((undef_temps rs0)#dst <- v) fr m); split.
apply plus_one; eapply exec_Mload; eauto.
apply shift_eval_addressing; auto.
change mreg with RegEq.t.
rewrite (agree_eval_regs _ _ _ _ _ _ _ args AG). eauto.
econstructor; eauto with coqlib.
- apply agree_set_reg; auto.
+ apply agree_set_reg; auto. apply agree_undef_temps; auto.
(* Lstore *)
econstructor; split.
@@ -1456,7 +1507,7 @@ Proof.
change mreg with RegEq.t.
rewrite (agree_eval_regs _ _ _ _ _ _ _ args AG). eauto.
rewrite (agree_eval_reg _ _ _ _ _ _ _ src AG). eauto.
- econstructor; eauto with coqlib.
+ econstructor; eauto with coqlib. apply agree_undef_temps; auto.
(* Lcall *)
assert (WTF': wt_fundef f'). eapply find_function_well_typed; eauto.
@@ -1504,14 +1555,14 @@ Proof.
apply agree_callee_save_return_regs.
(* Lbuiltin *)
- exists (State ts tf (shift_sp tf sp) (transl_code (make_env (function_bounds f)) b) (rs0#res <- v) fr m'); split.
+ econstructor; split.
apply plus_one. apply exec_Mbuiltin.
change mreg with RegEq.t.
rewrite (agree_eval_regs _ _ _ _ _ _ _ args AG).
eapply external_call_symbols_preserved; eauto.
exact symbols_preserved. exact varinfo_preserved.
econstructor; eauto with coqlib.
- apply agree_set_reg; auto.
+ apply agree_set_reg; auto. apply agree_undef_temps; auto.
(* Llabel *)
econstructor; split.
@@ -1530,14 +1581,14 @@ Proof.
apply plus_one; apply exec_Mcond_true.
rewrite <- (agree_eval_regs _ _ _ _ _ _ _ args AG) in H; eauto.
apply transl_find_label; eauto.
- econstructor; eauto.
+ econstructor; eauto. apply agree_undef_temps; auto.
eapply find_label_incl; eauto.
(* Lcond, false *)
econstructor; split.
apply plus_one; apply exec_Mcond_false.
rewrite <- (agree_eval_regs _ _ _ _ _ _ _ args AG) in H; auto.
- econstructor; eauto with coqlib.
+ econstructor; eauto with coqlib. apply agree_undef_temps; auto.
(* Ljumptable *)
econstructor; split.
@@ -1545,7 +1596,7 @@ Proof.
rewrite <- (agree_eval_reg _ _ _ _ _ _ _ arg AG) in H; eauto.
eauto.
apply transl_find_label; eauto.
- econstructor; eauto.
+ econstructor; eauto. apply agree_undef_temps; auto.
eapply find_label_incl; eauto.
(* Lreturn *)
diff --git a/backend/Stackingtyping.v b/backend/Stackingtyping.v
index 6ef86690..b42dbbb4 100644
--- a/backend/Stackingtyping.v
+++ b/backend/Stackingtyping.v
@@ -160,7 +160,7 @@ Proof.
apply wt_instrs_cons; auto.
constructor.
destruct o; simpl; congruence.
- rewrite H6. destruct o; reflexivity || congruence.
+ rewrite H6. symmetry. apply type_shift_stack_operation.
(* load *)
apply wt_instrs_cons; auto.
constructor; auto.