aboutsummaryrefslogtreecommitdiffstats
path: root/aarch64
diff options
context:
space:
mode:
authorLéo Gourdin <leo.gourdin@univ-grenoble-alpes.fr>2020-11-02 17:13:50 +0100
committerLéo Gourdin <leo.gourdin@univ-grenoble-alpes.fr>2020-11-02 17:13:50 +0100
commit241da496839a9101e843ce7b1da4a668f998498a (patch)
treeeafa2250ce4f5a8bb96e16afa6ebd9a7149e9435 /aarch64
parent8de1a1f5811470bc1d7d1a7b2f0e5193de40698e (diff)
downloadcompcert-kvx-241da496839a9101e843ce7b1da4a668f998498a.tar.gz
compcert-kvx-241da496839a9101e843ce7b1da4a668f998498a.zip
Preparation for postpass in aarch64 and refactoring
Diffstat (limited to 'aarch64')
-rw-r--r--aarch64/Asm.v20
-rw-r--r--aarch64/Asmblock.v1
-rw-r--r--aarch64/Asmblockdeps.v1841
-rw-r--r--aarch64/Asmblockgen.v40
-rw-r--r--aarch64/Asmblockgenproof0.v983
-rw-r--r--aarch64/Asmblockprops.v358
-rw-r--r--aarch64/Asmexpand.ml80
-rw-r--r--aarch64/Asmgen.v13
-rw-r--r--aarch64/Asmgenproof.v3
-rw-r--r--aarch64/PostpassScheduling.v530
-rw-r--r--aarch64/PostpassSchedulingOracle.ml1029
-rw-r--r--aarch64/PostpassSchedulingproof.v690
-rw-r--r--aarch64/TargetPrinter.ml8
13 files changed, 5515 insertions, 81 deletions
diff --git a/aarch64/Asm.v b/aarch64/Asm.v
index 22ab0222..5dafb01f 100644
--- a/aarch64/Asm.v
+++ b/aarch64/Asm.v
@@ -77,8 +77,8 @@ Lemma crbit_eq: forall (x y: crbit), {x=y} + {x<>y}.
Proof. decide equality. Defined.
Inductive dreg : Type :=
- | IR': iregsp -> dreg (**r 64- or 32-bit integer registers *)
- | FR': freg -> dreg. (**r double- or single-precision float registers *)
+ | IR: iregsp -> dreg (**r 64- or 32-bit integer registers *)
+ | FR: freg -> dreg. (**r double- or single-precision float registers *)
(** We model the following registers of the ARM architecture. *)
@@ -88,8 +88,8 @@ Inductive preg: Type :=
| PC: preg. (**r program counter *)
(* XXX: ireg no longer coerces to preg *)
-Coercion IR': iregsp >-> dreg.
-Coercion FR': freg >-> dreg.
+Coercion IR: iregsp >-> dreg.
+Coercion FR: freg >-> dreg.
Coercion DR: dreg >-> preg.
Definition SP:preg := XSP.
Coercion CR: crbit >-> preg.
@@ -108,7 +108,7 @@ End PregEq.
Module Pregmap := EMap(PregEq).
Definition preg_of_iregsp (r: iregsp) : preg :=
- match r with RR1 r => IR' r | XSP => SP end.
+ match r with RR1 r => IR r | XSP => SP end.
(** Conventional name for return address ([RA]) *)
@@ -359,7 +359,7 @@ Definition regset := Pregmap.t val.
or 0. *)
Definition ir0 (is:isize) (rs: regset) (r: ireg0) : val :=
- match r with RR0 r => rs (IR' r) | XZR => if is (* is W *) then Vint Int.zero else Vlong Int64.zero end.
+ match r with RR0 r => rs (IR r) | XZR => if is (* is W *) then Vint Int.zero else Vlong Int64.zero end.
(** Concise notations for accessing and updating the values of registers. *)
@@ -1357,10 +1357,10 @@ Qed.
Definition data_preg (r: preg) : bool :=
match r with
- | DR (IR' X16) => false
- | DR (IR' X30) => false
- | DR (IR' _) => true
- | DR (FR' _) => true
+ | DR (IR X16) => false
+ | DR (IR X30) => false
+ | DR (IR _) => true
+ | DR (FR _) => true
| CR _ => false
(* | SP => true; subsumed by IR (iregsp) *)
| PC => false
diff --git a/aarch64/Asmblock.v b/aarch64/Asmblock.v
index 96162f9e..ddd58223 100644
--- a/aarch64/Asmblock.v
+++ b/aarch64/Asmblock.v
@@ -36,6 +36,7 @@ Require Import Values Memory Events Globalenvs Smallstep.
Require Import Locations Conventions.
Require Stacklayout.
Require Import OptionMonad Asm.
+Require Export Asm.
Local Open Scope option_monad_scope.
diff --git a/aarch64/Asmblockdeps.v b/aarch64/Asmblockdeps.v
new file mode 100644
index 00000000..1acbdf1e
--- /dev/null
+++ b/aarch64/Asmblockdeps.v
@@ -0,0 +1,1841 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+(** * Translation from [Asmvliw] to [AbstractBB] *)
+
+(** We define a specific instance [L] of [AbstractBB] and translate [bblocks] from [Asmvliw] into [L].
+ [AbstractBB] will then define two semantics for [L]: a sequential and a parallel one.
+ We prove a bisimulation between the parallel semantics of [L] and [AsmVLIW].
+ We also prove a bisimulation between the sequential semantics of [L] and [Asmblock].
+ Then, the checkers on [Asmblock] and [Asmvliw] are deduced from those of [L].
+ *)
+
+ (*
+Require Import AST.
+Require Import Asmblock.
+Require Import Asmblockgenproof0 Asmblockprops.
+Require Import Values.
+Require Import Globalenvs.
+Require Import Memory.
+Require Import Errors.
+Require Import Integers.
+Require Import Floats.
+Require Import ZArith.
+Require Import Coqlib.
+Require Import ImpSimuTest.
+Require Import Axioms.
+Require Import Parallelizability.
+Require Import Asmvliw Permutation.
+Require Import Chunks.
+
+Require Import Lia.
+
+Open Scope impure.
+
+(** Definition of [L] *)
+
+Module P<: ImpParam.
+Module R := Pos.
+
+Section IMPPARAM.
+
+Definition env := Genv.t fundef unit.
+
+Inductive genv_wrap := Genv (ge: env) (fn: function).
+Definition genv := genv_wrap.
+
+Variable Ge: genv.
+
+Inductive value_wrap :=
+ | Val (v: val)
+ | Memstate (m: mem)
+.
+
+Definition value := value_wrap.
+
+Inductive control_op :=
+ | Oj_l (l: label)
+ | Ocb (bt: btest) (l: label)
+ | Ocbu (bt: btest) (l: label)
+ | Odiv
+ | Odivu
+ | OError
+ | OIncremPC (sz: Z)
+ | Ojumptable (l: list label)
+.
+
+Inductive arith_op :=
+ | OArithR (n: arith_name_r)
+ | OArithRR (n: arith_name_rr)
+ | OArithRI32 (n: arith_name_ri32) (imm: int)
+ | OArithRI64 (n: arith_name_ri64) (imm: int64)
+ | OArithRF32 (n: arith_name_rf32) (imm: float32)
+ | OArithRF64 (n: arith_name_rf64) (imm: float)
+ | OArithRRR (n: arith_name_rrr)
+ | OArithRRI32 (n: arith_name_rri32) (imm: int)
+ | OArithRRI64 (n: arith_name_rri64) (imm: int64)
+ | OArithARRR (n: arith_name_arrr)
+ | OArithARR (n: arith_name_arr)
+ | OArithARRI32 (n: arith_name_arri32) (imm: int)
+ | OArithARRI64 (n: arith_name_arri64) (imm: int64)
+.
+
+Coercion OArithR: arith_name_r >-> arith_op.
+Coercion OArithRR: arith_name_rr >-> arith_op.
+Coercion OArithRI32: arith_name_ri32 >-> Funclass.
+Coercion OArithRI64: arith_name_ri64 >-> Funclass.
+Coercion OArithRF32: arith_name_rf32 >-> Funclass.
+Coercion OArithRF64: arith_name_rf64 >-> Funclass.
+Coercion OArithRRR: arith_name_rrr >-> arith_op.
+Coercion OArithRRI32: arith_name_rri32 >-> Funclass.
+Coercion OArithRRI64: arith_name_rri64 >-> Funclass.
+
+Inductive load_op :=
+ | OLoadRRO (n: load_name) (trap: trapping_mode) (ofs: offset)
+ | OLoadRRR (n: load_name) (trap: trapping_mode)
+ | OLoadRRRXS (n: load_name) (trap: trapping_mode)
+.
+
+Coercion OLoadRRO: load_name >-> Funclass.
+
+Inductive store_op :=
+ | OStoreRRO (n: store_name) (ofs: offset)
+ | OStoreRRR (n: store_name)
+ | OStoreRRRXS (n: store_name)
+.
+
+Coercion OStoreRRO: store_name >-> Funclass.
+
+Inductive op_wrap :=
+ | Arith (ao: arith_op)
+ | Load (lo: load_op)
+ | Store (so: store_op)
+ | Control (co: control_op)
+ | Allocframe (sz: Z) (pos: ptrofs)
+ | Allocframe2 (sz: Z) (pos: ptrofs)
+ | Freeframe (sz: Z) (pos: ptrofs)
+ | Freeframe2 (sz: Z) (pos: ptrofs)
+ | Constant (v: val)
+ | Fail
+.
+
+Coercion Arith: arith_op >-> op_wrap.
+Coercion Load: load_op >-> op_wrap.
+Coercion Store: store_op >-> op_wrap.
+Coercion Control: control_op >-> op_wrap.
+
+Definition op := op_wrap.
+
+Definition arith_eval (ao: arith_op) (l: list value) :=
+ let (ge, fn) := Ge in
+ match ao, l with
+ | OArithR n, [] => Some (Val (arith_eval_r ge n))
+
+ | OArithRR n, [Val v] => Some (Val (arith_eval_rr n v))
+
+ | OArithRI32 n i, [] => Some (Val (arith_eval_ri32 n i))
+ | OArithRI64 n i, [] => Some (Val (arith_eval_ri64 n i))
+ | OArithRF32 n i, [] => Some (Val (arith_eval_rf32 n i))
+ | OArithRF64 n i, [] => Some (Val (arith_eval_rf64 n i))
+
+ | OArithRRR n, [Val v1; Val v2] => Some (Val (arith_eval_rrr n v1 v2))
+ | OArithRRI32 n i, [Val v] => Some (Val (arith_eval_rri32 n v i))
+ | OArithRRI64 n i, [Val v] => Some (Val (arith_eval_rri64 n v i))
+
+ | OArithARR n, [Val v1; Val v2] => Some (Val (arith_eval_arr n v1 v2))
+ | OArithARRR n, [Val v1; Val v2; Val v3] => Some (Val (arith_eval_arrr n v1 v2 v3))
+ | OArithARRI32 n i, [Val v1; Val v2] => Some (Val (arith_eval_arri32 n v1 v2 i))
+ | OArithARRI64 n i, [Val v1; Val v2] => Some (Val (arith_eval_arri64 n v1 v2 i))
+
+ | _, _ => None
+ end.
+
+Definition exec_incorrect_load trap chunk :=
+ match trap with
+ | TRAP => None
+ | NOTRAP => Some (Val (concrete_default_notrap_load_value chunk))
+ end.
+
+Definition exec_load_deps_offset (trap: trapping_mode) (chunk: memory_chunk) (m: mem) (v: val) (ofs: offset) :=
+ let (ge, fn) := Ge in
+ match (eval_offset ofs) with
+ | OK ptr => match Mem.loadv chunk m (Val.offset_ptr v ptr) with
+ | None => exec_incorrect_load trap chunk
+ | Some vl => Some (Val vl)
+ end
+ | _ => None
+ end.
+
+Definition exec_load_deps_reg (trap: trapping_mode) (chunk: memory_chunk) (m: mem) (v vo: val) :=
+ match Mem.loadv chunk m (Val.addl v vo) with
+ | None => exec_incorrect_load trap chunk
+ | Some vl => Some (Val vl)
+ end.
+
+Definition exec_load_deps_regxs (trap: trapping_mode) (chunk: memory_chunk) (m: mem) (v vo: val) :=
+ match Mem.loadv chunk m (Val.addl v (Val.shll vo (scale_of_chunk chunk))) with
+ | None => exec_incorrect_load trap chunk
+ | Some vl => Some (Val vl)
+ end.
+
+Definition load_eval (lo: load_op) (l: list value) :=
+ match lo, l with
+ | OLoadRRO n trap ofs, [Val v; Memstate m] => exec_load_deps_offset trap (load_chunk n) m v ofs
+ | OLoadRRR n trap, [Val v; Val vo; Memstate m] => exec_load_deps_reg trap (load_chunk n) m v vo
+ | OLoadRRRXS n trap, [Val v; Val vo; Memstate m] => exec_load_deps_regxs trap (load_chunk n) m v vo
+ | _, _ => None
+ end.
+
+Definition exec_store_deps_offset (chunk: memory_chunk) (m: mem) (vs va: val) (ofs: offset) :=
+ let (ge, fn) := Ge in
+ match (eval_offset ofs) with
+ | OK ptr => match Mem.storev chunk m (Val.offset_ptr va ptr) vs with
+ | None => None
+ | Some m' => Some (Memstate m')
+ end
+ | _ => None
+ end.
+
+Definition exec_store_deps_reg (chunk: memory_chunk) (m: mem) (vs va vo: val) :=
+ match Mem.storev chunk m (Val.addl va vo) vs with
+ | None => None
+ | Some m' => Some (Memstate m')
+ end.
+
+Definition exec_store_deps_regxs (chunk: memory_chunk) (m: mem) (vs va vo: val) :=
+ match Mem.storev chunk m (Val.addl va (Val.shll vo (scale_of_chunk chunk))) vs with
+ | None => None
+ | Some m' => Some (Memstate m')
+ end.
+
+Definition store_eval (so: store_op) (l: list value) :=
+ match so, l with
+ | OStoreRRO n ofs, [Val vs; Val va; Memstate m] => exec_store_deps_offset (store_chunk n) m vs va ofs
+ | OStoreRRR n, [Val vs; Val va; Val vo; Memstate m] => exec_store_deps_reg (store_chunk n) m vs va vo
+ | OStoreRRRXS n, [Val vs; Val va; Val vo; Memstate m] => exec_store_deps_regxs (store_chunk n) m vs va vo
+ | _, _ => None
+ end.
+
+Local Open Scope Z.
+
+Remark size_chunk_positive: forall chunk,
+ (size_chunk chunk) > 0.
+Proof.
+ destruct chunk; simpl; lia.
+Qed.
+
+Remark size_chunk_small: forall chunk,
+ (size_chunk chunk) <= 8.
+Proof.
+ destruct chunk; simpl; lia.
+Qed.
+
+Definition disjoint_chunks
+ (ofs1 : offset) (chunk1 : memory_chunk)
+ (ofs2 : offset) (chunk2 : memory_chunk) :=
+ Intv.disjoint ((Ptrofs.unsigned ofs1),
+ ((Ptrofs.unsigned ofs1) + (size_chunk chunk1)))
+ ((Ptrofs.unsigned ofs2),
+ ((Ptrofs.unsigned ofs2) + (size_chunk chunk2))).
+
+Definition small_offset_threshold := 18446744073709551608.
+
+Lemma store_store_disjoint_offsets :
+ forall n1 n2 ofs1 ofs2 vs1 vs2 va m0 m1 m2 m1' m2',
+ (disjoint_chunks ofs1 (store_chunk n1) ofs2 (store_chunk n2)) ->
+ (Ptrofs.unsigned ofs1) < small_offset_threshold ->
+ (Ptrofs.unsigned ofs2) < small_offset_threshold ->
+ store_eval (OStoreRRO n1 ofs1) [vs1; va; Memstate m0] = Some (Memstate m1) ->
+ store_eval (OStoreRRO n2 ofs2) [vs2; va; Memstate m1] = Some (Memstate m2) ->
+ store_eval (OStoreRRO n2 ofs2) [vs2; va; Memstate m0] = Some (Memstate m1') ->
+ store_eval (OStoreRRO n1 ofs1) [vs1; va; Memstate m1'] = Some (Memstate m2') ->
+ m2 = m2'.
+Proof.
+ intros until m2'.
+ intros DISJOINT SMALL1 SMALL2 STORE0 STORE1 STORE0' STORE1'.
+ unfold disjoint_chunks in DISJOINT.
+ destruct vs1 as [v1 | ]; simpl in STORE0, STORE1'; try congruence.
+ destruct vs2 as [v2 | ]; simpl in STORE1, STORE0'; try congruence.
+ destruct va as [base | ]; try congruence.
+ unfold exec_store_deps_offset in *.
+ destruct Ge.
+ unfold eval_offset in *; simpl in *.
+ unfold Mem.storev in *.
+ unfold Val.offset_ptr in *.
+ destruct base as [ | | | | | wblock wpofs] in * ; try congruence.
+ destruct (Mem.store _ _ _ _ _) eqn:E0; try congruence.
+ inv STORE0.
+ destruct (Mem.store (store_chunk n2) _ _ _ _) eqn:E1; try congruence.
+ inv STORE1.
+ destruct (Mem.store (store_chunk n2) m0 _ _ _) eqn:E0'; try congruence.
+ inv STORE0'.
+ destruct (Mem.store _ m1' _ _ _) eqn:E1'; try congruence.
+ inv STORE1'.
+ assert (Some m2 = Some m2').
+ 2: congruence.
+ rewrite <- E1.
+ rewrite <- E1'.
+ eapply Mem.store_store_other.
+ 2, 3: eassumption.
+
+ right.
+ pose proof (size_chunk_positive (store_chunk n1)).
+ pose proof (size_chunk_positive (store_chunk n2)).
+ pose proof (size_chunk_small (store_chunk n1)).
+ pose proof (size_chunk_small (store_chunk n2)).
+ destruct (Intv.range_disjoint _ _ DISJOINT) as [DIS | [DIS | DIS]];
+ unfold Intv.empty in DIS; simpl in DIS.
+ 1, 2: lia.
+ pose proof (Ptrofs.unsigned_range ofs1).
+ pose proof (Ptrofs.unsigned_range ofs2).
+ unfold small_offset_threshold in *.
+ destruct (Ptrofs.unsigned_add_either wpofs ofs1) as [R1 | R1]; rewrite R1;
+ destruct (Ptrofs.unsigned_add_either wpofs ofs2) as [R2 | R2]; rewrite R2;
+ change Ptrofs.modulus with 18446744073709551616 in *;
+ lia.
+Qed.
+
+Lemma load_store_disjoint_offsets :
+ forall n1 n2 tm ofs1 ofs2 vs va m0 m1,
+ (disjoint_chunks ofs1 (store_chunk n1) ofs2 (load_chunk n2)) ->
+ (Ptrofs.unsigned ofs1) < small_offset_threshold ->
+ (Ptrofs.unsigned ofs2) < small_offset_threshold ->
+ store_eval (OStoreRRO n1 ofs1) [vs; va; Memstate m0] = Some (Memstate m1) ->
+ load_eval (OLoadRRO n2 tm ofs2) [va; Memstate m1] =
+ load_eval (OLoadRRO n2 tm ofs2) [va; Memstate m0].
+Proof.
+ intros until m1.
+ intros DISJOINT SMALL1 SMALL2 STORE0.
+ destruct vs as [v | ]; simpl in STORE0; try congruence.
+ destruct va as [base | ]; try congruence.
+ unfold exec_store_deps_offset in *.
+ unfold eval_offset in *; simpl in *.
+ unfold exec_load_deps_offset.
+ unfold Mem.storev, Mem.loadv in *.
+ destruct Ge in *.
+ unfold eval_offset in *.
+ unfold Val.offset_ptr in *.
+ destruct base as [ | | | | | wblock wpofs] in * ; try congruence.
+ destruct (Mem.store _ _ _ _) eqn:E0; try congruence.
+ inv STORE0.
+ assert (
+ (Mem.load (load_chunk n2) m1 wblock
+ (Ptrofs.unsigned (Ptrofs.add wpofs ofs2))) =
+ (Mem.load (load_chunk n2) m0 wblock
+ (Ptrofs.unsigned (Ptrofs.add wpofs ofs2))) ) as LOADS.
+ {
+ eapply Mem.load_store_other.
+ eassumption.
+ right.
+ pose proof (size_chunk_positive (store_chunk n1)).
+ pose proof (size_chunk_positive (load_chunk n2)).
+ pose proof (size_chunk_small (store_chunk n1)).
+ pose proof (size_chunk_small (load_chunk n2)).
+ destruct (Intv.range_disjoint _ _ DISJOINT) as [DIS | [DIS | DIS]];
+ unfold Intv.empty in DIS; simpl in DIS.
+ 1,2: lia.
+
+ pose proof (Ptrofs.unsigned_range ofs1).
+ pose proof (Ptrofs.unsigned_range ofs2).
+ unfold small_offset_threshold in *.
+ destruct (Ptrofs.unsigned_add_either wpofs ofs1) as [R1 | R1]; rewrite R1;
+ destruct (Ptrofs.unsigned_add_either wpofs ofs2) as [R2 | R2]; rewrite R2;
+ change Ptrofs.modulus with 18446744073709551616 in *;
+ lia.
+ }
+ destruct (Mem.load _ m1 _ _) in *; destruct (Mem.load _ m0 _ _) in *; congruence.
+Qed.
+
+Definition goto_label_deps (f: function) (lbl: label) (vpc: val) :=
+ match label_pos lbl 0 (fn_blocks f) with
+ | None => None
+ | Some pos =>
+ match vpc with
+ | Vptr b ofs => Some (Val (Vptr b (Ptrofs.repr pos)))
+ | _ => None
+ end
+ end.
+
+Definition eval_branch_deps (f: function) (l: label) (vpc: val) (res: option bool) :=
+ match res with
+ | Some true => goto_label_deps f l vpc
+ | Some false => Some (Val vpc)
+ | None => None
+ end.
+
+Definition control_eval (o: control_op) (l: list value) :=
+ let (ge, fn) := Ge in
+ match o, l with
+ | (Ojumptable tbl), [Val index; Val vpc] =>
+ match index with
+ | Vint n =>
+ match list_nth_z tbl (Int.unsigned n) with
+ | None => None
+ | Some lbl => goto_label_deps fn lbl vpc
+ end
+ | _ => None
+ end
+ | Oj_l l, [Val vpc] => goto_label_deps fn l vpc
+ | Ocb bt l, [Val v; Val vpc] =>
+ match cmp_for_btest bt with
+ | (Some c, Int) => eval_branch_deps fn l vpc (Val.cmp_bool c v (Vint (Int.repr 0)))
+ | (Some c, Long) => eval_branch_deps fn l vpc (Val.cmpl_bool c v (Vlong (Int64.repr 0)))
+ | (None, _) => None
+ end
+ | Ocbu bt l, [Val v; Val vpc] =>
+ match cmpu_for_btest bt with
+ | (Some c, Int) => eval_branch_deps fn l vpc (Val_cmpu_bool c v (Vint (Int.repr 0)))
+ | (Some c, Long) => eval_branch_deps fn l vpc (Val_cmplu_bool c v (Vlong (Int64.repr 0)))
+ | (None, _) => None
+ end
+ | Odiv, [Val v1; Val v2] =>
+ match Val.divs v1 v2 with
+ | Some v => Some (Val v)
+ | None => None
+ end
+ | Odivu, [Val v1; Val v2] =>
+ match Val.divu v1 v2 with
+ | Some v => Some (Val v)
+ | None => None
+ end
+ | OIncremPC sz, [Val vpc] => Some (Val (Val.offset_ptr vpc (Ptrofs.repr sz)))
+ | OError, _ => None
+ | _, _ => None
+ end.
+
+Definition op_eval (o: op) (l: list value) :=
+ match o, l with
+ | Arith o, l => arith_eval o l
+ | Load o, l => load_eval o l
+ | Store o, l => store_eval o l
+ | Control o, l => control_eval o l
+ | Allocframe sz pos, [Val spv; Memstate m] =>
+ let (m1, stk) := Mem.alloc m 0 sz in
+ let sp := (Vptr stk Ptrofs.zero) in
+ match Mem.storev Mptr m1 (Val.offset_ptr sp pos) spv with
+ | None => None
+ | Some m => Some (Memstate m)
+ end
+ | Allocframe2 sz pos, [Val spv; Memstate m] =>
+ let (m1, stk) := Mem.alloc m 0 sz in
+ let sp := (Vptr stk Ptrofs.zero) in
+ match Mem.storev Mptr m1 (Val.offset_ptr sp pos) spv with
+ | None => None
+ | Some m => Some (Val sp)
+ end
+ | Freeframe sz pos, [Val spv; Memstate m] =>
+ match Mem.loadv Mptr m (Val.offset_ptr spv pos) with
+ | None => None
+ | Some v =>
+ match spv with
+ | Vptr stk ofs =>
+ match Mem.free m stk 0 sz with
+ | None => None
+ | Some m' => Some (Memstate m')
+ end
+ | _ => None
+ end
+ end
+ | Freeframe2 sz pos, [Val spv; Memstate m] =>
+ match Mem.loadv Mptr m (Val.offset_ptr spv pos) with
+ | None => None
+ | Some v =>
+ match spv with
+ | Vptr stk ofs =>
+ match Mem.free m stk 0 sz with
+ | None => None
+ | Some m' => Some (Val v)
+ end
+ | _ => None
+ end
+ end
+ | Constant v, [] => Some (Val v)
+ | Fail, _ => None
+ | _, _ => None
+ end.
+
+
+Definition arith_op_eq (o1 o2: arith_op): ?? bool :=
+ match o1 with
+ | OArithR n1 =>
+ match o2 with OArithR n2 => struct_eq n1 n2 | _ => RET false end
+ | OArithRR n1 =>
+ match o2 with OArithRR n2 => phys_eq n1 n2 | _ => RET false end
+ | OArithRI32 n1 i1 =>
+ match o2 with OArithRI32 n2 i2 => iandb (phys_eq n1 n2) (phys_eq i1 i2) | _ => RET false end
+ | OArithRI64 n1 i1 =>
+ match o2 with OArithRI64 n2 i2 => iandb (phys_eq n1 n2) (phys_eq i1 i2) | _ => RET false end
+ | OArithRF32 n1 i1 =>
+ match o2 with OArithRF32 n2 i2 => iandb (phys_eq n1 n2) (phys_eq i1 i2) | _ => RET false end
+ | OArithRF64 n1 i1 =>
+ match o2 with OArithRF64 n2 i2 => iandb (phys_eq n1 n2) (phys_eq i1 i2) | _ => RET false end
+ | OArithRRR n1 =>
+ match o2 with OArithRRR n2 => phys_eq n1 n2 | _ => RET false end
+ | OArithRRI32 n1 i1 =>
+ match o2 with OArithRRI32 n2 i2 => iandb (phys_eq n1 n2) (phys_eq i1 i2) | _ => RET false end
+ | OArithRRI64 n1 i1 =>
+ match o2 with OArithRRI64 n2 i2 => iandb (phys_eq n1 n2) (phys_eq i1 i2) | _ => RET false end
+ | OArithARRR n1 =>
+ match o2 with OArithARRR n2 => phys_eq n1 n2 | _ => RET false end
+ | OArithARR n1 =>
+ match o2 with OArithARR n2 => phys_eq n1 n2 | _ => RET false end
+ | OArithARRI32 n1 i1 =>
+ match o2 with OArithARRI32 n2 i2 => iandb (phys_eq n1 n2) (phys_eq i1 i2) | _ => RET false end
+ | OArithARRI64 n1 i1 =>
+ match o2 with OArithARRI64 n2 i2 => iandb (phys_eq n1 n2) (phys_eq i1 i2) | _ => RET false end
+ end.
+
+Ltac my_wlp_simplify := wlp_xsimplify ltac:(intros; subst; simpl in * |- *; congruence || intuition eauto with wlp).
+
+Lemma arith_op_eq_correct o1 o2:
+ WHEN arith_op_eq o1 o2 ~> b THEN b = true -> o1 = o2.
+Proof.
+ destruct o1, o2; my_wlp_simplify; try congruence.
+Qed.
+Hint Resolve arith_op_eq_correct: wlp.
+Opaque arith_op_eq_correct.
+
+Definition offset_eq (ofs1 ofs2 : offset): ?? bool :=
+ RET (Ptrofs.eq ofs1 ofs2).
+
+Lemma offset_eq_correct ofs1 ofs2:
+ WHEN offset_eq ofs1 ofs2 ~> b THEN b = true -> ofs1 = ofs2.
+Proof.
+ wlp_simplify.
+ pose (Ptrofs.eq_spec ofs1 ofs2).
+ rewrite H in *.
+ trivial.
+Qed.
+Hint Resolve offset_eq_correct: wlp.
+
+Definition trapping_mode_eq trap1 trap2 :=
+ RET (match trap1, trap2 with
+ | TRAP, TRAP | NOTRAP, NOTRAP => true
+ | TRAP, NOTRAP | NOTRAP, TRAP => false
+ end).
+Lemma trapping_mode_eq_correct t1 t2:
+ WHEN trapping_mode_eq t1 t2 ~> b THEN b = true -> t1 = t2.
+Proof.
+ wlp_simplify.
+ destruct t1; destruct t2; trivial; discriminate.
+Qed.
+Hint Resolve trapping_mode_eq_correct: wlp.
+
+Definition load_op_eq (o1 o2: load_op): ?? bool :=
+ match o1 with
+ | OLoadRRO n1 trap ofs1 =>
+ match o2 with
+ | OLoadRRO n2 trap2 ofs2 => iandb (phys_eq n1 n2) (iandb (offset_eq ofs1 ofs2) (trapping_mode_eq trap trap2))
+ | _ => RET false
+ end
+ | OLoadRRR n1 trap =>
+ match o2 with
+ | OLoadRRR n2 trap2 => iandb (phys_eq n1 n2) (trapping_mode_eq trap trap2)
+ | _ => RET false
+ end
+ | OLoadRRRXS n1 trap =>
+ match o2 with
+ | OLoadRRRXS n2 trap2 => iandb (phys_eq n1 n2) (trapping_mode_eq trap trap2)
+ | _ => RET false
+ end
+ end.
+
+Lemma load_op_eq_correct o1 o2:
+ WHEN load_op_eq o1 o2 ~> b THEN b = true -> o1 = o2.
+Proof.
+ destruct o1, o2; wlp_simplify; try discriminate.
+ { f_equal.
+ destruct trap, trap0; simpl in *; trivial; discriminate.
+ pose (Ptrofs.eq_spec ofs ofs0).
+ rewrite H in *. trivial. }
+ all: destruct trap, trap0; simpl in *; trivial; discriminate.
+Qed.
+Hint Resolve load_op_eq_correct: wlp.
+Opaque load_op_eq_correct.
+
+Definition store_op_eq (o1 o2: store_op): ?? bool :=
+ match o1 with
+ | OStoreRRO n1 ofs1 =>
+ match o2 with OStoreRRO n2 ofs2 => iandb (phys_eq n1 n2) (offset_eq ofs1 ofs2) | _ => RET false end
+ | OStoreRRR n1 =>
+ match o2 with OStoreRRR n2 => phys_eq n1 n2 | _ => RET false end
+ | OStoreRRRXS n1 =>
+ match o2 with OStoreRRRXS n2 => phys_eq n1 n2 | _ => RET false end
+ end.
+
+Lemma store_op_eq_correct o1 o2:
+ WHEN store_op_eq o1 o2 ~> b THEN b = true -> o1 = o2.
+Proof.
+ destruct o1, o2; wlp_simplify; try discriminate.
+ - f_equal. pose (Ptrofs.eq_spec ofs ofs0).
+ rewrite H in *. trivial.
+ - congruence.
+ - congruence.
+Qed.
+Hint Resolve store_op_eq_correct: wlp.
+Opaque store_op_eq_correct.
+
+Definition control_op_eq (c1 c2: control_op): ?? bool :=
+ match c1 with
+ | Oj_l l1 =>
+ match c2 with Oj_l l2 => phys_eq l1 l2 | _ => RET false end
+ | Ocb bt1 l1 =>
+ match c2 with Ocb bt2 l2 => iandb (phys_eq bt1 bt2) (phys_eq l1 l2) | _ => RET false end
+ | Ocbu bt1 l1 =>
+ match c2 with Ocbu bt2 l2 => iandb (phys_eq bt1 bt2) (phys_eq l1 l2) | _ => RET false end
+ | Ojumptable tbl1 =>
+ match c2 with Ojumptable tbl2 => phys_eq tbl1 tbl2 | _ => RET false end
+ | Odiv =>
+ match c2 with Odiv => RET true | _ => RET false end
+ | Odivu =>
+ match c2 with Odivu => RET true | _ => RET false end
+ | OIncremPC sz1 =>
+ match c2 with OIncremPC sz2 => RET (Z.eqb sz1 sz2) | _ => RET false end
+ | OError =>
+ match c2 with OError => RET true | _ => RET false end
+ end.
+
+Lemma control_op_eq_correct c1 c2:
+ WHEN control_op_eq c1 c2 ~> b THEN b = true -> c1 = c2.
+Proof.
+ destruct c1, c2; wlp_simplify; try rewrite Z.eqb_eq in * |-; try congruence.
+Qed.
+Hint Resolve control_op_eq_correct: wlp.
+Opaque control_op_eq_correct.
+
+Definition op_eq (o1 o2: op): ?? bool :=
+ match o1 with
+ | Arith i1 =>
+ match o2 with Arith i2 => arith_op_eq i1 i2 | _ => RET false end
+ | Load i1 =>
+ match o2 with Load i2 => load_op_eq i1 i2 | _ => RET false end
+ | Store i1 =>
+ match o2 with Store i2 => store_op_eq i1 i2 | _ => RET false end
+ | Control i1 =>
+ match o2 with Control i2 => control_op_eq i1 i2 | _ => RET false end
+ | Allocframe sz1 pos1 =>
+ match o2 with Allocframe sz2 pos2 => iandb (RET (Z.eqb sz1 sz2)) (phys_eq pos1 pos2) | _ => RET false end
+ | Allocframe2 sz1 pos1 =>
+ match o2 with Allocframe2 sz2 pos2 => iandb (RET (Z.eqb sz1 sz2)) (phys_eq pos1 pos2) | _ => RET false end
+ | Freeframe sz1 pos1 =>
+ match o2 with Freeframe sz2 pos2 => iandb (RET (Z.eqb sz1 sz2)) (phys_eq pos1 pos2) | _ => RET false end
+ | Freeframe2 sz1 pos1 =>
+ match o2 with Freeframe2 sz2 pos2 => iandb (RET (Z.eqb sz1 sz2)) (phys_eq pos1 pos2) | _ => RET false end
+ | Constant c1 =>
+ match o2 with Constant c2 => phys_eq c1 c2 | _ => RET false end
+ | Fail =>
+ match o2 with Fail => RET true | _ => RET false end
+ end.
+
+Theorem op_eq_correct o1 o2:
+ WHEN op_eq o1 o2 ~> b THEN b=true -> o1 = o2.
+Proof.
+ destruct o1, o2; wlp_simplify; try rewrite Z.eqb_eq in * |- ; try congruence.
+Qed.
+Hint Resolve op_eq_correct: wlp.
+Global Opaque op_eq_correct.
+
+End IMPPARAM.
+
+End P.
+
+Module L <: ISeqLanguage with Module LP:=P.
+
+Module LP:=P.
+
+Include MkSeqLanguage P.
+
+End L.
+
+Module IST := ImpSimu L ImpPosDict.
+
+Import L.
+Import P.
+
+(** Compilation from [Asmvliw] to [L] *)
+
+Local Open Scope positive_scope.
+
+Definition pmem : R.t := 1.
+
+Definition ireg_to_pos (ir: ireg) : R.t :=
+ match ir with
+ | GPR0 => 1 | GPR1 => 2 | GPR2 => 3 | GPR3 => 4 | GPR4 => 5 | GPR5 => 6 | GPR6 => 7 | GPR7 => 8 | GPR8 => 9 | GPR9 => 10
+ | GPR10 => 11 | GPR11 => 12 | GPR12 => 13 | GPR13 => 14 | GPR14 => 15 | GPR15 => 16 | GPR16 => 17 | GPR17 => 18 | GPR18 => 19 | GPR19 => 20
+ | GPR20 => 21 | GPR21 => 22 | GPR22 => 23 | GPR23 => 24 | GPR24 => 25 | GPR25 => 26 | GPR26 => 27 | GPR27 => 28 | GPR28 => 29 | GPR29 => 30
+ | GPR30 => 31 | GPR31 => 32 | GPR32 => 33 | GPR33 => 34 | GPR34 => 35 | GPR35 => 36 | GPR36 => 37 | GPR37 => 38 | GPR38 => 39 | GPR39 => 40
+ | GPR40 => 41 | GPR41 => 42 | GPR42 => 43 | GPR43 => 44 | GPR44 => 45 | GPR45 => 46 | GPR46 => 47 | GPR47 => 48 | GPR48 => 49 | GPR49 => 50
+ | GPR50 => 51 | GPR51 => 52 | GPR52 => 53 | GPR53 => 54 | GPR54 => 55 | GPR55 => 56 | GPR56 => 57 | GPR57 => 58 | GPR58 => 59 | GPR59 => 60
+ | GPR60 => 61 | GPR61 => 62 | GPR62 => 63 | GPR63 => 64
+ end
+.
+
+Lemma ireg_to_pos_discr: forall r r', r <> r' -> ireg_to_pos r <> ireg_to_pos r'.
+Proof.
+ destruct r; destruct r'; try contradiction; discriminate.
+Qed.
+
+Definition ppos (r: preg) : R.t :=
+ match r with
+ | RA => 2
+ | PC => 3
+ | IR ir => 3 + ireg_to_pos ir
+ end
+.
+
+Notation "# r" := (ppos r) (at level 100, right associativity).
+
+Lemma not_eq_add:
+ forall k n n', n <> n' -> k + n <> k + n'.
+Proof.
+ intros k n n' H1 H2. apply H1; clear H1. eapply Pos.add_reg_l; eauto.
+Qed.
+
+Lemma ppos_discr: forall r r', r <> r' -> ppos r <> ppos r'.
+Proof.
+ destruct r; destruct r'.
+ all: try discriminate; try contradiction.
+ - intros. apply not_eq_add. apply ireg_to_pos_discr. congruence.
+ - intros. unfold ppos. cutrewrite (3 + ireg_to_pos g = (1 + ireg_to_pos g) + 2). apply Pos.add_no_neutral.
+ apply eq_sym. rewrite Pos.add_comm. rewrite Pos.add_assoc. reflexivity.
+ - intros. unfold ppos. rewrite Pos.add_comm. apply Pos.add_no_neutral.
+ - intros. unfold ppos. apply not_eq_sym.
+ cutrewrite (3 + ireg_to_pos g = (1 + ireg_to_pos g) + 2). apply Pos.add_no_neutral.
+ apply eq_sym. rewrite Pos.add_comm. rewrite Pos.add_assoc. reflexivity.
+ - intros. unfold ppos. apply not_eq_sym. rewrite Pos.add_comm. apply Pos.add_no_neutral.
+Qed.
+
+Lemma ppos_pmem_discr: forall r, pmem <> ppos r.
+Proof.
+ intros. destruct r.
+ - unfold ppos. unfold pmem. apply not_eq_sym. rewrite Pos.add_comm. cutrewrite (3 = 2 + 1). rewrite Pos.add_assoc. apply Pos.add_no_neutral.
+ reflexivity.
+ - unfold ppos. unfold pmem. discriminate.
+ - unfold ppos. unfold pmem. discriminate.
+Qed.
+
+(** Inversion functions, used for debug traces *)
+
+Definition pos_to_ireg (p: R.t) : option gpreg :=
+ match p with
+ | 1 => Some GPR0 | 2 => Some GPR1 | 3 => Some GPR2 | 4 => Some GPR3 | 5 => Some GPR4 | 6 => Some GPR5 | 7 => Some GPR6 | 8 => Some GPR7 | 9 => Some GPR8 | 10 => Some GPR9
+ | 11 => Some GPR10 | 12 => Some GPR11 | 13 => Some GPR12 | 14 => Some GPR13 | 15 => Some GPR14 | 16 => Some GPR15 | 17 => Some GPR16 | 18 => Some GPR17 | 19 => Some GPR18 | 20 => Some GPR19
+ | 21 => Some GPR20 | 22 => Some GPR21 | 23 => Some GPR22 | 24 => Some GPR23 | 25 => Some GPR24 | 26 => Some GPR25 | 27 => Some GPR26 | 28 => Some GPR27 | 29 => Some GPR28 | 30 => Some GPR29
+ | 31 => Some GPR30 | 32 => Some GPR31 | 33 => Some GPR32 | 34 => Some GPR33 | 35 => Some GPR34 | 36 => Some GPR35 | 37 => Some GPR36 | 38 => Some GPR37 | 39 => Some GPR38 | 40 => Some GPR39
+ | 41 => Some GPR40 | 42 => Some GPR41 | 43 => Some GPR42 | 44 => Some GPR43 | 45 => Some GPR44 | 46 => Some GPR45 | 47 => Some GPR46 | 48 => Some GPR47 | 49 => Some GPR48 | 50 => Some GPR49
+ | 51 => Some GPR50 | 52 => Some GPR51 | 53 => Some GPR52 | 54 => Some GPR53 | 55 => Some GPR54 | 56 => Some GPR55 | 57 => Some GPR56 | 58 => Some GPR57 | 59 => Some GPR58 | 60 => Some GPR59
+ | 61 => Some GPR60 | 62 => Some GPR61 | 63 => Some GPR62 | 64 => Some GPR63
+ | _ => None
+ end.
+
+Definition inv_ppos (p: R.t) : option preg :=
+ match p with
+ | 1 => None
+ | 2 => Some RA | 3 => Some PC
+ | n => match pos_to_ireg (n-3) with
+ | None => None
+ | Some gpr => Some (IR gpr)
+ end
+ end.
+
+Notation "a @ b" := (Econs a b) (at level 102, right associativity).
+
+(** Translations of instructions *)
+
+Definition trans_control (ctl: control) : inst :=
+ match ctl with
+ | Pret => [(#PC, PReg(#RA))]
+ | Pcall s => [(#RA, PReg(#PC)); (#PC, Op (Arith (OArithR (Ploadsymbol s Ptrofs.zero))) Enil)]
+ | Picall r => [(#RA, PReg(#PC)); (#PC, PReg(#r))]
+ | Pgoto s => [(#PC, Op (Arith (OArithR (Ploadsymbol s Ptrofs.zero))) Enil)]
+ | Pigoto r => [(#PC, PReg(#r))]
+ | Pj_l l => [(#PC, Op (Control (Oj_l l)) (PReg(#PC) @ Enil))]
+ | Pcb bt r l => [(#PC, Op (Control (Ocb bt l)) (PReg(#r) @ PReg(#PC) @ Enil))]
+ | Pcbu bt r l => [(#PC, Op (Control (Ocbu bt l)) (PReg(#r) @ PReg(#PC) @ Enil))]
+ | Pjumptable r labels => [(#PC, Op (Control (Ojumptable labels)) (PReg(#r) @ PReg(#PC) @ Enil));
+ (#GPR62, Op (Constant Vundef) Enil);
+ (#GPR63, Op (Constant Vundef) Enil) ]
+ | Pbuiltin ef args res => [(#PC, Op (Control (OError)) Enil)]
+ end.
+
+Definition trans_exit (ex: option control) : L.inst :=
+ match ex with
+ | None => []
+ | Some ctl => trans_control ctl
+ end
+.
+
+Definition trans_arith (ai: ar_instruction) : inst :=
+ match ai with
+ | PArithR n d => [(#d, Op (Arith (OArithR n)) Enil)]
+ | PArithRR n d s => [(#d, Op (Arith (OArithRR n)) (PReg(#s) @ Enil))]
+ | PArithRI32 n d i => [(#d, Op (Arith (OArithRI32 n i)) Enil)]
+ | PArithRI64 n d i => [(#d, Op (Arith (OArithRI64 n i)) Enil)]
+ | PArithRF32 n d i => [(#d, Op (Arith (OArithRF32 n i)) Enil)]
+ | PArithRF64 n d i => [(#d, Op (Arith (OArithRF64 n i)) Enil)]
+ | PArithRRR n d s1 s2 => [(#d, Op (Arith (OArithRRR n)) (PReg(#s1) @ PReg(#s2) @ Enil))]
+ | PArithRRI32 n d s i => [(#d, Op (Arith (OArithRRI32 n i)) (PReg(#s) @ Enil))]
+ | PArithRRI64 n d s i => [(#d, Op (Arith (OArithRRI64 n i)) (PReg(#s) @ Enil))]
+ | PArithARRR n d s1 s2 => [(#d, Op (Arith (OArithARRR n)) (PReg(#d) @ PReg(#s1) @ PReg(#s2) @ Enil))]
+ | PArithARR n d s => [(#d, Op (Arith (OArithARR n)) (PReg(#d) @ PReg(#s) @ Enil))]
+ | PArithARRI32 n d s i => [(#d, Op (Arith (OArithARRI32 n i)) (PReg(#d) @ PReg(#s) @ Enil))]
+ | PArithARRI64 n d s i => [(#d, Op (Arith (OArithARRI64 n i)) (PReg(#d) @ PReg(#s) @ Enil))]
+ end.
+
+
+Definition trans_basic (b: basic) : inst :=
+ match b with
+ | PArith ai => trans_arith ai
+ | PLoadRRO trap n d a ofs => [(#d, Op (Load (OLoadRRO n trap ofs)) (PReg (#a) @ PReg pmem @ Enil))]
+ | PLoadRRR trap n d a ro => [(#d, Op (Load (OLoadRRR n trap)) (PReg (#a) @ PReg (#ro) @ PReg pmem @ Enil))]
+ | PLoadRRRXS trap n d a ro => [(#d, Op (Load (OLoadRRRXS n trap)) (PReg (#a) @ PReg (#ro) @ PReg pmem @ Enil))]
+ | PStoreRRO n s a ofs => [(pmem, Op (Store (OStoreRRO n ofs)) (PReg (#s) @ PReg (#a) @ PReg pmem @ Enil))]
+ | PLoadQRRO qd a ofs =>
+ let (d0, d1) := gpreg_q_expand qd in
+ [(#d0, Op (Load (OLoadRRO Pld_a TRAP ofs)) (PReg (#a) @ PReg pmem @ Enil));
+ (#d1, Op (Load (OLoadRRO Pld_a TRAP (Ptrofs.add ofs (Ptrofs.repr 8)))) (Old(PReg (#a)) @ PReg pmem @ Enil))]
+ | PLoadORRO od a ofs =>
+ match gpreg_o_expand od with
+ | (d0, d1, d2, d3) =>
+ [(#d0, Op (Load (OLoadRRO Pld_a TRAP ofs)) (PReg (#a) @ PReg pmem @ Enil));
+ (#d1, Op (Load (OLoadRRO Pld_a TRAP (Ptrofs.add ofs (Ptrofs.repr 8)))) (Old(PReg (#a)) @ PReg pmem @ Enil));
+ (#d2, Op (Load (OLoadRRO Pld_a TRAP (Ptrofs.add ofs (Ptrofs.repr 16)))) (Old(PReg (#a)) @ PReg pmem @ Enil));
+ (#d3, Op (Load (OLoadRRO Pld_a TRAP (Ptrofs.add ofs (Ptrofs.repr 24)))) (Old(PReg (#a)) @ PReg pmem @ Enil))]
+ end
+ | PStoreRRR n s a ro => [(pmem, Op (Store (OStoreRRR n)) (PReg (#s) @ PReg (#a) @ PReg (#ro) @ PReg pmem @ Enil))]
+ | PStoreRRRXS n s a ro => [(pmem, Op (Store (OStoreRRRXS n)) (PReg (#s) @ PReg (#a) @ PReg (#ro) @ PReg pmem @ Enil))]
+ | PStoreQRRO qs a ofs =>
+ let (s0, s1) := gpreg_q_expand qs in
+ [(pmem, Op (Store (OStoreRRO Psd_a ofs)) (PReg (#s0) @ PReg (#a) @ PReg pmem @ Enil));
+ (pmem, Op (Store (OStoreRRO Psd_a (Ptrofs.add ofs (Ptrofs.repr 8)))) (PReg (#s1) @ PReg (#a) @ PReg pmem @ Enil))]
+ | PStoreORRO os a ofs =>
+ match gpreg_o_expand os with
+ | (s0, s1, s2, s3) =>
+ [(pmem, Op (Store (OStoreRRO Psd_a ofs)) (PReg (#s0) @ PReg (#a) @ PReg pmem @ Enil));
+ (pmem, Op (Store (OStoreRRO Psd_a (Ptrofs.add ofs (Ptrofs.repr 8)))) (PReg (#s1) @ PReg (#a) @ PReg pmem @ Enil));
+ (pmem, Op (Store (OStoreRRO Psd_a (Ptrofs.add ofs (Ptrofs.repr 16)))) (PReg (#s2) @ PReg (#a) @ PReg pmem @ Enil));
+ (pmem, Op (Store (OStoreRRO Psd_a (Ptrofs.add ofs (Ptrofs.repr 24)))) (PReg (#s3) @ PReg (#a) @ PReg pmem @ Enil))]
+ end
+ | Pallocframe sz pos => [(#FP, PReg (#SP)); (#SP, Op (Allocframe2 sz pos) (PReg (#SP) @ PReg pmem @ Enil)); (#RTMP, Op (Constant Vundef) Enil);
+ (pmem, Op (Allocframe sz pos) (Old (PReg (#SP)) @ PReg pmem @ Enil))]
+ | Pfreeframe sz pos => [(pmem, Op (Freeframe sz pos) (PReg (#SP) @ PReg pmem @ Enil));
+ (#SP, Op (Freeframe2 sz pos) (PReg (#SP) @ Old (PReg pmem) @ Enil));
+ (#RTMP, Op (Constant Vundef) Enil)]
+ | Pget rd ra => match ra with
+ | RA => [(#rd, PReg(#ra))]
+ | _ => [(#rd, Op Fail Enil)]
+ end
+ | Pset ra rd => match ra with
+ | RA => [(#ra, PReg(#rd))]
+ | _ => [(#rd, Op Fail Enil)]
+ end
+ | Pnop => []
+ end.
+
+Fixpoint trans_body (b: list basic) : list L.inst :=
+ match b with
+ | nil => nil
+ | b :: lb => (trans_basic b) :: (trans_body lb)
+ end.
+
+Definition trans_pcincr (sz: Z) (k: L.inst) := (#PC, Op (Control (OIncremPC sz)) (PReg(#PC) @ Enil)) :: k.
+
+Definition trans_block (b: Asmvliw.bblock) : L.bblock :=
+ trans_body (body b) ++ (trans_pcincr (size b) (trans_exit (exit b)) :: nil).
+
+Theorem trans_block_noheader_inv: forall bb, trans_block (no_header bb) = trans_block bb.
+Proof.
+ intros. destruct bb as [hd bdy ex COR]; unfold no_header; simpl. unfold trans_block. simpl. reflexivity.
+Qed.
+
+Theorem trans_block_header_inv: forall bb hd, trans_block (stick_header hd bb) = trans_block bb.
+Proof.
+ intros. destruct bb as [hdr bdy ex COR]; unfold no_header; simpl. unfold trans_block. simpl. reflexivity.
+Qed.
+
+(** Lemmas on the translation *)
+
+Definition state := L.mem.
+Definition exec := L.run.
+
+Definition match_states (s: Asmvliw.state) (s': state) :=
+ let (rs, m) := s in
+ s' pmem = Memstate m
+ /\ forall r, s' (#r) = Val (rs r).
+
+Definition match_outcome (o:outcome) (s: option state) :=
+ match o with
+ | Next rs m => exists s', s=Some s' /\ match_states (State rs m) s'
+ | Stuck => s=None
+ end.
+
+Notation "a <[ b <- c ]>" := (assign a b c) (at level 102, right associativity).
+
+Definition trans_state (s: Asmvliw.state) : state :=
+ let (rs, m) := s in
+ fun x => if (Pos.eq_dec x pmem) then Memstate m
+ else match (inv_ppos x) with
+ | Some r => Val (rs r)
+ | None => Val Vundef
+ end.
+
+Lemma not_eq_IR:
+ forall r r', r <> r' -> IR r <> IR r'.
+Proof.
+ intros. congruence.
+Qed.
+
+(** Parallelizability test of a bblock (bundle), and bisimulation of the Asmblock and L parallel semantics *)
+
+Module PChk := ParallelChecks L PosPseudoRegSet.
+
+Definition bblock_para_check (p: Asmvliw.bblock) : bool :=
+ PChk.is_parallelizable (trans_block p).
+
+Section SECT_PAR.
+
+Import PChk.
+
+Ltac Simplif :=
+ ((rewrite nextblock_inv by eauto with asmgen)
+ || (rewrite nextblock_inv1 by eauto with asmgen)
+ || (rewrite Pregmap.gss)
+ || (rewrite nextblock_pc)
+ || (rewrite Pregmap.gso by eauto with asmgen)
+ || (rewrite assign_diff by (auto; try discriminate; try (apply ppos_discr; try discriminate; congruence); try (apply ppos_pmem_discr);
+ try (apply not_eq_sym; apply ppos_discr; try discriminate; congruence); try (apply not_eq_sym; apply ppos_pmem_discr); auto))
+ || (rewrite assign_eq)
+ ); auto with asmgen.
+
+Ltac Simpl := repeat Simplif.
+
+Arguments Pos.add: simpl never.
+Arguments ppos: simpl never.
+
+Variable Ge: genv.
+
+Lemma trans_arith_par_correct ge fn rsr mr sr rsw mw sw rsw' i:
+ Ge = Genv ge fn ->
+ match_states (State rsr mr) sr ->
+ match_states (State rsw mw) sw ->
+ parexec_arith_instr ge i rsr rsw = rsw' ->
+ exists sw',
+ inst_prun Ge (trans_arith i) sw sr sr = Some sw'
+ /\ match_states (State rsw' mw) sw'.
+Proof.
+ intros GENV MSR MSW PARARITH. subst. inv MSR. inv MSW.
+ unfold parexec_arith_instr. destruct i.
+(* Ploadsymbol *)
+ - destruct i. eexists; split; [| split].
+ * simpl. reflexivity.
+ * Simpl.
+ * simpl. intros rr; destruct rr; Simpl.
+ destruct (ireg_eq g rd); subst; Simpl.
+(* PArithRR *)
+ - eexists; split; [| split].
+ * simpl. rewrite (H0 rs). reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; Simpl.
+ destruct (ireg_eq g rd); subst; Simpl.
+(* PArithRI32 *)
+ - eexists; split; [|split].
+ * simpl. reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; Simpl.
+ destruct (ireg_eq g rd); subst; Simpl.
+(* PArithRI64 *)
+ - eexists; split; [|split].
+ * simpl. reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; Simpl.
+ destruct (ireg_eq g rd); subst; Simpl.
+(* PArithRF32 *)
+ - eexists; split; [|split].
+ * simpl. reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; Simpl.
+ destruct (ireg_eq g rd); subst; Simpl.
+(* PArithRF64 *)
+ - eexists; split; [|split].
+ * simpl. reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; Simpl.
+ destruct (ireg_eq g rd); subst; Simpl.
+(* PArithRRR *)
+ - eexists; split; [|split].
+ * simpl. rewrite (H0 rs1). rewrite (H0 rs2). reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; Simpl.
+ destruct (ireg_eq g rd); subst; Simpl.
+(* PArithRRI32 *)
+ - eexists; split; [|split].
+ * simpl. rewrite (H0 rs). reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; Simpl.
+ destruct (ireg_eq g rd); subst; Simpl.
+(* PArithRRI64 *)
+ - eexists; split; [|split].
+ * simpl. rewrite (H0 rs). reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; Simpl.
+ destruct (ireg_eq g rd); subst; Simpl.
+(* PArithARRR *)
+ - eexists; split; [|split].
+ * simpl. rewrite (H0 rd). rewrite (H0 rs1). rewrite (H0 rs2). reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; Simpl.
+ destruct (ireg_eq g rd); subst; Simpl.
+(* PArithARR *)
+ - eexists; split; [|split].
+ * simpl. rewrite (H0 rd). rewrite (H0 rs). reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; Simpl.
+ destruct (ireg_eq g rd); subst; Simpl.
+(* PArithARRI32 *)
+ - eexists; split; [|split].
+ * simpl. rewrite (H0 rd). rewrite (H0 rs). reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; Simpl.
+ destruct (ireg_eq g rd); subst; Simpl.
+(* PArithARRI64 *)
+ - eexists; split; [|split].
+ * simpl. rewrite (H0 rd). rewrite (H0 rs). reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; Simpl.
+ destruct (ireg_eq g rd); subst; Simpl.
+Qed.
+
+
+
+Theorem bisimu_par_wio_basic ge fn rsr rsw mr mw sr sw bi:
+ Ge = Genv ge fn ->
+ match_states (State rsr mr) sr ->
+ match_states (State rsw mw) sw ->
+ match_outcome (bstep ge bi rsr rsw mr mw) (inst_prun Ge (trans_basic bi) sw sr sr).
+Proof.
+
+(* a little tactic to automate reasoning on preg_eq *)
+Local Hint Resolve not_eq_sym ppos_pmem_discr ppos_discr: core.
+Local Ltac preg_eq_discr r rd :=
+ destruct (preg_eq r rd); try (subst r; rewrite assign_eq, Pregmap.gss; auto);
+ rewrite (assign_diff _ (#rd) (#r) _); auto;
+ rewrite Pregmap.gso; auto.
+
+ intros GENV MSR MSW; inversion MSR as (H & H0); inversion MSW as (H1 & H2).
+ destruct bi; simpl.
+(* Arith *)
+ - exploit trans_arith_par_correct. 5: eauto. all: eauto.
+(* Load *)
+ - destruct i.
+ (* Load Offset *)
+ + destruct i; simpl load_chunk. all:
+ unfold parexec_load_offset; simpl; unfold exec_load_deps_offset; erewrite GENV, H, H0;
+ unfold eval_offset;
+ simpl; auto;
+ destruct (Mem.loadv _ _ _) eqn:MEML; destruct trap; simpl; auto;
+ eexists; split; try split; Simpl;
+ intros rr; destruct rr; Simpl; destruct (ireg_eq g rd); subst; Simpl.
+
+ (* Load Reg *)
+ + destruct i; simpl load_chunk. all:
+ unfold parexec_load_reg; simpl; unfold exec_load_deps_reg; rewrite H, H0; rewrite (H0 rofs);
+ destruct (Mem.loadv _ _ _) eqn:MEML; destruct trap; simpl; auto;
+ eexists; split; try split; Simpl;
+ intros rr; destruct rr; Simpl; destruct (ireg_eq g rd); subst; Simpl.
+
+ (* Load Reg XS *)
+ + destruct i; simpl load_chunk. all:
+ unfold parexec_load_regxs; simpl; unfold exec_load_deps_regxs; rewrite H, H0; rewrite (H0 rofs);
+ destruct (Mem.loadv _ _ _) eqn:MEML; destruct trap; simpl; auto;
+ eexists; split; try split; Simpl;
+ intros rr; destruct rr; Simpl; destruct (ireg_eq g rd); subst; Simpl.
+
+ (* Load Quad word *)
+ + unfold parexec_load_q_offset.
+ destruct (gpreg_q_expand rd) as [rd0 rd1]; destruct Ge; simpl.
+ rewrite H0, H.
+ destruct (Mem.loadv Many64 mr _) as [load0 | ]; simpl; auto.
+ rewrite !(assign_diff _ _ pmem), H; auto.
+ destruct (Mem.loadv Many64 mr (_ _ (Ptrofs.add ofs (Ptrofs.repr 8)))) as [load1| ]; simpl; auto.
+ eexists; intuition eauto.
+ { rewrite !(assign_diff _ _ pmem); auto. }
+ { preg_eq_discr r rd1.
+ preg_eq_discr r rd0. }
+
+ (* Load Octuple word *)
+ + Local Hint Resolve not_eq_sym ppos_pmem_discr ppos_discr: core.
+ unfold parexec_load_o_offset.
+ destruct (gpreg_o_expand rd) as [[[rd0 rd1] rd2] rd3]; destruct Ge; simpl.
+ rewrite H0, H.
+ destruct (Mem.loadv Many64 mr (Val.offset_ptr (rsr ra) ofs)) as [load0 | ]; simpl; auto.
+ rewrite !(assign_diff _ _ pmem), !H; auto.
+ destruct (Mem.loadv Many64 mr (_ _ (Ptrofs.add ofs (Ptrofs.repr 8)))) as [load1| ]; simpl; auto.
+ rewrite !(assign_diff _ _ pmem), !H; auto.
+ destruct (Mem.loadv Many64 mr (_ _ (Ptrofs.add ofs (Ptrofs.repr 16)))) as [load2| ]; simpl; auto.
+ rewrite !(assign_diff _ _ pmem), !H; auto.
+ destruct (Mem.loadv Many64 mr (_ _ (Ptrofs.add ofs (Ptrofs.repr 24)))) as [load3| ]; simpl; auto.
+ eexists; intuition eauto.
+ { rewrite !(assign_diff _ _ pmem); auto. }
+ { preg_eq_discr r rd3.
+ preg_eq_discr r rd2.
+ preg_eq_discr r rd1.
+ preg_eq_discr r rd0. }
+
+(* Store *)
+ - destruct i.
+ (* Store Offset *)
+ + destruct i; simpl store_chunk. all:
+ unfold parexec_store_offset; simpl; unfold exec_store_deps_offset; erewrite GENV, H, H0; rewrite (H0 ra);
+ unfold eval_offset; simpl; auto;
+ destruct (Mem.storev _ _ _ _) eqn:MEML; simpl; auto;
+ eexists; split; try split; Simpl;
+ intros rr; destruct rr; Simpl.
+
+ (* Store Reg *)
+ + destruct i; simpl store_chunk. all:
+ unfold parexec_store_reg; simpl; unfold exec_store_deps_reg; rewrite H, H0; rewrite (H0 ra); rewrite (H0 rofs);
+ destruct (Mem.storev _ _ _ _) eqn:MEML; simpl; auto;
+ eexists; split; try split; Simpl;
+ intros rr; destruct rr; Simpl.
+
+ (* Store Reg XS *)
+ + destruct i; simpl store_chunk. all:
+ unfold parexec_store_regxs; simpl; unfold exec_store_deps_regxs; rewrite H, H0; rewrite (H0 ra); rewrite (H0 rofs);
+ destruct (Mem.storev _ _ _ _) eqn:MEML; simpl; auto;
+ eexists; split; try split; Simpl;
+ intros rr; destruct rr; Simpl.
+
+ (* Store Quad Word *)
+ + unfold parexec_store_q_offset.
+ destruct (gpreg_q_expand rs) as [s0 s1]; destruct Ge; simpl.
+ rewrite !H0, !H.
+ destruct (Mem.storev _ _ _ (rsr s0)) as [mem0 | ]; simpl; auto.
+ rewrite !assign_diff, !H0; auto.
+ destruct (Mem.storev _ _ _ (rsr s1)) as [mem1 | ]; simpl; auto.
+ eexists; intuition eauto.
+ rewrite !assign_diff; auto.
+
+ (* Store Ocuple Word *)
+ + unfold parexec_store_o_offset.
+ destruct (gpreg_o_expand rs) as [[[s0 s1] s2] s3]; destruct Ge; simpl.
+ rewrite !H0, !H.
+ destruct (Mem.storev _ _ _ (rsr s0)) as [store0 | ]; simpl; auto.
+ rewrite !assign_diff, !H0; auto.
+ destruct (Mem.storev _ _ _ (rsr s1)) as [store1 | ]; simpl; auto.
+ rewrite !assign_diff, !H0; auto.
+ destruct (Mem.storev _ _ _ (rsr s2)) as [store2 | ]; simpl; auto.
+ rewrite !assign_diff, !H0; auto.
+ destruct (Mem.storev _ _ _ (rsr s3)) as [store3 | ]; simpl; auto.
+ eexists; intuition eauto.
+ rewrite !assign_diff; auto.
+
+ (* Allocframe *)
+ - destruct (Mem.alloc _ _ _) eqn:MEMAL. destruct (Mem.store _ _ _ _) eqn:MEMS.
+ * eexists; repeat split.
+ { Simpl. erewrite !H0, H, MEMAL, MEMS. Simpl.
+ rewrite H, MEMAL. rewrite MEMS. reflexivity. }
+ { Simpl. }
+ { intros rr; destruct rr; Simpl.
+ destruct (ireg_eq g GPR32); [| destruct (ireg_eq g GPR12); [| destruct (ireg_eq g GPR17)]]; subst; Simpl. }
+ * simpl; Simpl; erewrite !H0, H, MEMAL, MEMS; auto.
+ (* Freeframe *)
+ - erewrite !H0, H.
+ destruct (Mem.loadv _ _ _) eqn:MLOAD; simpl; auto.
+ destruct (rsr GPR12) eqn:SPeq; simpl; auto.
+ destruct (Mem.free _ _ _ _) eqn:MFREE; simpl; auto.
+ eexists; repeat split.
+ * simpl. Simpl. erewrite H0, SPeq, MLOAD, MFREE. reflexivity.
+ * Simpl.
+ * intros rr; destruct rr; Simpl. destruct (ireg_eq g GPR32); [| destruct (ireg_eq g GPR12); [| destruct (ireg_eq g GPR17)]]; subst; Simpl.
+(* Pget *)
+ - destruct rs eqn:rseq; simpl; auto.
+ eexists. repeat split. Simpl. intros rr; destruct rr; Simpl.
+ destruct (ireg_eq g rd); subst; Simpl.
+(* Pset *)
+ - destruct rd eqn:rdeq; simpl; auto.
+ eexists. repeat split. Simpl. intros rr; destruct rr; Simpl.
+(* Pnop *)
+ - eexists. repeat split; assumption.
+Qed.
+
+
+Theorem bisimu_par_body:
+ forall bdy ge fn rsr mr sr rsw mw sw,
+ Ge = Genv ge fn ->
+ match_states (State rsr mr) sr ->
+ match_states (State rsw mw) sw ->
+ match_outcome (parexec_wio_body ge bdy rsr rsw mr mw) (prun_iw Ge (trans_body bdy) sw sr).
+Proof.
+ induction bdy as [|i bdy]; simpl; eauto.
+ intros.
+ exploit (bisimu_par_wio_basic ge fn rsr rsw mr mw sr sw i); eauto.
+ destruct (bstep _ _ _ _ _ _); simpl.
+ - intros (s' & X1 & X2). rewrite X1; simpl; eauto.
+ - intros X; rewrite X; simpl; auto.
+Qed.
+
+Theorem bisimu_par_control ex sz aux ge fn rsr rsw mr mw sr sw:
+ Ge = Genv ge fn ->
+ match_states (State rsr mr) sr ->
+ match_states (State rsw mw) sw ->
+ match_outcome (parexec_control ge fn ex (incrPC (Ptrofs.repr sz) rsr) (rsw#PC <- aux) mw) (inst_prun Ge (trans_pcincr sz (trans_exit ex)) sw sr sr).
+Proof.
+ intros GENV MSR MSW; unfold estep.
+ simpl in *. inv MSR. inv MSW.
+ destruct ex.
+ - destruct c; destruct i; try discriminate; simpl.
+ all: try (rewrite (H0 PC); eexists; split; try split; Simpl; intros rr; destruct rr; unfold incrPC; Simpl).
+
+ (* Pjumptable *)
+ + rewrite (H0 PC). Simpl. rewrite (H0 r). unfold incrPC. Simpl.
+ destruct (rsr r); simpl; auto. destruct (list_nth_z _ _); simpl; auto.
+ unfold par_goto_label. unfold goto_label_deps. destruct (label_pos _ _ _); simpl; auto. Simpl.
+ destruct (Val.offset_ptr _ _); simpl; auto.
+ eexists; split; try split; Simpl. intros rr; destruct rr; unfold incrPC; Simpl.
+ destruct (preg_eq g GPR62). rewrite e. Simpl.
+ destruct (preg_eq g GPR63). rewrite e. Simpl. Simpl.
+
+ (* Pj_l *)
+ + rewrite (H0 PC). Simpl. unfold par_goto_label. unfold goto_label_deps. destruct (label_pos _ _ _); simpl; auto.
+ unfold incrPC. Simpl. destruct (Val.offset_ptr _ _); simpl; auto.
+ eexists; split; try split; Simpl. intros rr; destruct rr; unfold incrPC; Simpl.
+
+ (* Pcb *)
+ + rewrite (H0 PC). Simpl. rewrite (H0 r). destruct (cmp_for_btest _); simpl; auto. destruct o; simpl; auto.
+ unfold par_eval_branch. unfold eval_branch_deps. unfold incrPC. Simpl. destruct i.
+ ++ destruct (Val.cmp_bool _ _ _); simpl; auto. destruct b.
+ +++ unfold par_goto_label. unfold goto_label_deps. destruct (label_pos _ _ _); simpl; auto. Simpl.
+ destruct (Val.offset_ptr _ _); simpl; auto. eexists; split; try split; Simpl.
+ intros rr; destruct rr; Simpl.
+ +++ repeat (econstructor; eauto). intros rr; destruct rr; Simpl.
+ ++ destruct (Val.cmpl_bool _ _ _); simpl; auto. destruct b.
+ +++ unfold par_goto_label. unfold goto_label_deps. destruct (label_pos _ _ _); simpl; auto. Simpl.
+ destruct (Val.offset_ptr _ _); simpl; auto. eexists; split; try split; Simpl.
+ intros rr; destruct rr; Simpl.
+ +++ repeat (econstructor; eauto). intros rr; destruct rr; Simpl.
+
+ (* Pcbu *)
+ + rewrite (H0 PC). Simpl. rewrite (H0 r). destruct (cmpu_for_btest _); simpl; auto. destruct o; simpl; auto.
+ unfold par_eval_branch. unfold eval_branch_deps. unfold incrPC. Simpl. destruct i.
+ ++ destruct (Val_cmpu_bool _ _ _); simpl; auto. destruct b.
+ +++ unfold par_goto_label. unfold goto_label_deps. destruct (label_pos _ _ _); simpl; auto. Simpl.
+ destruct (Val.offset_ptr _ _); simpl; auto. eexists; split; try split; Simpl.
+ intros rr; destruct rr; Simpl.
+ +++ repeat (econstructor; eauto). intros rr; destruct rr; Simpl.
+ ++ destruct (Val_cmplu_bool _ _ _); simpl; auto. destruct b.
+ +++ unfold par_goto_label. unfold goto_label_deps. destruct (label_pos _ _ _); simpl; auto. Simpl.
+ destruct (Val.offset_ptr _ _); simpl; auto. eexists; split; try split; Simpl.
+ intros rr; destruct rr; Simpl.
+ +++ repeat (econstructor; eauto). intros rr; destruct rr; Simpl.
+
+ - simpl in *. rewrite (H0 PC). eexists; split; try split; Simpl.
+ intros rr; destruct rr; unfold incrPC; Simpl.
+Qed.
+
+Theorem bisimu_par_exit ex sz ge fn rsr rsw mr mw sr sw:
+ Ge = Genv ge fn ->
+ match_states (State rsr mr) sr ->
+ match_states (State rsw mw) sw ->
+ match_outcome (estep ge fn ex (Ptrofs.repr sz) rsr rsw mw) (inst_prun Ge (trans_pcincr sz (trans_exit ex)) sw sr sr).
+Proof.
+ intros; unfold estep.
+ exploit (bisimu_par_control ex sz rsw#PC ge fn rsr rsw mr mw sr sw); eauto.
+ cutrewrite (rsw # PC <- (rsw PC) = rsw); auto.
+ apply extensionality. intros; destruct x; simpl; auto.
+Qed.
+
+Definition trans_block_aux bdy sz ex := (trans_body bdy) ++ (trans_pcincr sz (trans_exit ex) :: nil).
+
+Theorem bisimu_par_wio ge fn rsr mr sr bdy ex sz:
+ Ge = Genv ge fn ->
+ match_states (State rsr mr) sr ->
+ match_outcome (parexec_wio ge fn bdy ex (Ptrofs.repr sz) rsr mr) (prun_iw Ge (trans_block_aux bdy sz ex) sr sr).
+Proof.
+ intros GENV MSR. unfold parexec_wio, trans_block_aux.
+ exploit (bisimu_par_body bdy ge fn rsr mr sr rsr mr sr); eauto.
+ destruct (parexec_wio_body _ _ _ _ _ _); simpl.
+ - intros (s' & X1 & X2).
+ erewrite prun_iw_app_Some; eauto.
+ exploit (bisimu_par_exit ex sz ge fn rsr rs mr m sr s'); eauto.
+ subst Ge; simpl. destruct MSR as (Y1 & Y2). erewrite Y2; simpl.
+ destruct (inst_prun _ _ _ _ _); simpl; auto.
+ - intros X; erewrite prun_iw_app_None; eauto.
+Qed.
+
+Theorem bisimu_par_wio_bblock ge fn rsr mr sr bdy1 bdy2 ex sz:
+ Ge = Genv ge fn ->
+ match_states (State rsr mr) sr ->
+ match_outcome
+ match parexec_wio ge fn bdy1 ex (Ptrofs.repr sz) rsr mr with
+ | Next rs' m' => parexec_wio_body ge bdy2 rsr rs' mr m'
+ | Stuck => Stuck
+ end
+ (prun_iw Ge ((trans_block_aux bdy1 sz ex)++(trans_body bdy2)) sr sr).
+Proof.
+ intros.
+ exploit (bisimu_par_wio ge fn rsr mr sr bdy1 ex sz); eauto.
+ destruct (parexec_wio _ _ _ _ _ _); simpl.
+ - intros (s' & X1 & X2).
+ erewrite prun_iw_app_Some; eauto.
+ eapply bisimu_par_body; eauto.
+ - intros; erewrite prun_iw_app_None; eauto.
+Qed.
+
+Lemma trans_body_perserves_permutation bdy1 bdy2:
+ Permutation bdy1 bdy2 ->
+ Permutation (trans_body bdy1) (trans_body bdy2).
+Proof.
+ induction 1; simpl; econstructor; eauto.
+Qed.
+
+Lemma trans_body_app bdy1: forall bdy2,
+ trans_body (bdy1++bdy2) = (trans_body bdy1) ++ (trans_body bdy2).
+Proof.
+ induction bdy1; simpl; congruence.
+Qed.
+
+Theorem trans_block_perserves_permutation bdy1 bdy2 b:
+ Permutation (bdy1 ++ bdy2) (body b) ->
+ Permutation (trans_block b) ((trans_block_aux bdy1 (size b) (exit b))++(trans_body bdy2)).
+Proof.
+ intro H; unfold trans_block, trans_block_aux.
+ eapply perm_trans.
+ - eapply Permutation_app_tail.
+ apply trans_body_perserves_permutation.
+ apply Permutation_sym; eapply H.
+ - rewrite trans_body_app. rewrite <-! app_assoc.
+ apply Permutation_app_head.
+ apply Permutation_app_comm.
+Qed.
+
+Theorem bisimu_par rs1 m1 s1' b ge fn o2:
+ Ge = Genv ge fn ->
+ match_states (State rs1 m1) s1' ->
+ parexec_bblock ge fn b rs1 m1 o2 ->
+ exists o2',
+ prun Ge (trans_block b) s1' o2'
+ /\ match_outcome o2 o2'.
+Proof.
+ intros GENV MS PAREXEC.
+ inversion PAREXEC as (bdy1 & bdy2 & PERM & WIO).
+ exploit trans_block_perserves_permutation; eauto.
+ intros Perm.
+ exploit (bisimu_par_wio_bblock ge fn rs1 m1 s1' bdy1 bdy2 (exit b) (size b)); eauto.
+ rewrite <- WIO. clear WIO.
+ intros H; eexists; split. 2: eapply H.
+ unfold prun; eexists; split; eauto.
+ destruct (prun_iw _ _ _ _); simpl; eauto.
+Qed.
+
+(** sequential execution *)
+Theorem bisimu_basic ge fn bi rs m s:
+ Ge = Genv ge fn ->
+ match_states (State rs m) s ->
+ match_outcome (exec_basic_instr ge bi rs m) (inst_run Ge (trans_basic bi) s s).
+Proof.
+ intros; unfold exec_basic_instr. rewrite inst_run_prun.
+ eapply bisimu_par_wio_basic; eauto.
+Qed.
+
+Lemma bisimu_body:
+ forall bdy ge fn rs m s,
+ Ge = Genv ge fn ->
+ match_states (State rs m) s ->
+ match_outcome (exec_body ge bdy rs m) (exec Ge (trans_body bdy) s).
+Proof.
+ induction bdy as [|i bdy]; simpl; eauto.
+ intros.
+ exploit (bisimu_basic ge fn i rs m s); eauto.
+ destruct (exec_basic_instr _ _ _ _); simpl.
+ - intros (s' & X1 & X2). rewrite X1; simpl; eauto.
+ - intros X; rewrite X; simpl; auto.
+Qed.
+
+Theorem bisimu_exit ge fn b rs m s:
+ Ge = Genv ge fn ->
+ match_states (State rs m) s ->
+ match_outcome (exec_control ge fn (exit b) (nextblock b rs) m) (inst_run Ge (trans_pcincr (size b) (trans_exit (exit b))) s s).
+Proof.
+ intros; unfold exec_control, nextblock. rewrite inst_run_prun.
+ apply (bisimu_par_control (exit b) (size b) (Val.offset_ptr (rs PC) (Ptrofs.repr (size b))) ge fn rs rs m m s s); auto.
+Qed.
+
+Theorem bisimu rs m b ge fn s:
+ Ge = Genv ge fn ->
+ match_states (State rs m) s ->
+ match_outcome (exec_bblock ge fn b rs m) (exec Ge (trans_block b) s).
+Proof.
+ intros GENV MS. unfold exec_bblock.
+ exploit (bisimu_body (body b) ge fn rs m s); eauto.
+ unfold exec, trans_block; simpl.
+ destruct (exec_body _ _ _ _); simpl.
+ - intros (s' & X1 & X2).
+ erewrite run_app_Some; eauto.
+ exploit (bisimu_exit ge fn b rs0 m0 s'); eauto.
+ subst Ge; simpl. destruct X2 as (Y1 & Y2). erewrite Y2; simpl.
+ destruct (inst_run _ _ _); simpl; auto.
+ - intros X; erewrite run_app_None; eauto.
+Qed.
+
+
+Theorem trans_state_match: forall S, match_states S (trans_state S).
+Proof.
+ intros. destruct S as (rs & m). simpl.
+ split. reflexivity.
+ intro. destruct r; try reflexivity.
+ destruct g; reflexivity.
+Qed.
+
+
+Lemma state_eq_decomp:
+ forall rs1 m1 rs2 m2, rs1 = rs2 -> m1 = m2 -> State rs1 m1 = State rs2 m2.
+Proof.
+ intros. congruence.
+Qed.
+
+Theorem state_equiv S1 S2 S': match_states S1 S' -> match_states S2 S' -> S1 = S2.
+Proof.
+ unfold match_states; intros H0 H1. destruct S1 as (rs1 & m1). destruct S2 as (rs2 & m2). inv H0. inv H1.
+ apply state_eq_decomp.
+ - apply functional_extensionality. intros. assert (Val (rs1 x) = Val (rs2 x)) by congruence. congruence.
+ - congruence.
+Qed.
+
+Lemma bblock_para_check_correct ge fn bb rs m rs' m':
+ Ge = Genv ge fn ->
+ exec_bblock ge fn bb rs m = Next rs' m' ->
+ bblock_para_check bb = true ->
+ det_parexec ge fn bb rs m rs' m'.
+Proof.
+ intros H H0 H1 o H2. unfold bblock_para_check in H1.
+ exploit (bisimu rs m bb ge fn); eauto. eapply trans_state_match.
+ rewrite H0; simpl.
+ intros (s2' & EXEC & MS).
+ exploit bisimu_par. 2: apply (trans_state_match (State rs m)). all: eauto.
+ intros (o2' & PRUN & MO).
+ exploit parallelizable_correct. apply is_para_correct_aux. eassumption.
+ intro. eapply H3 in PRUN. clear H3. destruct o2'.
+ - inv PRUN. inv H3. unfold exec in EXEC; unfold trans_state in H.
+ assert (x = s2') by congruence. subst. clear H.
+ assert (m0 = s2') by (apply functional_extensionality; auto). subst. clear H4.
+ destruct o; try discriminate. inv MO. inv H. assert (s2' = x) by congruence. subst.
+ exploit (state_equiv (State rs' m') (State rs0 m0)).
+ 2: eapply H4. eapply MS. intro H. inv H. reflexivity.
+ - unfold match_outcome in MO. destruct o.
+ + inv MO. inv H3. discriminate.
+ + clear MO. unfold exec in EXEC.
+ unfold trans_state in PRUN; rewrite EXEC in PRUN. discriminate.
+Qed.
+
+End SECT_PAR.
+
+Section SECT_BBLOCK_EQUIV.
+
+Variable Ge: genv.
+
+Local Hint Resolve trans_state_match: core.
+
+Lemma bblock_simu_reduce:
+ forall p1 p2 ge fn,
+ Ge = Genv ge fn ->
+ L.bblock_simu Ge (trans_block p1) (trans_block p2) ->
+ Asmblockprops.bblock_simu ge fn p1 p2.
+Proof.
+ unfold bblock_simu, res_eq; intros p1 p2 ge fn H1 H2 rs m DONTSTUCK.
+ generalize (H2 (trans_state (State rs m))); clear H2.
+ intro H2.
+ exploit (bisimu Ge rs m p1 ge fn (trans_state (State rs m))); eauto.
+ exploit (bisimu Ge rs m p2 ge fn (trans_state (State rs m))); eauto.
+ destruct (exec_bblock ge fn p1 rs m); try congruence.
+ intros H3 (s2' & exp2 & MS'). unfold exec in exp2, H3. rewrite exp2 in H2.
+ destruct H2 as (m2' & H2 & H4). discriminate. rewrite H2 in H3.
+ destruct (exec_bblock ge fn p2 rs m); simpl in H3.
+ * destruct H3 as (s' & H3 & H5 & H6). inv H3. inv MS'.
+ cutrewrite (rs0=rs1).
+ - cutrewrite (m0=m1); auto. congruence.
+ - apply functional_extensionality. intros r.
+ generalize (H0 r). intros Hr. congruence.
+ * discriminate.
+Qed.
+
+(** Used for debug traces *)
+
+Definition gpreg_name (gpr: gpreg) :=
+ match gpr with
+ | GPR0 => Str ("GPR0") | GPR1 => Str ("GPR1") | GPR2 => Str ("GPR2") | GPR3 => Str ("GPR3") | GPR4 => Str ("GPR4")
+ | GPR5 => Str ("GPR5") | GPR6 => Str ("GPR6") | GPR7 => Str ("GPR7") | GPR8 => Str ("GPR8") | GPR9 => Str ("GPR9")
+ | GPR10 => Str ("GPR10") | GPR11 => Str ("GPR11") | GPR12 => Str ("GPR12") | GPR13 => Str ("GPR13") | GPR14 => Str ("GPR14")
+ | GPR15 => Str ("GPR15") | GPR16 => Str ("GPR16") | GPR17 => Str ("GPR17") | GPR18 => Str ("GPR18") | GPR19 => Str ("GPR19")
+ | GPR20 => Str ("GPR20") | GPR21 => Str ("GPR21") | GPR22 => Str ("GPR22") | GPR23 => Str ("GPR23") | GPR24 => Str ("GPR24")
+ | GPR25 => Str ("GPR25") | GPR26 => Str ("GPR26") | GPR27 => Str ("GPR27") | GPR28 => Str ("GPR28") | GPR29 => Str ("GPR29")
+ | GPR30 => Str ("GPR30") | GPR31 => Str ("GPR31") | GPR32 => Str ("GPR32") | GPR33 => Str ("GPR33") | GPR34 => Str ("GPR34")
+ | GPR35 => Str ("GPR35") | GPR36 => Str ("GPR36") | GPR37 => Str ("GPR37") | GPR38 => Str ("GPR38") | GPR39 => Str ("GPR39")
+ | GPR40 => Str ("GPR40") | GPR41 => Str ("GPR41") | GPR42 => Str ("GPR42") | GPR43 => Str ("GPR43") | GPR44 => Str ("GPR44")
+ | GPR45 => Str ("GPR45") | GPR46 => Str ("GPR46") | GPR47 => Str ("GPR47") | GPR48 => Str ("GPR48") | GPR49 => Str ("GPR49")
+ | GPR50 => Str ("GPR50") | GPR51 => Str ("GPR51") | GPR52 => Str ("GPR52") | GPR53 => Str ("GPR53") | GPR54 => Str ("GPR54")
+ | GPR55 => Str ("GPR55") | GPR56 => Str ("GPR56") | GPR57 => Str ("GPR57") | GPR58 => Str ("GPR58") | GPR59 => Str ("GPR59")
+ | GPR60 => Str ("GPR60") | GPR61 => Str ("GPR61") | GPR62 => Str ("GPR62") | GPR63 => Str ("GPR63")
+ end.
+
+Definition string_of_name (x: P.R.t): ?? pstring :=
+ if (Pos.eqb x pmem) then
+ RET (Str "MEM")
+ else
+ match inv_ppos x with
+ | Some RA => RET (Str ("RA"))
+ | Some PC => RET (Str ("PC"))
+ | Some (IR gpr) => RET (gpreg_name gpr)
+ | _ => RET (Str ("UNDEFINED"))
+ end.
+
+Definition string_of_name_r (n: arith_name_r): pstring :=
+ match n with
+ | Ploadsymbol _ _ => "Ploadsymbol"
+ end.
+
+Definition string_of_name_rr (n: arith_name_rr): pstring :=
+ match n with
+ Pmv => "Pmv"
+ | Pnegw => "Pnegw"
+ | Pnegl => "Pnegl"
+ | Pcvtl2w => "Pcvtl2w"
+ | Psxwd => "Psxwd"
+ | Pzxwd => "Pzxwd"
+ | Pextfz _ _ => "Pextfz"
+ | Pextfs _ _ => "Pextfs"
+ | Pextfzl _ _ => "Pextfzl"
+ | Pextfsl _ _ => "Pextfsl"
+ | Pfabsd => "Pfabsd"
+ | Pfabsw => "Pfabsw"
+ | Pfnegd => "Pfnegd"
+ | Pfnegw => "Pfnegw"
+ | Pfinvw => "Pfinvw"
+ | Pfnarrowdw => "Pfnarrowdw"
+ | Pfwidenlwd => "Pfwidenlwd"
+ | Pfloatwrnsz => "Pfloatwrnsz"
+ | Pfloatuwrnsz => "Pfloatuwrnsz"
+ | Pfloatudrnsz => "Pfloatudrnsz"
+ | Pfloatdrnsz => "Pfloatdrnsz"
+ | Pfixedwrzz => "Pfixedwrzz"
+ | Pfixeduwrzz => "Pfixeduwrzz"
+ | Pfixeddrzz => "Pfixeddrzz"
+ | Pfixedudrzz => "Pfixedudrzz"
+ | Pfixeddrzz_i32 => "Pfixeddrzz_i32"
+ | Pfixedudrzz_i32 => "Pfixedudrzz_i32"
+ end.
+
+Definition string_of_name_ri32 (n: arith_name_ri32): pstring :=
+ match n with
+ | Pmake => "Pmake"
+ end.
+
+Definition string_of_name_ri64 (n: arith_name_ri64): pstring :=
+ match n with
+ | Pmakel => "Pmakel"
+ end.
+
+Definition string_of_name_rf32 (n: arith_name_rf32): pstring :=
+ match n with
+ | Pmakefs => "Pmakefs"
+ end.
+
+Definition string_of_name_rf64 (n: arith_name_rf64): pstring :=
+ match n with
+ | Pmakef => "Pmakef"
+ end.
+
+Definition string_of_name_rrr (n: arith_name_rrr): pstring :=
+ match n with
+ | Pcompw _ => "Pcompw"
+ | Pcompl _ => "Pcompl"
+ | Pfcompw _ => "Pfcompw"
+ | Pfcompl _ => "Pfcompl"
+ | Paddw => "Paddw"
+ | Paddxw _ => "Paddxw"
+ | Psubw => "Psubw"
+ | Prevsubxw _ => "Prevsubxw"
+ | Pmulw => "Pmulw"
+ | Pandw => "Pandw"
+ | Pnandw => "Pnandw"
+ | Porw => "Porw"
+ | Pnorw => "Pnorw"
+ | Pxorw => "Pxorw"
+ | Pnxorw => "Pnxorw"
+ | Pandnw => "Pandnw"
+ | Pornw => "Pornw"
+ | Psraw => "Psraw"
+ | Psrlw => "Psrlw"
+ | Psrxw => "Psrxw"
+ | Psllw => "Psllw"
+ | Paddl => "Paddl"
+ | Paddxl _ => "Paddxl"
+ | Psubl => "Psubl"
+ | Prevsubxl _ => "Prevsubxl"
+ | Pandl => "Pandl"
+ | Pnandl => "Pnandl"
+ | Porl => "Porl"
+ | Pnorl => "Pnorl"
+ | Pxorl => "Pxorl"
+ | Pnxorl => "Pnxorl"
+ | Pandnl => "Pandnl"
+ | Pornl => "Pornl"
+ | Pmull => "Pmull"
+ | Pslll => "Pslll"
+ | Psrll => "Psrll"
+ | Psrxl => "Psrxl"
+ | Psral => "Psral"
+ | Pfaddd => "Pfaddd"
+ | Pfaddw => "Pfaddw"
+ | Pfsbfd => "Pfsbfd"
+ | Pfsbfw => "Pfsbfw"
+ | Pfmuld => "Pfmuld"
+ | Pfmulw => "Pfmulw"
+ | Pfmind => "Pfmind"
+ | Pfminw => "Pfminw"
+ | Pfmaxd => "Pfmaxd"
+ | Pfmaxw => "Pfmaxw"
+ end.
+
+Definition string_of_name_rri32 (n: arith_name_rri32): pstring :=
+ match n with
+ Pcompiw _ => "Pcompiw"
+ | Paddiw => "Paddiw"
+ | Paddxiw _ => "Paddxiw"
+ | Prevsubiw => "Prevsubiw"
+ | Prevsubxiw _ => "Prevsubxiw"
+ | Pmuliw => "Pmuliw"
+ | Pandiw => "Pandiw"
+ | Pnandiw => "Pnandiw"
+ | Poriw => "Poriw"
+ | Pnoriw => "Pnoriw"
+ | Pxoriw => "Pxoriw"
+ | Pnxoriw => "Pnxoriw"
+ | Pandniw => "Pandniw"
+ | Porniw => "Porniw"
+ | Psraiw => "Psraiw"
+ | Psrliw => "Psrliw"
+ | Psrxiw => "Psrxiw"
+ | Pslliw => "Pslliw"
+ | Proriw => "Proriw"
+ | Psllil => "Psllil"
+ | Psrlil => "Psrlil"
+ | Psrail => "Psrail"
+ | Psrxil => "Psrxil"
+ end.
+
+Definition string_of_name_rri64 (n: arith_name_rri64): pstring :=
+ match n with
+ Pcompil _ => "Pcompil"
+ | Paddil => "Paddil"
+ | Prevsubil => "Prevsubil"
+ | Paddxil _ => "Paddxil"
+ | Prevsubxil _ => "Prevsubxil"
+ | Pmulil => "Pmulil"
+ | Pandil => "Pandil"
+ | Pnandil => "Pnandil"
+ | Poril => "Poril"
+ | Pnoril => "Pnoril"
+ | Pxoril => "Pxoril"
+ | Pnxoril => "Pnxoril"
+ | Pandnil => "Pandnil"
+ | Pornil => "Pornil"
+ end.
+
+Definition string_of_name_arrr (n: arith_name_arrr): pstring :=
+ match n with
+ | Pmaddw => "Pmaddw"
+ | Pmaddl => "Pmaddl"
+ | Pmsubw => "Pmsubw"
+ | Pmsubl => "Pmsubl"
+ | Pcmove _ => "Pcmove"
+ | Pcmoveu _ => "Pcmoveu"
+ | Pfmaddfw => "Pfmaddfw"
+ | Pfmaddfl => "Pfmaddfl"
+ | Pfmsubfw => "Pfmsubfw"
+ | Pfmsubfl => "Pfmsubfl"
+ end.
+
+Definition string_of_name_arr (n: arith_name_arr): pstring :=
+ match n with
+ | Pinsf _ _ => "Pinsf"
+ | Pinsfl _ _ => "Pinsfl"
+ end.
+
+Definition string_of_name_arri32 (n: arith_name_arri32): pstring :=
+ match n with
+ | Pmaddiw => "Pmaddw"
+ | Pcmoveiw _ => "Pcmoveiw"
+ | Pcmoveuiw _ => "Pcmoveuiw"
+ end.
+
+Definition string_of_name_arri64 (n: arith_name_arri64): pstring :=
+ match n with
+ | Pmaddil => "Pmaddl"
+ | Pcmoveil _ => "Pcmoveil"
+ | Pcmoveuil _ => "Pcmoveuil"
+ end.
+
+Definition string_of_arith (op: arith_op): pstring :=
+ match op with
+ | OArithR n => string_of_name_r n
+ | OArithRR n => string_of_name_rr n
+ | OArithRI32 n _ => string_of_name_ri32 n
+ | OArithRI64 n _ => string_of_name_ri64 n
+ | OArithRF32 n _ => string_of_name_rf32 n
+ | OArithRF64 n _ => string_of_name_rf64 n
+ | OArithRRR n => string_of_name_rrr n
+ | OArithRRI32 n _ => string_of_name_rri32 n
+ | OArithRRI64 n _ => string_of_name_rri64 n
+ | OArithARRR n => string_of_name_arrr n
+ | OArithARR n => string_of_name_arr n
+ | OArithARRI32 n _ => string_of_name_arri32 n
+ | OArithARRI64 n _ => string_of_name_arri64 n
+ end.
+
+Definition string_of_load_name (n: load_name) : pstring :=
+ match n with
+ Plb => "Plb"
+ | Plbu => "Plbu"
+ | Plh => "Plh"
+ | Plhu => "Plhu"
+ | Plw => "Plw"
+ | Plw_a => "Plw_a"
+ | Pld => "Pld"
+ | Pld_a => "Pld_a"
+ | Pfls => "Pfls"
+ | Pfld => "Pfld"
+ end.
+
+Definition string_of_load (op: load_op): pstring :=
+ match op with
+ | OLoadRRO n _ _ => string_of_load_name n
+ | OLoadRRR n _ => string_of_load_name n
+ | OLoadRRRXS n _ => string_of_load_name n
+ end.
+
+Definition string_of_store_name (n: store_name) : pstring :=
+ match n with
+ Psb => "Psb"
+ | Psh => "Psh"
+ | Psw => "Psw"
+ | Psw_a => "Psw_a"
+ | Psd => "Psd"
+ | Psd_a => "Psd_a"
+ | Pfss => "Pfss"
+ | Pfsd => "Pfsd"
+ end.
+
+Definition string_of_store (op: store_op) : pstring :=
+ match op with
+ | OStoreRRO n _ => string_of_store_name n
+ | OStoreRRR n => string_of_store_name n
+ | OStoreRRRXS n => string_of_store_name n
+ end.
+
+Definition string_of_control (op: control_op) : pstring :=
+ match op with
+ | Oj_l _ => "Oj_l"
+ | Ocb _ _ => "Ocb"
+ | Ocbu _ _ => "Ocbu"
+ | Odiv => "Odiv"
+ | Odivu => "Odivu"
+ | Ojumptable _ => "Ojumptable"
+ | OError => "OError"
+ | OIncremPC _ => "OIncremPC"
+ end.
+
+Definition string_of_op (op: P.op): ?? pstring :=
+ match op with
+ | Arith op => RET (string_of_arith op)
+ | Load op => RET (string_of_load op)
+ | Store op => RET (string_of_store op)
+ | Control op => RET (string_of_control op)
+ | Allocframe _ _ => RET (Str "Allocframe")
+ | Allocframe2 _ _ => RET (Str "Allocframe2")
+ | Freeframe _ _ => RET (Str "Freeframe")
+ | Freeframe2 _ _ => RET (Str "Freeframe2")
+ | Constant _ => RET (Str "Constant")
+ | Fail => RET (Str "Fail")
+ end.
+
+End SECT_BBLOCK_EQUIV.
+
+(** REWRITE RULES *)
+
+Definition is_constant (o: op): bool :=
+ match o with
+ | Constant _ | OArithR _ | OArithRI32 _ _ | OArithRI64 _ _ | OArithRF32 _ _ | OArithRF64 _ _ => true
+ | _ => false
+ end.
+
+Lemma is_constant_correct ge o: is_constant o = true -> op_eval ge o [] <> None.
+Proof.
+ destruct o; simpl in * |- *; try congruence.
+ destruct ao; simpl in * |- *; try congruence;
+ destruct n; simpl in * |- *; try congruence;
+ unfold arith_eval; destruct ge; simpl in * |- *; try congruence.
+Qed.
+
+Definition main_reduce (t: Terms.term):= RET (Terms.nofail is_constant t).
+
+Local Hint Resolve is_constant_correct: wlp.
+
+Lemma main_reduce_correct t:
+ WHEN main_reduce t ~> pt THEN Terms.match_pt t pt.
+Proof.
+ wlp_simplify.
+Qed.
+
+Definition reduce := {| Terms.result := main_reduce; Terms.result_correct := main_reduce_correct |}.
+
+Definition bblock_simu_test (verb: bool) (p1 p2: Asmvliw.bblock) : ?? bool :=
+ if verb then
+ IST.verb_bblock_simu_test reduce string_of_name string_of_op (trans_block p1) (trans_block p2)
+ else
+ IST.bblock_simu_test reduce (trans_block p1) (trans_block p2).
+
+Local Hint Resolve IST.bblock_simu_test_correct bblock_simu_reduce IST.verb_bblock_simu_test_correct: wlp.
+
+(** Main simulation (Impure) theorem *)
+Theorem bblock_simu_test_correct verb p1 p2 :
+ WHEN bblock_simu_test verb p1 p2 ~> b THEN b=true -> forall ge fn, Asmblockprops.bblock_simu ge fn p1 p2.
+Proof.
+ wlp_simplify.
+Qed.
+Hint Resolve bblock_simu_test_correct: wlp.
+
+(** ** Coerce bblock_simu_test into a pure function (this is a little unsafe like all oracles in CompCert). *)
+
+Import UnsafeImpure.
+
+Definition pure_bblock_simu_test (verb: bool) (p1 p2: Asmvliw.bblock): bool :=
+ match unsafe_coerce (bblock_simu_test verb p1 p2) with
+ | Some b => b
+ | None => false
+ end.
+
+Theorem pure_bblock_simu_test_correct verb p1 p2 ge fn: pure_bblock_simu_test verb p1 p2 = true -> Asmblockprops.bblock_simu ge fn p1 p2.
+Proof.
+ unfold pure_bblock_simu_test.
+ destruct (unsafe_coerce (bblock_simu_test verb p1 p2)) eqn: UNSAFE; try discriminate.
+ intros; subst. eapply bblock_simu_test_correct; eauto.
+ apply unsafe_coerce_not_really_correct; eauto.
+Qed.
+
+Definition bblock_simub: Asmvliw.bblock -> Asmvliw.bblock -> bool := pure_bblock_simu_test true.
+
+Lemma bblock_simub_correct p1 p2 ge fn: bblock_simub p1 p2 = true -> Asmblockprops.bblock_simu ge fn p1 p2.
+Proof.
+ eapply (pure_bblock_simu_test_correct true).
+Qed.*)
diff --git a/aarch64/Asmblockgen.v b/aarch64/Asmblockgen.v
index fc356d52..0a21485c 100644
--- a/aarch64/Asmblockgen.v
+++ b/aarch64/Asmblockgen.v
@@ -29,7 +29,7 @@ Local Open Scope error_monad_scope.
Definition ireg_of (r: mreg) : res ireg :=
match preg_of r with
- | IR' irs => match irs with
+ | IR irs => match irs with
| RR1 mr => OK mr
| _ => Error(msg "Asmgenblock.ireg_of")
end
@@ -37,7 +37,7 @@ Definition ireg_of (r: mreg) : res ireg :=
end.
Definition freg_of (r: mreg) : res freg :=
- match preg_of r with FR' mr => OK mr | _ => Error(msg "Asmgenblock.freg_of") end.
+ match preg_of r with FR mr => OK mr | _ => Error(msg "Asmgenblock.freg_of") end.
(** Recognition of immediate arguments for logical integer operations.*)
@@ -247,25 +247,25 @@ Definition indexed_memory_access_bc (insn: addressing -> basic)
Definition loadind (base: iregsp) (ofs: ptrofs) (ty: typ) (dst: mreg) (k: bcode) :=
match ty, preg_of dst with
- | Tint, IR' rd => OK (indexed_memory_access_bc (PLoad Pldrw rd) 4 base ofs k)
- | Tlong, IR' rd => OK (indexed_memory_access_bc (PLoad Pldrx rd) 8 base ofs k)
- | Tsingle, FR' rd => OK (indexed_memory_access_bc (PLoad Pldrs rd) 4 base ofs k)
- | Tfloat, FR' rd => OK (indexed_memory_access_bc (PLoad Pldrd rd) 8 base ofs k)
- | Tany32, IR' rd => OK (indexed_memory_access_bc (PLoad Pldrw_a rd) 4 base ofs k)
- | Tany64, IR' rd => OK (indexed_memory_access_bc (PLoad Pldrx_a rd) 8 base ofs k)
- | Tany64, FR' rd => OK (indexed_memory_access_bc (PLoad Pldrd_a rd) 8 base ofs k)
+ | Tint, IR rd => OK (indexed_memory_access_bc (PLoad Pldrw rd) 4 base ofs k)
+ | Tlong, IR rd => OK (indexed_memory_access_bc (PLoad Pldrx rd) 8 base ofs k)
+ | Tsingle, FR rd => OK (indexed_memory_access_bc (PLoad Pldrs rd) 4 base ofs k)
+ | Tfloat, FR rd => OK (indexed_memory_access_bc (PLoad Pldrd rd) 8 base ofs k)
+ | Tany32, IR rd => OK (indexed_memory_access_bc (PLoad Pldrw_a rd) 4 base ofs k)
+ | Tany64, IR rd => OK (indexed_memory_access_bc (PLoad Pldrx_a rd) 8 base ofs k)
+ | Tany64, FR rd => OK (indexed_memory_access_bc (PLoad Pldrd_a rd) 8 base ofs k)
| _, _ => Error (msg "Asmgen.loadind")
end.
Definition storeind (src: mreg) (base: iregsp) (ofs: ptrofs) (ty: typ) (k: bcode) :=
match ty, preg_of src with
- | Tint, IR' rd => OK (indexed_memory_access_bc (PStore Pstrw rd) 4 base ofs k)
- | Tlong, IR' rd => OK (indexed_memory_access_bc (PStore Pstrx rd) 8 base ofs k)
- | Tsingle, FR' rd => OK (indexed_memory_access_bc (PStore Pstrs rd) 4 base ofs k)
- | Tfloat, FR' rd => OK (indexed_memory_access_bc (PStore Pstrd rd) 8 base ofs k)
- | Tany32, IR' rd => OK (indexed_memory_access_bc (PStore Pstrw_a rd) 4 base ofs k)
- | Tany64, IR' rd => OK (indexed_memory_access_bc (PStore Pstrx_a rd) 8 base ofs k)
- | Tany64, FR' rd => OK (indexed_memory_access_bc (PStore Pstrd_a rd) 8 base ofs k)
+ | Tint, IR rd => OK (indexed_memory_access_bc (PStore Pstrw rd) 4 base ofs k)
+ | Tlong, IR rd => OK (indexed_memory_access_bc (PStore Pstrx rd) 8 base ofs k)
+ | Tsingle, FR rd => OK (indexed_memory_access_bc (PStore Pstrs rd) 4 base ofs k)
+ | Tfloat, FR rd => OK (indexed_memory_access_bc (PStore Pstrd rd) 8 base ofs k)
+ | Tany32, IR rd => OK (indexed_memory_access_bc (PStore Pstrw_a rd) 4 base ofs k)
+ | Tany64, IR rd => OK (indexed_memory_access_bc (PStore Pstrx_a rd) 8 base ofs k)
+ | Tany64, FR rd => OK (indexed_memory_access_bc (PStore Pstrd_a rd) 8 base ofs k)
| _, _ => Error (msg "Asmgen.storeind")
end.
@@ -621,8 +621,8 @@ Definition transl_op
match op, args with
| Omove, a1 :: nil =>
match preg_of res, preg_of a1 with
- | IR' r, IR' a => OK (Pmov r a ::bi k)
- | FR' r, FR' a => OK (Pfmov r a ::bi k)
+ | IR r, IR a => OK (Pmov r a ::bi k)
+ | FR r, FR a => OK (Pfmov r a ::bi k)
| _ , _ => Error(msg "Asmgenblock.Omove")
end
| Ointconst n, nil =>
@@ -1009,10 +1009,10 @@ Definition transl_op
(** Conditional move *)
| Osel cmp ty, a1 :: a2 :: args =>
match preg_of res with
- | IR' r =>
+ | IR r =>
do r1 <- ireg_of a1; do r2 <- ireg_of a2;
transl_cond cmp args (Pcsel r r1 r2 (cond_for_cond cmp) ::bi k)
- | FR' r =>
+ | FR r =>
do r1 <- freg_of a1; do r2 <- freg_of a2;
transl_cond cmp args (Pcsel r r1 r2 (cond_for_cond cmp) ::bi k)
| _ =>
diff --git a/aarch64/Asmblockgenproof0.v b/aarch64/Asmblockgenproof0.v
new file mode 100644
index 00000000..92c0aa58
--- /dev/null
+++ b/aarch64/Asmblockgenproof0.v
@@ -0,0 +1,983 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* Xavier Leroy INRIA Paris-Rocquencourt *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+(** * "block" version of Asmgenproof0
+
+ This module is largely adapted from Asmgenproof0.v of the other backends
+ It needs to stand apart because of the block structure, and the distinction control/basic that there isn't in the other backends
+ It has similar definitions than Asmgenproof0, but adapted to this new structure *)
+
+Require Import Coqlib.
+Require Intv.
+Require Import AST.
+Require Import Errors.
+Require Import Integers.
+Require Import Floats.
+Require Import Values.
+Require Import Memory.
+Require Import Globalenvs.
+Require Import Events.
+Require Import Smallstep.
+Require Import Locations.
+Require Import Machblock.
+Require Import Asmblock.
+Require Import Asmblockgen.
+Require Import Conventions1.
+Require Import Axioms.
+Require Import Machblockgenproof. (* FIXME: only use to import [is_tail_app] and [is_tail_app_inv] *)
+Require Import Asmblockprops.
+
+Module MB:=Machblock.
+Module AB:=Asmblock.
+
+(*
+Lemma ireg_of_eq:
+ forall r r', ireg_of r = OK r' -> preg_of r = IR r'.
+Proof.
+ unfold ireg_of; intros. destruct (preg_of r); inv H; auto.
+Qed.
+
+Lemma freg_of_eq:
+ forall r r', freg_of r = OK r' -> preg_of r = IR r'.
+Proof.
+ unfold freg_of; intros. destruct (preg_of r); inv H; auto.
+Qed.
+
+Lemma preg_of_injective:
+ forall r1 r2, preg_of r1 = preg_of r2 -> r1 = r2.
+Proof.
+ destruct r1; destruct r2; simpl; intros; reflexivity || discriminate.
+Qed.
+
+Lemma undef_regs_other:
+ forall r rl rs,
+ (forall r', In r' rl -> r <> r') ->
+ undef_regs rl rs r = rs r.
+Proof.
+ induction rl; simpl; intros. auto.
+ rewrite IHrl by auto. rewrite Pregmap.gso; auto.
+Qed.
+
+Fixpoint preg_notin (r: preg) (rl: list mreg) : Prop :=
+ match rl with
+ | nil => True
+ | r1 :: nil => r <> preg_of r1
+ | r1 :: rl => r <> preg_of r1 /\ preg_notin r rl
+ end.
+
+Remark preg_notin_charact:
+ forall r rl,
+ preg_notin r rl <-> (forall mr, In mr rl -> r <> preg_of mr).
+Proof.
+ induction rl; simpl; intros.
+ tauto.
+ destruct rl.
+ simpl. split. intros. intuition congruence. auto.
+ rewrite IHrl. split.
+ intros [A B]. intros. destruct H. congruence. auto.
+ auto.
+Qed.
+
+Lemma undef_regs_other_2:
+ forall r rl rs,
+ preg_notin r rl ->
+ undef_regs (map preg_of rl) rs r = rs r.
+Proof.
+ intros. apply undef_regs_other. intros.
+ exploit list_in_map_inv; eauto. intros [mr [A B]]. subst.
+ rewrite preg_notin_charact in H. auto.
+Qed.
+
+(** * Agreement between Mach registers and processor registers *)
+
+Record agree (ms: Mach.regset) (sp: val) (rs: AB.regset) : Prop := mkagree {
+ agree_sp: rs#SP = sp;
+ agree_sp_def: sp <> Vundef;
+ agree_mregs: forall r: mreg, Val.lessdef (ms r) (rs#(preg_of r))
+}.
+
+Lemma preg_val:
+ forall ms sp rs r, agree ms sp rs -> Val.lessdef (ms r) rs#(preg_of r).
+Proof.
+ intros. destruct H. auto.
+Qed.
+
+Lemma preg_vals:
+ forall ms sp rs, agree ms sp rs ->
+ forall l, Val.lessdef_list (map ms l) (map rs (map preg_of l)).
+Proof.
+ induction l; simpl. constructor. constructor. eapply preg_val; eauto. auto.
+Qed.
+
+Lemma sp_val:
+ forall ms sp rs, agree ms sp rs -> sp = rs#SP.
+Proof.
+ intros. destruct H; auto.
+Qed.
+
+Lemma ireg_val:
+ forall ms sp rs r r',
+ agree ms sp rs ->
+ ireg_of r = OK r' ->
+ Val.lessdef (ms r) rs#r'.
+Proof.
+ intros. rewrite <- (ireg_of_eq _ _ H0). eapply preg_val; eauto.
+Qed.
+
+Lemma freg_val:
+ forall ms sp rs r r',
+ agree ms sp rs ->
+ freg_of r = OK r' ->
+ Val.lessdef (ms r) (rs#r').
+Proof.
+ intros. rewrite <- (freg_of_eq _ _ H0). eapply preg_val; eauto.
+Qed.
+
+Lemma agree_exten:
+ forall ms sp rs rs',
+ agree ms sp rs ->
+ (forall r, data_preg r = true -> rs'#r = rs#r) ->
+ agree ms sp rs'.
+Proof.
+ intros. destruct H. split; auto.
+ rewrite H0; auto. auto.
+ intros. rewrite H0; auto. apply preg_of_data.
+Qed.
+
+(** Preservation of register agreement under various assignments. *)
+
+Lemma agree_set_mreg:
+ forall ms sp rs r v rs',
+ agree ms sp rs ->
+ Val.lessdef v (rs'#(preg_of r)) ->
+ (forall r', data_preg r' = true -> r' <> preg_of r -> rs'#r' = rs#r') ->
+ agree (Mach.Regmap.set r v ms) sp rs'.
+Proof.
+ intros. destruct H. split; auto.
+ rewrite H1; auto. apply not_eq_sym. apply preg_of_not_SP.
+ intros. unfold Mach.Regmap.set. destruct (Mach.RegEq.eq r0 r). congruence.
+ rewrite H1. auto. apply preg_of_data.
+ red; intros; elim n. eapply preg_of_injective; eauto.
+Qed.
+
+Corollary agree_set_mreg_parallel:
+ forall ms sp rs r v v',
+ agree ms sp rs ->
+ Val.lessdef v v' ->
+ agree (Mach.Regmap.set r v ms) sp (Pregmap.set (preg_of r) v' rs).
+Proof.
+ intros. eapply agree_set_mreg; eauto. rewrite Pregmap.gss; auto. intros; apply Pregmap.gso; auto.
+Qed.
+
+Lemma agree_set_other:
+ forall ms sp rs r v,
+ agree ms sp rs ->
+ data_preg r = false ->
+ agree ms sp (rs#r <- v).
+Proof.
+ intros. apply agree_exten with rs. auto.
+ intros. apply Pregmap.gso. congruence.
+Qed.
+
+Lemma agree_nextblock:
+ forall ms sp rs b,
+ agree ms sp rs -> agree ms sp (nextblock b rs).
+Proof.
+ intros. unfold nextblock. apply agree_set_other. auto. auto.
+Qed.
+
+Lemma agree_set_pair:
+ forall sp p v v' ms rs,
+ agree ms sp rs ->
+ Val.lessdef v v' ->
+ agree (Mach.set_pair p v ms) sp (set_pair (map_rpair preg_of p) v' rs).
+Proof.
+ intros. destruct p; simpl.
+- apply agree_set_mreg_parallel; auto.
+- apply agree_set_mreg_parallel. apply agree_set_mreg_parallel; auto.
+ apply Val.hiword_lessdef; auto. apply Val.loword_lessdef; auto.
+Qed.
+
+Lemma agree_undef_nondata_regs:
+ forall ms sp rl rs,
+ agree ms sp rs ->
+ (forall r, In r rl -> data_preg r = false) ->
+ agree ms sp (undef_regs rl rs).
+Proof.
+ induction rl; simpl; intros. auto.
+ apply IHrl. apply agree_exten with rs; auto.
+ intros. apply Pregmap.gso. red; intros; subst.
+ assert (data_preg a = false) by auto. congruence.
+ intros. apply H0; auto.
+Qed.
+
+Lemma agree_undef_regs:
+ forall ms sp rl rs rs',
+ agree ms sp rs ->
+ (forall r', data_preg r' = true -> preg_notin r' rl -> rs'#r' = rs#r') ->
+ agree (Mach.undef_regs rl ms) sp rs'.
+Proof.
+ intros. destruct H. split; auto.
+ rewrite <- agree_sp0. apply H0; auto.
+ rewrite preg_notin_charact. intros. apply not_eq_sym. apply preg_of_not_SP.
+ intros. destruct (In_dec mreg_eq r rl).
+ rewrite Mach.undef_regs_same; auto.
+ rewrite Mach.undef_regs_other; auto. rewrite H0; auto.
+ apply preg_of_data.
+ rewrite preg_notin_charact. intros; red; intros. elim n.
+ exploit preg_of_injective; eauto. congruence.
+Qed.
+
+Lemma agree_set_undef_mreg:
+ forall ms sp rs r v rl rs',
+ agree ms sp rs ->
+ Val.lessdef v (rs'#(preg_of r)) ->
+ (forall r', data_preg r' = true -> r' <> preg_of r -> preg_notin r' rl -> rs'#r' = rs#r') ->
+ agree (Mach.Regmap.set r v (Mach.undef_regs rl ms)) sp rs'.
+Proof.
+ intros. apply agree_set_mreg with (rs'#(preg_of r) <- (rs#(preg_of r))); auto.
+ apply agree_undef_regs with rs; auto.
+ intros. unfold Pregmap.set. destruct (PregEq.eq r' (preg_of r)).
+ congruence. auto.
+ intros. rewrite Pregmap.gso; auto.
+Qed.
+
+Lemma agree_undef_caller_save_regs:
+ forall ms sp rs,
+ agree ms sp rs ->
+ agree (Mach.undef_caller_save_regs ms) sp (undef_caller_save_regs rs).
+Proof.
+ intros. destruct H. unfold Mach.undef_caller_save_regs, undef_caller_save_regs; split.
+- unfold proj_sumbool; rewrite dec_eq_true. auto.
+- auto.
+- intros. unfold proj_sumbool. rewrite dec_eq_false by (apply preg_of_not_SP).
+ destruct (List.in_dec preg_eq (preg_of r) (List.map preg_of (List.filter is_callee_save all_mregs))); simpl.
++ apply list_in_map_inv in i. destruct i as (mr & A & B).
+ assert (r = mr) by (apply preg_of_injective; auto). subst mr; clear A.
+ apply List.filter_In in B. destruct B as [C D]. rewrite D. auto.
++ destruct (is_callee_save r) eqn:CS; auto.
+ elim n. apply List.in_map. apply List.filter_In. auto using all_mregs_complete.
+Qed.
+
+Lemma agree_change_sp:
+ forall ms sp rs sp',
+ agree ms sp rs -> sp' <> Vundef ->
+ agree ms sp' (rs#SP <- sp').
+Proof.
+ intros. inv H. split; auto.
+ intros. rewrite Pregmap.gso; auto with asmgen.
+Qed.
+
+(** Connection between Mach and Asm calling conventions for external
+ functions. *)
+
+Lemma extcall_arg_match:
+ forall ms sp rs m m' l v,
+ agree ms sp rs ->
+ Mem.extends m m' ->
+ Mach.extcall_arg ms m sp l v ->
+ exists v', AB.extcall_arg rs m' l v' /\ Val.lessdef v v'.
+Proof.
+ intros. inv H1.
+ exists (rs#(preg_of r)); split. constructor. eapply preg_val; eauto.
+ unfold Mach.load_stack in H2.
+ exploit Mem.loadv_extends; eauto. intros [v' [A B]].
+ rewrite (sp_val _ _ _ H) in A.
+ exists v'; split; auto.
+ econstructor. eauto. assumption.
+Qed.
+
+Lemma extcall_arg_pair_match:
+ forall ms sp rs m m' p v,
+ agree ms sp rs ->
+ Mem.extends m m' ->
+ Mach.extcall_arg_pair ms m sp p v ->
+ exists v', AB.extcall_arg_pair rs m' p v' /\ Val.lessdef v v'.
+Proof.
+ intros. inv H1.
+- exploit extcall_arg_match; eauto. intros (v' & A & B). exists v'; split; auto. constructor; auto.
+- exploit extcall_arg_match. eauto. eauto. eexact H2. intros (v1 & A1 & B1).
+ exploit extcall_arg_match. eauto. eauto. eexact H3. intros (v2 & A2 & B2).
+ exists (Val.longofwords v1 v2); split. constructor; auto. apply Val.longofwords_lessdef; auto.
+Qed.
+
+
+Lemma extcall_args_match:
+ forall ms sp rs m m', agree ms sp rs -> Mem.extends m m' ->
+ forall ll vl,
+ list_forall2 (Mach.extcall_arg_pair ms m sp) ll vl ->
+ exists vl', list_forall2 (AB.extcall_arg_pair rs m') ll vl' /\ Val.lessdef_list vl vl'.
+Proof.
+ induction 3; intros.
+ exists (@nil val); split. constructor. constructor.
+ exploit extcall_arg_pair_match; eauto. intros [v1' [A B]].
+ destruct IHlist_forall2 as [vl' [C D]].
+ exists (v1' :: vl'); split; constructor; auto.
+Qed.
+
+Lemma extcall_arguments_match:
+ forall ms m m' sp rs sg args,
+ agree ms sp rs -> Mem.extends m m' ->
+ Mach.extcall_arguments ms m sp sg args ->
+ exists args', AB.extcall_arguments rs m' sg args' /\ Val.lessdef_list args args'.
+Proof.
+ unfold Mach.extcall_arguments, AB.extcall_arguments; intros.
+ eapply extcall_args_match; eauto.
+Qed.
+
+Remark builtin_arg_match:
+ forall ge (rs: regset) sp m a v,
+ eval_builtin_arg ge (fun r => rs (preg_of r)) sp m a v ->
+ eval_builtin_arg ge rs sp m (map_builtin_arg preg_of a) v.
+Proof.
+ induction 1; simpl; eauto with barg.
+Qed.
+
+Lemma builtin_args_match:
+ forall ge ms sp rs m m', agree ms sp rs -> Mem.extends m m' ->
+ forall al vl, eval_builtin_args ge ms sp m al vl ->
+ exists vl', eval_builtin_args ge rs sp m' (map (map_builtin_arg preg_of) al) vl'
+ /\ Val.lessdef_list vl vl'.
+Proof.
+ induction 3; intros; simpl.
+ exists (@nil val); split; constructor.
+ exploit (@eval_builtin_arg_lessdef _ ge ms (fun r => rs (preg_of r))); eauto.
+ intros; eapply preg_val; eauto.
+ intros (v1' & A & B).
+ destruct IHlist_forall2 as [vl' [C D]].
+ exists (v1' :: vl'); split; constructor; auto. apply builtin_arg_match; auto.
+Qed.
+
+Lemma agree_set_res:
+ forall res ms sp rs v v',
+ agree ms sp rs ->
+ Val.lessdef v v' ->
+ agree (Mach.set_res res v ms) sp (AB.set_res (map_builtin_res preg_of res) v' rs).
+Proof.
+ induction res; simpl; intros.
+- eapply agree_set_mreg; eauto. rewrite Pregmap.gss. auto.
+ intros. apply Pregmap.gso; auto.
+- auto.
+- apply IHres2. apply IHres1. auto.
+ apply Val.hiword_lessdef; auto.
+ apply Val.loword_lessdef; auto.
+Qed.
+
+Lemma set_res_other:
+ forall r res v rs,
+ data_preg r = false ->
+ set_res (map_builtin_res preg_of res) v rs r = rs r.
+Proof.
+ induction res; simpl; intros.
+- apply Pregmap.gso. red; intros; subst r. rewrite preg_of_data in H; discriminate.
+- auto.
+- rewrite IHres2, IHres1; auto.
+Qed.
+
+(* inspired from Mach *)
+
+Lemma find_label_tail:
+ forall lbl c c', MB.find_label lbl c = Some c' -> is_tail c' c.
+Proof.
+ induction c; simpl; intros. discriminate.
+ destruct (MB.is_label lbl a). inv H. auto with coqlib. eauto with coqlib.
+Qed.
+
+(* inspired from Asmgenproof0 *)
+
+(* ... skip ... *)
+
+(** The ``code tail'' of an instruction list [c] is the list of instructions
+ starting at PC [pos]. *)
+
+Inductive code_tail: Z -> bblocks -> bblocks -> Prop :=
+ | code_tail_0: forall c,
+ code_tail 0 c c
+ | code_tail_S: forall pos bi c1 c2,
+ code_tail pos c1 c2 ->
+ code_tail (pos + (size bi)) (bi :: c1) c2.
+
+Lemma code_tail_pos:
+ forall pos c1 c2, code_tail pos c1 c2 -> pos >= 0.
+Proof.
+ induction 1. omega. generalize (size_positive bi); intros; omega.
+Qed.
+
+Lemma find_bblock_tail:
+ forall c1 bi c2 pos,
+ code_tail pos c1 (bi :: c2) ->
+ find_bblock pos c1 = Some bi.
+Proof.
+ induction c1; simpl; intros.
+ inversion H.
+ destruct (zlt pos 0). generalize (code_tail_pos _ _ _ H); intro; omega.
+ destruct (zeq pos 0). subst pos.
+ inv H. auto. generalize (size_positive a) (code_tail_pos _ _ _ H4). intro; omega.
+ inv H. congruence. replace (pos0 + size a - size a) with pos0 by omega.
+ eauto.
+Qed.
+
+
+Local Hint Resolve code_tail_0 code_tail_S: core.
+
+Lemma code_tail_next:
+ forall fn ofs c0,
+ code_tail ofs fn c0 ->
+ forall bi c1, c0 = bi :: c1 -> code_tail (ofs + (size bi)) fn c1.
+Proof.
+ induction 1; intros.
+ - subst; eauto.
+ - replace (pos + size bi + size bi0) with ((pos + size bi0) + size bi); eauto.
+ omega.
+Qed.
+
+Lemma size_blocks_pos c: 0 <= size_blocks c.
+Proof.
+ induction c as [| a l ]; simpl; try omega.
+ generalize (size_positive a); omega.
+Qed.
+
+Remark code_tail_positive:
+ forall fn ofs c,
+ code_tail ofs fn c -> 0 <= ofs.
+Proof.
+ induction 1; intros; simpl.
+ - omega.
+ - generalize (size_positive bi). omega.
+Qed.
+
+Remark code_tail_size:
+ forall fn ofs c,
+ code_tail ofs fn c -> size_blocks fn = ofs + size_blocks c.
+Proof.
+ induction 1; intros; simpl; try omega.
+Qed.
+
+Remark code_tail_bounds fn ofs c:
+ code_tail ofs fn c -> 0 <= ofs <= size_blocks fn.
+Proof.
+ intro H;
+ exploit code_tail_size; eauto.
+ generalize (code_tail_positive _ _ _ H), (size_blocks_pos c).
+ omega.
+Qed.
+
+Local Hint Resolve code_tail_next: core.
+
+Lemma code_tail_next_int:
+ forall fn ofs bi c,
+ size_blocks fn <= Ptrofs.max_unsigned ->
+ code_tail (Ptrofs.unsigned ofs) fn (bi :: c) ->
+ code_tail (Ptrofs.unsigned (Ptrofs.add ofs (Ptrofs.repr (size bi)))) fn c.
+Proof.
+ intros.
+ exploit code_tail_size; eauto.
+ simpl; generalize (code_tail_positive _ _ _ H0), (size_positive bi), (size_blocks_pos c).
+ intros.
+ rewrite Ptrofs.add_unsigned, Ptrofs.unsigned_repr.
+ - rewrite Ptrofs.unsigned_repr; eauto.
+ omega.
+ - rewrite Ptrofs.unsigned_repr; omega.
+Qed.
+
+(** Predictor for return addresses in generated Asm code.
+
+ The [return_address_offset] predicate defined here is used in the
+ semantics for Mach to determine the return addresses that are
+ stored in activation records. *)
+
+(** Consider a Mach function [f] and a sequence [c] of Mach instructions
+ representing the Mach code that remains to be executed after a
+ function call returns. The predicate [return_address_offset f c ofs]
+ holds if [ofs] is the integer offset of the PPC instruction
+ following the call in the Asm code obtained by translating the
+ code of [f]. Graphically:
+<<
+ Mach function f |--------- Mcall ---------|
+ Mach code c | |--------|
+ | \ \
+ | \ \
+ | \ \
+ Asm code | |--------|
+ Asm function |------------- Pcall ---------|
+
+ <-------- ofs ------->
+>>
+*)
+
+Definition return_address_offset (f: MB.function) (c: MB.code) (ofs: ptrofs) : Prop :=
+ forall tf tc,
+ transf_function f = OK tf ->
+ transl_blocks f c false = OK tc ->
+ code_tail (Ptrofs.unsigned ofs) (fn_blocks tf) tc.
+
+Lemma transl_blocks_tail:
+ forall f c1 c2, is_tail c1 c2 ->
+ forall tc2 ep2, transl_blocks f c2 ep2 = OK tc2 ->
+ exists tc1, exists ep1, transl_blocks f c1 ep1 = OK tc1 /\ is_tail tc1 tc2.
+Proof.
+ induction 1; simpl; intros.
+ exists tc2; exists ep2; split; auto with coqlib.
+ monadInv H0. exploit IHis_tail; eauto. intros (tc1 & ep1 & A & B).
+ exists tc1; exists ep1; split. auto.
+ eapply is_tail_trans with x0; eauto with coqlib.
+Qed.
+
+Lemma is_tail_code_tail:
+ forall c1 c2, is_tail c1 c2 -> exists ofs, code_tail ofs c2 c1.
+Proof.
+ induction 1; eauto.
+ destruct IHis_tail; eauto.
+Qed.
+
+Section RETADDR_EXISTS.
+
+Hypothesis transf_function_inv:
+ forall f tf, transf_function f = OK tf ->
+ exists tc ep, transl_blocks f (Machblock.fn_code f) ep = OK tc /\ is_tail tc (fn_blocks tf).
+
+Hypothesis transf_function_len:
+ forall f tf, transf_function f = OK tf -> size_blocks (fn_blocks tf) <= Ptrofs.max_unsigned.
+
+
+Lemma return_address_exists:
+ forall b f c, is_tail (b :: c) f.(MB.fn_code) ->
+ exists ra, return_address_offset f c ra.
+Proof.
+ intros. destruct (transf_function f) as [tf|] eqn:TF.
+ + exploit transf_function_inv; eauto. intros (tc1 & ep1 & TR1 & TL1).
+ exploit transl_blocks_tail; eauto. intros (tc2 & ep2 & TR2 & TL2).
+ monadInv TR2.
+ assert (TL3: is_tail x0 (fn_blocks tf)).
+ { apply is_tail_trans with tc1; auto.
+ apply is_tail_trans with (x++x0); auto. eapply is_tail_app.
+ }
+ exploit is_tail_code_tail. eexact TL3. intros [ofs CT].
+ exists (Ptrofs.repr ofs). red; intros.
+ rewrite Ptrofs.unsigned_repr. congruence.
+ exploit code_tail_bounds; eauto.
+ intros; apply transf_function_len in TF. omega.
+ + exists Ptrofs.zero; red; intros. congruence.
+Qed.
+
+End RETADDR_EXISTS.
+
+(** [transl_code_at_pc pc fb f c ep tf tc] holds if the code pointer [pc] points
+ within the Asmblock code generated by translating Machblock function [f],
+ and [tc] is the tail of the generated code at the position corresponding
+ to the code pointer [pc]. *)
+
+Inductive transl_code_at_pc (ge: MB.genv):
+ val -> block -> MB.function -> MB.code -> bool -> AB.function -> AB.bblocks -> Prop :=
+ transl_code_at_pc_intro:
+ forall b ofs f c ep tf tc,
+ Genv.find_funct_ptr ge b = Some(Internal f) ->
+ transf_function f = Errors.OK tf ->
+ transl_blocks f c ep = OK tc ->
+ code_tail (Ptrofs.unsigned ofs) (fn_blocks tf) tc ->
+ transl_code_at_pc ge (Vptr b ofs) b f c ep tf tc.
+
+Remark code_tail_no_bigger:
+ forall pos c1 c2, code_tail pos c1 c2 -> (length c2 <= length c1)%nat.
+Proof.
+ induction 1; simpl; omega.
+Qed.
+
+Remark code_tail_unique:
+ forall fn c pos pos',
+ code_tail pos fn c -> code_tail pos' fn c -> pos = pos'.
+Proof.
+ induction fn; intros until pos'; intros ITA CT; inv ITA; inv CT; auto.
+ generalize (code_tail_no_bigger _ _ _ H3); simpl; intro; omega.
+ generalize (code_tail_no_bigger _ _ _ H3); simpl; intro; omega.
+ f_equal. eauto.
+Qed.
+
+Lemma return_address_offset_correct:
+ forall ge b ofs fb f c tf tc ofs',
+ transl_code_at_pc ge (Vptr b ofs) fb f c false tf tc ->
+ return_address_offset f c ofs' ->
+ ofs' = ofs.
+Proof.
+ intros. inv H. red in H0.
+ exploit code_tail_unique. eexact H12. eapply H0; eauto. intro.
+ rewrite <- (Ptrofs.repr_unsigned ofs).
+ rewrite <- (Ptrofs.repr_unsigned ofs').
+ congruence.
+Qed.
+
+(** The [find_label] function returns the code tail starting at the
+ given label. A connection with [code_tail] is then established. *)
+
+Fixpoint find_label (lbl: label) (c: bblocks) {struct c} : option bblocks :=
+ match c with
+ | nil => None
+ | bb1 :: bbl => if is_label lbl bb1 then Some c else find_label lbl bbl
+ end.
+
+Lemma label_pos_code_tail:
+ forall lbl c pos c',
+ find_label lbl c = Some c' ->
+ exists pos',
+ label_pos lbl pos c = Some pos'
+ /\ code_tail (pos' - pos) c c'
+ /\ pos <= pos' <= pos + size_blocks c.
+Proof.
+ induction c.
+ simpl; intros. discriminate.
+ simpl; intros until c'.
+ case (is_label lbl a).
+ - intros. inv H. exists pos. split; auto. split.
+ replace (pos - pos) with 0 by omega. constructor. constructor; try omega.
+ generalize (size_blocks_pos c). generalize (size_positive a). omega.
+ - intros. generalize (IHc (pos+size a) c' H). intros [pos' [A [B C]]].
+ exists pos'. split. auto. split.
+ replace (pos' - pos) with ((pos' - (pos + (size a))) + (size a)) by omega.
+ constructor. auto. generalize (size_positive a). omega.
+Qed.
+
+(** Helper lemmas to reason about
+- the "code is tail of" property
+- correct translation of labels. *)
+
+Definition tail_nolabel (k c: bblocks) : Prop :=
+ is_tail k c /\ forall lbl, find_label lbl c = find_label lbl k.
+
+Lemma tail_nolabel_refl:
+ forall c, tail_nolabel c c.
+Proof.
+ intros; split. apply is_tail_refl. auto.
+Qed.
+
+Lemma tail_nolabel_trans:
+ forall c1 c2 c3, tail_nolabel c2 c3 -> tail_nolabel c1 c2 -> tail_nolabel c1 c3.
+Proof.
+ intros. destruct H; destruct H0; split.
+ eapply is_tail_trans; eauto.
+ intros. rewrite H1; auto.
+Qed.
+
+Definition nolabel (b: bblock) :=
+ match (header b) with nil => True | _ => False end.
+
+Hint Extern 1 (nolabel _) => exact I : labels.
+
+Lemma tail_nolabel_cons:
+ forall b c k,
+ nolabel b -> tail_nolabel k c -> tail_nolabel k (b :: c).
+Proof.
+ intros. destruct H0. split.
+ constructor; auto.
+ intros. simpl. rewrite <- H1. destruct b as [hd bdy ex]; simpl in *.
+ destruct hd as [|l hd]; simpl in *.
+ - assert (is_label lbl {| AB.header := nil; AB.body := bdy; AB.exit := ex; AB.correct := correct |} = false).
+ { apply is_label_correct_false. simpl header. apply in_nil. }
+ rewrite H2. auto.
+ - contradiction.
+Qed.
+
+Hint Resolve tail_nolabel_refl: labels.
+
+Ltac TailNoLabel :=
+ eauto with labels;
+ match goal with
+ | [ |- tail_nolabel _ (_ :: _) ] => apply tail_nolabel_cons; [auto; exact I | TailNoLabel]
+ | [ H: Error _ = OK _ |- _ ] => discriminate
+ | [ H: assertion_failed = OK _ |- _ ] => discriminate
+ | [ H: OK _ = OK _ |- _ ] => inv H; TailNoLabel
+ | [ H: bind _ _ = OK _ |- _ ] => monadInv H; TailNoLabel
+ | [ H: (if ?x then _ else _) = OK _ |- _ ] => destruct x; TailNoLabel
+ | [ H: match ?x with nil => _ | _ :: _ => _ end = OK _ |- _ ] => destruct x; TailNoLabel
+ | _ => idtac
+ end.
+
+Remark tail_nolabel_find_label:
+ forall lbl k c, tail_nolabel k c -> find_label lbl c = find_label lbl k.
+Proof.
+ intros. destruct H. auto.
+Qed.
+
+Remark tail_nolabel_is_tail:
+ forall k c, tail_nolabel k c -> is_tail k c.
+Proof.
+ intros. destruct H. auto.
+Qed.
+
+Lemma exec_body_pc:
+ forall ge l rs1 m1 rs2 m2,
+ exec_body ge l rs1 m1 = Next rs2 m2 ->
+ rs2 PC = rs1 PC.
+Proof.
+ induction l.
+ - intros. inv H. auto.
+ - intros until m2. intro EXEB.
+ inv EXEB. destruct (exec_basic_instr _ _ _ _) eqn:EBI; try discriminate.
+ eapply IHl in H0. rewrite H0.
+ erewrite exec_basic_instr_pc; eauto.
+Qed.
+
+Section STRAIGHTLINE.
+
+Variable ge: genv.
+Variable fn: function.
+
+(** Straight-line code is composed of processor instructions that execute
+ in sequence (no branches, no function calls and returns).
+ The following inductive predicate relates the machine states
+ before and after executing a straight-line sequence of instructions.
+ Instructions are taken from the first list instead of being fetched
+ from memory. *)
+
+Inductive exec_straight: list instruction -> regset -> mem ->
+ list instruction -> regset -> mem -> Prop :=
+ | exec_straight_one:
+ forall i1 c rs1 m1 rs2 m2,
+ exec_basic_instr ge i1 rs1 m1 = Next rs2 m2 ->
+ exec_straight ((PBasic i1) ::g c) rs1 m1 c rs2 m2
+ | exec_straight_step:
+ forall i c rs1 m1 rs2 m2 c' rs3 m3,
+ exec_basic_instr ge i rs1 m1 = Next rs2 m2 ->
+ exec_straight c rs2 m2 c' rs3 m3 ->
+ exec_straight ((PBasic i) :: c) rs1 m1 c' rs3 m3.
+
+Inductive exec_control_rel: option control -> bblock -> regset -> mem ->
+ regset -> mem -> Prop :=
+ | exec_control_rel_intro:
+ forall rs1 m1 b rs1' ctl rs2 m2,
+ rs1' = nextblock b rs1 ->
+ exec_control ge fn ctl rs1' m1 = Next rs2 m2 ->
+ exec_control_rel ctl b rs1 m1 rs2 m2.
+
+Inductive exec_bblock_rel: bblock -> regset -> mem -> regset -> mem -> Prop :=
+ | exec_bblock_rel_intro:
+ forall rs1 m1 b rs2 m2,
+ exec_bblock ge fn b rs1 m1 = Next rs2 m2 ->
+ exec_bblock_rel b rs1 m1 rs2 m2.
+
+Lemma exec_straight_body:
+ forall c l rs1 m1 rs2 m2,
+ exec_straight c rs1 m1 nil rs2 m2 ->
+ code_to_basics c = Some l ->
+ exec_body ge l rs1 m1 = Next rs2 m2.
+Proof.
+ induction c as [|i c].
+ - intros until m2. intros EXES CTB. inv EXES.
+ - intros until m2. intros EXES CTB. inv EXES.
+ + inv CTB. simpl. rewrite H6. auto.
+ + inv CTB. destruct (code_to_basics c); try discriminate. inv H0. eapply IHc in H7; eauto.
+ rewrite <- H7. simpl. rewrite H1. auto.
+Qed.
+
+Lemma exec_straight_body2:
+ forall c rs1 m1 c' rs2 m2,
+ exec_straight c rs1 m1 c' rs2 m2 ->
+ exists body,
+ exec_body ge body rs1 m1 = Next rs2 m2
+ /\ (basics_to_code body) ++g c' = c.
+Proof.
+ intros until m2. induction 1.
+ - exists (i1::nil). split; auto. simpl. rewrite H. auto.
+ - destruct IHexec_straight as (bdy & EXEB & BTC).
+ exists (i:: bdy). split; simpl.
+ + rewrite H. auto.
+ + congruence.
+Qed.
+
+Lemma exec_straight_trans:
+ forall c1 rs1 m1 c2 rs2 m2 c3 rs3 m3,
+ exec_straight c1 rs1 m1 c2 rs2 m2 ->
+ exec_straight c2 rs2 m2 c3 rs3 m3 ->
+ exec_straight c1 rs1 m1 c3 rs3 m3.
+Proof.
+ induction 1; intros.
+ apply exec_straight_step with rs2 m2; auto.
+ apply exec_straight_step with rs2 m2; auto.
+Qed.
+
+Lemma exec_straight_two:
+ forall i1 i2 c rs1 m1 rs2 m2 rs3 m3,
+ exec_basic_instr ge i1 rs1 m1 = Next rs2 m2 ->
+ exec_basic_instr ge i2 rs2 m2 = Next rs3 m3 ->
+ exec_straight (i1 ::g i2 ::g c) rs1 m1 c rs3 m3.
+Proof.
+ intros. apply exec_straight_step with rs2 m2; auto.
+ apply exec_straight_one; auto.
+Qed.
+
+Lemma exec_straight_three:
+ forall i1 i2 i3 c rs1 m1 rs2 m2 rs3 m3 rs4 m4,
+ exec_basic_instr ge i1 rs1 m1 = Next rs2 m2 ->
+ exec_basic_instr ge i2 rs2 m2 = Next rs3 m3 ->
+ exec_basic_instr ge i3 rs3 m3 = Next rs4 m4 ->
+ exec_straight (i1 ::g i2 ::g i3 ::g c) rs1 m1 c rs4 m4.
+Proof.
+ intros. apply exec_straight_step with rs2 m2; auto.
+ eapply exec_straight_two; eauto.
+Qed.
+
+(** Like exec_straight predicate, but on blocks *)
+
+Inductive exec_straight_blocks: bblocks -> regset -> mem ->
+ bblocks -> regset -> mem -> Prop :=
+ | exec_straight_blocks_one:
+ forall b1 c rs1 m1 rs2 m2,
+ exec_bblock ge fn b1 rs1 m1 = Next rs2 m2 ->
+ rs2#PC = Val.offset_ptr rs1#PC (Ptrofs.repr (size b1)) ->
+ exec_straight_blocks (b1 :: c) rs1 m1 c rs2 m2
+ | exec_straight_blocks_step:
+ forall b c rs1 m1 rs2 m2 c' rs3 m3,
+ exec_bblock ge fn b rs1 m1 = Next rs2 m2 ->
+ rs2#PC = Val.offset_ptr rs1#PC (Ptrofs.repr (size b)) ->
+ exec_straight_blocks c rs2 m2 c' rs3 m3 ->
+ exec_straight_blocks (b :: c) rs1 m1 c' rs3 m3.
+
+Lemma exec_straight_blocks_trans:
+ forall c1 rs1 m1 c2 rs2 m2 c3 rs3 m3,
+ exec_straight_blocks c1 rs1 m1 c2 rs2 m2 ->
+ exec_straight_blocks c2 rs2 m2 c3 rs3 m3 ->
+ exec_straight_blocks c1 rs1 m1 c3 rs3 m3.
+Proof.
+ induction 1; intros.
+ apply exec_straight_blocks_step with rs2 m2; auto.
+ apply exec_straight_blocks_step with rs2 m2; auto.
+Qed.
+
+(** Linking exec_straight with exec_straight_blocks *)
+
+Lemma exec_straight_pc:
+ forall c c' rs1 m1 rs2 m2,
+ exec_straight c rs1 m1 c' rs2 m2 ->
+ rs2 PC = rs1 PC.
+Proof.
+ induction c; intros; try (inv H; fail).
+ inv H.
+ - eapply exec_basic_instr_pc; eauto.
+ - rewrite (IHc c' rs3 m3 rs2 m2); auto.
+ erewrite exec_basic_instr_pc; eauto.
+Qed.
+
+Lemma regset_same_assign (rs: regset) r:
+ rs # r <- (rs r) = rs.
+Proof.
+ apply functional_extensionality. intros x. destruct (preg_eq x r); subst; Simpl.
+Qed.
+
+Lemma exec_straight_through_singleinst:
+ forall a b rs1 m1 rs2 m2 rs2' m2' lb,
+ bblock_single_inst (PBasic a) = b ->
+ exec_straight (a ::g nil) rs1 m1 nil rs2 m2 ->
+ nextblock b rs2 = rs2' -> m2 = m2' ->
+ exec_straight_blocks (b::lb) rs1 m1 lb rs2' m2'.
+Proof.
+ intros. subst. constructor 1. unfold exec_bblock. simpl body. erewrite exec_straight_body; eauto.
+ simpl. rewrite regset_same_assign. auto.
+ simpl; auto. unfold nextblock, incrPC; simpl. Simpl. erewrite exec_straight_pc; eauto.
+Qed.
+
+(** The following lemmas show that straight-line executions
+ (predicate [exec_straight_blocks]) correspond to correct Asm executions. *)
+
+Lemma exec_straight_steps_1:
+ forall c rs m c' rs' m',
+ exec_straight_blocks c rs m c' rs' m' ->
+ size_blocks (fn_blocks fn) <= Ptrofs.max_unsigned ->
+ forall b ofs,
+ rs#PC = Vptr b ofs ->
+ Genv.find_funct_ptr ge b = Some (Internal fn) ->
+ code_tail (Ptrofs.unsigned ofs) (fn_blocks fn) c ->
+ plus step ge (State rs m) E0 (State rs' m').
+Proof.
+ induction 1; intros.
+ apply plus_one.
+ econstructor; eauto.
+ eapply find_bblock_tail. eauto.
+ eapply plus_left'.
+ econstructor; eauto.
+ eapply find_bblock_tail. eauto.
+ apply IHexec_straight_blocks with b0 (Ptrofs.add ofs (Ptrofs.repr (size b))).
+ auto. rewrite H0. rewrite H3. reflexivity.
+ auto.
+ apply code_tail_next_int; auto.
+ traceEq.
+Qed.
+
+Lemma exec_straight_steps_2:
+ forall c rs m c' rs' m',
+ exec_straight_blocks c rs m c' rs' m' ->
+ size_blocks (fn_blocks fn) <= Ptrofs.max_unsigned ->
+ forall b ofs,
+ rs#PC = Vptr b ofs ->
+ Genv.find_funct_ptr ge b = Some (Internal fn) ->
+ code_tail (Ptrofs.unsigned ofs) (fn_blocks fn) c ->
+ exists ofs',
+ rs'#PC = Vptr b ofs'
+ /\ code_tail (Ptrofs.unsigned ofs') (fn_blocks fn) c'.
+Proof.
+ induction 1; intros.
+ exists (Ptrofs.add ofs (Ptrofs.repr (size b1))). split.
+ rewrite H0. rewrite H2. auto.
+ apply code_tail_next_int; auto.
+ apply IHexec_straight_blocks with (Ptrofs.add ofs (Ptrofs.repr (size b))).
+ auto. rewrite H0. rewrite H3. reflexivity. auto.
+ apply code_tail_next_int; auto.
+Qed.
+
+End STRAIGHTLINE.
+
+(** * Properties of the Machblock call stack *)
+
+Section MATCH_STACK.
+
+Variable ge: MB.genv.
+
+Inductive match_stack: list MB.stackframe -> Prop :=
+ | match_stack_nil:
+ match_stack nil
+ | match_stack_cons: forall fb sp ra c s f tf tc,
+ Genv.find_funct_ptr ge fb = Some (Internal f) ->
+ transl_code_at_pc ge ra fb f c false tf tc ->
+ sp <> Vundef ->
+ match_stack s ->
+ match_stack (Stackframe fb sp ra c :: s).
+
+Lemma parent_sp_def: forall s, match_stack s -> parent_sp s <> Vundef.
+Proof.
+ induction 1; simpl.
+ unfold Vnullptr; destruct Archi.ptr64; congruence.
+ auto.
+Qed.
+
+Lemma parent_ra_def: forall s, match_stack s -> parent_ra s <> Vundef.
+Proof.
+ induction 1; simpl.
+ unfold Vnullptr; destruct Archi.ptr64; congruence.
+ inv H0. congruence.
+Qed.
+
+Lemma lessdef_parent_sp:
+ forall s v,
+ match_stack s -> Val.lessdef (parent_sp s) v -> v = parent_sp s.
+Proof.
+ intros. inv H0. auto. exploit parent_sp_def; eauto. tauto.
+Qed.
+
+Lemma lessdef_parent_ra:
+ forall s v,
+ match_stack s -> Val.lessdef (parent_ra s) v -> v = parent_ra s.
+Proof.
+ intros. inv H0. auto. exploit parent_ra_def; eauto. tauto.
+Qed.
+
+End MATCH_STACK.*)
diff --git a/aarch64/Asmblockprops.v b/aarch64/Asmblockprops.v
new file mode 100644
index 00000000..714f83b8
--- /dev/null
+++ b/aarch64/Asmblockprops.v
@@ -0,0 +1,358 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+(** Common definition and proofs on Asmblock required by various modules *)
+
+Require Import Coqlib.
+Require Import Integers.
+Require Import Memory.
+Require Import Globalenvs.
+Require Import Values.
+Require Import Asmblock.
+Require Import Axioms.
+
+(*
+Definition bblock_simu (ge: Genv.t fundef unit) (f: function) (bb bb': bblock) :=
+ forall rs m,
+ exec_bblock ge f bb rs m <> Stuck ->
+ exec_bblock ge f bb rs m = exec_bblock ge f bb' rs m.
+
+Hint Extern 2 (_ <> _) => congruence: asmgen.
+
+Lemma preg_of_data:
+ forall r, data_preg (preg_of r) = true.
+Proof.
+ intros. destruct r; reflexivity.
+Qed.
+Hint Resolve preg_of_data: asmgen.
+
+Lemma data_diff:
+ forall r r',
+ data_preg r = true -> data_preg r' = false -> r <> r'.
+Proof.
+ congruence.
+Qed.
+Hint Resolve data_diff: asmgen.
+
+Lemma preg_of_not_PC:
+ forall r, preg_of r <> PC.
+Proof.
+ intros. apply data_diff; auto with asmgen.
+Qed.
+
+Lemma preg_of_not_SP:
+ forall r, preg_of r <> SP.
+Proof.
+ intros. unfold preg_of; destruct r; cbn; congruence.
+Qed.
+
+Hint Resolve preg_of_not_SP preg_of_not_PC: asmgen.
+
+
+Lemma nextblock_pc:
+ forall b rs, (nextblock b rs)#PC = Val.offset_ptr rs#PC (Ptrofs.repr (size b)).
+Proof.
+ intros. apply Pregmap.gss.
+Qed.
+
+Lemma nextblock_inv:
+ forall b r rs, r <> PC -> (nextblock b rs)#r = rs#r.
+Proof.
+ intros. unfold nextblock. apply Pregmap.gso. red; intro; subst. auto.
+Qed.
+
+Lemma nextblock_inv1:
+ forall b r rs, data_preg r = true -> (nextblock b rs)#r = rs#r.
+Proof.
+ intros. apply nextblock_inv. red; intro; subst; discriminate.
+Qed.
+
+Ltac Simplif :=
+ ((rewrite nextblock_inv by eauto with asmgen)
+ || (rewrite nextblock_inv1 by eauto with asmgen)
+ || (rewrite Pregmap.gss)
+ || (rewrite nextblock_pc)
+ || (rewrite Pregmap.gso by eauto with asmgen)
+ ); auto with asmgen.
+
+Ltac Simpl := repeat Simplif.
+
+(* For Asmblockgenproof0 *)
+
+Theorem exec_basic_instr_pc:
+ forall ge b rs1 m1 rs2 m2,
+ exec_basic_instr ge b rs1 m1 = Next rs2 m2 ->
+ rs2 PC = rs1 PC.
+Proof.
+ intros. destruct b; try destruct i; try destruct i.
+ all: try (inv H; Simpl).
+ 1-10: unfold parexec_load_offset in H1; destruct (eval_offset ofs); try discriminate; destruct (Mem.loadv _ _ _); unfold parexec_incorrect_load in *; destruct trap; try discriminate; unfold concrete_default_notrap_load_value in *; inv H1; Simpl; fail.
+
+ 1-20: unfold parexec_load_reg, parexec_load_regxs in H1; destruct (Mem.loadv _ _ _); unfold parexec_incorrect_load in *; destruct trap; try discriminate; unfold concrete_default_notrap_load_value in *; inv H1; Simpl; fail.
+
+ { (* PLoadQRRO *)
+ unfold parexec_load_q_offset in H1.
+ destruct (gpreg_q_expand _) as [r0 r1] in H1.
+ destruct (Mem.loadv _ _ _) in H1; try discriminate.
+ destruct (Mem.loadv _ _ _) in H1; try discriminate.
+ inv H1. Simpl. }
+ { (* PLoadORRO *)
+ unfold parexec_load_o_offset in H1.
+ destruct (gpreg_o_expand _) as [[[r0 r1] r2] r3] in H1.
+ destruct (Mem.loadv _ _ _) in H1; try discriminate.
+ destruct (Mem.loadv _ _ _) in H1; try discriminate.
+ destruct (Mem.loadv _ _ _) in H1; try discriminate.
+ destruct (Mem.loadv _ _ _) in H1; try discriminate.
+ inv H1. Simpl. }
+ 1-8: unfold parexec_store_offset in H1; destruct (eval_offset ofs); try discriminate; destruct (Mem.storev _ _ _); [inv H1; auto | discriminate]; fail.
+ 1-8: unfold parexec_store_reg in H1; destruct (Mem.storev _ _ _); [inv H1; Simpl | discriminate]; auto; fail.
+ 1-8: unfold parexec_store_regxs in H1; destruct (Mem.storev _ _ _); [inv H1; Simpl | discriminate]; auto; fail.
+
+ { (* PStoreQRRO *)
+ unfold parexec_store_q_offset in H1.
+ destruct (gpreg_q_expand _) as [r0 r1] in H1.
+ unfold eval_offset in H1; try discriminate.
+ destruct (Mem.storev _ _ _) in H1; try discriminate.
+ destruct (Mem.storev _ _ _) in H1; try discriminate.
+ inv H1. Simpl. reflexivity. }
+ { (* PStoreORRO *)
+ unfold parexec_store_o_offset in H1.
+ destruct (gpreg_o_expand _) as [[[r0 r1] r2] r3] in H1.
+ unfold eval_offset in H1; try discriminate.
+ destruct (Mem.storev _ _ _) in H1; try discriminate.
+ destruct (Mem.storev _ _ _) in H1; try discriminate.
+ destruct (Mem.storev _ _ _) in H1; try discriminate.
+ destruct (Mem.storev _ _ _) in H1; try discriminate.
+ inv H1. Simpl. reflexivity. }
+ - destruct (Mem.alloc _ _ _). destruct (Mem.store _ _ _ _ _). inv H1. Simpl. discriminate.
+ - destruct (Mem.loadv _ _ _); try discriminate. destruct (rs1 _); try discriminate.
+ destruct (Mem.free _ _ _ _). inv H1. Simpl. discriminate.
+ - destruct rs; try discriminate. inv H1. Simpl.
+ - destruct rd; try discriminate. inv H1; Simpl.
+ - reflexivity.
+Qed.
+
+(* For PostpassSchedulingproof *)
+
+Lemma regset_double_set:
+ forall r1 r2 (rs: regset) v1 v2,
+ r1 <> r2 ->
+ (rs # r1 <- v1 # r2 <- v2) = (rs # r2 <- v2 # r1 <- v1).
+Proof.
+ intros. apply functional_extensionality. intros r. destruct (preg_eq r r1).
+ - subst. rewrite Pregmap.gso; auto. repeat (rewrite Pregmap.gss). auto.
+ - destruct (preg_eq r r2).
+ + subst. rewrite Pregmap.gss. rewrite Pregmap.gso; auto. rewrite Pregmap.gss. auto.
+ + repeat (rewrite Pregmap.gso; auto).
+Qed.
+
+Lemma next_eq:
+ forall (rs rs': regset) m m',
+ rs = rs' -> m = m' -> Next rs m = Next rs' m'.
+Proof.
+ intros; apply f_equal2; auto.
+Qed.
+
+Lemma exec_load_offset_pc_var:
+ forall trap t rs m rd ra ofs rs' m' v,
+ exec_load_offset trap t rs m rd ra ofs = Next rs' m' ->
+ exec_load_offset trap t rs # PC <- v m rd ra ofs = Next rs' # PC <- v m'.
+Proof.
+ intros. unfold exec_load_offset in *. unfold parexec_load_offset in *. rewrite Pregmap.gso; try discriminate. destruct (eval_offset ofs); try discriminate.
+ destruct (Mem.loadv _ _ _).
+ - inv H. apply next_eq; auto. apply functional_extensionality. intros. rewrite regset_double_set; auto. discriminate.
+ - unfold parexec_incorrect_load in *.
+ destruct trap; try discriminate.
+ inv H. apply next_eq; auto. apply functional_extensionality. intros. rewrite regset_double_set; auto. discriminate.
+Qed.
+
+Lemma exec_load_reg_pc_var:
+ forall trap t rs m rd ra ro rs' m' v,
+ exec_load_reg trap t rs m rd ra ro = Next rs' m' ->
+ exec_load_reg trap t rs # PC <- v m rd ra ro = Next rs' # PC <- v m'.
+Proof.
+ intros. unfold exec_load_reg in *. unfold parexec_load_reg in *. rewrite Pregmap.gso; try discriminate.
+ destruct (Mem.loadv _ _ _).
+ - inv H. apply next_eq; auto. apply functional_extensionality. intros. rewrite regset_double_set; auto. discriminate.
+ - unfold parexec_incorrect_load in *.
+ destruct trap; try discriminate.
+ inv H. apply next_eq; auto. apply functional_extensionality. intros. rewrite regset_double_set; auto. discriminate.
+Qed.
+
+Lemma exec_load_regxs_pc_var:
+ forall trap t rs m rd ra ro rs' m' v,
+ exec_load_regxs trap t rs m rd ra ro = Next rs' m' ->
+ exec_load_regxs trap t rs # PC <- v m rd ra ro = Next rs' # PC <- v m'.
+Proof.
+ intros. unfold exec_load_regxs in *. unfold parexec_load_regxs in *. rewrite Pregmap.gso; try discriminate.
+ destruct (Mem.loadv _ _ _).
+ - inv H. apply next_eq; auto. apply functional_extensionality. intros. rewrite regset_double_set; auto. discriminate.
+ - unfold parexec_incorrect_load in *.
+ destruct trap; try discriminate.
+ inv H. apply next_eq; auto. apply functional_extensionality. intros. rewrite regset_double_set; auto. discriminate.
+Qed.
+
+Lemma exec_load_offset_q_pc_var:
+ forall rs m rd ra ofs rs' m' v,
+ exec_load_q_offset rs m rd ra ofs = Next rs' m' ->
+ exec_load_q_offset rs # PC <- v m rd ra ofs = Next rs' # PC <- v m'.
+Proof.
+ intros. unfold exec_load_q_offset in *. unfold parexec_load_q_offset in *.
+ destruct (gpreg_q_expand rd) as [rd0 rd1].
+ (* destruct (ireg_eq rd0 ra); try discriminate. *)
+ rewrite Pregmap.gso; try discriminate.
+ destruct (Mem.loadv _ _ _); try discriminate.
+ inv H.
+ destruct (Mem.loadv _ _ _); try discriminate.
+ inv H1. f_equal.
+ rewrite (regset_double_set PC rd0) by discriminate.
+ rewrite (regset_double_set PC rd1) by discriminate.
+ reflexivity.
+Qed.
+
+Lemma exec_load_offset_o_pc_var:
+ forall rs m rd ra ofs rs' m' v,
+ exec_load_o_offset rs m rd ra ofs = Next rs' m' ->
+ exec_load_o_offset rs # PC <- v m rd ra ofs = Next rs' # PC <- v m'.
+Proof.
+ intros. unfold exec_load_o_offset in *. unfold parexec_load_o_offset in *.
+ destruct (gpreg_o_expand rd) as [[[rd0 rd1] rd2] rd3].
+(*
+ destruct (ireg_eq rd0 ra); try discriminate.
+ destruct (ireg_eq rd1 ra); try discriminate.
+ destruct (ireg_eq rd2 ra); try discriminate.
+*)
+ rewrite Pregmap.gso; try discriminate.
+ cbn in *.
+ destruct (Mem.loadv _ _ _); try discriminate.
+ destruct (Mem.loadv _ _ _); try discriminate.
+ destruct (Mem.loadv _ _ _); try discriminate.
+ destruct (Mem.loadv _ _ _); try discriminate.
+ rewrite (regset_double_set PC rd0) by discriminate.
+ rewrite (regset_double_set PC rd1) by discriminate.
+ rewrite (regset_double_set PC rd2) by discriminate.
+ rewrite (regset_double_set PC rd3) by discriminate.
+ inv H.
+ trivial.
+Qed.
+
+Lemma exec_store_offset_pc_var:
+ forall t rs m rd ra ofs rs' m' v,
+ exec_store_offset t rs m rd ra ofs = Next rs' m' ->
+ exec_store_offset t rs # PC <- v m rd ra ofs = Next rs' # PC <- v m'.
+Proof.
+ intros. unfold exec_store_offset in *. unfold parexec_store_offset in *. rewrite Pregmap.gso; try discriminate.
+ destruct (eval_offset ofs); try discriminate.
+ destruct (Mem.storev _ _ _).
+ - inv H. apply next_eq; auto.
+ - discriminate.
+Qed.
+
+Lemma exec_store_q_offset_pc_var:
+ forall rs m rd ra ofs rs' m' v,
+ exec_store_q_offset rs m rd ra ofs = Next rs' m' ->
+ exec_store_q_offset rs # PC <- v m rd ra ofs = Next rs' # PC <- v m'.
+Proof.
+ intros. unfold exec_store_q_offset in *. unfold parexec_store_q_offset in *. rewrite Pregmap.gso; try discriminate.
+ cbn in *.
+ destruct (gpreg_q_expand _) as [s0 s1].
+ destruct (Mem.storev _ _ _); try discriminate.
+ destruct (Mem.storev _ _ _); try discriminate.
+ inv H. apply next_eq; auto.
+Qed.
+
+Lemma exec_store_o_offset_pc_var:
+ forall rs m rd ra ofs rs' m' v,
+ exec_store_o_offset rs m rd ra ofs = Next rs' m' ->
+ exec_store_o_offset rs # PC <- v m rd ra ofs = Next rs' # PC <- v m'.
+Proof.
+ intros.
+ unfold exec_store_o_offset in *. unfold parexec_store_o_offset in *.
+ destruct (gpreg_o_expand _) as [[[s0 s1] s2] s3].
+ destruct (Mem.storev _ _ _); try discriminate.
+ destruct (Mem.storev _ _ _); try discriminate.
+ destruct (Mem.storev _ _ _); try discriminate.
+ destruct (Mem.storev _ _ _); try discriminate.
+ inv H.
+ trivial.
+Qed.
+
+Lemma exec_store_reg_pc_var:
+ forall t rs m rd ra ro rs' m' v,
+ exec_store_reg t rs m rd ra ro = Next rs' m' ->
+ exec_store_reg t rs # PC <- v m rd ra ro = Next rs' # PC <- v m'.
+Proof.
+ intros. unfold exec_store_reg in *. unfold parexec_store_reg in *. rewrite Pregmap.gso; try discriminate.
+ destruct (Mem.storev _ _ _).
+ - inv H. apply next_eq; auto.
+ - discriminate.
+Qed.
+
+Lemma exec_store_regxs_pc_var:
+ forall t rs m rd ra ro rs' m' v,
+ exec_store_regxs t rs m rd ra ro = Next rs' m' ->
+ exec_store_regxs t rs # PC <- v m rd ra ro = Next rs' # PC <- v m'.
+Proof.
+ intros. unfold exec_store_regxs in *. unfold parexec_store_regxs in *. rewrite Pregmap.gso; try discriminate.
+ destruct (Mem.storev _ _ _).
+ - inv H. apply next_eq; auto.
+ - discriminate.
+Qed.
+
+Theorem exec_basic_instr_pc_var:
+ forall ge i rs m rs' m' v,
+ exec_basic_instr ge i rs m = Next rs' m' ->
+ exec_basic_instr ge i (rs # PC <- v) m = Next (rs' # PC <- v) m'.
+Proof.
+ intros. unfold exec_basic_instr in *. unfold bstep in *. destruct i.
+ - unfold exec_arith_instr in *. destruct i; destruct i.
+ all: try (exploreInst; inv H; apply next_eq; auto;
+ apply functional_extensionality; intros; rewrite regset_double_set; auto; discriminate).
+(*
+ (* Some cases treated seperately because exploreInst destructs too much *)
+ all: try (inv H; apply next_eq; auto; apply functional_extensionality; intros; rewrite regset_double_set; auto; discriminate). *)
+ - destruct i.
+ + exploreInst; apply exec_load_offset_pc_var; auto.
+ + exploreInst; apply exec_load_reg_pc_var; auto.
+ + exploreInst; apply exec_load_regxs_pc_var; auto.
+ + apply exec_load_offset_q_pc_var; auto.
+ + apply exec_load_offset_o_pc_var; auto.
+ - destruct i.
+ + exploreInst; apply exec_store_offset_pc_var; auto.
+ + exploreInst; apply exec_store_reg_pc_var; auto.
+ + exploreInst; apply exec_store_regxs_pc_var; auto.
+ + apply exec_store_q_offset_pc_var; auto.
+ + apply exec_store_o_offset_pc_var; auto.
+ - destruct (Mem.alloc _ _ _) as (m1 & stk). repeat (rewrite Pregmap.gso; try discriminate).
+ destruct (Mem.storev _ _ _ _); try discriminate.
+ inv H. apply next_eq; auto. apply functional_extensionality. intros.
+ rewrite (regset_double_set GPR32 PC); try discriminate.
+ rewrite (regset_double_set GPR12 PC); try discriminate.
+ rewrite (regset_double_set FP PC); try discriminate. reflexivity.
+ - repeat (rewrite Pregmap.gso; try discriminate).
+ destruct (Mem.loadv _ _ _); try discriminate.
+ destruct (rs GPR12); try discriminate.
+ destruct (Mem.free _ _ _ _); try discriminate.
+ inv H. apply next_eq; auto.
+ rewrite (regset_double_set GPR32 PC).
+ rewrite (regset_double_set GPR12 PC). reflexivity.
+ all: discriminate.
+ - destruct rs0; try discriminate. inv H. apply next_eq; auto.
+ repeat (rewrite Pregmap.gso; try discriminate). apply regset_double_set; discriminate.
+ - destruct rd; try discriminate. inv H. apply next_eq; auto.
+ repeat (rewrite Pregmap.gso; try discriminate). apply regset_double_set; discriminate.
+ - inv H. apply next_eq; auto.
+Qed.
+
+*)
diff --git a/aarch64/Asmexpand.ml b/aarch64/Asmexpand.ml
index 8e3c169a..0cee763e 100644
--- a/aarch64/Asmexpand.ml
+++ b/aarch64/Asmexpand.ml
@@ -132,9 +132,9 @@ let expand_builtin_va_start r =
let expand_annot_val kind txt targ args res =
emit (Pbuiltin (EF_annot(kind,txt,[targ]), args, BR_none));
match args, res with
- | [BA(DR(IR' src))], BR(DR(IR' dst)) ->
+ | [BA(DR(IR src))], BR(DR(IR dst)) ->
if dst <> src then emit (Pmov (dst, src))
- | [BA(DR(FR' src))], BR(DR(FR' dst)) ->
+ | [BA(DR(FR src))], BR(DR(FR dst)) ->
if dst <> src then emit (Pfmov (dst, src))
| _, _ ->
raise (Error "ill-formed __builtin_annot_val")
@@ -152,7 +152,7 @@ let offset_in_range ofs =
let memcpy_small_arg sz arg tmp =
match arg with
- | BA (DR(IR' r)) ->
+ | BA (DR(IR r)) ->
(r, _0)
| BA_addrstack ofs ->
if offset_in_range ofs
@@ -164,7 +164,7 @@ let memcpy_small_arg sz arg tmp =
let expand_builtin_memcpy_small sz al src dst =
let (tsrc, tdst) =
- if dst <> BA (DR(IR'(RR1 X17))) then (X17, X29) else (X29, X17) in
+ if dst <> BA (DR(IR(RR1 X17))) then (X17, X29) else (X29, X17) in
let (rsrc, osrc) = memcpy_small_arg sz src tsrc in
let (rdst, odst) = memcpy_small_arg sz dst tdst in
let rec copy osrc odst sz =
@@ -197,7 +197,7 @@ let expand_builtin_memcpy_small sz al src dst =
let memcpy_big_arg arg tmp =
match arg with
- | BA (DR(IR' r)) -> emit (Pmov(RR1 tmp, r))
+ | BA (DR(IR r)) -> emit (Pmov(RR1 tmp, r))
| BA_addrstack ofs -> expand_addimm64 (RR1 tmp) XSP ofs
| _ -> assert false
@@ -241,28 +241,28 @@ let expand_builtin_memcpy sz al args =
let expand_builtin_vload_common chunk base ofs res =
let addr = ADimm(base, ofs) in
match chunk, res with
- | Mint8unsigned, BR(DR(IR'(RR1 res))) ->
+ | Mint8unsigned, BR(DR(IR(RR1 res))) ->
emit (Pldrb(W, res, addr))
- | Mint8signed, BR(DR(IR'(RR1 res))) ->
+ | Mint8signed, BR(DR(IR(RR1 res))) ->
emit (Pldrsb(W, res, addr))
- | Mint16unsigned, BR(DR(IR'(RR1 res))) ->
+ | Mint16unsigned, BR(DR(IR(RR1 res))) ->
emit (Pldrh(W, res, addr))
- | Mint16signed, BR(DR(IR'(RR1 res))) ->
+ | Mint16signed, BR(DR(IR(RR1 res))) ->
emit (Pldrsh(W, res, addr))
- | Mint32, BR(DR(IR'(RR1 res))) ->
+ | Mint32, BR(DR(IR(RR1 res))) ->
emit (Pldrw(res, addr))
- | Mint64, BR(DR(IR'(RR1 res))) ->
+ | Mint64, BR(DR(IR(RR1 res))) ->
emit (Pldrx(res, addr))
- | Mfloat32, BR(DR(FR' res)) ->
+ | Mfloat32, BR(DR(FR res)) ->
emit (Pldrs(res, addr))
- | Mfloat64, BR(DR(FR' res)) ->
+ | Mfloat64, BR(DR(FR res)) ->
emit (Pldrd(res, addr))
| _ ->
assert false
let expand_builtin_vload chunk args res =
match args with
- | [BA(DR(IR' addr))] ->
+ | [BA(DR(IR addr))] ->
expand_builtin_vload_common chunk addr _0 res
| [BA_addrstack ofs] ->
if offset_in_range (Z.add ofs (Memdata.size_chunk chunk)) then
@@ -271,7 +271,7 @@ let expand_builtin_vload chunk args res =
expand_addimm64 (RR1 X16) XSP ofs; (* X16 <- SP + ofs *)
expand_builtin_vload_common chunk (RR1 X16) _0 res
end
- | [BA_addptr(BA(DR(IR' addr)), BA_long ofs)] ->
+ | [BA_addptr(BA(DR(IR addr)), BA_long ofs)] ->
if offset_in_range (Z.add ofs (Memdata.size_chunk chunk)) then
expand_builtin_vload_common chunk addr ofs res
else begin
@@ -284,24 +284,24 @@ let expand_builtin_vload chunk args res =
let expand_builtin_vstore_common chunk base ofs src =
let addr = ADimm(base, ofs) in
match chunk, src with
- | (Mint8signed | Mint8unsigned), BA(DR(IR'(RR1 src))) ->
+ | (Mint8signed | Mint8unsigned), BA(DR(IR(RR1 src))) ->
emit (Pstrb(src, addr))
- | (Mint16signed | Mint16unsigned), BA(DR(IR'(RR1 src))) ->
+ | (Mint16signed | Mint16unsigned), BA(DR(IR(RR1 src))) ->
emit (Pstrh(src, addr))
- | Mint32, BA(DR(IR'(RR1 src))) ->
+ | Mint32, BA(DR(IR(RR1 src))) ->
emit (Pstrw(src, addr))
- | Mint64, BA(DR(IR'(RR1 src))) ->
+ | Mint64, BA(DR(IR(RR1 src))) ->
emit (Pstrx(src, addr))
- | Mfloat32, BA(DR(FR' src)) ->
+ | Mfloat32, BA(DR(FR src)) ->
emit (Pstrs(src, addr))
- | Mfloat64, BA(DR(FR' src)) ->
+ | Mfloat64, BA(DR(FR src)) ->
emit (Pstrd(src, addr))
| _ ->
assert false
let expand_builtin_vstore chunk args =
match args with
- | [BA(DR(IR' addr)); src] ->
+ | [BA(DR(IR addr)); src] ->
expand_builtin_vstore_common chunk addr _0 src
| [BA_addrstack ofs; src] ->
if offset_in_range (Z.add ofs (Memdata.size_chunk chunk)) then
@@ -310,7 +310,7 @@ let expand_builtin_vstore chunk args =
expand_addimm64 (RR1 X16) XSP ofs; (* X16 <- SP + ofs *)
expand_builtin_vstore_common chunk (RR1 X16) _0 src
end
- | [BA_addptr(BA(DR(IR' addr)), BA_long ofs); src] ->
+ | [BA_addptr(BA(DR(IR addr)), BA_long ofs); src] ->
if offset_in_range (Z.add ofs (Memdata.size_chunk chunk)) then
expand_builtin_vstore_common chunk addr ofs src
else begin
@@ -330,37 +330,37 @@ let expand_builtin_inline name args res =
| "__builtin_nop", [], _ ->
emit Pnop
(* Byte swap *)
- | ("__builtin_bswap" | "__builtin_bswap32"), [BA(DR(IR'(RR1 a1)))], BR(DR(IR'(RR1 res))) ->
+ | ("__builtin_bswap" | "__builtin_bswap32"), [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) ->
emit (Prev(W, res, a1))
- | "__builtin_bswap64", [BA(DR(IR'(RR1 a1)))], BR(DR(IR'(RR1 res))) ->
+ | "__builtin_bswap64", [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) ->
emit (Prev(X, res, a1))
- | "__builtin_bswap16", [BA(DR(IR'(RR1 a1)))], BR(DR(IR'(RR1 res))) ->
+ | "__builtin_bswap16", [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) ->
emit (Prev16(W, res, a1));
emit (Pandimm(W, res, RR0 res, Z.of_uint 0xFFFF))
(* Count leading zeros and leading sign bits *)
- | "__builtin_clz", [BA(DR(IR'(RR1 a1)))], BR(DR(IR'(RR1 res))) ->
+ | "__builtin_clz", [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) ->
emit (Pclz(W, res, a1))
- | ("__builtin_clzl" | "__builtin_clzll"), [BA(DR(IR'(RR1 a1)))], BR(DR(IR'(RR1 res))) ->
+ | ("__builtin_clzl" | "__builtin_clzll"), [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) ->
emit (Pclz(X, res, a1))
- | "__builtin_cls", [BA(DR(IR'(RR1 a1)))], BR(DR(IR'(RR1 res))) ->
+ | "__builtin_cls", [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) ->
emit (Pcls(W, res, a1))
- | ("__builtin_clsl" | "__builtin_clsll"), [BA(DR(IR'(RR1 a1)))], BR(DR(IR'(RR1 res))) ->
+ | ("__builtin_clsl" | "__builtin_clsll"), [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) ->
emit (Pcls(X, res, a1))
(* Float arithmetic *)
- | "__builtin_fabs", [BA(DR(FR' a1))], BR(DR(FR' res)) ->
+ | "__builtin_fabs", [BA(DR(FR a1))], BR(DR(FR res)) ->
emit (Pfabs(D, res, a1))
- | "__builtin_fsqrt", [BA(DR(FR' a1))], BR(DR(FR' res)) ->
+ | "__builtin_fsqrt", [BA(DR(FR a1))], BR(DR(FR res)) ->
emit (Pfsqrt(D, res, a1))
- | "__builtin_fmadd", [BA(DR(FR' a1)); BA(DR(FR' a2)); BA(DR(FR' a3))], BR(DR(FR' res)) ->
+ | "__builtin_fmadd", [BA(DR(FR a1)); BA(DR(FR a2)); BA(DR(FR a3))], BR(DR(FR res)) ->
emit (Pfmadd(D, res, a1, a2, a3))
- | "__builtin_fmsub", [BA(DR(FR' a1)); BA(DR(FR' a2)); BA(DR(FR' a3))], BR(DR(FR' res)) ->
+ | "__builtin_fmsub", [BA(DR(FR a1)); BA(DR(FR a2)); BA(DR(FR a3))], BR(DR(FR res)) ->
emit (Pfmsub(D, res, a1, a2, a3))
- | "__builtin_fnmadd", [BA(DR(FR' a1)); BA(DR(FR' a2)); BA(DR(FR' a3))], BR(DR(FR' res)) ->
+ | "__builtin_fnmadd", [BA(DR(FR a1)); BA(DR(FR a2)); BA(DR(FR a3))], BR(DR(FR res)) ->
emit (Pfnmadd(D, res, a1, a2, a3))
- | "__builtin_fnmsub", [BA(DR(FR' a1)); BA(DR(FR' a2)); BA(DR(FR' a3))], BR(DR(FR' res)) ->
+ | "__builtin_fnmsub", [BA(DR(FR a1)); BA(DR(FR a2)); BA(DR(FR a3))], BR(DR(FR res)) ->
emit (Pfnmsub(D, res, a1, a2, a3))
(* Vararg *)
- | "__builtin_va_start", [BA(DR(IR'(RR1 a)))], _ ->
+ | "__builtin_va_start", [BA(DR(IR(RR1 a)))], _ ->
expand_builtin_va_start a
(* Catch-all *)
| _ ->
@@ -427,9 +427,9 @@ let float_reg_to_dwarf = function
| D30 -> 94 | D31 -> 95
let preg_to_dwarf = function
- | DR(IR'(RR1 r)) -> int_reg_to_dwarf r
- | DR(FR' r) -> float_reg_to_dwarf r
- | DR(IR'(XSP)) -> 31
+ | DR(IR(RR1 r)) -> int_reg_to_dwarf r
+ | DR(FR r) -> float_reg_to_dwarf r
+ | DR(IR(XSP)) -> 31
| _ -> assert false
let expand_function id fn =
diff --git a/aarch64/Asmgen.v b/aarch64/Asmgen.v
index fcd12eef..fcc64956 100644
--- a/aarch64/Asmgen.v
+++ b/aarch64/Asmgen.v
@@ -17,7 +17,7 @@
Require Import Recdef Coqlib Zwf Zbits.
Require Import Errors AST Integers Floats Op.
Require Import Locations Compopts.
-Require Import Mach Asm Asmblock Asmblockgen Machblockgen PseudoAsmblock PseudoAsmblockproof.
+Require Import Mach Asm Asmblock Asmblockgen Machblockgen PseudoAsmblock PseudoAsmblockproof PostpassScheduling.
Local Open Scope error_monad_scope.
@@ -30,19 +30,19 @@ Module Asmblock_TRANSF.
Definition ireg_of_preg (p : Asm.preg) : res ireg :=
match p with
- | DR (IR' (RR1 r)) => OK r
+ | DR (IR (RR1 r)) => OK r
| _ => Error (msg "Asmgen.ireg_of_preg")
end.
Definition freg_of_preg (p : Asm.preg) : res freg :=
match p with
- | DR (FR' r) => OK r
+ | DR (FR r) => OK r
| _ => Error (msg "Asmgen.freg_of_preg")
end.
Definition iregsp_of_preg (p : Asm.preg) : res iregsp :=
match p with
- | DR (IR' r) => OK r
+ | DR (IR r) => OK r
| _ => Error (msg "Asmgen.iregsp_of_preg")
end.
@@ -219,11 +219,11 @@ Definition basic_to_instruction (b: basic) : res Asm.instruction :=
| PArith (Pfmovi fsz rd r1) => OK (Asm.Pfmovi fsz rd r1)
| PArith (Pcsel rd r1 r2 c) =>
match r1, r2 with
- | IR' r1', IR' r2' => do rd' <- ireg_of_preg rd;
+ | IR r1', IR r2' => do rd' <- ireg_of_preg rd;
do r1'' <- ireg_of_preg r1';
do r2'' <- ireg_of_preg r2';
OK (Asm.Pcsel rd' r1'' r2'' c)
- | FR' r1', FR' r2' => do rd' <- freg_of_preg rd;
+ | FR r1', FR r2' => do rd' <- freg_of_preg rd;
do r1'' <- freg_of_preg r1';
do r2'' <- freg_of_preg r2';
OK (Asm.Pfsel rd' r1'' r2'' c)
@@ -359,4 +359,5 @@ Definition transf_program (p: Mach.program) : res Asm.program :=
let mbp := Machblockgen.transf_program p in
do mbp' <- PseudoAsmblockproof.transf_program mbp;
do abp <- Asmblockgen.transf_program mbp';
+ (*do abp' <- (time "PostpassScheduling total oracle+verification" PostpassScheduling.transf_program) abp;*)
Asmblock_TRANSF.transf_program abp.
diff --git a/aarch64/Asmgenproof.v b/aarch64/Asmgenproof.v
index f28bcb2e..bff18716 100644
--- a/aarch64/Asmgenproof.v
+++ b/aarch64/Asmgenproof.v
@@ -19,7 +19,7 @@ Require Import Coqlib Errors.
Require Import Integers Floats AST Linking.
Require Import Values Memory Events Globalenvs Smallstep.
Require Import Op Locations Machblock Conventions PseudoAsmblock Asm Asmblock.
-Require Machblockgenproof Asmblockgenproof.
+Require Machblockgenproof Asmblockgenproof PostpassSchedulingproof.
Require Import Asmgen.
Require Import Axioms.
Require Import IterList.
@@ -2174,6 +2174,7 @@ Definition block_passes :=
mkpass Machblockgenproof.match_prog
::: mkpass PseudoAsmblockproof.match_prog
::: mkpass Asmblockgenproof.match_prog
+ (*::: mkpass PostpassSchedulingproof.match_prog*)
::: mkpass Asmblock_PRESERVATION.match_prog
::: pass_nil _.
diff --git a/aarch64/PostpassScheduling.v b/aarch64/PostpassScheduling.v
new file mode 100644
index 00000000..485b297b
--- /dev/null
+++ b/aarch64/PostpassScheduling.v
@@ -0,0 +1,530 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+(** Implementation (and basic properties) of the verified postpass scheduler *)
+
+Require Import Coqlib Errors AST Integers.
+Require Import Asmblock Axioms Memory Globalenvs.
+Require Import Asmblockdeps Asmblockgenproof0 Asmblockprops.
+(*Require Peephole.*)
+
+Local Open Scope error_monad_scope.
+
+(** * Oracle taking as input a basic block,
+ returns a scheduled basic block *)
+Axiom schedule: bblock -> (list basic) * option control.
+
+Extract Constant schedule => "PostpassSchedulingOracle.schedule".
+
+(*
+(** * Concat all bundles into one big basic block *)
+
+(* Lemmas necessary for defining concat_all *)
+Lemma app_nonil {A: Type} (l l': list A) : l <> nil -> l ++ l' <> nil.
+Proof.
+ intros. destruct l; simpl.
+ - contradiction.
+ - discriminate.
+Qed.
+
+Lemma app_nonil2 {A: Type} : forall (l l': list A), l' <> nil -> l ++ l' <> nil.
+Proof.
+ destruct l.
+ - intros. simpl; auto.
+ - intros. rewrite <- app_comm_cons. discriminate.
+Qed.
+
+Definition check_size bb :=
+ if zlt Ptrofs.max_unsigned (size bb)
+ then Error (msg "PostpassSchedulingproof.check_size")
+ else OK tt.
+
+Program Definition concat2 (bb bb': bblock) : res bblock :=
+ do ch <- check_size bb;
+ do ch' <- check_size bb';
+ match (exit bb) with
+ | None =>
+ match (header bb') with
+ | nil =>
+ match (exit bb') with
+ | Some (PExpand (Pbuiltin _ _ _)) => Error (msg "PostpassSchedulingproof.concat2: builtin not alone")
+ | _ => OK {| header := header bb; body := body bb ++ body bb'; exit := exit bb' |}
+ end
+ | _ => Error (msg "PostpassSchedulingproof.concat2")
+ end
+ | _ => Error (msg "PostpassSchedulingproof.concat2")
+ end.
+Next Obligation.
+ apply wf_bblock_refl. constructor.
+ - destruct bb' as [hd' bdy' ex' WF']. destruct bb as [hd bdy ex WF]. simpl in *.
+ apply wf_bblock_refl in WF'. apply wf_bblock_refl in WF.
+ inversion_clear WF'. inversion_clear WF. clear H1 H3.
+ inversion H2; inversion H0.
+ + left. apply app_nonil. auto.
+ + right. auto.
+ + left. apply app_nonil2. auto.
+ + right. auto.
+ - unfold builtin_alone. intros. rewrite H0 in H.
+ assert (Some (PExpand (Pbuiltin ef args res)) <> Some (PExpand (Pbuiltin ef args res))).
+ apply (H ef args res). contradict H1. auto.
+Defined.
+
+Lemma concat2_zlt_size:
+ forall a b bb,
+ concat2 a b = OK bb ->
+ size a <= Ptrofs.max_unsigned
+ /\ size b <= Ptrofs.max_unsigned.
+Proof.
+ intros. monadInv H.
+ split.
+ - unfold check_size in EQ. destruct (zlt Ptrofs.max_unsigned (size a)); monadInv EQ. omega.
+ - unfold check_size in EQ1. destruct (zlt Ptrofs.max_unsigned (size b)); monadInv EQ1. omega.
+Qed.
+
+Lemma concat2_noexit:
+ forall a b bb,
+ concat2 a b = OK bb ->
+ exit a = None.
+Proof.
+ intros. destruct a as [hd bdy ex WF]; simpl in *.
+ destruct ex as [e|]; simpl in *; auto.
+ unfold concat2 in H. simpl in H. monadInv H.
+Qed.
+
+Lemma concat2_decomp:
+ forall a b bb,
+ concat2 a b = OK bb ->
+ body bb = body a ++ body b
+ /\ exit bb = exit b.
+Proof.
+ intros. exploit concat2_noexit; eauto. intros.
+ destruct a as [hda bda exa WFa]; destruct b as [hdb bdb exb WFb]; destruct bb as [hd bd ex WF]; simpl in *.
+ subst exa.
+ unfold concat2 in H; simpl in H.
+ destruct hdb.
+ - destruct exb.
+ + destruct c.
+ * destruct i; monadInv H; split; auto.
+ * monadInv H. split; auto.
+ + monadInv H. split; auto.
+ - monadInv H.
+Qed.
+
+Lemma concat2_size:
+ forall a b bb, concat2 a b = OK bb -> size bb = size a + size b.
+Proof.
+ intros. unfold concat2 in H.
+ destruct a as [hda bda exa WFa]; destruct b as [hdb bdb exb WFb]; destruct bb as [hd bdy ex WF]; simpl in *.
+ destruct exa; monadInv H. destruct hdb; try (monadInv EQ2). destruct exb; try (monadInv EQ2).
+ - destruct c.
+ + destruct i; monadInv EQ2;
+ unfold size; simpl; rewrite app_length; rewrite Nat.add_0_r; rewrite <- Nat2Z.inj_add; rewrite Nat.add_assoc; reflexivity.
+ + monadInv EQ2. unfold size; simpl. rewrite app_length. rewrite Nat.add_0_r. rewrite <- Nat2Z.inj_add. rewrite Nat.add_assoc. reflexivity.
+ - unfold size; simpl. rewrite app_length. repeat (rewrite Nat.add_0_r). rewrite <- Nat2Z.inj_add. reflexivity.
+Qed.
+
+Lemma concat2_header:
+ forall bb bb' tbb,
+ concat2 bb bb' = OK tbb -> header bb = header tbb.
+Proof.
+ intros. destruct bb as [hd bdy ex COR]; destruct bb' as [hd' bdy' ex' COR']; destruct tbb as [thd tbdy tex tCOR]; simpl in *.
+ unfold concat2 in H. simpl in H. monadInv H.
+ destruct ex; try discriminate. destruct hd'; try discriminate. destruct ex'.
+ - destruct c.
+ + destruct i; try discriminate; congruence.
+ + congruence.
+ - congruence.
+Qed.
+
+Lemma concat2_no_header_in_middle:
+ forall bb bb' tbb,
+ concat2 bb bb' = OK tbb ->
+ header bb' = nil.
+Proof.
+ intros. destruct bb as [hd bdy ex COR]; destruct bb' as [hd' bdy' ex' COR']; destruct tbb as [thd tbdy tex tCOR]; simpl in *.
+ unfold concat2 in H. simpl in H. monadInv H.
+ destruct ex; try discriminate. destruct hd'; try discriminate. reflexivity.
+Qed.
+
+
+
+Fixpoint concat_all (lbb: list bblock) : res bblock :=
+ match lbb with
+ | nil => Error (msg "PostpassSchedulingproof.concatenate: empty list")
+ | bb::nil => OK bb
+ | bb::lbb =>
+ do bb' <- concat_all lbb;
+ concat2 bb bb'
+ end.
+
+Lemma concat_all_size :
+ forall lbb a bb bb',
+ concat_all (a :: lbb) = OK bb ->
+ concat_all lbb = OK bb' ->
+ size bb = size a + size bb'.
+Proof.
+ intros. unfold concat_all in H. fold concat_all in H.
+ destruct lbb; try discriminate.
+ monadInv H. rewrite H0 in EQ. inv EQ.
+ apply concat2_size. assumption.
+Qed.
+
+Lemma concat_all_header:
+ forall lbb bb tbb,
+ concat_all (bb::lbb) = OK tbb -> header bb = header tbb.
+Proof.
+ destruct lbb.
+ - intros. simpl in H. congruence.
+ - intros. simpl in H. destruct lbb.
+ + inv H. eapply concat2_header; eassumption.
+ + monadInv H. eapply concat2_header; eassumption.
+Qed.
+
+Lemma concat_all_no_header_in_middle:
+ forall lbb tbb,
+ concat_all lbb = OK tbb ->
+ Forall (fun b => header b = nil) (tail lbb).
+Proof.
+ induction lbb; intros; try constructor.
+ simpl. simpl in H. destruct lbb.
+ - constructor.
+ - monadInv H. simpl tl in IHlbb. constructor.
+ + apply concat2_no_header_in_middle in EQ0. apply concat_all_header in EQ. congruence.
+ + apply IHlbb in EQ. assumption.
+Qed.
+
+Inductive is_concat : bblock -> list bblock -> Prop :=
+ | mk_is_concat: forall tbb lbb, concat_all lbb = OK tbb -> is_concat tbb lbb.
+*)
+(** * Remainder of the verified scheduler *)
+
+(*Definition verify_schedule (bb bb' : bblock) : res unit :=*)
+ (*match bblock_simub bb bb' with*)
+ (*| true => OK tt*)
+ (*| false => Error (msg "PostpassScheduling.verify_schedule")*)
+ (*end.*)
+
+
+(*Definition verify_size bb lbb := if (Z.eqb (size bb) (size_blocks lbb)) then OK tt else Error (msg "PostpassScheduling:verify_size: wrong size").*)
+
+(*Lemma verify_size_size:*)
+ (*forall bb lbb, verify_size bb lbb = OK tt -> size bb = size_blocks lbb.*)
+(*Proof.*)
+ (*intros. unfold verify_size in H. destruct (size bb =? size_blocks lbb) eqn:SIZE; try discriminate.*)
+ (*apply Z.eqb_eq. assumption.*)
+(*Qed.*)
+
+(*Lemma verify_schedule_no_header:*)
+ (*forall bb bb',*)
+ (*verify_schedule (no_header bb) bb' = verify_schedule bb bb'.*)
+(*Proof.*)
+ (*intros. unfold verify_schedule. unfold bblock_simub. unfold pure_bblock_simu_test, bblock_simu_test. rewrite trans_block_noheader_inv.*)
+ (*reflexivity.*)
+(*Qed.*)
+
+
+(*Lemma stick_header_verify_schedule:*)
+ (*forall hd bb' hbb' bb,*)
+ (*stick_header hd bb' = hbb' ->*)
+ (*verify_schedule bb bb' = verify_schedule bb hbb'.*)
+(*Proof.*)
+ (*intros. unfold verify_schedule. unfold bblock_simub, pure_bblock_simu_test, bblock_simu_test.*)
+ (*rewrite <- H. rewrite trans_block_header_inv. reflexivity.*)
+(*Qed.*)
+
+(*Lemma check_size_stick_header:*)
+ (*forall bb hd,*)
+ (*check_size bb = check_size (stick_header hd bb).*)
+(*Proof.*)
+ (*intros. unfold check_size. rewrite stick_header_size. reflexivity.*)
+(*Qed.*)
+
+(*Lemma stick_header_concat2:*)
+ (*forall bb bb' hd tbb,*)
+ (*concat2 bb bb' = OK tbb ->*)
+ (*concat2 (stick_header hd bb) bb' = OK (stick_header hd tbb).*)
+(*Proof.*)
+ (*intros. monadInv H. erewrite check_size_stick_header in EQ.*)
+ (*unfold concat2. rewrite EQ. rewrite EQ1. simpl.*)
+ (*destruct bb as [hdr bdy ex COR]; destruct bb' as [hdr' bdy' ex' COR']; simpl in *.*)
+ (*destruct ex; try discriminate. destruct hdr'; try discriminate. destruct ex'.*)
+ (*- destruct c.*)
+ (*+ destruct i; try discriminate; inv EQ2; unfold stick_header; simpl; reflexivity.*)
+ (*+ inv EQ2. unfold stick_header; simpl. reflexivity.*)
+ (*- inv EQ2. unfold stick_header; simpl. reflexivity.*)
+(*Qed.*)
+
+(*Lemma stick_header_concat_all:*)
+ (*forall bb c tbb hd,*)
+ (*concat_all (bb :: c) = OK tbb ->*)
+ (*concat_all (stick_header hd bb :: c) = OK (stick_header hd tbb).*)
+(*Proof.*)
+ (*intros. simpl in *. destruct c; try congruence.*)
+ (*monadInv H. rewrite EQ. simpl.*)
+ (*apply stick_header_concat2. assumption.*)
+(*Qed.*)
+
+
+
+(*Definition stick_header_code (h : list label) (lbb : list bblock) :=*)
+ (*match (head lbb) with*)
+ (*| None => Error (msg "PostpassScheduling.stick_header: empty schedule")*)
+ (*| Some fst => OK ((stick_header h fst) :: tail lbb)*)
+ (*end.*)
+
+(*Lemma stick_header_code_no_header:*)
+ (*forall bb c,*)
+ (*stick_header_code (header bb) (no_header bb :: c) = OK (bb :: c).*)
+(*Proof.*)
+ (*intros. unfold stick_header_code. simpl. rewrite stick_header_no_header. reflexivity.*)
+(*Qed.*)
+
+(*Lemma hd_tl_size:*)
+ (*forall lbb bb, hd_error lbb = Some bb -> size_blocks lbb = size bb + size_blocks (tl lbb).*)
+(*Proof.*)
+ (*destruct lbb.*)
+ (*- intros. simpl in H. discriminate.*)
+ (*- intros. simpl in H. inv H. simpl. reflexivity.*)
+(*Qed.*)
+
+(*Lemma stick_header_code_size:*)
+ (*forall h lbb lbb', stick_header_code h lbb = OK lbb' -> size_blocks lbb = size_blocks lbb'.*)
+(*Proof.*)
+ (*intros. unfold stick_header_code in H. destruct (hd_error lbb) eqn:HD; try discriminate.*)
+ (*inv H. simpl. rewrite stick_header_size. erewrite hd_tl_size; eauto.*)
+(*Qed.*)
+
+(*Lemma stick_header_code_no_header_in_middle:*)
+ (*forall c h lbb,*)
+ (*stick_header_code h c = OK lbb ->*)
+ (*Forall (fun b => header b = nil) (tl c) ->*)
+ (*Forall (fun b => header b = nil) (tl lbb).*)
+(*Proof.*)
+ (*destruct c; intros.*)
+ (*- unfold stick_header_code in H. simpl in H. discriminate.*)
+ (*- unfold stick_header_code in H. simpl in H. inv H. simpl in H0.*)
+ (*simpl. assumption.*)
+(*Qed.*)
+
+(*Lemma stick_header_code_concat_all:*)
+ (*forall hd lbb hlbb tbb,*)
+ (*stick_header_code hd lbb = OK hlbb ->*)
+ (*concat_all lbb = OK tbb ->*)
+ (*exists htbb,*)
+ (*concat_all hlbb = OK htbb*)
+ (*/\ stick_header hd tbb = htbb.*)
+(*Proof.*)
+ (*intros. exists (stick_header hd tbb). split; auto.*)
+ (*destruct lbb.*)
+ (*- unfold stick_header_code in H. simpl in H. discriminate.*)
+ (*- unfold stick_header_code in H. simpl in H. inv H.*)
+ (*apply stick_header_concat_all. assumption.*)
+(*Qed.*)
+
+Program Definition make_bblock_from_basics lb :=
+ match lb with
+ | nil => Error (msg "PostpassScheduling.make_bblock_from_basics")
+ | b :: lb => OK {| header := nil; body := b::lb; exit := None |}
+ end.
+
+Fixpoint schedule_to_bblocks_nocontrol llb :=
+ match llb with
+ | nil => OK nil
+ | lb :: llb => do bb <- make_bblock_from_basics lb;
+ do lbb <- schedule_to_bblocks_nocontrol llb;
+ OK (bb :: lbb)
+ end.
+
+Program Definition make_bblock_from_basics_and_control lb c :=
+ match c with
+ | PCtlFlow cf => OK {| header := nil; body := lb; exit := Some (PCtlFlow cf) |}
+ | _ => Error (msg "PostpassScheduling.make_bblock_from_basics_and_control")
+ end.
+Next Obligation.
+Admitted.
+(* TODO
+ apply wf_bblock_refl. constructor.
+ - right. discriminate.
+ - discriminate.
+Qed.*)
+
+(*Fixpoint schedule_to_bblocks_wcontrol llb c :=*)
+ (*match llb with*)
+ (*| nil => OK ((bblock_single_inst (PControl c)) :: nil) [> TODO fusion avec au dessus ? <]*)
+ (*| lb :: nil => do bb <- make_bblock_from_basics_and_control lb c; OK (bb :: nil)*)
+ (*| lb :: llb => do bb <- make_bblock_from_basics lb;*)
+ (*do lbb <- schedule_to_bblocks_wcontrol llb c;*)
+ (*OK (bb :: lbb)*)
+ (*end.*)
+
+(*Definition schedule_to_bblocks (llb: list (list basic)) (oc: option control) : res (list bblock) :=*)
+ (*match oc with*)
+ (*| None => schedule_to_bblocks_nocontrol llb*)
+ (*| Some c => schedule_to_bblocks_wcontrol llb c*)
+ (*end.*)
+
+(*Definition do_schedule (bb: bblock) : res (list bblock) :=*)
+ (*if (Z.eqb (size bb) 1) then OK (bb::nil) *)
+ (*else match (schedule bb) with (llb, oc) => schedule_to_bblocks llb oc end. [> TODO Something here <]*)
+
+(*Definition verify_par_bblock (bb: bblock) : res unit :=*)
+ (*if (bblock_para_check bb) then OK tt else Error (msg "PostpassScheduling.verify_par_bblock").*)
+
+(*Fixpoint verify_par (lbb: list bblock) :=*)
+ (*match lbb with*)
+ (*| nil => OK tt*)
+ (*| bb :: lbb => do res <- verify_par_bblock bb; verify_par lbb*)
+ (*end.*)
+
+(*Definition verified_schedule_nob (bb : bblock) : res (list bblock) :=*)
+ (*let bb' := no_header bb in*)
+ (*[> TODO remove? let bb'' := Peephole.optimize_bblock bb' in<]*)
+ (*do lbb <- do_schedule bb';*)
+ (*do tbb <- concat_all lbb;*)
+ (*do sizecheck <- verify_size bb lbb;*)
+ (*do schedcheck <- verify_schedule bb' tbb;*)
+ (*do res <- stick_header_code (header bb) lbb;*)
+ (*do parcheck <- verify_par res;*)
+ (*OK res.*)
+
+(*Lemma verified_schedule_nob_size:*)
+ (*forall bb lbb, verified_schedule_nob bb = OK lbb -> size bb = size_blocks lbb.*)
+(*Proof.*)
+ (*intros. monadInv H. erewrite <- stick_header_code_size; eauto.*)
+ (*apply verify_size_size.*)
+ (*destruct x1; try discriminate. assumption.*)
+(*Qed.*)
+
+(*Lemma verified_schedule_nob_no_header_in_middle:*)
+ (*forall lbb bb,*)
+ (*verified_schedule_nob bb = OK lbb ->*)
+ (*Forall (fun b => header b = nil) (tail lbb).*)
+(*Proof.*)
+ (*intros. monadInv H. eapply stick_header_code_no_header_in_middle; eauto.*)
+ (*eapply concat_all_no_header_in_middle. eassumption.*)
+(*Qed.*)
+
+(*Lemma verified_schedule_nob_header:*)
+ (*forall bb tbb lbb,*)
+ (*verified_schedule_nob bb = OK (tbb :: lbb) ->*)
+ (*header bb = header tbb*)
+ (*/\ Forall (fun b => header b = nil) lbb.*)
+(*Proof.*)
+ (*intros. split.*)
+ (*- monadInv H. unfold stick_header_code in EQ3. destruct (hd_error _); try discriminate. inv EQ3.*)
+ (*simpl. reflexivity.*)
+ (*- apply verified_schedule_nob_no_header_in_middle in H. assumption.*)
+(*Qed.*)
+
+
+(*Definition verified_schedule (bb : bblock) : res (list bblock) :=*)
+ (*verified_schedule_nob bb.*)
+(* TODO Remove?
+ match exit bb with
+ | Some (PExpand (Pbuiltin ef args res)) => OK (bb::nil) (* Special case for ensuring the lemma verified_schedule_builtin_idem *)
+ | _ => verified_schedule_nob bb
+ end.*)
+
+(*Lemma verified_schedule_size:*)
+ (*forall bb lbb, verified_schedule bb = OK lbb -> size bb = size_blocks lbb.*)
+(*Proof.*)
+ (*intros. unfold verified_schedule in H. destruct (exit bb). destruct c. destruct i.*)
+ (*all: try (apply verified_schedule_nob_size; auto; fail).*)
+ (*inv H. simpl. omega.*)
+(*Qed.*)
+
+(*Lemma verified_schedule_no_header_in_middle:*)
+ (*forall lbb bb,*)
+ (*verified_schedule bb = OK lbb ->*)
+ (*Forall (fun b => header b = nil) (tail lbb).*)
+(*Proof.*)
+ (*intros. unfold verified_schedule in H. destruct (exit bb). destruct c. destruct i.*)
+ (*all: try (eapply verified_schedule_nob_no_header_in_middle; eauto; fail).*)
+ (*inv H. simpl. auto.*)
+(*Qed.*)
+
+(*Lemma verified_schedule_header:*)
+ (*forall bb tbb lbb,*)
+ (*verified_schedule bb = OK (tbb :: lbb) ->*)
+ (*header bb = header tbb*)
+ (*/\ Forall (fun b => header b = nil) lbb.*)
+(*Proof.*)
+ (*intros. unfold verified_schedule in H. destruct (exit bb). destruct c. destruct i.*)
+ (*all: try (eapply verified_schedule_nob_header; eauto; fail).*)
+ (*inv H. split; simpl; auto.*)
+(*Qed.*)
+
+
+(*Lemma verified_schedule_nob_correct:*)
+ (*forall ge f bb lbb,*)
+ (*verified_schedule_nob bb = OK lbb ->*)
+ (*exists tbb,*)
+ (*is_concat tbb lbb*)
+ (*/\ bblock_simu ge f bb tbb.*)
+(*Proof.*)
+ (*intros. monadInv H.*)
+ (*exploit stick_header_code_concat_all; eauto.*)
+ (*intros (tbb & CONC & STH).*)
+ (*exists tbb. split; auto. constructor; auto.*)
+ (*rewrite verify_schedule_no_header in EQ2. erewrite stick_header_verify_schedule in EQ2; eauto.*)
+ (*eapply bblock_simub_correct; eauto. unfold verify_schedule in EQ2.*)
+ (*destruct (bblock_simub _ _); auto; try discriminate.*)
+(*Qed.*)
+
+(*Theorem verified_schedule_correct:*)
+ (*forall ge f bb lbb,*)
+ (*verified_schedule bb = OK lbb ->*)
+ (*exists tbb,*)
+ (*is_concat tbb lbb*)
+ (*/\ bblock_simu ge f bb tbb.*)
+(*Proof.*)
+ (*intros. unfold verified_schedule in H. destruct (exit bb). destruct c. destruct i.*)
+ (*all: try (eapply verified_schedule_nob_correct; eauto; fail).*)
+ (*inv H. eexists. split; simpl; auto. constructor; auto. simpl; auto. constructor; auto.*)
+(*Qed.*)
+
+(*Lemma verified_schedule_builtin_idem:*)
+ (*forall bb ef args res lbb,*)
+ (*exit bb = Some (PExpand (Pbuiltin ef args res)) ->*)
+ (*verified_schedule bb = OK lbb ->*)
+ (*lbb = bb :: nil.*)
+(*Proof.*)
+ (*intros. unfold verified_schedule in H0. rewrite H in H0. inv H0. reflexivity.*)
+(*Qed.*)
+
+
+(*Fixpoint transf_blocks (lbb : list bblock) : res (list bblock) :=*)
+ (*match lbb with*)
+ (*| nil => OK nil*)
+ (*| (cons bb lbb) =>*)
+ (*do tlbb <- transf_blocks lbb;*)
+ (*do tbb <- verified_schedule bb;*)
+ (*OK (tbb ++ tlbb)*)
+ (*end.*)
+
+(*Definition transl_function (f: function) : res function :=*)
+ (*do lb <- transf_blocks (fn_blocks f); *)
+ (*OK (mkfunction (fn_sig f) lb).*)
+
+(*Definition transf_function (f: function) : res function :=*)
+ (*do tf <- transl_function f;*)
+ (*if zlt Ptrofs.max_unsigned (size_blocks tf.(fn_blocks))*)
+ (*then Error (msg "code size exceeded")*)
+ (*else OK tf.*)
+
+(*Definition transf_fundef (f: fundef) : res fundef :=*)
+ (*transf_partial_fundef transf_function f.*)
+
+(*Definition transf_program (p: program) : res program :=*)
+ (*transform_partial_program transf_fundef p.*)
diff --git a/aarch64/PostpassSchedulingOracle.ml b/aarch64/PostpassSchedulingOracle.ml
new file mode 100644
index 00000000..2107ce22
--- /dev/null
+++ b/aarch64/PostpassSchedulingOracle.ml
@@ -0,0 +1,1029 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+open Asmvliw
+open Asmblock
+open Printf
+open Camlcoq
+open InstructionScheduler
+open TargetPrinter.Target
+
+let debug = false
+
+(**
+ * Extracting infos from Asmvliw instructions
+ *)
+
+type immediate = I32 of Integers.Int.int | I64 of Integers.Int64.int | Off of offset
+
+type location = Reg of preg | Mem
+
+type real_instruction =
+ (* ALU *)
+ | Addw | Andw | Compw | Mulw | Orw | Sbfw | Sbfxw | Sraw | Srlw | Sllw | Srsw | Rorw | Xorw
+ | Addd | Andd | Compd | Muld | Ord | Sbfd | Sbfxd | Srad | Srld | Slld | Srsd | Xord
+ | Nandw | Norw | Nxorw | Nandd | Nord | Nxord | Andnw | Ornw | Andnd | Ornd
+ | Maddw | Maddd | Msbfw | Msbfd | Cmoved
+ | Make | Nop | Extfz | Extfs | Insf
+ | Addxw | Addxd
+ (* LSU *)
+ | Lbs | Lbz | Lhs | Lhz | Lws | Ld | Lq | Lo
+ | Sb | Sh | Sw | Sd | Sq | So
+ (* BCU *)
+ | Icall | Call | Cb | Igoto | Goto | Ret | Get | Set
+ (* FPU *)
+ | Fabsd | Fabsw | Fnegw | Fnegd
+ | Faddd | Faddw | Fsbfd | Fsbfw | Fmuld | Fmulw
+ | Fmind | Fminw | Fmaxd | Fmaxw | Finvw
+ | Ffmaw | Ffmad | Ffmsw | Ffmsd
+ | Fnarrowdw | Fwidenlwd | Floatwz | Floatuwz | Floatdz | Floatudz | Fixedwz | Fixeduwz | Fixeddz | Fixedudz
+ | Fcompw | Fcompd
+
+type ab_inst_rec = {
+ inst: real_instruction;
+ write_locs : location list;
+ read_locs : location list;
+ read_at_id : location list; (* Must be contained in read_locs *)
+ read_at_e1 : location list; (* idem *)
+ imm : immediate option;
+ is_control : bool;
+}
+
+(** Asmvliw constructor to real instructions *)
+
+exception OpaqueInstruction
+
+let arith_rr_real = function
+ | Pcvtl2w -> Addw
+ | Pmv -> Addd
+ | Pnegw -> Sbfw
+ | Pnegl -> Sbfd
+ | Psxwd -> Extfs
+ | Pzxwd -> Extfz
+ | Pextfz(_,_) -> Extfz
+ | Pextfs(_,_) -> Extfs
+ | Pextfzl(_,_) -> Extfz
+ | Pextfsl(_,_) -> Extfs
+ | Pfabsw -> Fabsw
+ | Pfabsd -> Fabsd
+ | Pfnegw -> Fnegw
+ | Pfnegd -> Fnegd
+ | Pfinvw -> Finvw
+ | Pfnarrowdw -> Fnarrowdw
+ | Pfwidenlwd -> Fwidenlwd
+ | Pfloatwrnsz -> Floatwz
+ | Pfloatuwrnsz -> Floatuwz
+ | Pfloatudrnsz -> Floatudz
+ | Pfloatdrnsz -> Floatdz
+ | Pfixedwrzz -> Fixedwz
+ | Pfixeduwrzz -> Fixeduwz
+ | Pfixeddrzz -> Fixeddz
+ | Pfixedudrzz -> Fixedudz
+ | Pfixeddrzz_i32 -> Fixeddz
+ | Pfixedudrzz_i32 -> Fixedudz
+
+let arith_rrr_real = function
+ | Pcompw it -> Compw
+ | Pcompl it -> Compd
+ | Pfcompw ft -> Fcompw
+ | Pfcompl ft -> Fcompd
+ | Paddw -> Addw
+ | Paddxw _ -> Addxw
+ | Psubw -> Sbfw
+ | Prevsubxw _ -> Sbfxw
+ | Pmulw -> Mulw
+ | Pandw -> Andw
+ | Pnandw -> Nandw
+ | Porw -> Orw
+ | Pnorw -> Norw
+ | Pxorw -> Xorw
+ | Pnxorw -> Nxorw
+ | Pandnw -> Andnw
+ | Pornw -> Ornw
+ | Psraw -> Sraw
+ | Psrlw -> Srlw
+ | Psrxw -> Srsw
+ | Psllw -> Sllw
+ | Paddl -> Addd
+ | Paddxl _ -> Addxd
+ | Psubl -> Sbfd
+ | Prevsubxl _ -> Sbfxd
+ | Pandl -> Andd
+ | Pnandl -> Nandd
+ | Porl -> Ord
+ | Pnorl -> Nord
+ | Pxorl -> Xord
+ | Pnxorl -> Nxord
+ | Pandnl -> Andnd
+ | Pornl -> Ornd
+ | Pmull -> Muld
+ | Pslll -> Slld
+ | Psrll -> Srld
+ | Psrxl -> Srsd
+ | Psral -> Srad
+ | Pfaddd -> Faddd
+ | Pfaddw -> Faddw
+ | Pfsbfd -> Fsbfd
+ | Pfsbfw -> Fsbfw
+ | Pfmuld -> Fmuld
+ | Pfmulw -> Fmulw
+ | Pfmind -> Fmind
+ | Pfminw -> Fminw
+ | Pfmaxd -> Fmaxd
+ | Pfmaxw -> Fmaxw
+
+let arith_rri32_real = function
+ | Pcompiw it -> Compw
+ | Paddiw -> Addw
+ | Paddxiw _ -> Addxw
+ | Prevsubiw -> Sbfw
+ | Prevsubxiw _ -> Sbfxw
+ | Pmuliw -> Mulw
+ | Pandiw -> Andw
+ | Pnandiw -> Nandw
+ | Poriw -> Orw
+ | Pnoriw -> Norw
+ | Pxoriw -> Xorw
+ | Pnxoriw -> Nxorw
+ | Pandniw -> Andnw
+ | Porniw -> Ornw
+ | Psraiw -> Sraw
+ | Psrxiw -> Srsw
+ | Psrliw -> Srlw
+ | Pslliw -> Sllw
+ | Proriw -> Rorw
+ | Psllil -> Slld
+ | Psrlil -> Srld
+ | Psrail -> Srad
+ | Psrxil -> Srsd
+
+let arith_rri64_real = function
+ | Pcompil it -> Compd
+ | Paddil -> Addd
+ | Prevsubil -> Sbfd
+ | Paddxil _ -> Addxd
+ | Prevsubxil _ -> Sbfxd
+ | Pmulil -> Muld
+ | Pandil -> Andd
+ | Pnandil -> Nandd
+ | Poril -> Ord
+ | Pnoril -> Nord
+ | Pxoril -> Xord
+ | Pnxoril -> Nxord
+ | Pandnil -> Andnd
+ | Pornil -> Ornd
+
+
+let arith_arr_real = function
+ | Pinsf (_, _) -> Insf
+ | Pinsfl (_, _) -> Insf
+
+let arith_arrr_real = function
+ | Pfmaddfw -> Ffmaw
+ | Pfmaddfl -> Ffmad
+ | Pfmsubfw -> Ffmsw
+ | Pfmsubfl -> Ffmsd
+ | Pmaddw -> Maddw
+ | Pmaddl -> Maddd
+ | Pmsubw -> Msbfw
+ | Pmsubl -> Msbfd
+ | Pcmove _ -> Cmoved
+ | Pcmoveu _ -> Cmoved
+
+let arith_arri32_real = function
+ | Pmaddiw -> Maddw
+ | Pcmoveiw _ -> Cmoved
+ | Pcmoveuiw _ -> Cmoved
+
+let arith_arri64_real = function
+ | Pmaddil -> Maddd
+ | Pcmoveil _ -> Cmoved
+ | Pcmoveuil _ -> Cmoved
+
+let arith_ri32_real = Make
+
+let arith_ri64_real = Make
+
+let arith_rf32_real = Make
+
+let arith_rf64_real = Make
+
+let store_real = function
+ | Psb -> Sb
+ | Psh -> Sh
+ | Psw -> Sw
+ | Psw_a -> Sw
+ | Psd -> Sd
+ | Psd_a -> Sd
+ | Pfss -> Sw
+ | Pfsd -> Sd
+
+let load_real = function
+ | Plb -> Lbs
+ | Plbu -> Lbz
+ | Plh -> Lhs
+ | Plhu -> Lhz
+ | Plw -> Lws
+ | Plw_a -> Lws
+ | Pld -> Ld
+ | Pld_a -> Ld
+ | Pfls -> Lws
+ | Pfld -> Ld
+
+let set_real = Set
+let get_real = Get
+let nop_real = Nop
+let loadsymbol_real = Make
+let loadqrro_real = Lq
+let loadorro_real = Lo
+let storeqrro_real = Sq
+let storeorro_real = So
+
+let ret_real = Ret
+let call_real = Call
+let icall_real = Icall
+let goto_real = Goto
+let igoto_real = Igoto
+let jl_real = Goto
+let cb_real = Cb
+let cbu_real = Cb
+
+let arith_rri32_rec i rd rs imm32 = { inst = arith_rri32_real i; write_locs = [Reg rd]; read_locs = [Reg rs]; imm = imm32; is_control = false;
+ read_at_id = []; read_at_e1 = [] }
+
+let arith_rri64_rec i rd rs imm64 = { inst = arith_rri64_real i; write_locs = [Reg rd]; read_locs = [Reg rs]; imm = imm64; is_control = false;
+ read_at_id = []; read_at_e1 = [] }
+
+let arith_rrr_rec i rd rs1 rs2 = { inst = arith_rrr_real i; write_locs = [Reg rd]; read_locs = [Reg rs1; Reg rs2]; imm = None; is_control = false;
+ read_at_id = []; read_at_e1 = [] }
+
+let arith_arri32_rec i rd rs imm32 =
+ let rae1 = match i with Pmaddiw -> [Reg rd] | _ -> []
+ in { inst = arith_arri32_real i; write_locs = [Reg rd]; read_locs = [Reg rd; Reg rs]; imm = imm32; is_control = false;
+ read_at_id = [] ; read_at_e1 = rae1 }
+
+let arith_arri64_rec i rd rs imm64 =
+ let rae1 = match i with Pmaddil -> [Reg rd] | _ -> []
+ in { inst = arith_arri64_real i; write_locs = [Reg rd]; read_locs = [Reg rd; Reg rs]; imm = imm64; is_control = false;
+ read_at_id = []; read_at_e1 = rae1 }
+
+let arith_arr_rec i rd rs = { inst = arith_arr_real i; write_locs = [Reg rd]; read_locs = [Reg rd; Reg rs]; imm = None; is_control = false;
+ read_at_id = []; read_at_e1 = [] }
+
+let arith_arrr_rec i rd rs1 rs2 =
+ let rae1 = match i with Pmaddl | Pmaddw | Pmsubl | Pmsubw -> [Reg rd] | _ -> []
+ in { inst = arith_arrr_real i; write_locs = [Reg rd]; read_locs = [Reg rd; Reg rs1; Reg rs2]; imm = None; is_control = false;
+ read_at_id = []; read_at_e1 = rae1 }
+
+let arith_rr_rec i rd rs = { inst = arith_rr_real i; write_locs = [Reg rd]; read_locs = [Reg rs]; imm = None; is_control = false;
+ read_at_id = []; read_at_e1 = [] }
+
+let arith_r_rec i rd = match i with
+ (* For Ploadsymbol, writing the highest integer since we do not know how many bits does a symbol have *)
+ | Ploadsymbol (id, ofs) -> { inst = loadsymbol_real; write_locs = [Reg rd]; read_locs = []; imm = Some (I64 Integers.Int64.max_signed);
+ is_control = false; read_at_id = []; read_at_e1 = [] }
+
+let arith_rec i =
+ match i with
+ | PArithRRI32 (i, rd, rs, imm32) -> arith_rri32_rec i (IR rd) (IR rs) (Some (I32 imm32))
+ | PArithRRI64 (i, rd, rs, imm64) -> arith_rri64_rec i (IR rd) (IR rs) (Some (I64 imm64))
+ | PArithRRR (i, rd, rs1, rs2) -> arith_rrr_rec i (IR rd) (IR rs1) (IR rs2)
+ | PArithARR (i, rd, rs) -> arith_arr_rec i (IR rd) (IR rs)
+ (* Seems like single constant constructor types are elided *)
+ | PArithARRI32 (i, rd, rs, imm32) -> arith_arri32_rec i (IR rd) (IR rs) (Some (I32 imm32))
+ | PArithARRI64 (i, rd, rs, imm64) -> arith_arri64_rec i (IR rd) (IR rs) (Some (I64 imm64))
+ | PArithARRR (i, rd, rs1, rs2) -> arith_arrr_rec i (IR rd) (IR rs1) (IR rs2)
+ | PArithRI32 (rd, imm32) -> { inst = arith_ri32_real; write_locs = [Reg (IR rd)]; read_locs = []; imm = (Some (I32 imm32)) ; is_control = false;
+ read_at_id = []; read_at_e1 = [] }
+ | PArithRI64 (rd, imm64) -> { inst = arith_ri64_real; write_locs = [Reg (IR rd)]; read_locs = []; imm = (Some (I64 imm64)) ; is_control = false;
+ read_at_id = []; read_at_e1 = [] }
+ | PArithRF32 (rd, f) -> { inst = arith_rf32_real; write_locs = [Reg (IR rd)]; read_locs = [];
+ imm = (Some (I32 (Floats.Float32.to_bits f))); is_control = false; read_at_id = []; read_at_e1 = []}
+ | PArithRF64 (rd, f) -> { inst = arith_rf64_real; write_locs = [Reg (IR rd)]; read_locs = [];
+ imm = (Some (I64 (Floats.Float.to_bits f))); is_control = false; read_at_id = []; read_at_e1 = []}
+ | PArithRR (i, rd, rs) -> arith_rr_rec i (IR rd) (IR rs)
+ | PArithR (i, rd) -> arith_r_rec i (IR rd)
+
+let load_rec i = match i with
+ | PLoadRRO (trap, i, rs1, rs2, imm) ->
+ { inst = load_real i; write_locs = [Reg (IR rs1)]; read_locs = [Mem; Reg (IR rs2)]; imm = (Some (Off imm)) ; is_control = false;
+ read_at_id = []; read_at_e1 = [] }
+ | PLoadQRRO(rs, ra, imm) ->
+ let (rs0, rs1) = gpreg_q_expand rs in
+ { inst = loadqrro_real; write_locs = [Reg (IR rs0); Reg (IR rs1)]; read_locs = [Mem; Reg (IR ra)]; imm = (Some (Off imm)) ; is_control = false;
+ read_at_id = []; read_at_e1 = [] }
+ | PLoadORRO(rs, ra, imm) ->
+ let (((rs0, rs1), rs2), rs3) = gpreg_o_expand rs in
+ { inst = loadorro_real; write_locs = [Reg (IR rs0); Reg (IR rs1); Reg (IR rs2); Reg (IR rs3)]; read_locs = [Mem; Reg (IR ra)];
+ imm = (Some (Off imm)) ; is_control = false; read_at_id = []; read_at_e1 = []}
+ | PLoadRRR (trap, i, rs1, rs2, rs3) | PLoadRRRXS (trap, i, rs1, rs2, rs3) ->
+ { inst = load_real i; write_locs = [Reg (IR rs1)]; read_locs = [Mem; Reg (IR rs2); Reg (IR rs3)]; imm = None ; is_control = false;
+ read_at_id = []; read_at_e1 = [] }
+
+let store_rec i = match i with
+ | PStoreRRO (i, rs, ra, imm) ->
+ { inst = store_real i; write_locs = [Mem]; read_locs = [Reg (IR rs); Reg (IR ra)]; imm = (Some (Off imm));
+ read_at_id = []; read_at_e1 = [Reg (IR rs)] ; is_control = false}
+ | PStoreQRRO (rs, ra, imm) ->
+ let (rs0, rs1) = gpreg_q_expand rs in
+ { inst = storeqrro_real; write_locs = [Mem]; read_locs = [Reg (IR rs0); Reg (IR rs1); Reg (IR ra)]; imm = (Some (Off imm));
+ read_at_id = []; read_at_e1 = [Reg (IR rs0); Reg (IR rs1)] ; is_control = false}
+ | PStoreORRO (rs, ra, imm) ->
+ let (((rs0, rs1), rs2), rs3) = gpreg_o_expand rs in
+ { inst = storeorro_real; write_locs = [Mem]; read_locs = [Reg (IR rs0); Reg (IR rs1); Reg (IR rs2); Reg (IR rs3); Reg (IR ra)];
+ imm = (Some (Off imm)); read_at_id = []; read_at_e1 = [Reg (IR rs0); Reg (IR rs1); Reg (IR rs2); Reg (IR rs3)]; is_control = false}
+ | PStoreRRR (i, rs, ra1, ra2) | PStoreRRRXS (i, rs, ra1, ra2) ->
+ { inst = store_real i; write_locs = [Mem]; read_locs = [Reg (IR rs); Reg (IR ra1); Reg (IR ra2)]; imm = None;
+ read_at_id = []; read_at_e1 = [Reg (IR rs)]; is_control = false}
+
+let get_rec (rd:gpreg) rs = { inst = get_real; write_locs = [Reg (IR rd)]; read_locs = [Reg rs]; imm = None; is_control = false;
+ read_at_id = []; read_at_e1 = [] }
+
+let set_rec rd (rs:gpreg) = { inst = set_real; write_locs = [Reg rd]; read_locs = [Reg (IR rs)]; imm = None; is_control = false;
+ read_at_id = [Reg (IR rs)]; read_at_e1 = [] }
+
+let basic_rec i =
+ match i with
+ | PArith i -> arith_rec i
+ | PLoad i -> load_rec i
+ | PStore i -> store_rec i
+ | Pallocframe (_, _) -> raise OpaqueInstruction
+ | Pfreeframe (_, _) -> raise OpaqueInstruction
+ | Pget (rd, rs) -> get_rec rd rs
+ | Pset (rd, rs) -> set_rec rd rs
+ | Pnop -> { inst = nop_real; write_locs = []; read_locs = []; imm = None ; is_control = false; read_at_id = []; read_at_e1 = []}
+
+let expand_rec = function
+ | Pbuiltin _ -> raise OpaqueInstruction
+
+let ctl_flow_rec = function
+ | Pret -> { inst = ret_real; write_locs = []; read_locs = [Reg RA]; imm = None ; is_control = true; read_at_id = [Reg RA]; read_at_e1 = []}
+ | Pcall lbl -> { inst = call_real; write_locs = [Reg RA]; read_locs = []; imm = None ; is_control = true; read_at_id = []; read_at_e1 = []}
+ | Picall r -> { inst = icall_real; write_locs = [Reg RA]; read_locs = [Reg (IR r)]; imm = None; is_control = true;
+ read_at_id = [Reg (IR r)]; read_at_e1 = [] }
+ | Pgoto lbl -> { inst = goto_real; write_locs = []; read_locs = []; imm = None ; is_control = true; read_at_id = []; read_at_e1 = []}
+ | Pigoto r -> { inst = igoto_real; write_locs = []; read_locs = [Reg (IR r)]; imm = None ; is_control = true;
+ read_at_id = [Reg (IR r)]; read_at_e1 = [] }
+ | Pj_l lbl -> { inst = goto_real; write_locs = []; read_locs = []; imm = None ; is_control = true; read_at_id = []; read_at_e1 = []}
+ | Pcb (bt, rs, lbl) -> { inst = cb_real; write_locs = []; read_locs = [Reg (IR rs)]; imm = None ; is_control = true;
+ read_at_id = [Reg (IR rs)]; read_at_e1 = [] }
+ | Pcbu (bt, rs, lbl) -> { inst = cbu_real; write_locs = []; read_locs = [Reg (IR rs)]; imm = None ; is_control = true;
+ read_at_id = [Reg (IR rs)]; read_at_e1 = [] }
+ | Pjumptable (r, _) -> raise OpaqueInstruction (* { inst = "Pjumptable"; write_locs = [Reg (IR GPR62); Reg (IR GPR63)]; read_locs = [Reg (IR r)]; imm = None ; is_control = true} *)
+
+let control_rec i =
+ match i with
+ | PExpand i -> expand_rec i
+ | PCtlFlow i -> ctl_flow_rec i
+
+let rec basic_recs body = match body with
+ | [] -> []
+ | bi :: body -> (basic_rec bi) :: (basic_recs body)
+
+let exit_rec exit = match exit with
+ | None -> []
+ | Some ex -> [control_rec ex]
+
+let instruction_recs bb = (basic_recs bb.body) @ (exit_rec bb.exit)
+
+(**
+ * Providing informations relative to the real instructions
+ *)
+
+(** Abstraction providing all the necessary informations for solving the scheduling problem *)
+type inst_info = {
+ write_locs : location list;
+ read_locs : location list;
+ reads_at_id : bool;
+ reads_at_e1 : bool;
+ is_control : bool;
+ usage: int array; (* resources consumed by the instruction *)
+ latency: int;
+}
+
+(** Figuring out whether an immediate is s10, u27l10 or e27u27l10 *)
+type imm_encoding = U6 | S10 | U27L5 | U27L10 | E27U27L10
+
+let rec pow a = function
+ | 0 -> Int64.one
+ | 1 -> Int64.of_int a
+ | n -> let b = pow a (n/2) in
+ Int64.mul b (Int64.mul b (if n mod 2 = 0 then Int64.one else Int64.of_int a))
+
+let signed_interval n : (int64 * int64) = begin
+ assert (n > 0);
+ let min = Int64.neg @@ pow 2 (n-1)
+ and max = Int64.sub (pow 2 (n-1)) Int64.one
+ in (min, max)
+end
+
+let within i interv = match interv with (min, max) -> (i >= min && i <= max)
+
+let signed_length (i:int64) =
+ let rec f (i:int64) n =
+ let interv = signed_interval n
+ in if (within i interv) then n else f i (n+1)
+ in f i 1
+
+let unsigned_length (i:int64) = (signed_length i) - 1
+
+let encode_imm (imm:int64) =
+ if (Int64.compare imm Int64.zero < 0) then
+ let length = signed_length imm
+ in if length <= 10 then S10
+ else if length <= 32 then U27L5
+ else if length <= 37 then U27L10
+ else if length <= 64 then E27U27L10
+ else failwith @@ sprintf "encode_imm: integer too big! (%Ld)" imm
+ else
+ let length = unsigned_length imm
+ in if length <= 6 then U6
+ else if length <= 9 then S10 (* Special case for S10 - stay signed no matter what *)
+ else if length <= 32 then U27L5
+ else if length <= 37 then U27L10
+ else if length <= 64 then E27U27L10
+ else failwith @@ sprintf "encode_imm: integer too big! (%Ld)" imm
+
+(** Resources *)
+type rname = Rissue | Rtiny | Rlite | Rfull | Rlsu | Rmau | Rbcu | Rtca | Rauxr | Rauxw | Rcrrp | Rcrwl | Rcrwh | Rnop
+
+let resource_names = [Rissue; Rtiny; Rlite; Rfull; Rlsu; Rmau; Rbcu; Rtca; Rauxr; Rauxw; Rcrrp; Rcrwl; Rcrwh; Rnop]
+
+let rec find_index elt l =
+ match l with
+ | [] -> raise Not_found
+ | e::l -> if (e == elt) then 0
+ else 1 + find_index elt l
+
+let resource_id resource : int = find_index resource resource_names
+
+let resource_bound resource : int =
+ match resource with
+ | Rissue -> 8
+ | Rtiny -> 4
+ | Rlite -> 2
+ | Rfull -> 1
+ | Rlsu -> 1
+ | Rmau -> 1
+ | Rbcu -> 1
+ | Rtca -> 1
+ | Rauxr -> 1
+ | Rauxw -> 1
+ | Rcrrp -> 1
+ | Rcrwl -> 1
+ | Rcrwh -> 1
+ | Rnop -> 4
+
+let resource_bounds : int array = Array.of_list (List.map resource_bound resource_names)
+
+(** Reservation tables *)
+let alu_full : int array = let resmap = fun r -> match r with
+ | Rissue -> 1 | Rtiny -> 1 | Rlite -> 1 | Rfull -> 1 | _ -> 0
+ in Array.of_list (List.map resmap resource_names)
+
+let alu_lite : int array = let resmap = fun r -> match r with
+ | Rissue -> 1 | Rtiny -> 1 | Rlite -> 1 | _ -> 0
+ in Array.of_list (List.map resmap resource_names)
+
+let alu_lite_x : int array = let resmap = fun r -> match r with
+ | Rissue -> 2 | Rtiny -> 1 | Rlite -> 1 | _ -> 0
+ in Array.of_list (List.map resmap resource_names)
+
+let alu_lite_y : int array = let resmap = fun r -> match r with
+ | Rissue -> 3 | Rtiny -> 1 | Rlite -> 1 | _ -> 0
+ in Array.of_list (List.map resmap resource_names)
+
+let alu_nop : int array = let resmap = fun r -> 0
+ in Array.of_list (List.map resmap resource_names)
+
+let alu_tiny : int array = let resmap = fun r -> match r with
+ | Rissue -> 1 | Rtiny -> 1 | _ -> 0
+ in Array.of_list (List.map resmap resource_names)
+
+let alu_tiny_x : int array = let resmap = fun r -> match r with
+ | Rissue -> 2 | Rtiny -> 1 | _ -> 0
+ in Array.of_list (List.map resmap resource_names)
+
+let alu_tiny_y : int array = let resmap = fun r -> match r with
+ | Rissue -> 3 | Rtiny -> 1 | _ -> 0
+ in Array.of_list (List.map resmap resource_names)
+
+let bcu : int array = let resmap = fun r -> match r with
+ | Rissue -> 1 | Rbcu -> 1 | _ -> 0
+ in Array.of_list (List.map resmap resource_names)
+
+let bcu_tiny_tiny_mau_xnop : int array = let resmap = fun r -> match r with
+ | Rissue -> 1 | Rtiny -> 2 | Rmau -> 1 | Rbcu -> 1 | Rnop -> 4 | _ -> 0
+ in Array.of_list (List.map resmap resource_names)
+
+let lsu_auxr : int array = let resmap = fun r -> match r with
+ | Rissue -> 1 | Rtiny -> 1 | Rlsu -> 1 | Rauxr -> 1 | _ -> 0
+ in Array.of_list (List.map resmap resource_names)
+
+let lsu_auxr_x : int array = let resmap = fun r -> match r with
+ | Rissue -> 2 | Rtiny -> 1 | Rlsu -> 1 | Rauxr -> 1 | _ -> 0
+ in Array.of_list (List.map resmap resource_names)
+
+let lsu_auxr_y : int array = let resmap = fun r -> match r with
+ | Rissue -> 3 | Rtiny -> 1 | Rlsu -> 1 | Rauxr -> 1 | _ -> 0
+ in Array.of_list (List.map resmap resource_names)
+
+let lsu_auxw : int array = let resmap = fun r -> match r with
+ | Rissue -> 1 | Rtiny -> 1 | Rlsu -> 1 | Rauxw -> 1 | _ -> 0
+ in Array.of_list (List.map resmap resource_names)
+
+let lsu_auxw_x : int array = let resmap = fun r -> match r with
+ | Rissue -> 2 | Rtiny -> 1 | Rlsu -> 1 | Rauxw -> 1 | _ -> 0
+ in Array.of_list (List.map resmap resource_names)
+
+let lsu_auxw_y : int array = let resmap = fun r -> match r with
+ | Rissue -> 3 | Rtiny -> 1 | Rlsu -> 1 | Rauxw -> 1 | _ -> 0
+ in Array.of_list (List.map resmap resource_names)
+
+let mau : int array = let resmap = fun r -> match r with
+ | Rissue -> 1 | Rtiny -> 1 | Rmau -> 1 | _ -> 0
+ in Array.of_list (List.map resmap resource_names)
+
+let mau_x : int array = let resmap = fun r -> match r with
+ | Rissue -> 2 | Rtiny -> 1 | Rmau -> 1 | _ -> 0
+ in Array.of_list (List.map resmap resource_names)
+
+let mau_y : int array = let resmap = fun r -> match r with
+ | Rissue -> 3 | Rtiny -> 1 | Rmau -> 1 | _ -> 0
+ in Array.of_list (List.map resmap resource_names)
+
+let mau_auxr : int array = let resmap = fun r -> match r with
+ | Rissue -> 1 | Rtiny -> 1 | Rmau -> 1 | Rauxr -> 1 | _ -> 0
+ in Array.of_list (List.map resmap resource_names)
+
+let mau_auxr_x : int array = let resmap = fun r -> match r with
+ | Rissue -> 2 | Rtiny -> 1 | Rmau -> 1 | Rauxr -> 1 | _ -> 0
+ in Array.of_list (List.map resmap resource_names)
+
+let mau_auxr_y : int array = let resmap = fun r -> match r with
+ | Rissue -> 3 | Rtiny -> 1 | Rmau -> 1 | Rauxr -> 1 | _ -> 0
+ in Array.of_list (List.map resmap resource_names)
+
+(** Real instructions *)
+
+exception InvalidEncoding
+
+let rec_to_usage r =
+ let encoding = match r.imm with None -> None | Some (I32 i) | Some (I64 i) -> Some (encode_imm @@ Z.to_int64 i)
+ | Some (Off ptr) -> Some (encode_imm @@ camlint64_of_ptrofs ptr)
+
+ in match r.inst with
+ | Addw | Andw | Nandw | Orw | Norw | Sbfw | Xorw
+ | Nxorw | Andnw | Ornw ->
+ (match encoding with None | Some U6 | Some S10 -> alu_tiny
+ | Some U27L5 | Some U27L10 -> alu_tiny_x
+ | _ -> raise InvalidEncoding)
+ | Sbfxw | Sbfxd ->
+ (match encoding with None -> alu_lite
+ | Some U6 | Some S10 | Some U27L5 -> alu_lite_x
+ | _ -> raise InvalidEncoding)
+ | Addd | Andd | Nandd | Ord | Nord | Sbfd | Xord
+ | Nxord | Andnd | Ornd ->
+ (match encoding with None | Some U6 | Some S10 -> alu_tiny
+ | Some U27L5 | Some U27L10 -> alu_tiny_x
+ | Some E27U27L10 -> alu_tiny_y)
+ |Cmoved ->
+ (match encoding with None | Some U6 | Some S10 -> alu_lite
+ | Some U27L5 | Some U27L10 -> alu_lite_x
+ | Some E27U27L10 -> alu_lite_y)
+ | Addxw ->
+ (match encoding with None | Some U6 | Some S10 -> alu_lite
+ | Some U27L5 | Some U27L10 -> alu_lite_x
+ | _ -> raise InvalidEncoding)
+ | Addxd ->
+ (match encoding with None | Some U6 | Some S10 -> alu_lite
+ | Some U27L5 | Some U27L10 -> alu_lite_x
+ | Some E27U27L10 -> alu_lite_y)
+ | Compw -> (match encoding with None -> alu_tiny
+ | Some U6 | Some S10 | Some U27L5 -> alu_tiny_x
+ | _ -> raise InvalidEncoding)
+ | Compd -> (match encoding with None | Some U6 | Some S10 -> alu_tiny
+ | Some U27L5 | Some U27L10 -> alu_tiny_x
+ | Some E27U27L10 -> alu_tiny_y)
+ | Fcompw -> (match encoding with None -> alu_lite
+ | Some U6 | Some S10 | Some U27L5 -> alu_lite_x
+ | _ -> raise InvalidEncoding)
+ | Fcompd -> (match encoding with None -> alu_lite
+ | Some U6 | Some S10 | Some U27L5 -> alu_lite_x
+ | _ -> raise InvalidEncoding)
+ | Make -> (match encoding with Some U6 | Some S10 -> alu_tiny
+ | Some U27L5 | Some U27L10 -> alu_tiny_x
+ | Some E27U27L10 -> alu_tiny_y
+ | _ -> raise InvalidEncoding)
+ | Maddw | Msbfw -> (match encoding with None -> mau_auxr
+ | Some U6 | Some S10 | Some U27L5 -> mau_auxr_x
+ | _ -> raise InvalidEncoding)
+ | Maddd | Msbfd -> (match encoding with None | Some U6 | Some S10 -> mau_auxr
+ | Some U27L5 | Some U27L10 -> mau_auxr_x
+ | Some E27U27L10 -> mau_auxr_y)
+ | Mulw -> (match encoding with None -> mau
+ | Some U6 | Some S10 | Some U27L5 -> mau_x
+ | _ -> raise InvalidEncoding)
+ | Muld -> (match encoding with None | Some U6 | Some S10 -> mau
+ | Some U27L5 | Some U27L10 -> mau_x
+ | Some E27U27L10 -> mau_y)
+ | Nop -> alu_nop
+ | Sraw | Srlw | Sllw | Srad | Srld | Slld -> (match encoding with None | Some U6 -> alu_tiny | _ -> raise InvalidEncoding)
+ (* TODO: check *)
+ | Srsw | Srsd | Rorw -> (match encoding with None | Some U6 -> alu_lite | _ -> raise InvalidEncoding)
+ | Extfz | Extfs | Insf -> (match encoding with None -> alu_lite | _ -> raise InvalidEncoding)
+ | Fixeduwz | Fixedwz | Floatwz | Floatuwz | Fixeddz | Fixedudz | Floatdz | Floatudz -> mau
+ | Lbs | Lbz | Lhs | Lhz | Lws | Ld | Lq | Lo ->
+ (match encoding with None | Some U6 | Some S10 -> lsu_auxw
+ | Some U27L5 | Some U27L10 -> lsu_auxw_x
+ | Some E27U27L10 -> lsu_auxw_y)
+ | Sb | Sh | Sw | Sd | Sq | So ->
+ (match encoding with None | Some U6 | Some S10 -> lsu_auxr
+ | Some U27L5 | Some U27L10 -> lsu_auxr_x
+ | Some E27U27L10 -> lsu_auxr_y)
+ | Icall | Call | Cb | Igoto | Goto | Ret | Set -> bcu
+ | Get -> bcu_tiny_tiny_mau_xnop
+ | Fnegd | Fnegw | Fabsd | Fabsw | Fwidenlwd
+ | Fmind | Fmaxd | Fminw | Fmaxw -> alu_lite
+ | Fnarrowdw -> alu_full
+ | Faddd | Faddw | Fsbfd | Fsbfw | Fmuld | Fmulw | Finvw
+ | Ffmad | Ffmaw | Ffmsd | Ffmsw -> mau
+
+
+let inst_info_to_dlatency i =
+ begin
+ assert (not (i.reads_at_id && i.reads_at_e1));
+ match i.reads_at_id with
+ | true -> +1
+ | false -> (match i.reads_at_e1 with
+ | true -> -1
+ | false -> 0)
+ end
+
+let real_inst_to_latency = function
+ | Nop -> 0 (* Only goes through ID *)
+ | Addw | Andw | Compw | Orw | Sbfw | Sbfxw | Sraw | Srsw | Srlw | Sllw | Xorw
+ (* TODO check rorw *)
+ | Rorw | Nandw | Norw | Nxorw | Ornw | Andnw
+ | Nandd | Nord | Nxord | Ornd | Andnd
+ | Addd | Andd | Compd | Ord | Sbfd | Sbfxd | Srad | Srsd | Srld | Slld | Xord | Make
+ | Extfs | Extfz | Insf | Fcompw | Fcompd | Cmoved | Addxw | Addxd
+ | Fmind | Fmaxd | Fminw | Fmaxw
+ -> 1
+ | Floatwz | Floatuwz | Fixeduwz | Fixedwz | Floatdz | Floatudz | Fixeddz | Fixedudz -> 4
+ | Mulw | Muld | Maddw | Maddd | Msbfw | Msbfd -> 2 (* FIXME - WORST CASE. If it's S10 then it's only 1 *)
+ | Lbs | Lbz | Lhs | Lhz | Lws | Ld | Lq | Lo -> 3
+ | Sb | Sh | Sw | Sd | Sq | So -> 1 (* See kvx-Optimization.pdf page 19 *)
+ | Get -> 1
+ | Set -> 4 (* According to the manual should be 3, but I measured 4 *)
+ | Icall | Call | Cb | Igoto | Goto | Ret -> 42 (* Should not matter since it's the final instruction of the basic block *)
+ | Fnegd | Fnegw | Fabsd | Fabsw | Fwidenlwd | Fnarrowdw -> 1
+ | Faddd | Faddw | Fsbfd | Fsbfw | Fmuld | Fmulw | Finvw
+ | Ffmaw | Ffmad | Ffmsw | Ffmsd -> 4
+
+let rec empty_inter la = function
+ | [] -> true
+ | b::lb -> if (List.mem b la) then false else empty_inter la lb
+
+let rec_to_info r : inst_info =
+ let usage = rec_to_usage r
+ and latency = real_inst_to_latency r.inst
+ and reads_at_id = not (empty_inter r.read_locs r.read_at_id)
+ and reads_at_e1 = not (empty_inter r.read_locs r.read_at_e1)
+ in { write_locs = r.write_locs; read_locs = r.read_locs; usage=usage; latency=latency; is_control=r.is_control;
+ reads_at_id = reads_at_id; reads_at_e1 = reads_at_e1 }
+
+let instruction_infos bb = List.map rec_to_info (instruction_recs bb)
+
+let instruction_usages bb =
+ let usages = List.map (fun info -> info.usage) (instruction_infos bb)
+ in Array.of_list usages
+
+(**
+ * Latency constraints building
+ *)
+
+(* type access = { inst: int; loc: location } *)
+
+let preg2int pr = Camlcoq.P.to_int @@ Asmblockdeps.ppos pr
+
+let loc2int = function
+ | Mem -> 1
+ | Reg pr -> preg2int pr
+
+(* module HashedLoc = struct
+ type t = { loc: location; key: int }
+ let equal l1 l2 = (l1.key = l2.key)
+ let hash l = l.key
+ let create (l:location) : t = { loc=l; key = loc2int l }
+end *)
+
+(* module LocHash = Hashtbl.Make(HashedLoc) *)
+module LocHash = Hashtbl
+
+(* Hash table : location => list of instruction ids *)
+
+let rec intlist n =
+ if n < 0 then failwith "intlist: n < 0"
+ else if n = 0 then []
+ else (n-1) :: (intlist (n-1))
+
+let find_in_hash hashloc loc =
+ match LocHash.find_opt hashloc loc with
+ | Some idl -> idl
+ | None -> []
+
+(* Returns a list of instruction ids *)
+let rec get_accesses hashloc (ll: location list) = match ll with
+ | [] -> []
+ | loc :: llocs -> (find_in_hash hashloc loc) @ (get_accesses hashloc llocs)
+
+let compute_latency (ifrom: inst_info) (ito: inst_info) =
+ let dlat = inst_info_to_dlatency ito
+ in let lat = ifrom.latency + dlat
+ in assert (lat >= 0); if (lat == 0) then 1 else lat
+
+let latency_constraints bb =
+ let written = LocHash.create 70
+ and read = LocHash.create 70
+ and count = ref 0
+ and constraints = ref []
+ and instr_infos = instruction_infos bb
+ in let step (i: inst_info) =
+ let raw = get_accesses written i.read_locs
+ and waw = get_accesses written i.write_locs
+ and war = get_accesses read i.write_locs
+ in begin
+ List.iter (fun i -> constraints := {instr_from = i; instr_to = !count;
+ latency = compute_latency (List.nth instr_infos i) (List.nth instr_infos !count)} :: !constraints) raw;
+ List.iter (fun i -> constraints := {instr_from = i; instr_to = !count;
+ latency = compute_latency (List.nth instr_infos i) (List.nth instr_infos !count)} :: !constraints) waw;
+ List.iter (fun i -> constraints := {instr_from = i; instr_to = !count; latency = 0} :: !constraints) war;
+ if i.is_control then List.iter (fun n -> constraints := {instr_from = n; instr_to = !count; latency = 0} :: !constraints) (intlist !count);
+ (* Updating "read" and "written" hashmaps *)
+ List.iter (fun loc ->
+ begin
+ LocHash.replace written loc [!count];
+ LocHash.replace read loc []; (* Clearing all the entries of "read" hashmap when a register is written *)
+ end) i.write_locs;
+ List.iter (fun loc -> LocHash.replace read loc ((!count) :: (find_in_hash read loc))) i.read_locs;
+ count := !count + 1
+ end
+ in (List.iter step instr_infos; !constraints)
+
+(**
+ * Using the InstructionScheduler
+ *)
+
+let build_problem bb =
+ { max_latency = -1; resource_bounds = resource_bounds;
+ instruction_usages = instruction_usages bb; latency_constraints = latency_constraints bb }
+
+let rec find_min_opt (l: int option list) =
+ match l with
+ | [] -> None
+ | e :: l ->
+ begin match find_min_opt l with
+ | None -> e
+ | Some m ->
+ begin match e with
+ | None -> Some m
+ | Some n -> if n < m then Some n else Some m
+ end
+ end
+
+let rec filter_indexes predicate = function
+ | [] -> []
+ | e :: l -> if (predicate e) then e :: (filter_indexes predicate l) else filter_indexes predicate l
+
+let get_from_indexes indexes l = List.map (List.nth l) indexes
+
+let is_basic = function PBasic _ -> true | _ -> false
+let is_control = function PControl _ -> true | _ -> false
+let to_basic = function PBasic i -> i | _ -> failwith "to_basic: control instruction found"
+let to_control = function PControl i -> i | _ -> failwith "to_control: basic instruction found"
+
+let bundlize li hd =
+ let last = List.nth li (List.length li - 1)
+ in if is_control last then
+ let cut_li = Array.to_list @@ Array.sub (Array.of_list li) 0 (List.length li - 1)
+ in let bli = List.map to_basic cut_li
+ in { header = hd; body = bli; exit = Some (to_control last) }
+ else
+ let bli = List.map to_basic li
+ in { header = hd; body = bli; exit = None }
+
+let apply_pbasic b = PBasic b
+let extract_some o = match o with Some e -> e | None -> failwith "extract_some: None found"
+
+let rec find_min = function
+ | [] -> None
+ | e :: l ->
+ match find_min l with
+ | None -> Some e
+ | Some m -> if (e < m) then Some e else Some m
+
+let rec remove_all m = function
+ | [] -> []
+ | e :: l -> if m=e then remove_all m l
+ else e :: (remove_all m l)
+
+let rec find_mins l = match find_min l with
+ | None -> []
+ | Some m -> m :: find_mins (remove_all m l)
+
+let find_all_indices m l =
+ let rec find m off = function
+ | [] -> []
+ | e :: l -> if m=e then off :: find m (off+1) l
+ else find m (off+1) l
+ in find m 0 l
+
+module TimeHash = Hashtbl
+
+(* Hash table : time => list of instruction ids *)
+
+let hashtbl2list h maxint =
+ let rec f i = match TimeHash.find_opt h i with
+ | None -> if (i > maxint) then [] else (f (i+1))
+ | Some bund -> bund :: (f (i+1))
+ in f 0
+
+let find_max l =
+ let rec f = function
+ | [] -> None
+ | e :: l -> match f l with
+ | None -> Some e
+ | Some m -> if (e > m) then Some e else Some m
+ in match (f l) with
+ | None -> raise Not_found
+ | Some m -> m
+
+(* [0, 2, 3, 1, 1, 2, 4, 5] -> [[0], [3, 4], [1, 5], [2], [6], [7]] *)
+let minpack_list (l: int list) =
+ let timehash = TimeHash.create (List.length l)
+ in let rec f i = function
+ | [] -> ()
+ | t::l -> begin
+ (match TimeHash.find_opt timehash t with
+ | None -> TimeHash.add timehash t [i]
+ | Some bund -> TimeHash.replace timehash t (bund @ [i]));
+ f (i+1) l
+ end
+ in begin
+ f 0 l;
+ hashtbl2list timehash (find_max l)
+ end;;
+
+(* let minpack_list l =
+ let mins = find_mins l
+ in List.map (fun m -> find_all_indices m l) mins
+ *)
+
+let bb_to_instrs bb = (List.map apply_pbasic bb.body) @ (match bb.exit with None -> [] | Some e -> [PControl e])
+
+let bundlize_solution bb sol =
+ let tmp = (Array.to_list @@ Array.sub sol 0 (Array.length sol - 1))
+ in let packs = minpack_list tmp
+ and instrs = bb_to_instrs bb
+ in let rec bund hd = function
+ | [] -> []
+ | pack :: packs -> bundlize (get_from_indexes pack instrs) hd :: (bund [] packs)
+ in bund bb.header packs
+
+let print_inst oc = function
+ | Asm.Pallocframe(sz, ofs) -> fprintf oc " Pallocframe\n"
+ | Asm.Pfreeframe(sz, ofs) -> fprintf oc " Pfreeframe\n"
+ | Asm.Pbuiltin(ef, args, res) -> fprintf oc " Pbuiltin\n"
+ | Asm.Pcvtl2w(rd, rs) -> fprintf oc " Pcvtl2w %a = %a\n" ireg rd ireg rs
+ | i -> print_instruction oc i
+
+let print_bb oc bb =
+ let asm_instructions = Asm.unfold_bblock bb
+ in List.iter (print_inst oc) asm_instructions
+
+let print_schedule sched =
+ print_string "[ ";
+ Array.iter (fun x -> Printf.printf "%d; " x) sched;
+ print_endline "]";;
+
+let do_schedule bb =
+ let problem = build_problem bb in
+ (if debug then print_problem stdout problem);
+ let solution = scheduler_by_name (!Clflags.option_fpostpass_sched) problem
+ in match solution with
+ | None -> failwith "Could not find a valid schedule"
+ | Some sol ->
+ ((if debug then print_schedule sol);
+ let bundles = bundlize_solution bb sol in
+ (if debug then
+ begin
+ Printf.eprintf "Scheduling the following group of instructions:\n";
+ print_bb stderr bb;
+ Printf.eprintf "Gave the following solution:\n";
+ List.iter (print_bb stderr) bundles;
+ Printf.eprintf "--------------------------------\n"
+ end;
+ bundles))
+
+(**
+ * Dumb schedule if the above doesn't work
+ *)
+
+let bundlize_label l =
+ match l with
+ | [] -> []
+ | l -> [{ header = l; body = []; exit = None }]
+
+let rec bundlize_basic l =
+ match l with
+ | [] -> []
+ | b :: l -> { header = []; body = [b]; exit = None } :: bundlize_basic l
+
+let bundlize_exit e =
+ match e with
+ | Some e -> [{ header = []; body = []; exit = Some e }]
+ | None -> []
+
+let dumb_schedule (bb : bblock) : bblock list = bundlize_label bb.header @ bundlize_basic bb.body @ bundlize_exit bb.exit
+
+(**
+ * Separates the opaque instructions such as Pfreeframe and Pallocframe
+ *)
+
+let is_opaque = function
+ | PBasic (Pallocframe _) | PBasic (Pfreeframe _) | PControl (PExpand (Pbuiltin _)) -> true
+ | _ -> false
+
+(* Returns : (accumulated instructions, remaining instructions, opaque instruction if found) *)
+let rec biggest_wo_opaque = function
+ | [] -> ([], [], None)
+ | i :: li -> if is_opaque i then ([], li, Some i)
+ else let big, rem, opaque = biggest_wo_opaque li in (i :: big, rem, opaque);;
+
+let separate_opaque bb =
+ let instrs = bb_to_instrs bb
+ in let rec f hd li =
+ match li with
+ | [] -> []
+ | li -> let big, rem, opaque = biggest_wo_opaque li in
+ match opaque with
+ | Some i ->
+ (match big with
+ | [] -> (bundlize [i] hd) :: (f [] rem)
+ | big -> (bundlize big hd) :: (bundlize [i] []) :: (f [] rem)
+ )
+ | None -> (bundlize big hd) :: (f [] rem)
+ in f bb.header instrs
+
+let smart_schedule bb =
+ let lbb = separate_opaque bb
+ in let rec f = function
+ | [] -> []
+ | bb :: lbb ->
+ let bundles =
+ try do_schedule bb
+ with OpaqueInstruction -> dumb_schedule bb
+ | e ->
+ let msg = Printexc.to_string e
+ and stack = Printexc.get_backtrace ()
+ in begin
+ Printf.eprintf "In regards to this group of instructions:\n";
+ print_bb stderr bb;
+ Printf.eprintf "Postpass scheduling could not complete: %s\n%s" msg stack;
+ failwith "Invalid schedule"
+ (*
+ Printf.eprintf "Issuing one instruction per bundle instead\n\n";
+ dumb_schedule bb
+ *)
+ end
+ in bundles @ (f lbb)
+ in f lbb
+
+let bblock_to_bundles bb =
+ if debug then (eprintf "###############################\n"; Printf.eprintf "SCHEDULING\n"; print_bb stderr bb);
+ (* print_problem (build_problem bb); *)
+ if Compopts.optim_postpass () then smart_schedule bb else dumb_schedule bb
+
+(** To deal with the Coq Axiom schedule : bblock -> (list (list basic)) * option control *)
+
+let rec bundles_to_coq_schedule = function
+ | [] -> ([], None)
+ | bb :: [] -> ([bb.body], bb.exit)
+ | bb :: lbb -> let (llb, oc) = bundles_to_coq_schedule lbb in (bb.body :: llb, oc)
+
+(** Called schedule function from Coq *)
+
+let schedule_notime bb = let toto = bundles_to_coq_schedule @@ bblock_to_bundles bb in toto
+let schedule bb = Timing.time_coq ('P'::('o'::('s'::('t'::('p'::('a'::('s'::('s'::('S'::('c'::('h'::('e'::('d'::('u'::('l'::('i'::('n'::('g'::(' '::('o'::('r'::('a'::('c'::('l'::('e'::([])))))))))))))))))))))))))) schedule_notime bb
diff --git a/aarch64/PostpassSchedulingproof.v b/aarch64/PostpassSchedulingproof.v
new file mode 100644
index 00000000..3d08232b
--- /dev/null
+++ b/aarch64/PostpassSchedulingproof.v
@@ -0,0 +1,690 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Sylvain Boulmé Grenoble-INP, VERIMAG *)
+(* David Monniaux CNRS, VERIMAG *)
+(* Cyril Six Kalray *)
+(* *)
+(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+Require Import Coqlib Errors.
+Require Import Integers Floats AST Linking.
+Require Import Values Memory Events Globalenvs Smallstep.
+Require Import Op Locations Machblock Conventions Asmblock.
+Require Import Asmblockgenproof0 Asmblockprops.
+Require Import PostpassScheduling.
+Require Import Asmblockgenproof.
+Require Import Axioms.
+
+Local Open Scope error_monad_scope.
+
+(*
+Definition match_prog (p tp: Asmvliw.program) :=
+ match_program (fun _ f tf => transf_fundef f = OK tf) eq p tp.
+
+Lemma transf_program_match:
+ forall p tp, transf_program p = OK tp -> match_prog p tp.
+Proof.
+ intros. eapply match_transform_partial_program; eauto.
+Qed.
+
+Lemma regset_double_set_id:
+ forall r (rs: regset) v1 v2,
+ (rs # r <- v1 # r <- v2) = (rs # r <- v2).
+Proof.
+ intros. apply functional_extensionality. intros. destruct (preg_eq r x).
+ - subst r. repeat (rewrite Pregmap.gss; auto).
+ - repeat (rewrite Pregmap.gso); auto.
+Qed.
+
+Lemma exec_body_pc_var:
+ forall l ge rs m rs' m' v,
+ exec_body ge l rs m = Next rs' m' ->
+ exec_body ge l (rs # PC <- v) m = Next (rs' # PC <- v) m'.
+Proof.
+ induction l.
+ - intros. simpl. simpl in H. inv H. auto.
+ - intros. simpl in *.
+ destruct (exec_basic_instr ge a rs m) eqn:EXEBI; try discriminate.
+ erewrite exec_basic_instr_pc_var; eauto.
+Qed.
+
+Lemma pc_set_add:
+ forall rs v r x y,
+ 0 <= x <= Ptrofs.max_unsigned ->
+ 0 <= y <= Ptrofs.max_unsigned ->
+ rs # r <- (Val.offset_ptr v (Ptrofs.repr (x + y))) = rs # r <- (Val.offset_ptr (rs # r <- (Val.offset_ptr v (Ptrofs.repr x)) r) (Ptrofs.repr y)).
+Proof.
+ intros. apply functional_extensionality. intros r0. destruct (preg_eq r r0).
+ - subst. repeat (rewrite Pregmap.gss); auto.
+ destruct v; simpl; auto.
+ rewrite Ptrofs.add_assoc.
+ enough (Ptrofs.repr (x + y) = Ptrofs.add (Ptrofs.repr x) (Ptrofs.repr y)) as ->; auto.
+ unfold Ptrofs.add.
+ enough (x + y = Ptrofs.unsigned (Ptrofs.repr x) + Ptrofs.unsigned (Ptrofs.repr y)) as ->; auto.
+ repeat (rewrite Ptrofs.unsigned_repr); auto.
+ - repeat (rewrite Pregmap.gso; auto).
+Qed.
+
+Lemma concat2_straight:
+ forall a b bb rs m rs'' m'' f ge,
+ concat2 a b = OK bb ->
+ exec_bblock ge f bb rs m = Next rs'' m'' ->
+ exists rs' m',
+ exec_bblock ge f a rs m = Next rs' m'
+ /\ rs' PC = Val.offset_ptr (rs PC) (Ptrofs.repr (size a))
+ /\ exec_bblock ge f b rs' m' = Next rs'' m''.
+Proof.
+ intros until ge. intros CONC2 EXEB.
+ exploit concat2_zlt_size; eauto. intros (LTA & LTB).
+ exploit concat2_noexit; eauto. intros EXA.
+ exploit concat2_decomp; eauto. intros. inv H.
+ unfold exec_bblock in EXEB. destruct (exec_body ge (body bb) rs m) eqn:EXEB'; try discriminate.
+ rewrite H0 in EXEB'. apply exec_body_app in EXEB'. destruct EXEB' as (rs1 & m1 & EXEB1 & EXEB2).
+ eexists; eexists. split.
+ unfold exec_bblock. rewrite EXEB1. rewrite EXA. simpl. eauto.
+ split.
+ exploit exec_body_pc. eapply EXEB1. intros. rewrite <- H. auto.
+ unfold exec_bblock. unfold nextblock, incrPC. rewrite regset_same_assign. erewrite exec_body_pc_var; eauto.
+ rewrite <- H1. unfold nextblock in EXEB. rewrite regset_double_set_id.
+ assert (size bb = size a + size b).
+ { unfold size. rewrite H0. rewrite H1. rewrite app_length. rewrite EXA. simpl. rewrite Nat.add_0_r.
+ repeat (rewrite Nat2Z.inj_add). omega. }
+ clear EXA H0 H1. rewrite H in EXEB.
+ assert (rs1 PC = rs0 PC). { apply exec_body_pc in EXEB2. auto. }
+ rewrite H0. rewrite <- pc_set_add; auto.
+ exploit size_positive. instantiate (1 := a). intro. omega.
+ exploit size_positive. instantiate (1 := b). intro. omega.
+Qed.
+
+Lemma concat_all_exec_bblock (ge: Genv.t fundef unit) (f: function) :
+ forall a bb rs m lbb rs'' m'',
+ lbb <> nil ->
+ concat_all (a :: lbb) = OK bb ->
+ exec_bblock ge f bb rs m = Next rs'' m'' ->
+ exists bb' rs' m',
+ concat_all lbb = OK bb'
+ /\ exec_bblock ge f a rs m = Next rs' m'
+ /\ rs' PC = Val.offset_ptr (rs PC) (Ptrofs.repr (size a))
+ /\ exec_bblock ge f bb' rs' m' = Next rs'' m''.
+Proof.
+ intros until m''. intros Hnonil CONC EXEB.
+ simpl in CONC.
+ destruct lbb as [|b lbb]; try contradiction. clear Hnonil.
+ monadInv CONC. exploit concat2_straight; eauto. intros (rs' & m' & EXEB1 & PCeq & EXEB2).
+ exists x. repeat econstructor. all: eauto.
+Qed.
+
+Lemma ptrofs_add_repr :
+ forall a b,
+ Ptrofs.unsigned (Ptrofs.add (Ptrofs.repr a) (Ptrofs.repr b)) = Ptrofs.unsigned (Ptrofs.repr (a + b)).
+Proof.
+ intros a b.
+ rewrite Ptrofs.add_unsigned. repeat (rewrite Ptrofs.unsigned_repr_eq).
+ rewrite <- Zplus_mod. auto.
+Qed.
+
+Section PRESERVATION_ASMBLOCK.
+
+Variables prog tprog: program.
+Hypothesis TRANSL: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Lemma transf_function_no_overflow:
+ forall f tf,
+ transf_function f = OK tf -> size_blocks tf.(fn_blocks) <= Ptrofs.max_unsigned.
+Proof.
+ intros. monadInv H. destruct (zlt Ptrofs.max_unsigned (size_blocks x.(fn_blocks))); inv EQ0.
+ omega.
+Qed.
+
+Lemma symbols_preserved:
+ forall id,
+ Genv.find_symbol tge id = Genv.find_symbol ge id.
+Proof (Genv.find_symbol_match TRANSL).
+
+Lemma senv_preserved:
+ Senv.equiv ge tge.
+Proof (Genv.senv_match TRANSL).
+
+Lemma functions_translated:
+ forall v f,
+ Genv.find_funct ge v = Some f ->
+ exists tf,
+ Genv.find_funct tge v = Some tf /\ transf_fundef f = OK tf.
+Proof (Genv.find_funct_transf_partial TRANSL).
+
+Lemma function_ptr_translated:
+ forall v f,
+ Genv.find_funct_ptr ge v = Some f ->
+ exists tf,
+ Genv.find_funct_ptr tge v = Some tf /\ transf_fundef f = OK tf.
+Proof (Genv.find_funct_ptr_transf_partial TRANSL).
+
+Lemma functions_transl:
+ forall fb f tf,
+ Genv.find_funct_ptr ge fb = Some (Internal f) ->
+ transf_function f = OK tf ->
+ Genv.find_funct_ptr tge fb = Some (Internal tf).
+Proof.
+ intros. exploit function_ptr_translated; eauto.
+ intros (tf' & A & B). monadInv B. rewrite H0 in EQ. inv EQ. auto.
+Qed.
+
+Inductive match_states: state -> state -> Prop :=
+ | match_states_intro:
+ forall s1 s2, s1 = s2 -> match_states s1 s2.
+
+Lemma prog_main_preserved:
+ prog_main tprog = prog_main prog.
+Proof (match_program_main TRANSL).
+
+Lemma prog_main_address_preserved:
+ (Genv.symbol_address (Genv.globalenv prog) (prog_main prog) Ptrofs.zero) =
+ (Genv.symbol_address (Genv.globalenv tprog) (prog_main tprog) Ptrofs.zero).
+Proof.
+ unfold Genv.symbol_address. rewrite symbols_preserved.
+ rewrite prog_main_preserved. auto.
+Qed.
+
+Lemma transf_initial_states:
+ forall st1, initial_state prog st1 ->
+ exists st2, initial_state tprog st2 /\ match_states st1 st2.
+Proof.
+ intros. inv H.
+ econstructor; split.
+ - eapply initial_state_intro.
+ eapply (Genv.init_mem_transf_partial TRANSL); eauto.
+ - econstructor; eauto. subst ge0. subst rs0. rewrite prog_main_address_preserved. auto.
+Qed.
+
+Lemma transf_final_states:
+ forall st1 st2 r,
+ match_states st1 st2 -> final_state st1 r -> final_state st2 r.
+Proof.
+ intros. inv H0. inv H. econstructor; eauto.
+Qed.
+
+Lemma tail_find_bblock:
+ forall lbb pos bb,
+ find_bblock pos lbb = Some bb ->
+ exists c, code_tail pos lbb (bb::c).
+Proof.
+ induction lbb.
+ - intros. simpl in H. inv H.
+ - intros. simpl in H.
+ destruct (zlt pos 0); try (inv H; fail).
+ destruct (zeq pos 0).
+ + inv H. exists lbb. constructor; auto.
+ + apply IHlbb in H. destruct H as (c & TAIL). exists c.
+ enough (pos = pos - size a + size a) as ->.
+ apply code_tail_S; auto.
+ omega.
+Qed.
+
+Lemma code_tail_head_app:
+ forall l pos c1 c2,
+ code_tail pos c1 c2 ->
+ code_tail (pos + size_blocks l) (l++c1) c2.
+Proof.
+ induction l.
+ - intros. simpl. rewrite Z.add_0_r. auto.
+ - intros. apply IHl in H. simpl. rewrite (Z.add_comm (size a)). rewrite Z.add_assoc. apply code_tail_S. assumption.
+Qed.
+
+Lemma transf_blocks_verified:
+ forall c tc pos bb c',
+ transf_blocks c = OK tc ->
+ code_tail pos c (bb::c') ->
+ exists lbb,
+ verified_schedule bb = OK lbb
+ /\ exists tc', code_tail pos tc (lbb ++ tc').
+Proof.
+ induction c; intros.
+ - simpl in H. inv H. inv H0.
+ - inv H0.
+ + monadInv H. exists x0.
+ split; simpl; auto. eexists; eauto. econstructor; eauto.
+ + unfold transf_blocks in H. fold transf_blocks in H. monadInv H.
+ exploit IHc; eauto.
+ intros (lbb & TRANS & tc' & TAIL).
+(* monadInv TRANS. *)
+ repeat eexists; eauto.
+ erewrite verified_schedule_size; eauto.
+ apply code_tail_head_app.
+ eauto.
+Qed.
+
+Lemma transf_find_bblock:
+ forall ofs f bb tf,
+ find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bb ->
+ transf_function f = OK tf ->
+ exists lbb,
+ verified_schedule bb = OK lbb
+ /\ exists c, code_tail (Ptrofs.unsigned ofs) (fn_blocks tf) (lbb ++ c).
+Proof.
+ intros.
+ monadInv H0. destruct (zlt Ptrofs.max_unsigned (size_blocks (fn_blocks x))); try (inv EQ0; fail). inv EQ0.
+ monadInv EQ. apply tail_find_bblock in H. destruct H as (c & TAIL).
+ eapply transf_blocks_verified; eauto.
+Qed.
+
+Lemma symbol_address_preserved:
+ forall l ofs, Genv.symbol_address ge l ofs = Genv.symbol_address tge l ofs.
+Proof.
+ intros. unfold Genv.symbol_address. repeat (rewrite symbols_preserved). reflexivity.
+Qed.
+
+Lemma head_tail {A: Type}:
+ forall (l: list A) hd, hd::l = hd :: (tail (hd::l)).
+Proof.
+ intros. simpl. auto.
+Qed.
+
+Lemma verified_schedule_not_empty:
+ forall bb lbb,
+ verified_schedule bb = OK lbb -> lbb <> nil.
+Proof.
+ intros. apply verified_schedule_size in H.
+ pose (size_positive bb). assert (size_blocks lbb > 0) by omega. clear H g.
+ destruct lbb; simpl in *; discriminate.
+Qed.
+
+Lemma header_nil_label_pos_none:
+ forall lbb l p,
+ Forall (fun b => header b = nil) lbb -> label_pos l p lbb = None.
+Proof.
+ induction lbb.
+ - intros. simpl. auto.
+ - intros. inv H. simpl. unfold is_label. rewrite H2. destruct (in_dec l nil). { inv i. }
+ auto.
+Qed.
+
+Lemma verified_schedule_label:
+ forall bb tbb lbb l,
+ verified_schedule bb = OK (tbb :: lbb) ->
+ is_label l bb = is_label l tbb
+ /\ label_pos l 0 lbb = None.
+Proof.
+ intros. exploit verified_schedule_header; eauto.
+ intros (HdrEq & HdrNil).
+ split.
+ - unfold is_label. rewrite HdrEq. reflexivity.
+ - apply header_nil_label_pos_none. assumption.
+Qed.
+
+Lemma label_pos_app_none:
+ forall c c' l p p',
+ label_pos l p c = None ->
+ label_pos l (p' + size_blocks c) c' = label_pos l p' (c ++ c').
+Proof.
+ induction c.
+ - intros. simpl in *. rewrite Z.add_0_r. reflexivity.
+ - intros. simpl in *. destruct (is_label _ _) eqn:ISLABEL.
+ + discriminate.
+ + eapply IHc in H. rewrite Z.add_assoc. eauto.
+Qed.
+
+Remark label_pos_pvar_none_add:
+ forall tc l p p' k,
+ label_pos l (p+k) tc = None -> label_pos l (p'+k) tc = None.
+Proof.
+ induction tc.
+ - intros. simpl. auto.
+ - intros. simpl in *. destruct (is_label _ _) eqn:ISLBL.
+ + discriminate.
+ + pose (IHtc l p p' (k + size a)). repeat (rewrite Z.add_assoc in e). auto.
+Qed.
+
+Lemma label_pos_pvar_none:
+ forall tc l p p',
+ label_pos l p tc = None -> label_pos l p' tc = None.
+Proof.
+ intros. rewrite (Zplus_0_r_reverse p') at 1. rewrite (Zplus_0_r_reverse p) in H at 1.
+ eapply label_pos_pvar_none_add; eauto.
+Qed.
+
+Remark label_pos_pvar_some_add_add:
+ forall tc l p p' k k',
+ label_pos l (p+k') tc = Some (p+k) -> label_pos l (p'+k') tc = Some (p'+k).
+Proof.
+ induction tc.
+ - intros. simpl in H. discriminate.
+ - intros. simpl in *. destruct (is_label _ _) eqn:ISLBL.
+ + inv H. assert (k = k') by omega. subst. reflexivity.
+ + pose (IHtc l p p' k (k' + size a)). repeat (rewrite Z.add_assoc in e). auto.
+Qed.
+
+Lemma label_pos_pvar_some_add:
+ forall tc l p p' k,
+ label_pos l p tc = Some (p+k) -> label_pos l p' tc = Some (p'+k).
+Proof.
+ intros. rewrite (Zplus_0_r_reverse p') at 1. rewrite (Zplus_0_r_reverse p) in H at 1.
+ eapply label_pos_pvar_some_add_add; eauto.
+Qed.
+
+Remark label_pos_pvar_add:
+ forall c tc l p p' k,
+ label_pos l (p+k) c = label_pos l p tc ->
+ label_pos l (p'+k) c = label_pos l p' tc.
+Proof.
+ induction c.
+ - intros. simpl in *.
+ exploit label_pos_pvar_none; eauto.
+ - intros. simpl in *. destruct (is_label _ _) eqn:ISLBL.
+ + exploit label_pos_pvar_some_add; eauto.
+ + pose (IHc tc l p p' (k+size a)). repeat (rewrite Z.add_assoc in e). auto.
+Qed.
+
+Lemma label_pos_pvar:
+ forall c tc l p p',
+ label_pos l p c = label_pos l p tc ->
+ label_pos l p' c = label_pos l p' tc.
+Proof.
+ intros. rewrite (Zplus_0_r_reverse p') at 1. rewrite (Zplus_0_r_reverse p) in H at 1.
+ eapply label_pos_pvar_add; eauto.
+Qed.
+
+Lemma label_pos_head_app:
+ forall c bb lbb l tc p,
+ verified_schedule bb = OK lbb ->
+ label_pos l p c = label_pos l p tc ->
+ label_pos l p (bb :: c) = label_pos l p (lbb ++ tc).
+Proof.
+ intros. simpl. destruct lbb as [|tbb lbb].
+ - apply verified_schedule_not_empty in H. contradiction.
+ - simpl. exploit verified_schedule_label; eauto. intros (ISLBL & LBLPOS).
+ rewrite ISLBL.
+ destruct (is_label l tbb) eqn:ISLBL'; simpl; auto.
+ eapply label_pos_pvar in H0. erewrite H0.
+ erewrite verified_schedule_size; eauto. simpl size_blocks. rewrite Z.add_assoc.
+ erewrite label_pos_app_none; eauto.
+Qed.
+
+Lemma label_pos_preserved:
+ forall c tc l,
+ transf_blocks c = OK tc -> label_pos l 0 c = label_pos l 0 tc.
+Proof.
+ induction c.
+ - intros. simpl in *. inv H. reflexivity.
+ - intros. unfold transf_blocks in H; fold transf_blocks in H. monadInv H. eapply IHc in EQ.
+ eapply label_pos_head_app; eauto.
+Qed.
+
+Lemma label_pos_preserved_blocks:
+ forall l f tf,
+ transf_function f = OK tf ->
+ label_pos l 0 (fn_blocks f) = label_pos l 0 (fn_blocks tf).
+Proof.
+ intros. monadInv H. monadInv EQ.
+ destruct (zlt Ptrofs.max_unsigned _); try discriminate.
+ monadInv EQ0. simpl. eapply label_pos_preserved; eauto.
+Qed.
+
+Lemma transf_exec_control:
+ forall f tf ex rs m,
+ transf_function f = OK tf ->
+ exec_control ge f ex rs m = exec_control tge tf ex rs m.
+Proof.
+ intros. destruct ex; simpl; auto.
+ assert (ge = Genv.globalenv prog). auto.
+ assert (tge = Genv.globalenv tprog). auto.
+ pose symbol_address_preserved.
+ exploreInst; simpl; auto; try congruence;
+ unfold par_goto_label; unfold par_eval_branch; unfold par_goto_label; erewrite label_pos_preserved_blocks; eauto.
+Qed.
+
+Lemma transf_exec_basic_instr:
+ forall i rs m, exec_basic_instr ge i rs m = exec_basic_instr tge i rs m.
+Proof.
+ intros. pose symbol_address_preserved.
+ unfold exec_basic_instr. unfold bstep. exploreInst; simpl; auto; try congruence.
+ unfold parexec_arith_instr; unfold arith_eval_r; exploreInst; simpl; auto; try congruence.
+Qed.
+
+Lemma transf_exec_body:
+ forall bdy rs m, exec_body ge bdy rs m = exec_body tge bdy rs m.
+Proof.
+ induction bdy; intros.
+ - simpl. reflexivity.
+ - simpl. rewrite transf_exec_basic_instr.
+ destruct (exec_basic_instr _ _ _); auto.
+Qed.
+
+Lemma transf_exec_bblock:
+ forall f tf bb rs m,
+ transf_function f = OK tf ->
+ exec_bblock ge f bb rs m = exec_bblock tge tf bb rs m.
+Proof.
+ intros. unfold exec_bblock. rewrite transf_exec_body. destruct (exec_body _ _ _ _); auto.
+ eapply transf_exec_control; eauto.
+Qed.
+
+Lemma transf_step_simu:
+ forall tf b lbb ofs c tbb rs m rs' m',
+ Genv.find_funct_ptr tge b = Some (Internal tf) ->
+ size_blocks (fn_blocks tf) <= Ptrofs.max_unsigned ->
+ rs PC = Vptr b ofs ->
+ code_tail (Ptrofs.unsigned ofs) (fn_blocks tf) (lbb ++ c) ->
+ concat_all lbb = OK tbb ->
+ exec_bblock tge tf tbb rs m = Next rs' m' ->
+ plus step tge (State rs m) E0 (State rs' m').
+Proof.
+ induction lbb.
+ - intros until m'. simpl. intros. discriminate.
+ - intros until m'. intros GFIND SIZE PCeq TAIL CONC EXEB.
+ destruct lbb.
+ + simpl in *. clear IHlbb. inv CONC. eapply plus_one. econstructor; eauto. eapply find_bblock_tail; eauto.
+ + exploit concat_all_exec_bblock; eauto; try discriminate.
+ intros (tbb0 & rs0 & m0 & CONC0 & EXEB0 & PCeq' & EXEB1).
+ eapply plus_left.
+ econstructor.
+ 3: eapply find_bblock_tail. rewrite <- app_comm_cons in TAIL. 3: eauto.
+ all: eauto.
+ eapply plus_star. eapply IHlbb; eauto. rewrite PCeq in PCeq'. simpl in PCeq'. all: eauto.
+ eapply code_tail_next_int; eauto.
+Qed.
+
+Theorem transf_step_correct:
+ forall s1 t s2, step ge s1 t s2 ->
+ forall s1' (MS: match_states s1 s1'),
+ (exists s2', plus step tge s1' t s2' /\ match_states s2 s2').
+Proof.
+ induction 1; intros; inv MS.
+ - exploit function_ptr_translated; eauto. intros (tf & FFP & TRANSF). monadInv TRANSF.
+ exploit transf_find_bblock; eauto. intros (lbb & VES & c & TAIL).
+ exploit verified_schedule_correct; eauto. intros (tbb & CONC & BBEQ). inv CONC. rename H3 into CONC.
+ assert (NOOV: size_blocks x.(fn_blocks) <= Ptrofs.max_unsigned).
+ eapply transf_function_no_overflow; eauto.
+
+ erewrite transf_exec_bblock in H2; eauto.
+ unfold bblock_simu in BBEQ. rewrite BBEQ in H2; try congruence.
+ exists (State rs' m'). split; try (constructor; auto).
+ eapply transf_step_simu; eauto.
+
+ - exploit function_ptr_translated; eauto. intros (tf & FFP & TRANSF). monadInv TRANSF.
+ exploit transf_find_bblock; eauto. intros (lbb & VES & c & TAIL).
+ exploit verified_schedule_builtin_idem; eauto. intros. subst lbb.
+
+ remember (State (nextblock _ _) _) as s'. exists s'.
+ split; try constructor; auto.
+ eapply plus_one. subst s'.
+ eapply exec_step_builtin.
+ 3: eapply find_bblock_tail. simpl in TAIL. 3: eauto.
+ all: eauto.
+ eapply eval_builtin_args_preserved with (ge1 := ge). exact symbols_preserved. eauto.
+ eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+
+ - exploit function_ptr_translated; eauto. intros (tf & FFP & TRANSF). monadInv TRANSF.
+ remember (State _ m') as s'. exists s'. split; try constructor; auto.
+ subst s'. eapply plus_one. eapply exec_step_external; eauto.
+ eapply external_call_symbols_preserved; eauto. apply senv_preserved.
+Qed.
+
+Theorem transf_program_correct_Asmblock:
+ forward_simulation (Asmblock.semantics prog) (Asmblock.semantics tprog).
+Proof.
+ eapply forward_simulation_plus.
+ - apply senv_preserved.
+ - apply transf_initial_states.
+ - apply transf_final_states.
+ - apply transf_step_correct.
+Qed.
+
+End PRESERVATION_ASMBLOCK.
+
+Require Import Asmvliw.
+
+Lemma verified_par_checks_alls_bundles lb x: forall bundle,
+ verify_par lb = OK x ->
+ List.In bundle lb -> verify_par_bblock bundle = OK tt.
+Proof.
+ induction lb; simpl; try tauto.
+ intros bundle H; monadInv H.
+ destruct 1; subst; eauto.
+ destruct x0; auto.
+Qed.
+
+Lemma verified_schedule_nob_checks_alls_bundles bb lb bundle:
+ verified_schedule_nob bb = OK lb ->
+ List.In bundle lb -> verify_par_bblock bundle = OK tt.
+Proof.
+ unfold verified_schedule_nob. intros H;
+ monadInv H. destruct x4.
+ intros; eapply verified_par_checks_alls_bundles; eauto.
+Qed.
+
+Lemma verify_par_bblock_PExpand bb i:
+ exit bb = Some (PExpand i) -> verify_par_bblock bb = OK tt.
+Proof.
+ destruct bb as [h bdy ext H]; simpl.
+ intros; subst. destruct i.
+ generalize H.
+ rewrite <- wf_bblock_refl in H.
+ destruct H as [H H0].
+ unfold builtin_alone in H0. erewrite H0; eauto.
+Qed.
+
+Local Hint Resolve verified_schedule_nob_checks_alls_bundles: core.
+
+Lemma verified_schedule_checks_alls_bundles bb lb bundle:
+ verified_schedule bb = OK lb ->
+ List.In bundle lb -> verify_par_bblock bundle = OK tt.
+Proof.
+ unfold verified_schedule. remember (exit bb) as exb.
+ destruct exb as [c|]; eauto.
+ destruct c as [i|]; eauto.
+ destruct i; intros H. inversion_clear H; simpl.
+ intuition subst.
+ intros; eapply verify_par_bblock_PExpand; eauto.
+Qed.
+
+Lemma transf_blocks_checks_all_bundles lbb: forall lb bundle,
+ transf_blocks lbb = OK lb ->
+ List.In bundle lb -> verify_par_bblock bundle = OK tt.
+Proof.
+ induction lbb; simpl.
+ - intros lb bundle H; inversion_clear H. simpl; try tauto.
+ - intros lb bundle H0.
+ monadInv H0.
+ rewrite in_app. destruct 1; eauto.
+ eapply verified_schedule_checks_alls_bundles; eauto.
+Qed.
+
+Lemma find_bblock_Some_in lb:
+ forall ofs b, find_bblock ofs lb = Some b -> List.In b lb.
+Proof.
+ induction lb; simpl; try congruence.
+ intros ofs b.
+ destruct (zlt ofs 0); try congruence.
+ destruct (zeq ofs 0); eauto.
+ intros X; inversion X; eauto.
+Qed.
+
+Section PRESERVATION_ASMVLIW.
+
+Variables prog tprog: program.
+Hypothesis TRANSL: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Lemma all_bundles_are_checked b ofs f bundle:
+ Genv.find_funct_ptr (globalenv (Asmblock.semantics tprog)) b = Some (Internal f) ->
+ find_bblock ofs (fn_blocks f) = Some bundle ->
+ verify_par_bblock bundle = OK tt.
+Proof.
+ unfold match_prog, match_program in TRANSL.
+ unfold Genv.find_funct_ptr; simpl; intros X.
+ destruct (Genv.find_def_match_2 TRANSL b) as [|f0 y H]; try congruence.
+ destruct y as [tf0|]; try congruence.
+ inversion X as [H1]. subst. clear X.
+ remember (@Gfun fundef unit (Internal f)) as f2.
+ destruct H as [ctx' f1 f2 H0|]; try congruence.
+ inversion Heqf2 as [H2]. subst; clear Heqf2.
+ unfold transf_fundef, transf_partial_fundef in H.
+ destruct f1 as [f1|f1]; try congruence.
+ unfold transf_function, transl_function in H.
+ monadInv H. monadInv EQ.
+ destruct (zlt Ptrofs.max_unsigned (size_blocks (fn_blocks _))); simpl in *|-; try congruence.
+ injection EQ1; intros; subst.
+ monadInv EQ0. simpl in * |-.
+ intros; exploit transf_blocks_checks_all_bundles; eauto.
+ intros; eapply find_bblock_Some_in; eauto.
+Qed.
+
+Lemma checked_bundles_are_parexec_equiv f bundle rs rs' m m':
+ exec_bblock (globalenv (Asmblock.semantics tprog)) f bundle rs m = Next rs' m' ->
+ verify_par_bblock bundle = OK tt ->
+ det_parexec (globalenv (semantics tprog)) f bundle rs m rs' m'.
+Proof.
+ intros. unfold verify_par_bblock in H0. destruct (Asmblockdeps.bblock_para_check _) eqn:BPC; try discriminate. clear H0.
+ simpl in H.
+ eapply Asmblockdeps.bblock_para_check_correct; eauto.
+Qed.
+
+Lemma seqexec_parexec_equiv b ofs f bundle rs rs' m m':
+ Genv.find_funct_ptr (globalenv (Asmblock.semantics tprog)) b = Some (Internal f) ->
+ find_bblock (Ptrofs.unsigned ofs) (fn_blocks f) = Some bundle ->
+ exec_bblock (globalenv (Asmblock.semantics tprog)) f bundle rs m = Next rs' m' ->
+ det_parexec (globalenv (semantics tprog)) f bundle rs m rs' m'.
+Proof.
+ intros; eapply checked_bundles_are_parexec_equiv; eauto.
+ eapply all_bundles_are_checked; eauto.
+Qed.
+
+Theorem transf_program_correct_Asmvliw:
+ forward_simulation (Asmblock.semantics tprog) (Asmvliw.semantics tprog).
+Proof.
+ eapply forward_simulation_step with (match_states:=fun (s1:Asmvliw.state) s2 => s1=s2); eauto.
+ - intros; subst; auto.
+ - intros s1 t s1' H s2 H0; subst; inversion H; clear H; subst; eexists; split; eauto.
+ + eapply exec_step_internal; eauto.
+ intros; eapply seqexec_parexec_equiv; eauto.
+ + eapply exec_step_builtin; eauto.
+ + eapply exec_step_external; eauto.
+Qed.
+
+End PRESERVATION_ASMVLIW.
+
+Section PRESERVATION.
+
+Variables prog tprog: program.
+Hypothesis TRANSL: match_prog prog tprog.
+Let ge := Genv.globalenv prog.
+Let tge := Genv.globalenv tprog.
+
+Theorem transf_program_correct:
+ forward_simulation (Asmblock.semantics prog) (Asmvliw.semantics tprog).
+Proof.
+ eapply compose_forward_simulations.
+ eapply transf_program_correct_Asmblock; eauto.
+ eapply transf_program_correct_Asmvliw; eauto.
+Qed.
+
+End PRESERVATION.*)
diff --git a/aarch64/TargetPrinter.ml b/aarch64/TargetPrinter.ml
index b1425d56..a5b1465b 100644
--- a/aarch64/TargetPrinter.ml
+++ b/aarch64/TargetPrinter.ml
@@ -120,13 +120,13 @@ module Target : TARGET =
output_string oc (match sz with D -> dreg_name r | S -> sreg_name r)
let preg_asm oc ty = function
- | DR (IR' (RR1 r)) -> if ty = Tint then wreg oc r else xreg oc r
- | DR (FR' r) -> if ty = Tsingle then sreg oc r else dreg oc r
+ | DR (IR (RR1 r)) -> if ty = Tint then wreg oc r else xreg oc r
+ | DR (FR r) -> if ty = Tsingle then sreg oc r else dreg oc r
| _ -> assert false
let preg_annot = function
- | DR (IR' (RR1 r)) -> xreg_name r
- | DR (FR' r) -> dreg_name r
+ | DR (IR (RR1 r)) -> xreg_name r
+ | DR (FR r) -> dreg_name r
| _ -> assert false
(* Names of sections *)