aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Monniaux <david.monniaux@univ-grenoble-alpes.fr>2019-08-31 09:26:36 +0200
committerDavid Monniaux <david.monniaux@univ-grenoble-alpes.fr>2019-08-31 09:26:36 +0200
commitdc8b24fa2dd7ede561dd75458899cf42e9be09d2 (patch)
tree3389a4f3f07af151b5a30c0e23ed46c4cafdd98b
parent7c790ecd1c32b529a5e5e5977ce84cfade8e1eb6 (diff)
parentccd2fa5638e50b5fd8308b4b0c26531f911ff087 (diff)
downloadcompcert-kvx-dc8b24fa2dd7ede561dd75458899cf42e9be09d2.tar.gz
compcert-kvx-dc8b24fa2dd7ede561dd75458899cf42e9be09d2.zip
Merge branch 'mppa-work' of gricad-gitlab.univ-grenoble-alpes.fr:sixcy/CompCert into mppa-work
-rw-r--r--.gitmodules3
-rw-r--r--mppa_k1c/Archi.v3
-rw-r--r--mppa_k1c/Asm.v54
-rw-r--r--mppa_k1c/Asmaux.v2
-rw-r--r--mppa_k1c/Asmblock.v35
-rw-r--r--mppa_k1c/Asmblockdeps.v45
-rw-r--r--mppa_k1c/Asmblockgen.v66
-rw-r--r--mppa_k1c/Asmblockgenproof0.v86
-rw-r--r--mppa_k1c/Asmblockgenproof1.v246
-rw-r--r--mppa_k1c/Asmexpand.ml4
-rw-r--r--mppa_k1c/Asmgenproof.v4
-rw-r--r--mppa_k1c/Asmvliw.v33
-rw-r--r--mppa_k1c/CBuiltins.ml9
-rw-r--r--mppa_k1c/Op.v20
-rw-r--r--mppa_k1c/PostpassScheduling.v70
-rw-r--r--mppa_k1c/PostpassSchedulingOracle.ml150
-rw-r--r--mppa_k1c/PostpassSchedulingproof.v8
-rw-r--r--mppa_k1c/PrintOp.ml66
-rw-r--r--mppa_k1c/TargetPrinter.ml6
-rw-r--r--test/monniaux/cycles.h2
-rw-r--r--test/monniaux/rules.mk7
-rwxr-xr-xtest/monniaux/run_benches.sh2
m---------test/mppa/asm_coverage0
-rwxr-xr-x[-rw-r--r--]test/mppa/coverage.sh21
-rw-r--r--test/mppa/coverage_helper.py70
-rw-r--r--test/mppa/instr/Makefile10
-rw-r--r--test/mppa/instr/builtin64.c17
-rw-r--r--test/mppa/instr/i32.c42
-rw-r--r--test/mppa/instr/i64.c43
29 files changed, 435 insertions, 689 deletions
diff --git a/.gitmodules b/.gitmodules
index 955c7fc2..e69de29b 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +0,0 @@
-[submodule "test/mppa/asm_coverage"]
- path = test/mppa/asm_coverage
- url = git@gricad-gitlab.univ-grenoble-alpes.fr:sixcy/asm-scanner.git
diff --git a/mppa_k1c/Archi.v b/mppa_k1c/Archi.v
index 113f5d51..96571841 100644
--- a/mppa_k1c/Archi.v
+++ b/mppa_k1c/Archi.v
@@ -14,10 +14,9 @@
(* *)
(* *********************************************************************)
-(** Architecture-dependent parameters for RISC-V *)
+(** Architecture-dependent parameters for MPPA K1c. Mostly copied from the Risc-V backend *)
Require Import ZArith.
-(*From Flocq*)
Require Import Binary Bits.
Definition ptr64 := true.
diff --git a/mppa_k1c/Asm.v b/mppa_k1c/Asm.v
index 620aa91e..a0c5e71c 100644
--- a/mppa_k1c/Asm.v
+++ b/mppa_k1c/Asm.v
@@ -15,7 +15,13 @@
(* *)
(* *********************************************************************)
-(** Abstract syntax and semantics for K1c assembly language. *)
+(** * Abstract syntax for K1c textual assembly language.
+
+ Each emittable instruction is defined here. ';;' is also defined as an instruction.
+ The goal of this representation is to stay compatible with the rest of the generic backend of CompCert
+ We define [unfold : list bblock -> list instruction]
+ An Asm function is then defined as : [fn_sig], [fn_blocks], [fn_code], and a proof of [unfold fn_blocks = fn_code]
+ [fn_code] has no semantic. Instead, the semantic of Asm is given by using the AsmVLIW semantic on [fn_blocks] *)
Require Import Coqlib.
Require Import Maps.
@@ -57,10 +63,6 @@ Inductive instruction : Type :=
| Psemi (**r semi colon separating bundles *)
| Pnop (**r instruction that does nothing *)
- (** builtins *)
- | Pclzll (rd rs: ireg)
- | Pstsud (rd rs1 rs2: ireg)
-
(** Control flow instructions *)
| Pget (rd: ireg) (rs: preg) (**r get system register *)
| Pset (rd: preg) (rs: ireg) (**r set system register *)
@@ -97,10 +99,12 @@ Inductive instruction : Type :=
| Piinvals (addr: ireg)
| Pitouchl (addr: ireg)
| Pdzerol (addr: ireg)
- | Pafaddd (addr: ireg) (incr_res: ireg)
- | Pafaddw (addr: ireg) (incr_res: ireg)
+(*| Pafaddd (addr: ireg) (incr_res: ireg)
+ | Pafaddw (addr: ireg) (incr_res: ireg) *) (* see #157 *)
| Palclrd (dst: ireg) (addr: ireg)
| Palclrw (dst: ireg) (addr: ireg)
+ | Pclzll (rd rs: ireg)
+ | Pstsud (rd rs1 rs2: ireg)
(** Loads **)
| Plb (rd: ireg) (ra: ireg) (ofs: addressing) (**r load byte *)
@@ -571,12 +575,6 @@ Definition genv := Genv.t fundef unit.
Definition function_proj (f: function) := Asmvliw.mkfunction (fn_sig f) (fn_blocks f).
-(*
-Definition fundef_proj (fu: fundef) : Asmblock.fundef := transf_fundef function_proj fu.
-
-Definition program_proj (p: program) : Asmblock.program := transform_program fundef_proj p.
- *)
-
Definition fundef_proj (fu: fundef) : Asmvliw.fundef :=
match fu with
| Internal f => Internal (function_proj f)
@@ -650,35 +648,6 @@ Proof.
rewrite transf_function_proj. auto.
Qed.
-(* Definition transf_globdef (gd: globdef Asmblock.fundef unit) : globdef fundef unit :=
- match gd with
- | Gfun f => Gfun (transf_fundef f)
- | Gvar gu => Gvar gu
- end.
-
-Lemma transf_globdef_proj: forall gd, globdef_proj (transf_globdef gd) = gd.
-Proof.
- intros gd. destruct gd as [f|v]; simpl; auto.
- rewrite transf_fundef_proj; auto.
-Qed.
-
-Fixpoint transf_prog_defs (l: list (ident * globdef Asmblock.fundef unit))
- : list (ident * globdef fundef unit) :=
- match l with
- | nil => nil
- | (i, gd) :: l => (i, transf_globdef gd) :: transf_prog_defs l
- end.
-
-Lemma transf_prog_proj: forall p, prog_defs p = prog_defs_proj (transf_prog_defs (prog_defs p)).
-Proof.
- intros p. destruct p as [defs pub main]. simpl.
- induction defs; simpl; auto.
- destruct a as [i gd]. simpl.
- rewrite transf_globdef_proj.
- congruence.
-Qed.
- *)
-
Definition transf_program : Asmvliw.program -> program := transform_program transf_fundef.
Lemma program_equals {A B: Type} : forall (p1 p2: AST.program A B),
@@ -716,7 +685,6 @@ Proof.
intros. congruence.
Qed.
-(* I think it is a special case of Asmblock -> Asm. Very handy to have *)
Lemma match_program_transf:
forall p tp, match_prog p tp -> transf_program p = tp.
Proof.
diff --git a/mppa_k1c/Asmaux.v b/mppa_k1c/Asmaux.v
index 85359658..94b39f4e 100644
--- a/mppa_k1c/Asmaux.v
+++ b/mppa_k1c/Asmaux.v
@@ -1,5 +1,5 @@
Require Import Asm.
Require Import AST.
-(* Constant only needed by Asmexpandaux.ml *)
+(** Constant only needed by Asmexpandaux.ml *)
Program Definition dummy_function := {| fn_code := nil; fn_sig := signature_main; fn_blocks := nil |}.
diff --git a/mppa_k1c/Asmblock.v b/mppa_k1c/Asmblock.v
index ddb7ce7d..9b4489c5 100644
--- a/mppa_k1c/Asmblock.v
+++ b/mppa_k1c/Asmblock.v
@@ -15,7 +15,7 @@
(* *)
(* *********************************************************************)
-(** Abstract syntax and semantics for K1c assembly language. *)
+(** Sequential block semantics for K1c assembly. The syntax is given in AsmVLIW *)
Require Import Coqlib.
Require Import Maps.
@@ -172,7 +172,6 @@ Proof.
Qed.
Ltac bblock_auto_correct := (apply non_empty_bblock_refl; try discriminate; try (left; discriminate); try (right; discriminate)).
-(* Local Obligation Tactic := bblock_auto_correct. *)
Lemma Istrue_proof_irrelevant (b: bool): forall (p1 p2:Is_true b), p1=p2.
Proof.
@@ -250,9 +249,6 @@ Proof.
intros. destruct bb as [hd bdy ex COR]. simpl. unfold no_header; unfold stick_header; simpl. reflexivity.
Qed.
-
-
-
(** * Sequential Semantics of basic blocks *)
Section RELSEM.
@@ -286,7 +282,7 @@ Definition exec_store_regxs (chunk: memory_chunk) (rs: regset) (m: mem) (s a ro:
(** * basic instructions *)
-Definition exec_basic_instr (bi: basic) (rs: regset) (m: mem) : outcome := parexec_basic_instr ge bi rs rs m m.
+Definition exec_basic_instr (bi: basic) (rs: regset) (m: mem) : outcome := bstep ge bi rs rs m m.
Fixpoint exec_body (body: list basic) (rs: regset) (m: mem): outcome :=
match body with
@@ -302,29 +298,8 @@ Fixpoint exec_body (body: list basic) (rs: regset) (m: mem): outcome :=
Definition goto_label (f: function) (lbl: label) (rs: regset) (m: mem) : outcome := par_goto_label f lbl rs rs m.
-(** Evaluating a branch
-
-Warning: in m PC is assumed to be already pointing on the next instruction !
-
-*)
Definition eval_branch (f: function) (l: label) (rs: regset) (m: mem) (res: option bool) : outcome := par_eval_branch f l rs rs m res.
-(** Execution of a single control-flow instruction [i] in initial state [rs] and
- [m]. Return updated state.
-
- As above: PC is assumed to be incremented on the next block before the control-flow instruction
-
- For instructions that correspond tobuiltin
- actual RISC-V instructions, the cases are straightforward
- transliterations of the informal descriptions given in the RISC-V
- user-mode specification. For pseudo-instructions, refer to the
- informal descriptions given above.
-
- Note that we set to [Vundef] the registers used as temporaries by
- the expansions of the pseudo-instructions, so that the RISC-V code
- we generate cannot use those registers to hold values that must
- survive the execution of the pseudo-instruction. *)
-
Definition exec_control (f: function) (oc: option control) (rs: regset) (m: mem) : outcome := parexec_control ge f oc rs rs m.
Definition exec_bblock (f: function) (b: bblock) (rs0: regset) (m: mem) : outcome :=
@@ -368,16 +343,11 @@ Inductive step: state -> trace -> state -> Prop :=
step (State rs m) t (State rs' m')
.
-
-
End RELSEM.
-
-
Definition semantics (p: program) :=
Semantics step (initial_state p) final_state (Genv.globalenv p).
-
Definition data_preg (r: preg) : bool :=
match r with
| RA => false
@@ -386,4 +356,3 @@ Definition data_preg (r: preg) : bool :=
| IR _ => true
| PC => false
end.
-
diff --git a/mppa_k1c/Asmblockdeps.v b/mppa_k1c/Asmblockdeps.v
index 9855afa2..2d144bb6 100644
--- a/mppa_k1c/Asmblockdeps.v
+++ b/mppa_k1c/Asmblockdeps.v
@@ -1,3 +1,10 @@
+(** * Translation from Asmblock to AbstractBB
+
+ We define a specific instance of AbstractBB, named L, translate bblocks from Asmblock into this instance
+ AbstractBB will then define two semantics for L : a sequential, and a semantic one
+ We prove a bisimulation between the parallel semantics of L and AsmVLIW
+ From this, we also deduce a bisimulation between the sequential semantics of L and Asmblock *)
+
Require Import AST.
Require Import Asmblock.
Require Import Asmblockgenproof0.
@@ -17,6 +24,8 @@ Require Import Chunks.
Open Scope impure.
+(** Definition of L *)
+
Module P<: ImpParam.
Module R := Pos.
@@ -459,18 +468,6 @@ Qed.
Hint Resolve op_eq_correct: wlp.
Global Opaque op_eq_correct.
-
-(* QUICK FIX WITH struct_eq *)
-
-(* Definition op_eq (o1 o2: op): ?? bool := struct_eq o1 o2.
-
-Theorem op_eq_correct o1 o2:
- WHEN op_eq o1 o2 ~> b THEN b=true -> o1 = o2.
-Proof.
- wlp_simplify.
-Qed.
-*)
-
End IMPPARAM.
End P.
@@ -550,7 +547,7 @@ Proof.
- unfold ppos. unfold pmem. discriminate.
Qed.
-(** Inversion functions, used for debugging *)
+(** Inversion functions, used for debug traces *)
Definition pos_to_ireg (p: R.t) : option gpreg :=
match p with
@@ -574,9 +571,6 @@ Definition inv_ppos (p: R.t) : option preg :=
end
end.
-
-(** Traduction Asmblock -> Asmblockdeps *)
-
Notation "a @ b" := (Econs a b) (at level 102, right associativity).
Definition trans_control (ctl: control) : inst :=
@@ -720,7 +714,7 @@ Proof.
intros. congruence.
Qed.
-(** Parallelizability of a bblock (bundle) *)
+(** Parallelizability test of a bblock (bundle), and bisimulation of the Asmblock and L parallel semantics *)
Module PChk := ParallelChecks L PosPseudoRegSet.
@@ -846,7 +840,7 @@ Theorem bisimu_par_wio_basic ge fn rsr rsw mr mw sr sw bi:
Ge = Genv ge fn ->
match_states (State rsr mr) sr ->
match_states (State rsw mw) sw ->
- match_outcome (parexec_basic_instr ge bi rsr rsw mr mw) (inst_prun Ge (trans_basic bi) sw sr sr).
+ match_outcome (bstep ge bi rsr rsw mr mw) (inst_prun Ge (trans_basic bi) sw sr sr).
Proof.
(* a little tactic to automate reasoning on preg_eq *)
@@ -1004,7 +998,7 @@ Proof.
induction bdy as [|i bdy]; simpl; eauto.
intros.
exploit (bisimu_par_wio_basic ge fn rsr rsw mr mw sr sw i); eauto.
- destruct (parexec_basic_instr _ _ _ _ _ _); simpl.
+ destruct (bstep _ _ _ _ _ _); simpl.
- intros (s' & X1 & X2). rewrite X1; simpl; eauto.
- intros X; rewrite X; simpl; auto.
Qed.
@@ -1015,7 +1009,7 @@ Theorem bisimu_par_control ex sz aux ge fn rsr rsw mr mw sr sw:
match_states (State rsw mw) sw ->
match_outcome (parexec_control ge fn ex (incrPC (Ptrofs.repr sz) rsr) (rsw#PC <- aux) mw) (inst_prun Ge (trans_pcincr sz (trans_exit ex)) sw sr sr).
Proof.
- intros GENV MSR MSW; unfold parexec_exit.
+ intros GENV MSR MSW; unfold estep.
simpl in *. inv MSR. inv MSW.
destruct ex.
- destruct c; destruct i; try discriminate; simpl.
@@ -1071,9 +1065,9 @@ Theorem bisimu_par_exit ex sz ge fn rsr rsw mr mw sr sw:
Ge = Genv ge fn ->
match_states (State rsr mr) sr ->
match_states (State rsw mw) sw ->
- match_outcome (parexec_exit ge fn ex (Ptrofs.repr sz) rsr rsw mw) (inst_prun Ge (trans_pcincr sz (trans_exit ex)) sw sr sr).
+ match_outcome (estep ge fn ex (Ptrofs.repr sz) rsr rsw mw) (inst_prun Ge (trans_pcincr sz (trans_exit ex)) sw sr sr).
Proof.
- intros; unfold parexec_exit.
+ intros; unfold estep.
exploit (bisimu_par_control ex sz rsw#PC ge fn rsr rsw mr mw sr sw); eauto.
cutrewrite (rsw # PC <- (rsw PC) = rsw); auto.
apply extensionality. intros; destruct x; simpl; auto.
@@ -1162,7 +1156,7 @@ Proof.
destruct (prun_iw _ _ _ _); simpl; eauto.
Qed.
-(* sequential execution *)
+(** sequential execution *)
Theorem bisimu_basic ge fn bi rs m s:
Ge = Genv ge fn ->
match_states (State rs m) s ->
@@ -1264,7 +1258,6 @@ Qed.
End SECT_PAR.
-
Section SECT_BBLOCK_EQUIV.
Variable Ge: genv.
@@ -1294,6 +1287,8 @@ Proof.
* discriminate.
Qed.
+(** Used for debug traces *)
+
Definition gpreg_name (gpr: gpreg) :=
match gpr with
| GPR0 => Str ("GPR0") | GPR1 => Str ("GPR1") | GPR2 => Str ("GPR2") | GPR3 => Str ("GPR3") | GPR4 => Str ("GPR4")
@@ -1645,4 +1640,4 @@ Definition bblock_simub: Asmvliw.bblock -> Asmvliw.bblock -> bool := pure_bblock
Lemma bblock_simub_correct p1 p2 ge fn: bblock_simub p1 p2 = true -> Asmblockgenproof0.bblock_simu ge fn p1 p2.
Proof.
eapply (pure_bblock_simu_test_correct true).
-Qed. \ No newline at end of file
+Qed.
diff --git a/mppa_k1c/Asmblockgen.v b/mppa_k1c/Asmblockgen.v
index e5b9b35a..7e415c2a 100644
--- a/mppa_k1c/Asmblockgen.v
+++ b/mppa_k1c/Asmblockgen.v
@@ -15,7 +15,8 @@
(* *)
(* *********************************************************************)
-(** Translation from Machblock to K1c assembly language (Asmblock) *)
+(** * Translation from Machblock to K1c assembly language (Asmblock)
+ Inspired from the Mach->Asm pass of other backends, but adapted to the block structure *)
Require Archi.
Require Import Coqlib Errors.
@@ -41,23 +42,15 @@ Definition ireg_of (r: mreg) : res ireg :=
Definition freg_of (r: mreg) : res freg :=
match preg_of r with IR mr => OK mr | _ => Error(msg "Asmgenblock.freg_of") end.
-(*
-(** Decomposition of 32-bit integer constants. They are split into either
- small signed immediates that fit in 12-bits, or, if they do not fit,
- into a (20-bit hi, 12-bit lo) pair where lo is sign-extended. *)
-
-*)
Inductive immed32 : Type :=
| Imm32_single (imm: int).
Definition make_immed32 (val: int) := Imm32_single val.
-(** Likewise, for 64-bit integer constants. *)
Inductive immed64 : Type :=
| Imm64_single (imm: int64)
.
-(* For now, let's suppose all instructions of K1c can handle 64-bits immediate *)
Definition make_immed64 (val: int64) := Imm64_single val.
Notation "a ::g b" := (cons (A:=instruction) a b) (at level 49, right associativity).
@@ -66,12 +59,6 @@ Notation "a ::b lb" := ((bblock_single_inst a) :: lb) (at level 49, right associ
Notation "a ++g b" := (app (A:=instruction) a b) (at level 49, right associativity).
Notation "a @@ b" := (app a b) (at level 49, right associativity).
-(** Smart constructors for arithmetic operations involving
- a 32-bit or 64-bit integer constant. Depending on whether the
- constant fits in 12 bits or not, one or several instructions
- are generated as required to perform the operation
- and prepended to the given instruction sequence [k]. *)
-
Definition loadimm32 (r: ireg) (n: int) :=
match make_immed32 n with
| Imm32_single imm => Pmake r imm
@@ -92,10 +79,6 @@ Definition orimm32 := opimm32 Porw Poriw.
Definition norimm32 := opimm32 Pnorw Pnoriw.
Definition xorimm32 := opimm32 Pxorw Pxoriw.
Definition nxorimm32 := opimm32 Pnxorw Pnxoriw.
-(*
-Definition sltimm32 := opimm32 Psltw Psltiw.
-Definition sltuimm32 := opimm32 Psltuw Psltiuw.
-*)
Definition loadimm64 (r: ireg) (n: int64) :=
match make_immed64 n with
@@ -118,11 +101,6 @@ Definition norimm64 := opimm64 Pnorl Pnoril.
Definition nandimm64 := opimm64 Pnandl Pnandil.
Definition nxorimm64 := opimm64 Pnxorl Pnxoril.
-(*
-Definition sltimm64 := opimm64 Psltl Psltil.
-Definition sltuimm64 := opimm64 Psltul Psltiul.
-*)
-
Definition addptrofs (rd rs: ireg) (n: ptrofs) :=
if Ptrofs.eq_dec n Ptrofs.zero then
Pmv rd rs
@@ -170,19 +148,6 @@ Definition transl_opt_compuimm
transl_compi c Unsigned r1 n lbl k
.
-(* Definition transl_opt_compuimm
- (n: int) (c: comparison) (r1: ireg) (lbl: label) (k: code) : list instruction :=
- loadimm32 RTMP n ::g (transl_comp c Unsigned r1 RTMP lbl k). *)
-
-(* match select_comp n c with
- | Some Ceq => Pcbu BTweqz r1 lbl ::g k
- | Some Cne => Pcbu BTwnez r1 lbl ::g k
- | Some _ => nil (* Never happens *)
- | None => loadimm32 RTMP n ::g (transl_comp c Unsigned r1 RTMP lbl k)
- end
- .
- *)
-
Definition select_compl (n: int64) (c: comparison) : option comparison :=
if Int64.eq n Int64.zero then
match c with
@@ -1052,7 +1017,7 @@ Definition make_epilogue (f: Machblock.function) (k: code) :=
(loadind_ptr SP f.(fn_retaddr_ofs) GPRA)
::g Pset RA GPRA ::g Pfreeframe f.(fn_stacksize) f.(fn_link_ofs) ::g k.
-(** Translation of a Mach instruction. *)
+(** Translation of a Machblock instruction. *)
Definition transl_instr_basic (f: Machblock.function) (i: Machblock.basic_inst)
(ep: bool) (k: bcode) :=
@@ -1096,20 +1061,12 @@ Definition transl_instr_control (f: Machblock.function) (oi: option Machblock.co
transl_cbranch cond args lbl nil
| MBreturn =>
OK (make_epilogue f (Pret ::g nil))
- (*OK (make_epilogue f (Pj_r RA f.(Mach.fn_sig) :: k))*)
| MBjumptable arg tbl =>
do r <- ireg_of arg;
OK (Pjumptable r tbl ::g nil)
end
end.
-(* TODO - dans l'idée, transl_instr_control renvoie une liste d'instructions sous la forme :
- * transl_instr_control _ _ _ = lb ++ (ctl :: nil), où lb est une liste de basics, ctl est un control_inst
-
- Il faut arriver à exprimer cet aspect là ; extraire le lb, le rajouter dans le body ; et extraire le ctl
- qu'on met dans le exit
-*)
-
(** Translation of a code sequence *)
Definition fp_is_parent (before: bool) (i: Machblock.basic_inst) : bool :=
@@ -1120,8 +1077,7 @@ Definition fp_is_parent (before: bool) (i: Machblock.basic_inst) : bool :=
| _ => false
end.
-(** This is the naive definition that we no longer use because it
- is not tail-recursive. It is kept as specification. *)
+(** This is the naive definition, which is not tail-recursive unlike the other backends *)
Fixpoint transl_basic_code (f: Machblock.function) (il: list Machblock.basic_inst) (it1p: bool) :=
match il with
@@ -1147,20 +1103,11 @@ Definition transl_basic_code' (f: Machblock.function) (il: list Machblock.basic_
transl_basic_rec f il it1p (fun c => OK c). *)
(** Translation of a whole function. Note that we must check
- that the generated code contains less than [2^32] instructions,
+ that the generated code contains less than [2^64] instructions,
otherwise the offset part of the [PC] code pointer could wrap
around, leading to incorrect executions. *)
-(* Local Obligation Tactic := bblock_auto_correct. *)
-
-(* Program Definition gen_bblock_noctl (hd: list label) (c: list basic) :=
- match c with
- | nil => {| header := hd; body := Pnop::nil; exit := None |}
- | i::c => {| header := hd; body := i::c; exit := None |}
- end.
- *)
-
-(** Can generate two bblocks if the ctl is a PExpand (since the PExpand must be alone in its block) *)
+(* gen_bblocks can generate two bblocks if the ctl is a PExpand (since the PExpand must be alone in its block) *)
Program Definition gen_bblocks (hd: list label) (c: list basic) (ctl: list instruction) :=
match (extract_ctl ctl) with
| None =>
@@ -1168,7 +1115,6 @@ Program Definition gen_bblocks (hd: list label) (c: list basic) (ctl: list instr
| nil => {| header := hd; body := Pnop::nil; exit := None |} :: nil
| i::c => {| header := hd; body := ((i::c) ++ extract_basic ctl); exit := None |} :: nil
end
-(* gen_bblock_noctl hd (c ++ (extract_basic ctl)) :: nil *)
| Some (PExpand (Pbuiltin ef args res)) =>
match c with
| nil => {| header := hd; body := nil; exit := Some (PExpand (Pbuiltin ef args res)) |} :: nil
diff --git a/mppa_k1c/Asmblockgenproof0.v b/mppa_k1c/Asmblockgenproof0.v
index 89d41017..decc3e2e 100644
--- a/mppa_k1c/Asmblockgenproof0.v
+++ b/mppa_k1c/Asmblockgenproof0.v
@@ -1,3 +1,9 @@
+(** * "block" version of Asmgenproof0
+
+ This module is largely adapted from Asmgenproof0.v of the other backends
+ It needs to stand apart because of the block structure, and the distinction control/basic that there isn't in the other backends
+ It has similar definitions than Asmgenproof0, but adapted to this new structure *)
+
Require Import Coqlib.
Require Intv.
Require Import AST.
@@ -31,19 +37,13 @@ Lemma ireg_of_eq:
forall r r', ireg_of r = OK r' -> preg_of r = IR r'.
Proof.
unfold ireg_of; intros. destruct (preg_of r); inv H; auto.
-(* destruct b. all: try discriminate.
- inv H1. auto.
- *)Qed.
+Qed.
-(* FIXME - Replaced FR by IR for MPPA *)
Lemma freg_of_eq:
forall r r', freg_of r = OK r' -> preg_of r = IR r'.
Proof.
unfold freg_of; intros. destruct (preg_of r); inv H; auto.
-(* destruct b. all: try discriminate.
- inv H1. auto.
- *)Qed.
-
+Qed.
Lemma preg_of_injective:
forall r1 r2, preg_of r1 = preg_of r2 -> r1 = r2.
@@ -277,24 +277,6 @@ Proof.
exploit preg_of_injective; eauto. congruence.
Qed.
-(* Lemma agree_undef_regs2:
- forall ms sp rl rs rs',
- agree (Mach.undef_regs rl ms) sp rs ->
- (forall r', data_preg r' = true -> preg_notin r' rl -> rs'#r' = rs#r') ->
- agree (Mach.undef_regs rl ms) sp rs'.
-Proof.
- intros. destruct H. split; auto.
- rewrite <- agree_sp0. apply H0; auto.
- rewrite preg_notin_charact. intros. apply not_eq_sym. apply preg_of_not_SP.
- intros. destruct (In_dec mreg_eq r rl).
- rewrite Mach.undef_regs_same; auto.
- rewrite H0; auto.
- apply preg_of_data.
- rewrite preg_notin_charact. intros; red; intros. elim n.
- exploit preg_of_injective; eauto. congruence.
-Qed.
- *)
-
Lemma agree_set_undef_mreg:
forall ms sp rs r v rl rs',
agree ms sp rs ->
@@ -607,15 +589,13 @@ Hypothesis transf_function_len:
forall f tf, transf_function f = OK tf -> size_blocks (fn_blocks tf) <= Ptrofs.max_unsigned.
-(* NB: the hypothesis in comment on [b] is not needed in the proof ! *)
Lemma return_address_exists:
- forall b f (* sg ros *) c, (* b.(MB.exit) = Some (MBcall sg ros) -> *) is_tail (b :: c) f.(MB.fn_code) ->
+ forall b f c, is_tail (b :: c) f.(MB.fn_code) ->
exists ra, return_address_offset f c ra.
Proof.
intros. destruct (transf_function f) as [tf|] eqn:TF.
+ exploit transf_function_inv; eauto. intros (tc1 & ep1 & TR1 & TL1).
exploit transl_blocks_tail; eauto. intros (tc2 & ep2 & TR2 & TL2).
-(* unfold return_address_offset. *)
monadInv TR2.
assert (TL3: is_tail x0 (fn_blocks tf)).
{ apply is_tail_trans with tc1; auto.
@@ -632,7 +612,7 @@ Qed.
End RETADDR_EXISTS.
(** [transl_code_at_pc pc fb f c ep tf tc] holds if the code pointer [pc] points
- within the Asm code generated by translating Mach function [f],
+ within the Asmblock code generated by translating Machblock function [f],
and [tc] is the tail of the generated code at the position corresponding
to the code pointer [pc]. *)
@@ -850,18 +830,6 @@ Proof.
apply exec_straight_step with rs2 m2; auto.
Qed.
-(* Theorem exec_straight_bblock:
- forall rs1 m1 rs2 m2 rs3 m3 b,
- exec_straight (body b) rs1 m1 nil rs2 m2 ->
- exec_control_rel (exit b) b rs2 m2 rs3 m3 ->
- exec_bblock_rel b rs1 m1 rs3 m3.
-Proof.
- intros.
- econstructor; eauto. unfold exec_bblock. erewrite exec_straight_body; eauto.
- inv H0. auto.
-Qed. *)
-
-
Lemma exec_straight_two:
forall i1 i2 c rs1 m1 rs2 m2 rs3 m3,
exec_basic_instr ge i1 rs1 m1 = Next rs2 m2 ->
@@ -973,18 +941,6 @@ Proof.
- reflexivity.
Qed.
-(* Lemma exec_straight_pc':
- forall c rs1 m1 rs2 m2,
- exec_straight c rs1 m1 nil rs2 m2 ->
- rs2 PC = rs1 PC.
-Proof.
- induction c; intros; try (inv H; fail).
- inv H.
- - erewrite exec_basic_instr_pc; eauto.
- - rewrite (IHc rs3 m3 rs2 m2); auto.
- erewrite exec_basic_instr_pc; eauto.
-Qed. *)
-
Lemma exec_straight_pc:
forall c c' rs1 m1 rs2 m2,
exec_straight c rs1 m1 c' rs2 m2 ->
@@ -997,25 +953,6 @@ Proof.
erewrite exec_basic_instr_pc; eauto.
Qed.
-(* Lemma exec_straight_through:
- forall c i b lb rs1 m1 rs2 m2 rs2' m2',
- bblock_basic_ctl c i = b ->
- exec_straight c rs1 m1 nil rs2 m2 ->
- nextblock b rs2 = rs2' -> m2 = m2' ->
- exec_control ge fn i rs2' m2' = Next rs2' m2' -> (* if the control does not jump *)
- exec_straight_blocks (b::lb) rs1 m1 lb rs2' m2'.
-Proof.
- intros. subst. destruct i.
- - constructor 1.
- + unfold exec_bblock. simpl body. erewrite exec_straight_body; eauto.
- + rewrite <- (exec_straight_pc c nil rs1 m1 rs2 m2'); auto.
- - destruct c as [|i c]; try (inv H0; fail).
- constructor 1.
- + unfold exec_bblock. simpl body. erewrite exec_straight_body; eauto.
- + rewrite <- (exec_straight_pc (i ::i c) nil rs1 m1 rs2 m2'); auto.
-Qed.
- *)
-
Lemma regset_same_assign (rs: regset) r:
rs # r <- (rs r) = rs.
Proof.
@@ -1034,8 +971,6 @@ Proof.
simpl; auto. unfold nextblock, incrPC; simpl. Simpl. erewrite exec_straight_pc; eauto.
Qed.
-
-
(** The following lemmas show that straight-line executions
(predicate [exec_straight_blocks]) correspond to correct Asm executions. *)
@@ -1086,7 +1021,6 @@ Qed.
End STRAIGHTLINE.
-
(** * Properties of the Machblock call stack *)
Section MATCH_STACK.
diff --git a/mppa_k1c/Asmblockgenproof1.v b/mppa_k1c/Asmblockgenproof1.v
index bc549b4a..e1e2b0b0 100644
--- a/mppa_k1c/Asmblockgenproof1.v
+++ b/mppa_k1c/Asmblockgenproof1.v
@@ -15,6 +15,8 @@
(* *)
(* *********************************************************************)
+(** * Proof of correctness for individual instructions *)
+
Require Import Coqlib Errors Maps.
Require Import AST Integers Floats Values Memory Globalenvs.
Require Import Op Locations Machblock Conventions.
@@ -86,31 +88,6 @@ Section CONSTRUCTORS.
Variable ge: genv.
Variable fn: function.
-(*
-(** 32-bit integer constants and arithmetic *)
-(*
-Lemma load_hilo32_correct:
- forall rd hi lo k rs m,
- exists rs',
- exec_straight ge fn (load_hilo32 rd hi lo k) rs m k rs' m
- /\ rs'#rd = Vint (Int.add (Int.shl hi (Int.repr 12)) lo)
- /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
-Proof.
- unfold load_hilo32; intros.
- predSpec Int.eq Int.eq_spec lo Int.zero.
-- subst lo. econstructor; split.
- apply exec_straight_one. simpl; eauto. auto.
- split. rewrite Int.add_zero. Simpl.
- intros; Simpl.
-- econstructor; split.
- eapply exec_straight_two. simpl; eauto. simpl; eauto. auto. auto.
- split. Simpl.
- intros; Simpl.
-Qed.
-*)
-
-*)
-
Lemma loadimm32_correct:
forall rd n k rs m,
exists rs',
@@ -141,60 +118,6 @@ Proof.
intros; Simpl.
Qed.
-(*
-(*
-Lemma opimm32_correct:
- forall (op: ireg -> ireg0 -> ireg0 -> instruction)
- (opi: ireg -> ireg0 -> int -> instruction)
- (sem: val -> val -> val) m,
- (forall d s1 s2 rs,
- exec_instr ge fn (op d s1 s2) rs m = Next (nextinstr (rs#d <- (sem rs##s1 rs##s2))) m) ->
- (forall d s n rs,
- exec_instr ge fn (opi d s n) rs m = Next (nextinstr (rs#d <- (sem rs##s (Vint n)))) m) ->
- forall rd r1 n k rs,
- r1 <> RTMP ->
- exists rs',
- exec_straight ge fn (opimm32 op opi rd r1 n k) rs m k rs' m
- /\ rs'#rd = sem rs##r1 (Vint n)
- /\ forall r, r <> PC -> r <> rd -> r <> RTMP -> rs'#r = rs#r.
-Proof.
- intros. unfold opimm32. generalize (make_immed32_sound n); intros E.
- destruct (make_immed32 n).
-- subst imm. econstructor; split.
- apply exec_straight_one. rewrite H0. simpl; eauto. auto.
- split. Simpl. intros; Simpl.
-- destruct (load_hilo32_correct RTMP hi lo (op rd r1 RTMP :: k) rs m)
- as (rs' & A & B & C).
- econstructor; split.
- eapply exec_straight_trans. eexact A. apply exec_straight_one.
- rewrite H; eauto. auto.
- split. Simpl. simpl. rewrite B, C, E. auto. congruence. congruence.
- intros; Simpl.
-Qed.
-
-(** 64-bit integer constants and arithmetic *)
-
-Lemma load_hilo64_correct:
- forall rd hi lo k rs m,
- exists rs',
- exec_straight ge fn (load_hilo64 rd hi lo k) rs m k rs' m
- /\ rs'#rd = Vlong (Int64.add (Int64.sign_ext 32 (Int64.shl hi (Int64.repr 12))) lo)
- /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
-Proof.
- unfold load_hilo64; intros.
- predSpec Int64.eq Int64.eq_spec lo Int64.zero.
-- subst lo. econstructor; split.
- apply exec_straight_one. simpl; eauto. auto.
- split. rewrite Int64.add_zero. Simpl.
- intros; Simpl.
-- econstructor; split.
- eapply exec_straight_two. simpl; eauto. simpl; eauto. auto. auto.
- split. Simpl.
- intros; Simpl.
-Qed.
-*)
-*)
-
Lemma opimm64_correct:
forall (op: arith_name_rrr)
(opi: arith_name_rri64)
@@ -215,18 +138,6 @@ Proof.
- subst imm. econstructor; split.
apply exec_straight_one. rewrite H0. simpl; eauto. auto.
split. Simpl. intros; Simpl.
-(*
-- destruct (load_hilo64_correct RTMP hi lo (op rd r1 RTMP :: k) rs m)
- as (rs' & A & B & C).
- econstructor; split.
- eapply exec_straight_trans. eexact A. apply exec_straight_one.
- rewrite H; eauto. auto.
- split. Simpl. simpl. rewrite B, C, E. auto. congruence. congruence.
- intros; Simpl.
-- subst imm. econstructor; split.
- eapply exec_straight_two. simpl; eauto. rewrite H. simpl; eauto. auto. auto.
- split. Simpl. intros; Simpl.
-*)
Qed.
(** Add offset to pointer *)
@@ -252,35 +163,6 @@ Proof.
rewrite Ptrofs.of_int64_to_int64 by auto. auto.
Qed.
-(*
-(*
-Lemma addptrofs_correct_2:
- forall rd r1 n k (rs: regset) m b ofs,
- r1 <> RTMP -> rs#r1 = Vptr b of
-s ->
- exists rs',
- exec_straight ge fn (addptrofs rd r1 n k) rs m k rs' m
- /\ rs'#rd = Vptr b (Ptrofs.add ofs n)
- /\ forall r, r <> PC -> r <> rd -> r <> RTMP -> rs'#r = rs#r.
-Proof.
- intros. exploit (addptrofs_correct rd r1 n); eauto. intros (rs' & A & B & C).
- exists rs'; intuition eauto.
- rewrite H0 in B. inv B. auto.
-Qed.
-
-(** Translation of conditional branches *)
-
-Remark branch_on_RTMP:
- forall normal lbl (rs: regset) m b,
- rs#RTMP = Val.of_bool (eqb normal b) ->
- exec_instr ge fn (if normal then Pbnew RTMP X0 lbl else Pbeqw RTMP X0 lbl) rs m =
- eval_branch fn lbl rs m (Some b).
-Proof.
- intros. destruct normal; simpl; rewrite H; simpl; destruct b; reflexivity.
-Qed.
-*)
-*)
-
Ltac ArgsInv :=
repeat (match goal with
| [ H: Error _ = OK _ |- _ ] => discriminate
@@ -1522,99 +1404,6 @@ Proof.
exploit transl_cond_nofloat32_correct; eauto. intros (rs' & A & B & C). exists rs'; eauto.
Qed.
-(*
-(*
-+ (* cmpf *)
- destruct (transl_cond_float c0 rd x x0) as [insn normal] eqn:TR.
- fold (Val.cmpf c0 (rs x) (rs x0)).
- set (v := Val.cmpf c0 (rs x) (rs x0)).
- destruct normal; inv EQ2.
-* econstructor; split.
- apply exec_straight_one. eapply transl_cond_float_correct with (v := v); eauto. auto.
- split; intros; Simpl.
-* econstructor; split.
- eapply exec_straight_two.
- eapply transl_cond_float_correct with (v := Val.notbool v); eauto.
- simpl; reflexivity.
- auto. auto.
- split; intros; Simpl. unfold v, Val.cmpf. destruct (Val.cmpf_bool c0 (rs x) (rs x0)) as [[]|]; auto.
-+ (* notcmpf *)
- destruct (transl_cond_float c0 rd x x0) as [insn normal] eqn:TR.
- rewrite Val.notbool_negb_3. fold (Val.cmpf c0 (rs x) (rs x0)).
- set (v := Val.cmpf c0 (rs x) (rs x0)).
- destruct normal; inv EQ2.
-* econstructor; split.
- eapply exec_straight_two.
- eapply transl_cond_float_correct with (v := v); eauto.
- simpl; reflexivity.
- auto. auto.
- split; intros; Simpl. unfold v, Val.cmpf. destruct (Val.cmpf_bool c0 (rs x) (rs x0)) as [[]|]; auto.
-* econstructor; split.
- apply exec_straight_one. eapply transl_cond_float_correct with (v := Val.notbool v); eauto. auto.
- split; intros; Simpl.
-+ (* cmpfs *)
- destruct (transl_cond_single c0 rd x x0) as [insn normal] eqn:TR.
- fold (Val.cmpfs c0 (rs x) (rs x0)).
- set (v := Val.cmpfs c0 (rs x) (rs x0)).
- destruct normal; inv EQ2.
-* econstructor; split.
- apply exec_straight_one. eapply transl_cond_single_correct with (v := v); eauto. auto.
- split; intros; Simpl.
-* econstructor; split.
- eapply exec_straight_two.
- eapply transl_cond_single_correct with (v := Val.notbool v); eauto.
- simpl; reflexivity.
- auto. auto.
- split; intros; Simpl. unfold v, Val.cmpfs. destruct (Val.cmpfs_bool c0 (rs x) (rs x0)) as [[]|]; auto.
-+ (* notcmpfs *)
- destruct (transl_cond_single c0 rd x x0) as [insn normal] eqn:TR.
- rewrite Val.notbool_negb_3. fold (Val.cmpfs c0 (rs x) (rs x0)).
- set (v := Val.cmpfs c0 (rs x) (rs x0)).
- destruct normal; inv EQ2.
-* econstructor; split.
- eapply exec_straight_two.
- eapply transl_cond_single_correct with (v := v); eauto.
- simpl; reflexivity.
- auto. auto.
- split; intros; Simpl. unfold v, Val.cmpfs. destruct (Val.cmpfs_bool c0 (rs x) (rs x0)) as [[]|]; auto.
-* econstructor; split.
- apply exec_straight_one. eapply transl_cond_single_correct with (v := Val.notbool v); eauto. auto.
- split; intros; Simpl.
-*)
-*)
-
-(** Some arithmetic properties. *)
-
-(* Remark cast32unsigned_from_cast32signed:
- forall i, Int64.repr (Int.unsigned i) = Int64.zero_ext 32 (Int64.repr (Int.signed i)).
-Proof.
- intros. apply Int64.same_bits_eq; intros.
- rewrite Int64.bits_zero_ext, !Int64.testbit_repr by tauto.
- rewrite Int.bits_signed by tauto. fold (Int.testbit i i0).
- change Int.zwordsize with 32.
- destruct (zlt i0 32). auto. apply Int.bits_above. auto.
-Qed.
-
-Lemma cast32signed_correct:
- forall (d s: ireg) (k: code) (rs: regset) (m: mem),
- exists rs': regset,
- exec_straight ge (cast32signed d s ::g k) rs m k rs' m
- /\ Val.lessdef (Val.longofint (rs s)) (rs' d)
- /\ (forall r: preg, r <> PC -> r <> d -> rs' r = rs r).
-Proof.
- intros. unfold cast32signed. destruct (ireg_eq d s).
-- econstructor; split.
- + apply exec_straight_one. simpl. eauto with asmgen.
- + split.
- * rewrite e. Simpl.
- * intros. destruct r; Simpl.
-- econstructor; split.
- + apply exec_straight_one. simpl. eauto with asmgen.
- + split.
- * Simpl.
- * intros. destruct r; Simpl.
-Qed. *)
-
(* Translation of arithmetic operations *)
Ltac SimplEval H :=
@@ -1868,33 +1657,6 @@ Proof.
+ econstructor; econstructor; econstructor; econstructor; split.
apply exec_straight_opt_refl.
split; auto. simpl. subst imm. rewrite Ptrofs.of_int64_to_int64 by auto. auto.
-(*
-+ econstructor; econstructor; econstructor; split.
- constructor. eapply exec_straight_two.
- simpl; eauto. simpl; eauto. auto. auto.
- split; intros; Simpl. destruct (rs base); auto; simpl. rewrite SF. simpl.
- rewrite Ptrofs.add_assoc. f_equal. f_equal.
- rewrite <- (Ptrofs.of_int64_to_int64 SF ofs). rewrite EQ.
- symmetry; auto with ptrofs.
-+ econstructor; econstructor; econstructor; split.
- constructor. eapply exec_straight_two.
- simpl; eauto. simpl; eauto. auto. auto.
- split; intros; Simpl. unfold eval_offset. destruct (rs base); auto; simpl. rewrite SF. simpl.
- rewrite Ptrofs.add_zero. subst imm. rewrite Ptrofs.of_int64_to_int64 by auto. auto.
-(* 32 bits part, irrelevant for us
-- generalize (make_immed32_sound (Ptrofs.to_int ofs)); intros EQ.
- destruct (make_immed32 (Ptrofs.to_int ofs)).
-+ econstructor; econstructor; econstructor; split.
- apply exec_straight_opt_refl.
- split; auto. simpl. subst imm. rewrite Ptrofs.of_int_to_int by auto. auto.
-+ econstructor; econstructor; econstructor; split.
- constructor. eapply exec_straight_two.
- simpl; eauto. simpl; eauto. auto. auto.
- split; intros; Simpl. destruct (rs base); auto; simpl. rewrite SF. simpl.
- rewrite Ptrofs.add_assoc. f_equal. f_equal.
- rewrite <- (Ptrofs.of_int_to_int SF ofs). rewrite EQ.
- symmetry; auto with ptrofs.
-*)*)
Qed.
@@ -2555,8 +2317,8 @@ Proof.
{ eapply A2. }
{ apply exec_straight_one. simpl.
rewrite (C2 SP) by auto with asmgen. rewrite <- (sp_val _ _ rs1 AG1). simpl; rewrite LP'.
- rewrite FREE'. eauto. (* auto. *) } }
- * split. (* apply agree_nextinstr. *)apply agree_set_other; auto with asmgen.
+ rewrite FREE'. eauto. } }
+ * split. apply agree_set_other; auto with asmgen.
apply agree_change_sp with (Vptr stk soff).
apply agree_exten with rs; auto. intros; rewrite C2; auto with asmgen.
eapply parent_sp_def; eauto.
diff --git a/mppa_k1c/Asmexpand.ml b/mppa_k1c/Asmexpand.ml
index 65dee6c7..20d27951 100644
--- a/mppa_k1c/Asmexpand.ml
+++ b/mppa_k1c/Asmexpand.ml
@@ -465,14 +465,14 @@ let expand_builtin_inline name args res = let open Asmvliw in
emit (Pitouchl addr)
| "__builtin_k1_dzerol", [BA(IR addr)], _ ->
emit (Pdzerol addr)
- | "__builtin_k1_afaddd", [BA(IR addr); BA (IR incr_res)], BR(IR res) ->
+(*| "__builtin_k1_afaddd", [BA(IR addr); BA (IR incr_res)], BR(IR res) ->
(if res <> incr_res
then (emit (Pmv(res, incr_res)); emit Psemi));
emit (Pafaddd(addr, res))
| "__builtin_k1_afaddw", [BA(IR addr); BA (IR incr_res)], BR(IR res) ->
(if res <> incr_res
then (emit (Pmv(res, incr_res)); emit Psemi));
- emit (Pafaddw(addr, res))
+ emit (Pafaddw(addr, res)) *) (* see #157 *)
| "__builtin_alclrd", [BA(IR addr)], BR(IR res) ->
emit (Palclrd(res, addr))
| "__builtin_alclrw", [BA(IR addr)], BR(IR res) ->
diff --git a/mppa_k1c/Asmgenproof.v b/mppa_k1c/Asmgenproof.v
index e7e21a18..e0878c7d 100644
--- a/mppa_k1c/Asmgenproof.v
+++ b/mppa_k1c/Asmgenproof.v
@@ -10,7 +10,7 @@
(* *)
(* *********************************************************************)
-(** Correctness proof for RISC-V generation: main proof. *)
+(** Correctness proof for Asmgen *)
Require Import Coqlib Errors.
Require Import Integers Floats AST Linking.
@@ -89,4 +89,4 @@ Module Asmgenproof0.
Definition return_address_offset := return_address_offset.
-End Asmgenproof0. \ No newline at end of file
+End Asmgenproof0.
diff --git a/mppa_k1c/Asmvliw.v b/mppa_k1c/Asmvliw.v
index c5b7db45..72584d2a 100644
--- a/mppa_k1c/Asmvliw.v
+++ b/mppa_k1c/Asmvliw.v
@@ -17,8 +17,6 @@
(** Abstract syntax and semantics for VLIW semantics of K1c assembly language. *)
-(* FIXME: develop/fix the comments in this file *)
-
Require Import Coqlib.
Require Import Maps.
Require Import AST.
@@ -45,8 +43,7 @@ Require Import Chunks.
this view induces our sequential semantics of bundles defined in [Asmblock].
*)
-(** General Purpose registers.
-*)
+(** General Purpose registers. *)
Inductive gpreg: Type :=
| GPR0: gpreg | GPR1: gpreg | GPR2: gpreg | GPR3: gpreg | GPR4: gpreg
@@ -152,9 +149,6 @@ Definition gpreg_o_expand (x : gpreg_o) : gpreg * gpreg * gpreg * gpreg :=
Lemma gpreg_o_eq : forall (x y : gpreg_o), {x=y} + {x<>y}.
Proof. decide equality. Defined.
-(** We model the following registers of the RISC-V architecture. *)
-
-(** basic register *)
Inductive preg: Type :=
| IR: gpreg -> preg (**r integer general purpose registers *)
| RA: preg
@@ -173,7 +167,7 @@ End PregEq.
Module Pregmap := EMap(PregEq).
-(** Conventional names for stack pointer ([SP]) and return address ([RA]). *)
+(** Conventional names for stack pointer ([SP]), return address ([RA]), frame pointer ([FP]) and other temporaries used *)
Notation "'SP'" := GPR12 (only parsing) : asm.
Notation "'FP'" := GPR17 (only parsing) : asm.
@@ -188,9 +182,7 @@ Inductive btest: Type :=
| BTdgez (**r Double Greater Than or Equal to Zero *)
| BTdlez (**r Double Less Than or Equal to Zero *)
| BTdgtz (**r Double Greater Than Zero *)
-(*| BTodd (**r Odd (LSB Set) *)
- | BTeven (**r Even (LSB Clear) *)
-*)| BTwnez (**r Word Not Equal to Zero *)
+ | BTwnez (**r Word Not Equal to Zero *)
| BTweqz (**r Word Equal to Zero *)
| BTwltz (**r Word Less Than Zero *)
| BTwgez (**r Word Greater Than or Equal to Zero *)
@@ -251,16 +243,7 @@ Definition offset : Type := ptrofs.
Definition label := positive.
-(* FIXME - rewrite the comment *)
-(** A note on immediates: there are various constraints on immediate
- operands to K1c instructions. We do not attempt to capture these
- restrictions in the abstract syntax nor in the semantics. The
- assembler will emit an error if immediate operands exceed the
- representable range. Of course, our K1c generator (file
- [Asmgen]) is careful to respect this range. *)
-
-(** Instructions to be expanded in control-flow
-*)
+(** Instructions to be expanded in control-flow *)
Inductive ex_instruction : Type :=
(* Pseudo-instructions *)
| Pbuiltin: external_function -> list (builtin_arg preg)
@@ -1355,7 +1338,7 @@ Definition store_chunk n :=
(** * basic instructions *)
-Definition parexec_basic_instr (bi: basic) (rsr rsw: regset) (mr mw: mem) :=
+Definition bstep (bi: basic) (rsr rsw: regset) (mr mw: mem) :=
match bi with
| PArith ai => Next (parexec_arith_instr ai rsr rsw) mw
@@ -1414,7 +1397,7 @@ Fixpoint parexec_wio_body (body: list basic) (rsr rsw: regset) (mr mw: mem) :=
match body with
| nil => Next rsw mw
| bi::body' =>
- match parexec_basic_instr bi rsr rsw mr mw with
+ match bstep bi rsr rsw mr mw with
| Next rsw mw => parexec_wio_body body' rsr rsw mr mw
| Stuck => Stuck
end
@@ -1550,12 +1533,12 @@ Definition incrPC size_b (rs: regset) :=
rs#PC <- (Val.offset_ptr rs#PC size_b).
(** parallel execution of the exit instruction of a bundle *)
-Definition parexec_exit (f: function) ext size_b (rsr rsw: regset) (mw: mem)
+Definition estep (f: function) ext size_b (rsr rsw: regset) (mw: mem)
:= parexec_control f ext (incrPC size_b rsr) rsw mw.
Definition parexec_wio f bdy ext size_b (rs: regset) (m: mem): outcome :=
match parexec_wio_body bdy rs rs m m with
- | Next rsw mw => parexec_exit f ext size_b rs rsw mw
+ | Next rsw mw => estep f ext size_b rs rsw mw
| Stuck => Stuck
end.
diff --git a/mppa_k1c/CBuiltins.ml b/mppa_k1c/CBuiltins.ml
index 2f80c90f..5fb69f62 100644
--- a/mppa_k1c/CBuiltins.ml
+++ b/mppa_k1c/CBuiltins.ml
@@ -43,8 +43,8 @@ let builtins = {
(* LSU Instructions *)
(* acswapd and acswapw done using headers and assembly *)
- "__builtin_k1_afaddd", (TInt(IULongLong, []), [TPtr(TVoid [], []); TInt(ILongLong, [])], false);
- "__builtin_k1_afaddw", (TInt(IUInt, []), [TPtr(TVoid [], []); TInt(IInt, [])], false);
+(* "__builtin_k1_afaddd", (TInt(IULongLong, []), [TPtr(TVoid [], []); TInt(ILongLong, [])], false);
+ "__builtin_k1_afaddw", (TInt(IUInt, []), [TPtr(TVoid [], []); TInt(IInt, [])], false); *) (* see #157 *)
"__builtin_k1_alclrd", (TInt(IULongLong, []), [TPtr(TVoid [], [])], false); (* DONE *)
"__builtin_k1_alclrw", (TInt(IUInt, []), [TPtr(TVoid [], [])], false); (* DONE *)
"__builtin_k1_dinval", (TVoid [], [], false); (* DONE *)
@@ -63,7 +63,6 @@ let builtins = {
"__builtin_k1_lwzu", (TInt(IUInt, []), [TPtr(TVoid [], [])], false);
(* ALU Instructions *)
- "__builtin_clzll", (TInt(IULongLong, []), [TInt(IULongLong, [])], false);
(* "__builtin_k1_addhp", (TInt(IInt, []), [TInt(IInt, []); TInt(IInt, [])], false); *)
(* "__builtin_k1_adds", (TInt(IInt, []), [TInt(IInt, []); TInt(IInt, [])], false); *)
(* "__builtin_k1_bwlu", (TInt(IUInt, []),
@@ -74,8 +73,8 @@ let builtins = {
(* "__builtin_k1_cbs", (TInt(IInt, []), [TInt(IUInt, [])], false); *)
(* "__builtin_k1_cbsdl", (TInt(ILongLong, []), [TInt(IULongLong, [])], false); *)
(* "__builtin_k1_clz", (TInt(IInt, []), [TInt(IUInt, [])], false); *)
- "__builtin_k1_clzw", (TInt(IInt, []), [TInt(IUInt, [])], false);
- "__builtin_k1_clzd", (TInt(ILongLong, []), [TInt(IULongLong, [])], false);
+ "__builtin_clzw", (TInt(IInt, []), [TInt(IUInt, [])], false);
+ "__builtin_clzll", (TInt(ILongLong, []), [TInt(IULongLong, [])], false);
(* "__builtin_k1_clzdl", (TInt(ILongLong, []), [TInt(IULongLong, [])], false); *)
(* "__builtin_k1_cmove", (TInt(IInt, []), [TInt(IInt, []); TInt(IInt, []); TInt(IInt, [])], false); *)
(* "__builtin_k1_ctz", (TInt(IInt, []), [TInt(IUInt, [])], false); *)
diff --git a/mppa_k1c/Op.v b/mppa_k1c/Op.v
index 35fbb596..815d3958 100644
--- a/mppa_k1c/Op.v
+++ b/mppa_k1c/Op.v
@@ -100,23 +100,23 @@ Inductive operation : Type :=
| Onandimm (n: int) (**r [rd = ~(r1 & n)] *)
| Oor (**r [rd = r1 | r2] *)
| Oorimm (n: int) (**r [rd = r1 | n] *)
- | Onor (**r [rd = r1 | r2] *)
- | Onorimm (n: int) (**r [rd = r1 | n] *)
+ | Onor (**r [rd = ~(r1 | r2)] *)
+ | Onorimm (n: int) (**r [rd = ~(r1 | n)] *)
| Oxor (**r [rd = r1 ^ r2] *)
| Oxorimm (n: int) (**r [rd = r1 ^ n] *)
| Onxor (**r [rd = ~(r1 ^ r2)] *)
| Onxorimm (n: int) (**r [rd = ~(r1 ^ n)] *)
| Onot (**r [rd = ~r1] *)
- | Oandn (**r [rd = (~r1) ^ r2] *)
- | Oandnimm (n: int) (**r [rd = (~r1) ^ n] *)
+ | Oandn (**r [rd = (~r1) & r2] *)
+ | Oandnimm (n: int) (**r [rd = (~r1) & n] *)
| Oorn (**r [rd = (~r1) | r2] *)
| Oornimm (n: int) (**r [rd = (~r1) | n] *)
| Oshl (**r [rd = r1 << r2] *)
| Oshlimm (n: int) (**r [rd = r1 << n] *)
- | Oshr (**r [rd = r1 >> r2] (signed) *)
- | Oshrimm (n: int) (**r [rd = r1 >> n] (signed) *)
- | Oshru (**r [rd = r1 >> r2] (unsigned) *)
- | Oshruimm (n: int) (**r [rd = r1 >> n] (unsigned) *)
+ | Oshr (**r [rd = r1 >>s r2] (signed) *)
+ | Oshrimm (n: int) (**r [rd = r1 >>s n] (signed) *)
+ | Oshru (**r [rd = r1 >>u r2] (unsigned) *)
+ | Oshruimm (n: int) (**r [rd = r1 >>x n] (unsigned) *)
| Oshrximm (n: int) (**r [rd = r1 / 2^n] (signed) *)
| Ororimm (n: int) (**r rotate right immediate *)
| Omadd (**r [rd = rd + r1 * r2] *)
@@ -158,8 +158,8 @@ Inductive operation : Type :=
| Onxorl (**r [rd = ~(r1 ^ r2)] *)
| Onxorlimm (n: int64) (**r [rd = ~(r1 ^ n)] *)
| Onotl (**r [rd = ~r1] *)
- | Oandnl (**r [rd = (~r1) ^ r2] *)
- | Oandnlimm (n: int64) (**r [rd = (~r1) ^ n] *)
+ | Oandnl (**r [rd = (~r1) & r2] *)
+ | Oandnlimm (n: int64) (**r [rd = (~r1) & n] *)
| Oornl (**r [rd = (~r1) | r2] *)
| Oornlimm (n: int64) (**r [rd = (~r1) | n] *)
| Oshll (**r [rd = r1 << r2] *)
diff --git a/mppa_k1c/PostpassScheduling.v b/mppa_k1c/PostpassScheduling.v
index 15cb4c48..8b6de1e2 100644
--- a/mppa_k1c/PostpassScheduling.v
+++ b/mppa_k1c/PostpassScheduling.v
@@ -19,7 +19,7 @@ Local Open Scope error_monad_scope.
(** Oracle taking as input a basic block,
returns a schedule expressed as a list of bundles *)
-Axiom schedule: bblock -> list bblock.
+Axiom schedule: bblock -> (list (list basic)) * option control.
Extract Constant schedule => "PostpassSchedulingOracle.schedule".
@@ -208,7 +208,8 @@ Proof.
+ apply IHlbb in EQ. assumption.
Qed.
-
+Inductive is_concat : bblock -> list bblock -> Prop :=
+ | mk_is_concat: forall tbb lbb, concat_all lbb = OK tbb -> is_concat tbb lbb.
Definition verify_schedule (bb bb' : bblock) : res unit :=
match bblock_simub bb bb' with
@@ -333,10 +334,49 @@ Proof.
apply stick_header_concat_all. assumption.
Qed.
+Program Definition make_bblock_from_basics lb :=
+ match lb with
+ | nil => Error (msg "PostpassScheduling.make_bblock_from_basics")
+ | b :: lb => OK {| header := nil; body := b::lb; exit := None |}
+ end.
+
+Fixpoint schedule_to_bblocks_nocontrol llb :=
+ match llb with
+ | nil => OK nil
+ | lb :: llb => do bb <- make_bblock_from_basics lb;
+ do lbb <- schedule_to_bblocks_nocontrol llb;
+ OK (bb :: lbb)
+ end.
+
+Program Definition make_bblock_from_basics_and_control lb c :=
+ match c with
+ | PExpand (Pbuiltin _ _ _) => Error (msg "PostpassScheduling.make_bblock_from_basics_and_control")
+ | PCtlFlow cf => OK {| header := nil; body := lb; exit := Some (PCtlFlow cf) |}
+ end.
+Next Obligation.
+ apply wf_bblock_refl. constructor.
+ - right. discriminate.
+ - discriminate.
+Qed.
+
+Fixpoint schedule_to_bblocks_wcontrol llb c :=
+ match llb with
+ | nil => OK ((bblock_single_inst (PControl c)) :: nil)
+ | lb :: nil => do bb <- make_bblock_from_basics_and_control lb c; OK (bb :: nil)
+ | lb :: llb => do bb <- make_bblock_from_basics lb;
+ do lbb <- schedule_to_bblocks_wcontrol llb c;
+ OK (bb :: lbb)
+ end.
+Definition schedule_to_bblocks (llb: list (list basic)) (oc: option control) : res (list bblock) :=
+ match oc with
+ | None => schedule_to_bblocks_nocontrol llb
+ | Some c => schedule_to_bblocks_wcontrol llb c
+ end.
-Definition do_schedule (bb: bblock) : list bblock :=
- if (Z.eqb (size bb) 1) then bb::nil else schedule bb.
+Definition do_schedule (bb: bblock) : res (list bblock) :=
+ if (Z.eqb (size bb) 1) then OK (bb::nil)
+ else match (schedule bb) with (llb, oc) => schedule_to_bblocks llb oc end.
Definition verify_par_bblock (bb: bblock) : res unit :=
if (bblock_para_check bb) then OK tt else Error (msg "PostpassScheduling.verify_par_bblock").
@@ -350,7 +390,7 @@ Fixpoint verify_par (lbb: list bblock) :=
Definition verified_schedule_nob (bb : bblock) : res (list bblock) :=
let bb' := no_header bb in
let bb'' := Peephole.optimize_bblock bb' in
- let lbb := do_schedule bb'' in
+ do lbb <- do_schedule bb'';
do tbb <- concat_all lbb;
do sizecheck <- verify_size bb lbb;
do schedcheck <- verify_schedule bb' tbb;
@@ -363,7 +403,7 @@ Lemma verified_schedule_nob_size:
Proof.
intros. monadInv H. erewrite <- stick_header_code_size; eauto.
apply verify_size_size.
- destruct x0; try discriminate. assumption.
+ destruct x1; try discriminate. assumption.
Qed.
Lemma verified_schedule_nob_no_header_in_middle:
@@ -382,7 +422,7 @@ Lemma verified_schedule_nob_header:
/\ Forall (fun b => header b = nil) lbb.
Proof.
intros. split.
- - monadInv H. unfold stick_header_code in EQ2. destruct (hd_error _); try discriminate. inv EQ2.
+ - monadInv H. unfold stick_header_code in EQ3. destruct (hd_error _); try discriminate. inv EQ3.
simpl. reflexivity.
- apply verified_schedule_nob_no_header_in_middle in H. assumption.
Qed.
@@ -427,29 +467,29 @@ Qed.
Lemma verified_schedule_nob_correct:
forall ge f bb lbb,
verified_schedule_nob bb = OK lbb ->
- exists tbb,
- concat_all lbb = OK tbb
+ exists tbb,
+ is_concat tbb lbb
/\ bblock_simu ge f bb tbb.
Proof.
intros. monadInv H.
exploit stick_header_code_concat_all; eauto.
intros (tbb & CONC & STH).
- exists tbb. split; auto.
- rewrite verify_schedule_no_header in EQ0. erewrite stick_header_verify_schedule in EQ0; eauto.
- eapply bblock_simub_correct; eauto. unfold verify_schedule in EQ0.
+ exists tbb. split; auto. constructor; auto.
+ rewrite verify_schedule_no_header in EQ2. erewrite stick_header_verify_schedule in EQ2; eauto.
+ eapply bblock_simub_correct; eauto. unfold verify_schedule in EQ2.
destruct (bblock_simub _ _); auto; try discriminate.
Qed.
Theorem verified_schedule_correct:
forall ge f bb lbb,
verified_schedule bb = OK lbb ->
- exists tbb,
- concat_all lbb = OK tbb
+ exists tbb,
+ is_concat tbb lbb
/\ bblock_simu ge f bb tbb.
Proof.
intros. unfold verified_schedule in H. destruct (exit bb). destruct c. destruct i.
all: try (eapply verified_schedule_nob_correct; eauto; fail).
- inv H. eexists. split; simpl; auto. constructor; auto.
+ inv H. eexists. split; simpl; auto. constructor; auto. simpl; auto. constructor; auto.
Qed.
Lemma verified_schedule_builtin_idem:
diff --git a/mppa_k1c/PostpassSchedulingOracle.ml b/mppa_k1c/PostpassSchedulingOracle.ml
index 895f9f40..40f1d9c7 100644
--- a/mppa_k1c/PostpassSchedulingOracle.ml
+++ b/mppa_k1c/PostpassSchedulingOracle.ml
@@ -38,6 +38,8 @@ type ab_inst_rec = {
inst: real_instruction;
write_locs : location list;
read_locs : location list;
+ read_at_id : location list; (* Must be contained in read_locs *)
+ read_at_e1 : location list; (* idem *)
imm : immediate option;
is_control : bool;
}
@@ -232,25 +234,40 @@ let jl_real = Goto
let cb_real = Cb
let cbu_real = Cb
-let arith_rri32_rec i rd rs imm32 = { inst = arith_rri32_real i; write_locs = [Reg rd]; read_locs = [Reg rs]; imm = imm32; is_control = false }
+let arith_rri32_rec i rd rs imm32 = { inst = arith_rri32_real i; write_locs = [Reg rd]; read_locs = [Reg rs]; imm = imm32; is_control = false;
+ read_at_id = []; read_at_e1 = [] }
-let arith_rri64_rec i rd rs imm64 = { inst = arith_rri64_real i; write_locs = [Reg rd]; read_locs = [Reg rs]; imm = imm64; is_control = false }
+let arith_rri64_rec i rd rs imm64 = { inst = arith_rri64_real i; write_locs = [Reg rd]; read_locs = [Reg rs]; imm = imm64; is_control = false;
+ read_at_id = []; read_at_e1 = [] }
-let arith_rrr_rec i rd rs1 rs2 = { inst = arith_rrr_real i; write_locs = [Reg rd]; read_locs = [Reg rs1; Reg rs2]; imm = None; is_control = false}
+let arith_rrr_rec i rd rs1 rs2 = { inst = arith_rrr_real i; write_locs = [Reg rd]; read_locs = [Reg rs1; Reg rs2]; imm = None; is_control = false;
+ read_at_id = []; read_at_e1 = [] }
-let arith_arri32_rec i rd rs imm32 = { inst = arith_arri32_real i; write_locs = [Reg rd]; read_locs = [Reg rd; Reg rs]; imm = imm32; is_control = false }
+let arith_arri32_rec i rd rs imm32 =
+ let rae1 = match i with Pmaddiw -> [Reg rd] | _ -> []
+ in { inst = arith_arri32_real i; write_locs = [Reg rd]; read_locs = [Reg rd; Reg rs]; imm = imm32; is_control = false;
+ read_at_id = [] ; read_at_e1 = rae1 }
-let arith_arri64_rec i rd rs imm64 = { inst = arith_arri64_real i; write_locs = [Reg rd]; read_locs = [Reg rd; Reg rs]; imm = imm64; is_control = false }
+let arith_arri64_rec i rd rs imm64 =
+ let rae1 = match i with Pmaddil -> [Reg rd] | _ -> []
+ in { inst = arith_arri64_real i; write_locs = [Reg rd]; read_locs = [Reg rd; Reg rs]; imm = imm64; is_control = false;
+ read_at_id = []; read_at_e1 = rae1 }
-let arith_arr_rec i rd rs = { inst = arith_arr_real i; write_locs = [Reg rd]; read_locs = [Reg rd; Reg rs]; imm = None; is_control = false}
+let arith_arr_rec i rd rs = { inst = arith_arr_real i; write_locs = [Reg rd]; read_locs = [Reg rd; Reg rs]; imm = None; is_control = false;
+ read_at_id = []; read_at_e1 = [] }
-let arith_arrr_rec i rd rs1 rs2 = { inst = arith_arrr_real i; write_locs = [Reg rd]; read_locs = [Reg rd; Reg rs1; Reg rs2]; imm = None; is_control = false}
+let arith_arrr_rec i rd rs1 rs2 =
+ let rae1 = match i with Pmaddl | Pmaddw | Pmsubl | Pmsubw -> [Reg rd] | _ -> []
+ in { inst = arith_arrr_real i; write_locs = [Reg rd]; read_locs = [Reg rd; Reg rs1; Reg rs2]; imm = None; is_control = false;
+ read_at_id = []; read_at_e1 = rae1 }
-let arith_rr_rec i rd rs = { inst = arith_rr_real i; write_locs = [Reg rd]; read_locs = [Reg rs]; imm = None; is_control = false}
+let arith_rr_rec i rd rs = { inst = arith_rr_real i; write_locs = [Reg rd]; read_locs = [Reg rs]; imm = None; is_control = false;
+ read_at_id = []; read_at_e1 = [] }
let arith_r_rec i rd = match i with
(* For Ploadsymbol, writing the highest integer since we do not know how many bits does a symbol have *)
- | Ploadsymbol (id, ofs) -> { inst = loadsymbol_real; write_locs = [Reg rd]; read_locs = []; imm = Some (I64 Integers.Int64.max_signed); is_control = false}
+ | Ploadsymbol (id, ofs) -> { inst = loadsymbol_real; write_locs = [Reg rd]; read_locs = []; imm = Some (I64 Integers.Int64.max_signed);
+ is_control = false; read_at_id = []; read_at_e1 = [] }
let arith_rec i =
match i with
@@ -262,45 +279,54 @@ let arith_rec i =
| PArithARRI32 (i, rd, rs, imm32) -> arith_arri32_rec i (IR rd) (IR rs) (Some (I32 imm32))
| PArithARRI64 (i, rd, rs, imm64) -> arith_arri64_rec i (IR rd) (IR rs) (Some (I64 imm64))
| PArithARRR (i, rd, rs1, rs2) -> arith_arrr_rec i (IR rd) (IR rs1) (IR rs2)
- | PArithRI32 (rd, imm32) -> { inst = arith_ri32_real; write_locs = [Reg (IR rd)]; read_locs = []; imm = (Some (I32 imm32)) ; is_control = false}
- | PArithRI64 (rd, imm64) -> { inst = arith_ri64_real; write_locs = [Reg (IR rd)]; read_locs = []; imm = (Some (I64 imm64)) ; is_control = false}
+ | PArithRI32 (rd, imm32) -> { inst = arith_ri32_real; write_locs = [Reg (IR rd)]; read_locs = []; imm = (Some (I32 imm32)) ; is_control = false;
+ read_at_id = []; read_at_e1 = [] }
+ | PArithRI64 (rd, imm64) -> { inst = arith_ri64_real; write_locs = [Reg (IR rd)]; read_locs = []; imm = (Some (I64 imm64)) ; is_control = false;
+ read_at_id = []; read_at_e1 = [] }
| PArithRF32 (rd, f) -> { inst = arith_rf32_real; write_locs = [Reg (IR rd)]; read_locs = [];
- imm = (Some (I32 (Floats.Float32.to_bits f))); is_control = false}
+ imm = (Some (I32 (Floats.Float32.to_bits f))); is_control = false; read_at_id = []; read_at_e1 = []}
| PArithRF64 (rd, f) -> { inst = arith_rf64_real; write_locs = [Reg (IR rd)]; read_locs = [];
- imm = (Some (I64 (Floats.Float.to_bits f))); is_control = false}
+ imm = (Some (I64 (Floats.Float.to_bits f))); is_control = false; read_at_id = []; read_at_e1 = []}
| PArithRR (i, rd, rs) -> arith_rr_rec i (IR rd) (IR rs)
| PArithR (i, rd) -> arith_r_rec i (IR rd)
let load_rec i = match i with
| PLoadRRO (i, rs1, rs2, imm) ->
- { inst = load_real i; write_locs = [Reg (IR rs1)]; read_locs = [Mem; Reg (IR rs2)]; imm = (Some (Off imm)) ; is_control = false}
+ { inst = load_real i; write_locs = [Reg (IR rs1)]; read_locs = [Mem; Reg (IR rs2)]; imm = (Some (Off imm)) ; is_control = false;
+ read_at_id = []; read_at_e1 = [] }
| PLoadQRRO(rs, ra, imm) ->
let (rs0, rs1) = gpreg_q_expand rs in
- { inst = loadqrro_real; write_locs = [Reg (IR rs0); Reg (IR rs1)]; read_locs = [Mem; Reg (IR ra)]; imm = (Some (Off imm)) ; is_control = false}
+ { inst = loadqrro_real; write_locs = [Reg (IR rs0); Reg (IR rs1)]; read_locs = [Mem; Reg (IR ra)]; imm = (Some (Off imm)) ; is_control = false;
+ read_at_id = []; read_at_e1 = [] }
| PLoadORRO(rs, ra, imm) ->
let (((rs0, rs1), rs2), rs3) = gpreg_o_expand rs in
- { inst = loadorro_real; write_locs = [Reg (IR rs0); Reg (IR rs1); Reg (IR rs2); Reg (IR rs3)]; read_locs = [Mem; Reg (IR ra)]; imm = (Some (Off imm)) ; is_control = false}
+ { inst = loadorro_real; write_locs = [Reg (IR rs0); Reg (IR rs1); Reg (IR rs2); Reg (IR rs3)]; read_locs = [Mem; Reg (IR ra)];
+ imm = (Some (Off imm)) ; is_control = false; read_at_id = []; read_at_e1 = []}
| PLoadRRR (i, rs1, rs2, rs3) | PLoadRRRXS (i, rs1, rs2, rs3) ->
- { inst = load_real i; write_locs = [Reg (IR rs1)]; read_locs = [Mem; Reg (IR rs2); Reg (IR rs3)]; imm = None ; is_control = false}
+ { inst = load_real i; write_locs = [Reg (IR rs1)]; read_locs = [Mem; Reg (IR rs2); Reg (IR rs3)]; imm = None ; is_control = false;
+ read_at_id = []; read_at_e1 = [] }
let store_rec i = match i with
- | PStoreRRO (i, rs1, rs2, imm) ->
- { inst = store_real i; write_locs = [Mem]; read_locs = [Reg (IR rs1); Reg (IR rs2)]; imm = (Some (Off imm))
- ; is_control = false}
+ | PStoreRRO (i, rs, ra, imm) ->
+ { inst = store_real i; write_locs = [Mem]; read_locs = [Reg (IR rs); Reg (IR ra)]; imm = (Some (Off imm));
+ read_at_id = []; read_at_e1 = [Reg (IR rs)] ; is_control = false}
| PStoreQRRO (rs, ra, imm) ->
let (rs0, rs1) = gpreg_q_expand rs in
- { inst = storeqrro_real; write_locs = [Mem]; read_locs = [Reg (IR rs0); Reg (IR rs1); Reg (IR ra)]; imm = (Some (Off imm))
- ; is_control = false}
+ { inst = storeqrro_real; write_locs = [Mem]; read_locs = [Reg (IR rs0); Reg (IR rs1); Reg (IR ra)]; imm = (Some (Off imm));
+ read_at_id = []; read_at_e1 = [Reg (IR rs0); Reg (IR rs1)] ; is_control = false}
| PStoreORRO (rs, ra, imm) ->
let (((rs0, rs1), rs2), rs3) = gpreg_o_expand rs in
- { inst = storeorro_real; write_locs = [Mem]; read_locs = [Reg (IR rs0); Reg (IR rs1); Reg (IR rs2); Reg (IR rs3); Reg (IR ra)]; imm = (Some (Off imm))
- ; is_control = false}
- | PStoreRRR (i, rs1, rs2, rs3) | PStoreRRRXS (i, rs1, rs2, rs3) -> { inst = store_real i; write_locs = [Mem]; read_locs = [Reg (IR rs1); Reg (IR rs2); Reg (IR rs3)]; imm = None
- ; is_control = false}
+ { inst = storeorro_real; write_locs = [Mem]; read_locs = [Reg (IR rs0); Reg (IR rs1); Reg (IR rs2); Reg (IR rs3); Reg (IR ra)];
+ imm = (Some (Off imm)); read_at_id = []; read_at_e1 = [Reg (IR rs0); Reg (IR rs1); Reg (IR rs2); Reg (IR rs3)]; is_control = false}
+ | PStoreRRR (i, rs, ra1, ra2) | PStoreRRRXS (i, rs, ra1, ra2) ->
+ { inst = store_real i; write_locs = [Mem]; read_locs = [Reg (IR rs); Reg (IR ra1); Reg (IR ra2)]; imm = None;
+ read_at_id = []; read_at_e1 = [Reg (IR rs)]; is_control = false}
-let get_rec (rd:gpreg) rs = { inst = get_real; write_locs = [Reg (IR rd)]; read_locs = [Reg rs]; imm = None; is_control = false }
+let get_rec (rd:gpreg) rs = { inst = get_real; write_locs = [Reg (IR rd)]; read_locs = [Reg rs]; imm = None; is_control = false;
+ read_at_id = []; read_at_e1 = [] }
-let set_rec rd (rs:gpreg) = { inst = set_real; write_locs = [Reg rd]; read_locs = [Reg (IR rs)]; imm = None; is_control = false }
+let set_rec rd (rs:gpreg) = { inst = set_real; write_locs = [Reg rd]; read_locs = [Reg (IR rs)]; imm = None; is_control = false;
+ read_at_id = [Reg (IR rs)]; read_at_e1 = [] }
let basic_rec i =
match i with
@@ -311,20 +337,24 @@ let basic_rec i =
| Pfreeframe (_, _) -> raise OpaqueInstruction
| Pget (rd, rs) -> get_rec rd rs
| Pset (rd, rs) -> set_rec rd rs
- | Pnop -> { inst = nop_real; write_locs = []; read_locs = []; imm = None ; is_control = false}
+ | Pnop -> { inst = nop_real; write_locs = []; read_locs = []; imm = None ; is_control = false; read_at_id = []; read_at_e1 = []}
let expand_rec = function
| Pbuiltin _ -> raise OpaqueInstruction
let ctl_flow_rec = function
- | Pret -> { inst = ret_real; write_locs = []; read_locs = [Reg RA]; imm = None ; is_control = true}
- | Pcall lbl -> { inst = call_real; write_locs = [Reg RA]; read_locs = []; imm = None ; is_control = true}
- | Picall r -> { inst = icall_real; write_locs = [Reg RA]; read_locs = [Reg (IR r)]; imm = None; is_control = true}
- | Pgoto lbl -> { inst = goto_real; write_locs = []; read_locs = []; imm = None ; is_control = true}
- | Pigoto r -> { inst = igoto_real; write_locs = []; read_locs = [Reg (IR r)]; imm = None ; is_control = true}
- | Pj_l lbl -> { inst = goto_real; write_locs = []; read_locs = []; imm = None ; is_control = true}
- | Pcb (bt, rs, lbl) -> { inst = cb_real; write_locs = []; read_locs = [Reg (IR rs)]; imm = None ; is_control = true}
- | Pcbu (bt, rs, lbl) -> { inst = cbu_real; write_locs = []; read_locs = [Reg (IR rs)]; imm = None ; is_control = true}
+ | Pret -> { inst = ret_real; write_locs = []; read_locs = [Reg RA]; imm = None ; is_control = true; read_at_id = [Reg RA]; read_at_e1 = []}
+ | Pcall lbl -> { inst = call_real; write_locs = [Reg RA]; read_locs = []; imm = None ; is_control = true; read_at_id = []; read_at_e1 = []}
+ | Picall r -> { inst = icall_real; write_locs = [Reg RA]; read_locs = [Reg (IR r)]; imm = None; is_control = true;
+ read_at_id = [Reg (IR r)]; read_at_e1 = [] }
+ | Pgoto lbl -> { inst = goto_real; write_locs = []; read_locs = []; imm = None ; is_control = true; read_at_id = []; read_at_e1 = []}
+ | Pigoto r -> { inst = igoto_real; write_locs = []; read_locs = [Reg (IR r)]; imm = None ; is_control = true;
+ read_at_id = [Reg (IR r)]; read_at_e1 = [] }
+ | Pj_l lbl -> { inst = goto_real; write_locs = []; read_locs = []; imm = None ; is_control = true; read_at_id = []; read_at_e1 = []}
+ | Pcb (bt, rs, lbl) -> { inst = cb_real; write_locs = []; read_locs = [Reg (IR rs)]; imm = None ; is_control = true;
+ read_at_id = [Reg (IR rs)]; read_at_e1 = [] }
+ | Pcbu (bt, rs, lbl) -> { inst = cbu_real; write_locs = []; read_locs = [Reg (IR rs)]; imm = None ; is_control = true;
+ read_at_id = [Reg (IR rs)]; read_at_e1 = [] }
| Pjumptable (r, _) -> raise OpaqueInstruction (* { inst = "Pjumptable"; write_locs = [Reg (IR GPR62); Reg (IR GPR63)]; read_locs = [Reg (IR r)]; imm = None ; is_control = true} *)
let control_rec i =
@@ -350,6 +380,8 @@ let instruction_recs bb = (basic_recs bb.body) @ (exit_rec bb.exit)
type inst_info = {
write_locs : location list;
read_locs : location list;
+ reads_at_id : bool;
+ reads_at_e1 : bool;
is_control : bool;
usage: int array; (* resources consumed by the instruction *)
latency: int;
@@ -582,6 +614,16 @@ let rec_to_usage r =
| Fnarrowdw -> alu_full
| Faddd | Faddw | Fsbfd | Fsbfw | Fmuld | Fmulw -> mau
+let inst_info_to_dlatency i =
+ begin
+ assert (not (i.reads_at_id && i.reads_at_e1));
+ match i.reads_at_id with
+ | true -> +1
+ | false -> (match i.reads_at_e1 with
+ | true -> -1
+ | false -> 0)
+ end
+
let real_inst_to_latency = function
| Nop -> 0 (* Only goes through ID *)
| Addw | Andw | Compw | Orw | Sbfw | Sbfxw | Sraw | Srsw | Srlw | Sllw | Xorw
@@ -601,10 +643,17 @@ let real_inst_to_latency = function
| Fnegd | Fnegw | Fabsd | Fabsw | Fwidenlwd | Fnarrowdw -> 1
| Faddd | Faddw | Fsbfd | Fsbfw | Fmuld | Fmulw -> 4
+let rec empty_inter la = function
+ | [] -> true
+ | b::lb -> if (List.mem b la) then false else empty_inter la lb
+
let rec_to_info r : inst_info =
let usage = rec_to_usage r
and latency = real_inst_to_latency r.inst
- in { write_locs = r.write_locs; read_locs = r.read_locs; usage=usage; latency=latency; is_control=r.is_control }
+ and reads_at_id = not (empty_inter r.read_locs r.read_at_id)
+ and reads_at_e1 = not (empty_inter r.read_locs r.read_at_e1)
+ in { write_locs = r.write_locs; read_locs = r.read_locs; usage=usage; latency=latency; is_control=r.is_control;
+ reads_at_id = reads_at_id; reads_at_e1 = reads_at_e1 }
let instruction_infos bb = List.map rec_to_info (instruction_recs bb)
@@ -651,6 +700,11 @@ let rec get_accesses hashloc (ll: location list) = match ll with
| [] -> []
| loc :: llocs -> (find_in_hash hashloc loc) @ (get_accesses hashloc llocs)
+let compute_latency (ifrom: inst_info) (ito: inst_info) =
+ let dlat = inst_info_to_dlatency ito
+ in let lat = ifrom.latency + dlat
+ in assert (lat >= 0); if (lat == 0) then 1 else lat
+
let latency_constraints bb =
let written = LocHash.create 70
and read = LocHash.create 70
@@ -662,8 +716,10 @@ let latency_constraints bb =
and waw = get_accesses written i.write_locs
and war = get_accesses read i.write_locs
in begin
- List.iter (fun i -> constraints := {instr_from = i; instr_to = !count; latency = (List.nth instr_infos i).latency} :: !constraints) raw;
- List.iter (fun i -> constraints := {instr_from = i; instr_to = !count; latency = (List.nth instr_infos i).latency} :: !constraints) waw;
+ List.iter (fun i -> constraints := {instr_from = i; instr_to = !count;
+ latency = compute_latency (List.nth instr_infos i) (List.nth instr_infos !count)} :: !constraints) raw;
+ List.iter (fun i -> constraints := {instr_from = i; instr_to = !count;
+ latency = compute_latency (List.nth instr_infos i) (List.nth instr_infos !count)} :: !constraints) waw;
List.iter (fun i -> constraints := {instr_from = i; instr_to = !count; latency = 0} :: !constraints) war;
if i.is_control then List.iter (fun n -> constraints := {instr_from = n; instr_to = !count; latency = 0} :: !constraints) (intlist !count);
(* Updating "read" and "written" hashmaps *)
@@ -905,10 +961,18 @@ let smart_schedule bb =
in bundles @ (f lbb)
in f lbb
-(** Called schedule function from Coq *)
-
-let schedule bb =
+let bblock_to_bundles bb =
if debug then (eprintf "###############################\n"; Printf.eprintf "SCHEDULING\n"; print_bb stderr bb);
(* print_problem (build_problem bb); *)
if Compopts.optim_postpass () then smart_schedule bb else dumb_schedule bb
+(** To deal with the Coq Axiom schedule : bblock -> (list (list basic)) * option control *)
+
+let rec bundles_to_coq_schedule = function
+ | [] -> ([], None)
+ | bb :: [] -> ([bb.body], bb.exit)
+ | bb :: lbb -> let (llb, oc) = bundles_to_coq_schedule lbb in (bb.body :: llb, oc)
+
+(** Called schedule function from Coq *)
+
+let schedule bb = let toto = bundles_to_coq_schedule @@ bblock_to_bundles bb in toto
diff --git a/mppa_k1c/PostpassSchedulingproof.v b/mppa_k1c/PostpassSchedulingproof.v
index 5d4fc881..21af276b 100644
--- a/mppa_k1c/PostpassSchedulingproof.v
+++ b/mppa_k1c/PostpassSchedulingproof.v
@@ -240,7 +240,7 @@ Lemma exec_basic_instr_pc_var:
exec_basic_instr ge i rs m = Next rs' m' ->
exec_basic_instr ge i (rs # PC <- v) m = Next (rs' # PC <- v) m'.
Proof.
- intros. unfold exec_basic_instr in *. unfold parexec_basic_instr in *. destruct i.
+ intros. unfold exec_basic_instr in *. unfold bstep in *. destruct i.
- unfold exec_arith_instr in *. destruct i; destruct i.
all: try (exploreInst; inv H; apply next_eq; auto;
apply functional_extensionality; intros; rewrite regset_double_set; auto; discriminate).
@@ -681,7 +681,7 @@ Lemma transf_exec_basic_instr:
forall i rs m, exec_basic_instr ge i rs m = exec_basic_instr tge i rs m.
Proof.
intros. pose symbol_address_preserved.
- unfold exec_basic_instr. unfold parexec_basic_instr. exploreInst; simpl; auto; try congruence.
+ unfold exec_basic_instr. unfold bstep. exploreInst; simpl; auto; try congruence.
unfold parexec_arith_instr; unfold arith_eval_r; exploreInst; simpl; auto; try congruence.
Qed.
@@ -736,7 +736,7 @@ Proof.
induction 1; intros; inv MS.
- exploit function_ptr_translated; eauto. intros (tf & FFP & TRANSF). monadInv TRANSF.
exploit transf_find_bblock; eauto. intros (lbb & VES & c & TAIL).
- exploit verified_schedule_correct; eauto. intros (tbb & CONC & BBEQ).
+ exploit verified_schedule_correct; eauto. intros (tbb & CONC & BBEQ). inv CONC. rename H3 into CONC.
assert (NOOV: size_blocks x.(fn_blocks) <= Ptrofs.max_unsigned).
eapply transf_function_no_overflow; eauto.
@@ -798,7 +798,7 @@ Lemma verified_schedule_nob_checks_alls_bundles bb lb bundle:
List.In bundle lb -> verify_par_bblock bundle = OK tt.
Proof.
unfold verified_schedule_nob. intros H;
- monadInv H. destruct x3.
+ monadInv H. destruct x4.
intros; eapply verified_par_checks_alls_bundles; eauto.
Qed.
diff --git a/mppa_k1c/PrintOp.ml b/mppa_k1c/PrintOp.ml
index 575fa94f..67f87000 100644
--- a/mppa_k1c/PrintOp.ml
+++ b/mppa_k1c/PrintOp.ml
@@ -72,7 +72,7 @@ let int_of_s14 = function
| SHIFT3 -> 3
| SHIFT4 -> 4
-let print_operation reg pp = function
+let print_operation reg pp op = match op with
| Omove, [r1] -> reg pp r1
| Ointconst n, [] -> fprintf pp "%ld" (camlint_of_coqint n)
| Olongconst n, [] -> fprintf pp "%LdL" (camlint64_of_coqint n)
@@ -86,9 +86,15 @@ let print_operation reg pp = function
| Ocast16signed, [r1] -> fprintf pp "int16signed(%a)" reg r1
| Oadd, [r1;r2] -> fprintf pp "%a + %a" reg r1 reg r2
| Oaddimm n, [r1] -> fprintf pp "%a + %ld" reg r1 (camlint_of_coqint n)
+ | Oaddx(s14), [r1; r2] -> fprintf pp "(%a << %d) + %a" reg r1 (int_of_s14 s14) reg r2
+ | Oaddximm(s14, imm), [r1] -> fprintf pp "(%a << %d) + %ld" reg r1 (int_of_s14 s14) (camlint_of_coqint imm)
| Oneg, [r1] -> fprintf pp "-(%a)" reg r1
| Osub, [r1;r2] -> fprintf pp "%a - %a" reg r1 reg r2
+ | Orevsubimm(imm), [r1] -> fprintf pp "%ld - %a" (camlint_of_coqint imm) reg r1
+ | Orevsubx(s14), [r1; r2] -> fprintf pp "%a - (%a << %d)" reg r2 reg r1 (int_of_s14 s14)
+ | Orevsubximm(s14, imm), [r1] -> fprintf pp "%ld - (%a << %d)" (camlint_of_coqint imm) reg r1 (int_of_s14 s14)
| Omul, [r1;r2] -> fprintf pp "%a * %a" reg r1 reg r2
+ | Omulimm(imm), [r1] -> fprintf pp "%a * %ld" reg r1 (camlint_of_coqint imm)
| Omulhs, [r1;r2] -> fprintf pp "%a *hs %a" reg r1 reg r2
| Omulhu, [r1;r2] -> fprintf pp "%a *hu %a" reg r1 reg r2
| Odiv, [r1;r2] -> fprintf pp "%a /s %a" reg r1 reg r2
@@ -101,6 +107,13 @@ let print_operation reg pp = function
| Oorimm n, [r1] -> fprintf pp "%a | %ld" reg r1 (camlint_of_coqint n)
| Oxor, [r1;r2] -> fprintf pp "%a ^ %a" reg r1 reg r2
| Oxorimm n, [r1] -> fprintf pp "%a ^ %ld" reg r1 (camlint_of_coqint n)
+ | Onxor, [r1;r2] -> fprintf pp "~(%a ^ %a)" reg r1 reg r2
+ | Onxorimm n, [r1] -> fprintf pp "~(%a ^ %ld)" reg r1 (camlint_of_coqint n)
+ | Onot, [r1] -> fprintf pp "~%a" reg r1
+ | Oandn, [r1; r2] -> fprintf pp "(~%a) & %a" reg r1 reg r2
+ | Oandnimm n, [r1] -> fprintf pp "(~%a) & %ld" reg r1 (camlint_of_coqint n)
+ | Oorn, [r1;r2] -> fprintf pp "(~%a) | %a" reg r1 reg r2
+ | Oornimm n, [r1] -> fprintf pp "(~%a) | %ld" reg r1 (camlint_of_coqint n)
| Oshl, [r1;r2] -> fprintf pp "%a << %a" reg r1 reg r2
| Oshlimm n, [r1] -> fprintf pp "%a << %ld" reg r1 (camlint_of_coqint n)
| Oshr, [r1;r2] -> fprintf pp "%a >>s %a" reg r1 reg r2
@@ -108,6 +121,10 @@ let print_operation reg pp = function
| Oshru, [r1;r2] -> fprintf pp "%a >>u %a" reg r1 reg r2
| Oshruimm n, [r1] -> fprintf pp "%a >>u %ld" reg r1 (camlint_of_coqint n)
| Oshrximm n, [r1] -> fprintf pp "%a >>x %ld" reg r1 (camlint_of_coqint n)
+ | Ororimm n, [r1] -> fprintf pp "(%a ror %ld)" reg r1 (camlint_of_coqint n)
+ | Omadd, [r1; r2; r3] -> fprintf pp "%a + %a * %a" reg r1 reg r2 reg r3
+ | Omaddimm imm, [r1; r2] -> fprintf pp "%a + %a * %ld" reg r1 reg r2 (camlint_of_coqint imm)
+ | Omsub, [r1; r2; r3] -> fprintf pp "%a - %a * %a" reg r1 reg r2 reg r3
| Omakelong, [r1;r2] -> fprintf pp "makelong(%a,%a)" reg r1 reg r2
| Olowlong, [r1] -> fprintf pp "lowlong(%a)" reg r1
@@ -116,9 +133,15 @@ let print_operation reg pp = function
| Ocast32unsigned, [r1] -> fprintf pp "long32unsigned(%a)" reg r1
| Oaddl, [r1;r2] -> fprintf pp "%a +l %a" reg r1 reg r2
| Oaddlimm n, [r1] -> fprintf pp "%a +l %Ld" reg r1 (camlint64_of_coqint n)
+ | Oaddxl(s14), [r1; r2] -> fprintf pp "(%a <<l %d) +l %a" reg r1 (int_of_s14 s14) reg r2
+ | Oaddxlimm(s14, imm), [r1] -> fprintf pp "(%a <<l %d) +l %Ld" reg r1 (int_of_s14 s14) (camlint64_of_coqint imm)
+ | Orevsublimm(imm), [r1] -> fprintf pp "%Ld -l %a" (camlint64_of_coqint imm) reg r1
+ | Orevsubxl(s14), [r1; r2] -> fprintf pp "%a -l (%a <<l %d)" reg r2 reg r1 (int_of_s14 s14)
+ | Orevsubxlimm(s14, imm), [r1] -> fprintf pp "%Ld -l (%a <<l %d)" (camlint64_of_coqint imm) reg r1 (int_of_s14 s14)
| Onegl, [r1] -> fprintf pp "-l (%a)" reg r1
| Osubl, [r1;r2] -> fprintf pp "%a -l %a" reg r1 reg r2
| Omull, [r1;r2] -> fprintf pp "%a *l %a" reg r1 reg r2
+ | Omullimm(imm), [r1] -> fprintf pp "%a *l %Ld" reg r1 (camlint64_of_coqint imm)
| Omullhs, [r1;r2] -> fprintf pp "%a *lhs %a" reg r1 reg r2
| Omullhu, [r1;r2] -> fprintf pp "%a *lhu %a" reg r1 reg r2
| Odivl, [r1;r2] -> fprintf pp "%a /ls %a" reg r1 reg r2
@@ -129,8 +152,17 @@ let print_operation reg pp = function
| Oandlimm n, [r1] -> fprintf pp "%a &l %Ld" reg r1 (camlint64_of_coqint n)
| Oorl, [r1;r2] -> fprintf pp "%a |l %a" reg r1 reg r2
| Oorlimm n, [r1] -> fprintf pp "%a |l %Ld" reg r1 (camlint64_of_coqint n)
+ | Onorl, [r1; r2] -> fprintf pp "~(%a |l %a)" reg r1 reg r2
+ | Onorlimm n, [r1] -> fprintf pp "~(%a |l %Ld)" reg r1 (camlint64_of_coqint n)
| Oxorl, [r1;r2] -> fprintf pp "%a ^l %a" reg r1 reg r2
| Oxorlimm n, [r1] -> fprintf pp "%a ^l %Ld" reg r1 (camlint64_of_coqint n)
+ | Onxorl, [r1;r2] -> fprintf pp "~(%a ^l %a)" reg r1 reg r2
+ | Onxorlimm n, [r1] -> fprintf pp "~(%a ^l %Ld)" reg r1 (camlint64_of_coqint n)
+ | Onotl, [r1] -> fprintf pp "~%a" reg r1
+ | Oandnl, [r1;r2] -> fprintf pp "(~%a) &l %a" reg r1 reg r2
+ | Oandnlimm n, [r1] -> fprintf pp "(~%a) &l %Ld" reg r1 (camlint64_of_coqint n)
+ | Oornl, [r1;r2] -> fprintf pp "(~%a) |l %a" reg r1 reg r2
+ | Oornlimm n, [r1;r2] -> fprintf pp "(~%a) |l %Ld" reg r1 (camlint64_of_coqint n)
| Oshll, [r1;r2] -> fprintf pp "%a <<l %a" reg r1 reg r2
| Oshllimm n, [r1] -> fprintf pp "%a <<l %Ld" reg r1 (camlint64_of_coqint n)
| Oshrl, [r1;r2] -> fprintf pp "%a >>ls %a" reg r1 reg r2
@@ -138,6 +170,9 @@ let print_operation reg pp = function
| Oshrlu, [r1;r2] -> fprintf pp "%a >>lu %a" reg r1 reg r2
| Oshrluimm n, [r1] -> fprintf pp "%a >>lu %ld" reg r1 (camlint_of_coqint n)
| Oshrxlimm n, [r1] -> fprintf pp "%a >>lx %ld" reg r1 (camlint_of_coqint n)
+ | Omaddl, [r1; r2; r3] -> fprintf pp "%a +l %a *l %a" reg r1 reg r2 reg r3
+ | Omaddlimm imm, [r1; r2] -> fprintf pp "%a +l %a *l %Ld" reg r1 reg r2 (camlint64_of_coqint imm)
+ | Omsubl, [r1; r2; r3] -> fprintf pp "%a -l %a *l %a" reg r1 reg r2 reg r3
| Onegf, [r1] -> fprintf pp "negf(%a)" reg r1
| Oabsf, [r1] -> fprintf pp "absf(%a)" reg r1
@@ -155,14 +190,14 @@ let print_operation reg pp = function
| Ofloatofsingle, [r1] -> fprintf pp "floatofsingle(%a)" reg r1
| Ointoffloat, [r1] -> fprintf pp "intoffloat(%a)" reg r1
| Ointuoffloat, [r1] -> fprintf pp "intuoffloat(%a)" reg r1
- | Olongoffloat, [r1] -> fprintf pp "longoffloat(%a)" reg r1
- | Olonguoffloat, [r1] -> fprintf pp "longuoffloat(%a)" reg r1
- | Ofloatoflong, [r1] -> fprintf pp "floatoflong(%a)" reg r1
- | Ofloatoflongu, [r1] -> fprintf pp "floatoflongu(%a)" reg r1
| Ointofsingle, [r1] -> fprintf pp "intofsingle(%a)" reg r1
| Ointuofsingle, [r1] -> fprintf pp "intuofsingle(%a)" reg r1
| Osingleofint, [r1] -> fprintf pp "singleofint(%a)" reg r1
| Osingleofintu, [r1] -> fprintf pp "singleofintu(%a)" reg r1
+ | Olongoffloat, [r1] -> fprintf pp "longoffloat(%a)" reg r1
+ | Olonguoffloat, [r1] -> fprintf pp "longuoffloat(%a)" reg r1
+ | Ofloatoflong, [r1] -> fprintf pp "floatoflong(%a)" reg r1
+ | Ofloatoflongu, [r1] -> fprintf pp "floatoflongu(%a)" reg r1
| Olongofsingle, [r1] -> fprintf pp "longofsingle(%a)" reg r1
| Olonguofsingle, [r1] -> fprintf pp "longuofsingle(%a)" reg r1
| Osingleoflong, [r1] -> fprintf pp "singleoflong(%a)" reg r1
@@ -184,29 +219,12 @@ let print_operation reg pp = function
| Osellimm(cond0, imm), [r1; rc] ->
print_condition0 reg pp cond0 rc;
fprintf pp " ? %a :l %Ld" reg r1 (camlint64_of_coqint imm)
- | Oaddx(s14), [r1; r2] -> fprintf pp "(%a << %d) + %a" reg r1 (int_of_s14 s14) reg r2
- | Oaddximm(s14, imm), [r1] -> fprintf pp "(%a << %d) + %ld" reg r1 (int_of_s14 s14) (camlint_of_coqint imm)
- | Oaddxl(s14), [r1; r2] -> fprintf pp "(%a <<l %d) +l %a" reg r1 (int_of_s14 s14) reg r2
- | Oaddxlimm(s14, imm), [r1] -> fprintf pp "(%a <<l %d) +l %Ld" reg r1 (int_of_s14 s14) (camlint64_of_coqint imm)
- | Orevsubimm(imm), [r1] -> fprintf pp "%ld - %a" (camlint_of_coqint imm) reg r1
- | Orevsubximm(s14, imm), [r1] -> fprintf pp "%ld - (%a << %d)" (camlint_of_coqint imm) reg r1 (int_of_s14 s14)
- | Orevsublimm(imm), [r1] -> fprintf pp "%Ld -l %a" (camlint64_of_coqint imm) reg r1
- | Orevsubxlimm(s14, imm), [r1] -> fprintf pp "%Ld -l (%a <<l %d)" (camlint64_of_coqint imm) reg r1 (int_of_s14 s14)
- | Orevsubx(s14), [r1; r2] -> fprintf pp "%a - (%a << %d)" reg r2 reg r1 (int_of_s14 s14)
- | Orevsubxl(s14), [r1; r2] -> fprintf pp "%a -l (%a <<l %d)" reg r2 reg r1 (int_of_s14 s14)
- | Omulimm(imm), [r1] -> fprintf pp "%a * %ld" reg r1 (camlint_of_coqint imm)
- | Omullimm(imm), [r1] -> fprintf pp "%a *l %Ld" reg r1 (camlint64_of_coqint imm)
- | Omadd, [r1; r2; r3] -> fprintf pp "%a + %a * %a" reg r1 reg r2 reg r3
- | Omaddl, [r1; r2; r3] -> fprintf pp "%a +l %a *l %a" reg r1 reg r2 reg r3
- | (Omaddimm imm), [r1; r2] -> fprintf pp "%a + %a * %ld" reg r1 reg r2 (camlint_of_coqint imm)
- | (Omaddlimm imm), [r1; r2] -> fprintf pp "%a +l %a *l %Ld" reg r1 reg r2 (camlint64_of_coqint imm)
- | Omsub, [r1; r2; r3] -> fprintf pp "%a - %a * %a" reg r1 reg r2 reg r3
- | Omsubl, [r1; r2; r3] -> fprintf pp "%a -l %a *l %a" reg r1 reg r2 reg r3
| _, _ -> fprintf pp "<bad operator>"
let print_addressing reg pp = function
- | Aindexed n, [r1] -> fprintf pp "%a + %Ld" reg r1 (camlint64_of_ptrofs n)
+ | Aindexed2XS scale, [r1;r2] -> fprintf pp "%a + (%a << %ld)" reg r1 reg r2 (camlint_of_coqint scale)
| Aindexed2, [r1;r2] -> fprintf pp "%a + %a" reg r1 reg r2
+ | Aindexed n, [r1] -> fprintf pp "%a + %Ld" reg r1 (camlint64_of_ptrofs n)
| Aglobal(id, ofs), [] ->
fprintf pp "\"%s\" + %Ld" (extern_atom id) (camlint64_of_ptrofs ofs)
| Ainstack ofs, [] -> fprintf pp "stack(%Ld)" (camlint64_of_ptrofs ofs)
diff --git a/mppa_k1c/TargetPrinter.ml b/mppa_k1c/TargetPrinter.ml
index 674695d9..c9822e13 100644
--- a/mppa_k1c/TargetPrinter.ml
+++ b/mppa_k1c/TargetPrinter.ml
@@ -399,10 +399,10 @@ module Target (*: TARGET*) =
fprintf oc " itouchl 0[%a]\n" ireg addr
| Pdzerol addr ->
fprintf oc " dzerol 0[%a]\n" ireg addr
- | Pafaddd(addr, incr_res) ->
- fprintf oc " afaddd 0[%a] = %a\n" ireg addr ireg incr_res
+(* | Pafaddd(addr, incr_res) ->
+ fprintfoc " afaddd 0[%a] = %a\n" ireg addr ireg incr_res
| Pafaddw(addr, incr_res) ->
- fprintf oc " afaddw 0[%a] = %a\n" ireg addr ireg incr_res
+ fprintfoc " afaddw 0[%a] = %a\n" ireg addr ireg incr_res *) (* see #157 *)
| Palclrd(res, addr) ->
fprintf oc " alclrd %a = 0[%a]\n" ireg res ireg addr
| Palclrw(res, addr) ->
diff --git a/test/monniaux/cycles.h b/test/monniaux/cycles.h
index 8012a013..21541145 100644
--- a/test/monniaux/cycles.h
+++ b/test/monniaux/cycles.h
@@ -46,5 +46,5 @@ static inline cycle_t get_cycle(void) { return 0; }
#ifdef MAX_MEASURES
#define TIMEINIT(i) {_last_stop[i] = get_cycle();}
#define TIMESTOP(i) {cycle_t cur = get_cycle(); _total_cycles[i] += cur - _last_stop[i]; _last_stop[i] = cur;}
- #define TIMEPRINT(n) { for (int i = 0; i <= n; i++) printf("(%d) cycles: %" PRIu64 "\n", i, _total_cycles[i]); }
+ #define TIMEPRINT(n) { for (int i = 0; i <= n; i++) printf("%d cycles: %" PRIu64 "\n", i, _total_cycles[i]); }
#endif
diff --git a/test/monniaux/rules.mk b/test/monniaux/rules.mk
index 4dfeac3b..9d05b4d6 100644
--- a/test/monniaux/rules.mk
+++ b/test/monniaux/rules.mk
@@ -12,6 +12,7 @@ CLOCK=../clock
# Maximum amount of time measures (see cycles.h)
MAX_MEASURES=10
+MEASURES?=time
# Flags common to both compilers, then to gcc, then to ccomp
ALL_CFLAGS+=-Wall -D__K1C_COS__ -DMAX_MEASURES=$(MAX_MEASURES)
@@ -28,12 +29,12 @@ EXECUTE_CYCLES?=k1-cluster --syscall=libstd_scalls.so --cycle-based --
# You can define up to GCC4FLAGS and CCOMP4FLAGS
GCC0FLAGS?=
-GCC1FLAGS?=$(ALL_GCCFLAGS) -O1
+GCC1FLAGS?=$(ALL_GCCFLAGS) -O1 -g
GCC2FLAGS?=$(ALL_GCCFLAGS) -O2
GCC3FLAGS?=$(ALL_GCCFLAGS) -O3
GCC4FLAGS?=
CCOMP0FLAGS?=
-CCOMP1FLAGS?=$(ALL_CCOMPFLAGS) -O1
+CCOMP1FLAGS?=$(ALL_CCOMPFLAGS) -O1 -g
CCOMP2FLAGS?=$(ALL_CCOMPFLAGS)
CCOMP3FLAGS?=
CCOMP4FLAGS?=
@@ -131,7 +132,7 @@ endif
measures.csv: $(OUTFILES)
@echo $(FIRSTLINE) > $@
- @for i in "$(MEASURES)"; do\
+ @for i in $(MEASURES); do\
first=$$(grep "$$i cycles" $(firstword $(OUTFILES)));\
if test ! -z "$$first"; then\
if [ "$$i" != "time" ]; then\
diff --git a/test/monniaux/run_benches.sh b/test/monniaux/run_benches.sh
index a562a117..5f9f22cb 100755
--- a/test/monniaux/run_benches.sh
+++ b/test/monniaux/run_benches.sh
@@ -6,7 +6,7 @@ for bench in $benches; do
echo "(cd $bench && make -j5 run)" >> commands.txt
done
-cat commands.txt | xargs -n1 -I{} -P5 bash -c '{}'
+cat commands.txt | xargs -n1 -I{} -P4 bash -c '{}'
##
# Gather all the CSV files
diff --git a/test/mppa/asm_coverage b/test/mppa/asm_coverage
deleted file mode 160000
-Subproject a9c62b61552a9e9fd0ebf43df5ee0d5b88bb094
diff --git a/test/mppa/coverage.sh b/test/mppa/coverage.sh
index 0a057ff9..42ed4182 100644..100755
--- a/test/mppa/coverage.sh
+++ b/test/mppa/coverage.sh
@@ -1,17 +1,24 @@
-asmdir=$1
+#!/bin/bash
+
+printer=../../mppa_k1c/TargetPrinter.ml
+asmdir=instr/asm/
to_cover_raw=/tmp/to_cover_raw
to_cover=/tmp/to_cover
covered_raw=/tmp/covered_raw
covered=/tmp/covered
-sed -n "s/^.*fprintf oc \" \(.*\) .*/\1/p" ../../mppa_k1c/TargetPrinter.ml > $to_cover_raw
-sed -n "s/^.*fprintf oc \" \(.*\)\\n.*/\1/p" ../../mppa_k1c/TargetPrinter.ml >> $to_cover_raw
-python2.7 coverage_helper.py $to_cover_raw > $to_cover
+# Stop at any error
+set -e
+# Pipes do not mask errors
+set -o pipefail
+
+sed -n "s/^.*fprintf\s\+oc\s*\"\s*\([a-z][^[:space:]]*\)\s.*/\1/p" $printer > $to_cover_raw
+python2.7 coverage_helper.py $to_cover_raw | sort -u > $to_cover
rm -f $covered_raw
-for asm in $(ls $asmdir/*.s); do
- bash asm_coverage/asm-coverage.sh $asm >> $covered_raw
+for asm in $(ls $asmdir/*.ccomp.s); do
+ grep -v ":" $asm | sed -n "s/^\s*\([a-z][a-z0-9.]*\).*/\1/p" | sort -u >> $covered_raw
done
-python2.7 coverage_helper.py $covered_raw > $covered
+python2.7 coverage_helper.py $covered_raw | sort -u > $covered
vimdiff $to_cover $covered
diff --git a/test/mppa/coverage_helper.py b/test/mppa/coverage_helper.py
index b086aca9..cf7a84c9 100644
--- a/test/mppa/coverage_helper.py
+++ b/test/mppa/coverage_helper.py
@@ -1,35 +1,45 @@
import fileinput
+import sys
-occurs = {}
+all_loads_stores = "lbs lbz lhz lo lq ld lhs lws sb sd sh so sq sw".split(" ")
+
+all_bconds = "wnez weqz wltz wgez wlez wgtz dnez deqz dltz dgez dlez dgtz".split(" ")
+
+all_iconds = "ne eq lt ge le gt ltu geu leu gtu all nall any none".split(" ")
+
+all_fconds = "one ueq oeq une olt uge oge ult".split(" ")
+
+replaces_a = [(["cb.", "cmoved."], all_bconds),
+ (["compd.", "compw."], all_iconds),
+ (["fcompd.", "fcompw."], all_fconds),
+ (all_loads_stores, [".xs", ""])]
+replaces_dd = [(["addx", "sbfx"], ["2d", "4d", "8d", "16d"])]
+replaces_dw = [(["addx", "sbfx"], ["2w", "4w", "8w", "16w"])]
+
+macros_binds = {"%a": replaces_a, "%dd": replaces_dd, "%dw": replaces_dw}
+
+def expand_macro(fullinst, macro, replaceTable):
+ inst = fullinst.replace(macro, "")
+ for (searchlist, mods) in replaceTable:
+ if inst in searchlist:
+ return [fullinst.replace(macro, mod) for mod in mods]
+ raise NameError
+
+insts = []
for line in fileinput.input():
- line_noc = line.replace('\n', '')
- if line_noc not in occurs:
- occurs[line_noc] = 0
- occurs[line_noc] += 1
-
-# HACK: Removing all the instructions with "%a", replacing them with all their variations
-# Also removing all instructions starting with '.'
-pruned_occurs = dict(occurs)
-for inst in occurs:
- if inst[0] == '.':
- del pruned_occurs[inst]
- if "%a" not in inst:
- continue
- inst_no_a = inst.replace(".%a", "")
- if inst_no_a in ("compw", "compd"):
- del pruned_occurs[inst]
- for mod in ("ne", "eq", "lt", "gt", "le", "ge", "ltu", "leu", "geu",
- "gtu", "all", "any", "nall", "none"):
- pruned_occurs[inst_no_a + "." + mod] = 1
- elif inst_no_a in ("cb"):
- del pruned_occurs[inst]
- for mod in ("wnez", "weqz", "wltz", "wgez", "wlez", "wgtz", "deqz", "dnez",
- "dltz", "dgez", "dlez", "dgtz"):
- pruned_occurs[inst_no_a + "." + mod] = 1
- else:
- assert False, "Found instruction with %a: " + inst
-occurs = pruned_occurs
-
-for inst in sorted(occurs):
+ fullinst = line[:-1]
+ try:
+ for macro in macros_binds:
+ if macro in fullinst:
+ insts.extend(expand_macro(fullinst, macro, macros_binds[macro]))
+ break
+ else:
+ insts.append(fullinst)
+ except NameError:
+ print >> sys.stderr, fullinst + " could not be found any match for macro " + macro
+ sys.exit(1)
+
+for inst in insts:
print inst
+occurs = {}
diff --git a/test/mppa/instr/Makefile b/test/mppa/instr/Makefile
index 4129b628..69446796 100644
--- a/test/mppa/instr/Makefile
+++ b/test/mppa/instr/Makefile
@@ -5,6 +5,7 @@ CC ?= gcc
CCOMP ?= ccomp
OPTIM ?= -O2
CFLAGS ?= $(OPTIM)
+CCOMPFLAGS ?= $(CFLAGS) -faddx
SIMU ?= k1-mppa
TIMEOUT ?= --signal=SIGTERM 120s
DIFF ?= python2.7 floatcmp.py -reltol .00001
@@ -45,6 +46,7 @@ all: $(BIN)
GREEN=\033[0;32m
RED=\033[0;31m
+YELLOW=\033[0;33m
NC=\033[0m
.PHONY:
@@ -53,7 +55,9 @@ test: $(X86_GCC_OUT) $(GCC_OUT)
for test in $(TESTNAMES); do\
x86out=$(OUTDIR)/$$test.x86-gcc.out;\
gccout=$(OUTDIR)/$$test.gcc.out;\
- if $(DIFF) $$x86out $$gccout > /dev/null; test $${PIPESTATUS[0]} -ne 0; then\
+ if grep "__K1C__" -q $$test.c; then\
+ printf "$(YELLOW)UNTESTED: $$test.c contains an \`#ifdef __K1C__\`\n";\
+ elif $(DIFF) $$x86out $$gccout > /dev/null; test $${PIPESTATUS[0]} -ne 0; then\
>&2 printf "$(RED)ERROR: $$x86out and $$gccout differ$(NC)\n";\
else\
printf "$(GREEN)GOOD: $$x86out and $$gccout concur$(NC)\n";\
@@ -111,7 +115,7 @@ $(BINDIR)/%.gcc.bin: $(ASMDIR)/%.gcc.s $(K1LIB) $(K1CCPATH)
$(BINDIR)/%.ccomp.bin: $(ASMDIR)/%.ccomp.s $(K1LIB) $(CCOMPPATH)
@mkdir -p $(@D)
- $(CCOMP) $(CFLAGS) $(filter-out $(CCOMPPATH),$^) -o $@
+ $(CCOMP) $(CCOMPFLAGS) $(filter-out $(CCOMPPATH),$^) -o $@
# Source to assembly
@@ -125,4 +129,4 @@ $(ASMDIR)/%.gcc.s: $(SRCDIR)/%.c $(K1CCPATH)
$(ASMDIR)/%.ccomp.s: $(SRCDIR)/%.c $(CCOMPPATH)
@mkdir -p $(@D)
- $(CCOMP) $(CFLAGS) -S $< -o $@
+ $(CCOMP) $(CCOMPFLAGS) -S $< -o $@
diff --git a/test/mppa/instr/builtin64.c b/test/mppa/instr/builtin64.c
new file mode 100644
index 00000000..dbbb1886
--- /dev/null
+++ b/test/mppa/instr/builtin64.c
@@ -0,0 +1,17 @@
+#include "framework.h"
+
+BEGIN_TEST(long long)
+ long long *ptr = &c;
+#ifdef __K1C__
+ long long d = c;
+ a = __builtin_k1_alclrd(ptr);
+ c = d;
+ c += a;
+
+ c += __builtin_clzll(a);
+
+ /* Removed the AFADDD builtin who was incorrect in CompCert, see #157 */
+ // a = __builtin_k1_afaddd(ptr, a);
+ // a = __builtin_k1_afaddd(ptr, a);
+#endif
+END_TEST64()
diff --git a/test/mppa/instr/i32.c b/test/mppa/instr/i32.c
index c48531b1..4e389620 100644
--- a/test/mppa/instr/i32.c
+++ b/test/mppa/instr/i32.c
@@ -12,6 +12,14 @@ int tailsum(int a, int b){
return make(a+b);
}
+int fact(int a){
+ int r = 1;
+ int i;
+ for (i = 1; i < a; i++)
+ r *= i;
+ return r;
+}
+
float int2float(int v){
return v;
}
@@ -21,43 +29,43 @@ BEGIN_TEST(int)
c += a&b;
if ((a & 0x1) == 1)
- c += 1;
+ c += fact(1);
else
- c += 2;
+ c += fact(2);
if (a & 0x1 == 0)
- c += 4;
+ c += fact(4);
else
- c += 8;
+ c += fact(8);
b = !(a & 0x01);
if (!b)
- c += 16;
+ c += fact(16);
else
- c += 32;
+ c += fact(32);
c += sum(make(a), make(b));
c += (long long) a;
if (0 > (a & 0x1) - 1)
- c += 64;
+ c += fact(64);
else
- c += 128;
+ c += fact(128);
if (0 >= (a & 0x1))
- c += 256;
+ c += fact(256);
else
- c += 512;
+ c += fact(512);
if ((a & 0x1) > 0)
- c += 1024;
+ c += fact(1024);
else
- c += 2048;
+ c += fact(2048);
if ((a & 0x1) - 1 >= 0)
- c += 4096;
+ c += fact(4096);
else
- c += 8192;
+ c += fact(8192);
c += ((a & 0x1) == (b & 0x1));
c += (a > b);
@@ -65,6 +73,12 @@ BEGIN_TEST(int)
c += (a < b);
c += (a + b) / 2;
c += (int) int2float(a) + (int) int2float(b) + (int) int2float(42.3);
+ c += (a << 4); // addx16w
+ c += (a << 3); // addx8w
+ c += (a << 2); // addx4w
+ c += (a << 1); // addx2w
+
+ c += ~a & b; // andnw
int j;
for (j = 0 ; j < 10 ; j++)
diff --git a/test/mppa/instr/i64.c b/test/mppa/instr/i64.c
index 00eb159d..dc5fa6ee 100644
--- a/test/mppa/instr/i64.c
+++ b/test/mppa/instr/i64.c
@@ -30,6 +30,14 @@ long long random_op(long long a, long long b){
return op(a, b);
}
+long fact(long a){
+ long r = 1;
+ long i;
+ for (i = 1; i < a; i++)
+ r *= i;
+ return r;
+}
+
double long2double(long v){
return v;
}
@@ -43,6 +51,12 @@ BEGIN_TEST(long long)
c += a >> (b & 0x8LL);
c += a >> (b & 0x8ULL);
c += a % b;
+ c += (a << 4); // addx16d
+ c += (a << 3); // addx8d
+ c += (a << 2); // addx4d
+ c += (a << 1); // addx2d
+
+ c += ~a & b; // andnd
long long d = 3;
long long (*op)(long long, long long);
@@ -61,34 +75,39 @@ BEGIN_TEST(long long)
c += (unsigned int) a;
if (0 != (a & 0x1LL))
- c += 1;
+ c += fact(1);
else
- c += 2;
+ c += fact(2);
if (0 > (a & 0x1LL))
- c += 4;
+ c += fact(4);
else
- c += 8;
+ c += fact(8);
if (0 >= (a & 0x1LL) - 1)
- c += 16;
+ c += fact(16);
+ else
+ c += fact(32);
+
+ if (a-41414141 > 0)
+ c += fact(13);
else
- c += 32;
+ c += fact(31);
if (a & 0x1LL > 0)
- c += 64;
+ c += fact(64);
else
- c += 128;
+ c += fact(128);
if ((a & 0x1LL) - 1 >= 0)
- c += 256;
+ c += fact(256);
else
- c += 512;
+ c += fact(512);
if (0 == (a & 0x1LL))
- c += 1024;
+ c += fact(1024);
else
- c += 2048;
+ c += fact(2048);
c += ((a & 0x1LL) == (b & 0x1LL));
c += (a >= b);