Merge branch 'mppa-work' of gricad-gitlab.univ-grenoble-alpes.fr:sixcy/CompCert into mppa-work

author: David Monniaux <david.monniaux@univ-grenoble-alpes.fr> 2019-08-31 09:26:36 +0200
committer: David Monniaux <david.monniaux@univ-grenoble-alpes.fr> 2019-08-31 09:26:36 +0200
commit: dc8b24fa2dd7ede561dd75458899cf42e9be09d2 (patch)
tree: 3389a4f3f07af151b5a30c0e23ed46c4cafdd98b
parent: 7c790ecd1c32b529a5e5e5977ce84cfade8e1eb6 (diff)
parent: ccd2fa5638e50b5fd8308b4b0c26531f911ff087 (diff)
download: compcert-kvx-dc8b24fa2dd7ede561dd75458899cf42e9be09d2.tar.gz
compcert-kvx-dc8b24fa2dd7ede561dd75458899cf42e9be09d2.zip
29 files changed, 435 insertions, 689 deletions
diff --git a/.gitmodules b/.gitmodules
index 955c7fc2..e69de29b 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +0,0 @@
-[submodule "test/mppa/asm_coverage"]
-	path = test/mppa/asm_coverage
-	url = git@gricad-gitlab.univ-grenoble-alpes.fr:sixcy/asm-scanner.git
diff --git a/mppa_k1c/Archi.v b/mppa_k1c/Archi.v
index 113f5d51..96571841 100644
--- a/mppa_k1c/Archi.v
+++ b/mppa_k1c/Archi.v
@@ -14,10 +14,9 @@
 (*                                                                     *)
 (* *********************************************************************)
 
-(** Architecture-dependent parameters for RISC-V *)
+(** Architecture-dependent parameters for MPPA K1c. Mostly copied from the Risc-V backend *)
 
 Require Import ZArith.
-(*From Flocq*)
 Require Import Binary Bits.
 
 Definition ptr64 := true.
diff --git a/mppa_k1c/Asm.v b/mppa_k1c/Asm.v
index 620aa91e..a0c5e71c 100644
--- a/mppa_k1c/Asm.v
+++ b/mppa_k1c/Asm.v
@@ -15,7 +15,13 @@
 (*                                                                     *)
 (* *********************************************************************)
 
-(** Abstract syntax and semantics for K1c assembly language. *)
+(** * Abstract syntax for K1c textual assembly language.
+
+    Each emittable instruction is defined here. ';;' is also defined as an instruction.
+    The goal of this representation is to stay compatible with the rest of the generic backend of CompCert
+    We define [unfold : list bblock -> list instruction]
+    An Asm function is then defined as : [fn_sig], [fn_blocks], [fn_code], and a proof of [unfold fn_blocks = fn_code]
+    [fn_code] has no semantic. Instead, the semantic of Asm is given by using the AsmVLIW semantic on [fn_blocks] *)
 
 Require Import Coqlib.
 Require Import Maps.
@@ -57,10 +63,6 @@ Inductive instruction : Type :=
   | Psemi                                           (**r semi colon separating bundles *)
   | Pnop                                            (**r instruction that does nothing *)
 
-  (** builtins *)
-  | Pclzll (rd rs: ireg)
-  | Pstsud (rd rs1 rs2: ireg)
-
   (** Control flow instructions *)
   | Pget    (rd: ireg) (rs: preg)                   (**r get system register *)
   | Pset    (rd: preg) (rs: ireg)                   (**r set system register *)
@@ -97,10 +99,12 @@ Inductive instruction : Type :=
   | Piinvals (addr: ireg)
   | Pitouchl (addr: ireg)
   | Pdzerol (addr: ireg)
-  | Pafaddd (addr: ireg) (incr_res: ireg)
-  | Pafaddw (addr: ireg) (incr_res: ireg)
+(*| Pafaddd (addr: ireg) (incr_res: ireg)
+  | Pafaddw (addr: ireg) (incr_res: ireg) *) (* see #157 *)
   | Palclrd (dst: ireg) (addr: ireg)
   | Palclrw (dst: ireg) (addr: ireg)
+  | Pclzll (rd rs: ireg)
+  | Pstsud (rd rs1 rs2: ireg)
             
   (** Loads **)
   | Plb     (rd: ireg) (ra: ireg) (ofs: addressing)     (**r load byte *)
@@ -571,12 +575,6 @@ Definition genv := Genv.t fundef unit.
 
 Definition function_proj (f: function) := Asmvliw.mkfunction (fn_sig f) (fn_blocks f).
 
-(* 
-Definition fundef_proj (fu: fundef) : Asmblock.fundef := transf_fundef function_proj fu.
-
-Definition program_proj (p: program) : Asmblock.program := transform_program fundef_proj p.
- *)
-
 Definition fundef_proj (fu: fundef) : Asmvliw.fundef := 
   match fu with
   | Internal f => Internal (function_proj f)
@@ -650,35 +648,6 @@ Proof.
   rewrite transf_function_proj. auto.
 Qed.
 
-(* Definition transf_globdef (gd: globdef Asmblock.fundef unit) : globdef fundef unit :=
-  match gd with
-  | Gfun f => Gfun (transf_fundef f)
-  | Gvar gu => Gvar gu
-  end.
-
-Lemma transf_globdef_proj: forall gd, globdef_proj (transf_globdef gd) = gd.
-Proof.
-  intros gd. destruct gd as [f|v]; simpl; auto.
-  rewrite transf_fundef_proj; auto.
-Qed.
-
-Fixpoint transf_prog_defs (l: list (ident * globdef Asmblock.fundef unit))
-                            : list (ident * globdef fundef unit) :=
-  match l with
-  | nil => nil
-  | (i, gd) :: l => (i, transf_globdef gd) :: transf_prog_defs l
-  end.
-
-Lemma transf_prog_proj: forall p, prog_defs p = prog_defs_proj (transf_prog_defs (prog_defs p)).
-Proof.
-  intros p. destruct p as [defs pub main]. simpl.
-  induction defs; simpl; auto.
-  destruct a as [i gd]. simpl.
-  rewrite transf_globdef_proj.
-  congruence.
-Qed.
- *)
-
 Definition transf_program : Asmvliw.program -> program := transform_program transf_fundef.
 
 Lemma program_equals {A B: Type} : forall (p1 p2: AST.program A B),
@@ -716,7 +685,6 @@ Proof.
   intros. congruence.
 Qed.
 
-(* I think it is a special case of Asmblock -> Asm. Very handy to have *)
 Lemma match_program_transf:
   forall p tp, match_prog p tp -> transf_program p = tp.
 Proof.
diff --git a/mppa_k1c/Asmaux.v b/mppa_k1c/Asmaux.v
index 85359658..94b39f4e 100644
--- a/mppa_k1c/Asmaux.v
+++ b/mppa_k1c/Asmaux.v
@@ -1,5 +1,5 @@
 Require Import Asm.
 Require Import AST.
 
-(* Constant only needed by Asmexpandaux.ml *)
+(** Constant only needed by Asmexpandaux.ml *)
 Program Definition dummy_function := {| fn_code := nil; fn_sig := signature_main; fn_blocks := nil |}.
diff --git a/mppa_k1c/Asmblock.v b/mppa_k1c/Asmblock.v
index ddb7ce7d..9b4489c5 100644
--- a/mppa_k1c/Asmblock.v
+++ b/mppa_k1c/Asmblock.v
@@ -15,7 +15,7 @@
 (*                                                                     *)
 (* *********************************************************************)
 
-(** Abstract syntax and semantics for K1c assembly language. *)
+(** Sequential block semantics for K1c assembly. The syntax is given in AsmVLIW *)
 
 Require Import Coqlib.
 Require Import Maps.
@@ -172,7 +172,6 @@ Proof.
 Qed.
 
 Ltac bblock_auto_correct := (apply non_empty_bblock_refl; try discriminate; try (left; discriminate); try (right; discriminate)).
-(* Local Obligation Tactic := bblock_auto_correct. *)
 
 Lemma Istrue_proof_irrelevant (b: bool): forall (p1 p2:Is_true b), p1=p2.
 Proof.
@@ -250,9 +249,6 @@ Proof.
   intros. destruct bb as [hd bdy ex COR]. simpl. unfold no_header; unfold stick_header; simpl. reflexivity.
 Qed.
 
-
-
-
 (** * Sequential Semantics of basic blocks *)
 Section RELSEM.
 
@@ -286,7 +282,7 @@ Definition exec_store_regxs (chunk: memory_chunk) (rs: regset) (m: mem) (s a ro:
 
 (** * basic instructions *)
 
-Definition exec_basic_instr (bi: basic) (rs: regset) (m: mem) : outcome := parexec_basic_instr ge bi rs rs m m.
+Definition exec_basic_instr (bi: basic) (rs: regset) (m: mem) : outcome := bstep ge bi rs rs m m.
 
 Fixpoint exec_body (body: list basic) (rs: regset) (m: mem): outcome :=
   match body with
@@ -302,29 +298,8 @@ Fixpoint exec_body (body: list basic) (rs: regset) (m: mem): outcome :=
 
 Definition goto_label (f: function) (lbl: label) (rs: regset) (m: mem) : outcome := par_goto_label f lbl rs rs m.
 
-(** Evaluating a branch 
-
-Warning: in m PC is assumed to be already pointing on the next instruction !
-
-*)
 Definition eval_branch (f: function) (l: label) (rs: regset) (m: mem) (res: option bool) : outcome := par_eval_branch f l rs rs m res.
 
-(** Execution of a single control-flow instruction [i] in initial state [rs] and
-    [m].  Return updated state.
-
-    As above: PC is assumed to be incremented on the next block before the control-flow instruction
-
-    For instructions that correspond tobuiltin
-    actual RISC-V instructions, the cases are straightforward
-    transliterations of the informal descriptions given in the RISC-V
-    user-mode specification.  For pseudo-instructions, refer to the
-    informal descriptions given above.
-
-    Note that we set to [Vundef] the registers used as temporaries by
-    the expansions of the pseudo-instructions, so that the RISC-V code
-    we generate cannot use those registers to hold values that must
-    survive the execution of the pseudo-instruction. *)
-
 Definition exec_control (f: function) (oc: option control) (rs: regset) (m: mem) : outcome := parexec_control ge f oc rs rs m.
 
 Definition exec_bblock (f: function) (b: bblock) (rs0: regset) (m: mem) : outcome :=
@@ -368,16 +343,11 @@ Inductive step: state -> trace -> state -> Prop :=
       step (State rs m) t (State rs' m')
   .
 
-
-
 End RELSEM.
 
-
-
 Definition semantics (p: program) :=
   Semantics step (initial_state p) final_state (Genv.globalenv p).
 
-
 Definition data_preg (r: preg) : bool :=
   match r with
   | RA  => false
@@ -386,4 +356,3 @@ Definition data_preg (r: preg) : bool :=
   | IR _   => true
   | PC     => false
   end.
-
diff --git a/mppa_k1c/Asmblockdeps.v b/mppa_k1c/Asmblockdeps.v
index 9855afa2..2d144bb6 100644
--- a/mppa_k1c/Asmblockdeps.v
+++ b/mppa_k1c/Asmblockdeps.v
@@ -1,3 +1,10 @@
+(** * Translation from Asmblock to AbstractBB 
+
+    We define a specific instance of AbstractBB, named L, translate bblocks from Asmblock into this instance
+    AbstractBB will then define two semantics for L : a sequential, and a semantic one
+    We prove a bisimulation between the parallel semantics of L and AsmVLIW
+    From this, we also deduce a bisimulation between the sequential semantics of L and Asmblock *)
+
 Require Import AST.
 Require Import Asmblock.
 Require Import Asmblockgenproof0.
@@ -17,6 +24,8 @@ Require Import Chunks.
 
 Open Scope impure.
 
+(** Definition of L *)
+
 Module P<: ImpParam.
 Module R := Pos.
 
@@ -459,18 +468,6 @@ Qed.
 Hint Resolve op_eq_correct: wlp.
 Global Opaque op_eq_correct.
 
-
-(* QUICK FIX WITH struct_eq *)
-
-(* Definition op_eq (o1 o2: op): ?? bool := struct_eq o1 o2. 
-
-Theorem op_eq_correct o1 o2: 
- WHEN op_eq o1 o2 ~> b THEN b=true -> o1 = o2.
-Proof.
-  wlp_simplify.
-Qed.
-*)
-
 End IMPPARAM.
 
 End P.
@@ -550,7 +547,7 @@ Proof.
   - unfold ppos. unfold pmem. discriminate.
 Qed.
 
-(** Inversion functions, used for debugging *)
+(** Inversion functions, used for debug traces *)
 
 Definition pos_to_ireg (p: R.t) : option gpreg :=
   match p with
@@ -574,9 +571,6 @@ Definition inv_ppos (p: R.t) : option preg :=
        end
   end.
 
-
-(** Traduction Asmblock -> Asmblockdeps *)
-
 Notation "a @ b" := (Econs a b) (at level 102, right associativity).
 
 Definition trans_control (ctl: control) : inst :=
@@ -720,7 +714,7 @@ Proof.
   intros. congruence.
 Qed.
 
-(** Parallelizability of a bblock (bundle) *)
+(** Parallelizability test of a bblock (bundle), and bisimulation of the Asmblock and L parallel semantics *)
 
 Module PChk := ParallelChecks L PosPseudoRegSet.
 
@@ -846,7 +840,7 @@ Theorem bisimu_par_wio_basic ge fn rsr rsw mr mw sr sw bi:
   Ge = Genv ge fn ->
   match_states (State rsr mr) sr ->
   match_states (State rsw mw) sw ->
-  match_outcome (parexec_basic_instr ge bi rsr rsw mr mw) (inst_prun Ge (trans_basic bi) sw sr sr).
+  match_outcome (bstep ge bi rsr rsw mr mw) (inst_prun Ge (trans_basic bi) sw sr sr).
 Proof.
 
 (* a little tactic to automate reasoning on preg_eq *)
@@ -1004,7 +998,7 @@ Proof.
   induction bdy as [|i bdy]; simpl; eauto. 
   intros.
   exploit (bisimu_par_wio_basic ge fn rsr rsw mr mw sr sw i); eauto.
-  destruct (parexec_basic_instr _ _ _ _ _ _); simpl.
+  destruct (bstep _ _ _ _ _ _); simpl.
   - intros (s' & X1 & X2). rewrite X1; simpl; eauto.
   - intros X; rewrite X; simpl; auto.
 Qed.
@@ -1015,7 +1009,7 @@ Theorem bisimu_par_control ex sz aux ge fn rsr rsw mr mw sr sw:
   match_states (State rsw mw) sw ->
   match_outcome (parexec_control ge fn ex (incrPC (Ptrofs.repr sz) rsr) (rsw#PC <- aux) mw) (inst_prun Ge (trans_pcincr sz (trans_exit ex)) sw sr sr).
 Proof.
-  intros GENV MSR MSW; unfold parexec_exit.
+  intros GENV MSR MSW; unfold estep.
   simpl in *. inv MSR. inv MSW.
   destruct ex.
   - destruct c; destruct i; try discriminate; simpl.
@@ -1071,9 +1065,9 @@ Theorem bisimu_par_exit ex sz ge fn rsr rsw mr mw sr sw:
   Ge = Genv ge fn ->
   match_states (State rsr mr) sr ->
   match_states (State rsw mw) sw ->
-  match_outcome (parexec_exit ge fn ex (Ptrofs.repr sz) rsr rsw mw) (inst_prun Ge (trans_pcincr sz (trans_exit ex)) sw sr sr).
+  match_outcome (estep ge fn ex (Ptrofs.repr sz) rsr rsw mw) (inst_prun Ge (trans_pcincr sz (trans_exit ex)) sw sr sr).
 Proof.
-  intros; unfold parexec_exit.
+  intros; unfold estep.
   exploit (bisimu_par_control ex sz rsw#PC ge fn rsr rsw mr mw sr sw); eauto.
   cutrewrite (rsw # PC <- (rsw PC) = rsw); auto.
   apply extensionality. intros; destruct x; simpl; auto.
@@ -1162,7 +1156,7 @@ Proof.
   destruct (prun_iw _ _ _ _); simpl; eauto.
 Qed.
 
-(* sequential execution *)
+(** sequential execution *)
 Theorem bisimu_basic ge fn bi rs m s:
   Ge = Genv ge fn ->
   match_states (State rs m) s ->
@@ -1264,7 +1258,6 @@ Qed.
 
 End SECT_PAR.
 
-
 Section SECT_BBLOCK_EQUIV.
 
 Variable Ge: genv.
@@ -1294,6 +1287,8 @@ Proof.
   * discriminate.
 Qed.
 
+(** Used for debug traces *)
+
 Definition gpreg_name (gpr: gpreg) :=
   match gpr with
   | GPR0 => Str ("GPR0") | GPR1 => Str ("GPR1") | GPR2 => Str ("GPR2") | GPR3 => Str ("GPR3") | GPR4 => Str ("GPR4")
@@ -1645,4 +1640,4 @@ Definition bblock_simub: Asmvliw.bblock -> Asmvliw.bblock -> bool := pure_bblock
 Lemma bblock_simub_correct p1 p2 ge fn: bblock_simub p1 p2 = true -> Asmblockgenproof0.bblock_simu ge fn p1 p2.
 Proof.
  eapply (pure_bblock_simu_test_correct true).
-Qed.
-\ No newline at end of file
+Qed.
diff --git a/mppa_k1c/Asmblockgen.v b/mppa_k1c/Asmblockgen.v
index e5b9b35a..7e415c2a 100644
--- a/mppa_k1c/Asmblockgen.v
+++ b/mppa_k1c/Asmblockgen.v
@@ -15,7 +15,8 @@
 (*                                                                     *)
 (* *********************************************************************)
 
-(** Translation from Machblock to K1c assembly language (Asmblock) *)
+(** * Translation from Machblock to K1c assembly language (Asmblock) 
+    Inspired from the Mach->Asm pass of other backends, but adapted to the block structure *)
 
 Require Archi.
 Require Import Coqlib Errors.
@@ -41,23 +42,15 @@ Definition ireg_of (r: mreg) : res ireg :=
 Definition freg_of (r: mreg) : res freg :=
   match preg_of r with IR mr => OK mr | _ => Error(msg "Asmgenblock.freg_of") end.
 
-(*
-(** Decomposition of 32-bit integer constants.  They are split into either
-  small signed immediates that fit in 12-bits, or, if they do not fit,
-  into a (20-bit hi, 12-bit lo) pair where lo is sign-extended. *)
-
-*)
 Inductive immed32 : Type :=
   | Imm32_single (imm: int).
 
 Definition make_immed32 (val: int) := Imm32_single val.
 
-(** Likewise, for 64-bit integer constants. *)
 Inductive immed64 : Type :=
   | Imm64_single (imm: int64)
 .
 
-(* For now, let's suppose all instructions of K1c can handle 64-bits immediate *)
 Definition make_immed64 (val: int64) := Imm64_single val.
 
 Notation "a ::g b" := (cons (A:=instruction) a b) (at level 49, right associativity).
@@ -66,12 +59,6 @@ Notation "a ::b lb" := ((bblock_single_inst a) :: lb) (at level 49, right associ
 Notation "a ++g b" := (app (A:=instruction) a b) (at level 49, right associativity).
 Notation "a @@ b" := (app a b) (at level 49, right associativity).
 
-(** Smart constructors for arithmetic operations involving
-  a 32-bit or 64-bit integer constant.  Depending on whether the
-  constant fits in 12 bits or not, one or several instructions
-  are generated as required to perform the operation
-  and prepended to the given instruction sequence [k]. *)
-
 Definition loadimm32 (r: ireg) (n: int) :=
   match make_immed32 n with
   | Imm32_single imm => Pmake r imm
@@ -92,10 +79,6 @@ Definition orimm32  := opimm32 Porw Poriw.
 Definition norimm32 := opimm32 Pnorw Pnoriw.
 Definition xorimm32 := opimm32 Pxorw Pxoriw.
 Definition nxorimm32 := opimm32 Pnxorw Pnxoriw.
-(*
-Definition sltimm32 := opimm32 Psltw Psltiw.
-Definition sltuimm32 := opimm32 Psltuw Psltiuw.
-*)
 
 Definition loadimm64 (r: ireg) (n: int64) :=
   match make_immed64 n with
@@ -118,11 +101,6 @@ Definition norimm64  := opimm64 Pnorl  Pnoril.
 Definition nandimm64 := opimm64 Pnandl Pnandil.
 Definition nxorimm64 := opimm64 Pnxorl  Pnxoril.
 
-(*
-Definition sltimm64 := opimm64 Psltl Psltil.
-Definition sltuimm64 := opimm64 Psltul Psltiul.
-*)
-
 Definition addptrofs (rd rs: ireg) (n: ptrofs) :=
   if Ptrofs.eq_dec n Ptrofs.zero then
     Pmv rd rs
@@ -170,19 +148,6 @@ Definition transl_opt_compuimm
     transl_compi c Unsigned r1 n lbl k
   .
 
-(* Definition transl_opt_compuimm
-    (n: int) (c: comparison) (r1: ireg) (lbl: label) (k: code) : list instruction :=
-  loadimm32 RTMP n ::g (transl_comp c Unsigned r1 RTMP lbl k). *)
-
-(*   match select_comp n c with
-  | Some Ceq => Pcbu BTweqz r1 lbl ::g k
-  | Some Cne => Pcbu BTwnez r1 lbl ::g k
-  | Some _   => nil (* Never happens *)
-  | None     => loadimm32 RTMP n ::g (transl_comp c Unsigned r1 RTMP lbl k)
-  end
-  .
- *)
-
 Definition select_compl (n: int64) (c: comparison) : option comparison :=
   if Int64.eq n Int64.zero then
     match c with
@@ -1052,7 +1017,7 @@ Definition make_epilogue (f: Machblock.function) (k: code) :=
   (loadind_ptr SP f.(fn_retaddr_ofs) GPRA) 
   ::g Pset RA GPRA ::g Pfreeframe f.(fn_stacksize) f.(fn_link_ofs) ::g k.
 
-(** Translation of a Mach instruction. *)
+(** Translation of a Machblock instruction. *)
 
 Definition transl_instr_basic (f: Machblock.function) (i: Machblock.basic_inst)
                               (ep: bool) (k: bcode) :=
@@ -1096,20 +1061,12 @@ Definition transl_instr_control (f: Machblock.function) (oi: option Machblock.co
         transl_cbranch cond args lbl nil
     | MBreturn =>
         OK (make_epilogue f (Pret ::g nil))
-      (*OK (make_epilogue f (Pj_r RA f.(Mach.fn_sig) :: k))*) 
     | MBjumptable arg tbl =>
       do r <- ireg_of arg;
       OK (Pjumptable r tbl ::g nil)
     end
   end.
 
-(* TODO - dans l'idée, transl_instr_control renvoie une liste d'instructions sous la forme :
-  * transl_instr_control _ _ _ = lb ++ (ctl :: nil), où lb est une liste de basics, ctl est un control_inst
-
-  Il faut arriver à exprimer cet aspect là ; extraire le lb, le rajouter dans le body ; et extraire le ctl
-  qu'on met dans le exit
-*)
-
 (** Translation of a code sequence *)
 
 Definition fp_is_parent (before: bool) (i: Machblock.basic_inst) : bool :=
@@ -1120,8 +1077,7 @@ Definition fp_is_parent (before: bool) (i: Machblock.basic_inst) : bool :=
   | _ => false
   end.
 
-(** This is the naive definition that we no longer use because it
-  is not tail-recursive.  It is kept as specification. *)
+(** This is the naive definition, which is not tail-recursive unlike the other backends *)
 
 Fixpoint transl_basic_code (f: Machblock.function) (il: list Machblock.basic_inst) (it1p: bool) :=
   match il with
@@ -1147,20 +1103,11 @@ Definition transl_basic_code' (f: Machblock.function) (il: list Machblock.basic_
   transl_basic_rec f il it1p (fun c => OK c). *)
 
 (** Translation of a whole function.  Note that we must check
-  that the generated code contains less than [2^32] instructions,
+  that the generated code contains less than [2^64] instructions,
   otherwise the offset part of the [PC] code pointer could wrap
   around, leading to incorrect executions. *)
 
-(* Local Obligation Tactic := bblock_auto_correct. *)
-
-(* Program Definition gen_bblock_noctl (hd: list label) (c: list basic) :=
-  match c with
-  | nil => {| header := hd; body := Pnop::nil; exit := None |}
-  | i::c => {| header := hd; body := i::c; exit := None |}
-  end.
- *)
-
-(** Can generate two bblocks if the ctl is a PExpand (since the PExpand must be alone in its block) *)
+(* gen_bblocks can generate two bblocks if the ctl is a PExpand (since the PExpand must be alone in its block) *)
 Program Definition gen_bblocks (hd: list label) (c: list basic) (ctl: list instruction) :=
   match (extract_ctl ctl) with
   | None => 
@@ -1168,7 +1115,6 @@ Program Definition gen_bblocks (hd: list label) (c: list basic) (ctl: list instr
       | nil => {| header := hd; body := Pnop::nil; exit := None |} :: nil
       | i::c => {| header := hd; body := ((i::c) ++ extract_basic ctl); exit := None |} :: nil
       end
-(* gen_bblock_noctl hd (c ++ (extract_basic ctl)) :: nil *)
   | Some (PExpand (Pbuiltin ef args res)) =>
       match c with
       | nil => {| header := hd; body := nil; exit := Some (PExpand (Pbuiltin ef args res)) |} :: nil
diff --git a/mppa_k1c/Asmblockgenproof0.v b/mppa_k1c/Asmblockgenproof0.v
index 89d41017..decc3e2e 100644
--- a/mppa_k1c/Asmblockgenproof0.v
+++ b/mppa_k1c/Asmblockgenproof0.v
@@ -1,3 +1,9 @@
+(** * "block" version of Asmgenproof0
+
+    This module is largely adapted from Asmgenproof0.v of the other backends
+    It needs to stand apart because of the block structure, and the distinction control/basic that there isn't in the other backends
+    It has similar definitions than Asmgenproof0, but adapted to this new structure *)
+
 Require Import Coqlib.
 Require Intv.
 Require Import AST.
@@ -31,19 +37,13 @@ Lemma ireg_of_eq:
   forall r r', ireg_of r = OK r' -> preg_of r = IR r'.
 Proof.
   unfold ireg_of; intros. destruct (preg_of r); inv H; auto.
-(*   destruct b. all: try discriminate.
-  inv H1. auto.
- *)Qed.
+Qed.
 
-(* FIXME - Replaced FR by IR for MPPA *)
 Lemma freg_of_eq:
   forall r r', freg_of r = OK r' -> preg_of r = IR r'.
 Proof.
   unfold freg_of; intros. destruct (preg_of r); inv H; auto.
-(*   destruct b. all: try discriminate.
-  inv H1. auto.
- *)Qed.
-
+Qed.
 
 Lemma preg_of_injective:
   forall r1 r2, preg_of r1 = preg_of r2 -> r1 = r2.
@@ -277,24 +277,6 @@ Proof.
   exploit preg_of_injective; eauto. congruence.
 Qed.
 
-(* Lemma agree_undef_regs2:
-  forall ms sp rl rs rs',
-  agree (Mach.undef_regs rl ms) sp rs ->
-  (forall r', data_preg r' = true -> preg_notin r' rl -> rs'#r' = rs#r') ->
-  agree (Mach.undef_regs rl ms) sp rs'.
-Proof.
-  intros. destruct H. split; auto.
-  rewrite <- agree_sp0. apply H0; auto.
-  rewrite preg_notin_charact. intros. apply not_eq_sym. apply preg_of_not_SP.
-  intros. destruct (In_dec mreg_eq r rl).
-  rewrite Mach.undef_regs_same; auto.
-  rewrite H0; auto.
-  apply preg_of_data.
-  rewrite preg_notin_charact. intros; red; intros. elim n.
-  exploit preg_of_injective; eauto. congruence.
-Qed.
- *)
-
 Lemma agree_set_undef_mreg:
   forall ms sp rs r v rl rs',
   agree ms sp rs ->
@@ -607,15 +589,13 @@ Hypothesis transf_function_len:
   forall f tf, transf_function f = OK tf -> size_blocks (fn_blocks tf) <= Ptrofs.max_unsigned.
 
 
-(* NB: the hypothesis in comment on [b] is not needed in the proof ! *)
 Lemma return_address_exists:
-  forall b f (* sg ros *) c, (* b.(MB.exit) = Some (MBcall sg ros) -> *) is_tail (b :: c) f.(MB.fn_code) ->
+  forall b f c, is_tail (b :: c) f.(MB.fn_code) ->
   exists ra, return_address_offset f c ra.
 Proof.
   intros. destruct (transf_function f) as [tf|] eqn:TF.
   + exploit transf_function_inv; eauto. intros (tc1 & ep1 & TR1 & TL1).
     exploit transl_blocks_tail; eauto. intros (tc2 & ep2 & TR2 & TL2).
-(*     unfold return_address_offset. *)
     monadInv TR2.
     assert (TL3: is_tail x0 (fn_blocks tf)).
     { apply is_tail_trans with tc1; auto. 
@@ -632,7 +612,7 @@ Qed.
 End RETADDR_EXISTS.
 
 (** [transl_code_at_pc pc fb f c ep tf tc] holds if the code pointer [pc] points
-  within the Asm code generated by translating Mach function [f],
+  within the Asmblock code generated by translating Machblock function [f],
   and [tc] is the tail of the generated code at the position corresponding
   to the code pointer [pc]. *)
 
@@ -850,18 +830,6 @@ Proof.
   apply exec_straight_step with rs2 m2; auto.
 Qed.
 
-(* Theorem exec_straight_bblock:
-  forall rs1 m1 rs2 m2 rs3 m3 b,
-  exec_straight (body b) rs1 m1 nil rs2 m2 ->
-  exec_control_rel (exit b) b rs2 m2 rs3 m3 ->
-  exec_bblock_rel b rs1 m1 rs3 m3.
-Proof.
-  intros.
-  econstructor; eauto. unfold exec_bblock. erewrite exec_straight_body; eauto.
-  inv H0. auto.
-Qed. *)
-
-
 Lemma exec_straight_two:
   forall i1 i2 c rs1 m1 rs2 m2 rs3 m3,
   exec_basic_instr ge i1 rs1 m1 = Next rs2 m2 ->
@@ -973,18 +941,6 @@ Proof.
   - reflexivity.
 Qed.
 
-(* Lemma exec_straight_pc':
-  forall c rs1 m1 rs2 m2,
-  exec_straight c rs1 m1 nil rs2 m2 ->
-  rs2 PC = rs1 PC.
-Proof.
-  induction c; intros; try (inv H; fail).
-  inv H.
-  - erewrite exec_basic_instr_pc; eauto.
-  - rewrite (IHc rs3 m3 rs2 m2); auto.
-    erewrite exec_basic_instr_pc; eauto.
-Qed. *)
-
 Lemma exec_straight_pc:
   forall c c' rs1 m1 rs2 m2,
   exec_straight c rs1 m1 c' rs2 m2 ->
@@ -997,25 +953,6 @@ Proof.
     erewrite exec_basic_instr_pc; eauto.
 Qed.
 
-(* Lemma exec_straight_through:
-  forall c i b lb rs1 m1 rs2 m2 rs2' m2',
-  bblock_basic_ctl c i = b ->
-  exec_straight c rs1 m1 nil rs2 m2 ->
-  nextblock b rs2 = rs2' -> m2 = m2' ->
-  exec_control ge fn i rs2' m2' = Next rs2' m2' -> (* if the control does not jump *)
-  exec_straight_blocks (b::lb) rs1 m1 lb rs2' m2'.
-Proof.
-  intros. subst. destruct i.
-  - constructor 1. 
-      + unfold exec_bblock. simpl body. erewrite exec_straight_body; eauto.
-      + rewrite <- (exec_straight_pc c nil rs1 m1 rs2 m2'); auto.
-  - destruct c as [|i c]; try (inv H0; fail).
-    constructor 1.
-    + unfold exec_bblock. simpl body. erewrite exec_straight_body; eauto.
-    + rewrite <- (exec_straight_pc (i ::i c) nil rs1 m1 rs2 m2'); auto.
-Qed.
- *)
-
 Lemma regset_same_assign (rs: regset) r:
   rs # r <- (rs r) = rs.
 Proof.
@@ -1034,8 +971,6 @@ Proof.
   simpl; auto. unfold nextblock, incrPC; simpl. Simpl. erewrite exec_straight_pc; eauto.
 Qed.
 
-
-
 (** The following lemmas show that straight-line executions
   (predicate [exec_straight_blocks]) correspond to correct Asm executions. *)
 
@@ -1086,7 +1021,6 @@ Qed.
 
 End STRAIGHTLINE.
 
-
 (** * Properties of the Machblock call stack *)
 
 Section MATCH_STACK.
diff --git a/mppa_k1c/Asmblockgenproof1.v b/mppa_k1c/Asmblockgenproof1.v
index bc549b4a..e1e2b0b0 100644
--- a/mppa_k1c/Asmblockgenproof1.v
+++ b/mppa_k1c/Asmblockgenproof1.v
@@ -15,6 +15,8 @@
 (*                                                                     *)
 (* *********************************************************************)
 
+(** * Proof of correctness for individual instructions *)
+
 Require Import Coqlib Errors Maps.
 Require Import AST Integers Floats Values Memory Globalenvs.
 Require Import Op Locations Machblock Conventions.
@@ -86,31 +88,6 @@ Section CONSTRUCTORS.
 Variable ge: genv.
 Variable fn: function.
 
-(*
-(** 32-bit integer constants and arithmetic *)
-(*
-Lemma load_hilo32_correct:
-  forall rd hi lo k rs m,
-  exists rs',
-     exec_straight ge fn (load_hilo32 rd hi lo k) rs m k rs' m
-  /\ rs'#rd = Vint (Int.add (Int.shl hi (Int.repr 12)) lo)
-  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
-Proof.
-  unfold load_hilo32; intros. 
-  predSpec Int.eq Int.eq_spec lo Int.zero.
-- subst lo. econstructor; split. 
-  apply exec_straight_one. simpl; eauto. auto.
-  split. rewrite Int.add_zero. Simpl.
-  intros; Simpl.
-- econstructor; split.
-  eapply exec_straight_two. simpl; eauto. simpl; eauto. auto. auto. 
-  split. Simpl. 
-  intros; Simpl.
-Qed.
-*)
-
-*)
-
 Lemma loadimm32_correct:
   forall rd n k rs m,
   exists rs',
@@ -141,60 +118,6 @@ Proof.
   intros; Simpl.
 Qed.
 
-(*
-(*
-Lemma opimm32_correct:
-  forall (op: ireg -> ireg0 -> ireg0 -> instruction)
-         (opi: ireg -> ireg0 -> int -> instruction)
-         (sem: val -> val -> val) m,
-  (forall d s1 s2 rs,
-   exec_instr ge fn (op d s1 s2) rs m = Next (nextinstr (rs#d <- (sem rs##s1 rs##s2))) m) ->
-  (forall d s n rs,
-   exec_instr ge fn (opi d s n) rs m = Next (nextinstr (rs#d <- (sem rs##s (Vint n)))) m) ->
-  forall rd r1 n k rs,
-  r1 <> RTMP ->
-  exists rs',
-     exec_straight ge fn (opimm32 op opi rd r1 n k) rs m k rs' m
-  /\ rs'#rd = sem rs##r1 (Vint n)
-  /\ forall r, r <> PC -> r <> rd -> r <> RTMP -> rs'#r = rs#r.
-Proof.
-  intros. unfold opimm32. generalize (make_immed32_sound n); intros E.
-  destruct (make_immed32 n). 
-- subst imm. econstructor; split. 
-  apply exec_straight_one. rewrite H0. simpl; eauto. auto.
-  split. Simpl. intros; Simpl.
-- destruct (load_hilo32_correct RTMP hi lo (op rd r1 RTMP :: k) rs m)
-  as (rs' & A & B & C).
-  econstructor; split.
-  eapply exec_straight_trans. eexact A. apply exec_straight_one. 
-  rewrite H; eauto. auto.
-  split. Simpl. simpl. rewrite B, C, E. auto. congruence. congruence.
-  intros; Simpl. 
-Qed.
-
-(** 64-bit integer constants and arithmetic *)
-
-Lemma load_hilo64_correct:
-  forall rd hi lo k rs m,
-  exists rs',
-     exec_straight ge fn (load_hilo64 rd hi lo k) rs m k rs' m
-  /\ rs'#rd = Vlong (Int64.add (Int64.sign_ext 32 (Int64.shl hi (Int64.repr 12))) lo)
-  /\ forall r, r <> PC -> r <> rd -> rs'#r = rs#r.
-Proof.
-  unfold load_hilo64; intros. 
-  predSpec Int64.eq Int64.eq_spec lo Int64.zero.
-- subst lo. econstructor; split. 
-  apply exec_straight_one. simpl; eauto. auto.
-  split. rewrite Int64.add_zero. Simpl.
-  intros; Simpl.
-- econstructor; split.
-  eapply exec_straight_two. simpl; eauto. simpl; eauto. auto. auto. 
-  split. Simpl. 
-  intros; Simpl.
-Qed.
-*)
-*)
-
 Lemma opimm64_correct:
   forall (op: arith_name_rrr)
          (opi: arith_name_rri64)
@@ -215,18 +138,6 @@ Proof.
 - subst imm. econstructor; split. 
   apply exec_straight_one. rewrite H0. simpl; eauto. auto.
   split. Simpl. intros; Simpl.
-(*
-- destruct (load_hilo64_correct RTMP hi lo (op rd r1 RTMP :: k) rs m)
-  as (rs' & A & B & C).
-  econstructor; split.
-  eapply exec_straight_trans. eexact A. apply exec_straight_one. 
-  rewrite H; eauto. auto.
-  split. Simpl. simpl. rewrite B, C, E. auto. congruence. congruence.
-  intros; Simpl. 
-- subst imm. econstructor; split. 
-  eapply exec_straight_two. simpl; eauto. rewrite H. simpl; eauto. auto. auto.
-  split. Simpl. intros; Simpl.
-*)
 Qed.
 
 (** Add offset to pointer *)
@@ -252,35 +163,6 @@ Proof.
   rewrite Ptrofs.of_int64_to_int64 by auto. auto.
 Qed.
 
-(*
-(*
-Lemma addptrofs_correct_2:
-  forall rd r1 n k (rs: regset) m b ofs,
-  r1 <> RTMP -> rs#r1 = Vptr b of
-s ->
-  exists rs',
-     exec_straight ge fn (addptrofs rd r1 n k) rs m k rs' m
-  /\ rs'#rd = Vptr b (Ptrofs.add ofs n)
-  /\ forall r, r <> PC -> r <> rd -> r <> RTMP -> rs'#r = rs#r.
-Proof.
-  intros. exploit (addptrofs_correct rd r1 n); eauto. intros (rs' & A & B & C).
-  exists rs'; intuition eauto. 
-  rewrite H0 in B. inv B. auto.
-Qed.
-
-(** Translation of conditional branches *)
-
-Remark branch_on_RTMP:
-  forall normal lbl (rs: regset) m b,
-  rs#RTMP = Val.of_bool (eqb normal b) ->
-  exec_instr ge fn (if normal then Pbnew RTMP X0 lbl else Pbeqw RTMP X0 lbl) rs m =
-  eval_branch fn lbl rs m (Some b).
-Proof.
-  intros. destruct normal; simpl; rewrite H; simpl; destruct b; reflexivity. 
-Qed.
-*)
-*)
-
 Ltac ArgsInv :=
   repeat (match goal with
   | [ H: Error _ = OK _ |- _ ] => discriminate
@@ -1522,99 +1404,6 @@ Proof.
   exploit transl_cond_nofloat32_correct; eauto. intros (rs' & A & B & C). exists rs'; eauto.
 Qed.
 
-(*
-(*
-+ (* cmpf *)
-  destruct (transl_cond_float c0 rd x x0) as [insn normal] eqn:TR.
-  fold (Val.cmpf c0 (rs x) (rs x0)).
-  set (v := Val.cmpf c0 (rs x) (rs x0)).
-  destruct normal; inv EQ2.
-* econstructor; split.
-  apply exec_straight_one. eapply transl_cond_float_correct with (v := v); eauto. auto.
-  split; intros; Simpl.
-* econstructor; split.
-  eapply exec_straight_two.
-  eapply transl_cond_float_correct with (v := Val.notbool v); eauto.
-  simpl; reflexivity.
-  auto. auto.
-  split; intros; Simpl. unfold v, Val.cmpf. destruct (Val.cmpf_bool c0 (rs x) (rs x0)) as [[]|]; auto.
-+ (* notcmpf *)
-  destruct (transl_cond_float c0 rd x x0) as [insn normal] eqn:TR.
-  rewrite Val.notbool_negb_3. fold (Val.cmpf c0 (rs x) (rs x0)).
-  set (v := Val.cmpf c0 (rs x) (rs x0)).
-  destruct normal; inv EQ2.
-* econstructor; split.
-  eapply exec_straight_two.
-  eapply transl_cond_float_correct with (v := v); eauto.
-  simpl; reflexivity.
-  auto. auto.
-  split; intros; Simpl. unfold v, Val.cmpf. destruct (Val.cmpf_bool c0 (rs x) (rs x0)) as [[]|]; auto.
-* econstructor; split.
-  apply exec_straight_one. eapply transl_cond_float_correct with (v := Val.notbool v); eauto. auto.
-  split; intros; Simpl.
-+ (* cmpfs *)
-  destruct (transl_cond_single c0 rd x x0) as [insn normal] eqn:TR.
-  fold (Val.cmpfs c0 (rs x) (rs x0)).
-  set (v := Val.cmpfs c0 (rs x) (rs x0)).
-  destruct normal; inv EQ2.
-* econstructor; split.
-  apply exec_straight_one. eapply transl_cond_single_correct with (v := v); eauto. auto.
-  split; intros; Simpl.
-* econstructor; split.
-  eapply exec_straight_two.
-  eapply transl_cond_single_correct with (v := Val.notbool v); eauto.
-  simpl; reflexivity.
-  auto. auto.
-  split; intros; Simpl. unfold v, Val.cmpfs. destruct (Val.cmpfs_bool c0 (rs x) (rs x0)) as [[]|]; auto.
-+ (* notcmpfs *)
-  destruct (transl_cond_single c0 rd x x0) as [insn normal] eqn:TR.
-  rewrite Val.notbool_negb_3. fold (Val.cmpfs c0 (rs x) (rs x0)).
-  set (v := Val.cmpfs c0 (rs x) (rs x0)).
-  destruct normal; inv EQ2.
-* econstructor; split.
-  eapply exec_straight_two.
-  eapply transl_cond_single_correct with (v := v); eauto.
-  simpl; reflexivity.
-  auto. auto.
-  split; intros; Simpl. unfold v, Val.cmpfs. destruct (Val.cmpfs_bool c0 (rs x) (rs x0)) as [[]|]; auto.
-* econstructor; split.
-  apply exec_straight_one. eapply transl_cond_single_correct with (v := Val.notbool v); eauto. auto.
-  split; intros; Simpl.
-*)
-*)
-
-(** Some arithmetic properties. *)
-
-(* Remark cast32unsigned_from_cast32signed:
-  forall i, Int64.repr (Int.unsigned i) = Int64.zero_ext 32 (Int64.repr (Int.signed i)).
-Proof.
-  intros. apply Int64.same_bits_eq; intros. 
-  rewrite Int64.bits_zero_ext, !Int64.testbit_repr by tauto.
-  rewrite Int.bits_signed by tauto. fold (Int.testbit i i0).
-  change Int.zwordsize with 32.
-  destruct (zlt i0 32). auto. apply Int.bits_above. auto.
-Qed.
-
-Lemma cast32signed_correct:
-  forall (d s: ireg) (k: code) (rs: regset) (m: mem),
-  exists rs': regset,
-    exec_straight ge (cast32signed d s ::g k) rs m k rs' m
- /\ Val.lessdef (Val.longofint (rs s)) (rs' d)
- /\ (forall r: preg, r <> PC -> r <> d -> rs' r = rs r).
-Proof.
-  intros. unfold cast32signed. destruct (ireg_eq d s).
-- econstructor; split.
-  + apply exec_straight_one. simpl. eauto with asmgen.
-  + split.
-    * rewrite e. Simpl.
-    * intros. destruct r; Simpl.
-- econstructor; split.
-  + apply exec_straight_one. simpl. eauto with asmgen.
-  + split.
-    * Simpl.
-    * intros. destruct r; Simpl.
-Qed. *)
-
 (* Translation of arithmetic operations *)
 
 Ltac SimplEval H :=
@@ -1868,33 +1657,6 @@ Proof.
 + econstructor; econstructor; econstructor; econstructor; split.
   apply exec_straight_opt_refl. 
   split; auto. simpl. subst imm. rewrite Ptrofs.of_int64_to_int64 by auto. auto.
-(*
-+ econstructor; econstructor; econstructor; split. 
-  constructor. eapply exec_straight_two. 
-  simpl; eauto. simpl; eauto. auto. auto. 
-  split; intros; Simpl. destruct (rs base); auto; simpl. rewrite SF. simpl.
-  rewrite Ptrofs.add_assoc. f_equal. f_equal. 
-  rewrite <- (Ptrofs.of_int64_to_int64 SF ofs). rewrite EQ. 
-  symmetry; auto with ptrofs.
-+ econstructor; econstructor; econstructor; split. 
-  constructor. eapply exec_straight_two. 
-  simpl; eauto. simpl; eauto. auto. auto. 
-  split; intros; Simpl. unfold eval_offset. destruct (rs base); auto; simpl. rewrite SF. simpl.
-  rewrite Ptrofs.add_zero. subst imm. rewrite Ptrofs.of_int64_to_int64 by auto. auto.
-(* 32 bits part, irrelevant for us
-- generalize (make_immed32_sound (Ptrofs.to_int ofs)); intros EQ.
-  destruct (make_immed32 (Ptrofs.to_int ofs)).
-+ econstructor; econstructor; econstructor; split.
-  apply exec_straight_opt_refl. 
-  split; auto. simpl. subst imm. rewrite Ptrofs.of_int_to_int by auto. auto.
-+ econstructor; econstructor; econstructor; split. 
-  constructor. eapply exec_straight_two. 
-  simpl; eauto. simpl; eauto. auto. auto. 
-  split; intros; Simpl. destruct (rs base); auto; simpl. rewrite SF. simpl.
-  rewrite Ptrofs.add_assoc. f_equal. f_equal. 
-  rewrite <- (Ptrofs.of_int_to_int SF ofs). rewrite EQ. 
-  symmetry; auto with ptrofs.
-*)*)
 Qed.
 
 
@@ -2555,8 +2317,8 @@ Proof.
           { eapply A2. }
           { apply exec_straight_one. simpl.
             rewrite (C2 SP) by auto with asmgen. rewrite <- (sp_val _ _ rs1 AG1). simpl; rewrite LP'.
-            rewrite FREE'. eauto. (* auto. *) } }
-      * split. (* apply agree_nextinstr.  *)apply agree_set_other; auto with asmgen. 
+            rewrite FREE'. eauto. } }
+      * split. apply agree_set_other; auto with asmgen. 
     apply agree_change_sp with (Vptr stk soff).
     apply agree_exten with rs; auto. intros; rewrite C2; auto with asmgen.
     eapply parent_sp_def; eauto.
diff --git a/mppa_k1c/Asmexpand.ml b/mppa_k1c/Asmexpand.ml
index 65dee6c7..20d27951 100644
--- a/mppa_k1c/Asmexpand.ml
+++ b/mppa_k1c/Asmexpand.ml
@@ -465,14 +465,14 @@ let expand_builtin_inline name args res = let open Asmvliw in
      emit (Pitouchl addr)
   | "__builtin_k1_dzerol", [BA(IR addr)], _ ->
      emit (Pdzerol addr)
-  | "__builtin_k1_afaddd", [BA(IR addr); BA (IR incr_res)], BR(IR res) ->
+(*| "__builtin_k1_afaddd", [BA(IR addr); BA (IR incr_res)], BR(IR res) ->
      (if res <> incr_res
       then (emit (Pmv(res, incr_res)); emit Psemi));
      emit (Pafaddd(addr, res))
   | "__builtin_k1_afaddw", [BA(IR addr); BA (IR incr_res)], BR(IR res) ->
      (if res <> incr_res
       then (emit (Pmv(res, incr_res)); emit Psemi));
-     emit (Pafaddw(addr, res))
+     emit (Pafaddw(addr, res)) *) (* see #157 *)
   | "__builtin_alclrd", [BA(IR addr)], BR(IR res) ->
      emit (Palclrd(res, addr))
   | "__builtin_alclrw", [BA(IR addr)], BR(IR res) ->
diff --git a/mppa_k1c/Asmgenproof.v b/mppa_k1c/Asmgenproof.v
index e7e21a18..e0878c7d 100644
--- a/mppa_k1c/Asmgenproof.v
+++ b/mppa_k1c/Asmgenproof.v
@@ -10,7 +10,7 @@
 (*                                                                     *)
 (* *********************************************************************)
 
-(** Correctness proof for RISC-V generation: main proof. *)
+(** Correctness proof for Asmgen *)
 
 Require Import Coqlib Errors.
 Require Import Integers Floats AST Linking.
@@ -89,4 +89,4 @@ Module Asmgenproof0.
 
 Definition return_address_offset := return_address_offset.
 
-End Asmgenproof0.
-\ No newline at end of file
+End Asmgenproof0.
diff --git a/mppa_k1c/Asmvliw.v b/mppa_k1c/Asmvliw.v
index c5b7db45..72584d2a 100644
--- a/mppa_k1c/Asmvliw.v
+++ b/mppa_k1c/Asmvliw.v
@@ -17,8 +17,6 @@
 
 (** Abstract syntax and semantics for VLIW semantics of K1c assembly language. *)
 
-(* FIXME: develop/fix the comments in this file *)
-
 Require Import Coqlib.
 Require Import Maps.
 Require Import AST.
@@ -45,8 +43,7 @@ Require Import Chunks.
     this view induces our sequential semantics of bundles defined in [Asmblock].
 *)
 
-(** General Purpose registers.
-*)
+(** General Purpose registers. *)
 
 Inductive gpreg: Type :=
   | GPR0:  gpreg | GPR1:  gpreg | GPR2:  gpreg | GPR3:  gpreg | GPR4:  gpreg
@@ -152,9 +149,6 @@ Definition gpreg_o_expand (x : gpreg_o) : gpreg * gpreg * gpreg * gpreg :=
 Lemma gpreg_o_eq : forall (x y : gpreg_o), {x=y} + {x<>y}.
 Proof. decide equality. Defined.
 
-(** We model the following registers of the RISC-V architecture. *)
-
-(** basic register *)
 Inductive preg: Type :=
   | IR: gpreg -> preg                   (**r integer general purpose registers *)
   | RA: preg
@@ -173,7 +167,7 @@ End PregEq.
 
 Module Pregmap := EMap(PregEq).
 
-(** Conventional names for stack pointer ([SP]) and return address ([RA]). *)
+(** Conventional names for stack pointer ([SP]), return address ([RA]), frame pointer ([FP]) and other temporaries used *)
 
 Notation "'SP'" := GPR12 (only parsing) : asm.
 Notation "'FP'" := GPR17 (only parsing) : asm.
@@ -188,9 +182,7 @@ Inductive btest: Type :=
   | BTdgez                              (**r Double Greater Than or Equal to Zero *)
   | BTdlez                              (**r Double Less Than or Equal to Zero *)
   | BTdgtz                              (**r Double Greater Than Zero *)
-(*| BTodd                               (**r Odd (LSB Set) *)
-  | BTeven                              (**r Even (LSB Clear) *)
-*)| BTwnez                              (**r Word Not Equal to Zero *)
+  | BTwnez                              (**r Word Not Equal to Zero *)
   | BTweqz                              (**r Word Equal to Zero *)
   | BTwltz                              (**r Word Less Than Zero *)
   | BTwgez                              (**r Word Greater Than or Equal to Zero *)
@@ -251,16 +243,7 @@ Definition offset : Type := ptrofs.
 
 Definition label := positive.
 
-(* FIXME - rewrite the comment *)
-(** A note on immediates: there are various constraints on immediate
-  operands to K1c instructions.  We do not attempt to capture these
-  restrictions in the abstract syntax nor in the semantics.  The
-  assembler will emit an error if immediate operands exceed the
-  representable range.  Of course, our K1c generator (file
-  [Asmgen]) is careful to respect this range. *)
-
-(** Instructions to be expanded in control-flow 
-*)
+(** Instructions to be expanded in control-flow *)
 Inductive ex_instruction : Type :=
   (* Pseudo-instructions *)
   | Pbuiltin: external_function -> list (builtin_arg preg)
@@ -1355,7 +1338,7 @@ Definition store_chunk n :=
 
 (** * basic instructions *)
 
-Definition parexec_basic_instr (bi: basic) (rsr rsw: regset) (mr mw: mem) :=
+Definition bstep (bi: basic) (rsr rsw: regset) (mr mw: mem) :=
   match bi with
   | PArith ai => Next (parexec_arith_instr ai rsr rsw) mw
 
@@ -1414,7 +1397,7 @@ Fixpoint parexec_wio_body (body: list basic) (rsr rsw: regset) (mr mw: mem) :=
   match body with
   | nil => Next rsw mw
   | bi::body' => 
-     match parexec_basic_instr bi rsr rsw mr mw with
+     match bstep bi rsr rsw mr mw with
      | Next rsw mw => parexec_wio_body body' rsr rsw mr mw
      | Stuck => Stuck
      end
@@ -1550,12 +1533,12 @@ Definition incrPC size_b (rs: regset) :=
   rs#PC <- (Val.offset_ptr rs#PC size_b).
 
 (** parallel execution of the exit instruction of a bundle *)
-Definition parexec_exit (f: function) ext size_b (rsr rsw: regset) (mw: mem) 
+Definition estep (f: function) ext size_b (rsr rsw: regset) (mw: mem) 
   := parexec_control f ext (incrPC size_b rsr) rsw mw.
 
 Definition parexec_wio f bdy ext size_b (rs: regset) (m: mem): outcome :=
   match parexec_wio_body bdy rs rs m m with
-  | Next rsw mw => parexec_exit f ext size_b rs rsw mw
+  | Next rsw mw => estep f ext size_b rs rsw mw
   | Stuck => Stuck
   end.
 
diff --git a/mppa_k1c/CBuiltins.ml b/mppa_k1c/CBuiltins.ml
index 2f80c90f..5fb69f62 100644
--- a/mppa_k1c/CBuiltins.ml
+++ b/mppa_k1c/CBuiltins.ml
@@ -43,8 +43,8 @@ let builtins = {
       
       (* LSU Instructions *)
       (* acswapd and acswapw done using headers and assembly *)
-       "__builtin_k1_afaddd", (TInt(IULongLong, []), [TPtr(TVoid [], []); TInt(ILongLong, [])], false);
-       "__builtin_k1_afaddw", (TInt(IUInt, []), [TPtr(TVoid [], []); TInt(IInt, [])], false);
+(*     "__builtin_k1_afaddd", (TInt(IULongLong, []), [TPtr(TVoid [], []); TInt(ILongLong, [])], false);
+       "__builtin_k1_afaddw", (TInt(IUInt, []), [TPtr(TVoid [], []); TInt(IInt, [])], false); *) (* see #157 *)
       "__builtin_k1_alclrd", (TInt(IULongLong, []), [TPtr(TVoid [], [])], false); (* DONE *)
       "__builtin_k1_alclrw", (TInt(IUInt, []), [TPtr(TVoid [], [])], false); (* DONE *)
       "__builtin_k1_dinval", (TVoid [], [], false); (* DONE *)
@@ -63,7 +63,6 @@ let builtins = {
       "__builtin_k1_lwzu", (TInt(IUInt, []), [TPtr(TVoid [], [])], false);
 
       (* ALU Instructions *)
-      "__builtin_clzll", (TInt(IULongLong, []), [TInt(IULongLong, [])], false);
       (* "__builtin_k1_addhp", (TInt(IInt, []), [TInt(IInt, []); TInt(IInt, [])], false); *)
       (* "__builtin_k1_adds", (TInt(IInt, []), [TInt(IInt, []); TInt(IInt, [])], false); *)
       (* "__builtin_k1_bwlu", (TInt(IUInt, []), 
@@ -74,8 +73,8 @@ let builtins = {
       (* "__builtin_k1_cbs", (TInt(IInt, []), [TInt(IUInt, [])], false); *)
       (* "__builtin_k1_cbsdl", (TInt(ILongLong, []), [TInt(IULongLong, [])], false); *)
       (* "__builtin_k1_clz", (TInt(IInt, []), [TInt(IUInt, [])], false); *)
-      "__builtin_k1_clzw", (TInt(IInt, []), [TInt(IUInt, [])], false);
-      "__builtin_k1_clzd", (TInt(ILongLong, []), [TInt(IULongLong, [])], false);
+      "__builtin_clzw", (TInt(IInt, []), [TInt(IUInt, [])], false);
+      "__builtin_clzll", (TInt(ILongLong, []), [TInt(IULongLong, [])], false);
       (* "__builtin_k1_clzdl", (TInt(ILongLong, []), [TInt(IULongLong, [])], false); *)
       (* "__builtin_k1_cmove", (TInt(IInt, []), [TInt(IInt, []); TInt(IInt, []); TInt(IInt, [])], false); *)
       (* "__builtin_k1_ctz", (TInt(IInt, []), [TInt(IUInt, [])], false); *)
diff --git a/mppa_k1c/Op.v b/mppa_k1c/Op.v
index 35fbb596..815d3958 100644
--- a/mppa_k1c/Op.v
+++ b/mppa_k1c/Op.v
@@ -100,23 +100,23 @@ Inductive operation : Type :=
   | Onandimm (n: int)        (**r [rd = ~(r1 & n)] *)
   | Oor                      (**r [rd = r1 | r2] *)
   | Oorimm (n: int)          (**r [rd = r1 | n] *)
-  | Onor                     (**r [rd = r1 | r2] *)
-  | Onorimm (n: int)         (**r [rd = r1 | n] *)
+  | Onor                     (**r [rd = ~(r1 | r2)] *)
+  | Onorimm (n: int)         (**r [rd = ~(r1 | n)] *)
   | Oxor                     (**r [rd = r1 ^ r2] *)
   | Oxorimm (n: int)         (**r [rd = r1 ^ n] *)
   | Onxor                    (**r [rd = ~(r1 ^ r2)] *)
   | Onxorimm (n: int)        (**r [rd = ~(r1 ^ n)] *)
   | Onot                     (**r [rd = ~r1] *)
-  | Oandn                    (**r [rd = (~r1) ^ r2] *)
-  | Oandnimm (n: int)        (**r [rd = (~r1) ^ n] *)
+  | Oandn                    (**r [rd = (~r1) & r2] *)
+  | Oandnimm (n: int)        (**r [rd = (~r1) & n] *)
   | Oorn                     (**r [rd = (~r1) | r2] *)
   | Oornimm (n: int)         (**r [rd = (~r1) | n] *)
   | Oshl                     (**r [rd = r1 << r2] *)
   | Oshlimm (n: int)         (**r [rd = r1 << n] *)
-  | Oshr                     (**r [rd = r1 >> r2] (signed) *)
-  | Oshrimm (n: int)         (**r [rd = r1 >> n] (signed) *)
-  | Oshru                    (**r [rd = r1 >> r2] (unsigned) *)
-  | Oshruimm (n: int)        (**r [rd = r1 >> n] (unsigned) *)
+  | Oshr                     (**r [rd = r1 >>s r2] (signed) *)
+  | Oshrimm (n: int)         (**r [rd = r1 >>s n] (signed) *)
+  | Oshru                    (**r [rd = r1 >>u r2] (unsigned) *)
+  | Oshruimm (n: int)        (**r [rd = r1 >>x n] (unsigned) *)
   | Oshrximm (n: int)        (**r [rd = r1 / 2^n] (signed) *)
   | Ororimm (n: int)         (**r rotate right immediate *)
   | Omadd                    (**r [rd = rd + r1 * r2] *)
@@ -158,8 +158,8 @@ Inductive operation : Type :=
   | Onxorl                   (**r [rd = ~(r1 ^ r2)] *)
   | Onxorlimm (n: int64)     (**r [rd = ~(r1 ^ n)] *)
   | Onotl                    (**r [rd = ~r1] *)
-  | Oandnl                   (**r [rd = (~r1) ^ r2] *)
-  | Oandnlimm (n: int64)     (**r [rd = (~r1) ^ n] *)
+  | Oandnl                   (**r [rd = (~r1) & r2] *)
+  | Oandnlimm (n: int64)     (**r [rd = (~r1) & n] *)
   | Oornl                    (**r [rd = (~r1) | r2] *)
   | Oornlimm (n: int64)      (**r [rd = (~r1) | n] *)
   | Oshll                    (**r [rd = r1 << r2] *)
diff --git a/mppa_k1c/PostpassScheduling.v b/mppa_k1c/PostpassScheduling.v
index 15cb4c48..8b6de1e2 100644
--- a/mppa_k1c/PostpassScheduling.v
+++ b/mppa_k1c/PostpassScheduling.v
@@ -19,7 +19,7 @@ Local Open Scope error_monad_scope.
 
 (** Oracle taking as input a basic block,
     returns a schedule expressed as a list of bundles *)
-Axiom schedule: bblock -> list bblock.
+Axiom schedule: bblock -> (list (list basic)) * option control.
 
 Extract Constant schedule => "PostpassSchedulingOracle.schedule".
 
@@ -208,7 +208,8 @@ Proof.
     + apply IHlbb in EQ. assumption.
 Qed.
 
-
+Inductive is_concat : bblock -> list bblock -> Prop :=
+  | mk_is_concat: forall tbb lbb, concat_all lbb = OK tbb -> is_concat tbb lbb.
 
 Definition verify_schedule (bb bb' : bblock) : res unit :=
   match bblock_simub bb bb' with
@@ -333,10 +334,49 @@ Proof.
     apply stick_header_concat_all. assumption.
 Qed.
 
+Program Definition make_bblock_from_basics lb :=
+  match lb with
+  | nil => Error (msg "PostpassScheduling.make_bblock_from_basics")
+  | b :: lb => OK {| header := nil; body := b::lb; exit := None |}
+  end.
+
+Fixpoint schedule_to_bblocks_nocontrol llb :=
+  match llb with
+  | nil => OK nil
+  | lb :: llb => do bb <- make_bblock_from_basics lb;
+                 do lbb <- schedule_to_bblocks_nocontrol llb;
+                 OK (bb :: lbb)
+  end.
+
+Program Definition make_bblock_from_basics_and_control lb c := 
+  match c with
+  | PExpand (Pbuiltin _ _ _) => Error (msg "PostpassScheduling.make_bblock_from_basics_and_control")
+  | PCtlFlow cf => OK {| header := nil; body := lb; exit := Some (PCtlFlow cf) |}
+  end.
+Next Obligation.
+  apply wf_bblock_refl. constructor.
+  - right. discriminate.
+  - discriminate.
+Qed.
+
+Fixpoint schedule_to_bblocks_wcontrol llb c :=
+  match llb with
+  | nil => OK ((bblock_single_inst (PControl c)) :: nil)
+  | lb :: nil => do bb <- make_bblock_from_basics_and_control lb c; OK (bb :: nil)
+  | lb :: llb => do bb <- make_bblock_from_basics lb;
+                 do lbb <- schedule_to_bblocks_wcontrol llb c;
+                 OK (bb :: lbb)
+  end.
 
+Definition schedule_to_bblocks (llb: list (list basic)) (oc: option control) : res (list bblock) :=
+  match oc with
+  | None => schedule_to_bblocks_nocontrol llb
+  | Some c => schedule_to_bblocks_wcontrol llb c
+  end.
 
-Definition do_schedule (bb: bblock) : list bblock :=
-  if (Z.eqb (size bb) 1) then bb::nil else schedule bb.
+Definition do_schedule (bb: bblock) : res (list bblock) :=
+  if (Z.eqb (size bb) 1) then OK (bb::nil) 
+  else match (schedule bb) with (llb, oc) => schedule_to_bblocks llb oc end.
 
 Definition verify_par_bblock (bb: bblock) : res unit :=
   if (bblock_para_check bb) then OK tt else Error (msg "PostpassScheduling.verify_par_bblock").
@@ -350,7 +390,7 @@ Fixpoint verify_par (lbb: list bblock) :=
 Definition verified_schedule_nob (bb : bblock) : res (list bblock) :=
   let bb'  := no_header bb in
   let bb'' := Peephole.optimize_bblock bb' in
-  let lbb := do_schedule bb'' in
+  do lbb <- do_schedule bb'';
   do tbb <- concat_all lbb;
   do sizecheck <- verify_size bb lbb;
   do schedcheck <- verify_schedule bb' tbb;
@@ -363,7 +403,7 @@ Lemma verified_schedule_nob_size:
 Proof.
   intros. monadInv H. erewrite <- stick_header_code_size; eauto.
   apply verify_size_size.
-  destruct x0; try discriminate. assumption.
+  destruct x1; try discriminate. assumption.
 Qed.
 
 Lemma verified_schedule_nob_no_header_in_middle:
@@ -382,7 +422,7 @@ Lemma verified_schedule_nob_header:
   /\ Forall (fun b => header b = nil) lbb.
 Proof.
   intros. split.
-  - monadInv H. unfold stick_header_code in EQ2. destruct (hd_error _); try discriminate. inv EQ2.
+  - monadInv H. unfold stick_header_code in EQ3. destruct (hd_error _); try discriminate. inv EQ3.
     simpl. reflexivity.
   - apply verified_schedule_nob_no_header_in_middle in H. assumption.
 Qed.
@@ -427,29 +467,29 @@ Qed.
 Lemma verified_schedule_nob_correct:
   forall ge f bb lbb,
   verified_schedule_nob bb = OK lbb ->
-  exists tbb, 
-     concat_all lbb = OK tbb
+  exists tbb,
+     is_concat tbb lbb
   /\ bblock_simu ge f bb tbb.
 Proof.
   intros. monadInv H.
   exploit stick_header_code_concat_all; eauto.
   intros (tbb & CONC & STH).
-  exists tbb. split; auto.
-  rewrite verify_schedule_no_header in EQ0. erewrite stick_header_verify_schedule in EQ0; eauto.
-  eapply bblock_simub_correct; eauto. unfold verify_schedule in EQ0.
+  exists tbb. split; auto. constructor; auto.
+  rewrite verify_schedule_no_header in EQ2. erewrite stick_header_verify_schedule in EQ2; eauto.
+  eapply bblock_simub_correct; eauto. unfold verify_schedule in EQ2.
   destruct (bblock_simub _ _); auto; try discriminate.
 Qed.
 
 Theorem verified_schedule_correct:
   forall ge f bb lbb,
   verified_schedule bb = OK lbb ->
-  exists tbb, 
-     concat_all lbb = OK tbb
+  exists tbb,
+     is_concat tbb lbb
   /\ bblock_simu ge f bb tbb.
 Proof.
   intros. unfold verified_schedule in H. destruct (exit bb). destruct c. destruct i.
   all: try (eapply verified_schedule_nob_correct; eauto; fail).
-  inv H. eexists. split; simpl; auto. constructor; auto.
+  inv H. eexists. split; simpl; auto. constructor; auto. simpl; auto. constructor; auto.
 Qed.
 
 Lemma verified_schedule_builtin_idem:
diff --git a/mppa_k1c/PostpassSchedulingOracle.ml b/mppa_k1c/PostpassSchedulingOracle.ml
index 895f9f40..40f1d9c7 100644
--- a/mppa_k1c/PostpassSchedulingOracle.ml
+++ b/mppa_k1c/PostpassSchedulingOracle.ml
@@ -38,6 +38,8 @@ type ab_inst_rec = {
   inst: real_instruction;
   write_locs : location list;
   read_locs : location list;
+  read_at_id : location list; (* Must be contained in read_locs *)
+  read_at_e1 : location list; (* idem *)
   imm : immediate option;
   is_control : bool;
 }
@@ -232,25 +234,40 @@ let jl_real = Goto
 let cb_real = Cb
 let cbu_real = Cb
 
-let arith_rri32_rec i rd rs imm32 = { inst = arith_rri32_real i; write_locs = [Reg rd]; read_locs = [Reg rs]; imm = imm32; is_control = false }
+let arith_rri32_rec i rd rs imm32 = { inst = arith_rri32_real i; write_locs = [Reg rd]; read_locs = [Reg rs]; imm = imm32; is_control = false;
+                                      read_at_id = []; read_at_e1 = [] }
 
-let arith_rri64_rec i rd rs imm64 = { inst = arith_rri64_real i; write_locs = [Reg rd]; read_locs = [Reg rs]; imm = imm64; is_control = false }
+let arith_rri64_rec i rd rs imm64 = { inst = arith_rri64_real i; write_locs = [Reg rd]; read_locs = [Reg rs]; imm = imm64; is_control = false;
+                                      read_at_id = []; read_at_e1 = [] }
 
-let arith_rrr_rec i rd rs1 rs2 = { inst = arith_rrr_real i; write_locs = [Reg rd]; read_locs = [Reg rs1; Reg rs2]; imm = None; is_control = false}
+let arith_rrr_rec i rd rs1 rs2 = { inst = arith_rrr_real i; write_locs = [Reg rd]; read_locs = [Reg rs1; Reg rs2]; imm = None; is_control = false;
+                                      read_at_id = []; read_at_e1 = [] }
 
-let arith_arri32_rec i rd rs imm32 = { inst = arith_arri32_real i; write_locs = [Reg rd]; read_locs = [Reg rd; Reg rs]; imm = imm32; is_control = false }
+let arith_arri32_rec i rd rs imm32 = 
+  let rae1 = match i with Pmaddiw -> [Reg rd] | _ -> []
+  in {  inst = arith_arri32_real i; write_locs = [Reg rd]; read_locs = [Reg rd; Reg rs]; imm = imm32; is_control = false;
+        read_at_id = [] ; read_at_e1 = rae1 }
 
-let arith_arri64_rec i rd rs imm64 = { inst = arith_arri64_real i; write_locs = [Reg rd]; read_locs = [Reg rd; Reg rs]; imm = imm64; is_control = false }
+let arith_arri64_rec i rd rs imm64 = 
+  let rae1 = match i with Pmaddil -> [Reg rd] | _ -> []
+  in {  inst = arith_arri64_real i; write_locs = [Reg rd]; read_locs = [Reg rd; Reg rs]; imm = imm64; is_control = false;
+        read_at_id = []; read_at_e1 = rae1 }
 
-let arith_arr_rec i rd rs = { inst = arith_arr_real i; write_locs = [Reg rd]; read_locs = [Reg rd; Reg rs]; imm = None; is_control = false}
+let arith_arr_rec i rd rs = { inst = arith_arr_real i; write_locs = [Reg rd]; read_locs = [Reg rd; Reg rs]; imm = None; is_control = false;
+                              read_at_id = []; read_at_e1 = [] }
 
-let arith_arrr_rec i rd rs1 rs2 = { inst = arith_arrr_real i; write_locs = [Reg rd]; read_locs = [Reg rd; Reg rs1; Reg rs2]; imm = None; is_control = false}
+let arith_arrr_rec i rd rs1 rs2 = 
+  let rae1 = match i with Pmaddl | Pmaddw | Pmsubl | Pmsubw -> [Reg rd] | _ -> []
+  in {  inst = arith_arrr_real i; write_locs = [Reg rd]; read_locs = [Reg rd; Reg rs1; Reg rs2]; imm = None; is_control = false;
+        read_at_id = []; read_at_e1 = rae1 }
 
-let arith_rr_rec i rd rs = { inst = arith_rr_real i; write_locs = [Reg rd]; read_locs = [Reg rs]; imm = None; is_control = false}
+let arith_rr_rec i rd rs = {  inst = arith_rr_real i; write_locs = [Reg rd]; read_locs = [Reg rs]; imm = None; is_control = false;
+                              read_at_id = []; read_at_e1 = [] }
 
 let arith_r_rec i rd = match i with
     (* For Ploadsymbol, writing the highest integer since we do not know how many bits does a symbol have *)
-  | Ploadsymbol (id, ofs) -> { inst = loadsymbol_real; write_locs = [Reg rd]; read_locs = []; imm = Some (I64 Integers.Int64.max_signed); is_control = false}
+  | Ploadsymbol (id, ofs) -> {  inst = loadsymbol_real; write_locs = [Reg rd]; read_locs = []; imm = Some (I64 Integers.Int64.max_signed);
+                                is_control = false; read_at_id = []; read_at_e1 = [] }
 
 let arith_rec i =
   match i with
@@ -262,45 +279,54 @@ let arith_rec i =
   | PArithARRI32 (i, rd, rs, imm32) -> arith_arri32_rec i (IR rd) (IR rs) (Some (I32 imm32))
   | PArithARRI64 (i, rd, rs, imm64) -> arith_arri64_rec i (IR rd) (IR rs) (Some (I64 imm64))
   | PArithARRR (i, rd, rs1, rs2) -> arith_arrr_rec i (IR rd) (IR rs1) (IR rs2)
-  | PArithRI32 (rd, imm32) -> { inst = arith_ri32_real; write_locs = [Reg (IR rd)]; read_locs = []; imm = (Some (I32 imm32)) ; is_control = false}
-  | PArithRI64 (rd, imm64) -> { inst = arith_ri64_real; write_locs = [Reg (IR rd)]; read_locs = []; imm = (Some (I64 imm64)) ; is_control = false}
+  | PArithRI32 (rd, imm32) -> { inst = arith_ri32_real; write_locs = [Reg (IR rd)]; read_locs = []; imm = (Some (I32 imm32)) ; is_control = false;
+                                read_at_id = []; read_at_e1 = [] }
+  | PArithRI64 (rd, imm64) -> { inst = arith_ri64_real; write_locs = [Reg (IR rd)]; read_locs = []; imm = (Some (I64 imm64)) ; is_control = false;
+                                read_at_id = []; read_at_e1 = [] }
   | PArithRF32 (rd, f) -> { inst = arith_rf32_real; write_locs = [Reg (IR rd)]; read_locs = [];
-                            imm = (Some (I32 (Floats.Float32.to_bits f))); is_control = false}
+                            imm = (Some (I32 (Floats.Float32.to_bits f))); is_control = false; read_at_id = []; read_at_e1 = []}
   | PArithRF64 (rd, f) -> { inst = arith_rf64_real; write_locs = [Reg (IR rd)]; read_locs = [];
-                            imm = (Some (I64 (Floats.Float.to_bits f))); is_control = false}
+                            imm = (Some (I64 (Floats.Float.to_bits f))); is_control = false; read_at_id = []; read_at_e1 = []}
   | PArithRR (i, rd, rs) -> arith_rr_rec i (IR rd) (IR rs)
   | PArithR (i, rd) -> arith_r_rec i (IR rd)
 
 let load_rec i = match i with
   | PLoadRRO (i, rs1, rs2, imm) ->
-     { inst = load_real i; write_locs = [Reg (IR rs1)]; read_locs = [Mem; Reg (IR rs2)]; imm = (Some (Off imm)) ; is_control = false}
+      { inst = load_real i; write_locs = [Reg (IR rs1)]; read_locs = [Mem; Reg (IR rs2)]; imm = (Some (Off imm)) ; is_control = false;
+        read_at_id = []; read_at_e1 = [] }
   | PLoadQRRO(rs, ra, imm) ->
      let (rs0, rs1) = gpreg_q_expand rs in
-     { inst = loadqrro_real; write_locs = [Reg (IR rs0); Reg (IR rs1)]; read_locs = [Mem; Reg (IR ra)]; imm = (Some (Off imm)) ; is_control = false}
+     { inst = loadqrro_real; write_locs = [Reg (IR rs0); Reg (IR rs1)]; read_locs = [Mem; Reg (IR ra)]; imm = (Some (Off imm)) ; is_control = false;
+       read_at_id = []; read_at_e1 = [] }
   | PLoadORRO(rs, ra, imm) ->
      let (((rs0, rs1), rs2), rs3) = gpreg_o_expand rs in
-     { inst = loadorro_real; write_locs = [Reg (IR rs0); Reg (IR rs1); Reg (IR rs2); Reg (IR rs3)]; read_locs = [Mem; Reg (IR ra)]; imm = (Some (Off imm)) ; is_control = false}
+     {  inst = loadorro_real; write_locs = [Reg (IR rs0); Reg (IR rs1); Reg (IR rs2); Reg (IR rs3)]; read_locs = [Mem; Reg (IR ra)];
+        imm = (Some (Off imm)) ; is_control = false; read_at_id = []; read_at_e1 = []}
   | PLoadRRR (i, rs1, rs2, rs3) | PLoadRRRXS (i, rs1, rs2, rs3) ->
-     { inst = load_real i; write_locs = [Reg (IR rs1)]; read_locs = [Mem; Reg (IR rs2); Reg (IR rs3)]; imm = None ; is_control = false}
+      { inst = load_real i; write_locs = [Reg (IR rs1)]; read_locs = [Mem; Reg (IR rs2); Reg (IR rs3)]; imm = None ; is_control = false;
+        read_at_id = []; read_at_e1 = [] }
 
 let store_rec i = match i with
-  | PStoreRRO (i, rs1, rs2, imm) ->
-     { inst = store_real i; write_locs = [Mem]; read_locs = [Reg (IR rs1); Reg (IR rs2)]; imm = (Some (Off imm))
-                                      ; is_control = false}
+  | PStoreRRO (i, rs, ra, imm) ->
+      {  inst = store_real i; write_locs = [Mem]; read_locs = [Reg (IR rs); Reg (IR ra)]; imm = (Some (Off imm));
+        read_at_id = []; read_at_e1 = [Reg (IR rs)] ; is_control = false}
   | PStoreQRRO (rs, ra, imm) ->
      let (rs0, rs1) = gpreg_q_expand rs in
-     { inst = storeqrro_real; write_locs = [Mem]; read_locs = [Reg (IR rs0); Reg (IR rs1); Reg (IR ra)]; imm = (Some (Off imm))
-                                      ; is_control = false}
+     {  inst = storeqrro_real; write_locs = [Mem]; read_locs = [Reg (IR rs0); Reg (IR rs1); Reg (IR ra)]; imm = (Some (Off imm));
+        read_at_id = []; read_at_e1 = [Reg (IR rs0); Reg (IR rs1)] ; is_control = false}
   | PStoreORRO (rs, ra, imm) ->
      let (((rs0, rs1), rs2), rs3) = gpreg_o_expand rs in
-     { inst = storeorro_real; write_locs = [Mem]; read_locs = [Reg (IR rs0); Reg (IR rs1); Reg (IR rs2); Reg (IR rs3); Reg (IR ra)]; imm = (Some (Off imm))
-                                      ; is_control = false}
-  | PStoreRRR (i, rs1, rs2, rs3)  | PStoreRRRXS (i, rs1, rs2, rs3) -> { inst = store_real i; write_locs = [Mem]; read_locs = [Reg (IR rs1); Reg (IR rs2); Reg (IR rs3)]; imm = None
-                                      ; is_control = false}
+     {  inst = storeorro_real; write_locs = [Mem]; read_locs = [Reg (IR rs0); Reg (IR rs1); Reg (IR rs2); Reg (IR rs3); Reg (IR ra)];
+        imm = (Some (Off imm)); read_at_id = []; read_at_e1 = [Reg (IR rs0); Reg (IR rs1); Reg (IR rs2); Reg (IR rs3)]; is_control = false}
+  | PStoreRRR (i, rs, ra1, ra2)  | PStoreRRRXS (i, rs, ra1, ra2) ->
+     {  inst = store_real i; write_locs = [Mem]; read_locs = [Reg (IR rs); Reg (IR ra1); Reg (IR ra2)]; imm = None;
+        read_at_id = []; read_at_e1 = [Reg (IR rs)]; is_control = false}
 
-let get_rec (rd:gpreg) rs = { inst = get_real; write_locs = [Reg (IR rd)]; read_locs = [Reg rs]; imm = None; is_control = false }
+let get_rec (rd:gpreg) rs = { inst = get_real; write_locs = [Reg (IR rd)]; read_locs = [Reg rs]; imm = None; is_control = false;
+                              read_at_id = []; read_at_e1 = [] }
 
-let set_rec rd (rs:gpreg) = { inst = set_real; write_locs = [Reg rd]; read_locs = [Reg (IR rs)]; imm = None; is_control = false }
+let set_rec rd (rs:gpreg) = { inst = set_real; write_locs = [Reg rd]; read_locs = [Reg (IR rs)]; imm = None; is_control = false;
+                              read_at_id = [Reg (IR rs)]; read_at_e1 = [] }
 
 let basic_rec i =
   match i with
@@ -311,20 +337,24 @@ let basic_rec i =
   | Pfreeframe (_, _) -> raise OpaqueInstruction
   | Pget (rd, rs) -> get_rec rd rs
   | Pset (rd, rs) -> set_rec rd rs
-  | Pnop -> { inst = nop_real; write_locs = []; read_locs = []; imm = None ; is_control = false}
+  | Pnop -> { inst = nop_real; write_locs = []; read_locs = []; imm = None ; is_control = false; read_at_id = []; read_at_e1 = []}
 
 let expand_rec = function
   | Pbuiltin _ -> raise OpaqueInstruction
 
 let ctl_flow_rec = function
-  | Pret -> { inst = ret_real; write_locs = []; read_locs = [Reg RA]; imm = None ; is_control = true}
-  | Pcall lbl -> { inst = call_real; write_locs = [Reg RA]; read_locs = []; imm = None ; is_control = true}
-  | Picall r -> { inst = icall_real; write_locs = [Reg RA]; read_locs = [Reg (IR r)]; imm = None; is_control = true}
-  | Pgoto lbl -> { inst = goto_real; write_locs = []; read_locs = []; imm = None ; is_control = true}
-  | Pigoto r -> { inst = igoto_real; write_locs = []; read_locs = [Reg (IR r)]; imm = None ; is_control = true}
-  | Pj_l lbl -> { inst = goto_real; write_locs = []; read_locs = []; imm = None ; is_control = true}
-  | Pcb (bt, rs, lbl) -> { inst = cb_real; write_locs = []; read_locs = [Reg (IR rs)]; imm = None ; is_control = true}
-  | Pcbu (bt, rs, lbl) -> { inst = cbu_real; write_locs = []; read_locs = [Reg (IR rs)]; imm = None ; is_control = true}
+  | Pret -> { inst = ret_real; write_locs = []; read_locs = [Reg RA]; imm = None ; is_control = true; read_at_id = [Reg RA]; read_at_e1 = []}
+  | Pcall lbl -> { inst = call_real; write_locs = [Reg RA]; read_locs = []; imm = None ; is_control = true; read_at_id = []; read_at_e1 = []}
+  | Picall r -> { inst = icall_real; write_locs = [Reg RA]; read_locs = [Reg (IR r)]; imm = None; is_control = true;
+                  read_at_id = [Reg (IR r)]; read_at_e1 = [] }
+  | Pgoto lbl -> { inst = goto_real; write_locs = []; read_locs = []; imm = None ; is_control = true; read_at_id = []; read_at_e1 = []}
+  | Pigoto r -> { inst = igoto_real; write_locs = []; read_locs = [Reg (IR r)]; imm = None ; is_control = true;
+                  read_at_id = [Reg (IR r)]; read_at_e1 = [] }
+  | Pj_l lbl -> { inst = goto_real; write_locs = []; read_locs = []; imm = None ; is_control = true; read_at_id = []; read_at_e1 = []}
+  | Pcb (bt, rs, lbl) -> { inst = cb_real; write_locs = []; read_locs = [Reg (IR rs)]; imm = None ; is_control = true;
+                           read_at_id = [Reg (IR rs)]; read_at_e1 = [] }
+  | Pcbu (bt, rs, lbl) -> { inst = cbu_real; write_locs = []; read_locs = [Reg (IR rs)]; imm = None ; is_control = true;
+                           read_at_id = [Reg (IR rs)]; read_at_e1 = [] }
   | Pjumptable (r, _) -> raise OpaqueInstruction (* { inst = "Pjumptable"; write_locs = [Reg (IR GPR62); Reg (IR GPR63)]; read_locs = [Reg (IR r)]; imm = None ; is_control = true} *)
 
 let control_rec i =
@@ -350,6 +380,8 @@ let instruction_recs bb = (basic_recs bb.body) @ (exit_rec bb.exit)
 type inst_info = {
   write_locs : location list;
   read_locs : location list;
+  reads_at_id : bool;
+  reads_at_e1 : bool;
   is_control : bool;
   usage: int array; (* resources consumed by the instruction *)
   latency: int;
@@ -582,6 +614,16 @@ let rec_to_usage r =
   | Fnarrowdw -> alu_full
   | Faddd | Faddw | Fsbfd | Fsbfw | Fmuld | Fmulw -> mau
 
+let inst_info_to_dlatency i = 
+  begin
+  assert (not (i.reads_at_id && i.reads_at_e1));
+  match i.reads_at_id with
+  | true -> +1
+  | false -> (match i.reads_at_e1 with
+              | true -> -1
+              | false -> 0)
+  end
+
 let real_inst_to_latency = function
   | Nop -> 0 (* Only goes through ID *)
   | Addw | Andw | Compw | Orw | Sbfw | Sbfxw | Sraw | Srsw | Srlw | Sllw | Xorw
@@ -601,10 +643,17 @@ let real_inst_to_latency = function
   | Fnegd | Fnegw | Fabsd | Fabsw | Fwidenlwd | Fnarrowdw -> 1
   | Faddd | Faddw | Fsbfd | Fsbfw | Fmuld | Fmulw -> 4
 
+let rec empty_inter la = function
+  | [] -> true
+  | b::lb -> if (List.mem b la) then false else empty_inter la lb
+
 let rec_to_info r : inst_info =
   let usage = rec_to_usage r
   and latency = real_inst_to_latency r.inst
-  in { write_locs = r.write_locs; read_locs = r.read_locs; usage=usage; latency=latency; is_control=r.is_control }
+  and reads_at_id = not (empty_inter r.read_locs r.read_at_id)
+  and reads_at_e1 = not (empty_inter r.read_locs r.read_at_e1)
+  in {  write_locs = r.write_locs; read_locs = r.read_locs; usage=usage; latency=latency; is_control=r.is_control;
+        reads_at_id = reads_at_id; reads_at_e1 = reads_at_e1 }
 
 let instruction_infos bb = List.map rec_to_info (instruction_recs bb)
 
@@ -651,6 +700,11 @@ let rec get_accesses hashloc (ll: location list) = match ll with
   | [] -> []
   | loc :: llocs -> (find_in_hash hashloc loc) @ (get_accesses hashloc llocs)
 
+let compute_latency (ifrom: inst_info) (ito: inst_info) =
+  let dlat = inst_info_to_dlatency ito
+  in let lat = ifrom.latency + dlat
+  in assert (lat >= 0); if (lat == 0) then 1 else lat
+
 let latency_constraints bb =
   let written = LocHash.create 70
   and read = LocHash.create 70
@@ -662,8 +716,10 @@ let latency_constraints bb =
     and waw = get_accesses written i.write_locs
     and war = get_accesses read i.write_locs
     in begin
-      List.iter (fun i -> constraints := {instr_from = i; instr_to = !count; latency = (List.nth instr_infos i).latency} :: !constraints) raw;
-      List.iter (fun i -> constraints := {instr_from = i; instr_to = !count; latency = (List.nth instr_infos i).latency} :: !constraints) waw;
+      List.iter (fun i -> constraints := {instr_from = i; instr_to = !count;
+                                          latency = compute_latency (List.nth instr_infos i) (List.nth instr_infos !count)} :: !constraints) raw;
+      List.iter (fun i -> constraints := {instr_from = i; instr_to = !count;
+                                          latency = compute_latency (List.nth instr_infos i) (List.nth instr_infos !count)} :: !constraints) waw;
       List.iter (fun i -> constraints := {instr_from = i; instr_to = !count; latency = 0} :: !constraints) war;
       if i.is_control then List.iter (fun n -> constraints := {instr_from = n; instr_to = !count; latency = 0} :: !constraints) (intlist !count);
       (* Updating "read" and "written" hashmaps *)
@@ -905,10 +961,18 @@ let smart_schedule bb =
       in bundles @ (f lbb)
   in f lbb
 
-(** Called schedule function from Coq *)
-
-let schedule bb = 
+let bblock_to_bundles bb =
   if debug then (eprintf "###############################\n"; Printf.eprintf "SCHEDULING\n"; print_bb stderr bb);
   (* print_problem (build_problem bb); *)
   if Compopts.optim_postpass () then smart_schedule bb else dumb_schedule bb
 
+(** To deal with the Coq Axiom schedule : bblock -> (list (list basic)) * option control *)
+
+let rec bundles_to_coq_schedule = function
+  | [] -> ([], None)
+  | bb :: [] -> ([bb.body], bb.exit)
+  | bb :: lbb -> let (llb, oc) = bundles_to_coq_schedule lbb in (bb.body :: llb, oc)
+
+(** Called schedule function from Coq *)
+
+let schedule bb = let toto = bundles_to_coq_schedule @@ bblock_to_bundles bb in toto
diff --git a/mppa_k1c/PostpassSchedulingproof.v b/mppa_k1c/PostpassSchedulingproof.v
index 5d4fc881..21af276b 100644
--- a/mppa_k1c/PostpassSchedulingproof.v
+++ b/mppa_k1c/PostpassSchedulingproof.v
@@ -240,7 +240,7 @@ Lemma exec_basic_instr_pc_var:
   exec_basic_instr ge i rs m = Next rs' m' ->
   exec_basic_instr ge i (rs # PC <- v) m = Next (rs' # PC <- v) m'.
 Proof.
-  intros. unfold exec_basic_instr in *. unfold parexec_basic_instr in *. destruct i.
+  intros. unfold exec_basic_instr in *. unfold bstep in *. destruct i.
   - unfold exec_arith_instr in *. destruct i; destruct i.
       all: try (exploreInst; inv H; apply next_eq; auto;
       apply functional_extensionality; intros; rewrite regset_double_set; auto; discriminate).
@@ -681,7 +681,7 @@ Lemma transf_exec_basic_instr:
   forall i rs m, exec_basic_instr ge i rs m = exec_basic_instr tge i rs m.
 Proof.
   intros. pose symbol_address_preserved.
-  unfold exec_basic_instr. unfold parexec_basic_instr. exploreInst; simpl; auto; try congruence.
+  unfold exec_basic_instr. unfold bstep. exploreInst; simpl; auto; try congruence.
   unfold parexec_arith_instr; unfold arith_eval_r; exploreInst; simpl; auto; try congruence.
 Qed.
 
@@ -736,7 +736,7 @@ Proof.
   induction 1; intros; inv MS.
   - exploit function_ptr_translated; eauto. intros (tf & FFP & TRANSF). monadInv TRANSF.
     exploit transf_find_bblock; eauto. intros (lbb & VES & c & TAIL).
-    exploit verified_schedule_correct; eauto. intros (tbb & CONC & BBEQ).
+    exploit verified_schedule_correct; eauto. intros (tbb & CONC & BBEQ). inv CONC. rename H3 into CONC.
     assert (NOOV: size_blocks x.(fn_blocks) <= Ptrofs.max_unsigned).
       eapply transf_function_no_overflow; eauto. 
 
@@ -798,7 +798,7 @@ Lemma verified_schedule_nob_checks_alls_bundles bb lb bundle:
   List.In bundle lb -> verify_par_bblock bundle = OK tt.
 Proof.
   unfold verified_schedule_nob. intros H;
-  monadInv H. destruct x3. 
+  monadInv H. destruct x4.
   intros; eapply verified_par_checks_alls_bundles; eauto.
 Qed.
 
diff --git a/mppa_k1c/PrintOp.ml b/mppa_k1c/PrintOp.ml
index 575fa94f..67f87000 100644
--- a/mppa_k1c/PrintOp.ml
+++ b/mppa_k1c/PrintOp.ml
@@ -72,7 +72,7 @@ let int_of_s14 = function
   | SHIFT3 -> 3
   | SHIFT4 -> 4
             
-let print_operation reg pp = function
+let print_operation reg pp op = match op with
   | Omove, [r1] -> reg pp r1
   | Ointconst n, [] -> fprintf pp "%ld" (camlint_of_coqint n)
   | Olongconst n, [] -> fprintf pp "%LdL" (camlint64_of_coqint n)
@@ -86,9 +86,15 @@ let print_operation reg pp = function
   | Ocast16signed, [r1] -> fprintf pp "int16signed(%a)" reg r1
   | Oadd, [r1;r2] -> fprintf pp "%a + %a" reg r1 reg r2
   | Oaddimm n, [r1] -> fprintf pp "%a + %ld" reg r1 (camlint_of_coqint n)
+  | Oaddx(s14), [r1; r2] -> fprintf pp "(%a << %d) + %a" reg r1 (int_of_s14 s14) reg r2
+  | Oaddximm(s14, imm), [r1] -> fprintf pp "(%a << %d) + %ld" reg r1 (int_of_s14 s14) (camlint_of_coqint imm)
   | Oneg, [r1] -> fprintf pp "-(%a)" reg r1
   | Osub, [r1;r2] -> fprintf pp "%a - %a" reg r1 reg r2
+  | Orevsubimm(imm), [r1] -> fprintf pp "%ld - %a" (camlint_of_coqint imm) reg r1
+  | Orevsubx(s14), [r1; r2] -> fprintf pp "%a - (%a << %d)" reg r2 reg r1 (int_of_s14 s14)
+  | Orevsubximm(s14, imm), [r1] -> fprintf pp "%ld - (%a << %d)" (camlint_of_coqint imm) reg r1 (int_of_s14 s14)
   | Omul, [r1;r2] -> fprintf pp "%a * %a" reg r1 reg r2
+  | Omulimm(imm), [r1] -> fprintf pp "%a * %ld" reg r1 (camlint_of_coqint imm)
   | Omulhs, [r1;r2] -> fprintf pp "%a *hs %a" reg r1 reg r2
   | Omulhu, [r1;r2] -> fprintf pp "%a *hu %a" reg r1 reg r2
   | Odiv, [r1;r2] -> fprintf pp "%a /s %a" reg r1 reg r2
@@ -101,6 +107,13 @@ let print_operation reg pp = function
   | Oorimm n, [r1] ->  fprintf pp "%a | %ld" reg r1 (camlint_of_coqint n)
   | Oxor, [r1;r2] -> fprintf pp "%a ^ %a" reg r1 reg r2
   | Oxorimm n, [r1] -> fprintf pp "%a ^ %ld" reg r1 (camlint_of_coqint n)
+  | Onxor, [r1;r2] -> fprintf pp "~(%a ^ %a)" reg r1 reg r2
+  | Onxorimm n, [r1] -> fprintf pp "~(%a ^ %ld)" reg r1 (camlint_of_coqint n)
+  | Onot, [r1] -> fprintf pp "~%a" reg r1
+  | Oandn, [r1; r2] -> fprintf pp "(~%a) & %a" reg r1 reg r2
+  | Oandnimm n, [r1] -> fprintf pp "(~%a) & %ld" reg r1 (camlint_of_coqint n)
+  | Oorn, [r1;r2] -> fprintf pp "(~%a) | %a" reg r1 reg r2
+  | Oornimm n, [r1] -> fprintf pp "(~%a) | %ld" reg r1 (camlint_of_coqint n)
   | Oshl, [r1;r2] -> fprintf pp "%a << %a" reg r1 reg r2
   | Oshlimm n, [r1] -> fprintf pp "%a << %ld" reg r1 (camlint_of_coqint n)
   | Oshr, [r1;r2] -> fprintf pp "%a >>s %a" reg r1 reg r2
@@ -108,6 +121,10 @@ let print_operation reg pp = function
   | Oshru, [r1;r2] -> fprintf pp "%a >>u %a" reg r1 reg r2
   | Oshruimm n, [r1] -> fprintf pp "%a >>u %ld" reg r1 (camlint_of_coqint n)
   | Oshrximm n, [r1] -> fprintf pp "%a >>x %ld" reg r1 (camlint_of_coqint n)
+  | Ororimm n, [r1] -> fprintf pp "(%a ror %ld)" reg r1 (camlint_of_coqint n)
+  | Omadd, [r1; r2; r3] -> fprintf pp "%a + %a * %a" reg r1 reg r2 reg r3
+  | Omaddimm imm, [r1; r2] -> fprintf pp "%a + %a * %ld" reg r1 reg r2 (camlint_of_coqint imm)
+  | Omsub, [r1; r2; r3] -> fprintf pp "%a - %a * %a" reg r1 reg r2 reg r3
 
   | Omakelong, [r1;r2] -> fprintf pp "makelong(%a,%a)" reg r1 reg r2
   | Olowlong, [r1] -> fprintf pp "lowlong(%a)" reg r1
@@ -116,9 +133,15 @@ let print_operation reg pp = function
   | Ocast32unsigned, [r1] -> fprintf pp "long32unsigned(%a)" reg r1
   | Oaddl, [r1;r2] -> fprintf pp "%a +l %a" reg r1 reg r2
   | Oaddlimm n, [r1] -> fprintf pp "%a +l %Ld" reg r1 (camlint64_of_coqint n)
+  | Oaddxl(s14), [r1; r2] -> fprintf pp "(%a <<l %d) +l %a" reg r1 (int_of_s14 s14) reg r2
+  | Oaddxlimm(s14, imm), [r1] -> fprintf pp "(%a <<l %d) +l %Ld" reg r1 (int_of_s14 s14) (camlint64_of_coqint imm)
+  | Orevsublimm(imm), [r1] -> fprintf pp "%Ld -l %a" (camlint64_of_coqint imm) reg r1
+  | Orevsubxl(s14), [r1; r2] -> fprintf pp "%a -l (%a <<l %d)" reg r2 reg r1  (int_of_s14 s14)
+  | Orevsubxlimm(s14, imm), [r1] -> fprintf pp "%Ld -l (%a <<l %d)" (camlint64_of_coqint imm) reg r1 (int_of_s14 s14)
   | Onegl, [r1] -> fprintf pp "-l (%a)" reg r1
   | Osubl, [r1;r2] -> fprintf pp "%a -l %a" reg r1 reg r2
   | Omull, [r1;r2] -> fprintf pp "%a *l %a" reg r1 reg r2
+  | Omullimm(imm), [r1] -> fprintf pp "%a *l %Ld" reg r1 (camlint64_of_coqint imm)
   | Omullhs, [r1;r2] -> fprintf pp "%a *lhs %a" reg r1 reg r2
   | Omullhu, [r1;r2] -> fprintf pp "%a *lhu %a" reg r1 reg r2
   | Odivl, [r1;r2] -> fprintf pp "%a /ls %a" reg r1 reg r2
@@ -129,8 +152,17 @@ let print_operation reg pp = function
   | Oandlimm n, [r1] -> fprintf pp "%a &l %Ld" reg r1 (camlint64_of_coqint n)
   | Oorl, [r1;r2] -> fprintf pp "%a |l %a" reg r1 reg r2
   | Oorlimm n, [r1] ->  fprintf pp "%a |l %Ld" reg r1 (camlint64_of_coqint n)
+  | Onorl, [r1; r2] -> fprintf pp "~(%a |l %a)" reg r1 reg r2
+  | Onorlimm n, [r1] -> fprintf pp "~(%a |l %Ld)" reg r1 (camlint64_of_coqint n)
   | Oxorl, [r1;r2] -> fprintf pp "%a ^l %a" reg r1 reg r2
   | Oxorlimm n, [r1] -> fprintf pp "%a ^l %Ld" reg r1 (camlint64_of_coqint n)
+  | Onxorl, [r1;r2] -> fprintf pp "~(%a ^l %a)" reg r1 reg r2
+  | Onxorlimm n, [r1] -> fprintf pp "~(%a ^l %Ld)" reg r1 (camlint64_of_coqint n)
+  | Onotl, [r1] -> fprintf pp "~%a" reg r1
+  | Oandnl, [r1;r2] -> fprintf pp "(~%a) &l %a" reg r1 reg r2
+  | Oandnlimm n, [r1] -> fprintf pp "(~%a) &l %Ld" reg r1 (camlint64_of_coqint n)
+  | Oornl, [r1;r2] -> fprintf pp "(~%a) |l %a" reg r1 reg r2
+  | Oornlimm n, [r1;r2] -> fprintf pp "(~%a) |l %Ld" reg r1 (camlint64_of_coqint n)
   | Oshll, [r1;r2] -> fprintf pp "%a <<l %a" reg r1 reg r2
   | Oshllimm n, [r1] -> fprintf pp "%a <<l %Ld" reg r1 (camlint64_of_coqint n)
   | Oshrl, [r1;r2] -> fprintf pp "%a >>ls %a" reg r1 reg r2
@@ -138,6 +170,9 @@ let print_operation reg pp = function
   | Oshrlu, [r1;r2] -> fprintf pp "%a >>lu %a" reg r1 reg r2
   | Oshrluimm n, [r1] -> fprintf pp "%a >>lu %ld" reg r1 (camlint_of_coqint n)
   | Oshrxlimm n, [r1] -> fprintf pp "%a >>lx %ld" reg r1 (camlint_of_coqint n)
+  | Omaddl, [r1; r2; r3] -> fprintf pp "%a +l %a *l %a" reg r1 reg r2 reg r3
+  | Omaddlimm imm, [r1; r2] -> fprintf pp "%a +l %a *l %Ld" reg r1 reg r2 (camlint64_of_coqint imm)
+  | Omsubl, [r1; r2; r3] -> fprintf pp "%a -l %a *l %a" reg r1 reg r2 reg r3
 
   | Onegf, [r1] -> fprintf pp "negf(%a)" reg r1
   | Oabsf, [r1] -> fprintf pp "absf(%a)" reg r1
@@ -155,14 +190,14 @@ let print_operation reg pp = function
   | Ofloatofsingle, [r1] -> fprintf pp "floatofsingle(%a)" reg r1
   | Ointoffloat, [r1] -> fprintf pp "intoffloat(%a)" reg r1
   | Ointuoffloat, [r1] -> fprintf pp "intuoffloat(%a)" reg r1
-  | Olongoffloat, [r1] -> fprintf pp "longoffloat(%a)" reg r1
-  | Olonguoffloat, [r1] -> fprintf pp "longuoffloat(%a)" reg r1
-  | Ofloatoflong, [r1] -> fprintf pp "floatoflong(%a)" reg r1
-  | Ofloatoflongu, [r1] -> fprintf pp "floatoflongu(%a)" reg r1
   | Ointofsingle, [r1] -> fprintf pp "intofsingle(%a)" reg r1
   | Ointuofsingle, [r1] -> fprintf pp "intuofsingle(%a)" reg r1
   | Osingleofint, [r1] -> fprintf pp "singleofint(%a)" reg r1
   | Osingleofintu, [r1] -> fprintf pp "singleofintu(%a)" reg r1
+  | Olongoffloat, [r1] -> fprintf pp "longoffloat(%a)" reg r1
+  | Olonguoffloat, [r1] -> fprintf pp "longuoffloat(%a)" reg r1
+  | Ofloatoflong, [r1] -> fprintf pp "floatoflong(%a)" reg r1
+  | Ofloatoflongu, [r1] -> fprintf pp "floatoflongu(%a)" reg r1
   | Olongofsingle, [r1] -> fprintf pp "longofsingle(%a)" reg r1
   | Olonguofsingle, [r1] -> fprintf pp "longuofsingle(%a)" reg r1
   | Osingleoflong, [r1] -> fprintf pp "singleoflong(%a)" reg r1
@@ -184,29 +219,12 @@ let print_operation reg pp = function
   | Osellimm(cond0, imm), [r1; rc] ->
      print_condition0 reg pp cond0 rc;
      fprintf pp " ? %a :l %Ld" reg r1 (camlint64_of_coqint imm)
-  | Oaddx(s14), [r1; r2] -> fprintf pp "(%a << %d) + %a" reg r1 (int_of_s14 s14) reg r2
-  | Oaddximm(s14, imm), [r1] -> fprintf pp "(%a << %d) + %ld" reg r1 (int_of_s14 s14) (camlint_of_coqint imm)
-  | Oaddxl(s14), [r1; r2] -> fprintf pp "(%a <<l %d) +l %a" reg r1 (int_of_s14 s14) reg r2
-  | Oaddxlimm(s14, imm), [r1] -> fprintf pp "(%a <<l %d) +l %Ld" reg r1 (int_of_s14 s14) (camlint64_of_coqint imm)
-  | Orevsubimm(imm), [r1] -> fprintf pp "%ld - %a" (camlint_of_coqint imm) reg r1 
-  | Orevsubximm(s14, imm), [r1] -> fprintf pp "%ld - (%a << %d)" (camlint_of_coqint imm) reg r1 (int_of_s14 s14)
-  | Orevsublimm(imm), [r1] -> fprintf pp "%Ld -l %a" (camlint64_of_coqint imm) reg r1 
-  | Orevsubxlimm(s14, imm), [r1] -> fprintf pp "%Ld -l (%a <<l %d)" (camlint64_of_coqint imm) reg r1 (int_of_s14 s14)
-  | Orevsubx(s14), [r1; r2] -> fprintf pp "%a - (%a << %d)" reg r2 reg r1 (int_of_s14 s14)
-  | Orevsubxl(s14), [r1; r2] -> fprintf pp "%a -l (%a <<l %d)" reg r2 reg r1  (int_of_s14 s14)
-  | Omulimm(imm), [r1] -> fprintf pp "%a * %ld" reg r1 (camlint_of_coqint imm)
-  | Omullimm(imm), [r1] -> fprintf pp "%a *l %Ld" reg r1 (camlint64_of_coqint imm)
-  | Omadd, [r1; r2; r3] -> fprintf pp "%a + %a * %a" reg r1 reg r2 reg r3
-  | Omaddl, [r1; r2; r3] -> fprintf pp "%a +l %a *l %a" reg r1 reg r2 reg r3
-  | (Omaddimm imm), [r1; r2] -> fprintf pp "%a + %a * %ld" reg r1 reg r2 (camlint_of_coqint imm)
-  | (Omaddlimm imm), [r1; r2] -> fprintf pp "%a +l %a *l %Ld" reg r1 reg r2 (camlint64_of_coqint imm)
-  | Omsub, [r1; r2; r3] -> fprintf pp "%a - %a * %a" reg r1 reg r2 reg r3
-  | Omsubl, [r1; r2; r3] -> fprintf pp "%a -l %a *l %a" reg r1 reg r2 reg r3
  | _, _ -> fprintf pp "<bad operator>"
 
 let print_addressing reg pp = function
-  | Aindexed n, [r1] -> fprintf pp "%a + %Ld" reg r1 (camlint64_of_ptrofs n)
+  | Aindexed2XS scale, [r1;r2] -> fprintf pp "%a + (%a << %ld)" reg r1 reg r2 (camlint_of_coqint scale)
   | Aindexed2, [r1;r2] -> fprintf pp "%a + %a" reg r1 reg r2
+  | Aindexed n, [r1] -> fprintf pp "%a + %Ld" reg r1 (camlint64_of_ptrofs n)
   | Aglobal(id, ofs), [] ->
       fprintf pp "\"%s\" + %Ld" (extern_atom id) (camlint64_of_ptrofs ofs)
   | Ainstack ofs, [] -> fprintf pp "stack(%Ld)" (camlint64_of_ptrofs ofs)
diff --git a/mppa_k1c/TargetPrinter.ml b/mppa_k1c/TargetPrinter.ml
index 674695d9..c9822e13 100644
--- a/mppa_k1c/TargetPrinter.ml
+++ b/mppa_k1c/TargetPrinter.ml
@@ -399,10 +399,10 @@ module Target (*: TARGET*) =
          fprintf oc "	itouchl	0[%a]\n" ireg addr
       | Pdzerol addr ->
          fprintf oc "	dzerol	0[%a]\n" ireg addr
-      | Pafaddd(addr, incr_res) ->
-         fprintf oc "	afaddd	0[%a] = %a\n" ireg addr ireg incr_res
+(*    | Pafaddd(addr, incr_res) ->
+         fprintfoc "	afaddd	0[%a] = %a\n" ireg addr ireg incr_res
       | Pafaddw(addr, incr_res) ->
-         fprintf oc "	afaddw	0[%a] = %a\n" ireg addr ireg incr_res
+         fprintfoc "	afaddw	0[%a] = %a\n" ireg addr ireg incr_res *) (* see #157 *)
       | Palclrd(res, addr) ->
          fprintf oc "	alclrd	%a = 0[%a]\n" ireg res ireg addr
       | Palclrw(res, addr) ->
diff --git a/test/monniaux/cycles.h b/test/monniaux/cycles.h
index 8012a013..21541145 100644
--- a/test/monniaux/cycles.h
+++ b/test/monniaux/cycles.h
@@ -46,5 +46,5 @@ static inline cycle_t get_cycle(void) { return 0; }
 #ifdef MAX_MEASURES
   #define TIMEINIT(i) {_last_stop[i] = get_cycle();}
   #define TIMESTOP(i) {cycle_t cur = get_cycle(); _total_cycles[i] += cur - _last_stop[i]; _last_stop[i] = cur;}
-  #define TIMEPRINT(n) { for (int i = 0; i <= n; i++) printf("(%d) cycles: %" PRIu64 "\n", i, _total_cycles[i]); }
+  #define TIMEPRINT(n) { for (int i = 0; i <= n; i++) printf("%d cycles: %" PRIu64 "\n", i, _total_cycles[i]); }
 #endif
diff --git a/test/monniaux/rules.mk b/test/monniaux/rules.mk
index 4dfeac3b..9d05b4d6 100644
--- a/test/monniaux/rules.mk
+++ b/test/monniaux/rules.mk
@@ -12,6 +12,7 @@ CLOCK=../clock
 
 # Maximum amount of time measures (see cycles.h)
 MAX_MEASURES=10
+MEASURES?=time
 
 # Flags common to both compilers, then to gcc, then to ccomp
 ALL_CFLAGS+=-Wall -D__K1C_COS__ -DMAX_MEASURES=$(MAX_MEASURES)
@@ -28,12 +29,12 @@ EXECUTE_CYCLES?=k1-cluster --syscall=libstd_scalls.so --cycle-based --
 
 # You can define up to GCC4FLAGS and CCOMP4FLAGS
 GCC0FLAGS?=
-GCC1FLAGS?=$(ALL_GCCFLAGS) -O1
+GCC1FLAGS?=$(ALL_GCCFLAGS) -O1 -g
 GCC2FLAGS?=$(ALL_GCCFLAGS) -O2
 GCC3FLAGS?=$(ALL_GCCFLAGS) -O3
 GCC4FLAGS?=
 CCOMP0FLAGS?=
-CCOMP1FLAGS?=$(ALL_CCOMPFLAGS) -O1
+CCOMP1FLAGS?=$(ALL_CCOMPFLAGS) -O1 -g
 CCOMP2FLAGS?=$(ALL_CCOMPFLAGS)
 CCOMP3FLAGS?=
 CCOMP4FLAGS?=
@@ -131,7 +132,7 @@ endif
 
 measures.csv: $(OUTFILES)
 	@echo $(FIRSTLINE) > $@
-	@for i in "$(MEASURES)"; do\
+	@for i in $(MEASURES); do\
 		first=$$(grep "$$i cycles" $(firstword $(OUTFILES)));\
 		if test ! -z "$$first"; then\
 			if [ "$$i" != "time" ]; then\
diff --git a/test/monniaux/run_benches.sh b/test/monniaux/run_benches.sh
index a562a117..5f9f22cb 100755
--- a/test/monniaux/run_benches.sh
+++ b/test/monniaux/run_benches.sh
@@ -6,7 +6,7 @@ for bench in $benches; do
   echo "(cd $bench && make -j5 run)" >> commands.txt
 done
 
-cat commands.txt | xargs -n1 -I{} -P5 bash -c '{}'
+cat commands.txt | xargs -n1 -I{} -P4 bash -c '{}'
 
 ##
 # Gather all the CSV files
diff --git a/test/mppa/asm_coverage b/test/mppa/asm_coverage
deleted file mode 160000
-Subproject a9c62b61552a9e9fd0ebf43df5ee0d5b88bb094
diff --git a/test/mppa/coverage.sh b/test/mppa/coverage.sh
index 0a057ff9..42ed4182 100644..100755
--- a/test/mppa/coverage.sh
+++ b/test/mppa/coverage.sh
@@ -1,17 +1,24 @@
-asmdir=$1
+#!/bin/bash
+
+printer=../../mppa_k1c/TargetPrinter.ml
+asmdir=instr/asm/
 to_cover_raw=/tmp/to_cover_raw
 to_cover=/tmp/to_cover
 covered_raw=/tmp/covered_raw
 covered=/tmp/covered
 
-sed -n "s/^.*fprintf oc \"	\(.*\)	.*/\1/p" ../../mppa_k1c/TargetPrinter.ml > $to_cover_raw
-sed -n "s/^.*fprintf oc \"	\(.*\)\\n.*/\1/p" ../../mppa_k1c/TargetPrinter.ml >> $to_cover_raw
-python2.7 coverage_helper.py $to_cover_raw > $to_cover
+# Stop at any error
+set -e
+# Pipes do not mask errors
+set -o pipefail
+
+sed -n "s/^.*fprintf\s\+oc\s*\"\s*\([a-z][^[:space:]]*\)\s.*/\1/p" $printer > $to_cover_raw
+python2.7 coverage_helper.py $to_cover_raw | sort -u > $to_cover
 
 rm -f $covered_raw
-for asm in $(ls $asmdir/*.s); do
-    bash asm_coverage/asm-coverage.sh $asm >> $covered_raw
+for asm in $(ls $asmdir/*.ccomp.s); do
+  grep -v ":" $asm | sed -n "s/^\s*\([a-z][a-z0-9.]*\).*/\1/p" | sort -u >> $covered_raw
 done
-python2.7 coverage_helper.py $covered_raw > $covered
+python2.7 coverage_helper.py $covered_raw | sort -u > $covered
 
 vimdiff $to_cover $covered
diff --git a/test/mppa/coverage_helper.py b/test/mppa/coverage_helper.py
index b086aca9..cf7a84c9 100644
--- a/test/mppa/coverage_helper.py
+++ b/test/mppa/coverage_helper.py
@@ -1,35 +1,45 @@
 import fileinput
+import sys
 
-occurs = {}
+all_loads_stores = "lbs lbz lhz lo lq ld lhs lws sb sd sh so sq sw".split(" ")
+
+all_bconds = "wnez weqz wltz wgez wlez wgtz dnez deqz dltz dgez dlez dgtz".split(" ")
+
+all_iconds = "ne eq lt ge le gt ltu geu leu gtu all nall any none".split(" ")
+
+all_fconds = "one ueq oeq une olt uge oge ult".split(" ")
+
+replaces_a = [(["cb.", "cmoved."], all_bconds),
+              (["compd.", "compw."], all_iconds),
+              (["fcompd.", "fcompw."], all_fconds),
+              (all_loads_stores, [".xs", ""])]
 
+replaces_dd = [(["addx", "sbfx"], ["2d", "4d", "8d", "16d"])]
+replaces_dw = [(["addx", "sbfx"], ["2w", "4w", "8w", "16w"])]
+
+macros_binds = {"%a": replaces_a, "%dd": replaces_dd, "%dw": replaces_dw}
+
+def expand_macro(fullinst, macro, replaceTable):
+    inst = fullinst.replace(macro, "")
+    for (searchlist, mods) in replaceTable:
+        if inst in searchlist:
+            return [fullinst.replace(macro, mod) for mod in mods]
+    raise NameError
+
+insts = []
 for line in fileinput.input():
-    line_noc = line.replace('\n', '')
-    if line_noc not in occurs:
-        occurs[line_noc] = 0
-    occurs[line_noc] += 1
-
-# HACK: Removing all the instructions with "%a", replacing them with all their variations
-# Also removing all instructions starting with '.'
-pruned_occurs = dict(occurs)
-for inst in occurs:
-    if inst[0] == '.':
-        del pruned_occurs[inst]
-    if "%a" not in inst:
-        continue
-    inst_no_a = inst.replace(".%a", "")
-    if inst_no_a in ("compw", "compd"):
-        del pruned_occurs[inst]
-        for mod in ("ne", "eq", "lt", "gt", "le", "ge", "ltu", "leu", "geu",
-                    "gtu", "all", "any", "nall", "none"):
-            pruned_occurs[inst_no_a + "." + mod] = 1
-    elif inst_no_a in ("cb"):
-        del pruned_occurs[inst]
-        for mod in ("wnez", "weqz", "wltz", "wgez", "wlez", "wgtz", "deqz", "dnez",
-                    "dltz", "dgez", "dlez", "dgtz"):
-            pruned_occurs[inst_no_a + "." + mod] = 1
-    else:
-        assert False, "Found instruction with %a: " + inst
-occurs = pruned_occurs
-
-for inst in sorted(occurs):
+    fullinst = line[:-1]
+    try:
+        for macro in macros_binds:
+            if macro in fullinst:
+                insts.extend(expand_macro(fullinst, macro, macros_binds[macro]))
+                break
+        else:
+            insts.append(fullinst)
+    except NameError:
+        print >> sys.stderr, fullinst + " could not be found any match for macro " + macro
+        sys.exit(1)
+
+for inst in insts:
     print inst
+occurs = {}
diff --git a/test/mppa/instr/Makefile b/test/mppa/instr/Makefile
index 4129b628..69446796 100644
--- a/test/mppa/instr/Makefile
+++ b/test/mppa/instr/Makefile
@@ -5,6 +5,7 @@ CC ?= gcc
 CCOMP ?= ccomp
 OPTIM ?= -O2
 CFLAGS ?= $(OPTIM)
+CCOMPFLAGS ?= $(CFLAGS) -faddx
 SIMU ?= k1-mppa
 TIMEOUT ?= --signal=SIGTERM 120s
 DIFF ?= python2.7 floatcmp.py -reltol .00001
@@ -45,6 +46,7 @@ all: $(BIN)
 
 GREEN=\033[0;32m
 RED=\033[0;31m
+YELLOW=\033[0;33m
 NC=\033[0m
 
 .PHONY: 
@@ -53,7 +55,9 @@ test: $(X86_GCC_OUT) $(GCC_OUT)
 	for test in $(TESTNAMES); do\
 		x86out=$(OUTDIR)/$$test.x86-gcc.out;\
 		gccout=$(OUTDIR)/$$test.gcc.out;\
-		if $(DIFF) $$x86out $$gccout > /dev/null; test $${PIPESTATUS[0]} -ne 0; then\
+		if grep "__K1C__" -q $$test.c; then\
+			printf "$(YELLOW)UNTESTED: $$test.c contains an \`#ifdef __K1C__\`\n";\
+		elif $(DIFF) $$x86out $$gccout > /dev/null; test $${PIPESTATUS[0]} -ne 0; then\
 			>&2 printf "$(RED)ERROR: $$x86out and $$gccout differ$(NC)\n";\
 		else\
 			printf "$(GREEN)GOOD: $$x86out and $$gccout concur$(NC)\n";\
@@ -111,7 +115,7 @@ $(BINDIR)/%.gcc.bin: $(ASMDIR)/%.gcc.s $(K1LIB) $(K1CCPATH)
 
 $(BINDIR)/%.ccomp.bin: $(ASMDIR)/%.ccomp.s $(K1LIB) $(CCOMPPATH)
 	@mkdir -p $(@D)
-	$(CCOMP) $(CFLAGS) $(filter-out $(CCOMPPATH),$^) -o $@
+	$(CCOMP) $(CCOMPFLAGS) $(filter-out $(CCOMPPATH),$^) -o $@
 
 # Source to assembly
 
@@ -125,4 +129,4 @@ $(ASMDIR)/%.gcc.s: $(SRCDIR)/%.c $(K1CCPATH)
 
 $(ASMDIR)/%.ccomp.s: $(SRCDIR)/%.c $(CCOMPPATH)
 	@mkdir -p $(@D)
-	$(CCOMP) $(CFLAGS) -S $< -o $@
+	$(CCOMP) $(CCOMPFLAGS) -S $< -o $@
diff --git a/test/mppa/instr/builtin64.c b/test/mppa/instr/builtin64.c
new file mode 100644
index 00000000..dbbb1886
--- /dev/null
+++ b/test/mppa/instr/builtin64.c
@@ -0,0 +1,17 @@
+#include "framework.h"
+
+BEGIN_TEST(long long)
+  long long *ptr = &c;
+#ifdef __K1C__
+  long long d = c;
+  a = __builtin_k1_alclrd(ptr);
+  c = d;
+  c += a;
+
+  c += __builtin_clzll(a);
+
+  /* Removed the AFADDD builtin who was incorrect in CompCert, see #157 */
+  // a = __builtin_k1_afaddd(ptr, a);
+  // a = __builtin_k1_afaddd(ptr, a);
+#endif
+END_TEST64()
diff --git a/test/mppa/instr/i32.c b/test/mppa/instr/i32.c
index c48531b1..4e389620 100644
--- a/test/mppa/instr/i32.c
+++ b/test/mppa/instr/i32.c
@@ -12,6 +12,14 @@ int tailsum(int a, int b){
     return make(a+b);
 }
 
+int fact(int a){
+  int r = 1;
+  int i;
+  for (i = 1; i < a; i++)
+    r *= i;
+  return r;
+}
+
 float int2float(int v){
   return v;
 }
@@ -21,43 +29,43 @@ BEGIN_TEST(int)
     c += a&b;
 
     if ((a & 0x1) == 1)
-        c += 1;
+        c += fact(1);
     else
-        c += 2;
+        c += fact(2);
 
     if (a & 0x1 == 0)
-        c += 4;
+        c += fact(4);
     else
-        c += 8;
+        c += fact(8);
 
     b = !(a & 0x01);
     if (!b)
-        c += 16;
+        c += fact(16);
     else
-        c += 32;
+        c += fact(32);
 
     c += sum(make(a), make(b));
     c += (long long) a;
 
     if (0 > (a & 0x1) - 1)
-        c += 64;
+        c += fact(64);
     else
-        c += 128;
+        c += fact(128);
 
     if (0 >= (a & 0x1))
-        c += 256;
+        c += fact(256);
     else
-        c += 512;
+        c += fact(512);
 
     if ((a & 0x1) > 0)
-        c += 1024;
+        c += fact(1024);
     else
-        c += 2048;
+        c += fact(2048);
 
     if ((a & 0x1) - 1 >= 0)
-        c += 4096;
+        c += fact(4096);
     else
-        c += 8192;
+        c += fact(8192);
 
     c += ((a & 0x1) == (b & 0x1));
     c += (a > b);
@@ -65,6 +73,12 @@ BEGIN_TEST(int)
     c += (a < b);
     c += (a + b) / 2;
     c += (int) int2float(a) + (int) int2float(b) + (int) int2float(42.3);
+    c += (a << 4); // addx16w
+    c += (a << 3); // addx8w
+    c += (a << 2); // addx4w
+    c += (a << 1); // addx2w
+
+    c += ~a & b; // andnw
 
     int j;
     for (j = 0 ; j < 10 ; j++)
diff --git a/test/mppa/instr/i64.c b/test/mppa/instr/i64.c
index 00eb159d..dc5fa6ee 100644
--- a/test/mppa/instr/i64.c
+++ b/test/mppa/instr/i64.c
@@ -30,6 +30,14 @@ long long random_op(long long a, long long b){
   return op(a, b);
 }
 
+long fact(long a){
+  long r = 1;
+  long i;
+  for (i = 1; i < a; i++)
+    r *= i;
+  return r;
+}
+
 double long2double(long v){
   return v;
 }
@@ -43,6 +51,12 @@ BEGIN_TEST(long long)
     c += a >> (b & 0x8LL);
     c += a >> (b & 0x8ULL);
     c += a % b;
+    c += (a << 4); // addx16d
+    c += (a << 3); // addx8d
+    c += (a << 2); // addx4d
+    c += (a << 1); // addx2d
+
+    c += ~a & b; // andnd
 
   long long d = 3;
   long long (*op)(long long, long long);
@@ -61,34 +75,39 @@ BEGIN_TEST(long long)
     c += (unsigned int) a;
 
     if (0 != (a & 0x1LL))
-        c += 1;
+        c += fact(1);
     else
-        c += 2;
+        c += fact(2);
 
     if (0 > (a & 0x1LL))
-        c += 4;
+        c += fact(4);
     else
-        c += 8;
+        c += fact(8);
 
     if (0 >= (a & 0x1LL) - 1)
-        c += 16;
+        c += fact(16);
+    else
+        c += fact(32);
+
+    if (a-41414141 > 0)
+        c += fact(13);
     else
-        c += 32;
+        c += fact(31);
 
     if (a & 0x1LL > 0)
-        c += 64;
+        c += fact(64);
     else
-        c += 128;
+        c += fact(128);
 
     if ((a & 0x1LL) - 1 >= 0)
-        c += 256;
+        c += fact(256);
     else
-        c += 512;
+        c += fact(512);
 
     if (0 == (a & 0x1LL))
-        c += 1024;
+        c += fact(1024);
     else
-        c += 2048;
+        c += fact(2048);
 
     c += ((a & 0x1LL) == (b & 0x1LL));
     c += (a >= b);
author	David Monniaux <david.monniaux@univ-grenoble-alpes.fr>	2019-08-31 09:26:36 +0200
committer	David Monniaux <david.monniaux@univ-grenoble-alpes.fr>	2019-08-31 09:26:36 +0200
commit	dc8b24fa2dd7ede561dd75458899cf42e9be09d2 (patch)
tree	3389a4f3f07af151b5a30c0e23ed46c4cafdd98b
parent	7c790ecd1c32b529a5e5e5977ce84cfade8e1eb6 (diff)
parent	ccd2fa5638e50b5fd8308b4b0c26531f911ff087 (diff)
download	compcert-kvx-dc8b24fa2dd7ede561dd75458899cf42e9be09d2.tar.gz compcert-kvx-dc8b24fa2dd7ede561dd75458899cf42e9be09d2.zip