27 files changed, 484 insertions, 205 deletions
diff --git a/backend/CSE2.v b/backend/CSE2.v
index 99ecc623..efa70b40 100644
--- a/backend/CSE2.v
+++ b/backend/CSE2.v
@@ -356,7 +356,7 @@ Definition transf_instr (fmap : option (PMap.t RB.t))
   match instr with
   | Iop op args dst s =>
     let args' := subst_args fmap pc args in
-    match find_op_in_fmap fmap pc op args' with
+    match (if is_trivial_op op then None else find_op_in_fmap fmap pc op args') with
     | None => Iop op args' dst s
     | Some src => Iop Omove (src::nil) dst s
     end
diff --git a/backend/CSE2proof.v b/backend/CSE2proof.v
index 7e1dd430..6368e585 100644
--- a/backend/CSE2proof.v
+++ b/backend/CSE2proof.v
@@ -1200,8 +1200,11 @@ Proof.
   reflexivity.
 - (* op *)
   unfold transf_instr in *.
-  destruct find_op_in_fmap eqn:FIND_OP.
+  destruct (if is_trivial_op op then None else find_op_in_fmap (forward_map f) pc op
+               (subst_args (forward_map f) pc args)) eqn:FIND_OP.
   {
+    destruct (is_trivial_op op).
+    discriminate.
     unfold find_op_in_fmap, fmap_sem', fmap_sem in *.
     destruct (forward_map f) as [map |] eqn:MAP.
     2: discriminate.
diff --git a/backend/Duplicateaux.ml b/backend/Duplicateaux.ml
index d0b7129e..209527b9 100644
--- a/backend/Duplicateaux.ml
+++ b/backend/Duplicateaux.ml
@@ -95,52 +95,6 @@ let print_intset s =
     Printf.printf "}"
   end
 
-(* FIXME - dominators not working well because the order of dataflow update isn't right *)
-  (*
-let get_dominators code entrypoint =
-  let bfs_order = bfs code entrypoint
-  and predecessors = get_predecessors_rtl code
-  in let doms = ref (PTree.map (fun n i -> PSet.of_list bfs_order) code)
-  in begin
-    Printf.printf "BFS: ";
-    print_intlist bfs_order;
-    Printf.printf "\n";
-    List.iter (fun n ->
-      let preds = get_some @@ PTree.get n predecessors
-      and single = PSet.singleton n
-      in match preds with
-      | [] -> doms := PTree.set n single !doms
-      | p::lp ->
-          let set_p = get_some @@ PTree.get p !doms
-          and set_lp = List.map (fun p -> get_some @@ PTree.get p !doms) lp
-          in let inter = List.fold_left PSet.inter set_p set_lp
-          in let union = PSet.union inter single
-          in begin
-            Printf.printf "----------------------------------------\n";
-            Printf.printf "n = %d\n" (P.to_int n);
-            Printf.printf "set_p = "; print_intset set_p; Printf.printf "\n";
-            Printf.printf "set_lp = ["; List.iter (fun s -> print_intset s; Printf.printf ", ") set_lp; Printf.printf "]\n";
-            Printf.printf "=> inter = "; print_intset inter; Printf.printf "\n";
-            Printf.printf "=> union = "; print_intset union; Printf.printf "\n";
-            doms := PTree.set n union !doms
-          end
-    ) bfs_order;
-    !doms
-  end
-*)
-
-let print_dominators dominators =
-  let domlist = PTree.elements dominators
-  in begin
-    Printf.printf "{\n";
-    List.iter (fun (n, doms) ->
-      Printf.printf "\t";
-      Printf.printf "%d:" (P.to_int n);
-      print_intset doms;
-      Printf.printf "\n"
-    ) domlist
-  end
-
 type vstate = Unvisited | Processed | Visited
 
 (** Getting loop branches with a DFS visit :
@@ -206,78 +160,62 @@ let rec look_ahead code node is_loop_header predicate =
         else look_ahead code n is_loop_header predicate
       )
 
-exception HeuristicSucceeded
-
-let do_call_heuristic code ifso ifnot is_loop_header preferred =
+let do_call_heuristic code cond ifso ifnot is_loop_header =
   let predicate n = (function
   | Icall _ -> true
   | _ -> false) @@ get_some @@ PTree.get n code
-  in if (look_ahead code ifso is_loop_header predicate) then
-    (preferred := false; raise HeuristicSucceeded)
-  else if (look_ahead code ifnot is_loop_header predicate) then
-    (preferred := true; raise HeuristicSucceeded)
-  else ()
+  in if (look_ahead code ifso is_loop_header predicate) then Some false
+  else if (look_ahead code ifnot is_loop_header predicate) then Some true
+  else None
 
-let do_opcode_heuristic code cond ifso ifnot preferred = DuplicateOpcodeHeuristic.opcode_heuristic code cond ifso ifnot preferred
+let do_opcode_heuristic code cond ifso ifnot is_loop_header = DuplicateOpcodeHeuristic.opcode_heuristic code cond ifso ifnot is_loop_header
 
-let do_return_heuristic code ifso ifnot is_loop_header preferred =
+let do_return_heuristic code cond ifso ifnot is_loop_header =
   let predicate n = (function
   | Ireturn _ -> true
   | _ -> false) @@ get_some @@ PTree.get n code
-  in if (look_ahead code ifso is_loop_header predicate) then
-    (preferred := false; raise HeuristicSucceeded)
-  else if (look_ahead code ifnot is_loop_header predicate) then
-    (preferred := true; raise HeuristicSucceeded)
-  else ()
+  in if (look_ahead code ifso is_loop_header predicate) then Some false
+  else if (look_ahead code ifnot is_loop_header predicate) then Some true
+  else None
 
-let do_store_heuristic code ifso ifnot is_loop_header preferred =
+let do_store_heuristic code cond ifso ifnot is_loop_header =
   let predicate n = (function
   | Istore _ -> true
   | _ -> false) @@ get_some @@ PTree.get n code
-  in if (look_ahead code ifso is_loop_header predicate) then
-    (preferred := false; raise HeuristicSucceeded)
-  else if (look_ahead code ifnot is_loop_header predicate) then
-    (preferred := true; raise HeuristicSucceeded)
-  else ()
+  in if (look_ahead code ifso is_loop_header predicate) then Some false
+  else if (look_ahead code ifnot is_loop_header predicate) then Some true
+  else None
 
-let do_loop_heuristic code ifso ifnot is_loop_header preferred =
+let do_loop_heuristic code cond ifso ifnot is_loop_header =
   let predicate n = get_some @@ PTree.get n is_loop_header
-  in if (look_ahead code ifso is_loop_header predicate) then
-    (preferred := true; raise HeuristicSucceeded)
-  else if (look_ahead code ifnot is_loop_header predicate) then
-    (preferred := false; raise HeuristicSucceeded)
-  else ()
+  in if (look_ahead code ifso is_loop_header predicate) then Some true
+  else if (look_ahead code ifnot is_loop_header predicate) then Some false
+  else None
 
 let get_directions code entrypoint =
   let bfs_order = bfs code entrypoint
   and is_loop_header = get_loop_headers code entrypoint
   and directions = ref (PTree.map (fun n i -> false) code) (* false <=> fallthru *)
   in begin
-    Printf.printf "Loop headers: ";
-    ptree_printbool is_loop_header;
-    Printf.printf "\n";
+    (* Printf.printf "Loop headers: "; *)
+    (* ptree_printbool is_loop_header; *)
+    (* Printf.printf "\n"; *)
     List.iter (fun n ->
       match (get_some @@ PTree.get n code) with
       | Icond (cond, lr, ifso, ifnot) ->
-          Printf.printf "Analyzing %d.." (P.to_int n);
-          let preferred = ref false
-          in (try
-            Printf.printf " call..";
-            do_call_heuristic code ifso ifnot is_loop_header preferred;
-            Printf.printf " opcode..";
-            do_opcode_heuristic code cond ifso ifnot preferred;
-            Printf.printf " return..";
-            do_return_heuristic code ifso ifnot is_loop_header preferred;
-            Printf.printf " store..";
-            do_store_heuristic code ifso ifnot is_loop_header preferred;
-            Printf.printf " loop..";
-            do_loop_heuristic code ifso ifnot is_loop_header preferred;
-            Printf.printf "Random choice for %d\n" (P.to_int n);
-            preferred := Random.bool ()
-            with HeuristicSucceeded | DuplicateOpcodeHeuristic.HeuristicSucceeded
-              -> Printf.printf " %s\n" (match !preferred with true -> "BRANCH"
-                                        | false -> "FALLTHROUGH")
-          ); directions := PTree.set n !preferred !directions
+          (* Printf.printf "Analyzing %d.." (P.to_int n); *)
+          let heuristics = [ do_call_heuristic; do_opcode_heuristic;
+            do_return_heuristic; do_store_heuristic; do_loop_heuristic ] in
+          let preferred = ref None in
+          begin
+            List.iter (fun do_heur ->
+              match !preferred with
+              | None -> preferred := do_heur code cond ifso ifnot is_loop_header
+              | Some _ -> ()
+            ) heuristics;
+            (match !preferred with None -> preferred := Some (Random.bool ()) | Some _ -> ());
+            directions := PTree.set n (get_some !preferred) !directions
+          end
       | _ -> ()
     ) bfs_order;
     !directions
@@ -306,9 +244,9 @@ let rec to_ttl_code_rec directions = function
 let to_ttl_code code entrypoint =
   let directions = get_directions code entrypoint
   in begin
-    Printf.printf "Ifso directions: ";
+    (* Printf.printf "Ifso directions: ";
     ptree_printbool directions;
-    Printf.printf "\n";
+    Printf.printf "\n"; *)
     Random.init(0); (* using same seed to make it deterministic *)
     to_ttl_code_rec directions (PTree.elements code)
   end
@@ -423,7 +361,7 @@ let select_traces code entrypoint =
         end
       end
     done;
-    Printf.printf "DFS: \t"; print_intlist order; Printf.printf "\n";
+    (* Printf.printf "DFS: \t"; print_intlist order; Printf.printf "\n"; *)
     !traces
   end
 
@@ -471,7 +409,7 @@ let rec change_pointers code n n' = function
  * n': the integer which should contain the duplicate of n
  * returns: new code, new ptree *)
 let duplicate code ptree parent n preds n' =
-  Printf.printf "Duplicating node %d into %d..\n" (P.to_int n) (P.to_int n');
+  (* Printf.printf "Duplicating node %d into %d..\n" (P.to_int n) (P.to_int n'); *)
   match PTree.get n' code with
   | Some _ -> failwith "The PTree already has a node n'"
   | None ->
@@ -529,7 +467,7 @@ let tail_duplicate code preds ptree trace =
   in (new_code, new_ptree, !nb_duplicated)
 
 let superblockify_traces code preds traces =
-  let max_nb_duplicated = 1 (* FIXME - should be architecture dependent *)
+  let max_nb_duplicated = !Clflags.option_fduplicate (* FIXME - should be architecture dependent *)
   in let ptree = make_identity_ptree code
   in let rec f code ptree = function
     | [] -> (code, ptree, 0)
@@ -548,7 +486,7 @@ let rec invert_iconds_trace code = function
         | Icond (c, lr, ifso, ifnot) ->
             assert (n' == ifso || n' == ifnot);
             if (n' == ifso) then (
-              Printf.printf "Reversing ifso/ifnot for node %d\n" (P.to_int n);
+              (* Printf.printf "Reversing ifso/ifnot for node %d\n" (P.to_int n); *)
               PTree.set n (Icond (Op.negate_condition c, lr, ifnot, ifso)) code )
             else code
         | _ -> code
@@ -561,12 +499,14 @@ let rec invert_iconds code = function
                   else code
       in invert_iconds code' ts
 
-(* For now, identity function *)
 let duplicate_aux f =
   let entrypoint = f.fn_entrypoint in
   let code = f.fn_code in
   let traces = select_traces (to_ttl_code code entrypoint) entrypoint in
   let icond_code = invert_iconds code traces in
   let preds = get_predecessors_rtl icond_code in
-  let (new_code, pTreeId) = (print_traces traces; superblockify_traces icond_code preds traces) in
-  ((new_code, f.fn_entrypoint), pTreeId)
+  if !Clflags.option_fduplicate >= 1 then
+    let (new_code, pTreeId) = ((* print_traces traces; *) superblockify_traces icond_code preds traces) in
+    ((new_code, f.fn_entrypoint), pTreeId)
+  else
+    ((icond_code, entrypoint), make_identity_ptree code)
diff --git a/backend/Duplicateproof.v b/backend/Duplicateproof.v
index a8e9b16b..466b4b75 100644
--- a/backend/Duplicateproof.v
+++ b/backend/Duplicateproof.v
@@ -378,7 +378,7 @@ Theorem step_simulation:
      step tge s2 t s2'
   /\ match_states s1' s2'.
 Proof.
-  Local Hint Resolve transf_fundef_correct.
+  Local Hint Resolve transf_fundef_correct: core.
   induction 1; intros; inv MS.
 (* Inop *)
   - eapply dupmap_correct in DUPLIC; eauto.
diff --git a/backend/Linearizeaux.ml b/backend/Linearizeaux.ml
index a6964233..a813ac96 100644
--- a/backend/Linearizeaux.ml
+++ b/backend/Linearizeaux.ml
@@ -122,7 +122,11 @@ let enumerate_aux_flat f reach =
  * rather than a branch (ifso).
  *
  * The enumeration below takes advantage of this - preferring to layout nodes
- * following the fallthroughs of the Lcond branches
+ * following the fallthroughs of the Lcond branches.
+ *
+ * It is slightly adapted from the work of Pettis and Hansen 90 on intraprocedural
+ * code positioning - only we do it on a broader grain, since we don't have the exact
+ * frequencies (we only know which branch is the preferred one)
  *)
 
 let get_some = function
@@ -136,29 +140,320 @@ let rec last_element = function
   | e :: [] -> e
   | e' :: e :: l -> last_element (e::l)
 
-let dfs code entrypoint =
+let print_plist l =
+  let rec f = function
+  | [] -> ()
+  | n :: l -> Printf.printf "%d, " (P.to_int n); f l
+  in begin
+    Printf.printf "[";
+    f l;
+    Printf.printf "]"
+  end
+
+let forward_sequences code entry =
   let visited = ref (PTree.map (fun n i -> false) code) in
-  let rec dfs_list code = function
+  (* returns the list of traversed nodes, and a list of nodes to start traversing next *)
+  let rec traverse_fallthrough code node =
+    (* Printf.printf "Traversing %d..\n" (P.to_int node); *)
+    if not (get_some @@ PTree.get node !visited) then begin
+      visited := PTree.set node true !visited;
+      match PTree.get node code with
+      | None -> failwith "No such node"
+      | Some bb ->
+          let ln, rem = match (last_element bb) with
+          | Lop _ | Lload _ | Lgetstack _ | Lsetstack _ | Lstore _ | Lcall _
+          | Lbuiltin _ -> assert false
+          | Ltailcall _ | Lreturn -> ([], [])
+          | Lbranch n -> let ln, rem = traverse_fallthrough code n in (ln, rem)
+          | Lcond (_, _, ifso, ifnot) -> let ln, rem = traverse_fallthrough code ifnot in (ln, [ifso] @ rem)
+          | Ljumptable(_, ln) -> match ln with
+              | [] -> ([], [])
+              | n :: ln -> let lln, rem = traverse_fallthrough code n in (lln, ln @ rem)
+          in ([node] @ ln, rem)
+      end
+    else ([], [])
+  in let rec f code = function
   | [] -> []
   | node :: ln ->
-      let node_dfs =
-        if not (get_some @@ PTree.get node !visited) then begin
-          visited := PTree.set node true !visited;
-          match PTree.get node code with
-          | None -> failwith "No such node"
-          | Some bb -> [node] @ match (last_element bb) with
-            | Lop _ | Lload _ | Lgetstack _ | Lsetstack _ | Lstore _ | Lcall _
-            | Lbuiltin _ -> assert false
-            | Ltailcall _ | Lreturn -> []
-            | Lbranch n -> dfs_list code [n]
-            | Lcond (_, _, ifso, ifnot) -> dfs_list code [ifnot; ifso]
-            | Ljumptable(_, ln) -> dfs_list code ln
-          end
-        else []
-      in node_dfs @ (dfs_list code ln)
-  in dfs_list code [entrypoint]
-
-let enumerate_aux_trace f reach = dfs f.fn_code f.fn_entrypoint
+      let fs, rem_from_node = traverse_fallthrough code node
+      in [fs] @ ((f code rem_from_node) @ (f code ln))
+  in (f code [entry])
+
+module PInt = struct
+  type t = P.t
+  let compare x y = compare (P.to_int x) (P.to_int y)
+end
+
+module PSet = Set.Make(PInt)
+
+module LPInt = struct
+  type t = P.t list
+  let rec compare x y =
+    match x with
+    | [] -> ( match y with
+      | [] -> 0
+      | _ -> 1 )
+    | e :: l -> match y with
+      | [] -> -1
+      | e' :: l' ->
+          let e_cmp = PInt.compare e e' in
+          if e_cmp == 0 then compare l l' else e_cmp
+end
+
+module LPSet = Set.Make(LPInt)
+
+let iter_lpset f s = Seq.iter f (LPSet.to_seq s)
+
+let first_of = function
+  | [] -> None
+  | e :: l -> Some e
+
+let rec last_of = function (* last element of a list, or None when the list is empty *)
+  | [] -> None
+  | e :: l -> (match l with [] -> Some e | _ :: _ -> last_of l) (* recurse on the whole tail, never skipping an element *)
+
+let can_be_merged code s s' = (* true iff the last block of s falls through to the first node of s' *)
+  let last_s = get_some @@ last_of s in
+  let first_s' = get_some @@ first_of s' in
+  match last_element (get_some @@ PTree.get last_s code) with (* look at the block's final instruction, as the other traversals do *)
+  | Lop _ | Lload _ | Lgetstack _ | Lsetstack _ | Lstore _ | Lcall _
+  | Lbuiltin _ | Ltailcall _ | Lreturn -> false
+  | Lbranch n -> n = first_s' (* structural equality: node identifiers are boxed values *)
+  | Lcond (_, _, ifso, ifnot) -> ifnot = first_s'
+  | Ljumptable (_, ln) ->
+      match ln with
+      | [] -> false
+      | n :: ln -> n = first_s'
+
+let merge s s' = Some (s @ s') (* merged sequence: s followed by s'; keeps every node of both *)
+
+let try_merge code (fs: (BinNums.positive list) list) =
+  let seqs = ref (LPSet.of_list fs) in
+  let oldLength = ref (LPSet.cardinal !seqs) in
+  let continue = ref true in
+  let found = ref false in
+  while !continue do
+    begin
+      found := false;
+      iter_lpset (fun s ->
+        if !found then ()
+        else iter_lpset (fun s' ->
+          if (!found || s == s') then ()
+          else if (can_be_merged code s s') then
+            begin
+              seqs := LPSet.remove s !seqs;
+              seqs := LPSet.remove s' !seqs;
+              seqs := LPSet.add (get_some (merge s s')) !seqs;
+              found := true;
+            end
+          else ()
+        ) !seqs
+      ) !seqs;
+      if !oldLength == LPSet.cardinal !seqs then
+        continue := false
+      else
+        oldLength := LPSet.cardinal !seqs
+    end
+  done;
+  !seqs
+
+(** Code adapted from Duplicateaux.get_loop_headers
+  *
+  * Getting loop branches with a DFS visit :
+  * Each node is either Unvisited, Visited, or Processed
+  * pre-order: node becomes Processed
+  * post-order: node becomes Visited
+  *
+  * If we come across an edge to a Processed node, it's a loop!
+  *)
+type pos = BinNums.positive
+
+module PP = struct
+  type t = pos * pos
+  let compare a b =
+    let ax, ay = a in
+    let bx, by = b in
+    let dx = compare ax bx in
+    if (dx == 0) then compare ay by
+    else dx
+end
+
+module PPMap = Map.Make(PP)
+
+type vstate = Unvisited | Processed | Visited
+
+let get_loop_edges code entry = (* DFS from entry; returns a map binding (from, node) to true for every edge reaching a Processed node *)
+  let visited = ref (PTree.map (fun n i -> Unvisited) code) in
+  let is_loop_edge = ref PPMap.empty
+  in let rec dfs_visit code from = function
+  | [] -> ()
+  | node :: ln ->
+      match (get_some @@ PTree.get node !visited) with
+      | Visited -> dfs_visit code from ln (* already explored: still examine the remaining successors *)
+      | Processed -> begin
+          let from_node = get_some from in
+          is_loop_edge := PPMap.add (from_node, node) true !is_loop_edge;
+          dfs_visit code from ln (* node stays Processed until its own visit ends, so later back edges to it are recorded too *)
+        end
+      | Unvisited -> begin
+          visited := PTree.set node Processed !visited;
+          let bb = get_some @@ PTree.get node code in
+          let next_visits = (match (last_element bb) with
+          | Lop _ | Lload _ | Lgetstack _ | Lsetstack _ | Lstore _ | Lcall _
+          | Lbuiltin _ -> assert false
+          | Ltailcall _ | Lreturn -> []
+          | Lbranch n -> [n]
+          | Lcond (_, _, ifso, ifnot) -> [ifso; ifnot]
+          | Ljumptable(_, ln) -> ln
+          ) in dfs_visit code (Some node) next_visits;
+          visited := PTree.set node Visited !visited;
+          dfs_visit code from ln
+        end
+  in begin
+    dfs_visit code None [entry];
+    !is_loop_edge
+  end
+
+let ppmap_is_true pp ppmap = PPMap.mem pp ppmap && PPMap.find pp ppmap
+
+module Int = struct
+  type t = int
+  let compare x y = compare x y
+end
+
+module ISet = Set.Make(Int)
+
+let print_iset s = begin
+  Printf.printf "{";
+  ISet.iter (fun e -> Printf.printf "%d, " e) s;
+  Printf.printf "}"
+end
+
+let print_depmap dm = begin
+  Printf.printf "[|";
+  Array.iter (fun s -> print_iset s; Printf.printf ", ") dm;
+  Printf.printf "|]\n"
+end
+
+let construct_depmap code entry fs = (* depmap.(i) = indices of the sequences of fs having a non-loop edge into sequence i *)
+  let is_loop_edge = get_loop_edges code entry in
+  let visited = ref (PTree.map (fun n i -> false) code) in
+  let depmap = Array.map (fun e -> ISet.empty) fs in
+  let find_index_of_node n = (* index of the last sequence containing n; 0 when none does *)
+    let index = ref 0 in
+    begin
+      Array.iteri (fun i s ->
+        match List.find_opt (fun e -> e = n) s with
+        | Some _ -> index := i
+        | None -> ()
+      ) fs;
+      !index
+    end
+  in let check_and_update_depmap from target =
+    (* Printf.printf "From %d to %d\n" (P.to_int from) (P.to_int target); *)
+    if not (ppmap_is_true (from, target) is_loop_edge) then
+      let in_index_fs = find_index_of_node from in
+      let out_index_fs = find_index_of_node target in
+      if out_index_fs != in_index_fs then
+        depmap.(out_index_fs) <- ISet.add in_index_fs depmap.(out_index_fs)
+      else ()
+    else ()
+  in let rec dfs_visit code = function
+  | [] -> ()
+  | node :: ln ->
+      begin
+        (match (get_some @@ PTree.get node !visited) with
+        | true -> ()
+        | false -> begin
+            visited := PTree.set node true !visited;
+            let bb = get_some @@ PTree.get node code in
+            let next_visits =
+              match (last_element bb) with
+              | Ltailcall _ | Lreturn -> []
+              | Lbranch n -> (check_and_update_depmap node n; [n])
+              | Lcond (_, _, ifso, ifnot) -> begin
+                  check_and_update_depmap node ifso;
+                  check_and_update_depmap node ifnot;
+                  [ifso; ifnot]
+                end
+              | Ljumptable(_, ln) -> begin
+                  List.iter (fun n -> check_and_update_depmap node n) ln;
+                  ln
+                end
+              (* a basic block must end with one of the instructions handled above *)
+              | _ -> failwith "last_element gave an invalid output"
+            in dfs_visit code next_visits
+          end);
+        dfs_visit code ln (* the parenthesised match above makes this run for already-visited nodes too *)
+      end
+  in begin
+    dfs_visit code [entry];
+    depmap
+  end
+
+let print_sequence s =
+  Printf.printf "[";
+  List.iter (fun n -> Printf.printf "%d, " (P.to_int n)) s;
+  Printf.printf "]\n"
+
+let print_ssequence ofs =
+  Printf.printf "[";
+  List.iter (fun s -> print_sequence s) ofs;
+  Printf.printf "]\n"
+
+let order_sequences code entry fs = (* reorders fs so that a sequence comes after the sequences it depends on, when possible *)
+  let fs_a = Array.of_list fs in
+  let depmap = construct_depmap code entry fs_a in
+  let fs_evaluated = Array.map (fun e -> false) fs_a in
+  let ordered_fs = ref [] in
+  let evaluate s_id = (* emits sequence s_id and removes it from every dependency set *)
+    begin
+      assert (not fs_evaluated.(s_id));
+      ordered_fs := fs_a.(s_id) :: !ordered_fs;
+      fs_evaluated.(s_id) <- true;
+      Array.iteri (fun i deps ->
+        depmap.(i) <- ISet.remove s_id deps
+      ) depmap
+    end
+  in let select_next () = (* first unevaluated sequence without pending dependencies, else first unevaluated one *)
+    let selected_id = ref None in
+    begin
+      Array.iteri (fun i deps ->
+        begin
+          (* Printf.printf "Deps: "; print_iset deps; Printf.printf "\n"; *)
+          match !selected_id with
+          | None -> if (ISet.is_empty deps && not fs_evaluated.(i)) then selected_id := Some i
+          | Some id -> ()
+        end
+      ) depmap;
+      match !selected_id with
+      | Some id -> id
+      | None -> begin
+          Array.iteri (fun i deps ->
+            match !selected_id with
+            | None -> if not fs_evaluated.(i) then selected_id := Some i
+            | Some id -> ()
+          ) depmap;
+          get_some !selected_id
+        end
+    end
+  in begin
+    (* Printf.printf "depmap: "; print_depmap depmap; *)
+    (* Printf.printf "forward sequences identified: "; print_ssequence fs; *)
+    while List.length !ordered_fs <> List.length fs do
+      let next_id = select_next () in
+      evaluate next_id
+    done;
+    (* Printf.printf "forward sequences ordered: "; print_ssequence (List.rev (!ordered_fs)); *)
+    List.rev (!ordered_fs)
+  end
+
+let enumerate_aux_trace f reach =
+  let code = f.fn_code in
+  let entry = f.fn_entrypoint in
+  let fs = forward_sequences code entry in
+  let ofs = order_sequences code entry fs in
+  List.flatten ofs
 
 let enumerate_aux f reach =
   if !Clflags.option_ftracelinearize then enumerate_aux_trace f reach
diff --git a/driver/Clflags.ml b/driver/Clflags.ml
index f4022941..7e3b23d8 100644
--- a/driver/Clflags.ml
+++ b/driver/Clflags.ml
@@ -29,8 +29,8 @@ let option_fcse = ref true
 let option_fcse2 = ref true
 let option_fcse3 = ref true
 let option_fredundancy = ref true
-let option_fduplicate = ref false
-let option_finvertcond = ref true (* only active if option_fduplicate is also true *)
+let option_fduplicate = ref 0
+let option_finvertcond = ref true
 let option_ftracelinearize = ref false
 let option_fpostpass = ref true
 let option_fpostpass_sched = ref "list"
diff --git a/driver/Compiler.v b/driver/Compiler.v
index 22955160..c2428d94 100644
--- a/driver/Compiler.v
+++ b/driver/Compiler.v
@@ -136,7 +136,7 @@ Definition transf_rtl_program (f: RTL.program) : res Asm.program :=
    @@ print (print_RTL 2)
    @@ time "Renumbering" Renumber.transf_program
    @@ print (print_RTL 3)
-  @@@ partial_if Compopts.optim_duplicate (time "Tail-duplicating" Duplicate.transf_program)
+  @@@ time "Tail-duplicating" Duplicate.transf_program
    @@ print (print_RTL 4)
    @@ total_if Compopts.optim_constprop (time "Constant propagation" Constprop.transf_program)
    @@ print (print_RTL 5)
@@ -258,7 +258,7 @@ Definition CompCert's_passes :=
   ::: mkpass (match_if Compopts.optim_tailcalls Tailcallproof.match_prog)
   ::: mkpass Inliningproof.match_prog
   ::: mkpass Renumberproof.match_prog
-  ::: mkpass (match_if Compopts.optim_duplicate Duplicateproof.match_prog)
+  ::: mkpass Duplicateproof.match_prog
   ::: mkpass (match_if Compopts.optim_constprop Constpropproof.match_prog)
   ::: mkpass (match_if Compopts.optim_constprop Renumberproof.match_prog)
   ::: mkpass (match_if Compopts.optim_CSE CSEproof.match_prog)
@@ -306,7 +306,7 @@ Proof.
   set (p7 := total_if optim_tailcalls Tailcall.transf_program p6) in *.
   destruct (Inlining.transf_program p7) as [p8|e] eqn:P8; simpl in T; try discriminate.
   set (p9 := Renumber.transf_program p8) in *.
-  destruct (partial_if optim_duplicate Duplicate.transf_program p9) as [p10|e] eqn:P10; simpl in T; try discriminate.
+  destruct (Duplicate.transf_program p9) as [p10|e] eqn:P10; simpl in T; try discriminate.
   set (p11 := total_if optim_constprop Constprop.transf_program p10) in *.
   set (p12 := total_if optim_constprop Renumber.transf_program p11) in *.
   destruct (partial_if optim_CSE CSE.transf_program p12) as [p13|e] eqn:P13; simpl in T; try discriminate.
@@ -332,7 +332,7 @@ Proof.
   exists p7; split. apply total_if_match. apply Tailcallproof.transf_program_match.
   exists p8; split. apply Inliningproof.transf_program_match; auto.
   exists p9; split. apply Renumberproof.transf_program_match; auto.
-  exists p10; split. eapply partial_if_match; eauto. apply Duplicateproof.transf_program_match; auto.
+  exists p10; split. apply Duplicateproof.transf_program_match; auto.
   exists p11; split. apply total_if_match. apply Constpropproof.transf_program_match.
   exists p12; split. apply total_if_match. apply Renumberproof.transf_program_match.
   exists p13; split. eapply partial_if_match; eauto. apply CSEproof.transf_program_match.
@@ -419,7 +419,7 @@ Ltac DestructM :=
     eapply Inliningproof.transf_program_correct; eassumption.
   eapply compose_forward_simulations. eapply Renumberproof.transf_program_correct; eassumption.
   eapply compose_forward_simulations.
-    eapply match_if_simulation. eassumption. exact Duplicateproof.transf_program_correct.
+    eapply Duplicateproof.transf_program_correct; eassumption.
   eapply compose_forward_simulations.
     eapply match_if_simulation. eassumption. exact Constpropproof.transf_program_correct.
   eapply compose_forward_simulations.
diff --git a/driver/Driver.ml b/driver/Driver.ml
index 6f32fc33..12b61d86 100644
--- a/driver/Driver.ml
+++ b/driver/Driver.ml
@@ -205,7 +205,7 @@ Processing options:
     -finvertcond    Invert conditions based on predicted paths (to prefer fallthrough).
                     Requires -fduplicate to be also activated [on]
     -ftracelinearize Linearizes based on the traces identified by duplicate phase
-                    It is recommended to also activate -fduplicate with this pass [off]
+                    It is heavily recommended to activate -finvertcond with this pass [off]
   -fforward-moves   Forward moves after CSE
   -finline       Perform inlining of functions [on]
   -finline-functions-called-once Integrate functions only required by their
@@ -319,7 +319,7 @@ let cmdline_actions =
  [
   Exact "-O0", Unit (unset_all optimization_options);
   Exact "-O", Unit (set_all optimization_options);
-  _Regexp "-O1", Self (fun _ -> set_all optimization_options (); option_fpostpass := false; option_fduplicate := false);
+  _Regexp "-O1", Self (fun _ -> set_all optimization_options (); option_fpostpass := false);
   _Regexp "-O[123]$", Unit (set_all optimization_options);
   Exact "-Os", Set option_Osize;
   Exact "-Obranchless", Set option_Obranchless;
@@ -395,7 +395,7 @@ let cmdline_actions =
   @ f_opt "cse3" option_fcse3
   @ f_opt "redundancy" option_fredundancy
   @ f_opt "postpass" option_fpostpass
-  @ f_opt "duplicate" option_fduplicate
+  @ [ Exact "-fduplicate", Integer (fun n -> option_fduplicate := n) ]
   @ f_opt "invertcond" option_finvertcond
   @ f_opt "tracelinearize" option_ftracelinearize
   @ f_opt_str "postpass" option_fpostpass option_fpostpass_sched
diff --git a/extraction/extraction.v b/extraction/extraction.v
index bf51da42..79393cf8 100644
--- a/extraction/extraction.v
+++ b/extraction/extraction.v
@@ -108,8 +108,6 @@ Extract Constant Compopts.generate_float_constants =>
   "fun _ -> !Clflags.option_ffloatconstprop >= 2".
 Extract Constant Compopts.optim_tailcalls =>
   "fun _ -> !Clflags.option_ftailcalls".
-Extract Constant Compopts.optim_duplicate =>
-  "fun _ -> !Clflags.option_fduplicate".
 Extract Constant Compopts.optim_constprop =>
   "fun _ -> !Clflags.option_fconstprop".
 Extract Constant Compopts.optim_CSE =>
diff --git a/mppa_k1c/Asmblockdeps.v b/mppa_k1c/Asmblockdeps.v
index 02f9141b..01eda623 100644
--- a/mppa_k1c/Asmblockdeps.v
+++ b/mppa_k1c/Asmblockdeps.v
@@ -339,7 +339,7 @@ Proof.
   }
   destruct (Mem.load _ m1 _ _) in *; destruct (Mem.load _ m0 _ _) in *; congruence.
 Qed.
-             
+
 Definition goto_label_deps (f: function) (lbl: label) (vpc: val) :=
   match label_pos lbl 0 (fn_blocks f) with
   | None => None
@@ -1005,7 +1005,7 @@ Theorem bisimu_par_wio_basic ge fn rsr rsw mr mw sr sw bi:
 Proof.
 
 (* a little tactic to automate reasoning on preg_eq *)
-Local Hint Resolve not_eq_sym ppos_pmem_discr ppos_discr.
+Local Hint Resolve not_eq_sym ppos_pmem_discr ppos_discr: core.
 Local Ltac preg_eq_discr r rd :=
   destruct (preg_eq r rd); try (subst r; rewrite assign_eq, Pregmap.gss; auto);
   rewrite (assign_diff _ (#rd) (#r) _); auto;
@@ -1053,7 +1053,7 @@ Local Ltac preg_eq_discr r rd :=
         preg_eq_discr r rd0. }
 
     (* Load Octuple word *)
-    + Local Hint Resolve not_eq_sym ppos_pmem_discr ppos_discr.
+    + Local Hint Resolve not_eq_sym ppos_pmem_discr ppos_discr: core.
       unfold parexec_load_o_offset.
       destruct (gpreg_o_expand rd) as [[[rd0 rd1] rd2] rd3]; destruct Ge; simpl.
       rewrite H0, H.
@@ -1423,7 +1423,7 @@ Section SECT_BBLOCK_EQUIV.
 
 Variable Ge: genv.
 
-Local Hint Resolve trans_state_match.
+Local Hint Resolve trans_state_match: core.
 
 Lemma bblock_simu_reduce:
   forall p1 p2 ge fn,
diff --git a/mppa_k1c/Asmblockgen.v b/mppa_k1c/Asmblockgen.v
index 50637723..36269954 100644
--- a/mppa_k1c/Asmblockgen.v
+++ b/mppa_k1c/Asmblockgen.v
@@ -28,6 +28,8 @@ Require Import Chunks.
 Local Open Scope string_scope.
 Local Open Scope error_monad_scope.
 
+Import PArithCoercions.
+
 (** The code generation functions take advantage of several
   characteristics of the [Mach] code generated by earlier passes of the
   compiler, mostly that argument and result registers are of the correct
diff --git a/mppa_k1c/Asmblockgenproof1.v b/mppa_k1c/Asmblockgenproof1.v
index ecb4629b..5b44ddaa 100644
--- a/mppa_k1c/Asmblockgenproof1.v
+++ b/mppa_k1c/Asmblockgenproof1.v
@@ -23,6 +23,8 @@ Require Import Op Locations Machblock Conventions.
 Require Import Asmblock Asmblockgen Asmblockgenproof0 Asmblockprops.
 Require Import Chunks.
 
+Import PArithCoercions.
+
 (** Decomposition of integer constants. *)
 
 Lemma make_immed32_sound:
@@ -859,7 +861,7 @@ Proof.
     destruct cmp; discriminate.
 Qed.
 
-Local Hint Resolve Val_cmpu_bool_correct Val_cmplu_bool_correct.
+Local Hint Resolve Val_cmpu_bool_correct Val_cmplu_bool_correct: core.
 
 Lemma transl_cbranch_correct_1:
   forall cond args lbl k c m ms b sp rs m' tbb,
@@ -1163,7 +1165,7 @@ Proof.
   split; intros; Simpl.
 Qed.
 
-Local Hint Resolve Val_cmpu_correct Val_cmplu_correct.
+Local Hint Resolve Val_cmpu_correct Val_cmplu_correct: core.
 
 Lemma transl_condimm_int32u_correct:
   forall cmp rd r1 n k rs m,
diff --git a/mppa_k1c/Asmvliw.v b/mppa_k1c/Asmvliw.v
index e042d95a..946007c1 100644
--- a/mppa_k1c/Asmvliw.v
+++ b/mppa_k1c/Asmvliw.v
@@ -555,6 +555,8 @@ Inductive ar_instruction : Type :=
   | PArithARRI64 (i: arith_name_arri64) (rd rs: ireg) (imm: int64)
 .
 
+Module PArithCoercions.
+
 Coercion PArithR:       arith_name_r        >-> Funclass.
 Coercion PArithRR:      arith_name_rr       >-> Funclass.
 Coercion PArithRI32:    arith_name_ri32     >-> Funclass.
@@ -569,6 +571,8 @@ Coercion PArithARR:     arith_name_arr      >-> Funclass.
 Coercion PArithARRI32:   arith_name_arri32    >-> Funclass.
 Coercion PArithARRI64:   arith_name_arri64    >-> Funclass.
 
+End PArithCoercions.
+
 Inductive basic : Type :=
   | PArith          (i: ar_instruction)
   | PLoad           (i: ld_instruction)
@@ -1709,7 +1713,7 @@ Proof.
 Qed.
 
 
-Local Hint Resolve parexec_bblock_write_in_order.
+Local Hint Resolve parexec_bblock_write_in_order: core.
 
 Lemma det_parexec_write_in_order f b rs m rs' m':
    det_parexec f b rs m rs' m' -> parexec_wio_bblock f b rs m = Next rs' m'.
diff --git a/mppa_k1c/DuplicateOpcodeHeuristic.ml b/mppa_k1c/DuplicateOpcodeHeuristic.ml
index 690553ce..2ec314c1 100644
--- a/mppa_k1c/DuplicateOpcodeHeuristic.ml
+++ b/mppa_k1c/DuplicateOpcodeHeuristic.ml
@@ -2,10 +2,8 @@
 open Op
 open Integers
 
-exception HeuristicSucceeded
-
-let opcode_heuristic code cond ifso ifnot preferred =
-  let decision = match cond with
+let opcode_heuristic code cond ifso ifnot is_loop_header =
+  match cond with
   | Ccompimm (c, n) | Ccompuimm (c, n) -> if n == Integers.Int.zero then (match c with
       | Clt | Cle -> Some false
       | Cgt | Cge -> Some true
@@ -27,6 +25,3 @@ let opcode_heuristic code cond ifso ifnot preferred =
       | _ -> None
       )
   | _ -> None
-  in match decision with
-  | Some b -> (preferred := b; raise HeuristicSucceeded)
-  | None -> ()
diff --git a/mppa_k1c/PostpassSchedulingproof.v b/mppa_k1c/PostpassSchedulingproof.v
index fbb06c9b..3b123c75 100644
--- a/mppa_k1c/PostpassSchedulingproof.v
+++ b/mppa_k1c/PostpassSchedulingproof.v
@@ -567,7 +567,7 @@ Proof.
   unfold builtin_alone in H0. erewrite H0; eauto.
 Qed.
 
-Local Hint Resolve verified_schedule_nob_checks_alls_bundles.
+Local Hint Resolve verified_schedule_nob_checks_alls_bundles: core.
 
 Lemma verified_schedule_checks_alls_bundles bb lb bundle:
   verified_schedule bb = OK lb ->
diff --git a/mppa_k1c/abstractbb/AbstractBasicBlocksDef.v b/mppa_k1c/abstractbb/AbstractBasicBlocksDef.v
index 5c94d435..cf46072f 100644
--- a/mppa_k1c/abstractbb/AbstractBasicBlocksDef.v
+++ b/mppa_k1c/abstractbb/AbstractBasicBlocksDef.v
@@ -403,7 +403,7 @@ Proof.
     * eapply H2; eauto. intros; eapply H0; eauto. rewrite rev_append_rev, in_app_iff; auto.
     * intros; eapply H0; eauto. rewrite rev_append_rev, in_app_iff, <- in_rev; auto.
 Qed.
-Local Hint Resolve app_fail_allvalid_correct.
+Local Hint Resolve app_fail_allvalid_correct: core.
 
 Lemma app_fail_correct l pt t1 t2: 
   match_pt t1 pt -> 
diff --git a/mppa_k1c/abstractbb/ImpSimuTest.v b/mppa_k1c/abstractbb/ImpSimuTest.v
index ea55b735..7a77ec15 100644
--- a/mppa_k1c/abstractbb/ImpSimuTest.v
+++ b/mppa_k1c/abstractbb/ImpSimuTest.v
@@ -304,12 +304,12 @@ Proof.
       rewrite <- EQT; eauto.
     + exploit smem_valid_set_decompose_1; eauto.
   - clear DM0. unfold hsmem_post_eval, hsmem_post_eval in * |- *; simpl.
-    Local Hint Resolve smem_valid_set_decompose_1.
+    Local Hint Resolve smem_valid_set_decompose_1: core.
     intros; case (R.eq_dec x x0).
     + intros; subst; rewrite !Dict.set_spec_eq; simpl; eauto.
     + intros; rewrite !Dict.set_spec_diff; simpl; eauto.
 Qed.
-Local Hint Resolve naive_set_correct.
+Local Hint Resolve naive_set_correct: core.
 
 Definition equiv_hsmem ge (hd1 hd2: hsmem) := 
       (forall m, allvalid ge hd1.(hpre) m <-> allvalid ge hd2.(hpre) m)
@@ -523,7 +523,7 @@ Lemma hinst_smem_correct i: forall hd hod,
   WHEN hinst_smem i hd hod ~> hd' THEN
     forall ge od d, smem_model ge od hod -> smem_model ge d hd -> (forall m, smem_valid ge d m -> smem_valid ge od m) -> smem_model ge (inst_smem i d od) hd'.
 Proof.
-  Local Hint Resolve smem_valid_set_proof.
+  Local Hint Resolve smem_valid_set_proof: core.
   induction i; simpl; wlp_simplify; eauto 15 with wlp.
 Qed.
 Global Opaque hinst_smem.
@@ -563,7 +563,7 @@ Definition bblock_hsmem: bblock -> ?? hsmem
 Lemma bblock_hsmem_correct p:
   WHEN bblock_hsmem p ~> hd THEN forall ge, smem_model ge (bblock_smem p) hd.
 Proof.
-  Local Hint Resolve hsmem_empty_correct.
+  Local Hint Resolve hsmem_empty_correct: core.
   wlp_simplify.
 Qed.
 Global Opaque bblock_hsmem.
@@ -775,7 +775,7 @@ Proof.
   intro H; erewrite <- list_term_eval_set_hid; rewrite H. apply list_term_eval_set_hid.
 Qed.
 
-Local Hint Resolve term_eval_set_hid_equiv list_term_eval_set_hid_equiv.
+Local Hint Resolve term_eval_set_hid_equiv list_term_eval_set_hid_equiv: core.
 
 Program Definition bblock_simu_test (p1 p2: bblock): ?? bool :=
   DO log <~ count_logger ();;
@@ -802,7 +802,7 @@ Obligation 2.
   wlp_simplify.
 Qed.
 
-Local Hint Resolve g_bblock_simu_test_correct.
+Local Hint Resolve g_bblock_simu_test_correct: core.
 
 Theorem bblock_simu_test_correct p1 p2:
   WHEN bblock_simu_test p1 p2 ~> b THEN b=true -> forall ge, bblock_simu ge p1 p2.
@@ -1123,7 +1123,7 @@ Definition get {A} (d:t A) (x:R.t): option A
 Definition set {A} (d:t A) (x:R.t) (v:A): t A
  := PositiveMap.add x v d.
 
-Local Hint Unfold PositiveMap.E.eq.
+Local Hint Unfold PositiveMap.E.eq: core.
 
 Lemma set_spec_eq A d x (v: A):
   get (set d x v) x = Some v.
diff --git a/mppa_k1c/abstractbb/Impure/ImpHCons.v b/mppa_k1c/abstractbb/Impure/ImpHCons.v
index d8002375..637116cc 100644
--- a/mppa_k1c/abstractbb/Impure/ImpHCons.v
+++ b/mppa_k1c/abstractbb/Impure/ImpHCons.v
@@ -95,7 +95,7 @@ Proof.
   wlp_simplify.
 Qed.
 Global Opaque assert_list_incl.
-Hint Resolve assert_list_incl_correct.
+Hint Resolve assert_list_incl_correct: wlp.
 
 End Sets.
 
@@ -165,7 +165,7 @@ Lemma hConsV_correct A (hasheq: A -> A -> ?? bool):
     (forall x y, WHEN hasheq x y ~> b THEN b=true -> x=y) -> 
     forall x, WHEN hco.(hC) x ~> x' THEN x.(hdata).(data)=x'.(data).
 Proof.
-  Local Hint Resolve f_equal2.
+  Local Hint Resolve f_equal2: core.
   wlp_simplify.
   exploit H; eauto.
   + wlp_simplify.
diff --git a/mppa_k1c/abstractbb/Parallelizability.v b/mppa_k1c/abstractbb/Parallelizability.v
index 22809095..30904b5d 100644
--- a/mppa_k1c/abstractbb/Parallelizability.v
+++ b/mppa_k1c/abstractbb/Parallelizability.v
@@ -332,7 +332,7 @@ Fixpoint bblock_wframe(p:bblock): list R.t :=
   | i::p' => (inst_wframe i)++(bblock_wframe p') 
   end.
 
-Local Hint Resolve Permutation_app_head Permutation_app_tail Permutation_app_comm.
+Local Hint Resolve Permutation_app_head Permutation_app_tail Permutation_app_comm: core.
 
 Lemma bblock_wframe_Permutation p p': 
  Permutation p p' -> Permutation (bblock_wframe p)  (bblock_wframe p').
@@ -620,7 +620,7 @@ Include ParallelizablityChecking L.
 Section PARALLEL2.
 Variable ge: genv.
 
-Local Hint Resolve S.empty_match_frame S.add_match_frame S.union_match_frame S.is_disjoint_match_frame.
+Local Hint Resolve S.empty_match_frame S.add_match_frame S.union_match_frame S.is_disjoint_match_frame: core.
 
 (** Now, refinement of each operation toward parallelizable *)
 
@@ -659,14 +659,14 @@ Fixpoint inst_sframe (i: inst): S.t :=
   | a::i' => S.add (fst a) (S.union (exp_sframe (snd a)) (inst_sframe i'))
   end.
 
-Local Hint Resolve exp_sframe_correct.
+Local Hint Resolve exp_sframe_correct: core.
 
 Lemma inst_sframe_correct i: S.match_frame (inst_sframe i) (inst_frame i).
 Proof.
   induction i as [|[y e] i']; simpl; auto.
 Qed.
 
-Local Hint Resolve inst_wsframe_correct inst_sframe_correct.
+Local Hint Resolve inst_wsframe_correct inst_sframe_correct: core.
 
 Fixpoint is_pararec (p: bblock) (wsframe: S.t): bool :=
   match p with
diff --git a/mppa_k1c/abstractbb/SeqSimuTheory.v b/mppa_k1c/abstractbb/SeqSimuTheory.v
index 649dd083..e234883f 100644
--- a/mppa_k1c/abstractbb/SeqSimuTheory.v
+++ b/mppa_k1c/abstractbb/SeqSimuTheory.v
@@ -102,9 +102,6 @@ Fixpoint bblock_smem_rec (p: bblock) (d: smem): smem :=
      let d':=inst_smem i d d in
      bblock_smem_rec p' d'
   end.
-(*
-Local Hint Resolve smem_eval_empty.
-*)
 
 Definition bblock_smem: bblock -> smem
  := fun p => bblock_smem_rec p smem_empty.
@@ -124,7 +121,7 @@ Proof.
   intros d a H; eapply inst_smem_pre_monotonic; eauto.
 Qed.
 
-Local Hint Resolve inst_smem_pre_monotonic bblock_smem_pre_monotonic.
+Local Hint Resolve inst_smem_pre_monotonic bblock_smem_pre_monotonic: core.
 
 Lemma term_eval_exp e (od:smem) m0 old:
   (forall x, term_eval ge (od x) m0 = Some (old x)) ->
@@ -185,7 +182,7 @@ Lemma bblocks_smem_rec_Some_correct1 p m0: forall (m1 m2: mem) (d: smem),
   (forall x, term_eval ge (d x) m0 = Some (m1 x)) ->
   forall x, term_eval ge (bblock_smem_rec p d x) m0 = Some (m2 x).
 Proof.
-  Local Hint Resolve inst_smem_Some_correct1.
+  Local Hint Resolve inst_smem_Some_correct1: core.
   induction p as [ | i p]; simpl; intros m1 m2 d H.
   - inversion_clear H; eauto.
   - intros H0 x0.
@@ -299,7 +296,7 @@ Lemma block_smem_rec_valid p m0: forall (m1 m2: mem) (d:smem),
   (forall x, term_eval ge (d x) m0 = Some (m1 x)) ->
   pre (bblock_smem_rec p d) ge m0.
 Proof.
-  Local Hint Resolve inst_valid.
+  Local Hint Resolve inst_valid: core.
   induction p as [ | i p]; simpl; intros m1 d H; auto.
   intros H0 H1.
   destruct (inst_run ge i m1 m1) eqn: Heqov; eauto.
@@ -326,7 +323,7 @@ Theorem bblock_smem_simu p1 p2:
    smem_simu (bblock_smem p1) (bblock_smem p2) ->
    bblock_simu ge p1 p2.
 Proof.
-  Local Hint Resolve bblock_smem_valid bblock_smem_Some_correct1.
+  Local Hint Resolve bblock_smem_valid bblock_smem_Some_correct1: core.
   intros (INCL & EQUIV) m DONTFAIL; unfold smem_valid in * |-.
   destruct (run ge p1 m) as [m1|] eqn: RUN1; simpl; try congruence.
   assert (X: forall x, term_eval ge (bblock_smem p1 x) m = Some (m1 x)); eauto.
diff --git a/mppa_k1c/lib/Asmblockgenproof0.v b/mppa_k1c/lib/Asmblockgenproof0.v
index 940c6563..58455ada 100644
--- a/mppa_k1c/lib/Asmblockgenproof0.v
+++ b/mppa_k1c/lib/Asmblockgenproof0.v
@@ -414,7 +414,7 @@ Proof.
 Qed.
 
 
-Local Hint Resolve code_tail_0 code_tail_S.
+Local Hint Resolve code_tail_0 code_tail_S: core.
 
 Lemma code_tail_next:
   forall fn ofs c0,
@@ -458,7 +458,7 @@ Proof.
   omega.
 Qed.
 
-Local Hint Resolve code_tail_next.
+Local Hint Resolve code_tail_next: core.
 
 Lemma code_tail_next_int:
   forall fn ofs bi c,
diff --git a/mppa_k1c/lib/ForwardSimulationBlock.v b/mppa_k1c/lib/ForwardSimulationBlock.v
index 39dd2234..224eda0a 100644
--- a/mppa_k1c/lib/ForwardSimulationBlock.v
+++ b/mppa_k1c/lib/ForwardSimulationBlock.v
@@ -21,7 +21,7 @@ Section starN_lemma.
 
 Variable L: semantics.
 
-Local Hint Resolve starN_refl starN_step Eapp_assoc.
+Local Hint Resolve starN_refl starN_step Eapp_assoc: core.
 
 Lemma starN_split n s t s':
   starN (step L) (globalenv L) n s t s' ->
@@ -93,7 +93,7 @@ Hypothesis simu_end_block:
 
 (** Introduction d'une sémantique par bloc sur L1 appelée "memoL1" *)
 
-Local Hint Resolve starN_refl starN_step.
+Local Hint Resolve starN_refl starN_step: core.
 
 Definition follows_in_block (head current: state L1): Prop :=
   dist_end_block head >= dist_end_block current 
@@ -164,7 +164,7 @@ Inductive is_well_memorized (s s': memostate): Prop :=
     memorized s' = None ->
     is_well_memorized s s'.
 
-Local Hint Resolve StartBloc MidBloc ExitBloc.
+Local Hint Resolve StartBloc MidBloc ExitBloc: core.
 
 Definition memoL1 := {| 
   state := memostate;
diff --git a/mppa_k1c/lib/Machblockgen.v b/mppa_k1c/lib/Machblockgen.v
index a65b218f..2ba42814 100644
--- a/mppa_k1c/lib/Machblockgen.v
+++ b/mppa_k1c/lib/Machblockgen.v
@@ -105,7 +105,7 @@ Inductive is_end_block: Machblock_inst -> code -> Prop :=
   | End_basic bi bh bl: header bh <> nil -> is_end_block (MB_basic bi) (bh::bl)
   | End_cfi cfi bl: bl <> nil -> is_end_block (MB_cfi cfi) bl. 
 
-Local Hint Resolve End_empty End_basic End_cfi.
+Local Hint Resolve End_empty End_basic End_cfi: core.
 
 Inductive is_trans_code: Mach.code -> code -> Prop :=
   | Tr_nil: is_trans_code nil nil
@@ -123,7 +123,7 @@ Inductive is_trans_code: Mach.code -> code -> Prop :=
       header bh = nil ->
       is_trans_code (i::c) (add_basic bi bh::bl).
 
-Local Hint Resolve Tr_nil Tr_end_block.
+Local Hint Resolve Tr_nil Tr_end_block: core.
 
 Lemma add_to_code_is_trans_code i c bl:
   is_trans_code c bl ->
@@ -145,7 +145,7 @@ Proof.
       rewrite <- Heqti. eapply End_cfi. congruence.
 Qed.
 
-Local Hint Resolve add_to_code_is_trans_code.
+Local Hint Resolve add_to_code_is_trans_code: core.
 
 Lemma trans_code_is_trans_code_rev c1: forall c2 mbi, 
   is_trans_code c2 mbi ->
@@ -185,7 +185,7 @@ Proof.
   exists mbi1. split; congruence.
 Qed.
 
-Local Hint Resolve trans_code_is_trans_code.
+Local Hint Resolve trans_code_is_trans_code: core.
 
 Theorem is_trans_code_inv c bl: is_trans_code c bl <-> bl=(trans_code c).
 Proof.
diff --git a/mppa_k1c/lib/Machblockgenproof.v b/mppa_k1c/lib/Machblockgenproof.v
index 91be5e2e..0de2df52 100644
--- a/mppa_k1c/lib/Machblockgenproof.v
+++ b/mppa_k1c/lib/Machblockgenproof.v
@@ -72,7 +72,7 @@ Proof.
   apply match_states_trans_state.
 Qed.
 
-Local Hint Resolve match_states_trans_state.
+Local Hint Resolve match_states_trans_state: core.
 
 Lemma symbols_preserved:
   forall (s: ident), Genv.find_symbol tge s = Genv.find_symbol ge s.
@@ -284,7 +284,7 @@ Proof.
 Qed.
 
 Local Hint Resolve symbols_preserved senv_preserved init_mem_preserved prog_main_preserved functions_translated
-                   parent_sp_preserved.
+                   parent_sp_preserved: core.
 
 
 Definition dist_end_block_code (c: Mach.code) := 
@@ -299,8 +299,8 @@ Definition dist_end_block (s: Mach.state): nat :=
   | _ => 0
   end.
 
-Local Hint Resolve exec_nil_body exec_cons_body.
-Local Hint Resolve exec_MBgetstack exec_MBsetstack exec_MBgetparam exec_MBop exec_MBload exec_MBstore.
+Local Hint Resolve exec_nil_body exec_cons_body: core.
+Local Hint Resolve exec_MBgetstack exec_MBsetstack exec_MBgetparam exec_MBop exec_MBload exec_MBstore: core.
 
 Lemma size_add_label l bh: size (add_label l bh) = size bh + 1.
 Proof.
@@ -336,7 +336,7 @@ Proof.
     omega.
 Qed.
 
-Local Hint Resolve dist_end_block_code_simu_mid_block.
+Local Hint Resolve dist_end_block_code_simu_mid_block: core.
 
 
 Lemma size_nonzero c b bl:
@@ -392,8 +392,8 @@ destruct i; congruence.
 Qed.
 
 
-Local Hint Resolve Mlabel_is_not_cfi.
-Local Hint Resolve MBbasic_is_not_cfi.
+Local Hint Resolve Mlabel_is_not_cfi: core.
+Local Hint Resolve MBbasic_is_not_cfi: core.
 
 Lemma add_to_new_block_is_label i:
   header (add_to_new_bblock (trans_inst i)) <> nil -> exists l, i = Mlabel l.
@@ -408,7 +408,7 @@ Proof.
   + unfold cfi_bblock in H; simpl in H; congruence.
 Qed.
 
-Local Hint Resolve Mlabel_is_not_basic.
+Local Hint Resolve Mlabel_is_not_basic: core.
 
 Lemma trans_code_decompose c: forall b bl,
   is_trans_code c (b::bl) ->
@@ -510,8 +510,8 @@ Proof.
     rewrite Hs2, Hb2; eauto.
     Qed. 
 
-Local Hint Resolve exec_MBcall exec_MBtailcall exec_MBbuiltin exec_MBgoto exec_MBcond_true exec_MBcond_false exec_MBjumptable exec_MBreturn exec_Some_exit exec_None_exit.
-Local Hint Resolve eval_builtin_args_preserved external_call_symbols_preserved find_funct_ptr_same.
+Local Hint Resolve exec_MBcall exec_MBtailcall exec_MBbuiltin exec_MBgoto exec_MBcond_true exec_MBcond_false exec_MBjumptable exec_MBreturn exec_Some_exit exec_None_exit: core.
+Local Hint Resolve eval_builtin_args_preserved external_call_symbols_preserved find_funct_ptr_same: core.
 
 
 Lemma match_states_concat_trans_code st f sp c rs m h: 
diff --git a/test/monniaux/clock.c b/test/monniaux/clock.c
index fb636667..4ec679f6 100644
--- a/test/monniaux/clock.c
+++ b/test/monniaux/clock.c
@@ -24,9 +24,9 @@ cycle_t get_current_cycle(void) {
 }
 
 void print_total_clock(void) {
-  printf("time cycles: %lu\n", total_clock);
+  printf("time cycles: %" PRcycle "\n", total_clock);
 }
 
 void printerr_total_clock(void) {
-  fprintf(stderr, "time cycles: %lu\n", total_clock);
+  fprintf(stderr, "time cycles: %" PRcycle "\n", total_clock);
 }
diff --git a/test/monniaux/cycles.h b/test/monniaux/cycles.h
index 21541145..c7dc582b 100644
--- a/test/monniaux/cycles.h
+++ b/test/monniaux/cycles.h
@@ -1,13 +1,11 @@
+#include <stdint.h>
 #include <inttypes.h>
 #include <stdio.h>
-typedef unsigned long cycle_t;
-
-#ifdef MAX_MEASURES
-  static cycle_t _last_stop[MAX_MEASURES] = {0};
-  static cycle_t _total_cycles[MAX_MEASURES] = {0};
-#endif
 
 #ifdef __K1C__
+typedef uint64_t cycle_t;
+#define PRcycle PRIu64 /* cycle_t is uint64_t on K1C: use the unsigned conversion */
+
 #include <../../k1-cos/include/hal/cos_registers.h>
 
 static inline void cycle_count_config(void)
@@ -27,18 +25,57 @@ static inline cycle_t get_cycle(void)
 #else // not K1C
 static inline void cycle_count_config(void) { }
 
-#ifdef  __x86_64__
+#if defined(__i386__) || defined( __x86_64__)
+#define PRcycle PRIu64 /* cycle_t is uint64_t on x86: use the unsigned conversion */
+typedef uint64_t cycle_t;
 #include <x86intrin.h>
 static inline cycle_t get_cycle(void) { return __rdtsc(); }
 
 #elif __riscv
+#ifdef __riscv32
+#define PRcycle PRIu32 /* cycle_t is uint32_t in this branch: use the unsigned conversion */
+typedef uint32_t cycle_t;
+#else
+#define PRcycle PRIu64 /* cycle_t is uint64_t in this branch: use the unsigned conversion */
+typedef uint64_t cycle_t;
+#endif
 static inline cycle_t get_cycle(void) {
   cycle_t cycles;
   asm volatile ("rdcycle %0" : "=r" (cycles));
   return cycles;
 }
 
+#elif defined (__ARM_ARCH) && (__ARM_ARCH >= 6)
+#if (__ARM_ARCH < 8)
+typedef uint32_t cycle_t;
+#define PRcycle PRIu32 /* cycle_t is uint32_t when __ARM_ARCH < 8: use the unsigned conversion */
+
+/* need this kernel module
+https://github.com/zertyz/MTL/tree/master/cpp/time/kernel/arm */
+static inline cycle_t get_cycle(void) {
+  cycle_t cycles;
+  __asm__ volatile ("mrc p15, 0, %0, c9, c13, 0":"=r" (cycles));
+  return cycles;
+}
 #else
+#define PRcycle PRIu64 /* cycle_t is uint64_t when __ARM_ARCH >= 8: use the unsigned conversion */
+typedef uint64_t cycle_t;
+/* need this kernel module:
+https://github.com/jerinjacobk/armv8_pmu_cycle_counter_el0
+
+on 5+ kernels, remove first argument of access_ok macro */
+
+static inline cycle_t get_cycle(void)
+{
+  uint64_t val;
+  __asm__ volatile("mrs %0, pmccntr_el0" : "=r"(val));
+  return val;
+}
+#endif
+
+#else
+#define PRcycle PRIu32 /* fallback cycle_t is uint32_t: use the unsigned conversion */
+typedef uint32_t cycle_t;
 static inline cycle_t get_cycle(void) { return 0; }
 #endif
 #endif
@@ -48,3 +85,9 @@ static inline cycle_t get_cycle(void) { return 0; }
   #define TIMESTOP(i) {cycle_t cur = get_cycle(); _total_cycles[i] += cur - _last_stop[i]; _last_stop[i] = cur;}
   #define TIMEPRINT(n) { for (int i = 0; i <= n; i++) printf("%d cycles: %" PRIu64 "\n", i, _total_cycles[i]); }
 #endif
+
+
+#ifdef MAX_MEASURES
+  static cycle_t _last_stop[MAX_MEASURES] = {0};
+  static cycle_t _total_cycles[MAX_MEASURES] = {0};
+#endif
diff --git a/test/monniaux/quicksort/quicksort_run.c b/test/monniaux/quicksort/quicksort_run.c
index c35d0752..3c640b24 100644
--- a/test/monniaux/quicksort/quicksort_run.c
+++ b/test/monniaux/quicksort/quicksort_run.c
@@ -13,7 +13,7 @@ int main (void) {
   quicksort(vec, len);
   quicksort_time = get_cycle() - quicksort_time;
   printf("sorted=%s\n"
-	 "time cycles:%" PRIu64 "\n",
+	 "time cycles:%" PRcycle "\n",
 	 data_vec_is_sorted(vec, len)?"true":"false",
 	 quicksort_time);
   free(vec);