Merge remote-tracking branch 'origin/mppa-work' into mppa-cse2

author: David Monniaux <david.monniaux@univ-grenoble-alpes.fr> 2020-02-06 11:00:51 +0100
committer: David Monniaux <david.monniaux@univ-grenoble-alpes.fr> 2020-02-06 11:00:51 +0100
commit: 90e7548f0dae5be1c896fcb964a21fe70b514e0d (patch)
tree: 9f3886dc181105993d9da8a2832660c5b123c6ce
parent: 1cd3c4c9b7372ca8de128a9ce60ed00210fd0e28 (diff)
parent: 7dca7590aa212806ee939244b253a6a067f34bfc (diff)
download: compcert-kvx-90e7548f0dae5be1c896fcb964a21fe70b514e0d.tar.gz
compcert-kvx-90e7548f0dae5be1c896fcb964a21fe70b514e0d.zip
16 files changed, 832 insertions, 66 deletions
diff --git a/backend/Duplicate.v b/backend/Duplicate.v
index 46f0855d..82c17367 100644
--- a/backend/Duplicate.v
+++ b/backend/Duplicate.v
@@ -136,12 +136,15 @@ Definition verify_match_inst dupmap inst tinst :=
 
   | Icond cond lr n1 n2 => match tinst with
       | Icond cond' lr' n1' n2' =>
-          do u1 <- verify_is_copy dupmap n1 n1';
-          do u2 <- verify_is_copy dupmap n2 n2';
-          if (eq_condition cond cond') then
-            if (list_eq_dec Pos.eq_dec lr lr') then OK tt
-            else Error (msg "Different lr in Icond")
-          else Error (msg "Different cond in Icond")
+          if (list_eq_dec Pos.eq_dec lr lr') then
+            if (eq_condition cond cond') then
+              do u1 <- verify_is_copy dupmap n1 n1';
+              do u2 <- verify_is_copy dupmap n2 n2'; OK tt
+            else if (eq_condition (negate_condition cond) cond') then
+              do u1 <- verify_is_copy dupmap n1 n2';
+              do u2 <- verify_is_copy dupmap n2 n1'; OK tt
+            else Error (msg "Incompatible conditions in Icond")
+          else Error (msg "Different lr in Icond")
       | _ => Error (msg "verify_match_inst Icond") end
 
   | Ijumptable r ln => match tinst with
diff --git a/backend/Duplicateaux.ml b/backend/Duplicateaux.ml
index a64f4862..84daa329 100644
--- a/backend/Duplicateaux.ml
+++ b/backend/Duplicateaux.ml
@@ -1,13 +1,572 @@
 open RTL
 open Maps
+open Camlcoq
+
+(* TTL : IR emphasizing the preferred next node *)
+module TTL = struct
+  type instruction =
+  | Tleaf of RTL.instruction
+  | Tnext of node * RTL.instruction
+
+  type code = instruction PTree.t
+end;;
+
+open TTL
+
+(** RTL to TTL *)
+
+let get_some = function
+| None -> failwith "Did not get some"
+| Some thing -> thing
+
+let bfs code entrypoint =
+  let visited = ref (PTree.map (fun n i -> false) code)
+  and bfs_list = ref []
+  and to_visit = Queue.create ()
+  and node = ref entrypoint
+  in begin
+    Queue.add entrypoint to_visit;
+    while not (Queue.is_empty to_visit) do
+      node := Queue.pop to_visit;
+      if not (get_some @@ PTree.get !node !visited) then begin
+        visited := PTree.set !node true !visited;
+        match PTree.get !node code with
+        | None -> failwith "No such node"
+        | Some i ->
+            bfs_list := !bfs_list @ [!node];
+            match i with
+            | Icall(_, _, _, _, n) -> Queue.add n to_visit
+            | Ibuiltin(_, _, _, n) -> Queue.add n to_visit
+            | Ijumptable(_, ln) -> List.iter (fun n -> Queue.add n to_visit) ln
+            | Itailcall _ | Ireturn _ -> ()
+            | Icond (_, _, n1, n2) -> Queue.add n1 to_visit; Queue.add n2 to_visit
+            | Inop n | Iop (_, _, _, n) | Iload (_, _, _, _, _, n) | Istore (_, _, _, _, n) -> Queue.add n to_visit
+      end
+    done;
+    !bfs_list
+  end
+
+let optbool o = match o with Some _ -> true | None -> false
+
+let ptree_get_some n ptree = get_some @@ PTree.get n ptree
+
+let get_predecessors_rtl code =
+  let preds = ref (PTree.map (fun n i -> []) code) in
+  let process_inst (node, i) =
+    let succ = match i with
+      | Inop n | Iop (_,_,_,n) | Iload (_, _,_,_,_,n) | Istore (_,_,_,_,n)
+      | Icall (_,_,_,_,n) | Ibuiltin (_, _, _, n) -> [n]
+      | Icond (_,_,n1,n2) -> [n1;n2]
+      | Ijumptable (_,ln) -> ln
+      | Itailcall _ | Ireturn _ -> []
+    in List.iter (fun s ->
+      let previous_preds = ptree_get_some s !preds in
+      if optbool @@ List.find_opt (fun e -> e == node) previous_preds then ()
+      else preds := PTree.set s (node::previous_preds) !preds) succ
+  in begin
+    List.iter process_inst (PTree.elements code);
+    !preds
+  end
+
+module PInt = struct
+  type t = P.t
+  let compare x y = compare (P.to_int x) (P.to_int y)
+end
+
+module PSet = Set.Make(PInt)
+
+let print_intlist l =
+  let rec f = function
+  | [] -> ()
+  | n::ln -> (Printf.printf "%d " (P.to_int n); f ln)
+  in begin
+    Printf.printf "[";
+    f l;
+    Printf.printf "]"
+  end
+
+let print_intset s =
+  let seq = PSet.to_seq s
+  in begin
+    Printf.printf "{";
+    Seq.iter (fun n ->
+      Printf.printf "%d " (P.to_int n)
+    ) seq;
+    Printf.printf "}"
+  end
+
+(* FIXME - dominators not working well because the order of dataflow update isn't right *)
+  (*
+let get_dominators code entrypoint =
+  let bfs_order = bfs code entrypoint
+  and predecessors = get_predecessors_rtl code
+  in let doms = ref (PTree.map (fun n i -> PSet.of_list bfs_order) code)
+  in begin
+    Printf.printf "BFS: ";
+    print_intlist bfs_order;
+    Printf.printf "\n";
+    List.iter (fun n ->
+      let preds = get_some @@ PTree.get n predecessors
+      and single = PSet.singleton n
+      in match preds with
+      | [] -> doms := PTree.set n single !doms
+      | p::lp ->
+          let set_p = get_some @@ PTree.get p !doms
+          and set_lp = List.map (fun p -> get_some @@ PTree.get p !doms) lp
+          in let inter = List.fold_left PSet.inter set_p set_lp
+          in let union = PSet.union inter single
+          in begin
+            Printf.printf "----------------------------------------\n";
+            Printf.printf "n = %d\n" (P.to_int n);
+            Printf.printf "set_p = "; print_intset set_p; Printf.printf "\n";
+            Printf.printf "set_lp = ["; List.iter (fun s -> print_intset s; Printf.printf ", ") set_lp; Printf.printf "]\n";
+            Printf.printf "=> inter = "; print_intset inter; Printf.printf "\n";
+            Printf.printf "=> union = "; print_intset union; Printf.printf "\n";
+            doms := PTree.set n union !doms
+          end
+    ) bfs_order;
+    !doms
+  end
+*)
+
+let print_dominators dominators =
+  let domlist = PTree.elements dominators
+  in begin
+    Printf.printf "{\n";
+    List.iter (fun (n, doms) ->
+      Printf.printf "\t";
+      Printf.printf "%d:" (P.to_int n);
+      print_intset doms;
+      Printf.printf "\n"
+    ) domlist
+  end
+
+type vstate = Unvisited | Processed | Visited
+
+(** Getting loop branches with a DFS visit :
+  * Each node is either Unvisited, Visited, or Processed
+  * pre-order: node becomes Processed
+  * post-order: node becomes Visited
+  *
+  * If we come accross an edge to a Processed node, it's a loop!
+  *)
+let get_loop_headers code entrypoint =
+  let visited = ref (PTree.map (fun n i -> Unvisited) code)
+  and is_loop_header = ref (PTree.map (fun n i -> false) code)
+  in let rec dfs_visit code = function
+  | [] -> ()
+  | node :: ln ->
+      match (get_some @@ PTree.get node !visited) with
+      | Visited -> ()
+      | Processed -> begin
+          is_loop_header := PTree.set node true !is_loop_header;
+          visited := PTree.set node Visited !visited
+        end
+      | Unvisited -> begin
+          visited := PTree.set node Processed !visited;
+          match PTree.get node code with
+          | None -> failwith "No such node"
+          | Some i -> let next_visits = (match i with
+            | Icall (_, _, _, _, n) | Ibuiltin (_, _, _, n) | Inop n | Iop (_, _, _, n)
+            | Iload (_, _, _, _, _, n) | Istore (_, _, _, _, n) -> [n]
+            | Icond (_, _, n1, n2) -> [n1; n2]
+            | Itailcall _ | Ireturn _ -> []
+            | Ijumptable (_, ln) -> ln
+            ) in dfs_visit code next_visits;
+          visited := PTree.set node Visited !visited;
+          dfs_visit code ln
+        end
+  in begin
+    dfs_visit code [entrypoint];
+    !is_loop_header
+  end
+
+let ptree_printbool pt =
+  let elements = PTree.elements pt
+  in begin
+    Printf.printf "[";
+    List.iter (fun (n, b) ->
+      if b then Printf.printf "%d, " (P.to_int n) else ()
+    ) elements;
+    Printf.printf "]"
+  end
+
+(* Looks ahead (until a branch) to see if a node further down verifies
+ * the given predicate *)
+let rec look_ahead code node is_loop_header predicate =
+  if (predicate node) then true
+  else match (get_some @@ PTree.get node code) with
+    | Ireturn _ | Itailcall _ | Icond _ | Ijumptable _ -> false
+    | Inop n | Iop (_, _, _, n) | Iload (_, _, _, _, _, n)
+    | Istore (_, _, _, _, n) | Icall (_, _, _, _, n)
+    | Ibuiltin (_, _, _, n) ->
+      if (predicate n) then true
+      else (
+        if (get_some @@ PTree.get n is_loop_header) then false
+        else look_ahead code n is_loop_header predicate
+      )
+
+exception HeuristicSucceeded
+
+let do_call_heuristic code ifso ifnot is_loop_header preferred =
+  let predicate n = (function
+  | Icall _ -> true
+  | _ -> false) @@ get_some @@ PTree.get n code
+  in if (look_ahead code ifso is_loop_header predicate) then
+    (preferred := false; raise HeuristicSucceeded)
+  else if (look_ahead code ifnot is_loop_header predicate) then
+    (preferred := true; raise HeuristicSucceeded)
+  else ()
+
+let do_opcode_heuristic code cond ifso ifnot preferred = DuplicateOpcodeHeuristic.opcode_heuristic code cond ifso ifnot preferred
+
+let do_return_heuristic code ifso ifnot is_loop_header preferred =
+  let predicate n = (function
+  | Ireturn _ -> true
+  | _ -> false) @@ get_some @@ PTree.get n code
+  in if (look_ahead code ifso is_loop_header predicate) then
+    (preferred := false; raise HeuristicSucceeded)
+  else if (look_ahead code ifnot is_loop_header predicate) then
+    (preferred := true; raise HeuristicSucceeded)
+  else ()
+
+let do_store_heuristic code ifso ifnot is_loop_header preferred =
+  let predicate n = (function
+  | Istore _ -> true
+  | _ -> false) @@ get_some @@ PTree.get n code
+  in if (look_ahead code ifso is_loop_header predicate) then
+    (preferred := false; raise HeuristicSucceeded)
+  else if (look_ahead code ifnot is_loop_header predicate) then
+    (preferred := true; raise HeuristicSucceeded)
+  else ()
+
+let do_loop_heuristic code ifso ifnot is_loop_header preferred =
+  let predicate n = get_some @@ PTree.get n is_loop_header
+  in if (look_ahead code ifso is_loop_header predicate) then
+    (preferred := true; raise HeuristicSucceeded)
+  else if (look_ahead code ifnot is_loop_header predicate) then
+    (preferred := false; raise HeuristicSucceeded)
+  else ()
+
+let get_directions code entrypoint =
+  let bfs_order = bfs code entrypoint
+  and is_loop_header = get_loop_headers code entrypoint
+  and directions = ref (PTree.map (fun n i -> false) code) (* false <=> fallthru *)
+  in begin
+    Printf.printf "Loop headers: ";
+    ptree_printbool is_loop_header;
+    Printf.printf "\n";
+    List.iter (fun n ->
+      match (get_some @@ PTree.get n code) with
+      | Icond (cond, lr, ifso, ifnot) ->
+          Printf.printf "Analyzing %d.." (P.to_int n);
+          let preferred = ref false
+          in (try
+            Printf.printf " call..";
+            do_call_heuristic code ifso ifnot is_loop_header preferred;
+            Printf.printf " opcode..";
+            do_opcode_heuristic code cond ifso ifnot preferred;
+            Printf.printf " return..";
+            do_return_heuristic code ifso ifnot is_loop_header preferred;
+            Printf.printf " store..";
+            do_store_heuristic code ifso ifnot is_loop_header preferred;
+            Printf.printf " loop..";
+            do_loop_heuristic code ifso ifnot is_loop_header preferred;
+            Printf.printf "Random choice for %d\n" (P.to_int n);
+            preferred := Random.bool ()
+            with HeuristicSucceeded | DuplicateOpcodeHeuristic.HeuristicSucceeded
+              -> Printf.printf " %s\n" (match !preferred with true -> "BRANCH"
+                                        | false -> "FALLTHROUGH")
+          ); directions := PTree.set n !preferred !directions
+      | _ -> ()
+    ) bfs_order;
+    !directions
+  end
+
+let to_ttl_inst direction = function
+| Ireturn o -> Tleaf (Ireturn o)
+| Inop n -> Tnext (n, Inop n)
+| Iop (op, lr, r, n) -> Tnext (n, Iop(op, lr, r, n))
+| Iload (tm, m, a, lr, r, n) -> Tnext (n, Iload(tm, m, a, lr, r, n))
+| Istore (m, a, lr, r, n) -> Tnext (n, Istore(m, a, lr, r, n))
+| Icall (s, ri, lr, r, n) -> Tleaf (Icall(s, ri, lr, r, n))
+| Itailcall (s, ri, lr) -> Tleaf (Itailcall(s, ri, lr))
+| Ibuiltin (ef, lbr, br, n) -> Tleaf (Ibuiltin(ef, lbr, br, n))
+| Icond (cond, lr, n, n') -> (match direction with
+    | false -> Tnext (n', Icond(cond, lr, n, n'))
+    | true -> Tnext (n, Icond(cond, lr, n, n')))
+| Ijumptable (r, ln) -> Tleaf (Ijumptable(r, ln))
+
+let rec to_ttl_code_rec directions = function
+| [] -> PTree.empty
+| m::lm -> let (n, i) = m
+    in let direction = get_some @@ PTree.get n directions
+    in PTree.set n (to_ttl_inst direction i) (to_ttl_code_rec directions lm)
+
+let to_ttl_code code entrypoint =
+  let directions = get_directions code entrypoint
+  in begin
+    Printf.printf "Ifso directions: ";
+    ptree_printbool directions;
+    Printf.printf "\n";
+    Random.init(0); (* using same seed to make it deterministic *)
+    to_ttl_code_rec directions (PTree.elements code)
+  end
+
+(** Trace selection on TTL *)
+
+let rec exists_false_rec = function
+  | [] -> false
+  | m::lm -> let (_, b) = m in if b then exists_false_rec lm else true
+
+let exists_false boolmap = exists_false_rec (PTree.elements boolmap)
+
+(* DFS on TTL to guide the exploration *)
+let dfs code entrypoint =
+  let visited = ref (PTree.map (fun n i -> false) code) in
+  let rec dfs_list code = function
+  | [] -> []
+  | node :: ln ->
+      let node_dfs =
+        if not (get_some @@ PTree.get node !visited) then begin
+          visited := PTree.set node true !visited;
+          match PTree.get node code with
+          | None -> failwith "No such node"
+          | Some ti -> [node] @ match ti with
+              | Tleaf i -> (match i with
+                  | Icall(_, _, _, _, n) -> dfs_list code [n]
+                  | Ibuiltin(_, _, _, n) -> dfs_list code [n]
+                  | Ijumptable(_, ln) -> dfs_list code ln
+                  | Itailcall _ | Ireturn _ -> [] 
+                  | _ -> failwith "Tleaf case not handled in dfs" )
+              | Tnext (n,i) -> (dfs_list code [n]) @ match i with
+                  | Icond (_, _, n1, n2) -> dfs_list code [n1; n2]
+                  | Inop _ | Iop _ | Iload _ | Istore _ -> []
+                  | _ -> failwith "Tnext case not handled in dfs"
+          end
+        else []
+      in node_dfs @ (dfs_list code ln)
+  in dfs_list code [entrypoint]
+
+let get_predecessors_ttl code =
+  let preds = ref (PTree.map (fun n i -> []) code) in
+  let process_inst (node, ti) = match ti with
+  | Tleaf _ -> ()
+  | Tnext (_, i) -> let succ = match i with
+      | Inop n | Iop (_,_,_,n) | Iload (_, _,_,_,_,n) | Istore (_,_,_,_,n)
+      | Icall (_,_,_,_,n) | Ibuiltin (_, _, _, n) -> [n]
+      | Icond (_,_,n1,n2) -> [n1;n2]
+      | Ijumptable (_,ln) -> ln
+      | _ -> []
+      in List.iter (fun s -> preds := PTree.set s (node::(get_some @@ PTree.get s !preds)) !preds) succ
+  in begin
+    List.iter process_inst (PTree.elements code);
+    !preds
+  end
+
+let rtl_proj code = PTree.map (fun n ti -> match ti with Tleaf i | Tnext(_, i) -> i) code
+
+let rec select_unvisited_node is_visited = function
+| [] -> failwith "Empty list"
+| n :: ln -> if not (ptree_get_some n is_visited) then n else select_unvisited_node is_visited ln
+
+let best_successor_of node code is_visited =
+  match (PTree.get node code) with
+  | None -> failwith "No such node in the code"
+  | Some ti -> match ti with
+      | Tleaf _ -> None
+      | Tnext (n,_) -> if not (ptree_get_some n is_visited) then Some n
+                       else None
+
+let best_predecessor_of node predecessors order is_visited =
+  match (PTree.get node predecessors) with
+  | None -> failwith "No predecessor list found"
+  | Some lp -> try Some (List.find (fun n -> (List.mem n lp) && (not (ptree_get_some n is_visited))) order)
+               with Not_found -> None
+
+(* Algorithm mostly inspired from Chang and Hwu 1988
+ * "Trace Selection for Compiling Large C Application Programs to Microcode" *)
+let select_traces code entrypoint =
+  let order = dfs code entrypoint in
+  let predecessors = get_predecessors_ttl code in
+  let traces = ref [] in
+  let is_visited = ref (PTree.map (fun n i -> false) code) in begin (* mark all nodes visited *)
+    while exists_false !is_visited do (* while (there are unvisited nodes) *)
+      let seed = select_unvisited_node !is_visited order in
+      let trace = ref [seed] in
+      let current = ref seed in begin
+        is_visited := PTree.set seed true !is_visited; (* mark seed visited *)
+        let quit_loop = ref false in begin
+          while not !quit_loop do
+            let s = best_successor_of !current code !is_visited in
+            match s with
+            | None -> quit_loop := true (* if (s==0) exit loop *)
+            | Some succ -> begin
+                trace := !trace @ [succ];
+                is_visited := PTree.set succ true !is_visited; (* mark s visited *)
+                current := succ
+                end
+          done;
+          current := seed;
+          quit_loop := false;
+          while not !quit_loop do
+            let s = best_predecessor_of !current predecessors order !is_visited in
+            match s with
+            | None -> quit_loop := true (* if (s==0) exit loop *)
+            | Some pred -> begin
+                trace := pred :: !trace;
+                is_visited := PTree.set pred true !is_visited; (* mark s visited *)
+                current := pred
+                end
+          done;
+          traces := !trace :: !traces;
+        end
+      end
+    done;
+    Printf.printf "DFS: \t"; print_intlist order; Printf.printf "\n";
+    !traces
+  end
+
+let print_trace t = print_intlist t
+
+let print_traces traces =
+  let rec f = function
+  | [] -> ()
+  | t::lt -> Printf.printf "\n\t"; print_trace t; Printf.printf ",\n"; f lt
+  in begin
+    Printf.printf "Traces: {";
+    f traces;
+    Printf.printf "}\n";
+  end
 
 let rec make_identity_ptree_rec = function
 | [] -> PTree.empty
 | m::lm -> let (n, _) = m in PTree.set n n (make_identity_ptree_rec lm)
 
-let make_identity_ptree f = make_identity_ptree_rec (PTree.elements f.fn_code)
+let make_identity_ptree code = make_identity_ptree_rec (PTree.elements code)
+
+(* Change the pointers of preds nodes to point to n' instead of n *)
+let rec change_pointers code n n' = function
+  | [] -> code
+  | pred :: preds ->
+      let new_pred_inst = match ptree_get_some pred code with
+        | Icall(a, b, c, d, n0) -> assert (n0 == n); Icall(a, b, c, d, n')
+        | Ibuiltin(a, b, c, n0) -> assert (n0 == n); Ibuiltin(a, b, c, n')
+        | Ijumptable(a, ln) -> assert (optbool @@ List.find_opt (fun e -> e == n) ln);
+                               Ijumptable(a, List.map (fun e -> if (e == n) then n' else e) ln)
+        | Icond(a, b, n1, n2) -> assert (n1 == n || n2 == n);
+                                 let n1' = if (n1 == n) then n' else n1
+                                 in let n2' = if (n2 == n) then n' else n2
+                                 in Icond(a, b, n1', n2')
+        | Inop n0 -> assert (n0 == n); Inop n'
+        | Iop (a, b, c, n0) -> assert (n0 == n); Iop (a, b, c, n')
+        | Iload (a, b, c, d, e, n0) -> assert (n0 == n); Iload (a, b, c, d, e, n')
+        | Istore (a, b, c, d, n0) -> assert (n0 == n); Istore (a, b, c, d, n')
+        | Itailcall _ | Ireturn _ -> failwith "That instruction cannot be a predecessor"
+      in let new_code = PTree.set pred new_pred_inst code
+      in change_pointers new_code n n' preds
+
+(* parent: parent of n to keep as parent
+ * preds: all the other parents of n
+ * n': the integer which should contain the duplicate of n
+ * returns: new code, new ptree *)
+let duplicate code ptree parent n preds n' =
+  Printf.printf "Duplicating node %d into %d..\n" (P.to_int n) (P.to_int n');
+  match PTree.get n' code with
+  | Some _ -> failwith "The PTree already has a node n'"
+  | None ->
+      let c' = change_pointers code n n' preds
+      in let new_code = PTree.set n' (ptree_get_some n code) c'
+      and new_ptree = PTree.set n' n ptree
+      in (new_code, new_ptree)
+
+let rec maxint = function
+  | [] -> 0
+  | i :: l -> assert (i >= 0); let m = maxint l in if i > m then i else m
+
+let is_empty = function
+  | [] -> true
+  | _ -> false
+
+(* code: RTL code
+ * preds: mapping node -> predecessors
+ * ptree: the revmap
+ * trace: the trace to follow tail duplication on *)
+let tail_duplicate code preds ptree trace =
+  (* next_int: unused integer that can be used for the next duplication *)
+  let next_int = ref (maxint (List.map (fun e -> let (n, _) = e in P.to_int n) (PTree.elements code)) + 1)
+  (* last_node and last_duplicate store resp. the last processed node of the trace, and its duplication *)
+  in let last_node = ref None
+  in let last_duplicate = ref None
+  in let nb_duplicated = ref 0
+  (* recursive function on a trace *)
+  in let rec f code ptree is_first = function
+    | [] -> (code, ptree)
+    | n :: t ->
+        let (new_code, new_ptree) =
+          if is_first then (code, ptree) (* first node is never duplicated regardless of its inputs *)
+          else
+            let node_preds = ptree_get_some n preds
+            in let node_preds_nolast = List.filter (fun e -> e <> get_some !last_node) node_preds
+            in let final_node_preds = match !last_duplicate with
+              | None -> node_preds_nolast
+              | Some n' -> n' :: node_preds_nolast
+            in if not (is_empty final_node_preds) then
+              let n' = !next_int
+              in let (newc, newp) = duplicate code ptree !last_node n final_node_preds (P.of_int n')
+              in begin
+                next_int := !next_int + 1;
+                nb_duplicated := !nb_duplicated + 1;
+                last_duplicate := Some (P.of_int n');
+                (newc, newp)
+              end
+            else (code, ptree)
+        in begin
+          last_node := Some n;
+          f new_code new_ptree false t
+        end
+  in let new_code, new_ptree = f code ptree true trace
+  in (new_code, new_ptree, !nb_duplicated)
+
+let superblockify_traces code preds traces =
+  let max_nb_duplicated = 2 (* FIXME - should be architecture dependent *)
+  in let ptree = make_identity_ptree code
+  in let rec f code ptree = function
+    | [] -> (code, ptree, 0)
+    | trace :: traces ->
+        let new_code, new_ptree, nb_duplicated = tail_duplicate code preds ptree trace
+        in if (nb_duplicated < max_nb_duplicated) then f new_code new_ptree traces
+           else (Printf.printf "Too many duplicated nodes, aborting tail duplication\n"; (code, ptree, 0))
+  in let new_code, new_ptree, _ = f code ptree traces
+  in (new_code, new_ptree)
+
+let rec invert_iconds_trace code = function
+  | [] -> code
+  | n::[] -> code
+  | n :: n' :: t ->
+      let code' = match ptree_get_some n code with
+        | Icond (c, lr, ifso, ifnot) ->
+            assert (n' == ifso || n' == ifnot);
+            if (n' == ifso) then (
+              Printf.printf "Reversing ifso/ifnot for node %d\n" (P.to_int n);
+              PTree.set n (Icond (Op.negate_condition c, lr, ifnot, ifso)) code )
+            else code
+        | _ -> code
+      in invert_iconds_trace code' (n'::t)
+
+let rec invert_iconds code = function
+  | [] -> code
+  | t :: ts ->
+      let code' = if !Clflags.option_finvertcond then invert_iconds_trace code t
+                  else code
+      in invert_iconds code' ts
 
 (* For now, identity function *)
 let duplicate_aux f =
-  let pTreeId = make_identity_ptree f
-  in ((f.fn_code, f.fn_entrypoint), pTreeId)
+  let entrypoint = fn_entrypoint f in
+  let code = fn_code f in
+  let traces = select_traces (to_ttl_code code entrypoint) entrypoint in
+  let icond_code = invert_iconds code traces in
+  let preds = get_predecessors_rtl icond_code in
+  let (new_code, pTreeId) = (print_traces traces; superblockify_traces icond_code preds traces) in
+  ((new_code, (fn_entrypoint f)), pTreeId)
diff --git a/backend/Duplicateproof.v b/backend/Duplicateproof.v
index ebb17774..a8e9b16b 100644
--- a/backend/Duplicateproof.v
+++ b/backend/Duplicateproof.v
@@ -26,6 +26,9 @@ Inductive match_inst (dupmap: PTree.t node): instruction -> instruction -> Prop
   | match_inst_cond: forall ifso ifso' ifnot ifnot' c lr,
       dupmap!ifso' = (Some ifso) -> dupmap!ifnot' = (Some ifnot) ->
       match_inst dupmap (Icond c lr ifso ifnot) (Icond c lr ifso' ifnot')
+  | match_inst_revcond: forall ifso ifso' ifnot ifnot' c lr,
+      dupmap!ifso' = (Some ifso) -> dupmap!ifnot' = (Some ifnot) ->
+      match_inst dupmap (Icond c lr ifso ifnot) (Icond (negate_condition c) lr ifnot' ifso')
   | match_inst_jumptable: forall ln ln' r,
       list_forall2 (fun n n' => (dupmap!n' = (Some n))) ln ln' ->
       match_inst dupmap (Ijumptable r ln) (Ijumptable r ln')
@@ -173,12 +176,16 @@ Proof.
     destruct (builtin_res_eq_pos _ _); try discriminate. subst.
     constructor. assumption.
 (* Icond *)
-  - destruct i'; try (inversion H; fail). monadInv H.
-    destruct x. eapply verify_is_copy_correct in EQ.
-    destruct x0. eapply verify_is_copy_correct in EQ1.
-    destruct (eq_condition _ _); try discriminate.
+  - destruct i'; try (inversion H; fail).
     destruct (list_eq_dec _ _ _); try discriminate. subst.
-    constructor; assumption.
+    destruct (eq_condition _ _); try discriminate.
+    + monadInv H. destruct x. eapply verify_is_copy_correct in EQ.
+      destruct x0. eapply verify_is_copy_correct in EQ1.
+      constructor; assumption.
+    + destruct (eq_condition _ _); try discriminate.
+      monadInv H. destruct x. eapply verify_is_copy_correct in EQ.
+      destruct x0. eapply verify_is_copy_correct in EQ1.
+      constructor; assumption.
 (* Ijumptable *)
   - destruct i'; try (inversion H; fail). monadInv H.
     destruct x. eapply verify_is_copy_list_correct in EQ.
@@ -463,10 +470,16 @@ Proof.
 (* Icond *)
   - eapply dupmap_correct in DUPLIC; eauto.
     destruct DUPLIC as (i' & H2 & H3). inv H3.
-    pose symbols_preserved as SYMPRES.
-    eexists. split.
-    + eapply exec_Icond; eauto.
-    + econstructor; eauto. destruct b; auto.
+    * (* match_inst_cond *)
+      pose symbols_preserved as SYMPRES.
+      eexists. split.
+      + eapply exec_Icond; eauto.
+      + econstructor; eauto. destruct b; auto.
+    * (* match_inst_revcond *)
+      pose symbols_preserved as SYMPRES.
+      eexists. split.
+      + eapply exec_Icond; eauto. rewrite eval_negate_condition. rewrite H0. simpl. eauto.
+      + econstructor; eauto. destruct b; auto.
 (* Ijumptable *)
   - eapply dupmap_correct in DUPLIC; eauto.
     destruct DUPLIC as (i' & H2 & H3). inv H3.
diff --git a/driver/Clflags.ml b/driver/Clflags.ml
index ba0246df..e373b2e1 100644
--- a/driver/Clflags.ml
+++ b/driver/Clflags.ml
@@ -28,6 +28,8 @@ let option_fconstprop = ref true
 let option_fcse = ref true
 let option_fcse2 = ref true
 let option_fredundancy = ref true
+let option_fduplicate = ref false
+let option_finvertcond = ref true (* only active if option_fduplicate is also true *)
 let option_fpostpass = ref true
 let option_fpostpass_sched = ref "list"
 let option_fifconversion = ref true
diff --git a/driver/Compiler.v b/driver/Compiler.v
index 24c3518b..499feff2 100644
--- a/driver/Compiler.v
+++ b/driver/Compiler.v
@@ -134,7 +134,7 @@ Definition transf_rtl_program (f: RTL.program) : res Asm.program :=
    @@ print (print_RTL 2)
    @@ time "Renumbering" Renumber.transf_program
    @@ print (print_RTL 3)
-  @@@ time "Duplicating" Duplicate.transf_program
+  @@@ partial_if Compopts.optim_duplicate (time "Tail-duplicating" Duplicate.transf_program)
    @@ print (print_RTL 4)
    @@ total_if Compopts.optim_constprop (time "Constant propagation" Constprop.transf_program)
    @@ print (print_RTL 5)
@@ -254,7 +254,7 @@ Definition CompCert's_passes :=
   ::: mkpass (match_if Compopts.optim_tailcalls Tailcallproof.match_prog)
   ::: mkpass Inliningproof.match_prog
   ::: mkpass Renumberproof.match_prog
-  ::: mkpass Duplicateproof.match_prog
+  ::: mkpass (match_if Compopts.optim_duplicate Duplicateproof.match_prog)
   ::: mkpass (match_if Compopts.optim_constprop Constpropproof.match_prog)
   ::: mkpass (match_if Compopts.optim_constprop Renumberproof.match_prog)
   ::: mkpass (match_if Compopts.optim_CSE CSEproof.match_prog)
@@ -301,7 +301,7 @@ Proof.
   set (p7 := total_if optim_tailcalls Tailcall.transf_program p6) in *.
   destruct (Inlining.transf_program p7) as [p8|e] eqn:P8; simpl in T; try discriminate.
   set (p9 := Renumber.transf_program p8) in *.
-  destruct (Duplicate.transf_program p9) as [p10|e] eqn:P10; simpl in T; try discriminate.
+  destruct (partial_if optim_duplicate Duplicate.transf_program p9) as [p10|e] eqn:P10; simpl in T; try discriminate.
   set (p11 := total_if optim_constprop Constprop.transf_program p10) in *.
   set (p12 := total_if optim_constprop Renumber.transf_program p11) in *.
   destruct (partial_if optim_CSE CSE.transf_program p12) as [p13|e] eqn:P13; simpl in T; try discriminate.
@@ -326,7 +326,7 @@ Proof.
   exists p7; split. apply total_if_match. apply Tailcallproof.transf_program_match.
   exists p8; split. apply Inliningproof.transf_program_match; auto.
   exists p9; split. apply Renumberproof.transf_program_match; auto.
-  exists p10; split. apply Duplicateproof.transf_program_match; auto.
+  exists p10; split. eapply partial_if_match; eauto. apply Duplicateproof.transf_program_match; auto.
   exists p11; split. apply total_if_match. apply Constpropproof.transf_program_match.
   exists p12; split. apply total_if_match. apply Renumberproof.transf_program_match.
   exists p13; split. eapply partial_if_match; eauto. apply CSEproof.transf_program_match.
@@ -411,7 +411,8 @@ Ltac DestructM :=
   eapply compose_forward_simulations.
     eapply Inliningproof.transf_program_correct; eassumption.
   eapply compose_forward_simulations. eapply Renumberproof.transf_program_correct; eassumption.
-  eapply compose_forward_simulations. eapply Duplicateproof.transf_program_correct; eassumption.
+  eapply compose_forward_simulations.
+    eapply match_if_simulation. eassumption. exact Duplicateproof.transf_program_correct.
   eapply compose_forward_simulations.
     eapply match_if_simulation. eassumption. exact Constpropproof.transf_program_correct.
   eapply compose_forward_simulations.
diff --git a/driver/Compopts.v b/driver/Compopts.v
index 4d6a096c..b4b9f30d 100644
--- a/driver/Compopts.v
+++ b/driver/Compopts.v
@@ -27,6 +27,10 @@ Parameter generate_float_constants: unit -> bool.
 (** For value analysis.  Currently always false. *)
 Parameter va_strict: unit -> bool.
 
+(** Flag -fduplicate. For tail duplication optimization. Necessary to have
+  * bigger superblocks *)
+Parameter optim_duplicate: unit -> bool.
+
 (** Flag -ftailcalls.  For tail call optimization. *)
 Parameter optim_tailcalls: unit -> bool.
 
diff --git a/driver/Driver.ml b/driver/Driver.ml
index 3c97e17f..ffe1fc4b 100644
--- a/driver/Driver.ml
+++ b/driver/Driver.ml
@@ -200,6 +200,9 @@ Processing options:
   -fpostpass     Perform postpass scheduling (only for K1 architecture) [on]
   -fpostpass= <optim> Perform postpass scheduling with the specified optimization [list]
                    (<optim>=list: list scheduling, <optim>=ilp: ILP, <optim>=greedy: just packing bundles)
+  -fduplicate    Perform tail duplication to form superblocks on predicted traces
+    -finvertcond    Invert conditions based on predicted paths (to prefer fallthrough).
+                    Requires -fduplicate to be also activated [on]
   -fforward-moves   Forward moves after CSE
   -finline       Perform inlining of functions [on]
   -finline-functions-called-once Integrate functions only required by their
@@ -313,7 +316,7 @@ let cmdline_actions =
  [
   Exact "-O0", Unit (unset_all optimization_options);
   Exact "-O", Unit (set_all optimization_options);
-  _Regexp "-O1", Self (fun _ -> set_all optimization_options (); option_fpostpass := false);
+  _Regexp "-O1", Self (fun _ -> set_all optimization_options (); option_fpostpass := false; option_fduplicate := false);
   _Regexp "-O[123]$", Unit (set_all optimization_options);
   Exact "-Os", Set option_Osize;
   Exact "-Obranchless", Set option_Obranchless;
@@ -388,6 +391,8 @@ let cmdline_actions =
   @ f_opt "cse2" option_fcse2
   @ f_opt "redundancy" option_fredundancy
   @ f_opt "postpass" option_fpostpass
+  @ f_opt "duplicate" option_fduplicate
+  @ f_opt "invertcond" option_finvertcond
   @ f_opt_str "postpass" option_fpostpass option_fpostpass_sched
   @ f_opt "inline" option_finline
   @ f_opt "inline-functions-called-once" option_finline_functions_called_once
diff --git a/extraction/extraction.v b/extraction/extraction.v
index ba6b080b..929c21e0 100644
--- a/extraction/extraction.v
+++ b/extraction/extraction.v
@@ -105,6 +105,8 @@ Extract Constant Compopts.generate_float_constants =>
   "fun _ -> !Clflags.option_ffloatconstprop >= 2".
 Extract Constant Compopts.optim_tailcalls =>
   "fun _ -> !Clflags.option_ftailcalls".
+Extract Constant Compopts.optim_duplicate =>
+  "fun _ -> !Clflags.option_fduplicate".
 Extract Constant Compopts.optim_constprop =>
   "fun _ -> !Clflags.option_fconstprop".
 Extract Constant Compopts.optim_CSE =>
diff --git a/mppa_k1c/DuplicateOpcodeHeuristic.ml b/mppa_k1c/DuplicateOpcodeHeuristic.ml
new file mode 100644
index 00000000..690553ce
--- /dev/null
+++ b/mppa_k1c/DuplicateOpcodeHeuristic.ml
@@ -0,0 +1,32 @@
+(* open Camlcoq *)
+open Op
+open Integers
+
+exception HeuristicSucceeded
+
+let opcode_heuristic code cond ifso ifnot preferred =
+  let decision = match cond with
+  | Ccompimm (c, n) | Ccompuimm (c, n) -> if n == Integers.Int.zero then (match c with
+      | Clt | Cle -> Some false
+      | Cgt | Cge -> Some true
+      | _ -> None
+      ) else None
+  | Ccomplimm (c, n) | Ccompluimm (c, n) -> if n == Integers.Int64.zero then (match c with
+      | Clt | Cle -> Some false
+      | Cgt | Cge -> Some true
+      | _ -> None
+      ) else None
+  | Ccompf c | Ccompfs c -> (match c with
+      | Ceq -> Some false
+      | Cne -> Some true
+      | _ -> None
+      )
+  | Cnotcompf c | Cnotcompfs c -> (match c with
+      | Ceq -> Some true
+      | Cne -> Some false
+      | _ -> None
+      )
+  | _ -> None
+  in match decision with
+  | Some b -> (preferred := b; raise HeuristicSucceeded)
+  | None -> ()
diff --git a/test/mppa/check.sh b/test/mppa/check.sh
deleted file mode 100755
index f25c3e31..00000000
--- a/test/mppa/check.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/bin/bash
-# Tests the execution of the binaries produced by CompCert
-
-source do_test.sh
-
-do_test check $1
diff --git a/test/mppa/hardcheck.sh b/test/mppa/hardcheck.sh
new file mode 100755
index 00000000..82b63182
--- /dev/null
+++ b/test/mppa/hardcheck.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+# Tests the execution of the binaries produced by CompCert, in hardware
+
+source do_test.sh
+
+do_test hardcheck
diff --git a/test/mppa/hardtest.sh b/test/mppa/hardtest.sh
new file mode 100755
index 00000000..09511da6
--- /dev/null
+++ b/test/mppa/hardtest.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+# Tests the validity of the tests, in hardware
+
+source do_test.sh
+
+do_test hardtest
diff --git a/test/mppa/instr/Makefile b/test/mppa/instr/Makefile
index 69446796..37f7d0ab 100644
--- a/test/mppa/instr/Makefile
+++ b/test/mppa/instr/Makefile
@@ -5,10 +5,11 @@ CC ?= gcc
 CCOMP ?= ccomp
 OPTIM ?= -O2
 CFLAGS ?= $(OPTIM)
-CCOMPFLAGS ?= $(CFLAGS) -faddx
+CCOMPFLAGS ?= $(CFLAGS)
 SIMU ?= k1-mppa
 TIMEOUT ?= --signal=SIGTERM 120s
 DIFF ?= python2.7 floatcmp.py -reltol .00001
+HARDRUN ?= k1-jtag-runner
 
 DIR=./
 SRCDIR=$(DIR)
@@ -30,10 +31,11 @@ SIMUPATH=$(shell which $(SIMU))
 
 TESTNAMES?=$(notdir $(subst .c,,$(wildcard $(DIR)/*.c)))
 X86_GCC_OUT=$(addprefix $(OUTDIR)/,$(addsuffix .x86-gcc.out,$(TESTNAMES)))
-GCC_OUT=$(addprefix $(OUTDIR)/,$(addsuffix .gcc.out,$(TESTNAMES)))
-CCOMP_OUT=$(addprefix $(OUTDIR)/,$(addsuffix .ccomp.out,$(TESTNAMES)))
+GCC_SIMUOUT=$(addprefix $(OUTDIR)/,$(addsuffix .gcc.simu.out,$(TESTNAMES)))
+CCOMP_SIMUOUT=$(addprefix $(OUTDIR)/,$(addsuffix .ccomp.simu.out,$(TESTNAMES)))
+GCC_HARDOUT=$(addprefix $(OUTDIR)/,$(addsuffix .gcc.hard.out,$(TESTNAMES)))
+CCOMP_HARDOUT=$(addprefix $(OUTDIR)/,$(addsuffix .ccomp.hard.out,$(TESTNAMES)))
 
-OUT=$(X86_GCC_OUT) $(GCC_OUT) $(CCOMP_OUT)
 BIN=$(addprefix $(BINDIR)/,$(addsuffix .x86-gcc.bin,$(TESTNAMES)))\
 	$(addprefix $(BINDIR)/,$(addsuffix .gcc.bin,$(TESTNAMES)))\
 	$(addprefix $(BINDIR)/,$(addsuffix .ccomp.bin,$(TESTNAMES)))
@@ -49,12 +51,18 @@ RED=\033[0;31m
 YELLOW=\033[0;33m
 NC=\033[0m
 
+.PHONY:
+test: simutest
+
+.PHONY:
+check: simucheck
+
 .PHONY: 
-test: $(X86_GCC_OUT) $(GCC_OUT)
+simutest: $(X86_GCC_OUT) $(GCC_SIMUOUT)
 	@echo "Comparing x86 gcc output to k1 gcc.."
 	for test in $(TESTNAMES); do\
 		x86out=$(OUTDIR)/$$test.x86-gcc.out;\
-		gccout=$(OUTDIR)/$$test.gcc.out;\
+		gccout=$(OUTDIR)/$$test.gcc.simu.out;\
 		if grep "__K1C__" -q $$test.c; then\
 			printf "$(YELLOW)UNTESTED: $$test.c contains an \`#ifdef __K1C__\`\n";\
 		elif $(DIFF) $$x86out $$gccout > /dev/null; test $${PIPESTATUS[0]} -ne 0; then\
@@ -65,11 +73,39 @@ test: $(X86_GCC_OUT) $(GCC_OUT)
 	done
 
 .PHONY:
-check: $(GCC_OUT) $(CCOMP_OUT)
+simucheck: $(GCC_SIMUOUT) $(CCOMP_SIMUOUT)
 	@echo "Comparing k1 gcc output to ccomp.."
 	@for test in $(TESTNAMES); do\
-		gccout=$(OUTDIR)/$$test.gcc.out;\
-		ccompout=$(OUTDIR)/$$test.ccomp.out;\
+		gccout=$(OUTDIR)/$$test.gcc.simu.out;\
+		ccompout=$(OUTDIR)/$$test.ccomp.simu.out;\
+		if $(DIFF) $$ccompout $$gccout > /dev/null; test $${PIPESTATUS[0]} -ne 0; then\
+			>&2 printf "$(RED)ERROR: $$ccompout and $$gccout differ$(NC)\n";\
+		else\
+			printf "$(GREEN)GOOD: $$ccompout and $$gccout concur$(NC)\n";\
+		fi;\
+	done
+
+.PHONY: 
+hardtest: $(X86_GCC_OUT) $(GCC_HARDOUT)
+	@echo "Comparing x86 gcc output to k1 gcc.."
+	for test in $(TESTNAMES); do\
+		x86out=$(OUTDIR)/$$test.x86-gcc.out;\
+		gccout=$(OUTDIR)/$$test.gcc.hard.out;\
+		if grep "__K1C__" -q $$test.c; then\
+			printf "$(YELLOW)UNTESTED: $$test.c contains an \`#ifdef __K1C__\`\n";\
+		elif $(DIFF) $$x86out $$gccout > /dev/null; test $${PIPESTATUS[0]} -ne 0; then\
+			>&2 printf "$(RED)ERROR: $$x86out and $$gccout differ$(NC)\n";\
+		else\
+			printf "$(GREEN)GOOD: $$x86out and $$gccout concur$(NC)\n";\
+		fi;\
+	done
+
+.PHONY:
+hardcheck: $(GCC_HARDOUT) $(CCOMP_HARDOUT)
+	@echo "Comparing k1 gcc output to ccomp.."
+	@for test in $(TESTNAMES); do\
+		gccout=$(OUTDIR)/$$test.gcc.hard.out;\
+		ccompout=$(OUTDIR)/$$test.ccomp.hard.out;\
 		if $(DIFF) $$ccompout $$gccout > /dev/null; test $${PIPESTATUS[0]} -ne 0; then\
 			>&2 printf "$(RED)ERROR: $$ccompout and $$gccout differ$(NC)\n";\
 		else\
@@ -95,14 +131,22 @@ $(OUTDIR)/%.x86-gcc.out: $(BINDIR)/%.x86-gcc.bin
 	@mkdir -p $(@D)
 	ret=0; timeout $(TIMEOUT) ./$< > $@ || { ret=$$?; }; echo $$ret >> $@
 
-$(OUTDIR)/%.gcc.out: $(BINDIR)/%.gcc.bin $(SIMUPATH)
+$(OUTDIR)/%.gcc.simu.out: $(BINDIR)/%.gcc.bin $(SIMUPATH)
 	@mkdir -p $(@D)
 	ret=0; timeout $(TIMEOUT) $(SIMU) -- $< > $@ || { ret=$$?; }; echo $$ret >> $@
 
-$(OUTDIR)/%.ccomp.out: $(BINDIR)/%.ccomp.bin $(SIMUPATH)
+$(OUTDIR)/%.ccomp.simu.out: $(BINDIR)/%.ccomp.bin $(SIMUPATH)
 	@mkdir -p $(@D)
 	ret=0; timeout $(TIMEOUT) $(SIMU) -- $< > $@ || { ret=$$?; }; echo $$ret >> $@
 
+$(OUTDIR)/%.gcc.hard.out: $(BINDIR)/%.gcc.bin $(SIMUPATH)
+	@mkdir -p $(@D)
+	ret=0; timeout $(TIMEOUT) $(HARDRUN) --exec-file=Cluster0:$< > $@ || { ret=$$?; }; echo $$ret >> $@
+
+$(OUTDIR)/%.ccomp.hard.out: $(BINDIR)/%.ccomp.bin $(SIMUPATH)
+	@mkdir -p $(@D)
+	ret=0; timeout $(TIMEOUT) $(HARDRUN) --exec-file=Cluster0:$< > $@ || { ret=$$?; }; echo $$ret >> $@
+
 # Assembly to binary
 
 $(BINDIR)/%.x86-gcc.bin: $(ASMDIR)/%.x86-gcc.s $(LIB) $(CCPATH)
diff --git a/test/mppa/interop/Makefile b/test/mppa/interop/Makefile
index e615e89a..3a83d51c 100644
--- a/test/mppa/interop/Makefile
+++ b/test/mppa/interop/Makefile
@@ -6,6 +6,7 @@ CCOMP ?= ccomp
 CFLAGS ?= -O2 -Wno-varargs
 SIMU ?= k1-mppa
 TIMEOUT ?= --signal=SIGTERM 120s
+HARDRUN ?= k1-jtag-runner
 
 DIR=./
 SRCDIR=$(DIR)
@@ -33,17 +34,23 @@ SIMUPATH=$(shell which $(SIMU))
 TESTNAMES ?= $(filter-out $(VAARG_COMMON),$(filter-out $(COMMON),$(notdir $(subst .c,,$(wildcard $(DIR)/*.c)))))
 
 X86_GCC_OUT=$(addprefix $(OUTDIR)/,$(addsuffix .x86-gcc.out,$(TESTNAMES)))
-GCC_OUT=$(addprefix $(OUTDIR)/,$(addsuffix .gcc.out,$(TESTNAMES)))
-GCC_REV_OUT=$(addprefix $(OUTDIR)/,$(addsuffix .gcc.rev.out,$(TESTNAMES)))
-CCOMP_OUT=$(addprefix $(OUTDIR)/,$(addsuffix .ccomp.out,$(TESTNAMES)))
+GCC_SIMUOUT=$(addprefix $(OUTDIR)/,$(addsuffix .gcc.simu.out,$(TESTNAMES)))
+GCC_REV_SIMUOUT=$(addprefix $(OUTDIR)/,$(addsuffix .gcc.rev.simu.out,$(TESTNAMES)))
+CCOMP_SIMUOUT=$(addprefix $(OUTDIR)/,$(addsuffix .ccomp.simu.out,$(TESTNAMES)))
+
+GCC_HARDOUT=$(addprefix $(OUTDIR)/,$(addsuffix .gcc.hard.out,$(TESTNAMES)))
+GCC_REV_HARDOUT=$(addprefix $(OUTDIR)/,$(addsuffix .gcc.rev.hard.out,$(TESTNAMES)))
+CCOMP_HARDOUT=$(addprefix $(OUTDIR)/,$(addsuffix .ccomp.hard.out,$(TESTNAMES)))
 
 VAARG_X86_GCC_OUT=$(addprefix $(OUTDIR)/,$(addsuffix .x86-gcc.vaarg.out,$(TESTNAMES)))
-VAARG_GCC_OUT=$(addprefix $(OUTDIR)/,$(addsuffix .gcc.vaarg.out,$(TESTNAMES)))
-VAARG_GCC_REV_OUT=$(addprefix $(OUTDIR)/,$(addsuffix .gcc.rev.vaarg.out,$(TESTNAMES)))
-VAARG_CCOMP_OUT=$(addprefix $(OUTDIR)/,$(addsuffix .ccomp.vaarg.out,$(TESTNAMES)))
+VAARG_GCC_SIMUOUT=$(addprefix $(OUTDIR)/,$(addsuffix .gcc.vaarg.simu.out,$(TESTNAMES)))
+VAARG_GCC_REV_SIMUOUT=$(addprefix $(OUTDIR)/,$(addsuffix .gcc.rev.vaarg.simu.out,$(TESTNAMES)))
+VAARG_CCOMP_SIMUOUT=$(addprefix $(OUTDIR)/,$(addsuffix .ccomp.vaarg.simu.out,$(TESTNAMES)))
+
+VAARG_GCC_HARDOUT=$(addprefix $(OUTDIR)/,$(addsuffix .gcc.vaarg.hard.out,$(TESTNAMES)))
+VAARG_GCC_REV_HARDOUT=$(addprefix $(OUTDIR)/,$(addsuffix .gcc.rev.vaarg.hard.out,$(TESTNAMES)))
+VAARG_CCOMP_HARDOUT=$(addprefix $(OUTDIR)/,$(addsuffix .ccomp.vaarg.hard.out,$(TESTNAMES)))
 
-OUT=$(X86_GCC_OUT) $(GCC_OUT) $(CCOMP_OUT) $(GCC_REV_OUT)\
-		$(VAARG_GCC_OUT) $(VAARG_GCC_OUT) $(VAARG_CCOMP_OUT) $(VAARG_GCC_REV_OUT)
 BIN=$(addprefix $(BINDIR)/,$(addsuffix .x86-gcc.bin,$(TESTNAMES)))\
 	$(addprefix $(BINDIR)/,$(addsuffix .gcc.bin,$(TESTNAMES)))\
 	$(addprefix $(BINDIR)/,$(addsuffix .ccomp.bin,$(TESTNAMES)))\
@@ -63,14 +70,72 @@ GREEN=\033[0;32m
 RED=\033[0;31m
 NC=\033[0m
 
+.PHONY:
+test: simutest
+
+.PHONY:
+simutest: $(X86_GCC_OUT) $(GCC_SIMUOUT) $(VAARG_X86_GCC_OUT) $(VAARG_GCC_SIMUOUT)
+	@echo "Comparing x86 gcc output to k1 gcc.."
+	@for test in $(TESTNAMES); do\
+		x86out=$(OUTDIR)/$$test.x86-gcc.out;\
+		gccout=$(OUTDIR)/$$test.gcc.simu.out;\
+		vaarg_x86out=$(OUTDIR)/$$test.x86-gcc.vaarg.out;\
+		vaarg_gccout=$(OUTDIR)/$$test.gcc.vaarg.simu.out;\
+		if ! diff $$x86out $$gccout > /dev/null; then\
+			>&2 printf "$(RED)ERROR: $$x86out and $$gccout differ$(NC)\n";\
+		else\
+			printf "$(GREEN)GOOD: $$x86out and $$gccout concur$(NC)\n";\
+		fi;\
+		if ! diff $$vaarg_x86out $$vaarg_gccout > /dev/null; then\
+			>&2 printf "$(RED)ERROR: $$vaarg_x86out and $$vaarg_gccout differ$(NC)\n";\
+		else\
+			printf "$(GREEN)GOOD: $$vaarg_x86out and $$vaarg_gccout concur$(NC)\n";\
+		fi;\
+	done
+
+.PHONY:
+check: simucheck
+
+.PHONY:
+simucheck: $(GCC_SIMUOUT) $(CCOMP_SIMUOUT) $(GCC_REV_SIMUOUT) $(VAARG_GCC_SIMUOUT) $(VAARG_CCOMP_SIMUOUT) $(VAARG_GCC_REV_SIMUOUT)
+	@echo "Comparing k1 gcc output to ccomp.."
+	@for test in $(TESTNAMES); do\
+		gccout=$(OUTDIR)/$$test.gcc.simu.out;\
+		ccompout=$(OUTDIR)/$$test.ccomp.simu.out;\
+		gccrevout=$(OUTDIR)/$$test.gcc.rev.simu.out;\
+		vaarg_gccout=$(OUTDIR)/$$test.gcc.vaarg.simu.out;\
+		vaarg_ccompout=$(OUTDIR)/$$test.ccomp.vaarg.simu.out;\
+		vaarg_gccrevout=$(OUTDIR)/$$test.gcc.rev.vaarg.simu.out;\
+		if ! diff $$ccompout $$gccout > /dev/null; then\
+			>&2 printf "$(RED)ERROR: $$ccompout and $$gccout differ$(NC)\n";\
+		else\
+			printf "$(GREEN)GOOD: $$ccompout and $$gccout concur$(NC)\n";\
+		fi;\
+		if ! diff $$gccrevout $$gccout > /dev/null; then\
+			>&2 printf "$(RED)ERROR: $$gccrevout and $$gccout differ$(NC)\n";\
+		else\
+			printf "$(GREEN)GOOD: $$gccrevout and $$gccout concur$(NC)\n";\
+		fi;\
+		if ! diff $$vaarg_ccompout $$vaarg_gccout > /dev/null; then\
+			>&2 printf "$(RED)ERROR: $$vaarg_ccompout and $$vaarg_gccout differ$(NC)\n";\
+		else\
+			printf "$(GREEN)GOOD: $$vaarg_ccompout and $$vaarg_gccout concur$(NC)\n";\
+		fi;\
+		if ! diff $$vaarg_gccrevout $$vaarg_gccout > /dev/null; then\
+			>&2 printf "$(RED)ERROR: $$vaarg_gccrevout and $$vaarg_gccout differ$(NC)\n";\
+		else\
+			printf "$(GREEN)GOOD: $$vaarg_gccrevout and $$vaarg_gccout concur$(NC)\n";\
+		fi;\
+	done
+
 .PHONY: 
-test: $(X86_GCC_OUT) $(GCC_OUT) $(VAARG_X86_GCC_OUT) $(VAARG_GCC_OUT)
+hardtest: $(X86_GCC_OUT) $(GCC_HARDOUT) $(VAARG_X86_GCC_OUT) $(VAARG_GCC_HARDOUT)
 	@echo "Comparing x86 gcc output to k1 gcc.."
 	@for test in $(TESTNAMES); do\
 		x86out=$(OUTDIR)/$$test.x86-gcc.out;\
-		gccout=$(OUTDIR)/$$test.gcc.out;\
+		gccout=$(OUTDIR)/$$test.gcc.hard.out;\
 		vaarg_x86out=$(OUTDIR)/$$test.x86-gcc.vaarg.out;\
-		vaarg_gccout=$(OUTDIR)/$$test.gcc.vaarg.out;\
+		vaarg_gccout=$(OUTDIR)/$$test.gcc.vaarg.hard.out;\
 		if ! diff $$x86out $$gccout > /dev/null; then\
 			>&2 printf "$(RED)ERROR: $$x86out and $$gccout differ$(NC)\n";\
 		else\
@@ -84,15 +149,15 @@ test: $(X86_GCC_OUT) $(GCC_OUT) $(VAARG_X86_GCC_OUT) $(VAARG_GCC_OUT)
 	done
 
 .PHONY:
-check: $(GCC_OUT) $(CCOMP_OUT) $(GCC_REV_OUT) $(VAARG_GCC_OUT) $(VAARG_CCOMP_OUT) $(VAARG_GCC_REV_OUT)
+hardcheck: $(GCC_HARDOUT) $(CCOMP_HARDOUT) $(GCC_REV_HARDOUT) $(VAARG_GCC_HARDOUT) $(VAARG_CCOMP_HARDOUT) $(VAARG_GCC_REV_HARDOUT)
 	@echo "Comparing k1 gcc output to ccomp.."
 	@for test in $(TESTNAMES); do\
-		gccout=$(OUTDIR)/$$test.gcc.out;\
-		ccompout=$(OUTDIR)/$$test.ccomp.out;\
-		gccrevout=$(OUTDIR)/$$test.gcc.rev.out;\
-		vaarg_gccout=$(OUTDIR)/$$test.gcc.vaarg.out;\
-		vaarg_ccompout=$(OUTDIR)/$$test.ccomp.vaarg.out;\
-		vaarg_gccrevout=$(OUTDIR)/$$test.gcc.rev.vaarg.out;\
+		gccout=$(OUTDIR)/$$test.gcc.hard.out;\
+		ccompout=$(OUTDIR)/$$test.ccomp.hard.out;\
+		gccrevout=$(OUTDIR)/$$test.gcc.rev.hard.out;\
+		vaarg_gccout=$(OUTDIR)/$$test.gcc.vaarg.hard.out;\
+		vaarg_ccompout=$(OUTDIR)/$$test.ccomp.vaarg.hard.out;\
+		vaarg_gccrevout=$(OUTDIR)/$$test.gcc.rev.vaarg.hard.out;\
 		if ! diff $$ccompout $$gccout > /dev/null; then\
 			>&2 printf "$(RED)ERROR: $$ccompout and $$gccout differ$(NC)\n";\
 		else\
@@ -144,36 +209,60 @@ $(OUTDIR)/%.x86-gcc.out: $(BINDIR)/%.x86-gcc.bin
 	@mkdir -p $(@D)
 	ret=0; timeout $(TIMEOUT) ./$< > $@ || { ret=$$?; }; echo $$ret >> $@
 
-$(OUTDIR)/%.gcc.out: $(BINDIR)/%.gcc.bin $(SIMUPATH)
+$(OUTDIR)/%.gcc.simu.out: $(BINDIR)/%.gcc.bin $(SIMUPATH)
 	@mkdir -p $(@D)
 	ret=0; timeout $(TIMEOUT) $(SIMU) -- $< > $@ || { ret=$$?; }; echo $$ret >> $@
 
-$(OUTDIR)/%.gcc.rev.out: $(BINDIR)/%.gcc.rev.bin $(SIMUPATH)
+$(OUTDIR)/%.gcc.rev.simu.out: $(BINDIR)/%.gcc.rev.bin $(SIMUPATH)
 	@mkdir -p $(@D)
 	ret=0; timeout $(TIMEOUT) $(SIMU) -- $< > $@ || { ret=$$?; }; echo $$ret >> $@
 
-$(OUTDIR)/%.ccomp.out: $(BINDIR)/%.ccomp.bin $(SIMUPATH)
+$(OUTDIR)/%.ccomp.simu.out: $(BINDIR)/%.ccomp.bin $(SIMUPATH)
 	@mkdir -p $(@D)
 	ret=0; timeout $(TIMEOUT) $(SIMU) -- $< > $@ || { ret=$$?; }; echo $$ret >> $@
 
+$(OUTDIR)/%.gcc.hard.out: $(BINDIR)/%.gcc.bin $(SIMUPATH)
+	@mkdir -p $(@D)
+	ret=0; timeout $(TIMEOUT) $(HARDRUN) --exec-file=Cluster0:$< > $@ || { ret=$$?; }; echo $$ret >> $@
+
+$(OUTDIR)/%.gcc.rev.hard.out: $(BINDIR)/%.gcc.rev.bin $(SIMUPATH)
+	@mkdir -p $(@D)
+	ret=0; timeout $(TIMEOUT) $(HARDRUN) --exec-file=Cluster0:$< > $@ || { ret=$$?; }; echo $$ret >> $@
+
+$(OUTDIR)/%.ccomp.hard.out: $(BINDIR)/%.ccomp.bin $(SIMUPATH)
+	@mkdir -p $(@D)
+	ret=0; timeout $(TIMEOUT) $(HARDRUN) --exec-file=Cluster0:$< > $@ || { ret=$$?; }; echo $$ret >> $@
+
 ## With vaarg
 
 $(OUTDIR)/%.x86-gcc.vaarg.out: $(BINDIR)/%.x86-gcc.vaarg.bin
 	@mkdir -p $(@D)
 	ret=0; timeout $(TIMEOUT) ./$< > $@ || { ret=$$?; }; echo $$ret >> $@
 
-$(OUTDIR)/%.gcc.vaarg.out: $(BINDIR)/%.gcc.vaarg.bin $(SIMUPATH)
+$(OUTDIR)/%.gcc.vaarg.simu.out: $(BINDIR)/%.gcc.vaarg.bin $(SIMUPATH)
 	@mkdir -p $(@D)
 	ret=0; timeout $(TIMEOUT) $(SIMU) -- $< > $@ || { ret=$$?; }; echo $$ret >> $@
 
-$(OUTDIR)/%.gcc.rev.vaarg.out: $(BINDIR)/%.gcc.rev.vaarg.bin $(SIMUPATH)
+$(OUTDIR)/%.gcc.rev.vaarg.simu.out: $(BINDIR)/%.gcc.rev.vaarg.bin $(SIMUPATH)
 	@mkdir -p $(@D)
 	ret=0; timeout $(TIMEOUT) $(SIMU) -- $< > $@ || { ret=$$?; }; echo $$ret >> $@
 
-$(OUTDIR)/%.ccomp.vaarg.out: $(BINDIR)/%.ccomp.vaarg.bin $(SIMUPATH)
+$(OUTDIR)/%.ccomp.vaarg.simu.out: $(BINDIR)/%.ccomp.vaarg.bin $(SIMUPATH)
 	@mkdir -p $(@D)
 	ret=0; timeout $(TIMEOUT) $(SIMU) -- $< > $@ || { ret=$$?; }; echo $$ret >> $@
 
+$(OUTDIR)/%.gcc.vaarg.hard.out: $(BINDIR)/%.gcc.vaarg.bin $(SIMUPATH)
+	@mkdir -p $(@D)
+	ret=0; timeout $(TIMEOUT) $(HARDRUN) --exec-file=Cluster0:$< > $@ || { ret=$$?; }; echo $$ret >> $@
+
+$(OUTDIR)/%.gcc.rev.vaarg.hard.out: $(BINDIR)/%.gcc.rev.vaarg.bin $(SIMUPATH)
+	@mkdir -p $(@D)
+	ret=0; timeout $(TIMEOUT) $(HARDRUN) --exec-file=Cluster0:$< > $@ || { ret=$$?; }; echo $$ret >> $@
+
+$(OUTDIR)/%.ccomp.vaarg.hard.out: $(BINDIR)/%.ccomp.vaarg.bin $(SIMUPATH)
+	@mkdir -p $(@D)
+	ret=0; timeout $(TIMEOUT) $(HARDRUN) --exec-file=Cluster0:$< > $@ || { ret=$$?; }; echo $$ret >> $@
+
 ##
 # Object to binary
 ##
diff --git a/test/mppa/simucheck.sh b/test/mppa/simucheck.sh
new file mode 100755
index 00000000..25fb9947
--- /dev/null
+++ b/test/mppa/simucheck.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+# Tests the execution of the binaries produced by CompCert, by simulation
+
+source do_test.sh
+
+do_test check $1
diff --git a/test/mppa/test.sh b/test/mppa/simutest.sh
index 30806a6b..3b1021e6 100755
--- a/test/mppa/test.sh
+++ b/test/mppa/simutest.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Tests the validity of the tests
+# Tests the validity of the tests, in simulator
 
 source do_test.sh
author	David Monniaux <david.monniaux@univ-grenoble-alpes.fr>	2020-02-06 11:00:51 +0100
committer	David Monniaux <david.monniaux@univ-grenoble-alpes.fr>	2020-02-06 11:00:51 +0100
commit	90e7548f0dae5be1c896fcb964a21fe70b514e0d (patch)
tree	9f3886dc181105993d9da8a2832660c5b123c6ce
parent	1cd3c4c9b7372ca8de128a9ce60ed00210fd0e28 (diff)
parent	7dca7590aa212806ee939244b253a6a067f34bfc (diff)
download	compcert-kvx-90e7548f0dae5be1c896fcb964a21fe70b514e0d.tar.gz compcert-kvx-90e7548f0dae5be1c896fcb964a21fe70b514e0d.zip