6 files changed, 227 insertions, 228 deletions
diff --git a/backend/CSE3.v b/backend/CSE3.v
index 3a680cf7..746ba399 100644
--- a/backend/CSE3.v
+++ b/backend/CSE3.v
@@ -79,7 +79,8 @@ Definition transf_instr (fmap : PMap.t RB.t)
   match instr with
   | Iop op args dst s =>
     let args' := subst_args fmap pc args in
-    match (if is_trivial_op op then None else find_op_in_fmap fmap pc op args') with
+    match (if (negb (Compopts.optim_CSE3_trivial_ops tt)) && (is_trivial_op op)
+           then None else find_op_in_fmap fmap pc op args') with
     | None => Iop op args' dst s
     | Some src => Iop Omove (src::nil) dst s
     end
diff --git a/backend/CSE3proof.v b/backend/CSE3proof.v
index ca43d0bd..0722f904 100644
--- a/backend/CSE3proof.v
+++ b/backend/CSE3proof.v
@@ -504,12 +504,12 @@ Proof.
       destruct (@PMap.get (option RELATION.t) pc) eqn:INV_PC.
       pose proof (rhs_find_sound (sp:=sp) (genv:=ge) (ctx:=(context_from_hints (snd (preanalysis tenv f)))) pc (SOp op)
                 (subst_args (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc args) t) as FIND_SOUND.
-      * destruct (if is_trivial_op op
+      * destruct (if (negb (Compopts.optim_CSE3_trivial_ops tt)) && (is_trivial_op op)
                then None
                else
                 rhs_find pc (SOp op)
                   (subst_args (fst (preanalysis tenv f)) pc args) t) eqn:FIND.
-        ** destruct (is_trivial_op op). discriminate.
+        ** destruct ((negb (Compopts.optim_CSE3_trivial_ops tt)) && (is_trivial_op op)). discriminate.
            apply exec_Iop with (op := Omove) (args := r :: nil).
            TR_AT.
            subst instr'.
diff --git a/backend/Duplicate.v b/backend/Duplicate.v
index 7dea752b..3fd86728 100644
--- a/backend/Duplicate.v
+++ b/backend/Duplicate.v
@@ -86,7 +86,7 @@ Global Opaque builtin_res_eq_pos.
 
 Definition verify_match_inst dupmap inst tinst :=
   match inst with
-  | Inop n => match tinst with Inop n' => do u <- verify_is_copy dupmap n n'; OK tt | _ => Error(msg "verify_match_inst Inop") end
+  | Inop n => match tinst with Inop n' => verify_is_copy dupmap n n' | _ => Error(msg "verify_match_inst Inop") end
 
   | Iop op lr r n => match tinst with
       Iop op' lr' r' n' =>
@@ -167,10 +167,10 @@ Definition verify_match_inst dupmap inst tinst :=
           if (list_eq_dec Pos.eq_dec lr lr') then
             if (eq_condition cond cond') then
               do u1 <- verify_is_copy dupmap n1 n1';
-              do u2 <- verify_is_copy dupmap n2 n2'; OK tt
+              verify_is_copy dupmap n2 n2'
             else if (eq_condition (negate_condition cond) cond') then
               do u1 <- verify_is_copy dupmap n1 n2';
-              do u2 <- verify_is_copy dupmap n2 n1'; OK tt
+              verify_is_copy dupmap n2 n1'
             else Error (msg "Incompatible conditions in Icond")
           else Error (msg "Different lr in Icond")
       | _ => Error (msg "verify_match_inst Icond") end
@@ -203,8 +203,7 @@ Fixpoint verify_mapping_mn_rec dupmap f f' lm :=
   match lm with
   | nil => OK tt
   | m :: lm => do u <- verify_mapping_mn dupmap f f' m;
-               do u2 <- verify_mapping_mn_rec dupmap f f' lm;
-               OK tt
+               verify_mapping_mn_rec dupmap f f' lm
   end.
 
 Definition verify_mapping_match_nodes dupmap (f f': function): res unit :=
@@ -213,7 +212,7 @@ Definition verify_mapping_match_nodes dupmap (f f': function): res unit :=
 (** Verifies that the [dupmap] of the translated function [f'] is giving correct information in regards to [f] *)
 Definition verify_mapping dupmap (f f': function) : res unit :=
   do u <- verify_mapping_entrypoint dupmap f f';
-  do v <- verify_mapping_match_nodes dupmap f f'; OK tt.
+  verify_mapping_match_nodes dupmap f f'.
 
 (** * Entry points *)
 
diff --git a/backend/Duplicateaux.ml b/backend/Duplicateaux.ml
index c9985dc4..b3635527 100644
--- a/backend/Duplicateaux.ml
+++ b/backend/Duplicateaux.ml
@@ -22,9 +22,8 @@
 open RTL
 open Maps
 open Camlcoq
+open DebugPrint
 
-let debug_flag = LICMaux.debug_flag
-let debug = LICMaux.debug
 let get_loop_headers = LICMaux.get_loop_headers
 let get_some = LICMaux.get_some
 let rtl_successors = LICMaux.rtl_successors
@@ -87,8 +86,6 @@ end
 
 module PSet = Set.Make(PInt)
 
-let print_intlist = LICMaux.print_intlist
-
 let print_intset s =
   let seq = PSet.to_seq s
   in begin
@@ -101,18 +98,6 @@ let print_intset s =
     end
   end
 
-let ptree_printbool pt =
-  let elements = PTree.elements pt
-  in begin
-    if !debug_flag then begin
-      Printf.printf "[";
-      List.iter (fun (n, b) ->
-        if b then Printf.printf "%d, " (P.to_int n) else ()
-      ) elements;
-      Printf.printf "]"
-    end
-  end
-
 (* Looks ahead (until a branch) to see if a node further down verifies
  * the given predicate *)
 let rec look_ahead_gen (successors: RTL.instruction -> P.t list) code node is_loop_header predicate =
@@ -185,7 +170,7 @@ let do_loop_heuristic code cond ifso ifnot is_loop_header =
     let predicate n = get_some @@ PTree.get n is_loop_header in
     let ifso_loop = look_ahead code ifso is_loop_header predicate in
     let ifnot_loop = look_ahead code ifnot is_loop_header predicate in
-    if ifso_loop && ifnot_loop then None (* TODO - take the innermost loop ? *)
+    if ifso_loop && ifnot_loop then (debug "\t\tLOOP but can't choose which\n"; None) (* TODO - take the innermost loop ? *)
     else if ifso_loop then Some true
     else if ifnot_loop then Some false
     else None
@@ -199,23 +184,155 @@ let do_loop2_heuristic loop_info n code cond ifso ifnot is_loop_header =
     | Some b -> Some b
   end
 
+(** Innermost loop detection *)
+
+type innerLoop = {
+  preds: P.t list;
+  body: P.t list;
+  head: P.t; (* head of the loop *)
+  finals: P.t list; (* the final instructions, which loops back to the head *)
+  (* There may be more than one ; for instance if there is an if inside the loop with both
+   * branches leading to a goto backedge
+   * Such cases usually happen after a tail-duplication *)
+  sb_final: P.t option; (* if the innerloop wraps a superblock, this is its final instruction *)
+    (* may be None if we predict that we do not loop *)
+}
+
+let print_pset = LICMaux.pp_pset
+
+let rtl_successors_pref = function
+| Itailcall _ | Ireturn _ -> []
+| Icall(_,_,_,_,n) | Ibuiltin(_,_,_,n) | Inop n | Iop (_,_,_,n)
+| Iload (_,_,_,_,_,n) | Istore (_,_,_,_,n) -> [n]
+| Icond (_,_,n1,n2,p) -> (match p with
+  | Some true -> [n1]
+  | Some false -> [n2]
+  | None -> [n1; n2])
+| Ijumptable (_,ln) -> ln
+
+(* Find the last node of a trace (starting at "node"), until a loop is encountered.
+ * If a non-predicted branch is encountered, returns None *)
+let rec find_last_node_before_loop code node trace is_loop_header =
+  let rtl_succ = rtl_successors @@ get_some @@ PTree.get node code in
+  let headers = List.filter (fun n -> 
+    get_some @@ PTree.get n is_loop_header && HashedSet.PSet.contains trace n) rtl_succ in 
+  match headers with
+  | [] -> (
+      let next_nodes = rtl_successors_pref @@ get_some @@ PTree.get node code in
+      match next_nodes with
+      | [n] -> (
+          (* To prevent getting out of the superblock and loop infinitely when the prediction is false *)
+          if HashedSet.PSet.contains trace n then
+            find_last_node_before_loop code n trace is_loop_header
+          else None
+        )
+      | _ -> None (* May happen when we predict that a loop is not taken *)
+    )
+  | [h] -> Some node
+  | _ -> failwith "Multiple branches leading to a loop"
+
+(* The computation of sb_final requires to already have branch prediction *)
+let get_inner_loops f code is_loop_header =
+  let fake_f = { fn_sig = f.fn_sig; fn_params = f.fn_params; 
+    fn_stacksize = f.fn_stacksize; fn_code = code; fn_entrypoint = f.fn_entrypoint } in
+  let (_, predmap, loopmap) = LICMaux.inner_loops fake_f in
+  begin
+    debug "PREDMAP: "; print_ptree print_intlist predmap;
+    debug "LOOPMAP: "; print_ptree print_pset loopmap;
+    List.map (fun (n, body) ->
+      let preds = List.filter (fun p -> not @@ HashedSet.PSet.contains body p) 
+        @@ get_some @@ PTree.get n predmap in
+      let head = (* the instruction from body which is a loop header *)
+        let heads = HashedSet.PSet.elements @@ HashedSet.PSet.filter 
+          (fun n -> ptree_get_some n is_loop_header) body in
+        begin
+          assert (List.length heads == 1);
+          List.hd heads
+        end in
+      let finals = (* the predecessors from head that are in the body *)
+        let head_preds = ptree_get_some head predmap in
+        let filtered = List.filter (fun n -> HashedSet.PSet.contains body n) head_preds in
+        begin
+          debug "HEAD: %d\n" (P.to_int head);
+          debug "BODY: %a\n" print_pset body;
+          debug "HEADPREDS: %a\n" print_intlist head_preds;
+          filtered
+        end in
+      let sb_final = find_last_node_before_loop code head body is_loop_header in
+      let body = HashedSet.PSet.elements body in
+      { preds = preds; body = body; head = head; finals = finals;
+        sb_final = sb_final; }
+    ) 
+    (* LICMaux.inner_loops also returns non-inner loops, but with a body of 1 instruction
+     * We remove those to get just the inner loops *)
+    @@ List.filter (fun (n, body) ->
+      let count = List.length @@ HashedSet.PSet.elements body in count != 1
+    ) (PTree.elements loopmap)
+  end
+
+
 (* Returns a PTree of either None or Some b where b determines the node following the loop, for a cb instruction *)
 (* It uses the fact that loops in CompCert are done by a branch (backedge) instruction followed by a cb *)
-let get_loop_info is_loop_header bfs_order code =
+let get_loop_info f is_loop_header bfs_order code =
+  debug "GET LOOP INFO\n";
+  debug "==================================\n";
   let loop_info = ref (PTree.map (fun n i -> None) code) in
-  let mark_path s n =
+  let mark_path n lbody =
+    let cb_info = ref PTree.empty in
     let visited = ref (PTree.map (fun n i -> false) code) in
-    let rec explore src dest =
-      if (get_some @@ PTree.get src !visited) then false
-      else if src == dest then true
-      else begin
-        visited := PTree.set src true !visited;
-        match rtl_successors @@ get_some @@ PTree.get src code with
-        | [] -> false
-        | [s] -> explore s dest
-        | [s1; s2] -> (explore s1 dest) || (explore s2 dest)
-        | _ -> false
-      end
+    (* Returns true if there is a path from src to dest (not involving jumptables) *)
+    (* Mark nodes as visited along the way *)
+    let explore src dest =
+      debug "Trying to dive a path from %d to %d\n" (P.to_int src) (P.to_int dest);
+      (* Memoizing the results to avoid exponential blow-up *)
+      let memory = ref PTree.empty in
+      let rec explore_rec src =
+        debug "explore_rec %d vs %d... " (P.to_int src) (P.to_int dest);
+        if (P.to_int src) == (P.to_int dest) then (debug "FOUND\n"; true)
+        else if (get_some @@ PTree.get src !visited) then (debug "VISITED... :( \n"; false)
+        (* if we went out of the innermost loop *)
+        else if (not @@ List.mem src lbody) then (debug "Out of innermost...\n"; false)
+        else begin
+          let inst = get_some @@ PTree.get src code in
+          visited := PTree.set src true !visited;
+          match rtl_successors inst with
+          | [] -> false
+          | [s] -> explore_wrap s
+          | [s1; s2] -> let snapshot_visited = ref !visited in begin
+              debug "\t\tSplit at %d: either %d or %d\n" (P.to_int src) (P.to_int s1) (P.to_int s2);
+              (* Remembering that we tried the ifso node *)
+              cb_info := PTree.set src true !cb_info;
+              match explore_wrap s1 with
+              | true -> (
+                  visited := !snapshot_visited;
+                  match explore_wrap s2 with
+                  | true -> begin
+                      (* Both paths lead to a loop: we cannot predict the CB
+                       * (but the explore still succeeds) *)
+                      cb_info := PTree.remove src !cb_info;
+                      true
+                    end
+                  | false -> true (* nothing to do, the explore succeeded *)
+                )
+              | false -> begin
+                  cb_info := PTree.set src false !cb_info;
+                  match explore_wrap s2 with
+                  | true -> true
+                  | false -> (cb_info := PTree.remove src !cb_info; false)
+                end
+            end
+          | _ -> false
+        end
+      and explore_wrap src = begin
+        match PTree.get src !memory with
+        | Some b -> b
+        | None ->
+            let result = explore_rec src in
+            (memory := PTree.set src result !memory; result)
+      end in explore_wrap src
+    (* Goes forward until a CB is encountered
+     * Returns None if no CB was found, or Some the_cb_node
+     * Marks nodes as visited along the way *)
     in let rec advance_to_cb src =
       if (get_some @@ PTree.get src !visited) then None
       else begin
@@ -227,44 +344,53 @@ let get_loop_info is_loop_header bfs_order code =
         | Ijumptable _ | Itailcall _ | Ireturn _ -> None
       end
     in begin
-      debug "Marking path from %d to %d\n" (P.to_int n) (P.to_int s);
-      match advance_to_cb s with
-      | None -> (debug "Nothing found\n")
-      | Some s -> ( debug "Advancing to %d\n" (P.to_int s);
+      debug "Attempting to find natural loop from HEAD %d..\n" (P.to_int n);
+      match advance_to_cb n with
+      | None -> (debug "\tNo CB found\n")
+      | Some s -> ( debug "\tFound a CB! %d\n" (P.to_int s);
           match get_some @@ PTree.get s !loop_info with
           | None | Some _ -> begin
               match get_some @@ PTree.get s code with
-              | Icond (_, _, n1, n2, _) ->
+              | Icond (_, _, n1, n2, _) -> (
                   let b1 = explore n1 n in
                   let b2 = explore n2 n in
-                  if (b1 && b2) then (debug "both true\n")
-                  else if b1 then (debug "true privileged\n"; loop_info := PTree.set s (Some true) !loop_info)
-                  else if b2 then (debug "false privileged\n"; loop_info := PTree.set s (Some false) !loop_info)
-                  else (debug "none true\n")
-              | _ -> ( debug "not an icond\n" )
+                  if (b1 && b2) then 
+                    debug "\tBoth paths lead back to the head: NONE\n"
+                  else if (b1 || b2) then begin
+                    if b1 then begin 
+                      debug "\tTrue path leads to the head: TRUE\n"; 
+                      loop_info := PTree.set s (Some true) !loop_info;
+                    end else if b2 then begin
+                      debug "\tFalse path leads to the head: FALSE\n";
+                      loop_info := PTree.set s (Some false) !loop_info
+                    end;
+                    debug "\tSetting other CBs encountered..\n";
+                    List.iter (fun (cb, dir) ->
+                      debug "\t\t%d is %B\n" (P.to_int cb) dir;
+                      loop_info := PTree.set cb (Some dir) !loop_info
+                    ) (PTree.elements !cb_info)
+                  end else
+                    debug "\tNo path leads back to the head: NONE\n"
+                )
+              | _ -> failwith "\tNot an Icond\n"
             end
           (* | Some _ -> ( debug "already loop info there\n" ) FIXME - we don't know yet whether a branch to a loop head is a backedge or not *)
         )
     end
-  in begin
-    List.iter (fun n ->
-      match get_some @@ PTree.get n code with
-      | Inop s | Iop (_,_,_,s) | Iload (_,_,_,_,_,s) | Istore (_,_,_,_,s) | Icall (_,_,_,_,s)
-      | Ibuiltin (_, _, _, s) ->
-          if get_some @@ PTree.get s is_loop_header then mark_path s n
-      | Icond _ -> () (* loop backedges are never Icond in CompCert RTL.3 *)
-      | Ijumptable _ -> ()
-      | Itailcall _ | Ireturn _ -> ()
-    ) bfs_order;
+  in let iloops = get_inner_loops f code is_loop_header in
+  begin
+    List.iter (fun il -> mark_path il.head il.body) iloops;
+    (* List.iter mark_path @@ List.filter (fun n -> get_some @@ PTree.get n is_loop_header) bfs_order; *)
+    debug "==================================\n";
     !loop_info
   end
 
 (* Remark - compared to the original Branch Prediction for Free paper, we don't use the store heuristic *)
-let get_directions code entrypoint = begin
+let get_directions f code entrypoint = begin
   debug "get_directions\n";
   let bfs_order = bfs code entrypoint in
   let is_loop_header = get_loop_headers code entrypoint in
-  let loop_info = get_loop_info is_loop_header bfs_order code in
+  let loop_info = get_loop_info f is_loop_header bfs_order code in
   let directions = ref (PTree.map (fun n i -> None) code) in (* None <=> no predicted direction *)
   begin
     (* ptree_printbool is_loop_header; *)
@@ -307,21 +433,21 @@ let update_direction direction = function
     | Some _ -> Icond (cond, lr, n, n', pred) )
 | i -> i
 
-let rec update_direction_rec directions = function
-| [] -> PTree.empty
-| m::lm -> let (n, i) = m
-    in let direction = get_some @@ PTree.get n directions
-    in PTree.set n (update_direction direction i) (update_direction_rec directions lm)
-
 (* Uses branch prediction to write prediction annotations in Icond *)
-let update_directions code entrypoint = begin
+let update_directions f code entrypoint = begin
   debug "Update_directions\n";
-  let directions = get_directions code entrypoint
-  in begin
+  let directions = get_directions f code entrypoint in
+  let code' = ref code in
+  begin
+    debug "Get Directions done, now proceeding to update all direction information..\n";
     (* debug "Ifso directions: ";
     ptree_printbool directions;
     debug "\n"; *)
-    update_direction_rec directions (PTree.elements code)
+    List.iter (fun (n, i) ->
+      let direction = get_some @@ PTree.get n directions in
+      code' := PTree.set n (update_direction direction i) !code'
+    ) (PTree.elements code);
+    !code'
   end
 end
 
@@ -403,8 +529,6 @@ let print_traces oc traces =
       Printf.fprintf oc "Traces: {%a}\n" f traces
   end
 
-let print_code code = LICMaux.print_code code
-
 (* Dumb (but linear) trace selection *)
 let select_traces_linear code entrypoint =
   let is_visited = ref (PTree.map (fun n i -> false) code) in
@@ -619,26 +743,6 @@ let invert_iconds code =
  * cyclic dependencies between LICMaux and Duplicateaux
  *)
 
-type innerLoop = {
-  preds: P.t list;
-  body: P.t list;
-  head: P.t; (* head of the loop *)
-  finals: P.t list; (* the final instructions, which loops back to the head *)
-  (* There may be more than one ; for instance if there is an if inside the loop with both
-   * branches leading to a goto backedge
-   * Such cases usually happen after a tail-duplication *)
-  sb_final: P.t option; (* if the innerloop wraps a superblock, this is its final instruction *)
-    (* may be None if we predict that we do not loop *)
-}
-
-let print_pset = LICMaux.pp_pset
-
-let print_option_pint oc o =
-  if !debug_flag then
-    match o with
-    | None -> Printf.fprintf oc "None"
-    | Some n -> Printf.fprintf oc "Some %d" (P.to_int n)
-
 let print_inner_loop iloop =
   debug "{preds: %a, body: %a, head: %d, finals: %a, sb_final: %a}\n"
     print_intlist iloop.preds
@@ -655,18 +759,6 @@ let rec print_inner_loops = function
     print_inner_loops iloops
   end
 
-let print_ptree printer pt =
-  let elements = PTree.elements pt in
-  begin
-    debug "[\n";
-    List.iter (fun (n, elt) ->
-      debug "\t%d: %a\n" (P.to_int n) printer elt
-    ) elements;
-    debug "]\n"
-  end
-
-let print_pint oc i = if !debug_flag then Printf.fprintf oc "%d" (P.to_int i) else ()
-
 let cb_exit_node = function
   | Icond (_,_,n1,n2,p) -> begin match p with
       | Some true -> Some n2
@@ -714,72 +806,6 @@ let get_inner_loops f code is_loop_header =
   !iloops
   *)
 
-let rtl_successors_pref = function
-| Itailcall _ | Ireturn _ -> []
-| Icall(_,_,_,_,n) | Ibuiltin(_,_,_,n) | Inop n | Iop (_,_,_,n)
-| Iload (_,_,_,_,_,n) | Istore (_,_,_,_,n) -> [n]
-| Icond (_,_,n1,n2,p) -> (match p with
-  | Some true -> [n1]
-  | Some false -> [n2]
-  | None -> [n1; n2])
-| Ijumptable (_,ln) -> ln
-
-(* Find the last node of a trace (starting at "node"), until a loop is encountered.
- * If a non-predicted branch is encountered, returns None *)
-let rec find_last_node_before_loop code node trace is_loop_header =
-  let rtl_succ = rtl_successors @@ get_some @@ PTree.get node code in
-  let headers = List.filter (fun n -> 
-    get_some @@ PTree.get n is_loop_header && HashedSet.PSet.contains trace n) rtl_succ in 
-  match headers with
-  | [] -> (
-      let next_nodes = List.filter (fun n -> HashedSet.PSet.contains trace n)
-        (rtl_successors_pref @@ get_some @@ PTree.get node code) in
-      match next_nodes with
-      | [n] -> find_last_node_before_loop code n trace is_loop_header
-      | _ -> None (* May happen when we predict that a loop is not taken *)
-    )
-  | [h] -> Some node
-  | _ -> failwith "Multiple branches leading to a loop"
-
-(* The computation of sb_final requires to already have branch prediction *)
-let get_inner_loops f code is_loop_header =
-  let fake_f = { fn_sig = f.fn_sig; fn_params = f.fn_params; 
-    fn_stacksize = f.fn_stacksize; fn_code = code; fn_entrypoint = f.fn_entrypoint } in
-  let (_, predmap, loopmap) = LICMaux.inner_loops fake_f in
-  begin
-    debug "PREDMAP: "; print_ptree print_intlist predmap;
-    debug "LOOPMAP: "; print_ptree print_pset loopmap;
-    List.map (fun (n, body) ->
-      let preds = List.filter (fun p -> not @@ HashedSet.PSet.contains body p) 
-        @@ get_some @@ PTree.get n predmap in
-      let head = (* the instruction from body which is a loop header *)
-        let heads = HashedSet.PSet.elements @@ HashedSet.PSet.filter 
-          (fun n -> ptree_get_some n is_loop_header) body in
-        begin
-          assert (List.length heads == 1);
-          List.hd heads
-        end in
-      let finals = (* the predecessors from head that are in the body *)
-        let head_preds = ptree_get_some head predmap in
-        let filtered = List.filter (fun n -> HashedSet.PSet.contains body n) head_preds in
-        begin
-          debug "HEAD: %d\n" (P.to_int head);
-          debug "BODY: %a\n" print_pset body;
-          debug "HEADPREDS: %a\n" print_intlist head_preds;
-          filtered
-        end in
-      let sb_final = find_last_node_before_loop code head body is_loop_header in
-      let body = HashedSet.PSet.elements body in
-      { preds = preds; body = body; head = head; finals = finals;
-        sb_final = sb_final; }
-    ) 
-    (* LICMaux.inner_loops also returns non-inner loops, but with a body of 1 instruction
-     * We remove those to get just the inner loops *)
-    @@ List.filter (fun (n, body) ->
-      let count = List.length @@ HashedSet.PSet.elements body in count != 1
-    ) (PTree.elements loopmap)
-  end
-
 let rec generate_fwmap ln ln' fwmap =
   match ln with
   | [] -> begin
@@ -891,6 +917,18 @@ let unroll_inner_loops_single f code revmap =
 
 let is_some o = match o with Some _ -> true | None -> false
 
+let rec go_through_predicted code start final =
+  if start == final then
+    Some [start]
+  else
+    match rtl_successors_pref @@ get_some @@ PTree.get start code with
+    | [n] -> (
+        match go_through_predicted code n final with
+        | Some ln -> Some (start :: ln)
+        | None -> None
+      )
+    | _ -> None
+
 (* Unrolls the body of the inner loop once - duplicating the exit condition as well 
  * 1) Clones body into body'
  * 2) Links the last instruction of body (sb_final) into the first of body' 
@@ -901,19 +939,21 @@ let unroll_inner_loop_body code revmap iloop =
   let body = iloop.body in
   let limit = !Clflags.option_funrollbody in
   if count_ignore_nops code body > limit then begin
-    debug "Too many nodes in the loop body (%d > %d)" (List.length body) limit;
+    debug "Too many nodes in the loop body (%d > %d)\n" (List.length body) limit;
     (code, revmap)
   end else if not @@ is_some iloop.sb_final then begin
-    debug "The loop body does not form a superblock OR we have predicted that we do not loop";
+    debug "The loop body does not form a superblock OR we have predicted that we do not loop\n";
     (code, revmap)
   end else
-    let (code2, revmap2, dupbody, fwmap) = clone code revmap body in
+    let sb_final = get_some @@ iloop.sb_final in
+    let sb_body = get_some @@ go_through_predicted code iloop.head sb_final in
+    let (code2, revmap2, dupbody, fwmap) = clone code revmap sb_body in
     let code' = ref code2 in
     let head' = apply_map fwmap (iloop.head) in
-    let finals' = apply_map_list fwmap (iloop.finals) in
+    let sb_final' = apply_map fwmap sb_final in
     begin
-      code' := change_pointers !code' iloop.head head' [get_some @@ iloop.sb_final];
-      code' := change_pointers !code' head' iloop.head finals'; 
+      code' := change_pointers !code' iloop.head head' [sb_final];
+      code' := change_pointers !code' head' iloop.head [sb_final']; 
       (!code', revmap2)
     end
 
@@ -988,7 +1028,7 @@ let static_predict f =
   let revmap = make_identity_ptree code in
   let code =
     if !Clflags.option_fpredict then
-      update_directions code entrypoint
+      update_directions f code entrypoint
     else code in
   let code =
     if !Clflags.option_fpredict then
diff --git a/backend/Duplicateproof.v b/backend/Duplicateproof.v
index 2480ccf0..2f3bad2f 100644
--- a/backend/Duplicateproof.v
+++ b/backend/Duplicateproof.v
@@ -111,7 +111,7 @@ Proof.
   - monadInv H0. inversion H.
   - inversion H.
     + subst. monadInv H0. destruct x. assumption.
-    + monadInv H0. destruct x0. eapply IHlb; assumption.
+    + monadInv H0. destruct x. eapply IHlb; assumption.
 Qed.
 
 Lemma verify_is_copy_correct:
@@ -144,8 +144,8 @@ Proof.
   intros. unfold verify_match_inst in H.
   destruct i; try (inversion H; fail).
 (* Inop *)
-  - destruct i'; try (inversion H; fail). monadInv H.
-    destruct x. eapply verify_is_copy_correct in EQ.
+  - destruct i'; try (inversion H; fail).
+    eapply verify_is_copy_correct in H.
     constructor; eauto.
 (* Iop *)
   - destruct i'; try (inversion H; fail). monadInv H.
@@ -197,12 +197,12 @@ Proof.
     destruct (list_eq_dec _ _ _); try discriminate. subst.
     destruct (eq_condition _ _); try discriminate.
     + monadInv H. destruct x. eapply verify_is_copy_correct in EQ.
-      destruct x0. eapply verify_is_copy_correct in EQ1.
-      constructor; assumption.
+      eapply verify_is_copy_correct in EQ0.
+      subst; constructor; assumption.
     + destruct (eq_condition _ _); try discriminate.
       monadInv H. destruct x. eapply verify_is_copy_correct in EQ.
-      destruct x0. eapply verify_is_copy_correct in EQ1.
-      constructor; assumption.
+      eapply verify_is_copy_correct in EQ0.
+      subst; constructor; assumption.
 (* Ijumptable *)
   - destruct i'; try (inversion H; fail). monadInv H.
     destruct x. eapply verify_is_copy_list_correct in EQ.
@@ -257,7 +257,7 @@ Proof.
   exists mp; constructor 1; simpl; auto.
   + (* correct *) 
   intros until n'. intros REVM i FNC.
-  unfold verify_mapping_match_nodes in EQ. simpl in EQ. destruct x1.
+  unfold verify_mapping_match_nodes in EQ1. simpl in EQ1. destruct x.
   eapply verify_mapping_mn_rec_correct; eauto.
   simpl; eauto.
   + (* entrypoint *)
diff --git a/backend/LICMaux.ml b/backend/LICMaux.ml
index 602d078d..1f6b8817 100644
--- a/backend/LICMaux.ml
+++ b/backend/LICMaux.ml
@@ -16,16 +16,11 @@ open Maps;;
 open Kildall;;
 open HashedSet;;
 open Inject;;
+open DebugPrint;;
 
 type reg = P.t;;
 
 (** get_loop_headers moved from Duplicateaux.ml to LICMaux.ml to prevent cycle dependencies *)
-let debug_flag = ref false
-
-let debug fmt =
-  if !debug_flag then Printf.eprintf fmt
-  else Printf.ifprintf stderr fmt
-
 type vstate = Unvisited | Processed | Visited
 
 let get_some = function
@@ -39,42 +34,6 @@ let rtl_successors = function
 | Icond (_,_,n1,n2,_) -> [n1; n2]
 | Ijumptable (_,ln) -> ln
 
-let print_ptree_bool oc pt =
-  if !debug_flag then
-    let elements = PTree.elements pt in
-    begin
-      Printf.fprintf oc "[";
-      List.iter (fun (n, b) ->
-        if b then Printf.fprintf oc "%d, " (P.to_int n)
-      ) elements;
-      Printf.fprintf oc "]\n"
-    end
-  else ()
-
-let print_intlist oc l =
-  let rec f oc = function
-  | [] -> ()
-  | n::ln -> (Printf.fprintf oc "%d %a" (P.to_int n) f ln)
-  in begin
-    if !debug_flag then begin
-      Printf.fprintf oc "[%a]" f l
-    end
-  end
-
-(* Adapted from backend/PrintRTL.ml: print_function *)
-let print_code code = let open PrintRTL in let open Printf in
-  if (!debug_flag) then begin
-    fprintf stdout "{\n";
-    let instrs =
-      List.sort
-        (fun (pc1, _) (pc2, _) -> compare pc2 pc1)
-        (List.rev_map
-          (fun (pc, i) -> (P.to_int pc, i))
-          (PTree.elements code)) in
-    List.iter (print_instruction stdout) instrs;
-    fprintf stdout "}"
-  end
-
 (** Getting loop branches with a DFS visit :
   * Each node is either Unvisited, Visited, or Processed
   * pre-order: node becomes Processed