Merge branch 'kvx-work' into aarch64-peephole

author: Sylvain Boulmé <sylvain.boulme@univ-grenoble-alpes.fr> 2021-01-07 18:26:17 +0100
committer: Sylvain Boulmé <sylvain.boulme@univ-grenoble-alpes.fr> 2021-01-07 18:26:17 +0100
commit: 2d086fdfe0ee11204116f6bc442a35a00e77fe38 (patch)
tree: 9348a282409885a85a15cb55bbddc17421dc3e4d
parent: 10da6dfa77f155d9d1e36e28c8bb8552601355f9 (diff)
parent: 13b0e8f162f751a9bdbe9952ebe1f22eb77d0f87 (diff)
download: compcert-kvx-2d086fdfe0ee11204116f6bc442a35a00e77fe38.tar.gz
compcert-kvx-2d086fdfe0ee11204116f6bc442a35a00e77fe38.zip
13 files changed, 280 insertions, 267 deletions
diff --git a/backend/CSE3.v b/backend/CSE3.v
index df1c2bfc..13d07e65 100644
--- a/backend/CSE3.v
+++ b/backend/CSE3.v
@@ -58,7 +58,8 @@ Definition transf_instr (fmap : PMap.t RB.t)
   match instr with
   | Iop op args dst s =>
     let args' := subst_args fmap pc args in
-    match (if is_trivial_op op then None else find_op_in_fmap fmap pc op args') with
+    match (if (negb (Compopts.optim_CSE3_trivial_ops tt)) && (is_trivial_op op)
+           then None else find_op_in_fmap fmap pc op args') with
     | None => Iop op args' dst s
     | Some src => Iop Omove (src::nil) dst s
     end
diff --git a/backend/CSE3proof.v b/backend/CSE3proof.v
index 3fbc9912..2257b4de 100644
--- a/backend/CSE3proof.v
+++ b/backend/CSE3proof.v
@@ -463,12 +463,12 @@ Proof.
       destruct (@PMap.get (option RELATION.t) pc) eqn:INV_PC.
       pose proof (rhs_find_sound (sp:=sp) (genv:=ge) (ctx:=(context_from_hints (snd (preanalysis tenv f)))) pc (SOp op)
                 (subst_args (ctx:=(context_from_hints (snd (preanalysis tenv f)))) (fst (preanalysis tenv f)) pc args) t) as FIND_SOUND.
-      * destruct (if is_trivial_op op
+      * destruct (if (negb (Compopts.optim_CSE3_trivial_ops tt)) && (is_trivial_op op)
                then None
                else
                 rhs_find pc (SOp op)
                   (subst_args (fst (preanalysis tenv f)) pc args) t) eqn:FIND.
-        ** destruct (is_trivial_op op). discriminate.
+        ** destruct ((negb (Compopts.optim_CSE3_trivial_ops tt)) && (is_trivial_op op)). discriminate.
            apply exec_Iop with (op := Omove) (args := r :: nil).
            TR_AT.
            subst instr'.
diff --git a/backend/Duplicate.v b/backend/Duplicate.v
index 7dea752b..40db4e45 100644
--- a/backend/Duplicate.v
+++ b/backend/Duplicate.v
@@ -203,8 +203,7 @@ Fixpoint verify_mapping_mn_rec dupmap f f' lm :=
   match lm with
   | nil => OK tt
   | m :: lm => do u <- verify_mapping_mn dupmap f f' m;
-               do u2 <- verify_mapping_mn_rec dupmap f f' lm;
-               OK tt
+               verify_mapping_mn_rec dupmap f f' lm
   end.
 
 Definition verify_mapping_match_nodes dupmap (f f': function): res unit :=
diff --git a/backend/Duplicateaux.ml b/backend/Duplicateaux.ml
index 861df3cd..b3635527 100644
--- a/backend/Duplicateaux.ml
+++ b/backend/Duplicateaux.ml
@@ -22,9 +22,8 @@
 open RTL
 open Maps
 open Camlcoq
+open DebugPrint
 
-let debug_flag = LICMaux.debug_flag
-let debug = LICMaux.debug
 let get_loop_headers = LICMaux.get_loop_headers
 let get_some = LICMaux.get_some
 let rtl_successors = LICMaux.rtl_successors
@@ -87,8 +86,6 @@ end
 
 module PSet = Set.Make(PInt)
 
-let print_intlist = LICMaux.print_intlist
-
 let print_intset s =
   let seq = PSet.to_seq s
   in begin
@@ -101,18 +98,6 @@ let print_intset s =
     end
   end
 
-let ptree_printbool pt =
-  let elements = PTree.elements pt
-  in begin
-    if !debug_flag then begin
-      Printf.printf "[";
-      List.iter (fun (n, b) ->
-        if b then Printf.printf "%d, " (P.to_int n) else ()
-      ) elements;
-      Printf.printf "]"
-    end
-  end
-
 (* Looks ahead (until a branch) to see if a node further down verifies
  * the given predicate *)
 let rec look_ahead_gen (successors: RTL.instruction -> P.t list) code node is_loop_header predicate =
@@ -215,16 +200,6 @@ type innerLoop = {
 
 let print_pset = LICMaux.pp_pset
 
-let print_ptree printer pt =
-  let elements = PTree.elements pt in
-  begin
-    debug "[\n";
-    List.iter (fun (n, elt) ->
-      debug "\t%d: %a\n" (P.to_int n) printer elt
-    ) elements;
-    debug "]\n"
-  end
-
 let rtl_successors_pref = function
 | Itailcall _ | Ireturn _ -> []
 | Icall(_,_,_,_,n) | Ibuiltin(_,_,_,n) | Inop n | Iop (_,_,_,n)
@@ -458,21 +433,21 @@ let update_direction direction = function
     | Some _ -> Icond (cond, lr, n, n', pred) )
 | i -> i
 
-let rec update_direction_rec directions = function
-| [] -> PTree.empty
-| m::lm -> let (n, i) = m
-    in let direction = get_some @@ PTree.get n directions
-    in PTree.set n (update_direction direction i) (update_direction_rec directions lm)
-
 (* Uses branch prediction to write prediction annotations in Icond *)
 let update_directions f code entrypoint = begin
   debug "Update_directions\n";
-  let directions = get_directions f code entrypoint
-  in begin
+  let directions = get_directions f code entrypoint in
+  let code' = ref code in
+  begin
+    debug "Get Directions done, now proceeding to update all direction information..\n";
     (* debug "Ifso directions: ";
     ptree_printbool directions;
     debug "\n"; *)
-    update_direction_rec directions (PTree.elements code)
+    List.iter (fun (n, i) ->
+      let direction = get_some @@ PTree.get n directions in
+      code' := PTree.set n (update_direction direction i) !code'
+    ) (PTree.elements code);
+    !code'
   end
 end
 
@@ -554,8 +529,6 @@ let print_traces oc traces =
       Printf.fprintf oc "Traces: {%a}\n" f traces
   end
 
-let print_code code = LICMaux.print_code code
-
 (* Dumb (but linear) trace selection *)
 let select_traces_linear code entrypoint =
   let is_visited = ref (PTree.map (fun n i -> false) code) in
@@ -770,14 +743,6 @@ let invert_iconds code =
  * cyclic dependencies between LICMaux and Duplicateaux
  *)
 
-
-
-let print_option_pint oc o =
-  if !debug_flag then
-    match o with
-    | None -> Printf.fprintf oc "None"
-    | Some n -> Printf.fprintf oc "Some %d" (P.to_int n)
-
 let print_inner_loop iloop =
   debug "{preds: %a, body: %a, head: %d, finals: %a, sb_final: %a}\n"
     print_intlist iloop.preds
@@ -794,8 +759,6 @@ let rec print_inner_loops = function
     print_inner_loops iloops
   end
 
-let print_pint oc i = if !debug_flag then Printf.fprintf oc "%d" (P.to_int i) else ()
-
 let cb_exit_node = function
   | Icond (_,_,n1,n2,p) -> begin match p with
       | Some true -> Some n2
diff --git a/backend/Duplicateproof.v b/backend/Duplicateproof.v
index 2480ccf0..cc24104f 100644
--- a/backend/Duplicateproof.v
+++ b/backend/Duplicateproof.v
@@ -111,7 +111,7 @@ Proof.
   - monadInv H0. inversion H.
   - inversion H.
     + subst. monadInv H0. destruct x. assumption.
-    + monadInv H0. destruct x0. eapply IHlb; assumption.
+    + monadInv H0. destruct x. eapply IHlb; assumption.
 Qed.
 
 Lemma verify_is_copy_correct:
diff --git a/backend/LICMaux.ml b/backend/LICMaux.ml
index df98077b..1f6b8817 100644
--- a/backend/LICMaux.ml
+++ b/backend/LICMaux.ml
@@ -16,16 +16,11 @@ open Maps;;
 open Kildall;;
 open HashedSet;;
 open Inject;;
+open DebugPrint;;
 
 type reg = P.t;;
 
 (** get_loop_headers moved from Duplicateaux.ml to LICMaux.ml to prevent cycle dependencies *)
-let debug_flag = ref false
-
-let debug fmt =
-  if !debug_flag then (flush stderr; Printf.eprintf fmt)
-  else Printf.ifprintf stderr fmt
-
 type vstate = Unvisited | Processed | Visited
 
 let get_some = function
@@ -39,42 +34,6 @@ let rtl_successors = function
 | Icond (_,_,n1,n2,_) -> [n1; n2]
 | Ijumptable (_,ln) -> ln
 
-let print_ptree_bool oc pt =
-  if !debug_flag then
-    let elements = PTree.elements pt in
-    begin
-      Printf.fprintf oc "[";
-      List.iter (fun (n, b) ->
-        if b then Printf.fprintf oc "%d, " (P.to_int n)
-      ) elements;
-      Printf.fprintf oc "]\n"
-    end
-  else ()
-
-let print_intlist oc l =
-  let rec f oc = function
-  | [] -> ()
-  | n::ln -> (Printf.fprintf oc "%d %a" (P.to_int n) f ln)
-  in begin
-    if !debug_flag then begin
-      Printf.fprintf oc "[%a]" f l
-    end
-  end
-
-(* Adapted from backend/PrintRTL.ml: print_function *)
-let print_code code = let open PrintRTL in let open Printf in
-  if (!debug_flag) then begin
-    fprintf stdout "{\n";
-    let instrs =
-      List.sort
-        (fun (pc1, _) (pc2, _) -> compare pc2 pc1)
-        (List.rev_map
-          (fun (pc, i) -> (P.to_int pc, i))
-          (PTree.elements code)) in
-    List.iter (print_instruction stdout) instrs;
-    fprintf stdout "}"
-  end
-
 (** Getting loop branches with a DFS visit :
   * Each node is either Unvisited, Visited, or Processed
   * pre-order: node becomes Processed
diff --git a/common/DebugPrint.ml b/common/DebugPrint.ml
new file mode 100644
index 00000000..64efe727
--- /dev/null
+++ b/common/DebugPrint.ml
@@ -0,0 +1,118 @@
+open Maps
+open Camlcoq
+open Registers
+
+let debug_flag = ref false
+
+let debug fmt =
+  if !debug_flag then (flush stderr; Printf.eprintf fmt)
+  else Printf.ifprintf stderr fmt
+
+let print_ptree_bool oc pt =
+  if !debug_flag then
+    let elements = PTree.elements pt in
+    begin
+      Printf.fprintf oc "[";
+      List.iter (fun (n, b) ->
+        if b then Printf.fprintf oc "%d, " (P.to_int n)
+      ) elements;
+      Printf.fprintf oc "]\n"
+    end
+  else ()
+
+let print_intlist oc l =
+  let rec f oc = function
+  | [] -> ()
+  | n::ln -> (Printf.fprintf oc "%d %a" (P.to_int n) f ln)
+  in begin
+    if !debug_flag then begin
+      Printf.fprintf oc "[%a]" f l
+    end
+  end
+
+(* Adapted from backend/PrintRTL.ml: print_function *)
+let print_code code = let open PrintRTL in let open Printf in
+  if (!debug_flag) then begin
+    fprintf stdout "{\n";
+    let instrs =
+      List.sort
+        (fun (pc1, _) (pc2, _) -> compare pc2 pc1)
+        (List.rev_map
+          (fun (pc, i) -> (P.to_int pc, i))
+          (PTree.elements code)) in
+    List.iter (print_instruction stdout) instrs;
+    fprintf stdout "}"
+  end
+
+let ptree_printbool pt =
+  let elements = PTree.elements pt
+  in begin
+    if !debug_flag then begin
+      Printf.printf "[";
+      List.iter (fun (n, b) ->
+        if b then Printf.printf "%d, " (P.to_int n) else ()
+      ) elements;
+      Printf.printf "]"
+    end
+  end
+
+let print_ptree printer pt =
+  let elements = PTree.elements pt in
+  begin
+    debug "[\n";
+    List.iter (fun (n, elt) ->
+      debug "\t%d: %a\n" (P.to_int n) printer elt
+    ) elements;
+    debug "]\n"
+  end
+
+let print_option_pint oc o =
+  if !debug_flag then
+    match o with
+    | None -> Printf.fprintf oc "None"
+    | Some n -> Printf.fprintf oc "Some %d" (P.to_int n)
+
+let print_pint oc i = if !debug_flag then Printf.fprintf oc "%d" (P.to_int i) else ()
+
+let print_regset rs = begin
+  debug "[";
+  List.iter (fun n -> debug "%d " (P.to_int n)) (Regset.elements rs);
+  debug "]"
+end
+
+let print_ptree_regset pt = begin
+  debug "[";
+  List.iter (fun (n, rs) ->
+    debug "\n\t";
+    debug "%d: " (P.to_int n);
+    print_regset rs
+  ) (PTree.elements pt);
+  debug "]"
+end
+
+let print_true_nodes booltree = begin
+  debug "[";
+  List.iter (fun (n,b) ->
+    if b then debug "%d " (P.to_int n)
+  ) (PTree.elements booltree);
+  debug "]";
+end
+
+
+let print_instructions insts code =
+  let get_some = function
+  | None -> failwith "Did not get some"
+  | Some thing -> thing
+  in if (!debug_flag) then begin
+    debug "[ ";
+    List.iter (
+      fun n -> (PrintRTL.print_instruction stdout (P.to_int n, get_some @@ PTree.get n code))
+    ) insts; debug "]"
+  end
+
+let print_arrayp arr = begin
+  debug "[| ";
+  Array.iter (fun n -> debug "%d, " (P.to_int n)) arr;
+  debug "|]"
+end
+
diff --git a/cparser/Machine.ml b/cparser/Machine.ml
index 97ca9223..73b71ea0 100644
--- a/cparser/Machine.ml
+++ b/cparser/Machine.ml
@@ -61,6 +61,7 @@ type t = {
   supports_unaligned_accesses: bool;
   struct_passing_style: struct_passing_style;
   struct_return_style : struct_return_style;
+  has_non_trapping_loads : bool;
 }
 
 let ilp32ll64 = {
@@ -96,6 +97,7 @@ let ilp32ll64 = {
   supports_unaligned_accesses = false;
   struct_passing_style = SP_ref_callee;
   struct_return_style = SR_ref;
+  has_non_trapping_loads = false;
 }
 
 let i32lpll64 = {
@@ -131,6 +133,7 @@ let i32lpll64 = {
   supports_unaligned_accesses = false;
   struct_passing_style = SP_ref_callee;
   struct_return_style = SR_ref;
+  has_non_trapping_loads = false;
 }
 
 let il32pll64 = {
@@ -166,6 +169,7 @@ let il32pll64 = {
   supports_unaligned_accesses = false;
   struct_passing_style = SP_ref_callee;
   struct_return_style = SR_ref;
+  has_non_trapping_loads = false;
 }
 
 (* Canned configurations for some ABIs *)
@@ -270,7 +274,9 @@ let kvx =
     bitfields_msb_first = false; (* TO CHECK *)
     supports_unaligned_accesses = true;
     struct_passing_style = SP_value32_ref_callee;
-    struct_return_style = SR_int1to4 }
+    struct_return_style = SR_int1to4;
+    has_non_trapping_loads = true;
+}
 
 let aarch64 =
   { i32lpll64 with name = "aarch64";
@@ -323,6 +329,7 @@ let undef = {
   supports_unaligned_accesses = false;
   struct_passing_style = SP_ref_callee;
   struct_return_style = SR_ref;
+  has_non_trapping_loads = false;
 }
 
 (* The current configuration.  Must be initialized before use. *)
diff --git a/cparser/Machine.mli b/cparser/Machine.mli
index 0e1e22d1..54436758 100644
--- a/cparser/Machine.mli
+++ b/cparser/Machine.mli
@@ -60,6 +60,7 @@ type t = {
   supports_unaligned_accesses: bool;
   struct_passing_style: struct_passing_style;
   struct_return_style: struct_return_style;
+  has_non_trapping_loads: bool;
 }
 
 (* The current configuration *)
diff --git a/doc/index-kvx.html b/doc/index-kvx.html
index 39fdf799..dc646a67 100644
--- a/doc/index-kvx.html
+++ b/doc/index-kvx.html
@@ -254,6 +254,23 @@ This IR is generic over the processor, even if currently, only used for KVX.
 </TR>
 
 <TR valign="top" style="color:#000000">
+  <TD colspan="4"><b>Passes introduced for profiling (for later use in trace selection)</b></TD>
+</TR>
+<TR valign="top" style="color:#000000">
+  <TD>Insert profiling annotations (for recording experiments -- see PROFILE.md).
+  </TD>
+  <TD>RTL to RTL</TD>
+  <TD><A HREF="html/compcert.backend.Profiling.html">Profiling</A></TD>
+  <TD><A HREF="html/compcert.backend.Profilingproof.html">Profilingproof</A></TD>
+</TR>
+<TR valign="top" style="color:#000000">
+  <TD>Update ICond nodes (from recorded experiments -- see PROFILE.md).
+  </TD>
+  <TD>RTL to RTL</TD>
+  <TD><A HREF="html/compcert.backend.ProfilingExploit.html">ProfilingExploit</A></TD>
+  <TD><A HREF="html/compcert.backend.ProfilingExploitproof.html">ProfilingExploitproof</A></TD>
+</TR>
+<TR valign="top" style="color:#000000">
   <TD colspan="4"><b>Passes introduced for superblock prepass scheduling</b></TD>
 </TR>
 <TR valign="top" style="color:#000000">
@@ -372,12 +389,13 @@ This IR is generic over the processor, even if currently, only used for KVX.
 </TR>
 </TABLE>
 
-<font color=gray>
-<H3>All together</H3>
+<H3>All together (there are many more RTL passes than on vanilla CompCert: their order is specified in Compiler)</H3>
 
 <UL>
+</font>
 <LI> <A HREF="html/compcert.driver.Compiler.html">Compiler</A>: composing the passes together;
 whole-compiler semantic preservation theorems.
+<font color=gray>
 <LI> <A HREF="html/compcert.driver.Complements.html">Complements</A>: interesting consequences of the semantic preservation theorems.
 </UL>
 
diff --git a/scheduling/RTLpathLivegenaux.ml b/scheduling/RTLpathLivegenaux.ml
index dd971db8..ab921954 100644
--- a/scheduling/RTLpathLivegenaux.ml
+++ b/scheduling/RTLpathLivegenaux.ml
@@ -6,13 +6,7 @@ open Camlcoq
 open Datatypes
 open Kildall
 open Lattice
-
-let debug_flag = ref false
-
-let dprintf fmt = let open Printf in
-  match !debug_flag with
-  | true -> printf fmt
-  | false -> ifprintf stdout fmt
+open DebugPrint
 
 let get_some = function
 | None -> failwith "Got None instead of Some _"
@@ -122,22 +116,6 @@ let get_path_map code entry join_points =
     !path_map
   end
 
-let print_regset rs = begin
-  dprintf "[";
-  List.iter (fun n -> dprintf "%d " (P.to_int n)) (Regset.elements rs);
-  dprintf "]"
-end
-
-let print_ptree_regset pt = begin
-  dprintf "[";
-  List.iter (fun (n, rs) ->
-    dprintf "\n\t";
-    dprintf "%d: " (P.to_int n);
-    print_regset rs
-  ) (PTree.elements pt);
-  dprintf "]"
-end
-
 let transfer f pc after = let open Liveness in
   match PTree.get pc f.fn_code with
   | Some i ->
@@ -257,7 +235,7 @@ let set_pathmap_liveness f pm =
   let new_pm = ref PTree.empty in
   let code = f.fn_code in
   begin
-    dprintf "Liveness: "; print_ptree_regset liveness; dprintf "\n";
+    debug "Liveness: "; print_ptree_regset liveness; debug "\n";
     List.iter (fun (n, pi) ->
       let inputs = get_some @@ PTree.get n liveness in
       let outputs = get_outputs liveness code n pi in
@@ -267,31 +245,23 @@ let set_pathmap_liveness f pm =
     !new_pm
   end
 
-let print_true_nodes booltree = begin
-  dprintf "[";
-  List.iter (fun (n,b) ->
-    if b then dprintf "%d " (P.to_int n)
-  ) (PTree.elements booltree);
-  dprintf "]";
-end
-
 let print_path_info pi = begin
-  dprintf "(psize=%d; " (Camlcoq.Nat.to_int pi.psize);
-  dprintf "input_regs=";
+  debug "(psize=%d; " (Camlcoq.Nat.to_int pi.psize);
+  debug "input_regs=";
   print_regset pi.input_regs;
-  dprintf "; output_regs=";
+  debug "; output_regs=";
   print_regset pi.output_regs;
-  dprintf ")"
+  debug ")"
 end
 
 let print_path_map path_map = begin
-  dprintf "[";
+  debug "[";
   List.iter (fun (n,pi) ->
-    dprintf "\n\t";
-    dprintf "%d: " (P.to_int n);
+    debug "\n\t";
+    debug "%d: " (P.to_int n);
     print_path_info pi
   ) (PTree.elements path_map);
-  dprintf "]"
+  debug "]"
 end
 
 let build_path_map f = 
@@ -300,10 +270,10 @@ let build_path_map f =
   let join_points = get_join_points code entry in
   let path_map = set_pathmap_liveness f @@ get_path_map code entry join_points in
   begin
-    dprintf "Join points: ";
+    debug "Join points: ";
     print_true_nodes join_points;
-    dprintf "\nPath map: ";
+    debug "\nPath map: ";
     print_path_map path_map;
-    dprintf "\n";
+    debug "\n";
     path_map
   end
diff --git a/scheduling/RTLpathScheduleraux.ml b/scheduling/RTLpathScheduleraux.ml
index 66910bdf..a294d0b5 100644
--- a/scheduling/RTLpathScheduleraux.ml
+++ b/scheduling/RTLpathScheduleraux.ml
@@ -4,6 +4,10 @@ open Maps
 open RTLpathLivegenaux
 open Registers
 open Camlcoq
+open Machine
+open DebugPrint
+
+let config = Machine.config
 
 type superblock = {
   instructions: P.t array; (* pointers to code instructions *)
@@ -15,54 +19,26 @@ type superblock = {
   typing: RTLtyping.regenv
 }
 
-let print_instructions insts code =
-  if (!debug_flag) then begin
-    dprintf "[ ";
-    List.iter (
-      fun n -> (PrintRTL.print_instruction stdout (P.to_int n, get_some @@ PTree.get n code))
-    ) insts; dprintf "]"
-  end
-
 let print_superblock sb code =
   let insts = sb.instructions in
   let li = sb.liveins in
   let outs = sb.output_regs in
   begin
-    dprintf "{ instructions = "; print_instructions (Array.to_list insts) code; dprintf "\n";
-    dprintf "  liveins = "; print_ptree_regset li; dprintf "\n";
-    dprintf "  output_regs = "; print_regset outs; dprintf "}"
+    debug "{ instructions = "; print_instructions (Array.to_list insts) code; debug "\n";
+    debug "  liveins = "; print_ptree_regset li; debug "\n";
+    debug "  output_regs = "; print_regset outs; debug "}"
   end
 
 let print_superblocks lsb code =
   let rec f = function
     | [] -> ()
-    | sb :: lsb -> (print_superblock sb code; dprintf ",\n"; f lsb)
+    | sb :: lsb -> (print_superblock sb code; debug ",\n"; f lsb)
   in begin
-    dprintf "[\n";
+    debug "[\n";
     f lsb;
-    dprintf "]"
+    debug "]"
   end
 
-(* Adapted from backend/PrintRTL.ml: print_function *)
-let print_code code = let open PrintRTL in let open Printf in
-  if (!debug_flag) then begin
-    fprintf stdout "{\n";
-    let instrs =
-      List.sort
-        (fun (pc1, _) (pc2, _) -> compare pc2 pc1)
-        (List.rev_map
-          (fun (pc, i) -> (P.to_int pc, i))
-          (PTree.elements code)) in
-    List.iter (print_instruction stdout) instrs;
-    fprintf stdout "}"
-  end
-
-let print_arrayp arr = begin
-  dprintf "[| ";
-  Array.iter (fun n -> dprintf "%d, " (P.to_int n)) arr;
-  dprintf "|]"
-end
-
 let get_superblocks code entry pm typing =
   let visited = ref (PTree.map (fun n i -> false) code) in
   let rec get_superblocks_rec pc =
@@ -100,7 +76,7 @@ let get_superblocks code entry pm typing =
     end
   in let lsb = get_superblocks_rec entry in begin
     (* debug_flag := true; *)
-    dprintf "Superblocks identified:"; print_superblocks lsb code; dprintf "\n";
+    debug "Superblocks identified:"; print_superblocks lsb code; debug "\n";
     (* debug_flag := false; *)
     lsb
 end
@@ -219,11 +195,45 @@ let sinst_to_rinst = function
       )
   | Send i -> i
 
+let is_a_cb = function Icond _ -> true | _ -> false
+let is_a_load = function Iload _ -> true | _ -> false
+
+let find_array arr n =
+  let index = ref None in
+  begin
+    Array.iteri (fun i n' ->
+      if n = n' then
+        match !index with
+        | Some _ -> failwith "More than one element present"
+        | None -> index := Some i
+    ) arr;
+    !index
+  end
+
+let rec hashedset_from_list = function
+  | [] -> HashedSet.PSet.empty
+  | n::ln -> HashedSet.PSet.add n (hashedset_from_list ln)
+
+let hashedset_map f hs = hashedset_from_list @@ List.map f @@ HashedSet.PSet.elements hs
+
 let apply_schedule code sb new_order =
   let tc = ref code in
   let old_order = sb.instructions in
-  begin
+  let count_cbs order code =
+    let current_cbs = ref HashedSet.PSet.empty in
+    let cbs_above = ref PTree.empty in
+    Array.iter (fun n ->
+      let inst = get_some @@ PTree.get n code in
+      if is_a_cb inst then current_cbs := HashedSet.PSet.add n !current_cbs
+      else if is_a_load inst then cbs_above := PTree.set n !current_cbs !cbs_above
+    ) order;
+    !cbs_above
+  in let fmap n =
+    let index = get_some @@ find_array new_order n in
+    old_order.(index)
+  in begin
     check_order code old_order new_order;
+    (* First pass - modify the positions, nothing else *)
     Array.iteri (fun i n' ->
       let inst' = get_some @@ PTree.get n' code in
       let iend = Array.length old_order - 1 in
@@ -246,103 +256,56 @@ let apply_schedule code sb new_order =
           @@ rinst_to_sinst inst'
       in tc := PTree.set (Array.get old_order i) new_inst !tc
     ) new_order;
+    (* Second pass - turn the loads back into trapping when it was not needed *)
+    (* 1) We remember which CBs are "above" a given load *)
+    let cbs_above = count_cbs old_order code in
+    (* 2) We do the same for new_order *)
+    let cbs_above' = count_cbs (Array.map fmap new_order) !tc in
+    (* 3) We examine each load, turn it back into trapping if cbs_above is included in cbs_above' *)
+    Array.iter (fun n ->
+      let n' = fmap n in
+      let inst' = get_some @@ PTree.get n' !tc in
+      match inst' with
+      | Iload (t,a,b,c,d,s) ->
+          let pset = hashedset_map fmap @@ get_some @@ PTree.get n cbs_above in
+          let pset' = get_some @@ PTree.get n' cbs_above' in
+          if HashedSet.PSet.is_subset pset pset' then tc := PTree.set n' (Iload (AST.TRAP,a,b,c,d,s)) !tc
+          else assert !config.has_non_trapping_loads
+      | _ -> ()
+    ) old_order;
     !tc
   end
 
-  (*
-let main_successors = function
-| Inop n | Iop (_,_,_,n) | Iload (_,_,_,_,_,n) | Istore (_,_,_,_,n) -> [n]
-| Icall (_,_,_,_,n) | Ibuiltin (_,_,_,n) -> [n]
-| Icond (_,_,n1,n2,p) -> (
-    match p with
-    | Some true -> [n1; n2]
-    | Some false ->  [n2; n1]
-    | None -> [n1; n2] )
-| Ijumptable _ | Itailcall _ | Ireturn _ -> []
-
-let change_predicted_successor i s = match i with
-  | Itailcall _ | Ireturn _ -> failwith "Wrong instruction (5) - Tailcalls and returns should not be moved in the middle of a superblock"
-  | Ijumptable _ -> failwith "Wrong instruction (6) - Jumptables should not be moved in the middle of a superblock"
-  | Inop n -> Inop s
-  | Iop (a,b,c,n) -> Iop (a,b,c,s)
-  | Iload (a,b,c,d,e,n) -> Iload (a,b,c,d,e,s)
-  | Istore (a,b,c,d,n) -> Istore (a,b,c,d,s)
-  | Icall (a,b,c,d,n) -> Icall (a,b,c,d,s)
-  | Ibuiltin (a,b,c,n) -> Ibuiltin (a,b,c,s)
-  | Icond (a,b,n1,n2,p) -> (
-      match p with
-      | Some true -> Icond (a,b,s,n2,p)
-      | Some false -> Icond (a,b,n1,s,p)
-      | None -> failwith "Predicted a successor for an Icond with p=None - unpredicted CB should not be moved in the middle of the superblock"
-      )
-
-let rec change_successors i = function
-  | [] -> (
-      match i with
-      | Itailcall _ | Ireturn _ -> i
-      | _ -> failwith "Wrong instruction (1)")
-  | [s] -> (
-      match i with
-      | Inop n -> Inop s
-      | Iop (a,b,c,n) -> Iop (a,b,c,s)
-      | Iload (a,b,c,d,e,n) -> Iload (a,b,c,d,e,s)
-      | Istore (a,b,c,d,n) -> Istore (a,b,c,d,s)
-      | Icall (a,b,c,d,n) -> Icall (a,b,c,d,s)
-      | Ibuiltin (a,b,c,n) -> Ibuiltin (a,b,c,s)
-      | Ijumptable (a,[n]) -> Ijumptable (a,[s])
-      | Icond (a,b,n1,n2,p) -> (
-          match p with
-          | Some true -> Icond (a,b,s,n2,p)
-          | Some false -> Icond (a,b,n1,s,p)
-          | None -> failwith "Icond Wrong instruction (2) ; should not happen?"
-        )
-      | _ -> failwith "Wrong instruction (2)")
-  | [s1; s2] -> (
-      match i with
-      | Icond (a,b,n1,n2,p) -> Icond (a,b,s1,s2,p)
-      | Ijumptable (a, [n1; n2]) -> Ijumptable (a, [s1; s2])
-      | _ -> change_successors i [s1])
-  | ls -> (
-      match i with
-      | Ijumptable (a, ln) -> begin
-          assert ((List.length ln) == (List.length ls));
-          Ijumptable (a, ls)
-        end
-      | _ -> failwith "Wrong instruction (4)")
-
-
-let apply_schedule code sb new_order =
-  let tc = ref code in
-  let old_order = sb.instructions in
-  let last_node = Array.get old_order (Array.length old_order - 1) in
-  let last_successors = main_successors
-    @@ get_some @@ PTree.get last_node code in
-  begin
-    check_order code old_order new_order;
-    Array.iteri (fun i n' ->
-      let inst' = get_some @@ PTree.get n' code in
-      let new_inst =
-        if (i == (Array.length old_order - 1)) then
-          change_successors inst' last_successors
-        else
-          change_predicted_successor inst' (Array.get old_order (i+1))
-      in tc := PTree.set (Array.get old_order i) new_inst !tc
-    ) new_order;
-    !tc
+let turn_all_loads_nontrap sb code =
+  if not !config.has_non_trapping_loads then code
+  else begin
+    let code' = ref code in
+    Array.iter (fun n ->
+      let inst = get_some @@ PTree.get n code in
+      match inst with
+      | Iload (t,a,b,c,d,s) -> code' := PTree.set n (Iload (AST.NOTRAP,a,b,c,d,s)) !code'
+      | _ -> ()
+    ) sb.instructions;
+    !code'
   end
-*)
 
 let rec do_schedule code = function
   | [] -> code
   | sb :: lsb ->
-      let schedule = schedule_superblock sb code in
-      let new_code = apply_schedule code sb schedule in
+      (* Trick: instead of turning loads into non trap as needed..
+       * First, we turn them all into non-trap.
+       * Then, we turn back those who didn't need to be turned, into TRAP again
+       * This is because the scheduler (rightfully) refuses to schedule ahead of a branch
+       * operations that might trap *)
+      let code' = turn_all_loads_nontrap sb code in
+      let schedule = schedule_superblock sb code' in
+      let new_code = apply_schedule code' sb schedule in
       begin
         (* debug_flag := true; *)
-        dprintf "Old Code: "; print_code code;
-        dprintf "\nSchedule to apply: "; print_arrayp schedule;
-        dprintf "\nNew Code: "; print_code new_code;
-        dprintf "\n";
+        debug "Old Code: "; print_code code;
+        debug "\nSchedule to apply: "; print_arrayp schedule;
+        debug "\nNew Code: "; print_code new_code;
+        debug "\n";
         (* debug_flag := false; *)
         do_schedule new_code lsb
       end
@@ -358,10 +321,10 @@ let scheduler f =
   let lsb = get_superblocks code entry pm typing in
   begin
     (* debug_flag := true; *)
-    dprintf "Pathmap:\n"; dprintf "\n";
+    debug "Pathmap:\n"; debug "\n";
     print_path_map pm;
-    dprintf "Superblocks:\n";
-    print_superblocks lsb code; dprintf "\n";
+    debug "Superblocks:\n";
+    print_superblocks lsb code; debug "\n";
     (* debug_flag := false; *)
     let tc = do_schedule code lsb in
     (((tc, entry), pm), id_ptree)
diff --git a/test/monniaux/yarpgen/Makefile b/test/monniaux/yarpgen/Makefile
index 24dd19c3..f9efd5a0 100644
--- a/test/monniaux/yarpgen/Makefile
+++ b/test/monniaux/yarpgen/Makefile
@@ -39,6 +39,20 @@ TESTS_GCC_TARGET_OUT=$(shell seq --format $(PREFIX)/example.gcc.target.out 1 $(M
 TESTS_GCC_HOST_OUT=$(shell seq --format $(PREFIX)/example.gcc.host.out 1 $(MAX))
 TESTS_CMP=$(shell seq --format $(PREFIX)/example.target.cmp 1 $(MAX)) # $(shell seq --format $(PREFIX)/example.host_target.cmp 1 $(MAX))
 
+# FIXME - test000089 fails in CI in arm and armhf because of memory consumption during register allocation being too high
+# Removing it from the pool
+BADID:=89
+TESTS_C:=$(filter-out $(shell seq --format $(PREFIX)/func.c $(BADID) $(BADID)),$(TESTS_C))
+TESTS_C:=$(filter-out $(shell seq --format $(PREFIX)/driver.c $(BADID) $(BADID)),$(TESTS_C))
+TESTS_C:=$(filter-out $(shell seq --format $(PREFIX)/init.c $(BADID) $(BADID)),$(TESTS_C))
+TESTS_C:=$(filter-out $(shell seq --format $(PREFIX)/hash.c $(BADID) $(BADID)),$(TESTS_C))
+TESTS_C:=$(filter-out $(shell seq --format $(PREFIX)/check.c $(BADID) $(BADID)),$(TESTS_C))
+TESTS_C:=$(filter-out $(shell seq --format $(PREFIX)/init.h $(BADID) $(BADID)),$(TESTS_C))
+TESTS_CMP:=$(filter-out $(shell seq --format $(PREFIX)/example.target.cmp $(BADID) $(BADID)),$(TESTS_CMP))
+TESTS_GCC_HOST_OUT:=$(filter-out $(shell seq --format $(PREFIX)/example.gcc.host.out $(BADID) $(BADID)),$(TESTS_GCC_HOST_OUT))
+TESTS_CCOMP_TARGET_OUT:=$(filter-out $(shell seq --format $(PREFIX)/example.ccomp.target.out $(BADID) $(BADID)),$(TESTS_CCOMP_TARGET_OUT))
+TESTS_GCC_TARGET_OUT:=$(filter-out $(shell seq --format $(PREFIX)/example.gcc.target.out $(BADID) $(BADID)),$(TESTS_GCC_TARGET_OUT))
+
 all:	$(TESTS_CCOMP_TARGET_OUT) $(TESTS_GCC_TARGET_OUT) $(TESTS_CCOMP_TARGET_S) $(TESTS_GCC_TARGET_S) $(TESTS_CMP) $(TESTS_C)
 
 tests_c: $(TESTS_C)
author	Sylvain Boulmé <sylvain.boulme@univ-grenoble-alpes.fr>	2021-01-07 18:26:17 +0100
committer	Sylvain Boulmé <sylvain.boulme@univ-grenoble-alpes.fr>	2021-01-07 18:26:17 +0100
commit	2d086fdfe0ee11204116f6bc442a35a00e77fe38 (patch)
tree	9348a282409885a85a15cb55bbddc17421dc3e4d
parent	10da6dfa77f155d9d1e36e28c8bb8552601355f9 (diff)
parent	13b0e8f162f751a9bdbe9952ebe1f22eb77d0f87 (diff)
download	compcert-kvx-2d086fdfe0ee11204116f6bc442a35a00e77fe38.tar.gz compcert-kvx-2d086fdfe0ee11204116f6bc442a35a00e77fe38.zip