aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJustus Fasse <justus.fasse@etu.univ-grenoble-alpes.fr>2021-07-13 21:12:39 +0200
committerJustus Fasse <justus.fasse@etu.univ-grenoble-alpes.fr>2021-07-13 21:13:07 +0200
commit931e10341d58840b5747ec607cdac72444cde0b2 (patch)
treeb228437ed1d13610427bd235ada0b570d64b38c3
parentf30bf16eadba6824354cab1be131d10f3f3abd67 (diff)
downloadcompcert-kvx-931e10341d58840b5747ec607cdac72444cde0b2.tar.gz
compcert-kvx-931e10341d58840b5747ec607cdac72444cde0b2.zip
Try to somehwat separate downward scheduling from register renaming
-rw-r--r--scheduling/MyRTLpathScheduleraux.ml97
1 files changed, 69 insertions, 28 deletions
diff --git a/scheduling/MyRTLpathScheduleraux.ml b/scheduling/MyRTLpathScheduleraux.ml
index 1b23bc92..c35f3bea 100644
--- a/scheduling/MyRTLpathScheduleraux.ml
+++ b/scheduling/MyRTLpathScheduleraux.ml
@@ -743,6 +743,13 @@ let downschedule_compensation_code sb code pm live_renames ~next_free_pc ~next_f
PTree.empty
in
let to_insert_pcs = PTree.combine merge_append to_insert_pcs to_insert_below_side_exit in
+ let to_rename =
+ PTree.elements to_insert_pcs
+ |> List.map (fun (_side_exit_pc, pcs) ->
+ List.filter_map (fun pc -> RTL.instr_defs @@ get_some @@ PTree.get pc code) pcs )
+ |> List.flatten
+ |> ListLabels.fold_left ~init:Regset.empty ~f:(fun regs r -> Regset.add r regs)
+ in
let to_insert = PTree.map (fun side_exit_pc pcs -> List.map (fun pc -> get_some @@ PTree.get pc code) pcs) to_insert_pcs in
let code = PTree.fold
(fun code _pc pcs ->
@@ -753,7 +760,7 @@ let downschedule_compensation_code sb code pm live_renames ~next_free_pc ~next_f
in
debug_flag := old_debug_flag;
- (to_insert, code)
+ (to_insert, to_rename, code)
let my_merge_no_overwrite m1 m2 =
PTree.combine (fun x y -> match (x, y) with
@@ -974,8 +981,8 @@ let scheduler f =
let module P = Camlcoq.P in
let module N = Camlcoq.Nat in *)
(* TODO: - make pmSSA + if-lifting a lot more customizable via compiler flags
- * - investigate how to alleviate problem of hidden (staged) new registers/pcs
- * + use higher pcs in non-staged code so that the max_pc approach "works"? *)
+ * - add compiler flags to select between ideal_schedule and ideal_schedule'
+ * - control for amount of code duplication *)
let open! Duplicateaux in
let f_rtl = f.fn_RTL in
let code = f_rtl.fn_code in
@@ -983,6 +990,11 @@ let scheduler f =
let id_ptree = PTree.map (fun n i -> n) (f.fn_path) in
let entry = f.fn_RTL.fn_entrypoint in
let pm = f.fn_path in
+ let do_nothing = ((((code, entry), pm), id_ptree), None) in
+ (* TODO: Add flag to select between "conserative" downard scheduling and the one that
+ * allows moving memory stores below the next side exit (singular). *)
+ if not !Clflags.option_fpoormansssa && !Clflags.option_fliftif < 1 then do_nothing
+ else (* NB: Early exit above *)
let _orig_pm = pm in
let typing = get_ok @@ RTLtyping.type_function f.fn_RTL in
let is_loop_header = Duplicateaux.get_loop_headers code (f_rtl.fn_entrypoint) in
@@ -1009,8 +1021,9 @@ let scheduler f =
in
let next_free_pc = next_free_pc code |> Camlcoq.P.of_int in
- (* Apply aliasing code *)
+ let next_free_reg = max_reg_function f.fn_RTL |> Camlcoq.P.succ in
+ (* Apply aliasing code *)
let old_debug_flag = !debug_flag in
debug_flag := false;
debug "Before adding aliasing code.\n";
@@ -1049,20 +1062,19 @@ let scheduler f =
debug "\n";
flush_all ();
- let next_free_reg = max_reg_function f.fn_RTL |> Camlcoq.P.succ in
let (code, sb_renamings, next_free_reg) =
- ListLabels.fold_left superblocks
- ~init:(code, [], next_free_reg)
- ~f:(fun (code, sb_renamings, next_free_reg) sb ->
- let first_pc = sb.instructions.(0) in
- let pi = match PTree.get first_pc pm with
- | Some x -> x
- | None ->
- Printf.eprintf "Failed to get path_info for pc = %d" (Camlcoq.P.to_int first_pc);
- failwith "Did not get some" in
- let (code, live_renames, next_free_reg) = local_single_assignment sb code pi.input_regs ~next_free_reg in
- (code, (sb, live_renames)::sb_renamings, next_free_reg)
- )
+ if not !Clflags.option_fpoormansssa then
+ (code, List.map (fun sb -> (sb, PTree.empty)) superblocks, next_free_reg)
+ else (
+ debug "pmSSA path\n"; flush_all ();
+ ListLabels.fold_left superblocks
+ ~init:(code, [], next_free_reg)
+ ~f:(fun (code, sb_renamings, next_free_reg) sb ->
+ let first_pc = sb.instructions.(0) in
+ let pi = get_some @@ PTree.get first_pc pm in
+ let (code, live_renames, next_free_reg) = local_single_assignment sb code pi.input_regs ~next_free_reg in
+ (code, (sb, live_renames)::sb_renamings, next_free_reg)
+ ) )
in
debug "After renaming :\n";
@@ -1082,21 +1094,23 @@ let scheduler f =
(* WARNING: mutation *)
let code = ref code in
- let sb_renamings_insts =
+ let sb_toinsert_torename =
ListLabels.map sb_renamings_insts
~f:(fun (sb, live_renames, to_insert) ->
- let (to_insert_compensation, code') = downschedule_compensation_code sb !code pm live_renames ~next_free_pc ~next_free_reg in
+ let (to_insert_compensation, to_rename, code') = downschedule_compensation_code sb !code pm live_renames ~next_free_pc ~next_free_reg in
code := code';
let to_insert = PTree.combine merge_append to_insert_compensation to_insert in
- (sb, live_renames, to_insert) )
+ (sb, to_insert, to_rename) )
in
let code = !code in
- let (superblocks, code, pm, next_free_pc) = ListLabels.fold_left sb_renamings_insts
+ (* Insert the compensation code (downward scheduling) and restoration code (register
+ * renaming). *)
+ let (superblocks_torename, code, pm, next_free_pc) = ListLabels.fold_left sb_toinsert_torename
~init:([], code, pm, next_free_pc)
- ~f:(fun (sbs, code, pm, next_free_pc) (sb, _live_renames, to_insert) ->
+ ~f:(fun (sbs, code, pm, next_free_pc) (sb, to_insert, to_rename) ->
let (sb, code, pm, next_free_pc) = insert_code sb code pm to_insert ~next_free_pc in
- (sb::sbs, code, pm, next_free_pc) )
+ ((sb, to_rename)::sbs, code, pm, next_free_pc) )
in
debug "After inserting compensation and restoration code:\n";
@@ -1106,10 +1120,37 @@ let scheduler f =
debug "\n";
flush_all ();
- let (code, staged_dupcode, staged_revmap, next_free_pc) = ListLabels.fold_left superblocks
- ~init:(code, PTree.empty, PTree.empty, (Camlcoq.P.to_int next_free_pc))
- ~f:(fun (code, staged_dupcode, staged_revmap, next_free_pc) sb ->
- stage_duplication sb code staged_dupcode staged_revmap ~next_free_pc )
+ let (superblocks, code, pm, next_free_pc, next_free_reg) =
+ if !Clflags.option_fpoormansssa then (
+ (* In principle we need to do something like this because if we do not systematically
+ * rename registers, the downward scheduling might copy a series of instructions like
+ * this: i = i + 1, which when copied twice (to move below two side exits) is
+ * incorrect unless we rename the register i (or at least its second redefinition).
+ * However, at least the benchmarks do not seem to trigger this special case.
+ ListLabels.fold_left superblocks_torename
+ ~init:([], code, pm, next_free_pc, next_free_reg)
+ ~f:(fun (sbs, code, pm, next_free_pc, next_free_reg) (sb, to_rename) ->
+ let pi = get_some @@ PTree.get sb.instructions.(0) pm in
+ let liveatentry = pi.input_regs in
+ let (code, live_renames, next_free_reg) = rename_regs ~liveatentry ~only_rename:to_rename sb code ~next_free_reg in
+ let restoration_insts = restoration_instructions live_renames in
+ let (sb, code, pm, next_free_pc) = insert_code sb code pm restoration_insts ~next_free_pc in
+ (sb::sbs, code, pm, next_free_pc, next_free_reg) ) *)
+ (fst @@ List.split superblocks_torename, code, pm, next_free_pc, next_free_reg)
+ ) else
+ (fst @@ List.split superblocks_torename, code, pm, next_free_pc, next_free_reg)
+ in
+
+ let (code, to_lift) =
+ if !Clflags.option_fliftif > 0 then
+ let (code, staged_dupcode, staged_revmap, next_free_pc) = ListLabels.fold_left superblocks
+ ~init:(code, PTree.empty, PTree.empty, (Camlcoq.P.to_int next_free_pc))
+ ~f:(fun (code, staged_dupcode, staged_revmap, next_free_pc) sb ->
+ stage_duplication sb code staged_dupcode staged_revmap ~next_free_pc )
+ in
+ (code, Some(staged_revmap, staged_dupcode))
+ else
+ (code, None)
in
debug "After staging the duplication (\"if-lifting\") :\n";
@@ -1121,4 +1162,4 @@ let scheduler f =
debug_flag := old_debug_flag;
- ((((code, entry), pm), id_ptree), Some (staged_revmap, staged_dupcode))
+ ((((code, entry), pm), id_ptree), to_lift)