diff options
author | Justus Fasse <justus.fasse@etu.univ-grenoble-alpes.fr> | 2021-07-13 21:12:39 +0200 |
---|---|---|
committer | Justus Fasse <justus.fasse@etu.univ-grenoble-alpes.fr> | 2021-07-13 21:13:07 +0200 |
commit | 931e10341d58840b5747ec607cdac72444cde0b2 (patch) | |
tree | b228437ed1d13610427bd235ada0b570d64b38c3 | |
parent | f30bf16eadba6824354cab1be131d10f3f3abd67 (diff) | |
download | compcert-kvx-931e10341d58840b5747ec607cdac72444cde0b2.tar.gz compcert-kvx-931e10341d58840b5747ec607cdac72444cde0b2.zip |
Try to somehwat separate downward scheduling from register renaming
-rw-r--r-- | scheduling/MyRTLpathScheduleraux.ml | 97 |
1 files changed, 69 insertions, 28 deletions
diff --git a/scheduling/MyRTLpathScheduleraux.ml b/scheduling/MyRTLpathScheduleraux.ml index 1b23bc92..c35f3bea 100644 --- a/scheduling/MyRTLpathScheduleraux.ml +++ b/scheduling/MyRTLpathScheduleraux.ml @@ -743,6 +743,13 @@ let downschedule_compensation_code sb code pm live_renames ~next_free_pc ~next_f PTree.empty in let to_insert_pcs = PTree.combine merge_append to_insert_pcs to_insert_below_side_exit in + let to_rename = + PTree.elements to_insert_pcs + |> List.map (fun (_side_exit_pc, pcs) -> + List.filter_map (fun pc -> RTL.instr_defs @@ get_some @@ PTree.get pc code) pcs ) + |> List.flatten + |> ListLabels.fold_left ~init:Regset.empty ~f:(fun regs r -> Regset.add r regs) + in let to_insert = PTree.map (fun side_exit_pc pcs -> List.map (fun pc -> get_some @@ PTree.get pc code) pcs) to_insert_pcs in let code = PTree.fold (fun code _pc pcs -> @@ -753,7 +760,7 @@ let downschedule_compensation_code sb code pm live_renames ~next_free_pc ~next_f in debug_flag := old_debug_flag; - (to_insert, code) + (to_insert, to_rename, code) let my_merge_no_overwrite m1 m2 = PTree.combine (fun x y -> match (x, y) with @@ -974,8 +981,8 @@ let scheduler f = let module P = Camlcoq.P in let module N = Camlcoq.Nat in *) (* TODO: - make pmSSA + if-lifting a lot more customizable via compiler flags - * - investigate how to alleviate problem of hidden (staged) new registers/pcs - * + use higher pcs in non-staged code so that the max_pc approach "works"? *) + * - add compiler flags to select between ideal_schedule and ideal_schedule' + * - control for amount of code duplication *) let open! Duplicateaux in let f_rtl = f.fn_RTL in let code = f_rtl.fn_code in @@ -983,6 +990,11 @@ let scheduler f = let id_ptree = PTree.map (fun n i -> n) (f.fn_path) in let entry = f.fn_RTL.fn_entrypoint in let pm = f.fn_path in + let do_nothing = ((((code, entry), pm), id_ptree), None) in + (* TODO: Add flag to select between "conserative" downard scheduling and the one that + * allows moving memory stores below the next side exit (singular). *) + if not !Clflags.option_fpoormansssa && !Clflags.option_fliftif < 1 then do_nothing + else (* NB: Early exit above *) let _orig_pm = pm in let typing = get_ok @@ RTLtyping.type_function f.fn_RTL in let is_loop_header = Duplicateaux.get_loop_headers code (f_rtl.fn_entrypoint) in @@ -1009,8 +1021,9 @@ let scheduler f = in let next_free_pc = next_free_pc code |> Camlcoq.P.of_int in - (* Apply aliasing code *) + let next_free_reg = max_reg_function f.fn_RTL |> Camlcoq.P.succ in + (* Apply aliasing code *) let old_debug_flag = !debug_flag in debug_flag := false; debug "Before adding aliasing code.\n"; @@ -1049,20 +1062,19 @@ let scheduler f = debug "\n"; flush_all (); - let next_free_reg = max_reg_function f.fn_RTL |> Camlcoq.P.succ in let (code, sb_renamings, next_free_reg) = - ListLabels.fold_left superblocks - ~init:(code, [], next_free_reg) - ~f:(fun (code, sb_renamings, next_free_reg) sb -> - let first_pc = sb.instructions.(0) in - let pi = match PTree.get first_pc pm with - | Some x -> x - | None -> - Printf.eprintf "Failed to get path_info for pc = %d" (Camlcoq.P.to_int first_pc); - failwith "Did not get some" in - let (code, live_renames, next_free_reg) = local_single_assignment sb code pi.input_regs ~next_free_reg in - (code, (sb, live_renames)::sb_renamings, next_free_reg) - ) + if not !Clflags.option_fpoormansssa then + (code, List.map (fun sb -> (sb, PTree.empty)) superblocks, next_free_reg) + else ( + debug "pmSSA path\n"; flush_all (); + ListLabels.fold_left superblocks + ~init:(code, [], next_free_reg) + ~f:(fun (code, sb_renamings, next_free_reg) sb -> + let first_pc = sb.instructions.(0) in + let pi = get_some @@ PTree.get first_pc pm in + let (code, live_renames, next_free_reg) = local_single_assignment sb code pi.input_regs ~next_free_reg in + (code, (sb, live_renames)::sb_renamings, next_free_reg) + ) ) in debug "After renaming :\n"; @@ -1082,21 +1094,23 @@ let scheduler f = (* WARNING: mutation *) let code = ref code in - let sb_renamings_insts = + let sb_toinsert_torename = ListLabels.map sb_renamings_insts ~f:(fun (sb, live_renames, to_insert) -> - let (to_insert_compensation, code') = downschedule_compensation_code sb !code pm live_renames ~next_free_pc ~next_free_reg in + let (to_insert_compensation, to_rename, code') = downschedule_compensation_code sb !code pm live_renames ~next_free_pc ~next_free_reg in code := code'; let to_insert = PTree.combine merge_append to_insert_compensation to_insert in - (sb, live_renames, to_insert) ) + (sb, to_insert, to_rename) ) in let code = !code in - let (superblocks, code, pm, next_free_pc) = ListLabels.fold_left sb_renamings_insts + (* Insert the compensation code (downward scheduling) and restoration code (register + * renaming). *) + let (superblocks_torename, code, pm, next_free_pc) = ListLabels.fold_left sb_toinsert_torename ~init:([], code, pm, next_free_pc) - ~f:(fun (sbs, code, pm, next_free_pc) (sb, _live_renames, to_insert) -> + ~f:(fun (sbs, code, pm, next_free_pc) (sb, to_insert, to_rename) -> let (sb, code, pm, next_free_pc) = insert_code sb code pm to_insert ~next_free_pc in - (sb::sbs, code, pm, next_free_pc) ) + ((sb, to_rename)::sbs, code, pm, next_free_pc) ) in debug "After inserting compensation and restoration code:\n"; @@ -1106,10 +1120,37 @@ let scheduler f = debug "\n"; flush_all (); - let (code, staged_dupcode, staged_revmap, next_free_pc) = ListLabels.fold_left superblocks - ~init:(code, PTree.empty, PTree.empty, (Camlcoq.P.to_int next_free_pc)) - ~f:(fun (code, staged_dupcode, staged_revmap, next_free_pc) sb -> - stage_duplication sb code staged_dupcode staged_revmap ~next_free_pc ) + let (superblocks, code, pm, next_free_pc, next_free_reg) = + if !Clflags.option_fpoormansssa then ( + (* In principle we need to do something like this because if we do not systematically + * rename registers, the downward scheduling might copy a series of instructions like + * this: i = i + 1, which when copied twice (to move below two side exits) is + * incorrect unless we rename the register i (or at least its second redefinition). + * However, at least the benchmarks do not seem to trigger this special case. + ListLabels.fold_left superblocks_torename + ~init:([], code, pm, next_free_pc, next_free_reg) + ~f:(fun (sbs, code, pm, next_free_pc, next_free_reg) (sb, to_rename) -> + let pi = get_some @@ PTree.get sb.instructions.(0) pm in + let liveatentry = pi.input_regs in + let (code, live_renames, next_free_reg) = rename_regs ~liveatentry ~only_rename:to_rename sb code ~next_free_reg in + let restoration_insts = restoration_instructions live_renames in + let (sb, code, pm, next_free_pc) = insert_code sb code pm restoration_insts ~next_free_pc in + (sb::sbs, code, pm, next_free_pc, next_free_reg) ) *) + (fst @@ List.split superblocks_torename, code, pm, next_free_pc, next_free_reg) + ) else + (fst @@ List.split superblocks_torename, code, pm, next_free_pc, next_free_reg) + in + + let (code, to_lift) = + if !Clflags.option_fliftif > 0 then + let (code, staged_dupcode, staged_revmap, next_free_pc) = ListLabels.fold_left superblocks + ~init:(code, PTree.empty, PTree.empty, (Camlcoq.P.to_int next_free_pc)) + ~f:(fun (code, staged_dupcode, staged_revmap, next_free_pc) sb -> + stage_duplication sb code staged_dupcode staged_revmap ~next_free_pc ) + in + (code, Some(staged_revmap, staged_dupcode)) + else + (code, None) in debug "After staging the duplication (\"if-lifting\") :\n"; @@ -1121,4 +1162,4 @@ let scheduler f = debug_flag := old_debug_flag; - ((((code, entry), pm), id_ptree), Some (staged_revmap, staged_dupcode)) + ((((code, entry), pm), id_ptree), to_lift) |