diff options
Diffstat (limited to 'scheduling/MyRTLpathScheduleraux.ml')
-rw-r--r-- | scheduling/MyRTLpathScheduleraux.ml | 1619 |
1 files changed, 0 insertions, 1619 deletions
diff --git a/scheduling/MyRTLpathScheduleraux.ml b/scheduling/MyRTLpathScheduleraux.ml deleted file mode 100644 index 0375d26c..00000000 --- a/scheduling/MyRTLpathScheduleraux.ml +++ /dev/null @@ -1,1619 +0,0 @@ -open DebugPrint -open RTLpathLivegenaux -open RTLpath -open RTLpathCommon -open RTL -open Maps -open Registers - -let print_superblock sb code = - let insts = sb.instructions in - let li = sb.liveins in - let outs = sb.s_output_regs in - begin - debug "{ instructions = "; print_instructions (Array.to_list insts) code; debug "\n"; - debug " liveins = "; print_ptree_regset li; debug "\n"; - debug " output_regs = "; print_regset outs; debug "}" - end - -let print_superblocks lsb code = - let rec f = function - | [] -> () - | sb :: lsb -> (print_superblock sb code; debug ",\n"; f lsb) - in begin - debug "[\n"; - f lsb; - debug "]" - end - -let get_superblocks = RTLpathScheduleraux.get_superblocks - -let get_ok = RTLpathScheduleraux.get_ok - -let apply_map' fw n = Duplicateaux.ptree_get_some n fw - -let apply_map_opt fw n = - match PTree.get n fw with - | Some n' -> n' - | None -> n - -let change_arg_regs inst fwmap = - let open Datatypes in - match inst with - | Icall (sgn, fn, args, dest, succ) -> - let fn' = - ( match fn with - | Coq_inl r -> Datatypes.Coq_inl (apply_map_opt fwmap r) - | Coq_inr _ as ident -> (* function name *) ident ) - in - let args' = List.map (apply_map_opt fwmap) args in - Icall (sgn, fn', args', dest, succ) - | Ibuiltin (ef, args, dest, succ) -> - let args' = List.map (AST.map_builtin_arg (apply_map_opt fwmap)) args in - Ibuiltin (ef, args', dest, succ) - | Ijumptable (arg, tbl) -> Ijumptable (apply_map_opt fwmap arg, tbl) - | Itailcall (sgn, fn, args) -> - let fn' = - ( match fn with - | Coq_inl r -> Datatypes.Coq_inl (apply_map_opt fwmap r) - | Coq_inr _ as ident -> (* function name *) ident ) - in - let args' = List.map (apply_map_opt fwmap) args in - Itailcall (sgn, fn', args') - | Ireturn (reg_opt) -> - ( match reg_opt with - | None -> Ireturn None - | Some(reg) -> Ireturn (Some (apply_map_opt fwmap reg)) ) - | Icond (a, b, n1, n2, i) -> Icond (a, List.map (apply_map_opt fwmap) b, n1, n2, i) - | Inop n -> Inop n - | Iop (a, b, c, n) -> Iop (a, List.map (apply_map_opt fwmap) b, c, n) - | Iload (a, b, c, d, e, n) -> - Iload (a, b, c, List.map (apply_map_opt fwmap) d, e, n) - | Istore (a, b, c, d, n) -> Istore (a, b, List.map (apply_map_opt fwmap) c, apply_map_opt fwmap d, n) - -let change_dest_reg inst fwmap = - match inst with - | Inop _ - | Istore _ - | Itailcall _ - (* XXX builtin is a special case?! *) - | Ibuiltin _ - | Icond _ - | Ijumptable _ - | Ireturn _ - | Icall _ -> failwith "unexpectedly asked to change dest reg" - | Iop(op, args, res, s) -> Iop(op, args, (apply_map' fwmap res), s) - | Iload(trap, chunk, addr, args, dst, s) -> Iload(trap, chunk, addr, args, (apply_map' fwmap dst), s) - -let maybe_change_dest_reg ?only_rename inst fwmap ~next_free_reg = - let do_nothing = (inst, fwmap, next_free_reg) in - match inst with - | Icall _ - | Ibuiltin _ - | Ijumptable _ - | Itailcall _ - | Ireturn _ -> - (* Do not rename registers if the instructions MUST end a path because we cannot add - * restoration code afterwards. *) - do_nothing - | _ as i -> - match RTL.instr_defs i with - | None -> do_nothing - | Some r -> - if Option.is_some only_rename && not (Regset.mem r (get_some only_rename)) then - do_nothing - else - (match PTree.get r fwmap with - | None -> (i, PTree.set r r fwmap, next_free_reg) - | Some _previous_name -> - let new_name = next_free_reg in - let fwmap = PTree.set r new_name fwmap in - (change_dest_reg i fwmap, fwmap, Camlcoq.P.succ next_free_reg) ) - -let ptree_get_or_default ptree key default = - match PTree.get key ptree with - | None -> default - | Some value -> value - -let is_icond = function - | Icond _ -> true - | _ -> false - -let side_exit_idxs sb code = - Array.to_list sb.instructions - |> List.map (fun pc -> get_some @@ PTree.get pc code) - |> List.mapi (fun i inst -> (i, inst)) - |> List.filter (fun (_i, inst) -> is_icond inst - && RTLpathLivegenaux.predicted_successor inst - |> Option.is_some) - |> List.map (fun (i, _inst) -> i) - -let side_exit_pcs sb code = - side_exit_idxs sb code |> List.map (fun i -> sb.instructions.(i)) - -module InsertPosition = struct - type t = - | Above of Camlcoq.P.t - | Below of Camlcoq.P.t - let anchor = function - | Above x | Below x -> x - - let pseudo_map pos ~f = match pos with - | Above x -> Above (f x) - | Below x -> Below (f x) - let compare x y = - match Camlcoq.P.compare (anchor x) (anchor y) with - | 0 -> (match x, y with - | Above _, Above _ | Below _, Below _ -> 0 - | Above _, Below _ -> 1 - | Below _, Above _ -> -1 ) - | c -> c -end - -module InsertPositionMap = Map.Make(InsertPosition) - - let insert_code sb code pm (to_insert : RTL.instruction list InsertPositionMap.t) ~next_free_pc = - let old_debug_flag = !debug_flag in - debug_flag := false; - debug "Before code insertion:\n"; - print_superblock sb code; - debug "\n"; flush_all (); - - debug "Code to insert:\n"; - InsertPositionMap.iter - (fun pos insts -> - debug "%s %d: " - (match pos with - | InsertPosition.Above _ -> "Above" - | InsertPosition.Below _ -> "Below") - (Camlcoq.P.to_int @@ InsertPosition.anchor pos) - ; - List.iter (fun inst -> - if !debug_flag then PrintRTL.print_instruction stdout (0, inst)) insts; - debug "\n"; flush_all () - ) - to_insert - ; - - let next_free_pc = - let next_free_pc = ref next_free_pc in - (fun () -> - let pc = !next_free_pc in - next_free_pc := Camlcoq.P.succ !next_free_pc; - pc ) - in - let original_length = Array.length sb.instructions in - let orig_sort_keys = - Duplicateaux.generate_fwmap - (Array.to_list sb.instructions) - (List.init original_length (fun i -> i * 2)) - PTree.empty - in - let new_key pos = - let open InsertPosition in - let anchor_key = - InsertPosition.anchor pos - |> apply_map' orig_sort_keys - in - match pos with - | Above _ -> anchor_key - 1 - | Below _ -> anchor_key + 1 - in - let (code, pc_lists, sort_keys) = - InsertPositionMap.fold - (fun (pos : InsertPosition.t) insts (code, pc_lists, sort_keys) -> - let insts_length = List.length insts in - let key = new_key pos in - let pcs = List.init insts_length (fun _ -> next_free_pc ()) in - let new_sort_keys = List.init insts_length (fun _ -> key) in - let code = - ListLabels.fold_left2 pcs insts - ~init:code - ~f:(fun code pc inst -> PTree.set pc inst code) - in - let sort_keys = Duplicateaux.generate_fwmap pcs new_sort_keys sort_keys in - (code, pcs::pc_lists, sort_keys) ) - to_insert - (code, [], orig_sort_keys) - in - let new_pcs = List.flatten pc_lists |> Array.of_list in - let last_instruction = [| sb.instructions.(original_length - 1) |] in - let upto_last = - if original_length > 1 then - Array.sub sb.instructions 0 (original_length - 1) - else [| |] - in - let instructions = Array.concat [upto_last; new_pcs; last_instruction ] in - let instructions_order = Array.copy instructions in - ArrayLabels.stable_sort instructions_order - ~cmp:(fun x y -> Int.compare (apply_map' sort_keys x) (apply_map' sort_keys y)); - - let new_length = Array.length instructions in - let fwmap = Duplicateaux.generate_fwmap (Array.to_list instructions_order) (Array.init new_length (fun i -> i) |> Array.to_list) PTree.empty in - let fwmap_pc = - Duplicateaux.generate_fwmap - (Array.to_list sb.instructions) - (Array.to_list @@ Array.map (fun pc -> instructions.(apply_map' fwmap pc)) sb.instructions) - PTree.empty - in - let liveins' = - PTree.fold - (fun liveins' pc live_regs -> - PTree.set (apply_map' fwmap_pc pc) live_regs liveins') - sb.liveins PTree.empty - in - - let sb' = {sb with instructions = instructions; liveins = liveins'} in - let code = RTLpathScheduleraux.apply_schedule code sb' instructions_order in - - let num_added = new_length - original_length in - let first_pc = sb.instructions.(0) in - let pi = get_some @@ PTree.get first_pc pm in - let module N = Camlcoq.Nat in - let new_size = N.to_int pi.psize + num_added |> N.of_int in - let pm = PTree.set first_pc {pi with psize = new_size} pm in - - debug_flag := old_debug_flag; - (sb', code, pm, next_free_pc (), fwmap_pc) - -let prepend_nops_before_iconds sb code = - (* We need the a first and last instruction so that - * a) the pc of the superblock entry stays the same and - * b) apply_schedule correclty preserved the successors of the last instruction *) - if Array.length sb.instructions < 2 then - (* Early exit, this should probably be replaced by a more general exclusion of - * superblock with just one instruction. *) - InsertPositionMap.empty - else - - (* TODO, probably only need it before side exits *) - let icond_pcs = - Array.to_list sb.instructions - |> List.filter (fun pc -> - let inst = get_some @@ PTree.get pc code in - is_icond inst ) - in - let to_insert = - ListLabels.fold_left icond_pcs - ~init:InsertPositionMap.empty - ~f:(fun acc icond_pc -> InsertPositionMap.add (InsertPosition.Above icond_pc) [Inop Camlcoq.P.one] acc) - in - to_insert - -type renamed = - { old_name : reg - ; new_name : reg } - -let update_live_renames pc live_renames fwmap regs = - Regset.fold - (fun live_reg renames -> - match PTree.get live_reg fwmap with - | None -> renames - | Some(r) when r = live_reg -> renames - | Some(new_name) -> - let old = ptree_get_or_default renames pc [] in - let upd = {old_name = live_reg; new_name} :: old in - PTree.set pc upd renames ) - regs - live_renames - -let my_merge_overwrite m1 m2 = - PTree.combine (fun x y -> match (x, y) with - | None, None -> None - | Some x, None - | None, Some x -> Some x - | Some _, Some y -> Some y - ) m1 m2 - -let rename_regs ?only_rename sb code ~liveatentry ~next_free_reg = - let old_debug_flag = !debug_flag in - - let length = Array.length sb.instructions in - assert (length > 0); - (* Early exit *) - if length = 1 then (code, PTree.empty, next_free_reg) else - (* The last instruction is treated in a special way because if it defines a register, - * that register cannot possibly be used afterwards in the path AND often we cannot - * insert restoration code later after it since it must remain at the end of the path. - * In the future, this may be resolved by only renaming registers which are used - * afterwards in path, which would exclude the register possibly assigned by the last - * instruction. *) - let last_pc = sb.instructions.(length - 1) in - let upto_last = Array.init (length - 1) (fun i -> sb.instructions.(i)) in - let liveatentry = Regset.elements liveatentry in - let fwmap = Duplicateaux.generate_fwmap liveatentry liveatentry PTree.empty in - - let (code, fwmap, live_renames, next_free_reg) = - ArrayLabels.fold_left upto_last - ~init:(code, fwmap, PTree.empty, next_free_reg) - ~f:(fun (code, fwmap, live_renames, next_free_reg) pc -> - (* Rewrite instruction to use potentially renamed registers *) - let inst = get_some @@ PTree.get pc code in - let inst = change_arg_regs inst fwmap in - let (inst, fwmap, next_free_reg) = - maybe_change_dest_reg ?only_rename inst fwmap ~next_free_reg - in - let code = PTree.set pc inst code in - - let (live_renames, fwmap)= - if is_icond inst then ( - (* Pretend that registers that are live at an exit already have a definition, so - * this catches a couple of edge cases where an instruction was not renamed and - * could thus not be moved up. *) - let live_regs = Regset.elements @@ get_some @@ PTree.get pc sb.liveins in - let fwmap' = Duplicateaux.generate_fwmap live_regs live_regs PTree.empty in - let fwmap = my_merge_overwrite fwmap' fwmap in - (update_live_renames pc live_renames fwmap (get_some @@ PTree.get pc sb.liveins) - , fwmap) - ) else - (live_renames, fwmap) - in - (code, fwmap, live_renames, next_free_reg) - ) - in - - let last_inst = get_some @@ PTree.get last_pc code in - let last_inst = change_arg_regs last_inst fwmap in - let (last_inst, fwmap, next_free_reg) = - maybe_change_dest_reg ?only_rename last_inst fwmap ~next_free_reg - in - let code = PTree.set last_pc last_inst code in - - let live_renames = update_live_renames last_pc live_renames fwmap sb.s_output_regs in - - debug_flag := old_debug_flag; - - (code, live_renames, next_free_reg) - -(* Pass over the superblock instruction in-order - * For each redefinition of a register, create a new register name and use that one from - * then on. There is one exception, if the last instruction defines a register, it will be - * left unchanged since the rest of the path cannot possibly use it. - * WARNING: This invalidates the superblock, it will need to be repaired with the - * information returned. *) -let local_single_assignment sb code liveatentry ~next_free_reg = - let old_debug_flag = !debug_flag in - - let (code, live_renames, next_free_reg) = - rename_regs sb code ~liveatentry ~next_free_reg - in - - debug_flag := old_debug_flag; - (code, live_renames, next_free_reg) - -let final_restoration_code sb code live_renames = - let last_inst_pc = sb.instructions.(Array.length sb.instructions - 1) in - let last_inst = get_some @@ PTree.get last_inst_pc code in - let last_inst_is_basic = match last_inst with - | Icall _ - | Ibuiltin _ - | Ijumptable _ - | Itailcall _ - | Ireturn _ - | Icond _ -> false - | _ -> true - in - let final_renames = ptree_get_or_default live_renames last_inst_pc [] in - let live_regs_opt = PTree.get last_inst_pc sb.liveins in - let (above, below) = let open Either in - ListLabels.partition_map final_renames - ~f:(fun {old_name; new_name} -> - let inst = Iop (Op.Omove, [new_name], old_name, Camlcoq.P.one) in - match last_inst_is_basic, live_regs_opt with - | true, _ -> - (* Printf.eprintf "Putting %d below basic inst\n" (Camlcoq.P.to_int old_name); *) - Right inst - | false, None -> - (* Printf.eprintf "Putting %d above unpredicted icond.\n" (Camlcoq.P.to_int old_name); *) - Left inst - | false, Some live_regs -> - if Regset.mem old_name live_regs then ( - (* Printf.eprintf "Putting %d above icond\n" (Camlcoq.P.to_int old_name); *) - Left inst - ) else ( - (* Printf.eprintf "Putting %d below icond\n" (Camlcoq.P.to_int old_name); *) - Right inst)) - in - InsertPositionMap.empty - |> InsertPositionMap.add (InsertPosition.Above last_inst_pc) above - |> InsertPositionMap.add (InsertPosition.Below last_inst_pc) below - -let used_before_redefinition sb code ~offset ~reg = - let length = Array.length sb.instructions in - if not (offset < length) then - raise (Invalid_argument (Printf.sprintf "offset must be less than the superblock's length: %d is not less than %d" offset length)) - ; - let finished = ref false in - let i = ref offset in - let res = ref false in - while (!i < length && not !finished) do - let inst = get_some @@ PTree.get sb.instructions.(!i) code in - if List.mem reg (RTL.instr_uses inst) then ( - res := true; - finished := true; - ) else - () - ; - let defined_reg = RTL.instr_defs inst in - (match defined_reg with - | None -> () - | Some r -> if r = reg then finished := true else ()); - i := !i + 1; - done; - !res - -type restoration_actions = - | Just_restore of renamed - | Restore_and_alias of renamed - -let restoration_instructions' sb code live_renames ~next_free_reg = - let next_free_reg = - let next_free_reg = ref next_free_reg in - (fun () -> - let r = !next_free_reg in - next_free_reg := Camlcoq.P.succ !next_free_reg; - r ) - in - - let length = Array.length sb.instructions in - let pc_to_idx = - Duplicateaux.generate_fwmap - (Array.to_list sb.instructions) - (List.init length (fun i -> i)) - PTree.empty - in - - let live_renames = PTree.map - (fun pc renames -> - let offset = apply_map' pc_to_idx pc in - List.map - (fun rename -> - let {old_name; new_name} = rename in - if used_before_redefinition sb code ~offset ~reg:old_name then - Restore_and_alias rename - else - Just_restore rename) - renames) - live_renames - in - - let to_rename = PTree.map1 - (fun renames -> - let old_names = List.filter_map (fun rename -> - match rename with - | Just_restore _ -> None - | Restore_and_alias {old_name; new_name} -> Some old_name) renames - in - let aliases = List.init (List.length old_names) (fun _ -> next_free_reg ()) in - let fwmap = Duplicateaux.generate_fwmap old_names aliases PTree.empty in - fwmap - ) - live_renames - in - - let to_insert_restore = - PTree.fold - (fun to_insert side_exit_pc renames -> - let alias_map = get_some @@ PTree.get side_exit_pc to_rename in - let insts = ListLabels.map renames - ~f:(fun rename -> - match rename with - | Just_restore {old_name; new_name} -> [Iop (Op.Omove, [new_name], old_name, Camlcoq.P.one)] - | Restore_and_alias {old_name; new_name} -> - [ Iop (Op.Omove, [old_name], apply_map' alias_map old_name, Camlcoq.P.one) - ; Iop (Op.Omove, [new_name], old_name, Camlcoq.P.one)] ) - in - let insts = List.flatten insts in - InsertPositionMap.add (InsertPosition.Above side_exit_pc) insts to_insert) - live_renames - InsertPositionMap.empty - in - (to_insert_restore, to_rename, next_free_reg ()) - -(* Assumes the path is in local single assignment form *) -let intra_path_dependencies (sb : superblock) (code : code) = - let old_debug_flag = !debug_flag in - debug_flag := false; - - (* Directly taken from RTLpathScheduleraux *) - let nr_instr = Array.length sb.instructions in - let trailer_length = - match PTree.get (sb.instructions.(nr_instr-1)) code with - | None -> 0 - | Some ii -> - (match predicted_successor ii with - | Some _ -> 0 - | None -> 1 ) - in - - let module IS = InstructionScheduler in - let opweights = OpWeights.get_opweights () in - let seqa = - ArrayLabels.map (Array.sub sb.instructions 0 (Array.length sb.instructions - trailer_length)) - ~f:(fun i -> - (match PTree.get i code with - | Some ii -> ii - | None -> failwith "MyRTLpathScheduleraux.intra_path_dependencies"), - (match PTree.get i sb.liveins with - | Some s -> s - | None -> Regset.empty) ) - in - let latency_constraints = PrepassSchedulingOracle.get_simple_dependencies opweights seqa in - debug "intra_path_dependencies for superblock:\n"; - print_superblock sb code; - debug "\nlatency_constraints:\n\n"; - ListLabels.iter latency_constraints - ~f:(fun {IS.instr_from; instr_to; latency} -> - debug "instr_from: %2d\ninstr_to: %4d\nlatency: %3d\n\n" instr_from instr_to latency ); - flush_all (); - - let deps = - ListLabels.fold_left latency_constraints - ~init:PTree.empty - ~f:(fun deps {IS.instr_from; instr_to; latency = _} -> - let to_pc = sb.instructions.(instr_to) in - let from_pc = sb.instructions.(instr_from) in - let inst_deps = ptree_get_or_default deps to_pc HashedSet.PSet.empty in - let inst_deps' = HashedSet.PSet.add from_pc inst_deps in - PTree.set to_pc inst_deps' deps ) - in - debug_flag := old_debug_flag; - deps - -let transitive_dependencies deps pc = - let old_debug_flag = !debug_flag in - let immediate_deps = ptree_get_or_default deps pc HashedSet.PSet.empty in - - let rec iter ~acc ~todo ~seen = - match todo with - | [] -> acc - | pc'::todo -> (* NB: todo has just shrunk *) - if HashedSet.PSet.contains seen pc' then - iter ~acc ~todo ~seen - else - let deps_of_dep = ptree_get_or_default deps pc' HashedSet.PSet.empty in - let new_todo = HashedSet.PSet.subtract deps_of_dep acc |> HashedSet.PSet.elements in - iter - ~acc:(HashedSet.PSet.add pc' acc) - ~todo:(new_todo @ todo) - ~seen:(HashedSet.PSet.add pc' seen) - in - let transitive_dependencies = - iter - ~acc:HashedSet.PSet.empty - ~todo:(HashedSet.PSet.elements immediate_deps) - ~seen:HashedSet.PSet.empty - in - debug_flag := old_debug_flag; - transitive_dependencies - -let moved_dependencies deps order side_exit_pc = - let old_debug_flag = !debug_flag in - - let dependencies = transitive_dependencies deps side_exit_pc in - let side_exit_pc_idx = apply_map' order side_exit_pc in - let moved_dependencies = - HashedSet.PSet.filter - (fun pc -> - let dep_idx = apply_map' order pc in - dep_idx > side_exit_pc_idx ) - dependencies - in - debug_flag := old_debug_flag; - moved_dependencies - -let update_liveins liveins live_renames = - PTree.map - (fun pc liveregs -> - match PTree.get pc live_renames with - | None -> liveregs - | Some renames -> - let old_to_new = ListLabels.fold_left renames - ~init:PTree.empty - ~f:(fun acc {old_name; new_name} -> PTree.set old_name new_name acc) - in - (* There doesn't seem to be a proper map function for Regset.t *) - let liveregs' = - Regset.fold - (fun r acc -> - let r' = ptree_get_or_default old_to_new r r in - Regset.add r' acc) - liveregs - Regset.empty - in - liveregs' - ) - liveins - -let replace_iconds_by_ocmps sb code ~next_free_reg = - let module P = Camlcoq.P in - let (code, _previous_icond, next_free_reg) = - ArrayLabels.fold_left sb.instructions - ~init:(code, None, next_free_reg) - ~f:(fun (code, previous_icond_proxy_reg, next_free_reg) pc -> - let inst = get_some @@ PTree.get pc code in - match inst with - | Istore(chunk, addr, args, src, succ) -> - ( match previous_icond_proxy_reg with - | None -> (code, previous_icond_proxy_reg, next_free_reg) - | Some r -> - let istore' = Istore(chunk, addr, r::args, src, succ) in - let code' = PTree.set pc istore' code in - (code', previous_icond_proxy_reg, next_free_reg) ) - | Icond(cond, args, ifso, ifnot, prediction) -> - (match prediction with - | None -> - (* Case only happens at the very end of the path; no transformation necessary *) - assert(sb.instructions.(Array.length sb.instructions - 1) = pc); - (code, None, next_free_reg) - | Some true -> ( - let ocmp = match previous_icond_proxy_reg with - | None -> Iop((Op.Ocmp cond), args, next_free_reg, ifso) - | Some r -> Iop((Op.Ocmp cond), r::args, next_free_reg, ifso) - in - let code' = PTree.set pc ocmp code in - (code', Some next_free_reg, P.succ next_free_reg) ) - | Some false -> ( - let ocmp = match previous_icond_proxy_reg with - | None -> Iop((Op.Ocmp cond), args, next_free_reg, ifnot) - | Some r -> Iop((Op.Ocmp cond), r::args, next_free_reg, ifnot) - in - let code' = PTree.set pc ocmp code in - (code', Some next_free_reg, P.succ next_free_reg) )) - | Iload(trap, chunk, addr, args, dest, succ) -> ( - if !Machine.config.Machine.has_non_trapping_loads then - (code, previous_icond_proxy_reg, next_free_reg) - else - match previous_icond_proxy_reg with - | None -> (code, previous_icond_proxy_reg, next_free_reg) - | Some r -> - let load' = Iload(trap, chunk, addr, r::args, dest, succ) in - let code' = PTree.set pc load' code in - (code', previous_icond_proxy_reg, next_free_reg) ) - | _ -> (code, previous_icond_proxy_reg, next_free_reg) - ) - in - (code, next_free_reg) - -let is_store = function - | Istore _ -> true - | _ -> false - -type heuristic_mode = - | Default - | Ignore_liveness - | Move_stores - -let ideal_schedule'' sb code mode = - let old_debug_flag = !debug_flag in - - let dep_function = match mode with - | Default -> PrepassSchedulingOracle.get_simple_dependencies' - | Ignore_liveness -> PrepassSchedulingOracle.get_fake_deps_liveness - | Move_stores -> PrepassSchedulingOracle.get_fake_deps_liveness_stores - in - - (* copy-paste from RTLpathScheduleraux.schedule_superblock *) - let nr_instr = Array.length sb.instructions in - let trailer_length = - match PTree.get (sb.instructions.(nr_instr-1)) code with - | None -> 0 - | Some ii -> - match predicted_successor ii with - | Some _ -> 0 - | None -> 1 in - let live_regs_entry = RTLpathScheduleraux.get_live_regs_entry sb code in - let seqa = - Array.map (fun i -> - (match PTree.get i code with - | Some ii -> ii - | None -> failwith "RTLpathScheduleraux.schedule_superblock"), - (match PTree.get i sb.liveins with - | Some s -> s - | None -> Regset.empty)) - (Array.sub sb.instructions 0 (nr_instr-trailer_length)) - in - let nr_scheduled_instr = nr_instr - trailer_length in - (* Copy-pasted from PrepassSchedulingOracle.schedule_sequence *) - let opweights = OpWeights.get_opweights () in - (* NB: Early exit *) - if (Array.length seqa) <= 1 then None else - (* Copy-pasted from PrepassSchedulingOracle.define_problem *) - let problem = - let deps = dep_function opweights seqa in - debug_flag := false; - debug "Fake deps:\n"; - if !debug_flag then ( - deps - |> List.iter (fun {InstructionScheduler.instr_from; instr_to; latency} -> - debug "%2d depends on %2d\n" (Camlcoq.P.to_int sb.instructions.(instr_to)) (Camlcoq.P.to_int sb.instructions.(instr_from))); - flush_all (); - ); - { InstructionScheduler.max_latency = -1 - ; resource_bounds = opweights.PrepassSchedulingOracleDeps.pipelined_resource_bounds - ; live_regs_entry = live_regs_entry - ; typing = sb.typing - ; reference_counting = Option.some @@ RTLpathScheduleraux.reference_counting seqa sb.s_output_regs sb.typing - ; instruction_usages = Array.map (PrepassSchedulingOracle.resources_of_instruction opweights) (Array.map fst seqa) - ; latency_constraints = deps } - in - match PrepassSchedulingOracle.prepass_scheduler_by_name - (!Clflags.option_fprepass_sched) - problem - (Array.map (fun (ins, _) -> - match ins with - | Icond _ -> true - | _ -> false) seqa) - with - | None -> - debug_flag := old_debug_flag; - failwith "no solution in prepass scheduling\n" - | Some solution -> - let positions = Array.init nr_scheduled_instr (fun i -> i) in - let final_time = solution.(nr_scheduled_instr) in - Array.sort (fun i j -> - let si = solution.(i) and sj = solution.(j) in - if si < sj then -1 - else if si > sj then 1 - else i - j) positions; - - let ins' = - Array.append - (Array.map (fun i -> sb.instructions.(i)) positions) - (Array.sub sb.instructions (nr_instr - trailer_length) trailer_length) in - Some (ins', final_time) - -(* Improved scheduling heuristic which allows moving memory writes downwards by turning - * Iconds into Ocmps for the purpose of dependency calculations. *) -let ideal_schedule' sb code ~next_free_reg = - let old_debug_flag = !debug_flag in - let (fake_code, _next_free_reg) = replace_iconds_by_ocmps sb code ~next_free_reg in - (* Does PTree.empty work or do I need to map the entries to Regset.empty *) - let fake_sb = { sb with liveins = PTree.empty } in - (* Copied from RTLpathScheduleraux.schedule_superblock *) - let nr_instr_sb = Array.length sb.instructions in - assert (nr_instr_sb = Array.length fake_sb.instructions); - - let trailer_length = - match PTree.get (sb.instructions.(nr_instr_sb - 1)) code with - | None -> 0 - | Some ii -> - match predicted_successor ii with - | Some _ -> 0 - | None -> 1 - in - let seqa = - (Array.map (fun i -> - (match PTree.get i code with - | Some ii -> ii - | None -> failwith "MyRTLpathScheduleraux.ideal_schedule'"), - (match PTree.get i sb.liveins with - | Some s -> s - | None -> Regset.empty)) - (Array.sub sb.instructions 0 (nr_instr_sb - trailer_length))) - in - let fake_seqa = - (Array.map (fun i -> - (match PTree.get i fake_code with - | Some ii -> ii - | None -> failwith "MyRTLpathScheduleraux.ideal_schedule'"), - (match PTree.get i fake_sb.liveins with - | Some s -> s - | None -> Regset.empty)) - (Array.sub fake_sb.instructions 0 (nr_instr_sb - trailer_length))) - in - (* Copied from PrepassSchedulingOracle.schedule_sequence *) - let opweights = OpWeights.get_opweights () in - (* WARNING: Early exit in case there is only on instruction to schedule *) - if (Array.length fake_seqa) <= 1 then None else - let nr_instr_fake_seqa = Array.length fake_seqa in - assert (nr_instr_fake_seqa = Array.length seqa); - let nr_instr_seqa = nr_instr_fake_seqa in - let store_idxs = - Array.to_list sb.instructions - |> List.mapi (fun i pc -> (i, get_some @@ PTree.get pc code)) - |> List.filter (fun (_i, inst) -> is_store inst) - |> List.map (fun (i, _inst) -> i) - in - let side_exit_idxs = side_exit_idxs sb code in - let store_side_exit_limit = - ListLabels.map store_idxs - ~f:(fun st_idx -> - let first_se = List.find_opt (fun se_idx -> se_idx > st_idx) side_exit_idxs in - let second_se = - Option.bind - first_se - (fun se_idx -> List.find_opt (fun se_idx' -> se_idx' > se_idx) side_exit_idxs) - in - second_se ) - in - let store_side_exit_deps = - ListLabels.map2 store_idxs store_side_exit_limit - ~f:(fun st_idx se_idx_opt -> - let module IS = InstructionScheduler in - match se_idx_opt with - | None -> None - | Some se_idx -> Some {IS.instr_to = se_idx; instr_from = st_idx; latency = 0} ) - |> List.filter_map (fun x -> x) - in - (* Copied from PrepassSchedulingOracle.define_problem *) - let fake_deps = PrepassSchedulingOracle.get_simple_dependencies opweights fake_seqa in - let fake_deps = store_side_exit_deps @ fake_deps in - - let problem = - { InstructionScheduler.max_latency = -1 - ; live_regs_entry = RTLpathScheduleraux.get_live_regs_entry fake_sb fake_code - ; typing = fake_sb.typing - ; reference_counting = Some (RTLpathScheduleraux.reference_counting fake_seqa fake_sb.s_output_regs fake_sb.typing) - ; resource_bounds = opweights.PrepassSchedulingOracleDeps.pipelined_resource_bounds - ; instruction_usages = Array.map (PrepassSchedulingOracle.resources_of_instruction opweights) (Array.map fst seqa) - ; latency_constraints = fake_deps } - - in - let scheduled_sequence = - match - PrepassSchedulingOracle.prepass_scheduler_by_name - (!Clflags.option_fprepass_sched) - problem - (Array.map (fun (ins, _) -> - match ins with - | Icond _ -> true - | _ -> false) - seqa) - with - | None -> None - | Some solution -> - (* Printf.eprintf "Scheduling instruction sequence of length: %d\n" nr_instructions; flush_all (); - Printf.eprintf "Result: %d\n" solution.(nr_instructions); flush_all (); *) - let positions = Array.init nr_instr_seqa (fun i -> i) in - let final_time = solution.(nr_instr_seqa) in - Array.sort (fun i j -> - let si = solution.(i) and sj = solution.(j) in - if si < sj then -1 - else if si > sj then 1 - else i - j) positions; - Some (positions, final_time) - in - - debug_flag := old_debug_flag; - - match scheduled_sequence with - | None -> None - | Some (order, final_time) -> - let ins' = - Array.append - (Array.map (fun i -> sb.instructions.(i)) order) - (Array.sub sb.instructions (nr_instr_sb - trailer_length) trailer_length) in - Some (ins', final_time) - -(* "ideal" *) -let ideal_schedule sb code = - let schedule = - RTLpathScheduleraux.schedule_superblock - {sb with liveins = PTree.map (fun n _regs -> Regset.empty) sb.liveins} - code - in - schedule - -let merge_append _ x y = match x, y with - | None, None -> None - | Some x, None | None, Some x -> Some x - | Some x, Some y -> Some (x @ y) - -(* Turns a tree of dependencies (pc -> [pcs; that; depend; on pc]) into a tree of uses by - * "inverting" tree. - * Now, each pc has the pcs associated to it that depend on it, according to the original - * tree. *) -let uses_of_deps p_ptree = - PTree.fold - (fun acc p vs -> - let acc = HashedSet.PSet.fold - (fun acc v -> - let old = ptree_get_or_default acc v HashedSet.PSet.empty in - let upd = HashedSet.PSet.add p old in - PTree.set v upd acc) - vs - acc - in - acc - ) - p_ptree - PTree.empty - -(* Returns for every side-exit pc, a list of instructions that should be executed beforehand *) -let downschedule_compensation_code sb code pm live_renames ~next_free_pc ~next_free_reg = - (* TODO: Right now we are copying the instructions even if there are duplicates per - * basic block. This leads to an issue where two identical memory writes lead to - * non-matching symbolic memory states. - * As a work-around we could eliminate the first/original memory write, this would - * allow moving memory writes at least one side-exit further down, but not - * farther. For that we would need to refine the symbolic memory model/evaluation - * which might be related to memory aliasing in general?! - * For now however, we simply use a more restrictive ideal_schedule function which - * does not propose moving memory writes below side-exits. *) - let old_debug_flag = !debug_flag in - - let mode = match !Clflags.option_prepass_past_side_exits_sched with - | "move_stores" -> Move_stores - | "no_move_stores" -> Ignore_liveness - | _ -> failwith "Unsupported option for scheduling code past side exits" - in - let sb_with_liveins = {sb with liveins = update_liveins sb.liveins live_renames} in - match ideal_schedule'' sb_with_liveins code mode, ideal_schedule'' sb_with_liveins code Default with - | None, None -> InsertPositionMap.empty (* Early Exit*) - | None, Some _ | Some _, None -> failwith "downschedule_compensation_code: Scheduling procedure failed." - | Some (idealized_schedule, idealized_final_time) - , Some (default_schedule, default_final_time) -> - if idealized_final_time >= default_final_time then ( - if (!debug_flag && idealized_final_time > default_final_time) then ( - debug "Unexpectedly, idealized dependencies lead to a worse expected final time.\n"; - debug "idealized_final_time = %d, default_final_time = %d\n" idealized_final_time default_final_time; - debug "For superblock %d" (Camlcoq.P.to_int sb.instructions.(0)); - failwith "Unexpectedly bad final time for idealized schedule"; - ); - (* Early exit *) - InsertPositionMap.empty - ) else ( - let sb_length = Array.length sb.instructions in - let pc_to_idx = - Duplicateaux.generate_fwmap - (Array.to_list sb.instructions) - (List.init sb_length (fun i -> i)) - PTree.empty - in - let pc_to_idx' = - Duplicateaux.generate_fwmap - (Array.to_list idealized_schedule) - (List.init sb_length (fun i -> i)) - PTree.empty - in - - let side_exit_pcs = side_exit_pcs sb code in - - (* NB: for the purpose of this heuristic we consider the superblock to include the final - * restoration code. sb has changed. *) - let liveins' = update_liveins sb.liveins live_renames in - (* Use the new names to calculate proper dependencies *) - let path_deps = intra_path_dependencies {sb with liveins = liveins'} code in - let path_deps_without_iconds = PTree.map - (fun _pc deps -> - HashedSet.PSet.filter - (fun dep_pc -> not @@ is_icond @@ get_some @@ PTree.get dep_pc code) - deps) - path_deps - in - let transitive_path_deps_without_iconds = - PTree.map (fun pc _deps -> transitive_dependencies path_deps_without_iconds pc) - path_deps - in - let transitive_uses_without_iconds = uses_of_deps transitive_path_deps_without_iconds in - - (* For each side-exit, check if all the dependencies are still above it - * if not, remember the pc and transitively consider its dependencies until - * no further insts to be covered are discovered *) - let side_exit_and_compensation = - ListLabels.map side_exit_pcs - ~f:(fun side_exit_pc -> - let moved_deps = moved_dependencies path_deps pc_to_idx' side_exit_pc in - let moved_deps_sorted = - ListLabels.sort (HashedSet.PSet.elements moved_deps) - ~cmp:(fun pc1 pc2 -> Int.compare (apply_map' pc_to_idx pc1) (apply_map' pc_to_idx pc2)) - in - (* The sucessors are *incorrect* at this point *) - (side_exit_pc, moved_deps_sorted) ) - in - let (side_exit_pcs, insts_pcs) = List.split side_exit_and_compensation in - - let to_insert_pcs = - ListLabels.fold_left2 side_exit_pcs insts_pcs - ~init:InsertPositionMap.empty - ~f:(fun acc side_exit_pc pcs -> - InsertPositionMap.add (InsertPosition.Above side_exit_pc) pcs acc) in - let (to_insert_as_well : Camlcoq.P.t list InsertPositionMap.t)= - InsertPositionMap.fold - (fun side_exit_pc pcs_to_insert acc -> - let side_exit_pc = InsertPosition.anchor side_exit_pc in - let ( let* ) = Option.bind in - let collateral_moves = - pcs_to_insert - |> List.filter_map (fun pc -> - let* uses = PTree.get pc transitive_uses_without_iconds in - HashedSet.PSet.filter - (fun pc -> - not @@ List.mem pc pcs_to_insert - && apply_map' pc_to_idx pc < apply_map' pc_to_idx side_exit_pc - && (not @@ is_icond @@ get_some @@ PTree.get pc code)) - uses - |> Option.some) - |> ListLabels.fold_left ~f:HashedSet.PSet.union ~init:HashedSet.PSet.empty - in - let collateral_moves = ListLabels.sort (HashedSet.PSet.elements collateral_moves) - ~cmp:(fun pc1 pc2 -> Int.compare (apply_map' pc_to_idx pc1) (apply_map' pc_to_idx pc2)) - in - (* TODO? - * In principle we should be able to move the instructions Below, unless they are - * live (liveins') at the side exit. - * But adding them above is simpler and unnecessary instructions should be removed - * by DCE. *) - let acc = InsertPositionMap.add (InsertPosition.Above side_exit_pc) collateral_moves acc in - acc) - to_insert_pcs - InsertPositionMap.empty - in - let to_insert_pcs = InsertPositionMap.merge merge_append to_insert_pcs to_insert_as_well in - let num_probably_duplicated = InsertPositionMap.fold (fun _pos pcs n -> - n + List.length pcs) - to_insert_pcs - 0 - in - let gain = default_final_time - idealized_final_time in - assert(gain > 0); - if gain * !Clflags.option_fliftif < num_probably_duplicated then ( - debug_flag := false; - debug "Expected number of cycles gained, %d, not considered worth the code duplication, expected at %d instructions.\n" - gain num_probably_duplicated; - debug_flag := old_debug_flag; - InsertPositionMap.empty - ) else ( - debug_flag := old_debug_flag; - to_insert_pcs - )) - -let my_merge_no_overwrite m1 m2 = - PTree.combine (fun x y -> match (x, y) with - | None, None -> None - | Some x, None - | None, Some x -> Some x - | Some x, Some y -> - if x = y then Some x - else failwith "Merge conflict." - ) m1 m2 - -let print_schedule schedule = - debug "Schedule\n"; - Array.iter (fun pos -> debug "%d\n" (Camlcoq.P.to_int pos)) schedule; - debug "\n"; - flush_all (); -;; - -(* Walk through sb and find those register which possibly take on different values - * i.e. which are written to twice. *) -let find_mutated_registers (sb : superblock) code input_regs : Regset.t = - let (defined, defined_multiple) = - ArrayLabels.fold_left sb.instructions - ~init:(input_regs, Regset.empty) - ~f:(fun (defined, defined_multiple) pc -> - let inst = get_some @@ PTree.get pc code in - match RTL.instr_defs inst with - | None -> (defined, defined_multiple) - | Some(r) -> - if Regset.mem r defined then - let defined_multiple = Regset.add r defined_multiple in - (defined, defined_multiple) - else - let defined = Regset.add r defined in - (defined, defined_multiple) - ) - in - defined_multiple - -(* Map each register in regs to the index of its first definition in the superblock - * Returns: mapping from a register to the index of the definition that first defines it - * in the superblock *) -let find_first_definition (sb : superblock) code (regs : Regset.t) : int PTree.t = - let (regs, first_defs, _index) = - ArrayLabels.fold_left sb.instructions - ~init:(regs, PTree.empty, 0) - ~f:(fun (regs, first_defs, index) pc -> - let inst = get_some @@ PTree.get pc code in - match RTL.instr_defs inst with - | None -> (regs, first_defs, index + 1) - | Some(r) -> - if Regset.mem r regs then - let regs = Regset.remove r regs in - let first_defs = PTree.set r index first_defs in - (regs, first_defs, index + 1) - else - (regs, first_defs, index + 1) ) - in - assert (Regset.empty = regs); - first_defs - -let print_int_ptree pt = - let module P = Camlcoq.P in - if not !debug_flag then () else - debug "Mappings, P.t -> Int.t:\n"; - List.iter (fun (p, i) -> debug "%d |-> %d\n" (P.to_int p) i) (PTree.elements pt) - -(* [def_index], the index in the superblock of the instruction defining [r] is set to -1 - * if the register is first defined outside of the path *) -let is_read_after_definition (sb : superblock) code (r : reg) (def_index) : bool = - let start = if def_index < 0 then 0 else def_index + 1 in - let stop = Array.length sb.instructions in - let rec aux n = - if n > stop then false else - - let pc = sb.instructions.(n) in - let inst = get_some @@ PTree.get pc code in - if List.mem r @@ RTL.instr_uses inst then - true - else - aux (n + 1) - in - aux start - -let registers_to_alias (sb : superblock) code pm = - let first_pc = sb.instructions.(0) in - let pi = get_some @@ PTree.get first_pc pm in - let sb_length = Array.length sb.instructions in - let mutated_registers = find_mutated_registers sb code pi.input_regs in - (* Registers are of interest if they would need to be restored after renaming *) - let registers_of_interest = Regset.inter sb.s_output_regs mutated_registers in - let first_defs_in_sb = find_first_definition sb code registers_of_interest in - (* If the register is first defined by the last instruction of the path, it does not - * need to be aliased, since it cannot possibly be used afterwards in the superblock. - * Furthermore, the renaming pass won't rename this register, in case the definition - * happens by a path-ending instruction in which case it would be impossible to place - * restoration code afterwards *) - let registers_to_alias = - registers_of_interest - |> Regset.filter (fun r -> if apply_map' first_defs_in_sb r = sb_length - 1 then false else true) - in - registers_to_alias - -let add_aliasing_code sb code pm : RTL.instruction list InsertPositionMap.t = - let first_pc = sb.instructions.(0) in - let pi = get_some @@ PTree.get first_pc pm in - let to_alias = registers_to_alias sb code pm in - let (initial, other) = Regset.partition (fun r -> Regset.mem r pi.input_regs) to_alias in - let to_insert = - (* If registers that are already live at the beginning of the superblock need to be - * aliased, they need to be inserted before (above) the first instruction of the - * superblock. *) - InsertPositionMap.singleton - (InsertPosition.Above sb.instructions.(0)) - (Regset.elements initial |> List.map (fun r -> Iop (Op.Omove, [r], r, Camlcoq.P.one))) - in - - let first_def_other = find_first_definition sb code other in - let to_insert = PTree.fold - (fun to_insert r idx -> - let pos = (InsertPosition.Below sb.instructions.(idx)) in - let old = - match InsertPositionMap.find_opt pos to_insert with - | None -> [] - | Some v -> v - in - let upd = (Iop (Op.Omove, [r], r, Camlcoq.P.one)) :: old in - InsertPositionMap.add pos upd to_insert) - first_def_other - to_insert - in - to_insert - -type icond_frame = - { inop_idx : int - ; icond_idx : int } - -let find_icond_frames (sb : superblock) code = - let (_last_inop_idx, frames, _i) = - ArrayLabels.fold_left sb.instructions - ~init:(None, [], 0) - ~f:(fun (last_inop_idx, frames, i) pc -> - let inst = get_some @@ PTree.get pc code in - match inst with - | Inop _ -> (Some i, frames, i + 1) - | Icond _ -> - ( match last_inop_idx with - | None -> (None, frames, i + 1) - | Some inop_idx -> (None, {inop_idx; icond_idx = i} :: frames, i + 1) ) - | _ -> (last_inop_idx, frames, i + 1) ) - in - frames - -let stage_duplication sb code staged_dupcode staged_revmap ~next_free_pc = - let module D = Duplicateaux in - (* let module P = Camlcoq.P in *) - let icond_frames = find_icond_frames sb code in - let (code, staged_dupcode, staged_revmap, next_free_pc) = - ListLabels.fold_left icond_frames - ~init:(code, staged_dupcode, staged_revmap, next_free_pc) - ~f:(fun (code, staged_dupcode, staged_revmap, next_free_pc) {inop_idx; icond_idx} -> - if (* icond_idx = Array.length sb.instructions - 1 (* Do not lift code before end of path *) - || *) icond_idx = inop_idx + 1 then (* Sentinel value that no code needs to be duplicated *) - (* do nothing *) - (code, staged_dupcode, staged_revmap, next_free_pc) - else - let pcs_to_copy = Array.sub sb.instructions (inop_idx + 1) (icond_idx - inop_idx) in - let (staged_dupcode', staged_revmap', dupcode, fwmap, next_free_pc) = D.clone_only_new code next_free_pc (Array.to_list pcs_to_copy) in - let staged_dupcode = my_merge_no_overwrite staged_dupcode staged_dupcode' in - let staged_revmap = my_merge_no_overwrite staged_revmap staged_revmap' in - let parental_icond = get_some @@ PTree.get sb.instructions.(icond_idx) code in - let (useless_icond, staged_icond) = - match parental_icond with - | Icond(cond, args, ifso, ifnot, info) -> - let staged_icond = Icond(cond, args, Camlcoq.P.of_int @@ List.hd dupcode, pcs_to_copy.(0), info) in - let useless_icond = Icond(cond, args, pcs_to_copy.(0), pcs_to_copy.(0), info) in - (useless_icond, staged_icond) - | _ -> failwith "Instruction was expected to be Icond, but is not" - in - let staged_dupcode = PTree.set sb.instructions.(inop_idx) staged_icond staged_dupcode in - - let code = PTree.set sb.instructions.(inop_idx) useless_icond code in - - (code, staged_dupcode, staged_revmap, next_free_pc) ) - in - (code, staged_dupcode, staged_revmap, next_free_pc) - -(* TODO? better name *) -let apply_aliases sb code name_map ~offset = - let code = ref code in - let name_map = ref name_map in - let length = Array.length sb.instructions in - - (* TODO: preferably there was an early exit condition when there is nothing left to do *) - for i = offset to length - 1 do - let pc = sb.instructions.(i) in - let inst = get_some @@ PTree.get pc !code in - let inst' = change_arg_regs inst !name_map in - code := PTree.set pc inst' !code; - name_map := - (match RTL.instr_defs inst with - | None -> !name_map - | Some r -> - if Option.is_some @@ PTree.get r !name_map then ( - (* The restoration code, is no longer incorrectly applicable *) - if not !Clflags.option_fpoormansssa then - PTree.remove r !name_map - else - !name_map - ) else - !name_map) - ; - done; - !code - -let scheduler f = - (* let module D = Duplicateaux in - let module P = Camlcoq.P in - let module N = Camlcoq.Nat in *) - (* TODO: - control for amount of code duplication *) - let open! Duplicateaux in - let f_rtl = f.fn_RTL in - let code = f_rtl.fn_code in - let _orig_code = code in - let id_ptree = PTree.map (fun n i -> n) (f.fn_path) in - let entry = f.fn_RTL.fn_entrypoint in - let pm = f.fn_path in - let do_nothing = ((((code, entry), pm), id_ptree), None) in - (* TODO: Add flag to select between "conserative" downard scheduling and the one that - * allows moving memory stores below the next side exit (singular). *) - if not !Clflags.option_fpoormansssa && !Clflags.option_fliftif < 1 then do_nothing - else (* NB: Early exit above *) - let _orig_pm = pm in - let typing = get_ok @@ RTLtyping.type_function f.fn_RTL in - let is_loop_header = Duplicateaux.get_loop_headers code (f_rtl.fn_entrypoint) in - let inner_loops = Duplicateaux.get_inner_loops f_rtl code is_loop_header in - (* inner loop map: map loop headers to inner loops *) - let ilmap = generate_fwmap (List.map (fun il -> il.head) inner_loops) inner_loops PTree.empty in - let superblocks = get_superblocks code entry pm typing in - - (* Get only those superblocks that span an inner loop *) - let superblocks = - if not !Clflags.option_ftargetinnerloops then - superblocks - else - List.filter (fun sb -> - (* sanity check; a superblock contains at least one instruction *) - assert (Array.length sb.instructions >= 1); - (* Check if first instruction of a superblock is the beginning of an inner loop*) - let first_pc = sb.instructions.(0) in - match PTree.get first_pc ilmap with - | None -> false - | Some(il) -> (* List.length il.body = Array.length sb.instructions *) - (match il.sb_final with - (* don't bother if the loop is not predicted to loop *) - | None -> false - | Some pc -> - pc == first_pc (* loop is spanned by the superblock *) - (* Make sure the loop does not exceed the superblock *) - && List.length il.body = Array.length sb.instructions - ) - ) superblocks - in - - let next_free_pc = next_free_pc code |> Camlcoq.P.of_int in - let next_free_reg = max_reg_function f.fn_RTL |> Camlcoq.P.succ in - - (* Apply aliasing code *) - let old_debug_flag = !debug_flag in - debug_flag := false; - debug "Initial code.\n"; - print_code code; - print_path_map pm; - print_superblocks superblocks code; - debug "\n"; - flush_all (); - - (* TODO: Is this extra aliasing logic really useless? *) - (* let (superblocks, code, pm, next_free_pc) = - ListLabels.fold_left superblocks - ~init:([], code, pm, next_free_pc) - ~f:(fun (superblocks, code, pm, next_free_pc) sb -> - let to_insert = add_aliasing_code sb code pm in - let (sb', code', pm', next_free_pc') = insert_code sb code pm to_insert ~next_free_pc in - (sb'::superblocks, code', pm', next_free_pc') ) - in - debug "After adding aliasing code.\n"; - print_code code; - print_path_map pm; - print_superblocks superblocks code; - debug "\n"; - flush_all (); *) - - let (superblocks, code, pm, next_free_pc) = - ListLabels.fold_left superblocks - ~init:([], code, pm, next_free_pc) - ~f:(fun (superblocks, code, pm, next_free_pc) sb -> - let to_insert = prepend_nops_before_iconds sb code in - let (sb', code', pm', next_free_pc', _) = insert_code sb code pm to_insert ~next_free_pc in - (sb'::superblocks, code', pm', next_free_pc') ) - in - debug "After adding nops before Iconds.\n"; - print_code code; - print_path_map pm; - print_superblocks superblocks code; - debug "\n"; - flush_all (); - - let (code, sb_renamings, next_free_reg) = - if not !Clflags.option_fpoormansssa then - (code, List.map (fun sb -> (sb, PTree.empty)) superblocks, next_free_reg) - else ( - debug "pmSSA path\n"; flush_all (); - ListLabels.fold_left superblocks - ~init:(code, [], next_free_reg) - ~f:(fun (code, sb_renamings, next_free_reg) sb -> - let first_pc = sb.instructions.(0) in - let pi = get_some @@ PTree.get first_pc pm in - let (code, live_renames, next_free_reg) = local_single_assignment sb code pi.input_regs ~next_free_reg in - (code, (sb, live_renames)::sb_renamings, next_free_reg) - ) ) - in - - debug "After renaming :\n"; - print_code code; - print_path_map pm; - print_superblocks (fst @@ List.split sb_renamings) code; - debug "\n"; - flush_all (); - - - let (sb_renamings, code, pm, next_free_pc) = ListLabels.fold_left sb_renamings - ~init:([], code, pm, next_free_pc) - ~f:(fun (sbs, code, pm, next_free_pc) (sb, live_renames) -> - let final_restoration = final_restoration_code sb code live_renames in - let (sb, code, pm, next_free_pc, fwmap) = insert_code sb code pm final_restoration ~next_free_pc in - let live_renames = - PTree.fold - (fun acc pc insts -> - let pc' = apply_map' fwmap pc in - (* Remove final renames, which were just inserted *) - let insts = if Camlcoq.P.eq pc sb.instructions.(Array.length sb.instructions - 1) then [] else insts in - PTree.set pc' insts acc) - live_renames - PTree.empty - in - ((sb, live_renames)::sbs, code, pm, next_free_pc) - ) - in - - debug "After inserting the final restoration code:\n"; - (* print_code code; *) - (* print_path_map pm; *) - print_superblocks (fst @@ List.split sb_renamings) code; - debug "\n"; - flush_all (); - - let sb_tocompensatepcs_liverenames = - ListLabels.map sb_renamings - ~f:(fun (sb, live_renames) -> - let (to_insert_compensation_pcs, live_renames) = - if !Clflags.option_prepass_past_side_exits then - (downschedule_compensation_code sb code pm live_renames ~next_free_pc ~next_free_reg - , live_renames ) - else - (InsertPositionMap.empty, live_renames) - in - (sb, to_insert_compensation_pcs, live_renames ) ) - in - - let (sb_tocompensate_liverenames, code, next_free_reg) = ListLabels.fold_left sb_tocompensatepcs_liverenames - ~init:([], code, next_free_reg) - ~f:(fun (sbs, code, next_free_reg) (sb, to_compensate_pcs, live_renames) -> - if !Clflags.option_fpoormansssa then ( - let to_compensate = InsertPositionMap.map (fun pcs -> - let insts = List.map (fun pc -> get_some @@ PTree.get pc code) pcs in - insts) - to_compensate_pcs - in - let code = InsertPositionMap.fold - (fun _pos pcs code-> - List.fold_left (fun code pc -> PTree.set pc (Inop Camlcoq.P.one) code) - code - pcs) - to_compensate_pcs - code - in - ((sb, to_compensate, live_renames)::sbs, code, next_free_reg) - ) else ( - assert (PTree.elements live_renames |> List.for_all (fun (_, l) -> l = [])); - let dup_count = InsertPositionMap.fold - (fun _pos (pcs : Camlcoq.P.t list) acc -> - let acc = ListLabels.fold_left pcs - ~init:acc - ~f:(fun acc pc -> - let old = ptree_get_or_default acc pc 0 in - PTree.set pc (old + 1) acc) - in - acc ) - to_compensate_pcs - PTree.empty - in - let pcs_dupd_twice_or_more = - PTree.filter1 (fun n -> n > 1) dup_count - |> PTree.elements - |> List.map fst - in - let arg_regs = ListLabels.fold_left pcs_dupd_twice_or_more - ~init:(Regset.empty) - ~f:(fun acc pc -> - let inst = get_some @@ PTree.get pc code in - RTL.instr_uses inst - |> List.fold_left - (fun acc reg -> Regset.add reg acc) - acc - ) - in - let pi = get_some @@ PTree.get sb.instructions.(0) pm in - let (code, live_renames, next_free_reg) = rename_regs ~only_rename:arg_regs sb code ~liveatentry:pi.input_regs ~next_free_reg in - let to_compensate = InsertPositionMap.map (fun pcs -> - let insts = List.map (fun pc -> get_some @@ PTree.get pc code) pcs in - insts) - to_compensate_pcs - in - let code = InsertPositionMap.fold - (fun _pos pcs code-> - List.fold_left (fun code pc -> PTree.set pc (Inop Camlcoq.P.one) code) - code - pcs) - to_compensate_pcs - code - in - ((sb, to_compensate, live_renames)::sbs, code, next_free_reg) )) - in - - (* Insert the compensation code (downward scheduling) and update the restoration code - * information to reflect the new pcs. *) - let (superblocks_liverenames, code, pm, next_free_pc) = ListLabels.fold_left sb_tocompensate_liverenames - ~init:([], code, pm, next_free_pc) - ~f:(fun (sbs, code, pm, next_free_pc) (sb, to_insert_compensation, live_renames) -> - let (sb, code, pm, next_free_pc, fwmap) = insert_code sb code pm to_insert_compensation ~next_free_pc in - let live_renames = - PTree.fold - (fun acc pc insts -> - let pc' = apply_map' fwmap pc in - PTree.set pc' insts acc) - live_renames - PTree.empty - in - ((sb, live_renames)::sbs, code, pm, next_free_pc) ) - in - - debug "After inserting the compensation code:\n"; - (* print_code code; *) - (* print_path_map pm; *) - print_superblocks (fst @@ List.split superblocks_liverenames) code; - debug "\n"; flush_all (); - - (* Insert the restoration code *) - let (superblocks, code, pm, next_free_pc, next_free_reg) = ListLabels.fold_left superblocks_liverenames - ~init:([], code, pm, next_free_pc, next_free_reg) - ~f:(fun (sbs, code, pm, next_free_pc, next_free_reg) (sb, live_renames) -> - let (sb, code, pm, next_free_pc, live_renames) = - if !Clflags.option_fpoormansssa then - (* Final restoration code was already inserted. *) - (sb, code, pm, next_free_pc, live_renames) - else - (* Ther combination of code motion below side exits WITHOUT register renaming may - * cause some restoration code to be necessary. Otherwise it is not safe to - * duplicate instructions. - * The final restoration code is special since it may insert below. *) - let final_restoration = final_restoration_code sb code live_renames in - let (sb, code, pm, next_free_pc, fwmap) = insert_code sb code pm final_restoration ~next_free_pc in - let live_renames = - PTree.fold - (fun acc pc insts -> - let pc' = apply_map' fwmap pc in - (* Remove final renames, which were just inserted *) - let insts = if Camlcoq.P.eq pc sb.instructions.(Array.length sb.instructions - 1) then [] else insts in - PTree.set pc' insts acc) - live_renames - PTree.empty - in - (sb, code, pm, next_free_pc, live_renames) - in - let pc_to_idx = Duplicateaux.generate_fwmap - (Array.to_list sb.instructions) - (List.init (Array.length sb.instructions) (fun i -> i)) - PTree.empty - in - let (to_insert_restoration, to_rename, next_free_reg) = restoration_instructions' sb code live_renames ~next_free_reg in - let side_exit_pcs = side_exit_pcs sb code in - let code = ListLabels.fold_left side_exit_pcs - ~init:code - ~f:(fun code side_exit_pc -> - match PTree.get side_exit_pc to_rename with - | None -> code - | Some aliases -> - let idx = apply_map' pc_to_idx side_exit_pc in - let code = apply_aliases sb code aliases ~offset:idx in - code) - in - let (sb, code, pm, next_free_pc, _fwmap) = insert_code sb code pm to_insert_restoration ~next_free_pc in - (sb::sbs, code, pm, next_free_pc, next_free_reg) ) - in - - debug "After inserting the restoration code:\n"; - (* print_code code; *) - (* print_path_map pm; *) - print_superblocks superblocks code; - debug "\n"; flush_all (); - - (* let (superblocks, code, pm, next_free_pc, next_free_reg) = - if not !Clflags.option_fpoormansssa then ( - (* In principle we need to do something like this because if we do not systematically - * rename registers, the downward scheduling might copy a series of instructions like - * this: i = i + 1, which when copied twice (to move below two side exits) is - * incorrect unless we rename the register i (or at least its second redefinition). - * However, at least the benchmarks do not seem to trigger this special case. - ListLabels.fold_left superblocks_torename - ~init:([], code, pm, next_free_pc, next_free_reg) - ~f:(fun (sbs, code, pm, next_free_pc, next_free_reg) (sb, to_rename) -> - let pi = get_some @@ PTree.get sb.instructions.(0) pm in - let liveatentry = pi.input_regs in - let (code, live_renames, next_free_reg) = rename_regs ~liveatentry ~only_rename:to_rename sb code ~next_free_reg in - let restoration_insts = restoration_instructions live_renames in - let (sb, code, pm, next_free_pc) = insert_code sb code pm restoration_insts ~next_free_pc in - (sb::sbs, code, pm, next_free_pc, next_free_reg) ) *) - (fst @@ List.split superblocks_torestore, code, pm, next_free_pc, next_free_reg) - ) else - (fst @@ List.split superblocks_torestore, code, pm, next_free_pc, next_free_reg) - in *) - - let (code, to_lift) = - if !Clflags.option_fliftif > 0 then - (* TODO: Use this flag to control for the amount of duplicated code. - * However, this is probably best controlled at the "downscheduling" level since only - * those instructions need to be actually duplicated, i.e. restoration code writing - * back the current value to renamed registers is not actually duplicated. *) - let (code, staged_dupcode, staged_revmap, next_free_pc) = ListLabels.fold_left superblocks - ~init:(code, PTree.empty, PTree.empty, (Camlcoq.P.to_int next_free_pc)) - ~f:(fun (code, staged_dupcode, staged_revmap, next_free_pc) sb -> - stage_duplication sb code staged_dupcode staged_revmap ~next_free_pc ) - in - (code, Some(staged_revmap, staged_dupcode)) - else - (code, None) - in - - debug "After staging the duplication (\"if-lifting\") :\n"; - print_code code; - print_path_map pm; - print_superblocks superblocks code; - debug "\n"; - flush_all (); - - debug_flag := old_debug_flag; - - ((((code, entry), pm), id_ptree), to_lift) |