diff options
Diffstat (limited to 'aarch64/PostpassSchedulingOracle.ml')
-rw-r--r-- | aarch64/PostpassSchedulingOracle.ml | 674 |
1 files changed, 674 insertions, 0 deletions
diff --git a/aarch64/PostpassSchedulingOracle.ml b/aarch64/PostpassSchedulingOracle.ml new file mode 100644 index 00000000..6f784238 --- /dev/null +++ b/aarch64/PostpassSchedulingOracle.ml @@ -0,0 +1,674 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Sylvain Boulmé Grenoble-INP, VERIMAG *) +(* David Monniaux CNRS, VERIMAG *) +(* Cyril Six Kalray *) +(* Léo Gourdin UGA, VERIMAG *) +(* *) +(* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +open Asmblock +open OpWeightsAsm +open InstructionScheduler + +let debug = false + +let stats = false + +(** + * Extracting infos from Asm instructions + *) + +type location = Reg of Asm.preg | Mem | IREG0_XZR + +type ab_inst_rec = { + inst : instruction; + write_locs : location list; + read_locs : location list; + is_control : bool; +} + +(** Asm constructor to real instructions *) + +exception OpaqueInstruction + +let is_XZR = function IREG0_XZR -> true | _ -> false + +let reg_of_pc = Reg Asm.PC + +let reg_of_dreg r = Reg (Asm.DR r) + +let reg_of_ireg r = Reg (Asm.DR (Asm.IR (Asm.RR1 r))) + +let reg_of_iregsp r = Reg (Asm.DR (Asm.IR r)) + +let reg_of_ireg0 r = + match r with Asm.RR0 ir -> reg_of_ireg ir | Asm.XZR -> IREG0_XZR + +let reg_of_freg r = Reg (Asm.DR (Asm.FR r)) + +let reg_of_cr r = Reg (Asm.CR r) + +let regXSP = Reg (Asm.DR (Asm.IR Asm.XSP)) + +let flags_wlocs = + [ reg_of_cr Asm.CN; reg_of_cr Asm.CZ; reg_of_cr Asm.CC; reg_of_cr Asm.CV ] + +let get_arith_p_wlocs = function + | Pfmovimms _ -> [ reg_of_ireg Asm.X16 ] + | Pfmovimmd _ -> [ reg_of_ireg Asm.X16 ] + | _ -> [] + +let arith_p_rec i i' rd = + { + inst = i; + write_locs = [ rd ] @ get_arith_p_wlocs i'; + read_locs = []; + is_control = false; + } + +let arith_pp_rec i rd rs = + { inst = i; write_locs = [ rd ]; read_locs = [ rs ]; is_control = false } + +let arith_ppp_rec i rd r1 r2 = + { inst = i; write_locs = [ rd ]; read_locs = [ r1; r2 ]; is_control = false } + +let arith_rr0r_rec i rd r1 r2 = + let rlocs = if is_XZR r1 then [ r2 ] else [ r1; r2 ] in + { inst = i; write_locs = [ rd ]; read_locs = rlocs; is_control = false } + +let arith_rr0_rec i rd r1 = + let rlocs = if is_XZR r1 then [] else [ r1 ] in + { inst = i; write_locs = [ rd ]; read_locs = rlocs; is_control = false } + +let arith_arrrr0_rec i rd r1 r2 r3 = + let rlocs = if is_XZR r3 then [ r1; r2 ] else [ r1; r2; r3 ] in + { inst = i; write_locs = [ rd ]; read_locs = rlocs; is_control = false } + +let arith_comparison_pp_rec i r1 r2 = + { + inst = i; + write_locs = flags_wlocs; + read_locs = [ r1; r2 ]; + is_control = false; + } + +let arith_comparison_r0r_rec i r1 r2 = + let rlocs = if is_XZR r1 then [ r2 ] else [ r1; r2 ] in + { inst = i; write_locs = flags_wlocs; read_locs = rlocs; is_control = false } + +let arith_comparison_p_rec i r1 = + { inst = i; write_locs = flags_wlocs; read_locs = [ r1 ]; is_control = false } + +let get_eval_addressing_rlocs a = + match a with + | Asm.ADimm (base, _) -> [ reg_of_iregsp base ] + | Asm.ADreg (base, r) -> [ reg_of_iregsp base; reg_of_ireg r ] + | Asm.ADlsl (base, r, _) -> [ reg_of_iregsp base; reg_of_ireg r ] + | Asm.ADsxt (base, r, _) -> [ reg_of_iregsp base; reg_of_ireg r ] + | Asm.ADuxt (base, r, _) -> [ reg_of_iregsp base; reg_of_ireg r ] + | Asm.ADadr (base, _, _) -> [ reg_of_iregsp base ] + | Asm.ADpostincr (base, _) -> [ reg_of_iregsp base ] + +let load_rd_a_rec ld rd a = + { + inst = ld; + write_locs = [ rd ]; + read_locs = [ Mem ] @ get_eval_addressing_rlocs a; + is_control = false; + } + +let load_rd1_rd2_a_rec ld rd1 rd2 a = + { + inst = ld; + write_locs = [ rd1; rd2 ]; + read_locs = [ Mem ] @ get_eval_addressing_rlocs a; + is_control = false; + } + +let load_rec ldi = + match ldi with + | PLd_rd_a (ld, rd, a) -> + load_rd_a_rec (PBasic (PLoad ldi)) (reg_of_dreg rd) a + | Pldp (ld, rd1, rd2, _, _, a) -> + load_rd1_rd2_a_rec (PBasic (PLoad ldi)) (reg_of_dreg rd1) + (reg_of_dreg rd2) a + +let store_rs_a_rec st rs a = + { + inst = st; + write_locs = [ Mem ]; + read_locs = [ rs; Mem ] @ get_eval_addressing_rlocs a; + is_control = false; + } + +let store_rs1_rs2_a_rec st rs1 rs2 a = + { + inst = st; + write_locs = [ Mem ]; + read_locs = [ rs1; rs2; Mem ] @ get_eval_addressing_rlocs a; + is_control = false; + } + +let store_rec sti = + match sti with + | PSt_rs_a (st, rs, a) -> + store_rs_a_rec (PBasic (PStore sti)) (reg_of_dreg rs) a + | Pstp (st, rs1, rs2, _, _, a) -> + store_rs1_rs2_a_rec (PBasic (PStore sti)) (reg_of_dreg rs1) + (reg_of_dreg rs2) a + +let loadsymbol_rec i rd id = + { inst = i; write_locs = [ rd ]; read_locs = [ Mem ]; is_control = false } + +let cvtsw2x_rec i rd r1 = + { inst = i; write_locs = [ rd ]; read_locs = [ r1 ]; is_control = false } + +let cvtuw2x_rec i rd r1 = + { inst = i; write_locs = [ rd ]; read_locs = [ r1 ]; is_control = false } + +let cvtx2w_rec i rd = + { inst = i; write_locs = [ rd ]; read_locs = [ rd ]; is_control = false } + +let get_testcond_rlocs c = + match c with + | Asm.TCeq -> [ reg_of_cr Asm.CZ ] + | Asm.TCne -> [ reg_of_cr Asm.CZ ] + | Asm.TChs -> [ reg_of_cr Asm.CC ] + | Asm.TClo -> [ reg_of_cr Asm.CC ] + | Asm.TCmi -> [ reg_of_cr Asm.CN ] + | Asm.TCpl -> [ reg_of_cr Asm.CN ] + | Asm.TChi -> [ reg_of_cr Asm.CZ; reg_of_cr Asm.CC ] + | Asm.TCls -> [ reg_of_cr Asm.CZ; reg_of_cr Asm.CC ] + | Asm.TCge -> [ reg_of_cr Asm.CN; reg_of_cr Asm.CV ] + | Asm.TClt -> [ reg_of_cr Asm.CN; reg_of_cr Asm.CV ] + | Asm.TCgt -> [ reg_of_cr Asm.CN; reg_of_cr Asm.CZ; reg_of_cr Asm.CV ] + | Asm.TCle -> [ reg_of_cr Asm.CN; reg_of_cr Asm.CZ; reg_of_cr Asm.CV ] + +let cset_rec i rd c = + { + inst = i; + write_locs = [ rd ]; + read_locs = get_testcond_rlocs c; + is_control = false; + } + +let csel_rec i rd r1 r2 c = + { + inst = i; + write_locs = [ rd ]; + read_locs = [ r1; r2 ] @ get_testcond_rlocs c; + is_control = false; + } + +let fmovi_rec i fsz rd r1 = + let rlocs = if is_XZR r1 then [] else [ r1 ] in + { inst = i; write_locs = [ rd ]; read_locs = rlocs; is_control = false } + +let fnmul_rec i fsz rd r1 r2 = + { inst = i; write_locs = [ rd ]; read_locs = [ r1; r2 ]; is_control = false } + +let allocframe_rec i sz linkofs = + { + inst = i; + write_locs = [ Mem; regXSP; reg_of_ireg Asm.X16; reg_of_ireg Asm.X29 ]; + read_locs = [ regXSP; Mem ]; + is_control = false; + } + +let freeframe_rec i sz linkofs = + { + inst = i; + write_locs = [ Mem; regXSP; reg_of_ireg Asm.X16 ]; + read_locs = [ regXSP; Mem ]; + is_control = false; + } + +let nop_rec i = + { inst = i; write_locs = []; read_locs = []; is_control = false } + +let arith_rec i = + match i with + | PArithP (i', rd) -> arith_p_rec (PBasic (PArith i)) i' (reg_of_dreg rd) + | PArithPP (i', rd, rs) -> + arith_pp_rec (PBasic (PArith i)) (reg_of_dreg rd) (reg_of_dreg rs) + | PArithPPP (i', rd, r1, r2) -> + arith_ppp_rec (PBasic (PArith i)) (reg_of_dreg rd) (reg_of_dreg r1) + (reg_of_dreg r2) + | PArithRR0R (i', rd, r1, r2) -> + arith_rr0r_rec (PBasic (PArith i)) (reg_of_ireg rd) (reg_of_ireg0 r1) + (reg_of_ireg r2) + | PArithRR0 (i', rd, r1) -> + arith_rr0_rec (PBasic (PArith i)) (reg_of_ireg rd) (reg_of_ireg0 r1) + | PArithARRRR0 (i', rd, r1, r2, r3) -> + arith_arrrr0_rec (PBasic (PArith i)) (reg_of_ireg rd) (reg_of_ireg r1) + (reg_of_ireg r2) (reg_of_ireg0 r3) + | PArithComparisonPP (i', r1, r2) -> + arith_comparison_pp_rec (PBasic (PArith i)) (reg_of_dreg r1) + (reg_of_dreg r2) + | PArithComparisonR0R (i', r1, r2) -> + arith_comparison_r0r_rec (PBasic (PArith i)) (reg_of_ireg0 r1) + (reg_of_ireg r2) + | PArithComparisonP (i', r1) -> + arith_comparison_p_rec (PBasic (PArith i)) (reg_of_dreg r1) + | Pcset (rd, c) -> cset_rec (PBasic (PArith i)) (reg_of_ireg rd) c + | Pfmovi (fsz, rd, r1) -> + fmovi_rec (PBasic (PArith i)) fsz (reg_of_freg rd) (reg_of_ireg0 r1) + | Pcsel (rd, r1, r2, c) -> + csel_rec (PBasic (PArith i)) (reg_of_dreg rd) (reg_of_dreg r1) + (reg_of_dreg r2) c + | Pfnmul (fsz, rd, r1, r2) -> + fnmul_rec (PBasic (PArith i)) fsz (reg_of_freg rd) (reg_of_freg r1) + (reg_of_freg r2) + +let basic_rec i = + match i with + | PArith i' -> arith_rec i' + | PLoad ld -> load_rec ld + | PStore st -> store_rec st + | Pallocframe (sz, linkofs) -> allocframe_rec (PBasic i) sz linkofs + | Pfreeframe (sz, linkofs) -> freeframe_rec (PBasic i) sz linkofs + | Ploadsymbol (rd, id) -> loadsymbol_rec (PBasic i) (reg_of_ireg rd) id + | Pcvtsw2x (rd, r1) -> + cvtsw2x_rec (PBasic i) (reg_of_ireg rd) (reg_of_ireg r1) + | Pcvtuw2x (rd, r1) -> + cvtuw2x_rec (PBasic i) (reg_of_ireg rd) (reg_of_ireg r1) + | Pcvtx2w rd -> cvtx2w_rec (PBasic i) (reg_of_ireg rd) + | Pnop -> nop_rec (PBasic i) + +let builtin_rec i ef args res = + { inst = i; write_locs = [ Mem ]; read_locs = [ Mem ]; is_control = true } + +let ctl_flow_rec i = + match i with + | Pb lbl -> + { + inst = PControl (PCtlFlow i); + write_locs = [ reg_of_pc ]; + read_locs = [ reg_of_pc ]; + is_control = true; + } + | Pbc (c, lbl) -> + { + inst = PControl (PCtlFlow i); + write_locs = [ reg_of_pc ]; + read_locs = [ reg_of_pc ]; + is_control = true; + } + | Pbl (id, sg) -> + { + inst = PControl (PCtlFlow i); + write_locs = [ reg_of_ireg Asm.X30; reg_of_pc ]; + read_locs = [ reg_of_pc ]; + is_control = true; + } + | Pbs (id, sg) -> + { + inst = PControl (PCtlFlow i); + write_locs = [ reg_of_pc ]; + read_locs = []; + is_control = true; + } + | Pblr (r, sg) -> + { + inst = PControl (PCtlFlow i); + write_locs = [ reg_of_ireg Asm.X30; reg_of_pc ]; + read_locs = [ reg_of_ireg r ]; + is_control = true; + } + | Pbr (r, sg) -> + { + inst = PControl (PCtlFlow i); + write_locs = [ reg_of_pc ]; + read_locs = [ reg_of_ireg r ]; + is_control = true; + } + | Pret r -> + { + inst = PControl (PCtlFlow i); + write_locs = [ reg_of_pc ]; + read_locs = [ reg_of_ireg r ]; + is_control = true; + } + | Pcbnz (sz, r, lbl) -> + { + inst = PControl (PCtlFlow i); + write_locs = [ reg_of_pc ]; + read_locs = [ reg_of_ireg r; reg_of_pc ]; + is_control = true; + } + | Pcbz (sz, r, lbl) -> + { + inst = PControl (PCtlFlow i); + write_locs = [ reg_of_pc ]; + read_locs = [ reg_of_ireg r; reg_of_pc ]; + is_control = true; + } + | Ptbnz (sz, r, n, lbl) -> + { + inst = PControl (PCtlFlow i); + write_locs = [ reg_of_pc ]; + read_locs = [ reg_of_ireg r; reg_of_pc ]; + is_control = true; + } + | Ptbz (sz, r, n, lbl) -> + { + inst = PControl (PCtlFlow i); + write_locs = [ reg_of_pc ]; + read_locs = [ reg_of_ireg r; reg_of_pc ]; + is_control = true; + } + | Pbtbl (r1, tbl) -> + { + inst = PControl (PCtlFlow i); + write_locs = [ reg_of_ireg Asm.X16; reg_of_ireg Asm.X17; reg_of_pc ]; + read_locs = [ reg_of_ireg r1; reg_of_pc ]; + is_control = true; + } + +let control_rec i = + match i with + | Pbuiltin (ef, args, res) -> builtin_rec (PControl i) ef args res + | PCtlFlow i' -> ctl_flow_rec i' + +let rec basic_recs body = + match body with [] -> [] | bi :: body -> basic_rec bi :: basic_recs body + +let exit_rec exit = match exit with None -> [] | Some ex -> [ control_rec ex ] + +let instruction_recs bb = basic_recs bb.body @ exit_rec bb.exit + +(** + * Providing informations relative to the real instructions + *) + +type inst_info = { + write_locs : location list; + read_locs : location list; + is_control : bool; + (* resources consumed by the instruction *) + usage : int array; + latency : int; +} + +(** Abstraction providing all the necessary informations for solving the scheduling problem *) + +let rec_to_info (r : ab_inst_rec) : inst_info = + let opweights = OpWeightsAsm.get_opweights () in + let usage = opweights.resources_of_op r.inst 0 + and latency = opweights.latency_of_op r.inst 0 in + { + write_locs = r.write_locs; + read_locs = r.read_locs; + usage; + latency; + is_control = r.is_control; + } + +let instruction_infos bb = List.map rec_to_info (instruction_recs bb) + +let instruction_usages bb = + let usages = List.map (fun info -> info.usage) (instruction_infos bb) in + Array.of_list usages + +(** + * Latency constraints building + *) + +module LocHash = Hashtbl + +(* Hash table : location => list of instruction ids *) + +let rec intlist n = + if n < 0 then failwith "intlist: n < 0" + else if n = 0 then [] + else (n - 1) :: intlist (n - 1) + +let find_in_hash hashloc loc = + match LocHash.find_opt hashloc loc with Some idl -> idl | None -> [] + +(* Returns a list of instruction ids *) +let rec get_accesses hashloc (ll : location list) = + match ll with + | [] -> [] + | loc :: llocs -> find_in_hash hashloc loc @ get_accesses hashloc llocs + +let compute_latency (ifrom : inst_info) = ifrom.latency + +let latency_constraints bb = + let written = LocHash.create 70 + and read = LocHash.create 70 + and count = ref 0 + and constraints = ref [] + and instr_infos = instruction_infos bb in + let step (i : inst_info) = + let raw = get_accesses written i.read_locs + and waw = get_accesses written i.write_locs + and war = get_accesses read i.write_locs in + List.iter + (fun i -> + constraints := + { + instr_from = i; + instr_to = !count; + latency = compute_latency (List.nth instr_infos i); + } + :: !constraints) + raw; + List.iter + (fun i -> + constraints := + { + instr_from = i; + instr_to = !count; + latency = compute_latency (List.nth instr_infos i); + } + :: !constraints) + waw; + List.iter + (fun i -> + constraints := + { instr_from = i; instr_to = !count; latency = 0 } :: !constraints) + war; + if i.is_control then + List.iter + (fun n -> + constraints := + { instr_from = n; instr_to = !count; latency = 0 } :: !constraints) + (intlist !count); + (* Updating "read" and "written" hashmaps *) + List.iter + (fun loc -> + LocHash.replace written loc [ !count ]; + LocHash.replace read loc [] + (* Clearing all the entries of "read" hashmap when a register is written *)) + i.write_locs; + List.iter + (fun loc -> LocHash.replace read loc (!count :: find_in_hash read loc)) + i.read_locs; + count := !count + 1 + in + List.iter step instr_infos; + !constraints + +(** + * Using the InstructionScheduler + *) + +let opweights = OpWeightsAsm.get_opweights () + +let build_problem bb = + { + max_latency = -1; + resource_bounds = opweights.pipelined_resource_bounds; + live_regs_entry = Registers.Regset.empty; (* unused here *) + typing = (fun x -> AST.Tint); (* unused here *) + reference_counting = None; + instruction_usages = instruction_usages bb; + latency_constraints = latency_constraints bb; + } + +let get_from_indexes indexes l = List.map (List.nth l) indexes + +(*let is_basic = function PBasic _ -> true | _ -> false*) +let is_control = function PControl _ -> true | _ -> false + +let to_control = function + | PControl i -> i + | _ -> failwith "to_control: basic instruction found" + +let rec body_to_instrs bdy = + match bdy with [] -> [] | i :: l' -> PBasic i :: body_to_instrs l' + +let rec instrs_to_bdy instrs = + match instrs with + | [] -> [] + | PBasic i :: l' -> i :: instrs_to_bdy l' + | PControl _ :: l' -> failwith "instrs_to_bdy: control instruction found" + +let repack li hd = + let last = List.nth li (List.length li - 1) in + if is_control last then + let cut_li = + Array.to_list @@ Array.sub (Array.of_list li) 0 (List.length li - 1) + in + { header = hd; body = instrs_to_bdy cut_li; exit = Some (to_control last) } + else { header = hd; body = instrs_to_bdy li; exit = None } + +module TimeHash = Hashtbl + +(*Hash table : time => list of instruction ids *) + +(* Flattening the minpack result *) +let hashtbl2flatarray h maxint = + let rec f i = + match TimeHash.find_opt h i with + | None -> if i > maxint then [] else f (i + 1) + | Some bund -> bund @ f (i + 1) + in + f 0 + +let find_max l = + let rec f = function + | [] -> None + | e :: l -> ( + match f l with + | None -> Some e + | Some m -> if e > m then Some e else Some m) + in + match f l with None -> raise Not_found | Some m -> m + +(* We still use the minpack algorithm even without bundles, but the result is then flattened *) +(*(* [0, 2, 3, 1, 1, 2, 4, 5] -> [[0], [3, 4], [1, 5], [2], [6], [7]] *)*) +let minpack_list (l : int list) = + let timehash = TimeHash.create (List.length l) in + let rec f i = function + | [] -> () + | t :: l -> + (match TimeHash.find_opt timehash t with + | None -> TimeHash.add timehash t [ i ] + | Some bund -> TimeHash.replace timehash t (bund @ [ i ])); + f (i + 1) l + in + f 0 l; + hashtbl2flatarray timehash (find_max l) + +let bb_to_instrs bb = + body_to_instrs bb.body + @ match bb.exit with None -> [] | Some e -> [ PControl e ] + +let build_solution bb sol = + (* Remove last element - the total *) + let tmp = Array.to_list @@ Array.sub sol 0 (Array.length sol - 1) in + let pack = minpack_list tmp and instrs = bb_to_instrs bb in + repack (get_from_indexes pack instrs) bb.header + +let print_schedule sched = + print_string "[ "; + Array.iter (fun x -> Printf.printf "%d; " x) sched; + print_endline "]" + +let do_schedule bb = + let problem = build_problem bb in + if debug then print_problem stdout problem; + let solution = scheduler_by_name !Clflags.option_fpostpass_sched problem in + match solution with + | None -> failwith "Could not find a valid schedule" + | Some sol -> + if debug then print_schedule sol; + build_solution bb sol + +(** + * Dumb schedule if the above doesn't work + *) + +(* Pack result *) +let pack_result (bb : bblock) = (bb.body, bb.exit) + +let smart_schedule bb = + let bb' = + try do_schedule bb with + | OpaqueInstruction -> + if debug then + Printf.eprintf "OpaqueInstruction raised, using identity scheduling\n"; + bb (* Identity in case of failure *) + | e -> + let msg = Printexc.to_string e and stack = Printexc.get_backtrace () in + Printf.eprintf "Postpass scheduling could not complete: %s\n%s" msg + stack; + failwith "Invalid schedule" + in + pack_result bb' + +let bblock_schedule bb = + let identity_mode = not !Clflags.option_fpostpass in + if debug && not identity_mode then ( + Printf.eprintf "###############################\n"; + Printf.eprintf "SCHEDULING\n"); + if stats then ( + let oc = + open_out_gen [ Open_append; Open_creat ] 0o666 "oracle_stats.csv" + in + Printf.fprintf oc "%d\n" (Camlcoq.Z.to_int (size bb)); + close_out oc); + if identity_mode then pack_result bb else smart_schedule bb + +(** Called schedule function from Coq *) + +(*let schedule_notime bb = let toto = bblock_schedule in toto*) +let schedule bb = + Timing.time_coq + [ + 'P'; + 'o'; + 's'; + 't'; + 'p'; + 'a'; + 's'; + 's'; + 'S'; + 'c'; + 'h'; + 'e'; + 'd'; + 'u'; + 'l'; + 'i'; + 'n'; + 'g'; + ' '; + 'o'; + 'r'; + 'a'; + 'c'; + 'l'; + 'e'; + ] + bblock_schedule bb |