(* *************************************************************) (* *) (* The Compcert verified compiler *) (* *) (* Sylvain Boulmé Grenoble-INP, VERIMAG *) (* David Monniaux CNRS, VERIMAG *) (* Cyril Six Kalray *) (* Léo Gourdin UGA, VERIMAG *) (* *) (* Copyright Kalray. Copyright VERIMAG. All rights reserved. *) (* This file is distributed under the terms of the INRIA *) (* Non-Commercial License Agreement. *) (* *) (* *************************************************************) open Asmblock open OpWeightsAsm open InstructionScheduler let debug = false let stats = false (** * Extracting infos from Asm instructions *) type location = Reg of Asm.preg | Mem | IREG0_XZR type ab_inst_rec = { inst : instruction; write_locs : location list; read_locs : location list; is_control : bool; } (** Asm constructor to real instructions *) exception OpaqueInstruction let is_XZR = function IREG0_XZR -> true | _ -> false let reg_of_pc = Reg Asm.PC let reg_of_dreg r = Reg (Asm.DR r) let reg_of_ireg r = Reg (Asm.DR (Asm.IR (Asm.RR1 r))) let reg_of_iregsp r = Reg (Asm.DR (Asm.IR r)) let reg_of_ireg0 r = match r with Asm.RR0 ir -> reg_of_ireg ir | Asm.XZR -> IREG0_XZR let reg_of_freg r = Reg (Asm.DR (Asm.FR r)) let reg_of_cr r = Reg (Asm.CR r) let regXSP = Reg (Asm.DR (Asm.IR Asm.XSP)) let flags_wlocs = [ reg_of_cr Asm.CN; reg_of_cr Asm.CZ; reg_of_cr Asm.CC; reg_of_cr Asm.CV ] let get_arith_p_wlocs = function | Pfmovimms _ -> [ reg_of_ireg Asm.X16 ] | Pfmovimmd _ -> [ reg_of_ireg Asm.X16 ] | _ -> [] let arith_p_rec i i' rd = { inst = i; write_locs = [ rd ] @ get_arith_p_wlocs i'; read_locs = []; is_control = false; } let arith_pp_rec i rd rs = { inst = i; write_locs = [ rd ]; read_locs = [ rs ]; is_control = false } let arith_ppp_rec i rd r1 r2 = { inst = i; write_locs = [ rd ]; read_locs = [ r1; r2 ]; is_control = false } let arith_rr0r_rec i rd r1 r2 = let rlocs = if is_XZR r1 then [ r2 ] else [ r1; r2 ] in { inst = i; write_locs = [ rd ]; read_locs = rlocs; is_control = false } let arith_rr0_rec i rd r1 = let rlocs = if is_XZR r1 then [] else [ r1 ] in { inst = i; write_locs = [ rd ]; read_locs = rlocs; is_control = false } let arith_arrrr0_rec i rd r1 r2 r3 = let rlocs = if is_XZR r3 then [ r1; r2 ] else [ r1; r2; r3 ] in { inst = i; write_locs = [ rd ]; read_locs = rlocs; is_control = false } let arith_comparison_pp_rec i r1 r2 = { inst = i; write_locs = flags_wlocs; read_locs = [ r1; r2 ]; is_control = false; } let arith_comparison_r0r_rec i r1 r2 = let rlocs = if is_XZR r1 then [ r2 ] else [ r1; r2 ] in { inst = i; write_locs = flags_wlocs; read_locs = rlocs; is_control = false } let arith_comparison_p_rec i r1 = { inst = i; write_locs = flags_wlocs; read_locs = [ r1 ]; is_control = false } let get_eval_addressing_rlocs a = match a with | Asm.ADimm (base, _) -> [ reg_of_iregsp base ] | Asm.ADreg (base, r) -> [ reg_of_iregsp base; reg_of_ireg r ] | Asm.ADlsl (base, r, _) -> [ reg_of_iregsp base; reg_of_ireg r ] | Asm.ADsxt (base, r, _) -> [ reg_of_iregsp base; reg_of_ireg r ] | Asm.ADuxt (base, r, _) -> [ reg_of_iregsp base; reg_of_ireg r ] | Asm.ADadr (base, _, _) -> [ reg_of_iregsp base ] | Asm.ADpostincr (base, _) -> [ reg_of_iregsp base ] let load_rd_a_rec ld rd a = { inst = ld; write_locs = [ rd ]; read_locs = [ Mem ] @ get_eval_addressing_rlocs a; is_control = false; } let load_rd1_rd2_a_rec ld rd1 rd2 a = { inst = ld; write_locs = [ rd1; rd2 ]; read_locs = [ Mem ] @ get_eval_addressing_rlocs a; is_control = false; } let load_rec ldi = match ldi with | PLd_rd_a (ld, rd, a) -> load_rd_a_rec (PBasic (PLoad ldi)) (reg_of_dreg rd) a | Pldp (ld, rd1, rd2, _, _, a) -> load_rd1_rd2_a_rec (PBasic (PLoad ldi)) (reg_of_dreg rd1) (reg_of_dreg rd2) a let store_rs_a_rec st rs a = { inst = st; write_locs = [ Mem ]; read_locs = [ rs; Mem ] @ get_eval_addressing_rlocs a; is_control = false; } let store_rs1_rs2_a_rec st rs1 rs2 a = { inst = st; write_locs = [ Mem ]; read_locs = [ rs1; rs2; Mem ] @ get_eval_addressing_rlocs a; is_control = false; } let store_rec sti = match sti with | PSt_rs_a (st, rs, a) -> store_rs_a_rec (PBasic (PStore sti)) (reg_of_dreg rs) a | Pstp (st, rs1, rs2, _, _, a) -> store_rs1_rs2_a_rec (PBasic (PStore sti)) (reg_of_dreg rs1) (reg_of_dreg rs2) a let loadsymbol_rec i rd id = { inst = i; write_locs = [ rd ]; read_locs = [ Mem ]; is_control = false } let cvtsw2x_rec i rd r1 = { inst = i; write_locs = [ rd ]; read_locs = [ r1 ]; is_control = false } let cvtuw2x_rec i rd r1 = { inst = i; write_locs = [ rd ]; read_locs = [ r1 ]; is_control = false } let cvtx2w_rec i rd = { inst = i; write_locs = [ rd ]; read_locs = [ rd ]; is_control = false } let get_testcond_rlocs c = match c with | Asm.TCeq -> [ reg_of_cr Asm.CZ ] | Asm.TCne -> [ reg_of_cr Asm.CZ ] | Asm.TChs -> [ reg_of_cr Asm.CC ] | Asm.TClo -> [ reg_of_cr Asm.CC ] | Asm.TCmi -> [ reg_of_cr Asm.CN ] | Asm.TCpl -> [ reg_of_cr Asm.CN ] | Asm.TChi -> [ reg_of_cr Asm.CZ; reg_of_cr Asm.CC ] | Asm.TCls -> [ reg_of_cr Asm.CZ; reg_of_cr Asm.CC ] | Asm.TCge -> [ reg_of_cr Asm.CN; reg_of_cr Asm.CV ] | Asm.TClt -> [ reg_of_cr Asm.CN; reg_of_cr Asm.CV ] | Asm.TCgt -> [ reg_of_cr Asm.CN; reg_of_cr Asm.CZ; reg_of_cr Asm.CV ] | Asm.TCle -> [ reg_of_cr Asm.CN; reg_of_cr Asm.CZ; reg_of_cr Asm.CV ] let cset_rec i rd c = { inst = i; write_locs = [ rd ]; read_locs = get_testcond_rlocs c; is_control = false; } let csel_rec i rd r1 r2 c = { inst = i; write_locs = [ rd ]; read_locs = [ r1; r2 ] @ get_testcond_rlocs c; is_control = false; } let fmovi_rec i fsz rd r1 = let rlocs = if is_XZR r1 then [] else [ r1 ] in { inst = i; write_locs = [ rd ]; read_locs = rlocs; is_control = false } let fnmul_rec i fsz rd r1 r2 = { inst = i; write_locs = [ rd ]; read_locs = [ r1; r2 ]; is_control = false } let allocframe_rec i sz linkofs = { inst = i; write_locs = [ Mem; regXSP; reg_of_ireg Asm.X16; reg_of_ireg Asm.X29 ]; read_locs = [ regXSP; Mem ]; is_control = false; } let freeframe_rec i sz linkofs = { inst = i; write_locs = [ Mem; regXSP; reg_of_ireg Asm.X16 ]; read_locs = [ regXSP; Mem ]; is_control = false; } let nop_rec i = { inst = i; write_locs = []; read_locs = []; is_control = false } let arith_rec i = match i with | PArithP (i', rd) -> arith_p_rec (PBasic (PArith i)) i' (reg_of_dreg rd) | PArithPP (i', rd, rs) -> arith_pp_rec (PBasic (PArith i)) (reg_of_dreg rd) (reg_of_dreg rs) | PArithPPP (i', rd, r1, r2) -> arith_ppp_rec (PBasic (PArith i)) (reg_of_dreg rd) (reg_of_dreg r1) (reg_of_dreg r2) | PArithRR0R (i', rd, r1, r2) -> arith_rr0r_rec (PBasic (PArith i)) (reg_of_ireg rd) (reg_of_ireg0 r1) (reg_of_ireg r2) | PArithRR0 (i', rd, r1) -> arith_rr0_rec (PBasic (PArith i)) (reg_of_ireg rd) (reg_of_ireg0 r1) | PArithARRRR0 (i', rd, r1, r2, r3) -> arith_arrrr0_rec (PBasic (PArith i)) (reg_of_ireg rd) (reg_of_ireg r1) (reg_of_ireg r2) (reg_of_ireg0 r3) | PArithComparisonPP (i', r1, r2) -> arith_comparison_pp_rec (PBasic (PArith i)) (reg_of_dreg r1) (reg_of_dreg r2) | PArithComparisonR0R (i', r1, r2) -> arith_comparison_r0r_rec (PBasic (PArith i)) (reg_of_ireg0 r1) (reg_of_ireg r2) | PArithComparisonP (i', r1) -> arith_comparison_p_rec (PBasic (PArith i)) (reg_of_dreg r1) | Pcset (rd, c) -> cset_rec (PBasic (PArith i)) (reg_of_ireg rd) c | Pfmovi (fsz, rd, r1) -> fmovi_rec (PBasic (PArith i)) fsz (reg_of_freg rd) (reg_of_ireg0 r1) | Pcsel (rd, r1, r2, c) -> csel_rec (PBasic (PArith i)) (reg_of_dreg rd) (reg_of_dreg r1) (reg_of_dreg r2) c | Pfnmul (fsz, rd, r1, r2) -> fnmul_rec (PBasic (PArith i)) fsz (reg_of_freg rd) (reg_of_freg r1) (reg_of_freg r2) let basic_rec i = match i with | PArith i' -> arith_rec i' | PLoad ld -> load_rec ld | PStore st -> store_rec st | Pallocframe (sz, linkofs) -> allocframe_rec (PBasic i) sz linkofs | Pfreeframe (sz, linkofs) -> freeframe_rec (PBasic i) sz linkofs | Ploadsymbol (rd, id) -> loadsymbol_rec (PBasic i) (reg_of_ireg rd) id | Pcvtsw2x (rd, r1) -> cvtsw2x_rec (PBasic i) (reg_of_ireg rd) (reg_of_ireg r1) | Pcvtuw2x (rd, r1) -> cvtuw2x_rec (PBasic i) (reg_of_ireg rd) (reg_of_ireg r1) | Pcvtx2w rd -> cvtx2w_rec (PBasic i) (reg_of_ireg rd) | Pnop -> nop_rec (PBasic i) let builtin_rec i ef args res = { inst = i; write_locs = [ Mem ]; read_locs = [ Mem ]; is_control = true } let ctl_flow_rec i = match i with | Pb lbl -> { inst = PControl (PCtlFlow i); write_locs = [ reg_of_pc ]; read_locs = [ reg_of_pc ]; is_control = true; } | Pbc (c, lbl) -> { inst = PControl (PCtlFlow i); write_locs = [ reg_of_pc ]; read_locs = [ reg_of_pc ]; is_control = true; } | Pbl (id, sg) -> { inst = PControl (PCtlFlow i); write_locs = [ reg_of_ireg Asm.X30; reg_of_pc ]; read_locs = [ reg_of_pc ]; is_control = true; } | Pbs (id, sg) -> { inst = PControl (PCtlFlow i); write_locs = [ reg_of_pc ]; read_locs = []; is_control = true; } | Pblr (r, sg) -> { inst = PControl (PCtlFlow i); write_locs = [ reg_of_ireg Asm.X30; reg_of_pc ]; read_locs = [ reg_of_ireg r ]; is_control = true; } | Pbr (r, sg) -> { inst = PControl (PCtlFlow i); write_locs = [ reg_of_pc ]; read_locs = [ reg_of_ireg r ]; is_control = true; } | Pret r -> { inst = PControl (PCtlFlow i); write_locs = [ reg_of_pc ]; read_locs = [ reg_of_ireg r ]; is_control = true; } | Pcbnz (sz, r, lbl) -> { inst = PControl (PCtlFlow i); write_locs = [ reg_of_pc ]; read_locs = [ reg_of_ireg r; reg_of_pc ]; is_control = true; } | Pcbz (sz, r, lbl) -> { inst = PControl (PCtlFlow i); write_locs = [ reg_of_pc ]; read_locs = [ reg_of_ireg r; reg_of_pc ]; is_control = true; } | Ptbnz (sz, r, n, lbl) -> { inst = PControl (PCtlFlow i); write_locs = [ reg_of_pc ]; read_locs = [ reg_of_ireg r; reg_of_pc ]; is_control = true; } | Ptbz (sz, r, n, lbl) -> { inst = PControl (PCtlFlow i); write_locs = [ reg_of_pc ]; read_locs = [ reg_of_ireg r; reg_of_pc ]; is_control = true; } | Pbtbl (r1, tbl) -> { inst = PControl (PCtlFlow i); write_locs = [ reg_of_ireg Asm.X16; reg_of_ireg Asm.X17; reg_of_pc ]; read_locs = [ reg_of_ireg r1; reg_of_pc ]; is_control = true; } let control_rec i = match i with | Pbuiltin (ef, args, res) -> builtin_rec (PControl i) ef args res | PCtlFlow i' -> ctl_flow_rec i' let rec basic_recs body = match body with [] -> [] | bi :: body -> basic_rec bi :: basic_recs body let exit_rec exit = match exit with None -> [] | Some ex -> [ control_rec ex ] let instruction_recs bb = basic_recs bb.body @ exit_rec bb.exit (** * Providing informations relative to the real instructions *) type inst_info = { write_locs : location list; read_locs : location list; is_control : bool; (* resources consumed by the instruction *) usage : int array; latency : int; } (** Abstraction providing all the necessary informations for solving the scheduling problem *) let rec_to_info (r : ab_inst_rec) : inst_info = let opweights = OpWeightsAsm.get_opweights () in let usage = opweights.resources_of_op r.inst 0 and latency = opweights.latency_of_op r.inst 0 in { write_locs = r.write_locs; read_locs = r.read_locs; usage; latency; is_control = r.is_control; } let instruction_infos bb = List.map rec_to_info (instruction_recs bb) let instruction_usages bb = let usages = List.map (fun info -> info.usage) (instruction_infos bb) in Array.of_list usages (** * Latency constraints building *) module LocHash = Hashtbl (* Hash table : location => list of instruction ids *) let rec intlist n = if n < 0 then failwith "intlist: n < 0" else if n = 0 then [] else (n - 1) :: intlist (n - 1) let find_in_hash hashloc loc = match LocHash.find_opt hashloc loc with Some idl -> idl | None -> [] (* Returns a list of instruction ids *) let rec get_accesses hashloc (ll : location list) = match ll with | [] -> [] | loc :: llocs -> find_in_hash hashloc loc @ get_accesses hashloc llocs let compute_latency (ifrom : inst_info) = ifrom.latency let latency_constraints bb = let written = LocHash.create 70 and read = LocHash.create 70 and count = ref 0 and constraints = ref [] and instr_infos = instruction_infos bb in let step (i : inst_info) = let raw = get_accesses written i.read_locs and waw = get_accesses written i.write_locs and war = get_accesses read i.write_locs in List.iter (fun i -> constraints := { instr_from = i; instr_to = !count; latency = compute_latency (List.nth instr_infos i); } :: !constraints) raw; List.iter (fun i -> constraints := { instr_from = i; instr_to = !count; latency = compute_latency (List.nth instr_infos i); } :: !constraints) waw; List.iter (fun i -> constraints := { instr_from = i; instr_to = !count; latency = 0 } :: !constraints) war; if i.is_control then List.iter (fun n -> constraints := { instr_from = n; instr_to = !count; latency = 0 } :: !constraints) (intlist !count); (* Updating "read" and "written" hashmaps *) List.iter (fun loc -> LocHash.replace written loc [ !count ]; LocHash.replace read loc [] (* Clearing all the entries of "read" hashmap when a register is written *)) i.write_locs; List.iter (fun loc -> LocHash.replace read loc (!count :: find_in_hash read loc)) i.read_locs; count := !count + 1 in List.iter step instr_infos; !constraints (** * Using the InstructionScheduler *) let opweights = OpWeightsAsm.get_opweights () let build_problem bb = { max_latency = -1; resource_bounds = opweights.pipelined_resource_bounds; live_regs_entry = Registers.Regset.empty; (* unused here *) typing = (fun x -> AST.Tint); (* unused here *) reference_counting = None; instruction_usages = instruction_usages bb; latency_constraints = latency_constraints bb; } let get_from_indexes indexes l = List.map (List.nth l) indexes (*let is_basic = function PBasic _ -> true | _ -> false*) let is_control = function PControl _ -> true | _ -> false let to_control = function | PControl i -> i | _ -> failwith "to_control: basic instruction found" let rec body_to_instrs bdy = match bdy with [] -> [] | i :: l' -> PBasic i :: body_to_instrs l' let rec instrs_to_bdy instrs = match instrs with | [] -> [] | PBasic i :: l' -> i :: instrs_to_bdy l' | PControl _ :: l' -> failwith "instrs_to_bdy: control instruction found" let repack li hd = let last = List.nth li (List.length li - 1) in if is_control last then let cut_li = Array.to_list @@ Array.sub (Array.of_list li) 0 (List.length li - 1) in { header = hd; body = instrs_to_bdy cut_li; exit = Some (to_control last) } else { header = hd; body = instrs_to_bdy li; exit = None } module TimeHash = Hashtbl (*Hash table : time => list of instruction ids *) (* Flattening the minpack result *) let hashtbl2flatarray h maxint = let rec f i = match TimeHash.find_opt h i with | None -> if i > maxint then [] else f (i + 1) | Some bund -> bund @ f (i + 1) in f 0 let find_max l = let rec f = function | [] -> None | e :: l -> ( match f l with | None -> Some e | Some m -> if e > m then Some e else Some m) in match f l with None -> raise Not_found | Some m -> m (* We still use the minpack algorithm even without bundles, but the result is then flattened *) (*(* [0, 2, 3, 1, 1, 2, 4, 5] -> [[0], [3, 4], [1, 5], [2], [6], [7]] *)*) let minpack_list (l : int list) = let timehash = TimeHash.create (List.length l) in let rec f i = function | [] -> () | t :: l -> (match TimeHash.find_opt timehash t with | None -> TimeHash.add timehash t [ i ] | Some bund -> TimeHash.replace timehash t (bund @ [ i ])); f (i + 1) l in f 0 l; hashtbl2flatarray timehash (find_max l) let bb_to_instrs bb = body_to_instrs bb.body @ match bb.exit with None -> [] | Some e -> [ PControl e ] let build_solution bb sol = (* Remove last element - the total *) let tmp = Array.to_list @@ Array.sub sol 0 (Array.length sol - 1) in let pack = minpack_list tmp and instrs = bb_to_instrs bb in repack (get_from_indexes pack instrs) bb.header let print_schedule sched = print_string "[ "; Array.iter (fun x -> Printf.printf "%d; " x) sched; print_endline "]" let do_schedule bb = let problem = build_problem bb in if debug then print_problem stdout problem; let solution = scheduler_by_name !Clflags.option_fpostpass_sched problem in match solution with | None -> failwith "Could not find a valid schedule" | Some sol -> if debug then print_schedule sol; build_solution bb sol (** * Dumb schedule if the above doesn't work *) (* Pack result *) let pack_result (bb : bblock) = (bb.body, bb.exit) let smart_schedule bb = let bb' = try do_schedule bb with | OpaqueInstruction -> if debug then Printf.eprintf "OpaqueInstruction raised, using identity scheduling\n"; bb (* Identity in case of failure *) | e -> let msg = Printexc.to_string e and stack = Printexc.get_backtrace () in Printf.eprintf "Postpass scheduling could not complete: %s\n%s" msg stack; failwith "Invalid schedule" in pack_result bb' let bblock_schedule bb = let identity_mode = not !Clflags.option_fpostpass in if debug && not identity_mode then ( Printf.eprintf "###############################\n"; Printf.eprintf "SCHEDULING\n"); if stats then ( let oc = open_out_gen [ Open_append; Open_creat ] 0o666 "oracle_stats.csv" in Printf.fprintf oc "%d\n" (Camlcoq.Z.to_int (size bb)); close_out oc); if identity_mode then pack_result bb else smart_schedule bb (** Called schedule function from Coq *) (*let schedule_notime bb = let toto = bblock_schedule in toto*) let schedule bb = Timing.time_coq [ 'P'; 'o'; 's'; 't'; 'p'; 'a'; 's'; 's'; 'S'; 'c'; 'h'; 'e'; 'd'; 'u'; 'l'; 'i'; 'n'; 'g'; ' '; 'o'; 'r'; 'a'; 'c'; 'l'; 'e'; ] bblock_schedule bb