diff options
author | Léo Gourdin <leo.gourdin@univ-grenoble-alpes.fr> | 2020-11-05 17:00:53 +0100 |
---|---|---|
committer | Léo Gourdin <leo.gourdin@univ-grenoble-alpes.fr> | 2020-11-05 17:00:53 +0100 |
commit | 2f25b395ef13d9c89260c932120a983218d7807c (patch) | |
tree | 6ddc3a2caf37ddf7d13a7a81c68da7c10c148ebc /aarch64/PostpassSchedulingOracle.ml | |
parent | 0bf4c8582574b9c7bea43547d75b87c85fdee1e1 (diff) | |
download | compcert-kvx-2f25b395ef13d9c89260c932120a983218d7807c.tar.gz compcert-kvx-2f25b395ef13d9c89260c932120a983218d7807c.zip |
A first (not well-tuned) working version of postpass scheduling for A64
Diffstat (limited to 'aarch64/PostpassSchedulingOracle.ml')
-rw-r--r-- | aarch64/PostpassSchedulingOracle.ml | 1427 |
1 files changed, 713 insertions, 714 deletions
diff --git a/aarch64/PostpassSchedulingOracle.ml b/aarch64/PostpassSchedulingOracle.ml index eba10496..9669753d 100644 --- a/aarch64/PostpassSchedulingOracle.ml +++ b/aarch64/PostpassSchedulingOracle.ml @@ -13,390 +13,522 @@ (* *) (* *************************************************************) -(*open Asm*) open Asmblock open OpWeightsAsm -(*open Printf*) -(*open Camlcoq*) open InstructionScheduler -(*open TargetPrinter.Target*) -let debug = false +let debug = true (** * Extracting infos from Asm instructions *) -type real_instruction = - | Add | Adr | Adrp | And | Asr | B | Bic | Bl | Blr | Br | Cbnz | Cbz | Cls - | Clz | Cmn | Cmp | Csel | Cset | Eon | Eor | Fabs | Fadd | Fcmp | Fcsel - | Fcvt | Fcvtzs | Fcvtzu | Fdiv | Fmadd | Fmov | Fmsub | Fmul | Fnmadd - | Fnmul | Fnmsub | Fneg | Fsqrt | Fsub | Ldaxr | Ldp | Ldr | Ldrb | Ldrh - | Ldrsb | Ldrsh | Ldrsw | Lr | Lsl | Lsr | Madd | Mov | Movk | Movn | Movz - | Msub | Nop | Orn | Orr | Ret | Rev | Rev16 | Ror | Sbfiz | Sbfx | Scvtf - | Sdiv | Smulh | Stlxr | Stp | Str | Strb | Strh | Sub | Sxtb | Sxth | Sxtw - | Tbnz | Tbz | Tst | Ubfiz | Ubfx | Ucvtf | Udiv | Umulh | Uxtb | Uxth - | Uxtw | Uxtx +type location = Reg of Asm.preg | Mem | IREG0_XZR type ab_inst_rec = { - inst: instruction; + inst : real_instruction; + write_locs : location list; + read_locs : location list; is_control : bool; - usage: int array; (* resources consumed by the instruction *) - latency: int; } (** Asm constructor to real instructions *) exception OpaqueInstruction -exception NYI (* XXX *) let arith_p_real = function - | Padrp(_,_) -> Adrp - | Pmovz(_,_,_) -> Movz - | Pmovn(_,_,_) -> Movn - | Pfmovimms(_) -> Fmov (* TODO not sure about this and below too *) - | Pfmovimmd(_) -> Fmov + | Padrp (_, _) -> Adrp + | Pmovz (_, _, _) -> Movz + | Pmovn (_, _, _) -> Movn + | Pfmovimms _ -> Fmov (* TODO not sure about this and below too *) + | Pfmovimmd _ -> Fmov let arith_pp_real = function - | Pmov -> Mov - | Pmovk(_,_,_) -> Movk - | Paddadr(_,_) -> Add - | Psbfiz(_,_,_) -> Sbfiz - | Psbfx(_,_,_) -> Sbfx - | Pubfiz(_,_,_) -> Ubfiz - | Pubfx(_,_,_) -> Ubfx - | Pfmov -> Fmov - | Pfcvtds -> Fcvt - | Pfcvtsd -> Fcvt - | Pfabs(_) -> Fabs - | Pfneg(_) -> Fneg - | Pscvtf(_,_) -> Scvtf - | Pucvtf(_,_) -> Ucvtf - | Pfcvtzs(_,_) -> Fcvtzs - | Pfcvtzu(_,_) -> Fcvtzu - | Paddimm(_,_) -> Add - | Psubimm(_,_) -> Sub + | Pmov -> Mov + | Pmovk (_, _, _) -> Movk + | Paddadr (_, _) -> Add + | Psbfiz (_, _, _) -> Sbfiz + | Psbfx (_, _, _) -> Sbfx + | Pubfiz (_, _, _) -> Ubfiz + | Pubfx (_, _, _) -> Ubfx + | Pfmov -> Fmov + | Pfcvtds -> Fcvt + | Pfcvtsd -> Fcvt + | Pfabs _ -> Fabs + | Pfneg _ -> Fneg + | Pscvtf (_, _) -> Scvtf + | Pucvtf (_, _) -> Ucvtf + | Pfcvtzs (_, _) -> Fcvtzs + | Pfcvtzu (_, _) -> Fcvtzu + | Paddimm (_, _) -> Add + | Psubimm (_, _) -> Sub let arith_ppp_real = function - | Pasrv(_) -> Asr - | Plslv(_) -> Lsl - | Plsrv(_) -> Lsr - | Prorv(_) -> Ror - | Psmulh -> Smulh - | Pumulh -> Umulh - | Psdiv(_) -> Sdiv - | Pudiv(_) -> Udiv - | Paddext(_) -> Add - | Psubext(_) -> Sub - | Pfadd(_) -> Fadd - | Pfdiv(_) -> Fdiv - | Pfmul(_) -> Fmul - | Pfsub(_) -> Fsub + | Pasrv _ -> Asr + | Plslv _ -> Lsl + | Plsrv _ -> Lsr + | Prorv _ -> Ror + | Psmulh -> Smulh + | Pumulh -> Umulh + | Psdiv _ -> Sdiv + | Pudiv _ -> Udiv + | Paddext _ -> Add + | Psubext _ -> Sub + | Pfadd _ -> Fadd + | Pfdiv _ -> Fdiv + | Pfmul _ -> Fmul + | Pfsub _ -> Fsub let arith_rr0r_real = function - | Padd(_,_) -> Add - | Psub(_,_) -> Sub - | Pand(_,_) -> And - | Pbic(_,_) -> Bic - | Peon(_,_) -> Eon - | Peor(_,_) -> Eor - | Porr(_,_) -> Orr - | Porn(_,_) -> Orn + | Padd (_, _) -> Add + | Psub (_, _) -> Sub + | Pand (_, _) -> And + | Pbic (_, _) -> Bic + | Peon (_, _) -> Eon + | Peor (_, _) -> Eor + | Porr (_, _) -> Orr + | Porn (_, _) -> Orn let arith_rr0_real = function - | Pandimm(_,_) -> And - | Peorimm(_,_) -> Eor - | Porrimm(_,_) -> Orr + | Pandimm (_, _) -> And + | Peorimm (_, _) -> Eor + | Porrimm (_, _) -> Orr -let arith_arrrr0_real = function - | Pmadd(_) -> Madd - | Pmsub(_) -> Msub +let arith_arrrr0_real = function Pmadd _ -> Madd | Pmsub _ -> Msub let arith_comparison_p_real = function - | Pfcmp0(_) -> Fcmp - | Pcmpimm(_,_) -> Cmp - | Pcmnimm(_,_) -> Cmn - | Ptstimm(_,_) -> Tst + | Pfcmp0 _ -> Fcmp + | Pcmpimm (_, _) -> Cmp + | Pcmnimm (_, _) -> Cmn + | Ptstimm (_, _) -> Tst let arith_comparison_pp_real = function - | Pcmpext(_) -> Cmp - | Pcmnext(_) -> Cmn - | Pfcmp(_) -> Fcmp + | Pcmpext _ -> Cmp + | Pcmnext _ -> Cmn + | Pfcmp _ -> Fcmp let arith_comparison_r0r_real = function - | Pcmp(_,_) -> Cmp - | Pcmn(_,_) -> Cmn - | Ptst(_,_) -> Tst + | Pcmp (_, _) -> Cmp + | Pcmn (_, _) -> Cmn + | Ptst (_, _) -> Tst let arith_comparison_rr0r_real = function - | Padd(_,_) -> Add - | Psub(_,_) -> Sub - | Pand(_,_) -> And - | Pbic(_,_) -> Bic - | Peon(_,_) -> Eon - | Peor(_,_) -> Eor - | Porr(_,_) -> Orr - | Porn(_,_) -> Orn + | Padd (_, _) -> Add + | Psub (_, _) -> Sub + | Pand (_, _) -> And + | Pbic (_, _) -> Bic + | Peon (_, _) -> Eon + | Peor (_, _) -> Eor + | Porr (_, _) -> Orr + | Porn (_, _) -> Orn let arith_comparison_rr0_real = function - | Pandimm(_,_) -> And - | Peorimm(_,_) -> Eor - | Porrimm(_,_) -> Orr + | Pandimm (_, _) -> And + | Peorimm (_, _) -> Eor + | Porrimm (_, _) -> Orr -let arith_comparison_arrrr0_real = function - | Pmadd(_) -> Madd - | Pmsub(_) -> Msub +let arith_comparison_arrrr0_real = function Pmadd _ -> Madd | Pmsub _ -> Msub let store_rs_a_real = function - | Pstrw -> Str - | Pstrw_a -> Str - | Pstrx -> Str - | Pstrx_a -> Str - | Pstrb -> Strb - | Pstrh -> Strh - | Pstrs -> Str - | Pstrd -> Str - | Pstrd_a -> Str + | Pstrw -> Str + | Pstrw_a -> Str + | Pstrx -> Str + | Pstrx_a -> Str + | Pstrb -> Strb + | Pstrh -> Strh + | Pstrs -> Str + | Pstrd -> Str + | Pstrd_a -> Str let load_rd_a_real = function - | Pldrw -> Ldr - | Pldrw_a -> Ldr - | Pldrx -> Ldr - | Pldrx_a -> Ldr - | Pldrb(_) -> Ldrb - | Pldrsb(_) -> Ldrsb - | Pldrh(_) -> Ldrh - | Pldrsh(_) -> Ldrsh - | Pldrzw -> Ldr - | Pldrsw -> Ldrsw - | Pldrs -> Ldr - | Pldrd -> Ldr - | Pldrd_a -> Ldr - - - -(*let set_real = Set*) -(*let get_real = Get*) -(*let nop_real = Nop*) -(*let loadsymbol_real = Make*) -(*let loadqrro_real = Lq*) -(*let loadorro_real = Lo*) -(*let storeqrro_real = Sq*) -(*let storeorro_real = So*) - -(*let ret_real = Ret*) -(*let call_real = Call*) -(*let icall_real = Icall*) -(*let goto_real = Goto*) -(*let igoto_real = Igoto*) -(*let jl_real = Goto*) -(*let cb_real = Cb*) -(*let cbu_real = Cb*) - -(*let arith_rri32_rec i rd rs imm32 = { inst = arith_rri32_real i; write_locs = [Reg rd]; read_locs = [Reg rs]; imm = imm32; is_control = false;*) - (*read_at_id = []; read_at_e1 = [] }*) - -(*let arith_rri64_rec i rd rs imm64 = { inst = arith_rri64_real i; write_locs = [Reg rd]; read_locs = [Reg rs]; imm = imm64; is_control = false;*) - (*read_at_id = []; read_at_e1 = [] }*) - -(*let arith_rrr_rec i rd rs1 rs2 = { inst = arith_rrr_real i; write_locs = [Reg rd]; read_locs = [Reg rs1; Reg rs2]; imm = None; is_control = false;*) - (*read_at_id = []; read_at_e1 = [] }*) - -(*let arith_arri32_rec i rd rs imm32 = *) - (*let rae1 = match i with Pmaddiw -> [Reg rd] | _ -> []*) - (*in { inst = arith_arri32_real i; write_locs = [Reg rd]; read_locs = [Reg rd; Reg rs]; imm = imm32; is_control = false;*) - (*read_at_id = [] ; read_at_e1 = rae1 }*) - -(*let arith_arri64_rec i rd rs imm64 = *) - (*let rae1 = match i with Pmaddil -> [Reg rd] | _ -> []*) - (*in { inst = arith_arri64_real i; write_locs = [Reg rd]; read_locs = [Reg rd; Reg rs]; imm = imm64; is_control = false;*) - (*read_at_id = []; read_at_e1 = rae1 }*) - -(*let arith_arr_rec i rd rs = { inst = arith_arr_real i; write_locs = [Reg rd]; read_locs = [Reg rd; Reg rs]; imm = None; is_control = false;*) - (*read_at_id = []; read_at_e1 = [] }*) - -(*let arith_arrr_rec i rd rs1 rs2 = *) - (*let rae1 = match i with Pmaddl | Pmaddw | Pmsubl | Pmsubw -> [Reg rd] | _ -> []*) - (*in { inst = arith_arrr_real i; write_locs = [Reg rd]; read_locs = [Reg rd; Reg rs1; Reg rs2]; imm = None; is_control = false;*) - (*read_at_id = []; read_at_e1 = rae1 }*) - -(*let arith_rr_rec i rd rs = { inst = arith_rr_real i; write_locs = [Reg rd]; read_locs = [Reg rs]; imm = None; is_control = false;*) - (*read_at_id = []; read_at_e1 = [] }*) - -(*let arith_p_rec i rd = { inst = arith_p_real i; is_control = false }*) -(*let arith_pp_rec i rd rs = { inst = arith_pp_real i; is_control = false }*) -(*let arith_ppp_rec i rd r1 r2 = { inst = arith_ppp_real i; is_control = false }*) -(*let arith_rr0r_rec i rd r1 r2 = { inst = arith_rr0r_real i; is_control = false }*) -(*let arith_rr0_rec i rd r1 = { inst = arith_rr0_real i; is_control = false }*) -(*let arith_arrrr0_rec i rd r1 r2 r3 = { inst = arith_arrrr0_real i; is_control = false }*) -(*let arith_comparison_pp_rec i r1 r2 = { inst = arith_comparison_pp_real i; is_control = false }*) -(*let arith_comparison_r0r_rec i r1 r2 = { inst = arith_comparison_r0r_real i; is_control = false }*) -(*let arith_comparison_p_rec i r1 = { inst = arith_comparison_p_real i; is_control = false }*) -(*let load_rec ld rd a = { inst = load_rd_a_real ld; is_control = false }*) -(*let store_rec st r a = { inst = store_rs_a_real st; is_control = false }*) - -(*let arith_rec i =*) - (*match i with*) - (*| PArithP (i', rd) -> arith_p_rec i' rd*) - (*| PArithPP (i', rd, rs) -> arith_pp_rec i' rd rs*) - (*| PArithPPP (i', rd, r1, r2) -> arith_ppp_rec i' rd r1 r2*) - (*| PArithRR0R (i', rd, r1, r2) -> arith_rr0r_rec i' rd r1 r2*) - (*| PArithRR0 (i', rd, r1) -> arith_rr0_rec i' rd r1*) - (*| PArithARRRR0 (i', rd, r1, r2, r3) -> arith_arrrr0_rec i' rd r1 r2 r3*) - (*| PArithComparisonPP (i', r1, r2) -> arith_comparison_pp_rec i' r1 r2*) - (*| PArithComparisonR0R (i', r1, r2) -> arith_comparison_r0r_rec i' r1 r2*) - (*| PArithComparisonP (i', r1) -> arith_comparison_p_rec i' r1*) - (*| _ -> raise NYI*) - - - -(*let load_rec i = match i with*) - (*| PLoadRRO (trap, i, rs1, rs2, imm) ->*) - (*{ inst = load_real i; write_locs = [Reg (IR rs1)]; read_locs = [Mem; Reg (IR rs2)]; imm = (Some (Off imm)) ; is_control = false;*) - (*read_at_id = []; read_at_e1 = [] }*) - (*| PLoadQRRO(rs, ra, imm) ->*) - (*let (rs0, rs1) = gpreg_q_expand rs in*) - (*{ inst = loadqrro_real; write_locs = [Reg (IR rs0); Reg (IR rs1)]; read_locs = [Mem; Reg (IR ra)]; imm = (Some (Off imm)) ; is_control = false;*) - (*read_at_id = []; read_at_e1 = [] }*) - (*| PLoadORRO(rs, ra, imm) ->*) - (*let (((rs0, rs1), rs2), rs3) = gpreg_o_expand rs in*) - (*{ inst = loadorro_real; write_locs = [Reg (IR rs0); Reg (IR rs1); Reg (IR rs2); Reg (IR rs3)]; read_locs = [Mem; Reg (IR ra)];*) - (*imm = (Some (Off imm)) ; is_control = false; read_at_id = []; read_at_e1 = []}*) - (*| PLoadRRR (trap, i, rs1, rs2, rs3) | PLoadRRRXS (trap, i, rs1, rs2, rs3) ->*) - (*{ inst = load_real i; write_locs = [Reg (IR rs1)]; read_locs = [Mem; Reg (IR rs2); Reg (IR rs3)]; imm = None ; is_control = false;*) - (*read_at_id = []; read_at_e1 = [] }*) - -(*let store_rec i = match i with*) - (*| PStoreRRO (i, rs, ra, imm) ->*) - (*{ inst = store_real i; write_locs = [Mem]; read_locs = [Reg (IR rs); Reg (IR ra)]; imm = (Some (Off imm));*) - (*read_at_id = []; read_at_e1 = [Reg (IR rs)] ; is_control = false}*) - (*| PStoreQRRO (rs, ra, imm) ->*) - (*let (rs0, rs1) = gpreg_q_expand rs in*) - (*{ inst = storeqrro_real; write_locs = [Mem]; read_locs = [Reg (IR rs0); Reg (IR rs1); Reg (IR ra)]; imm = (Some (Off imm));*) - (*read_at_id = []; read_at_e1 = [Reg (IR rs0); Reg (IR rs1)] ; is_control = false}*) - (*| PStoreORRO (rs, ra, imm) ->*) - (*let (((rs0, rs1), rs2), rs3) = gpreg_o_expand rs in*) - (*{ inst = storeorro_real; write_locs = [Mem]; read_locs = [Reg (IR rs0); Reg (IR rs1); Reg (IR rs2); Reg (IR rs3); Reg (IR ra)];*) - (*imm = (Some (Off imm)); read_at_id = []; read_at_e1 = [Reg (IR rs0); Reg (IR rs1); Reg (IR rs2); Reg (IR rs3)]; is_control = false}*) - (*| PStoreRRR (i, rs, ra1, ra2) | PStoreRRRXS (i, rs, ra1, ra2) ->*) - (*{ inst = store_real i; write_locs = [Mem]; read_locs = [Reg (IR rs); Reg (IR ra1); Reg (IR ra2)]; imm = None;*) - (*read_at_id = []; read_at_e1 = [Reg (IR rs)]; is_control = false}*) - -(*let get_rec (rd:gpreg) rs = { inst = get_real; write_locs = [Reg (IR rd)]; read_locs = [Reg rs]; imm = None; is_control = false;*) - (*read_at_id = []; read_at_e1 = [] }*) - -(*let set_rec rd (rs:gpreg) = { inst = set_real; write_locs = [Reg rd]; read_locs = [Reg (IR rs)]; imm = None; is_control = false;*) - (*read_at_id = [Reg (IR rs)]; read_at_e1 = [] }*) + | Pldrw -> Ldr + | Pldrw_a -> Ldr + | Pldrx -> Ldr + | Pldrx_a -> Ldr + | Pldrb _ -> Ldrb + | Pldrsb _ -> Ldrsb + | Pldrh _ -> Ldrh + | Pldrsh _ -> Ldrsh + | Pldrzw -> Ldr + | Pldrsw -> Ldrsw + | Pldrs -> Ldr + | Pldrd -> Ldr + | Pldrd_a -> Ldr + +let loadsymbol_real = Ldr + +let cvtsw2x_real = Sxtw + +let cvtuw2x_real = Uxtw + +let cset_real = Cset + +let csel_real = Csel + +let fmovi_real = Fmov + +let fnmul_real = Fnmul + +let b_real = B + +let bc_real = B + +let bl_real = Bl + +let bs_real = B + +let blr_real = Blr + +let br_real = Br + +let ret_real = Ret + +let cbnz_real = Cbnz + +let cbz_real = Cbz + +let tbnz_real = Tbnz + +let tbz_real = Tbz + +let btbl_real = Btbl + +let allocframe_real = Allocframe + +let freeframe_real = Freeframe + +let builtin_real = Builtin + +let cvtx2w_real = Cvtx2w + +let is_XZR = function IREG0_XZR -> true | _ -> false + +let reg_of_dreg r = Reg (Asm.DR r) + +let reg_of_ireg r = Reg (Asm.DR (Asm.IR (Asm.RR1 r))) + +let reg_of_ireg0 r = + match r with Asm.RR0 ir -> reg_of_ireg ir | Asm.XZR -> IREG0_XZR + +let reg_of_freg r = Reg (Asm.DR (Asm.FR r)) + +let arith_p_rec i rd = + { + inst = arith_p_real i; + write_locs = [ rd ]; + read_locs = []; + is_control = false; + } + +let arith_pp_rec i rd rs = + { + inst = arith_pp_real i; + write_locs = [ rd ]; + read_locs = [ rs ]; + is_control = false; + } + +let arith_ppp_rec i rd r1 r2 = + { + inst = arith_ppp_real i; + write_locs = [ rd ]; + read_locs = [ r1; r2 ]; + is_control = false; + } + +let arith_rr0r_rec i rd r1 r2 = + let rlocs = if is_XZR r1 then [ r2 ] else [ r1; r2 ] in + { + inst = arith_rr0r_real i; + write_locs = [ rd ]; + read_locs = rlocs; + is_control = false; + } + +let arith_rr0_rec i rd r1 = + let rlocs = if is_XZR r1 then [] else [ r1 ] in + { + inst = arith_rr0_real i; + write_locs = [ rd ]; + read_locs = rlocs; + is_control = false; + } + +let arith_arrrr0_rec i rd r1 r2 r3 = + let rlocs = if is_XZR r3 then [ r1; r2 ] else [ r1; r2; r3 ] in + { + inst = arith_arrrr0_real i; + write_locs = [ rd ]; + read_locs = rlocs; + is_control = false; + } + +let arith_comparison_pp_rec i r1 r2 = + { + inst = arith_comparison_pp_real i; + write_locs = []; + read_locs = [ r1; r2 ]; + is_control = false; + } + +let arith_comparison_r0r_rec i r1 r2 = + let rlocs = if is_XZR r1 then [ r2 ] else [ r1; r2 ] in + { + inst = arith_comparison_r0r_real i; + write_locs = []; + read_locs = rlocs; + is_control = false; + } + +let arith_comparison_p_rec i r1 = + { + inst = arith_comparison_p_real i; + write_locs = []; + read_locs = [ r1 ]; + is_control = false; + } + +let load_rec ld rd a = + { + inst = load_rd_a_real ld; + write_locs = [ rd ]; + read_locs = [ Mem ]; + is_control = false; + } + +let store_rec st r a = + { + inst = store_rs_a_real st; + write_locs = [ Mem ]; + read_locs = [ r ]; + is_control = false; + } + +let loadsymbol_rec rd id = + { + inst = loadsymbol_real; + write_locs = [ rd ]; + read_locs = [ Mem ]; + is_control = false; + } + +let cvtsw2x_rec rd r1 = + { + inst = cvtsw2x_real; + write_locs = [ rd ]; + read_locs = [ r1 ]; + is_control = false; + } + +let cvtuw2x_rec rd r1 = + { + inst = cvtuw2x_real; + write_locs = [ rd ]; + read_locs = [ r1 ]; + is_control = false; + } + +let cvtx2w_rec rd = + { inst = cvtx2w_real; write_locs = []; read_locs = []; is_control = false } + +let cset_rec rd c = + { inst = cset_real; write_locs = [ rd ]; read_locs = []; is_control = false } + +let csel_rec rd r1 r2 c = + { + inst = csel_real; + write_locs = [ rd ]; + read_locs = [ r1; r2 ]; + is_control = false; + } + +let fmovi_rec fsz rd r1 = + let rlocs = if is_XZR r1 then [] else [ r1 ] in + { + inst = fmovi_real; + write_locs = [ rd ]; + read_locs = rlocs; + is_control = false; + } + +let fnmul_rec fsz rd r1 r2 = + { + inst = fnmul_real; + write_locs = [ rd ]; + read_locs = [ r1; r2 ]; + is_control = false; + } + +let allocframe_rec sz linkofs = + { + inst = allocframe_real; + write_locs = + [ + Mem; + Reg (Asm.DR (Asm.IR Asm.XSP)); + reg_of_ireg Asm.X16; + reg_of_ireg Asm.X29; + ]; + read_locs = [ Reg (Asm.DR (Asm.IR Asm.XSP)) ]; + is_control = false; + } + +let freeframe_rec sz linkofs = + { + inst = freeframe_real; + write_locs = [ Mem; Reg (Asm.DR (Asm.IR Asm.XSP)); reg_of_ireg Asm.X16 ]; + read_locs = [ Mem; Reg (Asm.DR (Asm.IR Asm.XSP)) ]; + is_control = false; + } + +let arith_rec i = + match i with + | PArithP (i', rd) -> arith_p_rec i' (reg_of_dreg rd) + | PArithPP (i', rd, rs) -> arith_pp_rec i' (reg_of_dreg rd) (reg_of_dreg rs) + | PArithPPP (i', rd, r1, r2) -> + arith_ppp_rec i' (reg_of_dreg rd) (reg_of_dreg r1) (reg_of_dreg r2) + | PArithRR0R (i', rd, r1, r2) -> + arith_rr0r_rec i' (reg_of_ireg rd) (reg_of_ireg0 r1) (reg_of_ireg r2) + | PArithRR0 (i', rd, r1) -> + arith_rr0_rec i' (reg_of_ireg rd) (reg_of_ireg0 r1) + | PArithARRRR0 (i', rd, r1, r2, r3) -> + arith_arrrr0_rec i' (reg_of_ireg rd) (reg_of_ireg r1) (reg_of_ireg r2) + (reg_of_ireg0 r3) + | PArithComparisonPP (i', r1, r2) -> + arith_comparison_pp_rec i' (reg_of_dreg r1) (reg_of_dreg r2) + | PArithComparisonR0R (i', r1, r2) -> + arith_comparison_r0r_rec i' (reg_of_ireg0 r1) (reg_of_ireg r2) + | PArithComparisonP (i', r1) -> arith_comparison_p_rec i' (reg_of_dreg r1) + | Pcset (rd, c) -> cset_rec (reg_of_ireg rd) c + | Pfmovi (fsz, rd, r1) -> fmovi_rec fsz (reg_of_freg rd) (reg_of_ireg0 r1) + | Pcsel (rd, r1, r2, c) -> + csel_rec (reg_of_dreg rd) (reg_of_dreg r1) (reg_of_dreg r2) c + | Pfnmul (fsz, rd, r1, r2) -> + fnmul_rec fsz (reg_of_freg rd) (reg_of_freg r1) (reg_of_freg r2) let basic_rec i = - let opweights = get_opweights () - and i' = PBasic i in - { inst = i'; usage = opweights.resources_of_op i' 0; latency = opweights.latency_of_op i' 0; is_control = false } - (*match i with*) - (*| PArith (i') -> arith_rec i'*) - (*| PLoad (ld, rd, a) -> load_rec ld rd a*) - (*| PStore (st, r, a) -> store_rec st r a*) - (*| Pallocframe (_,_) -> raise OpaqueInstruction*) - (*| Pfreeframe (_,_) -> raise OpaqueInstruction*) - (*| Ploadsymbol (rd, id) -> raise OpaqueInstruction (* XXX how to manage this one? *)*) - (*| Pcvtsw2x (rd, r1) -> { inst = Sxtw; is_control = false }*) - (*| Pcvtuw2x (rd, r1) -> { inst = Uxtw; is_control = false }*) - (*| Pcvtx2w (rd) -> raise OpaqueInstruction (* XXX NYI in TargetPrinter? *)*) - (*(*| Pget (rd, rs) -> get_rec rd rs*)*) - (*(*| Pset (rd, rs) -> set_rec rd rs*)*) - (*(*| Pnop -> { inst = nop_real; write_locs = []; read_locs = []; imm = None ; is_control = false; read_at_id = []; read_at_e1 = []}*)*) - -let builtin_rec ef args res = raise OpaqueInstruction (* XXX not sure *) - -(*let ctl_flow_rec i =*) - (*let opweights = OpWeightsAsm.get_opweights () in*) - (*{ inst = i; usage = opweights.resources_of_op i 0; latency = opweights.latency_of_op i 0; is_control = true }*) - (*match i with*) - (*| Pb(_) -> { inst = B; is_control = true }*) - (*| Pbc(_,_) -> { inst = B; is_control = true }*) - (*| Pbl(_,_) -> { inst = Bl; is_control = true }*) - (*| Pbs(_,_) -> { inst = B; is_control = true }*) - (*| Pblr(_,_) -> { inst = Blr; is_control = true }*) - (*| Pbr(_,_) -> { inst = Br; is_control = true }*) - (*| Pret(_) -> { inst = Ret; is_control = true }*) - (*| Pcbnz(_,_,_) -> { inst = Cbnz; is_control = true }*) - (*| Pcbz(_,_,_) -> { inst = Cbz; is_control = true }*) - (*| Ptbnz(_,_,_,_) -> { inst = Tbnz; is_control = true }*) - (*| Ptbz(_,_,_,_) -> { inst = Tbz; is_control = true }*) - (*| Pbtbl(_,_) -> raise OpaqueInstruction (* XXX how to manage this one? Maybe Br *)*) + match i with + | PArith i' -> arith_rec i' + | PLoad (ld, rd, a) -> load_rec ld (reg_of_dreg rd) a + | PStore (st, r, a) -> store_rec st (reg_of_dreg r) a + | Pallocframe (sz, linkofs) -> allocframe_rec sz linkofs + | Pfreeframe (sz, linkofs) -> freeframe_rec sz linkofs + | Ploadsymbol (rd, id) -> loadsymbol_rec (reg_of_ireg rd) id + | Pcvtsw2x (rd, r1) -> cvtsw2x_rec (reg_of_ireg rd) (reg_of_ireg r1) + | Pcvtuw2x (rd, r1) -> cvtuw2x_rec (reg_of_ireg rd) (reg_of_ireg r1) + (* XXX NYI in TargetPrinter? *) + | Pcvtx2w rd -> cvtx2w_rec rd + +let builtin_rec ef args res = + (* XXX verify this *) + { + inst = builtin_real; + write_locs = []; + read_locs = [ Mem ]; + is_control = true; + } + +let ctl_flow_rec i = + match i with + | Pb lbl -> + { inst = b_real; write_locs = []; read_locs = []; is_control = true } + | Pbc (c, lbl) -> + { inst = bc_real; write_locs = []; read_locs = []; is_control = true } + | Pbl (id, sg) -> + { + inst = bl_real; + write_locs = [ reg_of_ireg Asm.X30 ]; + read_locs = []; + is_control = true; + } + (* XXX not sure about X30 *) + | Pbs (id, sg) -> + { inst = bs_real; write_locs = []; read_locs = []; is_control = true } + | Pblr (r, sg) -> + { + inst = blr_real; + write_locs = [ reg_of_ireg Asm.X30 ]; + read_locs = [ reg_of_ireg r ]; + is_control = true; + } + (* XXX not sure about X30 *) + | Pbr (r, sg) -> + { + inst = br_real; + write_locs = []; + read_locs = [ reg_of_ireg r ]; + is_control = true; + } + | Pret r -> + { + inst = ret_real; + write_locs = []; + read_locs = [ reg_of_ireg r ]; + is_control = true; + } + | Pcbnz (sz, r, lbl) -> + { + inst = cbnz_real; + write_locs = []; + read_locs = [ reg_of_ireg r ]; + is_control = true; + } + | Pcbz (sz, r, lbl) -> + { + inst = cbz_real; + write_locs = []; + read_locs = [ reg_of_ireg r ]; + is_control = true; + } + | Ptbnz (sz, r, n, lbl) -> + { + inst = tbnz_real; + write_locs = []; + read_locs = [ reg_of_ireg r ]; + is_control = true; + } + | Ptbz (sz, r, n, lbl) -> + { + inst = tbz_real; + write_locs = []; + read_locs = [ reg_of_ireg r ]; + is_control = true; + } + | Pbtbl (r1, tbl) -> + { + inst = btbl_real; + write_locs = [ reg_of_ireg Asm.X16; reg_of_ireg Asm.X17 ]; + read_locs = [ reg_of_ireg r1 ]; + is_control = true; + } + +(* XXX Verify this (Pbtbl) *) let control_rec i = - let opweights = get_opweights () - and i' = PControl i in - { inst = i'; usage = opweights.resources_of_op i' 0; latency = opweights.latency_of_op i' 0; is_control = true } - (*match i with*) - (*| Pbuiltin (ef, args, res) -> builtin_rec ef args res*) - (*| PCtlFlow (i') -> ctl_flow_rec i'*) - -(* TODO Continue here by calling constructors *) -let rec basic_recs body = match body with - | [] -> [] - | bi :: body -> (basic_rec bi) :: (basic_recs body) + match i with + | Pbuiltin (ef, args, res) -> builtin_rec ef args res + | PCtlFlow i' -> ctl_flow_rec i' -let exit_rec exit = match exit with - | None -> [] - | Some ex -> [control_rec ex] +let rec basic_recs body = + match body with [] -> [] | bi :: body -> basic_rec bi :: basic_recs body -let instruction_recs bb = (basic_recs bb.body) @ (exit_rec bb.exit) +let exit_rec exit = match exit with None -> [] | Some ex -> [ control_rec ex ] + +let instruction_recs bb = basic_recs bb.body @ exit_rec bb.exit (** * Providing informations relative to the real instructions *) +type inst_info = { + write_locs : location list; + read_locs : location list; + is_control : bool; + usage : int array; + (* resources consumed by the instruction *) + latency : int; +} (** Abstraction providing all the necessary informations for solving the scheduling problem *) -(*type inst_info = {*) - (*is_control : bool;*) - (*usage: int array; (* resources consumed by the instruction *)*) - (*latency: int;*) -(*}*) - -(*(** Figuring out whether an immediate is s10, u27l10 or e27u27l10 *)*) -(*type imm_encoding = U6 | S10 | U27L5 | U27L10 | E27U27L10*) - -(*let rec pow a = function*) - (*| 0 -> Int64.one*) - (*| 1 -> Int64.of_int a*) - (*| n -> let b = pow a (n/2) in*) - (*Int64.mul b (Int64.mul b (if n mod 2 = 0 then Int64.one else Int64.of_int a))*) - -(*let signed_interval n : (int64 * int64) = begin *) - (*assert (n > 0);*) - (*let min = Int64.neg @@ pow 2 (n-1)*) - (*and max = Int64.sub (pow 2 (n-1)) Int64.one*) - (*in (min, max)*) -(*end*) - -(*let within i interv = match interv with (min, max) -> (i >= min && i <= max)*) - -(*let signed_length (i:int64) =*) - (*let rec f (i:int64) n = *) - (*let interv = signed_interval n*) - (*in if (within i interv) then n else f i (n+1)*) - (*in f i 1*) - -(*let unsigned_length (i:int64) = (signed_length i) - 1*) - -(*let encode_imm (imm:int64) =*) - (*if (Int64.compare imm Int64.zero < 0) then*) - (*let length = signed_length imm*) - (*in if length <= 10 then S10*) - (*else if length <= 32 then U27L5*) - (*else if length <= 37 then U27L10*) - (*else if length <= 64 then E27U27L10*) - (*else failwith @@ sprintf "encode_imm: integer too big! (%Ld)" imm*) - (*else*) - (*let length = unsigned_length imm*) - (*in if length <= 6 then U6*) - (*else if length <= 9 then S10 (* Special case for S10 - stay signed no matter what *)*) - (*else if length <= 32 then U27L5*) - (*else if length <= 37 then U27L10*) - (*else if length <= 64 then E27U27L10*) - (*else failwith @@ sprintf "encode_imm: integer too big! (%Ld)" imm*) (*(** Resources *)*) (*type rname = Rissue | Rtiny | Rlite | Rfull | Rlsu | Rmau | Rbcu | Rtca | Rauxr | Rauxw | Rcrrp | Rcrwl | Rcrwh | Rnop*) @@ -404,251 +536,38 @@ let instruction_recs bb = (basic_recs bb.body) @ (exit_rec bb.exit) (*let resource_names = [Rissue; Rtiny; Rlite; Rfull; Rlsu; Rmau; Rbcu; Rtca; Rauxr; Rauxw; Rcrrp; Rcrwl; Rcrwh; Rnop]*) (*let rec find_index elt l =*) - (*match l with*) - (*| [] -> raise Not_found*) - (*| e::l -> if (e == elt) then 0*) - (*else 1 + find_index elt l*) +(*match l with*) +(*| [] -> raise Not_found*) +(*| e::l -> if (e == elt) then 0*) +(*else 1 + find_index elt l*) (*let resource_id resource : int = find_index resource resource_names*) -(*let resource_bound resource : int =*) - (*match resource with*) - (*| Rissue -> 8*) - (*| Rtiny -> 4*) - (*| Rlite -> 2*) - (*| Rfull -> 1*) - (*| Rlsu -> 1*) - (*| Rmau -> 1*) - (*| Rbcu -> 1*) - (*| Rtca -> 1*) - (*| Rauxr -> 1*) - (*| Rauxw -> 1*) - (*| Rcrrp -> 1*) - (*| Rcrwl -> 1*) - (*| Rcrwh -> 1*) - (*| Rnop -> 4*) - (*let resource_bounds : int array = Array.of_list (List.map resource_bound resource_names)*) -(*(** Reservation tables *)*) -(*let alu_full : int array = let resmap = fun r -> match r with*) - (*| Rissue -> 1 | Rtiny -> 1 | Rlite -> 1 | Rfull -> 1 | _ -> 0*) - (*in Array.of_list (List.map resmap resource_names)*) - -(*let alu_lite : int array = let resmap = fun r -> match r with *) - (*| Rissue -> 1 | Rtiny -> 1 | Rlite -> 1 | _ -> 0*) - (*in Array.of_list (List.map resmap resource_names)*) - -(*let alu_lite_x : int array = let resmap = fun r -> match r with *) - (*| Rissue -> 2 | Rtiny -> 1 | Rlite -> 1 | _ -> 0*) - (*in Array.of_list (List.map resmap resource_names)*) - -(*let alu_lite_y : int array = let resmap = fun r -> match r with *) - (*| Rissue -> 3 | Rtiny -> 1 | Rlite -> 1 | _ -> 0*) - (*in Array.of_list (List.map resmap resource_names)*) - -(*let alu_nop : int array = let resmap = fun r -> 0*) - (*in Array.of_list (List.map resmap resource_names)*) - -(*let alu_tiny : int array = let resmap = fun r -> match r with*) - (*| Rissue -> 1 | Rtiny -> 1 | _ -> 0*) - (*in Array.of_list (List.map resmap resource_names)*) - -(*let alu_tiny_x : int array = let resmap = fun r -> match r with*) - (*| Rissue -> 2 | Rtiny -> 1 | _ -> 0*) - (*in Array.of_list (List.map resmap resource_names)*) - -(*let alu_tiny_y : int array = let resmap = fun r -> match r with*) - (*| Rissue -> 3 | Rtiny -> 1 | _ -> 0*) - (*in Array.of_list (List.map resmap resource_names)*) - -(*let bcu : int array = let resmap = fun r -> match r with *) - (*| Rissue -> 1 | Rbcu -> 1 | _ -> 0*) - (*in Array.of_list (List.map resmap resource_names)*) - -(*let bcu_tiny_tiny_mau_xnop : int array = let resmap = fun r -> match r with *) - (*| Rissue -> 1 | Rtiny -> 2 | Rmau -> 1 | Rbcu -> 1 | Rnop -> 4 | _ -> 0*) - (*in Array.of_list (List.map resmap resource_names)*) - -(*let lsu_auxr : int array = let resmap = fun r -> match r with*) - (*| Rissue -> 1 | Rtiny -> 1 | Rlsu -> 1 | Rauxr -> 1 | _ -> 0*) - (*in Array.of_list (List.map resmap resource_names)*) - -(*let lsu_auxr_x : int array = let resmap = fun r -> match r with*) - (*| Rissue -> 2 | Rtiny -> 1 | Rlsu -> 1 | Rauxr -> 1 | _ -> 0*) - (*in Array.of_list (List.map resmap resource_names)*) - -(*let lsu_auxr_y : int array = let resmap = fun r -> match r with*) - (*| Rissue -> 3 | Rtiny -> 1 | Rlsu -> 1 | Rauxr -> 1 | _ -> 0*) - (*in Array.of_list (List.map resmap resource_names)*) - -(*let lsu_auxw : int array = let resmap = fun r -> match r with*) - (*| Rissue -> 1 | Rtiny -> 1 | Rlsu -> 1 | Rauxw -> 1 | _ -> 0*) - (*in Array.of_list (List.map resmap resource_names)*) - -(*let lsu_auxw_x : int array = let resmap = fun r -> match r with*) - (*| Rissue -> 2 | Rtiny -> 1 | Rlsu -> 1 | Rauxw -> 1 | _ -> 0*) - (*in Array.of_list (List.map resmap resource_names)*) - -(*let lsu_auxw_y : int array = let resmap = fun r -> match r with*) - (*| Rissue -> 3 | Rtiny -> 1 | Rlsu -> 1 | Rauxw -> 1 | _ -> 0*) - (*in Array.of_list (List.map resmap resource_names)*) - -(*let mau : int array = let resmap = fun r -> match r with*) - (*| Rissue -> 1 | Rtiny -> 1 | Rmau -> 1 | _ -> 0*) - (*in Array.of_list (List.map resmap resource_names)*) - -(*let mau_x : int array = let resmap = fun r -> match r with*) - (*| Rissue -> 2 | Rtiny -> 1 | Rmau -> 1 | _ -> 0*) - (*in Array.of_list (List.map resmap resource_names)*) - -(*let mau_y : int array = let resmap = fun r -> match r with*) - (*| Rissue -> 3 | Rtiny -> 1 | Rmau -> 1 | _ -> 0*) - (*in Array.of_list (List.map resmap resource_names)*) - -(*let mau_auxr : int array = let resmap = fun r -> match r with*) - (*| Rissue -> 1 | Rtiny -> 1 | Rmau -> 1 | Rauxr -> 1 | _ -> 0*) - (*in Array.of_list (List.map resmap resource_names)*) - -(*let mau_auxr_x : int array = let resmap = fun r -> match r with*) - (*| Rissue -> 2 | Rtiny -> 1 | Rmau -> 1 | Rauxr -> 1 | _ -> 0*) - (*in Array.of_list (List.map resmap resource_names)*) - -(*let mau_auxr_y : int array = let resmap = fun r -> match r with*) - (*| Rissue -> 3 | Rtiny -> 1 | Rmau -> 1 | Rauxr -> 1 | _ -> 0*) - (*in Array.of_list (List.map resmap resource_names)*) - -(*(** Real instructions *)*) - -(*exception InvalidEncoding*) - -(*let rec_to_usage r =*) - (*let encoding = match r.imm with None -> None | Some (I32 i) | Some (I64 i) -> Some (encode_imm @@ Z.to_int64 i)*) - (*| Some (Off ptr) -> Some (encode_imm @@ camlint64_of_ptrofs ptr)*) - - (*in match r.inst with*) - (*| Addw | Andw | Nandw | Orw | Norw | Sbfw | Xorw*) - (*| Nxorw | Andnw | Ornw -> *) - (*(match encoding with None | Some U6 | Some S10 -> alu_tiny *) - (*| Some U27L5 | Some U27L10 -> alu_tiny_x*) - (*| _ -> raise InvalidEncoding)*) - (*| Sbfxw | Sbfxd ->*) - (*(match encoding with None -> alu_lite*) - (*| Some U6 | Some S10 | Some U27L5 -> alu_lite_x*) - (*| _ -> raise InvalidEncoding)*) - (*| Addd | Andd | Nandd | Ord | Nord | Sbfd | Xord*) - (*| Nxord | Andnd | Ornd ->*) - (*(match encoding with None | Some U6 | Some S10 -> alu_tiny *) - (*| Some U27L5 | Some U27L10 -> alu_tiny_x*) - (*| Some E27U27L10 -> alu_tiny_y)*) - (*|Cmoved ->*) - (*(match encoding with None | Some U6 | Some S10 -> alu_lite*) - (*| Some U27L5 | Some U27L10 -> alu_lite_x*) - (*| Some E27U27L10 -> alu_lite_y)*) - (*| Addxw -> *) - (*(match encoding with None | Some U6 | Some S10 -> alu_lite *) - (*| Some U27L5 | Some U27L10 -> alu_lite_x*) - (*| _ -> raise InvalidEncoding)*) - (*| Addxd -> *) - (*(match encoding with None | Some U6 | Some S10 -> alu_lite *) - (*| Some U27L5 | Some U27L10 -> alu_lite_x*) - (*| Some E27U27L10 -> alu_lite_y)*) - (*| Compw -> (match encoding with None -> alu_tiny*) - (*| Some U6 | Some S10 | Some U27L5 -> alu_tiny_x*) - (*| _ -> raise InvalidEncoding)*) - (*| Compd -> (match encoding with None | Some U6 | Some S10 -> alu_tiny*) - (*| Some U27L5 | Some U27L10 -> alu_tiny_x*) - (*| Some E27U27L10 -> alu_tiny_y)*) - (*| Fcompw -> (match encoding with None -> alu_lite*) - (*| Some U6 | Some S10 | Some U27L5 -> alu_lite_x*) - (*| _ -> raise InvalidEncoding)*) - (*| Fcompd -> (match encoding with None -> alu_lite*) - (*| Some U6 | Some S10 | Some U27L5 -> alu_lite_x*) - (*| _ -> raise InvalidEncoding)*) - (*| Make -> (match encoding with Some U6 | Some S10 -> alu_tiny *) - (*| Some U27L5 | Some U27L10 -> alu_tiny_x *) - (*| Some E27U27L10 -> alu_tiny_y *) - (*| _ -> raise InvalidEncoding)*) - (*| Maddw | Msbfw -> (match encoding with None -> mau_auxr*) - (*| Some U6 | Some S10 | Some U27L5 -> mau_auxr_x*) - (*| _ -> raise InvalidEncoding)*) - (*| Maddd | Msbfd -> (match encoding with None | Some U6 | Some S10 -> mau_auxr*) - (*| Some U27L5 | Some U27L10 -> mau_auxr_x*) - (*| Some E27U27L10 -> mau_auxr_y)*) - (*| Mulw -> (match encoding with None -> mau*) - (*| Some U6 | Some S10 | Some U27L5 -> mau_x*) - (*| _ -> raise InvalidEncoding)*) - (*| Muld -> (match encoding with None | Some U6 | Some S10 -> mau*) - (*| Some U27L5 | Some U27L10 -> mau_x*) - (*| Some E27U27L10 -> mau_y)*) - (*| Nop -> alu_nop*) - (*| Sraw | Srlw | Sllw | Srad | Srld | Slld -> (match encoding with None | Some U6 -> alu_tiny | _ -> raise InvalidEncoding)*) - (*(* TODO: check *)*) - (*| Srsw | Srsd | Rorw -> (match encoding with None | Some U6 -> alu_lite | _ -> raise InvalidEncoding)*) - (*| Extfz | Extfs | Insf -> (match encoding with None -> alu_lite | _ -> raise InvalidEncoding)*) - (*| Fixeduwz | Fixedwz | Floatwz | Floatuwz | Fixeddz | Fixedudz | Floatdz | Floatudz -> mau*) - (*| Lbs | Lbz | Lhs | Lhz | Lws | Ld | Lq | Lo -> *) - (*(match encoding with None | Some U6 | Some S10 -> lsu_auxw*) - (*| Some U27L5 | Some U27L10 -> lsu_auxw_x*) - (*| Some E27U27L10 -> lsu_auxw_y)*) - (*| Sb | Sh | Sw | Sd | Sq | So ->*) - (*(match encoding with None | Some U6 | Some S10 -> lsu_auxr*) - (*| Some U27L5 | Some U27L10 -> lsu_auxr_x*) - (*| Some E27U27L10 -> lsu_auxr_y)*) - (*| Icall | Call | Cb | Igoto | Goto | Ret | Set -> bcu*) - (*| Get -> bcu_tiny_tiny_mau_xnop*) - (*| Fnegd | Fnegw | Fabsd | Fabsw | Fwidenlwd*) - (*| Fmind | Fmaxd | Fminw | Fmaxw -> alu_lite*) - (*| Fnarrowdw -> alu_full*) - (*| Faddd | Faddw | Fsbfd | Fsbfw | Fmuld | Fmulw | Finvw*) - (*| Ffmad | Ffmaw | Ffmsd | Ffmsw -> mau*) - - -(*let inst_info_to_dlatency i = *) - (*begin*) - (*assert (not (i.reads_at_id && i.reads_at_e1));*) - (*match i.reads_at_id with*) - (*| true -> +1*) - (*| false -> (match i.reads_at_e1 with*) - (*| true -> -1*) - (*| false -> 0)*) - (*end*) - -(*let real_inst_to_latency = function*) - (*| Nop -> 0 (* Only goes through ID *)*) - (*| Addw | Andw | Compw | Orw | Sbfw | Sbfxw | Sraw | Srsw | Srlw | Sllw | Xorw*) - (*(* TODO check rorw *)*) - (*| Rorw | Nandw | Norw | Nxorw | Ornw | Andnw*) - (*| Nandd | Nord | Nxord | Ornd | Andnd*) - (*| Addd | Andd | Compd | Ord | Sbfd | Sbfxd | Srad | Srsd | Srld | Slld | Xord | Make*) - (*| Extfs | Extfz | Insf | Fcompw | Fcompd | Cmoved | Addxw | Addxd*) - (*| Fmind | Fmaxd | Fminw | Fmaxw*) - (*-> 1*) - (*| Floatwz | Floatuwz | Fixeduwz | Fixedwz | Floatdz | Floatudz | Fixeddz | Fixedudz -> 4*) - (*| Mulw | Muld | Maddw | Maddd | Msbfw | Msbfd -> 2 (* FIXME - WORST CASE. If it's S10 then it's only 1 *)*) - (*| Lbs | Lbz | Lhs | Lhz | Lws | Ld | Lq | Lo -> 3*) - (*| Sb | Sh | Sw | Sd | Sq | So -> 1 (* See kvx-Optimization.pdf page 19 *)*) - (*| Get -> 1*) - (*| Set -> 4 (* According to the manual should be 3, but I measured 4 *)*) - (*| Icall | Call | Cb | Igoto | Goto | Ret -> 42 (* Should not matter since it's the final instruction of the basic block *)*) - (*| Fnegd | Fnegw | Fabsd | Fabsw | Fwidenlwd | Fnarrowdw -> 1*) - (*| Faddd | Faddw | Fsbfd | Fsbfw | Fmuld | Fmulw | Finvw*) - (*| Ffmaw | Ffmad | Ffmsw | Ffmsd -> 4*) - (*let rec empty_inter la = function*) - (*| [] -> true*) - (*| b::lb -> if (List.mem b la) then false else empty_inter la lb*) +(*| [] -> true*) +(*| b::lb -> if (List.mem b la) then false else empty_inter la lb*) -(*let rec_to_info (r : ab_inst_rec) : inst_info =*) - (*let opweights = OpWeightsAsm.get_opweights ()*) - (*in { usage=opweights.resources_of_op r.inst; latency=opweights.latency_of_op r.inst; is_control=r.is_control }*) +let rec_to_info (r : ab_inst_rec) : inst_info = + let opweights = OpWeightsAsm.get_opweights () in + let usage = opweights.resources_of_op r.inst 0 + and latency = opweights.latency_of_op r.inst 0 in + { + write_locs = r.write_locs; + read_locs = r.read_locs; + usage; + latency; + is_control = r.is_control; + } -(*let instruction_infos bb = List.map rec_to_info (instruction_recs bb)*) -let instruction_infos bb = instruction_recs bb +let instruction_infos bb = List.map rec_to_info (instruction_recs bb) + +(*let instruction_infos bb = instruction_recs bb*) let instruction_usages bb = - let usages = List.map (fun info -> info.usage) (instruction_infos bb) - in Array.of_list usages + let usages = List.map (fun info -> info.usage) (instruction_infos bb) in + Array.of_list usages (** * Latency constraints building @@ -659,8 +578,8 @@ let instruction_usages bb = (*let preg2int pr = Camlcoq.P.to_int @@ Asmblockdeps.ppos pr*) (*let loc2int = function*) - (*| Mem -> 1*) - (*| Reg pr -> preg2int pr*) +(*| Mem -> 1*) +(*| Reg pr -> preg2int pr*) (*(* module HashedLoc = struct*) (*type t = { loc: location; key: int }*) @@ -670,40 +589,85 @@ let instruction_usages bb = (*end *)*) (*(* module LocHash = Hashtbl.Make(HashedLoc) *)*) -(*module LocHash = Hashtbl*) +module LocHash = Hashtbl (*(* Hash table : location => list of instruction ids *)*) -(*let rec intlist n =*) - (*if n < 0 then failwith "intlist: n < 0"*) - (*else if n = 0 then []*) - (*else (n-1) :: (intlist (n-1))*) +let rec intlist n = + if n < 0 then failwith "intlist: n < 0" + else if n = 0 then [] + else (n - 1) :: intlist (n - 1) + +let find_in_hash hashloc loc = + match LocHash.find_opt hashloc loc with Some idl -> idl | None -> [] -(*let find_in_hash hashloc loc =*) - (*match LocHash.find_opt hashloc loc with*) - (*| Some idl -> idl*) - (*| None -> []*) +(* Returns a list of instruction ids *) +let rec get_accesses hashloc (ll : location list) = + match ll with + | [] -> [] + | loc :: llocs -> find_in_hash hashloc loc @ get_accesses hashloc llocs -(*(* Returns a list of instruction ids *)*) -(*let rec get_accesses hashloc (ll: location list) = match ll with*) - (*| [] -> []*) - (*| loc :: llocs -> (find_in_hash hashloc loc) @ (get_accesses hashloc llocs)*) +let compute_latency (ifrom : inst_info) = ifrom.latency -(*let compute_latency (ifrom: inst_info) (ito: inst_info) =*) - (*let dlat = inst_info_to_dlatency ito*) - (*in let lat = ifrom.latency + dlat*) - (*in assert (lat >= 0); if (lat == 0) then 1 else lat*) +(*let dlat = inst_info_to_dlatency ito*) +(*in let lat = ifrom.latency + dlat*) +(*in assert (lat >= 0); if (lat == 0) then 1 else lat*) let latency_constraints bb = - let count = ref 0 + let written = LocHash.create 70 + and read = LocHash.create 70 + and count = ref 0 and constraints = ref [] - and instr_infos = instruction_infos bb - in let step (i: ab_inst_rec) = - begin - constraints := {instr_from = !count; instr_to = !count+1; latency = i.latency} :: !constraints; - count := !count + 1 - end - in (List.iter step instr_infos; !constraints) + and instr_infos = instruction_infos bb in + let step (i : inst_info) = + let raw = get_accesses written i.read_locs + and waw = get_accesses written i.write_locs + and war = get_accesses read i.write_locs in + List.iter + (fun i -> + constraints := + { + instr_from = i; + instr_to = !count; + latency = compute_latency (List.nth instr_infos i); + } + :: !constraints) + raw; + List.iter + (fun i -> + constraints := + { + instr_from = i; + instr_to = !count; + latency = compute_latency (List.nth instr_infos i); + } + :: !constraints) + waw; + List.iter + (fun i -> + constraints := + { instr_from = i; instr_to = !count; latency = 0 } :: !constraints) + war; + if i.is_control then + List.iter + (fun n -> + constraints := + { instr_from = n; instr_to = !count; latency = 0 } :: !constraints) + (intlist !count); + (* Updating "read" and "written" hashmaps *) + List.iter + (fun loc -> + LocHash.replace written loc [ !count ]; + LocHash.replace read loc [] + (* Clearing all the entries of "read" hashmap when a register is written *)) + i.write_locs; + List.iter + (fun loc -> LocHash.replace read loc (!count :: find_in_hash read loc)) + i.read_locs; + count := !count + 1 + in + List.iter step instr_infos; + !constraints (** * Using the InstructionScheduler @@ -711,40 +675,48 @@ let latency_constraints bb = (* TODO RESUME HERE *) -let opweights = OpWeightsAsm.get_opweights () +let opweights = OpWeightsAsm.get_opweights () -let build_problem bb = -{ max_latency = -1; resource_bounds = opweights.pipelined_resource_bounds; - instruction_usages = instruction_usages bb; latency_constraints = latency_constraints bb } +let build_problem bb = + { + max_latency = -1; + resource_bounds = opweights.pipelined_resource_bounds; + instruction_usages = instruction_usages bb; + latency_constraints = latency_constraints bb; + } (*let rec find_min_opt (l: int option list) =*) - (*match l with*) - (*| [] -> None *) - (*| e :: l ->*) - (*begin match find_min_opt l with*) - (*| None -> e*) - (*| Some m ->*) - (*begin match e with*) - (*| None -> Some m*) - (*| Some n -> if n < m then Some n else Some m*) - (*end*) - (*end*) +(*match l with*) +(*| [] -> None *) +(*| e :: l ->*) +(*begin match find_min_opt l with*) +(*| None -> e*) +(*| Some m ->*) +(*begin match e with*) +(*| None -> Some m*) +(*| Some n -> if n < m then Some n else Some m*) +(*end*) +(*end*) (*let rec filter_indexes predicate = function*) - (*| [] -> []*) - (*| e :: l -> if (predicate e) then e :: (filter_indexes predicate l) else filter_indexes predicate l*) +(*| [] -> []*) +(*| e :: l -> if (predicate e) then e :: (filter_indexes predicate l) else filter_indexes predicate l*) let get_from_indexes indexes l = List.map (List.nth l) indexes (*let is_basic = function PBasic _ -> true | _ -> false*) let is_control = function PControl _ -> true | _ -> false -let to_basic = function PBasic i -> i | _ -> failwith "to_basic: control instruction found" -let to_control = function PControl i -> i | _ -> failwith "to_control: basic instruction found" -let rec body_to_instrs bdy = - match bdy with - | [] -> [] - | i :: l' -> PBasic i :: body_to_instrs l' +(*let to_basic = function*) +(*| PBasic i -> i*) +(*| _ -> failwith "to_basic: control instruction found"*) + +let to_control = function + | PControl i -> i + | _ -> failwith "to_control: basic instruction found" + +let rec body_to_instrs bdy = + match bdy with [] -> [] | i :: l' -> PBasic i :: body_to_instrs l' let rec instrs_to_bdy instrs = match instrs with @@ -753,134 +725,135 @@ let rec instrs_to_bdy instrs = | PControl _ :: l' -> failwith "instrs_to_bdy: control instruction found" let repack li hd = - let last = List.nth li (List.length li - 1) - in if is_control last then - let cut_li = Array.to_list @@ Array.sub (Array.of_list li) 0 (List.length li - 1) - in { header = hd; body = (instrs_to_bdy cut_li); exit = Some (to_control last) } - else - { header = hd; body = (instrs_to_bdy li); exit = None } + let last = List.nth li (List.length li - 1) in + if is_control last then + let cut_li = + Array.to_list @@ Array.sub (Array.of_list li) 0 (List.length li - 1) + in + { header = hd; body = instrs_to_bdy cut_li; exit = Some (to_control last) } + else { header = hd; body = instrs_to_bdy li; exit = None } (*let extract_some o = match o with Some e -> e | None -> failwith "extract_some: None found"*) (*let rec find_min = function*) - (*| [] -> None*) - (*| e :: l ->*) - (*match find_min l with*) - (*| None -> Some e*) - (*| Some m -> if (e < m) then Some e else Some m*) +(*| [] -> None*) +(*| e :: l ->*) +(*match find_min l with*) +(*| None -> Some e*) +(*| Some m -> if (e < m) then Some e else Some m*) (*let rec remove_all m = function*) - (*| [] -> []*) - (*| e :: l -> if m=e then remove_all m l*) - (*else e :: (remove_all m l)*) +(*| [] -> []*) +(*| e :: l -> if m=e then remove_all m l*) +(*else e :: (remove_all m l)*) (*let rec find_mins l = match find_min l with*) - (*| None -> []*) - (*| Some m -> m :: find_mins (remove_all m l)*) +(*| None -> []*) +(*| Some m -> m :: find_mins (remove_all m l)*) (*let find_all_indices m l = *) - (*let rec find m off = function*) - (*| [] -> []*) - (*| e :: l -> if m=e then off :: find m (off+1) l*) - (*else find m (off+1) l*) - (*in find m 0 l*) +(*let rec find m off = function*) +(*| [] -> []*) +(*| e :: l -> if m=e then off :: find m (off+1) l*) +(*else find m (off+1) l*) +(*in find m 0 l*) module TimeHash = Hashtbl (*Hash table : time => list of instruction ids *) -let hashtbl2flatarray h maxint = - let rec f i = match TimeHash.find_opt h i with - | None -> if (i > maxint) then [] else (f (i+1)) - | Some bund -> bund @ (f (i+1)) - in f 0 +let hashtbl2flatarray h maxint = + let rec f i = + match TimeHash.find_opt h i with + | None -> if i > maxint then [] else f (i + 1) + | Some bund -> bund @ f (i + 1) + in + f 0 let find_max l = let rec f = function | [] -> None - | e :: l -> match f l with + | e :: l -> ( + match f l with | None -> Some e - | Some m -> if (e > m) then Some e else Some m - in match (f l) with - | None -> raise Not_found - | Some m -> m + | Some m -> if e > m then Some e else Some m ) + in + match f l with None -> raise Not_found | Some m -> m (*(* [0, 2, 3, 1, 1, 2, 4, 5] -> [[0], [3, 4], [1, 5], [2], [6], [7]] *)*) -let minpack_list (l: int list) = - let timehash = TimeHash.create (List.length l) - in let rec f i = function - | [] -> () - | t::l -> begin - (match TimeHash.find_opt timehash t with - | None -> TimeHash.add timehash t [i] - | Some bund -> TimeHash.replace timehash t (bund @ [i])); - f (i+1) l - end - in begin - f 0 l; - hashtbl2flatarray timehash (find_max l) - end;; +let minpack_list (l : int list) = + let timehash = TimeHash.create (List.length l) in + let rec f i = function + | [] -> () + | t :: l -> + ( match TimeHash.find_opt timehash t with + | None -> TimeHash.add timehash t [ i ] + | Some bund -> TimeHash.replace timehash t (bund @ [ i ]) ); + f (i + 1) l + in + f 0 l; + hashtbl2flatarray timehash (find_max l) (*let minpack_list l =*) - (*let mins = find_mins l*) - (*in List.map (fun m -> find_all_indices m l) mins*) +(*let mins = find_mins l*) +(*in List.map (fun m -> find_all_indices m l) mins*) -let bb_to_instrs bb = body_to_instrs bb.body @ (match bb.exit with None -> [] | Some e -> [PControl e]) +let bb_to_instrs bb = + body_to_instrs bb.body + @ match bb.exit with None -> [] | Some e -> [ PControl e ] let build_solution bb sol = (* Remove last element - the total *) - let tmp = (Array.to_list @@ Array.sub sol 0 (Array.length sol - 1)) - in let pack = minpack_list tmp - and instrs = bb_to_instrs bb - in repack (get_from_indexes pack instrs) bb.header - (*in let rec bund hd = function*) - (*| [] -> []*) - (*| pack :: packs -> repack (get_from_indexes pack instrs) hd :: (bund [] packs)*) - (*in bund bb.header packs*) - -(*let print_inst oc = function*) - (*| i -> TargetPrinter.Target.print_instruction oc i*) - (*(*| Asm.Pallocframe(sz, ofs) -> Printf.fprintf oc " Pallocframe\n"*)*) - (*(*| Asm.Pfreeframe(sz, ofs) -> Printf.fprintf oc " Pfreeframe\n"*)*) - (*(*| Asm.Pbuiltin(ef, args, res) -> Printf.fprintf oc " Pbuiltin\n"*)*) - (*(*| i -> TargetPrinter.Target.print_instruction oc i*)*) + let tmp = Array.to_list @@ Array.sub sol 0 (Array.length sol - 1) in + let pack = minpack_list tmp and instrs = bb_to_instrs bb in + repack (get_from_indexes pack instrs) bb.header + +(*in let rec bund hd = function*) +(*| [] -> []*) +(*| pack :: packs -> repack (get_from_indexes pack instrs) hd :: (bund [] packs)*) +(*in bund bb.header packs*) + +(*let print_inst oc = function i -> TargetPrinter.Target.print_instructions oc i*) +(*| Asm.Pallocframe(sz, ofs) -> Printf.fprintf oc " Pallocframe\n"*) +(*| Asm.Pfreeframe(sz, ofs) -> Printf.fprintf oc " Pfreeframe\n"*) +(*| Asm.Pbuiltin(ef, args, res) -> Printf.fprintf oc " Pbuiltin\n"*) (*let print_bb oc bb =*) - (*match Asmgen.Asmblock_TRANSF.unfold_bblock bb with*) - (*| Errors.OK instructions -> List.iter (print_inst oc) instructions*) - (*| Errors.Error _ -> Printf.eprintf "Error in print_bb"*) +(*match Asmgen.Asmblock_TRANSF.unfold_bblock bb with*) +(*| Errors.OK instructions -> List.iter (print_inst oc) instructions*) +(*| Errors.Error _ -> Printf.eprintf "Error in print_bb"*) let print_schedule sched = print_string "[ "; Array.iter (fun x -> Printf.printf "%d; " x) sched; - print_endline "]";; + print_endline "]" let do_schedule bb = let problem = build_problem bb in - (if debug then print_problem stdout problem); - let solution = scheduler_by_name (!Clflags.option_fpostpass_sched) problem - in match solution with + if debug then print_problem stdout problem; + let solution = scheduler_by_name !Clflags.option_fpostpass_sched problem in + match solution with | None -> failwith "Could not find a valid schedule" | Some sol -> - ((if debug then print_schedule sol); - build_solution bb sol) + if debug then print_schedule sol; + build_solution bb sol (** * Dumb schedule if the above doesn't work *) -(* Identity scheduling *) -let dumb_schedule (bb : bblock) = (bb.body, bb.exit) +(* Pack result *) +let pack_result (bb : bblock) = (bb.body, bb.exit) (** * Separates the opaque instructions such as Pfreeframe and Pallocframe *) -let is_opaque = function - | Pallocframe _ | Pfreeframe _ -> true - | _ -> false +(*let is_opaque = function*) +(*| Pallocframe _ | Pfreeframe _ -> true*) +(*| _ -> false*) -(* Returns : (accumulated instructions, remaining instructions, opaque instruction if found) *) +(*(* Returns : (accumulated instructions, remaining instructions, opaque instruction if found) *)*) (*let rec biggest_wo_opaque = function | [] -> ([], [], None) | i :: li -> if is_opaque i then ([], li, Some i) @@ -895,38 +868,64 @@ let separate_opaque bb = match opaque with | Some i -> (match big with - | [] -> (bundlize [i] hd) :: (f [] rem) - | big -> (bundlize big hd) :: (bundlize [i] []) :: (f [] rem) + | [] -> (repack [i] hd) :: (f [] rem) + | big -> (repack big hd) :: (bundlize [i] []) :: (f [] rem) ) | None -> (bundlize big hd) :: (f [] rem) in f bb.header instrs*) let smart_schedule bb = - (*let lbb = separate_opaque bb in*) - let bb' = do_schedule bb in (bb'.body, bb'.exit) - (*try do_schedule bb*) - (*with OpaqueInstruction -> dumb_schedule bb*) - (*| e -> *) - (*let msg = Printexc.to_string e*) - (*and stack = Printexc.get_backtrace ()*) - (*in begin*) - (*Printf.eprintf "In regards to this group of instructions:\n";*) - (*print_bb stderr bb;*) - (*Printf.eprintf "Postpass scheduling could not complete: %s\n%s" msg stack;*) - (*failwith "Invalid schedule"*) - (*(**) - (*Printf.eprintf "Issuing one instruction per bundle instead\n\n";*) - (*dumb_schedule bb*) - (**)*) - (*end*) + let bb' = + try do_schedule bb with + | OpaqueInstruction -> + if debug then + Printf.eprintf "OpaqueInstruction raised, using identity scheduling\n"; + bb (* Identity in case of failure *) + | e -> + let msg = Printexc.to_string e and stack = Printexc.get_backtrace () in + Printf.eprintf "Postpass scheduling could not complete: %s\n%s" msg + stack; + failwith "Invalid schedule" + in + pack_result bb' let bblock_schedule bb = - if debug then (Printf.eprintf "###############################\n"; Printf.eprintf "SCHEDULING\n"); - print_problem stdout (build_problem bb); + if debug then ( + Printf.eprintf "###############################\n"; + Printf.eprintf "SCHEDULING\n" ); (*if Compopts.optim_postpass () then smart_schedule bb else dumb_schedule bb*) smart_schedule bb (** Called schedule function from Coq *) (*let schedule_notime bb = let toto = bblock_schedule in toto*) -let schedule bb = Timing.time_coq ('P'::('o'::('s'::('t'::('p'::('a'::('s'::('s'::('S'::('c'::('h'::('e'::('d'::('u'::('l'::('i'::('n'::('g'::(' '::('o'::('r'::('a'::('c'::('l'::('e'::([])))))))))))))))))))))))))) bblock_schedule bb +let schedule bb = + Timing.time_coq + [ + 'P'; + 'o'; + 's'; + 't'; + 'p'; + 'a'; + 's'; + 's'; + 'S'; + 'c'; + 'h'; + 'e'; + 'd'; + 'u'; + 'l'; + 'i'; + 'n'; + 'g'; + ' '; + 'o'; + 'r'; + 'a'; + 'c'; + 'l'; + 'e'; + ] + bblock_schedule bb |