From cdb54160ff67bef3ab40e3cc85416f2c897ac82b Mon Sep 17 00:00:00 2001 From: Léo Gourdin Date: Tue, 3 Nov 2020 18:10:54 +0100 Subject: Dumb (identity) scheduling working and integrated --- aarch64/OpWeightsAsm.ml | 212 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 212 insertions(+) create mode 100644 aarch64/OpWeightsAsm.ml (limited to 'aarch64/OpWeightsAsm.ml') diff --git a/aarch64/OpWeightsAsm.ml b/aarch64/OpWeightsAsm.ml new file mode 100644 index 00000000..17869a99 --- /dev/null +++ b/aarch64/OpWeightsAsm.ml @@ -0,0 +1,212 @@ +open Asmblock;; +(* TODO remove? open PrepassSchedulingOracleDeps;;*) + +module Cortex_A53= + struct + let resource_bounds = [| 2; 2; 1; 1 |];; (* instr ; ALU ; MAC; LSU *) + let nr_non_pipelined_units = 1;; + + let latency_of_op (i : basic) (nargs : int) = + match i with + | PArith _ + | PArithP _ _ -> 1 (* XXX *) + | PArithPP + + let resources_of_op (op : operation) (nargs : int) = + match op with + | Omove + | Ointconst _ + | Olongconst _ + | Ofloatconst _ + | Osingleconst _ + | Oaddrsymbol _ + | Oaddrstack _ + (* 32-bit integer arithmetic *) + | Oshift _ + | Oadd + | Oaddshift _ + | Oaddimm _ + | Oneg + | Onegshift _ + | Osub + | Osubshift _ -> [| 1 ; 1; 0; 0 |] + | Omul + | Omuladd + | Omulsub -> [| 1; 1; 1; 0 |] + | Odiv + | Odivu -> [| 1; 0; 0; 0 |] + | Oand + | Oandshift _ + | Oandimm _ + | Oor + | Oorshift _ + | Oorimm _ + | Oxor + | Oxorshift _ + | Oxorimm _ + | Onot + | Onotshift _ + | Obic + | Obicshift _ + | Oorn + | Oornshift _ + | Oeqv + | Oeqvshift _ + | Oshl + | Oshr + | Oshru + | Oshrximm _ + | Ozext _ + | Osext _ + | Oshlzext _ + | Oshlsext _ + | Ozextshr _ + | Osextshr _ + +(* 64-bit integer arithmetic *) + | Oshiftl _ + | Oextend _ + | Omakelong + | Olowlong + | Ohighlong + | Oaddl + | Oaddlshift _ + | Oaddlext _ + | Oaddlimm _ + | Onegl + | Oneglshift _ + | Osubl + | Osublshift _ + | Osublext _ -> [| 1 ; 1 ; 0; 0 |] + | Omull + | Omulladd + | Omullsub + | Omullhs + | Omullhu -> [| 1 ; 1 ; 1; 0 |] + | Odivl + | Odivlu -> [| 1; 0; 0; 0 |] + | Oandl + | Oandlshift _ + | Oandlimm _ + | Oorl + | Oorlshift _ + | Oorlimm _ + | Oxorl + | Oxorlshift _ + | Oxorlimm _ + | Onotl + | Onotlshift _ + | Obicl + | Obiclshift _ + | Oornl + | Oornlshift _ + | Oeqvl + | Oeqvlshift _ + | Oshll + | Oshrl + | Oshrlu + | Oshrlximm _ + | Ozextl _ + | Osextl _ + | Oshllzext _ + | Oshllsext _ + | Ozextshrl _ + | Osextshrl _ -> [| 1; 1; 0; 0 |] + (* 64-bit floating-point arithmetic *) + | Onegf (* r [rd = - r1] *) + | Oabsf (* r [rd = abs(r1)] *) + | Oaddf (* r [rd = r1 + r2] *) + | Osubf (* r [rd = r1 - r2] *) + | Omulf (* r [rd = r1 * r2] *) + | Odivf + (* 32-bit floating-point arithmetic *) + | Onegfs (* r [rd = - r1] *) + | Oabsfs (* r [rd = abs(r1)] *) + | Oaddfs (* r [rd = r1 + r2] *) + | Osubfs (* r [rd = r1 - r2] *) + | Omulfs (* r [rd = r1 * r2] *) + | Odivfs (* r [rd = r1 / r2] *) + | Osingleoffloat (* r [rd] is [r1] truncated to single-precision float *) + | Ofloatofsingle (* r [rd] is [r1] extended to double-precision float *) +(* Conversions between int and float *) + | Ointoffloat (* r [rd = signed_int_of_float64(r1)] *) + | Ointuoffloat (* r [rd = unsigned_int_of_float64(r1)] *) + | Ofloatofint (* r [rd = float64_of_signed_int(r1)] *) + | Ofloatofintu (* r [rd = float64_of_unsigned_int(r1)] *) + | Ointofsingle (* r [rd = signed_int_of_float32(r1)] *) + | Ointuofsingle (* r [rd = unsigned_int_of_float32(r1)] *) + | Osingleofint (* r [rd = float32_of_signed_int(r1)] *) + | Osingleofintu (* r [rd = float32_of_unsigned_int(r1)] *) + | Olongoffloat (* r [rd = signed_long_of_float64(r1)] *) + | Olonguoffloat (* r [rd = unsigned_long_of_float64(r1)] *) + | Ofloatoflong (* r [rd = float64_of_signed_long(r1)] *) + | Ofloatoflongu (* r [rd = float64_of_unsigned_long(r1)] *) + | Olongofsingle (* r [rd = signed_long_of_float32(r1)] *) + | Olonguofsingle (* r [rd = unsigned_long_of_float32(r1)] *) + | Osingleoflong (* r [rd = float32_of_signed_long(r1)] *) + | Osingleoflongu (* r [rd = float32_of_unsigned_int(r1)] *) + -> [| 1 ; 1; 1; 0 |] + +(* Boolean tests *) + | Ocmp cmp | Osel (cmp, _) -> + (match cmp with + | Ccompf _ (* r FP comparison *) + | Cnotcompf _ (* r negation of an FP comparison *) + | Ccompfzero _ (* r comparison with 0.0 *) + | Cnotcompfzero _ (* r negation of comparison with 0.0 *) + | Ccompfs _ (* r FP comparison *) + | Cnotcompfs _ (* r negation of an FP comparison *) + | Ccompfszero _ (* r equal to 0.0 *) + | Cnotcompfszero _ (* r not equal to 0.0 *) -> + [| 1; 1; 1; 0 |] + | _ -> [| 1; 1; 0; 0 |] );; + + let non_pipelined_resources_of_op (op : operation) (nargs : int) = + match op with + | Odiv | Odivu -> [| 29 |] + | Odivfs -> [| 20 |] + | Odivl | Odivlu | Odivf -> [| 50 |] + | _ -> [| -1 |];; + + let resources_of_cond (cmp : condition) (nargs : int) = + (match cmp with + | Ccompf _ (* r FP comparison *) + | Cnotcompf _ (* r negation of an FP comparison *) + | Ccompfzero _ (* r comparison with 0.0 *) + | Cnotcompfzero _ (* r negation of comparison with 0.0 *) + | Ccompfs _ (* r FP comparison *) + | Cnotcompfs _ (* r negation of an FP comparison *) + | Ccompfszero _ (* r equal to 0.0 *) + | Cnotcompfszero _ (* r not equal to 0.0 *) -> + [| 1; 1; 1; 0 |] + | _ -> [| 1; 1; 0; 0 |] );; + + let latency_of_load trap chunk (addr : addressing) (nargs : int) = 3;; + let latency_of_call _ _ = 6;; + + let resources_of_load trap chunk addressing nargs = [| 1; 0; 0; 1 |];; + + let resources_of_store chunk addressing nargs = [| 1; 0; 0; 1 |];; + + let resources_of_call _ _ = resource_bounds;; + let resources_of_builtin _ = resource_bounds;; + end;; + +let get_opweights () : opweights = + match !Clflags.option_mtune with + | "cortex-a53" | "cortex-a35" | "" -> + { + pipelined_resource_bounds = Cortex_A53.resource_bounds; + nr_non_pipelined_units = Cortex_A53.nr_non_pipelined_units; + latency_of_op = Cortex_A53.latency_of_op; + resources_of_op = Cortex_A53.resources_of_op; + non_pipelined_resources_of_op = Cortex_A53.non_pipelined_resources_of_op; + latency_of_load = Cortex_A53.latency_of_load; + resources_of_load = Cortex_A53.resources_of_load; + resources_of_store = Cortex_A53.resources_of_store; + resources_of_cond = Cortex_A53.resources_of_cond; + latency_of_call = Cortex_A53.latency_of_call; + resources_of_call = Cortex_A53.resources_of_call; + resources_of_builtin = Cortex_A53.resources_of_builtin + } + | xxx -> failwith (Printf.sprintf "unknown -mtune: %s" xxx);; -- cgit