From aa66e021f4a211969eb59e0ead761ff370b39907 Mon Sep 17 00:00:00 2001 From: Léo Gourdin Date: Wed, 9 Dec 2020 08:23:01 +0100 Subject: Non conseq loads in peephole --- aarch64/PeepholeOracle.ml | 274 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 192 insertions(+), 82 deletions(-) (limited to 'aarch64/PeepholeOracle.ml') diff --git a/aarch64/PeepholeOracle.ml b/aarch64/PeepholeOracle.ml index c9a1f8f4..5e473049 100644 --- a/aarch64/PeepholeOracle.ml +++ b/aarch64/PeepholeOracle.ml @@ -18,20 +18,22 @@ open Printf let debug = true +(* Functions to verify the immediate offset range for ldp/stp *) let is_valid_immofs_32 z = - if z <= 252 && z >= -256 && z mod 4 == 0 then true else false + if z <= 252 && z >= -256 && z mod 4 = 0 then true else false let is_valid_immofs_64 z = - if z <= 504 && z >= -512 && z mod 8 == 0 then true else false + if z <= 504 && z >= -512 && z mod 8 = 0 then true else false +(* Functions to check if a ldp/stp replacement is valid according to args *) let is_valid_ldrw rd1 rd2 b1 b2 n1 n2 = let z1 = to_int (camlint64_of_coqint n1) in let z2 = to_int (camlint64_of_coqint n2) in if (not (ireg_eq rd1 rd2)) && iregsp_eq b1 b2 - && (not (iregsp_eq (RR1 rd1) b1)) - && z2 == z1 + 4 + && (not (iregsp_eq (RR1 rd1) b2)) + && ((z2 = z1 + 4) || (z2 = z1 - 4)) && is_valid_immofs_32 z1 then true else false @@ -42,8 +44,8 @@ let is_valid_ldrx rd1 rd2 b1 b2 n1 n2 = if (not (ireg_eq rd1 rd2)) && iregsp_eq b1 b2 - && (not (iregsp_eq (RR1 rd1) b1)) - && z2 == z1 + 8 + && (not (iregsp_eq (RR1 rd1) b2)) + && ((z2 = z1 + 8) || (z2 = z1 - 8)) && is_valid_immofs_64 z1 then true else false @@ -51,118 +53,187 @@ let is_valid_ldrx rd1 rd2 b1 b2 n1 n2 = let is_valid_strw b1 b2 n1 n2 = let z1 = to_int (camlint64_of_coqint n1) in let z2 = to_int (camlint64_of_coqint n2) in - if iregsp_eq b1 b2 && z2 == z1 + 4 && is_valid_immofs_32 z1 then true + if iregsp_eq b1 b2 && z2 = z1 + 4 && is_valid_immofs_32 z1 then true else false let is_valid_strx b1 b2 n1 n2 = let z1 = to_int (camlint64_of_coqint n1) in let z2 = to_int (camlint64_of_coqint n2) in - if iregsp_eq b1 b2 && z2 == z1 + 8 && is_valid_immofs_64 z1 then true + if iregsp_eq b1 b2 && z2 = z1 + 8 && is_valid_immofs_64 z1 then true else false let dreg_of_ireg r = IR (RR1 r) -(*let pot_ldrw_rep = ref []*) - -let rec affect_symb_mem reg pot_rep stype = - match (pot_rep, stype) with - | [], _ -> [] - | ( ((PLoad (PLd_rd_a (Pldrw, IR (RR1 rd), ADimm (b, n))), _) as h0) :: t0, - "ldrw" ) -> - if dreg_eq reg (IR b) then affect_symb_mem reg t0 stype - else h0 :: affect_symb_mem reg t0 stype - | _, _ -> failwith "Found an inconsistent inst in pot_rep" - -let rec read_symb_mem reg pot_rep stype = - match (pot_rep, stype) with - | [], _ -> [] - | ( ((PLoad (PLd_rd_a (Pldrw, IR (RR1 rd), ADimm (b, n))), _) as h0) :: t0, - "ldrw" ) -> - if dreg_eq reg (IR (RR1 rd)) then read_symb_mem reg t0 stype - else h0 :: read_symb_mem reg t0 stype - | _, _ -> failwith "Found an inconsistent inst in pot_rep" - -let update_pot_rep inst pot_rep stype = +(* Affect a symbolic memory list of potential replacements + * for a given write in reg *) +let rec affect_symb_mem reg insta pot_rep stype = + match pot_rep with + | [] -> [] + | h0 :: t0 -> ( + match (insta.(h0), stype) with + | PLoad (PLd_rd_a (Pldrw, IR (RR1 rd), ADimm (b, n))), "ldrw" -> + if dreg_eq reg (IR b) then affect_symb_mem reg insta t0 stype + else h0 :: affect_symb_mem reg insta t0 stype + | PLoad (PLd_rd_a (Pldrx, IR (RR1 rd), ADimm (b, n))), "ldrx" -> + if dreg_eq reg (IR b) then affect_symb_mem reg insta t0 stype + else h0 :: affect_symb_mem reg insta t0 stype + | _, _ -> + failwith "affect_symb_mem: Found an inconsistent inst in pot_rep") + +(* Affect a symbolic memory list of potential replacements + * for a given read in reg *) +let rec read_symb_mem reg insta pot_rep stype = + match pot_rep with + | [] -> [] + | h0 :: t0 -> ( + match (insta.(h0), stype) with + | PLoad (PLd_rd_a (Pldrw, IR (RR1 rd), ADimm (b, n))), "ldrw" -> + if dreg_eq reg (IR (RR1 rd)) then read_symb_mem reg insta t0 stype + else h0 :: read_symb_mem reg insta t0 stype + | PLoad (PLd_rd_a (Pldrx, IR (RR1 rd), ADimm (b, n))), "ldrx" -> + if dreg_eq reg (IR (RR1 rd)) then read_symb_mem reg insta t0 stype + else h0 :: read_symb_mem reg insta t0 stype + | _, _ -> failwith "read_symb_mem: Found an inconsistent inst in pot_rep") + +let update_pot_rep_addressing a insta pot_rep stype = + match a with + | ADimm (base, _) -> + pot_rep := read_symb_mem (IR base) insta !pot_rep stype + | ADreg (base, r) -> + pot_rep := read_symb_mem (IR base) insta !pot_rep stype; + pot_rep := read_symb_mem (dreg_of_ireg r) insta !pot_rep stype + | ADlsl (base, r, _) -> + pot_rep := read_symb_mem (IR base) insta !pot_rep stype; + pot_rep := read_symb_mem (dreg_of_ireg r) insta !pot_rep stype + | ADsxt (base, r, _) -> + pot_rep := read_symb_mem (IR base) insta !pot_rep stype; + pot_rep := read_symb_mem (dreg_of_ireg r) insta !pot_rep stype + | ADuxt (base, r, _) -> + pot_rep := read_symb_mem (IR base) insta !pot_rep stype; + pot_rep := read_symb_mem (dreg_of_ireg r) insta !pot_rep stype + | ADadr (base, _, _) -> + pot_rep := read_symb_mem (IR base) insta !pot_rep stype + | ADpostincr (base, _) -> + pot_rep := read_symb_mem (IR base) insta !pot_rep stype + +(* Update a symbolic memory list of potential replacements + * for any basic instruction *) +let update_pot_rep_basic inst insta pot_rep stype = match inst with | PArith i -> ( match i with - | PArithP (_, rd) -> pot_rep := affect_symb_mem rd !pot_rep stype + | PArithP (_, rd) -> pot_rep := affect_symb_mem rd insta !pot_rep stype | PArithPP (_, rd, rs) -> - pot_rep := affect_symb_mem rd !pot_rep stype; - pot_rep := read_symb_mem rs !pot_rep stype + pot_rep := affect_symb_mem rd insta !pot_rep stype; + pot_rep := read_symb_mem rs insta !pot_rep stype | PArithPPP (_, rd, rs1, rs2) -> - pot_rep := affect_symb_mem rd !pot_rep stype; - pot_rep := read_symb_mem rs1 !pot_rep stype; - pot_rep := read_symb_mem rs2 !pot_rep stype + pot_rep := affect_symb_mem rd insta !pot_rep stype; + pot_rep := read_symb_mem rs1 insta !pot_rep stype; + pot_rep := read_symb_mem rs2 insta !pot_rep stype | PArithRR0R (_, rd, rs1, rs2) -> - pot_rep := affect_symb_mem (dreg_of_ireg rd) !pot_rep stype; + pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype; (match rs1 with | RR0 rs1 -> - pot_rep := read_symb_mem (dreg_of_ireg rs1) !pot_rep stype + pot_rep := read_symb_mem (dreg_of_ireg rs1) insta !pot_rep stype | _ -> ()); - pot_rep := read_symb_mem (dreg_of_ireg rs2) !pot_rep stype + pot_rep := read_symb_mem (dreg_of_ireg rs2) insta !pot_rep stype | PArithRR0 (_, rd, rs) -> ( - pot_rep := affect_symb_mem (dreg_of_ireg rd) !pot_rep stype; + pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype; match rs with | RR0 rs1 -> - pot_rep := read_symb_mem (dreg_of_ireg rs1) !pot_rep stype + pot_rep := read_symb_mem (dreg_of_ireg rs1) insta !pot_rep stype | _ -> ()) | PArithARRRR0 (_, rd, rs1, rs2, rs3) -> ( - pot_rep := affect_symb_mem (dreg_of_ireg rd) !pot_rep stype; - pot_rep := read_symb_mem (dreg_of_ireg rs1) !pot_rep stype; - pot_rep := read_symb_mem (dreg_of_ireg rs2) !pot_rep stype; + pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype; + pot_rep := read_symb_mem (dreg_of_ireg rs1) insta !pot_rep stype; + pot_rep := read_symb_mem (dreg_of_ireg rs2) insta !pot_rep stype; match rs3 with | RR0 rs1 -> - pot_rep := read_symb_mem (dreg_of_ireg rs1) !pot_rep stype + pot_rep := read_symb_mem (dreg_of_ireg rs1) insta !pot_rep stype | _ -> ()) | PArithComparisonPP (_, rs1, rs2) -> - pot_rep := read_symb_mem rs1 !pot_rep stype; - pot_rep := read_symb_mem rs2 !pot_rep stype + pot_rep := read_symb_mem rs1 insta !pot_rep stype; + pot_rep := read_symb_mem rs2 insta !pot_rep stype | PArithComparisonR0R (_, rs1, rs2) -> (match rs1 with | RR0 rs1 -> - pot_rep := read_symb_mem (dreg_of_ireg rs1) !pot_rep stype + pot_rep := read_symb_mem (dreg_of_ireg rs1) insta !pot_rep stype | _ -> ()); - pot_rep := read_symb_mem (dreg_of_ireg rs2) !pot_rep stype + pot_rep := read_symb_mem (dreg_of_ireg rs2) insta !pot_rep stype | PArithComparisonP (_, rs1) -> - pot_rep := read_symb_mem rs1 !pot_rep stype + pot_rep := read_symb_mem rs1 insta !pot_rep stype | Pcset (rd, _) -> - pot_rep := affect_symb_mem (dreg_of_ireg rd) !pot_rep stype + pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype | Pfmovi (_, _, rs) -> ( match rs with - | RR0 rs -> pot_rep := read_symb_mem (dreg_of_ireg rs) !pot_rep stype + | RR0 rs -> + pot_rep := read_symb_mem (dreg_of_ireg rs) insta !pot_rep stype | _ -> ()) | Pcsel (rd, rs1, rs2, _) -> - pot_rep := affect_symb_mem rd !pot_rep stype; - pot_rep := read_symb_mem rs1 !pot_rep stype; - pot_rep := read_symb_mem rs2 !pot_rep stype + pot_rep := affect_symb_mem rd insta !pot_rep stype; + pot_rep := read_symb_mem rs1 insta !pot_rep stype; + pot_rep := read_symb_mem rs2 insta !pot_rep stype | Pfnmul (_, rd, rs1, rs2) -> ()) | PLoad i -> ( match i with - | PLd_rd_a (_, rd, _) -> pot_rep := affect_symb_mem rd !pot_rep stype - | Pldp (_, rd1, rd2, _) -> - pot_rep := affect_symb_mem (dreg_of_ireg rd1) !pot_rep stype; - pot_rep := affect_symb_mem (dreg_of_ireg rd2) !pot_rep stype) + | PLd_rd_a (_, rd, a) -> + pot_rep := affect_symb_mem rd insta !pot_rep stype; + update_pot_rep_addressing a insta pot_rep stype + | Pldp (_, rd1, rd2, a) -> + pot_rep := affect_symb_mem (dreg_of_ireg rd1) insta !pot_rep stype; + pot_rep := affect_symb_mem (dreg_of_ireg rd2) insta !pot_rep stype; + update_pot_rep_addressing a insta pot_rep stype) | PStore _ -> pot_rep := [] - (* TODO *) - | _ -> () + | Pallocframe (_, _) -> pot_rep := [] + | Pfreeframe (_, _) -> pot_rep := [] + | Ploadsymbol (rd, _) -> + pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype + | Pcvtsw2x (rd, rs) -> + pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype; + pot_rep := read_symb_mem (dreg_of_ireg rs) insta !pot_rep stype + | Pcvtuw2x (rd, rs) -> + pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype; + pot_rep := read_symb_mem (dreg_of_ireg rs) insta !pot_rep stype + | Pcvtx2w rd -> + pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype; + pot_rep := read_symb_mem (dreg_of_ireg rd) insta !pot_rep stype + | Pnop -> () -(*let update_symb_mem inst = update_pot_rep inst pot_ldrw_rep "ldrw"*) +(* Try to find the index of the first previous compatible + * replacement in a given symbolic memory *) +let rec search_compat_rep rd2 b2 n2 insta pot_rep stype = + match pot_rep with + | [] -> None + | h0 :: t0 -> ( + match (insta.(h0), stype) with + | PLoad (PLd_rd_a (Pldrw, IR (RR1 rd1), ADimm (b1, n1))), "ldrw" -> + if is_valid_ldrw rd1 rd2 b1 b2 n1 n2 then Some (h0, rd1, b1, n1) + else search_compat_rep rd2 b2 n2 insta t0 stype + | PLoad (PLd_rd_a (Pldrx, IR (RR1 rd1), ADimm (b1, n1))), "ldrx" -> + if is_valid_ldrx rd1 rd2 b1 b2 n1 n2 then Some (h0, rd1, b1, n1) + else search_compat_rep rd2 b2 n2 insta t0 stype + | _, _ -> + failwith "search_compat_rep: Found an inconsistent inst in pot_rep") -let rec search_compat_rep rd b n pot_rep stype = - match (pot_rep, stype) with - | [], _ -> None - | ( ((PLoad (PLd_rd_a (Pldrx, IR (RR1 rd'), ADimm (b', n'))), c) as h0) :: t0, - "ldrw" ) -> - if is_valid_ldrw rd rd' b b' n n' then Some h0 - else search_compat_rep rd b n t0 stype - | _, _ -> None +(* This is useful to manage the case were the immofs + * of the first ldr/str is greater than the second one *) +let min_is_rev n1 n2 = + let z1 = to_int (camlint64_of_coqint n1) in + let z2 = to_int (camlint64_of_coqint n2) in + if z1 < z2 then true else false +(* Main peephole function *) let pair_rep insta = + (* Each list below is a symbolic mem representation + * for one type of inst. Lists contains integers which + * are the indices of insts in the main array "insta". *) + let pot_ldrw_rep = ref [] in + let pot_ldrx_rep = ref [] in for i = 0 to Array.length insta - 2 do let h0 = insta.(i) in let h1 = insta.(i + 1) in match (h0, h1) with + (* Consecutive ldrw *) | ( PLoad (PLd_rd_a (Pldrw, IR (RR1 rd1), ADimm (b1, n1))), PLoad (PLd_rd_a (Pldrw, IR (RR1 rd2), ADimm (b2, n2))) ) -> (* When both load the same dest, and with no reuse of ld1 in ld2. *) @@ -172,8 +243,12 @@ let pair_rep insta = (* When two consec mem addr are loading two different dest. *)) else if is_valid_ldrw rd1 rd2 b1 b2 n1 n2 then ( if debug then eprintf "LDP_WPEEP\n"; - insta.(i) <- PLoad (Pldp (Pldpw, rd1, rd2, ADimm (b1, n1))); + (if min_is_rev n1 n2 then + insta.(i) <- PLoad (Pldp (Pldpw, rd1, rd2, ADimm (b1, n1))) + else + insta.(i) <- PLoad (Pldp (Pldpw, rd2, rd1, ADimm (b1, n2)))); insta.(i + 1) <- Pnop) + (* Consecutive ldrx *) | ( PLoad (PLd_rd_a (Pldrx, IR (RR1 rd1), ADimm (b1, n1))), PLoad (PLd_rd_a (Pldrx, IR (RR1 rd2), ADimm (b2, n2))) ) -> if ireg_eq rd1 rd2 && not (iregsp_eq b2 (RR1 rd1)) then ( @@ -181,13 +256,42 @@ let pair_rep insta = insta.(i) <- Pnop) else if is_valid_ldrx rd1 rd2 b1 b2 n1 n2 then ( if debug then eprintf "LDP_XPEEP\n"; - insta.(i) <- PLoad (Pldp (Pldpx, rd1, rd2, ADimm (b1, n1))); - insta.(i + 1) <- Pnop - (* When there are some insts between loads/stores *) - (*| PLoad (PLd_rd_a (Pldrw, IR (RR1 rd1), ADimm (b1, n1))), _ ->*) - (*(match search_compat_rep h0 pot_ldrw_rep with*) - (*| None -> (pot_ldrw_rep := (h0, count) :: !pot_ldrw_rep; h0 :: pair_rep insta t0)*) - (*| Some (l, c) -> *)) + (if min_is_rev n1 n2 then + insta.(i) <- PLoad (Pldp (Pldpx, rd1, rd2, ADimm (b1, n1))) + else + insta.(i) <- PLoad (Pldp (Pldpx, rd2, rd1, ADimm (b1, n2)))); + insta.(i + 1) <- Pnop) + (* Non-consecutive ldrw *) + | PLoad (PLd_rd_a (Pldrw, IR (RR1 rd1), ADimm (b1, n1))), _ -> ( + (* Search a previous compatible load *) + match search_compat_rep rd1 b1 n1 insta !pot_ldrw_rep "ldrw" with + (* If we can't find a candidate, add the current load as a potential future one *) + | None -> pot_ldrw_rep := i :: !pot_ldrw_rep + (* Else, perform the peephole *) + | Some (rep, r, b, n) -> + eprintf "LDP_WCPEEP\n"; + (* The two lines below are used to filter the elected candidate *) + let filt x = x != rep in + pot_ldrw_rep := List.filter filt !pot_ldrw_rep; + insta.(rep) <- Pnop; + (if min_is_rev n n1 then + insta.(i) <- PLoad (Pldp (Pldpw, r, rd1, ADimm (b, n))) + else + insta.(i) <- PLoad (Pldp (Pldpw, rd1, r, ADimm (b, n1))))) + (* Non-consecutive ldrx *) + | PLoad (PLd_rd_a (Pldrx, IR (RR1 rd1), ADimm (b1, n1))), _ -> ( + match search_compat_rep rd1 b1 n1 insta !pot_ldrx_rep "ldrx" with + | None -> pot_ldrx_rep := i :: !pot_ldrx_rep + | Some (rep, r, b, n) -> + eprintf "LDP_XCPEEP\n"; + let filt x = x != rep in + pot_ldrx_rep := List.filter filt !pot_ldrx_rep; + insta.(rep) <- Pnop; + (if min_is_rev n n1 then + insta.(i) <- PLoad (Pldp (Pldpx, r, rd1, ADimm (b, n))) + else + insta.(i) <- PLoad (Pldp (Pldpx, rd1, r, ADimm (b, n1))))) + (* Consecutive strw *) | ( PStore (PSt_rs_a (Pstrw, IR (RR1 rs1), ADimm (b1, n1))), PStore (PSt_rs_a (Pstrw, IR (RR1 rs2), ADimm (b2, n2))) ) -> (* When both store at the same addr, same ofs. *) @@ -199,6 +303,7 @@ let pair_rep insta = if debug then eprintf "STP_WPEEP\n"; insta.(i) <- PStore (Pstp (Pstpw, rs1, rs2, ADimm (b1, n1))); insta.(i + 1) <- Pnop (* When nothing can be optimized. *)) + (* Consecutive strx *) | ( PStore (PSt_rs_a (Pstrx, IR (RR1 rs1), ADimm (b1, n1))), PStore (PSt_rs_a (Pstrx, IR (RR1 rs2), ADimm (b2, n2))) ) -> (*if (iregsp_eq b1 b2) && (z2 =? z1) then @@ -208,17 +313,22 @@ let pair_rep insta = if debug then eprintf "STP_XPEEP\n"; insta.(i) <- PStore (Pstp (Pstpx, rs1, rs2, ADimm (b1, n1))); insta.(i + 1) <- Pnop) - | h0, _ -> () (*update_symb_mem h0*) + (* Any other inst *) + | h0, _ -> + (* Here we need to update every symbolic memory according to the matched inst *) + update_pot_rep_basic h0 insta pot_ldrw_rep "ldrw"; + update_pot_rep_basic h0 insta pot_ldrx_rep "ldrx" done +(* Calling peephole if flag is set *) let optimize_bdy (bdy : basic list) : basic list = - if !Clflags.option_fcoalesce_mem then + if !Clflags.option_fcoalesce_mem then ( let insta = Array.of_list bdy in - (pair_rep insta; Array.to_list insta) + pair_rep insta; + Array.to_list insta) else bdy -(** Called peephole function from Coq *) - +(* Called peephole function from Coq *) let peephole_opt bdy = Timing.time_coq [ -- cgit