diff options
Diffstat (limited to 'aarch64/PeepholeOracle.ml')
-rw-r--r-- | aarch64/PeepholeOracle.ml | 624 |
1 files changed, 624 insertions, 0 deletions
diff --git a/aarch64/PeepholeOracle.ml b/aarch64/PeepholeOracle.ml new file mode 100644 index 00000000..1d890f2c --- /dev/null +++ b/aarch64/PeepholeOracle.ml @@ -0,0 +1,624 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Léo Gourdin UGA, VERIMAG *) +(* *) +(* Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +open Camlcoq +open Asmblock +open Asm +open Int64 +open Printf + +(* If true, the oracle will print a msg for each applied peephole *) +let debug = false + +(* Functions to verify the immediate offset range for ldp/stp *) +let is_valid_immofs_32 z = + if z <= 252 && z >= -256 && z mod 4 = 0 then true else false + +let is_valid_immofs_64 z = + if z <= 504 && z >= -512 && z mod 8 = 0 then true else false + +(* Functions to check if a ldp/stp replacement is valid according to args *) +let is_valid_ldr32 rd1 rd2 b1 b2 n1 n2 = + let z1 = to_int (camlint64_of_coqint n1) in + let z2 = to_int (camlint64_of_coqint n2) in + if + (not (dreg_eq rd1 rd2)) + && iregsp_eq b1 b2 + && (not (dreg_eq rd1 (IR b2))) + && (z2 = z1 + 4 || z2 = z1 - 4) + && is_valid_immofs_32 z1 + then true + else false + +let is_valid_ldr64 rd1 rd2 b1 b2 n1 n2 = + let z1 = to_int (camlint64_of_coqint n1) in + let z2 = to_int (camlint64_of_coqint n2) in + if + (not (dreg_eq rd1 rd2)) + && iregsp_eq b1 b2 + && (not (dreg_eq rd1 (IR b2))) + && (z2 = z1 + 8 || z2 = z1 - 8) + && is_valid_immofs_64 z1 + then true + else false + +let is_valid_str32 b1 b2 n1 n2 = + let z1 = to_int (camlint64_of_coqint n1) in + let z2 = to_int (camlint64_of_coqint n2) in + if iregsp_eq b1 b2 && z2 = z1 + 4 && is_valid_immofs_32 z1 then true + else false + +let is_valid_str64 b1 b2 n1 n2 = + let z1 = to_int (camlint64_of_coqint n1) in + let z2 = to_int (camlint64_of_coqint n2) in + if iregsp_eq b1 b2 && z2 = z1 + 8 && is_valid_immofs_64 z1 then true + else false + +let dreg_of_ireg r = IR (RR1 r) + +let dreg_of_freg r = FR r + +(* Return true if an intermediate + * affectation eliminates the potential + * candidate *) +let verify_load_affect reg rd b rev = + let b = IR b in + if not rev then dreg_eq reg b else dreg_eq reg b || dreg_eq reg rd + +(* Return true if an intermediate + * read eliminates the potential + * candidate *) +let verify_load_read reg rd b rev = dreg_eq reg rd + +(* Return true if an intermediate + * affectation eliminates the potential + * candidate *) +let verify_store_affect reg rs b rev = + let b = IR b in + dreg_eq reg b || dreg_eq reg rs + +type ph_type = P32 | P32f | P64 | P64f + +type inst_type = Ldr of ph_type | Str of ph_type + +let ph_ty_to_string = function + | Ldr P32 -> "ldr32" + | Ldr P32f -> "ldr32f" + | Ldr P64 -> "ldr64" + | Ldr P64f -> "ldr64f" + | Str P32 -> "str32" + | Str P32f -> "str32f" + | Str P64 -> "str64" + | Str P64f -> "str64f" + +let print_ph_ty chan v = output_string chan (ph_ty_to_string v) + +let symb_mem = Hashtbl.create 9 + +(* Affect a symbolic memory list of potential replacements + * for a given write in reg *) +let rec affect_symb_mem reg insta pot_rep stype rev = + match pot_rep with + | [] -> [] + | h0 :: t0 -> ( + match (insta.(h0), stype) with + | PLoad (PLd_rd_a (_, rd, ADimm (b, n))), Ldr _ -> + if verify_load_affect reg rd b rev then + affect_symb_mem reg insta t0 stype rev + else h0 :: affect_symb_mem reg insta t0 stype rev + | PStore (PSt_rs_a (_, rs, ADimm (b, n))), Str _ -> + if verify_store_affect reg rs b rev then + affect_symb_mem reg insta t0 stype rev + else h0 :: affect_symb_mem reg insta t0 stype rev + | _, _ -> + failwith "affect_symb_mem: Found an inconsistent inst in pot_rep") + +(* Affect a symbolic memory list of potential replacements + * for a given read in reg *) +let rec read_symb_mem reg insta pot_rep stype rev = + match pot_rep with + | [] -> [] + | h0 :: t0 -> ( + match (insta.(h0), stype) with + | PLoad (PLd_rd_a (_, rd, ADimm (b, n))), Ldr _ -> + if verify_load_read reg rd b rev then + read_symb_mem reg insta t0 stype rev + else h0 :: read_symb_mem reg insta t0 stype rev + | PStore (PSt_rs_a (_, rs, ADimm (b, n))), Str _ -> + h0 :: read_symb_mem reg insta t0 stype rev + | _, _ -> failwith "read_symb_mem: Found an inconsistent inst in pot_rep") + +(* Update a symbolic memory list of potential replacements + * for any addressing *) +let update_pot_rep_addressing a insta pot_rep stype rev = + match a with + | ADimm (base, _) -> + pot_rep := read_symb_mem (IR base) insta !pot_rep stype rev + | ADreg (base, r) -> + pot_rep := read_symb_mem (IR base) insta !pot_rep stype rev; + pot_rep := read_symb_mem (dreg_of_ireg r) insta !pot_rep stype rev + | ADlsl (base, r, _) -> + pot_rep := read_symb_mem (IR base) insta !pot_rep stype rev; + pot_rep := read_symb_mem (dreg_of_ireg r) insta !pot_rep stype rev + | ADsxt (base, r, _) -> + pot_rep := read_symb_mem (IR base) insta !pot_rep stype rev; + pot_rep := read_symb_mem (dreg_of_ireg r) insta !pot_rep stype rev + | ADuxt (base, r, _) -> + pot_rep := read_symb_mem (IR base) insta !pot_rep stype rev; + pot_rep := read_symb_mem (dreg_of_ireg r) insta !pot_rep stype rev + | ADadr (base, _, _) -> + pot_rep := read_symb_mem (IR base) insta !pot_rep stype rev + | ADpostincr (base, _) -> + pot_rep := read_symb_mem (IR base) insta !pot_rep stype rev + +(* Update a symbolic memory list of potential replacements + * for any basic instruction *) +let update_pot_rep_basic inst insta stype rev = + let pot_rep = Hashtbl.find symb_mem stype in + (match inst with + | PArith i -> ( + match i with + | PArithP (_, rd) -> + pot_rep := affect_symb_mem rd insta !pot_rep stype rev + | PArithPP (_, rd, rs) -> + pot_rep := affect_symb_mem rd insta !pot_rep stype rev; + pot_rep := read_symb_mem rs insta !pot_rep stype rev + | PArithPPP (_, rd, rs1, rs2) -> + pot_rep := affect_symb_mem rd insta !pot_rep stype rev; + pot_rep := read_symb_mem rs1 insta !pot_rep stype rev; + pot_rep := read_symb_mem rs2 insta !pot_rep stype rev + | PArithRR0R (_, rd, rs1, rs2) -> + pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev; + (match rs1 with + | RR0 rs1 -> + pot_rep := + read_symb_mem (dreg_of_ireg rs1) insta !pot_rep stype rev + | _ -> ()); + pot_rep := read_symb_mem (dreg_of_ireg rs2) insta !pot_rep stype rev + | PArithRR0 (_, rd, rs) -> ( + pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev; + match rs with + | RR0 rs1 -> + pot_rep := + read_symb_mem (dreg_of_ireg rs1) insta !pot_rep stype rev + | _ -> ()) + | PArithARRRR0 (_, rd, rs1, rs2, rs3) -> ( + pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev; + pot_rep := read_symb_mem (dreg_of_ireg rs1) insta !pot_rep stype rev; + pot_rep := read_symb_mem (dreg_of_ireg rs2) insta !pot_rep stype rev; + match rs3 with + | RR0 rs1 -> + pot_rep := + read_symb_mem (dreg_of_ireg rs1) insta !pot_rep stype rev + | _ -> ()) + | PArithComparisonPP (_, rs1, rs2) -> + pot_rep := read_symb_mem rs1 insta !pot_rep stype rev; + pot_rep := read_symb_mem rs2 insta !pot_rep stype rev + | PArithComparisonR0R (_, rs1, rs2) -> + (match rs1 with + | RR0 rs1 -> + pot_rep := + read_symb_mem (dreg_of_ireg rs1) insta !pot_rep stype rev + | _ -> ()); + pot_rep := read_symb_mem (dreg_of_ireg rs2) insta !pot_rep stype rev + | PArithComparisonP (_, rs1) -> + pot_rep := read_symb_mem rs1 insta !pot_rep stype rev + | Pcset (rd, _) -> + pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev + | Pfmovi (_, rd, rs) -> ( + pot_rep := affect_symb_mem (dreg_of_freg rd) insta !pot_rep stype rev; + match rs with + | RR0 rs -> + pot_rep := + read_symb_mem (dreg_of_ireg rs) insta !pot_rep stype rev + | _ -> ()) + | Pcsel (rd, rs1, rs2, _) -> + pot_rep := affect_symb_mem rd insta !pot_rep stype rev; + pot_rep := read_symb_mem rs1 insta !pot_rep stype rev; + pot_rep := read_symb_mem rs2 insta !pot_rep stype rev + | Pfnmul (_, rd, rs1, rs2) -> + pot_rep := affect_symb_mem (dreg_of_freg rd) insta !pot_rep stype rev; + pot_rep := read_symb_mem (dreg_of_freg rs1) insta !pot_rep stype rev; + pot_rep := read_symb_mem (dreg_of_freg rs2) insta !pot_rep stype rev) + | PLoad i -> ( + (* Here, we consider a different behavior for load and store potential candidates: + * a load does not obviously cancel the ldp candidates, but it does for any stp candidate. *) + match stype with + | Ldr _ -> ( + match i with + | PLd_rd_a (_, rd, a) -> + pot_rep := affect_symb_mem rd insta !pot_rep stype rev; + update_pot_rep_addressing a insta pot_rep stype rev + | Pldp (_, rd1, rd2, _, _, a) -> + pot_rep := affect_symb_mem rd1 insta !pot_rep stype rev; + pot_rep := affect_symb_mem rd2 insta !pot_rep stype rev; + update_pot_rep_addressing a insta pot_rep stype rev) + | _ -> pot_rep := []) + | PStore _ -> ( + (* Here, we consider that a store cancel all ldp candidates, but it is far more complicated for stp ones : + * if we cancel stp candidates here, we would prevent ourselves to apply the non-consec store peephole. + * To solve this issue, the store candidates cleaning is managed directly in the peephole function below. *) + match stype with Ldr _ -> pot_rep := [] | _ -> ()) + | Pallocframe (_, _) -> pot_rep := [] + | Pfreeframe (_, _) -> pot_rep := [] + | Ploadsymbol (rd, _) -> + pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev + | Pcvtsw2x (rd, rs) -> + pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev; + pot_rep := read_symb_mem (dreg_of_ireg rs) insta !pot_rep stype rev + | Pcvtuw2x (rd, rs) -> + pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev; + pot_rep := read_symb_mem (dreg_of_ireg rs) insta !pot_rep stype rev + | Pcvtx2w rd -> + pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev; + pot_rep := read_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev + | Pnop -> ()); + Hashtbl.replace symb_mem stype pot_rep + +(* This is useful to manage the case were the immofs + * of the first ldr/str is greater than the second one *) +let min_is_rev n1 n2 = + let z1 = to_int (camlint64_of_coqint n1) in + let z2 = to_int (camlint64_of_coqint n2) in + if z1 < z2 then true else false + +(* Below functions were added to merge pattern matching cases in peephole, + * thanks to this, we can make the chunk difference (int/any) compatible. *) +let trans_ldi (ldi : load_rd_a) : load_rd1_rd2_a = + match ldi with + | Pldrw | Pldrw_a -> Pldpw + | Pldrx | Pldrx_a -> Pldpx + | Pldrs -> Pldps + | Pldrd | Pldrd_a -> Pldpd + | _ -> failwith "trans_ldi: Found a non compatible load to translate" + +let trans_sti (sti : store_rs_a) : store_rs1_rs2_a = + match sti with + | Pstrw | Pstrw_a -> Pstpw + | Pstrx | Pstrx_a -> Pstpx + | Pstrs -> Pstps + | Pstrd | Pstrd_a -> Pstpd + | _ -> failwith "trans_sti: Found a non compatible store to translate" + +let is_compat_load (ldi : load_rd_a) = + match ldi with + | Pldrw | Pldrw_a | Pldrx | Pldrx_a | Pldrs | Pldrd | Pldrd_a -> true + | _ -> false + +let are_compat_load (ldi1 : load_rd_a) (ldi2 : load_rd_a) = + match ldi1 with + | Pldrw | Pldrw_a -> ( match ldi2 with Pldrw | Pldrw_a -> true | _ -> false) + | Pldrx | Pldrx_a -> ( match ldi2 with Pldrx | Pldrx_a -> true | _ -> false) + | Pldrs -> ( match ldi2 with Pldrs -> true | _ -> false) + | Pldrd | Pldrd_a -> ( match ldi2 with Pldrd | Pldrd_a -> true | _ -> false) + | _ -> false + +let is_compat_store (sti : store_rs_a) = + match sti with + | Pstrw | Pstrw_a | Pstrx | Pstrx_a | Pstrs | Pstrd | Pstrd_a -> true + | _ -> false + +let are_compat_store (sti1 : store_rs_a) (sti2 : store_rs_a) = + match sti1 with + | Pstrw | Pstrw_a -> ( match sti2 with Pstrw | Pstrw_a -> true | _ -> false) + | Pstrx | Pstrx_a -> ( match sti2 with Pstrx | Pstrx_a -> true | _ -> false) + | Pstrs -> ( match sti2 with Pstrs -> true | _ -> false) + | Pstrd | Pstrd_a -> ( match sti2 with Pstrd | Pstrd_a -> true | _ -> false) + | _ -> false + +let get_load_pht (ldi : load_rd_a) = + match ldi with + | Pldrw | Pldrw_a -> Ldr P32 + | Pldrs -> Ldr P32f + | Pldrx | Pldrx_a -> Ldr P64 + | Pldrd | Pldrd_a -> Ldr P64f + | _ -> failwith "get_load_string: Found a non compatible load to translate" + +let get_store_pht (sti : store_rs_a) = + match sti with + | Pstrw | Pstrw_a -> Str P32 + | Pstrs -> Str P32f + | Pstrx | Pstrx_a -> Str P64 + | Pstrd | Pstrd_a -> Str P64f + | _ -> failwith "get_store_string: Found a non compatible store to translate" + +let is_valid_ldr rd1 rd2 b1 b2 n1 n2 stype = + match stype with + | Ldr P32 | Ldr P32f -> is_valid_ldr32 rd1 rd2 b1 b2 n1 n2 + | _ -> is_valid_ldr64 rd1 rd2 b1 b2 n1 n2 + +let is_valid_str b1 b2 n1 n2 stype = + match stype with + | Str P32 | Str P32f -> is_valid_str32 b1 b2 n1 n2 + | _ -> is_valid_str64 b1 b2 n1 n2 + +(* Try to find the index of the first previous compatible + * replacement in a given symbolic memory *) +let rec search_compat_rep r2 b2 n2 insta pot_rep stype = + match pot_rep with + | [] -> None + | h0 :: t0 -> ( + match insta.(h0) with + | PLoad (PLd_rd_a (ld1, rd1, ADimm (b1, n1))) -> + if is_valid_ldr rd1 r2 b1 b2 n1 n2 stype then + Some (h0, chunk_load ld1, rd1, b1, n1) + else search_compat_rep r2 b2 n2 insta t0 stype + | PStore (PSt_rs_a (st1, rs1, ADimm (b1, n1))) -> + if is_valid_str b1 b2 n1 n2 stype then + Some (h0, chunk_store st1, rs1, b1, n1) + else search_compat_rep r2 b2 n2 insta t0 stype + | _ -> failwith "search_compat_rep: Found an inconsistent inst in pot_rep" + ) + +(* Try to find the index of the first previous compatible + * replacement in a given symbolic memory (when iterating in the reversed list) *) +let rec search_compat_rep_inv r2 b2 n2 insta pot_rep stype = + match pot_rep with + | [] -> None + | h0 :: t0 -> ( + match insta.(h0) with + | PLoad (PLd_rd_a (ld1, rd1, ADimm (b1, n1))) -> + if is_valid_ldr r2 rd1 b2 b1 n2 n1 stype then + Some (h0, chunk_load ld1, rd1, b1, n1) + else search_compat_rep_inv r2 b2 n2 insta t0 stype + | PStore (PSt_rs_a (st1, rs1, ADimm (b1, n1))) -> + if is_valid_str b2 b1 n2 n1 stype then + Some (h0, chunk_store st1, rs1, b1, n1) + else search_compat_rep_inv r2 b2 n2 insta t0 stype + | _ -> + failwith + "search_compat_rep_ldst_inv: Found an inconsistent inst in pot_rep") + +let init_symb_mem () = + Hashtbl.clear symb_mem; + Hashtbl.add symb_mem (Ldr P32) (ref []); + Hashtbl.add symb_mem (Ldr P64) (ref []); + Hashtbl.add symb_mem (Ldr P32f) (ref []); + Hashtbl.add symb_mem (Ldr P64f) (ref []); + Hashtbl.add symb_mem (Str P32) (ref []); + Hashtbl.add symb_mem (Str P64) (ref []); + Hashtbl.add symb_mem (Str P32f) (ref []); + Hashtbl.add symb_mem (Str P64f) (ref []) + +let reset_str_symb_mem () = + Hashtbl.replace symb_mem (Str P32) (ref []); + Hashtbl.replace symb_mem (Str P64) (ref []); + Hashtbl.replace symb_mem (Str P32f) (ref []); + Hashtbl.replace symb_mem (Str P64f) (ref []) + +(* Main peephole function in backward style *) +let pair_rep_inv insta = + (* Each list below is a symbolic mem representation + * for one type of inst. Lists contains integers which + * are the indices of insts in the main array "insta". *) + init_symb_mem (); + for i = Array.length insta - 1 downto 0 do + let h0 = insta.(i) in + (* Here we need to update every symbolic memory according to the matched inst *) + update_pot_rep_basic h0 insta (Ldr P32) true; + update_pot_rep_basic h0 insta (Ldr P64) true; + update_pot_rep_basic h0 insta (Ldr P32f) true; + update_pot_rep_basic h0 insta (Ldr P64f) true; + update_pot_rep_basic h0 insta (Str P32) true; + update_pot_rep_basic h0 insta (Str P64) true; + update_pot_rep_basic h0 insta (Str P32f) true; + update_pot_rep_basic h0 insta (Str P64f) true; + match h0 with + (* Non-consecutive ldr *) + | PLoad (PLd_rd_a (ldi, rd1, ADimm (b1, n1))) -> + if is_compat_load ldi then ( + (* Search a previous compatible load *) + let ld_t = get_load_pht ldi in + let pot_rep = Hashtbl.find symb_mem ld_t in + (match search_compat_rep_inv rd1 b1 n1 insta !pot_rep ld_t with + (* If we can't find a candidate, add the current load as a potential future one *) + | None -> pot_rep := i :: !pot_rep + (* Else, perform the peephole *) + | Some (rep, c, r, b, n) -> + (* The two lines below are used to filter the elected candidate *) + let filt x = x != rep in + pot_rep := List.filter filt !pot_rep; + insta.(rep) <- Pnop; + if min_is_rev n n1 then ( + if debug then + eprintf "LDP_BACK_SPACED_PEEP_IMM_INC_%a\n" print_ph_ty ld_t; + insta.(i) <- + PLoad + (Pldp + (trans_ldi ldi, r, rd1, c, chunk_load ldi, ADimm (b, n)))) + else ( + if debug then + eprintf "LDP_BACK_SPACED_PEEP_IMM_DEC_%a\n" print_ph_ty ld_t; + insta.(i) <- + PLoad + (Pldp + (trans_ldi ldi, rd1, r, chunk_load ldi, c, ADimm (b, n1))))); + Hashtbl.replace symb_mem ld_t pot_rep) + (* Non-consecutive str *) + | PStore (PSt_rs_a (sti, rd1, ADimm (b1, n1))) -> + if is_compat_store sti then ( + (* Search a previous compatible store *) + let st_t = get_store_pht sti in + let pot_rep = Hashtbl.find symb_mem st_t in + (match search_compat_rep_inv rd1 b1 n1 insta !pot_rep st_t with + (* If we can't find a candidate, clean and add the current store as a potential future one *) + | None -> + reset_str_symb_mem (); + pot_rep := [ i ] + (* Else, perform the peephole *) + | Some (rep, c, r, b, n) -> + (* The two lines below are used to filter the elected candidate *) + let filt x = x != rep in + pot_rep := List.filter filt !pot_rep; + insta.(rep) <- Pnop; + if debug then + eprintf "STP_BACK_SPACED_PEEP_IMM_INC_%a\n" print_ph_ty st_t; + insta.(i) <- + PStore + (Pstp + (trans_sti sti, rd1, r, chunk_store sti, c, ADimm (b, n1)))); + Hashtbl.replace symb_mem st_t pot_rep + (* Any other inst *)) + else reset_str_symb_mem () + | i -> ( + (* Clear list of candidates if there is a non supported store *) + match i with PStore _ -> reset_str_symb_mem () | _ -> ()) + done + +(* Main peephole function in forward style *) +let pair_rep insta = + (* Each list below is a symbolic mem representation + * for one type of inst. Lists contains integers which + * are the indices of insts in the main array "insta". *) + init_symb_mem (); + for i = 0 to Array.length insta - 2 do + let h0 = insta.(i) in + let h1 = insta.(i + 1) in + (* Here we need to update every symbolic memory according to the matched inst *) + update_pot_rep_basic h0 insta (Ldr P32) false; + update_pot_rep_basic h0 insta (Ldr P64) false; + update_pot_rep_basic h0 insta (Ldr P32f) false; + update_pot_rep_basic h0 insta (Ldr P64f) false; + update_pot_rep_basic h0 insta (Str P32) false; + update_pot_rep_basic h0 insta (Str P64) false; + update_pot_rep_basic h0 insta (Str P32f) false; + update_pot_rep_basic h0 insta (Str P64f) false; + match (h0, h1) with + (* Consecutive ldr *) + | ( PLoad (PLd_rd_a (ldi1, rd1, ADimm (b1, n1))), + PLoad (PLd_rd_a (ldi2, rd2, ADimm (b2, n2))) ) -> + if are_compat_load ldi1 ldi2 then + let ld_t = get_load_pht ldi1 in + if is_valid_ldr rd1 rd2 b1 b2 n1 n2 ld_t then ( + if min_is_rev n1 n2 then ( + if debug then + eprintf "LDP_CONSEC_PEEP_IMM_INC_%a\n" print_ph_ty ld_t; + insta.(i) <- + PLoad + (Pldp + ( trans_ldi ldi1, + rd1, + rd2, + chunk_load ldi1, + chunk_load ldi2, + ADimm (b1, n1) ))) + else ( + if debug then + eprintf "LDP_CONSEC_PEEP_IMM_DEC_%a\n" print_ph_ty ld_t; + insta.(i) <- + PLoad + (Pldp + ( trans_ldi ldi1, + rd2, + rd1, + chunk_load ldi2, + chunk_load ldi1, + ADimm (b1, n2) ))); + insta.(i + 1) <- Pnop) + (* Non-consecutive ldr *) + | PLoad (PLd_rd_a (ldi, rd1, ADimm (b1, n1))), _ -> + if is_compat_load ldi then ( + (* Search a previous compatible load *) + let ld_t = get_load_pht ldi in + let pot_rep = Hashtbl.find symb_mem ld_t in + (match search_compat_rep rd1 b1 n1 insta !pot_rep ld_t with + (* If we can't find a candidate, add the current load as a potential future one *) + | None -> pot_rep := i :: !pot_rep + (* Else, perform the peephole *) + | Some (rep, c, r, b, n) -> + (* The two lines below are used to filter the elected candidate *) + let filt x = x != rep in + pot_rep := List.filter filt !pot_rep; + insta.(rep) <- Pnop; + if min_is_rev n n1 then ( + if debug then + eprintf "LDP_FORW_SPACED_PEEP_IMM_INC_%a\n" print_ph_ty ld_t; + insta.(i) <- + PLoad + (Pldp + (trans_ldi ldi, r, rd1, c, chunk_load ldi, ADimm (b, n)))) + else ( + if debug then + eprintf "LDP_FORW_SPACED_PEEP_IMM_DEC_%a\n" print_ph_ty ld_t; + insta.(i) <- + PLoad + (Pldp + (trans_ldi ldi, rd1, r, chunk_load ldi, c, ADimm (b, n1))))); + Hashtbl.replace symb_mem ld_t pot_rep) + (* Consecutive str *) + | ( PStore (PSt_rs_a (sti1, rd1, ADimm (b1, n1))), + PStore (PSt_rs_a (sti2, rd2, ADimm (b2, n2))) ) -> + (* Regardless of whether we can perform the peephole or not, + * we have to clean the potential candidates for stp now as we are + * looking at two new store instructions. *) + reset_str_symb_mem (); + if are_compat_store sti1 sti2 then + let st_t = get_store_pht sti1 in + if is_valid_str b1 b2 n1 n2 st_t then ( + if debug then + eprintf "STP_CONSEC_PEEP_IMM_INC_%a\n" print_ph_ty st_t; + insta.(i) <- + PStore + (Pstp + ( trans_sti sti1, + rd1, + rd2, + chunk_store sti1, + chunk_store sti2, + ADimm (b1, n1) )); + insta.(i + 1) <- Pnop) + (* Non-consecutive str *) + | PStore (PSt_rs_a (sti, rd1, ADimm (b1, n1))), _ -> + if is_compat_store sti then ( + (* Search a previous compatible store *) + let st_t = get_store_pht sti in + let pot_rep = Hashtbl.find symb_mem st_t in + (match search_compat_rep rd1 b1 n1 insta !pot_rep st_t with + (* If we can't find a candidate, clean and add the current store as a potential future one *) + | None -> + reset_str_symb_mem (); + pot_rep := [ i ] + (* Else, perform the peephole *) + | Some (rep, c, r, b, n) -> + (* The two lines below are used to filter the elected candidate *) + let filt x = x != rep in + pot_rep := List.filter filt !pot_rep; + insta.(rep) <- Pnop; + if debug then + eprintf "STP_FORW_SPACED_PEEP_IMM_INC_%a\n" print_ph_ty st_t; + insta.(i) <- + PStore + (Pstp (trans_sti sti, r, rd1, c, chunk_store sti, ADimm (b, n)))); + Hashtbl.replace symb_mem st_t pot_rep) + else reset_str_symb_mem () + (* Any other inst *) + | i, _ -> ( + (* Clear list of candidates if there is a non supported store *) + match i with PStore _ -> reset_str_symb_mem () | _ -> ()) + done + +(* Calling peephole if flag is set *) +let optimize_bdy (bdy : basic list) : basic list = + if !Clflags.option_fcoalesce_mem then ( + let insta = Array.of_list bdy in + pair_rep insta; + pair_rep_inv insta; + Array.to_list insta) + else bdy + +(* Called peephole function from Coq *) +let peephole_opt bdy = + Timing.time_coq + [ + 'P'; 'e'; 'e'; 'p'; 'h'; 'o'; 'l'; 'e'; ' '; 'o'; 'r'; 'a'; 'c'; 'l'; 'e'; + ] + optimize_bdy bdy |