aboutsummaryrefslogtreecommitdiffstats
path: root/aarch64/PeepholeOracle.ml
diff options
context:
space:
mode:
Diffstat (limited to 'aarch64/PeepholeOracle.ml')
-rw-r--r--aarch64/PeepholeOracle.ml624
1 files changed, 624 insertions, 0 deletions
diff --git a/aarch64/PeepholeOracle.ml b/aarch64/PeepholeOracle.ml
new file mode 100644
index 00000000..1d890f2c
--- /dev/null
+++ b/aarch64/PeepholeOracle.ml
@@ -0,0 +1,624 @@
+(* *************************************************************)
+(* *)
+(* The Compcert verified compiler *)
+(* *)
+(* Léo Gourdin UGA, VERIMAG *)
+(* *)
+(* Copyright VERIMAG. All rights reserved. *)
+(* This file is distributed under the terms of the INRIA *)
+(* Non-Commercial License Agreement. *)
+(* *)
+(* *************************************************************)
+
+open Camlcoq
+open Asmblock
+open Asm
+open Int64
+open Printf
+
+(* If true, the oracle will print a msg for each applied peephole *)
+let debug = false
+
+(* Functions to verify the immediate offset range for ldp/stp *)
+let is_valid_immofs_32 z =
+ if z <= 252 && z >= -256 && z mod 4 = 0 then true else false
+
+let is_valid_immofs_64 z =
+ if z <= 504 && z >= -512 && z mod 8 = 0 then true else false
+
+(* Functions to check if a ldp/stp replacement is valid according to args *)
+let is_valid_ldr32 rd1 rd2 b1 b2 n1 n2 =
+ let z1 = to_int (camlint64_of_coqint n1) in
+ let z2 = to_int (camlint64_of_coqint n2) in
+ if
+ (not (dreg_eq rd1 rd2))
+ && iregsp_eq b1 b2
+ && (not (dreg_eq rd1 (IR b2)))
+ && (z2 = z1 + 4 || z2 = z1 - 4)
+ && is_valid_immofs_32 z1
+ then true
+ else false
+
+let is_valid_ldr64 rd1 rd2 b1 b2 n1 n2 =
+ let z1 = to_int (camlint64_of_coqint n1) in
+ let z2 = to_int (camlint64_of_coqint n2) in
+ if
+ (not (dreg_eq rd1 rd2))
+ && iregsp_eq b1 b2
+ && (not (dreg_eq rd1 (IR b2)))
+ && (z2 = z1 + 8 || z2 = z1 - 8)
+ && is_valid_immofs_64 z1
+ then true
+ else false
+
+let is_valid_str32 b1 b2 n1 n2 =
+ let z1 = to_int (camlint64_of_coqint n1) in
+ let z2 = to_int (camlint64_of_coqint n2) in
+ if iregsp_eq b1 b2 && z2 = z1 + 4 && is_valid_immofs_32 z1 then true
+ else false
+
+let is_valid_str64 b1 b2 n1 n2 =
+ let z1 = to_int (camlint64_of_coqint n1) in
+ let z2 = to_int (camlint64_of_coqint n2) in
+ if iregsp_eq b1 b2 && z2 = z1 + 8 && is_valid_immofs_64 z1 then true
+ else false
+
+let dreg_of_ireg r = IR (RR1 r)
+
+let dreg_of_freg r = FR r
+
+(* Return true if an intermediate
+ * affectation eliminates the potential
+ * candidate *)
+let verify_load_affect reg rd b rev =
+ let b = IR b in
+ if not rev then dreg_eq reg b else dreg_eq reg b || dreg_eq reg rd
+
+(* Return true if an intermediate
+ * read eliminates the potential
+ * candidate *)
+let verify_load_read reg rd b rev = dreg_eq reg rd
+
+(* Return true if an intermediate
+ * affectation eliminates the potential
+ * candidate *)
+let verify_store_affect reg rs b rev =
+ let b = IR b in
+ dreg_eq reg b || dreg_eq reg rs
+
+type ph_type = P32 | P32f | P64 | P64f
+
+type inst_type = Ldr of ph_type | Str of ph_type
+
+let ph_ty_to_string = function
+ | Ldr P32 -> "ldr32"
+ | Ldr P32f -> "ldr32f"
+ | Ldr P64 -> "ldr64"
+ | Ldr P64f -> "ldr64f"
+ | Str P32 -> "str32"
+ | Str P32f -> "str32f"
+ | Str P64 -> "str64"
+ | Str P64f -> "str64f"
+
+let print_ph_ty chan v = output_string chan (ph_ty_to_string v)
+
+let symb_mem = Hashtbl.create 9
+
+(* Affect a symbolic memory list of potential replacements
+ * for a given write in reg *)
+let rec affect_symb_mem reg insta pot_rep stype rev =
+ match pot_rep with
+ | [] -> []
+ | h0 :: t0 -> (
+ match (insta.(h0), stype) with
+ | PLoad (PLd_rd_a (_, rd, ADimm (b, n))), Ldr _ ->
+ if verify_load_affect reg rd b rev then
+ affect_symb_mem reg insta t0 stype rev
+ else h0 :: affect_symb_mem reg insta t0 stype rev
+ | PStore (PSt_rs_a (_, rs, ADimm (b, n))), Str _ ->
+ if verify_store_affect reg rs b rev then
+ affect_symb_mem reg insta t0 stype rev
+ else h0 :: affect_symb_mem reg insta t0 stype rev
+ | _, _ ->
+ failwith "affect_symb_mem: Found an inconsistent inst in pot_rep")
+
+(* Affect a symbolic memory list of potential replacements
+ * for a given read in reg *)
+let rec read_symb_mem reg insta pot_rep stype rev =
+ match pot_rep with
+ | [] -> []
+ | h0 :: t0 -> (
+ match (insta.(h0), stype) with
+ | PLoad (PLd_rd_a (_, rd, ADimm (b, n))), Ldr _ ->
+ if verify_load_read reg rd b rev then
+ read_symb_mem reg insta t0 stype rev
+ else h0 :: read_symb_mem reg insta t0 stype rev
+ | PStore (PSt_rs_a (_, rs, ADimm (b, n))), Str _ ->
+ h0 :: read_symb_mem reg insta t0 stype rev
+ | _, _ -> failwith "read_symb_mem: Found an inconsistent inst in pot_rep")
+
+(* Update a symbolic memory list of potential replacements
+ * for any addressing *)
+let update_pot_rep_addressing a insta pot_rep stype rev =
+ match a with
+ | ADimm (base, _) ->
+ pot_rep := read_symb_mem (IR base) insta !pot_rep stype rev
+ | ADreg (base, r) ->
+ pot_rep := read_symb_mem (IR base) insta !pot_rep stype rev;
+ pot_rep := read_symb_mem (dreg_of_ireg r) insta !pot_rep stype rev
+ | ADlsl (base, r, _) ->
+ pot_rep := read_symb_mem (IR base) insta !pot_rep stype rev;
+ pot_rep := read_symb_mem (dreg_of_ireg r) insta !pot_rep stype rev
+ | ADsxt (base, r, _) ->
+ pot_rep := read_symb_mem (IR base) insta !pot_rep stype rev;
+ pot_rep := read_symb_mem (dreg_of_ireg r) insta !pot_rep stype rev
+ | ADuxt (base, r, _) ->
+ pot_rep := read_symb_mem (IR base) insta !pot_rep stype rev;
+ pot_rep := read_symb_mem (dreg_of_ireg r) insta !pot_rep stype rev
+ | ADadr (base, _, _) ->
+ pot_rep := read_symb_mem (IR base) insta !pot_rep stype rev
+ | ADpostincr (base, _) ->
+ pot_rep := read_symb_mem (IR base) insta !pot_rep stype rev
+
+(* Update a symbolic memory list of potential replacements
+ * for any basic instruction *)
+let update_pot_rep_basic inst insta stype rev =
+ let pot_rep = Hashtbl.find symb_mem stype in
+ (match inst with
+ | PArith i -> (
+ match i with
+ | PArithP (_, rd) ->
+ pot_rep := affect_symb_mem rd insta !pot_rep stype rev
+ | PArithPP (_, rd, rs) ->
+ pot_rep := affect_symb_mem rd insta !pot_rep stype rev;
+ pot_rep := read_symb_mem rs insta !pot_rep stype rev
+ | PArithPPP (_, rd, rs1, rs2) ->
+ pot_rep := affect_symb_mem rd insta !pot_rep stype rev;
+ pot_rep := read_symb_mem rs1 insta !pot_rep stype rev;
+ pot_rep := read_symb_mem rs2 insta !pot_rep stype rev
+ | PArithRR0R (_, rd, rs1, rs2) ->
+ pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev;
+ (match rs1 with
+ | RR0 rs1 ->
+ pot_rep :=
+ read_symb_mem (dreg_of_ireg rs1) insta !pot_rep stype rev
+ | _ -> ());
+ pot_rep := read_symb_mem (dreg_of_ireg rs2) insta !pot_rep stype rev
+ | PArithRR0 (_, rd, rs) -> (
+ pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev;
+ match rs with
+ | RR0 rs1 ->
+ pot_rep :=
+ read_symb_mem (dreg_of_ireg rs1) insta !pot_rep stype rev
+ | _ -> ())
+ | PArithARRRR0 (_, rd, rs1, rs2, rs3) -> (
+ pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev;
+ pot_rep := read_symb_mem (dreg_of_ireg rs1) insta !pot_rep stype rev;
+ pot_rep := read_symb_mem (dreg_of_ireg rs2) insta !pot_rep stype rev;
+ match rs3 with
+ | RR0 rs1 ->
+ pot_rep :=
+ read_symb_mem (dreg_of_ireg rs1) insta !pot_rep stype rev
+ | _ -> ())
+ | PArithComparisonPP (_, rs1, rs2) ->
+ pot_rep := read_symb_mem rs1 insta !pot_rep stype rev;
+ pot_rep := read_symb_mem rs2 insta !pot_rep stype rev
+ | PArithComparisonR0R (_, rs1, rs2) ->
+ (match rs1 with
+ | RR0 rs1 ->
+ pot_rep :=
+ read_symb_mem (dreg_of_ireg rs1) insta !pot_rep stype rev
+ | _ -> ());
+ pot_rep := read_symb_mem (dreg_of_ireg rs2) insta !pot_rep stype rev
+ | PArithComparisonP (_, rs1) ->
+ pot_rep := read_symb_mem rs1 insta !pot_rep stype rev
+ | Pcset (rd, _) ->
+ pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev
+ | Pfmovi (_, rd, rs) -> (
+ pot_rep := affect_symb_mem (dreg_of_freg rd) insta !pot_rep stype rev;
+ match rs with
+ | RR0 rs ->
+ pot_rep :=
+ read_symb_mem (dreg_of_ireg rs) insta !pot_rep stype rev
+ | _ -> ())
+ | Pcsel (rd, rs1, rs2, _) ->
+ pot_rep := affect_symb_mem rd insta !pot_rep stype rev;
+ pot_rep := read_symb_mem rs1 insta !pot_rep stype rev;
+ pot_rep := read_symb_mem rs2 insta !pot_rep stype rev
+ | Pfnmul (_, rd, rs1, rs2) ->
+ pot_rep := affect_symb_mem (dreg_of_freg rd) insta !pot_rep stype rev;
+ pot_rep := read_symb_mem (dreg_of_freg rs1) insta !pot_rep stype rev;
+ pot_rep := read_symb_mem (dreg_of_freg rs2) insta !pot_rep stype rev)
+ | PLoad i -> (
+ (* Here, we consider a different behavior for load and store potential candidates:
+ * a load does not obviously cancel the ldp candidates, but it does for any stp candidate. *)
+ match stype with
+ | Ldr _ -> (
+ match i with
+ | PLd_rd_a (_, rd, a) ->
+ pot_rep := affect_symb_mem rd insta !pot_rep stype rev;
+ update_pot_rep_addressing a insta pot_rep stype rev
+ | Pldp (_, rd1, rd2, _, _, a) ->
+ pot_rep := affect_symb_mem rd1 insta !pot_rep stype rev;
+ pot_rep := affect_symb_mem rd2 insta !pot_rep stype rev;
+ update_pot_rep_addressing a insta pot_rep stype rev)
+ | _ -> pot_rep := [])
+ | PStore _ -> (
+ (* Here, we consider that a store cancel all ldp candidates, but it is far more complicated for stp ones :
+ * if we cancel stp candidates here, we would prevent ourselves to apply the non-consec store peephole.
+ * To solve this issue, the store candidates cleaning is managed directly in the peephole function below. *)
+ match stype with Ldr _ -> pot_rep := [] | _ -> ())
+ | Pallocframe (_, _) -> pot_rep := []
+ | Pfreeframe (_, _) -> pot_rep := []
+ | Ploadsymbol (rd, _) ->
+ pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev
+ | Pcvtsw2x (rd, rs) ->
+ pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev;
+ pot_rep := read_symb_mem (dreg_of_ireg rs) insta !pot_rep stype rev
+ | Pcvtuw2x (rd, rs) ->
+ pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev;
+ pot_rep := read_symb_mem (dreg_of_ireg rs) insta !pot_rep stype rev
+ | Pcvtx2w rd ->
+ pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev;
+ pot_rep := read_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev
+ | Pnop -> ());
+ Hashtbl.replace symb_mem stype pot_rep
+
+(* This is useful to manage the case were the immofs
+ * of the first ldr/str is greater than the second one *)
+let min_is_rev n1 n2 =
+ let z1 = to_int (camlint64_of_coqint n1) in
+ let z2 = to_int (camlint64_of_coqint n2) in
+ if z1 < z2 then true else false
+
+(* Below functions were added to merge pattern matching cases in peephole,
+ * thanks to this, we can make the chunk difference (int/any) compatible. *)
+let trans_ldi (ldi : load_rd_a) : load_rd1_rd2_a =
+ match ldi with
+ | Pldrw | Pldrw_a -> Pldpw
+ | Pldrx | Pldrx_a -> Pldpx
+ | Pldrs -> Pldps
+ | Pldrd | Pldrd_a -> Pldpd
+ | _ -> failwith "trans_ldi: Found a non compatible load to translate"
+
+let trans_sti (sti : store_rs_a) : store_rs1_rs2_a =
+ match sti with
+ | Pstrw | Pstrw_a -> Pstpw
+ | Pstrx | Pstrx_a -> Pstpx
+ | Pstrs -> Pstps
+ | Pstrd | Pstrd_a -> Pstpd
+ | _ -> failwith "trans_sti: Found a non compatible store to translate"
+
+let is_compat_load (ldi : load_rd_a) =
+ match ldi with
+ | Pldrw | Pldrw_a | Pldrx | Pldrx_a | Pldrs | Pldrd | Pldrd_a -> true
+ | _ -> false
+
+let are_compat_load (ldi1 : load_rd_a) (ldi2 : load_rd_a) =
+ match ldi1 with
+ | Pldrw | Pldrw_a -> ( match ldi2 with Pldrw | Pldrw_a -> true | _ -> false)
+ | Pldrx | Pldrx_a -> ( match ldi2 with Pldrx | Pldrx_a -> true | _ -> false)
+ | Pldrs -> ( match ldi2 with Pldrs -> true | _ -> false)
+ | Pldrd | Pldrd_a -> ( match ldi2 with Pldrd | Pldrd_a -> true | _ -> false)
+ | _ -> false
+
+let is_compat_store (sti : store_rs_a) =
+ match sti with
+ | Pstrw | Pstrw_a | Pstrx | Pstrx_a | Pstrs | Pstrd | Pstrd_a -> true
+ | _ -> false
+
+let are_compat_store (sti1 : store_rs_a) (sti2 : store_rs_a) =
+ match sti1 with
+ | Pstrw | Pstrw_a -> ( match sti2 with Pstrw | Pstrw_a -> true | _ -> false)
+ | Pstrx | Pstrx_a -> ( match sti2 with Pstrx | Pstrx_a -> true | _ -> false)
+ | Pstrs -> ( match sti2 with Pstrs -> true | _ -> false)
+ | Pstrd | Pstrd_a -> ( match sti2 with Pstrd | Pstrd_a -> true | _ -> false)
+ | _ -> false
+
+let get_load_pht (ldi : load_rd_a) =
+ match ldi with
+ | Pldrw | Pldrw_a -> Ldr P32
+ | Pldrs -> Ldr P32f
+ | Pldrx | Pldrx_a -> Ldr P64
+ | Pldrd | Pldrd_a -> Ldr P64f
+ | _ -> failwith "get_load_string: Found a non compatible load to translate"
+
+let get_store_pht (sti : store_rs_a) =
+ match sti with
+ | Pstrw | Pstrw_a -> Str P32
+ | Pstrs -> Str P32f
+ | Pstrx | Pstrx_a -> Str P64
+ | Pstrd | Pstrd_a -> Str P64f
+ | _ -> failwith "get_store_string: Found a non compatible store to translate"
+
+let is_valid_ldr rd1 rd2 b1 b2 n1 n2 stype =
+ match stype with
+ | Ldr P32 | Ldr P32f -> is_valid_ldr32 rd1 rd2 b1 b2 n1 n2
+ | _ -> is_valid_ldr64 rd1 rd2 b1 b2 n1 n2
+
+let is_valid_str b1 b2 n1 n2 stype =
+ match stype with
+ | Str P32 | Str P32f -> is_valid_str32 b1 b2 n1 n2
+ | _ -> is_valid_str64 b1 b2 n1 n2
+
+(* Try to find the index of the first previous compatible
+ * replacement in a given symbolic memory *)
+let rec search_compat_rep r2 b2 n2 insta pot_rep stype =
+ match pot_rep with
+ | [] -> None
+ | h0 :: t0 -> (
+ match insta.(h0) with
+ | PLoad (PLd_rd_a (ld1, rd1, ADimm (b1, n1))) ->
+ if is_valid_ldr rd1 r2 b1 b2 n1 n2 stype then
+ Some (h0, chunk_load ld1, rd1, b1, n1)
+ else search_compat_rep r2 b2 n2 insta t0 stype
+ | PStore (PSt_rs_a (st1, rs1, ADimm (b1, n1))) ->
+ if is_valid_str b1 b2 n1 n2 stype then
+ Some (h0, chunk_store st1, rs1, b1, n1)
+ else search_compat_rep r2 b2 n2 insta t0 stype
+ | _ -> failwith "search_compat_rep: Found an inconsistent inst in pot_rep"
+ )
+
+(* Try to find the index of the first previous compatible
+ * replacement in a given symbolic memory (when iterating in the reversed list) *)
+let rec search_compat_rep_inv r2 b2 n2 insta pot_rep stype =
+ match pot_rep with
+ | [] -> None
+ | h0 :: t0 -> (
+ match insta.(h0) with
+ | PLoad (PLd_rd_a (ld1, rd1, ADimm (b1, n1))) ->
+ if is_valid_ldr r2 rd1 b2 b1 n2 n1 stype then
+ Some (h0, chunk_load ld1, rd1, b1, n1)
+ else search_compat_rep_inv r2 b2 n2 insta t0 stype
+ | PStore (PSt_rs_a (st1, rs1, ADimm (b1, n1))) ->
+ if is_valid_str b2 b1 n2 n1 stype then
+ Some (h0, chunk_store st1, rs1, b1, n1)
+ else search_compat_rep_inv r2 b2 n2 insta t0 stype
+ | _ ->
+ failwith
+ "search_compat_rep_ldst_inv: Found an inconsistent inst in pot_rep")
+
+let init_symb_mem () =
+ Hashtbl.clear symb_mem;
+ Hashtbl.add symb_mem (Ldr P32) (ref []);
+ Hashtbl.add symb_mem (Ldr P64) (ref []);
+ Hashtbl.add symb_mem (Ldr P32f) (ref []);
+ Hashtbl.add symb_mem (Ldr P64f) (ref []);
+ Hashtbl.add symb_mem (Str P32) (ref []);
+ Hashtbl.add symb_mem (Str P64) (ref []);
+ Hashtbl.add symb_mem (Str P32f) (ref []);
+ Hashtbl.add symb_mem (Str P64f) (ref [])
+
+let reset_str_symb_mem () =
+ Hashtbl.replace symb_mem (Str P32) (ref []);
+ Hashtbl.replace symb_mem (Str P64) (ref []);
+ Hashtbl.replace symb_mem (Str P32f) (ref []);
+ Hashtbl.replace symb_mem (Str P64f) (ref [])
+
+(* Main peephole function in backward style *)
+let pair_rep_inv insta =
+ (* Each list below is a symbolic mem representation
+ * for one type of inst. Lists contains integers which
+ * are the indices of insts in the main array "insta". *)
+ init_symb_mem ();
+ for i = Array.length insta - 1 downto 0 do
+ let h0 = insta.(i) in
+ (* Here we need to update every symbolic memory according to the matched inst *)
+ update_pot_rep_basic h0 insta (Ldr P32) true;
+ update_pot_rep_basic h0 insta (Ldr P64) true;
+ update_pot_rep_basic h0 insta (Ldr P32f) true;
+ update_pot_rep_basic h0 insta (Ldr P64f) true;
+ update_pot_rep_basic h0 insta (Str P32) true;
+ update_pot_rep_basic h0 insta (Str P64) true;
+ update_pot_rep_basic h0 insta (Str P32f) true;
+ update_pot_rep_basic h0 insta (Str P64f) true;
+ match h0 with
+ (* Non-consecutive ldr *)
+ | PLoad (PLd_rd_a (ldi, rd1, ADimm (b1, n1))) ->
+ if is_compat_load ldi then (
+ (* Search a previous compatible load *)
+ let ld_t = get_load_pht ldi in
+ let pot_rep = Hashtbl.find symb_mem ld_t in
+ (match search_compat_rep_inv rd1 b1 n1 insta !pot_rep ld_t with
+ (* If we can't find a candidate, add the current load as a potential future one *)
+ | None -> pot_rep := i :: !pot_rep
+ (* Else, perform the peephole *)
+ | Some (rep, c, r, b, n) ->
+ (* The two lines below are used to filter the elected candidate *)
+ let filt x = x != rep in
+ pot_rep := List.filter filt !pot_rep;
+ insta.(rep) <- Pnop;
+ if min_is_rev n n1 then (
+ if debug then
+ eprintf "LDP_BACK_SPACED_PEEP_IMM_INC_%a\n" print_ph_ty ld_t;
+ insta.(i) <-
+ PLoad
+ (Pldp
+ (trans_ldi ldi, r, rd1, c, chunk_load ldi, ADimm (b, n))))
+ else (
+ if debug then
+ eprintf "LDP_BACK_SPACED_PEEP_IMM_DEC_%a\n" print_ph_ty ld_t;
+ insta.(i) <-
+ PLoad
+ (Pldp
+ (trans_ldi ldi, rd1, r, chunk_load ldi, c, ADimm (b, n1)))));
+ Hashtbl.replace symb_mem ld_t pot_rep)
+ (* Non-consecutive str *)
+ | PStore (PSt_rs_a (sti, rd1, ADimm (b1, n1))) ->
+ if is_compat_store sti then (
+ (* Search a previous compatible store *)
+ let st_t = get_store_pht sti in
+ let pot_rep = Hashtbl.find symb_mem st_t in
+ (match search_compat_rep_inv rd1 b1 n1 insta !pot_rep st_t with
+ (* If we can't find a candidate, clean and add the current store as a potential future one *)
+ | None ->
+ reset_str_symb_mem ();
+ pot_rep := [ i ]
+ (* Else, perform the peephole *)
+ | Some (rep, c, r, b, n) ->
+ (* The two lines below are used to filter the elected candidate *)
+ let filt x = x != rep in
+ pot_rep := List.filter filt !pot_rep;
+ insta.(rep) <- Pnop;
+ if debug then
+ eprintf "STP_BACK_SPACED_PEEP_IMM_INC_%a\n" print_ph_ty st_t;
+ insta.(i) <-
+ PStore
+ (Pstp
+ (trans_sti sti, rd1, r, chunk_store sti, c, ADimm (b, n1))));
+ Hashtbl.replace symb_mem st_t pot_rep
+ (* Any other inst *))
+ else reset_str_symb_mem ()
+ | i -> (
+ (* Clear list of candidates if there is a non supported store *)
+ match i with PStore _ -> reset_str_symb_mem () | _ -> ())
+ done
+
+(* Main peephole function in forward style *)
+let pair_rep insta =
+ (* Each list below is a symbolic mem representation
+ * for one type of inst. Lists contains integers which
+ * are the indices of insts in the main array "insta". *)
+ init_symb_mem ();
+ for i = 0 to Array.length insta - 2 do
+ let h0 = insta.(i) in
+ let h1 = insta.(i + 1) in
+ (* Here we need to update every symbolic memory according to the matched inst *)
+ update_pot_rep_basic h0 insta (Ldr P32) false;
+ update_pot_rep_basic h0 insta (Ldr P64) false;
+ update_pot_rep_basic h0 insta (Ldr P32f) false;
+ update_pot_rep_basic h0 insta (Ldr P64f) false;
+ update_pot_rep_basic h0 insta (Str P32) false;
+ update_pot_rep_basic h0 insta (Str P64) false;
+ update_pot_rep_basic h0 insta (Str P32f) false;
+ update_pot_rep_basic h0 insta (Str P64f) false;
+ match (h0, h1) with
+ (* Consecutive ldr *)
+ | ( PLoad (PLd_rd_a (ldi1, rd1, ADimm (b1, n1))),
+ PLoad (PLd_rd_a (ldi2, rd2, ADimm (b2, n2))) ) ->
+ if are_compat_load ldi1 ldi2 then
+ let ld_t = get_load_pht ldi1 in
+ if is_valid_ldr rd1 rd2 b1 b2 n1 n2 ld_t then (
+ if min_is_rev n1 n2 then (
+ if debug then
+ eprintf "LDP_CONSEC_PEEP_IMM_INC_%a\n" print_ph_ty ld_t;
+ insta.(i) <-
+ PLoad
+ (Pldp
+ ( trans_ldi ldi1,
+ rd1,
+ rd2,
+ chunk_load ldi1,
+ chunk_load ldi2,
+ ADimm (b1, n1) )))
+ else (
+ if debug then
+ eprintf "LDP_CONSEC_PEEP_IMM_DEC_%a\n" print_ph_ty ld_t;
+ insta.(i) <-
+ PLoad
+ (Pldp
+ ( trans_ldi ldi1,
+ rd2,
+ rd1,
+ chunk_load ldi2,
+ chunk_load ldi1,
+ ADimm (b1, n2) )));
+ insta.(i + 1) <- Pnop)
+ (* Non-consecutive ldr *)
+ | PLoad (PLd_rd_a (ldi, rd1, ADimm (b1, n1))), _ ->
+ if is_compat_load ldi then (
+ (* Search a previous compatible load *)
+ let ld_t = get_load_pht ldi in
+ let pot_rep = Hashtbl.find symb_mem ld_t in
+ (match search_compat_rep rd1 b1 n1 insta !pot_rep ld_t with
+ (* If we can't find a candidate, add the current load as a potential future one *)
+ | None -> pot_rep := i :: !pot_rep
+ (* Else, perform the peephole *)
+ | Some (rep, c, r, b, n) ->
+ (* The two lines below are used to filter the elected candidate *)
+ let filt x = x != rep in
+ pot_rep := List.filter filt !pot_rep;
+ insta.(rep) <- Pnop;
+ if min_is_rev n n1 then (
+ if debug then
+ eprintf "LDP_FORW_SPACED_PEEP_IMM_INC_%a\n" print_ph_ty ld_t;
+ insta.(i) <-
+ PLoad
+ (Pldp
+ (trans_ldi ldi, r, rd1, c, chunk_load ldi, ADimm (b, n))))
+ else (
+ if debug then
+ eprintf "LDP_FORW_SPACED_PEEP_IMM_DEC_%a\n" print_ph_ty ld_t;
+ insta.(i) <-
+ PLoad
+ (Pldp
+ (trans_ldi ldi, rd1, r, chunk_load ldi, c, ADimm (b, n1)))));
+ Hashtbl.replace symb_mem ld_t pot_rep)
+ (* Consecutive str *)
+ | ( PStore (PSt_rs_a (sti1, rd1, ADimm (b1, n1))),
+ PStore (PSt_rs_a (sti2, rd2, ADimm (b2, n2))) ) ->
+ (* Regardless of whether we can perform the peephole or not,
+ * we have to clean the potential candidates for stp now as we are
+ * looking at two new store instructions. *)
+ reset_str_symb_mem ();
+ if are_compat_store sti1 sti2 then
+ let st_t = get_store_pht sti1 in
+ if is_valid_str b1 b2 n1 n2 st_t then (
+ if debug then
+ eprintf "STP_CONSEC_PEEP_IMM_INC_%a\n" print_ph_ty st_t;
+ insta.(i) <-
+ PStore
+ (Pstp
+ ( trans_sti sti1,
+ rd1,
+ rd2,
+ chunk_store sti1,
+ chunk_store sti2,
+ ADimm (b1, n1) ));
+ insta.(i + 1) <- Pnop)
+ (* Non-consecutive str *)
+ | PStore (PSt_rs_a (sti, rd1, ADimm (b1, n1))), _ ->
+ if is_compat_store sti then (
+ (* Search a previous compatible store *)
+ let st_t = get_store_pht sti in
+ let pot_rep = Hashtbl.find symb_mem st_t in
+ (match search_compat_rep rd1 b1 n1 insta !pot_rep st_t with
+ (* If we can't find a candidate, clean and add the current store as a potential future one *)
+ | None ->
+ reset_str_symb_mem ();
+ pot_rep := [ i ]
+ (* Else, perform the peephole *)
+ | Some (rep, c, r, b, n) ->
+ (* The two lines below are used to filter the elected candidate *)
+ let filt x = x != rep in
+ pot_rep := List.filter filt !pot_rep;
+ insta.(rep) <- Pnop;
+ if debug then
+ eprintf "STP_FORW_SPACED_PEEP_IMM_INC_%a\n" print_ph_ty st_t;
+ insta.(i) <-
+ PStore
+ (Pstp (trans_sti sti, r, rd1, c, chunk_store sti, ADimm (b, n))));
+ Hashtbl.replace symb_mem st_t pot_rep)
+ else reset_str_symb_mem ()
+ (* Any other inst *)
+ | i, _ -> (
+ (* Clear list of candidates if there is a non supported store *)
+ match i with PStore _ -> reset_str_symb_mem () | _ -> ())
+ done
+
+(* Calling peephole if flag is set *)
+let optimize_bdy (bdy : basic list) : basic list =
+ if !Clflags.option_fcoalesce_mem then (
+ let insta = Array.of_list bdy in
+ pair_rep insta;
+ pair_rep_inv insta;
+ Array.to_list insta)
+ else bdy
+
+(* Called peephole function from Coq *)
+let peephole_opt bdy =
+ Timing.time_coq
+ [
+ 'P'; 'e'; 'e'; 'p'; 'h'; 'o'; 'l'; 'e'; ' '; 'o'; 'r'; 'a'; 'c'; 'l'; 'e';
+ ]
+ optimize_bdy bdy