From 0c5bf1fd0ec307a3b51d59d19e99c997b66c67f8 Mon Sep 17 00:00:00 2001 From: Léo Gourdin Date: Tue, 8 Dec 2020 17:10:39 +0100 Subject: Ocaml peephole oracle and array datastruct instead of lists --- aarch64/Peephole.v | 177 --------------------------------- aarch64/PeepholeOracle.ml | 227 +++++++++++++++++++++++++++++++++++++++++++ aarch64/PostpassScheduling.v | 23 ++++- 3 files changed, 246 insertions(+), 181 deletions(-) delete mode 100644 aarch64/Peephole.v create mode 100644 aarch64/PeepholeOracle.ml (limited to 'aarch64') diff --git a/aarch64/Peephole.v b/aarch64/Peephole.v deleted file mode 100644 index 54670708..00000000 --- a/aarch64/Peephole.v +++ /dev/null @@ -1,177 +0,0 @@ -(* *************************************************************) -(* *) -(* The Compcert verified compiler *) -(* *) -(* Léo Gourdin UGA, VERIMAG *) -(* *) -(* Copyright VERIMAG. All rights reserved. *) -(* This file is distributed under the terms of the INRIA *) -(* Non-Commercial License Agreement. *) -(* *) -(* *************************************************************) - -Require Import Coqlib. -Require Import Asmblock. -Require Import Values. -Require Import Integers. -Require Import AST. -Require Compopts. - -Definition is_valid_immofs_32 (z: Z) : bool := - if (zle z 252) && (zle (-256) z) && ((Z.modulo z 4) =? 0) then true else false. - -Definition is_valid_immofs_64 (z: Z) : bool := - if (zle z 504) && (zle (-512) z) && ((Z.modulo z 8) =? 0) then true else false. - -Definition is_valid_ldrw (rd1 rd2: ireg) (b1 b2: iregsp) (n1 n2: int64) : bool := - let z1 := Int64.signed n1 in - let z2 := Int64.signed n2 in - if (negb (ireg_eq rd1 rd2)) && (iregsp_eq b1 b2) && (negb (iregsp_eq (RR1 rd1) b1)) - && (z2 =? z1 + 4) && (is_valid_immofs_32 z1) then true else false. - -Definition is_valid_ldrx (rd1 rd2: ireg) (b1 b2: iregsp) (n1 n2: int64) : bool := - let z1 := Int64.signed n1 in - let z2 := Int64.signed n2 in - if (negb (ireg_eq rd1 rd2)) && (iregsp_eq b1 b2) && (negb (iregsp_eq (RR1 rd1) b1)) - && (z2 =? z1 + 8) && (is_valid_immofs_64 z1) then true else false. - -Definition is_valid_strw (b1 b2: iregsp) (n1 n2: int64) : bool := - let z1 := Int64.signed n1 in - let z2 := Int64.signed n2 in - if (iregsp_eq b1 b2) && (z2 =? z1 + 4) && (is_valid_immofs_32 z1) - then true else false. - -Definition is_valid_strx (b1 b2: iregsp) (n1 n2: int64) : bool := - let z1 := Int64.signed n1 in - let z2 := Int64.signed n2 in - if (iregsp_eq b1 b2) && (z2 =? z1 + 8) && (is_valid_immofs_64 z1) - then true else false. - -(* Try to find arguments of the next compatible ldrw to replace. *) -Fixpoint get_next_comp_ldpw_args (rd1: ireg) (b1: iregsp) (n1: int64) (t1: list basic) : option (ireg * iregsp * int64) := - match t1 with - | nil => None - | (PLd_rd_a Pldrw (IR (RR1 rd2)) (ADimm b2 n2)) :: t2 => - if (is_valid_ldrw rd1 rd2 b1 b2 n1 n2) then Some (rd2, b2, n2) - else None - | _ :: t2 => get_next_comp_ldpw_args rd1 b1 n1 t2 - end. - -(* Try to delete the next compatible ldrw to replace. *) -Fixpoint del_next_comp_ldpw_args (rd1: ireg) (b1: iregsp) (n1: int64) (t1: list basic) : list basic := - match t1 with - | nil => nil - | (PLd_rd_a Pldrw (IR (RR1 rd2)) (ADimm b2 n2)) :: t2 => - if (is_valid_ldrw rd1 rd2 b1 b2 n1 n2) then t2 - else t1 - | h2 :: t2 => h2 :: del_next_comp_ldpw_args rd1 b1 n1 t2 - end. - -Fixpoint ldpw_follow (rd1: ireg) (b1: iregsp) (n1: int64) (t1: list basic) : option (basic * list basic) := - match (get_next_comp_ldpw_args rd1 b1 n1 t1) with - | None => None - | Some (rd2, b2, n2) => - match t1 with - | nil => None - | h2 :: t2 => - match h2 with - | (PStore _) => None - | (PLd_rd_a Pldrw (IR (RR1 rd')) (ADimm b' n')) => - if (ireg_eq rd' rd2) && (iregsp_eq b' b2) && (Int64.eq n' n2) then - Some ((PLoad (Pldp Pldpw rd1 rd2 (ADimm b1 n1))), - (del_next_comp_ldpw_args rd1 b1 n1 t1)) - else ldpw_follow rd1 b1 n1 t2 - | _ => ldpw_follow rd1 b1 n1 t2 - end - end - end. - -Fixpoint pair_rep (insns : list basic) : list basic := - match insns with - | nil => nil - | h0 :: t0 => - match t0 with - | h1 :: t1 => - match h0, h1 with - -(* ########################## LDP *) - - | (PLd_rd_a Pldrw (IR (RR1 rd1)) (ADimm b1 n1)), - (PLd_rd_a Pldrw (IR (RR1 rd2)) (ADimm b2 n2)) => - (* When both load the same dest, and with no reuse of ld1 in ld2. *) - if (ireg_eq rd1 rd2) && (negb (iregsp_eq b2 (RR1 rd1))) then - Pnop :: (pair_rep t0) - (* When two consec mem addr are loading two different dest. *) - else if (is_valid_ldrw rd1 rd2 b1 b2 n1 n2) then - (PLoad (Pldp Pldpw rd1 rd2 (ADimm b1 n1))) :: Pnop :: (pair_rep t1) - (* When nothing can be optimized. *) - else - h0 :: (pair_rep t0) - | (PLd_rd_a Pldrx (IR (RR1 rd1)) (ADimm b1 n1)), - (PLd_rd_a Pldrx (IR (RR1 rd2)) (ADimm b2 n2)) => - if (ireg_eq rd1 rd2) && (negb (iregsp_eq b2 (RR1 rd1))) then - Pnop :: (pair_rep t0) - else if (is_valid_ldrx rd1 rd2 b1 b2 n1 n2) then - (PLoad (Pldp Pldpx rd1 rd2 (ADimm b1 n1))) :: Pnop :: (pair_rep t1) - else - h0 :: (pair_rep t0) - - (* When there are some insts between loads/stores *) - (*| (PLd_rd_a Pldrw (IR (RR1 rd1)) (ADimm b1 n1)), h1 =>*) - (*match ldpw_follow rd1 b1 n1 t1 with*) - (*| None => h0 :: (pair_rep t0)*) - (*| Some (ldpw, l) => ldpw :: Pnop :: (pair_rep l)*) - (*end*) - (*| (PLd_rd_a Pldrx rd1 (ADimm b1 n1)), h1 => ldpx_follow rd1 b1 n1*) - - -(* ########################## STP *) - - | (PSt_rs_a Pstrw (IR (RR1 rs1)) (ADimm b1 n1)), - (PSt_rs_a Pstrw (IR (RR1 rs2)) (ADimm b2 n2)) => - (* When both store at the same addr, same ofs. *) - (*if (iregsp_eq b1 b2) && (z2 =? z1) then - Pnop :: (pair_rep t0) - (* When two consec mem addr are targeted by two store. *) - else*) - - if (is_valid_strw b1 b2 n1 n2) then - (PStore (Pstp Pstpw rs1 rs2 (ADimm b1 n1))) :: Pnop :: (pair_rep t1) - (* When nothing can be optimized. *) - else - h0 :: (pair_rep t0) - | (PSt_rs_a Pstrx (IR (RR1 rs1)) (ADimm b1 n1)), - (PSt_rs_a Pstrx (IR (RR1 rs2)) (ADimm b2 n2)) => - (*if (iregsp_eq b1 b2) && (z2 =? z1) then - Pnop :: (pair_rep t0) - else*) - - if (is_valid_strx b1 b2 n1 n2) then - (PStore (Pstp Pstpx rs1 rs2 (ADimm b1 n1))) :: Pnop :: (pair_rep t1) - else - h0 :: (pair_rep t0) - - - - | _, _ => h0 :: (pair_rep t0) - end - | nil => h0 :: nil - end - end. - -Definition optimize_body (insns : list basic) := - if Compopts.optim_coalesce_mem tt - then (pair_rep insns) - else insns. - -Program Definition optimize_bblock (bb : bblock) := - let optimized := optimize_body (body bb) in - let wf_ok := non_empty_bblockb optimized (exit bb) in - {| header := header bb; - body := if wf_ok then optimized else (body bb); - exit := exit bb |}. -Next Obligation. - destruct (non_empty_bblockb (optimize_body (body bb))) eqn:Rwf. - - rewrite Rwf. cbn. trivial. - - exact (correct bb). -Qed. diff --git a/aarch64/PeepholeOracle.ml b/aarch64/PeepholeOracle.ml new file mode 100644 index 00000000..c9a1f8f4 --- /dev/null +++ b/aarch64/PeepholeOracle.ml @@ -0,0 +1,227 @@ +(* *************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Léo Gourdin UGA, VERIMAG *) +(* *) +(* Copyright VERIMAG. All rights reserved. *) +(* This file is distributed under the terms of the INRIA *) +(* Non-Commercial License Agreement. *) +(* *) +(* *************************************************************) + +open Camlcoq +open Asmblock +open Asm +open Int64 +open Printf + +let debug = true + +let is_valid_immofs_32 z = + if z <= 252 && z >= -256 && z mod 4 == 0 then true else false + +let is_valid_immofs_64 z = + if z <= 504 && z >= -512 && z mod 8 == 0 then true else false + +let is_valid_ldrw rd1 rd2 b1 b2 n1 n2 = + let z1 = to_int (camlint64_of_coqint n1) in + let z2 = to_int (camlint64_of_coqint n2) in + if + (not (ireg_eq rd1 rd2)) + && iregsp_eq b1 b2 + && (not (iregsp_eq (RR1 rd1) b1)) + && z2 == z1 + 4 + && is_valid_immofs_32 z1 + then true + else false + +let is_valid_ldrx rd1 rd2 b1 b2 n1 n2 = + let z1 = to_int (camlint64_of_coqint n1) in + let z2 = to_int (camlint64_of_coqint n2) in + if + (not (ireg_eq rd1 rd2)) + && iregsp_eq b1 b2 + && (not (iregsp_eq (RR1 rd1) b1)) + && z2 == z1 + 8 + && is_valid_immofs_64 z1 + then true + else false + +let is_valid_strw b1 b2 n1 n2 = + let z1 = to_int (camlint64_of_coqint n1) in + let z2 = to_int (camlint64_of_coqint n2) in + if iregsp_eq b1 b2 && z2 == z1 + 4 && is_valid_immofs_32 z1 then true + else false + +let is_valid_strx b1 b2 n1 n2 = + let z1 = to_int (camlint64_of_coqint n1) in + let z2 = to_int (camlint64_of_coqint n2) in + if iregsp_eq b1 b2 && z2 == z1 + 8 && is_valid_immofs_64 z1 then true + else false + +let dreg_of_ireg r = IR (RR1 r) + +(*let pot_ldrw_rep = ref []*) + +let rec affect_symb_mem reg pot_rep stype = + match (pot_rep, stype) with + | [], _ -> [] + | ( ((PLoad (PLd_rd_a (Pldrw, IR (RR1 rd), ADimm (b, n))), _) as h0) :: t0, + "ldrw" ) -> + if dreg_eq reg (IR b) then affect_symb_mem reg t0 stype + else h0 :: affect_symb_mem reg t0 stype + | _, _ -> failwith "Found an inconsistent inst in pot_rep" + +let rec read_symb_mem reg pot_rep stype = + match (pot_rep, stype) with + | [], _ -> [] + | ( ((PLoad (PLd_rd_a (Pldrw, IR (RR1 rd), ADimm (b, n))), _) as h0) :: t0, + "ldrw" ) -> + if dreg_eq reg (IR (RR1 rd)) then read_symb_mem reg t0 stype + else h0 :: read_symb_mem reg t0 stype + | _, _ -> failwith "Found an inconsistent inst in pot_rep" + +let update_pot_rep inst pot_rep stype = + match inst with + | PArith i -> ( + match i with + | PArithP (_, rd) -> pot_rep := affect_symb_mem rd !pot_rep stype + | PArithPP (_, rd, rs) -> + pot_rep := affect_symb_mem rd !pot_rep stype; + pot_rep := read_symb_mem rs !pot_rep stype + | PArithPPP (_, rd, rs1, rs2) -> + pot_rep := affect_symb_mem rd !pot_rep stype; + pot_rep := read_symb_mem rs1 !pot_rep stype; + pot_rep := read_symb_mem rs2 !pot_rep stype + | PArithRR0R (_, rd, rs1, rs2) -> + pot_rep := affect_symb_mem (dreg_of_ireg rd) !pot_rep stype; + (match rs1 with + | RR0 rs1 -> + pot_rep := read_symb_mem (dreg_of_ireg rs1) !pot_rep stype + | _ -> ()); + pot_rep := read_symb_mem (dreg_of_ireg rs2) !pot_rep stype + | PArithRR0 (_, rd, rs) -> ( + pot_rep := affect_symb_mem (dreg_of_ireg rd) !pot_rep stype; + match rs with + | RR0 rs1 -> + pot_rep := read_symb_mem (dreg_of_ireg rs1) !pot_rep stype + | _ -> ()) + | PArithARRRR0 (_, rd, rs1, rs2, rs3) -> ( + pot_rep := affect_symb_mem (dreg_of_ireg rd) !pot_rep stype; + pot_rep := read_symb_mem (dreg_of_ireg rs1) !pot_rep stype; + pot_rep := read_symb_mem (dreg_of_ireg rs2) !pot_rep stype; + match rs3 with + | RR0 rs1 -> + pot_rep := read_symb_mem (dreg_of_ireg rs1) !pot_rep stype + | _ -> ()) + | PArithComparisonPP (_, rs1, rs2) -> + pot_rep := read_symb_mem rs1 !pot_rep stype; + pot_rep := read_symb_mem rs2 !pot_rep stype + | PArithComparisonR0R (_, rs1, rs2) -> + (match rs1 with + | RR0 rs1 -> + pot_rep := read_symb_mem (dreg_of_ireg rs1) !pot_rep stype + | _ -> ()); + pot_rep := read_symb_mem (dreg_of_ireg rs2) !pot_rep stype + | PArithComparisonP (_, rs1) -> + pot_rep := read_symb_mem rs1 !pot_rep stype + | Pcset (rd, _) -> + pot_rep := affect_symb_mem (dreg_of_ireg rd) !pot_rep stype + | Pfmovi (_, _, rs) -> ( + match rs with + | RR0 rs -> pot_rep := read_symb_mem (dreg_of_ireg rs) !pot_rep stype + | _ -> ()) + | Pcsel (rd, rs1, rs2, _) -> + pot_rep := affect_symb_mem rd !pot_rep stype; + pot_rep := read_symb_mem rs1 !pot_rep stype; + pot_rep := read_symb_mem rs2 !pot_rep stype + | Pfnmul (_, rd, rs1, rs2) -> ()) + | PLoad i -> ( + match i with + | PLd_rd_a (_, rd, _) -> pot_rep := affect_symb_mem rd !pot_rep stype + | Pldp (_, rd1, rd2, _) -> + pot_rep := affect_symb_mem (dreg_of_ireg rd1) !pot_rep stype; + pot_rep := affect_symb_mem (dreg_of_ireg rd2) !pot_rep stype) + | PStore _ -> pot_rep := [] + (* TODO *) + | _ -> () + +(*let update_symb_mem inst = update_pot_rep inst pot_ldrw_rep "ldrw"*) + +let rec search_compat_rep rd b n pot_rep stype = + match (pot_rep, stype) with + | [], _ -> None + | ( ((PLoad (PLd_rd_a (Pldrx, IR (RR1 rd'), ADimm (b', n'))), c) as h0) :: t0, + "ldrw" ) -> + if is_valid_ldrw rd rd' b b' n n' then Some h0 + else search_compat_rep rd b n t0 stype + | _, _ -> None + +let pair_rep insta = + for i = 0 to Array.length insta - 2 do + let h0 = insta.(i) in + let h1 = insta.(i + 1) in + match (h0, h1) with + | ( PLoad (PLd_rd_a (Pldrw, IR (RR1 rd1), ADimm (b1, n1))), + PLoad (PLd_rd_a (Pldrw, IR (RR1 rd2), ADimm (b2, n2))) ) -> + (* When both load the same dest, and with no reuse of ld1 in ld2. *) + if ireg_eq rd1 rd2 && not (iregsp_eq b2 (RR1 rd1)) then ( + if debug then eprintf "LDP_WOPT\n"; + insta.(i) <- Pnop + (* When two consec mem addr are loading two different dest. *)) + else if is_valid_ldrw rd1 rd2 b1 b2 n1 n2 then ( + if debug then eprintf "LDP_WPEEP\n"; + insta.(i) <- PLoad (Pldp (Pldpw, rd1, rd2, ADimm (b1, n1))); + insta.(i + 1) <- Pnop) + | ( PLoad (PLd_rd_a (Pldrx, IR (RR1 rd1), ADimm (b1, n1))), + PLoad (PLd_rd_a (Pldrx, IR (RR1 rd2), ADimm (b2, n2))) ) -> + if ireg_eq rd1 rd2 && not (iregsp_eq b2 (RR1 rd1)) then ( + if debug then eprintf "LDP_XOPT\n"; + insta.(i) <- Pnop) + else if is_valid_ldrx rd1 rd2 b1 b2 n1 n2 then ( + if debug then eprintf "LDP_XPEEP\n"; + insta.(i) <- PLoad (Pldp (Pldpx, rd1, rd2, ADimm (b1, n1))); + insta.(i + 1) <- Pnop + (* When there are some insts between loads/stores *) + (*| PLoad (PLd_rd_a (Pldrw, IR (RR1 rd1), ADimm (b1, n1))), _ ->*) + (*(match search_compat_rep h0 pot_ldrw_rep with*) + (*| None -> (pot_ldrw_rep := (h0, count) :: !pot_ldrw_rep; h0 :: pair_rep insta t0)*) + (*| Some (l, c) -> *)) + | ( PStore (PSt_rs_a (Pstrw, IR (RR1 rs1), ADimm (b1, n1))), + PStore (PSt_rs_a (Pstrw, IR (RR1 rs2), ADimm (b2, n2))) ) -> + (* When both store at the same addr, same ofs. *) + (*if (iregsp_eq b1 b2) && (z2 =? z1) then + Pnop :: (pair_rep insta t0) + (* When two consec mem addr are targeted by two store. *) + else*) + if is_valid_strw b1 b2 n1 n2 then ( + if debug then eprintf "STP_WPEEP\n"; + insta.(i) <- PStore (Pstp (Pstpw, rs1, rs2, ADimm (b1, n1))); + insta.(i + 1) <- Pnop (* When nothing can be optimized. *)) + | ( PStore (PSt_rs_a (Pstrx, IR (RR1 rs1), ADimm (b1, n1))), + PStore (PSt_rs_a (Pstrx, IR (RR1 rs2), ADimm (b2, n2))) ) -> + (*if (iregsp_eq b1 b2) && (z2 =? z1) then + Pnop :: (pair_rep insta t0) + else*) + if is_valid_strx b1 b2 n1 n2 then ( + if debug then eprintf "STP_XPEEP\n"; + insta.(i) <- PStore (Pstp (Pstpx, rs1, rs2, ADimm (b1, n1))); + insta.(i + 1) <- Pnop) + | h0, _ -> () (*update_symb_mem h0*) + done + +let optimize_bdy (bdy : basic list) : basic list = + if !Clflags.option_fcoalesce_mem then + let insta = Array.of_list bdy in + (pair_rep insta; Array.to_list insta) + else bdy + +(** Called peephole function from Coq *) + +let peephole_opt bdy = + Timing.time_coq + [ + 'P'; 'e'; 'e'; 'p'; 'h'; 'o'; 'l'; 'e'; ' '; 'o'; 'r'; 'a'; 'c'; 'l'; 'e'; + ] + optimize_bdy bdy diff --git a/aarch64/PostpassScheduling.v b/aarch64/PostpassScheduling.v index b8c16131..f826632b 100644 --- a/aarch64/PostpassScheduling.v +++ b/aarch64/PostpassScheduling.v @@ -19,7 +19,6 @@ Require Import Coqlib Errors AST Integers. Require Import Asmblock Axioms Memory Globalenvs. Require Import Asmblockdeps Asmblockprops. Require Import IterList. -Require Peephole. Local Open Scope error_monad_scope. @@ -27,8 +26,12 @@ Local Open Scope error_monad_scope. returns a scheduled basic block *) Axiom schedule: bblock -> (list basic) * option control. +Axiom peephole_opt: (list basic) -> list basic. + Extract Constant schedule => "PostpassSchedulingOracle.schedule". +Extract Constant peephole_opt => "PeepholeOracle.peephole_opt". + Section verify_schedule. Variable lk: aarch64_linker. @@ -68,9 +71,21 @@ Definition do_schedule (bb: bblock) : res bblock := if (Z.eqb (size bb) 1) then OK (bb) else match (schedule bb) with (lb, oc) => schedule_to_bblock lb oc end. -Definition do_peephole (bb: bblock) : bblock := - let res := Peephole.optimize_bblock bb in - if (size res =? size bb) then res else bb. +(*Definition do_peephole (bb: bblock) : bblock :=*) + (*let res := Peephole.optimize_bblock bb in*) + (*if (size res =? size bb) then res else bb.*) + +Program Definition do_peephole (bb : bblock) := + let optimized := peephole_opt (body bb) in + let wf_ok := non_empty_bblockb optimized (exit bb) in + {| header := header bb; + body := if wf_ok then optimized else (body bb); + exit := exit bb |}. +Next Obligation. + destruct (non_empty_bblockb (peephole_opt (body bb))) eqn:Rwf. + - rewrite Rwf. cbn. trivial. + - exact (correct bb). +Qed. Definition verified_schedule (bb : bblock) : res bblock := let nhbb := no_header bb in -- cgit