From ec1a33e664f3484772a06dcd7e3198aa80b5d993 Mon Sep 17 00:00:00 2001 From: Léo Gourdin Date: Thu, 10 Dec 2020 23:06:17 +0100 Subject: Big improvment in peephole, changing LDP/STP semantics --- aarch64/Asm.v | 40 +-- aarch64/Asmblock.v | 103 ++++---- aarch64/Asmblockdeps.v | 202 ++++++++------- aarch64/Asmexpand.ml | 10 +- aarch64/Asmgen.v | 16 +- aarch64/Asmgenproof.v | 31 +-- aarch64/PeepholeOracle.ml | 487 ++++++++++++++++++++---------------- aarch64/PostpassSchedulingOracle.ml | 193 +++++--------- aarch64/TargetPrinter.ml | 8 +- 9 files changed, 551 insertions(+), 539 deletions(-) (limited to 'aarch64') diff --git a/aarch64/Asm.v b/aarch64/Asm.v index 719c3b61..5d9cf601 100644 --- a/aarch64/Asm.v +++ b/aarch64/Asm.v @@ -193,16 +193,16 @@ Inductive instruction: Type := | Pldrsh (sz: isize) (rd: ireg) (a: addressing) (**r load int16, sign-extend *) | Pldrzw (rd: ireg) (a: addressing) (**r load int32, zero-extend to int64 *) | Pldrsw (rd: ireg) (a: addressing) (**r load int32, sign-extend to int64 *) - | Pldpw (rd1 rd2: ireg) (a: addressing) (**r load two int32 *) - | Pldpx (rd1 rd2: ireg) (a: addressing) (**r load two int64 *) + | Pldpw (rd1 rd2: ireg) (chk1 chk2: memory_chunk) (a: addressing) (**r load two int32 *) + | Pldpx (rd1 rd2: ireg) (chk1 chk2: memory_chunk) (a: addressing) (**r load two int64 *) | Pstrw (rs: ireg) (a: addressing) (**r store int32 *) | Pstrw_a (rs: ireg) (a: addressing) (**r store int32 as any32 *) | Pstrx (rs: ireg) (a: addressing) (**r store int64 *) | Pstrx_a (rs: ireg) (a: addressing) (**r store int64 as any64 *) | Pstrb (rs: ireg) (a: addressing) (**r store int8 *) | Pstrh (rs: ireg) (a: addressing) (**r store int16 *) - | Pstpw (rs1 rs2: ireg) (a: addressing) (**r store two int64 *) - | Pstpx (rs1 rs2: ireg) (a: addressing) (**r store two int64 *) + | Pstpw (rs1 rs2: ireg) (chk1 chk2: memory_chunk) (a: addressing) (**r store two int64 *) + | Pstpx (rs1 rs2: ireg) (chk1 chk2: memory_chunk) (a: addressing) (**r store two int64 *) (** Integer arithmetic, immediate *) | Paddimm (sz: isize) (rd: iregsp) (r1: iregsp) (n: Z) (**r addition *) | Psubimm (sz: isize) (rd: iregsp) (r1: iregsp) (n: Z) (**r subtraction *) @@ -845,16 +845,16 @@ Definition exec_load (chunk: memory_chunk) (transf: val -> val) | Some v => Next (nextinstr (rs#r <- (transf v))) m end. -Definition exec_load_double (chunk: memory_chunk) (transf: val -> val) +Definition exec_load_double (chk1 chk2: memory_chunk) (transf: val -> val) (a: addressing) (rd1 rd2: preg) (rs: regset) (m: mem) := if is_pair_addressing_mode_correct a then let addr := (eval_addressing a rs) in - let ofs := match chunk with | Mint32 => 4 | _ => 8 end in + let ofs := match chk1 with | Mint32 | Many32 => 4 | _ => 8 end in let addr' := (eval_addressing (get_offset_addr a ofs) rs) in - match Mem.loadv chunk m addr with + match Mem.loadv chk1 m addr with | None => Stuck | Some v1 => - match Mem.loadv chunk m addr' with + match Mem.loadv chk2 m addr' with | None => Stuck | Some v2 => Next (nextinstr ((rs#rd1 <- (transf v1))#rd2 <- (transf v2))) m @@ -870,16 +870,16 @@ Definition exec_store (chunk: memory_chunk) | Some m' => Next (nextinstr rs) m' end. -Definition exec_store_double (chunk: memory_chunk) +Definition exec_store_double (chk1 chk2: memory_chunk) (a: addressing) (v1 v2: val) (rs: regset) (m: mem) := if is_pair_addressing_mode_correct a then let addr := (eval_addressing a rs) in - let ofs := match chunk with | Mint32 => 4 | _ => 8 end in + let ofs := match chk1 with | Mint32 | Many32 => 4 | _ => 8 end in let addr' := (eval_addressing (get_offset_addr a ofs) rs) in - match Mem.storev chunk m addr v1 with + match Mem.storev chk1 m addr v1 with | None => Stuck - | Some m' => match Mem.storev chunk m' addr' v2 with + | Some m' => match Mem.storev chk2 m' addr' v2 with | None => Stuck | Some m'' => Next (nextinstr rs) m'' end @@ -1311,15 +1311,15 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out | Pbuiltin ef args res => Stuck (**r treated specially below *) (** The following instructions and directives are not generated directly by Asmgen, so we do not model them. *) - | Pldpw rd1 rd2 a => - exec_load_double Mint32 (fun v => v) a rd1 rd2 rs m - | Pldpx rd1 rd2 a => - exec_load_double Mint64 (fun v => v) a rd1 rd2 rs m + | Pldpw rd1 rd2 chk1 chk2 a => + exec_load_double chk1 chk2 (fun v => v) a rd1 rd2 rs m + | Pldpx rd1 rd2 chk1 chk2 a => + exec_load_double chk1 chk2 (fun v => v) a rd1 rd2 rs m | Pnop => Next (nextinstr rs) m - | Pstpw rs1 rs2 a => - exec_store_double Mint32 a rs#rs1 rs#rs2 rs m - | Pstpx rs1 rs2 a => - exec_store_double Mint64 a rs#rs1 rs#rs2 rs m + | Pstpw rs1 rs2 chk1 chk2 a => + exec_store_double chk1 chk2 a rs#rs1 rs#rs2 rs m + | Pstpx rs1 rs2 chk1 chk2 a => + exec_store_double chk1 chk2 a rs#rs1 rs#rs2 rs m | Pcls _ _ _ | Pclz _ _ _ | Prev _ _ _ diff --git a/aarch64/Asmblock.v b/aarch64/Asmblock.v index f12e8922..a4decae7 100644 --- a/aarch64/Asmblock.v +++ b/aarch64/Asmblock.v @@ -112,7 +112,7 @@ Inductive load_rd1_rd2_a: Type := Inductive ld_instruction: Type := | PLd_rd_a (ld: load_rd_a) (rd: dreg) (a: addressing) - | Pldp (ld: load_rd1_rd2_a) (rd1 rd2: ireg) (a: addressing) (**r load two int64 *) + | Pldp (ld: load_rd1_rd2_a) (rd1 rd2: ireg) (chk1 chk2: memory_chunk) (a: addressing) (**r load two int64 *) . Inductive store_rs_a : Type := @@ -136,7 +136,7 @@ Inductive store_rs1_rs2_a : Type := Inductive st_instruction : Type := | PSt_rs_a (st: store_rs_a) (rs: dreg) (a: addressing) - | Pstp (st: store_rs1_rs2_a) (rs1 rs2: ireg) (a: addressing) (**r store two int64 *) + | Pstp (st: store_rs1_rs2_a) (rs1 rs2: ireg) (chk1 chk2: memory_chunk) (a: addressing) (**r store two int64 *) . Inductive arith_p : Type := @@ -459,16 +459,16 @@ Definition exec_load_rd_a (chunk: memory_chunk) (transf: val -> val) SOME v <- Mem.loadv chunk m (eval_addressing a rs) IN Next (rs#r <- (transf v)) m. -Definition exec_load_double (chunk: memory_chunk) (transf: val -> val) +Definition exec_load_double (chk1 chk2: memory_chunk) (transf: val -> val) (a: addressing) (rd1 rd2: dreg) (rs: regset) (m: mem) := if is_pair_addressing_mode_correct a then let addr := (eval_addressing a rs) in - let ofs := match chunk with | Mint32 => 4 | _ => 8 end in + let ofs := match chk1 with | Mint32 | Many32 => 4 | _ => 8 end in let addr' := (eval_addressing (get_offset_addr a ofs) rs) in - match Mem.loadv chunk m addr with + match Mem.loadv chk1 m addr with | None => Stuck | Some v1 => - match Mem.loadv chunk m addr' with + match Mem.loadv chk2 m addr' with | None => Stuck | Some v2 => Next ((rs#rd1 <- (transf v1))#rd2 <- (transf v2)) m @@ -482,16 +482,16 @@ Definition exec_store_rs_a (chunk: memory_chunk) SOME m' <- Mem.storev chunk m (eval_addressing a rs) v IN Next rs m'. -Definition exec_store_double (chunk: memory_chunk) +Definition exec_store_double (chk1 chk2: memory_chunk) (a: addressing) (v1 v2: val) (rs: regset) (m: mem) := if is_pair_addressing_mode_correct a then let addr := (eval_addressing a rs) in - let ofs := match chunk with | Mint32 => 4 | _ => 8 end in + let ofs := match chk1 with | Mint32 | Many32 => 4 | _ => 8 end in let addr' := (eval_addressing (get_offset_addr a ofs) rs) in - match Mem.storev chunk m addr v1 with + match Mem.storev chk1 m addr v1 with | None => Stuck - | Some m' => match Mem.storev chunk m' addr' v2 with + | Some m' => match Mem.storev chk2 m' addr' v2 with | None => Stuck | Some m'' => Next rs m'' end @@ -501,67 +501,62 @@ Definition exec_store_double (chunk: memory_chunk) (** execution of loads *) -Definition chunk_load (ld: ld_instruction): memory_chunk := +Definition chunk_load (ld: load_rd_a): memory_chunk := match ld with - | PLd_rd_a Pldrw _ _ => Mint32 - | PLd_rd_a Pldrw_a _ _ => Many32 - | PLd_rd_a Pldrx _ _ => Mint64 - | PLd_rd_a Pldrx_a _ _ => Many64 - | PLd_rd_a (Pldrb _) _ _ => Mint8unsigned - | PLd_rd_a (Pldrsb _) _ _ => Mint8signed - | PLd_rd_a (Pldrh _) _ _ => Mint16unsigned - | PLd_rd_a (Pldrsh _) _ _ => Mint16signed - | PLd_rd_a Pldrzw _ _ => Mint32 - | PLd_rd_a Pldrsw _ _ => Mint32 - | PLd_rd_a Pldrs _ _ => Mfloat32 - | PLd_rd_a Pldrd _ _ => Mfloat64 - | PLd_rd_a Pldrd_a _ _ => Many64 - | Pldp Pldpw _ _ _ => Mint32 - | Pldp Pldpx _ _ _ => Mint64 + | Pldrw => Mint32 + | Pldrw_a => Many32 + | Pldrx => Mint64 + | Pldrx_a => Many64 + | Pldrb _ => Mint8unsigned + | Pldrsb _ => Mint8signed + | Pldrh _ => Mint16unsigned + | Pldrsh _ => Mint16signed + | Pldrzw => Mint32 + | Pldrsw => Mint32 + | Pldrs => Mfloat32 + | Pldrd => Mfloat64 + | Pldrd_a => Many64 end. -Definition chunk_store (st: st_instruction) : memory_chunk := +Definition chunk_store (st: store_rs_a) : memory_chunk := match st with - | PSt_rs_a Pstrw _ _ => Mint32 - | PSt_rs_a Pstrw_a _ _ => Many32 - | PSt_rs_a Pstrx _ _ => Mint64 - | PSt_rs_a Pstrx_a _ _ => Many64 - | PSt_rs_a Pstrb _ _ => Mint8unsigned - | PSt_rs_a Pstrh _ _ => Mint16unsigned - | PSt_rs_a Pstrs _ _ => Mfloat32 - | PSt_rs_a Pstrd _ _ => Mfloat64 - | PSt_rs_a Pstrd_a _ _ => Many64 - | Pstp Pstpw _ _ _ => Mint32 - | Pstp Pstpx _ _ _ => Mint64 + | Pstrw => Mint32 + | Pstrw_a => Many32 + | Pstrx => Mint64 + | Pstrx_a => Many64 + | Pstrb => Mint8unsigned + | Pstrh => Mint16unsigned + | Pstrs => Mfloat32 + | Pstrd => Mfloat64 + | Pstrd_a => Many64 end. -Definition interp_load (ld: ld_instruction): val -> val := +Definition interp_load (ld: load_rd_a): val -> val := match ld with - | PLd_rd_a (Pldrb X) _ _ => Val.longofintu - | PLd_rd_a (Pldrsb X) _ _ => Val.longofint - | PLd_rd_a (Pldrh X) _ _ => Val.longofintu - | PLd_rd_a (Pldrsh X) _ _ => Val.longofint - | PLd_rd_a Pldrzw _ _ => Val.longofintu - | PLd_rd_a Pldrsw _ _ => Val.longofint + | Pldrb X => Val.longofintu + | Pldrsb X => Val.longofint + | Pldrh X => Val.longofintu + | Pldrsh X => Val.longofint + | Pldrzw => Val.longofintu + | Pldrsw => Val.longofint (* Changed to exhaustive list because I tend to forgot all the places I need * to check when changing things. *) - | PLd_rd_a (Pldrb W) _ _ | PLd_rd_a (Pldrsb W) _ _ | PLd_rd_a (Pldrh W) _ _ | PLd_rd_a (Pldrsh W) _ _ - | PLd_rd_a Pldrw _ _ | PLd_rd_a Pldrw_a _ _ | PLd_rd_a Pldrx _ _ - | PLd_rd_a Pldrx_a _ _ | PLd_rd_a Pldrs _ _ | PLd_rd_a Pldrd _ _ - | PLd_rd_a Pldrd_a _ _ - | Pldp _ _ _ _ => fun v => v + | Pldrb W | Pldrsb W | Pldrh W | Pldrsh W + | Pldrw | Pldrw_a | Pldrx + | Pldrx_a | Pldrs | Pldrd + | Pldrd_a => fun v => v end. Definition exec_load (ldi: ld_instruction) (rs: regset) (m: mem) := match ldi with - | PLd_rd_a ld rd a => exec_load_rd_a (chunk_load ldi) (interp_load ldi) a rd rs m - | Pldp ld rd1 rd2 a => exec_load_double (chunk_load ldi) (interp_load ldi) a rd1 rd2 rs m + | PLd_rd_a ld rd a => exec_load_rd_a (chunk_load ld) (interp_load ld) a rd rs m + | Pldp ld rd1 rd2 chk1 chk2 a => exec_load_double chk1 chk2 (fun v => v) a rd1 rd2 rs m end. Definition exec_store (sti: st_instruction) (rs: regset) (m: mem) := match sti with - | PSt_rs_a st rsr a => exec_store_rs_a (chunk_store sti) a rs#rsr rs m - | Pstp st rs1 rs2 a => exec_store_double (chunk_store sti) a rs#rs1 rs#rs2 rs m + | PSt_rs_a st rsr a => exec_store_rs_a (chunk_store st) a rs#rsr rs m + | Pstp st rs1 rs2 chk1 chk2 a => exec_store_double chk1 chk2 a rs#rs1 rs#rs2 rs m end. (** TODO: redundant w.r.t Machblock ?? *) diff --git a/aarch64/Asmblockdeps.v b/aarch64/Asmblockdeps.v index e2d788a5..1ad18889 100644 --- a/aarch64/Asmblockdeps.v +++ b/aarch64/Asmblockdeps.v @@ -166,15 +166,15 @@ Inductive arith_op := . Inductive store_op := - | Ostore1 (st: st_instruction) (a: addressing) - | Ostore2 (st: st_instruction) (a: addressing) - | OstoreU (st: st_instruction) (a: addressing) + | Ostore1 (st: store_rs_a) (chunk: memory_chunk) (a: addressing) + | Ostore2 (st: store_rs_a) (chunk: memory_chunk) (a: addressing) + | OstoreU (st: store_rs_a) (chunk: memory_chunk) (a: addressing) . Inductive load_op := - | Oload1 (ld: ld_instruction) (a: addressing) - | Oload2 (ld: ld_instruction) (a: addressing) - | OloadU (ld: ld_instruction) (a: addressing) + | Oload1 (ld: load_rd_a) (chunk: memory_chunk) (a: addressing) + | Oload2 (ld: load_rd_a) (chunk: memory_chunk) (a: addressing) + | OloadU (ld: load_rd_a) (chunk: memory_chunk) (a: addressing) . Inductive allocf_op := @@ -424,16 +424,16 @@ Definition call_ll_storev (c: memory_chunk) (m: mem) (v: option val) (vs: val) : | None => None (* should never occurs *) end. -Definition exec_store1 (n: st_instruction) (m: mem) (a: addressing) (vr vs: val) := +Definition exec_store1 (n: store_rs_a) (m: mem) (chunk: memory_chunk) (a: addressing) (vr vs: val) := let v := match a with | ADimm _ n => Some (Val.addl vs (Vlong n)) | ADadr _ id ofs => Some (Val.addl vs (symbol_low Ge.(_lk) id ofs)) | _ => None end in - call_ll_storev (chunk_store n) m v vr. + call_ll_storev chunk m v vr. -Definition exec_store2 (n: st_instruction) (m: mem) (a: addressing) (vr vs1 vs2: val) := +Definition exec_store2 (n: store_rs_a) (m: mem) (chunk: memory_chunk) (a: addressing) (vr vs1 vs2: val) := let v := match a with | ADreg _ _ => Some (Val.addl vs1 vs2) @@ -442,10 +442,10 @@ Definition exec_store2 (n: st_instruction) (m: mem) (a: addressing) (vr vs1 vs2: | ADuxt _ _ n => Some (Val.addl vs1 (Val.shll (Val.longofintu vs2) (Vint n))) | _ => None end in - call_ll_storev (chunk_store n) m v vr. + call_ll_storev chunk m v vr. -Definition exec_storeU (n: st_instruction) (m: mem) (a: addressing) (vr: val) := - call_ll_storev (chunk_store n) m None vr. +Definition exec_storeU (n: store_rs_a) (m: mem) (chunk: memory_chunk) (a: addressing) (vr: val) := + call_ll_storev chunk m None vr. Definition goto_label_deps (f: function) (lbl: label) (vpc: val) := match label_pos lbl 0 (fn_blocks f) with @@ -503,9 +503,9 @@ Definition control_eval (o: control_op) (l: list value) := Definition store_eval (o: store_op) (l: list value) := match o, l with - | Ostore1 st a, [Val vr; Val vs; Memstate m] => exec_store1 st m a vr vs - | Ostore2 st a, [Val vr; Val vs1; Val vs2; Memstate m] => exec_store2 st m a vr vs1 vs2 - | OstoreU st a, [Val vr; Memstate m] => exec_storeU st m a vr + | Ostore1 st chunk a, [Val vr; Val vs; Memstate m] => exec_store1 st m chunk a vr vs + | Ostore2 st chunk a, [Val vr; Val vs1; Val vs2; Memstate m] => exec_store2 st m chunk a vr vs1 vs2 + | OstoreU st chunk a, [Val vr; Memstate m] => exec_storeU st m chunk a vr | _, _ => None end. @@ -518,16 +518,16 @@ Definition call_ll_loadv (c: memory_chunk) (transf: val -> val) (m: mem) (v: opt | None => None (* should never occurs *) end. -Definition exec_load1 (ld: ld_instruction) (m: mem) (a: addressing) (vl: val) := +Definition exec_load1 (ld: load_rd_a) (m: mem) (chunk: memory_chunk) (a: addressing) (vl: val) := let v := match a with | ADimm _ n => Some (Val.addl vl (Vlong n)) | ADadr _ id ofs => Some (Val.addl vl (symbol_low Ge.(_lk) id ofs)) | _ => None end in - call_ll_loadv (chunk_load ld) (interp_load ld) m v. + call_ll_loadv chunk (interp_load ld) m v. -Definition exec_load2 (ld: ld_instruction) (m: mem) (a: addressing) (vl1 vl2: val) := +Definition exec_load2 (ld: load_rd_a) (m: mem) (chunk: memory_chunk) (a: addressing) (vl1 vl2: val) := let v := match a with | ADreg _ _ => Some (Val.addl vl1 vl2) @@ -536,16 +536,16 @@ Definition exec_load2 (ld: ld_instruction) (m: mem) (a: addressing) (vl1 vl2: va | ADuxt _ _ n => Some (Val.addl vl1 (Val.shll (Val.longofintu vl2) (Vint n))) | _ => None end in - call_ll_loadv (chunk_load ld) (interp_load ld) m v. + call_ll_loadv chunk (interp_load ld) m v. -Definition exec_loadU (n: ld_instruction) (m: mem) (a: addressing) := - call_ll_loadv (chunk_load n) (interp_load n) m None. +Definition exec_loadU (n: load_rd_a) (m: mem) (chunk: memory_chunk) (a: addressing) := + call_ll_loadv chunk (interp_load n) m None. Definition load_eval (o: load_op) (l: list value) := match o, l with - | Oload1 ld a, [Val vs; Memstate m] => exec_load1 ld m a vs - | Oload2 ld a, [Val vs1; Val vs2; Memstate m] => exec_load2 ld m a vs1 vs2 - | OloadU st a, [Memstate m] => exec_loadU st m a + | Oload1 ld chunk a, [Val vs; Memstate m] => exec_load1 ld m chunk a vs + | Oload2 ld chunk a, [Val vs1; Val vs2; Memstate m] => exec_load2 ld m chunk a vs1 vs2 + | OloadU st chunk a, [Memstate m] => exec_loadU st m chunk a | _, _ => None end. @@ -768,12 +768,12 @@ Opaque control_op_eq_correct. Definition store_op_eq (s1 s2: store_op): ?? bool := match s1 with - | Ostore1 st1 a1 => - match s2 with Ostore1 st2 a2 => iandb (struct_eq st1 st2) (struct_eq a1 a2) | _ => RET false end - | Ostore2 st1 a1 => - match s2 with Ostore2 st2 a2 => iandb (struct_eq st1 st2) (struct_eq a1 a2) | _ => RET false end - | OstoreU st1 a1 => - match s2 with OstoreU st2 a2 => iandb (struct_eq st1 st2) (struct_eq a1 a2) | _ => RET false end + | Ostore1 st1 chk1 a1 => + match s2 with Ostore1 st2 chk2 a2 => iandb (struct_eq chk1 chk2) (iandb (struct_eq st1 st2) (struct_eq a1 a2)) | _ => RET false end + | Ostore2 st1 chk1 a1 => + match s2 with Ostore2 st2 chk2 a2 => iandb (struct_eq chk1 chk2) (iandb (struct_eq st1 st2) (struct_eq a1 a2)) | _ => RET false end + | OstoreU st1 chk1 a1 => + match s2 with OstoreU st2 chk2 a2 => iandb (struct_eq chk1 chk2) (iandb (struct_eq st1 st2) (struct_eq a1 a2)) | _ => RET false end end. Lemma store_op_eq_correct s1 s2: @@ -787,12 +787,12 @@ Opaque store_op_eq_correct. Definition load_op_eq (l1 l2: load_op): ?? bool := match l1 with - | Oload1 ld1 a1 => - match l2 with Oload1 ld2 a2 => iandb (struct_eq ld1 ld2) (struct_eq a1 a2) | _ => RET false end - | Oload2 ld1 a1 => - match l2 with Oload2 ld2 a2 => iandb (struct_eq ld1 ld2) (struct_eq a1 a2) | _ => RET false end - | OloadU ld1 a1 => - match l2 with OloadU ld2 a2 => iandb (struct_eq ld1 ld2) (struct_eq a1 a2) | _ => RET false end + | Oload1 ld1 chk1 a1 => + match l2 with Oload1 ld2 chk2 a2 => iandb (struct_eq chk1 chk2) (iandb (struct_eq ld1 ld2) (struct_eq a1 a2)) | _ => RET false end + | Oload2 ld1 chk1 a1 => + match l2 with Oload2 ld2 chk2 a2 => iandb (struct_eq chk1 chk2) (iandb (struct_eq ld1 ld2) (struct_eq a1 a2)) | _ => RET false end + | OloadU ld1 chk1 a1 => + match l2 with OloadU ld2 chk2 a2 => iandb (struct_eq chk1 chk2) (iandb (struct_eq ld1 ld2) (struct_eq a1 a2)) | _ => RET false end end. Lemma load_op_eq_correct l1 l2: @@ -1127,45 +1127,58 @@ Definition trans_arith (ai: ar_instruction) : inst := | Pfnmul fsz rd r1 r2 => [(#rd, Op(Arith (Ofnmul fsz)) (PReg(#r1) @ PReg(#r2) @ Enil))] end. -Definition eval_addressing_rlocs_st (st: st_instruction) (rs: dreg) (a: addressing) := +Definition eval_addressing_rlocs_st (st: store_rs_a) (chunk: memory_chunk) (rs: dreg) (a: addressing) := match a with - | ADimm base n => Op (Store (Ostore1 st a)) (PReg (#rs) @ PReg (#base) @ PReg (pmem) @ Enil) - | ADreg base r => Op (Store (Ostore2 st a)) (PReg (#rs) @ PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil) - | ADlsl base r n => Op (Store (Ostore2 st a)) (PReg (#rs) @ PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil) - | ADsxt base r n => Op (Store (Ostore2 st a)) (PReg (#rs) @ PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil) - | ADuxt base r n => Op (Store (Ostore2 st a)) (PReg (#rs) @ PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil) - | ADadr base id ofs => Op (Store (Ostore1 st a)) (PReg (#rs) @ PReg (#base) @ PReg (pmem) @ Enil) - | ADpostincr base n => Op (Store (OstoreU st a)) (PReg (#rs) @ PReg (pmem) @ Enil) (* not modeled yet *) + | ADimm base n => Op (Store (Ostore1 st chunk a)) (PReg (#rs) @ PReg (#base) @ PReg (pmem) @ Enil) + | ADreg base r => Op (Store (Ostore2 st chunk a)) (PReg (#rs) @ PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil) + | ADlsl base r n => Op (Store (Ostore2 st chunk a)) (PReg (#rs) @ PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil) + | ADsxt base r n => Op (Store (Ostore2 st chunk a)) (PReg (#rs) @ PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil) + | ADuxt base r n => Op (Store (Ostore2 st chunk a)) (PReg (#rs) @ PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil) + | ADadr base id ofs => Op (Store (Ostore1 st chunk a)) (PReg (#rs) @ PReg (#base) @ PReg (pmem) @ Enil) + | ADpostincr base n => Op (Store (OstoreU st chunk a)) (PReg (#rs) @ PReg (pmem) @ Enil) (* not modeled yet *) end. -Definition eval_addressing_rlocs_ld (ld: ld_instruction) (a: addressing) := +Definition eval_addressing_rlocs_ld (ld: load_rd_a) (chunk: memory_chunk) (a: addressing) := match a with - | ADimm base n => Op (Load (Oload1 ld a)) (PReg (#base) @ PReg (pmem) @ Enil) - | ADreg base r => Op (Load (Oload2 ld a)) (PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil) - | ADlsl base r n => Op (Load (Oload2 ld a)) (PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil) - | ADsxt base r n => Op (Load (Oload2 ld a)) (PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil) - | ADuxt base r n => Op (Load (Oload2 ld a)) (PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil) - | ADadr base id ofs => Op (Load (Oload1 ld a)) (PReg (#base) @ PReg (pmem) @ Enil) - | ADpostincr base n => Op (Load (Oload1 ld a)) (PReg (#base) @ PReg (pmem) @ Enil) + | ADimm base n => Op (Load (Oload1 ld chunk a)) (PReg (#base) @ PReg (pmem) @ Enil) + | ADreg base r => Op (Load (Oload2 ld chunk a)) (PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil) + | ADlsl base r n => Op (Load (Oload2 ld chunk a)) (PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil) + | ADsxt base r n => Op (Load (Oload2 ld chunk a)) (PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil) + | ADuxt base r n => Op (Load (Oload2 ld chunk a)) (PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil) + | ADadr base id ofs => Op (Load (Oload1 ld chunk a)) (PReg (#base) @ PReg (pmem) @ Enil) + | ADpostincr base n => Op (Load (Oload1 ld chunk a)) (PReg (#base) @ PReg (pmem) @ Enil) + end. + +Definition trans_ldp_chunk (chunk: memory_chunk): load_rd_a := + match chunk with + | Many32 => Pldrw_a + | Mint64 => Pldrx + | Many64 => Pldrx_a + | _ => Pldrw + end. + +Definition trans_stp_chunk (chunk: memory_chunk): store_rs_a := + match chunk with + | Many32 => Pstrw_a + | Mint64 => Pstrx + | Many64 => Pstrx_a + | _ => Pstrw end. Definition trans_load (ldi: ld_instruction) := match ldi with | PLd_rd_a ld r a => - let lr := eval_addressing_rlocs_ld ldi a in [(#r, lr)] - | Pldp ld r1 r2 a => - let ldi' := - match ld with - | Pldpw => Pldrw - | Pldpx => Pldrx - end in - let lr := eval_addressing_rlocs_ld (PLd_rd_a ldi' r1 a) a in - let ofs := match ld with | Pldpw => 4%Z | Pldpx => 8%Z end in + let lr := eval_addressing_rlocs_ld ld (chunk_load ld) a in [(#r, lr)] + | Pldp ld r1 r2 chk1 chk2 a => + let ldi1 := trans_ldp_chunk chk1 in + let ldi2 := trans_ldp_chunk chk2 in + let lr := eval_addressing_rlocs_ld ldi1 chk1 a in + let ofs := match chk1 with | Mint32 | Many32 => 4%Z | _ => 8%Z end in match a with | ADimm base n => let a' := (get_offset_addr a ofs) in [(#r1, lr); - (#r2, Op (Load (Oload1 (PLd_rd_a ldi' r2 a') a')) + (#r2, Op (Load (Oload1 ldi2 chk2 a')) (Old(PReg (#base)) @ PReg (pmem) @ Enil))] | _ => [(#PC, (Op (OError)) Enil)] end @@ -1174,20 +1187,17 @@ Definition trans_load (ldi: ld_instruction) := Definition trans_store (sti: st_instruction) := match sti with | PSt_rs_a st r a => - let lr := eval_addressing_rlocs_st sti r a in [(pmem, lr)] - | Pstp st r1 r2 a => - let sti' := - match st with - | Pstpw => Pstrw - | Pstpx => Pstrx - end in - let lr := eval_addressing_rlocs_st (PSt_rs_a sti' r1 a) r1 a in - let ofs := match st with | Pstpw => 4%Z | Pstpx => 8%Z end in + let lr := eval_addressing_rlocs_st st (chunk_store st) r a in [(pmem, lr)] + | Pstp st r1 r2 chk1 chk2 a => + let sti1 := trans_stp_chunk chk1 in + let sti2 := trans_stp_chunk chk2 in + let lr := eval_addressing_rlocs_st sti1 chk1 r1 a in + let ofs := match chk1 with | Mint32 | Many32 => 4%Z | _ => 8%Z end in match a with | ADimm base n => let a' := (get_offset_addr a ofs) in [(pmem, lr); - (pmem, Op (Store (Ostore1 (PSt_rs_a sti' r2 a') a')) + (pmem, Op (Store (Ostore1 sti2 chk2 a')) (PReg (#r2) @ Old(PReg (#base)) @ PReg (pmem) @ Enil))] | _ => [(#PC, (Op (OError)) Enil)] end @@ -1695,6 +1705,12 @@ Proof. econstructor. Qed. +Lemma load_chunk_neutral: forall chk v, + interp_load (trans_ldp_chunk chk) v = v. +Proof. + intros; destruct chk; simpl; reflexivity. +Qed. + Theorem bisimu_basic rsr mr sr bi: match_states (State rsr mr) sr -> match_outcome (exec_basic Ge.(_lk) Ge.(_genv) bi rsr mr) (inst_run Ge (trans_basic bi) sr sr). @@ -1721,14 +1737,16 @@ Local Ltac preg_eq_discr r rd := unfold exec_load1, exec_load2, chunk_load; unfold call_ll_loadv; try destruct (Mem.loadv _ _ _); simpl; auto. all: try (fold (ppos rd); Simpl_exists sr; auto; intros rr; Simpl_update). - + unfold exec_load, exec_load_double, eval_addressing_rlocs_ld, exp_eval; + + + unfold exec_load, exec_load_double, eval_addressing_rlocs_ld, exp_eval; destruct ld; destruct a; simpl; unfold control_eval; destruct Ge; auto; try fold (ppos base); try erewrite !H0, H; simpl; - unfold exec_load1, exec_load2, chunk_load; unfold call_ll_loadv; - try destruct (Mem.loadv _ _ _); simpl; auto; + unfold exec_load1, exec_load2; unfold call_ll_loadv; + destruct (Mem.loadv _ _ _); simpl; auto; fold (ppos rd1); rewrite assign_diff; discriminate_ppos; rewrite H; try destruct (Mem.loadv _ _ _); simpl; auto; Simpl_exists sr; + rewrite !load_chunk_neutral; try (rewrite !assign_diff; discriminate_ppos; reflexivity); try (destruct base; discriminate_ppos); repeat (try fold (ppos r); intros; Simpl_update). @@ -1746,7 +1764,7 @@ Local Ltac preg_eq_discr r rd := destruct st; destruct a; simpl; unfold control_eval; destruct Ge; auto; try fold (ppos base); try fold (ppos rs1); erewrite !H0, H; simpl; - unfold exec_store1, exec_store2, chunk_store; unfold call_ll_storev; + unfold exec_store1, exec_store2; unfold call_ll_storev; try destruct (Mem.storev _ _ _ _); simpl; auto; fold (ppos rs2); rewrite assign_diff; try congruence; try rewrite H0; simpl; try destruct (Mem.storev _ _ _ _); simpl; auto. @@ -2473,22 +2491,22 @@ Definition string_of_ldi (ldi: ld_instruction) : ?? pstring:= DO a' <~ string_of_addressing a;; DO rd' <~ dreg_name rd;; RET (Str "PLd_rd_a (" +; (string_of_ld_rd_a ld) +; " " +; rd' +; " " +; a' +; ")") - | Pldp _ _ _ a => + | Pldp _ _ _ _ _ a => DO a' <~ string_of_addressing a;; RET (Str "Pldp (" +; a' +; ")") end. Definition string_of_load (op: load_op) : ?? pstring := match op with - | Oload1 ld a => + | Oload1 ld _ a => DO a' <~ string_of_addressing a;; - DO ld' <~ string_of_ldi ld;; - RET((Str "Oload1 ") +; " " +; ld' +; " " +; a' +; " ") - | Oload2 ld a => + (*DO ld' <~ string_of_ldi ld;;*) + RET((Str "Oload1 ") +; " " +; string_of_ld_rd_a ld +; " " +; a' +; " ") + | Oload2 ld _ a => DO a' <~ string_of_addressing a;; - DO ld' <~ string_of_ldi ld;; - RET((Str "Oload2 ") +; " " +; ld' +; " " +; a' +; " ") - | OloadU _ _ => RET (Str "OloadU") + (*DO ld' <~ string_of_ldi ld;;*) + RET((Str "Oload2 ") +; " " +; string_of_ld_rd_a ld +; " " +; a' +; " ") + | OloadU _ _ _ => RET (Str "OloadU") end. Definition string_of_st_rs_a (st: store_rs_a) : pstring := @@ -2510,22 +2528,22 @@ Definition string_of_sti (sti: st_instruction) : ?? pstring:= DO a' <~ string_of_addressing a;; DO rs' <~ dreg_name rs;; RET (Str "PSt_rs_a (" +; (string_of_st_rs_a st) +; " " +; rs' +; " " +; a' +; ")") - | Pstp _ _ _ a => + | Pstp _ _ _ _ _ a => DO a' <~ string_of_addressing a;; RET (Str "Pstp (" +; a' +; ")") end. Definition string_of_store (op: store_op) : ?? pstring := match op with - | Ostore1 st a => + | Ostore1 st _ a => DO a' <~ string_of_addressing a;; - DO st' <~ string_of_sti st;; - RET((Str "Ostore1 ") +; " " +; st' +; " " +; a' +; " ") - | Ostore2 st a => + (*DO st' <~ string_of_sti st;;*) + RET((Str "Ostore1 ") +; " " +; string_of_st_rs_a st +; " " +; a' +; " ") + | Ostore2 st _ a => DO a' <~ string_of_addressing a;; - DO st' <~ string_of_sti st;; - RET((Str "Ostore2 ") +; " " +; st' +; " " +; a' +; " ") - | OstoreU _ _ => RET (Str "OstoreU") + (*DO st' <~ string_of_sti st;;*) + RET((Str "Ostore2 ") +; " " +; string_of_st_rs_a st +; " " +; a' +; " ") + | OstoreU _ _ _ => RET (Str "OstoreU") end. Definition string_of_control (op: control_op) : pstring := diff --git a/aarch64/Asmexpand.ml b/aarch64/Asmexpand.ml index 94697df7..1a02b019 100644 --- a/aarch64/Asmexpand.ml +++ b/aarch64/Asmexpand.ml @@ -74,7 +74,7 @@ let save_parameter_registers ir fr = let pos = 8*16 + !i*8 in if !i land 1 = 0 then begin emit (Pstpx(int_param_regs.(!i), int_param_regs.(!i + 1), - ADimm(XSP, Z.of_uint pos))); + Mint64, Mint64, ADimm(XSP, Z.of_uint pos))); i := !i + 2 end else begin emit (Pstrx(int_param_regs.(!i), ADimm(XSP, Z.of_uint pos))); @@ -169,8 +169,8 @@ let expand_builtin_memcpy_small sz al src dst = let (rdst, odst) = memcpy_small_arg sz dst tdst in let rec copy osrc odst sz = if sz >= 16 then begin - emit (Pldpx(X16, X30, ADimm(rsrc, osrc))); - emit (Pstpx(X16, X30, ADimm(rdst, odst))); + emit (Pldpx(X16, X30, Mint64, Mint64, ADimm(rsrc, osrc))); + emit (Pstpx(X16, X30, Mint64, Mint64, ADimm(rdst, odst))); copy (Ptrofs.add osrc _16) (Ptrofs.add odst _16) (sz - 16) end else if sz >= 8 then begin @@ -208,8 +208,8 @@ let expand_builtin_memcpy_big sz al src dst = let lbl = new_label () in expand_loadimm32 X15 (Z.of_uint (sz / 16)); emit (Plabel lbl); - emit (Pldpx(X16, X17, ADpostincr(RR1 X30, _16))); - emit (Pstpx(X16, X17, ADpostincr(RR1 X29, _16))); + emit (Pldpx(X16, X17, Mint64, Mint64, ADpostincr(RR1 X30, _16))); + emit (Pstpx(X16, X17, Mint64, Mint64, ADpostincr(RR1 X29, _16))); emit (Psubimm(W, RR1 X15, RR1 X15, _1)); emit (Pcbnz(W, X15, lbl)); if sz mod 16 >= 8 then begin diff --git a/aarch64/Asmgen.v b/aarch64/Asmgen.v index f7ec07ae..d4a63b65 100644 --- a/aarch64/Asmgen.v +++ b/aarch64/Asmgen.v @@ -324,12 +324,12 @@ Definition basic_to_instruction (b: basic) : res Asm.instruction := | PLoad (PLd_rd_a Pldrd rd a) => do rd' <- freg_of_preg rd; OK (Asm.Pldrd rd' a) | PLoad (PLd_rd_a Pldrd_a rd a) => do rd' <- freg_of_preg rd; OK (Asm.Pldrd_a rd' a) - | PLoad (Pldp Pldpw rd1 rd2 a) => do rd1' <- ireg_of_preg rd1; + | PLoad (Pldp Pldpw rd1 rd2 chk1 chk2 a) => do rd1' <- ireg_of_preg rd1; do rd2' <- ireg_of_preg rd2; - OK (Asm.Pldpw rd1 rd2 a) - | PLoad (Pldp Pldpx rd1 rd2 a) => do rd1' <- ireg_of_preg rd1; + OK (Asm.Pldpw rd1 rd2 chk1 chk2 a) + | PLoad (Pldp Pldpx rd1 rd2 chk1 chk2 a) => do rd1' <- ireg_of_preg rd1; do rd2' <- ireg_of_preg rd2; - OK (Asm.Pldpx rd1 rd2 a) + OK (Asm.Pldpx rd1 rd2 chk1 chk2 a) | PStore (PSt_rs_a Pstrw r a) => do r' <- ireg_of_preg r; OK (Asm.Pstrw r' a) | PStore (PSt_rs_a Pstrw_a r a) => do r' <- ireg_of_preg r; OK (Asm.Pstrw_a r' a) @@ -342,12 +342,12 @@ Definition basic_to_instruction (b: basic) : res Asm.instruction := | PStore (PSt_rs_a Pstrd r a) => do r' <- freg_of_preg r; OK (Asm.Pstrd r' a) | PStore (PSt_rs_a Pstrd_a r a) => do r' <- freg_of_preg r; OK (Asm.Pstrd_a r' a) - | PStore (Pstp Pstpw rs1 rs2 a) => do rs1' <- ireg_of_preg rs1; + | PStore (Pstp Pstpw rs1 rs2 chk1 chk2 a) => do rs1' <- ireg_of_preg rs1; do rs2' <- ireg_of_preg rs2; - OK (Asm.Pstpw rs1 rs2 a) - | PStore (Pstp Pstpx rs1 rs2 a) => do rs1' <- ireg_of_preg rs1; + OK (Asm.Pstpw rs1 rs2 chk1 chk2 a) + | PStore (Pstp Pstpx rs1 rs2 chk1 chk2 a) => do rs1' <- ireg_of_preg rs1; do rs2' <- ireg_of_preg rs2; - OK (Asm.Pstpx rs1 rs2 a) + OK (Asm.Pstpx rs1 rs2 chk1 chk2 a) | Pallocframe sz linkofs => OK (Asm.Pallocframe sz linkofs) | Pfreeframe sz linkofs => OK (Asm.Pfreeframe sz linkofs) diff --git a/aarch64/Asmgenproof.v b/aarch64/Asmgenproof.v index 793d94f9..a91ec285 100644 --- a/aarch64/Asmgenproof.v +++ b/aarch64/Asmgenproof.v @@ -1092,11 +1092,11 @@ Proof. + next_stuck_cong. Qed. -Lemma load_double_preserved n rs1 m1 rs1' m1' rs2 m2 rd1 rd2 chk f a: forall +Lemma load_double_preserved n rs1 m1 rs1' m1' rs2 m2 rd1 rd2 chk1 chk2 f a: forall (BOUNDED: 0 <= n <= Ptrofs.max_unsigned) (MATCHI: match_internal n (State rs1 m1) (State rs2 m2)) - (HLOAD: exec_load_double lk chk f a rd1 rd2 rs1 m1 = Next rs1' m1'), - exists (rs2' : regset) (m2' : mem), Asm.exec_load_double tge chk f a rd1 rd2 rs2 m2 = Next rs2' m2' + (HLOAD: exec_load_double lk chk1 chk2 f a rd1 rd2 rs1 m1 = Next rs1' m1'), + exists (rs2' : regset) (m2' : mem), Asm.exec_load_double tge chk1 chk2 f a rd1 rd2 rs2 m2 = Next rs2' m2' /\ match_internal (n + 1) (State rs1' m1') (State rs2' m2'). Proof. intros. @@ -1105,10 +1105,10 @@ Proof. erewrite <- !eval_addressing_preserved; eauto. destruct (is_pair_addressing_mode_correct a); try discriminate. destruct (Mem.loadv _ _ _); - destruct (Mem.loadv chk m2 + destruct (Mem.loadv chk2 m2 (eval_addressing lk - (get_offset_addr a match chk with - | Mint32 => 4 + (get_offset_addr a match chk1 with + | Mint32 | Many32 => 4 | _ => 8 end) rs1)); inversion HLOAD; auto. @@ -1141,11 +1141,11 @@ Proof. + next_stuck_cong. Qed. -Lemma store_double_preserved n rs1 m1 rs1' m1' rs2 m2 v1 v2 chk a: forall +Lemma store_double_preserved n rs1 m1 rs1' m1' rs2 m2 v1 v2 chk1 chk2 a: forall (BOUNDED: 0 <= n <= Ptrofs.max_unsigned) (MATCHI: match_internal n (State rs1 m1) (State rs2 m2)) - (HSTORE: exec_store_double lk chk a v1 v2 rs1 m1 = Next rs1' m1'), - exists (rs2' : regset) (m2' : mem), Asm.exec_store_double tge chk a v1 v2 rs2 m2 = Next rs2' m2' + (HSTORE: exec_store_double lk chk1 chk2 a v1 v2 rs1 m1 = Next rs1' m1'), + exists (rs2' : regset) (m2' : mem), Asm.exec_store_double tge chk1 chk2 a v1 v2 rs2 m2 = Next rs2' m2' /\ match_internal (n + 1) (State rs1' m1') (State rs2' m2'). Proof. intros. @@ -1154,12 +1154,13 @@ Proof. erewrite <- !eval_addressing_preserved; eauto. destruct (is_pair_addressing_mode_correct a); try discriminate. destruct (Mem.storev _ _ _ _); - try destruct (Mem.storev chk m - (eval_addressing lk - (get_offset_addr a match chk with - | Mint32 => 4 - | _ => 8 - end) rs1) v2); + try destruct (Mem.storev chk2 m + (eval_addressing lk + (get_offset_addr a + match chk1 with + | Mint32 | Many32 => 4 + | _ => 8 + end) rs1) v2); inversion HSTORE; auto. repeat (econstructor; eauto). * eapply nextinstr_agree_but_pc; intros. diff --git a/aarch64/PeepholeOracle.ml b/aarch64/PeepholeOracle.ml index 94efc75e..65c820ac 100644 --- a/aarch64/PeepholeOracle.ml +++ b/aarch64/PeepholeOracle.ml @@ -68,12 +68,9 @@ let dreg_of_ireg r = IR (RR1 r) * affectation eliminates the potential * candidate *) let verify_load_affect reg rd b rev = - let b = (IR b) in + let b = IR b in let rd = dreg_of_ireg rd in - if not rev then - dreg_eq reg b - else - (dreg_eq reg b) || (dreg_eq reg rd) + if not rev then dreg_eq reg b else dreg_eq reg b || dreg_eq reg rd (* Return true if an intermediate * read eliminates the potential @@ -86,9 +83,9 @@ let verify_load_read reg rd b rev = * affectation eliminates the potential * candidate *) let verify_store_affect reg rs b rev = - let b = (IR b) in + let b = IR b in let rs = dreg_of_ireg rs in - (dreg_eq reg b) || (dreg_eq reg rs) + dreg_eq reg b || dreg_eq reg rs (* Affect a symbolic memory list of potential replacements * for a given write in reg *) @@ -97,17 +94,11 @@ let rec affect_symb_mem reg insta pot_rep stype rev = | [] -> [] | h0 :: t0 -> ( match (insta.(h0), stype) with - | PLoad (PLd_rd_a (Pldrw, IR (RR1 rd), ADimm (b, n))), "ldrw" -> - if verify_load_affect reg rd b rev then affect_symb_mem reg insta t0 stype rev - else h0 :: affect_symb_mem reg insta t0 stype rev - | PLoad (PLd_rd_a (Pldrx, IR (RR1 rd), ADimm (b, n))), "ldrx" -> - if verify_load_affect reg rd b rev then affect_symb_mem reg insta t0 stype rev - else h0 :: affect_symb_mem reg insta t0 stype rev - | PStore (PSt_rs_a (Pstrw, IR (RR1 rs), ADimm (b, n))), "strw" -> - if verify_store_affect reg rs b rev then + | PLoad (PLd_rd_a (_, IR (RR1 rd), ADimm (b, n))), "ldr" -> + if verify_load_affect reg rd b rev then affect_symb_mem reg insta t0 stype rev else h0 :: affect_symb_mem reg insta t0 stype rev - | PStore (PSt_rs_a (Pstrx, IR (RR1 rs), ADimm (b, n))), "strx" -> + | PStore (PSt_rs_a (_, IR (RR1 rs), ADimm (b, n))), "str" -> if verify_store_affect reg rs b rev then affect_symb_mem reg insta t0 stype rev else h0 :: affect_symb_mem reg insta t0 stype rev @@ -121,23 +112,20 @@ let rec read_symb_mem reg insta pot_rep stype rev = | [] -> [] | h0 :: t0 -> ( match (insta.(h0), stype) with - | PLoad (PLd_rd_a (Pldrw, IR (RR1 rd), ADimm (b, n))), "ldrw" -> - if verify_load_read reg rd b rev then read_symb_mem reg insta t0 stype rev + | PLoad (PLd_rd_a (_, IR (RR1 rd), ADimm (b, n))), "ldr" -> + if verify_load_read reg rd b rev then + read_symb_mem reg insta t0 stype rev else h0 :: read_symb_mem reg insta t0 stype rev - | PLoad (PLd_rd_a (Pldrx, IR (RR1 rd), ADimm (b, n))), "ldrx" -> - if verify_load_read reg rd b rev then read_symb_mem reg insta t0 stype rev - else h0 :: read_symb_mem reg insta t0 stype rev - | PStore (PSt_rs_a (Pstrw, IR (RR1 rs), ADimm (b, n))), "strw" -> - h0 :: affect_symb_mem reg insta t0 stype rev - | PStore (PSt_rs_a (Pstrx, IR (RR1 rs), ADimm (b, n))), "strx" -> - h0 :: affect_symb_mem reg insta t0 stype rev + | PStore (PSt_rs_a (_, IR (RR1 rs), ADimm (b, n))), "str" -> + h0 :: read_symb_mem reg insta t0 stype rev | _, _ -> failwith "read_symb_mem: Found an inconsistent inst in pot_rep") (* Update a symbolic memory list of potential replacements * for any addressing *) let update_pot_rep_addressing a insta pot_rep stype rev = match a with - | ADimm (base, _) -> pot_rep := read_symb_mem (IR base) insta !pot_rep stype rev + | ADimm (base, _) -> + pot_rep := read_symb_mem (IR base) insta !pot_rep stype rev | ADreg (base, r) -> pot_rep := read_symb_mem (IR base) insta !pot_rep stype rev; pot_rep := read_symb_mem (dreg_of_ireg r) insta !pot_rep stype rev @@ -161,7 +149,8 @@ let update_pot_rep_basic inst insta pot_rep stype rev = match inst with | PArith i -> ( match i with - | PArithP (_, rd) -> pot_rep := affect_symb_mem rd insta !pot_rep stype rev + | PArithP (_, rd) -> + pot_rep := affect_symb_mem rd insta !pot_rep stype rev | PArithPP (_, rd, rs) -> pot_rep := affect_symb_mem rd insta !pot_rep stype rev; pot_rep := read_symb_mem rs insta !pot_rep stype rev @@ -173,14 +162,16 @@ let update_pot_rep_basic inst insta pot_rep stype rev = pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev; (match rs1 with | RR0 rs1 -> - pot_rep := read_symb_mem (dreg_of_ireg rs1) insta !pot_rep stype rev + pot_rep := + read_symb_mem (dreg_of_ireg rs1) insta !pot_rep stype rev | _ -> ()); pot_rep := read_symb_mem (dreg_of_ireg rs2) insta !pot_rep stype rev | PArithRR0 (_, rd, rs) -> ( pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev; match rs with | RR0 rs1 -> - pot_rep := read_symb_mem (dreg_of_ireg rs1) insta !pot_rep stype rev + pot_rep := + read_symb_mem (dreg_of_ireg rs1) insta !pot_rep stype rev | _ -> ()) | PArithARRRR0 (_, rd, rs1, rs2, rs3) -> ( pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev; @@ -188,7 +179,8 @@ let update_pot_rep_basic inst insta pot_rep stype rev = pot_rep := read_symb_mem (dreg_of_ireg rs2) insta !pot_rep stype rev; match rs3 with | RR0 rs1 -> - pot_rep := read_symb_mem (dreg_of_ireg rs1) insta !pot_rep stype rev + pot_rep := + read_symb_mem (dreg_of_ireg rs1) insta !pot_rep stype rev | _ -> ()) | PArithComparisonPP (_, rs1, rs2) -> pot_rep := read_symb_mem rs1 insta !pot_rep stype rev; @@ -196,7 +188,8 @@ let update_pot_rep_basic inst insta pot_rep stype rev = | PArithComparisonR0R (_, rs1, rs2) -> (match rs1 with | RR0 rs1 -> - pot_rep := read_symb_mem (dreg_of_ireg rs1) insta !pot_rep stype rev + pot_rep := + read_symb_mem (dreg_of_ireg rs1) insta !pot_rep stype rev | _ -> ()); pot_rep := read_symb_mem (dreg_of_ireg rs2) insta !pot_rep stype rev | PArithComparisonP (_, rs1) -> @@ -206,7 +199,8 @@ let update_pot_rep_basic inst insta pot_rep stype rev = | Pfmovi (_, _, rs) -> ( match rs with | RR0 rs -> - pot_rep := read_symb_mem (dreg_of_ireg rs) insta !pot_rep stype rev + pot_rep := + read_symb_mem (dreg_of_ireg rs) insta !pot_rep stype rev | _ -> ()) | Pcsel (rd, rs1, rs2, _) -> pot_rep := affect_symb_mem rd insta !pot_rep stype rev; @@ -215,14 +209,16 @@ let update_pot_rep_basic inst insta pot_rep stype rev = | Pfnmul (_, rd, rs1, rs2) -> ()) | PLoad i -> ( match stype with - | "ldrw" | "ldrx" -> ( + | "ldr" -> ( match i with | PLd_rd_a (_, rd, a) -> pot_rep := affect_symb_mem rd insta !pot_rep stype rev; update_pot_rep_addressing a insta pot_rep stype rev - | Pldp (_, rd1, rd2, a) -> - pot_rep := affect_symb_mem (dreg_of_ireg rd1) insta !pot_rep stype rev; - pot_rep := affect_symb_mem (dreg_of_ireg rd2) insta !pot_rep stype rev; + | Pldp (_, rd1, rd2, _, _, a) -> + pot_rep := + affect_symb_mem (dreg_of_ireg rd1) insta !pot_rep stype rev; + pot_rep := + affect_symb_mem (dreg_of_ireg rd2) insta !pot_rep stype rev; update_pot_rep_addressing a insta pot_rep stype rev) | _ -> pot_rep := []) | PStore _ -> pot_rep := [] @@ -248,17 +244,21 @@ let rec search_compat_rep r2 b2 n2 insta pot_rep stype = | [] -> None | h0 :: t0 -> ( match (insta.(h0), stype) with - | PLoad (PLd_rd_a (Pldrw, IR (RR1 rd1), ADimm (b1, n1))), "ldrw" -> - if is_valid_ldrw rd1 r2 b1 b2 n1 n2 then Some (h0, rd1, b1, n1) + | PLoad (PLd_rd_a (ld1, IR (RR1 rd1), ADimm (b1, n1))), "ldrw" -> + if is_valid_ldrw rd1 r2 b1 b2 n1 n2 then + Some (h0, chunk_load ld1, rd1, b1, n1) else search_compat_rep r2 b2 n2 insta t0 stype - | PLoad (PLd_rd_a (Pldrx, IR (RR1 rd1), ADimm (b1, n1))), "ldrx" -> - if is_valid_ldrx rd1 r2 b1 b2 n1 n2 then Some (h0, rd1, b1, n1) + | PLoad (PLd_rd_a (ld1, IR (RR1 rd1), ADimm (b1, n1))), "ldrx" -> + if is_valid_ldrx rd1 r2 b1 b2 n1 n2 then + Some (h0, chunk_load ld1, rd1, b1, n1) else search_compat_rep r2 b2 n2 insta t0 stype - | PStore (PSt_rs_a (Pstrw, IR (RR1 rs1), ADimm (b1, n1))), "strw" -> - if is_valid_strw b1 b2 n1 n2 then Some (h0, rs1, b1, n1) + | PStore (PSt_rs_a (st1, IR (RR1 rs1), ADimm (b1, n1))), "strw" -> + if is_valid_strw b1 b2 n1 n2 then + Some (h0, chunk_store st1, rs1, b1, n1) else search_compat_rep r2 b2 n2 insta t0 stype - | PStore (PSt_rs_a (Pstrx, IR (RR1 rs1), ADimm (b1, n1))), "strx" -> - if is_valid_strx b1 b2 n1 n2 then Some (h0, rs1, b1, n1) + | PStore (PSt_rs_a (st1, IR (RR1 rs1), ADimm (b1, n1))), "strx" -> + if is_valid_strx b1 b2 n1 n2 then + Some (h0, chunk_store st1, rs1, b1, n1) else search_compat_rep r2 b2 n2 insta t0 stype | _, _ -> failwith "search_compat_rep: Found an inconsistent inst in pot_rep") @@ -270,20 +270,25 @@ let rec search_compat_rep_inv r2 b2 n2 insta pot_rep stype = | [] -> None | h0 :: t0 -> ( match (insta.(h0), stype) with - | PLoad (PLd_rd_a (Pldrw, IR (RR1 rd1), ADimm (b1, n1))), "ldrw" -> - if is_valid_ldrw r2 rd1 b2 b1 n2 n1 then Some (h0, rd1, b1, n1) + | PLoad (PLd_rd_a (ld1, IR (RR1 rd1), ADimm (b1, n1))), "ldrw" -> + if is_valid_ldrw r2 rd1 b2 b1 n2 n1 then + Some (h0, chunk_load ld1, rd1, b1, n1) else search_compat_rep_inv r2 b2 n2 insta t0 stype - | PLoad (PLd_rd_a (Pldrx, IR (RR1 rd1), ADimm (b1, n1))), "ldrx" -> - if is_valid_ldrx r2 rd1 b2 b1 n2 n1 then Some (h0, rd1, b1, n1) + | PLoad (PLd_rd_a (ld1, IR (RR1 rd1), ADimm (b1, n1))), "ldrx" -> + if is_valid_ldrx r2 rd1 b2 b1 n2 n1 then + Some (h0, chunk_load ld1, rd1, b1, n1) else search_compat_rep_inv r2 b2 n2 insta t0 stype - | PStore (PSt_rs_a (Pstrw, IR (RR1 rs1), ADimm (b1, n1))), "strw" -> - if is_valid_strw b2 b1 n2 n1 then Some (h0, rs1, b1, n1) + | PStore (PSt_rs_a (st1, IR (RR1 rs1), ADimm (b1, n1))), "strw" -> + if is_valid_strw b2 b1 n2 n1 then + Some (h0, chunk_store st1, rs1, b1, n1) else search_compat_rep_inv r2 b2 n2 insta t0 stype - | PStore (PSt_rs_a (Pstrx, IR (RR1 rs1), ADimm (b1, n1))), "strx" -> - if is_valid_strx b2 b1 n2 n1 then Some (h0, rs1, b1, n1) + | PStore (PSt_rs_a (st1, IR (RR1 rs1), ADimm (b1, n1))), "strx" -> + if is_valid_strx b2 b1 n2 n1 then + Some (h0, chunk_store st1, rs1, b1, n1) else search_compat_rep_inv r2 b2 n2 insta t0 stype | _, _ -> - failwith "search_compat_rep_inv: Found an inconsistent inst in pot_rep") + failwith + "search_compat_rep_inv: Found an inconsistent inst in pot_rep") (* This is useful to manage the case were the immofs * of the first ldr/str is greater than the second one *) @@ -292,6 +297,58 @@ let min_is_rev n1 n2 = let z2 = to_int (camlint64_of_coqint n2) in if z1 < z2 then true else false +let trans_ldi (ldi : load_rd_a) : load_rd1_rd2_a = + match ldi with + | Pldrw | Pldrw_a -> Pldpw + | Pldrx | Pldrx_a -> Pldpx + | _ -> failwith "trans_ldi: Found a non compatible load to translate" + +let trans_sti (sti : store_rs_a) : store_rs1_rs2_a = + match sti with + | Pstrw | Pstrw_a -> Pstpw + | Pstrx | Pstrx_a -> Pstpx + | _ -> failwith "trans_sti: Found a non compatible store to translate" + +let is_compat_load (ldi : load_rd_a) = + match ldi with Pldrw | Pldrw_a | Pldrx | Pldrx_a -> true | _ -> false + +let are_compat_load (ldi1 : load_rd_a) (ldi2 : load_rd_a) = + match ldi1 with + | Pldrw | Pldrw_a -> ( match ldi2 with Pldrw | Pldrw_a -> true | _ -> false) + | Pldrx | Pldrx_a -> ( match ldi2 with Pldrx | Pldrx_a -> true | _ -> false) + | _ -> false + +let is_compat_store (sti : store_rs_a) = + match sti with Pstrw | Pstrw_a | Pstrx | Pstrx_a -> true | _ -> false + +let are_compat_store (sti1 : store_rs_a) (sti2 : store_rs_a) = + match sti1 with + | Pstrw | Pstrw_a -> ( match sti2 with Pstrw | Pstrw_a -> true | _ -> false) + | Pstrx | Pstrx_a -> ( match sti2 with Pstrx | Pstrx_a -> true | _ -> false) + | _ -> false + +let get_load_string (ldi : load_rd_a) = + match ldi with + | Pldrw | Pldrw_a -> "ldrw" + | Pldrx | Pldrx_a -> "ldrx" + | _ -> failwith "get_load_string: Found a non compatible load to translate" + +let get_store_string (sti : store_rs_a) = + match sti with + | Pstrw | Pstrw_a -> "strw" + | Pstrx | Pstrx_a -> "strx" + | _ -> failwith "get_store_string: Found a non compatible store to translate" + +let is_valid_ldr rd1 rd2 b1 b2 n1 n2 stype = + match stype with + | "ldrw" -> is_valid_ldrw rd1 rd2 b1 b2 n1 n2 + | _ -> is_valid_ldrx rd1 rd2 b1 b2 n1 n2 + +let is_valid_str b1 b2 n1 n2 stype = + match stype with + | "strw" -> is_valid_strw b1 b2 n1 n2 + | _ -> is_valid_strx b1 b2 n1 n2 + (* Main peephole function in backward style *) let pair_rep_inv insta = (* Each list below is a symbolic mem representation @@ -305,64 +362,66 @@ let pair_rep_inv insta = let h0 = insta.(i) in let h1 = insta.(i - 1) in (* Here we need to update every symbolic memory according to the matched inst *) - update_pot_rep_basic h0 insta pot_ldrw_rep "ldrw" true; - update_pot_rep_basic h0 insta pot_ldrx_rep "ldrx" true; - update_pot_rep_basic h0 insta pot_strw_rep "strw" true; - update_pot_rep_basic h0 insta pot_strx_rep "strx" true; + update_pot_rep_basic h0 insta pot_ldrw_rep "ldr" true; + update_pot_rep_basic h0 insta pot_ldrx_rep "ldr" true; + update_pot_rep_basic h0 insta pot_strw_rep "str" true; + update_pot_rep_basic h0 insta pot_strx_rep "str" true; match (h0, h1) with - (* Non-consecutive ldrw *) - | PLoad (PLd_rd_a (Pldrw, IR (RR1 rd1), ADimm (b1, n1))), _ -> ( - (* Search a previous compatible load *) - match search_compat_rep_inv rd1 b1 n1 insta !pot_ldrw_rep "ldrw" with - (* If we can't find a candidate, add the current load as a potential future one *) - | None -> pot_ldrw_rep := i :: !pot_ldrw_rep - (* Else, perform the peephole *) - | Some (rep, r, b, n) -> - (* The two lines below are used to filter the elected candidate *) - let filt x = x != rep in - pot_ldrw_rep := List.filter filt !pot_ldrw_rep; - insta.(rep) <- Pnop; - if min_is_rev n n1 then ( - if debug then eprintf "LDP_W_BACK_SPACED_PEEP_IMM_INC\n"; - insta.(i) <- PLoad (Pldp (Pldpw, r, rd1, ADimm (b, n)))) - else ( - if debug then eprintf "LDP_W_BACK_SPACED_PEEP_IMM_DEC\n"; - insta.(i) <- PLoad (Pldp (Pldpw, rd1, r, ADimm (b, n1))))) - (* Non-consecutive ldrx *) - | PLoad (PLd_rd_a (Pldrx, IR (RR1 rd1), ADimm (b1, n1))), _ -> ( - match search_compat_rep_inv rd1 b1 n1 insta !pot_ldrx_rep "ldrx" with - | None -> pot_ldrx_rep := i :: !pot_ldrx_rep - | Some (rep, r, b, n) -> - let filt x = x != rep in - pot_ldrx_rep := List.filter filt !pot_ldrx_rep; - insta.(rep) <- Pnop; - if min_is_rev n n1 then ( - if debug then eprintf "LDP_X_BACK_SPACED_PEEP_IMM_INC\n"; - insta.(i) <- PLoad (Pldp (Pldpx, r, rd1, ADimm (b, n)))) - else ( - if debug then eprintf "LDP_X_BACK_SPACED_PEEP_IMM_DEC\n"; - insta.(i) <- PLoad (Pldp (Pldpx, rd1, r, ADimm (b, n1))))) - (* Non-consecutive stpw *) - | PStore (PSt_rs_a (Pstrw, IR (RR1 rs1), ADimm (b1, n1))), _ -> ( - match search_compat_rep_inv rs1 b1 n1 insta !pot_strw_rep "strw" with - | None -> pot_strw_rep := i :: !pot_strw_rep - | Some (rep, r, b, n) -> - let filt x = x != rep in - pot_strw_rep := List.filter filt !pot_strw_rep; - insta.(rep) <- Pnop; - if debug then eprintf "STP_W_BACK_SPACED_PEEP_IMM_INC\n"; - insta.(i) <- PStore (Pstp (Pstpw, r, rs1, ADimm (b, n)))) - (* Non-consecutive stpx *) - | PStore (PSt_rs_a (Pstrx, IR (RR1 rs1), ADimm (b1, n1))), _ -> ( - match search_compat_rep_inv rs1 b1 n1 insta !pot_strx_rep "strx" with - | None -> pot_strx_rep := i :: !pot_strx_rep - | Some (rep, r, b, n) -> - let filt x = x != rep in - pot_strx_rep := List.filter filt !pot_strx_rep; - insta.(rep) <- Pnop; - if debug then eprintf "STP_X_BACK_SPACED_PEEP_IMM_INC\n"; - insta.(i) <- PStore (Pstp (Pstpx, r, rs1, ADimm (b, n)))) - (* Any other inst *) + (* Non-consecutive ldr *) + | PLoad (PLd_rd_a (ldi, IR (RR1 rd1), ADimm (b1, n1))), _ -> ( + if is_compat_load ldi then + let pot_rep = + match ldi with Pldrw | Pldrw_a -> pot_ldrw_rep | _ -> pot_ldrx_rep + in + (* Search a previous compatible load *) + match + search_compat_rep_inv rd1 b1 n1 insta !pot_rep (get_load_string ldi) + with + (* If we can't find a candidate, add the current load as a potential future one *) + | None -> pot_rep := i :: !pot_rep + (* Else, perform the peephole *) + | Some (rep, c, r, b, n) -> + (* The two lines below are used to filter the elected candidate *) + let filt x = x != rep in + pot_rep := List.filter filt !pot_rep; + insta.(rep) <- Pnop; + if min_is_rev n n1 then ( + if debug then eprintf "LDP_BACK_SPACED_PEEP_IMM_INC\n"; + insta.(i) <- + PLoad + (Pldp + (trans_ldi ldi, r, rd1, c, chunk_load ldi, ADimm (b, n)))) + else ( + if debug then eprintf "LDP_BACK_SPACED_PEEP_IMM_DEC\n"; + insta.(i) <- + PLoad + (Pldp + (trans_ldi ldi, rd1, r, chunk_load ldi, c, ADimm (b, n1)))) + ) + (* Non-consecutive str *) + | PStore (PSt_rs_a (sti, IR (RR1 rd1), ADimm (b1, n1))), _ -> ( + if is_compat_store sti then + let pot_rep = + match sti with Pstrw | Pstrw_a -> pot_strw_rep | _ -> pot_strx_rep + in + (* Search a previous compatible store *) + match + search_compat_rep_inv rd1 b1 n1 insta !pot_rep + (get_store_string sti) + with + (* If we can't find a candidate, add the current store as a potential future one *) + | None -> pot_rep := i :: !pot_rep + (* Else, perform the peephole *) + | Some (rep, c, r, b, n) -> + (* The two lines below are used to filter the elected candidate *) + let filt x = x != rep in + pot_rep := List.filter filt !pot_rep; + insta.(rep) <- Pnop; + if debug then eprintf "STP_BACK_SPACED_PEEP_IMM_INC\n"; + insta.(i) <- + PStore + (Pstp (trans_sti sti, r, rd1, c, chunk_store sti, ADimm (b, n))) + (* Any other inst *)) | _, _ -> () done @@ -379,115 +438,109 @@ let pair_rep insta = let h0 = insta.(i) in let h1 = insta.(i + 1) in (* Here we need to update every symbolic memory according to the matched inst *) - update_pot_rep_basic h0 insta pot_ldrw_rep "ldrw" false; - update_pot_rep_basic h0 insta pot_ldrx_rep "ldrx" false; - update_pot_rep_basic h0 insta pot_strw_rep "strw" false; - update_pot_rep_basic h0 insta pot_strx_rep "strx" false; + update_pot_rep_basic h0 insta pot_ldrw_rep "ldr" false; + update_pot_rep_basic h0 insta pot_ldrx_rep "ldr" false; + update_pot_rep_basic h0 insta pot_strw_rep "str" false; + update_pot_rep_basic h0 insta pot_strx_rep "str" false; match (h0, h1) with - (* Consecutive ldrw *) - | ( PLoad (PLd_rd_a (Pldrw, IR (RR1 rd1), ADimm (b1, n1))), - PLoad (PLd_rd_a (Pldrw, IR (RR1 rd2), ADimm (b2, n2))) ) -> - (* When both load the same dest, and with no reuse of ld1 in ld2. *) - if ireg_eq rd1 rd2 && not (iregsp_eq b2 (RR1 rd1)) then ( - if debug then eprintf "LDP_WOPT\n"; - insta.(i) <- Pnop - (* When two consec mem addr are loading two different dest. *)) - else if is_valid_ldrw rd1 rd2 b1 b2 n1 n2 then ( - if min_is_rev n1 n2 then ( - if debug then eprintf "LDP_W_CONSEC_PEEP_IMM_INC\n"; - insta.(i) <- PLoad (Pldp (Pldpw, rd1, rd2, ADimm (b1, n1)))) - else ( - if debug then eprintf "LDP_W_CONSEC_PEEP_IMM_DEC\n"; - insta.(i) <- PLoad (Pldp (Pldpw, rd2, rd1, ADimm (b1, n2)))); - insta.(i + 1) <- Pnop) - (* Consecutive ldrx *) - | ( PLoad (PLd_rd_a (Pldrx, IR (RR1 rd1), ADimm (b1, n1))), - PLoad (PLd_rd_a (Pldrx, IR (RR1 rd2), ADimm (b2, n2))) ) -> - if ireg_eq rd1 rd2 && not (iregsp_eq b2 (RR1 rd1)) then ( - if debug then eprintf "LDP_XOPT\n"; - insta.(i) <- Pnop) - else if is_valid_ldrx rd1 rd2 b1 b2 n1 n2 then ( - if min_is_rev n1 n2 then ( - if debug then eprintf "LDP_X_CONSEC_PEEP_IMM_INC\n"; - insta.(i) <- PLoad (Pldp (Pldpx, rd1, rd2, ADimm (b1, n1)))) - else ( - if debug then eprintf "LDP_X_CONSEC_PEEP_IMM_DEC\n"; - insta.(i) <- PLoad (Pldp (Pldpx, rd2, rd1, ADimm (b1, n2)))); - insta.(i + 1) <- Pnop) - (* Non-consecutive ldrw *) - | PLoad (PLd_rd_a (Pldrw, IR (RR1 rd1), ADimm (b1, n1))), _ -> ( - (* Search a previous compatible load *) - match search_compat_rep rd1 b1 n1 insta !pot_ldrw_rep "ldrw" with - (* If we can't find a candidate, add the current load as a potential future one *) - | None -> pot_ldrw_rep := i :: !pot_ldrw_rep - (* Else, perform the peephole *) - | Some (rep, r, b, n) -> - (* The two lines below are used to filter the elected candidate *) - let filt x = x != rep in - pot_ldrw_rep := List.filter filt !pot_ldrw_rep; - insta.(rep) <- Pnop; - if min_is_rev n n1 then ( - if debug then eprintf "LDP_W_FORW_SPACED_PEEP_IMM_INC\n"; - insta.(i) <- PLoad (Pldp (Pldpw, r, rd1, ADimm (b, n)))) - else ( - if debug then eprintf "LDP_W_FORW_SPACED_PEEP_IMM_DEC\n"; - insta.(i) <- PLoad (Pldp (Pldpw, rd1, r, ADimm (b, n1))))) - (* Non-consecutive ldrx *) - | PLoad (PLd_rd_a (Pldrx, IR (RR1 rd1), ADimm (b1, n1))), _ -> ( - match search_compat_rep rd1 b1 n1 insta !pot_ldrx_rep "ldrx" with - | None -> pot_ldrx_rep := i :: !pot_ldrx_rep - | Some (rep, r, b, n) -> - let filt x = x != rep in - pot_ldrx_rep := List.filter filt !pot_ldrx_rep; - insta.(rep) <- Pnop; - if min_is_rev n n1 then ( - if debug then eprintf "LDP_X_FORW_SPACED_PEEP_IMM_INC\n"; - insta.(i) <- PLoad (Pldp (Pldpx, r, rd1, ADimm (b, n)))) + (* Consecutive ldr *) + | ( PLoad (PLd_rd_a (ldi1, IR (RR1 rd1), ADimm (b1, n1))), + PLoad (PLd_rd_a (ldi2, IR (RR1 rd2), ADimm (b2, n2))) ) -> + if are_compat_load ldi1 ldi2 then + if is_valid_ldr rd1 rd2 b1 b2 n1 n2 (get_load_string ldi1) then ( + if min_is_rev n1 n2 then ( + if debug then eprintf "LDP_CONSEC_PEEP_IMM_INC\n"; + insta.(i) <- + PLoad + (Pldp + ( trans_ldi ldi1, + rd1, + rd2, + chunk_load ldi1, + chunk_load ldi2, + ADimm (b1, n1) ))) else ( - if debug then eprintf "LDP_X_FORW_SPACED_PEEP_IMM_DEC\n"; - insta.(i) <- PLoad (Pldp (Pldpx, rd1, r, ADimm (b, n1))))) - (* Consecutive strw *) - | ( PStore (PSt_rs_a (Pstrw, IR (RR1 rs1), ADimm (b1, n1))), - PStore (PSt_rs_a (Pstrw, IR (RR1 rs2), ADimm (b2, n2))) ) -> - (* When both store at the same addr, same ofs. *) - (*if (iregsp_eq b1 b2) && (z2 =? z1) then - Pnop :: (pair_rep insta t0) - (* When two consec mem addr are targeted by two store. *) - else*) - if is_valid_strw b1 b2 n1 n2 then ( - if debug then eprintf "STP_W_CONSEC_PEEP_IMM_INC\n"; - insta.(i) <- PStore (Pstp (Pstpw, rs1, rs2, ADimm (b1, n1))); - insta.(i + 1) <- Pnop (* When nothing can be optimized. *)) - (* Consecutive strx *) - | ( PStore (PSt_rs_a (Pstrx, IR (RR1 rs1), ADimm (b1, n1))), - PStore (PSt_rs_a (Pstrx, IR (RR1 rs2), ADimm (b2, n2))) ) -> - (*if (iregsp_eq b1 b2) && (z2 =? z1) then - Pnop :: (pair_rep insta t0) - else*) - if is_valid_strx b1 b2 n1 n2 then ( - if debug then eprintf "STP_X_CONSEC_PEEP_IMM_INC\n"; - insta.(i) <- PStore (Pstp (Pstpx, rs1, rs2, ADimm (b1, n1))); - insta.(i + 1) <- Pnop) - (* Non-consecutive stpw *) - | PStore (PSt_rs_a (Pstrw, IR (RR1 rs1), ADimm (b1, n1))), _ -> ( - match search_compat_rep rs1 b1 n1 insta !pot_strw_rep "strw" with - | None -> pot_strw_rep := i :: !pot_strw_rep - | Some (rep, r, b, n) -> - let filt x = x != rep in - pot_strw_rep := List.filter filt !pot_strw_rep; - insta.(rep) <- Pnop; - if debug then eprintf "STP_W_FORW_SPACED_PEEP_IMM_INC\n"; - insta.(i) <- PStore (Pstp (Pstpw, r, rs1, ADimm (b, n)))) - (* Non-consecutive stpx *) - | PStore (PSt_rs_a (Pstrx, IR (RR1 rs1), ADimm (b1, n1))), _ -> ( - match search_compat_rep rs1 b1 n1 insta !pot_strx_rep "strx" with - | None -> pot_strx_rep := i :: !pot_strx_rep - | Some (rep, r, b, n) -> - let filt x = x != rep in - pot_strx_rep := List.filter filt !pot_strx_rep; - insta.(rep) <- Pnop; - if debug then eprintf "STP_X_FORW_SPACED_PEEP_IMM_INC\n"; - insta.(i) <- PStore (Pstp (Pstpx, r, rs1, ADimm (b, n)))) + if debug then eprintf "LDP_CONSEC_PEEP_IMM_DEC\n"; + insta.(i) <- + PLoad + (Pldp + ( trans_ldi ldi1, + rd2, + rd1, + chunk_load ldi2, + chunk_load ldi1, + ADimm (b1, n2) ))); + insta.(i + 1) <- Pnop) + (* Non-consecutive ldr *) + | PLoad (PLd_rd_a (ldi, IR (RR1 rd1), ADimm (b1, n1))), _ -> ( + if is_compat_load ldi then + let pot_rep = + match ldi with Pldrw | Pldrw_a -> pot_ldrw_rep | _ -> pot_ldrx_rep + in + (* Search a previous compatible load *) + match + search_compat_rep rd1 b1 n1 insta !pot_rep (get_load_string ldi) + with + (* If we can't find a candidate, add the current load as a potential future one *) + | None -> pot_rep := i :: !pot_rep + (* Else, perform the peephole *) + | Some (rep, c, r, b, n) -> + (* The two lines below are used to filter the elected candidate *) + let filt x = x != rep in + pot_rep := List.filter filt !pot_rep; + insta.(rep) <- Pnop; + if min_is_rev n n1 then ( + if debug then eprintf "LDP_FORW_SPACED_PEEP_IMM_INC\n"; + insta.(i) <- + PLoad + (Pldp + (trans_ldi ldi, r, rd1, c, chunk_load ldi, ADimm (b, n)))) + else ( + if debug then eprintf "LDP_FORW_SPACED_PEEP_IMM_DEC\n"; + insta.(i) <- + PLoad + (Pldp + (trans_ldi ldi, rd1, r, chunk_load ldi, c, ADimm (b, n1)))) + ) + (* Consecutive str *) + | ( PStore (PSt_rs_a (sti1, IR (RR1 rd1), ADimm (b1, n1))), + PStore (PSt_rs_a (sti2, IR (RR1 rd2), ADimm (b2, n2))) ) -> + if are_compat_store sti1 sti2 then + if is_valid_str b1 b2 n1 n2 (get_store_string sti1) then ( + if debug then eprintf "STP_CONSEC_PEEP_IMM_INC\n"; + insta.(i) <- + PStore + (Pstp + ( trans_sti sti1, + rd1, + rd2, + chunk_store sti1, + chunk_store sti2, + ADimm (b1, n1) )); + insta.(i + 1) <- Pnop) + (* Non-consecutive str *) + | PStore (PSt_rs_a (sti, IR (RR1 rd1), ADimm (b1, n1))), _ -> ( + if is_compat_store sti then + let pot_rep = + match sti with Pstrw | Pstrw_a -> pot_strw_rep | _ -> pot_strx_rep + in + (* Search a previous compatible store *) + match + search_compat_rep rd1 b1 n1 insta !pot_rep (get_store_string sti) + with + (* If we can't find a candidate, add the current store as a potential future one *) + | None -> pot_rep := i :: !pot_rep + (* Else, perform the peephole *) + | Some (rep, c, r, b, n) -> + (* The two lines below are used to filter the elected candidate *) + let filt x = x != rep in + pot_rep := List.filter filt !pot_rep; + insta.(rep) <- Pnop; + if debug then eprintf "STP_FORW_SPACED_PEEP_IMM_INC\n"; + insta.(i) <- + PStore + (Pstp (trans_sti sti, r, rd1, c, chunk_store sti, ADimm (b, n))) + ) (* Any other inst *) | _, _ -> () done diff --git a/aarch64/PostpassSchedulingOracle.ml b/aarch64/PostpassSchedulingOracle.ml index e233f77d..d7fab1de 100644 --- a/aarch64/PostpassSchedulingOracle.ml +++ b/aarch64/PostpassSchedulingOracle.ml @@ -18,6 +18,7 @@ open OpWeightsAsm open InstructionScheduler let debug = false + let stats = false (** @@ -73,47 +74,22 @@ let arith_p_rec i i' rd = } let arith_pp_rec i rd rs = - { - inst = i; - write_locs = [ rd ]; - read_locs = [ rs ]; - is_control = false; - } + { inst = i; write_locs = [ rd ]; read_locs = [ rs ]; is_control = false } let arith_ppp_rec i rd r1 r2 = - { - inst = i; - write_locs = [ rd ]; - read_locs = [ r1; r2 ]; - is_control = false; - } + { inst = i; write_locs = [ rd ]; read_locs = [ r1; r2 ]; is_control = false } let arith_rr0r_rec i rd r1 r2 = let rlocs = if is_XZR r1 then [ r2 ] else [ r1; r2 ] in - { - inst = i; - write_locs = [ rd ]; - read_locs = rlocs; - is_control = false; - } + { inst = i; write_locs = [ rd ]; read_locs = rlocs; is_control = false } let arith_rr0_rec i rd r1 = let rlocs = if is_XZR r1 then [] else [ r1 ] in - { - inst = i; - write_locs = [ rd ]; - read_locs = rlocs; - is_control = false; - } + { inst = i; write_locs = [ rd ]; read_locs = rlocs; is_control = false } let arith_arrrr0_rec i rd r1 r2 r3 = let rlocs = if is_XZR r3 then [ r1; r2 ] else [ r1; r2; r3 ] in - { - inst = i; - write_locs = [ rd ]; - read_locs = rlocs; - is_control = false; - } + { inst = i; write_locs = [ rd ]; read_locs = rlocs; is_control = false } let arith_comparison_pp_rec i r1 r2 = { @@ -125,20 +101,10 @@ let arith_comparison_pp_rec i r1 r2 = let arith_comparison_r0r_rec i r1 r2 = let rlocs = if is_XZR r1 then [ r2 ] else [ r1; r2 ] in - { - inst = i; - write_locs = flags_wlocs; - read_locs = rlocs; - is_control = false; - } + { inst = i; write_locs = flags_wlocs; read_locs = rlocs; is_control = false } let arith_comparison_p_rec i r1 = - { - inst = i; - write_locs = flags_wlocs; - read_locs = [ r1 ]; - is_control = false; - } + { inst = i; write_locs = flags_wlocs; read_locs = [ r1 ]; is_control = false } let get_eval_addressing_rlocs a = match a with @@ -168,8 +134,11 @@ let load_rd1_rd2_a_rec ld rd1 rd2 a = let load_rec ldi = match ldi with - | PLd_rd_a (ld, rd, a) -> load_rd_a_rec (PBasic (PLoad ldi)) (reg_of_dreg rd) a - | Pldp (ld, rd1, rd2, a) -> load_rd1_rd2_a_rec (PBasic (PLoad ldi)) (reg_of_ireg rd1) (reg_of_ireg rd2) a + | PLd_rd_a (ld, rd, a) -> + load_rd_a_rec (PBasic (PLoad ldi)) (reg_of_dreg rd) a + | Pldp (ld, rd1, rd2, _, _, a) -> + load_rd1_rd2_a_rec (PBasic (PLoad ldi)) (reg_of_ireg rd1) + (reg_of_ireg rd2) a let store_rs_a_rec st rs a = { @@ -189,40 +158,23 @@ let store_rs1_rs2_a_rec st rs1 rs2 a = let store_rec sti = match sti with - | PSt_rs_a (st, rs, a) -> store_rs_a_rec (PBasic (PStore sti)) (reg_of_dreg rs) a - | Pstp (st, rs1, rs2, a) -> store_rs1_rs2_a_rec (PBasic (PStore sti)) (reg_of_ireg rs1) (reg_of_ireg rs2) a + | PSt_rs_a (st, rs, a) -> + store_rs_a_rec (PBasic (PStore sti)) (reg_of_dreg rs) a + | Pstp (st, rs1, rs2, _, _, a) -> + store_rs1_rs2_a_rec (PBasic (PStore sti)) (reg_of_ireg rs1) + (reg_of_ireg rs2) a let loadsymbol_rec i rd id = - { - inst = i; - write_locs = [ rd ]; - read_locs = [ Mem ]; - is_control = false; - } + { inst = i; write_locs = [ rd ]; read_locs = [ Mem ]; is_control = false } let cvtsw2x_rec i rd r1 = - { - inst = i; - write_locs = [ rd ]; - read_locs = [ r1 ]; - is_control = false; - } + { inst = i; write_locs = [ rd ]; read_locs = [ r1 ]; is_control = false } let cvtuw2x_rec i rd r1 = - { - inst = i; - write_locs = [ rd ]; - read_locs = [ r1 ]; - is_control = false; - } + { inst = i; write_locs = [ rd ]; read_locs = [ r1 ]; is_control = false } let cvtx2w_rec i rd = - { - inst = i; - write_locs = [ rd ]; - read_locs = [ rd ]; - is_control = false; - } + { inst = i; write_locs = [ rd ]; read_locs = [ rd ]; is_control = false } let get_testcond_rlocs c = match c with @@ -257,20 +209,10 @@ let csel_rec i rd r1 r2 c = let fmovi_rec i fsz rd r1 = let rlocs = if is_XZR r1 then [] else [ r1 ] in - { - inst = i; - write_locs = [ rd ]; - read_locs = rlocs; - is_control = false; - } + { inst = i; write_locs = [ rd ]; read_locs = rlocs; is_control = false } let fnmul_rec i fsz rd r1 r2 = - { - inst = i; - write_locs = [ rd ]; - read_locs = [ r1; r2 ]; - is_control = false; - } + { inst = i; write_locs = [ rd ]; read_locs = [ r1; r2 ]; is_control = false } let allocframe_rec i sz linkofs = { @@ -289,37 +231,41 @@ let freeframe_rec i sz linkofs = } let nop_rec i = - { - inst = i; - write_locs = [ ]; - read_locs = [ ]; - is_control = false; - } + { inst = i; write_locs = []; read_locs = []; is_control = false } let arith_rec i = match i with | PArithP (i', rd) -> arith_p_rec (PBasic (PArith i)) i' (reg_of_dreg rd) - | PArithPP (i', rd, rs) -> arith_pp_rec (PBasic (PArith i)) (reg_of_dreg rd) (reg_of_dreg rs) + | PArithPP (i', rd, rs) -> + arith_pp_rec (PBasic (PArith i)) (reg_of_dreg rd) (reg_of_dreg rs) | PArithPPP (i', rd, r1, r2) -> - arith_ppp_rec (PBasic (PArith i)) (reg_of_dreg rd) (reg_of_dreg r1) (reg_of_dreg r2) + arith_ppp_rec (PBasic (PArith i)) (reg_of_dreg rd) (reg_of_dreg r1) + (reg_of_dreg r2) | PArithRR0R (i', rd, r1, r2) -> - arith_rr0r_rec (PBasic (PArith i)) (reg_of_ireg rd) (reg_of_ireg0 r1) (reg_of_ireg r2) + arith_rr0r_rec (PBasic (PArith i)) (reg_of_ireg rd) (reg_of_ireg0 r1) + (reg_of_ireg r2) | PArithRR0 (i', rd, r1) -> arith_rr0_rec (PBasic (PArith i)) (reg_of_ireg rd) (reg_of_ireg0 r1) | PArithARRRR0 (i', rd, r1, r2, r3) -> - arith_arrrr0_rec (PBasic (PArith i)) (reg_of_ireg rd) (reg_of_ireg r1) (reg_of_ireg r2) - (reg_of_ireg0 r3) + arith_arrrr0_rec (PBasic (PArith i)) (reg_of_ireg rd) (reg_of_ireg r1) + (reg_of_ireg r2) (reg_of_ireg0 r3) | PArithComparisonPP (i', r1, r2) -> - arith_comparison_pp_rec (PBasic (PArith i)) (reg_of_dreg r1) (reg_of_dreg r2) + arith_comparison_pp_rec (PBasic (PArith i)) (reg_of_dreg r1) + (reg_of_dreg r2) | PArithComparisonR0R (i', r1, r2) -> - arith_comparison_r0r_rec (PBasic (PArith i)) (reg_of_ireg0 r1) (reg_of_ireg r2) - | PArithComparisonP (i', r1) -> arith_comparison_p_rec (PBasic (PArith i)) (reg_of_dreg r1) + arith_comparison_r0r_rec (PBasic (PArith i)) (reg_of_ireg0 r1) + (reg_of_ireg r2) + | PArithComparisonP (i', r1) -> + arith_comparison_p_rec (PBasic (PArith i)) (reg_of_dreg r1) | Pcset (rd, c) -> cset_rec (PBasic (PArith i)) (reg_of_ireg rd) c - | Pfmovi (fsz, rd, r1) -> fmovi_rec (PBasic (PArith i)) fsz (reg_of_freg rd) (reg_of_ireg0 r1) + | Pfmovi (fsz, rd, r1) -> + fmovi_rec (PBasic (PArith i)) fsz (reg_of_freg rd) (reg_of_ireg0 r1) | Pcsel (rd, r1, r2, c) -> - csel_rec (PBasic (PArith i)) (reg_of_dreg rd) (reg_of_dreg r1) (reg_of_dreg r2) c + csel_rec (PBasic (PArith i)) (reg_of_dreg rd) (reg_of_dreg r1) + (reg_of_dreg r2) c | Pfnmul (fsz, rd, r1, r2) -> - fnmul_rec (PBasic (PArith i)) fsz (reg_of_freg rd) (reg_of_freg r1) (reg_of_freg r2) + fnmul_rec (PBasic (PArith i)) fsz (reg_of_freg rd) (reg_of_freg r1) + (reg_of_freg r2) let basic_rec i = match i with @@ -329,101 +275,98 @@ let basic_rec i = | Pallocframe (sz, linkofs) -> allocframe_rec (PBasic i) sz linkofs | Pfreeframe (sz, linkofs) -> freeframe_rec (PBasic i) sz linkofs | Ploadsymbol (rd, id) -> loadsymbol_rec (PBasic i) (reg_of_ireg rd) id - | Pcvtsw2x (rd, r1) -> cvtsw2x_rec (PBasic i) (reg_of_ireg rd) (reg_of_ireg r1) - | Pcvtuw2x (rd, r1) -> cvtuw2x_rec (PBasic i) (reg_of_ireg rd) (reg_of_ireg r1) + | Pcvtsw2x (rd, r1) -> + cvtsw2x_rec (PBasic i) (reg_of_ireg rd) (reg_of_ireg r1) + | Pcvtuw2x (rd, r1) -> + cvtuw2x_rec (PBasic i) (reg_of_ireg rd) (reg_of_ireg r1) | Pcvtx2w rd -> cvtx2w_rec (PBasic i) (reg_of_ireg rd) | Pnop -> nop_rec (PBasic i) let builtin_rec i ef args res = - { - inst = i; - write_locs = [ Mem ]; - read_locs = [ Mem ]; - is_control = true; - } + { inst = i; write_locs = [ Mem ]; read_locs = [ Mem ]; is_control = true } let ctl_flow_rec i = match i with | Pb lbl -> { - inst = (PControl (PCtlFlow i)); + inst = PControl (PCtlFlow i); write_locs = [ reg_of_pc ]; read_locs = [ reg_of_pc ]; is_control = true; } | Pbc (c, lbl) -> { - inst = (PControl (PCtlFlow i)); + inst = PControl (PCtlFlow i); write_locs = [ reg_of_pc ]; read_locs = [ reg_of_pc ]; is_control = true; } | Pbl (id, sg) -> { - inst = (PControl (PCtlFlow i)); + inst = PControl (PCtlFlow i); write_locs = [ reg_of_ireg Asm.X30; reg_of_pc ]; read_locs = [ reg_of_pc ]; is_control = true; } | Pbs (id, sg) -> { - inst = (PControl (PCtlFlow i)); + inst = PControl (PCtlFlow i); write_locs = [ reg_of_pc ]; read_locs = []; is_control = true; } | Pblr (r, sg) -> { - inst = (PControl (PCtlFlow i)); + inst = PControl (PCtlFlow i); write_locs = [ reg_of_ireg Asm.X30; reg_of_pc ]; read_locs = [ reg_of_ireg r ]; is_control = true; } | Pbr (r, sg) -> { - inst = (PControl (PCtlFlow i)); + inst = PControl (PCtlFlow i); write_locs = [ reg_of_pc ]; read_locs = [ reg_of_ireg r ]; is_control = true; } | Pret r -> { - inst = (PControl (PCtlFlow i)); + inst = PControl (PCtlFlow i); write_locs = [ reg_of_pc ]; read_locs = [ reg_of_ireg r ]; is_control = true; } | Pcbnz (sz, r, lbl) -> { - inst = (PControl (PCtlFlow i)); + inst = PControl (PCtlFlow i); write_locs = [ reg_of_pc ]; read_locs = [ reg_of_ireg r; reg_of_pc ]; is_control = true; } | Pcbz (sz, r, lbl) -> { - inst = (PControl (PCtlFlow i)); + inst = PControl (PCtlFlow i); write_locs = [ reg_of_pc ]; read_locs = [ reg_of_ireg r; reg_of_pc ]; is_control = true; } | Ptbnz (sz, r, n, lbl) -> { - inst = (PControl (PCtlFlow i)); + inst = PControl (PCtlFlow i); write_locs = [ reg_of_pc ]; read_locs = [ reg_of_ireg r; reg_of_pc ]; is_control = true; } | Ptbz (sz, r, n, lbl) -> { - inst = (PControl (PCtlFlow i)); + inst = PControl (PCtlFlow i); write_locs = [ reg_of_pc ]; read_locs = [ reg_of_ireg r; reg_of_pc ]; is_control = true; } | Pbtbl (r1, tbl) -> { - inst = (PControl (PCtlFlow i)); + inst = PControl (PCtlFlow i); write_locs = [ reg_of_ireg Asm.X16; reg_of_ireg Asm.X17; reg_of_pc ]; read_locs = [ reg_of_ireg r1; reg_of_pc ]; is_control = true; @@ -614,7 +557,7 @@ let find_max l = | e :: l -> ( match f l with | None -> Some e - | Some m -> if e > m then Some e else Some m ) + | Some m -> if e > m then Some e else Some m) in match f l with None -> raise Not_found | Some m -> m @@ -625,9 +568,9 @@ let minpack_list (l : int list) = let rec f i = function | [] -> () | t :: l -> - ( match TimeHash.find_opt timehash t with + (match TimeHash.find_opt timehash t with | None -> TimeHash.add timehash t [ i ] - | Some bund -> TimeHash.replace timehash t (bund @ [ i ]) ); + | Some bund -> TimeHash.replace timehash t (bund @ [ i ])); f (i + 1) l in f 0 l; @@ -684,9 +627,11 @@ let bblock_schedule bb = let identity_mode = not !Clflags.option_fpostpass in if debug && not identity_mode then ( Printf.eprintf "###############################\n"; - Printf.eprintf "SCHEDULING\n" ); + Printf.eprintf "SCHEDULING\n"); if stats then ( - let oc = open_out_gen [Open_append; Open_creat] 0o666 "oracle_stats.csv" in + let oc = + open_out_gen [ Open_append; Open_creat ] 0o666 "oracle_stats.csv" + in Printf.fprintf oc "%d\n" (Camlcoq.Z.to_int (size bb)); close_out oc); if identity_mode then pack_result bb else smart_schedule bb diff --git a/aarch64/TargetPrinter.ml b/aarch64/TargetPrinter.ml index c336fb1e..8c3daabe 100644 --- a/aarch64/TargetPrinter.ml +++ b/aarch64/TargetPrinter.ml @@ -279,9 +279,9 @@ module Target (*: TARGET*) = (* the upper 32 bits of Xrd are set to 0, performing zero-extension *) | Pldrsw(rd, a) -> fprintf oc " ldrsw %a, %a\n" xreg rd addressing a - | Pldpw(rd1, rd2, a) -> + | Pldpw(rd1, rd2, _, _, a) -> fprintf oc " ldp %a, %a, %a\n" wreg rd1 wreg rd2 addressing a - | Pldpx(rd1, rd2, a) -> + | Pldpx(rd1, rd2, _, _, a) -> fprintf oc " ldp %a, %a, %a\n" xreg rd1 xreg rd2 addressing a | Pstrw(rs, a) | Pstrw_a(rs, a) -> fprintf oc " str %a, %a\n" wreg rs addressing a @@ -291,9 +291,9 @@ module Target (*: TARGET*) = fprintf oc " strb %a, %a\n" wreg rs addressing a | Pstrh(rs, a) -> fprintf oc " strh %a, %a\n" wreg rs addressing a - | Pstpw(rs1, rs2, a) -> + | Pstpw(rs1, rs2, _, _, a) -> fprintf oc " stp %a, %a, %a\n" wreg rs1 wreg rs2 addressing a - | Pstpx(rs1, rs2, a) -> + | Pstpx(rs1, rs2, _, _, a) -> fprintf oc " stp %a, %a, %a\n" xreg rs1 xreg rs2 addressing a (* Integer arithmetic, immediate *) | Paddimm(sz, rd, r1, n) -> -- cgit