aboutsummaryrefslogtreecommitdiffstats
path: root/aarch64
diff options
context:
space:
mode:
Diffstat (limited to 'aarch64')
-rw-r--r--aarch64/Archi.v6
-rw-r--r--aarch64/Asm.v50
-rw-r--r--aarch64/Asmblock.v107
-rw-r--r--aarch64/Asmblockdeps.v220
-rw-r--r--aarch64/Asmexpand.ml24
-rw-r--r--aarch64/Asmgen.v16
-rw-r--r--aarch64/Asmgenproof.v57
-rw-r--r--aarch64/CBuiltins.ml11
-rw-r--r--aarch64/Machregsaux.ml19
-rw-r--r--aarch64/PeepholeOracle.ml526
-rw-r--r--aarch64/PostpassSchedulingOracle.ml193
-rw-r--r--aarch64/TargetPrinter.ml14
-rw-r--r--aarch64/extractionMachdep.v3
13 files changed, 650 insertions, 596 deletions
diff --git a/aarch64/Archi.v b/aarch64/Archi.v
index 7d7b6887..7f39d1fa 100644
--- a/aarch64/Archi.v
+++ b/aarch64/Archi.v
@@ -6,15 +6,17 @@
(* *)
(* Copyright Institut National de Recherche en Informatique et en *)
(* Automatique. All rights reserved. This file is distributed *)
+(* under the terms of the GNU General Public License as published by *)
+(* the Free Software Foundation, either version 2 of the License, or *)
+(* (at your option) any later version. This file is also distributed *)
(* under the terms of the INRIA Non-Commercial License Agreement. *)
(* *)
(* *********************************************************************)
(** Architecture-dependent parameters for AArch64 *)
+From Flocq Require Import Binary Bits.
Require Import ZArith List.
-(*From Flocq*)
-Require Import Binary Bits.
Definition ptr64 := true.
diff --git a/aarch64/Asm.v b/aarch64/Asm.v
index 719c3b61..30bac2a4 100644
--- a/aarch64/Asm.v
+++ b/aarch64/Asm.v
@@ -193,16 +193,16 @@ Inductive instruction: Type :=
| Pldrsh (sz: isize) (rd: ireg) (a: addressing) (**r load int16, sign-extend *)
| Pldrzw (rd: ireg) (a: addressing) (**r load int32, zero-extend to int64 *)
| Pldrsw (rd: ireg) (a: addressing) (**r load int32, sign-extend to int64 *)
- | Pldpw (rd1 rd2: ireg) (a: addressing) (**r load two int32 *)
- | Pldpx (rd1 rd2: ireg) (a: addressing) (**r load two int64 *)
+ | Pldpw (rd1 rd2: ireg) (chk1 chk2: memory_chunk) (a: addressing) (**r load two int32 *)
+ | Pldpx (rd1 rd2: ireg) (chk1 chk2: memory_chunk) (a: addressing) (**r load two int64 *)
| Pstrw (rs: ireg) (a: addressing) (**r store int32 *)
| Pstrw_a (rs: ireg) (a: addressing) (**r store int32 as any32 *)
| Pstrx (rs: ireg) (a: addressing) (**r store int64 *)
| Pstrx_a (rs: ireg) (a: addressing) (**r store int64 as any64 *)
| Pstrb (rs: ireg) (a: addressing) (**r store int8 *)
| Pstrh (rs: ireg) (a: addressing) (**r store int16 *)
- | Pstpw (rs1 rs2: ireg) (a: addressing) (**r store two int64 *)
- | Pstpx (rs1 rs2: ireg) (a: addressing) (**r store two int64 *)
+ | Pstpw (rs1 rs2: ireg) (chk1 chk2: memory_chunk) (a: addressing) (**r store two int64 *)
+ | Pstpx (rs1 rs2: ireg) (chk1 chk2: memory_chunk) (a: addressing) (**r store two int64 *)
(** Integer arithmetic, immediate *)
| Paddimm (sz: isize) (rd: iregsp) (r1: iregsp) (n: Z) (**r addition *)
| Psubimm (sz: isize) (rd: iregsp) (r1: iregsp) (n: Z) (**r subtraction *)
@@ -255,6 +255,7 @@ Inductive instruction: Type :=
| Pclz (sz: isize) (rd r1: ireg) (**r count leading zero bits *)
| Prev (sz: isize) (rd r1: ireg) (**r reverse bytes *)
| Prev16 (sz: isize) (rd r1: ireg) (**r reverse bytes in each 16-bit word *)
+ | Prbit (sz: isize) (rd r1: ireg) (**r reverse bits *)
(** Conditional data processing *)
| Pcsel (rd: ireg) (r1 r2: ireg) (c: testcond) (**r int conditional move *)
| Pcset (rd: ireg) (c: testcond) (**r set to 1/0 if cond is true/false *)
@@ -300,6 +301,8 @@ Inductive instruction: Type :=
| Pfmsub (sz: fsize) (rd r1 r2 r3: freg) (**r [rd = r3 - r1 * r2] *)
| Pfnmadd (sz: fsize) (rd r1 r2 r3: freg) (**r [rd = - r3 - r1 * r2] *)
| Pfnmsub (sz: fsize) (rd r1 r2 r3: freg) (**r [rd = - r3 + r1 * r2] *)
+ | Pfmax (sz: fsize) (rd r1 r2: freg) (**r maximum *)
+ | Pfmin (sz: fsize) (rd r1 r2: freg) (**r minimum *)
(** Floating-point comparison *)
| Pfcmp (sz: fsize) (r1 r2: freg) (**r compare [r1] and [r2] *)
| Pfcmp0 (sz: fsize) (r1: freg) (**r compare [r1] and [+0.0] *)
@@ -845,16 +848,16 @@ Definition exec_load (chunk: memory_chunk) (transf: val -> val)
| Some v => Next (nextinstr (rs#r <- (transf v))) m
end.
-Definition exec_load_double (chunk: memory_chunk) (transf: val -> val)
+Definition exec_load_double (chk1 chk2: memory_chunk) (transf: val -> val)
(a: addressing) (rd1 rd2: preg) (rs: regset) (m: mem) :=
if is_pair_addressing_mode_correct a then
let addr := (eval_addressing a rs) in
- let ofs := match chunk with | Mint32 => 4 | _ => 8 end in
+ let ofs := match chk1 with | Mint32 | Many32 => 4 | _ => 8 end in
let addr' := (eval_addressing (get_offset_addr a ofs) rs) in
- match Mem.loadv chunk m addr with
+ match Mem.loadv chk1 m addr with
| None => Stuck
| Some v1 =>
- match Mem.loadv chunk m addr' with
+ match Mem.loadv chk2 m addr' with
| None => Stuck
| Some v2 =>
Next (nextinstr ((rs#rd1 <- (transf v1))#rd2 <- (transf v2))) m
@@ -870,16 +873,16 @@ Definition exec_store (chunk: memory_chunk)
| Some m' => Next (nextinstr rs) m'
end.
-Definition exec_store_double (chunk: memory_chunk)
+Definition exec_store_double (chk1 chk2: memory_chunk)
(a: addressing) (v1 v2: val)
(rs: regset) (m: mem) :=
if is_pair_addressing_mode_correct a then
let addr := (eval_addressing a rs) in
- let ofs := match chunk with | Mint32 => 4 | _ => 8 end in
+ let ofs := match chk1 with | Mint32 | Many32 => 4 | _ => 8 end in
let addr' := (eval_addressing (get_offset_addr a ofs) rs) in
- match Mem.storev chunk m addr v1 with
+ match Mem.storev chk1 m addr v1 with
| None => Stuck
- | Some m' => match Mem.storev chunk m' addr' v2 with
+ | Some m' => match Mem.storev chk2 m' addr' v2 with
| None => Stuck
| Some m'' => Next (nextinstr rs) m''
end
@@ -1304,31 +1307,34 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
| Vint n =>
match list_nth_z tbl (Int.unsigned n) with
| None => Stuck
- | Some lbl => goto_label f lbl (rs#X16 <- Vundef #X17 <- Vundef) m
+ | Some lbl => goto_label f lbl (rs#X16 <- Vundef) m
end
| _ => Stuck
end
| Pbuiltin ef args res => Stuck (**r treated specially below *)
(** The following instructions and directives are not generated directly
by Asmgen, so we do not model them. *)
- | Pldpw rd1 rd2 a =>
- exec_load_double Mint32 (fun v => v) a rd1 rd2 rs m
- | Pldpx rd1 rd2 a =>
- exec_load_double Mint64 (fun v => v) a rd1 rd2 rs m
+ | Pldpw rd1 rd2 chk1 chk2 a =>
+ exec_load_double chk1 chk2 (fun v => v) a rd1 rd2 rs m
+ | Pldpx rd1 rd2 chk1 chk2 a =>
+ exec_load_double chk1 chk2 (fun v => v) a rd1 rd2 rs m
| Pnop => Next (nextinstr rs) m
- | Pstpw rs1 rs2 a =>
- exec_store_double Mint32 a rs#rs1 rs#rs2 rs m
- | Pstpx rs1 rs2 a =>
- exec_store_double Mint64 a rs#rs1 rs#rs2 rs m
+ | Pstpw rs1 rs2 chk1 chk2 a =>
+ exec_store_double chk1 chk2 a rs#rs1 rs#rs2 rs m
+ | Pstpx rs1 rs2 chk1 chk2 a =>
+ exec_store_double chk1 chk2 a rs#rs1 rs#rs2 rs m
| Pcls _ _ _
| Pclz _ _ _
| Prev _ _ _
| Prev16 _ _ _
+ | Prbit _ _ _
| Pfsqrt _ _ _
| Pfmadd _ _ _ _ _
| Pfmsub _ _ _ _ _
| Pfnmadd _ _ _ _ _
| Pfnmsub _ _ _ _ _
+ | Pfmax _ _ _ _
+ | Pfmin _ _ _ _
| Pcfi_adjust _
| Pcfi_rel_offset _ =>
Stuck
@@ -1351,7 +1357,7 @@ Inductive step: state -> trace -> state -> Prop :=
external_call ef ge vargs m t vres m' ->
rs' = nextinstr
(set_res res vres
- (undef_regs (map preg_of (destroyed_by_builtin ef)) rs)) ->
+ (undef_regs (DR X16 :: DR X30 :: map preg_of (destroyed_by_builtin ef)) rs)) ->
step (State rs m) t (State rs' m')
| exec_step_external:
forall b ef args res rs m t rs' m',
diff --git a/aarch64/Asmblock.v b/aarch64/Asmblock.v
index c163ea10..ed84b7d8 100644
--- a/aarch64/Asmblock.v
+++ b/aarch64/Asmblock.v
@@ -114,7 +114,7 @@ Inductive load_rd1_rd2_a: Type :=
Inductive ld_instruction: Type :=
| PLd_rd_a (ld: load_rd_a) (rd: dreg) (a: addressing)
- | Pldp (ld: load_rd1_rd2_a) (rd1 rd2: ireg) (a: addressing) (**r load two int64 *)
+ | Pldp (ld: load_rd1_rd2_a) (rd1 rd2: ireg) (chk1 chk2: memory_chunk) (a: addressing) (**r load two int64 *)
.
Inductive store_rs_a : Type :=
@@ -138,7 +138,7 @@ Inductive store_rs1_rs2_a : Type :=
Inductive st_instruction : Type :=
| PSt_rs_a (st: store_rs_a) (rs: dreg) (a: addressing)
- | Pstp (st: store_rs1_rs2_a) (rs1 rs2: ireg) (a: addressing) (**r store two int64 *)
+ | Pstp (st: store_rs1_rs2_a) (rs1 rs2: ireg) (chk1 chk2: memory_chunk) (a: addressing) (**r store two int64 *)
.
Inductive arith_p : Type :=
@@ -477,16 +477,16 @@ Definition exec_load_rd_a (chunk: memory_chunk) (transf: val -> val)
SOME v <- Mem.loadv chunk m (eval_addressing a rs) IN
Next (rs#r <- (transf v)) m.
-Definition exec_load_double (chunk: memory_chunk) (transf: val -> val)
+Definition exec_load_double (chk1 chk2: memory_chunk) (transf: val -> val)
(a: addressing) (rd1 rd2: dreg) (rs: regset) (m: mem) :=
if is_pair_addressing_mode_correct a then
let addr := (eval_addressing a rs) in
- let ofs := match chunk with | Mint32 => 4 | _ => 8 end in
+ let ofs := match chk1 with | Mint32 | Many32 => 4 | _ => 8 end in
let addr' := (eval_addressing (get_offset_addr a ofs) rs) in
- match Mem.loadv chunk m addr with
+ match Mem.loadv chk1 m addr with
| None => Stuck
| Some v1 =>
- match Mem.loadv chunk m addr' with
+ match Mem.loadv chk2 m addr' with
| None => Stuck
| Some v2 =>
Next ((rs#rd1 <- (transf v1))#rd2 <- (transf v2)) m
@@ -500,16 +500,16 @@ Definition exec_store_rs_a (chunk: memory_chunk)
SOME m' <- Mem.storev chunk m (eval_addressing a rs) v IN
Next rs m'.
-Definition exec_store_double (chunk: memory_chunk)
+Definition exec_store_double (chk1 chk2: memory_chunk)
(a: addressing) (v1 v2: val)
(rs: regset) (m: mem) :=
if is_pair_addressing_mode_correct a then
let addr := (eval_addressing a rs) in
- let ofs := match chunk with | Mint32 => 4 | _ => 8 end in
+ let ofs := match chk1 with | Mint32 | Many32 => 4 | _ => 8 end in
let addr' := (eval_addressing (get_offset_addr a ofs) rs) in
- match Mem.storev chunk m addr v1 with
+ match Mem.storev chk1 m addr v1 with
| None => Stuck
- | Some m' => match Mem.storev chunk m' addr' v2 with
+ | Some m' => match Mem.storev chk2 m' addr' v2 with
| None => Stuck
| Some m'' => Next rs m''
end
@@ -519,67 +519,62 @@ Definition exec_store_double (chunk: memory_chunk)
(** execution of loads
*)
-Definition chunk_load (ld: ld_instruction): memory_chunk :=
+Definition chunk_load (ld: load_rd_a): memory_chunk :=
match ld with
- | PLd_rd_a Pldrw _ _ => Mint32
- | PLd_rd_a Pldrw_a _ _ => Many32
- | PLd_rd_a Pldrx _ _ => Mint64
- | PLd_rd_a Pldrx_a _ _ => Many64
- | PLd_rd_a (Pldrb _) _ _ => Mint8unsigned
- | PLd_rd_a (Pldrsb _) _ _ => Mint8signed
- | PLd_rd_a (Pldrh _) _ _ => Mint16unsigned
- | PLd_rd_a (Pldrsh _) _ _ => Mint16signed
- | PLd_rd_a Pldrzw _ _ => Mint32
- | PLd_rd_a Pldrsw _ _ => Mint32
- | PLd_rd_a Pldrs _ _ => Mfloat32
- | PLd_rd_a Pldrd _ _ => Mfloat64
- | PLd_rd_a Pldrd_a _ _ => Many64
- | Pldp Pldpw _ _ _ => Mint32
- | Pldp Pldpx _ _ _ => Mint64
+ | Pldrw => Mint32
+ | Pldrw_a => Many32
+ | Pldrx => Mint64
+ | Pldrx_a => Many64
+ | Pldrb _ => Mint8unsigned
+ | Pldrsb _ => Mint8signed
+ | Pldrh _ => Mint16unsigned
+ | Pldrsh _ => Mint16signed
+ | Pldrzw => Mint32
+ | Pldrsw => Mint32
+ | Pldrs => Mfloat32
+ | Pldrd => Mfloat64
+ | Pldrd_a => Many64
end.
-Definition chunk_store (st: st_instruction) : memory_chunk :=
+Definition chunk_store (st: store_rs_a) : memory_chunk :=
match st with
- | PSt_rs_a Pstrw _ _ => Mint32
- | PSt_rs_a Pstrw_a _ _ => Many32
- | PSt_rs_a Pstrx _ _ => Mint64
- | PSt_rs_a Pstrx_a _ _ => Many64
- | PSt_rs_a Pstrb _ _ => Mint8unsigned
- | PSt_rs_a Pstrh _ _ => Mint16unsigned
- | PSt_rs_a Pstrs _ _ => Mfloat32
- | PSt_rs_a Pstrd _ _ => Mfloat64
- | PSt_rs_a Pstrd_a _ _ => Many64
- | Pstp Pstpw _ _ _ => Mint32
- | Pstp Pstpx _ _ _ => Mint64
+ | Pstrw => Mint32
+ | Pstrw_a => Many32
+ | Pstrx => Mint64
+ | Pstrx_a => Many64
+ | Pstrb => Mint8unsigned
+ | Pstrh => Mint16unsigned
+ | Pstrs => Mfloat32
+ | Pstrd => Mfloat64
+ | Pstrd_a => Many64
end.
-Definition interp_load (ld: ld_instruction): val -> val :=
+Definition interp_load (ld: load_rd_a): val -> val :=
match ld with
- | PLd_rd_a (Pldrb X) _ _ => Val.longofintu
- | PLd_rd_a (Pldrsb X) _ _ => Val.longofint
- | PLd_rd_a (Pldrh X) _ _ => Val.longofintu
- | PLd_rd_a (Pldrsh X) _ _ => Val.longofint
- | PLd_rd_a Pldrzw _ _ => Val.longofintu
- | PLd_rd_a Pldrsw _ _ => Val.longofint
+ | Pldrb X => Val.longofintu
+ | Pldrsb X => Val.longofint
+ | Pldrh X => Val.longofintu
+ | Pldrsh X => Val.longofint
+ | Pldrzw => Val.longofintu
+ | Pldrsw => Val.longofint
(* Changed to exhaustive list because I tend to forgot all the places I need
* to check when changing things. *)
- | PLd_rd_a (Pldrb W) _ _ | PLd_rd_a (Pldrsb W) _ _ | PLd_rd_a (Pldrh W) _ _ | PLd_rd_a (Pldrsh W) _ _
- | PLd_rd_a Pldrw _ _ | PLd_rd_a Pldrw_a _ _ | PLd_rd_a Pldrx _ _
- | PLd_rd_a Pldrx_a _ _ | PLd_rd_a Pldrs _ _ | PLd_rd_a Pldrd _ _
- | PLd_rd_a Pldrd_a _ _
- | Pldp _ _ _ _ => fun v => v
+ | Pldrb W | Pldrsb W | Pldrh W | Pldrsh W
+ | Pldrw | Pldrw_a | Pldrx
+ | Pldrx_a | Pldrs | Pldrd
+ | Pldrd_a => fun v => v
end.
Definition exec_load (ldi: ld_instruction) (rs: regset) (m: mem) :=
match ldi with
- | PLd_rd_a ld rd a => exec_load_rd_a (chunk_load ldi) (interp_load ldi) a rd rs m
- | Pldp ld rd1 rd2 a => exec_load_double (chunk_load ldi) (interp_load ldi) a rd1 rd2 rs m
+ | PLd_rd_a ld rd a => exec_load_rd_a (chunk_load ld) (interp_load ld) a rd rs m
+ | Pldp ld rd1 rd2 chk1 chk2 a => exec_load_double chk1 chk2 (fun v => v) a rd1 rd2 rs m
end.
Definition exec_store (sti: st_instruction) (rs: regset) (m: mem) :=
match sti with
- | PSt_rs_a st rsr a => exec_store_rs_a (chunk_store sti) a rs#rsr rs m
- | Pstp st rs1 rs2 a => exec_store_double (chunk_store sti) a rs#rs1 rs#rs2 rs m
+ | PSt_rs_a st rsr a => exec_store_rs_a (chunk_store st) a rs#rsr rs m
+ | Pstp st rs1 rs2 chk1 chk2 a => exec_store_double chk1 chk2 a rs#rs1 rs#rs2 rs m
end.
(** TODO: redundant w.r.t Machblock ?? *)
@@ -663,7 +658,7 @@ Definition exec_cfi (f: function) (cfi: cf_instruction) (rs: regset) (m: mem) :
match (rs#X16 <- Vundef)#r with
| Vint n =>
SOME lbl <- list_nth_z tbl (Int.unsigned n) IN
- goto_label f lbl (rs#X16 <- Vundef #X17 <- Vundef) m
+ goto_label f lbl (rs#X16 <- Vundef) m
| _ => Stuck
end
end.
@@ -974,7 +969,7 @@ Inductive exec_exit (f: function) size_b (rs: regset) (m: mem): (option control)
external_call ef ge vargs m t vres m' ->
rs' = incrPC size_b
(set_res (map_builtin_res DR res) vres
- (undef_regs (map preg_of (destroyed_by_builtin ef)) rs)) ->
+ (undef_regs (DR (IR X16) :: DR (IR X30) :: map preg_of (destroyed_by_builtin ef)) rs)) ->
exec_exit f size_b rs m (Some (Pbuiltin ef args res)) t rs' m'
.
diff --git a/aarch64/Asmblockdeps.v b/aarch64/Asmblockdeps.v
index e2d788a5..670a7d06 100644
--- a/aarch64/Asmblockdeps.v
+++ b/aarch64/Asmblockdeps.v
@@ -39,8 +39,10 @@ Require Import Events.
Require Import Lia.
-Open Scope impure.
+Import ListNotations.
+Local Open Scope list_scope.
+Open Scope impure.
(** auxiliary treatments of builtins *)
Definition is_builtin(ex: option control): bool :=
@@ -166,15 +168,15 @@ Inductive arith_op :=
.
Inductive store_op :=
- | Ostore1 (st: st_instruction) (a: addressing)
- | Ostore2 (st: st_instruction) (a: addressing)
- | OstoreU (st: st_instruction) (a: addressing)
+ | Ostore1 (st: store_rs_a) (chunk: memory_chunk) (a: addressing)
+ | Ostore2 (st: store_rs_a) (chunk: memory_chunk) (a: addressing)
+ | OstoreU (st: store_rs_a) (chunk: memory_chunk) (a: addressing)
.
Inductive load_op :=
- | Oload1 (ld: ld_instruction) (a: addressing)
- | Oload2 (ld: ld_instruction) (a: addressing)
- | OloadU (ld: ld_instruction) (a: addressing)
+ | Oload1 (ld: load_rd_a) (chunk: memory_chunk) (a: addressing)
+ | Oload2 (ld: load_rd_a) (chunk: memory_chunk) (a: addressing)
+ | OloadU (ld: load_rd_a) (chunk: memory_chunk) (a: addressing)
.
Inductive allocf_op :=
@@ -424,16 +426,16 @@ Definition call_ll_storev (c: memory_chunk) (m: mem) (v: option val) (vs: val) :
| None => None (* should never occurs *)
end.
-Definition exec_store1 (n: st_instruction) (m: mem) (a: addressing) (vr vs: val) :=
+Definition exec_store1 (n: store_rs_a) (m: mem) (chunk: memory_chunk) (a: addressing) (vr vs: val) :=
let v :=
match a with
| ADimm _ n => Some (Val.addl vs (Vlong n))
| ADadr _ id ofs => Some (Val.addl vs (symbol_low Ge.(_lk) id ofs))
| _ => None
end in
- call_ll_storev (chunk_store n) m v vr.
+ call_ll_storev chunk m v vr.
-Definition exec_store2 (n: st_instruction) (m: mem) (a: addressing) (vr vs1 vs2: val) :=
+Definition exec_store2 (n: store_rs_a) (m: mem) (chunk: memory_chunk) (a: addressing) (vr vs1 vs2: val) :=
let v :=
match a with
| ADreg _ _ => Some (Val.addl vs1 vs2)
@@ -442,10 +444,10 @@ Definition exec_store2 (n: st_instruction) (m: mem) (a: addressing) (vr vs1 vs2:
| ADuxt _ _ n => Some (Val.addl vs1 (Val.shll (Val.longofintu vs2) (Vint n)))
| _ => None
end in
- call_ll_storev (chunk_store n) m v vr.
+ call_ll_storev chunk m v vr.
-Definition exec_storeU (n: st_instruction) (m: mem) (a: addressing) (vr: val) :=
- call_ll_storev (chunk_store n) m None vr.
+Definition exec_storeU (n: store_rs_a) (m: mem) (chunk: memory_chunk) (a: addressing) (vr: val) :=
+ call_ll_storev chunk m None vr.
Definition goto_label_deps (f: function) (lbl: label) (vpc: val) :=
match label_pos lbl 0 (fn_blocks f) with
@@ -503,9 +505,9 @@ Definition control_eval (o: control_op) (l: list value) :=
Definition store_eval (o: store_op) (l: list value) :=
match o, l with
- | Ostore1 st a, [Val vr; Val vs; Memstate m] => exec_store1 st m a vr vs
- | Ostore2 st a, [Val vr; Val vs1; Val vs2; Memstate m] => exec_store2 st m a vr vs1 vs2
- | OstoreU st a, [Val vr; Memstate m] => exec_storeU st m a vr
+ | Ostore1 st chunk a, [Val vr; Val vs; Memstate m] => exec_store1 st m chunk a vr vs
+ | Ostore2 st chunk a, [Val vr; Val vs1; Val vs2; Memstate m] => exec_store2 st m chunk a vr vs1 vs2
+ | OstoreU st chunk a, [Val vr; Memstate m] => exec_storeU st m chunk a vr
| _, _ => None
end.
@@ -518,16 +520,16 @@ Definition call_ll_loadv (c: memory_chunk) (transf: val -> val) (m: mem) (v: opt
| None => None (* should never occurs *)
end.
-Definition exec_load1 (ld: ld_instruction) (m: mem) (a: addressing) (vl: val) :=
+Definition exec_load1 (ld: load_rd_a) (m: mem) (chunk: memory_chunk) (a: addressing) (vl: val) :=
let v :=
match a with
| ADimm _ n => Some (Val.addl vl (Vlong n))
| ADadr _ id ofs => Some (Val.addl vl (symbol_low Ge.(_lk) id ofs))
| _ => None
end in
- call_ll_loadv (chunk_load ld) (interp_load ld) m v.
+ call_ll_loadv chunk (interp_load ld) m v.
-Definition exec_load2 (ld: ld_instruction) (m: mem) (a: addressing) (vl1 vl2: val) :=
+Definition exec_load2 (ld: load_rd_a) (m: mem) (chunk: memory_chunk) (a: addressing) (vl1 vl2: val) :=
let v :=
match a with
| ADreg _ _ => Some (Val.addl vl1 vl2)
@@ -536,16 +538,16 @@ Definition exec_load2 (ld: ld_instruction) (m: mem) (a: addressing) (vl1 vl2: va
| ADuxt _ _ n => Some (Val.addl vl1 (Val.shll (Val.longofintu vl2) (Vint n)))
| _ => None
end in
- call_ll_loadv (chunk_load ld) (interp_load ld) m v.
+ call_ll_loadv chunk (interp_load ld) m v.
-Definition exec_loadU (n: ld_instruction) (m: mem) (a: addressing) :=
- call_ll_loadv (chunk_load n) (interp_load n) m None.
+Definition exec_loadU (n: load_rd_a) (m: mem) (chunk: memory_chunk) (a: addressing) :=
+ call_ll_loadv chunk (interp_load n) m None.
Definition load_eval (o: load_op) (l: list value) :=
match o, l with
- | Oload1 ld a, [Val vs; Memstate m] => exec_load1 ld m a vs
- | Oload2 ld a, [Val vs1; Val vs2; Memstate m] => exec_load2 ld m a vs1 vs2
- | OloadU st a, [Memstate m] => exec_loadU st m a
+ | Oload1 ld chunk a, [Val vs; Memstate m] => exec_load1 ld m chunk a vs
+ | Oload2 ld chunk a, [Val vs1; Val vs2; Memstate m] => exec_load2 ld m chunk a vs1 vs2
+ | OloadU st chunk a, [Memstate m] => exec_loadU st m chunk a
| _, _ => None
end.
@@ -768,12 +770,12 @@ Opaque control_op_eq_correct.
Definition store_op_eq (s1 s2: store_op): ?? bool :=
match s1 with
- | Ostore1 st1 a1 =>
- match s2 with Ostore1 st2 a2 => iandb (struct_eq st1 st2) (struct_eq a1 a2) | _ => RET false end
- | Ostore2 st1 a1 =>
- match s2 with Ostore2 st2 a2 => iandb (struct_eq st1 st2) (struct_eq a1 a2) | _ => RET false end
- | OstoreU st1 a1 =>
- match s2 with OstoreU st2 a2 => iandb (struct_eq st1 st2) (struct_eq a1 a2) | _ => RET false end
+ | Ostore1 st1 chk1 a1 =>
+ match s2 with Ostore1 st2 chk2 a2 => iandb (struct_eq chk1 chk2) (iandb (struct_eq st1 st2) (struct_eq a1 a2)) | _ => RET false end
+ | Ostore2 st1 chk1 a1 =>
+ match s2 with Ostore2 st2 chk2 a2 => iandb (struct_eq chk1 chk2) (iandb (struct_eq st1 st2) (struct_eq a1 a2)) | _ => RET false end
+ | OstoreU st1 chk1 a1 =>
+ match s2 with OstoreU st2 chk2 a2 => iandb (struct_eq chk1 chk2) (iandb (struct_eq st1 st2) (struct_eq a1 a2)) | _ => RET false end
end.
Lemma store_op_eq_correct s1 s2:
@@ -787,12 +789,12 @@ Opaque store_op_eq_correct.
Definition load_op_eq (l1 l2: load_op): ?? bool :=
match l1 with
- | Oload1 ld1 a1 =>
- match l2 with Oload1 ld2 a2 => iandb (struct_eq ld1 ld2) (struct_eq a1 a2) | _ => RET false end
- | Oload2 ld1 a1 =>
- match l2 with Oload2 ld2 a2 => iandb (struct_eq ld1 ld2) (struct_eq a1 a2) | _ => RET false end
- | OloadU ld1 a1 =>
- match l2 with OloadU ld2 a2 => iandb (struct_eq ld1 ld2) (struct_eq a1 a2) | _ => RET false end
+ | Oload1 ld1 chk1 a1 =>
+ match l2 with Oload1 ld2 chk2 a2 => iandb (struct_eq chk1 chk2) (iandb (struct_eq ld1 ld2) (struct_eq a1 a2)) | _ => RET false end
+ | Oload2 ld1 chk1 a1 =>
+ match l2 with Oload2 ld2 chk2 a2 => iandb (struct_eq chk1 chk2) (iandb (struct_eq ld1 ld2) (struct_eq a1 a2)) | _ => RET false end
+ | OloadU ld1 chk1 a1 =>
+ match l2 with OloadU ld2 chk2 a2 => iandb (struct_eq chk1 chk2) (iandb (struct_eq ld1 ld2) (struct_eq a1 a2)) | _ => RET false end
end.
Lemma load_op_eq_correct l1 l2:
@@ -1049,8 +1051,7 @@ Definition trans_control (ctl: control) : inst :=
| Ptbz sz r n lbl => [(#PC, Op (Control (Otbz sz n lbl)) (PReg(#r) @ PReg(#PC) @ Enil))]
| Pbtbl r tbl => [(#X16, Op (Constant Vundef) Enil);
(#PC, Op (Control (Obtbl tbl)) (PReg(#r) @ PReg(#PC) @ Enil));
- (#X16, Op (Constant Vundef) Enil);
- (#X17, Op (Constant Vundef) Enil)]
+ (#X16, Op (Constant Vundef) Enil)]
| Pbuiltin ef args res => []
end.
@@ -1127,45 +1128,58 @@ Definition trans_arith (ai: ar_instruction) : inst :=
| Pfnmul fsz rd r1 r2 => [(#rd, Op(Arith (Ofnmul fsz)) (PReg(#r1) @ PReg(#r2) @ Enil))]
end.
-Definition eval_addressing_rlocs_st (st: st_instruction) (rs: dreg) (a: addressing) :=
+Definition eval_addressing_rlocs_st (st: store_rs_a) (chunk: memory_chunk) (rs: dreg) (a: addressing) :=
match a with
- | ADimm base n => Op (Store (Ostore1 st a)) (PReg (#rs) @ PReg (#base) @ PReg (pmem) @ Enil)
- | ADreg base r => Op (Store (Ostore2 st a)) (PReg (#rs) @ PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil)
- | ADlsl base r n => Op (Store (Ostore2 st a)) (PReg (#rs) @ PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil)
- | ADsxt base r n => Op (Store (Ostore2 st a)) (PReg (#rs) @ PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil)
- | ADuxt base r n => Op (Store (Ostore2 st a)) (PReg (#rs) @ PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil)
- | ADadr base id ofs => Op (Store (Ostore1 st a)) (PReg (#rs) @ PReg (#base) @ PReg (pmem) @ Enil)
- | ADpostincr base n => Op (Store (OstoreU st a)) (PReg (#rs) @ PReg (pmem) @ Enil) (* not modeled yet *)
+ | ADimm base n => Op (Store (Ostore1 st chunk a)) (PReg (#rs) @ PReg (#base) @ PReg (pmem) @ Enil)
+ | ADreg base r => Op (Store (Ostore2 st chunk a)) (PReg (#rs) @ PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil)
+ | ADlsl base r n => Op (Store (Ostore2 st chunk a)) (PReg (#rs) @ PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil)
+ | ADsxt base r n => Op (Store (Ostore2 st chunk a)) (PReg (#rs) @ PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil)
+ | ADuxt base r n => Op (Store (Ostore2 st chunk a)) (PReg (#rs) @ PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil)
+ | ADadr base id ofs => Op (Store (Ostore1 st chunk a)) (PReg (#rs) @ PReg (#base) @ PReg (pmem) @ Enil)
+ | ADpostincr base n => Op (Store (OstoreU st chunk a)) (PReg (#rs) @ PReg (pmem) @ Enil) (* not modeled yet *)
end.
-Definition eval_addressing_rlocs_ld (ld: ld_instruction) (a: addressing) :=
+Definition eval_addressing_rlocs_ld (ld: load_rd_a) (chunk: memory_chunk) (a: addressing) :=
match a with
- | ADimm base n => Op (Load (Oload1 ld a)) (PReg (#base) @ PReg (pmem) @ Enil)
- | ADreg base r => Op (Load (Oload2 ld a)) (PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil)
- | ADlsl base r n => Op (Load (Oload2 ld a)) (PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil)
- | ADsxt base r n => Op (Load (Oload2 ld a)) (PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil)
- | ADuxt base r n => Op (Load (Oload2 ld a)) (PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil)
- | ADadr base id ofs => Op (Load (Oload1 ld a)) (PReg (#base) @ PReg (pmem) @ Enil)
- | ADpostincr base n => Op (Load (Oload1 ld a)) (PReg (#base) @ PReg (pmem) @ Enil)
+ | ADimm base n => Op (Load (Oload1 ld chunk a)) (PReg (#base) @ PReg (pmem) @ Enil)
+ | ADreg base r => Op (Load (Oload2 ld chunk a)) (PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil)
+ | ADlsl base r n => Op (Load (Oload2 ld chunk a)) (PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil)
+ | ADsxt base r n => Op (Load (Oload2 ld chunk a)) (PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil)
+ | ADuxt base r n => Op (Load (Oload2 ld chunk a)) (PReg (#base) @ PReg(#r) @ PReg (pmem) @ Enil)
+ | ADadr base id ofs => Op (Load (Oload1 ld chunk a)) (PReg (#base) @ PReg (pmem) @ Enil)
+ | ADpostincr base n => Op (Load (Oload1 ld chunk a)) (PReg (#base) @ PReg (pmem) @ Enil)
+ end.
+
+Definition trans_ldp_chunk (chunk: memory_chunk): load_rd_a :=
+ match chunk with
+ | Many32 => Pldrw_a
+ | Mint64 => Pldrx
+ | Many64 => Pldrx_a
+ | _ => Pldrw
+ end.
+
+Definition trans_stp_chunk (chunk: memory_chunk): store_rs_a :=
+ match chunk with
+ | Many32 => Pstrw_a
+ | Mint64 => Pstrx
+ | Many64 => Pstrx_a
+ | _ => Pstrw
end.
Definition trans_load (ldi: ld_instruction) :=
match ldi with
| PLd_rd_a ld r a =>
- let lr := eval_addressing_rlocs_ld ldi a in [(#r, lr)]
- | Pldp ld r1 r2 a =>
- let ldi' :=
- match ld with
- | Pldpw => Pldrw
- | Pldpx => Pldrx
- end in
- let lr := eval_addressing_rlocs_ld (PLd_rd_a ldi' r1 a) a in
- let ofs := match ld with | Pldpw => 4%Z | Pldpx => 8%Z end in
+ let lr := eval_addressing_rlocs_ld ld (chunk_load ld) a in [(#r, lr)]
+ | Pldp ld r1 r2 chk1 chk2 a =>
+ let ldi1 := trans_ldp_chunk chk1 in
+ let ldi2 := trans_ldp_chunk chk2 in
+ let lr := eval_addressing_rlocs_ld ldi1 chk1 a in
+ let ofs := match chk1 with | Mint32 | Many32 => 4%Z | _ => 8%Z end in
match a with
| ADimm base n =>
let a' := (get_offset_addr a ofs) in
[(#r1, lr);
- (#r2, Op (Load (Oload1 (PLd_rd_a ldi' r2 a') a'))
+ (#r2, Op (Load (Oload1 ldi2 chk2 a'))
(Old(PReg (#base)) @ PReg (pmem) @ Enil))]
| _ => [(#PC, (Op (OError)) Enil)]
end
@@ -1174,20 +1188,17 @@ Definition trans_load (ldi: ld_instruction) :=
Definition trans_store (sti: st_instruction) :=
match sti with
| PSt_rs_a st r a =>
- let lr := eval_addressing_rlocs_st sti r a in [(pmem, lr)]
- | Pstp st r1 r2 a =>
- let sti' :=
- match st with
- | Pstpw => Pstrw
- | Pstpx => Pstrx
- end in
- let lr := eval_addressing_rlocs_st (PSt_rs_a sti' r1 a) r1 a in
- let ofs := match st with | Pstpw => 4%Z | Pstpx => 8%Z end in
+ let lr := eval_addressing_rlocs_st st (chunk_store st) r a in [(pmem, lr)]
+ | Pstp st r1 r2 chk1 chk2 a =>
+ let sti1 := trans_stp_chunk chk1 in
+ let sti2 := trans_stp_chunk chk2 in
+ let lr := eval_addressing_rlocs_st sti1 chk1 r1 a in
+ let ofs := match chk1 with | Mint32 | Many32 => 4%Z | _ => 8%Z end in
match a with
| ADimm base n =>
let a' := (get_offset_addr a ofs) in
[(pmem, lr);
- (pmem, Op (Store (Ostore1 (PSt_rs_a sti' r2 a') a'))
+ (pmem, Op (Store (Ostore1 sti2 chk2 a'))
(PReg (#r2) @ Old(PReg (#base)) @ PReg (pmem) @ Enil))]
| _ => [(#PC, (Op (OError)) Enil)]
end
@@ -1695,6 +1706,12 @@ Proof.
econstructor.
Qed.
+Lemma load_chunk_neutral: forall chk v,
+ interp_load (trans_ldp_chunk chk) v = v.
+Proof.
+ intros; destruct chk; simpl; reflexivity.
+Qed.
+
Theorem bisimu_basic rsr mr sr bi:
match_states (State rsr mr) sr ->
match_outcome (exec_basic Ge.(_lk) Ge.(_genv) bi rsr mr) (inst_run Ge (trans_basic bi) sr sr).
@@ -1721,14 +1738,16 @@ Local Ltac preg_eq_discr r rd :=
unfold exec_load1, exec_load2, chunk_load; unfold call_ll_loadv;
try destruct (Mem.loadv _ _ _); simpl; auto.
all: try (fold (ppos rd); Simpl_exists sr; auto; intros rr; Simpl_update).
- + unfold exec_load, exec_load_double, eval_addressing_rlocs_ld, exp_eval;
+ +
+ unfold exec_load, exec_load_double, eval_addressing_rlocs_ld, exp_eval;
destruct ld; destruct a; simpl; unfold control_eval; destruct Ge; auto;
try fold (ppos base);
try erewrite !H0, H; simpl;
- unfold exec_load1, exec_load2, chunk_load; unfold call_ll_loadv;
- try destruct (Mem.loadv _ _ _); simpl; auto;
+ unfold exec_load1, exec_load2; unfold call_ll_loadv;
+ destruct (Mem.loadv _ _ _); simpl; auto;
fold (ppos rd1); rewrite assign_diff; discriminate_ppos; rewrite H;
try destruct (Mem.loadv _ _ _); simpl; auto; Simpl_exists sr;
+ rewrite !load_chunk_neutral;
try (rewrite !assign_diff; discriminate_ppos; reflexivity);
try (destruct base; discriminate_ppos);
repeat (try fold (ppos r); intros; Simpl_update).
@@ -1746,7 +1765,7 @@ Local Ltac preg_eq_discr r rd :=
destruct st; destruct a; simpl; unfold control_eval; destruct Ge; auto;
try fold (ppos base); try fold (ppos rs1);
erewrite !H0, H; simpl;
- unfold exec_store1, exec_store2, chunk_store; unfold call_ll_storev;
+ unfold exec_store1, exec_store2; unfold call_ll_storev;
try destruct (Mem.storev _ _ _ _); simpl; auto;
fold (ppos rs2); rewrite assign_diff; try congruence; try rewrite H0; simpl;
try destruct (Mem.storev _ _ _ _); simpl; auto.
@@ -1903,10 +1922,9 @@ Proof.
try rewrite 2Pregmap.gso, Pregmap.gss; destruct (Val.offset_ptr (rsr PC) (Ptrofs.repr sz));
try reflexivity; discriminate_ppos. Simpl sr.
destruct (PregEq.eq X16 rr); [ subst; Simpl_update |];
- destruct (PregEq.eq X17 rr); [ subst; Simpl_update |];
destruct (PregEq.eq PC rr); [ subst; Simpl_update |].
rewrite !Pregmap.gso; auto;
- apply ppos_discr in n0; apply ppos_discr in n1; apply ppos_discr in n2;
+ apply ppos_discr in n0; apply ppos_discr in n1;
rewrite !assign_diff; auto.
Qed.
@@ -2024,11 +2042,11 @@ Proof.
Qed.
Lemma incrPC_undef_regs_commut l : forall d rs,
- incrPC d (undef_regs (map preg_of l) rs) = undef_regs (map preg_of l) (incrPC d rs).
+ incrPC d (undef_regs l rs) = undef_regs l (incrPC d rs).
Proof.
induction l; simpl; auto.
intros. rewrite IHl. unfold incrPC.
- destruct (PregEq.eq (preg_of a) PC).
+ destruct (PregEq.eq a PC).
- rewrite e. rewrite Pregmap.gss.
simpl. apply f_equal. unfold Pregmap.set.
apply functional_extensionality. intros x.
@@ -2036,7 +2054,9 @@ Proof.
- rewrite Pregmap.gso; auto.
apply f_equal. unfold Pregmap.set.
apply functional_extensionality. intros x.
- destruct (PregEq.eq x PC); subst; auto.
+ destruct (PregEq.eq x PC).
+ + subst. destruct a; simpl; auto. congruence.
+ + auto.
Qed.
Lemma bblock_simu_reduce:
@@ -2473,22 +2493,22 @@ Definition string_of_ldi (ldi: ld_instruction) : ?? pstring:=
DO a' <~ string_of_addressing a;;
DO rd' <~ dreg_name rd;;
RET (Str "PLd_rd_a (" +; (string_of_ld_rd_a ld) +; " " +; rd' +; " " +; a' +; ")")
- | Pldp _ _ _ a =>
+ | Pldp _ _ _ _ _ a =>
DO a' <~ string_of_addressing a;;
RET (Str "Pldp (" +; a' +; ")")
end.
Definition string_of_load (op: load_op) : ?? pstring :=
match op with
- | Oload1 ld a =>
+ | Oload1 ld _ a =>
DO a' <~ string_of_addressing a;;
- DO ld' <~ string_of_ldi ld;;
- RET((Str "Oload1 ") +; " " +; ld' +; " " +; a' +; " ")
- | Oload2 ld a =>
+ (*DO ld' <~ string_of_ldi ld;;*)
+ RET((Str "Oload1 ") +; " " +; string_of_ld_rd_a ld +; " " +; a' +; " ")
+ | Oload2 ld _ a =>
DO a' <~ string_of_addressing a;;
- DO ld' <~ string_of_ldi ld;;
- RET((Str "Oload2 ") +; " " +; ld' +; " " +; a' +; " ")
- | OloadU _ _ => RET (Str "OloadU")
+ (*DO ld' <~ string_of_ldi ld;;*)
+ RET((Str "Oload2 ") +; " " +; string_of_ld_rd_a ld +; " " +; a' +; " ")
+ | OloadU _ _ _ => RET (Str "OloadU")
end.
Definition string_of_st_rs_a (st: store_rs_a) : pstring :=
@@ -2510,22 +2530,22 @@ Definition string_of_sti (sti: st_instruction) : ?? pstring:=
DO a' <~ string_of_addressing a;;
DO rs' <~ dreg_name rs;;
RET (Str "PSt_rs_a (" +; (string_of_st_rs_a st) +; " " +; rs' +; " " +; a' +; ")")
- | Pstp _ _ _ a =>
+ | Pstp _ _ _ _ _ a =>
DO a' <~ string_of_addressing a;;
RET (Str "Pstp (" +; a' +; ")")
end.
Definition string_of_store (op: store_op) : ?? pstring :=
match op with
- | Ostore1 st a =>
+ | Ostore1 st _ a =>
DO a' <~ string_of_addressing a;;
- DO st' <~ string_of_sti st;;
- RET((Str "Ostore1 ") +; " " +; st' +; " " +; a' +; " ")
- | Ostore2 st a =>
+ (*DO st' <~ string_of_sti st;;*)
+ RET((Str "Ostore1 ") +; " " +; string_of_st_rs_a st +; " " +; a' +; " ")
+ | Ostore2 st _ a =>
DO a' <~ string_of_addressing a;;
- DO st' <~ string_of_sti st;;
- RET((Str "Ostore2 ") +; " " +; st' +; " " +; a' +; " ")
- | OstoreU _ _ => RET (Str "OstoreU")
+ (*DO st' <~ string_of_sti st;;*)
+ RET((Str "Ostore2 ") +; " " +; string_of_st_rs_a st +; " " +; a' +; " ")
+ | OstoreU _ _ _ => RET (Str "OstoreU")
end.
Definition string_of_control (op: control_op) : pstring :=
diff --git a/aarch64/Asmexpand.ml b/aarch64/Asmexpand.ml
index 94697df7..8187e077 100644
--- a/aarch64/Asmexpand.ml
+++ b/aarch64/Asmexpand.ml
@@ -74,7 +74,7 @@ let save_parameter_registers ir fr =
let pos = 8*16 + !i*8 in
if !i land 1 = 0 then begin
emit (Pstpx(int_param_regs.(!i), int_param_regs.(!i + 1),
- ADimm(XSP, Z.of_uint pos)));
+ Mint64, Mint64, ADimm(XSP, Z.of_uint pos)));
i := !i + 2
end else begin
emit (Pstrx(int_param_regs.(!i), ADimm(XSP, Z.of_uint pos)));
@@ -169,8 +169,8 @@ let expand_builtin_memcpy_small sz al src dst =
let (rdst, odst) = memcpy_small_arg sz dst tdst in
let rec copy osrc odst sz =
if sz >= 16 then begin
- emit (Pldpx(X16, X30, ADimm(rsrc, osrc)));
- emit (Pstpx(X16, X30, ADimm(rdst, odst)));
+ emit (Pldpx(X16, X30, Mint64, Mint64, ADimm(rsrc, osrc)));
+ emit (Pstpx(X16, X30, Mint64, Mint64, ADimm(rdst, odst)));
copy (Ptrofs.add osrc _16) (Ptrofs.add odst _16) (sz - 16)
end
else if sz >= 8 then begin
@@ -208,8 +208,8 @@ let expand_builtin_memcpy_big sz al src dst =
let lbl = new_label () in
expand_loadimm32 X15 (Z.of_uint (sz / 16));
emit (Plabel lbl);
- emit (Pldpx(X16, X17, ADpostincr(RR1 X30, _16)));
- emit (Pstpx(X16, X17, ADpostincr(RR1 X29, _16)));
+ emit (Pldpx(X16, X17, Mint64, Mint64, ADpostincr(RR1 X30, _16)));
+ emit (Pstpx(X16, X17, Mint64, Mint64, ADpostincr(RR1 X29, _16)));
emit (Psubimm(W, RR1 X15, RR1 X15, _1));
emit (Pcbnz(W, X15, lbl));
if sz mod 16 >= 8 then begin
@@ -337,7 +337,7 @@ let expand_builtin_inline name args res =
| "__builtin_bswap16", [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) ->
emit (Prev16(W, res, a1));
emit (Pandimm(W, res, RR0 res, Z.of_uint 0xFFFF))
- (* Count leading zeros and leading sign bits *)
+ (* Count leading zeros, leading sign bits, trailing zeros *)
| "__builtin_clz", [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) ->
emit (Pclz(W, res, a1))
| ("__builtin_clzl" | "__builtin_clzll"), [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) ->
@@ -346,10 +346,16 @@ let expand_builtin_inline name args res =
emit (Pcls(W, res, a1))
| ("__builtin_clsl" | "__builtin_clsll"), [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) ->
emit (Pcls(X, res, a1))
+ | "__builtin_ctz", [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) ->
+ emit (Prbit(W, res, a1));
+ emit (Pclz(W, res, res))
+ | ("__builtin_ctzl" | "__builtin_ctzll"), [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) ->
+ emit (Prbit(X, res, a1));
+ emit (Pclz(X, res, res))
(* Float arithmetic *)
| "__builtin_fabs", [BA(DR(FR a1))], BR(DR(FR res)) ->
emit (Pfabs(D, res, a1))
- | "__builtin_fsqrt", [BA(DR(FR a1))], BR(DR(FR res)) ->
+ | ("__builtin_fsqrt" | "__builtin_sqrt"), [BA(DR(FR a1))], BR(DR(FR res)) ->
emit (Pfsqrt(D, res, a1))
| "__builtin_fmadd", [BA(DR(FR a1)); BA(DR(FR a2)); BA(DR(FR a3))], BR(DR(FR res)) ->
emit (Pfmadd(D, res, a1, a2, a3))
@@ -359,6 +365,10 @@ let expand_builtin_inline name args res =
emit (Pfnmadd(D, res, a1, a2, a3))
| "__builtin_fnmsub", [BA(DR(FR a1)); BA(DR(FR a2)); BA(DR(FR a3))], BR(DR(FR res)) ->
emit (Pfnmsub(D, res, a1, a2, a3))
+ | "__builtin_fmax", [BA(DR(FR a1)); BA(DR(FR a2))], BR(DR(FR res)) ->
+ emit (Pfmax (D, res, a1, a2))
+ | "__builtin_fmin", [BA(DR(FR a1)); BA(DR(FR a2))], BR(DR(FR res)) ->
+ emit (Pfmin (D, res, a1, a2))
(* Vararg *)
| "__builtin_va_start", [BA(DR(IR(RR1 a)))], _ ->
expand_builtin_va_start a
diff --git a/aarch64/Asmgen.v b/aarch64/Asmgen.v
index cd9175cb..a720c11c 100644
--- a/aarch64/Asmgen.v
+++ b/aarch64/Asmgen.v
@@ -324,12 +324,12 @@ Definition basic_to_instruction (b: basic) : res Asm.instruction :=
| PLoad (PLd_rd_a Pldrd rd a) => do rd' <- freg_of_preg rd; OK (Asm.Pldrd rd' a)
| PLoad (PLd_rd_a Pldrd_a rd a) => do rd' <- freg_of_preg rd; OK (Asm.Pldrd_a rd' a)
- | PLoad (Pldp Pldpw rd1 rd2 a) => do rd1' <- ireg_of_preg rd1;
+ | PLoad (Pldp Pldpw rd1 rd2 chk1 chk2 a) => do rd1' <- ireg_of_preg rd1;
do rd2' <- ireg_of_preg rd2;
- OK (Asm.Pldpw rd1 rd2 a)
- | PLoad (Pldp Pldpx rd1 rd2 a) => do rd1' <- ireg_of_preg rd1;
+ OK (Asm.Pldpw rd1 rd2 chk1 chk2 a)
+ | PLoad (Pldp Pldpx rd1 rd2 chk1 chk2 a) => do rd1' <- ireg_of_preg rd1;
do rd2' <- ireg_of_preg rd2;
- OK (Asm.Pldpx rd1 rd2 a)
+ OK (Asm.Pldpx rd1 rd2 chk1 chk2 a)
| PStore (PSt_rs_a Pstrw r a) => do r' <- ireg_of_preg r; OK (Asm.Pstrw r' a)
| PStore (PSt_rs_a Pstrw_a r a) => do r' <- ireg_of_preg r; OK (Asm.Pstrw_a r' a)
@@ -342,12 +342,12 @@ Definition basic_to_instruction (b: basic) : res Asm.instruction :=
| PStore (PSt_rs_a Pstrd r a) => do r' <- freg_of_preg r; OK (Asm.Pstrd r' a)
| PStore (PSt_rs_a Pstrd_a r a) => do r' <- freg_of_preg r; OK (Asm.Pstrd_a r' a)
- | PStore (Pstp Pstpw rs1 rs2 a) => do rs1' <- ireg_of_preg rs1;
+ | PStore (Pstp Pstpw rs1 rs2 chk1 chk2 a) => do rs1' <- ireg_of_preg rs1;
do rs2' <- ireg_of_preg rs2;
- OK (Asm.Pstpw rs1 rs2 a)
- | PStore (Pstp Pstpx rs1 rs2 a) => do rs1' <- ireg_of_preg rs1;
+ OK (Asm.Pstpw rs1 rs2 chk1 chk2 a)
+ | PStore (Pstp Pstpx rs1 rs2 chk1 chk2 a) => do rs1' <- ireg_of_preg rs1;
do rs2' <- ireg_of_preg rs2;
- OK (Asm.Pstpx rs1 rs2 a)
+ OK (Asm.Pstpx rs1 rs2 chk1 chk2 a)
| Pallocframe sz linkofs => OK (Asm.Pallocframe sz linkofs)
| Pfreeframe sz linkofs => OK (Asm.Pfreeframe sz linkofs)
diff --git a/aarch64/Asmgenproof.v b/aarch64/Asmgenproof.v
index 690a54a2..cec8ea69 100644
--- a/aarch64/Asmgenproof.v
+++ b/aarch64/Asmgenproof.v
@@ -1092,11 +1092,11 @@ Proof.
+ next_stuck_cong.
Qed.
-Lemma load_double_preserved n rs1 m1 rs1' m1' rs2 m2 rd1 rd2 chk f a: forall
+Lemma load_double_preserved n rs1 m1 rs1' m1' rs2 m2 rd1 rd2 chk1 chk2 f a: forall
(BOUNDED: 0 <= n <= Ptrofs.max_unsigned)
(MATCHI: match_internal n (State rs1 m1) (State rs2 m2))
- (HLOAD: exec_load_double lk chk f a rd1 rd2 rs1 m1 = Next rs1' m1'),
- exists (rs2' : regset) (m2' : mem), Asm.exec_load_double tge chk f a rd1 rd2 rs2 m2 = Next rs2' m2'
+ (HLOAD: exec_load_double lk chk1 chk2 f a rd1 rd2 rs1 m1 = Next rs1' m1'),
+ exists (rs2' : regset) (m2' : mem), Asm.exec_load_double tge chk1 chk2 f a rd1 rd2 rs2 m2 = Next rs2' m2'
/\ match_internal (n + 1) (State rs1' m1') (State rs2' m2').
Proof.
intros.
@@ -1105,10 +1105,10 @@ Proof.
erewrite <- !eval_addressing_preserved; eauto.
destruct (is_pair_addressing_mode_correct a); try discriminate.
destruct (Mem.loadv _ _ _);
- destruct (Mem.loadv chk m2
+ destruct (Mem.loadv chk2 m2
(eval_addressing lk
- (get_offset_addr a match chk with
- | Mint32 => 4
+ (get_offset_addr a match chk1 with
+ | Mint32 | Many32 => 4
| _ => 8
end) rs1));
inversion HLOAD; auto.
@@ -1141,11 +1141,11 @@ Proof.
+ next_stuck_cong.
Qed.
-Lemma store_double_preserved n rs1 m1 rs1' m1' rs2 m2 v1 v2 chk a: forall
+Lemma store_double_preserved n rs1 m1 rs1' m1' rs2 m2 v1 v2 chk1 chk2 a: forall
(BOUNDED: 0 <= n <= Ptrofs.max_unsigned)
(MATCHI: match_internal n (State rs1 m1) (State rs2 m2))
- (HSTORE: exec_store_double lk chk a v1 v2 rs1 m1 = Next rs1' m1'),
- exists (rs2' : regset) (m2' : mem), Asm.exec_store_double tge chk a v1 v2 rs2 m2 = Next rs2' m2'
+ (HSTORE: exec_store_double lk chk1 chk2 a v1 v2 rs1 m1 = Next rs1' m1'),
+ exists (rs2' : regset) (m2' : mem), Asm.exec_store_double tge chk1 chk2 a v1 v2 rs2 m2 = Next rs2' m2'
/\ match_internal (n + 1) (State rs1' m1') (State rs2' m2').
Proof.
intros.
@@ -1154,12 +1154,13 @@ Proof.
erewrite <- !eval_addressing_preserved; eauto.
destruct (is_pair_addressing_mode_correct a); try discriminate.
destruct (Mem.storev _ _ _ _);
- try destruct (Mem.storev chk m
- (eval_addressing lk
- (get_offset_addr a match chk with
- | Mint32 => 4
- | _ => 8
- end) rs1) v2);
+ try destruct (Mem.storev chk2 m
+ (eval_addressing lk
+ (get_offset_addr a
+ match chk1 with
+ | Mint32 | Many32 => 4
+ | _ => 8
+ end) rs1) v2);
inversion HSTORE; auto.
repeat (econstructor; eauto).
* eapply nextinstr_agree_but_pc; intros.
@@ -1935,28 +1936,25 @@ Proof.
rewrite <- (label_pos_preserved f); auto.
inversion MATCHI; subst.
destruct label_pos; next_stuck_cong.
- destruct (((incrPC (Ptrofs.repr (size bb)) rs1) # X16 <- Vundef) # X17 <- Vundef PC) eqn:INCRPC; next_stuck_cong.
+ destruct ((incrPC (Ptrofs.repr (size bb)) rs1) # X16 <- Vundef PC) eqn:INCRPC; next_stuck_cong.
inversion H0; auto. repeat (econstructor; eauto).
rewrite !Pregmap.gso; try congruence.
rewrite <- AGPC.
unfold incrPC in *.
destruct (rs1 PC) eqn:EQRS1; simpl in *; try discriminate.
- replace (((rs2 # X16 <- Vundef) # X17 <- Vundef) # PC <- (Vptr b0 (Ptrofs.repr z))) with
- ((((rs1 # PC <- (Vptr b0 (Ptrofs.add i1 (Ptrofs.repr (size bb))))) # X16 <-
- Vundef) # X17 <- Vundef) # PC <- (Vptr b (Ptrofs.repr z))); auto.
+ replace ((rs2 # X16 <- Vundef) # PC <- (Vptr b0 (Ptrofs.repr z))) with
+ (((rs1 # PC <- (Vptr b0 (Ptrofs.add i1 (Ptrofs.repr (size bb))))) # X16 <-
+ Vundef) # PC <- (Vptr b (Ptrofs.repr z))); auto.
eapply functional_extensionality; intros x.
destruct (PregEq.eq x PC); subst.
+ rewrite Pregmap.gso in INCRPC; try congruence.
- rewrite Pregmap.gso in INCRPC; try congruence.
rewrite Pregmap.gss in INCRPC.
rewrite !Pregmap.gss in *; congruence.
+ rewrite Pregmap.gso; auto.
rewrite (Pregmap.gso (i := x) (j := PC)); auto.
- destruct (PregEq.eq x X17); subst.
+ destruct (PregEq.eq x X16); subst.
* rewrite !Pregmap.gss; auto.
- * rewrite !(Pregmap.gso (i := x) (j:= X17)); auto. destruct (PregEq.eq x X16); subst.
- -- rewrite !Pregmap.gss; auto.
- -- rewrite !Pregmap.gso; auto.
+ * rewrite !Pregmap.gso; auto.
Qed.
Lemma last_instruction_cannot_be_label bb:
@@ -2187,9 +2185,16 @@ Proof.
reflexivity. }
apply set_builtin_res_dont_move_pc_gen.
-- erewrite !set_builtin_map_not_pc.
- erewrite !undef_regs_other_2.
- rewrite HPC; auto. all: rewrite preg_notin_charact; intros; try discriminate.
+ erewrite !undef_regs_other.
+ rewrite HPC; auto.
+ all: intros; simpl in *; destruct H3 as [HX16 | [HX30 | HDES]]; subst; try discriminate;
+ exploit list_in_map_inv; eauto; intros [mr [A B]]; subst; discriminate.
-- intros. eapply undef_reg_preserved; eauto.
+ intros. destruct (PregEq.eq X16 r0); destruct (PregEq.eq X30 r0); subst.
+ rewrite Pregmap.gso, Pregmap.gss; try congruence.
+ do 2 (rewrite Pregmap.gso, Pregmap.gss; try discriminate; auto).
+ rewrite 2Pregmap.gss; auto.
+ rewrite !Pregmap.gso; auto.
Qed.
Lemma exec_exit_simulation_star b ofs f bb s2 t rs m rs' m': forall
diff --git a/aarch64/CBuiltins.ml b/aarch64/CBuiltins.ml
index fdc1372d..e2a9c87a 100644
--- a/aarch64/CBuiltins.ml
+++ b/aarch64/CBuiltins.ml
@@ -6,6 +6,9 @@
(* *)
(* Copyright Institut National de Recherche en Informatique et en *)
(* Automatique. All rights reserved. This file is distributed *)
+(* under the terms of the GNU General Public License as published by *)
+(* the Free Software Foundation, either version 2 of the License, or *)
+(* (at your option) any later version. This file is also distributed *)
(* under the terms of the INRIA Non-Commercial License Agreement. *)
(* *)
(* *********************************************************************)
@@ -29,14 +32,6 @@ let builtins = {
"__builtin_fence",
(TVoid [], [], false);
(* Integer arithmetic *)
- "__builtin_bswap64",
- (TInt(IULongLong, []), [TInt(IULongLong, [])], false);
- "__builtin_clz",
- (TInt(IInt, []), [TInt(IUInt, [])], false);
- "__builtin_clzl",
- (TInt(IInt, []), [TInt(IULong, [])], false);
- "__builtin_clzll",
- (TInt(IInt, []), [TInt(IULongLong, [])], false);
"__builtin_cls",
(TInt(IInt, []), [TInt(IInt, [])], false);
"__builtin_clsl",
diff --git a/aarch64/Machregsaux.ml b/aarch64/Machregsaux.ml
index f13a9ff5..41db3bd4 100644
--- a/aarch64/Machregsaux.ml
+++ b/aarch64/Machregsaux.ml
@@ -12,28 +12,9 @@
(** Auxiliary functions on machine registers *)
-open Camlcoq
-open Machregs
-
-let register_names : (mreg, string) Hashtbl.t = Hashtbl.create 31
-
-let _ =
- List.iter
- (fun (s, r) -> Hashtbl.add register_names r (camlstring_of_coqstring s))
- Machregs.register_names
-
let is_scratch_register s =
s = "X16" || s = "x16" || s = "X30" || s = "x30"
-
-let name_of_register r =
- try Some (Hashtbl.find register_names r) with Not_found -> None
-
-let register_by_name s =
- Machregs.register_by_name (coqstring_uppercase_ascii_of_camlstring s)
-
-let can_reserve_register r = Conventions1.is_callee_save r
-
let class_of_type = function
| AST.Tint | AST.Tlong -> 0
| AST.Tfloat | AST.Tsingle -> 1
diff --git a/aarch64/PeepholeOracle.ml b/aarch64/PeepholeOracle.ml
index 94efc75e..a6945b9f 100644
--- a/aarch64/PeepholeOracle.ml
+++ b/aarch64/PeepholeOracle.ml
@@ -16,6 +16,7 @@ open Asm
open Int64
open Printf
+(* If true, the oracle will print a msg for each applied peephole *)
let debug = false
(* Functions to verify the immediate offset range for ldp/stp *)
@@ -68,12 +69,9 @@ let dreg_of_ireg r = IR (RR1 r)
* affectation eliminates the potential
* candidate *)
let verify_load_affect reg rd b rev =
- let b = (IR b) in
+ let b = IR b in
let rd = dreg_of_ireg rd in
- if not rev then
- dreg_eq reg b
- else
- (dreg_eq reg b) || (dreg_eq reg rd)
+ if not rev then dreg_eq reg b else dreg_eq reg b || dreg_eq reg rd
(* Return true if an intermediate
* read eliminates the potential
@@ -86,9 +84,9 @@ let verify_load_read reg rd b rev =
* affectation eliminates the potential
* candidate *)
let verify_store_affect reg rs b rev =
- let b = (IR b) in
+ let b = IR b in
let rs = dreg_of_ireg rs in
- (dreg_eq reg b) || (dreg_eq reg rs)
+ dreg_eq reg b || dreg_eq reg rs
(* Affect a symbolic memory list of potential replacements
* for a given write in reg *)
@@ -97,17 +95,11 @@ let rec affect_symb_mem reg insta pot_rep stype rev =
| [] -> []
| h0 :: t0 -> (
match (insta.(h0), stype) with
- | PLoad (PLd_rd_a (Pldrw, IR (RR1 rd), ADimm (b, n))), "ldrw" ->
- if verify_load_affect reg rd b rev then affect_symb_mem reg insta t0 stype rev
- else h0 :: affect_symb_mem reg insta t0 stype rev
- | PLoad (PLd_rd_a (Pldrx, IR (RR1 rd), ADimm (b, n))), "ldrx" ->
- if verify_load_affect reg rd b rev then affect_symb_mem reg insta t0 stype rev
- else h0 :: affect_symb_mem reg insta t0 stype rev
- | PStore (PSt_rs_a (Pstrw, IR (RR1 rs), ADimm (b, n))), "strw" ->
- if verify_store_affect reg rs b rev then
+ | PLoad (PLd_rd_a (_, IR (RR1 rd), ADimm (b, n))), "ldr" ->
+ if verify_load_affect reg rd b rev then
affect_symb_mem reg insta t0 stype rev
else h0 :: affect_symb_mem reg insta t0 stype rev
- | PStore (PSt_rs_a (Pstrx, IR (RR1 rs), ADimm (b, n))), "strx" ->
+ | PStore (PSt_rs_a (_, IR (RR1 rs), ADimm (b, n))), "str" ->
if verify_store_affect reg rs b rev then
affect_symb_mem reg insta t0 stype rev
else h0 :: affect_symb_mem reg insta t0 stype rev
@@ -121,23 +113,20 @@ let rec read_symb_mem reg insta pot_rep stype rev =
| [] -> []
| h0 :: t0 -> (
match (insta.(h0), stype) with
- | PLoad (PLd_rd_a (Pldrw, IR (RR1 rd), ADimm (b, n))), "ldrw" ->
- if verify_load_read reg rd b rev then read_symb_mem reg insta t0 stype rev
+ | PLoad (PLd_rd_a (_, IR (RR1 rd), ADimm (b, n))), "ldr" ->
+ if verify_load_read reg rd b rev then
+ read_symb_mem reg insta t0 stype rev
else h0 :: read_symb_mem reg insta t0 stype rev
- | PLoad (PLd_rd_a (Pldrx, IR (RR1 rd), ADimm (b, n))), "ldrx" ->
- if verify_load_read reg rd b rev then read_symb_mem reg insta t0 stype rev
- else h0 :: read_symb_mem reg insta t0 stype rev
- | PStore (PSt_rs_a (Pstrw, IR (RR1 rs), ADimm (b, n))), "strw" ->
- h0 :: affect_symb_mem reg insta t0 stype rev
- | PStore (PSt_rs_a (Pstrx, IR (RR1 rs), ADimm (b, n))), "strx" ->
- h0 :: affect_symb_mem reg insta t0 stype rev
+ | PStore (PSt_rs_a (_, IR (RR1 rs), ADimm (b, n))), "str" ->
+ h0 :: read_symb_mem reg insta t0 stype rev
| _, _ -> failwith "read_symb_mem: Found an inconsistent inst in pot_rep")
(* Update a symbolic memory list of potential replacements
* for any addressing *)
let update_pot_rep_addressing a insta pot_rep stype rev =
match a with
- | ADimm (base, _) -> pot_rep := read_symb_mem (IR base) insta !pot_rep stype rev
+ | ADimm (base, _) ->
+ pot_rep := read_symb_mem (IR base) insta !pot_rep stype rev
| ADreg (base, r) ->
pot_rep := read_symb_mem (IR base) insta !pot_rep stype rev;
pot_rep := read_symb_mem (dreg_of_ireg r) insta !pot_rep stype rev
@@ -161,7 +150,8 @@ let update_pot_rep_basic inst insta pot_rep stype rev =
match inst with
| PArith i -> (
match i with
- | PArithP (_, rd) -> pot_rep := affect_symb_mem rd insta !pot_rep stype rev
+ | PArithP (_, rd) ->
+ pot_rep := affect_symb_mem rd insta !pot_rep stype rev
| PArithPP (_, rd, rs) ->
pot_rep := affect_symb_mem rd insta !pot_rep stype rev;
pot_rep := read_symb_mem rs insta !pot_rep stype rev
@@ -173,14 +163,16 @@ let update_pot_rep_basic inst insta pot_rep stype rev =
pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev;
(match rs1 with
| RR0 rs1 ->
- pot_rep := read_symb_mem (dreg_of_ireg rs1) insta !pot_rep stype rev
+ pot_rep :=
+ read_symb_mem (dreg_of_ireg rs1) insta !pot_rep stype rev
| _ -> ());
pot_rep := read_symb_mem (dreg_of_ireg rs2) insta !pot_rep stype rev
| PArithRR0 (_, rd, rs) -> (
pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev;
match rs with
| RR0 rs1 ->
- pot_rep := read_symb_mem (dreg_of_ireg rs1) insta !pot_rep stype rev
+ pot_rep :=
+ read_symb_mem (dreg_of_ireg rs1) insta !pot_rep stype rev
| _ -> ())
| PArithARRRR0 (_, rd, rs1, rs2, rs3) -> (
pot_rep := affect_symb_mem (dreg_of_ireg rd) insta !pot_rep stype rev;
@@ -188,7 +180,8 @@ let update_pot_rep_basic inst insta pot_rep stype rev =
pot_rep := read_symb_mem (dreg_of_ireg rs2) insta !pot_rep stype rev;
match rs3 with
| RR0 rs1 ->
- pot_rep := read_symb_mem (dreg_of_ireg rs1) insta !pot_rep stype rev
+ pot_rep :=
+ read_symb_mem (dreg_of_ireg rs1) insta !pot_rep stype rev
| _ -> ())
| PArithComparisonPP (_, rs1, rs2) ->
pot_rep := read_symb_mem rs1 insta !pot_rep stype rev;
@@ -196,7 +189,8 @@ let update_pot_rep_basic inst insta pot_rep stype rev =
| PArithComparisonR0R (_, rs1, rs2) ->
(match rs1 with
| RR0 rs1 ->
- pot_rep := read_symb_mem (dreg_of_ireg rs1) insta !pot_rep stype rev
+ pot_rep :=
+ read_symb_mem (dreg_of_ireg rs1) insta !pot_rep stype rev
| _ -> ());
pot_rep := read_symb_mem (dreg_of_ireg rs2) insta !pot_rep stype rev
| PArithComparisonP (_, rs1) ->
@@ -206,7 +200,8 @@ let update_pot_rep_basic inst insta pot_rep stype rev =
| Pfmovi (_, _, rs) -> (
match rs with
| RR0 rs ->
- pot_rep := read_symb_mem (dreg_of_ireg rs) insta !pot_rep stype rev
+ pot_rep :=
+ read_symb_mem (dreg_of_ireg rs) insta !pot_rep stype rev
| _ -> ())
| Pcsel (rd, rs1, rs2, _) ->
pot_rep := affect_symb_mem rd insta !pot_rep stype rev;
@@ -214,18 +209,26 @@ let update_pot_rep_basic inst insta pot_rep stype rev =
pot_rep := read_symb_mem rs2 insta !pot_rep stype rev
| Pfnmul (_, rd, rs1, rs2) -> ())
| PLoad i -> (
+ (* Here, we consider a different behavior for load and store potential candidates:
+ * a load does not obviously cancel the ldp candidates, but it does for any stp candidate. *)
match stype with
- | "ldrw" | "ldrx" -> (
+ | "ldr" -> (
match i with
| PLd_rd_a (_, rd, a) ->
pot_rep := affect_symb_mem rd insta !pot_rep stype rev;
update_pot_rep_addressing a insta pot_rep stype rev
- | Pldp (_, rd1, rd2, a) ->
- pot_rep := affect_symb_mem (dreg_of_ireg rd1) insta !pot_rep stype rev;
- pot_rep := affect_symb_mem (dreg_of_ireg rd2) insta !pot_rep stype rev;
+ | Pldp (_, rd1, rd2, _, _, a) ->
+ pot_rep :=
+ affect_symb_mem (dreg_of_ireg rd1) insta !pot_rep stype rev;
+ pot_rep :=
+ affect_symb_mem (dreg_of_ireg rd2) insta !pot_rep stype rev;
update_pot_rep_addressing a insta pot_rep stype rev)
| _ -> pot_rep := [])
- | PStore _ -> pot_rep := []
+ | PStore _ -> (
+ (* Here, we consider that a store cancel all ldp candidates, but it is far more complicated for stp ones :
+ * if we cancel stp candidates here, we would prevent ourselves to apply the non-consec store peephole.
+ * To solve this issue, the store candidates cleaning is managed directly in the peephole function below. *)
+ match stype with "ldr" -> pot_rep := [] | _ -> ())
| Pallocframe (_, _) -> pot_rep := []
| Pfreeframe (_, _) -> pot_rep := []
| Ploadsymbol (rd, _) ->
@@ -248,17 +251,21 @@ let rec search_compat_rep r2 b2 n2 insta pot_rep stype =
| [] -> None
| h0 :: t0 -> (
match (insta.(h0), stype) with
- | PLoad (PLd_rd_a (Pldrw, IR (RR1 rd1), ADimm (b1, n1))), "ldrw" ->
- if is_valid_ldrw rd1 r2 b1 b2 n1 n2 then Some (h0, rd1, b1, n1)
+ | PLoad (PLd_rd_a (ld1, IR (RR1 rd1), ADimm (b1, n1))), "ldrw" ->
+ if is_valid_ldrw rd1 r2 b1 b2 n1 n2 then
+ Some (h0, chunk_load ld1, rd1, b1, n1)
else search_compat_rep r2 b2 n2 insta t0 stype
- | PLoad (PLd_rd_a (Pldrx, IR (RR1 rd1), ADimm (b1, n1))), "ldrx" ->
- if is_valid_ldrx rd1 r2 b1 b2 n1 n2 then Some (h0, rd1, b1, n1)
+ | PLoad (PLd_rd_a (ld1, IR (RR1 rd1), ADimm (b1, n1))), "ldrx" ->
+ if is_valid_ldrx rd1 r2 b1 b2 n1 n2 then
+ Some (h0, chunk_load ld1, rd1, b1, n1)
else search_compat_rep r2 b2 n2 insta t0 stype
- | PStore (PSt_rs_a (Pstrw, IR (RR1 rs1), ADimm (b1, n1))), "strw" ->
- if is_valid_strw b1 b2 n1 n2 then Some (h0, rs1, b1, n1)
+ | PStore (PSt_rs_a (st1, IR (RR1 rs1), ADimm (b1, n1))), "strw" ->
+ if is_valid_strw b1 b2 n1 n2 then
+ Some (h0, chunk_store st1, rs1, b1, n1)
else search_compat_rep r2 b2 n2 insta t0 stype
- | PStore (PSt_rs_a (Pstrx, IR (RR1 rs1), ADimm (b1, n1))), "strx" ->
- if is_valid_strx b1 b2 n1 n2 then Some (h0, rs1, b1, n1)
+ | PStore (PSt_rs_a (st1, IR (RR1 rs1), ADimm (b1, n1))), "strx" ->
+ if is_valid_strx b1 b2 n1 n2 then
+ Some (h0, chunk_store st1, rs1, b1, n1)
else search_compat_rep r2 b2 n2 insta t0 stype
| _, _ ->
failwith "search_compat_rep: Found an inconsistent inst in pot_rep")
@@ -270,20 +277,25 @@ let rec search_compat_rep_inv r2 b2 n2 insta pot_rep stype =
| [] -> None
| h0 :: t0 -> (
match (insta.(h0), stype) with
- | PLoad (PLd_rd_a (Pldrw, IR (RR1 rd1), ADimm (b1, n1))), "ldrw" ->
- if is_valid_ldrw r2 rd1 b2 b1 n2 n1 then Some (h0, rd1, b1, n1)
+ | PLoad (PLd_rd_a (ld1, IR (RR1 rd1), ADimm (b1, n1))), "ldrw" ->
+ if is_valid_ldrw r2 rd1 b2 b1 n2 n1 then
+ Some (h0, chunk_load ld1, rd1, b1, n1)
else search_compat_rep_inv r2 b2 n2 insta t0 stype
- | PLoad (PLd_rd_a (Pldrx, IR (RR1 rd1), ADimm (b1, n1))), "ldrx" ->
- if is_valid_ldrx r2 rd1 b2 b1 n2 n1 then Some (h0, rd1, b1, n1)
+ | PLoad (PLd_rd_a (ld1, IR (RR1 rd1), ADimm (b1, n1))), "ldrx" ->
+ if is_valid_ldrx r2 rd1 b2 b1 n2 n1 then
+ Some (h0, chunk_load ld1, rd1, b1, n1)
else search_compat_rep_inv r2 b2 n2 insta t0 stype
- | PStore (PSt_rs_a (Pstrw, IR (RR1 rs1), ADimm (b1, n1))), "strw" ->
- if is_valid_strw b2 b1 n2 n1 then Some (h0, rs1, b1, n1)
+ | PStore (PSt_rs_a (st1, IR (RR1 rs1), ADimm (b1, n1))), "strw" ->
+ if is_valid_strw b2 b1 n2 n1 then
+ Some (h0, chunk_store st1, rs1, b1, n1)
else search_compat_rep_inv r2 b2 n2 insta t0 stype
- | PStore (PSt_rs_a (Pstrx, IR (RR1 rs1), ADimm (b1, n1))), "strx" ->
- if is_valid_strx b2 b1 n2 n1 then Some (h0, rs1, b1, n1)
+ | PStore (PSt_rs_a (st1, IR (RR1 rs1), ADimm (b1, n1))), "strx" ->
+ if is_valid_strx b2 b1 n2 n1 then
+ Some (h0, chunk_store st1, rs1, b1, n1)
else search_compat_rep_inv r2 b2 n2 insta t0 stype
| _, _ ->
- failwith "search_compat_rep_inv: Found an inconsistent inst in pot_rep")
+ failwith
+ "search_compat_rep_inv: Found an inconsistent inst in pot_rep")
(* This is useful to manage the case were the immofs
* of the first ldr/str is greater than the second one *)
@@ -292,6 +304,60 @@ let min_is_rev n1 n2 =
let z2 = to_int (camlint64_of_coqint n2) in
if z1 < z2 then true else false
+(* Below functions were added to merge pattern matching cases in peephole,
+ * thanks to this, we can make the chunk difference (int/any) compatible. *)
+let trans_ldi (ldi : load_rd_a) : load_rd1_rd2_a =
+ match ldi with
+ | Pldrw | Pldrw_a -> Pldpw
+ | Pldrx | Pldrx_a -> Pldpx
+ | _ -> failwith "trans_ldi: Found a non compatible load to translate"
+
+let trans_sti (sti : store_rs_a) : store_rs1_rs2_a =
+ match sti with
+ | Pstrw | Pstrw_a -> Pstpw
+ | Pstrx | Pstrx_a -> Pstpx
+ | _ -> failwith "trans_sti: Found a non compatible store to translate"
+
+let is_compat_load (ldi : load_rd_a) =
+ match ldi with Pldrw | Pldrw_a | Pldrx | Pldrx_a -> true | _ -> false
+
+let are_compat_load (ldi1 : load_rd_a) (ldi2 : load_rd_a) =
+ match ldi1 with
+ | Pldrw | Pldrw_a -> ( match ldi2 with Pldrw | Pldrw_a -> true | _ -> false)
+ | Pldrx | Pldrx_a -> ( match ldi2 with Pldrx | Pldrx_a -> true | _ -> false)
+ | _ -> false
+
+let is_compat_store (sti : store_rs_a) =
+ match sti with Pstrw | Pstrw_a | Pstrx | Pstrx_a -> true | _ -> false
+
+let are_compat_store (sti1 : store_rs_a) (sti2 : store_rs_a) =
+ match sti1 with
+ | Pstrw | Pstrw_a -> ( match sti2 with Pstrw | Pstrw_a -> true | _ -> false)
+ | Pstrx | Pstrx_a -> ( match sti2 with Pstrx | Pstrx_a -> true | _ -> false)
+ | _ -> false
+
+let get_load_string (ldi : load_rd_a) =
+ match ldi with
+ | Pldrw | Pldrw_a -> "ldrw"
+ | Pldrx | Pldrx_a -> "ldrx"
+ | _ -> failwith "get_load_string: Found a non compatible load to translate"
+
+let get_store_string (sti : store_rs_a) =
+ match sti with
+ | Pstrw | Pstrw_a -> "strw"
+ | Pstrx | Pstrx_a -> "strx"
+ | _ -> failwith "get_store_string: Found a non compatible store to translate"
+
+let is_valid_ldr rd1 rd2 b1 b2 n1 n2 stype =
+ match stype with
+ | "ldrw" -> is_valid_ldrw rd1 rd2 b1 b2 n1 n2
+ | _ -> is_valid_ldrx rd1 rd2 b1 b2 n1 n2
+
+let is_valid_str b1 b2 n1 n2 stype =
+ match stype with
+ | "strw" -> is_valid_strw b1 b2 n1 n2
+ | _ -> is_valid_strx b1 b2 n1 n2
+
(* Main peephole function in backward style *)
let pair_rep_inv insta =
(* Each list below is a symbolic mem representation
@@ -305,65 +371,77 @@ let pair_rep_inv insta =
let h0 = insta.(i) in
let h1 = insta.(i - 1) in
(* Here we need to update every symbolic memory according to the matched inst *)
- update_pot_rep_basic h0 insta pot_ldrw_rep "ldrw" true;
- update_pot_rep_basic h0 insta pot_ldrx_rep "ldrx" true;
- update_pot_rep_basic h0 insta pot_strw_rep "strw" true;
- update_pot_rep_basic h0 insta pot_strx_rep "strx" true;
+ update_pot_rep_basic h0 insta pot_ldrw_rep "ldr" true;
+ update_pot_rep_basic h0 insta pot_ldrx_rep "ldr" true;
+ update_pot_rep_basic h0 insta pot_strw_rep "str" true;
+ update_pot_rep_basic h0 insta pot_strx_rep "str" true;
match (h0, h1) with
- (* Non-consecutive ldrw *)
- | PLoad (PLd_rd_a (Pldrw, IR (RR1 rd1), ADimm (b1, n1))), _ -> (
- (* Search a previous compatible load *)
- match search_compat_rep_inv rd1 b1 n1 insta !pot_ldrw_rep "ldrw" with
- (* If we can't find a candidate, add the current load as a potential future one *)
- | None -> pot_ldrw_rep := i :: !pot_ldrw_rep
- (* Else, perform the peephole *)
- | Some (rep, r, b, n) ->
- (* The two lines below are used to filter the elected candidate *)
- let filt x = x != rep in
- pot_ldrw_rep := List.filter filt !pot_ldrw_rep;
- insta.(rep) <- Pnop;
- if min_is_rev n n1 then (
- if debug then eprintf "LDP_W_BACK_SPACED_PEEP_IMM_INC\n";
- insta.(i) <- PLoad (Pldp (Pldpw, r, rd1, ADimm (b, n))))
- else (
- if debug then eprintf "LDP_W_BACK_SPACED_PEEP_IMM_DEC\n";
- insta.(i) <- PLoad (Pldp (Pldpw, rd1, r, ADimm (b, n1)))))
- (* Non-consecutive ldrx *)
- | PLoad (PLd_rd_a (Pldrx, IR (RR1 rd1), ADimm (b1, n1))), _ -> (
- match search_compat_rep_inv rd1 b1 n1 insta !pot_ldrx_rep "ldrx" with
- | None -> pot_ldrx_rep := i :: !pot_ldrx_rep
- | Some (rep, r, b, n) ->
- let filt x = x != rep in
- pot_ldrx_rep := List.filter filt !pot_ldrx_rep;
- insta.(rep) <- Pnop;
- if min_is_rev n n1 then (
- if debug then eprintf "LDP_X_BACK_SPACED_PEEP_IMM_INC\n";
- insta.(i) <- PLoad (Pldp (Pldpx, r, rd1, ADimm (b, n))))
- else (
- if debug then eprintf "LDP_X_BACK_SPACED_PEEP_IMM_DEC\n";
- insta.(i) <- PLoad (Pldp (Pldpx, rd1, r, ADimm (b, n1)))))
- (* Non-consecutive stpw *)
- | PStore (PSt_rs_a (Pstrw, IR (RR1 rs1), ADimm (b1, n1))), _ -> (
- match search_compat_rep_inv rs1 b1 n1 insta !pot_strw_rep "strw" with
- | None -> pot_strw_rep := i :: !pot_strw_rep
- | Some (rep, r, b, n) ->
- let filt x = x != rep in
- pot_strw_rep := List.filter filt !pot_strw_rep;
- insta.(rep) <- Pnop;
- if debug then eprintf "STP_W_BACK_SPACED_PEEP_IMM_INC\n";
- insta.(i) <- PStore (Pstp (Pstpw, r, rs1, ADimm (b, n))))
- (* Non-consecutive stpx *)
- | PStore (PSt_rs_a (Pstrx, IR (RR1 rs1), ADimm (b1, n1))), _ -> (
- match search_compat_rep_inv rs1 b1 n1 insta !pot_strx_rep "strx" with
- | None -> pot_strx_rep := i :: !pot_strx_rep
- | Some (rep, r, b, n) ->
- let filt x = x != rep in
- pot_strx_rep := List.filter filt !pot_strx_rep;
- insta.(rep) <- Pnop;
- if debug then eprintf "STP_X_BACK_SPACED_PEEP_IMM_INC\n";
- insta.(i) <- PStore (Pstp (Pstpx, r, rs1, ADimm (b, n))))
- (* Any other inst *)
- | _, _ -> ()
+ (* Non-consecutive ldr *)
+ | PLoad (PLd_rd_a (ldi, IR (RR1 rd1), ADimm (b1, n1))), _ -> (
+ if is_compat_load ldi then
+ let pot_rep =
+ match ldi with Pldrw | Pldrw_a -> pot_ldrw_rep | _ -> pot_ldrx_rep
+ in
+ (* Search a previous compatible load *)
+ match
+ search_compat_rep_inv rd1 b1 n1 insta !pot_rep (get_load_string ldi)
+ with
+ (* If we can't find a candidate, add the current load as a potential future one *)
+ | None -> pot_rep := i :: !pot_rep
+ (* Else, perform the peephole *)
+ | Some (rep, c, r, b, n) ->
+ (* The two lines below are used to filter the elected candidate *)
+ let filt x = x != rep in
+ pot_rep := List.filter filt !pot_rep;
+ insta.(rep) <- Pnop;
+ if min_is_rev n n1 then (
+ if debug then eprintf "LDP_BACK_SPACED_PEEP_IMM_INC\n";
+ insta.(i) <-
+ PLoad
+ (Pldp
+ (trans_ldi ldi, r, rd1, c, chunk_load ldi, ADimm (b, n))))
+ else (
+ if debug then eprintf "LDP_BACK_SPACED_PEEP_IMM_DEC\n";
+ insta.(i) <-
+ PLoad
+ (Pldp
+ (trans_ldi ldi, rd1, r, chunk_load ldi, c, ADimm (b, n1))))
+ )
+ (* Non-consecutive str *)
+ | PStore (PSt_rs_a (sti, IR (RR1 rd1), ADimm (b1, n1))), _ -> (
+ if is_compat_store sti then
+ let pot_rep =
+ match sti with Pstrw | Pstrw_a -> pot_strw_rep | _ -> pot_strx_rep
+ in
+ (* Search a previous compatible store *)
+ match
+ search_compat_rep_inv rd1 b1 n1 insta !pot_rep
+ (get_store_string sti)
+ with
+ (* If we can't find a candidate, clean and add the current store as a potential future one *)
+ | None ->
+ pot_strw_rep := [];
+ pot_strx_rep := [];
+ pot_rep := i :: !pot_rep
+ (* Else, perform the peephole *)
+ | Some (rep, c, r, b, n) ->
+ (* The two lines below are used to filter the elected candidate *)
+ let filt x = x != rep in
+ pot_rep := List.filter filt !pot_rep;
+ insta.(rep) <- Pnop;
+ if debug then eprintf "STP_BACK_SPACED_PEEP_IMM_INC\n";
+ insta.(i) <-
+ PStore
+ (Pstp
+ (trans_sti sti, rd1, r, chunk_store sti, c, ADimm (b, n1)))
+ (* Any other inst *))
+ | i, _ -> (
+ (* Clear list of candidates if there is a non supported store *)
+ match i with
+ | PStore _ ->
+ pot_strw_rep := [];
+ pot_strx_rep := []
+ | _ -> ())
done
(* Main peephole function in forward style *)
@@ -379,117 +457,125 @@ let pair_rep insta =
let h0 = insta.(i) in
let h1 = insta.(i + 1) in
(* Here we need to update every symbolic memory according to the matched inst *)
- update_pot_rep_basic h0 insta pot_ldrw_rep "ldrw" false;
- update_pot_rep_basic h0 insta pot_ldrx_rep "ldrx" false;
- update_pot_rep_basic h0 insta pot_strw_rep "strw" false;
- update_pot_rep_basic h0 insta pot_strx_rep "strx" false;
+ update_pot_rep_basic h0 insta pot_ldrw_rep "ldr" false;
+ update_pot_rep_basic h0 insta pot_ldrx_rep "ldr" false;
+ update_pot_rep_basic h0 insta pot_strw_rep "str" false;
+ update_pot_rep_basic h0 insta pot_strx_rep "str" false;
match (h0, h1) with
- (* Consecutive ldrw *)
- | ( PLoad (PLd_rd_a (Pldrw, IR (RR1 rd1), ADimm (b1, n1))),
- PLoad (PLd_rd_a (Pldrw, IR (RR1 rd2), ADimm (b2, n2))) ) ->
- (* When both load the same dest, and with no reuse of ld1 in ld2. *)
- if ireg_eq rd1 rd2 && not (iregsp_eq b2 (RR1 rd1)) then (
- if debug then eprintf "LDP_WOPT\n";
- insta.(i) <- Pnop
- (* When two consec mem addr are loading two different dest. *))
- else if is_valid_ldrw rd1 rd2 b1 b2 n1 n2 then (
- if min_is_rev n1 n2 then (
- if debug then eprintf "LDP_W_CONSEC_PEEP_IMM_INC\n";
- insta.(i) <- PLoad (Pldp (Pldpw, rd1, rd2, ADimm (b1, n1))))
- else (
- if debug then eprintf "LDP_W_CONSEC_PEEP_IMM_DEC\n";
- insta.(i) <- PLoad (Pldp (Pldpw, rd2, rd1, ADimm (b1, n2))));
- insta.(i + 1) <- Pnop)
- (* Consecutive ldrx *)
- | ( PLoad (PLd_rd_a (Pldrx, IR (RR1 rd1), ADimm (b1, n1))),
- PLoad (PLd_rd_a (Pldrx, IR (RR1 rd2), ADimm (b2, n2))) ) ->
- if ireg_eq rd1 rd2 && not (iregsp_eq b2 (RR1 rd1)) then (
- if debug then eprintf "LDP_XOPT\n";
- insta.(i) <- Pnop)
- else if is_valid_ldrx rd1 rd2 b1 b2 n1 n2 then (
- if min_is_rev n1 n2 then (
- if debug then eprintf "LDP_X_CONSEC_PEEP_IMM_INC\n";
- insta.(i) <- PLoad (Pldp (Pldpx, rd1, rd2, ADimm (b1, n1))))
- else (
- if debug then eprintf "LDP_X_CONSEC_PEEP_IMM_DEC\n";
- insta.(i) <- PLoad (Pldp (Pldpx, rd2, rd1, ADimm (b1, n2))));
- insta.(i + 1) <- Pnop)
- (* Non-consecutive ldrw *)
- | PLoad (PLd_rd_a (Pldrw, IR (RR1 rd1), ADimm (b1, n1))), _ -> (
- (* Search a previous compatible load *)
- match search_compat_rep rd1 b1 n1 insta !pot_ldrw_rep "ldrw" with
- (* If we can't find a candidate, add the current load as a potential future one *)
- | None -> pot_ldrw_rep := i :: !pot_ldrw_rep
- (* Else, perform the peephole *)
- | Some (rep, r, b, n) ->
- (* The two lines below are used to filter the elected candidate *)
- let filt x = x != rep in
- pot_ldrw_rep := List.filter filt !pot_ldrw_rep;
- insta.(rep) <- Pnop;
- if min_is_rev n n1 then (
- if debug then eprintf "LDP_W_FORW_SPACED_PEEP_IMM_INC\n";
- insta.(i) <- PLoad (Pldp (Pldpw, r, rd1, ADimm (b, n))))
- else (
- if debug then eprintf "LDP_W_FORW_SPACED_PEEP_IMM_DEC\n";
- insta.(i) <- PLoad (Pldp (Pldpw, rd1, r, ADimm (b, n1)))))
- (* Non-consecutive ldrx *)
- | PLoad (PLd_rd_a (Pldrx, IR (RR1 rd1), ADimm (b1, n1))), _ -> (
- match search_compat_rep rd1 b1 n1 insta !pot_ldrx_rep "ldrx" with
- | None -> pot_ldrx_rep := i :: !pot_ldrx_rep
- | Some (rep, r, b, n) ->
- let filt x = x != rep in
- pot_ldrx_rep := List.filter filt !pot_ldrx_rep;
- insta.(rep) <- Pnop;
- if min_is_rev n n1 then (
- if debug then eprintf "LDP_X_FORW_SPACED_PEEP_IMM_INC\n";
- insta.(i) <- PLoad (Pldp (Pldpx, r, rd1, ADimm (b, n))))
+ (* Consecutive ldr *)
+ | ( PLoad (PLd_rd_a (ldi1, IR (RR1 rd1), ADimm (b1, n1))),
+ PLoad (PLd_rd_a (ldi2, IR (RR1 rd2), ADimm (b2, n2))) ) ->
+ if are_compat_load ldi1 ldi2 then
+ if is_valid_ldr rd1 rd2 b1 b2 n1 n2 (get_load_string ldi1) then (
+ if min_is_rev n1 n2 then (
+ if debug then eprintf "LDP_CONSEC_PEEP_IMM_INC\n";
+ insta.(i) <-
+ PLoad
+ (Pldp
+ ( trans_ldi ldi1,
+ rd1,
+ rd2,
+ chunk_load ldi1,
+ chunk_load ldi2,
+ ADimm (b1, n1) )))
else (
- if debug then eprintf "LDP_X_FORW_SPACED_PEEP_IMM_DEC\n";
- insta.(i) <- PLoad (Pldp (Pldpx, rd1, r, ADimm (b, n1)))))
- (* Consecutive strw *)
- | ( PStore (PSt_rs_a (Pstrw, IR (RR1 rs1), ADimm (b1, n1))),
- PStore (PSt_rs_a (Pstrw, IR (RR1 rs2), ADimm (b2, n2))) ) ->
- (* When both store at the same addr, same ofs. *)
- (*if (iregsp_eq b1 b2) && (z2 =? z1) then
- Pnop :: (pair_rep insta t0)
- (* When two consec mem addr are targeted by two store. *)
- else*)
- if is_valid_strw b1 b2 n1 n2 then (
- if debug then eprintf "STP_W_CONSEC_PEEP_IMM_INC\n";
- insta.(i) <- PStore (Pstp (Pstpw, rs1, rs2, ADimm (b1, n1)));
- insta.(i + 1) <- Pnop (* When nothing can be optimized. *))
- (* Consecutive strx *)
- | ( PStore (PSt_rs_a (Pstrx, IR (RR1 rs1), ADimm (b1, n1))),
- PStore (PSt_rs_a (Pstrx, IR (RR1 rs2), ADimm (b2, n2))) ) ->
- (*if (iregsp_eq b1 b2) && (z2 =? z1) then
- Pnop :: (pair_rep insta t0)
- else*)
- if is_valid_strx b1 b2 n1 n2 then (
- if debug then eprintf "STP_X_CONSEC_PEEP_IMM_INC\n";
- insta.(i) <- PStore (Pstp (Pstpx, rs1, rs2, ADimm (b1, n1)));
- insta.(i + 1) <- Pnop)
- (* Non-consecutive stpw *)
- | PStore (PSt_rs_a (Pstrw, IR (RR1 rs1), ADimm (b1, n1))), _ -> (
- match search_compat_rep rs1 b1 n1 insta !pot_strw_rep "strw" with
- | None -> pot_strw_rep := i :: !pot_strw_rep
- | Some (rep, r, b, n) ->
- let filt x = x != rep in
- pot_strw_rep := List.filter filt !pot_strw_rep;
- insta.(rep) <- Pnop;
- if debug then eprintf "STP_W_FORW_SPACED_PEEP_IMM_INC\n";
- insta.(i) <- PStore (Pstp (Pstpw, r, rs1, ADimm (b, n))))
- (* Non-consecutive stpx *)
- | PStore (PSt_rs_a (Pstrx, IR (RR1 rs1), ADimm (b1, n1))), _ -> (
- match search_compat_rep rs1 b1 n1 insta !pot_strx_rep "strx" with
- | None -> pot_strx_rep := i :: !pot_strx_rep
- | Some (rep, r, b, n) ->
- let filt x = x != rep in
- pot_strx_rep := List.filter filt !pot_strx_rep;
- insta.(rep) <- Pnop;
- if debug then eprintf "STP_X_FORW_SPACED_PEEP_IMM_INC\n";
- insta.(i) <- PStore (Pstp (Pstpx, r, rs1, ADimm (b, n))))
+ if debug then eprintf "LDP_CONSEC_PEEP_IMM_DEC\n";
+ insta.(i) <-
+ PLoad
+ (Pldp
+ ( trans_ldi ldi1,
+ rd2,
+ rd1,
+ chunk_load ldi2,
+ chunk_load ldi1,
+ ADimm (b1, n2) )));
+ insta.(i + 1) <- Pnop)
+ (* Non-consecutive ldr *)
+ | PLoad (PLd_rd_a (ldi, IR (RR1 rd1), ADimm (b1, n1))), _ -> (
+ if is_compat_load ldi then
+ let pot_rep =
+ match ldi with Pldrw | Pldrw_a -> pot_ldrw_rep | _ -> pot_ldrx_rep
+ in
+ (* Search a previous compatible load *)
+ match
+ search_compat_rep rd1 b1 n1 insta !pot_rep (get_load_string ldi)
+ with
+ (* If we can't find a candidate, add the current load as a potential future one *)
+ | None -> pot_rep := i :: !pot_rep
+ (* Else, perform the peephole *)
+ | Some (rep, c, r, b, n) ->
+ (* The two lines below are used to filter the elected candidate *)
+ let filt x = x != rep in
+ pot_rep := List.filter filt !pot_rep;
+ insta.(rep) <- Pnop;
+ if min_is_rev n n1 then (
+ if debug then eprintf "LDP_FORW_SPACED_PEEP_IMM_INC\n";
+ insta.(i) <-
+ PLoad
+ (Pldp
+ (trans_ldi ldi, r, rd1, c, chunk_load ldi, ADimm (b, n))))
+ else (
+ if debug then eprintf "LDP_FORW_SPACED_PEEP_IMM_DEC\n";
+ insta.(i) <-
+ PLoad
+ (Pldp
+ (trans_ldi ldi, rd1, r, chunk_load ldi, c, ADimm (b, n1))))
+ )
+ (* Consecutive str *)
+ | ( PStore (PSt_rs_a (sti1, IR (RR1 rd1), ADimm (b1, n1))),
+ PStore (PSt_rs_a (sti2, IR (RR1 rd2), ADimm (b2, n2))) ) ->
+ (* Regardless of whether we can perform the peephole or not,
+ * we have to clean the potential candidates for stp now as we are
+ * looking at two new store instructions. *)
+ pot_strw_rep := [];
+ pot_strx_rep := [];
+ if are_compat_store sti1 sti2 then
+ if is_valid_str b1 b2 n1 n2 (get_store_string sti1) then (
+ if debug then eprintf "STP_CONSEC_PEEP_IMM_INC\n";
+ insta.(i) <-
+ PStore
+ (Pstp
+ ( trans_sti sti1,
+ rd1,
+ rd2,
+ chunk_store sti1,
+ chunk_store sti2,
+ ADimm (b1, n1) ));
+ insta.(i + 1) <- Pnop)
+ (* Non-consecutive str *)
+ | PStore (PSt_rs_a (sti, IR (RR1 rd1), ADimm (b1, n1))), _ -> (
+ if is_compat_store sti then
+ let pot_rep =
+ match sti with Pstrw | Pstrw_a -> pot_strw_rep | _ -> pot_strx_rep
+ in
+ (* Search a previous compatible store *)
+ match
+ search_compat_rep rd1 b1 n1 insta !pot_rep (get_store_string sti)
+ with
+ (* If we can't find a candidate, clean and add the current store as a potential future one *)
+ | None ->
+ pot_strw_rep := [];
+ pot_strx_rep := [];
+ pot_rep := i :: !pot_rep
+ (* Else, perform the peephole *)
+ | Some (rep, c, r, b, n) ->
+ (* The two lines below are used to filter the elected candidate *)
+ let filt x = x != rep in
+ pot_rep := List.filter filt !pot_rep;
+ insta.(rep) <- Pnop;
+ if debug then eprintf "STP_FORW_SPACED_PEEP_IMM_INC\n";
+ insta.(i) <-
+ PStore
+ (Pstp (trans_sti sti, r, rd1, c, chunk_store sti, ADimm (b, n)))
+ )
(* Any other inst *)
- | _, _ -> ()
+ | i, _ -> (
+ (* Clear list of candidates if there is a non supported store *)
+ match i with
+ | PStore _ ->
+ pot_strw_rep := [];
+ pot_strx_rep := []
+ | _ -> ())
done
(* Calling peephole if flag is set *)
diff --git a/aarch64/PostpassSchedulingOracle.ml b/aarch64/PostpassSchedulingOracle.ml
index e233f77d..d7fab1de 100644
--- a/aarch64/PostpassSchedulingOracle.ml
+++ b/aarch64/PostpassSchedulingOracle.ml
@@ -18,6 +18,7 @@ open OpWeightsAsm
open InstructionScheduler
let debug = false
+
let stats = false
(**
@@ -73,47 +74,22 @@ let arith_p_rec i i' rd =
}
let arith_pp_rec i rd rs =
- {
- inst = i;
- write_locs = [ rd ];
- read_locs = [ rs ];
- is_control = false;
- }
+ { inst = i; write_locs = [ rd ]; read_locs = [ rs ]; is_control = false }
let arith_ppp_rec i rd r1 r2 =
- {
- inst = i;
- write_locs = [ rd ];
- read_locs = [ r1; r2 ];
- is_control = false;
- }
+ { inst = i; write_locs = [ rd ]; read_locs = [ r1; r2 ]; is_control = false }
let arith_rr0r_rec i rd r1 r2 =
let rlocs = if is_XZR r1 then [ r2 ] else [ r1; r2 ] in
- {
- inst = i;
- write_locs = [ rd ];
- read_locs = rlocs;
- is_control = false;
- }
+ { inst = i; write_locs = [ rd ]; read_locs = rlocs; is_control = false }
let arith_rr0_rec i rd r1 =
let rlocs = if is_XZR r1 then [] else [ r1 ] in
- {
- inst = i;
- write_locs = [ rd ];
- read_locs = rlocs;
- is_control = false;
- }
+ { inst = i; write_locs = [ rd ]; read_locs = rlocs; is_control = false }
let arith_arrrr0_rec i rd r1 r2 r3 =
let rlocs = if is_XZR r3 then [ r1; r2 ] else [ r1; r2; r3 ] in
- {
- inst = i;
- write_locs = [ rd ];
- read_locs = rlocs;
- is_control = false;
- }
+ { inst = i; write_locs = [ rd ]; read_locs = rlocs; is_control = false }
let arith_comparison_pp_rec i r1 r2 =
{
@@ -125,20 +101,10 @@ let arith_comparison_pp_rec i r1 r2 =
let arith_comparison_r0r_rec i r1 r2 =
let rlocs = if is_XZR r1 then [ r2 ] else [ r1; r2 ] in
- {
- inst = i;
- write_locs = flags_wlocs;
- read_locs = rlocs;
- is_control = false;
- }
+ { inst = i; write_locs = flags_wlocs; read_locs = rlocs; is_control = false }
let arith_comparison_p_rec i r1 =
- {
- inst = i;
- write_locs = flags_wlocs;
- read_locs = [ r1 ];
- is_control = false;
- }
+ { inst = i; write_locs = flags_wlocs; read_locs = [ r1 ]; is_control = false }
let get_eval_addressing_rlocs a =
match a with
@@ -168,8 +134,11 @@ let load_rd1_rd2_a_rec ld rd1 rd2 a =
let load_rec ldi =
match ldi with
- | PLd_rd_a (ld, rd, a) -> load_rd_a_rec (PBasic (PLoad ldi)) (reg_of_dreg rd) a
- | Pldp (ld, rd1, rd2, a) -> load_rd1_rd2_a_rec (PBasic (PLoad ldi)) (reg_of_ireg rd1) (reg_of_ireg rd2) a
+ | PLd_rd_a (ld, rd, a) ->
+ load_rd_a_rec (PBasic (PLoad ldi)) (reg_of_dreg rd) a
+ | Pldp (ld, rd1, rd2, _, _, a) ->
+ load_rd1_rd2_a_rec (PBasic (PLoad ldi)) (reg_of_ireg rd1)
+ (reg_of_ireg rd2) a
let store_rs_a_rec st rs a =
{
@@ -189,40 +158,23 @@ let store_rs1_rs2_a_rec st rs1 rs2 a =
let store_rec sti =
match sti with
- | PSt_rs_a (st, rs, a) -> store_rs_a_rec (PBasic (PStore sti)) (reg_of_dreg rs) a
- | Pstp (st, rs1, rs2, a) -> store_rs1_rs2_a_rec (PBasic (PStore sti)) (reg_of_ireg rs1) (reg_of_ireg rs2) a
+ | PSt_rs_a (st, rs, a) ->
+ store_rs_a_rec (PBasic (PStore sti)) (reg_of_dreg rs) a
+ | Pstp (st, rs1, rs2, _, _, a) ->
+ store_rs1_rs2_a_rec (PBasic (PStore sti)) (reg_of_ireg rs1)
+ (reg_of_ireg rs2) a
let loadsymbol_rec i rd id =
- {
- inst = i;
- write_locs = [ rd ];
- read_locs = [ Mem ];
- is_control = false;
- }
+ { inst = i; write_locs = [ rd ]; read_locs = [ Mem ]; is_control = false }
let cvtsw2x_rec i rd r1 =
- {
- inst = i;
- write_locs = [ rd ];
- read_locs = [ r1 ];
- is_control = false;
- }
+ { inst = i; write_locs = [ rd ]; read_locs = [ r1 ]; is_control = false }
let cvtuw2x_rec i rd r1 =
- {
- inst = i;
- write_locs = [ rd ];
- read_locs = [ r1 ];
- is_control = false;
- }
+ { inst = i; write_locs = [ rd ]; read_locs = [ r1 ]; is_control = false }
let cvtx2w_rec i rd =
- {
- inst = i;
- write_locs = [ rd ];
- read_locs = [ rd ];
- is_control = false;
- }
+ { inst = i; write_locs = [ rd ]; read_locs = [ rd ]; is_control = false }
let get_testcond_rlocs c =
match c with
@@ -257,20 +209,10 @@ let csel_rec i rd r1 r2 c =
let fmovi_rec i fsz rd r1 =
let rlocs = if is_XZR r1 then [] else [ r1 ] in
- {
- inst = i;
- write_locs = [ rd ];
- read_locs = rlocs;
- is_control = false;
- }
+ { inst = i; write_locs = [ rd ]; read_locs = rlocs; is_control = false }
let fnmul_rec i fsz rd r1 r2 =
- {
- inst = i;
- write_locs = [ rd ];
- read_locs = [ r1; r2 ];
- is_control = false;
- }
+ { inst = i; write_locs = [ rd ]; read_locs = [ r1; r2 ]; is_control = false }
let allocframe_rec i sz linkofs =
{
@@ -289,37 +231,41 @@ let freeframe_rec i sz linkofs =
}
let nop_rec i =
- {
- inst = i;
- write_locs = [ ];
- read_locs = [ ];
- is_control = false;
- }
+ { inst = i; write_locs = []; read_locs = []; is_control = false }
let arith_rec i =
match i with
| PArithP (i', rd) -> arith_p_rec (PBasic (PArith i)) i' (reg_of_dreg rd)
- | PArithPP (i', rd, rs) -> arith_pp_rec (PBasic (PArith i)) (reg_of_dreg rd) (reg_of_dreg rs)
+ | PArithPP (i', rd, rs) ->
+ arith_pp_rec (PBasic (PArith i)) (reg_of_dreg rd) (reg_of_dreg rs)
| PArithPPP (i', rd, r1, r2) ->
- arith_ppp_rec (PBasic (PArith i)) (reg_of_dreg rd) (reg_of_dreg r1) (reg_of_dreg r2)
+ arith_ppp_rec (PBasic (PArith i)) (reg_of_dreg rd) (reg_of_dreg r1)
+ (reg_of_dreg r2)
| PArithRR0R (i', rd, r1, r2) ->
- arith_rr0r_rec (PBasic (PArith i)) (reg_of_ireg rd) (reg_of_ireg0 r1) (reg_of_ireg r2)
+ arith_rr0r_rec (PBasic (PArith i)) (reg_of_ireg rd) (reg_of_ireg0 r1)
+ (reg_of_ireg r2)
| PArithRR0 (i', rd, r1) ->
arith_rr0_rec (PBasic (PArith i)) (reg_of_ireg rd) (reg_of_ireg0 r1)
| PArithARRRR0 (i', rd, r1, r2, r3) ->
- arith_arrrr0_rec (PBasic (PArith i)) (reg_of_ireg rd) (reg_of_ireg r1) (reg_of_ireg r2)
- (reg_of_ireg0 r3)
+ arith_arrrr0_rec (PBasic (PArith i)) (reg_of_ireg rd) (reg_of_ireg r1)
+ (reg_of_ireg r2) (reg_of_ireg0 r3)
| PArithComparisonPP (i', r1, r2) ->
- arith_comparison_pp_rec (PBasic (PArith i)) (reg_of_dreg r1) (reg_of_dreg r2)
+ arith_comparison_pp_rec (PBasic (PArith i)) (reg_of_dreg r1)
+ (reg_of_dreg r2)
| PArithComparisonR0R (i', r1, r2) ->
- arith_comparison_r0r_rec (PBasic (PArith i)) (reg_of_ireg0 r1) (reg_of_ireg r2)
- | PArithComparisonP (i', r1) -> arith_comparison_p_rec (PBasic (PArith i)) (reg_of_dreg r1)
+ arith_comparison_r0r_rec (PBasic (PArith i)) (reg_of_ireg0 r1)
+ (reg_of_ireg r2)
+ | PArithComparisonP (i', r1) ->
+ arith_comparison_p_rec (PBasic (PArith i)) (reg_of_dreg r1)
| Pcset (rd, c) -> cset_rec (PBasic (PArith i)) (reg_of_ireg rd) c
- | Pfmovi (fsz, rd, r1) -> fmovi_rec (PBasic (PArith i)) fsz (reg_of_freg rd) (reg_of_ireg0 r1)
+ | Pfmovi (fsz, rd, r1) ->
+ fmovi_rec (PBasic (PArith i)) fsz (reg_of_freg rd) (reg_of_ireg0 r1)
| Pcsel (rd, r1, r2, c) ->
- csel_rec (PBasic (PArith i)) (reg_of_dreg rd) (reg_of_dreg r1) (reg_of_dreg r2) c
+ csel_rec (PBasic (PArith i)) (reg_of_dreg rd) (reg_of_dreg r1)
+ (reg_of_dreg r2) c
| Pfnmul (fsz, rd, r1, r2) ->
- fnmul_rec (PBasic (PArith i)) fsz (reg_of_freg rd) (reg_of_freg r1) (reg_of_freg r2)
+ fnmul_rec (PBasic (PArith i)) fsz (reg_of_freg rd) (reg_of_freg r1)
+ (reg_of_freg r2)
let basic_rec i =
match i with
@@ -329,101 +275,98 @@ let basic_rec i =
| Pallocframe (sz, linkofs) -> allocframe_rec (PBasic i) sz linkofs
| Pfreeframe (sz, linkofs) -> freeframe_rec (PBasic i) sz linkofs
| Ploadsymbol (rd, id) -> loadsymbol_rec (PBasic i) (reg_of_ireg rd) id
- | Pcvtsw2x (rd, r1) -> cvtsw2x_rec (PBasic i) (reg_of_ireg rd) (reg_of_ireg r1)
- | Pcvtuw2x (rd, r1) -> cvtuw2x_rec (PBasic i) (reg_of_ireg rd) (reg_of_ireg r1)
+ | Pcvtsw2x (rd, r1) ->
+ cvtsw2x_rec (PBasic i) (reg_of_ireg rd) (reg_of_ireg r1)
+ | Pcvtuw2x (rd, r1) ->
+ cvtuw2x_rec (PBasic i) (reg_of_ireg rd) (reg_of_ireg r1)
| Pcvtx2w rd -> cvtx2w_rec (PBasic i) (reg_of_ireg rd)
| Pnop -> nop_rec (PBasic i)
let builtin_rec i ef args res =
- {
- inst = i;
- write_locs = [ Mem ];
- read_locs = [ Mem ];
- is_control = true;
- }
+ { inst = i; write_locs = [ Mem ]; read_locs = [ Mem ]; is_control = true }
let ctl_flow_rec i =
match i with
| Pb lbl ->
{
- inst = (PControl (PCtlFlow i));
+ inst = PControl (PCtlFlow i);
write_locs = [ reg_of_pc ];
read_locs = [ reg_of_pc ];
is_control = true;
}
| Pbc (c, lbl) ->
{
- inst = (PControl (PCtlFlow i));
+ inst = PControl (PCtlFlow i);
write_locs = [ reg_of_pc ];
read_locs = [ reg_of_pc ];
is_control = true;
}
| Pbl (id, sg) ->
{
- inst = (PControl (PCtlFlow i));
+ inst = PControl (PCtlFlow i);
write_locs = [ reg_of_ireg Asm.X30; reg_of_pc ];
read_locs = [ reg_of_pc ];
is_control = true;
}
| Pbs (id, sg) ->
{
- inst = (PControl (PCtlFlow i));
+ inst = PControl (PCtlFlow i);
write_locs = [ reg_of_pc ];
read_locs = [];
is_control = true;
}
| Pblr (r, sg) ->
{
- inst = (PControl (PCtlFlow i));
+ inst = PControl (PCtlFlow i);
write_locs = [ reg_of_ireg Asm.X30; reg_of_pc ];
read_locs = [ reg_of_ireg r ];
is_control = true;
}
| Pbr (r, sg) ->
{
- inst = (PControl (PCtlFlow i));
+ inst = PControl (PCtlFlow i);
write_locs = [ reg_of_pc ];
read_locs = [ reg_of_ireg r ];
is_control = true;
}
| Pret r ->
{
- inst = (PControl (PCtlFlow i));
+ inst = PControl (PCtlFlow i);
write_locs = [ reg_of_pc ];
read_locs = [ reg_of_ireg r ];
is_control = true;
}
| Pcbnz (sz, r, lbl) ->
{
- inst = (PControl (PCtlFlow i));
+ inst = PControl (PCtlFlow i);
write_locs = [ reg_of_pc ];
read_locs = [ reg_of_ireg r; reg_of_pc ];
is_control = true;
}
| Pcbz (sz, r, lbl) ->
{
- inst = (PControl (PCtlFlow i));
+ inst = PControl (PCtlFlow i);
write_locs = [ reg_of_pc ];
read_locs = [ reg_of_ireg r; reg_of_pc ];
is_control = true;
}
| Ptbnz (sz, r, n, lbl) ->
{
- inst = (PControl (PCtlFlow i));
+ inst = PControl (PCtlFlow i);
write_locs = [ reg_of_pc ];
read_locs = [ reg_of_ireg r; reg_of_pc ];
is_control = true;
}
| Ptbz (sz, r, n, lbl) ->
{
- inst = (PControl (PCtlFlow i));
+ inst = PControl (PCtlFlow i);
write_locs = [ reg_of_pc ];
read_locs = [ reg_of_ireg r; reg_of_pc ];
is_control = true;
}
| Pbtbl (r1, tbl) ->
{
- inst = (PControl (PCtlFlow i));
+ inst = PControl (PCtlFlow i);
write_locs = [ reg_of_ireg Asm.X16; reg_of_ireg Asm.X17; reg_of_pc ];
read_locs = [ reg_of_ireg r1; reg_of_pc ];
is_control = true;
@@ -614,7 +557,7 @@ let find_max l =
| e :: l -> (
match f l with
| None -> Some e
- | Some m -> if e > m then Some e else Some m )
+ | Some m -> if e > m then Some e else Some m)
in
match f l with None -> raise Not_found | Some m -> m
@@ -625,9 +568,9 @@ let minpack_list (l : int list) =
let rec f i = function
| [] -> ()
| t :: l ->
- ( match TimeHash.find_opt timehash t with
+ (match TimeHash.find_opt timehash t with
| None -> TimeHash.add timehash t [ i ]
- | Some bund -> TimeHash.replace timehash t (bund @ [ i ]) );
+ | Some bund -> TimeHash.replace timehash t (bund @ [ i ]));
f (i + 1) l
in
f 0 l;
@@ -684,9 +627,11 @@ let bblock_schedule bb =
let identity_mode = not !Clflags.option_fpostpass in
if debug && not identity_mode then (
Printf.eprintf "###############################\n";
- Printf.eprintf "SCHEDULING\n" );
+ Printf.eprintf "SCHEDULING\n");
if stats then (
- let oc = open_out_gen [Open_append; Open_creat] 0o666 "oracle_stats.csv" in
+ let oc =
+ open_out_gen [ Open_append; Open_creat ] 0o666 "oracle_stats.csv"
+ in
Printf.fprintf oc "%d\n" (Camlcoq.Z.to_int (size bb));
close_out oc);
if identity_mode then pack_result bb else smart_schedule bb
diff --git a/aarch64/TargetPrinter.ml b/aarch64/TargetPrinter.ml
index c336fb1e..40e4a182 100644
--- a/aarch64/TargetPrinter.ml
+++ b/aarch64/TargetPrinter.ml
@@ -279,9 +279,9 @@ module Target (*: TARGET*) =
(* the upper 32 bits of Xrd are set to 0, performing zero-extension *)
| Pldrsw(rd, a) ->
fprintf oc " ldrsw %a, %a\n" xreg rd addressing a
- | Pldpw(rd1, rd2, a) ->
+ | Pldpw(rd1, rd2, _, _, a) ->
fprintf oc " ldp %a, %a, %a\n" wreg rd1 wreg rd2 addressing a
- | Pldpx(rd1, rd2, a) ->
+ | Pldpx(rd1, rd2, _, _, a) ->
fprintf oc " ldp %a, %a, %a\n" xreg rd1 xreg rd2 addressing a
| Pstrw(rs, a) | Pstrw_a(rs, a) ->
fprintf oc " str %a, %a\n" wreg rs addressing a
@@ -291,9 +291,9 @@ module Target (*: TARGET*) =
fprintf oc " strb %a, %a\n" wreg rs addressing a
| Pstrh(rs, a) ->
fprintf oc " strh %a, %a\n" wreg rs addressing a
- | Pstpw(rs1, rs2, a) ->
+ | Pstpw(rs1, rs2, _, _, a) ->
fprintf oc " stp %a, %a, %a\n" wreg rs1 wreg rs2 addressing a
- | Pstpx(rs1, rs2, a) ->
+ | Pstpx(rs1, rs2, _, _, a) ->
fprintf oc " stp %a, %a, %a\n" xreg rs1 xreg rs2 addressing a
(* Integer arithmetic, immediate *)
| Paddimm(sz, rd, r1, n) ->
@@ -388,6 +388,8 @@ module Target (*: TARGET*) =
fprintf oc " rev %a, %a\n" ireg (sz, rd) ireg (sz, r1)
| Prev16(sz, rd, r1) ->
fprintf oc " rev16 %a, %a\n" ireg (sz, rd) ireg (sz, r1)
+ | Prbit(sz, rd, r1) ->
+ fprintf oc " rbit %a, %a\n" ireg (sz, rd) ireg (sz, r1)
(* Conditional data processing *)
| Pcsel(rd, r1, r2, c) ->
fprintf oc " csel %a, %a, %a, %s\n" xreg rd xreg r1 xreg r2 (condition_name c)
@@ -478,6 +480,10 @@ module Target (*: TARGET*) =
fprintf oc " fnmadd %a, %a, %a, %a\n" freg (sz, rd) freg (sz, r1) freg (sz, r2) freg (sz, r3)
| Pfnmsub(sz, rd, r1, r2, r3) ->
fprintf oc " fnmsub %a, %a, %a, %a\n" freg (sz, rd) freg (sz, r1) freg (sz, r2) freg (sz, r3)
+ | Pfmax (sz, rd, r1, r2) ->
+ fprintf oc " fmax %a, %a, %a\n" freg (sz, rd) freg (sz, r1) freg (sz, r2)
+ | Pfmin (sz, rd, r1, r2) ->
+ fprintf oc " fmin %a, %a, %a\n" freg (sz, rd) freg (sz, r1) freg (sz, r2)
(* Floating-point comparison *)
| Pfcmp(sz, r1, r2) ->
fprintf oc " fcmp %a, %a\n" freg (sz, r1) freg (sz, r2)
diff --git a/aarch64/extractionMachdep.v b/aarch64/extractionMachdep.v
index 6815bc59..69edeb55 100644
--- a/aarch64/extractionMachdep.v
+++ b/aarch64/extractionMachdep.v
@@ -6,6 +6,9 @@
(* *)
(* Copyright Institut National de Recherche en Informatique et en *)
(* Automatique. All rights reserved. This file is distributed *)
+(* under the terms of the GNU General Public License as published by *)
+(* the Free Software Foundation, either version 2 of the License, or *)
+(* (at your option) any later version. This file is also distributed *)
(* under the terms of the INRIA Non-Commercial License Agreement. *)
(* *)
(* *********************************************************************)