about summary refs log tree commit diff stats
path: root/aarch64/Asmexpand.ml
diff options
context:
space:
mode:
Diffstat (limited to 'aarch64/Asmexpand.ml')
-rw-r--r-- aarch64/Asmexpand.ml 126
1 files changed, 64 insertions, 62 deletions
diff --git a/aarch64/Asmexpand.ml b/aarch64/Asmexpand.ml
index 573e8b92..eb9ec600 100644
--- a/aarch64/Asmexpand.ml
+++ b/aarch64/Asmexpand.ml
@@ -34,13 +34,13 @@ let _m1 = Z.of_sint (-1)
(* Emit instruction sequences that set or offset a register by a constant. *)
let expand_loadimm32 (dst: ireg) n =
- List.iter emit (Asmgen.loadimm32 dst n [])
+ List.iter emit (Asmgen.Asmgen_expand.loadimm32 dst n [])
let expand_addimm64 (dst: iregsp) (src: iregsp) n =
- List.iter emit (Asmgen.addimm64 dst src n [])
+ List.iter emit (Asmgen.Asmgen_expand.addimm64 dst src n [])
let expand_storeptr (src: ireg) (base: iregsp) ofs =
- List.iter emit (Asmgen.storeptr src base ofs [])
+ List.iter emit (Asmgen.Asmgen_expand.storeptr src base ofs [])
(* Handling of varargs *)
@@ -84,8 +84,8 @@ let save_parameter_registers ir fr =
while !i < 8 do
let pos = 8*16 + !i*8 in
if !i land 1 = 0 then begin
- emit (Pstp(int_param_regs.(!i), int_param_regs.(!i + 1),
- ADimm(XSP, Z.of_uint pos)));
+ emit (Pstpx(int_param_regs.(!i), int_param_regs.(!i + 1),
+ Mint64, Mint64, ADimm(XSP, Z.of_uint pos)));
i := !i + 2
end else begin
emit (Pstrx(int_param_regs.(!i), ADimm(XSP, Z.of_uint pos)));
@@ -160,9 +160,9 @@ let expand_builtin_va_start r =
let expand_annot_val kind txt targ args res =
emit (Pbuiltin (EF_annot(kind,txt,[targ]), args, BR_none));
match args, res with
- | [BA(IR src)], BR(IR dst) ->
- if dst <> src then emit (Pmov (RR1 dst, RR1 src))
- | [BA(FR src)], BR(FR dst) ->
+ | [BA(DR(IR src))], BR(DR(IR dst)) ->
+ if dst <> src then emit (Pmov (dst, src))
+ | [BA(DR(FR src))], BR(DR(FR dst)) ->
if dst <> src then emit (Pfmov (dst, src))
| _, _ ->
raise (Error "ill-formed __builtin_annot_val")
@@ -180,8 +180,8 @@ let offset_in_range ofs =
let memcpy_small_arg sz arg tmp =
match arg with
- | BA (IR r) ->
- (RR1 r, _0)
+ | BA (DR(IR r)) ->
+ (r, _0)
| BA_addrstack ofs ->
if offset_in_range ofs
&& offset_in_range (Ptrofs.add ofs (Ptrofs.repr (Z.of_uint sz)))
@@ -192,14 +192,14 @@ let memcpy_small_arg sz arg tmp =
assert false
let expand_builtin_memcpy_small sz al src dst =
- let tsrc = if dst <> BA (IR X17) then X17 else X29 in
- let tdst = if src <> BA (IR X29) then X29 else X17 in
+ let tsrc = if dst <> BA (DR(IR(RR1 X17))) then X17 else X29 in
+ let tdst = if src <> BA (DR(IR(RR1 X29))) then X29 else X17 in
let (rsrc, osrc) = memcpy_small_arg sz src tsrc in
let (rdst, odst) = memcpy_small_arg sz dst tdst in
let rec copy osrc odst sz =
if sz >= 16 then begin
- emit (Pldp(X16, X30, ADimm(rsrc, osrc)));
- emit (Pstp(X16, X30, ADimm(rdst, odst)));
+ emit (Pldpx(X16, X30, Mint64, Mint64, ADimm(rsrc, osrc)));
+ emit (Pstpx(X16, X30, Mint64, Mint64, ADimm(rdst, odst)));
copy (Ptrofs.add osrc _16) (Ptrofs.add odst _16) (sz - 16)
end
else if sz >= 8 then begin
@@ -226,7 +226,7 @@ let expand_builtin_memcpy_small sz al src dst =
let memcpy_big_arg arg tmp =
match arg with
- | BA (IR r) -> emit (Pmov(RR1 tmp, RR1 r))
+ | BA (DR(IR r)) -> emit (Pmov(RR1 tmp, r))
| BA_addrstack ofs -> expand_addimm64 (RR1 tmp) XSP ofs
| _ -> assert false
@@ -237,8 +237,8 @@ let expand_builtin_memcpy_big sz al src dst =
let lbl = new_label () in
expand_loadimm32 X15 (Z.of_uint (sz / 16));
emit (Plabel lbl);
- emit (Pldp(X16, X17, ADpostincr(RR1 X30, _16)));
- emit (Pstp(X16, X17, ADpostincr(RR1 X29, _16)));
+ emit (Pldpx(X16, X17, Mint64, Mint64, ADpostincr(RR1 X30, _16)));
+ emit (Pstpx(X16, X17, Mint64, Mint64, ADpostincr(RR1 X29, _16)));
emit (Psubimm(W, RR1 X15, RR1 X15, _1));
emit (Pcbnz(W, X15, lbl));
if sz mod 16 >= 8 then begin
@@ -270,29 +270,29 @@ let expand_builtin_memcpy sz al args =
let expand_builtin_vload_common chunk base ofs res =
let addr = ADimm(base, ofs) in
match chunk, res with
- | Mint8unsigned, BR(IR res) ->
+ | Mint8unsigned, BR(DR(IR(RR1 res))) ->
emit (Pldrb(W, res, addr))
- | Mint8signed, BR(IR res) ->
+ | Mint8signed, BR(DR(IR(RR1 res))) ->
emit (Pldrsb(W, res, addr))
- | Mint16unsigned, BR(IR res) ->
+ | Mint16unsigned, BR(DR(IR(RR1 res))) ->
emit (Pldrh(W, res, addr))
- | Mint16signed, BR(IR res) ->
+ | Mint16signed, BR(DR(IR(RR1 res))) ->
emit (Pldrsh(W, res, addr))
- | Mint32, BR(IR res) ->
+ | Mint32, BR(DR(IR(RR1 res))) ->
emit (Pldrw(res, addr))
- | Mint64, BR(IR res) ->
+ | Mint64, BR(DR(IR(RR1 res))) ->
emit (Pldrx(res, addr))
- | Mfloat32, BR(FR res) ->
+ | Mfloat32, BR(DR(FR res)) ->
emit (Pldrs(res, addr))
- | Mfloat64, BR(FR res) ->
+ | Mfloat64, BR(DR(FR res)) ->
emit (Pldrd(res, addr))
| _ ->
assert false
let expand_builtin_vload chunk args res =
match args with
- | [BA(IR addr)] ->
- expand_builtin_vload_common chunk (RR1 addr) _0 res
+ | [BA(DR(IR addr))] ->
+ expand_builtin_vload_common chunk addr _0 res
| [BA_addrstack ofs] ->
if offset_in_range (Z.add ofs (Memdata.size_chunk chunk)) then
expand_builtin_vload_common chunk XSP ofs res
@@ -300,11 +300,11 @@ let expand_builtin_vload chunk args res =
expand_addimm64 (RR1 X16) XSP ofs; (* X16 <- SP + ofs *)
expand_builtin_vload_common chunk (RR1 X16) _0 res
end
- | [BA_addptr(BA(IR addr), BA_long ofs)] ->
+ | [BA_addptr(BA(DR(IR addr)), BA_long ofs)] ->
if offset_in_range (Z.add ofs (Memdata.size_chunk chunk)) then
- expand_builtin_vload_common chunk (RR1 addr) ofs res
+ expand_builtin_vload_common chunk addr ofs res
else begin
- expand_addimm64 (RR1 X16) (RR1 addr) ofs; (* X16 <- addr + ofs *)
+ expand_addimm64 (RR1 X16) addr ofs; (* X16 <- addr + ofs *)
expand_builtin_vload_common chunk (RR1 X16) _0 res
end
| _ ->
@@ -313,25 +313,25 @@ let expand_builtin_vload chunk args res =
let expand_builtin_vstore_common chunk base ofs src =
let addr = ADimm(base, ofs) in
match chunk, src with
- | (Mint8signed | Mint8unsigned), BA(IR src) ->
+ | (Mint8signed | Mint8unsigned), BA(DR(IR(RR1 src))) ->
emit (Pstrb(src, addr))
- | (Mint16signed | Mint16unsigned), BA(IR src) ->
+ | (Mint16signed | Mint16unsigned), BA(DR(IR(RR1 src))) ->
emit (Pstrh(src, addr))
- | Mint32, BA(IR src) ->
+ | Mint32, BA(DR(IR(RR1 src))) ->
emit (Pstrw(src, addr))
- | Mint64, BA(IR src) ->
+ | Mint64, BA(DR(IR(RR1 src))) ->
emit (Pstrx(src, addr))
- | Mfloat32, BA(FR src) ->
+ | Mfloat32, BA(DR(FR src)) ->
emit (Pstrs(src, addr))
- | Mfloat64, BA(FR src) ->
+ | Mfloat64, BA(DR(FR src)) ->
emit (Pstrd(src, addr))
| _ ->
assert false
let expand_builtin_vstore chunk args =
match args with
- | [BA(IR addr); src] ->
- expand_builtin_vstore_common chunk (RR1 addr) _0 src
+ | [BA(DR(IR addr)); src] ->
+ expand_builtin_vstore_common chunk addr _0 src
| [BA_addrstack ofs; src] ->
if offset_in_range (Z.add ofs (Memdata.size_chunk chunk)) then
expand_builtin_vstore_common chunk XSP ofs src
@@ -339,11 +339,11 @@ let expand_builtin_vstore chunk args =
expand_addimm64 (RR1 X16) XSP ofs; (* X16 <- SP + ofs *)
expand_builtin_vstore_common chunk (RR1 X16) _0 src
end
- | [BA_addptr(BA(IR addr), BA_long ofs); src] ->
+ | [BA_addptr(BA(DR(IR addr)), BA_long ofs); src] ->
if offset_in_range (Z.add ofs (Memdata.size_chunk chunk)) then
- expand_builtin_vstore_common chunk (RR1 addr) ofs src
+ expand_builtin_vstore_common chunk addr ofs src
else begin
- expand_addimm64 (RR1 X16) (RR1 addr) ofs; (* X16 <- addr + ofs *)
+ expand_addimm64 (RR1 X16) addr ofs; (* X16 <- addr + ofs *)
expand_builtin_vstore_common chunk (RR1 X16) _0 src
end
| _ ->
@@ -363,45 +363,47 @@ let expand_builtin_inline name args res =
| "__builtin_unreachable", [], _ ->
()
(* Byte swap *)
- | ("__builtin_bswap" | "__builtin_bswap32"), [BA(IR a1)], BR(IR res) ->
+ | ("__builtin_bswap" | "__builtin_bswap32"), [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) ->
emit (Prev(W, res, a1))
- | "__builtin_bswap64", [BA(IR a1)], BR(IR res) ->
+ | "__builtin_bswap64", [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) ->
emit (Prev(X, res, a1))
- | "__builtin_bswap16", [BA(IR a1)], BR(IR res) ->
+ | "__builtin_bswap16", [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) ->
emit (Prev16(W, res, a1));
emit (Pandimm(W, res, RR0 res, Z.of_uint 0xFFFF))
(* Count leading zeros, leading sign bits, trailing zeros *)
- | "__builtin_clz", [BA(IR a1)], BR(IR res) ->
+ | "__builtin_clz", [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) ->
emit (Pclz(W, res, a1))
- | ("__builtin_clzl" | "__builtin_clzll"), [BA(IR a1)], BR(IR res) ->
+ | ("__builtin_clzl" | "__builtin_clzll"), [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) ->
emit (Pclz(X, res, a1))
- | "__builtin_cls", [BA(IR a1)], BR(IR res) ->
+ | "__builtin_cls", [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) ->
emit (Pcls(W, res, a1))
- | ("__builtin_clsl" | "__builtin_clsll"), [BA(IR a1)], BR(IR res) ->
+ | ("__builtin_clsl" | "__builtin_clsll"), [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) ->
emit (Pcls(X, res, a1))
- | "__builtin_ctz", [BA(IR a1)], BR(IR res) ->
+ | "__builtin_ctz", [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) ->
emit (Prbit(W, res, a1));
emit (Pclz(W, res, res))
- | ("__builtin_ctzl" | "__builtin_ctzll"), [BA(IR a1)], BR(IR res) ->
+ | ("__builtin_ctzl" | "__builtin_ctzll"), [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) ->
emit (Prbit(X, res, a1));
emit (Pclz(X, res, res))
(* Float arithmetic *)
- | ("__builtin_fsqrt" | "__builtin_sqrt"), [BA(FR a1)], BR(FR res) ->
+ | "__builtin_fabs", [BA(DR(FR a1))], BR(DR(FR res)) ->
+ emit (Pfabs(D, res, a1))
+ | ("__builtin_fsqrt" | "__builtin_sqrt"), [BA(DR(FR a1))], BR(DR(FR res)) ->
emit (Pfsqrt(D, res, a1))
- | "__builtin_fmadd", [BA(FR a1); BA(FR a2); BA(FR a3)], BR(FR res) ->
+ | "__builtin_fmadd", [BA(DR(FR a1)); BA(DR(FR a2)); BA(DR(FR a3))], BR(DR(FR res)) ->
emit (Pfmadd(D, res, a1, a2, a3))
- | "__builtin_fmsub", [BA(FR a1); BA(FR a2); BA(FR a3)], BR(FR res) ->
+ | "__builtin_fmsub", [BA(DR(FR a1)); BA(DR(FR a2)); BA(DR(FR a3))], BR(DR(FR res)) ->
emit (Pfmsub(D, res, a1, a2, a3))
- | "__builtin_fnmadd", [BA(FR a1); BA(FR a2); BA(FR a3)], BR(FR res) ->
+ | "__builtin_fnmadd", [BA(DR(FR a1)); BA(DR(FR a2)); BA(DR(FR a3))], BR(DR(FR res)) ->
emit (Pfnmadd(D, res, a1, a2, a3))
- | "__builtin_fnmsub", [BA(FR a1); BA(FR a2); BA(FR a3)], BR(FR res) ->
+ | "__builtin_fnmsub", [BA(DR(FR a1)); BA(DR(FR a2)); BA(DR(FR a3))], BR(DR(FR res)) ->
emit (Pfnmsub(D, res, a1, a2, a3))
- | "__builtin_fmax", [BA (FR a1); BA (FR a2)], BR (FR res) ->
+ | "__builtin_fmax", [BA(DR(FR a1)); BA(DR(FR a2))], BR(DR(FR res)) ->
emit (Pfmax (D, res, a1, a2))
- | "__builtin_fmin", [BA (FR a1); BA (FR a2)], BR (FR res) ->
+ | "__builtin_fmin", [BA(DR(FR a1)); BA(DR(FR a2))], BR(DR(FR res)) ->
emit (Pfmin (D, res, a1, a2))
(* Vararg *)
- | "__builtin_va_start", [BA(IR a)], _ ->
+ | "__builtin_va_start", [BA(DR(IR(RR1 a)))], _ ->
expand_builtin_va_start a
(* Catch-all *)
| _ ->
@@ -441,7 +443,7 @@ let expand_instruction instr =
expand_annot_val kind txt targ args res
| EF_memcpy(sz, al) ->
expand_builtin_memcpy (Z.to_int sz) (Z.to_int al) args
- | EF_annot _ | EF_debug _ | EF_inline_asm _ ->
+ | EF_annot _ | EF_debug _ | EF_inline_asm _ | EF_profiling _ ->
emit instr
| _ ->
assert false
@@ -468,9 +470,9 @@ let float_reg_to_dwarf = function
| D30 -> 94 | D31 -> 95
let preg_to_dwarf = function
- | IR r -> int_reg_to_dwarf r
- | FR r -> float_reg_to_dwarf r
- | SP -> 31
+ | DR(IR(RR1 r)) -> int_reg_to_dwarf r
+ | DR(FR r) -> float_reg_to_dwarf r
+ | DR(IR(XSP)) -> 31
| _ -> assert false
let expand_function id fn =