diff options
Diffstat (limited to 'aarch64/Asmexpand.ml')
-rw-r--r-- | aarch64/Asmexpand.ml | 126 |
1 file changed, 68 insertions, 58 deletions
diff --git a/aarch64/Asmexpand.ml b/aarch64/Asmexpand.ml index b0787d0a..8187e077 100644 --- a/aarch64/Asmexpand.ml +++ b/aarch64/Asmexpand.ml @@ -34,13 +34,13 @@ let _m1 = Z.of_sint (-1) (* Emit instruction sequences that set or offset a register by a constant. *) let expand_loadimm32 (dst: ireg) n = - List.iter emit (Asmgen.loadimm32 dst n []) + List.iter emit (Asmgen.Asmgen_expand.loadimm32 dst n []) let expand_addimm64 (dst: iregsp) (src: iregsp) n = - List.iter emit (Asmgen.addimm64 dst src n []) + List.iter emit (Asmgen.Asmgen_expand.addimm64 dst src n []) let expand_storeptr (src: ireg) (base: iregsp) ofs = - List.iter emit (Asmgen.storeptr src base ofs []) + List.iter emit (Asmgen.Asmgen_expand.storeptr src base ofs []) (* Handling of varargs *) @@ -73,8 +73,8 @@ let save_parameter_registers ir fr = while !i < 8 do let pos = 8*16 + !i*8 in if !i land 1 = 0 then begin - emit (Pstp(int_param_regs.(!i), int_param_regs.(!i + 1), - ADimm(XSP, Z.of_uint pos))); + emit (Pstpx(int_param_regs.(!i), int_param_regs.(!i + 1), + Mint64, Mint64, ADimm(XSP, Z.of_uint pos))); i := !i + 2 end else begin emit (Pstrx(int_param_regs.(!i), ADimm(XSP, Z.of_uint pos))); @@ -132,9 +132,9 @@ let expand_builtin_va_start r = let expand_annot_val kind txt targ args res = emit (Pbuiltin (EF_annot(kind,txt,[targ]), args, BR_none)); match args, res with - | [BA(IR src)], BR(IR dst) -> - if dst <> src then emit (Pmov (RR1 dst, RR1 src)) - | [BA(FR src)], BR(FR dst) -> + | [BA(DR(IR src))], BR(DR(IR dst)) -> + if dst <> src then emit (Pmov (dst, src)) + | [BA(DR(FR src))], BR(DR(FR dst)) -> if dst <> src then emit (Pfmov (dst, src)) | _, _ -> raise (Error "ill-formed __builtin_annot_val") @@ -152,8 +152,8 @@ let offset_in_range ofs = let memcpy_small_arg sz arg tmp = match arg with - | BA (IR r) -> - (RR1 r, _0) + | BA (DR(IR r)) -> + (r, _0) | BA_addrstack ofs -> if offset_in_range ofs && offset_in_range (Ptrofs.add ofs (Ptrofs.repr (Z.of_uint sz))) @@ -164,13 +164,13 @@ let 
memcpy_small_arg sz arg tmp = let expand_builtin_memcpy_small sz al src dst = let (tsrc, tdst) = - if dst <> BA (IR X17) then (X17, X29) else (X29, X17) in + if dst <> BA (DR(IR(RR1 X17))) then (X17, X29) else (X29, X17) in let (rsrc, osrc) = memcpy_small_arg sz src tsrc in let (rdst, odst) = memcpy_small_arg sz dst tdst in let rec copy osrc odst sz = if sz >= 16 then begin - emit (Pldp(X16, X30, ADimm(rsrc, osrc))); - emit (Pstp(X16, X30, ADimm(rdst, odst))); + emit (Pldpx(X16, X30, Mint64, Mint64, ADimm(rsrc, osrc))); + emit (Pstpx(X16, X30, Mint64, Mint64, ADimm(rdst, odst))); copy (Ptrofs.add osrc _16) (Ptrofs.add odst _16) (sz - 16) end else if sz >= 8 then begin @@ -197,7 +197,7 @@ let expand_builtin_memcpy_small sz al src dst = let memcpy_big_arg arg tmp = match arg with - | BA (IR r) -> emit (Pmov(RR1 tmp, RR1 r)) + | BA (DR(IR r)) -> emit (Pmov(RR1 tmp, r)) | BA_addrstack ofs -> expand_addimm64 (RR1 tmp) XSP ofs | _ -> assert false @@ -208,8 +208,8 @@ let expand_builtin_memcpy_big sz al src dst = let lbl = new_label () in expand_loadimm32 X15 (Z.of_uint (sz / 16)); emit (Plabel lbl); - emit (Pldp(X16, X17, ADpostincr(RR1 X30, _16))); - emit (Pstp(X16, X17, ADpostincr(RR1 X29, _16))); + emit (Pldpx(X16, X17, Mint64, Mint64, ADpostincr(RR1 X30, _16))); + emit (Pstpx(X16, X17, Mint64, Mint64, ADpostincr(RR1 X29, _16))); emit (Psubimm(W, RR1 X15, RR1 X15, _1)); emit (Pcbnz(W, X15, lbl)); if sz mod 16 >= 8 then begin @@ -241,29 +241,29 @@ let expand_builtin_memcpy sz al args = let expand_builtin_vload_common chunk base ofs res = let addr = ADimm(base, ofs) in match chunk, res with - | Mint8unsigned, BR(IR res) -> + | Mint8unsigned, BR(DR(IR(RR1 res))) -> emit (Pldrb(W, res, addr)) - | Mint8signed, BR(IR res) -> + | Mint8signed, BR(DR(IR(RR1 res))) -> emit (Pldrsb(W, res, addr)) - | Mint16unsigned, BR(IR res) -> + | Mint16unsigned, BR(DR(IR(RR1 res))) -> emit (Pldrh(W, res, addr)) - | Mint16signed, BR(IR res) -> + | Mint16signed, BR(DR(IR(RR1 res))) -> emit 
(Pldrsh(W, res, addr)) - | Mint32, BR(IR res) -> + | Mint32, BR(DR(IR(RR1 res))) -> emit (Pldrw(res, addr)) - | Mint64, BR(IR res) -> + | Mint64, BR(DR(IR(RR1 res))) -> emit (Pldrx(res, addr)) - | Mfloat32, BR(FR res) -> + | Mfloat32, BR(DR(FR res)) -> emit (Pldrs(res, addr)) - | Mfloat64, BR(FR res) -> + | Mfloat64, BR(DR(FR res)) -> emit (Pldrd(res, addr)) | _ -> assert false let expand_builtin_vload chunk args res = match args with - | [BA(IR addr)] -> - expand_builtin_vload_common chunk (RR1 addr) _0 res + | [BA(DR(IR addr))] -> + expand_builtin_vload_common chunk addr _0 res | [BA_addrstack ofs] -> if offset_in_range (Z.add ofs (Memdata.size_chunk chunk)) then expand_builtin_vload_common chunk XSP ofs res @@ -271,11 +271,11 @@ let expand_builtin_vload chunk args res = expand_addimm64 (RR1 X16) XSP ofs; (* X16 <- SP + ofs *) expand_builtin_vload_common chunk (RR1 X16) _0 res end - | [BA_addptr(BA(IR addr), BA_long ofs)] -> + | [BA_addptr(BA(DR(IR addr)), BA_long ofs)] -> if offset_in_range (Z.add ofs (Memdata.size_chunk chunk)) then - expand_builtin_vload_common chunk (RR1 addr) ofs res + expand_builtin_vload_common chunk addr ofs res else begin - expand_addimm64 (RR1 X16) (RR1 addr) ofs; (* X16 <- addr + ofs *) + expand_addimm64 (RR1 X16) addr ofs; (* X16 <- addr + ofs *) expand_builtin_vload_common chunk (RR1 X16) _0 res end | _ -> @@ -284,25 +284,25 @@ let expand_builtin_vload chunk args res = let expand_builtin_vstore_common chunk base ofs src = let addr = ADimm(base, ofs) in match chunk, src with - | (Mint8signed | Mint8unsigned), BA(IR src) -> + | (Mint8signed | Mint8unsigned), BA(DR(IR(RR1 src))) -> emit (Pstrb(src, addr)) - | (Mint16signed | Mint16unsigned), BA(IR src) -> + | (Mint16signed | Mint16unsigned), BA(DR(IR(RR1 src))) -> emit (Pstrh(src, addr)) - | Mint32, BA(IR src) -> + | Mint32, BA(DR(IR(RR1 src))) -> emit (Pstrw(src, addr)) - | Mint64, BA(IR src) -> + | Mint64, BA(DR(IR(RR1 src))) -> emit (Pstrx(src, addr)) - | Mfloat32, BA(FR src) -> + | 
Mfloat32, BA(DR(FR src)) -> emit (Pstrs(src, addr)) - | Mfloat64, BA(FR src) -> + | Mfloat64, BA(DR(FR src)) -> emit (Pstrd(src, addr)) | _ -> assert false let expand_builtin_vstore chunk args = match args with - | [BA(IR addr); src] -> - expand_builtin_vstore_common chunk (RR1 addr) _0 src + | [BA(DR(IR addr)); src] -> + expand_builtin_vstore_common chunk addr _0 src | [BA_addrstack ofs; src] -> if offset_in_range (Z.add ofs (Memdata.size_chunk chunk)) then expand_builtin_vstore_common chunk XSP ofs src @@ -310,11 +310,11 @@ let expand_builtin_vstore chunk args = expand_addimm64 (RR1 X16) XSP ofs; (* X16 <- SP + ofs *) expand_builtin_vstore_common chunk (RR1 X16) _0 src end - | [BA_addptr(BA(IR addr), BA_long ofs); src] -> + | [BA_addptr(BA(DR(IR addr)), BA_long ofs); src] -> if offset_in_range (Z.add ofs (Memdata.size_chunk chunk)) then - expand_builtin_vstore_common chunk (RR1 addr) ofs src + expand_builtin_vstore_common chunk addr ofs src else begin - expand_addimm64 (RR1 X16) (RR1 addr) ofs; (* X16 <- addr + ofs *) + expand_addimm64 (RR1 X16) addr ofs; (* X16 <- addr + ofs *) expand_builtin_vstore_common chunk (RR1 X16) _0 src end | _ -> @@ -330,37 +330,47 @@ let expand_builtin_inline name args res = | "__builtin_nop", [], _ -> emit Pnop (* Byte swap *) - | ("__builtin_bswap" | "__builtin_bswap32"), [BA(IR a1)], BR(IR res) -> + | ("__builtin_bswap" | "__builtin_bswap32"), [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) -> emit (Prev(W, res, a1)) - | "__builtin_bswap64", [BA(IR a1)], BR(IR res) -> + | "__builtin_bswap64", [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) -> emit (Prev(X, res, a1)) - | "__builtin_bswap16", [BA(IR a1)], BR(IR res) -> + | "__builtin_bswap16", [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) -> emit (Prev16(W, res, a1)); emit (Pandimm(W, res, RR0 res, Z.of_uint 0xFFFF)) - (* Count leading zeros and leading sign bits *) - | "__builtin_clz", [BA(IR a1)], BR(IR res) -> + (* Count leading zeros, leading sign bits, trailing zeros *) + | "__builtin_clz", 
[BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) -> emit (Pclz(W, res, a1)) - | ("__builtin_clzl" | "__builtin_clzll"), [BA(IR a1)], BR(IR res) -> + | ("__builtin_clzl" | "__builtin_clzll"), [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) -> emit (Pclz(X, res, a1)) - | "__builtin_cls", [BA(IR a1)], BR(IR res) -> + | "__builtin_cls", [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) -> emit (Pcls(W, res, a1)) - | ("__builtin_clsl" | "__builtin_clsll"), [BA(IR a1)], BR(IR res) -> + | ("__builtin_clsl" | "__builtin_clsll"), [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) -> emit (Pcls(X, res, a1)) + | "__builtin_ctz", [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) -> + emit (Prbit(W, res, a1)); + emit (Pclz(W, res, res)) + | ("__builtin_ctzl" | "__builtin_ctzll"), [BA(DR(IR(RR1 a1)))], BR(DR(IR(RR1 res))) -> + emit (Prbit(X, res, a1)); + emit (Pclz(X, res, res)) (* Float arithmetic *) - | "__builtin_fabs", [BA(FR a1)], BR(FR res) -> + | "__builtin_fabs", [BA(DR(FR a1))], BR(DR(FR res)) -> emit (Pfabs(D, res, a1)) - | "__builtin_fsqrt", [BA(FR a1)], BR(FR res) -> + | ("__builtin_fsqrt" | "__builtin_sqrt"), [BA(DR(FR a1))], BR(DR(FR res)) -> emit (Pfsqrt(D, res, a1)) - | "__builtin_fmadd", [BA(FR a1); BA(FR a2); BA(FR a3)], BR(FR res) -> + | "__builtin_fmadd", [BA(DR(FR a1)); BA(DR(FR a2)); BA(DR(FR a3))], BR(DR(FR res)) -> emit (Pfmadd(D, res, a1, a2, a3)) - | "__builtin_fmsub", [BA(FR a1); BA(FR a2); BA(FR a3)], BR(FR res) -> + | "__builtin_fmsub", [BA(DR(FR a1)); BA(DR(FR a2)); BA(DR(FR a3))], BR(DR(FR res)) -> emit (Pfmsub(D, res, a1, a2, a3)) - | "__builtin_fnmadd", [BA(FR a1); BA(FR a2); BA(FR a3)], BR(FR res) -> + | "__builtin_fnmadd", [BA(DR(FR a1)); BA(DR(FR a2)); BA(DR(FR a3))], BR(DR(FR res)) -> emit (Pfnmadd(D, res, a1, a2, a3)) - | "__builtin_fnmsub", [BA(FR a1); BA(FR a2); BA(FR a3)], BR(FR res) -> + | "__builtin_fnmsub", [BA(DR(FR a1)); BA(DR(FR a2)); BA(DR(FR a3))], BR(DR(FR res)) -> emit (Pfnmsub(D, res, a1, a2, a3)) + | "__builtin_fmax", [BA(DR(FR a1)); BA(DR(FR a2))], BR(DR(FR res)) 
-> + emit (Pfmax (D, res, a1, a2)) + | "__builtin_fmin", [BA(DR(FR a1)); BA(DR(FR a2))], BR(DR(FR res)) -> + emit (Pfmin (D, res, a1, a2)) (* Vararg *) - | "__builtin_va_start", [BA(IR a)], _ -> + | "__builtin_va_start", [BA(DR(IR(RR1 a)))], _ -> expand_builtin_va_start a (* Catch-all *) | _ -> @@ -427,9 +437,9 @@ let float_reg_to_dwarf = function | D30 -> 94 | D31 -> 95 let preg_to_dwarf = function - | IR r -> int_reg_to_dwarf r - | FR r -> float_reg_to_dwarf r - | SP -> 31 + | DR(IR(RR1 r)) -> int_reg_to_dwarf r + | DR(FR r) -> float_reg_to_dwarf r + | DR(IR(XSP)) -> 31 | _ -> assert false let expand_function id fn = |