From a56e0c65b08f0f7123630f3a1b415e67ef48c38e Mon Sep 17 00:00:00 2001 From: Xavier Leroy Date: Sat, 25 Jul 2020 18:27:04 +0200 Subject: AArch64 implementation of __builtin_ctz* Using the "rbit" instruction (reverse bits). --- aarch64/Asm.v | 2 ++ aarch64/Asmexpand.ml | 8 +++++++- aarch64/TargetPrinter.ml | 2 ++ 3 files changed, 11 insertions(+), 1 deletion(-) (limited to 'aarch64') diff --git a/aarch64/Asm.v b/aarch64/Asm.v index 47cd3051..79232783 100644 --- a/aarch64/Asm.v +++ b/aarch64/Asm.v @@ -237,6 +237,7 @@ Inductive instruction: Type := | Pclz (sz: isize) (rd r1: ireg) (**r count leading zero bits *) | Prev (sz: isize) (rd r1: ireg) (**r reverse bytes *) | Prev16 (sz: isize) (rd r1: ireg) (**r reverse bytes in each 16-bit word *) + | Prbit (sz: isize) (rd r1: ireg) (**r reverse bits *) (** Conditional data processing *) | Pcsel (rd: ireg) (r1 r2: ireg) (c: testcond) (**r int conditional move *) | Pcset (rd: ireg) (c: testcond) (**r set to 1/0 if cond is true/false *) @@ -1107,6 +1108,7 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out | Pclz _ _ _ | Prev _ _ _ | Prev16 _ _ _ + | Prbit _ _ _ | Pfsqrt _ _ _ | Pfmadd _ _ _ _ _ | Pfmsub _ _ _ _ _ diff --git a/aarch64/Asmexpand.ml b/aarch64/Asmexpand.ml index dcb2d1aa..02c27053 100644 --- a/aarch64/Asmexpand.ml +++ b/aarch64/Asmexpand.ml @@ -337,7 +337,7 @@ let expand_builtin_inline name args res = | "__builtin_bswap16", [BA(IR a1)], BR(IR res) -> emit (Prev16(W, res, a1)); emit (Pandimm(W, res, RR0 res, Z.of_uint 0xFFFF)) - (* Count leading zeros and leading sign bits *) + (* Count leading zeros, leading sign bits, trailing zeros *) | "__builtin_clz", [BA(IR a1)], BR(IR res) -> emit (Pclz(W, res, a1)) | ("__builtin_clzl" | "__builtin_clzll"), [BA(IR a1)], BR(IR res) -> @@ -346,6 +346,12 @@ let expand_builtin_inline name args res = emit (Pcls(W, res, a1)) | ("__builtin_clsl" | "__builtin_clsll"), [BA(IR a1)], BR(IR res) -> emit (Pcls(X, res, a1)) + | "__builtin_ctz", [BA(IR a1)], BR(IR res) -> + emit (Prbit(W, res, a1)); + emit (Pclz(W, res, res)) + | ("__builtin_ctzl" | "__builtin_ctzll"), [BA(IR a1)], BR(IR res) -> + emit (Prbit(X, res, a1)); + emit (Pclz(X, res, res)) (* Float arithmetic *) | "__builtin_fsqrt", [BA(FR a1)], BR(FR res) -> emit (Pfsqrt(D, res, a1)) diff --git a/aarch64/TargetPrinter.ml b/aarch64/TargetPrinter.ml index e54673dd..fec05cc6 100644 --- a/aarch64/TargetPrinter.ml +++ b/aarch64/TargetPrinter.ml @@ -375,6 +375,8 @@ module Target : TARGET = fprintf oc " rev %a, %a\n" ireg (sz, rd) ireg (sz, r1) | Prev16(sz, rd, r1) -> fprintf oc " rev16 %a, %a\n" ireg (sz, rd) ireg (sz, r1) + | Prbit(sz, rd, r1) -> + fprintf oc " rbit %a, %a\n" ireg (sz, rd) ireg (sz, r1) (* Conditional data processing *) | Pcsel(rd, r1, r2, c) -> fprintf oc " csel %a, %a, %a, %s\n" xreg rd xreg r1 xreg r2 (condition_name c) -- cgit