From a56e0c65b08f0f7123630f3a1b415e67ef48c38e Mon Sep 17 00:00:00 2001
From: Xavier Leroy <xavier.leroy@college-de-france.fr>
Date: Sat, 25 Jul 2020 18:27:04 +0200
Subject: AArch64 implementation of __builtin_ctz*

Implemented as the "rbit" instruction (reverse bits) followed by "clz" (count leading zeros).
---
 aarch64/Asm.v            | 2 ++
 aarch64/Asmexpand.ml     | 8 +++++++-
 aarch64/TargetPrinter.ml | 2 ++
 3 files changed, 11 insertions(+), 1 deletion(-)

(limited to 'aarch64')

diff --git a/aarch64/Asm.v b/aarch64/Asm.v
index 47cd3051..79232783 100644
--- a/aarch64/Asm.v
+++ b/aarch64/Asm.v
@@ -237,6 +237,7 @@ Inductive instruction: Type :=
   | Pclz (sz: isize) (rd r1: ireg)                                    (**r count leading zero bits *)
   | Prev (sz: isize) (rd r1: ireg)                                    (**r reverse bytes *)
   | Prev16 (sz: isize) (rd r1: ireg)                                  (**r reverse bytes in each 16-bit word *)
+  | Prbit (sz: isize) (rd r1: ireg)                                   (**r reverse bits *)
   (** Conditional data processing *)
   | Pcsel (rd: ireg) (r1 r2: ireg) (c: testcond)                      (**r int conditional move *)
   | Pcset (rd: ireg) (c: testcond)                                    (**r set to 1/0 if cond is true/false *)
@@ -1107,6 +1108,7 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
   | Pclz _ _ _
   | Prev _ _ _
   | Prev16 _ _ _
+  | Prbit _ _ _
   | Pfsqrt _ _ _
   | Pfmadd _ _ _ _ _
   | Pfmsub _ _ _ _ _
diff --git a/aarch64/Asmexpand.ml b/aarch64/Asmexpand.ml
index dcb2d1aa..02c27053 100644
--- a/aarch64/Asmexpand.ml
+++ b/aarch64/Asmexpand.ml
@@ -337,7 +337,7 @@ let expand_builtin_inline name args res =
   | "__builtin_bswap16", [BA(IR a1)], BR(IR res) ->
      emit (Prev16(W, res, a1));
      emit (Pandimm(W, res, RR0 res, Z.of_uint 0xFFFF))
-  (* Count leading zeros and leading sign bits *)
+  (* Count leading zeros, leading sign bits, trailing zeros *)
   | "__builtin_clz",  [BA(IR a1)], BR(IR res) ->
      emit (Pclz(W, res, a1))
   | ("__builtin_clzl" | "__builtin_clzll"),  [BA(IR a1)], BR(IR res) ->
@@ -346,6 +346,12 @@ let expand_builtin_inline name args res =
      emit (Pcls(W, res, a1))
   | ("__builtin_clsl" | "__builtin_clsll"),  [BA(IR a1)], BR(IR res) ->
      emit (Pcls(X, res, a1))
+  | "__builtin_ctz",  [BA(IR a1)], BR(IR res) ->
+     emit (Prbit(W, res, a1));
+     emit (Pclz(W, res, res))
+  | ("__builtin_ctzl" | "__builtin_ctzll"),  [BA(IR a1)], BR(IR res) ->
+     emit (Prbit(X, res, a1));
+     emit (Pclz(X, res, res))
  (* Float arithmetic *)
   | "__builtin_fsqrt",  [BA(FR a1)], BR(FR res) ->
      emit (Pfsqrt(D, res, a1))
diff --git a/aarch64/TargetPrinter.ml b/aarch64/TargetPrinter.ml
index e54673dd..fec05cc6 100644
--- a/aarch64/TargetPrinter.ml
+++ b/aarch64/TargetPrinter.ml
@@ -375,6 +375,8 @@ module Target : TARGET =
         fprintf oc "	rev	%a, %a\n" ireg (sz, rd) ireg (sz, r1)
     | Prev16(sz, rd, r1) ->
         fprintf oc "	rev16	%a, %a\n" ireg (sz, rd) ireg (sz, r1)
+    | Prbit(sz, rd, r1) ->
+        fprintf oc "	rbit	%a, %a\n" ireg (sz, rd) ireg (sz, r1)
     (* Conditional data processing *)
     | Pcsel(rd, r1, r2, c) ->
         fprintf oc "	csel	%a, %a, %a, %s\n" xreg rd xreg r1 xreg r2 (condition_name c)
-- 
cgit