author     David Monniaux <david.monniaux@univ-grenoble-alpes.fr>  2020-04-20 18:45:02 +0200
committer  David Monniaux <david.monniaux@univ-grenoble-alpes.fr>  2020-04-20 18:45:02 +0200
commit     71509473c434483d6fb7901795a1004cf272680c (patch)
tree       3f6c7a89dd21b535915533e2ca66b36ac6ee7522 /runtime
parent     4965352c558f8e030b3b968f98566f87ed6f0b8a (diff)
parent     09184b1ab9be700d0cb5125c113b4fb8d6be06c8 (diff)
Merge remote-tracking branch 'origin/mppa-fast-div' into mppa-features
Diffstat (limited to 'runtime')
-rw-r--r--  runtime/Makefile                      |   8
-rw-r--r--  runtime/mppa_k1c/i32_divmod.s         | 120
-rw-r--r--  runtime/mppa_k1c/i64_sdiv.c           |   5
-rw-r--r--  runtime/mppa_k1c/i64_smod.c           |  35
-rw-r--r--  runtime/mppa_k1c/i64_udiv.c           |   6
-rw-r--r--  runtime/mppa_k1c/i64_udivmod.c        |   2
-rw-r--r--  runtime/mppa_k1c/i64_udivmod_stsud.s  | 218
-rw-r--r--  runtime/mppa_k1c/i64_umod.c           |   6
8 files changed, 356 insertions, 44 deletions
diff --git a/runtime/Makefile b/runtime/Makefile
index 98cde235..ebce458b 100644
--- a/runtime/Makefile
+++ b/runtime/Makefile
@@ -23,10 +23,10 @@ OBJS=i64_dtou.o i64_utod.o i64_utof.o vararg.o
else ifeq ($(ARCH),powerpc64)
OBJS=i64_dtou.o i64_stof.o i64_utod.o i64_utof.o vararg.o
else ifeq ($(ARCH),mppa_k1c)
-OBJS=i64_umod.o i64_udiv.o i64_udivmod.o i64_sdiv.o i64_smod.o vararg.o\
- i64_dtos.o i64_dtou.o i64_utod.o i64_utof.o i64_stod.o i64_stof.o\
- i64_shl.o i64_shr.o
-# Missing: i64_utod.o i64_utof.o i64_stod.o i64_stof.o
+OBJS=i64_umod.o i64_udiv.o i64_udivmod.o i64_sdiv.o i64_smod.o \
+ i64_udivmod_stsud.o i32_divmod.o \
+ i64_utod.o i64_utof.o i64_stod.o i64_stof.o \
+ vararg.o
DOMAKE:=$(shell (cd mppa_k1c && make))
else ifeq ($(ARCH),aarch64)
OBJS=vararg.o
diff --git a/runtime/mppa_k1c/i32_divmod.s b/runtime/mppa_k1c/i32_divmod.s
new file mode 100644
index 00000000..d2b4e8d5
--- /dev/null
+++ b/runtime/mppa_k1c/i32_divmod.s
@@ -0,0 +1,120 @@
+/* K1C
+32-bit unsigned/signed integer division/modulo (udiv5)
+
+D. Monniaux, CNRS, VERIMAG */
+
+
+ .globl __compcert_i32_sdiv_fp
+__compcert_i32_sdiv_fp:
+ compw.lt $r2 = $r0, 0
+ compw.lt $r3 = $r1, 0
+ absw $r0 = $r0
+ absw $r1 = $r1
+ ;;
+ xord $r2 = $r2, $r3
+ make $r3 = 0
+ goto __compcert_i32_divmod_fp
+ ;;
+
+ .globl __compcert_i32_smod_fp
+__compcert_i32_smod_fp:
+ compw.lt $r2 = $r0, 0
+ absw $r0 = $r0
+ absw $r1 = $r1
+ make $r3 = 1
+ goto __compcert_i32_divmod_fp
+ ;;
+
+ .globl __compcert_i32_umod_fp
+__compcert_i32_umod_fp:
+ make $r2 = 0
+ make $r3 = 1
+ goto __compcert_i32_divmod_fp
+ ;;
+
+ .globl __compcert_i32_udiv_fp
+__compcert_i32_udiv_fp:
+ make $r2 = 0
+ make $r3 = 0
+ ;;
+
+/*
+r0 : a
+r1 : b
+r2 : negate result?
+r3 : return mod?
+*/
+
+ .globl __compcert_i32_divmod_fp
+__compcert_i32_divmod_fp:
+ zxwd $r7 = $r1
+ zxwd $r1 = $r0
+#ifndef NO_SHORTCUT
+ compw.ltu $r8 = $r0, $r1
+ cb.weqz $r1? .ERR # return 0 if divide by 0
+#endif
+ ;;
+# a in r1, b in r7
+ floatud.rn.s $r5 = $r7, 0
+#ifndef NO_SHORTCUT
+ compd.eq $r8 = $r7, 1
+ cb.wnez $r8? .LESS # shortcut if a < b
+#endif
+ ;;
+# b (double) in r5
+ make $r6 = 0x3ff0000000000000 # 1.0
+ fnarrowdw.rn.s $r11 = $r5
+# cb.wnez $r8, .RET1 # if b=1
+ ;;
+# b (single) in r11
+ floatud.rn.s $r10 = $r1, 0
+ finvw.rn.s $r11 = $r11
+ ;;
+ fwidenlwd.s $r11 = $r11
+ ;;
+# invb0 in r11
+ copyd $r9 = $r11
+ ffmsd.rn.s $r6 = $r11, $r5
+# alpha in r6
+ ;;
+ ffmad.rn.s $r9 = $r11, $r6
+# 1/b in r9
+ ;;
+ fmuld.rn.s $r0 = $r10, $r9
+# a/b in r0
+ ;;
+ fixedud.rn.s $r0 = $r0, 0
+ ;;
+ msbfd $r1 = $r0, $r7
+ ;;
+ addd $r6 = $r0, -1
+ addd $r8 = $r1, $r7
+ ;;
+ cmoved.dltz $r1? $r0 = $r6
+ cmoved.dltz $r1? $r1 = $r8
+ ;;
+ negw $r4 = $r0
+ negw $r5 = $r1
+ ;;
+ cmoved.wnez $r2? $r0 = $r4
+ cmoved.wnez $r2? $r1 = $r5
+ ;;
+.END:
+ cmoved.wnez $r3? $r0 = $r1
+ ret
+ ;;
+#ifndef NO_SHORTCUT
+
+.LESS:
+ make $r0 = 0
+ negw $r5 = $r1
+ ;;
+ cmoved.wnez $r2? $r1 = $r5
+ goto .END
+ ;;
+
+.ERR:
+ make $r0 = 0
+ ret
+ ;;
+#endif
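
For readers of this patch, here is a rough C model of what __compcert_i32_divmod_fp computes, following the register convention documented in the file (r2: negate result?, r3: return mod?). This is only an illustrative sketch, not the shipped code: the helper name is made up, and the plain 1.0/b double division stands in for the hardware single-precision reciprocal estimate (finvw) refined by one Newton step (ffmsd/ffmad).

    #include <stdint.h>

    /* Illustrative model of the FP-based 32-bit divmod; name is hypothetical. */
    static uint32_t i32_divmod_fp_model(uint32_t a, uint32_t b,
                                        int negate, int want_mod)
    {
      uint32_t q, r;
      if (b == 0)                          /* .ERR path: division by zero returns 0 */
        return 0;
      if (a < b) {                         /* .LESS shortcut: quotient is 0 */
        q = 0; r = a;
      } else {
        double invb = 1.0 / (double) b;    /* asm: FP reciprocal estimate + Newton step */
        q = (uint32_t) ((double) a * invb);
        int64_t rem = (int64_t) a - (int64_t) q * b;
        if (rem < 0) { q -= 1; rem += b; } /* fix a possible off-by-one from rounding */
        r = (uint32_t) rem;
      }
      uint32_t res = want_mod ? r : q;
      return negate ? -res : res;
    }

In this model, as in the assembly, negation is applied to whichever result is selected, so the remainder keeps the sign of the dividend for the smod entry point.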
diff --git a/runtime/mppa_k1c/i64_sdiv.c b/runtime/mppa_k1c/i64_sdiv.c
index 60269cae..b98d9316 100644
--- a/runtime/mppa_k1c/i64_sdiv.c
+++ b/runtime/mppa_k1c/i64_sdiv.c
@@ -1,10 +1,5 @@
extern long __divdi3 (long a, long b);
-long i64_sdiv (long a, long b)
-{
- return __divdi3 (a, b);
-}
-
int i32_sdiv (int a, int b)
{
return __divdi3 (a, b);
diff --git a/runtime/mppa_k1c/i64_smod.c b/runtime/mppa_k1c/i64_smod.c
index 26ffb39b..3371eecf 100644
--- a/runtime/mppa_k1c/i64_smod.c
+++ b/runtime/mppa_k1c/i64_smod.c
@@ -1,40 +1,5 @@
-#if COMPLIQUE
-unsigned long long
-udivmoddi4(unsigned long long num, unsigned long long den, int modwanted);
-
-long long
-i64_smod (long long a, long long b)
-{
- int neg = 0;
- long long res;
-
- if (a < 0)
- {
- a = -a;
- neg = 1;
- }
-
- if (b < 0)
- b = -b;
-
- res = udivmoddi4 (a, b, 1);
-
- if (neg)
- res = -res;
-
- return res;
-}
-
-#else
extern long __moddi3 (long a, long b);
-
-long i64_smod (long a, long b)
-{
- return __moddi3 (a, b);
-}
-
int i32_smod (int a, int b)
{
return __moddi3 (a, b);
}
-#endif
diff --git a/runtime/mppa_k1c/i64_udiv.c b/runtime/mppa_k1c/i64_udiv.c
index e69de29b..75f4bbf5 100644
--- a/runtime/mppa_k1c/i64_udiv.c
+++ b/runtime/mppa_k1c/i64_udiv.c
@@ -0,0 +1,6 @@
+extern unsigned long __udivdi3 (unsigned long a, unsigned long b);
+
+unsigned i32_udiv (unsigned a, unsigned b)
+{
+ return __udivdi3 (a, b);
+}
diff --git a/runtime/mppa_k1c/i64_udivmod.c b/runtime/mppa_k1c/i64_udivmod.c
index 74b39874..ca48cd87 100644
--- a/runtime/mppa_k1c/i64_udivmod.c
+++ b/runtime/mppa_k1c/i64_udivmod.c
@@ -1,3 +1,4 @@
+#if 0
/* THIS IS THE PREVIOUS VERSION, USED ON BOSTAN AND ANDEY */
unsigned long long
udivmoddi4(unsigned long long num, unsigned long long den, int modwanted)
@@ -26,3 +27,4 @@ udivmoddi4(unsigned long long num, unsigned long long den, int modwanted)
return modwanted ? r : q;
}
+#endif
diff --git a/runtime/mppa_k1c/i64_udivmod_stsud.s b/runtime/mppa_k1c/i64_udivmod_stsud.s
new file mode 100644
index 00000000..50d0a767
--- /dev/null
+++ b/runtime/mppa_k1c/i64_udivmod_stsud.s
@@ -0,0 +1,218 @@
+/*
+Integer division for K1c
+
+David Monniaux, CNRS / Verimag
+ */
+
+ .globl dm_udivmoddi4
+dm_udivmoddi4:
+ sxwd $r2 = $r2
+ make $r5 = 0
+ compd.ltu $r3 = $r0, $r1
+ ;;
+
+ clzd $r3 = $r1
+ clzd $r4 = $r0
+ cb.dnez $r3? .L74
+ ;;
+
+ sbfw $r4 = $r4, $r3
+ ;;
+
+ zxwd $r3 = $r4
+ slld $r1 = $r1, $r4
+ ;;
+
+ compd.ltu $r6 = $r0, $r1
+ ;;
+
+ cb.dnez $r6? .L4C
+ ;;
+
+ make $r5 = 1
+ sbfd $r0 = $r1, $r0
+ ;;
+
+ slld $r5 = $r5, $r4
+ ;;
+
+.L4C:
+ cb.deqz $r3? .L74
+ ;;
+
+ srld $r1 = $r1, 1
+ zxwd $r3 = $r4
+ ;;
+
+ loopdo $r3, .LOOP
+ ;;
+
+ stsud $r0 = $r1, $r0
+ ;;
+
+.LOOP:
+ addd $r5 = $r0, $r5
+ srld $r0 = $r0, $r4
+ ;;
+
+ slld $r4 = $r0, $r4
+ ;;
+
+ sbfd $r5 = $r4, $r5
+ ;;
+
+.L74:
+ cmoved.deqz $r2? $r0 = $r5
+ ret
+ ;;
+
+/*
+r0 : a
+r1 : b
+r2 : negate result?
+r3 : return mod?
+*/
+
+ .globl __compcert_i32_sdiv_stsud
+__compcert_i32_sdiv_stsud:
+ compw.lt $r2 = $r0, 0
+ compw.lt $r3 = $r1, 0
+ absw $r0 = $r0
+ absw $r1 = $r1
+ ;;
+ zxwd $r0 = $r0
+ zxwd $r1 = $r1
+ xord $r2 = $r2, $r3
+ make $r3 = 0
+ goto __compcert_i64_divmod_stsud
+ ;;
+
+ .globl __compcert_i32_smod_stsud
+__compcert_i32_smod_stsud:
+ compw.lt $r2 = $r0, 0
+ absw $r0 = $r0
+ absw $r1 = $r1
+ make $r3 = 1
+ ;;
+ zxwd $r0 = $r0
+ zxwd $r1 = $r1
+ goto __compcert_i64_divmod_stsud
+ ;;
+
+ .globl __compcert_i32_umod_stsud
+__compcert_i32_umod_stsud:
+ make $r2 = 0
+ make $r3 = 1
+ zxwd $r0 = $r0
+ zxwd $r1 = $r1
+ goto __compcert_i64_divmod_stsud
+ ;;
+
+ .globl __compcert_i32_udiv_stsud
+__compcert_i32_udiv_stsud:
+ make $r2 = 0
+ make $r3 = 0
+ zxwd $r0 = $r0
+ zxwd $r1 = $r1
+ goto __compcert_i64_divmod_stsud
+ ;;
+
+ .globl __compcert_i64_umod_stsud
+__compcert_i64_umod_stsud:
+ make $r2 = 0
+ make $r3 = 1
+ goto __compcert_i64_divmod_stsud
+ ;;
+
+ .globl __compcert_i64_udiv_stsud
+__compcert_i64_udiv_stsud:
+ make $r2 = 0
+ make $r3 = 0
+ goto __compcert_i64_divmod_stsud
+ ;;
+
+ .globl __compcert_i64_sdiv_stsud
+__compcert_i64_sdiv_stsud:
+ compd.lt $r2 = $r0, 0
+ compd.lt $r3 = $r1, 0
+ absd $r0 = $r0
+ absd $r1 = $r1
+ ;;
+ xord $r2 = $r2, $r3
+ make $r3 = 0
+ goto __compcert_i64_divmod_stsud
+ ;;
+
+ .globl __compcert_i64_smod_stsud
+__compcert_i64_smod_stsud:
+ compd.lt $r2 = $r0, 0
+ absd $r0 = $r0
+ absd $r1 = $r1
+ make $r3 = 1
+ goto __compcert_i64_divmod_stsud
+ ;;
+
+ .globl __compcert_i64_divmod_stsud
+__compcert_i64_divmod_stsud:
+ make $r5 = 0
+ compd.ltu $r7 = $r0, $r1
+ ;;
+
+ clzd $r7 = $r1
+ clzd $r4 = $r0
+ cb.dnez $r7? .ZL74
+ ;;
+
+ sbfw $r4 = $r4, $r7
+ ;;
+
+ zxwd $r7 = $r4
+ slld $r1 = $r1, $r4
+ ;;
+
+ compd.ltu $r6 = $r0, $r1
+ ;;
+
+ cb.dnez $r6? .ZL4C
+ ;;
+
+ make $r5 = 1
+ sbfd $r0 = $r1, $r0
+ ;;
+
+ slld $r5 = $r5, $r4
+ ;;
+
+.ZL4C:
+ cb.deqz $r7? .ZL74
+ ;;
+
+ srld $r1 = $r1, 1
+ zxwd $r7 = $r4
+ ;;
+
+ loopdo $r7, .ZLOOP
+ ;;
+
+ stsud $r0 = $r1, $r0
+ ;;
+
+.ZLOOP:
+ addd $r5 = $r0, $r5
+ srld $r0 = $r0, $r4
+ ;;
+
+ slld $r4 = $r0, $r4
+ ;;
+
+ sbfd $r5 = $r4, $r5
+ ;;
+
+.ZL74:
+ cmoved.weqz $r3? $r0 = $r5
+ ;;
+ negd $r5 = $r0
+ ;;
+ cmoved.wnez $r2? $r0 = $r5
+ ret
+ ;;
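
Similarly, a rough C model of __compcert_i64_divmod_stsud, again under the r2 = negate / r3 = return-mod convention documented above (the function name is made up for illustration). The real routine normalizes the divisor with clzd and retires one quotient bit per stsud divide-step inside the loopdo; the plain shift-and-subtract loop below computes the same quotient and remainder, leaving the divide-by-zero case aside.

    #include <stdint.h>

    /* Illustrative shift-and-subtract model of the stsud-based divmod. */
    static uint64_t i64_divmod_stsud_model(uint64_t a, uint64_t b,
                                           int negate, int want_mod)
    {
      uint64_t q = 0, r = a;
      if (b != 0 && a >= b) {
        int shift = __builtin_clzll(b) - __builtin_clzll(a); /* asm: clzd + sbfw */
        uint64_t d = b << shift;            /* align divisor under the dividend */
        for (int i = 0; i <= shift; i++) {  /* one quotient bit per step (loopdo/stsud) */
          q <<= 1;
          if (r >= d) { r -= d; q |= 1; }
          d >>= 1;
        }
      }
      uint64_t res = want_mod ? r : q;
      return negate ? -res : res;
    }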
diff --git a/runtime/mppa_k1c/i64_umod.c b/runtime/mppa_k1c/i64_umod.c
index e69de29b..59e35960 100644
--- a/runtime/mppa_k1c/i64_umod.c
+++ b/runtime/mppa_k1c/i64_umod.c
@@ -0,0 +1,6 @@
+extern unsigned long __umoddi3 (unsigned long a, unsigned long b);
+
+unsigned i32_umod (unsigned a, unsigned b)
+{
+ return __umoddi3 (a, b);
+}
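
The 32-bit wrappers in i64_udiv.c and i64_umod.c rely on the usual widening: an unsigned 32-bit argument converts to the 64-bit helper's parameter type without changing its value, so the 64-bit quotient and remainder of the widened operands coincide with the 32-bit results. A small, hypothetical sanity check (not part of the runtime) illustrating that property:

    #include <assert.h>

    /* Hypothetical check: dividing zero-extended 32-bit operands with 64-bit
       arithmetic matches native 32-bit unsigned division and remainder. */
    static void check_u32_widening(unsigned a, unsigned b)
    {
      if (b != 0) {
        assert ((unsigned) ((unsigned long long) a / b) == a / b);
        assert ((unsigned) ((unsigned long long) a % b) == a % b);
      }
    }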