diff options
author | David Monniaux <david.monniaux@univ-grenoble-alpes.fr> | 2020-04-20 18:45:02 +0200 |
---|---|---|
committer | David Monniaux <david.monniaux@univ-grenoble-alpes.fr> | 2020-04-20 18:45:02 +0200 |
commit | 71509473c434483d6fb7901795a1004cf272680c (patch) | |
tree | 3f6c7a89dd21b535915533e2ca66b36ac6ee7522 /runtime | |
parent | 4965352c558f8e030b3b968f98566f87ed6f0b8a (diff) | |
parent | 09184b1ab9be700d0cb5125c113b4fb8d6be06c8 (diff) | |
download | compcert-kvx-71509473c434483d6fb7901795a1004cf272680c.tar.gz compcert-kvx-71509473c434483d6fb7901795a1004cf272680c.zip |
Merge remote-tracking branch 'origin/mppa-fast-div' into mppa-features
Diffstat (limited to 'runtime')
-rw-r--r-- | runtime/Makefile | 8 | ||||
-rw-r--r-- | runtime/mppa_k1c/i32_divmod.s | 120 | ||||
-rw-r--r-- | runtime/mppa_k1c/i64_sdiv.c | 5 | ||||
-rw-r--r-- | runtime/mppa_k1c/i64_smod.c | 35 | ||||
-rw-r--r-- | runtime/mppa_k1c/i64_udiv.c | 6 | ||||
-rw-r--r-- | runtime/mppa_k1c/i64_udivmod.c | 2 | ||||
-rw-r--r-- | runtime/mppa_k1c/i64_udivmod_stsud.s | 218 | ||||
-rw-r--r-- | runtime/mppa_k1c/i64_umod.c | 6 |
8 files changed, 356 insertions, 44 deletions
diff --git a/runtime/Makefile b/runtime/Makefile index 98cde235..ebce458b 100644 --- a/runtime/Makefile +++ b/runtime/Makefile @@ -23,10 +23,10 @@ OBJS=i64_dtou.o i64_utod.o i64_utof.o vararg.o else ifeq ($(ARCH),powerpc64) OBJS=i64_dtou.o i64_stof.o i64_utod.o i64_utof.o vararg.o else ifeq ($(ARCH),mppa_k1c) -OBJS=i64_umod.o i64_udiv.o i64_udivmod.o i64_sdiv.o i64_smod.o vararg.o\ - i64_dtos.o i64_dtou.o i64_utod.o i64_utof.o i64_stod.o i64_stof.o\ - i64_shl.o i64_shr.o -# Missing: i64_utod.o i64_utof.o i64_stod.o i64_stof.o +OBJS=i64_umod.o i64_udiv.o i64_udivmod.o i64_sdiv.o i64_smod.o \ + i64_udivmod_stsud.o i32_divmod.o \ + i64_utod.o i64_utof.o i64_stod.o i64_stof.o \ + vararg.o DOMAKE:=$(shell (cd mppa_k1c && make)) else ifeq ($(ARCH),aarch64) OBJS=vararg.o diff --git a/runtime/mppa_k1c/i32_divmod.s b/runtime/mppa_k1c/i32_divmod.s new file mode 100644 index 00000000..d2b4e8d5 --- /dev/null +++ b/runtime/mppa_k1c/i32_divmod.s @@ -0,0 +1,120 @@ +/* K1C +32-bit unsigned/signed integer division/modulo (udiv5) + +D. Monniaux, CNRS, VERIMAG */ + + + .globl __compcert_i32_sdiv_fp +__compcert_i32_sdiv_fp: + compw.lt $r2 = $r0, 0 + compw.lt $r3 = $r1, 0 + absw $r0 = $r0 + absw $r1 = $r1 + ;; + xord $r2 = $r2, $r3 + make $r3 = 0 + goto __compcert_i32_divmod_fp + ;; + + .globl __compcert_i32_smod_fp +__compcert_i32_smod_fp: + compw.lt $r2 = $r0, 0 + absw $r0 = $r0 + absw $r1 = $r1 + make $r3 = 1 + goto __compcert_i32_divmod_fp + ;; + + .globl __compcert_i32_umod_fp +__compcert_i32_umod_fp: + make $r2 = 0 + make $r3 = 1 + goto __compcert_i32_divmod_fp + ;; + + .globl __compcert_i32_udiv_fp +__compcert_i32_udiv_fp: + make $r2 = 0 + make $r3 = 0 + ;; + +/* +r0 : a +r1 : b +r2 : negate result? +r3 : return mod? +*/ + + .globl __compcert_i32_divmod_fp +__compcert_i32_divmod_fp: + zxwd $r7 = $r1 + zxwd $r1 = $r0 +#ifndef NO_SHORTCUT + compw.ltu $r8 = $r0, $r1 + cb.weqz $r1? .ERR # return 0 if divide by 0 +#endif + ;; +# a in r1, b in r7 + floatud.rn.s $r5 = $r7, 0 +#ifndef NO_SHORTCUT + compd.eq $r8 = $r7, 1 + cb.wnez $r8? .LESS # shortcut if a < b +#endif + ;; +# b (double) in r5 + make $r6 = 0x3ff0000000000000 # 1.0 + fnarrowdw.rn.s $r11 = $r5 +# cb.wnez $r8, .RET1 # if b=1 + ;; +# b (single) in r11 + floatud.rn.s $r10 = $r1, 0 + finvw.rn.s $r11 = $r11 + ;; + fwidenlwd.s $r11 = $r11 + ;; +# invb0 in r11 + copyd $r9 = $r11 + ffmsd.rn.s $r6 = $r11, $r5 +# alpha in r6 + ;; + ffmad.rn.s $r9 = $r11, $r6 +# 1/b in r9 + ;; + fmuld.rn.s $r0 = $r10, $r9 +# a/b in r1 + ;; + fixedud.rn.s $r0 = $r0, 0 + ;; + msbfd $r1 = $r0, $r7 + ;; + addd $r6 = $r0, -1 + addd $r8 = $r1, $r7 + ;; + cmoved.dltz $r1? $r0 = $r6 + cmoved.dltz $r1? $r1 = $r8 + ;; + negw $r4 = $r0 + negw $r5 = $r1 + ;; + cmoved.wnez $r2? $r0 = $r4 + cmoved.wnez $r2? $r1 = $r5 + ;; +.END: + cmoved.wnez $r3? $r0 = $r1 + ret + ;; +#ifndef NO_SHORTCUT + +.LESS: + make $r0 = 0 + negw $r5 = $r1 + ;; + cmoved.wnez $r2? $r1 = $r5 + goto .END + ;; + +.ERR: + make $r0 = 0 + ret + ;; +#endif diff --git a/runtime/mppa_k1c/i64_sdiv.c b/runtime/mppa_k1c/i64_sdiv.c index 60269cae..b98d9316 100644 --- a/runtime/mppa_k1c/i64_sdiv.c +++ b/runtime/mppa_k1c/i64_sdiv.c @@ -1,10 +1,5 @@ extern long __divdi3 (long a, long b); -long i64_sdiv (long a, long b) -{ - return __divdi3 (a, b); -} - int i32_sdiv (int a, int b) { return __divdi3 (a, b); diff --git a/runtime/mppa_k1c/i64_smod.c b/runtime/mppa_k1c/i64_smod.c index 26ffb39b..3371eecf 100644 --- a/runtime/mppa_k1c/i64_smod.c +++ b/runtime/mppa_k1c/i64_smod.c @@ -1,40 +1,5 @@ -#if COMPLIQUE -unsigned long long -udivmoddi4(unsigned long long num, unsigned long long den, int modwanted); - -long long -i64_smod (long long a, long long b) -{ - int neg = 0; - long long res; - - if (a < 0) - { - a = -a; - neg = 1; - } - - if (b < 0) - b = -b; - - res = udivmoddi4 (a, b, 1); - - if (neg) - res = -res; - - return res; -} - -#else extern long __moddi3 (long a, long b); - -long i64_smod (long a, long b) -{ - return __moddi3 (a, b); -} - int i32_smod (int a, int b) { return __moddi3 (a, b); } -#endif diff --git a/runtime/mppa_k1c/i64_udiv.c b/runtime/mppa_k1c/i64_udiv.c index e69de29b..75f4bbf5 100644 --- a/runtime/mppa_k1c/i64_udiv.c +++ b/runtime/mppa_k1c/i64_udiv.c @@ -0,0 +1,6 @@ +extern unsigned long __udivdi3 (unsigned long a, unsigned long b); + +unsigned i32_udiv (unsigned a, unsigned b) +{ + return __udivdi3 (a, b); +} diff --git a/runtime/mppa_k1c/i64_udivmod.c b/runtime/mppa_k1c/i64_udivmod.c index 74b39874..ca48cd87 100644 --- a/runtime/mppa_k1c/i64_udivmod.c +++ b/runtime/mppa_k1c/i64_udivmod.c @@ -1,3 +1,4 @@ +#if 0 /* THIS IS THE PREVIOUS VERSION, USED ON BOSTAN AND ANDEY */ unsigned long long udivmoddi4(unsigned long long num, unsigned long long den, int modwanted) @@ -26,3 +27,4 @@ udivmoddi4(unsigned long long num, unsigned long long den, int modwanted) return modwanted ? r : q; } +#endif diff --git a/runtime/mppa_k1c/i64_udivmod_stsud.s b/runtime/mppa_k1c/i64_udivmod_stsud.s new file mode 100644 index 00000000..50d0a767 --- /dev/null +++ b/runtime/mppa_k1c/i64_udivmod_stsud.s @@ -0,0 +1,218 @@ +/* +Integer division for K1c + +David Monniaux, CNRS / Verimag + */ + + .globl dm_udivmoddi4 +dm_udivmoddi4: + sxwd $r2 = $r2 + make $r5 = 0 + compd.ltu $r3 = $r0, $r1 + ;; + + clzd $r3 = $r1 + clzd $r4 = $r0 + cb.dnez $r3? .L74 + ;; + + sbfw $r4 = $r4, $r3 + ;; + + zxwd $r3 = $r4 + slld $r1 = $r1, $r4 + ;; + + compd.ltu $r6 = $r0, $r1 + ;; + + cb.dnez $r6? .L4C + ;; + + make $r5 = 1 + sbfd $r0 = $r1, $r0 + ;; + + slld $r5 = $r5, $r4 + ;; + +.L4C: + cb.deqz $r3? .L74 + ;; + + srld $r1 = $r1, 1 + zxwd $r3 = $r4 + ;; + + loopdo $r3, .LOOP + ;; + + stsud $r0 = $r1, $r0 + ;; + +.LOOP: + addd $r5 = $r0, $r5 + srld $r0 = $r0, $r4 + ;; + + slld $r4 = $r0, $r4 + ;; + + sbfd $r5 = $r4, $r5 + ;; + +.L74: + cmoved.deqz $r2? $r0 = $r5 + ret + ;; + +/* +r0 : a +r1 : b +r2 : negate result? +r3 : return mod? +*/ + + .globl __compcert_i32_sdiv_stsud +__compcert_i32_sdiv_stsud: + compw.lt $r2 = $r0, 0 + compw.lt $r3 = $r1, 0 + absw $r0 = $r0 + absw $r1 = $r1 + ;; + zxwd $r0 = $r0 + zxwd $r1 = $r1 + xord $r2 = $r2, $r3 + make $r3 = 0 + goto __compcert_i64_divmod_stsud + ;; + + .globl __compcert_i32_smod_stsud +__compcert_i32_smod_stsud: + compw.lt $r2 = $r0, 0 + absw $r0 = $r0 + absw $r1 = $r1 + make $r3 = 1 + ;; + zxwd $r0 = $r0 + zxwd $r1 = $r1 + goto __compcert_i64_divmod_stsud + ;; + + .globl __compcert_i32_umod_stsud +__compcert_i32_umod_stsud: + make $r2 = 0 + make $r3 = 1 + zxwd $r0 = $r0 + zxwd $r1 = $r1 + goto __compcert_i64_divmod_stsud + ;; + + .globl __compcert_i32_udiv_stsud +__compcert_i32_udiv_stsud: + make $r2 = 0 + make $r3 = 0 + zxwd $r0 = $r0 + zxwd $r1 = $r1 + goto __compcert_i64_divmod_stsud + ;; + + .globl __compcert_i64_umod_stsud +__compcert_i64_umod_stsud: + make $r2 = 0 + make $r3 = 1 + goto __compcert_i64_divmod_stsud + ;; + + .globl __compcert_i64_udiv_stsud +__compcert_i64_udiv_stsud: + make $r2 = 0 + make $r3 = 0 + goto __compcert_i64_divmod_stsud + ;; + + .globl __compcert_i64_sdiv_stsud +__compcert_i64_sdiv_stsud: + compd.lt $r2 = $r0, 0 + compd.lt $r3 = $r1, 0 + absd $r0 = $r0 + absd $r1 = $r1 + ;; + xord $r2 = $r2, $r3 + make $r3 = 0 + goto __compcert_i64_divmod_stsud + ;; + + .globl __compcert_i64_smod_stsud +__compcert_i64_smod_stsud: + compd.lt $r2 = $r0, 0 + absd $r0 = $r0 + absd $r1 = $r1 + make $r3 = 1 + goto __compcert_i64_divmod_stsud + ;; + + .globl __compcert_i64_divmod_stsud +__compcert_i64_divmod_stsud: + make $r5 = 0 + compd.ltu $r7 = $r0, $r1 + ;; + + clzd $r7 = $r1 + clzd $r4 = $r0 + cb.dnez $r7? .ZL74 + ;; + + sbfw $r4 = $r4, $r7 + ;; + + zxwd $r7 = $r4 + slld $r1 = $r1, $r4 + ;; + + compd.ltu $r6 = $r0, $r1 + ;; + + cb.dnez $r6? .ZL4C + ;; + + make $r5 = 1 + sbfd $r0 = $r1, $r0 + ;; + + slld $r5 = $r5, $r4 + ;; + +.ZL4C: + cb.deqz $r7? .ZL74 + ;; + + srld $r1 = $r1, 1 + zxwd $r7 = $r4 + ;; + + loopdo $r7, .ZLOOP + ;; + + stsud $r0 = $r1, $r0 + ;; + +.ZLOOP: + addd $r5 = $r0, $r5 + srld $r0 = $r0, $r4 + ;; + + slld $r4 = $r0, $r4 + ;; + + sbfd $r5 = $r4, $r5 + ;; + +.ZL74: + cmoved.weqz $r3? $r0 = $r5 + ;; + negd $r5 = $r0 + ;; + cmoved.wnez $r2? $r0 = $r5 + ret + ;; diff --git a/runtime/mppa_k1c/i64_umod.c b/runtime/mppa_k1c/i64_umod.c index e69de29b..59e35960 100644 --- a/runtime/mppa_k1c/i64_umod.c +++ b/runtime/mppa_k1c/i64_umod.c @@ -0,0 +1,6 @@ +extern unsigned long __umoddi3 (unsigned long a, unsigned long b); + +unsigned i32_umod (unsigned a, unsigned b) +{ + return __umoddi3 (a, b); +} |