From b4a08d0815342b6238d307864f0823d0f07bb691 Mon Sep 17 00:00:00 2001
From: David Monniaux
Date: Tue, 26 May 2020 22:04:20 +0200
Subject: k1c -> kvx changes

---
 runtime/kvx/Makefile            |  15 +++
 runtime/kvx/ccomp_k1c_fixes.h   |   1 +
 runtime/kvx/i32_divmod.s        | 120 ++++++++++++++++++++++
 runtime/kvx/i64_sdiv.c          |  23 +++++
 runtime/kvx/i64_smod.c          |   5 +
 runtime/kvx/i64_udiv.c          |   6 ++
 runtime/kvx/i64_udivmod.c       |  30 ++++++
 runtime/kvx/i64_udivmod_stsud.s | 218 ++++++++++++++++++++++++++++++++++++++++
 runtime/kvx/i64_umod.c          |   6 ++
 runtime/kvx/vararg.s            |  54 ++++++++++
 10 files changed, 478 insertions(+)
 create mode 100644 runtime/kvx/Makefile
 create mode 120000 runtime/kvx/ccomp_k1c_fixes.h
 create mode 100644 runtime/kvx/i32_divmod.s
 create mode 100644 runtime/kvx/i64_sdiv.c
 create mode 100644 runtime/kvx/i64_smod.c
 create mode 100644 runtime/kvx/i64_udiv.c
 create mode 100644 runtime/kvx/i64_udivmod.c
 create mode 100644 runtime/kvx/i64_udivmod_stsud.s
 create mode 100644 runtime/kvx/i64_umod.c
 create mode 100644 runtime/kvx/vararg.s

(limited to 'runtime/kvx')

diff --git a/runtime/kvx/Makefile b/runtime/kvx/Makefile
new file mode 100644
index 00000000..4e47f567
--- /dev/null
+++ b/runtime/kvx/Makefile
@@ -0,0 +1,15 @@
+CCOMP ?= ../../ccomp
+CFLAGS ?= -O2 -D__K1_TINYK1__
+
+CFILES=$(wildcard *.c)
+SFILES=$(subst .c,.s,$(CFILES))
+
+CCOMPPATH=$(shell which $(CCOMP))
+
+all: $(SFILES)
+
+.SECONDARY:
+%.s: %.c $(CCOMPPATH)
+	$(CCOMP) $(CFLAGS) -S $< -o $@
+	sed -i -e 's/i64_/__compcert_i64_/g' -e 's/i32_/__compcert_i32_/g' \
+	       -e 's/f64_/__compcert_f64_/g' -e 's/f32_/__compcert_f32_/g' $@
diff --git a/runtime/kvx/ccomp_k1c_fixes.h b/runtime/kvx/ccomp_k1c_fixes.h
new file mode 120000
index 00000000..b640c96e
--- /dev/null
+++ b/runtime/kvx/ccomp_k1c_fixes.h
@@ -0,0 +1 @@
+../include/ccomp_k1c_fixes.h
\ No newline at end of file
diff --git a/runtime/kvx/i32_divmod.s b/runtime/kvx/i32_divmod.s
new file mode 100644
index 00000000..9a6f0bce
--- /dev/null
+++ b/runtime/kvx/i32_divmod.s
@@ -0,0 +1,120 @@
+/* KVX
+32-bit unsigned/signed integer division/modulo (udiv5)
+
+D. Monniaux, CNRS, VERIMAG */
+
+
+	.globl __compcert_i32_sdiv_fp
+__compcert_i32_sdiv_fp:
+	compw.lt $r2 = $r0, 0
+	compw.lt $r3 = $r1, 0
+	absw $r0 = $r0
+	absw $r1 = $r1
+	;;
+	xord $r2 = $r2, $r3
+	make $r3 = 0
+	goto __compcert_i32_divmod_fp
+	;;
+
+	.globl __compcert_i32_smod_fp
+__compcert_i32_smod_fp:
+	compw.lt $r2 = $r0, 0
+	absw $r0 = $r0
+	absw $r1 = $r1
+	make $r3 = 1
+	goto __compcert_i32_divmod_fp
+	;;
+
+	.globl __compcert_i32_umod_fp
+__compcert_i32_umod_fp:
+	make $r2 = 0
+	make $r3 = 1
+	goto __compcert_i32_divmod_fp
+	;;
+
+	.globl __compcert_i32_udiv_fp
+__compcert_i32_udiv_fp:
+	make $r2 = 0
+	make $r3 = 0
+	;;
+
+/*
+r0 : a
+r1 : b
+r2 : negate result?
+r3 : return mod?
+*/
+
+	.globl __compcert_i32_divmod_fp
+__compcert_i32_divmod_fp:
+	zxwd $r7 = $r1
+	zxwd $r1 = $r0
+#ifndef NO_SHORTCUT
+	compw.ltu $r8 = $r0, $r1
+	cb.weqz $r1? .ERR	# return 0 if divide by 0
+#endif
+	;;
+# a in r1, b in r7
+	floatud.rn.s $r5 = $r7, 0
+#ifndef NO_SHORTCUT
+	compd.eq $r8 = $r7, 1
+	cb.wnez $r8? .LESS	# shortcut if a < b
+#endif
+	;;
+# b (double) in r5
+	make $r6 = 0x3ff0000000000000 # 1.0
+	fnarrowdw.rn.s $r11 = $r5
+#	cb.wnez $r8, .RET1 # if b=1
+	;;
+# b (single) in r11
+	floatud.rn.s $r10 = $r1, 0
+	finvw.rn.s $r11 = $r11
+	;;
+	fwidenlwd.s $r11 = $r11
+	;;
+# invb0 in r11
+	copyd $r9 = $r11
+	ffmsd.rn.s $r6 = $r11, $r5
+# alpha in r6
+	;;
+	ffmad.rn.s $r9 = $r11, $r6
+# 1/b in r9
+	;;
+	fmuld.rn.s $r0 = $r10, $r9
+# a/b in r1
+	;;
+	fixedud.rn.s $r0 = $r0, 0
+	;;
+	msbfd $r1 = $r0, $r7
+	;;
+	addd $r6 = $r0, -1
+	addd $r8 = $r1, $r7
+	;;
+	cmoved.dltz $r1? $r0 = $r6
+	cmoved.dltz $r1? $r1 = $r8
+	;;
+	negw $r4 = $r0
+	negw $r5 = $r1
+	;;
+	cmoved.wnez $r2? $r0 = $r4
+	cmoved.wnez $r2? $r1 = $r5
+	;;
+.END:
+	cmoved.wnez $r3? $r0 = $r1
+	ret
+	;;
+#ifndef NO_SHORTCUT
+
+.LESS:
+	make $r0 = 0
+	negw $r5 = $r1
+	;;
+	cmoved.wnez $r2? $r1 = $r5
+	goto .END
+	;;
+
+.ERR:
+	make $r0 = 0
+	ret
+	;;
+#endif
diff --git a/runtime/kvx/i64_sdiv.c b/runtime/kvx/i64_sdiv.c
new file mode 100644
index 00000000..a42164cc
--- /dev/null
+++ b/runtime/kvx/i64_sdiv.c
@@ -0,0 +1,23 @@
+extern long __divdi3 (long a, long b);
+
+int i32_sdiv (int a, int b)
+{
+  return __divdi3 (a, b);
+}
+
+#ifdef OUR_OWN_FE_EXCEPT
+#include <../../k1-cos/include/hal/cos_registers.h>
+
+/* DM FIXME this is for floating point */
+int fetestexcept(int excepts) {
+  int mask = (COS_SFR_CS_IO_MASK | COS_SFR_CS_DZ_MASK | COS_SFR_CS_OV_MASK | COS_SFR_CS_UN_MASK | COS_SFR_CS_IN_MASK) & excepts;
+  unsigned long long cs = __builtin_kvx_get(COS_SFR_CS);
+  return cs & mask;
+}
+
+int feclearexcept(int excepts) {
+  int mask = (COS_SFR_CS_IO_MASK | COS_SFR_CS_DZ_MASK | COS_SFR_CS_OV_MASK | COS_SFR_CS_UN_MASK | COS_SFR_CS_IN_MASK) & excepts;
+  __builtin_kvx_wfxl(COS_SFR_CS, mask);
+  return 0;
+}
+#endif
diff --git a/runtime/kvx/i64_smod.c b/runtime/kvx/i64_smod.c
new file mode 100644
index 00000000..3371eecf
--- /dev/null
+++ b/runtime/kvx/i64_smod.c
@@ -0,0 +1,5 @@
+extern long __moddi3 (long a, long b);
+int i32_smod (int a, int b)
+{
+  return __moddi3 (a, b);
+}
diff --git a/runtime/kvx/i64_udiv.c b/runtime/kvx/i64_udiv.c
new file mode 100644
index 00000000..75f4bbf5
--- /dev/null
+++ b/runtime/kvx/i64_udiv.c
@@ -0,0 +1,6 @@
+extern unsigned long __udivdi3 (unsigned long a, unsigned long b);
+
+unsigned i32_udiv (unsigned a, unsigned b)
+{
+  return __udivdi3 (a, b);
+}
diff --git a/runtime/kvx/i64_udivmod.c b/runtime/kvx/i64_udivmod.c
new file mode 100644
index 00000000..952e47e5
--- /dev/null
+++ b/runtime/kvx/i64_udivmod.c
@@ -0,0 +1,30 @@
+#if 0
+/* THIS IS THE PREVIOUS VERSION, USED ON BOSTAN AND ANDEY */
+unsigned long long
+udivmoddi4(unsigned long long num, unsigned long long den, int modwanted)
+{
+  unsigned long long r = num, q = 0;
+
+  if(den <= r) {
+    unsigned k = __builtin_clzll (den) - __builtin_clzll (r);
+    den = den << k;
+    if(r >= den) {
+      r = r - den;
+      q = 1LL << k;
+    }
+    if(k != 0) {
+      unsigned i = k;
+      den = den >> 1;
+      do {
+        r = __builtin_kvx_stsud (den, r);
+        i--;
+      } while (i != 0);
+      q = q + r;
+      r = r >> k;
+      q = q - (r << k);
+    }
+  }
+
+  return modwanted ? r : q;
+}
+#endif
diff --git a/runtime/kvx/i64_udivmod_stsud.s b/runtime/kvx/i64_udivmod_stsud.s
new file mode 100644
index 00000000..2dd73d66
--- /dev/null
+++ b/runtime/kvx/i64_udivmod_stsud.s
@@ -0,0 +1,218 @@
+/*
+Integer division for KVX
+
+David Monniaux, CNRS / Verimag
+ */
+
+	.globl dm_udivmoddi4
+dm_udivmoddi4:
+	sxwd $r2 = $r2
+	make $r5 = 0
+	compd.ltu $r3 = $r0, $r1
+	;;
+
+	clzd $r3 = $r1
+	clzd $r4 = $r0
+	cb.dnez $r3? .L74
+	;;
+
+	sbfw $r4 = $r4, $r3
+	;;
+
+	zxwd $r3 = $r4
+	slld $r1 = $r1, $r4
+	;;
+
+	compd.ltu $r6 = $r0, $r1
+	;;
+
+	cb.dnez $r6? .L4C
+	;;
+
+	make $r5 = 1
+	sbfd $r0 = $r1, $r0
+	;;
+
+	slld $r5 = $r5, $r4
+	;;
+
+.L4C:
+	cb.deqz $r3? .L74
+	;;
+
+	srld $r1 = $r1, 1
+	zxwd $r3 = $r4
+	;;
+
+	loopdo $r3, .LOOP
+	;;
+
+	stsud $r0 = $r1, $r0
+	;;
+
+.LOOP:
+	addd $r5 = $r0, $r5
+	srld $r0 = $r0, $r4
+	;;
+
+	slld $r4 = $r0, $r4
+	;;
+
+	sbfd $r5 = $r4, $r5
+	;;
+
+.L74:
+	cmoved.deqz $r2? $r0 = $r5
+	ret
+	;;
+
+/*
+r0 : a
+r1 : b
+r2 : negate result?
+r3 : return mod?
+*/
+
+	.globl __compcert_i32_sdiv_stsud
+__compcert_i32_sdiv_stsud:
+	compw.lt $r2 = $r0, 0
+	compw.lt $r3 = $r1, 0
+	absw $r0 = $r0
+	absw $r1 = $r1
+	;;
+	zxwd $r0 = $r0
+	zxwd $r1 = $r1
+	xord $r2 = $r2, $r3
+	make $r3 = 0
+	goto __compcert_i64_divmod_stsud
+	;;
+
+	.globl __compcert_i32_smod_stsud
+__compcert_i32_smod_stsud:
+	compw.lt $r2 = $r0, 0
+	absw $r0 = $r0
+	absw $r1 = $r1
+	make $r3 = 1
+	;;
+	zxwd $r0 = $r0
+	zxwd $r1 = $r1
+	goto __compcert_i64_divmod_stsud
+	;;
+
+	.globl __compcert_i32_umod_stsud
+__compcert_i32_umod_stsud:
+	make $r2 = 0
+	make $r3 = 1
+	zxwd $r0 = $r0
+	zxwd $r1 = $r1
+	goto __compcert_i64_divmod_stsud
+	;;
+
+	.globl __compcert_i32_udiv_stsud
+__compcert_i32_udiv_stsud:
+	make $r2 = 0
+	make $r3 = 0
+	zxwd $r0 = $r0
+	zxwd $r1 = $r1
+	goto __compcert_i64_divmod_stsud
+	;;
+
+	.globl __compcert_i64_umod_stsud
+__compcert_i64_umod_stsud:
+	make $r2 = 0
+	make $r3 = 1
+	goto __compcert_i64_divmod_stsud
+	;;
+
+	.globl __compcert_i64_udiv_stsud
+__compcert_i64_udiv_stsud:
+	make $r2 = 0
+	make $r3 = 0
+	goto __compcert_i64_divmod_stsud
+	;;
+
+	.globl __compcert_i64_sdiv_stsud
+__compcert_i64_sdiv_stsud:
+	compd.lt $r2 = $r0, 0
+	compd.lt $r3 = $r1, 0
+	absd $r0 = $r0
+	absd $r1 = $r1
+	;;
+	xord $r2 = $r2, $r3
+	make $r3 = 0
+	goto __compcert_i64_divmod_stsud
+	;;
+
+	.globl __compcert_i64_smod_stsud
+__compcert_i64_smod_stsud:
+	compd.lt $r2 = $r0, 0
+	absd $r0 = $r0
+	absd $r1 = $r1
+	make $r3 = 1
+	goto __compcert_i64_divmod_stsud
+	;;
+
+	.globl __compcert_i64_divmod_stsud
+__compcert_i64_divmod_stsud:
+	make $r5 = 0
+	compd.ltu $r7 = $r0, $r1
+	;;
+
+	clzd $r7 = $r1
+	clzd $r4 = $r0
+	cb.dnez $r7? .ZL74
+	;;
+
+	sbfw $r4 = $r4, $r7
+	;;
+
+	zxwd $r7 = $r4
+	slld $r1 = $r1, $r4
+	;;
+
+	compd.ltu $r6 = $r0, $r1
+	;;
+
+	cb.dnez $r6? .ZL4C
+	;;
+
+	make $r5 = 1
+	sbfd $r0 = $r1, $r0
+	;;
+
+	slld $r5 = $r5, $r4
+	;;
+
+.ZL4C:
+	cb.deqz $r7? .ZL74
+	;;
+
+	srld $r1 = $r1, 1
+	zxwd $r7 = $r4
+	;;
+
+	loopdo $r7, .ZLOOP
+	;;
+
+	stsud $r0 = $r1, $r0
+	;;
+
+.ZLOOP:
+	addd $r5 = $r0, $r5
+	srld $r0 = $r0, $r4
+	;;
+
+	slld $r4 = $r0, $r4
+	;;
+
+	sbfd $r5 = $r4, $r5
+	;;
+
+.ZL74:
+	cmoved.weqz $r3? $r0 = $r5
+	;;
+	negd $r5 = $r0
+	;;
+	cmoved.wnez $r2? $r0 = $r5
+	ret
+	;;
diff --git a/runtime/kvx/i64_umod.c b/runtime/kvx/i64_umod.c
new file mode 100644
index 00000000..59e35960
--- /dev/null
+++ b/runtime/kvx/i64_umod.c
@@ -0,0 +1,6 @@
+extern unsigned long __umoddi3 (unsigned long a, unsigned long b);
+
+unsigned i32_umod (unsigned a, unsigned b)
+{
+  return __umoddi3 (a, b);
+}
diff --git a/runtime/kvx/vararg.s b/runtime/kvx/vararg.s
new file mode 100644
index 00000000..65c1eab8
--- /dev/null
+++ b/runtime/kvx/vararg.s
@@ -0,0 +1,54 @@
+
+# typedef void * va_list;
+# unsigned int __compcert_va_int32(va_list * ap);
+# unsigned long long __compcert_va_int64(va_list * ap);
+
+	.text
+	.balign 2
+	.globl __compcert_va_int32
+__compcert_va_int32:
+	ld $r32 = 0[$r0]     # $r32 <- *ap
+;;
+	addd $r32 = $r32, 8  # $r32 <- $r32 + WORDSIZE
+;;
+	sd 0[$r0] = $r32     # *ap <- $r32
+;;
+	lws $r0 = -8[$r32]   # retvalue <- 32-bits at *ap - WORDSIZE
+	ret
+;;
+
+	.text
+	.balign 2
+	.globl __compcert_va_int64
+	.globl __compcert_va_float64
+	.globl __compcert_va_composite
+__compcert_va_int64:
+__compcert_va_float64:
+# FIXME this assumes pass-by-reference
+__compcert_va_composite:
+# Prologue
+	ld $r32 = 0[$r0]     # $r32 <- *ap
+;;
+	addd $r32 = $r32, 8  # $r32 <- $r32 + WORDSIZE
+;;
+	sd 0[$r0] = $r32     # *ap <- $r32
+;;
+	ld $r0 = -8[$r32]    # retvalue <- 64-bits at *ap - WORDSIZE
+	ret
+;;
+
+# FIXME this assumes pass-by-reference
+	.globl __compcert_acswapd
+__compcert_acswapd:
+	acswapd 0[$r1] = $r2r3
+	;;
+	sq 0[$r0] = $r2r3
+	ret
+	;;
+	.globl __compcert_acswapw
+__compcert_acswapw:
+	acswapw 0[$r1] = $r2r3
+	;;
+	sq 0[$r0] = $r2r3
+	ret
+	;;
--
cgit
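
For reference, __compcert_i32_divmod_fp in i32_divmod.s divides by computing a floating-point reciprocal of b (finvw.rn.s), refining it with one fused multiply-add step (ffmsd/ffmad), multiplying by a, truncating to an integer, and finally adjusting the candidate quotient by one when the remainder a - q*b comes out negative. The C fragment below is only a rough model of that idea, not code from this patch: divmod_fp_sketch is an illustrative name, it uses a plain double reciprocal instead of the hardware refinement, and it therefore corrects the candidate quotient in both directions to stay safe.

#include <stdint.h>

/* Illustrative sketch of reciprocal-based 32-bit division/modulo;
   not part of the CompCert runtime. */
static uint32_t divmod_fp_sketch(uint32_t a, uint32_t b, int want_mod)
{
  if (b == 0)
    return 0;                                  /* mirrors the .ERR path */

  double invb = 1.0 / (double) b;              /* reciprocal estimate of b */
  uint64_t q = (uint64_t) ((double) a * invb);             /* candidate quotient */
  int64_t r = (int64_t) a - (int64_t) (q * (uint64_t) b);  /* its remainder */

  if (r < 0) {                                 /* quotient one too high */
    q -= 1;
    r += b;
  } else if (r >= (int64_t) b) {               /* quotient one too low */
    q += 1;
    r -= b;
  }
  return want_mod ? (uint32_t) r : (uint32_t) q;
}

For signed inputs the assembly takes absolute values first (absw), records in $r2 whether the result must be negated, and selects quotient or remainder according to $r3, exactly as the register-usage comment in the file describes.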
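
The helpers in vararg.s follow the comment block at the top of that file: *ap is a raw pointer into the argument area and each variadic argument, including a 32-bit int, occupies one 8-byte slot. A minimal C model of __compcert_va_int32 under that assumption (va_int32_model is an illustrative name, not part of the patch):

#include <string.h>

typedef void *va_list_model;        /* matches: typedef void * va_list */

/* Illustrative model only: read a 32-bit argument and advance *ap by
   one 8-byte slot, as __compcert_va_int32 does. */
unsigned int va_int32_model(va_list_model *ap)
{
  char *p = *ap;                    /* current argument slot */
  *ap = p + 8;                      /* *ap <- *ap + WORDSIZE */
  unsigned int v;
  memcpy(&v, p, sizeof v);          /* value stored at the old slot */
  return v;
}

__compcert_va_int64 and __compcert_va_float64 work the same way except that they load all 8 bytes of the slot.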