Merge remote-tracking branch 'origin/mppa-work' into mppa-fast-div

(unfinished)
author: David Monniaux <david.monniaux@univ-grenoble-alpes.fr> 2020-04-20 14:06:21 +0200
committer: David Monniaux <david.monniaux@univ-grenoble-alpes.fr> 2020-04-20 14:06:21 +0200
commit: 168393089024b5f926836cb813fddf14e6b6e4d4 (patch)
tree: 5d018198dc26be847544f162b87ad3dcecbab479 /runtime
parent: 633b72565b022f159526338b5bbb9fcac86dfd2b (diff)
parent: b3431b1d9ee5121883d307cff0b62b7e53369891 (diff)
download: compcert-kvx-168393089024b5f926836cb813fddf14e6b6e4d4.tar.gz
compcert-kvx-168393089024b5f926836cb813fddf14e6b6e4d4.zip
13 files changed, 248 insertions, 50 deletions
diff --git a/runtime/Makefile b/runtime/Makefile
index 6bc3e7ea..a689f3ea 100644
--- a/runtime/Makefile
+++ b/runtime/Makefile
@@ -28,6 +28,8 @@ OBJS=i64_umod.o i64_udiv.o i64_udivmod.o i64_sdiv.o i64_smod.o \
      vararg.o
 # Missing: i64_utod.o i64_utof.o i64_stod.o i64_stof.o
 DOMAKE:=$(shell (cd mppa_k1c && make))
+else ifeq ($(ARCH),aarch64)
+OBJS=vararg.o
 else
 OBJS=i64_dtos.o i64_dtou.o i64_sar.o i64_sdiv.o i64_shl.o \
   i64_shr.o i64_smod.o i64_stod.o i64_stof.o \
@@ -79,16 +81,16 @@ clean::
 
 ifeq ($(strip $(HAS_RUNTIME_LIB)),true)
 install::
-	install -d $(LIBDIR)
-	install -m 0644 $(LIB) $(LIBDIR)
+	install -d $(DESTDIR)$(LIBDIR)
+	install -m 0644 $(LIB) $(DESTDIR)$(LIBDIR)
 else
 install::
 endif
 
 ifeq ($(strip $(HAS_STANDARD_HEADERS)),true)
 install::
-	install -d $(LIBDIR)/include
-	install -m 0644 $(INCLUDES) $(LIBDIR)/include
+	install -d $(DESTDIR)$(LIBDIR)/include
+	install -m 0644 $(INCLUDES) $(DESTDIR)$(LIBDIR)/include
 else
 install::
 endif
diff --git a/runtime/aarch64/sysdeps.h b/runtime/aarch64/sysdeps.h
new file mode 100644
index 00000000..0cee9ae3
--- /dev/null
+++ b/runtime/aarch64/sysdeps.h
@@ -0,0 +1,45 @@
+// *****************************************************************
+//
+//               The Compcert verified compiler
+//
+//           Xavier Leroy, Collège de France and INRIA Paris
+//
+// Copyright (c) Institut National de Recherche en Informatique et
+//  en Automatique.
+//	
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above copyright
+//       notice, this list of conditions and the following disclaimer in the
+//       documentation and/or other materials provided with the distribution.
+//     * Neither the name of the <organization> nor the
+//       names of its contributors may be used to endorse or promote products
+//       derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT
+// HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// *********************************************************************
+
+// System dependencies
+
+#define FUNCTION(f) \
+	.text; \
+        .balign 16; \
+	.globl f; \
+f:
+
+#define ENDFUNCTION(f) \
+	.type f, @function; .size f, . - f
+
diff --git a/runtime/aarch64/vararg.S b/runtime/aarch64/vararg.S
new file mode 100644
index 00000000..b7347d65
--- /dev/null
+++ b/runtime/aarch64/vararg.S
@@ -0,0 +1,109 @@
+// *****************************************************************
+//
+//               The Compcert verified compiler
+//
+//           Xavier Leroy, Collège de France and INRIA Paris
+//
+// Copyright (c) Institut National de Recherche en Informatique et
+//  en Automatique.
+//	
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above copyright
+//       notice, this list of conditions and the following disclaimer in the
+//       documentation and/or other materials provided with the distribution.
+//     * Neither the name of the <organization> nor the
+//       names of its contributors may be used to endorse or promote products
+//       derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT
+// HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// *********************************************************************
+
+// Helper functions for variadic functions <stdarg.h>.  AArch64 version.
+
+#include "sysdeps.h"	
+
+// typedef struct __va_list {
+//   void *__stack;             // next stack parameter
+//   void *__gr_top;            // top of the save area for int regs
+//   void *__vr_top;            // top of the save area for float regs
+//   int__gr_offs;              // offset from gr_top to next int reg
+//   int__vr_offs;              // offset from gr_top to next FP reg
+// }
+// typedef struct __va_list va_list;   // struct passed by reference
+// unsigned int __compcert_va_int32(va_list * ap);
+// unsigned long long __compcert_va_int64(va_list * ap);
+// double __compcert_va_float64(va_list * ap);
+
+FUNCTION(__compcert_va_int32)
+        ldr     w1, [x0, #24]   // w1 = gr_offs
+        cbz     w1, 1f
+  // gr_offs is not zero: load from int save area and update gr_offs
+        ldr     x2, [x0, #8]    // x2 = gr_top
+        ldr     w2, [x2, w1, sxtw] // w2 = the next integer
+        add     w1, w1, #8
+        str     w1, [x0, #24]   // update gr_offs
+        mov     w0, w2
+	ret
+  // gr_offs is zero: load from stack save area and update stack pointer
+1:      ldr     x1, [x0, #0]    // x1 = stack
+        ldr     w2, [x1, #0]    // w2 = the next integer
+        add     x1, x1, #8
+        str     x1, [x0, #0]    // update stack
+        mov     w0, w2
+	ret
+ENDFUNCTION(__compcert_va_int32)
+
+FUNCTION(__compcert_va_int64)
+        ldr     w1, [x0, #24]   // w1 = gr_offs
+        cbz     w1, 1f
+  // gr_offs is not zero: load from int save area and update gr_offs
+        ldr     x2, [x0, #8]    // x2 = gr_top
+        ldr     x2, [x2, w1, sxtw] // w2 = the next long integer
+        add     w1, w1, #8
+        str     w1, [x0, #24]   // update gr_offs
+        mov     x0, x2
+	ret
+  // gr_offs is zero: load from stack save area and update stack pointer
+1:      ldr     x1, [x0, #0]    // x1 = stack
+        ldr     x2, [x1, #0]    // w2 = the next long integer
+        add     x1, x1, #8
+        str     x1, [x0, #0]    // update stack
+        mov     x0, x2
+	ret
+ENDFUNCTION(__compcert_va_int64)
+
+FUNCTION(__compcert_va_float64)
+        ldr     w1, [x0, #28]   // w1 = vr_offs
+        cbz     w1, 1f
+  // vr_offs is not zero: load from float save area and update vr_offs
+        ldr     x2, [x0, #16]   // x2 = vr_top
+        ldr     d0, [x2, w1, sxtw] // d0 = the next float
+        add     w1, w1, #16
+        str     w1, [x0, #28]   // update vr_offs
+	ret
+  // gr_offs is zero: load from stack save area and update stack pointer
+1:      ldr     x1, [x0, #0]    // x1 = stack
+        ldr     d0, [x1, #0]    // d0 = the next float
+        add     x1, x1, #8
+        str     x1, [x0, #0]    // update stack
+	ret
+ENDFUNCTION(__compcert_va_float64)
+
+// Right now we pass structs by reference.  This is not ABI conformant.
+FUNCTION(__compcert_va_composite)
+	b	__compcert_va_int64
+ENDFUNCTION(__compcert_va_composite)
diff --git a/runtime/arm/i64_stof.S b/runtime/arm/i64_stof.S
index bcfa471c..11e00a2a 100644
--- a/runtime/arm/i64_stof.S
+++ b/runtime/arm/i64_stof.S
@@ -39,12 +39,11 @@
 @@@ Conversion from signed 64-bit integer to single float
 
 FUNCTION(__compcert_i64_stof)
-    @ Check whether -2^53 <= X < 2^53
-        ASR r2, Reg0HI, #21
-        ASR r3, Reg0HI, #31     @ (r2,r3) = X >> 53
+   @ Check whether -2^53 <= X < 2^53
+        ASR r2, Reg0HI, #21     @ r2 = high 32 bits of X >> 53
+   @ -2^53 <= X < 2^53 iff r2 is -1 or 0, that is, iff r2 + 1 is 0 or 1
         adds r2, r2, #1
-        adc r3, r3, #0          @ (r2,r3) = X >> 53 + 1
-        cmp r3, #2
+        cmp r2, #2
         blo 1f
    @ X is large enough that double rounding can occur.
    @ Avoid it by nudging X away from the points where double rounding
diff --git a/runtime/include/ccomp_k1c_fixes.h b/runtime/include/ccomp_k1c_fixes.h
index 5c543d8f..718ac3e5 100644
--- a/runtime/include/ccomp_k1c_fixes.h
+++ b/runtime/include/ccomp_k1c_fixes.h
@@ -17,6 +17,12 @@ extern __int128 __compcert_acswapd(void *address, unsigned long long new_value,
 
 #define __builtin_k1_acswapw __compcert_acswapw
 extern __int128 __compcert_acswapw(void *address, unsigned long long new_value, unsigned long long old_value);
+
+#define __builtin_k1_afaddd __compcert_afaddd
+extern long long __compcert_afaddd(void *address, unsigned long long incr);
+
+#define __builtin_k1_afaddw __compcert_afaddw
+extern int __compcert_afaddw(void *address, unsigned int incr);
 #endif
 
 #define __builtin_expect(x, y) (x)
diff --git a/runtime/include/math.h b/runtime/include/math.h
index 805cc8e7..01b8d8d8 100644
--- a/runtime/include/math.h
+++ b/runtime/include/math.h
@@ -1,7 +1,26 @@
 #ifndef _COMPCERT_MATH_H
 #define _COMPCERT_MATH_H
 
+#ifdef __K1C__
+
 #define isfinite(__y) (fpclassify((__y)) >= FP_ZERO)
 
 #include_next <math.h>
+
+#ifndef COMPCERT_NO_FP_MACROS
+#define fmin(x, y) __builtin_fmin((x),(y))
+#define fmax(x, y) __builtin_fmax((x),(y))
+#define fminf(x, y) __builtin_fminf((x),(y))
+#define fmaxf(x, y) __builtin_fmaxf((x),(y))
+#define fabs(x) __builtin_fabs((x))
+#define fabsf(x) __builtin_fabsf((x))
+#define fma(x, y, z) __builtin_fma((x),(y),(z))
+#define fmaf(x, y, z) __builtin_fmaf((x),(y),(z))
+#endif
+
+#else
+
+#include_next <math.h>
+
+#endif
 #endif
diff --git a/runtime/mppa_k1c/i32_divmod.S b/runtime/mppa_k1c/i32_divmod.s
index d2b4e8d5..d2b4e8d5 100644
--- a/runtime/mppa_k1c/i32_divmod.S
+++ b/runtime/mppa_k1c/i32_divmod.s
diff --git a/runtime/mppa_k1c/i64_sdiv.c b/runtime/mppa_k1c/i64_sdiv.c
index df308736..b98d9316 100644
--- a/runtime/mppa_k1c/i64_sdiv.c
+++ b/runtime/mppa_k1c/i64_sdiv.c
@@ -5,31 +5,19 @@ int i32_sdiv (int a, int b)
   return __divdi3 (a, b);
 }
 
-/* #define COMPCERT_FE_EXCEPT */
-#ifdef COMPCERT_FE_EXCEPT
-#ifdef __K1C_COS__
-
-#include <hal/cos_registers.h>
-#define K1_SFR_CS_IO_MASK COS_SFR_CS_IO_MASK
-#define K1_SFR_CS_DZ_MASK COS_SFR_CS_DZ_MASK
-#define K1_SFR_CS_OV_MASK COS_SFR_CS_OV_MASK
-#define K1_SFR_CS_UN_MASK COS_SFR_CS_UN_MASK
-#define K1_SFR_CS_IN_MASK COS_SFR_CS_IN_MASK
-#define K1_SFR_CS COS_SFR_CS
-#else
-#include <mppa_bare_runtime/k1c/registers.h>
-#endif
+#ifdef OUR_OWN_FE_EXCEPT
+#include <../../k1-cos/include/hal/cos_registers.h>
 
 /* DM FIXME this is for floating point */
 int fetestexcept(int excepts) {
-  int mask = (K1_SFR_CS_IO_MASK | K1_SFR_CS_DZ_MASK | K1_SFR_CS_OV_MASK | K1_SFR_CS_UN_MASK | K1_SFR_CS_IN_MASK) & excepts;
-  unsigned long long cs = __builtin_k1_get(K1_SFR_CS);
+  int mask = (COS_SFR_CS_IO_MASK | COS_SFR_CS_DZ_MASK | COS_SFR_CS_OV_MASK | COS_SFR_CS_UN_MASK | COS_SFR_CS_IN_MASK) & excepts;
+  unsigned long long cs = __builtin_k1_get(COS_SFR_CS);
   return cs & mask;
 }
 
 int feclearexcept(int excepts) {
-  int mask = (K1_SFR_CS_IO_MASK | K1_SFR_CS_DZ_MASK | K1_SFR_CS_OV_MASK | K1_SFR_CS_UN_MASK | K1_SFR_CS_IN_MASK) & excepts;
-  __builtin_k1_wfxl(K1_SFR_CS, mask);
+  int mask = (COS_SFR_CS_IO_MASK | COS_SFR_CS_DZ_MASK | COS_SFR_CS_OV_MASK | COS_SFR_CS_UN_MASK | COS_SFR_CS_IN_MASK) & excepts;
+  __builtin_k1_wfxl(COS_SFR_CS, mask);
   return 0;
 }
 #endif
diff --git a/runtime/mppa_k1c/i64_udivmod_stsud.S b/runtime/mppa_k1c/i64_udivmod_stsud.s
index ac84ca47..b1d10326 100644
--- a/runtime/mppa_k1c/i64_udivmod_stsud.S
+++ b/runtime/mppa_k1c/i64_udivmod_stsud.s
@@ -116,9 +116,40 @@ __compcert_i32_udiv_stsud:
 	zxwd $r1 = $r1
 	goto __compcert_i64_divmod_stsud
 	;;
+	
+	.globl __compcert_i64_umod_stsud
+__compcert_i64_umod_stsud:
+	make $r2 = 0
+	make $r3 = 1
+	goto __compcert_i64_divmod_stsud
+	;;
+
+	.globl __compcert_i64_udiv_stsud
+__compcert_i64_udiv_stsud:
+	make $r2 = 0
+	make $r3 = 0
+	goto __compcert_i64_divmod_stsud
+	;;
+
+	.globl __compcert_i64_sdiv_stsud
+__compcert_i64_sdiv_stsud:
+	compd.lt $r2 = $r0, 0
+	compd.lt $r3 = $r1, 0
+	;;
+	xord $r2 = $r2, $r3
+	make $r3 = 0
+	goto __compcert_i64_divmod_stsud
+	;;
+	
+	.globl __compcert_i64_smod_stsud
+__compcert_i64_smod_stsud:
+	compd.lt $r2 = $r0, 0
+	make $r3 = 1
+	goto __compcert_i64_divmod_stsud
+	;;
 
 	.globl __compcert_i64_divmod_stsud
- __compcert_i64_divmod_stsud:
+__compcert_i64_divmod_stsud:
 	make $r5 = 0
 	compd.ltu $r7 = $r0, $r1
 	;;
diff --git a/runtime/mppa_k1c/vararg.S b/runtime/mppa_k1c/vararg.s
index 9e23e0b3..65c1eab8 100644
--- a/runtime/mppa_k1c/vararg.S
+++ b/runtime/mppa_k1c/vararg.s
@@ -1,7 +1,7 @@
 
-// typedef void * va_list;
-// unsigned int __compcert_va_int32(va_list * ap);
-// unsigned long long __compcert_va_int64(va_list * ap);
+# typedef void * va_list;
+# unsigned int __compcert_va_int32(va_list * ap);
+# unsigned long long __compcert_va_int64(va_list * ap);
 
 	.text
 	.balign 2
diff --git a/runtime/powerpc/i64_stof.s b/runtime/powerpc/i64_stof.s
index 97fa6bb8..ea23a1c8 100644
--- a/runtime/powerpc/i64_stof.s
+++ b/runtime/powerpc/i64_stof.s
@@ -43,20 +43,19 @@
 __compcert_i64_stof:
 	mflr r9
    # Check whether -2^53 <= X < 2^53	
-        srawi r5, r3, 31
-        srawi r6, r3, 21        # (r5,r6) = X >> 53
-        addic r6, r6, 1
-        addze r5, r5            # (r5,r6) = (X >> 53) + 1
+        srawi r5, r3, 21        # r5 = high 32 bits of X >> 53
+   # -2^53 <= X < 2^53 iff r5 is -1 or 0, that is, iff r5 + 1 is 0 or 1
+        addi r5, r5, 1
         cmplwi r5, 2
         blt 1f
    # X is large enough that double rounding can occur.
    # Avoid it by nudging X away from the points where double rounding
    # occurs (the "round to odd" technique)
-        rlwinm r0, r4, 0, 21, 31 # extract bits 0 to 11 of X
-        addi r0, r0, 0x7FF      # r0 = (X & 0x7FF) + 0x7FF
-   # bit 12 of r0 is 0 if all low 12 bits of X are 0, 1 otherwise
-   # bits 13-31 of r0 are 0
-        or r4, r4, r0           # correct bit number 12 of X
+        rlwinm r5, r4, 0, 21, 31 # extract bits 0 to 11 of X
+        addi r5, r5, 0x7FF      # r5 = (X & 0x7FF) + 0x7FF
+   # bit 12 of r5 is 0 if all low 12 bits of X are 0, 1 otherwise
+   # bits 13-31 of r5 are 0
+        or r4, r4, r5           # correct bit number 12 of X
         rlwinm r4, r4, 0, 0, 20 # set to 0 bits 0 to 11 of X
    # Convert to double, then round to single	
 1:      bl __compcert_i64_stod
diff --git a/runtime/powerpc/i64_utof.s b/runtime/powerpc/i64_utof.s
index cdb2f867..4a2a172b 100644
--- a/runtime/powerpc/i64_utof.s
+++ b/runtime/powerpc/i64_utof.s
@@ -48,11 +48,11 @@ __compcert_i64_utof:
    # X is large enough that double rounding can occur.
    # Avoid it by nudging X away from the points where double rounding
    # occurs (the "round to odd" technique)
-        rlwinm r0, r4, 0, 21, 31 # extract bits 0 to 11 of X
-        addi r0, r0, 0x7FF      # r0 = (X & 0x7FF) + 0x7FF
-   # bit 12 of r0 is 0 if all low 12 bits of X are 0, 1 otherwise
-   # bits 13-31 of r0 are 0
-        or r4, r4, r0           # correct bit number 12 of X
+        rlwinm r5, r4, 0, 21, 31 # extract bits 0 to 11 of X
+        addi r5, r5, 0x7FF      # r5 = (X & 0x7FF) + 0x7FF
+   # bit 12 of r5 is 0 if all low 12 bits of X are 0, 1 otherwise
+   # bits 13-31 of r5 are 0
+        or r4, r4, r5           # correct bit number 12 of X
         rlwinm r4, r4, 0, 0, 20 # set to 0 bits 0 to 11 of X
    # Convert to double, then round to single	
 1:      bl __compcert_i64_utod
diff --git a/runtime/powerpc64/i64_utof.s b/runtime/powerpc64/i64_utof.s
index cdb2f867..4a2a172b 100644
--- a/runtime/powerpc64/i64_utof.s
+++ b/runtime/powerpc64/i64_utof.s
@@ -48,11 +48,11 @@ __compcert_i64_utof:
    # X is large enough that double rounding can occur.
    # Avoid it by nudging X away from the points where double rounding
    # occurs (the "round to odd" technique)
-        rlwinm r0, r4, 0, 21, 31 # extract bits 0 to 11 of X
-        addi r0, r0, 0x7FF      # r0 = (X & 0x7FF) + 0x7FF
-   # bit 12 of r0 is 0 if all low 12 bits of X are 0, 1 otherwise
-   # bits 13-31 of r0 are 0
-        or r4, r4, r0           # correct bit number 12 of X
+        rlwinm r5, r4, 0, 21, 31 # extract bits 0 to 11 of X
+        addi r5, r5, 0x7FF      # r5 = (X & 0x7FF) + 0x7FF
+   # bit 12 of r5 is 0 if all low 12 bits of X are 0, 1 otherwise
+   # bits 13-31 of r5 are 0
+        or r4, r4, r5           # correct bit number 12 of X
         rlwinm r4, r4, 0, 0, 20 # set to 0 bits 0 to 11 of X
    # Convert to double, then round to single	
 1:      bl __compcert_i64_utod
author	David Monniaux <david.monniaux@univ-grenoble-alpes.fr>	2020-04-20 14:06:21 +0200
committer	David Monniaux <david.monniaux@univ-grenoble-alpes.fr>	2020-04-20 14:06:21 +0200
commit	168393089024b5f926836cb813fddf14e6b6e4d4 (patch)
tree	5d018198dc26be847544f162b87ad3dcecbab479 /runtime
parent	633b72565b022f159526338b5bbb9fcac86dfd2b (diff)
parent	b3431b1d9ee5121883d307cff0b62b7e53369891 (diff)
download	compcert-kvx-168393089024b5f926836cb813fddf14e6b6e4d4.tar.gz compcert-kvx-168393089024b5f926836cb813fddf14e6b6e4d4.zip