From 255cee09b71255051c2b40eae0c88bffce1f6f32 Mon Sep 17 00:00:00 2001
From: xleroy
Date: Sat, 20 Apr 2013 07:54:52 +0000
Subject: Big merge of the newregalloc-int64 branch. Lots of changes in two
 directions:
 1- new register allocator (+ live range splitting, spilling&reloading, etc)
    based on a posteriori validation using the Rideau-Leroy algorithm
 2- support for 64-bit integer arithmetic (type "long long").

git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@2200 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e
---
 runtime/arm/int64.s | 424 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 424 insertions(+)
 create mode 100644 runtime/arm/int64.s
(limited to 'runtime/arm')

diff --git a/runtime/arm/int64.s b/runtime/arm/int64.s
new file mode 100644
index 00000000..6b033510
--- /dev/null
+++ b/runtime/arm/int64.s
@@ -0,0 +1,424 @@
+@ *****************************************************************
+@
+@               The Compcert verified compiler
+@
+@           Xavier Leroy, INRIA Paris-Rocquencourt
+@
+@ Copyright (c) 2013 Institut National de Recherche en Informatique et
+@ en Automatique.
+@
+@ Redistribution and use in source and binary forms, with or without
+@ modification, are permitted provided that the following conditions are met:
+@     * Redistributions of source code must retain the above copyright
+@       notice, this list of conditions and the following disclaimer.
+@     * Redistributions in binary form must reproduce the above copyright
+@       notice, this list of conditions and the following disclaimer in the
+@       documentation and/or other materials provided with the distribution.
+@     * Neither the name of the nor the
+@       names of its contributors may be used to endorse or promote products
+@       derived from this software without specific prior written permission.
+@
+@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+@ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+@ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+@ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+@ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+@ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+@ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+@
+@ *********************************************************************
+
+@ Helper functions for 64-bit integer arithmetic. ARM version.
+
+@ Calling conventions for R = F(X) or R = F(X,Y):
+@ one or two long arguments: XL in r0, XH in r1, YL in r2, YH in r3
+@ one long argument, one int: XL in r0, XH in r1, Y in r2
+@ one float argument: X in r0, r1
+@ one long result: RL in r0, RH in r1
+@ one float result: R in r0, r1
+@ This is a little-endian convention: the low word is in the
+@ low-numbered register.
+@ Can use r0...r3 and f0...f7 as temporary registers (caller-save)
+
+        .text
+
+@@@ Unsigned comparison
+
+        .global __i64_ucmp
+__i64_ucmp:
+        cmp r1, r3              @ compare high words
+        cmpeq r0, r2            @ if equal, compare low words instead
+        moveq r0, #0            @ res = 0 if eq
+        movhi r0, #1            @ res = 1 if unsigned higher
+        mvnlo r0, #0            @ res = -1 if unsigned lower
+        bx lr
+        .type __i64_ucmp, %function
+        .size __i64_ucmp, . - __i64_ucmp
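For reference, the result conventions of the two comparison helpers (the unsigned one above and the signed one that follows) can be written as a small C model. This is only a sketch: the names i64_ucmp_ref and i64_scmp_ref and the word-level prototypes are invented for illustration; the real helpers receive the two halves of each argument in r0-r3 as described in the calling conventions above.

    #include <stdint.h>

    /* Model of __i64_ucmp: 0 if X == Y, 1 if X > Y, -1 if X < Y (unsigned). */
    int i64_ucmp_ref(uint32_t xl, uint32_t xh, uint32_t yl, uint32_t yh)
    {
        if (xh != yh) return xh > yh ? 1 : -1;   /* high words first */
        if (xl != yl) return xl > yl ? 1 : -1;   /* then low words */
        return 0;
    }

    /* Model of __i64_scmp: returns a value in -3..3 whose sign (or zeroness)
       gives the signed comparison result. */
    int i64_scmp_ref(uint32_t xl, int32_t xh, uint32_t yl, int32_t yh)
    {
        int res = (xl == yl) ? 0 : (xl > yl ? 1 : -1);  /* low words, unsigned */
        if (xh > yh) res += 2;                          /* high words, signed */
        if (xh < yh) res -= 2;
        return res;
    }

Callers only ever test the sign of these results against 0, which is why __i64_scmp can return the unnormalized -3..3 encoding instead of collapsing it to -1/0/1.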
+
+@@@ Signed comparison
+
+        .global __i64_scmp
+__i64_scmp:
+        cmp r0, r2              @ compare low words (unsigned)
+        moveq r0, #0            @ res = 0 if eq
+        movhi r0, #1            @ res = 1 if unsigned higher
+        mvnlo r0, #0            @ res = -1 if unsigned lower
+        cmp r1, r3              @ compare high words (signed)
+        addgt r0, r0, #2        @ res += 2 if signed greater
+        sublt r0, r0, #2        @ res -= 2 if signed less
+        @ here, r0 = 0 if X == Y
+        @       r0 = -3, -2, -1 if X < Y
+        @       r0 = 1, 2, 3 if X > Y
+        bx lr
+        .type __i64_scmp, %function
+        .size __i64_scmp, . - __i64_scmp
+
+@@@ Opposite
+
+        .global __i64_neg
+__i64_neg:
+        rsbs r0, r0, #0
+        rsc r1, r1, #0
+        bx lr
+        .type __i64_neg, %function
+        .size __i64_neg, . - __i64_neg
+
+@@@ Addition
+
+        .global __i64_add
+__i64_add:
+        adds r0, r0, r2
+        adc r1, r1, r3
+        bx lr
+        .type __i64_add, %function
+        .size __i64_add, . - __i64_add
+
+@@@ Subtraction
+
+        .global __i64_sub
+__i64_sub:
+        subs r0, r0, r2
+        sbc r1, r1, r3
+        bx lr
+        .type __i64_sub, %function
+        .size __i64_sub, . - __i64_sub
+
+@ Note on ARM shifts: the shift amount is taken modulo 256.
+@ Therefore, unsigned shifts by 32 bits or more produce 0.
+
+@@@ Shift left
+
+        .global __i64_shl
+__i64_shl:
+        and r2, r2, #63         @ normalize amount to 0...63
+        rsbs r3, r2, #32        @ r3 = 32 - amount
+        ble 1f                  @ branch if <= 0, namely if amount >= 32
+        mov r1, r1, lsl r2
+        orr r1, r1, r0, lsr r3
+        mov r0, r0, lsl r2
+        bx lr
+1:
+        sub r2, r2, #32
+        mov r1, r0, lsl r2
+        mov r0, #0
+        bx lr
+        .type __i64_shl, %function
+        .size __i64_shl, . - __i64_shl
+
+@@@ Shift right unsigned
+
+        .global __i64_shr
+__i64_shr:
+        and r2, r2, #63         @ normalize amount to 0...63
+        rsbs r3, r2, #32        @ r3 = 32 - amount
+        ble 1f                  @ branch if <= 0, namely if amount >= 32
+        mov r0, r0, lsr r2
+        orr r0, r0, r1, lsl r3
+        mov r1, r1, lsr r2
+        bx lr
+1:
+        sub r2, r2, #32
+        mov r0, r1, lsr r2
+        mov r1, #0
+        bx lr
+        .type __i64_shr, %function
+        .size __i64_shr, . - __i64_shr
+
+@@@ Shift right signed
+
+        .global __i64_sar
+__i64_sar:
+        and r2, r2, #63         @ normalize amount to 0...63
+        rsbs r3, r2, #32        @ r3 = 32 - amount
+        ble 1f                  @ branch if <= 0, namely if amount >= 32
+        mov r0, r0, lsr r2
+        orr r0, r0, r1, lsl r3
+        mov r1, r1, asr r2
+        bx lr
+1:
+        sub r2, r2, #32
+        mov r0, r1, asr r2
+        mov r1, r1, asr #31
+        bx lr
+        .type __i64_sar, %function
+        .size __i64_sar, . - __i64_sar
+
+@@@ Multiplication
+
+        .global __i64_mul
+__i64_mul:
+        push {r4, r5}
+        mov r4, r0              @ save first arg in r4,r5
+        mov r5, r1
+        umull r0, r1, r2, r4    @ 64-bit product of low halves
+        mla r1, r2, r5, r1      @ add 32-bit products low half * high half
+        mla r1, r3, r4, r1      @ to high half of result
+        pop {r4, r5}
+        bx lr
+        .type __i64_mul, %function
+        .size __i64_mul, . - __i64_mul
+
+@@@ Auxiliary function for division and modulus. Not exported.
+
+@ On entry:  N = (r0, r1) numerator    D = (r2, r3) divisor
+@ On exit:   Q = (r4, r5) quotient     R = (r0, r1) remainder
+@ Locals:    M = (r6, r7) mask         TMP = r8 temporary
+
+__i64_udivmod:
+        orrs r8, r2, r3         @ is D == 0?
+        bxeq lr                 @ if so, return with unspecified results
+        mov r4, #0              @ Q = 0
+        mov r5, #0
+        mov r6, #1              @ M = 1
+        mov r7, #0
+1:      cmp r3, #0              @ while ((signed) D >= 0) ...
+        blt 2f
+        subs r8, r0, r2         @ ... and N >= D ...
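+                                @ (the subs above and the sbcs below compute
+                                @ N - D over 64 bits purely for the flags:
+                                @ carry still set after the sbcs means no
+                                @ borrow, i.e. N >= D; r8 is only a scratch)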
+        sbcs r8, r1, r3
+        blo 2f
+        adds r2, r2, r2         @ D = D << 1
+        adc r3, r3, r3
+        adds r6, r6, r6         @ M = M << 1
+        adc r7, r7, r7
+        b 1b
+2:      subs r0, r0, r2         @ N = N - D
+        sbcs r1, r1, r3
+        orr r4, r4, r6          @ Q = Q | M
+        orr r5, r5, r7
+        bhs 3f                  @ if N was >= D, continue
+        adds r0, r0, r2         @ otherwise, undo what we just did
+        adc r1, r1, r3          @ N = N + D
+        bic r4, r4, r6          @ Q = Q & ~M
+        bic r5, r5, r7
+3:      movs r7, r7, lsr #1     @ M = M >> 1
+        mov r6, r6, rrx
+        movs r3, r3, lsr #1     @ D = D >> 1
+        mov r2, r2, rrx
+        orrs r8, r6, r7         @ repeat while (M != 0) ...
+        bne 2b
+        bx lr
+
+@@@ Unsigned division
+
+        .global __i64_udiv
+__i64_udiv:
+        push {r4, r5, r6, r7, r8, lr}
+        bl __i64_udivmod
+        mov r0, r4
+        mov r1, r5
+        pop {r4, r5, r6, r7, r8, lr}
+        bx lr
+        .type __i64_udiv, %function
+        .size __i64_udiv, . - __i64_udiv
+
+@@@ Unsigned modulus
+
+        .global __i64_umod
+__i64_umod:
+        push {r4, r5, r6, r7, r8, lr}
+        bl __i64_udivmod        @ remainder is already in r0,r1
+        pop {r4, r5, r6, r7, r8, lr}
+        bx lr
+        .type __i64_umod, %function
+        .size __i64_umod, . - __i64_umod
+
+@@@ Signed division
+
+        .global __i64_sdiv
+__i64_sdiv:
+        push {r4, r5, r6, r7, r8, r10, lr}
+        eor r10, r1, r3         @ r10 = sign of result
+        mov r4, r1, asr #31     @ take absolute value of N
+        eor r0, r0, r4          @ N = (N ^ (N >>s 31)) - (N >>s 31)
+        eor r1, r1, r4
+        subs r0, r0, r4
+        sbc r1, r1, r4
+        mov r4, r3, asr #31     @ take absolute value of D
+        eor r2, r2, r4
+        eor r3, r3, r4
+        subs r2, r2, r4
+        sbc r3, r3, r4
+        bl __i64_udivmod        @ do unsigned division
+        mov r0, r4
+        mov r1, r5
+        eor r0, r0, r10, asr #31  @ apply expected sign
+        eor r1, r1, r10, asr #31
+        subs r0, r0, r10, asr #31
+        sbc r1, r1, r10, asr #31
+        pop {r4, r5, r6, r7, r8, r10, lr}
+        bx lr
+        .type __i64_sdiv, %function
+        .size __i64_sdiv, . - __i64_sdiv
+
+@@@ Signed modulus
+
+        .global __i64_smod
+__i64_smod:
+        push {r4, r5, r6, r7, r8, r10, lr}
+        mov r10, r1             @ r10 = sign of result
+        mov r4, r1, asr #31     @ take absolute value of N
+        eor r0, r0, r4          @ N = (N ^ (N >>s 31)) - (N >>s 31)
+        eor r1, r1, r4
+        subs r0, r0, r4
+        sbc r1, r1, r4
+        mov r4, r3, asr #31     @ take absolute value of D
+        eor r2, r2, r4
+        eor r3, r3, r4
+        subs r2, r2, r4
+        sbc r3, r3, r4
+        bl __i64_udivmod        @ do unsigned division
+        eor r0, r0, r10, asr #31  @ apply expected sign
+        eor r1, r1, r10, asr #31
+        subs r0, r0, r10, asr #31
+        sbc r1, r1, r10, asr #31
+        pop {r4, r5, r6, r7, r8, r10, lr}
+        bx lr
+        .type __i64_smod, %function
+        .size __i64_smod, . - __i64_smod
+
+@@@ Conversion from unsigned 64-bit integer to double float
+
+        .global __i64_utod
+__i64_utod:
+        fmsr s0, r0
+        fuitod d0, s0           @ convert low half to double (unsigned)
+        fmsr s2, r1
+        fuitod d1, s2           @ convert high half to double (unsigned)
+        fldd d2, .LC1           @ d2 = 2^32
+        fmacd d0, d1, d2        @ d0 = d0 + d1 * d2 = double value of int64
+        fmrrd r0, r1, d0        @ return result in r0, r1
+        bx lr
+        .type __i64_utod, %function
+        .size __i64_utod, . - __i64_utod
+
+        .balign 8
+.LC1:   .quad 0x41f0000000000000 @ 2^32 in double precision
+
+@@@ Conversion from signed 64-bit integer to double float
+
+        .global __i64_stod
+__i64_stod:
+        fmsr s0, r0
+        fuitod d0, s0           @ convert low half to double (unsigned)
+        fmsr s2, r1
+        fsitod d1, s2           @ convert high half to double (signed)
+        fldd d2, .LC1           @ d2 = 2^32
+        fmacd d0, d1, d2        @ d0 = d0 + d1 * d2 = double value of int64
+        fmrrd r0, r1, d0        @ return result in r0, r1
+        bx lr
+        .type __i64_stod, %function
+        .size __i64_stod, . - __i64_stod
+
+@@@ Conversion from double float to unsigned 64-bit integer
+
+        .global __i64_dtou
+__i64_dtou:
+        cmp r1, #0              @ is double < 0 ?
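+                                @ (r1 holds the high word of the IEEE-754
+                                @ encoding, whose bit 31 is the sign bit, so
+                                @ this signed compare against 0 is enough to
+                                @ detect a negative input)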
+        blt 1f                  @ then it converts to 0
+        @ extract unbiased exponent ((HI & 0x7FF00000) >> 20) - (1023 + 52) in r2
+        @ note: 1023 + 52 = 1075 = 1024 + 51
+        @ note: (HI & 0x7FF00000) >> 20 = (HI << 1) >> 21
+        mov r2, r1, lsl #1
+        mov r2, r2, lsr #21
+        sub r2, r2, #51
+        sub r2, r2, #1024
+        @ check range of exponent
+        cmn r2, #52             @ if EXP < -52, double is < 1.0
+        blt 1f
+        cmp r2, #12             @ if EXP >= 64 - 52, double is >= 2^64
+        bge 2f
+        @ extract true mantissa
+        bic r1, r1, #0xFF000000
+        bic r1, r1, #0x00F00000 @ HI &= ~0xFFF00000
+        orr r1, r1, #0x00100000 @ HI |= 0x00100000
+        @ shift it appropriately
+        cmp r2, #0
+        bge __i64_shl           @ if EXP >= 0, shift left by EXP
+        rsb r2, r2, #0
+        b __i64_shr             @ otherwise, shift right by -EXP
+        @ special cases
+1:      mov r0, #0              @ result is 0
+        mov r1, #0
+        bx lr
+2:      mvn r0, #0              @ result is 0xFF....FF (MAX_UINT)
+        mvn r1, #0
+        bx lr
+        .type __i64_dtou, %function
+        .size __i64_dtou, . - __i64_dtou
+
+@@@ Conversion from double float to signed 64-bit integer
+
+        .global __i64_dtos
+__i64_dtos:
+        push {r4, lr}
+        mov r4, r1, asr #31     @ save sign in r4
+        @ extract unbiased exponent ((HI & 0x7FF00000) >> 20) - (1023 + 52) in r2
+        @ note: 1023 + 52 = 1075 = 1024 + 51
+        @ note: (HI & 0x7FF00000) >> 20 = (HI << 1) >> 21
+        mov r2, r1, lsl #1
+        mov r2, r2, lsr #21
+        sub r2, r2, #51
+        sub r2, r2, #1024
+        @ check range of exponent
+        cmn r2, #52             @ if EXP < -52, |double| is < 1.0
+        blt 1f
+        cmp r2, #11             @ if EXP >= 63 - 52, |double| is >= 2^63
+        bge 2f
+        @ extract true mantissa
+        bic r1, r1, #0xFF000000
+        bic r1, r1, #0x00F00000 @ HI &= ~0xFFF00000
+        orr r1, r1, #0x00100000 @ HI |= 0x00100000
+        @ shift it appropriately
+        cmp r2, #0
+        blt 3f
+        bl __i64_shl            @ if EXP >= 0, shift left by EXP
+        b 4f
+3:      rsb r2, r2, #0
+        bl __i64_shr            @ otherwise, shift right by -EXP
+        @ apply sign to result
+4:      eor r0, r0, r4
+        eor r1, r1, r4
+        subs r0, r0, r4
+        sbc r1, r1, r4
+        pop {r4, lr}
+        bx lr
+        @ special cases
+1:      mov r0, #0              @ result is 0
+        mov r1, #0
+        pop {r4, lr}
+        bx lr
+2:      cmp r4, #0
+        blt 6f
+        mvn r0, #0              @ result is 0x7F....FF (MAX_SINT)
+        mov r1, r0, lsr #1
+        pop {r4, lr}
+        bx lr
+6:      mov r0, #0              @ result is 0x80....00 (MIN_SINT)
+        mov r1, #0x80000000
+        pop {r4, lr}
+        bx lr
+        .type __i64_dtos, %function
+        .size __i64_dtos, . - __i64_dtos
+
-- cgit
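For readers who prefer to check the double-to-integer paths against a higher-level description, the following C sketch mirrors the strategy of __i64_dtou as written above: reject negative inputs, compute the unbiased exponent EXP, clamp out-of-range values, then shift the mantissa (with its implicit leading 1) left or right by EXP. The name dtou_ref and its single 64-bit argument are invented for illustration; the real helper receives the two halves of the double in r0/r1 and reuses __i64_shl/__i64_shr for the final shift.

    #include <stdint.h>

    uint64_t dtou_ref(uint64_t bits)   /* bits = raw IEEE-754 encoding */
    {
        if ((int64_t)bits < 0)                      /* sign bit set: < 0 */
            return 0;
        int exp = (int)((bits >> 52) & 0x7FF) - (1023 + 52);
        if (exp < -52)                              /* value < 1.0 */
            return 0;
        if (exp >= 64 - 52)                         /* value >= 2^64 */
            return ~(uint64_t)0;                    /* MAX_UINT */
        uint64_t mant = (bits & 0x000FFFFFFFFFFFFFULL) | 0x0010000000000000ULL;
        return exp >= 0 ? mant << exp : mant >> -exp;
    }

__i64_dtos follows the same outline, except that the range check uses 63 - 52, the sign bit is stripped before the mantissa is extracted and reapplied to the result at the end, and out-of-range values saturate to MAX_SINT or MIN_SINT according to the saved sign.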