From 255cee09b71255051c2b40eae0c88bffce1f6f32 Mon Sep 17 00:00:00 2001
From: xleroy
Date: Sat, 20 Apr 2013 07:54:52 +0000
Subject: Big merge of the newregalloc-int64 branch. Lots of changes in two
 directions:
 1- new register allocator (+ live range splitting, spilling&reloading, etc)
    based on a posteriori validation using the Rideau-Leroy algorithm
 2- support for 64-bit integer arithmetic (type "long long").

git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@2200 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e
---
 runtime/arm/int64.s | 424 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 424 insertions(+)
 create mode 100644 runtime/arm/int64.s
(limited to 'runtime/arm')

diff --git a/runtime/arm/int64.s b/runtime/arm/int64.s
new file mode 100644
index 00000000..6b033510
--- /dev/null
+++ b/runtime/arm/int64.s
@@ -0,0 +1,424 @@
+@ *****************************************************************
+@
+@               The Compcert verified compiler
+@
+@           Xavier Leroy, INRIA Paris-Rocquencourt
+@
+@ Copyright (c) 2013 Institut National de Recherche en Informatique et
+@ en Automatique.
+@
+@ Redistribution and use in source and binary forms, with or without
+@ modification, are permitted provided that the following conditions are met:
+@     * Redistributions of source code must retain the above copyright
+@       notice, this list of conditions and the following disclaimer.
+@     * Redistributions in binary form must reproduce the above copyright
+@       notice, this list of conditions and the following disclaimer in the
+@       documentation and/or other materials provided with the distribution.
+@     * Neither the name of the nor the
+@       names of its contributors may be used to endorse or promote products
+@       derived from this software without specific prior written permission.
+@
+@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+@ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+@ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+@ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+@ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+@ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+@ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+@
+@ *********************************************************************
+
+@ Helper functions for 64-bit integer arithmetic. ARM version.
+
+@ Calling conventions for R = F(X) or R = F(X,Y):
+@ one or two long arguments: XL in r0, XH in r1, YL in r2, YH in r3
+@ one long argument, one int: XL in r0, XH in r1, Y in r2
+@ one float argument: X in r0, r1
+@ one long result: RL in r0, RH in r1
+@ one float result: R in r0, r1
+@ This is a little-endian convention: the low word is in the
+@ low-numbered register.
+@ Can use r0...r3 and f0...f7 as temporary registers (caller-save)
+
+        .text
+
+@@@ Unsigned comparison
+
+        .global __i64_ucmp
+__i64_ucmp:
+        cmp r1, r3              @ compare high words
+        cmpeq r0, r2            @ if equal, compare low words instead
+        moveq r0, #0            @ res = 0 if eq
+        movhi r0, #1            @ res = 1 if unsigned higher
+        mvnlo r0, #0            @ res = -1 if unsigned lower
+        bx lr
+        .type __i64_ucmp, %function
+        .size __i64_ucmp, . - __i64_ucmp
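For reference, the result conventions of the two comparison helpers (the unsigned one above and the signed one that follows) can be written as a small C model. This is only a sketch: the names i64_ucmp_ref and i64_scmp_ref and the word-level prototypes are invented for illustration; the real helpers receive the two halves of each argument in r0-r3 as described in the calling conventions above.

    #include <stdint.h>

    /* Model of __i64_ucmp: 0 if X == Y, 1 if X > Y, -1 if X < Y (unsigned). */
    int i64_ucmp_ref(uint32_t xl, uint32_t xh, uint32_t yl, uint32_t yh)
    {
        if (xh != yh) return xh > yh ? 1 : -1;   /* high words first */
        if (xl != yl) return xl > yl ? 1 : -1;   /* then low words */
        return 0;
    }

    /* Model of __i64_scmp: returns a value in -3..3 whose sign (or zeroness)
       gives the signed comparison result. */
    int i64_scmp_ref(uint32_t xl, int32_t xh, uint32_t yl, int32_t yh)
    {
        int res = (xl == yl) ? 0 : (xl > yl ? 1 : -1);  /* low words, unsigned */
        if (xh > yh) res += 2;                          /* high words, signed */
        if (xh < yh) res -= 2;
        return res;
    }

Callers only ever test the sign of these results against 0, which is why __i64_scmp can return the unnormalized -3..3 encoding instead of collapsing it to -1/0/1.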
+
+@@@ Signed comparison
+
+        .global __i64_scmp
+__i64_scmp:
+        cmp r0, r2              @ compare low words (unsigned)
+        moveq r0, #0            @ res = 0 if eq
+        movhi r0, #1            @ res = 1 if unsigned higher
+        mvnlo r0, #0            @ res = -1 if unsigned lower
+        cmp r1, r3              @ compare high words (signed)
+        addgt r0, r0, #2        @ res += 2 if signed greater
+        sublt r0, r0, #2        @ res -= 2 if signed less
+        @ here, r0 = 0 if X == Y
+        @       r0 = -3, -2, -1 if X < Y
+        @       r0 = 1, 2, 3 if X > Y
+        bx lr
+        .type __i64_scmp, %function
+        .size __i64_scmp, . - __i64_scmp
+
+@@@ Opposite
+
+        .global __i64_neg
+__i64_neg:
+        rsbs r0, r0, #0
+        rsc r1, r1, #0
+        bx lr
+        .type __i64_neg, %function
+        .size __i64_neg, . - __i64_neg
+
+@@@ Addition
+
+        .global __i64_add
+__i64_add:
+        adds r0, r0, r2
+        adc r1, r1, r3
+        bx lr
+        .type __i64_add, %function
+        .size __i64_add, . - __i64_add
+
+@@@ Subtraction
+
+        .global __i64_sub
+__i64_sub:
+        subs r0, r0, r2
+        sbc r1, r1, r3
+        bx lr
+        .type __i64_sub, %function
+        .size __i64_sub, . - __i64_sub
+
+@ Note on ARM shifts: the shift amount is taken modulo 256.
+@ Therefore, unsigned shifts by 32 bits or more produce 0.
+
+@@@ Shift left
+
+        .global __i64_shl
+__i64_shl:
+        and r2, r2, #63         @ normalize amount to 0...63
+        rsbs r3, r2, #32        @ r3 = 32 - amount
+        ble 1f                  @ branch if <= 0, namely if amount >= 32
+        mov r1, r1, lsl r2
+        orr r1, r1, r0, lsr r3
+        mov r0, r0, lsl r2
+        bx lr
+1:
+        sub r2, r2, #32
+        mov r1, r0, lsl r2
+        mov r0, #0
+        bx lr
+        .type __i64_shl, %function
+        .size __i64_shl, . - __i64_shl
+
+@@@ Shift right unsigned
+
+        .global __i64_shr
+__i64_shr:
+        and r2, r2, #63         @ normalize amount to 0...63
+        rsbs r3, r2, #32        @ r3 = 32 - amount
+        ble 1f                  @ branch if <= 0, namely if amount >= 32
+        mov r0, r0, lsr r2
+        orr r0, r0, r1, lsl r3
+        mov r1, r1, lsr r2
+        bx lr
+1:
+        sub r2, r2, #32
+        mov r0, r1, lsr r2
+        mov r1, #0
+        bx lr
+        .type __i64_shr, %function
+        .size __i64_shr, . - __i64_shr
+
+@@@ Shift right signed
+
+        .global __i64_sar
+__i64_sar:
+        and r2, r2, #63         @ normalize amount to 0...63
+        rsbs r3, r2, #32        @ r3 = 32 - amount
+        ble 1f                  @ branch if <= 0, namely if amount >= 32
+        mov r0, r0, lsr r2
+        orr r0, r0, r1, lsl r3
+        mov r1, r1, asr r2
+        bx lr
+1:
+        sub r2, r2, #32
+        mov r0, r1, asr r2
+        mov r1, r1, asr #31
+        bx lr
+        .type __i64_sar, %function
+        .size __i64_sar, . - __i64_sar
+
+@@@ Multiplication
+
+        .global __i64_mul
+__i64_mul:
+        push {r4, r5}
+        mov r4, r0              @ save first arg in r4,r5
+        mov r5, r1
+        umull r0, r1, r2, r4    @ 64-bit product of low halves
+        mla r1, r2, r5, r1      @ add 32-bit products low half * high half
+        mla r1, r3, r4, r1      @ to high half of result
+        pop {r4, r5}
+        bx lr
+        .type __i64_mul, %function
+        .size __i64_mul, . - __i64_mul
+
+@@@ Auxiliary function for division and modulus. Not exported.
+
+@ On entry:  N = (r0, r1) numerator    D = (r2, r3) divisor
+@ On exit:   Q = (r4, r5) quotient     R = (r0, r1) remainder
+@ Locals:    M = (r6, r7) mask         TMP = r8 temporary
+
+__i64_udivmod:
+        orrs r8, r2, r3         @ is D == 0?
+        bxeq lr                 @ if so, return with unspecified results
+        mov r4, #0              @ Q = 0
+        mov r5, #0
+        mov r6, #1              @ M = 1
+        mov r7, #0
+1:      cmp r3, #0              @ while ((signed) D >= 0) ...
+        blt 2f
+        subs r8, r0, r2         @ ... and N >= D ...
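+                                @ (the subs above and the sbcs below compute
+                                @ N - D over 64 bits purely for the flags:
+                                @ carry still set after the sbcs means no
+                                @ borrow, i.e. N >= D; r8 is only a scratch)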
+        sbcs r8, r1, r3
+        blo 2f
+        adds r2, r2, r2         @ D = D << 1
+        adc r3, r3, r3
+        adds r6, r6, r6         @ M = M << 1
+        adc r7, r7, r7
+        b 1b
+2:      subs r0, r0, r2         @ N = N - D
+        sbcs r1, r1, r3
+        orr r4, r4, r6          @ Q = Q | M
+        orr r5, r5, r7
+        bhs 3f                  @ if N was >= D, continue
+        adds r0, r0, r2         @ otherwise, undo what we just did
+        adc r1, r1, r3          @ N = N + D
+        bic r4, r4, r6          @ Q = Q & ~M
+        bic r5, r5, r7
+3:      movs r7, r7, lsr #1     @ M = M >> 1
+        mov r6, r6, rrx
+        movs r3, r3, lsr #1     @ D = D >> 1
+        mov r2, r2, rrx
+        orrs r8, r6, r7         @ repeat while (M != 0) ...
+        bne 2b
+        bx lr
+
+@@@ Unsigned division
+
+        .global __i64_udiv
+__i64_udiv:
+        push {r4, r5, r6, r7, r8, lr}
+        bl __i64_udivmod
+        mov r0, r4
+        mov r1, r5
+        pop {r4, r5, r6, r7, r8, lr}
+        bx lr
+        .type __i64_udiv, %function
+        .size __i64_udiv, . - __i64_udiv
+
+@@@ Unsigned modulus
+
+        .global __i64_umod
+__i64_umod:
+        push {r4, r5, r6, r7, r8, lr}
+        bl __i64_udivmod        @ remainder is already in r0,r1
+        pop {r4, r5, r6, r7, r8, lr}
+        bx lr
+        .type __i64_umod, %function
+        .size __i64_umod, . - __i64_umod
+
+@@@ Signed division
+
+        .global __i64_sdiv
+__i64_sdiv:
+        push {r4, r5, r6, r7, r8, r10, lr}
+        eor r10, r1, r3         @ r10 = sign of result
+        mov r4, r1, asr #31     @ take absolute value of N
+        eor r0, r0, r4          @ N = (N ^ (N >>s 31)) - (N >>s 31)
+        eor r1, r1, r4
+        subs r0, r0, r4
+        sbc r1, r1, r4
+        mov r4, r3, asr #31     @ take absolute value of D
+        eor r2, r2, r4
+        eor r3, r3, r4
+        subs r2, r2, r4
+        sbc r3, r3, r4
+        bl __i64_udivmod        @ do unsigned division
+        mov r0, r4
+        mov r1, r5
+        eor r0, r0, r10, asr #31  @ apply expected sign
+        eor r1, r1, r10, asr #31
+        subs r0, r0, r10, asr #31
+        sbc r1, r1, r10, asr #31
+        pop {r4, r5, r6, r7, r8, r10, lr}
+        bx lr
+        .type __i64_sdiv, %function
+        .size __i64_sdiv, . - __i64_sdiv
+
+@@@ Signed modulus
+
+        .global __i64_smod
+__i64_smod:
+        push {r4, r5, r6, r7, r8, r10, lr}
+        mov r10, r1             @ r10 = sign of result
+        mov r4, r1, asr #31     @ take absolute value of N
+        eor r0, r0, r4          @ N = (N ^ (N >>s 31)) - (N >>s 31)
+        eor r1, r1, r4
+        subs r0, r0, r4
+        sbc r1, r1, r4
+        mov r4, r3, asr #31     @ take absolute value of D
+        eor r2, r2, r4
+        eor r3, r3, r4
+        subs r2, r2, r4
+        sbc r3, r3, r4
+        bl __i64_udivmod        @ do unsigned division
+        eor r0, r0, r10, asr #31  @ apply expected sign
+        eor r1, r1, r10, asr #31
+        subs r0, r0, r10, asr #31
+        sbc r1, r1, r10, asr #31
+        pop {r4, r5, r6, r7, r8, r10, lr}
+        bx lr
+        .type __i64_smod, %function
+        .size __i64_smod, . - __i64_smod
+
+@@@ Conversion from unsigned 64-bit integer to double float
+
+        .global __i64_utod
+__i64_utod:
+        fmsr s0, r0
+        fuitod d0, s0           @ convert low half to double (unsigned)
+        fmsr s2, r1
+        fuitod d1, s2           @ convert high half to double (unsigned)
+        fldd d2, .LC1           @ d2 = 2^32
+        fmacd d0, d1, d2        @ d0 = d0 + d1 * d2 = double value of int64
+        fmrrd r0, r1, d0        @ return result in r0, r1
+        bx lr
+        .type __i64_utod, %function
+        .size __i64_utod, . - __i64_utod
+
+        .balign 8
+.LC1:   .quad 0x41f0000000000000 @ 2^32 in double precision
+
+@@@ Conversion from signed 64-bit integer to double float
+
+        .global __i64_stod
+__i64_stod:
+        fmsr s0, r0
+        fuitod d0, s0           @ convert low half to double (unsigned)
+        fmsr s2, r1
+        fsitod d1, s2           @ convert high half to double (signed)
+        fldd d2, .LC1           @ d2 = 2^32
+        fmacd d0, d1, d2        @ d0 = d0 + d1 * d2 = double value of int64
+        fmrrd r0, r1, d0        @ return result in r0, r1
+        bx lr
+        .type __i64_stod, %function
+        .size __i64_stod, . - __i64_stod
+
+@@@ Conversion from double float to unsigned 64-bit integer
+
+        .global __i64_dtou
+__i64_dtou:
+        cmp r1, #0              @ is double < 0 ?
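+                                @ (r1 holds the high word of the IEEE-754
+                                @ encoding, whose bit 31 is the sign bit, so
+                                @ this signed compare against 0 is enough to
+                                @ detect a negative input)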
+        blt 1f                  @ then it converts to 0
+        @ extract unbiased exponent ((HI & 0x7FF00000) >> 20) - (1023 + 52) in r2
+        @ note: 1023 + 52 = 1075 = 1024 + 51
+        @ note: (HI & 0x7FF00000) >> 20 = (HI << 1) >> 21
+        mov r2, r1, lsl #1
+        mov r2, r2, lsr #21
+        sub r2, r2, #51
+        sub r2, r2, #1024
+        @ check range of exponent
+        cmn r2, #52             @ if EXP < -52, double is < 1.0
+        blt 1f
+        cmp r2, #12             @ if EXP >= 64 - 52, double is >= 2^64
+        bge 2f
+        @ extract true mantissa
+        bic r1, r1, #0xFF000000
+        bic r1, r1, #0x00F00000 @ HI &= ~0xFFF00000
+        orr r1, r1, #0x00100000 @ HI |= 0x00100000
+        @ shift it appropriately
+        cmp r2, #0
+        bge __i64_shl           @ if EXP >= 0, shift left by EXP
+        rsb r2, r2, #0
+        b __i64_shr             @ otherwise, shift right by -EXP
+        @ special cases
+1:      mov r0, #0              @ result is 0
+        mov r1, #0
+        bx lr
+2:      mvn r0, #0              @ result is 0xFF....FF (MAX_UINT)
+        mvn r1, #0
+        bx lr
+        .type __i64_dtou, %function
+        .size __i64_dtou, . - __i64_dtou
+
+@@@ Conversion from double float to signed 64-bit integer
+
+        .global __i64_dtos
+__i64_dtos:
+        push {r4, lr}
+        mov r4, r1, asr #31     @ save sign in r4
+        @ extract unbiased exponent ((HI & 0x7FF00000) >> 20) - (1023 + 52) in r2
+        @ note: 1023 + 52 = 1075 = 1024 + 51
+        @ note: (HI & 0x7FF00000) >> 20 = (HI << 1) >> 21
+        mov r2, r1, lsl #1
+        mov r2, r2, lsr #21
+        sub r2, r2, #51
+        sub r2, r2, #1024
+        @ check range of exponent
+        cmn r2, #52             @ if EXP < -52, |double| is < 1.0
+        blt 1f
+        cmp r2, #11             @ if EXP >= 63 - 52, |double| is >= 2^63
+        bge 2f
+        @ extract true mantissa
+        bic r1, r1, #0xFF000000
+        bic r1, r1, #0x00F00000 @ HI &= ~0xFFF00000
+        orr r1, r1, #0x00100000 @ HI |= 0x00100000
+        @ shift it appropriately
+        cmp r2, #0
+        blt 3f
+        bl __i64_shl            @ if EXP >= 0, shift left by EXP
+        b 4f
+3:      rsb r2, r2, #0
+        bl __i64_shr            @ otherwise, shift right by -EXP
+        @ apply sign to result
+4:      eor r0, r0, r4
+        eor r1, r1, r4
+        subs r0, r0, r4
+        sbc r1, r1, r4
+        pop {r4, lr}
+        bx lr
+        @ special cases
+1:      mov r0, #0              @ result is 0
+        mov r1, #0
+        pop {r4, lr}
+        bx lr
+2:      cmp r4, #0
+        blt 6f
+        mvn r0, #0              @ result is 0x7F....FF (MAX_SINT)
+        mov r1, r0, lsr #1
+        pop {r4, lr}
+        bx lr
+6:      mov r0, #0              @ result is 0x80....00 (MIN_SINT)
+        mov r1, #0x80000000
+        pop {r4, lr}
+        bx lr
+        .type __i64_dtos, %function
+        .size __i64_dtos, . - __i64_dtos
+
-- cgit
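For readers who prefer to check the double-to-integer paths against a higher-level description, the following C sketch mirrors the strategy of __i64_dtou as written above: reject negative inputs, compute the unbiased exponent EXP, clamp out-of-range values, then shift the mantissa (with its implicit leading 1) left or right by EXP. The name dtou_ref and its single 64-bit argument are invented for illustration; the real helper receives the two halves of the double in r0/r1 and reuses __i64_shl/__i64_shr for the final shift.

    #include <stdint.h>

    uint64_t dtou_ref(uint64_t bits)   /* bits = raw IEEE-754 encoding */
    {
        if ((int64_t)bits < 0)                      /* sign bit set: < 0 */
            return 0;
        int exp = (int)((bits >> 52) & 0x7FF) - (1023 + 52);
        if (exp < -52)                              /* value < 1.0 */
            return 0;
        if (exp >= 64 - 52)                         /* value >= 2^64 */
            return ~(uint64_t)0;                    /* MAX_UINT */
        uint64_t mant = (bits & 0x000FFFFFFFFFFFFFULL) | 0x0010000000000000ULL;
        return exp >= 0 ? mant << exp : mant >> -exp;
    }

__i64_dtos follows the same outline, except that the range check uses 63 - 52, the sign bit is stripped before the mantissa is extracted and reapplied to the result at the end, and out-of-range values saturate to MAX_SINT or MIN_SINT according to the saved sign.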