From b257a6d283f6f5784cb351856b5dbe8c645a1f6f Mon Sep 17 00:00:00 2001
From: xleroy
Date: Sun, 5 May 2013 16:29:20 +0000
Subject: ia32/i64_dtou: wrong handling of the rounding mode

arm, powerpc: expand shifts inline in dtos and dtou
arm: branchless code for shl and shr
test: more tests for double -> long long conversions.

git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@2234 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e
---
 runtime/arm/i64_dtos.S | 33 +++++++++++++++++++--------------
 runtime/arm/i64_dtou.S | 19 ++++++++++++++++---
 runtime/arm/i64_shl.s  | 30 +++++++++++++++++++++---------
 runtime/arm/i64_shr.s  | 31 +++++++++++++++++++++++--------
 4 files changed, 79 insertions(+), 34 deletions(-)

diff --git a/runtime/arm/i64_dtos.S b/runtime/arm/i64_dtos.S
index d0ef0371..43748683 100644
--- a/runtime/arm/i64_dtos.S
+++ b/runtime/arm/i64_dtos.S
@@ -40,8 +40,7 @@
 	.global __i64_dtos
 __i64_dtos:
-	push {r4, lr}
-	mov r4, r1, asr #31	@ save sign in r4
+	mov r12, r1, asr #31	@ save sign of result in r12
 	@ extract unbiased exponent ((HI & 0x7FF00000) >> 20) - (1023 + 52) in r2
 	@ note: 1023 + 52 = 1075 = 1024 + 51
 	@ note: (HI & 0x7FF00000) >> 20 = (HI << 1) >> 21
@@ -61,31 +60,37 @@ __i64_dtos:
 	@ shift it appropriately
 	cmp r2, #0
 	blt 3f
-	bl __i64_shl		@ if EXP >= 0, shift left by EXP
-	b 4f
-3:	rsb r2, r2, #0
-	bl __i64_shr		@ otherwise, shift right by -EXP
+	@ EXP >= 0: shift left by EXP.  Note that EXP < 12
+	rsb r3, r2, #32		@ r3 = 32 - amount
+	mov r1, r1, lsl r2
+	orr r1, r1, r0, lsr r3
+	mov r0, r0, lsl r2
+	b 4f
+	@ EXP < 0: shift right by -EXP.  Note that -EXP <= 52 but can be >= 32
+3:	rsb r2, r2, #0		@ r2 = -EXP = shift amount
+	rsb r3, r2, #32		@ r3 = 32 - amount
+	mov r0, r0, lsr r2
+	orr r0, r0, r1, lsl r3
+	sub r3, r2, #32		@ r3 = amount - 32 (see i64_shr.s)
+	orr r0, r0, r1, lsr r3
+	mov r1, r1, lsr r2
 	@ apply sign to result
-4:	eor r0, r0, r4
-	eor r1, r1, r4
-	subs r0, r0, r4
-	sbc r1, r1, r4
-	pop {r4, lr}
+4:	eor r0, r0, r12
+	eor r1, r1, r12
+	subs r0, r0, r12
+	sbc r1, r1, r12
 	bx lr
 	@ special cases
 1:	mov r0, #0		@ result is 0
 	mov r1, #0
-	pop {r4, lr}
 	bx lr
-2:	cmp r4, #0
+2:	cmp r12, #0		@ sign of result is now in r12, not r4
 	blt 6f
 	mvn r0, #0		@ result is 0x7F....FF (MAX_SINT)
 	mov r1, r0, lsr #1
-	pop {r4, lr}
 	bx lr
 6:	mov r0, #0		@ result is 0x80....00 (MIN_SINT)
 	mov r1, #0x80000000
-	pop {r4, lr}
 	bx lr
 	.type __i64_dtos, %function
 	.size __i64_dtos, . - __i64_dtos
diff --git a/runtime/arm/i64_dtou.S b/runtime/arm/i64_dtou.S
index 7f6152e1..50648a5d 100644
--- a/runtime/arm/i64_dtou.S
+++ b/runtime/arm/i64_dtou.S
@@ -60,9 +60,22 @@ __i64_dtou:
 	orr r1, r1, #0x00100000	@ HI |= 0x00100000
 	@ shift it appropriately
 	cmp r2, #0
-	bge __i64_shl		@ if EXP >= 0, shift left by EXP
-	rsb r2, r2, #0
-	b __i64_shr		@ otherwise, shift right by -EXP
+	blt 3f
+	@ EXP >= 0: shift left by EXP.  Note that EXP < 12
+	rsb r3, r2, #32		@ r3 = 32 - amount
+	mov r1, r1, lsl r2
+	orr r1, r1, r0, lsr r3
+	mov r0, r0, lsl r2
+	bx lr
+	@ EXP < 0: shift right by -EXP.  Note that -EXP <= 52 but can be >= 32
+3:	rsb r2, r2, #0		@ r2 = -EXP = shift amount
+	rsb r3, r2, #32		@ r3 = 32 - amount
+	mov r0, r0, lsr r2
+	orr r0, r0, r1, lsl r3
+	sub r3, r2, #32		@ r3 = amount - 32 (see i64_shr.s)
+	orr r0, r0, r1, lsr r3
+	mov r1, r1, lsr r2
+	bx lr
 	@ special cases
 1:	mov r0, #0		@ result is 0
 	mov r1, #0
diff --git a/runtime/arm/i64_shl.s b/runtime/arm/i64_shl.s
index 8014f884..afd92dbe 100644
--- a/runtime/arm/i64_shl.s
+++ b/runtime/arm/i64_shl.s
@@ -39,22 +39,34 @@
 @@@ Shift left

 @ Note on ARM shifts: the shift amount is taken modulo 256.
-@ Therefore, unsigned shifts by 32 bits or more produce 0.
+@ If shift amount mod 256 >= 32, the shift produces 0.
+
+@ Algorithm:
+@    RH = (XH << N) | (XL >> (32-N)) | (XL << (N-32))
+@    RL = XL << N
+@ If N = 0:
+@    RH = XH | 0 | 0
+@    RL = XL
+@ If 1 <= N <= 31:  1 <= 32-N <= 31 and 225 <= N-32 mod 256 <= 255
+@    RH = (XH << N) | (XL >> (32-N)) | 0
+@    RL = XL << N
+@ If N = 32:
+@    RH = 0 | XL | XL = XL
+@    RL = 0
+@ If 33 <= N <= 63:  225 <= 32-N mod 256 <= 255 and 1 <= N-32 <= 31
+@    RH = 0 | 0 | (XL << (N-32))
+@    RL = 0

 	.global __i64_shl
 __i64_shl:
 	and r2, r2, #63		@ normalize amount to 0...63
-	rsbs r3, r2, #32	@ r3 = 32 - amount
-	ble 1f			@ branch if <= 0, namely if amount >= 32
+	rsb r3, r2, #32		@ r3 = 32 - amount
 	mov r1, r1, lsl r2
-	orr r1, r0, lsr r3
+	orr r1, r1, r0, lsr r3
+	sub r3, r2, #32		@ r3 = amount - 32
+	orr r1, r1, r0, lsl r3
 	mov r0, r0, lsl r2
 	bx lr
-1:
-	sub r2, r2, #32
-	mov r1, r0, lsl r2
-	mov r0, #0
-	bx lr
 	.type __i64_shl, %function
 	.size __i64_shl, . - __i64_shl
diff --git a/runtime/arm/i64_shr.s b/runtime/arm/i64_shr.s
index f10b770a..9d604417 100644
--- a/runtime/arm/i64_shr.s
+++ b/runtime/arm/i64_shr.s
@@ -38,20 +38,35 @@

 @@@ Shift right unsigned

+@ Note on ARM shifts: the shift amount is taken modulo 256.
+@ If shift amount mod 256 >= 32, the shift produces 0.
+
+@ Algorithm:
+@    RL = (XL >> N) | (XH << (32-N)) | (XH >> (N-32))
+@    RH = XH >> N
+@ If N = 0:
+@    RL = XL | 0 | 0
+@    RH = XH
+@ If 1 <= N <= 31:  1 <= 32-N <= 31 and 225 <= N-32 mod 256 <= 255
+@    RL = (XL >> N) | (XH << (32-N)) | 0
+@    RH = XH >> N
+@ If N = 32:
+@    RL = 0 | XH | XH = XH
+@    RH = 0
+@ If 33 <= N <= 63:  225 <= 32-N mod 256 <= 255 and 1 <= N-32 <= 31
+@    RL = 0 | 0 | (XH >> (N-32))
+@    RH = 0
+
 	.global __i64_shr
 __i64_shr:
 	and r2, r2, #63		@ normalize amount to 0...63
-	rsbs r3, r2, #32	@ r3 = 32 - amount
-	ble 1f			@ branch if <= 0, namely if amount >= 32
+	rsb r3, r2, #32		@ r3 = 32 - amount
 	mov r0, r0, lsr r2
-	orr r0, r1, lsl r3
+	orr r0, r0, r1, lsl r3
+	sub r3, r2, #32		@ r3 = amount - 32
+	orr r0, r0, r1, lsr r3
 	mov r1, r1, lsr r2
 	bx lr
-1:
-	sub r2, r2, #32
-	mov r0, r1, lsr r2
-	mov r1, #0
-	bx lr
 	.type __i64_shr, %function
 	.size __i64_shr, . - __i64_shr
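
The case analysis in the i64_shl.s / i64_shr.s comments can be cross-checked
with a small C model of the same scheme. The sketch below is illustrative and
not part of the commit: the names arm_lsl, arm_lsr, model_shl, model_shr and
the test harness are invented here. It makes explicit the ARM rule stated
above (register-specified shift amounts are taken modulo 256, and amounts of
32 or more produce 0), which C's shift operators leave undefined, and then
checks the three-term expressions against native 64-bit shifts for every
amount 0..63.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* ARM register-specified shift semantics: amount is taken modulo 256,
   and any amount >= 32 produces 0. */
static uint32_t arm_lsl(uint32_t x, uint32_t n)
{
  n &= 255;
  return n < 32 ? x << n : 0;
}

static uint32_t arm_lsr(uint32_t x, uint32_t n)
{
  n &= 255;
  return n < 32 ? x >> n : 0;
}

/* Branchless __i64_shl: 32-n and n-32 wrap modulo 2^32, and the mod-256
   rule above zeroes whichever terms do not apply to the current range
   of n (0, 1..31, 32, 33..63). */
static uint64_t model_shl(uint64_t x, uint32_t n)
{
  uint32_t xl = (uint32_t) x, xh = (uint32_t) (x >> 32);
  n &= 63;                      /* normalize amount to 0...63 */
  uint32_t rh = arm_lsl(xh, n) | arm_lsr(xl, 32 - n) | arm_lsl(xl, n - 32);
  uint32_t rl = arm_lsl(xl, n);
  return (uint64_t) rh << 32 | rl;
}

/* Branchless __i64_shr (unsigned), the same scheme mirrored. */
static uint64_t model_shr(uint64_t x, uint32_t n)
{
  uint32_t xl = (uint32_t) x, xh = (uint32_t) (x >> 32);
  n &= 63;
  uint32_t rl = arm_lsr(xl, n) | arm_lsl(xh, 32 - n) | arm_lsr(xh, n - 32);
  uint32_t rh = arm_lsr(xh, n);
  return (uint64_t) rh << 32 | rl;
}

int main(void)
{
  uint64_t x = 0x8765432112345678ULL;
  for (uint32_t n = 0; n < 64; n++) {
    assert(model_shl(x, n) == x << n);
    assert(model_shr(x, n) == x >> n);
  }
  printf("all 64 shift amounts agree\n");
  return 0;
}

Note that the terms never contribute conflicting bits: for any N in 0..63 at
most one term is nonzero, except at N = 32 where the two nonzero terms
coincide. This is what lets __i64_dtos and __i64_dtou expand the shift inline
with no branches over the amount ranges, and keeping the sign in r12 (the
AAPCS scratch register) instead of r4 removes the push/pop of a callee-saved
register on top of that.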