diff options
author | xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e> | 2013-07-03 11:28:17 +0000 |
---|---|---|
committer | xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e> | 2013-07-03 11:28:17 +0000 |
commit | 14ae5ba40c3217f7410c377bf36e21509b01eb8f (patch) | |
tree | f1d57929fb4310b6e8bdf8bf2edb3718c1169be9 /runtime/powerpc/i64_sdiv.s | |
parent | 67976ff28a03fc57690ad792fd5e515010f803a5 (diff) | |
download | compcert-14ae5ba40c3217f7410c377bf36e21509b01eb8f.tar.gz compcert-14ae5ba40c3217f7410c377bf36e21509b01eb8f.zip |
powerpc: faster implementation of long division modeled on that for IA32
test: add one test (2^64-1) / (2^32+3) to exercise a special case of
this long division.
git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@2288 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e
Diffstat (limited to 'runtime/powerpc/i64_sdiv.s')
-rw-r--r-- | runtime/powerpc/i64_sdiv.s | 20 |
1 files changed, 12 insertions, 8 deletions
diff --git a/runtime/powerpc/i64_sdiv.s b/runtime/powerpc/i64_sdiv.s
index f5225061..411ad50c 100644
--- a/runtime/powerpc/i64_sdiv.s
+++ b/runtime/powerpc/i64_sdiv.s
@@ -41,8 +41,10 @@
 .balign 16
 .globl __i64_sdiv
 __i64_sdiv:
- mflr r11 # save return address
- xor r12, r3, r5 # save sign of result in r12 (top bit)
+ mflr r0
+ stw r0, 4(r1) # save return address in caller's frame
+ xor r0, r3, r5 # compute sign of result (top bit)
+ mtctr r0 # save it in CTR (why not?)
 srawi r0, r3, 31 # take absolute value of N
 xor r4, r4, r0 # (i.e. N = N ^ r0 - r0,
 xor r3, r3, r0 # where r0 = 0 if N >= 0 and r0 = -1 if N < 0)
@@ -54,12 +56,14 @@ __i64_sdiv:
 subfc r6, r0, r6
 subfe r5, r0, r5
 bl __i64_udivmod # do unsigned division
- mtlr r11 # restore return address
- srawi r0, r12, 31 # apply expected sign to quotient
- xor r8, r8, r0 # RES = Q if r12 >= 0, -Q if r12 < 0
- xor r7, r7, r0
- subfc r4, r0, r8
- subfe r3, r0, r7
+ lwz r0, 4(r1)
+ mtlr r0 # restore return address
+ mfctr r0
+ srawi r0, r0, 31 # apply expected sign to quotient
+ xor r6, r6, r0 # RES = Q if CTR >= 0, -Q if CTR < 0
+ xor r5, r5, r0
+ subfc r4, r0, r6
+ subfe r3, r0, r5
 blr
 .type __i64_sdiv, @function
 .size __i64_sdiv, .-__i64_sdiv