From fcd76683e5cc5f8d4b9a2179df6ea1dc9614c39a Mon Sep 17 00:00:00 2001 From: David Monniaux Date: Sat, 6 Apr 2019 10:54:18 +0200 Subject: various hand optimizations --- .../binary_search.ccomp.k1c.s.optimized07 | 372 +++++++++++++++++++++ 1 file changed, 372 insertions(+) create mode 100644 test/monniaux/binary_search/binary_search.ccomp.k1c.s.optimized07 (limited to 'test/monniaux/binary_search/binary_search.ccomp.k1c.s.optimized07') diff --git a/test/monniaux/binary_search/binary_search.ccomp.k1c.s.optimized07 b/test/monniaux/binary_search/binary_search.ccomp.k1c.s.optimized07 new file mode 100644 index 00000000..3214c5bc --- /dev/null +++ b/test/monniaux/binary_search/binary_search.ccomp.k1c.s.optimized07 @@ -0,0 +1,372 @@ +# File generated by CompCert 3.5 +# Command line: -O3 -Wall -Wno-c11-extensions -fno-unprototyped -S binary_search.c -o binary_search.ccomp.k1c.s + .section .rodata + .balign 1 +__stringlit_1: + .ascii "position1: %d\012position2: %d\012position3: %d\012random fill cycles: %lu\012search1 cycles: %lu\012search2 cycles: %lu\012search3 cycles: %lu\012\000" + .type __stringlit_1, @object + .size __stringlit_1, . - __stringlit_1 + .text + .balign 2 + .globl my_bsearch +my_bsearch: + addd $r17 = $r12, 0 + addd $r12 = $r12, -16 +;; + sd 0[$r12] = $r17 +;; +;; + get $r16 = $ra +;; + sd 8[$r12] = $r16 +;; + make $r4, 0 + addw $r6 = $r1, -1 +;; +.L100: + addw $r5 = $r4, $r6 +;; + srlw $r3 = $r5, 1 +;; + zxwd $r5 = $r3 +;; + slld $r8 = $r5, 2 +;; + lws $r1 = $r8[$r0] +;; + compw.eq $r32 = $r1, $r2 +;; + cb.wnez $r32? .L101 +;; + compw.lt $r32 = $r1, $r2 +;; + cb.wnez $r32? .L102 +;; + addw $r6 = $r3, -1 + goto .L103 +;; +.L102: + addw $r4 = $r3, 1 +;; +.L103: + compw.leu $r32 = $r4, $r6 +;; + cb.wnez $r32? .L100 +;; + make $r0, -1 + goto .L104 +;; +.L101: + addd $r0 = $r3, 0 +;; +.L104: + ld $r16 = 8[$r12] +;; + set $ra = $r16 +;; + addd $r12 = $r12, 16 +;; + ret +;; + .type my_bsearch, @function + .size my_bsearch, . - my_bsearch + .text + .balign 2 + .globl my_bsearch2 +my_bsearch2: + addd $r17 = $r12, 0 + addd $r12 = $r12, -16 +;; + sd 0[$r12] = $r17 +;; +;; + get $r16 = $ra +;; + sd 8[$r12] = $r16 +;; + make $r5, 0 + addw $r4 = $r1, -1 +;; +.L105: + addw $r9 = $r5, $r4 +;; + srlw $r1 = $r9, 1 +;; + zxwd $r10 = $r1 +;; + slld $r8 = $r10, 2 +;; + lws $r7 = $r8[$r0] +;; + compw.ne $r32 = $r7, $r2 +;; + cb.wnez $r32? .L106 +;; + addd $r0 = $r1, 0 + goto .L107 +;; +.L106: + compw.lt $r3 = $r7, $r2 + addw $r6 = $r1, 1 + addw $r1 = $r1, -1 +;; + cmoved.wnez $r3? $r5 = $r6 + compw.gt $r6 = $r7, $r2 +;; + cmoved.wnez $r6? $r4 = $r1 +;; + compw.leu $r32 = $r5, $r4 +;; + cb.wnez $r32? .L105 +;; + make $r0, -1 +;; +.L107: + ld $r16 = 8[$r12] +;; + set $ra = $r16 +;; + addd $r12 = $r12, 16 +;; + ret +;; + .type my_bsearch2, @function + .size my_bsearch2, . - my_bsearch2 + .text + .balign 2 + .globl my_bsearch3 +my_bsearch3: + addd $r17 = $r12, 0 + addd $r12 = $r12, -16 +;; + sd 0[$r12] = $r17 +;; +;; + get $r16 = $ra +;; + sd 8[$r12] = $r16 +;; + make $r7, 0 + addw $r3 = $r1, -1 +;; +.L108: + addw $r11 = $r7, $r3 +;; + srlw $r6 = $r11, 1 +;; + addw $r1 = $r6, 1 + lws.xs $r4 = $r6[$r0] +;; + compw.lt $r5 = $r4, $r2 + compw.gt $r8 = $r4, $r2 + compw.eq $r32 = $r4, $r2 +;; + cmoved.wnez $r5? $r7 = $r1 + addw $r5 = $r6, -1 +;; + cmoved.wnez $r8? $r3 = $r5 + cb.wnez $r32? .L109 +;; + compw.leu $r32 = $r7, $r3 +;; + cb.wnez $r32? .L108 +;; + make $r6, -1 +;; +.L109: + addd $r0 = $r6, 0 + ld $r16 = 8[$r12] +;; + set $ra = $r16 +;; + addd $r12 = $r12, 16 +;; + ret +;; + .type my_bsearch3, @function + .size my_bsearch3, . - my_bsearch3 + .text + .balign 2 + .globl random_ascending_fill +random_ascending_fill: + addd $r17 = $r12, 0 + addd $r12 = $r12, -16 +;; + sd 0[$r12] = $r17 +;; +;; + get $r16 = $ra +;; + sd 8[$r12] = $r16 +;; + make $r7, 41 + make $r8, 0 + make $r6, 0 +;; +.L110: + compw.geu $r32 = $r6, $r1 +;; + cb.wnez $r32? .L111 +;; + zxwd $r2 = $r6 + andw $r3 = $r7, 1073741824 +;; + slld $r4 = $r2, 2 +;; + sw $r4[$r0] = $r8 + addw $r8 = $r8, 1 + cb.weqz $r3? .L112 +;; + addw $r8 = $r8, 1 +;; +.L112: + mulw $r2 = $r7, 97 + addw $r6 = $r6, 1 +;; + addw $r7 = $r2, 5 + goto .L110 +;; +.L111: + ld $r16 = 8[$r12] +;; + set $ra = $r16 +;; + addd $r12 = $r12, 16 +;; + ret +;; + .type random_ascending_fill, @function + .size random_ascending_fill, . - random_ascending_fill + .text + .balign 2 + .globl main +main: + addd $r17 = $r12, 0 + addd $r12 = $r12, -96 +;; + sd 0[$r12] = $r17 +;; +;; + get $r16 = $ra +;; + sd 8[$r12] = $r16 +;; + sd 16[$r12] = $r18 + make $r0, 20000 +;; + sd 24[$r12] = $r19 + make $r19, 1502 +;; + sd 32[$r12] = $r20 + make $r20, 5000 +;; + sd 40[$r12] = $r21 +;; + sd 48[$r12] = $r22 +;; + sd 56[$r12] = $r23 +;; + sd 64[$r12] = $r24 +;; + sd 72[$r12] = $r25 +;; + sd 80[$r12] = $r26 +;; + sd 88[$r12] = $r27 + call malloc +;; + addd $r21 = $r0, 0 + call get_current_cycle +;; + addd $r18 = $r0, 0 + addd $r1 = $r20, 0 + addd $r0 = $r21, 0 + call random_ascending_fill +;; + call get_current_cycle +;; + sbfd $r24 = $r18, $r0 + addd $r2 = $r19, 0 + addd $r1 = $r20, 0 + addd $r0 = $r21, 0 + call my_bsearch +;; + call get_current_cycle +;; + addd $r18 = $r0, 0 + addd $r2 = $r19, 0 + addd $r1 = $r20, 0 + addd $r0 = $r21, 0 + call my_bsearch +;; + addd $r26 = $r0, 0 + call get_current_cycle +;; + sbfd $r23 = $r18, $r0 + call get_current_cycle +;; + addd $r18 = $r0, 0 + addd $r2 = $r19, 0 + addd $r1 = $r20, 0 + addd $r0 = $r21, 0 + call my_bsearch2 +;; + addd $r25 = $r0, 0 + call get_current_cycle +;; + sbfd $r22 = $r18, $r0 + call get_current_cycle +;; + addd $r27 = $r0, 0 + addd $r2 = $r19, 0 + addd $r1 = $r20, 0 + addd $r0 = $r21, 0 + call my_bsearch3 +;; + addd $r18 = $r0, 0 + call get_current_cycle +;; + sbfd $r7 = $r27, $r0 + make $r0 = __stringlit_1 + addd $r6 = $r22, 0 + addd $r5 = $r23, 0 +;; + addd $r4 = $r24, 0 + addd $r3 = $r18, 0 + addd $r2 = $r25, 0 + addd $r1 = $r26, 0 + call printf +;; + addd $r0 = $r21, 0 + call free +;; + make $r0, 0 + ld $r16 = 8[$r12] +;; + ld $r18 = 16[$r12] +;; + ld $r19 = 24[$r12] +;; + ld $r20 = 32[$r12] + set $ra = $r16 +;; + ld $r21 = 40[$r12] +;; + ld $r22 = 48[$r12] +;; + ld $r23 = 56[$r12] +;; + ld $r24 = 64[$r12] +;; + ld $r25 = 72[$r12] +;; + ld $r26 = 80[$r12] +;; + ld $r27 = 88[$r12] +;; + addd $r12 = $r12, 96 +;; + ret +;; + .type main, @function + .size main, . - main -- cgit