diff options
Diffstat (limited to 'test/monniaux')
-rw-r--r-- | test/monniaux/bitsliced-aes/bs.ccomp.k1c.s.optimized | 3268 | ||||
-rw-r--r-- | test/monniaux/bitsliced-aes/notes.txt | 3 |
2 files changed, 3271 insertions, 0 deletions
diff --git a/test/monniaux/bitsliced-aes/bs.ccomp.k1c.s.optimized b/test/monniaux/bitsliced-aes/bs.ccomp.k1c.s.optimized new file mode 100644 index 00000000..d939f856 --- /dev/null +++ b/test/monniaux/bitsliced-aes/bs.ccomp.k1c.s.optimized @@ -0,0 +1,3268 @@ +# File generated by CompCert 3.5 +# Command line: -O3 -Wall -Wno-c11-extensions -fno-unprototyped -S bs.c -o bs.ccomp.k1c.s + .text + .balign 2 + .globl bs_addroundkey +bs_addroundkey: + addd $r17 = $r12, 0 + addd $r12 = $r12, -16 +;; + sd 0[$r12] = $r17 +;; +;; + get $r16 = $ra +;; + sd 8[$r12] = $r16 +;; + make $r5, 0 +;; +.L100: + sxwd $r6 = $r5 + addw $r5 = $r5, 1 + make $r32, 128 +;; + slld $r2 = $r6, 3 + compw.lt $r32 = $r5, $r32 +;; + addd $r3 = $r0, $r2 + addd $r4 = $r1, $r2 +;; + ld $r7 = 0[$r3] +;; + ld $r9 = 0[$r4] +;; + xord $r6 = $r7, $r9 +;; + sd 0[$r3] = $r6 +;; + cb.wnez $r32? .L100 +;; + ld $r16 = 8[$r12] +;; + set $ra = $r16 +;; + addd $r12 = $r12, 16 +;; + ret +;; + .type bs_addroundkey, @function + .size bs_addroundkey, . - bs_addroundkey + .text + .balign 2 + .globl bs_apply_sbox +bs_apply_sbox: + addd $r17 = $r12, 0 + addd $r12 = $r12, -32 +;; + sd 0[$r12] = $r17 +;; +;; + get $r16 = $ra +;; + sd 8[$r12] = $r16 +;; + sd 16[$r12] = $r18 + addd $r18 = $r0, 0 +;; + sd 24[$r12] = $r19 + make $r19, 0 +;; +.L101: + sxwd $r1 = $r19 +;; + slld $r0 = $r1, 3 +;; + addd $r0 = $r18, $r0 + call bs_sbox +;; + addw $r19 = $r19, 8 + make $r32, 128 +;; + compw.lt $r32 = $r19, $r32 +;; + cb.wnez $r32? .L101 +;; + ld $r18 = 16[$r12] +;; + ld $r19 = 24[$r12] +;; + ld $r16 = 8[$r12] +;; + set $ra = $r16 +;; + addd $r12 = $r12, 32 +;; + ret +;; + .type bs_apply_sbox, @function + .size bs_apply_sbox, . 
- bs_apply_sbox + .text + .balign 2 + .globl bs_apply_sbox_rev +bs_apply_sbox_rev: + addd $r17 = $r12, 0 + addd $r12 = $r12, -32 +;; + sd 0[$r12] = $r17 +;; +;; + get $r16 = $ra +;; + sd 8[$r12] = $r16 +;; + sd 16[$r12] = $r18 + addd $r18 = $r0, 0 +;; + sd 24[$r12] = $r19 + make $r19, 0 +;; +.L102: + sxwd $r1 = $r19 +;; + slld $r0 = $r1, 3 +;; + addd $r0 = $r18, $r0 + call bs_sbox_rev +;; + addw $r19 = $r19, 8 + make $r32, 128 +;; + compw.lt $r32 = $r19, $r32 +;; + cb.wnez $r32? .L102 +;; + ld $r18 = 16[$r12] +;; + ld $r19 = 24[$r12] +;; + ld $r16 = 8[$r12] +;; + set $ra = $r16 +;; + addd $r12 = $r12, 32 +;; + ret +;; + .type bs_apply_sbox_rev, @function + .size bs_apply_sbox_rev, . - bs_apply_sbox_rev + .text + .balign 2 + .globl bs_sbox_rev +bs_sbox_rev: + addd $r17 = $r12, 0 + addd $r12 = $r12, -96 +;; + sd 0[$r12] = $r17 +;; +;; + get $r16 = $ra +;; + sd 8[$r12] = $r16 +;; + sd 16[$r12] = $r18 +;; + sd 24[$r12] = $r19 +;; + ld $r7 = 48[$r0] +;; + ld $r3 = 56[$r0] +;; + ld $r4 = 32[$r0] + nxord $r40 = $r3, $r7 +;; + xord $r10 = $r3, $r4 + nxord $r11 = $r7, $r4 + ld $r2 = 8[$r0] +;; + ld $r5 = 24[$r0] + nxord $r45 = $r7, $r10 + xord $r59 = $r7, $r2 +;; + xord $r41 = $r4, $r5 + ld $r1 = 0[$r0] + xord $r60 = $r5, $r45 + andd $r33 = $r10, $r45 +;; + nxord $r35 = $r5, $r1 + xord $r63 = $r2, $r1 + nxord $r39 = $r1, $r41 + ld $r6 = 40[$r0] +;; + xord $r46 = $r11, $r63 + xord $r54 = $r40, $r35 + ld $r7 = 16[$r0] + nxord $r57 = $r6, $r5 +;; + xord $r52 = $r40, $r63 + xord $r50 = $r41, $r63 + nxord $r47 = $r6, $r7 + nxord $r38 = $r7, $r2 +;; + nxord $r58 = $r6, $r46 + xord $r19 = $r11, $r47 + xord $r63 = $r59, $r57 + xord $r7 = $r41, $r38 +;; + xord $r44 = $r35, $r59 + xord $r18 = $r3, $r47 + xord $r3 = $r54, $r7 + xord $r55 = $r54, $r38 +;; + nxord $r34 = $r6, $r41 + xord $r2 = $r50, $r63 + andd $r57 = $r52, $r19 + andd $r17 = $r50, $r63 +;; + xord $r36 = $r55, $r57 + andd $r62 = $r46, $r18 + andd $r53 = $r11, $r39 + xord $r6 = $r2, $r17 +;; + andd $r42 = $r44, $r58 + 
andd $r15 = $r41, $r3 + andd $r2 = $r60, $r7 + andd $r37 = $r40, $r54 +;; + xord $r59 = $r62, $r57 + xord $r51 = $r42, $r17 + xord $r8 = $r2, $r15 + xord $r4 = $r37, $r15 +;; + xord $r5 = $r36, $r33 + xord $r38 = $r59, $r35 + xord $r48 = $r6, $r53 + xord $r47 = $r51, $r4 +;; + xord $r53 = $r5, $r8 + xord $r43 = $r38, $r4 + xord $r56 = $r48, $r8 + xord $r57 = $r47, $r34 +;; + xord $r49 = $r56, $r57 + andd $r48 = $r56, $r53 + xord $r47 = $r53, $r43 + andd $r9 = $r53, $r57 +;; + xord $r36 = $r43, $r48 + xord $r35 = $r57, $r48 + andd $r62 = $r47, $r9 + xord $r17 = $r47, $r48 +;; + andd $r15 = $r35, $r47 + andd $r42 = $r36, $r49 + andd $r47 = $r43, $r56 + xord $r59 = $r49, $r48 +;; + andd $r37 = $r49, $r47 + xord $r5 = $r43, $r15 + xord $r4 = $r62, $r17 + xord $r55 = $r57, $r42 +;; + xord $r1 = $r37, $r59 + xord $r2 = $r5, $r55 + xord $r47 = $r5, $r4 + andd $r35 = $r4, $r39 +;; + xord $r61 = $r4, $r1 + xord $r33 = $r55, $r1 + andd $r62 = $r1, $r45 + andd $r45 = $r55, $r18 +;; + xord $r48 = $r2, $r61 + andd $r49 = $r2, $r3 + andd $r6 = $r1, $r10 + andd $r3 = $r47, $r50 +;; + andd $r56 = $r47, $r63 + andd $r42 = $r5, $r58 + andd $r1 = $r4, $r11 + andd $r57 = $r2, $r41 +;; + andd $r9 = $r61, $r54 + andd $r51 = $r33, $r52 + andd $r58 = $r55, $r46 + andd $r53 = $r5, $r44 +;; + andd $r41 = $r48, $r60 + andd $r10 = $r61, $r40 + xord $r59 = $r49, $r57 + xord $r61 = $r3, $r1 +;; + andd $r34 = $r33, $r19 + andd $r39 = $r48, $r7 + xord $r55 = $r9, $r41 + xord $r60 = $r45, $r6 +;; + xord $r48 = $r62, $r35 + xord $r15 = $r56, $r53 + xord $r44 = $r59, $r61 + xord $r49 = $r51, $r10 +;; + xord $r54 = $r34, $r42 + xord $r51 = $r58, $r60 + xord $r59 = $r59, $r48 + xord $r8 = $r56, $r42 +;; + xord $r47 = $r9, $r1 + xord $r11 = $r60, $r15 + xord $r40 = $r55, $r44 + xord $r60 = $r15, $r51 +;; + xord $r52 = $r56, $r41 + xord $r56 = $r10, $r54 + xord $r2 = $r49, $r59 + xord $r5 = $r59, $r60 +;; + xord $r7 = $r3, $r55 + xord $r61 = $r51, $r56 + xord $r59 = $r47, $r11 + xord $r47 = $r8, $r40 
+;; + xord $r63 = $r35, $r39 + xord $r4 = $r34, $r45 + sd 88[$r12] = $r47 + xord $r51 = $r2, $r59 +;; + xord $r10 = $r55, $r48 + xord $r50 = $r44, $r63 + sd 80[$r12] = $r51 + xord $r37 = $r7, $r5 +;; + xord $r53 = $r54, $r44 + sd 72[$r12] = $r37 + xord $r37 = $r4, $r40 + xord $r40 = $r50, $r61 +;; + xord $r1 = $r58, $r57 + sd 64[$r12] = $r37 + xord $r46 = $r10, $r53 + xord $r7 = $r52, $r50 +;; + sd 56[$r12] = $r40 + xord $r49 = $r49, $r1 + addd $r1 = $r12, 32 + make $r2, 64 +;; + sd 48[$r12] = $r46 +;; + sd 40[$r12] = $r7 +;; + sd 32[$r12] = $r49 + call memmove +;; + ld $r18 = 16[$r12] +;; + ld $r19 = 24[$r12] +;; + ld $r16 = 8[$r12] +;; + set $ra = $r16 +;; + addd $r12 = $r12, 96 +;; + ret +;; + .type bs_sbox_rev, @function + .size bs_sbox_rev, . - bs_sbox_rev + .text + .balign 2 + .globl bs_sbox +bs_sbox: + addd $r17 = $r12, 0 + addd $r12 = $r12, -80 +;; + sd 0[$r12] = $r17 +;; +;; + get $r16 = $ra +;; + sd 8[$r12] = $r16 +;; + ld $r5 = 56[$r0] +;; + ld $r6 = 32[$r0] +;; + xord $r41 = $r5, $r6 + ld $r2 = 16[$r0] +;; + xord $r42 = $r5, $r2 + ld $r4 = 8[$r0] + xord $r49 = $r6, $r2 +;; + xord $r48 = $r5, $r4 + ld $r55 = 24[$r0] +;; + xord $r9 = $r55, $r4 + ld $r3 = 48[$r0] +;; + xord $r5 = $r41, $r9 + ld $r7 = 40[$r0] + xord $r34 = $r3, $r2 +;; + xord $r10 = $r3, $r7 + ld $r1 = 0[$r0] + xord $r11 = $r7, $r2 + xord $r3 = $r48, $r49 +;; + xord $r33 = $r9, $r34 + xord $r8 = $r9, $r11 + xord $r44 = $r6, $r1 + xord $r47 = $r4, $r1 +;; + xord $r59 = $r1, $r10 + xord $r7 = $r5, $r34 + xord $r9 = $r10, $r44 + xord $r4 = $r10, $r47 +;; + xord $r61 = $r1, $r5 + xord $r57 = $r5, $r10 + andd $r50 = $r3, $r5 + andd $r43 = $r9, $r1 +;; + xord $r34 = $r59, $r8 + xord $r6 = $r41, $r9 + xord $r35 = $r42, $r4 + xord $r36 = $r48, $r8 +;; + xord $r38 = $r41, $r11 + xord $r40 = $r7, $r50 + xord $r11 = $r43, $r50 + andd $r50 = $r48, $r8 +;; + xord $r15 = $r42, $r57 + andd $r62 = $r35, $r61 + andd $r37 = $r4, $r59 + xord $r52 = $r36, $r50 +;; + andd $r53 = $r6, $r34 + andd $r55 = $r41, 
$r33 + andd $r46 = $r49, $r38 + andd $r54 = $r42, $r57 +;; + xord $r39 = $r53, $r50 + xord $r60 = $r46, $r55 + xord $r55 = $r54, $r55 + xord $r10 = $r40, $r62 +;; + xord $r44 = $r6, $r34 + xord $r43 = $r11, $r15 + xord $r15 = $r52, $r37 + xord $r17 = $r39, $r55 +;; + xord $r45 = $r10, $r60 + xord $r55 = $r43, $r55 + xord $r50 = $r15, $r60 + xord $r46 = $r17, $r44 +;; + xord $r63 = $r50, $r46 + andd $r43 = $r50, $r45 + xord $r56 = $r45, $r55 + andd $r54 = $r45, $r46 +;; + xord $r36 = $r55, $r43 + xord $r47 = $r46, $r43 + andd $r40 = $r56, $r54 + andd $r60 = $r55, $r50 +;; + andd $r2 = $r47, $r56 + andd $r58 = $r36, $r63 + xord $r36 = $r56, $r43 + andd $r15 = $r63, $r60 +;; + xord $r47 = $r63, $r43 + xord $r17 = $r55, $r2 + xord $r50 = $r40, $r36 + xord $r52 = $r46, $r58 +;; + xord $r58 = $r15, $r47 + xord $r51 = $r17, $r52 + xord $r7 = $r17, $r50 + andd $r43 = $r52, $r1 +;; + xord $r53 = $r50, $r58 + xord $r62 = $r52, $r58 + andd $r44 = $r7, $r8 + andd $r8 = $r50, $r59 +;; + xord $r40 = $r51, $r53 + andd $r45 = $r58, $r61 + andd $r54 = $r51, $r33 + andd $r10 = $r58, $r35 +;; + andd $r47 = $r40, $r38 + andd $r46 = $r62, $r3 + andd $r2 = $r51, $r41 + xord $r35 = $r8, $r10 +;; + andd $r5 = $r62, $r5 + andd $r36 = $r17, $r34 + andd $r39 = $r53, $r57 + andd $r56 = $r7, $r48 +;; + andd $r41 = $r40, $r49 + xord $r34 = $r45, $r46 + xord $r51 = $r44, $r2 + xord $r62 = $r54, $r47 +;; + andd $r38 = $r52, $r9 + andd $r37 = $r50, $r4 + andd $r9 = $r17, $r6 + xord $r59 = $r46, $r35 +;; + andd $r61 = $r53, $r42 + xord $r1 = $r2, $r41 + xord $r63 = $r5, $r43 + xord $r33 = $r39, $r56 +;; + xord $r42 = $r41, $r51 + xord $r51 = $r5, $r34 + xord $r52 = $r36, $r37 + xord $r49 = $r59, $r62 +;; + xord $r57 = $r47, $r33 + xord $r3 = $r9, $r63 + xord $r11 = $r54, $r2 + xord $r50 = $r10, $r1 +;; + xord $r37 = $r43, $r36 + xord $r6 = $r56, $r52 + xord $r9 = $r51, $r62 + xord $r40 = $r42, $r49 +;; + xord $r36 = $r61, $r33 + xord $r10 = $r42, $r57 + xord $r56 = $r52, $r57 + xord $r57 = $r3, 
$r11 +;; + xord $r5 = $r35, $r51 + sd 72[$r12] = $r40 + nxord $r43 = $r50, $r9 + nxord $r17 = $r36, $r57 +;; + xord $r39 = $r8, $r1 + xord $r53 = $r38, $r35 + xord $r8 = $r1, $r35 + sd 64[$r12] = $r43 +;; + xord $r7 = $r34, $r37 + xord $r58 = $r3, $r53 + sd 56[$r12] = $r17 + xord $r38 = $r42, $r5 +;; + xord $r48 = $r6, $r63 + sd 48[$r12] = $r38 + xord $r1 = $r8, $r7 + xord $r43 = $r10, $r58 +;; + sd 40[$r12] = $r1 + nxord $r4 = $r39, $r56 + nxord $r34 = $r42, $r48 + addd $r1 = $r12, 16 +;; + sd 32[$r12] = $r43 + make $r2, 64 +;; + sd 24[$r12] = $r4 +;; + sd 16[$r12] = $r34 + call memmove +;; + ld $r16 = 8[$r12] +;; + set $ra = $r16 +;; + addd $r12 = $r12, 80 +;; + ret +;; + .type bs_sbox, @function + .size bs_sbox, . - bs_sbox + .text + .balign 2 + .globl bs_transpose +bs_transpose: + addd $r17 = $r12, 0 + addd $r12 = $r12, -1056 +;; + sd 0[$r12] = $r17 +;; +;; + get $r16 = $ra +;; + sd 8[$r12] = $r16 +;; + sd 16[$r12] = $r18 + addd $r18 = $r0, 0 + addd $r0 = $r12, 24 + make $r1, 0 +;; + make $r2, 1024 + call memset +;; + addd $r0 = $r12, 24 + addd $r1 = $r18, 0 + call bs_transpose_dst +;; + addd $r1 = $r12, 24 + make $r2, 1024 + addd $r0 = $r18, 0 + call memmove +;; + ld $r16 = 8[$r12] +;; + ld $r18 = 16[$r12] +;; + set $ra = $r16 +;; + addd $r12 = $r12, 1056 +;; + ret +;; + .type bs_transpose, @function + .size bs_transpose, . 
- bs_transpose + .text + .balign 2 + .globl bs_transpose_dst +bs_transpose_dst: + addd $r17 = $r12, 0 + addd $r12 = $r12, -16 +;; + sd 0[$r12] = $r17 +;; +;; + get $r16 = $ra +;; + sd 8[$r12] = $r16 +;; + make $r4, 0 +;; +.L103: + make $r35, 1 + make $r17, 0 +;; + slld $r41 = $r35, $r4 +;; + addw $r9 = $r41, 0 +;; +.L104: + sllw $r10 = $r4, 1 + sllw $r42 = $r17, 6 + make $r6, 0 +;; + addw $r36 = $r10, $r17 +;; + sxwd $r15 = $r36 +;; + slld $r2 = $r15, 3 +;; + addd $r8 = $r1, $r2 +;; + ld $r11 = 0[$r8] +;; +.L105: + addw $r40 = $r42, $r6 + make $r2, 0 + make $r44, 1 + make $r32, 64 +;; + sxwd $r34 = $r40 + sxwd $r39 = $r9 + slld $r37 = $r44, $r6 + addw $r6 = $r6, 1 +;; + ld.xs $r7 = $r34[$r0] + andd $r33 = $r11, $r37 + compw.lt $r32 = $r6, $r32 +;; + cmoved.dnez $r33? $r2 = $r39 +;; + ord $r38 = $r7, $r2 +;; + sd.xs $r34[$r0] = $r38 + cb.wnez $r32? .L105 +;; + addw $r17 = $r17, 1 + make $r32, 2 +;; + compw.lt $r32 = $r17, $r32 +;; + cb.wnez $r32? .L104 +;; + addw $r4 = $r4, 1 + make $r32, 64 +;; + compw.lt $r32 = $r4, $r32 +;; + cb.wnez $r32? .L103 +;; + ld $r16 = 8[$r12] +;; + set $ra = $r16 +;; + addd $r12 = $r12, 16 +;; + ret +;; + .type bs_transpose_dst, @function + .size bs_transpose_dst, . - bs_transpose_dst + .text + .balign 2 + .globl bs_transpose_rev +bs_transpose_rev: + addd $r17 = $r12, 0 + addd $r12 = $r12, -1056 +;; + sd 0[$r12] = $r17 +;; +;; + get $r16 = $ra +;; + sd 8[$r12] = $r16 +;; + sd 16[$r12] = $r18 + addd $r18 = $r0, 0 + addd $r0 = $r12, 24 + make $r1, 0 +;; + make $r2, 1024 + call memset +;; + make $r3, 0 +;; +.L106: + sxwd $r8 = $r3 + sraw $r32 = $r3, 31 + make $r11, 0 +;; + slld $r34 = $r8, 3 + srlw $r32 = $r32, 26 +;; + addd $r6 = $r18, $r34 + addw $r32 = $r3, $r32 +;; + sraw $r2 = $r32, 6 +;; + sxwd $r5 = $r2 +;; + ld $r36 = 0[$r6] +;; +.L107: + make $r39, 1 +;; + slld $r38 = $r39, $r11 +;; + andd $r17 = $r36, $r38 +;; + cb.deqz $r17? 
.L108 +;; + make $r44, 1 + sraw $r32 = $r3, 31 +;; + srlw $r32 = $r32, 26 +;; + addw $r32 = $r3, $r32 +;; + sraw $r40 = $r32, 6 +;; + sllw $r9 = $r40, 6 +;; + sbfw $r45 = $r9, $r3 +;; + slld $r0 = $r44, $r45 + goto .L109 +;; +.L108: + make $r0, 0 +;; +.L109: + addd $r37 = $r12, 24 + sllw $r46 = $r11, 1 + addw $r11 = $r11, 1 + make $r32, 64 +;; + sxwd $r7 = $r46 + compw.lt $r32 = $r11, $r32 +;; + addd $r4 = $r7, $r5 +;; + slld $r10 = $r4, 3 +;; + addd $r1 = $r37, $r10 +;; + ld $r41 = 0[$r1] +;; + ord $r35 = $r41, $r0 +;; + sd 0[$r1] = $r35 +;; + cb.wnez $r32? .L107 +;; + addw $r3 = $r3, 1 + make $r32, 128 +;; + compw.lt $r32 = $r3, $r32 +;; + cb.wnez $r32? .L106 +;; + addd $r1 = $r12, 24 + make $r2, 1024 + addd $r0 = $r18, 0 + call memmove +;; + ld $r16 = 8[$r12] +;; + ld $r18 = 16[$r12] +;; + set $ra = $r16 +;; + addd $r12 = $r12, 1056 +;; + ret +;; + .type bs_transpose_rev, @function + .size bs_transpose_rev, . - bs_transpose_rev + .text + .balign 2 + .globl bs_shiftrows +bs_shiftrows: + addd $r17 = $r12, 0 + addd $r12 = $r12, -1040 +;; + sd 0[$r12] = $r17 +;; +;; + get $r16 = $ra +;; + sd 8[$r12] = $r16 +;; + addd $r50 = $r12, 16 + addd $r1 = $r0, 0 + addd $r43 = $r0, 256 + addd $r8 = $r0, 512 +;; + addd $r60 = $r0, 768 + make $r15, 0 + make $r52, 32 + make $r3, 64 +;; + make $r36, 96 + make $r7, 0 +;; +.L110: + ld $r5 = 0[$r1] + addw $r59 = $r52, 40 + addw $r7 = $r7, 1 + make $r32, 4 +;; + sd 0[$r50] = $r5 + andw $r52 = $r59, 127 + addw $r63 = $r36, 40 + compw.lt $r32 = $r7, $r32 +;; + andw $r36 = $r63, 127 + sxwd $r62 = $r52 +;; + slld $r53 = $r62, 3 +;; + ld $r11 = 8[$r1] +;; + sd 8[$r50] = $r11 +;; + ld $r61 = 16[$r1] +;; + sd 16[$r50] = $r61 +;; + ld $r6 = 24[$r1] +;; + sd 24[$r50] = $r6 +;; + ld $r56 = 32[$r1] +;; + sd 32[$r50] = $r56 +;; + ld $r2 = 40[$r1] +;; + sd 40[$r50] = $r2 + addw $r2 = $r3, 40 +;; + andw $r3 = $r2, 127 + sxwd $r2 = $r36 +;; + sxwd $r5 = $r3 + slld $r39 = $r2, 3 +;; + ld $r38 = 48[$r1] + slld $r46 = $r5, 3 +;; + sd 48[$r50] = $r38 
+;; + ld $r54 = 56[$r1] +;; + sd 56[$r50] = $r54 +;; + ld $r4 = 0[$r43] +;; + sd 256[$r50] = $r4 +;; + ld $r58 = 8[$r43] +;; + sd 264[$r50] = $r58 +;; + ld $r10 = 16[$r43] +;; + sd 272[$r50] = $r10 +;; + ld $r34 = 24[$r43] +;; + sd 280[$r50] = $r34 +;; + ld $r51 = 32[$r43] +;; + sd 288[$r50] = $r51 +;; + ld $r9 = 40[$r43] +;; + sd 296[$r50] = $r9 +;; + ld $r1 = 48[$r43] +;; + sd 304[$r50] = $r1 +;; + ld $r4 = 56[$r43] + addd $r43 = $r0, $r53 +;; + sd 312[$r50] = $r4 +;; + ld $r41 = 0[$r8] +;; + sd 512[$r50] = $r41 +;; + ld $r9 = 8[$r8] +;; + sd 520[$r50] = $r9 +;; + ld $r6 = 16[$r8] +;; + sd 528[$r50] = $r6 +;; + ld $r9 = 24[$r8] +;; + sd 536[$r50] = $r9 +;; + ld $r42 = 32[$r8] +;; + sd 544[$r50] = $r42 +;; + ld $r35 = 40[$r8] +;; + sd 552[$r50] = $r35 +;; + ld $r10 = 48[$r8] +;; + sd 560[$r50] = $r10 +;; + ld $r57 = 56[$r8] +;; + sd 568[$r50] = $r57 +;; + ld $r17 = 0[$r60] +;; + sd 768[$r50] = $r17 +;; + ld $r8 = 8[$r60] +;; + sd 776[$r50] = $r8 + addw $r8 = $r15, 40 +;; + andw $r15 = $r8, 127 + addd $r8 = $r0, $r46 +;; + sxwd $r37 = $r15 +;; + ld $r48 = 16[$r60] + slld $r40 = $r37, 3 +;; + sd 784[$r50] = $r48 + addd $r1 = $r0, $r40 +;; + ld $r33 = 24[$r60] +;; + sd 792[$r50] = $r33 +;; + ld $r47 = 32[$r60] +;; + sd 800[$r50] = $r47 +;; + ld $r4 = 40[$r60] +;; + sd 808[$r50] = $r4 +;; + ld $r44 = 48[$r60] +;; + sd 816[$r50] = $r44 +;; + ld $r49 = 56[$r60] + addd $r60 = $r0, $r39 +;; + sd 824[$r50] = $r49 + addd $r50 = $r50, 64 + cb.wnez $r32? .L110 +;; + addd $r1 = $r12, 16 + make $r2, 1024 + call memmove +;; + ld $r16 = 8[$r12] +;; + set $ra = $r16 +;; + addd $r12 = $r12, 1040 +;; + ret +;; + .type bs_shiftrows, @function + .size bs_shiftrows, . 
- bs_shiftrows + .text + .balign 2 + .globl bs_shiftrows_rev +bs_shiftrows_rev: + addd $r17 = $r12, 0 + addd $r12 = $r12, -1040 +;; + sd 0[$r12] = $r17 +;; +;; + get $r16 = $ra +;; + sd 8[$r12] = $r16 +;; + addd $r56 = $r12, 16 + addd $r34 = $r12, 16 + addd $r45 = $r12, 272 + addd $r6 = $r12, 528 +;; + addd $r62 = $r12, 784 + make $r4, 0 + make $r10, 32 + make $r55, 64 +;; + make $r2, 96 + make $r59, 0 +;; +.L111: + ld $r43 = 0[$r0] + addw $r9 = $r4, 40 + addw $r59 = $r59, 1 + make $r32, 4 +;; + sd 0[$r34] = $r43 + andw $r4 = $r9, 127 + addw $r51 = $r10, 40 + compw.lt $r32 = $r59, $r32 +;; + andw $r10 = $r51, 127 + sxwd $r39 = $r4 +;; + slld $r60 = $r39, 3 +;; + ld $r57 = 8[$r0] +;; + sd 8[$r34] = $r57 +;; + ld $r63 = 16[$r0] +;; + sd 16[$r34] = $r63 +;; + ld $r7 = 24[$r0] +;; + sd 24[$r34] = $r7 +;; + ld $r44 = 32[$r0] +;; + sd 32[$r34] = $r44 +;; + ld $r42 = 40[$r0] +;; + sd 40[$r34] = $r42 +;; + ld $r40 = 48[$r0] +;; + sd 48[$r34] = $r40 +;; + ld $r61 = 56[$r0] +;; + sd 56[$r34] = $r61 + addd $r34 = $r56, $r60 +;; + ld $r35 = 256[$r0] +;; + sd 0[$r45] = $r35 +;; + ld $r1 = 264[$r0] +;; + sd 8[$r45] = $r1 + addw $r1 = $r2, 40 +;; + andw $r2 = $r1, 127 +;; + ld $r49 = 272[$r0] +;; + sd 16[$r45] = $r49 +;; + ld $r37 = 280[$r0] +;; + sd 24[$r45] = $r37 +;; + ld $r54 = 288[$r0] +;; + sd 32[$r45] = $r54 +;; + ld $r15 = 296[$r0] +;; + sd 40[$r45] = $r15 +;; + ld $r3 = 304[$r0] +;; + sd 48[$r45] = $r3 +;; + ld $r5 = 312[$r0] +;; + sd 56[$r45] = $r5 + sxwd $r5 = $r2 +;; + slld $r38 = $r5, 3 +;; + ld $r53 = 512[$r0] +;; + sd 0[$r6] = $r53 +;; + ld $r33 = 520[$r0] +;; + sd 8[$r6] = $r33 +;; + ld $r8 = 528[$r0] +;; + sd 16[$r6] = $r8 +;; + ld $r11 = 536[$r0] +;; + sd 24[$r6] = $r11 +;; + ld $r47 = 544[$r0] +;; + sd 32[$r6] = $r47 +;; + ld $r3 = 552[$r0] +;; + sd 40[$r6] = $r3 +;; + ld $r17 = 560[$r0] +;; + sd 48[$r6] = $r17 +;; + ld $r52 = 568[$r0] +;; + sd 56[$r6] = $r52 + sxwd $r6 = $r10 +;; + slld $r1 = $r6, 3 +;; + addd $r45 = $r56, $r1 +;; + ld $r8 = 768[$r0] +;; + sd 
0[$r62] = $r8 +;; + ld $r41 = 776[$r0] +;; + sd 8[$r62] = $r41 +;; + ld $r3 = 784[$r0] +;; + sd 16[$r62] = $r3 + addw $r3 = $r55, 40 +;; + andw $r55 = $r3, 127 +;; + sxwd $r7 = $r55 +;; + ld $r36 = 792[$r0] + slld $r58 = $r7, 3 +;; + sd 24[$r62] = $r36 + addd $r6 = $r56, $r58 +;; + ld $r48 = 800[$r0] +;; + sd 32[$r62] = $r48 +;; + ld $r11 = 808[$r0] +;; + sd 40[$r62] = $r11 +;; + ld $r46 = 816[$r0] +;; + sd 48[$r62] = $r46 +;; + ld $r50 = 824[$r0] + addd $r0 = $r0, 64 +;; + sd 56[$r62] = $r50 + addd $r62 = $r56, $r38 + cb.wnez $r32? .L111 +;; + addd $r0 = $r0, -256 + addd $r1 = $r12, 16 + make $r2, 1024 + call memmove +;; + ld $r16 = 8[$r12] +;; + set $ra = $r16 +;; + addd $r12 = $r12, 1040 +;; + ret +;; + .type bs_shiftrows_rev, @function + .size bs_shiftrows_rev, . - bs_shiftrows_rev + .text + .balign 2 + .globl bs_shiftmix +bs_shiftmix: + addd $r17 = $r12, 0 + addd $r12 = $r12, -1088 +;; + sd 0[$r12] = $r17 +;; +;; + get $r16 = $ra +;; + sd 8[$r12] = $r16 +;; + sd 16[$r12] = $r18 + addd $r4 = $r0, 256 + addd $r1 = $r0, 512 + addd $r3 = $r0, 768 +;; + sd 24[$r12] = $r19 + addd $r19 = $r12, 64 + make $r18, 0 + addd $r2 = $r0, 0 +;; + sd 32[$r12] = $r20 + make $r20, 64 +;; + sd 40[$r12] = $r21 + make $r21, 96 +;; + sd 48[$r12] = $r22 + make $r22, 32 +;; + sd 56[$r12] = $r23 + make $r23, 0 +;; +.L112: + ld $r46 = 64[$r4] + addw $r23 = $r23, 1 + make $r32, 4 +;; + ld $r8 = 128[$r1] + compw.lt $r32 = $r23, $r32 +;; + ld $r5 = 56[$r2] + xord $r57 = $r46, $r8 +;; + ld $r59 = 120[$r4] +;; + xord $r7 = $r5, $r59 + ld $r17 = 192[$r3] +;; + xord $r5 = $r57, $r17 +;; + xord $r61 = $r5, $r7 +;; + sd 0[$r19] = $r61 +;; + ld $r48 = 0[$r2] +;; + ld $r62 = 64[$r4] +;; + xord $r42 = $r48, $r62 + ld $r60 = 72[$r4] +;; + xord $r5 = $r42, $r60 + ld $r61 = 136[$r1] +;; + xord $r45 = $r5, $r61 + ld $r40 = 200[$r3] +;; + xord $r45 = $r45, $r40 +;; + xord $r5 = $r45, $r7 +;; + sd 8[$r19] = $r5 +;; + ld $r11 = 8[$r2] +;; + ld $r51 = 72[$r4] +;; + xord $r45 = $r11, $r51 + ld $r40 = 80[$r4] 
+;; + xord $r37 = $r45, $r40 + ld $r39 = 144[$r1] +;; + xord $r6 = $r37, $r39 + ld $r42 = 208[$r3] +;; + xord $r59 = $r6, $r42 +;; + sd 16[$r19] = $r59 +;; + ld $r6 = 16[$r2] +;; + ld $r44 = 80[$r4] +;; + xord $r43 = $r6, $r44 + ld $r9 = 88[$r4] +;; + xord $r52 = $r43, $r9 + ld $r46 = 152[$r1] +;; + xord $r42 = $r52, $r46 + ld $r48 = 216[$r3] +;; + xord $r5 = $r42, $r48 +;; + xord $r55 = $r5, $r7 +;; + sd 24[$r19] = $r55 +;; + ld $r34 = 24[$r2] +;; + ld $r8 = 88[$r4] +;; + xord $r62 = $r34, $r8 + ld $r47 = 96[$r4] +;; + xord $r38 = $r62, $r47 + ld $r50 = 160[$r1] +;; + xord $r34 = $r38, $r50 + ld $r56 = 224[$r3] +;; + xord $r8 = $r34, $r56 +;; + xord $r11 = $r8, $r7 +;; + sd 32[$r19] = $r11 +;; + ld $r5 = 96[$r4] +;; + ld $r53 = 32[$r2] +;; + xord $r44 = $r53, $r5 + ld $r54 = 168[$r1] +;; + ld $r5 = 104[$r4] +;; + xord $r40 = $r44, $r5 +;; + xord $r10 = $r40, $r54 +;; + ld $r5 = 232[$r3] +;; + xord $r39 = $r10, $r5 +;; + sd 40[$r19] = $r39 +;; + ld $r5 = 40[$r2] +;; + ld $r58 = 104[$r4] +;; + xord $r17 = $r5, $r58 + ld $r15 = 112[$r4] +;; + xord $r37 = $r17, $r15 + ld $r5 = 176[$r1] +;; + xord $r57 = $r37, $r5 + ld $r51 = 240[$r3] +;; + xord $r57 = $r57, $r51 +;; + sd 48[$r19] = $r57 +;; + ld $r40 = 48[$r2] +;; + ld $r52 = 112[$r4] +;; + xord $r35 = $r40, $r52 + ld $r5 = 120[$r4] +;; + xord $r5 = $r35, $r5 + ld $r49 = 184[$r1] +;; + xord $r15 = $r5, $r49 + ld $r34 = 248[$r3] +;; + xord $r46 = $r15, $r34 +;; + sd 56[$r19] = $r46 +;; + ld $r33 = 0[$r2] +;; + ld $r36 = 128[$r1] +;; + ld $r48 = 120[$r4] + xord $r42 = $r33, $r36 +;; + ld $r5 = 184[$r1] +;; + xord $r34 = $r48, $r5 + ld $r47 = 192[$r3] +;; + xord $r60 = $r42, $r47 +;; + xord $r60 = $r60, $r34 +;; + sd 64[$r19] = $r60 +;; + ld $r43 = 8[$r2] +;; + ld $r47 = 64[$r4] +;; + xord $r63 = $r43, $r47 + ld $r52 = 128[$r1] +;; + xord $r5 = $r63, $r52 + ld $r7 = 136[$r1] +;; + xord $r60 = $r5, $r7 + ld $r15 = 200[$r3] +;; + xord $r55 = $r60, $r15 +;; + xord $r48 = $r55, $r34 +;; + sd 72[$r19] = $r48 +;; + ld $r56 = 
16[$r2] +;; + ld $r5 = 72[$r4] +;; + xord $r7 = $r56, $r5 + ld $r46 = 136[$r1] +;; + xord $r41 = $r7, $r46 + ld $r40 = 144[$r1] +;; + xord $r5 = $r41, $r40 + ld $r47 = 208[$r3] +;; + xord $r5 = $r5, $r47 +;; + sd 80[$r19] = $r5 +;; + ld $r52 = 24[$r2] +;; + ld $r54 = 80[$r4] +;; + xord $r35 = $r52, $r54 + ld $r63 = 144[$r1] +;; + xord $r7 = $r35, $r63 + ld $r8 = 152[$r1] +;; + xord $r33 = $r7, $r8 + ld $r37 = 216[$r3] +;; + xord $r56 = $r33, $r37 +;; + xord $r54 = $r56, $r34 +;; + sd 88[$r19] = $r54 +;; + ld $r9 = 32[$r2] +;; + ld $r6 = 88[$r4] +;; + xord $r44 = $r9, $r6 + ld $r51 = 152[$r1] +;; + xord $r35 = $r44, $r51 + ld $r52 = 160[$r1] +;; + xord $r38 = $r35, $r52 + ld $r9 = 224[$r3] +;; + xord $r62 = $r38, $r9 +;; + xord $r6 = $r62, $r34 +;; + sd 96[$r19] = $r6 +;; + ld $r15 = 40[$r2] +;; + ld $r17 = 96[$r4] +;; + xord $r36 = $r15, $r17 + ld $r5 = 160[$r1] +;; + xord $r50 = $r36, $r5 + ld $r51 = 168[$r1] +;; + xord $r37 = $r50, $r51 + ld $r42 = 232[$r3] +;; + xord $r58 = $r37, $r42 +;; + sd 104[$r19] = $r58 +;; + ld $r56 = 48[$r2] +;; + ld $r41 = 104[$r4] +;; + xord $r11 = $r56, $r41 + ld $r48 = 168[$r1] +;; + xord $r51 = $r11, $r48 + ld $r58 = 176[$r1] +;; + xord $r61 = $r51, $r58 + ld $r5 = 240[$r3] +;; + xord $r61 = $r61, $r5 +;; + sd 112[$r19] = $r61 +;; + ld $r34 = 56[$r2] +;; + ld $r56 = 112[$r4] +;; + xord $r46 = $r34, $r56 + ld $r9 = 176[$r1] +;; + xord $r62 = $r46, $r9 + ld $r33 = 184[$r1] +;; + xord $r46 = $r62, $r33 + ld $r61 = 248[$r3] +;; + xord $r40 = $r46, $r61 +;; + sd 120[$r19] = $r40 +;; + ld $r5 = 184[$r1] +;; + ld $r59 = 248[$r3] +;; + xord $r43 = $r5, $r59 + ld $r55 = 0[$r2] +;; + ld $r5 = 64[$r4] +;; + xord $r42 = $r55, $r5 + ld $r35 = 192[$r3] +;; + xord $r49 = $r42, $r35 +;; + xord $r5 = $r49, $r43 +;; + sd 128[$r19] = $r5 +;; + ld $r57 = 8[$r2] +;; + ld $r5 = 72[$r4] +;; + xord $r44 = $r57, $r5 + ld $r45 = 128[$r1] +;; + xord $r17 = $r44, $r45 + ld $r33 = 192[$r3] +;; + xord $r52 = $r17, $r33 + ld $r39 = 200[$r3] +;; + xord $r35 = 
$r52, $r39 +;; + xord $r62 = $r35, $r43 +;; + sd 136[$r19] = $r62 +;; + ld $r5 = 16[$r2] +;; + ld $r39 = 80[$r4] +;; + xord $r36 = $r5, $r39 + ld $r41 = 136[$r1] +;; + xord $r6 = $r36, $r41 + ld $r5 = 200[$r3] +;; + xord $r35 = $r6, $r5 + ld $r11 = 208[$r3] +;; + xord $r37 = $r35, $r11 +;; + sd 144[$r19] = $r37 +;; + ld $r5 = 24[$r2] +;; + ld $r63 = 88[$r4] +;; + xord $r33 = $r5, $r63 + ld $r45 = 144[$r1] +;; + xord $r49 = $r33, $r45 + ld $r36 = 208[$r3] +;; + xord $r55 = $r49, $r36 + ld $r8 = 216[$r3] +;; + xord $r41 = $r55, $r8 +;; + xord $r58 = $r41, $r43 +;; + sd 152[$r19] = $r58 +;; + ld $r6 = 32[$r2] +;; + ld $r47 = 96[$r4] +;; + xord $r11 = $r6, $r47 + ld $r61 = 152[$r1] +;; + xord $r44 = $r11, $r61 + ld $r9 = 216[$r3] +;; + xord $r59 = $r44, $r9 + ld $r34 = 224[$r3] +;; + xord $r7 = $r59, $r34 +;; + xord $r17 = $r7, $r43 +;; + sd 160[$r19] = $r17 +;; + ld $r54 = 40[$r2] +;; + ld $r53 = 104[$r4] +;; + xord $r7 = $r54, $r53 + ld $r59 = 160[$r1] +;; + xord $r37 = $r7, $r59 + ld $r41 = 224[$r3] +;; + xord $r10 = $r37, $r41 + ld $r46 = 232[$r3] +;; + xord $r10 = $r10, $r46 +;; + sd 168[$r19] = $r10 +;; + ld $r58 = 48[$r2] +;; + ld $r5 = 112[$r4] +;; + xord $r40 = $r58, $r5 + ld $r38 = 168[$r1] +;; + xord $r57 = $r40, $r38 + ld $r51 = 232[$r3] +;; + xord $r60 = $r57, $r51 + ld $r55 = 240[$r3] +;; + xord $r53 = $r60, $r55 +;; + sd 176[$r19] = $r53 +;; + ld $r45 = 56[$r2] +;; + ld $r41 = 120[$r4] +;; + xord $r5 = $r45, $r41 + ld $r53 = 176[$r1] +;; + xord $r38 = $r5, $r53 + ld $r8 = 240[$r3] +;; + xord $r43 = $r38, $r8 + ld $r63 = 248[$r3] +;; + xord $r6 = $r43, $r63 +;; + sd 184[$r19] = $r6 +;; + ld $r8 = 0[$r2] +;; + ld $r58 = 64[$r4] +;; + ld $r35 = 56[$r2] + xord $r54 = $r8, $r58 +;; + ld $r5 = 248[$r3] +;; + xord $r50 = $r35, $r5 + ld $r51 = 128[$r1] +;; + xord $r11 = $r54, $r51 +;; + xord $r38 = $r11, $r50 +;; + sd 192[$r19] = $r38 +;; + ld $r63 = 8[$r2] +;; + ld $r54 = 0[$r2] +;; + xord $r54 = $r63, $r54 + ld $r36 = 72[$r4] +;; + xord $r5 = $r54, $r36 + ld 
$r41 = 136[$r1] +;; + xord $r39 = $r5, $r41 + ld $r58 = 192[$r3] +;; + xord $r44 = $r39, $r58 +;; + xord $r33 = $r44, $r50 +;; + sd 200[$r19] = $r33 +;; + ld $r5 = 8[$r2] +;; + ld $r63 = 16[$r2] +;; + xord $r54 = $r63, $r5 + ld $r49 = 80[$r4] + addw $r63 = $r18, 32 +;; + xord $r51 = $r54, $r49 + ld $r5 = 144[$r1] + andw $r18 = $r63, 127 +;; + xord $r43 = $r51, $r5 + ld $r57 = 200[$r3] +;; + xord $r47 = $r43, $r57 +;; + sd 208[$r19] = $r47 + addw $r47 = $r21, 32 +;; + andw $r21 = $r47, 127 +;; + ld $r7 = 24[$r2] +;; + ld $r15 = 16[$r2] +;; + xord $r56 = $r7, $r15 + ld $r48 = 88[$r4] +;; + xord $r10 = $r56, $r48 + ld $r51 = 152[$r1] +;; + xord $r39 = $r10, $r51 + addw $r10 = $r22, 32 +;; + ld $r48 = 208[$r3] + andw $r22 = $r10, 127 +;; + xord $r53 = $r39, $r48 +;; + xord $r37 = $r53, $r50 +;; + sd 216[$r19] = $r37 +;; + ld $r9 = 32[$r2] +;; + ld $r15 = 24[$r2] +;; + xord $r43 = $r9, $r15 + ld $r53 = 96[$r4] + addw $r15 = $r20, 32 +;; + xord $r42 = $r43, $r53 + ld $r17 = 160[$r1] + andw $r20 = $r15, 127 +;; + xord $r55 = $r42, $r17 + ld $r62 = 216[$r3] + sxwd $r8 = $r20 +;; + xord $r60 = $r55, $r62 + slld $r43 = $r8, 3 +;; + xord $r5 = $r60, $r50 + sxwd $r50 = $r18 +;; + sd 224[$r19] = $r5 + slld $r39 = $r50, 3 +;; + ld $r5 = 40[$r2] +;; + ld $r51 = 32[$r2] +;; + xord $r62 = $r5, $r51 + ld $r45 = 168[$r1] +;; + ld $r5 = 104[$r4] +;; + xord $r9 = $r62, $r5 +;; + xord $r17 = $r9, $r45 +;; + ld $r5 = 224[$r3] +;; + xord $r49 = $r17, $r5 +;; + sd 232[$r19] = $r49 +;; + ld $r33 = 48[$r2] +;; + ld $r57 = 40[$r2] +;; + xord $r49 = $r33, $r57 + ld $r55 = 112[$r4] +;; + xord $r59 = $r49, $r55 + ld $r36 = 176[$r1] +;; + xord $r61 = $r59, $r36 + ld $r52 = 232[$r3] +;; + xord $r6 = $r61, $r52 +;; + sd 240[$r19] = $r6 +;; + ld $r49 = 56[$r2] +;; + ld $r45 = 48[$r2] + addd $r2 = $r0, $r39 +;; + xord $r56 = $r49, $r45 + ld $r59 = 120[$r4] +;; + xord $r11 = $r56, $r59 + ld $r38 = 184[$r1] +;; + xord $r4 = $r11, $r38 + ld $r34 = 240[$r3] + sxwd $r38 = $r22 + sxwd $r3 = $r21 +;; + xord 
$r1 = $r4, $r34 + slld $r10 = $r38, 3 + slld $r36 = $r3, 3 +;; + sd 248[$r19] = $r1 + addd $r19 = $r19, 256 + addd $r4 = $r0, $r10 + addd $r1 = $r0, $r43 +;; + addd $r3 = $r0, $r36 + cb.wnez $r32? .L112 +;; + addd $r1 = $r12, 64 + make $r2, 1024 + call memmove +;; + ld $r18 = 16[$r12] +;; + ld $r19 = 24[$r12] +;; + ld $r20 = 32[$r12] +;; + ld $r21 = 40[$r12] +;; + ld $r22 = 48[$r12] +;; + ld $r23 = 56[$r12] +;; + ld $r16 = 8[$r12] +;; + set $ra = $r16 +;; + addd $r12 = $r12, 1088 +;; + ret +;; + .type bs_shiftmix, @function + .size bs_shiftmix, . - bs_shiftmix + .text + .balign 2 + .globl bs_mixcolumns +bs_mixcolumns: + addd $r17 = $r12, 0 + addd $r12 = $r12, -1040 +;; + sd 0[$r12] = $r17 +;; +;; + get $r16 = $ra +;; + sd 8[$r12] = $r16 +;; + addd $r46 = $r12, 16 + make $r45, 0 +;; +.L113: + ld $r60 = 64[$r0] + addw $r45 = $r45, 1 + make $r32, 4 +;; + ld $r54 = 128[$r0] + compw.lt $r32 = $r45, $r32 +;; + ld $r44 = 56[$r0] + xord $r49 = $r60, $r54 +;; + ld $r7 = 120[$r0] +;; + xord $r57 = $r44, $r7 + ld $r5 = 192[$r0] +;; + xord $r1 = $r49, $r5 +;; + xord $r40 = $r1, $r57 +;; + sd 0[$r46] = $r40 +;; + ld $r42 = 0[$r0] +;; + xord $r53 = $r42, $r60 + ld $r39 = 72[$r0] +;; + xord $r43 = $r53, $r39 + ld $r55 = 136[$r0] +;; + xord $r6 = $r43, $r55 + ld $r2 = 200[$r0] +;; + xord $r15 = $r6, $r2 +;; + xord $r8 = $r15, $r57 +;; + sd 8[$r46] = $r8 +;; + ld $r35 = 8[$r0] +;; + xord $r59 = $r35, $r39 + ld $r37 = 80[$r0] + xord $r60 = $r35, $r60 +;; + xord $r56 = $r59, $r37 + ld $r6 = 144[$r0] + xord $r59 = $r59, $r54 +;; + xord $r43 = $r56, $r6 + ld $r51 = 208[$r0] + xord $r59 = $r59, $r5 +;; + xord $r11 = $r43, $r51 + xord $r59 = $r59, $r2 +;; + sd 16[$r46] = $r11 +;; + ld $r47 = 16[$r0] +;; + xord $r11 = $r47, $r37 + ld $r52 = 88[$r0] +;; + xord $r48 = $r11, $r52 + ld $r17 = 152[$r0] +;; + xord $r4 = $r48, $r17 + ld $r1 = 216[$r0] +;; + xord $r4 = $r4, $r1 +;; + xord $r3 = $r4, $r57 +;; + sd 24[$r46] = $r3 +;; + ld $r8 = 24[$r0] +;; + xord $r58 = $r8, $r52 + ld $r36 = 
96[$r0] +;; + xord $r9 = $r58, $r36 + ld $r50 = 160[$r0] + xord $r58 = $r58, $r6 +;; + xord $r40 = $r9, $r50 + ld $r4 = 224[$r0] +;; + xord $r61 = $r40, $r4 +;; + xord $r48 = $r61, $r57 +;; + sd 32[$r46] = $r48 +;; + ld $r15 = 32[$r0] +;; + xord $r57 = $r15, $r36 + ld $r38 = 104[$r0] +;; + xord $r61 = $r57, $r38 + ld $r3 = 168[$r0] +;; + xord $r9 = $r61, $r3 + ld $r48 = 232[$r0] +;; + xord $r9 = $r9, $r48 +;; + sd 40[$r46] = $r9 +;; + ld $r43 = 40[$r0] +;; + xord $r34 = $r43, $r38 + ld $r33 = 112[$r0] +;; + xord $r40 = $r34, $r33 + ld $r10 = 176[$r0] + xord $r63 = $r44, $r33 +;; + xord $r49 = $r40, $r10 + ld $r41 = 240[$r0] +;; + xord $r62 = $r49, $r41 +;; + sd 48[$r46] = $r62 + xord $r62 = $r42, $r54 +;; + xord $r62 = $r62, $r5 +;; + ld $r9 = 48[$r0] +;; + xord $r56 = $r9, $r33 + ld $r40 = 184[$r0] +;; + xord $r49 = $r56, $r7 + xord $r56 = $r56, $r3 +;; + xord $r61 = $r49, $r40 +;; + ld $r49 = 248[$r0] + addd $r0 = $r0, 256 +;; + xord $r61 = $r61, $r49 +;; + sd 56[$r46] = $r61 + xord $r61 = $r7, $r40 +;; + xord $r62 = $r62, $r61 +;; + sd 64[$r46] = $r62 + xord $r62 = $r60, $r54 +;; + xord $r60 = $r62, $r55 +;; + xord $r60 = $r60, $r2 +;; + xord $r60 = $r60, $r61 +;; + sd 72[$r46] = $r60 + xord $r60 = $r47, $r39 +;; + xord $r60 = $r60, $r55 +;; + xord $r60 = $r60, $r6 +;; + xord $r60 = $r60, $r51 +;; + sd 80[$r46] = $r60 + xord $r60 = $r8, $r37 +;; + xord $r60 = $r60, $r6 +;; + xord $r60 = $r60, $r17 +;; + xord $r60 = $r60, $r1 +;; + xord $r60 = $r60, $r61 +;; + sd 88[$r46] = $r60 + xord $r60 = $r15, $r52 +;; + xord $r60 = $r60, $r17 +;; + xord $r60 = $r60, $r50 +;; + xord $r60 = $r60, $r4 +;; + xord $r60 = $r60, $r61 + xord $r61 = $r53, $r5 +;; + sd 96[$r46] = $r60 + xord $r60 = $r43, $r36 +;; + xord $r60 = $r60, $r50 +;; + xord $r60 = $r60, $r3 +;; + xord $r60 = $r60, $r48 +;; + sd 104[$r46] = $r60 + xord $r60 = $r9, $r38 +;; + xord $r60 = $r60, $r3 +;; + xord $r60 = $r60, $r10 +;; + xord $r60 = $r60, $r41 +;; + sd 112[$r46] = $r60 + xord $r60 = $r63, $r10 +;; + 
xord $r60 = $r60, $r40 +;; + xord $r60 = $r60, $r49 +;; + sd 120[$r46] = $r60 + xord $r60 = $r40, $r49 +;; + xord $r61 = $r61, $r60 + xord $r63 = $r59, $r60 + xord $r59 = $r11, $r55 +;; + sd 128[$r46] = $r61 + xord $r59 = $r59, $r2 +;; + sd 136[$r46] = $r63 + xord $r11 = $r59, $r51 + xord $r63 = $r58, $r51 +;; + sd 144[$r46] = $r11 + xord $r58 = $r63, $r1 + xord $r11 = $r57, $r17 +;; + xord $r61 = $r58, $r60 + xord $r57 = $r11, $r1 +;; + sd 152[$r46] = $r61 + xord $r57 = $r57, $r4 +;; + xord $r57 = $r57, $r60 +;; + sd 160[$r46] = $r57 + xord $r57 = $r34, $r50 +;; + xord $r57 = $r57, $r4 +;; + xord $r11 = $r57, $r48 + xord $r57 = $r53, $r54 +;; + sd 168[$r46] = $r11 + xord $r11 = $r56, $r48 +;; + xord $r56 = $r11, $r41 + xord $r11 = $r44, $r7 +;; + sd 176[$r46] = $r56 + xord $r34 = $r11, $r10 +;; + xord $r11 = $r34, $r41 +;; + xord $r56 = $r11, $r49 + xord $r49 = $r44, $r49 + xord $r11 = $r35, $r42 + xord $r42 = $r9, $r43 +;; + sd 184[$r46] = $r56 + xord $r53 = $r57, $r49 + xord $r11 = $r11, $r39 + xord $r58 = $r42, $r33 +;; + sd 192[$r46] = $r53 + xord $r61 = $r11, $r55 + xord $r53 = $r47, $r35 +;; + xord $r39 = $r61, $r5 + xord $r62 = $r53, $r37 +;; + xord $r34 = $r39, $r49 + xord $r37 = $r62, $r6 +;; + sd 200[$r46] = $r34 + xord $r57 = $r37, $r2 + xord $r37 = $r8, $r47 + xord $r34 = $r15, $r8 +;; + sd 208[$r46] = $r57 + xord $r35 = $r37, $r52 +;; + xord $r47 = $r35, $r17 +;; + xord $r47 = $r47, $r51 +;; + xord $r54 = $r47, $r49 + xord $r47 = $r34, $r36 +;; + sd 216[$r46] = $r54 + xord $r35 = $r47, $r50 +;; + xord $r39 = $r35, $r1 +;; + xord $r11 = $r39, $r49 + xord $r49 = $r43, $r15 +;; + sd 224[$r46] = $r11 + xord $r53 = $r49, $r38 + xord $r11 = $r58, $r10 + xord $r38 = $r44, $r9 +;; + xord $r6 = $r53, $r3 + xord $r56 = $r11, $r48 + xord $r52 = $r38, $r7 +;; + xord $r15 = $r6, $r4 + xord $r34 = $r52, $r40 +;; + sd 232[$r46] = $r15 + xord $r58 = $r34, $r41 +;; + sd 240[$r46] = $r56 +;; + sd 248[$r46] = $r58 + addd $r46 = $r46, 256 + cb.wnez $r32? 
.L113                                   # branch target of the cb.wnez that ends the previous line (loop back-edge of bs_mixcolumns)
+;;
+ addd $r0 = $r0, -1024                 # bs_mixcolumns epilogue: rewind $r0 by 1024 bytes (the loop advanced it in 256-byte steps)
+ addd $r1 = $r46, -1024                # rewind the output cursor $r46 the same way
+ make $r2, 1024                        # byte count
+ call memmove                          # presumably memmove(dest=$r0, src=$r1, n=$r2): copies the 1024 bytes written via $r46 over the input block
+;;
+ ld $r16 = 8[$r12]                     # reload the value that is put back into $ra below
+;;
+ set $ra = $r16
+;;
+ addd $r12 = $r12, 1040                # pop 1040 bytes off $r12 (used as the stack pointer here)
+;;
+ ret
+;;
+ .type bs_mixcolumns, @function
+ .size bs_mixcolumns, . - bs_mixcolumns
+ .text
+ .balign 2
+ .globl bs_mixcolumns_rev              # bs_mixcolumns_rev: in-place transform of the 1024-byte block at $r0
+bs_mixcolumns_rev:                     # 4 loop passes; each reads 32 doublewords from $r0, writes 32 XOR combinations to a stack buffer; buffer is copied back with memmove at the end
+ addd $r17 = $r12, 0                   # $r17 = incoming $r12
+ addd $r12 = $r12, -1040               # reserve 1040 bytes: 16 for the two saved doublewords + 1024 for the temporary output
+;;
+ sd 0[$r12] = $r17                     # save the incoming $r12 value at the bottom of the frame
+;;
+;;
+ get $r16 = $ra                        # fetch the return address ...
+;;
+ sd 8[$r12] = $r16                     # ... and spill it (memmove is called before returning)
+;;
+ addd $r49 = $r12, 16                  # $r49 = write cursor into the on-stack temporary buffer
+ make $r50, 0                          # $r50 = loop counter, stepped by 8
+;;
+.L114:                                 # one pass = one 256-byte slice (offsets 0..248 of $r0 in, offsets 0..248 of $r49 out)
+ ld $r47 = 56[$r0]
+ addw $r50 = $r50, 8                   # counter += 8
+ make $r32, 32
+;;
+ ld $r11 = 48[$r0]
+ compw.lt $r32 = $r50, $r32            # $r32 = (counter < 32); tested by the cb.wnez at the bottom of the loop
+;;
+ xord $r56 = $r47, $r11
+ ld $r6 = 40[$r0]
+;;
+ xord $r2 = $r56, $r6
+ ld $r15 = 120[$r0]
+;;
+ ld $r33 = 104[$r0]
+ xord $r1 = $r47, $r15
+;;
+ xord $r60 = $r15, $r33
+ ld $r7 = 176[$r0]
+;;
+ xord $r57 = $r2, $r60
+ ld $r48 = 168[$r0]
+;;
+ xord $r63 = $r7, $r48
+ ld $r44 = 64[$r0]
+;;
+ xord $r62 = $r57, $r63
+ ld $r34 = 128[$r0]
+;;
+ ld $r3 = 232[$r0]
+ xord $r38 = $r44, $r34
+;;
+ xord $r9 = $r62, $r3
+ ld $r17 = 192[$r0]                    # $r17 is reused as a data register from here on (its prologue value was already stored)
+;;
+ ld $r51 = 112[$r0]
+ xord $r36 = $r38, $r17
+;;
+ xord $r8 = $r56, $r51
+ ld $r10 = 184[$r0]
+ xord $r40 = $r36, $r9
+;;
+ xord $r57 = $r10, $r7
+ ld $r5 = 240[$r0]
+ xord $r4 = $r1, $r10
+;;
+ xord $r54 = $r8, $r57
+ ld $r39 = 248[$r0]
+;;
+ xord $r35 = $r54, $r5
+ xord $r58 = $r4, $r39
+ sd 0[$r49] = $r40                     # first of the 32 output doublewords of this pass
+;;
+ ld $r8 = 0[$r0]
+;;
+ xord $r36 = $r8, $r44
+ ld $r52 = 72[$r0]
+;;
+ xord $r59 = $r36, $r52
+ ld $r45 = 136[$r0]
+;;
+ xord $r59 = $r59, $r45
+ ld $r4 = 200[$r0]
+;;
+ xord $r59 = $r59, $r4
+;;
+ xord $r36 = $r59, $r9
+;;
+ xord $r61 = $r36, $r35
+;;
+ sd 8[$r49] = $r61
+;;
+ ld $r43 = 8[$r0]
+;;
+ xord $r53 = $r43, $r8
+ ld $r46 = 80[$r0]
+;;
+ xord $r53 = $r53, $r52
+ ld $r2 = 144[$r0]
+;;
+ xord $r36 = $r53, $r46
+ ld $r42 = 208[$r0]
+;;
+ xord $r59 = $r36, $r2
+;;
+ xord $r53 = $r59, $r34
+;;
+ xord $r53 = $r53, $r42
+;;
+ xord $r37 = $r53, $r35
+;;
+ xord $r53 = $r37, $r58
+;;
+ sd 16[$r49] = $r53
+;;
+ ld $r1 = 16[$r0]
+;;
+ xord $r41 = $r1, $r43
+ ld $r38 = 88[$r0]
+;;
+ xord $r36 = $r41, $r8
+;;
+ xord $r53 = $r36, $r44
+;;
+ xord $r36 = $r53, $r46
+ ld $r41 = 152[$r0]
+;;
+ xord $r36 = $r36, $r38
+;;
+ xord $r40 = $r36, $r41
+;;
+ xord $r53 = $r40, $r45
+;;
+ xord $r36 = $r53, $r34
+;;
+ ld $r53 = 216[$r0]
+;;
+ xord $r54 = $r36, $r53
+;;
+ xord $r36 = $r54, $r17
+;;
+ xord $r36 = $r36, $r9
+;;
+ xord $r36 = $r36, $r58
+;;
+ sd 24[$r49] = $r36
+;;
+ ld $r36 = 24[$r0]
+;;
+ xord $r54 = $r36, $r1
+ ld $r40 = 96[$r0]
+;;
+ xord $r61 = $r54, $r43
+ ld $r37 = 160[$r0]
+;;
+ xord $r54 = $r61, $r52
+;;
+ xord $r59 = $r54, $r38
+;;
+ xord $r59 = $r59, $r40
+;;
+ xord $r54 = $r59, $r37
+;;
+ xord $r54 = $r54, $r2
+;;
+ xord $r55 = $r54, $r45
+;;
+ ld $r54 = 224[$r0]
+;;
+ xord $r55 = $r55, $r54
+;;
+ xord $r55 = $r55, $r4
+;;
+ xord $r55 = $r55, $r9
+;;
+ xord $r9 = $r55, $r35
+;;
+ sd 32[$r49] = $r9
+;;
+ ld $r9 = 32[$r0]                      # 32nd and last load of this pass (all offsets 0..248 have now been read)
+ addd $r0 = $r0, 256                   # input cursor moves to the next slice; the rest of the pass is xord/sd only
+;;
+ xord $r55 = $r9, $r36
+ xord $r61 = $r6, $r9
+;;
+ xord $r55 = $r55, $r1
+;;
+ xord $r55 = $r55, $r46
+;;
+ xord $r55 = $r55, $r40
+;;
+ xord $r55 = $r55, $r33
+;;
+ xord $r55 = $r55, $r48
+;;
+ xord $r55 = $r55, $r41
+;;
+ xord $r55 = $r55, $r2
+;;
+ xord $r55 = $r55, $r3
+;;
+ xord $r55 = $r55, $r42
+;;
+ xord $r35 = $r55, $r35
+;;
+ xord $r60 = $r35, $r58
+ xord $r35 = $r61, $r36
+;;
+ sd 40[$r49] = $r60
+ xord $r55 = $r35, $r38
+;;
+ xord $r62 = $r55, $r33
+;;
+ xord $r59 = $r62, $r51
+ xord $r62 = $r8, $r34
+;;
+ xord $r35 = $r59, $r7
+ xord $r59 = $r11, $r6
+ xord $r62 = $r62, $r17
+;;
+ xord $r35 = $r35, $r37
+;;
+ xord $r35 = $r35, $r41
+;;
+ xord $r55 = $r35, $r5
+ xord $r35 = $r59, $r9
+;;
+ xord $r63 = $r55, $r53
+ xord $r60 = $r35, $r40
+;;
+ xord $r55 = $r63, $r58
+ xord $r35 = $r60, $r51
+;;
+ sd 48[$r49] = $r55
+ xord $r63 = $r35, $r15
+ xord $r55 = $r15, $r51
+;;
+ xord $r35 = $r63, $r10
+ xord $r63 = $r1, $r52
+;;
+ xord $r35 = $r35, $r48
+;;
+ xord $r35 = $r35, $r37
+;;
+ xord $r60 = $r35, $r39
+ xord $r35 = $r55, $r33
+;;
+ xord $r60 = $r60, $r54
+ xord $r35 = $r6, $r35
+;;
+ sd 56[$r49] = $r60
+ xord $r60 = $r10, $r48
+;;
+ xord $r61 = $r35, $r60
+ xord $r60 = $r5, $r3
+ xord $r35 = $r39, $r5
+;;
+ xord $r60 = $r61, $r60
+ xord $r61 = $r11, $r55
+;;
+ xord $r61 = $r61, $r7
+ xord $r62 = $r62, $r60
+;;
+ xord $r61 = $r61, $r35
+ sd 64[$r49] = $r62
+ xord $r62 = $r43, $r44
+;;
+ xord $r62 = $r62, $r45
+;;
+ xord $r62 = $r62, $r34
+;;
+ xord $r62 = $r62, $r4
+;;
+ xord $r62 = $r62, $r60
+;;
+ xord $r62 = $r62, $r61
+;;
+ sd 72[$r49] = $r62
+ xord $r62 = $r63, $r44
+;;
+ xord $r62 = $r62, $r2
+;;
+ xord $r62 = $r62, $r45
+;;
+ xord $r62 = $r62, $r42
+;;
+ xord $r62 = $r62, $r17
+;;
+ xord $r62 = $r62, $r61
+;;
+ xord $r62 = $r62, $r58
+;;
+ sd 80[$r49] = $r62
+ xord $r62 = $r36, $r8
+;;
+ xord $r62 = $r62, $r46
+;;
+ xord $r62 = $r62, $r52
+;;
+ xord $r62 = $r62, $r44
+;;
+ xord $r62 = $r62, $r41
+;;
+ xord $r62 = $r62, $r2
+;;
+ xord $r62 = $r62, $r34
+;;
+ xord $r62 = $r62, $r53
+;;
+ xord $r62 = $r62, $r4
+;;
+ xord $r62 = $r62, $r17
+;;
+ xord $r62 = $r62, $r60
+;;
+ xord $r62 = $r62, $r58
+;;
+ sd 88[$r49] = $r62
+ xord $r62 = $r9, $r43
+;;
+ xord $r62 = $r62, $r38
+;;
+ xord $r62 = $r62, $r46
+;;
+ xord $r62 = $r62, $r52
+;;
+ xord $r62 = $r62, $r37
+;;
+ xord $r62 = $r62, $r41
+;;
+ xord $r62 = $r62, $r45
+;;
+ xord $r62 = $r62, $r54
+;;
+ xord $r62 = $r62, $r42
+;;
+ xord $r62 = $r62, $r4
+;;
+ xord $r60 = $r62, $r60
+;;
+ xord $r60 = $r60, $r61
+;;
+ sd 96[$r49] = $r60
+ xord $r60 = $r6, $r1
+;;
+ xord $r60 = $r60, $r40
+;;
+ xord $r60 = $r60, $r38
+;;
+ xord $r60 = $r60, $r46
+;;
+ xord $r60 = $r60, $r48
+;;
+ xord $r60 = $r60, $r37
+;;
+ xord $r60 = $r60, $r2
+;;
+ xord $r60 = $r60, $r3
+;;
+ xord $r60 = $r60, $r53
+;;
+ xord $r60 = $r60, $r42
+;;
+ xord $r60 = $r60, $r61
+;;
+ xord $r60 = $r60, $r58
+;;
+ sd 104[$r49] = $r60
+ xord $r60 = $r11, $r36
+;;
+ xord $r60 = $r60, $r33
+;;
+ xord $r60 = $r60, $r40
+;;
+ xord $r60 = $r60, $r38
+;;
+ xord $r62 = $r60, $r7
+;;
+ xord $r60 = $r62, $r48
+;;
+ xord $r60 = $r60, $r41
+;;
+ xord $r60 = $r60, $r5
+;;
+ xord $r60 = $r60, $r54
+;;
+ xord $r60 = $r60, $r53
+;;
+ xord $r58 = $r60, $r58
+;;
+ sd 112[$r49] = $r58
+ xord $r58 = $r47, $r9
+;;
+ xord $r58 = $r58, $r51
+;;
+ xord $r58 = $r58, $r33
+;;
+ xord $r58 = $r58, $r40
+;;
+ xord $r58 = $r58, $r10
+;;
+ xord $r58 = $r58, $r7
+;;
+ xord $r58 = $r58, $r37
+;;
+ xord $r58 = $r58, $r39
+;;
+ xord $r63 = $r58, $r3
+;;
+ xord $r58 = $r63, $r54
+;;
+ sd 120[$r49] = $r58
+ xord $r58 = $r57, $r48
+ xord $r57 = $r51, $r57
+;;
+ xord $r60 = $r33, $r58
+ xord $r58 = $r39, $r3
+ xord $r63 = $r57, $r5
+;;
+ xord $r58 = $r60, $r58
+ xord $r63 = $r63, $r56
+ xord $r56 = $r15, $r10
+;;
+ xord $r61 = $r58, $r59
+ xord $r56 = $r56, $r39
+;;
+ xord $r62 = $r56, $r47
+ xord $r56 = $r44, $r17
+;;
+ xord $r56 = $r56, $r8
+;;
+ xord $r56 = $r56, $r61
+;;
+ sd 128[$r49] = $r56
+ xord $r56 = $r52, $r34
+;;
+ xord $r56 = $r56, $r4
+;;
+ xord $r56 = $r56, $r17
+;;
+ xord $r56 = $r56, $r43
+;;
+ xord $r56 = $r56, $r61
+;;
+ xord $r56 = $r56, $r63
+;;
+ sd 136[$r49] = $r56
+ xord $r56 = $r46, $r45
+;;
+ xord $r56 = $r56, $r34
+;;
+ xord $r56 = $r56, $r42
+;;
+ xord $r56 = $r56, $r4
+;;
+ xord $r56 = $r56, $r1
+;;
+ xord $r56 = $r56, $r8
+;;
+ xord $r56 = $r56, $r63
+;;
+ xord $r56 = $r56, $r62
+;;
+ sd 144[$r49] = $r56
+ xord $r56 = $r38, $r44
+;;
+ xord $r56 = $r56, $r2
+;;
+ xord $r56 = $r56, $r45
+;;
+ xord $r56 = $r56, $r34
+;;
+ xord $r56 = $r56, $r53
+;;
+ xord $r56 = $r56, $r42
+;;
+ xord $r56 = $r56, $r17
+;;
+ xord $r56 = $r56, $r36
+;;
+ xord $r59 = $r56, $r43
+;;
+ xord $r56 = $r59, $r8
+;;
+ xord $r59 = $r56, $r61
+;;
+ xord $r56 = $r59, $r62
+ xord $r59 = $r33, $r46
+;;
+ sd 152[$r49] = $r56
+ xord $r56 = $r40, $r52
+;;
+ xord $r56 = $r56, $r41
+;;
+ xord $r56 = $r56, $r2
+;;
+ xord $r56 = $r56, $r45
+;;
+ xord $r56 = $r56, $r54
+;;
+ xord $r56 = $r56, $r53
+;;
+ xord $r56 = $r56, $r4
+;;
+ xord $r56 = $r56, $r9
+;;
+ xord $r56 = $r56, $r1
+;;
+ xord $r56 = $r56, $r43
+;;
+ xord $r56 = $r56, $r61
+;;
+ xord $r56 = $r56, $r63
+;;
+ sd 160[$r49] = $r56
+ xord $r56 = $r59, $r37
+;;
+ xord $r56 = $r56, $r41
+;;
+ xord $r56 = $r56, $r2
+;;
+ xord $r56 = $r56, $r3
+;;
+ xord $r56 = $r56, $r54
+;;
+ xord $r57 = $r56, $r42
+;;
+ xord $r56 = $r57, $r6
+;;
+ xord $r56 = $r56, $r36
+;;
+ xord $r56 = $r56, $r1
+;;
+ xord $r56 = $r56, $r63
+;;
+ xord $r56 = $r56, $r62
+;;
+ sd 168[$r49] = $r56
+ xord $r56 = $r51, $r38
+;;
+ xord $r56 = $r56, $r48
+;;
+ xord $r56 = $r56, $r37
+;;
+ xord $r58 = $r56, $r41
+;;
+ xord $r56 = $r58, $r5
+;;
+ xord $r56 = $r56, $r3
+;;
+ xord $r56 = $r56, $r53
+;;
+ xord $r56 = $r56, $r11
+;;
+ xord $r58 = $r56, $r9
+;;
+ xord $r56 = $r58, $r36
+;;
+ xord $r56 = $r56, $r62
+;;
+ sd 176[$r49] = $r56
+ xord $r56 = $r15, $r40
+;;
+ xord $r56 = $r56, $r7
+;;
+ xord $r56 = $r56, $r48
+;;
+ xord $r56 = $r56, $r37
+;;
+ xord $r60 = $r56, $r39
+ xord $r39 = $r10, $r39
+;;
+ xord $r56 = $r60, $r5
+;;
+ xord $r56 = $r56, $r54
+;;
+ xord $r56 = $r56, $r47
+;;
+ xord $r56 = $r56, $r6
+;;
+ xord $r56 = $r56, $r9
+;;
+ sd 184[$r49] = $r56
+ xord $r56 = $r35, $r3
+;;
+ xord $r57 = $r48, $r56
+ xord $r56 = $r47, $r6
+;;
+ xord $r57 = $r57, $r56
+ xord $r56 = $r51, $r33
+;;
+ xord $r56 = $r57, $r56
+ xord $r57 = $r7, $r35
+;;
+ xord $r35 = $r57, $r11
+ xord $r57 = $r39, $r47
+;;
+ xord $r55 = $r35, $r55
+ xord $r35 = $r57, $r15
+ xord $r57 = $r34, $r8
+ xord $r34 = $r41, $r34
+;;
+ xord $r39 = $r57, $r44
+ xord $r57 = $r45, $r17
+ xord $r58 = $r34, $r42
+;;
+ xord $r39 = $r39, $r56
+ xord $r34 = $r58, $r4
+;;
+ sd 192[$r49] = $r39
+ xord $r39 = $r57, $r43
+;;
+ xord $r39 = $r39, $r8
+;;
+ xord $r39 = $r39, $r52
+;;
+ xord $r57 = $r39, $r56
+;;
+ xord $r39 = $r57, $r55
+;;
+ sd 200[$r49] = $r39
+ xord $r39 = $r2, $r4
+;;
+ xord $r39 = $r39, $r17
+ xord $r17 = $r34, $r17
+;;
+ xord $r39 = $r39, $r1
+;;
+ xord $r57 = $r39, $r43
+;;
+ xord $r39 = $r57, $r46
+;;
+ xord $r39 = $r39, $r44
+;;
+ xord $r63 = $r39, $r55
+;;
+ xord $r57 = $r63, $r35
+;;
+ sd 208[$r49] = $r57
+ xord $r57 = $r17, $r36
+;;
+ xord $r57 = $r57, $r1
+;;
+ xord $r57 = $r57, $r8
+ xord $r8 = $r48, $r2
+;;
+ xord $r17 = $r57, $r38
+ xord $r57 = $r37, $r45
+;;
+ xord $r59 = $r17, $r52
+ xord $r62 = $r57, $r53
+;;
+ xord $r34 = $r59, $r44
+ xord $r39 = $r62, $r42
+;;
+ xord $r44 = $r34, $r56
+;;
+ xord $r44 = $r44, $r35
+;;
+ sd 216[$r49] = $r44
+ xord $r44 = $r39, $r4
+;;
+ xord $r4 = $r44, $r9
+;;
+ xord $r4 = $r4, $r36
+;;
+ xord $r57 = $r4, $r43
+ xord $r43 = $r8, $r54
+;;
+ xord $r59 = $r57, $r40
+ xord $r48 = $r43, $r53
+;;
+ xord $r34 = $r59, $r46
+;;
+ xord $r52 = $r34, $r52
+;;
+ xord $r52 = $r52, $r56
+ xord $r56 = $r10, $r37
+;;
+ xord $r39 = $r52, $r55
+ xord $r52 = $r48, $r42
+;;
+ sd 224[$r49] = $r39
+ xord $r42 = $r52, $r6
+;;
+ xord $r17 = $r42, $r9
+;;
+ xord $r39 = $r17, $r1
+;;
+ xord $r48 = $r39, $r33
+;;
+ xord $r42 = $r48, $r38
+;;
+ xord $r17 = $r42, $r46
+;;
+ xord $r52 = $r17, $r55
+ xord $r55 = $r7, $r41
+;;
+ xord $r17 = $r52, $r35
+ xord $r34 = $r55, $r3
+;;
+ sd 232[$r49] = $r17
+ xord $r44 = $r34, $r54
+;;
+ xord $r39 = $r44, $r53
+;;
+ xord $r60 = $r39, $r11
+;;
+ xord $r61 = $r60, $r6
+;;
+ xord $r8 = $r61, $r36
+;;
+ xord $r4 = $r8, $r51
+;;
+ xord $r39 = $r4, $r40
+;;
+ xord $r2 = $r39, $r38
+;;
+ xord $r2 = $r2, $r35
+ xord $r35 = $r56, $r5
+;;
+ sd 240[$r49] = $r2
+ xord $r62 = $r35, $r3
+;;
+ xord $r38 = $r62, $r54
+;;
+ xord $r48 = $r38, $r47
+;;
+ xord $r38 = $r48, $r11
+;;
+ xord $r11 = $r38, $r9
+;;
+ xord $r1 = $r11, $r15
+;;
+ xord $r55 = $r1, $r33
+;;
+ xord $r5 = $r55, $r40
+;;
+ sd 248[$r49] = $r5                    # last of the 32 output doublewords of this pass
+ addd $r49 = $r49, 256                 # output cursor moves to the next 256-byte slice
+ cb.wnez $r32? .L114                   # loop while (counter < 32), i.e. 4 passes in total
+;;
+ addd $r0 = $r0, -1024                 # rewind the input cursor to the start of the block (4 x 256)
+ addd $r1 = $r49, -1024                # start of the temporary buffer ($r12 + 16)
+ make $r2, 1024
+ call memmove                          # presumably memmove(dest=$r0, src=$r1, n=$r2): copies the 1024 result bytes back over the input block
+;;
+ ld $r16 = 8[$r12]                     # reload the saved return address
+;;
+ set $ra = $r16
+;;
+ addd $r12 = $r12, 1040                # release the frame
+;;
+ ret
+;;
+ .type bs_mixcolumns_rev, @function
+ .size bs_mixcolumns_rev, .
- bs_mixcolumns_rev                     # tail of the ".size bs_mixcolumns_rev, . - bs_mixcolumns_rev" directive begun on the previous line
+ .text
+ .balign 2
+ .globl bs_expand_key                  # bs_expand_key: fills 11 slots of 1024 bytes at $r0 from a 16-byte input at $r1
+bs_expand_key:                         # per slot: copy 16 bytes of the expanded buffer, replicate them over the slot, then call bs_transpose on it
+ addd $r17 = $r12, 0                   # $r17 = incoming $r12 (used as the stack pointer here)
+ addd $r12 = $r12, -224                # reserve a 224-byte frame; the work buffer starts at offset 40
+;;
+ sd 0[$r12] = $r17                     # save the incoming $r12 value
+;;
+;;
+ get $r16 = $ra
+;;
+ sd 8[$r12] = $r16                     # spill the return address (calls follow)
+;;
+ sd 16[$r12] = $r18                    # save $r18 before reusing it
+ addd $r18 = $r0, 0                    # $r18 = output base (first argument), kept across the calls
+ addd $r0 = $r12, 40                   # memmove arg 1: the on-stack buffer
+ make $r2, 16                          # memmove arg 3: 16 bytes
+;;
+ sd 24[$r12] = $r19                    # save $r19
+;;
+ sd 32[$r12] = $r20                    # save $r20
+ call memmove                          # $r1 is still the caller's second argument: copy 16 bytes from it into the buffer
+;;
+ addd $r0 = $r12, 40
+ call expand_key                       # expand_key(buffer); defined elsewhere. The loop below reads 176 bytes of the buffer
+;;
+ make $r20, 0                          # $r20 = i, slot index
+ make $r19, 0                          # $r19 = j, byte offset into the buffer
+;;
+.L115:                                 # outer loop: one 1024-byte slot per 16-byte chunk of the buffer
+ sxwd $r1 = $r20
+ addd $r11 = $r12, 40                  # buffer base
+ sxwd $r15 = $r19
+ make $r2, 16                          # memmove arg 3: 16 bytes
+;;
+ slld $r34 = $r1, 10                   # i * 1024
+ addd $r1 = $r11, $r15                 # memmove arg 2: buffer + j
+;;
+ addd $r0 = $r18, $r34                 # memmove arg 1: slot = out + i*1024
+ call memmove
+;;
+ make $r1, 2                           # $r1 = k, doubleword index of the next pair to fill (starts after the 2 copied doublewords)
+;;
+.L116:                                 # middle loop over k = 2, 4, ..., 126
+ make $r35, 0                          # $r35 = t
+;;
+.L117:                                 # inner loop over t = 0, 1
+ addw $r38 = $r1, $r35                 # k + t
+ sxwd $r37 = $r35
+ addw $r35 = $r35, 1                   # t++
+ make $r32, 2
+;;
+ sxwd $r6 = $r20
+ sxwd $r33 = $r38
+ slld $r10 = $r37, 3                   # t * 8 (byte offset of doubleword t)
+ compw.lt $r32 = $r35, $r32            # $r32 = (t < 2)
+;;
+ slld $r3 = $r6, 10                    # i * 1024
+ slld $r9 = $r33, 3                    # (k + t) * 8
+;;
+ addd $r0 = $r18, $r3                  # slot address; also what $r0 holds when bs_transpose is called below
+;;
+ addd $r39 = $r0, $r9                  # &slot[k + t]
+ addd $r8 = $r0, $r10                  # &slot[t]
+;;
+ ld $r17 = 0[$r8]
+;;
+ sd 0[$r39] = $r17                     # slot[k + t] = slot[t] (doublewords)
+;;
+ cb.wnez $r32? .L117
+;;
+ addw $r1 = $r1, 2                     # k += 2
+ make $r32, 128
+;;
+ compw.lt $r32 = $r1, $r32             # k < 128: the 16 copied bytes end up repeated across all 128 doublewords of the slot
+;;
+ cb.wnez $r32? .L116
+;;
+ call bs_transpose                     # bs_transpose(slot); defined elsewhere
+;;
+ addw $r20 = $r20, 1                   # i++
+ addw $r19 = $r19, 16                  # j += 16
+ make $r32, 176
+;;
+ compw.lt $r32 = $r19, $r32            # j < 176 -> 11 passes
+;;
+ cb.wnez $r32? .L115
+;;
+ ld $r16 = 8[$r12]                     # epilogue: reload return address and the saved $r18..$r20
+;;
+ ld $r18 = 16[$r12]
+;;
+ ld $r19 = 24[$r12]
+;;
+ ld $r20 = 32[$r12]
+;;
+ set $ra = $r16
+;;
+ addd $r12 = $r12, 224                 # release the frame
+;;
+ ret
+;;
+ .type bs_expand_key, @function
+ .size bs_expand_key, .
- bs_expand_key                         # tail of the ".size bs_expand_key, . - bs_expand_key" directive begun on the previous line
+ .text
+ .balign 2
+ .globl bs_cipher                      # bs_cipher: $r0 = state block, $r1 = round-key area (slots 1024 bytes apart, indices 0..10)
+bs_cipher:                             # transpose, add key 0, 9 x (sbox, shiftmix, add key i), sbox, shiftrows, add key 10, then tail-call bs_transpose_rev
+ addd $r17 = $r12, 0                   # $r17 = incoming $r12 (used as the stack pointer here)
+ addd $r12 = $r12, -48                 # 48-byte frame
+;;
+ sd 0[$r12] = $r17
+;;
+;;
+ get $r16 = $ra
+;;
+ sd 8[$r12] = $r16                     # spill the return address
+;;
+ sd 16[$r12] = $r18                    # save $r18 before reusing it
+ addd $r18 = $r0, 0                    # $r18 = state pointer, kept across calls
+;;
+ sd 24[$r12] = $r19                    # save $r19
+ addd $r19 = $r1, 0                    # $r19 = round-key base, kept across calls
+ addd $r0 = $r18, 0
+;;
+ sd 32[$r12] = $r20                    # save $r20
+ call bs_transpose                     # bs_transpose(state)
+;;
+ addd $r1 = $r19, 0                    # key slot 0
+ addd $r0 = $r18, 0
+ call bs_addroundkey                   # bs_addroundkey(state, key + 0)
+;;
+ make $r20, 1                          # $r20 = round index i, runs 1..9
+;;
+.L118:
+ addd $r0 = $r18, 0
+ call bs_apply_sbox
+;;
+ addd $r0 = $r18, 0
+ call bs_shiftmix
+;;
+ sxwd $r6 = $r20
+ addd $r0 = $r18, 0
+;;
+ slld $r4 = $r6, 10                    # i * 1024
+;;
+ addd $r1 = $r19, $r4                  # key slot i
+ call bs_addroundkey
+;;
+ addw $r20 = $r20, 1                   # i++
+ make $r32, 10
+;;
+ compw.lt $r32 = $r20, $r32            # i < 10
+;;
+ cb.wnez $r32? .L118
+;;
+ addd $r0 = $r18, 0                    # final round: sbox, shiftrows (no shiftmix), key slot 10
+ call bs_apply_sbox
+;;
+ addd $r0 = $r18, 0
+ call bs_shiftrows
+;;
+ addd $r1 = $r19, 10240                # key slot 10 (10 * 1024)
+ addd $r0 = $r18, 0
+ call bs_addroundkey
+;;
+ addd $r0 = $r18, 0                    # argument for the tail call below; set before $r18 is restored
+;;
+ ld $r18 = 16[$r12]
+;;
+ ld $r19 = 24[$r12]
+;;
+ ld $r20 = 32[$r12]
+;;
+ ld $r16 = 8[$r12]
+;;
+ set $ra = $r16                        # restore the return address so bs_transpose_rev returns to our caller
+;;
+ addd $r12 = $r12, 48                  # release the frame
+;;
+ goto bs_transpose_rev                 # tail call: bs_transpose_rev(state)
+;;
+ .type bs_cipher, @function
+ .size bs_cipher, . - bs_cipher
+ .text
+ .balign 2
+ .globl bs_cipher_rev                  # bs_cipher_rev: same arguments as bs_cipher; applies the *_rev steps with key slots 10 down to 0
+bs_cipher_rev:
+ addd $r17 = $r12, 0                   # $r17 = incoming $r12
+ addd $r12 = $r12, -48                 # 48-byte frame
+;;
+ sd 0[$r12] = $r17
+;;
+;;
+ get $r16 = $ra
+;;
+ sd 8[$r12] = $r16                     # spill the return address
+;;
+ sd 16[$r12] = $r18
+ addd $r18 = $r0, 0                    # $r18 = state pointer
+;;
+ sd 24[$r12] = $r19
+ addd $r19 = $r1, 0                    # $r19 = round-key base
+ addd $r0 = $r18, 0
+;;
+ sd 32[$r12] = $r20
+ call bs_transpose                     # bs_transpose(state)
+;;
+ addd $r1 = $r19, 10240                # key slot 10 first
+ addd $r0 = $r18, 0
+ call bs_addroundkey
+;;
+ make $r20, 9                          # $r20 = round index i, runs 9 down to 1
+;;
+.L119:
+ addd $r0 = $r18, 0
+ call bs_shiftrows_rev
+;;
+ addd $r0 = $r18, 0
+ call bs_apply_sbox_rev
+;;
+ sxwd $r5 = $r20
+ addd $r0 = $r18, 0
+;;
+ slld $r8 = $r5, 10                    # i * 1024
+;;
+ addd $r1 = $r19, $r8                  # key slot i
+ call bs_addroundkey
+;;
+ addd $r0 = $r18, 0
+ call bs_mixcolumns_rev
+;;
+ addw $r20 = $r20, -1                  # i--
+;;
+ cb.wgtz $r20?
.L119                                   # branch target of the "cb.wgtz $r20?" that ends the previous line: loop while the round index is > 0
+;;
+ addd $r0 = $r18, 0                    # last round of bs_cipher_rev: shiftrows_rev, sbox_rev, key slot 0 (no mixcolumns_rev)
+ call bs_shiftrows_rev
+;;
+ addd $r0 = $r18, 0
+ call bs_apply_sbox_rev
+;;
+ addd $r1 = $r19, 0                    # key slot 0
+ addd $r0 = $r18, 0
+ call bs_addroundkey
+;;
+ addd $r0 = $r18, 0                    # argument for the tail call below; set before $r18 is restored
+;;
+ ld $r18 = 16[$r12]                    # restore the registers saved in the prologue
+;;
+ ld $r19 = 24[$r12]
+;;
+ ld $r20 = 32[$r12]
+;;
+ ld $r16 = 8[$r12]
+;;
+ set $ra = $r16                        # restore the return address so bs_transpose_rev returns to our caller
+;;
+ addd $r12 = $r12, 48                  # release the 48-byte frame
+;;
+ goto bs_transpose_rev                 # tail call: bs_transpose_rev(state)
+;;
+ .type bs_cipher_rev, @function
+ .size bs_cipher_rev, . - bs_cipher_rev
diff --git a/test/monniaux/bitsliced-aes/notes.txt b/test/monniaux/bitsliced-aes/notes.txt index 815d5931..7ad2ff3c 100644 --- a/test/monniaux/bitsliced-aes/notes.txt +++ b/test/monniaux/bitsliced-aes/notes.txt @@ -49,3 +49,6 @@ cycles: 1849125 ==> test.gcc.k1c.out <== cycles: 5255763 + +* hand optimized loads +cycles: 6027072 |