From 35748654f39a99c226cd14f3b92822eb64bd7037 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Sun, 1 Dec 2013 07:40:27 +0100 Subject: Add fast SSE2 implementations "xmm" for Salsa20 and Salsa20/12 Public Domain implementations by D. J. Bernstein, see http://cr.yp.to/snuffle.html --- .../cipher/salsa2012/xmm/salsa2012_amd64_xmm6.s | 4823 ++++++++++++++++++++ 1 file changed, 4823 insertions(+) create mode 100644 src/crypto/cipher/salsa2012/xmm/salsa2012_amd64_xmm6.s (limited to 'src/crypto/cipher/salsa2012/xmm/salsa2012_amd64_xmm6.s') diff --git a/src/crypto/cipher/salsa2012/xmm/salsa2012_amd64_xmm6.s b/src/crypto/cipher/salsa2012/xmm/salsa2012_amd64_xmm6.s new file mode 100644 index 0000000..0e26dc9 --- /dev/null +++ b/src/crypto/cipher/salsa2012/xmm/salsa2012_amd64_xmm6.s @@ -0,0 +1,4823 @@ + +# qhasm: int64 r11_caller + +# qhasm: int64 r12_caller + +# qhasm: int64 r13_caller + +# qhasm: int64 r14_caller + +# qhasm: int64 r15_caller + +# qhasm: int64 rbx_caller + +# qhasm: int64 rbp_caller + +# qhasm: caller r11_caller + +# qhasm: caller r12_caller + +# qhasm: caller r13_caller + +# qhasm: caller r14_caller + +# qhasm: caller r15_caller + +# qhasm: caller rbx_caller + +# qhasm: caller rbp_caller + +# qhasm: stack64 r11_stack + +# qhasm: stack64 r12_stack + +# qhasm: stack64 r13_stack + +# qhasm: stack64 r14_stack + +# qhasm: stack64 r15_stack + +# qhasm: stack64 rbx_stack + +# qhasm: stack64 rbp_stack + +# qhasm: int64 a + +# qhasm: int64 arg1 + +# qhasm: int64 arg2 + +# qhasm: int64 arg3 + +# qhasm: int64 arg4 + +# qhasm: int64 arg5 + +# qhasm: input arg1 + +# qhasm: input arg2 + +# qhasm: input arg3 + +# qhasm: input arg4 + +# qhasm: input arg5 + +# qhasm: int64 k + +# qhasm: int64 kbits + +# qhasm: int64 iv + +# qhasm: int64 i + +# qhasm: stack128 x0 + +# qhasm: stack128 x1 + +# qhasm: stack128 x2 + +# qhasm: stack128 x3 + +# qhasm: int64 m + +# qhasm: int64 out + +# qhasm: int64 bytes + +# qhasm: stack32 eax_stack + +# qhasm: stack32 ebx_stack + +# qhasm: stack32 esi_stack + +# qhasm: stack32 edi_stack + +# qhasm: stack32 ebp_stack + +# qhasm: int6464 diag0 + +# qhasm: int6464 diag1 + +# qhasm: int6464 diag2 + +# qhasm: int6464 diag3 + +# qhasm: int6464 a0 + +# qhasm: int6464 a1 + +# qhasm: int6464 a2 + +# qhasm: int6464 a3 + +# qhasm: int6464 a4 + +# qhasm: int6464 a5 + +# qhasm: int6464 a6 + +# qhasm: int6464 a7 + +# qhasm: int6464 b0 + +# qhasm: int6464 b1 + +# qhasm: int6464 b2 + +# qhasm: int6464 b3 + +# qhasm: int6464 b4 + +# qhasm: int6464 b5 + +# qhasm: int6464 b6 + +# qhasm: int6464 b7 + +# qhasm: int6464 z0 + +# qhasm: int6464 z1 + +# qhasm: int6464 z2 + +# qhasm: int6464 z3 + +# qhasm: int6464 z4 + +# qhasm: int6464 z5 + +# qhasm: int6464 z6 + +# qhasm: int6464 z7 + +# qhasm: int6464 z8 + +# qhasm: int6464 z9 + +# qhasm: int6464 z10 + +# qhasm: int6464 z11 + +# qhasm: int6464 z12 + +# qhasm: int6464 z13 + +# qhasm: int6464 z14 + +# qhasm: int6464 z15 + +# qhasm: stack128 z0_stack + +# qhasm: stack128 z1_stack + +# qhasm: stack128 z2_stack + +# qhasm: stack128 z3_stack + +# qhasm: stack128 z4_stack + +# qhasm: stack128 z5_stack + +# qhasm: stack128 z6_stack + +# qhasm: stack128 z7_stack + +# qhasm: stack128 z8_stack + +# qhasm: stack128 z9_stack + +# qhasm: stack128 z10_stack + +# qhasm: stack128 z11_stack + +# qhasm: stack128 z12_stack + +# qhasm: stack128 z13_stack + +# qhasm: stack128 z14_stack + +# qhasm: stack128 z15_stack + +# qhasm: int6464 y0 + +# qhasm: int6464 y1 + +# qhasm: int6464 y2 + +# qhasm: int6464 y3 + +# qhasm: int6464 y4 + +# qhasm: int6464 y5 + +# qhasm: int6464 y6 + +# qhasm: int6464 y7 + +# qhasm: int6464 y8 + +# qhasm: int6464 y9 + +# qhasm: int6464 y10 + +# qhasm: int6464 y11 + +# qhasm: int6464 y12 + +# qhasm: int6464 y13 + +# qhasm: int6464 y14 + +# qhasm: int6464 y15 + +# qhasm: int6464 r0 + +# qhasm: int6464 r1 + +# qhasm: int6464 r2 + +# qhasm: int6464 r3 + +# qhasm: int6464 r4 + +# qhasm: int6464 r5 + +# qhasm: int6464 r6 + +# qhasm: int6464 r7 + +# qhasm: int6464 r8 + +# qhasm: int6464 r9 + +# qhasm: int6464 r10 + +# qhasm: int6464 r11 + +# qhasm: int6464 r12 + +# qhasm: int6464 r13 + +# qhasm: int6464 r14 + +# qhasm: int6464 r15 + +# qhasm: stack128 orig0 + +# qhasm: stack128 orig1 + +# qhasm: stack128 orig2 + +# qhasm: stack128 orig3 + +# qhasm: stack128 orig4 + +# qhasm: stack128 orig5 + +# qhasm: stack128 orig6 + +# qhasm: stack128 orig7 + +# qhasm: stack128 orig8 + +# qhasm: stack128 orig9 + +# qhasm: stack128 orig10 + +# qhasm: stack128 orig11 + +# qhasm: stack128 orig12 + +# qhasm: stack128 orig13 + +# qhasm: stack128 orig14 + +# qhasm: stack128 orig15 + +# qhasm: int64 in0 + +# qhasm: int64 in1 + +# qhasm: int64 in2 + +# qhasm: int64 in3 + +# qhasm: int64 in4 + +# qhasm: int64 in5 + +# qhasm: int64 in6 + +# qhasm: int64 in7 + +# qhasm: int64 in8 + +# qhasm: int64 in9 + +# qhasm: int64 in10 + +# qhasm: int64 in11 + +# qhasm: int64 in12 + +# qhasm: int64 in13 + +# qhasm: int64 in14 + +# qhasm: int64 in15 + +# qhasm: stack512 tmp + +# qhasm: int64 ctarget + +# qhasm: stack64 bytes_backup + +# qhasm: enter crypto_stream_salsa2012_amd64_xmm6 +.text +.p2align 5 +.globl _crypto_stream_salsa2012_amd64_xmm6 +.globl crypto_stream_salsa2012_amd64_xmm6 +_crypto_stream_salsa2012_amd64_xmm6: +crypto_stream_salsa2012_amd64_xmm6: +mov %rsp,%r11 +and $31,%r11 +add $480,%r11 +sub %r11,%rsp + +# qhasm: r11_stack = r11_caller +# asm 1: movq r11_stack=stack64#1 +# asm 2: movq r11_stack=352(%rsp) +movq %r11,352(%rsp) + +# qhasm: r12_stack = r12_caller +# asm 1: movq r12_stack=stack64#2 +# asm 2: movq r12_stack=360(%rsp) +movq %r12,360(%rsp) + +# qhasm: r13_stack = r13_caller +# asm 1: movq r13_stack=stack64#3 +# asm 2: movq r13_stack=368(%rsp) +movq %r13,368(%rsp) + +# qhasm: r14_stack = r14_caller +# asm 1: movq r14_stack=stack64#4 +# asm 2: movq r14_stack=376(%rsp) +movq %r14,376(%rsp) + +# qhasm: r15_stack = r15_caller +# asm 1: movq r15_stack=stack64#5 +# asm 2: movq r15_stack=384(%rsp) +movq %r15,384(%rsp) + +# qhasm: rbx_stack = rbx_caller +# asm 1: movq rbx_stack=stack64#6 +# asm 2: movq rbx_stack=392(%rsp) +movq %rbx,392(%rsp) + +# qhasm: rbp_stack = rbp_caller +# asm 1: movq rbp_stack=stack64#7 +# asm 2: movq rbp_stack=400(%rsp) +movq %rbp,400(%rsp) + +# qhasm: bytes = arg2 +# asm 1: mov bytes=int64#6 +# asm 2: mov bytes=%r9 +mov %rsi,%r9 + +# qhasm: out = arg1 +# asm 1: mov out=int64#1 +# asm 2: mov out=%rdi +mov %rdi,%rdi + +# qhasm: m = out +# asm 1: mov m=int64#2 +# asm 2: mov m=%rsi +mov %rdi,%rsi + +# qhasm: iv = arg3 +# asm 1: mov iv=int64#3 +# asm 2: mov iv=%rdx +mov %rdx,%rdx + +# qhasm: k = arg4 +# asm 1: mov k=int64#8 +# asm 2: mov k=%r10 +mov %rcx,%r10 + +# qhasm: unsigned>? bytes - 0 +# asm 1: cmp $0, +jbe ._done + +# qhasm: a = 0 +# asm 1: mov $0,>a=int64#7 +# asm 2: mov $0,>a=%rax +mov $0,%rax + +# qhasm: i = bytes +# asm 1: mov i=int64#4 +# asm 2: mov i=%rcx +mov %r9,%rcx + +# qhasm: while (i) { *out++ = a; --i } +rep stosb + +# qhasm: out -= bytes +# asm 1: sub r11_stack=stack64#1 +# asm 2: movq r11_stack=352(%rsp) +movq %r11,352(%rsp) + +# qhasm: r12_stack = r12_caller +# asm 1: movq r12_stack=stack64#2 +# asm 2: movq r12_stack=360(%rsp) +movq %r12,360(%rsp) + +# qhasm: r13_stack = r13_caller +# asm 1: movq r13_stack=stack64#3 +# asm 2: movq r13_stack=368(%rsp) +movq %r13,368(%rsp) + +# qhasm: r14_stack = r14_caller +# asm 1: movq r14_stack=stack64#4 +# asm 2: movq r14_stack=376(%rsp) +movq %r14,376(%rsp) + +# qhasm: r15_stack = r15_caller +# asm 1: movq r15_stack=stack64#5 +# asm 2: movq r15_stack=384(%rsp) +movq %r15,384(%rsp) + +# qhasm: rbx_stack = rbx_caller +# asm 1: movq rbx_stack=stack64#6 +# asm 2: movq rbx_stack=392(%rsp) +movq %rbx,392(%rsp) + +# qhasm: rbp_stack = rbp_caller +# asm 1: movq rbp_stack=stack64#7 +# asm 2: movq rbp_stack=400(%rsp) +movq %rbp,400(%rsp) + +# qhasm: out = arg1 +# asm 1: mov out=int64#1 +# asm 2: mov out=%rdi +mov %rdi,%rdi + +# qhasm: m = arg2 +# asm 1: mov m=int64#2 +# asm 2: mov m=%rsi +mov %rsi,%rsi + +# qhasm: bytes = arg3 +# asm 1: mov bytes=int64#6 +# asm 2: mov bytes=%r9 +mov %rdx,%r9 + +# qhasm: iv = arg4 +# asm 1: mov iv=int64#3 +# asm 2: mov iv=%rdx +mov %rcx,%rdx + +# qhasm: k = arg5 +# asm 1: mov k=int64#8 +# asm 2: mov k=%r10 +mov %r8,%r10 + +# qhasm: unsigned>? bytes - 0 +# asm 1: cmp $0, +jbe ._done +# comment:fp stack unchanged by fallthrough + +# qhasm: start: +._start: + +# qhasm: in12 = *(uint32 *) (k + 20) +# asm 1: movl 20(in12=int64#4d +# asm 2: movl 20(in12=%ecx +movl 20(%r10),%ecx + +# qhasm: in1 = *(uint32 *) (k + 0) +# asm 1: movl 0(in1=int64#5d +# asm 2: movl 0(in1=%r8d +movl 0(%r10),%r8d + +# qhasm: in6 = *(uint32 *) (iv + 0) +# asm 1: movl 0(in6=int64#7d +# asm 2: movl 0(in6=%eax +movl 0(%rdx),%eax + +# qhasm: in11 = *(uint32 *) (k + 16) +# asm 1: movl 16(in11=int64#9d +# asm 2: movl 16(in11=%r11d +movl 16(%r10),%r11d + +# qhasm: ((uint32 *)&x1)[0] = in12 +# asm 1: movl x1=stack128#1 +# asm 2: movl x1=0(%rsp) +movl %ecx,0(%rsp) + +# qhasm: ((uint32 *)&x1)[1] = in1 +# asm 1: movl in8=int64#4 +# asm 2: mov $0,>in8=%rcx +mov $0,%rcx + +# qhasm: in13 = *(uint32 *) (k + 24) +# asm 1: movl 24(in13=int64#5d +# asm 2: movl 24(in13=%r8d +movl 24(%r10),%r8d + +# qhasm: in2 = *(uint32 *) (k + 4) +# asm 1: movl 4(in2=int64#7d +# asm 2: movl 4(in2=%eax +movl 4(%r10),%eax + +# qhasm: in7 = *(uint32 *) (iv + 4) +# asm 1: movl 4(in7=int64#3d +# asm 2: movl 4(in7=%edx +movl 4(%rdx),%edx + +# qhasm: ((uint32 *)&x2)[0] = in8 +# asm 1: movl x2=stack128#2 +# asm 2: movl x2=16(%rsp) +movl %ecx,16(%rsp) + +# qhasm: ((uint32 *)&x2)[1] = in13 +# asm 1: movl in4=int64#3d +# asm 2: movl 12(in4=%edx +movl 12(%r10),%edx + +# qhasm: in9 = 0 +# asm 1: mov $0,>in9=int64#4 +# asm 2: mov $0,>in9=%rcx +mov $0,%rcx + +# qhasm: in14 = *(uint32 *) (k + 28) +# asm 1: movl 28(in14=int64#5d +# asm 2: movl 28(in14=%r8d +movl 28(%r10),%r8d + +# qhasm: in3 = *(uint32 *) (k + 8) +# asm 1: movl 8(in3=int64#7d +# asm 2: movl 8(in3=%eax +movl 8(%r10),%eax + +# qhasm: ((uint32 *)&x3)[0] = in4 +# asm 1: movl x3=stack128#3 +# asm 2: movl x3=32(%rsp) +movl %edx,32(%rsp) + +# qhasm: ((uint32 *)&x3)[1] = in9 +# asm 1: movl in0=int64#3 +# asm 2: mov $1634760805,>in0=%rdx +mov $1634760805,%rdx + +# qhasm: in5 = 857760878 +# asm 1: mov $857760878,>in5=int64#4 +# asm 2: mov $857760878,>in5=%rcx +mov $857760878,%rcx + +# qhasm: in10 = 2036477234 +# asm 1: mov $2036477234,>in10=int64#5 +# asm 2: mov $2036477234,>in10=%r8 +mov $2036477234,%r8 + +# qhasm: in15 = 1797285236 +# asm 1: mov $1797285236,>in15=int64#7 +# asm 2: mov $1797285236,>in15=%rax +mov $1797285236,%rax + +# qhasm: ((uint32 *)&x0)[0] = in0 +# asm 1: movl x0=stack128#4 +# asm 2: movl x0=48(%rsp) +movl %edx,48(%rsp) + +# qhasm: ((uint32 *)&x0)[1] = in5 +# asm 1: movl z0=int6464#1 +# asm 2: movdqa z0=%xmm0 +movdqa 48(%rsp),%xmm0 + +# qhasm: z5 = z0[1,1,1,1] +# asm 1: pshufd $0x55,z5=int6464#2 +# asm 2: pshufd $0x55,z5=%xmm1 +pshufd $0x55,%xmm0,%xmm1 + +# qhasm: z10 = z0[2,2,2,2] +# asm 1: pshufd $0xaa,z10=int6464#3 +# asm 2: pshufd $0xaa,z10=%xmm2 +pshufd $0xaa,%xmm0,%xmm2 + +# qhasm: z15 = z0[3,3,3,3] +# asm 1: pshufd $0xff,z15=int6464#4 +# asm 2: pshufd $0xff,z15=%xmm3 +pshufd $0xff,%xmm0,%xmm3 + +# qhasm: z0 = z0[0,0,0,0] +# asm 1: pshufd $0x00,z0=int6464#1 +# asm 2: pshufd $0x00,z0=%xmm0 +pshufd $0x00,%xmm0,%xmm0 + +# qhasm: orig5 = z5 +# asm 1: movdqa orig5=stack128#5 +# asm 2: movdqa orig5=64(%rsp) +movdqa %xmm1,64(%rsp) + +# qhasm: orig10 = z10 +# asm 1: movdqa orig10=stack128#6 +# asm 2: movdqa orig10=80(%rsp) +movdqa %xmm2,80(%rsp) + +# qhasm: orig15 = z15 +# asm 1: movdqa orig15=stack128#7 +# asm 2: movdqa orig15=96(%rsp) +movdqa %xmm3,96(%rsp) + +# qhasm: orig0 = z0 +# asm 1: movdqa orig0=stack128#8 +# asm 2: movdqa orig0=112(%rsp) +movdqa %xmm0,112(%rsp) + +# qhasm: z1 = x1 +# asm 1: movdqa z1=int6464#1 +# asm 2: movdqa z1=%xmm0 +movdqa 0(%rsp),%xmm0 + +# qhasm: z6 = z1[2,2,2,2] +# asm 1: pshufd $0xaa,z6=int6464#2 +# asm 2: pshufd $0xaa,z6=%xmm1 +pshufd $0xaa,%xmm0,%xmm1 + +# qhasm: z11 = z1[3,3,3,3] +# asm 1: pshufd $0xff,z11=int6464#3 +# asm 2: pshufd $0xff,z11=%xmm2 +pshufd $0xff,%xmm0,%xmm2 + +# qhasm: z12 = z1[0,0,0,0] +# asm 1: pshufd $0x00,z12=int6464#4 +# asm 2: pshufd $0x00,z12=%xmm3 +pshufd $0x00,%xmm0,%xmm3 + +# qhasm: z1 = z1[1,1,1,1] +# asm 1: pshufd $0x55,z1=int6464#1 +# asm 2: pshufd $0x55,z1=%xmm0 +pshufd $0x55,%xmm0,%xmm0 + +# qhasm: orig6 = z6 +# asm 1: movdqa orig6=stack128#9 +# asm 2: movdqa orig6=128(%rsp) +movdqa %xmm1,128(%rsp) + +# qhasm: orig11 = z11 +# asm 1: movdqa orig11=stack128#10 +# asm 2: movdqa orig11=144(%rsp) +movdqa %xmm2,144(%rsp) + +# qhasm: orig12 = z12 +# asm 1: movdqa orig12=stack128#11 +# asm 2: movdqa orig12=160(%rsp) +movdqa %xmm3,160(%rsp) + +# qhasm: orig1 = z1 +# asm 1: movdqa orig1=stack128#12 +# asm 2: movdqa orig1=176(%rsp) +movdqa %xmm0,176(%rsp) + +# qhasm: z2 = x2 +# asm 1: movdqa z2=int6464#1 +# asm 2: movdqa z2=%xmm0 +movdqa 16(%rsp),%xmm0 + +# qhasm: z7 = z2[3,3,3,3] +# asm 1: pshufd $0xff,z7=int6464#2 +# asm 2: pshufd $0xff,z7=%xmm1 +pshufd $0xff,%xmm0,%xmm1 + +# qhasm: z13 = z2[1,1,1,1] +# asm 1: pshufd $0x55,z13=int6464#3 +# asm 2: pshufd $0x55,z13=%xmm2 +pshufd $0x55,%xmm0,%xmm2 + +# qhasm: z2 = z2[2,2,2,2] +# asm 1: pshufd $0xaa,z2=int6464#1 +# asm 2: pshufd $0xaa,z2=%xmm0 +pshufd $0xaa,%xmm0,%xmm0 + +# qhasm: orig7 = z7 +# asm 1: movdqa orig7=stack128#13 +# asm 2: movdqa orig7=192(%rsp) +movdqa %xmm1,192(%rsp) + +# qhasm: orig13 = z13 +# asm 1: movdqa orig13=stack128#14 +# asm 2: movdqa orig13=208(%rsp) +movdqa %xmm2,208(%rsp) + +# qhasm: orig2 = z2 +# asm 1: movdqa orig2=stack128#15 +# asm 2: movdqa orig2=224(%rsp) +movdqa %xmm0,224(%rsp) + +# qhasm: z3 = x3 +# asm 1: movdqa z3=int6464#1 +# asm 2: movdqa z3=%xmm0 +movdqa 32(%rsp),%xmm0 + +# qhasm: z4 = z3[0,0,0,0] +# asm 1: pshufd $0x00,z4=int6464#2 +# asm 2: pshufd $0x00,z4=%xmm1 +pshufd $0x00,%xmm0,%xmm1 + +# qhasm: z14 = z3[2,2,2,2] +# asm 1: pshufd $0xaa,z14=int6464#3 +# asm 2: pshufd $0xaa,z14=%xmm2 +pshufd $0xaa,%xmm0,%xmm2 + +# qhasm: z3 = z3[3,3,3,3] +# asm 1: pshufd $0xff,z3=int6464#1 +# asm 2: pshufd $0xff,z3=%xmm0 +pshufd $0xff,%xmm0,%xmm0 + +# qhasm: orig4 = z4 +# asm 1: movdqa orig4=stack128#16 +# asm 2: movdqa orig4=240(%rsp) +movdqa %xmm1,240(%rsp) + +# qhasm: orig14 = z14 +# asm 1: movdqa orig14=stack128#17 +# asm 2: movdqa orig14=256(%rsp) +movdqa %xmm2,256(%rsp) + +# qhasm: orig3 = z3 +# asm 1: movdqa orig3=stack128#18 +# asm 2: movdqa orig3=272(%rsp) +movdqa %xmm0,272(%rsp) + +# qhasm: bytesatleast256: +._bytesatleast256: + +# qhasm: in8 = ((uint32 *)&x2)[0] +# asm 1: movl in8=int64#3d +# asm 2: movl in8=%edx +movl 16(%rsp),%edx + +# qhasm: in9 = ((uint32 *)&x3)[1] +# asm 1: movl 4+in9=int64#4d +# asm 2: movl 4+in9=%ecx +movl 4+32(%rsp),%ecx + +# qhasm: ((uint32 *) &orig8)[0] = in8 +# asm 1: movl orig8=stack128#19 +# asm 2: movl orig8=288(%rsp) +movl %edx,288(%rsp) + +# qhasm: ((uint32 *) &orig9)[0] = in9 +# asm 1: movl orig9=stack128#20 +# asm 2: movl orig9=304(%rsp) +movl %ecx,304(%rsp) + +# qhasm: in8 += 1 +# asm 1: add $1,in9=int64#4 +# asm 2: mov in9=%rcx +mov %rdx,%rcx + +# qhasm: (uint64) in9 >>= 32 +# asm 1: shr $32,in9=int64#4 +# asm 2: mov in9=%rcx +mov %rdx,%rcx + +# qhasm: (uint64) in9 >>= 32 +# asm 1: shr $32,in9=int64#4 +# asm 2: mov in9=%rcx +mov %rdx,%rcx + +# qhasm: (uint64) in9 >>= 32 +# asm 1: shr $32,in9=int64#4 +# asm 2: mov in9=%rcx +mov %rdx,%rcx + +# qhasm: (uint64) in9 >>= 32 +# asm 1: shr $32,x2=stack128#2 +# asm 2: movl x2=16(%rsp) +movl %edx,16(%rsp) + +# qhasm: ((uint32 *)&x3)[1] = in9 +# asm 1: movl bytes_backup=stack64#8 +# asm 2: movq bytes_backup=408(%rsp) +movq %r9,408(%rsp) + +# qhasm: i = 12 +# asm 1: mov $12,>i=int64#3 +# asm 2: mov $12,>i=%rdx +mov $12,%rdx + +# qhasm: z5 = orig5 +# asm 1: movdqa z5=int6464#1 +# asm 2: movdqa z5=%xmm0 +movdqa 64(%rsp),%xmm0 + +# qhasm: z10 = orig10 +# asm 1: movdqa z10=int6464#2 +# asm 2: movdqa z10=%xmm1 +movdqa 80(%rsp),%xmm1 + +# qhasm: z15 = orig15 +# asm 1: movdqa z15=int6464#3 +# asm 2: movdqa z15=%xmm2 +movdqa 96(%rsp),%xmm2 + +# qhasm: z14 = orig14 +# asm 1: movdqa z14=int6464#4 +# asm 2: movdqa z14=%xmm3 +movdqa 256(%rsp),%xmm3 + +# qhasm: z3 = orig3 +# asm 1: movdqa z3=int6464#5 +# asm 2: movdqa z3=%xmm4 +movdqa 272(%rsp),%xmm4 + +# qhasm: z6 = orig6 +# asm 1: movdqa z6=int6464#6 +# asm 2: movdqa z6=%xmm5 +movdqa 128(%rsp),%xmm5 + +# qhasm: z11 = orig11 +# asm 1: movdqa z11=int6464#7 +# asm 2: movdqa z11=%xmm6 +movdqa 144(%rsp),%xmm6 + +# qhasm: z1 = orig1 +# asm 1: movdqa z1=int6464#8 +# asm 2: movdqa z1=%xmm7 +movdqa 176(%rsp),%xmm7 + +# qhasm: z7 = orig7 +# asm 1: movdqa z7=int6464#9 +# asm 2: movdqa z7=%xmm8 +movdqa 192(%rsp),%xmm8 + +# qhasm: z13 = orig13 +# asm 1: movdqa z13=int6464#10 +# asm 2: movdqa z13=%xmm9 +movdqa 208(%rsp),%xmm9 + +# qhasm: z2 = orig2 +# asm 1: movdqa z2=int6464#11 +# asm 2: movdqa z2=%xmm10 +movdqa 224(%rsp),%xmm10 + +# qhasm: z9 = orig9 +# asm 1: movdqa z9=int6464#12 +# asm 2: movdqa z9=%xmm11 +movdqa 304(%rsp),%xmm11 + +# qhasm: z0 = orig0 +# asm 1: movdqa z0=int6464#13 +# asm 2: movdqa z0=%xmm12 +movdqa 112(%rsp),%xmm12 + +# qhasm: z12 = orig12 +# asm 1: movdqa z12=int6464#14 +# asm 2: movdqa z12=%xmm13 +movdqa 160(%rsp),%xmm13 + +# qhasm: z4 = orig4 +# asm 1: movdqa z4=int6464#15 +# asm 2: movdqa z4=%xmm14 +movdqa 240(%rsp),%xmm14 + +# qhasm: z8 = orig8 +# asm 1: movdqa z8=int6464#16 +# asm 2: movdqa z8=%xmm15 +movdqa 288(%rsp),%xmm15 + +# qhasm: mainloop1: +._mainloop1: + +# qhasm: z10_stack = z10 +# asm 1: movdqa z10_stack=stack128#21 +# asm 2: movdqa z10_stack=320(%rsp) +movdqa %xmm1,320(%rsp) + +# qhasm: z15_stack = z15 +# asm 1: movdqa z15_stack=stack128#22 +# asm 2: movdqa z15_stack=336(%rsp) +movdqa %xmm2,336(%rsp) + +# qhasm: y4 = z12 +# asm 1: movdqa y4=int6464#2 +# asm 2: movdqa y4=%xmm1 +movdqa %xmm13,%xmm1 + +# qhasm: uint32323232 y4 += z0 +# asm 1: paddd r4=int6464#3 +# asm 2: movdqa r4=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y4 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,y9=int6464#2 +# asm 2: movdqa y9=%xmm1 +movdqa %xmm7,%xmm1 + +# qhasm: uint32323232 y9 += z5 +# asm 1: paddd r9=int6464#3 +# asm 2: movdqa r9=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y9 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,y8=int6464#2 +# asm 2: movdqa y8=%xmm1 +movdqa %xmm12,%xmm1 + +# qhasm: uint32323232 y8 += z4 +# asm 1: paddd r8=int6464#3 +# asm 2: movdqa r8=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y8 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,y13=int6464#2 +# asm 2: movdqa y13=%xmm1 +movdqa %xmm0,%xmm1 + +# qhasm: uint32323232 y13 += z9 +# asm 1: paddd r13=int6464#3 +# asm 2: movdqa r13=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y13 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,y12=int6464#2 +# asm 2: movdqa y12=%xmm1 +movdqa %xmm14,%xmm1 + +# qhasm: uint32323232 y12 += z8 +# asm 1: paddd r12=int6464#3 +# asm 2: movdqa r12=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y12 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,y1=int6464#2 +# asm 2: movdqa y1=%xmm1 +movdqa %xmm11,%xmm1 + +# qhasm: uint32323232 y1 += z13 +# asm 1: paddd r1=int6464#3 +# asm 2: movdqa r1=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y1 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,y0=int6464#2 +# asm 2: movdqa y0=%xmm1 +movdqa %xmm15,%xmm1 + +# qhasm: uint32323232 y0 += z12 +# asm 1: paddd r0=int6464#3 +# asm 2: movdqa r0=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y0 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,z10=int6464#2 +# asm 2: movdqa z10=%xmm1 +movdqa 320(%rsp),%xmm1 + +# qhasm: z0_stack = z0 +# asm 1: movdqa z0_stack=stack128#21 +# asm 2: movdqa z0_stack=320(%rsp) +movdqa %xmm12,320(%rsp) + +# qhasm: y5 = z13 +# asm 1: movdqa y5=int6464#3 +# asm 2: movdqa y5=%xmm2 +movdqa %xmm9,%xmm2 + +# qhasm: uint32323232 y5 += z1 +# asm 1: paddd r5=int6464#13 +# asm 2: movdqa r5=%xmm12 +movdqa %xmm2,%xmm12 + +# qhasm: uint32323232 y5 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,y14=int6464#3 +# asm 2: movdqa y14=%xmm2 +movdqa %xmm5,%xmm2 + +# qhasm: uint32323232 y14 += z10 +# asm 1: paddd r14=int6464#13 +# asm 2: movdqa r14=%xmm12 +movdqa %xmm2,%xmm12 + +# qhasm: uint32323232 y14 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,z15=int6464#3 +# asm 2: movdqa z15=%xmm2 +movdqa 336(%rsp),%xmm2 + +# qhasm: z5_stack = z5 +# asm 1: movdqa z5_stack=stack128#22 +# asm 2: movdqa z5_stack=336(%rsp) +movdqa %xmm0,336(%rsp) + +# qhasm: y3 = z11 +# asm 1: movdqa y3=int6464#1 +# asm 2: movdqa y3=%xmm0 +movdqa %xmm6,%xmm0 + +# qhasm: uint32323232 y3 += z15 +# asm 1: paddd r3=int6464#13 +# asm 2: movdqa r3=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y3 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,y2=int6464#1 +# asm 2: movdqa y2=%xmm0 +movdqa %xmm1,%xmm0 + +# qhasm: uint32323232 y2 += z14 +# asm 1: paddd r2=int6464#13 +# asm 2: movdqa r2=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y2 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,y7=int6464#1 +# asm 2: movdqa y7=%xmm0 +movdqa %xmm2,%xmm0 + +# qhasm: uint32323232 y7 += z3 +# asm 1: paddd r7=int6464#13 +# asm 2: movdqa r7=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y7 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,y6=int6464#1 +# asm 2: movdqa y6=%xmm0 +movdqa %xmm3,%xmm0 + +# qhasm: uint32323232 y6 += z2 +# asm 1: paddd r6=int6464#13 +# asm 2: movdqa r6=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y6 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,y11=int6464#1 +# asm 2: movdqa y11=%xmm0 +movdqa %xmm4,%xmm0 + +# qhasm: uint32323232 y11 += z7 +# asm 1: paddd r11=int6464#13 +# asm 2: movdqa r11=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y11 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,y10=int6464#1 +# asm 2: movdqa y10=%xmm0 +movdqa %xmm10,%xmm0 + +# qhasm: uint32323232 y10 += z6 +# asm 1: paddd r10=int6464#13 +# asm 2: movdqa r10=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y10 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,z0=int6464#1 +# asm 2: movdqa z0=%xmm0 +movdqa 320(%rsp),%xmm0 + +# qhasm: z10_stack = z10 +# asm 1: movdqa z10_stack=stack128#21 +# asm 2: movdqa z10_stack=320(%rsp) +movdqa %xmm1,320(%rsp) + +# qhasm: y1 = z3 +# asm 1: movdqa y1=int6464#2 +# asm 2: movdqa y1=%xmm1 +movdqa %xmm4,%xmm1 + +# qhasm: uint32323232 y1 += z0 +# asm 1: paddd r1=int6464#13 +# asm 2: movdqa r1=%xmm12 +movdqa %xmm1,%xmm12 + +# qhasm: uint32323232 y1 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,y15=int6464#2 +# asm 2: movdqa y15=%xmm1 +movdqa %xmm8,%xmm1 + +# qhasm: uint32323232 y15 += z11 +# asm 1: paddd r15=int6464#13 +# asm 2: movdqa r15=%xmm12 +movdqa %xmm1,%xmm12 + +# qhasm: uint32323232 y15 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,z5=int6464#13 +# asm 2: movdqa z5=%xmm12 +movdqa 336(%rsp),%xmm12 + +# qhasm: z15_stack = z15 +# asm 1: movdqa z15_stack=stack128#22 +# asm 2: movdqa z15_stack=336(%rsp) +movdqa %xmm2,336(%rsp) + +# qhasm: y6 = z4 +# asm 1: movdqa y6=int6464#2 +# asm 2: movdqa y6=%xmm1 +movdqa %xmm14,%xmm1 + +# qhasm: uint32323232 y6 += z5 +# asm 1: paddd r6=int6464#3 +# asm 2: movdqa r6=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y6 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,y2=int6464#2 +# asm 2: movdqa y2=%xmm1 +movdqa %xmm0,%xmm1 + +# qhasm: uint32323232 y2 += z1 +# asm 1: paddd r2=int6464#3 +# asm 2: movdqa r2=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y2 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,y7=int6464#2 +# asm 2: movdqa y7=%xmm1 +movdqa %xmm12,%xmm1 + +# qhasm: uint32323232 y7 += z6 +# asm 1: paddd r7=int6464#3 +# asm 2: movdqa r7=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y7 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,y3=int6464#2 +# asm 2: movdqa y3=%xmm1 +movdqa %xmm7,%xmm1 + +# qhasm: uint32323232 y3 += z2 +# asm 1: paddd r3=int6464#3 +# asm 2: movdqa r3=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y3 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,y4=int6464#2 +# asm 2: movdqa y4=%xmm1 +movdqa %xmm5,%xmm1 + +# qhasm: uint32323232 y4 += z7 +# asm 1: paddd r4=int6464#3 +# asm 2: movdqa r4=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y4 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,y0=int6464#2 +# asm 2: movdqa y0=%xmm1 +movdqa %xmm10,%xmm1 + +# qhasm: uint32323232 y0 += z3 +# asm 1: paddd r0=int6464#3 +# asm 2: movdqa r0=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y0 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,z10=int6464#2 +# asm 2: movdqa z10=%xmm1 +movdqa 320(%rsp),%xmm1 + +# qhasm: z0_stack = z0 +# asm 1: movdqa z0_stack=stack128#21 +# asm 2: movdqa z0_stack=320(%rsp) +movdqa %xmm0,320(%rsp) + +# qhasm: y5 = z7 +# asm 1: movdqa y5=int6464#1 +# asm 2: movdqa y5=%xmm0 +movdqa %xmm8,%xmm0 + +# qhasm: uint32323232 y5 += z4 +# asm 1: paddd r5=int6464#3 +# asm 2: movdqa r5=%xmm2 +movdqa %xmm0,%xmm2 + +# qhasm: uint32323232 y5 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,y11=int6464#1 +# asm 2: movdqa y11=%xmm0 +movdqa %xmm11,%xmm0 + +# qhasm: uint32323232 y11 += z10 +# asm 1: paddd r11=int6464#3 +# asm 2: movdqa r11=%xmm2 +movdqa %xmm0,%xmm2 + +# qhasm: uint32323232 y11 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,z15=int6464#3 +# asm 2: movdqa z15=%xmm2 +movdqa 336(%rsp),%xmm2 + +# qhasm: z5_stack = z5 +# asm 1: movdqa z5_stack=stack128#22 +# asm 2: movdqa z5_stack=336(%rsp) +movdqa %xmm12,336(%rsp) + +# qhasm: y12 = z14 +# asm 1: movdqa y12=int6464#1 +# asm 2: movdqa y12=%xmm0 +movdqa %xmm3,%xmm0 + +# qhasm: uint32323232 y12 += z15 +# asm 1: paddd r12=int6464#13 +# asm 2: movdqa r12=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y12 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,y8=int6464#1 +# asm 2: movdqa y8=%xmm0 +movdqa %xmm1,%xmm0 + +# qhasm: uint32323232 y8 += z11 +# asm 1: paddd r8=int6464#13 +# asm 2: movdqa r8=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y8 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,y13=int6464#1 +# asm 2: movdqa y13=%xmm0 +movdqa %xmm2,%xmm0 + +# qhasm: uint32323232 y13 += z12 +# asm 1: paddd r13=int6464#13 +# asm 2: movdqa r13=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y13 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,y9=int6464#1 +# asm 2: movdqa y9=%xmm0 +movdqa %xmm6,%xmm0 + +# qhasm: uint32323232 y9 += z8 +# asm 1: paddd r9=int6464#13 +# asm 2: movdqa r9=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y9 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,y14=int6464#1 +# asm 2: movdqa y14=%xmm0 +movdqa %xmm13,%xmm0 + +# qhasm: uint32323232 y14 += z13 +# asm 1: paddd r14=int6464#13 +# asm 2: movdqa r14=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y14 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,y10=int6464#1 +# asm 2: movdqa y10=%xmm0 +movdqa %xmm15,%xmm0 + +# qhasm: uint32323232 y10 += z9 +# asm 1: paddd r10=int6464#13 +# asm 2: movdqa r10=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y10 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,y15=int6464#1 +# asm 2: movdqa y15=%xmm0 +movdqa %xmm9,%xmm0 + +# qhasm: uint32323232 y15 += z14 +# asm 1: paddd r15=int6464#13 +# asm 2: movdqa r15=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y15 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,z0=int6464#13 +# asm 2: movdqa z0=%xmm12 +movdqa 320(%rsp),%xmm12 + +# qhasm: z5 = z5_stack +# asm 1: movdqa z5=int6464#1 +# asm 2: movdqa z5=%xmm0 +movdqa 336(%rsp),%xmm0 + +# qhasm: unsigned>? i -= 2 +# asm 1: sub $2, +ja ._mainloop1 + +# qhasm: uint32323232 z0 += orig0 +# asm 1: paddd in0=int64#3 +# asm 2: movd in0=%rdx +movd %xmm12,%rdx + +# qhasm: in1 = z1 +# asm 1: movd in1=int64#4 +# asm 2: movd in1=%rcx +movd %xmm7,%rcx + +# qhasm: in2 = z2 +# asm 1: movd in2=int64#5 +# asm 2: movd in2=%r8 +movd %xmm10,%r8 + +# qhasm: in3 = z3 +# asm 1: movd in3=int64#6 +# asm 2: movd in3=%r9 +movd %xmm4,%r9 + +# qhasm: z0 <<<= 96 +# asm 1: pshufd $0x39,in0=int64#3 +# asm 2: movd in0=%rdx +movd %xmm12,%rdx + +# qhasm: in1 = z1 +# asm 1: movd in1=int64#4 +# asm 2: movd in1=%rcx +movd %xmm7,%rcx + +# qhasm: in2 = z2 +# asm 1: movd in2=int64#5 +# asm 2: movd in2=%r8 +movd %xmm10,%r8 + +# qhasm: in3 = z3 +# asm 1: movd in3=int64#6 +# asm 2: movd in3=%r9 +movd %xmm4,%r9 + +# qhasm: z0 <<<= 96 +# asm 1: pshufd $0x39,in0=int64#3 +# asm 2: movd in0=%rdx +movd %xmm12,%rdx + +# qhasm: in1 = z1 +# asm 1: movd in1=int64#4 +# asm 2: movd in1=%rcx +movd %xmm7,%rcx + +# qhasm: in2 = z2 +# asm 1: movd in2=int64#5 +# asm 2: movd in2=%r8 +movd %xmm10,%r8 + +# qhasm: in3 = z3 +# asm 1: movd in3=int64#6 +# asm 2: movd in3=%r9 +movd %xmm4,%r9 + +# qhasm: z0 <<<= 96 +# asm 1: pshufd $0x39,in0=int64#3 +# asm 2: movd in0=%rdx +movd %xmm12,%rdx + +# qhasm: in1 = z1 +# asm 1: movd in1=int64#4 +# asm 2: movd in1=%rcx +movd %xmm7,%rcx + +# qhasm: in2 = z2 +# asm 1: movd in2=int64#5 +# asm 2: movd in2=%r8 +movd %xmm10,%r8 + +# qhasm: in3 = z3 +# asm 1: movd in3=int64#6 +# asm 2: movd in3=%r9 +movd %xmm4,%r9 + +# qhasm: (uint32) in0 ^= *(uint32 *) (m + 192) +# asm 1: xorl 192(in4=int64#3 +# asm 2: movd in4=%rdx +movd %xmm14,%rdx + +# qhasm: in5 = z5 +# asm 1: movd in5=int64#4 +# asm 2: movd in5=%rcx +movd %xmm0,%rcx + +# qhasm: in6 = z6 +# asm 1: movd in6=int64#5 +# asm 2: movd in6=%r8 +movd %xmm5,%r8 + +# qhasm: in7 = z7 +# asm 1: movd in7=int64#6 +# asm 2: movd in7=%r9 +movd %xmm8,%r9 + +# qhasm: z4 <<<= 96 +# asm 1: pshufd $0x39,in4=int64#3 +# asm 2: movd in4=%rdx +movd %xmm14,%rdx + +# qhasm: in5 = z5 +# asm 1: movd in5=int64#4 +# asm 2: movd in5=%rcx +movd %xmm0,%rcx + +# qhasm: in6 = z6 +# asm 1: movd in6=int64#5 +# asm 2: movd in6=%r8 +movd %xmm5,%r8 + +# qhasm: in7 = z7 +# asm 1: movd in7=int64#6 +# asm 2: movd in7=%r9 +movd %xmm8,%r9 + +# qhasm: z4 <<<= 96 +# asm 1: pshufd $0x39,in4=int64#3 +# asm 2: movd in4=%rdx +movd %xmm14,%rdx + +# qhasm: in5 = z5 +# asm 1: movd in5=int64#4 +# asm 2: movd in5=%rcx +movd %xmm0,%rcx + +# qhasm: in6 = z6 +# asm 1: movd in6=int64#5 +# asm 2: movd in6=%r8 +movd %xmm5,%r8 + +# qhasm: in7 = z7 +# asm 1: movd in7=int64#6 +# asm 2: movd in7=%r9 +movd %xmm8,%r9 + +# qhasm: z4 <<<= 96 +# asm 1: pshufd $0x39,in4=int64#3 +# asm 2: movd in4=%rdx +movd %xmm14,%rdx + +# qhasm: in5 = z5 +# asm 1: movd in5=int64#4 +# asm 2: movd in5=%rcx +movd %xmm0,%rcx + +# qhasm: in6 = z6 +# asm 1: movd in6=int64#5 +# asm 2: movd in6=%r8 +movd %xmm5,%r8 + +# qhasm: in7 = z7 +# asm 1: movd in7=int64#6 +# asm 2: movd in7=%r9 +movd %xmm8,%r9 + +# qhasm: (uint32) in4 ^= *(uint32 *) (m + 208) +# asm 1: xorl 208(in8=int64#3 +# asm 2: movd in8=%rdx +movd %xmm15,%rdx + +# qhasm: in9 = z9 +# asm 1: movd in9=int64#4 +# asm 2: movd in9=%rcx +movd %xmm11,%rcx + +# qhasm: in10 = z10 +# asm 1: movd in10=int64#5 +# asm 2: movd in10=%r8 +movd %xmm1,%r8 + +# qhasm: in11 = z11 +# asm 1: movd in11=int64#6 +# asm 2: movd in11=%r9 +movd %xmm6,%r9 + +# qhasm: z8 <<<= 96 +# asm 1: pshufd $0x39,in8=int64#3 +# asm 2: movd in8=%rdx +movd %xmm15,%rdx + +# qhasm: in9 = z9 +# asm 1: movd in9=int64#4 +# asm 2: movd in9=%rcx +movd %xmm11,%rcx + +# qhasm: in10 = z10 +# asm 1: movd in10=int64#5 +# asm 2: movd in10=%r8 +movd %xmm1,%r8 + +# qhasm: in11 = z11 +# asm 1: movd in11=int64#6 +# asm 2: movd in11=%r9 +movd %xmm6,%r9 + +# qhasm: z8 <<<= 96 +# asm 1: pshufd $0x39,in8=int64#3 +# asm 2: movd in8=%rdx +movd %xmm15,%rdx + +# qhasm: in9 = z9 +# asm 1: movd in9=int64#4 +# asm 2: movd in9=%rcx +movd %xmm11,%rcx + +# qhasm: in10 = z10 +# asm 1: movd in10=int64#5 +# asm 2: movd in10=%r8 +movd %xmm1,%r8 + +# qhasm: in11 = z11 +# asm 1: movd in11=int64#6 +# asm 2: movd in11=%r9 +movd %xmm6,%r9 + +# qhasm: z8 <<<= 96 +# asm 1: pshufd $0x39,in8=int64#3 +# asm 2: movd in8=%rdx +movd %xmm15,%rdx + +# qhasm: in9 = z9 +# asm 1: movd in9=int64#4 +# asm 2: movd in9=%rcx +movd %xmm11,%rcx + +# qhasm: in10 = z10 +# asm 1: movd in10=int64#5 +# asm 2: movd in10=%r8 +movd %xmm1,%r8 + +# qhasm: in11 = z11 +# asm 1: movd in11=int64#6 +# asm 2: movd in11=%r9 +movd %xmm6,%r9 + +# qhasm: (uint32) in8 ^= *(uint32 *) (m + 224) +# asm 1: xorl 224(in12=int64#3 +# asm 2: movd in12=%rdx +movd %xmm13,%rdx + +# qhasm: in13 = z13 +# asm 1: movd in13=int64#4 +# asm 2: movd in13=%rcx +movd %xmm9,%rcx + +# qhasm: in14 = z14 +# asm 1: movd in14=int64#5 +# asm 2: movd in14=%r8 +movd %xmm3,%r8 + +# qhasm: in15 = z15 +# asm 1: movd in15=int64#6 +# asm 2: movd in15=%r9 +movd %xmm2,%r9 + +# qhasm: z12 <<<= 96 +# asm 1: pshufd $0x39,in12=int64#3 +# asm 2: movd in12=%rdx +movd %xmm13,%rdx + +# qhasm: in13 = z13 +# asm 1: movd in13=int64#4 +# asm 2: movd in13=%rcx +movd %xmm9,%rcx + +# qhasm: in14 = z14 +# asm 1: movd in14=int64#5 +# asm 2: movd in14=%r8 +movd %xmm3,%r8 + +# qhasm: in15 = z15 +# asm 1: movd in15=int64#6 +# asm 2: movd in15=%r9 +movd %xmm2,%r9 + +# qhasm: z12 <<<= 96 +# asm 1: pshufd $0x39,in12=int64#3 +# asm 2: movd in12=%rdx +movd %xmm13,%rdx + +# qhasm: in13 = z13 +# asm 1: movd in13=int64#4 +# asm 2: movd in13=%rcx +movd %xmm9,%rcx + +# qhasm: in14 = z14 +# asm 1: movd in14=int64#5 +# asm 2: movd in14=%r8 +movd %xmm3,%r8 + +# qhasm: in15 = z15 +# asm 1: movd in15=int64#6 +# asm 2: movd in15=%r9 +movd %xmm2,%r9 + +# qhasm: z12 <<<= 96 +# asm 1: pshufd $0x39,in12=int64#3 +# asm 2: movd in12=%rdx +movd %xmm13,%rdx + +# qhasm: in13 = z13 +# asm 1: movd in13=int64#4 +# asm 2: movd in13=%rcx +movd %xmm9,%rcx + +# qhasm: in14 = z14 +# asm 1: movd in14=int64#5 +# asm 2: movd in14=%r8 +movd %xmm3,%r8 + +# qhasm: in15 = z15 +# asm 1: movd in15=int64#6 +# asm 2: movd in15=%r9 +movd %xmm2,%r9 + +# qhasm: (uint32) in12 ^= *(uint32 *) (m + 240) +# asm 1: xorl 240(bytes=int64#6 +# asm 2: movq bytes=%r9 +movq 408(%rsp),%r9 + +# qhasm: bytes -= 256 +# asm 1: sub $256,? bytes - 0 +# asm 1: cmp $0, +jbe ._done +# comment:fp stack unchanged by fallthrough + +# qhasm: bytesbetween1and255: +._bytesbetween1and255: + +# qhasm: unsignedctarget=int64#3 +# asm 2: mov ctarget=%rdx +mov %rdi,%rdx + +# qhasm: out = &tmp +# asm 1: leaq out=int64#1 +# asm 2: leaq out=%rdi +leaq 416(%rsp),%rdi + +# qhasm: i = bytes +# asm 1: mov i=int64#4 +# asm 2: mov i=%rcx +mov %r9,%rcx + +# qhasm: while (i) { *out++ = *m++; --i } +rep movsb + +# qhasm: out = &tmp +# asm 1: leaq out=int64#1 +# asm 2: leaq out=%rdi +leaq 416(%rsp),%rdi + +# qhasm: m = &tmp +# asm 1: leaq m=int64#2 +# asm 2: leaq m=%rsi +leaq 416(%rsp),%rsi +# comment:fp stack unchanged by fallthrough + +# qhasm: nocopy: +._nocopy: + +# qhasm: bytes_backup = bytes +# asm 1: movq bytes_backup=stack64#8 +# asm 2: movq bytes_backup=408(%rsp) +movq %r9,408(%rsp) + +# qhasm: diag0 = x0 +# asm 1: movdqa diag0=int6464#1 +# asm 2: movdqa diag0=%xmm0 +movdqa 48(%rsp),%xmm0 + +# qhasm: diag1 = x1 +# asm 1: movdqa diag1=int6464#2 +# asm 2: movdqa diag1=%xmm1 +movdqa 0(%rsp),%xmm1 + +# qhasm: diag2 = x2 +# asm 1: movdqa diag2=int6464#3 +# asm 2: movdqa diag2=%xmm2 +movdqa 16(%rsp),%xmm2 + +# qhasm: diag3 = x3 +# asm 1: movdqa diag3=int6464#4 +# asm 2: movdqa diag3=%xmm3 +movdqa 32(%rsp),%xmm3 + +# qhasm: a0 = diag1 +# asm 1: movdqa a0=int6464#5 +# asm 2: movdqa a0=%xmm4 +movdqa %xmm1,%xmm4 + +# qhasm: i = 12 +# asm 1: mov $12,>i=int64#4 +# asm 2: mov $12,>i=%rcx +mov $12,%rcx + +# qhasm: mainloop2: +._mainloop2: + +# qhasm: uint32323232 a0 += diag0 +# asm 1: paddd a1=int6464#6 +# asm 2: movdqa a1=%xmm5 +movdqa %xmm0,%xmm5 + +# qhasm: b0 = a0 +# asm 1: movdqa b0=int6464#7 +# asm 2: movdqa b0=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a0 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,a2=int6464#5 +# asm 2: movdqa a2=%xmm4 +movdqa %xmm3,%xmm4 + +# qhasm: b1 = a1 +# asm 1: movdqa b1=int6464#7 +# asm 2: movdqa b1=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a1 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,a3=int6464#6 +# asm 2: movdqa a3=%xmm5 +movdqa %xmm2,%xmm5 + +# qhasm: b2 = a2 +# asm 1: movdqa b2=int6464#7 +# asm 2: movdqa b2=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a2 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,a4=int6464#5 +# asm 2: movdqa a4=%xmm4 +movdqa %xmm3,%xmm4 + +# qhasm: b3 = a3 +# asm 1: movdqa b3=int6464#7 +# asm 2: movdqa b3=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a3 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,a5=int6464#6 +# asm 2: movdqa a5=%xmm5 +movdqa %xmm0,%xmm5 + +# qhasm: b4 = a4 +# asm 1: movdqa b4=int6464#7 +# asm 2: movdqa b4=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a4 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,a6=int6464#5 +# asm 2: movdqa a6=%xmm4 +movdqa %xmm1,%xmm4 + +# qhasm: b5 = a5 +# asm 1: movdqa b5=int6464#7 +# asm 2: movdqa b5=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a5 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,a7=int6464#6 +# asm 2: movdqa a7=%xmm5 +movdqa %xmm2,%xmm5 + +# qhasm: b6 = a6 +# asm 1: movdqa b6=int6464#7 +# asm 2: movdqa b6=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a6 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,a0=int6464#5 +# asm 2: movdqa a0=%xmm4 +movdqa %xmm1,%xmm4 + +# qhasm: b7 = a7 +# asm 1: movdqa b7=int6464#7 +# asm 2: movdqa b7=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a7 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,a1=int6464#6 +# asm 2: movdqa a1=%xmm5 +movdqa %xmm0,%xmm5 + +# qhasm: b0 = a0 +# asm 1: movdqa b0=int6464#7 +# asm 2: movdqa b0=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a0 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,a2=int6464#5 +# asm 2: movdqa a2=%xmm4 +movdqa %xmm3,%xmm4 + +# qhasm: b1 = a1 +# asm 1: movdqa b1=int6464#7 +# asm 2: movdqa b1=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a1 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,a3=int6464#6 +# asm 2: movdqa a3=%xmm5 +movdqa %xmm2,%xmm5 + +# qhasm: b2 = a2 +# asm 1: movdqa b2=int6464#7 +# asm 2: movdqa b2=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a2 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,a4=int6464#5 +# asm 2: movdqa a4=%xmm4 +movdqa %xmm3,%xmm4 + +# qhasm: b3 = a3 +# asm 1: movdqa b3=int6464#7 +# asm 2: movdqa b3=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a3 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,a5=int6464#6 +# asm 2: movdqa a5=%xmm5 +movdqa %xmm0,%xmm5 + +# qhasm: b4 = a4 +# asm 1: movdqa b4=int6464#7 +# asm 2: movdqa b4=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a4 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,a6=int6464#5 +# asm 2: movdqa a6=%xmm4 +movdqa %xmm1,%xmm4 + +# qhasm: b5 = a5 +# asm 1: movdqa b5=int6464#7 +# asm 2: movdqa b5=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a5 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,a7=int6464#6 +# asm 2: movdqa a7=%xmm5 +movdqa %xmm2,%xmm5 + +# qhasm: b6 = a6 +# asm 1: movdqa b6=int6464#7 +# asm 2: movdqa b6=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a6 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,? i -= 4 +# asm 1: sub $4,a0=int6464#5 +# asm 2: movdqa a0=%xmm4 +movdqa %xmm1,%xmm4 + +# qhasm: b7 = a7 +# asm 1: movdqa b7=int6464#7 +# asm 2: movdqa b7=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a7 <<= 18 +# asm 1: pslld $18,b0=int6464#8,>b0=int6464#8 +# asm 2: pxor >b0=%xmm7,>b0=%xmm7 +pxor %xmm7,%xmm7 + +# qhasm: uint32323232 b7 >>= 14 +# asm 1: psrld $14, +ja ._mainloop2 + +# qhasm: uint32323232 diag0 += x0 +# asm 1: paddd in0=int64#4 +# asm 2: movd in0=%rcx +movd %xmm0,%rcx + +# qhasm: in12 = diag1 +# asm 1: movd in12=int64#5 +# asm 2: movd in12=%r8 +movd %xmm1,%r8 + +# qhasm: in8 = diag2 +# asm 1: movd in8=int64#6 +# asm 2: movd in8=%r9 +movd %xmm2,%r9 + +# qhasm: in4 = diag3 +# asm 1: movd in4=int64#7 +# asm 2: movd in4=%rax +movd %xmm3,%rax + +# qhasm: diag0 <<<= 96 +# asm 1: pshufd $0x39,in5=int64#4 +# asm 2: movd in5=%rcx +movd %xmm0,%rcx + +# qhasm: in1 = diag1 +# asm 1: movd in1=int64#5 +# asm 2: movd in1=%r8 +movd %xmm1,%r8 + +# qhasm: in13 = diag2 +# asm 1: movd in13=int64#6 +# asm 2: movd in13=%r9 +movd %xmm2,%r9 + +# qhasm: in9 = diag3 +# asm 1: movd in9=int64#7 +# asm 2: movd in9=%rax +movd %xmm3,%rax + +# qhasm: diag0 <<<= 96 +# asm 1: pshufd $0x39,in10=int64#4 +# asm 2: movd in10=%rcx +movd %xmm0,%rcx + +# qhasm: in6 = diag1 +# asm 1: movd in6=int64#5 +# asm 2: movd in6=%r8 +movd %xmm1,%r8 + +# qhasm: in2 = diag2 +# asm 1: movd in2=int64#6 +# asm 2: movd in2=%r9 +movd %xmm2,%r9 + +# qhasm: in14 = diag3 +# asm 1: movd in14=int64#7 +# asm 2: movd in14=%rax +movd %xmm3,%rax + +# qhasm: diag0 <<<= 96 +# asm 1: pshufd $0x39,in15=int64#4 +# asm 2: movd in15=%rcx +movd %xmm0,%rcx + +# qhasm: in11 = diag1 +# asm 1: movd in11=int64#5 +# asm 2: movd in11=%r8 +movd %xmm1,%r8 + +# qhasm: in7 = diag2 +# asm 1: movd in7=int64#6 +# asm 2: movd in7=%r9 +movd %xmm2,%r9 + +# qhasm: in3 = diag3 +# asm 1: movd in3=int64#7 +# asm 2: movd in3=%rax +movd %xmm3,%rax + +# qhasm: (uint32) in15 ^= *(uint32 *) (m + 60) +# asm 1: xorl 60(bytes=int64#6 +# asm 2: movq bytes=%r9 +movq 408(%rsp),%r9 + +# qhasm: in8 = ((uint32 *)&x2)[0] +# asm 1: movl in8=int64#4d +# asm 2: movl in8=%ecx +movl 16(%rsp),%ecx + +# qhasm: in9 = ((uint32 *)&x3)[1] +# asm 1: movl 4+in9=int64#5d +# asm 2: movl 4+in9=%r8d +movl 4+32(%rsp),%r8d + +# qhasm: in8 += 1 +# asm 1: add $1,in9=int64#5 +# asm 2: mov in9=%r8 +mov %rcx,%r8 + +# qhasm: (uint64) in9 >>= 32 +# asm 1: shr $32,x2=stack128#2 +# asm 2: movl x2=16(%rsp) +movl %ecx,16(%rsp) + +# qhasm: ((uint32 *)&x3)[1] = in9 +# asm 1: movl ? unsigned +ja ._bytesatleast65 +# comment:fp stack unchanged by jump + +# qhasm: goto bytesatleast64 if !unsigned< +jae ._bytesatleast64 + +# qhasm: m = out +# asm 1: mov m=int64#2 +# asm 2: mov m=%rsi +mov %rdi,%rsi + +# qhasm: out = ctarget +# asm 1: mov out=int64#1 +# asm 2: mov out=%rdi +mov %rdx,%rdi + +# qhasm: i = bytes +# asm 1: mov i=int64#4 +# asm 2: mov i=%rcx +mov %r9,%rcx + +# qhasm: while (i) { *out++ = *m++; --i } +rep movsb +# comment:fp stack unchanged by fallthrough + +# qhasm: bytesatleast64: +._bytesatleast64: +# comment:fp stack unchanged by fallthrough + +# qhasm: done: +._done: + +# qhasm: r11_caller = r11_stack +# asm 1: movq r11_caller=int64#9 +# asm 2: movq r11_caller=%r11 +movq 352(%rsp),%r11 + +# qhasm: r12_caller = r12_stack +# asm 1: movq r12_caller=int64#10 +# asm 2: movq r12_caller=%r12 +movq 360(%rsp),%r12 + +# qhasm: r13_caller = r13_stack +# asm 1: movq r13_caller=int64#11 +# asm 2: movq r13_caller=%r13 +movq 368(%rsp),%r13 + +# qhasm: r14_caller = r14_stack +# asm 1: movq r14_caller=int64#12 +# asm 2: movq r14_caller=%r14 +movq 376(%rsp),%r14 + +# qhasm: r15_caller = r15_stack +# asm 1: movq r15_caller=int64#13 +# asm 2: movq r15_caller=%r15 +movq 384(%rsp),%r15 + +# qhasm: rbx_caller = rbx_stack +# asm 1: movq rbx_caller=int64#14 +# asm 2: movq rbx_caller=%rbx +movq 392(%rsp),%rbx + +# qhasm: rbp_caller = rbp_stack +# asm 1: movq rbp_caller=int64#15 +# asm 2: movq rbp_caller=%rbp +movq 400(%rsp),%rbp + +# qhasm: leave +add %r11,%rsp +xor %rax,%rax +xor %rdx,%rdx +ret + +# qhasm: bytesatleast65: +._bytesatleast65: + +# qhasm: bytes -= 64 +# asm 1: sub $64,