From 35748654f39a99c226cd14f3b92822eb64bd7037 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Sun, 1 Dec 2013 07:40:27 +0100 Subject: Add fast SSE2 implementations "xmm" for Salsa20 and Salsa20/12 Public Domain implementations by D. J. Bernstein, see http://cr.yp.to/snuffle.html --- src/crypto/cipher/salsa2012/CMakeLists.txt | 1 + src/crypto/cipher/salsa2012/xmm/CMakeLists.txt | 13 + .../cipher/salsa2012/xmm/salsa2012_amd64_xmm6.s | 4823 +++++++++++++++++++ .../cipher/salsa2012/xmm/salsa2012_x86_xmm5.s | 5078 ++++++++++++++++++++ src/crypto/cipher/salsa2012/xmm/salsa2012_xmm.c | 84 + 5 files changed, 9999 insertions(+) create mode 100644 src/crypto/cipher/salsa2012/xmm/CMakeLists.txt create mode 100644 src/crypto/cipher/salsa2012/xmm/salsa2012_amd64_xmm6.s create mode 100644 src/crypto/cipher/salsa2012/xmm/salsa2012_x86_xmm5.s create mode 100644 src/crypto/cipher/salsa2012/xmm/salsa2012_xmm.c (limited to 'src/crypto/cipher/salsa2012') diff --git a/src/crypto/cipher/salsa2012/CMakeLists.txt b/src/crypto/cipher/salsa2012/CMakeLists.txt index eb853e0..4821f41 100644 --- a/src/crypto/cipher/salsa2012/CMakeLists.txt +++ b/src/crypto/cipher/salsa2012/CMakeLists.txt @@ -1,2 +1,3 @@ fastd_cipher(salsa2012 salsa2012.c) +add_subdirectory(xmm) add_subdirectory(nacl) diff --git a/src/crypto/cipher/salsa2012/xmm/CMakeLists.txt b/src/crypto/cipher/salsa2012/xmm/CMakeLists.txt new file mode 100644 index 0000000..9cb5222 --- /dev/null +++ b/src/crypto/cipher/salsa2012/xmm/CMakeLists.txt @@ -0,0 +1,13 @@ +if(ARCH_X86_64) + fastd_cipher_impl(salsa2012 xmm + salsa2012_xmm.c + salsa2012_amd64_xmm6.s + ) +endif(ARCH_X86_64) + +if(ARCH_X86) + fastd_cipher_impl(salsa2012 xmm + salsa2012_xmm.c + salsa2012_x86_xmm5.s + ) +endif(ARCH_X86) diff --git a/src/crypto/cipher/salsa2012/xmm/salsa2012_amd64_xmm6.s b/src/crypto/cipher/salsa2012/xmm/salsa2012_amd64_xmm6.s new file mode 100644 index 0000000..0e26dc9 --- /dev/null +++ b/src/crypto/cipher/salsa2012/xmm/salsa2012_amd64_xmm6.s @@ -0,0 +1,4823 @@ + +# qhasm: int64 r11_caller + +# qhasm: int64 r12_caller + +# qhasm: int64 r13_caller + +# qhasm: int64 r14_caller + +# qhasm: int64 r15_caller + +# qhasm: int64 rbx_caller + +# qhasm: int64 rbp_caller + +# qhasm: caller r11_caller + +# qhasm: caller r12_caller + +# qhasm: caller r13_caller + +# qhasm: caller r14_caller + +# qhasm: caller r15_caller + +# qhasm: caller rbx_caller + +# qhasm: caller rbp_caller + +# qhasm: stack64 r11_stack + +# qhasm: stack64 r12_stack + +# qhasm: stack64 r13_stack + +# qhasm: stack64 r14_stack + +# qhasm: stack64 r15_stack + +# qhasm: stack64 rbx_stack + +# qhasm: stack64 rbp_stack + +# qhasm: int64 a + +# qhasm: int64 arg1 + +# qhasm: int64 arg2 + +# qhasm: int64 arg3 + +# qhasm: int64 arg4 + +# qhasm: int64 arg5 + +# qhasm: input arg1 + +# qhasm: input arg2 + +# qhasm: input arg3 + +# qhasm: input arg4 + +# qhasm: input arg5 + +# qhasm: int64 k + +# qhasm: int64 kbits + +# qhasm: int64 iv + +# qhasm: int64 i + +# qhasm: stack128 x0 + +# qhasm: stack128 x1 + +# qhasm: stack128 x2 + +# qhasm: stack128 x3 + +# qhasm: int64 m + +# qhasm: int64 out + +# qhasm: int64 bytes + +# qhasm: stack32 eax_stack + +# qhasm: stack32 ebx_stack + +# qhasm: stack32 esi_stack + +# qhasm: stack32 edi_stack + +# qhasm: stack32 ebp_stack + +# qhasm: int6464 diag0 + +# qhasm: int6464 diag1 + +# qhasm: int6464 diag2 + +# qhasm: int6464 diag3 + +# qhasm: int6464 a0 + +# qhasm: int6464 a1 + +# qhasm: int6464 a2 + +# qhasm: int6464 a3 + +# qhasm: int6464 a4 + +# qhasm: int6464 a5 + +# qhasm: int6464 a6 + +# qhasm: int6464 a7 + +# qhasm: int6464 b0 + +# qhasm: int6464 b1 + +# qhasm: int6464 b2 + +# qhasm: int6464 b3 + +# qhasm: int6464 b4 + +# qhasm: int6464 b5 + +# qhasm: int6464 b6 + +# qhasm: int6464 b7 + +# qhasm: int6464 z0 + +# qhasm: int6464 z1 + +# qhasm: int6464 z2 + +# qhasm: int6464 z3 + +# qhasm: int6464 z4 + +# qhasm: int6464 z5 + +# qhasm: int6464 z6 + +# qhasm: int6464 z7 + +# qhasm: int6464 z8 + +# qhasm: int6464 z9 + +# qhasm: int6464 z10 + +# qhasm: int6464 z11 + +# qhasm: int6464 z12 + +# qhasm: int6464 z13 + +# qhasm: int6464 z14 + +# qhasm: int6464 z15 + +# qhasm: stack128 z0_stack + +# qhasm: stack128 z1_stack + +# qhasm: stack128 z2_stack + +# qhasm: stack128 z3_stack + +# qhasm: stack128 z4_stack + +# qhasm: stack128 z5_stack + +# qhasm: stack128 z6_stack + +# qhasm: stack128 z7_stack + +# qhasm: stack128 z8_stack + +# qhasm: stack128 z9_stack + +# qhasm: stack128 z10_stack + +# qhasm: stack128 z11_stack + +# qhasm: stack128 z12_stack + +# qhasm: stack128 z13_stack + +# qhasm: stack128 z14_stack + +# qhasm: stack128 z15_stack + +# qhasm: int6464 y0 + +# qhasm: int6464 y1 + +# qhasm: int6464 y2 + +# qhasm: int6464 y3 + +# qhasm: int6464 y4 + +# qhasm: int6464 y5 + +# qhasm: int6464 y6 + +# qhasm: int6464 y7 + +# qhasm: int6464 y8 + +# qhasm: int6464 y9 + +# qhasm: int6464 y10 + +# qhasm: int6464 y11 + +# qhasm: int6464 y12 + +# qhasm: int6464 y13 + +# qhasm: int6464 y14 + +# qhasm: int6464 y15 + +# qhasm: int6464 r0 + +# qhasm: int6464 r1 + +# qhasm: int6464 r2 + +# qhasm: int6464 r3 + +# qhasm: int6464 r4 + +# qhasm: int6464 r5 + +# qhasm: int6464 r6 + +# qhasm: int6464 r7 + +# qhasm: int6464 r8 + +# qhasm: int6464 r9 + +# qhasm: int6464 r10 + +# qhasm: int6464 r11 + +# qhasm: int6464 r12 + +# qhasm: int6464 r13 + +# qhasm: int6464 r14 + +# qhasm: int6464 r15 + +# qhasm: stack128 orig0 + +# qhasm: stack128 orig1 + +# qhasm: stack128 orig2 + +# qhasm: stack128 orig3 + +# qhasm: stack128 orig4 + +# qhasm: stack128 orig5 + +# qhasm: stack128 orig6 + +# qhasm: stack128 orig7 + +# qhasm: stack128 orig8 + +# qhasm: stack128 orig9 + +# qhasm: stack128 orig10 + +# qhasm: stack128 orig11 + +# qhasm: stack128 orig12 + +# qhasm: stack128 orig13 + +# qhasm: stack128 orig14 + +# qhasm: stack128 orig15 + +# qhasm: int64 in0 + +# qhasm: int64 in1 + +# qhasm: int64 in2 + +# qhasm: int64 in3 + +# qhasm: int64 in4 + +# qhasm: int64 in5 + +# qhasm: int64 in6 + +# qhasm: int64 in7 + +# qhasm: int64 in8 + +# qhasm: int64 in9 + +# qhasm: int64 in10 + +# qhasm: int64 in11 + +# qhasm: int64 in12 + +# qhasm: int64 in13 + +# qhasm: int64 in14 + +# qhasm: int64 in15 + +# qhasm: stack512 tmp + +# qhasm: int64 ctarget + +# qhasm: stack64 bytes_backup + +# qhasm: enter crypto_stream_salsa2012_amd64_xmm6 +.text +.p2align 5 +.globl _crypto_stream_salsa2012_amd64_xmm6 +.globl crypto_stream_salsa2012_amd64_xmm6 +_crypto_stream_salsa2012_amd64_xmm6: +crypto_stream_salsa2012_amd64_xmm6: +mov %rsp,%r11 +and $31,%r11 +add $480,%r11 +sub %r11,%rsp + +# qhasm: r11_stack = r11_caller +# asm 1: movq r11_stack=stack64#1 +# asm 2: movq r11_stack=352(%rsp) +movq %r11,352(%rsp) + +# qhasm: r12_stack = r12_caller +# asm 1: movq r12_stack=stack64#2 +# asm 2: movq r12_stack=360(%rsp) +movq %r12,360(%rsp) + +# qhasm: r13_stack = r13_caller +# asm 1: movq r13_stack=stack64#3 +# asm 2: movq r13_stack=368(%rsp) +movq %r13,368(%rsp) + +# qhasm: r14_stack = r14_caller +# asm 1: movq r14_stack=stack64#4 +# asm 2: movq r14_stack=376(%rsp) +movq %r14,376(%rsp) + +# qhasm: r15_stack = r15_caller +# asm 1: movq r15_stack=stack64#5 +# asm 2: movq r15_stack=384(%rsp) +movq %r15,384(%rsp) + +# qhasm: rbx_stack = rbx_caller +# asm 1: movq rbx_stack=stack64#6 +# asm 2: movq rbx_stack=392(%rsp) +movq %rbx,392(%rsp) + +# qhasm: rbp_stack = rbp_caller +# asm 1: movq rbp_stack=stack64#7 +# asm 2: movq rbp_stack=400(%rsp) +movq %rbp,400(%rsp) + +# qhasm: bytes = arg2 +# asm 1: mov bytes=int64#6 +# asm 2: mov bytes=%r9 +mov %rsi,%r9 + +# qhasm: out = arg1 +# asm 1: mov out=int64#1 +# asm 2: mov out=%rdi +mov %rdi,%rdi + +# qhasm: m = out +# asm 1: mov m=int64#2 +# asm 2: mov m=%rsi +mov %rdi,%rsi + +# qhasm: iv = arg3 +# asm 1: mov iv=int64#3 +# asm 2: mov iv=%rdx +mov %rdx,%rdx + +# qhasm: k = arg4 +# asm 1: mov k=int64#8 +# asm 2: mov k=%r10 +mov %rcx,%r10 + +# qhasm: unsigned>? bytes - 0 +# asm 1: cmp $0, +jbe ._done + +# qhasm: a = 0 +# asm 1: mov $0,>a=int64#7 +# asm 2: mov $0,>a=%rax +mov $0,%rax + +# qhasm: i = bytes +# asm 1: mov i=int64#4 +# asm 2: mov i=%rcx +mov %r9,%rcx + +# qhasm: while (i) { *out++ = a; --i } +rep stosb + +# qhasm: out -= bytes +# asm 1: sub r11_stack=stack64#1 +# asm 2: movq r11_stack=352(%rsp) +movq %r11,352(%rsp) + +# qhasm: r12_stack = r12_caller +# asm 1: movq r12_stack=stack64#2 +# asm 2: movq r12_stack=360(%rsp) +movq %r12,360(%rsp) + +# qhasm: r13_stack = r13_caller +# asm 1: movq r13_stack=stack64#3 +# asm 2: movq r13_stack=368(%rsp) +movq %r13,368(%rsp) + +# qhasm: r14_stack = r14_caller +# asm 1: movq r14_stack=stack64#4 +# asm 2: movq r14_stack=376(%rsp) +movq %r14,376(%rsp) + +# qhasm: r15_stack = r15_caller +# asm 1: movq r15_stack=stack64#5 +# asm 2: movq r15_stack=384(%rsp) +movq %r15,384(%rsp) + +# qhasm: rbx_stack = rbx_caller +# asm 1: movq rbx_stack=stack64#6 +# asm 2: movq rbx_stack=392(%rsp) +movq %rbx,392(%rsp) + +# qhasm: rbp_stack = rbp_caller +# asm 1: movq rbp_stack=stack64#7 +# asm 2: movq rbp_stack=400(%rsp) +movq %rbp,400(%rsp) + +# qhasm: out = arg1 +# asm 1: mov out=int64#1 +# asm 2: mov out=%rdi +mov %rdi,%rdi + +# qhasm: m = arg2 +# asm 1: mov m=int64#2 +# asm 2: mov m=%rsi +mov %rsi,%rsi + +# qhasm: bytes = arg3 +# asm 1: mov bytes=int64#6 +# asm 2: mov bytes=%r9 +mov %rdx,%r9 + +# qhasm: iv = arg4 +# asm 1: mov iv=int64#3 +# asm 2: mov iv=%rdx +mov %rcx,%rdx + +# qhasm: k = arg5 +# asm 1: mov k=int64#8 +# asm 2: mov k=%r10 +mov %r8,%r10 + +# qhasm: unsigned>? bytes - 0 +# asm 1: cmp $0, +jbe ._done +# comment:fp stack unchanged by fallthrough + +# qhasm: start: +._start: + +# qhasm: in12 = *(uint32 *) (k + 20) +# asm 1: movl 20(in12=int64#4d +# asm 2: movl 20(in12=%ecx +movl 20(%r10),%ecx + +# qhasm: in1 = *(uint32 *) (k + 0) +# asm 1: movl 0(in1=int64#5d +# asm 2: movl 0(in1=%r8d +movl 0(%r10),%r8d + +# qhasm: in6 = *(uint32 *) (iv + 0) +# asm 1: movl 0(in6=int64#7d +# asm 2: movl 0(in6=%eax +movl 0(%rdx),%eax + +# qhasm: in11 = *(uint32 *) (k + 16) +# asm 1: movl 16(in11=int64#9d +# asm 2: movl 16(in11=%r11d +movl 16(%r10),%r11d + +# qhasm: ((uint32 *)&x1)[0] = in12 +# asm 1: movl x1=stack128#1 +# asm 2: movl x1=0(%rsp) +movl %ecx,0(%rsp) + +# qhasm: ((uint32 *)&x1)[1] = in1 +# asm 1: movl in8=int64#4 +# asm 2: mov $0,>in8=%rcx +mov $0,%rcx + +# qhasm: in13 = *(uint32 *) (k + 24) +# asm 1: movl 24(in13=int64#5d +# asm 2: movl 24(in13=%r8d +movl 24(%r10),%r8d + +# qhasm: in2 = *(uint32 *) (k + 4) +# asm 1: movl 4(in2=int64#7d +# asm 2: movl 4(in2=%eax +movl 4(%r10),%eax + +# qhasm: in7 = *(uint32 *) (iv + 4) +# asm 1: movl 4(in7=int64#3d +# asm 2: movl 4(in7=%edx +movl 4(%rdx),%edx + +# qhasm: ((uint32 *)&x2)[0] = in8 +# asm 1: movl x2=stack128#2 +# asm 2: movl x2=16(%rsp) +movl %ecx,16(%rsp) + +# qhasm: ((uint32 *)&x2)[1] = in13 +# asm 1: movl in4=int64#3d +# asm 2: movl 12(in4=%edx +movl 12(%r10),%edx + +# qhasm: in9 = 0 +# asm 1: mov $0,>in9=int64#4 +# asm 2: mov $0,>in9=%rcx +mov $0,%rcx + +# qhasm: in14 = *(uint32 *) (k + 28) +# asm 1: movl 28(in14=int64#5d +# asm 2: movl 28(in14=%r8d +movl 28(%r10),%r8d + +# qhasm: in3 = *(uint32 *) (k + 8) +# asm 1: movl 8(in3=int64#7d +# asm 2: movl 8(in3=%eax +movl 8(%r10),%eax + +# qhasm: ((uint32 *)&x3)[0] = in4 +# asm 1: movl x3=stack128#3 +# asm 2: movl x3=32(%rsp) +movl %edx,32(%rsp) + +# qhasm: ((uint32 *)&x3)[1] = in9 +# asm 1: movl in0=int64#3 +# asm 2: mov $1634760805,>in0=%rdx +mov $1634760805,%rdx + +# qhasm: in5 = 857760878 +# asm 1: mov $857760878,>in5=int64#4 +# asm 2: mov $857760878,>in5=%rcx +mov $857760878,%rcx + +# qhasm: in10 = 2036477234 +# asm 1: mov $2036477234,>in10=int64#5 +# asm 2: mov $2036477234,>in10=%r8 +mov $2036477234,%r8 + +# qhasm: in15 = 1797285236 +# asm 1: mov $1797285236,>in15=int64#7 +# asm 2: mov $1797285236,>in15=%rax +mov $1797285236,%rax + +# qhasm: ((uint32 *)&x0)[0] = in0 +# asm 1: movl x0=stack128#4 +# asm 2: movl x0=48(%rsp) +movl %edx,48(%rsp) + +# qhasm: ((uint32 *)&x0)[1] = in5 +# asm 1: movl z0=int6464#1 +# asm 2: movdqa z0=%xmm0 +movdqa 48(%rsp),%xmm0 + +# qhasm: z5 = z0[1,1,1,1] +# asm 1: pshufd $0x55,z5=int6464#2 +# asm 2: pshufd $0x55,z5=%xmm1 +pshufd $0x55,%xmm0,%xmm1 + +# qhasm: z10 = z0[2,2,2,2] +# asm 1: pshufd $0xaa,z10=int6464#3 +# asm 2: pshufd $0xaa,z10=%xmm2 +pshufd $0xaa,%xmm0,%xmm2 + +# qhasm: z15 = z0[3,3,3,3] +# asm 1: pshufd $0xff,z15=int6464#4 +# asm 2: pshufd $0xff,z15=%xmm3 +pshufd $0xff,%xmm0,%xmm3 + +# qhasm: z0 = z0[0,0,0,0] +# asm 1: pshufd $0x00,z0=int6464#1 +# asm 2: pshufd $0x00,z0=%xmm0 +pshufd $0x00,%xmm0,%xmm0 + +# qhasm: orig5 = z5 +# asm 1: movdqa orig5=stack128#5 +# asm 2: movdqa orig5=64(%rsp) +movdqa %xmm1,64(%rsp) + +# qhasm: orig10 = z10 +# asm 1: movdqa orig10=stack128#6 +# asm 2: movdqa orig10=80(%rsp) +movdqa %xmm2,80(%rsp) + +# qhasm: orig15 = z15 +# asm 1: movdqa orig15=stack128#7 +# asm 2: movdqa orig15=96(%rsp) +movdqa %xmm3,96(%rsp) + +# qhasm: orig0 = z0 +# asm 1: movdqa orig0=stack128#8 +# asm 2: movdqa orig0=112(%rsp) +movdqa %xmm0,112(%rsp) + +# qhasm: z1 = x1 +# asm 1: movdqa z1=int6464#1 +# asm 2: movdqa z1=%xmm0 +movdqa 0(%rsp),%xmm0 + +# qhasm: z6 = z1[2,2,2,2] +# asm 1: pshufd $0xaa,z6=int6464#2 +# asm 2: pshufd $0xaa,z6=%xmm1 +pshufd $0xaa,%xmm0,%xmm1 + +# qhasm: z11 = z1[3,3,3,3] +# asm 1: pshufd $0xff,z11=int6464#3 +# asm 2: pshufd $0xff,z11=%xmm2 +pshufd $0xff,%xmm0,%xmm2 + +# qhasm: z12 = z1[0,0,0,0] +# asm 1: pshufd $0x00,z12=int6464#4 +# asm 2: pshufd $0x00,z12=%xmm3 +pshufd $0x00,%xmm0,%xmm3 + +# qhasm: z1 = z1[1,1,1,1] +# asm 1: pshufd $0x55,z1=int6464#1 +# asm 2: pshufd $0x55,z1=%xmm0 +pshufd $0x55,%xmm0,%xmm0 + +# qhasm: orig6 = z6 +# asm 1: movdqa orig6=stack128#9 +# asm 2: movdqa orig6=128(%rsp) +movdqa %xmm1,128(%rsp) + +# qhasm: orig11 = z11 +# asm 1: movdqa orig11=stack128#10 +# asm 2: movdqa orig11=144(%rsp) +movdqa %xmm2,144(%rsp) + +# qhasm: orig12 = z12 +# asm 1: movdqa orig12=stack128#11 +# asm 2: movdqa orig12=160(%rsp) +movdqa %xmm3,160(%rsp) + +# qhasm: orig1 = z1 +# asm 1: movdqa orig1=stack128#12 +# asm 2: movdqa orig1=176(%rsp) +movdqa %xmm0,176(%rsp) + +# qhasm: z2 = x2 +# asm 1: movdqa z2=int6464#1 +# asm 2: movdqa z2=%xmm0 +movdqa 16(%rsp),%xmm0 + +# qhasm: z7 = z2[3,3,3,3] +# asm 1: pshufd $0xff,z7=int6464#2 +# asm 2: pshufd $0xff,z7=%xmm1 +pshufd $0xff,%xmm0,%xmm1 + +# qhasm: z13 = z2[1,1,1,1] +# asm 1: pshufd $0x55,z13=int6464#3 +# asm 2: pshufd $0x55,z13=%xmm2 +pshufd $0x55,%xmm0,%xmm2 + +# qhasm: z2 = z2[2,2,2,2] +# asm 1: pshufd $0xaa,z2=int6464#1 +# asm 2: pshufd $0xaa,z2=%xmm0 +pshufd $0xaa,%xmm0,%xmm0 + +# qhasm: orig7 = z7 +# asm 1: movdqa orig7=stack128#13 +# asm 2: movdqa orig7=192(%rsp) +movdqa %xmm1,192(%rsp) + +# qhasm: orig13 = z13 +# asm 1: movdqa orig13=stack128#14 +# asm 2: movdqa orig13=208(%rsp) +movdqa %xmm2,208(%rsp) + +# qhasm: orig2 = z2 +# asm 1: movdqa orig2=stack128#15 +# asm 2: movdqa orig2=224(%rsp) +movdqa %xmm0,224(%rsp) + +# qhasm: z3 = x3 +# asm 1: movdqa z3=int6464#1 +# asm 2: movdqa z3=%xmm0 +movdqa 32(%rsp),%xmm0 + +# qhasm: z4 = z3[0,0,0,0] +# asm 1: pshufd $0x00,z4=int6464#2 +# asm 2: pshufd $0x00,z4=%xmm1 +pshufd $0x00,%xmm0,%xmm1 + +# qhasm: z14 = z3[2,2,2,2] +# asm 1: pshufd $0xaa,z14=int6464#3 +# asm 2: pshufd $0xaa,z14=%xmm2 +pshufd $0xaa,%xmm0,%xmm2 + +# qhasm: z3 = z3[3,3,3,3] +# asm 1: pshufd $0xff,z3=int6464#1 +# asm 2: pshufd $0xff,z3=%xmm0 +pshufd $0xff,%xmm0,%xmm0 + +# qhasm: orig4 = z4 +# asm 1: movdqa orig4=stack128#16 +# asm 2: movdqa orig4=240(%rsp) +movdqa %xmm1,240(%rsp) + +# qhasm: orig14 = z14 +# asm 1: movdqa orig14=stack128#17 +# asm 2: movdqa orig14=256(%rsp) +movdqa %xmm2,256(%rsp) + +# qhasm: orig3 = z3 +# asm 1: movdqa orig3=stack128#18 +# asm 2: movdqa orig3=272(%rsp) +movdqa %xmm0,272(%rsp) + +# qhasm: bytesatleast256: +._bytesatleast256: + +# qhasm: in8 = ((uint32 *)&x2)[0] +# asm 1: movl in8=int64#3d +# asm 2: movl in8=%edx +movl 16(%rsp),%edx + +# qhasm: in9 = ((uint32 *)&x3)[1] +# asm 1: movl 4+in9=int64#4d +# asm 2: movl 4+in9=%ecx +movl 4+32(%rsp),%ecx + +# qhasm: ((uint32 *) &orig8)[0] = in8 +# asm 1: movl orig8=stack128#19 +# asm 2: movl orig8=288(%rsp) +movl %edx,288(%rsp) + +# qhasm: ((uint32 *) &orig9)[0] = in9 +# asm 1: movl orig9=stack128#20 +# asm 2: movl orig9=304(%rsp) +movl %ecx,304(%rsp) + +# qhasm: in8 += 1 +# asm 1: add $1,in9=int64#4 +# asm 2: mov in9=%rcx +mov %rdx,%rcx + +# qhasm: (uint64) in9 >>= 32 +# asm 1: shr $32,in9=int64#4 +# asm 2: mov in9=%rcx +mov %rdx,%rcx + +# qhasm: (uint64) in9 >>= 32 +# asm 1: shr $32,in9=int64#4 +# asm 2: mov in9=%rcx +mov %rdx,%rcx + +# qhasm: (uint64) in9 >>= 32 +# asm 1: shr $32,in9=int64#4 +# asm 2: mov in9=%rcx +mov %rdx,%rcx + +# qhasm: (uint64) in9 >>= 32 +# asm 1: shr $32,x2=stack128#2 +# asm 2: movl x2=16(%rsp) +movl %edx,16(%rsp) + +# qhasm: ((uint32 *)&x3)[1] = in9 +# asm 1: movl bytes_backup=stack64#8 +# asm 2: movq bytes_backup=408(%rsp) +movq %r9,408(%rsp) + +# qhasm: i = 12 +# asm 1: mov $12,>i=int64#3 +# asm 2: mov $12,>i=%rdx +mov $12,%rdx + +# qhasm: z5 = orig5 +# asm 1: movdqa z5=int6464#1 +# asm 2: movdqa z5=%xmm0 +movdqa 64(%rsp),%xmm0 + +# qhasm: z10 = orig10 +# asm 1: movdqa z10=int6464#2 +# asm 2: movdqa z10=%xmm1 +movdqa 80(%rsp),%xmm1 + +# qhasm: z15 = orig15 +# asm 1: movdqa z15=int6464#3 +# asm 2: movdqa z15=%xmm2 +movdqa 96(%rsp),%xmm2 + +# qhasm: z14 = orig14 +# asm 1: movdqa z14=int6464#4 +# asm 2: movdqa z14=%xmm3 +movdqa 256(%rsp),%xmm3 + +# qhasm: z3 = orig3 +# asm 1: movdqa z3=int6464#5 +# asm 2: movdqa z3=%xmm4 +movdqa 272(%rsp),%xmm4 + +# qhasm: z6 = orig6 +# asm 1: movdqa z6=int6464#6 +# asm 2: movdqa z6=%xmm5 +movdqa 128(%rsp),%xmm5 + +# qhasm: z11 = orig11 +# asm 1: movdqa z11=int6464#7 +# asm 2: movdqa z11=%xmm6 +movdqa 144(%rsp),%xmm6 + +# qhasm: z1 = orig1 +# asm 1: movdqa z1=int6464#8 +# asm 2: movdqa z1=%xmm7 +movdqa 176(%rsp),%xmm7 + +# qhasm: z7 = orig7 +# asm 1: movdqa z7=int6464#9 +# asm 2: movdqa z7=%xmm8 +movdqa 192(%rsp),%xmm8 + +# qhasm: z13 = orig13 +# asm 1: movdqa z13=int6464#10 +# asm 2: movdqa z13=%xmm9 +movdqa 208(%rsp),%xmm9 + +# qhasm: z2 = orig2 +# asm 1: movdqa z2=int6464#11 +# asm 2: movdqa z2=%xmm10 +movdqa 224(%rsp),%xmm10 + +# qhasm: z9 = orig9 +# asm 1: movdqa z9=int6464#12 +# asm 2: movdqa z9=%xmm11 +movdqa 304(%rsp),%xmm11 + +# qhasm: z0 = orig0 +# asm 1: movdqa z0=int6464#13 +# asm 2: movdqa z0=%xmm12 +movdqa 112(%rsp),%xmm12 + +# qhasm: z12 = orig12 +# asm 1: movdqa z12=int6464#14 +# asm 2: movdqa z12=%xmm13 +movdqa 160(%rsp),%xmm13 + +# qhasm: z4 = orig4 +# asm 1: movdqa z4=int6464#15 +# asm 2: movdqa z4=%xmm14 +movdqa 240(%rsp),%xmm14 + +# qhasm: z8 = orig8 +# asm 1: movdqa z8=int6464#16 +# asm 2: movdqa z8=%xmm15 +movdqa 288(%rsp),%xmm15 + +# qhasm: mainloop1: +._mainloop1: + +# qhasm: z10_stack = z10 +# asm 1: movdqa z10_stack=stack128#21 +# asm 2: movdqa z10_stack=320(%rsp) +movdqa %xmm1,320(%rsp) + +# qhasm: z15_stack = z15 +# asm 1: movdqa z15_stack=stack128#22 +# asm 2: movdqa z15_stack=336(%rsp) +movdqa %xmm2,336(%rsp) + +# qhasm: y4 = z12 +# asm 1: movdqa y4=int6464#2 +# asm 2: movdqa y4=%xmm1 +movdqa %xmm13,%xmm1 + +# qhasm: uint32323232 y4 += z0 +# asm 1: paddd r4=int6464#3 +# asm 2: movdqa r4=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y4 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,y9=int6464#2 +# asm 2: movdqa y9=%xmm1 +movdqa %xmm7,%xmm1 + +# qhasm: uint32323232 y9 += z5 +# asm 1: paddd r9=int6464#3 +# asm 2: movdqa r9=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y9 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,y8=int6464#2 +# asm 2: movdqa y8=%xmm1 +movdqa %xmm12,%xmm1 + +# qhasm: uint32323232 y8 += z4 +# asm 1: paddd r8=int6464#3 +# asm 2: movdqa r8=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y8 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,y13=int6464#2 +# asm 2: movdqa y13=%xmm1 +movdqa %xmm0,%xmm1 + +# qhasm: uint32323232 y13 += z9 +# asm 1: paddd r13=int6464#3 +# asm 2: movdqa r13=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y13 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,y12=int6464#2 +# asm 2: movdqa y12=%xmm1 +movdqa %xmm14,%xmm1 + +# qhasm: uint32323232 y12 += z8 +# asm 1: paddd r12=int6464#3 +# asm 2: movdqa r12=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y12 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,y1=int6464#2 +# asm 2: movdqa y1=%xmm1 +movdqa %xmm11,%xmm1 + +# qhasm: uint32323232 y1 += z13 +# asm 1: paddd r1=int6464#3 +# asm 2: movdqa r1=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y1 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,y0=int6464#2 +# asm 2: movdqa y0=%xmm1 +movdqa %xmm15,%xmm1 + +# qhasm: uint32323232 y0 += z12 +# asm 1: paddd r0=int6464#3 +# asm 2: movdqa r0=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y0 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,z10=int6464#2 +# asm 2: movdqa z10=%xmm1 +movdqa 320(%rsp),%xmm1 + +# qhasm: z0_stack = z0 +# asm 1: movdqa z0_stack=stack128#21 +# asm 2: movdqa z0_stack=320(%rsp) +movdqa %xmm12,320(%rsp) + +# qhasm: y5 = z13 +# asm 1: movdqa y5=int6464#3 +# asm 2: movdqa y5=%xmm2 +movdqa %xmm9,%xmm2 + +# qhasm: uint32323232 y5 += z1 +# asm 1: paddd r5=int6464#13 +# asm 2: movdqa r5=%xmm12 +movdqa %xmm2,%xmm12 + +# qhasm: uint32323232 y5 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,y14=int6464#3 +# asm 2: movdqa y14=%xmm2 +movdqa %xmm5,%xmm2 + +# qhasm: uint32323232 y14 += z10 +# asm 1: paddd r14=int6464#13 +# asm 2: movdqa r14=%xmm12 +movdqa %xmm2,%xmm12 + +# qhasm: uint32323232 y14 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,z15=int6464#3 +# asm 2: movdqa z15=%xmm2 +movdqa 336(%rsp),%xmm2 + +# qhasm: z5_stack = z5 +# asm 1: movdqa z5_stack=stack128#22 +# asm 2: movdqa z5_stack=336(%rsp) +movdqa %xmm0,336(%rsp) + +# qhasm: y3 = z11 +# asm 1: movdqa y3=int6464#1 +# asm 2: movdqa y3=%xmm0 +movdqa %xmm6,%xmm0 + +# qhasm: uint32323232 y3 += z15 +# asm 1: paddd r3=int6464#13 +# asm 2: movdqa r3=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y3 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,y2=int6464#1 +# asm 2: movdqa y2=%xmm0 +movdqa %xmm1,%xmm0 + +# qhasm: uint32323232 y2 += z14 +# asm 1: paddd r2=int6464#13 +# asm 2: movdqa r2=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y2 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,y7=int6464#1 +# asm 2: movdqa y7=%xmm0 +movdqa %xmm2,%xmm0 + +# qhasm: uint32323232 y7 += z3 +# asm 1: paddd r7=int6464#13 +# asm 2: movdqa r7=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y7 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,y6=int6464#1 +# asm 2: movdqa y6=%xmm0 +movdqa %xmm3,%xmm0 + +# qhasm: uint32323232 y6 += z2 +# asm 1: paddd r6=int6464#13 +# asm 2: movdqa r6=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y6 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,y11=int6464#1 +# asm 2: movdqa y11=%xmm0 +movdqa %xmm4,%xmm0 + +# qhasm: uint32323232 y11 += z7 +# asm 1: paddd r11=int6464#13 +# asm 2: movdqa r11=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y11 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,y10=int6464#1 +# asm 2: movdqa y10=%xmm0 +movdqa %xmm10,%xmm0 + +# qhasm: uint32323232 y10 += z6 +# asm 1: paddd r10=int6464#13 +# asm 2: movdqa r10=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y10 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,z0=int6464#1 +# asm 2: movdqa z0=%xmm0 +movdqa 320(%rsp),%xmm0 + +# qhasm: z10_stack = z10 +# asm 1: movdqa z10_stack=stack128#21 +# asm 2: movdqa z10_stack=320(%rsp) +movdqa %xmm1,320(%rsp) + +# qhasm: y1 = z3 +# asm 1: movdqa y1=int6464#2 +# asm 2: movdqa y1=%xmm1 +movdqa %xmm4,%xmm1 + +# qhasm: uint32323232 y1 += z0 +# asm 1: paddd r1=int6464#13 +# asm 2: movdqa r1=%xmm12 +movdqa %xmm1,%xmm12 + +# qhasm: uint32323232 y1 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,y15=int6464#2 +# asm 2: movdqa y15=%xmm1 +movdqa %xmm8,%xmm1 + +# qhasm: uint32323232 y15 += z11 +# asm 1: paddd r15=int6464#13 +# asm 2: movdqa r15=%xmm12 +movdqa %xmm1,%xmm12 + +# qhasm: uint32323232 y15 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,z5=int6464#13 +# asm 2: movdqa z5=%xmm12 +movdqa 336(%rsp),%xmm12 + +# qhasm: z15_stack = z15 +# asm 1: movdqa z15_stack=stack128#22 +# asm 2: movdqa z15_stack=336(%rsp) +movdqa %xmm2,336(%rsp) + +# qhasm: y6 = z4 +# asm 1: movdqa y6=int6464#2 +# asm 2: movdqa y6=%xmm1 +movdqa %xmm14,%xmm1 + +# qhasm: uint32323232 y6 += z5 +# asm 1: paddd r6=int6464#3 +# asm 2: movdqa r6=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y6 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,y2=int6464#2 +# asm 2: movdqa y2=%xmm1 +movdqa %xmm0,%xmm1 + +# qhasm: uint32323232 y2 += z1 +# asm 1: paddd r2=int6464#3 +# asm 2: movdqa r2=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y2 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,y7=int6464#2 +# asm 2: movdqa y7=%xmm1 +movdqa %xmm12,%xmm1 + +# qhasm: uint32323232 y7 += z6 +# asm 1: paddd r7=int6464#3 +# asm 2: movdqa r7=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y7 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,y3=int6464#2 +# asm 2: movdqa y3=%xmm1 +movdqa %xmm7,%xmm1 + +# qhasm: uint32323232 y3 += z2 +# asm 1: paddd r3=int6464#3 +# asm 2: movdqa r3=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y3 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,y4=int6464#2 +# asm 2: movdqa y4=%xmm1 +movdqa %xmm5,%xmm1 + +# qhasm: uint32323232 y4 += z7 +# asm 1: paddd r4=int6464#3 +# asm 2: movdqa r4=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y4 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,y0=int6464#2 +# asm 2: movdqa y0=%xmm1 +movdqa %xmm10,%xmm1 + +# qhasm: uint32323232 y0 += z3 +# asm 1: paddd r0=int6464#3 +# asm 2: movdqa r0=%xmm2 +movdqa %xmm1,%xmm2 + +# qhasm: uint32323232 y0 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,z10=int6464#2 +# asm 2: movdqa z10=%xmm1 +movdqa 320(%rsp),%xmm1 + +# qhasm: z0_stack = z0 +# asm 1: movdqa z0_stack=stack128#21 +# asm 2: movdqa z0_stack=320(%rsp) +movdqa %xmm0,320(%rsp) + +# qhasm: y5 = z7 +# asm 1: movdqa y5=int6464#1 +# asm 2: movdqa y5=%xmm0 +movdqa %xmm8,%xmm0 + +# qhasm: uint32323232 y5 += z4 +# asm 1: paddd r5=int6464#3 +# asm 2: movdqa r5=%xmm2 +movdqa %xmm0,%xmm2 + +# qhasm: uint32323232 y5 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,y11=int6464#1 +# asm 2: movdqa y11=%xmm0 +movdqa %xmm11,%xmm0 + +# qhasm: uint32323232 y11 += z10 +# asm 1: paddd r11=int6464#3 +# asm 2: movdqa r11=%xmm2 +movdqa %xmm0,%xmm2 + +# qhasm: uint32323232 y11 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,z15=int6464#3 +# asm 2: movdqa z15=%xmm2 +movdqa 336(%rsp),%xmm2 + +# qhasm: z5_stack = z5 +# asm 1: movdqa z5_stack=stack128#22 +# asm 2: movdqa z5_stack=336(%rsp) +movdqa %xmm12,336(%rsp) + +# qhasm: y12 = z14 +# asm 1: movdqa y12=int6464#1 +# asm 2: movdqa y12=%xmm0 +movdqa %xmm3,%xmm0 + +# qhasm: uint32323232 y12 += z15 +# asm 1: paddd r12=int6464#13 +# asm 2: movdqa r12=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y12 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,y8=int6464#1 +# asm 2: movdqa y8=%xmm0 +movdqa %xmm1,%xmm0 + +# qhasm: uint32323232 y8 += z11 +# asm 1: paddd r8=int6464#13 +# asm 2: movdqa r8=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y8 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,y13=int6464#1 +# asm 2: movdqa y13=%xmm0 +movdqa %xmm2,%xmm0 + +# qhasm: uint32323232 y13 += z12 +# asm 1: paddd r13=int6464#13 +# asm 2: movdqa r13=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y13 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,y9=int6464#1 +# asm 2: movdqa y9=%xmm0 +movdqa %xmm6,%xmm0 + +# qhasm: uint32323232 y9 += z8 +# asm 1: paddd r9=int6464#13 +# asm 2: movdqa r9=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y9 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,y14=int6464#1 +# asm 2: movdqa y14=%xmm0 +movdqa %xmm13,%xmm0 + +# qhasm: uint32323232 y14 += z13 +# asm 1: paddd r14=int6464#13 +# asm 2: movdqa r14=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y14 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,y10=int6464#1 +# asm 2: movdqa y10=%xmm0 +movdqa %xmm15,%xmm0 + +# qhasm: uint32323232 y10 += z9 +# asm 1: paddd r10=int6464#13 +# asm 2: movdqa r10=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y10 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,y15=int6464#1 +# asm 2: movdqa y15=%xmm0 +movdqa %xmm9,%xmm0 + +# qhasm: uint32323232 y15 += z14 +# asm 1: paddd r15=int6464#13 +# asm 2: movdqa r15=%xmm12 +movdqa %xmm0,%xmm12 + +# qhasm: uint32323232 y15 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,z0=int6464#13 +# asm 2: movdqa z0=%xmm12 +movdqa 320(%rsp),%xmm12 + +# qhasm: z5 = z5_stack +# asm 1: movdqa z5=int6464#1 +# asm 2: movdqa z5=%xmm0 +movdqa 336(%rsp),%xmm0 + +# qhasm: unsigned>? i -= 2 +# asm 1: sub $2, +ja ._mainloop1 + +# qhasm: uint32323232 z0 += orig0 +# asm 1: paddd in0=int64#3 +# asm 2: movd in0=%rdx +movd %xmm12,%rdx + +# qhasm: in1 = z1 +# asm 1: movd in1=int64#4 +# asm 2: movd in1=%rcx +movd %xmm7,%rcx + +# qhasm: in2 = z2 +# asm 1: movd in2=int64#5 +# asm 2: movd in2=%r8 +movd %xmm10,%r8 + +# qhasm: in3 = z3 +# asm 1: movd in3=int64#6 +# asm 2: movd in3=%r9 +movd %xmm4,%r9 + +# qhasm: z0 <<<= 96 +# asm 1: pshufd $0x39,in0=int64#3 +# asm 2: movd in0=%rdx +movd %xmm12,%rdx + +# qhasm: in1 = z1 +# asm 1: movd in1=int64#4 +# asm 2: movd in1=%rcx +movd %xmm7,%rcx + +# qhasm: in2 = z2 +# asm 1: movd in2=int64#5 +# asm 2: movd in2=%r8 +movd %xmm10,%r8 + +# qhasm: in3 = z3 +# asm 1: movd in3=int64#6 +# asm 2: movd in3=%r9 +movd %xmm4,%r9 + +# qhasm: z0 <<<= 96 +# asm 1: pshufd $0x39,in0=int64#3 +# asm 2: movd in0=%rdx +movd %xmm12,%rdx + +# qhasm: in1 = z1 +# asm 1: movd in1=int64#4 +# asm 2: movd in1=%rcx +movd %xmm7,%rcx + +# qhasm: in2 = z2 +# asm 1: movd in2=int64#5 +# asm 2: movd in2=%r8 +movd %xmm10,%r8 + +# qhasm: in3 = z3 +# asm 1: movd in3=int64#6 +# asm 2: movd in3=%r9 +movd %xmm4,%r9 + +# qhasm: z0 <<<= 96 +# asm 1: pshufd $0x39,in0=int64#3 +# asm 2: movd in0=%rdx +movd %xmm12,%rdx + +# qhasm: in1 = z1 +# asm 1: movd in1=int64#4 +# asm 2: movd in1=%rcx +movd %xmm7,%rcx + +# qhasm: in2 = z2 +# asm 1: movd in2=int64#5 +# asm 2: movd in2=%r8 +movd %xmm10,%r8 + +# qhasm: in3 = z3 +# asm 1: movd in3=int64#6 +# asm 2: movd in3=%r9 +movd %xmm4,%r9 + +# qhasm: (uint32) in0 ^= *(uint32 *) (m + 192) +# asm 1: xorl 192(in4=int64#3 +# asm 2: movd in4=%rdx +movd %xmm14,%rdx + +# qhasm: in5 = z5 +# asm 1: movd in5=int64#4 +# asm 2: movd in5=%rcx +movd %xmm0,%rcx + +# qhasm: in6 = z6 +# asm 1: movd in6=int64#5 +# asm 2: movd in6=%r8 +movd %xmm5,%r8 + +# qhasm: in7 = z7 +# asm 1: movd in7=int64#6 +# asm 2: movd in7=%r9 +movd %xmm8,%r9 + +# qhasm: z4 <<<= 96 +# asm 1: pshufd $0x39,in4=int64#3 +# asm 2: movd in4=%rdx +movd %xmm14,%rdx + +# qhasm: in5 = z5 +# asm 1: movd in5=int64#4 +# asm 2: movd in5=%rcx +movd %xmm0,%rcx + +# qhasm: in6 = z6 +# asm 1: movd in6=int64#5 +# asm 2: movd in6=%r8 +movd %xmm5,%r8 + +# qhasm: in7 = z7 +# asm 1: movd in7=int64#6 +# asm 2: movd in7=%r9 +movd %xmm8,%r9 + +# qhasm: z4 <<<= 96 +# asm 1: pshufd $0x39,in4=int64#3 +# asm 2: movd in4=%rdx +movd %xmm14,%rdx + +# qhasm: in5 = z5 +# asm 1: movd in5=int64#4 +# asm 2: movd in5=%rcx +movd %xmm0,%rcx + +# qhasm: in6 = z6 +# asm 1: movd in6=int64#5 +# asm 2: movd in6=%r8 +movd %xmm5,%r8 + +# qhasm: in7 = z7 +# asm 1: movd in7=int64#6 +# asm 2: movd in7=%r9 +movd %xmm8,%r9 + +# qhasm: z4 <<<= 96 +# asm 1: pshufd $0x39,in4=int64#3 +# asm 2: movd in4=%rdx +movd %xmm14,%rdx + +# qhasm: in5 = z5 +# asm 1: movd in5=int64#4 +# asm 2: movd in5=%rcx +movd %xmm0,%rcx + +# qhasm: in6 = z6 +# asm 1: movd in6=int64#5 +# asm 2: movd in6=%r8 +movd %xmm5,%r8 + +# qhasm: in7 = z7 +# asm 1: movd in7=int64#6 +# asm 2: movd in7=%r9 +movd %xmm8,%r9 + +# qhasm: (uint32) in4 ^= *(uint32 *) (m + 208) +# asm 1: xorl 208(in8=int64#3 +# asm 2: movd in8=%rdx +movd %xmm15,%rdx + +# qhasm: in9 = z9 +# asm 1: movd in9=int64#4 +# asm 2: movd in9=%rcx +movd %xmm11,%rcx + +# qhasm: in10 = z10 +# asm 1: movd in10=int64#5 +# asm 2: movd in10=%r8 +movd %xmm1,%r8 + +# qhasm: in11 = z11 +# asm 1: movd in11=int64#6 +# asm 2: movd in11=%r9 +movd %xmm6,%r9 + +# qhasm: z8 <<<= 96 +# asm 1: pshufd $0x39,in8=int64#3 +# asm 2: movd in8=%rdx +movd %xmm15,%rdx + +# qhasm: in9 = z9 +# asm 1: movd in9=int64#4 +# asm 2: movd in9=%rcx +movd %xmm11,%rcx + +# qhasm: in10 = z10 +# asm 1: movd in10=int64#5 +# asm 2: movd in10=%r8 +movd %xmm1,%r8 + +# qhasm: in11 = z11 +# asm 1: movd in11=int64#6 +# asm 2: movd in11=%r9 +movd %xmm6,%r9 + +# qhasm: z8 <<<= 96 +# asm 1: pshufd $0x39,in8=int64#3 +# asm 2: movd in8=%rdx +movd %xmm15,%rdx + +# qhasm: in9 = z9 +# asm 1: movd in9=int64#4 +# asm 2: movd in9=%rcx +movd %xmm11,%rcx + +# qhasm: in10 = z10 +# asm 1: movd in10=int64#5 +# asm 2: movd in10=%r8 +movd %xmm1,%r8 + +# qhasm: in11 = z11 +# asm 1: movd in11=int64#6 +# asm 2: movd in11=%r9 +movd %xmm6,%r9 + +# qhasm: z8 <<<= 96 +# asm 1: pshufd $0x39,in8=int64#3 +# asm 2: movd in8=%rdx +movd %xmm15,%rdx + +# qhasm: in9 = z9 +# asm 1: movd in9=int64#4 +# asm 2: movd in9=%rcx +movd %xmm11,%rcx + +# qhasm: in10 = z10 +# asm 1: movd in10=int64#5 +# asm 2: movd in10=%r8 +movd %xmm1,%r8 + +# qhasm: in11 = z11 +# asm 1: movd in11=int64#6 +# asm 2: movd in11=%r9 +movd %xmm6,%r9 + +# qhasm: (uint32) in8 ^= *(uint32 *) (m + 224) +# asm 1: xorl 224(in12=int64#3 +# asm 2: movd in12=%rdx +movd %xmm13,%rdx + +# qhasm: in13 = z13 +# asm 1: movd in13=int64#4 +# asm 2: movd in13=%rcx +movd %xmm9,%rcx + +# qhasm: in14 = z14 +# asm 1: movd in14=int64#5 +# asm 2: movd in14=%r8 +movd %xmm3,%r8 + +# qhasm: in15 = z15 +# asm 1: movd in15=int64#6 +# asm 2: movd in15=%r9 +movd %xmm2,%r9 + +# qhasm: z12 <<<= 96 +# asm 1: pshufd $0x39,in12=int64#3 +# asm 2: movd in12=%rdx +movd %xmm13,%rdx + +# qhasm: in13 = z13 +# asm 1: movd in13=int64#4 +# asm 2: movd in13=%rcx +movd %xmm9,%rcx + +# qhasm: in14 = z14 +# asm 1: movd in14=int64#5 +# asm 2: movd in14=%r8 +movd %xmm3,%r8 + +# qhasm: in15 = z15 +# asm 1: movd in15=int64#6 +# asm 2: movd in15=%r9 +movd %xmm2,%r9 + +# qhasm: z12 <<<= 96 +# asm 1: pshufd $0x39,in12=int64#3 +# asm 2: movd in12=%rdx +movd %xmm13,%rdx + +# qhasm: in13 = z13 +# asm 1: movd in13=int64#4 +# asm 2: movd in13=%rcx +movd %xmm9,%rcx + +# qhasm: in14 = z14 +# asm 1: movd in14=int64#5 +# asm 2: movd in14=%r8 +movd %xmm3,%r8 + +# qhasm: in15 = z15 +# asm 1: movd in15=int64#6 +# asm 2: movd in15=%r9 +movd %xmm2,%r9 + +# qhasm: z12 <<<= 96 +# asm 1: pshufd $0x39,in12=int64#3 +# asm 2: movd in12=%rdx +movd %xmm13,%rdx + +# qhasm: in13 = z13 +# asm 1: movd in13=int64#4 +# asm 2: movd in13=%rcx +movd %xmm9,%rcx + +# qhasm: in14 = z14 +# asm 1: movd in14=int64#5 +# asm 2: movd in14=%r8 +movd %xmm3,%r8 + +# qhasm: in15 = z15 +# asm 1: movd in15=int64#6 +# asm 2: movd in15=%r9 +movd %xmm2,%r9 + +# qhasm: (uint32) in12 ^= *(uint32 *) (m + 240) +# asm 1: xorl 240(bytes=int64#6 +# asm 2: movq bytes=%r9 +movq 408(%rsp),%r9 + +# qhasm: bytes -= 256 +# asm 1: sub $256,? bytes - 0 +# asm 1: cmp $0, +jbe ._done +# comment:fp stack unchanged by fallthrough + +# qhasm: bytesbetween1and255: +._bytesbetween1and255: + +# qhasm: unsignedctarget=int64#3 +# asm 2: mov ctarget=%rdx +mov %rdi,%rdx + +# qhasm: out = &tmp +# asm 1: leaq out=int64#1 +# asm 2: leaq out=%rdi +leaq 416(%rsp),%rdi + +# qhasm: i = bytes +# asm 1: mov i=int64#4 +# asm 2: mov i=%rcx +mov %r9,%rcx + +# qhasm: while (i) { *out++ = *m++; --i } +rep movsb + +# qhasm: out = &tmp +# asm 1: leaq out=int64#1 +# asm 2: leaq out=%rdi +leaq 416(%rsp),%rdi + +# qhasm: m = &tmp +# asm 1: leaq m=int64#2 +# asm 2: leaq m=%rsi +leaq 416(%rsp),%rsi +# comment:fp stack unchanged by fallthrough + +# qhasm: nocopy: +._nocopy: + +# qhasm: bytes_backup = bytes +# asm 1: movq bytes_backup=stack64#8 +# asm 2: movq bytes_backup=408(%rsp) +movq %r9,408(%rsp) + +# qhasm: diag0 = x0 +# asm 1: movdqa diag0=int6464#1 +# asm 2: movdqa diag0=%xmm0 +movdqa 48(%rsp),%xmm0 + +# qhasm: diag1 = x1 +# asm 1: movdqa diag1=int6464#2 +# asm 2: movdqa diag1=%xmm1 +movdqa 0(%rsp),%xmm1 + +# qhasm: diag2 = x2 +# asm 1: movdqa diag2=int6464#3 +# asm 2: movdqa diag2=%xmm2 +movdqa 16(%rsp),%xmm2 + +# qhasm: diag3 = x3 +# asm 1: movdqa diag3=int6464#4 +# asm 2: movdqa diag3=%xmm3 +movdqa 32(%rsp),%xmm3 + +# qhasm: a0 = diag1 +# asm 1: movdqa a0=int6464#5 +# asm 2: movdqa a0=%xmm4 +movdqa %xmm1,%xmm4 + +# qhasm: i = 12 +# asm 1: mov $12,>i=int64#4 +# asm 2: mov $12,>i=%rcx +mov $12,%rcx + +# qhasm: mainloop2: +._mainloop2: + +# qhasm: uint32323232 a0 += diag0 +# asm 1: paddd a1=int6464#6 +# asm 2: movdqa a1=%xmm5 +movdqa %xmm0,%xmm5 + +# qhasm: b0 = a0 +# asm 1: movdqa b0=int6464#7 +# asm 2: movdqa b0=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a0 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,a2=int6464#5 +# asm 2: movdqa a2=%xmm4 +movdqa %xmm3,%xmm4 + +# qhasm: b1 = a1 +# asm 1: movdqa b1=int6464#7 +# asm 2: movdqa b1=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a1 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,a3=int6464#6 +# asm 2: movdqa a3=%xmm5 +movdqa %xmm2,%xmm5 + +# qhasm: b2 = a2 +# asm 1: movdqa b2=int6464#7 +# asm 2: movdqa b2=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a2 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,a4=int6464#5 +# asm 2: movdqa a4=%xmm4 +movdqa %xmm3,%xmm4 + +# qhasm: b3 = a3 +# asm 1: movdqa b3=int6464#7 +# asm 2: movdqa b3=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a3 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,a5=int6464#6 +# asm 2: movdqa a5=%xmm5 +movdqa %xmm0,%xmm5 + +# qhasm: b4 = a4 +# asm 1: movdqa b4=int6464#7 +# asm 2: movdqa b4=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a4 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,a6=int6464#5 +# asm 2: movdqa a6=%xmm4 +movdqa %xmm1,%xmm4 + +# qhasm: b5 = a5 +# asm 1: movdqa b5=int6464#7 +# asm 2: movdqa b5=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a5 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,a7=int6464#6 +# asm 2: movdqa a7=%xmm5 +movdqa %xmm2,%xmm5 + +# qhasm: b6 = a6 +# asm 1: movdqa b6=int6464#7 +# asm 2: movdqa b6=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a6 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,a0=int6464#5 +# asm 2: movdqa a0=%xmm4 +movdqa %xmm1,%xmm4 + +# qhasm: b7 = a7 +# asm 1: movdqa b7=int6464#7 +# asm 2: movdqa b7=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a7 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,a1=int6464#6 +# asm 2: movdqa a1=%xmm5 +movdqa %xmm0,%xmm5 + +# qhasm: b0 = a0 +# asm 1: movdqa b0=int6464#7 +# asm 2: movdqa b0=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a0 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,a2=int6464#5 +# asm 2: movdqa a2=%xmm4 +movdqa %xmm3,%xmm4 + +# qhasm: b1 = a1 +# asm 1: movdqa b1=int6464#7 +# asm 2: movdqa b1=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a1 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,a3=int6464#6 +# asm 2: movdqa a3=%xmm5 +movdqa %xmm2,%xmm5 + +# qhasm: b2 = a2 +# asm 1: movdqa b2=int6464#7 +# asm 2: movdqa b2=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a2 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,a4=int6464#5 +# asm 2: movdqa a4=%xmm4 +movdqa %xmm3,%xmm4 + +# qhasm: b3 = a3 +# asm 1: movdqa b3=int6464#7 +# asm 2: movdqa b3=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a3 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,a5=int6464#6 +# asm 2: movdqa a5=%xmm5 +movdqa %xmm0,%xmm5 + +# qhasm: b4 = a4 +# asm 1: movdqa b4=int6464#7 +# asm 2: movdqa b4=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a4 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,a6=int6464#5 +# asm 2: movdqa a6=%xmm4 +movdqa %xmm1,%xmm4 + +# qhasm: b5 = a5 +# asm 1: movdqa b5=int6464#7 +# asm 2: movdqa b5=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a5 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,a7=int6464#6 +# asm 2: movdqa a7=%xmm5 +movdqa %xmm2,%xmm5 + +# qhasm: b6 = a6 +# asm 1: movdqa b6=int6464#7 +# asm 2: movdqa b6=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a6 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,? i -= 4 +# asm 1: sub $4,a0=int6464#5 +# asm 2: movdqa a0=%xmm4 +movdqa %xmm1,%xmm4 + +# qhasm: b7 = a7 +# asm 1: movdqa b7=int6464#7 +# asm 2: movdqa b7=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a7 <<= 18 +# asm 1: pslld $18,b0=int6464#8,>b0=int6464#8 +# asm 2: pxor >b0=%xmm7,>b0=%xmm7 +pxor %xmm7,%xmm7 + +# qhasm: uint32323232 b7 >>= 14 +# asm 1: psrld $14, +ja ._mainloop2 + +# qhasm: uint32323232 diag0 += x0 +# asm 1: paddd in0=int64#4 +# asm 2: movd in0=%rcx +movd %xmm0,%rcx + +# qhasm: in12 = diag1 +# asm 1: movd in12=int64#5 +# asm 2: movd in12=%r8 +movd %xmm1,%r8 + +# qhasm: in8 = diag2 +# asm 1: movd in8=int64#6 +# asm 2: movd in8=%r9 +movd %xmm2,%r9 + +# qhasm: in4 = diag3 +# asm 1: movd in4=int64#7 +# asm 2: movd in4=%rax +movd %xmm3,%rax + +# qhasm: diag0 <<<= 96 +# asm 1: pshufd $0x39,in5=int64#4 +# asm 2: movd in5=%rcx +movd %xmm0,%rcx + +# qhasm: in1 = diag1 +# asm 1: movd in1=int64#5 +# asm 2: movd in1=%r8 +movd %xmm1,%r8 + +# qhasm: in13 = diag2 +# asm 1: movd in13=int64#6 +# asm 2: movd in13=%r9 +movd %xmm2,%r9 + +# qhasm: in9 = diag3 +# asm 1: movd in9=int64#7 +# asm 2: movd in9=%rax +movd %xmm3,%rax + +# qhasm: diag0 <<<= 96 +# asm 1: pshufd $0x39,in10=int64#4 +# asm 2: movd in10=%rcx +movd %xmm0,%rcx + +# qhasm: in6 = diag1 +# asm 1: movd in6=int64#5 +# asm 2: movd in6=%r8 +movd %xmm1,%r8 + +# qhasm: in2 = diag2 +# asm 1: movd in2=int64#6 +# asm 2: movd in2=%r9 +movd %xmm2,%r9 + +# qhasm: in14 = diag3 +# asm 1: movd in14=int64#7 +# asm 2: movd in14=%rax +movd %xmm3,%rax + +# qhasm: diag0 <<<= 96 +# asm 1: pshufd $0x39,in15=int64#4 +# asm 2: movd in15=%rcx +movd %xmm0,%rcx + +# qhasm: in11 = diag1 +# asm 1: movd in11=int64#5 +# asm 2: movd in11=%r8 +movd %xmm1,%r8 + +# qhasm: in7 = diag2 +# asm 1: movd in7=int64#6 +# asm 2: movd in7=%r9 +movd %xmm2,%r9 + +# qhasm: in3 = diag3 +# asm 1: movd in3=int64#7 +# asm 2: movd in3=%rax +movd %xmm3,%rax + +# qhasm: (uint32) in15 ^= *(uint32 *) (m + 60) +# asm 1: xorl 60(bytes=int64#6 +# asm 2: movq bytes=%r9 +movq 408(%rsp),%r9 + +# qhasm: in8 = ((uint32 *)&x2)[0] +# asm 1: movl in8=int64#4d +# asm 2: movl in8=%ecx +movl 16(%rsp),%ecx + +# qhasm: in9 = ((uint32 *)&x3)[1] +# asm 1: movl 4+in9=int64#5d +# asm 2: movl 4+in9=%r8d +movl 4+32(%rsp),%r8d + +# qhasm: in8 += 1 +# asm 1: add $1,in9=int64#5 +# asm 2: mov in9=%r8 +mov %rcx,%r8 + +# qhasm: (uint64) in9 >>= 32 +# asm 1: shr $32,x2=stack128#2 +# asm 2: movl x2=16(%rsp) +movl %ecx,16(%rsp) + +# qhasm: ((uint32 *)&x3)[1] = in9 +# asm 1: movl ? unsigned +ja ._bytesatleast65 +# comment:fp stack unchanged by jump + +# qhasm: goto bytesatleast64 if !unsigned< +jae ._bytesatleast64 + +# qhasm: m = out +# asm 1: mov m=int64#2 +# asm 2: mov m=%rsi +mov %rdi,%rsi + +# qhasm: out = ctarget +# asm 1: mov out=int64#1 +# asm 2: mov out=%rdi +mov %rdx,%rdi + +# qhasm: i = bytes +# asm 1: mov i=int64#4 +# asm 2: mov i=%rcx +mov %r9,%rcx + +# qhasm: while (i) { *out++ = *m++; --i } +rep movsb +# comment:fp stack unchanged by fallthrough + +# qhasm: bytesatleast64: +._bytesatleast64: +# comment:fp stack unchanged by fallthrough + +# qhasm: done: +._done: + +# qhasm: r11_caller = r11_stack +# asm 1: movq r11_caller=int64#9 +# asm 2: movq r11_caller=%r11 +movq 352(%rsp),%r11 + +# qhasm: r12_caller = r12_stack +# asm 1: movq r12_caller=int64#10 +# asm 2: movq r12_caller=%r12 +movq 360(%rsp),%r12 + +# qhasm: r13_caller = r13_stack +# asm 1: movq r13_caller=int64#11 +# asm 2: movq r13_caller=%r13 +movq 368(%rsp),%r13 + +# qhasm: r14_caller = r14_stack +# asm 1: movq r14_caller=int64#12 +# asm 2: movq r14_caller=%r14 +movq 376(%rsp),%r14 + +# qhasm: r15_caller = r15_stack +# asm 1: movq r15_caller=int64#13 +# asm 2: movq r15_caller=%r15 +movq 384(%rsp),%r15 + +# qhasm: rbx_caller = rbx_stack +# asm 1: movq rbx_caller=int64#14 +# asm 2: movq rbx_caller=%rbx +movq 392(%rsp),%rbx + +# qhasm: rbp_caller = rbp_stack +# asm 1: movq rbp_caller=int64#15 +# asm 2: movq rbp_caller=%rbp +movq 400(%rsp),%rbp + +# qhasm: leave +add %r11,%rsp +xor %rax,%rax +xor %rdx,%rdx +ret + +# qhasm: bytesatleast65: +._bytesatleast65: + +# qhasm: bytes -= 64 +# asm 1: sub $64,eax_stack=stack32#1 +# asm 2: movl eax_stack=0(%esp) +movl %eax,0(%esp) + +# qhasm: ebx_stack = ebx +# asm 1: movl ebx_stack=stack32#2 +# asm 2: movl ebx_stack=4(%esp) +movl %ebx,4(%esp) + +# qhasm: esi_stack = esi +# asm 1: movl esi_stack=stack32#3 +# asm 2: movl esi_stack=8(%esp) +movl %esi,8(%esp) + +# qhasm: edi_stack = edi +# asm 1: movl edi_stack=stack32#4 +# asm 2: movl edi_stack=12(%esp) +movl %edi,12(%esp) + +# qhasm: ebp_stack = ebp +# asm 1: movl ebp_stack=stack32#5 +# asm 2: movl ebp_stack=16(%esp) +movl %ebp,16(%esp) + +# qhasm: bytes = arg2 +# asm 1: movl bytes=int32#3 +# asm 2: movl bytes=%edx +movl 8(%esp,%eax),%edx + +# qhasm: out = arg1 +# asm 1: movl out=int32#6 +# asm 2: movl out=%edi +movl 4(%esp,%eax),%edi + +# qhasm: m = out +# asm 1: mov m=int32#5 +# asm 2: mov m=%esi +mov %edi,%esi + +# qhasm: iv = arg4 +# asm 1: movl iv=int32#4 +# asm 2: movl iv=%ebx +movl 16(%esp,%eax),%ebx + +# qhasm: k = arg5 +# asm 1: movl k=int32#7 +# asm 2: movl k=%ebp +movl 20(%esp,%eax),%ebp + +# qhasm: unsigned>? bytes - 0 +# asm 1: cmp $0, +jbe ._done + +# qhasm: a = 0 +# asm 1: mov $0,>a=int32#1 +# asm 2: mov $0,>a=%eax +mov $0,%eax + +# qhasm: i = bytes +# asm 1: mov i=int32#2 +# asm 2: mov i=%ecx +mov %edx,%ecx + +# qhasm: while (i) { *out++ = a; --i } +rep stosb + +# qhasm: out -= bytes +# asm 1: subl eax_stack=stack32#1 +# asm 2: movl eax_stack=0(%esp) +movl %eax,0(%esp) + +# qhasm: ebx_stack = ebx +# asm 1: movl ebx_stack=stack32#2 +# asm 2: movl ebx_stack=4(%esp) +movl %ebx,4(%esp) + +# qhasm: esi_stack = esi +# asm 1: movl esi_stack=stack32#3 +# asm 2: movl esi_stack=8(%esp) +movl %esi,8(%esp) + +# qhasm: edi_stack = edi +# asm 1: movl edi_stack=stack32#4 +# asm 2: movl edi_stack=12(%esp) +movl %edi,12(%esp) + +# qhasm: ebp_stack = ebp +# asm 1: movl ebp_stack=stack32#5 +# asm 2: movl ebp_stack=16(%esp) +movl %ebp,16(%esp) + +# qhasm: out = arg1 +# asm 1: movl out=int32#6 +# asm 2: movl out=%edi +movl 4(%esp,%eax),%edi + +# qhasm: m = arg2 +# asm 1: movl m=int32#5 +# asm 2: movl m=%esi +movl 8(%esp,%eax),%esi + +# qhasm: bytes = arg3 +# asm 1: movl bytes=int32#3 +# asm 2: movl bytes=%edx +movl 12(%esp,%eax),%edx + +# qhasm: iv = arg5 +# asm 1: movl iv=int32#4 +# asm 2: movl iv=%ebx +movl 20(%esp,%eax),%ebx + +# qhasm: k = arg6 +# asm 1: movl k=int32#7 +# asm 2: movl k=%ebp +movl 24(%esp,%eax),%ebp + +# qhasm: unsigned>? bytes - 0 +# asm 1: cmp $0, +jbe ._done +# comment:fp stack unchanged by fallthrough + +# qhasm: start: +._start: + +# qhasm: out_stack = out +# asm 1: movl out_stack=stack32#6 +# asm 2: movl out_stack=20(%esp) +movl %edi,20(%esp) + +# qhasm: bytes_stack = bytes +# asm 1: movl bytes_stack=stack32#7 +# asm 2: movl bytes_stack=24(%esp) +movl %edx,24(%esp) + +# qhasm: in4 = *(uint32 *) (k + 12) +# asm 1: movl 12(in4=int32#1 +# asm 2: movl 12(in4=%eax +movl 12(%ebp),%eax + +# qhasm: in12 = *(uint32 *) (k + 20) +# asm 1: movl 20(in12=int32#2 +# asm 2: movl 20(in12=%ecx +movl 20(%ebp),%ecx + +# qhasm: ((uint32 *)&x3)[0] = in4 +# asm 1: movl x3=stack128#1 +# asm 2: movl x3=32(%esp) +movl %eax,32(%esp) + +# qhasm: ((uint32 *)&x1)[0] = in12 +# asm 1: movl x1=stack128#2 +# asm 2: movl x1=48(%esp) +movl %ecx,48(%esp) + +# qhasm: in0 = 1634760805 +# asm 1: mov $1634760805,>in0=int32#1 +# asm 2: mov $1634760805,>in0=%eax +mov $1634760805,%eax + +# qhasm: in8 = 0 +# asm 1: mov $0,>in8=int32#2 +# asm 2: mov $0,>in8=%ecx +mov $0,%ecx + +# qhasm: ((uint32 *)&x0)[0] = in0 +# asm 1: movl x0=stack128#3 +# asm 2: movl x0=64(%esp) +movl %eax,64(%esp) + +# qhasm: ((uint32 *)&x2)[0] = in8 +# asm 1: movl x2=stack128#4 +# asm 2: movl x2=80(%esp) +movl %ecx,80(%esp) + +# qhasm: in6 = *(uint32 *) (iv + 0) +# asm 1: movl 0(in6=int32#1 +# asm 2: movl 0(in6=%eax +movl 0(%ebx),%eax + +# qhasm: in7 = *(uint32 *) (iv + 4) +# asm 1: movl 4(in7=int32#2 +# asm 2: movl 4(in7=%ecx +movl 4(%ebx),%ecx + +# qhasm: ((uint32 *)&x1)[2] = in6 +# asm 1: movl in9=int32#1 +# asm 2: mov $0,>in9=%eax +mov $0,%eax + +# qhasm: in10 = 2036477234 +# asm 1: mov $2036477234,>in10=int32#2 +# asm 2: mov $2036477234,>in10=%ecx +mov $2036477234,%ecx + +# qhasm: ((uint32 *)&x3)[1] = in9 +# asm 1: movl in1=int32#1 +# asm 2: movl 0(in1=%eax +movl 0(%ebp),%eax + +# qhasm: in2 = *(uint32 *) (k + 4) +# asm 1: movl 4(in2=int32#2 +# asm 2: movl 4(in2=%ecx +movl 4(%ebp),%ecx + +# qhasm: in3 = *(uint32 *) (k + 8) +# asm 1: movl 8(in3=int32#3 +# asm 2: movl 8(in3=%edx +movl 8(%ebp),%edx + +# qhasm: in5 = 857760878 +# asm 1: mov $857760878,>in5=int32#4 +# asm 2: mov $857760878,>in5=%ebx +mov $857760878,%ebx + +# qhasm: ((uint32 *)&x1)[1] = in1 +# asm 1: movl in11=int32#1 +# asm 2: movl 16(in11=%eax +movl 16(%ebp),%eax + +# qhasm: in13 = *(uint32 *) (k + 24) +# asm 1: movl 24(in13=int32#2 +# asm 2: movl 24(in13=%ecx +movl 24(%ebp),%ecx + +# qhasm: in14 = *(uint32 *) (k + 28) +# asm 1: movl 28(in14=int32#3 +# asm 2: movl 28(in14=%edx +movl 28(%ebp),%edx + +# qhasm: in15 = 1797285236 +# asm 1: mov $1797285236,>in15=int32#4 +# asm 2: mov $1797285236,>in15=%ebx +mov $1797285236,%ebx + +# qhasm: ((uint32 *)&x1)[3] = in11 +# asm 1: movl bytes=int32#1 +# asm 2: movl bytes=%eax +movl 24(%esp),%eax + +# qhasm: unsignedz0=int6464#1 +# asm 2: movdqa z0=%xmm0 +movdqa 64(%esp),%xmm0 + +# qhasm: z5 = z0[1,1,1,1] +# asm 1: pshufd $0x55,z5=int6464#2 +# asm 2: pshufd $0x55,z5=%xmm1 +pshufd $0x55,%xmm0,%xmm1 + +# qhasm: z10 = z0[2,2,2,2] +# asm 1: pshufd $0xaa,z10=int6464#3 +# asm 2: pshufd $0xaa,z10=%xmm2 +pshufd $0xaa,%xmm0,%xmm2 + +# qhasm: z15 = z0[3,3,3,3] +# asm 1: pshufd $0xff,z15=int6464#4 +# asm 2: pshufd $0xff,z15=%xmm3 +pshufd $0xff,%xmm0,%xmm3 + +# qhasm: z0 = z0[0,0,0,0] +# asm 1: pshufd $0x00,z0=int6464#1 +# asm 2: pshufd $0x00,z0=%xmm0 +pshufd $0x00,%xmm0,%xmm0 + +# qhasm: orig5 = z5 +# asm 1: movdqa orig5=stack128#5 +# asm 2: movdqa orig5=96(%esp) +movdqa %xmm1,96(%esp) + +# qhasm: orig10 = z10 +# asm 1: movdqa orig10=stack128#6 +# asm 2: movdqa orig10=112(%esp) +movdqa %xmm2,112(%esp) + +# qhasm: orig15 = z15 +# asm 1: movdqa orig15=stack128#7 +# asm 2: movdqa orig15=128(%esp) +movdqa %xmm3,128(%esp) + +# qhasm: orig0 = z0 +# asm 1: movdqa orig0=stack128#8 +# asm 2: movdqa orig0=144(%esp) +movdqa %xmm0,144(%esp) + +# qhasm: z1 = x1 +# asm 1: movdqa z1=int6464#1 +# asm 2: movdqa z1=%xmm0 +movdqa 48(%esp),%xmm0 + +# qhasm: z6 = z1[2,2,2,2] +# asm 1: pshufd $0xaa,z6=int6464#2 +# asm 2: pshufd $0xaa,z6=%xmm1 +pshufd $0xaa,%xmm0,%xmm1 + +# qhasm: z11 = z1[3,3,3,3] +# asm 1: pshufd $0xff,z11=int6464#3 +# asm 2: pshufd $0xff,z11=%xmm2 +pshufd $0xff,%xmm0,%xmm2 + +# qhasm: z12 = z1[0,0,0,0] +# asm 1: pshufd $0x00,z12=int6464#4 +# asm 2: pshufd $0x00,z12=%xmm3 +pshufd $0x00,%xmm0,%xmm3 + +# qhasm: z1 = z1[1,1,1,1] +# asm 1: pshufd $0x55,z1=int6464#1 +# asm 2: pshufd $0x55,z1=%xmm0 +pshufd $0x55,%xmm0,%xmm0 + +# qhasm: orig6 = z6 +# asm 1: movdqa orig6=stack128#9 +# asm 2: movdqa orig6=160(%esp) +movdqa %xmm1,160(%esp) + +# qhasm: orig11 = z11 +# asm 1: movdqa orig11=stack128#10 +# asm 2: movdqa orig11=176(%esp) +movdqa %xmm2,176(%esp) + +# qhasm: orig12 = z12 +# asm 1: movdqa orig12=stack128#11 +# asm 2: movdqa orig12=192(%esp) +movdqa %xmm3,192(%esp) + +# qhasm: orig1 = z1 +# asm 1: movdqa orig1=stack128#12 +# asm 2: movdqa orig1=208(%esp) +movdqa %xmm0,208(%esp) + +# qhasm: z2 = x2 +# asm 1: movdqa z2=int6464#1 +# asm 2: movdqa z2=%xmm0 +movdqa 80(%esp),%xmm0 + +# qhasm: z7 = z2[3,3,3,3] +# asm 1: pshufd $0xff,z7=int6464#2 +# asm 2: pshufd $0xff,z7=%xmm1 +pshufd $0xff,%xmm0,%xmm1 + +# qhasm: z13 = z2[1,1,1,1] +# asm 1: pshufd $0x55,z13=int6464#3 +# asm 2: pshufd $0x55,z13=%xmm2 +pshufd $0x55,%xmm0,%xmm2 + +# qhasm: z2 = z2[2,2,2,2] +# asm 1: pshufd $0xaa,z2=int6464#1 +# asm 2: pshufd $0xaa,z2=%xmm0 +pshufd $0xaa,%xmm0,%xmm0 + +# qhasm: orig7 = z7 +# asm 1: movdqa orig7=stack128#13 +# asm 2: movdqa orig7=224(%esp) +movdqa %xmm1,224(%esp) + +# qhasm: orig13 = z13 +# asm 1: movdqa orig13=stack128#14 +# asm 2: movdqa orig13=240(%esp) +movdqa %xmm2,240(%esp) + +# qhasm: orig2 = z2 +# asm 1: movdqa orig2=stack128#15 +# asm 2: movdqa orig2=256(%esp) +movdqa %xmm0,256(%esp) + +# qhasm: z3 = x3 +# asm 1: movdqa z3=int6464#1 +# asm 2: movdqa z3=%xmm0 +movdqa 32(%esp),%xmm0 + +# qhasm: z4 = z3[0,0,0,0] +# asm 1: pshufd $0x00,z4=int6464#2 +# asm 2: pshufd $0x00,z4=%xmm1 +pshufd $0x00,%xmm0,%xmm1 + +# qhasm: z14 = z3[2,2,2,2] +# asm 1: pshufd $0xaa,z14=int6464#3 +# asm 2: pshufd $0xaa,z14=%xmm2 +pshufd $0xaa,%xmm0,%xmm2 + +# qhasm: z3 = z3[3,3,3,3] +# asm 1: pshufd $0xff,z3=int6464#1 +# asm 2: pshufd $0xff,z3=%xmm0 +pshufd $0xff,%xmm0,%xmm0 + +# qhasm: orig4 = z4 +# asm 1: movdqa orig4=stack128#16 +# asm 2: movdqa orig4=272(%esp) +movdqa %xmm1,272(%esp) + +# qhasm: orig14 = z14 +# asm 1: movdqa orig14=stack128#17 +# asm 2: movdqa orig14=288(%esp) +movdqa %xmm2,288(%esp) + +# qhasm: orig3 = z3 +# asm 1: movdqa orig3=stack128#18 +# asm 2: movdqa orig3=304(%esp) +movdqa %xmm0,304(%esp) + +# qhasm: bytesatleast256: +._bytesatleast256: + +# qhasm: in8 = ((uint32 *)&x2)[0] +# asm 1: movl in8=int32#2 +# asm 2: movl in8=%ecx +movl 80(%esp),%ecx + +# qhasm: in9 = ((uint32 *)&x3)[1] +# asm 1: movl 4+in9=int32#3 +# asm 2: movl 4+in9=%edx +movl 4+32(%esp),%edx + +# qhasm: ((uint32 *) &orig8)[0] = in8 +# asm 1: movl orig8=stack128#19 +# asm 2: movl orig8=320(%esp) +movl %ecx,320(%esp) + +# qhasm: ((uint32 *) &orig9)[0] = in9 +# asm 1: movl orig9=stack128#20 +# asm 2: movl orig9=336(%esp) +movl %edx,336(%esp) + +# qhasm: carry? in8 += 1 +# asm 1: add $1,x2=stack128#4 +# asm 2: movl x2=80(%esp) +movl %ecx,80(%esp) + +# qhasm: ((uint32 *)&x3)[1] = in9 +# asm 1: movl bytes_stack=stack32#7 +# asm 2: movl bytes_stack=24(%esp) +movl %eax,24(%esp) + +# qhasm: i = 12 +# asm 1: mov $12,>i=int32#1 +# asm 2: mov $12,>i=%eax +mov $12,%eax + +# qhasm: z5 = orig5 +# asm 1: movdqa z5=int6464#1 +# asm 2: movdqa z5=%xmm0 +movdqa 96(%esp),%xmm0 + +# qhasm: z10 = orig10 +# asm 1: movdqa z10=int6464#2 +# asm 2: movdqa z10=%xmm1 +movdqa 112(%esp),%xmm1 + +# qhasm: z15 = orig15 +# asm 1: movdqa z15=int6464#3 +# asm 2: movdqa z15=%xmm2 +movdqa 128(%esp),%xmm2 + +# qhasm: z14 = orig14 +# asm 1: movdqa z14=int6464#4 +# asm 2: movdqa z14=%xmm3 +movdqa 288(%esp),%xmm3 + +# qhasm: z3 = orig3 +# asm 1: movdqa z3=int6464#5 +# asm 2: movdqa z3=%xmm4 +movdqa 304(%esp),%xmm4 + +# qhasm: z6 = orig6 +# asm 1: movdqa z6=int6464#6 +# asm 2: movdqa z6=%xmm5 +movdqa 160(%esp),%xmm5 + +# qhasm: z11 = orig11 +# asm 1: movdqa z11=int6464#7 +# asm 2: movdqa z11=%xmm6 +movdqa 176(%esp),%xmm6 + +# qhasm: z1 = orig1 +# asm 1: movdqa z1=int6464#8 +# asm 2: movdqa z1=%xmm7 +movdqa 208(%esp),%xmm7 + +# qhasm: z5_stack = z5 +# asm 1: movdqa z5_stack=stack128#21 +# asm 2: movdqa z5_stack=352(%esp) +movdqa %xmm0,352(%esp) + +# qhasm: z10_stack = z10 +# asm 1: movdqa z10_stack=stack128#22 +# asm 2: movdqa z10_stack=368(%esp) +movdqa %xmm1,368(%esp) + +# qhasm: z15_stack = z15 +# asm 1: movdqa z15_stack=stack128#23 +# asm 2: movdqa z15_stack=384(%esp) +movdqa %xmm2,384(%esp) + +# qhasm: z14_stack = z14 +# asm 1: movdqa z14_stack=stack128#24 +# asm 2: movdqa z14_stack=400(%esp) +movdqa %xmm3,400(%esp) + +# qhasm: z3_stack = z3 +# asm 1: movdqa z3_stack=stack128#25 +# asm 2: movdqa z3_stack=416(%esp) +movdqa %xmm4,416(%esp) + +# qhasm: z6_stack = z6 +# asm 1: movdqa z6_stack=stack128#26 +# asm 2: movdqa z6_stack=432(%esp) +movdqa %xmm5,432(%esp) + +# qhasm: z11_stack = z11 +# asm 1: movdqa z11_stack=stack128#27 +# asm 2: movdqa z11_stack=448(%esp) +movdqa %xmm6,448(%esp) + +# qhasm: z1_stack = z1 +# asm 1: movdqa z1_stack=stack128#28 +# asm 2: movdqa z1_stack=464(%esp) +movdqa %xmm7,464(%esp) + +# qhasm: z7 = orig7 +# asm 1: movdqa z7=int6464#5 +# asm 2: movdqa z7=%xmm4 +movdqa 224(%esp),%xmm4 + +# qhasm: z13 = orig13 +# asm 1: movdqa z13=int6464#6 +# asm 2: movdqa z13=%xmm5 +movdqa 240(%esp),%xmm5 + +# qhasm: z2 = orig2 +# asm 1: movdqa z2=int6464#7 +# asm 2: movdqa z2=%xmm6 +movdqa 256(%esp),%xmm6 + +# qhasm: z9 = orig9 +# asm 1: movdqa z9=int6464#8 +# asm 2: movdqa z9=%xmm7 +movdqa 336(%esp),%xmm7 + +# qhasm: p = orig0 +# asm 1: movdqa p=int6464#1 +# asm 2: movdqa p=%xmm0 +movdqa 144(%esp),%xmm0 + +# qhasm: t = orig12 +# asm 1: movdqa t=int6464#3 +# asm 2: movdqa t=%xmm2 +movdqa 192(%esp),%xmm2 + +# qhasm: q = orig4 +# asm 1: movdqa q=int6464#4 +# asm 2: movdqa q=%xmm3 +movdqa 272(%esp),%xmm3 + +# qhasm: r = orig8 +# asm 1: movdqa r=int6464#2 +# asm 2: movdqa r=%xmm1 +movdqa 320(%esp),%xmm1 + +# qhasm: z7_stack = z7 +# asm 1: movdqa z7_stack=stack128#29 +# asm 2: movdqa z7_stack=480(%esp) +movdqa %xmm4,480(%esp) + +# qhasm: z13_stack = z13 +# asm 1: movdqa z13_stack=stack128#30 +# asm 2: movdqa z13_stack=496(%esp) +movdqa %xmm5,496(%esp) + +# qhasm: z2_stack = z2 +# asm 1: movdqa z2_stack=stack128#31 +# asm 2: movdqa z2_stack=512(%esp) +movdqa %xmm6,512(%esp) + +# qhasm: z9_stack = z9 +# asm 1: movdqa z9_stack=stack128#32 +# asm 2: movdqa z9_stack=528(%esp) +movdqa %xmm7,528(%esp) + +# qhasm: z0_stack = p +# asm 1: movdqa z0_stack=stack128#33 +# asm 2: movdqa z0_stack=544(%esp) +movdqa %xmm0,544(%esp) + +# qhasm: z12_stack = t +# asm 1: movdqa z12_stack=stack128#34 +# asm 2: movdqa z12_stack=560(%esp) +movdqa %xmm2,560(%esp) + +# qhasm: z4_stack = q +# asm 1: movdqa z4_stack=stack128#35 +# asm 2: movdqa z4_stack=576(%esp) +movdqa %xmm3,576(%esp) + +# qhasm: z8_stack = r +# asm 1: movdqa z8_stack=stack128#36 +# asm 2: movdqa z8_stack=592(%esp) +movdqa %xmm1,592(%esp) + +# qhasm: mainloop1: +._mainloop1: + +# qhasm: assign xmm0 to p + +# qhasm: assign xmm1 to r + +# qhasm: assign xmm2 to t + +# qhasm: assign xmm3 to q + +# qhasm: s = t +# asm 1: movdqa s=int6464#7 +# asm 2: movdqa s=%xmm6 +movdqa %xmm2,%xmm6 + +# qhasm: uint32323232 t += p +# asm 1: paddd u=int6464#5 +# asm 2: movdqa u=%xmm4 +movdqa %xmm2,%xmm4 + +# qhasm: uint32323232 t >>= 25 +# asm 1: psrld $25,z4_stack=stack128#33 +# asm 2: movdqa z4_stack=544(%esp) +movdqa %xmm3,544(%esp) + +# qhasm: t = p +# asm 1: movdqa t=int6464#3 +# asm 2: movdqa t=%xmm2 +movdqa %xmm0,%xmm2 + +# qhasm: uint32323232 t += q +# asm 1: paddd u=int6464#5 +# asm 2: movdqa u=%xmm4 +movdqa %xmm2,%xmm4 + +# qhasm: uint32323232 t >>= 23 +# asm 1: psrld $23,z8_stack=stack128#34 +# asm 2: movdqa z8_stack=560(%esp) +movdqa %xmm1,560(%esp) + +# qhasm: uint32323232 q += r +# asm 1: paddd u=int6464#3 +# asm 2: movdqa u=%xmm2 +movdqa %xmm3,%xmm2 + +# qhasm: uint32323232 q >>= 19 +# asm 1: psrld $19,mt=int6464#3 +# asm 2: movdqa mt=%xmm2 +movdqa 464(%esp),%xmm2 + +# qhasm: mp = z5_stack +# asm 1: movdqa mp=int6464#5 +# asm 2: movdqa mp=%xmm4 +movdqa 352(%esp),%xmm4 + +# qhasm: mq = z9_stack +# asm 1: movdqa mq=int6464#4 +# asm 2: movdqa mq=%xmm3 +movdqa 528(%esp),%xmm3 + +# qhasm: mr = z13_stack +# asm 1: movdqa mr=int6464#6 +# asm 2: movdqa mr=%xmm5 +movdqa 496(%esp),%xmm5 + +# qhasm: z12_stack = s +# asm 1: movdqa z12_stack=stack128#30 +# asm 2: movdqa z12_stack=496(%esp) +movdqa %xmm6,496(%esp) + +# qhasm: uint32323232 r += s +# asm 1: paddd u=int6464#7 +# asm 2: movdqa u=%xmm6 +movdqa %xmm1,%xmm6 + +# qhasm: uint32323232 r >>= 14 +# asm 1: psrld $14,z0_stack=stack128#21 +# asm 2: movdqa z0_stack=352(%esp) +movdqa %xmm0,352(%esp) + +# qhasm: assign xmm2 to mt + +# qhasm: assign xmm3 to mq + +# qhasm: assign xmm4 to mp + +# qhasm: assign xmm5 to mr + +# qhasm: ms = mt +# asm 1: movdqa ms=int6464#7 +# asm 2: movdqa ms=%xmm6 +movdqa %xmm2,%xmm6 + +# qhasm: uint32323232 mt += mp +# asm 1: paddd mu=int6464#1 +# asm 2: movdqa mu=%xmm0 +movdqa %xmm2,%xmm0 + +# qhasm: uint32323232 mt >>= 25 +# asm 1: psrld $25,z9_stack=stack128#32 +# asm 2: movdqa z9_stack=528(%esp) +movdqa %xmm3,528(%esp) + +# qhasm: mt = mp +# asm 1: movdqa mt=int6464#1 +# asm 2: movdqa mt=%xmm0 +movdqa %xmm4,%xmm0 + +# qhasm: uint32323232 mt += mq +# asm 1: paddd mu=int6464#2 +# asm 2: movdqa mu=%xmm1 +movdqa %xmm0,%xmm1 + +# qhasm: uint32323232 mt >>= 23 +# asm 1: psrld $23,z13_stack=stack128#35 +# asm 2: movdqa z13_stack=576(%esp) +movdqa %xmm5,576(%esp) + +# qhasm: uint32323232 mq += mr +# asm 1: paddd mu=int6464#1 +# asm 2: movdqa mu=%xmm0 +movdqa %xmm3,%xmm0 + +# qhasm: uint32323232 mq >>= 19 +# asm 1: psrld $19,t=int6464#3 +# asm 2: movdqa t=%xmm2 +movdqa 432(%esp),%xmm2 + +# qhasm: p = z10_stack +# asm 1: movdqa p=int6464#1 +# asm 2: movdqa p=%xmm0 +movdqa 368(%esp),%xmm0 + +# qhasm: q = z14_stack +# asm 1: movdqa q=int6464#4 +# asm 2: movdqa q=%xmm3 +movdqa 400(%esp),%xmm3 + +# qhasm: r = z2_stack +# asm 1: movdqa r=int6464#2 +# asm 2: movdqa r=%xmm1 +movdqa 512(%esp),%xmm1 + +# qhasm: z1_stack = ms +# asm 1: movdqa z1_stack=stack128#22 +# asm 2: movdqa z1_stack=368(%esp) +movdqa %xmm6,368(%esp) + +# qhasm: uint32323232 mr += ms +# asm 1: paddd mu=int6464#7 +# asm 2: movdqa mu=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 mr >>= 14 +# asm 1: psrld $14,z5_stack=stack128#24 +# asm 2: movdqa z5_stack=400(%esp) +movdqa %xmm4,400(%esp) + +# qhasm: assign xmm0 to p + +# qhasm: assign xmm1 to r + +# qhasm: assign xmm2 to t + +# qhasm: assign xmm3 to q + +# qhasm: s = t +# asm 1: movdqa s=int6464#7 +# asm 2: movdqa s=%xmm6 +movdqa %xmm2,%xmm6 + +# qhasm: uint32323232 t += p +# asm 1: paddd u=int6464#5 +# asm 2: movdqa u=%xmm4 +movdqa %xmm2,%xmm4 + +# qhasm: uint32323232 t >>= 25 +# asm 1: psrld $25,z14_stack=stack128#36 +# asm 2: movdqa z14_stack=592(%esp) +movdqa %xmm3,592(%esp) + +# qhasm: t = p +# asm 1: movdqa t=int6464#3 +# asm 2: movdqa t=%xmm2 +movdqa %xmm0,%xmm2 + +# qhasm: uint32323232 t += q +# asm 1: paddd u=int6464#5 +# asm 2: movdqa u=%xmm4 +movdqa %xmm2,%xmm4 + +# qhasm: uint32323232 t >>= 23 +# asm 1: psrld $23,z2_stack=stack128#26 +# asm 2: movdqa z2_stack=432(%esp) +movdqa %xmm1,432(%esp) + +# qhasm: uint32323232 q += r +# asm 1: paddd u=int6464#3 +# asm 2: movdqa u=%xmm2 +movdqa %xmm3,%xmm2 + +# qhasm: uint32323232 q >>= 19 +# asm 1: psrld $19,mt=int6464#3 +# asm 2: movdqa mt=%xmm2 +movdqa 448(%esp),%xmm2 + +# qhasm: mp = z15_stack +# asm 1: movdqa mp=int6464#5 +# asm 2: movdqa mp=%xmm4 +movdqa 384(%esp),%xmm4 + +# qhasm: mq = z3_stack +# asm 1: movdqa mq=int6464#4 +# asm 2: movdqa mq=%xmm3 +movdqa 416(%esp),%xmm3 + +# qhasm: mr = z7_stack +# asm 1: movdqa mr=int6464#6 +# asm 2: movdqa mr=%xmm5 +movdqa 480(%esp),%xmm5 + +# qhasm: z6_stack = s +# asm 1: movdqa z6_stack=stack128#23 +# asm 2: movdqa z6_stack=384(%esp) +movdqa %xmm6,384(%esp) + +# qhasm: uint32323232 r += s +# asm 1: paddd u=int6464#7 +# asm 2: movdqa u=%xmm6 +movdqa %xmm1,%xmm6 + +# qhasm: uint32323232 r >>= 14 +# asm 1: psrld $14,z10_stack=stack128#27 +# asm 2: movdqa z10_stack=448(%esp) +movdqa %xmm0,448(%esp) + +# qhasm: assign xmm2 to mt + +# qhasm: assign xmm3 to mq + +# qhasm: assign xmm4 to mp + +# qhasm: assign xmm5 to mr + +# qhasm: ms = mt +# asm 1: movdqa ms=int6464#7 +# asm 2: movdqa ms=%xmm6 +movdqa %xmm2,%xmm6 + +# qhasm: uint32323232 mt += mp +# asm 1: paddd mu=int6464#1 +# asm 2: movdqa mu=%xmm0 +movdqa %xmm2,%xmm0 + +# qhasm: uint32323232 mt >>= 25 +# asm 1: psrld $25,z3_stack=stack128#25 +# asm 2: movdqa z3_stack=416(%esp) +movdqa %xmm3,416(%esp) + +# qhasm: mt = mp +# asm 1: movdqa mt=int6464#1 +# asm 2: movdqa mt=%xmm0 +movdqa %xmm4,%xmm0 + +# qhasm: uint32323232 mt += mq +# asm 1: paddd mu=int6464#2 +# asm 2: movdqa mu=%xmm1 +movdqa %xmm0,%xmm1 + +# qhasm: uint32323232 mt >>= 23 +# asm 1: psrld $23,z7_stack=stack128#29 +# asm 2: movdqa z7_stack=480(%esp) +movdqa %xmm5,480(%esp) + +# qhasm: uint32323232 mq += mr +# asm 1: paddd mu=int6464#1 +# asm 2: movdqa mu=%xmm0 +movdqa %xmm3,%xmm0 + +# qhasm: uint32323232 mq >>= 19 +# asm 1: psrld $19,t=int6464#3 +# asm 2: movdqa t=%xmm2 +movdqa 416(%esp),%xmm2 + +# qhasm: p = z0_stack +# asm 1: movdqa p=int6464#1 +# asm 2: movdqa p=%xmm0 +movdqa 352(%esp),%xmm0 + +# qhasm: q = z1_stack +# asm 1: movdqa q=int6464#4 +# asm 2: movdqa q=%xmm3 +movdqa 368(%esp),%xmm3 + +# qhasm: r = z2_stack +# asm 1: movdqa r=int6464#2 +# asm 2: movdqa r=%xmm1 +movdqa 432(%esp),%xmm1 + +# qhasm: z11_stack = ms +# asm 1: movdqa z11_stack=stack128#21 +# asm 2: movdqa z11_stack=352(%esp) +movdqa %xmm6,352(%esp) + +# qhasm: uint32323232 mr += ms +# asm 1: paddd mu=int6464#7 +# asm 2: movdqa mu=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 mr >>= 14 +# asm 1: psrld $14,z15_stack=stack128#22 +# asm 2: movdqa z15_stack=368(%esp) +movdqa %xmm4,368(%esp) + +# qhasm: assign xmm0 to p + +# qhasm: assign xmm1 to r + +# qhasm: assign xmm2 to t + +# qhasm: assign xmm3 to q + +# qhasm: s = t +# asm 1: movdqa s=int6464#7 +# asm 2: movdqa s=%xmm6 +movdqa %xmm2,%xmm6 + +# qhasm: uint32323232 t += p +# asm 1: paddd u=int6464#5 +# asm 2: movdqa u=%xmm4 +movdqa %xmm2,%xmm4 + +# qhasm: uint32323232 t >>= 25 +# asm 1: psrld $25,z1_stack=stack128#28 +# asm 2: movdqa z1_stack=464(%esp) +movdqa %xmm3,464(%esp) + +# qhasm: t = p +# asm 1: movdqa t=int6464#3 +# asm 2: movdqa t=%xmm2 +movdqa %xmm0,%xmm2 + +# qhasm: uint32323232 t += q +# asm 1: paddd u=int6464#5 +# asm 2: movdqa u=%xmm4 +movdqa %xmm2,%xmm4 + +# qhasm: uint32323232 t >>= 23 +# asm 1: psrld $23,z2_stack=stack128#31 +# asm 2: movdqa z2_stack=512(%esp) +movdqa %xmm1,512(%esp) + +# qhasm: uint32323232 q += r +# asm 1: paddd u=int6464#3 +# asm 2: movdqa u=%xmm2 +movdqa %xmm3,%xmm2 + +# qhasm: uint32323232 q >>= 19 +# asm 1: psrld $19,mt=int6464#3 +# asm 2: movdqa mt=%xmm2 +movdqa 544(%esp),%xmm2 + +# qhasm: mp = z5_stack +# asm 1: movdqa mp=int6464#5 +# asm 2: movdqa mp=%xmm4 +movdqa 400(%esp),%xmm4 + +# qhasm: mq = z6_stack +# asm 1: movdqa mq=int6464#4 +# asm 2: movdqa mq=%xmm3 +movdqa 384(%esp),%xmm3 + +# qhasm: mr = z7_stack +# asm 1: movdqa mr=int6464#6 +# asm 2: movdqa mr=%xmm5 +movdqa 480(%esp),%xmm5 + +# qhasm: z3_stack = s +# asm 1: movdqa z3_stack=stack128#25 +# asm 2: movdqa z3_stack=416(%esp) +movdqa %xmm6,416(%esp) + +# qhasm: uint32323232 r += s +# asm 1: paddd u=int6464#7 +# asm 2: movdqa u=%xmm6 +movdqa %xmm1,%xmm6 + +# qhasm: uint32323232 r >>= 14 +# asm 1: psrld $14,z0_stack=stack128#33 +# asm 2: movdqa z0_stack=544(%esp) +movdqa %xmm0,544(%esp) + +# qhasm: assign xmm2 to mt + +# qhasm: assign xmm3 to mq + +# qhasm: assign xmm4 to mp + +# qhasm: assign xmm5 to mr + +# qhasm: ms = mt +# asm 1: movdqa ms=int6464#7 +# asm 2: movdqa ms=%xmm6 +movdqa %xmm2,%xmm6 + +# qhasm: uint32323232 mt += mp +# asm 1: paddd mu=int6464#1 +# asm 2: movdqa mu=%xmm0 +movdqa %xmm2,%xmm0 + +# qhasm: uint32323232 mt >>= 25 +# asm 1: psrld $25,z6_stack=stack128#26 +# asm 2: movdqa z6_stack=432(%esp) +movdqa %xmm3,432(%esp) + +# qhasm: mt = mp +# asm 1: movdqa mt=int6464#1 +# asm 2: movdqa mt=%xmm0 +movdqa %xmm4,%xmm0 + +# qhasm: uint32323232 mt += mq +# asm 1: paddd mu=int6464#2 +# asm 2: movdqa mu=%xmm1 +movdqa %xmm0,%xmm1 + +# qhasm: uint32323232 mt >>= 23 +# asm 1: psrld $23,z7_stack=stack128#29 +# asm 2: movdqa z7_stack=480(%esp) +movdqa %xmm5,480(%esp) + +# qhasm: uint32323232 mq += mr +# asm 1: paddd mu=int6464#1 +# asm 2: movdqa mu=%xmm0 +movdqa %xmm3,%xmm0 + +# qhasm: uint32323232 mq >>= 19 +# asm 1: psrld $19,t=int6464#3 +# asm 2: movdqa t=%xmm2 +movdqa 528(%esp),%xmm2 + +# qhasm: p = z10_stack +# asm 1: movdqa p=int6464#1 +# asm 2: movdqa p=%xmm0 +movdqa 448(%esp),%xmm0 + +# qhasm: q = z11_stack +# asm 1: movdqa q=int6464#4 +# asm 2: movdqa q=%xmm3 +movdqa 352(%esp),%xmm3 + +# qhasm: r = z8_stack +# asm 1: movdqa r=int6464#2 +# asm 2: movdqa r=%xmm1 +movdqa 560(%esp),%xmm1 + +# qhasm: z4_stack = ms +# asm 1: movdqa z4_stack=stack128#34 +# asm 2: movdqa z4_stack=560(%esp) +movdqa %xmm6,560(%esp) + +# qhasm: uint32323232 mr += ms +# asm 1: paddd mu=int6464#7 +# asm 2: movdqa mu=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 mr >>= 14 +# asm 1: psrld $14,z5_stack=stack128#21 +# asm 2: movdqa z5_stack=352(%esp) +movdqa %xmm4,352(%esp) + +# qhasm: assign xmm0 to p + +# qhasm: assign xmm1 to r + +# qhasm: assign xmm2 to t + +# qhasm: assign xmm3 to q + +# qhasm: s = t +# asm 1: movdqa s=int6464#7 +# asm 2: movdqa s=%xmm6 +movdqa %xmm2,%xmm6 + +# qhasm: uint32323232 t += p +# asm 1: paddd u=int6464#5 +# asm 2: movdqa u=%xmm4 +movdqa %xmm2,%xmm4 + +# qhasm: uint32323232 t >>= 25 +# asm 1: psrld $25,z11_stack=stack128#27 +# asm 2: movdqa z11_stack=448(%esp) +movdqa %xmm3,448(%esp) + +# qhasm: t = p +# asm 1: movdqa t=int6464#3 +# asm 2: movdqa t=%xmm2 +movdqa %xmm0,%xmm2 + +# qhasm: uint32323232 t += q +# asm 1: paddd u=int6464#5 +# asm 2: movdqa u=%xmm4 +movdqa %xmm2,%xmm4 + +# qhasm: uint32323232 t >>= 23 +# asm 1: psrld $23,z8_stack=stack128#37 +# asm 2: movdqa z8_stack=608(%esp) +movdqa %xmm1,608(%esp) + +# qhasm: uint32323232 q += r +# asm 1: paddd u=int6464#3 +# asm 2: movdqa u=%xmm2 +movdqa %xmm3,%xmm2 + +# qhasm: uint32323232 q >>= 19 +# asm 1: psrld $19,mt=int6464#3 +# asm 2: movdqa mt=%xmm2 +movdqa 592(%esp),%xmm2 + +# qhasm: mp = z15_stack +# asm 1: movdqa mp=int6464#5 +# asm 2: movdqa mp=%xmm4 +movdqa 368(%esp),%xmm4 + +# qhasm: mq = z12_stack +# asm 1: movdqa mq=int6464#4 +# asm 2: movdqa mq=%xmm3 +movdqa 496(%esp),%xmm3 + +# qhasm: mr = z13_stack +# asm 1: movdqa mr=int6464#6 +# asm 2: movdqa mr=%xmm5 +movdqa 576(%esp),%xmm5 + +# qhasm: z9_stack = s +# asm 1: movdqa z9_stack=stack128#32 +# asm 2: movdqa z9_stack=528(%esp) +movdqa %xmm6,528(%esp) + +# qhasm: uint32323232 r += s +# asm 1: paddd u=int6464#7 +# asm 2: movdqa u=%xmm6 +movdqa %xmm1,%xmm6 + +# qhasm: uint32323232 r >>= 14 +# asm 1: psrld $14,z10_stack=stack128#22 +# asm 2: movdqa z10_stack=368(%esp) +movdqa %xmm0,368(%esp) + +# qhasm: assign xmm2 to mt + +# qhasm: assign xmm3 to mq + +# qhasm: assign xmm4 to mp + +# qhasm: assign xmm5 to mr + +# qhasm: ms = mt +# asm 1: movdqa ms=int6464#7 +# asm 2: movdqa ms=%xmm6 +movdqa %xmm2,%xmm6 + +# qhasm: uint32323232 mt += mp +# asm 1: paddd mu=int6464#1 +# asm 2: movdqa mu=%xmm0 +movdqa %xmm2,%xmm0 + +# qhasm: uint32323232 mt >>= 25 +# asm 1: psrld $25,z12_stack=stack128#35 +# asm 2: movdqa z12_stack=576(%esp) +movdqa %xmm3,576(%esp) + +# qhasm: mt = mp +# asm 1: movdqa mt=int6464#1 +# asm 2: movdqa mt=%xmm0 +movdqa %xmm4,%xmm0 + +# qhasm: uint32323232 mt += mq +# asm 1: paddd mu=int6464#2 +# asm 2: movdqa mu=%xmm1 +movdqa %xmm0,%xmm1 + +# qhasm: uint32323232 mt >>= 23 +# asm 1: psrld $23,z13_stack=stack128#30 +# asm 2: movdqa z13_stack=496(%esp) +movdqa %xmm5,496(%esp) + +# qhasm: uint32323232 mq += mr +# asm 1: paddd mu=int6464#1 +# asm 2: movdqa mu=%xmm0 +movdqa %xmm3,%xmm0 + +# qhasm: uint32323232 mq >>= 19 +# asm 1: psrld $19,t=int6464#3 +# asm 2: movdqa t=%xmm2 +movdqa 576(%esp),%xmm2 + +# qhasm: p = z0_stack +# asm 1: movdqa p=int6464#1 +# asm 2: movdqa p=%xmm0 +movdqa 544(%esp),%xmm0 + +# qhasm: q = z4_stack +# asm 1: movdqa q=int6464#4 +# asm 2: movdqa q=%xmm3 +movdqa 560(%esp),%xmm3 + +# qhasm: r = z8_stack +# asm 1: movdqa r=int6464#2 +# asm 2: movdqa r=%xmm1 +movdqa 608(%esp),%xmm1 + +# qhasm: z14_stack = ms +# asm 1: movdqa z14_stack=stack128#24 +# asm 2: movdqa z14_stack=400(%esp) +movdqa %xmm6,400(%esp) + +# qhasm: uint32323232 mr += ms +# asm 1: paddd mu=int6464#7 +# asm 2: movdqa mu=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 mr >>= 14 +# asm 1: psrld $14,z15_stack=stack128#23 +# asm 2: movdqa z15_stack=384(%esp) +movdqa %xmm4,384(%esp) + +# qhasm: unsigned>? i -= 2 +# asm 1: sub $2, +ja ._mainloop1 + +# qhasm: out = out_stack +# asm 1: movl out=int32#6 +# asm 2: movl out=%edi +movl 20(%esp),%edi + +# qhasm: z0 = z0_stack +# asm 1: movdqa z0=int6464#1 +# asm 2: movdqa z0=%xmm0 +movdqa 544(%esp),%xmm0 + +# qhasm: z1 = z1_stack +# asm 1: movdqa z1=int6464#2 +# asm 2: movdqa z1=%xmm1 +movdqa 464(%esp),%xmm1 + +# qhasm: z2 = z2_stack +# asm 1: movdqa z2=int6464#3 +# asm 2: movdqa z2=%xmm2 +movdqa 512(%esp),%xmm2 + +# qhasm: z3 = z3_stack +# asm 1: movdqa z3=int6464#4 +# asm 2: movdqa z3=%xmm3 +movdqa 416(%esp),%xmm3 + +# qhasm: uint32323232 z0 += orig0 +# asm 1: paddd in0=int32#1 +# asm 2: movd in0=%eax +movd %xmm0,%eax + +# qhasm: in1 = z1 +# asm 1: movd in1=int32#2 +# asm 2: movd in1=%ecx +movd %xmm1,%ecx + +# qhasm: in2 = z2 +# asm 1: movd in2=int32#3 +# asm 2: movd in2=%edx +movd %xmm2,%edx + +# qhasm: in3 = z3 +# asm 1: movd in3=int32#4 +# asm 2: movd in3=%ebx +movd %xmm3,%ebx + +# qhasm: z0 <<<= 96 +# asm 1: pshufd $0x39,in0=int32#1 +# asm 2: movd in0=%eax +movd %xmm0,%eax + +# qhasm: in1 = z1 +# asm 1: movd in1=int32#2 +# asm 2: movd in1=%ecx +movd %xmm1,%ecx + +# qhasm: in2 = z2 +# asm 1: movd in2=int32#3 +# asm 2: movd in2=%edx +movd %xmm2,%edx + +# qhasm: in3 = z3 +# asm 1: movd in3=int32#4 +# asm 2: movd in3=%ebx +movd %xmm3,%ebx + +# qhasm: z0 <<<= 96 +# asm 1: pshufd $0x39,in0=int32#1 +# asm 2: movd in0=%eax +movd %xmm0,%eax + +# qhasm: in1 = z1 +# asm 1: movd in1=int32#2 +# asm 2: movd in1=%ecx +movd %xmm1,%ecx + +# qhasm: in2 = z2 +# asm 1: movd in2=int32#3 +# asm 2: movd in2=%edx +movd %xmm2,%edx + +# qhasm: in3 = z3 +# asm 1: movd in3=int32#4 +# asm 2: movd in3=%ebx +movd %xmm3,%ebx + +# qhasm: z0 <<<= 96 +# asm 1: pshufd $0x39,in0=int32#1 +# asm 2: movd in0=%eax +movd %xmm0,%eax + +# qhasm: in1 = z1 +# asm 1: movd in1=int32#2 +# asm 2: movd in1=%ecx +movd %xmm1,%ecx + +# qhasm: in2 = z2 +# asm 1: movd in2=int32#3 +# asm 2: movd in2=%edx +movd %xmm2,%edx + +# qhasm: in3 = z3 +# asm 1: movd in3=int32#4 +# asm 2: movd in3=%ebx +movd %xmm3,%ebx + +# qhasm: in0 ^= *(uint32 *) (m + 192) +# asm 1: xorl 192(z4=int6464#1 +# asm 2: movdqa z4=%xmm0 +movdqa 560(%esp),%xmm0 + +# qhasm: z5 = z5_stack +# asm 1: movdqa z5=int6464#2 +# asm 2: movdqa z5=%xmm1 +movdqa 352(%esp),%xmm1 + +# qhasm: z6 = z6_stack +# asm 1: movdqa z6=int6464#3 +# asm 2: movdqa z6=%xmm2 +movdqa 432(%esp),%xmm2 + +# qhasm: z7 = z7_stack +# asm 1: movdqa z7=int6464#4 +# asm 2: movdqa z7=%xmm3 +movdqa 480(%esp),%xmm3 + +# qhasm: uint32323232 z4 += orig4 +# asm 1: paddd in4=int32#1 +# asm 2: movd in4=%eax +movd %xmm0,%eax + +# qhasm: in5 = z5 +# asm 1: movd in5=int32#2 +# asm 2: movd in5=%ecx +movd %xmm1,%ecx + +# qhasm: in6 = z6 +# asm 1: movd in6=int32#3 +# asm 2: movd in6=%edx +movd %xmm2,%edx + +# qhasm: in7 = z7 +# asm 1: movd in7=int32#4 +# asm 2: movd in7=%ebx +movd %xmm3,%ebx + +# qhasm: z4 <<<= 96 +# asm 1: pshufd $0x39,in4=int32#1 +# asm 2: movd in4=%eax +movd %xmm0,%eax + +# qhasm: in5 = z5 +# asm 1: movd in5=int32#2 +# asm 2: movd in5=%ecx +movd %xmm1,%ecx + +# qhasm: in6 = z6 +# asm 1: movd in6=int32#3 +# asm 2: movd in6=%edx +movd %xmm2,%edx + +# qhasm: in7 = z7 +# asm 1: movd in7=int32#4 +# asm 2: movd in7=%ebx +movd %xmm3,%ebx + +# qhasm: z4 <<<= 96 +# asm 1: pshufd $0x39,in4=int32#1 +# asm 2: movd in4=%eax +movd %xmm0,%eax + +# qhasm: in5 = z5 +# asm 1: movd in5=int32#2 +# asm 2: movd in5=%ecx +movd %xmm1,%ecx + +# qhasm: in6 = z6 +# asm 1: movd in6=int32#3 +# asm 2: movd in6=%edx +movd %xmm2,%edx + +# qhasm: in7 = z7 +# asm 1: movd in7=int32#4 +# asm 2: movd in7=%ebx +movd %xmm3,%ebx + +# qhasm: z4 <<<= 96 +# asm 1: pshufd $0x39,in4=int32#1 +# asm 2: movd in4=%eax +movd %xmm0,%eax + +# qhasm: in5 = z5 +# asm 1: movd in5=int32#2 +# asm 2: movd in5=%ecx +movd %xmm1,%ecx + +# qhasm: in6 = z6 +# asm 1: movd in6=int32#3 +# asm 2: movd in6=%edx +movd %xmm2,%edx + +# qhasm: in7 = z7 +# asm 1: movd in7=int32#4 +# asm 2: movd in7=%ebx +movd %xmm3,%ebx + +# qhasm: in4 ^= *(uint32 *) (m + 208) +# asm 1: xorl 208(z8=int6464#1 +# asm 2: movdqa z8=%xmm0 +movdqa 608(%esp),%xmm0 + +# qhasm: z9 = z9_stack +# asm 1: movdqa z9=int6464#2 +# asm 2: movdqa z9=%xmm1 +movdqa 528(%esp),%xmm1 + +# qhasm: z10 = z10_stack +# asm 1: movdqa z10=int6464#3 +# asm 2: movdqa z10=%xmm2 +movdqa 368(%esp),%xmm2 + +# qhasm: z11 = z11_stack +# asm 1: movdqa z11=int6464#4 +# asm 2: movdqa z11=%xmm3 +movdqa 448(%esp),%xmm3 + +# qhasm: uint32323232 z8 += orig8 +# asm 1: paddd in8=int32#1 +# asm 2: movd in8=%eax +movd %xmm0,%eax + +# qhasm: in9 = z9 +# asm 1: movd in9=int32#2 +# asm 2: movd in9=%ecx +movd %xmm1,%ecx + +# qhasm: in10 = z10 +# asm 1: movd in10=int32#3 +# asm 2: movd in10=%edx +movd %xmm2,%edx + +# qhasm: in11 = z11 +# asm 1: movd in11=int32#4 +# asm 2: movd in11=%ebx +movd %xmm3,%ebx + +# qhasm: z8 <<<= 96 +# asm 1: pshufd $0x39,in8=int32#1 +# asm 2: movd in8=%eax +movd %xmm0,%eax + +# qhasm: in9 = z9 +# asm 1: movd in9=int32#2 +# asm 2: movd in9=%ecx +movd %xmm1,%ecx + +# qhasm: in10 = z10 +# asm 1: movd in10=int32#3 +# asm 2: movd in10=%edx +movd %xmm2,%edx + +# qhasm: in11 = z11 +# asm 1: movd in11=int32#4 +# asm 2: movd in11=%ebx +movd %xmm3,%ebx + +# qhasm: z8 <<<= 96 +# asm 1: pshufd $0x39,in8=int32#1 +# asm 2: movd in8=%eax +movd %xmm0,%eax + +# qhasm: in9 = z9 +# asm 1: movd in9=int32#2 +# asm 2: movd in9=%ecx +movd %xmm1,%ecx + +# qhasm: in10 = z10 +# asm 1: movd in10=int32#3 +# asm 2: movd in10=%edx +movd %xmm2,%edx + +# qhasm: in11 = z11 +# asm 1: movd in11=int32#4 +# asm 2: movd in11=%ebx +movd %xmm3,%ebx + +# qhasm: z8 <<<= 96 +# asm 1: pshufd $0x39,in8=int32#1 +# asm 2: movd in8=%eax +movd %xmm0,%eax + +# qhasm: in9 = z9 +# asm 1: movd in9=int32#2 +# asm 2: movd in9=%ecx +movd %xmm1,%ecx + +# qhasm: in10 = z10 +# asm 1: movd in10=int32#3 +# asm 2: movd in10=%edx +movd %xmm2,%edx + +# qhasm: in11 = z11 +# asm 1: movd in11=int32#4 +# asm 2: movd in11=%ebx +movd %xmm3,%ebx + +# qhasm: in8 ^= *(uint32 *) (m + 224) +# asm 1: xorl 224(z12=int6464#1 +# asm 2: movdqa z12=%xmm0 +movdqa 576(%esp),%xmm0 + +# qhasm: z13 = z13_stack +# asm 1: movdqa z13=int6464#2 +# asm 2: movdqa z13=%xmm1 +movdqa 496(%esp),%xmm1 + +# qhasm: z14 = z14_stack +# asm 1: movdqa z14=int6464#3 +# asm 2: movdqa z14=%xmm2 +movdqa 400(%esp),%xmm2 + +# qhasm: z15 = z15_stack +# asm 1: movdqa z15=int6464#4 +# asm 2: movdqa z15=%xmm3 +movdqa 384(%esp),%xmm3 + +# qhasm: uint32323232 z12 += orig12 +# asm 1: paddd in12=int32#1 +# asm 2: movd in12=%eax +movd %xmm0,%eax + +# qhasm: in13 = z13 +# asm 1: movd in13=int32#2 +# asm 2: movd in13=%ecx +movd %xmm1,%ecx + +# qhasm: in14 = z14 +# asm 1: movd in14=int32#3 +# asm 2: movd in14=%edx +movd %xmm2,%edx + +# qhasm: in15 = z15 +# asm 1: movd in15=int32#4 +# asm 2: movd in15=%ebx +movd %xmm3,%ebx + +# qhasm: z12 <<<= 96 +# asm 1: pshufd $0x39,in12=int32#1 +# asm 2: movd in12=%eax +movd %xmm0,%eax + +# qhasm: in13 = z13 +# asm 1: movd in13=int32#2 +# asm 2: movd in13=%ecx +movd %xmm1,%ecx + +# qhasm: in14 = z14 +# asm 1: movd in14=int32#3 +# asm 2: movd in14=%edx +movd %xmm2,%edx + +# qhasm: in15 = z15 +# asm 1: movd in15=int32#4 +# asm 2: movd in15=%ebx +movd %xmm3,%ebx + +# qhasm: z12 <<<= 96 +# asm 1: pshufd $0x39,in12=int32#1 +# asm 2: movd in12=%eax +movd %xmm0,%eax + +# qhasm: in13 = z13 +# asm 1: movd in13=int32#2 +# asm 2: movd in13=%ecx +movd %xmm1,%ecx + +# qhasm: in14 = z14 +# asm 1: movd in14=int32#3 +# asm 2: movd in14=%edx +movd %xmm2,%edx + +# qhasm: in15 = z15 +# asm 1: movd in15=int32#4 +# asm 2: movd in15=%ebx +movd %xmm3,%ebx + +# qhasm: z12 <<<= 96 +# asm 1: pshufd $0x39,in12=int32#1 +# asm 2: movd in12=%eax +movd %xmm0,%eax + +# qhasm: in13 = z13 +# asm 1: movd in13=int32#2 +# asm 2: movd in13=%ecx +movd %xmm1,%ecx + +# qhasm: in14 = z14 +# asm 1: movd in14=int32#3 +# asm 2: movd in14=%edx +movd %xmm2,%edx + +# qhasm: in15 = z15 +# asm 1: movd in15=int32#4 +# asm 2: movd in15=%ebx +movd %xmm3,%ebx + +# qhasm: in12 ^= *(uint32 *) (m + 240) +# asm 1: xorl 240(bytes=int32#1 +# asm 2: movl bytes=%eax +movl 24(%esp),%eax + +# qhasm: bytes -= 256 +# asm 1: sub $256,out_stack=stack32#6 +# asm 2: movl out_stack=20(%esp) +movl %edi,20(%esp) + +# qhasm: unsigned? bytes - 0 +# asm 1: cmp $0, +jbe ._done +# comment:fp stack unchanged by fallthrough + +# qhasm: bytesbetween1and255: +._bytesbetween1and255: + +# qhasm: unsignedctarget=stack32#6 +# asm 2: movl ctarget=20(%esp) +movl %edi,20(%esp) + +# qhasm: out = &tmp +# asm 1: leal out=int32#6 +# asm 2: leal out=%edi +leal 640(%esp),%edi + +# qhasm: i = bytes +# asm 1: mov i=int32#2 +# asm 2: mov i=%ecx +mov %eax,%ecx + +# qhasm: while (i) { *out++ = *m++; --i } +rep movsb + +# qhasm: out = &tmp +# asm 1: leal out=int32#6 +# asm 2: leal out=%edi +leal 640(%esp),%edi + +# qhasm: m = &tmp +# asm 1: leal m=int32#5 +# asm 2: leal m=%esi +leal 640(%esp),%esi +# comment:fp stack unchanged by fallthrough + +# qhasm: nocopy: +._nocopy: + +# qhasm: bytes_stack = bytes +# asm 1: movl bytes_stack=stack32#7 +# asm 2: movl bytes_stack=24(%esp) +movl %eax,24(%esp) + +# qhasm: diag0 = x0 +# asm 1: movdqa diag0=int6464#1 +# asm 2: movdqa diag0=%xmm0 +movdqa 64(%esp),%xmm0 + +# qhasm: diag1 = x1 +# asm 1: movdqa diag1=int6464#2 +# asm 2: movdqa diag1=%xmm1 +movdqa 48(%esp),%xmm1 + +# qhasm: diag2 = x2 +# asm 1: movdqa diag2=int6464#3 +# asm 2: movdqa diag2=%xmm2 +movdqa 80(%esp),%xmm2 + +# qhasm: diag3 = x3 +# asm 1: movdqa diag3=int6464#4 +# asm 2: movdqa diag3=%xmm3 +movdqa 32(%esp),%xmm3 + +# qhasm: a0 = diag1 +# asm 1: movdqa a0=int6464#5 +# asm 2: movdqa a0=%xmm4 +movdqa %xmm1,%xmm4 + +# qhasm: i = 12 +# asm 1: mov $12,>i=int32#1 +# asm 2: mov $12,>i=%eax +mov $12,%eax + +# qhasm: mainloop2: +._mainloop2: + +# qhasm: uint32323232 a0 += diag0 +# asm 1: paddd a1=int6464#6 +# asm 2: movdqa a1=%xmm5 +movdqa %xmm0,%xmm5 + +# qhasm: b0 = a0 +# asm 1: movdqa b0=int6464#7 +# asm 2: movdqa b0=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a0 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,a2=int6464#5 +# asm 2: movdqa a2=%xmm4 +movdqa %xmm3,%xmm4 + +# qhasm: b1 = a1 +# asm 1: movdqa b1=int6464#7 +# asm 2: movdqa b1=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a1 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,a3=int6464#6 +# asm 2: movdqa a3=%xmm5 +movdqa %xmm2,%xmm5 + +# qhasm: b2 = a2 +# asm 1: movdqa b2=int6464#7 +# asm 2: movdqa b2=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a2 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,a4=int6464#5 +# asm 2: movdqa a4=%xmm4 +movdqa %xmm3,%xmm4 + +# qhasm: b3 = a3 +# asm 1: movdqa b3=int6464#7 +# asm 2: movdqa b3=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a3 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,a5=int6464#6 +# asm 2: movdqa a5=%xmm5 +movdqa %xmm0,%xmm5 + +# qhasm: b4 = a4 +# asm 1: movdqa b4=int6464#7 +# asm 2: movdqa b4=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a4 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,a6=int6464#5 +# asm 2: movdqa a6=%xmm4 +movdqa %xmm1,%xmm4 + +# qhasm: b5 = a5 +# asm 1: movdqa b5=int6464#7 +# asm 2: movdqa b5=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a5 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,a7=int6464#6 +# asm 2: movdqa a7=%xmm5 +movdqa %xmm2,%xmm5 + +# qhasm: b6 = a6 +# asm 1: movdqa b6=int6464#7 +# asm 2: movdqa b6=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a6 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,a0=int6464#5 +# asm 2: movdqa a0=%xmm4 +movdqa %xmm1,%xmm4 + +# qhasm: b7 = a7 +# asm 1: movdqa b7=int6464#7 +# asm 2: movdqa b7=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a7 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,a1=int6464#6 +# asm 2: movdqa a1=%xmm5 +movdqa %xmm0,%xmm5 + +# qhasm: b0 = a0 +# asm 1: movdqa b0=int6464#7 +# asm 2: movdqa b0=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a0 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,a2=int6464#5 +# asm 2: movdqa a2=%xmm4 +movdqa %xmm3,%xmm4 + +# qhasm: b1 = a1 +# asm 1: movdqa b1=int6464#7 +# asm 2: movdqa b1=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a1 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,a3=int6464#6 +# asm 2: movdqa a3=%xmm5 +movdqa %xmm2,%xmm5 + +# qhasm: b2 = a2 +# asm 1: movdqa b2=int6464#7 +# asm 2: movdqa b2=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a2 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,a4=int6464#5 +# asm 2: movdqa a4=%xmm4 +movdqa %xmm3,%xmm4 + +# qhasm: b3 = a3 +# asm 1: movdqa b3=int6464#7 +# asm 2: movdqa b3=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a3 <<= 18 +# asm 1: pslld $18,>= 14 +# asm 1: psrld $14,a5=int6464#6 +# asm 2: movdqa a5=%xmm5 +movdqa %xmm0,%xmm5 + +# qhasm: b4 = a4 +# asm 1: movdqa b4=int6464#7 +# asm 2: movdqa b4=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a4 <<= 7 +# asm 1: pslld $7,>= 25 +# asm 1: psrld $25,a6=int6464#5 +# asm 2: movdqa a6=%xmm4 +movdqa %xmm1,%xmm4 + +# qhasm: b5 = a5 +# asm 1: movdqa b5=int6464#7 +# asm 2: movdqa b5=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a5 <<= 9 +# asm 1: pslld $9,>= 23 +# asm 1: psrld $23,a7=int6464#6 +# asm 2: movdqa a7=%xmm5 +movdqa %xmm2,%xmm5 + +# qhasm: b6 = a6 +# asm 1: movdqa b6=int6464#7 +# asm 2: movdqa b6=%xmm6 +movdqa %xmm4,%xmm6 + +# qhasm: uint32323232 a6 <<= 13 +# asm 1: pslld $13,>= 19 +# asm 1: psrld $19,? i -= 4 +# asm 1: sub $4,a0=int6464#5 +# asm 2: movdqa a0=%xmm4 +movdqa %xmm1,%xmm4 + +# qhasm: b7 = a7 +# asm 1: movdqa b7=int6464#7 +# asm 2: movdqa b7=%xmm6 +movdqa %xmm5,%xmm6 + +# qhasm: uint32323232 a7 <<= 18 +# asm 1: pslld $18,b0=int6464#8,>b0=int6464#8 +# asm 2: pxor >b0=%xmm7,>b0=%xmm7 +pxor %xmm7,%xmm7 + +# qhasm: uint32323232 b7 >>= 14 +# asm 1: psrld $14, +ja ._mainloop2 + +# qhasm: uint32323232 diag0 += x0 +# asm 1: paddd in0=int32#1 +# asm 2: movd in0=%eax +movd %xmm0,%eax + +# qhasm: in12 = diag1 +# asm 1: movd in12=int32#2 +# asm 2: movd in12=%ecx +movd %xmm1,%ecx + +# qhasm: in8 = diag2 +# asm 1: movd in8=int32#3 +# asm 2: movd in8=%edx +movd %xmm2,%edx + +# qhasm: in4 = diag3 +# asm 1: movd in4=int32#4 +# asm 2: movd in4=%ebx +movd %xmm3,%ebx + +# qhasm: diag0 <<<= 96 +# asm 1: pshufd $0x39,in5=int32#1 +# asm 2: movd in5=%eax +movd %xmm0,%eax + +# qhasm: in1 = diag1 +# asm 1: movd in1=int32#2 +# asm 2: movd in1=%ecx +movd %xmm1,%ecx + +# qhasm: in13 = diag2 +# asm 1: movd in13=int32#3 +# asm 2: movd in13=%edx +movd %xmm2,%edx + +# qhasm: in9 = diag3 +# asm 1: movd in9=int32#4 +# asm 2: movd in9=%ebx +movd %xmm3,%ebx + +# qhasm: diag0 <<<= 96 +# asm 1: pshufd $0x39,in10=int32#1 +# asm 2: movd in10=%eax +movd %xmm0,%eax + +# qhasm: in6 = diag1 +# asm 1: movd in6=int32#2 +# asm 2: movd in6=%ecx +movd %xmm1,%ecx + +# qhasm: in2 = diag2 +# asm 1: movd in2=int32#3 +# asm 2: movd in2=%edx +movd %xmm2,%edx + +# qhasm: in14 = diag3 +# asm 1: movd in14=int32#4 +# asm 2: movd in14=%ebx +movd %xmm3,%ebx + +# qhasm: diag0 <<<= 96 +# asm 1: pshufd $0x39,in15=int32#1 +# asm 2: movd in15=%eax +movd %xmm0,%eax + +# qhasm: in11 = diag1 +# asm 1: movd in11=int32#2 +# asm 2: movd in11=%ecx +movd %xmm1,%ecx + +# qhasm: in7 = diag2 +# asm 1: movd in7=int32#3 +# asm 2: movd in7=%edx +movd %xmm2,%edx + +# qhasm: in3 = diag3 +# asm 1: movd in3=int32#4 +# asm 2: movd in3=%ebx +movd %xmm3,%ebx + +# qhasm: in15 ^= *(uint32 *) (m + 60) +# asm 1: xorl 60(bytes=int32#1 +# asm 2: movl bytes=%eax +movl 24(%esp),%eax + +# qhasm: in8 = ((uint32 *)&x2)[0] +# asm 1: movl in8=int32#2 +# asm 2: movl in8=%ecx +movl 80(%esp),%ecx + +# qhasm: in9 = ((uint32 *)&x3)[1] +# asm 1: movl 4+in9=int32#3 +# asm 2: movl 4+in9=%edx +movl 4+32(%esp),%edx + +# qhasm: carry? in8 += 1 +# asm 1: add $1,x2=stack128#4 +# asm 2: movl x2=80(%esp) +movl %ecx,80(%esp) + +# qhasm: ((uint32 *)&x3)[1] = in9 +# asm 1: movl ? unsigned +ja ._bytesatleast65 +# comment:fp stack unchanged by jump + +# qhasm: goto bytesatleast64 if !unsigned< +jae ._bytesatleast64 + +# qhasm: m = out +# asm 1: mov m=int32#5 +# asm 2: mov m=%esi +mov %edi,%esi + +# qhasm: out = ctarget +# asm 1: movl out=int32#6 +# asm 2: movl out=%edi +movl 20(%esp),%edi + +# qhasm: i = bytes +# asm 1: mov i=int32#2 +# asm 2: mov i=%ecx +mov %eax,%ecx + +# qhasm: while (i) { *out++ = *m++; --i } +rep movsb +# comment:fp stack unchanged by fallthrough + +# qhasm: bytesatleast64: +._bytesatleast64: +# comment:fp stack unchanged by fallthrough + +# qhasm: done: +._done: + +# qhasm: eax = eax_stack +# asm 1: movl eax=int32#1 +# asm 2: movl eax=%eax +movl 0(%esp),%eax + +# qhasm: ebx = ebx_stack +# asm 1: movl ebx=int32#4 +# asm 2: movl ebx=%ebx +movl 4(%esp),%ebx + +# qhasm: esi = esi_stack +# asm 1: movl esi=int32#5 +# asm 2: movl esi=%esi +movl 8(%esp),%esi + +# qhasm: edi = edi_stack +# asm 1: movl edi=int32#6 +# asm 2: movl edi=%edi +movl 12(%esp),%edi + +# qhasm: ebp = ebp_stack +# asm 1: movl ebp=int32#7 +# asm 2: movl ebp=%ebp +movl 16(%esp),%ebp + +# qhasm: leave +add %eax,%esp +xor %eax,%eax +ret + +# qhasm: bytesatleast65: +._bytesatleast65: + +# qhasm: bytes -= 64 +# asm 1: sub $64, + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* + The assembly implementations were written by D. J. Bernstein and are + Public Domain. For more information see http://cr.yp.to/snuffle.html +*/ + +#include "../../../../crypto.h" +#include "../../../../cpuid.h" + + +#define KEYBYTES 32 + + +#ifdef __x86_64__ +#define crypto_stream_salsa2012_xor crypto_stream_salsa2012_amd64_xmm6_xor +#endif + +#ifdef __i386__ +#define crypto_stream_salsa2012_xor crypto_stream_salsa2012_x86_xmm5_xor +#endif + + +int crypto_stream_salsa2012_xor(unsigned char *c, const unsigned char *m, unsigned long long mlen, const unsigned char *n, const unsigned char *k); + + +struct fastd_cipher_state { + uint8_t key[KEYBYTES]; +}; + + +static bool salsa2012_available(void) { + return fastd_cpuid() & CPUID_SSE2; +} + +static fastd_cipher_state_t* salsa2012_init(const uint8_t *key) { + fastd_cipher_state_t *state = malloc(sizeof(fastd_cipher_state_t)); + memcpy(state->key, key, KEYBYTES); + + return state; +} + +static bool salsa2012_crypt(const fastd_cipher_state_t *state, fastd_block128_t *out, const fastd_block128_t *in, size_t len, const uint8_t *iv) { + crypto_stream_salsa2012_xor(out->b, in->b, len, iv, state->key); + return true; +} + +static void salsa2012_free(fastd_cipher_state_t *state) { + if (state) { + secure_memzero(state, sizeof(*state)); + free(state); + } +} + +const fastd_cipher_t fastd_cipher_salsa2012_xmm = { + .available = salsa2012_available, + + .init = salsa2012_init, + .crypt = salsa2012_crypt, + .free = salsa2012_free, +}; -- cgit v1.2.3