aboutsummaryrefslogtreecommitdiff
path: root/vendor/golang.org/x/crypto/salsa20
diff options
context:
space:
mode:
authorDaniel J Walsh <dwalsh@redhat.com>2022-07-11 10:03:44 -0400
committerDaniel J Walsh <dwalsh@redhat.com>2022-07-18 10:42:04 -0400
commitf67ab1eb20ae357fd004815ec25c5350e5813a46 (patch)
treee25b2cf83e53263f9f7967e5ba5d3a20de4da7e0 /vendor/golang.org/x/crypto/salsa20
parent5f848d89edef76adff6d203859803be9b791d258 (diff)
downloadpodman-f67ab1eb20ae357fd004815ec25c5350e5813a46.tar.gz
podman-f67ab1eb20ae357fd004815ec25c5350e5813a46.tar.bz2
podman-f67ab1eb20ae357fd004815ec25c5350e5813a46.zip
Vendor in containers/(storage,image, common, buildah)
Signed-off-by: Daniel J Walsh <dwalsh@redhat.com>
Diffstat (limited to 'vendor/golang.org/x/crypto/salsa20')
-rw-r--r--vendor/golang.org/x/crypto/salsa20/salsa/hsalsa20.go144
-rw-r--r--vendor/golang.org/x/crypto/salsa20/salsa/salsa208.go199
-rw-r--r--vendor/golang.org/x/crypto/salsa20/salsa/salsa20_amd64.go24
-rw-r--r--vendor/golang.org/x/crypto/salsa20/salsa/salsa20_amd64.s881
-rw-r--r--vendor/golang.org/x/crypto/salsa20/salsa/salsa20_noasm.go15
-rw-r--r--vendor/golang.org/x/crypto/salsa20/salsa/salsa20_ref.go231
6 files changed, 1494 insertions, 0 deletions
diff --git a/vendor/golang.org/x/crypto/salsa20/salsa/hsalsa20.go b/vendor/golang.org/x/crypto/salsa20/salsa/hsalsa20.go
new file mode 100644
index 000000000..4c96147c8
--- /dev/null
+++ b/vendor/golang.org/x/crypto/salsa20/salsa/hsalsa20.go
@@ -0,0 +1,144 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package salsa provides low-level access to functions in the Salsa family.
+package salsa // import "golang.org/x/crypto/salsa20/salsa"
+
+// Sigma is the Salsa20 constant for 256-bit keys.
+var Sigma = [16]byte{'e', 'x', 'p', 'a', 'n', 'd', ' ', '3', '2', '-', 'b', 'y', 't', 'e', ' ', 'k'}
+
+// HSalsa20 applies the HSalsa20 core function to a 16-byte input in, 32-byte
+// key k, and 16-byte constant c, and puts the result into the 32-byte array
+// out.
+func HSalsa20(out *[32]byte, in *[16]byte, k *[32]byte, c *[16]byte) {
+ x0 := uint32(c[0]) | uint32(c[1])<<8 | uint32(c[2])<<16 | uint32(c[3])<<24
+ x1 := uint32(k[0]) | uint32(k[1])<<8 | uint32(k[2])<<16 | uint32(k[3])<<24
+ x2 := uint32(k[4]) | uint32(k[5])<<8 | uint32(k[6])<<16 | uint32(k[7])<<24
+ x3 := uint32(k[8]) | uint32(k[9])<<8 | uint32(k[10])<<16 | uint32(k[11])<<24
+ x4 := uint32(k[12]) | uint32(k[13])<<8 | uint32(k[14])<<16 | uint32(k[15])<<24
+ x5 := uint32(c[4]) | uint32(c[5])<<8 | uint32(c[6])<<16 | uint32(c[7])<<24
+ x6 := uint32(in[0]) | uint32(in[1])<<8 | uint32(in[2])<<16 | uint32(in[3])<<24
+ x7 := uint32(in[4]) | uint32(in[5])<<8 | uint32(in[6])<<16 | uint32(in[7])<<24
+ x8 := uint32(in[8]) | uint32(in[9])<<8 | uint32(in[10])<<16 | uint32(in[11])<<24
+ x9 := uint32(in[12]) | uint32(in[13])<<8 | uint32(in[14])<<16 | uint32(in[15])<<24
+ x10 := uint32(c[8]) | uint32(c[9])<<8 | uint32(c[10])<<16 | uint32(c[11])<<24
+ x11 := uint32(k[16]) | uint32(k[17])<<8 | uint32(k[18])<<16 | uint32(k[19])<<24
+ x12 := uint32(k[20]) | uint32(k[21])<<8 | uint32(k[22])<<16 | uint32(k[23])<<24
+ x13 := uint32(k[24]) | uint32(k[25])<<8 | uint32(k[26])<<16 | uint32(k[27])<<24
+ x14 := uint32(k[28]) | uint32(k[29])<<8 | uint32(k[30])<<16 | uint32(k[31])<<24
+ x15 := uint32(c[12]) | uint32(c[13])<<8 | uint32(c[14])<<16 | uint32(c[15])<<24
+
+ for i := 0; i < 20; i += 2 {
+ u := x0 + x12
+ x4 ^= u<<7 | u>>(32-7)
+ u = x4 + x0
+ x8 ^= u<<9 | u>>(32-9)
+ u = x8 + x4
+ x12 ^= u<<13 | u>>(32-13)
+ u = x12 + x8
+ x0 ^= u<<18 | u>>(32-18)
+
+ u = x5 + x1
+ x9 ^= u<<7 | u>>(32-7)
+ u = x9 + x5
+ x13 ^= u<<9 | u>>(32-9)
+ u = x13 + x9
+ x1 ^= u<<13 | u>>(32-13)
+ u = x1 + x13
+ x5 ^= u<<18 | u>>(32-18)
+
+ u = x10 + x6
+ x14 ^= u<<7 | u>>(32-7)
+ u = x14 + x10
+ x2 ^= u<<9 | u>>(32-9)
+ u = x2 + x14
+ x6 ^= u<<13 | u>>(32-13)
+ u = x6 + x2
+ x10 ^= u<<18 | u>>(32-18)
+
+ u = x15 + x11
+ x3 ^= u<<7 | u>>(32-7)
+ u = x3 + x15
+ x7 ^= u<<9 | u>>(32-9)
+ u = x7 + x3
+ x11 ^= u<<13 | u>>(32-13)
+ u = x11 + x7
+ x15 ^= u<<18 | u>>(32-18)
+
+ u = x0 + x3
+ x1 ^= u<<7 | u>>(32-7)
+ u = x1 + x0
+ x2 ^= u<<9 | u>>(32-9)
+ u = x2 + x1
+ x3 ^= u<<13 | u>>(32-13)
+ u = x3 + x2
+ x0 ^= u<<18 | u>>(32-18)
+
+ u = x5 + x4
+ x6 ^= u<<7 | u>>(32-7)
+ u = x6 + x5
+ x7 ^= u<<9 | u>>(32-9)
+ u = x7 + x6
+ x4 ^= u<<13 | u>>(32-13)
+ u = x4 + x7
+ x5 ^= u<<18 | u>>(32-18)
+
+ u = x10 + x9
+ x11 ^= u<<7 | u>>(32-7)
+ u = x11 + x10
+ x8 ^= u<<9 | u>>(32-9)
+ u = x8 + x11
+ x9 ^= u<<13 | u>>(32-13)
+ u = x9 + x8
+ x10 ^= u<<18 | u>>(32-18)
+
+ u = x15 + x14
+ x12 ^= u<<7 | u>>(32-7)
+ u = x12 + x15
+ x13 ^= u<<9 | u>>(32-9)
+ u = x13 + x12
+ x14 ^= u<<13 | u>>(32-13)
+ u = x14 + x13
+ x15 ^= u<<18 | u>>(32-18)
+ }
+ out[0] = byte(x0)
+ out[1] = byte(x0 >> 8)
+ out[2] = byte(x0 >> 16)
+ out[3] = byte(x0 >> 24)
+
+ out[4] = byte(x5)
+ out[5] = byte(x5 >> 8)
+ out[6] = byte(x5 >> 16)
+ out[7] = byte(x5 >> 24)
+
+ out[8] = byte(x10)
+ out[9] = byte(x10 >> 8)
+ out[10] = byte(x10 >> 16)
+ out[11] = byte(x10 >> 24)
+
+ out[12] = byte(x15)
+ out[13] = byte(x15 >> 8)
+ out[14] = byte(x15 >> 16)
+ out[15] = byte(x15 >> 24)
+
+ out[16] = byte(x6)
+ out[17] = byte(x6 >> 8)
+ out[18] = byte(x6 >> 16)
+ out[19] = byte(x6 >> 24)
+
+ out[20] = byte(x7)
+ out[21] = byte(x7 >> 8)
+ out[22] = byte(x7 >> 16)
+ out[23] = byte(x7 >> 24)
+
+ out[24] = byte(x8)
+ out[25] = byte(x8 >> 8)
+ out[26] = byte(x8 >> 16)
+ out[27] = byte(x8 >> 24)
+
+ out[28] = byte(x9)
+ out[29] = byte(x9 >> 8)
+ out[30] = byte(x9 >> 16)
+ out[31] = byte(x9 >> 24)
+}
diff --git a/vendor/golang.org/x/crypto/salsa20/salsa/salsa208.go b/vendor/golang.org/x/crypto/salsa20/salsa/salsa208.go
new file mode 100644
index 000000000..9bfc0927c
--- /dev/null
+++ b/vendor/golang.org/x/crypto/salsa20/salsa/salsa208.go
@@ -0,0 +1,199 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package salsa
+
+// Core208 applies the Salsa20/8 core function to the 64-byte array in and puts
+// the result into the 64-byte array out. The input and output may be the same array.
+func Core208(out *[64]byte, in *[64]byte) {
+ j0 := uint32(in[0]) | uint32(in[1])<<8 | uint32(in[2])<<16 | uint32(in[3])<<24
+ j1 := uint32(in[4]) | uint32(in[5])<<8 | uint32(in[6])<<16 | uint32(in[7])<<24
+ j2 := uint32(in[8]) | uint32(in[9])<<8 | uint32(in[10])<<16 | uint32(in[11])<<24
+ j3 := uint32(in[12]) | uint32(in[13])<<8 | uint32(in[14])<<16 | uint32(in[15])<<24
+ j4 := uint32(in[16]) | uint32(in[17])<<8 | uint32(in[18])<<16 | uint32(in[19])<<24
+ j5 := uint32(in[20]) | uint32(in[21])<<8 | uint32(in[22])<<16 | uint32(in[23])<<24
+ j6 := uint32(in[24]) | uint32(in[25])<<8 | uint32(in[26])<<16 | uint32(in[27])<<24
+ j7 := uint32(in[28]) | uint32(in[29])<<8 | uint32(in[30])<<16 | uint32(in[31])<<24
+ j8 := uint32(in[32]) | uint32(in[33])<<8 | uint32(in[34])<<16 | uint32(in[35])<<24
+ j9 := uint32(in[36]) | uint32(in[37])<<8 | uint32(in[38])<<16 | uint32(in[39])<<24
+ j10 := uint32(in[40]) | uint32(in[41])<<8 | uint32(in[42])<<16 | uint32(in[43])<<24
+ j11 := uint32(in[44]) | uint32(in[45])<<8 | uint32(in[46])<<16 | uint32(in[47])<<24
+ j12 := uint32(in[48]) | uint32(in[49])<<8 | uint32(in[50])<<16 | uint32(in[51])<<24
+ j13 := uint32(in[52]) | uint32(in[53])<<8 | uint32(in[54])<<16 | uint32(in[55])<<24
+ j14 := uint32(in[56]) | uint32(in[57])<<8 | uint32(in[58])<<16 | uint32(in[59])<<24
+ j15 := uint32(in[60]) | uint32(in[61])<<8 | uint32(in[62])<<16 | uint32(in[63])<<24
+
+ x0, x1, x2, x3, x4, x5, x6, x7, x8 := j0, j1, j2, j3, j4, j5, j6, j7, j8
+ x9, x10, x11, x12, x13, x14, x15 := j9, j10, j11, j12, j13, j14, j15
+
+ for i := 0; i < 8; i += 2 {
+ u := x0 + x12
+ x4 ^= u<<7 | u>>(32-7)
+ u = x4 + x0
+ x8 ^= u<<9 | u>>(32-9)
+ u = x8 + x4
+ x12 ^= u<<13 | u>>(32-13)
+ u = x12 + x8
+ x0 ^= u<<18 | u>>(32-18)
+
+ u = x5 + x1
+ x9 ^= u<<7 | u>>(32-7)
+ u = x9 + x5
+ x13 ^= u<<9 | u>>(32-9)
+ u = x13 + x9
+ x1 ^= u<<13 | u>>(32-13)
+ u = x1 + x13
+ x5 ^= u<<18 | u>>(32-18)
+
+ u = x10 + x6
+ x14 ^= u<<7 | u>>(32-7)
+ u = x14 + x10
+ x2 ^= u<<9 | u>>(32-9)
+ u = x2 + x14
+ x6 ^= u<<13 | u>>(32-13)
+ u = x6 + x2
+ x10 ^= u<<18 | u>>(32-18)
+
+ u = x15 + x11
+ x3 ^= u<<7 | u>>(32-7)
+ u = x3 + x15
+ x7 ^= u<<9 | u>>(32-9)
+ u = x7 + x3
+ x11 ^= u<<13 | u>>(32-13)
+ u = x11 + x7
+ x15 ^= u<<18 | u>>(32-18)
+
+ u = x0 + x3
+ x1 ^= u<<7 | u>>(32-7)
+ u = x1 + x0
+ x2 ^= u<<9 | u>>(32-9)
+ u = x2 + x1
+ x3 ^= u<<13 | u>>(32-13)
+ u = x3 + x2
+ x0 ^= u<<18 | u>>(32-18)
+
+ u = x5 + x4
+ x6 ^= u<<7 | u>>(32-7)
+ u = x6 + x5
+ x7 ^= u<<9 | u>>(32-9)
+ u = x7 + x6
+ x4 ^= u<<13 | u>>(32-13)
+ u = x4 + x7
+ x5 ^= u<<18 | u>>(32-18)
+
+ u = x10 + x9
+ x11 ^= u<<7 | u>>(32-7)
+ u = x11 + x10
+ x8 ^= u<<9 | u>>(32-9)
+ u = x8 + x11
+ x9 ^= u<<13 | u>>(32-13)
+ u = x9 + x8
+ x10 ^= u<<18 | u>>(32-18)
+
+ u = x15 + x14
+ x12 ^= u<<7 | u>>(32-7)
+ u = x12 + x15
+ x13 ^= u<<9 | u>>(32-9)
+ u = x13 + x12
+ x14 ^= u<<13 | u>>(32-13)
+ u = x14 + x13
+ x15 ^= u<<18 | u>>(32-18)
+ }
+ x0 += j0
+ x1 += j1
+ x2 += j2
+ x3 += j3
+ x4 += j4
+ x5 += j5
+ x6 += j6
+ x7 += j7
+ x8 += j8
+ x9 += j9
+ x10 += j10
+ x11 += j11
+ x12 += j12
+ x13 += j13
+ x14 += j14
+ x15 += j15
+
+ out[0] = byte(x0)
+ out[1] = byte(x0 >> 8)
+ out[2] = byte(x0 >> 16)
+ out[3] = byte(x0 >> 24)
+
+ out[4] = byte(x1)
+ out[5] = byte(x1 >> 8)
+ out[6] = byte(x1 >> 16)
+ out[7] = byte(x1 >> 24)
+
+ out[8] = byte(x2)
+ out[9] = byte(x2 >> 8)
+ out[10] = byte(x2 >> 16)
+ out[11] = byte(x2 >> 24)
+
+ out[12] = byte(x3)
+ out[13] = byte(x3 >> 8)
+ out[14] = byte(x3 >> 16)
+ out[15] = byte(x3 >> 24)
+
+ out[16] = byte(x4)
+ out[17] = byte(x4 >> 8)
+ out[18] = byte(x4 >> 16)
+ out[19] = byte(x4 >> 24)
+
+ out[20] = byte(x5)
+ out[21] = byte(x5 >> 8)
+ out[22] = byte(x5 >> 16)
+ out[23] = byte(x5 >> 24)
+
+ out[24] = byte(x6)
+ out[25] = byte(x6 >> 8)
+ out[26] = byte(x6 >> 16)
+ out[27] = byte(x6 >> 24)
+
+ out[28] = byte(x7)
+ out[29] = byte(x7 >> 8)
+ out[30] = byte(x7 >> 16)
+ out[31] = byte(x7 >> 24)
+
+ out[32] = byte(x8)
+ out[33] = byte(x8 >> 8)
+ out[34] = byte(x8 >> 16)
+ out[35] = byte(x8 >> 24)
+
+ out[36] = byte(x9)
+ out[37] = byte(x9 >> 8)
+ out[38] = byte(x9 >> 16)
+ out[39] = byte(x9 >> 24)
+
+ out[40] = byte(x10)
+ out[41] = byte(x10 >> 8)
+ out[42] = byte(x10 >> 16)
+ out[43] = byte(x10 >> 24)
+
+ out[44] = byte(x11)
+ out[45] = byte(x11 >> 8)
+ out[46] = byte(x11 >> 16)
+ out[47] = byte(x11 >> 24)
+
+ out[48] = byte(x12)
+ out[49] = byte(x12 >> 8)
+ out[50] = byte(x12 >> 16)
+ out[51] = byte(x12 >> 24)
+
+ out[52] = byte(x13)
+ out[53] = byte(x13 >> 8)
+ out[54] = byte(x13 >> 16)
+ out[55] = byte(x13 >> 24)
+
+ out[56] = byte(x14)
+ out[57] = byte(x14 >> 8)
+ out[58] = byte(x14 >> 16)
+ out[59] = byte(x14 >> 24)
+
+ out[60] = byte(x15)
+ out[61] = byte(x15 >> 8)
+ out[62] = byte(x15 >> 16)
+ out[63] = byte(x15 >> 24)
+}
diff --git a/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_amd64.go b/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_amd64.go
new file mode 100644
index 000000000..c400dfcf7
--- /dev/null
+++ b/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_amd64.go
@@ -0,0 +1,24 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build amd64 && !purego && gc
+// +build amd64,!purego,gc
+
+package salsa
+
+//go:noescape
+
+// salsa2020XORKeyStream is implemented in salsa20_amd64.s.
+func salsa2020XORKeyStream(out, in *byte, n uint64, nonce, key *byte)
+
+// XORKeyStream crypts bytes from in to out using the given key and counters.
+// In and out must overlap entirely or not at all. Counter
+// contains the raw salsa20 counter bytes (both nonce and block counter).
+func XORKeyStream(out, in []byte, counter *[16]byte, key *[32]byte) {
+ if len(in) == 0 {
+ return
+ }
+ _ = out[len(in)-1]
+ salsa2020XORKeyStream(&out[0], &in[0], uint64(len(in)), &counter[0], &key[0])
+}
diff --git a/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_amd64.s b/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_amd64.s
new file mode 100644
index 000000000..c08927720
--- /dev/null
+++ b/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_amd64.s
@@ -0,0 +1,881 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build amd64 && !purego && gc
+// +build amd64,!purego,gc
+
+// This code was translated into a form compatible with 6a from the public
+// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html
+
+// func salsa2020XORKeyStream(out, in *byte, n uint64, nonce, key *byte)
+// This needs up to 64 bytes at 360(R12); hence the non-obvious frame size.
+TEXT ·salsa2020XORKeyStream(SB),0,$456-40 // frame = 424 + 32 byte alignment
+ MOVQ out+0(FP),DI
+ MOVQ in+8(FP),SI
+ MOVQ n+16(FP),DX
+ MOVQ nonce+24(FP),CX
+ MOVQ key+32(FP),R8
+
+ MOVQ SP,R12
+ ADDQ $31, R12
+ ANDQ $~31, R12
+
+ MOVQ DX,R9
+ MOVQ CX,DX
+ MOVQ R8,R10
+ CMPQ R9,$0
+ JBE DONE
+ START:
+ MOVL 20(R10),CX
+ MOVL 0(R10),R8
+ MOVL 0(DX),AX
+ MOVL 16(R10),R11
+ MOVL CX,0(R12)
+ MOVL R8, 4 (R12)
+ MOVL AX, 8 (R12)
+ MOVL R11, 12 (R12)
+ MOVL 8(DX),CX
+ MOVL 24(R10),R8
+ MOVL 4(R10),AX
+ MOVL 4(DX),R11
+ MOVL CX,16(R12)
+ MOVL R8, 20 (R12)
+ MOVL AX, 24 (R12)
+ MOVL R11, 28 (R12)
+ MOVL 12(DX),CX
+ MOVL 12(R10),DX
+ MOVL 28(R10),R8
+ MOVL 8(R10),AX
+ MOVL DX,32(R12)
+ MOVL CX, 36 (R12)
+ MOVL R8, 40 (R12)
+ MOVL AX, 44 (R12)
+ MOVQ $1634760805,DX
+ MOVQ $857760878,CX
+ MOVQ $2036477234,R8
+ MOVQ $1797285236,AX
+ MOVL DX,48(R12)
+ MOVL CX, 52 (R12)
+ MOVL R8, 56 (R12)
+ MOVL AX, 60 (R12)
+ CMPQ R9,$256
+ JB BYTESBETWEEN1AND255
+ MOVOA 48(R12),X0
+ PSHUFL $0X55,X0,X1
+ PSHUFL $0XAA,X0,X2
+ PSHUFL $0XFF,X0,X3
+ PSHUFL $0X00,X0,X0
+ MOVOA X1,64(R12)
+ MOVOA X2,80(R12)
+ MOVOA X3,96(R12)
+ MOVOA X0,112(R12)
+ MOVOA 0(R12),X0
+ PSHUFL $0XAA,X0,X1
+ PSHUFL $0XFF,X0,X2
+ PSHUFL $0X00,X0,X3
+ PSHUFL $0X55,X0,X0
+ MOVOA X1,128(R12)
+ MOVOA X2,144(R12)
+ MOVOA X3,160(R12)
+ MOVOA X0,176(R12)
+ MOVOA 16(R12),X0
+ PSHUFL $0XFF,X0,X1
+ PSHUFL $0X55,X0,X2
+ PSHUFL $0XAA,X0,X0
+ MOVOA X1,192(R12)
+ MOVOA X2,208(R12)
+ MOVOA X0,224(R12)
+ MOVOA 32(R12),X0
+ PSHUFL $0X00,X0,X1
+ PSHUFL $0XAA,X0,X2
+ PSHUFL $0XFF,X0,X0
+ MOVOA X1,240(R12)
+ MOVOA X2,256(R12)
+ MOVOA X0,272(R12)
+ BYTESATLEAST256:
+ MOVL 16(R12),DX
+ MOVL 36 (R12),CX
+ MOVL DX,288(R12)
+ MOVL CX,304(R12)
+ SHLQ $32,CX
+ ADDQ CX,DX
+ ADDQ $1,DX
+ MOVQ DX,CX
+ SHRQ $32,CX
+ MOVL DX, 292 (R12)
+ MOVL CX, 308 (R12)
+ ADDQ $1,DX
+ MOVQ DX,CX
+ SHRQ $32,CX
+ MOVL DX, 296 (R12)
+ MOVL CX, 312 (R12)
+ ADDQ $1,DX
+ MOVQ DX,CX
+ SHRQ $32,CX
+ MOVL DX, 300 (R12)
+ MOVL CX, 316 (R12)
+ ADDQ $1,DX
+ MOVQ DX,CX
+ SHRQ $32,CX
+ MOVL DX,16(R12)
+ MOVL CX, 36 (R12)
+ MOVQ R9,352(R12)
+ MOVQ $20,DX
+ MOVOA 64(R12),X0
+ MOVOA 80(R12),X1
+ MOVOA 96(R12),X2
+ MOVOA 256(R12),X3
+ MOVOA 272(R12),X4
+ MOVOA 128(R12),X5
+ MOVOA 144(R12),X6
+ MOVOA 176(R12),X7
+ MOVOA 192(R12),X8
+ MOVOA 208(R12),X9
+ MOVOA 224(R12),X10
+ MOVOA 304(R12),X11
+ MOVOA 112(R12),X12
+ MOVOA 160(R12),X13
+ MOVOA 240(R12),X14
+ MOVOA 288(R12),X15
+ MAINLOOP1:
+ MOVOA X1,320(R12)
+ MOVOA X2,336(R12)
+ MOVOA X13,X1
+ PADDL X12,X1
+ MOVOA X1,X2
+ PSLLL $7,X1
+ PXOR X1,X14
+ PSRLL $25,X2
+ PXOR X2,X14
+ MOVOA X7,X1
+ PADDL X0,X1
+ MOVOA X1,X2
+ PSLLL $7,X1
+ PXOR X1,X11
+ PSRLL $25,X2
+ PXOR X2,X11
+ MOVOA X12,X1
+ PADDL X14,X1
+ MOVOA X1,X2
+ PSLLL $9,X1
+ PXOR X1,X15
+ PSRLL $23,X2
+ PXOR X2,X15
+ MOVOA X0,X1
+ PADDL X11,X1
+ MOVOA X1,X2
+ PSLLL $9,X1
+ PXOR X1,X9
+ PSRLL $23,X2
+ PXOR X2,X9
+ MOVOA X14,X1
+ PADDL X15,X1
+ MOVOA X1,X2
+ PSLLL $13,X1
+ PXOR X1,X13
+ PSRLL $19,X2
+ PXOR X2,X13
+ MOVOA X11,X1
+ PADDL X9,X1
+ MOVOA X1,X2
+ PSLLL $13,X1
+ PXOR X1,X7
+ PSRLL $19,X2
+ PXOR X2,X7
+ MOVOA X15,X1
+ PADDL X13,X1
+ MOVOA X1,X2
+ PSLLL $18,X1
+ PXOR X1,X12
+ PSRLL $14,X2
+ PXOR X2,X12
+ MOVOA 320(R12),X1
+ MOVOA X12,320(R12)
+ MOVOA X9,X2
+ PADDL X7,X2
+ MOVOA X2,X12
+ PSLLL $18,X2
+ PXOR X2,X0
+ PSRLL $14,X12
+ PXOR X12,X0
+ MOVOA X5,X2
+ PADDL X1,X2
+ MOVOA X2,X12
+ PSLLL $7,X2
+ PXOR X2,X3
+ PSRLL $25,X12
+ PXOR X12,X3
+ MOVOA 336(R12),X2
+ MOVOA X0,336(R12)
+ MOVOA X6,X0
+ PADDL X2,X0
+ MOVOA X0,X12
+ PSLLL $7,X0
+ PXOR X0,X4
+ PSRLL $25,X12
+ PXOR X12,X4
+ MOVOA X1,X0
+ PADDL X3,X0
+ MOVOA X0,X12
+ PSLLL $9,X0
+ PXOR X0,X10
+ PSRLL $23,X12
+ PXOR X12,X10
+ MOVOA X2,X0
+ PADDL X4,X0
+ MOVOA X0,X12
+ PSLLL $9,X0
+ PXOR X0,X8
+ PSRLL $23,X12
+ PXOR X12,X8
+ MOVOA X3,X0
+ PADDL X10,X0
+ MOVOA X0,X12
+ PSLLL $13,X0
+ PXOR X0,X5
+ PSRLL $19,X12
+ PXOR X12,X5
+ MOVOA X4,X0
+ PADDL X8,X0
+ MOVOA X0,X12
+ PSLLL $13,X0
+ PXOR X0,X6
+ PSRLL $19,X12
+ PXOR X12,X6
+ MOVOA X10,X0
+ PADDL X5,X0
+ MOVOA X0,X12
+ PSLLL $18,X0
+ PXOR X0,X1
+ PSRLL $14,X12
+ PXOR X12,X1
+ MOVOA 320(R12),X0
+ MOVOA X1,320(R12)
+ MOVOA X4,X1
+ PADDL X0,X1
+ MOVOA X1,X12
+ PSLLL $7,X1
+ PXOR X1,X7
+ PSRLL $25,X12
+ PXOR X12,X7
+ MOVOA X8,X1
+ PADDL X6,X1
+ MOVOA X1,X12
+ PSLLL $18,X1
+ PXOR X1,X2
+ PSRLL $14,X12
+ PXOR X12,X2
+ MOVOA 336(R12),X12
+ MOVOA X2,336(R12)
+ MOVOA X14,X1
+ PADDL X12,X1
+ MOVOA X1,X2
+ PSLLL $7,X1
+ PXOR X1,X5
+ PSRLL $25,X2
+ PXOR X2,X5
+ MOVOA X0,X1
+ PADDL X7,X1
+ MOVOA X1,X2
+ PSLLL $9,X1
+ PXOR X1,X10
+ PSRLL $23,X2
+ PXOR X2,X10
+ MOVOA X12,X1
+ PADDL X5,X1
+ MOVOA X1,X2
+ PSLLL $9,X1
+ PXOR X1,X8
+ PSRLL $23,X2
+ PXOR X2,X8
+ MOVOA X7,X1
+ PADDL X10,X1
+ MOVOA X1,X2
+ PSLLL $13,X1
+ PXOR X1,X4
+ PSRLL $19,X2
+ PXOR X2,X4
+ MOVOA X5,X1
+ PADDL X8,X1
+ MOVOA X1,X2
+ PSLLL $13,X1
+ PXOR X1,X14
+ PSRLL $19,X2
+ PXOR X2,X14
+ MOVOA X10,X1
+ PADDL X4,X1
+ MOVOA X1,X2
+ PSLLL $18,X1
+ PXOR X1,X0
+ PSRLL $14,X2
+ PXOR X2,X0
+ MOVOA 320(R12),X1
+ MOVOA X0,320(R12)
+ MOVOA X8,X0
+ PADDL X14,X0
+ MOVOA X0,X2
+ PSLLL $18,X0
+ PXOR X0,X12
+ PSRLL $14,X2
+ PXOR X2,X12
+ MOVOA X11,X0
+ PADDL X1,X0
+ MOVOA X0,X2
+ PSLLL $7,X0
+ PXOR X0,X6
+ PSRLL $25,X2
+ PXOR X2,X6
+ MOVOA 336(R12),X2
+ MOVOA X12,336(R12)
+ MOVOA X3,X0
+ PADDL X2,X0
+ MOVOA X0,X12
+ PSLLL $7,X0
+ PXOR X0,X13
+ PSRLL $25,X12
+ PXOR X12,X13
+ MOVOA X1,X0
+ PADDL X6,X0
+ MOVOA X0,X12
+ PSLLL $9,X0
+ PXOR X0,X15
+ PSRLL $23,X12
+ PXOR X12,X15
+ MOVOA X2,X0
+ PADDL X13,X0
+ MOVOA X0,X12
+ PSLLL $9,X0
+ PXOR X0,X9
+ PSRLL $23,X12
+ PXOR X12,X9
+ MOVOA X6,X0
+ PADDL X15,X0
+ MOVOA X0,X12
+ PSLLL $13,X0
+ PXOR X0,X11
+ PSRLL $19,X12
+ PXOR X12,X11
+ MOVOA X13,X0
+ PADDL X9,X0
+ MOVOA X0,X12
+ PSLLL $13,X0
+ PXOR X0,X3
+ PSRLL $19,X12
+ PXOR X12,X3
+ MOVOA X15,X0
+ PADDL X11,X0
+ MOVOA X0,X12
+ PSLLL $18,X0
+ PXOR X0,X1
+ PSRLL $14,X12
+ PXOR X12,X1
+ MOVOA X9,X0
+ PADDL X3,X0
+ MOVOA X0,X12
+ PSLLL $18,X0
+ PXOR X0,X2
+ PSRLL $14,X12
+ PXOR X12,X2
+ MOVOA 320(R12),X12
+ MOVOA 336(R12),X0
+ SUBQ $2,DX
+ JA MAINLOOP1
+ PADDL 112(R12),X12
+ PADDL 176(R12),X7
+ PADDL 224(R12),X10
+ PADDL 272(R12),X4
+ MOVD X12,DX
+ MOVD X7,CX
+ MOVD X10,R8
+ MOVD X4,R9
+ PSHUFL $0X39,X12,X12
+ PSHUFL $0X39,X7,X7
+ PSHUFL $0X39,X10,X10
+ PSHUFL $0X39,X4,X4
+ XORL 0(SI),DX
+ XORL 4(SI),CX
+ XORL 8(SI),R8
+ XORL 12(SI),R9
+ MOVL DX,0(DI)
+ MOVL CX,4(DI)
+ MOVL R8,8(DI)
+ MOVL R9,12(DI)
+ MOVD X12,DX
+ MOVD X7,CX
+ MOVD X10,R8
+ MOVD X4,R9
+ PSHUFL $0X39,X12,X12
+ PSHUFL $0X39,X7,X7
+ PSHUFL $0X39,X10,X10
+ PSHUFL $0X39,X4,X4
+ XORL 64(SI),DX
+ XORL 68(SI),CX
+ XORL 72(SI),R8
+ XORL 76(SI),R9
+ MOVL DX,64(DI)
+ MOVL CX,68(DI)
+ MOVL R8,72(DI)
+ MOVL R9,76(DI)
+ MOVD X12,DX
+ MOVD X7,CX
+ MOVD X10,R8
+ MOVD X4,R9
+ PSHUFL $0X39,X12,X12
+ PSHUFL $0X39,X7,X7
+ PSHUFL $0X39,X10,X10
+ PSHUFL $0X39,X4,X4
+ XORL 128(SI),DX
+ XORL 132(SI),CX
+ XORL 136(SI),R8
+ XORL 140(SI),R9
+ MOVL DX,128(DI)
+ MOVL CX,132(DI)
+ MOVL R8,136(DI)
+ MOVL R9,140(DI)
+ MOVD X12,DX
+ MOVD X7,CX
+ MOVD X10,R8
+ MOVD X4,R9
+ XORL 192(SI),DX
+ XORL 196(SI),CX
+ XORL 200(SI),R8
+ XORL 204(SI),R9
+ MOVL DX,192(DI)
+ MOVL CX,196(DI)
+ MOVL R8,200(DI)
+ MOVL R9,204(DI)
+ PADDL 240(R12),X14
+ PADDL 64(R12),X0
+ PADDL 128(R12),X5
+ PADDL 192(R12),X8
+ MOVD X14,DX
+ MOVD X0,CX
+ MOVD X5,R8
+ MOVD X8,R9
+ PSHUFL $0X39,X14,X14
+ PSHUFL $0X39,X0,X0
+ PSHUFL $0X39,X5,X5
+ PSHUFL $0X39,X8,X8
+ XORL 16(SI),DX
+ XORL 20(SI),CX
+ XORL 24(SI),R8
+ XORL 28(SI),R9
+ MOVL DX,16(DI)
+ MOVL CX,20(DI)
+ MOVL R8,24(DI)
+ MOVL R9,28(DI)
+ MOVD X14,DX
+ MOVD X0,CX
+ MOVD X5,R8
+ MOVD X8,R9
+ PSHUFL $0X39,X14,X14
+ PSHUFL $0X39,X0,X0
+ PSHUFL $0X39,X5,X5
+ PSHUFL $0X39,X8,X8
+ XORL 80(SI),DX
+ XORL 84(SI),CX
+ XORL 88(SI),R8
+ XORL 92(SI),R9
+ MOVL DX,80(DI)
+ MOVL CX,84(DI)
+ MOVL R8,88(DI)
+ MOVL R9,92(DI)
+ MOVD X14,DX
+ MOVD X0,CX
+ MOVD X5,R8
+ MOVD X8,R9
+ PSHUFL $0X39,X14,X14
+ PSHUFL $0X39,X0,X0
+ PSHUFL $0X39,X5,X5
+ PSHUFL $0X39,X8,X8
+ XORL 144(SI),DX
+ XORL 148(SI),CX
+ XORL 152(SI),R8
+ XORL 156(SI),R9
+ MOVL DX,144(DI)
+ MOVL CX,148(DI)
+ MOVL R8,152(DI)
+ MOVL R9,156(DI)
+ MOVD X14,DX
+ MOVD X0,CX
+ MOVD X5,R8
+ MOVD X8,R9
+ XORL 208(SI),DX
+ XORL 212(SI),CX
+ XORL 216(SI),R8
+ XORL 220(SI),R9
+ MOVL DX,208(DI)
+ MOVL CX,212(DI)
+ MOVL R8,216(DI)
+ MOVL R9,220(DI)
+ PADDL 288(R12),X15
+ PADDL 304(R12),X11
+ PADDL 80(R12),X1
+ PADDL 144(R12),X6
+ MOVD X15,DX
+ MOVD X11,CX
+ MOVD X1,R8
+ MOVD X6,R9
+ PSHUFL $0X39,X15,X15
+ PSHUFL $0X39,X11,X11
+ PSHUFL $0X39,X1,X1
+ PSHUFL $0X39,X6,X6
+ XORL 32(SI),DX
+ XORL 36(SI),CX
+ XORL 40(SI),R8
+ XORL 44(SI),R9
+ MOVL DX,32(DI)
+ MOVL CX,36(DI)
+ MOVL R8,40(DI)
+ MOVL R9,44(DI)
+ MOVD X15,DX
+ MOVD X11,CX
+ MOVD X1,R8
+ MOVD X6,R9
+ PSHUFL $0X39,X15,X15
+ PSHUFL $0X39,X11,X11
+ PSHUFL $0X39,X1,X1
+ PSHUFL $0X39,X6,X6
+ XORL 96(SI),DX
+ XORL 100(SI),CX
+ XORL 104(SI),R8
+ XORL 108(SI),R9
+ MOVL DX,96(DI)
+ MOVL CX,100(DI)
+ MOVL R8,104(DI)
+ MOVL R9,108(DI)
+ MOVD X15,DX
+ MOVD X11,CX
+ MOVD X1,R8
+ MOVD X6,R9
+ PSHUFL $0X39,X15,X15
+ PSHUFL $0X39,X11,X11
+ PSHUFL $0X39,X1,X1
+ PSHUFL $0X39,X6,X6
+ XORL 160(SI),DX
+ XORL 164(SI),CX
+ XORL 168(SI),R8
+ XORL 172(SI),R9
+ MOVL DX,160(DI)
+ MOVL CX,164(DI)
+ MOVL R8,168(DI)
+ MOVL R9,172(DI)
+ MOVD X15,DX
+ MOVD X11,CX
+ MOVD X1,R8
+ MOVD X6,R9
+ XORL 224(SI),DX
+ XORL 228(SI),CX
+ XORL 232(SI),R8
+ XORL 236(SI),R9
+ MOVL DX,224(DI)
+ MOVL CX,228(DI)
+ MOVL R8,232(DI)
+ MOVL R9,236(DI)
+ PADDL 160(R12),X13
+ PADDL 208(R12),X9
+ PADDL 256(R12),X3
+ PADDL 96(R12),X2
+ MOVD X13,DX
+ MOVD X9,CX
+ MOVD X3,R8
+ MOVD X2,R9
+ PSHUFL $0X39,X13,X13
+ PSHUFL $0X39,X9,X9
+ PSHUFL $0X39,X3,X3
+ PSHUFL $0X39,X2,X2
+ XORL 48(SI),DX
+ XORL 52(SI),CX
+ XORL 56(SI),R8
+ XORL 60(SI),R9
+ MOVL DX,48(DI)
+ MOVL CX,52(DI)
+ MOVL R8,56(DI)
+ MOVL R9,60(DI)
+ MOVD X13,DX
+ MOVD X9,CX
+ MOVD X3,R8
+ MOVD X2,R9
+ PSHUFL $0X39,X13,X13
+ PSHUFL $0X39,X9,X9
+ PSHUFL $0X39,X3,X3
+ PSHUFL $0X39,X2,X2
+ XORL 112(SI),DX
+ XORL 116(SI),CX
+ XORL 120(SI),R8
+ XORL 124(SI),R9
+ MOVL DX,112(DI)
+ MOVL CX,116(DI)
+ MOVL R8,120(DI)
+ MOVL R9,124(DI)
+ MOVD X13,DX
+ MOVD X9,CX
+ MOVD X3,R8
+ MOVD X2,R9
+ PSHUFL $0X39,X13,X13
+ PSHUFL $0X39,X9,X9
+ PSHUFL $0X39,X3,X3
+ PSHUFL $0X39,X2,X2
+ XORL 176(SI),DX
+ XORL 180(SI),CX
+ XORL 184(SI),R8
+ XORL 188(SI),R9
+ MOVL DX,176(DI)
+ MOVL CX,180(DI)
+ MOVL R8,184(DI)
+ MOVL R9,188(DI)
+ MOVD X13,DX
+ MOVD X9,CX
+ MOVD X3,R8
+ MOVD X2,R9
+ XORL 240(SI),DX
+ XORL 244(SI),CX
+ XORL 248(SI),R8
+ XORL 252(SI),R9
+ MOVL DX,240(DI)
+ MOVL CX,244(DI)
+ MOVL R8,248(DI)
+ MOVL R9,252(DI)
+ MOVQ 352(R12),R9
+ SUBQ $256,R9
+ ADDQ $256,SI
+ ADDQ $256,DI
+ CMPQ R9,$256
+ JAE BYTESATLEAST256
+ CMPQ R9,$0
+ JBE DONE
+ BYTESBETWEEN1AND255:
+ CMPQ R9,$64
+ JAE NOCOPY
+ MOVQ DI,DX
+ LEAQ 360(R12),DI
+ MOVQ R9,CX
+ REP; MOVSB
+ LEAQ 360(R12),DI
+ LEAQ 360(R12),SI
+ NOCOPY:
+ MOVQ R9,352(R12)
+ MOVOA 48(R12),X0
+ MOVOA 0(R12),X1
+ MOVOA 16(R12),X2
+ MOVOA 32(R12),X3
+ MOVOA X1,X4
+ MOVQ $20,CX
+ MAINLOOP2:
+ PADDL X0,X4
+ MOVOA X0,X5
+ MOVOA X4,X6
+ PSLLL $7,X4
+ PSRLL $25,X6
+ PXOR X4,X3
+ PXOR X6,X3
+ PADDL X3,X5
+ MOVOA X3,X4
+ MOVOA X5,X6
+ PSLLL $9,X5
+ PSRLL $23,X6
+ PXOR X5,X2
+ PSHUFL $0X93,X3,X3
+ PXOR X6,X2
+ PADDL X2,X4
+ MOVOA X2,X5
+ MOVOA X4,X6
+ PSLLL $13,X4
+ PSRLL $19,X6
+ PXOR X4,X1
+ PSHUFL $0X4E,X2,X2
+ PXOR X6,X1
+ PADDL X1,X5
+ MOVOA X3,X4
+ MOVOA X5,X6
+ PSLLL $18,X5
+ PSRLL $14,X6
+ PXOR X5,X0
+ PSHUFL $0X39,X1,X1
+ PXOR X6,X0
+ PADDL X0,X4
+ MOVOA X0,X5
+ MOVOA X4,X6
+ PSLLL $7,X4
+ PSRLL $25,X6
+ PXOR X4,X1
+ PXOR X6,X1
+ PADDL X1,X5
+ MOVOA X1,X4
+ MOVOA X5,X6
+ PSLLL $9,X5
+ PSRLL $23,X6
+ PXOR X5,X2
+ PSHUFL $0X93,X1,X1
+ PXOR X6,X2
+ PADDL X2,X4
+ MOVOA X2,X5
+ MOVOA X4,X6
+ PSLLL $13,X4
+ PSRLL $19,X6
+ PXOR X4,X3
+ PSHUFL $0X4E,X2,X2
+ PXOR X6,X3
+ PADDL X3,X5
+ MOVOA X1,X4
+ MOVOA X5,X6
+ PSLLL $18,X5
+ PSRLL $14,X6
+ PXOR X5,X0
+ PSHUFL $0X39,X3,X3
+ PXOR X6,X0
+ PADDL X0,X4
+ MOVOA X0,X5
+ MOVOA X4,X6
+ PSLLL $7,X4
+ PSRLL $25,X6
+ PXOR X4,X3
+ PXOR X6,X3
+ PADDL X3,X5
+ MOVOA X3,X4
+ MOVOA X5,X6
+ PSLLL $9,X5
+ PSRLL $23,X6
+ PXOR X5,X2
+ PSHUFL $0X93,X3,X3
+ PXOR X6,X2
+ PADDL X2,X4
+ MOVOA X2,X5
+ MOVOA X4,X6
+ PSLLL $13,X4
+ PSRLL $19,X6
+ PXOR X4,X1
+ PSHUFL $0X4E,X2,X2
+ PXOR X6,X1
+ PADDL X1,X5
+ MOVOA X3,X4
+ MOVOA X5,X6
+ PSLLL $18,X5
+ PSRLL $14,X6
+ PXOR X5,X0
+ PSHUFL $0X39,X1,X1
+ PXOR X6,X0
+ PADDL X0,X4
+ MOVOA X0,X5
+ MOVOA X4,X6
+ PSLLL $7,X4
+ PSRLL $25,X6
+ PXOR X4,X1
+ PXOR X6,X1
+ PADDL X1,X5
+ MOVOA X1,X4
+ MOVOA X5,X6
+ PSLLL $9,X5
+ PSRLL $23,X6
+ PXOR X5,X2
+ PSHUFL $0X93,X1,X1
+ PXOR X6,X2
+ PADDL X2,X4
+ MOVOA X2,X5
+ MOVOA X4,X6
+ PSLLL $13,X4
+ PSRLL $19,X6
+ PXOR X4,X3
+ PSHUFL $0X4E,X2,X2
+ PXOR X6,X3
+ SUBQ $4,CX
+ PADDL X3,X5
+ MOVOA X1,X4
+ MOVOA X5,X6
+ PSLLL $18,X5
+ PXOR X7,X7
+ PSRLL $14,X6
+ PXOR X5,X0
+ PSHUFL $0X39,X3,X3
+ PXOR X6,X0
+ JA MAINLOOP2
+ PADDL 48(R12),X0
+ PADDL 0(R12),X1
+ PADDL 16(R12),X2
+ PADDL 32(R12),X3
+ MOVD X0,CX
+ MOVD X1,R8
+ MOVD X2,R9
+ MOVD X3,AX
+ PSHUFL $0X39,X0,X0
+ PSHUFL $0X39,X1,X1
+ PSHUFL $0X39,X2,X2
+ PSHUFL $0X39,X3,X3
+ XORL 0(SI),CX
+ XORL 48(SI),R8
+ XORL 32(SI),R9
+ XORL 16(SI),AX
+ MOVL CX,0(DI)
+ MOVL R8,48(DI)
+ MOVL R9,32(DI)
+ MOVL AX,16(DI)
+ MOVD X0,CX
+ MOVD X1,R8
+ MOVD X2,R9
+ MOVD X3,AX
+ PSHUFL $0X39,X0,X0
+ PSHUFL $0X39,X1,X1
+ PSHUFL $0X39,X2,X2
+ PSHUFL $0X39,X3,X3
+ XORL 20(SI),CX
+ XORL 4(SI),R8
+ XORL 52(SI),R9
+ XORL 36(SI),AX
+ MOVL CX,20(DI)
+ MOVL R8,4(DI)
+ MOVL R9,52(DI)
+ MOVL AX,36(DI)
+ MOVD X0,CX
+ MOVD X1,R8
+ MOVD X2,R9
+ MOVD X3,AX
+ PSHUFL $0X39,X0,X0
+ PSHUFL $0X39,X1,X1
+ PSHUFL $0X39,X2,X2
+ PSHUFL $0X39,X3,X3
+ XORL 40(SI),CX
+ XORL 24(SI),R8
+ XORL 8(SI),R9
+ XORL 56(SI),AX
+ MOVL CX,40(DI)
+ MOVL R8,24(DI)
+ MOVL R9,8(DI)
+ MOVL AX,56(DI)
+ MOVD X0,CX
+ MOVD X1,R8
+ MOVD X2,R9
+ MOVD X3,AX
+ XORL 60(SI),CX
+ XORL 44(SI),R8
+ XORL 28(SI),R9
+ XORL 12(SI),AX
+ MOVL CX,60(DI)
+ MOVL R8,44(DI)
+ MOVL R9,28(DI)
+ MOVL AX,12(DI)
+ MOVQ 352(R12),R9
+ MOVL 16(R12),CX
+ MOVL 36 (R12),R8
+ ADDQ $1,CX
+ SHLQ $32,R8
+ ADDQ R8,CX
+ MOVQ CX,R8
+ SHRQ $32,R8
+ MOVL CX,16(R12)
+ MOVL R8, 36 (R12)
+ CMPQ R9,$64
+ JA BYTESATLEAST65
+ JAE BYTESATLEAST64
+ MOVQ DI,SI
+ MOVQ DX,DI
+ MOVQ R9,CX
+ REP; MOVSB
+ BYTESATLEAST64:
+ DONE:
+ RET
+ BYTESATLEAST65:
+ SUBQ $64,R9
+ ADDQ $64,DI
+ ADDQ $64,SI
+ JMP BYTESBETWEEN1AND255
diff --git a/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_noasm.go b/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_noasm.go
new file mode 100644
index 000000000..4392cc1ac
--- /dev/null
+++ b/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_noasm.go
@@ -0,0 +1,15 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !amd64 || purego || !gc
+// +build !amd64 purego !gc
+
+package salsa
+
+// XORKeyStream crypts bytes from in to out using the given key and counters.
+// In and out must overlap entirely or not at all. Counter
+// contains the raw salsa20 counter bytes (both nonce and block counter).
+func XORKeyStream(out, in []byte, counter *[16]byte, key *[32]byte) {
+ genericXORKeyStream(out, in, counter, key)
+}
diff --git a/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_ref.go b/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_ref.go
new file mode 100644
index 000000000..68169c6d6
--- /dev/null
+++ b/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_ref.go
@@ -0,0 +1,231 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package salsa
+
+const rounds = 20
+
+// core applies the Salsa20 core function to 16-byte input in, 32-byte key k,
+// and 16-byte constant c, and puts the result into 64-byte array out.
+func core(out *[64]byte, in *[16]byte, k *[32]byte, c *[16]byte) {
+ j0 := uint32(c[0]) | uint32(c[1])<<8 | uint32(c[2])<<16 | uint32(c[3])<<24
+ j1 := uint32(k[0]) | uint32(k[1])<<8 | uint32(k[2])<<16 | uint32(k[3])<<24
+ j2 := uint32(k[4]) | uint32(k[5])<<8 | uint32(k[6])<<16 | uint32(k[7])<<24
+ j3 := uint32(k[8]) | uint32(k[9])<<8 | uint32(k[10])<<16 | uint32(k[11])<<24
+ j4 := uint32(k[12]) | uint32(k[13])<<8 | uint32(k[14])<<16 | uint32(k[15])<<24
+ j5 := uint32(c[4]) | uint32(c[5])<<8 | uint32(c[6])<<16 | uint32(c[7])<<24
+ j6 := uint32(in[0]) | uint32(in[1])<<8 | uint32(in[2])<<16 | uint32(in[3])<<24
+ j7 := uint32(in[4]) | uint32(in[5])<<8 | uint32(in[6])<<16 | uint32(in[7])<<24
+ j8 := uint32(in[8]) | uint32(in[9])<<8 | uint32(in[10])<<16 | uint32(in[11])<<24
+ j9 := uint32(in[12]) | uint32(in[13])<<8 | uint32(in[14])<<16 | uint32(in[15])<<24
+ j10 := uint32(c[8]) | uint32(c[9])<<8 | uint32(c[10])<<16 | uint32(c[11])<<24
+ j11 := uint32(k[16]) | uint32(k[17])<<8 | uint32(k[18])<<16 | uint32(k[19])<<24
+ j12 := uint32(k[20]) | uint32(k[21])<<8 | uint32(k[22])<<16 | uint32(k[23])<<24
+ j13 := uint32(k[24]) | uint32(k[25])<<8 | uint32(k[26])<<16 | uint32(k[27])<<24
+ j14 := uint32(k[28]) | uint32(k[29])<<8 | uint32(k[30])<<16 | uint32(k[31])<<24
+ j15 := uint32(c[12]) | uint32(c[13])<<8 | uint32(c[14])<<16 | uint32(c[15])<<24
+
+ x0, x1, x2, x3, x4, x5, x6, x7, x8 := j0, j1, j2, j3, j4, j5, j6, j7, j8
+ x9, x10, x11, x12, x13, x14, x15 := j9, j10, j11, j12, j13, j14, j15
+
+ for i := 0; i < rounds; i += 2 {
+ u := x0 + x12
+ x4 ^= u<<7 | u>>(32-7)
+ u = x4 + x0
+ x8 ^= u<<9 | u>>(32-9)
+ u = x8 + x4
+ x12 ^= u<<13 | u>>(32-13)
+ u = x12 + x8
+ x0 ^= u<<18 | u>>(32-18)
+
+ u = x5 + x1
+ x9 ^= u<<7 | u>>(32-7)
+ u = x9 + x5
+ x13 ^= u<<9 | u>>(32-9)
+ u = x13 + x9
+ x1 ^= u<<13 | u>>(32-13)
+ u = x1 + x13
+ x5 ^= u<<18 | u>>(32-18)
+
+ u = x10 + x6
+ x14 ^= u<<7 | u>>(32-7)
+ u = x14 + x10
+ x2 ^= u<<9 | u>>(32-9)
+ u = x2 + x14
+ x6 ^= u<<13 | u>>(32-13)
+ u = x6 + x2
+ x10 ^= u<<18 | u>>(32-18)
+
+ u = x15 + x11
+ x3 ^= u<<7 | u>>(32-7)
+ u = x3 + x15
+ x7 ^= u<<9 | u>>(32-9)
+ u = x7 + x3
+ x11 ^= u<<13 | u>>(32-13)
+ u = x11 + x7
+ x15 ^= u<<18 | u>>(32-18)
+
+ u = x0 + x3
+ x1 ^= u<<7 | u>>(32-7)
+ u = x1 + x0
+ x2 ^= u<<9 | u>>(32-9)
+ u = x2 + x1
+ x3 ^= u<<13 | u>>(32-13)
+ u = x3 + x2
+ x0 ^= u<<18 | u>>(32-18)
+
+ u = x5 + x4
+ x6 ^= u<<7 | u>>(32-7)
+ u = x6 + x5
+ x7 ^= u<<9 | u>>(32-9)
+ u = x7 + x6
+ x4 ^= u<<13 | u>>(32-13)
+ u = x4 + x7
+ x5 ^= u<<18 | u>>(32-18)
+
+ u = x10 + x9
+ x11 ^= u<<7 | u>>(32-7)
+ u = x11 + x10
+ x8 ^= u<<9 | u>>(32-9)
+ u = x8 + x11
+ x9 ^= u<<13 | u>>(32-13)
+ u = x9 + x8
+ x10 ^= u<<18 | u>>(32-18)
+
+ u = x15 + x14
+ x12 ^= u<<7 | u>>(32-7)
+ u = x12 + x15
+ x13 ^= u<<9 | u>>(32-9)
+ u = x13 + x12
+ x14 ^= u<<13 | u>>(32-13)
+ u = x14 + x13
+ x15 ^= u<<18 | u>>(32-18)
+ }
+ x0 += j0
+ x1 += j1
+ x2 += j2
+ x3 += j3
+ x4 += j4
+ x5 += j5
+ x6 += j6
+ x7 += j7
+ x8 += j8
+ x9 += j9
+ x10 += j10
+ x11 += j11
+ x12 += j12
+ x13 += j13
+ x14 += j14
+ x15 += j15
+
+ out[0] = byte(x0)
+ out[1] = byte(x0 >> 8)
+ out[2] = byte(x0 >> 16)
+ out[3] = byte(x0 >> 24)
+
+ out[4] = byte(x1)
+ out[5] = byte(x1 >> 8)
+ out[6] = byte(x1 >> 16)
+ out[7] = byte(x1 >> 24)
+
+ out[8] = byte(x2)
+ out[9] = byte(x2 >> 8)
+ out[10] = byte(x2 >> 16)
+ out[11] = byte(x2 >> 24)
+
+ out[12] = byte(x3)
+ out[13] = byte(x3 >> 8)
+ out[14] = byte(x3 >> 16)
+ out[15] = byte(x3 >> 24)
+
+ out[16] = byte(x4)
+ out[17] = byte(x4 >> 8)
+ out[18] = byte(x4 >> 16)
+ out[19] = byte(x4 >> 24)
+
+ out[20] = byte(x5)
+ out[21] = byte(x5 >> 8)
+ out[22] = byte(x5 >> 16)
+ out[23] = byte(x5 >> 24)
+
+ out[24] = byte(x6)
+ out[25] = byte(x6 >> 8)
+ out[26] = byte(x6 >> 16)
+ out[27] = byte(x6 >> 24)
+
+ out[28] = byte(x7)
+ out[29] = byte(x7 >> 8)
+ out[30] = byte(x7 >> 16)
+ out[31] = byte(x7 >> 24)
+
+ out[32] = byte(x8)
+ out[33] = byte(x8 >> 8)
+ out[34] = byte(x8 >> 16)
+ out[35] = byte(x8 >> 24)
+
+ out[36] = byte(x9)
+ out[37] = byte(x9 >> 8)
+ out[38] = byte(x9 >> 16)
+ out[39] = byte(x9 >> 24)
+
+ out[40] = byte(x10)
+ out[41] = byte(x10 >> 8)
+ out[42] = byte(x10 >> 16)
+ out[43] = byte(x10 >> 24)
+
+ out[44] = byte(x11)
+ out[45] = byte(x11 >> 8)
+ out[46] = byte(x11 >> 16)
+ out[47] = byte(x11 >> 24)
+
+ out[48] = byte(x12)
+ out[49] = byte(x12 >> 8)
+ out[50] = byte(x12 >> 16)
+ out[51] = byte(x12 >> 24)
+
+ out[52] = byte(x13)
+ out[53] = byte(x13 >> 8)
+ out[54] = byte(x13 >> 16)
+ out[55] = byte(x13 >> 24)
+
+ out[56] = byte(x14)
+ out[57] = byte(x14 >> 8)
+ out[58] = byte(x14 >> 16)
+ out[59] = byte(x14 >> 24)
+
+ out[60] = byte(x15)
+ out[61] = byte(x15 >> 8)
+ out[62] = byte(x15 >> 16)
+ out[63] = byte(x15 >> 24)
+}
+
+// genericXORKeyStream is the generic implementation of XORKeyStream to be used
+// when no assembly implementation is available.
+func genericXORKeyStream(out, in []byte, counter *[16]byte, key *[32]byte) {
+ var block [64]byte
+ var counterCopy [16]byte
+ copy(counterCopy[:], counter[:])
+
+ for len(in) >= 64 {
+ core(&block, &counterCopy, key, &Sigma)
+ for i, x := range block {
+ out[i] = in[i] ^ x
+ }
+ u := uint32(1)
+ for i := 8; i < 16; i++ {
+ u += uint32(counterCopy[i])
+ counterCopy[i] = byte(u)
+ u >>= 8
+ }
+ in = in[64:]
+ out = out[64:]
+ }
+
+ if len(in) > 0 {
+ core(&block, &counterCopy, key, &Sigma)
+ for i, v := range in {
+ out[i] = v ^ block[i]
+ }
+ }
+}