summaryrefslogtreecommitdiff
path: root/vendor/github.com
diff options
context:
space:
mode:
authorDaniel J Walsh <dwalsh@redhat.com>2022-05-02 17:16:10 -0400
committerDaniel J Walsh <dwalsh@redhat.com>2022-05-02 17:16:10 -0400
commite247f02a4fbf3997e4ff01eecf30ac1002e993af (patch)
tree2d287c16b9a80f888074af6b6118625d74da6a16 /vendor/github.com
parent3ac5cec086e1ee1f11fabfd8a8f0aa8cf9f371f5 (diff)
downloadpodman-e247f02a4fbf3997e4ff01eecf30ac1002e993af.tar.gz
podman-e247f02a4fbf3997e4ff01eecf30ac1002e993af.tar.bz2
podman-e247f02a4fbf3997e4ff01eecf30ac1002e993af.zip
Vendor in containers/(common, storage, image)
Signed-off-by: Daniel J Walsh <dwalsh@redhat.com>
Diffstat (limited to 'vendor/github.com')
-rw-r--r--vendor/github.com/containers/common/version/version.go2
-rw-r--r--vendor/github.com/containers/image/v5/copy/copy.go195
-rw-r--r--vendor/github.com/containers/image/v5/copy/progress_bars.go148
-rw-r--r--vendor/github.com/containers/image/v5/copy/progress_channel.go (renamed from vendor/github.com/containers/image/v5/copy/progress_reader.go)29
-rw-r--r--vendor/github.com/containers/image/v5/version/version.go2
-rw-r--r--vendor/github.com/containers/ocicrypt/MAINTAINERS1
-rw-r--r--vendor/github.com/containers/ocicrypt/crypto/pkcs11/pkcs11helpers.go26
-rw-r--r--vendor/github.com/containers/storage/VERSION2
-rw-r--r--vendor/github.com/containers/storage/drivers/chown_unix.go2
-rw-r--r--vendor/github.com/containers/storage/drivers/driver_freebsd.go1
-rw-r--r--vendor/github.com/containers/storage/drivers/overlay/overlay.go12
-rw-r--r--vendor/github.com/containers/storage/drivers/zfs/zfs_freebsd.go16
-rw-r--r--vendor/github.com/containers/storage/go.mod2
-rw-r--r--vendor/github.com/containers/storage/go.sum3
-rw-r--r--vendor/github.com/containers/storage/layers.go188
-rw-r--r--vendor/github.com/containers/storage/pkg/idtools/idtools.go64
-rw-r--r--vendor/github.com/containers/storage/pkg/mount/flags_freebsd.go48
-rw-r--r--vendor/github.com/containers/storage/pkg/mount/flags_unsupported.go3
-rw-r--r--vendor/github.com/containers/storage/pkg/mount/mounter_freebsd.go21
-rw-r--r--vendor/github.com/containers/storage/pkg/reexec/command_freebsd.go37
-rw-r--r--vendor/github.com/containers/storage/pkg/reexec/command_unix.go3
-rw-r--r--vendor/github.com/klauspost/compress/.gitignore7
-rw-r--r--vendor/github.com/klauspost/compress/README.md7
-rw-r--r--vendor/github.com/klauspost/compress/go.mod2
-rw-r--r--vendor/github.com/klauspost/compress/huff0/decompress_amd64.go4
-rw-r--r--vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo.go34
-rw-r--r--vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.go11
-rw-r--r--vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.s36
-rw-r--r--vendor/github.com/klauspost/compress/zstd/README.md58
-rw-r--r--vendor/github.com/klauspost/compress/zstd/blockdec.go42
-rw-r--r--vendor/github.com/klauspost/compress/zstd/decoder.go14
-rw-r--r--vendor/github.com/klauspost/compress/zstd/decoder_options.go4
-rw-r--r--vendor/github.com/klauspost/compress/zstd/framedec.go13
-rw-r--r--vendor/github.com/klauspost/compress/zstd/fse_decoder.go25
-rw-r--r--vendor/github.com/klauspost/compress/zstd/seqdec.go168
-rw-r--r--vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go350
-rw-r--r--vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s3519
-rw-r--r--vendor/github.com/klauspost/compress/zstd/seqdec_generic.go237
-rw-r--r--vendor/github.com/klauspost/compress/zstd/zip.go48
39 files changed, 4864 insertions, 520 deletions
diff --git a/vendor/github.com/containers/common/version/version.go b/vendor/github.com/containers/common/version/version.go
index c1ae6c9dd..61fce9d22 100644
--- a/vendor/github.com/containers/common/version/version.go
+++ b/vendor/github.com/containers/common/version/version.go
@@ -1,4 +1,4 @@
package version
// Version is the version of the build.
-const Version = "0.47.4+dev"
+const Version = "0.48.0"
diff --git a/vendor/github.com/containers/image/v5/copy/copy.go b/vendor/github.com/containers/image/v5/copy/copy.go
index 644f82615..d28cc4a3f 100644
--- a/vendor/github.com/containers/image/v5/copy/copy.go
+++ b/vendor/github.com/containers/image/v5/copy/copy.go
@@ -32,7 +32,6 @@ import (
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"github.com/vbauerster/mpb/v7"
- "github.com/vbauerster/mpb/v7/decor"
"golang.org/x/sync/semaphore"
"golang.org/x/term"
)
@@ -712,8 +711,6 @@ func (c *copier) copyOneImage(ctx context.Context, policyContext *signature.Poli
// If src.UpdatedImageNeedsLayerDiffIDs(ic.manifestUpdates) will be true, it needs to be true by the time we get here.
ic.diffIDsAreNeeded = src.UpdatedImageNeedsLayerDiffIDs(*ic.manifestUpdates)
- // If encrypted and decryption keys provided, we should try to decrypt
- ic.diffIDsAreNeeded = ic.diffIDsAreNeeded || (isEncrypted(src) && ic.c.ociDecryptConfig != nil) || ic.c.ociEncryptConfig != nil
// If enabled, fetch and compare the destination's manifest. And as an optimization skip updating the destination iff equal
if options.OptimizeDestinationImageAlreadyExists {
@@ -1069,85 +1066,6 @@ func (ic *imageCopier) copyUpdatedConfigAndManifest(ctx context.Context, instanc
return man, manifestDigest, nil
}
-// newProgressPool creates a *mpb.Progress.
-// The caller must eventually call pool.Wait() after the pool will no longer be updated.
-// NOTE: Every progress bar created within the progress pool must either successfully
-// complete or be aborted, or pool.Wait() will hang. That is typically done
-// using "defer bar.Abort(false)", which must be called BEFORE pool.Wait() is called.
-func (c *copier) newProgressPool() *mpb.Progress {
- return mpb.New(mpb.WithWidth(40), mpb.WithOutput(c.progressOutput))
-}
-
-// customPartialBlobDecorFunc implements mpb.DecorFunc for the partial blobs retrieval progress bar
-func customPartialBlobDecorFunc(s decor.Statistics) string {
- if s.Total == 0 {
- pairFmt := "%.1f / %.1f (skipped: %.1f)"
- return fmt.Sprintf(pairFmt, decor.SizeB1024(s.Current), decor.SizeB1024(s.Total), decor.SizeB1024(s.Refill))
- }
- pairFmt := "%.1f / %.1f (skipped: %.1f = %.2f%%)"
- percentage := 100.0 * float64(s.Refill) / float64(s.Total)
- return fmt.Sprintf(pairFmt, decor.SizeB1024(s.Current), decor.SizeB1024(s.Total), decor.SizeB1024(s.Refill), percentage)
-}
-
-// createProgressBar creates a mpb.Bar in pool. Note that if the copier's reportWriter
-// is io.Discard, the progress bar's output will be discarded
-// NOTE: Every progress bar created within a progress pool must either successfully
-// complete or be aborted, or pool.Wait() will hang. That is typically done
-// using "defer bar.Abort(false)", which must happen BEFORE pool.Wait() is called.
-func (c *copier) createProgressBar(pool *mpb.Progress, partial bool, info types.BlobInfo, kind string, onComplete string) *mpb.Bar {
- // shortDigestLen is the length of the digest used for blobs.
- const shortDigestLen = 12
-
- prefix := fmt.Sprintf("Copying %s %s", kind, info.Digest.Encoded())
- // Truncate the prefix (chopping of some part of the digest) to make all progress bars aligned in a column.
- maxPrefixLen := len("Copying blob ") + shortDigestLen
- if len(prefix) > maxPrefixLen {
- prefix = prefix[:maxPrefixLen]
- }
-
- // onComplete will replace prefix once the bar/spinner has completed
- onComplete = prefix + " " + onComplete
-
- // Use a normal progress bar when we know the size (i.e., size > 0).
- // Otherwise, use a spinner to indicate that something's happening.
- var bar *mpb.Bar
- if info.Size > 0 {
- if partial {
- bar = pool.AddBar(info.Size,
- mpb.BarFillerClearOnComplete(),
- mpb.PrependDecorators(
- decor.OnComplete(decor.Name(prefix), onComplete),
- ),
- mpb.AppendDecorators(
- decor.Any(customPartialBlobDecorFunc),
- ),
- )
- } else {
- bar = pool.AddBar(info.Size,
- mpb.BarFillerClearOnComplete(),
- mpb.PrependDecorators(
- decor.OnComplete(decor.Name(prefix), onComplete),
- ),
- mpb.AppendDecorators(
- decor.OnComplete(decor.CountersKibiByte("%.1f / %.1f"), ""),
- ),
- )
- }
- } else {
- bar = pool.New(0,
- mpb.SpinnerStyle(".", "..", "...", "....", "").PositionLeft(),
- mpb.BarFillerClearOnComplete(),
- mpb.PrependDecorators(
- decor.OnComplete(decor.Name(prefix), onComplete),
- ),
- )
- }
- if c.progressOutput == io.Discard {
- c.Printf("Copying %s %s\n", kind, info.Digest)
- }
- return bar
-}
-
// copyConfig copies config.json, if any, from src to dest.
func (c *copier) copyConfig(ctx context.Context, src types.Image) error {
srcInfo := src.ConfigInfo()
@@ -1158,22 +1076,23 @@ func (c *copier) copyConfig(ctx context.Context, src types.Image) error {
}
defer c.concurrentBlobCopiesSemaphore.Release(1)
- configBlob, err := src.ConfigBlob(ctx)
- if err != nil {
- return errors.Wrapf(err, "reading config blob %s", srcInfo.Digest)
- }
-
destInfo, err := func() (types.BlobInfo, error) { // A scope for defer
progressPool := c.newProgressPool()
defer progressPool.Wait()
bar := c.createProgressBar(progressPool, false, srcInfo, "config", "done")
defer bar.Abort(false)
+ configBlob, err := src.ConfigBlob(ctx)
+ if err != nil {
+ return types.BlobInfo{}, errors.Wrapf(err, "reading config blob %s", srcInfo.Digest)
+ }
+
destInfo, err := c.copyBlobFromStream(ctx, bytes.NewReader(configBlob), srcInfo, nil, false, true, false, bar, -1, false)
if err != nil {
return types.BlobInfo{}, err
}
- bar.SetTotal(int64(len(configBlob)), true)
+
+ bar.mark100PercentComplete()
return destInfo, nil
}()
if err != nil {
@@ -1218,11 +1137,15 @@ func (ic *imageCopier) copyLayer(ctx context.Context, srcInfo types.BlobInfo, to
}
cachedDiffID := ic.c.blobInfoCache.UncompressedDigest(srcInfo.Digest) // May be ""
- // Diffs are needed if we are encrypting an image or trying to decrypt an image
- diffIDIsNeeded := ic.diffIDsAreNeeded && cachedDiffID == "" || toEncrypt || (isOciEncrypted(srcInfo.MediaType) && ic.c.ociDecryptConfig != nil)
-
- // If we already have the blob, and we don't need to compute the diffID, then we don't need to read it from the source.
- if !diffIDIsNeeded {
+ diffIDIsNeeded := ic.diffIDsAreNeeded && cachedDiffID == ""
+ // When encrypting to decrypting, only use the simple code path. We might be able to optimize more
+ // (e.g. if we know the DiffID of an encrypted compressed layer, it might not be necessary to pull, decrypt and decompress again),
+ // but it’s not trivially safe to do such things, so until someone takes the effort to make a comprehensive argument, let’s not.
+ encryptingOrDecrypting := toEncrypt || (isOciEncrypted(srcInfo.MediaType) && ic.c.ociDecryptConfig != nil)
+ canAvoidProcessingCompleteLayer := !diffIDIsNeeded && !encryptingOrDecrypting
+
+ // Don’t read the layer from the source if we already have the blob, and optimizations are acceptable.
+ if canAvoidProcessingCompleteLayer {
// TODO: at this point we don't know whether or not a blob we end up reusing is compressed using an algorithm
// that is acceptable for use on layers in the manifest that we'll be writing later, so if we end up reusing
// a blob that's compressed with e.g. zstd, but we're only allowed to write a v2s2 manifest, this will cause
@@ -1242,9 +1165,9 @@ func (ic *imageCopier) copyLayer(ctx context.Context, srcInfo types.BlobInfo, to
if reused {
logrus.Debugf("Skipping blob %s (already present):", srcInfo.Digest)
func() { // A scope for defer
- bar := ic.c.createProgressBar(pool, false, srcInfo, "blob", "skipped: already exists")
+ bar := ic.c.createProgressBar(pool, false, types.BlobInfo{Digest: blobInfo.Digest, Size: 0}, "blob", "skipped: already exists")
defer bar.Abort(false)
- bar.SetTotal(0, true)
+ bar.mark100PercentComplete()
}()
// Throw an event that the layer has been skipped
@@ -1275,7 +1198,7 @@ func (ic *imageCopier) copyLayer(ctx context.Context, srcInfo types.BlobInfo, to
// of the source file are not known yet and must be fetched.
// Attempt a partial only when the source allows to retrieve a blob partially and
// the destination has support for it.
- if ic.c.rawSource.SupportsGetBlobAt() && ic.c.dest.SupportsPutBlobPartial() && !diffIDIsNeeded {
+ if canAvoidProcessingCompleteLayer && ic.c.rawSource.SupportsGetBlobAt() && ic.c.dest.SupportsPutBlobPartial() {
if reused, blobInfo := func() (bool, types.BlobInfo) { // A scope for defer
bar := ic.c.createProgressBar(pool, true, srcInfo, "blob", "done")
hideProgressBar := true
@@ -1287,12 +1210,12 @@ func (ic *imageCopier) copyLayer(ctx context.Context, srcInfo types.BlobInfo, to
wrapped: ic.c.rawSource,
bar: bar,
}
- bar.SetTotal(srcInfo.Size, false)
info, err := ic.c.dest.PutBlobPartial(ctx, &proxy, srcInfo, ic.c.blobInfoCache)
if err == nil {
- bar.SetRefill(srcInfo.Size - bar.Current())
- bar.SetCurrent(srcInfo.Size)
- bar.SetTotal(srcInfo.Size, true)
+ if srcInfo.Size != -1 {
+ bar.SetRefill(srcInfo.Size - bar.Current())
+ }
+ bar.mark100PercentComplete()
hideProgressBar = false
logrus.Debugf("Retrieved partial blob %v", srcInfo.Digest)
return true, info
@@ -1305,16 +1228,16 @@ func (ic *imageCopier) copyLayer(ctx context.Context, srcInfo types.BlobInfo, to
}
// Fallback: copy the layer, computing the diffID if we need to do so
- srcStream, srcBlobSize, err := ic.c.rawSource.GetBlob(ctx, srcInfo, ic.c.blobInfoCache)
- if err != nil {
- return types.BlobInfo{}, "", errors.Wrapf(err, "reading blob %s", srcInfo.Digest)
- }
- defer srcStream.Close()
-
return func() (types.BlobInfo, digest.Digest, error) { // A scope for defer
bar := ic.c.createProgressBar(pool, false, srcInfo, "blob", "done")
defer bar.Abort(false)
+ srcStream, srcBlobSize, err := ic.c.rawSource.GetBlob(ctx, srcInfo, ic.c.blobInfoCache)
+ if err != nil {
+ return types.BlobInfo{}, "", errors.Wrapf(err, "reading blob %s", srcInfo.Digest)
+ }
+ defer srcStream.Close()
+
blobInfo, diffIDChan, err := ic.copyLayerFromStream(ctx, srcStream, types.BlobInfo{Digest: srcInfo.Digest, Size: srcBlobSize, MediaType: srcInfo.MediaType, Annotations: srcInfo.Annotations}, diffIDIsNeeded, toEncrypt, bar, layerIndex, emptyLayer)
if err != nil {
return types.BlobInfo{}, "", err
@@ -1330,14 +1253,22 @@ func (ic *imageCopier) copyLayer(ctx context.Context, srcInfo types.BlobInfo, to
return types.BlobInfo{}, "", errors.Wrap(diffIDResult.err, "computing layer DiffID")
}
logrus.Debugf("Computed DiffID %s for layer %s", diffIDResult.digest, srcInfo.Digest)
- // This is safe because we have just computed diffIDResult.Digest ourselves, and in the process
- // we have read all of the input blob, so srcInfo.Digest must have been validated by digestingReader.
- ic.c.blobInfoCache.RecordDigestUncompressedPair(srcInfo.Digest, diffIDResult.digest)
+ // Don’t record any associations that involve encrypted data. This is a bit crude,
+ // some blob substitutions (replacing pulls of encrypted data with local reuse of known decryption outcomes)
+ // might be safe, but it’s not trivially obvious, so let’s be conservative for now.
+ // This crude approach also means we don’t need to record whether a blob is encrypted
+ // in the blob info cache (which would probably be necessary for any more complex logic),
+ // and the simplicity is attractive.
+ if !encryptingOrDecrypting {
+ // This is safe because we have just computed diffIDResult.Digest ourselves, and in the process
+ // we have read all of the input blob, so srcInfo.Digest must have been validated by digestingReader.
+ ic.c.blobInfoCache.RecordDigestUncompressedPair(srcInfo.Digest, diffIDResult.digest)
+ }
diffID = diffIDResult.digest
}
}
- bar.SetTotal(srcInfo.Size, true)
+ bar.mark100PercentComplete()
return blobInfo, diffID, nil
}()
}
@@ -1347,7 +1278,7 @@ func (ic *imageCopier) copyLayer(ctx context.Context, srcInfo types.BlobInfo, to
// perhaps (de/re/)compressing the stream,
// and returns a complete blobInfo of the copied blob and perhaps a <-chan diffIDResult if diffIDIsNeeded, to be read by the caller.
func (ic *imageCopier) copyLayerFromStream(ctx context.Context, srcStream io.Reader, srcInfo types.BlobInfo,
- diffIDIsNeeded bool, toEncrypt bool, bar *mpb.Bar, layerIndex int, emptyLayer bool) (types.BlobInfo, <-chan diffIDResult, error) {
+ diffIDIsNeeded bool, toEncrypt bool, bar *progressBar, layerIndex int, emptyLayer bool) (types.BlobInfo, <-chan diffIDResult, error) {
var getDiffIDRecorder func(compressiontypes.DecompressorFunc) io.Writer // = nil
var diffIDChan chan diffIDResult
@@ -1424,7 +1355,7 @@ func (r errorAnnotationReader) Read(b []byte) (n int, err error) {
// and returns a complete blobInfo of the copied blob.
func (c *copier) copyBlobFromStream(ctx context.Context, srcStream io.Reader, srcInfo types.BlobInfo,
getOriginalLayerCopyWriter func(decompressor compressiontypes.DecompressorFunc) io.Writer,
- canModifyBlob bool, isConfig bool, toEncrypt bool, bar *mpb.Bar, layerIndex int, emptyLayer bool) (types.BlobInfo, error) {
+ canModifyBlob bool, isConfig bool, toEncrypt bool, bar *progressBar, layerIndex int, emptyLayer bool) (types.BlobInfo, error) {
if isConfig { // This is guaranteed by the caller, but set it here to be explicit.
canModifyBlob = false
}
@@ -1444,6 +1375,9 @@ func (c *copier) copyBlobFromStream(ctx context.Context, srcStream io.Reader, sr
}
var destStream io.Reader = digestingReader
+ // === Update progress bars
+ destStream = bar.ProxyReader(destStream)
+
// === Decrypt the stream, if required.
var decrypted bool
if isOciEncrypted(srcInfo.MediaType) && c.ociDecryptConfig != nil {
@@ -1478,9 +1412,6 @@ func (c *copier) copyBlobFromStream(ctx context.Context, srcStream io.Reader, sr
logrus.Debugf("blob %s with type %s should be compressed with %s, but compressor appears to be %s", srcInfo.Digest.String(), srcInfo.MediaType, expectedCompressionFormat.Name(), compressionFormat.Name())
}
- // === Update progress bars
- destStream = bar.ProxyReader(destStream)
-
// === Send a copy of the original, uncompressed, stream, to a separate path if necessary.
var originalLayerReader io.Reader // DO NOT USE this other than to drain the input if no other consumer in the pipeline has done so.
if getOriginalLayerCopyWriter != nil {
@@ -1681,19 +1612,27 @@ func (c *copier) copyBlobFromStream(ctx context.Context, srcStream io.Reader, sr
return types.BlobInfo{}, errors.Errorf("Internal error writing blob %s, blob with digest %s saved with digest %s", srcInfo.Digest, inputInfo.Digest, uploadedInfo.Digest)
}
if digestingReader.validationSucceeded {
- // If compressionOperation != types.PreserveOriginal, we now have two reliable digest values:
- // srcinfo.Digest describes the pre-compressionOperation input, verified by digestingReader
- // uploadedInfo.Digest describes the post-compressionOperation output, computed by PutBlob
- // (because inputInfo.Digest == "", this must have been computed afresh).
- switch compressionOperation {
- case types.PreserveOriginal:
- break // Do nothing, we have only one digest and we might not have even verified it.
- case types.Compress:
- c.blobInfoCache.RecordDigestUncompressedPair(uploadedInfo.Digest, srcInfo.Digest)
- case types.Decompress:
- c.blobInfoCache.RecordDigestUncompressedPair(srcInfo.Digest, uploadedInfo.Digest)
- default:
- return types.BlobInfo{}, errors.Errorf("Internal error: Unexpected compressionOperation value %#v", compressionOperation)
+ // Don’t record any associations that involve encrypted data. This is a bit crude,
+ // some blob substitutions (replacing pulls of encrypted data with local reuse of known decryption outcomes)
+ // might be safe, but it’s not trivially obvious, so let’s be conservative for now.
+ // This crude approach also means we don’t need to record whether a blob is encrypted
+ // in the blob info cache (which would probably be necessary for any more complex logic),
+ // and the simplicity is attractive.
+ if !encrypted && !decrypted {
+ // If compressionOperation != types.PreserveOriginal, we now have two reliable digest values:
+ // srcinfo.Digest describes the pre-compressionOperation input, verified by digestingReader
+ // uploadedInfo.Digest describes the post-compressionOperation output, computed by PutBlob
+ // (because inputInfo.Digest == "", this must have been computed afresh).
+ switch compressionOperation {
+ case types.PreserveOriginal:
+ break // Do nothing, we have only one digest and we might not have even verified it.
+ case types.Compress:
+ c.blobInfoCache.RecordDigestUncompressedPair(uploadedInfo.Digest, srcInfo.Digest)
+ case types.Decompress:
+ c.blobInfoCache.RecordDigestUncompressedPair(srcInfo.Digest, uploadedInfo.Digest)
+ default:
+ return types.BlobInfo{}, errors.Errorf("Internal error: Unexpected compressionOperation value %#v", compressionOperation)
+ }
}
if uploadCompressorName != "" && uploadCompressorName != internalblobinfocache.UnknownCompression {
c.blobInfoCache.RecordDigestCompressorName(uploadedInfo.Digest, uploadCompressorName)
diff --git a/vendor/github.com/containers/image/v5/copy/progress_bars.go b/vendor/github.com/containers/image/v5/copy/progress_bars.go
new file mode 100644
index 000000000..585d86057
--- /dev/null
+++ b/vendor/github.com/containers/image/v5/copy/progress_bars.go
@@ -0,0 +1,148 @@
+package copy
+
+import (
+ "context"
+ "fmt"
+ "io"
+
+ "github.com/containers/image/v5/internal/private"
+ "github.com/containers/image/v5/types"
+ "github.com/vbauerster/mpb/v7"
+ "github.com/vbauerster/mpb/v7/decor"
+)
+
+// newProgressPool creates a *mpb.Progress.
+// The caller must eventually call pool.Wait() after the pool will no longer be updated.
+// NOTE: Every progress bar created within the progress pool must either successfully
+// complete or be aborted, or pool.Wait() will hang. That is typically done
+// using "defer bar.Abort(false)", which must be called BEFORE pool.Wait() is called.
+func (c *copier) newProgressPool() *mpb.Progress {
+ return mpb.New(mpb.WithWidth(40), mpb.WithOutput(c.progressOutput))
+}
+
+// customPartialBlobDecorFunc implements mpb.DecorFunc for the partial blobs retrieval progress bar
+func customPartialBlobDecorFunc(s decor.Statistics) string {
+ if s.Total == 0 {
+ pairFmt := "%.1f / %.1f (skipped: %.1f)"
+ return fmt.Sprintf(pairFmt, decor.SizeB1024(s.Current), decor.SizeB1024(s.Total), decor.SizeB1024(s.Refill))
+ }
+ pairFmt := "%.1f / %.1f (skipped: %.1f = %.2f%%)"
+ percentage := 100.0 * float64(s.Refill) / float64(s.Total)
+ return fmt.Sprintf(pairFmt, decor.SizeB1024(s.Current), decor.SizeB1024(s.Total), decor.SizeB1024(s.Refill), percentage)
+}
+
+// progressBar wraps a *mpb.Bar, allowing us to add extra state and methods.
+type progressBar struct {
+ *mpb.Bar
+ originalSize int64 // or -1 if unknown
+}
+
+// createProgressBar creates a progressBar in pool. Note that if the copier's reportWriter
+// is io.Discard, the progress bar's output will be discarded
+//
+// NOTE: Every progress bar created within a progress pool must either successfully
+// complete or be aborted, or pool.Wait() will hang. That is typically done
+// using "defer bar.Abort(false)", which must happen BEFORE pool.Wait() is called.
+//
+// As a convention, most users of progress bars should call mark100PercentComplete on full success;
+// by convention, we don't leave progress bars in partial state when fully done
+// (even if we copied much less data than anticipated).
+func (c *copier) createProgressBar(pool *mpb.Progress, partial bool, info types.BlobInfo, kind string, onComplete string) *progressBar {
+ // shortDigestLen is the length of the digest used for blobs.
+ const shortDigestLen = 12
+
+ prefix := fmt.Sprintf("Copying %s %s", kind, info.Digest.Encoded())
+ // Truncate the prefix (chopping of some part of the digest) to make all progress bars aligned in a column.
+ maxPrefixLen := len("Copying blob ") + shortDigestLen
+ if len(prefix) > maxPrefixLen {
+ prefix = prefix[:maxPrefixLen]
+ }
+
+ // onComplete will replace prefix once the bar/spinner has completed
+ onComplete = prefix + " " + onComplete
+
+ // Use a normal progress bar when we know the size (i.e., size > 0).
+ // Otherwise, use a spinner to indicate that something's happening.
+ var bar *mpb.Bar
+ if info.Size > 0 {
+ if partial {
+ bar = pool.AddBar(info.Size,
+ mpb.BarFillerClearOnComplete(),
+ mpb.PrependDecorators(
+ decor.OnComplete(decor.Name(prefix), onComplete),
+ ),
+ mpb.AppendDecorators(
+ decor.Any(customPartialBlobDecorFunc),
+ ),
+ )
+ } else {
+ bar = pool.AddBar(info.Size,
+ mpb.BarFillerClearOnComplete(),
+ mpb.PrependDecorators(
+ decor.OnComplete(decor.Name(prefix), onComplete),
+ ),
+ mpb.AppendDecorators(
+ decor.OnComplete(decor.CountersKibiByte("%.1f / %.1f"), ""),
+ ),
+ )
+ }
+ } else {
+ bar = pool.New(0,
+ mpb.SpinnerStyle(".", "..", "...", "....", "").PositionLeft(),
+ mpb.BarFillerClearOnComplete(),
+ mpb.PrependDecorators(
+ decor.OnComplete(decor.Name(prefix), onComplete),
+ ),
+ )
+ }
+ if c.progressOutput == io.Discard {
+ c.Printf("Copying %s %s\n", kind, info.Digest)
+ }
+ return &progressBar{
+ Bar: bar,
+ originalSize: info.Size,
+ }
+}
+
+// mark100PercentComplete marks the progres bars as 100% complete;
+// it may do so by possibly advancing the current state if it is below the known total.
+func (bar *progressBar) mark100PercentComplete() {
+ if bar.originalSize > 0 {
+ // We can't call bar.SetTotal even if we wanted to; the total can not be changed
+ // after a progress bar is created with a definite total.
+ bar.SetCurrent(bar.originalSize) // This triggers the completion condition.
+ } else {
+ // -1 = unknown size
+ // 0 is somewhat of a a special case: Unlike c/image, where 0 is a definite known
+ // size (possible at least in theory), in mpb, zero-sized progress bars are treated
+ // as unknown size, in particular they are not configured to be marked as
+ // complete on bar.Current() reaching bar.total (because that would happen already
+ // when creating the progress bar).
+ // That means that we are both _allowed_ to call SetTotal, and we _have to_.
+ bar.SetTotal(-1, true) // total < 0 = set it to bar.Current(), report it; and mark the bar as complete.
+ }
+}
+
+// blobChunkAccessorProxy wraps a BlobChunkAccessor and updates a *progressBar
+// with the number of received bytes.
+type blobChunkAccessorProxy struct {
+ wrapped private.BlobChunkAccessor // The underlying BlobChunkAccessor
+ bar *progressBar // A progress bar updated with the number of bytes read so far
+}
+
+// GetBlobAt returns a sequential channel of readers that contain data for the requested
+// blob chunks, and a channel that might get a single error value.
+// The specified chunks must be not overlapping and sorted by their offset.
+// The readers must be fully consumed, in the order they are returned, before blocking
+// to read the next chunk.
+func (s *blobChunkAccessorProxy) GetBlobAt(ctx context.Context, info types.BlobInfo, chunks []private.ImageSourceChunk) (chan io.ReadCloser, chan error, error) {
+ rc, errs, err := s.wrapped.GetBlobAt(ctx, info, chunks)
+ if err == nil {
+ total := int64(0)
+ for _, c := range chunks {
+ total += int64(c.Length)
+ }
+ s.bar.IncrInt64(total)
+ }
+ return rc, errs, err
+}
diff --git a/vendor/github.com/containers/image/v5/copy/progress_reader.go b/vendor/github.com/containers/image/v5/copy/progress_channel.go
index de23cec1b..d5e9e09bd 100644
--- a/vendor/github.com/containers/image/v5/copy/progress_reader.go
+++ b/vendor/github.com/containers/image/v5/copy/progress_channel.go
@@ -1,16 +1,13 @@
package copy
import (
- "context"
"io"
"time"
- "github.com/containers/image/v5/internal/private"
"github.com/containers/image/v5/types"
- "github.com/vbauerster/mpb/v7"
)
-// progressReader is a reader that reports its progress on an interval.
+// progressReader is a reader that reports its progress to a types.ProgressProperties channel on an interval.
type progressReader struct {
source io.Reader
channel chan<- types.ProgressProperties
@@ -80,27 +77,3 @@ func (r *progressReader) Read(p []byte) (int, error) {
}
return n, err
}
-
-// blobChunkAccessorProxy wraps a BlobChunkAccessor and keeps track of how many bytes
-// are received.
-type blobChunkAccessorProxy struct {
- wrapped private.BlobChunkAccessor // The underlying BlobChunkAccessor
- bar *mpb.Bar // A progress bar updated with the number of bytes read so far
-}
-
-// GetBlobAt returns a sequential channel of readers that contain data for the requested
-// blob chunks, and a channel that might get a single error value.
-// The specified chunks must be not overlapping and sorted by their offset.
-// The readers must be fully consumed, in the order they are returned, before blocking
-// to read the next chunk.
-func (s *blobChunkAccessorProxy) GetBlobAt(ctx context.Context, info types.BlobInfo, chunks []private.ImageSourceChunk) (chan io.ReadCloser, chan error, error) {
- rc, errs, err := s.wrapped.GetBlobAt(ctx, info, chunks)
- if err == nil {
- total := int64(0)
- for _, c := range chunks {
- total += int64(c.Length)
- }
- s.bar.IncrInt64(total)
- }
- return rc, errs, err
-}
diff --git a/vendor/github.com/containers/image/v5/version/version.go b/vendor/github.com/containers/image/v5/version/version.go
index 9447d53c4..6295b5493 100644
--- a/vendor/github.com/containers/image/v5/version/version.go
+++ b/vendor/github.com/containers/image/v5/version/version.go
@@ -11,7 +11,7 @@ const (
VersionPatch = 1
// VersionDev indicates development branch. Releases will be empty string.
- VersionDev = "-dev"
+ VersionDev = ""
)
// Version is the specification version that the package types support.
diff --git a/vendor/github.com/containers/ocicrypt/MAINTAINERS b/vendor/github.com/containers/ocicrypt/MAINTAINERS
index e6a7d1f0a..af38d03bf 100644
--- a/vendor/github.com/containers/ocicrypt/MAINTAINERS
+++ b/vendor/github.com/containers/ocicrypt/MAINTAINERS
@@ -3,3 +3,4 @@
# Github ID, Name, Email Address
lumjjb, Brandon Lum, lumjjb@gmail.com
stefanberger, Stefan Berger, stefanb@linux.ibm.com
+arronwy, Arron Wang, arron.wang@intel.com
diff --git a/vendor/github.com/containers/ocicrypt/crypto/pkcs11/pkcs11helpers.go b/vendor/github.com/containers/ocicrypt/crypto/pkcs11/pkcs11helpers.go
index 448e88c7c..7d80f5f84 100644
--- a/vendor/github.com/containers/ocicrypt/crypto/pkcs11/pkcs11helpers.go
+++ b/vendor/github.com/containers/ocicrypt/crypto/pkcs11/pkcs11helpers.go
@@ -40,8 +40,6 @@ import (
var (
// OAEPLabel defines the label we use for OAEP encryption; this cannot be changed
OAEPLabel = []byte("")
- // OAEPDefaultHash defines the default hash used for OAEP encryption; this cannot be changed
- OAEPDefaultHash = "sha1"
// OAEPSha1Params describes the OAEP parameters with sha1 hash algorithm; needed by SoftHSM
OAEPSha1Params = &pkcs11.OAEPParams{
@@ -69,12 +67,12 @@ func rsaPublicEncryptOAEP(pubKey *rsa.PublicKey, plaintext []byte) ([]byte, stri
)
oaephash := os.Getenv("OCICRYPT_OAEP_HASHALG")
- // The default is 'sha1'
+ // The default is sha256 (previously was sha1)
switch strings.ToLower(oaephash) {
- case "sha1", "":
+ case "sha1":
hashfunc = sha1.New()
hashalg = "sha1"
- case "sha256":
+ case "sha256", "":
hashfunc = sha256.New()
hashalg = "sha256"
default:
@@ -283,12 +281,12 @@ func publicEncryptOAEP(pubKey *Pkcs11KeyFileObject, plaintext []byte) ([]byte, s
var oaep *pkcs11.OAEPParams
oaephash := os.Getenv("OCICRYPT_OAEP_HASHALG")
- // the default is sha1
+ // The default is sha256 (previously was sha1)
switch strings.ToLower(oaephash) {
- case "sha1", "":
+ case "sha1":
oaep = OAEPSha1Params
hashalg = "sha1"
- case "sha256":
+ case "sha256", "":
oaep = OAEPSha256Params
hashalg = "sha256"
default:
@@ -333,7 +331,7 @@ func privateDecryptOAEP(privKeyObj *Pkcs11KeyFileObject, ciphertext []byte, hash
var oaep *pkcs11.OAEPParams
- // the default is sha1
+ // An empty string from the Hash in the JSON historically defaults to sha1.
switch hashalg {
case "sha1", "":
oaep = OAEPSha1Params
@@ -410,9 +408,6 @@ func EncryptMultiple(pubKeys []interface{}, data []byte) ([]byte, error) {
return nil, err
}
- if hashalg == OAEPDefaultHash {
- hashalg = ""
- }
recipient := Pkcs11Recipient{
Version: 0,
Blob: base64.StdEncoding.EncodeToString(ciphertext),
@@ -431,15 +426,18 @@ func EncryptMultiple(pubKeys []interface{}, data []byte) ([]byte, error) {
// {
// "version": 0,
// "blob": <base64 encoded RSA OAEP encrypted blob>,
-// "hash": <hash used for OAEP other than 'sha256'>
+// "hash": <hash used for OAEP other than 'sha1'>
// } ,
// {
// "version": 0,
// "blob": <base64 encoded RSA OAEP encrypted blob>,
-// "hash": <hash used for OAEP other than 'sha256'>
+// "hash": <hash used for OAEP other than 'sha1'>
// } ,
// [...]
// }
+// Note: More recent versions of this code explicitly write 'sha1'
+// while older versions left it empty in case of 'sha1'.
+//
func Decrypt(privKeyObjs []*Pkcs11KeyFileObject, pkcs11blobstr []byte) ([]byte, error) {
pkcs11blob := Pkcs11Blob{}
err := json.Unmarshal(pkcs11blobstr, &pkcs11blob)
diff --git a/vendor/github.com/containers/storage/VERSION b/vendor/github.com/containers/storage/VERSION
index 79833f2ce..32b7211cb 100644
--- a/vendor/github.com/containers/storage/VERSION
+++ b/vendor/github.com/containers/storage/VERSION
@@ -1 +1 @@
-1.39.0+dev
+1.40.0
diff --git a/vendor/github.com/containers/storage/drivers/chown_unix.go b/vendor/github.com/containers/storage/drivers/chown_unix.go
index 3c508b66b..c598b936d 100644
--- a/vendor/github.com/containers/storage/drivers/chown_unix.go
+++ b/vendor/github.com/containers/storage/drivers/chown_unix.go
@@ -76,7 +76,7 @@ func (c *platformChowner) LChown(path string, info os.FileInfo, toHost, toContai
UID: uid,
GID: gid,
}
- mappedPair, err := toHost.ToHost(pair)
+ mappedPair, err := toHost.ToHostOverflow(pair)
if err != nil {
return fmt.Errorf("error mapping container ID pair %#v for %q to host: %v", pair, path, err)
}
diff --git a/vendor/github.com/containers/storage/drivers/driver_freebsd.go b/vendor/github.com/containers/storage/drivers/driver_freebsd.go
index 79a591288..143cccf92 100644
--- a/vendor/github.com/containers/storage/drivers/driver_freebsd.go
+++ b/vendor/github.com/containers/storage/drivers/driver_freebsd.go
@@ -1,7 +1,6 @@
package graphdriver
import (
- "fmt"
"golang.org/x/sys/unix"
"github.com/containers/storage/pkg/mount"
diff --git a/vendor/github.com/containers/storage/drivers/overlay/overlay.go b/vendor/github.com/containers/storage/drivers/overlay/overlay.go
index 550dc7d39..09d24ae8b 100644
--- a/vendor/github.com/containers/storage/drivers/overlay/overlay.go
+++ b/vendor/github.com/containers/storage/drivers/overlay/overlay.go
@@ -939,6 +939,16 @@ func (d *Driver) create(id, parent string, opts *graphdriver.CreateOpts, disable
rootUID = int(st.UID())
rootGID = int(st.GID())
}
+
+ if _, err := system.Lstat(dir); err == nil {
+ logrus.Warnf("Trying to create a layer %#v while directory %q already exists; removing it first", id, dir)
+ // Don’t just os.RemoveAll(dir) here; d.Remove also removes the link in linkDir,
+ // so that we can’t end up with two symlinks in linkDir pointing to the same layer.
+ if err := d.Remove(id); err != nil {
+ return errors.Wrapf(err, "removing a pre-existing layer directory %q", dir)
+ }
+ }
+
if err := idtools.MkdirAllAndChownNew(dir, 0700, idPair); err != nil {
return err
}
@@ -1389,7 +1399,7 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO
}
for err == nil {
absLowers = append(absLowers, filepath.Join(dir, nameWithSuffix("diff", diffN)))
- relLowers = append(relLowers, dumbJoin(string(link), "..", nameWithSuffix("diff", diffN)))
+ relLowers = append(relLowers, dumbJoin(linkDir, string(link), "..", nameWithSuffix("diff", diffN)))
diffN++
st, err = os.Stat(filepath.Join(dir, nameWithSuffix("diff", diffN)))
if err == nil && !permsKnown {
diff --git a/vendor/github.com/containers/storage/drivers/zfs/zfs_freebsd.go b/vendor/github.com/containers/storage/drivers/zfs/zfs_freebsd.go
index fd98ad305..61a2ed871 100644
--- a/vendor/github.com/containers/storage/drivers/zfs/zfs_freebsd.go
+++ b/vendor/github.com/containers/storage/drivers/zfs/zfs_freebsd.go
@@ -2,7 +2,6 @@ package zfs
import (
"fmt"
- "strings"
"github.com/containers/storage/drivers"
"github.com/pkg/errors"
@@ -26,19 +25,10 @@ func checkRootdirFs(rootdir string) error {
}
func getMountpoint(id string) string {
- maxlen := 12
-
- // we need to preserve filesystem suffix
- suffix := strings.SplitN(id, "-", 2)
-
- if len(suffix) > 1 {
- return id[:maxlen] + "-" + suffix[1]
- }
-
- return id[:maxlen]
+ return id
}
func detachUnmount(mountpoint string) error {
- // FreeBSD doesn't have an equivalent to MNT_DETACH
- return unix.Unmount(mountpoint, 0)
+ // FreeBSD's MNT_FORCE is roughly equivalent to MNT_DETACH
+ return unix.Unmount(mountpoint, unix.MNT_FORCE)
}
diff --git a/vendor/github.com/containers/storage/go.mod b/vendor/github.com/containers/storage/go.mod
index 1915ea65d..5d8c59960 100644
--- a/vendor/github.com/containers/storage/go.mod
+++ b/vendor/github.com/containers/storage/go.mod
@@ -12,7 +12,7 @@ require (
github.com/google/go-intervals v0.0.2
github.com/hashicorp/go-multierror v1.1.1
github.com/json-iterator/go v1.1.12
- github.com/klauspost/compress v1.15.1
+ github.com/klauspost/compress v1.15.2
github.com/klauspost/pgzip v1.2.5
github.com/mattn/go-shellwords v1.0.12
github.com/mistifyio/go-zfs v2.1.2-0.20190413222219-f784269be439+incompatible
diff --git a/vendor/github.com/containers/storage/go.sum b/vendor/github.com/containers/storage/go.sum
index cd5bf3b97..97a0d167d 100644
--- a/vendor/github.com/containers/storage/go.sum
+++ b/vendor/github.com/containers/storage/go.sum
@@ -424,8 +424,9 @@ github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/compress v1.11.3/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
github.com/klauspost/compress v1.11.13/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
-github.com/klauspost/compress v1.15.1 h1:y9FcTHGyrebwfP0ZZqFiaxTaiDnUrGkJkI+f583BL1A=
github.com/klauspost/compress v1.15.1/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
+github.com/klauspost/compress v1.15.2 h1:3WH+AG7s2+T8o3nrM/8u2rdqUEcQhmga7smjrT41nAw=
+github.com/klauspost/compress v1.15.2/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU=
github.com/klauspost/pgzip v1.2.5 h1:qnWYvvKqedOF2ulHpMG72XQol4ILEJ8k2wwRl/Km8oE=
github.com/klauspost/pgzip v1.2.5/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
diff --git a/vendor/github.com/containers/storage/layers.go b/vendor/github.com/containers/storage/layers.go
index 34d27ffa3..bba8d7588 100644
--- a/vendor/github.com/containers/storage/layers.go
+++ b/vendor/github.com/containers/storage/layers.go
@@ -692,11 +692,10 @@ func (r *layerStore) PutAdditionalLayer(id string, parentLayer *Layer, names []s
return copyLayer(layer), nil
}
-func (r *layerStore) Put(id string, parentLayer *Layer, names []string, mountLabel string, options map[string]string, moreOptions *LayerOptions, writeable bool, flags map[string]interface{}, diff io.Reader) (layer *Layer, size int64, err error) {
+func (r *layerStore) Put(id string, parentLayer *Layer, names []string, mountLabel string, options map[string]string, moreOptions *LayerOptions, writeable bool, flags map[string]interface{}, diff io.Reader) (*Layer, int64, error) {
if !r.IsReadWrite() {
return nil, -1, errors.Wrapf(ErrStoreIsReadOnly, "not allowed to create new layers at %q", r.layerspath())
}
- size = -1
if err := os.MkdirAll(r.rundir, 0700); err != nil {
return nil, -1, err
}
@@ -762,6 +761,60 @@ func (r *layerStore) Put(id string, parentLayer *Layer, names []string, mountLab
if mountLabel != "" {
label.ReserveLabel(mountLabel)
}
+
+ // Before actually creating the layer, make a persistent record of it with incompleteFlag,
+ // so that future processes have a chance to delete it.
+ layer := &Layer{
+ ID: id,
+ Parent: parent,
+ Names: names,
+ MountLabel: mountLabel,
+ Metadata: templateMetadata,
+ Created: time.Now().UTC(),
+ CompressedDigest: templateCompressedDigest,
+ CompressedSize: templateCompressedSize,
+ UncompressedDigest: templateUncompressedDigest,
+ UncompressedSize: templateUncompressedSize,
+ CompressionType: templateCompressionType,
+ UIDs: templateUIDs,
+ GIDs: templateGIDs,
+ Flags: make(map[string]interface{}),
+ UIDMap: copyIDMap(moreOptions.UIDMap),
+ GIDMap: copyIDMap(moreOptions.GIDMap),
+ BigDataNames: []string{},
+ }
+ r.layers = append(r.layers, layer)
+ r.idindex.Add(id)
+ r.byid[id] = layer
+ for _, name := range names {
+ r.byname[name] = layer
+ }
+ for flag, value := range flags {
+ layer.Flags[flag] = value
+ }
+ layer.Flags[incompleteFlag] = true
+
+ succeeded := false
+ cleanupFailureContext := ""
+ defer func() {
+ if !succeeded {
+ // On any error, try both removing the driver's data as well
+ // as the in-memory layer record.
+ if err2 := r.Delete(layer.ID); err2 != nil {
+ if cleanupFailureContext == "" {
+ cleanupFailureContext = "unknown: cleanupFailureContext not set at the failure site"
+ }
+ logrus.Errorf("While recovering from a failure (%s), error deleting layer %#v: %v", cleanupFailureContext, layer.ID, err2)
+ }
+ }
+ }()
+
+ err := r.Save()
+ if err != nil {
+ cleanupFailureContext = "saving incomplete layer metadata"
+ return nil, -1, err
+ }
+
idMappings := idtools.NewIDMappingsFromMaps(moreOptions.UIDMap, moreOptions.GIDMap)
opts := drivers.CreateOpts{
MountLabel: mountLabel,
@@ -769,132 +822,67 @@ func (r *layerStore) Put(id string, parentLayer *Layer, names []string, mountLab
IDMappings: idMappings,
}
if moreOptions.TemplateLayer != "" {
- if err = r.driver.CreateFromTemplate(id, moreOptions.TemplateLayer, templateIDMappings, parent, parentMappings, &opts, writeable); err != nil {
+ if err := r.driver.CreateFromTemplate(id, moreOptions.TemplateLayer, templateIDMappings, parent, parentMappings, &opts, writeable); err != nil {
+ cleanupFailureContext = "creating a layer from template"
return nil, -1, errors.Wrapf(err, "error creating copy of template layer %q with ID %q", moreOptions.TemplateLayer, id)
}
oldMappings = templateIDMappings
} else {
if writeable {
- if err = r.driver.CreateReadWrite(id, parent, &opts); err != nil {
+ if err := r.driver.CreateReadWrite(id, parent, &opts); err != nil {
+ cleanupFailureContext = "creating a read-write layer"
return nil, -1, errors.Wrapf(err, "error creating read-write layer with ID %q", id)
}
} else {
- if err = r.driver.Create(id, parent, &opts); err != nil {
+ if err := r.driver.Create(id, parent, &opts); err != nil {
+ cleanupFailureContext = "creating a read-only layer"
return nil, -1, errors.Wrapf(err, "error creating layer with ID %q", id)
}
}
oldMappings = parentMappings
}
if !reflect.DeepEqual(oldMappings.UIDs(), idMappings.UIDs()) || !reflect.DeepEqual(oldMappings.GIDs(), idMappings.GIDs()) {
- if err = r.driver.UpdateLayerIDMap(id, oldMappings, idMappings, mountLabel); err != nil {
- // We don't have a record of this layer, but at least
- // try to clean it up underneath us.
- if err2 := r.driver.Remove(id); err2 != nil {
- logrus.Errorf("While recovering from a failure creating in UpdateLayerIDMap, error deleting layer %#v: %v", id, err2)
- }
+ if err := r.driver.UpdateLayerIDMap(id, oldMappings, idMappings, mountLabel); err != nil {
+ cleanupFailureContext = "in UpdateLayerIDMap"
return nil, -1, err
}
}
if len(templateTSdata) > 0 {
if err := os.MkdirAll(filepath.Dir(r.tspath(id)), 0o700); err != nil {
- // We don't have a record of this layer, but at least
- // try to clean it up underneath us.
- if err2 := r.driver.Remove(id); err2 != nil {
- logrus.Errorf("While recovering from a failure creating in UpdateLayerIDMap, error deleting layer %#v: %v", id, err2)
- }
+ cleanupFailureContext = "creating tar-split parent directory for a copy from template"
return nil, -1, err
}
- if err = ioutils.AtomicWriteFile(r.tspath(id), templateTSdata, 0o600); err != nil {
- // We don't have a record of this layer, but at least
- // try to clean it up underneath us.
- if err2 := r.driver.Remove(id); err2 != nil {
- logrus.Errorf("While recovering from a failure creating in UpdateLayerIDMap, error deleting layer %#v: %v", id, err2)
- }
+ if err := ioutils.AtomicWriteFile(r.tspath(id), templateTSdata, 0o600); err != nil {
+ cleanupFailureContext = "creating a tar-split copy from template"
return nil, -1, err
}
}
- if err == nil {
- layer = &Layer{
- ID: id,
- Parent: parent,
- Names: names,
- MountLabel: mountLabel,
- Metadata: templateMetadata,
- Created: time.Now().UTC(),
- CompressedDigest: templateCompressedDigest,
- CompressedSize: templateCompressedSize,
- UncompressedDigest: templateUncompressedDigest,
- UncompressedSize: templateUncompressedSize,
- CompressionType: templateCompressionType,
- UIDs: templateUIDs,
- GIDs: templateGIDs,
- Flags: make(map[string]interface{}),
- UIDMap: copyIDMap(moreOptions.UIDMap),
- GIDMap: copyIDMap(moreOptions.GIDMap),
- BigDataNames: []string{},
- }
- r.layers = append(r.layers, layer)
- r.idindex.Add(id)
- r.byid[id] = layer
- for _, name := range names {
- r.byname[name] = layer
- }
- for flag, value := range flags {
- layer.Flags[flag] = value
- }
- savedIncompleteLayer := false
- if diff != nil {
- layer.Flags[incompleteFlag] = true
- err = r.Save()
- if err != nil {
- // We don't have a record of this layer, but at least
- // try to clean it up underneath us.
- if err2 := r.driver.Remove(id); err2 != nil {
- logrus.Errorf("While recovering from a failure saving incomplete layer metadata, error deleting layer %#v: %v", id, err2)
- }
- return nil, -1, err
- }
- savedIncompleteLayer = true
- size, err = r.applyDiffWithOptions(layer.ID, moreOptions, diff)
- if err != nil {
- if err2 := r.Delete(layer.ID); err2 != nil {
- // Either a driver error or an error saving.
- // We now have a layer that's been marked for
- // deletion but which we failed to remove.
- logrus.Errorf("While recovering from a failure applying layer diff, error deleting layer %#v: %v", layer.ID, err2)
- }
- return nil, -1, err
- }
- delete(layer.Flags, incompleteFlag)
- } else {
- // applyDiffWithOptions in the `diff != nil` case handles this bit for us
- if layer.CompressedDigest != "" {
- r.bycompressedsum[layer.CompressedDigest] = append(r.bycompressedsum[layer.CompressedDigest], layer.ID)
- }
- if layer.UncompressedDigest != "" {
- r.byuncompressedsum[layer.UncompressedDigest] = append(r.byuncompressedsum[layer.UncompressedDigest], layer.ID)
- }
- }
- err = r.Save()
+
+ var size int64 = -1
+ if diff != nil {
+ size, err = r.applyDiffWithOptions(layer.ID, moreOptions, diff)
if err != nil {
- if savedIncompleteLayer {
- if err2 := r.Delete(layer.ID); err2 != nil {
- // Either a driver error or an error saving.
- // We now have a layer that's been marked for
- // deletion but which we failed to remove.
- logrus.Errorf("While recovering from a failure saving finished layer metadata, error deleting layer %#v: %v", layer.ID, err2)
- }
- } else {
- // We don't have a record of this layer, but at least
- // try to clean it up underneath us.
- if err2 := r.driver.Remove(id); err2 != nil {
- logrus.Errorf("While recovering from a failure saving finished layer metadata, error deleting layer %#v in graph driver: %v", id, err2)
- }
- }
+ cleanupFailureContext = "applying layer diff"
return nil, -1, err
}
- layer = copyLayer(layer)
+ } else {
+ // applyDiffWithOptions in the `diff != nil` case handles this bit for us
+ if layer.CompressedDigest != "" {
+ r.bycompressedsum[layer.CompressedDigest] = append(r.bycompressedsum[layer.CompressedDigest], layer.ID)
+ }
+ if layer.UncompressedDigest != "" {
+ r.byuncompressedsum[layer.UncompressedDigest] = append(r.byuncompressedsum[layer.UncompressedDigest], layer.ID)
+ }
}
+ delete(layer.Flags, incompleteFlag)
+ err = r.Save()
+ if err != nil {
+ cleanupFailureContext = "saving finished layer metadata"
+ return nil, -1, err
+ }
+
+ layer = copyLayer(layer)
+ succeeded = true
return layer, size, err
}
diff --git a/vendor/github.com/containers/storage/pkg/idtools/idtools.go b/vendor/github.com/containers/storage/pkg/idtools/idtools.go
index 7c8f4d10c..7a8fec0ce 100644
--- a/vendor/github.com/containers/storage/pkg/idtools/idtools.go
+++ b/vendor/github.com/containers/storage/pkg/idtools/idtools.go
@@ -3,15 +3,18 @@ package idtools
import (
"bufio"
"fmt"
+ "io/ioutil"
"os"
"os/user"
"sort"
"strconv"
"strings"
+ "sync"
"syscall"
"github.com/containers/storage/pkg/system"
"github.com/pkg/errors"
+ "github.com/sirupsen/logrus"
)
// IDMap contains a single entry for user namespace range remapping. An array
@@ -203,6 +206,67 @@ func (i *IDMappings) ToHost(pair IDPair) (IDPair, error) {
return target, err
}
+var (
+ overflowUIDOnce sync.Once
+ overflowGIDOnce sync.Once
+ overflowUID int
+ overflowGID int
+)
+
+// getOverflowUID returns the UID mapped to the overflow user
+func getOverflowUID() int {
+ overflowUIDOnce.Do(func() {
+ // 65534 is the value on older kernels where /proc/sys/kernel/overflowuid is not present
+ overflowUID = 65534
+ if content, err := ioutil.ReadFile("/proc/sys/kernel/overflowuid"); err == nil {
+ if tmp, err := strconv.Atoi(string(content)); err == nil {
+ overflowUID = tmp
+ }
+ }
+ })
+ return overflowUID
+}
+
+// getOverflowUID returns the GID mapped to the overflow user
+func getOverflowGID() int {
+ overflowGIDOnce.Do(func() {
+ // 65534 is the value on older kernels where /proc/sys/kernel/overflowgid is not present
+ overflowGID = 65534
+ if content, err := ioutil.ReadFile("/proc/sys/kernel/overflowgid"); err == nil {
+ if tmp, err := strconv.Atoi(string(content)); err == nil {
+ overflowGID = tmp
+ }
+ }
+ })
+ return overflowGID
+}
+
+// ToHost returns the host UID and GID for the container uid, gid.
+// Remapping is only performed if the ids aren't already the remapped root ids
+// If the mapping is not possible because the target ID is not mapped into
+// the namespace, then the overflow ID is used.
+func (i *IDMappings) ToHostOverflow(pair IDPair) (IDPair, error) {
+ var err error
+ target := i.RootPair()
+
+ if pair.UID != target.UID {
+ target.UID, err = RawToHost(pair.UID, i.uids)
+ if err != nil {
+ target.UID = getOverflowUID()
+ logrus.Debugf("Failed to map UID %v to the target mapping, using the overflow ID %v", pair.UID, target.UID)
+ }
+ }
+
+ if pair.GID != target.GID {
+ target.GID, err = RawToHost(pair.GID, i.gids)
+ if err != nil {
+ target.GID = getOverflowGID()
+ logrus.Debugf("Failed to map GID %v to the target mapping, using the overflow ID %v", pair.GID, target.GID)
+ }
+ }
+ return target, nil
+}
+
// ToContainer returns the container UID and GID for the host uid and gid
func (i *IDMappings) ToContainer(pair IDPair) (int, int, error) {
uid, err := RawToContainer(pair.UID, i.uids)
diff --git a/vendor/github.com/containers/storage/pkg/mount/flags_freebsd.go b/vendor/github.com/containers/storage/pkg/mount/flags_freebsd.go
new file mode 100644
index 000000000..3ba99cf93
--- /dev/null
+++ b/vendor/github.com/containers/storage/pkg/mount/flags_freebsd.go
@@ -0,0 +1,48 @@
+package mount
+
+import (
+ "golang.org/x/sys/unix"
+)
+
+const (
+ // RDONLY will mount the file system read-only.
+ RDONLY = unix.MNT_RDONLY
+
+ // NOSUID will not allow set-user-identifier or set-group-identifier bits to
+ // take effect.
+ NOSUID = unix.MNT_NOSUID
+
+ // NOEXEC will not allow execution of any binaries on the mounted file system.
+ NOEXEC = unix.MNT_NOEXEC
+
+ // SYNCHRONOUS will allow I/O to the file system to be done synchronously.
+ SYNCHRONOUS = unix.MNT_SYNCHRONOUS
+
+ // REMOUNT will attempt to remount an already-mounted file system. This is
+ // commonly used to change the mount flags for a file system, especially to
+ // make a readonly file system writeable. It does not change device or mount
+ // point.
+ REMOUNT = unix.MNT_UPDATE
+
+ // NOATIME will not update the file access time when reading from a file.
+ NOATIME = unix.MNT_NOATIME
+
+ mntDetach = unix.MNT_FORCE
+
+ NODIRATIME = 0
+ NODEV = 0
+ DIRSYNC = 0
+ MANDLOCK = 0
+ BIND = 0
+ RBIND = 0
+ UNBINDABLE = 0
+ RUNBINDABLE = 0
+ PRIVATE = 0
+ RPRIVATE = 0
+ SLAVE = 0
+ RSLAVE = 0
+ SHARED = 0
+ RSHARED = 0
+ RELATIME = 0
+ STRICTATIME = 0
+)
diff --git a/vendor/github.com/containers/storage/pkg/mount/flags_unsupported.go b/vendor/github.com/containers/storage/pkg/mount/flags_unsupported.go
index 9afd26d4c..ee0f593a5 100644
--- a/vendor/github.com/containers/storage/pkg/mount/flags_unsupported.go
+++ b/vendor/github.com/containers/storage/pkg/mount/flags_unsupported.go
@@ -1,4 +1,5 @@
-// +build !linux
+//go:build !linux && !freebsd
+// +build !linux,!freebsd
package mount
diff --git a/vendor/github.com/containers/storage/pkg/mount/mounter_freebsd.go b/vendor/github.com/containers/storage/pkg/mount/mounter_freebsd.go
index b31cf99d0..72ceec3dd 100644
--- a/vendor/github.com/containers/storage/pkg/mount/mounter_freebsd.go
+++ b/vendor/github.com/containers/storage/pkg/mount/mounter_freebsd.go
@@ -28,14 +28,25 @@ func allocateIOVecs(options []string) []C.struct_iovec {
func mount(device, target, mType string, flag uintptr, data string) error {
isNullFS := false
- xs := strings.Split(data, ",")
- for _, x := range xs {
- if x == "bind" {
- isNullFS = true
+ options := []string{"fspath", target}
+
+ if data != "" {
+ xs := strings.Split(data, ",")
+ for _, x := range xs {
+ if x == "bind" {
+ isNullFS = true
+ continue
+ }
+ opt := strings.SplitN(x, "=", 2)
+ options = append(options, opt[0])
+ if len(opt) == 2 {
+ options = append(options, opt[1])
+ } else {
+ options = append(options, "")
+ }
}
}
- options := []string{"fspath", target}
if isNullFS {
options = append(options, "fstype", "nullfs", "target", device)
} else {
diff --git a/vendor/github.com/containers/storage/pkg/reexec/command_freebsd.go b/vendor/github.com/containers/storage/pkg/reexec/command_freebsd.go
new file mode 100644
index 000000000..6f63ae991
--- /dev/null
+++ b/vendor/github.com/containers/storage/pkg/reexec/command_freebsd.go
@@ -0,0 +1,37 @@
+// +build freebsd
+
+package reexec
+
+import (
+ "context"
+ "os"
+ "os/exec"
+
+ "golang.org/x/sys/unix"
+)
+
+// Self returns the path to the current process's binary.
+// Uses sysctl.
+func Self() string {
+ path, err := unix.SysctlArgs("kern.proc.pathname", -1)
+ if err == nil {
+ return path
+ }
+ return os.Args[0]
+}
+
+// Command returns *exec.Cmd which has Path as current binary.
+// For example if current binary is "docker" at "/usr/bin/", then cmd.Path will
+// be set to "/usr/bin/docker".
+func Command(args ...string) *exec.Cmd {
+ cmd := exec.Command(Self())
+ cmd.Args = args
+ return cmd
+}
+
+// CommandContext returns *exec.Cmd which has Path as current binary.
+func CommandContext(ctx context.Context, args ...string) *exec.Cmd {
+ cmd := exec.CommandContext(ctx, Self())
+ cmd.Args = args
+ return cmd
+}
diff --git a/vendor/github.com/containers/storage/pkg/reexec/command_unix.go b/vendor/github.com/containers/storage/pkg/reexec/command_unix.go
index 9dd8cb9bb..a56ada216 100644
--- a/vendor/github.com/containers/storage/pkg/reexec/command_unix.go
+++ b/vendor/github.com/containers/storage/pkg/reexec/command_unix.go
@@ -1,4 +1,5 @@
-// +build freebsd solaris darwin
+//go:build solaris || darwin
+// +build solaris darwin
package reexec
diff --git a/vendor/github.com/klauspost/compress/.gitignore b/vendor/github.com/klauspost/compress/.gitignore
index b35f8449b..d31b37815 100644
--- a/vendor/github.com/klauspost/compress/.gitignore
+++ b/vendor/github.com/klauspost/compress/.gitignore
@@ -23,3 +23,10 @@ _testmain.go
*.test
*.prof
/s2/cmd/_s2sx/sfx-exe
+
+# Linux perf files
+perf.data
+perf.data.old
+
+# gdb history
+.gdb_history
diff --git a/vendor/github.com/klauspost/compress/README.md b/vendor/github.com/klauspost/compress/README.md
index 0e2dc116a..5b7cf781a 100644
--- a/vendor/github.com/klauspost/compress/README.md
+++ b/vendor/github.com/klauspost/compress/README.md
@@ -17,6 +17,13 @@ This package provides various compression algorithms.
# changelog
+* Mar 11, 2022 (v1.15.1)
+ * huff0: Add x86 assembly of Decode4X by @WojciechMula in [#512](https://github.com/klauspost/compress/pull/512)
+ * zstd: Reuse zip decoders in [#514](https://github.com/klauspost/compress/pull/514)
+ * zstd: Detect extra block data and report as corrupted in [#520](https://github.com/klauspost/compress/pull/520)
+ * zstd: Handle zero sized frame content size stricter in [#521](https://github.com/klauspost/compress/pull/521)
+ * zstd: Add stricter block size checks in [#523](https://github.com/klauspost/compress/pull/523)
+
* Mar 3, 2022 (v1.15.0)
* zstd: Refactor decoder by @klauspost in [#498](https://github.com/klauspost/compress/pull/498)
* zstd: Add stream encoding without goroutines by @klauspost in [#505](https://github.com/klauspost/compress/pull/505)
diff --git a/vendor/github.com/klauspost/compress/go.mod b/vendor/github.com/klauspost/compress/go.mod
index 5aa64a436..b605e2d52 100644
--- a/vendor/github.com/klauspost/compress/go.mod
+++ b/vendor/github.com/klauspost/compress/go.mod
@@ -1,3 +1,3 @@
module github.com/klauspost/compress
-go 1.15
+go 1.16
diff --git a/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go b/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go
index d47f6644f..ce8e93bcd 100644
--- a/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go
+++ b/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go
@@ -12,14 +12,14 @@ import (
// decompress4x_main_loop_x86 is an x86 assembler implementation
// of Decompress4X when tablelog > 8.
-// go:noescape
+//go:noescape
func decompress4x_main_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted,
peekBits uint8, buf *byte, tbl *dEntrySingle) uint8
// decompress4x_8b_loop_x86 is an x86 assembler implementation
// of Decompress4X when tablelog <= 8 which decodes 4 entries
// per loop.
-// go:noescape
+//go:noescape
func decompress4x_8b_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted,
peekBits uint8, buf *byte, tbl *dEntrySingle) uint8
diff --git a/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo.go b/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo.go
new file mode 100644
index 000000000..3954c5121
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo.go
@@ -0,0 +1,34 @@
+// Package cpuinfo gives runtime info about the current CPU.
+//
+// This is a very limited module meant for use internally
+// in this project. For more versatile solution check
+// https://github.com/klauspost/cpuid.
+package cpuinfo
+
+// HasBMI1 checks whether an x86 CPU supports the BMI1 extension.
+func HasBMI1() bool {
+ return hasBMI1
+}
+
+// HasBMI2 checks whether an x86 CPU supports the BMI2 extension.
+func HasBMI2() bool {
+ return hasBMI2
+}
+
+// DisableBMI2 will disable BMI2, for testing purposes.
+// Call returned function to restore previous state.
+func DisableBMI2() func() {
+ old := hasBMI2
+ hasBMI2 = false
+ return func() {
+ hasBMI2 = old
+ }
+}
+
+// HasBMI checks whether an x86 CPU supports both BMI1 and BMI2 extensions.
+func HasBMI() bool {
+ return HasBMI1() && HasBMI2()
+}
+
+var hasBMI1 bool
+var hasBMI2 bool
diff --git a/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.go b/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.go
new file mode 100644
index 000000000..e802579c4
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.go
@@ -0,0 +1,11 @@
+//go:build amd64 && !appengine && !noasm && gc
+// +build amd64,!appengine,!noasm,gc
+
+package cpuinfo
+
+// go:noescape
+func x86extensions() (bmi1, bmi2 bool)
+
+func init() {
+ hasBMI1, hasBMI2 = x86extensions()
+}
diff --git a/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.s b/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.s
new file mode 100644
index 000000000..4465fbe9e
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.s
@@ -0,0 +1,36 @@
+// +build !appengine
+// +build gc
+// +build !noasm
+
+#include "textflag.h"
+#include "funcdata.h"
+#include "go_asm.h"
+
+TEXT ·x86extensions(SB), NOSPLIT, $0
+ // 1. determine max EAX value
+ XORQ AX, AX
+ CPUID
+
+ CMPQ AX, $7
+ JB unsupported
+
+ // 2. EAX = 7, ECX = 0 --- see Table 3-8 "Information Returned by CPUID Instruction"
+ MOVQ $7, AX
+ MOVQ $0, CX
+ CPUID
+
+ BTQ $3, BX // bit 3 = BMI1
+ SETCS AL
+
+ BTQ $8, BX // bit 8 = BMI2
+ SETCS AH
+
+ MOVB AL, bmi1+0(FP)
+ MOVB AH, bmi2+1(FP)
+ RET
+
+unsupported:
+ XORQ AX, AX
+ MOVB AL, bmi1+0(FP)
+ MOVB AL, bmi2+1(FP)
+ RET
diff --git a/vendor/github.com/klauspost/compress/zstd/README.md b/vendor/github.com/klauspost/compress/zstd/README.md
index e3445ac19..beb7fa872 100644
--- a/vendor/github.com/klauspost/compress/zstd/README.md
+++ b/vendor/github.com/klauspost/compress/zstd/README.md
@@ -386,47 +386,31 @@ In practice this means that concurrency is often limited to utilizing about 3 co
### Benchmarks
-These are some examples of performance compared to [datadog cgo library](https://github.com/DataDog/zstd).
-
The first two are streaming decodes and the last are smaller inputs.
-
+
+Running on AMD Ryzen 9 3950X 16-Core Processor. AMD64 assembly used.
+
```
-BenchmarkDecoderSilesia-8 3 385000067 ns/op 550.51 MB/s 5498 B/op 8 allocs/op
-BenchmarkDecoderSilesiaCgo-8 6 197666567 ns/op 1072.25 MB/s 270672 B/op 8 allocs/op
-
-BenchmarkDecoderEnwik9-8 1 2027001600 ns/op 493.34 MB/s 10496 B/op 18 allocs/op
-BenchmarkDecoderEnwik9Cgo-8 2 979499200 ns/op 1020.93 MB/s 270672 B/op 8 allocs/op
-
-Concurrent performance:
-
-BenchmarkDecoder_DecodeAllParallel/kppkn.gtb.zst-16 28915 42469 ns/op 4340.07 MB/s 114 B/op 0 allocs/op
-BenchmarkDecoder_DecodeAllParallel/geo.protodata.zst-16 116505 9965 ns/op 11900.16 MB/s 16 B/op 0 allocs/op
-BenchmarkDecoder_DecodeAllParallel/plrabn12.txt.zst-16 8952 134272 ns/op 3588.70 MB/s 915 B/op 0 allocs/op
-BenchmarkDecoder_DecodeAllParallel/lcet10.txt.zst-16 11820 102538 ns/op 4161.90 MB/s 594 B/op 0 allocs/op
-BenchmarkDecoder_DecodeAllParallel/asyoulik.txt.zst-16 34782 34184 ns/op 3661.88 MB/s 60 B/op 0 allocs/op
-BenchmarkDecoder_DecodeAllParallel/alice29.txt.zst-16 27712 43447 ns/op 3500.58 MB/s 99 B/op 0 allocs/op
-BenchmarkDecoder_DecodeAllParallel/html_x_4.zst-16 62826 18750 ns/op 21845.10 MB/s 104 B/op 0 allocs/op
-BenchmarkDecoder_DecodeAllParallel/paper-100k.pdf.zst-16 631545 1794 ns/op 57078.74 MB/s 2 B/op 0 allocs/op
-BenchmarkDecoder_DecodeAllParallel/fireworks.jpeg.zst-16 1690140 712 ns/op 172938.13 MB/s 1 B/op 0 allocs/op
-BenchmarkDecoder_DecodeAllParallel/urls.10K.zst-16 10432 113593 ns/op 6180.73 MB/s 1143 B/op 0 allocs/op
-BenchmarkDecoder_DecodeAllParallel/html.zst-16 113206 10671 ns/op 9596.27 MB/s 15 B/op 0 allocs/op
-BenchmarkDecoder_DecodeAllParallel/comp-data.bin.zst-16 1530615 779 ns/op 5229.49 MB/s 0 B/op 0 allocs/op
-
-BenchmarkDecoder_DecodeAllParallelCgo/kppkn.gtb.zst-16 65217 16192 ns/op 11383.34 MB/s 46 B/op 0 allocs/op
-BenchmarkDecoder_DecodeAllParallelCgo/geo.protodata.zst-16 292671 4039 ns/op 29363.19 MB/s 6 B/op 0 allocs/op
-BenchmarkDecoder_DecodeAllParallelCgo/plrabn12.txt.zst-16 26314 46021 ns/op 10470.43 MB/s 293 B/op 0 allocs/op
-BenchmarkDecoder_DecodeAllParallelCgo/lcet10.txt.zst-16 33897 34900 ns/op 12227.96 MB/s 205 B/op 0 allocs/op
-BenchmarkDecoder_DecodeAllParallelCgo/asyoulik.txt.zst-16 104348 11433 ns/op 10949.01 MB/s 20 B/op 0 allocs/op
-BenchmarkDecoder_DecodeAllParallelCgo/alice29.txt.zst-16 75949 15510 ns/op 9805.60 MB/s 32 B/op 0 allocs/op
-BenchmarkDecoder_DecodeAllParallelCgo/html_x_4.zst-16 173910 6756 ns/op 60624.29 MB/s 37 B/op 0 allocs/op
-BenchmarkDecoder_DecodeAllParallelCgo/paper-100k.pdf.zst-16 923076 1339 ns/op 76474.87 MB/s 1 B/op 0 allocs/op
-BenchmarkDecoder_DecodeAllParallelCgo/fireworks.jpeg.zst-16 922920 1351 ns/op 91102.57 MB/s 2 B/op 0 allocs/op
-BenchmarkDecoder_DecodeAllParallelCgo/urls.10K.zst-16 27649 43618 ns/op 16096.19 MB/s 407 B/op 0 allocs/op
-BenchmarkDecoder_DecodeAllParallelCgo/html.zst-16 279073 4160 ns/op 24614.18 MB/s 6 B/op 0 allocs/op
-BenchmarkDecoder_DecodeAllParallelCgo/comp-data.bin.zst-16 749938 1579 ns/op 2581.71 MB/s 0 B/op 0 allocs/op
+BenchmarkDecoderSilesia-32 5 206878840 ns/op 1024.50 MB/s 49808 B/op 43 allocs/op
+BenchmarkDecoderEnwik9-32 1 1271809000 ns/op 786.28 MB/s 72048 B/op 52 allocs/op
+
+Concurrent blocks, performance:
+
+BenchmarkDecoder_DecodeAllParallel/kppkn.gtb.zst-32 67356 17857 ns/op 10321.96 MB/s 22.48 pct 102 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallel/geo.protodata.zst-32 266656 4421 ns/op 26823.21 MB/s 11.89 pct 19 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallel/plrabn12.txt.zst-32 20992 56842 ns/op 8477.17 MB/s 39.90 pct 754 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallel/lcet10.txt.zst-32 27456 43932 ns/op 9714.01 MB/s 33.27 pct 524 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallel/asyoulik.txt.zst-32 78432 15047 ns/op 8319.15 MB/s 40.34 pct 66 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallel/alice29.txt.zst-32 65800 18436 ns/op 8249.63 MB/s 37.75 pct 88 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallel/html_x_4.zst-32 102993 11523 ns/op 35546.09 MB/s 3.637 pct 143 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallel/paper-100k.pdf.zst-32 1000000 1070 ns/op 95720.98 MB/s 80.53 pct 3 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallel/fireworks.jpeg.zst-32 749802 1752 ns/op 70272.35 MB/s 100.0 pct 5 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallel/urls.10K.zst-32 22640 52934 ns/op 13263.37 MB/s 26.25 pct 1014 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallel/html.zst-32 226412 5232 ns/op 19572.27 MB/s 14.49 pct 20 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallel/comp-data.bin.zst-32 923041 1276 ns/op 3194.71 MB/s 31.26 pct 0 B/op 0 allocs/op
```
-This reflects the performance around May 2020, but this may be out of date.
+This reflects the performance around May 2022, but this may be out of date.
## Zstd inside ZIP files
diff --git a/vendor/github.com/klauspost/compress/zstd/blockdec.go b/vendor/github.com/klauspost/compress/zstd/blockdec.go
index 7d567a54a..b2bca3301 100644
--- a/vendor/github.com/klauspost/compress/zstd/blockdec.go
+++ b/vendor/github.com/klauspost/compress/zstd/blockdec.go
@@ -5,9 +5,14 @@
package zstd
import (
+ "bytes"
+ "encoding/binary"
"errors"
"fmt"
"io"
+ "io/ioutil"
+ "os"
+ "path/filepath"
"sync"
"github.com/klauspost/compress/huff0"
@@ -38,6 +43,9 @@ const (
// maxCompressedBlockSize is the biggest allowed compressed block size (128KB)
maxCompressedBlockSize = 128 << 10
+ compressedBlockOverAlloc = 16
+ maxCompressedBlockSizeAlloc = 128<<10 + compressedBlockOverAlloc
+
// Maximum possible block size (all Raw+Uncompressed).
maxBlockSize = (1 << 21) - 1
@@ -136,7 +144,7 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
b.Type = blockType((bh >> 1) & 3)
// find size.
cSize := int(bh >> 3)
- maxSize := maxBlockSize
+ maxSize := maxCompressedBlockSizeAlloc
switch b.Type {
case blockTypeReserved:
return ErrReservedBlockType
@@ -157,9 +165,9 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
println("Data size on stream:", cSize)
}
b.RLESize = 0
- maxSize = maxCompressedBlockSize
+ maxSize = maxCompressedBlockSizeAlloc
if windowSize < maxCompressedBlockSize && b.lowMem {
- maxSize = int(windowSize)
+ maxSize = int(windowSize) + compressedBlockOverAlloc
}
if cSize > maxCompressedBlockSize || uint64(cSize) > b.WindowSize {
if debugDecoder {
@@ -190,9 +198,9 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
// Read block data.
if cap(b.dataStorage) < cSize {
if b.lowMem || cSize > maxCompressedBlockSize {
- b.dataStorage = make([]byte, 0, cSize)
+ b.dataStorage = make([]byte, 0, cSize+compressedBlockOverAlloc)
} else {
- b.dataStorage = make([]byte, 0, maxCompressedBlockSize)
+ b.dataStorage = make([]byte, 0, maxCompressedBlockSizeAlloc)
}
}
if cap(b.dst) <= maxSize {
@@ -486,10 +494,15 @@ func (b *blockDec) decodeCompressed(hist *history) error {
b.dst = append(b.dst, hist.decoders.literals...)
return nil
}
- err = hist.decoders.decodeSync(hist)
+ before := len(hist.decoders.out)
+ err = hist.decoders.decodeSync(hist.b[hist.ignoreBuffer:])
if err != nil {
return err
}
+ if hist.decoders.maxSyncLen > 0 {
+ hist.decoders.maxSyncLen += uint64(before)
+ hist.decoders.maxSyncLen -= uint64(len(hist.decoders.out))
+ }
b.dst = hist.decoders.out
hist.recentOffsets = hist.decoders.prevOffset
return nil
@@ -632,6 +645,22 @@ func (b *blockDec) prepareSequences(in []byte, hist *history) (err error) {
println("initializing sequences:", err)
return err
}
+ // Extract blocks...
+ if false && hist.dict == nil {
+ fatalErr := func(err error) {
+ if err != nil {
+ panic(err)
+ }
+ }
+ fn := fmt.Sprintf("n-%d-lits-%d-prev-%d-%d-%d-win-%d.blk", hist.decoders.nSeqs, len(hist.decoders.literals), hist.recentOffsets[0], hist.recentOffsets[1], hist.recentOffsets[2], hist.windowSize)
+ var buf bytes.Buffer
+ fatalErr(binary.Write(&buf, binary.LittleEndian, hist.decoders.litLengths.fse))
+ fatalErr(binary.Write(&buf, binary.LittleEndian, hist.decoders.matchLengths.fse))
+ fatalErr(binary.Write(&buf, binary.LittleEndian, hist.decoders.offsets.fse))
+ buf.Write(in)
+ ioutil.WriteFile(filepath.Join("testdata", "seqs", fn), buf.Bytes(), os.ModePerm)
+ }
+
return nil
}
@@ -650,6 +679,7 @@ func (b *blockDec) decodeSequences(hist *history) error {
}
hist.decoders.windowSize = hist.windowSize
hist.decoders.prevOffset = hist.recentOffsets
+
err := hist.decoders.decode(b.sequence)
hist.recentOffsets = hist.decoders.prevOffset
return err
diff --git a/vendor/github.com/klauspost/compress/zstd/decoder.go b/vendor/github.com/klauspost/compress/zstd/decoder.go
index 9fcdaac1d..c65ea9795 100644
--- a/vendor/github.com/klauspost/compress/zstd/decoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/decoder.go
@@ -347,18 +347,20 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
}
frame.history.setDict(&dict)
}
-
- if frame.FrameContentSize != fcsUnknown && frame.FrameContentSize > d.o.maxDecodedSize-uint64(len(dst)) {
- return dst, ErrDecoderSizeExceeded
+ if frame.WindowSize > d.o.maxWindowSize {
+ return dst, ErrWindowSizeExceeded
}
- if frame.FrameContentSize < 1<<30 {
- // Never preallocate more than 1 GB up front.
+ if frame.FrameContentSize != fcsUnknown {
+ if frame.FrameContentSize > d.o.maxDecodedSize-uint64(len(dst)) {
+ return dst, ErrDecoderSizeExceeded
+ }
if cap(dst)-len(dst) < int(frame.FrameContentSize) {
- dst2 := make([]byte, len(dst), len(dst)+int(frame.FrameContentSize))
+ dst2 := make([]byte, len(dst), len(dst)+int(frame.FrameContentSize)+compressedBlockOverAlloc)
copy(dst2, dst)
dst = dst2
}
}
+
if cap(dst) == 0 {
// Allocate len(input) * 2 by default if nothing is provided
// and we didn't get frame content size.
diff --git a/vendor/github.com/klauspost/compress/zstd/decoder_options.go b/vendor/github.com/klauspost/compress/zstd/decoder_options.go
index fd05c9bb0..fc52ebc40 100644
--- a/vendor/github.com/klauspost/compress/zstd/decoder_options.go
+++ b/vendor/github.com/klauspost/compress/zstd/decoder_options.go
@@ -31,7 +31,7 @@ func (o *decoderOptions) setDefault() {
if o.concurrent > 4 {
o.concurrent = 4
}
- o.maxDecodedSize = 1 << 63
+ o.maxDecodedSize = 64 << 30
}
// WithDecoderLowmem will set whether to use a lower amount of memory,
@@ -66,7 +66,7 @@ func WithDecoderConcurrency(n int) DOption {
// WithDecoderMaxMemory allows to set a maximum decoded size for in-memory
// non-streaming operations or maximum window size for streaming operations.
// This can be used to control memory usage of potentially hostile content.
-// Maximum and default is 1 << 63 bytes.
+// Maximum is 1 << 63 bytes. Default is 64GiB.
func WithDecoderMaxMemory(n uint64) DOption {
return func(o *decoderOptions) error {
if n == 0 {
diff --git a/vendor/github.com/klauspost/compress/zstd/framedec.go b/vendor/github.com/klauspost/compress/zstd/framedec.go
index 11089d223..509d5cece 100644
--- a/vendor/github.com/klauspost/compress/zstd/framedec.go
+++ b/vendor/github.com/klauspost/compress/zstd/framedec.go
@@ -326,6 +326,19 @@ func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) {
d.history.ignoreBuffer = len(dst)
// Store input length, so we only check new data.
crcStart := len(dst)
+ d.history.decoders.maxSyncLen = 0
+ if d.FrameContentSize != fcsUnknown {
+ d.history.decoders.maxSyncLen = d.FrameContentSize + uint64(len(dst))
+ if d.history.decoders.maxSyncLen > d.o.maxDecodedSize {
+ return dst, ErrDecoderSizeExceeded
+ }
+ if uint64(cap(dst)) < d.history.decoders.maxSyncLen {
+ // Alloc for output
+ dst2 := make([]byte, len(dst), d.history.decoders.maxSyncLen+compressedBlockOverAlloc)
+ copy(dst2, dst)
+ dst = dst2
+ }
+ }
var err error
for {
err = dec.reset(d.rawInput, d.WindowSize)
diff --git a/vendor/github.com/klauspost/compress/zstd/fse_decoder.go b/vendor/github.com/klauspost/compress/zstd/fse_decoder.go
index bb3d4fd6c..fde4e6b60 100644
--- a/vendor/github.com/klauspost/compress/zstd/fse_decoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/fse_decoder.go
@@ -5,8 +5,10 @@
package zstd
import (
+ "encoding/binary"
"errors"
"fmt"
+ "io"
)
const (
@@ -182,6 +184,29 @@ func (s *fseDecoder) readNCount(b *byteReader, maxSymbol uint16) error {
return s.buildDtable()
}
+func (s *fseDecoder) mustReadFrom(r io.Reader) {
+ fatalErr := func(err error) {
+ if err != nil {
+ panic(err)
+ }
+ }
+ // dt [maxTablesize]decSymbol // Decompression table.
+ // symbolLen uint16 // Length of active part of the symbol table.
+ // actualTableLog uint8 // Selected tablelog.
+ // maxBits uint8 // Maximum number of additional bits
+ // // used for table creation to avoid allocations.
+ // stateTable [256]uint16
+ // norm [maxSymbolValue + 1]int16
+ // preDefined bool
+ fatalErr(binary.Read(r, binary.LittleEndian, &s.dt))
+ fatalErr(binary.Read(r, binary.LittleEndian, &s.symbolLen))
+ fatalErr(binary.Read(r, binary.LittleEndian, &s.actualTableLog))
+ fatalErr(binary.Read(r, binary.LittleEndian, &s.maxBits))
+ fatalErr(binary.Read(r, binary.LittleEndian, &s.stateTable))
+ fatalErr(binary.Read(r, binary.LittleEndian, &s.norm))
+ fatalErr(binary.Read(r, binary.LittleEndian, &s.preDefined))
+}
+
// decSymbol contains information about a state entry,
// Including the state offset base, the output symbol and
// the number of bits to read for the low part of the destination state.
diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec.go b/vendor/github.com/klauspost/compress/zstd/seqdec.go
index 819f1461b..e80139dd9 100644
--- a/vendor/github.com/klauspost/compress/zstd/seqdec.go
+++ b/vendor/github.com/klauspost/compress/zstd/seqdec.go
@@ -73,6 +73,7 @@ type sequenceDecs struct {
seqSize int
windowSize int
maxBits uint8
+ maxSyncLen uint64
}
// initialize all 3 decoders from the stream input.
@@ -98,153 +99,13 @@ func (s *sequenceDecs) initialize(br *bitReader, hist *history, out []byte) erro
return nil
}
-// decode sequences from the stream with the provided history.
-func (s *sequenceDecs) decode(seqs []seqVals) error {
- br := s.br
-
- // Grab full sizes tables, to avoid bounds checks.
- llTable, mlTable, ofTable := s.litLengths.fse.dt[:maxTablesize], s.matchLengths.fse.dt[:maxTablesize], s.offsets.fse.dt[:maxTablesize]
- llState, mlState, ofState := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state
- s.seqSize = 0
- litRemain := len(s.literals)
- maxBlockSize := maxCompressedBlockSize
- if s.windowSize < maxBlockSize {
- maxBlockSize = s.windowSize
- }
- for i := range seqs {
- var ll, mo, ml int
- if br.off > 4+((maxOffsetBits+16+16)>>3) {
- // inlined function:
- // ll, mo, ml = s.nextFast(br, llState, mlState, ofState)
-
- // Final will not read from stream.
- var llB, mlB, moB uint8
- ll, llB = llState.final()
- ml, mlB = mlState.final()
- mo, moB = ofState.final()
-
- // extra bits are stored in reverse order.
- br.fillFast()
- mo += br.getBits(moB)
- if s.maxBits > 32 {
- br.fillFast()
- }
- ml += br.getBits(mlB)
- ll += br.getBits(llB)
-
- if moB > 1 {
- s.prevOffset[2] = s.prevOffset[1]
- s.prevOffset[1] = s.prevOffset[0]
- s.prevOffset[0] = mo
- } else {
- // mo = s.adjustOffset(mo, ll, moB)
- // Inlined for rather big speedup
- if ll == 0 {
- // There is an exception though, when current sequence's literals_length = 0.
- // In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2,
- // an offset_value of 2 means Repeated_Offset3, and an offset_value of 3 means Repeated_Offset1 - 1_byte.
- mo++
- }
-
- if mo == 0 {
- mo = s.prevOffset[0]
- } else {
- var temp int
- if mo == 3 {
- temp = s.prevOffset[0] - 1
- } else {
- temp = s.prevOffset[mo]
- }
-
- if temp == 0 {
- // 0 is not valid; input is corrupted; force offset to 1
- println("WARNING: temp was 0")
- temp = 1
- }
-
- if mo != 1 {
- s.prevOffset[2] = s.prevOffset[1]
- }
- s.prevOffset[1] = s.prevOffset[0]
- s.prevOffset[0] = temp
- mo = temp
- }
- }
- br.fillFast()
- } else {
- if br.overread() {
- if debugDecoder {
- printf("reading sequence %d, exceeded available data\n", i)
- }
- return io.ErrUnexpectedEOF
- }
- ll, mo, ml = s.next(br, llState, mlState, ofState)
- br.fill()
- }
-
- if debugSequences {
- println("Seq", i, "Litlen:", ll, "mo:", mo, "(abs) ml:", ml)
- }
- // Evaluate.
- // We might be doing this async, so do it early.
- if mo == 0 && ml > 0 {
- return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml)
- }
- if ml > maxMatchLen {
- return fmt.Errorf("match len (%d) bigger than max allowed length", ml)
- }
- s.seqSize += ll + ml
- if s.seqSize > maxBlockSize {
- return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
- }
- litRemain -= ll
- if litRemain < 0 {
- return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, litRemain+ll)
- }
- seqs[i] = seqVals{
- ll: ll,
- ml: ml,
- mo: mo,
- }
- if i == len(seqs)-1 {
- // This is the last sequence, so we shouldn't update state.
- break
- }
-
- // Manually inlined, ~ 5-20% faster
- // Update all 3 states at once. Approx 20% faster.
- nBits := llState.nbBits() + mlState.nbBits() + ofState.nbBits()
- if nBits == 0 {
- llState = llTable[llState.newState()&maxTableMask]
- mlState = mlTable[mlState.newState()&maxTableMask]
- ofState = ofTable[ofState.newState()&maxTableMask]
- } else {
- bits := br.get32BitsFast(nBits)
- lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31))
- llState = llTable[(llState.newState()+lowBits)&maxTableMask]
-
- lowBits = uint16(bits >> (ofState.nbBits() & 31))
- lowBits &= bitMask[mlState.nbBits()&15]
- mlState = mlTable[(mlState.newState()+lowBits)&maxTableMask]
-
- lowBits = uint16(bits) & bitMask[ofState.nbBits()&15]
- ofState = ofTable[(ofState.newState()+lowBits)&maxTableMask]
- }
- }
- s.seqSize += litRemain
- if s.seqSize > maxBlockSize {
- return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
- }
- err := br.close()
- if err != nil {
- printf("Closing sequences: %v, %+v\n", err, *br)
- }
- return err
-}
-
// execute will execute the decoded sequence with the provided history.
// The sequence must be evaluated before being sent.
func (s *sequenceDecs) execute(seqs []seqVals, hist []byte) error {
+ if len(s.dict) == 0 {
+ return s.executeSimple(seqs, hist)
+ }
+
// Ensure we have enough output size...
if len(s.out)+s.seqSize > cap(s.out) {
addBytes := s.seqSize + len(s.out)
@@ -341,14 +202,19 @@ func (s *sequenceDecs) execute(seqs []seqVals, hist []byte) error {
}
// decode sequences from the stream with the provided history.
-func (s *sequenceDecs) decodeSync(history *history) error {
+func (s *sequenceDecs) decodeSync(hist []byte) error {
+ if true {
+ supported, err := s.decodeSyncSimple(hist)
+ if supported {
+ return err
+ }
+ }
br := s.br
seqs := s.nSeqs
startSize := len(s.out)
// Grab full sizes tables, to avoid bounds checks.
llTable, mlTable, ofTable := s.litLengths.fse.dt[:maxTablesize], s.matchLengths.fse.dt[:maxTablesize], s.offsets.fse.dt[:maxTablesize]
llState, mlState, ofState := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state
- hist := history.b[history.ignoreBuffer:]
out := s.out
maxBlockSize := maxCompressedBlockSize
if s.windowSize < maxBlockSize {
@@ -433,7 +299,7 @@ func (s *sequenceDecs) decodeSync(history *history) error {
}
size := ll + ml + len(out)
if size-startSize > maxBlockSize {
- return fmt.Errorf("output (%d) bigger than max block size (%d)", size, maxBlockSize)
+ return fmt.Errorf("output (%d) bigger than max block size (%d)", size-startSize, maxBlockSize)
}
if size > cap(out) {
// Not enough size, which can happen under high volume block streaming conditions
@@ -463,13 +329,13 @@ func (s *sequenceDecs) decodeSync(history *history) error {
if mo > len(out)+len(hist) || mo > s.windowSize {
if len(s.dict) == 0 {
- return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(out)+len(hist))
+ return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(out)+len(hist)-startSize)
}
// we may be in dictionary.
dictO := len(s.dict) - (mo - (len(out) + len(hist)))
if dictO < 0 || dictO >= len(s.dict) {
- return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(out)+len(hist))
+ return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(out)+len(hist)-startSize)
}
end := dictO + ml
if end > len(s.dict) {
@@ -543,8 +409,8 @@ func (s *sequenceDecs) decodeSync(history *history) error {
}
// Check if space for literals
- if len(s.literals)+len(s.out)-startSize > maxBlockSize {
- return fmt.Errorf("output (%d) bigger than max block size (%d)", len(s.out), maxBlockSize)
+ if size := len(s.literals) + len(s.out) - startSize; size > maxBlockSize {
+ return fmt.Errorf("output (%d) bigger than max block size (%d)", size, maxBlockSize)
}
// Add final literals
diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go
new file mode 100644
index 000000000..4676b09cc
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go
@@ -0,0 +1,350 @@
+//go:build amd64 && !appengine && !noasm && gc
+// +build amd64,!appengine,!noasm,gc
+
+package zstd
+
+import (
+ "fmt"
+
+ "github.com/klauspost/compress/internal/cpuinfo"
+)
+
+type decodeSyncAsmContext struct {
+ llTable []decSymbol
+ mlTable []decSymbol
+ ofTable []decSymbol
+ llState uint64
+ mlState uint64
+ ofState uint64
+ iteration int
+ litRemain int
+ out []byte
+ outPosition int
+ literals []byte
+ litPosition int
+ history []byte
+ windowSize int
+ ll int // set on error (not for all errors, please refer to _generate/gen.go)
+ ml int // set on error (not for all errors, please refer to _generate/gen.go)
+ mo int // set on error (not for all errors, please refer to _generate/gen.go)
+}
+
+// sequenceDecs_decodeSync_amd64 implements the main loop of sequenceDecs.decodeSync in x86 asm.
+//
+// Please refer to seqdec_generic.go for the reference implementation.
+//go:noescape
+func sequenceDecs_decodeSync_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
+
+// sequenceDecs_decodeSync_bmi2 implements the main loop of sequenceDecs.decodeSync in x86 asm with BMI2 extensions.
+//go:noescape
+func sequenceDecs_decodeSync_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
+
+// sequenceDecs_decodeSync_safe_amd64 does the same as above, but does not write more than output buffer.
+//go:noescape
+func sequenceDecs_decodeSync_safe_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
+
+// sequenceDecs_decodeSync_safe_bmi2 does the same as above, but does not write more than output buffer.
+//go:noescape
+func sequenceDecs_decodeSync_safe_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
+
+// decode sequences from the stream with the provided history but without a dictionary.
+func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) {
+ if len(s.dict) > 0 {
+ return false, nil
+ }
+ if s.maxSyncLen == 0 && cap(s.out)-len(s.out) < maxCompressedBlockSize {
+ return false, nil
+ }
+ useSafe := false
+ if s.maxSyncLen == 0 && cap(s.out)-len(s.out) < maxCompressedBlockSizeAlloc {
+ useSafe = true
+ }
+ if s.maxSyncLen > 0 && cap(s.out)-len(s.out)-compressedBlockOverAlloc < int(s.maxSyncLen) {
+ useSafe = true
+ }
+ br := s.br
+
+ maxBlockSize := maxCompressedBlockSize
+ if s.windowSize < maxBlockSize {
+ maxBlockSize = s.windowSize
+ }
+
+ ctx := decodeSyncAsmContext{
+ llTable: s.litLengths.fse.dt[:maxTablesize],
+ mlTable: s.matchLengths.fse.dt[:maxTablesize],
+ ofTable: s.offsets.fse.dt[:maxTablesize],
+ llState: uint64(s.litLengths.state.state),
+ mlState: uint64(s.matchLengths.state.state),
+ ofState: uint64(s.offsets.state.state),
+ iteration: s.nSeqs - 1,
+ litRemain: len(s.literals),
+ out: s.out,
+ outPosition: len(s.out),
+ literals: s.literals,
+ windowSize: s.windowSize,
+ history: hist,
+ }
+
+ s.seqSize = 0
+ startSize := len(s.out)
+
+ var errCode int
+ if cpuinfo.HasBMI2() {
+ if useSafe {
+ errCode = sequenceDecs_decodeSync_safe_bmi2(s, br, &ctx)
+ } else {
+ errCode = sequenceDecs_decodeSync_bmi2(s, br, &ctx)
+ }
+ } else {
+ if useSafe {
+ errCode = sequenceDecs_decodeSync_safe_amd64(s, br, &ctx)
+ } else {
+ errCode = sequenceDecs_decodeSync_amd64(s, br, &ctx)
+ }
+ }
+ switch errCode {
+ case noError:
+ break
+
+ case errorMatchLenOfsMismatch:
+ return true, fmt.Errorf("zero matchoff and matchlen (%d) > 0", ctx.ml)
+
+ case errorMatchLenTooBig:
+ return true, fmt.Errorf("match len (%d) bigger than max allowed length", ctx.ml)
+
+ case errorMatchOffTooBig:
+ return true, fmt.Errorf("match offset (%d) bigger than current history (%d)",
+ ctx.mo, ctx.outPosition+len(hist)-startSize)
+
+ case errorNotEnoughLiterals:
+ return true, fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available",
+ ctx.ll, ctx.litRemain+ctx.ll)
+
+ case errorNotEnoughSpace:
+ size := ctx.outPosition + ctx.ll + ctx.ml
+ if debugDecoder {
+ println("msl:", s.maxSyncLen, "cap", cap(s.out), "bef:", startSize, "sz:", size-startSize, "mbs:", maxBlockSize, "outsz:", cap(s.out)-startSize)
+ }
+ return true, fmt.Errorf("output (%d) bigger than max block size (%d)", size-startSize, maxBlockSize)
+
+ default:
+ return true, fmt.Errorf("sequenceDecs_decode returned erronous code %d", errCode)
+ }
+
+ s.seqSize += ctx.litRemain
+ if s.seqSize > maxBlockSize {
+ return true, fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
+ }
+ err := br.close()
+ if err != nil {
+ printf("Closing sequences: %v, %+v\n", err, *br)
+ return true, err
+ }
+
+ s.literals = s.literals[ctx.litPosition:]
+ t := ctx.outPosition
+ s.out = s.out[:t]
+
+ // Add final literals
+ s.out = append(s.out, s.literals...)
+ if debugDecoder {
+ t += len(s.literals)
+ if t != len(s.out) {
+ panic(fmt.Errorf("length mismatch, want %d, got %d", len(s.out), t))
+ }
+ }
+
+ return true, nil
+}
+
+// --------------------------------------------------------------------------------
+
+type decodeAsmContext struct {
+ llTable []decSymbol
+ mlTable []decSymbol
+ ofTable []decSymbol
+ llState uint64
+ mlState uint64
+ ofState uint64
+ iteration int
+ seqs []seqVals
+ litRemain int
+}
+
+const noError = 0
+
+// error reported when mo == 0 && ml > 0
+const errorMatchLenOfsMismatch = 1
+
+// error reported when ml > maxMatchLen
+const errorMatchLenTooBig = 2
+
+// error reported when mo > available history or mo > s.windowSize
+const errorMatchOffTooBig = 3
+
+// error reported when the sum of literal lengths exeeceds the literal buffer size
+const errorNotEnoughLiterals = 4
+
+// error reported when capacity of `out` is too small
+const errorNotEnoughSpace = 5
+
+// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm.
+//
+// Please refer to seqdec_generic.go for the reference implementation.
+//go:noescape
+func sequenceDecs_decode_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
+
+// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm.
+//
+// Please refer to seqdec_generic.go for the reference implementation.
+//go:noescape
+func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
+
+// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm with BMI2 extensions.
+//go:noescape
+func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
+
+// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm with BMI2 extensions.
+//go:noescape
+func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
+
+// decode sequences from the stream without the provided history.
+func (s *sequenceDecs) decode(seqs []seqVals) error {
+ br := s.br
+
+ maxBlockSize := maxCompressedBlockSize
+ if s.windowSize < maxBlockSize {
+ maxBlockSize = s.windowSize
+ }
+
+ ctx := decodeAsmContext{
+ llTable: s.litLengths.fse.dt[:maxTablesize],
+ mlTable: s.matchLengths.fse.dt[:maxTablesize],
+ ofTable: s.offsets.fse.dt[:maxTablesize],
+ llState: uint64(s.litLengths.state.state),
+ mlState: uint64(s.matchLengths.state.state),
+ ofState: uint64(s.offsets.state.state),
+ seqs: seqs,
+ iteration: len(seqs) - 1,
+ litRemain: len(s.literals),
+ }
+
+ s.seqSize = 0
+ lte56bits := s.maxBits+s.offsets.fse.actualTableLog+s.matchLengths.fse.actualTableLog+s.litLengths.fse.actualTableLog <= 56
+ var errCode int
+ if cpuinfo.HasBMI2() {
+ if lte56bits {
+ errCode = sequenceDecs_decode_56_bmi2(s, br, &ctx)
+ } else {
+ errCode = sequenceDecs_decode_bmi2(s, br, &ctx)
+ }
+ } else {
+ if lte56bits {
+ errCode = sequenceDecs_decode_56_amd64(s, br, &ctx)
+ } else {
+ errCode = sequenceDecs_decode_amd64(s, br, &ctx)
+ }
+ }
+ if errCode != 0 {
+ i := len(seqs) - ctx.iteration - 1
+ switch errCode {
+ case errorMatchLenOfsMismatch:
+ ml := ctx.seqs[i].ml
+ return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml)
+
+ case errorMatchLenTooBig:
+ ml := ctx.seqs[i].ml
+ return fmt.Errorf("match len (%d) bigger than max allowed length", ml)
+
+ case errorNotEnoughLiterals:
+ ll := ctx.seqs[i].ll
+ return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, ctx.litRemain+ll)
+ }
+
+ return fmt.Errorf("sequenceDecs_decode_amd64 returned erronous code %d", errCode)
+ }
+
+ if ctx.litRemain < 0 {
+ return fmt.Errorf("literal count is too big: total available %d, total requested %d",
+ len(s.literals), len(s.literals)-ctx.litRemain)
+ }
+
+ s.seqSize += ctx.litRemain
+ if s.seqSize > maxBlockSize {
+ return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
+ }
+ err := br.close()
+ if err != nil {
+ printf("Closing sequences: %v, %+v\n", err, *br)
+ }
+ return err
+}
+
+// --------------------------------------------------------------------------------
+
+type executeAsmContext struct {
+ seqs []seqVals
+ seqIndex int
+ out []byte
+ history []byte
+ literals []byte
+ outPosition int
+ litPosition int
+ windowSize int
+}
+
+// sequenceDecs_executeSimple_amd64 implements the main loop of sequenceDecs.executeSimple in x86 asm.
+//
+// Returns false if a match offset is too big.
+//
+// Please refer to seqdec_generic.go for the reference implementation.
+//go:noescape
+func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool
+
+// executeSimple handles cases when dictionary is not used.
+func (s *sequenceDecs) executeSimple(seqs []seqVals, hist []byte) error {
+ // Ensure we have enough output size...
+ if len(s.out)+s.seqSize+compressedBlockOverAlloc > cap(s.out) {
+ addBytes := s.seqSize + len(s.out) + compressedBlockOverAlloc
+ s.out = append(s.out, make([]byte, addBytes)...)
+ s.out = s.out[:len(s.out)-addBytes]
+ }
+
+ if debugDecoder {
+ printf("Execute %d seqs with literals: %d into %d bytes\n", len(seqs), len(s.literals), s.seqSize)
+ }
+
+ var t = len(s.out)
+ out := s.out[:t+s.seqSize]
+
+ ctx := executeAsmContext{
+ seqs: seqs,
+ seqIndex: 0,
+ out: out,
+ history: hist,
+ outPosition: t,
+ litPosition: 0,
+ literals: s.literals,
+ windowSize: s.windowSize,
+ }
+
+ ok := sequenceDecs_executeSimple_amd64(&ctx)
+ if !ok {
+ return fmt.Errorf("match offset (%d) bigger than current history (%d)",
+ seqs[ctx.seqIndex].mo, ctx.outPosition+len(hist))
+ }
+ s.literals = s.literals[ctx.litPosition:]
+ t = ctx.outPosition
+
+ // Add final literals
+ copy(out[t:], s.literals)
+ if debugDecoder {
+ t += len(s.literals)
+ if t != len(out) {
+ panic(fmt.Errorf("length mismatch, want %d, got %d, ss: %d", len(out), t, s.seqSize))
+ }
+ }
+ s.out = out
+
+ return nil
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s
new file mode 100644
index 000000000..01cc23fa8
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s
@@ -0,0 +1,3519 @@
+// Code generated by command: go run gen.go -out ../seqdec_amd64.s -pkg=zstd. DO NOT EDIT.
+
+//go:build !appengine && !noasm && gc && !noasm
+// +build !appengine,!noasm,gc,!noasm
+
+// func sequenceDecs_decode_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
+// Requires: CMOV
+TEXT ·sequenceDecs_decode_amd64(SB), $8-32
+ MOVQ br+8(FP), AX
+ MOVQ 32(AX), DX
+ MOVBQZX 40(AX), BX
+ MOVQ 24(AX), SI
+ MOVQ (AX), AX
+ ADDQ SI, AX
+ MOVQ AX, (SP)
+ MOVQ ctx+16(FP), AX
+ MOVQ 72(AX), DI
+ MOVQ 80(AX), R8
+ MOVQ 88(AX), R9
+ MOVQ 104(AX), R10
+ MOVQ s+0(FP), AX
+ MOVQ 144(AX), R11
+ MOVQ 152(AX), R12
+ MOVQ 160(AX), R13
+
+sequenceDecs_decode_amd64_main_loop:
+ MOVQ (SP), R14
+
+ // Fill bitreader to have enough for the offset and match length.
+ CMPQ SI, $0x08
+ JL sequenceDecs_decode_amd64_fill_byte_by_byte
+ MOVQ BX, AX
+ SHRQ $0x03, AX
+ SUBQ AX, R14
+ MOVQ (R14), DX
+ SUBQ AX, SI
+ ANDQ $0x07, BX
+ JMP sequenceDecs_decode_amd64_fill_end
+
+sequenceDecs_decode_amd64_fill_byte_by_byte:
+ CMPQ SI, $0x00
+ JLE sequenceDecs_decode_amd64_fill_end
+ CMPQ BX, $0x07
+ JLE sequenceDecs_decode_amd64_fill_end
+ SHLQ $0x08, DX
+ SUBQ $0x01, R14
+ SUBQ $0x01, SI
+ SUBQ $0x08, BX
+ MOVBQZX (R14), AX
+ ORQ AX, DX
+ JMP sequenceDecs_decode_amd64_fill_byte_by_byte
+
+sequenceDecs_decode_amd64_fill_end:
+ // Update offset
+ MOVQ R9, AX
+ MOVQ BX, CX
+ MOVQ DX, R15
+ SHLQ CL, R15
+ MOVB AH, CL
+ ADDQ CX, BX
+ NEGL CX
+ SHRQ CL, R15
+ SHRQ $0x20, AX
+ TESTQ CX, CX
+ CMOVQEQ CX, R15
+ ADDQ R15, AX
+ MOVQ AX, 16(R10)
+
+ // Update match length
+ MOVQ R8, AX
+ MOVQ BX, CX
+ MOVQ DX, R15
+ SHLQ CL, R15
+ MOVB AH, CL
+ ADDQ CX, BX
+ NEGL CX
+ SHRQ CL, R15
+ SHRQ $0x20, AX
+ TESTQ CX, CX
+ CMOVQEQ CX, R15
+ ADDQ R15, AX
+ MOVQ AX, 8(R10)
+
+ // Fill bitreader to have enough for the remaining
+ CMPQ SI, $0x08
+ JL sequenceDecs_decode_amd64_fill_2_byte_by_byte
+ MOVQ BX, AX
+ SHRQ $0x03, AX
+ SUBQ AX, R14
+ MOVQ (R14), DX
+ SUBQ AX, SI
+ ANDQ $0x07, BX
+ JMP sequenceDecs_decode_amd64_fill_2_end
+
+sequenceDecs_decode_amd64_fill_2_byte_by_byte:
+ CMPQ SI, $0x00
+ JLE sequenceDecs_decode_amd64_fill_2_end
+ CMPQ BX, $0x07
+ JLE sequenceDecs_decode_amd64_fill_2_end
+ SHLQ $0x08, DX
+ SUBQ $0x01, R14
+ SUBQ $0x01, SI
+ SUBQ $0x08, BX
+ MOVBQZX (R14), AX
+ ORQ AX, DX
+ JMP sequenceDecs_decode_amd64_fill_2_byte_by_byte
+
+sequenceDecs_decode_amd64_fill_2_end:
+ // Update literal length
+ MOVQ DI, AX
+ MOVQ BX, CX
+ MOVQ DX, R15
+ SHLQ CL, R15
+ MOVB AH, CL
+ ADDQ CX, BX
+ NEGL CX
+ SHRQ CL, R15
+ SHRQ $0x20, AX
+ TESTQ CX, CX
+ CMOVQEQ CX, R15
+ ADDQ R15, AX
+ MOVQ AX, (R10)
+
+ // Fill bitreader for state updates
+ MOVQ R14, (SP)
+ MOVQ R9, AX
+ SHRQ $0x08, AX
+ MOVBQZX AL, AX
+ MOVQ ctx+16(FP), CX
+ CMPQ 96(CX), $0x00
+ JZ sequenceDecs_decode_amd64_skip_update
+
+ // Update Literal Length State
+ MOVBQZX DI, R14
+ SHRQ $0x10, DI
+ MOVWQZX DI, DI
+ CMPQ R14, $0x00
+ JZ sequenceDecs_decode_amd64_llState_updateState_skip_zero
+ MOVQ BX, CX
+ ADDQ R14, BX
+ MOVQ DX, R15
+ SHLQ CL, R15
+ MOVQ R14, CX
+ NEGQ CX
+ SHRQ CL, R15
+ ADDQ R15, DI
+
+sequenceDecs_decode_amd64_llState_updateState_skip_zero:
+ // Load ctx.llTable
+ MOVQ ctx+16(FP), CX
+ MOVQ (CX), CX
+ MOVQ (CX)(DI*8), DI
+
+ // Update Match Length State
+ MOVBQZX R8, R14
+ SHRQ $0x10, R8
+ MOVWQZX R8, R8
+ CMPQ R14, $0x00
+ JZ sequenceDecs_decode_amd64_mlState_updateState_skip_zero
+ MOVQ BX, CX
+ ADDQ R14, BX
+ MOVQ DX, R15
+ SHLQ CL, R15
+ MOVQ R14, CX
+ NEGQ CX
+ SHRQ CL, R15
+ ADDQ R15, R8
+
+sequenceDecs_decode_amd64_mlState_updateState_skip_zero:
+ // Load ctx.mlTable
+ MOVQ ctx+16(FP), CX
+ MOVQ 24(CX), CX
+ MOVQ (CX)(R8*8), R8
+
+ // Update Offset State
+ MOVBQZX R9, R14
+ SHRQ $0x10, R9
+ MOVWQZX R9, R9
+ CMPQ R14, $0x00
+ JZ sequenceDecs_decode_amd64_ofState_updateState_skip_zero
+ MOVQ BX, CX
+ ADDQ R14, BX
+ MOVQ DX, R15
+ SHLQ CL, R15
+ MOVQ R14, CX
+ NEGQ CX
+ SHRQ CL, R15
+ ADDQ R15, R9
+
+sequenceDecs_decode_amd64_ofState_updateState_skip_zero:
+ // Load ctx.ofTable
+ MOVQ ctx+16(FP), CX
+ MOVQ 48(CX), CX
+ MOVQ (CX)(R9*8), R9
+
+sequenceDecs_decode_amd64_skip_update:
+ // Adjust offset
+ MOVQ 16(R10), CX
+ CMPQ AX, $0x01
+ JBE sequenceDecs_decode_amd64_adjust_offsetB_1_or_0
+ MOVQ R12, R13
+ MOVQ R11, R12
+ MOVQ CX, R11
+ JMP sequenceDecs_decode_amd64_adjust_end
+
+sequenceDecs_decode_amd64_adjust_offsetB_1_or_0:
+ CMPQ (R10), $0x00000000
+ JNE sequenceDecs_decode_amd64_adjust_offset_maybezero
+ INCQ CX
+ JMP sequenceDecs_decode_amd64_adjust_offset_nonzero
+
+sequenceDecs_decode_amd64_adjust_offset_maybezero:
+ TESTQ CX, CX
+ JNZ sequenceDecs_decode_amd64_adjust_offset_nonzero
+ MOVQ R11, CX
+ JMP sequenceDecs_decode_amd64_adjust_end
+
+sequenceDecs_decode_amd64_adjust_offset_nonzero:
+ CMPQ CX, $0x01
+ JB sequenceDecs_decode_amd64_adjust_zero
+ JEQ sequenceDecs_decode_amd64_adjust_one
+ CMPQ CX, $0x02
+ JA sequenceDecs_decode_amd64_adjust_three
+ JMP sequenceDecs_decode_amd64_adjust_two
+
+sequenceDecs_decode_amd64_adjust_zero:
+ MOVQ R11, AX
+ JMP sequenceDecs_decode_amd64_adjust_test_temp_valid
+
+sequenceDecs_decode_amd64_adjust_one:
+ MOVQ R12, AX
+ JMP sequenceDecs_decode_amd64_adjust_test_temp_valid
+
+sequenceDecs_decode_amd64_adjust_two:
+ MOVQ R13, AX
+ JMP sequenceDecs_decode_amd64_adjust_test_temp_valid
+
+sequenceDecs_decode_amd64_adjust_three:
+ LEAQ -1(R11), AX
+
+sequenceDecs_decode_amd64_adjust_test_temp_valid:
+ TESTQ AX, AX
+ JNZ sequenceDecs_decode_amd64_adjust_temp_valid
+ MOVQ $0x00000001, AX
+
+sequenceDecs_decode_amd64_adjust_temp_valid:
+ CMPQ CX, $0x01
+ CMOVQNE R12, R13
+ MOVQ R11, R12
+ MOVQ AX, R11
+ MOVQ AX, CX
+
+sequenceDecs_decode_amd64_adjust_end:
+ MOVQ CX, 16(R10)
+
+ // Check values
+ MOVQ 8(R10), AX
+ MOVQ (R10), R14
+ LEAQ (AX)(R14*1), R15
+ MOVQ s+0(FP), BP
+ ADDQ R15, 256(BP)
+ MOVQ ctx+16(FP), R15
+ SUBQ R14, 128(R15)
+ JS error_not_enough_literals
+ CMPQ AX, $0x00020002
+ JA sequenceDecs_decode_amd64_error_match_len_too_big
+ TESTQ CX, CX
+ JNZ sequenceDecs_decode_amd64_match_len_ofs_ok
+ TESTQ AX, AX
+ JNZ sequenceDecs_decode_amd64_error_match_len_ofs_mismatch
+
+sequenceDecs_decode_amd64_match_len_ofs_ok:
+ ADDQ $0x18, R10
+ MOVQ ctx+16(FP), AX
+ DECQ 96(AX)
+ JNS sequenceDecs_decode_amd64_main_loop
+ MOVQ s+0(FP), AX
+ MOVQ R11, 144(AX)
+ MOVQ R12, 152(AX)
+ MOVQ R13, 160(AX)
+ MOVQ br+8(FP), AX
+ MOVQ DX, 32(AX)
+ MOVB BL, 40(AX)
+ MOVQ SI, 24(AX)
+
+ // Return success
+ MOVQ $0x00000000, ret+24(FP)
+ RET
+
+ // Return with match length error
+sequenceDecs_decode_amd64_error_match_len_ofs_mismatch:
+ MOVQ $0x00000001, ret+24(FP)
+ RET
+
+ // Return with match too long error
+sequenceDecs_decode_amd64_error_match_len_too_big:
+ MOVQ $0x00000002, ret+24(FP)
+ RET
+
+ // Return with match offset too long error
+ MOVQ $0x00000003, ret+24(FP)
+ RET
+
+ // Return with not enough literals error
+error_not_enough_literals:
+ MOVQ $0x00000004, ret+24(FP)
+ RET
+
+ // Return with not enough output space error
+ MOVQ $0x00000005, ret+24(FP)
+ RET
+
+// func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
+// Requires: CMOV
+TEXT ·sequenceDecs_decode_56_amd64(SB), $8-32
+ MOVQ br+8(FP), AX
+ MOVQ 32(AX), DX
+ MOVBQZX 40(AX), BX
+ MOVQ 24(AX), SI
+ MOVQ (AX), AX
+ ADDQ SI, AX
+ MOVQ AX, (SP)
+ MOVQ ctx+16(FP), AX
+ MOVQ 72(AX), DI
+ MOVQ 80(AX), R8
+ MOVQ 88(AX), R9
+ MOVQ 104(AX), R10
+ MOVQ s+0(FP), AX
+ MOVQ 144(AX), R11
+ MOVQ 152(AX), R12
+ MOVQ 160(AX), R13
+
+sequenceDecs_decode_56_amd64_main_loop:
+ MOVQ (SP), R14
+
+ // Fill bitreader to have enough for the offset and match length.
+ CMPQ SI, $0x08
+ JL sequenceDecs_decode_56_amd64_fill_byte_by_byte
+ MOVQ BX, AX
+ SHRQ $0x03, AX
+ SUBQ AX, R14
+ MOVQ (R14), DX
+ SUBQ AX, SI
+ ANDQ $0x07, BX
+ JMP sequenceDecs_decode_56_amd64_fill_end
+
+sequenceDecs_decode_56_amd64_fill_byte_by_byte:
+ CMPQ SI, $0x00
+ JLE sequenceDecs_decode_56_amd64_fill_end
+ CMPQ BX, $0x07
+ JLE sequenceDecs_decode_56_amd64_fill_end
+ SHLQ $0x08, DX
+ SUBQ $0x01, R14
+ SUBQ $0x01, SI
+ SUBQ $0x08, BX
+ MOVBQZX (R14), AX
+ ORQ AX, DX
+ JMP sequenceDecs_decode_56_amd64_fill_byte_by_byte
+
+sequenceDecs_decode_56_amd64_fill_end:
+ // Update offset
+ MOVQ R9, AX
+ MOVQ BX, CX
+ MOVQ DX, R15
+ SHLQ CL, R15
+ MOVB AH, CL
+ ADDQ CX, BX
+ NEGL CX
+ SHRQ CL, R15
+ SHRQ $0x20, AX
+ TESTQ CX, CX
+ CMOVQEQ CX, R15
+ ADDQ R15, AX
+ MOVQ AX, 16(R10)
+
+ // Update match length
+ MOVQ R8, AX
+ MOVQ BX, CX
+ MOVQ DX, R15
+ SHLQ CL, R15
+ MOVB AH, CL
+ ADDQ CX, BX
+ NEGL CX
+ SHRQ CL, R15
+ SHRQ $0x20, AX
+ TESTQ CX, CX
+ CMOVQEQ CX, R15
+ ADDQ R15, AX
+ MOVQ AX, 8(R10)
+
+ // Update literal length
+ MOVQ DI, AX
+ MOVQ BX, CX
+ MOVQ DX, R15
+ SHLQ CL, R15
+ MOVB AH, CL
+ ADDQ CX, BX
+ NEGL CX
+ SHRQ CL, R15
+ SHRQ $0x20, AX
+ TESTQ CX, CX
+ CMOVQEQ CX, R15
+ ADDQ R15, AX
+ MOVQ AX, (R10)
+
+ // Fill bitreader for state updates
+ MOVQ R14, (SP)
+ MOVQ R9, AX
+ SHRQ $0x08, AX
+ MOVBQZX AL, AX
+ MOVQ ctx+16(FP), CX
+ CMPQ 96(CX), $0x00
+ JZ sequenceDecs_decode_56_amd64_skip_update
+
+ // Update Literal Length State
+ MOVBQZX DI, R14
+ SHRQ $0x10, DI
+ MOVWQZX DI, DI
+ CMPQ R14, $0x00
+ JZ sequenceDecs_decode_56_amd64_llState_updateState_skip_zero
+ MOVQ BX, CX
+ ADDQ R14, BX
+ MOVQ DX, R15
+ SHLQ CL, R15
+ MOVQ R14, CX
+ NEGQ CX
+ SHRQ CL, R15
+ ADDQ R15, DI
+
+sequenceDecs_decode_56_amd64_llState_updateState_skip_zero:
+ // Load ctx.llTable
+ MOVQ ctx+16(FP), CX
+ MOVQ (CX), CX
+ MOVQ (CX)(DI*8), DI
+
+ // Update Match Length State
+ MOVBQZX R8, R14
+ SHRQ $0x10, R8
+ MOVWQZX R8, R8
+ CMPQ R14, $0x00
+ JZ sequenceDecs_decode_56_amd64_mlState_updateState_skip_zero
+ MOVQ BX, CX
+ ADDQ R14, BX
+ MOVQ DX, R15
+ SHLQ CL, R15
+ MOVQ R14, CX
+ NEGQ CX
+ SHRQ CL, R15
+ ADDQ R15, R8
+
+sequenceDecs_decode_56_amd64_mlState_updateState_skip_zero:
+ // Load ctx.mlTable
+ MOVQ ctx+16(FP), CX
+ MOVQ 24(CX), CX
+ MOVQ (CX)(R8*8), R8
+
+ // Update Offset State
+ MOVBQZX R9, R14
+ SHRQ $0x10, R9
+ MOVWQZX R9, R9
+ CMPQ R14, $0x00
+ JZ sequenceDecs_decode_56_amd64_ofState_updateState_skip_zero
+ MOVQ BX, CX
+ ADDQ R14, BX
+ MOVQ DX, R15
+ SHLQ CL, R15
+ MOVQ R14, CX
+ NEGQ CX
+ SHRQ CL, R15
+ ADDQ R15, R9
+
+sequenceDecs_decode_56_amd64_ofState_updateState_skip_zero:
+ // Load ctx.ofTable
+ MOVQ ctx+16(FP), CX
+ MOVQ 48(CX), CX
+ MOVQ (CX)(R9*8), R9
+
+sequenceDecs_decode_56_amd64_skip_update:
+ // Adjust offset
+ MOVQ 16(R10), CX
+ CMPQ AX, $0x01
+ JBE sequenceDecs_decode_56_amd64_adjust_offsetB_1_or_0
+ MOVQ R12, R13
+ MOVQ R11, R12
+ MOVQ CX, R11
+ JMP sequenceDecs_decode_56_amd64_adjust_end
+
+sequenceDecs_decode_56_amd64_adjust_offsetB_1_or_0:
+ CMPQ (R10), $0x00000000
+ JNE sequenceDecs_decode_56_amd64_adjust_offset_maybezero
+ INCQ CX
+ JMP sequenceDecs_decode_56_amd64_adjust_offset_nonzero
+
+sequenceDecs_decode_56_amd64_adjust_offset_maybezero:
+ TESTQ CX, CX
+ JNZ sequenceDecs_decode_56_amd64_adjust_offset_nonzero
+ MOVQ R11, CX
+ JMP sequenceDecs_decode_56_amd64_adjust_end
+
+sequenceDecs_decode_56_amd64_adjust_offset_nonzero:
+ CMPQ CX, $0x01
+ JB sequenceDecs_decode_56_amd64_adjust_zero
+ JEQ sequenceDecs_decode_56_amd64_adjust_one
+ CMPQ CX, $0x02
+ JA sequenceDecs_decode_56_amd64_adjust_three
+ JMP sequenceDecs_decode_56_amd64_adjust_two
+
+sequenceDecs_decode_56_amd64_adjust_zero:
+ MOVQ R11, AX
+ JMP sequenceDecs_decode_56_amd64_adjust_test_temp_valid
+
+sequenceDecs_decode_56_amd64_adjust_one:
+ MOVQ R12, AX
+ JMP sequenceDecs_decode_56_amd64_adjust_test_temp_valid
+
+sequenceDecs_decode_56_amd64_adjust_two:
+ MOVQ R13, AX
+ JMP sequenceDecs_decode_56_amd64_adjust_test_temp_valid
+
+sequenceDecs_decode_56_amd64_adjust_three:
+ LEAQ -1(R11), AX
+
+sequenceDecs_decode_56_amd64_adjust_test_temp_valid:
+ TESTQ AX, AX
+ JNZ sequenceDecs_decode_56_amd64_adjust_temp_valid
+ MOVQ $0x00000001, AX
+
+sequenceDecs_decode_56_amd64_adjust_temp_valid:
+ CMPQ CX, $0x01
+ CMOVQNE R12, R13
+ MOVQ R11, R12
+ MOVQ AX, R11
+ MOVQ AX, CX
+
+sequenceDecs_decode_56_amd64_adjust_end:
+ MOVQ CX, 16(R10)
+
+ // Check values
+ MOVQ 8(R10), AX
+ MOVQ (R10), R14
+ LEAQ (AX)(R14*1), R15
+ MOVQ s+0(FP), BP
+ ADDQ R15, 256(BP)
+ MOVQ ctx+16(FP), R15
+ SUBQ R14, 128(R15)
+ JS error_not_enough_literals
+ CMPQ AX, $0x00020002
+ JA sequenceDecs_decode_56_amd64_error_match_len_too_big
+ TESTQ CX, CX
+ JNZ sequenceDecs_decode_56_amd64_match_len_ofs_ok
+ TESTQ AX, AX
+ JNZ sequenceDecs_decode_56_amd64_error_match_len_ofs_mismatch
+
+sequenceDecs_decode_56_amd64_match_len_ofs_ok:
+ ADDQ $0x18, R10
+ MOVQ ctx+16(FP), AX
+ DECQ 96(AX)
+ JNS sequenceDecs_decode_56_amd64_main_loop
+ MOVQ s+0(FP), AX
+ MOVQ R11, 144(AX)
+ MOVQ R12, 152(AX)
+ MOVQ R13, 160(AX)
+ MOVQ br+8(FP), AX
+ MOVQ DX, 32(AX)
+ MOVB BL, 40(AX)
+ MOVQ SI, 24(AX)
+
+ // Return success
+ MOVQ $0x00000000, ret+24(FP)
+ RET
+
+ // Return with match length error
+sequenceDecs_decode_56_amd64_error_match_len_ofs_mismatch:
+ MOVQ $0x00000001, ret+24(FP)
+ RET
+
+ // Return with match too long error
+sequenceDecs_decode_56_amd64_error_match_len_too_big:
+ MOVQ $0x00000002, ret+24(FP)
+ RET
+
+ // Return with match offset too long error
+ MOVQ $0x00000003, ret+24(FP)
+ RET
+
+ // Return with not enough literals error
+error_not_enough_literals:
+ MOVQ $0x00000004, ret+24(FP)
+ RET
+
+ // Return with not enough output space error
+ MOVQ $0x00000005, ret+24(FP)
+ RET
+
+// func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
+// Requires: BMI, BMI2, CMOV
+TEXT ·sequenceDecs_decode_bmi2(SB), $8-32
+ MOVQ br+8(FP), CX
+ MOVQ 32(CX), AX
+ MOVBQZX 40(CX), DX
+ MOVQ 24(CX), BX
+ MOVQ (CX), CX
+ ADDQ BX, CX
+ MOVQ CX, (SP)
+ MOVQ ctx+16(FP), CX
+ MOVQ 72(CX), SI
+ MOVQ 80(CX), DI
+ MOVQ 88(CX), R8
+ MOVQ 104(CX), R9
+ MOVQ s+0(FP), CX
+ MOVQ 144(CX), R10
+ MOVQ 152(CX), R11
+ MOVQ 160(CX), R12
+
+sequenceDecs_decode_bmi2_main_loop:
+ MOVQ (SP), R13
+
+ // Fill bitreader to have enough for the offset and match length.
+ CMPQ BX, $0x08
+ JL sequenceDecs_decode_bmi2_fill_byte_by_byte
+ MOVQ DX, CX
+ SHRQ $0x03, CX
+ SUBQ CX, R13
+ MOVQ (R13), AX
+ SUBQ CX, BX
+ ANDQ $0x07, DX
+ JMP sequenceDecs_decode_bmi2_fill_end
+
+sequenceDecs_decode_bmi2_fill_byte_by_byte:
+ CMPQ BX, $0x00
+ JLE sequenceDecs_decode_bmi2_fill_end
+ CMPQ DX, $0x07
+ JLE sequenceDecs_decode_bmi2_fill_end
+ SHLQ $0x08, AX
+ SUBQ $0x01, R13
+ SUBQ $0x01, BX
+ SUBQ $0x08, DX
+ MOVBQZX (R13), CX
+ ORQ CX, AX
+ JMP sequenceDecs_decode_bmi2_fill_byte_by_byte
+
+sequenceDecs_decode_bmi2_fill_end:
+ // Update offset
+ MOVQ $0x00000808, CX
+ BEXTRQ CX, R8, R14
+ MOVQ AX, R15
+ LEAQ (DX)(R14*1), CX
+ ROLQ CL, R15
+ BZHIQ R14, R15, R15
+ MOVQ CX, DX
+ MOVQ R8, CX
+ SHRQ $0x20, CX
+ ADDQ R15, CX
+ MOVQ CX, 16(R9)
+
+ // Update match length
+ MOVQ $0x00000808, CX
+ BEXTRQ CX, DI, R14
+ MOVQ AX, R15
+ LEAQ (DX)(R14*1), CX
+ ROLQ CL, R15
+ BZHIQ R14, R15, R15
+ MOVQ CX, DX
+ MOVQ DI, CX
+ SHRQ $0x20, CX
+ ADDQ R15, CX
+ MOVQ CX, 8(R9)
+
+ // Fill bitreader to have enough for the remaining
+ CMPQ BX, $0x08
+ JL sequenceDecs_decode_bmi2_fill_2_byte_by_byte
+ MOVQ DX, CX
+ SHRQ $0x03, CX
+ SUBQ CX, R13
+ MOVQ (R13), AX
+ SUBQ CX, BX
+ ANDQ $0x07, DX
+ JMP sequenceDecs_decode_bmi2_fill_2_end
+
+sequenceDecs_decode_bmi2_fill_2_byte_by_byte:
+ CMPQ BX, $0x00
+ JLE sequenceDecs_decode_bmi2_fill_2_end
+ CMPQ DX, $0x07
+ JLE sequenceDecs_decode_bmi2_fill_2_end
+ SHLQ $0x08, AX
+ SUBQ $0x01, R13
+ SUBQ $0x01, BX
+ SUBQ $0x08, DX
+ MOVBQZX (R13), CX
+ ORQ CX, AX
+ JMP sequenceDecs_decode_bmi2_fill_2_byte_by_byte
+
+sequenceDecs_decode_bmi2_fill_2_end:
+ // Update literal length
+ MOVQ $0x00000808, CX
+ BEXTRQ CX, SI, R14
+ MOVQ AX, R15
+ LEAQ (DX)(R14*1), CX
+ ROLQ CL, R15
+ BZHIQ R14, R15, R15
+ MOVQ CX, DX
+ MOVQ SI, CX
+ SHRQ $0x20, CX
+ ADDQ R15, CX
+ MOVQ CX, (R9)
+
+ // Fill bitreader for state updates
+ MOVQ R13, (SP)
+ MOVQ $0x00000808, CX
+ BEXTRQ CX, R8, R13
+ MOVQ ctx+16(FP), CX
+ CMPQ 96(CX), $0x00
+ JZ sequenceDecs_decode_bmi2_skip_update
+
+ // Update Literal Length State
+ MOVBQZX SI, R14
+ MOVQ $0x00001010, CX
+ BEXTRQ CX, SI, SI
+ LEAQ (DX)(R14*1), CX
+ MOVQ AX, R15
+ MOVQ CX, DX
+ ROLQ CL, R15
+ BZHIQ R14, R15, R15
+ ADDQ R15, SI
+
+ // Load ctx.llTable
+ MOVQ ctx+16(FP), CX
+ MOVQ (CX), CX
+ MOVQ (CX)(SI*8), SI
+
+ // Update Match Length State
+ MOVBQZX DI, R14
+ MOVQ $0x00001010, CX
+ BEXTRQ CX, DI, DI
+ LEAQ (DX)(R14*1), CX
+ MOVQ AX, R15
+ MOVQ CX, DX
+ ROLQ CL, R15
+ BZHIQ R14, R15, R15
+ ADDQ R15, DI
+
+ // Load ctx.mlTable
+ MOVQ ctx+16(FP), CX
+ MOVQ 24(CX), CX
+ MOVQ (CX)(DI*8), DI
+
+ // Update Offset State
+ MOVBQZX R8, R14
+ MOVQ $0x00001010, CX
+ BEXTRQ CX, R8, R8
+ LEAQ (DX)(R14*1), CX
+ MOVQ AX, R15
+ MOVQ CX, DX
+ ROLQ CL, R15
+ BZHIQ R14, R15, R15
+ ADDQ R15, R8
+
+ // Load ctx.ofTable
+ MOVQ ctx+16(FP), CX
+ MOVQ 48(CX), CX
+ MOVQ (CX)(R8*8), R8
+
+sequenceDecs_decode_bmi2_skip_update:
+ // Adjust offset
+ MOVQ 16(R9), CX
+ CMPQ R13, $0x01
+ JBE sequenceDecs_decode_bmi2_adjust_offsetB_1_or_0
+ MOVQ R11, R12
+ MOVQ R10, R11
+ MOVQ CX, R10
+ JMP sequenceDecs_decode_bmi2_adjust_end
+
+sequenceDecs_decode_bmi2_adjust_offsetB_1_or_0:
+ CMPQ (R9), $0x00000000
+ JNE sequenceDecs_decode_bmi2_adjust_offset_maybezero
+ INCQ CX
+ JMP sequenceDecs_decode_bmi2_adjust_offset_nonzero
+
+sequenceDecs_decode_bmi2_adjust_offset_maybezero:
+ TESTQ CX, CX
+ JNZ sequenceDecs_decode_bmi2_adjust_offset_nonzero
+ MOVQ R10, CX
+ JMP sequenceDecs_decode_bmi2_adjust_end
+
+sequenceDecs_decode_bmi2_adjust_offset_nonzero:
+ CMPQ CX, $0x01
+ JB sequenceDecs_decode_bmi2_adjust_zero
+ JEQ sequenceDecs_decode_bmi2_adjust_one
+ CMPQ CX, $0x02
+ JA sequenceDecs_decode_bmi2_adjust_three
+ JMP sequenceDecs_decode_bmi2_adjust_two
+
+sequenceDecs_decode_bmi2_adjust_zero:
+ MOVQ R10, R13
+ JMP sequenceDecs_decode_bmi2_adjust_test_temp_valid
+
+sequenceDecs_decode_bmi2_adjust_one:
+ MOVQ R11, R13
+ JMP sequenceDecs_decode_bmi2_adjust_test_temp_valid
+
+sequenceDecs_decode_bmi2_adjust_two:
+ MOVQ R12, R13
+ JMP sequenceDecs_decode_bmi2_adjust_test_temp_valid
+
+sequenceDecs_decode_bmi2_adjust_three:
+ LEAQ -1(R10), R13
+
+sequenceDecs_decode_bmi2_adjust_test_temp_valid:
+ TESTQ R13, R13
+ JNZ sequenceDecs_decode_bmi2_adjust_temp_valid
+ MOVQ $0x00000001, R13
+
+sequenceDecs_decode_bmi2_adjust_temp_valid:
+ CMPQ CX, $0x01
+ CMOVQNE R11, R12
+ MOVQ R10, R11
+ MOVQ R13, R10
+ MOVQ R13, CX
+
+sequenceDecs_decode_bmi2_adjust_end:
+ MOVQ CX, 16(R9)
+
+ // Check values
+ MOVQ 8(R9), R13
+ MOVQ (R9), R14
+ LEAQ (R13)(R14*1), R15
+ MOVQ s+0(FP), BP
+ ADDQ R15, 256(BP)
+ MOVQ ctx+16(FP), R15
+ SUBQ R14, 128(R15)
+ JS error_not_enough_literals
+ CMPQ R13, $0x00020002
+ JA sequenceDecs_decode_bmi2_error_match_len_too_big
+ TESTQ CX, CX
+ JNZ sequenceDecs_decode_bmi2_match_len_ofs_ok
+ TESTQ R13, R13
+ JNZ sequenceDecs_decode_bmi2_error_match_len_ofs_mismatch
+
+sequenceDecs_decode_bmi2_match_len_ofs_ok:
+ ADDQ $0x18, R9
+ MOVQ ctx+16(FP), CX
+ DECQ 96(CX)
+ JNS sequenceDecs_decode_bmi2_main_loop
+ MOVQ s+0(FP), CX
+ MOVQ R10, 144(CX)
+ MOVQ R11, 152(CX)
+ MOVQ R12, 160(CX)
+ MOVQ br+8(FP), CX
+ MOVQ AX, 32(CX)
+ MOVB DL, 40(CX)
+ MOVQ BX, 24(CX)
+
+ // Return success
+ MOVQ $0x00000000, ret+24(FP)
+ RET
+
+ // Return with match length error
+sequenceDecs_decode_bmi2_error_match_len_ofs_mismatch:
+ MOVQ $0x00000001, ret+24(FP)
+ RET
+
+ // Return with match too long error
+sequenceDecs_decode_bmi2_error_match_len_too_big:
+ MOVQ $0x00000002, ret+24(FP)
+ RET
+
+ // Return with match offset too long error
+ MOVQ $0x00000003, ret+24(FP)
+ RET
+
+ // Return with not enough literals error
+error_not_enough_literals:
+ MOVQ $0x00000004, ret+24(FP)
+ RET
+
+ // Return with not enough output space error
+ MOVQ $0x00000005, ret+24(FP)
+ RET
+
+// func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
+// Requires: BMI, BMI2, CMOV
+TEXT ·sequenceDecs_decode_56_bmi2(SB), $8-32
+ MOVQ br+8(FP), CX
+ MOVQ 32(CX), AX
+ MOVBQZX 40(CX), DX
+ MOVQ 24(CX), BX
+ MOVQ (CX), CX
+ ADDQ BX, CX
+ MOVQ CX, (SP)
+ MOVQ ctx+16(FP), CX
+ MOVQ 72(CX), SI
+ MOVQ 80(CX), DI
+ MOVQ 88(CX), R8
+ MOVQ 104(CX), R9
+ MOVQ s+0(FP), CX
+ MOVQ 144(CX), R10
+ MOVQ 152(CX), R11
+ MOVQ 160(CX), R12
+
+sequenceDecs_decode_56_bmi2_main_loop:
+ MOVQ (SP), R13
+
+ // Fill bitreader to have enough for the offset and match length.
+ CMPQ BX, $0x08
+ JL sequenceDecs_decode_56_bmi2_fill_byte_by_byte
+ MOVQ DX, CX
+ SHRQ $0x03, CX
+ SUBQ CX, R13
+ MOVQ (R13), AX
+ SUBQ CX, BX
+ ANDQ $0x07, DX
+ JMP sequenceDecs_decode_56_bmi2_fill_end
+
+sequenceDecs_decode_56_bmi2_fill_byte_by_byte:
+ CMPQ BX, $0x00
+ JLE sequenceDecs_decode_56_bmi2_fill_end
+ CMPQ DX, $0x07
+ JLE sequenceDecs_decode_56_bmi2_fill_end
+ SHLQ $0x08, AX
+ SUBQ $0x01, R13
+ SUBQ $0x01, BX
+ SUBQ $0x08, DX
+ MOVBQZX (R13), CX
+ ORQ CX, AX
+ JMP sequenceDecs_decode_56_bmi2_fill_byte_by_byte
+
+sequenceDecs_decode_56_bmi2_fill_end:
+ // Update offset
+ MOVQ $0x00000808, CX
+ BEXTRQ CX, R8, R14
+ MOVQ AX, R15
+ LEAQ (DX)(R14*1), CX
+ ROLQ CL, R15
+ BZHIQ R14, R15, R15
+ MOVQ CX, DX
+ MOVQ R8, CX
+ SHRQ $0x20, CX
+ ADDQ R15, CX
+ MOVQ CX, 16(R9)
+
+ // Update match length
+ MOVQ $0x00000808, CX
+ BEXTRQ CX, DI, R14
+ MOVQ AX, R15
+ LEAQ (DX)(R14*1), CX
+ ROLQ CL, R15
+ BZHIQ R14, R15, R15
+ MOVQ CX, DX
+ MOVQ DI, CX
+ SHRQ $0x20, CX
+ ADDQ R15, CX
+ MOVQ CX, 8(R9)
+
+ // Update literal length
+ MOVQ $0x00000808, CX
+ BEXTRQ CX, SI, R14
+ MOVQ AX, R15
+ LEAQ (DX)(R14*1), CX
+ ROLQ CL, R15
+ BZHIQ R14, R15, R15
+ MOVQ CX, DX
+ MOVQ SI, CX
+ SHRQ $0x20, CX
+ ADDQ R15, CX
+ MOVQ CX, (R9)
+
+ // Fill bitreader for state updates
+ MOVQ R13, (SP)
+ MOVQ $0x00000808, CX
+ BEXTRQ CX, R8, R13
+ MOVQ ctx+16(FP), CX
+ CMPQ 96(CX), $0x00
+ JZ sequenceDecs_decode_56_bmi2_skip_update
+
+ // Update Literal Length State
+ MOVBQZX SI, R14
+ MOVQ $0x00001010, CX
+ BEXTRQ CX, SI, SI
+ LEAQ (DX)(R14*1), CX
+ MOVQ AX, R15
+ MOVQ CX, DX
+ ROLQ CL, R15
+ BZHIQ R14, R15, R15
+ ADDQ R15, SI
+
+ // Load ctx.llTable
+ MOVQ ctx+16(FP), CX
+ MOVQ (CX), CX
+ MOVQ (CX)(SI*8), SI
+
+ // Update Match Length State
+ MOVBQZX DI, R14
+ MOVQ $0x00001010, CX
+ BEXTRQ CX, DI, DI
+ LEAQ (DX)(R14*1), CX
+ MOVQ AX, R15
+ MOVQ CX, DX
+ ROLQ CL, R15
+ BZHIQ R14, R15, R15
+ ADDQ R15, DI
+
+ // Load ctx.mlTable
+ MOVQ ctx+16(FP), CX
+ MOVQ 24(CX), CX
+ MOVQ (CX)(DI*8), DI
+
+ // Update Offset State
+ MOVBQZX R8, R14
+ MOVQ $0x00001010, CX
+ BEXTRQ CX, R8, R8
+ LEAQ (DX)(R14*1), CX
+ MOVQ AX, R15
+ MOVQ CX, DX
+ ROLQ CL, R15
+ BZHIQ R14, R15, R15
+ ADDQ R15, R8
+
+ // Load ctx.ofTable
+ MOVQ ctx+16(FP), CX
+ MOVQ 48(CX), CX
+ MOVQ (CX)(R8*8), R8
+
+sequenceDecs_decode_56_bmi2_skip_update:
+ // Adjust offset
+ MOVQ 16(R9), CX
+ CMPQ R13, $0x01
+ JBE sequenceDecs_decode_56_bmi2_adjust_offsetB_1_or_0
+ MOVQ R11, R12
+ MOVQ R10, R11
+ MOVQ CX, R10
+ JMP sequenceDecs_decode_56_bmi2_adjust_end
+
+sequenceDecs_decode_56_bmi2_adjust_offsetB_1_or_0:
+ CMPQ (R9), $0x00000000
+ JNE sequenceDecs_decode_56_bmi2_adjust_offset_maybezero
+ INCQ CX
+ JMP sequenceDecs_decode_56_bmi2_adjust_offset_nonzero
+
+sequenceDecs_decode_56_bmi2_adjust_offset_maybezero:
+ TESTQ CX, CX
+ JNZ sequenceDecs_decode_56_bmi2_adjust_offset_nonzero
+ MOVQ R10, CX
+ JMP sequenceDecs_decode_56_bmi2_adjust_end
+
+sequenceDecs_decode_56_bmi2_adjust_offset_nonzero:
+ CMPQ CX, $0x01
+ JB sequenceDecs_decode_56_bmi2_adjust_zero
+ JEQ sequenceDecs_decode_56_bmi2_adjust_one
+ CMPQ CX, $0x02
+ JA sequenceDecs_decode_56_bmi2_adjust_three
+ JMP sequenceDecs_decode_56_bmi2_adjust_two
+
+sequenceDecs_decode_56_bmi2_adjust_zero:
+ MOVQ R10, R13
+ JMP sequenceDecs_decode_56_bmi2_adjust_test_temp_valid
+
+sequenceDecs_decode_56_bmi2_adjust_one:
+ MOVQ R11, R13
+ JMP sequenceDecs_decode_56_bmi2_adjust_test_temp_valid
+
+sequenceDecs_decode_56_bmi2_adjust_two:
+ MOVQ R12, R13
+ JMP sequenceDecs_decode_56_bmi2_adjust_test_temp_valid
+
+sequenceDecs_decode_56_bmi2_adjust_three:
+ LEAQ -1(R10), R13
+
+sequenceDecs_decode_56_bmi2_adjust_test_temp_valid:
+ TESTQ R13, R13
+ JNZ sequenceDecs_decode_56_bmi2_adjust_temp_valid
+ MOVQ $0x00000001, R13
+
+sequenceDecs_decode_56_bmi2_adjust_temp_valid:
+ CMPQ CX, $0x01
+ CMOVQNE R11, R12
+ MOVQ R10, R11
+ MOVQ R13, R10
+ MOVQ R13, CX
+
+sequenceDecs_decode_56_bmi2_adjust_end:
+ MOVQ CX, 16(R9)
+
+ // Check values
+ MOVQ 8(R9), R13
+ MOVQ (R9), R14
+ LEAQ (R13)(R14*1), R15
+ MOVQ s+0(FP), BP
+ ADDQ R15, 256(BP)
+ MOVQ ctx+16(FP), R15
+ SUBQ R14, 128(R15)
+ JS error_not_enough_literals
+ CMPQ R13, $0x00020002
+ JA sequenceDecs_decode_56_bmi2_error_match_len_too_big
+ TESTQ CX, CX
+ JNZ sequenceDecs_decode_56_bmi2_match_len_ofs_ok
+ TESTQ R13, R13
+ JNZ sequenceDecs_decode_56_bmi2_error_match_len_ofs_mismatch
+
+sequenceDecs_decode_56_bmi2_match_len_ofs_ok:
+ ADDQ $0x18, R9
+ MOVQ ctx+16(FP), CX
+ DECQ 96(CX)
+ JNS sequenceDecs_decode_56_bmi2_main_loop
+ MOVQ s+0(FP), CX
+ MOVQ R10, 144(CX)
+ MOVQ R11, 152(CX)
+ MOVQ R12, 160(CX)
+ MOVQ br+8(FP), CX
+ MOVQ AX, 32(CX)
+ MOVB DL, 40(CX)
+ MOVQ BX, 24(CX)
+
+ // Return success
+ MOVQ $0x00000000, ret+24(FP)
+ RET
+
+ // Return with match length error
+sequenceDecs_decode_56_bmi2_error_match_len_ofs_mismatch:
+ MOVQ $0x00000001, ret+24(FP)
+ RET
+
+ // Return with match too long error
+sequenceDecs_decode_56_bmi2_error_match_len_too_big:
+ MOVQ $0x00000002, ret+24(FP)
+ RET
+
+ // Return with match offset too long error
+ MOVQ $0x00000003, ret+24(FP)
+ RET
+
+ // Return with not enough literals error
+error_not_enough_literals:
+ MOVQ $0x00000004, ret+24(FP)
+ RET
+
+ // Return with not enough output space error
+ MOVQ $0x00000005, ret+24(FP)
+ RET
+
+// func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool
+// Requires: SSE
+TEXT ·sequenceDecs_executeSimple_amd64(SB), $8-9
+ MOVQ ctx+0(FP), R10
+ MOVQ 8(R10), CX
+ TESTQ CX, CX
+ JZ empty_seqs
+ MOVQ (R10), AX
+ MOVQ 24(R10), DX
+ MOVQ 32(R10), BX
+ MOVQ 80(R10), SI
+ MOVQ 104(R10), DI
+ MOVQ 120(R10), R8
+ MOVQ 56(R10), R9
+ MOVQ 64(R10), R10
+ ADDQ R10, R9
+
+ // seqsBase += 24 * seqIndex
+ LEAQ (DX)(DX*2), R11
+ SHLQ $0x03, R11
+ ADDQ R11, AX
+
+ // outBase += outPosition
+ ADDQ DI, BX
+
+main_loop:
+ MOVQ (AX), R11
+ MOVQ 16(AX), R12
+ MOVQ 8(AX), R13
+
+ // Copy literals
+ TESTQ R11, R11
+ JZ check_offset
+ XORQ R14, R14
+ TESTQ $0x00000001, R11
+ JZ copy_1_word
+ MOVB (SI)(R14*1), R15
+ MOVB R15, (BX)(R14*1)
+ ADDQ $0x01, R14
+
+copy_1_word:
+ TESTQ $0x00000002, R11
+ JZ copy_1_dword
+ MOVW (SI)(R14*1), R15
+ MOVW R15, (BX)(R14*1)
+ ADDQ $0x02, R14
+
+copy_1_dword:
+ TESTQ $0x00000004, R11
+ JZ copy_1_qword
+ MOVL (SI)(R14*1), R15
+ MOVL R15, (BX)(R14*1)
+ ADDQ $0x04, R14
+
+copy_1_qword:
+ TESTQ $0x00000008, R11
+ JZ copy_1_test
+ MOVQ (SI)(R14*1), R15
+ MOVQ R15, (BX)(R14*1)
+ ADDQ $0x08, R14
+ JMP copy_1_test
+
+copy_1:
+ MOVUPS (SI)(R14*1), X0
+ MOVUPS X0, (BX)(R14*1)
+ ADDQ $0x10, R14
+
+copy_1_test:
+ CMPQ R14, R11
+ JB copy_1
+ ADDQ R11, SI
+ ADDQ R11, BX
+ ADDQ R11, DI
+
+ // Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize)
+check_offset:
+ LEAQ (DI)(R10*1), R11
+ CMPQ R12, R11
+ JG error_match_off_too_big
+ CMPQ R12, R8
+ JG error_match_off_too_big
+
+ // Copy match from history
+ MOVQ R12, R11
+ SUBQ DI, R11
+ JLS copy_match
+ MOVQ R9, R14
+ SUBQ R11, R14
+ CMPQ R13, R11
+ JGE copy_all_from_history
+ XORQ R11, R11
+ TESTQ $0x00000001, R13
+ JZ copy_4_word
+ MOVB (R14)(R11*1), R12
+ MOVB R12, (BX)(R11*1)
+ ADDQ $0x01, R11
+
+copy_4_word:
+ TESTQ $0x00000002, R13
+ JZ copy_4_dword
+ MOVW (R14)(R11*1), R12
+ MOVW R12, (BX)(R11*1)
+ ADDQ $0x02, R11
+
+copy_4_dword:
+ TESTQ $0x00000004, R13
+ JZ copy_4_qword
+ MOVL (R14)(R11*1), R12
+ MOVL R12, (BX)(R11*1)
+ ADDQ $0x04, R11
+
+copy_4_qword:
+ TESTQ $0x00000008, R13
+ JZ copy_4_test
+ MOVQ (R14)(R11*1), R12
+ MOVQ R12, (BX)(R11*1)
+ ADDQ $0x08, R11
+ JMP copy_4_test
+
+copy_4:
+ MOVUPS (R14)(R11*1), X0
+ MOVUPS X0, (BX)(R11*1)
+ ADDQ $0x10, R11
+
+copy_4_test:
+ CMPQ R11, R13
+ JB copy_4
+ ADDQ R13, DI
+ ADDQ R13, BX
+ ADDQ $0x18, AX
+ INCQ DX
+ CMPQ DX, CX
+ JB main_loop
+ JMP loop_finished
+
+copy_all_from_history:
+ XORQ R15, R15
+ TESTQ $0x00000001, R11
+ JZ copy_5_word
+ MOVB (R14)(R15*1), BP
+ MOVB BP, (BX)(R15*1)
+ ADDQ $0x01, R15
+
+copy_5_word:
+ TESTQ $0x00000002, R11
+ JZ copy_5_dword
+ MOVW (R14)(R15*1), BP
+ MOVW BP, (BX)(R15*1)
+ ADDQ $0x02, R15
+
+copy_5_dword:
+ TESTQ $0x00000004, R11
+ JZ copy_5_qword
+ MOVL (R14)(R15*1), BP
+ MOVL BP, (BX)(R15*1)
+ ADDQ $0x04, R15
+
+copy_5_qword:
+ TESTQ $0x00000008, R11
+ JZ copy_5_test
+ MOVQ (R14)(R15*1), BP
+ MOVQ BP, (BX)(R15*1)
+ ADDQ $0x08, R15
+ JMP copy_5_test
+
+copy_5:
+ MOVUPS (R14)(R15*1), X0
+ MOVUPS X0, (BX)(R15*1)
+ ADDQ $0x10, R15
+
+copy_5_test:
+ CMPQ R15, R11
+ JB copy_5
+ ADDQ R11, BX
+ ADDQ R11, DI
+ SUBQ R11, R13
+
+ // Copy match from the current buffer
+copy_match:
+ TESTQ R13, R13
+ JZ handle_loop
+ MOVQ BX, R11
+ SUBQ R12, R11
+
+ // ml <= mo
+ CMPQ R13, R12
+ JA copy_overlapping_match
+
+ // Copy non-overlapping match
+ XORQ R12, R12
+
+copy_2:
+ MOVUPS (R11)(R12*1), X0
+ MOVUPS X0, (BX)(R12*1)
+ ADDQ $0x10, R12
+ CMPQ R12, R13
+ JB copy_2
+ ADDQ R13, BX
+ ADDQ R13, DI
+ JMP handle_loop
+
+ // Copy overlapping match
+copy_overlapping_match:
+ XORQ R12, R12
+
+copy_slow_3:
+ MOVB (R11)(R12*1), R14
+ MOVB R14, (BX)(R12*1)
+ INCQ R12
+ CMPQ R12, R13
+ JB copy_slow_3
+ ADDQ R13, BX
+ ADDQ R13, DI
+
+handle_loop:
+ ADDQ $0x18, AX
+ INCQ DX
+ CMPQ DX, CX
+ JB main_loop
+
+loop_finished:
+ // Return value
+ MOVB $0x01, ret+8(FP)
+
+ // Update the context
+ MOVQ ctx+0(FP), AX
+ MOVQ DX, 24(AX)
+ MOVQ DI, 104(AX)
+ MOVQ 80(AX), CX
+ SUBQ CX, SI
+ MOVQ SI, 112(AX)
+ RET
+
+error_match_off_too_big:
+ // Return value
+ MOVB $0x00, ret+8(FP)
+
+ // Update the context
+ MOVQ ctx+0(FP), AX
+ MOVQ DX, 24(AX)
+ MOVQ DI, 104(AX)
+ MOVQ 80(AX), CX
+ SUBQ CX, SI
+ MOVQ SI, 112(AX)
+ RET
+
+empty_seqs:
+ // Return value
+ MOVB $0x01, ret+8(FP)
+ RET
+
+// func sequenceDecs_decodeSync_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
+// Requires: CMOV, SSE
+TEXT ·sequenceDecs_decodeSync_amd64(SB), $64-32
+ MOVQ br+8(FP), AX
+ MOVQ 32(AX), DX
+ MOVBQZX 40(AX), BX
+ MOVQ 24(AX), SI
+ MOVQ (AX), AX
+ ADDQ SI, AX
+ MOVQ AX, (SP)
+ MOVQ ctx+16(FP), AX
+ MOVQ 72(AX), DI
+ MOVQ 80(AX), R8
+ MOVQ 88(AX), R9
+ MOVQ 112(AX), R10
+ MOVQ 128(AX), CX
+ MOVQ CX, 32(SP)
+ MOVQ 144(AX), R11
+ MOVQ 136(AX), R12
+ MOVQ 200(AX), CX
+ MOVQ CX, 56(SP)
+ MOVQ 176(AX), CX
+ MOVQ CX, 48(SP)
+ MOVQ 184(AX), AX
+ MOVQ AX, 40(SP)
+ MOVQ 40(SP), AX
+ ADDQ AX, 48(SP)
+
+ // Calculate poiter to s.out[cap(s.out)] (a past-end pointer)
+ ADDQ R10, 32(SP)
+
+ // outBase += outPosition
+ ADDQ R12, R10
+
+sequenceDecs_decodeSync_amd64_main_loop:
+ MOVQ (SP), R13
+
+ // Fill bitreader to have enough for the offset and match length.
+ CMPQ SI, $0x08
+ JL sequenceDecs_decodeSync_amd64_fill_byte_by_byte
+ MOVQ BX, AX
+ SHRQ $0x03, AX
+ SUBQ AX, R13
+ MOVQ (R13), DX
+ SUBQ AX, SI
+ ANDQ $0x07, BX
+ JMP sequenceDecs_decodeSync_amd64_fill_end
+
+sequenceDecs_decodeSync_amd64_fill_byte_by_byte:
+ CMPQ SI, $0x00
+ JLE sequenceDecs_decodeSync_amd64_fill_end
+ CMPQ BX, $0x07
+ JLE sequenceDecs_decodeSync_amd64_fill_end
+ SHLQ $0x08, DX
+ SUBQ $0x01, R13
+ SUBQ $0x01, SI
+ SUBQ $0x08, BX
+ MOVBQZX (R13), AX
+ ORQ AX, DX
+ JMP sequenceDecs_decodeSync_amd64_fill_byte_by_byte
+
+sequenceDecs_decodeSync_amd64_fill_end:
+ // Update offset
+ MOVQ R9, AX
+ MOVQ BX, CX
+ MOVQ DX, R14
+ SHLQ CL, R14
+ MOVB AH, CL
+ ADDQ CX, BX
+ NEGL CX
+ SHRQ CL, R14
+ SHRQ $0x20, AX
+ TESTQ CX, CX
+ CMOVQEQ CX, R14
+ ADDQ R14, AX
+ MOVQ AX, 8(SP)
+
+ // Update match length
+ MOVQ R8, AX
+ MOVQ BX, CX
+ MOVQ DX, R14
+ SHLQ CL, R14
+ MOVB AH, CL
+ ADDQ CX, BX
+ NEGL CX
+ SHRQ CL, R14
+ SHRQ $0x20, AX
+ TESTQ CX, CX
+ CMOVQEQ CX, R14
+ ADDQ R14, AX
+ MOVQ AX, 16(SP)
+
+ // Fill bitreader to have enough for the remaining
+ CMPQ SI, $0x08
+ JL sequenceDecs_decodeSync_amd64_fill_2_byte_by_byte
+ MOVQ BX, AX
+ SHRQ $0x03, AX
+ SUBQ AX, R13
+ MOVQ (R13), DX
+ SUBQ AX, SI
+ ANDQ $0x07, BX
+ JMP sequenceDecs_decodeSync_amd64_fill_2_end
+
+sequenceDecs_decodeSync_amd64_fill_2_byte_by_byte:
+ CMPQ SI, $0x00
+ JLE sequenceDecs_decodeSync_amd64_fill_2_end
+ CMPQ BX, $0x07
+ JLE sequenceDecs_decodeSync_amd64_fill_2_end
+ SHLQ $0x08, DX
+ SUBQ $0x01, R13
+ SUBQ $0x01, SI
+ SUBQ $0x08, BX
+ MOVBQZX (R13), AX
+ ORQ AX, DX
+ JMP sequenceDecs_decodeSync_amd64_fill_2_byte_by_byte
+
+sequenceDecs_decodeSync_amd64_fill_2_end:
+ // Update literal length
+ MOVQ DI, AX
+ MOVQ BX, CX
+ MOVQ DX, R14
+ SHLQ CL, R14
+ MOVB AH, CL
+ ADDQ CX, BX
+ NEGL CX
+ SHRQ CL, R14
+ SHRQ $0x20, AX
+ TESTQ CX, CX
+ CMOVQEQ CX, R14
+ ADDQ R14, AX
+ MOVQ AX, 24(SP)
+
+ // Fill bitreader for state updates
+ MOVQ R13, (SP)
+ MOVQ R9, AX
+ SHRQ $0x08, AX
+ MOVBQZX AL, AX
+ MOVQ ctx+16(FP), CX
+ CMPQ 96(CX), $0x00
+ JZ sequenceDecs_decodeSync_amd64_skip_update
+
+ // Update Literal Length State
+ MOVBQZX DI, R13
+ SHRQ $0x10, DI
+ MOVWQZX DI, DI
+ CMPQ R13, $0x00
+ JZ sequenceDecs_decodeSync_amd64_llState_updateState_skip_zero
+ MOVQ BX, CX
+ ADDQ R13, BX
+ MOVQ DX, R14
+ SHLQ CL, R14
+ MOVQ R13, CX
+ NEGQ CX
+ SHRQ CL, R14
+ ADDQ R14, DI
+
+sequenceDecs_decodeSync_amd64_llState_updateState_skip_zero:
+ // Load ctx.llTable
+ MOVQ ctx+16(FP), CX
+ MOVQ (CX), CX
+ MOVQ (CX)(DI*8), DI
+
+ // Update Match Length State
+ MOVBQZX R8, R13
+ SHRQ $0x10, R8
+ MOVWQZX R8, R8
+ CMPQ R13, $0x00
+ JZ sequenceDecs_decodeSync_amd64_mlState_updateState_skip_zero
+ MOVQ BX, CX
+ ADDQ R13, BX
+ MOVQ DX, R14
+ SHLQ CL, R14
+ MOVQ R13, CX
+ NEGQ CX
+ SHRQ CL, R14
+ ADDQ R14, R8
+
+sequenceDecs_decodeSync_amd64_mlState_updateState_skip_zero:
+ // Load ctx.mlTable
+ MOVQ ctx+16(FP), CX
+ MOVQ 24(CX), CX
+ MOVQ (CX)(R8*8), R8
+
+ // Update Offset State
+ MOVBQZX R9, R13
+ SHRQ $0x10, R9
+ MOVWQZX R9, R9
+ CMPQ R13, $0x00
+ JZ sequenceDecs_decodeSync_amd64_ofState_updateState_skip_zero
+ MOVQ BX, CX
+ ADDQ R13, BX
+ MOVQ DX, R14
+ SHLQ CL, R14
+ MOVQ R13, CX
+ NEGQ CX
+ SHRQ CL, R14
+ ADDQ R14, R9
+
+sequenceDecs_decodeSync_amd64_ofState_updateState_skip_zero:
+ // Load ctx.ofTable
+ MOVQ ctx+16(FP), CX
+ MOVQ 48(CX), CX
+ MOVQ (CX)(R9*8), R9
+
+sequenceDecs_decodeSync_amd64_skip_update:
+ // Adjust offset
+ MOVQ s+0(FP), CX
+ MOVQ 8(SP), R13
+ CMPQ AX, $0x01
+ JBE sequenceDecs_decodeSync_amd64_adjust_offsetB_1_or_0
+ MOVUPS 144(CX), X0
+ MOVQ R13, 144(CX)
+ MOVUPS X0, 152(CX)
+ JMP sequenceDecs_decodeSync_amd64_adjust_end
+
+sequenceDecs_decodeSync_amd64_adjust_offsetB_1_or_0:
+ CMPQ 24(SP), $0x00000000
+ JNE sequenceDecs_decodeSync_amd64_adjust_offset_maybezero
+ INCQ R13
+ JMP sequenceDecs_decodeSync_amd64_adjust_offset_nonzero
+
+sequenceDecs_decodeSync_amd64_adjust_offset_maybezero:
+ TESTQ R13, R13
+ JNZ sequenceDecs_decodeSync_amd64_adjust_offset_nonzero
+ MOVQ 144(CX), R13
+ JMP sequenceDecs_decodeSync_amd64_adjust_end
+
+sequenceDecs_decodeSync_amd64_adjust_offset_nonzero:
+ MOVQ R13, AX
+ XORQ R14, R14
+ MOVQ $-1, R15
+ CMPQ R13, $0x03
+ CMOVQEQ R14, AX
+ CMOVQEQ R15, R14
+ LEAQ 144(CX), R15
+ ADDQ (R15)(AX*8), R14
+ JNZ sequenceDecs_decodeSync_amd64_adjust_temp_valid
+ MOVQ $0x00000001, R14
+
+sequenceDecs_decodeSync_amd64_adjust_temp_valid:
+ CMPQ R13, $0x01
+ JZ sequenceDecs_decodeSync_amd64_adjust_skip
+ MOVQ 152(CX), AX
+ MOVQ AX, 160(CX)
+
+sequenceDecs_decodeSync_amd64_adjust_skip:
+ MOVQ 144(CX), AX
+ MOVQ AX, 152(CX)
+ MOVQ R14, 144(CX)
+ MOVQ R14, R13
+
+sequenceDecs_decodeSync_amd64_adjust_end:
+ MOVQ R13, 8(SP)
+
+ // Check values
+ MOVQ 16(SP), AX
+ MOVQ 24(SP), CX
+ LEAQ (AX)(CX*1), R14
+ MOVQ s+0(FP), R15
+ ADDQ R14, 256(R15)
+ MOVQ ctx+16(FP), R14
+ SUBQ CX, 104(R14)
+ JS error_not_enough_literals
+ CMPQ AX, $0x00020002
+ JA sequenceDecs_decodeSync_amd64_error_match_len_too_big
+ TESTQ R13, R13
+ JNZ sequenceDecs_decodeSync_amd64_match_len_ofs_ok
+ TESTQ AX, AX
+ JNZ sequenceDecs_decodeSync_amd64_error_match_len_ofs_mismatch
+
+sequenceDecs_decodeSync_amd64_match_len_ofs_ok:
+ MOVQ 24(SP), AX
+ MOVQ 8(SP), CX
+ MOVQ 16(SP), R13
+
+ // Check if we have enough space in s.out
+ LEAQ (AX)(R13*1), R14
+ ADDQ R10, R14
+ CMPQ R14, 32(SP)
+ JA error_not_enough_space
+
+ // Copy literals
+ TESTQ AX, AX
+ JZ check_offset
+ XORQ R14, R14
+ TESTQ $0x00000001, AX
+ JZ copy_1_word
+ MOVB (R11)(R14*1), R15
+ MOVB R15, (R10)(R14*1)
+ ADDQ $0x01, R14
+
+copy_1_word:
+ TESTQ $0x00000002, AX
+ JZ copy_1_dword
+ MOVW (R11)(R14*1), R15
+ MOVW R15, (R10)(R14*1)
+ ADDQ $0x02, R14
+
+copy_1_dword:
+ TESTQ $0x00000004, AX
+ JZ copy_1_qword
+ MOVL (R11)(R14*1), R15
+ MOVL R15, (R10)(R14*1)
+ ADDQ $0x04, R14
+
+copy_1_qword:
+ TESTQ $0x00000008, AX
+ JZ copy_1_test
+ MOVQ (R11)(R14*1), R15
+ MOVQ R15, (R10)(R14*1)
+ ADDQ $0x08, R14
+ JMP copy_1_test
+
+copy_1:
+ MOVUPS (R11)(R14*1), X0
+ MOVUPS X0, (R10)(R14*1)
+ ADDQ $0x10, R14
+
+copy_1_test:
+ CMPQ R14, AX
+ JB copy_1
+ ADDQ AX, R11
+ ADDQ AX, R10
+ ADDQ AX, R12
+
+ // Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize)
+check_offset:
+ MOVQ R12, AX
+ ADDQ 40(SP), AX
+ CMPQ CX, AX
+ JG error_match_off_too_big
+ CMPQ CX, 56(SP)
+ JG error_match_off_too_big
+
+ // Copy match from history
+ MOVQ CX, AX
+ SUBQ R12, AX
+ JLS copy_match
+ MOVQ 48(SP), R14
+ SUBQ AX, R14
+ CMPQ R13, AX
+ JGE copy_all_from_history
+ XORQ AX, AX
+ TESTQ $0x00000001, R13
+ JZ copy_4_word
+ MOVB (R14)(AX*1), CL
+ MOVB CL, (R10)(AX*1)
+ ADDQ $0x01, AX
+
+copy_4_word:
+ TESTQ $0x00000002, R13
+ JZ copy_4_dword
+ MOVW (R14)(AX*1), CX
+ MOVW CX, (R10)(AX*1)
+ ADDQ $0x02, AX
+
+copy_4_dword:
+ TESTQ $0x00000004, R13
+ JZ copy_4_qword
+ MOVL (R14)(AX*1), CX
+ MOVL CX, (R10)(AX*1)
+ ADDQ $0x04, AX
+
+copy_4_qword:
+ TESTQ $0x00000008, R13
+ JZ copy_4_test
+ MOVQ (R14)(AX*1), CX
+ MOVQ CX, (R10)(AX*1)
+ ADDQ $0x08, AX
+ JMP copy_4_test
+
+copy_4:
+ MOVUPS (R14)(AX*1), X0
+ MOVUPS X0, (R10)(AX*1)
+ ADDQ $0x10, AX
+
+copy_4_test:
+ CMPQ AX, R13
+ JB copy_4
+ ADDQ R13, R12
+ ADDQ R13, R10
+ JMP handle_loop
+ JMP loop_finished
+
+copy_all_from_history:
+ XORQ R15, R15
+ TESTQ $0x00000001, AX
+ JZ copy_5_word
+ MOVB (R14)(R15*1), BP
+ MOVB BP, (R10)(R15*1)
+ ADDQ $0x01, R15
+
+copy_5_word:
+ TESTQ $0x00000002, AX
+ JZ copy_5_dword
+ MOVW (R14)(R15*1), BP
+ MOVW BP, (R10)(R15*1)
+ ADDQ $0x02, R15
+
+copy_5_dword:
+ TESTQ $0x00000004, AX
+ JZ copy_5_qword
+ MOVL (R14)(R15*1), BP
+ MOVL BP, (R10)(R15*1)
+ ADDQ $0x04, R15
+
+copy_5_qword:
+ TESTQ $0x00000008, AX
+ JZ copy_5_test
+ MOVQ (R14)(R15*1), BP
+ MOVQ BP, (R10)(R15*1)
+ ADDQ $0x08, R15
+ JMP copy_5_test
+
+copy_5:
+ MOVUPS (R14)(R15*1), X0
+ MOVUPS X0, (R10)(R15*1)
+ ADDQ $0x10, R15
+
+copy_5_test:
+ CMPQ R15, AX
+ JB copy_5
+ ADDQ AX, R10
+ ADDQ AX, R12
+ SUBQ AX, R13
+
+ // Copy match from the current buffer
+copy_match:
+ TESTQ R13, R13
+ JZ handle_loop
+ MOVQ R10, AX
+ SUBQ CX, AX
+
+ // ml <= mo
+ CMPQ R13, CX
+ JA copy_overlapping_match
+
+ // Copy non-overlapping match
+ XORQ CX, CX
+
+copy_2:
+ MOVUPS (AX)(CX*1), X0
+ MOVUPS X0, (R10)(CX*1)
+ ADDQ $0x10, CX
+ CMPQ CX, R13
+ JB copy_2
+ ADDQ R13, R10
+ ADDQ R13, R12
+ JMP handle_loop
+
+ // Copy overlapping match
+copy_overlapping_match:
+ XORQ CX, CX
+
+copy_slow_3:
+ MOVB (AX)(CX*1), R14
+ MOVB R14, (R10)(CX*1)
+ INCQ CX
+ CMPQ CX, R13
+ JB copy_slow_3
+ ADDQ R13, R10
+ ADDQ R13, R12
+
+handle_loop:
+ MOVQ ctx+16(FP), AX
+ DECQ 96(AX)
+ JNS sequenceDecs_decodeSync_amd64_main_loop
+
+loop_finished:
+ MOVQ br+8(FP), AX
+ MOVQ DX, 32(AX)
+ MOVB BL, 40(AX)
+ MOVQ SI, 24(AX)
+
+ // Update the context
+ MOVQ ctx+16(FP), AX
+ MOVQ R12, 136(AX)
+ MOVQ 144(AX), CX
+ SUBQ CX, R11
+ MOVQ R11, 168(AX)
+
+ // Return success
+ MOVQ $0x00000000, ret+24(FP)
+ RET
+
+ // Return with match length error
+sequenceDecs_decodeSync_amd64_error_match_len_ofs_mismatch:
+ MOVQ 16(SP), AX
+ MOVQ ctx+16(FP), CX
+ MOVQ AX, 216(CX)
+ MOVQ $0x00000001, ret+24(FP)
+ RET
+
+ // Return with match too long error
+sequenceDecs_decodeSync_amd64_error_match_len_too_big:
+ MOVQ ctx+16(FP), AX
+ MOVQ 16(SP), CX
+ MOVQ CX, 216(AX)
+ MOVQ $0x00000002, ret+24(FP)
+ RET
+
+ // Return with match offset too long error
+error_match_off_too_big:
+ MOVQ ctx+16(FP), AX
+ MOVQ 8(SP), CX
+ MOVQ CX, 224(AX)
+ MOVQ R12, 136(AX)
+ MOVQ $0x00000003, ret+24(FP)
+ RET
+
+ // Return with not enough literals error
+error_not_enough_literals:
+ MOVQ ctx+16(FP), AX
+ MOVQ 24(SP), CX
+ MOVQ CX, 208(AX)
+ MOVQ $0x00000004, ret+24(FP)
+ RET
+
+ // Return with not enough output space error
+error_not_enough_space:
+ MOVQ ctx+16(FP), AX
+ MOVQ 24(SP), CX
+ MOVQ CX, 208(AX)
+ MOVQ 16(SP), CX
+ MOVQ CX, 216(AX)
+ MOVQ R12, 136(AX)
+ MOVQ $0x00000005, ret+24(FP)
+ RET
+
+// func sequenceDecs_decodeSync_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
+// Requires: BMI, BMI2, CMOV, SSE
+TEXT ·sequenceDecs_decodeSync_bmi2(SB), $64-32
+ MOVQ br+8(FP), CX
+ MOVQ 32(CX), AX
+ MOVBQZX 40(CX), DX
+ MOVQ 24(CX), BX
+ MOVQ (CX), CX
+ ADDQ BX, CX
+ MOVQ CX, (SP)
+ MOVQ ctx+16(FP), CX
+ MOVQ 72(CX), SI
+ MOVQ 80(CX), DI
+ MOVQ 88(CX), R8
+ MOVQ 112(CX), R9
+ MOVQ 128(CX), R10
+ MOVQ R10, 32(SP)
+ MOVQ 144(CX), R10
+ MOVQ 136(CX), R11
+ MOVQ 200(CX), R12
+ MOVQ R12, 56(SP)
+ MOVQ 176(CX), R12
+ MOVQ R12, 48(SP)
+ MOVQ 184(CX), CX
+ MOVQ CX, 40(SP)
+ MOVQ 40(SP), CX
+ ADDQ CX, 48(SP)
+
+ // Calculate poiter to s.out[cap(s.out)] (a past-end pointer)
+ ADDQ R9, 32(SP)
+
+ // outBase += outPosition
+ ADDQ R11, R9
+
+sequenceDecs_decodeSync_bmi2_main_loop:
+ MOVQ (SP), R12
+
+ // Fill bitreader to have enough for the offset and match length.
+ CMPQ BX, $0x08
+ JL sequenceDecs_decodeSync_bmi2_fill_byte_by_byte
+ MOVQ DX, CX
+ SHRQ $0x03, CX
+ SUBQ CX, R12
+ MOVQ (R12), AX
+ SUBQ CX, BX
+ ANDQ $0x07, DX
+ JMP sequenceDecs_decodeSync_bmi2_fill_end
+
+sequenceDecs_decodeSync_bmi2_fill_byte_by_byte:
+ CMPQ BX, $0x00
+ JLE sequenceDecs_decodeSync_bmi2_fill_end
+ CMPQ DX, $0x07
+ JLE sequenceDecs_decodeSync_bmi2_fill_end
+ SHLQ $0x08, AX
+ SUBQ $0x01, R12
+ SUBQ $0x01, BX
+ SUBQ $0x08, DX
+ MOVBQZX (R12), CX
+ ORQ CX, AX
+ JMP sequenceDecs_decodeSync_bmi2_fill_byte_by_byte
+
+sequenceDecs_decodeSync_bmi2_fill_end:
+ // Update offset
+ MOVQ $0x00000808, CX
+ BEXTRQ CX, R8, R13
+ MOVQ AX, R14
+ LEAQ (DX)(R13*1), CX
+ ROLQ CL, R14
+ BZHIQ R13, R14, R14
+ MOVQ CX, DX
+ MOVQ R8, CX
+ SHRQ $0x20, CX
+ ADDQ R14, CX
+ MOVQ CX, 8(SP)
+
+ // Update match length
+ MOVQ $0x00000808, CX
+ BEXTRQ CX, DI, R13
+ MOVQ AX, R14
+ LEAQ (DX)(R13*1), CX
+ ROLQ CL, R14
+ BZHIQ R13, R14, R14
+ MOVQ CX, DX
+ MOVQ DI, CX
+ SHRQ $0x20, CX
+ ADDQ R14, CX
+ MOVQ CX, 16(SP)
+
+ // Fill bitreader to have enough for the remaining
+ CMPQ BX, $0x08
+ JL sequenceDecs_decodeSync_bmi2_fill_2_byte_by_byte
+ MOVQ DX, CX
+ SHRQ $0x03, CX
+ SUBQ CX, R12
+ MOVQ (R12), AX
+ SUBQ CX, BX
+ ANDQ $0x07, DX
+ JMP sequenceDecs_decodeSync_bmi2_fill_2_end
+
+sequenceDecs_decodeSync_bmi2_fill_2_byte_by_byte:
+ CMPQ BX, $0x00
+ JLE sequenceDecs_decodeSync_bmi2_fill_2_end
+ CMPQ DX, $0x07
+ JLE sequenceDecs_decodeSync_bmi2_fill_2_end
+ SHLQ $0x08, AX
+ SUBQ $0x01, R12
+ SUBQ $0x01, BX
+ SUBQ $0x08, DX
+ MOVBQZX (R12), CX
+ ORQ CX, AX
+ JMP sequenceDecs_decodeSync_bmi2_fill_2_byte_by_byte
+
+sequenceDecs_decodeSync_bmi2_fill_2_end:
+ // Update literal length
+ MOVQ $0x00000808, CX
+ BEXTRQ CX, SI, R13
+ MOVQ AX, R14
+ LEAQ (DX)(R13*1), CX
+ ROLQ CL, R14
+ BZHIQ R13, R14, R14
+ MOVQ CX, DX
+ MOVQ SI, CX
+ SHRQ $0x20, CX
+ ADDQ R14, CX
+ MOVQ CX, 24(SP)
+
+ // Fill bitreader for state updates
+ MOVQ R12, (SP)
+ MOVQ $0x00000808, CX
+ BEXTRQ CX, R8, R12
+ MOVQ ctx+16(FP), CX
+ CMPQ 96(CX), $0x00
+ JZ sequenceDecs_decodeSync_bmi2_skip_update
+
+ // Update Literal Length State
+ MOVBQZX SI, R13
+ MOVQ $0x00001010, CX
+ BEXTRQ CX, SI, SI
+ LEAQ (DX)(R13*1), CX
+ MOVQ AX, R14
+ MOVQ CX, DX
+ ROLQ CL, R14
+ BZHIQ R13, R14, R14
+ ADDQ R14, SI
+
+ // Load ctx.llTable
+ MOVQ ctx+16(FP), CX
+ MOVQ (CX), CX
+ MOVQ (CX)(SI*8), SI
+
+ // Update Match Length State
+ MOVBQZX DI, R13
+ MOVQ $0x00001010, CX
+ BEXTRQ CX, DI, DI
+ LEAQ (DX)(R13*1), CX
+ MOVQ AX, R14
+ MOVQ CX, DX
+ ROLQ CL, R14
+ BZHIQ R13, R14, R14
+ ADDQ R14, DI
+
+ // Load ctx.mlTable
+ MOVQ ctx+16(FP), CX
+ MOVQ 24(CX), CX
+ MOVQ (CX)(DI*8), DI
+
+ // Update Offset State
+ MOVBQZX R8, R13
+ MOVQ $0x00001010, CX
+ BEXTRQ CX, R8, R8
+ LEAQ (DX)(R13*1), CX
+ MOVQ AX, R14
+ MOVQ CX, DX
+ ROLQ CL, R14
+ BZHIQ R13, R14, R14
+ ADDQ R14, R8
+
+ // Load ctx.ofTable
+ MOVQ ctx+16(FP), CX
+ MOVQ 48(CX), CX
+ MOVQ (CX)(R8*8), R8
+
+sequenceDecs_decodeSync_bmi2_skip_update:
+ // Adjust offset
+ MOVQ s+0(FP), CX
+ MOVQ 8(SP), R13
+ CMPQ R12, $0x01
+ JBE sequenceDecs_decodeSync_bmi2_adjust_offsetB_1_or_0
+ MOVUPS 144(CX), X0
+ MOVQ R13, 144(CX)
+ MOVUPS X0, 152(CX)
+ JMP sequenceDecs_decodeSync_bmi2_adjust_end
+
+sequenceDecs_decodeSync_bmi2_adjust_offsetB_1_or_0:
+ CMPQ 24(SP), $0x00000000
+ JNE sequenceDecs_decodeSync_bmi2_adjust_offset_maybezero
+ INCQ R13
+ JMP sequenceDecs_decodeSync_bmi2_adjust_offset_nonzero
+
+sequenceDecs_decodeSync_bmi2_adjust_offset_maybezero:
+ TESTQ R13, R13
+ JNZ sequenceDecs_decodeSync_bmi2_adjust_offset_nonzero
+ MOVQ 144(CX), R13
+ JMP sequenceDecs_decodeSync_bmi2_adjust_end
+
+sequenceDecs_decodeSync_bmi2_adjust_offset_nonzero:
+ MOVQ R13, R12
+ XORQ R14, R14
+ MOVQ $-1, R15
+ CMPQ R13, $0x03
+ CMOVQEQ R14, R12
+ CMOVQEQ R15, R14
+ LEAQ 144(CX), R15
+ ADDQ (R15)(R12*8), R14
+ JNZ sequenceDecs_decodeSync_bmi2_adjust_temp_valid
+ MOVQ $0x00000001, R14
+
+sequenceDecs_decodeSync_bmi2_adjust_temp_valid:
+ CMPQ R13, $0x01
+ JZ sequenceDecs_decodeSync_bmi2_adjust_skip
+ MOVQ 152(CX), R12
+ MOVQ R12, 160(CX)
+
+sequenceDecs_decodeSync_bmi2_adjust_skip:
+ MOVQ 144(CX), R12
+ MOVQ R12, 152(CX)
+ MOVQ R14, 144(CX)
+ MOVQ R14, R13
+
+sequenceDecs_decodeSync_bmi2_adjust_end:
+ MOVQ R13, 8(SP)
+
+ // Check values
+ MOVQ 16(SP), CX
+ MOVQ 24(SP), R12
+ LEAQ (CX)(R12*1), R14
+ MOVQ s+0(FP), R15
+ ADDQ R14, 256(R15)
+ MOVQ ctx+16(FP), R14
+ SUBQ R12, 104(R14)
+ JS error_not_enough_literals
+ CMPQ CX, $0x00020002
+ JA sequenceDecs_decodeSync_bmi2_error_match_len_too_big
+ TESTQ R13, R13
+ JNZ sequenceDecs_decodeSync_bmi2_match_len_ofs_ok
+ TESTQ CX, CX
+ JNZ sequenceDecs_decodeSync_bmi2_error_match_len_ofs_mismatch
+
+sequenceDecs_decodeSync_bmi2_match_len_ofs_ok:
+ MOVQ 24(SP), CX
+ MOVQ 8(SP), R12
+ MOVQ 16(SP), R13
+
+ // Check if we have enough space in s.out
+ LEAQ (CX)(R13*1), R14
+ ADDQ R9, R14
+ CMPQ R14, 32(SP)
+ JA error_not_enough_space
+
+ // Copy literals
+ TESTQ CX, CX
+ JZ check_offset
+ XORQ R14, R14
+ TESTQ $0x00000001, CX
+ JZ copy_1_word
+ MOVB (R10)(R14*1), R15
+ MOVB R15, (R9)(R14*1)
+ ADDQ $0x01, R14
+
+copy_1_word:
+ TESTQ $0x00000002, CX
+ JZ copy_1_dword
+ MOVW (R10)(R14*1), R15
+ MOVW R15, (R9)(R14*1)
+ ADDQ $0x02, R14
+
+copy_1_dword:
+ TESTQ $0x00000004, CX
+ JZ copy_1_qword
+ MOVL (R10)(R14*1), R15
+ MOVL R15, (R9)(R14*1)
+ ADDQ $0x04, R14
+
+copy_1_qword:
+ TESTQ $0x00000008, CX
+ JZ copy_1_test
+ MOVQ (R10)(R14*1), R15
+ MOVQ R15, (R9)(R14*1)
+ ADDQ $0x08, R14
+ JMP copy_1_test
+
+copy_1:
+ MOVUPS (R10)(R14*1), X0
+ MOVUPS X0, (R9)(R14*1)
+ ADDQ $0x10, R14
+
+copy_1_test:
+ CMPQ R14, CX
+ JB copy_1
+ ADDQ CX, R10
+ ADDQ CX, R9
+ ADDQ CX, R11
+
+ // Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize)
+check_offset:
+ MOVQ R11, CX
+ ADDQ 40(SP), CX
+ CMPQ R12, CX
+ JG error_match_off_too_big
+ CMPQ R12, 56(SP)
+ JG error_match_off_too_big
+
+ // Copy match from history
+ MOVQ R12, CX
+ SUBQ R11, CX
+ JLS copy_match
+ MOVQ 48(SP), R14
+ SUBQ CX, R14
+ CMPQ R13, CX
+ JGE copy_all_from_history
+ XORQ CX, CX
+ TESTQ $0x00000001, R13
+ JZ copy_4_word
+ MOVB (R14)(CX*1), R12
+ MOVB R12, (R9)(CX*1)
+ ADDQ $0x01, CX
+
+copy_4_word:
+ TESTQ $0x00000002, R13
+ JZ copy_4_dword
+ MOVW (R14)(CX*1), R12
+ MOVW R12, (R9)(CX*1)
+ ADDQ $0x02, CX
+
+copy_4_dword:
+ TESTQ $0x00000004, R13
+ JZ copy_4_qword
+ MOVL (R14)(CX*1), R12
+ MOVL R12, (R9)(CX*1)
+ ADDQ $0x04, CX
+
+copy_4_qword:
+ TESTQ $0x00000008, R13
+ JZ copy_4_test
+ MOVQ (R14)(CX*1), R12
+ MOVQ R12, (R9)(CX*1)
+ ADDQ $0x08, CX
+ JMP copy_4_test
+
+copy_4:
+ MOVUPS (R14)(CX*1), X0
+ MOVUPS X0, (R9)(CX*1)
+ ADDQ $0x10, CX
+
+copy_4_test:
+ CMPQ CX, R13
+ JB copy_4
+ ADDQ R13, R11
+ ADDQ R13, R9
+ JMP handle_loop
+ JMP loop_finished
+
+copy_all_from_history:
+ XORQ R15, R15
+ TESTQ $0x00000001, CX
+ JZ copy_5_word
+ MOVB (R14)(R15*1), BP
+ MOVB BP, (R9)(R15*1)
+ ADDQ $0x01, R15
+
+copy_5_word:
+ TESTQ $0x00000002, CX
+ JZ copy_5_dword
+ MOVW (R14)(R15*1), BP
+ MOVW BP, (R9)(R15*1)
+ ADDQ $0x02, R15
+
+copy_5_dword:
+ TESTQ $0x00000004, CX
+ JZ copy_5_qword
+ MOVL (R14)(R15*1), BP
+ MOVL BP, (R9)(R15*1)
+ ADDQ $0x04, R15
+
+copy_5_qword:
+ TESTQ $0x00000008, CX
+ JZ copy_5_test
+ MOVQ (R14)(R15*1), BP
+ MOVQ BP, (R9)(R15*1)
+ ADDQ $0x08, R15
+ JMP copy_5_test
+
+copy_5:
+ MOVUPS (R14)(R15*1), X0
+ MOVUPS X0, (R9)(R15*1)
+ ADDQ $0x10, R15
+
+copy_5_test:
+ CMPQ R15, CX
+ JB copy_5
+ ADDQ CX, R9
+ ADDQ CX, R11
+ SUBQ CX, R13
+
+ // Copy match from the current buffer
+copy_match:
+ TESTQ R13, R13
+ JZ handle_loop
+ MOVQ R9, CX
+ SUBQ R12, CX
+
+ // ml <= mo
+ CMPQ R13, R12
+ JA copy_overlapping_match
+
+ // Copy non-overlapping match
+ XORQ R12, R12
+
+copy_2:
+ MOVUPS (CX)(R12*1), X0
+ MOVUPS X0, (R9)(R12*1)
+ ADDQ $0x10, R12
+ CMPQ R12, R13
+ JB copy_2
+ ADDQ R13, R9
+ ADDQ R13, R11
+ JMP handle_loop
+
+ // Copy overlapping match
+copy_overlapping_match:
+ XORQ R12, R12
+
+copy_slow_3:
+ MOVB (CX)(R12*1), R14
+ MOVB R14, (R9)(R12*1)
+ INCQ R12
+ CMPQ R12, R13
+ JB copy_slow_3
+ ADDQ R13, R9
+ ADDQ R13, R11
+
+handle_loop:
+ MOVQ ctx+16(FP), CX
+ DECQ 96(CX)
+ JNS sequenceDecs_decodeSync_bmi2_main_loop
+
+loop_finished:
+ MOVQ br+8(FP), CX
+ MOVQ AX, 32(CX)
+ MOVB DL, 40(CX)
+ MOVQ BX, 24(CX)
+
+ // Update the context
+ MOVQ ctx+16(FP), AX
+ MOVQ R11, 136(AX)
+ MOVQ 144(AX), CX
+ SUBQ CX, R10
+ MOVQ R10, 168(AX)
+
+ // Return success
+ MOVQ $0x00000000, ret+24(FP)
+ RET
+
+ // Return with match length error
+sequenceDecs_decodeSync_bmi2_error_match_len_ofs_mismatch:
+ MOVQ 16(SP), AX
+ MOVQ ctx+16(FP), CX
+ MOVQ AX, 216(CX)
+ MOVQ $0x00000001, ret+24(FP)
+ RET
+
+ // Return with match too long error
+sequenceDecs_decodeSync_bmi2_error_match_len_too_big:
+ MOVQ ctx+16(FP), AX
+ MOVQ 16(SP), CX
+ MOVQ CX, 216(AX)
+ MOVQ $0x00000002, ret+24(FP)
+ RET
+
+ // Return with match offset too long error
+error_match_off_too_big:
+ MOVQ ctx+16(FP), AX
+ MOVQ 8(SP), CX
+ MOVQ CX, 224(AX)
+ MOVQ R11, 136(AX)
+ MOVQ $0x00000003, ret+24(FP)
+ RET
+
+ // Return with not enough literals error
+error_not_enough_literals:
+ MOVQ ctx+16(FP), AX
+ MOVQ 24(SP), CX
+ MOVQ CX, 208(AX)
+ MOVQ $0x00000004, ret+24(FP)
+ RET
+
+ // Return with not enough output space error
+error_not_enough_space:
+ MOVQ ctx+16(FP), AX
+ MOVQ 24(SP), CX
+ MOVQ CX, 208(AX)
+ MOVQ 16(SP), CX
+ MOVQ CX, 216(AX)
+ MOVQ R11, 136(AX)
+ MOVQ $0x00000005, ret+24(FP)
+ RET
+
+// func sequenceDecs_decodeSync_safe_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
+// Requires: CMOV, SSE
+TEXT ·sequenceDecs_decodeSync_safe_amd64(SB), $64-32
+ MOVQ br+8(FP), AX
+ MOVQ 32(AX), DX
+ MOVBQZX 40(AX), BX
+ MOVQ 24(AX), SI
+ MOVQ (AX), AX
+ ADDQ SI, AX
+ MOVQ AX, (SP)
+ MOVQ ctx+16(FP), AX
+ MOVQ 72(AX), DI
+ MOVQ 80(AX), R8
+ MOVQ 88(AX), R9
+ MOVQ 112(AX), R10
+ MOVQ 128(AX), CX
+ MOVQ CX, 32(SP)
+ MOVQ 144(AX), R11
+ MOVQ 136(AX), R12
+ MOVQ 200(AX), CX
+ MOVQ CX, 56(SP)
+ MOVQ 176(AX), CX
+ MOVQ CX, 48(SP)
+ MOVQ 184(AX), AX
+ MOVQ AX, 40(SP)
+ MOVQ 40(SP), AX
+ ADDQ AX, 48(SP)
+
+ // Calculate poiter to s.out[cap(s.out)] (a past-end pointer)
+ ADDQ R10, 32(SP)
+
+ // outBase += outPosition
+ ADDQ R12, R10
+
+sequenceDecs_decodeSync_safe_amd64_main_loop:
+ MOVQ (SP), R13
+
+ // Fill bitreader to have enough for the offset and match length.
+ CMPQ SI, $0x08
+ JL sequenceDecs_decodeSync_safe_amd64_fill_byte_by_byte
+ MOVQ BX, AX
+ SHRQ $0x03, AX
+ SUBQ AX, R13
+ MOVQ (R13), DX
+ SUBQ AX, SI
+ ANDQ $0x07, BX
+ JMP sequenceDecs_decodeSync_safe_amd64_fill_end
+
+sequenceDecs_decodeSync_safe_amd64_fill_byte_by_byte:
+ CMPQ SI, $0x00
+ JLE sequenceDecs_decodeSync_safe_amd64_fill_end
+ CMPQ BX, $0x07
+ JLE sequenceDecs_decodeSync_safe_amd64_fill_end
+ SHLQ $0x08, DX
+ SUBQ $0x01, R13
+ SUBQ $0x01, SI
+ SUBQ $0x08, BX
+ MOVBQZX (R13), AX
+ ORQ AX, DX
+ JMP sequenceDecs_decodeSync_safe_amd64_fill_byte_by_byte
+
+sequenceDecs_decodeSync_safe_amd64_fill_end:
+ // Update offset
+ MOVQ R9, AX
+ MOVQ BX, CX
+ MOVQ DX, R14
+ SHLQ CL, R14
+ MOVB AH, CL
+ ADDQ CX, BX
+ NEGL CX
+ SHRQ CL, R14
+ SHRQ $0x20, AX
+ TESTQ CX, CX
+ CMOVQEQ CX, R14
+ ADDQ R14, AX
+ MOVQ AX, 8(SP)
+
+ // Update match length
+ MOVQ R8, AX
+ MOVQ BX, CX
+ MOVQ DX, R14
+ SHLQ CL, R14
+ MOVB AH, CL
+ ADDQ CX, BX
+ NEGL CX
+ SHRQ CL, R14
+ SHRQ $0x20, AX
+ TESTQ CX, CX
+ CMOVQEQ CX, R14
+ ADDQ R14, AX
+ MOVQ AX, 16(SP)
+
+ // Fill bitreader to have enough for the remaining
+ CMPQ SI, $0x08
+ JL sequenceDecs_decodeSync_safe_amd64_fill_2_byte_by_byte
+ MOVQ BX, AX
+ SHRQ $0x03, AX
+ SUBQ AX, R13
+ MOVQ (R13), DX
+ SUBQ AX, SI
+ ANDQ $0x07, BX
+ JMP sequenceDecs_decodeSync_safe_amd64_fill_2_end
+
+sequenceDecs_decodeSync_safe_amd64_fill_2_byte_by_byte:
+ CMPQ SI, $0x00
+ JLE sequenceDecs_decodeSync_safe_amd64_fill_2_end
+ CMPQ BX, $0x07
+ JLE sequenceDecs_decodeSync_safe_amd64_fill_2_end
+ SHLQ $0x08, DX
+ SUBQ $0x01, R13
+ SUBQ $0x01, SI
+ SUBQ $0x08, BX
+ MOVBQZX (R13), AX
+ ORQ AX, DX
+ JMP sequenceDecs_decodeSync_safe_amd64_fill_2_byte_by_byte
+
+sequenceDecs_decodeSync_safe_amd64_fill_2_end:
+ // Update literal length
+ MOVQ DI, AX
+ MOVQ BX, CX
+ MOVQ DX, R14
+ SHLQ CL, R14
+ MOVB AH, CL
+ ADDQ CX, BX
+ NEGL CX
+ SHRQ CL, R14
+ SHRQ $0x20, AX
+ TESTQ CX, CX
+ CMOVQEQ CX, R14
+ ADDQ R14, AX
+ MOVQ AX, 24(SP)
+
+ // Fill bitreader for state updates
+ MOVQ R13, (SP)
+ MOVQ R9, AX
+ SHRQ $0x08, AX
+ MOVBQZX AL, AX
+ MOVQ ctx+16(FP), CX
+ CMPQ 96(CX), $0x00
+ JZ sequenceDecs_decodeSync_safe_amd64_skip_update
+
+ // Update Literal Length State
+ MOVBQZX DI, R13
+ SHRQ $0x10, DI
+ MOVWQZX DI, DI
+ CMPQ R13, $0x00
+ JZ sequenceDecs_decodeSync_safe_amd64_llState_updateState_skip_zero
+ MOVQ BX, CX
+ ADDQ R13, BX
+ MOVQ DX, R14
+ SHLQ CL, R14
+ MOVQ R13, CX
+ NEGQ CX
+ SHRQ CL, R14
+ ADDQ R14, DI
+
+sequenceDecs_decodeSync_safe_amd64_llState_updateState_skip_zero:
+ // Load ctx.llTable
+ MOVQ ctx+16(FP), CX
+ MOVQ (CX), CX
+ MOVQ (CX)(DI*8), DI
+
+ // Update Match Length State
+ MOVBQZX R8, R13
+ SHRQ $0x10, R8
+ MOVWQZX R8, R8
+ CMPQ R13, $0x00
+ JZ sequenceDecs_decodeSync_safe_amd64_mlState_updateState_skip_zero
+ MOVQ BX, CX
+ ADDQ R13, BX
+ MOVQ DX, R14
+ SHLQ CL, R14
+ MOVQ R13, CX
+ NEGQ CX
+ SHRQ CL, R14
+ ADDQ R14, R8
+
+sequenceDecs_decodeSync_safe_amd64_mlState_updateState_skip_zero:
+ // Load ctx.mlTable
+ MOVQ ctx+16(FP), CX
+ MOVQ 24(CX), CX
+ MOVQ (CX)(R8*8), R8
+
+ // Update Offset State
+ MOVBQZX R9, R13
+ SHRQ $0x10, R9
+ MOVWQZX R9, R9
+ CMPQ R13, $0x00
+ JZ sequenceDecs_decodeSync_safe_amd64_ofState_updateState_skip_zero
+ MOVQ BX, CX
+ ADDQ R13, BX
+ MOVQ DX, R14
+ SHLQ CL, R14
+ MOVQ R13, CX
+ NEGQ CX
+ SHRQ CL, R14
+ ADDQ R14, R9
+
+sequenceDecs_decodeSync_safe_amd64_ofState_updateState_skip_zero:
+ // Load ctx.ofTable
+ MOVQ ctx+16(FP), CX
+ MOVQ 48(CX), CX
+ MOVQ (CX)(R9*8), R9
+
+sequenceDecs_decodeSync_safe_amd64_skip_update:
+ // Adjust offset
+ MOVQ s+0(FP), CX
+ MOVQ 8(SP), R13
+ CMPQ AX, $0x01
+ JBE sequenceDecs_decodeSync_safe_amd64_adjust_offsetB_1_or_0
+ MOVUPS 144(CX), X0
+ MOVQ R13, 144(CX)
+ MOVUPS X0, 152(CX)
+ JMP sequenceDecs_decodeSync_safe_amd64_adjust_end
+
+sequenceDecs_decodeSync_safe_amd64_adjust_offsetB_1_or_0:
+ CMPQ 24(SP), $0x00000000
+ JNE sequenceDecs_decodeSync_safe_amd64_adjust_offset_maybezero
+ INCQ R13
+ JMP sequenceDecs_decodeSync_safe_amd64_adjust_offset_nonzero
+
+sequenceDecs_decodeSync_safe_amd64_adjust_offset_maybezero:
+ TESTQ R13, R13
+ JNZ sequenceDecs_decodeSync_safe_amd64_adjust_offset_nonzero
+ MOVQ 144(CX), R13
+ JMP sequenceDecs_decodeSync_safe_amd64_adjust_end
+
+sequenceDecs_decodeSync_safe_amd64_adjust_offset_nonzero:
+ MOVQ R13, AX
+ XORQ R14, R14
+ MOVQ $-1, R15
+ CMPQ R13, $0x03
+ CMOVQEQ R14, AX
+ CMOVQEQ R15, R14
+ LEAQ 144(CX), R15
+ ADDQ (R15)(AX*8), R14
+ JNZ sequenceDecs_decodeSync_safe_amd64_adjust_temp_valid
+ MOVQ $0x00000001, R14
+
+sequenceDecs_decodeSync_safe_amd64_adjust_temp_valid:
+ CMPQ R13, $0x01
+ JZ sequenceDecs_decodeSync_safe_amd64_adjust_skip
+ MOVQ 152(CX), AX
+ MOVQ AX, 160(CX)
+
+sequenceDecs_decodeSync_safe_amd64_adjust_skip:
+ MOVQ 144(CX), AX
+ MOVQ AX, 152(CX)
+ MOVQ R14, 144(CX)
+ MOVQ R14, R13
+
+sequenceDecs_decodeSync_safe_amd64_adjust_end:
+ MOVQ R13, 8(SP)
+
+ // Check values
+ MOVQ 16(SP), AX
+ MOVQ 24(SP), CX
+ LEAQ (AX)(CX*1), R14
+ MOVQ s+0(FP), R15
+ ADDQ R14, 256(R15)
+ MOVQ ctx+16(FP), R14
+ SUBQ CX, 104(R14)
+ JS error_not_enough_literals
+ CMPQ AX, $0x00020002
+ JA sequenceDecs_decodeSync_safe_amd64_error_match_len_too_big
+ TESTQ R13, R13
+ JNZ sequenceDecs_decodeSync_safe_amd64_match_len_ofs_ok
+ TESTQ AX, AX
+ JNZ sequenceDecs_decodeSync_safe_amd64_error_match_len_ofs_mismatch
+
+sequenceDecs_decodeSync_safe_amd64_match_len_ofs_ok:
+ MOVQ 24(SP), AX
+ MOVQ 8(SP), CX
+ MOVQ 16(SP), R13
+
+ // Check if we have enough space in s.out
+ LEAQ (AX)(R13*1), R14
+ ADDQ R10, R14
+ CMPQ R14, 32(SP)
+ JA error_not_enough_space
+
+ // Copy literals
+ TESTQ AX, AX
+ JZ check_offset
+ XORQ R14, R14
+ TESTQ $0x00000001, AX
+ JZ copy_1_word
+ MOVB (R11)(R14*1), R15
+ MOVB R15, (R10)(R14*1)
+ ADDQ $0x01, R14
+
+copy_1_word:
+ TESTQ $0x00000002, AX
+ JZ copy_1_dword
+ MOVW (R11)(R14*1), R15
+ MOVW R15, (R10)(R14*1)
+ ADDQ $0x02, R14
+
+copy_1_dword:
+ TESTQ $0x00000004, AX
+ JZ copy_1_qword
+ MOVL (R11)(R14*1), R15
+ MOVL R15, (R10)(R14*1)
+ ADDQ $0x04, R14
+
+copy_1_qword:
+ TESTQ $0x00000008, AX
+ JZ copy_1_test
+ MOVQ (R11)(R14*1), R15
+ MOVQ R15, (R10)(R14*1)
+ ADDQ $0x08, R14
+ JMP copy_1_test
+
+copy_1:
+ MOVUPS (R11)(R14*1), X0
+ MOVUPS X0, (R10)(R14*1)
+ ADDQ $0x10, R14
+
+copy_1_test:
+ CMPQ R14, AX
+ JB copy_1
+ ADDQ AX, R11
+ ADDQ AX, R10
+ ADDQ AX, R12
+
+ // Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize)
+check_offset:
+ MOVQ R12, AX
+ ADDQ 40(SP), AX
+ CMPQ CX, AX
+ JG error_match_off_too_big
+ CMPQ CX, 56(SP)
+ JG error_match_off_too_big
+
+ // Copy match from history
+ MOVQ CX, AX
+ SUBQ R12, AX
+ JLS copy_match
+ MOVQ 48(SP), R14
+ SUBQ AX, R14
+ CMPQ R13, AX
+ JGE copy_all_from_history
+ XORQ AX, AX
+ TESTQ $0x00000001, R13
+ JZ copy_4_word
+ MOVB (R14)(AX*1), CL
+ MOVB CL, (R10)(AX*1)
+ ADDQ $0x01, AX
+
+copy_4_word:
+ TESTQ $0x00000002, R13
+ JZ copy_4_dword
+ MOVW (R14)(AX*1), CX
+ MOVW CX, (R10)(AX*1)
+ ADDQ $0x02, AX
+
+copy_4_dword:
+ TESTQ $0x00000004, R13
+ JZ copy_4_qword
+ MOVL (R14)(AX*1), CX
+ MOVL CX, (R10)(AX*1)
+ ADDQ $0x04, AX
+
+copy_4_qword:
+ TESTQ $0x00000008, R13
+ JZ copy_4_test
+ MOVQ (R14)(AX*1), CX
+ MOVQ CX, (R10)(AX*1)
+ ADDQ $0x08, AX
+ JMP copy_4_test
+
+copy_4:
+ MOVUPS (R14)(AX*1), X0
+ MOVUPS X0, (R10)(AX*1)
+ ADDQ $0x10, AX
+
+copy_4_test:
+ CMPQ AX, R13
+ JB copy_4
+ ADDQ R13, R12
+ ADDQ R13, R10
+ JMP handle_loop
+ JMP loop_finished
+
+copy_all_from_history:
+ XORQ R15, R15
+ TESTQ $0x00000001, AX
+ JZ copy_5_word
+ MOVB (R14)(R15*1), BP
+ MOVB BP, (R10)(R15*1)
+ ADDQ $0x01, R15
+
+copy_5_word:
+ TESTQ $0x00000002, AX
+ JZ copy_5_dword
+ MOVW (R14)(R15*1), BP
+ MOVW BP, (R10)(R15*1)
+ ADDQ $0x02, R15
+
+copy_5_dword:
+ TESTQ $0x00000004, AX
+ JZ copy_5_qword
+ MOVL (R14)(R15*1), BP
+ MOVL BP, (R10)(R15*1)
+ ADDQ $0x04, R15
+
+copy_5_qword:
+ TESTQ $0x00000008, AX
+ JZ copy_5_test
+ MOVQ (R14)(R15*1), BP
+ MOVQ BP, (R10)(R15*1)
+ ADDQ $0x08, R15
+ JMP copy_5_test
+
+copy_5:
+ MOVUPS (R14)(R15*1), X0
+ MOVUPS X0, (R10)(R15*1)
+ ADDQ $0x10, R15
+
+copy_5_test:
+ CMPQ R15, AX
+ JB copy_5
+ ADDQ AX, R10
+ ADDQ AX, R12
+ SUBQ AX, R13
+
+ // Copy match from the current buffer
+copy_match:
+ TESTQ R13, R13
+ JZ handle_loop
+ MOVQ R10, AX
+ SUBQ CX, AX
+
+ // ml <= mo
+ CMPQ R13, CX
+ JA copy_overlapping_match
+
+ // Copy non-overlapping match
+ XORQ CX, CX
+ TESTQ $0x00000001, R13
+ JZ copy_2_word
+ MOVB (AX)(CX*1), R14
+ MOVB R14, (R10)(CX*1)
+ ADDQ $0x01, CX
+
+copy_2_word:
+ TESTQ $0x00000002, R13
+ JZ copy_2_dword
+ MOVW (AX)(CX*1), R14
+ MOVW R14, (R10)(CX*1)
+ ADDQ $0x02, CX
+
+copy_2_dword:
+ TESTQ $0x00000004, R13
+ JZ copy_2_qword
+ MOVL (AX)(CX*1), R14
+ MOVL R14, (R10)(CX*1)
+ ADDQ $0x04, CX
+
+copy_2_qword:
+ TESTQ $0x00000008, R13
+ JZ copy_2_test
+ MOVQ (AX)(CX*1), R14
+ MOVQ R14, (R10)(CX*1)
+ ADDQ $0x08, CX
+ JMP copy_2_test
+
+copy_2:
+ MOVUPS (AX)(CX*1), X0
+ MOVUPS X0, (R10)(CX*1)
+ ADDQ $0x10, CX
+
+copy_2_test:
+ CMPQ CX, R13
+ JB copy_2
+ ADDQ R13, R10
+ ADDQ R13, R12
+ JMP handle_loop
+
+ // Copy overlapping match
+copy_overlapping_match:
+ XORQ CX, CX
+
+copy_slow_3:
+ MOVB (AX)(CX*1), R14
+ MOVB R14, (R10)(CX*1)
+ INCQ CX
+ CMPQ CX, R13
+ JB copy_slow_3
+ ADDQ R13, R10
+ ADDQ R13, R12
+
+handle_loop:
+ MOVQ ctx+16(FP), AX
+ DECQ 96(AX)
+ JNS sequenceDecs_decodeSync_safe_amd64_main_loop
+
+loop_finished:
+ MOVQ br+8(FP), AX
+ MOVQ DX, 32(AX)
+ MOVB BL, 40(AX)
+ MOVQ SI, 24(AX)
+
+ // Update the context
+ MOVQ ctx+16(FP), AX
+ MOVQ R12, 136(AX)
+ MOVQ 144(AX), CX
+ SUBQ CX, R11
+ MOVQ R11, 168(AX)
+
+ // Return success
+ MOVQ $0x00000000, ret+24(FP)
+ RET
+
+ // Return with match length error
+sequenceDecs_decodeSync_safe_amd64_error_match_len_ofs_mismatch:
+ MOVQ 16(SP), AX
+ MOVQ ctx+16(FP), CX
+ MOVQ AX, 216(CX)
+ MOVQ $0x00000001, ret+24(FP)
+ RET
+
+ // Return with match too long error
+sequenceDecs_decodeSync_safe_amd64_error_match_len_too_big:
+ MOVQ ctx+16(FP), AX
+ MOVQ 16(SP), CX
+ MOVQ CX, 216(AX)
+ MOVQ $0x00000002, ret+24(FP)
+ RET
+
+ // Return with match offset too long error
+error_match_off_too_big:
+ MOVQ ctx+16(FP), AX
+ MOVQ 8(SP), CX
+ MOVQ CX, 224(AX)
+ MOVQ R12, 136(AX)
+ MOVQ $0x00000003, ret+24(FP)
+ RET
+
+ // Return with not enough literals error
+error_not_enough_literals:
+ MOVQ ctx+16(FP), AX
+ MOVQ 24(SP), CX
+ MOVQ CX, 208(AX)
+ MOVQ $0x00000004, ret+24(FP)
+ RET
+
+ // Return with not enough output space error
+error_not_enough_space:
+ MOVQ ctx+16(FP), AX
+ MOVQ 24(SP), CX
+ MOVQ CX, 208(AX)
+ MOVQ 16(SP), CX
+ MOVQ CX, 216(AX)
+ MOVQ R12, 136(AX)
+ MOVQ $0x00000005, ret+24(FP)
+ RET
+
+// func sequenceDecs_decodeSync_safe_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
+// Requires: BMI, BMI2, CMOV, SSE
+TEXT ·sequenceDecs_decodeSync_safe_bmi2(SB), $64-32
+ MOVQ br+8(FP), CX
+ MOVQ 32(CX), AX
+ MOVBQZX 40(CX), DX
+ MOVQ 24(CX), BX
+ MOVQ (CX), CX
+ ADDQ BX, CX
+ MOVQ CX, (SP)
+ MOVQ ctx+16(FP), CX
+ MOVQ 72(CX), SI
+ MOVQ 80(CX), DI
+ MOVQ 88(CX), R8
+ MOVQ 112(CX), R9
+ MOVQ 128(CX), R10
+ MOVQ R10, 32(SP)
+ MOVQ 144(CX), R10
+ MOVQ 136(CX), R11
+ MOVQ 200(CX), R12
+ MOVQ R12, 56(SP)
+ MOVQ 176(CX), R12
+ MOVQ R12, 48(SP)
+ MOVQ 184(CX), CX
+ MOVQ CX, 40(SP)
+ MOVQ 40(SP), CX
+ ADDQ CX, 48(SP)
+
+ // Calculate poiter to s.out[cap(s.out)] (a past-end pointer)
+ ADDQ R9, 32(SP)
+
+ // outBase += outPosition
+ ADDQ R11, R9
+
+sequenceDecs_decodeSync_safe_bmi2_main_loop:
+ MOVQ (SP), R12
+
+ // Fill bitreader to have enough for the offset and match length.
+ CMPQ BX, $0x08
+ JL sequenceDecs_decodeSync_safe_bmi2_fill_byte_by_byte
+ MOVQ DX, CX
+ SHRQ $0x03, CX
+ SUBQ CX, R12
+ MOVQ (R12), AX
+ SUBQ CX, BX
+ ANDQ $0x07, DX
+ JMP sequenceDecs_decodeSync_safe_bmi2_fill_end
+
+sequenceDecs_decodeSync_safe_bmi2_fill_byte_by_byte:
+ CMPQ BX, $0x00
+ JLE sequenceDecs_decodeSync_safe_bmi2_fill_end
+ CMPQ DX, $0x07
+ JLE sequenceDecs_decodeSync_safe_bmi2_fill_end
+ SHLQ $0x08, AX
+ SUBQ $0x01, R12
+ SUBQ $0x01, BX
+ SUBQ $0x08, DX
+ MOVBQZX (R12), CX
+ ORQ CX, AX
+ JMP sequenceDecs_decodeSync_safe_bmi2_fill_byte_by_byte
+
+sequenceDecs_decodeSync_safe_bmi2_fill_end:
+ // Update offset
+ MOVQ $0x00000808, CX
+ BEXTRQ CX, R8, R13
+ MOVQ AX, R14
+ LEAQ (DX)(R13*1), CX
+ ROLQ CL, R14
+ BZHIQ R13, R14, R14
+ MOVQ CX, DX
+ MOVQ R8, CX
+ SHRQ $0x20, CX
+ ADDQ R14, CX
+ MOVQ CX, 8(SP)
+
+ // Update match length
+ MOVQ $0x00000808, CX
+ BEXTRQ CX, DI, R13
+ MOVQ AX, R14
+ LEAQ (DX)(R13*1), CX
+ ROLQ CL, R14
+ BZHIQ R13, R14, R14
+ MOVQ CX, DX
+ MOVQ DI, CX
+ SHRQ $0x20, CX
+ ADDQ R14, CX
+ MOVQ CX, 16(SP)
+
+ // Fill bitreader to have enough for the remaining
+ CMPQ BX, $0x08
+ JL sequenceDecs_decodeSync_safe_bmi2_fill_2_byte_by_byte
+ MOVQ DX, CX
+ SHRQ $0x03, CX
+ SUBQ CX, R12
+ MOVQ (R12), AX
+ SUBQ CX, BX
+ ANDQ $0x07, DX
+ JMP sequenceDecs_decodeSync_safe_bmi2_fill_2_end
+
+sequenceDecs_decodeSync_safe_bmi2_fill_2_byte_by_byte:
+ CMPQ BX, $0x00
+ JLE sequenceDecs_decodeSync_safe_bmi2_fill_2_end
+ CMPQ DX, $0x07
+ JLE sequenceDecs_decodeSync_safe_bmi2_fill_2_end
+ SHLQ $0x08, AX
+ SUBQ $0x01, R12
+ SUBQ $0x01, BX
+ SUBQ $0x08, DX
+ MOVBQZX (R12), CX
+ ORQ CX, AX
+ JMP sequenceDecs_decodeSync_safe_bmi2_fill_2_byte_by_byte
+
+sequenceDecs_decodeSync_safe_bmi2_fill_2_end:
+ // Update literal length
+ MOVQ $0x00000808, CX
+ BEXTRQ CX, SI, R13
+ MOVQ AX, R14
+ LEAQ (DX)(R13*1), CX
+ ROLQ CL, R14
+ BZHIQ R13, R14, R14
+ MOVQ CX, DX
+ MOVQ SI, CX
+ SHRQ $0x20, CX
+ ADDQ R14, CX
+ MOVQ CX, 24(SP)
+
+ // Fill bitreader for state updates
+ MOVQ R12, (SP)
+ MOVQ $0x00000808, CX
+ BEXTRQ CX, R8, R12
+ MOVQ ctx+16(FP), CX
+ CMPQ 96(CX), $0x00
+ JZ sequenceDecs_decodeSync_safe_bmi2_skip_update
+
+ // Update Literal Length State
+ MOVBQZX SI, R13
+ MOVQ $0x00001010, CX
+ BEXTRQ CX, SI, SI
+ LEAQ (DX)(R13*1), CX
+ MOVQ AX, R14
+ MOVQ CX, DX
+ ROLQ CL, R14
+ BZHIQ R13, R14, R14
+ ADDQ R14, SI
+
+ // Load ctx.llTable
+ MOVQ ctx+16(FP), CX
+ MOVQ (CX), CX
+ MOVQ (CX)(SI*8), SI
+
+ // Update Match Length State
+ MOVBQZX DI, R13
+ MOVQ $0x00001010, CX
+ BEXTRQ CX, DI, DI
+ LEAQ (DX)(R13*1), CX
+ MOVQ AX, R14
+ MOVQ CX, DX
+ ROLQ CL, R14
+ BZHIQ R13, R14, R14
+ ADDQ R14, DI
+
+ // Load ctx.mlTable
+ MOVQ ctx+16(FP), CX
+ MOVQ 24(CX), CX
+ MOVQ (CX)(DI*8), DI
+
+ // Update Offset State
+ MOVBQZX R8, R13
+ MOVQ $0x00001010, CX
+ BEXTRQ CX, R8, R8
+ LEAQ (DX)(R13*1), CX
+ MOVQ AX, R14
+ MOVQ CX, DX
+ ROLQ CL, R14
+ BZHIQ R13, R14, R14
+ ADDQ R14, R8
+
+ // Load ctx.ofTable
+ MOVQ ctx+16(FP), CX
+ MOVQ 48(CX), CX
+ MOVQ (CX)(R8*8), R8
+
+sequenceDecs_decodeSync_safe_bmi2_skip_update:
+ // Adjust offset
+ MOVQ s+0(FP), CX
+ MOVQ 8(SP), R13
+ CMPQ R12, $0x01
+ JBE sequenceDecs_decodeSync_safe_bmi2_adjust_offsetB_1_or_0
+ MOVUPS 144(CX), X0
+ MOVQ R13, 144(CX)
+ MOVUPS X0, 152(CX)
+ JMP sequenceDecs_decodeSync_safe_bmi2_adjust_end
+
+sequenceDecs_decodeSync_safe_bmi2_adjust_offsetB_1_or_0:
+ CMPQ 24(SP), $0x00000000
+ JNE sequenceDecs_decodeSync_safe_bmi2_adjust_offset_maybezero
+ INCQ R13
+ JMP sequenceDecs_decodeSync_safe_bmi2_adjust_offset_nonzero
+
+sequenceDecs_decodeSync_safe_bmi2_adjust_offset_maybezero:
+ TESTQ R13, R13
+ JNZ sequenceDecs_decodeSync_safe_bmi2_adjust_offset_nonzero
+ MOVQ 144(CX), R13
+ JMP sequenceDecs_decodeSync_safe_bmi2_adjust_end
+
+sequenceDecs_decodeSync_safe_bmi2_adjust_offset_nonzero:
+ MOVQ R13, R12
+ XORQ R14, R14
+ MOVQ $-1, R15
+ CMPQ R13, $0x03
+ CMOVQEQ R14, R12
+ CMOVQEQ R15, R14
+ LEAQ 144(CX), R15
+ ADDQ (R15)(R12*8), R14
+ JNZ sequenceDecs_decodeSync_safe_bmi2_adjust_temp_valid
+ MOVQ $0x00000001, R14
+
+sequenceDecs_decodeSync_safe_bmi2_adjust_temp_valid:
+ CMPQ R13, $0x01
+ JZ sequenceDecs_decodeSync_safe_bmi2_adjust_skip
+ MOVQ 152(CX), R12
+ MOVQ R12, 160(CX)
+
+sequenceDecs_decodeSync_safe_bmi2_adjust_skip:
+ MOVQ 144(CX), R12
+ MOVQ R12, 152(CX)
+ MOVQ R14, 144(CX)
+ MOVQ R14, R13
+
+sequenceDecs_decodeSync_safe_bmi2_adjust_end:
+ MOVQ R13, 8(SP)
+
+ // Check values
+ MOVQ 16(SP), CX
+ MOVQ 24(SP), R12
+ LEAQ (CX)(R12*1), R14
+ MOVQ s+0(FP), R15
+ ADDQ R14, 256(R15)
+ MOVQ ctx+16(FP), R14
+ SUBQ R12, 104(R14)
+ JS error_not_enough_literals
+ CMPQ CX, $0x00020002
+ JA sequenceDecs_decodeSync_safe_bmi2_error_match_len_too_big
+ TESTQ R13, R13
+ JNZ sequenceDecs_decodeSync_safe_bmi2_match_len_ofs_ok
+ TESTQ CX, CX
+ JNZ sequenceDecs_decodeSync_safe_bmi2_error_match_len_ofs_mismatch
+
+sequenceDecs_decodeSync_safe_bmi2_match_len_ofs_ok:
+ MOVQ 24(SP), CX
+ MOVQ 8(SP), R12
+ MOVQ 16(SP), R13
+
+ // Check if we have enough space in s.out
+ LEAQ (CX)(R13*1), R14
+ ADDQ R9, R14
+ CMPQ R14, 32(SP)
+ JA error_not_enough_space
+
+ // Copy literals
+ TESTQ CX, CX
+ JZ check_offset
+ XORQ R14, R14
+ TESTQ $0x00000001, CX
+ JZ copy_1_word
+ MOVB (R10)(R14*1), R15
+ MOVB R15, (R9)(R14*1)
+ ADDQ $0x01, R14
+
+copy_1_word:
+ TESTQ $0x00000002, CX
+ JZ copy_1_dword
+ MOVW (R10)(R14*1), R15
+ MOVW R15, (R9)(R14*1)
+ ADDQ $0x02, R14
+
+copy_1_dword:
+ TESTQ $0x00000004, CX
+ JZ copy_1_qword
+ MOVL (R10)(R14*1), R15
+ MOVL R15, (R9)(R14*1)
+ ADDQ $0x04, R14
+
+copy_1_qword:
+ TESTQ $0x00000008, CX
+ JZ copy_1_test
+ MOVQ (R10)(R14*1), R15
+ MOVQ R15, (R9)(R14*1)
+ ADDQ $0x08, R14
+ JMP copy_1_test
+
+copy_1:
+ MOVUPS (R10)(R14*1), X0
+ MOVUPS X0, (R9)(R14*1)
+ ADDQ $0x10, R14
+
+copy_1_test:
+ CMPQ R14, CX
+ JB copy_1
+ ADDQ CX, R10
+ ADDQ CX, R9
+ ADDQ CX, R11
+
+ // Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize)
+check_offset:
+ MOVQ R11, CX
+ ADDQ 40(SP), CX
+ CMPQ R12, CX
+ JG error_match_off_too_big
+ CMPQ R12, 56(SP)
+ JG error_match_off_too_big
+
+ // Copy match from history
+ MOVQ R12, CX
+ SUBQ R11, CX
+ JLS copy_match
+ MOVQ 48(SP), R14
+ SUBQ CX, R14
+ CMPQ R13, CX
+ JGE copy_all_from_history
+ XORQ CX, CX
+ TESTQ $0x00000001, R13
+ JZ copy_4_word
+ MOVB (R14)(CX*1), R12
+ MOVB R12, (R9)(CX*1)
+ ADDQ $0x01, CX
+
+copy_4_word:
+ TESTQ $0x00000002, R13
+ JZ copy_4_dword
+ MOVW (R14)(CX*1), R12
+ MOVW R12, (R9)(CX*1)
+ ADDQ $0x02, CX
+
+copy_4_dword:
+ TESTQ $0x00000004, R13
+ JZ copy_4_qword
+ MOVL (R14)(CX*1), R12
+ MOVL R12, (R9)(CX*1)
+ ADDQ $0x04, CX
+
+copy_4_qword:
+ TESTQ $0x00000008, R13
+ JZ copy_4_test
+ MOVQ (R14)(CX*1), R12
+ MOVQ R12, (R9)(CX*1)
+ ADDQ $0x08, CX
+ JMP copy_4_test
+
+copy_4:
+ MOVUPS (R14)(CX*1), X0
+ MOVUPS X0, (R9)(CX*1)
+ ADDQ $0x10, CX
+
+copy_4_test:
+ CMPQ CX, R13
+ JB copy_4
+ ADDQ R13, R11
+ ADDQ R13, R9
+ JMP handle_loop
+ JMP loop_finished
+
+copy_all_from_history:
+ XORQ R15, R15
+ TESTQ $0x00000001, CX
+ JZ copy_5_word
+ MOVB (R14)(R15*1), BP
+ MOVB BP, (R9)(R15*1)
+ ADDQ $0x01, R15
+
+copy_5_word:
+ TESTQ $0x00000002, CX
+ JZ copy_5_dword
+ MOVW (R14)(R15*1), BP
+ MOVW BP, (R9)(R15*1)
+ ADDQ $0x02, R15
+
+copy_5_dword:
+ TESTQ $0x00000004, CX
+ JZ copy_5_qword
+ MOVL (R14)(R15*1), BP
+ MOVL BP, (R9)(R15*1)
+ ADDQ $0x04, R15
+
+copy_5_qword:
+ TESTQ $0x00000008, CX
+ JZ copy_5_test
+ MOVQ (R14)(R15*1), BP
+ MOVQ BP, (R9)(R15*1)
+ ADDQ $0x08, R15
+ JMP copy_5_test
+
+copy_5:
+ MOVUPS (R14)(R15*1), X0
+ MOVUPS X0, (R9)(R15*1)
+ ADDQ $0x10, R15
+
+copy_5_test:
+ CMPQ R15, CX
+ JB copy_5
+ ADDQ CX, R9
+ ADDQ CX, R11
+ SUBQ CX, R13
+
+ // Copy match from the current buffer
+copy_match:
+ TESTQ R13, R13
+ JZ handle_loop
+ MOVQ R9, CX
+ SUBQ R12, CX
+
+ // ml <= mo
+ CMPQ R13, R12
+ JA copy_overlapping_match
+
+ // Copy non-overlapping match
+ XORQ R12, R12
+ TESTQ $0x00000001, R13
+ JZ copy_2_word
+ MOVB (CX)(R12*1), R14
+ MOVB R14, (R9)(R12*1)
+ ADDQ $0x01, R12
+
+copy_2_word:
+ TESTQ $0x00000002, R13
+ JZ copy_2_dword
+ MOVW (CX)(R12*1), R14
+ MOVW R14, (R9)(R12*1)
+ ADDQ $0x02, R12
+
+copy_2_dword:
+ TESTQ $0x00000004, R13
+ JZ copy_2_qword
+ MOVL (CX)(R12*1), R14
+ MOVL R14, (R9)(R12*1)
+ ADDQ $0x04, R12
+
+copy_2_qword:
+ TESTQ $0x00000008, R13
+ JZ copy_2_test
+ MOVQ (CX)(R12*1), R14
+ MOVQ R14, (R9)(R12*1)
+ ADDQ $0x08, R12
+ JMP copy_2_test
+
+copy_2:
+ MOVUPS (CX)(R12*1), X0
+ MOVUPS X0, (R9)(R12*1)
+ ADDQ $0x10, R12
+
+copy_2_test:
+ CMPQ R12, R13
+ JB copy_2
+ ADDQ R13, R9
+ ADDQ R13, R11
+ JMP handle_loop
+
+ // Copy overlapping match
+copy_overlapping_match:
+ XORQ R12, R12
+
+copy_slow_3:
+ MOVB (CX)(R12*1), R14
+ MOVB R14, (R9)(R12*1)
+ INCQ R12
+ CMPQ R12, R13
+ JB copy_slow_3
+ ADDQ R13, R9
+ ADDQ R13, R11
+
+handle_loop:
+ MOVQ ctx+16(FP), CX
+ DECQ 96(CX)
+ JNS sequenceDecs_decodeSync_safe_bmi2_main_loop
+
+loop_finished:
+ MOVQ br+8(FP), CX
+ MOVQ AX, 32(CX)
+ MOVB DL, 40(CX)
+ MOVQ BX, 24(CX)
+
+ // Update the context
+ MOVQ ctx+16(FP), AX
+ MOVQ R11, 136(AX)
+ MOVQ 144(AX), CX
+ SUBQ CX, R10
+ MOVQ R10, 168(AX)
+
+ // Return success
+ MOVQ $0x00000000, ret+24(FP)
+ RET
+
+ // Return with match length error
+sequenceDecs_decodeSync_safe_bmi2_error_match_len_ofs_mismatch:
+ MOVQ 16(SP), AX
+ MOVQ ctx+16(FP), CX
+ MOVQ AX, 216(CX)
+ MOVQ $0x00000001, ret+24(FP)
+ RET
+
+ // Return with match too long error
+sequenceDecs_decodeSync_safe_bmi2_error_match_len_too_big:
+ MOVQ ctx+16(FP), AX
+ MOVQ 16(SP), CX
+ MOVQ CX, 216(AX)
+ MOVQ $0x00000002, ret+24(FP)
+ RET
+
+ // Return with match offset too long error
+error_match_off_too_big:
+ MOVQ ctx+16(FP), AX
+ MOVQ 8(SP), CX
+ MOVQ CX, 224(AX)
+ MOVQ R11, 136(AX)
+ MOVQ $0x00000003, ret+24(FP)
+ RET
+
+ // Return with not enough literals error
+error_not_enough_literals:
+ MOVQ ctx+16(FP), AX
+ MOVQ 24(SP), CX
+ MOVQ CX, 208(AX)
+ MOVQ $0x00000004, ret+24(FP)
+ RET
+
+ // Return with not enough output space error
+error_not_enough_space:
+ MOVQ ctx+16(FP), AX
+ MOVQ 24(SP), CX
+ MOVQ CX, 208(AX)
+ MOVQ 16(SP), CX
+ MOVQ CX, 216(AX)
+ MOVQ R11, 136(AX)
+ MOVQ $0x00000005, ret+24(FP)
+ RET
diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_generic.go b/vendor/github.com/klauspost/compress/zstd/seqdec_generic.go
new file mode 100644
index 000000000..c3452bc3a
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/seqdec_generic.go
@@ -0,0 +1,237 @@
+//go:build !amd64 || appengine || !gc || noasm
+// +build !amd64 appengine !gc noasm
+
+package zstd
+
+import (
+ "fmt"
+ "io"
+)
+
+// decode sequences from the stream with the provided history but without dictionary.
+func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) {
+ return false, nil
+}
+
+// decode sequences from the stream without the provided history.
+func (s *sequenceDecs) decode(seqs []seqVals) error {
+ br := s.br
+
+ // Grab full sizes tables, to avoid bounds checks.
+ llTable, mlTable, ofTable := s.litLengths.fse.dt[:maxTablesize], s.matchLengths.fse.dt[:maxTablesize], s.offsets.fse.dt[:maxTablesize]
+ llState, mlState, ofState := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state
+ s.seqSize = 0
+ litRemain := len(s.literals)
+
+ maxBlockSize := maxCompressedBlockSize
+ if s.windowSize < maxBlockSize {
+ maxBlockSize = s.windowSize
+ }
+ for i := range seqs {
+ var ll, mo, ml int
+ if br.off > 4+((maxOffsetBits+16+16)>>3) {
+ // inlined function:
+ // ll, mo, ml = s.nextFast(br, llState, mlState, ofState)
+
+ // Final will not read from stream.
+ var llB, mlB, moB uint8
+ ll, llB = llState.final()
+ ml, mlB = mlState.final()
+ mo, moB = ofState.final()
+
+ // extra bits are stored in reverse order.
+ br.fillFast()
+ mo += br.getBits(moB)
+ if s.maxBits > 32 {
+ br.fillFast()
+ }
+ ml += br.getBits(mlB)
+ ll += br.getBits(llB)
+
+ if moB > 1 {
+ s.prevOffset[2] = s.prevOffset[1]
+ s.prevOffset[1] = s.prevOffset[0]
+ s.prevOffset[0] = mo
+ } else {
+ // mo = s.adjustOffset(mo, ll, moB)
+ // Inlined for rather big speedup
+ if ll == 0 {
+ // There is an exception though, when current sequence's literals_length = 0.
+ // In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2,
+ // an offset_value of 2 means Repeated_Offset3, and an offset_value of 3 means Repeated_Offset1 - 1_byte.
+ mo++
+ }
+
+ if mo == 0 {
+ mo = s.prevOffset[0]
+ } else {
+ var temp int
+ if mo == 3 {
+ temp = s.prevOffset[0] - 1
+ } else {
+ temp = s.prevOffset[mo]
+ }
+
+ if temp == 0 {
+ // 0 is not valid; input is corrupted; force offset to 1
+ println("WARNING: temp was 0")
+ temp = 1
+ }
+
+ if mo != 1 {
+ s.prevOffset[2] = s.prevOffset[1]
+ }
+ s.prevOffset[1] = s.prevOffset[0]
+ s.prevOffset[0] = temp
+ mo = temp
+ }
+ }
+ br.fillFast()
+ } else {
+ if br.overread() {
+ if debugDecoder {
+ printf("reading sequence %d, exceeded available data\n", i)
+ }
+ return io.ErrUnexpectedEOF
+ }
+ ll, mo, ml = s.next(br, llState, mlState, ofState)
+ br.fill()
+ }
+
+ if debugSequences {
+ println("Seq", i, "Litlen:", ll, "mo:", mo, "(abs) ml:", ml)
+ }
+ // Evaluate.
+ // We might be doing this async, so do it early.
+ if mo == 0 && ml > 0 {
+ return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml)
+ }
+ if ml > maxMatchLen {
+ return fmt.Errorf("match len (%d) bigger than max allowed length", ml)
+ }
+ s.seqSize += ll + ml
+ if s.seqSize > maxBlockSize {
+ return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
+ }
+ litRemain -= ll
+ if litRemain < 0 {
+ return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, litRemain+ll)
+ }
+ seqs[i] = seqVals{
+ ll: ll,
+ ml: ml,
+ mo: mo,
+ }
+ if i == len(seqs)-1 {
+ // This is the last sequence, so we shouldn't update state.
+ break
+ }
+
+ // Manually inlined, ~ 5-20% faster
+ // Update all 3 states at once. Approx 20% faster.
+ nBits := llState.nbBits() + mlState.nbBits() + ofState.nbBits()
+ if nBits == 0 {
+ llState = llTable[llState.newState()&maxTableMask]
+ mlState = mlTable[mlState.newState()&maxTableMask]
+ ofState = ofTable[ofState.newState()&maxTableMask]
+ } else {
+ bits := br.get32BitsFast(nBits)
+ lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31))
+ llState = llTable[(llState.newState()+lowBits)&maxTableMask]
+
+ lowBits = uint16(bits >> (ofState.nbBits() & 31))
+ lowBits &= bitMask[mlState.nbBits()&15]
+ mlState = mlTable[(mlState.newState()+lowBits)&maxTableMask]
+
+ lowBits = uint16(bits) & bitMask[ofState.nbBits()&15]
+ ofState = ofTable[(ofState.newState()+lowBits)&maxTableMask]
+ }
+ }
+ s.seqSize += litRemain
+ if s.seqSize > maxBlockSize {
+ return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
+ }
+ err := br.close()
+ if err != nil {
+ printf("Closing sequences: %v, %+v\n", err, *br)
+ }
+ return err
+}
+
+// executeSimple handles cases when a dictionary is not used.
+func (s *sequenceDecs) executeSimple(seqs []seqVals, hist []byte) error {
+ // Ensure we have enough output size...
+ if len(s.out)+s.seqSize > cap(s.out) {
+ addBytes := s.seqSize + len(s.out)
+ s.out = append(s.out, make([]byte, addBytes)...)
+ s.out = s.out[:len(s.out)-addBytes]
+ }
+
+ if debugDecoder {
+ printf("Execute %d seqs with literals: %d into %d bytes\n", len(seqs), len(s.literals), s.seqSize)
+ }
+
+ var t = len(s.out)
+ out := s.out[:t+s.seqSize]
+
+ for _, seq := range seqs {
+ // Add literals
+ copy(out[t:], s.literals[:seq.ll])
+ t += seq.ll
+ s.literals = s.literals[seq.ll:]
+
+ // Malformed input
+ if seq.mo > t+len(hist) || seq.mo > s.windowSize {
+ return fmt.Errorf("match offset (%d) bigger than current history (%d)", seq.mo, t+len(hist))
+ }
+
+ // Copy from history.
+ if v := seq.mo - t; v > 0 {
+ // v is the start position in history from end.
+ start := len(hist) - v
+ if seq.ml > v {
+ // Some goes into the current block.
+ // Copy remainder of history
+ copy(out[t:], hist[start:])
+ t += v
+ seq.ml -= v
+ } else {
+ copy(out[t:], hist[start:start+seq.ml])
+ t += seq.ml
+ continue
+ }
+ }
+
+ // We must be in the current buffer now
+ if seq.ml > 0 {
+ start := t - seq.mo
+ if seq.ml <= t-start {
+ // No overlap
+ copy(out[t:], out[start:start+seq.ml])
+ t += seq.ml
+ } else {
+ // Overlapping copy
+ // Extend destination slice and copy one byte at the time.
+ src := out[start : start+seq.ml]
+ dst := out[t:]
+ dst = dst[:len(src)]
+ t += len(src)
+ // Destination is the space we just added.
+ for i := range src {
+ dst[i] = src[i]
+ }
+ }
+ }
+ }
+ // Add final literals
+ copy(out[t:], s.literals)
+ if debugDecoder {
+ t += len(s.literals)
+ if t != len(out) {
+ panic(fmt.Errorf("length mismatch, want %d, got %d, ss: %d", len(out), t, s.seqSize))
+ }
+ }
+ s.out = out
+
+ return nil
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/zip.go b/vendor/github.com/klauspost/compress/zstd/zip.go
index ffffcbc25..b53f606a1 100644
--- a/vendor/github.com/klauspost/compress/zstd/zip.go
+++ b/vendor/github.com/klauspost/compress/zstd/zip.go
@@ -21,23 +21,34 @@ const ZipMethodPKWare = 20
var zipReaderPool sync.Pool
// newZipReader creates a pooled zip decompressor.
-func newZipReader(r io.Reader) io.ReadCloser {
- dec, ok := zipReaderPool.Get().(*Decoder)
- if ok {
- dec.Reset(r)
- } else {
- d, err := NewReader(r, WithDecoderConcurrency(1), WithDecoderLowmem(true))
- if err != nil {
- panic(err)
+func newZipReader(opts ...DOption) func(r io.Reader) io.ReadCloser {
+ pool := &zipReaderPool
+ if len(opts) > 0 {
+ opts = append([]DOption{WithDecoderLowmem(true), WithDecoderMaxWindow(128 << 20)}, opts...)
+ // Force concurrency 1
+ opts = append(opts, WithDecoderConcurrency(1))
+ // Create our own pool
+ pool = &sync.Pool{}
+ }
+ return func(r io.Reader) io.ReadCloser {
+ dec, ok := pool.Get().(*Decoder)
+ if ok {
+ dec.Reset(r)
+ } else {
+ d, err := NewReader(r, opts...)
+ if err != nil {
+ panic(err)
+ }
+ dec = d
}
- dec = d
+ return &pooledZipReader{dec: dec, pool: pool}
}
- return &pooledZipReader{dec: dec}
}
type pooledZipReader struct {
- mu sync.Mutex // guards Close and Read
- dec *Decoder
+ mu sync.Mutex // guards Close and Read
+ pool *sync.Pool
+ dec *Decoder
}
func (r *pooledZipReader) Read(p []byte) (n int, err error) {
@@ -48,8 +59,8 @@ func (r *pooledZipReader) Read(p []byte) (n int, err error) {
}
dec, err := r.dec.Read(p)
if err == io.EOF {
- err = r.dec.Reset(nil)
- zipReaderPool.Put(r.dec)
+ r.dec.Reset(nil)
+ r.pool.Put(r.dec)
r.dec = nil
}
return dec, err
@@ -61,7 +72,7 @@ func (r *pooledZipReader) Close() error {
var err error
if r.dec != nil {
err = r.dec.Reset(nil)
- zipReaderPool.Put(r.dec)
+ r.pool.Put(r.dec)
r.dec = nil
}
return err
@@ -115,6 +126,9 @@ func ZipCompressor(opts ...EOption) func(w io.Writer) (io.WriteCloser, error) {
// ZipDecompressor returns a decompressor that can be registered with zip libraries.
// See ZipCompressor for example.
-func ZipDecompressor() func(r io.Reader) io.ReadCloser {
- return newZipReader
+// Options can be specified. WithDecoderConcurrency(1) is forced,
+// and by default a 128MB maximum decompression window is specified.
+// The window size can be overridden if required.
+func ZipDecompressor(opts ...DOption) func(r io.Reader) io.ReadCloser {
+ return newZipReader(opts...)
}