diff options
Diffstat (limited to 'vendor/github.com')
39 files changed, 4864 insertions, 520 deletions
diff --git a/vendor/github.com/containers/common/version/version.go b/vendor/github.com/containers/common/version/version.go index c1ae6c9dd..61fce9d22 100644 --- a/vendor/github.com/containers/common/version/version.go +++ b/vendor/github.com/containers/common/version/version.go @@ -1,4 +1,4 @@ package version // Version is the version of the build. -const Version = "0.47.4+dev" +const Version = "0.48.0" diff --git a/vendor/github.com/containers/image/v5/copy/copy.go b/vendor/github.com/containers/image/v5/copy/copy.go index 644f82615..d28cc4a3f 100644 --- a/vendor/github.com/containers/image/v5/copy/copy.go +++ b/vendor/github.com/containers/image/v5/copy/copy.go @@ -32,7 +32,6 @@ import ( "github.com/pkg/errors" "github.com/sirupsen/logrus" "github.com/vbauerster/mpb/v7" - "github.com/vbauerster/mpb/v7/decor" "golang.org/x/sync/semaphore" "golang.org/x/term" ) @@ -712,8 +711,6 @@ func (c *copier) copyOneImage(ctx context.Context, policyContext *signature.Poli // If src.UpdatedImageNeedsLayerDiffIDs(ic.manifestUpdates) will be true, it needs to be true by the time we get here. ic.diffIDsAreNeeded = src.UpdatedImageNeedsLayerDiffIDs(*ic.manifestUpdates) - // If encrypted and decryption keys provided, we should try to decrypt - ic.diffIDsAreNeeded = ic.diffIDsAreNeeded || (isEncrypted(src) && ic.c.ociDecryptConfig != nil) || ic.c.ociEncryptConfig != nil // If enabled, fetch and compare the destination's manifest. And as an optimization skip updating the destination iff equal if options.OptimizeDestinationImageAlreadyExists { @@ -1069,85 +1066,6 @@ func (ic *imageCopier) copyUpdatedConfigAndManifest(ctx context.Context, instanc return man, manifestDigest, nil } -// newProgressPool creates a *mpb.Progress. -// The caller must eventually call pool.Wait() after the pool will no longer be updated. -// NOTE: Every progress bar created within the progress pool must either successfully -// complete or be aborted, or pool.Wait() will hang. That is typically done -// using "defer bar.Abort(false)", which must be called BEFORE pool.Wait() is called. -func (c *copier) newProgressPool() *mpb.Progress { - return mpb.New(mpb.WithWidth(40), mpb.WithOutput(c.progressOutput)) -} - -// customPartialBlobDecorFunc implements mpb.DecorFunc for the partial blobs retrieval progress bar -func customPartialBlobDecorFunc(s decor.Statistics) string { - if s.Total == 0 { - pairFmt := "%.1f / %.1f (skipped: %.1f)" - return fmt.Sprintf(pairFmt, decor.SizeB1024(s.Current), decor.SizeB1024(s.Total), decor.SizeB1024(s.Refill)) - } - pairFmt := "%.1f / %.1f (skipped: %.1f = %.2f%%)" - percentage := 100.0 * float64(s.Refill) / float64(s.Total) - return fmt.Sprintf(pairFmt, decor.SizeB1024(s.Current), decor.SizeB1024(s.Total), decor.SizeB1024(s.Refill), percentage) -} - -// createProgressBar creates a mpb.Bar in pool. Note that if the copier's reportWriter -// is io.Discard, the progress bar's output will be discarded -// NOTE: Every progress bar created within a progress pool must either successfully -// complete or be aborted, or pool.Wait() will hang. That is typically done -// using "defer bar.Abort(false)", which must happen BEFORE pool.Wait() is called. -func (c *copier) createProgressBar(pool *mpb.Progress, partial bool, info types.BlobInfo, kind string, onComplete string) *mpb.Bar { - // shortDigestLen is the length of the digest used for blobs. - const shortDigestLen = 12 - - prefix := fmt.Sprintf("Copying %s %s", kind, info.Digest.Encoded()) - // Truncate the prefix (chopping of some part of the digest) to make all progress bars aligned in a column. - maxPrefixLen := len("Copying blob ") + shortDigestLen - if len(prefix) > maxPrefixLen { - prefix = prefix[:maxPrefixLen] - } - - // onComplete will replace prefix once the bar/spinner has completed - onComplete = prefix + " " + onComplete - - // Use a normal progress bar when we know the size (i.e., size > 0). - // Otherwise, use a spinner to indicate that something's happening. - var bar *mpb.Bar - if info.Size > 0 { - if partial { - bar = pool.AddBar(info.Size, - mpb.BarFillerClearOnComplete(), - mpb.PrependDecorators( - decor.OnComplete(decor.Name(prefix), onComplete), - ), - mpb.AppendDecorators( - decor.Any(customPartialBlobDecorFunc), - ), - ) - } else { - bar = pool.AddBar(info.Size, - mpb.BarFillerClearOnComplete(), - mpb.PrependDecorators( - decor.OnComplete(decor.Name(prefix), onComplete), - ), - mpb.AppendDecorators( - decor.OnComplete(decor.CountersKibiByte("%.1f / %.1f"), ""), - ), - ) - } - } else { - bar = pool.New(0, - mpb.SpinnerStyle(".", "..", "...", "....", "").PositionLeft(), - mpb.BarFillerClearOnComplete(), - mpb.PrependDecorators( - decor.OnComplete(decor.Name(prefix), onComplete), - ), - ) - } - if c.progressOutput == io.Discard { - c.Printf("Copying %s %s\n", kind, info.Digest) - } - return bar -} - // copyConfig copies config.json, if any, from src to dest. func (c *copier) copyConfig(ctx context.Context, src types.Image) error { srcInfo := src.ConfigInfo() @@ -1158,22 +1076,23 @@ func (c *copier) copyConfig(ctx context.Context, src types.Image) error { } defer c.concurrentBlobCopiesSemaphore.Release(1) - configBlob, err := src.ConfigBlob(ctx) - if err != nil { - return errors.Wrapf(err, "reading config blob %s", srcInfo.Digest) - } - destInfo, err := func() (types.BlobInfo, error) { // A scope for defer progressPool := c.newProgressPool() defer progressPool.Wait() bar := c.createProgressBar(progressPool, false, srcInfo, "config", "done") defer bar.Abort(false) + configBlob, err := src.ConfigBlob(ctx) + if err != nil { + return types.BlobInfo{}, errors.Wrapf(err, "reading config blob %s", srcInfo.Digest) + } + destInfo, err := c.copyBlobFromStream(ctx, bytes.NewReader(configBlob), srcInfo, nil, false, true, false, bar, -1, false) if err != nil { return types.BlobInfo{}, err } - bar.SetTotal(int64(len(configBlob)), true) + + bar.mark100PercentComplete() return destInfo, nil }() if err != nil { @@ -1218,11 +1137,15 @@ func (ic *imageCopier) copyLayer(ctx context.Context, srcInfo types.BlobInfo, to } cachedDiffID := ic.c.blobInfoCache.UncompressedDigest(srcInfo.Digest) // May be "" - // Diffs are needed if we are encrypting an image or trying to decrypt an image - diffIDIsNeeded := ic.diffIDsAreNeeded && cachedDiffID == "" || toEncrypt || (isOciEncrypted(srcInfo.MediaType) && ic.c.ociDecryptConfig != nil) - - // If we already have the blob, and we don't need to compute the diffID, then we don't need to read it from the source. - if !diffIDIsNeeded { + diffIDIsNeeded := ic.diffIDsAreNeeded && cachedDiffID == "" + // When encrypting to decrypting, only use the simple code path. We might be able to optimize more + // (e.g. if we know the DiffID of an encrypted compressed layer, it might not be necessary to pull, decrypt and decompress again), + // but it’s not trivially safe to do such things, so until someone takes the effort to make a comprehensive argument, let’s not. + encryptingOrDecrypting := toEncrypt || (isOciEncrypted(srcInfo.MediaType) && ic.c.ociDecryptConfig != nil) + canAvoidProcessingCompleteLayer := !diffIDIsNeeded && !encryptingOrDecrypting + + // Don’t read the layer from the source if we already have the blob, and optimizations are acceptable. + if canAvoidProcessingCompleteLayer { // TODO: at this point we don't know whether or not a blob we end up reusing is compressed using an algorithm // that is acceptable for use on layers in the manifest that we'll be writing later, so if we end up reusing // a blob that's compressed with e.g. zstd, but we're only allowed to write a v2s2 manifest, this will cause @@ -1242,9 +1165,9 @@ func (ic *imageCopier) copyLayer(ctx context.Context, srcInfo types.BlobInfo, to if reused { logrus.Debugf("Skipping blob %s (already present):", srcInfo.Digest) func() { // A scope for defer - bar := ic.c.createProgressBar(pool, false, srcInfo, "blob", "skipped: already exists") + bar := ic.c.createProgressBar(pool, false, types.BlobInfo{Digest: blobInfo.Digest, Size: 0}, "blob", "skipped: already exists") defer bar.Abort(false) - bar.SetTotal(0, true) + bar.mark100PercentComplete() }() // Throw an event that the layer has been skipped @@ -1275,7 +1198,7 @@ func (ic *imageCopier) copyLayer(ctx context.Context, srcInfo types.BlobInfo, to // of the source file are not known yet and must be fetched. // Attempt a partial only when the source allows to retrieve a blob partially and // the destination has support for it. - if ic.c.rawSource.SupportsGetBlobAt() && ic.c.dest.SupportsPutBlobPartial() && !diffIDIsNeeded { + if canAvoidProcessingCompleteLayer && ic.c.rawSource.SupportsGetBlobAt() && ic.c.dest.SupportsPutBlobPartial() { if reused, blobInfo := func() (bool, types.BlobInfo) { // A scope for defer bar := ic.c.createProgressBar(pool, true, srcInfo, "blob", "done") hideProgressBar := true @@ -1287,12 +1210,12 @@ func (ic *imageCopier) copyLayer(ctx context.Context, srcInfo types.BlobInfo, to wrapped: ic.c.rawSource, bar: bar, } - bar.SetTotal(srcInfo.Size, false) info, err := ic.c.dest.PutBlobPartial(ctx, &proxy, srcInfo, ic.c.blobInfoCache) if err == nil { - bar.SetRefill(srcInfo.Size - bar.Current()) - bar.SetCurrent(srcInfo.Size) - bar.SetTotal(srcInfo.Size, true) + if srcInfo.Size != -1 { + bar.SetRefill(srcInfo.Size - bar.Current()) + } + bar.mark100PercentComplete() hideProgressBar = false logrus.Debugf("Retrieved partial blob %v", srcInfo.Digest) return true, info @@ -1305,16 +1228,16 @@ func (ic *imageCopier) copyLayer(ctx context.Context, srcInfo types.BlobInfo, to } // Fallback: copy the layer, computing the diffID if we need to do so - srcStream, srcBlobSize, err := ic.c.rawSource.GetBlob(ctx, srcInfo, ic.c.blobInfoCache) - if err != nil { - return types.BlobInfo{}, "", errors.Wrapf(err, "reading blob %s", srcInfo.Digest) - } - defer srcStream.Close() - return func() (types.BlobInfo, digest.Digest, error) { // A scope for defer bar := ic.c.createProgressBar(pool, false, srcInfo, "blob", "done") defer bar.Abort(false) + srcStream, srcBlobSize, err := ic.c.rawSource.GetBlob(ctx, srcInfo, ic.c.blobInfoCache) + if err != nil { + return types.BlobInfo{}, "", errors.Wrapf(err, "reading blob %s", srcInfo.Digest) + } + defer srcStream.Close() + blobInfo, diffIDChan, err := ic.copyLayerFromStream(ctx, srcStream, types.BlobInfo{Digest: srcInfo.Digest, Size: srcBlobSize, MediaType: srcInfo.MediaType, Annotations: srcInfo.Annotations}, diffIDIsNeeded, toEncrypt, bar, layerIndex, emptyLayer) if err != nil { return types.BlobInfo{}, "", err @@ -1330,14 +1253,22 @@ func (ic *imageCopier) copyLayer(ctx context.Context, srcInfo types.BlobInfo, to return types.BlobInfo{}, "", errors.Wrap(diffIDResult.err, "computing layer DiffID") } logrus.Debugf("Computed DiffID %s for layer %s", diffIDResult.digest, srcInfo.Digest) - // This is safe because we have just computed diffIDResult.Digest ourselves, and in the process - // we have read all of the input blob, so srcInfo.Digest must have been validated by digestingReader. - ic.c.blobInfoCache.RecordDigestUncompressedPair(srcInfo.Digest, diffIDResult.digest) + // Don’t record any associations that involve encrypted data. This is a bit crude, + // some blob substitutions (replacing pulls of encrypted data with local reuse of known decryption outcomes) + // might be safe, but it’s not trivially obvious, so let’s be conservative for now. + // This crude approach also means we don’t need to record whether a blob is encrypted + // in the blob info cache (which would probably be necessary for any more complex logic), + // and the simplicity is attractive. + if !encryptingOrDecrypting { + // This is safe because we have just computed diffIDResult.Digest ourselves, and in the process + // we have read all of the input blob, so srcInfo.Digest must have been validated by digestingReader. + ic.c.blobInfoCache.RecordDigestUncompressedPair(srcInfo.Digest, diffIDResult.digest) + } diffID = diffIDResult.digest } } - bar.SetTotal(srcInfo.Size, true) + bar.mark100PercentComplete() return blobInfo, diffID, nil }() } @@ -1347,7 +1278,7 @@ func (ic *imageCopier) copyLayer(ctx context.Context, srcInfo types.BlobInfo, to // perhaps (de/re/)compressing the stream, // and returns a complete blobInfo of the copied blob and perhaps a <-chan diffIDResult if diffIDIsNeeded, to be read by the caller. func (ic *imageCopier) copyLayerFromStream(ctx context.Context, srcStream io.Reader, srcInfo types.BlobInfo, - diffIDIsNeeded bool, toEncrypt bool, bar *mpb.Bar, layerIndex int, emptyLayer bool) (types.BlobInfo, <-chan diffIDResult, error) { + diffIDIsNeeded bool, toEncrypt bool, bar *progressBar, layerIndex int, emptyLayer bool) (types.BlobInfo, <-chan diffIDResult, error) { var getDiffIDRecorder func(compressiontypes.DecompressorFunc) io.Writer // = nil var diffIDChan chan diffIDResult @@ -1424,7 +1355,7 @@ func (r errorAnnotationReader) Read(b []byte) (n int, err error) { // and returns a complete blobInfo of the copied blob. func (c *copier) copyBlobFromStream(ctx context.Context, srcStream io.Reader, srcInfo types.BlobInfo, getOriginalLayerCopyWriter func(decompressor compressiontypes.DecompressorFunc) io.Writer, - canModifyBlob bool, isConfig bool, toEncrypt bool, bar *mpb.Bar, layerIndex int, emptyLayer bool) (types.BlobInfo, error) { + canModifyBlob bool, isConfig bool, toEncrypt bool, bar *progressBar, layerIndex int, emptyLayer bool) (types.BlobInfo, error) { if isConfig { // This is guaranteed by the caller, but set it here to be explicit. canModifyBlob = false } @@ -1444,6 +1375,9 @@ func (c *copier) copyBlobFromStream(ctx context.Context, srcStream io.Reader, sr } var destStream io.Reader = digestingReader + // === Update progress bars + destStream = bar.ProxyReader(destStream) + // === Decrypt the stream, if required. var decrypted bool if isOciEncrypted(srcInfo.MediaType) && c.ociDecryptConfig != nil { @@ -1478,9 +1412,6 @@ func (c *copier) copyBlobFromStream(ctx context.Context, srcStream io.Reader, sr logrus.Debugf("blob %s with type %s should be compressed with %s, but compressor appears to be %s", srcInfo.Digest.String(), srcInfo.MediaType, expectedCompressionFormat.Name(), compressionFormat.Name()) } - // === Update progress bars - destStream = bar.ProxyReader(destStream) - // === Send a copy of the original, uncompressed, stream, to a separate path if necessary. var originalLayerReader io.Reader // DO NOT USE this other than to drain the input if no other consumer in the pipeline has done so. if getOriginalLayerCopyWriter != nil { @@ -1681,19 +1612,27 @@ func (c *copier) copyBlobFromStream(ctx context.Context, srcStream io.Reader, sr return types.BlobInfo{}, errors.Errorf("Internal error writing blob %s, blob with digest %s saved with digest %s", srcInfo.Digest, inputInfo.Digest, uploadedInfo.Digest) } if digestingReader.validationSucceeded { - // If compressionOperation != types.PreserveOriginal, we now have two reliable digest values: - // srcinfo.Digest describes the pre-compressionOperation input, verified by digestingReader - // uploadedInfo.Digest describes the post-compressionOperation output, computed by PutBlob - // (because inputInfo.Digest == "", this must have been computed afresh). - switch compressionOperation { - case types.PreserveOriginal: - break // Do nothing, we have only one digest and we might not have even verified it. - case types.Compress: - c.blobInfoCache.RecordDigestUncompressedPair(uploadedInfo.Digest, srcInfo.Digest) - case types.Decompress: - c.blobInfoCache.RecordDigestUncompressedPair(srcInfo.Digest, uploadedInfo.Digest) - default: - return types.BlobInfo{}, errors.Errorf("Internal error: Unexpected compressionOperation value %#v", compressionOperation) + // Don’t record any associations that involve encrypted data. This is a bit crude, + // some blob substitutions (replacing pulls of encrypted data with local reuse of known decryption outcomes) + // might be safe, but it’s not trivially obvious, so let’s be conservative for now. + // This crude approach also means we don’t need to record whether a blob is encrypted + // in the blob info cache (which would probably be necessary for any more complex logic), + // and the simplicity is attractive. + if !encrypted && !decrypted { + // If compressionOperation != types.PreserveOriginal, we now have two reliable digest values: + // srcinfo.Digest describes the pre-compressionOperation input, verified by digestingReader + // uploadedInfo.Digest describes the post-compressionOperation output, computed by PutBlob + // (because inputInfo.Digest == "", this must have been computed afresh). + switch compressionOperation { + case types.PreserveOriginal: + break // Do nothing, we have only one digest and we might not have even verified it. + case types.Compress: + c.blobInfoCache.RecordDigestUncompressedPair(uploadedInfo.Digest, srcInfo.Digest) + case types.Decompress: + c.blobInfoCache.RecordDigestUncompressedPair(srcInfo.Digest, uploadedInfo.Digest) + default: + return types.BlobInfo{}, errors.Errorf("Internal error: Unexpected compressionOperation value %#v", compressionOperation) + } } if uploadCompressorName != "" && uploadCompressorName != internalblobinfocache.UnknownCompression { c.blobInfoCache.RecordDigestCompressorName(uploadedInfo.Digest, uploadCompressorName) diff --git a/vendor/github.com/containers/image/v5/copy/progress_bars.go b/vendor/github.com/containers/image/v5/copy/progress_bars.go new file mode 100644 index 000000000..585d86057 --- /dev/null +++ b/vendor/github.com/containers/image/v5/copy/progress_bars.go @@ -0,0 +1,148 @@ +package copy + +import ( + "context" + "fmt" + "io" + + "github.com/containers/image/v5/internal/private" + "github.com/containers/image/v5/types" + "github.com/vbauerster/mpb/v7" + "github.com/vbauerster/mpb/v7/decor" +) + +// newProgressPool creates a *mpb.Progress. +// The caller must eventually call pool.Wait() after the pool will no longer be updated. +// NOTE: Every progress bar created within the progress pool must either successfully +// complete or be aborted, or pool.Wait() will hang. That is typically done +// using "defer bar.Abort(false)", which must be called BEFORE pool.Wait() is called. +func (c *copier) newProgressPool() *mpb.Progress { + return mpb.New(mpb.WithWidth(40), mpb.WithOutput(c.progressOutput)) +} + +// customPartialBlobDecorFunc implements mpb.DecorFunc for the partial blobs retrieval progress bar +func customPartialBlobDecorFunc(s decor.Statistics) string { + if s.Total == 0 { + pairFmt := "%.1f / %.1f (skipped: %.1f)" + return fmt.Sprintf(pairFmt, decor.SizeB1024(s.Current), decor.SizeB1024(s.Total), decor.SizeB1024(s.Refill)) + } + pairFmt := "%.1f / %.1f (skipped: %.1f = %.2f%%)" + percentage := 100.0 * float64(s.Refill) / float64(s.Total) + return fmt.Sprintf(pairFmt, decor.SizeB1024(s.Current), decor.SizeB1024(s.Total), decor.SizeB1024(s.Refill), percentage) +} + +// progressBar wraps a *mpb.Bar, allowing us to add extra state and methods. +type progressBar struct { + *mpb.Bar + originalSize int64 // or -1 if unknown +} + +// createProgressBar creates a progressBar in pool. Note that if the copier's reportWriter +// is io.Discard, the progress bar's output will be discarded +// +// NOTE: Every progress bar created within a progress pool must either successfully +// complete or be aborted, or pool.Wait() will hang. That is typically done +// using "defer bar.Abort(false)", which must happen BEFORE pool.Wait() is called. +// +// As a convention, most users of progress bars should call mark100PercentComplete on full success; +// by convention, we don't leave progress bars in partial state when fully done +// (even if we copied much less data than anticipated). +func (c *copier) createProgressBar(pool *mpb.Progress, partial bool, info types.BlobInfo, kind string, onComplete string) *progressBar { + // shortDigestLen is the length of the digest used for blobs. + const shortDigestLen = 12 + + prefix := fmt.Sprintf("Copying %s %s", kind, info.Digest.Encoded()) + // Truncate the prefix (chopping of some part of the digest) to make all progress bars aligned in a column. + maxPrefixLen := len("Copying blob ") + shortDigestLen + if len(prefix) > maxPrefixLen { + prefix = prefix[:maxPrefixLen] + } + + // onComplete will replace prefix once the bar/spinner has completed + onComplete = prefix + " " + onComplete + + // Use a normal progress bar when we know the size (i.e., size > 0). + // Otherwise, use a spinner to indicate that something's happening. + var bar *mpb.Bar + if info.Size > 0 { + if partial { + bar = pool.AddBar(info.Size, + mpb.BarFillerClearOnComplete(), + mpb.PrependDecorators( + decor.OnComplete(decor.Name(prefix), onComplete), + ), + mpb.AppendDecorators( + decor.Any(customPartialBlobDecorFunc), + ), + ) + } else { + bar = pool.AddBar(info.Size, + mpb.BarFillerClearOnComplete(), + mpb.PrependDecorators( + decor.OnComplete(decor.Name(prefix), onComplete), + ), + mpb.AppendDecorators( + decor.OnComplete(decor.CountersKibiByte("%.1f / %.1f"), ""), + ), + ) + } + } else { + bar = pool.New(0, + mpb.SpinnerStyle(".", "..", "...", "....", "").PositionLeft(), + mpb.BarFillerClearOnComplete(), + mpb.PrependDecorators( + decor.OnComplete(decor.Name(prefix), onComplete), + ), + ) + } + if c.progressOutput == io.Discard { + c.Printf("Copying %s %s\n", kind, info.Digest) + } + return &progressBar{ + Bar: bar, + originalSize: info.Size, + } +} + +// mark100PercentComplete marks the progres bars as 100% complete; +// it may do so by possibly advancing the current state if it is below the known total. +func (bar *progressBar) mark100PercentComplete() { + if bar.originalSize > 0 { + // We can't call bar.SetTotal even if we wanted to; the total can not be changed + // after a progress bar is created with a definite total. + bar.SetCurrent(bar.originalSize) // This triggers the completion condition. + } else { + // -1 = unknown size + // 0 is somewhat of a a special case: Unlike c/image, where 0 is a definite known + // size (possible at least in theory), in mpb, zero-sized progress bars are treated + // as unknown size, in particular they are not configured to be marked as + // complete on bar.Current() reaching bar.total (because that would happen already + // when creating the progress bar). + // That means that we are both _allowed_ to call SetTotal, and we _have to_. + bar.SetTotal(-1, true) // total < 0 = set it to bar.Current(), report it; and mark the bar as complete. + } +} + +// blobChunkAccessorProxy wraps a BlobChunkAccessor and updates a *progressBar +// with the number of received bytes. +type blobChunkAccessorProxy struct { + wrapped private.BlobChunkAccessor // The underlying BlobChunkAccessor + bar *progressBar // A progress bar updated with the number of bytes read so far +} + +// GetBlobAt returns a sequential channel of readers that contain data for the requested +// blob chunks, and a channel that might get a single error value. +// The specified chunks must be not overlapping and sorted by their offset. +// The readers must be fully consumed, in the order they are returned, before blocking +// to read the next chunk. +func (s *blobChunkAccessorProxy) GetBlobAt(ctx context.Context, info types.BlobInfo, chunks []private.ImageSourceChunk) (chan io.ReadCloser, chan error, error) { + rc, errs, err := s.wrapped.GetBlobAt(ctx, info, chunks) + if err == nil { + total := int64(0) + for _, c := range chunks { + total += int64(c.Length) + } + s.bar.IncrInt64(total) + } + return rc, errs, err +} diff --git a/vendor/github.com/containers/image/v5/copy/progress_reader.go b/vendor/github.com/containers/image/v5/copy/progress_channel.go index de23cec1b..d5e9e09bd 100644 --- a/vendor/github.com/containers/image/v5/copy/progress_reader.go +++ b/vendor/github.com/containers/image/v5/copy/progress_channel.go @@ -1,16 +1,13 @@ package copy import ( - "context" "io" "time" - "github.com/containers/image/v5/internal/private" "github.com/containers/image/v5/types" - "github.com/vbauerster/mpb/v7" ) -// progressReader is a reader that reports its progress on an interval. +// progressReader is a reader that reports its progress to a types.ProgressProperties channel on an interval. type progressReader struct { source io.Reader channel chan<- types.ProgressProperties @@ -80,27 +77,3 @@ func (r *progressReader) Read(p []byte) (int, error) { } return n, err } - -// blobChunkAccessorProxy wraps a BlobChunkAccessor and keeps track of how many bytes -// are received. -type blobChunkAccessorProxy struct { - wrapped private.BlobChunkAccessor // The underlying BlobChunkAccessor - bar *mpb.Bar // A progress bar updated with the number of bytes read so far -} - -// GetBlobAt returns a sequential channel of readers that contain data for the requested -// blob chunks, and a channel that might get a single error value. -// The specified chunks must be not overlapping and sorted by their offset. -// The readers must be fully consumed, in the order they are returned, before blocking -// to read the next chunk. -func (s *blobChunkAccessorProxy) GetBlobAt(ctx context.Context, info types.BlobInfo, chunks []private.ImageSourceChunk) (chan io.ReadCloser, chan error, error) { - rc, errs, err := s.wrapped.GetBlobAt(ctx, info, chunks) - if err == nil { - total := int64(0) - for _, c := range chunks { - total += int64(c.Length) - } - s.bar.IncrInt64(total) - } - return rc, errs, err -} diff --git a/vendor/github.com/containers/image/v5/version/version.go b/vendor/github.com/containers/image/v5/version/version.go index 9447d53c4..6295b5493 100644 --- a/vendor/github.com/containers/image/v5/version/version.go +++ b/vendor/github.com/containers/image/v5/version/version.go @@ -11,7 +11,7 @@ const ( VersionPatch = 1 // VersionDev indicates development branch. Releases will be empty string. - VersionDev = "-dev" + VersionDev = "" ) // Version is the specification version that the package types support. diff --git a/vendor/github.com/containers/ocicrypt/MAINTAINERS b/vendor/github.com/containers/ocicrypt/MAINTAINERS index e6a7d1f0a..af38d03bf 100644 --- a/vendor/github.com/containers/ocicrypt/MAINTAINERS +++ b/vendor/github.com/containers/ocicrypt/MAINTAINERS @@ -3,3 +3,4 @@ # Github ID, Name, Email Address lumjjb, Brandon Lum, lumjjb@gmail.com stefanberger, Stefan Berger, stefanb@linux.ibm.com +arronwy, Arron Wang, arron.wang@intel.com diff --git a/vendor/github.com/containers/ocicrypt/crypto/pkcs11/pkcs11helpers.go b/vendor/github.com/containers/ocicrypt/crypto/pkcs11/pkcs11helpers.go index 448e88c7c..7d80f5f84 100644 --- a/vendor/github.com/containers/ocicrypt/crypto/pkcs11/pkcs11helpers.go +++ b/vendor/github.com/containers/ocicrypt/crypto/pkcs11/pkcs11helpers.go @@ -40,8 +40,6 @@ import ( var ( // OAEPLabel defines the label we use for OAEP encryption; this cannot be changed OAEPLabel = []byte("") - // OAEPDefaultHash defines the default hash used for OAEP encryption; this cannot be changed - OAEPDefaultHash = "sha1" // OAEPSha1Params describes the OAEP parameters with sha1 hash algorithm; needed by SoftHSM OAEPSha1Params = &pkcs11.OAEPParams{ @@ -69,12 +67,12 @@ func rsaPublicEncryptOAEP(pubKey *rsa.PublicKey, plaintext []byte) ([]byte, stri ) oaephash := os.Getenv("OCICRYPT_OAEP_HASHALG") - // The default is 'sha1' + // The default is sha256 (previously was sha1) switch strings.ToLower(oaephash) { - case "sha1", "": + case "sha1": hashfunc = sha1.New() hashalg = "sha1" - case "sha256": + case "sha256", "": hashfunc = sha256.New() hashalg = "sha256" default: @@ -283,12 +281,12 @@ func publicEncryptOAEP(pubKey *Pkcs11KeyFileObject, plaintext []byte) ([]byte, s var oaep *pkcs11.OAEPParams oaephash := os.Getenv("OCICRYPT_OAEP_HASHALG") - // the default is sha1 + // The default is sha256 (previously was sha1) switch strings.ToLower(oaephash) { - case "sha1", "": + case "sha1": oaep = OAEPSha1Params hashalg = "sha1" - case "sha256": + case "sha256", "": oaep = OAEPSha256Params hashalg = "sha256" default: @@ -333,7 +331,7 @@ func privateDecryptOAEP(privKeyObj *Pkcs11KeyFileObject, ciphertext []byte, hash var oaep *pkcs11.OAEPParams - // the default is sha1 + // An empty string from the Hash in the JSON historically defaults to sha1. switch hashalg { case "sha1", "": oaep = OAEPSha1Params @@ -410,9 +408,6 @@ func EncryptMultiple(pubKeys []interface{}, data []byte) ([]byte, error) { return nil, err } - if hashalg == OAEPDefaultHash { - hashalg = "" - } recipient := Pkcs11Recipient{ Version: 0, Blob: base64.StdEncoding.EncodeToString(ciphertext), @@ -431,15 +426,18 @@ func EncryptMultiple(pubKeys []interface{}, data []byte) ([]byte, error) { // { // "version": 0, // "blob": <base64 encoded RSA OAEP encrypted blob>, -// "hash": <hash used for OAEP other than 'sha256'> +// "hash": <hash used for OAEP other than 'sha1'> // } , // { // "version": 0, // "blob": <base64 encoded RSA OAEP encrypted blob>, -// "hash": <hash used for OAEP other than 'sha256'> +// "hash": <hash used for OAEP other than 'sha1'> // } , // [...] // } +// Note: More recent versions of this code explicitly write 'sha1' +// while older versions left it empty in case of 'sha1'. +// func Decrypt(privKeyObjs []*Pkcs11KeyFileObject, pkcs11blobstr []byte) ([]byte, error) { pkcs11blob := Pkcs11Blob{} err := json.Unmarshal(pkcs11blobstr, &pkcs11blob) diff --git a/vendor/github.com/containers/storage/VERSION b/vendor/github.com/containers/storage/VERSION index 79833f2ce..32b7211cb 100644 --- a/vendor/github.com/containers/storage/VERSION +++ b/vendor/github.com/containers/storage/VERSION @@ -1 +1 @@ -1.39.0+dev +1.40.0 diff --git a/vendor/github.com/containers/storage/drivers/chown_unix.go b/vendor/github.com/containers/storage/drivers/chown_unix.go index 3c508b66b..c598b936d 100644 --- a/vendor/github.com/containers/storage/drivers/chown_unix.go +++ b/vendor/github.com/containers/storage/drivers/chown_unix.go @@ -76,7 +76,7 @@ func (c *platformChowner) LChown(path string, info os.FileInfo, toHost, toContai UID: uid, GID: gid, } - mappedPair, err := toHost.ToHost(pair) + mappedPair, err := toHost.ToHostOverflow(pair) if err != nil { return fmt.Errorf("error mapping container ID pair %#v for %q to host: %v", pair, path, err) } diff --git a/vendor/github.com/containers/storage/drivers/driver_freebsd.go b/vendor/github.com/containers/storage/drivers/driver_freebsd.go index 79a591288..143cccf92 100644 --- a/vendor/github.com/containers/storage/drivers/driver_freebsd.go +++ b/vendor/github.com/containers/storage/drivers/driver_freebsd.go @@ -1,7 +1,6 @@ package graphdriver import ( - "fmt" "golang.org/x/sys/unix" "github.com/containers/storage/pkg/mount" diff --git a/vendor/github.com/containers/storage/drivers/overlay/overlay.go b/vendor/github.com/containers/storage/drivers/overlay/overlay.go index 550dc7d39..09d24ae8b 100644 --- a/vendor/github.com/containers/storage/drivers/overlay/overlay.go +++ b/vendor/github.com/containers/storage/drivers/overlay/overlay.go @@ -939,6 +939,16 @@ func (d *Driver) create(id, parent string, opts *graphdriver.CreateOpts, disable rootUID = int(st.UID()) rootGID = int(st.GID()) } + + if _, err := system.Lstat(dir); err == nil { + logrus.Warnf("Trying to create a layer %#v while directory %q already exists; removing it first", id, dir) + // Don’t just os.RemoveAll(dir) here; d.Remove also removes the link in linkDir, + // so that we can’t end up with two symlinks in linkDir pointing to the same layer. + if err := d.Remove(id); err != nil { + return errors.Wrapf(err, "removing a pre-existing layer directory %q", dir) + } + } + if err := idtools.MkdirAllAndChownNew(dir, 0700, idPair); err != nil { return err } @@ -1389,7 +1399,7 @@ func (d *Driver) get(id string, disableShifting bool, options graphdriver.MountO } for err == nil { absLowers = append(absLowers, filepath.Join(dir, nameWithSuffix("diff", diffN))) - relLowers = append(relLowers, dumbJoin(string(link), "..", nameWithSuffix("diff", diffN))) + relLowers = append(relLowers, dumbJoin(linkDir, string(link), "..", nameWithSuffix("diff", diffN))) diffN++ st, err = os.Stat(filepath.Join(dir, nameWithSuffix("diff", diffN))) if err == nil && !permsKnown { diff --git a/vendor/github.com/containers/storage/drivers/zfs/zfs_freebsd.go b/vendor/github.com/containers/storage/drivers/zfs/zfs_freebsd.go index fd98ad305..61a2ed871 100644 --- a/vendor/github.com/containers/storage/drivers/zfs/zfs_freebsd.go +++ b/vendor/github.com/containers/storage/drivers/zfs/zfs_freebsd.go @@ -2,7 +2,6 @@ package zfs import ( "fmt" - "strings" "github.com/containers/storage/drivers" "github.com/pkg/errors" @@ -26,19 +25,10 @@ func checkRootdirFs(rootdir string) error { } func getMountpoint(id string) string { - maxlen := 12 - - // we need to preserve filesystem suffix - suffix := strings.SplitN(id, "-", 2) - - if len(suffix) > 1 { - return id[:maxlen] + "-" + suffix[1] - } - - return id[:maxlen] + return id } func detachUnmount(mountpoint string) error { - // FreeBSD doesn't have an equivalent to MNT_DETACH - return unix.Unmount(mountpoint, 0) + // FreeBSD's MNT_FORCE is roughly equivalent to MNT_DETACH + return unix.Unmount(mountpoint, unix.MNT_FORCE) } diff --git a/vendor/github.com/containers/storage/go.mod b/vendor/github.com/containers/storage/go.mod index 1915ea65d..5d8c59960 100644 --- a/vendor/github.com/containers/storage/go.mod +++ b/vendor/github.com/containers/storage/go.mod @@ -12,7 +12,7 @@ require ( github.com/google/go-intervals v0.0.2 github.com/hashicorp/go-multierror v1.1.1 github.com/json-iterator/go v1.1.12 - github.com/klauspost/compress v1.15.1 + github.com/klauspost/compress v1.15.2 github.com/klauspost/pgzip v1.2.5 github.com/mattn/go-shellwords v1.0.12 github.com/mistifyio/go-zfs v2.1.2-0.20190413222219-f784269be439+incompatible diff --git a/vendor/github.com/containers/storage/go.sum b/vendor/github.com/containers/storage/go.sum index cd5bf3b97..97a0d167d 100644 --- a/vendor/github.com/containers/storage/go.sum +++ b/vendor/github.com/containers/storage/go.sum @@ -424,8 +424,9 @@ github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/compress v1.11.3/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= github.com/klauspost/compress v1.11.13/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= -github.com/klauspost/compress v1.15.1 h1:y9FcTHGyrebwfP0ZZqFiaxTaiDnUrGkJkI+f583BL1A= github.com/klauspost/compress v1.15.1/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= +github.com/klauspost/compress v1.15.2 h1:3WH+AG7s2+T8o3nrM/8u2rdqUEcQhmga7smjrT41nAw= +github.com/klauspost/compress v1.15.2/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU= github.com/klauspost/pgzip v1.2.5 h1:qnWYvvKqedOF2ulHpMG72XQol4ILEJ8k2wwRl/Km8oE= github.com/klauspost/pgzip v1.2.5/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= diff --git a/vendor/github.com/containers/storage/layers.go b/vendor/github.com/containers/storage/layers.go index 34d27ffa3..bba8d7588 100644 --- a/vendor/github.com/containers/storage/layers.go +++ b/vendor/github.com/containers/storage/layers.go @@ -692,11 +692,10 @@ func (r *layerStore) PutAdditionalLayer(id string, parentLayer *Layer, names []s return copyLayer(layer), nil } -func (r *layerStore) Put(id string, parentLayer *Layer, names []string, mountLabel string, options map[string]string, moreOptions *LayerOptions, writeable bool, flags map[string]interface{}, diff io.Reader) (layer *Layer, size int64, err error) { +func (r *layerStore) Put(id string, parentLayer *Layer, names []string, mountLabel string, options map[string]string, moreOptions *LayerOptions, writeable bool, flags map[string]interface{}, diff io.Reader) (*Layer, int64, error) { if !r.IsReadWrite() { return nil, -1, errors.Wrapf(ErrStoreIsReadOnly, "not allowed to create new layers at %q", r.layerspath()) } - size = -1 if err := os.MkdirAll(r.rundir, 0700); err != nil { return nil, -1, err } @@ -762,6 +761,60 @@ func (r *layerStore) Put(id string, parentLayer *Layer, names []string, mountLab if mountLabel != "" { label.ReserveLabel(mountLabel) } + + // Before actually creating the layer, make a persistent record of it with incompleteFlag, + // so that future processes have a chance to delete it. + layer := &Layer{ + ID: id, + Parent: parent, + Names: names, + MountLabel: mountLabel, + Metadata: templateMetadata, + Created: time.Now().UTC(), + CompressedDigest: templateCompressedDigest, + CompressedSize: templateCompressedSize, + UncompressedDigest: templateUncompressedDigest, + UncompressedSize: templateUncompressedSize, + CompressionType: templateCompressionType, + UIDs: templateUIDs, + GIDs: templateGIDs, + Flags: make(map[string]interface{}), + UIDMap: copyIDMap(moreOptions.UIDMap), + GIDMap: copyIDMap(moreOptions.GIDMap), + BigDataNames: []string{}, + } + r.layers = append(r.layers, layer) + r.idindex.Add(id) + r.byid[id] = layer + for _, name := range names { + r.byname[name] = layer + } + for flag, value := range flags { + layer.Flags[flag] = value + } + layer.Flags[incompleteFlag] = true + + succeeded := false + cleanupFailureContext := "" + defer func() { + if !succeeded { + // On any error, try both removing the driver's data as well + // as the in-memory layer record. + if err2 := r.Delete(layer.ID); err2 != nil { + if cleanupFailureContext == "" { + cleanupFailureContext = "unknown: cleanupFailureContext not set at the failure site" + } + logrus.Errorf("While recovering from a failure (%s), error deleting layer %#v: %v", cleanupFailureContext, layer.ID, err2) + } + } + }() + + err := r.Save() + if err != nil { + cleanupFailureContext = "saving incomplete layer metadata" + return nil, -1, err + } + idMappings := idtools.NewIDMappingsFromMaps(moreOptions.UIDMap, moreOptions.GIDMap) opts := drivers.CreateOpts{ MountLabel: mountLabel, @@ -769,132 +822,67 @@ func (r *layerStore) Put(id string, parentLayer *Layer, names []string, mountLab IDMappings: idMappings, } if moreOptions.TemplateLayer != "" { - if err = r.driver.CreateFromTemplate(id, moreOptions.TemplateLayer, templateIDMappings, parent, parentMappings, &opts, writeable); err != nil { + if err := r.driver.CreateFromTemplate(id, moreOptions.TemplateLayer, templateIDMappings, parent, parentMappings, &opts, writeable); err != nil { + cleanupFailureContext = "creating a layer from template" return nil, -1, errors.Wrapf(err, "error creating copy of template layer %q with ID %q", moreOptions.TemplateLayer, id) } oldMappings = templateIDMappings } else { if writeable { - if err = r.driver.CreateReadWrite(id, parent, &opts); err != nil { + if err := r.driver.CreateReadWrite(id, parent, &opts); err != nil { + cleanupFailureContext = "creating a read-write layer" return nil, -1, errors.Wrapf(err, "error creating read-write layer with ID %q", id) } } else { - if err = r.driver.Create(id, parent, &opts); err != nil { + if err := r.driver.Create(id, parent, &opts); err != nil { + cleanupFailureContext = "creating a read-only layer" return nil, -1, errors.Wrapf(err, "error creating layer with ID %q", id) } } oldMappings = parentMappings } if !reflect.DeepEqual(oldMappings.UIDs(), idMappings.UIDs()) || !reflect.DeepEqual(oldMappings.GIDs(), idMappings.GIDs()) { - if err = r.driver.UpdateLayerIDMap(id, oldMappings, idMappings, mountLabel); err != nil { - // We don't have a record of this layer, but at least - // try to clean it up underneath us. - if err2 := r.driver.Remove(id); err2 != nil { - logrus.Errorf("While recovering from a failure creating in UpdateLayerIDMap, error deleting layer %#v: %v", id, err2) - } + if err := r.driver.UpdateLayerIDMap(id, oldMappings, idMappings, mountLabel); err != nil { + cleanupFailureContext = "in UpdateLayerIDMap" return nil, -1, err } } if len(templateTSdata) > 0 { if err := os.MkdirAll(filepath.Dir(r.tspath(id)), 0o700); err != nil { - // We don't have a record of this layer, but at least - // try to clean it up underneath us. - if err2 := r.driver.Remove(id); err2 != nil { - logrus.Errorf("While recovering from a failure creating in UpdateLayerIDMap, error deleting layer %#v: %v", id, err2) - } + cleanupFailureContext = "creating tar-split parent directory for a copy from template" return nil, -1, err } - if err = ioutils.AtomicWriteFile(r.tspath(id), templateTSdata, 0o600); err != nil { - // We don't have a record of this layer, but at least - // try to clean it up underneath us. - if err2 := r.driver.Remove(id); err2 != nil { - logrus.Errorf("While recovering from a failure creating in UpdateLayerIDMap, error deleting layer %#v: %v", id, err2) - } + if err := ioutils.AtomicWriteFile(r.tspath(id), templateTSdata, 0o600); err != nil { + cleanupFailureContext = "creating a tar-split copy from template" return nil, -1, err } } - if err == nil { - layer = &Layer{ - ID: id, - Parent: parent, - Names: names, - MountLabel: mountLabel, - Metadata: templateMetadata, - Created: time.Now().UTC(), - CompressedDigest: templateCompressedDigest, - CompressedSize: templateCompressedSize, - UncompressedDigest: templateUncompressedDigest, - UncompressedSize: templateUncompressedSize, - CompressionType: templateCompressionType, - UIDs: templateUIDs, - GIDs: templateGIDs, - Flags: make(map[string]interface{}), - UIDMap: copyIDMap(moreOptions.UIDMap), - GIDMap: copyIDMap(moreOptions.GIDMap), - BigDataNames: []string{}, - } - r.layers = append(r.layers, layer) - r.idindex.Add(id) - r.byid[id] = layer - for _, name := range names { - r.byname[name] = layer - } - for flag, value := range flags { - layer.Flags[flag] = value - } - savedIncompleteLayer := false - if diff != nil { - layer.Flags[incompleteFlag] = true - err = r.Save() - if err != nil { - // We don't have a record of this layer, but at least - // try to clean it up underneath us. - if err2 := r.driver.Remove(id); err2 != nil { - logrus.Errorf("While recovering from a failure saving incomplete layer metadata, error deleting layer %#v: %v", id, err2) - } - return nil, -1, err - } - savedIncompleteLayer = true - size, err = r.applyDiffWithOptions(layer.ID, moreOptions, diff) - if err != nil { - if err2 := r.Delete(layer.ID); err2 != nil { - // Either a driver error or an error saving. - // We now have a layer that's been marked for - // deletion but which we failed to remove. - logrus.Errorf("While recovering from a failure applying layer diff, error deleting layer %#v: %v", layer.ID, err2) - } - return nil, -1, err - } - delete(layer.Flags, incompleteFlag) - } else { - // applyDiffWithOptions in the `diff != nil` case handles this bit for us - if layer.CompressedDigest != "" { - r.bycompressedsum[layer.CompressedDigest] = append(r.bycompressedsum[layer.CompressedDigest], layer.ID) - } - if layer.UncompressedDigest != "" { - r.byuncompressedsum[layer.UncompressedDigest] = append(r.byuncompressedsum[layer.UncompressedDigest], layer.ID) - } - } - err = r.Save() + + var size int64 = -1 + if diff != nil { + size, err = r.applyDiffWithOptions(layer.ID, moreOptions, diff) if err != nil { - if savedIncompleteLayer { - if err2 := r.Delete(layer.ID); err2 != nil { - // Either a driver error or an error saving. - // We now have a layer that's been marked for - // deletion but which we failed to remove. - logrus.Errorf("While recovering from a failure saving finished layer metadata, error deleting layer %#v: %v", layer.ID, err2) - } - } else { - // We don't have a record of this layer, but at least - // try to clean it up underneath us. - if err2 := r.driver.Remove(id); err2 != nil { - logrus.Errorf("While recovering from a failure saving finished layer metadata, error deleting layer %#v in graph driver: %v", id, err2) - } - } + cleanupFailureContext = "applying layer diff" return nil, -1, err } - layer = copyLayer(layer) + } else { + // applyDiffWithOptions in the `diff != nil` case handles this bit for us + if layer.CompressedDigest != "" { + r.bycompressedsum[layer.CompressedDigest] = append(r.bycompressedsum[layer.CompressedDigest], layer.ID) + } + if layer.UncompressedDigest != "" { + r.byuncompressedsum[layer.UncompressedDigest] = append(r.byuncompressedsum[layer.UncompressedDigest], layer.ID) + } } + delete(layer.Flags, incompleteFlag) + err = r.Save() + if err != nil { + cleanupFailureContext = "saving finished layer metadata" + return nil, -1, err + } + + layer = copyLayer(layer) + succeeded = true return layer, size, err } diff --git a/vendor/github.com/containers/storage/pkg/idtools/idtools.go b/vendor/github.com/containers/storage/pkg/idtools/idtools.go index 7c8f4d10c..7a8fec0ce 100644 --- a/vendor/github.com/containers/storage/pkg/idtools/idtools.go +++ b/vendor/github.com/containers/storage/pkg/idtools/idtools.go @@ -3,15 +3,18 @@ package idtools import ( "bufio" "fmt" + "io/ioutil" "os" "os/user" "sort" "strconv" "strings" + "sync" "syscall" "github.com/containers/storage/pkg/system" "github.com/pkg/errors" + "github.com/sirupsen/logrus" ) // IDMap contains a single entry for user namespace range remapping. An array @@ -203,6 +206,67 @@ func (i *IDMappings) ToHost(pair IDPair) (IDPair, error) { return target, err } +var ( + overflowUIDOnce sync.Once + overflowGIDOnce sync.Once + overflowUID int + overflowGID int +) + +// getOverflowUID returns the UID mapped to the overflow user +func getOverflowUID() int { + overflowUIDOnce.Do(func() { + // 65534 is the value on older kernels where /proc/sys/kernel/overflowuid is not present + overflowUID = 65534 + if content, err := ioutil.ReadFile("/proc/sys/kernel/overflowuid"); err == nil { + if tmp, err := strconv.Atoi(string(content)); err == nil { + overflowUID = tmp + } + } + }) + return overflowUID +} + +// getOverflowUID returns the GID mapped to the overflow user +func getOverflowGID() int { + overflowGIDOnce.Do(func() { + // 65534 is the value on older kernels where /proc/sys/kernel/overflowgid is not present + overflowGID = 65534 + if content, err := ioutil.ReadFile("/proc/sys/kernel/overflowgid"); err == nil { + if tmp, err := strconv.Atoi(string(content)); err == nil { + overflowGID = tmp + } + } + }) + return overflowGID +} + +// ToHost returns the host UID and GID for the container uid, gid. +// Remapping is only performed if the ids aren't already the remapped root ids +// If the mapping is not possible because the target ID is not mapped into +// the namespace, then the overflow ID is used. +func (i *IDMappings) ToHostOverflow(pair IDPair) (IDPair, error) { + var err error + target := i.RootPair() + + if pair.UID != target.UID { + target.UID, err = RawToHost(pair.UID, i.uids) + if err != nil { + target.UID = getOverflowUID() + logrus.Debugf("Failed to map UID %v to the target mapping, using the overflow ID %v", pair.UID, target.UID) + } + } + + if pair.GID != target.GID { + target.GID, err = RawToHost(pair.GID, i.gids) + if err != nil { + target.GID = getOverflowGID() + logrus.Debugf("Failed to map GID %v to the target mapping, using the overflow ID %v", pair.GID, target.GID) + } + } + return target, nil +} + // ToContainer returns the container UID and GID for the host uid and gid func (i *IDMappings) ToContainer(pair IDPair) (int, int, error) { uid, err := RawToContainer(pair.UID, i.uids) diff --git a/vendor/github.com/containers/storage/pkg/mount/flags_freebsd.go b/vendor/github.com/containers/storage/pkg/mount/flags_freebsd.go new file mode 100644 index 000000000..3ba99cf93 --- /dev/null +++ b/vendor/github.com/containers/storage/pkg/mount/flags_freebsd.go @@ -0,0 +1,48 @@ +package mount + +import ( + "golang.org/x/sys/unix" +) + +const ( + // RDONLY will mount the file system read-only. + RDONLY = unix.MNT_RDONLY + + // NOSUID will not allow set-user-identifier or set-group-identifier bits to + // take effect. + NOSUID = unix.MNT_NOSUID + + // NOEXEC will not allow execution of any binaries on the mounted file system. + NOEXEC = unix.MNT_NOEXEC + + // SYNCHRONOUS will allow I/O to the file system to be done synchronously. + SYNCHRONOUS = unix.MNT_SYNCHRONOUS + + // REMOUNT will attempt to remount an already-mounted file system. This is + // commonly used to change the mount flags for a file system, especially to + // make a readonly file system writeable. It does not change device or mount + // point. + REMOUNT = unix.MNT_UPDATE + + // NOATIME will not update the file access time when reading from a file. + NOATIME = unix.MNT_NOATIME + + mntDetach = unix.MNT_FORCE + + NODIRATIME = 0 + NODEV = 0 + DIRSYNC = 0 + MANDLOCK = 0 + BIND = 0 + RBIND = 0 + UNBINDABLE = 0 + RUNBINDABLE = 0 + PRIVATE = 0 + RPRIVATE = 0 + SLAVE = 0 + RSLAVE = 0 + SHARED = 0 + RSHARED = 0 + RELATIME = 0 + STRICTATIME = 0 +) diff --git a/vendor/github.com/containers/storage/pkg/mount/flags_unsupported.go b/vendor/github.com/containers/storage/pkg/mount/flags_unsupported.go index 9afd26d4c..ee0f593a5 100644 --- a/vendor/github.com/containers/storage/pkg/mount/flags_unsupported.go +++ b/vendor/github.com/containers/storage/pkg/mount/flags_unsupported.go @@ -1,4 +1,5 @@ -// +build !linux +//go:build !linux && !freebsd +// +build !linux,!freebsd package mount diff --git a/vendor/github.com/containers/storage/pkg/mount/mounter_freebsd.go b/vendor/github.com/containers/storage/pkg/mount/mounter_freebsd.go index b31cf99d0..72ceec3dd 100644 --- a/vendor/github.com/containers/storage/pkg/mount/mounter_freebsd.go +++ b/vendor/github.com/containers/storage/pkg/mount/mounter_freebsd.go @@ -28,14 +28,25 @@ func allocateIOVecs(options []string) []C.struct_iovec { func mount(device, target, mType string, flag uintptr, data string) error { isNullFS := false - xs := strings.Split(data, ",") - for _, x := range xs { - if x == "bind" { - isNullFS = true + options := []string{"fspath", target} + + if data != "" { + xs := strings.Split(data, ",") + for _, x := range xs { + if x == "bind" { + isNullFS = true + continue + } + opt := strings.SplitN(x, "=", 2) + options = append(options, opt[0]) + if len(opt) == 2 { + options = append(options, opt[1]) + } else { + options = append(options, "") + } } } - options := []string{"fspath", target} if isNullFS { options = append(options, "fstype", "nullfs", "target", device) } else { diff --git a/vendor/github.com/containers/storage/pkg/reexec/command_freebsd.go b/vendor/github.com/containers/storage/pkg/reexec/command_freebsd.go new file mode 100644 index 000000000..6f63ae991 --- /dev/null +++ b/vendor/github.com/containers/storage/pkg/reexec/command_freebsd.go @@ -0,0 +1,37 @@ +// +build freebsd + +package reexec + +import ( + "context" + "os" + "os/exec" + + "golang.org/x/sys/unix" +) + +// Self returns the path to the current process's binary. +// Uses sysctl. +func Self() string { + path, err := unix.SysctlArgs("kern.proc.pathname", -1) + if err == nil { + return path + } + return os.Args[0] +} + +// Command returns *exec.Cmd which has Path as current binary. +// For example if current binary is "docker" at "/usr/bin/", then cmd.Path will +// be set to "/usr/bin/docker". +func Command(args ...string) *exec.Cmd { + cmd := exec.Command(Self()) + cmd.Args = args + return cmd +} + +// CommandContext returns *exec.Cmd which has Path as current binary. +func CommandContext(ctx context.Context, args ...string) *exec.Cmd { + cmd := exec.CommandContext(ctx, Self()) + cmd.Args = args + return cmd +} diff --git a/vendor/github.com/containers/storage/pkg/reexec/command_unix.go b/vendor/github.com/containers/storage/pkg/reexec/command_unix.go index 9dd8cb9bb..a56ada216 100644 --- a/vendor/github.com/containers/storage/pkg/reexec/command_unix.go +++ b/vendor/github.com/containers/storage/pkg/reexec/command_unix.go @@ -1,4 +1,5 @@ -// +build freebsd solaris darwin +//go:build solaris || darwin +// +build solaris darwin package reexec diff --git a/vendor/github.com/klauspost/compress/.gitignore b/vendor/github.com/klauspost/compress/.gitignore index b35f8449b..d31b37815 100644 --- a/vendor/github.com/klauspost/compress/.gitignore +++ b/vendor/github.com/klauspost/compress/.gitignore @@ -23,3 +23,10 @@ _testmain.go *.test *.prof /s2/cmd/_s2sx/sfx-exe + +# Linux perf files +perf.data +perf.data.old + +# gdb history +.gdb_history diff --git a/vendor/github.com/klauspost/compress/README.md b/vendor/github.com/klauspost/compress/README.md index 0e2dc116a..5b7cf781a 100644 --- a/vendor/github.com/klauspost/compress/README.md +++ b/vendor/github.com/klauspost/compress/README.md @@ -17,6 +17,13 @@ This package provides various compression algorithms. # changelog
+* Mar 11, 2022 (v1.15.1)
+ * huff0: Add x86 assembly of Decode4X by @WojciechMula in [#512](https://github.com/klauspost/compress/pull/512)
+ * zstd: Reuse zip decoders in [#514](https://github.com/klauspost/compress/pull/514)
+ * zstd: Detect extra block data and report as corrupted in [#520](https://github.com/klauspost/compress/pull/520)
+ * zstd: Handle zero sized frame content size stricter in [#521](https://github.com/klauspost/compress/pull/521)
+ * zstd: Add stricter block size checks in [#523](https://github.com/klauspost/compress/pull/523)
+
* Mar 3, 2022 (v1.15.0)
* zstd: Refactor decoder by @klauspost in [#498](https://github.com/klauspost/compress/pull/498)
* zstd: Add stream encoding without goroutines by @klauspost in [#505](https://github.com/klauspost/compress/pull/505)
diff --git a/vendor/github.com/klauspost/compress/go.mod b/vendor/github.com/klauspost/compress/go.mod index 5aa64a436..b605e2d52 100644 --- a/vendor/github.com/klauspost/compress/go.mod +++ b/vendor/github.com/klauspost/compress/go.mod @@ -1,3 +1,3 @@ module github.com/klauspost/compress -go 1.15 +go 1.16 diff --git a/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go b/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go index d47f6644f..ce8e93bcd 100644 --- a/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go +++ b/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go @@ -12,14 +12,14 @@ import ( // decompress4x_main_loop_x86 is an x86 assembler implementation // of Decompress4X when tablelog > 8. -// go:noescape +//go:noescape func decompress4x_main_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted, peekBits uint8, buf *byte, tbl *dEntrySingle) uint8 // decompress4x_8b_loop_x86 is an x86 assembler implementation // of Decompress4X when tablelog <= 8 which decodes 4 entries // per loop. -// go:noescape +//go:noescape func decompress4x_8b_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted, peekBits uint8, buf *byte, tbl *dEntrySingle) uint8 diff --git a/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo.go b/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo.go new file mode 100644 index 000000000..3954c5121 --- /dev/null +++ b/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo.go @@ -0,0 +1,34 @@ +// Package cpuinfo gives runtime info about the current CPU. +// +// This is a very limited module meant for use internally +// in this project. For more versatile solution check +// https://github.com/klauspost/cpuid. +package cpuinfo + +// HasBMI1 checks whether an x86 CPU supports the BMI1 extension. +func HasBMI1() bool { + return hasBMI1 +} + +// HasBMI2 checks whether an x86 CPU supports the BMI2 extension. +func HasBMI2() bool { + return hasBMI2 +} + +// DisableBMI2 will disable BMI2, for testing purposes. +// Call returned function to restore previous state. +func DisableBMI2() func() { + old := hasBMI2 + hasBMI2 = false + return func() { + hasBMI2 = old + } +} + +// HasBMI checks whether an x86 CPU supports both BMI1 and BMI2 extensions. +func HasBMI() bool { + return HasBMI1() && HasBMI2() +} + +var hasBMI1 bool +var hasBMI2 bool diff --git a/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.go b/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.go new file mode 100644 index 000000000..e802579c4 --- /dev/null +++ b/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.go @@ -0,0 +1,11 @@ +//go:build amd64 && !appengine && !noasm && gc +// +build amd64,!appengine,!noasm,gc + +package cpuinfo + +// go:noescape +func x86extensions() (bmi1, bmi2 bool) + +func init() { + hasBMI1, hasBMI2 = x86extensions() +} diff --git a/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.s b/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.s new file mode 100644 index 000000000..4465fbe9e --- /dev/null +++ b/vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.s @@ -0,0 +1,36 @@ +// +build !appengine +// +build gc +// +build !noasm + +#include "textflag.h" +#include "funcdata.h" +#include "go_asm.h" + +TEXT ·x86extensions(SB), NOSPLIT, $0 + // 1. determine max EAX value + XORQ AX, AX + CPUID + + CMPQ AX, $7 + JB unsupported + + // 2. EAX = 7, ECX = 0 --- see Table 3-8 "Information Returned by CPUID Instruction" + MOVQ $7, AX + MOVQ $0, CX + CPUID + + BTQ $3, BX // bit 3 = BMI1 + SETCS AL + + BTQ $8, BX // bit 8 = BMI2 + SETCS AH + + MOVB AL, bmi1+0(FP) + MOVB AH, bmi2+1(FP) + RET + +unsupported: + XORQ AX, AX + MOVB AL, bmi1+0(FP) + MOVB AL, bmi2+1(FP) + RET diff --git a/vendor/github.com/klauspost/compress/zstd/README.md b/vendor/github.com/klauspost/compress/zstd/README.md index e3445ac19..beb7fa872 100644 --- a/vendor/github.com/klauspost/compress/zstd/README.md +++ b/vendor/github.com/klauspost/compress/zstd/README.md @@ -386,47 +386,31 @@ In practice this means that concurrency is often limited to utilizing about 3 co ### Benchmarks -These are some examples of performance compared to [datadog cgo library](https://github.com/DataDog/zstd). - The first two are streaming decodes and the last are smaller inputs. - + +Running on AMD Ryzen 9 3950X 16-Core Processor. AMD64 assembly used. + ``` -BenchmarkDecoderSilesia-8 3 385000067 ns/op 550.51 MB/s 5498 B/op 8 allocs/op -BenchmarkDecoderSilesiaCgo-8 6 197666567 ns/op 1072.25 MB/s 270672 B/op 8 allocs/op - -BenchmarkDecoderEnwik9-8 1 2027001600 ns/op 493.34 MB/s 10496 B/op 18 allocs/op -BenchmarkDecoderEnwik9Cgo-8 2 979499200 ns/op 1020.93 MB/s 270672 B/op 8 allocs/op - -Concurrent performance: - -BenchmarkDecoder_DecodeAllParallel/kppkn.gtb.zst-16 28915 42469 ns/op 4340.07 MB/s 114 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/geo.protodata.zst-16 116505 9965 ns/op 11900.16 MB/s 16 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/plrabn12.txt.zst-16 8952 134272 ns/op 3588.70 MB/s 915 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/lcet10.txt.zst-16 11820 102538 ns/op 4161.90 MB/s 594 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/asyoulik.txt.zst-16 34782 34184 ns/op 3661.88 MB/s 60 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/alice29.txt.zst-16 27712 43447 ns/op 3500.58 MB/s 99 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/html_x_4.zst-16 62826 18750 ns/op 21845.10 MB/s 104 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/paper-100k.pdf.zst-16 631545 1794 ns/op 57078.74 MB/s 2 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/fireworks.jpeg.zst-16 1690140 712 ns/op 172938.13 MB/s 1 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/urls.10K.zst-16 10432 113593 ns/op 6180.73 MB/s 1143 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/html.zst-16 113206 10671 ns/op 9596.27 MB/s 15 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallel/comp-data.bin.zst-16 1530615 779 ns/op 5229.49 MB/s 0 B/op 0 allocs/op - -BenchmarkDecoder_DecodeAllParallelCgo/kppkn.gtb.zst-16 65217 16192 ns/op 11383.34 MB/s 46 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallelCgo/geo.protodata.zst-16 292671 4039 ns/op 29363.19 MB/s 6 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallelCgo/plrabn12.txt.zst-16 26314 46021 ns/op 10470.43 MB/s 293 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallelCgo/lcet10.txt.zst-16 33897 34900 ns/op 12227.96 MB/s 205 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallelCgo/asyoulik.txt.zst-16 104348 11433 ns/op 10949.01 MB/s 20 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallelCgo/alice29.txt.zst-16 75949 15510 ns/op 9805.60 MB/s 32 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallelCgo/html_x_4.zst-16 173910 6756 ns/op 60624.29 MB/s 37 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallelCgo/paper-100k.pdf.zst-16 923076 1339 ns/op 76474.87 MB/s 1 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallelCgo/fireworks.jpeg.zst-16 922920 1351 ns/op 91102.57 MB/s 2 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallelCgo/urls.10K.zst-16 27649 43618 ns/op 16096.19 MB/s 407 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallelCgo/html.zst-16 279073 4160 ns/op 24614.18 MB/s 6 B/op 0 allocs/op -BenchmarkDecoder_DecodeAllParallelCgo/comp-data.bin.zst-16 749938 1579 ns/op 2581.71 MB/s 0 B/op 0 allocs/op +BenchmarkDecoderSilesia-32 5 206878840 ns/op 1024.50 MB/s 49808 B/op 43 allocs/op +BenchmarkDecoderEnwik9-32 1 1271809000 ns/op 786.28 MB/s 72048 B/op 52 allocs/op + +Concurrent blocks, performance: + +BenchmarkDecoder_DecodeAllParallel/kppkn.gtb.zst-32 67356 17857 ns/op 10321.96 MB/s 22.48 pct 102 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/geo.protodata.zst-32 266656 4421 ns/op 26823.21 MB/s 11.89 pct 19 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/plrabn12.txt.zst-32 20992 56842 ns/op 8477.17 MB/s 39.90 pct 754 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/lcet10.txt.zst-32 27456 43932 ns/op 9714.01 MB/s 33.27 pct 524 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/asyoulik.txt.zst-32 78432 15047 ns/op 8319.15 MB/s 40.34 pct 66 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/alice29.txt.zst-32 65800 18436 ns/op 8249.63 MB/s 37.75 pct 88 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/html_x_4.zst-32 102993 11523 ns/op 35546.09 MB/s 3.637 pct 143 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/paper-100k.pdf.zst-32 1000000 1070 ns/op 95720.98 MB/s 80.53 pct 3 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/fireworks.jpeg.zst-32 749802 1752 ns/op 70272.35 MB/s 100.0 pct 5 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/urls.10K.zst-32 22640 52934 ns/op 13263.37 MB/s 26.25 pct 1014 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/html.zst-32 226412 5232 ns/op 19572.27 MB/s 14.49 pct 20 B/op 0 allocs/op +BenchmarkDecoder_DecodeAllParallel/comp-data.bin.zst-32 923041 1276 ns/op 3194.71 MB/s 31.26 pct 0 B/op 0 allocs/op ``` -This reflects the performance around May 2020, but this may be out of date. +This reflects the performance around May 2022, but this may be out of date. ## Zstd inside ZIP files diff --git a/vendor/github.com/klauspost/compress/zstd/blockdec.go b/vendor/github.com/klauspost/compress/zstd/blockdec.go index 7d567a54a..b2bca3301 100644 --- a/vendor/github.com/klauspost/compress/zstd/blockdec.go +++ b/vendor/github.com/klauspost/compress/zstd/blockdec.go @@ -5,9 +5,14 @@ package zstd import ( + "bytes" + "encoding/binary" "errors" "fmt" "io" + "io/ioutil" + "os" + "path/filepath" "sync" "github.com/klauspost/compress/huff0" @@ -38,6 +43,9 @@ const ( // maxCompressedBlockSize is the biggest allowed compressed block size (128KB) maxCompressedBlockSize = 128 << 10 + compressedBlockOverAlloc = 16 + maxCompressedBlockSizeAlloc = 128<<10 + compressedBlockOverAlloc + // Maximum possible block size (all Raw+Uncompressed). maxBlockSize = (1 << 21) - 1 @@ -136,7 +144,7 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error { b.Type = blockType((bh >> 1) & 3) // find size. cSize := int(bh >> 3) - maxSize := maxBlockSize + maxSize := maxCompressedBlockSizeAlloc switch b.Type { case blockTypeReserved: return ErrReservedBlockType @@ -157,9 +165,9 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error { println("Data size on stream:", cSize) } b.RLESize = 0 - maxSize = maxCompressedBlockSize + maxSize = maxCompressedBlockSizeAlloc if windowSize < maxCompressedBlockSize && b.lowMem { - maxSize = int(windowSize) + maxSize = int(windowSize) + compressedBlockOverAlloc } if cSize > maxCompressedBlockSize || uint64(cSize) > b.WindowSize { if debugDecoder { @@ -190,9 +198,9 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error { // Read block data. if cap(b.dataStorage) < cSize { if b.lowMem || cSize > maxCompressedBlockSize { - b.dataStorage = make([]byte, 0, cSize) + b.dataStorage = make([]byte, 0, cSize+compressedBlockOverAlloc) } else { - b.dataStorage = make([]byte, 0, maxCompressedBlockSize) + b.dataStorage = make([]byte, 0, maxCompressedBlockSizeAlloc) } } if cap(b.dst) <= maxSize { @@ -486,10 +494,15 @@ func (b *blockDec) decodeCompressed(hist *history) error { b.dst = append(b.dst, hist.decoders.literals...) return nil } - err = hist.decoders.decodeSync(hist) + before := len(hist.decoders.out) + err = hist.decoders.decodeSync(hist.b[hist.ignoreBuffer:]) if err != nil { return err } + if hist.decoders.maxSyncLen > 0 { + hist.decoders.maxSyncLen += uint64(before) + hist.decoders.maxSyncLen -= uint64(len(hist.decoders.out)) + } b.dst = hist.decoders.out hist.recentOffsets = hist.decoders.prevOffset return nil @@ -632,6 +645,22 @@ func (b *blockDec) prepareSequences(in []byte, hist *history) (err error) { println("initializing sequences:", err) return err } + // Extract blocks... + if false && hist.dict == nil { + fatalErr := func(err error) { + if err != nil { + panic(err) + } + } + fn := fmt.Sprintf("n-%d-lits-%d-prev-%d-%d-%d-win-%d.blk", hist.decoders.nSeqs, len(hist.decoders.literals), hist.recentOffsets[0], hist.recentOffsets[1], hist.recentOffsets[2], hist.windowSize) + var buf bytes.Buffer + fatalErr(binary.Write(&buf, binary.LittleEndian, hist.decoders.litLengths.fse)) + fatalErr(binary.Write(&buf, binary.LittleEndian, hist.decoders.matchLengths.fse)) + fatalErr(binary.Write(&buf, binary.LittleEndian, hist.decoders.offsets.fse)) + buf.Write(in) + ioutil.WriteFile(filepath.Join("testdata", "seqs", fn), buf.Bytes(), os.ModePerm) + } + return nil } @@ -650,6 +679,7 @@ func (b *blockDec) decodeSequences(hist *history) error { } hist.decoders.windowSize = hist.windowSize hist.decoders.prevOffset = hist.recentOffsets + err := hist.decoders.decode(b.sequence) hist.recentOffsets = hist.decoders.prevOffset return err diff --git a/vendor/github.com/klauspost/compress/zstd/decoder.go b/vendor/github.com/klauspost/compress/zstd/decoder.go index 9fcdaac1d..c65ea9795 100644 --- a/vendor/github.com/klauspost/compress/zstd/decoder.go +++ b/vendor/github.com/klauspost/compress/zstd/decoder.go @@ -347,18 +347,20 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) { } frame.history.setDict(&dict) } - - if frame.FrameContentSize != fcsUnknown && frame.FrameContentSize > d.o.maxDecodedSize-uint64(len(dst)) { - return dst, ErrDecoderSizeExceeded + if frame.WindowSize > d.o.maxWindowSize { + return dst, ErrWindowSizeExceeded } - if frame.FrameContentSize < 1<<30 { - // Never preallocate more than 1 GB up front. + if frame.FrameContentSize != fcsUnknown { + if frame.FrameContentSize > d.o.maxDecodedSize-uint64(len(dst)) { + return dst, ErrDecoderSizeExceeded + } if cap(dst)-len(dst) < int(frame.FrameContentSize) { - dst2 := make([]byte, len(dst), len(dst)+int(frame.FrameContentSize)) + dst2 := make([]byte, len(dst), len(dst)+int(frame.FrameContentSize)+compressedBlockOverAlloc) copy(dst2, dst) dst = dst2 } } + if cap(dst) == 0 { // Allocate len(input) * 2 by default if nothing is provided // and we didn't get frame content size. diff --git a/vendor/github.com/klauspost/compress/zstd/decoder_options.go b/vendor/github.com/klauspost/compress/zstd/decoder_options.go index fd05c9bb0..fc52ebc40 100644 --- a/vendor/github.com/klauspost/compress/zstd/decoder_options.go +++ b/vendor/github.com/klauspost/compress/zstd/decoder_options.go @@ -31,7 +31,7 @@ func (o *decoderOptions) setDefault() { if o.concurrent > 4 { o.concurrent = 4 } - o.maxDecodedSize = 1 << 63 + o.maxDecodedSize = 64 << 30 } // WithDecoderLowmem will set whether to use a lower amount of memory, @@ -66,7 +66,7 @@ func WithDecoderConcurrency(n int) DOption { // WithDecoderMaxMemory allows to set a maximum decoded size for in-memory // non-streaming operations or maximum window size for streaming operations. // This can be used to control memory usage of potentially hostile content. -// Maximum and default is 1 << 63 bytes. +// Maximum is 1 << 63 bytes. Default is 64GiB. func WithDecoderMaxMemory(n uint64) DOption { return func(o *decoderOptions) error { if n == 0 { diff --git a/vendor/github.com/klauspost/compress/zstd/framedec.go b/vendor/github.com/klauspost/compress/zstd/framedec.go index 11089d223..509d5cece 100644 --- a/vendor/github.com/klauspost/compress/zstd/framedec.go +++ b/vendor/github.com/klauspost/compress/zstd/framedec.go @@ -326,6 +326,19 @@ func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) { d.history.ignoreBuffer = len(dst) // Store input length, so we only check new data. crcStart := len(dst) + d.history.decoders.maxSyncLen = 0 + if d.FrameContentSize != fcsUnknown { + d.history.decoders.maxSyncLen = d.FrameContentSize + uint64(len(dst)) + if d.history.decoders.maxSyncLen > d.o.maxDecodedSize { + return dst, ErrDecoderSizeExceeded + } + if uint64(cap(dst)) < d.history.decoders.maxSyncLen { + // Alloc for output + dst2 := make([]byte, len(dst), d.history.decoders.maxSyncLen+compressedBlockOverAlloc) + copy(dst2, dst) + dst = dst2 + } + } var err error for { err = dec.reset(d.rawInput, d.WindowSize) diff --git a/vendor/github.com/klauspost/compress/zstd/fse_decoder.go b/vendor/github.com/klauspost/compress/zstd/fse_decoder.go index bb3d4fd6c..fde4e6b60 100644 --- a/vendor/github.com/klauspost/compress/zstd/fse_decoder.go +++ b/vendor/github.com/klauspost/compress/zstd/fse_decoder.go @@ -5,8 +5,10 @@ package zstd import ( + "encoding/binary" "errors" "fmt" + "io" ) const ( @@ -182,6 +184,29 @@ func (s *fseDecoder) readNCount(b *byteReader, maxSymbol uint16) error { return s.buildDtable() } +func (s *fseDecoder) mustReadFrom(r io.Reader) { + fatalErr := func(err error) { + if err != nil { + panic(err) + } + } + // dt [maxTablesize]decSymbol // Decompression table. + // symbolLen uint16 // Length of active part of the symbol table. + // actualTableLog uint8 // Selected tablelog. + // maxBits uint8 // Maximum number of additional bits + // // used for table creation to avoid allocations. + // stateTable [256]uint16 + // norm [maxSymbolValue + 1]int16 + // preDefined bool + fatalErr(binary.Read(r, binary.LittleEndian, &s.dt)) + fatalErr(binary.Read(r, binary.LittleEndian, &s.symbolLen)) + fatalErr(binary.Read(r, binary.LittleEndian, &s.actualTableLog)) + fatalErr(binary.Read(r, binary.LittleEndian, &s.maxBits)) + fatalErr(binary.Read(r, binary.LittleEndian, &s.stateTable)) + fatalErr(binary.Read(r, binary.LittleEndian, &s.norm)) + fatalErr(binary.Read(r, binary.LittleEndian, &s.preDefined)) +} + // decSymbol contains information about a state entry, // Including the state offset base, the output symbol and // the number of bits to read for the low part of the destination state. diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec.go b/vendor/github.com/klauspost/compress/zstd/seqdec.go index 819f1461b..e80139dd9 100644 --- a/vendor/github.com/klauspost/compress/zstd/seqdec.go +++ b/vendor/github.com/klauspost/compress/zstd/seqdec.go @@ -73,6 +73,7 @@ type sequenceDecs struct { seqSize int windowSize int maxBits uint8 + maxSyncLen uint64 } // initialize all 3 decoders from the stream input. @@ -98,153 +99,13 @@ func (s *sequenceDecs) initialize(br *bitReader, hist *history, out []byte) erro return nil } -// decode sequences from the stream with the provided history. -func (s *sequenceDecs) decode(seqs []seqVals) error { - br := s.br - - // Grab full sizes tables, to avoid bounds checks. - llTable, mlTable, ofTable := s.litLengths.fse.dt[:maxTablesize], s.matchLengths.fse.dt[:maxTablesize], s.offsets.fse.dt[:maxTablesize] - llState, mlState, ofState := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state - s.seqSize = 0 - litRemain := len(s.literals) - maxBlockSize := maxCompressedBlockSize - if s.windowSize < maxBlockSize { - maxBlockSize = s.windowSize - } - for i := range seqs { - var ll, mo, ml int - if br.off > 4+((maxOffsetBits+16+16)>>3) { - // inlined function: - // ll, mo, ml = s.nextFast(br, llState, mlState, ofState) - - // Final will not read from stream. - var llB, mlB, moB uint8 - ll, llB = llState.final() - ml, mlB = mlState.final() - mo, moB = ofState.final() - - // extra bits are stored in reverse order. - br.fillFast() - mo += br.getBits(moB) - if s.maxBits > 32 { - br.fillFast() - } - ml += br.getBits(mlB) - ll += br.getBits(llB) - - if moB > 1 { - s.prevOffset[2] = s.prevOffset[1] - s.prevOffset[1] = s.prevOffset[0] - s.prevOffset[0] = mo - } else { - // mo = s.adjustOffset(mo, ll, moB) - // Inlined for rather big speedup - if ll == 0 { - // There is an exception though, when current sequence's literals_length = 0. - // In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2, - // an offset_value of 2 means Repeated_Offset3, and an offset_value of 3 means Repeated_Offset1 - 1_byte. - mo++ - } - - if mo == 0 { - mo = s.prevOffset[0] - } else { - var temp int - if mo == 3 { - temp = s.prevOffset[0] - 1 - } else { - temp = s.prevOffset[mo] - } - - if temp == 0 { - // 0 is not valid; input is corrupted; force offset to 1 - println("WARNING: temp was 0") - temp = 1 - } - - if mo != 1 { - s.prevOffset[2] = s.prevOffset[1] - } - s.prevOffset[1] = s.prevOffset[0] - s.prevOffset[0] = temp - mo = temp - } - } - br.fillFast() - } else { - if br.overread() { - if debugDecoder { - printf("reading sequence %d, exceeded available data\n", i) - } - return io.ErrUnexpectedEOF - } - ll, mo, ml = s.next(br, llState, mlState, ofState) - br.fill() - } - - if debugSequences { - println("Seq", i, "Litlen:", ll, "mo:", mo, "(abs) ml:", ml) - } - // Evaluate. - // We might be doing this async, so do it early. - if mo == 0 && ml > 0 { - return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml) - } - if ml > maxMatchLen { - return fmt.Errorf("match len (%d) bigger than max allowed length", ml) - } - s.seqSize += ll + ml - if s.seqSize > maxBlockSize { - return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize) - } - litRemain -= ll - if litRemain < 0 { - return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, litRemain+ll) - } - seqs[i] = seqVals{ - ll: ll, - ml: ml, - mo: mo, - } - if i == len(seqs)-1 { - // This is the last sequence, so we shouldn't update state. - break - } - - // Manually inlined, ~ 5-20% faster - // Update all 3 states at once. Approx 20% faster. - nBits := llState.nbBits() + mlState.nbBits() + ofState.nbBits() - if nBits == 0 { - llState = llTable[llState.newState()&maxTableMask] - mlState = mlTable[mlState.newState()&maxTableMask] - ofState = ofTable[ofState.newState()&maxTableMask] - } else { - bits := br.get32BitsFast(nBits) - lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31)) - llState = llTable[(llState.newState()+lowBits)&maxTableMask] - - lowBits = uint16(bits >> (ofState.nbBits() & 31)) - lowBits &= bitMask[mlState.nbBits()&15] - mlState = mlTable[(mlState.newState()+lowBits)&maxTableMask] - - lowBits = uint16(bits) & bitMask[ofState.nbBits()&15] - ofState = ofTable[(ofState.newState()+lowBits)&maxTableMask] - } - } - s.seqSize += litRemain - if s.seqSize > maxBlockSize { - return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize) - } - err := br.close() - if err != nil { - printf("Closing sequences: %v, %+v\n", err, *br) - } - return err -} - // execute will execute the decoded sequence with the provided history. // The sequence must be evaluated before being sent. func (s *sequenceDecs) execute(seqs []seqVals, hist []byte) error { + if len(s.dict) == 0 { + return s.executeSimple(seqs, hist) + } + // Ensure we have enough output size... if len(s.out)+s.seqSize > cap(s.out) { addBytes := s.seqSize + len(s.out) @@ -341,14 +202,19 @@ func (s *sequenceDecs) execute(seqs []seqVals, hist []byte) error { } // decode sequences from the stream with the provided history. -func (s *sequenceDecs) decodeSync(history *history) error { +func (s *sequenceDecs) decodeSync(hist []byte) error { + if true { + supported, err := s.decodeSyncSimple(hist) + if supported { + return err + } + } br := s.br seqs := s.nSeqs startSize := len(s.out) // Grab full sizes tables, to avoid bounds checks. llTable, mlTable, ofTable := s.litLengths.fse.dt[:maxTablesize], s.matchLengths.fse.dt[:maxTablesize], s.offsets.fse.dt[:maxTablesize] llState, mlState, ofState := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state - hist := history.b[history.ignoreBuffer:] out := s.out maxBlockSize := maxCompressedBlockSize if s.windowSize < maxBlockSize { @@ -433,7 +299,7 @@ func (s *sequenceDecs) decodeSync(history *history) error { } size := ll + ml + len(out) if size-startSize > maxBlockSize { - return fmt.Errorf("output (%d) bigger than max block size (%d)", size, maxBlockSize) + return fmt.Errorf("output (%d) bigger than max block size (%d)", size-startSize, maxBlockSize) } if size > cap(out) { // Not enough size, which can happen under high volume block streaming conditions @@ -463,13 +329,13 @@ func (s *sequenceDecs) decodeSync(history *history) error { if mo > len(out)+len(hist) || mo > s.windowSize { if len(s.dict) == 0 { - return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(out)+len(hist)) + return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(out)+len(hist)-startSize) } // we may be in dictionary. dictO := len(s.dict) - (mo - (len(out) + len(hist))) if dictO < 0 || dictO >= len(s.dict) { - return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(out)+len(hist)) + return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(out)+len(hist)-startSize) } end := dictO + ml if end > len(s.dict) { @@ -543,8 +409,8 @@ func (s *sequenceDecs) decodeSync(history *history) error { } // Check if space for literals - if len(s.literals)+len(s.out)-startSize > maxBlockSize { - return fmt.Errorf("output (%d) bigger than max block size (%d)", len(s.out), maxBlockSize) + if size := len(s.literals) + len(s.out) - startSize; size > maxBlockSize { + return fmt.Errorf("output (%d) bigger than max block size (%d)", size, maxBlockSize) } // Add final literals diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go new file mode 100644 index 000000000..4676b09cc --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go @@ -0,0 +1,350 @@ +//go:build amd64 && !appengine && !noasm && gc +// +build amd64,!appengine,!noasm,gc + +package zstd + +import ( + "fmt" + + "github.com/klauspost/compress/internal/cpuinfo" +) + +type decodeSyncAsmContext struct { + llTable []decSymbol + mlTable []decSymbol + ofTable []decSymbol + llState uint64 + mlState uint64 + ofState uint64 + iteration int + litRemain int + out []byte + outPosition int + literals []byte + litPosition int + history []byte + windowSize int + ll int // set on error (not for all errors, please refer to _generate/gen.go) + ml int // set on error (not for all errors, please refer to _generate/gen.go) + mo int // set on error (not for all errors, please refer to _generate/gen.go) +} + +// sequenceDecs_decodeSync_amd64 implements the main loop of sequenceDecs.decodeSync in x86 asm. +// +// Please refer to seqdec_generic.go for the reference implementation. +//go:noescape +func sequenceDecs_decodeSync_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int + +// sequenceDecs_decodeSync_bmi2 implements the main loop of sequenceDecs.decodeSync in x86 asm with BMI2 extensions. +//go:noescape +func sequenceDecs_decodeSync_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int + +// sequenceDecs_decodeSync_safe_amd64 does the same as above, but does not write more than output buffer. +//go:noescape +func sequenceDecs_decodeSync_safe_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int + +// sequenceDecs_decodeSync_safe_bmi2 does the same as above, but does not write more than output buffer. +//go:noescape +func sequenceDecs_decodeSync_safe_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int + +// decode sequences from the stream with the provided history but without a dictionary. +func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) { + if len(s.dict) > 0 { + return false, nil + } + if s.maxSyncLen == 0 && cap(s.out)-len(s.out) < maxCompressedBlockSize { + return false, nil + } + useSafe := false + if s.maxSyncLen == 0 && cap(s.out)-len(s.out) < maxCompressedBlockSizeAlloc { + useSafe = true + } + if s.maxSyncLen > 0 && cap(s.out)-len(s.out)-compressedBlockOverAlloc < int(s.maxSyncLen) { + useSafe = true + } + br := s.br + + maxBlockSize := maxCompressedBlockSize + if s.windowSize < maxBlockSize { + maxBlockSize = s.windowSize + } + + ctx := decodeSyncAsmContext{ + llTable: s.litLengths.fse.dt[:maxTablesize], + mlTable: s.matchLengths.fse.dt[:maxTablesize], + ofTable: s.offsets.fse.dt[:maxTablesize], + llState: uint64(s.litLengths.state.state), + mlState: uint64(s.matchLengths.state.state), + ofState: uint64(s.offsets.state.state), + iteration: s.nSeqs - 1, + litRemain: len(s.literals), + out: s.out, + outPosition: len(s.out), + literals: s.literals, + windowSize: s.windowSize, + history: hist, + } + + s.seqSize = 0 + startSize := len(s.out) + + var errCode int + if cpuinfo.HasBMI2() { + if useSafe { + errCode = sequenceDecs_decodeSync_safe_bmi2(s, br, &ctx) + } else { + errCode = sequenceDecs_decodeSync_bmi2(s, br, &ctx) + } + } else { + if useSafe { + errCode = sequenceDecs_decodeSync_safe_amd64(s, br, &ctx) + } else { + errCode = sequenceDecs_decodeSync_amd64(s, br, &ctx) + } + } + switch errCode { + case noError: + break + + case errorMatchLenOfsMismatch: + return true, fmt.Errorf("zero matchoff and matchlen (%d) > 0", ctx.ml) + + case errorMatchLenTooBig: + return true, fmt.Errorf("match len (%d) bigger than max allowed length", ctx.ml) + + case errorMatchOffTooBig: + return true, fmt.Errorf("match offset (%d) bigger than current history (%d)", + ctx.mo, ctx.outPosition+len(hist)-startSize) + + case errorNotEnoughLiterals: + return true, fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", + ctx.ll, ctx.litRemain+ctx.ll) + + case errorNotEnoughSpace: + size := ctx.outPosition + ctx.ll + ctx.ml + if debugDecoder { + println("msl:", s.maxSyncLen, "cap", cap(s.out), "bef:", startSize, "sz:", size-startSize, "mbs:", maxBlockSize, "outsz:", cap(s.out)-startSize) + } + return true, fmt.Errorf("output (%d) bigger than max block size (%d)", size-startSize, maxBlockSize) + + default: + return true, fmt.Errorf("sequenceDecs_decode returned erronous code %d", errCode) + } + + s.seqSize += ctx.litRemain + if s.seqSize > maxBlockSize { + return true, fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize) + } + err := br.close() + if err != nil { + printf("Closing sequences: %v, %+v\n", err, *br) + return true, err + } + + s.literals = s.literals[ctx.litPosition:] + t := ctx.outPosition + s.out = s.out[:t] + + // Add final literals + s.out = append(s.out, s.literals...) + if debugDecoder { + t += len(s.literals) + if t != len(s.out) { + panic(fmt.Errorf("length mismatch, want %d, got %d", len(s.out), t)) + } + } + + return true, nil +} + +// -------------------------------------------------------------------------------- + +type decodeAsmContext struct { + llTable []decSymbol + mlTable []decSymbol + ofTable []decSymbol + llState uint64 + mlState uint64 + ofState uint64 + iteration int + seqs []seqVals + litRemain int +} + +const noError = 0 + +// error reported when mo == 0 && ml > 0 +const errorMatchLenOfsMismatch = 1 + +// error reported when ml > maxMatchLen +const errorMatchLenTooBig = 2 + +// error reported when mo > available history or mo > s.windowSize +const errorMatchOffTooBig = 3 + +// error reported when the sum of literal lengths exeeceds the literal buffer size +const errorNotEnoughLiterals = 4 + +// error reported when capacity of `out` is too small +const errorNotEnoughSpace = 5 + +// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm. +// +// Please refer to seqdec_generic.go for the reference implementation. +//go:noescape +func sequenceDecs_decode_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int + +// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm. +// +// Please refer to seqdec_generic.go for the reference implementation. +//go:noescape +func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int + +// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm with BMI2 extensions. +//go:noescape +func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int + +// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm with BMI2 extensions. +//go:noescape +func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int + +// decode sequences from the stream without the provided history. +func (s *sequenceDecs) decode(seqs []seqVals) error { + br := s.br + + maxBlockSize := maxCompressedBlockSize + if s.windowSize < maxBlockSize { + maxBlockSize = s.windowSize + } + + ctx := decodeAsmContext{ + llTable: s.litLengths.fse.dt[:maxTablesize], + mlTable: s.matchLengths.fse.dt[:maxTablesize], + ofTable: s.offsets.fse.dt[:maxTablesize], + llState: uint64(s.litLengths.state.state), + mlState: uint64(s.matchLengths.state.state), + ofState: uint64(s.offsets.state.state), + seqs: seqs, + iteration: len(seqs) - 1, + litRemain: len(s.literals), + } + + s.seqSize = 0 + lte56bits := s.maxBits+s.offsets.fse.actualTableLog+s.matchLengths.fse.actualTableLog+s.litLengths.fse.actualTableLog <= 56 + var errCode int + if cpuinfo.HasBMI2() { + if lte56bits { + errCode = sequenceDecs_decode_56_bmi2(s, br, &ctx) + } else { + errCode = sequenceDecs_decode_bmi2(s, br, &ctx) + } + } else { + if lte56bits { + errCode = sequenceDecs_decode_56_amd64(s, br, &ctx) + } else { + errCode = sequenceDecs_decode_amd64(s, br, &ctx) + } + } + if errCode != 0 { + i := len(seqs) - ctx.iteration - 1 + switch errCode { + case errorMatchLenOfsMismatch: + ml := ctx.seqs[i].ml + return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml) + + case errorMatchLenTooBig: + ml := ctx.seqs[i].ml + return fmt.Errorf("match len (%d) bigger than max allowed length", ml) + + case errorNotEnoughLiterals: + ll := ctx.seqs[i].ll + return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, ctx.litRemain+ll) + } + + return fmt.Errorf("sequenceDecs_decode_amd64 returned erronous code %d", errCode) + } + + if ctx.litRemain < 0 { + return fmt.Errorf("literal count is too big: total available %d, total requested %d", + len(s.literals), len(s.literals)-ctx.litRemain) + } + + s.seqSize += ctx.litRemain + if s.seqSize > maxBlockSize { + return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize) + } + err := br.close() + if err != nil { + printf("Closing sequences: %v, %+v\n", err, *br) + } + return err +} + +// -------------------------------------------------------------------------------- + +type executeAsmContext struct { + seqs []seqVals + seqIndex int + out []byte + history []byte + literals []byte + outPosition int + litPosition int + windowSize int +} + +// sequenceDecs_executeSimple_amd64 implements the main loop of sequenceDecs.executeSimple in x86 asm. +// +// Returns false if a match offset is too big. +// +// Please refer to seqdec_generic.go for the reference implementation. +//go:noescape +func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool + +// executeSimple handles cases when dictionary is not used. +func (s *sequenceDecs) executeSimple(seqs []seqVals, hist []byte) error { + // Ensure we have enough output size... + if len(s.out)+s.seqSize+compressedBlockOverAlloc > cap(s.out) { + addBytes := s.seqSize + len(s.out) + compressedBlockOverAlloc + s.out = append(s.out, make([]byte, addBytes)...) + s.out = s.out[:len(s.out)-addBytes] + } + + if debugDecoder { + printf("Execute %d seqs with literals: %d into %d bytes\n", len(seqs), len(s.literals), s.seqSize) + } + + var t = len(s.out) + out := s.out[:t+s.seqSize] + + ctx := executeAsmContext{ + seqs: seqs, + seqIndex: 0, + out: out, + history: hist, + outPosition: t, + litPosition: 0, + literals: s.literals, + windowSize: s.windowSize, + } + + ok := sequenceDecs_executeSimple_amd64(&ctx) + if !ok { + return fmt.Errorf("match offset (%d) bigger than current history (%d)", + seqs[ctx.seqIndex].mo, ctx.outPosition+len(hist)) + } + s.literals = s.literals[ctx.litPosition:] + t = ctx.outPosition + + // Add final literals + copy(out[t:], s.literals) + if debugDecoder { + t += len(s.literals) + if t != len(out) { + panic(fmt.Errorf("length mismatch, want %d, got %d, ss: %d", len(out), t, s.seqSize)) + } + } + s.out = out + + return nil +} diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s new file mode 100644 index 000000000..01cc23fa8 --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s @@ -0,0 +1,3519 @@ +// Code generated by command: go run gen.go -out ../seqdec_amd64.s -pkg=zstd. DO NOT EDIT. + +//go:build !appengine && !noasm && gc && !noasm +// +build !appengine,!noasm,gc,!noasm + +// func sequenceDecs_decode_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int +// Requires: CMOV +TEXT ·sequenceDecs_decode_amd64(SB), $8-32 + MOVQ br+8(FP), AX + MOVQ 32(AX), DX + MOVBQZX 40(AX), BX + MOVQ 24(AX), SI + MOVQ (AX), AX + ADDQ SI, AX + MOVQ AX, (SP) + MOVQ ctx+16(FP), AX + MOVQ 72(AX), DI + MOVQ 80(AX), R8 + MOVQ 88(AX), R9 + MOVQ 104(AX), R10 + MOVQ s+0(FP), AX + MOVQ 144(AX), R11 + MOVQ 152(AX), R12 + MOVQ 160(AX), R13 + +sequenceDecs_decode_amd64_main_loop: + MOVQ (SP), R14 + + // Fill bitreader to have enough for the offset and match length. + CMPQ SI, $0x08 + JL sequenceDecs_decode_amd64_fill_byte_by_byte + MOVQ BX, AX + SHRQ $0x03, AX + SUBQ AX, R14 + MOVQ (R14), DX + SUBQ AX, SI + ANDQ $0x07, BX + JMP sequenceDecs_decode_amd64_fill_end + +sequenceDecs_decode_amd64_fill_byte_by_byte: + CMPQ SI, $0x00 + JLE sequenceDecs_decode_amd64_fill_end + CMPQ BX, $0x07 + JLE sequenceDecs_decode_amd64_fill_end + SHLQ $0x08, DX + SUBQ $0x01, R14 + SUBQ $0x01, SI + SUBQ $0x08, BX + MOVBQZX (R14), AX + ORQ AX, DX + JMP sequenceDecs_decode_amd64_fill_byte_by_byte + +sequenceDecs_decode_amd64_fill_end: + // Update offset + MOVQ R9, AX + MOVQ BX, CX + MOVQ DX, R15 + SHLQ CL, R15 + MOVB AH, CL + ADDQ CX, BX + NEGL CX + SHRQ CL, R15 + SHRQ $0x20, AX + TESTQ CX, CX + CMOVQEQ CX, R15 + ADDQ R15, AX + MOVQ AX, 16(R10) + + // Update match length + MOVQ R8, AX + MOVQ BX, CX + MOVQ DX, R15 + SHLQ CL, R15 + MOVB AH, CL + ADDQ CX, BX + NEGL CX + SHRQ CL, R15 + SHRQ $0x20, AX + TESTQ CX, CX + CMOVQEQ CX, R15 + ADDQ R15, AX + MOVQ AX, 8(R10) + + // Fill bitreader to have enough for the remaining + CMPQ SI, $0x08 + JL sequenceDecs_decode_amd64_fill_2_byte_by_byte + MOVQ BX, AX + SHRQ $0x03, AX + SUBQ AX, R14 + MOVQ (R14), DX + SUBQ AX, SI + ANDQ $0x07, BX + JMP sequenceDecs_decode_amd64_fill_2_end + +sequenceDecs_decode_amd64_fill_2_byte_by_byte: + CMPQ SI, $0x00 + JLE sequenceDecs_decode_amd64_fill_2_end + CMPQ BX, $0x07 + JLE sequenceDecs_decode_amd64_fill_2_end + SHLQ $0x08, DX + SUBQ $0x01, R14 + SUBQ $0x01, SI + SUBQ $0x08, BX + MOVBQZX (R14), AX + ORQ AX, DX + JMP sequenceDecs_decode_amd64_fill_2_byte_by_byte + +sequenceDecs_decode_amd64_fill_2_end: + // Update literal length + MOVQ DI, AX + MOVQ BX, CX + MOVQ DX, R15 + SHLQ CL, R15 + MOVB AH, CL + ADDQ CX, BX + NEGL CX + SHRQ CL, R15 + SHRQ $0x20, AX + TESTQ CX, CX + CMOVQEQ CX, R15 + ADDQ R15, AX + MOVQ AX, (R10) + + // Fill bitreader for state updates + MOVQ R14, (SP) + MOVQ R9, AX + SHRQ $0x08, AX + MOVBQZX AL, AX + MOVQ ctx+16(FP), CX + CMPQ 96(CX), $0x00 + JZ sequenceDecs_decode_amd64_skip_update + + // Update Literal Length State + MOVBQZX DI, R14 + SHRQ $0x10, DI + MOVWQZX DI, DI + CMPQ R14, $0x00 + JZ sequenceDecs_decode_amd64_llState_updateState_skip_zero + MOVQ BX, CX + ADDQ R14, BX + MOVQ DX, R15 + SHLQ CL, R15 + MOVQ R14, CX + NEGQ CX + SHRQ CL, R15 + ADDQ R15, DI + +sequenceDecs_decode_amd64_llState_updateState_skip_zero: + // Load ctx.llTable + MOVQ ctx+16(FP), CX + MOVQ (CX), CX + MOVQ (CX)(DI*8), DI + + // Update Match Length State + MOVBQZX R8, R14 + SHRQ $0x10, R8 + MOVWQZX R8, R8 + CMPQ R14, $0x00 + JZ sequenceDecs_decode_amd64_mlState_updateState_skip_zero + MOVQ BX, CX + ADDQ R14, BX + MOVQ DX, R15 + SHLQ CL, R15 + MOVQ R14, CX + NEGQ CX + SHRQ CL, R15 + ADDQ R15, R8 + +sequenceDecs_decode_amd64_mlState_updateState_skip_zero: + // Load ctx.mlTable + MOVQ ctx+16(FP), CX + MOVQ 24(CX), CX + MOVQ (CX)(R8*8), R8 + + // Update Offset State + MOVBQZX R9, R14 + SHRQ $0x10, R9 + MOVWQZX R9, R9 + CMPQ R14, $0x00 + JZ sequenceDecs_decode_amd64_ofState_updateState_skip_zero + MOVQ BX, CX + ADDQ R14, BX + MOVQ DX, R15 + SHLQ CL, R15 + MOVQ R14, CX + NEGQ CX + SHRQ CL, R15 + ADDQ R15, R9 + +sequenceDecs_decode_amd64_ofState_updateState_skip_zero: + // Load ctx.ofTable + MOVQ ctx+16(FP), CX + MOVQ 48(CX), CX + MOVQ (CX)(R9*8), R9 + +sequenceDecs_decode_amd64_skip_update: + // Adjust offset + MOVQ 16(R10), CX + CMPQ AX, $0x01 + JBE sequenceDecs_decode_amd64_adjust_offsetB_1_or_0 + MOVQ R12, R13 + MOVQ R11, R12 + MOVQ CX, R11 + JMP sequenceDecs_decode_amd64_adjust_end + +sequenceDecs_decode_amd64_adjust_offsetB_1_or_0: + CMPQ (R10), $0x00000000 + JNE sequenceDecs_decode_amd64_adjust_offset_maybezero + INCQ CX + JMP sequenceDecs_decode_amd64_adjust_offset_nonzero + +sequenceDecs_decode_amd64_adjust_offset_maybezero: + TESTQ CX, CX + JNZ sequenceDecs_decode_amd64_adjust_offset_nonzero + MOVQ R11, CX + JMP sequenceDecs_decode_amd64_adjust_end + +sequenceDecs_decode_amd64_adjust_offset_nonzero: + CMPQ CX, $0x01 + JB sequenceDecs_decode_amd64_adjust_zero + JEQ sequenceDecs_decode_amd64_adjust_one + CMPQ CX, $0x02 + JA sequenceDecs_decode_amd64_adjust_three + JMP sequenceDecs_decode_amd64_adjust_two + +sequenceDecs_decode_amd64_adjust_zero: + MOVQ R11, AX + JMP sequenceDecs_decode_amd64_adjust_test_temp_valid + +sequenceDecs_decode_amd64_adjust_one: + MOVQ R12, AX + JMP sequenceDecs_decode_amd64_adjust_test_temp_valid + +sequenceDecs_decode_amd64_adjust_two: + MOVQ R13, AX + JMP sequenceDecs_decode_amd64_adjust_test_temp_valid + +sequenceDecs_decode_amd64_adjust_three: + LEAQ -1(R11), AX + +sequenceDecs_decode_amd64_adjust_test_temp_valid: + TESTQ AX, AX + JNZ sequenceDecs_decode_amd64_adjust_temp_valid + MOVQ $0x00000001, AX + +sequenceDecs_decode_amd64_adjust_temp_valid: + CMPQ CX, $0x01 + CMOVQNE R12, R13 + MOVQ R11, R12 + MOVQ AX, R11 + MOVQ AX, CX + +sequenceDecs_decode_amd64_adjust_end: + MOVQ CX, 16(R10) + + // Check values + MOVQ 8(R10), AX + MOVQ (R10), R14 + LEAQ (AX)(R14*1), R15 + MOVQ s+0(FP), BP + ADDQ R15, 256(BP) + MOVQ ctx+16(FP), R15 + SUBQ R14, 128(R15) + JS error_not_enough_literals + CMPQ AX, $0x00020002 + JA sequenceDecs_decode_amd64_error_match_len_too_big + TESTQ CX, CX + JNZ sequenceDecs_decode_amd64_match_len_ofs_ok + TESTQ AX, AX + JNZ sequenceDecs_decode_amd64_error_match_len_ofs_mismatch + +sequenceDecs_decode_amd64_match_len_ofs_ok: + ADDQ $0x18, R10 + MOVQ ctx+16(FP), AX + DECQ 96(AX) + JNS sequenceDecs_decode_amd64_main_loop + MOVQ s+0(FP), AX + MOVQ R11, 144(AX) + MOVQ R12, 152(AX) + MOVQ R13, 160(AX) + MOVQ br+8(FP), AX + MOVQ DX, 32(AX) + MOVB BL, 40(AX) + MOVQ SI, 24(AX) + + // Return success + MOVQ $0x00000000, ret+24(FP) + RET + + // Return with match length error +sequenceDecs_decode_amd64_error_match_len_ofs_mismatch: + MOVQ $0x00000001, ret+24(FP) + RET + + // Return with match too long error +sequenceDecs_decode_amd64_error_match_len_too_big: + MOVQ $0x00000002, ret+24(FP) + RET + + // Return with match offset too long error + MOVQ $0x00000003, ret+24(FP) + RET + + // Return with not enough literals error +error_not_enough_literals: + MOVQ $0x00000004, ret+24(FP) + RET + + // Return with not enough output space error + MOVQ $0x00000005, ret+24(FP) + RET + +// func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int +// Requires: CMOV +TEXT ·sequenceDecs_decode_56_amd64(SB), $8-32 + MOVQ br+8(FP), AX + MOVQ 32(AX), DX + MOVBQZX 40(AX), BX + MOVQ 24(AX), SI + MOVQ (AX), AX + ADDQ SI, AX + MOVQ AX, (SP) + MOVQ ctx+16(FP), AX + MOVQ 72(AX), DI + MOVQ 80(AX), R8 + MOVQ 88(AX), R9 + MOVQ 104(AX), R10 + MOVQ s+0(FP), AX + MOVQ 144(AX), R11 + MOVQ 152(AX), R12 + MOVQ 160(AX), R13 + +sequenceDecs_decode_56_amd64_main_loop: + MOVQ (SP), R14 + + // Fill bitreader to have enough for the offset and match length. + CMPQ SI, $0x08 + JL sequenceDecs_decode_56_amd64_fill_byte_by_byte + MOVQ BX, AX + SHRQ $0x03, AX + SUBQ AX, R14 + MOVQ (R14), DX + SUBQ AX, SI + ANDQ $0x07, BX + JMP sequenceDecs_decode_56_amd64_fill_end + +sequenceDecs_decode_56_amd64_fill_byte_by_byte: + CMPQ SI, $0x00 + JLE sequenceDecs_decode_56_amd64_fill_end + CMPQ BX, $0x07 + JLE sequenceDecs_decode_56_amd64_fill_end + SHLQ $0x08, DX + SUBQ $0x01, R14 + SUBQ $0x01, SI + SUBQ $0x08, BX + MOVBQZX (R14), AX + ORQ AX, DX + JMP sequenceDecs_decode_56_amd64_fill_byte_by_byte + +sequenceDecs_decode_56_amd64_fill_end: + // Update offset + MOVQ R9, AX + MOVQ BX, CX + MOVQ DX, R15 + SHLQ CL, R15 + MOVB AH, CL + ADDQ CX, BX + NEGL CX + SHRQ CL, R15 + SHRQ $0x20, AX + TESTQ CX, CX + CMOVQEQ CX, R15 + ADDQ R15, AX + MOVQ AX, 16(R10) + + // Update match length + MOVQ R8, AX + MOVQ BX, CX + MOVQ DX, R15 + SHLQ CL, R15 + MOVB AH, CL + ADDQ CX, BX + NEGL CX + SHRQ CL, R15 + SHRQ $0x20, AX + TESTQ CX, CX + CMOVQEQ CX, R15 + ADDQ R15, AX + MOVQ AX, 8(R10) + + // Update literal length + MOVQ DI, AX + MOVQ BX, CX + MOVQ DX, R15 + SHLQ CL, R15 + MOVB AH, CL + ADDQ CX, BX + NEGL CX + SHRQ CL, R15 + SHRQ $0x20, AX + TESTQ CX, CX + CMOVQEQ CX, R15 + ADDQ R15, AX + MOVQ AX, (R10) + + // Fill bitreader for state updates + MOVQ R14, (SP) + MOVQ R9, AX + SHRQ $0x08, AX + MOVBQZX AL, AX + MOVQ ctx+16(FP), CX + CMPQ 96(CX), $0x00 + JZ sequenceDecs_decode_56_amd64_skip_update + + // Update Literal Length State + MOVBQZX DI, R14 + SHRQ $0x10, DI + MOVWQZX DI, DI + CMPQ R14, $0x00 + JZ sequenceDecs_decode_56_amd64_llState_updateState_skip_zero + MOVQ BX, CX + ADDQ R14, BX + MOVQ DX, R15 + SHLQ CL, R15 + MOVQ R14, CX + NEGQ CX + SHRQ CL, R15 + ADDQ R15, DI + +sequenceDecs_decode_56_amd64_llState_updateState_skip_zero: + // Load ctx.llTable + MOVQ ctx+16(FP), CX + MOVQ (CX), CX + MOVQ (CX)(DI*8), DI + + // Update Match Length State + MOVBQZX R8, R14 + SHRQ $0x10, R8 + MOVWQZX R8, R8 + CMPQ R14, $0x00 + JZ sequenceDecs_decode_56_amd64_mlState_updateState_skip_zero + MOVQ BX, CX + ADDQ R14, BX + MOVQ DX, R15 + SHLQ CL, R15 + MOVQ R14, CX + NEGQ CX + SHRQ CL, R15 + ADDQ R15, R8 + +sequenceDecs_decode_56_amd64_mlState_updateState_skip_zero: + // Load ctx.mlTable + MOVQ ctx+16(FP), CX + MOVQ 24(CX), CX + MOVQ (CX)(R8*8), R8 + + // Update Offset State + MOVBQZX R9, R14 + SHRQ $0x10, R9 + MOVWQZX R9, R9 + CMPQ R14, $0x00 + JZ sequenceDecs_decode_56_amd64_ofState_updateState_skip_zero + MOVQ BX, CX + ADDQ R14, BX + MOVQ DX, R15 + SHLQ CL, R15 + MOVQ R14, CX + NEGQ CX + SHRQ CL, R15 + ADDQ R15, R9 + +sequenceDecs_decode_56_amd64_ofState_updateState_skip_zero: + // Load ctx.ofTable + MOVQ ctx+16(FP), CX + MOVQ 48(CX), CX + MOVQ (CX)(R9*8), R9 + +sequenceDecs_decode_56_amd64_skip_update: + // Adjust offset + MOVQ 16(R10), CX + CMPQ AX, $0x01 + JBE sequenceDecs_decode_56_amd64_adjust_offsetB_1_or_0 + MOVQ R12, R13 + MOVQ R11, R12 + MOVQ CX, R11 + JMP sequenceDecs_decode_56_amd64_adjust_end + +sequenceDecs_decode_56_amd64_adjust_offsetB_1_or_0: + CMPQ (R10), $0x00000000 + JNE sequenceDecs_decode_56_amd64_adjust_offset_maybezero + INCQ CX + JMP sequenceDecs_decode_56_amd64_adjust_offset_nonzero + +sequenceDecs_decode_56_amd64_adjust_offset_maybezero: + TESTQ CX, CX + JNZ sequenceDecs_decode_56_amd64_adjust_offset_nonzero + MOVQ R11, CX + JMP sequenceDecs_decode_56_amd64_adjust_end + +sequenceDecs_decode_56_amd64_adjust_offset_nonzero: + CMPQ CX, $0x01 + JB sequenceDecs_decode_56_amd64_adjust_zero + JEQ sequenceDecs_decode_56_amd64_adjust_one + CMPQ CX, $0x02 + JA sequenceDecs_decode_56_amd64_adjust_three + JMP sequenceDecs_decode_56_amd64_adjust_two + +sequenceDecs_decode_56_amd64_adjust_zero: + MOVQ R11, AX + JMP sequenceDecs_decode_56_amd64_adjust_test_temp_valid + +sequenceDecs_decode_56_amd64_adjust_one: + MOVQ R12, AX + JMP sequenceDecs_decode_56_amd64_adjust_test_temp_valid + +sequenceDecs_decode_56_amd64_adjust_two: + MOVQ R13, AX + JMP sequenceDecs_decode_56_amd64_adjust_test_temp_valid + +sequenceDecs_decode_56_amd64_adjust_three: + LEAQ -1(R11), AX + +sequenceDecs_decode_56_amd64_adjust_test_temp_valid: + TESTQ AX, AX + JNZ sequenceDecs_decode_56_amd64_adjust_temp_valid + MOVQ $0x00000001, AX + +sequenceDecs_decode_56_amd64_adjust_temp_valid: + CMPQ CX, $0x01 + CMOVQNE R12, R13 + MOVQ R11, R12 + MOVQ AX, R11 + MOVQ AX, CX + +sequenceDecs_decode_56_amd64_adjust_end: + MOVQ CX, 16(R10) + + // Check values + MOVQ 8(R10), AX + MOVQ (R10), R14 + LEAQ (AX)(R14*1), R15 + MOVQ s+0(FP), BP + ADDQ R15, 256(BP) + MOVQ ctx+16(FP), R15 + SUBQ R14, 128(R15) + JS error_not_enough_literals + CMPQ AX, $0x00020002 + JA sequenceDecs_decode_56_amd64_error_match_len_too_big + TESTQ CX, CX + JNZ sequenceDecs_decode_56_amd64_match_len_ofs_ok + TESTQ AX, AX + JNZ sequenceDecs_decode_56_amd64_error_match_len_ofs_mismatch + +sequenceDecs_decode_56_amd64_match_len_ofs_ok: + ADDQ $0x18, R10 + MOVQ ctx+16(FP), AX + DECQ 96(AX) + JNS sequenceDecs_decode_56_amd64_main_loop + MOVQ s+0(FP), AX + MOVQ R11, 144(AX) + MOVQ R12, 152(AX) + MOVQ R13, 160(AX) + MOVQ br+8(FP), AX + MOVQ DX, 32(AX) + MOVB BL, 40(AX) + MOVQ SI, 24(AX) + + // Return success + MOVQ $0x00000000, ret+24(FP) + RET + + // Return with match length error +sequenceDecs_decode_56_amd64_error_match_len_ofs_mismatch: + MOVQ $0x00000001, ret+24(FP) + RET + + // Return with match too long error +sequenceDecs_decode_56_amd64_error_match_len_too_big: + MOVQ $0x00000002, ret+24(FP) + RET + + // Return with match offset too long error + MOVQ $0x00000003, ret+24(FP) + RET + + // Return with not enough literals error +error_not_enough_literals: + MOVQ $0x00000004, ret+24(FP) + RET + + // Return with not enough output space error + MOVQ $0x00000005, ret+24(FP) + RET + +// func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int +// Requires: BMI, BMI2, CMOV +TEXT ·sequenceDecs_decode_bmi2(SB), $8-32 + MOVQ br+8(FP), CX + MOVQ 32(CX), AX + MOVBQZX 40(CX), DX + MOVQ 24(CX), BX + MOVQ (CX), CX + ADDQ BX, CX + MOVQ CX, (SP) + MOVQ ctx+16(FP), CX + MOVQ 72(CX), SI + MOVQ 80(CX), DI + MOVQ 88(CX), R8 + MOVQ 104(CX), R9 + MOVQ s+0(FP), CX + MOVQ 144(CX), R10 + MOVQ 152(CX), R11 + MOVQ 160(CX), R12 + +sequenceDecs_decode_bmi2_main_loop: + MOVQ (SP), R13 + + // Fill bitreader to have enough for the offset and match length. + CMPQ BX, $0x08 + JL sequenceDecs_decode_bmi2_fill_byte_by_byte + MOVQ DX, CX + SHRQ $0x03, CX + SUBQ CX, R13 + MOVQ (R13), AX + SUBQ CX, BX + ANDQ $0x07, DX + JMP sequenceDecs_decode_bmi2_fill_end + +sequenceDecs_decode_bmi2_fill_byte_by_byte: + CMPQ BX, $0x00 + JLE sequenceDecs_decode_bmi2_fill_end + CMPQ DX, $0x07 + JLE sequenceDecs_decode_bmi2_fill_end + SHLQ $0x08, AX + SUBQ $0x01, R13 + SUBQ $0x01, BX + SUBQ $0x08, DX + MOVBQZX (R13), CX + ORQ CX, AX + JMP sequenceDecs_decode_bmi2_fill_byte_by_byte + +sequenceDecs_decode_bmi2_fill_end: + // Update offset + MOVQ $0x00000808, CX + BEXTRQ CX, R8, R14 + MOVQ AX, R15 + LEAQ (DX)(R14*1), CX + ROLQ CL, R15 + BZHIQ R14, R15, R15 + MOVQ CX, DX + MOVQ R8, CX + SHRQ $0x20, CX + ADDQ R15, CX + MOVQ CX, 16(R9) + + // Update match length + MOVQ $0x00000808, CX + BEXTRQ CX, DI, R14 + MOVQ AX, R15 + LEAQ (DX)(R14*1), CX + ROLQ CL, R15 + BZHIQ R14, R15, R15 + MOVQ CX, DX + MOVQ DI, CX + SHRQ $0x20, CX + ADDQ R15, CX + MOVQ CX, 8(R9) + + // Fill bitreader to have enough for the remaining + CMPQ BX, $0x08 + JL sequenceDecs_decode_bmi2_fill_2_byte_by_byte + MOVQ DX, CX + SHRQ $0x03, CX + SUBQ CX, R13 + MOVQ (R13), AX + SUBQ CX, BX + ANDQ $0x07, DX + JMP sequenceDecs_decode_bmi2_fill_2_end + +sequenceDecs_decode_bmi2_fill_2_byte_by_byte: + CMPQ BX, $0x00 + JLE sequenceDecs_decode_bmi2_fill_2_end + CMPQ DX, $0x07 + JLE sequenceDecs_decode_bmi2_fill_2_end + SHLQ $0x08, AX + SUBQ $0x01, R13 + SUBQ $0x01, BX + SUBQ $0x08, DX + MOVBQZX (R13), CX + ORQ CX, AX + JMP sequenceDecs_decode_bmi2_fill_2_byte_by_byte + +sequenceDecs_decode_bmi2_fill_2_end: + // Update literal length + MOVQ $0x00000808, CX + BEXTRQ CX, SI, R14 + MOVQ AX, R15 + LEAQ (DX)(R14*1), CX + ROLQ CL, R15 + BZHIQ R14, R15, R15 + MOVQ CX, DX + MOVQ SI, CX + SHRQ $0x20, CX + ADDQ R15, CX + MOVQ CX, (R9) + + // Fill bitreader for state updates + MOVQ R13, (SP) + MOVQ $0x00000808, CX + BEXTRQ CX, R8, R13 + MOVQ ctx+16(FP), CX + CMPQ 96(CX), $0x00 + JZ sequenceDecs_decode_bmi2_skip_update + + // Update Literal Length State + MOVBQZX SI, R14 + MOVQ $0x00001010, CX + BEXTRQ CX, SI, SI + LEAQ (DX)(R14*1), CX + MOVQ AX, R15 + MOVQ CX, DX + ROLQ CL, R15 + BZHIQ R14, R15, R15 + ADDQ R15, SI + + // Load ctx.llTable + MOVQ ctx+16(FP), CX + MOVQ (CX), CX + MOVQ (CX)(SI*8), SI + + // Update Match Length State + MOVBQZX DI, R14 + MOVQ $0x00001010, CX + BEXTRQ CX, DI, DI + LEAQ (DX)(R14*1), CX + MOVQ AX, R15 + MOVQ CX, DX + ROLQ CL, R15 + BZHIQ R14, R15, R15 + ADDQ R15, DI + + // Load ctx.mlTable + MOVQ ctx+16(FP), CX + MOVQ 24(CX), CX + MOVQ (CX)(DI*8), DI + + // Update Offset State + MOVBQZX R8, R14 + MOVQ $0x00001010, CX + BEXTRQ CX, R8, R8 + LEAQ (DX)(R14*1), CX + MOVQ AX, R15 + MOVQ CX, DX + ROLQ CL, R15 + BZHIQ R14, R15, R15 + ADDQ R15, R8 + + // Load ctx.ofTable + MOVQ ctx+16(FP), CX + MOVQ 48(CX), CX + MOVQ (CX)(R8*8), R8 + +sequenceDecs_decode_bmi2_skip_update: + // Adjust offset + MOVQ 16(R9), CX + CMPQ R13, $0x01 + JBE sequenceDecs_decode_bmi2_adjust_offsetB_1_or_0 + MOVQ R11, R12 + MOVQ R10, R11 + MOVQ CX, R10 + JMP sequenceDecs_decode_bmi2_adjust_end + +sequenceDecs_decode_bmi2_adjust_offsetB_1_or_0: + CMPQ (R9), $0x00000000 + JNE sequenceDecs_decode_bmi2_adjust_offset_maybezero + INCQ CX + JMP sequenceDecs_decode_bmi2_adjust_offset_nonzero + +sequenceDecs_decode_bmi2_adjust_offset_maybezero: + TESTQ CX, CX + JNZ sequenceDecs_decode_bmi2_adjust_offset_nonzero + MOVQ R10, CX + JMP sequenceDecs_decode_bmi2_adjust_end + +sequenceDecs_decode_bmi2_adjust_offset_nonzero: + CMPQ CX, $0x01 + JB sequenceDecs_decode_bmi2_adjust_zero + JEQ sequenceDecs_decode_bmi2_adjust_one + CMPQ CX, $0x02 + JA sequenceDecs_decode_bmi2_adjust_three + JMP sequenceDecs_decode_bmi2_adjust_two + +sequenceDecs_decode_bmi2_adjust_zero: + MOVQ R10, R13 + JMP sequenceDecs_decode_bmi2_adjust_test_temp_valid + +sequenceDecs_decode_bmi2_adjust_one: + MOVQ R11, R13 + JMP sequenceDecs_decode_bmi2_adjust_test_temp_valid + +sequenceDecs_decode_bmi2_adjust_two: + MOVQ R12, R13 + JMP sequenceDecs_decode_bmi2_adjust_test_temp_valid + +sequenceDecs_decode_bmi2_adjust_three: + LEAQ -1(R10), R13 + +sequenceDecs_decode_bmi2_adjust_test_temp_valid: + TESTQ R13, R13 + JNZ sequenceDecs_decode_bmi2_adjust_temp_valid + MOVQ $0x00000001, R13 + +sequenceDecs_decode_bmi2_adjust_temp_valid: + CMPQ CX, $0x01 + CMOVQNE R11, R12 + MOVQ R10, R11 + MOVQ R13, R10 + MOVQ R13, CX + +sequenceDecs_decode_bmi2_adjust_end: + MOVQ CX, 16(R9) + + // Check values + MOVQ 8(R9), R13 + MOVQ (R9), R14 + LEAQ (R13)(R14*1), R15 + MOVQ s+0(FP), BP + ADDQ R15, 256(BP) + MOVQ ctx+16(FP), R15 + SUBQ R14, 128(R15) + JS error_not_enough_literals + CMPQ R13, $0x00020002 + JA sequenceDecs_decode_bmi2_error_match_len_too_big + TESTQ CX, CX + JNZ sequenceDecs_decode_bmi2_match_len_ofs_ok + TESTQ R13, R13 + JNZ sequenceDecs_decode_bmi2_error_match_len_ofs_mismatch + +sequenceDecs_decode_bmi2_match_len_ofs_ok: + ADDQ $0x18, R9 + MOVQ ctx+16(FP), CX + DECQ 96(CX) + JNS sequenceDecs_decode_bmi2_main_loop + MOVQ s+0(FP), CX + MOVQ R10, 144(CX) + MOVQ R11, 152(CX) + MOVQ R12, 160(CX) + MOVQ br+8(FP), CX + MOVQ AX, 32(CX) + MOVB DL, 40(CX) + MOVQ BX, 24(CX) + + // Return success + MOVQ $0x00000000, ret+24(FP) + RET + + // Return with match length error +sequenceDecs_decode_bmi2_error_match_len_ofs_mismatch: + MOVQ $0x00000001, ret+24(FP) + RET + + // Return with match too long error +sequenceDecs_decode_bmi2_error_match_len_too_big: + MOVQ $0x00000002, ret+24(FP) + RET + + // Return with match offset too long error + MOVQ $0x00000003, ret+24(FP) + RET + + // Return with not enough literals error +error_not_enough_literals: + MOVQ $0x00000004, ret+24(FP) + RET + + // Return with not enough output space error + MOVQ $0x00000005, ret+24(FP) + RET + +// func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int +// Requires: BMI, BMI2, CMOV +TEXT ·sequenceDecs_decode_56_bmi2(SB), $8-32 + MOVQ br+8(FP), CX + MOVQ 32(CX), AX + MOVBQZX 40(CX), DX + MOVQ 24(CX), BX + MOVQ (CX), CX + ADDQ BX, CX + MOVQ CX, (SP) + MOVQ ctx+16(FP), CX + MOVQ 72(CX), SI + MOVQ 80(CX), DI + MOVQ 88(CX), R8 + MOVQ 104(CX), R9 + MOVQ s+0(FP), CX + MOVQ 144(CX), R10 + MOVQ 152(CX), R11 + MOVQ 160(CX), R12 + +sequenceDecs_decode_56_bmi2_main_loop: + MOVQ (SP), R13 + + // Fill bitreader to have enough for the offset and match length. + CMPQ BX, $0x08 + JL sequenceDecs_decode_56_bmi2_fill_byte_by_byte + MOVQ DX, CX + SHRQ $0x03, CX + SUBQ CX, R13 + MOVQ (R13), AX + SUBQ CX, BX + ANDQ $0x07, DX + JMP sequenceDecs_decode_56_bmi2_fill_end + +sequenceDecs_decode_56_bmi2_fill_byte_by_byte: + CMPQ BX, $0x00 + JLE sequenceDecs_decode_56_bmi2_fill_end + CMPQ DX, $0x07 + JLE sequenceDecs_decode_56_bmi2_fill_end + SHLQ $0x08, AX + SUBQ $0x01, R13 + SUBQ $0x01, BX + SUBQ $0x08, DX + MOVBQZX (R13), CX + ORQ CX, AX + JMP sequenceDecs_decode_56_bmi2_fill_byte_by_byte + +sequenceDecs_decode_56_bmi2_fill_end: + // Update offset + MOVQ $0x00000808, CX + BEXTRQ CX, R8, R14 + MOVQ AX, R15 + LEAQ (DX)(R14*1), CX + ROLQ CL, R15 + BZHIQ R14, R15, R15 + MOVQ CX, DX + MOVQ R8, CX + SHRQ $0x20, CX + ADDQ R15, CX + MOVQ CX, 16(R9) + + // Update match length + MOVQ $0x00000808, CX + BEXTRQ CX, DI, R14 + MOVQ AX, R15 + LEAQ (DX)(R14*1), CX + ROLQ CL, R15 + BZHIQ R14, R15, R15 + MOVQ CX, DX + MOVQ DI, CX + SHRQ $0x20, CX + ADDQ R15, CX + MOVQ CX, 8(R9) + + // Update literal length + MOVQ $0x00000808, CX + BEXTRQ CX, SI, R14 + MOVQ AX, R15 + LEAQ (DX)(R14*1), CX + ROLQ CL, R15 + BZHIQ R14, R15, R15 + MOVQ CX, DX + MOVQ SI, CX + SHRQ $0x20, CX + ADDQ R15, CX + MOVQ CX, (R9) + + // Fill bitreader for state updates + MOVQ R13, (SP) + MOVQ $0x00000808, CX + BEXTRQ CX, R8, R13 + MOVQ ctx+16(FP), CX + CMPQ 96(CX), $0x00 + JZ sequenceDecs_decode_56_bmi2_skip_update + + // Update Literal Length State + MOVBQZX SI, R14 + MOVQ $0x00001010, CX + BEXTRQ CX, SI, SI + LEAQ (DX)(R14*1), CX + MOVQ AX, R15 + MOVQ CX, DX + ROLQ CL, R15 + BZHIQ R14, R15, R15 + ADDQ R15, SI + + // Load ctx.llTable + MOVQ ctx+16(FP), CX + MOVQ (CX), CX + MOVQ (CX)(SI*8), SI + + // Update Match Length State + MOVBQZX DI, R14 + MOVQ $0x00001010, CX + BEXTRQ CX, DI, DI + LEAQ (DX)(R14*1), CX + MOVQ AX, R15 + MOVQ CX, DX + ROLQ CL, R15 + BZHIQ R14, R15, R15 + ADDQ R15, DI + + // Load ctx.mlTable + MOVQ ctx+16(FP), CX + MOVQ 24(CX), CX + MOVQ (CX)(DI*8), DI + + // Update Offset State + MOVBQZX R8, R14 + MOVQ $0x00001010, CX + BEXTRQ CX, R8, R8 + LEAQ (DX)(R14*1), CX + MOVQ AX, R15 + MOVQ CX, DX + ROLQ CL, R15 + BZHIQ R14, R15, R15 + ADDQ R15, R8 + + // Load ctx.ofTable + MOVQ ctx+16(FP), CX + MOVQ 48(CX), CX + MOVQ (CX)(R8*8), R8 + +sequenceDecs_decode_56_bmi2_skip_update: + // Adjust offset + MOVQ 16(R9), CX + CMPQ R13, $0x01 + JBE sequenceDecs_decode_56_bmi2_adjust_offsetB_1_or_0 + MOVQ R11, R12 + MOVQ R10, R11 + MOVQ CX, R10 + JMP sequenceDecs_decode_56_bmi2_adjust_end + +sequenceDecs_decode_56_bmi2_adjust_offsetB_1_or_0: + CMPQ (R9), $0x00000000 + JNE sequenceDecs_decode_56_bmi2_adjust_offset_maybezero + INCQ CX + JMP sequenceDecs_decode_56_bmi2_adjust_offset_nonzero + +sequenceDecs_decode_56_bmi2_adjust_offset_maybezero: + TESTQ CX, CX + JNZ sequenceDecs_decode_56_bmi2_adjust_offset_nonzero + MOVQ R10, CX + JMP sequenceDecs_decode_56_bmi2_adjust_end + +sequenceDecs_decode_56_bmi2_adjust_offset_nonzero: + CMPQ CX, $0x01 + JB sequenceDecs_decode_56_bmi2_adjust_zero + JEQ sequenceDecs_decode_56_bmi2_adjust_one + CMPQ CX, $0x02 + JA sequenceDecs_decode_56_bmi2_adjust_three + JMP sequenceDecs_decode_56_bmi2_adjust_two + +sequenceDecs_decode_56_bmi2_adjust_zero: + MOVQ R10, R13 + JMP sequenceDecs_decode_56_bmi2_adjust_test_temp_valid + +sequenceDecs_decode_56_bmi2_adjust_one: + MOVQ R11, R13 + JMP sequenceDecs_decode_56_bmi2_adjust_test_temp_valid + +sequenceDecs_decode_56_bmi2_adjust_two: + MOVQ R12, R13 + JMP sequenceDecs_decode_56_bmi2_adjust_test_temp_valid + +sequenceDecs_decode_56_bmi2_adjust_three: + LEAQ -1(R10), R13 + +sequenceDecs_decode_56_bmi2_adjust_test_temp_valid: + TESTQ R13, R13 + JNZ sequenceDecs_decode_56_bmi2_adjust_temp_valid + MOVQ $0x00000001, R13 + +sequenceDecs_decode_56_bmi2_adjust_temp_valid: + CMPQ CX, $0x01 + CMOVQNE R11, R12 + MOVQ R10, R11 + MOVQ R13, R10 + MOVQ R13, CX + +sequenceDecs_decode_56_bmi2_adjust_end: + MOVQ CX, 16(R9) + + // Check values + MOVQ 8(R9), R13 + MOVQ (R9), R14 + LEAQ (R13)(R14*1), R15 + MOVQ s+0(FP), BP + ADDQ R15, 256(BP) + MOVQ ctx+16(FP), R15 + SUBQ R14, 128(R15) + JS error_not_enough_literals + CMPQ R13, $0x00020002 + JA sequenceDecs_decode_56_bmi2_error_match_len_too_big + TESTQ CX, CX + JNZ sequenceDecs_decode_56_bmi2_match_len_ofs_ok + TESTQ R13, R13 + JNZ sequenceDecs_decode_56_bmi2_error_match_len_ofs_mismatch + +sequenceDecs_decode_56_bmi2_match_len_ofs_ok: + ADDQ $0x18, R9 + MOVQ ctx+16(FP), CX + DECQ 96(CX) + JNS sequenceDecs_decode_56_bmi2_main_loop + MOVQ s+0(FP), CX + MOVQ R10, 144(CX) + MOVQ R11, 152(CX) + MOVQ R12, 160(CX) + MOVQ br+8(FP), CX + MOVQ AX, 32(CX) + MOVB DL, 40(CX) + MOVQ BX, 24(CX) + + // Return success + MOVQ $0x00000000, ret+24(FP) + RET + + // Return with match length error +sequenceDecs_decode_56_bmi2_error_match_len_ofs_mismatch: + MOVQ $0x00000001, ret+24(FP) + RET + + // Return with match too long error +sequenceDecs_decode_56_bmi2_error_match_len_too_big: + MOVQ $0x00000002, ret+24(FP) + RET + + // Return with match offset too long error + MOVQ $0x00000003, ret+24(FP) + RET + + // Return with not enough literals error +error_not_enough_literals: + MOVQ $0x00000004, ret+24(FP) + RET + + // Return with not enough output space error + MOVQ $0x00000005, ret+24(FP) + RET + +// func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool +// Requires: SSE +TEXT ·sequenceDecs_executeSimple_amd64(SB), $8-9 + MOVQ ctx+0(FP), R10 + MOVQ 8(R10), CX + TESTQ CX, CX + JZ empty_seqs + MOVQ (R10), AX + MOVQ 24(R10), DX + MOVQ 32(R10), BX + MOVQ 80(R10), SI + MOVQ 104(R10), DI + MOVQ 120(R10), R8 + MOVQ 56(R10), R9 + MOVQ 64(R10), R10 + ADDQ R10, R9 + + // seqsBase += 24 * seqIndex + LEAQ (DX)(DX*2), R11 + SHLQ $0x03, R11 + ADDQ R11, AX + + // outBase += outPosition + ADDQ DI, BX + +main_loop: + MOVQ (AX), R11 + MOVQ 16(AX), R12 + MOVQ 8(AX), R13 + + // Copy literals + TESTQ R11, R11 + JZ check_offset + XORQ R14, R14 + TESTQ $0x00000001, R11 + JZ copy_1_word + MOVB (SI)(R14*1), R15 + MOVB R15, (BX)(R14*1) + ADDQ $0x01, R14 + +copy_1_word: + TESTQ $0x00000002, R11 + JZ copy_1_dword + MOVW (SI)(R14*1), R15 + MOVW R15, (BX)(R14*1) + ADDQ $0x02, R14 + +copy_1_dword: + TESTQ $0x00000004, R11 + JZ copy_1_qword + MOVL (SI)(R14*1), R15 + MOVL R15, (BX)(R14*1) + ADDQ $0x04, R14 + +copy_1_qword: + TESTQ $0x00000008, R11 + JZ copy_1_test + MOVQ (SI)(R14*1), R15 + MOVQ R15, (BX)(R14*1) + ADDQ $0x08, R14 + JMP copy_1_test + +copy_1: + MOVUPS (SI)(R14*1), X0 + MOVUPS X0, (BX)(R14*1) + ADDQ $0x10, R14 + +copy_1_test: + CMPQ R14, R11 + JB copy_1 + ADDQ R11, SI + ADDQ R11, BX + ADDQ R11, DI + + // Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize) +check_offset: + LEAQ (DI)(R10*1), R11 + CMPQ R12, R11 + JG error_match_off_too_big + CMPQ R12, R8 + JG error_match_off_too_big + + // Copy match from history + MOVQ R12, R11 + SUBQ DI, R11 + JLS copy_match + MOVQ R9, R14 + SUBQ R11, R14 + CMPQ R13, R11 + JGE copy_all_from_history + XORQ R11, R11 + TESTQ $0x00000001, R13 + JZ copy_4_word + MOVB (R14)(R11*1), R12 + MOVB R12, (BX)(R11*1) + ADDQ $0x01, R11 + +copy_4_word: + TESTQ $0x00000002, R13 + JZ copy_4_dword + MOVW (R14)(R11*1), R12 + MOVW R12, (BX)(R11*1) + ADDQ $0x02, R11 + +copy_4_dword: + TESTQ $0x00000004, R13 + JZ copy_4_qword + MOVL (R14)(R11*1), R12 + MOVL R12, (BX)(R11*1) + ADDQ $0x04, R11 + +copy_4_qword: + TESTQ $0x00000008, R13 + JZ copy_4_test + MOVQ (R14)(R11*1), R12 + MOVQ R12, (BX)(R11*1) + ADDQ $0x08, R11 + JMP copy_4_test + +copy_4: + MOVUPS (R14)(R11*1), X0 + MOVUPS X0, (BX)(R11*1) + ADDQ $0x10, R11 + +copy_4_test: + CMPQ R11, R13 + JB copy_4 + ADDQ R13, DI + ADDQ R13, BX + ADDQ $0x18, AX + INCQ DX + CMPQ DX, CX + JB main_loop + JMP loop_finished + +copy_all_from_history: + XORQ R15, R15 + TESTQ $0x00000001, R11 + JZ copy_5_word + MOVB (R14)(R15*1), BP + MOVB BP, (BX)(R15*1) + ADDQ $0x01, R15 + +copy_5_word: + TESTQ $0x00000002, R11 + JZ copy_5_dword + MOVW (R14)(R15*1), BP + MOVW BP, (BX)(R15*1) + ADDQ $0x02, R15 + +copy_5_dword: + TESTQ $0x00000004, R11 + JZ copy_5_qword + MOVL (R14)(R15*1), BP + MOVL BP, (BX)(R15*1) + ADDQ $0x04, R15 + +copy_5_qword: + TESTQ $0x00000008, R11 + JZ copy_5_test + MOVQ (R14)(R15*1), BP + MOVQ BP, (BX)(R15*1) + ADDQ $0x08, R15 + JMP copy_5_test + +copy_5: + MOVUPS (R14)(R15*1), X0 + MOVUPS X0, (BX)(R15*1) + ADDQ $0x10, R15 + +copy_5_test: + CMPQ R15, R11 + JB copy_5 + ADDQ R11, BX + ADDQ R11, DI + SUBQ R11, R13 + + // Copy match from the current buffer +copy_match: + TESTQ R13, R13 + JZ handle_loop + MOVQ BX, R11 + SUBQ R12, R11 + + // ml <= mo + CMPQ R13, R12 + JA copy_overlapping_match + + // Copy non-overlapping match + XORQ R12, R12 + +copy_2: + MOVUPS (R11)(R12*1), X0 + MOVUPS X0, (BX)(R12*1) + ADDQ $0x10, R12 + CMPQ R12, R13 + JB copy_2 + ADDQ R13, BX + ADDQ R13, DI + JMP handle_loop + + // Copy overlapping match +copy_overlapping_match: + XORQ R12, R12 + +copy_slow_3: + MOVB (R11)(R12*1), R14 + MOVB R14, (BX)(R12*1) + INCQ R12 + CMPQ R12, R13 + JB copy_slow_3 + ADDQ R13, BX + ADDQ R13, DI + +handle_loop: + ADDQ $0x18, AX + INCQ DX + CMPQ DX, CX + JB main_loop + +loop_finished: + // Return value + MOVB $0x01, ret+8(FP) + + // Update the context + MOVQ ctx+0(FP), AX + MOVQ DX, 24(AX) + MOVQ DI, 104(AX) + MOVQ 80(AX), CX + SUBQ CX, SI + MOVQ SI, 112(AX) + RET + +error_match_off_too_big: + // Return value + MOVB $0x00, ret+8(FP) + + // Update the context + MOVQ ctx+0(FP), AX + MOVQ DX, 24(AX) + MOVQ DI, 104(AX) + MOVQ 80(AX), CX + SUBQ CX, SI + MOVQ SI, 112(AX) + RET + +empty_seqs: + // Return value + MOVB $0x01, ret+8(FP) + RET + +// func sequenceDecs_decodeSync_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int +// Requires: CMOV, SSE +TEXT ·sequenceDecs_decodeSync_amd64(SB), $64-32 + MOVQ br+8(FP), AX + MOVQ 32(AX), DX + MOVBQZX 40(AX), BX + MOVQ 24(AX), SI + MOVQ (AX), AX + ADDQ SI, AX + MOVQ AX, (SP) + MOVQ ctx+16(FP), AX + MOVQ 72(AX), DI + MOVQ 80(AX), R8 + MOVQ 88(AX), R9 + MOVQ 112(AX), R10 + MOVQ 128(AX), CX + MOVQ CX, 32(SP) + MOVQ 144(AX), R11 + MOVQ 136(AX), R12 + MOVQ 200(AX), CX + MOVQ CX, 56(SP) + MOVQ 176(AX), CX + MOVQ CX, 48(SP) + MOVQ 184(AX), AX + MOVQ AX, 40(SP) + MOVQ 40(SP), AX + ADDQ AX, 48(SP) + + // Calculate poiter to s.out[cap(s.out)] (a past-end pointer) + ADDQ R10, 32(SP) + + // outBase += outPosition + ADDQ R12, R10 + +sequenceDecs_decodeSync_amd64_main_loop: + MOVQ (SP), R13 + + // Fill bitreader to have enough for the offset and match length. + CMPQ SI, $0x08 + JL sequenceDecs_decodeSync_amd64_fill_byte_by_byte + MOVQ BX, AX + SHRQ $0x03, AX + SUBQ AX, R13 + MOVQ (R13), DX + SUBQ AX, SI + ANDQ $0x07, BX + JMP sequenceDecs_decodeSync_amd64_fill_end + +sequenceDecs_decodeSync_amd64_fill_byte_by_byte: + CMPQ SI, $0x00 + JLE sequenceDecs_decodeSync_amd64_fill_end + CMPQ BX, $0x07 + JLE sequenceDecs_decodeSync_amd64_fill_end + SHLQ $0x08, DX + SUBQ $0x01, R13 + SUBQ $0x01, SI + SUBQ $0x08, BX + MOVBQZX (R13), AX + ORQ AX, DX + JMP sequenceDecs_decodeSync_amd64_fill_byte_by_byte + +sequenceDecs_decodeSync_amd64_fill_end: + // Update offset + MOVQ R9, AX + MOVQ BX, CX + MOVQ DX, R14 + SHLQ CL, R14 + MOVB AH, CL + ADDQ CX, BX + NEGL CX + SHRQ CL, R14 + SHRQ $0x20, AX + TESTQ CX, CX + CMOVQEQ CX, R14 + ADDQ R14, AX + MOVQ AX, 8(SP) + + // Update match length + MOVQ R8, AX + MOVQ BX, CX + MOVQ DX, R14 + SHLQ CL, R14 + MOVB AH, CL + ADDQ CX, BX + NEGL CX + SHRQ CL, R14 + SHRQ $0x20, AX + TESTQ CX, CX + CMOVQEQ CX, R14 + ADDQ R14, AX + MOVQ AX, 16(SP) + + // Fill bitreader to have enough for the remaining + CMPQ SI, $0x08 + JL sequenceDecs_decodeSync_amd64_fill_2_byte_by_byte + MOVQ BX, AX + SHRQ $0x03, AX + SUBQ AX, R13 + MOVQ (R13), DX + SUBQ AX, SI + ANDQ $0x07, BX + JMP sequenceDecs_decodeSync_amd64_fill_2_end + +sequenceDecs_decodeSync_amd64_fill_2_byte_by_byte: + CMPQ SI, $0x00 + JLE sequenceDecs_decodeSync_amd64_fill_2_end + CMPQ BX, $0x07 + JLE sequenceDecs_decodeSync_amd64_fill_2_end + SHLQ $0x08, DX + SUBQ $0x01, R13 + SUBQ $0x01, SI + SUBQ $0x08, BX + MOVBQZX (R13), AX + ORQ AX, DX + JMP sequenceDecs_decodeSync_amd64_fill_2_byte_by_byte + +sequenceDecs_decodeSync_amd64_fill_2_end: + // Update literal length + MOVQ DI, AX + MOVQ BX, CX + MOVQ DX, R14 + SHLQ CL, R14 + MOVB AH, CL + ADDQ CX, BX + NEGL CX + SHRQ CL, R14 + SHRQ $0x20, AX + TESTQ CX, CX + CMOVQEQ CX, R14 + ADDQ R14, AX + MOVQ AX, 24(SP) + + // Fill bitreader for state updates + MOVQ R13, (SP) + MOVQ R9, AX + SHRQ $0x08, AX + MOVBQZX AL, AX + MOVQ ctx+16(FP), CX + CMPQ 96(CX), $0x00 + JZ sequenceDecs_decodeSync_amd64_skip_update + + // Update Literal Length State + MOVBQZX DI, R13 + SHRQ $0x10, DI + MOVWQZX DI, DI + CMPQ R13, $0x00 + JZ sequenceDecs_decodeSync_amd64_llState_updateState_skip_zero + MOVQ BX, CX + ADDQ R13, BX + MOVQ DX, R14 + SHLQ CL, R14 + MOVQ R13, CX + NEGQ CX + SHRQ CL, R14 + ADDQ R14, DI + +sequenceDecs_decodeSync_amd64_llState_updateState_skip_zero: + // Load ctx.llTable + MOVQ ctx+16(FP), CX + MOVQ (CX), CX + MOVQ (CX)(DI*8), DI + + // Update Match Length State + MOVBQZX R8, R13 + SHRQ $0x10, R8 + MOVWQZX R8, R8 + CMPQ R13, $0x00 + JZ sequenceDecs_decodeSync_amd64_mlState_updateState_skip_zero + MOVQ BX, CX + ADDQ R13, BX + MOVQ DX, R14 + SHLQ CL, R14 + MOVQ R13, CX + NEGQ CX + SHRQ CL, R14 + ADDQ R14, R8 + +sequenceDecs_decodeSync_amd64_mlState_updateState_skip_zero: + // Load ctx.mlTable + MOVQ ctx+16(FP), CX + MOVQ 24(CX), CX + MOVQ (CX)(R8*8), R8 + + // Update Offset State + MOVBQZX R9, R13 + SHRQ $0x10, R9 + MOVWQZX R9, R9 + CMPQ R13, $0x00 + JZ sequenceDecs_decodeSync_amd64_ofState_updateState_skip_zero + MOVQ BX, CX + ADDQ R13, BX + MOVQ DX, R14 + SHLQ CL, R14 + MOVQ R13, CX + NEGQ CX + SHRQ CL, R14 + ADDQ R14, R9 + +sequenceDecs_decodeSync_amd64_ofState_updateState_skip_zero: + // Load ctx.ofTable + MOVQ ctx+16(FP), CX + MOVQ 48(CX), CX + MOVQ (CX)(R9*8), R9 + +sequenceDecs_decodeSync_amd64_skip_update: + // Adjust offset + MOVQ s+0(FP), CX + MOVQ 8(SP), R13 + CMPQ AX, $0x01 + JBE sequenceDecs_decodeSync_amd64_adjust_offsetB_1_or_0 + MOVUPS 144(CX), X0 + MOVQ R13, 144(CX) + MOVUPS X0, 152(CX) + JMP sequenceDecs_decodeSync_amd64_adjust_end + +sequenceDecs_decodeSync_amd64_adjust_offsetB_1_or_0: + CMPQ 24(SP), $0x00000000 + JNE sequenceDecs_decodeSync_amd64_adjust_offset_maybezero + INCQ R13 + JMP sequenceDecs_decodeSync_amd64_adjust_offset_nonzero + +sequenceDecs_decodeSync_amd64_adjust_offset_maybezero: + TESTQ R13, R13 + JNZ sequenceDecs_decodeSync_amd64_adjust_offset_nonzero + MOVQ 144(CX), R13 + JMP sequenceDecs_decodeSync_amd64_adjust_end + +sequenceDecs_decodeSync_amd64_adjust_offset_nonzero: + MOVQ R13, AX + XORQ R14, R14 + MOVQ $-1, R15 + CMPQ R13, $0x03 + CMOVQEQ R14, AX + CMOVQEQ R15, R14 + LEAQ 144(CX), R15 + ADDQ (R15)(AX*8), R14 + JNZ sequenceDecs_decodeSync_amd64_adjust_temp_valid + MOVQ $0x00000001, R14 + +sequenceDecs_decodeSync_amd64_adjust_temp_valid: + CMPQ R13, $0x01 + JZ sequenceDecs_decodeSync_amd64_adjust_skip + MOVQ 152(CX), AX + MOVQ AX, 160(CX) + +sequenceDecs_decodeSync_amd64_adjust_skip: + MOVQ 144(CX), AX + MOVQ AX, 152(CX) + MOVQ R14, 144(CX) + MOVQ R14, R13 + +sequenceDecs_decodeSync_amd64_adjust_end: + MOVQ R13, 8(SP) + + // Check values + MOVQ 16(SP), AX + MOVQ 24(SP), CX + LEAQ (AX)(CX*1), R14 + MOVQ s+0(FP), R15 + ADDQ R14, 256(R15) + MOVQ ctx+16(FP), R14 + SUBQ CX, 104(R14) + JS error_not_enough_literals + CMPQ AX, $0x00020002 + JA sequenceDecs_decodeSync_amd64_error_match_len_too_big + TESTQ R13, R13 + JNZ sequenceDecs_decodeSync_amd64_match_len_ofs_ok + TESTQ AX, AX + JNZ sequenceDecs_decodeSync_amd64_error_match_len_ofs_mismatch + +sequenceDecs_decodeSync_amd64_match_len_ofs_ok: + MOVQ 24(SP), AX + MOVQ 8(SP), CX + MOVQ 16(SP), R13 + + // Check if we have enough space in s.out + LEAQ (AX)(R13*1), R14 + ADDQ R10, R14 + CMPQ R14, 32(SP) + JA error_not_enough_space + + // Copy literals + TESTQ AX, AX + JZ check_offset + XORQ R14, R14 + TESTQ $0x00000001, AX + JZ copy_1_word + MOVB (R11)(R14*1), R15 + MOVB R15, (R10)(R14*1) + ADDQ $0x01, R14 + +copy_1_word: + TESTQ $0x00000002, AX + JZ copy_1_dword + MOVW (R11)(R14*1), R15 + MOVW R15, (R10)(R14*1) + ADDQ $0x02, R14 + +copy_1_dword: + TESTQ $0x00000004, AX + JZ copy_1_qword + MOVL (R11)(R14*1), R15 + MOVL R15, (R10)(R14*1) + ADDQ $0x04, R14 + +copy_1_qword: + TESTQ $0x00000008, AX + JZ copy_1_test + MOVQ (R11)(R14*1), R15 + MOVQ R15, (R10)(R14*1) + ADDQ $0x08, R14 + JMP copy_1_test + +copy_1: + MOVUPS (R11)(R14*1), X0 + MOVUPS X0, (R10)(R14*1) + ADDQ $0x10, R14 + +copy_1_test: + CMPQ R14, AX + JB copy_1 + ADDQ AX, R11 + ADDQ AX, R10 + ADDQ AX, R12 + + // Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize) +check_offset: + MOVQ R12, AX + ADDQ 40(SP), AX + CMPQ CX, AX + JG error_match_off_too_big + CMPQ CX, 56(SP) + JG error_match_off_too_big + + // Copy match from history + MOVQ CX, AX + SUBQ R12, AX + JLS copy_match + MOVQ 48(SP), R14 + SUBQ AX, R14 + CMPQ R13, AX + JGE copy_all_from_history + XORQ AX, AX + TESTQ $0x00000001, R13 + JZ copy_4_word + MOVB (R14)(AX*1), CL + MOVB CL, (R10)(AX*1) + ADDQ $0x01, AX + +copy_4_word: + TESTQ $0x00000002, R13 + JZ copy_4_dword + MOVW (R14)(AX*1), CX + MOVW CX, (R10)(AX*1) + ADDQ $0x02, AX + +copy_4_dword: + TESTQ $0x00000004, R13 + JZ copy_4_qword + MOVL (R14)(AX*1), CX + MOVL CX, (R10)(AX*1) + ADDQ $0x04, AX + +copy_4_qword: + TESTQ $0x00000008, R13 + JZ copy_4_test + MOVQ (R14)(AX*1), CX + MOVQ CX, (R10)(AX*1) + ADDQ $0x08, AX + JMP copy_4_test + +copy_4: + MOVUPS (R14)(AX*1), X0 + MOVUPS X0, (R10)(AX*1) + ADDQ $0x10, AX + +copy_4_test: + CMPQ AX, R13 + JB copy_4 + ADDQ R13, R12 + ADDQ R13, R10 + JMP handle_loop + JMP loop_finished + +copy_all_from_history: + XORQ R15, R15 + TESTQ $0x00000001, AX + JZ copy_5_word + MOVB (R14)(R15*1), BP + MOVB BP, (R10)(R15*1) + ADDQ $0x01, R15 + +copy_5_word: + TESTQ $0x00000002, AX + JZ copy_5_dword + MOVW (R14)(R15*1), BP + MOVW BP, (R10)(R15*1) + ADDQ $0x02, R15 + +copy_5_dword: + TESTQ $0x00000004, AX + JZ copy_5_qword + MOVL (R14)(R15*1), BP + MOVL BP, (R10)(R15*1) + ADDQ $0x04, R15 + +copy_5_qword: + TESTQ $0x00000008, AX + JZ copy_5_test + MOVQ (R14)(R15*1), BP + MOVQ BP, (R10)(R15*1) + ADDQ $0x08, R15 + JMP copy_5_test + +copy_5: + MOVUPS (R14)(R15*1), X0 + MOVUPS X0, (R10)(R15*1) + ADDQ $0x10, R15 + +copy_5_test: + CMPQ R15, AX + JB copy_5 + ADDQ AX, R10 + ADDQ AX, R12 + SUBQ AX, R13 + + // Copy match from the current buffer +copy_match: + TESTQ R13, R13 + JZ handle_loop + MOVQ R10, AX + SUBQ CX, AX + + // ml <= mo + CMPQ R13, CX + JA copy_overlapping_match + + // Copy non-overlapping match + XORQ CX, CX + +copy_2: + MOVUPS (AX)(CX*1), X0 + MOVUPS X0, (R10)(CX*1) + ADDQ $0x10, CX + CMPQ CX, R13 + JB copy_2 + ADDQ R13, R10 + ADDQ R13, R12 + JMP handle_loop + + // Copy overlapping match +copy_overlapping_match: + XORQ CX, CX + +copy_slow_3: + MOVB (AX)(CX*1), R14 + MOVB R14, (R10)(CX*1) + INCQ CX + CMPQ CX, R13 + JB copy_slow_3 + ADDQ R13, R10 + ADDQ R13, R12 + +handle_loop: + MOVQ ctx+16(FP), AX + DECQ 96(AX) + JNS sequenceDecs_decodeSync_amd64_main_loop + +loop_finished: + MOVQ br+8(FP), AX + MOVQ DX, 32(AX) + MOVB BL, 40(AX) + MOVQ SI, 24(AX) + + // Update the context + MOVQ ctx+16(FP), AX + MOVQ R12, 136(AX) + MOVQ 144(AX), CX + SUBQ CX, R11 + MOVQ R11, 168(AX) + + // Return success + MOVQ $0x00000000, ret+24(FP) + RET + + // Return with match length error +sequenceDecs_decodeSync_amd64_error_match_len_ofs_mismatch: + MOVQ 16(SP), AX + MOVQ ctx+16(FP), CX + MOVQ AX, 216(CX) + MOVQ $0x00000001, ret+24(FP) + RET + + // Return with match too long error +sequenceDecs_decodeSync_amd64_error_match_len_too_big: + MOVQ ctx+16(FP), AX + MOVQ 16(SP), CX + MOVQ CX, 216(AX) + MOVQ $0x00000002, ret+24(FP) + RET + + // Return with match offset too long error +error_match_off_too_big: + MOVQ ctx+16(FP), AX + MOVQ 8(SP), CX + MOVQ CX, 224(AX) + MOVQ R12, 136(AX) + MOVQ $0x00000003, ret+24(FP) + RET + + // Return with not enough literals error +error_not_enough_literals: + MOVQ ctx+16(FP), AX + MOVQ 24(SP), CX + MOVQ CX, 208(AX) + MOVQ $0x00000004, ret+24(FP) + RET + + // Return with not enough output space error +error_not_enough_space: + MOVQ ctx+16(FP), AX + MOVQ 24(SP), CX + MOVQ CX, 208(AX) + MOVQ 16(SP), CX + MOVQ CX, 216(AX) + MOVQ R12, 136(AX) + MOVQ $0x00000005, ret+24(FP) + RET + +// func sequenceDecs_decodeSync_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int +// Requires: BMI, BMI2, CMOV, SSE +TEXT ·sequenceDecs_decodeSync_bmi2(SB), $64-32 + MOVQ br+8(FP), CX + MOVQ 32(CX), AX + MOVBQZX 40(CX), DX + MOVQ 24(CX), BX + MOVQ (CX), CX + ADDQ BX, CX + MOVQ CX, (SP) + MOVQ ctx+16(FP), CX + MOVQ 72(CX), SI + MOVQ 80(CX), DI + MOVQ 88(CX), R8 + MOVQ 112(CX), R9 + MOVQ 128(CX), R10 + MOVQ R10, 32(SP) + MOVQ 144(CX), R10 + MOVQ 136(CX), R11 + MOVQ 200(CX), R12 + MOVQ R12, 56(SP) + MOVQ 176(CX), R12 + MOVQ R12, 48(SP) + MOVQ 184(CX), CX + MOVQ CX, 40(SP) + MOVQ 40(SP), CX + ADDQ CX, 48(SP) + + // Calculate poiter to s.out[cap(s.out)] (a past-end pointer) + ADDQ R9, 32(SP) + + // outBase += outPosition + ADDQ R11, R9 + +sequenceDecs_decodeSync_bmi2_main_loop: + MOVQ (SP), R12 + + // Fill bitreader to have enough for the offset and match length. + CMPQ BX, $0x08 + JL sequenceDecs_decodeSync_bmi2_fill_byte_by_byte + MOVQ DX, CX + SHRQ $0x03, CX + SUBQ CX, R12 + MOVQ (R12), AX + SUBQ CX, BX + ANDQ $0x07, DX + JMP sequenceDecs_decodeSync_bmi2_fill_end + +sequenceDecs_decodeSync_bmi2_fill_byte_by_byte: + CMPQ BX, $0x00 + JLE sequenceDecs_decodeSync_bmi2_fill_end + CMPQ DX, $0x07 + JLE sequenceDecs_decodeSync_bmi2_fill_end + SHLQ $0x08, AX + SUBQ $0x01, R12 + SUBQ $0x01, BX + SUBQ $0x08, DX + MOVBQZX (R12), CX + ORQ CX, AX + JMP sequenceDecs_decodeSync_bmi2_fill_byte_by_byte + +sequenceDecs_decodeSync_bmi2_fill_end: + // Update offset + MOVQ $0x00000808, CX + BEXTRQ CX, R8, R13 + MOVQ AX, R14 + LEAQ (DX)(R13*1), CX + ROLQ CL, R14 + BZHIQ R13, R14, R14 + MOVQ CX, DX + MOVQ R8, CX + SHRQ $0x20, CX + ADDQ R14, CX + MOVQ CX, 8(SP) + + // Update match length + MOVQ $0x00000808, CX + BEXTRQ CX, DI, R13 + MOVQ AX, R14 + LEAQ (DX)(R13*1), CX + ROLQ CL, R14 + BZHIQ R13, R14, R14 + MOVQ CX, DX + MOVQ DI, CX + SHRQ $0x20, CX + ADDQ R14, CX + MOVQ CX, 16(SP) + + // Fill bitreader to have enough for the remaining + CMPQ BX, $0x08 + JL sequenceDecs_decodeSync_bmi2_fill_2_byte_by_byte + MOVQ DX, CX + SHRQ $0x03, CX + SUBQ CX, R12 + MOVQ (R12), AX + SUBQ CX, BX + ANDQ $0x07, DX + JMP sequenceDecs_decodeSync_bmi2_fill_2_end + +sequenceDecs_decodeSync_bmi2_fill_2_byte_by_byte: + CMPQ BX, $0x00 + JLE sequenceDecs_decodeSync_bmi2_fill_2_end + CMPQ DX, $0x07 + JLE sequenceDecs_decodeSync_bmi2_fill_2_end + SHLQ $0x08, AX + SUBQ $0x01, R12 + SUBQ $0x01, BX + SUBQ $0x08, DX + MOVBQZX (R12), CX + ORQ CX, AX + JMP sequenceDecs_decodeSync_bmi2_fill_2_byte_by_byte + +sequenceDecs_decodeSync_bmi2_fill_2_end: + // Update literal length + MOVQ $0x00000808, CX + BEXTRQ CX, SI, R13 + MOVQ AX, R14 + LEAQ (DX)(R13*1), CX + ROLQ CL, R14 + BZHIQ R13, R14, R14 + MOVQ CX, DX + MOVQ SI, CX + SHRQ $0x20, CX + ADDQ R14, CX + MOVQ CX, 24(SP) + + // Fill bitreader for state updates + MOVQ R12, (SP) + MOVQ $0x00000808, CX + BEXTRQ CX, R8, R12 + MOVQ ctx+16(FP), CX + CMPQ 96(CX), $0x00 + JZ sequenceDecs_decodeSync_bmi2_skip_update + + // Update Literal Length State + MOVBQZX SI, R13 + MOVQ $0x00001010, CX + BEXTRQ CX, SI, SI + LEAQ (DX)(R13*1), CX + MOVQ AX, R14 + MOVQ CX, DX + ROLQ CL, R14 + BZHIQ R13, R14, R14 + ADDQ R14, SI + + // Load ctx.llTable + MOVQ ctx+16(FP), CX + MOVQ (CX), CX + MOVQ (CX)(SI*8), SI + + // Update Match Length State + MOVBQZX DI, R13 + MOVQ $0x00001010, CX + BEXTRQ CX, DI, DI + LEAQ (DX)(R13*1), CX + MOVQ AX, R14 + MOVQ CX, DX + ROLQ CL, R14 + BZHIQ R13, R14, R14 + ADDQ R14, DI + + // Load ctx.mlTable + MOVQ ctx+16(FP), CX + MOVQ 24(CX), CX + MOVQ (CX)(DI*8), DI + + // Update Offset State + MOVBQZX R8, R13 + MOVQ $0x00001010, CX + BEXTRQ CX, R8, R8 + LEAQ (DX)(R13*1), CX + MOVQ AX, R14 + MOVQ CX, DX + ROLQ CL, R14 + BZHIQ R13, R14, R14 + ADDQ R14, R8 + + // Load ctx.ofTable + MOVQ ctx+16(FP), CX + MOVQ 48(CX), CX + MOVQ (CX)(R8*8), R8 + +sequenceDecs_decodeSync_bmi2_skip_update: + // Adjust offset + MOVQ s+0(FP), CX + MOVQ 8(SP), R13 + CMPQ R12, $0x01 + JBE sequenceDecs_decodeSync_bmi2_adjust_offsetB_1_or_0 + MOVUPS 144(CX), X0 + MOVQ R13, 144(CX) + MOVUPS X0, 152(CX) + JMP sequenceDecs_decodeSync_bmi2_adjust_end + +sequenceDecs_decodeSync_bmi2_adjust_offsetB_1_or_0: + CMPQ 24(SP), $0x00000000 + JNE sequenceDecs_decodeSync_bmi2_adjust_offset_maybezero + INCQ R13 + JMP sequenceDecs_decodeSync_bmi2_adjust_offset_nonzero + +sequenceDecs_decodeSync_bmi2_adjust_offset_maybezero: + TESTQ R13, R13 + JNZ sequenceDecs_decodeSync_bmi2_adjust_offset_nonzero + MOVQ 144(CX), R13 + JMP sequenceDecs_decodeSync_bmi2_adjust_end + +sequenceDecs_decodeSync_bmi2_adjust_offset_nonzero: + MOVQ R13, R12 + XORQ R14, R14 + MOVQ $-1, R15 + CMPQ R13, $0x03 + CMOVQEQ R14, R12 + CMOVQEQ R15, R14 + LEAQ 144(CX), R15 + ADDQ (R15)(R12*8), R14 + JNZ sequenceDecs_decodeSync_bmi2_adjust_temp_valid + MOVQ $0x00000001, R14 + +sequenceDecs_decodeSync_bmi2_adjust_temp_valid: + CMPQ R13, $0x01 + JZ sequenceDecs_decodeSync_bmi2_adjust_skip + MOVQ 152(CX), R12 + MOVQ R12, 160(CX) + +sequenceDecs_decodeSync_bmi2_adjust_skip: + MOVQ 144(CX), R12 + MOVQ R12, 152(CX) + MOVQ R14, 144(CX) + MOVQ R14, R13 + +sequenceDecs_decodeSync_bmi2_adjust_end: + MOVQ R13, 8(SP) + + // Check values + MOVQ 16(SP), CX + MOVQ 24(SP), R12 + LEAQ (CX)(R12*1), R14 + MOVQ s+0(FP), R15 + ADDQ R14, 256(R15) + MOVQ ctx+16(FP), R14 + SUBQ R12, 104(R14) + JS error_not_enough_literals + CMPQ CX, $0x00020002 + JA sequenceDecs_decodeSync_bmi2_error_match_len_too_big + TESTQ R13, R13 + JNZ sequenceDecs_decodeSync_bmi2_match_len_ofs_ok + TESTQ CX, CX + JNZ sequenceDecs_decodeSync_bmi2_error_match_len_ofs_mismatch + +sequenceDecs_decodeSync_bmi2_match_len_ofs_ok: + MOVQ 24(SP), CX + MOVQ 8(SP), R12 + MOVQ 16(SP), R13 + + // Check if we have enough space in s.out + LEAQ (CX)(R13*1), R14 + ADDQ R9, R14 + CMPQ R14, 32(SP) + JA error_not_enough_space + + // Copy literals + TESTQ CX, CX + JZ check_offset + XORQ R14, R14 + TESTQ $0x00000001, CX + JZ copy_1_word + MOVB (R10)(R14*1), R15 + MOVB R15, (R9)(R14*1) + ADDQ $0x01, R14 + +copy_1_word: + TESTQ $0x00000002, CX + JZ copy_1_dword + MOVW (R10)(R14*1), R15 + MOVW R15, (R9)(R14*1) + ADDQ $0x02, R14 + +copy_1_dword: + TESTQ $0x00000004, CX + JZ copy_1_qword + MOVL (R10)(R14*1), R15 + MOVL R15, (R9)(R14*1) + ADDQ $0x04, R14 + +copy_1_qword: + TESTQ $0x00000008, CX + JZ copy_1_test + MOVQ (R10)(R14*1), R15 + MOVQ R15, (R9)(R14*1) + ADDQ $0x08, R14 + JMP copy_1_test + +copy_1: + MOVUPS (R10)(R14*1), X0 + MOVUPS X0, (R9)(R14*1) + ADDQ $0x10, R14 + +copy_1_test: + CMPQ R14, CX + JB copy_1 + ADDQ CX, R10 + ADDQ CX, R9 + ADDQ CX, R11 + + // Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize) +check_offset: + MOVQ R11, CX + ADDQ 40(SP), CX + CMPQ R12, CX + JG error_match_off_too_big + CMPQ R12, 56(SP) + JG error_match_off_too_big + + // Copy match from history + MOVQ R12, CX + SUBQ R11, CX + JLS copy_match + MOVQ 48(SP), R14 + SUBQ CX, R14 + CMPQ R13, CX + JGE copy_all_from_history + XORQ CX, CX + TESTQ $0x00000001, R13 + JZ copy_4_word + MOVB (R14)(CX*1), R12 + MOVB R12, (R9)(CX*1) + ADDQ $0x01, CX + +copy_4_word: + TESTQ $0x00000002, R13 + JZ copy_4_dword + MOVW (R14)(CX*1), R12 + MOVW R12, (R9)(CX*1) + ADDQ $0x02, CX + +copy_4_dword: + TESTQ $0x00000004, R13 + JZ copy_4_qword + MOVL (R14)(CX*1), R12 + MOVL R12, (R9)(CX*1) + ADDQ $0x04, CX + +copy_4_qword: + TESTQ $0x00000008, R13 + JZ copy_4_test + MOVQ (R14)(CX*1), R12 + MOVQ R12, (R9)(CX*1) + ADDQ $0x08, CX + JMP copy_4_test + +copy_4: + MOVUPS (R14)(CX*1), X0 + MOVUPS X0, (R9)(CX*1) + ADDQ $0x10, CX + +copy_4_test: + CMPQ CX, R13 + JB copy_4 + ADDQ R13, R11 + ADDQ R13, R9 + JMP handle_loop + JMP loop_finished + +copy_all_from_history: + XORQ R15, R15 + TESTQ $0x00000001, CX + JZ copy_5_word + MOVB (R14)(R15*1), BP + MOVB BP, (R9)(R15*1) + ADDQ $0x01, R15 + +copy_5_word: + TESTQ $0x00000002, CX + JZ copy_5_dword + MOVW (R14)(R15*1), BP + MOVW BP, (R9)(R15*1) + ADDQ $0x02, R15 + +copy_5_dword: + TESTQ $0x00000004, CX + JZ copy_5_qword + MOVL (R14)(R15*1), BP + MOVL BP, (R9)(R15*1) + ADDQ $0x04, R15 + +copy_5_qword: + TESTQ $0x00000008, CX + JZ copy_5_test + MOVQ (R14)(R15*1), BP + MOVQ BP, (R9)(R15*1) + ADDQ $0x08, R15 + JMP copy_5_test + +copy_5: + MOVUPS (R14)(R15*1), X0 + MOVUPS X0, (R9)(R15*1) + ADDQ $0x10, R15 + +copy_5_test: + CMPQ R15, CX + JB copy_5 + ADDQ CX, R9 + ADDQ CX, R11 + SUBQ CX, R13 + + // Copy match from the current buffer +copy_match: + TESTQ R13, R13 + JZ handle_loop + MOVQ R9, CX + SUBQ R12, CX + + // ml <= mo + CMPQ R13, R12 + JA copy_overlapping_match + + // Copy non-overlapping match + XORQ R12, R12 + +copy_2: + MOVUPS (CX)(R12*1), X0 + MOVUPS X0, (R9)(R12*1) + ADDQ $0x10, R12 + CMPQ R12, R13 + JB copy_2 + ADDQ R13, R9 + ADDQ R13, R11 + JMP handle_loop + + // Copy overlapping match +copy_overlapping_match: + XORQ R12, R12 + +copy_slow_3: + MOVB (CX)(R12*1), R14 + MOVB R14, (R9)(R12*1) + INCQ R12 + CMPQ R12, R13 + JB copy_slow_3 + ADDQ R13, R9 + ADDQ R13, R11 + +handle_loop: + MOVQ ctx+16(FP), CX + DECQ 96(CX) + JNS sequenceDecs_decodeSync_bmi2_main_loop + +loop_finished: + MOVQ br+8(FP), CX + MOVQ AX, 32(CX) + MOVB DL, 40(CX) + MOVQ BX, 24(CX) + + // Update the context + MOVQ ctx+16(FP), AX + MOVQ R11, 136(AX) + MOVQ 144(AX), CX + SUBQ CX, R10 + MOVQ R10, 168(AX) + + // Return success + MOVQ $0x00000000, ret+24(FP) + RET + + // Return with match length error +sequenceDecs_decodeSync_bmi2_error_match_len_ofs_mismatch: + MOVQ 16(SP), AX + MOVQ ctx+16(FP), CX + MOVQ AX, 216(CX) + MOVQ $0x00000001, ret+24(FP) + RET + + // Return with match too long error +sequenceDecs_decodeSync_bmi2_error_match_len_too_big: + MOVQ ctx+16(FP), AX + MOVQ 16(SP), CX + MOVQ CX, 216(AX) + MOVQ $0x00000002, ret+24(FP) + RET + + // Return with match offset too long error +error_match_off_too_big: + MOVQ ctx+16(FP), AX + MOVQ 8(SP), CX + MOVQ CX, 224(AX) + MOVQ R11, 136(AX) + MOVQ $0x00000003, ret+24(FP) + RET + + // Return with not enough literals error +error_not_enough_literals: + MOVQ ctx+16(FP), AX + MOVQ 24(SP), CX + MOVQ CX, 208(AX) + MOVQ $0x00000004, ret+24(FP) + RET + + // Return with not enough output space error +error_not_enough_space: + MOVQ ctx+16(FP), AX + MOVQ 24(SP), CX + MOVQ CX, 208(AX) + MOVQ 16(SP), CX + MOVQ CX, 216(AX) + MOVQ R11, 136(AX) + MOVQ $0x00000005, ret+24(FP) + RET + +// func sequenceDecs_decodeSync_safe_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int +// Requires: CMOV, SSE +TEXT ·sequenceDecs_decodeSync_safe_amd64(SB), $64-32 + MOVQ br+8(FP), AX + MOVQ 32(AX), DX + MOVBQZX 40(AX), BX + MOVQ 24(AX), SI + MOVQ (AX), AX + ADDQ SI, AX + MOVQ AX, (SP) + MOVQ ctx+16(FP), AX + MOVQ 72(AX), DI + MOVQ 80(AX), R8 + MOVQ 88(AX), R9 + MOVQ 112(AX), R10 + MOVQ 128(AX), CX + MOVQ CX, 32(SP) + MOVQ 144(AX), R11 + MOVQ 136(AX), R12 + MOVQ 200(AX), CX + MOVQ CX, 56(SP) + MOVQ 176(AX), CX + MOVQ CX, 48(SP) + MOVQ 184(AX), AX + MOVQ AX, 40(SP) + MOVQ 40(SP), AX + ADDQ AX, 48(SP) + + // Calculate poiter to s.out[cap(s.out)] (a past-end pointer) + ADDQ R10, 32(SP) + + // outBase += outPosition + ADDQ R12, R10 + +sequenceDecs_decodeSync_safe_amd64_main_loop: + MOVQ (SP), R13 + + // Fill bitreader to have enough for the offset and match length. + CMPQ SI, $0x08 + JL sequenceDecs_decodeSync_safe_amd64_fill_byte_by_byte + MOVQ BX, AX + SHRQ $0x03, AX + SUBQ AX, R13 + MOVQ (R13), DX + SUBQ AX, SI + ANDQ $0x07, BX + JMP sequenceDecs_decodeSync_safe_amd64_fill_end + +sequenceDecs_decodeSync_safe_amd64_fill_byte_by_byte: + CMPQ SI, $0x00 + JLE sequenceDecs_decodeSync_safe_amd64_fill_end + CMPQ BX, $0x07 + JLE sequenceDecs_decodeSync_safe_amd64_fill_end + SHLQ $0x08, DX + SUBQ $0x01, R13 + SUBQ $0x01, SI + SUBQ $0x08, BX + MOVBQZX (R13), AX + ORQ AX, DX + JMP sequenceDecs_decodeSync_safe_amd64_fill_byte_by_byte + +sequenceDecs_decodeSync_safe_amd64_fill_end: + // Update offset + MOVQ R9, AX + MOVQ BX, CX + MOVQ DX, R14 + SHLQ CL, R14 + MOVB AH, CL + ADDQ CX, BX + NEGL CX + SHRQ CL, R14 + SHRQ $0x20, AX + TESTQ CX, CX + CMOVQEQ CX, R14 + ADDQ R14, AX + MOVQ AX, 8(SP) + + // Update match length + MOVQ R8, AX + MOVQ BX, CX + MOVQ DX, R14 + SHLQ CL, R14 + MOVB AH, CL + ADDQ CX, BX + NEGL CX + SHRQ CL, R14 + SHRQ $0x20, AX + TESTQ CX, CX + CMOVQEQ CX, R14 + ADDQ R14, AX + MOVQ AX, 16(SP) + + // Fill bitreader to have enough for the remaining + CMPQ SI, $0x08 + JL sequenceDecs_decodeSync_safe_amd64_fill_2_byte_by_byte + MOVQ BX, AX + SHRQ $0x03, AX + SUBQ AX, R13 + MOVQ (R13), DX + SUBQ AX, SI + ANDQ $0x07, BX + JMP sequenceDecs_decodeSync_safe_amd64_fill_2_end + +sequenceDecs_decodeSync_safe_amd64_fill_2_byte_by_byte: + CMPQ SI, $0x00 + JLE sequenceDecs_decodeSync_safe_amd64_fill_2_end + CMPQ BX, $0x07 + JLE sequenceDecs_decodeSync_safe_amd64_fill_2_end + SHLQ $0x08, DX + SUBQ $0x01, R13 + SUBQ $0x01, SI + SUBQ $0x08, BX + MOVBQZX (R13), AX + ORQ AX, DX + JMP sequenceDecs_decodeSync_safe_amd64_fill_2_byte_by_byte + +sequenceDecs_decodeSync_safe_amd64_fill_2_end: + // Update literal length + MOVQ DI, AX + MOVQ BX, CX + MOVQ DX, R14 + SHLQ CL, R14 + MOVB AH, CL + ADDQ CX, BX + NEGL CX + SHRQ CL, R14 + SHRQ $0x20, AX + TESTQ CX, CX + CMOVQEQ CX, R14 + ADDQ R14, AX + MOVQ AX, 24(SP) + + // Fill bitreader for state updates + MOVQ R13, (SP) + MOVQ R9, AX + SHRQ $0x08, AX + MOVBQZX AL, AX + MOVQ ctx+16(FP), CX + CMPQ 96(CX), $0x00 + JZ sequenceDecs_decodeSync_safe_amd64_skip_update + + // Update Literal Length State + MOVBQZX DI, R13 + SHRQ $0x10, DI + MOVWQZX DI, DI + CMPQ R13, $0x00 + JZ sequenceDecs_decodeSync_safe_amd64_llState_updateState_skip_zero + MOVQ BX, CX + ADDQ R13, BX + MOVQ DX, R14 + SHLQ CL, R14 + MOVQ R13, CX + NEGQ CX + SHRQ CL, R14 + ADDQ R14, DI + +sequenceDecs_decodeSync_safe_amd64_llState_updateState_skip_zero: + // Load ctx.llTable + MOVQ ctx+16(FP), CX + MOVQ (CX), CX + MOVQ (CX)(DI*8), DI + + // Update Match Length State + MOVBQZX R8, R13 + SHRQ $0x10, R8 + MOVWQZX R8, R8 + CMPQ R13, $0x00 + JZ sequenceDecs_decodeSync_safe_amd64_mlState_updateState_skip_zero + MOVQ BX, CX + ADDQ R13, BX + MOVQ DX, R14 + SHLQ CL, R14 + MOVQ R13, CX + NEGQ CX + SHRQ CL, R14 + ADDQ R14, R8 + +sequenceDecs_decodeSync_safe_amd64_mlState_updateState_skip_zero: + // Load ctx.mlTable + MOVQ ctx+16(FP), CX + MOVQ 24(CX), CX + MOVQ (CX)(R8*8), R8 + + // Update Offset State + MOVBQZX R9, R13 + SHRQ $0x10, R9 + MOVWQZX R9, R9 + CMPQ R13, $0x00 + JZ sequenceDecs_decodeSync_safe_amd64_ofState_updateState_skip_zero + MOVQ BX, CX + ADDQ R13, BX + MOVQ DX, R14 + SHLQ CL, R14 + MOVQ R13, CX + NEGQ CX + SHRQ CL, R14 + ADDQ R14, R9 + +sequenceDecs_decodeSync_safe_amd64_ofState_updateState_skip_zero: + // Load ctx.ofTable + MOVQ ctx+16(FP), CX + MOVQ 48(CX), CX + MOVQ (CX)(R9*8), R9 + +sequenceDecs_decodeSync_safe_amd64_skip_update: + // Adjust offset + MOVQ s+0(FP), CX + MOVQ 8(SP), R13 + CMPQ AX, $0x01 + JBE sequenceDecs_decodeSync_safe_amd64_adjust_offsetB_1_or_0 + MOVUPS 144(CX), X0 + MOVQ R13, 144(CX) + MOVUPS X0, 152(CX) + JMP sequenceDecs_decodeSync_safe_amd64_adjust_end + +sequenceDecs_decodeSync_safe_amd64_adjust_offsetB_1_or_0: + CMPQ 24(SP), $0x00000000 + JNE sequenceDecs_decodeSync_safe_amd64_adjust_offset_maybezero + INCQ R13 + JMP sequenceDecs_decodeSync_safe_amd64_adjust_offset_nonzero + +sequenceDecs_decodeSync_safe_amd64_adjust_offset_maybezero: + TESTQ R13, R13 + JNZ sequenceDecs_decodeSync_safe_amd64_adjust_offset_nonzero + MOVQ 144(CX), R13 + JMP sequenceDecs_decodeSync_safe_amd64_adjust_end + +sequenceDecs_decodeSync_safe_amd64_adjust_offset_nonzero: + MOVQ R13, AX + XORQ R14, R14 + MOVQ $-1, R15 + CMPQ R13, $0x03 + CMOVQEQ R14, AX + CMOVQEQ R15, R14 + LEAQ 144(CX), R15 + ADDQ (R15)(AX*8), R14 + JNZ sequenceDecs_decodeSync_safe_amd64_adjust_temp_valid + MOVQ $0x00000001, R14 + +sequenceDecs_decodeSync_safe_amd64_adjust_temp_valid: + CMPQ R13, $0x01 + JZ sequenceDecs_decodeSync_safe_amd64_adjust_skip + MOVQ 152(CX), AX + MOVQ AX, 160(CX) + +sequenceDecs_decodeSync_safe_amd64_adjust_skip: + MOVQ 144(CX), AX + MOVQ AX, 152(CX) + MOVQ R14, 144(CX) + MOVQ R14, R13 + +sequenceDecs_decodeSync_safe_amd64_adjust_end: + MOVQ R13, 8(SP) + + // Check values + MOVQ 16(SP), AX + MOVQ 24(SP), CX + LEAQ (AX)(CX*1), R14 + MOVQ s+0(FP), R15 + ADDQ R14, 256(R15) + MOVQ ctx+16(FP), R14 + SUBQ CX, 104(R14) + JS error_not_enough_literals + CMPQ AX, $0x00020002 + JA sequenceDecs_decodeSync_safe_amd64_error_match_len_too_big + TESTQ R13, R13 + JNZ sequenceDecs_decodeSync_safe_amd64_match_len_ofs_ok + TESTQ AX, AX + JNZ sequenceDecs_decodeSync_safe_amd64_error_match_len_ofs_mismatch + +sequenceDecs_decodeSync_safe_amd64_match_len_ofs_ok: + MOVQ 24(SP), AX + MOVQ 8(SP), CX + MOVQ 16(SP), R13 + + // Check if we have enough space in s.out + LEAQ (AX)(R13*1), R14 + ADDQ R10, R14 + CMPQ R14, 32(SP) + JA error_not_enough_space + + // Copy literals + TESTQ AX, AX + JZ check_offset + XORQ R14, R14 + TESTQ $0x00000001, AX + JZ copy_1_word + MOVB (R11)(R14*1), R15 + MOVB R15, (R10)(R14*1) + ADDQ $0x01, R14 + +copy_1_word: + TESTQ $0x00000002, AX + JZ copy_1_dword + MOVW (R11)(R14*1), R15 + MOVW R15, (R10)(R14*1) + ADDQ $0x02, R14 + +copy_1_dword: + TESTQ $0x00000004, AX + JZ copy_1_qword + MOVL (R11)(R14*1), R15 + MOVL R15, (R10)(R14*1) + ADDQ $0x04, R14 + +copy_1_qword: + TESTQ $0x00000008, AX + JZ copy_1_test + MOVQ (R11)(R14*1), R15 + MOVQ R15, (R10)(R14*1) + ADDQ $0x08, R14 + JMP copy_1_test + +copy_1: + MOVUPS (R11)(R14*1), X0 + MOVUPS X0, (R10)(R14*1) + ADDQ $0x10, R14 + +copy_1_test: + CMPQ R14, AX + JB copy_1 + ADDQ AX, R11 + ADDQ AX, R10 + ADDQ AX, R12 + + // Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize) +check_offset: + MOVQ R12, AX + ADDQ 40(SP), AX + CMPQ CX, AX + JG error_match_off_too_big + CMPQ CX, 56(SP) + JG error_match_off_too_big + + // Copy match from history + MOVQ CX, AX + SUBQ R12, AX + JLS copy_match + MOVQ 48(SP), R14 + SUBQ AX, R14 + CMPQ R13, AX + JGE copy_all_from_history + XORQ AX, AX + TESTQ $0x00000001, R13 + JZ copy_4_word + MOVB (R14)(AX*1), CL + MOVB CL, (R10)(AX*1) + ADDQ $0x01, AX + +copy_4_word: + TESTQ $0x00000002, R13 + JZ copy_4_dword + MOVW (R14)(AX*1), CX + MOVW CX, (R10)(AX*1) + ADDQ $0x02, AX + +copy_4_dword: + TESTQ $0x00000004, R13 + JZ copy_4_qword + MOVL (R14)(AX*1), CX + MOVL CX, (R10)(AX*1) + ADDQ $0x04, AX + +copy_4_qword: + TESTQ $0x00000008, R13 + JZ copy_4_test + MOVQ (R14)(AX*1), CX + MOVQ CX, (R10)(AX*1) + ADDQ $0x08, AX + JMP copy_4_test + +copy_4: + MOVUPS (R14)(AX*1), X0 + MOVUPS X0, (R10)(AX*1) + ADDQ $0x10, AX + +copy_4_test: + CMPQ AX, R13 + JB copy_4 + ADDQ R13, R12 + ADDQ R13, R10 + JMP handle_loop + JMP loop_finished + +copy_all_from_history: + XORQ R15, R15 + TESTQ $0x00000001, AX + JZ copy_5_word + MOVB (R14)(R15*1), BP + MOVB BP, (R10)(R15*1) + ADDQ $0x01, R15 + +copy_5_word: + TESTQ $0x00000002, AX + JZ copy_5_dword + MOVW (R14)(R15*1), BP + MOVW BP, (R10)(R15*1) + ADDQ $0x02, R15 + +copy_5_dword: + TESTQ $0x00000004, AX + JZ copy_5_qword + MOVL (R14)(R15*1), BP + MOVL BP, (R10)(R15*1) + ADDQ $0x04, R15 + +copy_5_qword: + TESTQ $0x00000008, AX + JZ copy_5_test + MOVQ (R14)(R15*1), BP + MOVQ BP, (R10)(R15*1) + ADDQ $0x08, R15 + JMP copy_5_test + +copy_5: + MOVUPS (R14)(R15*1), X0 + MOVUPS X0, (R10)(R15*1) + ADDQ $0x10, R15 + +copy_5_test: + CMPQ R15, AX + JB copy_5 + ADDQ AX, R10 + ADDQ AX, R12 + SUBQ AX, R13 + + // Copy match from the current buffer +copy_match: + TESTQ R13, R13 + JZ handle_loop + MOVQ R10, AX + SUBQ CX, AX + + // ml <= mo + CMPQ R13, CX + JA copy_overlapping_match + + // Copy non-overlapping match + XORQ CX, CX + TESTQ $0x00000001, R13 + JZ copy_2_word + MOVB (AX)(CX*1), R14 + MOVB R14, (R10)(CX*1) + ADDQ $0x01, CX + +copy_2_word: + TESTQ $0x00000002, R13 + JZ copy_2_dword + MOVW (AX)(CX*1), R14 + MOVW R14, (R10)(CX*1) + ADDQ $0x02, CX + +copy_2_dword: + TESTQ $0x00000004, R13 + JZ copy_2_qword + MOVL (AX)(CX*1), R14 + MOVL R14, (R10)(CX*1) + ADDQ $0x04, CX + +copy_2_qword: + TESTQ $0x00000008, R13 + JZ copy_2_test + MOVQ (AX)(CX*1), R14 + MOVQ R14, (R10)(CX*1) + ADDQ $0x08, CX + JMP copy_2_test + +copy_2: + MOVUPS (AX)(CX*1), X0 + MOVUPS X0, (R10)(CX*1) + ADDQ $0x10, CX + +copy_2_test: + CMPQ CX, R13 + JB copy_2 + ADDQ R13, R10 + ADDQ R13, R12 + JMP handle_loop + + // Copy overlapping match +copy_overlapping_match: + XORQ CX, CX + +copy_slow_3: + MOVB (AX)(CX*1), R14 + MOVB R14, (R10)(CX*1) + INCQ CX + CMPQ CX, R13 + JB copy_slow_3 + ADDQ R13, R10 + ADDQ R13, R12 + +handle_loop: + MOVQ ctx+16(FP), AX + DECQ 96(AX) + JNS sequenceDecs_decodeSync_safe_amd64_main_loop + +loop_finished: + MOVQ br+8(FP), AX + MOVQ DX, 32(AX) + MOVB BL, 40(AX) + MOVQ SI, 24(AX) + + // Update the context + MOVQ ctx+16(FP), AX + MOVQ R12, 136(AX) + MOVQ 144(AX), CX + SUBQ CX, R11 + MOVQ R11, 168(AX) + + // Return success + MOVQ $0x00000000, ret+24(FP) + RET + + // Return with match length error +sequenceDecs_decodeSync_safe_amd64_error_match_len_ofs_mismatch: + MOVQ 16(SP), AX + MOVQ ctx+16(FP), CX + MOVQ AX, 216(CX) + MOVQ $0x00000001, ret+24(FP) + RET + + // Return with match too long error +sequenceDecs_decodeSync_safe_amd64_error_match_len_too_big: + MOVQ ctx+16(FP), AX + MOVQ 16(SP), CX + MOVQ CX, 216(AX) + MOVQ $0x00000002, ret+24(FP) + RET + + // Return with match offset too long error +error_match_off_too_big: + MOVQ ctx+16(FP), AX + MOVQ 8(SP), CX + MOVQ CX, 224(AX) + MOVQ R12, 136(AX) + MOVQ $0x00000003, ret+24(FP) + RET + + // Return with not enough literals error +error_not_enough_literals: + MOVQ ctx+16(FP), AX + MOVQ 24(SP), CX + MOVQ CX, 208(AX) + MOVQ $0x00000004, ret+24(FP) + RET + + // Return with not enough output space error +error_not_enough_space: + MOVQ ctx+16(FP), AX + MOVQ 24(SP), CX + MOVQ CX, 208(AX) + MOVQ 16(SP), CX + MOVQ CX, 216(AX) + MOVQ R12, 136(AX) + MOVQ $0x00000005, ret+24(FP) + RET + +// func sequenceDecs_decodeSync_safe_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int +// Requires: BMI, BMI2, CMOV, SSE +TEXT ·sequenceDecs_decodeSync_safe_bmi2(SB), $64-32 + MOVQ br+8(FP), CX + MOVQ 32(CX), AX + MOVBQZX 40(CX), DX + MOVQ 24(CX), BX + MOVQ (CX), CX + ADDQ BX, CX + MOVQ CX, (SP) + MOVQ ctx+16(FP), CX + MOVQ 72(CX), SI + MOVQ 80(CX), DI + MOVQ 88(CX), R8 + MOVQ 112(CX), R9 + MOVQ 128(CX), R10 + MOVQ R10, 32(SP) + MOVQ 144(CX), R10 + MOVQ 136(CX), R11 + MOVQ 200(CX), R12 + MOVQ R12, 56(SP) + MOVQ 176(CX), R12 + MOVQ R12, 48(SP) + MOVQ 184(CX), CX + MOVQ CX, 40(SP) + MOVQ 40(SP), CX + ADDQ CX, 48(SP) + + // Calculate poiter to s.out[cap(s.out)] (a past-end pointer) + ADDQ R9, 32(SP) + + // outBase += outPosition + ADDQ R11, R9 + +sequenceDecs_decodeSync_safe_bmi2_main_loop: + MOVQ (SP), R12 + + // Fill bitreader to have enough for the offset and match length. + CMPQ BX, $0x08 + JL sequenceDecs_decodeSync_safe_bmi2_fill_byte_by_byte + MOVQ DX, CX + SHRQ $0x03, CX + SUBQ CX, R12 + MOVQ (R12), AX + SUBQ CX, BX + ANDQ $0x07, DX + JMP sequenceDecs_decodeSync_safe_bmi2_fill_end + +sequenceDecs_decodeSync_safe_bmi2_fill_byte_by_byte: + CMPQ BX, $0x00 + JLE sequenceDecs_decodeSync_safe_bmi2_fill_end + CMPQ DX, $0x07 + JLE sequenceDecs_decodeSync_safe_bmi2_fill_end + SHLQ $0x08, AX + SUBQ $0x01, R12 + SUBQ $0x01, BX + SUBQ $0x08, DX + MOVBQZX (R12), CX + ORQ CX, AX + JMP sequenceDecs_decodeSync_safe_bmi2_fill_byte_by_byte + +sequenceDecs_decodeSync_safe_bmi2_fill_end: + // Update offset + MOVQ $0x00000808, CX + BEXTRQ CX, R8, R13 + MOVQ AX, R14 + LEAQ (DX)(R13*1), CX + ROLQ CL, R14 + BZHIQ R13, R14, R14 + MOVQ CX, DX + MOVQ R8, CX + SHRQ $0x20, CX + ADDQ R14, CX + MOVQ CX, 8(SP) + + // Update match length + MOVQ $0x00000808, CX + BEXTRQ CX, DI, R13 + MOVQ AX, R14 + LEAQ (DX)(R13*1), CX + ROLQ CL, R14 + BZHIQ R13, R14, R14 + MOVQ CX, DX + MOVQ DI, CX + SHRQ $0x20, CX + ADDQ R14, CX + MOVQ CX, 16(SP) + + // Fill bitreader to have enough for the remaining + CMPQ BX, $0x08 + JL sequenceDecs_decodeSync_safe_bmi2_fill_2_byte_by_byte + MOVQ DX, CX + SHRQ $0x03, CX + SUBQ CX, R12 + MOVQ (R12), AX + SUBQ CX, BX + ANDQ $0x07, DX + JMP sequenceDecs_decodeSync_safe_bmi2_fill_2_end + +sequenceDecs_decodeSync_safe_bmi2_fill_2_byte_by_byte: + CMPQ BX, $0x00 + JLE sequenceDecs_decodeSync_safe_bmi2_fill_2_end + CMPQ DX, $0x07 + JLE sequenceDecs_decodeSync_safe_bmi2_fill_2_end + SHLQ $0x08, AX + SUBQ $0x01, R12 + SUBQ $0x01, BX + SUBQ $0x08, DX + MOVBQZX (R12), CX + ORQ CX, AX + JMP sequenceDecs_decodeSync_safe_bmi2_fill_2_byte_by_byte + +sequenceDecs_decodeSync_safe_bmi2_fill_2_end: + // Update literal length + MOVQ $0x00000808, CX + BEXTRQ CX, SI, R13 + MOVQ AX, R14 + LEAQ (DX)(R13*1), CX + ROLQ CL, R14 + BZHIQ R13, R14, R14 + MOVQ CX, DX + MOVQ SI, CX + SHRQ $0x20, CX + ADDQ R14, CX + MOVQ CX, 24(SP) + + // Fill bitreader for state updates + MOVQ R12, (SP) + MOVQ $0x00000808, CX + BEXTRQ CX, R8, R12 + MOVQ ctx+16(FP), CX + CMPQ 96(CX), $0x00 + JZ sequenceDecs_decodeSync_safe_bmi2_skip_update + + // Update Literal Length State + MOVBQZX SI, R13 + MOVQ $0x00001010, CX + BEXTRQ CX, SI, SI + LEAQ (DX)(R13*1), CX + MOVQ AX, R14 + MOVQ CX, DX + ROLQ CL, R14 + BZHIQ R13, R14, R14 + ADDQ R14, SI + + // Load ctx.llTable + MOVQ ctx+16(FP), CX + MOVQ (CX), CX + MOVQ (CX)(SI*8), SI + + // Update Match Length State + MOVBQZX DI, R13 + MOVQ $0x00001010, CX + BEXTRQ CX, DI, DI + LEAQ (DX)(R13*1), CX + MOVQ AX, R14 + MOVQ CX, DX + ROLQ CL, R14 + BZHIQ R13, R14, R14 + ADDQ R14, DI + + // Load ctx.mlTable + MOVQ ctx+16(FP), CX + MOVQ 24(CX), CX + MOVQ (CX)(DI*8), DI + + // Update Offset State + MOVBQZX R8, R13 + MOVQ $0x00001010, CX + BEXTRQ CX, R8, R8 + LEAQ (DX)(R13*1), CX + MOVQ AX, R14 + MOVQ CX, DX + ROLQ CL, R14 + BZHIQ R13, R14, R14 + ADDQ R14, R8 + + // Load ctx.ofTable + MOVQ ctx+16(FP), CX + MOVQ 48(CX), CX + MOVQ (CX)(R8*8), R8 + +sequenceDecs_decodeSync_safe_bmi2_skip_update: + // Adjust offset + MOVQ s+0(FP), CX + MOVQ 8(SP), R13 + CMPQ R12, $0x01 + JBE sequenceDecs_decodeSync_safe_bmi2_adjust_offsetB_1_or_0 + MOVUPS 144(CX), X0 + MOVQ R13, 144(CX) + MOVUPS X0, 152(CX) + JMP sequenceDecs_decodeSync_safe_bmi2_adjust_end + +sequenceDecs_decodeSync_safe_bmi2_adjust_offsetB_1_or_0: + CMPQ 24(SP), $0x00000000 + JNE sequenceDecs_decodeSync_safe_bmi2_adjust_offset_maybezero + INCQ R13 + JMP sequenceDecs_decodeSync_safe_bmi2_adjust_offset_nonzero + +sequenceDecs_decodeSync_safe_bmi2_adjust_offset_maybezero: + TESTQ R13, R13 + JNZ sequenceDecs_decodeSync_safe_bmi2_adjust_offset_nonzero + MOVQ 144(CX), R13 + JMP sequenceDecs_decodeSync_safe_bmi2_adjust_end + +sequenceDecs_decodeSync_safe_bmi2_adjust_offset_nonzero: + MOVQ R13, R12 + XORQ R14, R14 + MOVQ $-1, R15 + CMPQ R13, $0x03 + CMOVQEQ R14, R12 + CMOVQEQ R15, R14 + LEAQ 144(CX), R15 + ADDQ (R15)(R12*8), R14 + JNZ sequenceDecs_decodeSync_safe_bmi2_adjust_temp_valid + MOVQ $0x00000001, R14 + +sequenceDecs_decodeSync_safe_bmi2_adjust_temp_valid: + CMPQ R13, $0x01 + JZ sequenceDecs_decodeSync_safe_bmi2_adjust_skip + MOVQ 152(CX), R12 + MOVQ R12, 160(CX) + +sequenceDecs_decodeSync_safe_bmi2_adjust_skip: + MOVQ 144(CX), R12 + MOVQ R12, 152(CX) + MOVQ R14, 144(CX) + MOVQ R14, R13 + +sequenceDecs_decodeSync_safe_bmi2_adjust_end: + MOVQ R13, 8(SP) + + // Check values + MOVQ 16(SP), CX + MOVQ 24(SP), R12 + LEAQ (CX)(R12*1), R14 + MOVQ s+0(FP), R15 + ADDQ R14, 256(R15) + MOVQ ctx+16(FP), R14 + SUBQ R12, 104(R14) + JS error_not_enough_literals + CMPQ CX, $0x00020002 + JA sequenceDecs_decodeSync_safe_bmi2_error_match_len_too_big + TESTQ R13, R13 + JNZ sequenceDecs_decodeSync_safe_bmi2_match_len_ofs_ok + TESTQ CX, CX + JNZ sequenceDecs_decodeSync_safe_bmi2_error_match_len_ofs_mismatch + +sequenceDecs_decodeSync_safe_bmi2_match_len_ofs_ok: + MOVQ 24(SP), CX + MOVQ 8(SP), R12 + MOVQ 16(SP), R13 + + // Check if we have enough space in s.out + LEAQ (CX)(R13*1), R14 + ADDQ R9, R14 + CMPQ R14, 32(SP) + JA error_not_enough_space + + // Copy literals + TESTQ CX, CX + JZ check_offset + XORQ R14, R14 + TESTQ $0x00000001, CX + JZ copy_1_word + MOVB (R10)(R14*1), R15 + MOVB R15, (R9)(R14*1) + ADDQ $0x01, R14 + +copy_1_word: + TESTQ $0x00000002, CX + JZ copy_1_dword + MOVW (R10)(R14*1), R15 + MOVW R15, (R9)(R14*1) + ADDQ $0x02, R14 + +copy_1_dword: + TESTQ $0x00000004, CX + JZ copy_1_qword + MOVL (R10)(R14*1), R15 + MOVL R15, (R9)(R14*1) + ADDQ $0x04, R14 + +copy_1_qword: + TESTQ $0x00000008, CX + JZ copy_1_test + MOVQ (R10)(R14*1), R15 + MOVQ R15, (R9)(R14*1) + ADDQ $0x08, R14 + JMP copy_1_test + +copy_1: + MOVUPS (R10)(R14*1), X0 + MOVUPS X0, (R9)(R14*1) + ADDQ $0x10, R14 + +copy_1_test: + CMPQ R14, CX + JB copy_1 + ADDQ CX, R10 + ADDQ CX, R9 + ADDQ CX, R11 + + // Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize) +check_offset: + MOVQ R11, CX + ADDQ 40(SP), CX + CMPQ R12, CX + JG error_match_off_too_big + CMPQ R12, 56(SP) + JG error_match_off_too_big + + // Copy match from history + MOVQ R12, CX + SUBQ R11, CX + JLS copy_match + MOVQ 48(SP), R14 + SUBQ CX, R14 + CMPQ R13, CX + JGE copy_all_from_history + XORQ CX, CX + TESTQ $0x00000001, R13 + JZ copy_4_word + MOVB (R14)(CX*1), R12 + MOVB R12, (R9)(CX*1) + ADDQ $0x01, CX + +copy_4_word: + TESTQ $0x00000002, R13 + JZ copy_4_dword + MOVW (R14)(CX*1), R12 + MOVW R12, (R9)(CX*1) + ADDQ $0x02, CX + +copy_4_dword: + TESTQ $0x00000004, R13 + JZ copy_4_qword + MOVL (R14)(CX*1), R12 + MOVL R12, (R9)(CX*1) + ADDQ $0x04, CX + +copy_4_qword: + TESTQ $0x00000008, R13 + JZ copy_4_test + MOVQ (R14)(CX*1), R12 + MOVQ R12, (R9)(CX*1) + ADDQ $0x08, CX + JMP copy_4_test + +copy_4: + MOVUPS (R14)(CX*1), X0 + MOVUPS X0, (R9)(CX*1) + ADDQ $0x10, CX + +copy_4_test: + CMPQ CX, R13 + JB copy_4 + ADDQ R13, R11 + ADDQ R13, R9 + JMP handle_loop + JMP loop_finished + +copy_all_from_history: + XORQ R15, R15 + TESTQ $0x00000001, CX + JZ copy_5_word + MOVB (R14)(R15*1), BP + MOVB BP, (R9)(R15*1) + ADDQ $0x01, R15 + +copy_5_word: + TESTQ $0x00000002, CX + JZ copy_5_dword + MOVW (R14)(R15*1), BP + MOVW BP, (R9)(R15*1) + ADDQ $0x02, R15 + +copy_5_dword: + TESTQ $0x00000004, CX + JZ copy_5_qword + MOVL (R14)(R15*1), BP + MOVL BP, (R9)(R15*1) + ADDQ $0x04, R15 + +copy_5_qword: + TESTQ $0x00000008, CX + JZ copy_5_test + MOVQ (R14)(R15*1), BP + MOVQ BP, (R9)(R15*1) + ADDQ $0x08, R15 + JMP copy_5_test + +copy_5: + MOVUPS (R14)(R15*1), X0 + MOVUPS X0, (R9)(R15*1) + ADDQ $0x10, R15 + +copy_5_test: + CMPQ R15, CX + JB copy_5 + ADDQ CX, R9 + ADDQ CX, R11 + SUBQ CX, R13 + + // Copy match from the current buffer +copy_match: + TESTQ R13, R13 + JZ handle_loop + MOVQ R9, CX + SUBQ R12, CX + + // ml <= mo + CMPQ R13, R12 + JA copy_overlapping_match + + // Copy non-overlapping match + XORQ R12, R12 + TESTQ $0x00000001, R13 + JZ copy_2_word + MOVB (CX)(R12*1), R14 + MOVB R14, (R9)(R12*1) + ADDQ $0x01, R12 + +copy_2_word: + TESTQ $0x00000002, R13 + JZ copy_2_dword + MOVW (CX)(R12*1), R14 + MOVW R14, (R9)(R12*1) + ADDQ $0x02, R12 + +copy_2_dword: + TESTQ $0x00000004, R13 + JZ copy_2_qword + MOVL (CX)(R12*1), R14 + MOVL R14, (R9)(R12*1) + ADDQ $0x04, R12 + +copy_2_qword: + TESTQ $0x00000008, R13 + JZ copy_2_test + MOVQ (CX)(R12*1), R14 + MOVQ R14, (R9)(R12*1) + ADDQ $0x08, R12 + JMP copy_2_test + +copy_2: + MOVUPS (CX)(R12*1), X0 + MOVUPS X0, (R9)(R12*1) + ADDQ $0x10, R12 + +copy_2_test: + CMPQ R12, R13 + JB copy_2 + ADDQ R13, R9 + ADDQ R13, R11 + JMP handle_loop + + // Copy overlapping match +copy_overlapping_match: + XORQ R12, R12 + +copy_slow_3: + MOVB (CX)(R12*1), R14 + MOVB R14, (R9)(R12*1) + INCQ R12 + CMPQ R12, R13 + JB copy_slow_3 + ADDQ R13, R9 + ADDQ R13, R11 + +handle_loop: + MOVQ ctx+16(FP), CX + DECQ 96(CX) + JNS sequenceDecs_decodeSync_safe_bmi2_main_loop + +loop_finished: + MOVQ br+8(FP), CX + MOVQ AX, 32(CX) + MOVB DL, 40(CX) + MOVQ BX, 24(CX) + + // Update the context + MOVQ ctx+16(FP), AX + MOVQ R11, 136(AX) + MOVQ 144(AX), CX + SUBQ CX, R10 + MOVQ R10, 168(AX) + + // Return success + MOVQ $0x00000000, ret+24(FP) + RET + + // Return with match length error +sequenceDecs_decodeSync_safe_bmi2_error_match_len_ofs_mismatch: + MOVQ 16(SP), AX + MOVQ ctx+16(FP), CX + MOVQ AX, 216(CX) + MOVQ $0x00000001, ret+24(FP) + RET + + // Return with match too long error +sequenceDecs_decodeSync_safe_bmi2_error_match_len_too_big: + MOVQ ctx+16(FP), AX + MOVQ 16(SP), CX + MOVQ CX, 216(AX) + MOVQ $0x00000002, ret+24(FP) + RET + + // Return with match offset too long error +error_match_off_too_big: + MOVQ ctx+16(FP), AX + MOVQ 8(SP), CX + MOVQ CX, 224(AX) + MOVQ R11, 136(AX) + MOVQ $0x00000003, ret+24(FP) + RET + + // Return with not enough literals error +error_not_enough_literals: + MOVQ ctx+16(FP), AX + MOVQ 24(SP), CX + MOVQ CX, 208(AX) + MOVQ $0x00000004, ret+24(FP) + RET + + // Return with not enough output space error +error_not_enough_space: + MOVQ ctx+16(FP), AX + MOVQ 24(SP), CX + MOVQ CX, 208(AX) + MOVQ 16(SP), CX + MOVQ CX, 216(AX) + MOVQ R11, 136(AX) + MOVQ $0x00000005, ret+24(FP) + RET diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_generic.go b/vendor/github.com/klauspost/compress/zstd/seqdec_generic.go new file mode 100644 index 000000000..c3452bc3a --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/seqdec_generic.go @@ -0,0 +1,237 @@ +//go:build !amd64 || appengine || !gc || noasm +// +build !amd64 appengine !gc noasm + +package zstd + +import ( + "fmt" + "io" +) + +// decode sequences from the stream with the provided history but without dictionary. +func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) { + return false, nil +} + +// decode sequences from the stream without the provided history. +func (s *sequenceDecs) decode(seqs []seqVals) error { + br := s.br + + // Grab full sizes tables, to avoid bounds checks. + llTable, mlTable, ofTable := s.litLengths.fse.dt[:maxTablesize], s.matchLengths.fse.dt[:maxTablesize], s.offsets.fse.dt[:maxTablesize] + llState, mlState, ofState := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state + s.seqSize = 0 + litRemain := len(s.literals) + + maxBlockSize := maxCompressedBlockSize + if s.windowSize < maxBlockSize { + maxBlockSize = s.windowSize + } + for i := range seqs { + var ll, mo, ml int + if br.off > 4+((maxOffsetBits+16+16)>>3) { + // inlined function: + // ll, mo, ml = s.nextFast(br, llState, mlState, ofState) + + // Final will not read from stream. + var llB, mlB, moB uint8 + ll, llB = llState.final() + ml, mlB = mlState.final() + mo, moB = ofState.final() + + // extra bits are stored in reverse order. + br.fillFast() + mo += br.getBits(moB) + if s.maxBits > 32 { + br.fillFast() + } + ml += br.getBits(mlB) + ll += br.getBits(llB) + + if moB > 1 { + s.prevOffset[2] = s.prevOffset[1] + s.prevOffset[1] = s.prevOffset[0] + s.prevOffset[0] = mo + } else { + // mo = s.adjustOffset(mo, ll, moB) + // Inlined for rather big speedup + if ll == 0 { + // There is an exception though, when current sequence's literals_length = 0. + // In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2, + // an offset_value of 2 means Repeated_Offset3, and an offset_value of 3 means Repeated_Offset1 - 1_byte. + mo++ + } + + if mo == 0 { + mo = s.prevOffset[0] + } else { + var temp int + if mo == 3 { + temp = s.prevOffset[0] - 1 + } else { + temp = s.prevOffset[mo] + } + + if temp == 0 { + // 0 is not valid; input is corrupted; force offset to 1 + println("WARNING: temp was 0") + temp = 1 + } + + if mo != 1 { + s.prevOffset[2] = s.prevOffset[1] + } + s.prevOffset[1] = s.prevOffset[0] + s.prevOffset[0] = temp + mo = temp + } + } + br.fillFast() + } else { + if br.overread() { + if debugDecoder { + printf("reading sequence %d, exceeded available data\n", i) + } + return io.ErrUnexpectedEOF + } + ll, mo, ml = s.next(br, llState, mlState, ofState) + br.fill() + } + + if debugSequences { + println("Seq", i, "Litlen:", ll, "mo:", mo, "(abs) ml:", ml) + } + // Evaluate. + // We might be doing this async, so do it early. + if mo == 0 && ml > 0 { + return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml) + } + if ml > maxMatchLen { + return fmt.Errorf("match len (%d) bigger than max allowed length", ml) + } + s.seqSize += ll + ml + if s.seqSize > maxBlockSize { + return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize) + } + litRemain -= ll + if litRemain < 0 { + return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, litRemain+ll) + } + seqs[i] = seqVals{ + ll: ll, + ml: ml, + mo: mo, + } + if i == len(seqs)-1 { + // This is the last sequence, so we shouldn't update state. + break + } + + // Manually inlined, ~ 5-20% faster + // Update all 3 states at once. Approx 20% faster. + nBits := llState.nbBits() + mlState.nbBits() + ofState.nbBits() + if nBits == 0 { + llState = llTable[llState.newState()&maxTableMask] + mlState = mlTable[mlState.newState()&maxTableMask] + ofState = ofTable[ofState.newState()&maxTableMask] + } else { + bits := br.get32BitsFast(nBits) + lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31)) + llState = llTable[(llState.newState()+lowBits)&maxTableMask] + + lowBits = uint16(bits >> (ofState.nbBits() & 31)) + lowBits &= bitMask[mlState.nbBits()&15] + mlState = mlTable[(mlState.newState()+lowBits)&maxTableMask] + + lowBits = uint16(bits) & bitMask[ofState.nbBits()&15] + ofState = ofTable[(ofState.newState()+lowBits)&maxTableMask] + } + } + s.seqSize += litRemain + if s.seqSize > maxBlockSize { + return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize) + } + err := br.close() + if err != nil { + printf("Closing sequences: %v, %+v\n", err, *br) + } + return err +} + +// executeSimple handles cases when a dictionary is not used. +func (s *sequenceDecs) executeSimple(seqs []seqVals, hist []byte) error { + // Ensure we have enough output size... + if len(s.out)+s.seqSize > cap(s.out) { + addBytes := s.seqSize + len(s.out) + s.out = append(s.out, make([]byte, addBytes)...) + s.out = s.out[:len(s.out)-addBytes] + } + + if debugDecoder { + printf("Execute %d seqs with literals: %d into %d bytes\n", len(seqs), len(s.literals), s.seqSize) + } + + var t = len(s.out) + out := s.out[:t+s.seqSize] + + for _, seq := range seqs { + // Add literals + copy(out[t:], s.literals[:seq.ll]) + t += seq.ll + s.literals = s.literals[seq.ll:] + + // Malformed input + if seq.mo > t+len(hist) || seq.mo > s.windowSize { + return fmt.Errorf("match offset (%d) bigger than current history (%d)", seq.mo, t+len(hist)) + } + + // Copy from history. + if v := seq.mo - t; v > 0 { + // v is the start position in history from end. + start := len(hist) - v + if seq.ml > v { + // Some goes into the current block. + // Copy remainder of history + copy(out[t:], hist[start:]) + t += v + seq.ml -= v + } else { + copy(out[t:], hist[start:start+seq.ml]) + t += seq.ml + continue + } + } + + // We must be in the current buffer now + if seq.ml > 0 { + start := t - seq.mo + if seq.ml <= t-start { + // No overlap + copy(out[t:], out[start:start+seq.ml]) + t += seq.ml + } else { + // Overlapping copy + // Extend destination slice and copy one byte at the time. + src := out[start : start+seq.ml] + dst := out[t:] + dst = dst[:len(src)] + t += len(src) + // Destination is the space we just added. + for i := range src { + dst[i] = src[i] + } + } + } + } + // Add final literals + copy(out[t:], s.literals) + if debugDecoder { + t += len(s.literals) + if t != len(out) { + panic(fmt.Errorf("length mismatch, want %d, got %d, ss: %d", len(out), t, s.seqSize)) + } + } + s.out = out + + return nil +} diff --git a/vendor/github.com/klauspost/compress/zstd/zip.go b/vendor/github.com/klauspost/compress/zstd/zip.go index ffffcbc25..b53f606a1 100644 --- a/vendor/github.com/klauspost/compress/zstd/zip.go +++ b/vendor/github.com/klauspost/compress/zstd/zip.go @@ -21,23 +21,34 @@ const ZipMethodPKWare = 20 var zipReaderPool sync.Pool // newZipReader creates a pooled zip decompressor. -func newZipReader(r io.Reader) io.ReadCloser { - dec, ok := zipReaderPool.Get().(*Decoder) - if ok { - dec.Reset(r) - } else { - d, err := NewReader(r, WithDecoderConcurrency(1), WithDecoderLowmem(true)) - if err != nil { - panic(err) +func newZipReader(opts ...DOption) func(r io.Reader) io.ReadCloser { + pool := &zipReaderPool + if len(opts) > 0 { + opts = append([]DOption{WithDecoderLowmem(true), WithDecoderMaxWindow(128 << 20)}, opts...) + // Force concurrency 1 + opts = append(opts, WithDecoderConcurrency(1)) + // Create our own pool + pool = &sync.Pool{} + } + return func(r io.Reader) io.ReadCloser { + dec, ok := pool.Get().(*Decoder) + if ok { + dec.Reset(r) + } else { + d, err := NewReader(r, opts...) + if err != nil { + panic(err) + } + dec = d } - dec = d + return &pooledZipReader{dec: dec, pool: pool} } - return &pooledZipReader{dec: dec} } type pooledZipReader struct { - mu sync.Mutex // guards Close and Read - dec *Decoder + mu sync.Mutex // guards Close and Read + pool *sync.Pool + dec *Decoder } func (r *pooledZipReader) Read(p []byte) (n int, err error) { @@ -48,8 +59,8 @@ func (r *pooledZipReader) Read(p []byte) (n int, err error) { } dec, err := r.dec.Read(p) if err == io.EOF { - err = r.dec.Reset(nil) - zipReaderPool.Put(r.dec) + r.dec.Reset(nil) + r.pool.Put(r.dec) r.dec = nil } return dec, err @@ -61,7 +72,7 @@ func (r *pooledZipReader) Close() error { var err error if r.dec != nil { err = r.dec.Reset(nil) - zipReaderPool.Put(r.dec) + r.pool.Put(r.dec) r.dec = nil } return err @@ -115,6 +126,9 @@ func ZipCompressor(opts ...EOption) func(w io.Writer) (io.WriteCloser, error) { // ZipDecompressor returns a decompressor that can be registered with zip libraries. // See ZipCompressor for example. -func ZipDecompressor() func(r io.Reader) io.ReadCloser { - return newZipReader +// Options can be specified. WithDecoderConcurrency(1) is forced, +// and by default a 128MB maximum decompression window is specified. +// The window size can be overridden if required. +func ZipDecompressor(opts ...DOption) func(r io.Reader) io.ReadCloser { + return newZipReader(opts...) } |