From 5aef11026a850bb99d8394dba17810bf05d727bc Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 21 Apr 2021 07:58:32 +0000 Subject: Bump github.com/containers/storage from 1.29.0 to 1.30.0 Bumps [github.com/containers/storage](https://github.com/containers/storage) from 1.29.0 to 1.30.0. - [Release notes](https://github.com/containers/storage/releases) - [Changelog](https://github.com/containers/storage/blob/master/docs/containers-storage-changes.md) - [Commits](https://github.com/containers/storage/compare/v1.29.0...v1.30.0) Signed-off-by: dependabot[bot] --- go.mod | 2 +- go.sum | 9 +- vendor/github.com/containers/storage/VERSION | 2 +- .../containers/storage/drivers/btrfs/btrfs.go | 44 +- vendor/github.com/containers/storage/go.mod | 2 +- vendor/github.com/containers/storage/go.sum | 6 +- .../containers/storage/pkg/unshare/unshare.go | 22 + vendor/github.com/golang/snappy/.gitignore | 16 + vendor/github.com/golang/snappy/AUTHORS | 17 + vendor/github.com/golang/snappy/CONTRIBUTORS | 39 ++ vendor/github.com/golang/snappy/LICENSE | 27 + vendor/github.com/golang/snappy/README | 107 +++ vendor/github.com/golang/snappy/decode.go | 241 +++++++ vendor/github.com/golang/snappy/decode_amd64.s | 490 ++++++++++++++ vendor/github.com/golang/snappy/decode_arm64.s | 494 ++++++++++++++ vendor/github.com/golang/snappy/decode_asm.go | 15 + vendor/github.com/golang/snappy/decode_other.go | 115 ++++ vendor/github.com/golang/snappy/encode.go | 289 ++++++++ vendor/github.com/golang/snappy/encode_amd64.s | 730 +++++++++++++++++++++ vendor/github.com/golang/snappy/encode_arm64.s | 722 ++++++++++++++++++++ vendor/github.com/golang/snappy/encode_asm.go | 30 + vendor/github.com/golang/snappy/encode_other.go | 238 +++++++ vendor/github.com/golang/snappy/go.mod | 1 + vendor/github.com/golang/snappy/snappy.go | 98 +++ .../github.com/klauspost/compress/flate/deflate.go | 3 +- .../github.com/klauspost/compress/fse/compress.go | 1 - .../klauspost/compress/huff0/compress.go | 1 - .../klauspost/compress/snappy/.gitignore | 16 - .../github.com/klauspost/compress/snappy/AUTHORS | 15 - .../klauspost/compress/snappy/CONTRIBUTORS | 37 -- .../github.com/klauspost/compress/snappy/LICENSE | 27 - vendor/github.com/klauspost/compress/snappy/README | 107 --- .../github.com/klauspost/compress/snappy/decode.go | 237 ------- .../klauspost/compress/snappy/decode_amd64.go | 14 - .../klauspost/compress/snappy/decode_amd64.s | 482 -------------- .../klauspost/compress/snappy/decode_other.go | 115 ---- .../github.com/klauspost/compress/snappy/encode.go | 285 -------- .../klauspost/compress/snappy/encode_amd64.go | 29 - .../klauspost/compress/snappy/encode_amd64.s | 730 --------------------- .../klauspost/compress/snappy/encode_other.go | 238 ------- .../klauspost/compress/snappy/runbench.cmd | 2 - .../github.com/klauspost/compress/snappy/snappy.go | 98 --- .../github.com/klauspost/compress/zstd/blockenc.go | 12 +- .../klauspost/compress/zstd/decodeheader.go | 4 +- .../github.com/klauspost/compress/zstd/decoder.go | 26 +- .../klauspost/compress/zstd/decoder_options.go | 5 +- .../github.com/klauspost/compress/zstd/enc_base.go | 13 +- .../github.com/klauspost/compress/zstd/encoder.go | 2 +- .../klauspost/compress/zstd/encoder_options.go | 2 +- .../github.com/klauspost/compress/zstd/framedec.go | 2 +- .../klauspost/compress/zstd/fse_encoder.go | 1 - .../klauspost/compress/zstd/fse_predefined.go | 2 +- .../github.com/klauspost/compress/zstd/seqenc.go | 1 - .../github.com/klauspost/compress/zstd/snappy.go | 3 +- vendor/github.com/klauspost/compress/zstd/zstd.go | 18 +- vendor/modules.txt | 7 +- 56 files changed, 3753 insertions(+), 2538 deletions(-) create mode 100644 vendor/github.com/golang/snappy/.gitignore create mode 100644 vendor/github.com/golang/snappy/AUTHORS create mode 100644 vendor/github.com/golang/snappy/CONTRIBUTORS create mode 100644 vendor/github.com/golang/snappy/LICENSE create mode 100644 vendor/github.com/golang/snappy/README create mode 100644 vendor/github.com/golang/snappy/decode.go create mode 100644 vendor/github.com/golang/snappy/decode_amd64.s create mode 100644 vendor/github.com/golang/snappy/decode_arm64.s create mode 100644 vendor/github.com/golang/snappy/decode_asm.go create mode 100644 vendor/github.com/golang/snappy/decode_other.go create mode 100644 vendor/github.com/golang/snappy/encode.go create mode 100644 vendor/github.com/golang/snappy/encode_amd64.s create mode 100644 vendor/github.com/golang/snappy/encode_arm64.s create mode 100644 vendor/github.com/golang/snappy/encode_asm.go create mode 100644 vendor/github.com/golang/snappy/encode_other.go create mode 100644 vendor/github.com/golang/snappy/go.mod create mode 100644 vendor/github.com/golang/snappy/snappy.go delete mode 100644 vendor/github.com/klauspost/compress/snappy/.gitignore delete mode 100644 vendor/github.com/klauspost/compress/snappy/AUTHORS delete mode 100644 vendor/github.com/klauspost/compress/snappy/CONTRIBUTORS delete mode 100644 vendor/github.com/klauspost/compress/snappy/LICENSE delete mode 100644 vendor/github.com/klauspost/compress/snappy/README delete mode 100644 vendor/github.com/klauspost/compress/snappy/decode.go delete mode 100644 vendor/github.com/klauspost/compress/snappy/decode_amd64.go delete mode 100644 vendor/github.com/klauspost/compress/snappy/decode_amd64.s delete mode 100644 vendor/github.com/klauspost/compress/snappy/decode_other.go delete mode 100644 vendor/github.com/klauspost/compress/snappy/encode.go delete mode 100644 vendor/github.com/klauspost/compress/snappy/encode_amd64.go delete mode 100644 vendor/github.com/klauspost/compress/snappy/encode_amd64.s delete mode 100644 vendor/github.com/klauspost/compress/snappy/encode_other.go delete mode 100644 vendor/github.com/klauspost/compress/snappy/runbench.cmd delete mode 100644 vendor/github.com/klauspost/compress/snappy/snappy.go diff --git a/go.mod b/go.mod index a225cb89e..d36d6ab48 100644 --- a/go.mod +++ b/go.mod @@ -16,7 +16,7 @@ require ( github.com/containers/image/v5 v5.11.0 github.com/containers/ocicrypt v1.1.1 github.com/containers/psgo v1.5.2 - github.com/containers/storage v1.29.0 + github.com/containers/storage v1.30.0 github.com/coreos/go-systemd/v22 v22.3.1 github.com/coreos/stream-metadata-go v0.0.0-20210225230131-70edb9eb47b3 github.com/cri-o/ocicni v0.2.1-0.20210301205850-541cf7c703cf diff --git a/go.sum b/go.sum index 87fc57360..e0acd7cb6 100644 --- a/go.sum +++ b/go.sum @@ -213,8 +213,8 @@ github.com/containers/storage v1.23.5/go.mod h1:ha26Q6ngehFNhf3AWoXldvAvwI4jFe3E github.com/containers/storage v1.24.8/go.mod h1:YC+2pY8SkfEAcZkwycxYbpK8EiRbx5soPPwz9dxe4IQ= github.com/containers/storage v1.28.0/go.mod h1:ixAwO7Bj31cigqPEG7aCz+PYmxkDxbIFdUFioYdxbzI= github.com/containers/storage v1.28.1/go.mod h1:5bwiMh2LkrN3AWIfDFMH7A/xbVNLcve+oeXYvHvW8cc= -github.com/containers/storage v1.29.0 h1:l3Vh6+IiMKLGfQZ3rDkF84m+KF1Qb0XEcilWC+pYo2o= -github.com/containers/storage v1.29.0/go.mod h1:u84RU4CCufGeJBNTRNwMB+FoE+AiFeFw4SsMoqAOeCM= +github.com/containers/storage v1.30.0 h1:KS6zmoPyy0Qcx1HCCiseQ0ysSckRvtiuoVpIGh9iwQA= +github.com/containers/storage v1.30.0/go.mod h1:M/xn0pg6ReYFrLtWl5YELI/a4Xjq+Z3e5GJxQrJCcDI= github.com/coreos/bbolt v1.3.2/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkEiiKk= github.com/coreos/etcd v3.3.13+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= github.com/coreos/go-iptables v0.4.5/go.mod h1:/mVI274lEDI2ns62jHCDnCyBF9Iwsmekav8Dbxlm1MU= @@ -379,6 +379,8 @@ github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QD github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.4.3 h1:JjCZWpVbqXDqFVmTfYWEVTMIYrL/NPdPSCHPJ0T/raM= github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= +github.com/golang/snappy v0.0.3 h1:fHPg5GQYlCeLIPB9BZqMVR5nR9A+IM5zcgeTdjMYmLA= +github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= @@ -497,8 +499,9 @@ github.com/klauspost/compress v1.11.3/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYs github.com/klauspost/compress v1.11.5/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= github.com/klauspost/compress v1.11.7/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= github.com/klauspost/compress v1.11.12/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= -github.com/klauspost/compress v1.11.13 h1:eSvu8Tmq6j2psUJqJrLcWH6K3w5Dwc+qipbaA6eVEN4= github.com/klauspost/compress v1.11.13/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= +github.com/klauspost/compress v1.12.1 h1:/+xsCsk06wE38cyiqOR/o7U2fSftcH72xD+BQXmja/g= +github.com/klauspost/compress v1.12.1/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg= github.com/klauspost/pgzip v1.2.5 h1:qnWYvvKqedOF2ulHpMG72XQol4ILEJ8k2wwRl/Km8oE= github.com/klauspost/pgzip v1.2.5/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= diff --git a/vendor/github.com/containers/storage/VERSION b/vendor/github.com/containers/storage/VERSION index 5e57fb895..034552a83 100644 --- a/vendor/github.com/containers/storage/VERSION +++ b/vendor/github.com/containers/storage/VERSION @@ -1 +1 @@ -1.29.0 +1.30.0 diff --git a/vendor/github.com/containers/storage/drivers/btrfs/btrfs.go b/vendor/github.com/containers/storage/drivers/btrfs/btrfs.go index f0d8b548b..3903b1ddd 100644 --- a/vendor/github.com/containers/storage/drivers/btrfs/btrfs.go +++ b/vendor/github.com/containers/storage/drivers/btrfs/btrfs.go @@ -88,7 +88,7 @@ func Init(home string, options graphdriver.Options) (graphdriver.Driver, error) } if userDiskQuota { - if err := driver.subvolEnableQuota(); err != nil { + if err := driver.enableQuota(); err != nil { return nil, err } } @@ -159,10 +159,6 @@ func (d *Driver) Metadata(id string) (map[string]string, error) { // Cleanup unmounts the home directory. func (d *Driver) Cleanup() error { - if err := d.subvolDisableQuota(); err != nil { - return err - } - return mount.Unmount(d.home) } @@ -320,7 +316,7 @@ func (d *Driver) updateQuotaStatus() { d.once.Do(func() { if !d.quotaEnabled { // In case quotaEnabled is not set, check qgroup and update quotaEnabled as needed - if err := subvolQgroupStatus(d.home); err != nil { + if err := qgroupStatus(d.home); err != nil { // quota is still not enabled return } @@ -329,7 +325,7 @@ func (d *Driver) updateQuotaStatus() { }) } -func (d *Driver) subvolEnableQuota() error { +func (d *Driver) enableQuota() error { d.updateQuotaStatus() if d.quotaEnabled { @@ -355,32 +351,6 @@ func (d *Driver) subvolEnableQuota() error { return nil } -func (d *Driver) subvolDisableQuota() error { - d.updateQuotaStatus() - - if !d.quotaEnabled { - return nil - } - - dir, err := openDir(d.home) - if err != nil { - return err - } - defer closeDir(dir) - - var args C.struct_btrfs_ioctl_quota_ctl_args - args.cmd = C.BTRFS_QUOTA_CTL_DISABLE - _, _, errno := unix.Syscall(unix.SYS_IOCTL, getDirFd(dir), C.BTRFS_IOC_QUOTA_CTL, - uintptr(unsafe.Pointer(&args))) - if errno != 0 { - return fmt.Errorf("Failed to disable btrfs quota for %s: %v", dir, errno.Error()) - } - - d.quotaEnabled = false - - return nil -} - func (d *Driver) subvolRescanQuota() error { d.updateQuotaStatus() @@ -423,11 +393,11 @@ func subvolLimitQgroup(path string, size uint64) error { return nil } -// subvolQgroupStatus performs a BTRFS_IOC_TREE_SEARCH on the root path +// qgroupStatus performs a BTRFS_IOC_TREE_SEARCH on the root path // with search key of BTRFS_QGROUP_STATUS_KEY. // In case qgroup is enabled, the returned key type will match BTRFS_QGROUP_STATUS_KEY. // For more details please see https://github.com/kdave/btrfs-progs/blob/v4.9/qgroup.c#L1035 -func subvolQgroupStatus(path string) error { +func qgroupStatus(path string) error { dir, err := openDir(path) if err != nil { return err @@ -603,7 +573,7 @@ func (d *Driver) setStorageSize(dir string, driver *Driver) error { return fmt.Errorf("btrfs: storage size cannot be less than %s", units.HumanSize(float64(d.options.minSpace))) } - if err := d.subvolEnableQuota(); err != nil { + if err := d.enableQuota(); err != nil { return err } @@ -674,7 +644,7 @@ func (d *Driver) Get(id string, options graphdriver.MountOpts) (string, error) { if quota, err := ioutil.ReadFile(d.quotasDirID(id)); err == nil { if size, err := strconv.ParseUint(string(quota), 10, 64); err == nil && size >= d.options.minSpace { - if err := d.subvolEnableQuota(); err != nil { + if err := d.enableQuota(); err != nil { return "", err } if err := subvolLimitQgroup(dir, size); err != nil { diff --git a/vendor/github.com/containers/storage/go.mod b/vendor/github.com/containers/storage/go.mod index 3a455653a..6a34d06c6 100644 --- a/vendor/github.com/containers/storage/go.mod +++ b/vendor/github.com/containers/storage/go.mod @@ -9,7 +9,7 @@ require ( github.com/docker/go-units v0.4.0 github.com/google/go-intervals v0.0.2 github.com/hashicorp/go-multierror v1.1.1 - github.com/klauspost/compress v1.11.13 + github.com/klauspost/compress v1.12.1 github.com/klauspost/pgzip v1.2.5 github.com/mattn/go-shellwords v1.0.11 github.com/mistifyio/go-zfs v2.1.2-0.20190413222219-f784269be439+incompatible diff --git a/vendor/github.com/containers/storage/go.sum b/vendor/github.com/containers/storage/go.sum index 1de8a9825..28edd4a7e 100644 --- a/vendor/github.com/containers/storage/go.sum +++ b/vendor/github.com/containers/storage/go.sum @@ -267,6 +267,8 @@ github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvq github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= +github.com/golang/snappy v0.0.3 h1:fHPg5GQYlCeLIPB9BZqMVR5nR9A+IM5zcgeTdjMYmLA= +github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= @@ -337,8 +339,8 @@ github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQL github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/compress v1.11.3/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= -github.com/klauspost/compress v1.11.13 h1:eSvu8Tmq6j2psUJqJrLcWH6K3w5Dwc+qipbaA6eVEN4= -github.com/klauspost/compress v1.11.13/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= +github.com/klauspost/compress v1.12.1 h1:/+xsCsk06wE38cyiqOR/o7U2fSftcH72xD+BQXmja/g= +github.com/klauspost/compress v1.12.1/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg= github.com/klauspost/pgzip v1.2.5 h1:qnWYvvKqedOF2ulHpMG72XQol4ILEJ8k2wwRl/Km8oE= github.com/klauspost/pgzip v1.2.5/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= diff --git a/vendor/github.com/containers/storage/pkg/unshare/unshare.go b/vendor/github.com/containers/storage/pkg/unshare/unshare.go index a9210b0bf..53cfeb0ec 100644 --- a/vendor/github.com/containers/storage/pkg/unshare/unshare.go +++ b/vendor/github.com/containers/storage/pkg/unshare/unshare.go @@ -7,12 +7,17 @@ import ( "sync" "github.com/pkg/errors" + "github.com/syndtr/gocapability/capability" ) var ( homeDirOnce sync.Once homeDirErr error homeDir string + + hasCapSysAdminOnce sync.Once + hasCapSysAdminRet bool + hasCapSysAdminErr error ) // HomeDir returns the home directory for the current user. @@ -32,3 +37,20 @@ func HomeDir() (string, error) { }) return homeDir, homeDirErr } + +// HasCapSysAdmin returns whether the current process has CAP_SYS_ADMIN. +func HasCapSysAdmin() (bool, error) { + hasCapSysAdminOnce.Do(func() { + currentCaps, err := capability.NewPid2(0) + if err != nil { + hasCapSysAdminErr = err + return + } + if err = currentCaps.Load(); err != nil { + hasCapSysAdminErr = err + return + } + hasCapSysAdminRet = currentCaps.Get(capability.EFFECTIVE, capability.CAP_SYS_ADMIN) + }) + return hasCapSysAdminRet, hasCapSysAdminErr +} diff --git a/vendor/github.com/golang/snappy/.gitignore b/vendor/github.com/golang/snappy/.gitignore new file mode 100644 index 000000000..042091d9b --- /dev/null +++ b/vendor/github.com/golang/snappy/.gitignore @@ -0,0 +1,16 @@ +cmd/snappytool/snappytool +testdata/bench + +# These explicitly listed benchmark data files are for an obsolete version of +# snappy_test.go. +testdata/alice29.txt +testdata/asyoulik.txt +testdata/fireworks.jpeg +testdata/geo.protodata +testdata/html +testdata/html_x_4 +testdata/kppkn.gtb +testdata/lcet10.txt +testdata/paper-100k.pdf +testdata/plrabn12.txt +testdata/urls.10K diff --git a/vendor/github.com/golang/snappy/AUTHORS b/vendor/github.com/golang/snappy/AUTHORS new file mode 100644 index 000000000..203e84eba --- /dev/null +++ b/vendor/github.com/golang/snappy/AUTHORS @@ -0,0 +1,17 @@ +# This is the official list of Snappy-Go authors for copyright purposes. +# This file is distinct from the CONTRIBUTORS files. +# See the latter for an explanation. + +# Names should be added to this file as +# Name or Organization +# The email address is not required for organizations. + +# Please keep the list sorted. + +Amazon.com, Inc +Damian Gryski +Google Inc. +Jan Mercl <0xjnml@gmail.com> +Klaus Post +Rodolfo Carvalho +Sebastien Binet diff --git a/vendor/github.com/golang/snappy/CONTRIBUTORS b/vendor/github.com/golang/snappy/CONTRIBUTORS new file mode 100644 index 000000000..d9914732b --- /dev/null +++ b/vendor/github.com/golang/snappy/CONTRIBUTORS @@ -0,0 +1,39 @@ +# This is the official list of people who can contribute +# (and typically have contributed) code to the Snappy-Go repository. +# The AUTHORS file lists the copyright holders; this file +# lists people. For example, Google employees are listed here +# but not in AUTHORS, because Google holds the copyright. +# +# The submission process automatically checks to make sure +# that people submitting code are listed in this file (by email address). +# +# Names should be added to this file only after verifying that +# the individual or the individual's organization has agreed to +# the appropriate Contributor License Agreement, found here: +# +# http://code.google.com/legal/individual-cla-v1.0.html +# http://code.google.com/legal/corporate-cla-v1.0.html +# +# The agreement for individuals can be filled out on the web. +# +# When adding J Random Contributor's name to this file, +# either J's name or J's organization's name should be +# added to the AUTHORS file, depending on whether the +# individual or corporate CLA was used. + +# Names should be added to this file like so: +# Name + +# Please keep the list sorted. + +Damian Gryski +Jan Mercl <0xjnml@gmail.com> +Jonathan Swinney +Kai Backman +Klaus Post +Marc-Antoine Ruel +Nigel Tao +Rob Pike +Rodolfo Carvalho +Russ Cox +Sebastien Binet diff --git a/vendor/github.com/golang/snappy/LICENSE b/vendor/github.com/golang/snappy/LICENSE new file mode 100644 index 000000000..6050c10f4 --- /dev/null +++ b/vendor/github.com/golang/snappy/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2011 The Snappy-Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/github.com/golang/snappy/README b/vendor/github.com/golang/snappy/README new file mode 100644 index 000000000..cea12879a --- /dev/null +++ b/vendor/github.com/golang/snappy/README @@ -0,0 +1,107 @@ +The Snappy compression format in the Go programming language. + +To download and install from source: +$ go get github.com/golang/snappy + +Unless otherwise noted, the Snappy-Go source files are distributed +under the BSD-style license found in the LICENSE file. + + + +Benchmarks. + +The golang/snappy benchmarks include compressing (Z) and decompressing (U) ten +or so files, the same set used by the C++ Snappy code (github.com/google/snappy +and note the "google", not "golang"). On an "Intel(R) Core(TM) i7-3770 CPU @ +3.40GHz", Go's GOARCH=amd64 numbers as of 2016-05-29: + +"go test -test.bench=." + +_UFlat0-8 2.19GB/s ± 0% html +_UFlat1-8 1.41GB/s ± 0% urls +_UFlat2-8 23.5GB/s ± 2% jpg +_UFlat3-8 1.91GB/s ± 0% jpg_200 +_UFlat4-8 14.0GB/s ± 1% pdf +_UFlat5-8 1.97GB/s ± 0% html4 +_UFlat6-8 814MB/s ± 0% txt1 +_UFlat7-8 785MB/s ± 0% txt2 +_UFlat8-8 857MB/s ± 0% txt3 +_UFlat9-8 719MB/s ± 1% txt4 +_UFlat10-8 2.84GB/s ± 0% pb +_UFlat11-8 1.05GB/s ± 0% gaviota + +_ZFlat0-8 1.04GB/s ± 0% html +_ZFlat1-8 534MB/s ± 0% urls +_ZFlat2-8 15.7GB/s ± 1% jpg +_ZFlat3-8 740MB/s ± 3% jpg_200 +_ZFlat4-8 9.20GB/s ± 1% pdf +_ZFlat5-8 991MB/s ± 0% html4 +_ZFlat6-8 379MB/s ± 0% txt1 +_ZFlat7-8 352MB/s ± 0% txt2 +_ZFlat8-8 396MB/s ± 1% txt3 +_ZFlat9-8 327MB/s ± 1% txt4 +_ZFlat10-8 1.33GB/s ± 1% pb +_ZFlat11-8 605MB/s ± 1% gaviota + + + +"go test -test.bench=. -tags=noasm" + +_UFlat0-8 621MB/s ± 2% html +_UFlat1-8 494MB/s ± 1% urls +_UFlat2-8 23.2GB/s ± 1% jpg +_UFlat3-8 1.12GB/s ± 1% jpg_200 +_UFlat4-8 4.35GB/s ± 1% pdf +_UFlat5-8 609MB/s ± 0% html4 +_UFlat6-8 296MB/s ± 0% txt1 +_UFlat7-8 288MB/s ± 0% txt2 +_UFlat8-8 309MB/s ± 1% txt3 +_UFlat9-8 280MB/s ± 1% txt4 +_UFlat10-8 753MB/s ± 0% pb +_UFlat11-8 400MB/s ± 0% gaviota + +_ZFlat0-8 409MB/s ± 1% html +_ZFlat1-8 250MB/s ± 1% urls +_ZFlat2-8 12.3GB/s ± 1% jpg +_ZFlat3-8 132MB/s ± 0% jpg_200 +_ZFlat4-8 2.92GB/s ± 0% pdf +_ZFlat5-8 405MB/s ± 1% html4 +_ZFlat6-8 179MB/s ± 1% txt1 +_ZFlat7-8 170MB/s ± 1% txt2 +_ZFlat8-8 189MB/s ± 1% txt3 +_ZFlat9-8 164MB/s ± 1% txt4 +_ZFlat10-8 479MB/s ± 1% pb +_ZFlat11-8 270MB/s ± 1% gaviota + + + +For comparison (Go's encoded output is byte-for-byte identical to C++'s), here +are the numbers from C++ Snappy's + +make CXXFLAGS="-O2 -DNDEBUG -g" clean snappy_unittest.log && cat snappy_unittest.log + +BM_UFlat/0 2.4GB/s html +BM_UFlat/1 1.4GB/s urls +BM_UFlat/2 21.8GB/s jpg +BM_UFlat/3 1.5GB/s jpg_200 +BM_UFlat/4 13.3GB/s pdf +BM_UFlat/5 2.1GB/s html4 +BM_UFlat/6 1.0GB/s txt1 +BM_UFlat/7 959.4MB/s txt2 +BM_UFlat/8 1.0GB/s txt3 +BM_UFlat/9 864.5MB/s txt4 +BM_UFlat/10 2.9GB/s pb +BM_UFlat/11 1.2GB/s gaviota + +BM_ZFlat/0 944.3MB/s html (22.31 %) +BM_ZFlat/1 501.6MB/s urls (47.78 %) +BM_ZFlat/2 14.3GB/s jpg (99.95 %) +BM_ZFlat/3 538.3MB/s jpg_200 (73.00 %) +BM_ZFlat/4 8.3GB/s pdf (83.30 %) +BM_ZFlat/5 903.5MB/s html4 (22.52 %) +BM_ZFlat/6 336.0MB/s txt1 (57.88 %) +BM_ZFlat/7 312.3MB/s txt2 (61.91 %) +BM_ZFlat/8 353.1MB/s txt3 (54.99 %) +BM_ZFlat/9 289.9MB/s txt4 (66.26 %) +BM_ZFlat/10 1.2GB/s pb (19.68 %) +BM_ZFlat/11 527.4MB/s gaviota (37.72 %) diff --git a/vendor/github.com/golang/snappy/decode.go b/vendor/github.com/golang/snappy/decode.go new file mode 100644 index 000000000..f1e04b172 --- /dev/null +++ b/vendor/github.com/golang/snappy/decode.go @@ -0,0 +1,241 @@ +// Copyright 2011 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package snappy + +import ( + "encoding/binary" + "errors" + "io" +) + +var ( + // ErrCorrupt reports that the input is invalid. + ErrCorrupt = errors.New("snappy: corrupt input") + // ErrTooLarge reports that the uncompressed length is too large. + ErrTooLarge = errors.New("snappy: decoded block is too large") + // ErrUnsupported reports that the input isn't supported. + ErrUnsupported = errors.New("snappy: unsupported input") + + errUnsupportedLiteralLength = errors.New("snappy: unsupported literal length") +) + +// DecodedLen returns the length of the decoded block. +func DecodedLen(src []byte) (int, error) { + v, _, err := decodedLen(src) + return v, err +} + +// decodedLen returns the length of the decoded block and the number of bytes +// that the length header occupied. +func decodedLen(src []byte) (blockLen, headerLen int, err error) { + v, n := binary.Uvarint(src) + if n <= 0 || v > 0xffffffff { + return 0, 0, ErrCorrupt + } + + const wordSize = 32 << (^uint(0) >> 32 & 1) + if wordSize == 32 && v > 0x7fffffff { + return 0, 0, ErrTooLarge + } + return int(v), n, nil +} + +const ( + decodeErrCodeCorrupt = 1 + decodeErrCodeUnsupportedLiteralLength = 2 +) + +// Decode returns the decoded form of src. The returned slice may be a sub- +// slice of dst if dst was large enough to hold the entire decoded block. +// Otherwise, a newly allocated slice will be returned. +// +// The dst and src must not overlap. It is valid to pass a nil dst. +// +// Decode handles the Snappy block format, not the Snappy stream format. +func Decode(dst, src []byte) ([]byte, error) { + dLen, s, err := decodedLen(src) + if err != nil { + return nil, err + } + if dLen <= len(dst) { + dst = dst[:dLen] + } else { + dst = make([]byte, dLen) + } + switch decode(dst, src[s:]) { + case 0: + return dst, nil + case decodeErrCodeUnsupportedLiteralLength: + return nil, errUnsupportedLiteralLength + } + return nil, ErrCorrupt +} + +// NewReader returns a new Reader that decompresses from r, using the framing +// format described at +// https://github.com/google/snappy/blob/master/framing_format.txt +func NewReader(r io.Reader) *Reader { + return &Reader{ + r: r, + decoded: make([]byte, maxBlockSize), + buf: make([]byte, maxEncodedLenOfMaxBlockSize+checksumSize), + } +} + +// Reader is an io.Reader that can read Snappy-compressed bytes. +// +// Reader handles the Snappy stream format, not the Snappy block format. +type Reader struct { + r io.Reader + err error + decoded []byte + buf []byte + // decoded[i:j] contains decoded bytes that have not yet been passed on. + i, j int + readHeader bool +} + +// Reset discards any buffered data, resets all state, and switches the Snappy +// reader to read from r. This permits reusing a Reader rather than allocating +// a new one. +func (r *Reader) Reset(reader io.Reader) { + r.r = reader + r.err = nil + r.i = 0 + r.j = 0 + r.readHeader = false +} + +func (r *Reader) readFull(p []byte, allowEOF bool) (ok bool) { + if _, r.err = io.ReadFull(r.r, p); r.err != nil { + if r.err == io.ErrUnexpectedEOF || (r.err == io.EOF && !allowEOF) { + r.err = ErrCorrupt + } + return false + } + return true +} + +// Read satisfies the io.Reader interface. +func (r *Reader) Read(p []byte) (int, error) { + if r.err != nil { + return 0, r.err + } + for { + if r.i < r.j { + n := copy(p, r.decoded[r.i:r.j]) + r.i += n + return n, nil + } + if !r.readFull(r.buf[:4], true) { + return 0, r.err + } + chunkType := r.buf[0] + if !r.readHeader { + if chunkType != chunkTypeStreamIdentifier { + r.err = ErrCorrupt + return 0, r.err + } + r.readHeader = true + } + chunkLen := int(r.buf[1]) | int(r.buf[2])<<8 | int(r.buf[3])<<16 + if chunkLen > len(r.buf) { + r.err = ErrUnsupported + return 0, r.err + } + + // The chunk types are specified at + // https://github.com/google/snappy/blob/master/framing_format.txt + switch chunkType { + case chunkTypeCompressedData: + // Section 4.2. Compressed data (chunk type 0x00). + if chunkLen < checksumSize { + r.err = ErrCorrupt + return 0, r.err + } + buf := r.buf[:chunkLen] + if !r.readFull(buf, false) { + return 0, r.err + } + checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 + buf = buf[checksumSize:] + + n, err := DecodedLen(buf) + if err != nil { + r.err = err + return 0, r.err + } + if n > len(r.decoded) { + r.err = ErrCorrupt + return 0, r.err + } + if _, err := Decode(r.decoded, buf); err != nil { + r.err = err + return 0, r.err + } + if crc(r.decoded[:n]) != checksum { + r.err = ErrCorrupt + return 0, r.err + } + r.i, r.j = 0, n + continue + + case chunkTypeUncompressedData: + // Section 4.3. Uncompressed data (chunk type 0x01). + if chunkLen < checksumSize { + r.err = ErrCorrupt + return 0, r.err + } + buf := r.buf[:checksumSize] + if !r.readFull(buf, false) { + return 0, r.err + } + checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 + // Read directly into r.decoded instead of via r.buf. + n := chunkLen - checksumSize + if n > len(r.decoded) { + r.err = ErrCorrupt + return 0, r.err + } + if !r.readFull(r.decoded[:n], false) { + return 0, r.err + } + if crc(r.decoded[:n]) != checksum { + r.err = ErrCorrupt + return 0, r.err + } + r.i, r.j = 0, n + continue + + case chunkTypeStreamIdentifier: + // Section 4.1. Stream identifier (chunk type 0xff). + if chunkLen != len(magicBody) { + r.err = ErrCorrupt + return 0, r.err + } + if !r.readFull(r.buf[:len(magicBody)], false) { + return 0, r.err + } + for i := 0; i < len(magicBody); i++ { + if r.buf[i] != magicBody[i] { + r.err = ErrCorrupt + return 0, r.err + } + } + continue + } + + if chunkType <= 0x7f { + // Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f). + r.err = ErrUnsupported + return 0, r.err + } + // Section 4.4 Padding (chunk type 0xfe). + // Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd). + if !r.readFull(r.buf[:chunkLen], false) { + return 0, r.err + } + } +} diff --git a/vendor/github.com/golang/snappy/decode_amd64.s b/vendor/github.com/golang/snappy/decode_amd64.s new file mode 100644 index 000000000..e6179f65e --- /dev/null +++ b/vendor/github.com/golang/snappy/decode_amd64.s @@ -0,0 +1,490 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !appengine +// +build gc +// +build !noasm + +#include "textflag.h" + +// The asm code generally follows the pure Go code in decode_other.go, except +// where marked with a "!!!". + +// func decode(dst, src []byte) int +// +// All local variables fit into registers. The non-zero stack size is only to +// spill registers and push args when issuing a CALL. The register allocation: +// - AX scratch +// - BX scratch +// - CX length or x +// - DX offset +// - SI &src[s] +// - DI &dst[d] +// + R8 dst_base +// + R9 dst_len +// + R10 dst_base + dst_len +// + R11 src_base +// + R12 src_len +// + R13 src_base + src_len +// - R14 used by doCopy +// - R15 used by doCopy +// +// The registers R8-R13 (marked with a "+") are set at the start of the +// function, and after a CALL returns, and are not otherwise modified. +// +// The d variable is implicitly DI - R8, and len(dst)-d is R10 - DI. +// The s variable is implicitly SI - R11, and len(src)-s is R13 - SI. +TEXT ·decode(SB), NOSPLIT, $48-56 + // Initialize SI, DI and R8-R13. + MOVQ dst_base+0(FP), R8 + MOVQ dst_len+8(FP), R9 + MOVQ R8, DI + MOVQ R8, R10 + ADDQ R9, R10 + MOVQ src_base+24(FP), R11 + MOVQ src_len+32(FP), R12 + MOVQ R11, SI + MOVQ R11, R13 + ADDQ R12, R13 + +loop: + // for s < len(src) + CMPQ SI, R13 + JEQ end + + // CX = uint32(src[s]) + // + // switch src[s] & 0x03 + MOVBLZX (SI), CX + MOVL CX, BX + ANDL $3, BX + CMPL BX, $1 + JAE tagCopy + + // ---------------------------------------- + // The code below handles literal tags. + + // case tagLiteral: + // x := uint32(src[s] >> 2) + // switch + SHRL $2, CX + CMPL CX, $60 + JAE tagLit60Plus + + // case x < 60: + // s++ + INCQ SI + +doLit: + // This is the end of the inner "switch", when we have a literal tag. + // + // We assume that CX == x and x fits in a uint32, where x is the variable + // used in the pure Go decode_other.go code. + + // length = int(x) + 1 + // + // Unlike the pure Go code, we don't need to check if length <= 0 because + // CX can hold 64 bits, so the increment cannot overflow. + INCQ CX + + // Prepare to check if copying length bytes will run past the end of dst or + // src. + // + // AX = len(dst) - d + // BX = len(src) - s + MOVQ R10, AX + SUBQ DI, AX + MOVQ R13, BX + SUBQ SI, BX + + // !!! Try a faster technique for short (16 or fewer bytes) copies. + // + // if length > 16 || len(dst)-d < 16 || len(src)-s < 16 { + // goto callMemmove // Fall back on calling runtime·memmove. + // } + // + // The C++ snappy code calls this TryFastAppend. It also checks len(src)-s + // against 21 instead of 16, because it cannot assume that all of its input + // is contiguous in memory and so it needs to leave enough source bytes to + // read the next tag without refilling buffers, but Go's Decode assumes + // contiguousness (the src argument is a []byte). + CMPQ CX, $16 + JGT callMemmove + CMPQ AX, $16 + JLT callMemmove + CMPQ BX, $16 + JLT callMemmove + + // !!! Implement the copy from src to dst as a 16-byte load and store. + // (Decode's documentation says that dst and src must not overlap.) + // + // This always copies 16 bytes, instead of only length bytes, but that's + // OK. If the input is a valid Snappy encoding then subsequent iterations + // will fix up the overrun. Otherwise, Decode returns a nil []byte (and a + // non-nil error), so the overrun will be ignored. + // + // Note that on amd64, it is legal and cheap to issue unaligned 8-byte or + // 16-byte loads and stores. This technique probably wouldn't be as + // effective on architectures that are fussier about alignment. + MOVOU 0(SI), X0 + MOVOU X0, 0(DI) + + // d += length + // s += length + ADDQ CX, DI + ADDQ CX, SI + JMP loop + +callMemmove: + // if length > len(dst)-d || length > len(src)-s { etc } + CMPQ CX, AX + JGT errCorrupt + CMPQ CX, BX + JGT errCorrupt + + // copy(dst[d:], src[s:s+length]) + // + // This means calling runtime·memmove(&dst[d], &src[s], length), so we push + // DI, SI and CX as arguments. Coincidentally, we also need to spill those + // three registers to the stack, to save local variables across the CALL. + MOVQ DI, 0(SP) + MOVQ SI, 8(SP) + MOVQ CX, 16(SP) + MOVQ DI, 24(SP) + MOVQ SI, 32(SP) + MOVQ CX, 40(SP) + CALL runtime·memmove(SB) + + // Restore local variables: unspill registers from the stack and + // re-calculate R8-R13. + MOVQ 24(SP), DI + MOVQ 32(SP), SI + MOVQ 40(SP), CX + MOVQ dst_base+0(FP), R8 + MOVQ dst_len+8(FP), R9 + MOVQ R8, R10 + ADDQ R9, R10 + MOVQ src_base+24(FP), R11 + MOVQ src_len+32(FP), R12 + MOVQ R11, R13 + ADDQ R12, R13 + + // d += length + // s += length + ADDQ CX, DI + ADDQ CX, SI + JMP loop + +tagLit60Plus: + // !!! This fragment does the + // + // s += x - 58; if uint(s) > uint(len(src)) { etc } + // + // checks. In the asm version, we code it once instead of once per switch case. + ADDQ CX, SI + SUBQ $58, SI + MOVQ SI, BX + SUBQ R11, BX + CMPQ BX, R12 + JA errCorrupt + + // case x == 60: + CMPL CX, $61 + JEQ tagLit61 + JA tagLit62Plus + + // x = uint32(src[s-1]) + MOVBLZX -1(SI), CX + JMP doLit + +tagLit61: + // case x == 61: + // x = uint32(src[s-2]) | uint32(src[s-1])<<8 + MOVWLZX -2(SI), CX + JMP doLit + +tagLit62Plus: + CMPL CX, $62 + JA tagLit63 + + // case x == 62: + // x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16 + MOVWLZX -3(SI), CX + MOVBLZX -1(SI), BX + SHLL $16, BX + ORL BX, CX + JMP doLit + +tagLit63: + // case x == 63: + // x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24 + MOVL -4(SI), CX + JMP doLit + +// The code above handles literal tags. +// ---------------------------------------- +// The code below handles copy tags. + +tagCopy4: + // case tagCopy4: + // s += 5 + ADDQ $5, SI + + // if uint(s) > uint(len(src)) { etc } + MOVQ SI, BX + SUBQ R11, BX + CMPQ BX, R12 + JA errCorrupt + + // length = 1 + int(src[s-5])>>2 + SHRQ $2, CX + INCQ CX + + // offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24) + MOVLQZX -4(SI), DX + JMP doCopy + +tagCopy2: + // case tagCopy2: + // s += 3 + ADDQ $3, SI + + // if uint(s) > uint(len(src)) { etc } + MOVQ SI, BX + SUBQ R11, BX + CMPQ BX, R12 + JA errCorrupt + + // length = 1 + int(src[s-3])>>2 + SHRQ $2, CX + INCQ CX + + // offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8) + MOVWQZX -2(SI), DX + JMP doCopy + +tagCopy: + // We have a copy tag. We assume that: + // - BX == src[s] & 0x03 + // - CX == src[s] + CMPQ BX, $2 + JEQ tagCopy2 + JA tagCopy4 + + // case tagCopy1: + // s += 2 + ADDQ $2, SI + + // if uint(s) > uint(len(src)) { etc } + MOVQ SI, BX + SUBQ R11, BX + CMPQ BX, R12 + JA errCorrupt + + // offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1])) + MOVQ CX, DX + ANDQ $0xe0, DX + SHLQ $3, DX + MOVBQZX -1(SI), BX + ORQ BX, DX + + // length = 4 + int(src[s-2])>>2&0x7 + SHRQ $2, CX + ANDQ $7, CX + ADDQ $4, CX + +doCopy: + // This is the end of the outer "switch", when we have a copy tag. + // + // We assume that: + // - CX == length && CX > 0 + // - DX == offset + + // if offset <= 0 { etc } + CMPQ DX, $0 + JLE errCorrupt + + // if d < offset { etc } + MOVQ DI, BX + SUBQ R8, BX + CMPQ BX, DX + JLT errCorrupt + + // if length > len(dst)-d { etc } + MOVQ R10, BX + SUBQ DI, BX + CMPQ CX, BX + JGT errCorrupt + + // forwardCopy(dst[d:d+length], dst[d-offset:]); d += length + // + // Set: + // - R14 = len(dst)-d + // - R15 = &dst[d-offset] + MOVQ R10, R14 + SUBQ DI, R14 + MOVQ DI, R15 + SUBQ DX, R15 + + // !!! Try a faster technique for short (16 or fewer bytes) forward copies. + // + // First, try using two 8-byte load/stores, similar to the doLit technique + // above. Even if dst[d:d+length] and dst[d-offset:] can overlap, this is + // still OK if offset >= 8. Note that this has to be two 8-byte load/stores + // and not one 16-byte load/store, and the first store has to be before the + // second load, due to the overlap if offset is in the range [8, 16). + // + // if length > 16 || offset < 8 || len(dst)-d < 16 { + // goto slowForwardCopy + // } + // copy 16 bytes + // d += length + CMPQ CX, $16 + JGT slowForwardCopy + CMPQ DX, $8 + JLT slowForwardCopy + CMPQ R14, $16 + JLT slowForwardCopy + MOVQ 0(R15), AX + MOVQ AX, 0(DI) + MOVQ 8(R15), BX + MOVQ BX, 8(DI) + ADDQ CX, DI + JMP loop + +slowForwardCopy: + // !!! If the forward copy is longer than 16 bytes, or if offset < 8, we + // can still try 8-byte load stores, provided we can overrun up to 10 extra + // bytes. As above, the overrun will be fixed up by subsequent iterations + // of the outermost loop. + // + // The C++ snappy code calls this technique IncrementalCopyFastPath. Its + // commentary says: + // + // ---- + // + // The main part of this loop is a simple copy of eight bytes at a time + // until we've copied (at least) the requested amount of bytes. However, + // if d and d-offset are less than eight bytes apart (indicating a + // repeating pattern of length < 8), we first need to expand the pattern in + // order to get the correct results. For instance, if the buffer looks like + // this, with the eight-byte and patterns marked as + // intervals: + // + // abxxxxxxxxxxxx + // [------] d-offset + // [------] d + // + // a single eight-byte copy from to will repeat the pattern + // once, after which we can move two bytes without moving : + // + // ababxxxxxxxxxx + // [------] d-offset + // [------] d + // + // and repeat the exercise until the two no longer overlap. + // + // This allows us to do very well in the special case of one single byte + // repeated many times, without taking a big hit for more general cases. + // + // The worst case of extra writing past the end of the match occurs when + // offset == 1 and length == 1; the last copy will read from byte positions + // [0..7] and write to [4..11], whereas it was only supposed to write to + // position 1. Thus, ten excess bytes. + // + // ---- + // + // That "10 byte overrun" worst case is confirmed by Go's + // TestSlowForwardCopyOverrun, which also tests the fixUpSlowForwardCopy + // and finishSlowForwardCopy algorithm. + // + // if length > len(dst)-d-10 { + // goto verySlowForwardCopy + // } + SUBQ $10, R14 + CMPQ CX, R14 + JGT verySlowForwardCopy + +makeOffsetAtLeast8: + // !!! As above, expand the pattern so that offset >= 8 and we can use + // 8-byte load/stores. + // + // for offset < 8 { + // copy 8 bytes from dst[d-offset:] to dst[d:] + // length -= offset + // d += offset + // offset += offset + // // The two previous lines together means that d-offset, and therefore + // // R15, is unchanged. + // } + CMPQ DX, $8 + JGE fixUpSlowForwardCopy + MOVQ (R15), BX + MOVQ BX, (DI) + SUBQ DX, CX + ADDQ DX, DI + ADDQ DX, DX + JMP makeOffsetAtLeast8 + +fixUpSlowForwardCopy: + // !!! Add length (which might be negative now) to d (implied by DI being + // &dst[d]) so that d ends up at the right place when we jump back to the + // top of the loop. Before we do that, though, we save DI to AX so that, if + // length is positive, copying the remaining length bytes will write to the + // right place. + MOVQ DI, AX + ADDQ CX, DI + +finishSlowForwardCopy: + // !!! Repeat 8-byte load/stores until length <= 0. Ending with a negative + // length means that we overrun, but as above, that will be fixed up by + // subsequent iterations of the outermost loop. + CMPQ CX, $0 + JLE loop + MOVQ (R15), BX + MOVQ BX, (AX) + ADDQ $8, R15 + ADDQ $8, AX + SUBQ $8, CX + JMP finishSlowForwardCopy + +verySlowForwardCopy: + // verySlowForwardCopy is a simple implementation of forward copy. In C + // parlance, this is a do/while loop instead of a while loop, since we know + // that length > 0. In Go syntax: + // + // for { + // dst[d] = dst[d - offset] + // d++ + // length-- + // if length == 0 { + // break + // } + // } + MOVB (R15), BX + MOVB BX, (DI) + INCQ R15 + INCQ DI + DECQ CX + JNZ verySlowForwardCopy + JMP loop + +// The code above handles copy tags. +// ---------------------------------------- + +end: + // This is the end of the "for s < len(src)". + // + // if d != len(dst) { etc } + CMPQ DI, R10 + JNE errCorrupt + + // return 0 + MOVQ $0, ret+48(FP) + RET + +errCorrupt: + // return decodeErrCodeCorrupt + MOVQ $1, ret+48(FP) + RET diff --git a/vendor/github.com/golang/snappy/decode_arm64.s b/vendor/github.com/golang/snappy/decode_arm64.s new file mode 100644 index 000000000..7a3ead17e --- /dev/null +++ b/vendor/github.com/golang/snappy/decode_arm64.s @@ -0,0 +1,494 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !appengine +// +build gc +// +build !noasm + +#include "textflag.h" + +// The asm code generally follows the pure Go code in decode_other.go, except +// where marked with a "!!!". + +// func decode(dst, src []byte) int +// +// All local variables fit into registers. The non-zero stack size is only to +// spill registers and push args when issuing a CALL. The register allocation: +// - R2 scratch +// - R3 scratch +// - R4 length or x +// - R5 offset +// - R6 &src[s] +// - R7 &dst[d] +// + R8 dst_base +// + R9 dst_len +// + R10 dst_base + dst_len +// + R11 src_base +// + R12 src_len +// + R13 src_base + src_len +// - R14 used by doCopy +// - R15 used by doCopy +// +// The registers R8-R13 (marked with a "+") are set at the start of the +// function, and after a CALL returns, and are not otherwise modified. +// +// The d variable is implicitly R7 - R8, and len(dst)-d is R10 - R7. +// The s variable is implicitly R6 - R11, and len(src)-s is R13 - R6. +TEXT ·decode(SB), NOSPLIT, $56-56 + // Initialize R6, R7 and R8-R13. + MOVD dst_base+0(FP), R8 + MOVD dst_len+8(FP), R9 + MOVD R8, R7 + MOVD R8, R10 + ADD R9, R10, R10 + MOVD src_base+24(FP), R11 + MOVD src_len+32(FP), R12 + MOVD R11, R6 + MOVD R11, R13 + ADD R12, R13, R13 + +loop: + // for s < len(src) + CMP R13, R6 + BEQ end + + // R4 = uint32(src[s]) + // + // switch src[s] & 0x03 + MOVBU (R6), R4 + MOVW R4, R3 + ANDW $3, R3 + MOVW $1, R1 + CMPW R1, R3 + BGE tagCopy + + // ---------------------------------------- + // The code below handles literal tags. + + // case tagLiteral: + // x := uint32(src[s] >> 2) + // switch + MOVW $60, R1 + LSRW $2, R4, R4 + CMPW R4, R1 + BLS tagLit60Plus + + // case x < 60: + // s++ + ADD $1, R6, R6 + +doLit: + // This is the end of the inner "switch", when we have a literal tag. + // + // We assume that R4 == x and x fits in a uint32, where x is the variable + // used in the pure Go decode_other.go code. + + // length = int(x) + 1 + // + // Unlike the pure Go code, we don't need to check if length <= 0 because + // R4 can hold 64 bits, so the increment cannot overflow. + ADD $1, R4, R4 + + // Prepare to check if copying length bytes will run past the end of dst or + // src. + // + // R2 = len(dst) - d + // R3 = len(src) - s + MOVD R10, R2 + SUB R7, R2, R2 + MOVD R13, R3 + SUB R6, R3, R3 + + // !!! Try a faster technique for short (16 or fewer bytes) copies. + // + // if length > 16 || len(dst)-d < 16 || len(src)-s < 16 { + // goto callMemmove // Fall back on calling runtime·memmove. + // } + // + // The C++ snappy code calls this TryFastAppend. It also checks len(src)-s + // against 21 instead of 16, because it cannot assume that all of its input + // is contiguous in memory and so it needs to leave enough source bytes to + // read the next tag without refilling buffers, but Go's Decode assumes + // contiguousness (the src argument is a []byte). + CMP $16, R4 + BGT callMemmove + CMP $16, R2 + BLT callMemmove + CMP $16, R3 + BLT callMemmove + + // !!! Implement the copy from src to dst as a 16-byte load and store. + // (Decode's documentation says that dst and src must not overlap.) + // + // This always copies 16 bytes, instead of only length bytes, but that's + // OK. If the input is a valid Snappy encoding then subsequent iterations + // will fix up the overrun. Otherwise, Decode returns a nil []byte (and a + // non-nil error), so the overrun will be ignored. + // + // Note that on arm64, it is legal and cheap to issue unaligned 8-byte or + // 16-byte loads and stores. This technique probably wouldn't be as + // effective on architectures that are fussier about alignment. + LDP 0(R6), (R14, R15) + STP (R14, R15), 0(R7) + + // d += length + // s += length + ADD R4, R7, R7 + ADD R4, R6, R6 + B loop + +callMemmove: + // if length > len(dst)-d || length > len(src)-s { etc } + CMP R2, R4 + BGT errCorrupt + CMP R3, R4 + BGT errCorrupt + + // copy(dst[d:], src[s:s+length]) + // + // This means calling runtime·memmove(&dst[d], &src[s], length), so we push + // R7, R6 and R4 as arguments. Coincidentally, we also need to spill those + // three registers to the stack, to save local variables across the CALL. + MOVD R7, 8(RSP) + MOVD R6, 16(RSP) + MOVD R4, 24(RSP) + MOVD R7, 32(RSP) + MOVD R6, 40(RSP) + MOVD R4, 48(RSP) + CALL runtime·memmove(SB) + + // Restore local variables: unspill registers from the stack and + // re-calculate R8-R13. + MOVD 32(RSP), R7 + MOVD 40(RSP), R6 + MOVD 48(RSP), R4 + MOVD dst_base+0(FP), R8 + MOVD dst_len+8(FP), R9 + MOVD R8, R10 + ADD R9, R10, R10 + MOVD src_base+24(FP), R11 + MOVD src_len+32(FP), R12 + MOVD R11, R13 + ADD R12, R13, R13 + + // d += length + // s += length + ADD R4, R7, R7 + ADD R4, R6, R6 + B loop + +tagLit60Plus: + // !!! This fragment does the + // + // s += x - 58; if uint(s) > uint(len(src)) { etc } + // + // checks. In the asm version, we code it once instead of once per switch case. + ADD R4, R6, R6 + SUB $58, R6, R6 + MOVD R6, R3 + SUB R11, R3, R3 + CMP R12, R3 + BGT errCorrupt + + // case x == 60: + MOVW $61, R1 + CMPW R1, R4 + BEQ tagLit61 + BGT tagLit62Plus + + // x = uint32(src[s-1]) + MOVBU -1(R6), R4 + B doLit + +tagLit61: + // case x == 61: + // x = uint32(src[s-2]) | uint32(src[s-1])<<8 + MOVHU -2(R6), R4 + B doLit + +tagLit62Plus: + CMPW $62, R4 + BHI tagLit63 + + // case x == 62: + // x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16 + MOVHU -3(R6), R4 + MOVBU -1(R6), R3 + ORR R3<<16, R4 + B doLit + +tagLit63: + // case x == 63: + // x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24 + MOVWU -4(R6), R4 + B doLit + + // The code above handles literal tags. + // ---------------------------------------- + // The code below handles copy tags. + +tagCopy4: + // case tagCopy4: + // s += 5 + ADD $5, R6, R6 + + // if uint(s) > uint(len(src)) { etc } + MOVD R6, R3 + SUB R11, R3, R3 + CMP R12, R3 + BGT errCorrupt + + // length = 1 + int(src[s-5])>>2 + MOVD $1, R1 + ADD R4>>2, R1, R4 + + // offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24) + MOVWU -4(R6), R5 + B doCopy + +tagCopy2: + // case tagCopy2: + // s += 3 + ADD $3, R6, R6 + + // if uint(s) > uint(len(src)) { etc } + MOVD R6, R3 + SUB R11, R3, R3 + CMP R12, R3 + BGT errCorrupt + + // length = 1 + int(src[s-3])>>2 + MOVD $1, R1 + ADD R4>>2, R1, R4 + + // offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8) + MOVHU -2(R6), R5 + B doCopy + +tagCopy: + // We have a copy tag. We assume that: + // - R3 == src[s] & 0x03 + // - R4 == src[s] + CMP $2, R3 + BEQ tagCopy2 + BGT tagCopy4 + + // case tagCopy1: + // s += 2 + ADD $2, R6, R6 + + // if uint(s) > uint(len(src)) { etc } + MOVD R6, R3 + SUB R11, R3, R3 + CMP R12, R3 + BGT errCorrupt + + // offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1])) + MOVD R4, R5 + AND $0xe0, R5 + MOVBU -1(R6), R3 + ORR R5<<3, R3, R5 + + // length = 4 + int(src[s-2])>>2&0x7 + MOVD $7, R1 + AND R4>>2, R1, R4 + ADD $4, R4, R4 + +doCopy: + // This is the end of the outer "switch", when we have a copy tag. + // + // We assume that: + // - R4 == length && R4 > 0 + // - R5 == offset + + // if offset <= 0 { etc } + MOVD $0, R1 + CMP R1, R5 + BLE errCorrupt + + // if d < offset { etc } + MOVD R7, R3 + SUB R8, R3, R3 + CMP R5, R3 + BLT errCorrupt + + // if length > len(dst)-d { etc } + MOVD R10, R3 + SUB R7, R3, R3 + CMP R3, R4 + BGT errCorrupt + + // forwardCopy(dst[d:d+length], dst[d-offset:]); d += length + // + // Set: + // - R14 = len(dst)-d + // - R15 = &dst[d-offset] + MOVD R10, R14 + SUB R7, R14, R14 + MOVD R7, R15 + SUB R5, R15, R15 + + // !!! Try a faster technique for short (16 or fewer bytes) forward copies. + // + // First, try using two 8-byte load/stores, similar to the doLit technique + // above. Even if dst[d:d+length] and dst[d-offset:] can overlap, this is + // still OK if offset >= 8. Note that this has to be two 8-byte load/stores + // and not one 16-byte load/store, and the first store has to be before the + // second load, due to the overlap if offset is in the range [8, 16). + // + // if length > 16 || offset < 8 || len(dst)-d < 16 { + // goto slowForwardCopy + // } + // copy 16 bytes + // d += length + CMP $16, R4 + BGT slowForwardCopy + CMP $8, R5 + BLT slowForwardCopy + CMP $16, R14 + BLT slowForwardCopy + MOVD 0(R15), R2 + MOVD R2, 0(R7) + MOVD 8(R15), R3 + MOVD R3, 8(R7) + ADD R4, R7, R7 + B loop + +slowForwardCopy: + // !!! If the forward copy is longer than 16 bytes, or if offset < 8, we + // can still try 8-byte load stores, provided we can overrun up to 10 extra + // bytes. As above, the overrun will be fixed up by subsequent iterations + // of the outermost loop. + // + // The C++ snappy code calls this technique IncrementalCopyFastPath. Its + // commentary says: + // + // ---- + // + // The main part of this loop is a simple copy of eight bytes at a time + // until we've copied (at least) the requested amount of bytes. However, + // if d and d-offset are less than eight bytes apart (indicating a + // repeating pattern of length < 8), we first need to expand the pattern in + // order to get the correct results. For instance, if the buffer looks like + // this, with the eight-byte and patterns marked as + // intervals: + // + // abxxxxxxxxxxxx + // [------] d-offset + // [------] d + // + // a single eight-byte copy from to will repeat the pattern + // once, after which we can move two bytes without moving : + // + // ababxxxxxxxxxx + // [------] d-offset + // [------] d + // + // and repeat the exercise until the two no longer overlap. + // + // This allows us to do very well in the special case of one single byte + // repeated many times, without taking a big hit for more general cases. + // + // The worst case of extra writing past the end of the match occurs when + // offset == 1 and length == 1; the last copy will read from byte positions + // [0..7] and write to [4..11], whereas it was only supposed to write to + // position 1. Thus, ten excess bytes. + // + // ---- + // + // That "10 byte overrun" worst case is confirmed by Go's + // TestSlowForwardCopyOverrun, which also tests the fixUpSlowForwardCopy + // and finishSlowForwardCopy algorithm. + // + // if length > len(dst)-d-10 { + // goto verySlowForwardCopy + // } + SUB $10, R14, R14 + CMP R14, R4 + BGT verySlowForwardCopy + +makeOffsetAtLeast8: + // !!! As above, expand the pattern so that offset >= 8 and we can use + // 8-byte load/stores. + // + // for offset < 8 { + // copy 8 bytes from dst[d-offset:] to dst[d:] + // length -= offset + // d += offset + // offset += offset + // // The two previous lines together means that d-offset, and therefore + // // R15, is unchanged. + // } + CMP $8, R5 + BGE fixUpSlowForwardCopy + MOVD (R15), R3 + MOVD R3, (R7) + SUB R5, R4, R4 + ADD R5, R7, R7 + ADD R5, R5, R5 + B makeOffsetAtLeast8 + +fixUpSlowForwardCopy: + // !!! Add length (which might be negative now) to d (implied by R7 being + // &dst[d]) so that d ends up at the right place when we jump back to the + // top of the loop. Before we do that, though, we save R7 to R2 so that, if + // length is positive, copying the remaining length bytes will write to the + // right place. + MOVD R7, R2 + ADD R4, R7, R7 + +finishSlowForwardCopy: + // !!! Repeat 8-byte load/stores until length <= 0. Ending with a negative + // length means that we overrun, but as above, that will be fixed up by + // subsequent iterations of the outermost loop. + MOVD $0, R1 + CMP R1, R4 + BLE loop + MOVD (R15), R3 + MOVD R3, (R2) + ADD $8, R15, R15 + ADD $8, R2, R2 + SUB $8, R4, R4 + B finishSlowForwardCopy + +verySlowForwardCopy: + // verySlowForwardCopy is a simple implementation of forward copy. In C + // parlance, this is a do/while loop instead of a while loop, since we know + // that length > 0. In Go syntax: + // + // for { + // dst[d] = dst[d - offset] + // d++ + // length-- + // if length == 0 { + // break + // } + // } + MOVB (R15), R3 + MOVB R3, (R7) + ADD $1, R15, R15 + ADD $1, R7, R7 + SUB $1, R4, R4 + CBNZ R4, verySlowForwardCopy + B loop + + // The code above handles copy tags. + // ---------------------------------------- + +end: + // This is the end of the "for s < len(src)". + // + // if d != len(dst) { etc } + CMP R10, R7 + BNE errCorrupt + + // return 0 + MOVD $0, ret+48(FP) + RET + +errCorrupt: + // return decodeErrCodeCorrupt + MOVD $1, R2 + MOVD R2, ret+48(FP) + RET diff --git a/vendor/github.com/golang/snappy/decode_asm.go b/vendor/github.com/golang/snappy/decode_asm.go new file mode 100644 index 000000000..7082b3491 --- /dev/null +++ b/vendor/github.com/golang/snappy/decode_asm.go @@ -0,0 +1,15 @@ +// Copyright 2016 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !appengine +// +build gc +// +build !noasm +// +build amd64 arm64 + +package snappy + +// decode has the same semantics as in decode_other.go. +// +//go:noescape +func decode(dst, src []byte) int diff --git a/vendor/github.com/golang/snappy/decode_other.go b/vendor/github.com/golang/snappy/decode_other.go new file mode 100644 index 000000000..2f672be55 --- /dev/null +++ b/vendor/github.com/golang/snappy/decode_other.go @@ -0,0 +1,115 @@ +// Copyright 2016 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !amd64,!arm64 appengine !gc noasm + +package snappy + +// decode writes the decoding of src to dst. It assumes that the varint-encoded +// length of the decompressed bytes has already been read, and that len(dst) +// equals that length. +// +// It returns 0 on success or a decodeErrCodeXxx error code on failure. +func decode(dst, src []byte) int { + var d, s, offset, length int + for s < len(src) { + switch src[s] & 0x03 { + case tagLiteral: + x := uint32(src[s] >> 2) + switch { + case x < 60: + s++ + case x == 60: + s += 2 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + x = uint32(src[s-1]) + case x == 61: + s += 3 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + x = uint32(src[s-2]) | uint32(src[s-1])<<8 + case x == 62: + s += 4 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16 + case x == 63: + s += 5 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24 + } + length = int(x) + 1 + if length <= 0 { + return decodeErrCodeUnsupportedLiteralLength + } + if length > len(dst)-d || length > len(src)-s { + return decodeErrCodeCorrupt + } + copy(dst[d:], src[s:s+length]) + d += length + s += length + continue + + case tagCopy1: + s += 2 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + length = 4 + int(src[s-2])>>2&0x7 + offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1])) + + case tagCopy2: + s += 3 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + length = 1 + int(src[s-3])>>2 + offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8) + + case tagCopy4: + s += 5 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + length = 1 + int(src[s-5])>>2 + offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24) + } + + if offset <= 0 || d < offset || length > len(dst)-d { + return decodeErrCodeCorrupt + } + // Copy from an earlier sub-slice of dst to a later sub-slice. + // If no overlap, use the built-in copy: + if offset >= length { + copy(dst[d:d+length], dst[d-offset:]) + d += length + continue + } + + // Unlike the built-in copy function, this byte-by-byte copy always runs + // forwards, even if the slices overlap. Conceptually, this is: + // + // d += forwardCopy(dst[d:d+length], dst[d-offset:]) + // + // We align the slices into a and b and show the compiler they are the same size. + // This allows the loop to run without bounds checks. + a := dst[d : d+length] + b := dst[d-offset:] + b = b[:len(a)] + for i := range a { + a[i] = b[i] + } + d += length + } + if d != len(dst) { + return decodeErrCodeCorrupt + } + return 0 +} diff --git a/vendor/github.com/golang/snappy/encode.go b/vendor/github.com/golang/snappy/encode.go new file mode 100644 index 000000000..7f2365707 --- /dev/null +++ b/vendor/github.com/golang/snappy/encode.go @@ -0,0 +1,289 @@ +// Copyright 2011 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package snappy + +import ( + "encoding/binary" + "errors" + "io" +) + +// Encode returns the encoded form of src. The returned slice may be a sub- +// slice of dst if dst was large enough to hold the entire encoded block. +// Otherwise, a newly allocated slice will be returned. +// +// The dst and src must not overlap. It is valid to pass a nil dst. +// +// Encode handles the Snappy block format, not the Snappy stream format. +func Encode(dst, src []byte) []byte { + if n := MaxEncodedLen(len(src)); n < 0 { + panic(ErrTooLarge) + } else if len(dst) < n { + dst = make([]byte, n) + } + + // The block starts with the varint-encoded length of the decompressed bytes. + d := binary.PutUvarint(dst, uint64(len(src))) + + for len(src) > 0 { + p := src + src = nil + if len(p) > maxBlockSize { + p, src = p[:maxBlockSize], p[maxBlockSize:] + } + if len(p) < minNonLiteralBlockSize { + d += emitLiteral(dst[d:], p) + } else { + d += encodeBlock(dst[d:], p) + } + } + return dst[:d] +} + +// inputMargin is the minimum number of extra input bytes to keep, inside +// encodeBlock's inner loop. On some architectures, this margin lets us +// implement a fast path for emitLiteral, where the copy of short (<= 16 byte) +// literals can be implemented as a single load to and store from a 16-byte +// register. That literal's actual length can be as short as 1 byte, so this +// can copy up to 15 bytes too much, but that's OK as subsequent iterations of +// the encoding loop will fix up the copy overrun, and this inputMargin ensures +// that we don't overrun the dst and src buffers. +const inputMargin = 16 - 1 + +// minNonLiteralBlockSize is the minimum size of the input to encodeBlock that +// could be encoded with a copy tag. This is the minimum with respect to the +// algorithm used by encodeBlock, not a minimum enforced by the file format. +// +// The encoded output must start with at least a 1 byte literal, as there are +// no previous bytes to copy. A minimal (1 byte) copy after that, generated +// from an emitCopy call in encodeBlock's main loop, would require at least +// another inputMargin bytes, for the reason above: we want any emitLiteral +// calls inside encodeBlock's main loop to use the fast path if possible, which +// requires being able to overrun by inputMargin bytes. Thus, +// minNonLiteralBlockSize equals 1 + 1 + inputMargin. +// +// The C++ code doesn't use this exact threshold, but it could, as discussed at +// https://groups.google.com/d/topic/snappy-compression/oGbhsdIJSJ8/discussion +// The difference between Go (2+inputMargin) and C++ (inputMargin) is purely an +// optimization. It should not affect the encoded form. This is tested by +// TestSameEncodingAsCppShortCopies. +const minNonLiteralBlockSize = 1 + 1 + inputMargin + +// MaxEncodedLen returns the maximum length of a snappy block, given its +// uncompressed length. +// +// It will return a negative value if srcLen is too large to encode. +func MaxEncodedLen(srcLen int) int { + n := uint64(srcLen) + if n > 0xffffffff { + return -1 + } + // Compressed data can be defined as: + // compressed := item* literal* + // item := literal* copy + // + // The trailing literal sequence has a space blowup of at most 62/60 + // since a literal of length 60 needs one tag byte + one extra byte + // for length information. + // + // Item blowup is trickier to measure. Suppose the "copy" op copies + // 4 bytes of data. Because of a special check in the encoding code, + // we produce a 4-byte copy only if the offset is < 65536. Therefore + // the copy op takes 3 bytes to encode, and this type of item leads + // to at most the 62/60 blowup for representing literals. + // + // Suppose the "copy" op copies 5 bytes of data. If the offset is big + // enough, it will take 5 bytes to encode the copy op. Therefore the + // worst case here is a one-byte literal followed by a five-byte copy. + // That is, 6 bytes of input turn into 7 bytes of "compressed" data. + // + // This last factor dominates the blowup, so the final estimate is: + n = 32 + n + n/6 + if n > 0xffffffff { + return -1 + } + return int(n) +} + +var errClosed = errors.New("snappy: Writer is closed") + +// NewWriter returns a new Writer that compresses to w. +// +// The Writer returned does not buffer writes. There is no need to Flush or +// Close such a Writer. +// +// Deprecated: the Writer returned is not suitable for many small writes, only +// for few large writes. Use NewBufferedWriter instead, which is efficient +// regardless of the frequency and shape of the writes, and remember to Close +// that Writer when done. +func NewWriter(w io.Writer) *Writer { + return &Writer{ + w: w, + obuf: make([]byte, obufLen), + } +} + +// NewBufferedWriter returns a new Writer that compresses to w, using the +// framing format described at +// https://github.com/google/snappy/blob/master/framing_format.txt +// +// The Writer returned buffers writes. Users must call Close to guarantee all +// data has been forwarded to the underlying io.Writer. They may also call +// Flush zero or more times before calling Close. +func NewBufferedWriter(w io.Writer) *Writer { + return &Writer{ + w: w, + ibuf: make([]byte, 0, maxBlockSize), + obuf: make([]byte, obufLen), + } +} + +// Writer is an io.Writer that can write Snappy-compressed bytes. +// +// Writer handles the Snappy stream format, not the Snappy block format. +type Writer struct { + w io.Writer + err error + + // ibuf is a buffer for the incoming (uncompressed) bytes. + // + // Its use is optional. For backwards compatibility, Writers created by the + // NewWriter function have ibuf == nil, do not buffer incoming bytes, and + // therefore do not need to be Flush'ed or Close'd. + ibuf []byte + + // obuf is a buffer for the outgoing (compressed) bytes. + obuf []byte + + // wroteStreamHeader is whether we have written the stream header. + wroteStreamHeader bool +} + +// Reset discards the writer's state and switches the Snappy writer to write to +// w. This permits reusing a Writer rather than allocating a new one. +func (w *Writer) Reset(writer io.Writer) { + w.w = writer + w.err = nil + if w.ibuf != nil { + w.ibuf = w.ibuf[:0] + } + w.wroteStreamHeader = false +} + +// Write satisfies the io.Writer interface. +func (w *Writer) Write(p []byte) (nRet int, errRet error) { + if w.ibuf == nil { + // Do not buffer incoming bytes. This does not perform or compress well + // if the caller of Writer.Write writes many small slices. This + // behavior is therefore deprecated, but still supported for backwards + // compatibility with code that doesn't explicitly Flush or Close. + return w.write(p) + } + + // The remainder of this method is based on bufio.Writer.Write from the + // standard library. + + for len(p) > (cap(w.ibuf)-len(w.ibuf)) && w.err == nil { + var n int + if len(w.ibuf) == 0 { + // Large write, empty buffer. + // Write directly from p to avoid copy. + n, _ = w.write(p) + } else { + n = copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p) + w.ibuf = w.ibuf[:len(w.ibuf)+n] + w.Flush() + } + nRet += n + p = p[n:] + } + if w.err != nil { + return nRet, w.err + } + n := copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p) + w.ibuf = w.ibuf[:len(w.ibuf)+n] + nRet += n + return nRet, nil +} + +func (w *Writer) write(p []byte) (nRet int, errRet error) { + if w.err != nil { + return 0, w.err + } + for len(p) > 0 { + obufStart := len(magicChunk) + if !w.wroteStreamHeader { + w.wroteStreamHeader = true + copy(w.obuf, magicChunk) + obufStart = 0 + } + + var uncompressed []byte + if len(p) > maxBlockSize { + uncompressed, p = p[:maxBlockSize], p[maxBlockSize:] + } else { + uncompressed, p = p, nil + } + checksum := crc(uncompressed) + + // Compress the buffer, discarding the result if the improvement + // isn't at least 12.5%. + compressed := Encode(w.obuf[obufHeaderLen:], uncompressed) + chunkType := uint8(chunkTypeCompressedData) + chunkLen := 4 + len(compressed) + obufEnd := obufHeaderLen + len(compressed) + if len(compressed) >= len(uncompressed)-len(uncompressed)/8 { + chunkType = chunkTypeUncompressedData + chunkLen = 4 + len(uncompressed) + obufEnd = obufHeaderLen + } + + // Fill in the per-chunk header that comes before the body. + w.obuf[len(magicChunk)+0] = chunkType + w.obuf[len(magicChunk)+1] = uint8(chunkLen >> 0) + w.obuf[len(magicChunk)+2] = uint8(chunkLen >> 8) + w.obuf[len(magicChunk)+3] = uint8(chunkLen >> 16) + w.obuf[len(magicChunk)+4] = uint8(checksum >> 0) + w.obuf[len(magicChunk)+5] = uint8(checksum >> 8) + w.obuf[len(magicChunk)+6] = uint8(checksum >> 16) + w.obuf[len(magicChunk)+7] = uint8(checksum >> 24) + + if _, err := w.w.Write(w.obuf[obufStart:obufEnd]); err != nil { + w.err = err + return nRet, err + } + if chunkType == chunkTypeUncompressedData { + if _, err := w.w.Write(uncompressed); err != nil { + w.err = err + return nRet, err + } + } + nRet += len(uncompressed) + } + return nRet, nil +} + +// Flush flushes the Writer to its underlying io.Writer. +func (w *Writer) Flush() error { + if w.err != nil { + return w.err + } + if len(w.ibuf) == 0 { + return nil + } + w.write(w.ibuf) + w.ibuf = w.ibuf[:0] + return w.err +} + +// Close calls Flush and then closes the Writer. +func (w *Writer) Close() error { + w.Flush() + ret := w.err + if w.err == nil { + w.err = errClosed + } + return ret +} diff --git a/vendor/github.com/golang/snappy/encode_amd64.s b/vendor/github.com/golang/snappy/encode_amd64.s new file mode 100644 index 000000000..adfd979fe --- /dev/null +++ b/vendor/github.com/golang/snappy/encode_amd64.s @@ -0,0 +1,730 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !appengine +// +build gc +// +build !noasm + +#include "textflag.h" + +// The XXX lines assemble on Go 1.4, 1.5 and 1.7, but not 1.6, due to a +// Go toolchain regression. See https://github.com/golang/go/issues/15426 and +// https://github.com/golang/snappy/issues/29 +// +// As a workaround, the package was built with a known good assembler, and +// those instructions were disassembled by "objdump -d" to yield the +// 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15 +// style comments, in AT&T asm syntax. Note that rsp here is a physical +// register, not Go/asm's SP pseudo-register (see https://golang.org/doc/asm). +// The instructions were then encoded as "BYTE $0x.." sequences, which assemble +// fine on Go 1.6. + +// The asm code generally follows the pure Go code in encode_other.go, except +// where marked with a "!!!". + +// ---------------------------------------------------------------------------- + +// func emitLiteral(dst, lit []byte) int +// +// All local variables fit into registers. The register allocation: +// - AX len(lit) +// - BX n +// - DX return value +// - DI &dst[i] +// - R10 &lit[0] +// +// The 24 bytes of stack space is to call runtime·memmove. +// +// The unusual register allocation of local variables, such as R10 for the +// source pointer, matches the allocation used at the call site in encodeBlock, +// which makes it easier to manually inline this function. +TEXT ·emitLiteral(SB), NOSPLIT, $24-56 + MOVQ dst_base+0(FP), DI + MOVQ lit_base+24(FP), R10 + MOVQ lit_len+32(FP), AX + MOVQ AX, DX + MOVL AX, BX + SUBL $1, BX + + CMPL BX, $60 + JLT oneByte + CMPL BX, $256 + JLT twoBytes + +threeBytes: + MOVB $0xf4, 0(DI) + MOVW BX, 1(DI) + ADDQ $3, DI + ADDQ $3, DX + JMP memmove + +twoBytes: + MOVB $0xf0, 0(DI) + MOVB BX, 1(DI) + ADDQ $2, DI + ADDQ $2, DX + JMP memmove + +oneByte: + SHLB $2, BX + MOVB BX, 0(DI) + ADDQ $1, DI + ADDQ $1, DX + +memmove: + MOVQ DX, ret+48(FP) + + // copy(dst[i:], lit) + // + // This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push + // DI, R10 and AX as arguments. + MOVQ DI, 0(SP) + MOVQ R10, 8(SP) + MOVQ AX, 16(SP) + CALL runtime·memmove(SB) + RET + +// ---------------------------------------------------------------------------- + +// func emitCopy(dst []byte, offset, length int) int +// +// All local variables fit into registers. The register allocation: +// - AX length +// - SI &dst[0] +// - DI &dst[i] +// - R11 offset +// +// The unusual register allocation of local variables, such as R11 for the +// offset, matches the allocation used at the call site in encodeBlock, which +// makes it easier to manually inline this function. +TEXT ·emitCopy(SB), NOSPLIT, $0-48 + MOVQ dst_base+0(FP), DI + MOVQ DI, SI + MOVQ offset+24(FP), R11 + MOVQ length+32(FP), AX + +loop0: + // for length >= 68 { etc } + CMPL AX, $68 + JLT step1 + + // Emit a length 64 copy, encoded as 3 bytes. + MOVB $0xfe, 0(DI) + MOVW R11, 1(DI) + ADDQ $3, DI + SUBL $64, AX + JMP loop0 + +step1: + // if length > 64 { etc } + CMPL AX, $64 + JLE step2 + + // Emit a length 60 copy, encoded as 3 bytes. + MOVB $0xee, 0(DI) + MOVW R11, 1(DI) + ADDQ $3, DI + SUBL $60, AX + +step2: + // if length >= 12 || offset >= 2048 { goto step3 } + CMPL AX, $12 + JGE step3 + CMPL R11, $2048 + JGE step3 + + // Emit the remaining copy, encoded as 2 bytes. + MOVB R11, 1(DI) + SHRL $8, R11 + SHLB $5, R11 + SUBB $4, AX + SHLB $2, AX + ORB AX, R11 + ORB $1, R11 + MOVB R11, 0(DI) + ADDQ $2, DI + + // Return the number of bytes written. + SUBQ SI, DI + MOVQ DI, ret+40(FP) + RET + +step3: + // Emit the remaining copy, encoded as 3 bytes. + SUBL $1, AX + SHLB $2, AX + ORB $2, AX + MOVB AX, 0(DI) + MOVW R11, 1(DI) + ADDQ $3, DI + + // Return the number of bytes written. + SUBQ SI, DI + MOVQ DI, ret+40(FP) + RET + +// ---------------------------------------------------------------------------- + +// func extendMatch(src []byte, i, j int) int +// +// All local variables fit into registers. The register allocation: +// - DX &src[0] +// - SI &src[j] +// - R13 &src[len(src) - 8] +// - R14 &src[len(src)] +// - R15 &src[i] +// +// The unusual register allocation of local variables, such as R15 for a source +// pointer, matches the allocation used at the call site in encodeBlock, which +// makes it easier to manually inline this function. +TEXT ·extendMatch(SB), NOSPLIT, $0-48 + MOVQ src_base+0(FP), DX + MOVQ src_len+8(FP), R14 + MOVQ i+24(FP), R15 + MOVQ j+32(FP), SI + ADDQ DX, R14 + ADDQ DX, R15 + ADDQ DX, SI + MOVQ R14, R13 + SUBQ $8, R13 + +cmp8: + // As long as we are 8 or more bytes before the end of src, we can load and + // compare 8 bytes at a time. If those 8 bytes are equal, repeat. + CMPQ SI, R13 + JA cmp1 + MOVQ (R15), AX + MOVQ (SI), BX + CMPQ AX, BX + JNE bsf + ADDQ $8, R15 + ADDQ $8, SI + JMP cmp8 + +bsf: + // If those 8 bytes were not equal, XOR the two 8 byte values, and return + // the index of the first byte that differs. The BSF instruction finds the + // least significant 1 bit, the amd64 architecture is little-endian, and + // the shift by 3 converts a bit index to a byte index. + XORQ AX, BX + BSFQ BX, BX + SHRQ $3, BX + ADDQ BX, SI + + // Convert from &src[ret] to ret. + SUBQ DX, SI + MOVQ SI, ret+40(FP) + RET + +cmp1: + // In src's tail, compare 1 byte at a time. + CMPQ SI, R14 + JAE extendMatchEnd + MOVB (R15), AX + MOVB (SI), BX + CMPB AX, BX + JNE extendMatchEnd + ADDQ $1, R15 + ADDQ $1, SI + JMP cmp1 + +extendMatchEnd: + // Convert from &src[ret] to ret. + SUBQ DX, SI + MOVQ SI, ret+40(FP) + RET + +// ---------------------------------------------------------------------------- + +// func encodeBlock(dst, src []byte) (d int) +// +// All local variables fit into registers, other than "var table". The register +// allocation: +// - AX . . +// - BX . . +// - CX 56 shift (note that amd64 shifts by non-immediates must use CX). +// - DX 64 &src[0], tableSize +// - SI 72 &src[s] +// - DI 80 &dst[d] +// - R9 88 sLimit +// - R10 . &src[nextEmit] +// - R11 96 prevHash, currHash, nextHash, offset +// - R12 104 &src[base], skip +// - R13 . &src[nextS], &src[len(src) - 8] +// - R14 . len(src), bytesBetweenHashLookups, &src[len(src)], x +// - R15 112 candidate +// +// The second column (56, 64, etc) is the stack offset to spill the registers +// when calling other functions. We could pack this slightly tighter, but it's +// simpler to have a dedicated spill map independent of the function called. +// +// "var table [maxTableSize]uint16" takes up 32768 bytes of stack space. An +// extra 56 bytes, to call other functions, and an extra 64 bytes, to spill +// local variables (registers) during calls gives 32768 + 56 + 64 = 32888. +TEXT ·encodeBlock(SB), 0, $32888-56 + MOVQ dst_base+0(FP), DI + MOVQ src_base+24(FP), SI + MOVQ src_len+32(FP), R14 + + // shift, tableSize := uint32(32-8), 1<<8 + MOVQ $24, CX + MOVQ $256, DX + +calcShift: + // for ; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 { + // shift-- + // } + CMPQ DX, $16384 + JGE varTable + CMPQ DX, R14 + JGE varTable + SUBQ $1, CX + SHLQ $1, DX + JMP calcShift + +varTable: + // var table [maxTableSize]uint16 + // + // In the asm code, unlike the Go code, we can zero-initialize only the + // first tableSize elements. Each uint16 element is 2 bytes and each MOVOU + // writes 16 bytes, so we can do only tableSize/8 writes instead of the + // 2048 writes that would zero-initialize all of table's 32768 bytes. + SHRQ $3, DX + LEAQ table-32768(SP), BX + PXOR X0, X0 + +memclr: + MOVOU X0, 0(BX) + ADDQ $16, BX + SUBQ $1, DX + JNZ memclr + + // !!! DX = &src[0] + MOVQ SI, DX + + // sLimit := len(src) - inputMargin + MOVQ R14, R9 + SUBQ $15, R9 + + // !!! Pre-emptively spill CX, DX and R9 to the stack. Their values don't + // change for the rest of the function. + MOVQ CX, 56(SP) + MOVQ DX, 64(SP) + MOVQ R9, 88(SP) + + // nextEmit := 0 + MOVQ DX, R10 + + // s := 1 + ADDQ $1, SI + + // nextHash := hash(load32(src, s), shift) + MOVL 0(SI), R11 + IMULL $0x1e35a7bd, R11 + SHRL CX, R11 + +outer: + // for { etc } + + // skip := 32 + MOVQ $32, R12 + + // nextS := s + MOVQ SI, R13 + + // candidate := 0 + MOVQ $0, R15 + +inner0: + // for { etc } + + // s := nextS + MOVQ R13, SI + + // bytesBetweenHashLookups := skip >> 5 + MOVQ R12, R14 + SHRQ $5, R14 + + // nextS = s + bytesBetweenHashLookups + ADDQ R14, R13 + + // skip += bytesBetweenHashLookups + ADDQ R14, R12 + + // if nextS > sLimit { goto emitRemainder } + MOVQ R13, AX + SUBQ DX, AX + CMPQ AX, R9 + JA emitRemainder + + // candidate = int(table[nextHash]) + // XXX: MOVWQZX table-32768(SP)(R11*2), R15 + // XXX: 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15 + BYTE $0x4e + BYTE $0x0f + BYTE $0xb7 + BYTE $0x7c + BYTE $0x5c + BYTE $0x78 + + // table[nextHash] = uint16(s) + MOVQ SI, AX + SUBQ DX, AX + + // XXX: MOVW AX, table-32768(SP)(R11*2) + // XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2) + BYTE $0x66 + BYTE $0x42 + BYTE $0x89 + BYTE $0x44 + BYTE $0x5c + BYTE $0x78 + + // nextHash = hash(load32(src, nextS), shift) + MOVL 0(R13), R11 + IMULL $0x1e35a7bd, R11 + SHRL CX, R11 + + // if load32(src, s) != load32(src, candidate) { continue } break + MOVL 0(SI), AX + MOVL (DX)(R15*1), BX + CMPL AX, BX + JNE inner0 + +fourByteMatch: + // As per the encode_other.go code: + // + // A 4-byte match has been found. We'll later see etc. + + // !!! Jump to a fast path for short (<= 16 byte) literals. See the comment + // on inputMargin in encode.go. + MOVQ SI, AX + SUBQ R10, AX + CMPQ AX, $16 + JLE emitLiteralFastPath + + // ---------------------------------------- + // Begin inline of the emitLiteral call. + // + // d += emitLiteral(dst[d:], src[nextEmit:s]) + + MOVL AX, BX + SUBL $1, BX + + CMPL BX, $60 + JLT inlineEmitLiteralOneByte + CMPL BX, $256 + JLT inlineEmitLiteralTwoBytes + +inlineEmitLiteralThreeBytes: + MOVB $0xf4, 0(DI) + MOVW BX, 1(DI) + ADDQ $3, DI + JMP inlineEmitLiteralMemmove + +inlineEmitLiteralTwoBytes: + MOVB $0xf0, 0(DI) + MOVB BX, 1(DI) + ADDQ $2, DI + JMP inlineEmitLiteralMemmove + +inlineEmitLiteralOneByte: + SHLB $2, BX + MOVB BX, 0(DI) + ADDQ $1, DI + +inlineEmitLiteralMemmove: + // Spill local variables (registers) onto the stack; call; unspill. + // + // copy(dst[i:], lit) + // + // This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push + // DI, R10 and AX as arguments. + MOVQ DI, 0(SP) + MOVQ R10, 8(SP) + MOVQ AX, 16(SP) + ADDQ AX, DI // Finish the "d +=" part of "d += emitLiteral(etc)". + MOVQ SI, 72(SP) + MOVQ DI, 80(SP) + MOVQ R15, 112(SP) + CALL runtime·memmove(SB) + MOVQ 56(SP), CX + MOVQ 64(SP), DX + MOVQ 72(SP), SI + MOVQ 80(SP), DI + MOVQ 88(SP), R9 + MOVQ 112(SP), R15 + JMP inner1 + +inlineEmitLiteralEnd: + // End inline of the emitLiteral call. + // ---------------------------------------- + +emitLiteralFastPath: + // !!! Emit the 1-byte encoding "uint8(len(lit)-1)<<2". + MOVB AX, BX + SUBB $1, BX + SHLB $2, BX + MOVB BX, (DI) + ADDQ $1, DI + + // !!! Implement the copy from lit to dst as a 16-byte load and store. + // (Encode's documentation says that dst and src must not overlap.) + // + // This always copies 16 bytes, instead of only len(lit) bytes, but that's + // OK. Subsequent iterations will fix up the overrun. + // + // Note that on amd64, it is legal and cheap to issue unaligned 8-byte or + // 16-byte loads and stores. This technique probably wouldn't be as + // effective on architectures that are fussier about alignment. + MOVOU 0(R10), X0 + MOVOU X0, 0(DI) + ADDQ AX, DI + +inner1: + // for { etc } + + // base := s + MOVQ SI, R12 + + // !!! offset := base - candidate + MOVQ R12, R11 + SUBQ R15, R11 + SUBQ DX, R11 + + // ---------------------------------------- + // Begin inline of the extendMatch call. + // + // s = extendMatch(src, candidate+4, s+4) + + // !!! R14 = &src[len(src)] + MOVQ src_len+32(FP), R14 + ADDQ DX, R14 + + // !!! R13 = &src[len(src) - 8] + MOVQ R14, R13 + SUBQ $8, R13 + + // !!! R15 = &src[candidate + 4] + ADDQ $4, R15 + ADDQ DX, R15 + + // !!! s += 4 + ADDQ $4, SI + +inlineExtendMatchCmp8: + // As long as we are 8 or more bytes before the end of src, we can load and + // compare 8 bytes at a time. If those 8 bytes are equal, repeat. + CMPQ SI, R13 + JA inlineExtendMatchCmp1 + MOVQ (R15), AX + MOVQ (SI), BX + CMPQ AX, BX + JNE inlineExtendMatchBSF + ADDQ $8, R15 + ADDQ $8, SI + JMP inlineExtendMatchCmp8 + +inlineExtendMatchBSF: + // If those 8 bytes were not equal, XOR the two 8 byte values, and return + // the index of the first byte that differs. The BSF instruction finds the + // least significant 1 bit, the amd64 architecture is little-endian, and + // the shift by 3 converts a bit index to a byte index. + XORQ AX, BX + BSFQ BX, BX + SHRQ $3, BX + ADDQ BX, SI + JMP inlineExtendMatchEnd + +inlineExtendMatchCmp1: + // In src's tail, compare 1 byte at a time. + CMPQ SI, R14 + JAE inlineExtendMatchEnd + MOVB (R15), AX + MOVB (SI), BX + CMPB AX, BX + JNE inlineExtendMatchEnd + ADDQ $1, R15 + ADDQ $1, SI + JMP inlineExtendMatchCmp1 + +inlineExtendMatchEnd: + // End inline of the extendMatch call. + // ---------------------------------------- + + // ---------------------------------------- + // Begin inline of the emitCopy call. + // + // d += emitCopy(dst[d:], base-candidate, s-base) + + // !!! length := s - base + MOVQ SI, AX + SUBQ R12, AX + +inlineEmitCopyLoop0: + // for length >= 68 { etc } + CMPL AX, $68 + JLT inlineEmitCopyStep1 + + // Emit a length 64 copy, encoded as 3 bytes. + MOVB $0xfe, 0(DI) + MOVW R11, 1(DI) + ADDQ $3, DI + SUBL $64, AX + JMP inlineEmitCopyLoop0 + +inlineEmitCopyStep1: + // if length > 64 { etc } + CMPL AX, $64 + JLE inlineEmitCopyStep2 + + // Emit a length 60 copy, encoded as 3 bytes. + MOVB $0xee, 0(DI) + MOVW R11, 1(DI) + ADDQ $3, DI + SUBL $60, AX + +inlineEmitCopyStep2: + // if length >= 12 || offset >= 2048 { goto inlineEmitCopyStep3 } + CMPL AX, $12 + JGE inlineEmitCopyStep3 + CMPL R11, $2048 + JGE inlineEmitCopyStep3 + + // Emit the remaining copy, encoded as 2 bytes. + MOVB R11, 1(DI) + SHRL $8, R11 + SHLB $5, R11 + SUBB $4, AX + SHLB $2, AX + ORB AX, R11 + ORB $1, R11 + MOVB R11, 0(DI) + ADDQ $2, DI + JMP inlineEmitCopyEnd + +inlineEmitCopyStep3: + // Emit the remaining copy, encoded as 3 bytes. + SUBL $1, AX + SHLB $2, AX + ORB $2, AX + MOVB AX, 0(DI) + MOVW R11, 1(DI) + ADDQ $3, DI + +inlineEmitCopyEnd: + // End inline of the emitCopy call. + // ---------------------------------------- + + // nextEmit = s + MOVQ SI, R10 + + // if s >= sLimit { goto emitRemainder } + MOVQ SI, AX + SUBQ DX, AX + CMPQ AX, R9 + JAE emitRemainder + + // As per the encode_other.go code: + // + // We could immediately etc. + + // x := load64(src, s-1) + MOVQ -1(SI), R14 + + // prevHash := hash(uint32(x>>0), shift) + MOVL R14, R11 + IMULL $0x1e35a7bd, R11 + SHRL CX, R11 + + // table[prevHash] = uint16(s-1) + MOVQ SI, AX + SUBQ DX, AX + SUBQ $1, AX + + // XXX: MOVW AX, table-32768(SP)(R11*2) + // XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2) + BYTE $0x66 + BYTE $0x42 + BYTE $0x89 + BYTE $0x44 + BYTE $0x5c + BYTE $0x78 + + // currHash := hash(uint32(x>>8), shift) + SHRQ $8, R14 + MOVL R14, R11 + IMULL $0x1e35a7bd, R11 + SHRL CX, R11 + + // candidate = int(table[currHash]) + // XXX: MOVWQZX table-32768(SP)(R11*2), R15 + // XXX: 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15 + BYTE $0x4e + BYTE $0x0f + BYTE $0xb7 + BYTE $0x7c + BYTE $0x5c + BYTE $0x78 + + // table[currHash] = uint16(s) + ADDQ $1, AX + + // XXX: MOVW AX, table-32768(SP)(R11*2) + // XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2) + BYTE $0x66 + BYTE $0x42 + BYTE $0x89 + BYTE $0x44 + BYTE $0x5c + BYTE $0x78 + + // if uint32(x>>8) == load32(src, candidate) { continue } + MOVL (DX)(R15*1), BX + CMPL R14, BX + JEQ inner1 + + // nextHash = hash(uint32(x>>16), shift) + SHRQ $8, R14 + MOVL R14, R11 + IMULL $0x1e35a7bd, R11 + SHRL CX, R11 + + // s++ + ADDQ $1, SI + + // break out of the inner1 for loop, i.e. continue the outer loop. + JMP outer + +emitRemainder: + // if nextEmit < len(src) { etc } + MOVQ src_len+32(FP), AX + ADDQ DX, AX + CMPQ R10, AX + JEQ encodeBlockEnd + + // d += emitLiteral(dst[d:], src[nextEmit:]) + // + // Push args. + MOVQ DI, 0(SP) + MOVQ $0, 8(SP) // Unnecessary, as the callee ignores it, but conservative. + MOVQ $0, 16(SP) // Unnecessary, as the callee ignores it, but conservative. + MOVQ R10, 24(SP) + SUBQ R10, AX + MOVQ AX, 32(SP) + MOVQ AX, 40(SP) // Unnecessary, as the callee ignores it, but conservative. + + // Spill local variables (registers) onto the stack; call; unspill. + MOVQ DI, 80(SP) + CALL ·emitLiteral(SB) + MOVQ 80(SP), DI + + // Finish the "d +=" part of "d += emitLiteral(etc)". + ADDQ 48(SP), DI + +encodeBlockEnd: + MOVQ dst_base+0(FP), AX + SUBQ AX, DI + MOVQ DI, d+48(FP) + RET diff --git a/vendor/github.com/golang/snappy/encode_arm64.s b/vendor/github.com/golang/snappy/encode_arm64.s new file mode 100644 index 000000000..bf83667d7 --- /dev/null +++ b/vendor/github.com/golang/snappy/encode_arm64.s @@ -0,0 +1,722 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !appengine +// +build gc +// +build !noasm + +#include "textflag.h" + +// The asm code generally follows the pure Go code in encode_other.go, except +// where marked with a "!!!". + +// ---------------------------------------------------------------------------- + +// func emitLiteral(dst, lit []byte) int +// +// All local variables fit into registers. The register allocation: +// - R3 len(lit) +// - R4 n +// - R6 return value +// - R8 &dst[i] +// - R10 &lit[0] +// +// The 32 bytes of stack space is to call runtime·memmove. +// +// The unusual register allocation of local variables, such as R10 for the +// source pointer, matches the allocation used at the call site in encodeBlock, +// which makes it easier to manually inline this function. +TEXT ·emitLiteral(SB), NOSPLIT, $32-56 + MOVD dst_base+0(FP), R8 + MOVD lit_base+24(FP), R10 + MOVD lit_len+32(FP), R3 + MOVD R3, R6 + MOVW R3, R4 + SUBW $1, R4, R4 + + CMPW $60, R4 + BLT oneByte + CMPW $256, R4 + BLT twoBytes + +threeBytes: + MOVD $0xf4, R2 + MOVB R2, 0(R8) + MOVW R4, 1(R8) + ADD $3, R8, R8 + ADD $3, R6, R6 + B memmove + +twoBytes: + MOVD $0xf0, R2 + MOVB R2, 0(R8) + MOVB R4, 1(R8) + ADD $2, R8, R8 + ADD $2, R6, R6 + B memmove + +oneByte: + LSLW $2, R4, R4 + MOVB R4, 0(R8) + ADD $1, R8, R8 + ADD $1, R6, R6 + +memmove: + MOVD R6, ret+48(FP) + + // copy(dst[i:], lit) + // + // This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push + // R8, R10 and R3 as arguments. + MOVD R8, 8(RSP) + MOVD R10, 16(RSP) + MOVD R3, 24(RSP) + CALL runtime·memmove(SB) + RET + +// ---------------------------------------------------------------------------- + +// func emitCopy(dst []byte, offset, length int) int +// +// All local variables fit into registers. The register allocation: +// - R3 length +// - R7 &dst[0] +// - R8 &dst[i] +// - R11 offset +// +// The unusual register allocation of local variables, such as R11 for the +// offset, matches the allocation used at the call site in encodeBlock, which +// makes it easier to manually inline this function. +TEXT ·emitCopy(SB), NOSPLIT, $0-48 + MOVD dst_base+0(FP), R8 + MOVD R8, R7 + MOVD offset+24(FP), R11 + MOVD length+32(FP), R3 + +loop0: + // for length >= 68 { etc } + CMPW $68, R3 + BLT step1 + + // Emit a length 64 copy, encoded as 3 bytes. + MOVD $0xfe, R2 + MOVB R2, 0(R8) + MOVW R11, 1(R8) + ADD $3, R8, R8 + SUB $64, R3, R3 + B loop0 + +step1: + // if length > 64 { etc } + CMP $64, R3 + BLE step2 + + // Emit a length 60 copy, encoded as 3 bytes. + MOVD $0xee, R2 + MOVB R2, 0(R8) + MOVW R11, 1(R8) + ADD $3, R8, R8 + SUB $60, R3, R3 + +step2: + // if length >= 12 || offset >= 2048 { goto step3 } + CMP $12, R3 + BGE step3 + CMPW $2048, R11 + BGE step3 + + // Emit the remaining copy, encoded as 2 bytes. + MOVB R11, 1(R8) + LSRW $3, R11, R11 + AND $0xe0, R11, R11 + SUB $4, R3, R3 + LSLW $2, R3 + AND $0xff, R3, R3 + ORRW R3, R11, R11 + ORRW $1, R11, R11 + MOVB R11, 0(R8) + ADD $2, R8, R8 + + // Return the number of bytes written. + SUB R7, R8, R8 + MOVD R8, ret+40(FP) + RET + +step3: + // Emit the remaining copy, encoded as 3 bytes. + SUB $1, R3, R3 + AND $0xff, R3, R3 + LSLW $2, R3, R3 + ORRW $2, R3, R3 + MOVB R3, 0(R8) + MOVW R11, 1(R8) + ADD $3, R8, R8 + + // Return the number of bytes written. + SUB R7, R8, R8 + MOVD R8, ret+40(FP) + RET + +// ---------------------------------------------------------------------------- + +// func extendMatch(src []byte, i, j int) int +// +// All local variables fit into registers. The register allocation: +// - R6 &src[0] +// - R7 &src[j] +// - R13 &src[len(src) - 8] +// - R14 &src[len(src)] +// - R15 &src[i] +// +// The unusual register allocation of local variables, such as R15 for a source +// pointer, matches the allocation used at the call site in encodeBlock, which +// makes it easier to manually inline this function. +TEXT ·extendMatch(SB), NOSPLIT, $0-48 + MOVD src_base+0(FP), R6 + MOVD src_len+8(FP), R14 + MOVD i+24(FP), R15 + MOVD j+32(FP), R7 + ADD R6, R14, R14 + ADD R6, R15, R15 + ADD R6, R7, R7 + MOVD R14, R13 + SUB $8, R13, R13 + +cmp8: + // As long as we are 8 or more bytes before the end of src, we can load and + // compare 8 bytes at a time. If those 8 bytes are equal, repeat. + CMP R13, R7 + BHI cmp1 + MOVD (R15), R3 + MOVD (R7), R4 + CMP R4, R3 + BNE bsf + ADD $8, R15, R15 + ADD $8, R7, R7 + B cmp8 + +bsf: + // If those 8 bytes were not equal, XOR the two 8 byte values, and return + // the index of the first byte that differs. + // RBIT reverses the bit order, then CLZ counts the leading zeros, the + // combination of which finds the least significant bit which is set. + // The arm64 architecture is little-endian, and the shift by 3 converts + // a bit index to a byte index. + EOR R3, R4, R4 + RBIT R4, R4 + CLZ R4, R4 + ADD R4>>3, R7, R7 + + // Convert from &src[ret] to ret. + SUB R6, R7, R7 + MOVD R7, ret+40(FP) + RET + +cmp1: + // In src's tail, compare 1 byte at a time. + CMP R7, R14 + BLS extendMatchEnd + MOVB (R15), R3 + MOVB (R7), R4 + CMP R4, R3 + BNE extendMatchEnd + ADD $1, R15, R15 + ADD $1, R7, R7 + B cmp1 + +extendMatchEnd: + // Convert from &src[ret] to ret. + SUB R6, R7, R7 + MOVD R7, ret+40(FP) + RET + +// ---------------------------------------------------------------------------- + +// func encodeBlock(dst, src []byte) (d int) +// +// All local variables fit into registers, other than "var table". The register +// allocation: +// - R3 . . +// - R4 . . +// - R5 64 shift +// - R6 72 &src[0], tableSize +// - R7 80 &src[s] +// - R8 88 &dst[d] +// - R9 96 sLimit +// - R10 . &src[nextEmit] +// - R11 104 prevHash, currHash, nextHash, offset +// - R12 112 &src[base], skip +// - R13 . &src[nextS], &src[len(src) - 8] +// - R14 . len(src), bytesBetweenHashLookups, &src[len(src)], x +// - R15 120 candidate +// - R16 . hash constant, 0x1e35a7bd +// - R17 . &table +// - . 128 table +// +// The second column (64, 72, etc) is the stack offset to spill the registers +// when calling other functions. We could pack this slightly tighter, but it's +// simpler to have a dedicated spill map independent of the function called. +// +// "var table [maxTableSize]uint16" takes up 32768 bytes of stack space. An +// extra 64 bytes, to call other functions, and an extra 64 bytes, to spill +// local variables (registers) during calls gives 32768 + 64 + 64 = 32896. +TEXT ·encodeBlock(SB), 0, $32896-56 + MOVD dst_base+0(FP), R8 + MOVD src_base+24(FP), R7 + MOVD src_len+32(FP), R14 + + // shift, tableSize := uint32(32-8), 1<<8 + MOVD $24, R5 + MOVD $256, R6 + MOVW $0xa7bd, R16 + MOVKW $(0x1e35<<16), R16 + +calcShift: + // for ; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 { + // shift-- + // } + MOVD $16384, R2 + CMP R2, R6 + BGE varTable + CMP R14, R6 + BGE varTable + SUB $1, R5, R5 + LSL $1, R6, R6 + B calcShift + +varTable: + // var table [maxTableSize]uint16 + // + // In the asm code, unlike the Go code, we can zero-initialize only the + // first tableSize elements. Each uint16 element is 2 bytes and each + // iterations writes 64 bytes, so we can do only tableSize/32 writes + // instead of the 2048 writes that would zero-initialize all of table's + // 32768 bytes. This clear could overrun the first tableSize elements, but + // it won't overrun the allocated stack size. + ADD $128, RSP, R17 + MOVD R17, R4 + + // !!! R6 = &src[tableSize] + ADD R6<<1, R17, R6 + +memclr: + STP.P (ZR, ZR), 64(R4) + STP (ZR, ZR), -48(R4) + STP (ZR, ZR), -32(R4) + STP (ZR, ZR), -16(R4) + CMP R4, R6 + BHI memclr + + // !!! R6 = &src[0] + MOVD R7, R6 + + // sLimit := len(src) - inputMargin + MOVD R14, R9 + SUB $15, R9, R9 + + // !!! Pre-emptively spill R5, R6 and R9 to the stack. Their values don't + // change for the rest of the function. + MOVD R5, 64(RSP) + MOVD R6, 72(RSP) + MOVD R9, 96(RSP) + + // nextEmit := 0 + MOVD R6, R10 + + // s := 1 + ADD $1, R7, R7 + + // nextHash := hash(load32(src, s), shift) + MOVW 0(R7), R11 + MULW R16, R11, R11 + LSRW R5, R11, R11 + +outer: + // for { etc } + + // skip := 32 + MOVD $32, R12 + + // nextS := s + MOVD R7, R13 + + // candidate := 0 + MOVD $0, R15 + +inner0: + // for { etc } + + // s := nextS + MOVD R13, R7 + + // bytesBetweenHashLookups := skip >> 5 + MOVD R12, R14 + LSR $5, R14, R14 + + // nextS = s + bytesBetweenHashLookups + ADD R14, R13, R13 + + // skip += bytesBetweenHashLookups + ADD R14, R12, R12 + + // if nextS > sLimit { goto emitRemainder } + MOVD R13, R3 + SUB R6, R3, R3 + CMP R9, R3 + BHI emitRemainder + + // candidate = int(table[nextHash]) + MOVHU 0(R17)(R11<<1), R15 + + // table[nextHash] = uint16(s) + MOVD R7, R3 + SUB R6, R3, R3 + + MOVH R3, 0(R17)(R11<<1) + + // nextHash = hash(load32(src, nextS), shift) + MOVW 0(R13), R11 + MULW R16, R11 + LSRW R5, R11, R11 + + // if load32(src, s) != load32(src, candidate) { continue } break + MOVW 0(R7), R3 + MOVW (R6)(R15*1), R4 + CMPW R4, R3 + BNE inner0 + +fourByteMatch: + // As per the encode_other.go code: + // + // A 4-byte match has been found. We'll later see etc. + + // !!! Jump to a fast path for short (<= 16 byte) literals. See the comment + // on inputMargin in encode.go. + MOVD R7, R3 + SUB R10, R3, R3 + CMP $16, R3 + BLE emitLiteralFastPath + + // ---------------------------------------- + // Begin inline of the emitLiteral call. + // + // d += emitLiteral(dst[d:], src[nextEmit:s]) + + MOVW R3, R4 + SUBW $1, R4, R4 + + MOVW $60, R2 + CMPW R2, R4 + BLT inlineEmitLiteralOneByte + MOVW $256, R2 + CMPW R2, R4 + BLT inlineEmitLiteralTwoBytes + +inlineEmitLiteralThreeBytes: + MOVD $0xf4, R1 + MOVB R1, 0(R8) + MOVW R4, 1(R8) + ADD $3, R8, R8 + B inlineEmitLiteralMemmove + +inlineEmitLiteralTwoBytes: + MOVD $0xf0, R1 + MOVB R1, 0(R8) + MOVB R4, 1(R8) + ADD $2, R8, R8 + B inlineEmitLiteralMemmove + +inlineEmitLiteralOneByte: + LSLW $2, R4, R4 + MOVB R4, 0(R8) + ADD $1, R8, R8 + +inlineEmitLiteralMemmove: + // Spill local variables (registers) onto the stack; call; unspill. + // + // copy(dst[i:], lit) + // + // This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push + // R8, R10 and R3 as arguments. + MOVD R8, 8(RSP) + MOVD R10, 16(RSP) + MOVD R3, 24(RSP) + + // Finish the "d +=" part of "d += emitLiteral(etc)". + ADD R3, R8, R8 + MOVD R7, 80(RSP) + MOVD R8, 88(RSP) + MOVD R15, 120(RSP) + CALL runtime·memmove(SB) + MOVD 64(RSP), R5 + MOVD 72(RSP), R6 + MOVD 80(RSP), R7 + MOVD 88(RSP), R8 + MOVD 96(RSP), R9 + MOVD 120(RSP), R15 + ADD $128, RSP, R17 + MOVW $0xa7bd, R16 + MOVKW $(0x1e35<<16), R16 + B inner1 + +inlineEmitLiteralEnd: + // End inline of the emitLiteral call. + // ---------------------------------------- + +emitLiteralFastPath: + // !!! Emit the 1-byte encoding "uint8(len(lit)-1)<<2". + MOVB R3, R4 + SUBW $1, R4, R4 + AND $0xff, R4, R4 + LSLW $2, R4, R4 + MOVB R4, (R8) + ADD $1, R8, R8 + + // !!! Implement the copy from lit to dst as a 16-byte load and store. + // (Encode's documentation says that dst and src must not overlap.) + // + // This always copies 16 bytes, instead of only len(lit) bytes, but that's + // OK. Subsequent iterations will fix up the overrun. + // + // Note that on arm64, it is legal and cheap to issue unaligned 8-byte or + // 16-byte loads and stores. This technique probably wouldn't be as + // effective on architectures that are fussier about alignment. + LDP 0(R10), (R0, R1) + STP (R0, R1), 0(R8) + ADD R3, R8, R8 + +inner1: + // for { etc } + + // base := s + MOVD R7, R12 + + // !!! offset := base - candidate + MOVD R12, R11 + SUB R15, R11, R11 + SUB R6, R11, R11 + + // ---------------------------------------- + // Begin inline of the extendMatch call. + // + // s = extendMatch(src, candidate+4, s+4) + + // !!! R14 = &src[len(src)] + MOVD src_len+32(FP), R14 + ADD R6, R14, R14 + + // !!! R13 = &src[len(src) - 8] + MOVD R14, R13 + SUB $8, R13, R13 + + // !!! R15 = &src[candidate + 4] + ADD $4, R15, R15 + ADD R6, R15, R15 + + // !!! s += 4 + ADD $4, R7, R7 + +inlineExtendMatchCmp8: + // As long as we are 8 or more bytes before the end of src, we can load and + // compare 8 bytes at a time. If those 8 bytes are equal, repeat. + CMP R13, R7 + BHI inlineExtendMatchCmp1 + MOVD (R15), R3 + MOVD (R7), R4 + CMP R4, R3 + BNE inlineExtendMatchBSF + ADD $8, R15, R15 + ADD $8, R7, R7 + B inlineExtendMatchCmp8 + +inlineExtendMatchBSF: + // If those 8 bytes were not equal, XOR the two 8 byte values, and return + // the index of the first byte that differs. + // RBIT reverses the bit order, then CLZ counts the leading zeros, the + // combination of which finds the least significant bit which is set. + // The arm64 architecture is little-endian, and the shift by 3 converts + // a bit index to a byte index. + EOR R3, R4, R4 + RBIT R4, R4 + CLZ R4, R4 + ADD R4>>3, R7, R7 + B inlineExtendMatchEnd + +inlineExtendMatchCmp1: + // In src's tail, compare 1 byte at a time. + CMP R7, R14 + BLS inlineExtendMatchEnd + MOVB (R15), R3 + MOVB (R7), R4 + CMP R4, R3 + BNE inlineExtendMatchEnd + ADD $1, R15, R15 + ADD $1, R7, R7 + B inlineExtendMatchCmp1 + +inlineExtendMatchEnd: + // End inline of the extendMatch call. + // ---------------------------------------- + + // ---------------------------------------- + // Begin inline of the emitCopy call. + // + // d += emitCopy(dst[d:], base-candidate, s-base) + + // !!! length := s - base + MOVD R7, R3 + SUB R12, R3, R3 + +inlineEmitCopyLoop0: + // for length >= 68 { etc } + MOVW $68, R2 + CMPW R2, R3 + BLT inlineEmitCopyStep1 + + // Emit a length 64 copy, encoded as 3 bytes. + MOVD $0xfe, R1 + MOVB R1, 0(R8) + MOVW R11, 1(R8) + ADD $3, R8, R8 + SUBW $64, R3, R3 + B inlineEmitCopyLoop0 + +inlineEmitCopyStep1: + // if length > 64 { etc } + MOVW $64, R2 + CMPW R2, R3 + BLE inlineEmitCopyStep2 + + // Emit a length 60 copy, encoded as 3 bytes. + MOVD $0xee, R1 + MOVB R1, 0(R8) + MOVW R11, 1(R8) + ADD $3, R8, R8 + SUBW $60, R3, R3 + +inlineEmitCopyStep2: + // if length >= 12 || offset >= 2048 { goto inlineEmitCopyStep3 } + MOVW $12, R2 + CMPW R2, R3 + BGE inlineEmitCopyStep3 + MOVW $2048, R2 + CMPW R2, R11 + BGE inlineEmitCopyStep3 + + // Emit the remaining copy, encoded as 2 bytes. + MOVB R11, 1(R8) + LSRW $8, R11, R11 + LSLW $5, R11, R11 + SUBW $4, R3, R3 + AND $0xff, R3, R3 + LSLW $2, R3, R3 + ORRW R3, R11, R11 + ORRW $1, R11, R11 + MOVB R11, 0(R8) + ADD $2, R8, R8 + B inlineEmitCopyEnd + +inlineEmitCopyStep3: + // Emit the remaining copy, encoded as 3 bytes. + SUBW $1, R3, R3 + LSLW $2, R3, R3 + ORRW $2, R3, R3 + MOVB R3, 0(R8) + MOVW R11, 1(R8) + ADD $3, R8, R8 + +inlineEmitCopyEnd: + // End inline of the emitCopy call. + // ---------------------------------------- + + // nextEmit = s + MOVD R7, R10 + + // if s >= sLimit { goto emitRemainder } + MOVD R7, R3 + SUB R6, R3, R3 + CMP R3, R9 + BLS emitRemainder + + // As per the encode_other.go code: + // + // We could immediately etc. + + // x := load64(src, s-1) + MOVD -1(R7), R14 + + // prevHash := hash(uint32(x>>0), shift) + MOVW R14, R11 + MULW R16, R11, R11 + LSRW R5, R11, R11 + + // table[prevHash] = uint16(s-1) + MOVD R7, R3 + SUB R6, R3, R3 + SUB $1, R3, R3 + + MOVHU R3, 0(R17)(R11<<1) + + // currHash := hash(uint32(x>>8), shift) + LSR $8, R14, R14 + MOVW R14, R11 + MULW R16, R11, R11 + LSRW R5, R11, R11 + + // candidate = int(table[currHash]) + MOVHU 0(R17)(R11<<1), R15 + + // table[currHash] = uint16(s) + ADD $1, R3, R3 + MOVHU R3, 0(R17)(R11<<1) + + // if uint32(x>>8) == load32(src, candidate) { continue } + MOVW (R6)(R15*1), R4 + CMPW R4, R14 + BEQ inner1 + + // nextHash = hash(uint32(x>>16), shift) + LSR $8, R14, R14 + MOVW R14, R11 + MULW R16, R11, R11 + LSRW R5, R11, R11 + + // s++ + ADD $1, R7, R7 + + // break out of the inner1 for loop, i.e. continue the outer loop. + B outer + +emitRemainder: + // if nextEmit < len(src) { etc } + MOVD src_len+32(FP), R3 + ADD R6, R3, R3 + CMP R3, R10 + BEQ encodeBlockEnd + + // d += emitLiteral(dst[d:], src[nextEmit:]) + // + // Push args. + MOVD R8, 8(RSP) + MOVD $0, 16(RSP) // Unnecessary, as the callee ignores it, but conservative. + MOVD $0, 24(RSP) // Unnecessary, as the callee ignores it, but conservative. + MOVD R10, 32(RSP) + SUB R10, R3, R3 + MOVD R3, 40(RSP) + MOVD R3, 48(RSP) // Unnecessary, as the callee ignores it, but conservative. + + // Spill local variables (registers) onto the stack; call; unspill. + MOVD R8, 88(RSP) + CALL ·emitLiteral(SB) + MOVD 88(RSP), R8 + + // Finish the "d +=" part of "d += emitLiteral(etc)". + MOVD 56(RSP), R1 + ADD R1, R8, R8 + +encodeBlockEnd: + MOVD dst_base+0(FP), R3 + SUB R3, R8, R8 + MOVD R8, d+48(FP) + RET diff --git a/vendor/github.com/golang/snappy/encode_asm.go b/vendor/github.com/golang/snappy/encode_asm.go new file mode 100644 index 000000000..107c1e714 --- /dev/null +++ b/vendor/github.com/golang/snappy/encode_asm.go @@ -0,0 +1,30 @@ +// Copyright 2016 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !appengine +// +build gc +// +build !noasm +// +build amd64 arm64 + +package snappy + +// emitLiteral has the same semantics as in encode_other.go. +// +//go:noescape +func emitLiteral(dst, lit []byte) int + +// emitCopy has the same semantics as in encode_other.go. +// +//go:noescape +func emitCopy(dst []byte, offset, length int) int + +// extendMatch has the same semantics as in encode_other.go. +// +//go:noescape +func extendMatch(src []byte, i, j int) int + +// encodeBlock has the same semantics as in encode_other.go. +// +//go:noescape +func encodeBlock(dst, src []byte) (d int) diff --git a/vendor/github.com/golang/snappy/encode_other.go b/vendor/github.com/golang/snappy/encode_other.go new file mode 100644 index 000000000..296d7f0be --- /dev/null +++ b/vendor/github.com/golang/snappy/encode_other.go @@ -0,0 +1,238 @@ +// Copyright 2016 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !amd64,!arm64 appengine !gc noasm + +package snappy + +func load32(b []byte, i int) uint32 { + b = b[i : i+4 : len(b)] // Help the compiler eliminate bounds checks on the next line. + return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 +} + +func load64(b []byte, i int) uint64 { + b = b[i : i+8 : len(b)] // Help the compiler eliminate bounds checks on the next line. + return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | + uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 +} + +// emitLiteral writes a literal chunk and returns the number of bytes written. +// +// It assumes that: +// dst is long enough to hold the encoded bytes +// 1 <= len(lit) && len(lit) <= 65536 +func emitLiteral(dst, lit []byte) int { + i, n := 0, uint(len(lit)-1) + switch { + case n < 60: + dst[0] = uint8(n)<<2 | tagLiteral + i = 1 + case n < 1<<8: + dst[0] = 60<<2 | tagLiteral + dst[1] = uint8(n) + i = 2 + default: + dst[0] = 61<<2 | tagLiteral + dst[1] = uint8(n) + dst[2] = uint8(n >> 8) + i = 3 + } + return i + copy(dst[i:], lit) +} + +// emitCopy writes a copy chunk and returns the number of bytes written. +// +// It assumes that: +// dst is long enough to hold the encoded bytes +// 1 <= offset && offset <= 65535 +// 4 <= length && length <= 65535 +func emitCopy(dst []byte, offset, length int) int { + i := 0 + // The maximum length for a single tagCopy1 or tagCopy2 op is 64 bytes. The + // threshold for this loop is a little higher (at 68 = 64 + 4), and the + // length emitted down below is is a little lower (at 60 = 64 - 4), because + // it's shorter to encode a length 67 copy as a length 60 tagCopy2 followed + // by a length 7 tagCopy1 (which encodes as 3+2 bytes) than to encode it as + // a length 64 tagCopy2 followed by a length 3 tagCopy2 (which encodes as + // 3+3 bytes). The magic 4 in the 64±4 is because the minimum length for a + // tagCopy1 op is 4 bytes, which is why a length 3 copy has to be an + // encodes-as-3-bytes tagCopy2 instead of an encodes-as-2-bytes tagCopy1. + for length >= 68 { + // Emit a length 64 copy, encoded as 3 bytes. + dst[i+0] = 63<<2 | tagCopy2 + dst[i+1] = uint8(offset) + dst[i+2] = uint8(offset >> 8) + i += 3 + length -= 64 + } + if length > 64 { + // Emit a length 60 copy, encoded as 3 bytes. + dst[i+0] = 59<<2 | tagCopy2 + dst[i+1] = uint8(offset) + dst[i+2] = uint8(offset >> 8) + i += 3 + length -= 60 + } + if length >= 12 || offset >= 2048 { + // Emit the remaining copy, encoded as 3 bytes. + dst[i+0] = uint8(length-1)<<2 | tagCopy2 + dst[i+1] = uint8(offset) + dst[i+2] = uint8(offset >> 8) + return i + 3 + } + // Emit the remaining copy, encoded as 2 bytes. + dst[i+0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1 + dst[i+1] = uint8(offset) + return i + 2 +} + +// extendMatch returns the largest k such that k <= len(src) and that +// src[i:i+k-j] and src[j:k] have the same contents. +// +// It assumes that: +// 0 <= i && i < j && j <= len(src) +func extendMatch(src []byte, i, j int) int { + for ; j < len(src) && src[i] == src[j]; i, j = i+1, j+1 { + } + return j +} + +func hash(u, shift uint32) uint32 { + return (u * 0x1e35a7bd) >> shift +} + +// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It +// assumes that the varint-encoded length of the decompressed bytes has already +// been written. +// +// It also assumes that: +// len(dst) >= MaxEncodedLen(len(src)) && +// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize +func encodeBlock(dst, src []byte) (d int) { + // Initialize the hash table. Its size ranges from 1<<8 to 1<<14 inclusive. + // The table element type is uint16, as s < sLimit and sLimit < len(src) + // and len(src) <= maxBlockSize and maxBlockSize == 65536. + const ( + maxTableSize = 1 << 14 + // tableMask is redundant, but helps the compiler eliminate bounds + // checks. + tableMask = maxTableSize - 1 + ) + shift := uint32(32 - 8) + for tableSize := 1 << 8; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 { + shift-- + } + // In Go, all array elements are zero-initialized, so there is no advantage + // to a smaller tableSize per se. However, it matches the C++ algorithm, + // and in the asm versions of this code, we can get away with zeroing only + // the first tableSize elements. + var table [maxTableSize]uint16 + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := len(src) - inputMargin + + // nextEmit is where in src the next emitLiteral should start from. + nextEmit := 0 + + // The encoded form must start with a literal, as there are no previous + // bytes to copy, so we start looking for hash matches at s == 1. + s := 1 + nextHash := hash(load32(src, s), shift) + + for { + // Copied from the C++ snappy implementation: + // + // Heuristic match skipping: If 32 bytes are scanned with no matches + // found, start looking only at every other byte. If 32 more bytes are + // scanned (or skipped), look at every third byte, etc.. When a match + // is found, immediately go back to looking at every byte. This is a + // small loss (~5% performance, ~0.1% density) for compressible data + // due to more bookkeeping, but for non-compressible data (such as + // JPEG) it's a huge win since the compressor quickly "realizes" the + // data is incompressible and doesn't bother looking for matches + // everywhere. + // + // The "skip" variable keeps track of how many bytes there are since + // the last match; dividing it by 32 (ie. right-shifting by five) gives + // the number of bytes to move ahead for each iteration. + skip := 32 + + nextS := s + candidate := 0 + for { + s = nextS + bytesBetweenHashLookups := skip >> 5 + nextS = s + bytesBetweenHashLookups + skip += bytesBetweenHashLookups + if nextS > sLimit { + goto emitRemainder + } + candidate = int(table[nextHash&tableMask]) + table[nextHash&tableMask] = uint16(s) + nextHash = hash(load32(src, nextS), shift) + if load32(src, s) == load32(src, candidate) { + break + } + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + d += emitLiteral(dst[d:], src[nextEmit:s]) + + // Call emitCopy, and then see if another emitCopy could be our next + // move. Repeat until we find no match for the input immediately after + // what was consumed by the last emitCopy call. + // + // If we exit this loop normally then we need to call emitLiteral next, + // though we don't yet know how big the literal will be. We handle that + // by proceeding to the next iteration of the main loop. We also can + // exit this loop via goto if we get close to exhausting the input. + for { + // Invariant: we have a 4-byte match at s, and no need to emit any + // literal bytes prior to s. + base := s + + // Extend the 4-byte match as long as possible. + // + // This is an inlined version of: + // s = extendMatch(src, candidate+4, s+4) + s += 4 + for i := candidate + 4; s < len(src) && src[i] == src[s]; i, s = i+1, s+1 { + } + + d += emitCopy(dst[d:], base-candidate, s-base) + nextEmit = s + if s >= sLimit { + goto emitRemainder + } + + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-1 and at s. If + // another emitCopy is not our next move, also calculate nextHash + // at s+1. At least on GOARCH=amd64, these three hash calculations + // are faster as one load64 call (with some shifts) instead of + // three load32 calls. + x := load64(src, s-1) + prevHash := hash(uint32(x>>0), shift) + table[prevHash&tableMask] = uint16(s - 1) + currHash := hash(uint32(x>>8), shift) + candidate = int(table[currHash&tableMask]) + table[currHash&tableMask] = uint16(s) + if uint32(x>>8) != load32(src, candidate) { + nextHash = hash(uint32(x>>16), shift) + s++ + break + } + } + } + +emitRemainder: + if nextEmit < len(src) { + d += emitLiteral(dst[d:], src[nextEmit:]) + } + return d +} diff --git a/vendor/github.com/golang/snappy/go.mod b/vendor/github.com/golang/snappy/go.mod new file mode 100644 index 000000000..f6406bb2c --- /dev/null +++ b/vendor/github.com/golang/snappy/go.mod @@ -0,0 +1 @@ +module github.com/golang/snappy diff --git a/vendor/github.com/golang/snappy/snappy.go b/vendor/github.com/golang/snappy/snappy.go new file mode 100644 index 000000000..ece692ea4 --- /dev/null +++ b/vendor/github.com/golang/snappy/snappy.go @@ -0,0 +1,98 @@ +// Copyright 2011 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package snappy implements the Snappy compression format. It aims for very +// high speeds and reasonable compression. +// +// There are actually two Snappy formats: block and stream. They are related, +// but different: trying to decompress block-compressed data as a Snappy stream +// will fail, and vice versa. The block format is the Decode and Encode +// functions and the stream format is the Reader and Writer types. +// +// The block format, the more common case, is used when the complete size (the +// number of bytes) of the original data is known upfront, at the time +// compression starts. The stream format, also known as the framing format, is +// for when that isn't always true. +// +// The canonical, C++ implementation is at https://github.com/google/snappy and +// it only implements the block format. +package snappy // import "github.com/golang/snappy" + +import ( + "hash/crc32" +) + +/* +Each encoded block begins with the varint-encoded length of the decoded data, +followed by a sequence of chunks. Chunks begin and end on byte boundaries. The +first byte of each chunk is broken into its 2 least and 6 most significant bits +called l and m: l ranges in [0, 4) and m ranges in [0, 64). l is the chunk tag. +Zero means a literal tag. All other values mean a copy tag. + +For literal tags: + - If m < 60, the next 1 + m bytes are literal bytes. + - Otherwise, let n be the little-endian unsigned integer denoted by the next + m - 59 bytes. The next 1 + n bytes after that are literal bytes. + +For copy tags, length bytes are copied from offset bytes ago, in the style of +Lempel-Ziv compression algorithms. In particular: + - For l == 1, the offset ranges in [0, 1<<11) and the length in [4, 12). + The length is 4 + the low 3 bits of m. The high 3 bits of m form bits 8-10 + of the offset. The next byte is bits 0-7 of the offset. + - For l == 2, the offset ranges in [0, 1<<16) and the length in [1, 65). + The length is 1 + m. The offset is the little-endian unsigned integer + denoted by the next 2 bytes. + - For l == 3, this tag is a legacy format that is no longer issued by most + encoders. Nonetheless, the offset ranges in [0, 1<<32) and the length in + [1, 65). The length is 1 + m. The offset is the little-endian unsigned + integer denoted by the next 4 bytes. +*/ +const ( + tagLiteral = 0x00 + tagCopy1 = 0x01 + tagCopy2 = 0x02 + tagCopy4 = 0x03 +) + +const ( + checksumSize = 4 + chunkHeaderSize = 4 + magicChunk = "\xff\x06\x00\x00" + magicBody + magicBody = "sNaPpY" + + // maxBlockSize is the maximum size of the input to encodeBlock. It is not + // part of the wire format per se, but some parts of the encoder assume + // that an offset fits into a uint16. + // + // Also, for the framing format (Writer type instead of Encode function), + // https://github.com/google/snappy/blob/master/framing_format.txt says + // that "the uncompressed data in a chunk must be no longer than 65536 + // bytes". + maxBlockSize = 65536 + + // maxEncodedLenOfMaxBlockSize equals MaxEncodedLen(maxBlockSize), but is + // hard coded to be a const instead of a variable, so that obufLen can also + // be a const. Their equivalence is confirmed by + // TestMaxEncodedLenOfMaxBlockSize. + maxEncodedLenOfMaxBlockSize = 76490 + + obufHeaderLen = len(magicChunk) + checksumSize + chunkHeaderSize + obufLen = obufHeaderLen + maxEncodedLenOfMaxBlockSize +) + +const ( + chunkTypeCompressedData = 0x00 + chunkTypeUncompressedData = 0x01 + chunkTypePadding = 0xfe + chunkTypeStreamIdentifier = 0xff +) + +var crcTable = crc32.MakeTable(crc32.Castagnoli) + +// crc implements the checksum specified in section 3 of +// https://github.com/google/snappy/blob/master/framing_format.txt +func crc(b []byte) uint32 { + c := crc32.Update(0, crcTable, b) + return uint32(c>>15|c<<17) + 0xa282ead8 +} diff --git a/vendor/github.com/klauspost/compress/flate/deflate.go b/vendor/github.com/klauspost/compress/flate/deflate.go index 3f428d8b5..40b5802de 100644 --- a/vendor/github.com/klauspost/compress/flate/deflate.go +++ b/vendor/github.com/klauspost/compress/flate/deflate.go @@ -440,8 +440,7 @@ func (d *compressor) deflateLazy() { // index and index-1 are already inserted. If there is not enough // lookahead, the last two strings are not inserted into the hash // table. - var newIndex int - newIndex = s.index + prevLength - 1 + newIndex := s.index + prevLength - 1 // Calculate missing hashes end := newIndex if end > s.maxInsertIndex { diff --git a/vendor/github.com/klauspost/compress/fse/compress.go b/vendor/github.com/klauspost/compress/fse/compress.go index 0d31f5ebc..6f341914c 100644 --- a/vendor/github.com/klauspost/compress/fse/compress.go +++ b/vendor/github.com/klauspost/compress/fse/compress.go @@ -92,7 +92,6 @@ func (c *cState) init(bw *bitWriter, ct *cTable, tableLog uint8, first symbolTra im := int32((nbBitsOut << 16) - first.deltaNbBits) lu := (im >> nbBitsOut) + first.deltaFindState c.state = c.stateTable[lu] - return } // encode the output symbol provided and write it to the bitstream. diff --git a/vendor/github.com/klauspost/compress/huff0/compress.go b/vendor/github.com/klauspost/compress/huff0/compress.go index dea115b33..0823c928c 100644 --- a/vendor/github.com/klauspost/compress/huff0/compress.go +++ b/vendor/github.com/klauspost/compress/huff0/compress.go @@ -536,7 +536,6 @@ func (s *Scratch) huffSort() { } nodes[pos&huffNodesMask] = nodeElt{count: c, symbol: byte(n)} } - return } func (s *Scratch) setMaxHeight(lastNonNull int) uint8 { diff --git a/vendor/github.com/klauspost/compress/snappy/.gitignore b/vendor/github.com/klauspost/compress/snappy/.gitignore deleted file mode 100644 index 042091d9b..000000000 --- a/vendor/github.com/klauspost/compress/snappy/.gitignore +++ /dev/null @@ -1,16 +0,0 @@ -cmd/snappytool/snappytool -testdata/bench - -# These explicitly listed benchmark data files are for an obsolete version of -# snappy_test.go. -testdata/alice29.txt -testdata/asyoulik.txt -testdata/fireworks.jpeg -testdata/geo.protodata -testdata/html -testdata/html_x_4 -testdata/kppkn.gtb -testdata/lcet10.txt -testdata/paper-100k.pdf -testdata/plrabn12.txt -testdata/urls.10K diff --git a/vendor/github.com/klauspost/compress/snappy/AUTHORS b/vendor/github.com/klauspost/compress/snappy/AUTHORS deleted file mode 100644 index bcfa19520..000000000 --- a/vendor/github.com/klauspost/compress/snappy/AUTHORS +++ /dev/null @@ -1,15 +0,0 @@ -# This is the official list of Snappy-Go authors for copyright purposes. -# This file is distinct from the CONTRIBUTORS files. -# See the latter for an explanation. - -# Names should be added to this file as -# Name or Organization -# The email address is not required for organizations. - -# Please keep the list sorted. - -Damian Gryski -Google Inc. -Jan Mercl <0xjnml@gmail.com> -Rodolfo Carvalho -Sebastien Binet diff --git a/vendor/github.com/klauspost/compress/snappy/CONTRIBUTORS b/vendor/github.com/klauspost/compress/snappy/CONTRIBUTORS deleted file mode 100644 index 931ae3160..000000000 --- a/vendor/github.com/klauspost/compress/snappy/CONTRIBUTORS +++ /dev/null @@ -1,37 +0,0 @@ -# This is the official list of people who can contribute -# (and typically have contributed) code to the Snappy-Go repository. -# The AUTHORS file lists the copyright holders; this file -# lists people. For example, Google employees are listed here -# but not in AUTHORS, because Google holds the copyright. -# -# The submission process automatically checks to make sure -# that people submitting code are listed in this file (by email address). -# -# Names should be added to this file only after verifying that -# the individual or the individual's organization has agreed to -# the appropriate Contributor License Agreement, found here: -# -# http://code.google.com/legal/individual-cla-v1.0.html -# http://code.google.com/legal/corporate-cla-v1.0.html -# -# The agreement for individuals can be filled out on the web. -# -# When adding J Random Contributor's name to this file, -# either J's name or J's organization's name should be -# added to the AUTHORS file, depending on whether the -# individual or corporate CLA was used. - -# Names should be added to this file like so: -# Name - -# Please keep the list sorted. - -Damian Gryski -Jan Mercl <0xjnml@gmail.com> -Kai Backman -Marc-Antoine Ruel -Nigel Tao -Rob Pike -Rodolfo Carvalho -Russ Cox -Sebastien Binet diff --git a/vendor/github.com/klauspost/compress/snappy/LICENSE b/vendor/github.com/klauspost/compress/snappy/LICENSE deleted file mode 100644 index 6050c10f4..000000000 --- a/vendor/github.com/klauspost/compress/snappy/LICENSE +++ /dev/null @@ -1,27 +0,0 @@ -Copyright (c) 2011 The Snappy-Go Authors. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/github.com/klauspost/compress/snappy/README b/vendor/github.com/klauspost/compress/snappy/README deleted file mode 100644 index cea12879a..000000000 --- a/vendor/github.com/klauspost/compress/snappy/README +++ /dev/null @@ -1,107 +0,0 @@ -The Snappy compression format in the Go programming language. - -To download and install from source: -$ go get github.com/golang/snappy - -Unless otherwise noted, the Snappy-Go source files are distributed -under the BSD-style license found in the LICENSE file. - - - -Benchmarks. - -The golang/snappy benchmarks include compressing (Z) and decompressing (U) ten -or so files, the same set used by the C++ Snappy code (github.com/google/snappy -and note the "google", not "golang"). On an "Intel(R) Core(TM) i7-3770 CPU @ -3.40GHz", Go's GOARCH=amd64 numbers as of 2016-05-29: - -"go test -test.bench=." - -_UFlat0-8 2.19GB/s ± 0% html -_UFlat1-8 1.41GB/s ± 0% urls -_UFlat2-8 23.5GB/s ± 2% jpg -_UFlat3-8 1.91GB/s ± 0% jpg_200 -_UFlat4-8 14.0GB/s ± 1% pdf -_UFlat5-8 1.97GB/s ± 0% html4 -_UFlat6-8 814MB/s ± 0% txt1 -_UFlat7-8 785MB/s ± 0% txt2 -_UFlat8-8 857MB/s ± 0% txt3 -_UFlat9-8 719MB/s ± 1% txt4 -_UFlat10-8 2.84GB/s ± 0% pb -_UFlat11-8 1.05GB/s ± 0% gaviota - -_ZFlat0-8 1.04GB/s ± 0% html -_ZFlat1-8 534MB/s ± 0% urls -_ZFlat2-8 15.7GB/s ± 1% jpg -_ZFlat3-8 740MB/s ± 3% jpg_200 -_ZFlat4-8 9.20GB/s ± 1% pdf -_ZFlat5-8 991MB/s ± 0% html4 -_ZFlat6-8 379MB/s ± 0% txt1 -_ZFlat7-8 352MB/s ± 0% txt2 -_ZFlat8-8 396MB/s ± 1% txt3 -_ZFlat9-8 327MB/s ± 1% txt4 -_ZFlat10-8 1.33GB/s ± 1% pb -_ZFlat11-8 605MB/s ± 1% gaviota - - - -"go test -test.bench=. -tags=noasm" - -_UFlat0-8 621MB/s ± 2% html -_UFlat1-8 494MB/s ± 1% urls -_UFlat2-8 23.2GB/s ± 1% jpg -_UFlat3-8 1.12GB/s ± 1% jpg_200 -_UFlat4-8 4.35GB/s ± 1% pdf -_UFlat5-8 609MB/s ± 0% html4 -_UFlat6-8 296MB/s ± 0% txt1 -_UFlat7-8 288MB/s ± 0% txt2 -_UFlat8-8 309MB/s ± 1% txt3 -_UFlat9-8 280MB/s ± 1% txt4 -_UFlat10-8 753MB/s ± 0% pb -_UFlat11-8 400MB/s ± 0% gaviota - -_ZFlat0-8 409MB/s ± 1% html -_ZFlat1-8 250MB/s ± 1% urls -_ZFlat2-8 12.3GB/s ± 1% jpg -_ZFlat3-8 132MB/s ± 0% jpg_200 -_ZFlat4-8 2.92GB/s ± 0% pdf -_ZFlat5-8 405MB/s ± 1% html4 -_ZFlat6-8 179MB/s ± 1% txt1 -_ZFlat7-8 170MB/s ± 1% txt2 -_ZFlat8-8 189MB/s ± 1% txt3 -_ZFlat9-8 164MB/s ± 1% txt4 -_ZFlat10-8 479MB/s ± 1% pb -_ZFlat11-8 270MB/s ± 1% gaviota - - - -For comparison (Go's encoded output is byte-for-byte identical to C++'s), here -are the numbers from C++ Snappy's - -make CXXFLAGS="-O2 -DNDEBUG -g" clean snappy_unittest.log && cat snappy_unittest.log - -BM_UFlat/0 2.4GB/s html -BM_UFlat/1 1.4GB/s urls -BM_UFlat/2 21.8GB/s jpg -BM_UFlat/3 1.5GB/s jpg_200 -BM_UFlat/4 13.3GB/s pdf -BM_UFlat/5 2.1GB/s html4 -BM_UFlat/6 1.0GB/s txt1 -BM_UFlat/7 959.4MB/s txt2 -BM_UFlat/8 1.0GB/s txt3 -BM_UFlat/9 864.5MB/s txt4 -BM_UFlat/10 2.9GB/s pb -BM_UFlat/11 1.2GB/s gaviota - -BM_ZFlat/0 944.3MB/s html (22.31 %) -BM_ZFlat/1 501.6MB/s urls (47.78 %) -BM_ZFlat/2 14.3GB/s jpg (99.95 %) -BM_ZFlat/3 538.3MB/s jpg_200 (73.00 %) -BM_ZFlat/4 8.3GB/s pdf (83.30 %) -BM_ZFlat/5 903.5MB/s html4 (22.52 %) -BM_ZFlat/6 336.0MB/s txt1 (57.88 %) -BM_ZFlat/7 312.3MB/s txt2 (61.91 %) -BM_ZFlat/8 353.1MB/s txt3 (54.99 %) -BM_ZFlat/9 289.9MB/s txt4 (66.26 %) -BM_ZFlat/10 1.2GB/s pb (19.68 %) -BM_ZFlat/11 527.4MB/s gaviota (37.72 %) diff --git a/vendor/github.com/klauspost/compress/snappy/decode.go b/vendor/github.com/klauspost/compress/snappy/decode.go deleted file mode 100644 index 72efb0353..000000000 --- a/vendor/github.com/klauspost/compress/snappy/decode.go +++ /dev/null @@ -1,237 +0,0 @@ -// Copyright 2011 The Snappy-Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package snappy - -import ( - "encoding/binary" - "errors" - "io" -) - -var ( - // ErrCorrupt reports that the input is invalid. - ErrCorrupt = errors.New("snappy: corrupt input") - // ErrTooLarge reports that the uncompressed length is too large. - ErrTooLarge = errors.New("snappy: decoded block is too large") - // ErrUnsupported reports that the input isn't supported. - ErrUnsupported = errors.New("snappy: unsupported input") - - errUnsupportedLiteralLength = errors.New("snappy: unsupported literal length") -) - -// DecodedLen returns the length of the decoded block. -func DecodedLen(src []byte) (int, error) { - v, _, err := decodedLen(src) - return v, err -} - -// decodedLen returns the length of the decoded block and the number of bytes -// that the length header occupied. -func decodedLen(src []byte) (blockLen, headerLen int, err error) { - v, n := binary.Uvarint(src) - if n <= 0 || v > 0xffffffff { - return 0, 0, ErrCorrupt - } - - const wordSize = 32 << (^uint(0) >> 32 & 1) - if wordSize == 32 && v > 0x7fffffff { - return 0, 0, ErrTooLarge - } - return int(v), n, nil -} - -const ( - decodeErrCodeCorrupt = 1 - decodeErrCodeUnsupportedLiteralLength = 2 -) - -// Decode returns the decoded form of src. The returned slice may be a sub- -// slice of dst if dst was large enough to hold the entire decoded block. -// Otherwise, a newly allocated slice will be returned. -// -// The dst and src must not overlap. It is valid to pass a nil dst. -func Decode(dst, src []byte) ([]byte, error) { - dLen, s, err := decodedLen(src) - if err != nil { - return nil, err - } - if dLen <= len(dst) { - dst = dst[:dLen] - } else { - dst = make([]byte, dLen) - } - switch decode(dst, src[s:]) { - case 0: - return dst, nil - case decodeErrCodeUnsupportedLiteralLength: - return nil, errUnsupportedLiteralLength - } - return nil, ErrCorrupt -} - -// NewReader returns a new Reader that decompresses from r, using the framing -// format described at -// https://github.com/google/snappy/blob/master/framing_format.txt -func NewReader(r io.Reader) *Reader { - return &Reader{ - r: r, - decoded: make([]byte, maxBlockSize), - buf: make([]byte, maxEncodedLenOfMaxBlockSize+checksumSize), - } -} - -// Reader is an io.Reader that can read Snappy-compressed bytes. -type Reader struct { - r io.Reader - err error - decoded []byte - buf []byte - // decoded[i:j] contains decoded bytes that have not yet been passed on. - i, j int - readHeader bool -} - -// Reset discards any buffered data, resets all state, and switches the Snappy -// reader to read from r. This permits reusing a Reader rather than allocating -// a new one. -func (r *Reader) Reset(reader io.Reader) { - r.r = reader - r.err = nil - r.i = 0 - r.j = 0 - r.readHeader = false -} - -func (r *Reader) readFull(p []byte, allowEOF bool) (ok bool) { - if _, r.err = io.ReadFull(r.r, p); r.err != nil { - if r.err == io.ErrUnexpectedEOF || (r.err == io.EOF && !allowEOF) { - r.err = ErrCorrupt - } - return false - } - return true -} - -// Read satisfies the io.Reader interface. -func (r *Reader) Read(p []byte) (int, error) { - if r.err != nil { - return 0, r.err - } - for { - if r.i < r.j { - n := copy(p, r.decoded[r.i:r.j]) - r.i += n - return n, nil - } - if !r.readFull(r.buf[:4], true) { - return 0, r.err - } - chunkType := r.buf[0] - if !r.readHeader { - if chunkType != chunkTypeStreamIdentifier { - r.err = ErrCorrupt - return 0, r.err - } - r.readHeader = true - } - chunkLen := int(r.buf[1]) | int(r.buf[2])<<8 | int(r.buf[3])<<16 - if chunkLen > len(r.buf) { - r.err = ErrUnsupported - return 0, r.err - } - - // The chunk types are specified at - // https://github.com/google/snappy/blob/master/framing_format.txt - switch chunkType { - case chunkTypeCompressedData: - // Section 4.2. Compressed data (chunk type 0x00). - if chunkLen < checksumSize { - r.err = ErrCorrupt - return 0, r.err - } - buf := r.buf[:chunkLen] - if !r.readFull(buf, false) { - return 0, r.err - } - checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 - buf = buf[checksumSize:] - - n, err := DecodedLen(buf) - if err != nil { - r.err = err - return 0, r.err - } - if n > len(r.decoded) { - r.err = ErrCorrupt - return 0, r.err - } - if _, err := Decode(r.decoded, buf); err != nil { - r.err = err - return 0, r.err - } - if crc(r.decoded[:n]) != checksum { - r.err = ErrCorrupt - return 0, r.err - } - r.i, r.j = 0, n - continue - - case chunkTypeUncompressedData: - // Section 4.3. Uncompressed data (chunk type 0x01). - if chunkLen < checksumSize { - r.err = ErrCorrupt - return 0, r.err - } - buf := r.buf[:checksumSize] - if !r.readFull(buf, false) { - return 0, r.err - } - checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 - // Read directly into r.decoded instead of via r.buf. - n := chunkLen - checksumSize - if n > len(r.decoded) { - r.err = ErrCorrupt - return 0, r.err - } - if !r.readFull(r.decoded[:n], false) { - return 0, r.err - } - if crc(r.decoded[:n]) != checksum { - r.err = ErrCorrupt - return 0, r.err - } - r.i, r.j = 0, n - continue - - case chunkTypeStreamIdentifier: - // Section 4.1. Stream identifier (chunk type 0xff). - if chunkLen != len(magicBody) { - r.err = ErrCorrupt - return 0, r.err - } - if !r.readFull(r.buf[:len(magicBody)], false) { - return 0, r.err - } - for i := 0; i < len(magicBody); i++ { - if r.buf[i] != magicBody[i] { - r.err = ErrCorrupt - return 0, r.err - } - } - continue - } - - if chunkType <= 0x7f { - // Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f). - r.err = ErrUnsupported - return 0, r.err - } - // Section 4.4 Padding (chunk type 0xfe). - // Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd). - if !r.readFull(r.buf[:chunkLen], false) { - return 0, r.err - } - } -} diff --git a/vendor/github.com/klauspost/compress/snappy/decode_amd64.go b/vendor/github.com/klauspost/compress/snappy/decode_amd64.go deleted file mode 100644 index fcd192b84..000000000 --- a/vendor/github.com/klauspost/compress/snappy/decode_amd64.go +++ /dev/null @@ -1,14 +0,0 @@ -// Copyright 2016 The Snappy-Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build !appengine -// +build gc -// +build !noasm - -package snappy - -// decode has the same semantics as in decode_other.go. -// -//go:noescape -func decode(dst, src []byte) int diff --git a/vendor/github.com/klauspost/compress/snappy/decode_amd64.s b/vendor/github.com/klauspost/compress/snappy/decode_amd64.s deleted file mode 100644 index 1c66e3723..000000000 --- a/vendor/github.com/klauspost/compress/snappy/decode_amd64.s +++ /dev/null @@ -1,482 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build !appengine -// +build gc -// +build !noasm - -#include "textflag.h" - -// The asm code generally follows the pure Go code in decode_other.go, except -// where marked with a "!!!". - -// func decode(dst, src []byte) int -// -// All local variables fit into registers. The non-zero stack size is only to -// spill registers and push args when issuing a CALL. The register allocation: -// - AX scratch -// - BX scratch -// - CX length or x -// - DX offset -// - SI &src[s] -// - DI &dst[d] -// + R8 dst_base -// + R9 dst_len -// + R10 dst_base + dst_len -// + R11 src_base -// + R12 src_len -// + R13 src_base + src_len -// - R14 used by doCopy -// - R15 used by doCopy -// -// The registers R8-R13 (marked with a "+") are set at the start of the -// function, and after a CALL returns, and are not otherwise modified. -// -// The d variable is implicitly DI - R8, and len(dst)-d is R10 - DI. -// The s variable is implicitly SI - R11, and len(src)-s is R13 - SI. -TEXT ·decode(SB), NOSPLIT, $48-56 - // Initialize SI, DI and R8-R13. - MOVQ dst_base+0(FP), R8 - MOVQ dst_len+8(FP), R9 - MOVQ R8, DI - MOVQ R8, R10 - ADDQ R9, R10 - MOVQ src_base+24(FP), R11 - MOVQ src_len+32(FP), R12 - MOVQ R11, SI - MOVQ R11, R13 - ADDQ R12, R13 - -loop: - // for s < len(src) - CMPQ SI, R13 - JEQ end - - // CX = uint32(src[s]) - // - // switch src[s] & 0x03 - MOVBLZX (SI), CX - MOVL CX, BX - ANDL $3, BX - CMPL BX, $1 - JAE tagCopy - - // ---------------------------------------- - // The code below handles literal tags. - - // case tagLiteral: - // x := uint32(src[s] >> 2) - // switch - SHRL $2, CX - CMPL CX, $60 - JAE tagLit60Plus - - // case x < 60: - // s++ - INCQ SI - -doLit: - // This is the end of the inner "switch", when we have a literal tag. - // - // We assume that CX == x and x fits in a uint32, where x is the variable - // used in the pure Go decode_other.go code. - - // length = int(x) + 1 - // - // Unlike the pure Go code, we don't need to check if length <= 0 because - // CX can hold 64 bits, so the increment cannot overflow. - INCQ CX - - // Prepare to check if copying length bytes will run past the end of dst or - // src. - // - // AX = len(dst) - d - // BX = len(src) - s - MOVQ R10, AX - SUBQ DI, AX - MOVQ R13, BX - SUBQ SI, BX - - // !!! Try a faster technique for short (16 or fewer bytes) copies. - // - // if length > 16 || len(dst)-d < 16 || len(src)-s < 16 { - // goto callMemmove // Fall back on calling runtime·memmove. - // } - // - // The C++ snappy code calls this TryFastAppend. It also checks len(src)-s - // against 21 instead of 16, because it cannot assume that all of its input - // is contiguous in memory and so it needs to leave enough source bytes to - // read the next tag without refilling buffers, but Go's Decode assumes - // contiguousness (the src argument is a []byte). - CMPQ CX, $16 - JGT callMemmove - CMPQ AX, $16 - JLT callMemmove - CMPQ BX, $16 - JLT callMemmove - - // !!! Implement the copy from src to dst as a 16-byte load and store. - // (Decode's documentation says that dst and src must not overlap.) - // - // This always copies 16 bytes, instead of only length bytes, but that's - // OK. If the input is a valid Snappy encoding then subsequent iterations - // will fix up the overrun. Otherwise, Decode returns a nil []byte (and a - // non-nil error), so the overrun will be ignored. - // - // Note that on amd64, it is legal and cheap to issue unaligned 8-byte or - // 16-byte loads and stores. This technique probably wouldn't be as - // effective on architectures that are fussier about alignment. - MOVOU 0(SI), X0 - MOVOU X0, 0(DI) - - // d += length - // s += length - ADDQ CX, DI - ADDQ CX, SI - JMP loop - -callMemmove: - // if length > len(dst)-d || length > len(src)-s { etc } - CMPQ CX, AX - JGT errCorrupt - CMPQ CX, BX - JGT errCorrupt - - // copy(dst[d:], src[s:s+length]) - // - // This means calling runtime·memmove(&dst[d], &src[s], length), so we push - // DI, SI and CX as arguments. Coincidentally, we also need to spill those - // three registers to the stack, to save local variables across the CALL. - MOVQ DI, 0(SP) - MOVQ SI, 8(SP) - MOVQ CX, 16(SP) - MOVQ DI, 24(SP) - MOVQ SI, 32(SP) - MOVQ CX, 40(SP) - CALL runtime·memmove(SB) - - // Restore local variables: unspill registers from the stack and - // re-calculate R8-R13. - MOVQ 24(SP), DI - MOVQ 32(SP), SI - MOVQ 40(SP), CX - MOVQ dst_base+0(FP), R8 - MOVQ dst_len+8(FP), R9 - MOVQ R8, R10 - ADDQ R9, R10 - MOVQ src_base+24(FP), R11 - MOVQ src_len+32(FP), R12 - MOVQ R11, R13 - ADDQ R12, R13 - - // d += length - // s += length - ADDQ CX, DI - ADDQ CX, SI - JMP loop - -tagLit60Plus: - // !!! This fragment does the - // - // s += x - 58; if uint(s) > uint(len(src)) { etc } - // - // checks. In the asm version, we code it once instead of once per switch case. - ADDQ CX, SI - SUBQ $58, SI - CMPQ SI, R13 - JA errCorrupt - - // case x == 60: - CMPL CX, $61 - JEQ tagLit61 - JA tagLit62Plus - - // x = uint32(src[s-1]) - MOVBLZX -1(SI), CX - JMP doLit - -tagLit61: - // case x == 61: - // x = uint32(src[s-2]) | uint32(src[s-1])<<8 - MOVWLZX -2(SI), CX - JMP doLit - -tagLit62Plus: - CMPL CX, $62 - JA tagLit63 - - // case x == 62: - // x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16 - MOVWLZX -3(SI), CX - MOVBLZX -1(SI), BX - SHLL $16, BX - ORL BX, CX - JMP doLit - -tagLit63: - // case x == 63: - // x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24 - MOVL -4(SI), CX - JMP doLit - -// The code above handles literal tags. -// ---------------------------------------- -// The code below handles copy tags. - -tagCopy4: - // case tagCopy4: - // s += 5 - ADDQ $5, SI - - // if uint(s) > uint(len(src)) { etc } - CMPQ SI, R13 - JA errCorrupt - - // length = 1 + int(src[s-5])>>2 - SHRQ $2, CX - INCQ CX - - // offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24) - MOVLQZX -4(SI), DX - JMP doCopy - -tagCopy2: - // case tagCopy2: - // s += 3 - ADDQ $3, SI - - // if uint(s) > uint(len(src)) { etc } - CMPQ SI, R13 - JA errCorrupt - - // length = 1 + int(src[s-3])>>2 - SHRQ $2, CX - INCQ CX - - // offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8) - MOVWQZX -2(SI), DX - JMP doCopy - -tagCopy: - // We have a copy tag. We assume that: - // - BX == src[s] & 0x03 - // - CX == src[s] - CMPQ BX, $2 - JEQ tagCopy2 - JA tagCopy4 - - // case tagCopy1: - // s += 2 - ADDQ $2, SI - - // if uint(s) > uint(len(src)) { etc } - CMPQ SI, R13 - JA errCorrupt - - // offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1])) - MOVQ CX, DX - ANDQ $0xe0, DX - SHLQ $3, DX - MOVBQZX -1(SI), BX - ORQ BX, DX - - // length = 4 + int(src[s-2])>>2&0x7 - SHRQ $2, CX - ANDQ $7, CX - ADDQ $4, CX - -doCopy: - // This is the end of the outer "switch", when we have a copy tag. - // - // We assume that: - // - CX == length && CX > 0 - // - DX == offset - - // if offset <= 0 { etc } - CMPQ DX, $0 - JLE errCorrupt - - // if d < offset { etc } - MOVQ DI, BX - SUBQ R8, BX - CMPQ BX, DX - JLT errCorrupt - - // if length > len(dst)-d { etc } - MOVQ R10, BX - SUBQ DI, BX - CMPQ CX, BX - JGT errCorrupt - - // forwardCopy(dst[d:d+length], dst[d-offset:]); d += length - // - // Set: - // - R14 = len(dst)-d - // - R15 = &dst[d-offset] - MOVQ R10, R14 - SUBQ DI, R14 - MOVQ DI, R15 - SUBQ DX, R15 - - // !!! Try a faster technique for short (16 or fewer bytes) forward copies. - // - // First, try using two 8-byte load/stores, similar to the doLit technique - // above. Even if dst[d:d+length] and dst[d-offset:] can overlap, this is - // still OK if offset >= 8. Note that this has to be two 8-byte load/stores - // and not one 16-byte load/store, and the first store has to be before the - // second load, due to the overlap if offset is in the range [8, 16). - // - // if length > 16 || offset < 8 || len(dst)-d < 16 { - // goto slowForwardCopy - // } - // copy 16 bytes - // d += length - CMPQ CX, $16 - JGT slowForwardCopy - CMPQ DX, $8 - JLT slowForwardCopy - CMPQ R14, $16 - JLT slowForwardCopy - MOVQ 0(R15), AX - MOVQ AX, 0(DI) - MOVQ 8(R15), BX - MOVQ BX, 8(DI) - ADDQ CX, DI - JMP loop - -slowForwardCopy: - // !!! If the forward copy is longer than 16 bytes, or if offset < 8, we - // can still try 8-byte load stores, provided we can overrun up to 10 extra - // bytes. As above, the overrun will be fixed up by subsequent iterations - // of the outermost loop. - // - // The C++ snappy code calls this technique IncrementalCopyFastPath. Its - // commentary says: - // - // ---- - // - // The main part of this loop is a simple copy of eight bytes at a time - // until we've copied (at least) the requested amount of bytes. However, - // if d and d-offset are less than eight bytes apart (indicating a - // repeating pattern of length < 8), we first need to expand the pattern in - // order to get the correct results. For instance, if the buffer looks like - // this, with the eight-byte and patterns marked as - // intervals: - // - // abxxxxxxxxxxxx - // [------] d-offset - // [------] d - // - // a single eight-byte copy from to will repeat the pattern - // once, after which we can move two bytes without moving : - // - // ababxxxxxxxxxx - // [------] d-offset - // [------] d - // - // and repeat the exercise until the two no longer overlap. - // - // This allows us to do very well in the special case of one single byte - // repeated many times, without taking a big hit for more general cases. - // - // The worst case of extra writing past the end of the match occurs when - // offset == 1 and length == 1; the last copy will read from byte positions - // [0..7] and write to [4..11], whereas it was only supposed to write to - // position 1. Thus, ten excess bytes. - // - // ---- - // - // That "10 byte overrun" worst case is confirmed by Go's - // TestSlowForwardCopyOverrun, which also tests the fixUpSlowForwardCopy - // and finishSlowForwardCopy algorithm. - // - // if length > len(dst)-d-10 { - // goto verySlowForwardCopy - // } - SUBQ $10, R14 - CMPQ CX, R14 - JGT verySlowForwardCopy - -makeOffsetAtLeast8: - // !!! As above, expand the pattern so that offset >= 8 and we can use - // 8-byte load/stores. - // - // for offset < 8 { - // copy 8 bytes from dst[d-offset:] to dst[d:] - // length -= offset - // d += offset - // offset += offset - // // The two previous lines together means that d-offset, and therefore - // // R15, is unchanged. - // } - CMPQ DX, $8 - JGE fixUpSlowForwardCopy - MOVQ (R15), BX - MOVQ BX, (DI) - SUBQ DX, CX - ADDQ DX, DI - ADDQ DX, DX - JMP makeOffsetAtLeast8 - -fixUpSlowForwardCopy: - // !!! Add length (which might be negative now) to d (implied by DI being - // &dst[d]) so that d ends up at the right place when we jump back to the - // top of the loop. Before we do that, though, we save DI to AX so that, if - // length is positive, copying the remaining length bytes will write to the - // right place. - MOVQ DI, AX - ADDQ CX, DI - -finishSlowForwardCopy: - // !!! Repeat 8-byte load/stores until length <= 0. Ending with a negative - // length means that we overrun, but as above, that will be fixed up by - // subsequent iterations of the outermost loop. - CMPQ CX, $0 - JLE loop - MOVQ (R15), BX - MOVQ BX, (AX) - ADDQ $8, R15 - ADDQ $8, AX - SUBQ $8, CX - JMP finishSlowForwardCopy - -verySlowForwardCopy: - // verySlowForwardCopy is a simple implementation of forward copy. In C - // parlance, this is a do/while loop instead of a while loop, since we know - // that length > 0. In Go syntax: - // - // for { - // dst[d] = dst[d - offset] - // d++ - // length-- - // if length == 0 { - // break - // } - // } - MOVB (R15), BX - MOVB BX, (DI) - INCQ R15 - INCQ DI - DECQ CX - JNZ verySlowForwardCopy - JMP loop - -// The code above handles copy tags. -// ---------------------------------------- - -end: - // This is the end of the "for s < len(src)". - // - // if d != len(dst) { etc } - CMPQ DI, R10 - JNE errCorrupt - - // return 0 - MOVQ $0, ret+48(FP) - RET - -errCorrupt: - // return decodeErrCodeCorrupt - MOVQ $1, ret+48(FP) - RET diff --git a/vendor/github.com/klauspost/compress/snappy/decode_other.go b/vendor/github.com/klauspost/compress/snappy/decode_other.go deleted file mode 100644 index 94a96c5d7..000000000 --- a/vendor/github.com/klauspost/compress/snappy/decode_other.go +++ /dev/null @@ -1,115 +0,0 @@ -// Copyright 2016 The Snappy-Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build !amd64 appengine !gc noasm - -package snappy - -// decode writes the decoding of src to dst. It assumes that the varint-encoded -// length of the decompressed bytes has already been read, and that len(dst) -// equals that length. -// -// It returns 0 on success or a decodeErrCodeXxx error code on failure. -func decode(dst, src []byte) int { - var d, s, offset, length int - for s < len(src) { - switch src[s] & 0x03 { - case tagLiteral: - x := uint32(src[s] >> 2) - switch { - case x < 60: - s++ - case x == 60: - s += 2 - if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. - return decodeErrCodeCorrupt - } - x = uint32(src[s-1]) - case x == 61: - s += 3 - if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. - return decodeErrCodeCorrupt - } - x = uint32(src[s-2]) | uint32(src[s-1])<<8 - case x == 62: - s += 4 - if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. - return decodeErrCodeCorrupt - } - x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16 - case x == 63: - s += 5 - if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. - return decodeErrCodeCorrupt - } - x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24 - } - length = int(x) + 1 - if length <= 0 { - return decodeErrCodeUnsupportedLiteralLength - } - if length > len(dst)-d || length > len(src)-s { - return decodeErrCodeCorrupt - } - copy(dst[d:], src[s:s+length]) - d += length - s += length - continue - - case tagCopy1: - s += 2 - if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. - return decodeErrCodeCorrupt - } - length = 4 + int(src[s-2])>>2&0x7 - offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1])) - - case tagCopy2: - s += 3 - if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. - return decodeErrCodeCorrupt - } - length = 1 + int(src[s-3])>>2 - offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8) - - case tagCopy4: - s += 5 - if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. - return decodeErrCodeCorrupt - } - length = 1 + int(src[s-5])>>2 - offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24) - } - - if offset <= 0 || d < offset || length > len(dst)-d { - return decodeErrCodeCorrupt - } - // Copy from an earlier sub-slice of dst to a later sub-slice. - // If no overlap, use the built-in copy: - if offset > length { - copy(dst[d:d+length], dst[d-offset:]) - d += length - continue - } - - // Unlike the built-in copy function, this byte-by-byte copy always runs - // forwards, even if the slices overlap. Conceptually, this is: - // - // d += forwardCopy(dst[d:d+length], dst[d-offset:]) - // - // We align the slices into a and b and show the compiler they are the same size. - // This allows the loop to run without bounds checks. - a := dst[d : d+length] - b := dst[d-offset:] - b = b[:len(a)] - for i := range a { - a[i] = b[i] - } - d += length - } - if d != len(dst) { - return decodeErrCodeCorrupt - } - return 0 -} diff --git a/vendor/github.com/klauspost/compress/snappy/encode.go b/vendor/github.com/klauspost/compress/snappy/encode.go deleted file mode 100644 index 8d393e904..000000000 --- a/vendor/github.com/klauspost/compress/snappy/encode.go +++ /dev/null @@ -1,285 +0,0 @@ -// Copyright 2011 The Snappy-Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package snappy - -import ( - "encoding/binary" - "errors" - "io" -) - -// Encode returns the encoded form of src. The returned slice may be a sub- -// slice of dst if dst was large enough to hold the entire encoded block. -// Otherwise, a newly allocated slice will be returned. -// -// The dst and src must not overlap. It is valid to pass a nil dst. -func Encode(dst, src []byte) []byte { - if n := MaxEncodedLen(len(src)); n < 0 { - panic(ErrTooLarge) - } else if len(dst) < n { - dst = make([]byte, n) - } - - // The block starts with the varint-encoded length of the decompressed bytes. - d := binary.PutUvarint(dst, uint64(len(src))) - - for len(src) > 0 { - p := src - src = nil - if len(p) > maxBlockSize { - p, src = p[:maxBlockSize], p[maxBlockSize:] - } - if len(p) < minNonLiteralBlockSize { - d += emitLiteral(dst[d:], p) - } else { - d += encodeBlock(dst[d:], p) - } - } - return dst[:d] -} - -// inputMargin is the minimum number of extra input bytes to keep, inside -// encodeBlock's inner loop. On some architectures, this margin lets us -// implement a fast path for emitLiteral, where the copy of short (<= 16 byte) -// literals can be implemented as a single load to and store from a 16-byte -// register. That literal's actual length can be as short as 1 byte, so this -// can copy up to 15 bytes too much, but that's OK as subsequent iterations of -// the encoding loop will fix up the copy overrun, and this inputMargin ensures -// that we don't overrun the dst and src buffers. -const inputMargin = 16 - 1 - -// minNonLiteralBlockSize is the minimum size of the input to encodeBlock that -// could be encoded with a copy tag. This is the minimum with respect to the -// algorithm used by encodeBlock, not a minimum enforced by the file format. -// -// The encoded output must start with at least a 1 byte literal, as there are -// no previous bytes to copy. A minimal (1 byte) copy after that, generated -// from an emitCopy call in encodeBlock's main loop, would require at least -// another inputMargin bytes, for the reason above: we want any emitLiteral -// calls inside encodeBlock's main loop to use the fast path if possible, which -// requires being able to overrun by inputMargin bytes. Thus, -// minNonLiteralBlockSize equals 1 + 1 + inputMargin. -// -// The C++ code doesn't use this exact threshold, but it could, as discussed at -// https://groups.google.com/d/topic/snappy-compression/oGbhsdIJSJ8/discussion -// The difference between Go (2+inputMargin) and C++ (inputMargin) is purely an -// optimization. It should not affect the encoded form. This is tested by -// TestSameEncodingAsCppShortCopies. -const minNonLiteralBlockSize = 1 + 1 + inputMargin - -// MaxEncodedLen returns the maximum length of a snappy block, given its -// uncompressed length. -// -// It will return a negative value if srcLen is too large to encode. -func MaxEncodedLen(srcLen int) int { - n := uint64(srcLen) - if n > 0xffffffff { - return -1 - } - // Compressed data can be defined as: - // compressed := item* literal* - // item := literal* copy - // - // The trailing literal sequence has a space blowup of at most 62/60 - // since a literal of length 60 needs one tag byte + one extra byte - // for length information. - // - // Item blowup is trickier to measure. Suppose the "copy" op copies - // 4 bytes of data. Because of a special check in the encoding code, - // we produce a 4-byte copy only if the offset is < 65536. Therefore - // the copy op takes 3 bytes to encode, and this type of item leads - // to at most the 62/60 blowup for representing literals. - // - // Suppose the "copy" op copies 5 bytes of data. If the offset is big - // enough, it will take 5 bytes to encode the copy op. Therefore the - // worst case here is a one-byte literal followed by a five-byte copy. - // That is, 6 bytes of input turn into 7 bytes of "compressed" data. - // - // This last factor dominates the blowup, so the final estimate is: - n = 32 + n + n/6 - if n > 0xffffffff { - return -1 - } - return int(n) -} - -var errClosed = errors.New("snappy: Writer is closed") - -// NewWriter returns a new Writer that compresses to w. -// -// The Writer returned does not buffer writes. There is no need to Flush or -// Close such a Writer. -// -// Deprecated: the Writer returned is not suitable for many small writes, only -// for few large writes. Use NewBufferedWriter instead, which is efficient -// regardless of the frequency and shape of the writes, and remember to Close -// that Writer when done. -func NewWriter(w io.Writer) *Writer { - return &Writer{ - w: w, - obuf: make([]byte, obufLen), - } -} - -// NewBufferedWriter returns a new Writer that compresses to w, using the -// framing format described at -// https://github.com/google/snappy/blob/master/framing_format.txt -// -// The Writer returned buffers writes. Users must call Close to guarantee all -// data has been forwarded to the underlying io.Writer. They may also call -// Flush zero or more times before calling Close. -func NewBufferedWriter(w io.Writer) *Writer { - return &Writer{ - w: w, - ibuf: make([]byte, 0, maxBlockSize), - obuf: make([]byte, obufLen), - } -} - -// Writer is an io.Writer that can write Snappy-compressed bytes. -type Writer struct { - w io.Writer - err error - - // ibuf is a buffer for the incoming (uncompressed) bytes. - // - // Its use is optional. For backwards compatibility, Writers created by the - // NewWriter function have ibuf == nil, do not buffer incoming bytes, and - // therefore do not need to be Flush'ed or Close'd. - ibuf []byte - - // obuf is a buffer for the outgoing (compressed) bytes. - obuf []byte - - // wroteStreamHeader is whether we have written the stream header. - wroteStreamHeader bool -} - -// Reset discards the writer's state and switches the Snappy writer to write to -// w. This permits reusing a Writer rather than allocating a new one. -func (w *Writer) Reset(writer io.Writer) { - w.w = writer - w.err = nil - if w.ibuf != nil { - w.ibuf = w.ibuf[:0] - } - w.wroteStreamHeader = false -} - -// Write satisfies the io.Writer interface. -func (w *Writer) Write(p []byte) (nRet int, errRet error) { - if w.ibuf == nil { - // Do not buffer incoming bytes. This does not perform or compress well - // if the caller of Writer.Write writes many small slices. This - // behavior is therefore deprecated, but still supported for backwards - // compatibility with code that doesn't explicitly Flush or Close. - return w.write(p) - } - - // The remainder of this method is based on bufio.Writer.Write from the - // standard library. - - for len(p) > (cap(w.ibuf)-len(w.ibuf)) && w.err == nil { - var n int - if len(w.ibuf) == 0 { - // Large write, empty buffer. - // Write directly from p to avoid copy. - n, _ = w.write(p) - } else { - n = copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p) - w.ibuf = w.ibuf[:len(w.ibuf)+n] - w.Flush() - } - nRet += n - p = p[n:] - } - if w.err != nil { - return nRet, w.err - } - n := copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p) - w.ibuf = w.ibuf[:len(w.ibuf)+n] - nRet += n - return nRet, nil -} - -func (w *Writer) write(p []byte) (nRet int, errRet error) { - if w.err != nil { - return 0, w.err - } - for len(p) > 0 { - obufStart := len(magicChunk) - if !w.wroteStreamHeader { - w.wroteStreamHeader = true - copy(w.obuf, magicChunk) - obufStart = 0 - } - - var uncompressed []byte - if len(p) > maxBlockSize { - uncompressed, p = p[:maxBlockSize], p[maxBlockSize:] - } else { - uncompressed, p = p, nil - } - checksum := crc(uncompressed) - - // Compress the buffer, discarding the result if the improvement - // isn't at least 12.5%. - compressed := Encode(w.obuf[obufHeaderLen:], uncompressed) - chunkType := uint8(chunkTypeCompressedData) - chunkLen := 4 + len(compressed) - obufEnd := obufHeaderLen + len(compressed) - if len(compressed) >= len(uncompressed)-len(uncompressed)/8 { - chunkType = chunkTypeUncompressedData - chunkLen = 4 + len(uncompressed) - obufEnd = obufHeaderLen - } - - // Fill in the per-chunk header that comes before the body. - w.obuf[len(magicChunk)+0] = chunkType - w.obuf[len(magicChunk)+1] = uint8(chunkLen >> 0) - w.obuf[len(magicChunk)+2] = uint8(chunkLen >> 8) - w.obuf[len(magicChunk)+3] = uint8(chunkLen >> 16) - w.obuf[len(magicChunk)+4] = uint8(checksum >> 0) - w.obuf[len(magicChunk)+5] = uint8(checksum >> 8) - w.obuf[len(magicChunk)+6] = uint8(checksum >> 16) - w.obuf[len(magicChunk)+7] = uint8(checksum >> 24) - - if _, err := w.w.Write(w.obuf[obufStart:obufEnd]); err != nil { - w.err = err - return nRet, err - } - if chunkType == chunkTypeUncompressedData { - if _, err := w.w.Write(uncompressed); err != nil { - w.err = err - return nRet, err - } - } - nRet += len(uncompressed) - } - return nRet, nil -} - -// Flush flushes the Writer to its underlying io.Writer. -func (w *Writer) Flush() error { - if w.err != nil { - return w.err - } - if len(w.ibuf) == 0 { - return nil - } - w.write(w.ibuf) - w.ibuf = w.ibuf[:0] - return w.err -} - -// Close calls Flush and then closes the Writer. -func (w *Writer) Close() error { - w.Flush() - ret := w.err - if w.err == nil { - w.err = errClosed - } - return ret -} diff --git a/vendor/github.com/klauspost/compress/snappy/encode_amd64.go b/vendor/github.com/klauspost/compress/snappy/encode_amd64.go deleted file mode 100644 index 150d91bc8..000000000 --- a/vendor/github.com/klauspost/compress/snappy/encode_amd64.go +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright 2016 The Snappy-Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build !appengine -// +build gc -// +build !noasm - -package snappy - -// emitLiteral has the same semantics as in encode_other.go. -// -//go:noescape -func emitLiteral(dst, lit []byte) int - -// emitCopy has the same semantics as in encode_other.go. -// -//go:noescape -func emitCopy(dst []byte, offset, length int) int - -// extendMatch has the same semantics as in encode_other.go. -// -//go:noescape -func extendMatch(src []byte, i, j int) int - -// encodeBlock has the same semantics as in encode_other.go. -// -//go:noescape -func encodeBlock(dst, src []byte) (d int) diff --git a/vendor/github.com/klauspost/compress/snappy/encode_amd64.s b/vendor/github.com/klauspost/compress/snappy/encode_amd64.s deleted file mode 100644 index adfd979fe..000000000 --- a/vendor/github.com/klauspost/compress/snappy/encode_amd64.s +++ /dev/null @@ -1,730 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build !appengine -// +build gc -// +build !noasm - -#include "textflag.h" - -// The XXX lines assemble on Go 1.4, 1.5 and 1.7, but not 1.6, due to a -// Go toolchain regression. See https://github.com/golang/go/issues/15426 and -// https://github.com/golang/snappy/issues/29 -// -// As a workaround, the package was built with a known good assembler, and -// those instructions were disassembled by "objdump -d" to yield the -// 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15 -// style comments, in AT&T asm syntax. Note that rsp here is a physical -// register, not Go/asm's SP pseudo-register (see https://golang.org/doc/asm). -// The instructions were then encoded as "BYTE $0x.." sequences, which assemble -// fine on Go 1.6. - -// The asm code generally follows the pure Go code in encode_other.go, except -// where marked with a "!!!". - -// ---------------------------------------------------------------------------- - -// func emitLiteral(dst, lit []byte) int -// -// All local variables fit into registers. The register allocation: -// - AX len(lit) -// - BX n -// - DX return value -// - DI &dst[i] -// - R10 &lit[0] -// -// The 24 bytes of stack space is to call runtime·memmove. -// -// The unusual register allocation of local variables, such as R10 for the -// source pointer, matches the allocation used at the call site in encodeBlock, -// which makes it easier to manually inline this function. -TEXT ·emitLiteral(SB), NOSPLIT, $24-56 - MOVQ dst_base+0(FP), DI - MOVQ lit_base+24(FP), R10 - MOVQ lit_len+32(FP), AX - MOVQ AX, DX - MOVL AX, BX - SUBL $1, BX - - CMPL BX, $60 - JLT oneByte - CMPL BX, $256 - JLT twoBytes - -threeBytes: - MOVB $0xf4, 0(DI) - MOVW BX, 1(DI) - ADDQ $3, DI - ADDQ $3, DX - JMP memmove - -twoBytes: - MOVB $0xf0, 0(DI) - MOVB BX, 1(DI) - ADDQ $2, DI - ADDQ $2, DX - JMP memmove - -oneByte: - SHLB $2, BX - MOVB BX, 0(DI) - ADDQ $1, DI - ADDQ $1, DX - -memmove: - MOVQ DX, ret+48(FP) - - // copy(dst[i:], lit) - // - // This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push - // DI, R10 and AX as arguments. - MOVQ DI, 0(SP) - MOVQ R10, 8(SP) - MOVQ AX, 16(SP) - CALL runtime·memmove(SB) - RET - -// ---------------------------------------------------------------------------- - -// func emitCopy(dst []byte, offset, length int) int -// -// All local variables fit into registers. The register allocation: -// - AX length -// - SI &dst[0] -// - DI &dst[i] -// - R11 offset -// -// The unusual register allocation of local variables, such as R11 for the -// offset, matches the allocation used at the call site in encodeBlock, which -// makes it easier to manually inline this function. -TEXT ·emitCopy(SB), NOSPLIT, $0-48 - MOVQ dst_base+0(FP), DI - MOVQ DI, SI - MOVQ offset+24(FP), R11 - MOVQ length+32(FP), AX - -loop0: - // for length >= 68 { etc } - CMPL AX, $68 - JLT step1 - - // Emit a length 64 copy, encoded as 3 bytes. - MOVB $0xfe, 0(DI) - MOVW R11, 1(DI) - ADDQ $3, DI - SUBL $64, AX - JMP loop0 - -step1: - // if length > 64 { etc } - CMPL AX, $64 - JLE step2 - - // Emit a length 60 copy, encoded as 3 bytes. - MOVB $0xee, 0(DI) - MOVW R11, 1(DI) - ADDQ $3, DI - SUBL $60, AX - -step2: - // if length >= 12 || offset >= 2048 { goto step3 } - CMPL AX, $12 - JGE step3 - CMPL R11, $2048 - JGE step3 - - // Emit the remaining copy, encoded as 2 bytes. - MOVB R11, 1(DI) - SHRL $8, R11 - SHLB $5, R11 - SUBB $4, AX - SHLB $2, AX - ORB AX, R11 - ORB $1, R11 - MOVB R11, 0(DI) - ADDQ $2, DI - - // Return the number of bytes written. - SUBQ SI, DI - MOVQ DI, ret+40(FP) - RET - -step3: - // Emit the remaining copy, encoded as 3 bytes. - SUBL $1, AX - SHLB $2, AX - ORB $2, AX - MOVB AX, 0(DI) - MOVW R11, 1(DI) - ADDQ $3, DI - - // Return the number of bytes written. - SUBQ SI, DI - MOVQ DI, ret+40(FP) - RET - -// ---------------------------------------------------------------------------- - -// func extendMatch(src []byte, i, j int) int -// -// All local variables fit into registers. The register allocation: -// - DX &src[0] -// - SI &src[j] -// - R13 &src[len(src) - 8] -// - R14 &src[len(src)] -// - R15 &src[i] -// -// The unusual register allocation of local variables, such as R15 for a source -// pointer, matches the allocation used at the call site in encodeBlock, which -// makes it easier to manually inline this function. -TEXT ·extendMatch(SB), NOSPLIT, $0-48 - MOVQ src_base+0(FP), DX - MOVQ src_len+8(FP), R14 - MOVQ i+24(FP), R15 - MOVQ j+32(FP), SI - ADDQ DX, R14 - ADDQ DX, R15 - ADDQ DX, SI - MOVQ R14, R13 - SUBQ $8, R13 - -cmp8: - // As long as we are 8 or more bytes before the end of src, we can load and - // compare 8 bytes at a time. If those 8 bytes are equal, repeat. - CMPQ SI, R13 - JA cmp1 - MOVQ (R15), AX - MOVQ (SI), BX - CMPQ AX, BX - JNE bsf - ADDQ $8, R15 - ADDQ $8, SI - JMP cmp8 - -bsf: - // If those 8 bytes were not equal, XOR the two 8 byte values, and return - // the index of the first byte that differs. The BSF instruction finds the - // least significant 1 bit, the amd64 architecture is little-endian, and - // the shift by 3 converts a bit index to a byte index. - XORQ AX, BX - BSFQ BX, BX - SHRQ $3, BX - ADDQ BX, SI - - // Convert from &src[ret] to ret. - SUBQ DX, SI - MOVQ SI, ret+40(FP) - RET - -cmp1: - // In src's tail, compare 1 byte at a time. - CMPQ SI, R14 - JAE extendMatchEnd - MOVB (R15), AX - MOVB (SI), BX - CMPB AX, BX - JNE extendMatchEnd - ADDQ $1, R15 - ADDQ $1, SI - JMP cmp1 - -extendMatchEnd: - // Convert from &src[ret] to ret. - SUBQ DX, SI - MOVQ SI, ret+40(FP) - RET - -// ---------------------------------------------------------------------------- - -// func encodeBlock(dst, src []byte) (d int) -// -// All local variables fit into registers, other than "var table". The register -// allocation: -// - AX . . -// - BX . . -// - CX 56 shift (note that amd64 shifts by non-immediates must use CX). -// - DX 64 &src[0], tableSize -// - SI 72 &src[s] -// - DI 80 &dst[d] -// - R9 88 sLimit -// - R10 . &src[nextEmit] -// - R11 96 prevHash, currHash, nextHash, offset -// - R12 104 &src[base], skip -// - R13 . &src[nextS], &src[len(src) - 8] -// - R14 . len(src), bytesBetweenHashLookups, &src[len(src)], x -// - R15 112 candidate -// -// The second column (56, 64, etc) is the stack offset to spill the registers -// when calling other functions. We could pack this slightly tighter, but it's -// simpler to have a dedicated spill map independent of the function called. -// -// "var table [maxTableSize]uint16" takes up 32768 bytes of stack space. An -// extra 56 bytes, to call other functions, and an extra 64 bytes, to spill -// local variables (registers) during calls gives 32768 + 56 + 64 = 32888. -TEXT ·encodeBlock(SB), 0, $32888-56 - MOVQ dst_base+0(FP), DI - MOVQ src_base+24(FP), SI - MOVQ src_len+32(FP), R14 - - // shift, tableSize := uint32(32-8), 1<<8 - MOVQ $24, CX - MOVQ $256, DX - -calcShift: - // for ; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 { - // shift-- - // } - CMPQ DX, $16384 - JGE varTable - CMPQ DX, R14 - JGE varTable - SUBQ $1, CX - SHLQ $1, DX - JMP calcShift - -varTable: - // var table [maxTableSize]uint16 - // - // In the asm code, unlike the Go code, we can zero-initialize only the - // first tableSize elements. Each uint16 element is 2 bytes and each MOVOU - // writes 16 bytes, so we can do only tableSize/8 writes instead of the - // 2048 writes that would zero-initialize all of table's 32768 bytes. - SHRQ $3, DX - LEAQ table-32768(SP), BX - PXOR X0, X0 - -memclr: - MOVOU X0, 0(BX) - ADDQ $16, BX - SUBQ $1, DX - JNZ memclr - - // !!! DX = &src[0] - MOVQ SI, DX - - // sLimit := len(src) - inputMargin - MOVQ R14, R9 - SUBQ $15, R9 - - // !!! Pre-emptively spill CX, DX and R9 to the stack. Their values don't - // change for the rest of the function. - MOVQ CX, 56(SP) - MOVQ DX, 64(SP) - MOVQ R9, 88(SP) - - // nextEmit := 0 - MOVQ DX, R10 - - // s := 1 - ADDQ $1, SI - - // nextHash := hash(load32(src, s), shift) - MOVL 0(SI), R11 - IMULL $0x1e35a7bd, R11 - SHRL CX, R11 - -outer: - // for { etc } - - // skip := 32 - MOVQ $32, R12 - - // nextS := s - MOVQ SI, R13 - - // candidate := 0 - MOVQ $0, R15 - -inner0: - // for { etc } - - // s := nextS - MOVQ R13, SI - - // bytesBetweenHashLookups := skip >> 5 - MOVQ R12, R14 - SHRQ $5, R14 - - // nextS = s + bytesBetweenHashLookups - ADDQ R14, R13 - - // skip += bytesBetweenHashLookups - ADDQ R14, R12 - - // if nextS > sLimit { goto emitRemainder } - MOVQ R13, AX - SUBQ DX, AX - CMPQ AX, R9 - JA emitRemainder - - // candidate = int(table[nextHash]) - // XXX: MOVWQZX table-32768(SP)(R11*2), R15 - // XXX: 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15 - BYTE $0x4e - BYTE $0x0f - BYTE $0xb7 - BYTE $0x7c - BYTE $0x5c - BYTE $0x78 - - // table[nextHash] = uint16(s) - MOVQ SI, AX - SUBQ DX, AX - - // XXX: MOVW AX, table-32768(SP)(R11*2) - // XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2) - BYTE $0x66 - BYTE $0x42 - BYTE $0x89 - BYTE $0x44 - BYTE $0x5c - BYTE $0x78 - - // nextHash = hash(load32(src, nextS), shift) - MOVL 0(R13), R11 - IMULL $0x1e35a7bd, R11 - SHRL CX, R11 - - // if load32(src, s) != load32(src, candidate) { continue } break - MOVL 0(SI), AX - MOVL (DX)(R15*1), BX - CMPL AX, BX - JNE inner0 - -fourByteMatch: - // As per the encode_other.go code: - // - // A 4-byte match has been found. We'll later see etc. - - // !!! Jump to a fast path for short (<= 16 byte) literals. See the comment - // on inputMargin in encode.go. - MOVQ SI, AX - SUBQ R10, AX - CMPQ AX, $16 - JLE emitLiteralFastPath - - // ---------------------------------------- - // Begin inline of the emitLiteral call. - // - // d += emitLiteral(dst[d:], src[nextEmit:s]) - - MOVL AX, BX - SUBL $1, BX - - CMPL BX, $60 - JLT inlineEmitLiteralOneByte - CMPL BX, $256 - JLT inlineEmitLiteralTwoBytes - -inlineEmitLiteralThreeBytes: - MOVB $0xf4, 0(DI) - MOVW BX, 1(DI) - ADDQ $3, DI - JMP inlineEmitLiteralMemmove - -inlineEmitLiteralTwoBytes: - MOVB $0xf0, 0(DI) - MOVB BX, 1(DI) - ADDQ $2, DI - JMP inlineEmitLiteralMemmove - -inlineEmitLiteralOneByte: - SHLB $2, BX - MOVB BX, 0(DI) - ADDQ $1, DI - -inlineEmitLiteralMemmove: - // Spill local variables (registers) onto the stack; call; unspill. - // - // copy(dst[i:], lit) - // - // This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push - // DI, R10 and AX as arguments. - MOVQ DI, 0(SP) - MOVQ R10, 8(SP) - MOVQ AX, 16(SP) - ADDQ AX, DI // Finish the "d +=" part of "d += emitLiteral(etc)". - MOVQ SI, 72(SP) - MOVQ DI, 80(SP) - MOVQ R15, 112(SP) - CALL runtime·memmove(SB) - MOVQ 56(SP), CX - MOVQ 64(SP), DX - MOVQ 72(SP), SI - MOVQ 80(SP), DI - MOVQ 88(SP), R9 - MOVQ 112(SP), R15 - JMP inner1 - -inlineEmitLiteralEnd: - // End inline of the emitLiteral call. - // ---------------------------------------- - -emitLiteralFastPath: - // !!! Emit the 1-byte encoding "uint8(len(lit)-1)<<2". - MOVB AX, BX - SUBB $1, BX - SHLB $2, BX - MOVB BX, (DI) - ADDQ $1, DI - - // !!! Implement the copy from lit to dst as a 16-byte load and store. - // (Encode's documentation says that dst and src must not overlap.) - // - // This always copies 16 bytes, instead of only len(lit) bytes, but that's - // OK. Subsequent iterations will fix up the overrun. - // - // Note that on amd64, it is legal and cheap to issue unaligned 8-byte or - // 16-byte loads and stores. This technique probably wouldn't be as - // effective on architectures that are fussier about alignment. - MOVOU 0(R10), X0 - MOVOU X0, 0(DI) - ADDQ AX, DI - -inner1: - // for { etc } - - // base := s - MOVQ SI, R12 - - // !!! offset := base - candidate - MOVQ R12, R11 - SUBQ R15, R11 - SUBQ DX, R11 - - // ---------------------------------------- - // Begin inline of the extendMatch call. - // - // s = extendMatch(src, candidate+4, s+4) - - // !!! R14 = &src[len(src)] - MOVQ src_len+32(FP), R14 - ADDQ DX, R14 - - // !!! R13 = &src[len(src) - 8] - MOVQ R14, R13 - SUBQ $8, R13 - - // !!! R15 = &src[candidate + 4] - ADDQ $4, R15 - ADDQ DX, R15 - - // !!! s += 4 - ADDQ $4, SI - -inlineExtendMatchCmp8: - // As long as we are 8 or more bytes before the end of src, we can load and - // compare 8 bytes at a time. If those 8 bytes are equal, repeat. - CMPQ SI, R13 - JA inlineExtendMatchCmp1 - MOVQ (R15), AX - MOVQ (SI), BX - CMPQ AX, BX - JNE inlineExtendMatchBSF - ADDQ $8, R15 - ADDQ $8, SI - JMP inlineExtendMatchCmp8 - -inlineExtendMatchBSF: - // If those 8 bytes were not equal, XOR the two 8 byte values, and return - // the index of the first byte that differs. The BSF instruction finds the - // least significant 1 bit, the amd64 architecture is little-endian, and - // the shift by 3 converts a bit index to a byte index. - XORQ AX, BX - BSFQ BX, BX - SHRQ $3, BX - ADDQ BX, SI - JMP inlineExtendMatchEnd - -inlineExtendMatchCmp1: - // In src's tail, compare 1 byte at a time. - CMPQ SI, R14 - JAE inlineExtendMatchEnd - MOVB (R15), AX - MOVB (SI), BX - CMPB AX, BX - JNE inlineExtendMatchEnd - ADDQ $1, R15 - ADDQ $1, SI - JMP inlineExtendMatchCmp1 - -inlineExtendMatchEnd: - // End inline of the extendMatch call. - // ---------------------------------------- - - // ---------------------------------------- - // Begin inline of the emitCopy call. - // - // d += emitCopy(dst[d:], base-candidate, s-base) - - // !!! length := s - base - MOVQ SI, AX - SUBQ R12, AX - -inlineEmitCopyLoop0: - // for length >= 68 { etc } - CMPL AX, $68 - JLT inlineEmitCopyStep1 - - // Emit a length 64 copy, encoded as 3 bytes. - MOVB $0xfe, 0(DI) - MOVW R11, 1(DI) - ADDQ $3, DI - SUBL $64, AX - JMP inlineEmitCopyLoop0 - -inlineEmitCopyStep1: - // if length > 64 { etc } - CMPL AX, $64 - JLE inlineEmitCopyStep2 - - // Emit a length 60 copy, encoded as 3 bytes. - MOVB $0xee, 0(DI) - MOVW R11, 1(DI) - ADDQ $3, DI - SUBL $60, AX - -inlineEmitCopyStep2: - // if length >= 12 || offset >= 2048 { goto inlineEmitCopyStep3 } - CMPL AX, $12 - JGE inlineEmitCopyStep3 - CMPL R11, $2048 - JGE inlineEmitCopyStep3 - - // Emit the remaining copy, encoded as 2 bytes. - MOVB R11, 1(DI) - SHRL $8, R11 - SHLB $5, R11 - SUBB $4, AX - SHLB $2, AX - ORB AX, R11 - ORB $1, R11 - MOVB R11, 0(DI) - ADDQ $2, DI - JMP inlineEmitCopyEnd - -inlineEmitCopyStep3: - // Emit the remaining copy, encoded as 3 bytes. - SUBL $1, AX - SHLB $2, AX - ORB $2, AX - MOVB AX, 0(DI) - MOVW R11, 1(DI) - ADDQ $3, DI - -inlineEmitCopyEnd: - // End inline of the emitCopy call. - // ---------------------------------------- - - // nextEmit = s - MOVQ SI, R10 - - // if s >= sLimit { goto emitRemainder } - MOVQ SI, AX - SUBQ DX, AX - CMPQ AX, R9 - JAE emitRemainder - - // As per the encode_other.go code: - // - // We could immediately etc. - - // x := load64(src, s-1) - MOVQ -1(SI), R14 - - // prevHash := hash(uint32(x>>0), shift) - MOVL R14, R11 - IMULL $0x1e35a7bd, R11 - SHRL CX, R11 - - // table[prevHash] = uint16(s-1) - MOVQ SI, AX - SUBQ DX, AX - SUBQ $1, AX - - // XXX: MOVW AX, table-32768(SP)(R11*2) - // XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2) - BYTE $0x66 - BYTE $0x42 - BYTE $0x89 - BYTE $0x44 - BYTE $0x5c - BYTE $0x78 - - // currHash := hash(uint32(x>>8), shift) - SHRQ $8, R14 - MOVL R14, R11 - IMULL $0x1e35a7bd, R11 - SHRL CX, R11 - - // candidate = int(table[currHash]) - // XXX: MOVWQZX table-32768(SP)(R11*2), R15 - // XXX: 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15 - BYTE $0x4e - BYTE $0x0f - BYTE $0xb7 - BYTE $0x7c - BYTE $0x5c - BYTE $0x78 - - // table[currHash] = uint16(s) - ADDQ $1, AX - - // XXX: MOVW AX, table-32768(SP)(R11*2) - // XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2) - BYTE $0x66 - BYTE $0x42 - BYTE $0x89 - BYTE $0x44 - BYTE $0x5c - BYTE $0x78 - - // if uint32(x>>8) == load32(src, candidate) { continue } - MOVL (DX)(R15*1), BX - CMPL R14, BX - JEQ inner1 - - // nextHash = hash(uint32(x>>16), shift) - SHRQ $8, R14 - MOVL R14, R11 - IMULL $0x1e35a7bd, R11 - SHRL CX, R11 - - // s++ - ADDQ $1, SI - - // break out of the inner1 for loop, i.e. continue the outer loop. - JMP outer - -emitRemainder: - // if nextEmit < len(src) { etc } - MOVQ src_len+32(FP), AX - ADDQ DX, AX - CMPQ R10, AX - JEQ encodeBlockEnd - - // d += emitLiteral(dst[d:], src[nextEmit:]) - // - // Push args. - MOVQ DI, 0(SP) - MOVQ $0, 8(SP) // Unnecessary, as the callee ignores it, but conservative. - MOVQ $0, 16(SP) // Unnecessary, as the callee ignores it, but conservative. - MOVQ R10, 24(SP) - SUBQ R10, AX - MOVQ AX, 32(SP) - MOVQ AX, 40(SP) // Unnecessary, as the callee ignores it, but conservative. - - // Spill local variables (registers) onto the stack; call; unspill. - MOVQ DI, 80(SP) - CALL ·emitLiteral(SB) - MOVQ 80(SP), DI - - // Finish the "d +=" part of "d += emitLiteral(etc)". - ADDQ 48(SP), DI - -encodeBlockEnd: - MOVQ dst_base+0(FP), AX - SUBQ AX, DI - MOVQ DI, d+48(FP) - RET diff --git a/vendor/github.com/klauspost/compress/snappy/encode_other.go b/vendor/github.com/klauspost/compress/snappy/encode_other.go deleted file mode 100644 index dbcae905e..000000000 --- a/vendor/github.com/klauspost/compress/snappy/encode_other.go +++ /dev/null @@ -1,238 +0,0 @@ -// Copyright 2016 The Snappy-Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build !amd64 appengine !gc noasm - -package snappy - -func load32(b []byte, i int) uint32 { - b = b[i : i+4 : len(b)] // Help the compiler eliminate bounds checks on the next line. - return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 -} - -func load64(b []byte, i int) uint64 { - b = b[i : i+8 : len(b)] // Help the compiler eliminate bounds checks on the next line. - return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | - uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 -} - -// emitLiteral writes a literal chunk and returns the number of bytes written. -// -// It assumes that: -// dst is long enough to hold the encoded bytes -// 1 <= len(lit) && len(lit) <= 65536 -func emitLiteral(dst, lit []byte) int { - i, n := 0, uint(len(lit)-1) - switch { - case n < 60: - dst[0] = uint8(n)<<2 | tagLiteral - i = 1 - case n < 1<<8: - dst[0] = 60<<2 | tagLiteral - dst[1] = uint8(n) - i = 2 - default: - dst[0] = 61<<2 | tagLiteral - dst[1] = uint8(n) - dst[2] = uint8(n >> 8) - i = 3 - } - return i + copy(dst[i:], lit) -} - -// emitCopy writes a copy chunk and returns the number of bytes written. -// -// It assumes that: -// dst is long enough to hold the encoded bytes -// 1 <= offset && offset <= 65535 -// 4 <= length && length <= 65535 -func emitCopy(dst []byte, offset, length int) int { - i := 0 - // The maximum length for a single tagCopy1 or tagCopy2 op is 64 bytes. The - // threshold for this loop is a little higher (at 68 = 64 + 4), and the - // length emitted down below is is a little lower (at 60 = 64 - 4), because - // it's shorter to encode a length 67 copy as a length 60 tagCopy2 followed - // by a length 7 tagCopy1 (which encodes as 3+2 bytes) than to encode it as - // a length 64 tagCopy2 followed by a length 3 tagCopy2 (which encodes as - // 3+3 bytes). The magic 4 in the 64±4 is because the minimum length for a - // tagCopy1 op is 4 bytes, which is why a length 3 copy has to be an - // encodes-as-3-bytes tagCopy2 instead of an encodes-as-2-bytes tagCopy1. - for length >= 68 { - // Emit a length 64 copy, encoded as 3 bytes. - dst[i+0] = 63<<2 | tagCopy2 - dst[i+1] = uint8(offset) - dst[i+2] = uint8(offset >> 8) - i += 3 - length -= 64 - } - if length > 64 { - // Emit a length 60 copy, encoded as 3 bytes. - dst[i+0] = 59<<2 | tagCopy2 - dst[i+1] = uint8(offset) - dst[i+2] = uint8(offset >> 8) - i += 3 - length -= 60 - } - if length >= 12 || offset >= 2048 { - // Emit the remaining copy, encoded as 3 bytes. - dst[i+0] = uint8(length-1)<<2 | tagCopy2 - dst[i+1] = uint8(offset) - dst[i+2] = uint8(offset >> 8) - return i + 3 - } - // Emit the remaining copy, encoded as 2 bytes. - dst[i+0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1 - dst[i+1] = uint8(offset) - return i + 2 -} - -// extendMatch returns the largest k such that k <= len(src) and that -// src[i:i+k-j] and src[j:k] have the same contents. -// -// It assumes that: -// 0 <= i && i < j && j <= len(src) -func extendMatch(src []byte, i, j int) int { - for ; j < len(src) && src[i] == src[j]; i, j = i+1, j+1 { - } - return j -} - -func hash(u, shift uint32) uint32 { - return (u * 0x1e35a7bd) >> shift -} - -// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It -// assumes that the varint-encoded length of the decompressed bytes has already -// been written. -// -// It also assumes that: -// len(dst) >= MaxEncodedLen(len(src)) && -// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize -func encodeBlock(dst, src []byte) (d int) { - // Initialize the hash table. Its size ranges from 1<<8 to 1<<14 inclusive. - // The table element type is uint16, as s < sLimit and sLimit < len(src) - // and len(src) <= maxBlockSize and maxBlockSize == 65536. - const ( - maxTableSize = 1 << 14 - // tableMask is redundant, but helps the compiler eliminate bounds - // checks. - tableMask = maxTableSize - 1 - ) - shift := uint32(32 - 8) - for tableSize := 1 << 8; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 { - shift-- - } - // In Go, all array elements are zero-initialized, so there is no advantage - // to a smaller tableSize per se. However, it matches the C++ algorithm, - // and in the asm versions of this code, we can get away with zeroing only - // the first tableSize elements. - var table [maxTableSize]uint16 - - // sLimit is when to stop looking for offset/length copies. The inputMargin - // lets us use a fast path for emitLiteral in the main loop, while we are - // looking for copies. - sLimit := len(src) - inputMargin - - // nextEmit is where in src the next emitLiteral should start from. - nextEmit := 0 - - // The encoded form must start with a literal, as there are no previous - // bytes to copy, so we start looking for hash matches at s == 1. - s := 1 - nextHash := hash(load32(src, s), shift) - - for { - // Copied from the C++ snappy implementation: - // - // Heuristic match skipping: If 32 bytes are scanned with no matches - // found, start looking only at every other byte. If 32 more bytes are - // scanned (or skipped), look at every third byte, etc.. When a match - // is found, immediately go back to looking at every byte. This is a - // small loss (~5% performance, ~0.1% density) for compressible data - // due to more bookkeeping, but for non-compressible data (such as - // JPEG) it's a huge win since the compressor quickly "realizes" the - // data is incompressible and doesn't bother looking for matches - // everywhere. - // - // The "skip" variable keeps track of how many bytes there are since - // the last match; dividing it by 32 (ie. right-shifting by five) gives - // the number of bytes to move ahead for each iteration. - skip := 32 - - nextS := s - candidate := 0 - for { - s = nextS - bytesBetweenHashLookups := skip >> 5 - nextS = s + bytesBetweenHashLookups - skip += bytesBetweenHashLookups - if nextS > sLimit { - goto emitRemainder - } - candidate = int(table[nextHash&tableMask]) - table[nextHash&tableMask] = uint16(s) - nextHash = hash(load32(src, nextS), shift) - if load32(src, s) == load32(src, candidate) { - break - } - } - - // A 4-byte match has been found. We'll later see if more than 4 bytes - // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit - // them as literal bytes. - d += emitLiteral(dst[d:], src[nextEmit:s]) - - // Call emitCopy, and then see if another emitCopy could be our next - // move. Repeat until we find no match for the input immediately after - // what was consumed by the last emitCopy call. - // - // If we exit this loop normally then we need to call emitLiteral next, - // though we don't yet know how big the literal will be. We handle that - // by proceeding to the next iteration of the main loop. We also can - // exit this loop via goto if we get close to exhausting the input. - for { - // Invariant: we have a 4-byte match at s, and no need to emit any - // literal bytes prior to s. - base := s - - // Extend the 4-byte match as long as possible. - // - // This is an inlined version of: - // s = extendMatch(src, candidate+4, s+4) - s += 4 - for i := candidate + 4; s < len(src) && src[i] == src[s]; i, s = i+1, s+1 { - } - - d += emitCopy(dst[d:], base-candidate, s-base) - nextEmit = s - if s >= sLimit { - goto emitRemainder - } - - // We could immediately start working at s now, but to improve - // compression we first update the hash table at s-1 and at s. If - // another emitCopy is not our next move, also calculate nextHash - // at s+1. At least on GOARCH=amd64, these three hash calculations - // are faster as one load64 call (with some shifts) instead of - // three load32 calls. - x := load64(src, s-1) - prevHash := hash(uint32(x>>0), shift) - table[prevHash&tableMask] = uint16(s - 1) - currHash := hash(uint32(x>>8), shift) - candidate = int(table[currHash&tableMask]) - table[currHash&tableMask] = uint16(s) - if uint32(x>>8) != load32(src, candidate) { - nextHash = hash(uint32(x>>16), shift) - s++ - break - } - } - } - -emitRemainder: - if nextEmit < len(src) { - d += emitLiteral(dst[d:], src[nextEmit:]) - } - return d -} diff --git a/vendor/github.com/klauspost/compress/snappy/runbench.cmd b/vendor/github.com/klauspost/compress/snappy/runbench.cmd deleted file mode 100644 index d24eb4b47..000000000 --- a/vendor/github.com/klauspost/compress/snappy/runbench.cmd +++ /dev/null @@ -1,2 +0,0 @@ -del old.txt -go test -bench=. >>old.txt && go test -bench=. >>old.txt && go test -bench=. >>old.txt && benchstat -delta-test=ttest old.txt new.txt diff --git a/vendor/github.com/klauspost/compress/snappy/snappy.go b/vendor/github.com/klauspost/compress/snappy/snappy.go deleted file mode 100644 index ea58ced88..000000000 --- a/vendor/github.com/klauspost/compress/snappy/snappy.go +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright 2011 The Snappy-Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package snappy implements the Snappy compression format. It aims for very -// high speeds and reasonable compression. -// -// There are actually two Snappy formats: block and stream. They are related, -// but different: trying to decompress block-compressed data as a Snappy stream -// will fail, and vice versa. The block format is the Decode and Encode -// functions and the stream format is the Reader and Writer types. -// -// The block format, the more common case, is used when the complete size (the -// number of bytes) of the original data is known upfront, at the time -// compression starts. The stream format, also known as the framing format, is -// for when that isn't always true. -// -// The canonical, C++ implementation is at https://github.com/google/snappy and -// it only implements the block format. -package snappy - -import ( - "hash/crc32" -) - -/* -Each encoded block begins with the varint-encoded length of the decoded data, -followed by a sequence of chunks. Chunks begin and end on byte boundaries. The -first byte of each chunk is broken into its 2 least and 6 most significant bits -called l and m: l ranges in [0, 4) and m ranges in [0, 64). l is the chunk tag. -Zero means a literal tag. All other values mean a copy tag. - -For literal tags: - - If m < 60, the next 1 + m bytes are literal bytes. - - Otherwise, let n be the little-endian unsigned integer denoted by the next - m - 59 bytes. The next 1 + n bytes after that are literal bytes. - -For copy tags, length bytes are copied from offset bytes ago, in the style of -Lempel-Ziv compression algorithms. In particular: - - For l == 1, the offset ranges in [0, 1<<11) and the length in [4, 12). - The length is 4 + the low 3 bits of m. The high 3 bits of m form bits 8-10 - of the offset. The next byte is bits 0-7 of the offset. - - For l == 2, the offset ranges in [0, 1<<16) and the length in [1, 65). - The length is 1 + m. The offset is the little-endian unsigned integer - denoted by the next 2 bytes. - - For l == 3, this tag is a legacy format that is no longer issued by most - encoders. Nonetheless, the offset ranges in [0, 1<<32) and the length in - [1, 65). The length is 1 + m. The offset is the little-endian unsigned - integer denoted by the next 4 bytes. -*/ -const ( - tagLiteral = 0x00 - tagCopy1 = 0x01 - tagCopy2 = 0x02 - tagCopy4 = 0x03 -) - -const ( - checksumSize = 4 - chunkHeaderSize = 4 - magicChunk = "\xff\x06\x00\x00" + magicBody - magicBody = "sNaPpY" - - // maxBlockSize is the maximum size of the input to encodeBlock. It is not - // part of the wire format per se, but some parts of the encoder assume - // that an offset fits into a uint16. - // - // Also, for the framing format (Writer type instead of Encode function), - // https://github.com/google/snappy/blob/master/framing_format.txt says - // that "the uncompressed data in a chunk must be no longer than 65536 - // bytes". - maxBlockSize = 65536 - - // maxEncodedLenOfMaxBlockSize equals MaxEncodedLen(maxBlockSize), but is - // hard coded to be a const instead of a variable, so that obufLen can also - // be a const. Their equivalence is confirmed by - // TestMaxEncodedLenOfMaxBlockSize. - maxEncodedLenOfMaxBlockSize = 76490 - - obufHeaderLen = len(magicChunk) + checksumSize + chunkHeaderSize - obufLen = obufHeaderLen + maxEncodedLenOfMaxBlockSize -) - -const ( - chunkTypeCompressedData = 0x00 - chunkTypeUncompressedData = 0x01 - chunkTypePadding = 0xfe - chunkTypeStreamIdentifier = 0xff -) - -var crcTable = crc32.MakeTable(crc32.Castagnoli) - -// crc implements the checksum specified in section 3 of -// https://github.com/google/snappy/blob/master/framing_format.txt -func crc(b []byte) uint32 { - c := crc32.Update(0, crcTable, b) - return c>>15 | c<<17 + 0xa282ead8 -} diff --git a/vendor/github.com/klauspost/compress/zstd/blockenc.go b/vendor/github.com/klauspost/compress/zstd/blockenc.go index 9647c64e5..e1be092f3 100644 --- a/vendor/github.com/klauspost/compress/zstd/blockenc.go +++ b/vendor/github.com/klauspost/compress/zstd/blockenc.go @@ -386,9 +386,9 @@ func (b *blockEnc) encodeLits(lits []byte, raw bool) error { b.output = bh.appendTo(b.output) b.output = append(b.output, lits[0]) return nil + case nil: default: return err - case nil: } // Compressed... // Now, allow reuse @@ -528,11 +528,6 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error { if debug { println("Adding literals RLE") } - default: - if debug { - println("Adding literals ERROR:", err) - } - return err case nil: // Compressed litLen... if reUsed { @@ -563,6 +558,11 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error { if debug { println("Adding literals compressed") } + default: + if debug { + println("Adding literals ERROR:", err) + } + return err } // Sequence compression diff --git a/vendor/github.com/klauspost/compress/zstd/decodeheader.go b/vendor/github.com/klauspost/compress/zstd/decodeheader.go index 87896c5ea..69736e8d4 100644 --- a/vendor/github.com/klauspost/compress/zstd/decodeheader.go +++ b/vendor/github.com/klauspost/compress/zstd/decodeheader.go @@ -93,7 +93,7 @@ func (h *Header) Decode(in []byte) error { h.HasCheckSum = fhd&(1<<2) != 0 if fhd&(1<<3) != 0 { - return errors.New("Reserved bit set on frame header") + return errors.New("reserved bit set on frame header") } // Read Window_Descriptor @@ -174,7 +174,7 @@ func (h *Header) Decode(in []byte) error { if len(in) < 3 { return nil } - tmp, in := in[:3], in[3:] + tmp := in[:3] bh := uint32(tmp[0]) | (uint32(tmp[1]) << 8) | (uint32(tmp[2]) << 16) h.FirstBlock.Last = bh&1 != 0 blockType := blockType((bh >> 1) & 3) diff --git a/vendor/github.com/klauspost/compress/zstd/decoder.go b/vendor/github.com/klauspost/compress/zstd/decoder.go index 1d41c25d2..f593e464b 100644 --- a/vendor/github.com/klauspost/compress/zstd/decoder.go +++ b/vendor/github.com/klauspost/compress/zstd/decoder.go @@ -179,8 +179,7 @@ func (d *Decoder) Reset(r io.Reader) error { // If bytes buffer and < 1MB, do sync decoding anyway. if bb, ok := r.(byter); ok && bb.Len() < 1<<20 { - var bb2 byter - bb2 = bb + bb2 := bb if debug { println("*bytes.Buffer detected, doing sync decode, len:", bb.Len()) } @@ -237,20 +236,17 @@ func (d *Decoder) drainOutput() { println("current already flushed") return } - for { - select { - case v := <-d.current.output: - if v.d != nil { - if debug { - printf("re-adding decoder %p", v.d) - } - d.decoders <- v.d - } - if v.err == errEndOfStream { - println("current flushed") - d.current.flushed = true - return + for v := range d.current.output { + if v.d != nil { + if debug { + printf("re-adding decoder %p", v.d) } + d.decoders <- v.d + } + if v.err == errEndOfStream { + println("current flushed") + d.current.flushed = true + return } } } diff --git a/vendor/github.com/klauspost/compress/zstd/decoder_options.go b/vendor/github.com/klauspost/compress/zstd/decoder_options.go index 284d38449..c0fd058c2 100644 --- a/vendor/github.com/klauspost/compress/zstd/decoder_options.go +++ b/vendor/github.com/klauspost/compress/zstd/decoder_options.go @@ -6,7 +6,6 @@ package zstd import ( "errors" - "fmt" "runtime" ) @@ -43,7 +42,7 @@ func WithDecoderLowmem(b bool) DOption { func WithDecoderConcurrency(n int) DOption { return func(o *decoderOptions) error { if n <= 0 { - return fmt.Errorf("Concurrency must be at least 1") + return errors.New("concurrency must be at least 1") } o.concurrent = n return nil @@ -61,7 +60,7 @@ func WithDecoderMaxMemory(n uint64) DOption { return errors.New("WithDecoderMaxMemory must be at least 1") } if n > 1<<63 { - return fmt.Errorf("WithDecoderMaxmemory must be less than 1 << 63") + return errors.New("WithDecoderMaxmemory must be less than 1 << 63") } o.maxDecodedSize = n return nil diff --git a/vendor/github.com/klauspost/compress/zstd/enc_base.go b/vendor/github.com/klauspost/compress/zstd/enc_base.go index 2d4d893eb..60f298648 100644 --- a/vendor/github.com/klauspost/compress/zstd/enc_base.go +++ b/vendor/github.com/klauspost/compress/zstd/enc_base.go @@ -150,14 +150,15 @@ func (e *fastBase) resetBase(d *dict, singleBlock bool) { } else { e.crc.Reset() } - if (!singleBlock || d.DictContentSize() > 0) && cap(e.hist) < int(e.maxMatchOff*2)+d.DictContentSize() { - l := e.maxMatchOff*2 + int32(d.DictContentSize()) - // Make it at least 1MB. - if l < 1<<20 { - l = 1 << 20 + if d != nil { + low := e.lowMem + if singleBlock { + e.lowMem = true } - e.hist = make([]byte, 0, l) + e.ensureHist(d.DictContentSize() + maxCompressedBlockSize) + e.lowMem = low } + // We offset current position so everything will be out of reach. // If above reset line, history will be purged. if e.cur < bufferReset { diff --git a/vendor/github.com/klauspost/compress/zstd/encoder.go b/vendor/github.com/klauspost/compress/zstd/encoder.go index 6f0265099..4871dd03a 100644 --- a/vendor/github.com/klauspost/compress/zstd/encoder.go +++ b/vendor/github.com/klauspost/compress/zstd/encoder.go @@ -340,13 +340,13 @@ func (e *Encoder) ReadFrom(r io.Reader) (n int64, err error) { println("ReadFrom: got EOF final block:", len(e.state.filling)) } return n, nil + case nil: default: if debug { println("ReadFrom: got error:", err) } e.state.err = err return n, err - case nil: } if len(src) > 0 { if debug { diff --git a/vendor/github.com/klauspost/compress/zstd/encoder_options.go b/vendor/github.com/klauspost/compress/zstd/encoder_options.go index 18a47eb03..16d4ab63c 100644 --- a/vendor/github.com/klauspost/compress/zstd/encoder_options.go +++ b/vendor/github.com/klauspost/compress/zstd/encoder_options.go @@ -73,7 +73,7 @@ func WithEncoderCRC(b bool) EOption { } // WithEncoderConcurrency will set the concurrency, -// meaning the maximum number of decoders to run concurrently. +// meaning the maximum number of encoders to run concurrently. // The value supplied must be at least 1. // By default this will be set to GOMAXPROCS. func WithEncoderConcurrency(n int) EOption { diff --git a/vendor/github.com/klauspost/compress/zstd/framedec.go b/vendor/github.com/klauspost/compress/zstd/framedec.go index fc4a566d3..693c5f05d 100644 --- a/vendor/github.com/klauspost/compress/zstd/framedec.go +++ b/vendor/github.com/klauspost/compress/zstd/framedec.go @@ -121,7 +121,7 @@ func (d *frameDec) reset(br byteBuffer) error { d.SingleSegment = fhd&(1<<5) != 0 if fhd&(1<<3) != 0 { - return errors.New("Reserved bit set on frame header") + return errors.New("reserved bit set on frame header") } // Read Window_Descriptor diff --git a/vendor/github.com/klauspost/compress/zstd/fse_encoder.go b/vendor/github.com/klauspost/compress/zstd/fse_encoder.go index b80709d5e..c74681b99 100644 --- a/vendor/github.com/klauspost/compress/zstd/fse_encoder.go +++ b/vendor/github.com/klauspost/compress/zstd/fse_encoder.go @@ -708,7 +708,6 @@ func (c *cState) init(bw *bitWriter, ct *cTable, first symbolTransform) { im := int32((nbBitsOut << 16) - first.deltaNbBits) lu := (im >> nbBitsOut) + int32(first.deltaFindState) c.state = c.stateTable[lu] - return } // encode the output symbol provided and write it to the bitstream. diff --git a/vendor/github.com/klauspost/compress/zstd/fse_predefined.go b/vendor/github.com/klauspost/compress/zstd/fse_predefined.go index 6c17dc17f..474cb77d2 100644 --- a/vendor/github.com/klauspost/compress/zstd/fse_predefined.go +++ b/vendor/github.com/klauspost/compress/zstd/fse_predefined.go @@ -59,7 +59,7 @@ func fillBase(dst []baseOffset, base uint32, bits ...uint8) { } for i, bit := range bits { if base > math.MaxInt32 { - panic(fmt.Sprintf("invalid decoding table, base overflows int32")) + panic("invalid decoding table, base overflows int32") } dst[i] = baseOffset{ diff --git a/vendor/github.com/klauspost/compress/zstd/seqenc.go b/vendor/github.com/klauspost/compress/zstd/seqenc.go index 36bcc3cc0..8014174a7 100644 --- a/vendor/github.com/klauspost/compress/zstd/seqenc.go +++ b/vendor/github.com/klauspost/compress/zstd/seqenc.go @@ -35,7 +35,6 @@ func (s *seqCoders) setPrev(ll, ml, of *fseEncoder) { // Ensure we cannot reuse by accident prevEnc := *prev prevEnc.symbolLen = 0 - return } compareSwap(ll, &s.llEnc, &s.llPrev) compareSwap(ml, &s.mlEnc, &s.mlPrev) diff --git a/vendor/github.com/klauspost/compress/zstd/snappy.go b/vendor/github.com/klauspost/compress/zstd/snappy.go index c95fe5111..9d9d1d567 100644 --- a/vendor/github.com/klauspost/compress/zstd/snappy.go +++ b/vendor/github.com/klauspost/compress/zstd/snappy.go @@ -10,8 +10,8 @@ import ( "hash/crc32" "io" + "github.com/golang/snappy" "github.com/klauspost/compress/huff0" - "github.com/klauspost/compress/snappy" ) const ( @@ -185,7 +185,6 @@ func (r *SnappyConverter) Convert(in io.Reader, w io.Writer) (int64, error) { r.block.reset(nil) r.block.literals, err = snappy.Decode(r.block.literals[:n], r.buf[snappyChecksumSize:chunkLen]) if err != nil { - println("snappy.Decode:", err) return written, err } err = r.block.encodeLits(r.block.literals, false) diff --git a/vendor/github.com/klauspost/compress/zstd/zstd.go b/vendor/github.com/klauspost/compress/zstd/zstd.go index 9056beef2..1ba308c8b 100644 --- a/vendor/github.com/klauspost/compress/zstd/zstd.go +++ b/vendor/github.com/klauspost/compress/zstd/zstd.go @@ -5,6 +5,7 @@ package zstd import ( "bytes" + "encoding/binary" "errors" "log" "math" @@ -126,26 +127,15 @@ func matchLen(a, b []byte) int { } func load3232(b []byte, i int32) uint32 { - // Help the compiler eliminate bounds checks on the read so it can be done in a single read. - b = b[i:] - b = b[:4] - return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 + return binary.LittleEndian.Uint32(b[i:]) } func load6432(b []byte, i int32) uint64 { - // Help the compiler eliminate bounds checks on the read so it can be done in a single read. - b = b[i:] - b = b[:8] - return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | - uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 + return binary.LittleEndian.Uint64(b[i:]) } func load64(b []byte, i int) uint64 { - // Help the compiler eliminate bounds checks on the read so it can be done in a single read. - b = b[i:] - b = b[:8] - return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | - uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 + return binary.LittleEndian.Uint64(b[i:]) } type byter interface { diff --git a/vendor/modules.txt b/vendor/modules.txt index 51d4f53ca..498f7a443 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -188,7 +188,7 @@ github.com/containers/psgo/internal/dev github.com/containers/psgo/internal/host github.com/containers/psgo/internal/proc github.com/containers/psgo/internal/process -# github.com/containers/storage v1.29.0 +# github.com/containers/storage v1.30.0 github.com/containers/storage github.com/containers/storage/drivers github.com/containers/storage/drivers/aufs @@ -345,6 +345,8 @@ github.com/golang/protobuf/ptypes github.com/golang/protobuf/ptypes/any github.com/golang/protobuf/ptypes/duration github.com/golang/protobuf/ptypes/timestamp +# github.com/golang/snappy v0.0.3 +github.com/golang/snappy # github.com/google/go-cmp v0.5.5 github.com/google/go-cmp/cmp github.com/google/go-cmp/cmp/internal/diff @@ -384,11 +386,10 @@ github.com/json-iterator/go # github.com/juju/ansiterm v0.0.0-20180109212912-720a0952cc2a github.com/juju/ansiterm github.com/juju/ansiterm/tabwriter -# github.com/klauspost/compress v1.11.13 +# github.com/klauspost/compress v1.12.1 github.com/klauspost/compress/flate github.com/klauspost/compress/fse github.com/klauspost/compress/huff0 -github.com/klauspost/compress/snappy github.com/klauspost/compress/zstd github.com/klauspost/compress/zstd/internal/xxhash # github.com/klauspost/pgzip v1.2.5 -- cgit v1.2.3-54-g00ecf