summaryrefslogtreecommitdiff
path: root/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/klauspost/compress/huff0/decompress_amd64.go')
-rw-r--r--vendor/github.com/klauspost/compress/huff0/decompress_amd64.go95
1 files changed, 31 insertions, 64 deletions
diff --git a/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go b/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go
index ce8e93bcd..3415e5da2 100644
--- a/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go
+++ b/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go
@@ -13,19 +13,30 @@ import (
// decompress4x_main_loop_x86 is an x86 assembler implementation
// of Decompress4X when tablelog > 8.
//go:noescape
-func decompress4x_main_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted,
- peekBits uint8, buf *byte, tbl *dEntrySingle) uint8
+func decompress4x_main_loop_amd64(ctx *decompress4xContext)
// decompress4x_8b_loop_x86 is an x86 assembler implementation
// of Decompress4X when tablelog <= 8 which decodes 4 entries
// per loop.
//go:noescape
-func decompress4x_8b_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted,
- peekBits uint8, buf *byte, tbl *dEntrySingle) uint8
+func decompress4x_8b_main_loop_amd64(ctx *decompress4xContext)
// fallback8BitSize is the size where using Go version is faster.
const fallback8BitSize = 800
+type decompress4xContext struct {
+ pbr0 *bitReaderShifted
+ pbr1 *bitReaderShifted
+ pbr2 *bitReaderShifted
+ pbr3 *bitReaderShifted
+ peekBits uint8
+ out *byte
+ dstEvery int
+ tbl *dEntrySingle
+ decoded int
+ limit *byte
+}
+
// Decompress4X will decompress a 4X encoded stream.
// The length of the supplied input must match the end of a block exactly.
// The *capacity* of the dst slice must match the destination size of
@@ -42,6 +53,7 @@ func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
if cap(dst) < fallback8BitSize && use8BitTables {
return d.decompress4X8bit(dst, src)
}
+
var br [4]bitReaderShifted
// Decode "jump table"
start := 6
@@ -71,70 +83,28 @@ func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
const tlMask = tlSize - 1
single := d.dt.single[:tlSize]
- // Use temp table to avoid bound checks/append penalty.
- buf := d.buffer()
- var off uint8
var decoded int
- const debug = false
-
- // see: bitReaderShifted.peekBitsFast()
- peekBits := uint8((64 - d.actualTableLog) & 63)
-
- // Decode 2 values from each decoder/loop.
- const bufoff = 256
- for {
- if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
- break
+ if len(out) > 4*4 && !(br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4) {
+ ctx := decompress4xContext{
+ pbr0: &br[0],
+ pbr1: &br[1],
+ pbr2: &br[2],
+ pbr3: &br[3],
+ peekBits: uint8((64 - d.actualTableLog) & 63), // see: bitReaderShifted.peekBitsFast()
+ out: &out[0],
+ dstEvery: dstEvery,
+ tbl: &single[0],
+ limit: &out[dstEvery-4], // Always stop decoding when first buffer gets here to avoid writing OOB on last.
}
-
if use8BitTables {
- off = decompress4x_8b_loop_x86(&br[0], &br[1], &br[2], &br[3], peekBits, &buf[0][0], &single[0])
+ decompress4x_8b_main_loop_amd64(&ctx)
} else {
- off = decompress4x_main_loop_x86(&br[0], &br[1], &br[2], &br[3], peekBits, &buf[0][0], &single[0])
- }
- if debug {
- fmt.Print("DEBUG: ")
- fmt.Printf("off=%d,", off)
- for i := 0; i < 4; i++ {
- fmt.Printf(" br[%d]={bitsRead=%d, value=%x, off=%d}",
- i, br[i].bitsRead, br[i].value, br[i].off)
- }
- fmt.Println("")
- }
-
- if off != 0 {
- break
+ decompress4x_main_loop_amd64(&ctx)
}
- if bufoff > dstEvery {
- d.bufs.Put(buf)
- return nil, errors.New("corruption detected: stream overrun 1")
- }
- copy(out, buf[0][:])
- copy(out[dstEvery:], buf[1][:])
- copy(out[dstEvery*2:], buf[2][:])
- copy(out[dstEvery*3:], buf[3][:])
- out = out[bufoff:]
- decoded += bufoff * 4
- // There must at least be 3 buffers left.
- if len(out) < dstEvery*3 {
- d.bufs.Put(buf)
- return nil, errors.New("corruption detected: stream overrun 2")
- }
- }
- if off > 0 {
- ioff := int(off)
- if len(out) < dstEvery*3+ioff {
- d.bufs.Put(buf)
- return nil, errors.New("corruption detected: stream overrun 3")
- }
- copy(out, buf[0][:off])
- copy(out[dstEvery:], buf[1][:off])
- copy(out[dstEvery*2:], buf[2][:off])
- copy(out[dstEvery*3:], buf[3][:off])
- decoded += int(off) * 4
- out = out[off:]
+ decoded = ctx.decoded
+ out = out[decoded/4:]
}
// Decode remaining.
@@ -150,7 +120,6 @@ func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
for bitsLeft > 0 {
br.fill()
if offset >= endsAt {
- d.bufs.Put(buf)
return nil, errors.New("corruption detected: stream overrun 4")
}
@@ -164,7 +133,6 @@ func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
offset++
}
if offset != endsAt {
- d.bufs.Put(buf)
return nil, fmt.Errorf("corruption detected: short output block %d, end %d != %d", i, offset, endsAt)
}
decoded += offset - dstEvery*i
@@ -173,7 +141,6 @@ func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
return nil, err
}
}
- d.bufs.Put(buf)
if dstSize != decoded {
return nil, errors.New("corruption detected: short output block")
}