diff options
Diffstat (limited to 'vendor/github.com/klauspost/compress/huff0/decompress_8b_amd64.s.in')
-rw-r--r-- | vendor/github.com/klauspost/compress/huff0/decompress_8b_amd64.s.in | 197 |
1 files changed, 0 insertions, 197 deletions
diff --git a/vendor/github.com/klauspost/compress/huff0/decompress_8b_amd64.s.in b/vendor/github.com/klauspost/compress/huff0/decompress_8b_amd64.s.in deleted file mode 100644 index 6d477a2c1..000000000 --- a/vendor/github.com/klauspost/compress/huff0/decompress_8b_amd64.s.in +++ /dev/null @@ -1,197 +0,0 @@ -// +build !appengine -// +build gc -// +build !noasm - -#include "textflag.h" -#include "funcdata.h" -#include "go_asm.h" - - -#define bufoff 256 // see decompress.go, we're using [4][256]byte table - -//func decompress4x_main_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted, -// peekBits uint8, buf *byte, tbl *dEntrySingle) (int, bool) -TEXT ·decompress4x_8b_loop_x86(SB), NOSPLIT, $8 -#define off R8 -#define buffer DI -#define table SI - -#define br_bits_read R9 -#define br_value R10 -#define br_offset R11 -#define peek_bits R12 -#define exhausted DX - -#define br0 R13 -#define br1 R14 -#define br2 R15 -#define br3 BP - - MOVQ BP, 0(SP) - - XORQ exhausted, exhausted // exhausted = false - XORQ off, off // off = 0 - - MOVBQZX peekBits+32(FP), peek_bits - MOVQ buf+40(FP), buffer - MOVQ tbl+48(FP), table - - MOVQ pbr0+0(FP), br0 - MOVQ pbr1+8(FP), br1 - MOVQ pbr2+16(FP), br2 - MOVQ pbr3+24(FP), br3 - -main_loop: -{{ define "decode_2_values_x86" }} - // const stream = {{ var "id" }} - // br{{ var "id"}}.fillFast() - MOVBQZX bitReaderShifted_bitsRead(br{{ var "id" }}), br_bits_read - MOVQ bitReaderShifted_value(br{{ var "id" }}), br_value - MOVQ bitReaderShifted_off(br{{ var "id" }}), br_offset - - // if b.bitsRead >= 32 { - CMPQ br_bits_read, $32 - JB skip_fill{{ var "id" }} - - SUBQ $32, br_bits_read // b.bitsRead -= 32 - SUBQ $4, br_offset // b.off -= 4 - - // v := b.in[b.off-4 : b.off] - // v = v[:4] - // low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) - MOVQ bitReaderShifted_in(br{{ var "id" }}), AX - MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4]) - - // b.value |= uint64(low) << (b.bitsRead & 63) - MOVQ br_bits_read, CX - SHLQ CL, AX - ORQ AX, br_value - - // exhausted = exhausted || (br{{ var "id"}}.off < 4) - CMPQ br_offset, $4 - SETLT DL - ORB DL, DH - // } -skip_fill{{ var "id" }}: - - // val0 := br{{ var "id"}}.peekTopBits(peekBits) - MOVQ br_value, AX - MOVQ peek_bits, CX - SHRQ CL, AX // AX = (value >> peek_bits) & mask - - // v0 := table[val0&mask] - MOVW 0(table)(AX*2), AX // AX - v0 - - // br{{ var "id"}}.advance(uint8(v0.entry)) - MOVB AH, BL // BL = uint8(v0.entry >> 8) - MOVBQZX AL, CX - SHLQ CL, br_value // value <<= n - ADDQ CX, br_bits_read // bits_read += n - - // val1 := br{{ var "id"}}.peekTopBits(peekBits) - MOVQ peek_bits, CX - MOVQ br_value, AX - SHRQ CL, AX // AX = (value >> peek_bits) & mask - - // v1 := table[val1&mask] - MOVW 0(table)(AX*2), AX // AX - v1 - - // br{{ var "id"}}.advance(uint8(v1.entry)) - MOVB AH, BH // BH = uint8(v1.entry >> 8) - MOVBQZX AL, CX - SHLQ CX, br_value // value <<= n - ADDQ CX, br_bits_read // bits_read += n - - - // these two writes get coalesced - // buf[stream][off] = uint8(v0.entry >> 8) - // buf[stream][off+1] = uint8(v1.entry >> 8) - MOVW BX, {{ var "bufofs" }}(buffer)(off*1) - - // SECOND PART: - // val2 := br{{ var "id"}}.peekTopBits(peekBits) - MOVQ br_value, AX - MOVQ peek_bits, CX - SHRQ CL, AX // AX = (value >> peek_bits) & mask - - // v2 := table[val0&mask] - MOVW 0(table)(AX*2), AX // AX - v0 - - // br{{ var "id"}}.advance(uint8(v0.entry)) - MOVB AH, BL // BL = uint8(v0.entry >> 8) - MOVBQZX AL, CX - SHLQ CL, br_value // value <<= n - ADDQ CX, br_bits_read // bits_read += n - - // val3 := br{{ var "id"}}.peekTopBits(peekBits) - MOVQ peek_bits, CX - MOVQ br_value, AX - SHRQ CL, AX // AX = (value >> peek_bits) & mask - - // v3 := table[val1&mask] - MOVW 0(table)(AX*2), AX // AX - v1 - - // br{{ var "id"}}.advance(uint8(v1.entry)) - MOVB AH, BH // BH = uint8(v1.entry >> 8) - MOVBQZX AL, CX - SHLQ CX, br_value // value <<= n - ADDQ CX, br_bits_read // bits_read += n - - - // these two writes get coalesced - // buf[stream][off+2] = uint8(v2.entry >> 8) - // buf[stream][off+3] = uint8(v3.entry >> 8) - MOVW BX, {{ var "bufofs" }}+2(buffer)(off*1) - - // update the bitrader reader structure - MOVB br_bits_read, bitReaderShifted_bitsRead(br{{ var "id" }}) - MOVQ br_value, bitReaderShifted_value(br{{ var "id" }}) - MOVQ br_offset, bitReaderShifted_off(br{{ var "id" }}) -{{ end }} - - {{ set "id" "0" }} - {{ set "ofs" "0" }} - {{ set "bufofs" "0" }} {{/* id * bufoff */}} - {{ template "decode_2_values_x86" . }} - - {{ set "id" "1" }} - {{ set "ofs" "8" }} - {{ set "bufofs" "256" }} - {{ template "decode_2_values_x86" . }} - - {{ set "id" "2" }} - {{ set "ofs" "16" }} - {{ set "bufofs" "512" }} - {{ template "decode_2_values_x86" . }} - - {{ set "id" "3" }} - {{ set "ofs" "24" }} - {{ set "bufofs" "768" }} - {{ template "decode_2_values_x86" . }} - - ADDQ $4, off // off += 2 - - TESTB DH, DH // any br[i].ofs < 4? - JNZ end - - CMPQ off, $bufoff - JL main_loop -end: - MOVQ 0(SP), BP - - MOVB off, ret+56(FP) - RET -#undef off -#undef buffer -#undef table - -#undef br_bits_read -#undef br_value -#undef br_offset -#undef peek_bits -#undef exhausted - -#undef br0 -#undef br1 -#undef br2 -#undef br3 |