summaryrefslogtreecommitdiff
path: root/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s')
-rw-r--r--vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s622
1 files changed, 397 insertions, 225 deletions
diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s
index 2585b2e98..212c6cac3 100644
--- a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s
+++ b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s
@@ -705,60 +705,55 @@ sequenceDecs_decode_bmi2_fill_2_end:
MOVQ CX, (R9)
// Fill bitreader for state updates
- MOVQ R13, (SP)
- MOVQ $0x00000808, CX
- BEXTRQ CX, R8, R13
- MOVQ ctx+16(FP), CX
- CMPQ 96(CX), $0x00
- JZ sequenceDecs_decode_bmi2_skip_update
-
- // Update Literal Length State
- MOVBQZX SI, R14
- MOVQ $0x00001010, CX
- BEXTRQ CX, SI, SI
+ MOVQ R13, (SP)
+ MOVQ $0x00000808, CX
+ BEXTRQ CX, R8, R13
+ MOVQ ctx+16(FP), CX
+ CMPQ 96(CX), $0x00
+ JZ sequenceDecs_decode_bmi2_skip_update
+ LEAQ (SI)(DI*1), R14
+ ADDQ R8, R14
+ MOVBQZX R14, R14
LEAQ (DX)(R14*1), CX
MOVQ AX, R15
MOVQ CX, DX
ROLQ CL, R15
BZHIQ R14, R15, R15
- ADDQ R15, SI
- // Load ctx.llTable
+ // Update Offset State
+ BZHIQ R8, R15, CX
+ SHRXQ R8, R15, R15
+ MOVQ $0x00001010, R14
+ BEXTRQ R14, R8, R8
+ ADDQ CX, R8
+
+ // Load ctx.ofTable
MOVQ ctx+16(FP), CX
- MOVQ (CX), CX
- MOVQ (CX)(SI*8), SI
+ MOVQ 48(CX), CX
+ MOVQ (CX)(R8*8), R8
// Update Match Length State
- MOVBQZX DI, R14
- MOVQ $0x00001010, CX
- BEXTRQ CX, DI, DI
- LEAQ (DX)(R14*1), CX
- MOVQ AX, R15
- MOVQ CX, DX
- ROLQ CL, R15
- BZHIQ R14, R15, R15
- ADDQ R15, DI
+ BZHIQ DI, R15, CX
+ SHRXQ DI, R15, R15
+ MOVQ $0x00001010, R14
+ BEXTRQ R14, DI, DI
+ ADDQ CX, DI
// Load ctx.mlTable
MOVQ ctx+16(FP), CX
MOVQ 24(CX), CX
MOVQ (CX)(DI*8), DI
- // Update Offset State
- MOVBQZX R8, R14
- MOVQ $0x00001010, CX
- BEXTRQ CX, R8, R8
- LEAQ (DX)(R14*1), CX
- MOVQ AX, R15
- MOVQ CX, DX
- ROLQ CL, R15
- BZHIQ R14, R15, R15
- ADDQ R15, R8
+ // Update Literal Length State
+ BZHIQ SI, R15, CX
+ MOVQ $0x00001010, R14
+ BEXTRQ R14, SI, SI
+ ADDQ CX, SI
- // Load ctx.ofTable
+ // Load ctx.llTable
MOVQ ctx+16(FP), CX
- MOVQ 48(CX), CX
- MOVQ (CX)(R8*8), R8
+ MOVQ (CX), CX
+ MOVQ (CX)(SI*8), SI
sequenceDecs_decode_bmi2_skip_update:
// Adjust offset
@@ -965,60 +960,55 @@ sequenceDecs_decode_56_bmi2_fill_end:
MOVQ CX, (R9)
// Fill bitreader for state updates
- MOVQ R13, (SP)
- MOVQ $0x00000808, CX
- BEXTRQ CX, R8, R13
- MOVQ ctx+16(FP), CX
- CMPQ 96(CX), $0x00
- JZ sequenceDecs_decode_56_bmi2_skip_update
-
- // Update Literal Length State
- MOVBQZX SI, R14
- MOVQ $0x00001010, CX
- BEXTRQ CX, SI, SI
+ MOVQ R13, (SP)
+ MOVQ $0x00000808, CX
+ BEXTRQ CX, R8, R13
+ MOVQ ctx+16(FP), CX
+ CMPQ 96(CX), $0x00
+ JZ sequenceDecs_decode_56_bmi2_skip_update
+ LEAQ (SI)(DI*1), R14
+ ADDQ R8, R14
+ MOVBQZX R14, R14
LEAQ (DX)(R14*1), CX
MOVQ AX, R15
MOVQ CX, DX
ROLQ CL, R15
BZHIQ R14, R15, R15
- ADDQ R15, SI
- // Load ctx.llTable
+ // Update Offset State
+ BZHIQ R8, R15, CX
+ SHRXQ R8, R15, R15
+ MOVQ $0x00001010, R14
+ BEXTRQ R14, R8, R8
+ ADDQ CX, R8
+
+ // Load ctx.ofTable
MOVQ ctx+16(FP), CX
- MOVQ (CX), CX
- MOVQ (CX)(SI*8), SI
+ MOVQ 48(CX), CX
+ MOVQ (CX)(R8*8), R8
// Update Match Length State
- MOVBQZX DI, R14
- MOVQ $0x00001010, CX
- BEXTRQ CX, DI, DI
- LEAQ (DX)(R14*1), CX
- MOVQ AX, R15
- MOVQ CX, DX
- ROLQ CL, R15
- BZHIQ R14, R15, R15
- ADDQ R15, DI
+ BZHIQ DI, R15, CX
+ SHRXQ DI, R15, R15
+ MOVQ $0x00001010, R14
+ BEXTRQ R14, DI, DI
+ ADDQ CX, DI
// Load ctx.mlTable
MOVQ ctx+16(FP), CX
MOVQ 24(CX), CX
MOVQ (CX)(DI*8), DI
- // Update Offset State
- MOVBQZX R8, R14
- MOVQ $0x00001010, CX
- BEXTRQ CX, R8, R8
- LEAQ (DX)(R14*1), CX
- MOVQ AX, R15
- MOVQ CX, DX
- ROLQ CL, R15
- BZHIQ R14, R15, R15
- ADDQ R15, R8
+ // Update Literal Length State
+ BZHIQ SI, R15, CX
+ MOVQ $0x00001010, R14
+ BEXTRQ R14, SI, SI
+ ADDQ CX, SI
- // Load ctx.ofTable
+ // Load ctx.llTable
MOVQ ctx+16(FP), CX
- MOVQ 48(CX), CX
- MOVQ (CX)(R8*8), R8
+ MOVQ (CX), CX
+ MOVQ (CX)(SI*8), SI
sequenceDecs_decode_56_bmi2_skip_update:
// Adjust offset
@@ -1171,6 +1161,228 @@ main_loop:
TESTQ R11, R11
JZ check_offset
XORQ R14, R14
+
+copy_1:
+ MOVUPS (SI)(R14*1), X0
+ MOVUPS X0, (BX)(R14*1)
+ ADDQ $0x10, R14
+ CMPQ R14, R11
+ JB copy_1
+ ADDQ R11, SI
+ ADDQ R11, BX
+ ADDQ R11, DI
+
+ // Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize)
+check_offset:
+ LEAQ (DI)(R10*1), R11
+ CMPQ R12, R11
+ JG error_match_off_too_big
+ CMPQ R12, R8
+ JG error_match_off_too_big
+
+ // Copy match from history
+ MOVQ R12, R11
+ SUBQ DI, R11
+ JLS copy_match
+ MOVQ R9, R14
+ SUBQ R11, R14
+ CMPQ R13, R11
+ JGE copy_all_from_history
+ XORQ R11, R11
+ TESTQ $0x00000001, R13
+ JZ copy_4_word
+ MOVB (R14)(R11*1), R12
+ MOVB R12, (BX)(R11*1)
+ ADDQ $0x01, R11
+
+copy_4_word:
+ TESTQ $0x00000002, R13
+ JZ copy_4_dword
+ MOVW (R14)(R11*1), R12
+ MOVW R12, (BX)(R11*1)
+ ADDQ $0x02, R11
+
+copy_4_dword:
+ TESTQ $0x00000004, R13
+ JZ copy_4_qword
+ MOVL (R14)(R11*1), R12
+ MOVL R12, (BX)(R11*1)
+ ADDQ $0x04, R11
+
+copy_4_qword:
+ TESTQ $0x00000008, R13
+ JZ copy_4_test
+ MOVQ (R14)(R11*1), R12
+ MOVQ R12, (BX)(R11*1)
+ ADDQ $0x08, R11
+ JMP copy_4_test
+
+copy_4:
+ MOVUPS (R14)(R11*1), X0
+ MOVUPS X0, (BX)(R11*1)
+ ADDQ $0x10, R11
+
+copy_4_test:
+ CMPQ R11, R13
+ JB copy_4
+ ADDQ R13, DI
+ ADDQ R13, BX
+ ADDQ $0x18, AX
+ INCQ DX
+ CMPQ DX, CX
+ JB main_loop
+ JMP loop_finished
+
+copy_all_from_history:
+ XORQ R15, R15
+ TESTQ $0x00000001, R11
+ JZ copy_5_word
+ MOVB (R14)(R15*1), BP
+ MOVB BP, (BX)(R15*1)
+ ADDQ $0x01, R15
+
+copy_5_word:
+ TESTQ $0x00000002, R11
+ JZ copy_5_dword
+ MOVW (R14)(R15*1), BP
+ MOVW BP, (BX)(R15*1)
+ ADDQ $0x02, R15
+
+copy_5_dword:
+ TESTQ $0x00000004, R11
+ JZ copy_5_qword
+ MOVL (R14)(R15*1), BP
+ MOVL BP, (BX)(R15*1)
+ ADDQ $0x04, R15
+
+copy_5_qword:
+ TESTQ $0x00000008, R11
+ JZ copy_5_test
+ MOVQ (R14)(R15*1), BP
+ MOVQ BP, (BX)(R15*1)
+ ADDQ $0x08, R15
+ JMP copy_5_test
+
+copy_5:
+ MOVUPS (R14)(R15*1), X0
+ MOVUPS X0, (BX)(R15*1)
+ ADDQ $0x10, R15
+
+copy_5_test:
+ CMPQ R15, R11
+ JB copy_5
+ ADDQ R11, BX
+ ADDQ R11, DI
+ SUBQ R11, R13
+
+ // Copy match from the current buffer
+copy_match:
+ TESTQ R13, R13
+ JZ handle_loop
+ MOVQ BX, R11
+ SUBQ R12, R11
+
+ // ml <= mo
+ CMPQ R13, R12
+ JA copy_overlapping_match
+
+ // Copy non-overlapping match
+ ADDQ R13, DI
+ MOVQ BX, R12
+ ADDQ R13, BX
+
+copy_2:
+ MOVUPS (R11), X0
+ MOVUPS X0, (R12)
+ ADDQ $0x10, R11
+ ADDQ $0x10, R12
+ SUBQ $0x10, R13
+ JHI copy_2
+ JMP handle_loop
+
+ // Copy overlapping match
+copy_overlapping_match:
+ ADDQ R13, DI
+
+copy_slow_3:
+ MOVB (R11), R12
+ MOVB R12, (BX)
+ INCQ R11
+ INCQ BX
+ DECQ R13
+ JNZ copy_slow_3
+
+handle_loop:
+ ADDQ $0x18, AX
+ INCQ DX
+ CMPQ DX, CX
+ JB main_loop
+
+loop_finished:
+ // Return value
+ MOVB $0x01, ret+8(FP)
+
+ // Update the context
+ MOVQ ctx+0(FP), AX
+ MOVQ DX, 24(AX)
+ MOVQ DI, 104(AX)
+ MOVQ 80(AX), CX
+ SUBQ CX, SI
+ MOVQ SI, 112(AX)
+ RET
+
+error_match_off_too_big:
+ // Return value
+ MOVB $0x00, ret+8(FP)
+
+ // Update the context
+ MOVQ ctx+0(FP), AX
+ MOVQ DX, 24(AX)
+ MOVQ DI, 104(AX)
+ MOVQ 80(AX), CX
+ SUBQ CX, SI
+ MOVQ SI, 112(AX)
+ RET
+
+empty_seqs:
+ // Return value
+ MOVB $0x01, ret+8(FP)
+ RET
+
+// func sequenceDecs_executeSimple_safe_amd64(ctx *executeAsmContext) bool
+// Requires: SSE
+TEXT ·sequenceDecs_executeSimple_safe_amd64(SB), $8-9
+ MOVQ ctx+0(FP), R10
+ MOVQ 8(R10), CX
+ TESTQ CX, CX
+ JZ empty_seqs
+ MOVQ (R10), AX
+ MOVQ 24(R10), DX
+ MOVQ 32(R10), BX
+ MOVQ 80(R10), SI
+ MOVQ 104(R10), DI
+ MOVQ 120(R10), R8
+ MOVQ 56(R10), R9
+ MOVQ 64(R10), R10
+ ADDQ R10, R9
+
+ // seqsBase += 24 * seqIndex
+ LEAQ (DX)(DX*2), R11
+ SHLQ $0x03, R11
+ ADDQ R11, AX
+
+ // outBase += outPosition
+ ADDQ DI, BX
+
+main_loop:
+ MOVQ (AX), R11
+ MOVQ 16(AX), R12
+ MOVQ 8(AX), R13
+
+ // Copy literals
+ TESTQ R11, R11
+ JZ check_offset
+ XORQ R14, R14
TESTQ $0x00000001, R11
JZ copy_1_word
MOVB (SI)(R14*1), R15
@@ -1326,18 +1538,46 @@ copy_match:
JA copy_overlapping_match
// Copy non-overlapping match
- ADDQ R13, DI
- MOVQ BX, R12
- ADDQ R13, BX
+ ADDQ R13, DI
+ XORQ R12, R12
+ TESTQ $0x00000001, R13
+ JZ copy_2_word
+ MOVB (R11)(R12*1), R14
+ MOVB R14, (BX)(R12*1)
+ ADDQ $0x01, R12
+
+copy_2_word:
+ TESTQ $0x00000002, R13
+ JZ copy_2_dword
+ MOVW (R11)(R12*1), R14
+ MOVW R14, (BX)(R12*1)
+ ADDQ $0x02, R12
+
+copy_2_dword:
+ TESTQ $0x00000004, R13
+ JZ copy_2_qword
+ MOVL (R11)(R12*1), R14
+ MOVL R14, (BX)(R12*1)
+ ADDQ $0x04, R12
+
+copy_2_qword:
+ TESTQ $0x00000008, R13
+ JZ copy_2_test
+ MOVQ (R11)(R12*1), R14
+ MOVQ R14, (BX)(R12*1)
+ ADDQ $0x08, R12
+ JMP copy_2_test
copy_2:
- MOVUPS (R11), X0
- MOVUPS X0, (R12)
- ADDQ $0x10, R11
+ MOVUPS (R11)(R12*1), X0
+ MOVUPS X0, (BX)(R12*1)
ADDQ $0x10, R12
- SUBQ $0x10, R13
- JHI copy_2
- JMP handle_loop
+
+copy_2_test:
+ CMPQ R12, R13
+ JB copy_2
+ ADDQ R13, BX
+ JMP handle_loop
// Copy overlapping match
copy_overlapping_match:
@@ -1673,45 +1913,16 @@ sequenceDecs_decodeSync_amd64_match_len_ofs_ok:
TESTQ AX, AX
JZ check_offset
XORQ R14, R14
- TESTQ $0x00000001, AX
- JZ copy_1_word
- MOVB (R11)(R14*1), R15
- MOVB R15, (R10)(R14*1)
- ADDQ $0x01, R14
-
-copy_1_word:
- TESTQ $0x00000002, AX
- JZ copy_1_dword
- MOVW (R11)(R14*1), R15
- MOVW R15, (R10)(R14*1)
- ADDQ $0x02, R14
-
-copy_1_dword:
- TESTQ $0x00000004, AX
- JZ copy_1_qword
- MOVL (R11)(R14*1), R15
- MOVL R15, (R10)(R14*1)
- ADDQ $0x04, R14
-
-copy_1_qword:
- TESTQ $0x00000008, AX
- JZ copy_1_test
- MOVQ (R11)(R14*1), R15
- MOVQ R15, (R10)(R14*1)
- ADDQ $0x08, R14
- JMP copy_1_test
copy_1:
MOVUPS (R11)(R14*1), X0
MOVUPS X0, (R10)(R14*1)
ADDQ $0x10, R14
-
-copy_1_test:
- CMPQ R14, AX
- JB copy_1
- ADDQ AX, R11
- ADDQ AX, R10
- ADDQ AX, R12
+ CMPQ R14, AX
+ JB copy_1
+ ADDQ AX, R11
+ ADDQ AX, R10
+ ADDQ AX, R12
// Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize)
check_offset:
@@ -2044,60 +2255,55 @@ sequenceDecs_decodeSync_bmi2_fill_2_end:
MOVQ CX, 24(SP)
// Fill bitreader for state updates
- MOVQ R12, (SP)
- MOVQ $0x00000808, CX
- BEXTRQ CX, R8, R12
- MOVQ ctx+16(FP), CX
- CMPQ 96(CX), $0x00
- JZ sequenceDecs_decodeSync_bmi2_skip_update
-
- // Update Literal Length State
- MOVBQZX SI, R13
- MOVQ $0x00001010, CX
- BEXTRQ CX, SI, SI
+ MOVQ R12, (SP)
+ MOVQ $0x00000808, CX
+ BEXTRQ CX, R8, R12
+ MOVQ ctx+16(FP), CX
+ CMPQ 96(CX), $0x00
+ JZ sequenceDecs_decodeSync_bmi2_skip_update
+ LEAQ (SI)(DI*1), R13
+ ADDQ R8, R13
+ MOVBQZX R13, R13
LEAQ (DX)(R13*1), CX
MOVQ AX, R14
MOVQ CX, DX
ROLQ CL, R14
BZHIQ R13, R14, R14
- ADDQ R14, SI
- // Load ctx.llTable
+ // Update Offset State
+ BZHIQ R8, R14, CX
+ SHRXQ R8, R14, R14
+ MOVQ $0x00001010, R13
+ BEXTRQ R13, R8, R8
+ ADDQ CX, R8
+
+ // Load ctx.ofTable
MOVQ ctx+16(FP), CX
- MOVQ (CX), CX
- MOVQ (CX)(SI*8), SI
+ MOVQ 48(CX), CX
+ MOVQ (CX)(R8*8), R8
// Update Match Length State
- MOVBQZX DI, R13
- MOVQ $0x00001010, CX
- BEXTRQ CX, DI, DI
- LEAQ (DX)(R13*1), CX
- MOVQ AX, R14
- MOVQ CX, DX
- ROLQ CL, R14
- BZHIQ R13, R14, R14
- ADDQ R14, DI
+ BZHIQ DI, R14, CX
+ SHRXQ DI, R14, R14
+ MOVQ $0x00001010, R13
+ BEXTRQ R13, DI, DI
+ ADDQ CX, DI
// Load ctx.mlTable
MOVQ ctx+16(FP), CX
MOVQ 24(CX), CX
MOVQ (CX)(DI*8), DI
- // Update Offset State
- MOVBQZX R8, R13
- MOVQ $0x00001010, CX
- BEXTRQ CX, R8, R8
- LEAQ (DX)(R13*1), CX
- MOVQ AX, R14
- MOVQ CX, DX
- ROLQ CL, R14
- BZHIQ R13, R14, R14
- ADDQ R14, R8
+ // Update Literal Length State
+ BZHIQ SI, R14, CX
+ MOVQ $0x00001010, R13
+ BEXTRQ R13, SI, SI
+ ADDQ CX, SI
- // Load ctx.ofTable
+ // Load ctx.llTable
MOVQ ctx+16(FP), CX
- MOVQ 48(CX), CX
- MOVQ (CX)(R8*8), R8
+ MOVQ (CX), CX
+ MOVQ (CX)(SI*8), SI
sequenceDecs_decodeSync_bmi2_skip_update:
// Adjust offset
@@ -2180,45 +2386,16 @@ sequenceDecs_decodeSync_bmi2_match_len_ofs_ok:
TESTQ CX, CX
JZ check_offset
XORQ R14, R14
- TESTQ $0x00000001, CX
- JZ copy_1_word
- MOVB (R10)(R14*1), R15
- MOVB R15, (R9)(R14*1)
- ADDQ $0x01, R14
-
-copy_1_word:
- TESTQ $0x00000002, CX
- JZ copy_1_dword
- MOVW (R10)(R14*1), R15
- MOVW R15, (R9)(R14*1)
- ADDQ $0x02, R14
-
-copy_1_dword:
- TESTQ $0x00000004, CX
- JZ copy_1_qword
- MOVL (R10)(R14*1), R15
- MOVL R15, (R9)(R14*1)
- ADDQ $0x04, R14
-
-copy_1_qword:
- TESTQ $0x00000008, CX
- JZ copy_1_test
- MOVQ (R10)(R14*1), R15
- MOVQ R15, (R9)(R14*1)
- ADDQ $0x08, R14
- JMP copy_1_test
copy_1:
MOVUPS (R10)(R14*1), X0
MOVUPS X0, (R9)(R14*1)
ADDQ $0x10, R14
-
-copy_1_test:
- CMPQ R14, CX
- JB copy_1
- ADDQ CX, R10
- ADDQ CX, R9
- ADDQ CX, R11
+ CMPQ R14, CX
+ JB copy_1
+ ADDQ CX, R10
+ ADDQ CX, R9
+ ADDQ CX, R11
// Malformed input if seq.mo > t+len(hist) || seq.mo > s.windowSize)
check_offset:
@@ -3108,60 +3285,55 @@ sequenceDecs_decodeSync_safe_bmi2_fill_2_end:
MOVQ CX, 24(SP)
// Fill bitreader for state updates
- MOVQ R12, (SP)
- MOVQ $0x00000808, CX
- BEXTRQ CX, R8, R12
- MOVQ ctx+16(FP), CX
- CMPQ 96(CX), $0x00
- JZ sequenceDecs_decodeSync_safe_bmi2_skip_update
-
- // Update Literal Length State
- MOVBQZX SI, R13
- MOVQ $0x00001010, CX
- BEXTRQ CX, SI, SI
+ MOVQ R12, (SP)
+ MOVQ $0x00000808, CX
+ BEXTRQ CX, R8, R12
+ MOVQ ctx+16(FP), CX
+ CMPQ 96(CX), $0x00
+ JZ sequenceDecs_decodeSync_safe_bmi2_skip_update
+ LEAQ (SI)(DI*1), R13
+ ADDQ R8, R13
+ MOVBQZX R13, R13
LEAQ (DX)(R13*1), CX
MOVQ AX, R14
MOVQ CX, DX
ROLQ CL, R14
BZHIQ R13, R14, R14
- ADDQ R14, SI
- // Load ctx.llTable
+ // Update Offset State
+ BZHIQ R8, R14, CX
+ SHRXQ R8, R14, R14
+ MOVQ $0x00001010, R13
+ BEXTRQ R13, R8, R8
+ ADDQ CX, R8
+
+ // Load ctx.ofTable
MOVQ ctx+16(FP), CX
- MOVQ (CX), CX
- MOVQ (CX)(SI*8), SI
+ MOVQ 48(CX), CX
+ MOVQ (CX)(R8*8), R8
// Update Match Length State
- MOVBQZX DI, R13
- MOVQ $0x00001010, CX
- BEXTRQ CX, DI, DI
- LEAQ (DX)(R13*1), CX
- MOVQ AX, R14
- MOVQ CX, DX
- ROLQ CL, R14
- BZHIQ R13, R14, R14
- ADDQ R14, DI
+ BZHIQ DI, R14, CX
+ SHRXQ DI, R14, R14
+ MOVQ $0x00001010, R13
+ BEXTRQ R13, DI, DI
+ ADDQ CX, DI
// Load ctx.mlTable
MOVQ ctx+16(FP), CX
MOVQ 24(CX), CX
MOVQ (CX)(DI*8), DI
- // Update Offset State
- MOVBQZX R8, R13
- MOVQ $0x00001010, CX
- BEXTRQ CX, R8, R8
- LEAQ (DX)(R13*1), CX
- MOVQ AX, R14
- MOVQ CX, DX
- ROLQ CL, R14
- BZHIQ R13, R14, R14
- ADDQ R14, R8
+ // Update Literal Length State
+ BZHIQ SI, R14, CX
+ MOVQ $0x00001010, R13
+ BEXTRQ R13, SI, SI
+ ADDQ CX, SI
- // Load ctx.ofTable
+ // Load ctx.llTable
MOVQ ctx+16(FP), CX
- MOVQ 48(CX), CX
- MOVQ (CX)(R8*8), R8
+ MOVQ (CX), CX
+ MOVQ (CX)(SI*8), SI
sequenceDecs_decodeSync_safe_bmi2_skip_update:
// Adjust offset