aboutsummaryrefslogtreecommitdiff
path: root/vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.s
blob: da32b4420e3a6121249c0a1354b98791632332d1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
// Code generated by command: go run gen_fse.go -out ../fse_decoder_amd64.s -pkg=zstd. DO NOT EDIT.

//go:build !appengine && !noasm && gc && !noasm
// +build !appengine,!noasm,gc,!noasm

// func buildDtable_asm(s *fseDecoder, ctx *buildDtableAsmContext) int
//
// Fills a table of 8-byte entries in three passes (init, spread, build) and
// stores a status code in ret+16(FP):
//   0 - success
//   1 - the spread pass did not finish at position 0;
//       ctx+24 = final position
//   2 - a computed entry value was greater (signed) than the table size;
//       ctx+24 = value, ctx+32 = table size
//   3 - an entry has a zero bit count and its 16-bit value equals the low
//       16 bits of its own index; ctx+24 = value, ctx+32 = index
//
// Frame is $0-24: no local stack space, 24 bytes of arguments plus result.
//
// Memory accessed through hard-coded offsets.
// NOTE(review): the Go struct definitions are not visible from this file;
// confirm these offsets against fseDecoder and buildDtableAsmContext.
//   s+4096     16-bit unsigned, symbol count (upper bound of the symbol loops)
//   s+4098     8-bit unsigned, table log; table size = 1 << this value
//   ctx+0      pointer to per-symbol 16-bit counters (written in pass 1,
//              read and incremented in pass 3)
//   ctx+8      pointer to per-symbol signed 16-bit counts; -1 is special-cased
//   ctx+16     pointer to the table of 8-byte entries being filled
//   ctx+24/32  error details, written only when the result is non-zero
//
// Entry layout as used by this routine: byte 0 = bit count, byte 1 = symbol,
// bytes 2-3 = 16-bit value. Bytes 4-7 are never touched here.
//
// Registers that keep one meaning for the whole function:
//   AX = table size, BX = counter array, DX = table log, SI = entry table.
// CX, DI, R8 and R9 are reused with different meanings in different passes;
// see the comments at each pass.
TEXT ·buildDtable_asm(SB), $0-24
	MOVQ ctx+8(FP), CX
	MOVQ s+0(FP), DI

	// Load values
	// DX = table log (byte at s+4098); AX = 1 << DX = table size.
	MOVBQZX 4098(DI), DX
	XORQ    AX, AX
	BTSQ    DX, AX
	// BX = ctx+0 (counter array), SI = ctx+16 (entry table).
	MOVQ    (CX), BX
	MOVQ    16(CX), SI
	// R8 = table size - 1: the high threshold, i.e. the highest free slot.
	LEAQ    -1(AX), R8
	// CX and DI are overwritten with the values loaded through them:
	// CX = ctx+8 (signed count array), DI = symbol count (word at s+4096).
	MOVQ    8(CX), CX
	MOVWQZX 4096(DI), DI

	// End load values
	// Init, lay down lowprob symbols
	// Pass 1: for every symbol R9 in [0, DI), copy its count into the counter
	// array. A count of -1 instead places the symbol at the current high
	// threshold slot, lowers the threshold by one and records a counter of 1.
	XORQ R9, R9
	JMP  init_main_loop_condition

init_main_loop:
	// R10 = sign-extended count of symbol R9.
	MOVWQSX (CX)(R9*2), R10
	CMPW    R10, $-1
	JNE     do_not_update_high_threshold
	// Symbol byte of entry R8 = R9; then move the threshold down.
	MOVB    R9, 1(SI)(R8*8)
	DECQ    R8
	MOVQ    $0x0000000000000001, R10

do_not_update_high_threshold:
	// counter[R9] = low 16 bits of R10 (the count, or 1 for a -1 symbol).
	MOVW R10, (BX)(R9*2)
	INCQ R9

init_main_loop_condition:
	CMPQ R9, DI
	JL   init_main_loop

	// Spread symbols
	// Calculate table step
	// R9 = (table size >> 1) + (table size >> 3) + 3.
	MOVQ AX, R9
	SHRQ $0x01, R9
	MOVQ AX, R10
	SHRQ $0x03, R10
	LEAQ 3(R9)(R10*1), R9

	// R10 = table size - 1, used as the position mask.
	// R11 = current position, R12 = current symbol.
	LEAQ -1(AX), R10
	XORQ R11, R11
	XORQ R12, R12
	JMP  spread_main_loop_condition

spread_main_loop:
	// R13 = occurrences placed so far, R14 = sign-extended count of symbol
	// R12. The inner loop test is signed, so a count of -1 (already placed in
	// pass 1) or 0 places nothing.
	XORQ    R13, R13
	MOVWQSX (CX)(R12*2), R14
	JMP     spread_inner_loop_condition

spread_inner_loop:
	// Symbol byte of entry R11 = R12.
	MOVB R12, 1(SI)(R11*8)

adjust_position:
	// Advance by the step modulo the table size, and keep advancing while the
	// position is above the high threshold (slots taken in pass 1).
	ADDQ R9, R11
	ANDQ R10, R11
	CMPQ R11, R8
	JG   adjust_position
	INCQ R13

spread_inner_loop_condition:
	CMPQ R13, R14
	JL   spread_inner_loop
	INCQ R12

spread_main_loop_condition:
	CMPQ  R12, DI
	JL    spread_main_loop
	// The position must be back at 0 once every symbol has been spread;
	// otherwise report error 1 with the final position in ctx+24.
	TESTQ R11, R11
	JZ    spread_check_ok
	MOVQ  ctx+8(FP), AX
	MOVQ  R11, 24(AX)
	MOVQ  $+1, ret+16(FP)
	RET

spread_check_ok:
	// Build Decoding table
	// Pass 3: DI is now the entry index, running over [0, AX). The loop test
	// is at the bottom, so the body runs at least once.
	XORQ DI, DI

build_table_main_table:
	// CX = symbol stored in entry DI; R8 = counter[symbol]; the counter is
	// then post-incremented (16-bit store of R8 + 1).
	MOVBQZX 1(SI)(DI*8), CX
	MOVWQZX (BX)(CX*2), R8
	LEAQ    1(R8), R9
	MOVW    R9, (BX)(CX*2)
	// R9 = index of the highest set bit of R8.
	// NOTE(review): BSRQ leaves its destination undefined when the source is
	// zero, so this relies on the counter being non-zero here - confirm that
	// the inputs guarantee it.
	MOVQ    R8, R9
	BSRQ    R9, R9
	// CX = table log - highest-bit index; CL is this entry's bit count and
	// also the shift amount below.
	MOVQ    DX, CX
	SUBQ    R9, CX
	// R8 = (counter << bit count) - table size.
	SHLQ    CL, R8
	SUBQ    AX, R8
	// Entry byte 0 = bit count; entry bytes 2-3 = low 16 bits of R8.
	MOVB    CL, (SI)(DI*8)
	MOVW    R8, 2(SI)(DI*8)
	// Check 1: the value must not exceed the table size (signed compare).
	// The entry has already been written when this check fails.
	CMPQ    R8, AX
	JLE     build_table_check1_ok
	MOVQ    ctx+8(FP), CX
	MOVQ    R8, 24(CX)
	MOVQ    AX, 32(CX)
	MOVQ    $+2, ret+16(FP)
	RET

build_table_check1_ok:
	// Check 2: reject an entry whose bit count is zero and whose 16-bit value
	// equals the low 16 bits of its own index.
	TESTB CL, CL
	JNZ   build_table_check2_ok
	CMPW  R8, DI
	JNE   build_table_check2_ok
	MOVQ  ctx+8(FP), AX
	MOVQ  R8, 24(AX)
	MOVQ  DI, 32(AX)
	MOVQ  $+3, ret+16(FP)
	RET

build_table_check2_ok:
	INCQ DI
	CMPQ DI, AX
	JL   build_table_main_table
	// Every entry was built and passed both checks.
	MOVQ $+0, ret+16(FP)
	RET