summaryrefslogtreecommitdiff
path: root/vendor/github.com/bits-and-blooms/bitset/popcnt_amd64.s
blob: 666c0dcc17f5a32db55a71c69246cfeabf58e664 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
// +build !go1.9
// +build amd64,!appengine

// hasAsm reports whether the CPU advertises the POPCNT instruction.
//
// It runs CPUID with leaf 1 and stores 1 in the single result byte when
// bit 23 of ECX is set, 0 otherwise.
// Presumably declared on the Go side as `func hasAsm() bool` -- confirm
// against the Go stub.
//
// Flags value 4 is NOSPLIT; frame is 0 bytes of locals, 1 byte of results.
// Clobbers: AX, BX, CX, DX (all written by CPUID) and the flags.
TEXT ·hasAsm(SB),4,$0-1
	MOVL	$1, AX			// select CPUID leaf 1 (feature bits)
	CPUID
	TESTL	$(1<<23), CX		// ECX bit 23 = POPCNT supported
	SETNE	ret+0(FP)		// result byte = 1 if the bit is set, else 0
	RET

// POPCNTQ_DX_DX emits the raw machine encoding of "POPCNTQ DX, DX"
// (F3 REX.W 0F B8 /r with ModRM 0xd2: register-direct, reg = DX, rm = DX),
// i.e. it replaces DX with the number of set bits in DX.
// The instruction is spelled out as bytes instead of using a mnemonic.
#define POPCNTQ_DX_DX BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xb8; BYTE $0xd2

// popcntSliceAsm returns the total number of set bits across the 64-bit
// words of s.
//
// Arguments (stack, relative to FP):
//   s+0(FP)      pointer to the first word
//   s_len+8(FP)  number of 8-byte words to process
//   ret+24(FP)   result: accumulated bit count
// Presumably declared on the Go side as
// `func popcntSliceAsm(s []uint64) uint64` -- confirm against the Go stub.
//
// Register roles: AX = running total, SI = current word pointer,
// CX = words remaining, DX = popcount of the current word.
// A zero length returns 0 without touching memory.
//
// The loop counter uses DECQ/JNZ rather than the legacy LOOP instruction,
// which is microcoded and slow on many Intel cores; the result is the same.
TEXT ·popcntSliceAsm(SB),4,$0-32
	XORQ	AX, AX
	MOVQ	s+0(FP), SI
	MOVQ	s_len+8(FP), CX
	TESTQ	CX, CX
	JZ	popcntSliceEnd		// empty input: total stays 0
popcntSliceLoop:
	BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xb8; BYTE $0x16 // POPCNTQ (SI), DX
	ADDQ	DX, AX			// total += popcount(*SI)
	ADDQ	$8, SI			// advance to the next 64-bit word
	DECQ	CX
	JNZ	popcntSliceLoop		// CX > 0 here on entry, so this terminates
popcntSliceEnd:
	MOVQ	AX, ret+24(FP)
	RET

// popcntMaskSliceAsm returns the total number of set bits in
// s[i] AND (NOT m[i]) over the 64-bit words of s.
//
// Arguments (stack, relative to FP):
//   s+0(FP)      pointer to the first word of s
//   s_len+8(FP)  number of 8-byte words to process
//   m+24(FP)     pointer to the first word of m
//   ret+48(FP)   result: accumulated bit count
// Presumably declared on the Go side as
// `func popcntMaskSliceAsm(s, m []uint64) uint64` -- confirm against the
// Go stub.
//
// Only the length of s is read; m is accessed for that many words without
// any check of its own length, so the caller must supply an m that is at
// least as long as s.
//
// Register roles: AX = running total, SI/DI = current word of s/m,
// CX = words remaining, DX = scratch.
//
// The loop counter uses DECQ/JNZ rather than the legacy LOOP instruction,
// which is microcoded and slow on many Intel cores; the result is the same.
TEXT ·popcntMaskSliceAsm(SB),4,$0-56
	XORQ	AX, AX
	MOVQ	s+0(FP), SI
	MOVQ	s_len+8(FP), CX
	TESTQ	CX, CX
	JZ	popcntMaskSliceEnd	// empty input: total stays 0
	MOVQ	m+24(FP), DI
popcntMaskSliceLoop:
	MOVQ	(DI), DX
	NOTQ	DX			// DX = ^m[i]
	ANDQ	(SI), DX		// DX = s[i] & ^m[i]
	POPCNTQ_DX_DX
	ADDQ	DX, AX
	ADDQ	$8, SI
	ADDQ	$8, DI
	DECQ	CX
	JNZ	popcntMaskSliceLoop
popcntMaskSliceEnd:
	MOVQ	AX, ret+48(FP)
	RET

// popcntAndSliceAsm returns the total number of set bits in
// s[i] AND m[i] over the 64-bit words of s.
//
// Arguments (stack, relative to FP):
//   s+0(FP)      pointer to the first word of s
//   s_len+8(FP)  number of 8-byte words to process
//   m+24(FP)     pointer to the first word of m
//   ret+48(FP)   result: accumulated bit count
// Presumably declared on the Go side as
// `func popcntAndSliceAsm(s, m []uint64) uint64` -- confirm against the
// Go stub.
//
// Only the length of s is read; m is accessed for that many words without
// any check of its own length, so the caller must supply an m that is at
// least as long as s.
//
// Register roles: AX = running total, SI/DI = current word of s/m,
// CX = words remaining, DX = scratch.
//
// The loop counter uses DECQ/JNZ rather than the legacy LOOP instruction,
// which is microcoded and slow on many Intel cores; the result is the same.
TEXT ·popcntAndSliceAsm(SB),4,$0-56
	XORQ	AX, AX
	MOVQ	s+0(FP), SI
	MOVQ	s_len+8(FP), CX
	TESTQ	CX, CX
	JZ	popcntAndSliceEnd	// empty input: total stays 0
	MOVQ	m+24(FP), DI
popcntAndSliceLoop:
	MOVQ	(DI), DX
	ANDQ	(SI), DX		// DX = s[i] & m[i]
	POPCNTQ_DX_DX
	ADDQ	DX, AX
	ADDQ	$8, SI
	ADDQ	$8, DI
	DECQ	CX
	JNZ	popcntAndSliceLoop
popcntAndSliceEnd:
	MOVQ	AX, ret+48(FP)
	RET

// popcntOrSliceAsm returns the total number of set bits in
// s[i] OR m[i] over the 64-bit words of s.
//
// Arguments (stack, relative to FP):
//   s+0(FP)      pointer to the first word of s
//   s_len+8(FP)  number of 8-byte words to process
//   m+24(FP)     pointer to the first word of m
//   ret+48(FP)   result: accumulated bit count
// Presumably declared on the Go side as
// `func popcntOrSliceAsm(s, m []uint64) uint64` -- confirm against the
// Go stub.
//
// Only the length of s is read; m is accessed for that many words without
// any check of its own length, so the caller must supply an m that is at
// least as long as s.
//
// Register roles: AX = running total, SI/DI = current word of s/m,
// CX = words remaining, DX = scratch.
//
// The loop counter uses DECQ/JNZ rather than the legacy LOOP instruction,
// which is microcoded and slow on many Intel cores; the result is the same.
TEXT ·popcntOrSliceAsm(SB),4,$0-56
	XORQ	AX, AX
	MOVQ	s+0(FP), SI
	MOVQ	s_len+8(FP), CX
	TESTQ	CX, CX
	JZ	popcntOrSliceEnd	// empty input: total stays 0
	MOVQ	m+24(FP), DI
popcntOrSliceLoop:
	MOVQ	(DI), DX
	ORQ	(SI), DX		// DX = s[i] | m[i]
	POPCNTQ_DX_DX
	ADDQ	DX, AX
	ADDQ	$8, SI
	ADDQ	$8, DI
	DECQ	CX
	JNZ	popcntOrSliceLoop
popcntOrSliceEnd:
	MOVQ	AX, ret+48(FP)
	RET

// popcntXorSliceAsm returns the total number of set bits in
// s[i] XOR m[i] over the 64-bit words of s.
//
// Arguments (stack, relative to FP):
//   s+0(FP)      pointer to the first word of s
//   s_len+8(FP)  number of 8-byte words to process
//   m+24(FP)     pointer to the first word of m
//   ret+48(FP)   result: accumulated bit count
// Presumably declared on the Go side as
// `func popcntXorSliceAsm(s, m []uint64) uint64` -- confirm against the
// Go stub.
//
// Only the length of s is read; m is accessed for that many words without
// any check of its own length, so the caller must supply an m that is at
// least as long as s.
//
// Register roles: AX = running total, SI/DI = current word of s/m,
// CX = words remaining, DX = scratch.
//
// The loop counter uses DECQ/JNZ rather than the legacy LOOP instruction,
// which is microcoded and slow on many Intel cores; the result is the same.
TEXT ·popcntXorSliceAsm(SB),4,$0-56
	XORQ	AX, AX
	MOVQ	s+0(FP), SI
	MOVQ	s_len+8(FP), CX
	TESTQ	CX, CX
	JZ	popcntXorSliceEnd	// empty input: total stays 0
	MOVQ	m+24(FP), DI
popcntXorSliceLoop:
	MOVQ	(DI), DX
	XORQ	(SI), DX		// DX = s[i] ^ m[i]
	POPCNTQ_DX_DX
	ADDQ	DX, AX
	ADDQ	$8, SI
	ADDQ	$8, DI
	DECQ	CX
	JNZ	popcntXorSliceLoop
popcntXorSliceEnd:
	MOVQ	AX, ret+48(FP)
	RET