Skip to content

Commit b420ef4

Browse files
committed
Add unit test for SIMD popcount
Add unit test for SIMD popcount Signed-off-by: chzhoo <[email protected]>
1 parent 6d72a9e commit b420ef4

File tree

3 files changed

+132
-22
lines changed

3 files changed

+132
-22
lines changed

src/bitops.c

Lines changed: 54 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -48,21 +48,49 @@ static const unsigned char bitsinbyte[256] = {
4848
#ifdef HAVE_AVX2
4949
/* The SIMD version of popcount enhances performance through parallel lookup tables. */
5050
ATTRIBUTE_TARGET_AVX2
51-
static long long serverPopcountAVX2(void *s, long count) {
51+
long long popcountAVX2(void *s, long count) {
5252
long i = 0;
5353
unsigned char *p = (unsigned char *)s;
5454
long long bits = 0;
5555

5656
const __m256i lookup = _mm256_setr_epi8(
57-
/* 0 */ 0, /* 1 */ 1, /* 2 */ 1, /* 3 */ 2,
58-
/* 4 */ 1, /* 5 */ 2, /* 6 */ 2, /* 7 */ 3,
59-
/* 8 */ 1, /* 9 */ 2, /* a */ 2, /* b */ 3,
60-
/* c */ 2, /* d */ 3, /* e */ 3, /* f */ 4,
61-
/* 0 */ 0, /* 1 */ 1, /* 2 */ 1, /* 3 */ 2,
62-
/* 4 */ 1, /* 5 */ 2, /* 6 */ 2, /* 7 */ 3,
63-
/* 8 */ 1, /* 9 */ 2, /* a */ 2, /* b */ 3,
64-
/* c */ 2, /* d */ 3, /* e */ 3, /* f */ 4);
65-
57+
/* First Lane [0:127] */
58+
/* 0 */ 0,
59+
/* 1 */ 1,
60+
/* 2 */ 1,
61+
/* 3 */ 2,
62+
/* 4 */ 1,
63+
/* 5 */ 2,
64+
/* 6 */ 2,
65+
/* 7 */ 3,
66+
/* 8 */ 1,
67+
/* 9 */ 2,
68+
/* a */ 2,
69+
/* b */ 3,
70+
/* c */ 2,
71+
/* d */ 3,
72+
/* e */ 3,
73+
/* f */ 4,
74+
75+
/* Second Lane [128:255] identical to first lane due to lane isolation in _mm256_shuffle_epi8.
76+
* For more information, see following URL
77+
* https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_epi8 */
78+
/* 0 */ 0,
79+
/* 1 */ 1,
80+
/* 2 */ 1,
81+
/* 3 */ 2,
82+
/* 4 */ 1,
83+
/* 5 */ 2,
84+
/* 6 */ 2,
85+
/* 7 */ 3,
86+
/* 8 */ 1,
87+
/* 9 */ 2,
88+
/* a */ 2,
89+
/* b */ 3,
90+
/* c */ 2,
91+
/* d */ 3,
92+
/* e */ 3,
93+
/* f */ 4);
6694
const __m256i low_mask = _mm256_set1_epi8(0x0f);
6795
__m256i acc = _mm256_setzero_si256();
6896

@@ -124,18 +152,8 @@ static long long serverPopcountAVX2(void *s, long count) {
124152
}
125153
#endif
126154

127-
/* Count number of bits set in the binary array pointed by 's' and long
128-
* 'count' bytes. The implementation of this function is required to
129-
* work with an input string length up to 512 MB or more (server.proto_max_bulk_len) */
130-
long long serverPopcount(void *s, long count) {
131-
#ifdef HAVE_AVX2
132-
/* If length of s >= 256 bits and the CPU supports AVX2,
133-
* we prefer to use the SIMD version */
134-
if (count >= 32) {
135-
return serverPopcountAVX2(s, count);
136-
}
137-
#endif
138-
155+
/* The scalar version of popcount based on lookup tables. */
156+
long long popcountScalar(void *s, long count) {
139157
long long bits = 0;
140158
unsigned char *p = s;
141159
uint32_t *p4;
@@ -187,6 +205,20 @@ long long serverPopcount(void *s, long count) {
187205
return bits;
188206
}
189207

208+
/* Count number of bits set in the binary array pointed by 's' and long
209+
* 'count' bytes. The implementation of this function is required to
210+
* work with an input string length up to 512 MB or more (server.proto_max_bulk_len) */
211+
long long serverPopcount(void *s, long count) {
212+
#ifdef HAVE_AVX2
213+
/* If length of s >= 256 bits and the CPU supports AVX2,
214+
* we prefer to use the SIMD version */
215+
if (count >= 32) {
216+
return popcountAVX2(s, count);
217+
}
218+
#endif
219+
return popcountScalar(s, count);
220+
}
221+
190222
/* Return the position of the first bit set to one (if 'bit' is 1) or
191223
* zero (if 'bit' is 0) in the bitmap starting at 's' and long 'count' bytes.
192224
*

src/unit/test_files.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ int test_postWriteToReplica(int argc, char **argv, int flags);
105105
int test_backupAndUpdateClientArgv(int argc, char **argv, int flags);
106106
int test_rewriteClientCommandArgument(int argc, char **argv, int flags);
107107
int test_object_with_key(int argc, char **argv, int flags);
108+
int test_popcount(int argc, char **argv, int flags);
108109
int test_quicklistCreateList(int argc, char **argv, int flags);
109110
int test_quicklistAddToTailOfEmptyList(int argc, char **argv, int flags);
110111
int test_quicklistAddToHeadOfEmptyList(int argc, char **argv, int flags);
@@ -240,6 +241,7 @@ unitTest __test_kvstore_c[] = {{"test_kvstoreAdd16Keys", test_kvstoreAdd16Keys},
240241
unitTest __test_listpack_c[] = {{"test_listpackCreateIntList", test_listpackCreateIntList}, {"test_listpackCreateList", test_listpackCreateList}, {"test_listpackLpPrepend", test_listpackLpPrepend}, {"test_listpackLpPrependInteger", test_listpackLpPrependInteger}, {"test_listpackGetELementAtIndex", test_listpackGetELementAtIndex}, {"test_listpackPop", test_listpackPop}, {"test_listpackGetELementAtIndex2", test_listpackGetELementAtIndex2}, {"test_listpackIterate0toEnd", test_listpackIterate0toEnd}, {"test_listpackIterate1toEnd", test_listpackIterate1toEnd}, {"test_listpackIterate2toEnd", test_listpackIterate2toEnd}, {"test_listpackIterateBackToFront", test_listpackIterateBackToFront}, {"test_listpackIterateBackToFrontWithDelete", test_listpackIterateBackToFrontWithDelete}, {"test_listpackDeleteWhenNumIsMinusOne", test_listpackDeleteWhenNumIsMinusOne}, {"test_listpackDeleteWithNegativeIndex", test_listpackDeleteWithNegativeIndex}, {"test_listpackDeleteInclusiveRange0_0", test_listpackDeleteInclusiveRange0_0}, {"test_listpackDeleteInclusiveRange0_1", test_listpackDeleteInclusiveRange0_1}, {"test_listpackDeleteInclusiveRange1_2", test_listpackDeleteInclusiveRange1_2}, {"test_listpackDeleteWitStartIndexOutOfRange", test_listpackDeleteWitStartIndexOutOfRange}, {"test_listpackDeleteWitNumOverflow", test_listpackDeleteWitNumOverflow}, {"test_listpackBatchDelete", test_listpackBatchDelete}, {"test_listpackDeleteFooWhileIterating", test_listpackDeleteFooWhileIterating}, {"test_listpackReplaceWithSameSize", test_listpackReplaceWithSameSize}, {"test_listpackReplaceWithDifferentSize", test_listpackReplaceWithDifferentSize}, {"test_listpackRegressionGt255Bytes", test_listpackRegressionGt255Bytes}, {"test_listpackCreateLongListAndCheckIndices", test_listpackCreateLongListAndCheckIndices}, {"test_listpackCompareStrsWithLpEntries", test_listpackCompareStrsWithLpEntries}, {"test_listpackLpMergeEmptyLps", test_listpackLpMergeEmptyLps}, {"test_listpackLpMergeLp1Larger", test_listpackLpMergeLp1Larger}, {"test_listpackLpMergeLp2Larger", test_listpackLpMergeLp2Larger}, {"test_listpackLpNextRandom", test_listpackLpNextRandom}, {"test_listpackLpNextRandomCC", test_listpackLpNextRandomCC}, {"test_listpackRandomPairWithOneElement", test_listpackRandomPairWithOneElement}, {"test_listpackRandomPairWithManyElements", test_listpackRandomPairWithManyElements}, {"test_listpackRandomPairsWithOneElement", test_listpackRandomPairsWithOneElement}, {"test_listpackRandomPairsWithManyElements", test_listpackRandomPairsWithManyElements}, {"test_listpackRandomPairsUniqueWithOneElement", test_listpackRandomPairsUniqueWithOneElement}, {"test_listpackRandomPairsUniqueWithManyElements", test_listpackRandomPairsUniqueWithManyElements}, {"test_listpackPushVariousEncodings", test_listpackPushVariousEncodings}, {"test_listpackLpFind", test_listpackLpFind}, {"test_listpackLpValidateIntegrity", test_listpackLpValidateIntegrity}, {"test_listpackNumberOfElementsExceedsLP_HDR_NUMELE_UNKNOWN", test_listpackNumberOfElementsExceedsLP_HDR_NUMELE_UNKNOWN}, {"test_listpackStressWithRandom", test_listpackStressWithRandom}, {"test_listpackSTressWithVariableSize", test_listpackSTressWithVariableSize}, {"test_listpackBenchmarkInit", test_listpackBenchmarkInit}, {"test_listpackBenchmarkLpAppend", test_listpackBenchmarkLpAppend}, {"test_listpackBenchmarkLpFindString", test_listpackBenchmarkLpFindString}, {"test_listpackBenchmarkLpFindNumber", test_listpackBenchmarkLpFindNumber}, {"test_listpackBenchmarkLpSeek", test_listpackBenchmarkLpSeek}, {"test_listpackBenchmarkLpValidateIntegrity", test_listpackBenchmarkLpValidateIntegrity}, {"test_listpackBenchmarkLpCompareWithString", test_listpackBenchmarkLpCompareWithString}, {"test_listpackBenchmarkLpCompareWithNumber", test_listpackBenchmarkLpCompareWithNumber}, {"test_listpackBenchmarkFree", test_listpackBenchmarkFree}, {NULL, NULL}};
241242
unitTest __test_networking_c[] = {{"test_writeToReplica", test_writeToReplica}, {"test_postWriteToReplica", test_postWriteToReplica}, {"test_backupAndUpdateClientArgv", test_backupAndUpdateClientArgv}, {"test_rewriteClientCommandArgument", test_rewriteClientCommandArgument}, {NULL, NULL}};
242243
unitTest __test_object_c[] = {{"test_object_with_key", test_object_with_key}, {NULL, NULL}};
244+
unitTest __test_popcount_c[] = {{"test_popcount", test_popcount}, {NULL, NULL}};
243245
unitTest __test_quicklist_c[] = {{"test_quicklistCreateList", test_quicklistCreateList}, {"test_quicklistAddToTailOfEmptyList", test_quicklistAddToTailOfEmptyList}, {"test_quicklistAddToHeadOfEmptyList", test_quicklistAddToHeadOfEmptyList}, {"test_quicklistAddToTail5xAtCompress", test_quicklistAddToTail5xAtCompress}, {"test_quicklistAddToHead5xAtCompress", test_quicklistAddToHead5xAtCompress}, {"test_quicklistAddToTail500xAtCompress", test_quicklistAddToTail500xAtCompress}, {"test_quicklistAddToHead500xAtCompress", test_quicklistAddToHead500xAtCompress}, {"test_quicklistRotateEmpty", test_quicklistRotateEmpty}, {"test_quicklistComprassionPlainNode", test_quicklistComprassionPlainNode}, {"test_quicklistNextPlainNode", test_quicklistNextPlainNode}, {"test_quicklistRotatePlainNode", test_quicklistRotatePlainNode}, {"test_quicklistRotateOneValOnce", test_quicklistRotateOneValOnce}, {"test_quicklistRotate500Val5000TimesAtCompress", test_quicklistRotate500Val5000TimesAtCompress}, {"test_quicklistPopEmpty", test_quicklistPopEmpty}, {"test_quicklistPop1StringFrom1", test_quicklistPop1StringFrom1}, {"test_quicklistPopHead1NumberFrom1", test_quicklistPopHead1NumberFrom1}, {"test_quicklistPopHead500From500", test_quicklistPopHead500From500}, {"test_quicklistPopHead5000From500", test_quicklistPopHead5000From500}, {"test_quicklistIterateForwardOver500List", test_quicklistIterateForwardOver500List}, {"test_quicklistIterateReverseOver500List", test_quicklistIterateReverseOver500List}, {"test_quicklistInsertAfter1Element", test_quicklistInsertAfter1Element}, {"test_quicklistInsertBefore1Element", test_quicklistInsertBefore1Element}, {"test_quicklistInsertHeadWhileHeadNodeIsFull", test_quicklistInsertHeadWhileHeadNodeIsFull}, {"test_quicklistInsertTailWhileTailNodeIsFull", test_quicklistInsertTailWhileTailNodeIsFull}, {"test_quicklistInsertOnceInElementsWhileIteratingAtCompress", test_quicklistInsertOnceInElementsWhileIteratingAtCompress}, {"test_quicklistInsertBefore250NewInMiddleOf500ElementsAtCompress", test_quicklistInsertBefore250NewInMiddleOf500ElementsAtCompress}, {"test_quicklistInsertAfter250NewInMiddleOf500ElementsAtCompress", test_quicklistInsertAfter250NewInMiddleOf500ElementsAtCompress}, {"test_quicklistDuplicateEmptyList", test_quicklistDuplicateEmptyList}, {"test_quicklistDuplicateListOf1Element", test_quicklistDuplicateListOf1Element}, {"test_quicklistDuplicateListOf500", test_quicklistDuplicateListOf500}, {"test_quicklistIndex1200From500ListAtFill", test_quicklistIndex1200From500ListAtFill}, {"test_quicklistIndex12From500ListAtFill", test_quicklistIndex12From500ListAtFill}, {"test_quicklistIndex100From500ListAtFill", test_quicklistIndex100From500ListAtFill}, {"test_quicklistIndexTooBig1From50ListAtFill", test_quicklistIndexTooBig1From50ListAtFill}, {"test_quicklistDeleteRangeEmptyList", test_quicklistDeleteRangeEmptyList}, {"test_quicklistDeleteRangeOfEntireNodeInListOfOneNode", test_quicklistDeleteRangeOfEntireNodeInListOfOneNode}, {"test_quicklistDeleteRangeOfEntireNodeWithOverflowCounts", test_quicklistDeleteRangeOfEntireNodeWithOverflowCounts}, {"test_quicklistDeleteMiddle100Of500List", test_quicklistDeleteMiddle100Of500List}, {"test_quicklistDeleteLessThanFillButAcrossNodes", test_quicklistDeleteLessThanFillButAcrossNodes}, {"test_quicklistDeleteNegative1From500List", test_quicklistDeleteNegative1From500List}, {"test_quicklistDeleteNegative1From500ListWithOverflowCounts", test_quicklistDeleteNegative1From500ListWithOverflowCounts}, {"test_quicklistDeleteNegative100From500List", test_quicklistDeleteNegative100From500List}, {"test_quicklistDelete10Count5From50List", test_quicklistDelete10Count5From50List}, {"test_quicklistNumbersOnlyListRead", test_quicklistNumbersOnlyListRead}, {"test_quicklistNumbersLargerListRead", test_quicklistNumbersLargerListRead}, {"test_quicklistNumbersLargerListReadB", test_quicklistNumbersLargerListReadB}, {"test_quicklistLremTestAtCompress", test_quicklistLremTestAtCompress}, {"test_quicklistIterateReverseDeleteAtCompress", test_quicklistIterateReverseDeleteAtCompress}, {"test_quicklistIteratorAtIndexTestAtCompress", test_quicklistIteratorAtIndexTestAtCompress}, {"test_quicklistLtrimTestAAtCompress", test_quicklistLtrimTestAAtCompress}, {"test_quicklistLtrimTestBAtCompress", test_quicklistLtrimTestBAtCompress}, {"test_quicklistLtrimTestCAtCompress", test_quicklistLtrimTestCAtCompress}, {"test_quicklistLtrimTestDAtCompress", test_quicklistLtrimTestDAtCompress}, {"test_quicklistVerifySpecificCompressionOfInteriorNodes", test_quicklistVerifySpecificCompressionOfInteriorNodes}, {"test_quicklistBookmarkGetUpdatedToNextItem", test_quicklistBookmarkGetUpdatedToNextItem}, {"test_quicklistBookmarkLimit", test_quicklistBookmarkLimit}, {"test_quicklistCompressAndDecompressQuicklistListpackNode", test_quicklistCompressAndDecompressQuicklistListpackNode}, {"test_quicklistCompressAndDecomressQuicklistPlainNodeLargeThanUINT32MAX", test_quicklistCompressAndDecomressQuicklistPlainNodeLargeThanUINT32MAX}, {NULL, NULL}};
244246
unitTest __test_rax_c[] = {{"test_raxRandomWalk", test_raxRandomWalk}, {"test_raxIteratorUnitTests", test_raxIteratorUnitTests}, {"test_raxTryInsertUnitTests", test_raxTryInsertUnitTests}, {"test_raxRegressionTest1", test_raxRegressionTest1}, {"test_raxRegressionTest2", test_raxRegressionTest2}, {"test_raxRegressionTest3", test_raxRegressionTest3}, {"test_raxRegressionTest4", test_raxRegressionTest4}, {"test_raxRegressionTest5", test_raxRegressionTest5}, {"test_raxRegressionTest6", test_raxRegressionTest6}, {"test_raxBenchmark", test_raxBenchmark}, {"test_raxHugeKey", test_raxHugeKey}, {"test_raxFuzz", test_raxFuzz}, {NULL, NULL}};
245247
unitTest __test_sds_c[] = {{"test_sds", test_sds}, {"test_typesAndAllocSize", test_typesAndAllocSize}, {"test_sdsHeaderSizes", test_sdsHeaderSizes}, {"test_sdssplitargs", test_sdssplitargs}, {NULL, NULL}};
@@ -264,6 +266,7 @@ struct unitTestSuite {
264266
{"test_listpack.c", __test_listpack_c},
265267
{"test_networking.c", __test_networking_c},
266268
{"test_object.c", __test_object_c},
269+
{"test_popcount.c", __test_popcount_c},
267270
{"test_quicklist.c", __test_quicklist_c},
268271
{"test_rax.c", __test_rax_c},
269272
{"test_sds.c", __test_sds_c},

src/unit/test_popcount.c

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
#include <time.h>
2+
3+
#include "test_help.h"
4+
5+
#include "../config.h"
6+
#include "../zmalloc.h"
7+
8+
extern long long popcountScalar(void *s, long count);
9+
#ifdef HAVE_AVX2
10+
extern long long popcountAVX2(void *s, long count);
11+
#endif
12+
13+
static long long bitcount(void *s, long count) {
14+
long long bits = 0;
15+
uint8_t *p = (uint8_t *)s;
16+
for (int x = 0; x < count; x += 1) {
17+
uint8_t val = *(x + p);
18+
while (val) {
19+
bits += val & 1;
20+
val >>= 1;
21+
}
22+
}
23+
return bits;
24+
}
25+
26+
static int test_case(const char *msg, int size) {
27+
uint8_t buf[size];
28+
int fuzzing = 1000;
29+
for (int y = 0; y < fuzzing; y += 1) {
30+
for (int z = 0; z < size; z += 1) {
31+
buf[z] = rand() % 256;
32+
}
33+
34+
long long expect = bitcount(buf, size);
35+
long long ret_scalar = popcountScalar(buf, size);
36+
TEST_ASSERT_MESSAGE(msg, expect == ret_scalar);
37+
#ifdef HAVE_AVX2
38+
long long ret_avx2 = popcountAVX2(buf, size);
39+
TEST_ASSERT_MESSAGE(msg, expect == ret_avx2);
40+
#endif
41+
}
42+
43+
return 0;
44+
}
45+
46+
int test_popcount(int argc, char **argv, int flags) {
47+
UNUSED(argc);
48+
UNUSED(argv);
49+
UNUSED(flags);
50+
51+
srand(time(NULL));
52+
53+
#define TEST_CASE(MSG, SIZE) \
54+
if (test_case(MSG, SIZE)) { \
55+
return 1; \
56+
}
57+
58+
/* The AVX2 version divides the array into the following 3 parts."
59+
* Part A Part B Part C
60+
* +-----------------+--------------+---------+
61+
* | 8 * 32bytes * X | 32bytes * Y | Z bytes |
62+
* +-----------------+--------------+---------+
63+
*/
64+
/* So we test the following cases */
65+
TEST_CASE("Popcount: Part A", 8 * 32 * 2);
66+
TEST_CASE("Popcount: Part B", 32 * 2);
67+
TEST_CASE("Popcount: Part C", 2);
68+
TEST_CASE("Popcount: Part A + Part B", 8 * 32 * 7 + 32 * 2);
69+
TEST_CASE("Popcount: Part A + Part C", 8 * 32 * 11 + 7);
70+
TEST_CASE("Popcount: Part A + Part B + Part C", 8 * 32 * 3 + 3 * 32 + 5);
71+
TEST_CASE("Popcount: Corner case", 0);
72+
#undef TEST_CASE
73+
74+
return 0;
75+
}

0 commit comments

Comments
 (0)