Skip to content

Commit db5775e

Browse files
zuiderkwast authored and madolson committed
Listpack encoding for sets (redis#11290)
Small sets that contain non-integer elements are now listpack encoded: by default up to 128 elements of at most 64 bytes each, controlled by the new configs `set-max-listpack-entries` and `set-max-listpack-value`. This saves memory for small sets compared to using a hashtable. Sets with only integers, even very small sets, are still intset encoded (up to 1G limit, etc.). Larger sets are hashtable encoded. This PR increments the RDB version and affects the output of OBJECT ENCODING.

Possible conversions when elements are added:
- intset -> listpack
- listpack -> hashtable
- intset -> hashtable

Note: No conversion happens when elements are deleted. If all elements are deleted and then added again, the set is deleted and recreated, and is thus implicitly converted to a smaller encoding.
1 parent 63d7c06 commit db5775e

File tree

19 files changed

+1132
-344
lines changed

19 files changed

+1132
-344
lines changed

redis.conf

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1951,13 +1951,20 @@ list-max-listpack-size -2
19511951
# etc.
19521952
list-compress-depth 0
19531953

1954-
# Sets have a special encoding in just one case: when a set is composed
1954+
# Sets have a special encoding when a set is composed
19551955
# of just strings that happen to be integers in radix 10 in the range
19561956
# of 64 bit signed integers.
19571957
# The following configuration setting sets the limit on the size of the
19581958
# set in order to use this special memory saving encoding.
19591959
set-max-intset-entries 512
19601960

1961+
# Sets containing non-integer values are also encoded using a memory efficient
1962+
# data structure when they have a small number of entries, and the biggest entry
1963+
# does not exceed a given threshold. These thresholds can be configured using
1964+
# the following directives.
1965+
set-max-listpack-entries 128
1966+
set-max-listpack-value 64
1967+
19611968
# Similarly to hashes and lists, sorted sets are also specially encoded in
19621969
# order to save a lot of space. This encoding is only used when the length and
19631970
# elements of a sorted set are below the following limits:

src/aof.c

Lines changed: 21 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1818,56 +1818,31 @@ int rewriteListObject(rio *r, robj *key, robj *o) {
18181818
* The function returns 0 on error, 1 on success. */
18191819
/* Emit the members of the set 'o' stored at 'key' as one or more SADD
 * commands written to 'r', each carrying at most AOF_REWRITE_ITEMS_PER_CMD
 * members. Members are read through the set type iterator, which is released
 * on every exit path, including write errors.
 *
 * Returns 0 on error, 1 on success. */
int rewriteSetObject(rio *r, robj *key, robj *o) {
    long long count = 0, items = setTypeSize(o);
    setTypeIterator *si = setTypeInitIterator(o);
    char *str;
    size_t len;
    int64_t llval;
    int ok = 0;

    while (setTypeNext(si, &str, &len, &llval) != -1) {
        if (count == 0) {
            /* Start a new SADD command: its argument count is the command
             * name, the key, and as many members as fit in this batch. */
            int cmd_items = (items > AOF_REWRITE_ITEMS_PER_CMD) ?
                AOF_REWRITE_ITEMS_PER_CMD : items;
            if (!rioWriteBulkCount(r,'*',2+cmd_items) ||
                !rioWriteBulkString(r,"SADD",4) ||
                !rioWriteBulkObject(r,key))
            {
                /* Do not return directly here: the iterator must still be
                 * released. */
                goto cleanup;
            }
        }

        /* A non-NULL 'str' is written as a bulk string of 'len' bytes;
         * otherwise the member is written from the integer 'llval'. */
        size_t written = str ?
            rioWriteBulkString(r, str, len) : rioWriteBulkLongLong(r, llval);
        if (!written) goto cleanup;

        /* Once a batch is full, the next member opens a new command. */
        if (++count == AOF_REWRITE_ITEMS_PER_CMD) count = 0;
        items--;
    }
    ok = 1;

cleanup:
    setTypeReleaseIterator(si);
    return ok;
}
18731848

src/config.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3130,6 +3130,8 @@ standardConfig static_configs[] = {
31303130
/* Size_t configs */
31313131
createSizeTConfig("hash-max-listpack-entries", "hash-max-ziplist-entries", MODIFIABLE_CONFIG, 0, LONG_MAX, server.hash_max_listpack_entries, 512, INTEGER_CONFIG, NULL, NULL),
31323132
createSizeTConfig("set-max-intset-entries", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.set_max_intset_entries, 512, INTEGER_CONFIG, NULL, NULL),
3133+
createSizeTConfig("set-max-listpack-entries", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.set_max_listpack_entries, 128, INTEGER_CONFIG, NULL, NULL),
3134+
createSizeTConfig("set-max-listpack-value", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.set_max_listpack_value, 64, INTEGER_CONFIG, NULL, NULL),
31333135
createSizeTConfig("zset-max-listpack-entries", "zset-max-ziplist-entries", MODIFIABLE_CONFIG, 0, LONG_MAX, server.zset_max_listpack_entries, 128, INTEGER_CONFIG, NULL, NULL),
31343136
createSizeTConfig("active-defrag-ignore-bytes", NULL, MODIFIABLE_CONFIG, 1, LLONG_MAX, server.active_defrag_ignore_bytes, 100<<20, MEMORY_CONFIG, NULL, NULL), /* Default: don't defrag if frag overhead is below 100mb */
31353137
createSizeTConfig("hash-max-listpack-value", "hash-max-ziplist-value", MODIFIABLE_CONFIG, 0, LONG_MAX, server.hash_max_listpack_value, 64, MEMORY_CONFIG, NULL, NULL),

src/db.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -915,14 +915,16 @@ void scanGenericCommand(client *c, robj *o, unsigned long cursor) {
915915
} while (cursor &&
916916
maxiterations-- &&
917917
listLength(keys) < (unsigned long)count);
918-
} else if (o->type == OBJ_SET) {
918+
} else if (o->type == OBJ_SET && o->encoding == OBJ_ENCODING_INTSET) {
919919
int pos = 0;
920920
int64_t ll;
921921

922922
while(intsetGet(o->ptr,pos++,&ll))
923923
listAddNodeTail(keys,createStringObjectFromLongLong(ll));
924924
cursor = 0;
925-
} else if (o->type == OBJ_HASH || o->type == OBJ_ZSET) {
925+
} else if ((o->type == OBJ_HASH || o->type == OBJ_ZSET || o->type == OBJ_SET) &&
926+
o->encoding == OBJ_ENCODING_LISTPACK)
927+
{
926928
unsigned char *p = lpFirst(o->ptr);
927929
unsigned char *vstr;
928930
int64_t vlen;

src/defrag.c

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -874,10 +874,12 @@ long defragKey(redisDb *db, dictEntry *de) {
874874
} else if (ob->type == OBJ_SET) {
875875
if (ob->encoding == OBJ_ENCODING_HT) {
876876
defragged += defragSet(db, de);
877-
} else if (ob->encoding == OBJ_ENCODING_INTSET) {
878-
intset *newis, *is = ob->ptr;
879-
if ((newis = activeDefragAlloc(is)))
880-
defragged++, ob->ptr = newis;
877+
} else if (ob->encoding == OBJ_ENCODING_INTSET ||
878+
ob->encoding == OBJ_ENCODING_LISTPACK)
879+
{
880+
void *newptr, *ptr = ob->ptr;
881+
if ((newptr = activeDefragAlloc(ptr)))
882+
defragged++, ob->ptr = newptr;
881883
} else {
882884
serverPanic("Unknown set encoding");
883885
}

src/intset.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,17 @@ int64_t intsetRandom(intset *is) {
265265
return _intsetGet(is,rand()%len);
266266
}
267267

268+
/* Return the largest member. */
269+
int64_t intsetMax(intset *is) {
270+
uint32_t len = intrev32ifbe(is->length);
271+
return _intsetGet(is, len - 1);
272+
}
273+
274+
/* Return the smallest member. */
275+
int64_t intsetMin(intset *is) {
276+
return _intsetGet(is, 0);
277+
}
278+
268279
/* Get the value at the given position. When this position is
269280
* out of range the function returns 0, when in range it returns 1. */
270281
uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value) {
@@ -425,6 +436,8 @@ int intsetTest(int argc, char **argv, int flags) {
425436
is = intsetAdd(is,6,&success); assert(success);
426437
is = intsetAdd(is,4,&success); assert(success);
427438
is = intsetAdd(is,4,&success); assert(!success);
439+
assert(6 == intsetMax(is));
440+
assert(4 == intsetMin(is));
428441
ok();
429442
zfree(is);
430443
}

src/intset.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ intset *intsetAdd(intset *is, int64_t value, uint8_t *success);
4343
intset *intsetRemove(intset *is, int64_t value, int *success);
4444
uint8_t intsetFind(intset *is, int64_t value);
4545
int64_t intsetRandom(intset *is);
46+
int64_t intsetMax(intset *is);
47+
int64_t intsetMin(intset *is);
4648
uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value);
4749
uint32_t intsetLen(const intset *is);
4850
size_t intsetBlobLen(intset *is);

0 commit comments

Comments
 (0)