diff --git a/cmake/Modules/SourceFiles.cmake b/cmake/Modules/SourceFiles.cmake index 0e484e5179..861d782070 100644 --- a/cmake/Modules/SourceFiles.cmake +++ b/cmake/Modules/SourceFiles.cmake @@ -117,7 +117,10 @@ set(VALKEY_SERVER_SRCS ${CMAKE_SOURCE_DIR}/src/connection.c ${CMAKE_SOURCE_DIR}/src/unix.c ${CMAKE_SOURCE_DIR}/src/server.c - ${CMAKE_SOURCE_DIR}/src/logreqres.c) + ${CMAKE_SOURCE_DIR}/src/logreqres.c + ${CMAKE_SOURCE_DIR}/src/entry.c + ${CMAKE_SOURCE_DIR}/src/vset.c) + # valkey-cli set(VALKEY_CLI_SRCS diff --git a/src/Makefile b/src/Makefile index 5391168701..b9f2e9f0ee 100644 --- a/src/Makefile +++ b/src/Makefile @@ -423,7 +423,7 @@ ENGINE_NAME=valkey SERVER_NAME=$(ENGINE_NAME)-server$(PROG_SUFFIX) ENGINE_SENTINEL_NAME=$(ENGINE_NAME)-sentinel$(PROG_SUFFIX) ENGINE_TRACE_OBJ=trace/trace.o trace/trace_commands.o trace/trace_db.o trace/trace_cluster.o trace/trace_server.o trace/trace_rdb.o trace/trace_aof.o -ENGINE_SERVER_OBJ=threads_mngr.o adlist.o vector.o quicklist.o ae.o anet.o dict.o hashtable.o kvstore.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o memory_prefetch.o io_threads.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o cluster_legacy.o cluster_slot_stats.o crc16.o endianconv.o commandlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crccombine.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o valkey-check-rdb.o valkey-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o allocator_defrag.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script.o functions.o commands.o strl.o connection.o unix.o logreqres.o rdma.o 
scripting_engine.o lua/script_lua.o lua/function_lua.o lua/engine_lua.o lua/debug_lua.o +ENGINE_SERVER_OBJ=threads_mngr.o adlist.o vector.o quicklist.o ae.o anet.o dict.o hashtable.o kvstore.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o memory_prefetch.o io_threads.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o cluster_legacy.o cluster_slot_stats.o crc16.o endianconv.o commandlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crccombine.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o valkey-check-rdb.o valkey-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o allocator_defrag.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script.o functions.o commands.o strl.o connection.o unix.o logreqres.o rdma.o scripting_engine.o entry.o vset.o lua/script_lua.o lua/function_lua.o lua/engine_lua.o lua/debug_lua.o ENGINE_SERVER_OBJ+=$(ENGINE_TRACE_OBJ) ENGINE_CLI_NAME=$(ENGINE_NAME)-cli$(PROG_SUFFIX) ENGINE_CLI_OBJ=anet.o adlist.o dict.o valkey-cli.o zmalloc.o release.o ae.o serverassert.o crcspeed.o crccombine.o crc64.o siphash.o crc16.o monotonic.o cli_common.o mt19937-64.o strl.o cli_commands.o sds.o util.o sha256.o diff --git a/src/anet.c b/src/anet.c index 8bc1626966..5524e9cf4c 100644 --- a/src/anet.c +++ b/src/anet.c @@ -52,8 +52,6 @@ #include "util.h" #include "serverassert.h" -#define UNUSED(x) (void)(x) - static void anetSetError(char *err, const char *fmt, ...) 
{ va_list ap; diff --git a/src/aof.c b/src/aof.c index 9b72aff0f5..567acdf60c 100644 --- a/src/aof.c +++ b/src/aof.c @@ -1955,12 +1955,32 @@ static int rioWriteHashIteratorCursor(rio *r, hashTypeIterator *hi, int what) { * The function returns 0 on error, 1 on success. */ int rewriteHashObject(rio *r, robj *key, robj *o) { hashTypeIterator hi; - long long count = 0, items = hashTypeLength(o); - + long long count = 0, volatile_items = 0, non_volatile_items; + /* First serialize volatile items if exist */ + if (hashTypeHasVolatileElements(o)) { + hashTypeInitVolatileIterator(o, &hi); + while (hashTypeNext(&hi) != C_ERR) { + long long expiry = entryGetExpiry(hi.next); + sds field = entryGetField(hi.next); + sds value = entryGetValue(hi.next); + if (rioWriteBulkCount(r, '*', 8) == 0) return 0; + if (rioWriteBulkString(r, "HSETEX", 6) == 0) return 0; + if (rioWriteBulkObject(r, key) == 0) return 0; + if (rioWriteBulkString(r, "PXAT", 4) == 0) return 0; + if (rioWriteBulkLongLong(r, expiry) == 0) return 0; + if (rioWriteBulkString(r, "FIELDS", 6) == 0) return 0; + if (rioWriteBulkLongLong(r, 1) == 0) return 0; + if (rioWriteBulkString(r, field, sdslen(field)) == 0) return 0; + if (rioWriteBulkString(r, value, sdslen(value)) == 0) return 0; + volatile_items++; + } + hashTypeResetIterator(&hi); + } + non_volatile_items = hashTypeLength(o) - volatile_items; hashTypeInitIterator(o, &hi); while (hashTypeNext(&hi) != C_ERR) { if (count == 0) { - int cmd_items = (items > AOF_REWRITE_ITEMS_PER_CMD) ? AOF_REWRITE_ITEMS_PER_CMD : items; + int cmd_items = (non_volatile_items > AOF_REWRITE_ITEMS_PER_CMD) ? 
AOF_REWRITE_ITEMS_PER_CMD : non_volatile_items; if (!rioWriteBulkCount(r, '*', 2 + cmd_items * 2) || !rioWriteBulkString(r, "HMSET", 5) || !rioWriteBulkObject(r, key)) { @@ -1969,16 +1989,18 @@ int rewriteHashObject(rio *r, robj *key, robj *o) { } } + if (volatile_items > 0 && entryHasExpiry(hi.next)) + continue; + if (!rioWriteHashIteratorCursor(r, &hi, OBJ_HASH_FIELD) || !rioWriteHashIteratorCursor(r, &hi, OBJ_HASH_VALUE)) { hashTypeResetIterator(&hi); return 0; } if (++count == AOF_REWRITE_ITEMS_PER_CMD) count = 0; - items--; + non_volatile_items--; } hashTypeResetIterator(&hi); - return 1; } diff --git a/src/commands.def b/src/commands.def index 689b08be47..71d5a11473 100644 --- a/src/commands.def +++ b/src/commands.def @@ -3564,6 +3564,119 @@ struct COMMAND_ARG HEXISTS_Args[] = { {MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, }; +/********** HEXPIRE ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* HEXPIRE history */ +#define HEXPIRE_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* HEXPIRE tips */ +#define HEXPIRE_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* HEXPIRE key specs */ +keySpec HEXPIRE_Keyspecs[1] = { +{NULL,CMD_KEY_RW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* HEXPIRE condition argument table */ +struct COMMAND_ARG HEXPIRE_condition_Subargs[] = { +{MAKE_ARG("nx",ARG_TYPE_PURE_TOKEN,-1,"NX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("xx",ARG_TYPE_PURE_TOKEN,-1,"XX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("gt",ARG_TYPE_PURE_TOKEN,-1,"GT",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("lt",ARG_TYPE_PURE_TOKEN,-1,"LT",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/* HEXPIRE fields argument table */ +struct COMMAND_ARG HEXPIRE_fields_Subargs[] = { +{MAKE_ARG("numfields",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, +}; + +/* HEXPIRE argument table */ +struct 
COMMAND_ARG HEXPIRE_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("seconds",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("condition",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,4,NULL),.subargs=HEXPIRE_condition_Subargs}, +{MAKE_ARG("fields",ARG_TYPE_BLOCK,-1,"FIELDS",NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=HEXPIRE_fields_Subargs}, +}; + +/********** HEXPIREAT ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* HEXPIREAT history */ +#define HEXPIREAT_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* HEXPIREAT tips */ +#define HEXPIREAT_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* HEXPIREAT key specs */ +keySpec HEXPIREAT_Keyspecs[1] = { +{NULL,CMD_KEY_RW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* HEXPIREAT condition argument table */ +struct COMMAND_ARG HEXPIREAT_condition_Subargs[] = { +{MAKE_ARG("nx",ARG_TYPE_PURE_TOKEN,-1,"NX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("xx",ARG_TYPE_PURE_TOKEN,-1,"XX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("gt",ARG_TYPE_PURE_TOKEN,-1,"GT",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("lt",ARG_TYPE_PURE_TOKEN,-1,"LT",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/* HEXPIREAT fields argument table */ +struct COMMAND_ARG HEXPIREAT_fields_Subargs[] = { +{MAKE_ARG("numfields",ARG_TYPE_INTEGER,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, +}; + +/* HEXPIREAT argument table */ +struct COMMAND_ARG HEXPIREAT_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("unix-time-seconds",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("condition",ARG_TYPE_ONEOF,-1,NULL,NULL,"9.0.0",CMD_ARG_OPTIONAL,4,NULL),.subargs=HEXPIREAT_condition_Subargs}, +{MAKE_ARG("fields",ARG_TYPE_BLOCK,-1,"FIELDS",NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=HEXPIREAT_fields_Subargs}, +}; + 
+/********** HEXPIRETIME ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* HEXPIRETIME history */ +#define HEXPIRETIME_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* HEXPIRETIME tips */ +#define HEXPIRETIME_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* HEXPIRETIME key specs */ +keySpec HEXPIRETIME_Keyspecs[1] = { +{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* HEXPIRETIME fields argument table */ +struct COMMAND_ARG HEXPIRETIME_fields_Subargs[] = { +{MAKE_ARG("numfields",ARG_TYPE_INTEGER,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, +}; + +/* HEXPIRETIME argument table */ +struct COMMAND_ARG HEXPIRETIME_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("fields",ARG_TYPE_BLOCK,-1,"FIELDS",NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=HEXPIRETIME_fields_Subargs}, +}; + /********** HGET ********************/ #ifndef SKIP_CMD_HISTORY_TABLE @@ -3615,6 +3728,47 @@ struct COMMAND_ARG HGETALL_Args[] = { {MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, }; +/********** HGETEX ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* HGETEX history */ +#define HGETEX_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* HGETEX tips */ +#define HGETEX_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* HGETEX key specs */ +keySpec HGETEX_Keyspecs[1] = { +{NULL,CMD_KEY_RW|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* HGETEX expiration argument table */ +struct COMMAND_ARG HGETEX_expiration_Subargs[] = { +{MAKE_ARG("seconds",ARG_TYPE_INTEGER,-1,"EX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("milliseconds",ARG_TYPE_INTEGER,-1,"PX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("unix-time-seconds",ARG_TYPE_UNIX_TIME,-1,"EXAT",NULL,NULL,CMD_ARG_NONE,0,NULL)}, 
+{MAKE_ARG("unix-time-milliseconds",ARG_TYPE_UNIX_TIME,-1,"PXAT",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("persist",ARG_TYPE_PURE_TOKEN,-1,"PERSIST",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/* HGETEX fields argument table */ +struct COMMAND_ARG HGETEX_fields_Subargs[] = { +{MAKE_ARG("numfields",ARG_TYPE_INTEGER,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, +}; + +/* HGETEX argument table */ +struct COMMAND_ARG HGETEX_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("expiration",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,5,NULL),.subargs=HGETEX_expiration_Subargs}, +{MAKE_ARG("fields",ARG_TYPE_BLOCK,-1,"FIELDS",NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=HGETEX_fields_Subargs}, +}; + /********** HINCRBY ********************/ #ifndef SKIP_CMD_HISTORY_TABLE @@ -3773,6 +3927,181 @@ struct COMMAND_ARG HMSET_Args[] = { {MAKE_ARG("data",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,2,NULL),.subargs=HMSET_data_Subargs}, }; +/********** HPERSIST ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* HPERSIST history */ +#define HPERSIST_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* HPERSIST tips */ +#define HPERSIST_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* HPERSIST key specs */ +keySpec HPERSIST_Keyspecs[1] = { +{NULL,CMD_KEY_RW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* HPERSIST fields argument table */ +struct COMMAND_ARG HPERSIST_fields_Subargs[] = { +{MAKE_ARG("numfields",ARG_TYPE_INTEGER,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, +}; + +/* HPERSIST argument table */ +struct COMMAND_ARG HPERSIST_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("fields",ARG_TYPE_BLOCK,-1,"FIELDS",NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=HPERSIST_fields_Subargs}, 
+}; + +/********** HPEXPIRE ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* HPEXPIRE history */ +#define HPEXPIRE_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* HPEXPIRE tips */ +#define HPEXPIRE_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* HPEXPIRE key specs */ +keySpec HPEXPIRE_Keyspecs[1] = { +{NULL,CMD_KEY_RW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* HPEXPIRE condition argument table */ +struct COMMAND_ARG HPEXPIRE_condition_Subargs[] = { +{MAKE_ARG("nx",ARG_TYPE_PURE_TOKEN,-1,"NX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("xx",ARG_TYPE_PURE_TOKEN,-1,"XX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("gt",ARG_TYPE_PURE_TOKEN,-1,"GT",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("lt",ARG_TYPE_PURE_TOKEN,-1,"LT",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/* HPEXPIRE fields argument table */ +struct COMMAND_ARG HPEXPIRE_fields_Subargs[] = { +{MAKE_ARG("numfields",ARG_TYPE_INTEGER,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, +}; + +/* HPEXPIRE argument table */ +struct COMMAND_ARG HPEXPIRE_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("milliseconds",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("condition",ARG_TYPE_ONEOF,-1,NULL,NULL,"9.0.0",CMD_ARG_OPTIONAL,4,NULL),.subargs=HPEXPIRE_condition_Subargs}, +{MAKE_ARG("fields",ARG_TYPE_BLOCK,-1,"FIELDS",NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=HPEXPIRE_fields_Subargs}, +}; + +/********** HPEXPIREAT ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* HPEXPIREAT history */ +#define HPEXPIREAT_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* HPEXPIREAT tips */ +#define HPEXPIREAT_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* HPEXPIREAT key specs */ +keySpec HPEXPIREAT_Keyspecs[1] = { 
+{NULL,CMD_KEY_RW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* HPEXPIREAT condition argument table */ +struct COMMAND_ARG HPEXPIREAT_condition_Subargs[] = { +{MAKE_ARG("nx",ARG_TYPE_PURE_TOKEN,-1,"NX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("xx",ARG_TYPE_PURE_TOKEN,-1,"XX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("gt",ARG_TYPE_PURE_TOKEN,-1,"GT",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("lt",ARG_TYPE_PURE_TOKEN,-1,"LT",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/* HPEXPIREAT fields argument table */ +struct COMMAND_ARG HPEXPIREAT_fields_Subargs[] = { +{MAKE_ARG("numfields",ARG_TYPE_INTEGER,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, +}; + +/* HPEXPIREAT argument table */ +struct COMMAND_ARG HPEXPIREAT_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("unix-time-milliseconds",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("condition",ARG_TYPE_ONEOF,-1,NULL,NULL,"9.0.0",CMD_ARG_OPTIONAL,4,NULL),.subargs=HPEXPIREAT_condition_Subargs}, +{MAKE_ARG("fields",ARG_TYPE_BLOCK,-1,"FIELDS",NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=HPEXPIREAT_fields_Subargs}, +}; + +/********** HPEXPIRETIME ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* HPEXPIRETIME history */ +#define HPEXPIRETIME_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* HPEXPIRETIME tips */ +#define HPEXPIRETIME_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* HPEXPIRETIME key specs */ +keySpec HPEXPIRETIME_Keyspecs[1] = { +{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* HPEXPIRETIME fields argument table */ +struct COMMAND_ARG HPEXPIRETIME_fields_Subargs[] = { +{MAKE_ARG("numfields",ARG_TYPE_INTEGER,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, +}; + +/* 
HPEXPIRETIME argument table */ +struct COMMAND_ARG HPEXPIRETIME_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("fields",ARG_TYPE_BLOCK,-1,"FIELDS",NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=HPEXPIRETIME_fields_Subargs}, +}; + +/********** HPTTL ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* HPTTL history */ +#define HPTTL_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* HPTTL tips */ +#define HPTTL_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* HPTTL key specs */ +keySpec HPTTL_Keyspecs[1] = { +{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* HPTTL fields argument table */ +struct COMMAND_ARG HPTTL_fields_Subargs[] = { +{MAKE_ARG("numfields",ARG_TYPE_INTEGER,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, +}; + +/* HPTTL argument table */ +struct COMMAND_ARG HPTTL_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("fields",ARG_TYPE_BLOCK,-1,"FIELDS",NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=HPTTL_fields_Subargs}, +}; + /********** HRANDFIELD ********************/ #ifndef SKIP_CMD_HISTORY_TABLE @@ -3869,6 +4198,60 @@ struct COMMAND_ARG HSET_Args[] = { {MAKE_ARG("data",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,2,NULL),.subargs=HSET_data_Subargs}, }; +/********** HSETEX ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* HSETEX history */ +#define HSETEX_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* HSETEX tips */ +#define HSETEX_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* HSETEX key specs */ +keySpec HSETEX_Keyspecs[1] = { +{NULL,CMD_KEY_RW|CMD_KEY_INSERT,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* HSETEX fields_condition argument table */ +struct COMMAND_ARG HSETEX_fields_condition_Subargs[] = { 
+{MAKE_ARG("fnx",ARG_TYPE_PURE_TOKEN,-1,"FNX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("fxx",ARG_TYPE_PURE_TOKEN,-1,"FXX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/* HSETEX expiration argument table */ +struct COMMAND_ARG HSETEX_expiration_Subargs[] = { +{MAKE_ARG("seconds",ARG_TYPE_INTEGER,-1,"EX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("milliseconds",ARG_TYPE_INTEGER,-1,"PX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("unix-time-seconds",ARG_TYPE_UNIX_TIME,-1,"EXAT",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("unix-time-milliseconds",ARG_TYPE_UNIX_TIME,-1,"PXAT",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("keepttl",ARG_TYPE_PURE_TOKEN,-1,"KEEPTTL",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/* HSETEX fields data argument table */ +struct COMMAND_ARG HSETEX_fields_data_Subargs[] = { +{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("value",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/* HSETEX fields argument table */ +struct COMMAND_ARG HSETEX_fields_Subargs[] = { +{MAKE_ARG("numfields",ARG_TYPE_INTEGER,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("data",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,2,NULL),.subargs=HSETEX_fields_data_Subargs}, +}; + +/* HSETEX argument table */ +struct COMMAND_ARG HSETEX_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("fields-condition",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=HSETEX_fields_condition_Subargs}, +{MAKE_ARG("expiration",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,5,NULL),.subargs=HSETEX_expiration_Subargs}, +{MAKE_ARG("fields",ARG_TYPE_BLOCK,-1,"FIELDS",NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=HSETEX_fields_Subargs}, +}; + /********** HSETNX ********************/ #ifndef SKIP_CMD_HISTORY_TABLE @@ -3920,6 +4303,37 @@ struct COMMAND_ARG HSTRLEN_Args[] = { {MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, }; +/********** HTTL ********************/ + 
+#ifndef SKIP_CMD_HISTORY_TABLE +/* HTTL history */ +#define HTTL_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* HTTL tips */ +#define HTTL_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* HTTL key specs */ +keySpec HTTL_Keyspecs[1] = { +{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* HTTL fields argument table */ +struct COMMAND_ARG HTTL_fields_Subargs[] = { +{MAKE_ARG("numfields",ARG_TYPE_INTEGER,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, +}; + +/* HTTL argument table */ +struct COMMAND_ARG HTTL_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("fields",ARG_TYPE_BLOCK,-1,"FIELDS",NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=HTTL_fields_Subargs}, +}; + /********** HVALS ********************/ #ifndef SKIP_CMD_HISTORY_TABLE @@ -11278,19 +11692,30 @@ struct COMMAND_STRUCT serverCommandTable[] = { /* hash */ {MAKE_CMD("hdel","Deletes one or more fields and their values from a hash. 
Deletes the hash if no fields remain.","O(N) where N is the number of fields to be removed.","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HDEL_History,1,HDEL_Tips,0,hdelCommand,-3,CMD_WRITE|CMD_FAST,ACL_CATEGORY_HASH,HDEL_Keyspecs,1,NULL,2),.args=HDEL_Args}, {MAKE_CMD("hexists","Determines whether a field exists in a hash.","O(1)","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HEXISTS_History,0,HEXISTS_Tips,0,hexistsCommand,3,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HEXISTS_Keyspecs,1,NULL,2),.args=HEXISTS_Args}, +{MAKE_CMD("hexpire","Set expiry time on hash fields.","O(N) where N is the number of specified fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HEXPIRE_History,0,HEXPIRE_Tips,0,hexpireCommand,-6,CMD_WRITE|CMD_FAST,ACL_CATEGORY_HASH,HEXPIRE_Keyspecs,1,NULL,4),.args=HEXPIRE_Args}, +{MAKE_CMD("hexpireat","Set expiry time on hash fields.","O(N) where N is the number of specified fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HEXPIREAT_History,0,HEXPIREAT_Tips,0,hexpireatCommand,-6,CMD_WRITE|CMD_FAST,ACL_CATEGORY_HASH,HEXPIREAT_Keyspecs,1,NULL,4),.args=HEXPIREAT_Args}, +{MAKE_CMD("hexpiretime","Returns Unix timestamps in seconds since the epoch at which the given key's field(s) will expire","O(1) for each field, so O(N) for N items when the command is called with multiple fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HEXPIRETIME_History,0,HEXPIRETIME_Tips,0,hexpiretimeCommand,-5,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HEXPIRETIME_Keyspecs,1,NULL,2),.args=HEXPIRETIME_Args}, {MAKE_CMD("hget","Returns the value of a field in a hash.","O(1)","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HGET_History,0,HGET_Tips,0,hgetCommand,3,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HGET_Keyspecs,1,NULL,2),.args=HGET_Args}, {MAKE_CMD("hgetall","Returns all fields and values in a hash.","O(N) where N is the size of the 
hash.","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HGETALL_History,0,HGETALL_Tips,1,hgetallCommand,2,CMD_READONLY,ACL_CATEGORY_HASH,HGETALL_Keyspecs,1,NULL,1),.args=HGETALL_Args}, +{MAKE_CMD("hgetex","Get the value of one or more fields of a given hash key, and optionally set their expiration time or time-to-live (TTL).","O(1)","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HGETEX_History,0,HGETEX_Tips,0,hgetexCommand,-5,CMD_WRITE|CMD_FAST,ACL_CATEGORY_HASH,HGETEX_Keyspecs,1,NULL,3),.args=HGETEX_Args}, {MAKE_CMD("hincrby","Increments the integer value of a field in a hash by a number. Uses 0 as initial value if the field doesn't exist.","O(1)","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HINCRBY_History,0,HINCRBY_Tips,0,hincrbyCommand,4,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HINCRBY_Keyspecs,1,NULL,3),.args=HINCRBY_Args}, {MAKE_CMD("hincrbyfloat","Increments the floating point value of a field by a number. Uses 0 as initial value if the field doesn't exist.","O(1)","2.6.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HINCRBYFLOAT_History,0,HINCRBYFLOAT_Tips,0,hincrbyfloatCommand,4,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HINCRBYFLOAT_Keyspecs,1,NULL,3),.args=HINCRBYFLOAT_Args}, {MAKE_CMD("hkeys","Returns all fields in a hash.","O(N) where N is the size of the hash.","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HKEYS_History,0,HKEYS_Tips,1,hkeysCommand,2,CMD_READONLY,ACL_CATEGORY_HASH,HKEYS_Keyspecs,1,NULL,1),.args=HKEYS_Args}, {MAKE_CMD("hlen","Returns the number of fields in a hash.","O(1)","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HLEN_History,0,HLEN_Tips,0,hlenCommand,2,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HLEN_Keyspecs,1,NULL,1),.args=HLEN_Args}, {MAKE_CMD("hmget","Returns the values of all fields in a hash.","O(N) where N is the number of fields being 
requested.","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HMGET_History,0,HMGET_Tips,0,hmgetCommand,-3,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HMGET_Keyspecs,1,NULL,2),.args=HMGET_Args}, {MAKE_CMD("hmset","Sets the values of multiple fields.","O(N) where N is the number of fields being set.","2.0.0",CMD_DOC_DEPRECATED,"`HSET` with multiple field-value pairs","4.0.0","hash",COMMAND_GROUP_HASH,HMSET_History,0,HMSET_Tips,0,hsetCommand,-4,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HMSET_Keyspecs,1,NULL,2),.args=HMSET_Args}, +{MAKE_CMD("hpersist","Remove the existing expiration on a hash key's field(s).","O(1) for each field assigned with TTL, so O(N) to persist N items when the command is called with multiple fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HPERSIST_History,0,HPERSIST_Tips,0,hpersistCommand,-5,CMD_WRITE|CMD_FAST,ACL_CATEGORY_HASH,HPERSIST_Keyspecs,1,NULL,2),.args=HPERSIST_Args}, +{MAKE_CMD("hpexpire","Set expiry time on hash object.","O(N) where N is the number of specified fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HPEXPIRE_History,0,HPEXPIRE_Tips,0,hpexpireCommand,-6,CMD_WRITE|CMD_FAST,ACL_CATEGORY_HASH,HPEXPIRE_Keyspecs,1,NULL,4),.args=HPEXPIRE_Args}, +{MAKE_CMD("hpexpireat","Set expiration time on hash field.","O(N) where N is the number of specified fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HPEXPIREAT_History,0,HPEXPIREAT_Tips,0,hpexpireatCommand,-6,CMD_WRITE|CMD_FAST,ACL_CATEGORY_HASH,HPEXPIREAT_Keyspecs,1,NULL,4),.args=HPEXPIREAT_Args}, +{MAKE_CMD("hpexpiretime","Returns the Unix timestamp in milliseconds since Unix epoch at which the given key's field(s) will expire","O(1) for each field, so O(N) for N items when the command is called with multiple 
fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HPEXPIRETIME_History,0,HPEXPIRETIME_Tips,0,hpexpiretimeCommand,-5,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HPEXPIRETIME_Keyspecs,1,NULL,2),.args=HPEXPIRETIME_Args}, +{MAKE_CMD("hpttl","Returns the remaining time to live in milliseconds of a hash key's field(s) that have an associated expiration.","O(1) for each field assigned with TTL, so O(N) for N items when the command is called with multiple fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HPTTL_History,0,HPTTL_Tips,0,hpttlCommand,-5,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HPTTL_Keyspecs,1,NULL,2),.args=HPTTL_Args}, {MAKE_CMD("hrandfield","Returns one or more random fields from a hash.","O(N) where N is the number of fields returned","6.2.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HRANDFIELD_History,0,HRANDFIELD_Tips,1,hrandfieldCommand,-2,CMD_READONLY,ACL_CATEGORY_HASH,HRANDFIELD_Keyspecs,1,NULL,2),.args=HRANDFIELD_Args}, {MAKE_CMD("hscan","Iterates over fields and values of a hash.","O(1) for every call. O(N) for a complete iteration, including enough command calls for the cursor to return back to 0. 
N is the number of elements inside the collection.","2.8.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HSCAN_History,0,HSCAN_Tips,1,hscanCommand,-3,CMD_READONLY,ACL_CATEGORY_HASH,HSCAN_Keyspecs,1,NULL,5),.args=HSCAN_Args}, {MAKE_CMD("hset","Creates or modifies the value of a field in a hash.","O(1) for each field/value pair added, so O(N) to add N field/value pairs when the command is called with multiple field/value pairs.","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HSET_History,1,HSET_Tips,0,hsetCommand,-4,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HSET_Keyspecs,1,NULL,2),.args=HSET_Args}, +{MAKE_CMD("hsetex","Set the value of one or more fields of a given hash key, and optionally set their expiration time.","O(1)","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HSETEX_History,0,HSETEX_Tips,0,hsetexCommand,-6,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HSETEX_Keyspecs,1,NULL,4),.args=HSETEX_Args}, {MAKE_CMD("hsetnx","Sets the value of a field in a hash only when the field doesn't exist.","O(1)","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HSETNX_History,0,HSETNX_Tips,0,hsetnxCommand,4,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HSETNX_Keyspecs,1,NULL,3),.args=HSETNX_Args}, {MAKE_CMD("hstrlen","Returns the length of the value of a field.","O(1)","3.2.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HSTRLEN_History,0,HSTRLEN_Tips,0,hstrlenCommand,3,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HSTRLEN_Keyspecs,1,NULL,2),.args=HSTRLEN_Args}, +{MAKE_CMD("httl","Returns the remaining time to live (in seconds) of a hash key's field(s) that have an associated expiration.","O(1) for each field, so O(N) for N items when the command is called with multiple fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HTTL_History,0,HTTL_Tips,0,httlCommand,-5,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HTTL_Keyspecs,1,NULL,2),.args=HTTL_Args}, {MAKE_CMD("hvals","Returns all values in a hash.","O(N) where N is the size of 
the hash.","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HVALS_History,0,HVALS_Tips,1,hvalsCommand,2,CMD_READONLY,ACL_CATEGORY_HASH,HVALS_Keyspecs,1,NULL,1),.args=HVALS_Args}, /* hyperloglog */ {MAKE_CMD("pfadd","Adds elements to a HyperLogLog key. Creates the key if it doesn't exist.","O(1) to add every element.","2.8.9",CMD_DOC_NONE,NULL,NULL,"hyperloglog",COMMAND_GROUP_HYPERLOGLOG,PFADD_History,0,PFADD_Tips,0,pfaddCommand,-2,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HYPERLOGLOG,PFADD_Keyspecs,1,NULL,2),.args=PFADD_Args}, diff --git a/src/commands/hexpire.json b/src/commands/hexpire.json new file mode 100644 index 0000000000..338fe53dd4 --- /dev/null +++ b/src/commands/hexpire.json @@ -0,0 +1,118 @@ +{ + "HEXPIRE": { + "summary": "Set expiry time on hash fields.", + "complexity": "O(N) where N is the number of specified fields.", + "group": "hash", + "since": "9.0.0", + "arity": -6, + "function": "hexpireCommand", + "command_flags": [ + "WRITE", + "FAST" + ], + "acl_categories": [ + "HASH" + ], + "key_specs": [ + { + "flags": [ + "RW", + "UPDATE" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "description": "List of integer codes indicating the result of setting expiry on each specified field, in the same order as the fields are requested.", + "type": "array", + "minItems": 1, + "items": { + "oneOf": [ + { + "description": "Field does not exist in the HASH, or key does not exist.", + "const": -2 + }, + { + "description": "The specified NX | XX | GT | LT condition has not been met.", + "const": 0 + }, + { + "description": "The expiration time was applied.", + "const": 1 + }, + { + "description": "When called with a 0 second", + "const": 2 + } + ] + } + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "seconds", + "type": "integer" + }, + { + "name": "condition", + "type": "oneof", + 
"description": "When called with 0 seconds or with a Unix time in the past, the field is deleted immediately.",
"unix-time-seconds", + "type": "integer" + }, + { + "name": "condition", + "type": "oneof", + "optional": true, + "since": "9.0.0", + "arguments": [ + { + "name": "nx", + "type": "pure-token", + "token": "NX" + }, + { + "name": "xx", + "type": "pure-token", + "token": "XX" + }, + { + "name": "gt", + "type": "pure-token", + "token": "GT" + }, + { + "name": "lt", + "type": "pure-token", + "token": "LT" + } + ] + }, + { + "name": "fields", + "token": "FIELDS", + "type": "block", + "arguments": [ + { + "name": "numfields", + "type": "integer", + "key_spec_index": 0, + "multiple": false, + "minimum": 1 + }, + { + "name": "field", + "type": "string", + "multiple": true + } + ] + } + ] + } +} diff --git a/src/commands/hexpiretime.json b/src/commands/hexpiretime.json new file mode 100644 index 0000000000..82c4d5c70e --- /dev/null +++ b/src/commands/hexpiretime.json @@ -0,0 +1,85 @@ +{ + "HEXPIRETIME": { + "summary": "Returns Unix timestamps in seconds since the epoch at which the given key's field(s) will expire", + "complexity": "O(1) for each field, so O(N) for N items when the command is called with multiple fields.", + "group": "hash", + "since": "9.0.0", + "arity": -5, + "function": "hexpiretimeCommand", + "command_flags": [ + "READONLY", + "FAST" + ], + "acl_categories": [ + "HASH" + ], + "key_specs": [ + { + "flags": [ + "RO", + "ACCESS" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "description": "List of values associated with the result of getting the absolute expiry timestamp of the specific fields, in the same order as they are requested.", + "type": "array", + "minItems": 1, + "items": { + "oneOf": [ + { + "description": "Field does not exist in the provided hash key, or the hash key is empty.", + "const": -2 + }, + { + "description": "Field exists in the provided hash key, but has no expiration associated with it.", + "const": -1 + }, + { 
"complexity": "O(N) where N is the number of specified fields.",
+ "type": "unix-time", + "token": "PXAT" + }, + { + "name": "persist", + "type": "pure-token", + "token": "PERSIST" + } + ] + }, + { + "name": "fields", + "token": "FIELDS", + "type": "block", + "arguments": [ + { + "name": "numfields", + "type": "integer", + "key_spec_index": 0, + "multiple": false, + "minimum": 1 + }, + { + "name": "field", + "type": "string", + "multiple": true + } + ] + } + ] + } +} diff --git a/src/commands/hpersist.json b/src/commands/hpersist.json new file mode 100644 index 0000000000..180d3e9016 --- /dev/null +++ b/src/commands/hpersist.json @@ -0,0 +1,84 @@ +{ + "HPERSIST": { + "summary": "Remove the existing expiration on a hash key's field(s).", + "complexity": "O(1) for each field assigned with TTL, so O(N) to persist N items when the command is called with multiple fields.", + "group": "hash", + "since": "9.0.0", + "arity": -5, + "function": "hpersistCommand", + "command_flags": [ + "WRITE", + "FAST" + ], + "acl_categories": [ + "HASH" + ], + "key_specs": [ + { + "flags": [ + "RW", + "UPDATE" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "description": "List of integer codes indicating the result of setting expiry on each specified field, in the same order as the fields are requested.", + "type": "array", + "minItems": 1, + "items": { + "oneOf": [ + { + "description": "Field does not exist in the provided hash key, or the hash key does not exist.", + "const": -2 + }, + { + "description": "Field exists in the provided hash key, but has no expiration associated with it.", + "const": -1 + }, + { + "description": "The expiration time was removed from the hash key field.", + "const": 1 + } + ] + } + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "fields", + "token": "FIELDS", + "type": "block", + "arguments": [ + { + "name": "numfields", + "type": "integer", + 
"description": "When called with 0 milliseconds, the field is deleted immediately.",
"description": "When called with 0 milliseconds or with a Unix time in the past, the field is deleted immediately.",
"GT" + }, + { + "name": "lt", + "type": "pure-token", + "token": "LT" + } + ] + }, + { + "name": "fields", + "token": "FIELDS", + "type": "block", + "arguments": [ + { + "name": "numfields", + "type": "integer", + "key_spec_index": 0, + "multiple": false, + "minimum": 1 + }, + { + "name": "field", + "type": "string", + "multiple": true + } + ] + } + ] + } +} diff --git a/src/commands/hpexpiretime.json b/src/commands/hpexpiretime.json new file mode 100644 index 0000000000..6a2be6a22f --- /dev/null +++ b/src/commands/hpexpiretime.json @@ -0,0 +1,85 @@ +{ + "HPEXPIRETIME": { + "summary": "Returns the Unix timestamp in milliseconds since Unix epoch at which the given key's field(s) will expire", + "complexity": "O(1) for each field, so O(N) for N items when the command is called with multiple fields.", + "group": "hash", + "since": "9.0.0", + "arity": -5, + "function": "hpexpiretimeCommand", + "command_flags": [ + "READONLY", + "FAST" + ], + "acl_categories": [ + "HASH" + ], + "key_specs": [ + { + "flags": [ + "RO", + "ACCESS" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "description": "List of values associated with the result of getting the absolute expiry timestamp of the specific fields, in the same order as they are requested.", + "type": "array", + "minItems": 1, + "items": { + "oneOf": [ + { + "description": "Field does not exist in the provided hash key, or the hash key is empty.", + "const": -2 + }, + { + "description": "Field exists in the provided hash key, but has no expiration associated with it.", + "const": -1 + }, + { + "description": "The expiration time associated with the hash key field, in milliseconds.", + "type": "integer", + "minimum": 0 + } + ] + } + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "fields", + "token": "FIELDS", + "type": "block", + "arguments": [ + { + 
"name": "numfields", + "type": "integer", + "key_spec_index": 0, + "multiple": false, + "minimum": 1 + }, + { + "name": "field", + "type": "string", + "multiple": true + } + ] + } + ] + } +} diff --git a/src/commands/hpttl.json b/src/commands/hpttl.json new file mode 100644 index 0000000000..f1c7da24c7 --- /dev/null +++ b/src/commands/hpttl.json @@ -0,0 +1,85 @@ +{ + "HPTTL": { + "summary": "Returns the remaining time to live in milliseconds of a hash key's field(s) that have an associated expiration.", + "complexity": "O(1) for each field assigned with TTL, so O(N) for N items when the command is called with multiple fields.", + "group": "hash", + "since": "9.0.0", + "arity": -5, + "function": "hpttlCommand", + "command_flags": [ + "READONLY", + "FAST" + ], + "acl_categories": [ + "HASH" + ], + "key_specs": [ + { + "flags": [ + "RO", + "ACCESS" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "description": "List of values associated with the result of getting the remaining time-to-live of the specific fields, in the same order as they are requested.", + "type": "array", + "minItems": 1, + "items": { + "oneOf": [ + { + "description": "Field does not exist in the provided hash key, or the hash key is empty", + "const": -2 + }, + { + "description": "Field exists in the provided hash key, but has no expiration associated with it.", + "const": -1 + }, + { + "description": "The expiration time associated with the hash key field, in milliseconds.", + "type": "integer", + "minimum": 0 + } + ] + } + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "fields", + "token": "FIELDS", + "type": "block", + "arguments": [ + { + "name": "numfields", + "type": "integer", + "key_spec_index": 0, + "multiple": false, + "minimum": 1 + }, + { + "name": "field", + "type": "string", + "multiple": true + } + ] + } + ] + } 
"complexity": "O(N) where N is the number of fields being set.",
true, + "arguments": [ + { + "name": "field", + "type": "string" + }, + { + "name": "value", + "type": "string" + } + ] + } + ] + } + ] + } +} diff --git a/src/commands/httl.json b/src/commands/httl.json new file mode 100644 index 0000000000..6d3ab789a7 --- /dev/null +++ b/src/commands/httl.json @@ -0,0 +1,85 @@ +{ + "HTTL": { + "summary": "Returns the remaining time to live (in seconds) of a hash key's field(s) that have an associated expiration.", + "complexity": "O(1) for each field, so O(N) for N items when the command is called with multiple fields.", + "group": "hash", + "since": "9.0.0", + "arity": -5, + "function": "httlCommand", + "command_flags": [ + "READONLY", + "FAST" + ], + "acl_categories": [ + "HASH" + ], + "key_specs": [ + { + "flags": [ + "RO", + "ACCESS" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "description": "List of values associated with the result of getting the remaining time-to-live of the specific fields, in the same order as they are requested.", + "type": "array", + "minItems": 1, + "items": { + "oneOf": [ + { + "description": "Field does not exist in the provided hash key, or the hash key is empty", + "const": -2 + }, + { + "description": "Field exists in the provided hash key, but has no expiration associated with it.", + "const": -1 + }, + { + "description": "The expiration time associated with the hash key field, in seconds.", + "type": "integer", + "minimum": 0 + } + ] + } + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "fields", + "token": "FIELDS", + "type": "block", + "arguments": [ + { + "name": "numfields", + "type": "integer", + "key_spec_index": 0, + "multiple": false, + "minimum": 1 + }, + { + "name": "field", + "type": "string", + "multiple": true + } + ] + } + ] + } +} diff --git a/src/db.c b/src/db.c index a892df0f2d..effff5f1bf 100644 --- 
a/src/db.c +++ b/src/db.c @@ -35,6 +35,7 @@ #include "io_threads.h" #include "module.h" #include "vector.h" +#include "expire.h" #include #include @@ -43,17 +44,6 @@ * C-level DB API *----------------------------------------------------------------------------*/ -/* Flags for expireIfNeeded */ -#define EXPIRE_FORCE_DELETE_EXPIRED 1 -#define EXPIRE_AVOID_DELETE_EXPIRED 2 - -/* Return values for expireIfNeeded */ -typedef enum { - KEY_VALID = 0, /* Could be volatile and not yet expired, non-volatile, or even non-existing key. */ - KEY_EXPIRED, /* Logically expired but not yet deleted. */ - KEY_DELETED /* The key was deleted now. */ -} keyStatus; - static keyStatus expireIfNeededWithDictIndex(serverDb *db, robj *key, robj *val, int flags, int dict_index); static keyStatus expireIfNeeded(serverDb *db, robj *key, robj *val, int flags); static int keyIsExpiredWithDictIndex(serverDb *db, robj *key, int dict_index); @@ -125,7 +115,7 @@ robj *lookupKey(serverDb *db, robj *key, int flags) { /* Update the access time for the ageing algorithm. * Don't do it if we have a saving child, as this will trigger * a copy on write madness. 
*/ - if (server.current_client && server.current_client->flag.no_touch && + if (server.current_client && server.current_client->flag.no_touch && server.executing_client && server.executing_client->cmd->proc != touchCommand) flags |= LOOKUP_NOTOUCH; if (!hasActiveChildProcess() && !(flags & LOOKUP_NOTOUCH)) { @@ -1004,9 +994,9 @@ void hashtableScanCallback(void *privdata, void *entry) { key = node->ele; /* zset data is copied after filtering by key */ } else if (o->type == OBJ_HASH) { - key = hashTypeEntryGetField(entry); + key = entryGetField(entry); if (!data->only_keys) { - val = hashTypeEntryGetValue(entry); + val = entryGetValue(entry); } } else { serverPanic("Type not handled in hashtable SCAN callback."); @@ -1900,16 +1890,6 @@ void propagateDeletion(serverDb *db, robj *key, int lazy) { server.replication_allowed = prev_replication_allowed; } -/* Returns 1 if the expire value is expired, 0 otherwise. */ -static int timestampIsExpired(mstime_t when) { - if (when < 0) return 0; /* no expire */ - mstime_t now = commandTimeSnapshot(); - - /* The key expired if the current (virtual or real) time is greater - * than the expire time of the key. */ - return now > when; -} - /* Use this instead of keyIsExpired if you already have the value object. */ static int objectIsExpired(robj *val) { /* Don't expire anything while loading. It will be done later. */ @@ -1925,7 +1905,7 @@ static int keyIsExpiredWithDictIndexImpl(serverDb *db, robj *key, int dict_index /* Don't expire anything while loading. It will be done later. */ if (server.loading) return 0; mstime_t when = getExpireWithDictIndex(db, key, dict_index); - return timestampIsExpired(when); + return timestampIsExpired(when) ? 1 : 0; } /* Check if the key is expired. 
*/ @@ -1953,52 +1933,11 @@ static keyStatus expireIfNeededWithDictIndex(serverDb *db, robj *key, robj *val, } else { if (!keyIsExpiredWithDictIndexImpl(db, key, dict_index)) return KEY_VALID; } - - /* If we are running in the context of a replica, instead of - * evicting the expired key from the database, we return ASAP: - * the replica key expiration is controlled by the primary that will - * send us synthesized DEL operations for expired keys. The - * exception is when write operations are performed on writable - * replicas. - * - * Still we try to return the right information to the caller, - * that is, KEY_VALID if we think the key should still be valid, - * KEY_EXPIRED if we think the key is expired but don't want to delete it at this time. - * - * When replicating commands from the primary, keys are never considered - * expired. */ - if (server.primary_host != NULL) { - if (server.current_client && (server.current_client->flag.primary)) return KEY_VALID; - if (!(flags & EXPIRE_FORCE_DELETE_EXPIRED)) return KEY_EXPIRED; - } else if (server.import_mode) { - /* If we are running in the import mode on a primary, instead of - * evicting the expired key from the database, we return ASAP: - * the key expiration is controlled by the import source that will - * send us synthesized DEL operations for expired keys. The - * exception is when write operations are performed on this server - * because it's a primary. - * - * Notice: other clients, apart from the import source, should not access - * the data imported by import source. - * - * Still we try to return the right information to the caller, - * that is, KEY_VALID if we think the key should still be valid, - * KEY_EXPIRED if we think the key is expired but don't want to delete it at this time. - * - * When receiving commands from the import source, keys are never considered - * expired. 
*/ - if (server.current_client && (server.current_client->flag.import_source)) return KEY_VALID; - if (!(flags & EXPIRE_FORCE_DELETE_EXPIRED)) return KEY_EXPIRED; - } - - /* In some cases we're explicitly instructed to return an indication of a - * missing key without actually deleting it, even on primaries. */ - if (flags & EXPIRE_AVOID_DELETE_EXPIRED) return KEY_EXPIRED; - - /* If 'expire' action is paused, for whatever reason, then don't expire any key. - * Typically, at the end of the pause we will properly expire the key OR we - * will have failed over and the new primary will send us the expire. */ - if (isPausedActionsWithUpdate(PAUSE_ACTION_EXPIRE)) return KEY_EXPIRED; + expirationPolicy policy = getExpirationPolicyWithFlags(flags); + if (policy == POLICY_IGNORE_EXPIRE) /* Ignore keys expiration. treat all keys as valid. */ + return KEY_VALID; + else if (policy == POLICY_KEEP_EXPIRED) /* Treat expired keys as invalid, but do not delete them. */ + return KEY_EXPIRED; /* The key needs to be converted from static to heap before deleted */ int static_key = key->refcount == OBJ_STATIC_REFCOUNT; diff --git a/src/defrag.c b/src/defrag.c index 9ea8a10741..8eb0e32acc 100644 --- a/src/defrag.c +++ b/src/defrag.c @@ -39,6 +39,7 @@ */ #include "server.h" +#include "entry.h" #include "hashtable.h" #include "eval.h" #include "script.h" @@ -442,18 +443,27 @@ static void scanLaterSet(robj *ob, unsigned long *cursor) { } /* Hashtable scan callback for hash datatype */ -static void activeDefragHashTypeEntry(void *privdata, void *element_ref) { - UNUSED(privdata); - hashTypeEntry **entry_ref = (hashTypeEntry **)element_ref; - - hashTypeEntry *new_entry = hashTypeEntryDefrag(*entry_ref, activeDefragAlloc, activeDefragSds); - if (new_entry) *entry_ref = new_entry; +static void activeDefragEntry(void *privdata, void *element_ref) { + entry **entry_ref = (entry **)element_ref; + entry *old_entry = *entry_ref, *new_entry = NULL; + long long old_expiry = entryGetExpiry(old_entry); 
+ + new_entry = entryDefrag(*entry_ref, activeDefragAlloc, activeDefragSds); + if (new_entry) { + /* In case the entry is tracked we need to update it in the volatile set */ + if (entryHasExpiry(new_entry)) { + robj *obj = (robj *)privdata; + serverAssert(obj); + hashTypeTrackUpdateEntry(obj, old_entry, new_entry, old_expiry, entryGetExpiry(new_entry)); + } + *entry_ref = new_entry; + } } static void scanLaterHash(robj *ob, unsigned long *cursor) { serverAssert(ob->type == OBJ_HASH && ob->encoding == OBJ_ENCODING_HASHTABLE); hashtable *ht = ob->ptr; - *cursor = hashtableScanDefrag(ht, *cursor, activeDefragHashTypeEntry, NULL, activeDefragAlloc, HASHTABLE_SCAN_EMIT_REF); + *cursor = hashtableScanDefrag(ht, *cursor, activeDefragEntry, ob, activeDefragAlloc, HASHTABLE_SCAN_EMIT_REF); } static void defragQuicklist(robj *ob) { @@ -498,7 +508,7 @@ static void defragHash(robj *ob) { } else { unsigned long cursor = 0; do { - cursor = hashtableScanDefrag(ht, cursor, activeDefragHashTypeEntry, NULL, activeDefragAlloc, HASHTABLE_SCAN_EMIT_REF); + cursor = hashtableScanDefrag(ht, cursor, activeDefragEntry, ob, activeDefragAlloc, HASHTABLE_SCAN_EMIT_REF); } while (cursor != 0); } /* defrag the hashtable struct and tables */ diff --git a/src/entry.c b/src/entry.c new file mode 100644 index 0000000000..097a36387c --- /dev/null +++ b/src/entry.c @@ -0,0 +1,410 @@ +#include +#include "server.h" +#include "serverassert.h" +#include "entry.h" + +#include + +/*----------------------------------------------------------------------------- + * Entry API + *----------------------------------------------------------------------------*/ + +/* The entry pointer is the field sds. We encode the entry layout type + * in the field SDS header. Field type SDS_TYPE_5 doesn't have any spare bits to + * encode this so we use it only for the first layout type. + * + * Entry with embedded value, used for small sizes. The value is stored as + * SDS_TYPE_8. The field can use any SDS type. 
static_assert(FIELD_SDS_AUX_BIT_MAX < 8 * sizeof(char) - SDS_TYPE_BITS, "too many sds bits are used for entry metadata");
* In case this entry has no expiration time, will return EXPIRY_NONE.
*/ +long long entryGetExpiry(const entry *entry) { + long long expiry = EXPIRY_NONE; + if (entryHasExpiry(entry)) { + char *buf = entryGetAllocPtr(entry); + debugServerAssert((((uintptr_t)buf & 0x7) == 0)); /* Test that the allocation is indeed 8 bytes aligned + * This is needed since we access the expiry as with pointer casting + * which require the access to be 8 bytes aligned. */ + expiry = *(long long *)buf; + } + return expiry; +} + +/* Modify the expiration time of this entry and return a pointer to the (potentially new) entry. */ +entry *entrySetExpiry(entry *e, long long expiry) { + if (entryHasExpiry(e)) { + char *buf = entryGetAllocPtr(e); + debugServerAssert((((uintptr_t)buf & 0x7) == 0)); /* Test that the allocation is indeed 8 bytes aligned + * This is needed since we access the expiry as with pointer casting + * which require the access to be 8 bytes aligned. */ + *(long long *)buf = expiry; + return e; + } + entry *new_entry = entryUpdate(e, NULL, expiry); + return new_entry; +} + +/* Return true in case the entry's expiration time has passed (it is expired) or false otherwise. */ +bool entryIsExpired(entry *entry) { + return timestampIsExpired(entryGetExpiry(entry)); +} +/**************************************** Entry Expiry API - End *****************************************/ + +void entryFree(entry *entry) { + if (entryHasValuePtr(entry)) { + sdsfree(entryGetValue(entry)); + } + zfree(entryGetAllocPtr(entry)); +} + +static inline size_t entryReqSize(const_sds field, + sds value, + long long expiry, + bool *is_value_embedded, + int *field_sds_type, + size_t *field_size, + size_t *expiry_size, + size_t *embedded_value_size) { + size_t expiry_alloc_size = (expiry == EXPIRY_NONE) ?
0 : sizeof(long long); + size_t field_len = sdslen(field); + int embedded_field_sds_type = sdsReqType(field_len); + if (embedded_field_sds_type == SDS_TYPE_5 && (expiry_alloc_size > 0)) { + embedded_field_sds_type = SDS_TYPE_8; + } + size_t field_alloc_size = sdsReqSize(field_len, embedded_field_sds_type); + size_t value_len = value ? sdslen(value) : 0; + size_t embedded_value_alloc_size = value ? sdsReqSize(value_len, SDS_TYPE_8) : 0; + size_t alloc_size = field_alloc_size + expiry_alloc_size; + bool embed_value = false; + if (value) { + if (alloc_size + embedded_value_alloc_size <= EMBED_VALUE_MAX_ALLOC_SIZE) { + /* Embed field and value. Value is fixed to SDS_TYPE_8. Unused + * allocation space is recorded in the embedded value's SDS header. + * + * +------+--------------+---------------+ + * | TTL | field | value | + * | | hdr "foo" \0 | hdr8 "bar" \0 | + * +------+--------------+---------------+ + */ + embed_value = true; + alloc_size += embedded_value_alloc_size; + } else { + /* Embed field, but not value. Field must be >= SDS_TYPE_8 to encode to + * indicate this type of entry. + * + * +------+-------+---------------+ + * | TTL | value | field | + * | | ptr | hdr8 "foo" \0 | + * +------+-------+---------------+ + */ + embed_value = false; + alloc_size += sizeof(sds); + if (embedded_field_sds_type == SDS_TYPE_5) { + embedded_field_sds_type = SDS_TYPE_8; + alloc_size -= field_alloc_size; + field_alloc_size = sdsReqSize(field_len, embedded_field_sds_type); + alloc_size += field_alloc_size; + } + } + } + if (expiry_size) *expiry_size = expiry_alloc_size; + if (field_sds_type) *field_sds_type = embedded_field_sds_type; + if (field_size) *field_size = field_alloc_size; + if (embedded_value_size) *embedded_value_size = embedded_value_alloc_size; + if (is_value_embedded) *is_value_embedded = embed_value; + + return alloc_size; +} + +/* Serialize the content of the entry into the provided buffer buf. 
Make use of the provided arguments provided by a call to entryReqSize. + * Note that this function will take ownership of the value so user should not assume it is valid after this call. */ +static entry *entryWrite(char *buf, + size_t buf_size, + const_sds field, + sds value, + long long expiry, + bool embed_value, + int embedded_field_sds_type, + size_t embedded_field_sds_size, + size_t embedded_value_sds_size, + size_t expiry_size) { + /* Set The expiry if exists */ + if (expiry_size) { + *(long long *)buf = expiry; + buf += expiry_size; + buf_size -= expiry_size; + } + if (value) { + if (!embed_value) { + *(sds *)buf = value; + buf += sizeof(sds); + buf_size -= sizeof(sds); + } else { + sdswrite(buf + embedded_field_sds_size, buf_size - embedded_field_sds_size, SDS_TYPE_8, value, sdslen(value)); + sdsfree(value); + buf_size -= embedded_value_sds_size; + } + } + /* Set the field data */ + entry *new_entry = sdswrite(buf, embedded_field_sds_size, embedded_field_sds_type, field, sdslen(field)); + + /* Field sds aux bits are zero, which we use for this entry encoding. */ + sdsSetAuxBit(new_entry, FIELD_SDS_AUX_BIT_ENTRY_HAS_VALUE_PTR, embed_value ? 0 : 1); + sdsSetAuxBit(new_entry, FIELD_SDS_AUX_BIT_ENTRY_HAS_EXPIRY, expiry_size > 0 ? 1 : 0); + + /* Check that the new entry was built correctly */ + debugServerAssert(sdsGetAuxBit(new_entry, FIELD_SDS_AUX_BIT_ENTRY_HAS_VALUE_PTR) == (embed_value ? 0 : 1)); + debugServerAssert(sdsGetAuxBit(new_entry, FIELD_SDS_AUX_BIT_ENTRY_HAS_EXPIRY) == (expiry_size > 0 ? 1 : 0)); + return new_entry; +} + +/* Takes ownership of value. 
does not take ownership of field */ +entry *entryCreate(const_sds field, sds value, long long expiry) { + bool embed_value = false; + int embedded_field_sds_type; + size_t expiry_size, embedded_value_sds_size, embedded_field_sds_size; + size_t alloc_size = entryReqSize(field, value, expiry, &embed_value, &embedded_field_sds_type, &embedded_field_sds_size, &expiry_size, &embedded_value_sds_size); + size_t buf_size; + + /* allocate the buffer */ + char *buf = zmalloc_usable(alloc_size, &buf_size); + + return entryWrite(buf, buf_size, field, value, expiry, embed_value, embedded_field_sds_type, embedded_field_sds_size, embedded_value_sds_size, expiry_size); +} + +/* Modify the entry's value and/or expiration time. + * In case the provided value is NULL, will use the existing value. + * Note that the value ownership is moved to this function and the caller should assume the + * value is no longer usable after calling this function. */ +entry *entryUpdate(entry *e, sds value, long long expiry) { + sds field = (sds)e; + entry *new_entry = NULL; + + bool update_value = value ? true : false; + long long curr_expiration_time = entryGetExpiry(e); + bool update_expiry = (expiry != curr_expiration_time) ? true : false; + /* Just a sanity check. If nothing changes, lets just return */ + if (!update_value && !update_expiry) + return e; + + if (!value) value = entryGetValue(e); + bool embed_value = false; + int embedded_field_sds_type; + size_t expiry_size, embedded_value_size, embedded_field_size; + size_t required_entry_size = entryReqSize(field, value, expiry, &embed_value, &embedded_field_sds_type, &embedded_field_size, &expiry_size, &embedded_value_size); + size_t current_embedded_allocation_size = entryHasValuePtr(e) ? 
0 : entryMemUsage(e); + + bool expiry_add_remove = update_expiry && (curr_expiration_time == EXPIRY_NONE || expiry == EXPIRY_NONE); // In case we are toggling expiration + bool value_change_encoding = update_value && (embed_value != entryHasEmbeddedValue(e)); // In case we change the way value is embedded or not + + + /* We will create a new entry in the following cases: + * 1. In the case were we add or remove expiration. + * 2. We change the way value is encoded + * 3. in the case were we are NOT migrating from an embedded entry to an embedded entry with ~the same size. */ + bool create_new_entry = (expiry_add_remove) || (value_change_encoding) || + (update_value && entryHasEmbeddedValue(e) && + !(required_entry_size <= EMBED_VALUE_MAX_ALLOC_SIZE && + required_entry_size <= current_embedded_allocation_size && + required_entry_size >= current_embedded_allocation_size * 3 / 4)); + + if (!create_new_entry) { + /* In this case we are sure we do not have to allocate new entry, so expiry must already be set. */ + if (update_expiry) { + serverAssert(entryHasExpiry(e)); + char *buf = entryGetAllocPtr(e); + *(long long *)buf = expiry; + } + /* In this case we are sure we do not have to allocate new entry, so value must already be set or we have enough room to embed it. */ + if (update_value) { + if (entryHasValuePtr(e)) { + sds *value_ref = entryGetValueRef(e); + sdsfree(*value_ref); + *value_ref = value; + } else { + /* Skip field content, field null terminator and value sds8 hdr. */ + sds old_value = entryGetValue(e); + /* We are using the same entry memory in order to store a potentially new value. + * In such cases the old value alloc was adjusted to the real buffer size part it was embedded to. + * Since we can potentially write here a smaller value, which requires less allocation space, we would like to + * inherit the old value memory allocation size. 
*/ + size_t value_size = sdsHdrSize(SDS_TYPE_8) + sdsalloc(old_value) + 1; + sdswrite(sdsAllocPtr(old_value), value_size, SDS_TYPE_8, value, sdslen(value)); + sdsfree(value); + } + } + new_entry = e; + + } else { + if (!update_value) { + /* Check if the value can be reused. */ + int value_was_embedded = !entryHasValuePtr(e); + /* In case the original entry value is embedded we have to duplicate it; + * if not, we can reuse it: detach it from the original entry (so it is not freed) since we are going to delete it.*/ + if (value_was_embedded) { + value = sdsdup(value); + } else { + sds *value_ref = entryGetValueRef(e); + *value_ref = NULL; + } + } + /* allocate the buffer for a new entry */ + size_t buf_size; + char *buf = zmalloc_usable(required_entry_size, &buf_size); + new_entry = entryWrite(buf, buf_size, entryGetField(e), value, expiry, embed_value, embedded_field_sds_type, embedded_field_size, embedded_value_size, expiry_size); + debugServerAssert(new_entry != e); + entryFree(e); + } + /* Check that the new entry was built correctly */ + debugServerAssert(sdsGetAuxBit(new_entry, FIELD_SDS_AUX_BIT_ENTRY_HAS_VALUE_PTR) == (embed_value ? 0 : 1)); + debugServerAssert(sdsGetAuxBit(new_entry, FIELD_SDS_AUX_BIT_ENTRY_HAS_EXPIRY) == (expiry_size > 0 ? 1 : 0)); + serverAssert(new_entry); + return new_entry; +} + +/* Returns memory usage of an entry, including all allocations owned by + * the entry. */ +size_t entryMemUsage(entry *entry) { + size_t mem = 0; + + if (entryHasValuePtr(entry)) { + /* In case the value is not embedded we might not be able to sum all the allocation sizes since the field + * header could be too small for holding the real allocation size.
*/ + mem += zmalloc_usable_size(entryGetAllocPtr(entry)); + } else { + mem += sdsReqSize(sdslen(entry), sdsType(entry)); + if (entryHasExpiry(entry)) mem += sizeof(long long); + } + mem += sdsAllocSize(entryGetValue(entry)); + return mem; +} + +/* Defragments a hashtable entry (field-value pair) if needed, using the + * provided defrag functions. The defrag functions return NULL if the allocation + * was not moved, otherwise they return a pointer to the new memory location. + * A separate sds defrag function is needed because of the unique memory layout + * of sds strings. + * If the location of the entry changed we return the new location, + * otherwise we return NULL. */ +entry *entryDefrag(entry *entry, void *(*defragfn)(void *), sds (*sdsdefragfn)(sds)) { + if (entryHasValuePtr(entry)) { + sds *value_ref = entryGetValueRef(entry); + sds new_value = sdsdefragfn(*value_ref); + if (new_value) *value_ref = new_value; + } + char *allocation = entryGetAllocPtr(entry); + char *new_allocation = defragfn(allocation); + if (new_allocation != NULL) { + /* Return the same offset into the new allocation as the entry's offset + * in the old allocation. */ + return new_allocation + ((char *)entry - allocation); + } + return NULL; +} + +/* Used for releasing memory to OS to avoid unnecessary CoW. Called when we've + * forked and memory won't be used again. See zmadvise_dontneed() */ +void entryDismissMemory(entry *entry) { + /* Only dismiss values memory since the field size usually is small. 
*/ + if (entryHasValuePtr(entry)) { + dismissSds(*entryGetValueRef(entry)); + } +} diff --git a/src/entry.h b/src/entry.h new file mode 100644 index 0000000000..f23f3dfc7b --- /dev/null +++ b/src/entry.h @@ -0,0 +1,94 @@ +#ifndef _ENTRY_H_ +#define _ENTRY_H_ + +#include "sds.h" +#include + +/*----------------------------------------------------------------------------- + * Entry + *----------------------------------------------------------------------------*/ + +/* + * The entry pointer is the field `sds`. We encode the entry layout type + * in the SDS header. + * + * An entry represents a key–value pair with an optional expiration timestamp. + * The pointer of type `entry *` always points to the VALUE `sds`. + * + * Layout 1: Embedded Field and Value (Compact Form) + * + * +-------------------+-------------------+-------------------+ + * | Expiration (opt) | Field (sds) | Value (sds) | + * | 8 bytes (int64_t) | "field" + header | "value" + header | + * +-------------------+-------------------+-------------------+ + * ^ + * | + * entry pointer + * + * - Both field and value are small and embedded. + * - The expiration is stored just before the first sds. + * + * + * Layout 2: Pointer-Based Value (Large Values) + * + * +-------------------+-------------------+------------------+ + * | Expiration (opt) | Value pointer | Field (sds) | + * | 8 bytes (int64_t) | 8 bytes (void *) | "field" + header | + * +-------------------+-------------------+------------------+ + * ^ + * | + * entry pointer + * + * - The value is stored separately via a pointer. + * - Used for large value sizes. */ +typedef void entry; + +/* The maximum allocation size we want to use for entries with embedded + * values. */ +#define EMBED_VALUE_MAX_ALLOC_SIZE 128 + +/* Returns the field string (sds) from the entry. */ +sds entryGetField(const entry *entry); + +/* Returns the value string (sds) from the entry. 
*/ +sds entryGetValue(const entry *entry); + +/* Sets or replaces the value string in the entry. May reallocate and return a new pointer. */ +entry *entrySetValue(entry *entry, sds value); + +/* Gets the expiration timestamp (UNIX time in milliseconds). */ +long long entryGetExpiry(const entry *entry); + +/* Returns true if the entry has an expiration timestamp set. */ +bool entryHasExpiry(const entry *entry); + +/* Sets the expiration timestamp. */ +entry *entrySetExpiry(entry *entry, long long expiry); + +/* Returns true if the entry is expired compared to current system time (commandTimeSnapshot). */ +bool entryIsExpired(entry *entry); + +/* Frees the memory used by the entry (including field/value). */ +void entryFree(entry *entry); + +/* Creates a new entry with the given field, value, and optional expiry. */ +entry *entryCreate(const_sds field, sds value, long long expiry); + +/* Updates the value and/or expiry of an existing entry. + * In case value is NULL, will use the existing entry value. + * Passing EXPIRY_NONE as expiry removes any expiration time from the entry. */ +entry *entryUpdate(entry *entry, sds value, long long expiry); + +/* Returns the total memory used by the entry (in bytes). */ +size_t entryMemUsage(entry *entry); + +/* Defragments the entry and returns the new pointer (if moved). */ +entry *entryDefrag(entry *entry, void *(*defragfn)(void *), sds (*sdsdefragfn)(sds)); + +/* Advises allocator to dismiss memory used by entry. */ +void entryDismissMemory(entry *entry); + +/* Internal used for debug.
No need to use this function except in tests */ +bool entryHasEmbeddedValue(entry *entry); + +#endif diff --git a/src/expire.c b/src/expire.c index 1514cdb03d..b93acf57dd 100644 --- a/src/expire.c +++ b/src/expire.c @@ -537,23 +537,19 @@ int checkAlreadyExpired(long long when) { return (when <= commandTimeSnapshot() && !server.loading && !server.primary_host && !server.import_mode); } -#define EXPIRE_NX (1 << 0) -#define EXPIRE_XX (1 << 1) -#define EXPIRE_GT (1 << 2) -#define EXPIRE_LT (1 << 3) - -/* Parse additional flags of expire commands +/* Parse additional flags of expire commands up to the specify max_index. + * In case max_index will scan all arguments. * * Supported flags: * - NX: set expiry only when the key has no expiry * - XX: set expiry only when the key has an existing expiry * - GT: set expiry only when the new expiry is greater than current one * - LT: set expiry only when the new expiry is less than current one */ -int parseExtendedExpireArgumentsOrReply(client *c, int *flags) { +int parseExtendedExpireArgumentsOrReply(client *c, int *flags, int max_args) { int nx = 0, xx = 0, gt = 0, lt = 0; int j = 3; - while (j < c->argc) { + while (j < max_args) { char *opt = c->argv[j]->ptr; if (!strcasecmp(opt, "nx")) { *flags |= EXPIRE_NX; @@ -587,6 +583,32 @@ int parseExtendedExpireArgumentsOrReply(client *c, int *flags) { return C_OK; } +int convertExpireArgumentToUnixTime(client *c, robj *arg, long long basetime, int unit, long long *unixtime) { + long long when; + if (getLongLongFromObjectOrReply(c, arg, &when, NULL) != C_OK) return C_ERR; + + if (when < 0) { + addReplyErrorExpireTime(c); + return C_ERR; + } + + if (unit == UNIT_SECONDS) { + if (when > LLONG_MAX / 1000 || when < LLONG_MIN / 1000) { + addReplyErrorExpireTime(c); + return C_ERR; + } + when *= 1000; + } + if (when > LLONG_MAX - basetime) { + addReplyErrorExpireTime(c); + return C_ERR; + } + when += basetime; + debugServerAssert(unixtime); + *unixtime = when; + return C_OK; +} + 
/*----------------------------------------------------------------------------- * Expires Commands *----------------------------------------------------------------------------*/ @@ -607,7 +629,7 @@ void expireGenericCommand(client *c, long long basetime, int unit) { int flag = 0; /* checking optional flags */ - if (parseExtendedExpireArgumentsOrReply(c, &flag) != C_OK) { + if (parseExtendedExpireArgumentsOrReply(c, &flag, c->argc) != C_OK) { return; } @@ -795,3 +817,66 @@ void touchCommand(client *c) { if (lookupKeyRead(c->db, c->argv[j]) != NULL) touched++; addReplyLongLong(c, touched); } + +/* Returns 1 if the expire value is expired, 0 otherwise. */ +bool timestampIsExpired(mstime_t when) { + if (when < 0) return false; /* no expire */ + mstime_t now = commandTimeSnapshot(); + + /* The time indicated by 'when' is considered expired if the current (virtual or real) time is greater + * than it. */ + return now > when; +} + +/* This function verifies if the current conditions allow expiration of keys and fields. + * For some cases expiration is not allowed, but we would still like to ignore the key + * so to treat it as "expired" without actively deleting it. */ +expirationPolicy getExpirationPolicyWithFlags(int flags) { + if (server.loading) return POLICY_IGNORE_EXPIRE; + + /* If we are running in the context of a replica, instead of + * evicting the expired key from the database, we return ASAP: + * the replica key expiration is controlled by the primary that will + * send us synthesized DEL operations for expired keys. The + * exception is when write operations are performed on writable + * replicas. + * + * Still we try to reflect the correct state to the caller, + * that is, POLICY_KEEP_EXPIRED so that the key will be ignored, but not deleted. 
+ * + * When replicating commands from the primary, keys are never considered + * expired, so we return POLICY_IGNORE_EXPIRE */ + if (server.primary_host != NULL) { + if (server.current_client && (server.current_client->flag.primary)) return POLICY_IGNORE_EXPIRE; + if (!(flags & EXPIRE_FORCE_DELETE_EXPIRED)) return POLICY_KEEP_EXPIRED; + } else if (server.import_mode) { + /* If we are running in the import mode on a primary, instead of + * evicting the expired key from the database, we return ASAP: + * the key expiration is controlled by the import source that will + * send us synthesized DEL operations for expired keys. The + * exception is when write operations are performed on this server + * because it's a primary. + * + * Notice: other clients, apart from the import source, should not access + * the data imported by import source. + * + * Still we try to reflect the correct state to the caller, + * that is, POLICY_KEEP_EXPIRED so that the key will be ignored, but not deleted. + * + * When receiving commands from the import source, keys are never considered + * expired, so we return POLICY_IGNORE_EXPIRE */ + if (server.current_client && (server.current_client->flag.import_source)) return POLICY_IGNORE_EXPIRE; + if (!(flags & EXPIRE_FORCE_DELETE_EXPIRED)) return POLICY_KEEP_EXPIRED; + } + + /* In some cases we're explicitly instructed to return an indication of a + * missing key without actually deleting it, even on primaries. */ + if (flags & EXPIRE_AVOID_DELETE_EXPIRED) return POLICY_KEEP_EXPIRED; + + /* If 'expire' action is paused, for whatever reason, then don't expire any key. + * Typically, at the end of the pause we will properly expire the key OR we + * will have failed over and the new primary will send us the expire. 
*/ + if (isPausedActionsWithUpdate(PAUSE_ACTION_EXPIRE)) return POLICY_KEEP_EXPIRED; + + return POLICY_DELETE_EXPIRED; +} diff --git a/src/expire.h b/src/expire.h new file mode 100644 index 0000000000..11ef9d9c10 --- /dev/null +++ b/src/expire.h @@ -0,0 +1,47 @@ +#ifndef EXPIRE_H +#define EXPIRE_H + +#include +#include +#include "monotonic.h" + +/* Special Expiry values */ +#define EXPIRY_NONE -1 + +/* Flags for expireIfNeeded */ +#define EXPIRE_FORCE_DELETE_EXPIRED 1 +#define EXPIRE_AVOID_DELETE_EXPIRED 2 + +#define ACTIVE_EXPIRE_CYCLE_SLOW 0 +#define ACTIVE_EXPIRE_CYCLE_FAST 1 + +/* Command flags for items expiration update conditions */ +#define EXPIRE_NX (1 << 0) +#define EXPIRE_XX (1 << 1) +#define EXPIRE_GT (1 << 2) +#define EXPIRE_LT (1 << 3) + +/* Return values for expireIfNeeded */ +typedef enum { + KEY_VALID = 0, /* Could be volatile and not yet expired, non-volatile, or even nonexistent key. */ + KEY_EXPIRED, /* Logically expired but not yet deleted. */ + KEY_DELETED /* The key was deleted now. */ +} keyStatus; + +/* Return value for getExpirationPolicy */ +typedef enum { + POLICY_IGNORE_EXPIRE, /* Ignore expiration time of items and treat them as valid. */ + POLICY_KEEP_EXPIRED, /* Ignore items which are expired but do not actively delete them. */ + POLICY_DELETE_EXPIRED /* Delete expired keys on access. 
*/ +} expirationPolicy; + +/* Forward declarations */ +typedef struct client client; +typedef struct serverObject robj; + +bool timestampIsExpired(mstime_t when); +expirationPolicy getExpirationPolicyWithFlags(int flags); +int parseExtendedExpireArgumentsOrReply(client *c, int *flags, int max_args); +int convertExpireArgumentToUnixTime(client *c, robj *arg, long long basetime, int unit, long long *unixtime); + +#endif diff --git a/src/hashtable.c b/src/hashtable.c index eb64fd97dd..4d42f41428 100644 --- a/src/hashtable.c +++ b/src/hashtable.c @@ -368,6 +368,12 @@ typedef struct { /* --- Internal functions --- */ +/* --- Access API --- */ +static inline bool validateElementIfNeeded(hashtable *ht, void *elem) { + if (ht->type->validateEntry == NULL) return true; + return ht->type->validateEntry(ht, elem); +} + static bucket *findBucketForInsert(hashtable *ht, uint64_t hash, int *pos_in_bucket, int *table_index); static inline void freeEntry(hashtable *ht, void *entry) { @@ -690,6 +696,9 @@ static inline int checkCandidateInBucket(hashtable *ht, bucket *b, int pos, cons if (compareKeys(ht, key, elem_key) == 0) { /* It's a match. */ assert(pos_in_bucket != NULL); + if (!validateElementIfNeeded(ht, entry)) { + return 0; + } *pos_in_bucket = pos; if (table_index) *table_index = table; return 1; @@ -1132,6 +1141,15 @@ hashtableType *hashtableGetType(hashtable *ht) { return ht->type; } +/* Set the hashtable type and returns the old type of the hashtable. + * NOTE that changing the hashtable type can lead to unexpected results. + * For example, changing the hash function can impact the ability to correctly fetch elements. */ +hashtableType *hashtableSetType(hashtable *ht, hashtableType *type) { + hashtableType *oldtype = ht->type; + ht->type = type; + return oldtype; +} + /* Returns a pointer to the table's metadata (userdata) section. 
*/ void *hashtableMetadata(hashtable *ht) { return &ht->metadata; @@ -1782,10 +1800,10 @@ size_t hashtableScanDefrag(hashtable *ht, size_t cursor, hashtableScanFunction f size_t used_before = ht->used[0]; bucket *b = &ht->tables[0][idx]; do { - if (b->presence != 0) { + if (fn && b->presence != 0) { int pos; for (pos = 0; pos < ENTRIES_PER_BUCKET; pos++) { - if (isPositionFilled(b, pos)) { + if (isPositionFilled(b, pos) && validateElementIfNeeded(ht, b->entries[pos])) { void *emit = emit_ref ? &b->entries[pos] : b->entries[pos]; fn(privdata, emit); } @@ -1825,9 +1843,9 @@ size_t hashtableScanDefrag(hashtable *ht, size_t cursor, hashtableScanFunction f size_t used_before = ht->used[table_small]; bucket *b = &ht->tables[table_small][idx]; do { - if (b->presence) { + if (fn && b->presence) { for (int pos = 0; pos < ENTRIES_PER_BUCKET; pos++) { - if (isPositionFilled(b, pos)) { + if (isPositionFilled(b, pos) && validateElementIfNeeded(ht, b->entries[pos])) { void *emit = emit_ref ? &b->entries[pos] : b->entries[pos]; fn(privdata, emit); } @@ -1855,9 +1873,9 @@ size_t hashtableScanDefrag(hashtable *ht, size_t cursor, hashtableScanFunction f size_t used_before = ht->used[table_large]; bucket *b = &ht->tables[table_large][idx]; do { - if (b->presence) { + if (fn && b->presence) { for (int pos = 0; pos < ENTRIES_PER_BUCKET; pos++) { - if (isPositionFilled(b, pos)) { + if (isPositionFilled(b, pos) && validateElementIfNeeded(ht, b->entries[pos])) { void *emit = emit_ref ? &b->entries[pos] : b->entries[pos]; fn(privdata, emit); } @@ -2047,6 +2065,9 @@ int hashtableNext(hashtableIterator *iterator, void **elemptr) { /* No entry here. */ continue; } + if (!(iter->flags & HASHTABLE_ITER_SKIP_VALIDATION) && !validateElementIfNeeded(iter->hashtable, b->entries[iter->pos_in_bucket])) { + continue; + } /* Return the entry at this position. 
*/ if (elemptr) { *elemptr = b->entries[iter->pos_in_bucket]; diff --git a/src/hashtable.h b/src/hashtable.h index ff02077fc8..3e8ec08ddd 100644 --- a/src/hashtable.h +++ b/src/hashtable.h @@ -31,6 +31,7 @@ #include #include #include +#include /* --- Opaque types --- */ @@ -57,6 +58,8 @@ typedef struct { /* Compare function, returns 0 if the keys are equal. Defaults to just * comparing the pointers for equality. */ int (*keyCompare)(const void *key1, const void *key2); + /* Check for entry access should be masked or not. Masked access will just treat the entry as not-exist. */ + bool (*validateEntry)(hashtable *ht, void *entry); /* Callback to free an entry when it's overwritten or deleted. * Optional. */ void (*entryDestructor)(void *entry); @@ -77,6 +80,7 @@ typedef struct { size_t (*getMetadataSize)(void); /* Flag to disable incremental rehashing */ unsigned instant_rehashing : 1; + } hashtableType; typedef enum { @@ -96,6 +100,7 @@ typedef void (*hashtableScanFunction)(void *privdata, void *entry); /* Iterator flags */ #define HASHTABLE_ITER_SAFE (1 << 0) #define HASHTABLE_ITER_PREFETCH_VALUES (1 << 1) +#define HASHTABLE_ITER_SKIP_VALIDATION (1 << 2) /* --- Prototypes --- */ @@ -113,6 +118,7 @@ hashtable *hashtableCreate(hashtableType *type); void hashtableRelease(hashtable *ht); void hashtableEmpty(hashtable *ht, void(callback)(hashtable *)); hashtableType *hashtableGetType(hashtable *ht); +hashtableType *hashtableSetType(hashtable *ht, hashtableType *type); void *hashtableMetadata(hashtable *ht); size_t hashtableSize(const hashtable *ht); size_t hashtableBuckets(hashtable *ht); diff --git a/src/module.c b/src/module.c index cbc5632ab9..080eec240f 100644 --- a/src/module.c +++ b/src/module.c @@ -5350,11 +5350,11 @@ int VM_HashSet(ValkeyModuleKey *key, int flags, ...) { /* If CFIELDS is active, we can pass the ownership of the * SDS object to the low level function that sets the field * to avoid a useless copy. 
*/ - if (flags & VALKEYMODULE_HASH_CFIELDS) low_flags |= HASH_SET_TAKE_FIELD; + if (flags & VALKEYMODULE_HASH_CFIELDS) low_flags |= (HASH_SET_TAKE_FIELD); robj *argv[2] = {field, value}; hashTypeTryConversion(key->value, argv, 0, 1); - int updated = hashTypeSet(key->value, field->ptr, value->ptr, low_flags); + int updated = hashTypeSet(key->value, field->ptr, value->ptr, EXPIRY_NONE, low_flags); count += (flags & VALKEYMODULE_HASH_COUNT_ALL) ? 1 : updated; /* If CFIELDS is active, SDS string ownership is now of hashTypeSet(), @@ -11224,8 +11224,8 @@ static void moduleScanKeyHashtableCallback(void *privdata, void *entry) { key = node->ele; value = createStringObjectFromLongDouble(node->score, 0); } else if (o->type == OBJ_HASH) { - key = hashTypeEntryGetField(entry); - sds val = hashTypeEntryGetValue(entry); + key = entryGetField(entry); + sds val = entryGetValue(entry); value = createStringObject(val, sdslen(val)); } else { serverPanic("unexpected object type"); diff --git a/src/monotonic.h b/src/monotonic.h index b465f90b10..2880cda858 100644 --- a/src/monotonic.h +++ b/src/monotonic.h @@ -20,6 +20,8 @@ * variable is associated with the monotonic clock and should not be confused * with other types of time.*/ typedef uint64_t monotime; +typedef long long mstime_t; /* millisecond time type. */ +typedef long long ustime_t; /* microsecond time type. */ /* Retrieve counter of micro-seconds relative to an arbitrary point in time. */ extern monotime (*getMonotonicUs)(void); diff --git a/src/object.c b/src/object.c index 34a971e52a..144907c201 100644 --- a/src/object.c +++ b/src/object.c @@ -28,10 +28,12 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ +#include "hashtable.h" #include "server.h" #include "serverassert.h" #include "functions.h" #include "intset.h" /* Compact integer set structure */ +#include "vset.h" #include "zmalloc.h" #include "sds.h" #include "module.h" @@ -527,7 +529,10 @@ void freeZsetObject(robj *o) { void freeHashObject(robj *o) { switch (o->encoding) { - case OBJ_ENCODING_HASHTABLE: hashtableRelease((hashtable *)o->ptr); break; + case OBJ_ENCODING_HASHTABLE: + hashTypeFreeVolatileSet(o); + hashtableRelease((hashtable *)o->ptr); + break; case OBJ_ENCODING_LISTPACK: lpFree(o->ptr); break; default: serverPanic("Unknown hash encoding type"); break; } @@ -682,7 +687,7 @@ void dismissHashObject(robj *o, size_t size_hint) { hashtableInitIterator(&iter, ht, 0); void *next; while (hashtableNext(&iter, &next)) { - dismissHashTypeEntry(next); + entryDismissMemory(next); } hashtableResetIterator(&iter); } @@ -1198,16 +1203,18 @@ size_t objectComputeSize(robj *key, robj *o, size_t sample_size, int dbid) { } else if (o->encoding == OBJ_ENCODING_HASHTABLE) { hashtable *ht = o->ptr; hashtableIterator iter; + vset *volatile_fields = hashtableMetadata(ht); hashtableInitIterator(&iter, ht, 0); void *next; asize = zmalloc_size((void *)o) + hashtableMemUsage(ht); while (hashtableNext(&iter, &next) && samples < sample_size) { - elesize += hashTypeEntryMemUsage(next); + elesize += entryMemUsage(next); samples++; } hashtableResetIterator(&iter); if (samples) asize += (double)elesize / samples * hashtableSize(ht); + if (vsetIsValid(volatile_fields)) asize += vsetMemUsage(volatile_fields); } else { serverPanic("Unknown hash encoding"); } diff --git a/src/rdb.c b/src/rdb.c index 0c8a42ef4d..6ec4e064dd 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -32,6 +32,7 @@ * SPDX-License-Identifier: BSD-3-Clause */ +#include "hashtable.h" #include "server.h" #include "lzf.h" /* LZF compression library */ #include "zipmap.h" @@ -717,7 +718,10 @@ int rdbSaveObjectType(rio *rdb, robj *o) { if (o->encoding == 
OBJ_ENCODING_LISTPACK) return rdbSaveType(rdb, RDB_TYPE_HASH_LISTPACK); else if (o->encoding == OBJ_ENCODING_HASHTABLE) - return rdbSaveType(rdb, RDB_TYPE_HASH); + if (hashTypeHasVolatileElements(o)) + return rdbSaveType(rdb, RDB_TYPE_HASH_2); + else + return rdbSaveType(rdb, RDB_TYPE_HASH); else serverPanic("Unknown hash encoding"); case OBJ_STREAM: return rdbSaveType(rdb, RDB_TYPE_STREAM_LISTPACKS_3); @@ -840,7 +844,6 @@ size_t rdbSaveStreamConsumers(rio *rdb, streamCG *cg) { * Returns -1 on error, number of bytes written on success. */ ssize_t rdbSaveObject(rio *rdb, robj *o, robj *key, int dbid) { ssize_t n = 0, nwritten = 0; - if (o->type == OBJ_STRING) { /* Save a string value */ if ((n = rdbSaveStringObject(rdb, o)) == -1) return -1; @@ -963,13 +966,14 @@ ssize_t rdbSaveObject(rio *rdb, robj *o, robj *key, int dbid) { return -1; } nwritten += n; - + /* check if need to add expired time for the hash elements */ + bool add_expiry = hashTypeHasVolatileElements(o); hashtableIterator iter; - hashtableInitIterator(&iter, ht, 0); + hashtableInitIterator(&iter, ht, HASHTABLE_ITER_SKIP_VALIDATION); void *next; while (hashtableNext(&iter, &next)) { - sds field = hashTypeEntryGetField(next); - sds value = hashTypeEntryGetValue(next); + sds field = entryGetField(next); + sds value = entryGetValue(next); if ((n = rdbSaveRawString(rdb, (unsigned char *)field, sdslen(field))) == -1) { hashtableResetIterator(&iter); @@ -981,8 +985,17 @@ ssize_t rdbSaveObject(rio *rdb, robj *o, robj *key, int dbid) { return -1; } nwritten += n; + if (add_expiry) { + long long expiry = entryGetExpiry(next); + if ((n = rdbSaveMillisecondTime(rdb, expiry)) == -1) { + hashtableResetIterator(&iter); + return -1; + } + nwritten += n; + } } hashtableResetIterator(&iter); + } else { serverPanic("Unknown hash encoding"); } @@ -2073,7 +2086,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) { lpSafeToAdd(NULL, totelelen)) { zsetConvert(o, OBJ_ENCODING_LISTPACK); } - } else
if (rdbtype == RDB_TYPE_HASH) { + } else if (rdbtype == RDB_TYPE_HASH || rdbtype == RDB_TYPE_HASH_2) { uint64_t len; sds field, value; hashtable *dupSearchHashtable = NULL; @@ -2084,8 +2097,8 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) { o = createHashObject(); - /* Too many entries? Use a hash table right from the start. */ - if (len > server.hash_max_listpack_entries) + /* Too many entries or hash object contains elements with expiry? Use a hash table right from the start. */ + if (len > server.hash_max_listpack_entries || rdbtype == RDB_TYPE_HASH_2) hashTypeConvert(o, OBJ_ENCODING_HASHTABLE); else if (deep_integrity_validation) { /* In this mode, we need to guarantee that the server won't crash @@ -2126,21 +2139,23 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) { } /* Convert to hash table if size threshold is exceeded */ - if (sdslen(field) > server.hash_max_listpack_value || sdslen(value) > server.hash_max_listpack_value || - !lpSafeToAdd(o->ptr, sdslen(field) + sdslen(value))) { + if (o->encoding != OBJ_ENCODING_HASHTABLE && + (sdslen(field) > server.hash_max_listpack_value || sdslen(value) > server.hash_max_listpack_value || + !lpSafeToAdd(o->ptr, sdslen(field) + sdslen(value)))) { hashTypeConvert(o, OBJ_ENCODING_HASHTABLE); - hashTypeEntry *entry = hashTypeCreateEntry(field, value); + entry *entry = entryCreate(field, value, EXPIRY_NONE); sdsfree(field); if (!hashtableAdd((hashtable *)o->ptr, entry)) { rdbReportCorruptRDB("Duplicate hash fields detected"); if (dupSearchHashtable) hashtableRelease(dupSearchHashtable); - freeHashTypeEntry(entry); + entryFree(entry); decrRefCount(o); return NULL; } break; } + /* Add pair to listpack */ o->ptr = lpAppend(o->ptr, (unsigned char *)field, sdslen(field)); o->ptr = lpAppend(o->ptr, (unsigned char *)value, sdslen(value)); @@ -2178,15 +2193,26 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) { return NULL; } + /* Also load the 
entry expiry */ + long long itemexpiry = EXPIRY_NONE; + if (rdbtype == RDB_TYPE_HASH_2) { + itemexpiry = rdbLoadMillisecondTime(rdb, RDB_VERSION); + if (itemexpiry < EXPIRY_NONE || rioGetReadError(rdb)) return NULL; + } + /* Add pair to hash table */ - hashTypeEntry *entry = hashTypeCreateEntry(field, value); + entry *entry = entryCreate(field, value, itemexpiry); sdsfree(field); if (!hashtableAdd((hashtable *)o->ptr, entry)) { rdbReportCorruptRDB("Duplicate hash fields detected"); - freeHashTypeEntry(entry); + entryFree(entry); decrRefCount(o); return NULL; } + + if (rdbtype == RDB_TYPE_HASH_2 && itemexpiry > 0) { + hashTypeTrackEntry(o, entry); + } } /* All pairs should be read by now */ diff --git a/src/rdb.h b/src/rdb.h index 9f19a3a9ec..1253c3fd05 100644 --- a/src/rdb.h +++ b/src/rdb.h @@ -90,32 +90,36 @@ static_assert(RDB_VERSION < RDB_FOREIGN_VERSION_MIN || RDB_VERSION > RDB_FOREIGN /* Map object types to RDB object types. Macros starting with OBJ_ are for * memory storage and may change. Instead RDB types must be fixed because * we store them on disk. */ -#define RDB_TYPE_STRING 0 -#define RDB_TYPE_LIST 1 -#define RDB_TYPE_SET 2 -#define RDB_TYPE_ZSET 3 -#define RDB_TYPE_HASH 4 -#define RDB_TYPE_ZSET_2 5 /* ZSET version 2 with doubles stored in binary. */ -#define RDB_TYPE_MODULE_PRE_GA 6 /* Used in 4.0 release candidates */ -#define RDB_TYPE_MODULE_2 7 /* Module value with annotations for parsing without \ +enum RdbType { + RDB_TYPE_STRING = 0, + RDB_TYPE_LIST = 1, + RDB_TYPE_SET = 2, + RDB_TYPE_ZSET = 3, + RDB_TYPE_HASH = 4, + RDB_TYPE_ZSET_2 = 5, /* ZSET version 2 with doubles stored in binary. */ + RDB_TYPE_MODULE_PRE_GA = 6, /* Used in 4.0 release candidates */ + RDB_TYPE_MODULE_2 = 7, /* Module value with annotations for parsing without \ the generating module being loaded. 
*/ -#define RDB_TYPE_HASH_ZIPMAP 9 -#define RDB_TYPE_LIST_ZIPLIST 10 -#define RDB_TYPE_SET_INTSET 11 -#define RDB_TYPE_ZSET_ZIPLIST 12 -#define RDB_TYPE_HASH_ZIPLIST 13 -#define RDB_TYPE_LIST_QUICKLIST 14 -#define RDB_TYPE_STREAM_LISTPACKS 15 -#define RDB_TYPE_HASH_LISTPACK 16 -#define RDB_TYPE_ZSET_LISTPACK 17 -#define RDB_TYPE_LIST_QUICKLIST_2 18 -#define RDB_TYPE_STREAM_LISTPACKS_2 19 -#define RDB_TYPE_SET_LISTPACK 20 -#define RDB_TYPE_STREAM_LISTPACKS_3 21 -/* NOTE: WHEN ADDING NEW RDB TYPE, UPDATE rdbIsObjectType(), and rdb_type_string[] */ + RDB_TYPE_HASH_ZIPMAP = 9, + RDB_TYPE_LIST_ZIPLIST = 10, + RDB_TYPE_SET_INTSET = 11, + RDB_TYPE_ZSET_ZIPLIST = 12, + RDB_TYPE_HASH_ZIPLIST = 13, + RDB_TYPE_LIST_QUICKLIST = 14, + RDB_TYPE_STREAM_LISTPACKS = 15, + RDB_TYPE_HASH_LISTPACK = 16, + RDB_TYPE_ZSET_LISTPACK = 17, + RDB_TYPE_LIST_QUICKLIST_2 = 18, + RDB_TYPE_STREAM_LISTPACKS_2 = 19, + RDB_TYPE_SET_LISTPACK = 20, + RDB_TYPE_STREAM_LISTPACKS_3 = 21, + RDB_TYPE_HASH_2 = 22, + RDB_TYPE_LAST +}; +/* NOTE: WHEN ADDING NEW RDB TYPE, UPDATE rdb_type_string[] */ /* Test if a type is an object type. */ -#define rdbIsObjectType(t) (((t) >= 0 && (t) <= 7) || ((t) >= 9 && (t) <= 21)) +#define rdbIsObjectType(t) (((t) >= 0 && (t) <= 7) || ((t) >= 9 && (t) < RDB_TYPE_LAST)) /* Special RDB opcodes (saved/loaded with rdbSaveType/rdbLoadType). 
*/ #define RDB_OPCODE_FUNCTION2 245 /* function library data */ diff --git a/src/server.c b/src/server.c index e5d8acca2e..75495ab80e 100644 --- a/src/server.c +++ b/src/server.c @@ -664,20 +664,34 @@ hashtableType subcommandSetType = {.entryGetKey = hashtableSubcommandGetKey, /* Hash type hash table (note that small hashes are represented with listpacks) */ const void *hashHashtableTypeGetKey(const void *entry) { - const hashTypeEntry *hash_entry = entry; - return (const void *)hashTypeEntryGetField(hash_entry); + return (const void *)entryGetField(entry); } void hashHashtableTypeDestructor(void *entry) { - hashTypeEntry *hash_entry = entry; - freeHashTypeEntry(hash_entry); + entryFree(entry); } +size_t hashHashtableTypeMetadataSize(void) { + return sizeof(void *); +} + +extern bool hashHashtableTypeValidate(hashtable *ht, void *entry); + hashtableType hashHashtableType = { .hashFunction = dictSdsHash, .entryGetKey = hashHashtableTypeGetKey, .keyCompare = hashtableSdsKeyCompare, .entryDestructor = hashHashtableTypeDestructor, + .getMetadataSize = hashHashtableTypeMetadataSize, +}; + +hashtableType hashWithVolatileItemsHashtableType = { + .hashFunction = dictSdsHash, + .entryGetKey = hashHashtableTypeGetKey, + .keyCompare = hashtableSdsKeyCompare, + .entryDestructor = hashHashtableTypeDestructor, + .getMetadataSize = hashHashtableTypeMetadataSize, + .validateEntry = hashHashtableTypeValidate, }; /* Hashtable type without destructor */ @@ -2135,6 +2149,9 @@ void createSharedObjects(void) { shared.multi = createSharedString("MULTI"); shared.exec = createSharedString("EXEC"); shared.hset = createSharedString("HSET"); + shared.hdel = createSharedString("HDEL"); + shared.hpexpireat = createSharedString("HPEXPIREAT"); + shared.hpersist = createSharedString("HPERSIST"); shared.srem = createSharedString("SREM"); shared.xgroup = createSharedString("XGROUP"); shared.xclaim = createSharedString("XCLAIM"); @@ -2167,6 +2184,7 @@ void createSharedObjects(void) { 
shared.special_asterisk = createSharedString("*"); shared.special_equals = createSharedString("="); shared.redacted = createSharedString("(redacted)"); + shared.fields = createSharedString("FIELDS"); for (j = 0; j < OBJ_SHARED_INTEGERS; j++) { shared.integers[j] = makeObjectShared(createObject(OBJ_STRING, (void *)(long)j)); @@ -7333,4 +7351,131 @@ __attribute__((weak)) int main(int argc, char **argv) { aeDeleteEventLoop(server.el); return 0; } + +/* + * The parseExtendedCommandArgumentsOrReply() function performs the common validation for extended + * command arguments used in STRING and HASH commands. + * + * Get specific command extended options - PERSIST/DEL + * Set specific command extended options - XX/NX/GET/IFEQ + * HSET specific command extended options - FXX/FNX + * Common command extended options - EX/EXAT/PX/PXAT/KEEPTTL + * + * Function takes pointers to client, flags, unit, pointer to pointer of expire obj if needed + * to be determined and command_type which can be COMMAND_GET or COMMAND_SET. + * + * If there are any syntax violations C_ERR is returned else C_OK is returned. + * + * Input flags are updated upon parsing the arguments. Unit and expire are updated if there are any + * EX/EXAT/PX/PXAT arguments. Unit is updated to millisecond if PX/PXAT is set. + * + * max_args provides a way to limit the scan to a specific range of arguments. + */ +int parseExtendedCommandArgumentsOrReply(client *c, int *flags, int *unit, robj **expire, robj **compare_val, int command_type, int max_args) { + int j = command_type == COMMAND_SET ? 3 : 2; + for (; j < max_args; j++) { + char *opt = c->argv[j]->ptr; + robj *next = (j == max_args - 1) ? 
NULL : c->argv[j + 1]; + + /* clang-format off */ + if ((opt[0] == 'n' || opt[0] == 'N') && + (opt[1] == 'x' || opt[1] == 'X') && opt[2] == '\0' && + !(*flags & ARGS_SET_XX || *flags & ARGS_SET_IFEQ) && (command_type == COMMAND_SET)) + { + *flags |= ARGS_SET_NX; + } else if ((opt[0] == 'x' || opt[0] == 'X') && + (opt[1] == 'x' || opt[1] == 'X') && opt[2] == '\0' && + !(*flags & ARGS_SET_NX || *flags & ARGS_SET_IFEQ) && (command_type == COMMAND_SET)) + { + *flags |= ARGS_SET_XX; + } else if ((opt[0] == 'f' || opt[0] == 'F') && + (opt[1] == 'n' || opt[1] == 'N') && + (opt[2] == 'x' || opt[2] == 'X') && opt[3] == '\0' && + !(*flags & ARGS_SET_FXX || *flags & ARGS_SET_IFEQ) && (command_type == COMMAND_HSET)) + { + *flags |= ARGS_SET_FNX; + } else if ((opt[0] == 'f' || opt[0] == 'F') && + (opt[1] == 'x' || opt[1] == 'X') && + (opt[2] == 'x' || opt[2] == 'X') && opt[3] == '\0' && + !(*flags & ARGS_SET_FNX || *flags & ARGS_SET_IFEQ) && (command_type == COMMAND_HSET)) + { + *flags |= ARGS_SET_FXX; + } else if ((opt[0] == 'i' || opt[0] == 'I') && + (opt[1] == 'f' || opt[1] == 'F') && + (opt[2] == 'e' || opt[2] == 'E') && + (opt[3] == 'q' || opt[3] == 'Q') && opt[4] == '\0' && + next && + !(*flags & ARGS_SET_NX || *flags & ARGS_SET_XX || *flags & ARGS_SET_IFEQ) && (command_type == COMMAND_SET)) + { + *flags |= ARGS_SET_IFEQ; + *compare_val = next; + j++; + } else if ((opt[0] == 'g' || opt[0] == 'G') && + (opt[1] == 'e' || opt[1] == 'E') && + (opt[2] == 't' || opt[2] == 'T') && opt[3] == '\0' && + (command_type == COMMAND_SET)) + { + *flags |= ARGS_SET_GET; + } else if (!strcasecmp(opt, "KEEPTTL") && !(*flags & ARGS_PERSIST) && + !(*flags & ARGS_EX) && !(*flags & ARGS_EXAT) && + !(*flags & ARGS_PX) && !(*flags & ARGS_PXAT) && (command_type == COMMAND_SET || command_type == COMMAND_HSET)) + { + *flags |= ARGS_KEEPTTL; + } else if (!strcasecmp(opt,"PERSIST") && (command_type == COMMAND_GET || command_type == COMMAND_HGET) && + !(*flags & ARGS_EX) && !(*flags & ARGS_EXAT) && + 
!(*flags & ARGS_PX) && !(*flags & ARGS_PXAT) && + !(*flags & ARGS_KEEPTTL)) + { + *flags |= ARGS_PERSIST; + } else if ((opt[0] == 'e' || opt[0] == 'E') && + (opt[1] == 'x' || opt[1] == 'X') && opt[2] == '\0' && + !(*flags & ARGS_KEEPTTL) && !(*flags & ARGS_PERSIST) && + !(*flags & ARGS_EXAT) && !(*flags & ARGS_PX) && + !(*flags & ARGS_PXAT) && next) + { + *flags |= ARGS_EX; + *expire = next; + j++; + } else if ((opt[0] == 'p' || opt[0] == 'P') && + (opt[1] == 'x' || opt[1] == 'X') && opt[2] == '\0' && + !(*flags & ARGS_KEEPTTL) && !(*flags & ARGS_PERSIST) && + !(*flags & ARGS_EX) && !(*flags & ARGS_EXAT) && + !(*flags & ARGS_PXAT) && next) + { + *flags |= ARGS_PX; + *unit = UNIT_MILLISECONDS; + *expire = next; + j++; + } else if ((opt[0] == 'e' || opt[0] == 'E') && + (opt[1] == 'x' || opt[1] == 'X') && + (opt[2] == 'a' || opt[2] == 'A') && + (opt[3] == 't' || opt[3] == 'T') && opt[4] == '\0' && + !(*flags & ARGS_KEEPTTL) && !(*flags & ARGS_PERSIST) && + !(*flags & ARGS_EX) && !(*flags & ARGS_PX) && + !(*flags & ARGS_PXAT) && next) + { + *flags |= ARGS_EXAT; + *expire = next; + j++; + } else if ((opt[0] == 'p' || opt[0] == 'P') && + (opt[1] == 'x' || opt[1] == 'X') && + (opt[2] == 'a' || opt[2] == 'A') && + (opt[3] == 't' || opt[3] == 'T') && opt[4] == '\0' && + !(*flags & ARGS_KEEPTTL) && !(*flags & ARGS_PERSIST) && + !(*flags & ARGS_EX) && !(*flags & ARGS_EXAT) && + !(*flags & ARGS_PX) && next) + { + *flags |= ARGS_PXAT; + *unit = UNIT_MILLISECONDS; + *expire = next; + j++; + } else { + addReplyErrorObject(c,shared.syntaxerr); + return C_ERR; + } + /* clang-format on */ + } + return C_OK; +} + /* The End */ diff --git a/src/server.h b/src/server.h index 1271e49c78..2f481d4cc7 100644 --- a/src/server.h +++ b/src/server.h @@ -62,9 +62,6 @@ #define static_assert(expr, lit) extern char __static_assert_failure[(expr) ? 1 : -1] #endif -typedef long long mstime_t; /* millisecond time type. */ -typedef long long ustime_t; /* microsecond time type. 
*/ - #include "ae.h" /* Event driven programming library */ #include "sds.h" /* Dynamic safe strings */ #include "dict.h" /* Hash tables (old implementation) */ @@ -79,10 +76,13 @@ typedef long long ustime_t; /* microsecond time type. */ #include "sparkline.h" /* ASCII graphs API */ #include "quicklist.h" /* Lists are encoded as linked lists of N-elements flat arrays */ +#include "expire.h" /* Expiration public API */ #include "rax.h" /* Radix tree */ #include "connection.h" /* Connection abstraction */ #include "memory_prefetch.h" +#include "vset.h" #include "trace/trace.h" +#include "entry.h" #ifdef USE_LTTNG #define valkey_fork() do_fork() @@ -162,9 +162,6 @@ struct hdr_histogram; #define CLIENT_MEM_USAGE_BUCKET_MAX_LOG 33 /* Bucket for largest clients: sizes above 4GB (2^32) */ #define CLIENT_MEM_USAGE_BUCKETS (1 + CLIENT_MEM_USAGE_BUCKET_MAX_LOG - CLIENT_MEM_USAGE_BUCKET_MIN_LOG) -#define ACTIVE_EXPIRE_CYCLE_SLOW 0 -#define ACTIVE_EXPIRE_CYCLE_FAST 1 - /* Children process will exit with this status code to signal that the * process terminated without an error: this is useful in order to kill * a saving child (RDB or AOF one), without triggering in the parent the @@ -220,6 +217,11 @@ struct hdr_histogram; extern int configOOMScoreAdjValuesDefaults[CONFIG_OOM_COUNT]; +#define COMMAND_GET 0 +#define COMMAND_SET 1 +#define COMMAND_HGET 2 +#define COMMAND_HSET 3 + /* Command flags. Please check the definition of struct serverCommand in this file * for more information about the meaning of every flag. */ #define CMD_WRITE (1ULL << 0) @@ -514,9 +516,6 @@ typedef enum { #define SUPERVISED_SYSTEMD 2 #define SUPERVISED_UPSTART 3 -/* Anti-warning macro... 
*/ -#define UNUSED(V) ((void)V) - #define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^64 elements */ #define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */ #define ZSKIPLIST_MAX_SEARCH 10 @@ -719,6 +718,23 @@ typedef enum { * Data types *----------------------------------------------------------------------------*/ +/* Generic set command string object set flags */ +#define ARGS_NO_FLAGS 0 +#define ARGS_SET_NX (1 << 0) /* Set if key not exists. */ +#define ARGS_SET_XX (1 << 1) /* Set if key exists. */ +#define ARGS_EX (1 << 2) /* Set if time in seconds is given */ +#define ARGS_PX (1 << 3) /* Set if time in ms in given */ +#define ARGS_KEEPTTL (1 << 4) /* Set and keep the ttl */ +#define ARGS_SET_GET (1 << 5) /* Set if want to get key before set */ +#define ARGS_EXAT (1 << 6) /* Set if timestamp in second is given */ +#define ARGS_PXAT (1 << 7) /* Set if timestamp in ms is given */ +#define ARGS_PERSIST (1 << 8) /* Set if we need to remove the ttl */ +#define ARGS_SET_IFEQ (1 << 9) /* Set if we need compare and set */ +#define ARGS_ARGV3 (1 << 10) /* Set if the value is at argv[3]; otherwise it's \ + * at argv[2]. */ +#define ARGS_SET_FNX (1 << 11) /* Set if key item not exists. */ +#define ARGS_SET_FXX (1 << 12) /* Set if key item exists. */ + /* An Object, that is a type able to hold a string / list / set */ /* The actual Object */ @@ -852,8 +868,9 @@ typedef struct replBufBlock { * by integers from 0 (the default database) up to the max configured * database. The database number is the 'id' field in the structure. 
*/ typedef struct serverDb { - kvstore *keys; /* The keyspace for this DB */ - kvstore *expires; /* Timeout of keys with a timeout set */ + kvstore *keys; /* The keyspace for this DB */ + kvstore *expires; /* Timeout of keys with a timeout set */ + kvstore *object_with_volatile_elements; dict *blocking_keys; /* Keys with clients waiting for data (BLPOP)*/ dict *blocking_keys_unblock_on_nokey; /* Keys with clients waiting for * data, and should be unblocked if key is deleted (XREADEDGROUP). @@ -1361,10 +1378,10 @@ struct sharedObjectsStruct { *bgsaveerr_variants[2], *execaborterr, *noautherr, *noreplicaserr, *busykeyerr, *oomerr, *plus, *messagebulk, *pmessagebulk, *subscribebulk, *unsubscribebulk, *psubscribebulk, *punsubscribebulk, *del, *unlink, *rpop, *lpop, *lpush, - *rpoplpush, *lmove, *blmove, *zpopmin, *zpopmax, *emptyscan, *multi, *exec, *left, *right, *hset, *srem, + *rpoplpush, *lmove, *blmove, *zpopmin, *zpopmax, *emptyscan, *multi, *exec, *left, *right, *hset, *hdel, *hpexpireat, *hpersist, *srem, *xgroup, *xclaim, *script, *replconf, *eval, *persist, *set, *pexpireat, *pexpire, *time, *pxat, *absttl, *retrycount, *force, *justid, *entriesread, *lastid, *ping, *setid, *keepttl, *load, *createconsumer, *getack, - *special_asterisk, *special_equals, *default_username, *redacted, *ssubscribebulk, *sunsubscribebulk, + *special_asterisk, *special_equals, *default_username, *redacted, *ssubscribebulk, *sunsubscribebulk, *fields, *smessagebulk, *select[PROTO_SHARED_SELECT_CMDS], *integers[OBJ_SHARED_INTEGERS], *mbulkhdr[OBJ_SHARED_BULKHDR_LEN], /* "*\r\n" */ *bulkhdr[OBJ_SHARED_BULKHDR_LEN], /* "$\r\n" */ @@ -1609,7 +1626,6 @@ typedef enum childInfoType { CHILD_INFO_TYPE_RDB_COW_SIZE, CHILD_INFO_TYPE_MODULE_COW_SIZE } childInfoType; - struct valkeyServer { /* General */ pid_t pid; /* Main process pid. 
*/ @@ -2607,11 +2623,13 @@ typedef struct { typedef struct { robj *subject; int encoding; - + bool volatile_items_iter; unsigned char *fptr, *vptr; hashtableIterator iter; + vsetIterator viter; void *next; + } hashTypeIterator; #include "stream.h" /* Stream data type header file. */ @@ -2635,6 +2653,7 @@ extern hashtableType kvstoreKeysHashtableType; extern hashtableType kvstoreExpiresHashtableType; extern double R_Zero, R_PosInf, R_NegInf, R_Nan; extern hashtableType hashHashtableType; +extern hashtableType hashWithVolatileItemsHashtableType; extern dictType stringSetDictType; extern dictType externalStringType; extern dictType sdsHashDictType; @@ -2846,6 +2865,7 @@ int processIOThreadsWriteDone(void); void releaseReplyReferences(client *c); void resetLastWrittenBuf(client *c); +int parseExtendedCommandArgumentsOrReply(client *c, int *flags, int *unit, robj **expire, robj **compare_val, int command_type, int max_args); /* logreqres.c - logging of requests and responses */ void reqresReset(client *c, int free_buf); @@ -3335,16 +3355,14 @@ robj *setTypeDup(robj *o); /* Hash data type */ #define HASH_SET_TAKE_FIELD (1 << 0) #define HASH_SET_TAKE_VALUE (1 << 1) +#define HASH_SET_KEEP_EXPIRY (1 << 2) #define HASH_SET_COPY 0 -typedef void hashTypeEntry; -hashTypeEntry *hashTypeCreateEntry(sds field, sds value); -sds hashTypeEntryGetField(const hashTypeEntry *entry); -sds hashTypeEntryGetValue(const hashTypeEntry *entry); -size_t hashTypeEntryMemUsage(hashTypeEntry *entry); -hashTypeEntry *hashTypeEntryDefrag(hashTypeEntry *entry, void *(*defragfn)(void *), sds (*sdsdefragfn)(sds)); -void dismissHashTypeEntry(hashTypeEntry *entry); -void freeHashTypeEntry(hashTypeEntry *entry); + +void hashTypeFreeVolatileSet(robj *o); +void hashTypeTrackEntry(robj *o, void *entry); +void hashTypeUntrackEntry(robj *o, void *entry); +void hashTypeTrackUpdateEntry(robj *o, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry); void hashTypeConvert(robj *o, int enc); 
void hashTypeTryConversion(robj *subject, robj **argv, int start, int end); @@ -3352,6 +3370,7 @@ int hashTypeExists(robj *o, sds key); int hashTypeDelete(robj *o, sds key); unsigned long hashTypeLength(const robj *o); void hashTypeInitIterator(robj *subject, hashTypeIterator *hi); +void hashTypeInitVolatileIterator(robj *subject, hashTypeIterator *hi); void hashTypeResetIterator(hashTypeIterator *hi); int hashTypeNext(hashTypeIterator *hi); void hashTypeCurrentFromListpack(hashTypeIterator *hi, @@ -3363,8 +3382,10 @@ sds hashTypeCurrentFromHashTable(hashTypeIterator *hi, int what); sds hashTypeCurrentObjectNewSds(hashTypeIterator *hi, int what); robj *hashTypeLookupWriteOrCreate(client *c, robj *key); robj *hashTypeGetValueObject(robj *o, sds field); -int hashTypeSet(robj *o, sds field, sds value, int flags); +int hashTypeSet(robj *o, sds field, sds value, long long expiry, int flags); robj *hashTypeDup(robj *o); +bool hashTypeHasVolatileElements(robj *o); +size_t hashTypeNumVolatileElements(robj *o); /* Pub / Sub */ int pubsubUnsubscribeAllChannels(client *c, int notify); @@ -3826,6 +3847,8 @@ void zrankCommand(client *c); void zrevrankCommand(client *c); void hsetCommand(client *c); void hsetnxCommand(client *c); +void hsetexCommand(client *c); +void hgetexCommand(client *c); void hgetCommand(client *c); void hmgetCommand(client *c); void hdelCommand(client *c); @@ -3847,6 +3870,15 @@ void hgetallCommand(client *c); void hexistsCommand(client *c); void hscanCommand(client *c); void hrandfieldCommand(client *c); +void hexpireCommand(client *c); +void hexpireatCommand(client *c); +void hpexpireCommand(client *c); +void hpexpireatCommand(client *c); +void httlCommand(client *c); +void hpttlCommand(client *c); +void hexpiretimeCommand(client *c); +void hpexpiretimeCommand(client *c); +void hpersistCommand(client *c); void configSetCommand(client *c); void configGetCommand(client *c); void configResetStatCommand(client *c); diff --git a/src/serverassert.h 
b/src/serverassert.h index 5ce8eb2450..88c9815e56 100644 --- a/src/serverassert.h +++ b/src/serverassert.h @@ -63,4 +63,8 @@ void _serverAssert(const char *estr, const char *file, int line); void _serverPanic(const char *file, int line, const char *msg, ...); +#ifndef static_assert +#define static_assert(expr, lit) extern char __static_assert_failure[(expr) ? 1 : -1] +#endif + #endif diff --git a/src/t_hash.c b/src/t_hash.c index 5a8c17e90c..b529355ff2 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -32,233 +32,115 @@ * SPDX-License-Identifier: BSD-3-Clause */ +#include "hashtable.h" +#include "rax.h" +#include "sds.h" +#include "vset.h" #include "server.h" +#include "zmalloc.h" #include -#include +#include +#include "entry.h" + +/* enumeration of all the possible return values of commands manipulating fields expiration. */ +typedef enum { + /* SDS aux flag. If set, it indicates that the entry has TTL metadata set. */ + EXPIRATION_MODIFICATION_NOT_EXIST = -2, /* in case the provided object is NULL or the specific field was not found */ + EXPIRATION_MODIFICATION_SUCCESSFUL = 1, /* if the expiration time was applied or modified */ + EXPIRATION_MODIFICATION_FAILED_CONDITION = 0, /* if the some predefined conditions (e.g hexpire conditional flags) has not been met */ + EXPIRATION_MODIFICATION_FAILED = -1, /* if apply of the expiration modification failed (e.g hpersist on item without expiration) */ + EXPIRATION_MODIFICATION_EXPIRE_ASAP = 2, /* if apply of the expiration modification was set to a time in the past (i.e field is immediately expired) */ +} expiryModificationResult; /*----------------------------------------------------------------------------- - * Hash Entry API + * Hash type Expiry API *----------------------------------------------------------------------------*/ -/* The hashTypeEntry pointer is the field sds. We encode the entry layout type - * in the field SDS header. 
Field type SDS_TYPE_5 doesn't have any spare bits to - * encode this so we use it only for the first layout type. - * - * Entry with embedded value, used for small sizes. The value is stored as - * SDS_TYPE_8. The field can use any SDS type. - * - * +--------------+---------------+ - * | field | value | - * | hdr "foo" \0 | hdr8 "bar" \0 | - * +------^-------+---------------+ - * | - * | - * entry pointer = field sds - * - * Entry with value pointer, used for larger fields and values. The field is SDS - * type 8 or higher. - * - * +-------+--------------+ - * | value | field | - * | ptr | hdr "foo" \0 | - * +-------+------^-------+ - * | - * | - * entry pointer = field sds - */ +static vset *hashTypeGetVolatileSet(robj *o) { + serverAssert(o->encoding == OBJ_ENCODING_HASHTABLE); + vset *set = (vset *)hashtableMetadata(o->ptr); + return vsetIsValid(set) ? set : NULL; +} -/* The maximum allocation size we want to use for entries with embedded - * values. */ -#define EMBED_VALUE_MAX_ALLOC_SIZE 128 - -/* SDS aux flag. If set, it indicates that the entry has an embedded value - * pointer located in memory before the embedded field. If unset, the entry - * instead has an embedded value located after the embedded field. */ -#define FIELD_SDS_AUX_BIT_ENTRY_HAS_VALUE_PTR 0 - -static inline bool entryHasValuePtr(const hashTypeEntry *entry) { - return sdsGetAuxBit(entry, FIELD_SDS_AUX_BIT_ENTRY_HAS_VALUE_PTR); -} - -/* Returns the location of a pointer to a separately allocated value. Only for - * an entry without an embedded value. 
*/ -static sds *hashTypeEntryGetValueRef(const hashTypeEntry *entry) { - serverAssert(entryHasValuePtr(entry)); - char *field_data = sdsAllocPtr(entry); - field_data -= sizeof(sds *); - return (sds *)field_data; -} - -/* takes ownership of value, does not take ownership of field */ -hashTypeEntry *hashTypeCreateEntry(sds field, sds value) { - size_t field_len = sdslen(field); - int field_sds_type = sdsReqType(field_len); - size_t field_size = sdsReqSize(field_len, field_sds_type); - size_t value_len = sdslen(value); - size_t value_size = sdsReqSize(value_len, SDS_TYPE_8); - sds embedded_field_sds; - if (field_size + value_size <= EMBED_VALUE_MAX_ALLOC_SIZE) { - /* Embed field and value. Value is fixed to SDS_TYPE_8. Unused - * allocation space is recorded in the embedded value's SDS header. - * - * +--------------+---------------+ - * | field | value | - * | hdr "foo" \0 | hdr8 "bar" \0 | - * +--------------+---------------+ - */ - size_t min_size = field_size + value_size; - size_t buf_size; - char *buf = zmalloc_usable(min_size, &buf_size); - embedded_field_sds = sdswrite(buf, field_size, field_sds_type, field, field_len); - sdswrite(buf + field_size, buf_size - field_size, SDS_TYPE_8, value, value_len); - /* Field sds aux bits are zero, which we use for this entry encoding. */ - sdsSetAuxBit(embedded_field_sds, FIELD_SDS_AUX_BIT_ENTRY_HAS_VALUE_PTR, 0); - serverAssert(!entryHasValuePtr(embedded_field_sds)); - sdsfree(value); - } else { - /* Embed field, but not value. Field must be >= SDS_TYPE_8 to encode to - * indicate this type of entry. 
- * - * +-------+---------------+ - * | value | field | - * | ptr | hdr8 "foo" \0 | - * +-------+---------------+ - */ - char field_sds_type = sdsReqType(field_len); - if (field_sds_type == SDS_TYPE_5) field_sds_type = SDS_TYPE_8; - field_size = sdsReqSize(field_len, field_sds_type); - size_t alloc_size = sizeof(sds *) + field_size; - char *buf = zmalloc(alloc_size); - *(sds *)buf = value; - embedded_field_sds = sdswrite(buf + sizeof(sds *), field_size, field_sds_type, field, field_len); - /* Store the entry encoding type in sds aux bits. */ - sdsSetAuxBit(embedded_field_sds, FIELD_SDS_AUX_BIT_ENTRY_HAS_VALUE_PTR, 1); - serverAssert(entryHasValuePtr(embedded_field_sds)); - } - return (void *)embedded_field_sds; -} - -/* The entry pointer is the field sds, but that's an implementation detail. */ -sds hashTypeEntryGetField(const hashTypeEntry *entry) { - return (sds)entry; -} - -sds hashTypeEntryGetValue(const hashTypeEntry *entry) { - if (entryHasValuePtr(entry)) { - return *hashTypeEntryGetValueRef(entry); - } else { - /* Skip field content, field null terminator and value sds8 hdr. */ - size_t offset = sdslen(entry) + 1 + sdsHdrSize(SDS_TYPE_8); - return (char *)entry + offset; - } -} - -/* Returns the address of the entry allocation. */ -static void *hashTypeEntryAllocPtr(hashTypeEntry *entry) { - char *buf = sdsAllocPtr(entry); - if (entryHasValuePtr(entry)) { - buf -= sizeof(sds *); - } - return buf; -} - -/* Frees previous value, takes ownership of new value, returns entry (may be - * reallocated). */ -static hashTypeEntry *hashTypeEntryReplaceValue(hashTypeEntry *entry, sds value) { - sds field = (sds)entry; - size_t field_size = sdsHdrSize(sdsType(field)) + sdsalloc(field) + 1; - size_t value_len = sdslen(value); - size_t value_size = sdsReqSize(value_len, SDS_TYPE_8); - if (!entryHasValuePtr(entry)) { - /* Reuse the allocation if the new value fits and leaves no more than - * 25% unused space after replacing the value. 
*/ - char *alloc_ptr = sdsAllocPtr(entry); - size_t required_size = field_size + value_size; - size_t alloc_size; - if (required_size <= EMBED_VALUE_MAX_ALLOC_SIZE && - required_size <= (alloc_size = hashTypeEntryMemUsage(entry)) && - required_size >= alloc_size * 3 / 4) { - /* It fits in the allocation and leaves max 25% unused space. */ - sdswrite(alloc_ptr + field_size, alloc_size - field_size, SDS_TYPE_8, value, value_len); - sdsfree(value); - return entry; - } - hashTypeEntry *new_entry = hashTypeCreateEntry(hashTypeEntryGetField(entry), value); - freeHashTypeEntry(entry); - return new_entry; - } else { - /* The value pointer is located before the embedded field. */ - if (field_size + value_size <= EMBED_VALUE_MAX_ALLOC_SIZE) { - /* Convert to entry with embedded value. */ - hashTypeEntry *new_entry = hashTypeCreateEntry(field, value); - freeHashTypeEntry(entry); - return new_entry; - } else { - /* Not embedded value. */ - sds *value_ref = hashTypeEntryGetValueRef(entry); - sdsfree(*value_ref); - *value_ref = value; - return entry; - } +bool hashTypeHasVolatileElements(robj *o) { + if (o->encoding == OBJ_ENCODING_HASHTABLE) { + vset *set = hashTypeGetVolatileSet(o); + if (set && !vsetIsEmpty(set)) + return true; } + return false; } -/* Returns memory usage of a hashTypeEntry, including all allocations owned by - * the hashTypeEntry. */ -size_t hashTypeEntryMemUsage(hashTypeEntry *entry) { - size_t mem = 0; - if (entryHasValuePtr(entry)) { - /* Alloc size is not stored in the embedded field. */ - mem = zmalloc_usable_size(hashTypeEntryAllocPtr(entry)); - mem += sdsAllocSize(*hashTypeEntryGetValueRef(entry)); - } else { - /* Remaining alloc size is encoded in the embedded value SDS header. 
*/ - sds field = entry; - sds value = (char *)entry + sdslen(field) + 1 + sdsHdrSize(SDS_TYPE_8); - size_t field_size = sdsHdrSize(sdsType(field)) + sdslen(field) + 1; - size_t value_size = sdsHdrSize(SDS_TYPE_8) + sdsalloc(value) + 1; - mem = field_size + value_size; +/* make any access to the hash object elements ignore the specific elements expiration. + * This is mainly in order to be able to access hash elements which are already expired. */ +static inline void hashTypeIgnoreTTL(robj *o, bool ignore) { + if (o->encoding == OBJ_ENCODING_HASHTABLE) { + /* prevent placing access function if not needed */ + if (!ignore && hashTypeGetVolatileSet(o) == NULL) { + ignore = true; + } + hashtableSetType(o->ptr, ignore ? &hashHashtableType : &hashWithVolatileItemsHashtableType); } - return mem; } -/* Defragments a hashtable entry (field-value pair) if needed, using the - * provided defrag functions. The defrag functions return NULL if the allocation - * was not moved, otherwise they return a pointer to the new memory location. - * A separate sds defrag function is needed because of the unique memory layout - * of sds strings. - * If the location of the hashTypeEntry changed we return the new location, - * otherwise we return NULL. */ -hashTypeEntry *hashTypeEntryDefrag(hashTypeEntry *entry, void *(*defragfn)(void *), sds (*sdsdefragfn)(sds)) { - if (entryHasValuePtr(entry)) { - sds *value_ref = hashTypeEntryGetValueRef(entry); - sds new_value = sdsdefragfn(*value_ref); - if (new_value) *value_ref = new_value; - } - char *allocation = hashTypeEntryAllocPtr(entry); - char *new_allocation = defragfn(allocation); - if (new_allocation != NULL) { - /* Return the same offset into the new allocation as the entry's offset - * in the old allocation. 
*/ - return new_allocation + ((char *)entry - allocation); +static vset *hashTypeGetOrcreateVolatileSet(robj *o) { + serverAssert(o->encoding == OBJ_ENCODING_HASHTABLE); + vset *set = (vset *)hashtableMetadata(o->ptr); + if (!vsetIsValid(set)) { + vsetInit(set); + /* serves mainly for optimization. Use type which supports access function only when needed. */ + hashTypeIgnoreTTL(o, false); } - return NULL; + return set; +} + +void hashTypeFreeVolatileSet(robj *o) { + vset *set = (vset *)hashtableMetadata(o->ptr); + if (vsetIsValid(set)) vsetRelease(set); + /* serves mainly for optimization. by changing the hashtable type we can avoid extra function call in hashtable access */ + hashTypeIgnoreTTL(o, true); +} + +void hashTypeTrackEntry(robj *o, void *entry) { + vset *set = hashTypeGetOrcreateVolatileSet(o); + serverAssert(vsetAddEntry(set, entryGetExpiry, entry)); } -/* Used for releasing memory to OS to avoid unnecessary CoW. Called when we've - * forked and memory won't be used again. See zmadvise_dontneed() */ -void dismissHashTypeEntry(hashTypeEntry *entry) { - /* Only dismiss values memory since the field size usually is small. 
*/ - if (entryHasValuePtr(entry)) { - dismissSds(*hashTypeEntryGetValueRef(entry)); +void hashTypeUntrackEntry(robj *o, void *entry) { + if (!entryHasExpiry(entry)) return; + vset *set = hashTypeGetVolatileSet(o); + debugServerAssert(set); + serverAssert(vsetRemoveEntry(set, entryGetExpiry, entry)); + if (vsetIsEmpty(set)) { + hashTypeFreeVolatileSet(o); } } -void freeHashTypeEntry(hashTypeEntry *entry) { - if (entryHasValuePtr(entry)) { - sdsfree(*hashTypeEntryGetValueRef(entry)); +void hashTypeTrackUpdateEntry(robj *o, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry) { + int old_tracked = (old_entry && old_expiry != EXPIRY_NONE); + int new_tracked = (new_entry && new_expiry != EXPIRY_NONE); + /* If entry was not tracked before and not going to be tracked now, we can simply return */ + if (!old_tracked && !new_tracked) + return; + + vset *set = hashTypeGetOrcreateVolatileSet(o); + debugServerAssert(!old_tracked || !vsetIsEmpty(set)); + + serverAssert(vsetUpdateEntry(set, entryGetExpiry, old_entry, new_entry, old_expiry, new_expiry) == 1); + + if (vsetIsEmpty(set)) { + hashTypeFreeVolatileSet(o); } - zfree(hashTypeEntryAllocPtr(entry)); +} + +bool hashHashtableTypeValidate(hashtable *ht, void *entry) { + UNUSED(ht); + expirationPolicy policy = getExpirationPolicyWithFlags(0); + if (policy == POLICY_IGNORE_EXPIRE) return true; + + if (!entryIsExpired(entry)) return true; + + return false; } /*----------------------------------------------------------------------------- @@ -322,16 +204,6 @@ int hashTypeGetFromListpack(robj *o, sds field, unsigned char **vstr, unsigned i return -1; } -/* Get the value from a hash table encoded hash, identified by field. - * Returns NULL when the field cannot be found, otherwise the SDS value - * is returned. 
*/ -sds hashTypeGetFromHashTable(robj *o, sds field) { - serverAssert(o->encoding == OBJ_ENCODING_HASHTABLE); - void *found_element; - if (!hashtableFind(o->ptr, field, &found_element)) return NULL; - return hashTypeEntryGetValue(found_element); -} - /* Higher level function of hashTypeGet*() that returns the hash value * associated with the specified field. If the field is found C_OK * is returned, otherwise C_ERR. The returned object is returned by @@ -340,16 +212,48 @@ sds hashTypeGetFromHashTable(robj *o, sds field) { * * If *vll is populated *vstr is set to NULL, so the caller * can always check the function return by checking the return value - * for C_OK and checking if vll (or vstr) is NULL. */ -int hashTypeGetValue(robj *o, sds field, unsigned char **vstr, unsigned int *vlen, long long *vll) { + * for C_OK and checking if vll (or vstr) is NULL. + * + * If *expiry is populated than the function will also provide the current field expiration time + * or EXPIRY_NONE in case the field has no expiration time defined. */ +int hashTypeGetValue(robj *o, sds field, unsigned char **vstr, unsigned int *vlen, long long *vll, long long *expiry) { if (o->encoding == OBJ_ENCODING_LISTPACK) { *vstr = NULL; - if (hashTypeGetFromListpack(o, field, vstr, vlen, vll) == 0) return C_OK; + if (hashTypeGetFromListpack(o, field, vstr, vlen, vll) == 0) { + if (expiry) *expiry = EXPIRY_NONE; + return C_OK; + } } else if (o->encoding == OBJ_ENCODING_HASHTABLE) { - sds value = hashTypeGetFromHashTable(o, field); - if (value != NULL) { + void *entry = NULL; + hashtableFind(o->ptr, field, &entry); + if (entry) { + sds value = entryGetValue(entry); + serverAssert(value != NULL); *vstr = (unsigned char *)value; *vlen = sdslen(value); + if (expiry) *expiry = entryGetExpiry(entry); + return C_OK; + } + } else { + serverPanic("Unknown hash encoding"); + } + return C_ERR; +} + +/* Returns the expiration time associated with the specified field. 
+ * If the field is found C_OK is returned, otherwise C_ERR. + * The matching item expiration time is assigned to `expiry` memory location, if specified. + * In case the item has no assigned expiration time, -1 is returned. */ +int hashTypeGetExpiry(robj *o, sds field, long long *expiry) { + if (o->encoding == OBJ_ENCODING_LISTPACK) { + if (hashTypeExists(o, field)) { + if (expiry) *expiry = EXPIRY_NONE; + return C_OK; + } + } else if (o->encoding == OBJ_ENCODING_HASHTABLE) { + void *found_element = NULL; + if (hashtableFind(o->ptr, field, &found_element)) { + if (expiry) *expiry = entryGetExpiry(found_element); return C_OK; } } else { @@ -367,7 +271,7 @@ robj *hashTypeGetValueObject(robj *o, sds field) { unsigned int vlen; long long vll; - if (hashTypeGetValue(o, field, &vstr, &vlen, &vll) == C_ERR) return NULL; + if (hashTypeGetValue(o, field, &vstr, &vlen, &vll, NULL) == C_ERR) return NULL; if (vstr) return createStringObject((char *)vstr, vlen); else @@ -383,7 +287,7 @@ size_t hashTypeGetValueLength(robj *o, sds field) { unsigned int vlen = UINT_MAX; long long vll = LLONG_MAX; - if (hashTypeGetValue(o, field, &vstr, &vlen, &vll) == C_OK) len = vstr ? vlen : sdigits10(vll); + if (hashTypeGetValue(o, field, &vstr, &vlen, &vll, NULL) == C_OK) len = vstr ? vlen : sdigits10(vll); return len; } @@ -395,7 +299,7 @@ int hashTypeExists(robj *o, sds field) { unsigned int vlen = UINT_MAX; long long vll = LLONG_MAX; - return hashTypeGetValue(o, field, &vstr, &vlen, &vll) == C_OK; + return hashTypeGetValue(o, field, &vstr, &vlen, &vll, NULL) == C_OK; } /* Add a new field, overwrite the old with the new value if it already exists. @@ -416,14 +320,14 @@ int hashTypeExists(robj *o, sds field) { * semantics of copying the values if needed. 
* */ -int hashTypeSet(robj *o, sds field, sds value, int flags) { +int hashTypeSet(robj *o, sds field, sds value, long long expiry, int flags) { int update = 0; /* Check if the field is too long for listpack, and convert before adding the item. * This is needed for HINCRBY* case since in other commands this is handled early by * hashTypeTryConversion, so this check will be a NOP. */ if (o->encoding == OBJ_ENCODING_LISTPACK) { - if (sdslen(field) > server.hash_max_listpack_value || sdslen(value) > server.hash_max_listpack_value) + if (expiry > 0 || sdslen(field) > server.hash_max_listpack_value || sdslen(value) > server.hash_max_listpack_value) hashTypeConvert(o, OBJ_ENCODING_HASHTABLE); } @@ -465,22 +369,40 @@ int hashTypeSet(robj *o, sds field, sds value, int flags) { v = sdsdup(value); } + /* We have to ignore the TTL when setting an element. this is mainly in order to be able to update an existing expired + * entry and not have it remain in the hashtable with the same field/value. */ + hashTypeIgnoreTTL(o, true); hashtablePosition position; void *existing; if (hashtableFindPositionForInsert(ht, field, &position, &existing)) { /* does not exist yet */ - hashTypeEntry *entry = hashTypeCreateEntry(field, v); + entry *entry = entryCreate(field, v, expiry); hashtableInsertAtPosition(ht, entry, &position); + /* In case an expiry is set on the new entry, we need to track it */ + if (expiry != EXPIRY_NONE) { + hashTypeTrackEntry(o, entry); + } } else { /* exists: replace value */ - void *new_entry = hashTypeEntryReplaceValue(existing, v); + long long entry_expiry = entryGetExpiry(existing); + /* It is possible that the entry is already expired. In this case we can override it, but we need to make sure to expire it first + * and treat it like it did not exist. */ + bool is_expired = timestampIsExpired(entry_expiry); + if (!is_expired && flags & HASH_SET_KEEP_EXPIRY) { + /* In case the HASH_SET_KEEP_EXPIRY will force keeping the existing entry expiry. 
*/ + expiry = entry_expiry; + } + void *new_entry = entryUpdate(existing, v, expiry); if (new_entry != existing) { /* It has been reallocated. */ int replaced = hashtableReplaceReallocatedEntry(ht, existing, new_entry); serverAssert(replaced); } - update = 1; + hashTypeTrackUpdateEntry(o, existing, new_entry, entry_expiry, expiry); + /* since we are exposed to expired entries, we must NOT reflect them as being "updated" */ + update = is_expired ? 0 : 1; } + hashTypeIgnoreTTL(o, false); } else { serverPanic("Unknown hash encoding"); } @@ -492,6 +414,110 @@ int hashTypeSet(robj *o, sds field, sds value, int flags) { return update; } +/* Set expiration on the specific HASH object 'o' item indicated by 'field'. + * returns -2 in case the provided object is NULL or the specific field was not found. + * returns 0 if the specified flag conditions has not been met. + * returns 1 if the expiration time was applied. + * returns 2 when 'expire' indicate a past Unix time. In this case, if the item exists in the HASH, it will also be expired. */ +static expiryModificationResult hashTypeSetExpire(robj *o, sds field, long long expiry, int flag) { + /* If no object we will return -2 */ + if (o == NULL) return EXPIRATION_MODIFICATION_NOT_EXIST; + + if (o->encoding == OBJ_ENCODING_LISTPACK) { + unsigned char *vstr; + unsigned int vlen; + long long vll; + /* We do not want to convert to listpack for no good reason. + * So we first check if the item exists.*/ + if (hashTypeGetFromListpack(o, field, &vstr, &vlen, &vll) < 0) { + return EXPIRATION_MODIFICATION_NOT_EXIST; + } + /* When listpack representation is used, we consider it as infinite TTL, + * so expire command with gt always fail the GT as well as existence(XX). + * Else, we already know we are going to set an expiration so we expend to hashtable encoding. 
*/ + if (flag & EXPIRE_XX || flag & EXPIRE_GT) { + return EXPIRATION_MODIFICATION_FAILED_CONDITION; + } else { + hashTypeConvert(o, OBJ_ENCODING_HASHTABLE); + } + } + + /* we must be hashtable encoded */ + serverAssert(o->encoding == OBJ_ENCODING_HASHTABLE); + + hashtable *ht = o->ptr; + void **entry_ref = NULL; + if ((entry_ref = hashtableFindRef(ht, field))) { + entry *current_entry = *entry_ref; + long long current_expire = entryGetExpiry(current_entry); + if (flag) { + /* NX option is set, check no current expiry */ + if (flag & EXPIRE_NX) { + if (current_expire != EXPIRY_NONE) { + return EXPIRATION_MODIFICATION_FAILED_CONDITION; + } + } + + /* XX option is set, check current expiry */ + if (flag & EXPIRE_XX) { + if (current_expire == EXPIRY_NONE) { + return EXPIRATION_MODIFICATION_FAILED_CONDITION; + } + } + + /* GT option is set, check current expiry */ + if (flag & EXPIRE_GT) { + /* When current_expire is -1, we consider it as infinite TTL, + * so expire command with gt always fail the GT. */ + if (expiry <= current_expire || current_expire == EXPIRY_NONE) { + return EXPIRATION_MODIFICATION_FAILED_CONDITION; + } + } + + /* LT option is set, check current expiry */ + if (flag & EXPIRE_LT) { + /* When current_expire -1, we consider it as infinite TTL, + * so if there is an expiry on the key and it's not less than current, we fail the LT. */ + if (current_expire != EXPIRY_NONE && expiry >= current_expire) { + return EXPIRATION_MODIFICATION_FAILED_CONDITION; + } + } + } + *entry_ref = entrySetExpiry(current_entry, expiry); + hashTypeTrackUpdateEntry(o, current_entry, *entry_ref, current_expire, expiry); + return EXPIRATION_MODIFICATION_SUCCESSFUL; + } + return EXPIRATION_MODIFICATION_NOT_EXIST; // we did not find anything to do. 
return -2 +} + + +static expiryModificationResult hashTypePersist(robj *o, sds field) { + /* NULL object returns -2 */ + if (o == NULL || o->type != OBJ_HASH) return EXPIRATION_MODIFICATION_NOT_EXIST; + + if (o->encoding == OBJ_ENCODING_LISTPACK) { + if (hashTypeExists(o, field)) + /* When listpack representation is used, All items are without expiry */ + return EXPIRATION_MODIFICATION_FAILED; + else + return EXPIRATION_MODIFICATION_NOT_EXIST; // Did not find any element return -2 + } + + hashtable *ht = o->ptr; + void **entry_ref = NULL; + if ((entry_ref = hashtableFindRef(ht, field))) { + entry *current_entry = *entry_ref; + long long current_expire = entryGetExpiry(current_entry); + if (current_expire != EXPIRY_NONE) { + hashTypeUntrackEntry(o, current_entry); + *entry_ref = entryUpdate(current_entry, NULL, EXPIRY_NONE); + return EXPIRATION_MODIFICATION_SUCCESSFUL; + } + return EXPIRATION_MODIFICATION_FAILED; // If the found element has no expiration set, return -1 + } + return EXPIRATION_MODIFICATION_NOT_EXIST; // Did not find any element return -2 +} + /* Delete an element from a hash. * Return 1 on deleted and 0 on not found. 
*/ int hashTypeDelete(robj *o, sds field) { @@ -513,7 +539,12 @@ int hashTypeDelete(robj *o, sds field) { } } else if (o->encoding == OBJ_ENCODING_HASHTABLE) { hashtable *ht = o->ptr; - deleted = hashtableDelete(ht, field); + void *entry = NULL; + deleted = hashtablePop(ht, field, &entry); + if (deleted) { + hashTypeUntrackEntry(o, entry); + entryFree(entry); + } } else { serverPanic("Unknown hash encoding"); } @@ -536,6 +567,7 @@ unsigned long hashTypeLength(const robj *o) { void hashTypeInitIterator(robj *subject, hashTypeIterator *hi) { hi->subject = subject; hi->encoding = subject->encoding; + hi->volatile_items_iter = false; if (hi->encoding == OBJ_ENCODING_LISTPACK) { hi->fptr = NULL; @@ -547,8 +579,27 @@ void hashTypeInitIterator(robj *subject, hashTypeIterator *hi) { } } +void hashTypeInitVolatileIterator(robj *subject, hashTypeIterator *hi) { + hi->subject = subject; + hi->encoding = subject->encoding; + hi->volatile_items_iter = true; + + if (hi->encoding == OBJ_ENCODING_LISTPACK) { + return; + } else if (hi->encoding == OBJ_ENCODING_HASHTABLE) { + vsetInitIterator(hashTypeGetVolatileSet(subject), &hi->viter); + } else { + serverPanic("Unknown hash encoding"); + } +} + void hashTypeResetIterator(hashTypeIterator *hi) { - if (hi->encoding == OBJ_ENCODING_HASHTABLE) hashtableResetIterator(&hi->iter); + if (hi->encoding == OBJ_ENCODING_HASHTABLE) { + if (!hi->volatile_items_iter) + hashtableResetIterator(&hi->iter); + else + vsetResetIterator(&hi->viter); + } } /* Move to the next entry in the hash. 
Return C_OK when the next entry @@ -558,6 +609,9 @@ int hashTypeNext(hashTypeIterator *hi) { unsigned char *zl; unsigned char *fptr, *vptr; + /* listpack encoding does not have volatile items, so return as iteration end */ + if (hi->volatile_items_iter) return C_ERR; + zl = hi->subject->ptr; fptr = hi->fptr; vptr = hi->vptr; @@ -581,7 +635,11 @@ int hashTypeNext(hashTypeIterator *hi) { hi->fptr = fptr; hi->vptr = vptr; } else if (hi->encoding == OBJ_ENCODING_HASHTABLE) { - if (!hashtableNext(&hi->iter, &hi->next)) return C_ERR; + if (!hi->volatile_items_iter) { + if (!hashtableNext(&hi->iter, &hi->next)) return C_ERR; + } else { + if (!vsetNext(&hi->viter, &hi->next)) return C_ERR; + } } else { serverPanic("Unknown hash encoding"); } @@ -611,9 +669,9 @@ sds hashTypeCurrentFromHashTable(hashTypeIterator *hi, int what) { serverAssert(hi->encoding == OBJ_ENCODING_HASHTABLE); if (what & OBJ_HASH_FIELD) { - return hashTypeEntryGetField(hi->next); + return entryGetField(hi->next); } else { - return hashTypeEntryGetValue(hi->next); + return entryGetValue(hi->next); } } @@ -682,10 +740,10 @@ void hashTypeConvertListpack(robj *o, int enc) { while (hashTypeNext(&hi) != C_ERR) { sds field = hashTypeCurrentObjectNewSds(&hi, OBJ_HASH_FIELD); sds value = hashTypeCurrentObjectNewSds(&hi, OBJ_HASH_VALUE); - hashTypeEntry *entry = hashTypeCreateEntry(field, value); + entry *entry = entryCreate(field, value, EXPIRY_NONE); sdsfree(field); if (!hashtableAdd(ht, entry)) { - freeHashTypeEntry(entry); + entryFree(entry); hashTypeResetIterator(&hi); /* Needed for gcc ASAN */ serverLogHexDump(LL_WARNING, "listpack with dup elements dump", o->ptr, lpBytes(o->ptr)); serverPanic("Listpack corruption detected"); @@ -731,21 +789,22 @@ robj *hashTypeDup(robj *o) { } else if (o->encoding == OBJ_ENCODING_HASHTABLE) { hashtable *ht = hashtableCreate(&hashHashtableType); hashtableExpand(ht, hashtableSize((const hashtable *)o->ptr)); + hobj = createObject(OBJ_HASH, ht); + hobj->encoding = 
OBJ_ENCODING_HASHTABLE; hashTypeInitIterator(o, &hi); while (hashTypeNext(&hi) != C_ERR) { /* Extract a field-value pair from an original hash object.*/ sds field = hashTypeCurrentFromHashTable(&hi, OBJ_HASH_FIELD); sds value = hashTypeCurrentFromHashTable(&hi, OBJ_HASH_VALUE); - + long long expiry = entryGetExpiry(hi.next); /* Add a field-value pair to a new hash object. */ - hashTypeEntry *entry = hashTypeCreateEntry(field, sdsdup(value)); + entry *entry = entryCreate(field, sdsdup(value), expiry); hashtableAdd(ht, entry); + if (expiry != EXPIRY_NONE) + hashTypeTrackEntry(hobj, entry); } hashTypeResetIterator(&hi); - - hobj = createObject(OBJ_HASH, ht); - hobj->encoding = OBJ_ENCODING_HASHTABLE; } else { serverPanic("Unknown hash encoding"); } @@ -771,16 +830,33 @@ void hashReplyFromListpackEntry(client *c, listpackEntry *e) { * 'val' can be NULL in which case it's not extracted. */ static void hashTypeRandomElement(robj *hashobj, unsigned long hashsize, listpackEntry *field, listpackEntry *val) { if (hashobj->encoding == OBJ_ENCODING_HASHTABLE) { - void *entry; - hashtableFairRandomEntry(hashobj->ptr, &entry); - sds sds_field = hashTypeEntryGetField(entry); - field->sval = (unsigned char *)sds_field; - field->slen = sdslen(sds_field); - if (val) { - sds sds_val = hashTypeEntryGetValue(entry); - val->sval = (unsigned char *)sds_val; - val->slen = sdslen(sds_val); + void *e = NULL; + int maxtries = 100; + hashTypeIgnoreTTL(hashobj, true); + while (!e) { + hashtableFairRandomEntry(hashobj->ptr, &e); + if (entryIsExpired(e) && --maxtries) { + e = NULL; + continue; + } else if (maxtries == 0) { + /* in case we will not be able to locate an entry which is not expired, we will just not return any + * result. An alternative would have been that we end up returning an expired entry. 
*/ + field->sval = NULL; + if (val) val->sval = NULL; + break; + } + sds sds_field = entryGetField(e); + field->sval = (unsigned char *)sds_field; + field->slen = sdslen(sds_field); + if (val) { + entry *hash_entry = e; + sds sds_val = entryGetValue(hash_entry); + val->sval = (unsigned char *)sds_val; + val->slen = + sdslen(sds_val); + } } + hashTypeIgnoreTTL(hashobj, false); } else if (hashobj->encoding == OBJ_ENCODING_LISTPACK) { lpRandomPair(hashobj->ptr, hashsize, field, val); } else { @@ -793,61 +869,16 @@ static void hashTypeRandomElement(robj *hashobj, unsigned long hashsize, listpac * Hash type commands *----------------------------------------------------------------------------*/ -void hsetnxCommand(client *c) { - robj *o; - if ((o = hashTypeLookupWriteOrCreate(c, c->argv[1])) == NULL) return; - - if (hashTypeExists(o, c->argv[2]->ptr)) { - addReply(c, shared.czero); - } else { - hashTypeTryConversion(o, c->argv, 2, 3); - hashTypeSet(o, c->argv[2]->ptr, c->argv[3]->ptr, HASH_SET_COPY); - signalModifiedKey(c, c->db, c->argv[1]); - notifyKeyspaceEvent(NOTIFY_HASH, "hset", c->argv[1], c->db->id); - server.dirty++; - addReply(c, shared.cone); - } -} - -void hsetCommand(client *c) { - int i, created = 0; - robj *o; - - if ((c->argc % 2) == 1) { - addReplyErrorArity(c); - return; - } - - if ((o = hashTypeLookupWriteOrCreate(c, c->argv[1])) == NULL) return; - hashTypeTryConversion(o, c->argv, 2, c->argc - 1); - - for (i = 2; i < c->argc; i += 2) created += !hashTypeSet(o, c->argv[i]->ptr, c->argv[i + 1]->ptr, HASH_SET_COPY); - - signalModifiedKey(c, c->db, c->argv[1]); - notifyKeyspaceEvent(NOTIFY_HASH, "hset", c->argv[1], c->db->id); - server.dirty += (c->argc - 2) / 2; - - /* HMSET (deprecated) and HSET return value is different. 
*/ - char *cmdname = c->argv[0]->ptr; - if (cmdname[1] == 's' || cmdname[1] == 'S') { - /* HSET */ - addReplyLongLong(c, created); - } else { - /* HMSET */ - addReply(c, shared.ok); - } -} - void hincrbyCommand(client *c) { long long value, incr, oldvalue; robj *o; sds new; unsigned char *vstr; unsigned int vlen; - + long long expiry = EXPIRY_NONE; if (getLongLongFromObjectOrReply(c, c->argv[3], &incr, NULL) != C_OK) return; if ((o = hashTypeLookupWriteOrCreate(c, c->argv[1])) == NULL) return; - if (hashTypeGetValue(o, c->argv[2]->ptr, &vstr, &vlen, &value) == C_OK) { + if (hashTypeGetValue(o, c->argv[2]->ptr, &vstr, &vlen, &value, &expiry) == C_OK) { if (vstr) { if (string2ll((char *)vstr, vlen, &value) == 0) { addReplyError(c, "hash value is not an integer"); @@ -866,7 +897,7 @@ void hincrbyCommand(client *c) { } value += incr; new = sdsfromlonglong(value); - hashTypeSet(o, c->argv[2]->ptr, new, HASH_SET_TAKE_VALUE); + hashTypeSet(o, c->argv[2]->ptr, new, expiry, HASH_SET_TAKE_VALUE); signalModifiedKey(c, c->db, c->argv[1]); notifyKeyspaceEvent(NOTIFY_HASH, "hincrby", c->argv[1], c->db->id); server.dirty++; @@ -880,6 +911,7 @@ void hincrbyfloatCommand(client *c) { sds new; unsigned char *vstr; unsigned int vlen; + long long expiry = EXPIRY_NONE; if (getLongDoubleFromObjectOrReply(c, c->argv[3], &incr, NULL) != C_OK) return; if (isnan(incr) || isinf(incr)) { @@ -887,7 +919,8 @@ void hincrbyfloatCommand(client *c) { return; } if ((o = hashTypeLookupWriteOrCreate(c, c->argv[1])) == NULL) return; - if (hashTypeGetValue(o, c->argv[2]->ptr, &vstr, &vlen, &ll) == C_OK) { + + if (hashTypeGetValue(o, c->argv[2]->ptr, &vstr, &vlen, &ll, &expiry) == C_OK) { if (vstr) { if (string2ld((char *)vstr, vlen, &value) == 0) { addReplyError(c, "hash value is not a float"); @@ -909,7 +942,7 @@ void hincrbyfloatCommand(client *c) { char buf[MAX_LONG_DOUBLE_CHARS]; int len = ld2string(buf, sizeof(buf), value, LD_STR_HUMAN); new = sdsnewlen(buf, len); - hashTypeSet(o, c->argv[2]->ptr, 
new, HASH_SET_TAKE_VALUE); + hashTypeSet(o, c->argv[2]->ptr, new, expiry, HASH_SET_TAKE_VALUE); signalModifiedKey(c, c->db, c->argv[1]); notifyKeyspaceEvent(NOTIFY_HASH, "hincrbyfloat", c->argv[1], c->db->id); server.dirty++; @@ -935,7 +968,7 @@ static void addHashFieldToReply(client *c, robj *o, sds field) { unsigned int vlen = UINT_MAX; long long vll = LLONG_MAX; - if (hashTypeGetValue(o, field, &vstr, &vlen, &vll) == C_OK) { + if (hashTypeGetValue(o, field, &vstr, &vlen, &vll, NULL) == C_OK) { if (vstr) { addReplyBulkCBuffer(c, vstr, vlen); } else { @@ -950,7 +983,6 @@ void hgetCommand(client *c) { robj *o; if ((o = lookupKeyReadOrReply(c, c->argv[1], shared.null[c->resp])) == NULL || checkType(c, o, OBJ_HASH)) return; - addHashFieldToReply(c, o, c->argv[2]->ptr); } @@ -961,12 +993,16 @@ void hmgetCommand(client *c) { /* Don't abort when the key cannot be found. Non-existing keys are empty * hashes, where HMGET should respond with a series of null bulks. */ o = lookupKeyRead(c->db, c->argv[1]); + if (checkType(c, o, OBJ_HASH)) return; addReplyArrayLen(c, c->argc - 2); for (i = 2; i < c->argc; i++) { addHashFieldToReply(c, o, c->argv[i]->ptr); } + if (o && hashTypeLength(o) == 0) { + dbDelete(c->db, c->argv[1]); + } } void hdelCommand(client *c) { @@ -974,7 +1010,6 @@ void hdelCommand(client *c) { int j, deleted = 0, keyremoved = 0; if ((o = lookupKeyWriteOrReply(c, c->argv[1], shared.czero)) == NULL || checkType(c, o, OBJ_HASH)) return; - for (j = 2; j < c->argc; j++) { if (hashTypeDelete(o, c->argv[j]->ptr)) { deleted++; @@ -1028,10 +1063,395 @@ static void addHashIteratorCursorToReply(writePreparedClient *wpc, hashTypeItera } } +void hsetnxCommand(client *c) { + robj *o; + if ((o = hashTypeLookupWriteOrCreate(c, c->argv[1])) == NULL) return; + if (hashTypeExists(o, c->argv[2]->ptr)) { + addReply(c, shared.czero); + } else { + hashTypeTryConversion(o, c->argv, 2, 3); + hashTypeSet(o, c->argv[2]->ptr, c->argv[3]->ptr, EXPIRY_NONE, HASH_SET_COPY | 
HASH_SET_KEEP_EXPIRY); + signalModifiedKey(c, c->db, c->argv[1]); + notifyKeyspaceEvent(NOTIFY_HASH, "hset", c->argv[1], c->db->id); + server.dirty++; + addReply(c, shared.cone); + } +} + +void hsetCommand(client *c) { + int i, created = 0; + robj *o; + + if ((c->argc % 2) == 1) { + addReplyErrorArity(c); + return; + } + + if ((o = hashTypeLookupWriteOrCreate(c, c->argv[1])) == NULL) return; + hashTypeTryConversion(o, c->argv, 2, c->argc - 1); + + for (i = 2; i < c->argc; i += 2) created += !hashTypeSet(o, c->argv[i]->ptr, c->argv[i + 1]->ptr, EXPIRY_NONE, HASH_SET_COPY); + + signalModifiedKey(c, c->db, c->argv[1]); + notifyKeyspaceEvent(NOTIFY_HASH, "hset", c->argv[1], c->db->id); + server.dirty += (c->argc - 2) / 2; + + /* HMSET (deprecated) and HSET return value is different. */ + char *cmdname = c->argv[0]->ptr; + if (cmdname[1] == 's' || cmdname[1] == 'S') { + /* HSET */ + addReplyLongLong(c, created); + } else { + /* HMSET */ + addReply(c, shared.ok); + } +} + +/* High-Level Algorithm of HSETEX Command: + * + * - Parse arguments and options: + * Parses optional flags such as NX, XX, FNX, FXX, KEEPTTL, and expiration time options. + * Ensures the number of specified fields matches the actual provided key-value pairs. + * + * - Check object existence conditions: + * Depending on NX/XX flags, verifies whether the hash key must or must not exist. + * Exits early with a zero reply if conditions aren't met. + * + * - Create the hash object if needed: + * If the key does not exist and creation is permitted, allocates a new hash. + * + * - Handle expiration logic: + * Computes the expiry time (relative or absolute). + * If the expiration is in the past, the command proceeds to delete the relevant fields. + * + * - Enforce per-field conditions: + * If FNX (field must not exist) or FXX (field must exist) flags are set, + * ensures all fields satisfy these conditions before proceeding. 
+ * + * - Apply changes: + * Either deletes expired fields or sets fields with optional expiration. + * + * - Clean up and notify: + * Deletes the key if the hash becomes empty. + * Emits keyspace notifications for changes (see below). + * Modifies the command vector for AOF propagation if necessary. + * + * + * Return Value: + * - Returns integer 1 if all fields were successfully updated or deleted. + * - Returns integer 0 if no fields were updated due to condition failures. + * + * + * Keyspace Notifications (if enabled): + * - "hset" — Emitted when fields are added or updated. + * - "hexpire" — Emitted when expiration is set on fields. + * - "hexpired" — Emitted when fields are immediately expired and deleted. + * - "del" — Emitted if the entire key is removed (empty hash). + * + * + * Client Reply: + * - Integer reply: 1 if all changes succeeded, 0 if no changes occurred. */ +void hsetexCommand(client *c) { + robj *o; + robj *expire = NULL; + robj *comparison = NULL; + int unit = UNIT_SECONDS; + int flags = ARGS_NO_FLAGS; + int fields_index = 0; + long long num_fields = 0; + long long when = EXPIRY_NONE; + int i = 0; + int set_flags = HASH_SET_COPY, set_expired = 0; + int changes = 0; + robj **new_argv = NULL; + int new_argc = 0; + + for (; fields_index < c->argc; fields_index++) { + if (!strcasecmp(c->argv[fields_index]->ptr, "fields")) { + /* checking optional flags */ + if (parseExtendedCommandArgumentsOrReply(c, &flags, &unit, &expire, &comparison, COMMAND_HSET, fields_index++) != C_OK) return; + if (getLongLongFromObjectOrReply(c, c->argv[fields_index++], &num_fields, NULL) != C_OK) return; + break; + } + } + /* Check that the parsed fields number matches the real provided number of fields */ + if (!num_fields || num_fields != (c->argc - fields_index) / 2) { + addReplyError(c, "numfields should be greater than 0 and match the provided number of fields"); + return; + } + + o = lookupKeyWrite(c->db, c->argv[1]); + if (checkType(c, o, OBJ_HASH)) + return; + + 
if (o == NULL) { + o = createHashObject(); + dbAdd(c->db, c->argv[1], &o); + } + + /* Handle parsing and calculating the expiration time. */ + if (flags & ARGS_KEEPTTL) + set_flags |= HASH_SET_KEEP_EXPIRY; + else if (expire) { + long long basetime = (flags & (ARGS_EXAT | ARGS_PXAT)) ? 0 : commandTimeSnapshot(); + + if (convertExpireArgumentToUnixTime(c, expire, basetime, unit, &when) == C_ERR) + return; + + if (checkAlreadyExpired(when)) { + set_expired = 1; + } + } + + /* Check for all fields condition */ + if (flags & (ARGS_SET_FNX | ARGS_SET_FXX)) { + for (i = fields_index; i < c->argc; i += 2) { + if (((flags & ARGS_SET_FNX) && hashTypeExists(o, c->argv[i]->ptr)) || + ((flags & ARGS_SET_FXX) && !hashTypeExists(o, c->argv[i]->ptr))) { + addReply(c, shared.czero); + return; + } + } + } + + /* In case we are expiring all the elements prepare a new argv since we are going to delete all the expired fields. */ + if (set_expired) { + new_argv = zmalloc(sizeof(robj *) * (num_fields + 2)); + new_argv[new_argc++] = shared.hdel; + incrRefCount(shared.hdel); + new_argv[new_argc++] = c->argv[1]; + incrRefCount(c->argv[1]); + } + + for (i = fields_index; i < c->argc; i += 2) { + if (set_expired) { + if (hashTypeDelete(o, c->argv[i]->ptr)) { + new_argv[new_argc++] = c->argv[i]; + incrRefCount(c->argv[i]); + changes++; + } + } else { + hashTypeSet(o, c->argv[i]->ptr, c->argv[i + 1]->ptr, when, set_flags); + changes++; + } + } + + + if (changes) { + notifyKeyspaceEvent(NOTIFY_HASH, "hset", c->argv[1], c->db->id); + if (set_expired) { + replaceClientCommandVector(c, new_argc, new_argv); + /* We would like to reduce the number of hexpired events in case there are potential many expired fields. */ + notifyKeyspaceEvent(NOTIFY_HASH, "hexpired", c->argv[1], c->db->id); + } else if (expire) { + /* Propagate as HSETEX Key Value PXAT millisecond-timestamp if there is + * EX/PX/EXAT flag. 
*/ + if (!(flags & ARGS_PXAT)) { + for (int i = 2; i < fields_index; i++) { + if (c->argv[i + 1] == expire) { + robj *milliseconds_obj = createStringObjectFromLongLong(when); + rewriteClientCommandArgument(c, i, shared.pxat); + rewriteClientCommandArgument(c, i + 1, milliseconds_obj); + decrRefCount(milliseconds_obj); + break; + } + } + } + notifyKeyspaceEvent(NOTIFY_HASH, "hexpire", c->argv[1], c->db->id); + } + signalModifiedKey(c, c->db, c->argv[1]); + /* Delete the object in case it was left empty */ + if (hashTypeLength(o) == 0) { + dbDelete(c->db, c->argv[1]); + notifyKeyspaceEvent(NOTIFY_GENERIC, "del", c->argv[1], c->db->id); + } + server.dirty += changes; + } else { + /* If no changes were done we still need to free the new argv array and the refcount of the first argument. */ + if (set_expired) + decrRefCount(c->argv[1]); + if (new_argv) zfree(new_argv); + } + addReplyLongLong(c, changes == num_fields ? 1 : 0); +} + +/* High-Level Algorithm of HGETEX Command: + * + * - Parses the command for optional arguments, including expiration options, + * persistence flags, and the list of hash fields to retrieve. + * + * - Verifies that the number of fields specified matches the actual arguments, + * and ensures the key exists and is a valid hash type. + * + * - Computes the expiration behavior: + * - If `PERSIST` is provided, removes the expiration from the fields. + * - If an expiration time is specified, calculates it relative or absolute. + * - If already expired, deletes the fields immediately. + * - Otherwise, schedules new expiration timestamps. + * + * - Retrieves and replies with the values for each requested field. + * + * - For each field: + * - If expiration is due: deletes the field. + * - If an expiry is scheduled: updates the field's expiration timestamp. + * - If persisting: clears the field's expiration. 
+ * + * - If any changes were made (deletes, expires, or persists): + * - Rewrites the command vector (for AOF and replication) using HDEL, HPEXPIREAT, or HPERSIST. + * - Issues keyspace notifications accordingly. + * - If the hash becomes empty as a result, deletes the key and notifies. + * + * + * Return Value: + * - Always replies with an array of values for the requested fields (including NULLs for missing fields). + * + * + * Keyspace Notifications (if enabled): + * - "hexpire" — When expiration is added to hash fields. + * - "hexpired" — When fields are immediately expired and deleted. + * - "hpersist" — When expiration is removed from fields. + * - "del" — If the hash becomes empty and is removed entirely. */ +void hgetexCommand(client *c) { + robj *o; + robj *expire = NULL; + robj *comparison = NULL; + int unit = UNIT_SECONDS; + int flags = ARGS_NO_FLAGS; + int fields_index = 0; + long long num_fields = -1; + long long when = EXPIRY_NONE; + int i = 0; + int set_expiry = 0, set_expired = 0, persist = 0; + int changes = 0; + robj **new_argv = NULL; + robj *milliseconds_obj = NULL, *numitems_obj = NULL; + int new_argc = 0; + int milliseconds_index = -1, numitems_index = -1; + + for (; fields_index < c->argc; fields_index++) { + if (!strcasecmp(c->argv[fields_index]->ptr, "fields")) { + /* checking optional flags */ + if (parseExtendedCommandArgumentsOrReply(c, &flags, &unit, &expire, &comparison, COMMAND_HGET, fields_index++) != C_OK) return; + if (getLongLongFromObjectOrReply(c, c->argv[fields_index++], &num_fields, NULL) != C_OK) return; + break; + } + } + + /* Check that the parsed fields number matches the real provided number of fields */ + if (!num_fields || num_fields != (c->argc - fields_index)) { + addReplyError(c, "numfields should be greater than 0 and match the provided number of fields"); + return; + } + + if ((o = lookupKeyReadOrReply(c, c->argv[1], shared.null[c->resp])) == NULL || checkType(c, o, OBJ_HASH)) return; + + /* Handle parsing and 
calculating the expiration time. */ + if (flags & ARGS_PERSIST) { + persist = 1; + } else if (expire) { + long long basetime = (flags & (ARGS_EXAT | ARGS_PXAT)) ? 0 : commandTimeSnapshot(); + + if (convertExpireArgumentToUnixTime(c, expire, basetime, unit, &when) == C_ERR) + return; + + if (checkAlreadyExpired(when)) { + set_expired = 1; + when = 0; + } else { + set_expiry = 1; + } + } + + initDeferredReplyBuffer(c); + + addReplyArrayLen(c, num_fields); + /* This command is never propagated as is. It is either propagated as HDEL, HPEXPIREAT or HPERSIST. + * This is why it doesn't need special handling in feedAppendOnlyFile to convert relative expire time to absolute one. */ + if (set_expiry || set_expired || persist) { + /* allocate a new client argv for replicating the command. */ + new_argv = zmalloc(sizeof(robj *) * (num_fields + 5)); + if (set_expired) + new_argv[new_argc++] = shared.hdel; + else if (persist) + new_argv[new_argc++] = shared.hpersist; + else + new_argv[new_argc++] = shared.hpexpireat; + + new_argv[new_argc++] = c->argv[1]; + incrRefCount(c->argv[1]); + + if (set_expiry) { + new_argv[new_argc++] = NULL; // placeholder for the expiration time + milliseconds_index = new_argc - 1; + } + + if (set_expiry || persist) { + new_argv[new_argc++] = shared.fields; + new_argv[new_argc++] = NULL; // placeholder for the number of objects + numitems_index = new_argc - 1; + } + } + for (i = fields_index; i < c->argc; i++) { + int changed = 0; + addHashFieldToReply(c, o, c->argv[i]->ptr); + if (set_expired) { + changed = hashTypeDelete(o, c->argv[i]->ptr); + } else if (set_expiry) { + changed = (hashTypeSetExpire(o, c->argv[i]->ptr, when, 0) == EXPIRATION_MODIFICATION_SUCCESSFUL) ? 1 : 0; + } else if (persist) { + changed = (hashTypePersist(o, c->argv[i]->ptr) == EXPIRATION_MODIFICATION_SUCCESSFUL) ?
1 : 0; + } + if (changed) { + changes++; + new_argv[new_argc++] = c->argv[i]; + incrRefCount(c->argv[i]); + } + } + + /* rewrite the command vector and persist in case there are changes. + * Also notify keyspace notifications and signal the key was changed. */ + if (changes) { + if (milliseconds_index > 0) { + milliseconds_obj = createStringObjectFromLongLong(when); + new_argv[milliseconds_index] = milliseconds_obj; + incrRefCount(milliseconds_obj); + } + if (numitems_index > 0) { + numitems_obj = createStringObjectFromLongLong(changes); + new_argv[numitems_index] = numitems_obj; + incrRefCount(numitems_obj); + } + replaceClientCommandVector(c, new_argc, new_argv); + if (set_expired) + notifyKeyspaceEvent(NOTIFY_HASH, "hexpired", c->argv[1], c->db->id); + else + notifyKeyspaceEvent(NOTIFY_HASH, set_expiry ? "hexpire" : "hpersist", c->argv[1], c->db->id); + if (milliseconds_obj) decrRefCount(milliseconds_obj); + if (numitems_obj) decrRefCount(numitems_obj); + + server.dirty += changes; + signalModifiedKey(c, c->db, c->argv[1]); + + /* Delete the object in case it was left empty */ + if (hashTypeLength(o) == 0) { + dbDelete(c->db, c->argv[1]); + notifyKeyspaceEvent(NOTIFY_GENERIC, "del", c->argv[1], c->db->id); + } + } else { + /* If no changes were done we still need to free the new argv array and the refcount of the first argument. */ + if (set_expiry || set_expired || persist) { + decrRefCount(c->argv[1]); + } + if (new_argv) zfree(new_argv); + } + + commitDeferredReplyBuffer(c, 1); +} + void genericHgetallCommand(client *c, int flags) { robj *o; hashTypeIterator hi; - int length, count = 0; + int count = 0; robj *emptyResp = (flags & OBJ_HASH_FIELD && flags & OBJ_HASH_VALUE) ? 
shared.emptymap[c->resp] : shared.emptyarray; if ((o = lookupKeyReadOrReply(c, c->argv[1], emptyResp)) == NULL || checkType(c, o, OBJ_HASH)) return; @@ -1040,13 +1460,7 @@ void genericHgetallCommand(client *c, int flags) { if (!wpc) return; /* We return a map if the user requested fields and values, like in the * HGETALL case. Otherwise to use a flat array makes more sense. */ - length = hashTypeLength(o); - if (flags & OBJ_HASH_FIELD && flags & OBJ_HASH_VALUE) { - addWritePreparedReplyMapLen(wpc, length); - } else { - addWritePreparedReplyArrayLen(wpc, length); - } - + void *replylen = addReplyDeferredLen(c); hashTypeInitIterator(o, &hi); while (hashTypeNext(&hi) != C_ERR) { if (flags & OBJ_HASH_FIELD) { @@ -1060,10 +1474,13 @@ void genericHgetallCommand(client *c, int flags) { } hashTypeResetIterator(&hi); - /* Make sure we returned the right number of elements. */ - if (flags & OBJ_HASH_FIELD && flags & OBJ_HASH_VALUE) count /= 2; - serverAssert(count == length); + if (flags & OBJ_HASH_FIELD && flags & OBJ_HASH_VALUE) { + count /= 2; + setDeferredMapLen(c, replylen, count); + } else { + setDeferredArrayLen(c, replylen, count); + } } void hkeysCommand(client *c) { @@ -1081,7 +1498,6 @@ void hgetallCommand(client *c) { void hexistsCommand(client *c) { robj *o; if ((o = lookupKeyReadOrReply(c, c->argv[1], shared.czero)) == NULL || checkType(c, o, OBJ_HASH)) return; - addReply(c, hashTypeExists(o, c->argv[2]->ptr) ? shared.cone : shared.czero); } @@ -1111,6 +1527,281 @@ static void hrandfieldReplyWithListpack(writePreparedClient *wpc, unsigned int c } } + +/* High-Level Algorithm of hexpireGenericCommand (used by HEXPIRE, HPEXPIRE, HEXPIREAT, HPEXPIREAT): + * + * - Parses optional flags and the number of hash fields to apply expiration to. + * + * - Converts the given expiration time (relative or absolute) into a Unix timestamp. + * + * - Determines if the given timestamp is already expired: + * - If so, immediately deletes the specified hash fields.
+ * - If not, updates their expiration metadata. + * + * - Responds with an array of integers: + * - 1 if the expiration was set. + * - 0 if it was unchanged (due to provided condition check failing). + * - -2 if the field does not exist or the hash is empty. + * - 2 if the field was immediately expired and deleted due to provided expiration is 0 or in the past. + * + * - If fields were deleted due to expiration: + * - Rewrites the command as HDEL for replication/AOF. + * - Emits a "hexpired" keyspace event. + * + * - If expiration was newly set: + * - May rewrite the command as HPEXPIREAT if needed. + * - Emits a "hexpire" keyspace event. + * + * - If the hash becomes empty after deletions: + * - Deletes the hash key. + * - Emits a "del" event for the key. + * + * Return Value: + * - An array of integers corresponding to the result for each field. + * + * Keyspace Notifications (if enabled): + * - "hexpired" — when fields are immediately expired and deleted. + * - "hexpire" — when fields receive new expiration timestamps. + * - "del" — when the hash key becomes empty and is removed. */ +void hexpireGenericCommand(client *c, long long basetime, int unit) { + robj *key = c->argv[1], *param = c->argv[2]; + long long when; /* unix time in milliseconds when the key will expire. 
*/ + int flag = 0; + int fields_index = 3; + long long num_fields = 0; + int i, expired = 0, updated = 0; + int set_expired = 0; + robj **new_argv = NULL; + int new_argc = 0; + + for (; fields_index < c->argc; fields_index++) { + if (!strcasecmp(c->argv[fields_index]->ptr, "fields")) { + /* checking optional flags */ + if (parseExtendedExpireArgumentsOrReply(c, &flag, fields_index++) != C_OK) return; + if (getLongLongFromObjectOrReply(c, c->argv[fields_index++], &num_fields, NULL) != C_OK) return; + break; + } + } + + /* Check that the parsed fields number matches the real provided number of fields */ + if (!num_fields || num_fields != (c->argc - fields_index)) { + addReplyError(c, "numfields should be greater than 0 and match the provided number of fields"); + return; + } + + if (convertExpireArgumentToUnixTime(c, param, basetime, unit, &when) == C_ERR) + return; + + if (checkAlreadyExpired(when)) + set_expired = 1; + + robj *obj = lookupKeyWrite(c->db, key); + + /* Non HASH type return simple error */ + if (checkType(c, obj, OBJ_HASH)) { + return; + } + /* From this point we would return array reply */ + addReplyArrayLen(c, num_fields); + + /* In case we are expiring all the elements prepare a new argv since we are going to delete all the expired fields. */ + if (set_expired) { + new_argv = zmalloc(sizeof(robj *) * (num_fields + 3)); + new_argv[new_argc++] = shared.hdel; + incrRefCount(shared.hdel); + new_argv[new_argc++] = c->argv[1]; + incrRefCount(c->argv[1]); + } + + for (i = 0; i < num_fields; i++) { + expiryModificationResult result = EXPIRATION_MODIFICATION_NOT_EXIST; + if (set_expired) { + if (obj && hashTypeDelete(obj, c->argv[fields_index + i]->ptr)) { + /* In case we deleted the field, add it to the new hdel command vector. 
*/ + new_argv[new_argc++] = c->argv[fields_index + i]; + incrRefCount(c->argv[fields_index + i]); + result = EXPIRATION_MODIFICATION_EXPIRE_ASAP; + expired++; + } + } else { + result = hashTypeSetExpire(obj, c->argv[fields_index + i]->ptr, when, flag); + if (result == EXPIRATION_MODIFICATION_SUCCESSFUL) updated++; + } + addReplyLongLong(c, result); + } + + if (expired || updated) { + if (expired) { + replaceClientCommandVector(c, new_argc, new_argv); + /* We would like to reduce the number of hexpired events in case there are potential many expired fields. */ + notifyKeyspaceEvent(NOTIFY_HASH, "hexpired", c->argv[1], c->db->id); + } else if (updated) { + /* Propagate as HPEXPIREAT millisecond-timestamp + * Only rewrite the command arg if not already HPEXPIREAT */ + if (c->cmd->proc != hpexpireatCommand) { + rewriteClientCommandArgument(c, 0, shared.hpexpireat); + } + + /* Avoid creating a string object when it's the same as argv[2] parameter */ + if (basetime != 0 || unit == UNIT_SECONDS) { + robj *when_obj = createStringObjectFromLongLong(when); + rewriteClientCommandArgument(c, 2, when_obj); + decrRefCount(when_obj); + } + notifyKeyspaceEvent(NOTIFY_HASH, "hexpire", c->argv[1], c->db->id); + } + server.dirty += (expired + updated); // in case there was a change increment the dirty + signalModifiedKey(c, c->db, c->argv[1]); + /* Delete the object in case it was left empty */ + if (hashTypeLength(obj) == 0) { + dbDelete(c->db, c->argv[1]); + notifyKeyspaceEvent(NOTIFY_GENERIC, "del", c->argv[1], c->db->id); + } + } +} + +void hexpireCommand(client *c) { + hexpireGenericCommand(c, commandTimeSnapshot(), UNIT_SECONDS); +} + +void hexpireatCommand(client *c) { + hexpireGenericCommand(c, 0, UNIT_SECONDS); +} + +void hpexpireCommand(client *c) { + hexpireGenericCommand(c, commandTimeSnapshot(), UNIT_MILLISECONDS); +} + +void hpexpireatCommand(client *c) { + hexpireGenericCommand(c, 0, UNIT_MILLISECONDS); +} + +/* High-Level Algorithm of HPERSIST Command: + * + * - 
Expects a key and a list of hash fields whose expiration metadata should be removed. + * - Validates that the number of provided fields matches the declared count. + * + * - For each specified field attempts to remove any existing expiration. + * - Replies to the client with an array of integers, each representing the result of persistence for one field: + * - 1 if the expiration for the field was removed. + * - -1 if the field exists, but has no expiration time set. + * - -2 if the field does not exist or the hash is empty. + * + * - If any expirations were removed: + * - Marks the key as modified (for replication/AOF consistency). + * - Emits a "hpersist" keyspace notification. + * + * Keyspace Notifications (if enabled): + * - "hpersist" — emitted once if any field had its expiration removed. */ +void hpersistCommand(client *c) { + int fields_index = 4, result = 0, changes = 0; + long long num_fields = 0; + + if (getLongLongFromObjectOrReply(c, c->argv[fields_index - 1], &num_fields, NULL) != C_OK) return; + + /* Check that the parsed fields number matches the real provided number of fields */ + if (!num_fields || num_fields != (c->argc - fields_index)) { + addReplyError(c, "numfields should be greater than 0 and match the provided number of fields"); + return; + } + + /* From this point we would return array reply */ + addReplyArrayLen(c, num_fields); + + robj *hash = lookupKeyWrite(c->db, c->argv[1]); + if (checkType(c, hash, OBJ_HASH)) + return; + + for (int i = 0; i < num_fields; i++, fields_index++) { + result = hashTypePersist(hash, c->argv[fields_index]->ptr); + if (result == EXPIRATION_MODIFICATION_SUCCESSFUL) { + server.dirty++; + changes++; + } + addReplyLongLong(c, result); + } + if (changes) { + notifyKeyspaceEvent(NOTIFY_HASH, "hpersist", c->argv[1], c->db->id); + signalModifiedKey(c, c->db, c->argv[1]); + } +} + +/* High-Level Algorithm of HTTL / HPTTL / HEXPIRETIME / HPEXPIRETIME Commands: + * + * - These commands return the remaining time to live 
(TTL) or absolute expiry time + * of one or more fields in a hash. + * + * - HTTL / HPTTL: + * - Return relative TTL of each field (in seconds or milliseconds). + * - TTL is computed as the difference between current time and expiry time. + * + * - HEXPIRETIME / HPEXPIRETIME: + * - Return the absolute Unix time at which each field will expire + * (in seconds or milliseconds, depending on the variant). + * + * For each field requested: + * - If the field or hash does not exist: reply with -2. + * - If the field exists but has no expiration: reply with -1. + * - If the field has an expiration: + * - HTTL / HPTTL: reply with remaining TTL (clamped at 0 if negative). + * - HEXPIRETIME / HPEXPIRETIME: reply with the absolute expiry time. + * + * Return Value: + * - An array of integers, one per field: + * - -2 = hash or field does not exist. + * - -1 = field exists but has no expiration. + * - >=0 = TTL or expiry time, depending on the command variant. + * + * Keyspace Notifications: + * - None emitted; this command is read-only. 
*/ +void httlGenericCommand(client *c, long long basetime, int unit) { + int fields_index = 4; + long long num_fields = 0, result = -2; + + if (getLongLongFromObjectOrReply(c, c->argv[fields_index - 1], &num_fields, NULL) != C_OK) return; + + /* Check that the parsed fields number matches the real provided number of fields */ + if (!num_fields || num_fields != (c->argc - fields_index)) { + addReplyErrorObject(c, shared.syntaxerr); + return; + } + + robj *hash = lookupKeyRead(c->db, c->argv[1]); + + if (checkType(c, hash, OBJ_HASH)) return; + + /* From this point we would return array reply */ + addReplyArrayLen(c, num_fields); + + for (int i = 0; i < num_fields; i++) { + if (!hash || hashTypeGetExpiry(hash, c->argv[fields_index + i]->ptr, &result) == C_ERR) { + addReplyLongLong(c, -2); + } else if (result == EXPIRY_NONE) { + addReplyLongLong(c, -1); + } else { + result = result - basetime; + if (result < 0) result = 0; + addReplyLongLong(c, unit == UNIT_MILLISECONDS ? result : ((result + 500) / 1000)); + } + } +} + +void httlCommand(client *c) { + httlGenericCommand(c, commandTimeSnapshot(), UNIT_SECONDS); +} + +void hpttlCommand(client *c) { + httlGenericCommand(c, commandTimeSnapshot(), UNIT_MILLISECONDS); +} + +void hexpiretimeCommand(client *c) { + httlGenericCommand(c, 0, UNIT_SECONDS); +} + +void hpexpiretimeCommand(client *c) { + httlGenericCommand(c, 0, UNIT_MILLISECONDS); +} + /* How many times bigger should be the hash compared to the requested size * for us to not use the "remove elements" strategy? Read later in the * implementation for more info. */ @@ -1144,26 +1835,30 @@ void hrandfieldWithCountCommand(client *c, long l, int withvalues) { writePreparedClient *wpc = prepareClientForFutureWrites(c); if (!wpc) return; + + void *replylen = addReplyDeferredLen(c); + unsigned long reply_size = 0; + /* CASE 1: The count was negative, so the extraction method is just: * "return N random elements" sampling the whole set every time. 
* This case is trivial and can be served without auxiliary data * structures. This case is the only one that also needs to return the * elements in random order. */ if (!uniq || count == 1) { - if (withvalues && c->resp == 2) - addWritePreparedReplyArrayLen(wpc, count * 2); - else - addWritePreparedReplyArrayLen(wpc, count); if (hash->encoding == OBJ_ENCODING_HASHTABLE) { while (count--) { - void *entry; - hashtableFairRandomEntry(hash->ptr, &entry); - sds field = hashTypeEntryGetField(entry); - sds value = hashTypeEntryGetValue(entry); + listpackEntry field, value; + hashTypeRandomElement(hash, size, &field, &value); + + /* In case we were unable to locate random element, it is probably because there is no such element + * since all elements are expired. */ + if (!field.sval) break; + if (withvalues && c->resp > 2) addWritePreparedReplyArrayLen(wpc, 2); - addWritePreparedReplyBulkCBuffer(wpc, field, sdslen(field)); - if (withvalues) addWritePreparedReplyBulkCBuffer(wpc, value, sdslen(value)); + addWritePreparedReplyBulkCBuffer(wpc, field.sval, field.slen); + if (withvalues) addWritePreparedReplyBulkCBuffer(wpc, value.sval, value.slen); if (c->flag.close_asap) break; + reply_size++; } } else if (hash->encoding == OBJ_ENCODING_LISTPACK) { listpackEntry *fields, *vals = NULL; @@ -1175,6 +1870,7 @@ void hrandfieldWithCountCommand(client *c, long l, int withvalues) { while (count) { sample_count = count > limit ? limit : count; count -= sample_count; + reply_size += sample_count; lpRandomPairs(hash->ptr, sample_count, fields, vals); hrandfieldReplyWithListpack(wpc, sample_count, fields, vals); if (c->flag.close_asap) break; @@ -1182,16 +1878,9 @@ void hrandfieldWithCountCommand(client *c, long l, int withvalues) { zfree(fields); zfree(vals); } - return; + goto set_deferred_response; } - /* Initiate reply count, RESP3 responds with nested array, RESP2 with flat one. */ - long reply_size = count < size ? 
count : size; - if (withvalues && c->resp == 2) - addWritePreparedReplyArrayLen(wpc, reply_size * 2); - else - addWritePreparedReplyArrayLen(wpc, reply_size); - /* CASE 2: * The number of requested elements is greater than the number of * elements inside the hash: simply return the whole hash. */ @@ -1202,11 +1891,14 @@ void hrandfieldWithCountCommand(client *c, long l, int withvalues) { if (withvalues && c->resp > 2) addWritePreparedReplyArrayLen(wpc, 2); addHashIteratorCursorToReply(wpc, &hi, OBJ_HASH_FIELD); if (withvalues) addHashIteratorCursorToReply(wpc, &hi, OBJ_HASH_VALUE); + reply_size++; } hashTypeResetIterator(&hi); - return; + + goto set_deferred_response; } + /* CASE 2.5 listpack only. Sampling unique elements, in non-random order. * Listpack encoded hashes are meant to be relatively small, so * HRANDFIELD_SUB_STRATEGY_MUL isn't necessary and we rather not make @@ -1216,6 +1908,7 @@ void hrandfieldWithCountCommand(client *c, long l, int withvalues) { * And it is inefficient to repeatedly pick one random element from a * listpack in CASE 4. So we use this instead. */ if (hash->encoding == OBJ_ENCODING_LISTPACK) { + reply_size = count < size ? count : size; listpackEntry *fields, *vals = NULL; fields = zmalloc(sizeof(listpackEntry) * count); if (withvalues) vals = zmalloc(sizeof(listpackEntry) * count); @@ -1223,7 +1916,7 @@ void hrandfieldWithCountCommand(client *c, long l, int withvalues) { hrandfieldReplyWithListpack(wpc, count, fields, vals); zfree(fields); zfree(vals); - return; + goto set_deferred_response; } /* CASE 3: @@ -1247,24 +1940,25 @@ void hrandfieldWithCountCommand(client *c, long l, int withvalues) { while (hashtableNext(&iter, &entry)) { int res = hashtableAdd(ht, entry); serverAssert(res); + reply_size++; } - serverAssert(hashtableSize(ht) == size); + serverAssert(hashtableSize(ht) == reply_size); hashtableResetIterator(&iter); /* Remove random elements to reach the right count. 
*/ - while (size > count) { + while (reply_size > count) { void *element; hashtableFairRandomEntry(ht, &element); hashtableDelete(ht, element); - size--; + reply_size--; } /* Reply with what's in the temporary hashtable and release memory */ hashtableInitIterator(&iter, ht, 0); void *next; while (hashtableNext(&iter, &next)) { - sds field = hashTypeEntryGetField(next); - sds value = hashTypeEntryGetValue(next); + sds field = entryGetField(next); + sds value = entryGetValue(next); if (withvalues && c->resp > 2) addWritePreparedReplyArrayLen(wpc, 2); addWritePreparedReplyBulkCBuffer(wpc, field, sdslen(field)); if (withvalues) addWritePreparedReplyBulkCBuffer(wpc, value, sdslen(value)); @@ -1287,8 +1981,12 @@ while (added < count) { hashTypeRandomElement(hash, size, &field, withvalues ? &value : NULL); - /* Try to add the object to the hashtable. If it already exists - * free it, otherwise increment the number of objects we have + /* In case we were unable to locate random element, it is probably because there is no such element + * since all elements are expired. */ + if (!field.sval) break; + + /* Try to add the object to the hashtable. If expired, stop adding (there are probably none left). + * If it already exists free it, otherwise increment the number of objects we have * in the result hashtable. */ sds sfield = hashSdsFromListpackEntry(&field); if (!hashtableAdd(ht, sfield)) { @@ -1305,7 +2003,15 @@ void hrandfieldWithCountCommand(client *c, long l, int withvalues) { /* Release memory */ hashtableRelease(ht); + reply_size = added; } + +set_deferred_response: + /* Set the reply count, RESP3 responds with nested array, RESP2 with flat one.
*/ + if (withvalues && c->resp == 2) + setDeferredArrayLen(c, replylen, reply_size * 2); + else + setDeferredArrayLen(c, replylen, reply_size); } /* HRANDFIELD key [count [WITHVALUES]] */ @@ -1328,6 +2034,7 @@ void hrandfieldCommand(client *c) { } } hrandfieldWithCountCommand(c, l, withvalues); + return; } @@ -1335,7 +2042,6 @@ void hrandfieldCommand(client *c) { if ((hash = lookupKeyReadOrReply(c, c->argv[1], shared.null[c->resp])) == NULL || checkType(c, hash, OBJ_HASH)) { return; } - hashTypeRandomElement(hash, hashTypeLength(hash), &ele, NULL); hashReplyFromListpackEntry(c, &ele); } diff --git a/src/t_string.c b/src/t_string.c index ef3e4bccde..a8c46a8a91 100644 --- a/src/t_string.c +++ b/src/t_string.c @@ -55,6 +55,9 @@ static int checkStringLength(client *c, long long size, long long append) { return C_OK; } +/* Forward declaration */ +static int getExpireMillisecondsOrReply(client *c, robj *expire, int flags, int unit, long long *milliseconds); + /* The setGenericCommand() function implements the SET operation with different * options and variants. This function is called in order to implement the * following commands: SET, SETEX, PSETEX, SETNX, GETSET. @@ -70,24 +73,6 @@ static int checkStringLength(client *c, long long size, long long append) { * * If ok_reply is NULL "+OK" is used. * If abort_reply is NULL, "$-1" is used. */ - -#define OBJ_NO_FLAGS 0 -#define OBJ_SET_NX (1 << 0) /* Set if key not exists. */ -#define OBJ_SET_XX (1 << 1) /* Set if key exists.
*/ -#define OBJ_EX (1 << 2) /* Set if time in seconds is given */ -#define OBJ_PX (1 << 3) /* Set if time in ms in given */ -#define OBJ_KEEPTTL (1 << 4) /* Set and keep the ttl */ -#define OBJ_SET_GET (1 << 5) /* Set if want to get key before set */ -#define OBJ_EXAT (1 << 6) /* Set if timestamp in second is given */ -#define OBJ_PXAT (1 << 7) /* Set if timestamp in ms is given */ -#define OBJ_PERSIST (1 << 8) /* Set if we need to remove the ttl */ -#define OBJ_SET_IFEQ (1 << 9) /* Set if we need compare and set */ -#define OBJ_ARGV3 (1 << 10) /* Set if the value is at argv[3]; otherwise it's \ - * at argv[2]. */ - -/* Forward declaration */ -static int getExpireMillisecondsOrReply(client *c, robj *expire, int flags, int unit, long long *milliseconds); - void setGenericCommand(client *c, int flags, robj *key, @@ -105,7 +90,7 @@ void setGenericCommand(client *c, return; } - if (flags & OBJ_SET_GET) { + if (flags & ARGS_SET_GET) { initDeferredReplyBuffer(c); if (getGenericCommand(c) == C_ERR) goto cleanup; } @@ -114,26 +99,26 @@ void setGenericCommand(client *c, found = existing_value != NULL; /* Handle the IFEQ conditional check */ - if (flags & OBJ_SET_IFEQ && found) { - if (!(flags & OBJ_SET_GET) && checkType(c, existing_value, OBJ_STRING)) { + if (flags & ARGS_SET_IFEQ && found) { + if (!(flags & ARGS_SET_GET) && checkType(c, existing_value, OBJ_STRING)) { goto cleanup; } if (compareStringObjects(existing_value, comparison) != 0) { - if (!(flags & OBJ_SET_GET)) { + if (!(flags & ARGS_SET_GET)) { addReply(c, abort_reply ? abort_reply : shared.null[c->resp]); } goto cleanup; } - } else if (flags & OBJ_SET_IFEQ && !found) { - if (!(flags & OBJ_SET_GET)) { + } else if (flags & ARGS_SET_IFEQ && !found) { + if (!(flags & ARGS_SET_GET)) { addReply(c, abort_reply ? 
abort_reply : shared.null[c->resp]); } goto cleanup; } - if ((flags & OBJ_SET_NX && found) || (flags & OBJ_SET_XX && !found)) { - if (!(flags & OBJ_SET_GET)) { + if ((flags & ARGS_SET_NX && found) || (flags & ARGS_SET_XX && !found)) { + if (!(flags & ARGS_SET_GET)) { addReply(c, abort_reply ? abort_reply : shared.null[c->resp]); } goto cleanup; @@ -144,13 +129,13 @@ void setGenericCommand(client *c, * If the key already exists, delete it. */ if (expire && checkAlreadyExpired(milliseconds)) { if (found) deleteExpiredKeyFromOverwriteAndPropagate(c, key); - if (!(flags & OBJ_SET_GET)) addReply(c, shared.ok); + if (!(flags & ARGS_SET_GET)) addReply(c, shared.ok); goto cleanup; } /* When expire is not NULL, we avoid deleting the TTL so it can be updated later instead of being deleted and then * created again. */ - setkey_flags |= ((flags & OBJ_KEEPTTL) || expire) ? SETKEY_KEEPTTL : 0; + setkey_flags |= ((flags & ARGS_KEEPTTL) || expire) ? SETKEY_KEEPTTL : 0; setkey_flags |= found ? SETKEY_ALREADY_EXIST : SETKEY_DOESNT_EXIST; setKey(c, c->db, key, &val, setkey_flags); @@ -158,7 +143,7 @@ void setGenericCommand(client *c, /* By setting the reallocated value back into argv, we can avoid duplicating * a large string value when adding it to the db. */ - c->argv[(flags & OBJ_ARGV3) ? 3 : 2] = val; + c->argv[(flags & ARGS_ARGV3) ? 3 : 2] = val; incrRefCount(val); server.dirty++; @@ -167,7 +152,7 @@ void setGenericCommand(client *c, if (expire) { /* Propagate as SET Key Value PXAT millisecond-timestamp if there is * EX/PX/EXAT flag. 
*/ - if (!(flags & OBJ_PXAT)) { + if (!(flags & ARGS_PXAT)) { robj *milliseconds_obj = createStringObjectFromLongLong(milliseconds); rewriteClientCommandVector(c, 5, shared.set, key, val, shared.pxat, milliseconds_obj); decrRefCount(milliseconds_obj); @@ -175,13 +160,13 @@ void setGenericCommand(client *c, notifyKeyspaceEvent(NOTIFY_GENERIC, "expire", key, c->db->id); } - if (!(flags & OBJ_SET_GET)) { + if (!(flags & ARGS_SET_GET)) { addReply(c, ok_reply ? ok_reply : shared.ok); } /* Propagate without the GET argument (Isn't needed if we had expire since in that case we completely re-written the * command argv) */ - if ((flags & OBJ_SET_GET) && !expire) { + if ((flags & ARGS_SET_GET) && !expire) { int argc = 0; int j; robj **argv = zmalloc((c->argc - 1) * sizeof(robj *)); @@ -227,7 +212,7 @@ static int getExpireMillisecondsOrReply(client *c, robj *expire, int flags, int if (unit == UNIT_SECONDS) *milliseconds *= 1000; - if ((flags & OBJ_PX) || (flags & OBJ_EX)) { + if ((flags & ARGS_PX) || (flags & ARGS_EX)) { *milliseconds += commandTimeSnapshot(); } @@ -240,118 +225,6 @@ static int getExpireMillisecondsOrReply(client *c, robj *expire, int flags, int return C_OK; } -#define COMMAND_GET 0 -#define COMMAND_SET 1 -/* - * The parseExtendedStringArgumentsOrReply() function performs the common validation for extended - * string arguments used in SET and GET command. - * - * Get specific commands - PERSIST/DEL - * Set specific commands - XX/NX/GET/IFEQ - * Common commands - EX/EXAT/PX/PXAT/KEEPTTL - * - * Function takes pointers to client, flags, unit, pointer to pointer of expire obj if needed - * to be determined and command_type which can be COMMAND_GET or COMMAND_SET. - * - * If there are any syntax violations C_ERR is returned else C_OK is returned. - * - * Input flags are updated upon parsing the arguments. Unit and expire are updated if there are any - * EX/EXAT/PX/PXAT arguments. Unit is updated to millisecond if PX/PXAT is set. 
- */ -int parseExtendedStringArgumentsOrReply(client *c, int *flags, int *unit, robj **expire, robj **compare_val, int command_type) { - int j = command_type == COMMAND_GET ? 2 : 3; - for (; j < c->argc; j++) { - char *opt = c->argv[j]->ptr; - robj *next = (j == c->argc - 1) ? NULL : c->argv[j + 1]; - - /* clang-format off */ - if ((opt[0] == 'n' || opt[0] == 'N') && - (opt[1] == 'x' || opt[1] == 'X') && opt[2] == '\0' && - !(*flags & OBJ_SET_XX || *flags & OBJ_SET_IFEQ) && (command_type == COMMAND_SET)) - { - *flags |= OBJ_SET_NX; - } else if ((opt[0] == 'x' || opt[0] == 'X') && - (opt[1] == 'x' || opt[1] == 'X') && opt[2] == '\0' && - !(*flags & OBJ_SET_NX || *flags & OBJ_SET_IFEQ) && (command_type == COMMAND_SET)) - { - *flags |= OBJ_SET_XX; - } else if ((opt[0] == 'i' || opt[0] == 'I') && - (opt[1] == 'f' || opt[1] == 'F') && - (opt[2] == 'e' || opt[2] == 'E') && - (opt[3] == 'q' || opt[3] == 'Q') && opt[4] == '\0' && - next && !(*flags & OBJ_SET_NX || *flags & OBJ_SET_XX || *flags & OBJ_SET_IFEQ) && (command_type == COMMAND_SET)) - { - *flags |= OBJ_SET_IFEQ; - *compare_val = next; - j++; - } else if ((opt[0] == 'g' || opt[0] == 'G') && - (opt[1] == 'e' || opt[1] == 'E') && - (opt[2] == 't' || opt[2] == 'T') && opt[3] == '\0' && - (command_type == COMMAND_SET)) - { - *flags |= OBJ_SET_GET; - } else if (!strcasecmp(opt, "KEEPTTL") && !(*flags & OBJ_PERSIST) && - !(*flags & OBJ_EX) && !(*flags & OBJ_EXAT) && - !(*flags & OBJ_PX) && !(*flags & OBJ_PXAT) && (command_type == COMMAND_SET)) - { - *flags |= OBJ_KEEPTTL; - } else if (!strcasecmp(opt,"PERSIST") && (command_type == COMMAND_GET) && - !(*flags & OBJ_EX) && !(*flags & OBJ_EXAT) && - !(*flags & OBJ_PX) && !(*flags & OBJ_PXAT) && - !(*flags & OBJ_KEEPTTL)) - { - *flags |= OBJ_PERSIST; - } else if ((opt[0] == 'e' || opt[0] == 'E') && - (opt[1] == 'x' || opt[1] == 'X') && opt[2] == '\0' && - !(*flags & OBJ_KEEPTTL) && !(*flags & OBJ_PERSIST) && - !(*flags & OBJ_EXAT) && !(*flags & OBJ_PX) && - !(*flags & 
OBJ_PXAT) && next) - { - *flags |= OBJ_EX; - *expire = next; - j++; - } else if ((opt[0] == 'p' || opt[0] == 'P') && - (opt[1] == 'x' || opt[1] == 'X') && opt[2] == '\0' && - !(*flags & OBJ_KEEPTTL) && !(*flags & OBJ_PERSIST) && - !(*flags & OBJ_EX) && !(*flags & OBJ_EXAT) && - !(*flags & OBJ_PXAT) && next) - { - *flags |= OBJ_PX; - *unit = UNIT_MILLISECONDS; - *expire = next; - j++; - } else if ((opt[0] == 'e' || opt[0] == 'E') && - (opt[1] == 'x' || opt[1] == 'X') && - (opt[2] == 'a' || opt[2] == 'A') && - (opt[3] == 't' || opt[3] == 'T') && opt[4] == '\0' && - !(*flags & OBJ_KEEPTTL) && !(*flags & OBJ_PERSIST) && - !(*flags & OBJ_EX) && !(*flags & OBJ_PX) && - !(*flags & OBJ_PXAT) && next) - { - *flags |= OBJ_EXAT; - *expire = next; - j++; - } else if ((opt[0] == 'p' || opt[0] == 'P') && - (opt[1] == 'x' || opt[1] == 'X') && - (opt[2] == 'a' || opt[2] == 'A') && - (opt[3] == 't' || opt[3] == 'T') && opt[4] == '\0' && - !(*flags & OBJ_KEEPTTL) && !(*flags & OBJ_PERSIST) && - !(*flags & OBJ_EX) && !(*flags & OBJ_EXAT) && - !(*flags & OBJ_PX) && next) - { - *flags |= OBJ_PXAT; - *unit = UNIT_MILLISECONDS; - *expire = next; - j++; - } else { - addReplyErrorObject(c,shared.syntaxerr); - return C_ERR; - } - /* clang-format on */ - } - return C_OK; -} - /* SET key value [NX | XX | IFEQ comparison-value] [GET] * [EX seconds | PX milliseconds | * EXAT seconds-timestamp | PXAT milliseconds-timestamp | KEEPTTL] */ @@ -359,9 +232,9 @@ void setCommand(client *c) { robj *expire = NULL; robj *comparison = NULL; int unit = UNIT_SECONDS; - int flags = OBJ_NO_FLAGS; + int flags = ARGS_NO_FLAGS; - if (parseExtendedStringArgumentsOrReply(c, &flags, &unit, &expire, &comparison, COMMAND_SET) != C_OK) { + if (parseExtendedCommandArgumentsOrReply(c, &flags, &unit, &expire, &comparison, COMMAND_SET, c->argc) != C_OK) { return; } @@ -371,17 +244,17 @@ void setCommand(client *c) { void setnxCommand(client *c) { c->argv[2] = tryObjectEncoding(c->argv[2]); - setGenericCommand(c, OBJ_SET_NX, 
c->argv[1], c->argv[2], NULL, 0, shared.cone, shared.czero, NULL); + setGenericCommand(c, ARGS_SET_NX, c->argv[1], c->argv[2], NULL, 0, shared.cone, shared.czero, NULL); } void setexCommand(client *c) { c->argv[3] = tryObjectEncoding(c->argv[3]); - setGenericCommand(c, OBJ_EX | OBJ_ARGV3, c->argv[1], c->argv[3], c->argv[2], UNIT_SECONDS, NULL, NULL, NULL); + setGenericCommand(c, ARGS_EX | ARGS_ARGV3, c->argv[1], c->argv[3], c->argv[2], UNIT_SECONDS, NULL, NULL, NULL); } void psetexCommand(client *c) { c->argv[3] = tryObjectEncoding(c->argv[3]); - setGenericCommand(c, OBJ_PX | OBJ_ARGV3, c->argv[1], c->argv[3], c->argv[2], UNIT_MILLISECONDS, NULL, NULL, NULL); + setGenericCommand(c, ARGS_PX | ARGS_ARGV3, c->argv[1], c->argv[3], c->argv[2], UNIT_MILLISECONDS, NULL, NULL, NULL); } /* DELIFEQ key value */ @@ -445,9 +318,9 @@ void getCommand(client *c) { void getexCommand(client *c) { robj *expire = NULL; int unit = UNIT_SECONDS; - int flags = OBJ_NO_FLAGS; + int flags = ARGS_NO_FLAGS; - if (parseExtendedStringArgumentsOrReply(c, &flags, &unit, &expire, NULL, COMMAND_GET) != C_OK) { + if (parseExtendedCommandArgumentsOrReply(c, &flags, &unit, &expire, NULL, COMMAND_GET, c->argc) != C_OK) { return; } @@ -472,7 +345,7 @@ void getexCommand(client *c) { /* This command is never propagated as is. It is either propagated as PEXPIRE[AT],DEL,UNLINK or PERSIST. * This why it doesn't need special handling in feedAppendOnlyFile to convert relative expire time to absolute one. */ - if (((flags & OBJ_PXAT) || (flags & OBJ_EXAT)) && checkAlreadyExpired(milliseconds)) { + if (((flags & ARGS_PXAT) || (flags & ARGS_EXAT)) && checkAlreadyExpired(milliseconds)) { /* When PXAT/EXAT absolute timestamp is specified, there can be a chance that timestamp * has already elapsed so delete the key in that case. 
*/ deleteExpiredKeyFromOverwriteAndPropagate(c, c->argv[1]); @@ -486,7 +359,7 @@ void getexCommand(client *c) { signalModifiedKey(c, c->db, c->argv[1]); notifyKeyspaceEvent(NOTIFY_GENERIC, "expire", c->argv[1], c->db->id); server.dirty++; - } else if (flags & OBJ_PERSIST) { + } else if (flags & ARGS_PERSIST) { if (removeExpire(c->db, c->argv[1])) { signalModifiedKey(c, c->db, c->argv[1]); rewriteClientCommandVector(c, 2, shared.persist, c->argv[1]); diff --git a/src/unit/test_entry.c b/src/unit/test_entry.c new file mode 100644 index 0000000000..27a2028f95 --- /dev/null +++ b/src/unit/test_entry.c @@ -0,0 +1,471 @@ +#include "../entry.h" +#include "test_help.h" +#include "../expire.h" +#include "../monotonic.h" +#include "../server.h" +#include +#include +#include +#include +#include + +/* Constants for test values */ +#define SHORT_FIELD "foo" +#define SHORT_VALUE "bar" +#define LONG_FIELD "k:123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890" +#define LONG_VALUE "v:12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890" + +/* Verify entry properties */ +static int verify_entry_properties(entry *e, sds field, sds value_copy, long long expiry, bool has_expiry, bool has_valueptr) { + TEST_ASSERT(sdscmp(entryGetField(e), field) == 0); + TEST_ASSERT(sdscmp(entryGetValue(e), value_copy) == 0); + TEST_ASSERT(entryGetExpiry(e) == expiry); + TEST_ASSERT(entryHasExpiry(e) == has_expiry); + TEST_ASSERT(entryHasEmbeddedValue(e) != has_valueptr); + return 0; +} + +/** + * Test entryCreate functunallity: + * 1. embedded with expiry + * 2. embedded without expiry + * 3. non-embedded with expiry + * 4. 
non-embedded without expiry + */ +int test_entryCreate(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + + // Test with embedded value with expiry + sds field1 = sdsnew(SHORT_FIELD); + sds value1 = sdsnew(SHORT_VALUE); + sds value_copy1 = sdsdup(value1); // Keep a copy since entryCreate takes ownership of value + long long expiry1 = 100; + entry *e1 = entryCreate(field1, value1, expiry1); + verify_entry_properties(e1, field1, value_copy1, expiry1, true, false); + + // Test with embedded value with no expiry + sds field2 = sdsnew(SHORT_FIELD); + sds value2 = sdsnew(SHORT_VALUE); + sds value_copy2 = sdsdup(value2); + long long expiry2 = EXPIRY_NONE; + entry *e2 = entryCreate(field2, value2, expiry2); + verify_entry_properties(e2, field2, value_copy2, expiry2, false, false); + + // Test with non-embedded field and value with expiry + sds field3 = sdsnew(LONG_FIELD); + sds value3 = sdsnew(LONG_VALUE); + sds value_copy3 = sdsdup(value3); + long long expiry3 = 100; + entry *e3 = entryCreate(field3, value3, expiry3); + verify_entry_properties(e3, field3, value_copy3, expiry3, true, true); + + // Test with non-embedded field and value with no expiry + sds field4 = sdsnew(LONG_FIELD); + sds value4 = sdsnew(LONG_VALUE); + sds value_copy4 = sdsdup(value4); + long long expiry4 = EXPIRY_NONE; + entry *e4 = entryCreate(field4, value4, expiry4); + verify_entry_properties(e4, field4, value_copy4, expiry4, false, true); + + entryFree(e1); + entryFree(e2); + entryFree(e3); + entryFree(e4); + + // Free field as entryCreate doesn't take ownership + sdsfree(field1); + sdsfree(field2); + sdsfree(field3); + sdsfree(field4); + + sdsfree(value_copy1); + sdsfree(value_copy2); + sdsfree(value_copy3); + sdsfree(value_copy4); + + return 0; +} + +/** + * Test entryUpdate with various combinations of value and expiry changes: + * 1. Update only the value (keeping embedded) + * 2. Update only the expiry (keeping embedded) + * 3. 
Update both value and expiry (keeping embedded) + * 4. Update with no changes (should return same entry) + * 5. Update to a value that's too large to be embedded + * 6. Update expiry of a non-embedded entry + * 7. Update from non-embedded back to embedded value + * 8. Update entry to less then 3/4 allocation size + * 9. Update entry to more than 3/4 allocation size + * 8. Update entry to exactly 3/4 allocation size + */ +int test_entryUpdate(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + + // Create embedded entry + sds value1 = sdsnew(SHORT_VALUE); + sds field = sdsnew(SHORT_FIELD); + sds value_copy1 = sdsdup(value1); + long long expiry1 = 100; + entry *e1 = entryCreate(field, value1, expiry1); + verify_entry_properties(e1, field, value_copy1, expiry1, true, false); + + // Update only value (keeping embedded) + sds value2 = sdsnew("bar2"); + sds value_copy2 = sdsdup(value2); + long long expiry2 = expiry1; + entry *e2 = entryUpdate(e1, value2, expiry2); + verify_entry_properties(e2, field, value_copy2, expiry2, true, false); + + // Update only expiry (keeping embedded) + long long expiry3 = 200; + entry *e3 = entryUpdate(e2, NULL, expiry3); + verify_entry_properties(e3, field, value_copy2, expiry3, true, false); + + // Update both value and expiry (keeping embedded) + sds value4 = sdsnew("bar4"); + long long expiry4 = 300; + sds value_copy4 = sdsdup(value4); + entry *e4 = entryUpdate(e3, value4, expiry4); + verify_entry_properties(e4, field, value_copy4, expiry4, true, false); + + // Update with no changes (should return same entry) + entry *e5 = entryUpdate(e4, NULL, expiry4); + verify_entry_properties(e5, field, value_copy4, expiry4, true, false); + TEST_ASSERT(e5 == e4); + + // Update to a value that's too large to be embedded + sds value6 = sdsnew(LONG_VALUE); + sds value_copy6 = sdsdup(value6); + long long expiry6 = expiry4; + entry *e6 = entryUpdate(e5, value6, expiry6); + verify_entry_properties(e6, field, value_copy6, 
expiry6, true, true); + + // Update expiry of a non-embedded entry + long long expiry7 = 400; + entry *e7 = entryUpdate(e6, NULL, expiry7); + verify_entry_properties(e7, field, value_copy6, expiry7, true, true); + + // Update from non-embedded back to embedded value + sds value8 = sdsnew("bar8"); + sds value_copy8 = sdsdup(value8); + long long expiry8 = expiry7; + entry *e8 = entryUpdate(e7, value8, expiry8); + verify_entry_properties(e8, field, value_copy8, expiry8, true, false); + + // Update value with identical value (keeping embedded) + sds value9 = sdsnew("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"); + sds value_copy9 = sdsdup(value9); + long long expiry9 = expiry8; + entry *e9 = entryUpdate(e8, value9, expiry9); + verify_entry_properties(e9, field, value_copy9, expiry9, true, false); + + // Update the value so that memory usage is less than 3/4 of the current allocation size + // Ensuring required_embedded_size < current_embedded_allocation_size * 3 / 4, which creates a new entry + size_t current_embedded_allocation_size = entryMemUsage(e9); + sds value10 = sdsnew("xxxxxxxxxxxxxxxxxxxxx"); + sds value_copy10 = sdsdup(value10); + long long expiry10 = expiry9; + entry *e10 = entryUpdate(e9, value10, expiry10); + verify_entry_properties(e10, field, value_copy10, expiry10, true, false); + TEST_ASSERT(entryMemUsage(e10) < current_embedded_allocation_size * 3 / 4); + TEST_ASSERT(e10 != e9); + + // Update the value so that memory usage is at least 3/4 of the current memory usage + // Ensuring required_embedded_size > current_embedded_allocation_size * 3 / 4 without creating a new entry + current_embedded_allocation_size = entryMemUsage(e10); + sds value11 = sdsnew("yyyyyyyyyyyyy"); + sds value_copy11 = sdsdup(value11); + long long expiry11 = expiry10; + entry *e11 = entryUpdate(e10, value11, expiry11); + verify_entry_properties(e11, field, value_copy11, expiry11, true, false); + TEST_ASSERT(entryMemUsage(e11) >= current_embedded_allocation_size * 3 / 4); + 
TEST_ASSERT(entryMemUsage(e11) <= current_embedded_allocation_size); + TEST_ASSERT(entryMemUsage(e11) <= + EMBED_VALUE_MAX_ALLOC_SIZE); + TEST_ASSERT(e10 == e11); + + // Update the value so that memory usage is exactly equal to the current allocation size + // Ensuring required_embedded_size == current_embedded_allocation_size without creating a new entry + current_embedded_allocation_size = entryMemUsage(e11); + sds value12 = sdsnew("zzzzzzzzzzzzz"); + sds value_copy12 = sdsdup(value12); + long long expiry12 = expiry11; + entry *e12 = entryUpdate(e11, value12, expiry12); + verify_entry_properties(e11, field, value_copy12, expiry12, true, false); + TEST_ASSERT(entryMemUsage(e12) == current_embedded_allocation_size); + TEST_ASSERT(entryMemUsage(e12) <= EMBED_VALUE_MAX_ALLOC_SIZE); + TEST_ASSERT(e12 == e11); + + entryFree(e12); + sdsfree(field); + sdsfree(value_copy1); + sdsfree(value_copy2); + sdsfree(value_copy4); + sdsfree(value_copy6); + sdsfree(value_copy8); + sdsfree(value_copy9); + sdsfree(value_copy10); + sdsfree(value_copy11); + sdsfree(value_copy12); + + return 0; +} + +/** + * Test setting expiry on an entry: + * 1. No expiry + * 2. Set expiry on entry without expiry + * 3. Update expiry on entry with expiry + * 4. Test with non-embedded entry + * 5. 
Set expiry on non-embedded entry + */ +int test_entryHasexpiry_entrySetExpiry(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + + // No expiry + sds field1 = sdsnew(SHORT_FIELD); + sds value1 = sdsnew(SHORT_VALUE); + entry *e1 = entryCreate(field1, value1, EXPIRY_NONE); + TEST_ASSERT(entryHasExpiry(e1) == false); + TEST_ASSERT(entryGetExpiry(e1) == EXPIRY_NONE); + + // Set expiry on entry without expiry + long long expiry2 = 100; + entry *e2 = entrySetExpiry(e1, expiry2); + TEST_ASSERT(entryHasExpiry(e2) == true); + TEST_ASSERT(entryGetExpiry(e2) == expiry2); + + // Update expiry on entry with expiry + long long expiry3 = 200; + entry *e3 = entrySetExpiry(e2, expiry3); + TEST_ASSERT(entryHasExpiry(e3) == true); + TEST_ASSERT(entryGetExpiry(e3) == expiry3); + TEST_ASSERT(e2 == e3); // Should be the same pointer when just updating expiry + + // Test with non-embedded entry + sds field4 = sdsnew(LONG_FIELD); + sds value4 = sdsnew(LONG_VALUE); + entry *e4 = entryCreate(field4, value4, EXPIRY_NONE); + TEST_ASSERT(entryHasExpiry(e4) == false); + TEST_ASSERT(entryHasEmbeddedValue(e4) == false); + + // Set expiry on entry without expiry + long long expiry5 = 100; + entry *e5 = entrySetExpiry(e4, expiry5); + TEST_ASSERT(entryHasExpiry(e5) == true); + TEST_ASSERT(entryGetExpiry(e5) == expiry5); + + // Update expiry on entry with expiry + long long expiry6 = 200; + entry *e6 = entrySetExpiry(e5, expiry6); + TEST_ASSERT(entryHasExpiry(e6) == true); + TEST_ASSERT(entryGetExpiry(e6) == expiry6); + TEST_ASSERT(e5 == e6); // Should be the same pointer when just updating expiry + + entryFree(e3); + entryFree(e6); + sdsfree(field1); + sdsfree(field4); + + return 0; +} + +/** + * Test entryIsExpired: + * 1. No expiry + * 2. Future expiry + * 3. Current time expiry + * 4. Past expiry + * 5. Test with loading mode + * 6. Test with import mode and import source client + * 7. Test with import mode and import source client and import expiry + * 8. 
Test with import mode and import source client and import expiry and import expiry is in the past + */ +int test_entryIsExpired(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + + // Setup server state + enterExecutionUnit(1, ustime()); + long long current_time = commandTimeSnapshot(); + + // No expiry + sds field1 = sdsnew(SHORT_FIELD); + sds value1 = sdsnew(SHORT_VALUE); + entry *e1 = entryCreate(field1, value1, EXPIRY_NONE); + TEST_ASSERT(entryGetExpiry(e1) == EXPIRY_NONE); + TEST_ASSERT(entryIsExpired(e1) == false); + + // Future expiry + sds field2 = sdsnew(SHORT_FIELD); + sds value2 = sdsnew(SHORT_VALUE); + long long future_time = current_time + 10000; // 10 seconds in future + entry *e2 = entryCreate(field2, value2, future_time); + TEST_ASSERT(entryGetExpiry(e2) == future_time); + TEST_ASSERT(entryIsExpired(e2) == false); + + // Current time expiry + sds field3 = sdsnew(SHORT_FIELD); + sds value3 = sdsnew(SHORT_VALUE); + entry *e3 = entryCreate(field3, value3, current_time); + TEST_ASSERT(entryGetExpiry(e3) == current_time); + TEST_ASSERT(entryIsExpired(e3) == false); + + // Test with past expiry + sds field4 = sdsnew(SHORT_FIELD); + sds value4 = sdsnew(SHORT_VALUE); + long long past_time = current_time - 10000; // 10 seconds ago + entry *e4 = entryCreate(field4, value4, past_time); + TEST_ASSERT(entryGetExpiry(e4) == past_time); + TEST_ASSERT(entryIsExpired(e4) == true); + + entryFree(e1); + entryFree(e2); + entryFree(e3); + entryFree(e4); + sdsfree(field1); + sdsfree(field2); + sdsfree(field3); + sdsfree(field4); + exitExecutionUnit(); + return 0; +} + +/** + * Test entryMemUsage: + * 1. 
Embedded entry tests: + * - Initial creation without expiry + * - Adding expiry (should increase memory usage) + * - Updating expiry (should not change memory usage) + * - Updating value while keeping it embedded: + * * To smaller value (should not decrease memory usage) + * * To bigger value (should not increase memory usage) + * + * 2. Non-embedded entry tests: + * - Initial creation without expiry + * - Adding expiry (should increase memory usage) + * - Updating expiry (should not change memory usage) + * - Updating value: + * * To smaller value (should decrease memory usage) + * * To bigger value (should increase memory usage) + */ +int test_entryMemUsage_entrySetExpiry_entrySetValue(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + + // Tests with embedded entry + // Embedded entry without expiry + sds field1 = sdsnew(SHORT_FIELD); + sds value1 = sdsnew(SHORT_VALUE); + sds value_copy1 = sdsdup(value1); + long long expiry1 = EXPIRY_NONE; + entry *e1 = entryCreate(field1, value1, expiry1); + size_t e1_entryMemUsage = entryMemUsage(e1); + verify_entry_properties(e1, field1, value_copy1, expiry1, false, false); + TEST_ASSERT(e1_entryMemUsage > 0); + + // Add expiry to embedded entry without expiry + // This should increase memory usage by sizeof(long long) + 2 bytes + // (long long for the expiry value, 2 bytes for SDS header adjustment) + long long expiry2 = 100; + entry *e2 = entrySetExpiry(e1, expiry2); + size_t e2_entryMemUsage = entryMemUsage(e2); + verify_entry_properties(e2, field1, value_copy1, expiry2, true, false); + TEST_ASSERT(zmalloc_usable_size((char *)e2 - sizeof(long long) - 3) == e2_entryMemUsage); + + // Update expiry on an entry that already has one + // This should NOT change memory usage as we're just updating the expiry value (long long) + long long expiry3 = 10000; + entry *e3 = entrySetExpiry(e2, expiry3); + size_t e3_entryMemUsage = entryMemUsage(e3); + verify_entry_properties(e3, field1, value_copy1, 
expiry3, true, false); + TEST_ASSERT(e3_entryMemUsage == e2_entryMemUsage); + + // Update to smaller value (keeping embedded) + // Memory usage should decrease by the difference in value size (2 bytes) + sds value4 = sdsnew("x"); + sds value_copy4 = sdsdup(value4); + entry *e4 = entrySetValue(e3, value4); + size_t e4_entryMemUsage = entryMemUsage(e4); + verify_entry_properties(e4, field1, value_copy4, expiry3, true, false); + TEST_ASSERT(zmalloc_usable_size((char *)e4 - sizeof(long long) - 3) == e4_entryMemUsage); + + // Update to bigger value (keeping embedded) + // Memory usage should increase by the difference in value size (1 byte) + sds value5 = sdsnew("xx"); + sds value_copy5 = sdsdup(value5); + entry *e5 = entrySetValue(e4, value5); + size_t e5_entryMemUsage = entryMemUsage(e5); + verify_entry_properties(e5, field1, value_copy5, expiry3, true, false); + TEST_ASSERT(zmalloc_usable_size((char *)e5 - sizeof(long long) - 3) == e5_entryMemUsage); + + // Tests with non-embedded entry + // Non-embedded entry without expiry + sds field6 = sdsnew(LONG_FIELD); + field6 = sdscat(field6, LONG_FIELD); // Double the length to ensure non-embedded entry + sds value6 = sdsnew(LONG_VALUE); + sds value_copy6 = sdsdup(value6); + long long expiry6 = EXPIRY_NONE; + entry *e6 = entryCreate(field6, value6, EXPIRY_NONE); + size_t e6_entryMemUsage = entryMemUsage(e6); + verify_entry_properties(e6, field6, value_copy6, expiry6, false, true); + TEST_ASSERT(e6_entryMemUsage > 0); + + // Add expiry to non-embedded entry without expiry + // For non-embedded entries this increases memory by exactly sizeof(long long) + long long expiry7 = 100; + entry *e7 = entrySetExpiry(e6, expiry7); + size_t e7_entryMemUsage = entryMemUsage(e7); + verify_entry_properties(e7, field6, value_copy6, expiry7, true, true); + size_t expected_e7_entry_mem = zmalloc_usable_size((char *)e7 - sizeof(long long) - sizeof(sds) - 3) + sdsAllocSize(value6); + TEST_ASSERT(expected_e7_entry_mem == e7_entryMemUsage); + + 
// Update expiry on a non-embedded entry that already has one + // This should not change memory usage as we're just updating the expiry value + long long expiry8 = 10000; + entry *e8 = entrySetExpiry(e7, expiry8); + size_t e8_entryMemUsage = entryMemUsage(e8); + verify_entry_properties(e8, field6, value_copy6, expiry8, true, true); + TEST_ASSERT(e8_entryMemUsage == e7_entryMemUsage); + + // Update to smaller value (keeping non-embedded) + // Memory usage should increase by at least the difference between LONG_VALUE and "x" (143) + sds value9 = sdsnew("x"); + sds value_copy9 = sdsdup(value9); + entry *e9 = entrySetValue(e8, value9); + size_t e9_entryMemUsage = entryMemUsage(e9); + verify_entry_properties(e9, field6, value_copy9, expiry8, true, true); + size_t expected_e9_entry_mem = zmalloc_usable_size((char *)e9 - sizeof(long long) - sizeof(sds) - 3) + sdsAllocSize(value9); + TEST_ASSERT(expected_e9_entry_mem == e9_entryMemUsage); + + // Update to bigger value (keeping non-embedded) + // Memory usage increases by the difference in value size (1 byte) + sds value10 = sdsnew("xx"); + sds value_copy10 = sdsdup(value10); + entry *e10 = entrySetValue(e9, value10); + size_t e10_entryMemUsage = entryMemUsage(e10); + size_t expected_10_entry_mem = zmalloc_usable_size((char *)e10 - sizeof(long long) - sizeof(sds) - 3) + sdsAllocSize(value10); + TEST_ASSERT(expected_10_entry_mem == e10_entryMemUsage); + + entryFree(e5); + entryFree(e10); + sdsfree(field1); + sdsfree(field6); + sdsfree(value_copy1); + sdsfree(value_copy4); + sdsfree(value_copy5); + sdsfree(value_copy6); + sdsfree(value_copy9); + sdsfree(value_copy10); + + return 0; +} diff --git a/src/unit/test_files.h b/src/unit/test_files.h index bb00399342..34459201bb 100644 --- a/src/unit/test_files.h +++ b/src/unit/test_files.h @@ -20,6 +20,11 @@ int test_dictDisableResizeReduceTo3(int argc, char **argv, int flags); int test_dictDeleteOneKeyTriggerResizeAgain(int argc, char **argv, int flags); int test_dictBenchmark(int 
argc, char **argv, int flags); int test_endianconv(int argc, char *argv[], int flags); +int test_entryCreate(int argc, char **argv, int flags); +int test_entryUpdate(int argc, char **argv, int flags); +int test_entryHasexpiry_entrySetExpiry(int argc, char **argv, int flags); +int test_entryIsExpired(int argc, char **argv, int flags); +int test_entryMemUsage_entrySetExpiry_entrySetValue(int argc, char **argv, int flags); int test_cursor(int argc, char **argv, int flags); int test_set_hash_function_seed(int argc, char **argv, int flags); int test_add_find_delete(int argc, char **argv, int flags); @@ -196,6 +201,14 @@ int test_reclaimFilePageCache(int argc, char **argv, int flags); int test_writePointerWithPadding(int argc, char **argv, int flags); int test_valkey_strtod(int argc, char **argv, int flags); int test_vector(int argc, char **argv, int flags); +int test_vset_add_and_iterate(int argc, char **argv, int flags); +int test_vset_large_batch_same_expiry(int argc, char **argv, int flags); +int test_vset_large_batch_update_entry_same_expiry(int argc, char **argv, int flags); +int test_vset_large_batch_update_entry_multiple_expiries(int argc, char **argv, int flags); +int test_vset_iterate_multiple_expiries(int argc, char **argv, int flags); +int test_vset_add_and_remove_all(int argc, char **argv, int flags); +int test_vset_defrag(int argc, char **argv, int flags); +int test_vset_fuzzer(int argc, char **argv, int flags); int test_ziplistCreateIntList(int argc, char **argv, int flags); int test_ziplistPop(int argc, char **argv, int flags); int test_ziplistGetElementAtIndex3(int argc, char **argv, int flags); @@ -242,6 +255,7 @@ unitTest __test_crc64_c[] = {{"test_crc64", test_crc64}, {NULL, NULL}}; unitTest __test_crc64combine_c[] = {{"test_crc64combine", test_crc64combine}, {NULL, NULL}}; unitTest __test_dict_c[] = {{"test_dictCreate", test_dictCreate}, {"test_dictAdd16Keys", test_dictAdd16Keys}, {"test_dictDisableResize", test_dictDisableResize}, 
{"test_dictAddOneKeyTriggerResize", test_dictAddOneKeyTriggerResize}, {"test_dictDeleteKeys", test_dictDeleteKeys}, {"test_dictDeleteOneKeyTriggerResize", test_dictDeleteOneKeyTriggerResize}, {"test_dictEmptyDirAdd128Keys", test_dictEmptyDirAdd128Keys}, {"test_dictDisableResizeReduceTo3", test_dictDisableResizeReduceTo3}, {"test_dictDeleteOneKeyTriggerResizeAgain", test_dictDeleteOneKeyTriggerResizeAgain}, {"test_dictBenchmark", test_dictBenchmark}, {NULL, NULL}}; unitTest __test_endianconv_c[] = {{"test_endianconv", test_endianconv}, {NULL, NULL}}; +unitTest __test_entry_c[] = {{"test_entryCreate", test_entryCreate}, {"test_entryUpdate", test_entryUpdate}, {"test_entryHasexpiry_entrySetExpiry", test_entryHasexpiry_entrySetExpiry}, {"test_entryIsExpired", test_entryIsExpired}, {"test_entryMemUsage_entrySetExpiry_entrySetValue", test_entryMemUsage_entrySetExpiry_entrySetValue}, {NULL, NULL}}; unitTest __test_hashtable_c[] = {{"test_cursor", test_cursor}, {"test_set_hash_function_seed", test_set_hash_function_seed}, {"test_add_find_delete", test_add_find_delete}, {"test_add_find_delete_avoid_resize", test_add_find_delete_avoid_resize}, {"test_instant_rehashing", test_instant_rehashing}, {"test_bucket_chain_length", test_bucket_chain_length}, {"test_two_phase_insert_and_pop", test_two_phase_insert_and_pop}, {"test_replace_reallocated_entry", test_replace_reallocated_entry}, {"test_incremental_find", test_incremental_find}, {"test_scan", test_scan}, {"test_iterator", test_iterator}, {"test_safe_iterator", test_safe_iterator}, {"test_compact_bucket_chain", test_compact_bucket_chain}, {"test_random_entry", test_random_entry}, {"test_random_entry_with_long_chain", test_random_entry_with_long_chain}, {"test_random_entry_sparse_table", test_random_entry_sparse_table}, {NULL, NULL}}; unitTest __test_intset_c[] = {{"test_intsetValueEncodings", test_intsetValueEncodings}, {"test_intsetBasicAdding", test_intsetBasicAdding}, {"test_intsetLargeNumberRandomAdd", 
test_intsetLargeNumberRandomAdd}, {"test_intsetUpgradeFromint16Toint32", test_intsetUpgradeFromint16Toint32}, {"test_intsetUpgradeFromint16Toint64", test_intsetUpgradeFromint16Toint64}, {"test_intsetUpgradeFromint32Toint64", test_intsetUpgradeFromint32Toint64}, {"test_intsetStressLookups", test_intsetStressLookups}, {"test_intsetStressAddDelete", test_intsetStressAddDelete}, {NULL, NULL}}; unitTest __test_kvstore_c[] = {{"test_kvstoreAdd16Keys", test_kvstoreAdd16Keys}, {"test_kvstoreIteratorRemoveAllKeysNoDeleteEmptyHashtable", test_kvstoreIteratorRemoveAllKeysNoDeleteEmptyHashtable}, {"test_kvstoreIteratorRemoveAllKeysDeleteEmptyHashtable", test_kvstoreIteratorRemoveAllKeysDeleteEmptyHashtable}, {"test_kvstoreHashtableIteratorRemoveAllKeysNoDeleteEmptyHashtable", test_kvstoreHashtableIteratorRemoveAllKeysNoDeleteEmptyHashtable}, {"test_kvstoreHashtableIteratorRemoveAllKeysDeleteEmptyHashtable", test_kvstoreHashtableIteratorRemoveAllKeysDeleteEmptyHashtable}, {NULL, NULL}}; @@ -255,6 +269,7 @@ unitTest __test_sha1_c[] = {{"test_sha1", test_sha1}, {NULL, NULL}}; unitTest __test_util_c[] = {{"test_string2ll", test_string2ll}, {"test_string2l", test_string2l}, {"test_ll2string", test_ll2string}, {"test_ld2string", test_ld2string}, {"test_fixedpoint_d2string", test_fixedpoint_d2string}, {"test_version2num", test_version2num}, {"test_reclaimFilePageCache", test_reclaimFilePageCache}, {"test_writePointerWithPadding", test_writePointerWithPadding}, {NULL, NULL}}; unitTest __test_valkey_strtod_c[] = {{"test_valkey_strtod", test_valkey_strtod}, {NULL, NULL}}; unitTest __test_vector_c[] = {{"test_vector", test_vector}, {NULL, NULL}}; +unitTest __test_vset_c[] = {{"test_vset_add_and_iterate", test_vset_add_and_iterate}, {"test_vset_large_batch_same_expiry", test_vset_large_batch_same_expiry}, {"test_vset_large_batch_update_entry_same_expiry", test_vset_large_batch_update_entry_same_expiry}, {"test_vset_large_batch_update_entry_multiple_expiries", 
test_vset_large_batch_update_entry_multiple_expiries}, {"test_vset_iterate_multiple_expiries", test_vset_iterate_multiple_expiries}, {"test_vset_add_and_remove_all", test_vset_add_and_remove_all}, {"test_vset_defrag", test_vset_defrag}, {"test_vset_fuzzer", test_vset_fuzzer}, {NULL, NULL}}; unitTest __test_ziplist_c[] = {{"test_ziplistCreateIntList", test_ziplistCreateIntList}, {"test_ziplistPop", test_ziplistPop}, {"test_ziplistGetElementAtIndex3", test_ziplistGetElementAtIndex3}, {"test_ziplistGetElementOutOfRange", test_ziplistGetElementOutOfRange}, {"test_ziplistGetLastElement", test_ziplistGetLastElement}, {"test_ziplistGetFirstElement", test_ziplistGetFirstElement}, {"test_ziplistGetElementOutOfRangeReverse", test_ziplistGetElementOutOfRangeReverse}, {"test_ziplistIterateThroughFullList", test_ziplistIterateThroughFullList}, {"test_ziplistIterateThroughListFrom1ToEnd", test_ziplistIterateThroughListFrom1ToEnd}, {"test_ziplistIterateThroughListFrom2ToEnd", test_ziplistIterateThroughListFrom2ToEnd}, {"test_ziplistIterateThroughStartOutOfRange", test_ziplistIterateThroughStartOutOfRange}, {"test_ziplistIterateBackToFront", test_ziplistIterateBackToFront}, {"test_ziplistIterateBackToFrontDeletingAllItems", test_ziplistIterateBackToFrontDeletingAllItems}, {"test_ziplistDeleteInclusiveRange0To0", test_ziplistDeleteInclusiveRange0To0}, {"test_ziplistDeleteInclusiveRange0To1", test_ziplistDeleteInclusiveRange0To1}, {"test_ziplistDeleteInclusiveRange1To2", test_ziplistDeleteInclusiveRange1To2}, {"test_ziplistDeleteWithStartIndexOutOfRange", test_ziplistDeleteWithStartIndexOutOfRange}, {"test_ziplistDeleteWithNumOverflow", test_ziplistDeleteWithNumOverflow}, {"test_ziplistDeleteFooWhileIterating", test_ziplistDeleteFooWhileIterating}, {"test_ziplistReplaceWithSameSize", test_ziplistReplaceWithSameSize}, {"test_ziplistReplaceWithDifferentSize", test_ziplistReplaceWithDifferentSize}, {"test_ziplistRegressionTestForOver255ByteStrings", 
test_ziplistRegressionTestForOver255ByteStrings}, {"test_ziplistRegressionTestDeleteNextToLastEntries", test_ziplistRegressionTestDeleteNextToLastEntries}, {"test_ziplistCreateLongListAndCheckIndices", test_ziplistCreateLongListAndCheckIndices}, {"test_ziplistCompareStringWithZiplistEntries", test_ziplistCompareStringWithZiplistEntries}, {"test_ziplistMergeTest", test_ziplistMergeTest}, {"test_ziplistStressWithRandomPayloadsOfDifferentEncoding", test_ziplistStressWithRandomPayloadsOfDifferentEncoding}, {"test_ziplistCascadeUpdateEdgeCases", test_ziplistCascadeUpdateEdgeCases}, {"test_ziplistInsertEdgeCase", test_ziplistInsertEdgeCase}, {"test_ziplistStressWithVariableSize", test_ziplistStressWithVariableSize}, {"test_BenchmarkziplistFind", test_BenchmarkziplistFind}, {"test_BenchmarkziplistIndex", test_BenchmarkziplistIndex}, {"test_BenchmarkziplistValidateIntegrity", test_BenchmarkziplistValidateIntegrity}, {"test_BenchmarkziplistCompareWithString", test_BenchmarkziplistCompareWithString}, {"test_BenchmarkziplistCompareWithNumber", test_BenchmarkziplistCompareWithNumber}, {"test_ziplistStress__ziplistCascadeUpdate", test_ziplistStress__ziplistCascadeUpdate}, {NULL, NULL}}; unitTest __test_zipmap_c[] = {{"test_zipmapIterateWithLargeKey", test_zipmapIterateWithLargeKey}, {"test_zipmapIterateThroughElements", test_zipmapIterateThroughElements}, {NULL, NULL}}; unitTest __test_zmalloc_c[] = {{"test_zmallocAllocReallocCallocAndFree", test_zmallocAllocReallocCallocAndFree}, {"test_zmallocAllocZeroByteAndFree", test_zmallocAllocZeroByteAndFree}, {NULL, NULL}}; @@ -268,6 +283,7 @@ struct unitTestSuite { {"test_crc64combine.c", __test_crc64combine_c}, {"test_dict.c", __test_dict_c}, {"test_endianconv.c", __test_endianconv_c}, + {"test_entry.c", __test_entry_c}, {"test_hashtable.c", __test_hashtable_c}, {"test_intset.c", __test_intset_c}, {"test_kvstore.c", __test_kvstore_c}, @@ -281,6 +297,7 @@ struct unitTestSuite { {"test_util.c", __test_util_c}, {"test_valkey_strtod.c", 
__test_valkey_strtod_c}, {"test_vector.c", __test_vector_c}, + {"test_vset.c", __test_vset_c}, {"test_ziplist.c", __test_ziplist_c}, {"test_zipmap.c", __test_zipmap_c}, {"test_zmalloc.c", __test_zmalloc_c}, diff --git a/src/unit/test_vset.c b/src/unit/test_vset.c new file mode 100644 index 0000000000..f864687558 --- /dev/null +++ b/src/unit/test_vset.c @@ -0,0 +1,518 @@ +#include "../vset.h" +#include "../entry.h" +#include "test_help.h" +#include "../zmalloc.h" + +#include +#include +#include +#include +#include +#include +#include + +typedef entry mock_entry; + +static mock_entry *mockCreateEntry(const char *keystr, long long expiry) { + sds field = sdsnew(keystr); + mock_entry *e = entryCreate(field, sdsnew("value"), expiry); + sdsfree(field); + return e; +} + +static void mockFreeEntry(void *entry) { + // printf("mockFreeEntry: %p\n", entry); + entryFree(entry); +} + +static mock_entry *mockEntryUpdate(mock_entry *entry, long long expiry) { + mock_entry *new_entry = entryCreate(entryGetField(entry), sdsdup(entryGetValue(entry)), expiry); + entryFree(entry); + return new_entry; +} + +static long long mockGetExpiry(const void *entry) { + return entryGetExpiry(entry); +} + +int test_vset_add_and_iterate(int argc, char **argv, int flags) { + (void)argc; + (void)argv; + (void)flags; + + vset set; + vsetInit(&set); + + mock_entry *e1 = mockCreateEntry("item1", 123); + mock_entry *e2 = mockCreateEntry("item2", 456); + + TEST_ASSERT(vsetAddEntry(&set, mockGetExpiry, e1)); + TEST_ASSERT(vsetAddEntry(&set, mockGetExpiry, e2)); + + TEST_ASSERT(!vsetIsEmpty(&set)); + + vsetIterator it; + vsetInitIterator(&set, &it); + + void *entry; + int count = 0; + while (vsetNext(&it, &entry)) { + TEST_EXPECT(entry != NULL); + count++; + } + + TEST_ASSERT(count == 2); + + vsetResetIterator(&it); + vsetRelease(&set); + mockFreeEntry(e1); + mockFreeEntry(e2); + + TEST_PRINT_INFO("Test passed with %d expects", failed_expects); + return 0; +} + +int test_vset_large_batch_same_expiry(int 
argc, char **argv, int flags) { + (void)argc; + (void)argv; + (void)flags; + + vset set; + vsetInit(&set); + + const long long expiry_time = 1000LL; + const int total_entries = 200; + + // Allocate and add 200 entries with same expiry + mock_entry **entries = zmalloc(sizeof(mock_entry *) * total_entries); + TEST_ASSERT(entries != NULL); + + for (int i = 0; i < total_entries; i++) { + char key_buf[32]; + snprintf(key_buf, sizeof(key_buf), "entry_%d", i); + entries[i] = mockCreateEntry(key_buf, expiry_time); + TEST_ASSERT(vsetAddEntry(&set, mockGetExpiry, entries[i])); + } + + // Verify set is not empty + TEST_ASSERT(!vsetIsEmpty(&set)); + + // Iterate all entries and count them + vsetIterator it; + vsetInitIterator(&set, &it); + + void *entry; + int count = 0; + while (vsetNext(&it, &entry)) { + TEST_EXPECT(entry != NULL); + count++; + } + TEST_ASSERT(count == total_entries); + + // Cleanup + vsetResetIterator(&it); + vsetRelease(&set); + + for (int i = 0; i < total_entries; i++) { + mockFreeEntry(entries[i]); + } + zfree(entries); + + TEST_PRINT_INFO("Inserted and iterated %d entries with same expiry", total_entries); + return 0; +} + +int test_vset_large_batch_update_entry_same_expiry(int argc, char **argv, int flags) { + (void)argc; + (void)argv; + (void)flags; + + vset set; + vsetInit(&set); + + const long long expiry_time = 1000LL; + const unsigned int total_entries = 1000; + + mock_entry *entries[total_entries]; + + for (unsigned int i = 0; i < total_entries; i++) { + char key_buf[32]; + snprintf(key_buf, sizeof(key_buf), "entry_%d", i); + entries[i] = mockCreateEntry(key_buf, expiry_time); + TEST_ASSERT(vsetAddEntry(&set, mockGetExpiry, entries[i])); + } + // Verify set is not empty + TEST_ASSERT(!vsetIsEmpty(&set)); + + // Now iterate and replace all entries + for (unsigned int i = 0; i < total_entries; i++) { + mock_entry *old_entry = entries[i]; + entries[i] = mockEntryUpdate(entries[i], expiry_time); + TEST_ASSERT(vsetUpdateEntry(&set, mockGetExpiry, 
old_entry, entries[i], expiry_time, expiry_time)); + } + + for (unsigned int i = 0; i < total_entries; i++) { + TEST_ASSERT(vsetRemoveEntry(&set, mockGetExpiry, entries[i])); + } + + // Verify set is empty + TEST_ASSERT(vsetIsEmpty(&set)); + + // Cleanup + for (unsigned int i = 0; i < total_entries; i++) { + mockFreeEntry(entries[i]); + } + + TEST_PRINT_INFO("Inserted, updated and deleted %d entries with same expiry", total_entries); + return 0; +} + +int test_vset_large_batch_update_entry_multiple_expiries(int argc, char **argv, int flags) { + (void)argc; + (void)argv; + (void)flags; + const unsigned int total_entries = 1000; + + vset set; + vsetInit(&set); + + // Prepare entries with mixed expiry times, some duplicates + mock_entry *entries[total_entries]; + + // Initialize keys + for (unsigned int i = 0; i < total_entries; i++) { + char key_buf[32]; + snprintf(key_buf, sizeof(key_buf), "entry_%d", i); + long long expiry_time = rand() % 10000; + entries[i] = mockCreateEntry(key_buf, expiry_time); + TEST_ASSERT(vsetAddEntry(&set, mockGetExpiry, entries[i])); + } + // Verify set is not empty + TEST_ASSERT(!vsetIsEmpty(&set)); + + // Now iterate and replace all entries + for (unsigned int i = 0; i < total_entries; i++) { + mock_entry *old_entry = entries[i]; + long long old_expiry = entryGetExpiry(entries[i]); + long long new_expiry = old_expiry + rand() % 100000; + entries[i] = mockEntryUpdate(entries[i], new_expiry); + TEST_ASSERT(vsetUpdateEntry(&set, mockGetExpiry, old_entry, entries[i], old_expiry, new_expiry)); + } + + for (unsigned int i = 0; i < total_entries; i++) { + TEST_ASSERT(vsetRemoveEntry(&set, mockGetExpiry, entries[i])); + } + + // Verify set is empty + TEST_ASSERT(vsetIsEmpty(&set)); + + // Cleanup + for (unsigned int i = 0; i < total_entries; i++) { + mockFreeEntry(entries[i]); + } + + TEST_PRINT_INFO("Inserted, updated and deleted %d entries with different expiry", total_entries); + return 0; +} + +int test_vset_iterate_multiple_expiries(int 
argc, char **argv, int flags) { + (void)argc; + (void)argv; + (void)flags; + const unsigned int total_entries = 5; + + vset set; + vsetInit(&set); + + // Prepare entries with mixed expiry times, some duplicates + mock_entry *entries[total_entries]; + + // Initialize keys + for (unsigned int i = 0; i < total_entries; i++) { + char key_buf[32]; + snprintf(key_buf, sizeof(key_buf), "entry_%d", i); + long long expiry_time = rand() % 10000; + entries[i] = mockCreateEntry(key_buf, expiry_time); + TEST_ASSERT(vsetAddEntry(&set, mockGetExpiry, entries[i])); + } + + vsetIterator it; + vsetInitIterator(&set, &it); + + int found[5] = {0}; + int total = 0; + + void *entry; + while (vsetNext(&it, &entry)) { + TEST_EXPECT(entry != NULL); + mock_entry *e = (mock_entry *)entry; + + // Match the entries we inserted + for (int i = 0; i < 5; i++) { + if (strcmp(entryGetField(e), entryGetField(entries[i])) == 0) { + found[i] = 1; + break; + } + } + total++; + } + + TEST_ASSERT(total == 5); + + for (int i = 0; i < 5; i++) { + TEST_EXPECT(found[i]); + } + + vsetResetIterator(&it); + vsetRelease(&set); + for (int i = 0; i < 5; i++) mockFreeEntry(entries[i]); + + TEST_PRINT_INFO("Iterated all %d mixed expiry entries successfully", total); + return 0; +} + +int test_vset_add_and_remove_all(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + + vset set; + vsetInit(&set); + + const int total_entries = 130; + mock_entry *entries[total_entries]; + long long expiry = 5000; + + for (int i = 0; i < total_entries; i++) { + char key[32]; + snprintf(key, sizeof(key), "key_%d", i); + entries[i] = mockCreateEntry(key, expiry); + TEST_ASSERT(vsetAddEntry(&set, mockGetExpiry, entries[i])); + } + + for (int i = 0; i < total_entries; i++) { + TEST_ASSERT(vsetRemoveEntry(&set, mockGetExpiry, entries[i])); + mockFreeEntry(entries[i]); + } + + TEST_ASSERT(vsetIsEmpty(&set)); + vsetRelease(&set); + + TEST_PRINT_INFO("Add/remove %d entries, set size now 0", total_entries); + 
return 0; +} + +/********************* Fuzzer tests ********************************/ + +#define NUM_ITERATIONS 100000 +#define MAX_ENTRIES 10000 +#define NUM_DEFRAG_STEPS 100 + +/* Global array to simulate a test database */ +mock_entry *mock_entries[MAX_ENTRIES]; +int mock_entry_count = 0; + +/* --------- volatileEntryType Callbacks --------- */ +sds mock_entry_get_key(const void *entry) { + return (sds)entry; +} + +long long mock_entry_get_expiry(const void *entry) { + return mockGetExpiry(entry); +} + +int mock_entry_expire(void *entry, void *ctx) { + mock_entry *e = (mock_entry *)entry; + long long now = *(long long *)ctx; + TEST_ASSERT(mock_entry_get_expiry(entry) <= now); + for (int i = 0; i < mock_entry_count; i++) { + if (mock_entries[i] == e) { + // printf("expire entry %p with expiry %llu\n", e, mockGetExpiry(e)); + mockFreeEntry(e); + mock_entries[i] = mock_entries[--mock_entry_count]; + return 1; + } + } + return 0; +} + +/* --------- Helper Functions --------- */ +mock_entry *mock_entry_create(const char *keystr, long long expiry) { + return mockCreateEntry(keystr, expiry); +} + +int insert_mock_entry(vset *set) { + if (mock_entry_count >= MAX_ENTRIES) return 0; + char keybuf[32]; + snprintf(keybuf, sizeof(keybuf), "key_%d", mock_entry_count); + + long long expiry = rand() % 10000 + 100; + mock_entry *e = mock_entry_create(keybuf, expiry); + // printf("adding entry %p with expiry %llu\n", e, expiry); + TEST_ASSERT(vsetAddEntry(set, mockGetExpiry, e)); + mock_entries[mock_entry_count++] = e; + return 0; +} + +int insert_mock_entry_with_expiry(vset *set, long long expiry) { + if (mock_entry_count >= MAX_ENTRIES) return 0; + char keybuf[32]; + snprintf(keybuf, sizeof(keybuf), "key_%d", mock_entry_count); + + mock_entry *e = mock_entry_create(keybuf, expiry); + // printf("adding entry %p with expiry %llu\n", e, expiry); + TEST_ASSERT(vsetAddEntry(set, mockGetExpiry, e)); + mock_entries[mock_entry_count++] = e; + return 0; +} + +int update_mock_entry(vset 
*set) { + if (mock_entry_count == 0) return 0; + int idx = rand() % mock_entry_count; + mock_entry *old = mock_entries[idx]; + long long old_expiry = mockGetExpiry(old); + long long new_expiry = old_expiry + (rand() % 500); + mock_entry *updated = mockEntryUpdate(old, new_expiry); + mock_entries[idx] = updated; + // printf("Update entry %p with entry %p with old expiry %llu new expiry %llu\n", old, updated, old_expiry, new_expiry); + TEST_ASSERT(vsetUpdateEntry(set, mockGetExpiry, old, updated, old_expiry, new_expiry)); + return 0; +} + +int remove_mock_entry(vset *set) { + if (mock_entry_count == 0) return 0; + int idx = rand() % mock_entry_count; + mock_entry *e = mock_entries[idx]; + // printf("removing entry %p with expiry %llu\n", e, mockGetExpiry(e)); + TEST_ASSERT(vsetRemoveEntry(set, mockGetExpiry, e)); + mockFreeEntry(e); + mock_entries[idx] = mock_entries[--mock_entry_count]; + + return 0; +} + + +int expire_mock_entries(vset *set, mstime_t now) { + // printf("Before expired entries entries: %d\n", mock_entry_count); + vsetRemoveExpired(set, mockGetExpiry, mock_entry_expire, now, mock_entry_count, &now); + // printf("After expired %zu entries left entries: %d and set is empty: %s\n", count, mock_entry_count, vsetIsEmpty(set) ? 
"true" : "false"); + return 0; +} + +void *mock_defragfn(void *ptr) { + size_t size = zmalloc_size(ptr); + void *newptr = zmalloc(size); + memcpy(newptr, ptr, size); + zfree(ptr); + return newptr; +} + +int mock_defrag_rax_node(raxNode **noderef) { + raxNode *newnode = mock_defragfn(*noderef); + if (newnode) { + *noderef = newnode; + return 1; + } + return 0; +} + +size_t defrag_vset(vset *set, size_t cursor, size_t steps) { + if (steps == 0) steps = ULONG_MAX; + do { + cursor = vsetScanDefrag(set, cursor, mock_defragfn, mock_defrag_rax_node); + steps--; + } while (cursor != 0 && steps > 0); + return cursor; +} + +int free_mock_entries(void) { + for (int i = 0; i < mock_entry_count; i++) { + mock_entry *e = mock_entries[i]; + mockFreeEntry(e); + } + mock_entry_count = 0; + return 0; +} + +/* --------- Defrag Test --------- */ +int test_vset_defrag(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + srand(time(NULL)); + + vset set; + vsetInit(&set); + + /* defrag empty set */ + TEST_ASSERT(defrag_vset(&set, 0, 0) == 0); + + /* defrag when single entry */ + insert_mock_entry(&set); + TEST_ASSERT(defrag_vset(&set, 0, 0) == 0); + + /* defrag when vector */ + for (int i = 0; i < 127 - 1; i++) + insert_mock_entry(&set); + TEST_ASSERT(defrag_vset(&set, 0, 0) == 0); + + long long expiry = rand() % 10000 + 100; + for (int i = 0; i < 127 * 2; i++) { + insert_mock_entry_with_expiry(&set, expiry); + } + TEST_ASSERT(defrag_vset(&set, 0, 0) == 0); + + size_t cursor = 0; + for (int i = 0; i < NUM_ITERATIONS; i++) { + if (i % NUM_DEFRAG_STEPS == 0) + cursor = defrag_vset(&set, cursor, NUM_DEFRAG_STEPS); + insert_mock_entry_with_expiry(&set, expiry); + } + TEST_ASSERT(defrag_vset(&set, 0, 0) == 0); + + vsetRelease(&set); + free_mock_entries(); + + return 0; +} + +/* --------- Fuzzer Test --------- */ +int test_vset_fuzzer(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + srand(time(NULL)); + + vset set; + 
vsetInit(&set); + + for (int i = 0; i < NUM_ITERATIONS; i++) { + int op = rand() % 5; + switch (op) { + case 0: + case 1: + insert_mock_entry(&set); + break; + case 2: + update_mock_entry(&set); + break; + case 3: + remove_mock_entry(&set); + break; + case 4: + TEST_ASSERT(defrag_vset(&set, 0, 0) == 0); + break; + } + + if (i % 100 == 0) { + mstime_t now = rand() % 10000; + expire_mock_entries(&set, now); + } + } + /* now expire all the entries and check that we have no entries left */ + expire_mock_entries(&set, LONG_LONG_MAX); + TEST_ASSERT(vsetIsEmpty(&set) && mock_entry_count == 0); + vsetRelease(&set); + free_mock_entries(); /* Just in case */ + return 0; +} diff --git a/src/util.c b/src/util.c index aea6ae5937..0e93bbc7a1 100644 --- a/src/util.c +++ b/src/util.c @@ -59,8 +59,6 @@ #include #endif -#define UNUSED(x) ((void)(x)) - /* Glob-style pattern matching. */ static int stringmatchlen_impl(const char *pattern, int patternLen, diff --git a/src/util.h b/src/util.h index 514346939c..db15f2d900 100644 --- a/src/util.h +++ b/src/util.h @@ -33,6 +33,17 @@ #include #include "sds.h" +/* Anti-warning macro... */ +#ifndef UNUSED +#define UNUSED(V) ((void)V) +#endif + +/* min/max */ +#undef min +#undef max +#define min(a, b) ((a) < (b) ? (a) : (b)) +#define max(a, b) ((a) > (b) ? (a) : (b)) + /* The maximum number of characters needed to represent a long double * as a string (long double has a huge range of some 4952 chars, see LDBL_MAX). 
* This should be the size of the buffer given to ld2string */ diff --git a/src/valkey-check-rdb.c b/src/valkey-check-rdb.c index fbb7a31ed0..efe2d674d3 100644 --- a/src/valkey-check-rdb.c +++ b/src/valkey-check-rdb.c @@ -146,8 +146,11 @@ char *rdb_type_string[] = { "stream-v2", "set-listpack", "stream-v3", + "hash-volatile-items", }; +static_assert(sizeof(rdb_type_string) / sizeof(rdb_type_string[0]) == RDB_TYPE_LAST, "Mismatch between enum and string table"); + char *type_name[OBJ_TYPE_MAX] = {"string", "list", "set", "zset", "hash", "module", /* module type is special */ "stream"}; diff --git a/src/vset.c b/src/vset.c new file mode 100644 index 0000000000..4a5bc14418 --- /dev/null +++ b/src/vset.c @@ -0,0 +1,2393 @@ +#include "vset.h" +#include "rax.h" +#include "endianconv.h" +#include "serverassert.h" +#include "hashtable.h" +#include "util.h" +#include "zmalloc.h" + +#include +#include +#include + +#ifndef static_assert +#define static_assert _Static_assert +#endif + +/* + *----------------------------------------------------------------------------- + * Volatile Set - Adaptive, Expiry-aware Set Structure + *----------------------------------------------------------------------------- + * + * The `vset` is a dynamic, memory-efficient container for managing + * entries with expiry semantics. It is designed to efficiently track entries + * that expire at varying times and scales to large sets by adapting its internal + * representation as it grows or shrinks. + * + *----------------------------------------------------------------------------- + * Expiry Buckets and Pointer Tagging + *----------------------------------------------------------------------------- + * + * Internally, the `vset` maintains a single `vsetBucket*` pointer, + * which can point to different types of buckets depending on the number of + * entries and the needed resolution. 
The pointer is tagged using the lowest 3 bits: + * + * #define VSET_BUCKET_NONE -1 + * #define VSET_BUCKET_SINGLE 0x1ULL // pointer to single entry (odd ptr) + * #define VSET_BUCKET_VECTOR 0x2ULL // pointer to pointer vector + * #define VSET_BUCKET_HT 0x4ULL // pointer to hashtable + * #define VSET_BUCKET_RAX 0x6ULL // pointer to radix tree + * + * #define VSET_TAG_MASK 0x7ULL + * #define VSET_PTR_MASK (~VSET_TAG_MASK) + * + * IMPORTANT!!!! - All entries must have LSB set (i.e., be odd-aligned) to be compatible with !!!! + * tagging constraints. + * + *----------------------------------------------------------------------------- + * Time Bucket Management + *----------------------------------------------------------------------------- + * + * Entries are grouped into **time buckets** based on their expiry time. + * Each time bucket represents a window aligned to: + * + * #define VOLATILESET_BUCKET_INTERVAL_MIN (1 << 4) // 16ms + * #define VOLATILESET_BUCKET_INTERVAL_MAX (1 << 13) // 8192ms + * + * A time bucket key is computed by rounding the expiry timestamp up to the + * nearest aligned window using `get_bucket_ts()`. + * + *----------------------------------------------------------------------------- + * Entry Addition and Bucket Promotion + *----------------------------------------------------------------------------- + * + * When a new entry is added: + * + * 1. If the current set is `NONE`, it becomes a `SINGLE` bucket. + * 2. If the set is a `SINGLE` bucket and another entry arrives: + * -> it is promoted to a `VECTOR` bucket (sorted by expiry). + * 3. If the `VECTOR` exceeds `VOLATILESET_VECTOR_BUCKET_MAX_SIZE` (127): + * -> the set becomes a `RAX`, and existing entries are migrated. + * 4. IF the set is using RAX encoding it will locate a bucket to add the entry + * following the strategy explained below. 
+ * + *----------------------------------------------------------------------------- + * RAX Bucket and Dynamic Splitting + *----------------------------------------------------------------------------- + * + * Each bucket in the RAX bucket corresponds to a **time window**, defined by + * its bucket timestamp (`bucket_ts`). This timestamp represents the **END** of + * the time window. Entries in the bucket must expire *before* this timestamp. + * + * Time windows are defined in granular ranges: + * - Minimum granularity: VOLATILESET_BUCKET_INTERVAL_MIN (16 ms) + * - Maximum granularity: VOLATILESET_BUCKET_INTERVAL_MAX (8192 ms) + * + * A bucket can only contain entries that: + * 1. Have expiry < bucket_ts + * 2. Do not fit into any bucket with a smaller timestamp (i.e., earlier window) + * + * The structure allows multiple encodings: + * VSET_BUCKET_SINGLE - A single pointer to one entry. + * VSET_BUCKET_VECTOR - A sorted vector of pointers (up to 127 entries). + * VSET_BUCKET_HT - A hashtable used when vectors become too dense. + * + * Bucket Timestamp (END of window): + * + * |------------------ Bucket Span ------------------| + * [window_start .................................. bucket_ts) + * + * Layout Example: + * + * Timeline: ----------> increasing time -----------> + * +--------------+-------------+---------+ + * | B0 | B1 | B2 | + * | ts=32 | ts=128 | ts=2048 | + * +--------------+-------------+---------+ + * ^ ^ ^ + * | | | + * [E1,E2] ∈ B0 [E3...E7] ∈ B1 [E8...E15] ∈ B2 + * + * All entries expire BEFORE their bucket_ts + * + * Bucket Splitting Strategy: + * ---------------------------------- + * + * When a bucket (e.g. VECTOR) becomes too dense or needs realignment: + * + * 1. Re-align to lower granularity: + * - Adjust the bucket timestamp down to a finer granularity (e.g. 16ms). + * - Only done if ALL entries still fit in the tighter window. + * - Effectively “moves” the bucket to an earlier timestamp. 
+ * + * Example: B(ts=128, span=128ms) -> B(ts=64, span=16ms) + * + * 2. Split into two buckets: + * - Use binary search to find a “natural” boundary based on entry expiry. + * - Original bucket retains its timestamp (but holds fewer entries). + * - New bucket is inserted before the current one with its own tighter timestamp. + * + * Example: + * + * Before: + * [ Entry0 ... Entry126 ] -> B(ts=128) + * + * After Split: + * [ Entry0...Entry62 ] -> New B(ts=64) + * [ Entry63...Entry126 ] -> Original B(ts=128) + * + * 3. Convert to hashtable: + * - When no clean split is found (e.g. all entries share similar expiry), + * and realignment is not possible. + * - This allows efficient O(1) lookups even with clustered expiry values. + * + * Vector B(ts=128) -> Hashtable B(ts=128) + * + * This hierarchical design ensures: + * - Efficient memory usage (tight buckets) + * - Predictable iteration by expiry time + * - Low overhead insertions & deletions + * - Graceful promotion & demotion of bucket types + * + * NOTE: Buckets are always sorted by their `bucket_ts` in the radix tree (RAX), + * which allows efficient search for insertion/removal based on expiry. + * + *----------------------------------------------------------------------------- + * RAX Bucket Layout + *----------------------------------------------------------------------------- + * + * * RAX View with Time Keys: + * + * expiry_buckets = rax * | 0x6 + * + * +--------------------------+ + * | RAX (key = bucket_ts) | + * |--------------------------| + * | "000016" -> [entry1] | <- Vector (SINGLE->VECTOR->HT) + * | "000032" -> [entry2...] | <- Full vector, might split + * | "000048" -> [entry...] | + * +--------------------------+ + * + * * Splitting a Full Vector in RAX: + * + * Suppose vector at key "000032" has 13 entries: + * + * 1. Use binary search to find a transition point in expiry bucket_ts. 
* We search the first 2 following entries which belong to different low granularity time windows,
 * but as close as possible to the middle of the vector:
 * [entry1, entry7, ..., entry13]
 * ↑
 * split (first where get_bucket_ts(entry) > min_ts)
 *
 * 2. Create two vectors:
 * bucket A -> [entry1..entry6] with key = "000032"
 * bucket B -> [entry7..entry13] with key = "000048"
 *
 * 3. Insert both back to the RAX.
 *
 *-----------------------------------------------------------------------------
 * Bucket Lifecycle
 *-----------------------------------------------------------------------------
 *
 * NONE
 *   |
 *   v
 * SINGLE (1 entry)
 *   |
 *   v
 * VECTOR (sorted, up to 127)
 *   |
 *   v
 * RAX (holds multiple buckets, keyed by each bucket's end timestamp)
 *     Bucket types within a RAX:
 *
 *     SINGLE
 *       |
 *       v
 *     VECTOR (sorted, up to 127, can split
 *       |     into multiple vectors)
 *       |
 *       v
 *     HASHTABLE (only when a vector can't split)
 */

/*************************************************************************************************************
 * pVector Implementation
 *************************************************************************************************************/

/* Bit widths of the two header counters below; together they keep the
 * header at a single uint64_t before the flexible array. */
#define PV_CARD_BITS 30
#define PV_ALLOC_BITS 34

/* Custom vector structure with embedded allocation and length counters */
typedef struct {
    uint64_t len : PV_CARD_BITS;    /* Number of elements (cardinality) */
    uint64_t alloc : PV_ALLOC_BITS; /* Allocated memory (zmalloc_size of the current vector allocation) */
    void *data[];                   /* Flexible array member */
} pVector;

static const size_t PV_HEADER_SIZE = (sizeof(pVector));


/* Returns the number of elements currently stored in the pVector.
 *
 * Arguments:
 *   vec - The pVector to query.
 *
 * Return:
 *   The number of elements in the vector.
 *   Note that a NULL is a !!!valid!!! vector - returns 0 if the vector is NULL.
*/ +static inline uint32_t +pvLen(pVector *vec) { + return (vec ? vec->len : 0); +} + +/* Returns the number of bytes allocated by the os to store the vector. + * This value is equal to the usable size returned by calling zrealloc_usable. + * + * Arguments: + * vec - The pVector to query. + * + * Return: + * The allocation size of the vector + * Note that a NULL is a !!!valid!!! vector - returns 0 if the vector is NULL. */ +static inline uint32_t pvAlloc(pVector *vec) { + return (vec ? vec->alloc : 0); +} + +/* Ensures that a pVector has enough capacity to hold additional elements. + * + * This function guarantees that the given pVector `pv` has at least enough + * allocated space to accommodate `additional` more elements, growing it if necessary. + * If the vector is currently `NULL`, it will be newly allocated. + * + * The allocation is handled using `zmalloc` or `zrealloc_usable`, depending on whether + * the vector is new or already initialized. The internal `alloc` field is updated to + * reflect the actual allocated size. + * + * Arguments: + * pv - Pointer to an existing pVector or NULL. + * additional - The number of additional elements the vector should be able to accommodate. + * + * Return: + * A pointer to the resized (or newly allocated) pVector with sufficient capacity. + * + * Note: + * The `additional` is the number of *additional* elements beyond the current length. + * This function does not modify the vector's logical length (`len`), only its allocation. 
*/ +static pVector *pvMakeRoomFor(pVector *pv, size_t additional) { + if (additional == 0) return pv; + /* Make sure we will have the capacity to store the extra number of elements */ + assert(pvLen(pv) + additional <= (1UL << PV_CARD_BITS) - 1); + + size_t required = PV_HEADER_SIZE + (pvLen(pv) + additional) * sizeof(void *); + + if (pvAlloc(pv) >= required) return pv; + + if (!pv) { + pv = zmalloc(required); + pv->len = 0; + } else { + pv = zrealloc_usable(pv, required, &required); + } + /* Make sure we have the capacity to save the alloation size */ + assert(required <= (size_t)((1ULL << PV_ALLOC_BITS) - 1)); + pv->alloc = required; + return pv; +} + +/* Shrinks a pVector to release unused allocated memory. + * + * This function checks if the current allocation (`used`) for the given + * `pVector` exceeds the memory actually required to store its elements. + * If so, it reallocates the vector to use only the needed memory, helping reduce + * memory overhead and improve space efficiency. + * + * The function uses `zrealloc_usable()` to reallocate memory in a way compatible + * with jemalloc (or other zmalloc backends) and updates the internal allocation + * size (`alloc`) to reflect the new length. + * + * Arguments: + * pv - A pointer to the `pVector` to shrink. + * + * Return: + * A potentially reallocated `pVector` with minimized memory usage. + * + * This function does not change the logical contents of the vector. + * It only adjusts the allocated memory footprint. If no reallocation + * is needed, the original pointer is returned unchanged. + * + * Example: + * pVector *vec = pvNew(); + * // After some insertions and deletions + * vec = pvShrinkToFit(vec); */ +static pVector *pvShrinkToFit(pVector *pv) { + if (!pv) return NULL; + + size_t used = pvAlloc(pv); + size_t required = pvLen(pv) == 0 ? 
0 : PV_HEADER_SIZE + pvLen(pv) * sizeof(void *); + + if (used > required) { + if (!required) { + zfree(pv); + return NULL; + } + pv = zrealloc_usable(pv, required, &required); + pv->alloc = required; + } + return pv; +} + +/** + * pvSplit - Splits a pVector into two parts at a given index. + * + * Arguments: + * pv_ptr: A pointer to the pVector* to split. This pointer is + * updated in-place to point to the left portion (elements [0..split_index-1]). + * split_index: The index at which to split the vector. The resulting right + * vector will contain elements [split_index..len-1]. + * + * This function is used to **efficiently split a sorted vector of pointers** + * into two separate vectors. The original vector is truncated in-place to + * only contain the first half, and a new vector is returned containing the + * second half. This allows for logical partitioning of data without scanning + * or reallocating unnecessary memory. + * + * The vector is assumed to be densely packed and its elements are of type `void*`. + * + * Memory is allocated for the new right vector using `zmalloc`, and the unused + * portion of the original vector may be freed or shrunk via `pvShrinkToFit` + * to optimize memory usage. + * + * Return: + * - A new pVector containing the right split [split_index..len-1]. + * + * Side effects: + * - The original vector pointer (`*pv_ptr`) is modified to point to the + * resized left portion. + * + * Example: + * -------- + * Suppose `pv_ptr` points to a vector of 5 elements: + * [A, B, C, D, E] + * + * Calling: + * pVector *right = pvSplit(&pv_ptr, 3); + * + * Results in: + * pv_ptr -> [A, B, C] + * right -> [D, E] + * + * If the split_index is 5 (i.e. the end), the function returns NULL and the + * original vector is unchanged. */ +pVector *pvSplit(pVector **pv_ptr, uint32_t split_index) { + pVector *pv = *pv_ptr; + + /* Handle edge cases: */ + + /* 1. 
null vector, ot split index which includes the entire vector in the left size + * Should simply return a NULL vector (right size). + */ + if (!pv || split_index >= pvLen(pv)) return NULL; + + /* 2. zero split index means no left side. just return the existing vector and zero the input vector. */ + if (split_index == 0) { + *pv_ptr = NULL; + return pv; + } + + // Number of elements for the right half + uint64_t right_len = pv->len - split_index; + + // Allocate new vector for right part + size_t item_bytes = sizeof(void *); + size_t total_bytes = sizeof(pVector) + right_len * item_bytes; + size_t new_alloc; + pVector *right = zmalloc_usable(total_bytes, &new_alloc); + right->alloc = new_alloc; + right->len = right_len; + + // Copy the right part + memcpy(&right->data[0], &pv->data[split_index], right_len * item_bytes); + + // Shrink original vector + pv->len = split_index; + *pv_ptr = pvShrinkToFit(pv); + + return right; +} + +/* Creates a new pVector with the specified initial capacity. + * + * This function initializes a new pVector capable of holding at least + * `capacity` elements. Internally, it delegates allocation and setup to + * `pvMakeRoomFor`, starting from a NULL vector. + * + * Arguments: + * capacity - The initial number of elements the vector should be able to store. + * + * Return: + * A pointer to the newly allocated pVector. + * Note that a NULL is a !!valid!! cector which size is zero. + * + * Note: + * The logical length (`len`) of the returned vector is initialized to 0. + */ +pVector *pvNew(uint32_t capacity) { + return pvMakeRoomFor(NULL, capacity); +} + +/* Inserts an element at the specified position in the pVector. + * + * Ensures enough capacity for the new element, shifts elements to make space, + * and inserts the given element at the desired position. + * + * Arguments: + * pv - The pVector to insert into (can be NULL). + * elem - The pointer to be inserted. + * idx - The index at which to insert the element (must be ≤ pv->len). 
+ * + * Return: + * The updated pVector with the element inserted. */ +pVector *pvInsertAt(pVector *pv, void *elem, uint32_t idx) { + assert(idx <= pv->len); + pv = pvMakeRoomFor(pv, 1); + + if (idx < pv->len) { + memmove(&pv->data[idx + 1], &pv->data[idx], (pv->len - idx) * sizeof(void *)); + } + + pv->data[idx] = elem; + pv->len++; + return pv; +} + +/* Finds the index of the given element in the pVector. + * + * Parameters: + * pv - The vector to search. + * elem - The element to look for (pointer equality). + * + * Returns: + * The index of the element if found; otherwise, returns pv->len (i.e., not found). + * + * Notes: + * - This compares elements using raw pointer equality (`==`). + * - If pv is NULL or empty, returns 0 as a safe fallback. + * - Return value being equal to pv->len can be used to check for absence. */ +uint32_t pvFind(pVector *pv, void *elem) { + if (!pv || pv->len == 0) return 0; + + for (uint32_t i = 0; i < pv->len; i++) { + if (pv->data[i] == elem) { + return i; + } + } + return pv->len; +} + + +/* Removes the element at the specified index from the pVector. + * + * Shifts elements as necessary and optionally shrinks the vector if memory can be saved. + * If this is the last element in the vector, the vector is freed and NULL is returned. + * + * Arguments: + * pv - The pVector to remove from. + * idx - The index of the element to remove (must be < pv->len). + * + * Return: + * The updated pVector after removal. + * Returns NULL if the last element was removed and the vector was freed. */ +pVector *pvRemoveAt(pVector *pv, uint32_t idx) { + assert(pv && pv->len > 0); + assert(idx < pv->len); + if (pv->len == 1) { + /* Last element being removed; delete vector */ + zfree(pv); + return NULL; + } else if (idx < pv->len - 1UL) + memmove(&pv->data[idx], &pv->data[idx + 1], (pv->len - idx - 1) * sizeof(void *)); + pv->len--; + return pvShrinkToFit(pv); +} + +/* Removes the first matching element from the pVector. 
+ * + * Performs a linear search for the given pointer and removes the first match. + * Updates the vector pointer in case a removal was done. + * + * Arguments: + * pv - A pointer to the pVector to remove from. + * elem - The element pointer to match and remove. + * removed - A pointer to a memory location to store the result of the removal. + * + * Return: + * the vector after the removal attempt */ +pVector *pvRemove(pVector *pv, void *elem, bool *removed) { + bool was_removed = false; + if (pv && pvLen(pv) > 0) { + uint32_t idx = pvFind(pv, elem); + if (idx < pvLen(pv)) { + pv = pvRemoveAt(pv, idx); + was_removed = true; + } + } + *removed = was_removed; + return pv; +} + +/* Retrieves the element at the specified index in the pVector. + * + * Arguments: + * vec - The pVector to retrieve from. + * idx - The index of the element to access. + * + * Return: + * A pointer to the element at the given index. + * Returns NULL if the vector is NULL or the index is out of bounds. */ +void *pvGet(pVector *pv, uint32_t idx) { + assert(pv && idx < pvLen(pv)); + return pv->data[idx]; +} + +/* Frees the memory used by the pVector. + * + * Arguments: + * pv - The pVector to free. + * + * Return: + * None. */ +void pvFree(pVector *pv) { + if (pv) zfree(pv); +} + +/* Appends an element to the end of the given pVector. + * + * Parameters: + * pv - The vector to append to. + * elem - The element to append. + * + * Returns: + * A (possibly reallocated) pVector with the new element inserted at the end. + * + * Notes: + * Internally this uses pvInsert() with the current length of the vector, + * effectively appending the element. */ +pVector *pvPush(pVector *pv, void *elem) { + return pvInsertAt(pv, elem, pvLen(pv)); +} + +/* Removes and optionally returns the last element from the given pVector. + * + * Parameters: + * pv - The vector to remove the element from. + * pelem - Optional pointer to store the popped element. Can be NULL. 
+ * + * Returns: + * A (possibly reallocated) pVector with the last element removed. + * + * Notes: + * Calling this function on an empty vector will trigger assertion. + * You can pass NULL for `pelem` if you don't need the removed value. */ +pVector *pvPop(pVector *pv, void **pelem) { + assert(pvLen(pv) > 0); + uint32_t last_idx = pvLen(pv) - 1; + if (pelem) *pelem = pvGet(pv, last_idx); + return pvRemoveAt(pv, last_idx); +} + +/* Set the element at given index inside the pVector. + * + * Parameters: + * pv - The vector containing the elements to swap. + * idx - Index of the element. + * elem - pointer to the new element. + * + * Returns: + * None. + * + * Preconditions: + * - idx must be valid indices within the vector. */ +void pvSet(pVector *pv, uint32_t idx, void *elem) { + assert(idx < pvLen(pv)); + pv->data[idx] = elem; +} + +/* Swaps two elements at given indices inside the pVector. + * + * Parameters: + * pv - The vector containing the elements to swap. + * idx1 - Index of the first element. + * idx2 - Index of the second element. + * + * Returns: + * None. + * + * Preconditions: + * - idx1 and idx2 must both be valid indices within the vector. + * + * Notes: + * This is a simple in-place swap that uses direct pointer assignment. */ +void pvSwap(pVector *pv, uint32_t idx1, uint32_t idx2) { + assert(pv && pvLen(pv) > 0 && idx1 < pvLen(pv) && idx2 < pvLen(pv)); + void *temp = pv->data[idx1]; + pv->data[idx1] = pv->data[idx2]; + pv->data[idx2] = temp; +} + +/* Sort the elements of a pVector using a user-provided comparison function. + * + * This function performs an in-place sort of the elements in the given pVector. + * It uses the standard C library `qsort()` function under the hood and assumes + * the elements are pointers. The caller must supply a comparison function + * compatible with `qsort()`, which determines the ordering of the elements. + * + * Parameters: + * pv - A pointer to the pVector to sort. 
+ * compare - A function pointer used to compare two elements. This function must + * match the signature: int compare(const void *a, const void *b) + * and return: + * < 0 if *a < *b + * > 0 if *a > *b + * 0 if *a == *b + * + * Returns: + * None. The pVector is sorted in place. + * + * Example: + * int cmp(const void *a, const void *b) { + * return strcmp(*(const char **)a, *(const char **)b); + * } + * + * pvSort(my_vector, cmp); */ +void pvSort(pVector *pv, int (*compare)(const void *a, const void *b)) { + if (pvLen(pv) <= 1) return; + qsort(pv->data, pv->len, sizeof(void *), compare); +} + +/************************************************************************************************************* + * pVector End + *************************************************************************************************************/ + +#define VOLATILESET_BUCKET_INTERVAL_MAX (1LL << 13LL) // 2^13 = 8192 milliseconds +#define VOLATILESET_BUCKET_INTERVAL_MIN (1LL << 4LL) // 2^4 = 16 milliseconds + +#define VOLATILESET_VECTOR_BUCKET_MAX_SIZE 127 + +#define VSET_NONE_BUCKET_PTR ((void *)(uintptr_t) - 1) +#define VSET_BUCKET_NONE -1 // matching the NULL case +#define VSET_BUCKET_SINGLE 0x1UL // xx1 (assuming sds) +#define VSET_BUCKET_VECTOR 0x2UL // 010 +#define VSET_BUCKET_HT 0x4UL // 100 +#define VSET_BUCKET_RAX 0x6UL // 110 + +#define VSET_TAG_MASK 0x7UL +#define VSET_PTR_MASK (~VSET_TAG_MASK) + +// Generic bucket type +typedef void vsetBucket; + +typedef struct vsetInternalIterator { + /* for rax bucket */ + raxIterator riter; + union { + /* for hashtable bucket */ + hashtableIterator hiter; + /* for vector bucket */ + uint32_t viter; + /* for single bucket */ + void *vsingle; + }; + /* the parent of the bucket we are currently iterating on */ + vsetBucket *parent_bucket; + /* the bucket we are currently iterating on */ + vsetBucket *bucket; + /* the pointer entry */ + void *entry; + /* In case of rax encoded set, this is the current iterated bucket timestamp */ + long 
long bucket_ts; + /* the state of the iteration */ + int iteration_state; +} vsetInternalIterator; + +/* The opaque hashtableIterator is defined as a blob of bytes. */ +static_assert(sizeof(vsetIterator) >= sizeof(vsetInternalIterator), + "Opaque iterator size"); + +/* Conversion from user-facing opaque iterator type to internal struct. */ +static inline vsetInternalIterator *iteratorFromOpaque(vsetIterator *iterator) { + return (vsetInternalIterator *)(void *)iterator; +} + +/* Conversion from user-facing opaque iterator type to internal struct. */ +static inline vsetIterator *opaqueFromIterator(vsetInternalIterator *iterator) { + return (vsetIterator *)(void *)iterator; +} + + +/* Determine bucket type */ +static inline int vsetBucketType(vsetBucket *b) { + assert(b); + if (b == VSET_NONE_BUCKET_PTR) return VSET_BUCKET_NONE; + + uintptr_t bits = (uintptr_t)b; + if (bits & 0x1) + return VSET_BUCKET_SINGLE; + return bits & VSET_TAG_MASK; +} + +/* Access raw pointer */ +static inline void *vsetBucketRawPtr(vsetBucket *b) { + return (void *)((uintptr_t)b & VSET_PTR_MASK); +} + +// Accessors with type assertions +static inline pVector *vsetBucketVector(vsetBucket *b) { + assert(vsetBucketType(b) == VSET_BUCKET_VECTOR); + return (pVector *)vsetBucketRawPtr(b); +} + +static inline hashtable *vsetBucketHashtable(vsetBucket *b) { + assert(vsetBucketType(b) == VSET_BUCKET_HT); + return (hashtable *)vsetBucketRawPtr(b); +} + +static inline rax *vsetBucketRax(vsetBucket *b) { + assert(vsetBucketType(b) == VSET_BUCKET_RAX); + return (rax *)vsetBucketRawPtr(b); +} + +static inline void *vsetBucketSingle(vsetBucket *b) { + return b; +} + +static inline vsetBucket *vsetBucketFromRawPtr(void *ptr, int type) { + uintptr_t p = (uintptr_t)ptr; + return (vsetBucket *)(p | (type & VSET_TAG_MASK)); +} + +static inline vsetBucket *vsetBucketFromVector(pVector *vec) { + return vsetBucketFromRawPtr(vec, VSET_BUCKET_VECTOR); +} + +static inline vsetBucket *vsetBucketFromHashtable(hashtable 
*ht) { + return vsetBucketFromRawPtr(ht, VSET_BUCKET_HT); +} + +static inline vsetBucket *vsetBucketFromSingle(void *ptr) { + return ptr; +} + +static inline vsetBucket *vsetBucketFromNone(void) { + return VSET_NONE_BUCKET_PTR; +} + +static inline vsetBucket *vsetBucketFromRax(rax *r) { + return vsetBucketFromRawPtr(r, VSET_BUCKET_RAX); +} + +/****************** Helper Functions *******************************************/ + +/* compare 2 expiration times */ +#define EXPIRE_COMPARE(exp1, exp2) (exp1 < exp2 ? -1 : exp1 == exp2 ? 0 \ + : 1) + +/* Since we do not have native posix support for qsort_r, we use this variable to help the vset + * compare function operate entry comparison given a dynamic getExpiry function is passed to + * different vset functions. */ +static __thread vsetGetExpiryFunc current_getter_func; + +static inline void vsetSetExpiryGetter(vsetGetExpiryFunc f) { + assert(current_getter_func == NULL); + current_getter_func = f; +} + +static inline void vsetUnsetExpiryGetter(void) { + current_getter_func = NULL; +} + +static inline vsetGetExpiryFunc vsetGetExpiryGetter(void) { + return current_getter_func; +} + +static int vsetCompareEntries(const void *a, const void *b) { + vsetGetExpiryFunc getExpiry = vsetGetExpiryGetter(); + long long ea = getExpiry(*(void **)a); + long long eb = getExpiry(*(void **)b); + return (ea > eb) - (ea < eb); +} + +/* used for popping form rax bucket where we KNOW all entries are expired. 
*/ +static long long vsetGetExpiryZero(const void *entry) { + UNUSED(entry); + return 0; +} + +static inline long long get_bucket_ts(long long expiry) { + return (expiry & ~(VOLATILESET_BUCKET_INTERVAL_MIN - 1LL)) + VOLATILESET_BUCKET_INTERVAL_MIN; +} + +static inline long long get_max_bucket_ts(long long expiry) { + return (expiry & ~(VOLATILESET_BUCKET_INTERVAL_MAX - 1LL)) + VOLATILESET_BUCKET_INTERVAL_MAX; +} + +static inline size_t encodeExpiryKey(long long expiry, unsigned char *key) { + long long be_ts = htonu64(expiry); + size_t size = sizeof(be_ts); + memcpy(key, &be_ts, size); + return size; +} + +static inline long long decodeExpiryKey(unsigned char *key) { + long long res; + memcpy(&res, key, sizeof(res)); + res = ntohu64(res); + return res; +} + +static inline size_t encodeNewExpiryBucketKey(unsigned char *key, long long expiry) { + long long bucket_ts = get_max_bucket_ts(expiry); + long long be_ts = htonu64(bucket_ts); + size_t size = sizeof(be_ts); + memcpy(key, &be_ts, size); + return size; +} + +/** + * Performs binary search to find the index where the element should be inserted. + * Returns the index where the element should be placed to keep the array sorted. 
getExpiry Function that extracts the expiry timestamp from an element
 + * bucket Vector-encoded bucket holding the sorted elements
 + * expiry Expiry timestamp of the element to insert
+ * + * If a valid split is found, the function returns the index `i` at which the split + * should occur (i.e., elements `[0..i-1]` belong to one bucket, and `[i..len-1]` to another). + * If no split is found (i.e., all elements map to the same bucket), the function + * returns `pv->len`, indicating the entire vector belongs to one bucket. + * + * Return: + * - A split index in the range [1, pv->len), or + * - `pv->len` if no transition is found (no split possible). + * + * Example: + * -------- + * Raw expiry values: [1001, 1002, 1003, 2048, 2049] + * Bucket timestamps: [1024, 1024, 1024, 4096, 4096] + * + * This function returns index 3, as: + * get_bucket_ts(1003) == 1024 + * get_bucket_ts(2048) == 4096 → transition point + * + * So the vector can be split as: + * - Left partition: [1001, 1002, 1003] + * - Right partition: [2048, 2049] */ +static uint32_t findSplitPosition(vsetGetExpiryFunc getExpiry, vsetBucket *bucket, long long *split_ts_out) { + pVector *pv = vsetBucketVector(bucket); + if (!pv || pv->len < 2) return pv ? 
pv->len : 0; + + int mid = pv->len / 2; + int offset = 0; + + while (1) { + int left = mid - offset; + int right = mid + offset; + + // Check left side (as long as i > 0 to allow e[i-1]) + if (left > 0) { + long long ts1 = get_bucket_ts(getExpiry(pvGet(pv, left - 1))); + long long ts2 = get_bucket_ts(getExpiry(pvGet(pv, left))); + if (ts1 < ts2) { + if (split_ts_out) *split_ts_out = ts1; + return left; + } + } + + // Check right side (as long as i > 0 to allow e[i-1]) + if (right > 0 && right < pv->len) { + long long ts1 = get_bucket_ts(getExpiry(pvGet(pv, right - 1))); + long long ts2 = get_bucket_ts(getExpiry(pvGet(pv, right))); + if (ts1 < ts2) { + if (split_ts_out) *split_ts_out = ts1; + return right; + } + } + + offset++; + if (mid - offset < 1 && mid + offset >= pv->len) break; // searched entire vector + } + + return pv->len; // no split found +} + +#define VSET_BUCKET_KEY_LEN 8 + +/* hash_pointer - Computes a high-quality 64-bit hash from a pointer value. + * + * This function is designed to produce a well-distributed hash from a memory + * pointer, avoiding the common pitfall of poor entropy due to pointer alignment. + * It uses a platform-dependent mixing strategy based on MurmurHash3 finalization + * constants, ensuring good avalanche behavior and low collision rates. + * + * For 32-bit systems: + * The function uses a reduced MurmurHash3 32-bit finalizer: + * - XORs and right shifts to mix higher-order bits into lower ones. + * - Multiplies by large constants to further spread the bits. + * + * + * For 64-bit systems: + * The function uses MurmurHash3 64-bit finalizer constants: + * - These constants are chosen to maximize bit diffusion and avoid hash clustering. + * - This version benefits from the full 64-bit pointer space. + * + * Why this works: + * - Pointers tend to have low entropy in their lower bits (due to alignment). + * - A naive cast to integer leads to clustering and collisions in hash tables. 
+ * - This function performs fast and effective bit mixing to reduce collisions. + * - Ideal for use in pointer-keyed hash tables, interning systems, or caches. + * + * Note: + * - This is not a cryptographic hash. It is suitable for fast, internal use only. + * - Returns a 64-bit hash value, even on 32-bit systems. + * + * Returns: + * A 64-bit hash value derived from the input pointer. */ +static uint64_t hash_pointer(const void *ptr) { + uintptr_t x = (uintptr_t)ptr; +#if UINTPTR_MAX == 0xFFFFFFFF + // 32-bit platform + x ^= x >> 16; + x *= 0x85ebca6b; + x ^= x >> 13; + x *= 0xc2b2ae35; + x ^= x >> 16; + +#else + // 64-bit platform + x ^= x >> 33; + x *= 0xff51afd7ed558ccdULL; + x ^= x >> 33; + x *= 0xc4ceb9fe1a85ec53ULL; + x ^= x >> 33; +#endif + return (uint64_t)x; +} + +hashtableType pointerHashtableType = { + .hashFunction = hash_pointer, +}; + +static inline vsetBucket *findBucket(rax *expiry_buckets, long long expiry, unsigned char *key, size_t *key_len, long long *pbucket_ts, raxNode **node) { + *key_len = encodeExpiryKey(expiry, key); + vsetBucket *bucket = vsetBucketFromNone(); + /* First try to locate the first bucket which is larger than the specified key */ + raxIterator iter; + raxStart(&iter, expiry_buckets); + raxSeek(&iter, ">", (unsigned char *)key, *key_len); + + if (raxNext(&iter)) { + long long bucket_ts = decodeExpiryKey(iter.key); + /* If this bucket span over a window to far in the future, it is not a candidate. */ + if (get_max_bucket_ts(expiry) < bucket_ts) { + raxStop(&iter); + return vsetBucketFromNone(); + } + bucket = iter.data; + assert(iter.node->iskey); + if (node) *node = iter.node; + if (key) { + assert(iter.key_len == VSET_BUCKET_KEY_LEN); + memcpy(key, iter.key, iter.key_len); + } + if (pbucket_ts) *pbucket_ts = decodeExpiryKey(iter.key); + } + raxStop(&iter); + return bucket; +} + +/* Free all the vsetBucket memory. 
+ * Since the bucket only holds references to entries the entries themselves are NOT freed */ +static void freeVsetBucket(vsetBucket *bucket) { + switch (vsetBucketType(bucket)) { + case VSET_BUCKET_NONE: + case VSET_BUCKET_SINGLE: + // No internal memory to free + break; + case VSET_BUCKET_VECTOR: + pvFree(vsetBucketVector(bucket)); + break; + case VSET_BUCKET_HT: + hashtableRelease(vsetBucketHashtable(bucket)); + break; + case VSET_BUCKET_RAX: + raxFreeWithCallback(vsetBucketRax(bucket), freeVsetBucket); + break; + default: + panic("Unknown volatile set type in freeVsetBucket"); + } +} + +static bool splitBucketIfPossible(vsetBucket *parent, vsetGetExpiryFunc getExpiry, vsetBucket *bucket, long long bucket_ts, raxNode *node) { + /* We can only split vector encoded buckets */ + if (vsetBucketType(bucket) != VSET_BUCKET_VECTOR) { + return false; + } + size_t key_len; + long long target_bucket_ts = bucket_ts; + unsigned char key[VSET_BUCKET_KEY_LEN] = {0}; + vsetBucket *new_bucket = vsetBucketFromNone(); + pVector *pv = vsetBucketVector(bucket); + rax *expiry_buckets = vsetBucketRax(parent); + /* first lets sort the vector. we cannot take a decision without it. + * We set the global expiry getter so we can sort according to the provided getExpiry function. + * TODO: After some thought I think it might be better to avoid sorting and attempt a quickselect. just allocate a new vector with the same size. + * Than scan once and choose a pivot which is the median or average bucket_ts. Then move all entries smaller to the new vector. then shrink both vectors as needed. 
*/ + vsetSetExpiryGetter(getExpiry); + pvSort(pv, vsetCompareEntries); + vsetUnsetExpiryGetter(); + + long long max_bucket_ts = get_bucket_ts(getExpiry(pv->data[pvLen(pv) - 1])); + long long min_bucket_ts = get_bucket_ts(getExpiry(pv->data[0])); + + if (max_bucket_ts < bucket_ts) { + /* In case the bucket is already spanning over a larger window than needed, just place the bucket in a new place */ + key_len = encodeExpiryKey(bucket_ts, key); + assert(raxRemove(expiry_buckets, key, key_len, (void **)&new_bucket)); + assert(new_bucket == bucket); + target_bucket_ts = max_bucket_ts; + + } else if (min_bucket_ts != max_bucket_ts) { + /* lets split the bucket. we know we can do it. */ + uint32_t split_index = findSplitPosition(getExpiry, bucket, &target_bucket_ts); + assert(target_bucket_ts < bucket_ts); + assert(split_index != pvLen(pv)); /* no way to split it ??? */ + pVector *new_bucket_vector = vsetBucketVector(bucket); + bucket = vsetBucketFromVector(pvSplit(&new_bucket_vector, split_index)); + new_bucket = vsetBucketFromVector(new_bucket_vector); + assert(pvLen(vsetBucketVector(new_bucket)) > 0); + assert(pvLen(vsetBucketVector(bucket)) > 0); + /* modify the current bucket data pointer */ + key_len = encodeExpiryKey(bucket_ts, key); + /* In order to avoid rax override, we directly change the node data */ + // alternative: raxInsert(*set, key, key_len, bucket, NULL); + raxSetData(node, bucket); + + } else { + /* We cannot split the bucket. just return false */ + return false; + } + /* We change the current bucket position OR we split it, either way we have a new bucket to insert. 
*/ + key_len = encodeExpiryKey(target_bucket_ts, key); + raxInsert(expiry_buckets, key, key_len, new_bucket, NULL); + return true; +} + +static inline vsetBucket *insertToBucket_NONE(vsetGetExpiryFunc getExpiry, vsetBucket *bucket, void *entry, long long expiry) { + UNUSED(getExpiry); + UNUSED(expiry); + UNUSED(bucket); + return vsetBucketFromSingle(entry); +} + +static inline vsetBucket *insertToBucket_SINGLE(vsetGetExpiryFunc getExpiry, vsetBucket *bucket, void *entry, long long expiry) { + /* Upgrade to vector */ + pVector *pv = pvNew(2); + void *curr_entry = vsetBucketSingle(bucket); + long long curr_expiry = getExpiry(curr_entry); + if (curr_expiry < expiry) { + pv = pvPush(pv, curr_entry); + pv = pvPush(pv, entry); + } else { + pv = pvPush(pv, entry); + pv = pvPush(pv, curr_entry); + } + bucket = vsetBucketFromVector(pv); + return bucket; +} + +static inline vsetBucket *insertToBucket_VECTOR(vsetGetExpiryFunc getExpiry, vsetBucket *bucket, void *entry, long long expiry, int pos) { + UNUSED(getExpiry); + UNUSED(expiry); + pVector *pv = vsetBucketVector(bucket); + /* limit of the number of elements in a vector. */ + if (pvLen(pv) >= VOLATILESET_VECTOR_BUCKET_MAX_SIZE) { + // Upgrade to hashtable + hashtable *ht = hashtableCreate(&pointerHashtableType); + for (uint32_t i = 0; i < pvLen(pv); i++) { + hashtableAdd(ht, pvGet(pv, i)); + } + pvFree(pv); + /* Add the new entry as well */ + hashtableAdd(ht, entry); + + return vsetBucketFromHashtable(ht); + } else { + if (pos >= 0) + /* In case we are explicitly provided a position to insert place the entry there */ + return vsetBucketFromVector(pvInsertAt(pv, entry, pos)); + else + /* Otherwise it is better to just push the entry to the vector with less change of memmove and reallocation. 
*/ + return vsetBucketFromVector(pvPush(pv, entry)); + } + return vsetBucketFromNone(); +} + +static inline vsetBucket *insertToBucket_HASHTABLE(vsetGetExpiryFunc getExpiry, vsetBucket *bucket, void *entry, long long expiry) { + UNUSED(getExpiry); + UNUSED(expiry); + + hashtable *ht = vsetBucketHashtable(bucket); + assert(hashtableAdd(ht, entry)); + return bucket; +} + +static inline vsetBucket *insertToBucket_RAX(vsetGetExpiryFunc getExpiry, vsetBucket *target, void *entry, long long expiry) { + unsigned char key[VSET_BUCKET_KEY_LEN] = {0}; + size_t key_len; + long long bucket_ts; + rax *expiry_buckets = vsetBucketRax(target); + raxNode *node; + vsetBucket *bucket = findBucket(expiry_buckets, expiry, key, &key_len, &bucket_ts, &node); + int type = vsetBucketType(bucket); + if (type == VSET_BUCKET_NONE) { + /* No bucket: create single-entry bucket */ + bucket = insertToBucket_NONE(getExpiry, bucket, entry, expiry); + assert(vsetBucketType(bucket) == VSET_BUCKET_SINGLE); + size_t key_size = encodeNewExpiryBucketKey(key, expiry); + raxInsert(expiry_buckets, key, key_size, bucket, NULL); + return target; + } else if (type == VSET_BUCKET_SINGLE) { + /* Upgrade to vector */ + bucket = insertToBucket_SINGLE(getExpiry, bucket, entry, expiry); + assert(vsetBucketType(bucket) == VSET_BUCKET_VECTOR); + /* In order to avoid rax override, we directly change the node data */ + // alternative: raxInsert(expiry_buckets, key, key_len, bucket, NULL); + raxSetData(node, bucket); + } else if (type == VSET_BUCKET_VECTOR) { + pVector *pv = vsetBucketVector(bucket); + if (pvLen(pv) == VOLATILESET_VECTOR_BUCKET_MAX_SIZE) { + /* Try to split the bucket. If not possible switch to hashtable encoding. */ + if (!splitBucketIfPossible(target, getExpiry, bucket, bucket_ts, node)) { + /* Can't split? 
insert into the vector anyway
new_bucket = vsetBucketFromSingle(pvGet(pv, idx == 0 ? 1 : 0)); + success = true; + pvFree(pv); + } + } else { + /* pop is a more efficient way to remove an element from the vector. However it may + * change the order of the elements in the vector, so we should ask the user to indicate if to use pop or not. */ + if (pop) { + uint32_t idx = pvFind(pv, entry); + if (idx < vlen) { + void *popped_entry = NULL; + pvSwap(pv, idx, pvLen(pv) - 1); + success = true; + new_bucket = vsetBucketFromVector(pvPop(pv, &popped_entry)); + assert(popped_entry == entry); + } + } else { + pv = pvRemove(pv, entry, &success); + if (success) + new_bucket = vsetBucketFromVector(pv); + } + } + if (removed) *removed = success; + return new_bucket; +} + +static inline vsetBucket *removeFromBucket_HASHTABLE(vsetGetExpiryFunc getExpiry, vsetBucket *bucket, void *entry, long long expiry, bool *removed) { + UNUSED(getExpiry); + UNUSED(expiry); + + bool success = false; + vsetBucket *new_bucket = bucket; + hashtable *ht = vsetBucketHashtable(bucket); + if (hashtableDelete(ht, entry)) { + success = true; + assert(hashtableSize(ht) > 0); + if (hashtableSize(ht) == 1) { + // Downgrade to SINGLE + hashtableIterator hi; + hashtableInitIterator(&hi, ht, 0); + void *ptr; + hashtableNext(&hi, &ptr); + hashtableRelease(ht); + new_bucket = vsetBucketFromSingle(ptr); + } + } + if (removed) *removed = success; + return new_bucket; +} +static bool removeEntryFromRaxBucket(vsetBucket *rax_bucket, vsetGetExpiryFunc getExpiry, void *entry, vsetBucket *bucket, unsigned char *key, size_t key_len, vsetBucket **pbucket, raxNode *node) { + bool removed = false; + switch (vsetBucketType(bucket)) { + case VSET_BUCKET_SINGLE: + bucket = removeFromBucket_SINGLE(getExpiry, bucket, entry, 0, &removed); + if (removed) { + raxRemove(vsetBucketRax(rax_bucket), key, key_len, NULL); + if (pbucket) *pbucket = vsetBucketFromNone(); + } + break; + case VSET_BUCKET_VECTOR: { + vsetBucket *new_bucket = 
removeFromBucket_VECTOR(getExpiry, bucket, entry, 0, &removed, true); + if (new_bucket != bucket) { + if (vsetBucketType(new_bucket) == VSET_BUCKET_NONE) { + raxRemove(vsetBucketRax(rax_bucket), key, key_len, NULL); + if (pbucket) *pbucket = vsetBucketFromNone(); + } else { + /* In order to avoid rax override, we directly change the node data */ + // alternative: raxInsert(*set, key, key_len, new_bucket, NULL); + raxSetData(node, new_bucket); + if (pbucket) *pbucket = new_bucket; + } + } + break; + } + case VSET_BUCKET_HT: { + vsetBucket *new_bucket = removeFromBucket_HASHTABLE(getExpiry, bucket, entry, 0, &removed); + if (new_bucket != bucket) + /* In order to avoid rax override, we directly change the node data */ + // alternative: raxInsert(*set, key, key_len, bucket, NULL); + raxSetData(node, new_bucket); + + if (pbucket) *pbucket = new_bucket; + break; + } + default: + panic("Unknown bucket type for removeEntryFromRaxBucket"); + return false; + } + return removed; +} + +static inline bool shrinkRaxBucketIfPossible(vsetBucket **target, vsetGetExpiryFunc getExpiry) { + rax *expiry_buckets = vsetBucketRax(*target); + if (raxSize(expiry_buckets) == 1) { + raxIterator it; + raxStart(&it, expiry_buckets); + assert(raxSeek(&it, "^", NULL, 0)); + assert(raxNext(&it)); + vsetBucket *bucket = it.data; + int bucket_type = vsetBucketType(bucket); + raxStop(&it); + /* We will not convert hashtable to our only bucket since we will lose the ability to scan the items in a sorted way. + * We will also not shrink when we have a full vector, since it might immediately be repopulated. */ + if (bucket_type == VSET_BUCKET_SINGLE || + (bucket_type == VSET_BUCKET_VECTOR && pvLen(vsetBucketVector(bucket)) < VOLATILESET_VECTOR_BUCKET_MAX_SIZE)) { + if (bucket_type == VSET_BUCKET_VECTOR) { + pVector *pv = vsetBucketVector(bucket); + /* first lets sort the vector. 
we cannot set the target bucket as unsorted vector bucket */ + vsetSetExpiryGetter(getExpiry); + pvSort(pv, vsetCompareEntries); + vsetUnsetExpiryGetter(); + } + /* lets make our bucket to be the only left bucket */ + *target = bucket; + raxFree(expiry_buckets); + return true; + } + } + return false; +} + +static inline vsetBucket *removeFromBucket_RAX(vsetGetExpiryFunc getExpiry, vsetBucket *target, void *entry, long long expiry, bool *removed) { + unsigned char key[VSET_BUCKET_KEY_LEN] = {0}; + long long bucket_ts; + size_t key_len; + raxNode *node; + rax *expiry_buckets = vsetBucketRax(target); + vsetBucket *bucket = findBucket(expiry_buckets, expiry, key, &key_len, &bucket_ts, &node); + assert(bucket != VSET_NONE_BUCKET_PTR); + bool success = removeEntryFromRaxBucket(target, getExpiry, entry, bucket, key, key_len, NULL, node); + if (removed) *removed = success; + // shrink to single bucket if possible + shrinkRaxBucketIfPossible(&target, getExpiry); + return target; +} + +static inline size_t vsetBucketRemoveExpired_NONE(vsetBucket **bucket, vsetGetExpiryFunc getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx) { + UNUSED(bucket); + UNUSED(getExpiry); + UNUSED(expiryFunc); + UNUSED(now); + UNUSED(max_count); + UNUSED(ctx); + return 0; +} + +static inline size_t vsetBucketRemoveExpired_SINGLE(vsetBucket **bucket, vsetGetExpiryFunc getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx) { + void *entry = vsetBucketSingle(*bucket); + if (max_count && getExpiry(entry) <= now) { + freeVsetBucket(*bucket); + *bucket = vsetBucketFromNone(); + if (expiryFunc) expiryFunc(entry, ctx); + return 1; + } + return 0; +} + +static inline size_t vsetBucketRemoveExpired_VECTOR(vsetBucket **bucket, vsetGetExpiryFunc getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx) { + pVector *pv = vsetBucketVector(*bucket); + uint32_t len = min(pvLen(pv), max_count); + uint32_t i = 0; + for (; i < len; i++) { + 
void *entry = pvGet(pv, i); + /* break as soon as the expiryFunc stops us OR we reached an entry which is not expired */ + if (getExpiry(entry) > now) + break; + if (expiryFunc) expiryFunc(entry, ctx); + } + pVector *new_pv = pvSplit(&pv, i); + *bucket = (new_pv ? vsetBucketFromVector(new_pv) : vsetBucketFromNone()); + pvFree(pv); + return i; +} + +static inline size_t vsetBucketRemoveExpired_HASHTABLE(vsetBucket **bucket, vsetGetExpiryFunc getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx) { + UNUSED(getExpiry); + UNUSED(now); + hashtable *ht = vsetBucketHashtable(*bucket); + hashtableIterator it; + void *entry; + size_t count = 0; + hashtableInitIterator(&it, ht, HASHTABLE_ITER_SAFE); + while (count < max_count && hashtableNext(&it, &entry)) { + assert(hashtableDelete(ht, entry)); + expiryFunc(entry, ctx); + count++; + } + hashtableResetIterator(&it); + + /* in case we completed scanning the hashtable or a single element is left, we can convert the hashtable. */ + size_t ht_size = hashtableSize(ht); + if (ht_size == 0) { + hashtableRelease(ht); + *bucket = vsetBucketFromNone(); + } else if (ht_size == 1) { + assert(entry); + *bucket = vsetBucketFromSingle(entry); + } + return count; +} + +static inline size_t vsetBucketRemoveExpired_RAX(vsetBucket **bucket, vsetGetExpiryFunc getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx) { + UNUSED(getExpiry); + rax *buckets = vsetBucketRax(*bucket); + size_t count = 0; + while (count < max_count && raxSize(buckets) > 0) { + raxIterator it; + raxStart(&it, buckets); + raxSeek(&it, "^", NULL, 0); + assert(raxNext(&it)); + /* lets start again by going into the first bucket. 
*/ + unsigned char key[VSET_BUCKET_KEY_LEN] = {0}; + vsetBucket *time_bucket = it.data; + int time_bucket_type = vsetBucketType(time_bucket); + long long time_bucket_ts = decodeExpiryKey(it.key); + memcpy(key, it.key, it.key_len); + size_t key_len = it.key_len; + raxNode *node = it.node; + raxStop(&it); + if (time_bucket_ts > now) + break; + switch (time_bucket_type) { + case VSET_BUCKET_SINGLE: + count += vsetBucketRemoveExpired_SINGLE(&time_bucket, vsetGetExpiryZero, expiryFunc, now, max_count - count, ctx); + break; + case VSET_BUCKET_VECTOR: + count += vsetBucketRemoveExpired_VECTOR(&time_bucket, vsetGetExpiryZero, expiryFunc, now, max_count - count, ctx); + break; + case VSET_BUCKET_HT: + count += vsetBucketRemoveExpired_HASHTABLE(&time_bucket, vsetGetExpiryZero, expiryFunc, now, max_count - count, ctx); + break; + default: + panic("Cannot expire entries from bucket which is not single, vector or hashtable"); + } + if (time_bucket == VSET_NONE_BUCKET_PTR) { + /* in case the bucket is freed, we can just remove it and continue to the next bucket. */ + raxRemove(buckets, key, key_len, NULL); + } else { + /* in case the bucket still exists, it must be since we reached the max_count or stopped due to expiry function. + * So we save the new bucket to the rax and bail. 
*/ + raxSetData(node, time_bucket); + break; + } + } + /* if all buckets are removed, */ + if (raxSize(buckets) == 0) { + raxFree(buckets); + *bucket = vsetBucketFromNone(); + } else { + shrinkRaxBucketIfPossible(bucket, getExpiry); + } + return count; +} + +static int vsetBucketNext_NONE(vsetInternalIterator *it, void **entryptr) { + UNUSED(it); + UNUSED(entryptr); + return 0; +} + +static inline int vsetBucketNext_SINGLE(vsetInternalIterator *it, void **entryptr) { + bool init_bucket_scan = (it->iteration_state == VSET_BUCKET_NONE); + if (init_bucket_scan) { + it->iteration_state = VSET_BUCKET_SINGLE; + it->entry = vsetBucketSingle(it->bucket); + if (entryptr) *entryptr = it->entry; + return 1; + } + return 0; +} + +static inline int vsetBucketNext_VECTOR(vsetInternalIterator *it, void **entryptr) { + bool init_bucket_scan = (it->iteration_state == VSET_BUCKET_NONE); + pVector *pv = vsetBucketVector(it->bucket); + if (init_bucket_scan) { + it->iteration_state = VSET_BUCKET_VECTOR; + it->viter = 0; + } else { + it->viter++; + } + if (it->viter < pvLen(pv)) { + it->entry = pvGet(pv, it->viter); + } else { + return 0; + } + if (entryptr) *entryptr = it->entry; + return 1; +} + +static inline int vsetBucketNext_HASHTABLE(vsetInternalIterator *it, void **entryptr) { + bool init_bucket_scan = (it->iteration_state == VSET_BUCKET_NONE); + hashtable *ht = vsetBucketHashtable(it->bucket); + if (init_bucket_scan) { + it->iteration_state = VSET_BUCKET_HT; + hashtableInitIterator(&it->hiter, ht, 0); + } + if (!hashtableNext(&it->hiter, &it->entry)) { + hashtableResetIterator(&it->hiter); + return 0; + } + if (entryptr) *entryptr = it->entry; + return 1; +} + +static inline int vsetBucketNext_RAX(vsetInternalIterator *it, void **entryptr) { + bool init_bucket_scan = (it->iteration_state == VSET_BUCKET_NONE); + if (init_bucket_scan) { + /* set myself as the parent bucket */ + it->parent_bucket = it->bucket; + raxStart(&it->riter, vsetBucketRax(it->bucket)); + 
raxSeek(&it->riter, "^", NULL, 0); + } + if (raxNext(&it->riter)) { + /* lets start again by going into the first bucket. */ + it->iteration_state = vsetBucketType(it->riter.data); + it->bucket_ts = decodeExpiryKey(it->riter.key); + it->bucket = it->riter.data; + it->iteration_state = VSET_BUCKET_NONE; + return vsetNext(opaqueFromIterator(it), entryptr); + } else { + /* We currently do not support nested RAX buckets */ + it->parent_bucket = vsetBucketFromNone(); + return 0; + } + return 1; +} + +static inline size_t vsetBucketMemUsage_NONE(vsetBucket *bucket) { + UNUSED(bucket); + return 0; +} + +static inline size_t vsetBucketMemUsage_SINGLE(vsetBucket *bucket) { + UNUSED(bucket); + return 0; +} + +static inline size_t vsetBucketMemUsage_VECTOR(vsetBucket *bucket) { + pVector *pv = vsetBucketVector(bucket); + assert(pv); + return pv->alloc; +} + +static inline size_t vsetBucketMemUsage_HASHTABLE(vsetBucket *bucket) { + hashtable *ht = vsetBucketHashtable(bucket); + return hashtableMemUsage(ht); +} + +static inline size_t vsetBucketMemUsage_RAX(vsetBucket *bucket) { + rax *r = vsetBucketRax(bucket); + size_t total_mem = raxAllocSize(r); + raxIterator it; + raxStart(&it, r); + assert(raxSeek(&it, "^", NULL, 0)); + while (raxNext(&it)) { + switch (vsetBucketType(it.data)) { + case VSET_BUCKET_NONE: + total_mem += vsetBucketMemUsage_NONE(it.data); + break; + case VSET_BUCKET_SINGLE: + total_mem += vsetBucketMemUsage_SINGLE(it.data); + break; + case VSET_BUCKET_VECTOR: + total_mem += vsetBucketMemUsage_VECTOR(it.data); + break; + case VSET_BUCKET_HT: + total_mem += vsetBucketMemUsage_HASHTABLE(it.data); + break; + default: + panic("Unknown bucket type encountered in vsetBucketMemUsage_HASHTABLE"); + } + } + raxStop(&it); + return total_mem; +} + +/* Adds an entry to a volatile set (vset) based on its expiration time. + * + * The volatile set maintains buckets of entries grouped by time windows. 
Each + * entry is inserted into an appropriate bucket based on its expiry timestamp. + * Buckets are memory-efficient and use dynamic representations that evolve as + * the number of entries grows: + * + * - VSET_BUCKET_NONE: + * Indicates the set is empty. A new SINGLE bucket is created to hold the entry. + * + * - VSET_BUCKET_SINGLE: + * Holds a single entry directly. Upon inserting a second entry, the bucket + * is promoted to a VECTOR, preserving the sorted order. + * + * - VSET_BUCKET_VECTOR: + * Stores entries in a compact, sorted vector. The maximum size is 127 entries. + * If inserting a new entry exceeds the limit: + * - If all entries share the same bucket timestamp (same high-resolution time window), + * the entire vector is moved into a RAX bucket as a single node. + * - Otherwise, each vector entry is redistributed into the new RAX structure. + * + * - VSET_BUCKET_RAX: + * A radix tree (RAX) used for scalable management of multiple time-based buckets. + * Entries are inserted by computing their bucket key based on their expiration timestamp. + * + * The function uses the entry’s expiration time (provided via the getExpiry function) + * to determine the correct bucket. It promotes bucket types as needed to maintain + * sorted and efficient storage. + * + * In all cases, if the insertion causes a structural change (e.g., bucket promotion), + * the pointer to the root of the bucket tree is updated via the `set` pointer. + * + * This function always returns true, as insertion is guaranteed to succeed + * (barring internal memory allocation failure, which is outside its concern). + * + * Notes: + * - Buckets are upgraded in-place based on size and time span distribution. + * - Vector buckets allow binary search insertion to maintain order. + * - Tagged pointers are used to determine bucket types efficiently. + * - It is assumed that all entries have odd-valued pointers (LSB set). 
+ * - Key encoding in RAX is based on the maximum expiration timestamp + * that falls within a fixed window granularity. + * + * Example: + * vset *myset = NULL; + * vsetAddEntry(&myset, extract_expiry, my_object); + * + * // Internally, my_object is placed into the appropriate bucket. */ +bool vsetAddEntry(vset *set, vsetGetExpiryFunc getExpiry, void *entry) { + long long expiry = getExpiry(entry); + vsetBucket *expiry_buckets = *set; + assert(expiry_buckets); + int bucket_type = vsetBucketType(expiry_buckets); + switch (bucket_type) { + case VSET_BUCKET_NONE: + expiry_buckets = insertToBucket_NONE(getExpiry, expiry_buckets, entry, expiry); + break; + case VSET_BUCKET_SINGLE: + expiry_buckets = insertToBucket_SINGLE(getExpiry, expiry_buckets, entry, expiry); + break; + case VSET_BUCKET_VECTOR: { + pVector *vec = vsetBucketVector(expiry_buckets); + uint32_t len = pvLen(vec); + /* in case the vector is full, we need to turn into RAX */ + if (len == VOLATILESET_VECTOR_BUCKET_MAX_SIZE) { + rax *r = raxNew(); + long long min_expiry = getExpiry(pvGet(vec, 0)); + long long max_expiry = getExpiry(pvGet(vec, len - 1)); + if (get_max_bucket_ts(min_expiry) == get_max_bucket_ts(max_expiry)) { + /* In case we can just insert the bucket, no need to iterate and insert it's elements. we can just push the bucket as a whole. 
*/ + unsigned char key[VSET_BUCKET_KEY_LEN] = {0}; + size_t key_len = encodeNewExpiryBucketKey(key, max_expiry); + raxInsert(r, key, key_len, expiry_buckets, NULL); + expiry_buckets = vsetBucketFromRax(r); + expiry_buckets = insertToBucket_RAX(getExpiry, expiry_buckets, entry, expiry); + } else { + /* We need to migrate entries to the new set of buckets since we do not know all entries are in the same bucket */ + expiry_buckets = vsetBucketFromRax(r); + for (uint32_t i = 0; i < len; i++) { + void *moved_entry = pvGet(vec, i); + expiry_buckets = insertToBucket_RAX(getExpiry, expiry_buckets, moved_entry, getExpiry(moved_entry)); + } + /* free the vector */ + pvFree(vec); + /* now insert the new entry to the buckets */ + expiry_buckets = insertToBucket_RAX(getExpiry, expiry_buckets, entry, expiry); + } + } else { + uint32_t pos = findInsertPosition(getExpiry, expiry_buckets, expiry); + expiry_buckets = insertToBucket_VECTOR(getExpiry, expiry_buckets, entry, expiry, pos); + } + break; + } + case VSET_BUCKET_RAX: + expiry_buckets = insertToBucket_RAX(getExpiry, expiry_buckets, entry, expiry); + break; + default: + panic("Cannot insert to bucket which is not single, vector or rax"); + } + /* update the set */ + *set = expiry_buckets; + return true; +} + +static inline bool vsetRemoveEntryWithExpiry(vset *set, vsetGetExpiryFunc getExpiry, void *entry, long long expiry) { + bool removed; + vsetBucket *bucket = *set; + assert(bucket); + int bucket_type = vsetBucketType(bucket); + switch (bucket_type) { + case VSET_BUCKET_NONE: + /* We cannot remove from empty set */ + return false; + case VSET_BUCKET_SINGLE: + bucket = removeFromBucket_SINGLE(getExpiry, bucket, entry, expiry, &removed); + break; + case VSET_BUCKET_VECTOR: + bucket = removeFromBucket_VECTOR(getExpiry, bucket, entry, expiry, &removed, false); + break; + case VSET_BUCKET_HT: + bucket = removeFromBucket_HASHTABLE(getExpiry, bucket, entry, expiry, &removed); + break; + case VSET_BUCKET_RAX: + bucket = 
removeFromBucket_RAX(getExpiry, bucket, entry, expiry, &removed); + break; + default: + panic("Cannot remove from bucket which is not single, vector, hashtable or rax"); + } + *set = bucket; + return removed; +} + +/* Removes an entry from the volatile set (vset), based on its expiration time. + * + * The volatile set organizes entries into time-based buckets of varying types: + * SINGLE, VECTOR, or RAX. The bucket type determines how entries are stored + * and managed internally. This function will locate and remove the entry + * from its appropriate bucket. + * + * The removal process works as follows: + * + * 1. The expiration timestamp of the entry is used to compute which bucket + * (based on its end time) the entry should reside in. + * + * 2. Depending on the current top-level bucket type of the vset, the function + * dispatches to the appropriate removal handler: + * + * - VSET_BUCKET_SINGLE: + * If the stored entry matches, the bucket is set to NONE. + * + * - VSET_BUCKET_VECTOR: + * Performs a binary search to find and remove the entry from the vector. + * If the resulting vector size drops to 1, it is converted to a SINGLE bucket. + * If the vector becomes empty, it is removed entirely (set to NONE). + * + * - VSET_BUCKET_RAX: + * The function decodes the appropriate bucket key (based on the expiration + * time), looks up the RAX node, and dispatches removal to the sub-bucket. + * If a sub-bucket becomes empty or has only one entry left, its bucket + * type may be downgraded (e.g., to SINGLE or removed). + * + * 3. If the removal results in a structural change (e.g., shrinking a bucket), + * the bucket type may be changed, and the root pointer is updated accordingly. + * + * 4. If the entry is not found in the expected bucket, no action is taken. + * + * Notes: + * - Buckets self-adjust during removal for memory efficiency. + * - The vector bucket keeps entries sorted for fast search/removal. 
+ * - RAX-based sets support a large number of buckets and scale well + * with many time windows. + * - Entries are assumed to have pointer identity (odd-valued pointers). + * - Correct expiration timestamp must be provided for accurate removal. + * + * Return value: + * Returns true if the entry was found and removed successfully. + * Returns false if the entry was not found. + * + * Example usage: + * vsetRemoveEntry(myset, extract_expiry, my_object); + * + * // my_object is removed from the appropriate bucket in myset BUT is not freed. */ +bool vsetRemoveEntry(vset *set, vsetGetExpiryFunc getExpiry, void *entry) { + return vsetRemoveEntryWithExpiry(set, getExpiry, entry, getExpiry(entry)); +} + +static inline vsetBucket *vsetBucketUpdateEntry_SINGLE(vsetBucket *bucket, vsetGetExpiryFunc getExpiry, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry) { + UNUSED(getExpiry); + UNUSED(old_expiry); + UNUSED(new_expiry); + + if (vsetBucketSingle(bucket) == old_entry) { + return vsetBucketFromSingle(new_entry); + } + return vsetBucketFromNone(); +} + +static inline vsetBucket *vsetBucketUpdateEntry_VECTOR(vsetBucket *bucket, vsetGetExpiryFunc getExpiry, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry) { + UNUSED(getExpiry); + UNUSED(old_expiry); + UNUSED(new_expiry); + + pVector *pv = vsetBucketVector(bucket); + uint32_t idx = pvFind(pv, old_entry); + /* in case we did not locate the entry, just return NONE bucket */ + if (idx == pvLen(pv)) + return vsetBucketFromNone(); + pvSet(pv, idx, new_entry); + return bucket; +} + +static inline vsetBucket *vsetBucketUpdateEntry_HASHTABLE(vsetBucket *bucket, vsetGetExpiryFunc getExpiry, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry) { + UNUSED(getExpiry); + UNUSED(old_expiry); + UNUSED(new_expiry); + + /* In this case no need to change anything. 
*/ + if (old_entry == new_entry) + return bucket; + + hashtablePosition pos; + hashtable *ht = vsetBucketHashtable(bucket); + /* We do a two stage pop in order to avoid rehashing. */ + void **ref = hashtableTwoPhasePopFindRef(ht, old_entry, &pos); + if (!ref) { + /* In case no entry found, the rehashing did not pause, so it is safe to return. */ + return vsetBucketFromNone(); + } else { + /* We know for sure the two entries are not the same, so it is safe to add the new and remove the old */ + assert(hashtableAdd(ht, new_entry)); + hashtableTwoPhasePopDelete(ht, &pos); + } + return bucket; +} + +static inline vsetBucket *vsetBucketUpdateEntry_RAX(vsetBucket *target, vsetGetExpiryFunc getExpiry, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry) { + unsigned char key[VSET_BUCKET_KEY_LEN] = {0}; + size_t key_len; + long long bucket_ts; + rax *expiry_buckets = vsetBucketRax(target); + raxNode *node; + /* In case new and old are to be updated in the same bucket - just update the bucket. */ + bool update_bucket = (get_bucket_ts(old_expiry) == get_bucket_ts(new_expiry)); + vsetBucket *bucket = findBucket(expiry_buckets, old_expiry, key, &key_len, &bucket_ts, &node); + + if (!update_bucket) { + /* if the old and new entries are in different buckets, remove the old entry and add the new one. */ + if (removeEntryFromRaxBucket(target, getExpiry, old_entry, bucket, key, key_len, NULL, node)) + target = insertToBucket_RAX(getExpiry, target, new_entry, new_expiry); + else + return vsetBucketFromNone(); + } else { + /* Just update the current bucket */ + switch (vsetBucketType(bucket)) { + case VSET_BUCKET_NONE: + /* No bucket means there is no such old entry. 
return NONE */ + return vsetBucketFromNone(); + case VSET_BUCKET_SINGLE: + bucket = vsetBucketUpdateEntry_SINGLE(bucket, getExpiry, old_entry, new_entry, old_expiry, new_expiry); + break; + case VSET_BUCKET_VECTOR: + bucket = vsetBucketUpdateEntry_VECTOR(bucket, getExpiry, old_entry, new_entry, old_expiry, new_expiry); + break; + case VSET_BUCKET_HT: + bucket = vsetBucketUpdateEntry_HASHTABLE(bucket, getExpiry, old_entry, new_entry, old_expiry, new_expiry); + break; + default: + panic("Unknown bucket type to update entry"); + } + if (bucket) + raxSetData(node, bucket); + else + return vsetBucketFromNone(); + } + return target; +} + +/** + * Updates an existing entry in the volatile set (vset), optionally replacing it + * with a new entry and expiration time. + * + * This function provides a unified interface for removing an old entry and + * adding a new one. It supports three main cases: + * + * 1. Entry identity or expiry time didn't change: + * If the `old_entry` and `new_entry` are the same, and their expiration + * timestamps are also equal, the function returns early with no action taken. + * + * 2. Removal of the old entry: + * If `old_entry` is provided (i.e., not NULL) and its old expiration time + * is valid (`old_expiry != -1`), the function will remove it from the set. + * + * Note: Since the object might already be deallocated (or changed), the + * expiration time is passed explicitly as an argument, rather than + * relying on `getExpiry(old_entry)` which might not be safe to call. + * + * 3. Insertion of the new entry: + * If `new_entry` is provided (i.e., not NULL) and its new expiration time + * is valid (`new_expiry != -1`), the function will insert it into the set. + * + * The function assumes both `vsetRemoveEntryWithExpiry()` and + * `vsetAddEntry()` succeed. It uses assertions to enforce this at runtime, + * assuming this function is used in trusted code paths. + * + * Notes: + * - The update is not atomic. 
If the removal fails (assertion fails), + * insertion of the new entry does not occur. + * - If the new entry is the same as the old one, but the expiry changed, + * the entry is effectively reinserted in the correct bucket. + * - This is useful for renewal or replacement logic where entries may + * need to change time buckets due to updated TTLs or key mutation. + * + * Return value: + * Always returns true on success. + * In case of assertion failures, the program will abort. + * + * Example usage: + * vsetUpdateEntry(myset, getExpiry, old_ptr, new_ptr, old_ts, new_ts); + */ +bool vsetUpdateEntry(vset *set, vsetGetExpiryFunc getExpiry, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry) { + assert(*set); + /* Nothing to do */ + if (old_entry == new_entry && old_expiry == new_expiry) + return true; + vsetBucket *updated = vsetBucketFromNone(); + /* case 1 - both entries were tracked. update the bucket */ + if (old_entry && old_expiry != -1 && new_entry && new_expiry != -1) { + switch (vsetBucketType(*set)) { + case VSET_BUCKET_NONE: + return false; + case VSET_BUCKET_SINGLE: + updated = vsetBucketUpdateEntry_SINGLE(*set, getExpiry, old_entry, new_entry, old_expiry, new_expiry); + break; + case VSET_BUCKET_VECTOR: + if (old_expiry != new_expiry) { + /* NOTE! - in this specific case we might have changed the vector order - need to sort it again (NLogN) */ + /* or remove it from the vector and re-add it (N+LogN). the later also looks cleaner... 
*/ + if (!vsetRemoveEntryWithExpiry(set, getExpiry, old_entry, old_expiry)) + return false; + return vsetAddEntry(set, getExpiry, new_entry); + } + /* We are just updating the entry ref, so sorting is not impacted */ + updated = vsetBucketUpdateEntry_VECTOR(*set, getExpiry, old_entry, new_entry, old_expiry, new_expiry); + break; + + case VSET_BUCKET_RAX: + updated = vsetBucketUpdateEntry_RAX(*set, getExpiry, old_entry, new_entry, old_expiry, new_expiry); + } + if (updated == VSET_NONE_BUCKET_PTR) + return false; + *set = updated; + return true; + } + /* case 2 - old entry was not tracked. just add the new entry */ + else if ((!old_entry || old_expiry == -1) && new_entry && new_expiry != -1) + return vsetAddEntry(set, getExpiry, new_entry); + /* case 3 - old entry was tracked. new entry is not. just remove the old entry */ + else if ((!new_entry || new_expiry == -1) && old_entry && old_expiry != -1) + /* We cannot take the expiration time from the removed entry, since it might not be allocated anymore. + * For this reason we ask the API user to provide us the removed entry expiration time. */ + return vsetRemoveEntryWithExpiry(set, getExpiry, old_entry, old_expiry); + else + return false; + + return false; +} + +/* vsetPopExpired - Remove expired entries from a volatile set up to a maximum count. + * + * Parameters: + * set: Pointer to the volatile set (vset *) to operate on. + * getExpiry: Function to retrieve the expiration time from an entry. + * expiryFunc: Function to call on each expired entry (e.g., to free or notify). + * now: Current time in milliseconds used to compare against expiry times. + * max_count: Maximum number of expired entries to remove. + * ctx: Opaque context pointer passed through to the expiryFunc callback. + * + * This function delegates expiration popping to a type-specific handler based on the + * internal bucket type of the set. 
It supports various bucket encodings: + * - NONE + * - SINGLE + * - VECTOR + * - RAX (radix tree) + * - HT (hashtable) + * + * Returns the number of expired entries successfully removed (and passed to expiryFunc). + * + * Panics if the bucket type is unknown or unsupported. + * + * Return: + * Number of expired entries removed (size_t). */ +size_t vsetRemoveExpired(vset *set, vsetGetExpiryFunc getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx) { + vsetBucket *bucket = *set; + int bucket_type = vsetBucketType(bucket); + switch (bucket_type) { + case VSET_BUCKET_NONE: + return vsetBucketRemoveExpired_NONE(set, getExpiry, expiryFunc, now, max_count, ctx); + break; + case VSET_BUCKET_RAX: + return vsetBucketRemoveExpired_RAX(set, getExpiry, expiryFunc, now, max_count, ctx); + break; + case VSET_BUCKET_SINGLE: + return vsetBucketRemoveExpired_SINGLE(set, getExpiry, expiryFunc, now, max_count, ctx); + break; + case VSET_BUCKET_VECTOR: + return vsetBucketRemoveExpired_VECTOR(set, getExpiry, expiryFunc, now, max_count, ctx); + break; + case VSET_BUCKET_HT: + return vsetBucketRemoveExpired_HASHTABLE(set, getExpiry, expiryFunc, now, max_count, ctx); + break; + default: + panic("Unknown volatile set bucket type in vsetPopExpired"); + } + return 0; +} + +/* vsetEstimatedEarliestExpiry - Estimate the earliest expiration time in a volatile set. + * + * Parameters: + * set: Pointer to the volatile set (vset *) to inspect. + * getExpiry: Callback function used to extract the expiration time from a set entry. + * + * Returns the earliest expiration time based on the structure of the volatile set. + * This is an *approximate* value: + * - For bucketed types (e.g., radix tree, vector), it returns the expiry of the first bucket or entry, + * which may not be the actual earliest expiring item. + * - For single-entry sets, it returns the expiry of the sole item. + * - For VSET_BUCKET_NONE, it returns -1 to indicate there is no data. 
+ * + * Supported bucket types: + * - VSET_BUCKET_SINGLE + * - VSET_BUCKET_VECTOR + * - VSET_BUCKET_RAX + * + * Panics if called with an unsupported bucket type. + * + * Return: + * Estimated earliest expiry time in milliseconds, or -1 if the set is empty. */ +long long vsetEstimatedEarliestExpiry(vset *set, vsetGetExpiryFunc getExpiry) { + int set_type = vsetBucketType(*set); + void *entry = NULL; + long long expiry; + switch (set_type) { + case VSET_BUCKET_NONE: + return -1; + break; + case VSET_BUCKET_RAX: { + rax *r = vsetBucketRax(set); + raxIterator it; + raxStart(&it, r); + expiry = decodeExpiryKey(it.key); + raxStop(&it); + break; + } + case VSET_BUCKET_SINGLE: { + entry = vsetBucketSingle(*set); + expiry = getExpiry(entry); + break; + } + case VSET_BUCKET_VECTOR: { + entry = pvGet(vsetBucketVector(*set), 0); + expiry = getExpiry(entry); + break; + } + default: + panic("Unsupported vset encoding type. Only supported types are single, vector or rax"); + } + return expiry; +} + +/* Advances the volatile set iterator to the next entry. + * + * This function handles iteration over various bucket types in the set. It attempts + * to return the next valid entry, updating the iterator state accordingly. + * + * If the current bucket is exhausted, the iterator automatically switches back to + * the parent bucket (typically used when iterating nested structures, such as RAX buckets). + * + * Parameters: + * - it: Pointer to an initialized vsetInternalIterator. + * - entryptr: Output pointer to receive the next entry. + * + * Returns: + * - true if a next entry is found. + * - false if iteration is complete. 
*/ +bool vsetNext(vsetIterator *iter, void **entryptr) { + vsetInternalIterator *it = iteratorFromOpaque(iter); + vsetBucket *bucket = it->bucket; + int bucket_type = vsetBucketType(bucket); + int ret = 0; + switch (bucket_type) { + case VSET_BUCKET_NONE: + return vsetBucketNext_NONE(it, entryptr); + break; + case VSET_BUCKET_RAX: + return vsetBucketNext_RAX(it, entryptr); + break; + case VSET_BUCKET_SINGLE: + ret = vsetBucketNext_SINGLE(it, entryptr); + break; + case VSET_BUCKET_VECTOR: + ret = vsetBucketNext_VECTOR(it, entryptr); + break; + case VSET_BUCKET_HT: + ret = vsetBucketNext_HASHTABLE(it, entryptr); + break; + default: + panic("Unknown volatile set bucket type in vsetNext"); + } + if (ret == 0) { + /* continue iterating the parent bucket */ + it->iteration_state = vsetBucketType(it->parent_bucket); + it->bucket = it->parent_bucket; + return vsetNext(opaqueFromIterator(it), entryptr); + } + return ret == 1; +} + +size_t vsetMemUsage(vset *set) { + int bucket_type = vsetBucketType(*set); + switch (bucket_type) { + case VSET_BUCKET_NONE: + return vsetBucketMemUsage_NONE(*set); + case VSET_BUCKET_SINGLE: + return vsetBucketMemUsage_SINGLE(*set); + case VSET_BUCKET_VECTOR: + return vsetBucketMemUsage_VECTOR(*set); + case VSET_BUCKET_HT: + panic("Unsupported hashtable bucket type for vset"); + case VSET_BUCKET_RAX: + return vsetBucketMemUsage_RAX(*set); + default: + panic("Unknown set type encountered in vsetMemUsage"); + } + return 0; +} + +/* Initializes a volatile set iterator. + * + * This function prepares the iterator for scanning a volatile set from the beginning. + * It sets the internal state, pointing to the main set bucket, and uses VSET_BUCKET_NONE + * as an initial placeholder to transition correctly into the actual bucket logic. + * + * Parameters: + * - set: Pointer to the volatile set to iterate. + * - it: Pointer to a vsetInternalIterator structure to initialize. 
*/ +void vsetInitIterator(vset *set, vsetIterator *iter) { + vsetInternalIterator *it = iteratorFromOpaque(iter); + it->iteration_state = VSET_BUCKET_NONE; /*lets start by going to the first bucket. */ + it->bucket = *set; + it->bucket_ts = -1; + it->parent_bucket = vsetBucketFromNone(); +} + +/* Finalizes and cleans up an active volatile set iterator. + * + * Some internal iterators (e.g., RAX, hashtable) allocate temporary state. + * This function ensures proper cleanup of those structures when the iteration is done. + * + * Parameters: + * - it: Pointer to the vsetInternalIterator that was previously initialized with vsetInitIterator(). */ +void vsetResetIterator(vsetIterator *iter) { + vsetInternalIterator *it = iteratorFromOpaque(iter); + int bucket_type = vsetBucketType(it->bucket); + int parent_bucket_type = vsetBucketType(it->parent_bucket); + if (parent_bucket_type == VSET_BUCKET_RAX) + raxStop(&it->riter); + if (bucket_type == VSET_BUCKET_HT) + hashtableResetIterator(&it->hiter); +} + +/* Initializes an empty volatile set. + * + * The function sets the set to its initial state by assigning a "NONE" bucket. + * This is the starting point for all volatile sets before entries are inserted. + * + * Parameters: + * - set: Pointer to the volatile set to initialize. */ +void vsetInit(vset *set) { + *set = vsetBucketFromNone(); +} + +/* Clears the volatile set, freeing all memory used for internal buckets. + * + * This function deallocates all internal data structures used by the set (buckets, vectors, + * hash tables, etc.). It does NOT free the entries themselves, since the set only holds + * references. + * + * After this call, the set is reset to an empty state. + * + * Parameters: + * - set: Pointer to the volatile set to clear. */ +void vsetClear(vset *set) { + if (*set == VSET_NONE_BUCKET_PTR) return; + freeVsetBucket(*set); + *set = vsetBucketFromNone(); +} + +/* Same as calling vsetClear, but also de-initialize the set. 
+ * After this call you will have to call vsetInit again in order to continue using the set. */ +void vsetRelease(vset *set) { + vsetClear(set); + *set = NULL; +} + +/* Return true in case this set is an initialized set and false otherwise. */ +bool vsetIsValid(vset *set) { + if (set && *set) { + switch (vsetBucketType(*set)) { + case VSET_BUCKET_NONE: + case VSET_BUCKET_SINGLE: + case VSET_BUCKET_VECTOR: + case VSET_BUCKET_HT: + case VSET_BUCKET_RAX: + return true; + } + } + return false; +} + +/* Checks whether a volatile set is empty. + * + * This function simply checks if the set's current bucket type is VSET_BUCKET_NONE. + * + * Parameters: + * - set: Pointer to the volatile set. + * + * Returns: + * - true if the set contains no entries. + * - false otherwise. */ +bool vsetIsEmpty(vset *set) { + assert(*set); + return vsetBucketType(*set) == VSET_BUCKET_NONE; +} + +/**************** Defrag Logic *********************/ +static struct vsetDefragState { + long long bucket_ts; + size_t bucket_cursor; +} defragState; + +static size_t vsetBucketDefrag_VECTOR(vsetBucket **bucket, size_t cursor, void *(*defragfn)(void *)) { + UNUSED(cursor); + pVector *pv = vsetBucketVector(*bucket); + pv = defragfn(pv); + if (pv) + *bucket = vsetBucketFromVector(pv); + return 0; +} + +static size_t vsetBucketDefrag_HASHTABLE(vsetBucket **bucket, size_t cursor, void *(*defragfn)(void *)) { + hashtable *ht = vsetBucketHashtable(*bucket); + if (cursor == 0) { + /* First time we enter this hashtable, defrag the tables first. 
*/ + hashtable *new_ht = hashtableDefragTables(ht, defragfn); + if (new_ht) { + ht = new_ht; + *bucket = vsetBucketFromHashtable(ht); + } + } + return hashtableScanDefrag(ht, cursor, NULL, NULL, defragfn, 0); +} + +static size_t vsetBucketDefrag_RAX(vsetBucket **bucket, size_t cursor, void *(*defragfn)(void *), int (*defragRaxNode)(raxNode **)) { + struct vsetDefragState *state = (struct vsetDefragState *)cursor; + size_t bucket_cursor = 0; + unsigned char key[VSET_BUCKET_KEY_LEN] = {0}; + size_t key_len; + long long bucket_ts; + rax *r = vsetBucketRax(*bucket); + raxIterator ri; + + /* init the state if this is the first time we enter the bucket */ + if (!state) { + state = &defragState; + state->bucket_ts = -1; + state->bucket_cursor = 0; + if ((r = defragfn(r))) *bucket = vsetBucketFromRax(r); + r = vsetBucketRax(*bucket); + } + raxStart(&ri, r); + ri.node_cb = defragRaxNode; + if (state->bucket_ts < 0) { + /* No prev timestamp, meaning we are starting a new RAX bucket scan */ + assert(raxSeek(&ri, "^", NULL, 0)); + assert(raxNext(&ri)); /* there MUST be at least one bucket! */ + bucket_ts = decodeExpiryKey(ri.key); + } else { + /* we are continuing a RAX bucket scan. lets try and locate the last scanned bucket. + * If not found we can search for the next one. */ + key_len = encodeExpiryKey(state->bucket_ts, key); + if (state->bucket_cursor) { + /* We were in the middle of scanning a bucket. lets try and continue there. + * It is possible that this bucket was deleted. if so we will get to a new bucket + * which is also fine. */ + assert(raxSeek(&ri, ">=", key, key_len)); + } else { + /* in case we completed the last bucket, lets progress to a later bucket */ + assert(raxSeek(&ri, ">", key, key_len)); + } + /* in case we reached the end of the RAX, we are done. 
*/ + if (!raxNext(&ri)) { + return 0; + } + bucket_ts = decodeExpiryKey(ri.key); + if (state->bucket_ts != bucket_ts) { + /* if this is a new bucket, lets start from the beginning */ + bucket_cursor = 0; + } else { + bucket_cursor = state->bucket_cursor; + } + } + raxStop(&ri); + vsetBucket *time_bucket = ri.data; + switch (vsetBucketType(time_bucket)) { + case VSET_BUCKET_NONE: + case VSET_BUCKET_SINGLE: + bucket_cursor = 0; + break; + case VSET_BUCKET_VECTOR: + bucket_cursor = vsetBucketDefrag_VECTOR(&time_bucket, bucket_cursor, defragfn); + if (time_bucket != ri.data) + raxSetData(ri.node, time_bucket); + break; + case VSET_BUCKET_HT: + bucket_cursor = vsetBucketDefrag_HASHTABLE(&time_bucket, bucket_cursor, defragfn); + if (time_bucket != ri.data) + raxSetData(ri.node, time_bucket); + break; + default: + panic("Unsupported vset bucket type for RAX bucket. Only supported types are single, vector or hashtable"); + } + /* if we reached here, we are not done. lets return the state and next time we can continue from this bucket. 
*/ + state->bucket_ts = bucket_ts; + state->bucket_cursor = bucket_cursor; + return (size_t)state; +} + +size_t vsetScanDefrag(vset *set, size_t cursor, void *(*defragfn)(void *), int (*defragRaxNode)(raxNode **)) { + switch (vsetBucketType(*set)) { + case VSET_BUCKET_NONE: + case VSET_BUCKET_SINGLE: + /* nothing to do */ + return 0; + case VSET_BUCKET_VECTOR: + return vsetBucketDefrag_VECTOR(set, cursor, defragfn); + case VSET_BUCKET_RAX: + return vsetBucketDefrag_RAX(set, cursor, defragfn, defragRaxNode); + default: + panic("Unknown vset node type to defrag"); + } + return 0; +} diff --git a/src/vset.h b/src/vset.h new file mode 100644 index 0000000000..7349aa46ed --- /dev/null +++ b/src/vset.h @@ -0,0 +1,97 @@ +#ifndef VOLATILESET_H +#define VOLATILESET_H + +#include +#include + +#include "hashtable.h" +#include "rax.h" +#include "sds.h" +#include "monotonic.h" /* for mstime_t*/ + +/* + *----------------------------------------------------------------------------- + * Volatile Set - Adaptive, Expiry-aware Set Structure + *----------------------------------------------------------------------------- + * + * The `vset` is a dynamic, memory-efficient container for managing + * entries with expiry semantics. It is designed to efficiently track entries + * that expire at varying times and scales to large sets by adapting its internal + * representation as it grows or shrinks. + * + *----------------------------------------------------------------------------- + * Public API + *----------------------------------------------------------------------------- + * + * Create/Free: + * vsetInit(vset *set) - used in order to initialize a new vset. + * void vsetClear(vset *set) - used in order to empty all the data in a vset. + * void vsetRelease(vset *set) - just like vsetClear, but also release the set itself so it will become unusable. + * and will require a new call to vsetInit in order to continue using the set. 
+ * Example: + * vset set; + * vsetInit(&set); + * // add some elements to the vset + * vsetClear(&set); + * // verify the set is empty: + * assert(vsetIsEmpty(&set)); + * + * Mutation: + * bool vsetAddEntry(vset *set, vsetGetExpiryFunc getExpiry, void *entry) - used in order to insert a new entry into the set. + * The API also make use of the provided getExpiry function in order to compare the 'entry' expiration time of the other existing + * entries in the set. + * + * bool vsetRemoveEntry(vset *set, vsetGetExpiryFunc getExpiry, void *entry) - used in order to remove and entry from the set. + * + * bool vsetUpdateEntry(vset *set, vsetGetExpiryFunc getExpiry, void *old_entry, + * void *new_entry, long long old_expiry, + * long long new_expiry) - is used in order to update an existing entry in the set. + * Note that the implementation assumes the 'old_entry' might not point to a valid memory location, thus it require that the 'old_expiry' + * is provided and matches the old entry expiration time. + * + * Expiry Retrieval/Removal: + * long long vsetEstimatedEarliestExpiry(vset *set, vsetGetExpiryFunc getExpiry) - will return an estimation to the lowest expiry time of + * the entries which currently exists in the set. Because of the semi-sorted ordering this implementation is using, the returned value MIGHT not be the 'real' minimum + * but rather some value which is the maximum among a group of entries which are all close or equal to the 'real' minimum. + * + * size_t vsetRemoveExpired(vset *set, vsetGetExpiryFunc getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx) - can be used + * in order to remove up to max_count entries from the vset. The removed entries will all satisfy the condition that their expiration time is smaller than the provided now. + * Note that there are no guarantees about the order to the entries. + * + * Utilities: + * bool vsetIsEmpty(vset *set) - used in order to check if a given set has any entries. 
+ * + * Iteration: + * void vsetInitIterator(vset *set, vsetIterator *it) - used to initialize a new vset iterator. + * bool vsetNext(vsetIterator *it, void **entryptr) - used to iterate to the next element. Will return false if there are no more elements. + * void vsetResetIterator(vsetIterator *it) - used in order to reset the iterator at the end of the iteration. + * + * Note that the vset iterator is NOT safe, Meaning you should not change the set while iterating it. Adding entries and/or removing entries + * can result in unexpected behavior.! */ + +/* Return the absolute expiration time in milliseconds for the provided entry */ +typedef long long (*vsetGetExpiryFunc)(const void *entry); +/* Callback to be optionally provided to vsetPopExpired. when item is removed from the vset this callback will also be applied. */ +typedef int (*vsetExpiryFunc)(void *entry, void *ctx); +// vset is just a pointer to a bucket +typedef void *vset; + +typedef uint8_t vsetIterator[560]; + +bool vsetAddEntry(vset *set, vsetGetExpiryFunc getExpiry, void *entry); +bool vsetRemoveEntry(vset *set, vsetGetExpiryFunc getExpiry, void *entry); +bool vsetUpdateEntry(vset *set, vsetGetExpiryFunc getExpiry, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry); +bool vsetIsEmpty(vset *set); +void vsetInitIterator(vset *set, vsetIterator *it); +bool vsetNext(vsetIterator *it, void **entryptr); +void vsetResetIterator(vsetIterator *it); +void vsetInit(vset *set); +void vsetClear(vset *set); +void vsetRelease(vset *set); +bool vsetIsValid(vset *set); +long long vsetEstimatedEarliestExpiry(vset *set, vsetGetExpiryFunc getExpiry); +size_t vsetRemoveExpired(vset *set, vsetGetExpiryFunc getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx); +size_t vsetMemUsage(vset *set); +size_t vsetScanDefrag(vset *set, size_t cursor, void *(*defragfn)(void *), int (*defragRaxNode)(raxNode **)); + +#endif diff --git a/tests/unit/hashexpire.tcl 
b/tests/unit/hashexpire.tcl
new file
index 0000000000..c8989dace1
--- /dev/null
+++ b/tests/unit/hashexpire.tcl
@@ -0,0 +1,2639 @@

# Extract the value of "$field:" from an INFO-style multi-line string.
# Falls back to querying the live server's status when the field is absent
# from the supplied text.
proc info_field {info field} {
    foreach line [split $info "\n"] {
        if {[string match "$field:*" $line]} {
            return [string trim [lindex [split $line ":"] 1]]
        }
    }
    # Fix: query the requested field; the original passed the literal
    # token "field_name" to [s], which never matches the caller's intent.
    return [s $field]
}

# A TTL value small enough for the field to expire almost immediately,
# expressed in whichever unit/epoch the given command expects.
proc get_short_expire_value {command} {
    expr {
        ($command eq "HEXPIRE" || $command eq "EX") ? 1 :
        ($command eq "HPEXPIRE" || $command eq "PX") ? 10 :
        ($command eq "HEXPIREAT" || $command eq "EXAT") ? [clock seconds] + 1 :
        [clock milliseconds] + 10
    }
}

# A TTL value far in the future, per-command unit/epoch.
proc get_long_expire_value {command} {
    expr {
        ($command eq "HEXPIRE" || $command eq "EX") ? 60000000 :
        ($command eq "HPEXPIRE" || $command eq "PX") ? 60000000 :
        ($command eq "HEXPIREAT" || $command eq "EXAT") ? [clock seconds] + 60000000 :
        [clock milliseconds] + 60000000
    }
}

# A TTL value strictly larger than get_long_expire_value, per-command unit/epoch.
proc get_longer_then_long_expire_value {command} {
    expr {
        ($command eq "HEXPIRE" || $command eq "EX") ? 1200000000 :
        ($command eq "HPEXPIRE" || $command eq "PX") ? 1200000000 :
        ($command eq "HEXPIREAT" || $command eq "EXAT") ? [clock seconds] + 1200000000 :
        [clock milliseconds] + 1200000000
    }
}

# A zero/past TTL value that should delete the field immediately,
# per-command unit/epoch.
proc get_past_zero_expire_value {command} {
    expr {
        ($command eq "HEXPIRE" || $command eq "EX") ? 0 :
        ($command eq "HPEXPIRE" || $command eq "PX") ? 0 :
        ($command eq "HEXPIREAT" || $command eq "EXAT") ?
[clock seconds] - 200000 : + [clock milliseconds] - 200000 + } +} + +proc get_check_ttl_command {command} { + if {$command eq "EX"} { + return "HTTL" + } elseif {$command eq "PX"} { + return "HPTTL" + } elseif {$command eq "EXAT"} { + return "HEXPIRETIME" + } else { + return "HPEXPIRETIME" + } +} + +proc assert_keyevent_patterns {rd key args} { + foreach event_type $args { + set event [$rd read] + assert_match "pmessage __keyevent@* __keyevent@*:$event_type $key" $event + } +} + +proc setup_replication_test {primary replica primary_host primary_port} { + $primary FLUSHALL + $replica replicaof $primary_host $primary_port + wait_for_condition 50 100 { + [lindex [$replica role] 0] eq {slave} && + [string match {*master_link_status:up*} [$replica info replication]] + } else { + fail "Can't turn the instance into a replica" + } + set primary_initial_expired [info_field [$primary info stats] expired_subkeys] + set replica_initial_expired [info_field [$replica info stats] expired_subkeys] + return [list $primary_initial_expired $replica_initial_expired] +} + +proc setup_single_keyspace_notification {r} { + $r config set notify-keyspace-events KEA + set rd [valkey_deferring_client] + assert_equal {1} [psubscribe $rd __keyevent@*] + return $rd +} + + +start_server {tags {"hashexpire"}} { + ####### Valid scenarios tests ####### + foreach command {EX PX EXAT PXAT} { + test "HGETEX $command expiry" { + r FLUSHALL + r DEBUG SET-ACTIVE-EXPIRE no + r HSET myhash f1 v1 + + set ttl_cmd [get_check_ttl_command $command] + set expire_time [get_long_expire_value $command] + + # Verify HGETEX command + assert_equal "v1" [r HGETEX myhash $command $expire_time FIELDS 1 f1] + set expire_result [r $ttl_cmd myhash FIELDS 1 f1] + + # Verify expiry + if {[regexp "AT$" $command]} { + assert_equal $expire_result $expire_time + } else { + assert_morethan $expire_result 0 + } + # Re-enable active expiry + r DEBUG SET-ACTIVE-EXPIRE yes + } {OK} {needs:debug} + + test "HGETEX $command with mix of 
existing and non-existing fields" { + r FLUSHALL + r HSET myhash f1 v1 f3 v3 + + # HGETEX on exist/non-exist fields + assert_equal "v1 {} v3" [r HGETEX myhash $command [get_long_expire_value $command] FIELDS 3 f1 f2 f3] + + # Verification checks (f2 should not be created) + assert_equal "" [r HGET myhash f2] + assert_equal -2 [r HTTL myhash FIELDS 1 f2] + assert_morethan [r HTTL myhash FIELDS 1 f1] 0 + assert_morethan [r HTTL myhash FIELDS 1 f3] 0 + } + + test "HGETEX $command on more then 1 field" { + r FLUSHALL + r DEBUG SET-ACTIVE-EXPIRE no + r HSET myhash f1 v1 f2 v2 + + set ttl_cmd [get_check_ttl_command $command] + set expire_time [get_long_expire_value $command] + + assert_equal "v1 v2" [r HGETEX myhash $command $expire_time FIELDS 2 f1 f2] + + # Verify expiration + if {[regexp "AT$" $command]} { + assert_equal $expire_time [r $ttl_cmd myhash FIELDS 1 f1] + assert_equal $expire_time [r $ttl_cmd myhash FIELDS 1 f2] + } else { + assert_morethan [r $ttl_cmd myhash FIELDS 1 f1] 0 + assert_morethan [r $ttl_cmd myhash FIELDS 1 f2] 0 + } + + # Re-enable active expiry + r DEBUG SET-ACTIVE-EXPIRE yes + } {OK} {needs:debug} + + test "HGETEX $command -> PERSIST" { + r FLUSHALL + r HSET myhash f1 v1 + r HSETEX myhash EX 10000 FIELDS 1 f2 v2 + + set ttl_cmd [get_check_ttl_command $command] + set expire_time [get_long_expire_value $command] + + assert_equal "v1" [r HGETEX myhash $command $expire_time FIELDS 1 f1] + if {[regexp "AT$" $command]} { + assert_equal $expire_time [r $ttl_cmd myhash FIELDS 1 f1] + } else { + assert_morethan [r $ttl_cmd myhash FIELDS 1 f1] 0 + } + + assert_equal "v1" [r HGETEX myhash PERSIST FIELDS 1 f1] + assert_equal -1 [r HTTL myhash FIELDS 1 f1] + # Verify f2 still has ttl + assert_morethan [r HTTL myhash FIELDS 1 f2] 100 + } + + test "HGETEX $command on non-exist field" { + r FLUSHALL + r HSET myhash f1 v1 + assert_equal {{}} [r HGETEX myhash $command [get_short_expire_value $command] FIELDS 1 f2] + } + + test "HGETEX $command on non-exist 
key" { + r FLUSHALL + assert_equal "" [r HGETEX myhash $command [get_long_expire_value $command] FIELDS 1 f2] + } + + test "HGETEX $command with duplicate field names" { + r FLUSHALL + r HSET myhash f1 v1 + assert_equal "v1 v1" [r HGETEX myhash $command [get_long_expire_value $command] FIELDS 2 f1 f1] + } + + + test "HGETEX $command overwrites existing field TTL with bigger value" { + r FLUSHALL + r HSETEX myhash $command [get_long_expire_value $command] FIELDS 1 f1 v1 + set old_ttl [r HTTL myhash FIELDS 1 f1] + r HGETEX myhash $command [get_longer_then_long_expire_value $command] FIELDS 1 f1 + set new_ttl [r HTTL myhash FIELDS 1 f1] + assert {$new_ttl > $old_ttl} + } + + test "HGETEX $command overwrites existing field TTL with smaller value" { + r FLUSHALL + r HSETEX myhash $command [get_long_expire_value $command] FIELDS 1 f1 v1 + set old_ttl [r HTTL myhash FIELDS 1 f1] + r HGETEX myhash $command [get_short_expire_value $command] FIELDS 1 f1 + set new_ttl [r HTTL myhash FIELDS 1 f1] + assert {$new_ttl <= $old_ttl} + } + } + + foreach command {EX PX} { + test "HGETEX $command with 0 ttl" { + r FLUSHALL + r HSET myhash f1 v1 + assert_equal "v1" [r HGETEX myhash $command 0 FIELDS 1 f1] + assert_equal "" [r HGET myhash f1] + assert_equal -2 [r HTTL myhash FIELDS 1 f1] + } + } + + foreach command {EXAT PXAT} { + test "HGETEX $command with past expiry" { + r FLUSHALL + r HSET myhash f1 v1 + assert_equal "v1" [r HGETEX myhash $command [get_past_zero_expire_value $command] FIELDS 1 f1] + assert_equal "" [r HGET myhash f1] + assert_equal -2 [r HTTL myhash FIELDS 1 f1] + } + } + + test {HGETEX - verify no change when field does not exist} { + r FLUSHALL + r HSET myhash f1 v1 + set mem_before [r MEMORY USAGE myhash] + assert_equal {{}} [r HGETEX myhash EX 1 FIELDS 1 f2] + set memory_after [r MEMORY USAGE myhash] + assert_equal $mem_before $memory_after + } + + ####### Invalid scenarios tests ####### + test {HGETEX EX- multiple options used (EX + PX)} { + r FLUSHALL + r HSET 
myhash f1 v1 + assert_error "ERR*" {r HGETEX myhash EX 60 PX 1000 FIELDS 1 f1} + } + + test {HGETEX EXAT- multiple options used (EXAT + PXAT)} { + r FLUSHALL + r HSET myhash f1 v1 + assert_error "ERR*" {r HGETEX myhash EXAT [expr {[clock seconds] + 100}] PXAT [expr {[clock milliseconds] + 100000}] 1000 FIELDS 1 f1} + } + + # Common error scenarios for all commands + foreach cmd {EX PX EXAT PXAT} { + test "HGETEX $cmd- missing TTL value" { + r FLUSHALL + r HSET myhash f1 v1 + catch {r HGETEX myhash $cmd FIELDS 1 f1} e + set e + } {ERR *} + + test "HGETEX $cmd- negative TTL" { + r FLUSHALL + r HSET myhash f1 v1 + catch {r HGETEX myhash $cmd -10 FIELDS 1 f1} e + set e + } {ERR invalid expire time in 'hgetex' command} + + test "HGETEX $cmd- non-integer TTL" { + r FLUSHALL + r HSET myhash f1 v1 + catch {r HGETEX myhash $cmd abc FIELDS 1 f1} e + set e + } {ERR value is not an integer or out of range} + + test "HGETEX $cmd- missing FIELDS keyword" { + r FLUSHALL + r HSET myhash f1 v1 + catch {r HGETEX myhash $cmd [get_short_expire_value $cmd] 1 f1} e + set e + } {ERR *} + + test "HGETEX $cmd- wrong numfields count (too few fields)" { + r FLUSHALL + r HSET myhash f1 v1 f2 v2 + catch {r HGETEX myhash $cmd [get_short_expire_value $cmd] FIELDS 2 f1} e + set e + } {ERR *} + + test "HGETEX $cmd- wrong numfields count (too many fields)" { + r FLUSHALL + r HSET myhash f1 v1 + catch {r HGETEX myhash $cmd [get_short_expire_value $cmd] FIELDS 1 f1 f2} e + set e + } {ERR *} + + test "HGETEX $cmd- key is wrong type (string instead of hash)" { + r FLUSHALL + r SET mystring "v1" + catch {r HGETEX mystring $cmd [get_short_expire_value $cmd] FIELDS 1 f1} e + set e + } {WRONGTYPE Operation against a key holding the wrong kind of value} + + test "HGETEX $cmd with FIELDS 0" { + r FLUSHALL + catch {r HGETEX myhash $cmd [get_short_expire_value $cmd] FIELDS 0} e + set e + } {ERR *} + + test "HGETEX $cmd with negative numfields" { + r FLUSHALL + catch {r HGETEX myhash $cmd 
[get_short_expire_value $cmd] FIELDS -10} e + set e + } {ERR *} + + test "HGETEX $cmd with missing key" { + r FLUSHALL + catch {r HGETEX $cmd [get_short_expire_value $cmd] FIELDS 1 f1} e + set e + } {ERR *} + } +} + +## HGETEX -> Keyspace notification tests #### +start_server {tags {"hashexpire"}} { + if {$::singledb} { + set db 0 + } else { + set db 9 + } + set all_h_pattern "h*" + set hexpire_pattern "hexpire" + set hpersist_pattern "hpersist" + + r config set notify-keyspace-events KEA + + foreach command {EX PX EXAT PXAT} { + test "HGETEX $command generates hexpire keyspace notification" { + r FLUSHALL + r HSET myhash f1 v1 + + set rd [setup_single_keyspace_notification r] + + r HGETEX myhash $command [get_long_expire_value $command] FIELDS 1 f1 + + assert_keyevent_patterns $rd myhash hexpire + $rd close + } + + test "HGETEX $command with multiple fields generates single notification" { + r FLUSHALL + r HSET myhash f1 v1 f2 v2 f3 v3 + + set rd [setup_single_keyspace_notification r] + + r HGETEX myhash $command [get_long_expire_value $command] FIELDS 3 f1 f2 f3 + + assert_keyevent_patterns $rd myhash hexpire + # Verify no notification (getting hset and not hexpire) + r HSET dummy dummy dummy + assert_keyevent_patterns $rd dummy hset + $rd close + } + + test "HGETEX $command on non-existent field generates no notification" { + r FLUSHALL + r HSET myhash f1 v1 + + set rd [setup_single_keyspace_notification r] + + # This HGETEX targets a non-existent field, so no notification about hexpire should be emitted + r HGETEX myhash $command [get_long_expire_value $command] FIELDS 1 f2 + + # Verify no notification (getting hset and not hexpire) + r HSET dummy dummy dummy + assert_keyevent_patterns $rd dummy hset + + $rd close + } + } + + test {HGETEX PERSIST generates hpersist keyspace notification} { + r FLUSHALL + r HSET myhash f1 v1 + r HEXPIRE myhash 60 FIELDS 1 f1 + + set rd [setup_single_keyspace_notification r] + + r HGETEX myhash PERSIST FIELDS 1 f1 + + 
assert_keyevent_patterns $rd myhash hpersist + $rd close + } + + foreach command {EX PX EXAT PXAT} { + + test "HGETEX $command 0/past time works correctly with 1 field" { + r FLUSHALL + + # Create hash with field + r HSET myhash f1 v1 + assert_equal 1 [r HLEN myhash] + assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] + + set rd [setup_single_keyspace_notification r] + + # Set field to expire immediately + r HGETEX myhash $command [get_past_zero_expire_value $command] FIELDS 1 f1 + + # Verify field and keys are deleted + assert_keyevent_patterns $rd myhash hexpired del + assert_equal -2 [r HTTL myhash FIELDS 1 f1] + assert_equal 0 [r HLEN myhash] + assert_equal 0 [r EXISTS myhash] + assert_match "" [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] + + $rd close + } + + test "HGETEX $command 0/past time works correctly with 1 field on field with expire" { + r FLUSHALL + + # Create hash with field + r HSETEX myhash EX 1000 FIELDS 1 f1 v1 + assert_equal 1 [r HLEN myhash] + assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] + + set rd [setup_single_keyspace_notification r] + + # Set field to expire immediately + r HGETEX myhash $command [get_past_zero_expire_value $command] FIELDS 1 f1 + + # Verify field and keys are deleted + assert_keyevent_patterns $rd myhash hexpired del + assert_equal -2 [r HTTL myhash FIELDS 1 f1] + assert_equal 0 [r HLEN myhash] + assert_equal 0 [r EXISTS myhash] + assert_match "" [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] + + $rd close + } + + test "HGETEX $command 0/past time works correctly with more then 1 field" { + r FLUSHALL + + # Create hash with field + r HSET myhash f1 v1 f2 v2 + assert_equal 2 [r HLEN myhash] + assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] + + set rd [setup_single_keyspace_notification r] + + # Set field to expire immediately + r HGETEX myhash $command [get_past_zero_expire_value 
$command] FIELDS 1 f2 + + # Verify field and keys are deleted + assert_keyevent_patterns $rd myhash hexpired + assert_equal -2 [r HTTL myhash FIELDS 1 f2] + assert_equal 1 [r HLEN myhash] + assert_equal 1 [r EXISTS myhash] + assert_match 1 [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] + + $rd close + } + + test "HGETEX $command 0/past time works correctly with more then 1 field and expire" { + r FLUSHALL + + # Create hash with field + r HSET myhash f1 v1 f2 v2 f3 v3 f4 v4 + r HEXPIRE myhash 1000000 FIELDS 1 f1 + assert_equal 4 [r HLEN myhash] + assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] + + set rd [setup_single_keyspace_notification r] + + # Set field to expire immediately + r HGETEX myhash $command [get_past_zero_expire_value $command] FIELDS 1 f1 + + # Verify field and keys are deleted + assert_keyevent_patterns $rd myhash hexpired + assert_equal -2 [r HTTL myhash FIELDS 1 f1] + assert_equal 3 [r HLEN myhash] + assert_equal 1 [r EXISTS myhash] + assert_match 1 [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] + + $rd close + } + } +} + +# HSETEX #### +start_server {tags {"hashexpire"}} { + test {HSETEX KEEPTTL - preserves existing TTL of field} { + r FLUSHALL + + # Set a field with a known TTL + r HSETEX myhash PX 1000 FIELDS 1 field1 val1 + set original_pttl [r HPTTL myhash FIELDS 1 field1] + set original_expiretime [r HEXPIRETIME myhash FIELDS 1 field1] + + # Validate TTL is active and expiretime is in the future + assert {$original_pttl > 0} + assert {$original_expiretime > [clock seconds]} + + # Overwrite the field with KEEPTTL + r HSETEX myhash KEEPTTL FIELDS 1 field1 newval + + # Ensure TTL is preserved + set updated_pttl [r HPTTL myhash FIELDS 1 field1] + set updated_expiretime [r HEXPIRETIME myhash FIELDS 1 field1] + assert {$updated_pttl > 0} + assert {$updated_pttl <= $original_pttl} + assert_equal $original_expiretime $updated_expiretime + + # Ensure value was updated + 
assert_equal newval [r HGET myhash field1] + } + + test {HSETEX EX - FIELDS 0 returns error} { + r FLUSHALL + catch {r HSETEX myhash EX 10 FIELDS 0} e + set e + } {ERR *} + + test {HSETEX EX - test negative ttl} { + set ttl -10 + catch {r HSETEX myhash EX $ttl FIELDS 1 field1 val1} e + set e + } {ERR invalid expire time in 'hsetex' command} + + test {HSETEX EX - test non-numeric ttl} { + set ttl abc + catch {r HSETEX myhash EX $ttl FIELDS 1 field1 val1} e + set e + } {ERR value is not an integer or out of range} + + test {HSETEX EX - overwrite field resets TTL} { + r FLUSHALL + r HSETEX myhash EX 100 FIELDS 1 field1 val1 + r HSETEX myhash EX 200 FIELDS 1 field1 newval + assert_equal 200 [r HTTL myhash FIELDS 1 field1] + assert_equal newval [r HGET myhash field1] + } + + test {HSETEX EX - test zero ttl expires immediately} { + r FLUSHALL + r HSETEX myhash EX 0 FIELDS 1 field1 val1 + after 10 + assert_equal 0 [r HEXISTS myhash field1] + } + + test {HSETEX EX - test mix of expiring and persistent fields} { + r FLUSHALL + r HSET myhash field2 "persistent" + r HSETEX myhash EX 1 FIELDS 1 field1 "temp" + after 1100 + assert_equal 0 [r HEXISTS myhash field1] + assert_equal 1 [r HEXISTS myhash field2] + } + + test {HSETEX EX - test missing TTL} { + catch {r HSETEX myhash EX FIELDS 1 field1 val1} e + set e + } {ERR *} + + test {HSETEX EX - mismatched field/value count} { + catch {r HSETEX myhash EX 10 FIELDS 2 field1 val1} e + set e + } {ERR *} + + + + ###### PX ####### + + test {HSETEX PX - test negative ttl} { + set ttl -50 + catch {r HSETEX myhash PX $ttl FIELDS 1 field1 val1} e + set e + } {ERR invalid expire time in 'hsetex' command} + + test {HSETEX PX - test non-numeric ttl} { + set ttl xyz + catch {r HSETEX myhash PX $ttl FIELDS 1 field1 val1} e + set e + } {ERR value is not an integer or out of range} + + test {HSETEX PX - overwrite field resets TTL} { + r FLUSHALL + r HSETEX myhash PX 10000 FIELDS 1 field1 val1 + r HSETEX myhash PX 20000 FIELDS 1 field1 newval + 
set ttl [r HPTTL myhash FIELDS 1 field1] + assert {$ttl >= 19000 && $ttl <= 20000} + assert_equal newval [r HGET myhash field1] + } + + test {HSETEX PX - test zero ttl expires immediately} { + r FLUSHALL + r HSETEX myhash PX 0 FIELDS 1 field1 val1 + after 10 + assert_equal 0 [r HEXISTS myhash field1] + } + + test {HSETEX PX - test mix of expiring and persistent fields} { + r FLUSHALL + r HSET myhash field2 "persistent" + r HSETEX myhash PX 10 FIELDS 1 field1 "temp" + after 20 + assert_equal 0 [r HEXISTS myhash field1] + assert_equal 1 [r HEXISTS myhash field2] + } + + test {HSETEX PX - test missing TTL} { + catch {r HSETEX myhash PX FIELDS 1 field1 val1} e + set e + } {ERR *} + + # test {HSETEX PX - mismatched field/value count} { + # catch {r HSETEX myhash PX 100 FIELDS 2 field1 val1} e + # set e + # } {ERR wrong number of arguments for 'hsetex' command} + + + ## FNX/FXX + + # hsetex throws ERR *, it shouldn't + test {HSETEX EX FNX - set only if none of the fields exist} { + r FLUSHALL + r HSET myhash field1 val1 + set res [r HSETEX myhash EX 10 FNX FIELDS 1 field1 val2] + assert_equal 0 $res + assert_equal val1 [r HGET myhash field1] + + # Now try with all-new fields + set res [r HSETEX myhash EX 10 FNX FIELDS 2 f2 v2 f3 v3] + assert_equal 1 $res + assert_equal v2 [r HGET myhash f2] + assert_equal v3 [r HGET myhash f3] + } + + test {HSETEX EX FXX - set only if all fields exist} { + r FLUSHALL + r HSET myhash field1 val1 field2 val2 + set res [r HSETEX myhash EX 10 FXX FIELDS 2 field1 new1 field2 new2] + assert_equal 1 $res + assert_equal new1 [r HGET myhash field1] + assert_equal new2 [r HGET myhash field2] + + # Now try when one field doesn't exist + set res [r HSETEX myhash EX 10 FXX FIELDS 2 field1 x fieldX y] + assert_equal 0 $res + assert_equal new1 [r HGET myhash field1] + assert_equal 0 [r HEXISTS myhash fieldX] + } + + # Syntax error: HSETEX myhash PX 100 FNX FIELDS 2 x 2 y 3 + test {HSETEX PX FNX - partial conflict returns 0} { + r FLUSHALL + r HSET 
myhash x 1 + set res [r HSETEX myhash PX 100 FNX FIELDS 2 x 2 y 3] + assert_equal 0 $res + assert_equal 1 [r HEXISTS myhash x] + assert_equal 0 [r HEXISTS myhash y] + } + + test {HSETEX PX FXX - one field missing returns 0} { + r FLUSHALL + r HSET myhash a 1 + set res [r HSETEX myhash PX 100 FXX FIELDS 2 a 2 b 3] + assert_equal 0 $res + assert_equal 1 [r HGET myhash a] + assert_equal 0 [r HEXISTS myhash b] + } + + test {HSETEX EX - FNX and FXX conflict error} { + catch {r HSETEX myhash EX 10 FNX FXX FIELDS 1 x y} e + set e + } {ERR *} + + ###### Test EXPIRE ############# + + + # Basic Expiry Functionality + test {HEXPIRE - set TTL on existing field} { + r FLUSHALL + r HSET myhash field1 hello + r HEXPIRE myhash 10 FIELDS 1 field1 + set ttl [r HTTL myhash FIELDS 1 field1] + assert {$ttl > 0} + } + + test {HEXPIRE - TTL 0 deletes field} { + r FLUSHALL + r HSET myhash field1 goodbye + set res [r HEXPIRE myhash 0 FIELDS 1 field1] + assert_equal {2} $res + assert_equal 0 [r HEXISTS myhash field1] + } + + test {HEXPIRE - negative TTL returns error} { + r FLUSHALL + r HSET myhash field1 val + catch {r HEXPIRE myhash -5 FIELDS 1 field1} e + set e + } {ERR invalid expire time in 'hexpire' command} + + test {HEXPIRE - wrong type key returns error} { + r FLUSHALL + r SET myhash notahash + catch {r HEXPIRE myhash 10 FIELDS 1 field1} e + set e + } {WRONGTYPE Operation against a key holding the wrong kind of value} + + # Conditionals: NX + test {HEXPIRE NX - only set when field has no TTL} { + r FLUSHALL + r HSETEX myhash PX 100 FIELDS 1 field1 val + set res [r HEXPIRE myhash 10 NX FIELDS 1 field1] + assert_equal {0} $res + + r HSET myhash field2 val2 + set res2 [r HEXPIRE myhash 10 NX FIELDS 1 field2] + assert_equal {1} $res2 + } + + # Conditionals: XX + test {HEXPIRE XX - only set when field has TTL} { + r FLUSHALL + r HSET myhash field1 val1 field2 val2 + r HEXPIRE myhash 20 FIELDS 1 field1 + set res [r HEXPIRE myhash 30 XX FIELDS 2 field1 field2] + assert_equal {1 0} $res + 
} + + # Conditionals: GT + test {HEXPIRE GT - only set if new TTL > existing TTL} { + r FLUSHALL + r HSETEX myhash EX 300 FIELDS 1 field1 val1 + after 10 + set res [r HEXPIRE myhash 600 GT FIELDS 1 field1] ;# 600s > 300s remaining + assert_equal {1} $res + + # GT should fail if field is persistent + r HSET myhash field2 val2 + set res2 [r HEXPIRE myhash 1 GT FIELDS 1 field2] + assert_equal {0} $res2 + } + + # Conditionals: LT + test {HEXPIRE LT - only set if new TTL < existing TTL} { + r FLUSHALL + r HSETEX myhash EX 600 FIELDS 1 field1 val1 + set res [r HEXPIRE myhash 1 LT FIELDS 1 field1] + assert_equal {1} $res + + ## TODO this is an expected behavior really? what does non existintg ttl mean? + r HSET myhash field2 val2 + set res2 [r HEXPIRE myhash 1 LT FIELDS 1 field2] + assert_equal {1} $res2 + } + + # TTL Refresh + test {HEXPIRE - refresh TTL with new value} { + r FLUSHALL + r HSET myhash field1 val1 + r HEXPIRE myhash 1 FIELDS 1 field1 + after 500 + r HEXPIRE myhash 3 FIELDS 1 field1 + set ttl [r HTTL myhash FIELDS 1 field1] + assert {$ttl >= 2} + } + + # HEXPIRE on a non-existent field + test {HEXPIRE on a non-existent field (should not create field)} { + r FLUSHALL + r HSET myhash f1 v1 + r HEXPIRE myhash 1000 FIELDS 1 f2 + assert_equal 0 [r HEXISTS myhash f2] + assert_equal -2 [r HTTL myhash FIELDS 1 f2] + } + + # Error Cases + test {HEXPIRE - conflicting conditions error} { + r FLUSHALL + r HSET myhash field1 val + catch {r HEXPIRE myhash 10 NX XX FIELDS 1 field1} e + set e + } {ERR NX and XX, GT or LT options at the same time are not compatible} + + test {HEXPIRE - missing FIELDS error} { + r FLUSHALL + r HSET myhash field1 val + catch {r HEXPIRE myhash 10} e + set e + } {ERR wrong number of arguments for 'hexpire' command} + + test {HEXPIRE - no fields after FIELDS keyword} { + r FLUSHALL + r HSET myhash field1 val + catch {r HEXPIRE myhash 10 FIELDS 0} e + set e + } {ERR wrong number of arguments for 'hexpire' command} + + test {HEXPIRE - non-integer 
TTL error} { + r FLUSHALL + r HSET myhash field1 val + catch {r HEXPIRE myhash abc FIELDS 1 field1} e + set e + } {ERR value is not an integer or out of range} + + test {HEXPIRE - non-existing key returns -2} { + r FLUSHALL + set res [r HEXPIRE nokey 10 FIELDS 1 field1] + assert_equal {-2} $res + } + + test {HEXPIRE EX - set TTL on multiple fields} { + r FLUSHALL + r HSET myhash fieldA valA fieldB valB + set ttl 100 + r HEXPIRE myhash $ttl FIELDS 2 fieldA fieldB + + set ttlA [r HTTL myhash FIELDS 1 fieldA] + set ttlB [r HTTL myhash FIELDS 1 fieldB] + + assert { $ttlA > 0 && $ttlA <= $ttl } + assert { $ttlB > 0 && $ttlB <= $ttl } + } {} + + test {HEXPIRE returns -2 on non-existing key} { + r FLUSHALL + assert_equal {-2 -2} [r HEXPIRE nokey 10 FIELDS 2 field1 field2] + } {} + + test {HEXPIRE - GT condition fails when field has no TTL} { + r FLUSHALL + r HSET myhash f1 v1 + assert_equal 0 [r HEXPIRE myhash 10 GT fields 1 f1] + } + + test {HEXPIRE - LT condition succeeds when field has no TTL} { + r FLUSHALL + r HSET myhash f1 v1 + assert_equal 1 [r HEXPIRE myhash 10 LT fields 1 f1] + } + + ##### HTTL ##### + test {HTTL - persistent field returns -1} { + r FLUSHALL + r HSET myhash field1 val1 + assert_equal -1 [r HTTL myhash FIELDS 1 field1] + } {} + + test {HTTL - non-existent field returns -2} { + r FLUSHALL + r HSET myhash field1 val1 + assert_equal -2 [r HTTL myhash FIELDS 1 nofield] + } {} + + test {HTTL - non-existent key returns -2} { + r FLUSHALL + assert_equal -2 [r HTTL nokey FIELDS 1 field1] + } {} + + ##### EXPIRETIME ###### + + # Basic Expiry Functionality + test {HEXPIREAT - set absolute expiry on field} { + r FLUSHALL + r HSET myhash field1 hello + set now [clock seconds] + set exp [expr {$now + 30}] + r HEXPIREAT myhash $exp FIELDS 1 field1 + set etime [r HEXPIRETIME myhash FIELDS 1 field1] + assert_equal $exp $etime + } + + test {HEXPIREAT - timestamp in past deletes field immediately} { + r FLUSHALL + r HSET myhash field1 gone + set past [expr {[clock 
seconds] - 1000}] + set res [r HEXPIREAT myhash $past FIELDS 1 field1] + assert_equal {2} $res + assert_equal 0 [r HEXISTS myhash field1] + } + + + test {HEXPIREAT - set TTL on multiple fields (existing + non-existing)} { + r FLUSHALL + r HSET myhash field1 hello field2 world + set exp [expr {[clock seconds] + 10}] + set res [r HEXPIREAT myhash $exp FIELDS 3 field1 field2 fieldX] + assert_equal {1 1 -2} $res + } + + + # Conditionals: NX + test {HEXPIREAT NX - only set when field has no TTL} { + r FLUSHALL + r HSETEX myhash EX 100 FIELDS 1 field1 val + set exp [expr {[clock seconds] + 100}] + set res [r HEXPIREAT myhash $exp NX FIELDS 1 field1] + assert_equal {0} $res + + r HSET myhash field2 val2 + set res2 [r HEXPIREAT myhash $exp NX FIELDS 1 field2] + assert_equal {1} $res2 + } + + # Conditionals: XX + test {HEXPIREAT XX - only set when field has TTL} { + r FLUSHALL + r HSET myhash field1 val1 field2 val2 + set exp1 [expr {[clock seconds] + 20}] + r HEXPIREAT myhash $exp1 FIELDS 1 field1 + set exp2 [expr {[clock seconds] + 30}] + set res [r HEXPIREAT myhash $exp2 XX FIELDS 2 field1 field2] + assert_equal {1 0} $res + } + + # Conditionals: GT + test {HEXPIREAT GT - only set if new expiry > existing} { + r FLUSHALL + r HSETEX myhash PX 5000 FIELDS 1 field1 val1 + after 10 + set now [clock seconds] + set future [expr {$now + 10}] + set res [r HEXPIREAT myhash $future GT FIELDS 1 field1] + assert_equal {1} $res + + r HSET myhash field2 val2 + set res2 [r HEXPIREAT myhash $future GT FIELDS 1 field2] + assert_equal {0} $res2 + } + + + # Conditionals: LT + test {HEXPIREAT LT - only set if new expiry < existing} { + r FLUSHALL + set now [clock seconds] + # now + 20K seconds + set long_future_expiration [expr {$now + 20000}] + # now + 1K seconds + set short_future_expiration [expr {$now + 1000}] + r HSETEX myhash EX $long_future_expiration FIELDS 1 field1 val1 + assert_equal {1} [r HEXPIREAT myhash $short_future_expiration LT FIELDS 1 field1] + + r HSET myhash field2 val2 
+ assert_equal {1} [r HEXPIREAT myhash $short_future_expiration LT FIELDS 1 field2] + # TODO is this the expected behavior? if no TTL exist, it should be treated as minimum ttl possible? + } + + test {HEXPIREAT - refresh TTL with new future timestamp} { + r FLUSHALL + r HSET myhash field1 val1 + + # Set initial expiry to very near future + set ts1 [expr {[clock seconds] + 10}] + r HEXPIREAT myhash $ts1 FIELDS 1 field1 + + # Immediately refresh to a further expiry (no sleep needed) + set ts2 [expr {$ts1 + 5}] + r HEXPIREAT myhash $ts2 FIELDS 1 field1 + + # Confirm that expiry was updated + set actual [r HEXPIRETIME myhash FIELDS 1 field1] + assert_equal $ts2 $actual + } + + + # TTL Validations + test {HEXPIREAT - TTL is accurate via HEXPIRETIME} { + r FLUSHALL + r HSET myhash field1 val1 + set ts [expr {[clock seconds] + 50}] + r HEXPIREAT myhash $ts FIELDS 1 field1 + set returned [r HEXPIRETIME myhash FIELDS 1 field1] + assert_equal $ts $returned + } + + # Error Cases + test {HEXPIREAT - conflicting options error} { + r FLUSHALL + r HSET myhash field1 val + set ts [expr {[clock seconds] + 5}] + catch {r HEXPIREAT myhash $ts NX XX FIELDS 1 field1} e + set e + } {ERR NX and XX, GT or LT options at the same time are not compatible} + + + + test {HEXPIREAT - missing FIELDS keyword} { + r FLUSHALL + r HSET myhash field1 val + set ts [expr {[clock seconds] + 5}] + catch {r HEXPIREAT myhash $ts} e + set e + } {ERR wrong number of arguments for 'hexpireat' command} + + test {HEXPIREAT - no fields after FIELDS} { + r FLUSHALL + r HSET myhash field1 val + set ts [expr {[clock seconds] + 5}] + catch {r HEXPIREAT myhash $ts FIELDS 0} e + set e + } {ERR wrong number of arguments for 'hexpireat' command} + + test {HEXPIREAT - non-integer timestamp} { + r FLUSHALL + r HSET myhash field1 val + catch {r HEXPIREAT myhash tomorrow FIELDS 1 field1} e + set e + } {ERR value is not an integer or out of range} + + + + test {HEXPIREAT - non-existing key returns -2} { + r FLUSHALL + set ts 
[expr {[clock seconds] + 5}] + set res [r HEXPIREAT nokey $ts FIELDS 1 field1] + assert_equal {-2} $res + } + + #################### HEXPIRETIME ################## + + # Basic TTL retrieval + test {HEXPIRETIME - returns expiry timestamp for single field with TTL} { + r FLUSHALL + r HSET myhash field1 val + set ts [expr {[clock seconds] + 3}] + r HEXPIREAT myhash $ts FIELDS 1 field1 + set out [r HEXPIRETIME myhash FIELDS 1 field1] + assert_equal $ts $out + } + + + # No expiration set + test {HEXPIRETIME - field has no TTL returns -1} { + r FLUSHALL + r HSET myhash field1 val + set out [r HEXPIRETIME myhash FIELDS 1 field1] + assert_equal -1 $out + } + + # Non-existent field + test {HEXPIRETIME - field does not exist returns -2} { + r FLUSHALL + r HSET myhash field1 val + set out [r HEXPIRETIME myhash FIELDS 1 fieldX] + assert_equal -2 $out + } + + # Non-existent key + test {HEXPIRETIME - key does not exist returns -2} { + r FLUSHALL + set out [r HEXPIRETIME missingkey FIELDS 1 field1] + assert_equal -2 $out + } + + # Multiple fields: mix of TTL, no TTL, and missing + test {HEXPIRETIME - multiple fields mixed cases} { + r FLUSHALL + r HSET myhash f1 a f2 b + set now [clock seconds] + r HEXPIREAT myhash [expr {$now + 100}] FIELDS 1 f1 + set out [r HEXPIRETIME myhash FIELDS 3 f1 f2 f3] + # Should return: expiry for f1, -1 for f2 (no TTL), -2 for f3 (not found) + assert_equal [list [expr {$now + 100}] -1 -2] $out + } + + # Invalid usages + test {HEXPIRETIME - no FIELDS keyword} { + r FLUSHALL + r HSET myhash f1 a + catch {r HEXPIRETIME myhash} e + set e + } {ERR wrong number of arguments for 'hexpiretime' command} + + test {HEXPIRETIME - FIELDS 0} { + r FLUSHALL + r HSET myhash f1 a + catch {r HEXPIRETIME myhash FIELDS 0} e + set e + } {ERR wrong number of arguments for 'hexpiretime' command} + + test {HEXPIRETIME - wrong FIELDS count} { + r FLUSHALL + r HSET myhash f1 a + catch {r HEXPIRETIME myhash FIELDS 1} e + set e + } {ERR wrong number of arguments for 
'hexpiretime' command} + + test {HEXPIRETIME - wrong type key} { + r FLUSHALL + r SET myhash "not a hash" + catch {r HEXPIRETIME myhash FIELDS 1 f1} e + set e + } {WRONGTYPE Operation against a key holding the wrong kind of value} + + + # Basic expiration in milliseconds + test {HPEXPIREAT - set absolute expiry with ms precision} { + r FLUSHALL + r HSET myhash field1 val + set now [clock milliseconds] + set future [expr {$now + 123456789}] + r HPEXPIREAT myhash $future FIELDS 1 field1 + set t [r HPEXPIRETIME myhash FIELDS 1 field1] + assert_equal $future $t + } + + test {HPEXPIREAT - past timestamp deletes field immediately} { + r FLUSHALL + r HSET myhash field1 val + set past [expr {[clock milliseconds] - 10000}] + set res [r HPEXPIREAT myhash $past FIELDS 1 field1] + assert_equal {2} $res + assert_equal 0 [r HEXISTS myhash field1] + } + + test {HPEXPIREAT - non-existent key returns -2} { + r FLUSHALL + set ts [expr {[clock milliseconds] + 1000}] + set res [r HPEXPIREAT nokey $ts FIELDS 1 field1] + assert_equal {-2} $res + } + + test {HPEXPIREAT - mixed fields} { + r FLUSHALL + r HSET myhash f1 a f2 b + set ts [expr {[clock milliseconds] + 200000}] + set res [r HPEXPIREAT myhash $ts FIELDS 3 f1 f2 fX] + assert_equal {1 1 -2} $res + } + + test {HPEXPIREAT - GT and LT options with success and failure cases} { + r FLUSHALL + r HSET myhash f1 a + + # Setup: assign a baseline expiry time + set now [clock milliseconds] + set ts1 [expr {$now + 10000}] + set ts2 [expr {$now + 20000}] + r HPEXPIREAT myhash $ts1 FIELDS 1 f1 + + # --- GT Case --- + # ts2 > ts1 → should succeed + set res_gt_pass [r HPEXPIREAT myhash $ts2 GT FIELDS 1 f1] + assert_equal {1} $res_gt_pass + + # ts1 < ts2 → now try GT with ts1 again (should fail because ts2 is already set) + set res_gt_fail [r HPEXPIREAT myhash $ts1 GT FIELDS 1 f1] + assert_equal {0} $res_gt_fail + + # --- LT Case --- + # ts1 < ts2 → LT should fail + set res_lt_fail [r HPEXPIREAT myhash $ts2 LT FIELDS 1 f1] + assert_equal {0} 
$res_lt_fail + + # ts1 < ts2 → try LT with earlier timestamp, should succeed + set ts0 [expr {$now + 5000}] + set res_lt_pass [r HPEXPIREAT myhash $ts0 LT FIELDS 1 f1] + assert_equal {1} $res_lt_pass + } + + test {HPEXPIREAT - invalid inputs} { + r FLUSHALL + r HSET myhash f1 a + catch {r HPEXPIREAT myhash abc FIELDS 1 f1} e + assert_match {*not an integer*} $e + + catch {r HPEXPIREAT myhash 12345 NX XX FIELDS 1 f1} e2 + assert_match {ERR NX and XX, GT or LT options at the same time are not compatible} $e2 + } + + + test {HPEXPIRETIME - check with multiple fields} { + r FLUSHALL + + # Setup: one expiring field, one persistent, one missing + r HSET myhash f1 v1 f2 v2 + set ts [expr {[clock milliseconds] + 1000}] + r HPEXPIREAT myhash $ts FIELDS 1 f1 + + # Query all 3 fields + set result [r HPEXPIRETIME myhash FIELDS 3 f1 f2 f3] + + # Expect: [timestamp] for f1, -1 for f2, -2 for f3 + assert {[llength $result] == 3} + # f1: has TTL → returns exact timestamp + assert_equal $ts [lindex $result 0] + + # f2: exists, no TTL → returns -1 + assert_equal -1 [lindex $result 1] + + # f3: doesn't exist → returns -2 + assert_equal -2 [lindex $result 2] + + } + + #################### HPERSIST ################## + + test "HPERSIST - field does not exist" { + r FLUSHALL + r hset myhash field1 value1 + assert_equal {-2} [r hpersist myhash FIELDS 1 field2] + } + + test "HPERSIST - key does not exist" { + r FLUSHALL + assert_equal {-2} [r hpersist nonexistent FIELDS 1 field1] + } + + test "HPERSIST - field exists but no expiration" { + r del myhash + r hset myhash field1 value1 + assert_equal {-1} [r hpersist myhash FIELDS 1 field1] + } + + test "HPERSIST - field exists with expiration" { + r FLUSHALL + r hset myhash field1 value1 + r hexpire myhash 600 FIELDS 1 field1 + assert_morethan [r httl myhash FIELDS 1 field1] 0 + assert_equal {1} [r hpersist myhash FIELDS 1 field1] + assert_equal {-1} [r httl myhash FIELDS 1 field1] + } + + test "HPERSIST - multiple fields with mixed state" { 
+ r FLUSHALL + r hset myhash f1 v1 + r hset myhash f2 v2 + r hset myhash f3 v3 + r hexpire myhash 600 FIELDS 1 f1 + # f2 will have no expiration + # f4 does not exist + assert_equal {1 -1 -2} [r hpersist myhash FIELDS 3 f1 f2 f4] + } + + test {HPERSIST, then HEXPIRE, check new TTL is set} { + r FLUSHALL + r HSET myhash f1 v1 + r HEXPIRE myhash 1000 FIELDS 1 f1 + assert_equal 1 [r HPERSIST myhash FIELDS 1 f1] + r HEXPIRE myhash 2000 FIELDS 1 f1 + assert_morethan [r HTTL myhash FIELDS 1 f1] 1000 + } + + #################### HRANDFIELD ################## + + test "HRANDFIELD - CASE 1: negative count" { + r FLUSHALL + assert_equal {1} [r HSETEX myhash PX 1 fields 5 f1 v1 f2 v2 f3 v3 f4 v4 f5 v5] + wait_for_condition 100 100 { + [r HGETALL myhash] eq {} + } else { + fail "Hash is showing expired elements" + } + # check that we get an empty response even though there are expired fields + assert_match {} [r hrandfield myhash 1] + + # Now write a persistent element + assert_equal {1} [r HSET myhash f5 v5] + # make sure this is the element we will get all the time + for {set i 1} {$i <= 50} {incr i} { + assert_equal {f5 f5 f5 f5 f5} [r hrandfield myhash -5] + } + + } + + test "HRANDFIELD - CASE 2: The number of requested elements is greater than the number of elements inside the hash" { + r FLUSHALL + assert_equal {1} [r HSETEX myhash PX 1 fields 5 f1 v1 f2 v2 f3 v3 f4 v4 f5 v5] + wait_for_condition 100 100 { + [r HGETALL myhash] eq {} + } else { + fail "Hash is showing expired elements" + } + # check that we get an empty response even though there are expired fields + assert_match {} [r hrandfield myhash 10] + + # Now write a persistent element + assert_equal {3} [r HSET myhash f5 v5 f6 v6 f7 v7] + # make sure this is the element we will get all the time + for {set i 1} {$i <= 50} {incr i} { + set result [r hrandfield myhash 10] + assert_equal 3 [llength [split $result]] + assert_match {*f5*} $result + assert_match {*f6*} $result + assert_match {*f7*} $result + } + + } + + 
test "HRANDFIELD - CASE 3: The number of elements inside the hash is not greater than 3 times the number of requested elements" { + r FLUSHALL + assert_equal {1} [r HSETEX myhash PX 1 fields 5 f1 v1 f2 v2 f3 v3 f4 v4 f5 v5] + wait_for_condition 100 100 { + [r HGETALL myhash] eq {} + } else { + fail "Hash is showing expired elements" + } + # check that we get an empty response even though there are expired fields + assert_match {} [r hrandfield myhash 4] + + # Now write a persistent elements + assert_equal {4} [r HSET myhash f5 v5 f6 v6 f7 v7 f8 v8] + # make sure this is the elements we will get all the time + for {set i 1} {$i <= 50} {incr i} { + set result [r hrandfield myhash 4] + assert_equal 4 [llength [split $result]] + assert_match {*f5*} $result + assert_match {*f6*} $result + assert_match {*f7*} $result + assert_match {*f8*} $result + } + } + + test "HRANDFIELD - CASE 4: The number of elements inside the hash is greater than 3 times the number of requested elements" { + r FLUSHALL + assert_equal {1} [r HSETEX myhash PX 1 fields 8 f1 v1 f2 v2 f3 v3 f4 v4 f5 v5 f6 v6 f7 v7 f8 v8] + wait_for_condition 100 100 { + [r HGETALL myhash] eq {} + } else { + fail "Hash is showing expired elements" + } + + # check that we get an empty response even though there are expired fields + assert_match {} [r hrandfield myhash 2] + + # Now write a persistent elements + assert_equal {3} [r HSET myhash f8 v8 f9 v9 f10 v10] + # make sure this is the elements we will get all the time + for {set i 1} {$i <= 50} {incr i} { + set result [r hrandfield myhash 3] + assert_equal 3 [llength [split $result]] + assert_match {*f8*} $result + assert_match {*f9*} $result + assert_match {*f10*} $result + } + } +} + +####### Expiry fields skip tests +start_server {tags {"hashexpire"}} { + test {HGETALL skips expired fields} { + r FLUSHALL + r DEBUG SET-ACTIVE-EXPIRE no + + # Set two fields: one persistent, one with short TTL + r HSET myhash persistent "val1" + r HSETEX myhash PX 5 FIELDS 1 
expiring "val2" + + # Wait for expiry to pass + after 10 + + # HGETALL should skip expired field + set result [r HGETALL myhash] + assert_equal {persistent val1} $result + + # Re-enable active expiry + r DEBUG SET-ACTIVE-EXPIRE yes + } {OK} {needs:debug} + + test {HSCAN skips expired fields} { + r FLUSHALL + r DEBUG SET-ACTIVE-EXPIRE no + + # Set multiple fields, one with expiry + r HSET myhash persistent1 "a" persistent2 "b" + r HSETEX myhash PX 5 FIELDS 1 expiring "c" + + # Wait for expiration + after 10 + + # HSCAN must not return the expired field + set cursor 0 + set allfields {} + while {1} { + set res [r HSCAN myhash $cursor] + set cursor [lindex $res 0] + set kvs [lindex $res 1] + lappend allfields {*}$kvs + if {$cursor eq "0"} break + } + + # Extract just the field names + set fieldnames [lmap {k v} $allfields { set k }] + set fieldnames_sorted [lsort $fieldnames] + + # Should only include persistent1 and persistent2 + assert_equal {persistent1 persistent2} $fieldnames_sorted + + # Re-enable active expiry for future tests + r DEBUG SET-ACTIVE-EXPIRE yes + } {OK} {needs:debug} + + test {MOVE preserves field TTLs} { + r FLUSHALL + r SELECT 0 + r HSETEX myhash PX 50000 FIELDS 1 field1 val1 + + # Capture original TTL + set original_ttl [r HPTTL myhash FIELDS 1 field1] + assert {$original_ttl > 0} + + # Move to DB 1 + assert_equal 1 [r MOVE myhash 1] + + # Switch to target DB + r SELECT 1 + + # Field must exist and TTL must be preserved + set moved_ttl [r HPTTL myhash FIELDS 1 field1] + assert {$moved_ttl > 0 && $moved_ttl <= $original_ttl} + } {} {needs:debug} + + test {HSET - overwrite expired field without TTL clears expiration} { + r FLUSHALL + r debug SET-ACTIVE-EXPIRE no + + # This test verifies that if a field has expired (but not yet lazily deleted), + # and it is overwritten using a plain HSET (i.e., no TTL), + # Valkey treats the field as non existing and updates it, + # effectively clearing the old TTL and making the field persistent. 
+ + r HSETEX myhash PX 10 FIELDS 1 field1 oldval + wait_for_condition 100 100 { + [r HTTL myhash FIELDS 1 field1] eq "-2" + } else { + fail "hash value was not expired after timeout" + } + + # Field should still be present in memory due to lazy expiry + assert_equal 1 [r HLEN myhash] + + # Overwrite with HSET (no TTL) before accessing + r HSET myhash field1 newval + + # TTL should now be gone; field becomes persistent + set ttl [r HPTTL myhash FIELDS 1 field1] + assert_equal -1 $ttl + assert_equal newval [r HGET myhash field1] + assert_equal 1 [r HLEN myhash] + + r debug SET-ACTIVE-EXPIRE yes + } {OK} {needs:debug} + + test {HINCRBY - on expired field} { + r FLUSHALL + r debug SET-ACTIVE-EXPIRE no + + # This test verifies that if a field has expired, + # and it is overwritten using a plain HINCRBY (i.e., no TTL), + # Valkey treats the field as still existing and updates it, + # effectively clearing the old TTL and starting the value from 0. + + r HSETEX myhash PX 10 FIELDS 1 field1 1 + wait_for_condition 100 100 { + [r HTTL myhash FIELDS 1 field1] eq "-2" + } else { + fail "hash value was not expired after timeout" + } + + # Field should still be present in memory + assert_equal 1 [r HLEN myhash] + + # Overwrite with HINCRBY (no TTL) before accessing + r HINCRBY myhash field1 1 + + # Sanity check: check we only have one field in the hash + assert_equal 1 [r HLEN myhash] + + # TTL should now be gone; field becomes persistent + set ttl [r HPTTL myhash FIELDS 1 field1] + assert_equal -1 $ttl + assert_equal 1 [r HGET myhash field1] + assert_equal 1 [r HLEN myhash] + + # set expiration on the field + assert_equal 1 [r HEXPIRE myhash 100000000 FIELDS 1 field1] + # verify the field has TTL + assert_morethan [r HPTTL myhash FIELDS 1 field1] 0 + # now incr the field again + assert_equal 2 [r HINCRBY myhash field1 1] + # verify the field has TTL + assert_morethan [r HPTTL myhash FIELDS 1 field1] 0 + r debug SET-ACTIVE-EXPIRE yes + } {OK} {needs:debug} + + test {HINCRBYFLOAT 
- on expired field} { + r FLUSHALL + r debug SET-ACTIVE-EXPIRE no + + # This test verifies that if a field has expired, + # and it is overwritten using a plain HINCRBYFLOAT (i.e., no TTL), + # Valkey treats the field as still existing and updates it, + # effectively clearing the old TTL and starting the value from 0. + + r HSETEX myhash PX 10 FIELDS 1 field1 1 + wait_for_condition 100 100 { + [r HTTL myhash FIELDS 1 field1] eq "-2" + } else { + fail "hash value was not expired after timeout" + } + + # Field should still be present in memory + assert_equal 1 [r HLEN myhash] + + # Overwrite with HINCRBYFLOAT (no TTL) before accessing + r HINCRBYFLOAT myhash field1 1 + + # Sanity check: check we only have one field in the hash + assert_equal 1 [r HLEN myhash] + + # TTL should now be gone; field becomes persistent + set ttl [r HPTTL myhash FIELDS 1 field1] + assert_equal -1 $ttl + assert_equal 1 [r HGET myhash field1] + assert_equal 1 [r HLEN myhash] + + # set expiration on the field + assert_equal 1 [r HEXPIRE myhash 100000000 FIELDS 1 field1] + # verify the field has TTL + assert_morethan [r HPTTL myhash FIELDS 1 field1] 0 + # now incr the field again + assert_equal 2 [r HINCRBYFLOAT myhash field1 1] + # verify the field has TTL + assert_morethan [r HPTTL myhash FIELDS 1 field1] 0 + r debug SET-ACTIVE-EXPIRE yes + } {OK} {needs:debug} + + test {HSET - overwrite unexpired field removes TTL} { + r FLUSHALL + r debug SET-ACTIVE-EXPIRE no + + # This test verifies that overwriting a field with HSET, + # even while its TTL is still valid (not expired), + # clears the TTL and makes the field persistent. + # This behavior is consistent with how HSET works for normal keys. 
+ + # Set field with long TTL + r HSETEX myhash PX 1000 FIELDS 1 field1 val1 + + # Confirm TTL is active + set before [r HPTTL myhash FIELDS 1 field1] + assert {$before > 0} + + # Overwrite with HSET before TTL expires + r HSET myhash field1 newval + + # TTL should now be gone + set after [r HPTTL myhash FIELDS 1 field1] + assert_equal -1 $after + assert_equal newval [r HGET myhash field1] + + r debug SET-ACTIVE-EXPIRE yes + } {OK} {needs:debug} + + test {HDEL - expired field is removed without triggering expiry logic} { + r FLUSHALL + r debug SET-ACTIVE-EXPIRE no + + # This test proves that deleting an expired field with HDEL + # does NOT trigger Valkey's expiration mechanism. + # + # The key observation is that Valkey tracks how many fields were + # expired via TTL using the `expired_subkeys` counter in INFO stats. + # If HDEL caused expiration to be processed internally, + # this counter would increment. We assert that it remains unchanged. + + # Capture expired_subkeys before + set before_info [r INFO stats] + set before [info_field $before_info expired_subkeys] + + # Create field with short TTL + r HSETEX myhash PX 10 FIELDS 1 field1 val1 + after 20 + + # Field is technically expired, but still in-memory due to lazy expiry + assert_equal 1 [r HLEN myhash] + + # Delete the expired field directly + r HDEL myhash field1 + + # Field should be gone + assert_equal 0 [r HEXISTS myhash field1] + + # Capture expired_subkeys again + set after_info [r INFO stats] + set after [info_field $after_info expired_subkeys] + + # Verify that no expiry occurred internally + assert_equal $before $after + r debug SET-ACTIVE-EXPIRE yes + } {OK} {needs:debug} + + test {HDEL on field with TTL, then re-add and check TTL is gone} { + r FLUSHALL + r HSET myhash f1 v1 + r HEXPIRE myhash 10000 FIELDS 1 f1 + assert_morethan [r HTTL myhash FIELDS 1 f1] 0 + r HDEL myhash f1 + r HSET myhash f1 v2 + assert_equal -1 [r HTTL myhash FIELDS 1 f1] + } + +} + +####### Test info +start_server {tags 
{"hash-ttl-info external:skip"}} { + test {Hash ttl - check command stats} { + r FLUSHALL + + # Run all relevant hash TTL commands + r HSET myhash f1 v1 f2 v2 + r HEXPIRE myhash 10 FIELDS 1 f1 + r HEXPIREAT myhash [expr {[clock seconds] + 10}] FIELDS 1 f2 + r HEXPIRETIME myhash FIELDS 2 f1 f2 + r HPEXPIRE myhash 1000 FIELDS 1 f1 + r HPEXPIREAT myhash [expr {[clock milliseconds] + 2000}] FIELDS 1 f2 + r HPEXPIRETIME myhash FIELDS 2 f1 f2 + r HGETEX myhash EX 120 FIELDS 1 f1 + r HTTL myhash FIELDS 1 f2 + r HPTTL myhash FIELDS 1 f1 + + # Fetch commandstats + set info [r INFO commandstats] + + # Extract call counts + proc get_calls {info cmd} { + foreach line [split $info "\n"] { + if {[string match "cmdstat_$cmd:*" $line]} { + regexp {calls=(\d+)} $line -> count + return $count + } + } + return -1 + } + + # Assert each command appears with correct call count (1 call each) + assert_equal 1 [get_calls $info hexpire] + assert_equal 1 [get_calls $info hexpireat] + assert_equal 1 [get_calls $info hexpiretime] + assert_equal 1 [get_calls $info hpexpire] + assert_equal 1 [get_calls $info hpexpireat] + assert_equal 1 [get_calls $info hpexpiretime] + assert_equal 1 [get_calls $info hgetex] + assert_equal 1 [get_calls $info httl] + assert_equal 1 [get_calls $info hpttl] + } +} + + +#### Replication #### +start_server {tags {"hashexpire external:skip"}} { + # Start another server to test replication of TTLs + start_server {tags {needs:repl external:skip}} { + # Set the outer layer server as primary + set primary [srv -1 client] + set primary_host [srv -1 host] + set primary_port [srv -1 port] + # Set this inner layer server as replica + set replica [srv 0 client] + + test {Setup replica and check field expiry after full sync} { + $primary flushall + + # Set up some TTLs on primary BEFORE replica connects + set now [clock milliseconds] + set f1_exp [expr {$now + 50000}] + set f2_exp [expr {$now + 70000}] + + $primary HSET myhash f1 v1 f2 v2 + $primary HPEXPIREAT myhash $f1_exp 
FIELDS 1 f1 + $primary HPEXPIREAT myhash $f2_exp FIELDS 1 f2 + + # Now connect replica + $replica replicaof $primary_host $primary_port + + wait_for_condition 100 100 { + [info_field [$replica info replication] master_link_status] eq "up" + } else { + fail "Master <-> Replica didn't finish sync" + } + + + # Wait for full sync + wait_for_ofs_sync $primary $replica + + + # Validate TTLs replicated correctly + set r1 [$replica HPEXPIRETIME myhash FIELDS 1 f1] + set r2 [$replica HPEXPIRETIME myhash FIELDS 1 f2] + + assert_equal $f1_exp $r1 + assert_equal $f2_exp $r2 + } + + + + test {HASH TTL - replicated TTL is absolute and consistent on replica} { + $primary flushall + + set now [clock milliseconds] + set future [expr {$now + 5000}] + set future_sec [expr {$future / 1000}] + + # HPEXPIREAT + $primary HSET myhash f1 v1 + $primary HPEXPIREAT myhash $future FIELDS 1 f1 + + # HSETEX EX + $primary HSETEX myhash EX 5 FIELDS 1 f2 v2 + + # HEXPIRE + $primary HSET myhash f3 v3 + $primary HEXPIRE myhash 5 FIELDS 1 f3 + + wait_for_ofs_sync $primary $replica + + set t1 [$primary HPEXPIRETIME myhash FIELDS 1 f1] + set t1r [$replica HPEXPIRETIME myhash FIELDS 1 f1] + assert_equal $t1 $t1r + + set t2 [$primary HEXPIRETIME myhash FIELDS 1 f2] + set t2r [$replica HEXPIRETIME myhash FIELDS 1 f2] + assert_equal $t2 $t2r + + set t3 [$primary HEXPIRETIME myhash FIELDS 1 f3] + set t3r [$replica HEXPIRETIME myhash FIELDS 1 f3] + assert_equal $t3 $t3r + } + + test {HASH TTL - field expired on master gets deleted on replica} { + $primary flushall + + $primary HSETEX myhash PX 10 FIELDS 1 f1 val1 + after 20 + wait_for_ofs_sync $primary $replica + + + # Trigger lazy expiry + catch {$primary HGET myhash f1} + wait_for_ofs_sync $primary $replica + + + assert_equal 0 [$replica HEXISTS myhash f1] + } + + + test {HASH TTL - replica retains TTL and field before expiration} { + $primary flushall + + $primary HSETEX myhash PX 1000 FIELDS 1 f1 val1 + wait_for_ofs_sync $primary $replica + + set 
master_ttl [$primary HPTTL myhash FIELDS 1 f1] + set replica_ttl [$replica HPTTL myhash FIELDS 1 f1] + assert {$replica_ttl > 0} + assert {$replica_ttl <= $master_ttl} + + } + + test {HSETEX with expired time is propagated to the replica} { + $primary flushall + + assert_equal [$primary HSET myhash f1 val1] "1" + + wait_for_condition 100 100 { + [$replica HGET myhash f1] eq {val1} + } else { + fail "hash field was not set on replica after timeout" + } + + assert_equal [$primary HSETEX myhash EXAT 0 FIELDS 1 f1 val1] {1} + + wait_for_condition 100 100 { + [$primary EXISTS myhash] eq "0" + } else { + fail "hash object was not deleted on primary after timeout" + } + wait_for_ofs_sync $primary $replica + + wait_for_condition 100 100 { + [$replica EXISTS myhash] eq "0" + } else { + fail "hash object was not deleted on replica after timeout" + } + } + + test {HGETEX with expired time is propagated to the replica} { + $primary flushall + + assert_equal [$primary HSET myhash f1 val1] "1" + + wait_for_condition 100 100 { + [$replica HGET myhash f1] eq {val1} + } else { + fail "hash field was not set on replica after timeout" + } + + assert_equal [$primary HGETEX myhash EXAT 0 FIELDS 1 f1] {val1} + + wait_for_condition 100 100 { + [$primary EXISTS myhash] eq "0" + } else { + fail "hash object was not deleted on primary after timeout" + } + wait_for_ofs_sync $primary $replica + + wait_for_condition 100 100 { + [$replica EXISTS myhash] eq "0" + } else { + fail "hash object was not deleted on replica after timeout" + } + } + test {HEXPIREAT with expired time is propagated to the replica} { + $primary flushall + + assert_equal [$primary HSET myhash f1 val1] "1" + + wait_for_condition 100 100 { + [$replica HGET myhash f1] eq {val1} + } else { + fail "hash field was not set on replica after timeout" + } + + assert_equal [$primary HEXPIREAT myhash 0 FIELDS 1 f1] {2} + + wait_for_condition 100 100 { + [$primary EXISTS myhash] eq "0" + } else { + fail "hash object was not deleted on 
primary after timeout" + } + wait_for_ofs_sync $primary $replica + + wait_for_condition 100 100 { + [$replica EXISTS myhash] eq "0" + } else { + fail "hash object was not deleted on replica after timeout" + } + } + } +} + +start_server {tags {"hashexpire external:skip"}} { + set primary [srv 0 client] + set primary_host [srv 0 host] + set primary_port [srv 0 port] + start_server {tags {needs:repl external:skip}} { + set replica_1 [srv 0 client] + set replica_1_host [srv 0 host] + set replica_1_port [srv 0 port] + + test {Replication Primary -> R1} { + lassign [setup_replication_test $primary $replica_1 $primary_host $primary_port] primary_initial_expired replica_1_initial_expired + + # Initialize deferred clients and subscribe to keyspace notifications + foreach instance [list $primary $replica_1] { + $instance config set notify-keyspace-events KEA + } + set rd_primary [valkey_deferring_client -1] + set rd_replica_1 [valkey_deferring_client $replica_1_host $replica_1_port] + foreach rd [list $rd_primary $rd_replica_1] { + assert_equal {1} [psubscribe $rd __keyevent@*] + } + + + # Setup hash, set expire and set expire 0 + $primary HSET myhash f1 v1 f2 v2 ;# Should trigger 3 hset + # Create hash and timing - f1 < f2 expiry times + set f1_exp [expr {[clock seconds] + 10000}] + $primary HEXPIREAT myhash $f1_exp FIELDS 1 f1 ;# Should trigger 3 hexpire + wait_for_ofs_sync $primary $replica_1 + + $primary HEXPIRE myhash 0 FIELDS 1 f1 ;# Should trigger 1 hexpired (for primary) and 1 hdel (for replica) + wait_for_ofs_sync $primary $replica_1 + + # Wait for f1 expiration + wait_for_condition 50 100 { + [$primary HTTL myhash FIELDS 1 f1] eq -2 && \ + [$replica_1 HTTL myhash FIELDS 1 f1] eq -2 + } else { + fail "f1 still exists" + } + + # Verify keyspace notification + foreach rd [list $rd_primary $rd_replica_1] { + assert_keyevent_patterns $rd myhash hset hexpire + } + # primary gets hexpired and replica gets hdel + assert_keyevent_patterns $rd_primary myhash hexpired + 
+ assert_keyevent_patterns $rd_replica_1 myhash hdel + + $rd_primary close + $rd_replica_1 close + } + + start_server {tags {needs:repl external:skip}} { + $primary FLUSHALL + set replica_2 [srv 0 client] + set replica_2_host [srv 0 host] + set replica_2_port [srv 0 port] + + test {Chain Replication (Primary -> R1 -> R2) preserves TTL} { + $replica_1 replicaof $primary_host $primary_port + # Wait for R1 to connect to the primary + wait_for_condition 100 100 { + [info_field [$replica_1 info replication] master_link_status] eq "up" + } else { + fail "R1 <-> PRIMARY didn't establish connection" + } + + $replica_2 replicaof $replica_1_host $replica_1_port + # Wait for R2 to connect to R1 + wait_for_condition 100 100 { + [info_field [$replica_2 info replication] master_link_status] eq "up" + } else { + fail "R2 <-> R1 didn't establish connection" + } + + # Initialize deferred clients and subscribe to keyspace notifications + set rd_primary [valkey_deferring_client -2] + set rd_replica_1 [valkey_deferring_client -1] + set rd_replica_2 [valkey_deferring_client $replica_2_host $replica_2_port] + assert_equal {1} [psubscribe $rd_primary __keyevent@*] + assert_equal {1} [psubscribe $rd_replica_1 __keyevent@*] + assert_equal {1} [psubscribe $rd_replica_2 __keyevent@*] + + # Create hash and timing - f1 < f2 < f3 expiry times + set f1_exp [expr {[clock seconds] + 10000}] + + ############################################# SETUP HASH ############################################# + $primary HSET myhash f1 v1 f2 v2 ;# Should trigger 3 hset + $primary HEXPIREAT myhash $f1_exp FIELDS 1 f1 ;# Should trigger 3 hexpire + wait_for_ofs_sync $primary $replica_1 + wait_for_ofs_sync $replica_1 $replica_2 + + $primary HPEXPIRE myhash 0 FIELDS 1 f1 ;# Should trigger 1 hexpired (for primary) and 2 hdel (for replicas) + wait_for_ofs_sync $primary $replica_1 + wait_for_ofs_sync $replica_1 $replica_2 + + + # Wait for f1 expiration + wait_for_condition 50 100 { + [$primary HTTL myhash FIELDS 1 f1] eq -2 && \ + 
[$replica_1 HTTL myhash FIELDS 1 f1] eq -2 && \ + [$replica_2 HTTL myhash FIELDS 1 f1] eq -2 + } else { + fail "f1 still exists" + } + + # primary gets hexpired and replicas get hdel + foreach rd [list $rd_primary $rd_replica_1 $rd_replica_2] { + assert_keyevent_patterns $rd myhash hset hexpire + } + assert_keyevent_patterns $rd_primary myhash hexpired + assert_keyevent_patterns $rd_replica_1 myhash hdel + assert_keyevent_patterns $rd_replica_2 myhash hdel + + $rd_primary close + $rd_replica_1 close + $rd_replica_2 close + } + } + + test {Replica Failover} { + $primary FLUSHALL + $primary DEBUG SET-ACTIVE-EXPIRE no + $replica_1 DEBUG SET-ACTIVE-EXPIRE no + ####### Replication setup ####### + $replica_1 replicaof $primary_host $primary_port + wait_for_condition 50 100 { + [lindex [$replica_1 role] 0] eq {slave} && + [string match {*master_link_status:up*} [$replica_1 info replication]] + } else { + fail "Can't turn the instance into a replica" + } + + # Create hash fields with TTL on primary + set f1_exp [expr {[clock seconds] + 200}] + set f2_exp [expr {[clock seconds] + 300000}] + $primary HSET myhash f1 v1 f2 v2 f3 v3 + $primary HEXPIREAT myhash $f1_exp FIELDS 1 f1 + $primary HEXPIREAT myhash $f2_exp FIELDS 1 f2 + # f3 remains persistent + + # Wait for full sync + wait_for_ofs_sync $primary $replica_1 + + # Verify primary and replica are the same + foreach instance [list $primary $replica_1] { + assert_equal $f1_exp [$instance HEXPIRETIME myhash FIELDS 1 f1] + assert_equal $f2_exp [$instance HEXPIRETIME myhash FIELDS 1 f2] + assert_equal -1 [$instance HTTL myhash FIELDS 1 f3] + assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [$instance info keyspace]] keys=%d] + assert_equal "v1 v2 v3" [$instance HMGET myhash f1 f2 f3] + assert_equal 3 [$instance HLEN myhash] + } + + # Perform failover + $replica_1 replicaof no one + # Wait for replica to become primary + wait_for_condition 100 100 { + [info_field [$replica_1 info replication] role] eq "master" + } else { 
+ fail "Replica didn't become master" + } + + # Setup keyspace notifications for the promoted replica + $replica_1 config set notify-keyspace-events KEA + set rd_replica [valkey_deferring_client $replica_1_host $replica_1_port] + assert_equal {1} [psubscribe $rd_replica __keyevent@*] + + # Check all values that checked before are the same + assert_equal 3 [$replica_1 HLEN myhash] + assert_equal $f1_exp [$replica_1 HEXPIRETIME myhash FIELDS 1 f1] + assert_equal $f2_exp [$replica_1 HEXPIRETIME myhash FIELDS 1 f2] + assert_equal -1 [$replica_1 HTTL myhash FIELDS 1 f3] + assert_equal "v1 v2 v3" [$replica_1 HGETEX myhash FIELDS 3 f1 f2 f3] + assert_equal 3 [$replica_1 HLEN myhash] + + # Set f1 to expire in 1 second and wait for expiration + $replica_1 HEXPIRE myhash 1 FIELDS 1 f1 ;# will trigger hexpire + wait_for_condition 50 100 { + [$replica_1 HTTL myhash FIELDS 1 f1] eq -2 + } else { + fail "f1 not expired" + } + + # Verify expiry in replica + assert_equal "" [$replica_1 HGET myhash f1] + assert_equal 3 [$replica_1 HLEN myhash] + + # Verify no expiry in primary + assert_equal "v1" [$primary HGET myhash f1] + + # Change TTL of f2 + $replica_1 HEXPIRE myhash 1000000 FIELDS 1 f2 ;# will trigger hexpire + assert_morethan [$replica_1 HTTL myhash FIELDS 1 f2] 9000 + assert_equal $f2_exp [$primary HEXPIRETIME myhash FIELDS 1 f2] + + # Change TTL of f2 to 0 (immediate expiry) + $replica_1 HGETEX myhash EX 0 FIELDS 1 f2 ;# will trigger hexpired + # Verify final state + assert_equal 2 [$replica_1 HLEN myhash] + assert_equal "{} {} v3" [$replica_1 HGETEX myhash FIELDS 3 f1 f2 f3] + assert_equal "v1 v2 v3" [$primary HGETEX myhash FIELDS 3 f1 f2 f3] ;# No change for primary + + assert_keyevent_patterns $rd_replica myhash hexpire hexpire hexpired + + $rd_replica close + # Re-enable active expiry + $primary DEBUG SET-ACTIVE-EXPIRE yes + $replica_1 DEBUG SET-ACTIVE-EXPIRE yes + } {OK} {needs:debug} + + + test {Promotion to primary} { + lassign [setup_replication_test $primary 
$replica_1 $primary_host $primary_port] primary_initial_expired replica_1_initial_expired + + # Initialize deferred clients and subscribe to keyspace notifications + foreach instance [list $primary $replica_1] { + $instance config set notify-keyspace-events KEA + $instance DEBUG SET-ACTIVE-EXPIRE no + } + ####### Replication setup ####### + $replica_1 replicaof $primary_host $primary_port + wait_for_condition 50 100 { + [lindex [$replica_1 role] 0] eq {slave} && + [string match {*master_link_status:up*} [$replica_1 info replication]] + } else { + fail "Can't turn the instance into a replica" + } + + # Create hash fields with TTL on primary + set f1_exp [expr {[clock seconds] + 200}] + set f2_exp [expr {[clock seconds] + 300000}] + $primary HSET myhash f1 v1 f2 v2 f3 v3 + $primary HEXPIREAT myhash $f1_exp FIELDS 1 f1 + $primary HEXPIREAT myhash $f2_exp FIELDS 1 f2 + # f3 remains persistent + + # Wait for full sync + wait_for_ofs_sync $primary $replica_1 + + # Verify primary and replica are the same + foreach instance [list $primary $replica_1] { + assert_equal $f1_exp [$instance HEXPIRETIME myhash FIELDS 1 f1] + assert_equal $f2_exp [$instance HEXPIRETIME myhash FIELDS 1 f2] + assert_equal -1 [$instance HTTL myhash FIELDS 1 f3] + assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [$instance info keyspace]] keys=%d] + assert_equal "v1 v2 v3" [$instance HMGET myhash f1 f2 f3] + assert_equal 3 [$instance HLEN myhash] + } + + # Perform promotion to primary + $primary FAILOVER TO $replica_1_host $replica_1_port + # Wait for replica to become primary + wait_for_condition 100 100 { + [info_field [$replica_1 info replication] role] eq "master" + } else { + fail "Replica didn't become master" + } + + # Setup keyspace notifications + $primary config set notify-keyspace-events KEA + $replica_1 config set notify-keyspace-events KEA + set rd_primary [valkey_deferring_client -1] + set rd_replica_1 [valkey_deferring_client $replica_1_host $replica_1_port] + assert_equal {1} 
[psubscribe $rd_primary __keyevent@*] + assert_equal {1} [psubscribe $rd_replica_1 __keyevent@*] + + # Check all values that checked before are the same after the failover + foreach instance [list $primary $replica_1] { + assert_equal $f1_exp [$instance HEXPIRETIME myhash FIELDS 1 f1] + assert_equal $f2_exp [$instance HEXPIRETIME myhash FIELDS 1 f2] + assert_equal -1 [$instance HTTL myhash FIELDS 1 f3] + assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [$instance info keyspace]] keys=%d] + assert_equal "v1 v2 v3" [$instance HMGET myhash f1 f2 f3] + assert_equal 3 [$instance HLEN myhash] + } + + # Set f1 to expire in 1 second and wait for expiration + $replica_1 HEXPIRE myhash 1 FIELDS 1 f1 ;# will trigger hexpire + wait_for_ofs_sync $replica_1 $primary + wait_for_condition 50 100 { + [$replica_1 HTTL myhash FIELDS 1 f1] eq -2 + } else { + fail "f1 not expired" + } + + # Verify replica and primary are sync + foreach instance [list $primary $replica_1] { + assert_equal $f2_exp [$instance HEXPIRETIME myhash FIELDS 1 f2] + assert_equal -2 [$instance HTTL myhash FIELDS 1 f1] + assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [$instance info keyspace]] keys=%d] + assert_equal "{} v2 v3" [$instance HMGET myhash f1 f2 f3] + assert_equal 3 [$instance HLEN myhash] + } + + # Change TTL of f2 + $replica_1 HEXPIRE myhash 1000000 FIELDS 1 f2 ;# will trigger hexpire + wait_for_ofs_sync $replica_1 $primary + foreach instance [list $primary $replica_1] { + assert_morethan [$instance HTTL myhash FIELDS 1 f2] 9000 + } + + # Change TTL of f2 to 0 (immediate expiry) + $replica_1 HGETEX myhash EX 0 FIELDS 1 f2 ;# will trigger hexpired for replica_1 and hdel for primary + # Verify final state + foreach instance [list $primary $replica_1] { + assert_equal 2 [$instance HLEN myhash] + assert_equal "{} {} v3" [r HMGET myhash f1 f2 f3] + } + + foreach rd [list $rd_replica_1 $rd_primary] { + assert_keyevent_patterns $rd myhash hexpire hexpire + } + assert_keyevent_patterns 
$rd_replica_1 myhash hexpired + assert_keyevent_patterns $rd_primary myhash hdel + + $rd_replica_1 close + $rd_primary close + # Re-enable active expiry + $primary DEBUG SET-ACTIVE-EXPIRE yes + $replica_1 DEBUG SET-ACTIVE-EXPIRE yes + } {OK} {needs:debug} + } +} + +### Slot Migration #### +start_cluster 3 0 {tags {"cluster mytest external:skip"} overrides {cluster-node-timeout 1000}} { + # Flush all data on all cluster nodes before starting + for {set i 0} {$i < 3} {incr i} { + R $i FLUSHALL + } + if {$::singledb} { + set db 0 + } else { + set db 9 + } + set R0_id [R 0 CLUSTER MYID] + set R1_id [R 1 CLUSTER MYID] + + # Use a fixed hash tag to ensure key is in one slot + set key "{mymigrate}myhash" + + test {Hash with TTL fields migrates correctly between nodes} { + R 0 DEBUG SET-ACTIVE-EXPIRE no + R 1 DEBUG SET-ACTIVE-EXPIRE no + # Create hash fields + R 0 HSET $key f1 v1 f2 v2 f3 v3 + + # Set TTL on fields f1 and f2 + R 0 HEXPIRE $key 300 FIELDS 2 f1 f2 + + # Verify before slot migration + assert_equal 3 [R 0 HLEN $key] + assert_morethan [R 0 HTTL $key FIELDS 1 f1] 290 + assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [R 0 info keyspace]] keys=%d] + + # Prepare slot migration + set slot [R 0 CLUSTER KEYSLOT $key] + assert_equal OK [R 1 CLUSTER SETSLOT $slot IMPORTING $R0_id] + assert_equal OK [R 0 CLUSTER SETSLOT $slot MIGRATING $R1_id] + + # Migrate key to destination node + R 0 MIGRATE [srv -1 host] [srv -1 port] $key 0 5000 + + # Complete slot migration + R 0 CLUSTER SETSLOT $slot NODE $R1_id + R 1 CLUSTER SETSLOT $slot NODE $R1_id + + # Verify after slot migration + assert_equal 3 [R 1 HLEN $key] + assert_morethan [R 1 HTTL $key FIELDS 1 f1] 280 + assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [R 1 info keyspace]] keys=%d] + + # Setup keyspace notifications + R 1 config set notify-keyspace-events KEA + set rd [valkey_deferring_client -1] + assert_equal {1} [psubscribe $rd __keyevent@0__:hexpired] + + # Set expiration to 0 + R 1 HGETEX $key EX 0 
FIELDS 1 f1 + + # Veridy expiration + assert_keyevent_patterns $rd "{$key}" hexpired + assert_equal 2 [R 1 HLEN $key] + assert_equal "" [R 1 HGET $key f1] + assert_equal -2 [R 1 HTTL $key FIELDS 1 f1] + + $rd close + # Re-enable active expiry + R 0 DEBUG SET-ACTIVE-EXPIRE yes + R 1 DEBUG SET-ACTIVE-EXPIRE yes + } {OK} {needs:debug} +} + +start_server {tags {"hashexpire external:skip"}} { + foreach cmd {RENAME RESTORE} { + test "$cmd Preserves Field TTLs" { + r FLUSHALL + r DEBUG SET-ACTIVE-EXPIRE no + r HSET myhash f1 v1 f2 v2 + r HEXPIRE myhash 200 FIELDS 1 f1 + + # Verify initial TTL state + set mem_before [r MEMORY USAGE myhash] + assert_equal "v1 v2" [r HMGET myhash f1 f2] + assert_morethan [r HTTL myhash FIELDS 1 f1] 100 + assert_equal -1 [r HTTL myhash FIELDS 1 f2] + assert_equal 2 [r HLEN myhash] + assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] + + # Run the command + if {$cmd eq "RENAME"} { + r rename myhash nwhash + set newhash nwhash + } elseif {$cmd eq "RESTORE"} { + set serialized [r DUMP myhash] + r RESTORE rstrhs 0 $serialized + set newhash rstrhs + } + + # Verify field values and TTLs are preserved + set memory_after [r MEMORY USAGE $newhash] + assert_equal "v1 v2" [r HMGET $newhash f1 f2] + assert_morethan [r HTTL $newhash FIELDS 1 f1] 100 + assert_equal -1 [r HTTL $newhash FIELDS 1 f2] + assert_equal 2 [r HLEN $newhash] + if {$cmd eq "RESTORE"} { + assert_match {2} [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] + } else { + assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] + } + assert_equal $mem_before $memory_after + } + } + + test {COPY Preserves TTLs} { + r flushall + r DEBUG SET-ACTIVE-EXPIRE no + + # Create hash with fields + r HSET myhash f1 v1 f3 v3 f4 v4 + + # Set TTL on f1 only + r HEXPIRE myhash 200 FIELDS 1 f1 + r HEXPIRE myhash 2 FIELDS 1 f3 + + # Verify initial TTL state + set mem_before [r MEMORY USAGE myhash] + assert_equal "v1 v3 v4" [r HMGET 
myhash f1 f3 f4] + assert_morethan [r HTTL myhash FIELDS 1 f1] 100 + assert_morethan [r HTTL myhash FIELDS 1 f3] 0 + assert_equal -1 [r HTTL myhash FIELDS 1 f4] + assert_equal 3 [r HLEN myhash] + assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] + + # Copy hash to new key + r copy myhash newhash1 + + # Verify myhash is the same + assert_equal "v1 v3 v4" [r HMGET myhash f1 f3 f4] + assert_morethan [r HTTL myhash FIELDS 1 f1] 100 + assert_morethan [r HTTL myhash FIELDS 1 f3] 0 + assert_equal -1 [r HTTL myhash FIELDS 1 f4] + assert_equal 3 [r HLEN myhash] + + # Verify new hash got same values + set mem_after [r MEMORY USAGE myhash] + assert_equal "v1 v3 v4" [r HMGET myhash f1 f3 f4] + assert_morethan [r HTTL newhash1 FIELDS 1 f1] 100 + assert_morethan [r HTTL newhash1 FIELDS 1 f3] 0 + assert_equal -1 [r HTTL newhash1 FIELDS 1 f4] + assert_equal 3 [r HLEN newhash1] + assert_match {2} [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] + + assert_equal $mem_before $mem_after + + # Modify TTL in original hash + r HEXPIRE myhash 5 FIELDS 1 f3 + + # Wait for original TTL to expire in copy + after 2000 + assert_equal "v1 {}" [r HMGET newhash1 f1 f3] + assert_equal "v1 v3" [r HMGET myhash f1 f3] + + r HSETEX myhash EX 2 FIELDS 1 f3 v3 + # Create second copy + r copy myhash newhash2 + + # Modify TTL in second copy + r HEXPIRE newhash2 500 FIELDS 1 f3 + + # Wait for original hash TTL to expire + after 2000 + assert_equal "v1 {}" [r HMGET myhash f1 f3] + assert_equal "v1 v3" [r HMGET newhash2 f1 f3] + # Re-enable active expiry + r DEBUG SET-ACTIVE-EXPIRE yes + } {OK} {needs:debug} + + test {Hash Encoding Transitions with TTL - Add TTL to Existing Fields} { + r flushall + r DEBUG SET-ACTIVE-EXPIRE no + + # Create small hash with listpack encoding + r HSET myhash f1 v1 f2 v2 + + # Verify initial encoding + set "listpack" [r OBJECT ENCODING myhash] + + # Add TTL to existing field + r HEXPIRE myhash 300 FIELDS 1 f1 + + # Verify 
encoding changed to hashtable + set "hashtable" [r OBJECT ENCODING myhash] + + # Verify field values are preserved + assert_equal "v1 v2" [r HMGET myhash f1 f2] + # Veridy expiry + assert_morethan [r HTTL myhash FIELDS 1 f1] 100 + assert_equal -1 [r HTTL myhash FIELDS 1 f2] + # Re-enable active expiry + r DEBUG SET-ACTIVE-EXPIRE yes + } {OK} {needs:debug} + + test {Hash Encoding Transitions with TTL - Create New Fields with TTL} { + r flushall + r DEBUG SET-ACTIVE-EXPIRE no + + # Create small hash with listpack encoding + r HSET myhash f1 v1 f2 v2 + + # Verify initial encoding + set "listpack" [r OBJECT ENCODING myhash] + + # Add many fields to force encoding transition + for {set i 3} {$i <= 600} {incr i} { + lappend pairs "f$i" "v$i" + } + r HSET myhash {*}$pairs + r HEXPIRE myhash 3 FIELDS 5 f1 f10 f100 f200 f300 + + # Verify encoding changed to hashtable + set "hashtable" [r OBJECT ENCODING myhash] + + # Verify all field values and TTLs are correct + for {set i 1} {$i <= 600} {incr i} { + assert_equal "v$i" [r HGET myhash "f$i"] + if {$i == 1 || $i == 10 || $i == 100 || $i == 200 || $i == 300} { + assert_equal 3 [r HTTL myhash FIELDS 1 "f$i"] + } else { + assert_equal -1 [r HTTL myhash FIELDS 1 "f$i"] + } + } + # Re-enable active expiry + r DEBUG SET-ACTIVE-EXPIRE yes + } {OK} {needs:debug} +} + +start_server {tags {"hashexpire external:skip"}} { + r config set notify-keyspace-events KEA + + foreach time_unit {s, ms} { + test "Key TTL expires before field TTL: entire hash should be deleted timeunit: $time_unit" { + r FLUSHALL + r DEBUG SET-ACTIVE-EXPIRE no + r config set notify-keyspace-events KEA + set rd [valkey_deferring_client] + assert_equal {1} [psubscribe $rd __keyevent@*] + + r HSET myhash f1 v1 f2 v2 f3 v3 + assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] + assert_equal 3 [r HLEN myhash] + if {$time_unit eq "s"} { + r HEXPIRE hash1 10 FIELDS 1 f1 + r EXPIRE hash1 1 + } else { + r HPEXPIRE myhash 10000 FIELDS 1 f1 + r 
PEXPIRE myhash 1000 + } + + wait_for_condition 100 100 { + [r EXISTS myhash] eq "0" + } else { + fail "myhash still exists" + } + assert_equal 0 [r HLEN myhash] + assert_match "" [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] + + assert_keyevent_patterns $rd myhash hset hexpire expire + $rd close + # Re-enable active expiry + r DEBUG SET-ACTIVE-EXPIRE yes + } {OK} {needs:debug} + + test "Field TTL expires before key TTL: only the specific field should expire: $time_unit" { + r FLUSHALL + r DEBUG SET-ACTIVE-EXPIRE no + set rd [valkey_deferring_client] + assert_equal {1} [psubscribe $rd __keyevent@*] + + r HSET myhash f1 v1 f2 v2 f3 v3 + assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] + assert_equal 3 [r HLEN myhash] + if {$time_unit eq "s"} { + r HEXPIRE myhash 1 FIELDS 1 f1 + r EXPIRE myhash 10 + } else { + r HPEXPIRE myhash 1000 FIELDS 1 f1 + r PEXPIRE myhash 10000 + } + + wait_for_condition 100 100 { + [r HGET myhash f1] eq "" + } else { + fail "f1 not expired" + } + assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] + assert_equal 1 [r EXISTS myhash] + assert_equal "{} v2 v3" [r HMGET myhash f1 f2 f3] + assert_keyevent_patterns $rd myhash hset hexpire + $rd close + # Re-enable active expiry + r DEBUG SET-ACTIVE-EXPIRE yes + } {OK} {needs:debug} + + test "Key and field TTL expire simultaneously: entire hash should be deleted: $time_unit" { + r FLUSHALL + r DEBUG SET-ACTIVE-EXPIRE no + + r HSET myhash f1 v1 f2 v2 f3 v3 + assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] + assert_equal 3 [r HLEN myhash] + + + if {$time_unit eq "s"} { + set expire [expr {[clock seconds] + 1}] + r HEXPIREAT myhash $expire FIELDS 1 f1 + r EXPIREAT myhash $expire + } else { + set expire [expr {[clock milliseconds] + 1000}] + r HPEXPIREAT myhash $expire FIELDS 1 f1 + r PEXPIREAT myhash $expire + } + + wait_for_condition 100 100 { + [r EXISTS myhash] eq 0 + } else { + 
fail "myhash still exist" + } + + assert_equal "{} {} {}" [r HMGET myhash f1 f2 f3] + assert_match "" [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] + assert_equal 0 [r HLEN myhash] + # Re-enable active expiry + r DEBUG SET-ACTIVE-EXPIRE yes + } {OK} {needs:debug} + + test {Millisecond/Seconds precision} { + r flushall + r DEBUG SET-ACTIVE-EXPIRE no + + r HSET myhash f1 v1 f2 v2 + if {$time_unit eq "s"} { + r HEXPIRE myhash 3 FIELDS 1 f1 + r EXPIRE myhash 1 + } else { + r HPEXPIRE myhash 3000 FIELDS 1 f1 + r PEXPIRE myhash 1000 + } + + after 1500 + assert_equal 0 [r EXISTS myhash] + # Re-enable active expiry + r DEBUG SET-ACTIVE-EXPIRE yes + } {OK} {needs:debug} + } + + test {Ensure that key-level PERSIST on the key don't affect field TTL} { + r FLUSHALL + + r HSET myhash f1 v1 f2 v2 + assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] + assert_equal 2 [r HLEN myhash] + r HEXPIRE myhash 100000 FIELDS 1 f1 + r PERSIST myhash + + assert_equal -1 [r TTL myhash] + assert_morethan [r HTTL myhash FIELDS 1 f1] 0 + } +} + +#### AOF Test ##### +tags {"aof external:skip"} { + set defaults {appendonly {yes} appendfilename {appendonly.aof} appenddirname {appendonlydir} auto-aof-rewrite-percentage {0}} + set server_path [tmpdir server.multi.aof] + start_server_aof [list dir $server_path] { + test {TTL Persistence in AOF} { + r flushall + r DEBUG SET-ACTIVE-EXPIRE no + r config set appendonly yes + r config set appendfsync always + + # Create hash with 1 short, long and no expired fields + set long_expire [expr {[clock seconds] + 1000000}] + # Create 10 fields with long expiry + for {set i 1} {$i <= 10} {incr i} { + r HSETEX myhash EXAT $long_expire FIELDS 1 f$i v$i ;# 10 PXAT to aof + } + + # Create 10 fields with short expiry + for {set i 11} {$i <= 20} {incr i} { + r HSETEX myhash PXAT [expr {[clock milliseconds] + 10}] FIELDS 1 f$i v$i ;# 10 PXAT to aof + } + + # Create 10 fields with expire 0 + for {set i 21} {$i <= 30} 
{incr i} { + r HSET myhash f$i v$i + r HEXPIRE myhash 0 FIELDS 1 f$i ;# 10 HDEL to aof + } + + # Create 10 fields with no expiry + for {set i 31} {$i <= 40} {incr i} { + r HSET myhash f$i v$i + } + + # Now wait for expire of the short expiry + for {set i 11} {$i <= 20} {incr i} { + wait_for_condition 100 100 { + [r HTTL myhash FIELDS 1 f$i] eq "-2" + } else { + fail "hash value was not expired after timeout" + } + } + + # Verify initial HLEN + assert_equal 30 [r HLEN myhash] + # Verify values + for {set i 1} {$i <= 40} {incr i} { + if {$i >= 11 && $i <= 30} { + assert_equal "" [r HGET myhash f$i] + } else { + assert_equal v$i [r HGET myhash f$i] + } + } + + # Ensure the initial rewrite finishes + waitForBgrewriteaof r + + # Get the last incremental AOF file path + set aof_file [get_last_incr_aof_path r] + + wait_for_condition 100 100 { + [file exists $aof_file] eq 1 + } else { + fail "hash value was not expired after timeout" + } + + # Read and check content + set aof_content [exec cat $aof_file] + + # Verify amount of PXAT and HDEL + # Count PXAT commands (should be 20: 10 long + 10 short) + set pxat_count [regexp -all {PXAT} $aof_content] + assert_equal 20 $pxat_count + # Count HDEL commands (should be 10: from expire 0) + set hdel_count [regexp -all {HDEL} $aof_content] + assert_equal 10 $hdel_count + + # Restart the server and load the AOF + restart_server 0 true false + r debug loadaof + + # Verify hash after loading from aof + # Verify same HLEN + assert_equal 30 [r HLEN myhash] + # Verify the TTLs are preserved + for {set i 1} {$i <= 10} {incr i} { + assert_equal $long_expire [r HEXPIRETIME myhash FIELDS 1 f$i] + assert_equal v$i [r HGET myhash f$i] + } + # Verify expired fields + for {set i 11} {$i <= 30} {incr i} { + assert_equal -2 [r HTTL myhash FIELDS 1 f$i] + assert_equal "" [r HGET myhash f$i] + } + # Verify fields with no TTL + for {set i 31} {$i <= 40} {incr i} { + assert_equal -1 [r HTTL myhash FIELDS 1 f$i] + assert_equal v$i [r HGET myhash f$i] + 
} + # Re-enable active expiry + r DEBUG SET-ACTIVE-EXPIRE yes + } {OK} {needs:debug} + } +}