From 2d5c653126e4e6f7c9ccd67b50b36ec6a61a62a8 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Thu, 19 Jun 2025 09:04:38 +0300 Subject: [PATCH 001/119] Squash all content to a single commit Squashed commit of the following: commit af1175200e553195d16f435bf5db9695db6d5f5a Author: Ran Shidlansik Date: Thu Jun 19 08:56:12 2025 +0300 more PR comments Signed-off-by: Ran Shidlansik commit a39fa2042c11ef10291a28d6c2cd3d42d1590c01 Author: Ran Shidlansik Date: Wed Jun 18 19:35:55 2025 +0300 update comment Signed-off-by: Ran Shidlansik commit ec7390683d3bb28e2aeb015eaf70248fbe5af276 Author: Ran Shidlansik Date: Wed Jun 18 19:33:59 2025 +0300 more pr comments being addressed Signed-off-by: Ran Shidlansik commit a4aa35ccfc41300439c0a8d611fc646a7cbf5707 Author: Ran Shidlansik Date: Wed Jun 18 18:51:24 2025 +0300 move parseExtendedCommandArgumentsOrReply to server.c Signed-off-by: Ran Shidlansik commit f4a8786e86427eb5722b3ffa4312633f9ea9233a Author: Ran Shidlansik Date: Wed Jun 18 18:48:49 2025 +0300 address some more review comments Signed-off-by: Ran Shidlansik commit 8ecd5848b20a09c0b50a520cbe197fd4f716736d Author: Ran Shidlansik Date: Wed Jun 18 18:12:03 2025 +0300 add missing entry.o Signed-off-by: Ran Shidlansik commit ee916d8fa33fdf7252468bdea34f7e2189f39470 Merge: 156c4a588 a1f4cd6f2 Author: Ran Shidlansik Date: Wed Jun 18 17:28:04 2025 +0300 Merge remote-tracking branch 'origin/unstable' into ttl-poc-new commit 156c4a588062de9d1fe96de30d2f0891d3b8b5ab Author: Ran Shidlansik Date: Wed Jun 18 17:10:41 2025 +0300 address more PR comments Signed-off-by: Ran Shidlansik commit de675bc8959662d13ddce3cf251ea9d585e0a546 Author: Ran Shidlansik Date: Wed Jun 18 13:11:48 2025 +0300 minor review fixes Signed-off-by: Ran Shidlansik commit 9d39b401f2a49205e85f66b8d309508a2c52ceb8 Author: Ran Shidlansik Date: Mon Jun 16 13:10:45 2025 +0300 Revert " partial work. introduce set expirations" This reverts commit 04f2006502cdc27360a40a6f822a6cc200a63353. 
commit 04f2006502cdc27360a40a6f822a6cc200a63353 Author: Ran Shidlansik Date: Mon Jun 16 13:08:25 2025 +0300 partial work. introduce set expirations Signed-off-by: Ran Shidlansik commit cd674be4111321bf8e8f9567982dd41ccd753f04 Author: Ran Shidlansik Date: Sun Jun 15 14:17:05 2025 +0300 fix misspelling in test Signed-off-by: Ran Shidlansik commit 25954b37e746dd441a8a767c4b546a6d138ccbfb Author: Ran Shidlansik Date: Sun Jun 15 13:35:58 2025 +0300 fix flaky test with EXPIREAT Signed-off-by: Ran Shidlansik commit aee670eaf0122def90fee118d7dda01216676447 Author: Ran Shidlansik Date: Sun Jun 15 11:36:27 2025 +0300 fix some more memory leaks Signed-off-by: Ran Shidlansik commit 2fea8e735c7795530802718c7508f49b1fb879f6 Author: Ran Shidlansik Date: Fri Jun 13 15:02:13 2025 +0300 fix memory leak issue Signed-off-by: Ran Shidlansik commit 56db999348110ece9a333c107ea35fa2d00526af Author: Ran Shidlansik Date: Thu Jun 12 17:21:51 2025 +0300 fix bad compilation Signed-off-by: Ran Shidlansik commit 1b1ce58f76dc86514e02a08b5dfad8d8bf2d3425 Author: Ran Shidlansik Date: Thu Jun 12 17:17:58 2025 +0300 add missing files Signed-off-by: Ran Shidlansik commit 7a89a70133c64c8d7b01e80cda570f45fe9e36bf Author: Ran Shidlansik Date: Thu Jun 12 17:15:38 2025 +0300 Separate hash entry implementation Signed-off-by: Ran Shidlansik commit 23bb4a2c8a492dd508dd33f78f32b6065a5e5905 Author: Ran Shidlansik Date: Tue Jun 10 19:25:32 2025 +0300 extend the comment Signed-off-by: Ran Shidlansik commit 2a5e9a20b47dd1c7774aa9aa6d0a8e1623263d9c Author: Ran Shidlansik Date: Tue Jun 10 18:52:53 2025 +0300 fix merge issues Signed-off-by: Ran Shidlansik commit e7683b67efa970ffa5c1fe0cddc7974a7de38c26 Merge: 12151e51d c41ffc340 Author: Ran Shidlansik Date: Tue Jun 10 18:52:18 2025 +0300 Merge remote-tracking branch 'origin/unstable' into ttl-poc-new commit 12151e51d5a96f038ccc75472a7c6f4a25fc0405 Author: Ran Shidlansik Date: Tue Jun 10 18:47:11 2025 +0300 fix some bugs and added HPERSIST tests to help schema validator 
Signed-off-by: Ran Shidlansik commit 846f9432a2ecf41fd29fc3a695d9a13af273c113 Author: Ran Shidlansik Date: Tue Jun 10 14:06:58 2025 +0300 fix new commands json and enable silent tests Signed-off-by: Ran Shidlansik commit 51f9bdca21c2d5c6b61c96797305ad2b4388779a Author: Ran Shidlansik Date: Tue Jun 10 11:40:09 2025 +0300 better enforce fields number to match the number of provided fields in httl and hpersist commands Signed-off-by: Ran Shidlansik commit c1cefec47010d9d0edae8ba57e1bd80432468ad2 Author: Ran Shidlansik Date: Tue Jun 10 10:17:31 2025 +0300 fix reply schema of commands fetching the hash field ttl Signed-off-by: Ran Shidlansik commit 4db5b7c3b812a0698b137639ff1864c8453aae44 Author: Ran Shidlansik Date: Tue Jun 10 09:47:05 2025 +0300 fix hexpire flaky test Signed-off-by: Ran Shidlansik commit f4ae1a2a8ddce646c2914f1f24a1b43e127fee2b Author: Ran Shidlansik Date: Mon Jun 9 22:02:39 2025 +0300 remove fmacros include from volatile_set Signed-off-by: Ran Shidlansik commit 3190eb452607e9c6e1bd8277041f3fb2670d7329 Merge: 99d25d3cb 1941d28ac Author: Ran Shidlansik Date: Mon Jun 9 21:15:07 2025 +0300 Merge remote-tracking branch 'origin/unstable' into ttl-poc-new commit 99d25d3cbee4e9945e0f18c789d78c72991b2acd Author: Ran Shidlansik Date: Mon Jun 9 19:56:14 2025 +0300 completely remove server level access context (it was mainly used for lazy expiration logic) Signed-off-by: Ran Shidlansik commit 2e082db308fb22e1ab7389e03235f85f05cf0ffc Author: Ran Shidlansik Date: Thu Jun 5 09:34:16 2025 +0300 exclude hexpire tests from external tests Signed-off-by: Ran Shidlansik commit 2c4c3125f40ea37ac4c6ac2579acc7fcdd1d27e3 Author: Ran Shidlansik Date: Wed Jun 4 15:36:32 2025 +0300 switch hashtable type only when object has volatile items Signed-off-by: Ran Shidlansik commit 124acbeadf3caf99741280dd7b4f68f3ce769b73 Author: Ran Shidlansik Date: Wed Jun 4 14:45:19 2025 +0300 return syntax error when fields is not provided in new API arguments Signed-off-by: Ran Shidlansik commit 
dd071f994668b83d14833a78c07eab6e5fbcf4c5 Merge: 12a35e21c 5699c8c05 Author: Ran Shidlansik Date: Wed Jun 4 13:02:30 2025 +0300 Merge remote-tracking branch 'origin/unstable' into ttl-poc-new commit 12a35e21cc72cdb15cf99109aed6e7b3dca1efba Author: Ran Shidlansik Date: Wed Jun 4 13:02:03 2025 +0300 remove lazy expiration logic and tests Signed-off-by: Ran Shidlansik commit 0cadaec7869f75bee5ec3807b5aaea3c4f26bbec Author: Ran Shidlansik Date: Wed Jun 4 12:27:39 2025 +0300 copy hash object should also copy the fields ttl Signed-off-by: Ran Shidlansik commit 79b7e78142b0b7377dab8c551a32c9ec4332b516 Author: Ran Shidlansik Date: Wed Jun 4 12:18:48 2025 +0300 remove metadata from hash entry Signed-off-by: Ran Shidlansik commit 8654080b86fef1c6335169c288726d0f9f171657 Author: Ran Shidlansik Date: Wed May 21 19:41:59 2025 +0300 make sure to remove the volatile set on hash object destructor Signed-off-by: Ran Shidlansik commit eab6fc4555634d3b287e6438d1ba272756135118 Author: Ran Shidlansik Date: Wed May 21 16:01:56 2025 +0300 fix trackUpdate condition Signed-off-by: Ran Shidlansik commit 6d9551ee4a6365b1abf3eb6f3298e6329ce032d7 Author: Ran Shidlansik Date: Wed May 21 15:16:53 2025 +0300 fix bad memory access issue on entry tracking update Signed-off-by: Ran Shidlansik commit d719dcd6cd0757969419aac1c387606ce10b181c Author: xbasel <103044017+xbasel@users.noreply.github.com> Date: Wed May 21 14:50:18 2025 +0300 Hash TTL - add tests (#1) * add tests Signed-off-by: xbasel <103044017+xbasel@users.noreply.github.com> * fix a bug - return on error Signed-off-by: xbasel <103044017+xbasel@users.noreply.github.com> * disable failing tests Signed-off-by: xbasel <103044017+xbasel@users.noreply.github.com> * remove redundant test Signed-off-by: xbasel <103044017+xbasel@users.noreply.github.com> * Update tests/unit/hashexpire.tcl --------- Signed-off-by: xbasel <103044017+xbasel@users.noreply.github.com> Co-authored-by: Ran Shidlansik commit e604b37882a0fe80e6fa4a6f5fe96e2757b3ec4a Author: 
Ran Shidlansik Date: Wed May 21 14:43:49 2025 +0300 make hashtable call entry destructor on delete access Signed-off-by: Ran Shidlansik commit 0b8dc0307e873b5aab7e81234cbd64a0bfaaa7d8 Author: Ran Shidlansik Date: Wed May 21 14:31:44 2025 +0300 centralize keyspace and key signal notifications to the reset context Signed-off-by: Ran Shidlansik commit 753ba3c67c2307cbc2c084e52963ca2aaae37828 Author: Ran Shidlansik Date: Tue May 20 14:19:34 2025 +0300 fix object pass to keyspace notification in HSETEX Signed-off-by: Ran Shidlansik commit c5b8d76b97b15e02fd2915c77fc43c5a4ce82552 Author: Ran Shidlansik Date: Mon May 19 21:47:42 2025 +0300 fix formatting issue Signed-off-by: Ran Shidlansik commit e72d7a6754c7411ed617ce403319fcc4adb3f434 Author: Ran Shidlansik Date: Mon May 19 21:25:17 2025 +0300 fix build issues Signed-off-by: Ran Shidlansik commit 36b7356d84e4a354f2037cd0052904c6f2cdc6ee Author: Ran Shidlansik Date: Mon May 19 21:13:15 2025 +0300 allow setting the key object in context Signed-off-by: Ran Shidlansik commit a6844ac3fbaccbefd86e73b2dd99d914fa4aaffa Author: Ran Shidlansik Date: Mon May 19 20:15:18 2025 +0300 add commands json files Signed-off-by: Ran Shidlansik commit 20c0d29529a779f40a450ffe8385eec864c0c78e Author: Ran Shidlansik Date: Mon May 19 20:13:23 2025 +0300 fix hexpire propagation to use hpexpireat Signed-off-by: Ran Shidlansik commit 5e19c90117bbdf058a60b7e7135727a868392857 Author: Ran Shidlansik Date: Mon May 19 17:19:41 2025 +0300 fix HGETEX replication handling Signed-off-by: Ran Shidlansik commit 31923c5aabd15b11eb753b70cde8d2285007be97 Author: Ran Shidlansik Date: Mon May 19 15:46:31 2025 +0300 make httl functions verify the type Signed-off-by: Ran Shidlansik commit b782d44e2caeb3aff452756489652149fcb4b561 Author: Ran Shidlansik Date: Mon May 19 15:36:22 2025 +0300 fix case of hll command issues on non-existing listpack encoded hash Signed-off-by: Ran Shidlansik commit 0723625343b5a0ea573ae7fc662559e323db82b5 Author: Ran Shidlansik Date: Mon 
May 19 15:24:59 2025 +0300 Fix HEXPIRE parse limits Signed-off-by: Ran Shidlansik commit d97e23f84e1c4065e98e97fb842a5a97e2907899 Author: Ran Shidlansik Date: Mon May 19 15:19:53 2025 +0300 fix FNX/FXX logic Signed-off-by: Ran Shidlansik commit 4301399d28f9f0a2dbab70bc5b26d4872cd8c323 Author: Ran Shidlansik Date: Mon May 19 15:08:02 2025 +0300 fix wrong assert condition on update entry Signed-off-by: Ran Shidlansik commit 6465314b09cf9dd827c339823e28fad3e185fae9 Author: Ran Shidlansik Date: Mon May 19 15:00:20 2025 +0300 handle negative ttl correctly Signed-off-by: Ran Shidlansik commit f62c163c3044e07be985ab1a89ee02f60ef3a157 Author: Ran Shidlansik Date: Mon May 19 14:20:49 2025 +0300 format fixes Signed-off-by: Ran Shidlansik commit a59f31ae7e5dade35c3800c091707b4e325e3f17 Author: Ran Shidlansik Date: Mon May 19 14:17:19 2025 +0300 Add support for HGETEX and HSETEX Signed-off-by: Ran Shidlansik commit 4a09f3d8db7e49760c88574d3e737d32e91b713f Author: Ran Shidlansik Date: Sun May 18 12:16:44 2025 +0300 free entry when calling hashTypeDelete Signed-off-by: Ran Shidlansik commit dd62037c2800b47fe06123b26e5a737c75762b56 Author: Ran Shidlansik Date: Sun May 18 11:12:49 2025 +0300 remove hashtable redundant log Signed-off-by: Ran Shidlansik commit ea039c4882dffa9808a853215c0ee15470905d84 Merge: fce9a4374 8d686dd74 Author: Ran Shidlansik Date: Sun May 18 10:40:39 2025 +0300 Merge remote-tracking branch 'origin/unstable' into ttl-poc-new commit fce9a437441542d16c3018a2db8e52a9fd49a18e Author: Ran Shidlansik Date: Sun May 18 10:39:22 2025 +0300 fix cmake compilation Signed-off-by: Ran Shidlansik commit 1f0c9339e3344aadd3292adced5393734705afdd Author: Ran Shidlansik Date: Sun May 18 10:34:27 2025 +0300 avoid extra ref count incrementing in hashTypePropagateDeletion Signed-off-by: Ran Shidlansik commit 6ee497caa97281ff8ed633c9f38db4130a775a94 Author: Ran Shidlansik Date: Thu May 15 21:32:30 2025 +0300 fix some more format issues Signed-off-by: Ran Shidlansik commit 
90b7536eb716594af110b1a9cd356490f90a5923 Author: Ran Shidlansik Date: Thu May 15 21:30:46 2025 +0300 fix typo Signed-off-by: Ran Shidlansik commit fcce92bfe7e3d91f264bcfd8857d1d380a8ef459 Author: Ran Shidlansik Date: Thu May 15 21:28:02 2025 +0300 fix expire propagation Signed-off-by: Ran Shidlansik commit cc7c2a30466f49dc01a813794ac97e49da8266cd Author: Ran Shidlansik Date: Thu May 15 20:48:12 2025 +0300 handle some format check issues Signed-off-by: Ran Shidlansik commit 61bd39af78d545d00c2f4138484e78042dec031b Author: Ran Shidlansik Date: Thu May 15 20:45:18 2025 +0300 fix some spelling checks Signed-off-by: Ran Shidlansik commit 89f56b0820f827e9dfa056d01c5735f4d3834e99 Author: Ran Shidlansik Date: Thu May 15 20:38:21 2025 +0300 fix new introduced commands Signed-off-by: Ran Shidlansik commit ecdcce0d43916e5f33b04d56f0f9bb43002377a5 Author: Ran Shidlansik Date: Mon Jan 6 10:46:47 2025 +0200 Introduce HASH items expiration Signed-off-by: Ran Shidlansik Signed-off-by: Ran Shidlansik --- cmake/Modules/SourceFiles.cmake | 5 +- src/Makefile | 2 +- src/aof.c | 24 +- src/commands.def | 432 ++++++++++++ src/commands/hexpire.json | 119 ++++ src/commands/hexpireat.json | 121 ++++ src/commands/hexpiretime.json | 85 +++ src/commands/hgetex.json | 110 +++ src/commands/hpersist.json | 84 +++ src/commands/hpexpire.json | 121 ++++ src/commands/hpexpireat.json | 121 ++++ src/commands/hpexpiretime.json | 85 +++ src/commands/hpttl.json | 85 +++ src/commands/hsetex.json | 152 +++++ src/commands/httl.json | 85 +++ src/db.c | 117 ++-- src/defrag.c | 10 +- src/entry.c | 364 ++++++++++ src/entry.h | 25 + src/expire.c | 39 +- src/hashtable.c | 27 +- src/hashtable.h | 10 + src/module.c | 8 +- src/object.c | 9 +- src/rdb.c | 56 +- src/rdb.h | 50 +- src/sds.c | 2 + src/server.c | 153 ++++- src/server.h | 98 ++- src/t_hash.c | 1072 +++++++++++++++++++++--------- src/t_string.c | 137 +--- src/util.h | 6 + src/valkey-check-rdb.c | 3 + src/volatile_set.c | 79 +++ src/volatile_set.h | 40 ++ 
tests/unit/hashexpire.tcl | 1104 +++++++++++++++++++++++++++++++ 36 files changed, 4465 insertions(+), 575 deletions(-) create mode 100644 src/commands/hexpire.json create mode 100644 src/commands/hexpireat.json create mode 100644 src/commands/hexpiretime.json create mode 100644 src/commands/hgetex.json create mode 100644 src/commands/hpersist.json create mode 100644 src/commands/hpexpire.json create mode 100644 src/commands/hpexpireat.json create mode 100644 src/commands/hpexpiretime.json create mode 100644 src/commands/hpttl.json create mode 100644 src/commands/hsetex.json create mode 100644 src/commands/httl.json create mode 100644 src/entry.c create mode 100644 src/entry.h create mode 100644 src/volatile_set.c create mode 100644 src/volatile_set.h create mode 100644 tests/unit/hashexpire.tcl diff --git a/cmake/Modules/SourceFiles.cmake b/cmake/Modules/SourceFiles.cmake index 538600c83b..6a217780cd 100644 --- a/cmake/Modules/SourceFiles.cmake +++ b/cmake/Modules/SourceFiles.cmake @@ -117,7 +117,10 @@ set(VALKEY_SERVER_SRCS ${CMAKE_SOURCE_DIR}/src/connection.c ${CMAKE_SOURCE_DIR}/src/unix.c ${CMAKE_SOURCE_DIR}/src/server.c - ${CMAKE_SOURCE_DIR}/src/logreqres.c) + ${CMAKE_SOURCE_DIR}/src/logreqres.c + ${CMAKE_SOURCE_DIR}/src/entry.c + ${CMAKE_SOURCE_DIR}/src/volatile_set.c) + # valkey-cli set(VALKEY_CLI_SRCS diff --git a/src/Makefile b/src/Makefile index cc561929e5..22ab3b0341 100644 --- a/src/Makefile +++ b/src/Makefile @@ -423,7 +423,7 @@ ENGINE_NAME=valkey SERVER_NAME=$(ENGINE_NAME)-server$(PROG_SUFFIX) ENGINE_SENTINEL_NAME=$(ENGINE_NAME)-sentinel$(PROG_SUFFIX) ENGINE_TRACE_OBJ=trace/trace.o trace/trace_commands.o trace/trace_db.o trace/trace_bgsave.o trace/trace_cluster.o trace/trace_server.o trace/trace_aof.o -ENGINE_SERVER_OBJ=threads_mngr.o adlist.o vector.o quicklist.o ae.o anet.o dict.o hashtable.o kvstore.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o memory_prefetch.o io_threads.o networking.o util.o object.o 
db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o cluster_legacy.o cluster_slot_stats.o crc16.o endianconv.o commandlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crccombine.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o valkey-check-rdb.o valkey-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o allocator_defrag.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script.o functions.o commands.o strl.o connection.o unix.o logreqres.o rdma.o scripting_engine.o lua/script_lua.o lua/function_lua.o lua/engine_lua.o lua/debug_lua.o +ENGINE_SERVER_OBJ=threads_mngr.o adlist.o vector.o quicklist.o ae.o anet.o dict.o hashtable.o kvstore.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o memory_prefetch.o io_threads.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o cluster_legacy.o cluster_slot_stats.o crc16.o endianconv.o commandlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crccombine.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o valkey-check-rdb.o valkey-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o allocator_defrag.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script.o functions.o commands.o strl.o connection.o unix.o logreqres.o rdma.o scripting_engine.o entry.o 
volatile_set.o lua/script_lua.o lua/function_lua.o lua/engine_lua.o lua/debug_lua.o ENGINE_SERVER_OBJ+=$(ENGINE_TRACE_OBJ) ENGINE_CLI_NAME=$(ENGINE_NAME)-cli$(PROG_SUFFIX) ENGINE_CLI_OBJ=anet.o adlist.o dict.o valkey-cli.o zmalloc.o release.o ae.o serverassert.o crcspeed.o crccombine.o crc64.o siphash.o crc16.o monotonic.o cli_common.o mt19937-64.o strl.o cli_commands.o sds.o util.o sha256.o diff --git a/src/aof.c b/src/aof.c index 732a71584f..301b81b447 100644 --- a/src/aof.c +++ b/src/aof.c @@ -1955,7 +1955,7 @@ static int rioWriteHashIteratorCursor(rio *r, hashTypeIterator *hi, int what) { * The function returns 0 on error, 1 on success. */ int rewriteHashObject(rio *r, robj *key, robj *o) { hashTypeIterator hi; - long long count = 0, items = hashTypeLength(o); + long long count = 0, volatile_items = hashTypeNumVolatileElements(o), items = hashTypeLength(o) - volatile_items; hashTypeInitIterator(o, &hi); while (hashTypeNext(&hi) != C_ERR) { @@ -1969,6 +1969,9 @@ int rewriteHashObject(rio *r, robj *key, robj *o) { } } + if (volatile_items > 0 && entryHasExpiry(hi.next)) + continue; + if (!rioWriteHashIteratorCursor(r, &hi, OBJ_HASH_FIELD) || !rioWriteHashIteratorCursor(r, &hi, OBJ_HASH_VALUE)) { hashTypeResetIterator(&hi); return 0; @@ -1979,6 +1982,25 @@ int rewriteHashObject(rio *r, robj *key, robj *o) { hashTypeResetIterator(&hi); + /* Now serialize volatile items if exist */ + if (hashTypeHasVolatileElements(o)) { + hashTypeInitVolatileIterator(o, &hi); + while (hashTypeNext(&hi) != C_ERR) { + long long expiry = entryGetExpiry(hi.next); + sds field = entryGetField(hi.next); + sds value = entryGetValue(hi.next); + if (rioWriteBulkCount(r, '*', 8) == 0) return 0; + if (rioWriteBulkString(r, "HSETEX", 6) == 0) return 0; + if (rioWriteBulkObject(r, key) == 0) return 0; + if (rioWriteBulkString(r, "PXAT", 4) == 0) return 0; + if (rioWriteBulkLongLong(r, expiry) == 0) return 0; + if (rioWriteBulkString(r, "FIELDS", 6) == 0) return 0; + if (rioWriteBulkLongLong(r, 
1) == 0) return 0; + if (rioWriteBulkString(r, field, sdslen(field)) == 0) return 0; + if (rioWriteBulkString(r, value, sdslen(value)) == 0) return 0; + } + hashTypeResetIterator(&hi); + } return 1; } diff --git a/src/commands.def b/src/commands.def index 0ae1911cd3..f589bc3c7e 100644 --- a/src/commands.def +++ b/src/commands.def @@ -3522,6 +3522,119 @@ struct COMMAND_ARG HEXISTS_Args[] = { {MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, }; +/********** HEXPIRE ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* HEXPIRE history */ +#define HEXPIRE_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* HEXPIRE tips */ +#define HEXPIRE_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* HEXPIRE key specs */ +keySpec HEXPIRE_Keyspecs[1] = { +{NULL,CMD_KEY_RW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* HEXPIRE condition argument table */ +struct COMMAND_ARG HEXPIRE_condition_Subargs[] = { +{MAKE_ARG("nx",ARG_TYPE_PURE_TOKEN,-1,"NX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("xx",ARG_TYPE_PURE_TOKEN,-1,"XX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("gt",ARG_TYPE_PURE_TOKEN,-1,"GT",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("lt",ARG_TYPE_PURE_TOKEN,-1,"LT",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/* HEXPIRE fields argument table */ +struct COMMAND_ARG HEXPIRE_fields_Subargs[] = { +{MAKE_ARG("numfields",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, +}; + +/* HEXPIRE argument table */ +struct COMMAND_ARG HEXPIRE_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("seconds",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("condition",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,4,NULL),.subargs=HEXPIRE_condition_Subargs}, 
+{MAKE_ARG("fields",ARG_TYPE_BLOCK,-1,"FIELDS",NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=HEXPIRE_fields_Subargs}, +}; + +/********** HEXPIREAT ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* HEXPIREAT history */ +#define HEXPIREAT_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* HEXPIREAT tips */ +#define HEXPIREAT_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* HEXPIREAT key specs */ +keySpec HEXPIREAT_Keyspecs[1] = { +{NULL,CMD_KEY_RW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* HEXPIREAT condition argument table */ +struct COMMAND_ARG HEXPIREAT_condition_Subargs[] = { +{MAKE_ARG("nx",ARG_TYPE_PURE_TOKEN,-1,"NX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("xx",ARG_TYPE_PURE_TOKEN,-1,"XX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("gt",ARG_TYPE_PURE_TOKEN,-1,"GT",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("lt",ARG_TYPE_PURE_TOKEN,-1,"LT",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/* HEXPIREAT fields argument table */ +struct COMMAND_ARG HEXPIREAT_fields_Subargs[] = { +{MAKE_ARG("numfields",ARG_TYPE_INTEGER,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, +}; + +/* HEXPIREAT argument table */ +struct COMMAND_ARG HEXPIREAT_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("unix-time-seconds",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("condition",ARG_TYPE_ONEOF,-1,NULL,NULL,"9.0.0",CMD_ARG_OPTIONAL,4,NULL),.subargs=HEXPIREAT_condition_Subargs}, +{MAKE_ARG("fields",ARG_TYPE_BLOCK,-1,"FIELDS",NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=HEXPIREAT_fields_Subargs}, +}; + +/********** HEXPIRETIME ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* HEXPIRETIME history */ +#define HEXPIRETIME_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* HEXPIRETIME tips */ +#define HEXPIRETIME_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* HEXPIRETIME 
key specs */ +keySpec HEXPIRETIME_Keyspecs[1] = { +{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* HEXPIRETIME fields argument table */ +struct COMMAND_ARG HEXPIRETIME_fields_Subargs[] = { +{MAKE_ARG("numfields",ARG_TYPE_INTEGER,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, +}; + +/* HEXPIRETIME argument table */ +struct COMMAND_ARG HEXPIRETIME_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("fields",ARG_TYPE_BLOCK,-1,"FIELDS",NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=HEXPIRETIME_fields_Subargs}, +}; + /********** HGET ********************/ #ifndef SKIP_CMD_HISTORY_TABLE @@ -3573,6 +3686,47 @@ struct COMMAND_ARG HGETALL_Args[] = { {MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, }; +/********** HGETEX ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* HGETEX history */ +#define HGETEX_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* HGETEX tips */ +#define HGETEX_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* HGETEX key specs */ +keySpec HGETEX_Keyspecs[1] = { +{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* HGETEX expiration argument table */ +struct COMMAND_ARG HGETEX_expiration_Subargs[] = { +{MAKE_ARG("seconds",ARG_TYPE_INTEGER,-1,"EX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("milliseconds",ARG_TYPE_INTEGER,-1,"PX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("unix-time-seconds",ARG_TYPE_UNIX_TIME,-1,"EXAT",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("unix-time-milliseconds",ARG_TYPE_UNIX_TIME,-1,"PXAT",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("persist",ARG_TYPE_PURE_TOKEN,-1,"PERSIST",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/* HGETEX fields argument table */ +struct COMMAND_ARG HGETEX_fields_Subargs[] = { 
+{MAKE_ARG("numfields",ARG_TYPE_INTEGER,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, +}; + +/* HGETEX argument table */ +struct COMMAND_ARG HGETEX_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("expiration",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,5,NULL),.subargs=HGETEX_expiration_Subargs}, +{MAKE_ARG("fields",ARG_TYPE_BLOCK,-1,"FIELDS",NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=HGETEX_fields_Subargs}, +}; + /********** HINCRBY ********************/ #ifndef SKIP_CMD_HISTORY_TABLE @@ -3731,6 +3885,181 @@ struct COMMAND_ARG HMSET_Args[] = { {MAKE_ARG("data",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,2,NULL),.subargs=HMSET_data_Subargs}, }; +/********** HPERSIST ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* HPERSIST history */ +#define HPERSIST_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* HPERSIST tips */ +#define HPERSIST_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* HPERSIST key specs */ +keySpec HPERSIST_Keyspecs[1] = { +{NULL,CMD_KEY_RW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* HPERSIST fields argument table */ +struct COMMAND_ARG HPERSIST_fields_Subargs[] = { +{MAKE_ARG("numfields",ARG_TYPE_INTEGER,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, +}; + +/* HPERSIST argument table */ +struct COMMAND_ARG HPERSIST_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("fields",ARG_TYPE_BLOCK,-1,"FIELDS",NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=HPERSIST_fields_Subargs}, +}; + +/********** HPEXPIRE ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* HPEXPIRE history */ +#define HPEXPIRE_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* HPEXPIRE tips */ +#define HPEXPIRE_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* 
HPEXPIRE key specs */ +keySpec HPEXPIRE_Keyspecs[1] = { +{NULL,CMD_KEY_RW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* HPEXPIRE condition argument table */ +struct COMMAND_ARG HPEXPIRE_condition_Subargs[] = { +{MAKE_ARG("nx",ARG_TYPE_PURE_TOKEN,-1,"NX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("xx",ARG_TYPE_PURE_TOKEN,-1,"XX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("gt",ARG_TYPE_PURE_TOKEN,-1,"GT",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("lt",ARG_TYPE_PURE_TOKEN,-1,"LT",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/* HPEXPIRE fields argument table */ +struct COMMAND_ARG HPEXPIRE_fields_Subargs[] = { +{MAKE_ARG("numfields",ARG_TYPE_INTEGER,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, +}; + +/* HPEXPIRE argument table */ +struct COMMAND_ARG HPEXPIRE_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("milliseconds",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("condition",ARG_TYPE_ONEOF,-1,NULL,NULL,"9.0.0",CMD_ARG_OPTIONAL,4,NULL),.subargs=HPEXPIRE_condition_Subargs}, +{MAKE_ARG("fields",ARG_TYPE_BLOCK,-1,"FIELDS",NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=HPEXPIRE_fields_Subargs}, +}; + +/********** HPEXPIREAT ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* HPEXPIREAT history */ +#define HPEXPIREAT_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* HPEXPIREAT tips */ +#define HPEXPIREAT_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* HPEXPIREAT key specs */ +keySpec HPEXPIREAT_Keyspecs[1] = { +{NULL,CMD_KEY_RW|CMD_KEY_UPDATE,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* HPEXPIREAT condition argument table */ +struct COMMAND_ARG HPEXPIREAT_condition_Subargs[] = { +{MAKE_ARG("nx",ARG_TYPE_PURE_TOKEN,-1,"NX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("xx",ARG_TYPE_PURE_TOKEN,-1,"XX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, 
+{MAKE_ARG("gt",ARG_TYPE_PURE_TOKEN,-1,"GT",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("lt",ARG_TYPE_PURE_TOKEN,-1,"LT",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/* HPEXPIREAT fields argument table */ +struct COMMAND_ARG HPEXPIREAT_fields_Subargs[] = { +{MAKE_ARG("numfields",ARG_TYPE_INTEGER,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, +}; + +/* HPEXPIREAT argument table */ +struct COMMAND_ARG HPEXPIREAT_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("unix-time-milliseconds",ARG_TYPE_INTEGER,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("condition",ARG_TYPE_ONEOF,-1,NULL,NULL,"9.0.0",CMD_ARG_OPTIONAL,4,NULL),.subargs=HPEXPIREAT_condition_Subargs}, +{MAKE_ARG("fields",ARG_TYPE_BLOCK,-1,"FIELDS",NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=HPEXPIREAT_fields_Subargs}, +}; + +/********** HPEXPIRETIME ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* HPEXPIRETIME history */ +#define HPEXPIRETIME_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* HPEXPIRETIME tips */ +#define HPEXPIRETIME_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* HPEXPIRETIME key specs */ +keySpec HPEXPIRETIME_Keyspecs[1] = { +{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* HPEXPIRETIME fields argument table */ +struct COMMAND_ARG HPEXPIRETIME_fields_Subargs[] = { +{MAKE_ARG("numfields",ARG_TYPE_INTEGER,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, +}; + +/* HPEXPIRETIME argument table */ +struct COMMAND_ARG HPEXPIRETIME_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("fields",ARG_TYPE_BLOCK,-1,"FIELDS",NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=HPEXPIRETIME_fields_Subargs}, +}; + +/********** HPTTL ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* HPTTL history */ 
+#define HPTTL_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* HPTTL tips */ +#define HPTTL_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* HPTTL key specs */ +keySpec HPTTL_Keyspecs[1] = { +{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* HPTTL fields argument table */ +struct COMMAND_ARG HPTTL_fields_Subargs[] = { +{MAKE_ARG("numfields",ARG_TYPE_INTEGER,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, +}; + +/* HPTTL argument table */ +struct COMMAND_ARG HPTTL_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("fields",ARG_TYPE_BLOCK,-1,"FIELDS",NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=HPTTL_fields_Subargs}, +}; + /********** HRANDFIELD ********************/ #ifndef SKIP_CMD_HISTORY_TABLE @@ -3827,6 +4156,67 @@ struct COMMAND_ARG HSET_Args[] = { {MAKE_ARG("data",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,2,NULL),.subargs=HSET_data_Subargs}, }; +/********** HSETEX ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* HSETEX history */ +#define HSETEX_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* HSETEX tips */ +#define HSETEX_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* HSETEX key specs */ +keySpec HSETEX_Keyspecs[1] = { +{NULL,CMD_KEY_RW|CMD_KEY_INSERT,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* HSETEX key_condition argument table */ +struct COMMAND_ARG HSETEX_key_condition_Subargs[] = { +{MAKE_ARG("nx",ARG_TYPE_PURE_TOKEN,-1,"NX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("xx",ARG_TYPE_PURE_TOKEN,-1,"XX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/* HSETEX fields_condition argument table */ +struct COMMAND_ARG HSETEX_fields_condition_Subargs[] = { +{MAKE_ARG("fnx",ARG_TYPE_PURE_TOKEN,-1,"FNX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("fxx",ARG_TYPE_PURE_TOKEN,-1,"FXX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, 
+}; + +/* HSETEX expiration argument table */ +struct COMMAND_ARG HSETEX_expiration_Subargs[] = { +{MAKE_ARG("seconds",ARG_TYPE_INTEGER,-1,"EX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("milliseconds",ARG_TYPE_INTEGER,-1,"PX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("unix-time-seconds",ARG_TYPE_UNIX_TIME,-1,"EXAT",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("unix-time-milliseconds",ARG_TYPE_UNIX_TIME,-1,"PXAT",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("keepttl",ARG_TYPE_PURE_TOKEN,-1,"KEEPTTL",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/* HSETEX fields data argument table */ +struct COMMAND_ARG HSETEX_fields_data_Subargs[] = { +{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("value",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/* HSETEX fields argument table */ +struct COMMAND_ARG HSETEX_fields_Subargs[] = { +{MAKE_ARG("numfields",ARG_TYPE_INTEGER,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("data",ARG_TYPE_BLOCK,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,2,NULL),.subargs=HSETEX_fields_data_Subargs}, +}; + +/* HSETEX argument table */ +struct COMMAND_ARG HSETEX_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("key-condition",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=HSETEX_key_condition_Subargs}, +{MAKE_ARG("fields-condition",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=HSETEX_fields_condition_Subargs}, +{MAKE_ARG("expiration",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,5,NULL),.subargs=HSETEX_expiration_Subargs}, +{MAKE_ARG("fields",ARG_TYPE_BLOCK,-1,"FIELDS",NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=HSETEX_fields_Subargs}, +}; + /********** HSETNX ********************/ #ifndef SKIP_CMD_HISTORY_TABLE @@ -3878,6 +4268,37 @@ struct COMMAND_ARG HSTRLEN_Args[] = { {MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, }; +/********** HTTL ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* 
HTTL history */ +#define HTTL_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* HTTL tips */ +#define HTTL_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* HTTL key specs */ +keySpec HTTL_Keyspecs[1] = { +{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +}; +#endif + +/* HTTL fields argument table */ +struct COMMAND_ARG HTTL_fields_Subargs[] = { +{MAKE_ARG("numfields",ARG_TYPE_INTEGER,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("field",ARG_TYPE_STRING,-1,NULL,NULL,NULL,CMD_ARG_MULTIPLE,0,NULL)}, +}; + +/* HTTL argument table */ +struct COMMAND_ARG HTTL_Args[] = { +{MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, +{MAKE_ARG("fields",ARG_TYPE_BLOCK,-1,"FIELDS",NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=HTTL_fields_Subargs}, +}; + /********** HVALS ********************/ #ifndef SKIP_CMD_HISTORY_TABLE @@ -11233,19 +11654,30 @@ struct COMMAND_STRUCT serverCommandTable[] = { /* hash */ {MAKE_CMD("hdel","Deletes one or more fields and their values from a hash. 
Deletes the hash if no fields remain.","O(N) where N is the number of fields to be removed.","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HDEL_History,1,HDEL_Tips,0,hdelCommand,-3,CMD_WRITE|CMD_FAST,ACL_CATEGORY_HASH,HDEL_Keyspecs,1,NULL,2),.args=HDEL_Args}, {MAKE_CMD("hexists","Determines whether a field exists in a hash.","O(1)","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HEXISTS_History,0,HEXISTS_Tips,0,hexistsCommand,3,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HEXISTS_Keyspecs,1,NULL,2),.args=HEXISTS_Args}, +{MAKE_CMD("hexpire","Set expiry time on hash fields.","O(1) for each field assigned with TTL, so O(N) to add N TTLs when the command is called with multiple fields.","7.2.4",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HEXPIRE_History,0,HEXPIRE_Tips,0,hexpireCommand,-6,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HEXPIRE_Keyspecs,1,NULL,4),.args=HEXPIRE_Args}, +{MAKE_CMD("hexpireat","Set expiry time on hash fields.","O(1) for each field assigned with TTL, so O(N) to add N TTLs when the command is called with multiple fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HEXPIREAT_History,0,HEXPIREAT_Tips,0,hexpireAtCommand,-6,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HEXPIREAT_Keyspecs,1,NULL,4),.args=HEXPIREAT_Args}, +{MAKE_CMD("hexpiretime","Returns the Unix timestamp in seconds since the epoch at which the given key's field(s) will expire","O(1) for each field, so O(N) for N items when the command is called with multiple fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HEXPIRETIME_History,0,HEXPIRETIME_Tips,0,hexpiretimeCommand,-5,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HEXPIRETIME_Keyspecs,1,NULL,2),.args=HEXPIRETIME_Args}, {MAKE_CMD("hget","Returns the value of a field in a hash.","O(1)","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HGET_History,0,HGET_Tips,0,hgetCommand,3,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HGET_Keyspecs,1,NULL,2),.args=HGET_Args}, 
{MAKE_CMD("hgetall","Returns all fields and values in a hash.","O(N) where N is the size of the hash.","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HGETALL_History,0,HGETALL_Tips,1,hgetallCommand,2,CMD_READONLY,ACL_CATEGORY_HASH,HGETALL_Keyspecs,1,NULL,1),.args=HGETALL_Args}, +{MAKE_CMD("hgetex","Set the value of one or more fields of a given hash key, and optionally set their expiration time.","O(1)","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HGETEX_History,0,HGETEX_Tips,0,hgetexCommand,-5,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HGETEX_Keyspecs,1,NULL,3),.args=HGETEX_Args}, {MAKE_CMD("hincrby","Increments the integer value of a field in a hash by a number. Uses 0 as initial value if the field doesn't exist.","O(1)","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HINCRBY_History,0,HINCRBY_Tips,0,hincrbyCommand,4,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HINCRBY_Keyspecs,1,NULL,3),.args=HINCRBY_Args}, {MAKE_CMD("hincrbyfloat","Increments the floating point value of a field by a number. 
Uses 0 as initial value if the field doesn't exist.","O(1)","2.6.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HINCRBYFLOAT_History,0,HINCRBYFLOAT_Tips,0,hincrbyfloatCommand,4,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HINCRBYFLOAT_Keyspecs,1,NULL,3),.args=HINCRBYFLOAT_Args}, {MAKE_CMD("hkeys","Returns all fields in a hash.","O(N) where N is the size of the hash.","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HKEYS_History,0,HKEYS_Tips,1,hkeysCommand,2,CMD_READONLY,ACL_CATEGORY_HASH,HKEYS_Keyspecs,1,NULL,1),.args=HKEYS_Args}, {MAKE_CMD("hlen","Returns the number of fields in a hash.","O(1)","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HLEN_History,0,HLEN_Tips,0,hlenCommand,2,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HLEN_Keyspecs,1,NULL,1),.args=HLEN_Args}, {MAKE_CMD("hmget","Returns the values of all fields in a hash.","O(N) where N is the number of fields being requested.","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HMGET_History,0,HMGET_Tips,0,hmgetCommand,-3,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HMGET_Keyspecs,1,NULL,2),.args=HMGET_Args}, {MAKE_CMD("hmset","Sets the values of multiple fields.","O(N) where N is the number of fields being set.","2.0.0",CMD_DOC_DEPRECATED,"`HSET` with multiple field-value pairs","4.0.0","hash",COMMAND_GROUP_HASH,HMSET_History,0,HMSET_Tips,0,hsetCommand,-4,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HMSET_Keyspecs,1,NULL,2),.args=HMSET_Args}, +{MAKE_CMD("hpersist","Remove the existing expiration on a hash key's field(s).","O(1) for each field assigned with TTL, so O(N) to persist N items when the command is called with multiple fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HPERSIST_History,0,HPERSIST_Tips,0,hpersistCommand,-3,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HPERSIST_Keyspecs,1,NULL,2),.args=HPERSIST_Args}, +{MAKE_CMD("hpexpire","Set expiry time on hash object.","O(1) for each field assigned with TTL, so O(N) to add N TTLs when the command is 
called with multiple fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HPEXPIRE_History,0,HPEXPIRE_Tips,0,hpexpireCommand,-6,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HPEXPIRE_Keyspecs,1,NULL,4),.args=HPEXPIRE_Args}, +{MAKE_CMD("hpexpireat","Set expiry time on hash object.","O(1) for each field assigned with TTL, so O(N) to add N TTLs when the command is called with multiple fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HPEXPIREAT_History,0,HPEXPIREAT_Tips,0,hpexpireAtCommand,-6,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HPEXPIREAT_Keyspecs,1,NULL,4),.args=HPEXPIREAT_Args}, +{MAKE_CMD("hpexpiretime","Returns the Unix timestamp in milliseconds since Unix epoch at which the given key's field(s) will expire","O(1) for each field, so O(N) for N items when the command is called with multiple fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HPEXPIRETIME_History,0,HPEXPIRETIME_Tips,0,hpexpiretimeCommand,-5,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HPEXPIRETIME_Keyspecs,1,NULL,2),.args=HPEXPIRETIME_Args}, +{MAKE_CMD("hpttl","Returns the remaining time to live (in milliseconds) of a hash key's field(s) that have an associated expiration.","O(1) for each field assigned with TTL, so O(N) for N items when the command is called with multiple fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HPTTL_History,0,HPTTL_Tips,0,hpttlCommand,-5,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HPTTL_Keyspecs,1,NULL,2),.args=HPTTL_Args}, {MAKE_CMD("hrandfield","Returns one or more random fields from a hash.","O(N) where N is the number of fields returned","6.2.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HRANDFIELD_History,0,HRANDFIELD_Tips,1,hrandfieldCommand,-2,CMD_READONLY,ACL_CATEGORY_HASH,HRANDFIELD_Keyspecs,1,NULL,2),.args=HRANDFIELD_Args}, {MAKE_CMD("hscan","Iterates over fields and values of a hash.","O(1) for every call. 
O(N) for a complete iteration, including enough command calls for the cursor to return back to 0. N is the number of elements inside the collection.","2.8.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HSCAN_History,0,HSCAN_Tips,1,hscanCommand,-3,CMD_READONLY,ACL_CATEGORY_HASH,HSCAN_Keyspecs,1,NULL,5),.args=HSCAN_Args}, {MAKE_CMD("hset","Creates or modifies the value of a field in a hash.","O(1) for each field/value pair added, so O(N) to add N field/value pairs when the command is called with multiple field/value pairs.","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HSET_History,1,HSET_Tips,0,hsetCommand,-4,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HSET_Keyspecs,1,NULL,2),.args=HSET_Args}, +{MAKE_CMD("hsetex","Set the value of one or more fields of a given hash key, and optionally set their expiration time.","O(1)","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HSETEX_History,0,HSETEX_Tips,0,hsetexCommand,-6,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HSETEX_Keyspecs,1,NULL,5),.args=HSETEX_Args}, {MAKE_CMD("hsetnx","Sets the value of a field in a hash only when the field doesn't exist.","O(1)","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HSETNX_History,0,HSETNX_Tips,0,hsetnxCommand,4,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HSETNX_Keyspecs,1,NULL,3),.args=HSETNX_Args}, {MAKE_CMD("hstrlen","Returns the length of the value of a field.","O(1)","3.2.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HSTRLEN_History,0,HSTRLEN_Tips,0,hstrlenCommand,3,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HSTRLEN_Keyspecs,1,NULL,2),.args=HSTRLEN_Args}, +{MAKE_CMD("httl","Returns the remaining time to live (in seconds) of a hash key's field(s) that have an associated expiration.","O(1) for each field, so O(N) for N items when the command is called with multiple 
fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HTTL_History,0,HTTL_Tips,0,httlCommand,-5,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HTTL_Keyspecs,1,NULL,2),.args=HTTL_Args}, {MAKE_CMD("hvals","Returns all values in a hash.","O(N) where N is the size of the hash.","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HVALS_History,0,HVALS_Tips,1,hvalsCommand,2,CMD_READONLY,ACL_CATEGORY_HASH,HVALS_Keyspecs,1,NULL,1),.args=HVALS_Args}, /* hyperloglog */ {MAKE_CMD("pfadd","Adds elements to a HyperLogLog key. Creates the key if it doesn't exist.","O(1) to add every element.","2.8.9",CMD_DOC_NONE,NULL,NULL,"hyperloglog",COMMAND_GROUP_HYPERLOGLOG,PFADD_History,0,PFADD_Tips,0,pfaddCommand,-2,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HYPERLOGLOG,PFADD_Keyspecs,1,NULL,2),.args=PFADD_Args}, diff --git a/src/commands/hexpire.json b/src/commands/hexpire.json new file mode 100644 index 0000000000..fa3b6d83ed --- /dev/null +++ b/src/commands/hexpire.json @@ -0,0 +1,119 @@ +{ + "HEXPIRE": { + "summary": "Set expiry time on hash fields.", + "complexity": "O(1) for each field assigned with TTL, so O(N) to add N TTLs when the command is called with multiple fields.", + "group": "hash", + "since": "7.2.4", + "arity": -6, + "function": "hexpireCommand", + "command_flags": [ + "WRITE", + "DENYOOM", + "FAST" + ], + "acl_categories": [ + "HASH" + ], + "key_specs": [ + { + "flags": [ + "RW", + "UPDATE" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "description": "List of values associated with the result of setting expiry on the specific fields, in the same order as they are requested.", + "type": "array", + "minItems": 1, + "items": { + "oneOf": [ + { + "description": "Field does not exist in the HASH, or HASH is empty.", + "const": -2 + }, + { + "description": "The specified NX | XX | GT | LT condition has not been met.", + "const": 0 + }, + { + 
"description": "The expiration time was applied.", + "const": 1 + }, + { + "description": "When called with 0 seconds.", + "const": 2 + } + ] + } + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "seconds", + "type": "integer" + }, + { + "name": "condition", + "type": "oneof", + "optional": true, + "arguments": [ + { + "name": "nx", + "type": "pure-token", + "token": "NX" + }, + { + "name": "xx", + "type": "pure-token", + "token": "XX" + }, + { + "name": "gt", + "type": "pure-token", + "token": "GT" + }, + { + "name": "lt", + "type": "pure-token", + "token": "LT" + } + ] + }, + { + "name": "fields", + "token": "FIELDS", + "type": "block", + "arguments": [ + { + "name": "numfields", + "type": "integer", + "multiple": false, + "minimum": 1 + }, + { + "name": "field", + "type": "string", + "multiple": true + } + ] + } + ] + } +} \ No newline at end of file diff --git a/src/commands/hexpireat.json b/src/commands/hexpireat.json new file mode 100644 index 0000000000..9335066b60 --- /dev/null +++ b/src/commands/hexpireat.json @@ -0,0 +1,121 @@ +{ + "HEXPIREAT": { + "summary": "Set expiry time on hash fields.", + "complexity": "O(1) for each field assigned with TTL, so O(N) to add N TTLs when the command is called with multiple fields.", + "group": "hash", + "since": "9.0.0", + "arity": -6, + "function": "hexpireAtCommand", + "command_flags": [ + "WRITE", + "DENYOOM", + "FAST" + ], + "acl_categories": [ + "HASH" + ], + "key_specs": [ + { + "flags": [ + "RW", + "UPDATE" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "description": "List of values associated with the result of setting expiry on the specific fields, in the same order as they are requested.", + "type": "array", + "minItems": 1, + "items": { + "oneOf": [ + { + "description": "Field does not exist in the HASH, or HASH is empty.", + "const":
-2 + }, + { + "description": "The specified NX | XX | GT | LT condition has not been met.", + "const": 0 + }, + { + "description": "The expiration time was applied.", + "const": 1 + }, + { + "description": "When called with 0 seconds or with a past Unix time in seconds.", + "const": 2 + } + ] + } + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "unix-time-seconds", + "type": "integer" + }, + { + "name": "condition", + "type": "oneof", + "optional": true, + "since": "9.0.0", + "arguments": [ + { + "name": "nx", + "type": "pure-token", + "token": "NX" + }, + { + "name": "xx", + "type": "pure-token", + "token": "XX" + }, + { + "name": "gt", + "type": "pure-token", + "token": "GT" + }, + { + "name": "lt", + "type": "pure-token", + "token": "LT" + } + ] + }, + { + "name": "fields", + "token": "FIELDS", + "type": "block", + "arguments": [ + { + "name": "numfields", + "type": "integer", + "key_spec_index": 0, + "multiple": false, + "minimum": 1 + }, + { + "name": "field", + "type": "string", + "multiple": true + } + ] + } + ] + } +} \ No newline at end of file diff --git a/src/commands/hexpiretime.json b/src/commands/hexpiretime.json new file mode 100644 index 0000000000..9229e1a9b4 --- /dev/null +++ b/src/commands/hexpiretime.json @@ -0,0 +1,85 @@ +{ + "HEXPIRETIME": { + "summary": "Returns the Unix timestamp in seconds since the epoch at which the given key's field(s) will expire", + "complexity": "O(1) for each field, so O(N) for N items when the command is called with multiple fields.", + "group": "hash", + "since": "9.0.0", + "arity": -5, + "function": "hexpiretimeCommand", + "command_flags": [ + "READONLY", + "FAST" + ], + "acl_categories": [ + "HASH" + ], + "key_specs": [ + { + "flags": [ + "RO", + "ACCESS" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "description": "List of values
associated with the result of getting the absolute expiry timestamp of the specific fields, in the same order as they are requested.", + "type": "array", + "minItems": 1, + "items": { + "oneOf": [ + { + "description": "Field does not exist in the provided hash key, or the hash key is empty", + "const": -2 + }, + { + "description": "Field exists in the provided hash key, but has no expiration associated with it.", + "const": -1 + }, + { + "description": "The expiration time associated with the hash key field, in seconds.", + "type": "integer", + "minimum": 0 + } + ] + } + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "fields", + "token": "FIELDS", + "type": "block", + "arguments": [ + { + "name": "numfields", + "type": "integer", + "key_spec_index": 0, + "multiple": false, + "minimum": 1 + }, + { + "name": "field", + "type": "string", + "multiple": true + } + ] + } + ] + } +} \ No newline at end of file diff --git a/src/commands/hgetex.json b/src/commands/hgetex.json new file mode 100644 index 0000000000..939487498d --- /dev/null +++ b/src/commands/hgetex.json @@ -0,0 +1,110 @@ +{ + "HGETEX": { + "summary": "Set the value of one or more fields of a given hash key, and optionally set their expiration time.", + "complexity": "O(1)", + "group": "hash", + "since": "9.0.0", + "arity": -5, + "function": "hgetexCommand", + "command_flags": [ + "READONLY", + "FAST" + ], + "acl_categories": [ + "HASH" + ], + "key_specs": [ + { + "flags": [ + "RO", + "ACCESS" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "description": "List of values associated with the given fields, in the same order as they are requested.", + "type": "array", + "minItems": 1, + "items": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] + } + }, + "arguments": [ + { + "name": "key", + "type": "key", +
"key_spec_index": 0 + }, + { + "name": "expiration", + "type": "oneof", + "optional": true, + "arguments": [ + { + "name": "seconds", + "type": "integer", + "token": "EX" + }, + { + "name": "milliseconds", + "type": "integer", + "token": "PX" + }, + { + "name": "unix-time-seconds", + "type": "unix-time", + "token": "EXAT" + }, + { + "name": "unix-time-milliseconds", + "type": "unix-time", + "token": "PXAT" + }, + { + "name": "persist", + "type": "pure-token", + "token": "PERSIST" + } + ] + }, + { + "name": "fields", + "token": "FIELDS", + "type": "block", + "arguments": [ + { + "name": "numfields", + "type": "integer", + "key_spec_index": 0, + "multiple": false, + "minimum": 1 + }, + { + "name": "field", + "type": "string", + "multiple": true + } + ] + } + ] + } +} \ No newline at end of file diff --git a/src/commands/hpersist.json b/src/commands/hpersist.json new file mode 100644 index 0000000000..06ea3d5d7e --- /dev/null +++ b/src/commands/hpersist.json @@ -0,0 +1,84 @@ +{ + "HPERSIST": { + "summary": "Remove the existing expiration on a hash key's field(s).", + "complexity": "O(1) for each field assigned with TTL, so O(N) to persist N items when the command is called with multiple fields.", + "group": "hash", + "since": "9.0.0", + "arity": -3, + "function": "hpersistCommand", + "command_flags": [ + "WRITE", + "DENYOOM", + "FAST" + ], + "acl_categories": [ + "HASH" + ], + "key_specs": [ + { + "flags": [ + "RW", + "UPDATE" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "description": "List of values associated with the result of removing the expiration from the specific fields, in the same order as they are requested.", + "type": "array", + "minItems": 1, + "items": { + "oneOf": [ + { + "description": "Field does not exist in the provided hash key, or the hash key is empty", + "const": -2 + }, + { + "description": "Field exists in the provided hash key,
but has no expiration associated with it.", + "const": -1 + }, + { + "description": "The expiration time was removed from the hash key field.", + "const": 1 + } + ] + } + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "fields", + "token": "FIELDS", + "type": "block", + "arguments": [ + { + "name": "numfields", + "type": "integer", + "key_spec_index": 0, + "multiple": false + }, + { + "name": "field", + "type": "string", + "multiple": true + } + ] + } + ] + } +} \ No newline at end of file diff --git a/src/commands/hpexpire.json b/src/commands/hpexpire.json new file mode 100644 index 0000000000..9990d8017d --- /dev/null +++ b/src/commands/hpexpire.json @@ -0,0 +1,121 @@ +{ + "HPEXPIRE": { + "summary": "Set expiry time on hash object.", + "complexity": "O(1) for each field assigned with TTL, so O(N) to add N TTLs when the command is called with multiple fields.", + "group": "hash", + "since": "9.0.0", + "arity": -6, + "function": "hpexpireCommand", + "command_flags": [ + "WRITE", + "DENYOOM", + "FAST" + ], + "acl_categories": [ + "HASH" + ], + "key_specs": [ + { + "flags": [ + "RW", + "UPDATE" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "description": "List of values associated with the result of setting expiry on the specific fields, in the same order as they are requested.", + "type": "array", + "minItems": 1, + "items": { + "oneOf": [ + { + "description": "Field does not exist in the HASH, or HASH is empty.", + "const": -2 + }, + { + "description": "The specified NX | XX | GT | LT condition has not been met.", + "const": 0 + }, + { + "description": "The expiration time was applied.", + "const": 1 + }, + { + "description": "When called with 0 milliseconds.", + "const": 2 + } + ] + } + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name":
"milliseconds", + "type": "integer" + }, + { + "name": "condition", + "type": "oneof", + "optional": true, + "since": "9.0.0", + "arguments": [ + { + "name": "nx", + "type": "pure-token", + "token": "NX" + }, + { + "name": "xx", + "type": "pure-token", + "token": "XX" + }, + { + "name": "gt", + "type": "pure-token", + "token": "GT" + }, + { + "name": "lt", + "type": "pure-token", + "token": "LT" + } + ] + }, + { + "name": "fields", + "token": "FIELDS", + "type": "block", + "arguments": [ + { + "name": "numfields", + "type": "integer", + "key_spec_index": 0, + "multiple": false, + "minimum": 1 + }, + { + "name": "field", + "type": "string", + "multiple": true + } + ] + } + ] + } +} \ No newline at end of file diff --git a/src/commands/hpexpireat.json b/src/commands/hpexpireat.json new file mode 100644 index 0000000000..29ef515bc9 --- /dev/null +++ b/src/commands/hpexpireat.json @@ -0,0 +1,121 @@ +{ + "HPEXPIREAT": { + "summary": "Set expiry time on hash object.", + "complexity": "O(1) for each field assigned with TTL, so O(N) to add N TTLs when the command is called with multiple fields.", + "group": "hash", + "since": "9.0.0", + "arity": -6, + "function": "hpexpireAtCommand", + "command_flags": [ + "WRITE", + "DENYOOM", + "FAST" + ], + "acl_categories": [ + "HASH" + ], + "key_specs": [ + { + "flags": [ + "RW", + "UPDATE" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "description": "List of values associated with the result of setting expiry on the specific fields, in the same order as they are requested.", + "type": "array", + "minItems": 1, + "items": { + "oneOf": [ + { + "description": "Field does not exist in the HASH, or HASH is empty.", + "const": -2 + }, + { + "description": "The specified NX | XX | GT | LT condition has not been met.", + "const": 0 + }, + { + "description": "The expiration time was applied.", + "const": 1 + }, + { + 
"description": "When called with 0 milliseconds or with a past Unix time in milliseconds.", + "const": 2 + } + ] + } + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "unix-time-milliseconds", + "type": "integer" + }, + { + "name": "condition", + "type": "oneof", + "optional": true, + "since": "9.0.0", + "arguments": [ + { + "name": "nx", + "type": "pure-token", + "token": "NX" + }, + { + "name": "xx", + "type": "pure-token", + "token": "XX" + }, + { + "name": "gt", + "type": "pure-token", + "token": "GT" + }, + { + "name": "lt", + "type": "pure-token", + "token": "LT" + } + ] + }, + { + "name": "fields", + "token": "FIELDS", + "type": "block", + "arguments": [ + { + "name": "numfields", + "type": "integer", + "key_spec_index": 0, + "multiple": false, + "minimum": 1 + }, + { + "name": "field", + "type": "string", + "multiple": true + } + ] + } + ] + } +} \ No newline at end of file diff --git a/src/commands/hpexpiretime.json b/src/commands/hpexpiretime.json new file mode 100644 index 0000000000..9df2f45039 --- /dev/null +++ b/src/commands/hpexpiretime.json @@ -0,0 +1,85 @@ +{ + "HPEXPIRETIME": { + "summary": "Returns the Unix timestamp in milliseconds since Unix epoch at which the given key's field(s) will expire", + "complexity": "O(1) for each field, so O(N) for N items when the command is called with multiple fields.", + "group": "hash", + "since": "9.0.0", + "arity": -5, + "function": "hpexpiretimeCommand", + "command_flags": [ + "READONLY", + "FAST" + ], + "acl_categories": [ + "HASH" + ], + "key_specs": [ + { + "flags": [ + "RO", + "ACCESS" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "description": "List of values associated with the result of getting the absolute expiry timestamp of the specific fields, in the same order as they are requested.", + "type": "array", + "minItems":
1, + "items": { + "oneOf": [ + { + "description": "Field does not exist in the provided hash key, or the hash key is empty", + "const": -2 + }, + { + "description": "Field exists in the provided hash key, but has no expiration associated with it.", + "const": -1 + }, + { + "description": "The expiration time associated with the hash key field, in milliseconds.", + "type": "integer", + "minimum": 0 + } + ] + } + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "fields", + "token": "FIELDS", + "type": "block", + "arguments": [ + { + "name": "numfields", + "type": "integer", + "key_spec_index": 0, + "multiple": false, + "minimum": 1 + }, + { + "name": "field", + "type": "string", + "multiple": true + } + ] + } + ] + } +} \ No newline at end of file diff --git a/src/commands/hpttl.json b/src/commands/hpttl.json new file mode 100644 index 0000000000..360a7d8d91 --- /dev/null +++ b/src/commands/hpttl.json @@ -0,0 +1,85 @@ +{ + "HPTTL": { + "summary": "Returns the remaining time to live (in milliseconds) of a hash key's field(s) that have an associated expiration.", + "complexity": "O(1) for each field assigned with TTL, so O(N) for N items when the command is called with multiple fields.", + "group": "hash", + "since": "9.0.0", + "arity": -5, + "function": "hpttlCommand", + "command_flags": [ + "READONLY", + "FAST" + ], + "acl_categories": [ + "HASH" + ], + "key_specs": [ + { + "flags": [ + "RO", + "ACCESS" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "description": "List of values associated with the result of getting the remaining time-to-live of the specific fields, in the same order as they are requested.", + "type": "array", + "minItems": 1, + "items": { + "oneOf": [ + { + "description": "Field does not exist in the provided hash key, or the hash key is empty", + "const": -2 + }, + { +
"description": "Field exists in the provided hash key, but has no expiration associated with it.", + "const": -1 + }, + { + "description": "The remaining time to live of the hash key field, in milliseconds.", + "type": "integer", + "minimum": 0 + } + ] + } + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "fields", + "token": "FIELDS", + "type": "block", + "arguments": [ + { + "name": "numfields", + "type": "integer", + "key_spec_index": 0, + "multiple": false, + "minimum": 1 + }, + { + "name": "field", + "type": "string", + "multiple": true + } + ] + } + ] + } +} \ No newline at end of file diff --git a/src/commands/hsetex.json b/src/commands/hsetex.json new file mode 100644 index 0000000000..a444efb6a3 --- /dev/null +++ b/src/commands/hsetex.json @@ -0,0 +1,152 @@ +{ + "HSETEX": { + "summary": "Set the value of one or more fields of a given hash key, and optionally set their expiration time.", + "complexity": "O(1)", + "group": "hash", + "since": "9.0.0", + "arity": -6, + "function": "hsetexCommand", + "command_flags": [ + "WRITE", + "DENYOOM", + "FAST" + ], + "acl_categories": [ + "HASH" + ], + "key_specs": [ + { + "flags": [ + "RW", + "INSERT" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "oneOf": [ + { + "description": "None of the provided fields' values and/or expiration times were set.", + "const": 0 + }, + { + "description": "All of the provided fields' values and/or expiration times were set.", + "const": 1 + } + ] + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "key-condition", + "type": "oneof", + "optional": true, + "arguments": [ + { + "name": "nx", + "type": "pure-token", + "token": "NX" + }, + { + "name": "xx", + "type": "pure-token", + "token": "XX" + } + ] + }, + { + "name": "fields-condition", + "type": "oneof", + "optional": true, +
"arguments": [ + { + "name": "fnx", + "type": "pure-token", + "token": "FNX" + }, + { + "name": "fxx", + "type": "pure-token", + "token": "FXX" + } + ] + }, + { + "name": "expiration", + "type": "oneof", + "optional": true, + "arguments": [ + { + "name": "seconds", + "type": "integer", + "token": "EX" + }, + { + "name": "milliseconds", + "type": "integer", + "token": "PX" + }, + { + "name": "unix-time-seconds", + "type": "unix-time", + "token": "EXAT" + }, + { + "name": "unix-time-milliseconds", + "type": "unix-time", + "token": "PXAT" + }, + { + "name": "keepttl", + "type": "pure-token", + "token": "KEEPTTL" + } + ] + }, + { + "name": "fields", + "token": "FIELDS", + "type": "block", + "arguments": [ + { + "name": "numfields", + "type": "integer", + "key_spec_index": 0, + "multiple": false, + "minimum": 1 + }, + { + "name": "data", + "type": "block", + "multiple": true, + "arguments": [ + { + "name": "field", + "type": "string" + }, + { + "name": "value", + "type": "string" + } + ] + } + ] + } + ] + } +} \ No newline at end of file diff --git a/src/commands/httl.json b/src/commands/httl.json new file mode 100644 index 0000000000..d97d21f1d6 --- /dev/null +++ b/src/commands/httl.json @@ -0,0 +1,85 @@ +{ + "HTTL": { + "summary": "Returns the remaining time to live (in seconds) of a hash key's field(s) that have an associated expiration.", + "complexity": "O(1) for each field, so O(N) for N items when the command is called with multiple fields.", + "group": "hash", + "since": "9.0.0", + "arity": -5, + "function": "httlCommand", + "command_flags": [ + "READONLY", + "FAST" + ], + "acl_categories": [ + "HASH" + ], + "key_specs": [ + { + "flags": [ + "RO", + "ACCESS" + ], + "begin_search": { + "index": { + "pos": 1 + } + }, + "find_keys": { + "range": { + "lastkey": 0, + "step": 1, + "limit": 0 + } + } + } + ], + "reply_schema": { + "description": "List of values associated with the result of getting the remaining time-to-live of the specific fields, in the same order as 
they are requested.", + "type": "array", + "minItems": 1, + "items": { + "oneOf": [ + { + "description": "Field does not exist in the provided hash key, or the hash key is empty", + "const": -2 + }, + { + "description": "Field exists in the provided hash key, but has no expiration associated with it.", + "const": -1 + }, + { + "description": "The remaining time to live of the hash key field, in seconds.", + "type": "integer", + "minimum": 0 + } + ] + } + }, + "arguments": [ + { + "name": "key", + "type": "key", + "key_spec_index": 0 + }, + { + "name": "fields", + "token": "FIELDS", + "type": "block", + "arguments": [ + { + "name": "numfields", + "type": "integer", + "key_spec_index": 0, + "multiple": false, + "minimum": 1 + }, + { + "name": "field", + "type": "string", + "multiple": true + } + ] + } + ] + } +} \ No newline at end of file diff --git a/src/db.c b/src/db.c index f685cba82b..53c21d29d5 100644 --- a/src/db.c +++ b/src/db.c @@ -47,13 +47,6 @@ #define EXPIRE_FORCE_DELETE_EXPIRED 1 #define EXPIRE_AVOID_DELETE_EXPIRED 2 -/* Return values for expireIfNeeded */ -typedef enum { - KEY_VALID = 0, /* Could be volatile and not yet expired, non-volatile, or even non-existing key. */ - KEY_EXPIRED, /* Logically expired but not yet deleted. */ - KEY_DELETED /* The key was deleted now.
*/ -} keyStatus; - static keyStatus expireIfNeededWithDictIndex(serverDb *db, robj *key, robj *val, int flags, int dict_index); static keyStatus expireIfNeeded(serverDb *db, robj *key, robj *val, int flags); static int keyIsExpiredWithDictIndex(serverDb *db, robj *key, int dict_index); @@ -1004,9 +997,9 @@ void hashtableScanCallback(void *privdata, void *entry) { key = node->ele; /* zset data is copied after filtering by key */ } else if (o->type == OBJ_HASH) { - key = hashTypeEntryGetField(entry); + key = entryGetField(entry); if (!data->only_keys) { - val = hashTypeEntryGetValue(entry); + val = entryGetValue(entry); } } else { serverPanic("Type not handled in hashtable SCAN callback."); @@ -1892,7 +1885,7 @@ void propagateDeletion(serverDb *db, robj *key, int lazy) { } /* Returns 1 if the expire value is expired, 0 otherwise. */ -static int timestampIsExpired(mstime_t when) { +int timestampIsExpired(mstime_t when) { if (when < 0) return 0; /* no expire */ mstime_t now = commandTimeSnapshot(); @@ -1901,6 +1894,59 @@ static int timestampIsExpired(mstime_t when) { return now > when; } +/* This function verify if the current conditions allow expiration of keys and fields. + * For some cases expiration is not allowed, but we would still like to ignore the key + * so to treat it as "expired" without actively deleting it. */ +expirationPolicy getExpirationPolicyWithFlags(int flags) { + if (server.loading) return POLICY_IGNORE_EXPIRE; + + /* If we are running in the context of a replica, instead of + * evicting the expired key from the database, we return ASAP: + * the replica key expiration is controlled by the primary that will + * send us synthesized DEL operations for expired keys. The + * exception is when write operations are performed on writable + * replicas. + * + * Still we try to reflect the correct state to the caller, + * that is, POLICY_KEEP_EXPIRED so that the key will be ignored, but not deleted. 
+ * + * When replicating commands from the primary, keys are never considered + * expired, so we return POLICY_IGNORE_EXPIRE */ + if (server.primary_host != NULL) { + if (server.current_client && (server.current_client->flag.primary)) return POLICY_IGNORE_EXPIRE; + if (!(flags & EXPIRE_FORCE_DELETE_EXPIRED)) return POLICY_KEEP_EXPIRED; + } else if (server.import_mode) { + /* If we are running in the import mode on a primary, instead of + * evicting the expired key from the database, we return ASAP: + * the key expiration is controlled by the import source that will + * send us synthesized DEL operations for expired keys. The + * exception is when write operations are performed on this server + * because it's a primary. + * + * Notice: other clients, apart from the import source, should not access + * the data imported by import source. + * + * Still we try to reflect the correct state to the caller, + * that is, POLICY_KEEP_EXPIRED so that the key will be ignored, but not deleted. + * + * When receiving commands from the import source, keys are never considered + * expired, so we return POLICY_IGNORE_EXPIRE */ + if (server.current_client && (server.current_client->flag.import_source)) return POLICY_IGNORE_EXPIRE; + if (!(flags & EXPIRE_FORCE_DELETE_EXPIRED)) return POLICY_KEEP_EXPIRED; + } + + /* In some cases we're explicitly instructed to return an indication of a + * missing key without actually deleting it, even on primaries. */ + if (flags & EXPIRE_AVOID_DELETE_EXPIRED) return POLICY_KEEP_EXPIRED; + + /* If 'expire' action is paused, for whatever reason, then don't expire any key. + * Typically, at the end of the pause we will properly expire the key OR we + * will have failed over and the new primary will send us the expire. */ + if (isPausedActionsWithUpdate(PAUSE_ACTION_EXPIRE)) return POLICY_KEEP_EXPIRED; + + return POLICY_DELETE_EXPIRED; +} + /* Use this instead of keyIsExpired if you already have the value object. 
*/ static int objectIsExpired(robj *val) { /* Don't expire anything while loading. It will be done later. */ @@ -1944,52 +1990,11 @@ static keyStatus expireIfNeededWithDictIndex(serverDb *db, robj *key, robj *val, } else { if (!keyIsExpiredWithDictIndexImpl(db, key, dict_index)) return KEY_VALID; } - - /* If we are running in the context of a replica, instead of - * evicting the expired key from the database, we return ASAP: - * the replica key expiration is controlled by the primary that will - * send us synthesized DEL operations for expired keys. The - * exception is when write operations are performed on writable - * replicas. - * - * Still we try to return the right information to the caller, - * that is, KEY_VALID if we think the key should still be valid, - * KEY_EXPIRED if we think the key is expired but don't want to delete it at this time. - * - * When replicating commands from the primary, keys are never considered - * expired. */ - if (server.primary_host != NULL) { - if (server.current_client && (server.current_client->flag.primary)) return KEY_VALID; - if (!(flags & EXPIRE_FORCE_DELETE_EXPIRED)) return KEY_EXPIRED; - } else if (server.import_mode) { - /* If we are running in the import mode on a primary, instead of - * evicting the expired key from the database, we return ASAP: - * the key expiration is controlled by the import source that will - * send us synthesized DEL operations for expired keys. The - * exception is when write operations are performed on this server - * because it's a primary. - * - * Notice: other clients, apart from the import source, should not access - * the data imported by import source. - * - * Still we try to return the right information to the caller, - * that is, KEY_VALID if we think the key should still be valid, - * KEY_EXPIRED if we think the key is expired but don't want to delete it at this time. - * - * When receiving commands from the import source, keys are never considered - * expired. 
*/ - if (server.current_client && (server.current_client->flag.import_source)) return KEY_VALID; - if (!(flags & EXPIRE_FORCE_DELETE_EXPIRED)) return KEY_EXPIRED; - } - - /* In some cases we're explicitly instructed to return an indication of a - * missing key without actually deleting it, even on primaries. */ - if (flags & EXPIRE_AVOID_DELETE_EXPIRED) return KEY_EXPIRED; - - /* If 'expire' action is paused, for whatever reason, then don't expire any key. - * Typically, at the end of the pause we will properly expire the key OR we - * will have failed over and the new primary will send us the expire. */ - if (isPausedActionsWithUpdate(PAUSE_ACTION_EXPIRE)) return KEY_EXPIRED; + expirationPolicy policy = getExpirationPolicyWithFlags(flags); + if (policy == POLICY_IGNORE_EXPIRE) /* Ignore keys expiration. treat all keys as valid. */ + return KEY_VALID; + else if (policy == POLICY_KEEP_EXPIRED) /* Treat expired keys as invalid, but do not delete them. */ + return KEY_EXPIRED; /* The key needs to be converted from static to heap before deleted */ int static_key = key->refcount == OBJ_STATIC_REFCOUNT; diff --git a/src/defrag.c b/src/defrag.c index 822b49e22e..b2b8aa1121 100644 --- a/src/defrag.c +++ b/src/defrag.c @@ -442,18 +442,18 @@ static void scanLaterSet(robj *ob, unsigned long *cursor) { } /* Hashtable scan callback for hash datatype */ -static void activeDefragHashTypeEntry(void *privdata, void *element_ref) { +static void activeDefragEntry(void *privdata, void *element_ref) { UNUSED(privdata); - hashTypeEntry **entry_ref = (hashTypeEntry **)element_ref; + entry **entry_ref = (entry **)element_ref; - hashTypeEntry *new_entry = hashTypeEntryDefrag(*entry_ref, activeDefragAlloc, activeDefragSds); + entry *new_entry = entryDefrag(*entry_ref, activeDefragAlloc, activeDefragSds); if (new_entry) *entry_ref = new_entry; } static void scanLaterHash(robj *ob, unsigned long *cursor) { serverAssert(ob->type == OBJ_HASH && ob->encoding == OBJ_ENCODING_HASHTABLE); hashtable 
*ht = ob->ptr; - *cursor = hashtableScanDefrag(ht, *cursor, activeDefragHashTypeEntry, NULL, activeDefragAlloc, HASHTABLE_SCAN_EMIT_REF); + *cursor = hashtableScanDefrag(ht, *cursor, activeDefragEntry, NULL, activeDefragAlloc, HASHTABLE_SCAN_EMIT_REF); } static void defragQuicklist(robj *ob) { @@ -498,7 +498,7 @@ static void defragHash(robj *ob) { } else { unsigned long cursor = 0; do { - cursor = hashtableScanDefrag(ht, cursor, activeDefragHashTypeEntry, NULL, activeDefragAlloc, HASHTABLE_SCAN_EMIT_REF); + cursor = hashtableScanDefrag(ht, cursor, activeDefragEntry, NULL, activeDefragAlloc, HASHTABLE_SCAN_EMIT_REF); } while (cursor != 0); } /* defrag the hashtable struct and tables */ diff --git a/src/entry.c b/src/entry.c new file mode 100644 index 0000000000..72d9b0a6f9 --- /dev/null +++ b/src/entry.c @@ -0,0 +1,364 @@ +#include +#include "server.h" +#include "serverassert.h" +#include "entry.h" + +#include + +/*----------------------------------------------------------------------------- + * Entry API + *----------------------------------------------------------------------------*/ + +/* The entry pointer is the field sds. We encode the entry layout type + * in the field SDS header. Field type SDS_TYPE_5 doesn't have any spare bits to + * encode this so we use it only for the first layout type. + * + * Entry with embedded value, used for small sizes. The value is stored as + * SDS_TYPE_8. The field can use any SDS type. + * + * Entry can also have expiration timestamp, which is the UNIX timestamp for it to be expired. + * For aligned fast access, we keep the expiry timestamp prior to the start of the sds header. + * + * +--------------+--------------+---------------+ + * | Expiration | field | value | + * | 1234567890LL | hdr "foo" \0 | hdr8 "bar" \0 | + * +--------------+------^-------+---------------+ + * | + * | + * entry pointer = field sds + * + * Entry with value pointer, used for larger fields and values. The field is SDS + * type 8 or higher. 
+ * + * +--------------+-------+--------------+ + * | Expiration | value | field | + * | 1234567890LL | ptr | hdr "foo" \0 | + * +--------------+---^---+--------------+ + * | + * | + * entry pointer = field sds + */ + +/* The maximum allocation size we want to use for entries with embedded + * values. */ +#define EMBED_VALUE_MAX_ALLOC_SIZE 128 + +/* SDS aux flag. If set, it indicates that the entry has TTL metadata set. */ +#define FIELD_SDS_AUX_BIT_ENTRY_HAS_EXPIRY 0 + +/* SDS aux flag. If set, it indicates that the entry has an embedded value + * pointer located in memory before the embedded field. If unset, the entry + * instead has an embedded value located after the embedded field. */ +#define FIELD_SDS_AUX_BIT_ENTRY_HAS_VALUE_PTR 2 + +/* Returns true in case the entry's value is not embedded in the entry. + * Returns false otherwise. */ +bool entryHasValuePtr(const entry *entry) { + return sdsGetAuxBit(entry, FIELD_SDS_AUX_BIT_ENTRY_HAS_VALUE_PTR); +} + +/* Returns true in case the entry has expiration timestamp. + * Returns false otherwise. */ +bool entryHasExpiry(const entry *entry) { + return sdsGetAuxBit(entry, FIELD_SDS_AUX_BIT_ENTRY_HAS_EXPIRY); +} + +/* Returns the location of a pointer to a separately allocated value. Only for + * an entry without an embedded value. */ +sds *entryGetValueRef(const entry *entry) { + serverAssert(entryHasValuePtr(entry)); + char *field_data = sdsAllocPtr(entry); + field_data -= sizeof(sds *); + return (sds *)field_data; +} + +/* The entry pointer is the field sds, but that's an implementation detail. */ +sds entryGetField(const entry *entry) { + return (sds)entry; +} + +/* Returns the sds of the entry's value. */ +sds entryGetValue(const entry *entry) { + if (entryHasValuePtr(entry)) { + return *entryGetValueRef(entry); + } else { + /* Skip field content, field null terminator and value sds8 hdr. 
*/ + size_t offset = sdslen(entry) + 1 + sdsHdrSize(SDS_TYPE_8); + serverAssert((char *)entry + offset); + + return (char *)entry + offset; + } +} + +/* Modify the value of this entry and return a pointer to the (potentially new) entry. + * The value is taken by the function and cannot be reused after this function returns. */ +entry *entrySetValue(entry *e, sds value) { + if (entryHasValuePtr(e)) { + sds *value_ref = entryGetValueRef(e); + sdsfree(*value_ref); + *value_ref = value; + return e; + } else { + entry *new_entry = entryUpdate(e, value, entryGetExpiry(e)); + return new_entry; + } +} + +/* Returns the address of the entry allocation. */ +void *entryAllocPtr(const entry *entry) { + char *buf = sdsAllocPtr(entry); + if (entryHasValuePtr(entry)) buf -= sizeof(sds *); + if (entryHasExpiry(entry)) buf -= sizeof(long long); + return buf; +} + +/**************************************** Entry Expiry API *****************************************/ + +/* Returns the entry expiration timestamp. + * In case this entry has no expiration time, will return EXPIRY_NONE. */ +long long entryGetExpiry(const entry *entry) { + long long expiry = EXPIRY_NONE; + if (entryHasExpiry(entry)) { + char *buf = sdsAllocPtr(entry); + if (entryHasValuePtr(entry)) buf -= sizeof(sds *); + buf -= sizeof(expiry); + expiry = *(long long *)buf; + } + return expiry; +} + +/* Modify the expiration time of this entry and return a pointer to the (potentially new) entry. */ +entry *entrySetExpiry(entry *e, long long expiry) { + if (entryHasExpiry(e)) { + char *buf = sdsAllocPtr(e); + if (entryHasValuePtr(e)) buf -= sizeof(sds *); + buf -= sizeof(expiry); + memcpy(buf, &expiry, sizeof(expiry)); + return e; + } + entry *new_entry = entryUpdate(e, NULL, expiry); + return new_entry; +} + +/* Returns 1 if the entry's expiration time has passed and it should be treated as expired, 0 otherwise. */ +int entryIsExpired(entry *entry) { + /* Don't expire anything while loading. It will be done later.
*/ + if (server.loading) return 0; + if (!timestampIsExpired(entryGetExpiry(entry))) return 0; + if (server.primary_host == NULL && server.import_mode) { + if (server.current_client && server.current_client->flag.import_source) return 0; + } + return 1; +} +/**************************************** Entry Expiry API - End *****************************************/ + +void entryFree(entry *entry) { + if (entryHasValuePtr(entry)) { + sdsfree(*entryGetValueRef(entry)); + } + zfree(entryAllocPtr(entry)); +} + +/* Takes ownership of value, does not take ownership of field */ +entry *entryCreate(sds field, sds value, long long expiry) { + /* In case simple sds just return the same field we got. */ + if (!value && expiry == EXPIRY_NONE) + return field; + sds embedded_field_sds; + size_t expiry_size = (expiry == EXPIRY_NONE) ? 0 : sizeof(long long); + size_t field_len = sdslen(field); + int field_sds_type = sdsReqType(field_len); + if (field_sds_type == SDS_TYPE_5 && (expiry_size > 0)) { + field_sds_type = SDS_TYPE_8; + } + size_t field_size = sdsReqSize(field_len, field_sds_type); + size_t value_len = value ? sdslen(value) : 0; + size_t embedded_value_size = value ? sdsReqSize(value_len, SDS_TYPE_8) : 0; + size_t alloc_size = field_size + expiry_size; + bool embed_value = false; + if (value) { + if (alloc_size + embedded_value_size <= EMBED_VALUE_MAX_ALLOC_SIZE) { + /* Embed field and value. Value is fixed to SDS_TYPE_8. Unused + * allocation space is recorded in the embedded value's SDS header. + * + * +------+--------------+---------------+ + * | TTL | field | value | + * | | hdr "foo" \0 | hdr8 "bar" \0 | + * +------+--------------+---------------+ + */ + embed_value = true; + alloc_size += embedded_value_size; + } else { + /* Embed field, but not value. Field must be >= SDS_TYPE_8 to encode to + * indicate this type of entry. 
+ * + * +------+-------+---------------+ + * | TTL | value | field | + * | | ptr | hdr8 "foo" \0 | + * +------+-------+---------------+ + */ + embed_value = false; + alloc_size += sizeof(sds *); + if (field_sds_type == SDS_TYPE_5) { + field_sds_type = SDS_TYPE_8; + alloc_size -= field_size; + field_size = sdsReqSize(field_len, field_sds_type); + alloc_size += field_size; + } + } + } + /* allocate the buffer */ + size_t buf_size; + char *buf = zmalloc_usable(alloc_size, &buf_size); + + /* Set The expiry if exists */ + if (expiry_size) { + memcpy(buf, &expiry, expiry_size); + buf += expiry_size; + buf_size -= expiry_size; + } + if (value) { + if (!embed_value) { + *(sds *)buf = value; + buf += sizeof(sds *); + buf_size -= sizeof(sds *); + } else { + sdswrite(buf + field_size, buf_size - field_size, SDS_TYPE_8, value, value_len); + sdsfree(value); + buf_size -= embedded_value_size; + } + } + /* Set the field data */ + embedded_field_sds = sdswrite(buf, field_size, field_sds_type, field, field_len); + + /* Field sds aux bits are zero, which we use for this entry encoding. */ + sdsSetAuxBit(embedded_field_sds, FIELD_SDS_AUX_BIT_ENTRY_HAS_VALUE_PTR, embed_value ? 0 : 1); + sdsSetAuxBit(embedded_field_sds, FIELD_SDS_AUX_BIT_ENTRY_HAS_EXPIRY, expiry_size > 0 ? 1 : 0); + return (void *)embedded_field_sds; +} + +/* Modify the entry's value and/or expiration time. + * In case the provided value is NULL, will use the existing value. */ +entry *entryUpdate(entry *e, sds value, long long expiry) { + sds field = (sds)e; + + bool update_value = value ? true : false; + long long ttl = entryGetExpiry(e); + bool update_expiry = (expiry != ttl) ? true : false; + if (!update_value && !update_expiry) + return e; + ttl = expiry; + value = update_value ? value : entryGetValue(e); + size_t expiry_size = ttl != EXPIRY_NONE ? 
sizeof(ttl) : 0; + int field_sds_type = sdsReqType(sdslen(field)); + if (field_sds_type == SDS_TYPE_5 && (expiry_size > 0)) { + field_sds_type = SDS_TYPE_8; + } + size_t field_size = sdsHdrSize(field_sds_type) + sdsalloc(field) + 1; + size_t value_len = value ? sdslen(value) : 0; + size_t embedded_value_size = value ? sdsReqSize(value_len, SDS_TYPE_8) : 0; + + size_t required_embedded_size = field_size + embedded_value_size + expiry_size; + size_t current_embedded_allocation_size = entryHasValuePtr(e) ? 0 : entryMemUsage(e); + /* We will create a new entry in the following cases: + * 1. In the case where we add or remove expiration. + * 2. In the case where we are NOT migrating from an embedded entry to an embedded entry with ~the same size. */ + bool create_new_entry = (update_expiry && (entryGetExpiry(e) == EXPIRY_NONE || ttl == EXPIRY_NONE)) || + !(update_value && !entryHasValuePtr(e) && + required_embedded_size <= EMBED_VALUE_MAX_ALLOC_SIZE && + required_embedded_size <= current_embedded_allocation_size && + required_embedded_size >= current_embedded_allocation_size * 3 / 4); + + if (!create_new_entry) { + /* In this case we are sure we do not have to allocate new entry, so expiry must already be set. */ + if (update_expiry) { + serverAssert(entryHasExpiry(e)); + char *buf = sdsAllocPtr(e); + if (entryHasValuePtr(e)) buf -= sizeof(sds *); + buf -= sizeof(expiry); + memcpy(buf, &expiry, sizeof(expiry)); + } + /* In this case we are sure we do not have to allocate new entry, so value must already be set or we have enough room to embed it. */ + if (update_value) { + if (entryHasValuePtr(e)) { + sds *value_ref = entryGetValueRef(e); + sdsfree(*value_ref); + *value_ref = value; + } else { + /* Skip field content, field null terminator and value sds8 hdr.
*/ + sds old_value = entryGetValue(e); + sdswrite(sdsAllocPtr(old_value), sdsAllocSize(old_value), SDS_TYPE_8, value, sdslen(value)); + sdsfree(value); + } + } + return e; + + } else { + if (!update_value) { + /* Check if the value can be reused. */ + int value_was_embedded = !entryHasValuePtr(e); + /* In case the original entry value is embedded WE WILL HAVE TO DUPLICATE IT */ + if (value_was_embedded) + value = sdsdup(value); + /* if not we have to duplicate it, remove it from the original entry since we are going to delete it.*/ + else { + sds *value_ref = entryGetValueRef(e); + *value_ref = NULL; + } + } + } + + entry *new_entry = entryCreate(entryGetField(e), value, ttl); + if (new_entry != e) + entryFree(e); + return new_entry; +} + +/* Returns memory usage of a entry, including all allocations owned by + * the entry. */ +size_t entryMemUsage(entry *entry) { + size_t mem = 0; + + if (entryHasValuePtr(entry)) { + /* In case the value is not embedded we might not be able to sum all the allocation sizes since the field + * header could be too small for holding the real allocation size. */ + mem += zmalloc_usable_size(entryAllocPtr(entry)); + } else { + mem += sdsReqSize(sdslen(entry), sdsType(entry)); + if (entryHasExpiry(entry)) mem += sizeof(long long); + } + mem += sdsAllocSize(entryGetValue(entry)); + return mem; +} + +/* Defragments a hashtable entry (field-value pair) if needed, using the + * provided defrag functions. The defrag functions return NULL if the allocation + * was not moved, otherwise they return a pointer to the new memory location. + * A separate sds defrag function is needed because of the unique memory layout + * of sds strings. + * If the location of the entry changed we return the new location, + * otherwise we return NULL. 
*/ +entry *entryDefrag(entry *entry, void *(*defragfn)(void *), sds (*sdsdefragfn)(sds)) { + if (entryHasValuePtr(entry)) { + sds *value_ref = entryGetValueRef(entry); + sds new_value = sdsdefragfn(*value_ref); + if (new_value) *value_ref = new_value; + } + char *allocation = entryAllocPtr(entry); + char *new_allocation = defragfn(allocation); + if (new_allocation != NULL) { + /* Return the same offset into the new allocation as the entry's offset + * in the old allocation. */ + return new_allocation + ((char *)entry - allocation); + } + return NULL; +} + +/* Used for releasing memory to OS to avoid unnecessary CoW. Called when we've + * forked and memory won't be used again. See zmadvise_dontneed() */ +void dismissEntry(entry *entry) { + /* Only dismiss values memory since the field size usually is small. */ + if (entryHasValuePtr(entry)) { + dismissSds(*entryGetValueRef(entry)); + } +} diff --git a/src/entry.h b/src/entry.h new file mode 100644 index 0000000000..edaea1cb51 --- /dev/null +++ b/src/entry.h @@ -0,0 +1,25 @@ +#ifndef _ENTRY_H_ +#define _ENTRY_H_ + +#include "sds.h" +#include + +typedef void entry; + +sds *entryGetValueRef(const entry *entry); +sds entryGetField(const entry *entry); +sds entryGetValue(const entry *entry); +entry *entrySetValue(entry *entry, sds value); +long long entryGetExpiry(const entry *entry); +bool entryHasExpiry(const entry *entry); +entry *entrySetExpiry(entry *entry, long long expiry); +int entryIsExpired(entry *entry); + +void entryFree(entry *entry); +entry *entryCreate(sds field, sds value, long long expiry); +entry *entryUpdate(entry *entry, sds value, long long expiry); +size_t entryMemUsage(entry *entry); +entry *entryDefrag(entry *entry, void *(*defragfn)(void *), sds (*sdsdefragfn)(sds)); +void dismissEntry(entry *entry); + +#endif diff --git a/src/expire.c b/src/expire.c index 3a332f5563..60d67ad9e4 100644 --- a/src/expire.c +++ b/src/expire.c @@ -537,23 +537,19 @@ int checkAlreadyExpired(long long when) { return 
(when <= commandTimeSnapshot() && !server.loading && !server.primary_host && !server.import_mode); } -#define EXPIRE_NX (1 << 0) -#define EXPIRE_XX (1 << 1) -#define EXPIRE_GT (1 << 2) -#define EXPIRE_LT (1 << 3) - -/* Parse additional flags of expire commands +/* Parse additional flags of expire commands, scanning arguments from index 3 up to (not including) max_args. + * Pass c->argc as max_args to scan all the remaining arguments. * * Supported flags: * - NX: set expiry only when the key has no expiry * - XX: set expiry only when the key has an existing expiry * - GT: set expiry only when the new expiry is greater than current one * - LT: set expiry only when the new expiry is less than current one */ -int parseExtendedExpireArgumentsOrReply(client *c, int *flags) { +int parseExtendedExpireArgumentsOrReply(client *c, int *flags, int max_args) { int nx = 0, xx = 0, gt = 0, lt = 0; int j = 3; - while (j < c->argc) { + while (j < max_args) { char *opt = c->argv[j]->ptr; if (!strcasecmp(opt, "nx")) { *flags |= EXPIRE_NX; @@ -587,6 +583,31 @@ int parseExtendedExpireArgumentsOrReply(client *c, int *flags) { return C_OK; } +int convertExpireArgumentToUnixTime(client *c, robj *arg, long long basetime, int unit, long long *unixtime) { + long long when; + if (getLongLongFromObjectOrReply(c, arg, &when, NULL) != C_OK) return C_ERR; + + if (when < 0) { + addReplyErrorExpireTime(c); + return C_ERR; + } + + if (unit == UNIT_SECONDS) { + if (when > LLONG_MAX / 1000 || when < LLONG_MIN / 1000) { + addReplyErrorExpireTime(c); + return C_ERR; + } + when *= 1000; + } + if (when > LLONG_MAX - basetime) { + addReplyErrorExpireTime(c); + return C_ERR; + } + when += basetime; + if (unixtime) *unixtime = when; + return C_OK; +} + /*----------------------------------------------------------------------------- * Expires Commands *----------------------------------------------------------------------------*/ @@ -607,7 +628,7 @@ void expireGenericCommand(client *c, long long basetime, int unit) { int flag = 0; /* checking optional flags */ -
if (parseExtendedExpireArgumentsOrReply(c, &flag) != C_OK) { + if (parseExtendedExpireArgumentsOrReply(c, &flag, c->argc) != C_OK) { return; } diff --git a/src/hashtable.c b/src/hashtable.c index e3b9ee4b3f..27c610afa2 100644 --- a/src/hashtable.c +++ b/src/hashtable.c @@ -366,6 +366,12 @@ typedef struct { void **entries; /* Array of sampled entries. */ } scan_samples; +/* --- Access API --- */ +static inline hashtableEntryValidationState validateElementIfNeeded(hashtable *ht, void *elem) { + if (ht->type->validateEntry == NULL) return ENTRY_VALID; + return ht->type->validateEntry(ht, elem); +} + /* --- Internal functions --- */ static bucket *findBucketForInsert(hashtable *ht, uint64_t hash, int *pos_in_bucket, int *table_index); @@ -685,6 +691,9 @@ static inline int checkCandidateInBucket(hashtable *ht, bucket *b, int pos, cons if (compareKeys(ht, key, elem_key) == 0) { /* It's a match. */ assert(pos_in_bucket != NULL); + if (validateElementIfNeeded(ht, entry) != ENTRY_VALID) { + return 0; + } *pos_in_bucket = pos; if (table_index) *table_index = table; return 1; @@ -1127,6 +1136,15 @@ hashtableType *hashtableGetType(hashtable *ht) { return ht->type; } +/* Set the hashtable type and returns the old type of the hashtable. + * NOTE that changing the hashtable type can lead to unexpected results. + * For example, changing the hash function can impact the ability to correctly fetch elements. */ +hashtableType *hashtableSetType(hashtable *ht, hashtableType *type) { + hashtableType *oldtype = ht->type; + ht->type = type; + return oldtype; +} + /* Returns a pointer to the table's metadata (userdata) section. 
*/ void *hashtableMetadata(hashtable *ht) { return &ht->metadata; @@ -1765,7 +1783,7 @@ size_t hashtableScanDefrag(hashtable *ht, size_t cursor, hashtableScanFunction f if (b->presence != 0) { int pos; for (pos = 0; pos < ENTRIES_PER_BUCKET; pos++) { - if (isPositionFilled(b, pos)) { + if (isPositionFilled(b, pos) && validateElementIfNeeded(ht, b->entries[pos]) == ENTRY_VALID) { void *emit = emit_ref ? &b->entries[pos] : b->entries[pos]; fn(privdata, emit); } @@ -1802,7 +1820,7 @@ size_t hashtableScanDefrag(hashtable *ht, size_t cursor, hashtableScanFunction f do { if (b->presence) { for (int pos = 0; pos < ENTRIES_PER_BUCKET; pos++) { - if (isPositionFilled(b, pos)) { + if (isPositionFilled(b, pos) && validateElementIfNeeded(ht, b->entries[pos]) == ENTRY_VALID) { void *emit = emit_ref ? &b->entries[pos] : b->entries[pos]; fn(privdata, emit); } @@ -1832,7 +1850,7 @@ size_t hashtableScanDefrag(hashtable *ht, size_t cursor, hashtableScanFunction f do { if (b->presence) { for (int pos = 0; pos < ENTRIES_PER_BUCKET; pos++) { - if (isPositionFilled(b, pos)) { + if (isPositionFilled(b, pos) && validateElementIfNeeded(ht, b->entries[pos]) == ENTRY_VALID) { void *emit = emit_ref ? &b->entries[pos] : b->entries[pos]; fn(privdata, emit); } @@ -2021,6 +2039,9 @@ int hashtableNext(hashtableIterator *iterator, void **elemptr) { /* No entry here. */ continue; } + if (!(iter->flags & HASHTABLE_ITER_SKIP_VALIDATION) && validateElementIfNeeded(iter->hashtable, b->entries[iter->pos_in_bucket]) != ENTRY_VALID) { + continue; + } /* Return the entry at this position. 
*/ if (elemptr) { *elemptr = b->entries[iter->pos_in_bucket]; diff --git a/src/hashtable.h b/src/hashtable.h index 48a7077b72..d0c6a9d81e 100644 --- a/src/hashtable.h +++ b/src/hashtable.h @@ -42,6 +42,11 @@ typedef uint64_t hashtableIterator[5]; typedef uint64_t hashtablePosition[2]; typedef uint64_t hashtableIncrementalFindState[5]; +typedef enum { + ENTRY_VALID = 0, + ENTRY_INVALID +} hashtableEntryValidationState; + /* --- Non-opaque types --- */ /* The hashtableType is a set of callbacks for a hashtable. All callbacks are @@ -57,6 +62,8 @@ typedef struct { /* Compare function, returns 0 if the keys are equal. Defaults to just * comparing the pointers for equality. */ int (*keyCompare)(const void *key1, const void *key2); + /* Check for entry access is valid or not. Invalid access will just treat the entry as not-exist. */ + hashtableEntryValidationState (*validateEntry)(hashtable *ht, void *entry); /* Callback to free an entry when it's overwritten or deleted. * Optional. */ void (*entryDestructor)(void *entry); @@ -77,6 +84,7 @@ typedef struct { size_t (*getMetadataSize)(void); /* Flag to disable incremental rehashing */ unsigned instant_rehashing : 1; + } hashtableType; typedef enum { @@ -96,6 +104,7 @@ typedef void (*hashtableScanFunction)(void *privdata, void *entry); /* Iterator flags */ #define HASHTABLE_ITER_SAFE (1 << 0) #define HASHTABLE_ITER_PREFETCH_VALUES (1 << 1) +#define HASHTABLE_ITER_SKIP_VALIDATION (1 << 2) /* --- Prototypes --- */ @@ -113,6 +122,7 @@ hashtable *hashtableCreate(hashtableType *type); void hashtableRelease(hashtable *ht); void hashtableEmpty(hashtable *ht, void(callback)(hashtable *)); hashtableType *hashtableGetType(hashtable *ht); +hashtableType *hashtableSetType(hashtable *ht, hashtableType *type); void *hashtableMetadata(hashtable *ht); size_t hashtableSize(const hashtable *ht); size_t hashtableBuckets(hashtable *ht); diff --git a/src/module.c b/src/module.c index 3569ae8214..63ac8590ce 100644 --- a/src/module.c +++ 
b/src/module.c @@ -5350,11 +5350,11 @@ int VM_HashSet(ValkeyModuleKey *key, int flags, ...) { /* If CFIELDS is active, we can pass the ownership of the * SDS object to the low level function that sets the field * to avoid a useless copy. */ - if (flags & VALKEYMODULE_HASH_CFIELDS) low_flags |= HASH_SET_TAKE_FIELD; + if (flags & VALKEYMODULE_HASH_CFIELDS) low_flags |= (HASH_SET_TAKE_FIELD); robj *argv[2] = {field, value}; hashTypeTryConversion(key->value, argv, 0, 1); - int updated = hashTypeSet(key->value, field->ptr, value->ptr, low_flags); + int updated = hashTypeSet(key->value, field->ptr, value->ptr, EXPIRY_NONE, low_flags); count += (flags & VALKEYMODULE_HASH_COUNT_ALL) ? 1 : updated; /* If CFIELDS is active, SDS string ownership is now of hashTypeSet(), @@ -11224,8 +11224,8 @@ static void moduleScanKeyHashtableCallback(void *privdata, void *entry) { key = node->ele; value = createStringObjectFromLongDouble(node->score, 0); } else if (o->type == OBJ_HASH) { - key = hashTypeEntryGetField(entry); - sds val = hashTypeEntryGetValue(entry); + key = entryGetField(entry); + sds val = entryGetValue(entry); value = createStringObject(val, sdslen(val)); } else { serverPanic("unexpected object type"); diff --git a/src/object.c b/src/object.c index 36715429b5..2ef0a85ae7 100644 --- a/src/object.c +++ b/src/object.c @@ -525,7 +525,10 @@ void freeZsetObject(robj *o) { void freeHashObject(robj *o) { switch (o->encoding) { - case OBJ_ENCODING_HASHTABLE: hashtableRelease((hashtable *)o->ptr); break; + case OBJ_ENCODING_HASHTABLE: + hashTypeFreeVolatileSet(o); + hashtableRelease((hashtable *)o->ptr); + break; case OBJ_ENCODING_LISTPACK: lpFree(o->ptr); break; default: serverPanic("Unknown hash encoding type"); break; } @@ -680,7 +683,7 @@ void dismissHashObject(robj *o, size_t size_hint) { hashtableInitIterator(&iter, ht, 0); void *next; while (hashtableNext(&iter, &next)) { - dismissHashTypeEntry(next); + dismissEntry(next); } hashtableResetIterator(&iter); } @@ -1201,7 
+1204,7 @@ size_t objectComputeSize(robj *key, robj *o, size_t sample_size, int dbid) { asize = sizeof(*o) + hashtableMemUsage(ht); while (hashtableNext(&iter, &next) && samples < sample_size) { - elesize += hashTypeEntryMemUsage(next); + elesize += entryMemUsage(next); samples++; } hashtableResetIterator(&iter); diff --git a/src/rdb.c b/src/rdb.c index e8bf2e25bd..08d1ada6e1 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -32,6 +32,7 @@ * SPDX-License-Identifier: BSD-3-Clause */ +#include "hashtable.h" #include "server.h" #include "lzf.h" /* LZF compression library */ #include "zipmap.h" @@ -717,7 +718,10 @@ int rdbSaveObjectType(rio *rdb, robj *o) { if (o->encoding == OBJ_ENCODING_LISTPACK) return rdbSaveType(rdb, RDB_TYPE_HASH_LISTPACK); else if (o->encoding == OBJ_ENCODING_HASHTABLE) - return rdbSaveType(rdb, RDB_TYPE_HASH); + if (hashTypeHasVolatileElements(o)) + return rdbSaveType(rdb, RDB_TYPE_HASH_2); + else + return rdbSaveType(rdb, RDB_TYPE_HASH); else serverPanic("Unknown hash encoding"); case OBJ_STREAM: return rdbSaveType(rdb, RDB_TYPE_STREAM_LISTPACKS_3); @@ -840,7 +844,6 @@ size_t rdbSaveStreamConsumers(rio *rdb, streamCG *cg) { * Returns -1 on error, number of bytes written on success. 
*/ ssize_t rdbSaveObject(rio *rdb, robj *o, robj *key, int dbid) { ssize_t n = 0, nwritten = 0; - if (o->type == OBJ_STRING) { /* Save a string value */ if ((n = rdbSaveStringObject(rdb, o)) == -1) return -1; @@ -963,13 +966,14 @@ ssize_t rdbSaveObject(rio *rdb, robj *o, robj *key, int dbid) { return -1; } nwritten += n; - + /* Per-field expiry times are written only when the hash has volatile elements. */ + int add_expiry = hashTypeHasVolatileElements(o); hashtableIterator iter; - hashtableInitIterator(&iter, ht, 0); + hashtableInitIterator(&iter, ht, HASHTABLE_ITER_SKIP_VALIDATION); void *next; while (hashtableNext(&iter, &next)) { - sds field = hashTypeEntryGetField(next); - sds value = hashTypeEntryGetValue(next); + sds field = entryGetField(next); + sds value = entryGetValue(next); if ((n = rdbSaveRawString(rdb, (unsigned char *)field, sdslen(field))) == -1) { hashtableResetIterator(&iter); @@ -981,8 +985,17 @@ ssize_t rdbSaveObject(rio *rdb, robj *o, robj *key, int dbid) { return -1; } nwritten += n; + if (add_expiry) { + long long expiry = entryGetExpiry(next); + if ((n = rdbSaveMillisecondTime(rdb, expiry)) == -1) { + hashtableResetIterator(&iter); + return -1; + } + nwritten += n; + } } hashtableResetIterator(&iter); + } else { serverPanic("Unknown hash encoding"); } @@ -2073,7 +2086,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) { lpSafeToAdd(NULL, totelelen)) { zsetConvert(o, OBJ_ENCODING_LISTPACK); } - } else if (rdbtype == RDB_TYPE_HASH) { + } else if (rdbtype == RDB_TYPE_HASH || rdbtype == RDB_TYPE_HASH_2) { uint64_t len; sds field, value; hashtable *dupSearchHashtable = NULL; @@ -2084,8 +2097,8 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) { o = createHashObject(); - /* Too many entries? Use a hash table right from the start. */ - if (len > server.hash_max_listpack_entries) + /* Too many entries or hash object contains elements with expiry? Use a hash table right from the start.
*/ + if (len > server.hash_max_listpack_entries || rdbtype == RDB_TYPE_HASH_2) hashTypeConvert(o, OBJ_ENCODING_HASHTABLE); else if (deep_integrity_validation) { /* In this mode, we need to guarantee that the server won't crash @@ -2126,21 +2139,23 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) { } /* Convert to hash table if size threshold is exceeded */ - if (sdslen(field) > server.hash_max_listpack_value || sdslen(value) > server.hash_max_listpack_value || - !lpSafeToAdd(o->ptr, sdslen(field) + sdslen(value))) { + if (o->encoding != OBJ_ENCODING_HASHTABLE && + (sdslen(field) > server.hash_max_listpack_value || sdslen(value) > server.hash_max_listpack_value || + !lpSafeToAdd(o->ptr, sdslen(field) + sdslen(value)))) { hashTypeConvert(o, OBJ_ENCODING_HASHTABLE); - hashTypeEntry *entry = hashTypeCreateEntry(field, value); + entry *entry = entryCreate(field, value, EXPIRY_NONE); sdsfree(field); if (!hashtableAdd((hashtable *)o->ptr, entry)) { rdbReportCorruptRDB("Duplicate hash fields detected"); if (dupSearchHashtable) hashtableRelease(dupSearchHashtable); - freeHashTypeEntry(entry); + entryFree(entry); decrRefCount(o); return NULL; } break; } + /* Add pair to listpack */ o->ptr = lpAppend(o->ptr, (unsigned char *)field, sdslen(field)); o->ptr = lpAppend(o->ptr, (unsigned char *)value, sdslen(value)); @@ -2178,15 +2193,26 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) { return NULL; } + /* Also load the entry expiry (present only for RDB_TYPE_HASH_2) */ + long long itemexpiry = -1; + if (rdbtype == RDB_TYPE_HASH_2) { + itemexpiry = rdbLoadMillisecondTime(rdb, RDB_VERSION); + if (itemexpiry == LLONG_MAX && rioGetReadError(rdb)) { /* no entry exists yet: field, value and o are still owned here */ sdsfree(field); sdsfree(value); decrRefCount(o); return NULL; } + } + /* Add pair to hash table */ - hashTypeEntry *entry = hashTypeCreateEntry(field, value); + entry *entry = entryCreate(field, value, itemexpiry); sdsfree(field); if (!hashtableAdd((hashtable *)o->ptr, entry)) { rdbReportCorruptRDB("Duplicate hash fields detected"); - freeHashTypeEntry(entry); +
entryFree(entry); decrRefCount(o); return NULL; } + + if (rdbtype == RDB_TYPE_HASH_2 && itemexpiry > 0) { + hashTypeTrackEntry(o, entry); + } } /* All pairs should be read by now */ diff --git a/src/rdb.h b/src/rdb.h index 9f19a3a9ec..1253c3fd05 100644 --- a/src/rdb.h +++ b/src/rdb.h @@ -90,32 +90,36 @@ static_assert(RDB_VERSION < RDB_FOREIGN_VERSION_MIN || RDB_VERSION > RDB_FOREIGN /* Map object types to RDB object types. Macros starting with OBJ_ are for * memory storage and may change. Instead RDB types must be fixed because * we store them on disk. */ -#define RDB_TYPE_STRING 0 -#define RDB_TYPE_LIST 1 -#define RDB_TYPE_SET 2 -#define RDB_TYPE_ZSET 3 -#define RDB_TYPE_HASH 4 -#define RDB_TYPE_ZSET_2 5 /* ZSET version 2 with doubles stored in binary. */ -#define RDB_TYPE_MODULE_PRE_GA 6 /* Used in 4.0 release candidates */ -#define RDB_TYPE_MODULE_2 7 /* Module value with annotations for parsing without \ +enum RdbType { + RDB_TYPE_STRING = 0, + RDB_TYPE_LIST = 1, + RDB_TYPE_SET = 2, + RDB_TYPE_ZSET = 3, + RDB_TYPE_HASH = 4, + RDB_TYPE_ZSET_2 = 5, /* ZSET version 2 with doubles stored in binary. */ + RDB_TYPE_MODULE_PRE_GA = 6, /* Used in 4.0 release candidates */ + RDB_TYPE_MODULE_2 = 7, /* Module value with annotations for parsing without \ the generating module being loaded. 
*/ -#define RDB_TYPE_HASH_ZIPMAP 9 -#define RDB_TYPE_LIST_ZIPLIST 10 -#define RDB_TYPE_SET_INTSET 11 -#define RDB_TYPE_ZSET_ZIPLIST 12 -#define RDB_TYPE_HASH_ZIPLIST 13 -#define RDB_TYPE_LIST_QUICKLIST 14 -#define RDB_TYPE_STREAM_LISTPACKS 15 -#define RDB_TYPE_HASH_LISTPACK 16 -#define RDB_TYPE_ZSET_LISTPACK 17 -#define RDB_TYPE_LIST_QUICKLIST_2 18 -#define RDB_TYPE_STREAM_LISTPACKS_2 19 -#define RDB_TYPE_SET_LISTPACK 20 -#define RDB_TYPE_STREAM_LISTPACKS_3 21 -/* NOTE: WHEN ADDING NEW RDB TYPE, UPDATE rdbIsObjectType(), and rdb_type_string[] */ + RDB_TYPE_HASH_ZIPMAP = 9, + RDB_TYPE_LIST_ZIPLIST = 10, + RDB_TYPE_SET_INTSET = 11, + RDB_TYPE_ZSET_ZIPLIST = 12, + RDB_TYPE_HASH_ZIPLIST = 13, + RDB_TYPE_LIST_QUICKLIST = 14, + RDB_TYPE_STREAM_LISTPACKS = 15, + RDB_TYPE_HASH_LISTPACK = 16, + RDB_TYPE_ZSET_LISTPACK = 17, + RDB_TYPE_LIST_QUICKLIST_2 = 18, + RDB_TYPE_STREAM_LISTPACKS_2 = 19, + RDB_TYPE_SET_LISTPACK = 20, + RDB_TYPE_STREAM_LISTPACKS_3 = 21, + RDB_TYPE_HASH_2 = 22, + RDB_TYPE_LAST +}; +/* NOTE: WHEN ADDING NEW RDB TYPE, UPDATE rdb_type_string[] */ /* Test if a type is an object type. */ -#define rdbIsObjectType(t) (((t) >= 0 && (t) <= 7) || ((t) >= 9 && (t) <= 21)) +#define rdbIsObjectType(t) (((t) >= 0 && (t) <= 7) || ((t) >= 9 && (t) < RDB_TYPE_LAST)) /* Special RDB opcodes (saved/loaded with rdbSaveType/rdbLoadType). */ #define RDB_OPCODE_FUNCTION2 245 /* function library data */ diff --git a/src/sds.c b/src/sds.c index 0ebfbdd0ff..8aba0ccb2f 100644 --- a/src/sds.c +++ b/src/sds.c @@ -34,6 +34,7 @@ #include #include #include +#include #include "serverassert.h" #include "sds.h" #include "sdsalloc.h" @@ -126,6 +127,7 @@ sds sdswrite(char *buf, size_t bufsize, char type, const char *init, size_t init assert(bufsize >= sdsReqSize(initlen, type)); int hdrlen = sdsHdrSize(type); size_t usable = bufsize - hdrlen - 1; + assert(usable <= sdsTypeMaxSize(type)); sds s = buf + hdrlen; unsigned char *fp = ((unsigned char *)s) - 1; /* flags pointer. 
*/ diff --git a/src/server.c b/src/server.c index e0ccaac5ec..b5b66cf9d8 100644 --- a/src/server.c +++ b/src/server.c @@ -666,20 +666,34 @@ hashtableType subcommandSetType = {.entryGetKey = hashtableSubcommandGetKey, /* Hash type hash table (note that small hashes are represented with listpacks) */ const void *hashHashtableTypeGetKey(const void *entry) { - const hashTypeEntry *hash_entry = entry; - return (const void *)hashTypeEntryGetField(hash_entry); + return (const void *)entryGetField(entry); } void hashHashtableTypeDestructor(void *entry) { - hashTypeEntry *hash_entry = entry; - freeHashTypeEntry(hash_entry); + entryFree(entry); } +size_t hashHashtableTypeMetadataSize(void) { + return sizeof(void *); +} + +extern hashtableEntryValidationState hashHashtableTypeValidate(hashtable *ht, void *entry); + hashtableType hashHashtableType = { .hashFunction = dictSdsHash, .entryGetKey = hashHashtableTypeGetKey, .keyCompare = hashtableSdsKeyCompare, .entryDestructor = hashHashtableTypeDestructor, + .getMetadataSize = hashHashtableTypeMetadataSize, +}; + +hashtableType hashWithVolatileItemsHashtableType = { + .hashFunction = dictSdsHash, + .entryGetKey = hashHashtableTypeGetKey, + .keyCompare = hashtableSdsKeyCompare, + .entryDestructor = hashHashtableTypeDestructor, + .getMetadataSize = hashHashtableTypeMetadataSize, + .validateEntry = hashHashtableTypeValidate, }; /* Hashtable type without destructor */ @@ -2137,6 +2151,9 @@ void createSharedObjects(void) { shared.multi = createSharedString("MULTI"); shared.exec = createSharedString("EXEC"); shared.hset = createSharedString("HSET"); + shared.hdel = createSharedString("HDEL"); + shared.hpexpireat = createSharedString("HPEXPIREAT"); + shared.hpersist = createSharedString("HPERSIST"); shared.srem = createSharedString("SREM"); shared.xgroup = createSharedString("XGROUP"); shared.xclaim = createSharedString("XCLAIM"); @@ -2169,6 +2186,7 @@ void createSharedObjects(void) { shared.special_asterisk = createSharedString("*"); 
shared.special_equals = createSharedString("="); shared.redacted = createSharedString("(redacted)"); + shared.fields = createSharedString("FIELDS"); for (j = 0; j < OBJ_SHARED_INTEGERS; j++) { shared.integers[j] = makeObjectShared(createObject(OBJ_STRING, (void *)(long)j)); @@ -7292,4 +7310,131 @@ __attribute__((weak)) int main(int argc, char **argv) { aeDeleteEventLoop(server.el); return 0; } + +/* + * The parseExtendedCommandArgumentsOrReply() function performs the common validation for extended + * command arguments used in STRING and HASH commands. + * + * Get specific command extended options - PERSIST/DEL + * Set specific command extended options - XX/NX/GET/IFEQ + * HSET specific command extended options - FXX/FNX + * Common command extended options - EX/EXAT/PX/PXAT/KEEPTTL + * + * Function takes pointers to client, flags, unit, pointer to pointer of expire obj if needed + * to be determined and command_type which can be COMMAND_GET, COMMAND_SET, COMMAND_HGET or COMMAND_HSET. + * + * If there are any syntax violations C_ERR is returned else C_OK is returned. + * + * Input flags are updated upon parsing the arguments. Unit and expire are updated if there are any + * EX/EXAT/PX/PXAT arguments. Unit is updated to millisecond if PX/PXAT is set. + * + * max_args provides a way to limit the scan to a specific range of arguments. + */ +int parseExtendedCommandArgumentsOrReply(client *c, int *flags, int *unit, robj **expire, robj **compare_val, int command_type, int max_args) { + int j = command_type == COMMAND_SET ? 3 : 2; + for (; j < max_args; j++) { + char *opt = c->argv[j]->ptr; + robj *next = (j == max_args - 1) ?
NULL : c->argv[j + 1]; + + /* clang-format off */ + if ((opt[0] == 'n' || opt[0] == 'N') && + (opt[1] == 'x' || opt[1] == 'X') && opt[2] == '\0' && + !(*flags & OBJ_SET_XX || *flags & OBJ_SET_IFEQ) && (command_type == COMMAND_SET)) + { + *flags |= OBJ_SET_NX; + } else if ((opt[0] == 'x' || opt[0] == 'X') && + (opt[1] == 'x' || opt[1] == 'X') && opt[2] == '\0' && + !(*flags & OBJ_SET_NX || *flags & OBJ_SET_IFEQ) && (command_type == COMMAND_SET)) + { + *flags |= OBJ_SET_XX; + } else if ((opt[0] == 'f' || opt[0] == 'F') && + (opt[1] == 'n' || opt[1] == 'N') && + (opt[2] == 'x' || opt[2] == 'X') && opt[3] == '\0' && + !(*flags & OBJ_SET_FXX || *flags & OBJ_SET_IFEQ) && (command_type == COMMAND_HSET)) + { + *flags |= OBJ_SET_FNX; + } else if ((opt[0] == 'f' || opt[0] == 'F') && + (opt[1] == 'x' || opt[1] == 'X') && + (opt[2] == 'x' || opt[2] == 'X') && opt[3] == '\0' && + !(*flags & OBJ_SET_FNX || *flags & OBJ_SET_IFEQ) && (command_type == COMMAND_HSET)) + { + *flags |= OBJ_SET_FXX; + } else if ((opt[0] == 'i' || opt[0] == 'I') && + (opt[1] == 'f' || opt[1] == 'F') && + (opt[2] == 'e' || opt[2] == 'E') && + (opt[3] == 'q' || opt[3] == 'Q') && opt[4] == '\0' && + next && + !(*flags & OBJ_SET_NX || *flags & OBJ_SET_XX || *flags & OBJ_SET_IFEQ) && (command_type == COMMAND_SET)) + { + *flags |= OBJ_SET_IFEQ; + *compare_val = next; + j++; + } else if ((opt[0] == 'g' || opt[0] == 'G') && + (opt[1] == 'e' || opt[1] == 'E') && + (opt[2] == 't' || opt[2] == 'T') && opt[3] == '\0' && + (command_type == COMMAND_SET)) + { + *flags |= OBJ_SET_GET; + } else if (!strcasecmp(opt, "KEEPTTL") && !(*flags & OBJ_PERSIST) && + !(*flags & OBJ_EX) && !(*flags & OBJ_EXAT) && + !(*flags & OBJ_PX) && !(*flags & OBJ_PXAT) && (command_type == COMMAND_SET || command_type == COMMAND_HSET)) + { + *flags |= OBJ_KEEPTTL; + } else if (!strcasecmp(opt,"PERSIST") && (command_type == COMMAND_GET || command_type == COMMAND_HGET) && + !(*flags & OBJ_EX) && !(*flags & OBJ_EXAT) && + !(*flags & OBJ_PX) && 
!(*flags & OBJ_PXAT) && + !(*flags & OBJ_KEEPTTL)) + { + *flags |= OBJ_PERSIST; + } else if ((opt[0] == 'e' || opt[0] == 'E') && + (opt[1] == 'x' || opt[1] == 'X') && opt[2] == '\0' && + !(*flags & OBJ_KEEPTTL) && !(*flags & OBJ_PERSIST) && + !(*flags & OBJ_EXAT) && !(*flags & OBJ_PX) && + !(*flags & OBJ_PXAT) && next) + { + *flags |= OBJ_EX; + *expire = next; + j++; + } else if ((opt[0] == 'p' || opt[0] == 'P') && + (opt[1] == 'x' || opt[1] == 'X') && opt[2] == '\0' && + !(*flags & OBJ_KEEPTTL) && !(*flags & OBJ_PERSIST) && + !(*flags & OBJ_EX) && !(*flags & OBJ_EXAT) && + !(*flags & OBJ_PXAT) && next) + { + *flags |= OBJ_PX; + *unit = UNIT_MILLISECONDS; + *expire = next; + j++; + } else if ((opt[0] == 'e' || opt[0] == 'E') && + (opt[1] == 'x' || opt[1] == 'X') && + (opt[2] == 'a' || opt[2] == 'A') && + (opt[3] == 't' || opt[3] == 'T') && opt[4] == '\0' && + !(*flags & OBJ_KEEPTTL) && !(*flags & OBJ_PERSIST) && + !(*flags & OBJ_EX) && !(*flags & OBJ_PX) && + !(*flags & OBJ_PXAT) && next) + { + *flags |= OBJ_EXAT; + *expire = next; + j++; + } else if ((opt[0] == 'p' || opt[0] == 'P') && + (opt[1] == 'x' || opt[1] == 'X') && + (opt[2] == 'a' || opt[2] == 'A') && + (opt[3] == 't' || opt[3] == 'T') && opt[4] == '\0' && + !(*flags & OBJ_KEEPTTL) && !(*flags & OBJ_PERSIST) && + !(*flags & OBJ_EX) && !(*flags & OBJ_EXAT) && + !(*flags & OBJ_PX) && next) + { + *flags |= OBJ_PXAT; + *unit = UNIT_MILLISECONDS; + *expire = next; + j++; + } else { + addReplyErrorObject(c,shared.syntaxerr); + return C_ERR; + } + /* clang-format on */ + } + return C_OK; +} + /* The End */ diff --git a/src/server.h b/src/server.h index 93417930d2..e393458c3c 100644 --- a/src/server.h +++ b/src/server.h @@ -82,7 +82,9 @@ typedef long long ustime_t; /* microsecond time type. 
*/ #include "rax.h" /* Radix tree */ #include "connection.h" /* Connection abstraction */ #include "memory_prefetch.h" +#include "volatile_set.h" #include "trace/trace.h" +#include "entry.h" #ifdef USE_LTTNG #define valkey_fork() do_fork() @@ -220,6 +222,11 @@ struct hdr_histogram; extern int configOOMScoreAdjValuesDefaults[CONFIG_OOM_COUNT]; +#define COMMAND_GET 0 +#define COMMAND_SET 1 +#define COMMAND_HGET 2 +#define COMMAND_HSET 3 + /* Command flags. Please check the definition of struct serverCommand in this file * for more information about the meaning of every flag. */ #define CMD_WRITE (1ULL << 0) @@ -316,6 +323,11 @@ extern int configOOMScoreAdjValuesDefaults[CONFIG_OOM_COUNT]; /* Key flags for when access type is unknown */ #define CMD_KEY_FULL_ACCESS (CMD_KEY_RW | CMD_KEY_ACCESS | CMD_KEY_UPDATE) +#define EXPIRE_NX (1 << 0) +#define EXPIRE_XX (1 << 1) +#define EXPIRE_GT (1 << 2) +#define EXPIRE_LT (1 << 3) + /* Key flags for how key is removed */ #define DB_FLAG_KEY_NONE 0 #define DB_FLAG_KEY_DELETED (1ULL << 0) @@ -597,6 +609,9 @@ typedef enum { #define PAUSE_ACTION_EVICT (1 << 3) #define PAUSE_ACTION_REPLICA (1 << 4) /* pause replica traffic */ +/* Special Expiry values */ +#define EXPIRY_NONE -1 + /* Sets log format */ typedef enum { LOG_FORMAT_LEGACY = 0, LOG_FORMAT_LOGFMT } log_format_type; @@ -702,6 +717,23 @@ typedef enum { * Data types *----------------------------------------------------------------------------*/ +/* Generic set command string object set flags */ +#define OBJ_NO_FLAGS 0 +#define OBJ_SET_NX (1 << 0) /* Set if key not exists. */ +#define OBJ_SET_XX (1 << 1) /* Set if key exists. 
*/ +#define OBJ_EX (1 << 2) /* Set if time in seconds is given */ +#define OBJ_PX (1 << 3) /* Set if time in ms in given */ +#define OBJ_KEEPTTL (1 << 4) /* Set and keep the ttl */ +#define OBJ_SET_GET (1 << 5) /* Set if want to get key before set */ +#define OBJ_EXAT (1 << 6) /* Set if timestamp in second is given */ +#define OBJ_PXAT (1 << 7) /* Set if timestamp in ms is given */ +#define OBJ_PERSIST (1 << 8) /* Set if we need to remove the ttl */ +#define OBJ_SET_IFEQ (1 << 9) /* Set if we need compare and set */ +#define OBJ_ARGV3 (1 << 10) /* Set if the value is at argv[3]; otherwise it's \ + * at argv[2]. */ +#define OBJ_SET_FNX (1 << 11) /* Set if key item not exists. */ +#define OBJ_SET_FXX (1 << 12) /* Set if key item exists. */ + /* An Object, that is a type able to hold a string / list / set */ /* The actual Object */ @@ -835,8 +867,9 @@ typedef struct replBufBlock { * by integers from 0 (the default database) up to the max configured * database. The database number is the 'id' field in the structure. */ typedef struct serverDb { - kvstore *keys; /* The keyspace for this DB */ - kvstore *expires; /* Timeout of keys with a timeout set */ + kvstore *keys; /* The keyspace for this DB */ + kvstore *expires; /* Timeout of keys with a timeout set */ + kvstore *object_with_volatile_elements; dict *blocking_keys; /* Keys with clients waiting for data (BLPOP)*/ dict *blocking_keys_unblock_on_nokey; /* Keys with clients waiting for * data, and should be unblocked if key is deleted (XREADEDGROUP). 
@@ -1343,10 +1376,10 @@ struct sharedObjectsStruct { *bgsaveerr_variants[2], *execaborterr, *noautherr, *noreplicaserr, *busykeyerr, *oomerr, *plus, *messagebulk, *pmessagebulk, *subscribebulk, *unsubscribebulk, *psubscribebulk, *punsubscribebulk, *del, *unlink, *rpop, *lpop, *lpush, - *rpoplpush, *lmove, *blmove, *zpopmin, *zpopmax, *emptyscan, *multi, *exec, *left, *right, *hset, *srem, + *rpoplpush, *lmove, *blmove, *zpopmin, *zpopmax, *emptyscan, *multi, *exec, *left, *right, *hset, *hdel, *hpexpireat, *hpersist, *srem, *xgroup, *xclaim, *script, *replconf, *eval, *persist, *set, *pexpireat, *pexpire, *time, *pxat, *absttl, *retrycount, *force, *justid, *entriesread, *lastid, *ping, *setid, *keepttl, *load, *createconsumer, *getack, - *special_asterisk, *special_equals, *default_username, *redacted, *ssubscribebulk, *sunsubscribebulk, + *special_asterisk, *special_equals, *default_username, *redacted, *ssubscribebulk, *sunsubscribebulk, *fields, *smessagebulk, *select[PROTO_SHARED_SELECT_CMDS], *integers[OBJ_SHARED_INTEGERS], *mbulkhdr[OBJ_SHARED_BULKHDR_LEN], /* "*\r\n" */ *bulkhdr[OBJ_SHARED_BULKHDR_LEN], /* "$\r\n" */ @@ -1591,6 +1624,20 @@ typedef enum childInfoType { CHILD_INFO_TYPE_MODULE_COW_SIZE } childInfoType; +/* Return values for expireIfNeeded */ +typedef enum { + KEY_VALID = 0, /* Could be volatile and not yet expired, non-volatile, or even non-existing key. */ + KEY_EXPIRED, /* Logically expired but not yet deleted. */ + KEY_DELETED /* The key was deleted now. */ +} keyStatus; + +/* Return value for getExpirationPolicy */ +typedef enum { + POLICY_IGNORE_EXPIRE, /* Ignore expiration time of items and treat them as valid. */ + POLICY_KEEP_EXPIRED, /* Ignore items which are expired but do not actively delete them. */ + POLICY_DELETE_EXPIRED /* Delete expired keys on access. */ +} expirationPolicy; + struct valkeyServer { /* General */ pid_t pid; /* Main process pid. 
*/ @@ -2577,11 +2624,13 @@ typedef struct { typedef struct { robj *subject; int encoding; - + bool volatile_items_iter; unsigned char *fptr, *vptr; hashtableIterator iter; + volatileSetIterator viter; void *next; + } hashTypeIterator; #include "stream.h" /* Stream data type header file. */ @@ -2605,6 +2654,7 @@ extern hashtableType kvstoreKeysHashtableType; extern hashtableType kvstoreExpiresHashtableType; extern double R_Zero, R_PosInf, R_NegInf, R_Nan; extern hashtableType hashHashtableType; +extern hashtableType hashWithVolatileItemsHashtableType; extern dictType stringSetDictType; extern dictType externalStringType; extern dictType sdsHashDictType; @@ -2635,6 +2685,7 @@ int validateProcTitleTemplate(const char *template); int serverCommunicateSystemd(const char *sd_notify_msg); void serverSetCpuAffinity(const char *cpulist); void dictVanillaFree(void *val); +int timestampIsExpired(mstime_t when); /* ERROR STATS constants */ @@ -2816,6 +2867,10 @@ int processIOThreadsWriteDone(void); void releaseReplyReferences(client *c); void resetLastWrittenBuf(client *c); +expirationPolicy getExpirationPolicyWithFlags(int flags); +int parseExtendedExpireArgumentsOrReply(client *c, int *flags, int max_args); +int parseExtendedCommandArgumentsOrReply(client *c, int *flags, int *unit, robj **expire, robj **compare_val, int command_type, int max_args); +int convertExpireArgumentToUnixTime(client *c, robj *arg, long long basetime, int unit, long long *unixtime); /* logreqres.c - logging of requests and responses */ void reqresReset(client *c, int free_buf); @@ -3303,16 +3358,13 @@ robj *setTypeDup(robj *o); /* Hash data type */ #define HASH_SET_TAKE_FIELD (1 << 0) #define HASH_SET_TAKE_VALUE (1 << 1) +#define HASH_SET_KEEP_EXPIRY (1 << 2) #define HASH_SET_COPY 0 -typedef void hashTypeEntry; -hashTypeEntry *hashTypeCreateEntry(sds field, sds value); -sds hashTypeEntryGetField(const hashTypeEntry *entry); -sds hashTypeEntryGetValue(const hashTypeEntry *entry); -size_t 
hashTypeEntryMemUsage(hashTypeEntry *entry); -hashTypeEntry *hashTypeEntryDefrag(hashTypeEntry *entry, void *(*defragfn)(void *), sds (*sdsdefragfn)(sds)); -void dismissHashTypeEntry(hashTypeEntry *entry); -void freeHashTypeEntry(hashTypeEntry *entry); + +void hashTypeFreeVolatileSet(robj *o); +void hashTypeTrackEntry(robj *o, void *entry); +void hashTypeUntrackEntry(robj *o, void *entry); void hashTypeConvert(robj *o, int enc); void hashTypeTryConversion(robj *subject, robj **argv, int start, int end); @@ -3320,6 +3372,7 @@ int hashTypeExists(robj *o, sds key); int hashTypeDelete(robj *o, sds key); unsigned long hashTypeLength(const robj *o); void hashTypeInitIterator(robj *subject, hashTypeIterator *hi); +void hashTypeInitVolatileIterator(robj *subject, hashTypeIterator *hi); void hashTypeResetIterator(hashTypeIterator *hi); int hashTypeNext(hashTypeIterator *hi); void hashTypeCurrentFromListpack(hashTypeIterator *hi, @@ -3331,8 +3384,10 @@ sds hashTypeCurrentFromHashTable(hashTypeIterator *hi, int what); sds hashTypeCurrentObjectNewSds(hashTypeIterator *hi, int what); robj *hashTypeLookupWriteOrCreate(client *c, robj *key); robj *hashTypeGetValueObject(robj *o, sds field); -int hashTypeSet(robj *o, sds field, sds value, int flags); +int hashTypeSet(robj *o, sds field, sds value, long long expiry, int flags); robj *hashTypeDup(robj *o); +int hashTypeHasVolatileElements(robj *o); +size_t hashTypeNumVolatileElements(robj *o); /* Pub / Sub */ int pubsubUnsubscribeAllChannels(client *c, int notify); @@ -3794,6 +3849,8 @@ void zrankCommand(client *c); void zrevrankCommand(client *c); void hsetCommand(client *c); void hsetnxCommand(client *c); +void hsetexCommand(client *c); +void hgetexCommand(client *c); void hgetCommand(client *c); void hmgetCommand(client *c); void hdelCommand(client *c); @@ -3815,6 +3872,19 @@ void hgetallCommand(client *c); void hexistsCommand(client *c); void hscanCommand(client *c); void hrandfieldCommand(client *c); +void hexpireCommand(client 
*c); +void hexpireAtCommand(client *c); +void hpexpireCommand(client *c); +void hpexpireAtCommand(client *c); +void hexpireCommand(client *c); +void hexpireAtCommand(client *c); +void hpexpireCommand(client *c); +void hpexpireAtCommand(client *c); +void httlCommand(client *c); +void hpttlCommand(client *c); +void hexpiretimeCommand(client *c); +void hpexpiretimeCommand(client *c); +void hpersistCommand(client *c); void configSetCommand(client *c); void configGetCommand(client *c); void configResetStatCommand(client *c); diff --git a/src/t_hash.c b/src/t_hash.c index 5a8c17e90c..6a2f7d85af 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -32,233 +32,136 @@ * SPDX-License-Identifier: BSD-3-Clause */ +#include "hashtable.h" +#include "rax.h" +#include "sds.h" +#include "volatile_set.h" #include "server.h" +#include "zmalloc.h" #include -#include +#include +#include "entry.h" + + +int hashTypeExpireEntry(entry *entry); + +volatileEntryType hashVolatileEntryType = { + .entryGetKey = (sds(*)(const void *entry))entryGetField, + .getExpiry = (long long (*)(const void *entry))entryGetExpiry, + .expire = hashTypeExpireEntry, +}; /*----------------------------------------------------------------------------- - * Hash Entry API + * Hash type Expiry API *----------------------------------------------------------------------------*/ -/* The hashTypeEntry pointer is the field sds. We encode the entry layout type - * in the field SDS header. Field type SDS_TYPE_5 doesn't have any spare bits to - * encode this so we use it only for the first layout type. - * - * Entry with embedded value, used for small sizes. The value is stored as - * SDS_TYPE_8. The field can use any SDS type. - * - * +--------------+---------------+ - * | field | value | - * | hdr "foo" \0 | hdr8 "bar" \0 | - * +------^-------+---------------+ - * | - * | - * entry pointer = field sds - * - * Entry with value pointer, used for larger fields and values. The field is SDS - * type 8 or higher. 
- * - * +-------+--------------+ - * | value | field | - * | ptr | hdr "foo" \0 | - * +-------+------^-------+ - * | - * | - * entry pointer = field sds - */ +static volatile_set *hashTypeGetVolatileSet(robj *o) { + serverAssert(o->encoding == OBJ_ENCODING_HASHTABLE); + return *(volatile_set **)hashtableMetadata(o->ptr); +} -/* The maximum allocation size we want to use for entries with embedded - * values. */ -#define EMBED_VALUE_MAX_ALLOC_SIZE 128 - -/* SDS aux flag. If set, it indicates that the entry has an embedded value - * pointer located in memory before the embedded field. If unset, the entry - * instead has an embedded value located after the embedded field. */ -#define FIELD_SDS_AUX_BIT_ENTRY_HAS_VALUE_PTR 0 - -static inline bool entryHasValuePtr(const hashTypeEntry *entry) { - return sdsGetAuxBit(entry, FIELD_SDS_AUX_BIT_ENTRY_HAS_VALUE_PTR); -} - -/* Returns the location of a pointer to a separately allocated value. Only for - * an entry without an embedded value. */ -static sds *hashTypeEntryGetValueRef(const hashTypeEntry *entry) { - serverAssert(entryHasValuePtr(entry)); - char *field_data = sdsAllocPtr(entry); - field_data -= sizeof(sds *); - return (sds *)field_data; -} - -/* takes ownership of value, does not take ownership of field */ -hashTypeEntry *hashTypeCreateEntry(sds field, sds value) { - size_t field_len = sdslen(field); - int field_sds_type = sdsReqType(field_len); - size_t field_size = sdsReqSize(field_len, field_sds_type); - size_t value_len = sdslen(value); - size_t value_size = sdsReqSize(value_len, SDS_TYPE_8); - sds embedded_field_sds; - if (field_size + value_size <= EMBED_VALUE_MAX_ALLOC_SIZE) { - /* Embed field and value. Value is fixed to SDS_TYPE_8. Unused - * allocation space is recorded in the embedded value's SDS header. 
- * - * +--------------+---------------+ - * | field | value | - * | hdr "foo" \0 | hdr8 "bar" \0 | - * +--------------+---------------+ - */ - size_t min_size = field_size + value_size; - size_t buf_size; - char *buf = zmalloc_usable(min_size, &buf_size); - embedded_field_sds = sdswrite(buf, field_size, field_sds_type, field, field_len); - sdswrite(buf + field_size, buf_size - field_size, SDS_TYPE_8, value, value_len); - /* Field sds aux bits are zero, which we use for this entry encoding. */ - sdsSetAuxBit(embedded_field_sds, FIELD_SDS_AUX_BIT_ENTRY_HAS_VALUE_PTR, 0); - serverAssert(!entryHasValuePtr(embedded_field_sds)); - sdsfree(value); - } else { - /* Embed field, but not value. Field must be >= SDS_TYPE_8 to encode to - * indicate this type of entry. - * - * +-------+---------------+ - * | value | field | - * | ptr | hdr8 "foo" \0 | - * +-------+---------------+ - */ - char field_sds_type = sdsReqType(field_len); - if (field_sds_type == SDS_TYPE_5) field_sds_type = SDS_TYPE_8; - field_size = sdsReqSize(field_len, field_sds_type); - size_t alloc_size = sizeof(sds *) + field_size; - char *buf = zmalloc(alloc_size); - *(sds *)buf = value; - embedded_field_sds = sdswrite(buf + sizeof(sds *), field_size, field_sds_type, field, field_len); - /* Store the entry encoding type in sds aux bits. */ - sdsSetAuxBit(embedded_field_sds, FIELD_SDS_AUX_BIT_ENTRY_HAS_VALUE_PTR, 1); - serverAssert(entryHasValuePtr(embedded_field_sds)); - } - return (void *)embedded_field_sds; -} - -/* The entry pointer is the field sds, but that's an implementation detail. */ -sds hashTypeEntryGetField(const hashTypeEntry *entry) { - return (sds)entry; -} - -sds hashTypeEntryGetValue(const hashTypeEntry *entry) { - if (entryHasValuePtr(entry)) { - return *hashTypeEntryGetValueRef(entry); - } else { - /* Skip field content, field null terminator and value sds8 hdr. 
*/ - size_t offset = sdslen(entry) + 1 + sdsHdrSize(SDS_TYPE_8); - return (char *)entry + offset; - } -} - -/* Returns the address of the entry allocation. */ -static void *hashTypeEntryAllocPtr(hashTypeEntry *entry) { - char *buf = sdsAllocPtr(entry); - if (entryHasValuePtr(entry)) { - buf -= sizeof(sds *); - } - return buf; -} - -/* Frees previous value, takes ownership of new value, returns entry (may be - * reallocated). */ -static hashTypeEntry *hashTypeEntryReplaceValue(hashTypeEntry *entry, sds value) { - sds field = (sds)entry; - size_t field_size = sdsHdrSize(sdsType(field)) + sdsalloc(field) + 1; - size_t value_len = sdslen(value); - size_t value_size = sdsReqSize(value_len, SDS_TYPE_8); - if (!entryHasValuePtr(entry)) { - /* Reuse the allocation if the new value fits and leaves no more than - * 25% unused space after replacing the value. */ - char *alloc_ptr = sdsAllocPtr(entry); - size_t required_size = field_size + value_size; - size_t alloc_size; - if (required_size <= EMBED_VALUE_MAX_ALLOC_SIZE && - required_size <= (alloc_size = hashTypeEntryMemUsage(entry)) && - required_size >= alloc_size * 3 / 4) { - /* It fits in the allocation and leaves max 25% unused space. */ - sdswrite(alloc_ptr + field_size, alloc_size - field_size, SDS_TYPE_8, value, value_len); - sdsfree(value); - return entry; - } - hashTypeEntry *new_entry = hashTypeCreateEntry(hashTypeEntryGetField(entry), value); - freeHashTypeEntry(entry); - return new_entry; - } else { - /* The value pointer is located before the embedded field. */ - if (field_size + value_size <= EMBED_VALUE_MAX_ALLOC_SIZE) { - /* Convert to entry with embedded value. */ - hashTypeEntry *new_entry = hashTypeCreateEntry(field, value); - freeHashTypeEntry(entry); - return new_entry; - } else { - /* Not embedded value. 
*/ - sds *value_ref = hashTypeEntryGetValueRef(entry); - sdsfree(*value_ref); - *value_ref = value; - return entry; - } - } +void hashTypeFreeVolatileSet(robj *o) { + volatile_set *set = hashTypeGetVolatileSet(o); + if (set) + freeVolatileSet(set); } -/* Returns memory usage of a hashTypeEntry, including all allocations owned by - * the hashTypeEntry. */ -size_t hashTypeEntryMemUsage(hashTypeEntry *entry) { - size_t mem = 0; - if (entryHasValuePtr(entry)) { - /* Alloc size is not stored in the embedded field. */ - mem = zmalloc_usable_size(hashTypeEntryAllocPtr(entry)); - mem += sdsAllocSize(*hashTypeEntryGetValueRef(entry)); - } else { - /* Remaining alloc size is encoded in the embedded value SDS header. */ - sds field = entry; - sds value = (char *)entry + sdslen(field) + 1 + sdsHdrSize(SDS_TYPE_8); - size_t field_size = sdsHdrSize(sdsType(field)) + sdslen(field) + 1; - size_t value_size = sdsHdrSize(SDS_TYPE_8) + sdsalloc(value) + 1; - mem = field_size + value_size; +int hashTypeHasVolatileElements(robj *o) { + return ((o->encoding == OBJ_ENCODING_HASHTABLE) && (hashTypeGetVolatileSet(o) != NULL)); +} + +size_t hashTypeNumVolatileElements(robj *o) { + if (hashTypeHasVolatileElements(o)) { + return volatileSetNumEntries(hashTypeGetVolatileSet(o)); } - return mem; + return 0; } -/* Defragments a hashtable entry (field-value pair) if needed, using the - * provided defrag functions. The defrag functions return NULL if the allocation - * was not moved, otherwise they return a pointer to the new memory location. - * A separate sds defrag function is needed because of the unique memory layout - * of sds strings. - * If the location of the hashTypeEntry changed we return the new location, - * otherwise we return NULL. 
*/ -hashTypeEntry *hashTypeEntryDefrag(hashTypeEntry *entry, void *(*defragfn)(void *), sds (*sdsdefragfn)(sds)) { - if (entryHasValuePtr(entry)) { - sds *value_ref = hashTypeEntryGetValueRef(entry); - sds new_value = sdsdefragfn(*value_ref); - if (new_value) *value_ref = new_value; +/* make any access to the hash object elements ignore the specific elements expiration. + * This is mainly in order to be able to access hash elements which are already expired. */ +void hashTypeIgnoreTTL(robj *o, bool ignore) { + if (o->encoding == OBJ_ENCODING_HASHTABLE) { + /* prevent placing access function if not needed */ + if (!ignore && !hashTypeHasVolatileElements(o)) { + ignore = 0; + } + hashtableSetType(o->ptr, ignore ? &hashHashtableType : &hashWithVolatileItemsHashtableType); } - char *allocation = hashTypeEntryAllocPtr(entry); - char *new_allocation = defragfn(allocation); - if (new_allocation != NULL) { - /* Return the same offset into the new allocation as the entry's offset - * in the old allocation. */ - return new_allocation + ((char *)entry - allocation); +} + +static volatile_set * +hashTypeGetOrcreateVolatileSet(robj *o) { + serverAssert(o->encoding == OBJ_ENCODING_HASHTABLE); + volatile_set **volatile_set_ref = hashtableMetadata(o->ptr); + if (*volatile_set_ref == NULL) { + *volatile_set_ref = createVolatileSet(&hashVolatileEntryType); + /* serves mainly for optimization. Use type which supports access function only when needed. */ + hashTypeIgnoreTTL(o, false); } - return NULL; + return *volatile_set_ref; +} + +static void hashTypeDeleteVolatileSet(robj *o) { + volatile_set **volatile_set_ref = hashtableMetadata(o->ptr); + freeVolatileSet(*volatile_set_ref); + *volatile_set_ref = NULL; + /* serves mainly for optimization. 
by changing the hashtable type we can avoid extra function call in hashtable access */ + hashTypeIgnoreTTL(o, true); +} + +void hashTypeTrackEntry(robj *o, void *entry) { + volatile_set *set = hashTypeGetOrcreateVolatileSet(o); + serverAssert(volatileSetAddEntry(set, entry, entryGetExpiry(entry))); } -/* Used for releasing memory to OS to avoid unnecessary CoW. Called when we've - * forked and memory won't be used again. See zmadvise_dontneed() */ -void dismissHashTypeEntry(hashTypeEntry *entry) { - /* Only dismiss values memory since the field size usually is small. */ - if (entryHasValuePtr(entry)) { - dismissSds(*hashTypeEntryGetValueRef(entry)); +void hashTypeUntrackEntry(robj *o, void *entry) { + if (!entryHasExpiry(entry)) return; + volatile_set *set = hashTypeGetVolatileSet(o); + debugServerAssert(set); + serverAssert(volatileSetRemoveEntry(set, entry, entryGetExpiry(entry))); + if (volatileSetNumEntries(set) == 0) { + hashTypeDeleteVolatileSet(o); } } -void freeHashTypeEntry(hashTypeEntry *entry) { - if (entryHasValuePtr(entry)) { - sdsfree(*hashTypeEntryGetValueRef(entry)); +static void hashTypeTrackUpdateEntry(robj *o, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry) { + int old_tracked = (old_entry && old_expiry != EXPIRY_NONE); + int new_tracked = (new_entry && new_expiry != EXPIRY_NONE); + /* If entry was not tracked before and not going to be tracked now, we can simply return */ + if (!old_tracked && !new_tracked) + return; + + volatile_set *set = hashTypeGetOrcreateVolatileSet(o); + debugServerAssert(set); + + if (old_tracked && !new_tracked) + serverAssert(volatileSetRemoveEntry(set, old_entry, old_expiry)); + else if (new_tracked && !old_tracked) + serverAssert(volatileSetAddEntry(set, new_entry, new_expiry)); + else { + volatile_set *set = hashTypeGetVolatileSet(o); + debugServerAssert(set); + serverAssert(volatileSetUpdateEntry(set, old_entry, new_entry, old_expiry, new_expiry) == 1); + } + if 
(volatileSetNumEntries(set) == 0) { + hashTypeDeleteVolatileSet(o); } - zfree(hashTypeEntryAllocPtr(entry)); +} + +int hashTypeExpireEntry(void *entry) { + // TBD + UNUSED(entry); + return 1; +} + +hashtableEntryValidationState hashHashtableTypeValidate(hashtable *ht, void *entry) { + UNUSED(ht); + expirationPolicy policy = getExpirationPolicyWithFlags(0); + if (policy == POLICY_IGNORE_EXPIRE) return ENTRY_VALID; + + if (!entryIsExpired(entry)) return ENTRY_VALID; + + return ENTRY_INVALID; } /*----------------------------------------------------------------------------- @@ -327,9 +230,12 @@ int hashTypeGetFromListpack(robj *o, sds field, unsigned char **vstr, unsigned i * is returned. */ sds hashTypeGetFromHashTable(robj *o, sds field) { serverAssert(o->encoding == OBJ_ENCODING_HASHTABLE); - void *found_element; - if (!hashtableFind(o->ptr, field, &found_element)) return NULL; - return hashTypeEntryGetValue(found_element); + void *found_element = NULL; + hashtableFind(o->ptr, field, &found_element); + if (found_element) + return entryGetValue(found_element); + else + return NULL; } /* Higher level function of hashTypeGet*() that returns the hash value @@ -358,6 +264,28 @@ int hashTypeGetValue(robj *o, sds field, unsigned char **vstr, unsigned int *vle return C_ERR; } +/* Returns the expiration time associated with the specified field. + * If the field is found C_OK is returned, otherwise C_ERR. + * The matching item expiration time is assigned to `expiry` memory location, if specified. + * In case the item has no assigned expiration time, -1 is returned. 
*/ +int hashTypeGetExpiry(robj *o, sds field, long long *expiry) { + if (o->encoding == OBJ_ENCODING_LISTPACK) { + if (hashTypeExists(o, field)) { + if (expiry) *expiry = EXPIRY_NONE; + return C_OK; + } + } else if (o->encoding == OBJ_ENCODING_HASHTABLE) { + void *found_element = NULL; + if (hashtableFind(o->ptr, field, &found_element)) { + if (expiry) *expiry = entryGetExpiry(found_element); + return C_OK; + } + } else { + serverPanic("Unknown hash encoding"); + } + return C_ERR; +} + /* Like hashTypeGetValue() but returns an Object, which is useful for * interaction with the hash type outside t_hash.c. * The function returns NULL if the field is not found in the hash. Otherwise @@ -416,14 +344,14 @@ int hashTypeExists(robj *o, sds field) { * semantics of copying the values if needed. * */ -int hashTypeSet(robj *o, sds field, sds value, int flags) { +int hashTypeSet(robj *o, sds field, sds value, long long expiry, int flags) { int update = 0; /* Check if the field is too long for listpack, and convert before adding the item. * This is needed for HINCRBY* case since in other commands this is handled early by * hashTypeTryConversion, so this check will be a NOP. */ if (o->encoding == OBJ_ENCODING_LISTPACK) { - if (sdslen(field) > server.hash_max_listpack_value || sdslen(value) > server.hash_max_listpack_value) + if (expiry > 0 || sdslen(field) > server.hash_max_listpack_value || sdslen(value) > server.hash_max_listpack_value) hashTypeConvert(o, OBJ_ENCODING_HASHTABLE); } @@ -465,22 +393,39 @@ int hashTypeSet(robj *o, sds field, sds value, int flags) { v = sdsdup(value); } + /* We have to ignore the TTL when setting an element. this is mainly in order to be able to update an existing expired + * entry and not have it remain in the hashtable with the same field/value. 
*/ + hashTypeIgnoreTTL(o, true); hashtablePosition position; void *existing; if (hashtableFindPositionForInsert(ht, field, &position, &existing)) { /* does not exist yet */ - hashTypeEntry *entry = hashTypeCreateEntry(field, v); + entry *entry = entryCreate(field, v, expiry); hashtableInsertAtPosition(ht, entry, &position); + /* In case an expiry is set on the new entry, we need to track it */ + if (expiry != EXPIRY_NONE) { + hashTypeTrackEntry(o, entry); + } } else { /* exists: replace value */ - void *new_entry = hashTypeEntryReplaceValue(existing, v); + long long entry_expiry = entryGetExpiry(existing); + /* It is possible that the entry is already expired. In this case we can override it, but we need to make sure to treat it + * like it did not exist. */ + int is_expired = timestampIsExpired(entry_expiry); + /* In case the HASH_SET_KEEP_EXPIRY will force keeping the existing entry expiry. */ + if (!is_expired && (flags & HASH_SET_KEEP_EXPIRY)) + expiry = entry_expiry; + void *new_entry = entryUpdate(existing, v, expiry); if (new_entry != existing) { /* It has been reallocated. */ int replaced = hashtableReplaceReallocatedEntry(ht, existing, new_entry); serverAssert(replaced); } - update = 1; + hashTypeTrackUpdateEntry(o, existing, new_entry, entry_expiry, expiry); + + update = is_expired ? 0 : 1; } + hashTypeIgnoreTTL(o, false); } else { serverPanic("Unknown hash encoding"); } @@ -492,6 +437,100 @@ int hashTypeSet(robj *o, sds field, sds value, int flags) { return update; } +/* Set expiration on the specific HASH object 'o' item indicated by 'field'. + * returns -2 in case the provided object is NULL or the specific field was not found. + * returns 0 if the specified flag conditions has not been met. + * returns 1 if the expiration time was applied. + * returns 2 when 'expire' indicate a past Unix time. In this case, if the item exists in the HASH, it will also be expired. 
+ */ +int hashTypeSetExpire(robj *o, sds field, long long expiry, int flag) { + /* If no object we will return -2 */ + if (o == NULL) return -2; + + if (o->encoding == OBJ_ENCODING_LISTPACK) { + /* When listpack representation is used, we consider it as infinite TTL, + * so expire command with gt always fail the GT as well as existence(XX). + * Else, we already know we are going to set an expiration so we expend to hashtable encoding. */ + if (flag & EXPIRE_XX || flag & EXPIRE_GT) { + return 0; + } else { + hashTypeConvert(o, OBJ_ENCODING_HASHTABLE); + } + } + + hashtable *ht = o->ptr; + void **entry_ref = NULL; + if ((entry_ref = hashtableFindRef(ht, field))) { + entry *current_entry = *entry_ref; + long long current_expire = entryGetExpiry(current_entry); + if (flag) { + /* NX option is set, check no current expiry */ + if (flag & EXPIRE_NX) { + if (current_expire != EXPIRY_NONE) { + return 0; + } + } + + /* XX option is set, check current expiry */ + if (flag & EXPIRE_XX) { + if (current_expire == EXPIRY_NONE) { + return 0; + } + } + + /* GT option is set, check current expiry */ + if (flag & EXPIRE_GT) { + /* When current_expire is -1, we consider it as infinite TTL, + * so expire command with gt always fail the GT. */ + if (expiry <= current_expire || current_expire == EXPIRY_NONE) { + return 0; + } + } + + /* LT option is set, check current expiry */ + if (flag & EXPIRE_LT) { + /* When current_expire -1, we consider it as infinite TTL, + * so if there is an expiry on the key and it's not less than current, we fail the LT. */ + if (current_expire != EXPIRY_NONE && expiry >= current_expire) { + return 0; + } + } + } + *entry_ref = entrySetExpiry(current_entry, expiry); + hashTypeTrackUpdateEntry(o, current_entry, *entry_ref, current_expire, expiry); + return 1; + } + return -2; // we did not find anything to do. 
return -2 +} + + +int hashTypePersist(robj *o, sds field) { + /* NULL object returns -2 */ + if (o == NULL || o->type != OBJ_HASH) return -2; + + if (o->encoding == OBJ_ENCODING_LISTPACK) { + if (hashTypeExists(o, field)) + /* When listpack representation is used, All items are without expiry */ + return -1; + else + return -2; // Did not find any element return -2 + } + + hashtable *ht = o->ptr; + void **entry_ref = NULL; + if ((entry_ref = hashtableFindRef(ht, field))) { + entry *current_entry = *entry_ref; + long long current_expire = entryGetExpiry(current_entry); + if (current_expire != EXPIRY_NONE) { + hashTypeUntrackEntry(o, current_entry); + *entry_ref = entryUpdate(current_entry, NULL, EXPIRY_NONE); + return 1; + } + return -1; // If the found element has no expiration set, return -1 + } + return -2; // Did not find any element return -2 +} + /* Delete an element from a hash. * Return 1 on deleted and 0 on not found. */ int hashTypeDelete(robj *o, sds field) { @@ -513,7 +552,12 @@ int hashTypeDelete(robj *o, sds field) { } } else if (o->encoding == OBJ_ENCODING_HASHTABLE) { hashtable *ht = o->ptr; - deleted = hashtableDelete(ht, field); + void *entry = NULL; + deleted = hashtablePop(ht, field, &entry); + if (deleted) { + hashTypeUntrackEntry(o, entry); + entryFree(entry); + } } else { serverPanic("Unknown hash encoding"); } @@ -536,6 +580,7 @@ unsigned long hashTypeLength(const robj *o) { void hashTypeInitIterator(robj *subject, hashTypeIterator *hi) { hi->subject = subject; hi->encoding = subject->encoding; + hi->volatile_items_iter = false; if (hi->encoding == OBJ_ENCODING_LISTPACK) { hi->fptr = NULL; @@ -547,8 +592,27 @@ void hashTypeInitIterator(robj *subject, hashTypeIterator *hi) { } } +void hashTypeInitVolatileIterator(robj *subject, hashTypeIterator *hi) { + hi->subject = subject; + hi->encoding = subject->encoding; + hi->volatile_items_iter = true; + + if (hi->encoding == OBJ_ENCODING_LISTPACK) { + return; + } else if (hi->encoding == 
OBJ_ENCODING_HASHTABLE) { + volatileSetStart(hashTypeGetVolatileSet(subject), &hi->viter); + } else { + serverPanic("Unknown hash encoding"); + } +} + void hashTypeResetIterator(hashTypeIterator *hi) { - if (hi->encoding == OBJ_ENCODING_HASHTABLE) hashtableResetIterator(&hi->iter); + if (hi->encoding == OBJ_ENCODING_HASHTABLE) { + if (!hi->volatile_items_iter) + hashtableResetIterator(&hi->iter); + else + volatileSetReset(&hi->viter); + } } /* Move to the next entry in the hash. Return C_OK when the next entry @@ -558,6 +622,9 @@ int hashTypeNext(hashTypeIterator *hi) { unsigned char *zl; unsigned char *fptr, *vptr; + /* listpack encoding does not have volatile items, so return as iteration end */ + if (hi->volatile_items_iter) return C_ERR; + zl = hi->subject->ptr; fptr = hi->fptr; vptr = hi->vptr; @@ -581,7 +648,11 @@ int hashTypeNext(hashTypeIterator *hi) { hi->fptr = fptr; hi->vptr = vptr; } else if (hi->encoding == OBJ_ENCODING_HASHTABLE) { - if (!hashtableNext(&hi->iter, &hi->next)) return C_ERR; + if (!hi->volatile_items_iter) { + if (!hashtableNext(&hi->iter, &hi->next)) return C_ERR; + } else { + if (!volatileSetNext(&hi->viter, &hi->next)) return C_ERR; + } } else { serverPanic("Unknown hash encoding"); } @@ -611,9 +682,9 @@ sds hashTypeCurrentFromHashTable(hashTypeIterator *hi, int what) { serverAssert(hi->encoding == OBJ_ENCODING_HASHTABLE); if (what & OBJ_HASH_FIELD) { - return hashTypeEntryGetField(hi->next); + return entryGetField(hi->next); } else { - return hashTypeEntryGetValue(hi->next); + return entryGetValue(hi->next); } } @@ -682,10 +753,10 @@ void hashTypeConvertListpack(robj *o, int enc) { while (hashTypeNext(&hi) != C_ERR) { sds field = hashTypeCurrentObjectNewSds(&hi, OBJ_HASH_FIELD); sds value = hashTypeCurrentObjectNewSds(&hi, OBJ_HASH_VALUE); - hashTypeEntry *entry = hashTypeCreateEntry(field, value); + entry *entry = entryCreate(field, value, EXPIRY_NONE); sdsfree(field); if (!hashtableAdd(ht, entry)) { - freeHashTypeEntry(entry); + 
entryFree(entry); hashTypeResetIterator(&hi); /* Needed for gcc ASAN */ serverLogHexDump(LL_WARNING, "listpack with dup elements dump", o->ptr, lpBytes(o->ptr)); serverPanic("Listpack corruption detected"); @@ -731,21 +802,22 @@ robj *hashTypeDup(robj *o) { } else if (o->encoding == OBJ_ENCODING_HASHTABLE) { hashtable *ht = hashtableCreate(&hashHashtableType); hashtableExpand(ht, hashtableSize((const hashtable *)o->ptr)); + hobj = createObject(OBJ_HASH, ht); + hobj->encoding = OBJ_ENCODING_HASHTABLE; hashTypeInitIterator(o, &hi); while (hashTypeNext(&hi) != C_ERR) { /* Extract a field-value pair from an original hash object.*/ sds field = hashTypeCurrentFromHashTable(&hi, OBJ_HASH_FIELD); sds value = hashTypeCurrentFromHashTable(&hi, OBJ_HASH_VALUE); - + long long expiry = entryGetExpiry(hi.next); /* Add a field-value pair to a new hash object. */ - hashTypeEntry *entry = hashTypeCreateEntry(field, sdsdup(value)); + entry *entry = entryCreate(field, sdsdup(value), expiry); hashtableAdd(ht, entry); + if (expiry != EXPIRY_NONE) + hashTypeTrackEntry(hobj, entry); } hashTypeResetIterator(&hi); - - hobj = createObject(OBJ_HASH, ht); - hobj->encoding = OBJ_ENCODING_HASHTABLE; } else { serverPanic("Unknown hash encoding"); } @@ -771,15 +843,20 @@ void hashReplyFromListpackEntry(client *c, listpackEntry *e) { * 'val' can be NULL in which case it's not extracted. 
*/ static void hashTypeRandomElement(robj *hashobj, unsigned long hashsize, listpackEntry *field, listpackEntry *val) { if (hashobj->encoding == OBJ_ENCODING_HASHTABLE) { - void *entry; - hashtableFairRandomEntry(hashobj->ptr, &entry); - sds sds_field = hashTypeEntryGetField(entry); - field->sval = (unsigned char *)sds_field; - field->slen = sdslen(sds_field); - if (val) { - sds sds_val = hashTypeEntryGetValue(entry); - val->sval = (unsigned char *)sds_val; - val->slen = sdslen(sds_val); + void *e = NULL; + + while (!e) { + hashtableFairRandomEntry(hashobj->ptr, &e); + sds sds_field = entryGetField(e); + field->sval = (unsigned char *)sds_field; + field->slen = sdslen(sds_field); + if (val) { + entry *hash_entry = e; + sds sds_val = entryGetValue(hash_entry); + val->sval = (unsigned char *)sds_val; + val->slen = + sdslen(sds_val); + } } } else if (hashobj->encoding == OBJ_ENCODING_LISTPACK) { lpRandomPair(hashobj->ptr, hashsize, field, val); @@ -793,51 +870,6 @@ static void hashTypeRandomElement(robj *hashobj, unsigned long hashsize, listpac * Hash type commands *----------------------------------------------------------------------------*/ -void hsetnxCommand(client *c) { - robj *o; - if ((o = hashTypeLookupWriteOrCreate(c, c->argv[1])) == NULL) return; - - if (hashTypeExists(o, c->argv[2]->ptr)) { - addReply(c, shared.czero); - } else { - hashTypeTryConversion(o, c->argv, 2, 3); - hashTypeSet(o, c->argv[2]->ptr, c->argv[3]->ptr, HASH_SET_COPY); - signalModifiedKey(c, c->db, c->argv[1]); - notifyKeyspaceEvent(NOTIFY_HASH, "hset", c->argv[1], c->db->id); - server.dirty++; - addReply(c, shared.cone); - } -} - -void hsetCommand(client *c) { - int i, created = 0; - robj *o; - - if ((c->argc % 2) == 1) { - addReplyErrorArity(c); - return; - } - - if ((o = hashTypeLookupWriteOrCreate(c, c->argv[1])) == NULL) return; - hashTypeTryConversion(o, c->argv, 2, c->argc - 1); - - for (i = 2; i < c->argc; i += 2) created += !hashTypeSet(o, c->argv[i]->ptr, c->argv[i + 1]->ptr, 
HASH_SET_COPY); - - signalModifiedKey(c, c->db, c->argv[1]); - notifyKeyspaceEvent(NOTIFY_HASH, "hset", c->argv[1], c->db->id); - server.dirty += (c->argc - 2) / 2; - - /* HMSET (deprecated) and HSET return value is different. */ - char *cmdname = c->argv[0]->ptr; - if (cmdname[1] == 's' || cmdname[1] == 'S') { - /* HSET */ - addReplyLongLong(c, created); - } else { - /* HMSET */ - addReply(c, shared.ok); - } -} - void hincrbyCommand(client *c) { long long value, incr, oldvalue; robj *o; @@ -866,7 +898,7 @@ void hincrbyCommand(client *c) { } value += incr; new = sdsfromlonglong(value); - hashTypeSet(o, c->argv[2]->ptr, new, HASH_SET_TAKE_VALUE); + hashTypeSet(o, c->argv[2]->ptr, new, EXPIRY_NONE, HASH_SET_TAKE_VALUE); signalModifiedKey(c, c->db, c->argv[1]); notifyKeyspaceEvent(NOTIFY_HASH, "hincrby", c->argv[1], c->db->id); server.dirty++; @@ -887,6 +919,7 @@ void hincrbyfloatCommand(client *c) { return; } if ((o = hashTypeLookupWriteOrCreate(c, c->argv[1])) == NULL) return; + if (hashTypeGetValue(o, c->argv[2]->ptr, &vstr, &vlen, &ll) == C_OK) { if (vstr) { if (string2ld((char *)vstr, vlen, &value) == 0) { @@ -909,7 +942,7 @@ void hincrbyfloatCommand(client *c) { char buf[MAX_LONG_DOUBLE_CHARS]; int len = ld2string(buf, sizeof(buf), value, LD_STR_HUMAN); new = sdsnewlen(buf, len); - hashTypeSet(o, c->argv[2]->ptr, new, HASH_SET_TAKE_VALUE); + hashTypeSet(o, c->argv[2]->ptr, new, EXPIRY_NONE, HASH_SET_TAKE_VALUE); signalModifiedKey(c, c->db, c->argv[1]); notifyKeyspaceEvent(NOTIFY_HASH, "hincrbyfloat", c->argv[1], c->db->id); server.dirty++; @@ -950,7 +983,6 @@ void hgetCommand(client *c) { robj *o; if ((o = lookupKeyReadOrReply(c, c->argv[1], shared.null[c->resp])) == NULL || checkType(c, o, OBJ_HASH)) return; - addHashFieldToReply(c, o, c->argv[2]->ptr); } @@ -961,12 +993,16 @@ void hmgetCommand(client *c) { /* Don't abort when the key cannot be found. Non-existing keys are empty * hashes, where HMGET should respond with a series of null bulks. 
*/ o = lookupKeyRead(c->db, c->argv[1]); + if (checkType(c, o, OBJ_HASH)) return; addReplyArrayLen(c, c->argc - 2); for (i = 2; i < c->argc; i++) { addHashFieldToReply(c, o, c->argv[i]->ptr); } + if (o && hashTypeLength(o) == 0) { + dbDelete(c->db, c->argv[1]); + } } void hdelCommand(client *c) { @@ -974,7 +1010,6 @@ void hdelCommand(client *c) { int j, deleted = 0, keyremoved = 0; if ((o = lookupKeyWriteOrReply(c, c->argv[1], shared.czero)) == NULL || checkType(c, o, OBJ_HASH)) return; - for (j = 2; j < c->argc; j++) { if (hashTypeDelete(o, c->argv[j]->ptr)) { deleted++; @@ -1028,10 +1063,275 @@ static void addHashIteratorCursorToReply(writePreparedClient *wpc, hashTypeItera } } +void hsetnxCommand(client *c) { + robj *o; + if ((o = hashTypeLookupWriteOrCreate(c, c->argv[1])) == NULL) return; + if (hashTypeExists(o, c->argv[2]->ptr)) { + addReply(c, shared.czero); + } else { + hashTypeTryConversion(o, c->argv, 2, 3); + hashTypeSet(o, c->argv[2]->ptr, c->argv[3]->ptr, EXPIRY_NONE, HASH_SET_COPY | HASH_SET_KEEP_EXPIRY); + signalModifiedKey(c, c->db, c->argv[1]); + notifyKeyspaceEvent(NOTIFY_HASH, "hset", c->argv[1], c->db->id); + server.dirty++; + addReply(c, shared.cone); + } +} + +void hsetCommand(client *c) { + int i, created = 0; + robj *o; + + if ((c->argc % 2) == 1) { + addReplyErrorArity(c); + return; + } + + if ((o = hashTypeLookupWriteOrCreate(c, c->argv[1])) == NULL) return; + hashTypeTryConversion(o, c->argv, 2, c->argc - 1); + + for (i = 2; i < c->argc; i += 2) created += !hashTypeSet(o, c->argv[i]->ptr, c->argv[i + 1]->ptr, EXPIRY_NONE, HASH_SET_COPY); + + signalModifiedKey(c, c->db, c->argv[1]); + notifyKeyspaceEvent(NOTIFY_HASH, "hset", c->argv[1], c->db->id); + server.dirty += (c->argc - 2) / 2; + + /* HMSET (deprecated) and HSET return value is different. 
*/ + char *cmdname = c->argv[0]->ptr; + if (cmdname[1] == 's' || cmdname[1] == 'S') { + /* HSET */ + addReplyLongLong(c, created); + } else { + /* HMSET */ + addReply(c, shared.ok); + } +} + +void hsetexCommand(client *c) { + robj *o; + robj *expire = NULL; + robj *comparison = NULL; + int unit = UNIT_SECONDS; + int flags = OBJ_NO_FLAGS; + int fields_index = 0; + long long num_fields = 0; + long long when = EXPIRY_NONE; + int i = 0; + int set_flags = HASH_SET_COPY, set_expired = 0; + int changes = 0; + + for (; fields_index < c->argc; fields_index++) { + if (!strcasecmp(c->argv[fields_index]->ptr, "fields")) { + /* checking optional flags */ + if (parseExtendedCommandArgumentsOrReply(c, &flags, &unit, &expire, &comparison, COMMAND_HSET, fields_index++) != C_OK) return; + if (getLongLongFromObjectOrReply(c, c->argv[fields_index++], &num_fields, NULL) != C_OK) return; + break; + } + } + /* Check that the parsed fields number matches the real provided number of fields */ + if (!num_fields || num_fields != (c->argc - fields_index) / 2) { + addReplyErrorObject(c, shared.syntaxerr); + return; + } + + o = lookupKeyWrite(c->db, c->argv[1]); + if (checkType(c, o, OBJ_HASH)) + return; + + /* Check for object existence condition */ + if ((flags & OBJ_SET_NX && o) || (flags & OBJ_SET_XX && !o)) { + addReply(c, shared.czero); + return; + } + + if (o == NULL) { + o = createHashObject(); + dbAdd(c->db, c->argv[1], &o); + } + + /* Handle parsing and calculating the expiration time. */ + if (flags & OBJ_KEEPTTL) + set_flags |= HASH_SET_KEEP_EXPIRY; + else if (expire) { + long long basetime = (flags & (OBJ_EXAT | OBJ_PXAT)) ? 
0 : commandTimeSnapshot(); + + if (convertExpireArgumentToUnixTime(c, expire, basetime, unit, &when) == C_ERR) + return; + + if (((flags & OBJ_PXAT) || (flags & OBJ_EXAT)) && checkAlreadyExpired(when)) { + set_expired = 1; + when = 0; + } + } + + /* Check for all fields condition */ + if (flags & (OBJ_SET_FNX | OBJ_SET_FXX)) { + for (i = fields_index; i < c->argc; i += 2) { + if (((flags & OBJ_SET_FNX) && hashTypeExists(o, c->argv[i]->ptr)) || + ((flags & OBJ_SET_FXX) && !hashTypeExists(o, c->argv[i]->ptr))) { + addReply(c, shared.czero); + return; + } + } + } + + for (i = fields_index; i < c->argc; i += 2) { + if (set_expired) { + changes += hashTypeDelete(o, c->argv[i]->ptr); + } else { + hashTypeSet(o, c->argv[i]->ptr, c->argv[i + 1]->ptr, when, set_flags); + changes++; + } + } + if (expire) { + /* Propagate as HSETEX Key Value PXAT millisecond-timestamp if there is + * EX/PX/EXAT flag. */ + if (!(flags & OBJ_PXAT)) { + for (int i = 2; i < fields_index; i++) { + if (c->argv[i + 1] == expire) { + robj *milliseconds_obj = createStringObjectFromLongLong(when); + rewriteClientCommandArgument(c, i, shared.pxat); + rewriteClientCommandArgument(c, i + 1, milliseconds_obj); + decrRefCount(milliseconds_obj); + break; + } + } + } + notifyKeyspaceEvent(NOTIFY_HASH, "hexpire", c->argv[1], c->db->id); + if (set_expired && changes) + notifyKeyspaceEvent(NOTIFY_HASH, "hexpired", c->argv[1], c->db->id); + } + signalModifiedKey(c, c->db, c->argv[1]); + notifyKeyspaceEvent(NOTIFY_HASH, "hset", c->argv[1], c->db->id); + server.dirty += changes; + addReplyLongLong(c, changes == num_fields ? 
1 : 0); +} + +void hgetexCommand(client *c) { + robj *o; + robj *expire = NULL; + robj *comparison = NULL; + int unit = UNIT_SECONDS; + int flags = OBJ_NO_FLAGS; + int fields_index = 0; + long long num_fields = -1; + long long when = EXPIRY_NONE; + int i = 0; + int set_expiry = 0, set_expired = 0, persist = 0; + int changes = 0; + robj **new_argv = NULL; + robj *milliseconds_obj = NULL, *numitems_obj = NULL; + int new_argc = 0; + int milliseconds_index = -1, numitems_index = -1; + + for (; fields_index < c->argc; fields_index++) { + if (!strcasecmp(c->argv[fields_index]->ptr, "fields")) { + /* checking optional flags */ + if (parseExtendedCommandArgumentsOrReply(c, &flags, &unit, &expire, &comparison, COMMAND_HGET, fields_index++) != C_OK) return; + if (getLongLongFromObjectOrReply(c, c->argv[fields_index++], &num_fields, NULL) != C_OK) return; + break; + } + } + + /* Check that the parsed fields number matches the real provided number of fields */ + if (!num_fields || num_fields != (c->argc - fields_index)) { + addReplyErrorObject(c, shared.syntaxerr); + return; + } + + o = lookupKeyRead(c->db, c->argv[1]); + if (checkType(c, o, OBJ_HASH)) + return; + + if (o == NULL) { + o = createHashObject(); + dbAdd(c->db, c->argv[1], &o); + } + + /* Handle parsing and calculating the expiration time. */ + if (flags & OBJ_PERSIST) { + persist = 1; + } else if (expire) { + long long basetime = (flags & (OBJ_EXAT | OBJ_PXAT)) ? 0 : commandTimeSnapshot(); + + if (convertExpireArgumentToUnixTime(c, expire, basetime, unit, &when) == C_ERR) + return; + + if (((flags & OBJ_PXAT) || (flags & OBJ_EXAT)) && checkAlreadyExpired(when)) { + set_expired = 1; + when = 0; + } else { + set_expiry = 1; + } + } + + initDeferredReplyBuffer(c); + + addReplyArrayLen(c, num_fields); + /* This command is never propagated as is. It is either propagated as HPEXPIREAT or PERSIST. + * This why it doesn't need special handling in feedAppendOnlyFile to convert relative expire time to absolute one. 
*/ + if (set_expiry || set_expired || persist) { + /* allocate a new client argv for replicating the command. */ + new_argv = zmalloc(sizeof(robj *) * (num_fields + 5)); + if (persist) + new_argv[new_argc++] = shared.hpersist; + else + new_argv[new_argc++] = shared.hpexpireat; + + new_argv[new_argc++] = c->argv[1]; + if (set_expiry || set_expired) { + new_argv[new_argc++] = NULL; // placeholder for the expiration time + milliseconds_index = new_argc - 1; + } + new_argv[new_argc++] = shared.fields; + new_argv[new_argc++] = NULL; // placeholder for the number of objects + numitems_index = new_argc - 1; + } + for (i = fields_index; i < c->argc; i++) { + int changed = 0; + addHashFieldToReply(c, o, c->argv[i]->ptr); + if (set_expired) { + changed = hashTypeDelete(o, c->argv[i]->ptr); + } else if (set_expiry) { + changed = (hashTypeSetExpire(o, c->argv[i]->ptr, when, 0) == 1) ? 1 : 0; + } else if (persist) { + changed = hashTypePersist(o, c->argv[i]->ptr); + } + if (changed) { + changes++; + new_argv[new_argc++] = c->argv[i]; + } + } + if (changes) { + if (set_expiry) { + milliseconds_obj = createStringObjectFromLongLong(when); + new_argv[milliseconds_index] = milliseconds_obj; + } + numitems_obj = createStringObjectFromLongLong(changes); + new_argv[numitems_index] = numitems_obj; + + for (i = 0; i < new_argc; i++) + if (new_argv[i]) + incrRefCount(new_argv[i]); + replaceClientCommandVector(c, new_argc, new_argv); + server.dirty += changes; + signalModifiedKey(c, c->db, c->argv[1]); + if (set_expired) + notifyKeyspaceEvent(NOTIFY_HASH, "hexpired", c->argv[1], c->db->id); + notifyKeyspaceEvent(NOTIFY_HASH, set_expiry ? 
"hexpire" : "hpersist", c->argv[1], c->db->id); + if (milliseconds_obj) decrRefCount(milliseconds_obj); + if (numitems_obj) decrRefCount(numitems_obj); + } else { + if (new_argv) zfree(new_argv); + } + + commitDeferredReplyBuffer(c, 1); +} + void genericHgetallCommand(client *c, int flags) { robj *o; hashTypeIterator hi; - int length, count = 0; + int count = 0; robj *emptyResp = (flags & OBJ_HASH_FIELD && flags & OBJ_HASH_VALUE) ? shared.emptymap[c->resp] : shared.emptyarray; if ((o = lookupKeyReadOrReply(c, c->argv[1], emptyResp)) == NULL || checkType(c, o, OBJ_HASH)) return; @@ -1040,13 +1340,7 @@ void genericHgetallCommand(client *c, int flags) { if (!wpc) return; /* We return a map if the user requested fields and values, like in the * HGETALL case. Otherwise to use a flat array makes more sense. */ - length = hashTypeLength(o); - if (flags & OBJ_HASH_FIELD && flags & OBJ_HASH_VALUE) { - addWritePreparedReplyMapLen(wpc, length); - } else { - addWritePreparedReplyArrayLen(wpc, length); - } - + void *replylen = addReplyDeferredLen(c); hashTypeInitIterator(o, &hi); while (hashTypeNext(&hi) != C_ERR) { if (flags & OBJ_HASH_FIELD) { @@ -1060,10 +1354,13 @@ void genericHgetallCommand(client *c, int flags) { } hashTypeResetIterator(&hi); - /* Make sure we returned the right number of elements. */ - if (flags & OBJ_HASH_FIELD && flags & OBJ_HASH_VALUE) count /= 2; - serverAssert(count == length); + if (flags & OBJ_HASH_FIELD && flags & OBJ_HASH_VALUE) { + setDeferredMapLen(c, replylen, count /= 2); + count /= 2; + } else { + setDeferredArrayLen(c, replylen, count); + } } void hkeysCommand(client *c) { @@ -1081,7 +1378,6 @@ void hgetallCommand(client *c) { void hexistsCommand(client *c) { robj *o; if ((o = lookupKeyReadOrReply(c, c->argv[1], shared.czero)) == NULL || checkType(c, o, OBJ_HASH)) return; - addReply(c, hashTypeExists(o, c->argv[2]->ptr) ? 
shared.cone : shared.czero);
 }
 
@@ -1111,6 +1407,211 @@ static void hrandfieldReplyWithListpack(writePreparedClient *wpc, unsigned int c
     }
 }
 
+
+/* Shared implementation of HEXPIRE, HPEXPIRE, HEXPIREAT and HPEXPIREAT.
+ *
+ * argv[2] is the expiration time; convertExpireArgumentToUnixTime() turns it
+ * into an absolute unix time in milliseconds using 'basetime' and 'unit'.
+ * Optional condition flags may follow, then FIELDS numfields field [field ...].
+ *
+ * The reply is an array with one integer per requested field:
+ *    2 - the time is already in the past and the field was deleted.
+ *   -2 - the key does not exist, or the time is in the past and no such field
+ *        could be deleted.
+ *   otherwise the value returned by hashTypeSetExpire() for that field. */
+void hexpireGenericCommand(client *c, long long basetime, int unit) {
+    robj *key = c->argv[1], *param = c->argv[2];
+    long long when; /* unix time in milliseconds when the key will expire. */
+    int flag = 0;
+    int fields_index = 3;
+    long long num_fields = 0;
+    int i, result = 0, expired = 0, updated = 0;
+
+    for (; fields_index < c->argc; fields_index++) {
+        if (!strcasecmp(c->argv[fields_index]->ptr, "fields")) {
+            /* checking optional flags */
+            if (parseExtendedExpireArgumentsOrReply(c, &flag, fields_index++) != C_OK) return;
+            if (getLongLongFromObjectOrReply(c, c->argv[fields_index++], &num_fields, NULL) != C_OK) return;
+            break;
+        }
+    }
+
+    /* Check that the parsed fields number matches the real provided number of fields */
+    if (!num_fields || num_fields != (c->argc - fields_index)) {
+        addReplyErrorObject(c, shared.syntaxerr);
+        return;
+    }
+
+    if (convertExpireArgumentToUnixTime(c, param, basetime, unit, &when) == C_ERR)
+        return;
+
+    if (checkAlreadyExpired(when))
+        when = 0;
+
+    robj *obj = lookupKeyWrite(c->db, key);
+
+    /* Non HASH type return simple error */
+    if (checkType(c, obj, OBJ_HASH)) {
+        return;
+    }
+    /* From this point we would return array reply */
+    addReplyArrayLen(c, num_fields);
+
+    /* A missing key has no fields. Report -2 for each of them instead of handing
+     * a NULL object to the hashType* helpers, mirroring the !hash guard in
+     * httlGenericCommand(). */
+    if (obj == NULL) {
+        for (i = 0; i < num_fields; i++) addReplyLongLong(c, -2);
+        return;
+    }
+
+    for (i = 0; i < num_fields; i++) {
+        if (when == 0) {
+            result = -2;
+            if (hashTypeDelete(obj, c->argv[fields_index + i]->ptr)) {
+                result = 2;
+                expired++;
+            }
+        } else {
+            result = hashTypeSetExpire(obj, c->argv[fields_index + i]->ptr, when, flag);
+            /* Only a positive result counts as a change, same rule as for dirty. */
+            if (result > 0) updated++;
+        }
+        server.dirty += (result > 0 ? 1 : 0); // in case there was a change increment the dirty
+        addReplyLongLong(c, result);
+    }
+
+    /* NOTE(review): after deleting fields the hash may be empty; confirm whether
+     * the key itself has to be removed here. */
+    if (expired || updated) {
+        /* Propagate as HPEXPIREAT millisecond-timestamp
+         * Only rewrite the command arg if not already HPEXPIREAT */
+        if (c->cmd->proc != hpexpireAtCommand) {
+            rewriteClientCommandArgument(c, 0, shared.hpexpireat);
+        }
+
+        /* Avoid creating a string object when it's the same as argv[2] parameter */
+        if (basetime != 0 || unit == UNIT_SECONDS) {
+            robj *when_obj = createStringObjectFromLongLong(when);
+            rewriteClientCommandArgument(c, 2, when_obj);
+            decrRefCount(when_obj);
+        }
+        if (expired)
+            notifyKeyspaceEvent(NOTIFY_HASH, "hexpired", c->argv[1], c->db->id);
+        notifyKeyspaceEvent(NOTIFY_HASH, "hexpire", c->argv[1], c->db->id);
+        /* signalModifiedKey() is given the key name, not the hash object. */
+        signalModifiedKey(c, c->db, key);
+    }
+}
+
+void hexpireCommand(client *c) {
+    hexpireGenericCommand(c, commandTimeSnapshot(), UNIT_SECONDS);
+}
+
+void hexpireAtCommand(client *c) {
+    hexpireGenericCommand(c, 0, UNIT_SECONDS);
+}
+
+void hpexpireCommand(client *c) {
+    hexpireGenericCommand(c, commandTimeSnapshot(), UNIT_MILLISECONDS);
+}
+
+void hpexpireAtCommand(client *c) {
+    hexpireGenericCommand(c, 0, UNIT_MILLISECONDS);
+}
+
+/* HPERSIST: remove the expiration from the given hash fields.
+ *
+ * argv[3] holds the number of fields and the names start at argv[4]. The reply
+ * is an array with, for each field, the value returned by hashTypePersist(),
+ * or -2 for every field when the key does not exist. */
+void hpersistCommand(client *c) {
+    int fields_index = 4, result = 0, changes = 0;
+    long long num_fields = 0;
+
+    if (getLongLongFromObjectOrReply(c, c->argv[fields_index - 1], &num_fields, NULL) != C_OK) return;
+
+    /* Check that the parsed fields number matches the real provided number of fields */
+    if (!num_fields || num_fields != (c->argc - fields_index)) {
+        addReplyErrorObject(c, shared.syntaxerr);
+        return;
+    }
+
+    robj *hash = lookupKeyWrite(c->db, c->argv[1]);
+
+    /* Non HASH type return simple error, before the array header is sent */
+    if (checkType(c, hash, OBJ_HASH)) return;
+
+    /* From this point we would return array reply */
+    addReplyArrayLen(c, num_fields);
+
+    for (int i = 0; i < num_fields; i++, fields_index++) {
+        /* A missing key is reported as -2 for every field. */
+        result = hash ? hashTypePersist(hash, c->argv[fields_index]->ptr) : -2;
+        server.dirty += (result > 0 ? 1 : 0); // in case there was a change increment the dirty
+        changes += (result > 0 ? 1 : 0);
+        addReplyLongLong(c, result);
+    }
+    if (changes) {
+        notifyKeyspaceEvent(NOTIFY_HASH, "hpersist", c->argv[1], c->db->id);
+        /* signalModifiedKey() is given the key name, not the hash object. */
+        signalModifiedKey(c, c->db, c->argv[1]);
+    }
+}
+
+/* Shared implementation of HTTL, HPTTL, HEXPIRETIME and HPEXPIRETIME.
+ *
+ * Replies with one integer per field: -2 when the key is missing or
+ * hashTypeGetExpiry() fails, -1 when the field has no expiration, otherwise the
+ * expiration minus 'basetime' (clamped at 0), in milliseconds or rounded to seconds. */
+void httlGenericCommand(client *c, long long basetime, int unit) {
+    int fields_index = 4;
+    long long num_fields = 0, result = -2;
+
+    if (getLongLongFromObjectOrReply(c, c->argv[fields_index - 1], &num_fields, NULL) != C_OK) return;
+
+    /* Check that the parsed fields number matches the real provided number of fields */
+    if (!num_fields || num_fields != (c->argc - fields_index)) {
+        addReplyErrorObject(c, shared.syntaxerr);
+        return;
+    }
+
+    robj *hash = lookupKeyRead(c->db, c->argv[1]);
+
+    if (checkType(c, hash, OBJ_HASH)) return;
+
+    /* From this point we would return array reply */
+    addReplyArrayLen(c, num_fields);
+
+    for (int i = 0; i < num_fields; i++) {
+        if (!hash || hashTypeGetExpiry(hash, c->argv[fields_index + i]->ptr, &result) == C_ERR) {
+            addReplyLongLong(c, -2);
+        } else if (result == EXPIRY_NONE) {
+            addReplyLongLong(c, -1);
+        } else {
+            result = result - basetime;
+            if (result < 0) result = 0;
+            addReplyLongLong(c, unit == UNIT_MILLISECONDS ? result : ((result + 500) / 1000));
+        }
+    }
+}
+
+void httlCommand(client *c) {
+    httlGenericCommand(c, commandTimeSnapshot(), UNIT_SECONDS);
+}
+
+void hpttlCommand(client *c) {
+    httlGenericCommand(c, commandTimeSnapshot(), UNIT_MILLISECONDS);
+}
+
+void hexpiretimeCommand(client *c) {
+    httlGenericCommand(c, 0, UNIT_SECONDS);
+}
+
+void hpexpiretimeCommand(client *c) {
+    httlGenericCommand(c, 0, UNIT_MILLISECONDS);
+}
+
 /* How many times bigger should be the hash compared to the requested size
  * for us to not use the "remove elements" strategy? Read later in the
  * implementation for more info.
*/ @@ -1144,6 +1607,7 @@ void hrandfieldWithCountCommand(client *c, long l, int withvalues) { writePreparedClient *wpc = prepareClientForFutureWrites(c); if (!wpc) return; + /* CASE 1: The count was negative, so the extraction method is just: * "return N random elements" sampling the whole set every time. * This case is trivial and can be served without auxiliary data @@ -1155,11 +1619,12 @@ void hrandfieldWithCountCommand(client *c, long l, int withvalues) { else addWritePreparedReplyArrayLen(wpc, count); if (hash->encoding == OBJ_ENCODING_HASHTABLE) { - while (count--) { + while (count && hashtableSize(hash->ptr) > 0) { void *entry; hashtableFairRandomEntry(hash->ptr, &entry); - sds field = hashTypeEntryGetField(entry); - sds value = hashTypeEntryGetValue(entry); + count--; + sds field = entryGetField(entry); + sds value = entryGetValue(entry); if (withvalues && c->resp > 2) addWritePreparedReplyArrayLen(wpc, 2); addWritePreparedReplyBulkCBuffer(wpc, field, sdslen(field)); if (withvalues) addWritePreparedReplyBulkCBuffer(wpc, value, sdslen(value)); @@ -1225,7 +1690,6 @@ void hrandfieldWithCountCommand(client *c, long l, int withvalues) { zfree(vals); return; } - /* CASE 3: * The number of elements inside the hash is not greater than * HRANDFIELD_SUB_STRATEGY_MUL times the number of requested elements. 
@@ -1263,8 +1727,8 @@ void hrandfieldWithCountCommand(client *c, long l, int withvalues) { hashtableInitIterator(&iter, ht, 0); void *next; while (hashtableNext(&iter, &next)) { - sds field = hashTypeEntryGetField(next); - sds value = hashTypeEntryGetValue(next); + sds field = entryGetField(next); + sds value = entryGetValue(next); if (withvalues && c->resp > 2) addWritePreparedReplyArrayLen(wpc, 2); addWritePreparedReplyBulkCBuffer(wpc, field, sdslen(field)); if (withvalues) addWritePreparedReplyBulkCBuffer(wpc, value, sdslen(value)); @@ -1328,6 +1792,7 @@ void hrandfieldCommand(client *c) { } } hrandfieldWithCountCommand(c, l, withvalues); + return; } @@ -1335,7 +1800,6 @@ void hrandfieldCommand(client *c) { if ((hash = lookupKeyReadOrReply(c, c->argv[1], shared.null[c->resp])) == NULL || checkType(c, hash, OBJ_HASH)) { return; } - hashTypeRandomElement(hash, hashTypeLength(hash), &ele, NULL); hashReplyFromListpackEntry(c, &ele); } diff --git a/src/t_string.c b/src/t_string.c index ef3e4bccde..0450217b48 100644 --- a/src/t_string.c +++ b/src/t_string.c @@ -55,6 +55,9 @@ static int checkStringLength(client *c, long long size, long long append) { return C_OK; } +/* Forward declaration */ +static int getExpireMillisecondsOrReply(client *c, robj *expire, int flags, int unit, long long *milliseconds); + /* The setGenericCommand() function implements the SET operation with different * options and variants. This function is called in order to implement the * following commands: SET, SETEX, PSETEX, SETNX, GETSET. @@ -70,24 +73,6 @@ static int checkStringLength(client *c, long long size, long long append) { * * If ok_reply is NULL "+OK" is used. * If abort_reply is NULL, "$-1" is used. */ - -#define OBJ_NO_FLAGS 0 -#define OBJ_SET_NX (1 << 0) /* Set if key not exists. */ -#define OBJ_SET_XX (1 << 1) /* Set if key exists. 
*/ -#define OBJ_EX (1 << 2) /* Set if time in seconds is given */ -#define OBJ_PX (1 << 3) /* Set if time in ms in given */ -#define OBJ_KEEPTTL (1 << 4) /* Set and keep the ttl */ -#define OBJ_SET_GET (1 << 5) /* Set if want to get key before set */ -#define OBJ_EXAT (1 << 6) /* Set if timestamp in second is given */ -#define OBJ_PXAT (1 << 7) /* Set if timestamp in ms is given */ -#define OBJ_PERSIST (1 << 8) /* Set if we need to remove the ttl */ -#define OBJ_SET_IFEQ (1 << 9) /* Set if we need compare and set */ -#define OBJ_ARGV3 (1 << 10) /* Set if the value is at argv[3]; otherwise it's \ - * at argv[2]. */ - -/* Forward declaration */ -static int getExpireMillisecondsOrReply(client *c, robj *expire, int flags, int unit, long long *milliseconds); - void setGenericCommand(client *c, int flags, robj *key, @@ -240,118 +225,6 @@ static int getExpireMillisecondsOrReply(client *c, robj *expire, int flags, int return C_OK; } -#define COMMAND_GET 0 -#define COMMAND_SET 1 -/* - * The parseExtendedStringArgumentsOrReply() function performs the common validation for extended - * string arguments used in SET and GET command. - * - * Get specific commands - PERSIST/DEL - * Set specific commands - XX/NX/GET/IFEQ - * Common commands - EX/EXAT/PX/PXAT/KEEPTTL - * - * Function takes pointers to client, flags, unit, pointer to pointer of expire obj if needed - * to be determined and command_type which can be COMMAND_GET or COMMAND_SET. - * - * If there are any syntax violations C_ERR is returned else C_OK is returned. - * - * Input flags are updated upon parsing the arguments. Unit and expire are updated if there are any - * EX/EXAT/PX/PXAT arguments. Unit is updated to millisecond if PX/PXAT is set. - */ -int parseExtendedStringArgumentsOrReply(client *c, int *flags, int *unit, robj **expire, robj **compare_val, int command_type) { - int j = command_type == COMMAND_GET ? 2 : 3; - for (; j < c->argc; j++) { - char *opt = c->argv[j]->ptr; - robj *next = (j == c->argc - 1) ? 
NULL : c->argv[j + 1]; - - /* clang-format off */ - if ((opt[0] == 'n' || opt[0] == 'N') && - (opt[1] == 'x' || opt[1] == 'X') && opt[2] == '\0' && - !(*flags & OBJ_SET_XX || *flags & OBJ_SET_IFEQ) && (command_type == COMMAND_SET)) - { - *flags |= OBJ_SET_NX; - } else if ((opt[0] == 'x' || opt[0] == 'X') && - (opt[1] == 'x' || opt[1] == 'X') && opt[2] == '\0' && - !(*flags & OBJ_SET_NX || *flags & OBJ_SET_IFEQ) && (command_type == COMMAND_SET)) - { - *flags |= OBJ_SET_XX; - } else if ((opt[0] == 'i' || opt[0] == 'I') && - (opt[1] == 'f' || opt[1] == 'F') && - (opt[2] == 'e' || opt[2] == 'E') && - (opt[3] == 'q' || opt[3] == 'Q') && opt[4] == '\0' && - next && !(*flags & OBJ_SET_NX || *flags & OBJ_SET_XX || *flags & OBJ_SET_IFEQ) && (command_type == COMMAND_SET)) - { - *flags |= OBJ_SET_IFEQ; - *compare_val = next; - j++; - } else if ((opt[0] == 'g' || opt[0] == 'G') && - (opt[1] == 'e' || opt[1] == 'E') && - (opt[2] == 't' || opt[2] == 'T') && opt[3] == '\0' && - (command_type == COMMAND_SET)) - { - *flags |= OBJ_SET_GET; - } else if (!strcasecmp(opt, "KEEPTTL") && !(*flags & OBJ_PERSIST) && - !(*flags & OBJ_EX) && !(*flags & OBJ_EXAT) && - !(*flags & OBJ_PX) && !(*flags & OBJ_PXAT) && (command_type == COMMAND_SET)) - { - *flags |= OBJ_KEEPTTL; - } else if (!strcasecmp(opt,"PERSIST") && (command_type == COMMAND_GET) && - !(*flags & OBJ_EX) && !(*flags & OBJ_EXAT) && - !(*flags & OBJ_PX) && !(*flags & OBJ_PXAT) && - !(*flags & OBJ_KEEPTTL)) - { - *flags |= OBJ_PERSIST; - } else if ((opt[0] == 'e' || opt[0] == 'E') && - (opt[1] == 'x' || opt[1] == 'X') && opt[2] == '\0' && - !(*flags & OBJ_KEEPTTL) && !(*flags & OBJ_PERSIST) && - !(*flags & OBJ_EXAT) && !(*flags & OBJ_PX) && - !(*flags & OBJ_PXAT) && next) - { - *flags |= OBJ_EX; - *expire = next; - j++; - } else if ((opt[0] == 'p' || opt[0] == 'P') && - (opt[1] == 'x' || opt[1] == 'X') && opt[2] == '\0' && - !(*flags & OBJ_KEEPTTL) && !(*flags & OBJ_PERSIST) && - !(*flags & OBJ_EX) && !(*flags & OBJ_EXAT) && - 
!(*flags & OBJ_PXAT) && next) - { - *flags |= OBJ_PX; - *unit = UNIT_MILLISECONDS; - *expire = next; - j++; - } else if ((opt[0] == 'e' || opt[0] == 'E') && - (opt[1] == 'x' || opt[1] == 'X') && - (opt[2] == 'a' || opt[2] == 'A') && - (opt[3] == 't' || opt[3] == 'T') && opt[4] == '\0' && - !(*flags & OBJ_KEEPTTL) && !(*flags & OBJ_PERSIST) && - !(*flags & OBJ_EX) && !(*flags & OBJ_PX) && - !(*flags & OBJ_PXAT) && next) - { - *flags |= OBJ_EXAT; - *expire = next; - j++; - } else if ((opt[0] == 'p' || opt[0] == 'P') && - (opt[1] == 'x' || opt[1] == 'X') && - (opt[2] == 'a' || opt[2] == 'A') && - (opt[3] == 't' || opt[3] == 'T') && opt[4] == '\0' && - !(*flags & OBJ_KEEPTTL) && !(*flags & OBJ_PERSIST) && - !(*flags & OBJ_EX) && !(*flags & OBJ_EXAT) && - !(*flags & OBJ_PX) && next) - { - *flags |= OBJ_PXAT; - *unit = UNIT_MILLISECONDS; - *expire = next; - j++; - } else { - addReplyErrorObject(c,shared.syntaxerr); - return C_ERR; - } - /* clang-format on */ - } - return C_OK; -} - /* SET key value [NX | XX | IFEQ comparison-value] [GET] * [EX seconds | PX milliseconds | * EXAT seconds-timestamp | PXAT milliseconds-timestamp | KEEPTTL] */ @@ -361,7 +234,7 @@ void setCommand(client *c) { int unit = UNIT_SECONDS; int flags = OBJ_NO_FLAGS; - if (parseExtendedStringArgumentsOrReply(c, &flags, &unit, &expire, &comparison, COMMAND_SET) != C_OK) { + if (parseExtendedCommandArgumentsOrReply(c, &flags, &unit, &expire, &comparison, COMMAND_SET, c->argc) != C_OK) { return; } @@ -447,7 +320,7 @@ void getexCommand(client *c) { int unit = UNIT_SECONDS; int flags = OBJ_NO_FLAGS; - if (parseExtendedStringArgumentsOrReply(c, &flags, &unit, &expire, NULL, COMMAND_GET) != C_OK) { + if (parseExtendedCommandArgumentsOrReply(c, &flags, &unit, &expire, NULL, COMMAND_GET, c->argc) != C_OK) { return; } diff --git a/src/util.h b/src/util.h index 9fe912ad5c..787b79dd11 100644 --- a/src/util.h +++ b/src/util.h @@ -33,6 +33,12 @@ #include #include "sds.h" +/* min/max */ +#undef min +#undef max 
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#define max(a, b) ((a) > (b) ? (a) : (b))
+
 /* The maximum number of characters needed to represent a long double
  * as a string (long double has a huge range of some 4952 chars, see LDBL_MAX).
  * This should be the size of the buffer given to ld2string */
diff --git a/src/valkey-check-rdb.c b/src/valkey-check-rdb.c
index fbb7a31ed0..efe2d674d3 100644
--- a/src/valkey-check-rdb.c
+++ b/src/valkey-check-rdb.c
@@ -146,8 +146,11 @@ char *rdb_type_string[] = {
     "stream-v2",
     "set-listpack",
     "stream-v3",
+    "hash-volatile-items",
 };
 
+static_assert(sizeof(rdb_type_string) / sizeof(rdb_type_string[0]) == RDB_TYPE_LAST, "Mismatch between enum and string table");
+
 char *type_name[OBJ_TYPE_MAX] = {"string", "list", "set", "zset", "hash", "module", /* module type is special */
                                  "stream"};
 
diff --git a/src/volatile_set.c b/src/volatile_set.c
new file mode 100644
index 0000000000..718cfecddf
--- /dev/null
+++ b/src/volatile_set.c
@@ -0,0 +1,94 @@
+#include <string.h>
+#include "volatile_set.h"
+#include "zmalloc.h"
+#include "config.h"
+#include "endianconv.h"
+#include "serverassert.h"
+
+/* Size of a rax key: 8 bytes of expiry converted with htonu64() followed by the
+ * entry pointer, zero padded to 8 bytes on 32 bit targets. */
+#define EXPIRY_HASH_SIZE 16
+volatile_set *createVolatileSet(volatileEntryType *type) {
+    volatile_set *set = zmalloc(sizeof(volatile_set));
+    set->etypr = type;
+    set->expiry_buckets = raxNew();
+    return set;
+}
+
+void freeVolatileSet(volatile_set *b) {
+    raxFree(b->expiry_buckets);
+    zfree(b);
+}
+
+int volatileSetAddEntry(volatile_set *set, void *entry, long long expiry) {
+    unsigned char buf[EXPIRY_HASH_SIZE];
+    expiry = htonu64(expiry);
+    memcpy(buf, &expiry, sizeof(expiry));
+    memcpy(buf + 8, &entry, sizeof(entry));
+    if (sizeof(entry) == 4) memset(buf + 12, 0, 4); /* Zero padding for 32bit target. */
+    return raxTryInsert(set->expiry_buckets, buf, sizeof(buf), NULL, NULL);
+}
+
+int volatileSetRemoveEntry(volatile_set *set, void *entry, long long expiry) {
+    unsigned char buf[EXPIRY_HASH_SIZE];
+    expiry = htonu64(expiry);
+    memcpy(buf, &expiry, sizeof(expiry));
+    memcpy(buf + 8, &entry, sizeof(entry));
+    if (sizeof(entry) == 4) memset(buf + 12, 0, 4); /* Zero padding for 32bit target. */
+    return raxRemove(set->expiry_buckets, buf, sizeof(buf), NULL);
+}
+
+/* Move an entry to a new (entry pointer, expiry) pair. A NULL entry or an expiry
+ * of -1 on either side means that side is skipped. Always returns 1. */
+int volatileSetUpdateEntry(volatile_set *set, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry) {
+    if (old_entry == new_entry && old_expiry == new_expiry) return 1;
+
+    /* Keep the calls outside of assert() so the set is still updated should
+     * assertions ever be compiled out. */
+    if (old_entry && old_expiry != -1) {
+        int removed = volatileSetRemoveEntry(set, old_entry, old_expiry);
+        assert(removed);
+        (void)removed;
+    }
+    if (new_entry && new_expiry != -1) {
+        int added = volatileSetAddEntry(set, new_entry, new_expiry);
+        assert(added);
+        (void)added;
+    }
+    return 1;
+}
+
+int volatileSetExpireEntry(volatile_set *set, void *entry) {
+    volatileSetRemoveEntry(set, entry, set->etypr->getExpiry(entry));
+    if (set->etypr->expire) {
+        set->etypr->expire(entry);
+        return 1;
+    }
+    return 0;
+}
+
+size_t volatileSetNumEntries(volatile_set *set) {
+    assert(set && set->expiry_buckets);
+    return set->expiry_buckets->numele;
+}
+
+/* Initialize 'it' and seek it to the first key so volatileSetNext() starts there. */
+void volatileSetStart(volatile_set *set, volatileSetIterator *it) {
+    raxStart(&it->bucket, set->expiry_buckets);
+    /* A rax iterator yields nothing until it has been seeked. */
+    raxSeek(&it->bucket, "^", NULL, 0);
+}
+
+/* Advance the iterator. On success store the entry pointer embedded in the
+ * current key into '*entryptr' and return 1; return 0 when there are no more. */
+int volatileSetNext(volatileSetIterator *it, void **entryptr) {
+    if (raxNext(&it->bucket)) {
+        assert(it->bucket.key_len == EXPIRY_HASH_SIZE);
+        memcpy(entryptr, it->bucket.key + 8, sizeof(*entryptr));
+        return 1;
+    }
+    return 0;
+}
+void volatileSetReset(volatileSetIterator *it) {
+    raxStop(&it->bucket);
+}
diff --git a/src/volatile_set.h b/src/volatile_set.h
new file mode 100644
index 0000000000..37dc7c9923
--- /dev/null
+++ b/src/volatile_set.h
@@ -0,0 +1,40 @@
+#ifndef VOLATILESET_H
+#define VOLATILESET_H
+
+#include <stddef.h>
+#include "rax.h"
+#include "sds.h"
+
+typedef struct {
+    sds (*entryGetKey)(const void *entry);
+
+    long
long (*getExpiry)(const void *entry); + + int (*expire)(void *entry); + +} volatileEntryType; + + +typedef struct { + volatileEntryType *etypr; + rax *expiry_buckets; +} volatile_set; + +typedef struct volatileSetIterator { + raxIterator bucket; +} volatileSetIterator; + + +int volatileSetRemoveEntry(volatile_set *set, void *entry, long long expiry); +int volatileSetAddEntry(volatile_set *set, void *entry, long long expiry); +int volatileSetExpireEntry(volatile_set *set, void *entry); +int volatileSetUpdateEntry(volatile_set *set, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry); +size_t volatileSetNumEntries(volatile_set *set); +void volatileSetStart(volatile_set *set, volatileSetIterator *it); +int volatileSetNext(volatileSetIterator *it, void **entryptr); +void volatileSetReset(volatileSetIterator *it); + +void freeVolatileSet(volatile_set *b); +volatile_set *createVolatileSet(volatileEntryType *type); + +#endif diff --git a/tests/unit/hashexpire.tcl b/tests/unit/hashexpire.tcl new file mode 100644 index 0000000000..5f8416b619 --- /dev/null +++ b/tests/unit/hashexpire.tcl @@ -0,0 +1,1104 @@ + +proc info_field {info field} { +foreach line [split $info "\n"] { + if {[string match "$field:*" $line]} { + return [string trim [lindex [split $line ":"] 1]] + } +} +return "" +} + +start_server {tags {"hashexpire external:skip"}} { + + test {HSETEX KEEPTTL - preserves existing TTL of field} { + r FLUSHALL + + # Set a field with a known TTL + r HSETEX myhash PX 1000 FIELDS 1 field1 val1 + set original_pttl [r HPTTL myhash FIELDS 1 field1] + set original_expiretime [r HEXPIRETIME myhash FIELDS 1 field1] + + # Validate TTL is active and expiretime is in the future + assert {$original_pttl > 0} + assert {$original_expiretime > [clock seconds]} + + # Overwrite the field with KEEPTTL + r HSETEX myhash KEEPTTL FIELDS 1 field1 newval + + # Ensure TTL is preserved + set updated_pttl [r HPTTL myhash FIELDS 1 field1] + set updated_expiretime [r 
HEXPIRETIME myhash FIELDS 1 field1] + assert {$updated_pttl > 0} + assert {$updated_pttl <= $original_pttl} + assert_equal $original_expiretime $updated_expiretime + + # Ensure value was updated + assert_equal newval [r HGET myhash field1] + } + + test {HSETEX EX - FIELDS 0 returns error} { + r FLUSHALL + catch {r HSETEX myhash EX 10 FIELDS 0} e + set e + } {ERR *} + + test {HSETEX EX - test negative ttl} { + set ttl -10 + catch {r HSETEX myhash EX $ttl FIELDS 1 field1 val1} e + set e + } {ERR invalid expire time in 'hsetex' command} + + test {HSETEX EX - test non-numeric ttl} { + set ttl abc + catch {r HSETEX myhash EX $ttl FIELDS 1 field1 val1} e + set e + } {ERR value is not an integer or out of range} + + test {HSETEX EX - overwrite field resets TTL} { + r FLUSHALL + r HSETEX myhash EX 100 FIELDS 1 field1 val1 + r HSETEX myhash EX 200 FIELDS 1 field1 newval + assert_equal 200 [r HTTL myhash FIELDS 1 field1] + assert_equal newval [r HGET myhash field1] + } + + test {HSETEX EX - test zero ttl expires immediately} { + r FLUSHALL + r HSETEX myhash EX 0 FIELDS 1 field1 val1 + after 10 + assert_equal 0 [r HEXISTS myhash field1] + } + + test {HSETEX EX - test mix of expiring and persistent fields} { + r FLUSHALL + r HSET myhash field2 "persistent" + r HSETEX myhash EX 1 FIELDS 1 field1 "temp" + after 1100 + assert_equal 0 [r HEXISTS myhash field1] + assert_equal 1 [r HEXISTS myhash field2] + } + + test {HSETEX EX - test missing TTL} { + catch {r HSETEX myhash EX FIELDS 1 field1 val1} e + set e + } {ERR syntax error} + + test {HSETEX EX - mismatched field/value count} { + catch {r HSETEX myhash EX 10 FIELDS 2 field1 val1} e + set e + } {ERR *} + + + +###### PX ####### + + test {HSETEX PX - test negative ttl} { + set ttl -50 + catch {r HSETEX myhash PX $ttl FIELDS 1 field1 val1} e + set e + } {ERR invalid expire time in 'hsetex' command} + + test {HSETEX PX - test non-numeric ttl} { + set ttl xyz + catch {r HSETEX myhash PX $ttl FIELDS 1 field1 val1} e + set e + } {ERR 
value is not an integer or out of range} + + test {HSETEX PX - overwrite field resets TTL} { + r FLUSHALL + r HSETEX myhash PX 10000 FIELDS 1 field1 val1 + r HSETEX myhash PX 20000 FIELDS 1 field1 newval + set ttl [r HPTTL myhash FIELDS 1 field1] + assert {$ttl >= 19000 && $ttl <= 20000} + assert_equal newval [r HGET myhash field1] + } + + test {HSETEX PX - test zero ttl expires immediately} { + r FLUSHALL + r HSETEX myhash PX 0 FIELDS 1 field1 val1 + after 10 + assert_equal 0 [r HEXISTS myhash field1] + } + + test {HSETEX PX - test mix of expiring and persistent fields} { + r FLUSHALL + r HSET myhash field2 "persistent" + r HSETEX myhash PX 10 FIELDS 1 field1 "temp" + after 20 + assert_equal 0 [r HEXISTS myhash field1] + assert_equal 1 [r HEXISTS myhash field2] + } + + test {HSETEX PX - test missing TTL} { + catch {r HSETEX myhash PX FIELDS 1 field1 val1} e + set e + } {ERR syntax error} + + # test {HSETEX PX - mismatched field/value count} { + # catch {r HSETEX myhash PX 100 FIELDS 2 field1 val1} e + # set e + # } {ERR wrong number of arguments for 'hsetex' command} + + + ## FNX/FXX + +# hsetex throws ERR syntax error, it shouldn't + test {HSETEX EX FNX - set only if none of the fields exist} { + r FLUSHALL + r HSET myhash field1 val1 + set res [r HSETEX myhash EX 10 FNX FIELDS 1 field1 val2] + assert_equal 0 $res + assert_equal val1 [r HGET myhash field1] + + # Now try with all-new fields + set res [r HSETEX myhash EX 10 FNX FIELDS 2 f2 v2 f3 v3] + assert_equal 1 $res + assert_equal v2 [r HGET myhash f2] + assert_equal v3 [r HGET myhash f3] + } + + test {HSETEX EX FXX - set only if all fields exist} { + r FLUSHALL + r HSET myhash field1 val1 field2 val2 + set res [r HSETEX myhash EX 10 FXX FIELDS 2 field1 new1 field2 new2] + assert_equal 1 $res + assert_equal new1 [r HGET myhash field1] + assert_equal new2 [r HGET myhash field2] + + # Now try when one field doesn't exist + set res [r HSETEX myhash EX 10 FXX FIELDS 2 field1 x fieldX y] + assert_equal 0 $res + 
assert_equal new1 [r HGET myhash field1] + assert_equal 0 [r HEXISTS myhash fieldX] + } + +# Syntax error: HSETEX myhash PX 100 FNX FIELDS 2 x 2 y 3 + test {HSETEX PX FNX - partial conflict returns 0} { + r FLUSHALL + r HSET myhash x 1 + set res [r HSETEX myhash PX 100 FNX FIELDS 2 x 2 y 3] + assert_equal 0 $res + assert_equal 1 [r HEXISTS myhash x] + assert_equal 0 [r HEXISTS myhash y] + } + + test {HSETEX PX FXX - one field missing returns 0} { + r FLUSHALL + r HSET myhash a 1 + set res [r HSETEX myhash PX 100 FXX FIELDS 2 a 2 b 3] + assert_equal 0 $res + assert_equal 1 [r HGET myhash a] + assert_equal 0 [r HEXISTS myhash b] + } + + test {HSETEX EX - FNX and FXX conflict error} { + catch {r HSETEX myhash EX 10 FNX FXX FIELDS 1 x y} e + set e + } {ERR syntax error} + + #################### Lazy Expiry ######################## + + test {HGETALL skips expired fields without triggering lazy expiry} { + r FLUSHALL + r DEBUG SET-ACTIVE-EXPIRE no + + # Set two fields: one persistent, one with short TTL + r HSET myhash persistent "val1" + r HSETEX myhash PX 5 FIELDS 1 expiring "val2" + + # Wait for expiry to pass + after 10 + + # HGETALL should skip expired field + set result [r HGETALL myhash] + assert_equal {persistent val1} $result + + # HLEN should still count both fields (expired field not removed) + assert_equal 2 [r HLEN myhash] + + # Re-enable active expiry + r DEBUG SET-ACTIVE-EXPIRE yes + } + + test {HSCAN skips expired fields} { + r FLUSHALL + r DEBUG SET-ACTIVE-EXPIRE no + + # Set multiple fields, one with expiry + r HSET myhash persistent1 "a" persistent2 "b" + r HSETEX myhash PX 5 FIELDS 1 expiring "c" + + # Wait for expiration + after 10 + + # HSCAN must not return the expired field + set cursor 0 + set allfields {} + while {1} { + set res [r HSCAN myhash $cursor] + set cursor [lindex $res 0] + set kvs [lindex $res 1] + lappend allfields {*}$kvs + if {$cursor eq "0"} break + } + + # Extract just the field names + set fieldnames [lmap {k v} $allfields { set 
k }] + set fieldnames_sorted [lsort $fieldnames] + + # Should only include persistent1 and persistent2 + assert_equal {persistent1 persistent2} $fieldnames_sorted + + # Re-enable active expiry for future tests + r DEBUG SET-ACTIVE-EXPIRE yes + } + + test {MOVE preserves field TTLs} { + r FLUSHALL + r SELECT 0 + r HSETEX myhash PX 50000 FIELDS 1 field1 val1 + + # Capture original TTL + set original_ttl [r HPTTL myhash FIELDS 1 field1] + assert {$original_ttl > 0} + + # Move to DB 1 + assert_equal 1 [r MOVE myhash 1] + + # Switch to target DB + r SELECT 1 + + # Field must exist and TTL must be preserved + set moved_ttl [r HPTTL myhash FIELDS 1 field1] + assert {$moved_ttl > 0 && $moved_ttl <= $original_ttl} + } + +test {HSET - overwrite lazily expired field without TTL clears expiration} { + r FLUSHALL + r debug SET-ACTIVE-EXPIRE no + + # This test verifies that if a field has expired (but not yet lazily deleted), + # and it is overwritten using a plain HSET (i.e., no TTL), + # Valkey treats the field as non existing and updates it, + # effectively clearing the old TTL and making the field persistent. 
+ + r HSETEX myhash PX 10 FIELDS 1 field1 oldval + wait_for_condition 100 100 { + [r HTTL myhash FIELDS 1 field1] eq "-2" + } else { + fail "hash value was not expired after timeout" + } + + # Field should still be present in memory due to lazy expiry + assert_equal 1 [r HLEN myhash] + + # Overwrite with HSET (no TTL) before accessing + r HSET myhash field1 newval + + # TTL should now be gone; field becomes persistent + set ttl [r HPTTL myhash FIELDS 1 field1] + assert_equal -1 $ttl + assert_equal newval [r HGET myhash field1] + assert_equal 1 [r HLEN myhash] + + r debug SET-ACTIVE-EXPIRE yes +} + +test {HINCRBY - on expired field} { + r FLUSHALL + r debug SET-ACTIVE-EXPIRE no + + # This test verifies that if a field has expired, + # and it is overwritten using a plain HINCRBY (i.e., no TTL), + # Valkey treats the field as still existing and updates it, + # effectively clearing the old TTL and starting the value from 0. + + r HSETEX myhash PX 10 FIELDS 1 field1 1 + wait_for_condition 100 100 { + [r HTTL myhash FIELDS 1 field1] eq "-2" + } else { + fail "hash value was not expired after timeout" + } + + # Field should still be present in memory due to lazy expiry + assert_equal 1 [r HLEN myhash] + + # Overwrite with HINCRBY (no TTL) before accessing + r HINCRBY myhash field1 1 + + # TTL should now be gone; field becomes persistent + set ttl [r HPTTL myhash FIELDS 1 field1] + assert_equal -1 $ttl + assert_equal 1 [r HGET myhash field1] + assert_equal 1 [r HLEN myhash] + + r debug SET-ACTIVE-EXPIRE yes +} + +test {HSET - overwrite unexpired field removes TTL} { + r FLUSHALL + r debug SET-ACTIVE-EXPIRE no + + # This test verifies that overwriting a field with HSET, + # even while its TTL is still valid (not expired), + # clears the TTL and makes the field persistent. + # This behavior is consistent with how HSET works for normal keys. 
+ + # Set field with long TTL + r HSETEX myhash PX 1000 FIELDS 1 field1 val1 + + # Confirm TTL is active + set before [r HPTTL myhash FIELDS 1 field1] + assert {$before > 0} + + # Overwrite with HSET before TTL expires + r HSET myhash field1 newval + + # TTL should now be gone + set after [r HPTTL myhash FIELDS 1 field1] + assert_equal -1 $after + assert_equal newval [r HGET myhash field1] + + r debug SET-ACTIVE-EXPIRE yes +} + +test {HDEL - lazily expired field is removed without triggering expiry logic} { + r FLUSHALL + r debug SET-ACTIVE-EXPIRE no + + # This test proves that deleting an expired field with HDEL + # does NOT trigger Valkey's expiration mechanism. + # + # The key observation is that Valkey tracks how many fields were + # expired via TTL using the `expired_subkeys` counter in INFO stats. + # If HDEL caused expiration to be processed internally, + # this counter would increment. We assert that it remains unchanged. + + # Capture expired_subkeys before + set before_info [r INFO stats] + set before [info_field $before_info expired_subkeys] + + # Create field with short TTL + r HSETEX myhash PX 10 FIELDS 1 field1 val1 + after 20 + + # Field is technically expired, but still in-memory due to lazy expiry + assert_equal 1 [r HLEN myhash] + + # Delete the expired field directly + r HDEL myhash field1 + + # Field should be gone + assert_equal 0 [r HEXISTS myhash field1] + + # Capture expired_subkeys again + set after_info [r INFO stats] + set after [info_field $after_info expired_subkeys] + + # Verify that no expiry occurred internally + assert_equal $before $after + + r debug SET-ACTIVE-EXPIRE yes +} + +###### Test EXPIRE ############# + + + # Basic Expiry Functionality + test {HEXPIRE - set TTL on existing field} { + r FLUSHALL + r HSET myhash field1 hello + r HEXPIRE myhash 10 FIELDS 1 field1 + set ttl [r HTTL myhash FIELDS 1 field1] + assert {$ttl > 0} + } + + test {HEXPIRE - TTL 0 deletes field} { + r FLUSHALL + r HSET myhash field1 goodbye + set res [r 
HEXPIRE myhash 0 FIELDS 1 field1] + assert_equal {2} $res + assert_equal 0 [r HEXISTS myhash field1] + } + + test {HEXPIRE - negative TTL returns error} { + r FLUSHALL + r HSET myhash field1 val + catch {r HEXPIRE myhash -5 FIELDS 1 field1} e + set e + } {ERR invalid expire time in 'hexpire' command} + + test {HEXPIRE - wrong type key returns error} { + r FLUSHALL + r SET myhash notahash + catch {r HEXPIRE myhash 10 FIELDS 1 field1} e + set e + } {WRONGTYPE Operation against a key holding the wrong kind of value} + + # Conditionals: NX + test {HEXPIRE NX - only set when field has no TTL} { + r FLUSHALL + r HSETEX myhash PX 100 FIELDS 1 field1 val + set res [r HEXPIRE myhash 10 NX FIELDS 1 field1] + assert_equal {0} $res + + r HSET myhash field2 val2 + set res2 [r HEXPIRE myhash 10 NX FIELDS 1 field2] + assert_equal {1} $res2 + } + + # Conditionals: XX + test {HEXPIRE XX - only set when field has TTL} { + r FLUSHALL + r HSET myhash field1 val1 field2 val2 + r HEXPIRE myhash 20 FIELDS 1 field1 + set res [r HEXPIRE myhash 30 XX FIELDS 2 field1 field2] + assert_equal {1 0} $res + } + + # Conditionals: GT + test {HEXPIRE GT - only set if new TTL > existing TTL} { + r FLUSHALL + r HSETEX myhash EX 300 FIELDS 1 field1 val1 + after 10 + set res [r HEXPIRE myhash 600 GT FIELDS 1 field1] ;# 600s > 300s remaining + assert_equal {1} $res + + # GT should fail if field is persistent + r HSET myhash field2 val2 + set res2 [r HEXPIRE myhash 1 GT FIELDS 1 field2] + assert_equal {0} $res2 + } + + # Conditionals: LT + test {HEXPIRE LT - only set if new TTL < existing TTL} { + r FLUSHALL + r HSETEX myhash EX 600 FIELDS 1 field1 val1 + set res [r HEXPIRE myhash 1 LT FIELDS 1 field1] + assert_equal {1} $res + + ## TODO this is an expected behavior really? what does non existintg ttl mean? 
+ r HSET myhash field2 val2 + set res2 [r HEXPIRE myhash 1 LT FIELDS 1 field2] + assert_equal {1} $res2 + } + + # TTL Refresh + test {HEXPIRE - refresh TTL with new value} { + r FLUSHALL + r HSET myhash field1 val1 + r HEXPIRE myhash 1 FIELDS 1 field1 + after 500 + r HEXPIRE myhash 3 FIELDS 1 field1 + set ttl [r HTTL myhash FIELDS 1 field1] + assert {$ttl >= 2} + } + + # Error Cases + test {HEXPIRE - conflicting conditions error} { + r FLUSHALL + r HSET myhash field1 val + catch {r HEXPIRE myhash 10 NX XX FIELDS 1 field1} e + set e + } {ERR NX and XX, GT or LT options at the same time are not compatible} + + test {HEXPIRE - missing FIELDS error} { + r FLUSHALL + r HSET myhash field1 val + catch {r HEXPIRE myhash 10} e + set e + } {ERR wrong number of arguments for 'hexpire' command} + + test {HEXPIRE - no fields after FIELDS keyword} { + r FLUSHALL + r HSET myhash field1 val + catch {r HEXPIRE myhash 10 FIELDS 0} e + set e + } {ERR wrong number of arguments for 'hexpire' command} + + test {HEXPIRE - non-integer TTL error} { + r FLUSHALL + r HSET myhash field1 val + catch {r HEXPIRE myhash abc FIELDS 1 field1} e + set e + } {ERR value is not an integer or out of range} + + test {HEXPIRE - non-existing key returns -2} { + r FLUSHALL + set res [r HEXPIRE nokey 10 FIELDS 1 field1] + assert_equal {-2} $res + } + + test {HEXPIRE EX - set TTL on multiple fields} { + r FLUSHALL + r HSET myhash fieldA valA fieldB valB + set ttl 100 + r HEXPIRE myhash $ttl FIELDS 2 fieldA fieldB + + set ttlA [r HTTL myhash FIELDS 1 fieldA] + set ttlB [r HTTL myhash FIELDS 1 fieldB] + + assert { $ttlA > 0 && $ttlA <= $ttl } + assert { $ttlB > 0 && $ttlB <= $ttl } + } {} + + test {HEXPIRE returns -2 on non-existing key} { + r FLUSHALL + assert_equal {-2 -2} [r HEXPIRE nokey 10 FIELDS 2 field1 field2] + } {} + + + ##### HTTL ##### + test {HTTL - persistent field returns -1} { + r FLUSHALL + r HSET myhash field1 val1 + assert_equal -1 [r HTTL myhash FIELDS 1 field1] + } {} + + test {HTTL - 
non-existent field returns -2} { + r FLUSHALL + r HSET myhash field1 val1 + assert_equal -2 [r HTTL myhash FIELDS 1 nofield] + } {} + + test {HTTL - non-existent key returns -2} { + r FLUSHALL + assert_equal -2 [r HTTL nokey FIELDS 1 field1] + } {} + + ##### EXPIRETIME ###### + + # Basic Expiry Functionality + test {HEXPIREAT - set absolute expiry on field} { + r FLUSHALL + r HSET myhash field1 hello + set now [clock seconds] + set exp [expr {$now + 30}] + r HEXPIREAT myhash $exp FIELDS 1 field1 + set etime [r HEXPIRETIME myhash FIELDS 1 field1] + assert_equal $exp $etime + } + + test {HEXPIREAT - timestamp in past deletes field immediately} { + r FLUSHALL + r HSET myhash field1 gone + set past [expr {[clock seconds] - 1000}] + set res [r HEXPIREAT myhash $past FIELDS 1 field1] + assert_equal {2} $res + assert_equal 0 [r HEXISTS myhash field1] + } + + + test {HEXPIREAT - set TTL on multiple fields (existing + non-existing)} { + r FLUSHALL + r HSET myhash field1 hello field2 world + set exp [expr {[clock seconds] + 10}] + set res [r HEXPIREAT myhash $exp FIELDS 3 field1 field2 fieldX] + assert_equal {1 1 -2} $res + } + + + # Conditionals: NX + test {HEXPIREAT NX - only set when field has no TTL} { + r FLUSHALL + r HSETEX myhash EX 100 FIELDS 1 field1 val + set exp [expr {[clock seconds] + 100}] + set res [r HEXPIREAT myhash $exp NX FIELDS 1 field1] + assert_equal {0} $res + + r HSET myhash field2 val2 + set res2 [r HEXPIREAT myhash $exp NX FIELDS 1 field2] + assert_equal {1} $res2 + } + + # Conditionals: XX + test {HEXPIREAT XX - only set when field has TTL} { + r FLUSHALL + r HSET myhash field1 val1 field2 val2 + set exp1 [expr {[clock seconds] + 20}] + r HEXPIREAT myhash $exp1 FIELDS 1 field1 + set exp2 [expr {[clock seconds] + 30}] + set res [r HEXPIREAT myhash $exp2 XX FIELDS 2 field1 field2] + assert_equal {1 0} $res + } + + # Conditionals: GT + test {HEXPIREAT GT - only set if new expiry > existing} { + r FLUSHALL + r HSETEX myhash PX 5000 FIELDS 1 field1 val1 
+ after 10 + set now [clock seconds] + set future [expr {$now + 10}] + set res [r HEXPIREAT myhash $future GT FIELDS 1 field1] + assert_equal {1} $res + + r HSET myhash field2 val2 + set res2 [r HEXPIREAT myhash $future GT FIELDS 1 field2] + assert_equal {0} $res2 + } + + + # Conditionals: LT + test {HEXPIREAT LT - only set if new expiry < existing} { + r FLUSHALL + set now [clock seconds] + # absolute timestamp: now + 20K seconds + set long_future_expiration [expr {$now + 20000}] + # absolute timestamp: now + 1K seconds + set short_future_expiration [expr {$now + 1000}] + r HSETEX myhash EXAT $long_future_expiration FIELDS 1 field1 val1 + assert_equal {1} [r HEXPIREAT myhash $short_future_expiration LT FIELDS 1 field1] + + r HSET myhash field2 val2 + assert_equal {1} [r HEXPIREAT myhash $short_future_expiration LT FIELDS 1 field2] + # TODO is this the expected behavior? if no TTL exist, it should be treated as minimum ttl possible? + } + + test {HEXPIREAT - refresh TTL with new future timestamp} { + r FLUSHALL + r HSET myhash field1 val1 + + # Set initial expiry to very near future + set ts1 [expr {[clock seconds] + 10}] + r HEXPIREAT myhash $ts1 FIELDS 1 field1 + + # Immediately refresh to a further expiry (no sleep needed) + set ts2 [expr {$ts1 + 5}] + r HEXPIREAT myhash $ts2 FIELDS 1 field1 + + # Confirm that expiry was updated + set actual [r HEXPIRETIME myhash FIELDS 1 field1] + assert_equal $ts2 $actual + } + + + # TTL Validations + test {HEXPIREAT - TTL is accurate via HEXPIRETIME} { + r FLUSHALL + r HSET myhash field1 val1 + set ts [expr {[clock seconds] + 50}] + r HEXPIREAT myhash $ts FIELDS 1 field1 + set returned [r HEXPIRETIME myhash FIELDS 1 field1] + assert_equal $ts $returned + } + + # Error Cases + test {HEXPIREAT - conflicting options error} { + r FLUSHALL + r HSET myhash field1 val + set ts [expr {[clock seconds] + 5}] + catch {r HEXPIREAT myhash $ts NX XX FIELDS 1 field1} e + set e + } {ERR NX and XX, GT or LT options at the same time are not compatible} + + + + test {HEXPIREAT -
missing FIELDS keyword} { + r FLUSHALL + r HSET myhash field1 val + set ts [expr {[clock seconds] + 5}] + catch {r HEXPIREAT myhash $ts} e + set e + } {ERR wrong number of arguments for 'hexpireat' command} + + test {HEXPIREAT - no fields after FIELDS} { + r FLUSHALL + r HSET myhash field1 val + set ts [expr {[clock seconds] + 5}] + catch {r HEXPIREAT myhash $ts FIELDS 0} e + set e + } {ERR wrong number of arguments for 'hexpireat' command} + + test {HEXPIREAT - non-integer timestamp} { + r FLUSHALL + r HSET myhash field1 val + catch {r HEXPIREAT myhash tomorrow FIELDS 1 field1} e + set e + } {ERR value is not an integer or out of range} + + + + test {HEXPIREAT - non-existing key returns -2} { + r FLUSHALL + set ts [expr {[clock seconds] + 5}] + set res [r HEXPIREAT nokey $ts FIELDS 1 field1] + assert_equal {-2} $res + } + + #################### HEXPIRETIME ################## + + # Basic TTL retrieval + test {HEXPIRETIME - returns expiry timestamp for single field with TTL} { + r FLUSHALL + r HSET myhash field1 val + set ts [expr {[clock seconds] + 3}] + r HEXPIREAT myhash $ts FIELDS 1 field1 + set out [r HEXPIRETIME myhash FIELDS 1 field1] + assert_equal $ts $out + } + + + # No expiration set + test {HEXPIRETIME - field has no TTL returns -1} { + r FLUSHALL + r HSET myhash field1 val + set out [r HEXPIRETIME myhash FIELDS 1 field1] + assert_equal -1 $out + } + + # Non-existent field + test {HEXPIRETIME - field does not exist returns -2} { + r FLUSHALL + r HSET myhash field1 val + set out [r HEXPIRETIME myhash FIELDS 1 fieldX] + assert_equal -2 $out + } + + # Non-existent key + test {HEXPIRETIME - key does not exist returns -2} { + r FLUSHALL + set out [r HEXPIRETIME missingkey FIELDS 1 field1] + assert_equal -2 $out + } + + # Multiple fields: mix of TTL, no TTL, and missing + test {HEXPIRETIME - multiple fields mixed cases} { + r FLUSHALL + r HSET myhash f1 a f2 b + set now [clock seconds] + r HEXPIREAT myhash [expr {$now + 100}] FIELDS 1 f1 + set out [r 
HEXPIRETIME myhash FIELDS 3 f1 f2 f3] + # Should return: expiry for f1, -1 for f2 (no TTL), -2 for f3 (not found) + assert_equal [list [expr {$now + 100}] -1 -2] $out + } + + # Invalid usages + test {HEXPIRETIME - no FIELDS keyword} { + r FLUSHALL + r HSET myhash f1 a + catch {r HEXPIRETIME myhash} e + set e + } {ERR wrong number of arguments for 'hexpiretime' command} + + test {HEXPIRETIME - FIELDS 0} { + r FLUSHALL + r HSET myhash f1 a + catch {r HEXPIRETIME myhash FIELDS 0} e + set e + } {ERR wrong number of arguments for 'hexpiretime' command} + + test {HEXPIRETIME - wrong FIELDS count} { + r FLUSHALL + r HSET myhash f1 a + catch {r HEXPIRETIME myhash FIELDS 1} e + set e + } {ERR wrong number of arguments for 'hexpiretime' command} + + test {HEXPIRETIME - wrong type key} { + r FLUSHALL + r SET myhash "not a hash" + catch {r HEXPIRETIME myhash FIELDS 1 f1} e + set e + } {WRONGTYPE Operation against a key holding the wrong kind of value} + + + # Basic expiration in milliseconds + test {HPEXPIREAT - set absolute expiry with ms precision} { + r FLUSHALL + r HSET myhash field1 val + set now [clock milliseconds] + set future [expr {$now + 123456789}] + r HPEXPIREAT myhash $future FIELDS 1 field1 + set t [r HPEXPIRETIME myhash FIELDS 1 field1] + assert_equal $future $t + } + + test {HPEXPIREAT - past timestamp deletes field immediately} { + r FLUSHALL + r HSET myhash field1 val + set past [expr {[clock milliseconds] - 10000}] + set res [r HPEXPIREAT myhash $past FIELDS 1 field1] + assert_equal {2} $res + assert_equal 0 [r HEXISTS myhash field1] + } + + test {HPEXPIREAT - non-existent key returns -2} { + r FLUSHALL + set ts [expr {[clock milliseconds] + 1000}] + set res [r HPEXPIREAT nokey $ts FIELDS 1 field1] + assert_equal {-2} $res + } + + test {HPEXPIREAT - mixed fields} { + r FLUSHALL + r HSET myhash f1 a f2 b + set ts [expr {[clock milliseconds] + 200000}] + set res [r HPEXPIREAT myhash $ts FIELDS 3 f1 f2 fX] + assert_equal {1 1 -2} $res + } + + test {HPEXPIREAT 
- GT and LT options with success and failure cases} { + r FLUSHALL + r HSET myhash f1 a + + # Setup: assign a baseline expiry time + set now [clock milliseconds] + set ts1 [expr {$now + 10000}] + set ts2 [expr {$now + 20000}] + r HPEXPIREAT myhash $ts1 FIELDS 1 f1 + + # --- GT Case --- + # ts2 > ts1 → should succeed + set res_gt_pass [r HPEXPIREAT myhash $ts2 GT FIELDS 1 f1] + assert_equal {1} $res_gt_pass + + # ts1 < ts2 → now try GT with ts1 again (should fail because ts2 is already set) + set res_gt_fail [r HPEXPIREAT myhash $ts1 GT FIELDS 1 f1] + assert_equal {0} $res_gt_fail + + # --- LT Case --- + # ts1 < ts2 → LT should fail + set res_lt_fail [r HPEXPIREAT myhash $ts2 LT FIELDS 1 f1] + assert_equal {0} $res_lt_fail + + # ts1 < ts2 → try LT with earlier timestamp, should succeed + set ts0 [expr {$now + 5000}] + set res_lt_pass [r HPEXPIREAT myhash $ts0 LT FIELDS 1 f1] + assert_equal {1} $res_lt_pass + } + + test {HPEXPIREAT - invalid inputs} { + r FLUSHALL + r HSET myhash f1 a + catch {r HPEXPIREAT myhash abc FIELDS 1 f1} e + assert_match {*not an integer*} $e + + catch {r HPEXPIREAT myhash 12345 NX XX FIELDS 1 f1} e2 + assert_match {ERR NX and XX, GT or LT options at the same time are not compatible} $e2 + } + + + test {HPEXPIRETIME - check with multiple fields} { + r FLUSHALL + + # Setup: one expiring field, one persistent, one missing + r HSET myhash f1 v1 f2 v2 + set ts [expr {[clock milliseconds] + 1000}] + r HPEXPIREAT myhash $ts FIELDS 1 f1 + + # Query all 3 fields + set result [r HPEXPIRETIME myhash FIELDS 3 f1 f2 f3] + + # Expect: [timestamp] for f1, -1 for f2, -2 for f3 + assert {[llength $result] == 3} + # f1: has TTL → returns exact timestamp + assert_equal $ts [lindex $result 0] + + # f2: exists, no TTL → returns -1 + assert_equal -1 [lindex $result 1] + + # f3: doesn't exist → returns -2 + assert_equal -2 [lindex $result 2] + + } + + #################### HPERSIST ################## + + test "HPERSIST - field does not exist" { + r FLUSHALL + r 
hset myhash field1 value1 + assert_equal {-2} [r hpersist myhash FIELDS 1 field2] + } + + test "HPERSIST - key does not exist" { + r FLUSHALL + assert_equal {-2} [r hpersist nonexistent FIELDS 1 field1] + } + + test "HPERSIST - field exists but no expiration" { + r del myhash + r hset myhash field1 value1 + assert_equal {-1} [r hpersist myhash FIELDS 1 field1] + } + + test "HPERSIST - field exists with expiration" { + r FLUSHALL + r hset myhash field1 value1 + r hexpire myhash 600 FIELDS 1 field1 + assert_morethan [r httl myhash FIELDS 1 field1] 0 + assert_equal {1} [r hpersist myhash FIELDS 1 field1] + assert_equal {-1} [r httl myhash FIELDS 1 field1] + } + + test "HPERSIST - multiple fields with mixed state" { + r FLUSHALL + r hset myhash f1 v1 + r hset myhash f2 v2 + r hset myhash f3 v3 + r hexpire myhash 600 FIELDS 1 f1 + # f2 will have no expiration + # f4 does not exist + assert_equal {1 -1 -2} [r hpersist myhash FIELDS 3 f1 f2 f4] + } +} + + +####### Test info +start_server {tags {"hash-ttl-info external:skip"}} { + test {Hash ttl - check command stats} { + r FLUSHALL + + # Run all relevant hash TTL commands + r HSET myhash f1 v1 f2 v2 + r HEXPIRE myhash 10 FIELDS 1 f1 + r HEXPIREAT myhash [expr {[clock seconds] + 10}] FIELDS 1 f2 + r HEXPIRETIME myhash FIELDS 2 f1 f2 + r HPEXPIRE myhash 1000 FIELDS 1 f1 + r HPEXPIREAT myhash [expr {[clock milliseconds] + 2000}] FIELDS 1 f2 + r HPEXPIRETIME myhash FIELDS 2 f1 f2 + r HGETEX myhash EX 120 FIELDS 1 f1 + r HTTL myhash FIELDS 1 f2 + r HPTTL myhash FIELDS 1 f1 + + # Fetch commandstats + set info [r INFO commandstats] + + # Extract call counts + proc get_calls {info cmd} { + foreach line [split $info "\n"] { + if {[string match "cmdstat_$cmd:*" $line]} { + regexp {calls=(\d+)} $line -> count + return $count + } + } + return -1 + } + + # Assert each command appears with correct call count (1 call each) + assert_equal 1 [get_calls $info hexpire] + assert_equal 1 [get_calls $info hexpireat] + assert_equal 1 [get_calls 
$info hexpiretime] + assert_equal 1 [get_calls $info hpexpire] + assert_equal 1 [get_calls $info hpexpireat] + assert_equal 1 [get_calls $info hpexpiretime] + assert_equal 1 [get_calls $info hgetex] + assert_equal 1 [get_calls $info httl] + assert_equal 1 [get_calls $info hpttl] + } +} + + + +#### Replication + +start_server {tags {"hashexpire external:skip"}} { + # Start another server to test replication of TTLs + start_server {tags {needs:repl external:skip}} { + # Set the outer layer server as primary + set primary [srv -1 client] + set primary_host [srv -1 host] + set primary_port [srv -1 port] + # Set this inner layer server as replica + set replica [srv 0 client] + + test {Setup replica and check field expiry after full sync} { + $primary flushall + + # Set up some TTLs on primary BEFORE replica connects + set now [clock milliseconds] + set f1_exp [expr {$now + 50000}] + set f2_exp [expr {$now + 70000}] + + $primary HSET myhash f1 v1 f2 v2 + $primary HPEXPIREAT myhash $f1_exp FIELDS 1 f1 + $primary HPEXPIREAT myhash $f2_exp FIELDS 1 f2 + + # Now connect replica + $replica replicaof $primary_host $primary_port + + wait_for_condition 100 100 { + [info_field [$replica info replication] master_link_status] eq "up" + } else { + fail "Master <-> Replica didn't finish sync" + } + + + # Wait for full sync + wait_for_ofs_sync $primary $replica + + + # Validate TTLs replicated correctly + set r1 [$replica HPEXPIRETIME myhash FIELDS 1 f1] + set r2 [$replica HPEXPIRETIME myhash FIELDS 1 f2] + + assert_equal $f1_exp $r1 + assert_equal $f2_exp $r2 + } + + + + test {HASH TTL - replicated TTL is absolute and consistent on replica} { + $primary flushall + + set now [clock milliseconds] + set future [expr {$now + 5000}] + set future_sec [expr {$future / 1000}] + + # HPEXPIREAT + $primary HSET myhash f1 v1 + $primary HPEXPIREAT myhash $future FIELDS 1 f1 + + # HSETEX EX + $primary HSETEX myhash EX 5 FIELDS 1 f2 v2 + + # HEXPIRE + $primary HSET myhash f3 v3 + $primary HEXPIRE 
myhash 5 FIELDS 1 f3 + + wait_for_ofs_sync $primary $replica + + set t1 [$primary HPEXPIRETIME myhash FIELDS 1 f1] + set t1r [$replica HPEXPIRETIME myhash FIELDS 1 f1] + assert_equal $t1 $t1r + + set t2 [$primary HEXPIRETIME myhash FIELDS 1 f2] + set t2r [$replica HEXPIRETIME myhash FIELDS 1 f2] + assert_equal $t2 $t2r + + set t3 [$primary HEXPIRETIME myhash FIELDS 1 f3] + set t3r [$replica HEXPIRETIME myhash FIELDS 1 f3] + assert_equal $t3 $t3r + } + + test {HASH TTL - field expired on master gets deleted on replica} { + $primary flushall + + $primary HSETEX myhash PX 10 FIELDS 1 f1 val1 + after 20 + wait_for_ofs_sync $primary $replica + + + # Trigger lazy expiry + catch {$primary HGET myhash f1} + wait_for_ofs_sync $primary $replica + + + assert_equal 0 [$replica HEXISTS myhash f1] + } + + + test {HASH TTL - replica retains TTL and field before expiration} { + $primary flushall + + $primary HSETEX myhash PX 1000 FIELDS 1 f1 val1 + wait_for_ofs_sync $primary $replica + + set master_ttl [$primary HPTTL myhash FIELDS 1 f1] + set replica_ttl [$replica HPTTL myhash FIELDS 1 f1] + assert {$replica_ttl > 0} + assert {$replica_ttl <= $master_ttl} + + } + + } +} \ No newline at end of file From 554af84159a9e3a1fa7b82cd2b18a14d1315806d Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Thu, 19 Jun 2025 15:24:20 +0300 Subject: [PATCH 002/119] drop duplicated assertion Signed-off-by: Ran Shidlansik --- src/sds.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/sds.c b/src/sds.c index 8aba0ccb2f..02ffa405b5 100644 --- a/src/sds.c +++ b/src/sds.c @@ -127,7 +127,6 @@ sds sdswrite(char *buf, size_t bufsize, char type, const char *init, size_t init assert(bufsize >= sdsReqSize(initlen, type)); int hdrlen = sdsHdrSize(type); size_t usable = bufsize - hdrlen - 1; - assert(usable <= sdsTypeMaxSize(type)); sds s = buf + hdrlen; unsigned char *fp = ((unsigned char *)s) - 1; /* flags pointer.
*/ From 70948991497839ac5b356d603642c289fb36c5ed Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Fri, 20 Jun 2025 00:13:27 +0300 Subject: [PATCH 003/119] address some PR comments Signed-off-by: Ran Shidlansik --- src/entry.c | 31 +++++++++++++++---------------- src/entry.h | 1 - src/hashtable.c | 4 ++-- src/server.h | 6 +++--- 4 files changed, 20 insertions(+), 22 deletions(-) diff --git a/src/entry.c b/src/entry.c index 72d9b0a6f9..e1df1456e9 100644 --- a/src/entry.c +++ b/src/entry.c @@ -22,10 +22,7 @@ * +--------------+--------------+---------------+ * | Expiration | field | value | * | 1234567890LL | hdr "foo" \0 | hdr8 "bar" \0 | - * +--------------+------^-------+---------------+ - * | - * | - * entry pointer = field sds + * +--------------+--------------+---------------+ * * Entry with value pointer, used for larger fields and values. The field is SDS * type 8 or higher. @@ -36,7 +33,7 @@ * +--------------+---^---+--------------+ * | * | - * entry pointer = field sds + * entry pointer = value sds */ /* The maximum allocation size we want to use for entries with embedded @@ -63,20 +60,20 @@ bool entryHasExpiry(const entry *entry) { return sdsGetAuxBit(entry, FIELD_SDS_AUX_BIT_ENTRY_HAS_EXPIRY); } +/* The entry pointer is the field sds, but that's an implementation detail. */ +sds entryGetField(const entry *entry) { + return (sds)entry; +} + /* Returns the location of a pointer to a separately allocated value. Only for * an entry without an embedded value. */ -sds *entryGetValueRef(const entry *entry) { +static sds *entryGetValueRef(const entry *entry) { serverAssert(entryHasValuePtr(entry)); char *field_data = sdsAllocPtr(entry); field_data -= sizeof(sds *); return (sds *)field_data; } -/* The entry pointer is the field sds, but that's an implementation detail. */ -sds entryGetField(const entry *entry) { - return (sds)entry; -} - /* Returns the sds of the entry's value. 
*/ sds entryGetValue(const entry *entry) { if (entryHasValuePtr(entry)) { @@ -159,11 +156,8 @@ void entryFree(entry *entry) { zfree(entryAllocPtr(entry)); } -/* Takes ownership of value, does not take ownership of field */ +/* Takes ownership of value; does not take ownership of field. */ entry *entryCreate(sds field, sds value, long long expiry) { - /* In case simple sds just return the same field we got. */ - if (!value && expiry == EXPIRY_NONE) - return field; sds embedded_field_sds; size_t expiry_size = (expiry == EXPIRY_NONE) ? 0 : sizeof(long long); size_t field_len = sdslen(field); @@ -287,7 +281,12 @@ entry *entryUpdate(entry *e, sds value, long long expiry) { } else { /* Skip field content, field null terminator and value sds8 hdr. */ sds old_value = entryGetValue(e); - sdswrite(sdsAllocPtr(old_value), sdsAllocSize(old_value), SDS_TYPE_8, value, sdslen(value)); + /* We are reusing the same entry memory in order to store a potentially new value. + * In such cases the old value's alloc was adjusted to the real size of the buffer part it was embedded in. + * Since we can potentially write a smaller value here, which requires less allocation space, we would like to + * inherit the old value memory allocation size. */ + size_t value_size = sdsHdrSize(SDS_TYPE_8) + sdsalloc(value) + 1; + sdswrite(sdsAllocPtr(old_value), value_size, SDS_TYPE_8, value, sdslen(value)); sdsfree(value); } } diff --git a/src/entry.h b/src/entry.h index edaea1cb51..08de4ef681 100644 --- a/src/entry.h +++ b/src/entry.h @@ -6,7 +6,6 @@ typedef void entry; -sds *entryGetValueRef(const entry *entry); sds entryGetField(const entry *entry); sds entryGetValue(const entry *entry); entry *entrySetValue(entry *entry, sds value); diff --git a/src/hashtable.c b/src/hashtable.c index 27c610afa2..01fc3ac7d9 100644 --- a/src/hashtable.c +++ b/src/hashtable.c @@ -366,14 +366,14 @@ typedef struct { void **entries; /* Array of sampled entries.
*/ } scan_samples; +/* --- Internal functions --- */ + /* --- Access API --- */ static inline hashtableEntryValidationState validateElementIfNeeded(hashtable *ht, void *elem) { if (ht->type->validateEntry == NULL) return ENTRY_VALID; return ht->type->validateEntry(ht, elem); } -/* --- Internal functions --- */ - static bucket *findBucketForInsert(hashtable *ht, uint64_t hash, int *pos_in_bucket, int *table_index); static inline void freeEntry(hashtable *ht, void *entry) { diff --git a/src/server.h b/src/server.h index e393458c3c..a06b97621c 100644 --- a/src/server.h +++ b/src/server.h @@ -1633,9 +1633,9 @@ typedef enum { /* Return value for getExpirationPolicy */ typedef enum { - POLICY_IGNORE_EXPIRE, /* Ignore expiration time of items and treat them as valid. */ - POLICY_KEEP_EXPIRED, /* Ignore items which are expired but do not actively delete them. */ - POLICY_DELETE_EXPIRED /* Delete expired keys on access. */ + POLICY_IGNORE_EXPIRE, /* Ignore expiration time of items and treat them as valid. */ + POLICY_KEEP_EXPIRED, /* Ignore items which are expired but do not actively delete them. */ + POLICY_DELETE_EXPIRED /* Delete expired keys on access. 
*/ } expirationPolicy; struct valkeyServer { From 49a1dc2b36d7347c49a75d84d5cc2bb11b124b76 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Fri, 20 Jun 2025 00:18:28 +0300 Subject: [PATCH 004/119] pr comments Signed-off-by: Ran Shidlansik --- src/sds.c | 1 - src/t_hash.c | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/sds.c b/src/sds.c index 02ffa405b5..0ebfbdd0ff 100644 --- a/src/sds.c +++ b/src/sds.c @@ -34,7 +34,6 @@ #include #include #include -#include #include "serverassert.h" #include "sds.h" #include "sdsalloc.h" diff --git a/src/t_hash.c b/src/t_hash.c index 6a2f7d85af..693d57a997 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -89,8 +89,7 @@ void hashTypeIgnoreTTL(robj *o, bool ignore) { } } -static volatile_set * -hashTypeGetOrcreateVolatileSet(robj *o) { +static volatile_set *hashTypeGetOrcreateVolatileSet(robj *o) { serverAssert(o->encoding == OBJ_ENCODING_HASHTABLE); volatile_set **volatile_set_ref = hashtableMetadata(o->ptr); if (*volatile_set_ref == NULL) { From cc59a7e11de39c4c83baaf0c153a08113dad6074 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Sun, 22 Jun 2025 10:05:30 +0300 Subject: [PATCH 005/119] address pr comments. 
introduce expire.h Signed-off-by: Ran Shidlansik --- src/db.c | 1 + src/expire.h | 39 +++++++++++++++++++++++++++++++++++++++ src/monotonic.h | 2 ++ src/rdb.c | 2 +- src/server.h | 35 +++-------------------------------- src/t_hash.c | 2 +- 6 files changed, 47 insertions(+), 34 deletions(-) create mode 100644 src/expire.h diff --git a/src/db.c b/src/db.c index 53c21d29d5..bb1f8a0e55 100644 --- a/src/db.c +++ b/src/db.c @@ -35,6 +35,7 @@ #include "io_threads.h" #include "module.h" #include "vector.h" +#include "expire.h" #include #include diff --git a/src/expire.h b/src/expire.h new file mode 100644 index 0000000000..a41c1a70c0 --- /dev/null +++ b/src/expire.h @@ -0,0 +1,39 @@ +#ifndef EXPIRE_H +#define EXPIRE_H + +#include +#include "monotonic.h" + +#define ACTIVE_EXPIRE_CYCLE_SLOW 0 +#define ACTIVE_EXPIRE_CYCLE_FAST 1 + +/* Command flags for items expiration update conditions */ +#define EXPIRE_NX (1 << 0) +#define EXPIRE_XX (1 << 1) +#define EXPIRE_GT (1 << 2) +#define EXPIRE_LT (1 << 3) + +/* Return values for expireIfNeeded */ +typedef enum { + KEY_VALID = 0, /* Could be volatile and not yet expired, non-volatile, or even non-existing key. */ + KEY_EXPIRED, /* Logically expired but not yet deleted. */ + KEY_DELETED /* The key was deleted now. */ +} keyStatus; + +/* Return value for getExpirationPolicy */ +typedef enum { + POLICY_IGNORE_EXPIRE, /* Ignore expiration time of items and treat them as valid. */ + POLICY_KEEP_EXPIRED, /* Ignore items which are expired but do not actively delete them. */ + POLICY_DELETE_EXPIRED /* Delete expired keys on access. 
*/ +} expirationPolicy; + +/* Forward declarations */ +typedef struct client client; +typedef struct serverObject robj; + +int timestampIsExpired(mstime_t when); +expirationPolicy getExpirationPolicyWithFlags(int flags); +int parseExtendedExpireArgumentsOrReply(client *c, int *flags, int max_args); +int convertExpireArgumentToUnixTime(client *c, robj *arg, long long basetime, int unit, long long *unixtime); + +#endif diff --git a/src/monotonic.h b/src/monotonic.h index b465f90b10..2880cda858 100644 --- a/src/monotonic.h +++ b/src/monotonic.h @@ -20,6 +20,8 @@ * variable is associated with the monotonic clock and should not be confused * with other types of time.*/ typedef uint64_t monotime; +typedef long long mstime_t; /* millisecond time type. */ +typedef long long ustime_t; /* microsecond time type. */ /* Retrieve counter of micro-seconds relative to an arbitrary point in time. */ extern monotime (*getMonotonicUs)(void); diff --git a/src/rdb.c b/src/rdb.c index 08d1ada6e1..4c3510b789 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -967,7 +967,7 @@ ssize_t rdbSaveObject(rio *rdb, robj *o, robj *key, int dbid) { } nwritten += n; /* check if need to add expired time for the hash elements */ - int add_expiry = hashTypeHasVolatileElements(o); + bool add_expiry = hashTypeHasVolatileElements(o); hashtableIterator iter; hashtableInitIterator(&iter, ht, HASHTABLE_ITER_SKIP_VALIDATION); void *next; diff --git a/src/server.h b/src/server.h index a06b97621c..a11d565d3d 100644 --- a/src/server.h +++ b/src/server.h @@ -62,9 +62,6 @@ #define static_assert(expr, lit) extern char __static_assert_failure[(expr) ? 1 : -1] #endif -typedef long long mstime_t; /* millisecond time type. */ -typedef long long ustime_t; /* microsecond time type. */ - #include "ae.h" /* Event driven programming library */ #include "sds.h" /* Dynamic safe strings */ #include "dict.h" /* Hash tables (old implementation) */ @@ -79,7 +76,8 @@ typedef long long ustime_t; /* microsecond time type. 
*/ #include "sparkline.h" /* ASCII graphs API */ #include "quicklist.h" /* Lists are encoded as linked lists of N-elements flat arrays */ -#include "rax.h" /* Radix tree */ +#include "expire.h" /* Expiration public API */ + #include "rax.h" /* Radix tree */ #include "connection.h" /* Connection abstraction */ #include "memory_prefetch.h" #include "volatile_set.h" @@ -164,9 +162,6 @@ struct hdr_histogram; #define CLIENT_MEM_USAGE_BUCKET_MAX_LOG 33 /* Bucket for largest clients: sizes above 4GB (2^32) */ #define CLIENT_MEM_USAGE_BUCKETS (1 + CLIENT_MEM_USAGE_BUCKET_MAX_LOG - CLIENT_MEM_USAGE_BUCKET_MIN_LOG) -#define ACTIVE_EXPIRE_CYCLE_SLOW 0 -#define ACTIVE_EXPIRE_CYCLE_FAST 1 - /* Children process will exit with this status code to signal that the * process terminated without an error: this is useful in order to kill * a saving child (RDB or AOF one), without triggering in the parent the @@ -323,11 +318,6 @@ extern int configOOMScoreAdjValuesDefaults[CONFIG_OOM_COUNT]; /* Key flags for when access type is unknown */ #define CMD_KEY_FULL_ACCESS (CMD_KEY_RW | CMD_KEY_ACCESS | CMD_KEY_UPDATE) -#define EXPIRE_NX (1 << 0) -#define EXPIRE_XX (1 << 1) -#define EXPIRE_GT (1 << 2) -#define EXPIRE_LT (1 << 3) - /* Key flags for how key is removed */ #define DB_FLAG_KEY_NONE 0 #define DB_FLAG_KEY_DELETED (1ULL << 0) @@ -1623,21 +1613,6 @@ typedef enum childInfoType { CHILD_INFO_TYPE_RDB_COW_SIZE, CHILD_INFO_TYPE_MODULE_COW_SIZE } childInfoType; - -/* Return values for expireIfNeeded */ -typedef enum { - KEY_VALID = 0, /* Could be volatile and not yet expired, non-volatile, or even non-existing key. */ - KEY_EXPIRED, /* Logically expired but not yet deleted. */ - KEY_DELETED /* The key was deleted now. */ -} keyStatus; - -/* Return value for getExpirationPolicy */ -typedef enum { - POLICY_IGNORE_EXPIRE, /* Ignore expiration time of items and treat them as valid. */ - POLICY_KEEP_EXPIRED, /* Ignore items which are expired but do not actively delete them. 
*/ - POLICY_DELETE_EXPIRED /* Delete expired keys on access. */ -} expirationPolicy; - struct valkeyServer { /* General */ pid_t pid; /* Main process pid. */ @@ -2685,7 +2660,6 @@ int validateProcTitleTemplate(const char *template); int serverCommunicateSystemd(const char *sd_notify_msg); void serverSetCpuAffinity(const char *cpulist); void dictVanillaFree(void *val); -int timestampIsExpired(mstime_t when); /* ERROR STATS constants */ @@ -2867,10 +2841,7 @@ int processIOThreadsWriteDone(void); void releaseReplyReferences(client *c); void resetLastWrittenBuf(client *c); -expirationPolicy getExpirationPolicyWithFlags(int flags); -int parseExtendedExpireArgumentsOrReply(client *c, int *flags, int max_args); int parseExtendedCommandArgumentsOrReply(client *c, int *flags, int *unit, robj **expire, robj **compare_val, int command_type, int max_args); -int convertExpireArgumentToUnixTime(client *c, robj *arg, long long basetime, int unit, long long *unixtime); /* logreqres.c - logging of requests and responses */ void reqresReset(client *c, int free_buf); @@ -3386,7 +3357,7 @@ robj *hashTypeLookupWriteOrCreate(client *c, robj *key); robj *hashTypeGetValueObject(robj *o, sds field); int hashTypeSet(robj *o, sds field, sds value, long long expiry, int flags); robj *hashTypeDup(robj *o); -int hashTypeHasVolatileElements(robj *o); +bool hashTypeHasVolatileElements(robj *o); size_t hashTypeNumVolatileElements(robj *o); /* Pub / Sub */ diff --git a/src/t_hash.c b/src/t_hash.c index 693d57a997..e711ec5051 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -66,7 +66,7 @@ void hashTypeFreeVolatileSet(robj *o) { freeVolatileSet(set); } -int hashTypeHasVolatileElements(robj *o) { +bool hashTypeHasVolatileElements(robj *o) { return ((o->encoding == OBJ_ENCODING_HASHTABLE) && (hashTypeGetVolatileSet(o) != NULL)); } From 26a7bb84504b028f87c89de87dfc3c075219df7b Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Sun, 22 Jun 2025 19:41:03 +0300 Subject: [PATCH 006/119] Fix 
HSETEX/HGETEX/HEXPIRE propagation. When an already-expired timestamp is provided to these commands, the replica will not process it, so we propagate an explicit HDEL instead. Signed-off-by: Ran Shidlansik --- src/t_hash.c | 183 +++++++++++++++++++++++++------------- tests/unit/hashexpire.tcl | 80 +++++++++++++++++ 2 files changed, 200 insertions(+), 63 deletions(-) diff --git a/src/t_hash.c b/src/t_hash.c index e711ec5051..3e048ff405 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -43,12 +43,9 @@ #include "entry.h" -int hashTypeExpireEntry(entry *entry); - volatileEntryType hashVolatileEntryType = { .entryGetKey = (sds(*)(const void *entry))entryGetField, .getExpiry = (long long (*)(const void *entry))entryGetExpiry, - .expire = hashTypeExpireEntry, }; /*----------------------------------------------------------------------------- @@ -147,12 +144,6 @@ static void hashTypeTrackUpdateEntry(robj *o, void *old_entry, void *new_entry, } } -int hashTypeExpireEntry(void *entry) { - // TBD - UNUSED(entry); - return 1; -} - hashtableEntryValidationState hashHashtableTypeValidate(hashtable *ht, void *entry) { UNUSED(ht); expirationPolicy policy = getExpirationPolicyWithFlags(0); @@ -408,12 +399,13 @@ int hashTypeSet(robj *o, sds field, sds value, long long expiry, int flags) { } else { /* exists: replace value */ long long entry_expiry = entryGetExpiry(existing); - /* It is possible that the entry is already expired. In this case we can override it, but we need to make sure to treat it - * like it did not exist. */ + /* It is possible that the entry is already expired. In this case we can override it, but we need to make sure to expire it first + * and treat it like it did not exist. */ int is_expired = timestampIsExpired(entry_expiry); - /* In case the HASH_SET_KEEP_EXPIRY will force keeping the existing entry expiry. */ - if (!is_expired && (flags & HASH_SET_KEEP_EXPIRY)) + if (!is_expired && flags & HASH_SET_KEEP_EXPIRY) { + /* HASH_SET_KEEP_EXPIRY forces keeping the existing entry's expiry.
*/ - if (!is_expired && (flags & HASH_SET_KEEP_EXPIRY)) + if (!is_expired && flags & HASH_SET_KEEP_EXPIRY) { + /* In case the HASH_SET_KEEP_EXPIRY will force keeping the existing entry expiry. */ expiry = entry_expiry; + } void *new_entry = entryUpdate(existing, v, expiry); if (new_entry != existing) { /* It has been reallocated. */ @@ -1118,6 +1110,8 @@ void hsetexCommand(client *c) { int i = 0; int set_flags = HASH_SET_COPY, set_expired = 0; int changes = 0; + robj **new_argv = NULL; + int new_argc = 0; for (; fields_index < c->argc; fields_index++) { if (!strcasecmp(c->argv[fields_index]->ptr, "fields")) { @@ -1159,7 +1153,6 @@ void hsetexCommand(client *c) { if (((flags & OBJ_PXAT) || (flags & OBJ_EXAT)) && checkAlreadyExpired(when)) { set_expired = 1; - when = 0; } } @@ -1174,35 +1167,60 @@ void hsetexCommand(client *c) { } } + /* In case we are expiring all the elements prepare a new argv since we are going to delete all the expired fields. */ + if (set_expired) { + new_argv = zmalloc(sizeof(robj *) * (num_fields + 2)); + new_argv[new_argc++] = shared.hdel; + incrRefCount(shared.hdel); + new_argv[new_argc++] = c->argv[1]; + incrRefCount(c->argv[1]); + } + for (i = fields_index; i < c->argc; i += 2) { if (set_expired) { - changes += hashTypeDelete(o, c->argv[i]->ptr); + if (hashTypeDelete(o, c->argv[i]->ptr)) { + new_argv[new_argc++] = c->argv[i]; + incrRefCount(c->argv[i]); + changes++; + } } else { hashTypeSet(o, c->argv[i]->ptr, c->argv[i + 1]->ptr, when, set_flags); changes++; } } - if (expire) { - /* Propagate as HSETEX Key Value PXAT millisecond-timestamp if there is - * EX/PX/EXAT flag. 
*/ - if (!(flags & OBJ_PXAT)) { - for (int i = 2; i < fields_index; i++) { - if (c->argv[i + 1] == expire) { - robj *milliseconds_obj = createStringObjectFromLongLong(when); - rewriteClientCommandArgument(c, i, shared.pxat); - rewriteClientCommandArgument(c, i + 1, milliseconds_obj); - decrRefCount(milliseconds_obj); - break; + + if (changes) { + if (set_expired) { + replaceClientCommandVector(c, new_argc, new_argv); + /* We would like to reduce the number of hexpired events in case there are potential many expired fields. */ + notifyKeyspaceEvent(NOTIFY_HASH, "hexpired", c->argv[1], c->db->id); + } else if (expire) { + /* Propagate as HSETEX Key Value PXAT millisecond-timestamp if there is + * EX/PX/EXAT flag. */ + if (!(flags & OBJ_PXAT)) { + for (int i = 2; i < fields_index; i++) { + if (c->argv[i + 1] == expire) { + robj *milliseconds_obj = createStringObjectFromLongLong(when); + rewriteClientCommandArgument(c, i, shared.pxat); + rewriteClientCommandArgument(c, i + 1, milliseconds_obj); + decrRefCount(milliseconds_obj); + break; + } } } + notifyKeyspaceEvent(NOTIFY_HASH, "hexpire", c->argv[1], c->db->id); } - notifyKeyspaceEvent(NOTIFY_HASH, "hexpire", c->argv[1], c->db->id); - if (set_expired && changes) - notifyKeyspaceEvent(NOTIFY_HASH, "hexpired", c->argv[1], c->db->id); + signalModifiedKey(c, c->db, c->argv[1]); + /* Delete the object in case it was left empty */ + if (hashTypeLength(o) == 0) { + dbDelete(c->db, c->argv[1]); + notifyKeyspaceEvent(NOTIFY_GENERIC, "del", c->argv[1], c->db->id); + } + server.dirty += changes; + } else { + if (new_argv) zfree(new_argv); } - signalModifiedKey(c, c->db, c->argv[1]); notifyKeyspaceEvent(NOTIFY_HASH, "hset", c->argv[1], c->db->id); - server.dirty += changes; addReplyLongLong(c, changes == num_fields ? 1 : 0); } @@ -1267,23 +1285,26 @@ void hgetexCommand(client *c) { initDeferredReplyBuffer(c); addReplyArrayLen(c, num_fields); - /* This command is never propagated as is. 
It is either propagated as HPEXPIREAT or PERSIST. + /* This command is never propagated as is. It is either propagated as HDEL, HPEXPIREAT or PERSIST. * This why it doesn't need special handling in feedAppendOnlyFile to convert relative expire time to absolute one. */ if (set_expiry || set_expired || persist) { /* allocate a new client argv for replicating the command. */ new_argv = zmalloc(sizeof(robj *) * (num_fields + 5)); - if (persist) + if (set_expired) + new_argv[new_argc++] = shared.hdel; + else if (persist) new_argv[new_argc++] = shared.hpersist; else new_argv[new_argc++] = shared.hpexpireat; new_argv[new_argc++] = c->argv[1]; + incrRefCount(c->argv[1]); if (set_expiry || set_expired) { new_argv[new_argc++] = NULL; // placeholder for the expiration time milliseconds_index = new_argc - 1; + new_argv[new_argc++] = shared.fields; + new_argv[new_argc++] = NULL; // placeholder for the number of objects } - new_argv[new_argc++] = shared.fields; - new_argv[new_argc++] = NULL; // placeholder for the number of objects numitems_index = new_argc - 1; } for (i = fields_index; i < c->argc; i++) { @@ -1299,27 +1320,39 @@ void hgetexCommand(client *c) { if (changed) { changes++; new_argv[new_argc++] = c->argv[i]; + incrRefCount(c->argv[i]); } } + + /* rewrite the command vector and persist in case there are changes. + * Also notify keyspace notifications and signal the key was changed. 
*/ if (changes) { - if (set_expiry) { + if (milliseconds_index > 0) { milliseconds_obj = createStringObjectFromLongLong(when); new_argv[milliseconds_index] = milliseconds_obj; + incrRefCount(milliseconds_obj); + } + if (numitems_index > 0) { + numitems_obj = createStringObjectFromLongLong(changes); + new_argv[numitems_index] = numitems_obj; + incrRefCount(numitems_obj); } - numitems_obj = createStringObjectFromLongLong(changes); - new_argv[numitems_index] = numitems_obj; - - for (i = 0; i < new_argc; i++) - if (new_argv[i]) - incrRefCount(new_argv[i]); replaceClientCommandVector(c, new_argc, new_argv); - server.dirty += changes; - signalModifiedKey(c, c->db, c->argv[1]); if (set_expired) notifyKeyspaceEvent(NOTIFY_HASH, "hexpired", c->argv[1], c->db->id); - notifyKeyspaceEvent(NOTIFY_HASH, set_expiry ? "hexpire" : "hpersist", c->argv[1], c->db->id); + else + notifyKeyspaceEvent(NOTIFY_HASH, set_expiry ? "hexpire" : "hpersist", c->argv[1], c->db->id); if (milliseconds_obj) decrRefCount(milliseconds_obj); if (numitems_obj) decrRefCount(numitems_obj); + + server.dirty += changes; + signalModifiedKey(c, c->db, c->argv[1]); + + /* Delete the object in case it was left empty */ + if (hashTypeLength(o) == 0) { + dbDelete(c->db, c->argv[1]); + notifyKeyspaceEvent(NOTIFY_GENERIC, "del", c->argv[1], c->db->id); + } } else { if (new_argv) zfree(new_argv); } @@ -1414,6 +1447,9 @@ void hexpireGenericCommand(client *c, long long basetime, int unit) { int fields_index = 3; long long num_fields = 0; int i, result = 0, expired = 0, updated = 0; + int set_expired = 0; + robj **new_argv = NULL; + int new_argc = 0; for (; fields_index < c->argc; fields_index++) { if (!strcasecmp(c->argv[fields_index]->ptr, "fields")) { @@ -1434,7 +1470,7 @@ void hexpireGenericCommand(client *c, long long basetime, int unit) { return; if (checkAlreadyExpired(when)) - when = 0; + set_expired = 1; robj *obj = lookupKeyWrite(c->db, key); @@ -1445,38 +1481,59 @@ void hexpireGenericCommand(client *c, long 
long basetime, int unit) { /* From this point we would return array reply */ addReplyArrayLen(c, num_fields); + /* In case we are expiring all the elements prepare a new argv since we are going to delete all the expired fields. */ + if (set_expired) { + new_argv = zmalloc(sizeof(robj *) * (num_fields + 3)); + new_argv[new_argc++] = shared.hdel; + incrRefCount(shared.hdel); + new_argv[new_argc++] = c->argv[1]; + incrRefCount(c->argv[1]); + } + for (i = 0; i < num_fields; i++) { - if (when == 0) { - result = -2; + result = -2; + if (set_expired) { if (hashTypeDelete(obj, c->argv[fields_index + i]->ptr)) { + /* In case we deleted the field, add it to the new hdel command vector. */ + new_argv[new_argc++] = c->argv[fields_index + i]; + incrRefCount(c->argv[fields_index + i]); result = 2; expired++; } } else { result = hashTypeSetExpire(obj, c->argv[fields_index + i]->ptr, when, flag); - updated++; + if (result == 1) updated++; } - server.dirty += (result > 0 ? 1 : 0); // in case there was a change increment the dirty addReplyLongLong(c, result); } if (expired || updated) { - /* Propagate as HPEXPIREAT millisecond-timestamp - * Only rewrite the command arg if not already HPEXPIREAT */ - if (c->cmd->proc != hpexpireAtCommand) { - rewriteClientCommandArgument(c, 0, shared.hpexpireat); - } + if (expired) { + replaceClientCommandVector(c, new_argc, new_argv); + /* We would like to reduce the number of hexpired events in case there are potential many expired fields. 
*/ + notifyKeyspaceEvent(NOTIFY_HASH, "hexpired", c->argv[1], c->db->id); + } else if (updated) { + /* Propagate as HPEXPIREAT millisecond-timestamp + * Only rewrite the command arg if not already HPEXPIREAT */ + if (c->cmd->proc != hpexpireAtCommand) { + rewriteClientCommandArgument(c, 0, shared.hpexpireat); + } - /* Avoid creating a string object when it's the same as argv[2] parameter */ - if (basetime != 0 || unit == UNIT_SECONDS) { - robj *when_obj = createStringObjectFromLongLong(when); - rewriteClientCommandArgument(c, 2, when_obj); - decrRefCount(when_obj); + /* Avoid creating a string object when it's the same as argv[2] parameter */ + if (basetime != 0 || unit == UNIT_SECONDS) { + robj *when_obj = createStringObjectFromLongLong(when); + rewriteClientCommandArgument(c, 2, when_obj); + decrRefCount(when_obj); + } + notifyKeyspaceEvent(NOTIFY_HASH, "hexpire", c->argv[1], c->db->id); } - if (expired) - notifyKeyspaceEvent(NOTIFY_HASH, "hexpired", c->argv[1], c->db->id); - notifyKeyspaceEvent(NOTIFY_HASH, "hexpire", c->argv[1], c->db->id); + server.dirty += (expired + updated); // in case there was a change increment the dirty signalModifiedKey(c, c->db, obj); + /* Delete the object in case it was left empty */ + if (hashTypeLength(obj) == 0) { + dbDelete(c->db, c->argv[1]); + notifyKeyspaceEvent(NOTIFY_GENERIC, "del", c->argv[1], c->db->id); + } } } diff --git a/tests/unit/hashexpire.tcl b/tests/unit/hashexpire.tcl index 5f8416b619..d042e5c008 100644 --- a/tests/unit/hashexpire.tcl +++ b/tests/unit/hashexpire.tcl @@ -1100,5 +1100,85 @@ start_server {tags {"hashexpire external:skip"}} { } + test {HSETEX with expired time is propagated to the replica} { + $primary flushall + + assert_equal [$primary HSET myhash f1 val1] "1" + + wait_for_condition 100 100 { + [$replica HGET myhash f1] eq {val1} + } else { + fail "hash field was not set on replica after timeout" + } + + assert_equal [$primary HSETEX myhash EXAT 0 FIELDS 1 f1 val1] {1} + + wait_for_condition 100 
100 { + [$primary EXISTS myhash] eq "0" + } else { + fail "hash object was not deleted on primary after timeout" + } + wait_for_ofs_sync $primary $replica + + wait_for_condition 100 100 { + [$replica EXISTS myhash] eq "0" + } else { + fail "hash object was not deleted on replica after timeout" + } + } + + test {HGETEX with expired time is propagated to the replica} { + $primary flushall + + assert_equal [$primary HSET myhash f1 val1] "1" + + wait_for_condition 100 100 { + [$replica HGET myhash f1] eq {val1} + } else { + fail "hash field was not set on replica after timeout" + } + + assert_equal [$primary HGETEX myhash EXAT 0 FIELDS 1 f1] {val1} + + wait_for_condition 100 100 { + [$primary EXISTS myhash] eq "0" + } else { + fail "hash object was not deleted on primary after timeout" + } + wait_for_ofs_sync $primary $replica + + wait_for_condition 100 100 { + [$replica EXISTS myhash] eq "0" + } else { + fail "hash object was not deleted on replica after timeout" + } + } + test {HEXPIREAT with expired time is propagated to the replica} { + $primary flushall + + assert_equal [$primary HSET myhash f1 val1] "1" + + wait_for_condition 100 100 { + [$replica HGET myhash f1] eq {val1} + } else { + fail "hash field was not set on replica after timeout" + } + + assert_equal [$primary HEXPIREAT myhash 0 FIELDS 1 f1] {2} + + wait_for_condition 100 100 { + [$primary EXISTS myhash] eq "0" + } else { + fail "hash object was not deleted on primary after timeout" + } + wait_for_ofs_sync $primary $replica + + wait_for_condition 100 100 { + [$replica EXISTS myhash] eq "0" + } else { + fail "hash object was not deleted on replica after timeout" + } + } + } } \ No newline at end of file From f8c735665db61f8aafa7120ee57381c24c58888e Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Mon, 23 Jun 2025 09:02:55 +0300 Subject: [PATCH 007/119] remove hashTypeNumVolatileElements and dependancy of AOF rewrite Signed-off-by: Ran Shidlansik --- src/aof.c | 48 
++++++++++++++++++++++++------------------------ src/t_hash.c | 9 +-------- 2 files changed, 25 insertions(+), 32 deletions(-) diff --git a/src/aof.c b/src/aof.c index 301b81b447..e6976bb516 100644 --- a/src/aof.c +++ b/src/aof.c @@ -1955,12 +1955,32 @@ static int rioWriteHashIteratorCursor(rio *r, hashTypeIterator *hi, int what) { * The function returns 0 on error, 1 on success. */ int rewriteHashObject(rio *r, robj *key, robj *o) { hashTypeIterator hi; - long long count = 0, volatile_items = hashTypeNumVolatileElements(o), items = hashTypeLength(o) - volatile_items; - + long long count = 0, volatile_items = 0, non_volatile_items; + /* First serialize volatile items if exist */ + if (hashTypeHasVolatileElements(o)) { + hashTypeInitVolatileIterator(o, &hi); + while (hashTypeNext(&hi) != C_ERR) { + long long expiry = entryGetExpiry(hi.next); + sds field = entryGetField(hi.next); + sds value = entryGetValue(hi.next); + if (rioWriteBulkCount(r, '*', 8) == 0) return 0; + if (rioWriteBulkString(r, "HSETEX", 6) == 0) return 0; + if (rioWriteBulkObject(r, key) == 0) return 0; + if (rioWriteBulkString(r, "PXAT", 4) == 0) return 0; + if (rioWriteBulkLongLong(r, expiry) == 0) return 0; + if (rioWriteBulkString(r, "FIELDS", 6) == 0) return 0; + if (rioWriteBulkLongLong(r, 1) == 0) return 0; + if (rioWriteBulkString(r, field, sdslen(field)) == 0) return 0; + if (rioWriteBulkString(r, value, sdslen(value)) == 0) return 0; + volatile_items++; + } + hashTypeResetIterator(&hi); + } + non_volatile_items = hashTypeLength(o) - volatile_items; hashTypeInitIterator(o, &hi); while (hashTypeNext(&hi) != C_ERR) { if (count == 0) { - int cmd_items = (items > AOF_REWRITE_ITEMS_PER_CMD) ? AOF_REWRITE_ITEMS_PER_CMD : items; + int cmd_items = (non_volatile_items > AOF_REWRITE_ITEMS_PER_CMD) ? 
AOF_REWRITE_ITEMS_PER_CMD : non_volatile_items; if (!rioWriteBulkCount(r, '*', 2 + cmd_items * 2) || !rioWriteBulkString(r, "HMSET", 5) || !rioWriteBulkObject(r, key)) { @@ -1977,30 +1997,10 @@ int rewriteHashObject(rio *r, robj *key, robj *o) { return 0; } if (++count == AOF_REWRITE_ITEMS_PER_CMD) count = 0; - items--; + non_volatile_items--; } hashTypeResetIterator(&hi); - - /* Now serialize volatile items if exist */ - if (hashTypeHasVolatileElements(o)) { - hashTypeInitVolatileIterator(o, &hi); - while (hashTypeNext(&hi) != C_ERR) { - long long expiry = entryGetExpiry(hi.next); - sds field = entryGetField(hi.next); - sds value = entryGetValue(hi.next); - if (rioWriteBulkCount(r, '*', 8) == 0) return 0; - if (rioWriteBulkString(r, "HSETEX", 6) == 0) return 0; - if (rioWriteBulkObject(r, key) == 0) return 0; - if (rioWriteBulkString(r, "PXAT", 4) == 0) return 0; - if (rioWriteBulkLongLong(r, expiry) == 0) return 0; - if (rioWriteBulkString(r, "FIELDS", 6) == 0) return 0; - if (rioWriteBulkLongLong(r, 1) == 0) return 0; - if (rioWriteBulkString(r, field, sdslen(field)) == 0) return 0; - if (rioWriteBulkString(r, value, sdslen(value)) == 0) return 0; - } - hashTypeResetIterator(&hi); - } return 1; } diff --git a/src/t_hash.c b/src/t_hash.c index 3e048ff405..8fae017bca 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -64,14 +64,7 @@ void hashTypeFreeVolatileSet(robj *o) { } bool hashTypeHasVolatileElements(robj *o) { - return ((o->encoding == OBJ_ENCODING_HASHTABLE) && (hashTypeGetVolatileSet(o) != NULL)); -} - -size_t hashTypeNumVolatileElements(robj *o) { - if (hashTypeHasVolatileElements(o)) { - return volatileSetNumEntries(hashTypeGetVolatileSet(o)); - } - return 0; + return ((o->encoding == OBJ_ENCODING_HASHTABLE) && (hashtableGetType(o->ptr) == &hashWithVolatileItemsHashtableType)); } /* make any access to the hash object elements ignore the specific elements expiration. 
From ea79101c8276e07e50ef785ab4ca7fdec7f62905 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Wed, 25 Jun 2025 10:44:39 +0300 Subject: [PATCH 008/119] address Jim early PR comments Signed-off-by: Ran Shidlansik --- src/db.c | 67 ---------------------------------------------------- src/entry.c | 42 ++++++++++++++------------------ src/entry.h | 6 ++--- src/expire.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++ src/expire.h | 4 ++++ src/object.c | 2 +- 6 files changed, 89 insertions(+), 95 deletions(-) diff --git a/src/db.c b/src/db.c index bb1f8a0e55..4548cbc7c1 100644 --- a/src/db.c +++ b/src/db.c @@ -44,10 +44,6 @@ * C-level DB API *----------------------------------------------------------------------------*/ -/* Flags for expireIfNeeded */ -#define EXPIRE_FORCE_DELETE_EXPIRED 1 -#define EXPIRE_AVOID_DELETE_EXPIRED 2 - static keyStatus expireIfNeededWithDictIndex(serverDb *db, robj *key, robj *val, int flags, int dict_index); static keyStatus expireIfNeeded(serverDb *db, robj *key, robj *val, int flags); static int keyIsExpiredWithDictIndex(serverDb *db, robj *key, int dict_index); @@ -1885,69 +1881,6 @@ void propagateDeletion(serverDb *db, robj *key, int lazy) { server.replication_allowed = prev_replication_allowed; } -/* Returns 1 if the expire value is expired, 0 otherwise. */ -int timestampIsExpired(mstime_t when) { - if (when < 0) return 0; /* no expire */ - mstime_t now = commandTimeSnapshot(); - - /* The key expired if the current (virtual or real) time is greater - * than the expire time of the key. */ - return now > when; -} - -/* This function verify if the current conditions allow expiration of keys and fields. - * For some cases expiration is not allowed, but we would still like to ignore the key - * so to treat it as "expired" without actively deleting it. 
*/ -expirationPolicy getExpirationPolicyWithFlags(int flags) { - if (server.loading) return POLICY_IGNORE_EXPIRE; - - /* If we are running in the context of a replica, instead of - * evicting the expired key from the database, we return ASAP: - * the replica key expiration is controlled by the primary that will - * send us synthesized DEL operations for expired keys. The - * exception is when write operations are performed on writable - * replicas. - * - * Still we try to reflect the correct state to the caller, - * that is, POLICY_KEEP_EXPIRED so that the key will be ignored, but not deleted. - * - * When replicating commands from the primary, keys are never considered - * expired, so we return POLICY_IGNORE_EXPIRE */ - if (server.primary_host != NULL) { - if (server.current_client && (server.current_client->flag.primary)) return POLICY_IGNORE_EXPIRE; - if (!(flags & EXPIRE_FORCE_DELETE_EXPIRED)) return POLICY_KEEP_EXPIRED; - } else if (server.import_mode) { - /* If we are running in the import mode on a primary, instead of - * evicting the expired key from the database, we return ASAP: - * the key expiration is controlled by the import source that will - * send us synthesized DEL operations for expired keys. The - * exception is when write operations are performed on this server - * because it's a primary. - * - * Notice: other clients, apart from the import source, should not access - * the data imported by import source. - * - * Still we try to reflect the correct state to the caller, - * that is, POLICY_KEEP_EXPIRED so that the key will be ignored, but not deleted. 
- * - * When receiving commands from the import source, keys are never considered - * expired, so we return POLICY_IGNORE_EXPIRE */ - if (server.current_client && (server.current_client->flag.import_source)) return POLICY_IGNORE_EXPIRE; - if (!(flags & EXPIRE_FORCE_DELETE_EXPIRED)) return POLICY_KEEP_EXPIRED; - } - - /* In some cases we're explicitly instructed to return an indication of a - * missing key without actually deleting it, even on primaries. */ - if (flags & EXPIRE_AVOID_DELETE_EXPIRED) return POLICY_KEEP_EXPIRED; - - /* If 'expire' action is paused, for whatever reason, then don't expire any key. - * Typically, at the end of the pause we will properly expire the key OR we - * will have failed over and the new primary will send us the expire. */ - if (isPausedActionsWithUpdate(PAUSE_ACTION_EXPIRE)) return POLICY_KEEP_EXPIRED; - - return POLICY_DELETE_EXPIRED; -} - /* Use this instead of keyIsExpired if you already have the value object. */ static int objectIsExpired(robj *val) { /* Don't expire anything while loading. It will be done later. */ diff --git a/src/entry.c b/src/entry.c index e1df1456e9..a5fff55818 100644 --- a/src/entry.c +++ b/src/entry.c @@ -50,7 +50,7 @@ /* Returns true in case the entry's value is not embedded in the entry. * Returns false otherwise. */ -bool entryHasValuePtr(const entry *entry) { +static inline bool entryHasValuePtr(const entry *entry) { return sdsGetAuxBit(entry, FIELD_SDS_AUX_BIT_ENTRY_HAS_VALUE_PTR); } @@ -81,8 +81,6 @@ sds entryGetValue(const entry *entry) { } else { /* Skip field content, field null terminator and value sds8 hdr. 
*/ size_t offset = sdslen(entry) + 1 + sdsHdrSize(SDS_TYPE_8); - serverAssert((char *)entry + offset); - return (char *)entry + offset; } } @@ -117,8 +115,9 @@ long long entryGetExpiry(const entry *entry) { long long expiry = EXPIRY_NONE; if (entryHasExpiry(entry)) { char *buf = sdsAllocPtr(entry); + debugServerAssert((((uintptr_t)buf & 0x7) == 0)); if (entryHasValuePtr(entry)) buf -= sizeof(sds *); - buf -= sizeof(expiry); + buf -= sizeof(long long); expiry = *(long long *)buf; } return expiry; @@ -138,26 +137,21 @@ entry *entrySetExpiry(entry *e, long long expiry) { } /* Return true in case the entry has assigned expiration or false otherwise. */ -int entryIsExpired(entry *entry) { - /* Don't expire anything while loading. It will be done later. */ - if (server.loading) return 0; - if (!timestampIsExpired(entryGetExpiry(entry))) return 0; - if (server.primary_host == NULL && server.import_mode) { - if (server.current_client && server.current_client->flag.import_source) return 0; - } - return 1; +bool entryIsExpired(entry *entry) { + if (!timestampIsExpired(entryGetExpiry(entry))) return false; + return true; } /**************************************** Entry Expiry API - End *****************************************/ void entryFree(entry *entry) { if (entryHasValuePtr(entry)) { - sdsfree(*entryGetValueRef(entry)); + sdsfree(entryGetValue(entry)); } zfree(entryAllocPtr(entry)); } /* Takes ownership of value. does not take ownership of field */ -entry *entryCreate(sds field, sds value, long long expiry) { +entry *entryCreate(const_sds field, sds value, long long expiry) { sds embedded_field_sds; size_t expiry_size = (expiry == EXPIRY_NONE) ? 
0 : sizeof(long long); size_t field_len = sdslen(field); @@ -192,7 +186,7 @@ entry *entryCreate(sds field, sds value, long long expiry) { * +------+-------+---------------+ */ embed_value = false; - alloc_size += sizeof(sds *); + alloc_size += sizeof(sds); if (field_sds_type == SDS_TYPE_5) { field_sds_type = SDS_TYPE_8; alloc_size -= field_size; @@ -214,8 +208,8 @@ entry *entryCreate(sds field, sds value, long long expiry) { if (value) { if (!embed_value) { *(sds *)buf = value; - buf += sizeof(sds *); - buf_size -= sizeof(sds *); + buf += sizeof(sds); + buf_size -= sizeof(sds); } else { sdswrite(buf + field_size, buf_size - field_size, SDS_TYPE_8, value, value_len); sdsfree(value); @@ -237,13 +231,13 @@ entry *entryUpdate(entry *e, sds value, long long expiry) { sds field = (sds)e; bool update_value = value ? true : false; - long long ttl = entryGetExpiry(e); - bool update_expiry = (expiry != ttl) ? true : false; + long long expiration_time = entryGetExpiry(e); + bool update_expiry = (expiry != expiration_time) ? true : false; if (!update_value && !update_expiry) return e; - ttl = expiry; + expiration_time = expiry; value = update_value ? value : entryGetValue(e); - size_t expiry_size = ttl != EXPIRY_NONE ? sizeof(ttl) : 0; + size_t expiry_size = (expiration_time != EXPIRY_NONE) ? sizeof(expiration_time) : 0; int field_sds_type = sdsReqType(sdslen(field)); if (field_sds_type == SDS_TYPE_5 && (expiry_size > 0)) { field_sds_type = SDS_TYPE_8; @@ -257,7 +251,7 @@ entry *entryUpdate(entry *e, sds value, long long expiry) { /* // We will create a new entry in the following cases: * 1. In the case were we add or remove expiration. * 2. in the case were we are NOT migrating from an embedded entry to an embedded entry with ~the same size. 
*/ - bool create_new_entry = (update_expiry && (entryGetExpiry(e) == EXPIRY_NONE || ttl == EXPIRY_NONE)) || + bool create_new_entry = (update_expiry && (entryGetExpiry(e) == EXPIRY_NONE || expiration_time == EXPIRY_NONE)) || !(update_value && !entryHasValuePtr(e) && required_embedded_size <= EMBED_VALUE_MAX_ALLOC_SIZE && required_embedded_size <= current_embedded_allocation_size && @@ -307,7 +301,7 @@ entry *entryUpdate(entry *e, sds value, long long expiry) { } } - entry *new_entry = entryCreate(entryGetField(e), value, ttl); + entry *new_entry = entryCreate(entryGetField(e), value, expiration_time); if (new_entry != e) entryFree(e); return new_entry; @@ -355,7 +349,7 @@ entry *entryDefrag(entry *entry, void *(*defragfn)(void *), sds (*sdsdefragfn)(s /* Used for releasing memory to OS to avoid unnecessary CoW. Called when we've * forked and memory won't be used again. See zmadvise_dontneed() */ -void dismissEntry(entry *entry) { +void entryDismissMemory(entry *entry) { /* Only dismiss values memory since the field size usually is small. 
*/ if (entryHasValuePtr(entry)) { dismissSds(*entryGetValueRef(entry)); diff --git a/src/entry.h b/src/entry.h index 08de4ef681..33069b5093 100644 --- a/src/entry.h +++ b/src/entry.h @@ -12,13 +12,13 @@ entry *entrySetValue(entry *entry, sds value); long long entryGetExpiry(const entry *entry); bool entryHasExpiry(const entry *entry); entry *entrySetExpiry(entry *entry, long long expiry); -int entryIsExpired(entry *entry); +bool entryIsExpired(entry *entry); void entryFree(entry *entry); -entry *entryCreate(sds field, sds value, long long expiry); +entry *entryCreate(const_sds field, sds value, long long expiry); entry *entryUpdate(entry *entry, sds value, long long expiry); size_t entryMemUsage(entry *entry); entry *entryDefrag(entry *entry, void *(*defragfn)(void *), sds (*sdsdefragfn)(sds)); -void dismissEntry(entry *entry); +void entryDismissMemory(entry *entry); #endif diff --git a/src/expire.c b/src/expire.c index 60d67ad9e4..922adfda97 100644 --- a/src/expire.c +++ b/src/expire.c @@ -815,3 +815,66 @@ void touchCommand(client *c) { if (lookupKeyRead(c->db, c->argv[j]) != NULL) touched++; addReplyLongLong(c, touched); } + +/* Returns 1 if the expire value is expired, 0 otherwise. */ +int timestampIsExpired(mstime_t when) { + if (when < 0) return 0; /* no expire */ + mstime_t now = commandTimeSnapshot(); + + /* The time indicated by 'when' is considered expired if the current (virtual or real) time is greater + * than it. */ + return now > when; +} + +/* This function verify if the current conditions allow expiration of keys and fields. + * For some cases expiration is not allowed, but we would still like to ignore the key + * so to treat it as "expired" without actively deleting it. 
*/ +expirationPolicy getExpirationPolicyWithFlags(int flags) { + if (server.loading) return POLICY_IGNORE_EXPIRE; + + /* If we are running in the context of a replica, instead of + * evicting the expired key from the database, we return ASAP: + * the replica key expiration is controlled by the primary that will + * send us synthesized DEL operations for expired keys. The + * exception is when write operations are performed on writable + * replicas. + * + * Still we try to reflect the correct state to the caller, + * that is, POLICY_KEEP_EXPIRED so that the key will be ignored, but not deleted. + * + * When replicating commands from the primary, keys are never considered + * expired, so we return POLICY_IGNORE_EXPIRE */ + if (server.primary_host != NULL) { + if (server.current_client && (server.current_client->flag.primary)) return POLICY_IGNORE_EXPIRE; + if (!(flags & EXPIRE_FORCE_DELETE_EXPIRED)) return POLICY_KEEP_EXPIRED; + } else if (server.import_mode) { + /* If we are running in the import mode on a primary, instead of + * evicting the expired key from the database, we return ASAP: + * the key expiration is controlled by the import source that will + * send us synthesized DEL operations for expired keys. The + * exception is when write operations are performed on this server + * because it's a primary. + * + * Notice: other clients, apart from the import source, should not access + * the data imported by import source. + * + * Still we try to reflect the correct state to the caller, + * that is, POLICY_KEEP_EXPIRED so that the key will be ignored, but not deleted. 
+ * + * When receiving commands from the import source, keys are never considered + * expired, so we return POLICY_IGNORE_EXPIRE */ + if (server.current_client && (server.current_client->flag.import_source)) return POLICY_IGNORE_EXPIRE; + if (!(flags & EXPIRE_FORCE_DELETE_EXPIRED)) return POLICY_KEEP_EXPIRED; + } + + /* In some cases we're explicitly instructed to return an indication of a + * missing key without actually deleting it, even on primaries. */ + if (flags & EXPIRE_AVOID_DELETE_EXPIRED) return POLICY_KEEP_EXPIRED; + + /* If 'expire' action is paused, for whatever reason, then don't expire any key. + * Typically, at the end of the pause we will properly expire the key OR we + * will have failed over and the new primary will send us the expire. */ + if (isPausedActionsWithUpdate(PAUSE_ACTION_EXPIRE)) return POLICY_KEEP_EXPIRED; + + return POLICY_DELETE_EXPIRED; +} diff --git a/src/expire.h b/src/expire.h index a41c1a70c0..adc28552fc 100644 --- a/src/expire.h +++ b/src/expire.h @@ -4,6 +4,10 @@ #include #include "monotonic.h" +/* Flags for expireIfNeeded */ +#define EXPIRE_FORCE_DELETE_EXPIRED 1 +#define EXPIRE_AVOID_DELETE_EXPIRED 2 + #define ACTIVE_EXPIRE_CYCLE_SLOW 0 #define ACTIVE_EXPIRE_CYCLE_FAST 1 diff --git a/src/object.c b/src/object.c index 2ef0a85ae7..301b963ea4 100644 --- a/src/object.c +++ b/src/object.c @@ -683,7 +683,7 @@ void dismissHashObject(robj *o, size_t size_hint) { hashtableInitIterator(&iter, ht, 0); void *next; while (hashtableNext(&iter, &next)) { - dismissEntry(next); + entryDismissMemory(next); } hashtableResetIterator(&iter); } From 9962f3373b153eae5a5b59ff2242a1476e0cd4ea Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Wed, 25 Jun 2025 11:02:14 +0300 Subject: [PATCH 009/119] Entry unit test Signed-off-by: Ran Shidlansik --- src/unit/test_entry.c | 466 ++++++++++++++++++++++++++++++++++++++++++ src/unit/test_files.h | 7 + 2 files changed, 473 insertions(+) create mode 100644 src/unit/test_entry.c diff --git 
a/src/unit/test_entry.c b/src/unit/test_entry.c new file mode 100644 index 0000000000..79f663c1d9 --- /dev/null +++ b/src/unit/test_entry.c @@ -0,0 +1,466 @@ +#include "../entry.c" +#include "test_help.h" + +#include +#include +#include +#include +#include + +/* Constants for test values */ +#define SHORT_FIELD "foo" +#define SHORT_VALUE "bar" +#define LONG_FIELD "k:123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890" +#define LONG_VALUE "v:12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890" + +/* Verify entry properties */ +static int verify_entry_properties(entry *e, sds field, sds value_copy, long long expiry, bool has_expiry, bool has_valueptr) { + TEST_ASSERT(sdscmp(entryGetField(e), field) == 0); + TEST_ASSERT(sdscmp(entryGetValue(e), value_copy) == 0); + TEST_ASSERT(entryGetExpiry(e) == expiry); + TEST_ASSERT(entryHasExpiry(e) == has_expiry); + TEST_ASSERT(entryHasValuePtr(e) == has_valueptr); + return 0; +} + +/** + * Test entryCreate functionality: + * 1. embedded with expiry + * 2. embedded without expiry + * 3. non-embedded with expiry + * 4. 
non-embedded without expiry + */ +int test_entryCreate(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + + // Test with embedded value with expiry + sds field1 = sdsnew(SHORT_FIELD); + sds value1 = sdsnew(SHORT_VALUE); + sds value_copy1 = sdsdup(value1); // Keep a copy since entryCreate takes ownership of value + long long expiry1 = 100; + entry *e1 = entryCreate(field1, value1, expiry1); + verify_entry_properties(e1, field1, value_copy1, expiry1, true, false); + + // Test with embedded value with no expiry + sds field2 = sdsnew(SHORT_FIELD); + sds value2 = sdsnew(SHORT_VALUE); + sds value_copy2 = sdsdup(value2); + long long expiry2 = EXPIRY_NONE; + entry *e2 = entryCreate(field2, value2, expiry2); + verify_entry_properties(e2, field2, value_copy2, expiry2, false, false); + + // Test with non-embedded field and value with expiry + sds field3 = sdsnew(LONG_FIELD); + sds value3 = sdsnew(LONG_VALUE); + sds value_copy3 = sdsdup(value3); + long long expiry3 = 100; + entry *e3 = entryCreate(field3, value3, expiry3); + verify_entry_properties(e3, field3, value_copy3, expiry3, true, true); + + // Test with non-embedded field and value with no expiry + sds field4 = sdsnew(LONG_FIELD); + sds value4 = sdsnew(LONG_VALUE); + sds value_copy4 = sdsdup(value4); + long long expiry4 = EXPIRY_NONE; + entry *e4 = entryCreate(field4, value4, expiry4); + verify_entry_properties(e4, field4, value_copy4, expiry4, false, true); + + entryFree(e1); + entryFree(e2); + entryFree(e3); + entryFree(e4); + + // Free field as entryCreate doesn't take ownership + sdsfree(field1); + sdsfree(field2); + sdsfree(field3); + sdsfree(field4); + + sdsfree(value_copy1); + sdsfree(value_copy2); + sdsfree(value_copy3); + sdsfree(value_copy4); + + return 0; +} + +/** + * Test entryUpdate with various combinations of value and expiry changes: + * 1. Update only the value (keeping embedded) + * 2. Update only the expiry (keeping embedded) + * 3. 
Update both value and expiry (keeping embedded) + * 4. Update with no changes (should return same entry) + * 5. Update to a value that's too large to be embedded + * 6. Update expiry of a non-embedded entry + * 7. Update from non-embedded back to embedded value + * 8. Update entry to less than 3/4 allocation size + * 9. Update entry to more than 3/4 allocation size + * 10. Update entry to exactly 3/4 allocation size + */ +int test_entryUpdate(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + + // Create embedded entry + sds value1 = sdsnew(SHORT_VALUE); + sds field = sdsnew(SHORT_FIELD); + sds value_copy1 = sdsdup(value1); + long long expiry1 = 100; + entry *e1 = entryCreate(field, value1, expiry1); + verify_entry_properties(e1, field, value_copy1, expiry1, true, false); + + // Update only value (keeping embedded) + sds value2 = sdsnew("bar2"); + sds value_copy2 = sdsdup(value2); + long long expiry2 = expiry1; + entry *e2 = entryUpdate(e1, value2, expiry2); + verify_entry_properties(e2, field, value_copy2, expiry2, true, false); + + // Update only expiry (keeping embedded) + long long expiry3 = 200; + entry *e3 = entryUpdate(e2, NULL, expiry3); + verify_entry_properties(e3, field, value_copy2, expiry3, true, false); + + // Update both value and expiry (keeping embedded) + sds value4 = sdsnew("bar4"); + long long expiry4 = 300; + sds value_copy4 = sdsdup(value4); + entry *e4 = entryUpdate(e3, value4, expiry4); + verify_entry_properties(e4, field, value_copy4, expiry4, true, false); + + // Update with no changes (should return same entry) + entry *e5 = entryUpdate(e4, NULL, expiry4); + verify_entry_properties(e5, field, value_copy4, expiry4, true, false); + TEST_ASSERT(e5 == e4); + + // Update to a value that's too large to be embedded + sds value6 = sdsnew(LONG_VALUE); + sds value_copy6 = sdsdup(value6); + long long expiry6 = expiry4; + entry *e6 = entryUpdate(e5, value6, expiry6); + verify_entry_properties(e6, field, value_copy6, 
expiry6, true, true); + + // Update expiry of a non-embedded entry + long long expiry7 = 400; + entry *e7 = entryUpdate(e6, NULL, expiry7); + verify_entry_properties(e7, field, value_copy6, expiry7, true, true); + + // Update from non-embedded back to embedded value + sds value8 = sdsnew("bar8"); + sds value_copy8 = sdsdup(value8); + long long expiry8 = expiry7; + entry *e8 = entryUpdate(e7, value8, expiry8); + verify_entry_properties(e8, field, value_copy8, expiry8, true, false); + + // Update value with identical value (keeping embedded) + sds value9 = sdsnew("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"); + sds value_copy9 = sdsdup(value9); + long long expiry9 = expiry8; + entry *e9 = entryUpdate(e8, value9, expiry9); + verify_entry_properties(e9, field, value_copy9, expiry9, true, false); + + // Update the value so that memory usage is less than 3/4 of the current allocation size + // Ensuring required_embedded_size < current_embedded_allocation_size * 3 / 4, which creates a new entry + size_t current_embedded_allocation_size = entryMemUsage(e9); + sds value10 = sdsnew("xxxxxxxxxxxxxxxxxxxxxx"); + sds value_copy10 = sdsdup(value10); + long long expiry10 = expiry9; + entry *e10 = entryUpdate(e9, value10, expiry10); + verify_entry_properties(e10, field, value_copy10, expiry10, true, false); + TEST_ASSERT(entryMemUsage(e10) < current_embedded_allocation_size * 3 / 4); + TEST_ASSERT(e10 != e9); + + // Update the value so that memory usage is at least 3/4 of the current memory usage + // Ensuring required_embedded_size > current_embedded_allocation_size * 3 / 4 without creating a new entry + current_embedded_allocation_size = entryMemUsage(e10); + sds value11 = sdsnew("yyyyyyyyyyyyy"); + sds value_copy11 = sdsdup(value11); + long long expiry11 = expiry10; + entry *e11 = entryUpdate(e10, value11, expiry11); + verify_entry_properties(e11, field, value_copy11, expiry11, true, false); + TEST_ASSERT(entryMemUsage(e11) >= current_embedded_allocation_size * 3 / 4); + 
TEST_ASSERT(entryMemUsage(e11) <= current_embedded_allocation_size); + TEST_ASSERT(entryMemUsage(e11) <= EMBED_VALUE_MAX_ALLOC_SIZE); + TEST_ASSERT(e10 == e11); + + // Update the value so that memory usage is exactly equal to the current allocation size + // Ensuring required_embedded_size == current_embedded_allocation_size without creating a new entry + current_embedded_allocation_size = entryMemUsage(e11); + sds value12 = sdsnew("zzzzzzzzzzzzz"); + sds value_copy12 = sdsdup(value12); + long long expiry12 = expiry11; + entry *e12 = entryUpdate(e11, value12, expiry12); + verify_entry_properties(e11, field, value_copy12, expiry12, true, false); + TEST_ASSERT(entryMemUsage(e12) == current_embedded_allocation_size); + TEST_ASSERT(entryMemUsage(e12) <= EMBED_VALUE_MAX_ALLOC_SIZE); + TEST_ASSERT(e12 == e11); + + entryFree(e12); + sdsfree(field); + sdsfree(value_copy1); + sdsfree(value_copy2); + sdsfree(value_copy4); + sdsfree(value_copy6); + sdsfree(value_copy8); + sdsfree(value_copy9); + sdsfree(value_copy10); + sdsfree(value_copy11); + sdsfree(value_copy12); + + return 0; +} + +/** + * Test setting expiry on an entry: + * 1. No expiry + * 2. Set expiry on entry without expiry + * 3. Update expiry on entry with expiry + * 4. Test with non-embedded entry + * 5. 
Set expiry on non-embedded entry + */ +int test_entryHasexpiry_entrySetExpiry(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + + // No expiry + sds field1 = sdsnew(SHORT_FIELD); + sds value1 = sdsnew(SHORT_VALUE); + entry *e1 = entryCreate(field1, value1, EXPIRY_NONE); + TEST_ASSERT(entryHasExpiry(e1) == false); + TEST_ASSERT(entryGetExpiry(e1) == EXPIRY_NONE); + + // Set expiry on entry without expiry + long long expiry2 = 100; + entry *e2 = entrySetExpiry(e1, expiry2); + TEST_ASSERT(entryHasExpiry(e2) == true); + TEST_ASSERT(entryGetExpiry(e2) == expiry2); + + // Update expiry on entry with expiry + long long expiry3 = 200; + entry *e3 = entrySetExpiry(e2, expiry3); + TEST_ASSERT(entryHasExpiry(e3) == true); + TEST_ASSERT(entryGetExpiry(e3) == expiry3); + TEST_ASSERT(e2 == e3); // Should be the same pointer when just updating expiry + + // Test with non-embedded entry + sds field4 = sdsnew(LONG_FIELD); + sds value4 = sdsnew(LONG_VALUE); + entry *e4 = entryCreate(field4, value4, EXPIRY_NONE); + TEST_ASSERT(entryHasExpiry(e4) == false); + TEST_ASSERT(entryHasValuePtr(e4) == true); + + // Set expiry on entry without expiry + long long expiry5 = 100; + entry *e5 = entrySetExpiry(e4, expiry5); + TEST_ASSERT(entryHasExpiry(e5) == true); + TEST_ASSERT(entryGetExpiry(e5) == expiry5); + + // Update expiry on entry with expiry + long long expiry6 = 200; + entry *e6 = entrySetExpiry(e5, expiry6); + TEST_ASSERT(entryHasExpiry(e6) == true); + TEST_ASSERT(entryGetExpiry(e6) == expiry6); + TEST_ASSERT(e5 == e6); // Should be the same pointer when just updating expiry + + entryFree(e3); + entryFree(e6); + sdsfree(field1); + sdsfree(field4); + + return 0; +} + +/** + * Test entryIsExpired: + * 1. No expiry + * 2. Future expiry + * 3. Current time expiry + * 4. Past expiry + * 5. Test with loading mode + * 6. Test with import mode and import source client + * 7. Test with import mode and import source client and import expiry + * 8. 
Test with import mode and import source client and import expiry and import expiry is in the past + */ +int test_entryIsExpired(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + + // Setup server state + long long current_time = mstime(); + server.cmd_time_snapshot = current_time; + + // No expiry + sds field1 = sdsnew(SHORT_FIELD); + sds value1 = sdsnew(SHORT_VALUE); + entry *e1 = entryCreate(field1, value1, EXPIRY_NONE); + TEST_ASSERT(entryGetExpiry(e1) == EXPIRY_NONE); + TEST_ASSERT(entryIsExpired(e1) == 0); + + // Future expiry + sds field2 = sdsnew(SHORT_FIELD); + sds value2 = sdsnew(SHORT_VALUE); + long long future_time = current_time + 10000; // 10 seconds in future + entry *e2 = entryCreate(field2, value2, future_time); + TEST_ASSERT(entryGetExpiry(e2) == future_time); + TEST_ASSERT(entryIsExpired(e2) == 0); + + // Current time expiry + sds field3 = sdsnew(SHORT_FIELD); + sds value3 = sdsnew(SHORT_VALUE); + entry *e3 = entryCreate(field3, value3, current_time); + TEST_ASSERT(entryGetExpiry(e3) == current_time); + TEST_ASSERT(entryIsExpired(e3) == 0); + + // Test with past expiry + sds field4 = sdsnew(SHORT_FIELD); + sds value4 = sdsnew(SHORT_VALUE); + long long past_time = current_time - 10000; // 10 seconds ago + entry *e4 = entryCreate(field4, value4, past_time); + TEST_ASSERT(entryGetExpiry(e4) == past_time); + TEST_ASSERT(entryIsExpired(e4) == 1); + + entryFree(e1); + entryFree(e2); + entryFree(e3); + entryFree(e4); + sdsfree(field1); + sdsfree(field2); + sdsfree(field3); + sdsfree(field4); + + return 0; +} + +/** + * Test entryMemUsage: + * 1. Embedded entry tests: + * - Initial creation without expiry + * - Adding expiry (should increase memory usage) + * - Updating expiry (should not change memory usage) + * - Updating value while keeping it embedded: + * * To smaller value (should not decrease memory usage) + * * To bigger value (should not increase memory usage) + * + * 2. 
Non-embedded entry tests: + * - Initial creation without expiry + * - Adding expiry (should increase memory usage) + * - Updating expiry (should not change memory usage) + * - Updating value: + * * To smaller value (should decrease memory usage) + * * To bigger value (should increase memory usage) + */ +int test_entryMemUsage_entrySetExpiry_entrySetValue(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + + // Tests with embedded entry + // Embedded entry without expiry + sds field1 = sdsnew(SHORT_FIELD); + sds value1 = sdsnew(SHORT_VALUE); + sds value_copy1 = sdsdup(value1); + long long expiry1 = EXPIRY_NONE; + entry *e1 = entryCreate(field1, value1, expiry1); + size_t e1_entryMemUsage = entryMemUsage(e1); + verify_entry_properties(e1, field1, value_copy1, expiry1, false, false); + TEST_ASSERT(e1_entryMemUsage > 0); + + // Add expiry to embedded entry without expiry + // This should increase memory usage by sizeof(long long) + 2 bytes + // (long long for the expiry value, 2 bytes for SDS header adjustment) + long long expiry2 = 100; + entry *e2 = entrySetExpiry(e1, expiry2); + size_t e2_entryMemUsage = entryMemUsage(e2); + verify_entry_properties(e2, field1, value_copy1, expiry2, true, false); + TEST_ASSERT(e2_entryMemUsage == e1_entryMemUsage + sizeof(long long) + 2); + + // Update expiry on an entry that already has one + // This should NOT change memory usage as we're just updating the expiry value (long long) + long long expiry3 = 10000; + entry *e3 = entrySetExpiry(e2, expiry3); + size_t e3_entryMemUsage = entryMemUsage(e3); + verify_entry_properties(e3, field1, value_copy1, expiry3, true, false); + TEST_ASSERT(e3_entryMemUsage == e2_entryMemUsage); + + // Update to smaller value (keeping embedded) + // Memory usage should decrease by the difference in value size (2 bytes) + sds value4 = sdsnew("x"); + sds value_copy4 = sdsdup(value4); + entry *e4 = entrySetValue(e3, value4); + size_t e4_entryMemUsage = entryMemUsage(e4); + 
verify_entry_properties(e4, field1, value_copy4, expiry3, true, false); + TEST_ASSERT(e4_entryMemUsage == e3_entryMemUsage - 2); + + // Update to bigger value (keeping embedded) + // Memory usage should increase by the difference in value size (1 byte) + sds value5 = sdsnew("xx"); + sds value_copy5 = sdsdup(value5); + entry *e5 = entrySetValue(e4, value5); + size_t e5_entryMemUsage = entryMemUsage(e5); + verify_entry_properties(e5, field1, value_copy5, expiry3, true, false); + TEST_ASSERT(e5_entryMemUsage == e4_entryMemUsage + 1); + + // Tests with non-embedded entry + // Non-embedded entry without expiry + sds field6 = sdsnew(LONG_FIELD); + field6 = sdscat(field6, LONG_FIELD); // Double the length to ensure non-embedded entry + sds value6 = sdsnew(LONG_VALUE); + sds value_copy6 = sdsdup(value6); + long long expiry6 = EXPIRY_NONE; + entry *e6 = entryCreate(field6, value6, EXPIRY_NONE); + size_t e6_entryMemUsage = entryMemUsage(e6); + verify_entry_properties(e6, field6, value_copy6, expiry6, false, true); + TEST_ASSERT(entryMemUsage(e6) > 0); + + // Add expiry to non-embedded entry wihout expiry + // For non-embedded entries this increases memory by exactly sizeof(long long) + long long expiry7 = 100; + entry *e7 = entrySetExpiry(e6, expiry7); + size_t e7_entryMemUsage = entryMemUsage(e7); + verify_entry_properties(e7, field6, value_copy6, expiry7, true, true); + TEST_ASSERT(e7_entryMemUsage == e6_entryMemUsage + sizeof(long long)); + + // Update expiry on a non-embedded entry that already has one + // This should not change memory usage as we're just updating the expiry value + long long expiry8 = 10000; + entry *e8 = entrySetExpiry(e7, expiry8); + size_t e8_entryMemUsage = entryMemUsage(e8); + verify_entry_properties(e8, field6, value_copy6, expiry8, true, true); + TEST_ASSERT(e8_entryMemUsage == e7_entryMemUsage); + + // Update to smaller value (keeping non-embedded) + // Memory usage should decrease by the difference between LONG_VALUE and "x" (143) + sds value9 
= sdsnew("x"); + sds value_copy9 = sdsdup(value9); + entry *e9 = entrySetValue(e8, value9); + size_t e9_entryMemUsage = entryMemUsage(e9); + verify_entry_properties(e9, field6, value_copy9, expiry8, true, true); + TEST_ASSERT(e9_entryMemUsage == e8_entryMemUsage - 143); + + // Update to bigger value (keeping non-embedded) + // Memory usage increases by the difference in value size (1 byte) + sds value10 = sdsnew("xx"); + sds value_copy10 = sdsdup(value10); + entry *e10 = entrySetValue(e9, value10); + size_t e10_entryMemUsage = entryMemUsage(e10); + verify_entry_properties(e10, field6, value_copy10, expiry8, true, true); + TEST_ASSERT(e10_entryMemUsage == e9_entryMemUsage + 1); + + entryFree(e5); + entryFree(e10); + sdsfree(field1); + sdsfree(field6); + sdsfree(value_copy1); + sdsfree(value_copy4); + sdsfree(value_copy5); + sdsfree(value_copy6); + sdsfree(value_copy9); + sdsfree(value_copy10); + + return 0; +} diff --git a/src/unit/test_files.h b/src/unit/test_files.h index f284db51a5..85fa510c46 100644 --- a/src/unit/test_files.h +++ b/src/unit/test_files.h @@ -20,6 +20,11 @@ int test_dictDisableResizeReduceTo3(int argc, char **argv, int flags); int test_dictDeleteOneKeyTriggerResizeAgain(int argc, char **argv, int flags); int test_dictBenchmark(int argc, char **argv, int flags); int test_endianconv(int argc, char *argv[], int flags); +int test_entryCreate(int argc, char **argv, int flags); +int test_entryUpdate(int argc, char **argv, int flags); +int test_entryHasexpiry_entrySetExpiry(int argc, char **argv, int flags); +int test_entryIsExpired(int argc, char **argv, int flags); +int test_entryMemUsage_entrySetExpiry_entrySetValue(int argc, char **argv, int flags); int test_cursor(int argc, char **argv, int flags); int test_set_hash_function_seed(int argc, char **argv, int flags); int test_add_find_delete(int argc, char **argv, int flags); @@ -243,6 +248,7 @@ unitTest __test_crc64_c[] = {{"test_crc64", test_crc64}, {NULL, NULL}}; unitTest __test_crc64combine_c[] = 
{{"test_crc64combine", test_crc64combine}, {NULL, NULL}}; unitTest __test_dict_c[] = {{"test_dictCreate", test_dictCreate}, {"test_dictAdd16Keys", test_dictAdd16Keys}, {"test_dictDisableResize", test_dictDisableResize}, {"test_dictAddOneKeyTriggerResize", test_dictAddOneKeyTriggerResize}, {"test_dictDeleteKeys", test_dictDeleteKeys}, {"test_dictDeleteOneKeyTriggerResize", test_dictDeleteOneKeyTriggerResize}, {"test_dictEmptyDirAdd128Keys", test_dictEmptyDirAdd128Keys}, {"test_dictDisableResizeReduceTo3", test_dictDisableResizeReduceTo3}, {"test_dictDeleteOneKeyTriggerResizeAgain", test_dictDeleteOneKeyTriggerResizeAgain}, {"test_dictBenchmark", test_dictBenchmark}, {NULL, NULL}}; unitTest __test_endianconv_c[] = {{"test_endianconv", test_endianconv}, {NULL, NULL}}; +unitTest __test_entry_c[] = {{"test_entryCreate", test_entryCreate}, {"test_entryUpdate", test_entryUpdate}, {"test_entryHasexpiry_entrySetExpiry", test_entryHasexpiry_entrySetExpiry}, {"test_entryIsExpired", test_entryIsExpired}, {"test_entryMemUsage_entrySetExpiry_entrySetValue", test_entryMemUsage_entrySetExpiry_entrySetValue}, {NULL, NULL}}; unitTest __test_hashtable_c[] = {{"test_cursor", test_cursor}, {"test_set_hash_function_seed", test_set_hash_function_seed}, {"test_add_find_delete", test_add_find_delete}, {"test_add_find_delete_avoid_resize", test_add_find_delete_avoid_resize}, {"test_instant_rehashing", test_instant_rehashing}, {"test_bucket_chain_length", test_bucket_chain_length}, {"test_two_phase_insert_and_pop", test_two_phase_insert_and_pop}, {"test_replace_reallocated_entry", test_replace_reallocated_entry}, {"test_incremental_find", test_incremental_find}, {"test_scan", test_scan}, {"test_iterator", test_iterator}, {"test_safe_iterator", test_safe_iterator}, {"test_compact_bucket_chain", test_compact_bucket_chain}, {"test_random_entry", test_random_entry}, {"test_random_entry_with_long_chain", test_random_entry_with_long_chain}, {"test_random_entry_sparse_table", 
test_random_entry_sparse_table}, {"test_all_memory_freed", test_all_memory_freed}, {NULL, NULL}}; unitTest __test_intset_c[] = {{"test_intsetValueEncodings", test_intsetValueEncodings}, {"test_intsetBasicAdding", test_intsetBasicAdding}, {"test_intsetLargeNumberRandomAdd", test_intsetLargeNumberRandomAdd}, {"test_intsetUpgradeFromint16Toint32", test_intsetUpgradeFromint16Toint32}, {"test_intsetUpgradeFromint16Toint64", test_intsetUpgradeFromint16Toint64}, {"test_intsetUpgradeFromint32Toint64", test_intsetUpgradeFromint32Toint64}, {"test_intsetStressLookups", test_intsetStressLookups}, {"test_intsetStressAddDelete", test_intsetStressAddDelete}, {NULL, NULL}}; unitTest __test_kvstore_c[] = {{"test_kvstoreAdd16Keys", test_kvstoreAdd16Keys}, {"test_kvstoreIteratorRemoveAllKeysNoDeleteEmptyHashtable", test_kvstoreIteratorRemoveAllKeysNoDeleteEmptyHashtable}, {"test_kvstoreIteratorRemoveAllKeysDeleteEmptyHashtable", test_kvstoreIteratorRemoveAllKeysDeleteEmptyHashtable}, {"test_kvstoreHashtableIteratorRemoveAllKeysNoDeleteEmptyHashtable", test_kvstoreHashtableIteratorRemoveAllKeysNoDeleteEmptyHashtable}, {"test_kvstoreHashtableIteratorRemoveAllKeysDeleteEmptyHashtable", test_kvstoreHashtableIteratorRemoveAllKeysDeleteEmptyHashtable}, {NULL, NULL}}; @@ -269,6 +275,7 @@ struct unitTestSuite { {"test_crc64combine.c", __test_crc64combine_c}, {"test_dict.c", __test_dict_c}, {"test_endianconv.c", __test_endianconv_c}, + {"test_entry.c", __test_entry_c}, {"test_hashtable.c", __test_hashtable_c}, {"test_intset.c", __test_intset_c}, {"test_kvstore.c", __test_kvstore_c}, From 94a7a9db20b5bfe0e72be146ddea383778b7f1d4 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Wed, 25 Jun 2025 11:37:05 +0300 Subject: [PATCH 010/119] 1. fix crash in rewriteHashObject 2. 
firx test_entry Signed-off-by: Ran Shidlansik --- src/entry.c | 8 ++++++-- src/entry.h | 3 +++ src/expire.h | 2 +- src/server.h | 2 +- src/t_hash.c | 2 +- src/unit/test_entry.c | 6 +++--- 6 files changed, 15 insertions(+), 8 deletions(-) diff --git a/src/entry.c b/src/entry.c index a5fff55818..b7b1cae286 100644 --- a/src/entry.c +++ b/src/entry.c @@ -107,6 +107,10 @@ void *entryAllocPtr(const entry *entry) { return buf; } +bool entryHasEmbeddedValue(entry *entry) { + return (entryGetValue(entry) && !entryHasValuePtr(entry)); +} + /**************************************** Entry Expiry API *****************************************/ /* Returns the entry expiration timestamp. @@ -115,8 +119,8 @@ long long entryGetExpiry(const entry *entry) { long long expiry = EXPIRY_NONE; if (entryHasExpiry(entry)) { char *buf = sdsAllocPtr(entry); - debugServerAssert((((uintptr_t)buf & 0x7) == 0)); - if (entryHasValuePtr(entry)) buf -= sizeof(sds *); + debugServerAssert((((uintptr_t)buf & 0x7) == 0)); /* Test that the allocation is indeed 8 bytes aligned */ + if (entryHasValuePtr(entry)) buf -= sizeof(sds); buf -= sizeof(long long); expiry = *(long long *)buf; } diff --git a/src/entry.h b/src/entry.h index 33069b5093..8e34049583 100644 --- a/src/entry.h +++ b/src/entry.h @@ -21,4 +21,7 @@ size_t entryMemUsage(entry *entry); entry *entryDefrag(entry *entry, void *(*defragfn)(void *), sds (*sdsdefragfn)(sds)); void entryDismissMemory(entry *entry); +/* Internal used for debug. No need to use this function except in tests */ +bool entryHasEmbeddedValue(entry *entry); + #endif diff --git a/src/expire.h b/src/expire.h index adc28552fc..65faa15326 100644 --- a/src/expire.h +++ b/src/expire.h @@ -25,7 +25,7 @@ typedef enum { } keyStatus; /* Return value for getExpirationPolicy */ -typedef enum { +typedef enum { POLICY_IGNORE_EXPIRE, /* Ignore expiration time of items and treat them as valid. */ POLICY_KEEP_EXPIRED, /* Ignore items which are expired but do not actively delete them. 
*/ POLICY_DELETE_EXPIRED /* Delete expired keys on access. */ diff --git a/src/server.h b/src/server.h index a11d565d3d..8ab3996b72 100644 --- a/src/server.h +++ b/src/server.h @@ -77,7 +77,7 @@ #include "quicklist.h" /* Lists are encoded as linked lists of N-elements flat arrays */ #include "expire.h" /* Expiration public API */ - #include "rax.h" /* Radix tree */ +#include "rax.h" /* Radix tree */ #include "connection.h" /* Connection abstraction */ #include "memory_prefetch.h" #include "volatile_set.h" diff --git a/src/t_hash.c b/src/t_hash.c index 8fae017bca..f241234067 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -64,7 +64,7 @@ void hashTypeFreeVolatileSet(robj *o) { } bool hashTypeHasVolatileElements(robj *o) { - return ((o->encoding == OBJ_ENCODING_HASHTABLE) && (hashtableGetType(o->ptr) == &hashWithVolatileItemsHashtableType)); + return ((o->encoding == OBJ_ENCODING_HASHTABLE) && (hashTypeGetVolatileSet(o) != NULL)); } /* make any access to the hash object elements ignore the specific elements expiration. 
diff --git a/src/unit/test_entry.c b/src/unit/test_entry.c index 79f663c1d9..c4edca73b2 100644 --- a/src/unit/test_entry.c +++ b/src/unit/test_entry.c @@ -19,7 +19,7 @@ static int verify_entry_properties(entry *e, sds field, sds value_copy, long lon TEST_ASSERT(sdscmp(entryGetValue(e), value_copy) == 0); TEST_ASSERT(entryGetExpiry(e) == expiry); TEST_ASSERT(entryHasExpiry(e) == has_expiry); - TEST_ASSERT(entryHasValuePtr(e) == has_valueptr); + TEST_ASSERT(entryHasEmbeddedValue(e) != has_valueptr); return 0; } @@ -251,7 +251,7 @@ int test_entryHasexpiry_entrySetExpiry(int argc, char **argv, int flags) { sds value4 = sdsnew(LONG_VALUE); entry *e4 = entryCreate(field4, value4, EXPIRY_NONE); TEST_ASSERT(entryHasExpiry(e4) == false); - TEST_ASSERT(entryHasValuePtr(e4) == true); + TEST_ASSERT(entryHasEmbeddedValue(e4) == false); // Set expiry on entry without expiry long long expiry5 = 100; @@ -417,7 +417,7 @@ int test_entryMemUsage_entrySetExpiry_entrySetValue(int argc, char **argv, int f verify_entry_properties(e6, field6, value_copy6, expiry6, false, true); TEST_ASSERT(entryMemUsage(e6) > 0); - // Add expiry to non-embedded entry wihout expiry + // Add expiry to non-embedded entry without expiry // For non-embedded entries this increases memory by exactly sizeof(long long) long long expiry7 = 100; entry *e7 = entrySetExpiry(e6, expiry7); From 871253c9c3d2018c08dda1e28569fc65e09bc95c Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Wed, 25 Jun 2025 11:54:35 +0300 Subject: [PATCH 011/119] add documentation in entry.h Signed-off-by: Ran Shidlansik --- src/entry.h | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/src/entry.h b/src/entry.h index 8e34049583..7c69757ce6 100644 --- a/src/entry.h +++ b/src/entry.h @@ -4,21 +4,84 @@ #include "sds.h" #include +/*----------------------------------------------------------------------------- + * Entry + 
*----------------------------------------------------------------------------*/ + +/* + * The entry pointer is the field `sds`. We encode the entry layout type + * in the SDS header. + * + * An entry represents a key–value pair with an optional expiration timestamp. + * The pointer of type `entry *` always points to the FIELD `sds`. + * + * Layout 1: Embedded Field and Value (Compact Form) + * + * +-------------------+-------------------+-------------------+ + * | Expiration (opt) | Field (sds) | Value (sds) | + * | 8 bytes (int64_t) | "field" + header | "value" + header | + * +-------------------+-------------------+-------------------+ + * ^ + * | + * entry pointer + * + * - Both field and value are small and embedded. + * - The expiration is stored just before the first sds. + * + * + * Layout 2: Pointer-Based Value (Large Values) + * + * +-------------------+-------------------+------------------+ + * | Expiration (opt) | Value pointer | Field (sds) | + * | 8 bytes (int64_t) | 8 bytes (void *) | "field" + header | + * +-------------------+-------------------+------------------+ + * ^ + * | + * entry pointer + * + * - The value is stored separately via a pointer. + * - Used for large value sizes. */ typedef void entry; +/* Returns the field string (sds) from the entry. */ sds entryGetField(const entry *entry); + +/* Returns the value string (sds) from the entry. */ sds entryGetValue(const entry *entry); + +/* Sets or replaces the value string in the entry. May reallocate and return a new pointer. */ entry *entrySetValue(entry *entry, sds value); + +/* Gets the expiration timestamp (UNIX time in milliseconds). */ long long entryGetExpiry(const entry *entry); + +/* Returns true if the entry has an expiration timestamp set. */ bool entryHasExpiry(const entry *entry); + +/* Sets the expiration timestamp. */ entry *entrySetExpiry(entry *entry, long long expiry); + +/* Returns true if the entry is expired compared to current system time (commandTimeSnapshot). 
*/ bool entryIsExpired(entry *entry); +/* Frees the memory used by the entry (including field/value). */ void entryFree(entry *entry); + +/* Creates a new entry with the given field, value, and optional expiry. */ entry *entryCreate(const_sds field, sds value, long long expiry); + +/* Updates the value and/or expiry of an existing entry. + * In case value is NULL, will use the existing entry value. + * In case expiry is EXPIRY_NONE, will use the existing entry expiration time. */ entry *entryUpdate(entry *entry, sds value, long long expiry); + +/* Returns the total memory used by the entry (in bytes). */ size_t entryMemUsage(entry *entry); + +/* Defragments the entry and returns the new pointer (if moved). */ entry *entryDefrag(entry *entry, void *(*defragfn)(void *), sds (*sdsdefragfn)(sds)); + +/* Advises allocator to dismiss memory used by entry. */ void entryDismissMemory(entry *entry); /* Internal used for debug. No need to use this function except in tests */ From 8531d7b05a30fdb7cc19f0f444d71d9c32dcc93c Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Wed, 25 Jun 2025 14:54:05 +0300 Subject: [PATCH 012/119] make test_entry use the header file Signed-off-by: Ran Shidlansik --- src/entry.c | 4 ---- src/entry.h | 4 ++++ src/expire.h | 3 +++ src/server.h | 3 --- src/unit/test_entry.c | 9 ++++++--- src/util.c | 2 -- src/util.h | 5 +++++ 7 files changed, 18 insertions(+), 12 deletions(-) diff --git a/src/entry.c b/src/entry.c index b7b1cae286..6e70ede169 100644 --- a/src/entry.c +++ b/src/entry.c @@ -36,10 +36,6 @@ * entry pointer = value sds */ -/* The maximum allocation size we want to use for entries with embedded - * values. */ -#define EMBED_VALUE_MAX_ALLOC_SIZE 128 - /* SDS aux flag. If set, it indicates that the entry has TTL metadata set. 
*/ #define FIELD_SDS_AUX_BIT_ENTRY_HAS_EXPIRY 0 diff --git a/src/entry.h b/src/entry.h index 7c69757ce6..362c75c956 100644 --- a/src/entry.h +++ b/src/entry.h @@ -43,6 +43,10 @@ * - Used for large value sizes. 
*/ typedef void entry; +/* The maximum allocation size we want to use for entries with embedded + * values. */ +#define EMBED_VALUE_MAX_ALLOC_SIZE 128 + /* Returns the field string (sds) from the entry. */ sds entryGetField(const entry *entry); diff --git a/src/expire.h b/src/expire.h index 65faa15326..66a5241233 100644 --- a/src/expire.h +++ b/src/expire.h @@ -4,6 +4,9 @@ #include #include "monotonic.h" +/* Special Expiry values */ +#define EXPIRY_NONE -1 + /* Flags for expireIfNeeded */ #define EXPIRE_FORCE_DELETE_EXPIRED 1 #define EXPIRE_AVOID_DELETE_EXPIRED 2 diff --git a/src/server.h b/src/server.h index 8ab3996b72..1f8c66256d 100644 --- a/src/server.h +++ b/src/server.h @@ -599,9 +599,6 @@ typedef enum { #define PAUSE_ACTION_EVICT (1 << 3) #define PAUSE_ACTION_REPLICA (1 << 4) /* pause replica traffic */ -/* Special Expiry values */ -#define EXPIRY_NONE -1 - /* Sets log format */ typedef enum { LOG_FORMAT_LEGACY = 0, LOG_FORMAT_LOGFMT } log_format_type; diff --git a/src/unit/test_entry.c b/src/unit/test_entry.c index c4edca73b2..c9cb96c08a 100644 --- a/src/unit/test_entry.c +++ b/src/unit/test_entry.c @@ -1,6 +1,8 @@ -#include "../entry.c" +#include "../entry.h" #include "test_help.h" - +#include "../expire.h" +#include "../monotonic.h" +#include "../server.h" #include #include #include @@ -183,7 +185,8 @@ int test_entryUpdate(int argc, char **argv, int flags) { verify_entry_properties(e11, field, value_copy11, expiry11, true, false); TEST_ASSERT(entryMemUsage(e11) >= current_embedded_allocation_size * 3 / 4); TEST_ASSERT(entryMemUsage(e11) <= current_embedded_allocation_size); - TEST_ASSERT(entryMemUsage(e11) <= EMBED_VALUE_MAX_ALLOC_SIZE); + TEST_ASSERT(entryMemUsage(e11) <= + EMBED_VALUE_MAX_ALLOC_SIZE); TEST_ASSERT(e10 == e11); // Update the value so that memory usage is exactly equal to the current allocation size diff --git a/src/util.c b/src/util.c index 41ecfab74d..cdee67c61f 100644 --- a/src/util.c +++ b/src/util.c @@ -59,8 +59,6 @@ #include #endif 
-#define UNUSED(x) ((void)(x)) - /* Glob-style pattern matching. */ static int stringmatchlen_impl(const char *pattern, int patternLen, diff --git a/src/util.h b/src/util.h index 787b79dd11..f089cbda0a 100644 --- a/src/util.h +++ b/src/util.h @@ -33,6 +33,11 @@ #include #include "sds.h" +/* Anti-warning macro... */ +#ifndef UNUSED +#define UNUSED(V) ((void)V) +#endif + /* min/max */ #undef min #undef max From 1aa83cc394a5cf1af3e2134ad3f80b07daa6789b Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Wed, 25 Jun 2025 14:57:32 +0300 Subject: [PATCH 013/119] better make UNUSED macro provided by util.h Signed-off-by: Ran Shidlansik --- src/anet.c | 2 -- src/server.h | 3 --- 2 files changed, 5 deletions(-) diff --git a/src/anet.c b/src/anet.c index 5e970e2cf2..f9944bb65a 100644 --- a/src/anet.c +++ b/src/anet.c @@ -52,8 +52,6 @@ #include "util.h" #include "serverassert.h" -#define UNUSED(x) (void)(x) - static void anetSetError(char *err, const char *fmt, ...) { va_list ap; diff --git a/src/server.h b/src/server.h index 1f8c66256d..b610101800 100644 --- a/src/server.h +++ b/src/server.h @@ -508,9 +508,6 @@ typedef enum { #define SUPERVISED_SYSTEMD 2 #define SUPERVISED_UPSTART 3 -/* Anti-warning macro... 
*/ -#define UNUSED(V) ((void)V) - #define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^64 elements */ #define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */ #define ZSKIPLIST_MAX_SEARCH 10 From bfeb7515af36e0d892370de648d87119b892f7c3 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Wed, 25 Jun 2025 17:20:30 +0300 Subject: [PATCH 014/119] Fix test_entry and a bug in entryUpdate Signed-off-by: Ran Shidlansik --- src/entry.c | 2 +- src/unit/test_entry.c | 36 +++++++++++++++++++----------------- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/src/entry.c b/src/entry.c index 6e70ede169..999f352b36 100644 --- a/src/entry.c +++ b/src/entry.c @@ -279,7 +279,7 @@ entry *entryUpdate(entry *e, sds value, long long expiry) { * In such cases the old value alloc was adjusted to the real buffer size part it was embedded to. * since we can potentially write here a smaller value, which requires less allocation space, we would like to * inherit the old value memory allocation size. */ - size_t value_size = sdsHdrSize(SDS_TYPE_8) + sdsalloc(value) + 1; + size_t value_size = sdsHdrSize(SDS_TYPE_8) + sdsalloc(old_value) + 1; sdswrite(sdsAllocPtr(old_value), value_size, SDS_TYPE_8, value, sdslen(value)); sdsfree(value); } diff --git a/src/unit/test_entry.c b/src/unit/test_entry.c index c9cb96c08a..27a2028f95 100644 --- a/src/unit/test_entry.c +++ b/src/unit/test_entry.c @@ -167,7 +167,7 @@ int test_entryUpdate(int argc, char **argv, int flags) { // Update the value so that memory usage is less than 3/4 of the current allocation size // Ensuring required_embedded_size < current_embedded_allocation_size * 3 / 4, which creates a new entry size_t current_embedded_allocation_size = entryMemUsage(e9); - sds value10 = sdsnew("xxxxxxxxxxxxxxxxxxxxxx"); + sds value10 = sdsnew("xxxxxxxxxxxxxxxxxxxxx"); sds value_copy10 = sdsdup(value10); long long expiry10 = expiry9; entry *e10 = entryUpdate(e9, value10, expiry10); @@ -294,15 +294,15 @@ int test_entryIsExpired(int argc, char 
**argv, int flags) { UNUSED(flags); // Setup server state - long long current_time = mstime(); - server.cmd_time_snapshot = current_time; + enterExecutionUnit(1, ustime()); + long long current_time = commandTimeSnapshot(); // No expiry sds field1 = sdsnew(SHORT_FIELD); sds value1 = sdsnew(SHORT_VALUE); entry *e1 = entryCreate(field1, value1, EXPIRY_NONE); TEST_ASSERT(entryGetExpiry(e1) == EXPIRY_NONE); - TEST_ASSERT(entryIsExpired(e1) == 0); + TEST_ASSERT(entryIsExpired(e1) == false); // Future expiry sds field2 = sdsnew(SHORT_FIELD); @@ -310,14 +310,14 @@ int test_entryIsExpired(int argc, char **argv, int flags) { long long future_time = current_time + 10000; // 10 seconds in future entry *e2 = entryCreate(field2, value2, future_time); TEST_ASSERT(entryGetExpiry(e2) == future_time); - TEST_ASSERT(entryIsExpired(e2) == 0); + TEST_ASSERT(entryIsExpired(e2) == false); // Current time expiry sds field3 = sdsnew(SHORT_FIELD); sds value3 = sdsnew(SHORT_VALUE); entry *e3 = entryCreate(field3, value3, current_time); TEST_ASSERT(entryGetExpiry(e3) == current_time); - TEST_ASSERT(entryIsExpired(e3) == 0); + TEST_ASSERT(entryIsExpired(e3) == false); // Test with past expiry sds field4 = sdsnew(SHORT_FIELD); @@ -325,7 +325,7 @@ int test_entryIsExpired(int argc, char **argv, int flags) { long long past_time = current_time - 10000; // 10 seconds ago entry *e4 = entryCreate(field4, value4, past_time); TEST_ASSERT(entryGetExpiry(e4) == past_time); - TEST_ASSERT(entryIsExpired(e4) == 1); + TEST_ASSERT(entryIsExpired(e4) == true); entryFree(e1); entryFree(e2); @@ -335,7 +335,7 @@ int test_entryIsExpired(int argc, char **argv, int flags) { sdsfree(field2); sdsfree(field3); sdsfree(field4); - + exitExecutionUnit(); return 0; } @@ -380,7 +380,7 @@ int test_entryMemUsage_entrySetExpiry_entrySetValue(int argc, char **argv, int f entry *e2 = entrySetExpiry(e1, expiry2); size_t e2_entryMemUsage = entryMemUsage(e2); verify_entry_properties(e2, field1, value_copy1, expiry2, true, false); - 
TEST_ASSERT(e2_entryMemUsage == e1_entryMemUsage + sizeof(long long) + 2); + TEST_ASSERT(zmalloc_usable_size((char *)e2 - sizeof(long long) - 3) == e2_entryMemUsage); // Update expiry on an entry that already has one // This should NOT change memory usage as we're just updating the expiry value (long long) @@ -397,7 +397,7 @@ int test_entryMemUsage_entrySetExpiry_entrySetValue(int argc, char **argv, int f entry *e4 = entrySetValue(e3, value4); size_t e4_entryMemUsage = entryMemUsage(e4); verify_entry_properties(e4, field1, value_copy4, expiry3, true, false); - TEST_ASSERT(e4_entryMemUsage == e3_entryMemUsage - 2); + TEST_ASSERT(zmalloc_usable_size((char *)e4 - sizeof(long long) - 3) == e4_entryMemUsage); // Update to bigger value (keeping embedded) // Memory usage should increase by the difference in value size (1 byte) @@ -406,7 +406,7 @@ int test_entryMemUsage_entrySetExpiry_entrySetValue(int argc, char **argv, int f entry *e5 = entrySetValue(e4, value5); size_t e5_entryMemUsage = entryMemUsage(e5); verify_entry_properties(e5, field1, value_copy5, expiry3, true, false); - TEST_ASSERT(e5_entryMemUsage == e4_entryMemUsage + 1); + TEST_ASSERT(zmalloc_usable_size((char *)e5 - sizeof(long long) - 3) == e5_entryMemUsage); // Tests with non-embedded entry // Non-embedded entry without expiry @@ -418,7 +418,7 @@ int test_entryMemUsage_entrySetExpiry_entrySetValue(int argc, char **argv, int f entry *e6 = entryCreate(field6, value6, EXPIRY_NONE); size_t e6_entryMemUsage = entryMemUsage(e6); verify_entry_properties(e6, field6, value_copy6, expiry6, false, true); - TEST_ASSERT(entryMemUsage(e6) > 0); + TEST_ASSERT(e6_entryMemUsage > 0); // Add expiry to non-embedded entry without expiry // For non-embedded entries this increases memory by exactly sizeof(long long) @@ -426,7 +426,8 @@ int test_entryMemUsage_entrySetExpiry_entrySetValue(int argc, char **argv, int f entry *e7 = entrySetExpiry(e6, expiry7); size_t e7_entryMemUsage = entryMemUsage(e7); verify_entry_properties(e7, 
field6, value_copy6, expiry7, true, true); - TEST_ASSERT(e7_entryMemUsage == e6_entryMemUsage + sizeof(long long)); + size_t expected_e7_entry_mem = zmalloc_usable_size((char *)e7 - sizeof(long long) - sizeof(sds) - 3) + sdsAllocSize(value6); + TEST_ASSERT(expected_e7_entry_mem == e7_entryMemUsage); // Update expiry on a non-embedded entry that already has one // This should not change memory usage as we're just updating the expiry value @@ -437,13 +438,14 @@ int test_entryMemUsage_entrySetExpiry_entrySetValue(int argc, char **argv, int f TEST_ASSERT(e8_entryMemUsage == e7_entryMemUsage); // Update to smaller value (keeping non-embedded) - // Memory usage should increase by the difference between LONG_VALUE and "x" (143) + // Memory usage should increase by at least the difference between LONG_VALUE and "x" (143) sds value9 = sdsnew("x"); sds value_copy9 = sdsdup(value9); entry *e9 = entrySetValue(e8, value9); size_t e9_entryMemUsage = entryMemUsage(e9); verify_entry_properties(e9, field6, value_copy9, expiry8, true, true); - TEST_ASSERT(e9_entryMemUsage == e8_entryMemUsage - 143); + size_t expected_e9_entry_mem = zmalloc_usable_size((char *)e9 - sizeof(long long) - sizeof(sds) - 3) + sdsAllocSize(value9); + TEST_ASSERT(expected_e9_entry_mem == e9_entryMemUsage); // Update to bigger value (keeping non-embedded) // Memory usage increases by the difference in value size (1 byte) @@ -451,8 +453,8 @@ int test_entryMemUsage_entrySetExpiry_entrySetValue(int argc, char **argv, int f sds value_copy10 = sdsdup(value10); entry *e10 = entrySetValue(e9, value10); size_t e10_entryMemUsage = entryMemUsage(e10); - verify_entry_properties(e10, field6, value_copy10, expiry8, true, true); - TEST_ASSERT(e10_entryMemUsage == e9_entryMemUsage + 1); + size_t expected_10_entry_mem = zmalloc_usable_size((char *)e10 - sizeof(long long) - sizeof(sds) - 3) + sdsAllocSize(value10); + TEST_ASSERT(expected_10_entry_mem == e10_entryMemUsage); entryFree(e5); entryFree(e10); From 
eecceecb85914b592ccfaddb0e2f25c3d87d38f0 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Wed, 25 Jun 2025 17:44:55 +0300 Subject: [PATCH 015/119] fix minor formatting issue Signed-off-by: Ran Shidlansik --- src/entry.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/entry.h b/src/entry.h index 362c75c956..5447a1f58a 100644 --- a/src/entry.h +++ b/src/entry.h @@ -74,7 +74,7 @@ void entryFree(entry *entry); /* Creates a new entry with the given field, value, and optional expiry. */ entry *entryCreate(const_sds field, sds value, long long expiry); -/* Updates the value and/or expiry of an existing entry. +/* Updates the value and/or expiry of an existing entry. * In case value is NULL, will use the existing entry value. * In case expiry is EXPIRE_NONE, will use the existing entry expiration time. */ entry *entryUpdate(entry *entry, sds value, long long expiry); From de77fbb00435cf6d0b1ad16b47b42fe306c23fea Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Thu, 26 Jun 2025 19:27:42 +0300 Subject: [PATCH 016/119] introduce volatile_set implementation Signed-off-by: Ran Shidlansik --- src/t_hash.c | 4 +- src/unit/test_files.h | 14 +- src/volatile_set.c | 1189 +++++++++++++++++++++++++++++++++++++++-- src/volatile_set.h | 226 +++++++- 4 files changed, 1374 insertions(+), 59 deletions(-) diff --git a/src/t_hash.c b/src/t_hash.c index f241234067..c15e79e348 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -108,7 +108,7 @@ void hashTypeUntrackEntry(robj *o, void *entry) { volatile_set *set = hashTypeGetVolatileSet(o); debugServerAssert(set); serverAssert(volatileSetRemoveEntry(set, entry, entryGetExpiry(entry))); - if (volatileSetNumEntries(set) == 0) { + if (volatileSetIsEmpty(set)) { hashTypeDeleteVolatileSet(o); } } @@ -132,7 +132,7 @@ static void hashTypeTrackUpdateEntry(robj *o, void *old_entry, void *new_entry, debugServerAssert(set); serverAssert(volatileSetUpdateEntry(set, old_entry, new_entry, old_expiry, new_expiry) == 1); } - if 
(volatileSetNumEntries(set) == 0) { + if (volatileSetIsEmpty(set)) { hashTypeDeleteVolatileSet(o); } } diff --git a/src/unit/test_files.h b/src/unit/test_files.h index 85fa510c46..4b9f37b3a4 100644 --- a/src/unit/test_files.h +++ b/src/unit/test_files.h @@ -20,11 +20,6 @@ int test_dictDisableResizeReduceTo3(int argc, char **argv, int flags); int test_dictDeleteOneKeyTriggerResizeAgain(int argc, char **argv, int flags); int test_dictBenchmark(int argc, char **argv, int flags); int test_endianconv(int argc, char *argv[], int flags); -int test_entryCreate(int argc, char **argv, int flags); -int test_entryUpdate(int argc, char **argv, int flags); -int test_entryHasexpiry_entrySetExpiry(int argc, char **argv, int flags); -int test_entryIsExpired(int argc, char **argv, int flags); -int test_entryMemUsage_entrySetExpiry_entrySetValue(int argc, char **argv, int flags); int test_cursor(int argc, char **argv, int flags); int test_set_hash_function_seed(int argc, char **argv, int flags); int test_add_find_delete(int argc, char **argv, int flags); @@ -201,6 +196,11 @@ int test_version2num(int argc, char **argv, int flags); int test_reclaimFilePageCache(int argc, char **argv, int flags); int test_valkey_strtod(int argc, char **argv, int flags); int test_vector(int argc, char **argv, int flags); +int test_volatile_set_add_and_iterate(int argc, char **argv, int flags); +int test_volatile_set_large_batch_same_expiry(int argc, char **argv, int flags); +int test_volatile_set_iterate_multiple_expiries(int argc, char **argv, int flags); +int test_volatile_set_add_and_remove_all(int argc, char **argv, int flags); +int test_volatile_set_fuzzer(int argc, char **argv, int flags); int test_ziplistCreateIntList(int argc, char **argv, int flags); int test_ziplistPop(int argc, char **argv, int flags); int test_ziplistGetElementAtIndex3(int argc, char **argv, int flags); @@ -248,7 +248,6 @@ unitTest __test_crc64_c[] = {{"test_crc64", test_crc64}, {NULL, NULL}}; unitTest __test_crc64combine_c[] 
= {{"test_crc64combine", test_crc64combine}, {NULL, NULL}}; unitTest __test_dict_c[] = {{"test_dictCreate", test_dictCreate}, {"test_dictAdd16Keys", test_dictAdd16Keys}, {"test_dictDisableResize", test_dictDisableResize}, {"test_dictAddOneKeyTriggerResize", test_dictAddOneKeyTriggerResize}, {"test_dictDeleteKeys", test_dictDeleteKeys}, {"test_dictDeleteOneKeyTriggerResize", test_dictDeleteOneKeyTriggerResize}, {"test_dictEmptyDirAdd128Keys", test_dictEmptyDirAdd128Keys}, {"test_dictDisableResizeReduceTo3", test_dictDisableResizeReduceTo3}, {"test_dictDeleteOneKeyTriggerResizeAgain", test_dictDeleteOneKeyTriggerResizeAgain}, {"test_dictBenchmark", test_dictBenchmark}, {NULL, NULL}}; unitTest __test_endianconv_c[] = {{"test_endianconv", test_endianconv}, {NULL, NULL}}; -unitTest __test_entry_c[] = {{"test_entryCreate", test_entryCreate}, {"test_entryUpdate", test_entryUpdate}, {"test_entryHasexpiry_entrySetExpiry", test_entryHasexpiry_entrySetExpiry}, {"test_entryIsExpired", test_entryIsExpired}, {"test_entryMemUsage_entrySetExpiry_entrySetValue", test_entryMemUsage_entrySetExpiry_entrySetValue}, {NULL, NULL}}; unitTest __test_hashtable_c[] = {{"test_cursor", test_cursor}, {"test_set_hash_function_seed", test_set_hash_function_seed}, {"test_add_find_delete", test_add_find_delete}, {"test_add_find_delete_avoid_resize", test_add_find_delete_avoid_resize}, {"test_instant_rehashing", test_instant_rehashing}, {"test_bucket_chain_length", test_bucket_chain_length}, {"test_two_phase_insert_and_pop", test_two_phase_insert_and_pop}, {"test_replace_reallocated_entry", test_replace_reallocated_entry}, {"test_incremental_find", test_incremental_find}, {"test_scan", test_scan}, {"test_iterator", test_iterator}, {"test_safe_iterator", test_safe_iterator}, {"test_compact_bucket_chain", test_compact_bucket_chain}, {"test_random_entry", test_random_entry}, {"test_random_entry_with_long_chain", test_random_entry_with_long_chain}, {"test_random_entry_sparse_table", 
test_random_entry_sparse_table}, {"test_all_memory_freed", test_all_memory_freed}, {NULL, NULL}}; unitTest __test_intset_c[] = {{"test_intsetValueEncodings", test_intsetValueEncodings}, {"test_intsetBasicAdding", test_intsetBasicAdding}, {"test_intsetLargeNumberRandomAdd", test_intsetLargeNumberRandomAdd}, {"test_intsetUpgradeFromint16Toint32", test_intsetUpgradeFromint16Toint32}, {"test_intsetUpgradeFromint16Toint64", test_intsetUpgradeFromint16Toint64}, {"test_intsetUpgradeFromint32Toint64", test_intsetUpgradeFromint32Toint64}, {"test_intsetStressLookups", test_intsetStressLookups}, {"test_intsetStressAddDelete", test_intsetStressAddDelete}, {NULL, NULL}}; unitTest __test_kvstore_c[] = {{"test_kvstoreAdd16Keys", test_kvstoreAdd16Keys}, {"test_kvstoreIteratorRemoveAllKeysNoDeleteEmptyHashtable", test_kvstoreIteratorRemoveAllKeysNoDeleteEmptyHashtable}, {"test_kvstoreIteratorRemoveAllKeysDeleteEmptyHashtable", test_kvstoreIteratorRemoveAllKeysDeleteEmptyHashtable}, {"test_kvstoreHashtableIteratorRemoveAllKeysNoDeleteEmptyHashtable", test_kvstoreHashtableIteratorRemoveAllKeysNoDeleteEmptyHashtable}, {"test_kvstoreHashtableIteratorRemoveAllKeysDeleteEmptyHashtable", test_kvstoreHashtableIteratorRemoveAllKeysDeleteEmptyHashtable}, {NULL, NULL}}; @@ -262,6 +261,7 @@ unitTest __test_sha1_c[] = {{"test_sha1", test_sha1}, {NULL, NULL}}; unitTest __test_util_c[] = {{"test_string2ll", test_string2ll}, {"test_string2l", test_string2l}, {"test_ll2string", test_ll2string}, {"test_ld2string", test_ld2string}, {"test_fixedpoint_d2string", test_fixedpoint_d2string}, {"test_version2num", test_version2num}, {"test_reclaimFilePageCache", test_reclaimFilePageCache}, {NULL, NULL}}; unitTest __test_valkey_strtod_c[] = {{"test_valkey_strtod", test_valkey_strtod}, {NULL, NULL}}; unitTest __test_vector_c[] = {{"test_vector", test_vector}, {NULL, NULL}}; +unitTest __test_volatile_set_c[] = {{"test_volatile_set_add_and_iterate", test_volatile_set_add_and_iterate}, 
{"test_volatile_set_large_batch_same_expiry", test_volatile_set_large_batch_same_expiry}, {"test_volatile_set_iterate_multiple_expiries", test_volatile_set_iterate_multiple_expiries}, {"test_volatile_set_add_and_remove_all", test_volatile_set_add_and_remove_all}, {"test_volatile_set_fuzzer", test_volatile_set_fuzzer}, {NULL, NULL}}; unitTest __test_ziplist_c[] = {{"test_ziplistCreateIntList", test_ziplistCreateIntList}, {"test_ziplistPop", test_ziplistPop}, {"test_ziplistGetElementAtIndex3", test_ziplistGetElementAtIndex3}, {"test_ziplistGetElementOutOfRange", test_ziplistGetElementOutOfRange}, {"test_ziplistGetLastElement", test_ziplistGetLastElement}, {"test_ziplistGetFirstElement", test_ziplistGetFirstElement}, {"test_ziplistGetElementOutOfRangeReverse", test_ziplistGetElementOutOfRangeReverse}, {"test_ziplistIterateThroughFullList", test_ziplistIterateThroughFullList}, {"test_ziplistIterateThroughListFrom1ToEnd", test_ziplistIterateThroughListFrom1ToEnd}, {"test_ziplistIterateThroughListFrom2ToEnd", test_ziplistIterateThroughListFrom2ToEnd}, {"test_ziplistIterateThroughStartOutOfRange", test_ziplistIterateThroughStartOutOfRange}, {"test_ziplistIterateBackToFront", test_ziplistIterateBackToFront}, {"test_ziplistIterateBackToFrontDeletingAllItems", test_ziplistIterateBackToFrontDeletingAllItems}, {"test_ziplistDeleteInclusiveRange0To0", test_ziplistDeleteInclusiveRange0To0}, {"test_ziplistDeleteInclusiveRange0To1", test_ziplistDeleteInclusiveRange0To1}, {"test_ziplistDeleteInclusiveRange1To2", test_ziplistDeleteInclusiveRange1To2}, {"test_ziplistDeleteWithStartIndexOutOfRange", test_ziplistDeleteWithStartIndexOutOfRange}, {"test_ziplistDeleteWithNumOverflow", test_ziplistDeleteWithNumOverflow}, {"test_ziplistDeleteFooWhileIterating", test_ziplistDeleteFooWhileIterating}, {"test_ziplistReplaceWithSameSize", test_ziplistReplaceWithSameSize}, {"test_ziplistReplaceWithDifferentSize", test_ziplistReplaceWithDifferentSize}, 
{"test_ziplistRegressionTestForOver255ByteStrings", test_ziplistRegressionTestForOver255ByteStrings}, {"test_ziplistRegressionTestDeleteNextToLastEntries", test_ziplistRegressionTestDeleteNextToLastEntries}, {"test_ziplistCreateLongListAndCheckIndices", test_ziplistCreateLongListAndCheckIndices}, {"test_ziplistCompareStringWithZiplistEntries", test_ziplistCompareStringWithZiplistEntries}, {"test_ziplistMergeTest", test_ziplistMergeTest}, {"test_ziplistStressWithRandomPayloadsOfDifferentEncoding", test_ziplistStressWithRandomPayloadsOfDifferentEncoding}, {"test_ziplistCascadeUpdateEdgeCases", test_ziplistCascadeUpdateEdgeCases}, {"test_ziplistInsertEdgeCase", test_ziplistInsertEdgeCase}, {"test_ziplistStressWithVariableSize", test_ziplistStressWithVariableSize}, {"test_BenchmarkziplistFind", test_BenchmarkziplistFind}, {"test_BenchmarkziplistIndex", test_BenchmarkziplistIndex}, {"test_BenchmarkziplistValidateIntegrity", test_BenchmarkziplistValidateIntegrity}, {"test_BenchmarkziplistCompareWithString", test_BenchmarkziplistCompareWithString}, {"test_BenchmarkziplistCompareWithNumber", test_BenchmarkziplistCompareWithNumber}, {"test_ziplistStress__ziplistCascadeUpdate", test_ziplistStress__ziplistCascadeUpdate}, {NULL, NULL}}; unitTest __test_zipmap_c[] = {{"test_zipmapIterateWithLargeKey", test_zipmapIterateWithLargeKey}, {"test_zipmapIterateThroughElements", test_zipmapIterateThroughElements}, {NULL, NULL}}; unitTest __test_zmalloc_c[] = {{"test_zmallocInitialUsedMemory", test_zmallocInitialUsedMemory}, {"test_zmallocAllocReallocCallocAndFree", test_zmallocAllocReallocCallocAndFree}, {"test_zmallocAllocZeroByteAndFree", test_zmallocAllocZeroByteAndFree}, {NULL, NULL}}; @@ -275,7 +275,6 @@ struct unitTestSuite { {"test_crc64combine.c", __test_crc64combine_c}, {"test_dict.c", __test_dict_c}, {"test_endianconv.c", __test_endianconv_c}, - {"test_entry.c", __test_entry_c}, {"test_hashtable.c", __test_hashtable_c}, {"test_intset.c", __test_intset_c}, {"test_kvstore.c", 
/*************************************************************************************************************
 * pointer_vector Implementation
 *************************************************************************************************************/

/* Bit widths of the two counters packed into the 64-bit pointer_vector header. */
#define PV_CARD_BITS 30
#define PV_ALLOC_BITS 34
/* Largest value the `len` bit-field can represent. */
#define PV_MAX_ELEMENTS ((1ULL << PV_CARD_BITS) - 1)
#define PV_HEADER_SIZE (sizeof(pointer_vector))
#define PV_ELEM_SIZE (sizeof(void *))
/* NULL-tolerant accessors: a NULL pointer_vector is treated as an empty vector.
 * Arguments are parenthesized so that arbitrary expressions can be passed. */
#define PV_ALLOC(pv) ((pv) ? (pv)->alloc : 0)
#define PV_LEN(pv) ((pv) ? (pv)->len : 0)
/* Bytes occupied by the header plus the elements currently stored. */
#define PV_USED_SIZE(pv) ((pv) ? (PV_HEADER_SIZE + PV_LEN(pv) * PV_ELEM_SIZE) : 0)

/* Vector of raw pointers with the length and allocation counters embedded in a
 * single 64-bit header that precedes the element array. */
typedef struct {
    uint64_t len : PV_CARD_BITS;    /* Number of elements stored in data[] */
    uint64_t alloc : PV_ALLOC_BITS; /* Size of the allocation in bytes (header included) */
    void *data[];                   /* Flexible array member holding the elements */
} pointer_vector;
+ * Note that a NULL is a !!!valid!!! vector - returns 0 if the vector is NULL. */ +static inline uint32_t pv_len(pointer_vector *vec) { + return PV_LEN(vec); +} + +/* Ensures that a pointer_vector has enough capacity to hold additional elements. + * + * This function guarantees that the given pointer_vector `sv` has at least enough + * allocated space to accommodate `capacity` more elements, growing it if necessary. + * If the vector is currently `NULL`, it will be newly allocated. + * + * The allocation is handled using `zmalloc` or `zrealloc_usable`, depending on whether + * the vector is new or already initialized. The internal `alloc` field is updated to + * reflect the actual allocated size. + * + * Arguments: + * sv - Pointer to an existing pointer_vector or NULL. + * capacity - The number of additional elements the vector should be able to accommodate. + * + * Return: + * A pointer to the resized (or newly allocated) pointer_vector with sufficient capacity. + * Returns NULL only if the allocation fails. + * + * Note: + * The `capacity` is the number of *additional* elements beyond the current length. + * This function does not modify the vector's logical length (`len`), only its allocation. */ +pointer_vector *pv_grow_to_fit(pointer_vector *sv, size_t capacity) { + if (capacity == 0) return sv; + size_t required = PV_HEADER_SIZE + (PV_LEN(sv) + capacity) * PV_ELEM_SIZE; + if (PV_ALLOC(sv) >= required) return sv; + + if (!sv) { + sv = zmalloc(required); + sv->len = 0; + } else { + sv = zrealloc_usable(sv, required, &required); + } + sv->alloc = required; + return sv; +} + +/* Shrinks a pointer_vector to release unused allocated memory. + * + * This function checks if the current allocation (`used`) for the given + * `pointer_vector` exceeds the memory actually required to store its elements. + * If so, it reallocates the vector to use only the needed memory, helping reduce + * memory overhead and improve space efficiency. 
+ * + * The function uses `zrealloc_usable()` to reallocate memory in a way compatible + * with jemalloc (or other zmalloc backends) and updates the internal allocation + * size (`alloc`) to reflect the new length. + * + * Arguments: + * sv - A pointer to the `pointer_vector` to shrink. + * + * Return: + * A potentially reallocated `pointer_vector` with minimized memory usage, + * or `NULL` if the input was `NULL`. + * + * This function does not change the logical contents of the vector. + * It only adjusts the allocated memory footprint. If no reallocation + * is needed, the original pointer is returned unchanged. + * + * Example: + * pointer_vector *vec = pv_new(); + * // After some insertions and deletions + * vec = pv_shrink_to_fit(vec); */ +pointer_vector *pv_shrink_to_fit(pointer_vector *sv) { + if (!sv) return NULL; + + size_t used = PV_ALLOC(sv); + size_t required = pv_len(sv) == 0 ? 0 : PV_HEADER_SIZE + pv_len(sv) * PV_ELEM_SIZE; + + if (used > required) { + if (!required) { + zfree(sv); + return NULL; + } + sv = zrealloc_usable(sv, used, &required); + sv->alloc = required; + } + return sv; +} + +/** + * pv_split - Splits a pointer_vector into two parts at a given index. + * + * Arguments: + * sv_ptr: A pointer to the pointer_vector* to split. This pointer is + * updated in-place to point to the left portion (elements [0..split_index-1]). + * split_index: The index at which to split the vector. The resulting right + * vector will contain elements [split_index..len-1]. + * + * This function is used to **efficiently split a sorted vector of pointers** + * into two separate vectors. The original vector is truncated in-place to + * only contain the first half, and a new vector is returned containing the + * second half. This allows for logical partitioning of data without scanning + * or reallocating unnecessary memory. + * + * The vector is assumed to be densely packed and its elements are of type `void*`. 
+ * + * Memory is allocated for the new right vector using `zmalloc`, and the unused + * portion of the original vector may be freed or shrunk via `pv_shrink_to_fit` + * to optimize memory usage. + * + * Return: + * - A new pointer_vector containing the right split [split_index..len-1]. + * - `NULL` in the following cases: + * • The input vector is `NULL`. + * • The input vector has only 1 or fewer elements (nothing to split). + * • The `split_index` is equal to the vector length (all elements stay in the left part). + * • The `split_index` is such that the right part would have 0 elements. + * + * Side effects: + * - The original vector pointer (`*sv_ptr`) is modified to point to the + * resized left portion. + * + * Example: + * -------- + * Suppose `sv_ptr` points to a vector of 5 elements: + * [A, B, C, D, E] + * + * Calling: + * pointer_vector *right = pv_split(&sv_ptr, 3); + * + * Results in: + * sv_ptr -> [A, B, C] + * right -> [D, E] + * + * If the split_index is 5 (i.e. the end), the function returns NULL and the + * original vector is unchanged. 
*/ +pointer_vector *pv_split(pointer_vector **sv_ptr, uint32_t split_index) { + pointer_vector *sv = *sv_ptr; + + // Handle edge cases: null or empty + if (!sv || sv->len <= 1) return NULL; + + // If no valid split found, return NULL (entire vector is one block) + if (split_index == sv->len) return NULL; + + // Number of elements for the right half + uint64_t right_len = sv->len - split_index; + if (right_len == 0) return NULL; + + // Allocate new vector for right part + size_t item_bytes = sizeof(void *); + size_t total_bytes = sizeof(pointer_vector) + right_len * item_bytes; + size_t new_alloc; + pointer_vector *right = zmalloc_usable(total_bytes, &new_alloc); + right->alloc = new_alloc; + right->len = right_len; + + // Copy the right part + memcpy(&right->data[0], &sv->data[split_index], right_len * item_bytes); + + // Shrink original vector + sv->len = split_index; + *sv_ptr = pv_shrink_to_fit(sv); // Optional: shrink in-place to reduce memory + + return right; +} + +/* Creates a new pointer_vector with the specified initial capacity. + * + * This function initializes a new pointer_vector capable of holding at least + * `capacity` elements. Internally, it delegates allocation and setup to + * `pv_grow_to_fit`, starting from a NULL vector. + * + * Arguments: + * capacity - The initial number of elements the vector should be able to store. + * + * Return: + * A pointer to the newly allocated pointer_vector. + * Note that a NULL is a !!valid!! cector which size is zero. + * + * Note: + * The logical length (`len`) of the returned vector is initialized to 0. + */ +pointer_vector *pv_new(uint32_t capacity) { + pointer_vector *new_vec = NULL; + return pv_grow_to_fit(new_vec, capacity); +} + +/* Inserts an element at the specified position in the pointer_vector. + * + * Ensures enough capacity for the new element, shifts elements to make space, + * and inserts the given element at the desired position. 
+ * + * Arguments: + * sv - The pointer_vector to insert into (can be NULL). + * elem - The pointer to be inserted. + * pos - The index at which to insert the element (must be ≤ sv->len). + * + * Return: + * The updated pointer_vector with the element inserted. */ +pointer_vector *pv_insert(pointer_vector *sv, void *elem, uint32_t pos) { + sv = pv_grow_to_fit(sv, 1); + + if (pos < sv->len) { + memmove(&sv->data[pos + 1], &sv->data[pos], (sv->len - pos) * sizeof(void *)); + } + + sv->data[pos] = elem; + sv->len++; + return sv; +} + +/* Removes the element at the specified index from the pointer_vector. + * + * Shifts elements as necessary and optionally shrinks the vector if memory can be saved. + * If this is the last element in the vector, the vector is freed and NULL is returned. + * + * Arguments: + * sv - The pointer_vector to remove from. + * idx - The index of the element to remove (must be < sv->len). + * + * Return: + * The updated pointer_vector after removal. + * Returns NULL if the last element was removed and the vector was freed. */ +pointer_vector *pv_removeAt(pointer_vector *sv, uint32_t idx) { + if (!sv || sv->len == 0) return sv; + assert(idx < sv->len); + if (sv->len == 1) { + /* Last element being removed; delete vector */ + zfree(sv); + return NULL; + } else if (idx < sv->len - 1) + memmove(&sv->data[idx], &sv->data[idx + 1], (sv->len - idx - 1) * PV_ELEM_SIZE); + sv->len--; + return pv_shrink_to_fit(sv); +} + +/* Removes the first matching element from the pointer_vector. + * + * Performs a linear search for the given pointer and removes the first match. + * Updates the vector pointer in case a removal was done. + * + * Arguments: + * sv - A pointer to the location of the pointer_vector to remove from. + * elem - The element pointer to match and remove. 
+ * + * Return: + * true in case a removal was made, false otherwise */ +bool pv_remove(pointer_vector **psv, void *elem) { + pointer_vector *sv = *psv; + if (!sv || sv->len == 0) return false; + + for (uint32_t i = 0; i < sv->len; i++) { + if (sv->data[i] == elem) { + *psv = pv_removeAt(sv, i); + return true; + } + } + return false; +} + +/* Retrieves the element at the specified index in the pointer_vector. + * + * Arguments: + * vec - The pointer_vector to retrieve from. + * idx - The index of the element to access. + * + * Return: + * A pointer to the element at the given index. + * Returns NULL if the vector is NULL or the index is out of bounds. */ +void *pv_get(pointer_vector *vec, uint32_t idx) { + if (!vec || idx >= vec->len) return NULL; + return vec->data[idx]; +} + +/* Frees the memory used by the pointer_vector. + * + * Arguments: + * sv - The pointer_vector to free. + * + * Return: + * None. */ +void pv_free(pointer_vector *sv) { + if (sv) zfree(sv); +} + +uint32_t pv_find(pointer_vector *sv, void *elem) { + if (!sv || sv->len == 0) return 0; + + for (uint32_t i = 0; i < sv->len; i++) { + if (sv->data[i] == elem) { + return i; + } + } + return sv->len; +} +/************************************************************************************************************* + * pointer_vector End + *************************************************************************************************************/ +#define VSET_BUCKET_NONE -1 // matching the NULL case +#define VSET_BUCKET_SINGLE 0x1ULL // xx1 (assuming sds) +#define VSET_BUCKET_VECTOR 0x2ULL // 010 +#define VSET_BUCKET_HT 0x4ULL // 100 +#define VSET_BUCKET_RAX 0x6ULL // 110 + +#define VSET_TAG_MASK 0x7ULL +#define VSET_PTR_MASK (~VSET_TAG_MASK) + +// Determine bucket type +static inline int vsetBucketType(vsetBucket *b) { + if (b == NULL) return VSET_BUCKET_NONE; + + uintptr_t bits = (uintptr_t)b; + if (bits & 0x1) + return VSET_BUCKET_SINGLE; + return bits & VSET_TAG_MASK; +} + +// Access raw 
pointer +static inline void *vsetBucketRawPtr(vsetBucket *b) { + return (void *)((uintptr_t)b & VSET_PTR_MASK); +} + +// Accessors with type assertions +static inline pointer_vector *vsetBucketVector(vsetBucket *b) { + assert(vsetBucketType(b) == VSET_BUCKET_VECTOR); + return (pointer_vector *)vsetBucketRawPtr(b); +} + +static inline hashtable *vsetBucketHashtable(vsetBucket *b) { + assert(vsetBucketType(b) == VSET_BUCKET_HT); + return (hashtable *)vsetBucketRawPtr(b); +} + +static inline rax *vsetBucketRax(vsetBucket *b) { + assert(vsetBucketType(b) == VSET_BUCKET_RAX); + return (rax *)vsetBucketRawPtr(b); +} + +static inline void *vsetBucketSingle(vsetBucket *b) { + return b; +} + +// Setters +static inline vsetBucket *vsetBucketSetType(vsetBucket *b, int type) { + uintptr_t p = (uintptr_t)b; + return (vsetBucket *)(p | (type & VSET_TAG_MASK)); +} + +static inline vsetBucket *vsetBucketSetVector(vsetBucket *b, pointer_vector *vec) { + UNUSED(b); + return vsetBucketSetType(vec, VSET_BUCKET_VECTOR); +} + +static inline vsetBucket *vsetBucketSetHashtable(vsetBucket *b, hashtable *ht) { + UNUSED(b); + return vsetBucketSetType(ht, VSET_BUCKET_HT); +} + +static inline vsetBucket *vsetBucketSetSingle(vsetBucket *b, void *ptr) { + UNUSED(b); + return ptr; +} + +static inline vsetBucket *vsetBucketSetNone(vsetBucket *b) { + UNUSED(b); + return NULL; +} + +static inline vsetBucket *vsetBucketSetRax(vsetBucket *b, rax *r) { + UNUSED(b); + return vsetBucketSetType(r, VSET_BUCKET_RAX); +} + +/****************** Helper Functions *******************************************/ + +/* compare 2 expiration times */ +#define EXPIRE_COMPARE(exp1, exp2) (exp1 < exp2 ? -1 : exp1 == exp2 ? 
0 \ + : 1) + +static inline long long get_bucket_ts(long long expiry) { + return (expiry & ~(VOLATILESET_BUCKET_INTERVAL_MIN - 1LL)) + VOLATILESET_BUCKET_INTERVAL_MIN; +} + +static inline long long get_max_bucket_ts(long long expiry) { + return (expiry & ~(VOLATILESET_BUCKET_INTERVAL_MAX - 1LL)) + VOLATILESET_BUCKET_INTERVAL_MAX; +} + +static inline size_t encodeExpiryKey(long long expiry, unsigned char *key) { + long long be_ts = htonu64(expiry); + size_t size = sizeof(be_ts); + memcpy(key, &be_ts, size); + return size; +} + +static inline long long decodeExpiryKey(unsigned char *key) { + long long res; + memcpy(&res, key, sizeof(res)); + res = ntohu64(res); + return res; +} + +static size_t encodeNewExpiryBucketKey(unsigned char *key, long long expiry) { + long long bucket_ts = get_max_bucket_ts(expiry); + long long be_ts = htonu64(bucket_ts); + size_t size = sizeof(be_ts); + memcpy(key, &be_ts, size); + return size; +} + +/** + * Performs binary search to find the index where the element should be inserted. + * Returns the index where the element should be placed to keep the array sorted. + * + * sv Pointer to the sorted vector + * elem Pointer to the element to insert + * cmp Comparison function (like strcmp-style: <0, ==0, >0) + * returns the insertion index (between 0 and sv->len) */ +uint32_t _find_insert_position(volatile_set *set, vsetBucket *bucket, long long expiry) { + pointer_vector *sv = vsetBucketVector(bucket); + uint32_t left = 0; + uint32_t right = pv_len(sv); + while (left < right) { + uint32_t mid = (left + right) / 2; + int res = EXPIRE_COMPARE(expiry, set->etypr->getExpiry(sv->data[mid])); + if (res <= 0) + right = mid; + else + left = mid + 1; + } + + return left; // Final position to insert the element +} + +/* _find_split_position - Find the optimal split index in a sorted pointer vector + * based on coarse (bucketed) expiry timestamps. + * Arguments + * set: Pointer to the `volatile_set` containing the element type and expiry logic. 
+ * bucket: Pointer to a `vsetBucket` holding a sorted `pointer_vector` of elements. + * split_ts: an optional pointer to a location to store the split timestamp, that is the position + * belonging in the lower split vector with the largest expiration time. + * + * This function searches for the earliest index at which the vector can be split into + * two parts such that all elements in the first part are strictly less than all elements + * in the second part, after mapping each element's expiry to a lower-resolution bucket. + * The mapping is done using `get_bucket_ts(set->etypr->getExpiry(element))`. + * + * This ensures that elements belonging to the same coarse-grained time bucket remain + * in the same split group, which is useful for efficient time-based partitioning. + * + * To do this efficiently, the function performs a binary search to locate the first + * position where the bucketed expiry of the current item is greater than the bucketed + * expiry of the previous item. This approach attempts to maximize the size of each + * resulting split vector while ensuring that: + * + * bucket_ts[element[i-1]] < bucket_ts[element[i]] + * + * If no valid split is found (i.e. all elements map to the same bucket timestamp), + * the function returns `sv->len` to indicate that splitting is not possible. + * + * Return: + * - A valid split index in the range [1, sv->len], where the split occurs. + * - May return `sv->len` if no valid position is found. 
+ * + * Example: + * -------- + * Suppose the vector contains elements with matching expiry timestamps: + * [1234, 1235, 1236, 4567, 4568] + * + * And `get_bucket_ts()` maps them to: + * [1300, 1300, 1300, 5000, 5000] + * + * Then `_find_split_position(set, bucket)` returns 3, resulting in: + * First part: [1234, 1235, 1236] (bucket 1300) + * Second part: [4567, 4568] (bucket 5000) + * + * This guarantees that each vector contains elements with the same bucket timestamp, + * and no value in the first part maps to the same or later bucket as the second part. + */ +uint32_t _find_split_position(volatile_set *set, vsetBucket *bucket, long long *split_ts_out) { + pointer_vector *sv = vsetBucketVector(bucket); + + if (!sv || sv->len < 2) return sv->len; + + if (!sv || sv->len < 2) return sv ? sv->len : 0; + + uint32_t left = 1; + uint32_t right = sv->len - 1; + uint32_t best_split = sv->len; + uint32_t mid_closest_to_center = sv->len / 2; + long long best_split_ts = 0; + + while (left <= right) { + uint32_t mid = (left + right) / 2; + + long long prev_ts = get_bucket_ts(set->etypr->getExpiry(pv_get(sv, mid - 1))); + long long curr_ts = get_bucket_ts(set->etypr->getExpiry(pv_get(sv, mid))); + + if (prev_ts != curr_ts) { + // Check if closer to center + if (best_split == sv->len || + abs((int)mid - (int)mid_closest_to_center) < abs((int)best_split - (int)mid_closest_to_center)) { + best_split = mid; + best_split_ts = prev_ts; + } + right = mid - 1; + } else { + left = mid + 1; + } + } + + if (split_ts_out) { + *split_ts_out = best_split != sv->len + ? 
best_split_ts + : get_bucket_ts(set->etypr->getExpiry(pv_get(sv, sv->len - 1))); + } + + return best_split; } -void freeVolatileSet(volatile_set *b) { - raxFree(b->expiry_buckets); - zfree(b); + +#define VSET_BUCKET_KEY_LEN 8 + +static uint64_t hash_pointer(const void *ptr) { + uintptr_t x = (uintptr_t)ptr; + if (sizeof(ptr) == 4) { + // 32-bit platform + x ^= x >> 16; + x *= 0x85ebca6b; + x ^= x >> 13; + x *= 0xc2b2ae35; + x ^= x >> 16; + } else { + // 64-bit platform + x ^= x >> 33; + x *= 0xff51afd7ed558ccdULL; + x ^= x >> 33; + x *= 0xc4ceb9fe1a85ec53ULL; + x ^= x >> 33; + } + return (uint64_t)x; +} + +hashtableType pointerHashtableType = { + .hashFunction = hash_pointer, +}; + +static inline vsetBucket *findBucket(rax *expiry_buckets, long long expiry, unsigned char *key, size_t *key_len, long long *pbucket_ts, raxNode **node) { + *key_len = encodeExpiryKey(expiry, key); + vsetBucket *bucket = NULL; + /* First try to locate the first bucket which is larger than the specified key */ + raxIterator iter; + raxStart(&iter, expiry_buckets); + raxSeek(&iter, ">", (unsigned char *)key, *key_len); + + if (raxNext(&iter)) { + long long bucket_ts = decodeExpiryKey(iter.key); + /* If this bucket span over a window to far in the future, it is not a candidate. 
*/ + if (get_max_bucket_ts(expiry) < bucket_ts) { + raxStop(&iter); + return NULL; + } + bucket = iter.data; + assert(iter.node->iskey); + if (node) *node = iter.node; + if (key) { + assert(iter.key_len == VSET_BUCKET_KEY_LEN); + memcpy(key, iter.key, iter.key_len); + } + if (pbucket_ts) *pbucket_ts = decodeExpiryKey(iter.key); + } + raxStop(&iter); + return bucket; +} + +static void freeVsetBucket(void *entry) { + vsetBucket *bucket = (vsetBucket *)entry; + switch (vsetBucketType(bucket)) { + case VSET_BUCKET_NONE: + case VSET_BUCKET_SINGLE: + // No internal memory to free + break; + case VSET_BUCKET_VECTOR: + pv_free(vsetBucketVector(bucket)); + break; + case VSET_BUCKET_HT: + hashtableRelease(vsetBucketHashtable(bucket)); + break; + case VSET_BUCKET_RAX: + raxFreeWithCallback(vsetBucketRax(bucket), freeVsetBucket); + break; + default: + serverPanic("Unknown volatile set type in freeVsetBucket"); + } +} + +static bool splitBucketIfPossible(volatile_set *set, vsetBucket *bucket, long long bucket_ts, raxNode *node) { + /* We can only split vector encoded buckets */ + if (vsetBucketType(bucket) != VSET_BUCKET_VECTOR) { + return false; + } + size_t key_len; + long long target_bucket_ts = bucket_ts; + unsigned char key[VSET_BUCKET_KEY_LEN] = {0}; + vsetBucket *new_bucket = NULL; + pointer_vector *sv = vsetBucketVector(bucket); + rax *expiry_buckets = vsetBucketRax(set->expiry_buckets); + long long max_bucket_ts = get_bucket_ts(set->etypr->getExpiry(sv->data[pv_len(sv) - 1])); + long long min_bucket_ts = get_bucket_ts(set->etypr->getExpiry(sv->data[0])); + + if (max_bucket_ts < bucket_ts) { + /* In case the bucket is already spanning over a larger window than needed, just place the bucket in a new place */ + key_len = encodeExpiryKey(bucket_ts, key); + serverAssert(raxRemove(expiry_buckets, key, key_len, (void **)&new_bucket)); + serverAssert(new_bucket == bucket); + target_bucket_ts = max_bucket_ts; + + } else if (min_bucket_ts != max_bucket_ts) { + /* lets split the 
bucket. we know we can do it. */ + uint32_t split_index = _find_split_position(set, bucket, &target_bucket_ts); + assert(target_bucket_ts < bucket_ts); + assert(split_index != pv_len(sv)); /* no way to split it ??? */ + pointer_vector *new_bucket_vector = vsetBucketVector(bucket); + bucket = vsetBucketSetVector(bucket, pv_split(&new_bucket_vector, split_index)); + new_bucket = vsetBucketSetVector(new_bucket, new_bucket_vector); + assert(pv_len(vsetBucketVector(new_bucket)) > 0); + assert(pv_len(vsetBucketVector(bucket)) > 0); + /* modify the current bucket data pointer */ + key_len = encodeExpiryKey(bucket_ts, key); + /* In order to avoid rax override, we directly change the node data */ + // alternative: raxInsert(set->expiry_buckets, key, key_len, bucket, NULL); + raxSetData(node, bucket); + + /* santity check after split + assert(target_bucket_ts < bucket_ts); + pointer_vector *high_bucket_vector = vsetBucketVector(bucket); + pointer_vector *low_bucket_vector = vsetBucketVector(new_bucket); + for (uint32_t i = 0; i < pv_len(low_bucket_vector); i++) { + assert(set->etypr->getExpiry(pv_get(low_bucket_vector, i)) < target_bucket_ts); + assert(get_bucket_ts(set->etypr->getExpiry(pv_get(low_bucket_vector, i))) < bucket_ts); + assert(get_bucket_ts(set->etypr->getExpiry(pv_get(low_bucket_vector, i))) <= target_bucket_ts); + long long find_bucket_ts; + vsetBucket *find_bucket = findBucket(set, set->etypr->getExpiry(pv_get(low_bucket_vector, i)), key, &key_len, &find_bucket_ts, NULL); + assert(find_bucket == new_bucket); + } + for (uint32_t i = 0; i < pv_len(high_bucket_vector); i++) { + assert(get_bucket_ts(set->etypr->getExpiry(pv_get(high_bucket_vector, i))) > target_bucket_ts); + assert(get_bucket_ts(set->etypr->getExpiry(pv_get(high_bucket_vector, i))) <= bucket_ts); + assert(set->etypr->getExpiry(pv_get(high_bucket_vector, i)) >= target_bucket_ts); + assert(set->etypr->getExpiry(pv_get(high_bucket_vector, i)) < bucket_ts); + long long find_bucket_ts; + vsetBucket 
*find_bucket = findBucket(set, set->etypr->getExpiry(pv_get(high_bucket_vector, i)), key, &key_len, &find_bucket_ts, NULL); + assert(find_bucket == bucket); + }*/ + } else { + /* We cannot split the bucket. just return false */ + return false; + } + /* We change the current bucket position OR we splited it, either way we have a new bucket to insert. */ + key_len = encodeExpiryKey(target_bucket_ts, key); + raxInsert(expiry_buckets, key, key_len, new_bucket, NULL); + return true; +} + +static inline vsetBucket *insertToBucket_NONE(volatile_set *set, vsetBucket *bucket, void *entry, long long expiry) { + UNUSED(set); + UNUSED(expiry); + return vsetBucketSetSingle(bucket, entry); +} + +static inline vsetBucket *insertToBucket_SINGLE(volatile_set *set, vsetBucket *bucket, void *entry, long long expiry) { + /* Upgrade to vector */ + pointer_vector *sv = pv_new(2); + void *curr_entry = vsetBucketSingle(bucket); + long long curr_expiry = set->etypr->getExpiry(curr_entry); + if (curr_expiry < expiry) { + sv = pv_insert(sv, curr_entry, 0); + sv = pv_insert(sv, entry, 1); + } else { + sv = pv_insert(sv, entry, 0); + sv = pv_insert(sv, curr_entry, 1); + } + bucket = vsetBucketSetVector(bucket, sv); + return bucket; +} + +static inline vsetBucket *insertToBucket_VECTOR(volatile_set *set, vsetBucket *bucket, void *entry, long long expiry) { + pointer_vector *pv = vsetBucketVector(bucket); + /* limit of the number of elements in a vector. 
*/ + if (pv_len(pv) >= VOLATILESET_VECTOR_BUCKET_MAX_SIZE) { + // Upgrade to hashtable + hashtable *ht = hashtableCreate(&pointerHashtableType); + for (uint32_t i = 0; i < pv_len(pv); i++) { + hashtableAdd(ht, pv_get(pv, i)); + } + pv_free(pv); + /* Add the new entry as well */ + hashtableAdd(ht, entry); + + return vsetBucketSetHashtable(bucket, ht); + } else { + uint32_t pos = _find_insert_position(set, bucket, expiry); + return vsetBucketSetVector(bucket, pv_insert(pv, entry, pos)); + } + return NULL; +} + +static inline vsetBucket *insertToBucket_HASHTABLE(volatile_set *set, vsetBucket *bucket, void *entry, long long expiry) { + UNUSED(set); + UNUSED(expiry); + + hashtable *ht = vsetBucketHashtable(bucket); + assert(hashtableAdd(ht, entry)); + return bucket; +} + +static inline vsetBucket *insertToBucket_RAX(volatile_set *set, vsetBucket *target, void *entry, long long expiry) { + unsigned char key[VSET_BUCKET_KEY_LEN] = {0}; + size_t key_len; + long long bucket_ts; + rax *expiry_buckets = vsetBucketRax(target); + raxNode *node; + vsetBucket *bucket = findBucket(expiry_buckets, expiry, key, &key_len, &bucket_ts, &node); + int type = vsetBucketType(bucket); + if (type == VSET_BUCKET_NONE) { + /* No bucket: create single-entry bucket */ + bucket = insertToBucket_NONE(set, bucket, entry, expiry); + assert(vsetBucketType(bucket) == VSET_BUCKET_SINGLE); + size_t key_size = encodeNewExpiryBucketKey(key, expiry); + raxInsert(expiry_buckets, key, key_size, bucket, NULL); + return target; + } else if (type == VSET_BUCKET_SINGLE) { + /* Upgrade to vector */ + bucket = insertToBucket_SINGLE(set, bucket, entry, expiry); + assert(vsetBucketType(bucket) == VSET_BUCKET_VECTOR); + /* In order to avoid rax override, we directly change the node data */ + // alternative: raxInsert(expiry_buckets, key, key_len, bucket, NULL); + raxSetData(node, bucket); + } else if (type == VSET_BUCKET_VECTOR) { + pointer_vector *sv = vsetBucketVector(bucket); + if (pv_len(sv) == 
VOLATILESET_VECTOR_BUCKET_MAX_SIZE) { + /* Try to split the bucket. If not possible switch to hashtable encoding. */ + if (!splitBucketIfPossible(set, bucket, bucket_ts, node)) { + /* Can't split? insrt to the vector anyway, it will just expand to hashtable */ + bucket = insertToBucket_VECTOR(set, bucket, entry, expiry); + assert(vsetBucketType(bucket) == VSET_BUCKET_HT); + /* In order to avoid rax override, we directly change the node data */ + // alternative raxInsert(expiry_buckets, key, key_len, bucket, NULL); + raxSetData(node, bucket); + } else { + /* we splitted the bucket. go and find again a bucket to place the entry since there can be new options now. */ + return insertToBucket_RAX(set, target, entry, expiry); + } + } else { + vsetBucket *new_bucket = insertToBucket_VECTOR(set, bucket, entry, expiry); + if (new_bucket != bucket) + /* In order to avoid rax override, we directly change the node data */ + // alternative: raxInsert(expiry_buckets, key, key_len, new_bucket, NULL); + raxSetData(node, new_bucket); + } + } else if (vsetBucketType(bucket) == VSET_BUCKET_HT) { + bucket = insertToBucket_HASHTABLE(set, bucket, entry, expiry); + } else { + serverPanic("Unknown bucket type in insertToBucket_RAX"); + } + return target; +} + +static inline vsetBucket *removeFromBucket_SINGLE(volatile_set *set, vsetBucket *bucket, void *entry, long long expiry, bool *removed) { + UNUSED(set); + UNUSED(expiry); + + if (vsetBucketSingle(bucket) == entry) { + *removed = true; + return vsetBucketSetNone(bucket); + } else { + *removed = false; + return bucket; + } +} + +static inline vsetBucket *removeFromBucket_VECTOR(volatile_set *set, vsetBucket *bucket, void *entry, long long expiry, bool *removed) { + UNUSED(set); + UNUSED(expiry); + + vsetBucket *new_bucket = bucket; + bool success = false; + pointer_vector *sv = vsetBucketVector(bucket); + /* In case we we removed the entry */ + uint32_t vlen = pv_len(sv); + if (vlen <= 2) { + /* convert to single if needed */ + 
uint32_t idx = pv_find(sv, entry); + if (idx == vlen) { + success = false; + } else { + if (vlen == 1) + new_bucket = vsetBucketSetNone(bucket); + else + new_bucket = vsetBucketSetSingle(bucket, pv_get(sv, idx == 0 ? 1 : 0)); + success = true; + pv_free(sv); + } + } else { + if (pv_remove(&sv, entry)) { + success = true; + new_bucket = vsetBucketSetVector(bucket, sv); + } + } + if (removed) *removed = success; + return new_bucket; +} + +static inline vsetBucket *removeFromBucket_HASHTABLE(volatile_set *set, vsetBucket *bucket, void *entry, long long expiry, bool *removed) { + UNUSED(set); + UNUSED(expiry); + + bool success = false; + vsetBucket *new_bucket = bucket; + hashtable *ht = vsetBucketHashtable(bucket); + if (hashtableDelete(ht, entry)) { + success = true; + assert(hashtableSize(ht) > 0); + if (hashtableSize(ht) == 1) { + // Downgrade to SINGLE + hashtableIterator hi; + hashtableInitIterator(&hi, ht, 0); + void *ptr; + hashtableNext(&hi, &ptr); + hashtableRelease(ht); + new_bucket = vsetBucketSetSingle(bucket, ptr); + } + } + if (removed) *removed = success; + return new_bucket; +} + +static bool raxBucketRemoveEntry(volatile_set *set, void *entry, vsetBucket *bucket, unsigned char *key, size_t key_len, vsetBucket **pbucket, raxNode *node) { + bool removed = false; + switch (vsetBucketType(bucket)) { + case VSET_BUCKET_SINGLE: + bucket = removeFromBucket_SINGLE(set, bucket, entry, 0, &removed); + if (removed) { + raxRemove(vsetBucketRax(set->expiry_buckets), key, key_len, NULL); + if (pbucket) *pbucket = NULL; + } + break; + case VSET_BUCKET_VECTOR: { + vsetBucket *new_bucket = removeFromBucket_VECTOR(set, bucket, entry, 0, &removed); + if (new_bucket != bucket) + /* In order to avoid rax override, we directly change the node data */ + // alternative: raxInsert(set->expiry_buckets, key, key_len, new_bucket, NULL); + raxSetData(node, new_bucket); + if (pbucket) *pbucket = new_bucket; + break; + } + case VSET_BUCKET_HT: { + vsetBucket *new_bucket = 
removeFromBucket_HASHTABLE(set, bucket, entry, 0, &removed); + if (new_bucket != bucket) + /* In order to avoid rax override, we directly change the node data */ + // alternative: raxInsert(set->expiry_buckets, key, key_len, bucket, NULL); + raxSetData(node, new_bucket); + + if (pbucket) *pbucket = new_bucket; + break; + } + default: + serverPanic("Unknown bucket type for raxBucketRemoveEntry"); + return false; + } + return removed; +} + +static inline vsetBucket *removeFromBucket_RAX(volatile_set *set, vsetBucket *target, void *entry, long long expiry, bool *removed) { + unsigned char key[VSET_BUCKET_KEY_LEN] = {0}; + long long bucket_ts; + size_t key_len; + raxNode *node; + rax *expiry_buckets = vsetBucketRax(target); + vsetBucket *bucket = findBucket(expiry_buckets, expiry, key, &key_len, &bucket_ts, &node); + assert(bucket); + bool success = raxBucketRemoveEntry(set, entry, bucket, key, key_len, NULL, node); + if (removed) *removed = success; + // shrink to single bucket if possible + if (raxSize(expiry_buckets) == 1) { + raxIterator it; + raxStart(&it, expiry_buckets); + assert(raxSeek(&it, "^", NULL, 0)); + assert(raxNext(&it)); + bucket = it.data; + int bucket_type = vsetBucketType(bucket); + raxStop(&it); + /* We will not convert hashtable to our only bucket since we will lose the ability to scan the items in a sorted way. + * We will also not shrink when we have a full vector, since it might immediately be repopulated. 
*/ + if (bucket_type == VSET_BUCKET_SINGLE || + (bucket_type == VSET_BUCKET_VECTOR && pv_len(vsetBucketVector(bucket)) < VOLATILESET_VECTOR_BUCKET_MAX_SIZE)) { + /* lets make our bucket to be the only left bucket */ + target = bucket; + raxFree(expiry_buckets); + } + } + return target; } int volatileSetAddEntry(volatile_set *set, void *entry, long long expiry) { - unsigned char buf[EXPIRY_HASH_SIZE]; - expiry = htonu64(expiry); - memcpy(buf, &expiry, sizeof(expiry)); - memcpy(buf + 8, &entry, sizeof(entry)); - if (sizeof(entry) == 4) memset(buf + 12, 0, 4); /* Zero padding for 32bit target. */ - return raxTryInsert(set->expiry_buckets, buf, sizeof(buf), NULL, NULL); + int bucket_type = vsetBucketType(set->expiry_buckets); + switch (bucket_type) { + case VSET_BUCKET_NONE: + set->expiry_buckets = insertToBucket_NONE(set, set->expiry_buckets, entry, expiry); + break; + case VSET_BUCKET_SINGLE: + set->expiry_buckets = insertToBucket_SINGLE(set, set->expiry_buckets, entry, expiry); + break; + case VSET_BUCKET_VECTOR: { + pointer_vector *vec = vsetBucketVector(set->expiry_buckets); + uint32_t len = pv_len(vec); + /* in case the vector is full, we need to turn into RAX */ + if (len == VOLATILESET_VECTOR_BUCKET_MAX_SIZE) { + rax *r = raxNew(); + long long min_expiry = set->etypr->getExpiry(pv_get(vec, 0)); + long long max_expiry = set->etypr->getExpiry(pv_get(vec, len - 1)); + if (get_max_bucket_ts(min_expiry) == get_max_bucket_ts(max_expiry)) { + /* In case we can just insert the bucket, no need to iterate and insert it's elements. we can just push the bucket as a whole. 
*/ + unsigned char key[VSET_BUCKET_KEY_LEN] = {0}; + size_t key_len = encodeNewExpiryBucketKey(key, max_expiry); + raxInsert(r, key, key_len, set->expiry_buckets, NULL); + set->expiry_buckets = vsetBucketSetRax(set->expiry_buckets, r); + set->expiry_buckets = insertToBucket_RAX(set, set->expiry_buckets, entry, expiry); + } else { + /* We need to migrate entries to the new set of buckets since we do not know all entries are in the same bucket */ + set->expiry_buckets = vsetBucketSetRax(set->expiry_buckets, r); + for (uint32_t i = 0; i < len; i++) { + void *moved_entry = pv_get(vec, i); + set->expiry_buckets = insertToBucket_RAX(set, set->expiry_buckets, moved_entry, set->etypr->getExpiry(moved_entry)); + } + /* free the vector */ + pv_free(vec); + /* now insert the new entry to the buckets */ + set->expiry_buckets = insertToBucket_RAX(set, set->expiry_buckets, entry, expiry); + } + } else { + set->expiry_buckets = insertToBucket_VECTOR(set, set->expiry_buckets, entry, expiry); + } + break; + } + case VSET_BUCKET_RAX: + set->expiry_buckets = insertToBucket_RAX(set, set->expiry_buckets, entry, expiry); + break; + default: + serverPanic("Cannot insert to bucket which is not single, vector or rax"); + } + return 1; } int volatileSetRemoveEntry(volatile_set *set, void *entry, long long expiry) { - unsigned char buf[EXPIRY_HASH_SIZE]; - expiry = htonu64(expiry); - memcpy(buf, &expiry, sizeof(expiry)); - memcpy(buf + 8, &entry, sizeof(entry)); - if (sizeof(entry) == 4) memset(buf + 12, 0, 4); /* Zero padding for 32bit target. 
*/ - return raxRemove(set->expiry_buckets, buf, sizeof(buf), NULL); + bool removed; + vsetBucket *bucket = set->expiry_buckets; + int bucket_type = vsetBucketType(bucket); + switch (bucket_type) { + case VSET_BUCKET_NONE: + /* We cannot remove from empty set */ + return 0; + case VSET_BUCKET_SINGLE: + bucket = removeFromBucket_SINGLE(set, bucket, entry, expiry, &removed); + break; + case VSET_BUCKET_VECTOR: + bucket = removeFromBucket_VECTOR(set, bucket, entry, expiry, &removed); + break; + case VSET_BUCKET_HT: + bucket = removeFromBucket_HASHTABLE(set, bucket, entry, expiry, &removed); + break; + case VSET_BUCKET_RAX: + bucket = removeFromBucket_RAX(set, bucket, entry, expiry, &removed); + break; + default: + serverPanic("Cannot insert to bucket which is not single, vector or rax"); + } + assert(removed); + set->expiry_buckets = bucket; + return removed ? 1 : 0; } int volatileSetUpdateEntry(volatile_set *set, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry) { - if (old_entry == new_entry && old_expiry == new_expiry) return 1; + if (old_entry == new_entry && old_expiry == new_expiry) + return 1; - if (old_entry && old_expiry != -1) { - assert(volatileSetRemoveEntry(set, old_entry, old_expiry)); - } - if (new_entry && new_expiry != -1) { + if (old_entry && old_expiry != -1) + assert((volatileSetRemoveEntry(set, old_entry, old_expiry))); + + if (new_entry && new_expiry != -1) assert(volatileSetAddEntry(set, new_entry, new_expiry)); - } + return 1; } -int volatileSetExpireEntry(volatile_set *set, void *entry) { - volatileSetRemoveEntry(set, entry, set->etypr->getExpiry(entry)); - if (set->etypr->expire) { - set->etypr->expire(entry); +static void *volatileSetGetFirstExpired(volatile_set *set, mstime_t now, bool delete) { + volatileSetIterator it; + void *entry = NULL; + volatileSetStart(set, &it); + if (volatileSetNext(&it, NULL) && (it.bucket_ts <= now)) { + entry = it.entry; + } + volatileSetReset(&it); + if (entry && delete) + 
volatileSetRemoveEntry(set, entry, set->etypr->getExpiry(entry)); + return entry; +} + +void *volatileSetdPopExpired(volatile_set *set, mstime_t now) { + return volatileSetGetFirstExpired(set, now, true); +} + +void *volatileSetFirstExpired(volatile_set *set, mstime_t now) { + return volatileSetGetFirstExpired(set, now, false); +} + +static int vsetBucketNext_NONE(volatileSetIterator *it, void **entryptr) { + UNUSED(it); + UNUSED(entryptr); + return 0; +} +static inline int vsetBucketNext_SINGLE(volatileSetIterator *it, void **entryptr) { + bool init_bucket_scan = (it->iteration_state == VSET_BUCKET_NONE); + if (init_bucket_scan) { + it->iteration_state = VSET_BUCKET_SINGLE; + it->entry = vsetBucketSingle(it->bucket); + if (entryptr) *entryptr = it->entry; return 1; } return 0; } +static inline int vsetBucketNext_VECTOR(volatileSetIterator *it, void **entryptr) { + bool init_bucket_scan = (it->iteration_state == VSET_BUCKET_NONE); + pointer_vector *pv = vsetBucketVector(it->bucket); + if (init_bucket_scan) { + it->iteration_state = VSET_BUCKET_VECTOR; + it->viter = 0; + } else { + it->viter++; + } + if (it->viter < pv_len(pv)) { + it->entry = pv_get(pv, it->viter); + } else { + return 0; + } + if (entryptr) *entryptr = it->entry; + return 1; +} -size_t volatileSetNumEntries(volatile_set *set) { - assert(set && set->expiry_buckets); - return set->expiry_buckets->numele; +static inline int vsetBucketNext_HASHTABLE(volatileSetIterator *it, void **entryptr) { + bool init_bucket_scan = (it->iteration_state == VSET_BUCKET_NONE); + hashtable *ht = vsetBucketHashtable(it->bucket); + if (init_bucket_scan) { + it->iteration_state = VSET_BUCKET_HT; + hashtableInitIterator(&it->hiter, ht, 0); + } + if (!hashtableNext(&it->hiter, &it->entry)) { + hashtableResetIterator(&it->hiter); + return 0; + } + if (entryptr) *entryptr = it->entry; + return 1; } -void volatileSetStart(volatile_set *set, volatileSetIterator *it) { - raxStart(&it->bucket, set->expiry_buckets); +static inline 
int vsetBucketNext_RAX(volatileSetIterator *it, void **entryptr) { + bool init_bucket_scan = (it->iteration_state == VSET_BUCKET_NONE); + if (init_bucket_scan) { + /* set myself as the parent bucket */ + it->parent_bucket = it->bucket; + raxStart(&it->riter, vsetBucketRax(it->bucket)); + raxSeek(&it->riter, "^", NULL, 0); + } + if (raxNext(&it->riter)) { + /* lets start again by going into the first bucket. */ + it->iteration_state = vsetBucketType(it->riter.data); + it->bucket_ts = decodeExpiryKey(it->riter.key); + it->bucket = it->riter.data; + it->iteration_state = VSET_BUCKET_NONE; + return volatileSetNext(it, entryptr); + } else { + /* We currently do not support nested RAX buckets */ + it->parent_bucket = vsetBucketSetNone(it->parent_bucket); + return 0; + } + return 1; } int volatileSetNext(volatileSetIterator *it, void **entryptr) { - if (raxNext(&it->bucket)) { - assert(it->bucket.key_len != EXPIRY_HASH_SIZE); - memcpy(it->bucket.key + 8, entryptr, sizeof(*entryptr)); - return 1; + vsetBucket *bucket = it->bucket; + int bucket_type = vsetBucketType(bucket); + int ret = 0; + switch (bucket_type) { + case VSET_BUCKET_NONE: + return vsetBucketNext_NONE(it, entryptr); + break; + case VSET_BUCKET_RAX: + return vsetBucketNext_RAX(it, entryptr); + break; + case VSET_BUCKET_SINGLE: + ret = vsetBucketNext_SINGLE(it, entryptr); + break; + case VSET_BUCKET_VECTOR: + ret = vsetBucketNext_VECTOR(it, entryptr); + break; + case VSET_BUCKET_HT: + ret = vsetBucketNext_HASHTABLE(it, entryptr); + break; + default: + serverPanic("Unknown volatile set bucket type in volatileSetNext"); } - return 0; + if (ret == 0) { + /* continue iterating the parent bucket */ + it->iteration_state = vsetBucketType(it->parent_bucket); + it->bucket = it->parent_bucket; + return volatileSetNext(it, entryptr); + } + return ret; } + +void volatileSetStart(volatile_set *set, volatileSetIterator *it) { + it->iteration_state = VSET_BUCKET_NONE; /*lets start by going to the first bucket. 
*/ + it->bucket = set->expiry_buckets; + it->parent_bucket = vsetBucketSetNone(it->parent_bucket); +} + void volatileSetReset(volatileSetIterator *it) { - raxStop(&it->bucket); + int bucket_type = vsetBucketType(it->bucket); + int parent_bucket_type = vsetBucketType(it->parent_bucket); + if (parent_bucket_type == VSET_BUCKET_RAX) + raxStop(&it->riter); + if (bucket_type == VSET_BUCKET_HT) + hashtableResetIterator(&it->hiter); +} + +volatile_set *createVolatileSet(volatileEntryType *type) { + volatile_set *set = zmalloc(sizeof(volatile_set)); + set->etypr = type; + set->expiry_buckets = vsetBucketSetNone(set->expiry_buckets); + return set; +} + +void freeVolatileSet(volatile_set *set) { + if (!set) return; + freeVsetBucket(set->expiry_buckets); + zfree(set); +} + +bool volatileSetIsEmpty(volatile_set *set) { + return vsetBucketType(set->expiry_buckets) == VSET_BUCKET_NONE; } diff --git a/src/volatile_set.h b/src/volatile_set.h index 37dc7c9923..505a187625 100644 --- a/src/volatile_set.h +++ b/src/volatile_set.h @@ -2,39 +2,251 @@ #define VOLATILESET_H #include +#include +#include "hashtable.h" + +#include "hashtable.h" #include "rax.h" #include "sds.h" +#include "monotonic.h" /* for mstime_t*/ + +/* + *----------------------------------------------------------------------------- + * Volatile Set - Adaptive, Expiry-aware Set Structure + *----------------------------------------------------------------------------- + * + * The `volatile_set` is a dynamic, memory-efficient container for managing + * entries with expiry semantics. It is designed to efficiently track entries + * that expire at varying times and scales to large sets by adapting its internal + * representation as it grows or shrinks. 
+ * + *----------------------------------------------------------------------------- + * Expiry Buckets and Pointer Tagging + *----------------------------------------------------------------------------- + * + * Internally, the `volatile_set` maintains a single `vsetBucket*` pointer, + * which can point to different types of buckets depending on the number of + * entries and the needed resolution. The pointer is tagged using the lowest 3 bits: + * + * #define VSET_BUCKET_NONE -1 + * #define VSET_BUCKET_SINGLE 0x1ULL // pointer to single entry (odd ptr) + * #define VSET_BUCKET_VECTOR 0x2ULL // pointer to pointer vector + * #define VSET_BUCKET_HT 0x4ULL // pointer to hashtable + * #define VSET_BUCKET_RAX 0x6ULL // pointer to radix tree + * + * #define VSET_TAG_MASK 0x7ULL + * #define VSET_PTR_MASK (~VSET_TAG_MASK) + * + * IMPORTANT!!!! - All entries must have LSB set (i.e., be odd-aligned) to be compatible with !!!! + * tagging constraints. + * + *----------------------------------------------------------------------------- + * Time Bucket Management + *----------------------------------------------------------------------------- + * + * Entries are grouped into **time buckets** based on their expiry time. + * Each time bucket represents a window aligned to: + * + * #define VOLATILESET_BUCKET_INTERVAL_MIN (1 << 4) // 16ms + * #define VOLATILESET_BUCKET_INTERVAL_MAX (1 << 13) // 8192ms + * + * A time bucket key is computed by rounding the expiry timestamp up to the + * nearest aligned window using `get_bucket_ts()`. + * + *----------------------------------------------------------------------------- + * Entry Addition and Bucket Promotion + *----------------------------------------------------------------------------- + * + * When a new entry is added: + * + * 1. If the current set is `NONE`, it becomes a `SINGLE` bucket. + * 2. If the set is a `SINGLE` bucket and another entry arrives: + * → it is promoted to a `VECTOR` bucket (sorted by expiry). + * 3. 
If the `VECTOR` exceeds `VOLATILESET_VECTOR_BUCKET_MAX_SIZE` (127): + * → the set becomes a `RAX`, and existing entries are migrated. + * + *----------------------------------------------------------------------------- + * RAX Bucket and Dynamic Splitting + *----------------------------------------------------------------------------- + * + * A `VSET_BUCKET_RAX` bucket stores multiple time-aligned buckets in a radix tree. + * Each key in the RAX represents the **end timestamp** of a bucket window. + * + * When a bucket in RAX becomes full (vector limit exceeded): + * - The vector is split into two parts using a **binary search** to find an optimal + * split point where the expiry bucket timestamp changes. + * - Two new buckets are created and inserted back into the RAX with their new + * aligned timestamps as keys. + * - If entries cannot be split (all in same window), the bucket is promoted to HT. + * + *----------------------------------------------------------------------------- + * RAX Bucket Layout + *----------------------------------------------------------------------------- + * + * * RAX View with Time Keys: + * + * expiry_buckets = rax * | 0x6 + * + * +--------------------------+ + * | RAX (key = bucket_ts) | + * |--------------------------| + * | "000016" → [entry1] | ← Vector (SINGLE→VECTOR→HT) + * | "000032" → [entry2...] | ← Full vector, might split + * | "000048" → [entry...] | + * +--------------------------+ + * + * * Splitting a Full Vector in RAX: + * + * Suppose vector at key "000032" has 13 entries: + * + * 1. Use binary search to find a transition point in expiry bucket_ts. + * We search the first 2 following entries which belong to different lwo granularity time windows, + * but as close as possible to the middle of the vector: + * [entry1, entry7, ..., entry13] + * ↑ + * split (first where get_bucket_ts(entry) > min_ts) + * + * 2. 
Create two vectors: + * bucket A → [entry1..entry6] with key = "000032" + * bucket B → [entry7..entry13] with key = "000048" + * + * 3. Insert both back to the RAX. + * + *----------------------------------------------------------------------------- + * Bucket Lifecycle + *----------------------------------------------------------------------------- + * + * NONE + * | + * v + * SINGLE (1 entry) + * | + * v + * VECTOR (sorted, up to 127) + * | + * v + * RAX + * | + * v + * +-------------+ + * | key → bucket| + * +-------------+ + * | "000016" → VECTOR + * | "000032" → HT + * | "000048" → SINGLE + * +-------------+ + * + *----------------------------------------------------------------------------- + * Entry Type Contract + *----------------------------------------------------------------------------- + * + * Users must supply a `volatileEntryType` implementation: + * + * typedef struct { + * sds (*entryGetKey)(const void *entry); // get key + * long long (*getExpiry)(const void *entry); // get expiry + * int (*expire)(void *db, void *o, void *entry); // trigger expiry + * } volatileEntryType; + * + *----------------------------------------------------------------------------- + * Public API + *----------------------------------------------------------------------------- + * + * Create/Free: + * volatile_set *createVolatileSet(volatileEntryType *type); + * void freeVolatileSet(volatile_set *set); + * + * Mutation: + * int volatileSetAddEntry(volatile_set *set, void *entry, long long expiry); + * int volatileSetRemoveEntry(volatile_set *set, void *entry, long long expiry); + * int volatileSetUpdateEntry(volatile_set *set, void *old_entry, + * void *new_entry, long long old_expiry, + * long long new_expiry); + * + * Expiry Retrieval: + * void *volatileSetFirstExpired(volatile_set *set, mstime_t now); + * void *volatileSetdPopExpired(volatile_set *set, mstime_t now); + * + * Utilities: + * bool volatileSetIsEmpty(volatile_set *set); + * + * Iteration: + * void 
volatileSetStart(volatile_set *set, volatileSetIterator *it); + * int volatileSetNext(volatileSetIterator *it, void **entryptr); + * void volatileSetReset(volatileSetIterator *it); + * + *----------------------------------------------------------------------------- + * Iteration Support + *----------------------------------------------------------------------------- + * + * Iterator structure maintains context across all bucket types: + * + * typedef struct volatileSetIterator { + * raxIterator riter; // for RAX + * hashtableIterator hiter; // for HT + * uint32_t viter; // for VECTOR + * void *vsingle; // for SINGLE + * vsetBucket *parent_bucket; // owning bucket + * vsetBucket *bucket; // active bucket + * void *entry; // current entry + * long long bucket_ts; // for RAX + * int iteration_state; // internal FSM + * } volatileSetIterator; + * */ + +#define VOLATILESET_BUCKET_INTERVAL_MAX (1LL << 13LL) // 2^13 = 8192 milliseconds +#define VOLATILESET_BUCKET_INTERVAL_MIN (1LL << 4LL) // 2^4 = 16 milliseconds +#define VOLATILESET_VECTOR_BUCKET_MAX_SIZE 127 typedef struct { sds (*entryGetKey)(const void *entry); long long (*getExpiry)(const void *entry); - int (*expire)(void *entry); + int (*expire)(void*db, void* o, void *entry); } volatileEntryType; +// Generic bucket type +typedef void vsetBucket; typedef struct { volatileEntryType *etypr; - rax *expiry_buckets; + vsetBucket *expiry_buckets; } volatile_set; typedef struct volatileSetIterator { - raxIterator bucket; + /* for rax bucket */ + raxIterator riter; + /* for hashtable bucket */ + hashtableIterator hiter; + /* for vector bucket */ + uint32_t viter; + /* for single bucket */ + void *vsingle; + /* the parent of the bucket we are currently iterating on */ + vsetBucket *parent_bucket; + /* the bucket we are currently iterating on */ + vsetBucket *bucket; + /* the pointer entry */ + void *entry; + /* In case of rax encoded set, this is the current iterated bucket timestamp */ + long long bucket_ts; + /* the state 
of the iteration */ + int iteration_state; } volatileSetIterator; - int volatileSetRemoveEntry(volatile_set *set, void *entry, long long expiry); int volatileSetAddEntry(volatile_set *set, void *entry, long long expiry); -int volatileSetExpireEntry(volatile_set *set, void *entry); +void *volatileSetdPopExpired(volatile_set *set, mstime_t now); +void *volatileSetFirstExpired(volatile_set *set, mstime_t now); int volatileSetUpdateEntry(volatile_set *set, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry); -size_t volatileSetNumEntries(volatile_set *set); +bool volatileSetIsEmpty(volatile_set *set); void volatileSetStart(volatile_set *set, volatileSetIterator *it); int volatileSetNext(volatileSetIterator *it, void **entryptr); void volatileSetReset(volatileSetIterator *it); - void freeVolatileSet(volatile_set *b); volatile_set *createVolatileSet(volatileEntryType *type); + #endif From 6be1bd051f04e1b9fbf57a59babad488c9a7a825 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Thu, 26 Jun 2025 19:44:13 +0300 Subject: [PATCH 017/119] fix comparison Signed-off-by: Ran Shidlansik --- src/volatile_set.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/volatile_set.c b/src/volatile_set.c index 61f503a5fa..be0fa4cad2 100644 --- a/src/volatile_set.c +++ b/src/volatile_set.c @@ -261,7 +261,7 @@ pointer_vector *pv_removeAt(pointer_vector *sv, uint32_t idx) { /* Last element being removed; delete vector */ zfree(sv); return NULL; - } else if (idx < sv->len - 1) + } else if (idx < sv->len - 1UL) memmove(&sv->data[idx], &sv->data[idx + 1], (sv->len - idx - 1) * PV_ELEM_SIZE); sv->len--; return pv_shrink_to_fit(sv); From 6df722fa06635839493cab4f93c7b3e8900219af Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Thu, 26 Jun 2025 19:46:28 +0300 Subject: [PATCH 018/119] fix typos Signed-off-by: Ran Shidlansik --- src/volatile_set.c | 25 ++----------------------- 1 file changed, 2 insertions(+), 23 deletions(-) diff --git 
a/src/volatile_set.c b/src/volatile_set.c index be0fa4cad2..12d65c91fe 100644 --- a/src/volatile_set.c +++ b/src/volatile_set.c @@ -663,32 +663,11 @@ static bool splitBucketIfPossible(volatile_set *set, vsetBucket *bucket, long lo // alternative: raxInsert(set->expiry_buckets, key, key_len, bucket, NULL); raxSetData(node, bucket); - /* santity check after split - assert(target_bucket_ts < bucket_ts); - pointer_vector *high_bucket_vector = vsetBucketVector(bucket); - pointer_vector *low_bucket_vector = vsetBucketVector(new_bucket); - for (uint32_t i = 0; i < pv_len(low_bucket_vector); i++) { - assert(set->etypr->getExpiry(pv_get(low_bucket_vector, i)) < target_bucket_ts); - assert(get_bucket_ts(set->etypr->getExpiry(pv_get(low_bucket_vector, i))) < bucket_ts); - assert(get_bucket_ts(set->etypr->getExpiry(pv_get(low_bucket_vector, i))) <= target_bucket_ts); - long long find_bucket_ts; - vsetBucket *find_bucket = findBucket(set, set->etypr->getExpiry(pv_get(low_bucket_vector, i)), key, &key_len, &find_bucket_ts, NULL); - assert(find_bucket == new_bucket); - } - for (uint32_t i = 0; i < pv_len(high_bucket_vector); i++) { - assert(get_bucket_ts(set->etypr->getExpiry(pv_get(high_bucket_vector, i))) > target_bucket_ts); - assert(get_bucket_ts(set->etypr->getExpiry(pv_get(high_bucket_vector, i))) <= bucket_ts); - assert(set->etypr->getExpiry(pv_get(high_bucket_vector, i)) >= target_bucket_ts); - assert(set->etypr->getExpiry(pv_get(high_bucket_vector, i)) < bucket_ts); - long long find_bucket_ts; - vsetBucket *find_bucket = findBucket(set, set->etypr->getExpiry(pv_get(high_bucket_vector, i)), key, &key_len, &find_bucket_ts, NULL); - assert(find_bucket == bucket); - }*/ } else { /* We cannot split the bucket. just return false */ return false; } - /* We change the current bucket position OR we splited it, either way we have a new bucket to insert. */ + /* We change the current bucket position OR we split it, either way we have a new bucket to insert. 
*/ key_len = encodeExpiryKey(target_bucket_ts, key); raxInsert(expiry_buckets, key, key_len, new_bucket, NULL); return true; @@ -780,7 +759,7 @@ static inline vsetBucket *insertToBucket_RAX(volatile_set *set, vsetBucket *targ // alternative raxInsert(expiry_buckets, key, key_len, bucket, NULL); raxSetData(node, bucket); } else { - /* we splitted the bucket. go and find again a bucket to place the entry since there can be new options now. */ + /* we split the bucket. go and find again a bucket to place the entry since there can be new options now. */ return insertToBucket_RAX(set, target, entry, expiry); } } else { From 52d8a253e152c59efd6b7d169895c28c23375b78 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Thu, 26 Jun 2025 20:17:05 +0300 Subject: [PATCH 019/119] fix 32bit compilation and forgoten test file Signed-off-by: Ran Shidlansik --- src/unit/test_volatile_set.c | 382 +++++++++++++++++++++++++++++++++++ src/volatile_set.c | 43 ++-- 2 files changed, 404 insertions(+), 21 deletions(-) create mode 100644 src/unit/test_volatile_set.c diff --git a/src/unit/test_volatile_set.c b/src/unit/test_volatile_set.c new file mode 100644 index 0000000000..a399d93be3 --- /dev/null +++ b/src/unit/test_volatile_set.c @@ -0,0 +1,382 @@ +#include "../volatile_set.h" +#include "../entry.h" +#include "test_help.h" +#include "../zmalloc.h" +#include +#include +#include +#include +#include +#include +#include + + +typedef entry mock_entry; + +static mock_entry *mockCreateEntry(const char *keystr, long long expiry) { + sds field = sdsnew(keystr); + mock_entry *e = entryCreate(field, sdsnew("value"), expiry); + sdsfree(field); + return e; +} + +static mock_entry *mockEntryUpdate(mock_entry *entry, long long expiry) { + return entryUpdate(entry, NULL, expiry); +} + +static sds mockGetKey(const void *entry) { + return (sds)entry; +} + +static long long mockGetExpiry(const void *entry) { + return entryGetExpiry(entry); +} + +static void mockFreeEntry(void *entry) { + // 
printf("mockFreeEntry: %p\n", entry); + entryFree(entry); +} + +static int mockExpire(void *db, void *o, void *entry) { + (void)db; + (void)o; + (void)entry; + return 1; +} + +int test_volatile_set_add_and_iterate(int argc, char **argv, int flags) { + return 0; + (void)argc; + (void)argv; + (void)flags; + + volatileEntryType type = { + .entryGetKey = mockGetKey, + .getExpiry = mockGetExpiry, + .expire = mockExpire, + }; + + volatile_set *set = createVolatileSet(&type); + TEST_ASSERT(set != NULL); + + mock_entry *e1 = mockCreateEntry("item1", 123); + mock_entry *e2 = mockCreateEntry("item2", 456); + + TEST_ASSERT(volatileSetAddEntry(set, e1, mockGetExpiry(e1))); + TEST_ASSERT(volatileSetAddEntry(set, e2, mockGetExpiry(e2))); + + TEST_ASSERT(!volatileSetIsEmpty(set)); + + volatileSetIterator it; + volatileSetStart(set, &it); + + void *entry; + int count = 0; + while (volatileSetNext(&it, &entry)) { + TEST_EXPECT(entry != NULL); + count++; + } + + TEST_ASSERT(count == 2); + + volatileSetReset(&it); + freeVolatileSet(set); + mockFreeEntry(e1); + mockFreeEntry(e2); + + TEST_PRINT_INFO("Test passed with %d expects", failed_expects); + return 0; +} + +int test_volatile_set_large_batch_same_expiry(int argc, char **argv, int flags) { + return 0; + (void)argc; + (void)argv; + (void)flags; + + volatileEntryType type = { + .entryGetKey = mockGetKey, + .getExpiry = mockGetExpiry, + .expire = mockExpire, + }; + + volatile_set *set = createVolatileSet(&type); + TEST_ASSERT(set != NULL); + + const long long expiry_time = 1000LL; + const int total_entries = 200; + + // Allocate and add 200 entries with same expiry + mock_entry **entries = zmalloc(sizeof(mock_entry *) * total_entries); + TEST_ASSERT(entries != NULL); + + for (int i = 0; i < total_entries; i++) { + char key_buf[32]; + snprintf(key_buf, sizeof(key_buf), "entry_%d", i); + entries[i] = mockCreateEntry(key_buf, expiry_time); + TEST_ASSERT(volatileSetAddEntry(set, entries[i], expiry_time)); + } + + // Verify set is not 
empty + TEST_ASSERT(!volatileSetIsEmpty(set)); + + // Iterate all entries and count them + volatileSetIterator it; + volatileSetStart(set, &it); + + void *entry; + int count = 0; + while (volatileSetNext(&it, &entry)) { + TEST_EXPECT(entry != NULL); + count++; + } + TEST_ASSERT(count == total_entries); + + // Cleanup + volatileSetReset(&it); + freeVolatileSet(set); + + for (int i = 0; i < total_entries; i++) { + mockFreeEntry(entries[i]); + } + zfree(entries); + + TEST_PRINT_INFO("Inserted and iterated %d entries with same expiry", total_entries); + return 0; +} + +int test_volatile_set_iterate_multiple_expiries(int argc, char **argv, int flags) { + return 0; + (void)argc; + (void)argv; + (void)flags; + const unsigned int total_entries = 5; + volatileEntryType type = { + .entryGetKey = mockGetKey, + .getExpiry = mockGetExpiry, + .expire = mockExpire, + }; + + volatile_set *set = createVolatileSet(&type); + TEST_ASSERT(set != NULL); + + // Prepare entries with mixed expiry times, some duplicates + mock_entry *entries[total_entries]; + + // Initialize keys + for (unsigned int i = 0; i < total_entries; i++) { + char key_buf[32]; + snprintf(key_buf, sizeof(key_buf), "entry_%d", i); + long long expiry_time = rand() % 10000; + entries[i] = mockCreateEntry(key_buf, expiry_time); + TEST_ASSERT(volatileSetAddEntry(set, entries[i], expiry_time)); + } + + volatileSetIterator it; + volatileSetStart(set, &it); + + int found[5] = {0}; + int total = 0; + + void *entry; + while (volatileSetNext(&it, &entry)) { + TEST_EXPECT(entry != NULL); + mock_entry *e = (mock_entry *)entry; + + // Match the entries we inserted + for (int i = 0; i < 5; i++) { + if (strcmp(entryGetField(e), entryGetField(entries[i])) == 0) { + found[i] = 1; + break; + } + } + total++; + } + + TEST_ASSERT(total == 5); + + for (int i = 0; i < 5; i++) { + TEST_EXPECT(found[i]); + } + + volatileSetReset(&it); + freeVolatileSet(set); + for (int i = 0; i < 5; i++) mockFreeEntry(entries[i]); + + 
TEST_PRINT_INFO("Iterated all %d mixed expiry entries successfully", total); + return 0; +} + +int test_volatile_set_add_and_remove_all(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + + volatileEntryType type = { + .entryGetKey = mockGetKey, + .getExpiry = mockGetExpiry, + .expire = mockExpire, + }; + + volatile_set *set = createVolatileSet(&type); + TEST_ASSERT(set != NULL); + + const int total_entries = 130; + mock_entry *entries[total_entries]; + long long expiry = 5000; + + for (int i = 0; i < total_entries; i++) { + char key[32]; + snprintf(key, sizeof(key), "key_%d", i); + entries[i] = mockCreateEntry(key, expiry); + TEST_ASSERT(volatileSetAddEntry(set, entries[i], expiry)); + } + + for (int i = 0; i < total_entries; i++) { + TEST_ASSERT(volatileSetRemoveEntry(set, entries[i], expiry)); + mockFreeEntry(entries[i]); + } + + TEST_ASSERT(volatileSetIsEmpty(set)); + freeVolatileSet(set); + + TEST_PRINT_INFO("Add/remove %d entries, set size now 0", total_entries); + return 0; +} + +/********************* Fuzzer tests ********************************/ + +#define NUM_ITERATIONS 1000000 +#define MAX_ENTRIES 100000 + +/* Global array to simulate a test database */ +mock_entry *mock_entries[MAX_ENTRIES]; +int mock_entry_count = 0; + +/* --------- volatileEntryType Callbacks --------- */ +sds mock_entry_get_key(const void *entry) { + return (sds)entry; +} + +long long mock_entry_get_expiry(const void *entry) { + return mockGetExpiry(entry); +} + +int mock_entry_expire(void *db, void *o, void *entry) { + UNUSED(db); + UNUSED(o); + mock_entry *e = (mock_entry *)entry; + for (int i = 0; i < mock_entry_count; i++) { + if (mock_entries[i] == e) { + // printf("expire entry %p with expiry %llu\n", e, mockGetExpiry(e)); + mockFreeEntry(e); + mock_entries[i] = mock_entries[--mock_entry_count]; + return 1; + } + } + return 0; +} + +/* --------- Helper Functions --------- */ +mock_entry *mock_entry_create(const char *keystr, long long expiry) 
{ + return mockCreateEntry(keystr, expiry); +} + +int insert_mock_entry(volatile_set *set) { + if (mock_entry_count >= MAX_ENTRIES) return 0; + char keybuf[32]; + snprintf(keybuf, sizeof(keybuf), "key_%d", rand()); + + long long expiry = rand() % 10000 + 100; + mock_entry *e = mock_entry_create(keybuf, expiry); + // printf("adding entry %p with expiry %llu\n", e, expiry); + TEST_ASSERT(volatileSetAddEntry(set, e, expiry)); + mock_entries[mock_entry_count++] = e; + return 0; +} + +int update_mock_entry(volatile_set *set) { + if (mock_entry_count == 0) return 0; + int idx = rand() % mock_entry_count; + mock_entry *old = mock_entries[idx]; + long long old_expiry = mockGetExpiry(old); + long long new_expiry = old_expiry + (rand() % 500); + mock_entry *updated = mockEntryUpdate(old, new_expiry); + mock_entries[idx] = updated; + // printf("Update entry %p with entry %p with old expiry %llu new expiry %llu\n", old, updated, old_expiry, new_expiry); + TEST_ASSERT(volatileSetUpdateEntry(set, old, updated, old_expiry, new_expiry)); + return 0; +} + +int remove_mock_entry(volatile_set *set) { + if (mock_entry_count == 0) return 0; + int idx = rand() % mock_entry_count; + mock_entry *e = mock_entries[idx]; + // printf("removing entry %p with expiry %llu\n", e, mockGetExpiry(e)); + TEST_ASSERT(volatileSetRemoveEntry(set, e, mockGetExpiry(e))); + mockFreeEntry(e); + mock_entries[idx] = mock_entries[--mock_entry_count]; + + return 0; +} + +int expire_mock_entries(volatile_set *set, mstime_t now) { + void *entry; + do { + entry = volatileSetdPopExpired(set, now); + if (entry) { + // printf("pop expire entry %p with expiry %llu\n", entry, mockGetExpiry(entry)); + mock_entry_expire(NULL, NULL, entry); + } + } while (entry); + return 0; +} + +int free_mock_entries(void) { + for (int i = 0; i < mock_entry_count; i++) { + mock_entry *e = mock_entries[i]; + mockFreeEntry(e); + } + return 0; +} + +/* --------- Fuzzer Test --------- */ +int test_volatile_set_fuzzer(int argc, char **argv, 
int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + srand(time(NULL)); + + volatileEntryType type = { + .entryGetKey = mock_entry_get_key, + .getExpiry = mock_entry_get_expiry, + .expire = mock_entry_expire}; + + volatile_set *set = createVolatileSet(&type); + + for (int i = 0; i < NUM_ITERATIONS; i++) { + int op = rand() % 4; + switch (op) { + case 0: + case 1: + insert_mock_entry(set); + break; + case 2: + update_mock_entry(set); + break; + case 3: + remove_mock_entry(set); + break; + } + + if (i % 100 == 0) { + mstime_t now = rand() % 10000; + expire_mock_entries(set, now); + } + } + + TEST_ASSERT(volatileSetIsEmpty(set) || mock_entry_count > 0); + freeVolatileSet(set); + free_mock_entries(); + return 0; +} diff --git a/src/volatile_set.c b/src/volatile_set.c index 12d65c91fe..2b32e1e664 100644 --- a/src/volatile_set.c +++ b/src/volatile_set.c @@ -329,13 +329,13 @@ uint32_t pv_find(pointer_vector *sv, void *elem) { /************************************************************************************************************* * pointer_vector End *************************************************************************************************************/ -#define VSET_BUCKET_NONE -1 // matching the NULL case -#define VSET_BUCKET_SINGLE 0x1ULL // xx1 (assuming sds) -#define VSET_BUCKET_VECTOR 0x2ULL // 010 -#define VSET_BUCKET_HT 0x4ULL // 100 -#define VSET_BUCKET_RAX 0x6ULL // 110 +#define VSET_BUCKET_NONE -1 // matching the NULL case +#define VSET_BUCKET_SINGLE 0x1UL // xx1 (assuming sds) +#define VSET_BUCKET_VECTOR 0x2UL // 010 +#define VSET_BUCKET_HT 0x4UL // 100 +#define VSET_BUCKET_RAX 0x6UL // 110 -#define VSET_TAG_MASK 0x7ULL +#define VSET_TAG_MASK 0x7UL #define VSET_PTR_MASK (~VSET_TAG_MASK) // Determine bucket type @@ -555,21 +555,22 @@ uint32_t _find_split_position(volatile_set *set, vsetBucket *bucket, long long * static uint64_t hash_pointer(const void *ptr) { uintptr_t x = (uintptr_t)ptr; - if (sizeof(ptr) == 4) { - // 32-bit platform - x 
^= x >> 16; - x *= 0x85ebca6b; - x ^= x >> 13; - x *= 0xc2b2ae35; - x ^= x >> 16; - } else { - // 64-bit platform - x ^= x >> 33; - x *= 0xff51afd7ed558ccdULL; - x ^= x >> 33; - x *= 0xc4ceb9fe1a85ec53ULL; - x ^= x >> 33; - } +#if UINTPTR_MAX == 0xFFFFFFFF + // 32-bit platform + x ^= x >> 16; + x *= 0x85ebca6b; + x ^= x >> 13; + x *= 0xc2b2ae35; + x ^= x >> 16; + +#else + // 64-bit platform + x ^= x >> 33; + x *= 0xff51afd7ed558ccdULL; + x ^= x >> 33; + x *= 0xc4ceb9fe1a85ec53ULL; + x ^= x >> 33; +#endif return (uint64_t)x; } From 344d487582d8e0d81694dbfab4991c44fbee9026 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Fri, 27 Jun 2025 07:42:31 +0300 Subject: [PATCH 020/119] add volatile set test condition Signed-off-by: Ran Shidlansik --- src/unit/test_files.h | 7 +++++++ src/unit/test_volatile_set.c | 1 + 2 files changed, 8 insertions(+) diff --git a/src/unit/test_files.h b/src/unit/test_files.h index 4b9f37b3a4..05f9eed17b 100644 --- a/src/unit/test_files.h +++ b/src/unit/test_files.h @@ -20,6 +20,11 @@ int test_dictDisableResizeReduceTo3(int argc, char **argv, int flags); int test_dictDeleteOneKeyTriggerResizeAgain(int argc, char **argv, int flags); int test_dictBenchmark(int argc, char **argv, int flags); int test_endianconv(int argc, char *argv[], int flags); +int test_entryCreate(int argc, char **argv, int flags); +int test_entryUpdate(int argc, char **argv, int flags); +int test_entryHasexpiry_entrySetExpiry(int argc, char **argv, int flags); +int test_entryIsExpired(int argc, char **argv, int flags); +int test_entryMemUsage_entrySetExpiry_entrySetValue(int argc, char **argv, int flags); int test_cursor(int argc, char **argv, int flags); int test_set_hash_function_seed(int argc, char **argv, int flags); int test_add_find_delete(int argc, char **argv, int flags); @@ -248,6 +253,7 @@ unitTest __test_crc64_c[] = {{"test_crc64", test_crc64}, {NULL, NULL}}; unitTest __test_crc64combine_c[] = {{"test_crc64combine", test_crc64combine}, {NULL, NULL}}; unitTest 
__test_dict_c[] = {{"test_dictCreate", test_dictCreate}, {"test_dictAdd16Keys", test_dictAdd16Keys}, {"test_dictDisableResize", test_dictDisableResize}, {"test_dictAddOneKeyTriggerResize", test_dictAddOneKeyTriggerResize}, {"test_dictDeleteKeys", test_dictDeleteKeys}, {"test_dictDeleteOneKeyTriggerResize", test_dictDeleteOneKeyTriggerResize}, {"test_dictEmptyDirAdd128Keys", test_dictEmptyDirAdd128Keys}, {"test_dictDisableResizeReduceTo3", test_dictDisableResizeReduceTo3}, {"test_dictDeleteOneKeyTriggerResizeAgain", test_dictDeleteOneKeyTriggerResizeAgain}, {"test_dictBenchmark", test_dictBenchmark}, {NULL, NULL}}; unitTest __test_endianconv_c[] = {{"test_endianconv", test_endianconv}, {NULL, NULL}}; +unitTest __test_entry_c[] = {{"test_entryCreate", test_entryCreate}, {"test_entryUpdate", test_entryUpdate}, {"test_entryHasexpiry_entrySetExpiry", test_entryHasexpiry_entrySetExpiry}, {"test_entryIsExpired", test_entryIsExpired}, {"test_entryMemUsage_entrySetExpiry_entrySetValue", test_entryMemUsage_entrySetExpiry_entrySetValue}, {NULL, NULL}}; unitTest __test_hashtable_c[] = {{"test_cursor", test_cursor}, {"test_set_hash_function_seed", test_set_hash_function_seed}, {"test_add_find_delete", test_add_find_delete}, {"test_add_find_delete_avoid_resize", test_add_find_delete_avoid_resize}, {"test_instant_rehashing", test_instant_rehashing}, {"test_bucket_chain_length", test_bucket_chain_length}, {"test_two_phase_insert_and_pop", test_two_phase_insert_and_pop}, {"test_replace_reallocated_entry", test_replace_reallocated_entry}, {"test_incremental_find", test_incremental_find}, {"test_scan", test_scan}, {"test_iterator", test_iterator}, {"test_safe_iterator", test_safe_iterator}, {"test_compact_bucket_chain", test_compact_bucket_chain}, {"test_random_entry", test_random_entry}, {"test_random_entry_with_long_chain", test_random_entry_with_long_chain}, {"test_random_entry_sparse_table", test_random_entry_sparse_table}, {"test_all_memory_freed", test_all_memory_freed}, {NULL, 
NULL}}; unitTest __test_intset_c[] = {{"test_intsetValueEncodings", test_intsetValueEncodings}, {"test_intsetBasicAdding", test_intsetBasicAdding}, {"test_intsetLargeNumberRandomAdd", test_intsetLargeNumberRandomAdd}, {"test_intsetUpgradeFromint16Toint32", test_intsetUpgradeFromint16Toint32}, {"test_intsetUpgradeFromint16Toint64", test_intsetUpgradeFromint16Toint64}, {"test_intsetUpgradeFromint32Toint64", test_intsetUpgradeFromint32Toint64}, {"test_intsetStressLookups", test_intsetStressLookups}, {"test_intsetStressAddDelete", test_intsetStressAddDelete}, {NULL, NULL}}; unitTest __test_kvstore_c[] = {{"test_kvstoreAdd16Keys", test_kvstoreAdd16Keys}, {"test_kvstoreIteratorRemoveAllKeysNoDeleteEmptyHashtable", test_kvstoreIteratorRemoveAllKeysNoDeleteEmptyHashtable}, {"test_kvstoreIteratorRemoveAllKeysDeleteEmptyHashtable", test_kvstoreIteratorRemoveAllKeysDeleteEmptyHashtable}, {"test_kvstoreHashtableIteratorRemoveAllKeysNoDeleteEmptyHashtable", test_kvstoreHashtableIteratorRemoveAllKeysNoDeleteEmptyHashtable}, {"test_kvstoreHashtableIteratorRemoveAllKeysDeleteEmptyHashtable", test_kvstoreHashtableIteratorRemoveAllKeysDeleteEmptyHashtable}, {NULL, NULL}}; @@ -275,6 +281,7 @@ struct unitTestSuite { {"test_crc64combine.c", __test_crc64combine_c}, {"test_dict.c", __test_dict_c}, {"test_endianconv.c", __test_endianconv_c}, + {"test_entry.c", __test_entry_c}, {"test_hashtable.c", __test_hashtable_c}, {"test_intset.c", __test_intset_c}, {"test_kvstore.c", __test_kvstore_c}, diff --git a/src/unit/test_volatile_set.c b/src/unit/test_volatile_set.c index a399d93be3..869c685d51 100644 --- a/src/unit/test_volatile_set.c +++ b/src/unit/test_volatile_set.c @@ -325,6 +325,7 @@ int expire_mock_entries(volatile_set *set, mstime_t now) { do { entry = volatileSetdPopExpired(set, now); if (entry) { + TEST_ASSERT(mockGetExpiry(entry) <= now); // printf("pop expire entry %p with expiry %llu\n", entry, mockGetExpiry(entry)); mock_entry_expire(NULL, NULL, entry); } From 
9d900dcba337c9c993a0e4d54d47315dd43e01bd Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Fri, 27 Jun 2025 11:25:44 +0300 Subject: [PATCH 021/119] fix a bug in handling a removal from vector of size 1 Signed-off-by: Ran Shidlansik --- src/unit/test_volatile_set.c | 11 ++++++----- src/volatile_set.c | 17 +++++++++++------ 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/unit/test_volatile_set.c b/src/unit/test_volatile_set.c index 869c685d51..9b59bc637e 100644 --- a/src/unit/test_volatile_set.c +++ b/src/unit/test_volatile_set.c @@ -246,8 +246,8 @@ int test_volatile_set_add_and_remove_all(int argc, char **argv, int flags) { /********************* Fuzzer tests ********************************/ -#define NUM_ITERATIONS 1000000 -#define MAX_ENTRIES 100000 +#define NUM_ITERATIONS 100000 +#define MAX_ENTRIES 10000 /* Global array to simulate a test database */ mock_entry *mock_entries[MAX_ENTRIES]; @@ -375,9 +375,10 @@ int test_volatile_set_fuzzer(int argc, char **argv, int flags) { expire_mock_entries(set, now); } } - - TEST_ASSERT(volatileSetIsEmpty(set) || mock_entry_count > 0); + /* now expire all the entries and check that we have no entries left */ + expire_mock_entries(set, LONG_LONG_MAX); + TEST_ASSERT(volatileSetIsEmpty(set) && mock_entry_count == 0); freeVolatileSet(set); - free_mock_entries(); + free_mock_entries(); /* Just in case */ return 0; } diff --git a/src/volatile_set.c b/src/volatile_set.c index 2b32e1e664..9ac176d20c 100644 --- a/src/volatile_set.c +++ b/src/volatile_set.c @@ -859,11 +859,17 @@ static bool raxBucketRemoveEntry(volatile_set *set, void *entry, vsetBucket *buc break; case VSET_BUCKET_VECTOR: { vsetBucket *new_bucket = removeFromBucket_VECTOR(set, bucket, entry, 0, &removed); - if (new_bucket != bucket) - /* In order to avoid rax override, we directly change the node data */ - // alternative: raxInsert(set->expiry_buckets, key, key_len, new_bucket, NULL); - raxSetData(node, new_bucket); - if (pbucket) *pbucket = 
new_bucket; + if (new_bucket != bucket) { + if (!new_bucket) { + raxRemove(vsetBucketRax(set->expiry_buckets), key, key_len, NULL); + if (pbucket) *pbucket = NULL; + } else { + /* In order to avoid rax override, we directly change the node data */ + // alternative: raxInsert(set->expiry_buckets, key, key_len, new_bucket, NULL); + raxSetData(node, new_bucket); + if (pbucket) *pbucket = new_bucket; + } + } break; } case VSET_BUCKET_HT: { @@ -987,7 +993,6 @@ int volatileSetRemoveEntry(volatile_set *set, void *entry, long long expiry) { default: serverPanic("Cannot insert to bucket which is not single, vector or rax"); } - assert(removed); set->expiry_buckets = bucket; return removed ? 1 : 0; } From 603bbe1b3a05b6a58b52a61ae9f1e0676dd75e67 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Fri, 27 Jun 2025 15:17:04 +0300 Subject: [PATCH 022/119] fix pop expired Signed-off-by: Ran Shidlansik --- src/unit/test_volatile_set.c | 2 +- src/volatile_set.c | 196 +++++++++++++++++++++-------------- 2 files changed, 119 insertions(+), 79 deletions(-) diff --git a/src/unit/test_volatile_set.c b/src/unit/test_volatile_set.c index 9b59bc637e..b40d586768 100644 --- a/src/unit/test_volatile_set.c +++ b/src/unit/test_volatile_set.c @@ -325,8 +325,8 @@ int expire_mock_entries(volatile_set *set, mstime_t now) { do { entry = volatileSetdPopExpired(set, now); if (entry) { + // printf("pop expire entry %p with expiry %llu now: %llu\n", entry, mockGetExpiry(entry), now); TEST_ASSERT(mockGetExpiry(entry) <= now); - // printf("pop expire entry %p with expiry %llu\n", entry, mockGetExpiry(entry)); mock_entry_expire(NULL, NULL, entry); } } while (entry); diff --git a/src/volatile_set.c b/src/volatile_set.c index 9ac176d20c..4a6ffdc715 100644 --- a/src/volatile_set.c +++ b/src/volatile_set.c @@ -847,6 +847,77 @@ static inline vsetBucket *removeFromBucket_HASHTABLE(volatile_set *set, vsetBuck return new_bucket; } +static int vsetBucketNext_NONE(volatileSetIterator *it, void **entryptr) { + 
UNUSED(it); + UNUSED(entryptr); + return 0; +} +static inline int vsetBucketNext_SINGLE(volatileSetIterator *it, void **entryptr) { + bool init_bucket_scan = (it->iteration_state == VSET_BUCKET_NONE); + if (init_bucket_scan) { + it->iteration_state = VSET_BUCKET_SINGLE; + it->entry = vsetBucketSingle(it->bucket); + if (entryptr) *entryptr = it->entry; + return 1; + } + return 0; +} +static inline int vsetBucketNext_VECTOR(volatileSetIterator *it, void **entryptr) { + bool init_bucket_scan = (it->iteration_state == VSET_BUCKET_NONE); + pointer_vector *pv = vsetBucketVector(it->bucket); + if (init_bucket_scan) { + it->iteration_state = VSET_BUCKET_VECTOR; + it->viter = 0; + } else { + it->viter++; + } + if (it->viter < pv_len(pv)) { + it->entry = pv_get(pv, it->viter); + } else { + return 0; + } + if (entryptr) *entryptr = it->entry; + return 1; +} + +static inline int vsetBucketNext_HASHTABLE(volatileSetIterator *it, void **entryptr) { + bool init_bucket_scan = (it->iteration_state == VSET_BUCKET_NONE); + hashtable *ht = vsetBucketHashtable(it->bucket); + if (init_bucket_scan) { + it->iteration_state = VSET_BUCKET_HT; + hashtableInitIterator(&it->hiter, ht, 0); + } + if (!hashtableNext(&it->hiter, &it->entry)) { + hashtableResetIterator(&it->hiter); + return 0; + } + if (entryptr) *entryptr = it->entry; + return 1; +} + +static inline int vsetBucketNext_RAX(volatileSetIterator *it, void **entryptr) { + bool init_bucket_scan = (it->iteration_state == VSET_BUCKET_NONE); + if (init_bucket_scan) { + /* set myself as the parent bucket */ + it->parent_bucket = it->bucket; + raxStart(&it->riter, vsetBucketRax(it->bucket)); + raxSeek(&it->riter, "^", NULL, 0); + } + if (raxNext(&it->riter)) { + /* lets start again by going into the first bucket. 
*/ + it->iteration_state = vsetBucketType(it->riter.data); + it->bucket_ts = decodeExpiryKey(it->riter.key); + it->bucket = it->riter.data; + it->iteration_state = VSET_BUCKET_NONE; + return volatileSetNext(it, entryptr); + } else { + /* We currently do not support nested RAX buckets */ + it->parent_bucket = vsetBucketSetNone(it->parent_bucket); + return 0; + } + return 1; +} + static bool raxBucketRemoveEntry(volatile_set *set, void *entry, vsetBucket *bucket, unsigned char *key, size_t key_len, vsetBucket **pbucket, raxNode *node) { bool removed = false; switch (vsetBucketType(bucket)) { @@ -1011,15 +1082,54 @@ int volatileSetUpdateEntry(volatile_set *set, void *old_entry, void *new_entry, } static void *volatileSetGetFirstExpired(volatile_set *set, mstime_t now, bool delete) { - volatileSetIterator it; + int set_type = vsetBucketType(set->expiry_buckets); void *entry = NULL; - volatileSetStart(set, &it); - if (volatileSetNext(&it, NULL) && (it.bucket_ts <= now)) { - entry = it.entry; + long long expiry; + switch (set_type) { + case VSET_BUCKET_NONE: + return NULL; + break; + case VSET_BUCKET_RAX: { + volatileSetIterator iter; + volatileSetStart(set, &iter); + assert(vsetBucketNext_RAX(&iter, &entry)); + long long bucket_ts = iter.bucket_ts; + volatileSetReset(&iter); + if (bucket_ts > now) + return NULL; + expiry = set->etypr->getExpiry(entry); + assert(expiry <= now); + break; } - volatileSetReset(&it); - if (entry && delete) - volatileSetRemoveEntry(set, entry, set->etypr->getExpiry(entry)); + case VSET_BUCKET_SINGLE: { + entry = vsetBucketSingle(set->expiry_buckets); + expiry = set->etypr->getExpiry(entry); + if (expiry > now) + return NULL; + break; + } + case VSET_BUCKET_VECTOR: { + entry = pv_get(vsetBucketVector(set->expiry_buckets), 0); + expiry = set->etypr->getExpiry(entry); + if (expiry > now) + return NULL; + break; + } + case VSET_BUCKET_HT: { + hashtableIterator iter; + hashtableInitIterator(&iter, vsetBucketHashtable(set->expiry_buckets), 0); + 
assert(hashtableNext(&iter, &entry)); + hashtableResetIterator(&iter); + expiry = set->etypr->getExpiry(entry); + if (expiry > now) + return NULL; + break; + } + default: + serverPanic("Unknown volatile set bucket type in volatileSetNext"); + } + if (delete) + assert(volatileSetRemoveEntry(set, entry, expiry)); return entry; } @@ -1031,77 +1141,6 @@ void *volatileSetFirstExpired(volatile_set *set, mstime_t now) { return volatileSetGetFirstExpired(set, now, false); } -static int vsetBucketNext_NONE(volatileSetIterator *it, void **entryptr) { - UNUSED(it); - UNUSED(entryptr); - return 0; -} -static inline int vsetBucketNext_SINGLE(volatileSetIterator *it, void **entryptr) { - bool init_bucket_scan = (it->iteration_state == VSET_BUCKET_NONE); - if (init_bucket_scan) { - it->iteration_state = VSET_BUCKET_SINGLE; - it->entry = vsetBucketSingle(it->bucket); - if (entryptr) *entryptr = it->entry; - return 1; - } - return 0; -} -static inline int vsetBucketNext_VECTOR(volatileSetIterator *it, void **entryptr) { - bool init_bucket_scan = (it->iteration_state == VSET_BUCKET_NONE); - pointer_vector *pv = vsetBucketVector(it->bucket); - if (init_bucket_scan) { - it->iteration_state = VSET_BUCKET_VECTOR; - it->viter = 0; - } else { - it->viter++; - } - if (it->viter < pv_len(pv)) { - it->entry = pv_get(pv, it->viter); - } else { - return 0; - } - if (entryptr) *entryptr = it->entry; - return 1; -} - -static inline int vsetBucketNext_HASHTABLE(volatileSetIterator *it, void **entryptr) { - bool init_bucket_scan = (it->iteration_state == VSET_BUCKET_NONE); - hashtable *ht = vsetBucketHashtable(it->bucket); - if (init_bucket_scan) { - it->iteration_state = VSET_BUCKET_HT; - hashtableInitIterator(&it->hiter, ht, 0); - } - if (!hashtableNext(&it->hiter, &it->entry)) { - hashtableResetIterator(&it->hiter); - return 0; - } - if (entryptr) *entryptr = it->entry; - return 1; -} - -static inline int vsetBucketNext_RAX(volatileSetIterator *it, void **entryptr) { - bool init_bucket_scan = 
(it->iteration_state == VSET_BUCKET_NONE); - if (init_bucket_scan) { - /* set myself as the parent bucket */ - it->parent_bucket = it->bucket; - raxStart(&it->riter, vsetBucketRax(it->bucket)); - raxSeek(&it->riter, "^", NULL, 0); - } - if (raxNext(&it->riter)) { - /* lets start again by going into the first bucket. */ - it->iteration_state = vsetBucketType(it->riter.data); - it->bucket_ts = decodeExpiryKey(it->riter.key); - it->bucket = it->riter.data; - it->iteration_state = VSET_BUCKET_NONE; - return volatileSetNext(it, entryptr); - } else { - /* We currently do not support nested RAX buckets */ - it->parent_bucket = vsetBucketSetNone(it->parent_bucket); - return 0; - } - return 1; -} - int volatileSetNext(volatileSetIterator *it, void **entryptr) { vsetBucket *bucket = it->bucket; int bucket_type = vsetBucketType(bucket); @@ -1137,6 +1176,7 @@ int volatileSetNext(volatileSetIterator *it, void **entryptr) { void volatileSetStart(volatile_set *set, volatileSetIterator *it) { it->iteration_state = VSET_BUCKET_NONE; /*lets start by going to the first bucket. 
*/ it->bucket = set->expiry_buckets; + it->bucket_ts = -1; it->parent_bucket = vsetBucketSetNone(it->parent_bucket); } From 680a36a9ca41f5c51c2e7dad81796295036867f1 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Sat, 28 Jun 2025 07:58:10 +0300 Subject: [PATCH 023/119] change volatile_set to vset Signed-off-by: Ran Shidlansik --- cmake/Modules/SourceFiles.cmake | 2 +- src/Makefile | 2 +- src/server.h | 2 +- src/t_hash.c | 32 ++++++------ src/unit/test_files.h | 14 ++--- src/unit/{test_volatile_set.c => test_vest.c} | 30 +++++------ src/{volatile_set.c => vset.c} | 52 +++++++++---------- src/{volatile_set.h => vset.h} | 42 +++++++-------- 8 files changed, 88 insertions(+), 88 deletions(-) rename src/unit/{test_volatile_set.c => test_vest.c} (91%) rename src/{volatile_set.c => vset.c} (94%) rename src/{volatile_set.h => vset.h} (85%) diff --git a/cmake/Modules/SourceFiles.cmake b/cmake/Modules/SourceFiles.cmake index 6a217780cd..a10c7fd58b 100644 --- a/cmake/Modules/SourceFiles.cmake +++ b/cmake/Modules/SourceFiles.cmake @@ -119,7 +119,7 @@ set(VALKEY_SERVER_SRCS ${CMAKE_SOURCE_DIR}/src/server.c ${CMAKE_SOURCE_DIR}/src/logreqres.c ${CMAKE_SOURCE_DIR}/src/entry.c - ${CMAKE_SOURCE_DIR}/src/volatile_set.c) + ${CMAKE_SOURCE_DIR}/src/vset.c) # valkey-cli diff --git a/src/Makefile b/src/Makefile index 22ab3b0341..5dbad57ff3 100644 --- a/src/Makefile +++ b/src/Makefile @@ -423,7 +423,7 @@ ENGINE_NAME=valkey SERVER_NAME=$(ENGINE_NAME)-server$(PROG_SUFFIX) ENGINE_SENTINEL_NAME=$(ENGINE_NAME)-sentinel$(PROG_SUFFIX) ENGINE_TRACE_OBJ=trace/trace.o trace/trace_commands.o trace/trace_db.o trace/trace_bgsave.o trace/trace_cluster.o trace/trace_server.o trace/trace_aof.o -ENGINE_SERVER_OBJ=threads_mngr.o adlist.o vector.o quicklist.o ae.o anet.o dict.o hashtable.o kvstore.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o memory_prefetch.o io_threads.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o 
t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o cluster_legacy.o cluster_slot_stats.o crc16.o endianconv.o commandlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crccombine.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o valkey-check-rdb.o valkey-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o allocator_defrag.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script.o functions.o commands.o strl.o connection.o unix.o logreqres.o rdma.o scripting_engine.o entry.o volatile_set.o lua/script_lua.o lua/function_lua.o lua/engine_lua.o lua/debug_lua.o +ENGINE_SERVER_OBJ=threads_mngr.o adlist.o vector.o quicklist.o ae.o anet.o dict.o hashtable.o kvstore.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o memory_prefetch.o io_threads.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o cluster_legacy.o cluster_slot_stats.o crc16.o endianconv.o commandlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crccombine.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o valkey-check-rdb.o valkey-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o allocator_defrag.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script.o functions.o commands.o strl.o connection.o unix.o logreqres.o rdma.o scripting_engine.o entry.o vset.o lua/script_lua.o lua/function_lua.o 
lua/engine_lua.o lua/debug_lua.o ENGINE_SERVER_OBJ+=$(ENGINE_TRACE_OBJ) ENGINE_CLI_NAME=$(ENGINE_NAME)-cli$(PROG_SUFFIX) ENGINE_CLI_OBJ=anet.o adlist.o dict.o valkey-cli.o zmalloc.o release.o ae.o serverassert.o crcspeed.o crccombine.o crc64.o siphash.o crc16.o monotonic.o cli_common.o mt19937-64.o strl.o cli_commands.o sds.o util.o sha256.o diff --git a/src/server.h b/src/server.h index b610101800..6cc1db1d27 100644 --- a/src/server.h +++ b/src/server.h @@ -80,7 +80,7 @@ #include "rax.h" /* Radix tree */ #include "connection.h" /* Connection abstraction */ #include "memory_prefetch.h" -#include "volatile_set.h" +#include "vset.h" #include "trace/trace.h" #include "entry.h" diff --git a/src/t_hash.c b/src/t_hash.c index c15e79e348..b7aa6d29dd 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -35,7 +35,7 @@ #include "hashtable.h" #include "rax.h" #include "sds.h" -#include "volatile_set.h" +#include "vset.h" #include "server.h" #include "zmalloc.h" #include @@ -52,13 +52,13 @@ volatileEntryType hashVolatileEntryType = { * Hash type Expiry API *----------------------------------------------------------------------------*/ -static volatile_set *hashTypeGetVolatileSet(robj *o) { +static vset *hashTypeGetVolatileSet(robj *o) { serverAssert(o->encoding == OBJ_ENCODING_HASHTABLE); - return *(volatile_set **)hashtableMetadata(o->ptr); + return *(vset **)hashtableMetadata(o->ptr); } void hashTypeFreeVolatileSet(robj *o) { - volatile_set *set = hashTypeGetVolatileSet(o); + vset *set = hashTypeGetVolatileSet(o); if (set) freeVolatileSet(set); } @@ -79,33 +79,33 @@ void hashTypeIgnoreTTL(robj *o, bool ignore) { } } -static volatile_set *hashTypeGetOrcreateVolatileSet(robj *o) { +static vset *hashTypeGetOrcreateVolatileSet(robj *o) { serverAssert(o->encoding == OBJ_ENCODING_HASHTABLE); - volatile_set **volatile_set_ref = hashtableMetadata(o->ptr); - if (*volatile_set_ref == NULL) { - *volatile_set_ref = createVolatileSet(&hashVolatileEntryType); + vset **vset_ref = 
hashtableMetadata(o->ptr); + if (*vset_ref == NULL) { + *vset_ref = createVolatileSet(&hashVolatileEntryType); /* serves mainly for optimization. Use type which supports access function only when needed. */ hashTypeIgnoreTTL(o, false); } - return *volatile_set_ref; + return *vset_ref; } static void hashTypeDeleteVolatileSet(robj *o) { - volatile_set **volatile_set_ref = hashtableMetadata(o->ptr); - freeVolatileSet(*volatile_set_ref); - *volatile_set_ref = NULL; + vset **vset_ref = hashtableMetadata(o->ptr); + freeVolatileSet(*vset_ref); + *vset_ref = NULL; /* serves mainly for optimization. by changing the hashtable type we can avoid extra function call in hashtable access */ hashTypeIgnoreTTL(o, true); } void hashTypeTrackEntry(robj *o, void *entry) { - volatile_set *set = hashTypeGetOrcreateVolatileSet(o); + vset *set = hashTypeGetOrcreateVolatileSet(o); serverAssert(volatileSetAddEntry(set, entry, entryGetExpiry(entry))); } void hashTypeUntrackEntry(robj *o, void *entry) { if (!entryHasExpiry(entry)) return; - volatile_set *set = hashTypeGetVolatileSet(o); + vset *set = hashTypeGetVolatileSet(o); debugServerAssert(set); serverAssert(volatileSetRemoveEntry(set, entry, entryGetExpiry(entry))); if (volatileSetIsEmpty(set)) { @@ -120,7 +120,7 @@ static void hashTypeTrackUpdateEntry(robj *o, void *old_entry, void *new_entry, if (!old_tracked && !new_tracked) return; - volatile_set *set = hashTypeGetOrcreateVolatileSet(o); + vset *set = hashTypeGetOrcreateVolatileSet(o); debugServerAssert(set); if (old_tracked && !new_tracked) @@ -128,7 +128,7 @@ static void hashTypeTrackUpdateEntry(robj *o, void *old_entry, void *new_entry, else if (new_tracked && !old_tracked) serverAssert(volatileSetAddEntry(set, new_entry, new_expiry)); else { - volatile_set *set = hashTypeGetVolatileSet(o); + vset *set = hashTypeGetVolatileSet(o); debugServerAssert(set); serverAssert(volatileSetUpdateEntry(set, old_entry, new_entry, old_expiry, new_expiry) == 1); } diff --git 
a/src/unit/test_files.h b/src/unit/test_files.h index 05f9eed17b..8dbe63f452 100644 --- a/src/unit/test_files.h +++ b/src/unit/test_files.h @@ -201,11 +201,11 @@ int test_version2num(int argc, char **argv, int flags); int test_reclaimFilePageCache(int argc, char **argv, int flags); int test_valkey_strtod(int argc, char **argv, int flags); int test_vector(int argc, char **argv, int flags); -int test_volatile_set_add_and_iterate(int argc, char **argv, int flags); -int test_volatile_set_large_batch_same_expiry(int argc, char **argv, int flags); -int test_volatile_set_iterate_multiple_expiries(int argc, char **argv, int flags); -int test_volatile_set_add_and_remove_all(int argc, char **argv, int flags); -int test_volatile_set_fuzzer(int argc, char **argv, int flags); +int test_vset_add_and_iterate(int argc, char **argv, int flags); +int test_vset_large_batch_same_expiry(int argc, char **argv, int flags); +int test_vset_iterate_multiple_expiries(int argc, char **argv, int flags); +int test_vset_add_and_remove_all(int argc, char **argv, int flags); +int test_vset_fuzzer(int argc, char **argv, int flags); int test_ziplistCreateIntList(int argc, char **argv, int flags); int test_ziplistPop(int argc, char **argv, int flags); int test_ziplistGetElementAtIndex3(int argc, char **argv, int flags); @@ -267,7 +267,7 @@ unitTest __test_sha1_c[] = {{"test_sha1", test_sha1}, {NULL, NULL}}; unitTest __test_util_c[] = {{"test_string2ll", test_string2ll}, {"test_string2l", test_string2l}, {"test_ll2string", test_ll2string}, {"test_ld2string", test_ld2string}, {"test_fixedpoint_d2string", test_fixedpoint_d2string}, {"test_version2num", test_version2num}, {"test_reclaimFilePageCache", test_reclaimFilePageCache}, {NULL, NULL}}; unitTest __test_valkey_strtod_c[] = {{"test_valkey_strtod", test_valkey_strtod}, {NULL, NULL}}; unitTest __test_vector_c[] = {{"test_vector", test_vector}, {NULL, NULL}}; -unitTest __test_volatile_set_c[] = {{"test_volatile_set_add_and_iterate", 
test_volatile_set_add_and_iterate}, {"test_volatile_set_large_batch_same_expiry", test_volatile_set_large_batch_same_expiry}, {"test_volatile_set_iterate_multiple_expiries", test_volatile_set_iterate_multiple_expiries}, {"test_volatile_set_add_and_remove_all", test_volatile_set_add_and_remove_all}, {"test_volatile_set_fuzzer", test_volatile_set_fuzzer}, {NULL, NULL}}; +unitTest __test_vset_c[] = {{"test_vset_add_and_iterate", test_vset_add_and_iterate}, {"test_vset_large_batch_same_expiry", test_vset_large_batch_same_expiry}, {"test_vset_iterate_multiple_expiries", test_vset_iterate_multiple_expiries}, {"test_vset_add_and_remove_all", test_vset_add_and_remove_all}, {"test_vset_fuzzer", test_vset_fuzzer}, {NULL, NULL}}; unitTest __test_ziplist_c[] = {{"test_ziplistCreateIntList", test_ziplistCreateIntList}, {"test_ziplistPop", test_ziplistPop}, {"test_ziplistGetElementAtIndex3", test_ziplistGetElementAtIndex3}, {"test_ziplistGetElementOutOfRange", test_ziplistGetElementOutOfRange}, {"test_ziplistGetLastElement", test_ziplistGetLastElement}, {"test_ziplistGetFirstElement", test_ziplistGetFirstElement}, {"test_ziplistGetElementOutOfRangeReverse", test_ziplistGetElementOutOfRangeReverse}, {"test_ziplistIterateThroughFullList", test_ziplistIterateThroughFullList}, {"test_ziplistIterateThroughListFrom1ToEnd", test_ziplistIterateThroughListFrom1ToEnd}, {"test_ziplistIterateThroughListFrom2ToEnd", test_ziplistIterateThroughListFrom2ToEnd}, {"test_ziplistIterateThroughStartOutOfRange", test_ziplistIterateThroughStartOutOfRange}, {"test_ziplistIterateBackToFront", test_ziplistIterateBackToFront}, {"test_ziplistIterateBackToFrontDeletingAllItems", test_ziplistIterateBackToFrontDeletingAllItems}, {"test_ziplistDeleteInclusiveRange0To0", test_ziplistDeleteInclusiveRange0To0}, {"test_ziplistDeleteInclusiveRange0To1", test_ziplistDeleteInclusiveRange0To1}, {"test_ziplistDeleteInclusiveRange1To2", test_ziplistDeleteInclusiveRange1To2}, 
{"test_ziplistDeleteWithStartIndexOutOfRange", test_ziplistDeleteWithStartIndexOutOfRange}, {"test_ziplistDeleteWithNumOverflow", test_ziplistDeleteWithNumOverflow}, {"test_ziplistDeleteFooWhileIterating", test_ziplistDeleteFooWhileIterating}, {"test_ziplistReplaceWithSameSize", test_ziplistReplaceWithSameSize}, {"test_ziplistReplaceWithDifferentSize", test_ziplistReplaceWithDifferentSize}, {"test_ziplistRegressionTestForOver255ByteStrings", test_ziplistRegressionTestForOver255ByteStrings}, {"test_ziplistRegressionTestDeleteNextToLastEntries", test_ziplistRegressionTestDeleteNextToLastEntries}, {"test_ziplistCreateLongListAndCheckIndices", test_ziplistCreateLongListAndCheckIndices}, {"test_ziplistCompareStringWithZiplistEntries", test_ziplistCompareStringWithZiplistEntries}, {"test_ziplistMergeTest", test_ziplistMergeTest}, {"test_ziplistStressWithRandomPayloadsOfDifferentEncoding", test_ziplistStressWithRandomPayloadsOfDifferentEncoding}, {"test_ziplistCascadeUpdateEdgeCases", test_ziplistCascadeUpdateEdgeCases}, {"test_ziplistInsertEdgeCase", test_ziplistInsertEdgeCase}, {"test_ziplistStressWithVariableSize", test_ziplistStressWithVariableSize}, {"test_BenchmarkziplistFind", test_BenchmarkziplistFind}, {"test_BenchmarkziplistIndex", test_BenchmarkziplistIndex}, {"test_BenchmarkziplistValidateIntegrity", test_BenchmarkziplistValidateIntegrity}, {"test_BenchmarkziplistCompareWithString", test_BenchmarkziplistCompareWithString}, {"test_BenchmarkziplistCompareWithNumber", test_BenchmarkziplistCompareWithNumber}, {"test_ziplistStress__ziplistCascadeUpdate", test_ziplistStress__ziplistCascadeUpdate}, {NULL, NULL}}; unitTest __test_zipmap_c[] = {{"test_zipmapIterateWithLargeKey", test_zipmapIterateWithLargeKey}, {"test_zipmapIterateThroughElements", test_zipmapIterateThroughElements}, {NULL, NULL}}; unitTest __test_zmalloc_c[] = {{"test_zmallocInitialUsedMemory", test_zmallocInitialUsedMemory}, {"test_zmallocAllocReallocCallocAndFree", 
test_zmallocAllocReallocCallocAndFree}, {"test_zmallocAllocZeroByteAndFree", test_zmallocAllocZeroByteAndFree}, {NULL, NULL}}; @@ -295,7 +295,7 @@ struct unitTestSuite { {"test_util.c", __test_util_c}, {"test_valkey_strtod.c", __test_valkey_strtod_c}, {"test_vector.c", __test_vector_c}, - {"test_volatile_set.c", __test_volatile_set_c}, + {"test_vset.c", __test_vset_c}, {"test_ziplist.c", __test_ziplist_c}, {"test_zipmap.c", __test_zipmap_c}, {"test_zmalloc.c", __test_zmalloc_c}, diff --git a/src/unit/test_volatile_set.c b/src/unit/test_vest.c similarity index 91% rename from src/unit/test_volatile_set.c rename to src/unit/test_vest.c index b40d586768..d973f02409 100644 --- a/src/unit/test_volatile_set.c +++ b/src/unit/test_vest.c @@ -1,4 +1,4 @@ -#include "../volatile_set.h" +#include "../vset.h" #include "../entry.h" #include "test_help.h" #include "../zmalloc.h" @@ -44,7 +44,7 @@ static int mockExpire(void *db, void *o, void *entry) { return 1; } -int test_volatile_set_add_and_iterate(int argc, char **argv, int flags) { +int test_vset_add_and_iterate(int argc, char **argv, int flags) { return 0; (void)argc; (void)argv; @@ -56,7 +56,7 @@ int test_volatile_set_add_and_iterate(int argc, char **argv, int flags) { .expire = mockExpire, }; - volatile_set *set = createVolatileSet(&type); + vset *set = createVolatileSet(&type); TEST_ASSERT(set != NULL); mock_entry *e1 = mockCreateEntry("item1", 123); @@ -88,7 +88,7 @@ int test_volatile_set_add_and_iterate(int argc, char **argv, int flags) { return 0; } -int test_volatile_set_large_batch_same_expiry(int argc, char **argv, int flags) { +int test_vset_large_batch_same_expiry(int argc, char **argv, int flags) { return 0; (void)argc; (void)argv; @@ -100,7 +100,7 @@ int test_volatile_set_large_batch_same_expiry(int argc, char **argv, int flags) .expire = mockExpire, }; - volatile_set *set = createVolatileSet(&type); + vset *set = createVolatileSet(&type); TEST_ASSERT(set != NULL); const long long expiry_time = 1000LL; @@ 
-145,7 +145,7 @@ int test_volatile_set_large_batch_same_expiry(int argc, char **argv, int flags) return 0; } -int test_volatile_set_iterate_multiple_expiries(int argc, char **argv, int flags) { +int test_vset_iterate_multiple_expiries(int argc, char **argv, int flags) { return 0; (void)argc; (void)argv; @@ -157,7 +157,7 @@ int test_volatile_set_iterate_multiple_expiries(int argc, char **argv, int flags .expire = mockExpire, }; - volatile_set *set = createVolatileSet(&type); + vset *set = createVolatileSet(&type); TEST_ASSERT(set != NULL); // Prepare entries with mixed expiry times, some duplicates @@ -207,7 +207,7 @@ int test_volatile_set_iterate_multiple_expiries(int argc, char **argv, int flags return 0; } -int test_volatile_set_add_and_remove_all(int argc, char **argv, int flags) { +int test_vset_add_and_remove_all(int argc, char **argv, int flags) { UNUSED(argc); UNUSED(argv); UNUSED(flags); @@ -218,7 +218,7 @@ int test_volatile_set_add_and_remove_all(int argc, char **argv, int flags) { .expire = mockExpire, }; - volatile_set *set = createVolatileSet(&type); + vset *set = createVolatileSet(&type); TEST_ASSERT(set != NULL); const int total_entries = 130; @@ -282,7 +282,7 @@ mock_entry *mock_entry_create(const char *keystr, long long expiry) { return mockCreateEntry(keystr, expiry); } -int insert_mock_entry(volatile_set *set) { +int insert_mock_entry(vset *set) { if (mock_entry_count >= MAX_ENTRIES) return 0; char keybuf[32]; snprintf(keybuf, sizeof(keybuf), "key_%d", rand()); @@ -295,7 +295,7 @@ int insert_mock_entry(volatile_set *set) { return 0; } -int update_mock_entry(volatile_set *set) { +int update_mock_entry(vset *set) { if (mock_entry_count == 0) return 0; int idx = rand() % mock_entry_count; mock_entry *old = mock_entries[idx]; @@ -308,7 +308,7 @@ int update_mock_entry(volatile_set *set) { return 0; } -int remove_mock_entry(volatile_set *set) { +int remove_mock_entry(vset *set) { if (mock_entry_count == 0) return 0; int idx = rand() % mock_entry_count; 
mock_entry *e = mock_entries[idx]; @@ -320,7 +320,7 @@ int remove_mock_entry(volatile_set *set) { return 0; } -int expire_mock_entries(volatile_set *set, mstime_t now) { +int expire_mock_entries(vset *set, mstime_t now) { void *entry; do { entry = volatileSetdPopExpired(set, now); @@ -342,7 +342,7 @@ int free_mock_entries(void) { } /* --------- Fuzzer Test --------- */ -int test_volatile_set_fuzzer(int argc, char **argv, int flags) { +int test_vset_fuzzer(int argc, char **argv, int flags) { UNUSED(argc); UNUSED(argv); UNUSED(flags); @@ -353,7 +353,7 @@ int test_volatile_set_fuzzer(int argc, char **argv, int flags) { .getExpiry = mock_entry_get_expiry, .expire = mock_entry_expire}; - volatile_set *set = createVolatileSet(&type); + vset *set = createVolatileSet(&type); for (int i = 0; i < NUM_ITERATIONS; i++) { int op = rand() % 4; diff --git a/src/volatile_set.c b/src/vset.c similarity index 94% rename from src/volatile_set.c rename to src/vset.c index 4a6ffdc715..0bf4caba3d 100644 --- a/src/volatile_set.c +++ b/src/vset.c @@ -1,5 +1,5 @@ #include -#include "volatile_set.h" +#include "vset.h" #include "rax.h" #include "zmalloc.h" #include "endianconv.h" @@ -448,7 +448,7 @@ static size_t encodeNewExpiryBucketKey(unsigned char *key, long long expiry) { * elem Pointer to the element to insert * cmp Comparison function (like strcmp-style: <0, ==0, >0) * returns the insertion index (between 0 and sv->len) */ -uint32_t _find_insert_position(volatile_set *set, vsetBucket *bucket, long long expiry) { +uint32_t _find_insert_position(vset *set, vsetBucket *bucket, long long expiry) { pointer_vector *sv = vsetBucketVector(bucket); uint32_t left = 0; uint32_t right = pv_len(sv); @@ -467,7 +467,7 @@ uint32_t _find_insert_position(volatile_set *set, vsetBucket *bucket, long long /* _find_split_position - Find the optimal split index in a sorted pointer vector * based on coarse (bucketed) expiry timestamps. 
* Arguments - * set: Pointer to the `volatile_set` containing the element type and expiry logic. + * set: Pointer to the `vset` containing the element type and expiry logic. * bucket: Pointer to a `vsetBucket` holding a sorted `pointer_vector` of elements. * split_ts: an optional pointer to a location to store the split timestamp, that is the position * belonging in the lower split vector with the largest expiration time. @@ -509,7 +509,7 @@ uint32_t _find_insert_position(volatile_set *set, vsetBucket *bucket, long long * This guarantees that each vector contains elements with the same bucket timestamp, * and no value in the first part maps to the same or later bucket as the second part. */ -uint32_t _find_split_position(volatile_set *set, vsetBucket *bucket, long long *split_ts_out) { +uint32_t _find_split_position(vset *set, vsetBucket *bucket, long long *split_ts_out) { pointer_vector *sv = vsetBucketVector(bucket); if (!sv || sv->len < 2) return sv->len; @@ -627,7 +627,7 @@ static void freeVsetBucket(void *entry) { } } -static bool splitBucketIfPossible(volatile_set *set, vsetBucket *bucket, long long bucket_ts, raxNode *node) { +static bool splitBucketIfPossible(vset *set, vsetBucket *bucket, long long bucket_ts, raxNode *node) { /* We can only split vector encoded buckets */ if (vsetBucketType(bucket) != VSET_BUCKET_VECTOR) { return false; @@ -674,13 +674,13 @@ static bool splitBucketIfPossible(volatile_set *set, vsetBucket *bucket, long lo return true; } -static inline vsetBucket *insertToBucket_NONE(volatile_set *set, vsetBucket *bucket, void *entry, long long expiry) { +static inline vsetBucket *insertToBucket_NONE(vset *set, vsetBucket *bucket, void *entry, long long expiry) { UNUSED(set); UNUSED(expiry); return vsetBucketSetSingle(bucket, entry); } -static inline vsetBucket *insertToBucket_SINGLE(volatile_set *set, vsetBucket *bucket, void *entry, long long expiry) { +static inline vsetBucket *insertToBucket_SINGLE(vset *set, vsetBucket *bucket, void 
*entry, long long expiry) { /* Upgrade to vector */ pointer_vector *sv = pv_new(2); void *curr_entry = vsetBucketSingle(bucket); @@ -696,7 +696,7 @@ static inline vsetBucket *insertToBucket_SINGLE(volatile_set *set, vsetBucket *b return bucket; } -static inline vsetBucket *insertToBucket_VECTOR(volatile_set *set, vsetBucket *bucket, void *entry, long long expiry) { +static inline vsetBucket *insertToBucket_VECTOR(vset *set, vsetBucket *bucket, void *entry, long long expiry) { pointer_vector *pv = vsetBucketVector(bucket); /* limit of the number of elements in a vector. */ if (pv_len(pv) >= VOLATILESET_VECTOR_BUCKET_MAX_SIZE) { @@ -717,7 +717,7 @@ static inline vsetBucket *insertToBucket_VECTOR(volatile_set *set, vsetBucket *b return NULL; } -static inline vsetBucket *insertToBucket_HASHTABLE(volatile_set *set, vsetBucket *bucket, void *entry, long long expiry) { +static inline vsetBucket *insertToBucket_HASHTABLE(vset *set, vsetBucket *bucket, void *entry, long long expiry) { UNUSED(set); UNUSED(expiry); @@ -726,7 +726,7 @@ static inline vsetBucket *insertToBucket_HASHTABLE(volatile_set *set, vsetBucket return bucket; } -static inline vsetBucket *insertToBucket_RAX(volatile_set *set, vsetBucket *target, void *entry, long long expiry) { +static inline vsetBucket *insertToBucket_RAX(vset *set, vsetBucket *target, void *entry, long long expiry) { unsigned char key[VSET_BUCKET_KEY_LEN] = {0}; size_t key_len; long long bucket_ts; @@ -778,7 +778,7 @@ static inline vsetBucket *insertToBucket_RAX(volatile_set *set, vsetBucket *targ return target; } -static inline vsetBucket *removeFromBucket_SINGLE(volatile_set *set, vsetBucket *bucket, void *entry, long long expiry, bool *removed) { +static inline vsetBucket *removeFromBucket_SINGLE(vset *set, vsetBucket *bucket, void *entry, long long expiry, bool *removed) { UNUSED(set); UNUSED(expiry); @@ -791,7 +791,7 @@ static inline vsetBucket *removeFromBucket_SINGLE(volatile_set *set, vsetBucket } } -static inline vsetBucket 
*removeFromBucket_VECTOR(volatile_set *set, vsetBucket *bucket, void *entry, long long expiry, bool *removed) { +static inline vsetBucket *removeFromBucket_VECTOR(vset *set, vsetBucket *bucket, void *entry, long long expiry, bool *removed) { UNUSED(set); UNUSED(expiry); @@ -823,7 +823,7 @@ static inline vsetBucket *removeFromBucket_VECTOR(volatile_set *set, vsetBucket return new_bucket; } -static inline vsetBucket *removeFromBucket_HASHTABLE(volatile_set *set, vsetBucket *bucket, void *entry, long long expiry, bool *removed) { +static inline vsetBucket *removeFromBucket_HASHTABLE(vset *set, vsetBucket *bucket, void *entry, long long expiry, bool *removed) { UNUSED(set); UNUSED(expiry); @@ -918,7 +918,7 @@ static inline int vsetBucketNext_RAX(volatileSetIterator *it, void **entryptr) { return 1; } -static bool raxBucketRemoveEntry(volatile_set *set, void *entry, vsetBucket *bucket, unsigned char *key, size_t key_len, vsetBucket **pbucket, raxNode *node) { +static bool raxBucketRemoveEntry(vset *set, void *entry, vsetBucket *bucket, unsigned char *key, size_t key_len, vsetBucket **pbucket, raxNode *node) { bool removed = false; switch (vsetBucketType(bucket)) { case VSET_BUCKET_SINGLE: @@ -960,7 +960,7 @@ static bool raxBucketRemoveEntry(volatile_set *set, void *entry, vsetBucket *buc return removed; } -static inline vsetBucket *removeFromBucket_RAX(volatile_set *set, vsetBucket *target, void *entry, long long expiry, bool *removed) { +static inline vsetBucket *removeFromBucket_RAX(vset *set, vsetBucket *target, void *entry, long long expiry, bool *removed) { unsigned char key[VSET_BUCKET_KEY_LEN] = {0}; long long bucket_ts; size_t key_len; @@ -991,7 +991,7 @@ static inline vsetBucket *removeFromBucket_RAX(volatile_set *set, vsetBucket *ta return target; } -int volatileSetAddEntry(volatile_set *set, void *entry, long long expiry) { +int volatileSetAddEntry(vset *set, void *entry, long long expiry) { int bucket_type = vsetBucketType(set->expiry_buckets); switch 
(bucket_type) { case VSET_BUCKET_NONE: @@ -1041,7 +1041,7 @@ int volatileSetAddEntry(volatile_set *set, void *entry, long long expiry) { return 1; } -int volatileSetRemoveEntry(volatile_set *set, void *entry, long long expiry) { +int volatileSetRemoveEntry(vset *set, void *entry, long long expiry) { bool removed; vsetBucket *bucket = set->expiry_buckets; int bucket_type = vsetBucketType(bucket); @@ -1068,7 +1068,7 @@ int volatileSetRemoveEntry(volatile_set *set, void *entry, long long expiry) { return removed ? 1 : 0; } -int volatileSetUpdateEntry(volatile_set *set, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry) { +int volatileSetUpdateEntry(vset *set, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry) { if (old_entry == new_entry && old_expiry == new_expiry) return 1; @@ -1081,7 +1081,7 @@ int volatileSetUpdateEntry(volatile_set *set, void *old_entry, void *new_entry, return 1; } -static void *volatileSetGetFirstExpired(volatile_set *set, mstime_t now, bool delete) { +static void *volatileSetGetFirstExpired(vset *set, mstime_t now, bool delete) { int set_type = vsetBucketType(set->expiry_buckets); void *entry = NULL; long long expiry; @@ -1133,11 +1133,11 @@ static void *volatileSetGetFirstExpired(volatile_set *set, mstime_t now, bool de return entry; } -void *volatileSetdPopExpired(volatile_set *set, mstime_t now) { +void *volatileSetdPopExpired(vset *set, mstime_t now) { return volatileSetGetFirstExpired(set, now, true); } -void *volatileSetFirstExpired(volatile_set *set, mstime_t now) { +void *volatileSetFirstExpired(vset *set, mstime_t now) { return volatileSetGetFirstExpired(set, now, false); } @@ -1173,7 +1173,7 @@ int volatileSetNext(volatileSetIterator *it, void **entryptr) { return ret; } -void volatileSetStart(volatile_set *set, volatileSetIterator *it) { +void volatileSetStart(vset *set, volatileSetIterator *it) { it->iteration_state = VSET_BUCKET_NONE; /*lets start by going to the first bucket. 
*/ it->bucket = set->expiry_buckets; it->bucket_ts = -1; @@ -1189,19 +1189,19 @@ void volatileSetReset(volatileSetIterator *it) { hashtableResetIterator(&it->hiter); } -volatile_set *createVolatileSet(volatileEntryType *type) { - volatile_set *set = zmalloc(sizeof(volatile_set)); +vset *createVolatileSet(volatileEntryType *type) { + vset *set = zmalloc(sizeof(vset)); set->etypr = type; set->expiry_buckets = vsetBucketSetNone(set->expiry_buckets); return set; } -void freeVolatileSet(volatile_set *set) { +void freeVolatileSet(vset *set) { if (!set) return; freeVsetBucket(set->expiry_buckets); zfree(set); } -bool volatileSetIsEmpty(volatile_set *set) { +bool volatileSetIsEmpty(vset *set) { return vsetBucketType(set->expiry_buckets) == VSET_BUCKET_NONE; } diff --git a/src/volatile_set.h b/src/vset.h similarity index 85% rename from src/volatile_set.h rename to src/vset.h index 505a187625..4abaf0fc7f 100644 --- a/src/volatile_set.h +++ b/src/vset.h @@ -15,7 +15,7 @@ * Volatile Set - Adaptive, Expiry-aware Set Structure *----------------------------------------------------------------------------- * - * The `volatile_set` is a dynamic, memory-efficient container for managing + * The `vset` is a dynamic, memory-efficient container for managing * entries with expiry semantics. It is designed to efficiently track entries * that expire at varying times and scales to large sets by adapting its internal * representation as it grows or shrinks. @@ -24,7 +24,7 @@ * Expiry Buckets and Pointer Tagging *----------------------------------------------------------------------------- * - * Internally, the `volatile_set` maintains a single `vsetBucket*` pointer, + * Internally, the `vset` maintains a single `vsetBucket*` pointer, * which can point to different types of buckets depending on the number of * entries and the needed resolution. 
The pointer is tagged using the lowest 3 bits: * @@ -153,25 +153,25 @@ *----------------------------------------------------------------------------- * * Create/Free: - * volatile_set *createVolatileSet(volatileEntryType *type); - * void freeVolatileSet(volatile_set *set); + * vset *createVolatileSet(volatileEntryType *type); + * void freeVolatileSet(vset *set); * * Mutation: - * int volatileSetAddEntry(volatile_set *set, void *entry, long long expiry); - * int volatileSetRemoveEntry(volatile_set *set, void *entry, long long expiry); - * int volatileSetUpdateEntry(volatile_set *set, void *old_entry, + * int volatileSetAddEntry(vset *set, void *entry, long long expiry); + * int volatileSetRemoveEntry(vset *set, void *entry, long long expiry); + * int volatileSetUpdateEntry(vset *set, void *old_entry, * void *new_entry, long long old_expiry, * long long new_expiry); * * Expiry Retrieval: - * void *volatileSetFirstExpired(volatile_set *set, mstime_t now); - * void *volatileSetdPopExpired(volatile_set *set, mstime_t now); + * void *volatileSetFirstExpired(vset *set, mstime_t now); + * void *volatileSetdPopExpired(vset *set, mstime_t now); * * Utilities: - * bool volatileSetIsEmpty(volatile_set *set); + * bool volatileSetIsEmpty(vset *set); * * Iteration: - * void volatileSetStart(volatile_set *set, volatileSetIterator *it); + * void volatileSetStart(vset *set, volatileSetIterator *it); * int volatileSetNext(volatileSetIterator *it, void **entryptr); * void volatileSetReset(volatileSetIterator *it); * @@ -213,7 +213,7 @@ typedef void vsetBucket; typedef struct { volatileEntryType *etypr; vsetBucket *expiry_buckets; -} volatile_set; +} vset; typedef struct volatileSetIterator { /* for rax bucket */ @@ -236,17 +236,17 @@ typedef struct volatileSetIterator { int iteration_state; } volatileSetIterator; -int volatileSetRemoveEntry(volatile_set *set, void *entry, long long expiry); -int volatileSetAddEntry(volatile_set *set, void *entry, long long expiry); -void 
*volatileSetdPopExpired(volatile_set *set, mstime_t now); -void *volatileSetFirstExpired(volatile_set *set, mstime_t now); -int volatileSetUpdateEntry(volatile_set *set, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry); -bool volatileSetIsEmpty(volatile_set *set); -void volatileSetStart(volatile_set *set, volatileSetIterator *it); +int volatileSetRemoveEntry(vset *set, void *entry, long long expiry); +int volatileSetAddEntry(vset *set, void *entry, long long expiry); +void *volatileSetdPopExpired(vset *set, mstime_t now); +void *volatileSetFirstExpired(vset *set, mstime_t now); +int volatileSetUpdateEntry(vset *set, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry); +bool volatileSetIsEmpty(vset *set); +void volatileSetStart(vset *set, volatileSetIterator *it); int volatileSetNext(volatileSetIterator *it, void **entryptr); void volatileSetReset(volatileSetIterator *it); -void freeVolatileSet(volatile_set *b); -volatile_set *createVolatileSet(volatileEntryType *type); +void freeVolatileSet(vset *b); +vset *createVolatileSet(volatileEntryType *type); #endif From e1ace279c1143c79b40d3e79836269b0028bdae9 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Sun, 29 Jun 2025 10:14:45 +0300 Subject: [PATCH 024/119] 1. change to vset 2. change pointer_vector to pVecotr 3. 
multiple pr comments fix Signed-off-by: Ran Shidlansik --- src/object.c | 4 +- src/server.h | 2 +- src/t_hash.c | 20 +-- src/unit/test_files.h | 9 +- src/unit/test_vest.c | 52 +++--- src/vset.c | 378 +++++++++++++++++++++++------------------- src/vset.h | 128 ++++++++++---- 7 files changed, 354 insertions(+), 239 deletions(-) diff --git a/src/object.c b/src/object.c index 301b963ea4..61f9a09c01 100644 --- a/src/object.c +++ b/src/object.c @@ -1129,9 +1129,9 @@ size_t objectComputeSize(robj *key, robj *o, size_t sample_size, int dbid) { if (o->type == OBJ_STRING) { if (o->encoding == OBJ_ENCODING_INT) { - asize = sizeof(*o); + asize = zmalloc_size((void *)o); } else if (o->encoding == OBJ_ENCODING_RAW) { - asize = sdsAllocSize(o->ptr) + sizeof(*o); + asize = sdsAllocSize(o->ptr) + zmalloc_size((void *)o); } else if (o->encoding == OBJ_ENCODING_EMBSTR) { asize = zmalloc_size((void *)o); } else { diff --git a/src/server.h b/src/server.h index 6cc1db1d27..b81bd221fb 100644 --- a/src/server.h +++ b/src/server.h @@ -2597,7 +2597,7 @@ typedef struct { unsigned char *fptr, *vptr; hashtableIterator iter; - volatileSetIterator viter; + vsetIterator viter; void *next; } hashTypeIterator; diff --git a/src/t_hash.c b/src/t_hash.c index b7aa6d29dd..87cec9c8d5 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -100,15 +100,15 @@ static void hashTypeDeleteVolatileSet(robj *o) { void hashTypeTrackEntry(robj *o, void *entry) { vset *set = hashTypeGetOrcreateVolatileSet(o); - serverAssert(volatileSetAddEntry(set, entry, entryGetExpiry(entry))); + serverAssert(vsetAddEntry(set, entry, entryGetExpiry(entry))); } void hashTypeUntrackEntry(robj *o, void *entry) { if (!entryHasExpiry(entry)) return; vset *set = hashTypeGetVolatileSet(o); debugServerAssert(set); - serverAssert(volatileSetRemoveEntry(set, entry, entryGetExpiry(entry))); - if (volatileSetIsEmpty(set)) { + serverAssert(vsetRemoveEntry(set, entry, entryGetExpiry(entry))); + if (vsetIsEmpty(set)) { hashTypeDeleteVolatileSet(o); } 
} @@ -124,15 +124,15 @@ static void hashTypeTrackUpdateEntry(robj *o, void *old_entry, void *new_entry, debugServerAssert(set); if (old_tracked && !new_tracked) - serverAssert(volatileSetRemoveEntry(set, old_entry, old_expiry)); + serverAssert(vsetRemoveEntry(set, old_entry, old_expiry)); else if (new_tracked && !old_tracked) - serverAssert(volatileSetAddEntry(set, new_entry, new_expiry)); + serverAssert(vsetAddEntry(set, new_entry, new_expiry)); else { vset *set = hashTypeGetVolatileSet(o); debugServerAssert(set); - serverAssert(volatileSetUpdateEntry(set, old_entry, new_entry, old_expiry, new_expiry) == 1); + serverAssert(vsetUpdateEntry(set, old_entry, new_entry, old_expiry, new_expiry) == 1); } - if (volatileSetIsEmpty(set)) { + if (vsetIsEmpty(set)) { hashTypeDeleteVolatileSet(o); } } @@ -584,7 +584,7 @@ void hashTypeInitVolatileIterator(robj *subject, hashTypeIterator *hi) { if (hi->encoding == OBJ_ENCODING_LISTPACK) { return; } else if (hi->encoding == OBJ_ENCODING_HASHTABLE) { - volatileSetStart(hashTypeGetVolatileSet(subject), &hi->viter); + vsetStart(hashTypeGetVolatileSet(subject), &hi->viter); } else { serverPanic("Unknown hash encoding"); } @@ -595,7 +595,7 @@ void hashTypeResetIterator(hashTypeIterator *hi) { if (!hi->volatile_items_iter) hashtableResetIterator(&hi->iter); else - volatileSetReset(&hi->viter); + vsetStop(&hi->viter); } } @@ -635,7 +635,7 @@ int hashTypeNext(hashTypeIterator *hi) { if (!hi->volatile_items_iter) { if (!hashtableNext(&hi->iter, &hi->next)) return C_ERR; } else { - if (!volatileSetNext(&hi->viter, &hi->next)) return C_ERR; + if (!vsetNext(&hi->viter, &hi->next)) return C_ERR; } } else { serverPanic("Unknown hash encoding"); diff --git a/src/unit/test_files.h b/src/unit/test_files.h index 8dbe63f452..f0f3ef8aa0 100644 --- a/src/unit/test_files.h +++ b/src/unit/test_files.h @@ -206,6 +206,11 @@ int test_vset_large_batch_same_expiry(int argc, char **argv, int flags); int test_vset_iterate_multiple_expiries(int argc, char 
**argv, int flags); int test_vset_add_and_remove_all(int argc, char **argv, int flags); int test_vset_fuzzer(int argc, char **argv, int flags); +int test_vset_add_and_iterate(int argc, char **argv, int flags); +int test_vset_large_batch_same_expiry(int argc, char **argv, int flags); +int test_vset_iterate_multiple_expiries(int argc, char **argv, int flags); +int test_vset_add_and_remove_all(int argc, char **argv, int flags); +int test_vset_fuzzer(int argc, char **argv, int flags); int test_ziplistCreateIntList(int argc, char **argv, int flags); int test_ziplistPop(int argc, char **argv, int flags); int test_ziplistGetElementAtIndex3(int argc, char **argv, int flags); @@ -267,7 +272,7 @@ unitTest __test_sha1_c[] = {{"test_sha1", test_sha1}, {NULL, NULL}}; unitTest __test_util_c[] = {{"test_string2ll", test_string2ll}, {"test_string2l", test_string2l}, {"test_ll2string", test_ll2string}, {"test_ld2string", test_ld2string}, {"test_fixedpoint_d2string", test_fixedpoint_d2string}, {"test_version2num", test_version2num}, {"test_reclaimFilePageCache", test_reclaimFilePageCache}, {NULL, NULL}}; unitTest __test_valkey_strtod_c[] = {{"test_valkey_strtod", test_valkey_strtod}, {NULL, NULL}}; unitTest __test_vector_c[] = {{"test_vector", test_vector}, {NULL, NULL}}; -unitTest __test_vset_c[] = {{"test_vset_add_and_iterate", test_vset_add_and_iterate}, {"test_vset_large_batch_same_expiry", test_vset_large_batch_same_expiry}, {"test_vset_iterate_multiple_expiries", test_vset_iterate_multiple_expiries}, {"test_vset_add_and_remove_all", test_vset_add_and_remove_all}, {"test_vset_fuzzer", test_vset_fuzzer}, {NULL, NULL}}; +unitTest __test_vest_c[] = {{"test_vset_add_and_iterate", test_vset_add_and_iterate}, {"test_vset_large_batch_same_expiry", test_vset_large_batch_same_expiry}, {"test_vset_iterate_multiple_expiries", test_vset_iterate_multiple_expiries}, {"test_vset_add_and_remove_all", test_vset_add_and_remove_all}, {"test_vset_fuzzer", test_vset_fuzzer}, {NULL, NULL}}; unitTest 
__test_ziplist_c[] = {{"test_ziplistCreateIntList", test_ziplistCreateIntList}, {"test_ziplistPop", test_ziplistPop}, {"test_ziplistGetElementAtIndex3", test_ziplistGetElementAtIndex3}, {"test_ziplistGetElementOutOfRange", test_ziplistGetElementOutOfRange}, {"test_ziplistGetLastElement", test_ziplistGetLastElement}, {"test_ziplistGetFirstElement", test_ziplistGetFirstElement}, {"test_ziplistGetElementOutOfRangeReverse", test_ziplistGetElementOutOfRangeReverse}, {"test_ziplistIterateThroughFullList", test_ziplistIterateThroughFullList}, {"test_ziplistIterateThroughListFrom1ToEnd", test_ziplistIterateThroughListFrom1ToEnd}, {"test_ziplistIterateThroughListFrom2ToEnd", test_ziplistIterateThroughListFrom2ToEnd}, {"test_ziplistIterateThroughStartOutOfRange", test_ziplistIterateThroughStartOutOfRange}, {"test_ziplistIterateBackToFront", test_ziplistIterateBackToFront}, {"test_ziplistIterateBackToFrontDeletingAllItems", test_ziplistIterateBackToFrontDeletingAllItems}, {"test_ziplistDeleteInclusiveRange0To0", test_ziplistDeleteInclusiveRange0To0}, {"test_ziplistDeleteInclusiveRange0To1", test_ziplistDeleteInclusiveRange0To1}, {"test_ziplistDeleteInclusiveRange1To2", test_ziplistDeleteInclusiveRange1To2}, {"test_ziplistDeleteWithStartIndexOutOfRange", test_ziplistDeleteWithStartIndexOutOfRange}, {"test_ziplistDeleteWithNumOverflow", test_ziplistDeleteWithNumOverflow}, {"test_ziplistDeleteFooWhileIterating", test_ziplistDeleteFooWhileIterating}, {"test_ziplistReplaceWithSameSize", test_ziplistReplaceWithSameSize}, {"test_ziplistReplaceWithDifferentSize", test_ziplistReplaceWithDifferentSize}, {"test_ziplistRegressionTestForOver255ByteStrings", test_ziplistRegressionTestForOver255ByteStrings}, {"test_ziplistRegressionTestDeleteNextToLastEntries", test_ziplistRegressionTestDeleteNextToLastEntries}, {"test_ziplistCreateLongListAndCheckIndices", test_ziplistCreateLongListAndCheckIndices}, {"test_ziplistCompareStringWithZiplistEntries", 
test_ziplistCompareStringWithZiplistEntries}, {"test_ziplistMergeTest", test_ziplistMergeTest}, {"test_ziplistStressWithRandomPayloadsOfDifferentEncoding", test_ziplistStressWithRandomPayloadsOfDifferentEncoding}, {"test_ziplistCascadeUpdateEdgeCases", test_ziplistCascadeUpdateEdgeCases}, {"test_ziplistInsertEdgeCase", test_ziplistInsertEdgeCase}, {"test_ziplistStressWithVariableSize", test_ziplistStressWithVariableSize}, {"test_BenchmarkziplistFind", test_BenchmarkziplistFind}, {"test_BenchmarkziplistIndex", test_BenchmarkziplistIndex}, {"test_BenchmarkziplistValidateIntegrity", test_BenchmarkziplistValidateIntegrity}, {"test_BenchmarkziplistCompareWithString", test_BenchmarkziplistCompareWithString}, {"test_BenchmarkziplistCompareWithNumber", test_BenchmarkziplistCompareWithNumber}, {"test_ziplistStress__ziplistCascadeUpdate", test_ziplistStress__ziplistCascadeUpdate}, {NULL, NULL}}; unitTest __test_zipmap_c[] = {{"test_zipmapIterateWithLargeKey", test_zipmapIterateWithLargeKey}, {"test_zipmapIterateThroughElements", test_zipmapIterateThroughElements}, {NULL, NULL}}; unitTest __test_zmalloc_c[] = {{"test_zmallocInitialUsedMemory", test_zmallocInitialUsedMemory}, {"test_zmallocAllocReallocCallocAndFree", test_zmallocAllocReallocCallocAndFree}, {"test_zmallocAllocZeroByteAndFree", test_zmallocAllocZeroByteAndFree}, {NULL, NULL}}; @@ -295,7 +300,7 @@ struct unitTestSuite { {"test_util.c", __test_util_c}, {"test_valkey_strtod.c", __test_valkey_strtod_c}, {"test_vector.c", __test_vector_c}, - {"test_vset.c", __test_vset_c}, + {"test_vest.c", __test_vest_c}, {"test_ziplist.c", __test_ziplist_c}, {"test_zipmap.c", __test_zipmap_c}, {"test_zmalloc.c", __test_zmalloc_c}, diff --git a/src/unit/test_vest.c b/src/unit/test_vest.c index d973f02409..dc407749b0 100644 --- a/src/unit/test_vest.c +++ b/src/unit/test_vest.c @@ -62,24 +62,24 @@ int test_vset_add_and_iterate(int argc, char **argv, int flags) { mock_entry *e1 = mockCreateEntry("item1", 123); mock_entry *e2 = 
mockCreateEntry("item2", 456); - TEST_ASSERT(volatileSetAddEntry(set, e1, mockGetExpiry(e1))); - TEST_ASSERT(volatileSetAddEntry(set, e2, mockGetExpiry(e2))); + TEST_ASSERT(vsetAddEntry(set, e1, mockGetExpiry(e1))); + TEST_ASSERT(vsetAddEntry(set, e2, mockGetExpiry(e2))); - TEST_ASSERT(!volatileSetIsEmpty(set)); + TEST_ASSERT(!vsetIsEmpty(set)); - volatileSetIterator it; - volatileSetStart(set, &it); + vsetIterator it; + vsetStart(set, &it); void *entry; int count = 0; - while (volatileSetNext(&it, &entry)) { + while (vsetNext(&it, &entry)) { TEST_EXPECT(entry != NULL); count++; } TEST_ASSERT(count == 2); - volatileSetReset(&it); + vsetStop(&it); freeVolatileSet(set); mockFreeEntry(e1); mockFreeEntry(e2); @@ -114,26 +114,26 @@ int test_vset_large_batch_same_expiry(int argc, char **argv, int flags) { char key_buf[32]; snprintf(key_buf, sizeof(key_buf), "entry_%d", i); entries[i] = mockCreateEntry(key_buf, expiry_time); - TEST_ASSERT(volatileSetAddEntry(set, entries[i], expiry_time)); + TEST_ASSERT(vsetAddEntry(set, entries[i], expiry_time)); } // Verify set is not empty - TEST_ASSERT(!volatileSetIsEmpty(set)); + TEST_ASSERT(!vsetIsEmpty(set)); // Iterate all entries and count them - volatileSetIterator it; - volatileSetStart(set, &it); + vsetIterator it; + vsetStart(set, &it); void *entry; int count = 0; - while (volatileSetNext(&it, &entry)) { + while (vsetNext(&it, &entry)) { TEST_EXPECT(entry != NULL); count++; } TEST_ASSERT(count == total_entries); // Cleanup - volatileSetReset(&it); + vsetStop(&it); freeVolatileSet(set); for (int i = 0; i < total_entries; i++) { @@ -169,17 +169,17 @@ int test_vset_iterate_multiple_expiries(int argc, char **argv, int flags) { snprintf(key_buf, sizeof(key_buf), "entry_%d", i); long long expiry_time = rand() % 10000; entries[i] = mockCreateEntry(key_buf, expiry_time); - TEST_ASSERT(volatileSetAddEntry(set, entries[i], expiry_time)); + TEST_ASSERT(vsetAddEntry(set, entries[i], expiry_time)); } - volatileSetIterator it; - 
volatileSetStart(set, &it); + vsetIterator it; + vsetStart(set, &it); int found[5] = {0}; int total = 0; void *entry; - while (volatileSetNext(&it, &entry)) { + while (vsetNext(&it, &entry)) { TEST_EXPECT(entry != NULL); mock_entry *e = (mock_entry *)entry; @@ -199,7 +199,7 @@ int test_vset_iterate_multiple_expiries(int argc, char **argv, int flags) { TEST_EXPECT(found[i]); } - volatileSetReset(&it); + vsetStop(&it); freeVolatileSet(set); for (int i = 0; i < 5; i++) mockFreeEntry(entries[i]); @@ -229,15 +229,15 @@ int test_vset_add_and_remove_all(int argc, char **argv, int flags) { char key[32]; snprintf(key, sizeof(key), "key_%d", i); entries[i] = mockCreateEntry(key, expiry); - TEST_ASSERT(volatileSetAddEntry(set, entries[i], expiry)); + TEST_ASSERT(vsetAddEntry(set, entries[i], expiry)); } for (int i = 0; i < total_entries; i++) { - TEST_ASSERT(volatileSetRemoveEntry(set, entries[i], expiry)); + TEST_ASSERT(vsetRemoveEntry(set, entries[i], expiry)); mockFreeEntry(entries[i]); } - TEST_ASSERT(volatileSetIsEmpty(set)); + TEST_ASSERT(vsetIsEmpty(set)); freeVolatileSet(set); TEST_PRINT_INFO("Add/remove %d entries, set size now 0", total_entries); @@ -290,7 +290,7 @@ int insert_mock_entry(vset *set) { long long expiry = rand() % 10000 + 100; mock_entry *e = mock_entry_create(keybuf, expiry); // printf("adding entry %p with expiry %llu\n", e, expiry); - TEST_ASSERT(volatileSetAddEntry(set, e, expiry)); + TEST_ASSERT(vsetAddEntry(set, e, expiry)); mock_entries[mock_entry_count++] = e; return 0; } @@ -304,7 +304,7 @@ int update_mock_entry(vset *set) { mock_entry *updated = mockEntryUpdate(old, new_expiry); mock_entries[idx] = updated; // printf("Update entry %p with entry %p with old expiry %llu new expiry %llu\n", old, updated, old_expiry, new_expiry); - TEST_ASSERT(volatileSetUpdateEntry(set, old, updated, old_expiry, new_expiry)); + TEST_ASSERT(vsetUpdateEntry(set, old, updated, old_expiry, new_expiry)); return 0; } @@ -313,7 +313,7 @@ int remove_mock_entry(vset 
*set) { int idx = rand() % mock_entry_count; mock_entry *e = mock_entries[idx]; // printf("removing entry %p with expiry %llu\n", e, mockGetExpiry(e)); - TEST_ASSERT(volatileSetRemoveEntry(set, e, mockGetExpiry(e))); + TEST_ASSERT(vsetRemoveEntry(set, e, mockGetExpiry(e))); mockFreeEntry(e); mock_entries[idx] = mock_entries[--mock_entry_count]; @@ -323,7 +323,7 @@ int remove_mock_entry(vset *set) { int expire_mock_entries(vset *set, mstime_t now) { void *entry; do { - entry = volatileSetdPopExpired(set, now); + entry = vsetPopExpired(set, now); if (entry) { // printf("pop expire entry %p with expiry %llu now: %llu\n", entry, mockGetExpiry(entry), now); TEST_ASSERT(mockGetExpiry(entry) <= now); @@ -377,7 +377,7 @@ int test_vset_fuzzer(int argc, char **argv, int flags) { } /* now expire all the entries and check that we have no entries left */ expire_mock_entries(set, LONG_LONG_MAX); - TEST_ASSERT(volatileSetIsEmpty(set) && mock_entry_count == 0); + TEST_ASSERT(vsetIsEmpty(set) && mock_entry_count == 0); freeVolatileSet(set); free_mock_entries(); /* Just in case */ return 0; diff --git a/src/vset.c b/src/vset.c index 0bf4caba3d..5d203b8707 100644 --- a/src/vset.c +++ b/src/vset.c @@ -1,49 +1,50 @@ -#include #include "vset.h" #include "rax.h" -#include "zmalloc.h" #include "endianconv.h" #include "serverassert.h" #include "hashtable.h" -#include "server.h" +#include "util.h" +#include "zmalloc.h" + +#include #include /************************************************************************************************************* - * pointer_vector Implementation + * pVector Implementation *************************************************************************************************************/ #define PV_CARD_BITS 30 #define PV_ALLOC_BITS 34 #define PV_MAX_ELEMENTS ((1ULL << PV_CARD_BITS) - 1) -#define PV_HEADER_SIZE (sizeof(pointer_vector)) +#define PV_HEADER_SIZE (sizeof(pVector)) #define PV_ELEM_SIZE (sizeof(void *)) #define PV_ALLOC(pv) (pv ? 
pv->alloc : 0) #define PV_LEN(pv) (pv ? pv->len : 0) -#define PV_USED_SIZE(pv) (pv ? (PV_HEADER_SIZE + (pv_len(pv)) * PV_ELEM_SIZE) : 0) +#define PV_USED_SIZE(pv) (pv ? (PV_HEADER_SIZE + (pvLen(pv)) * PV_ELEM_SIZE) : 0) /* Custom vector structure with embedded allocation and length counters */ typedef struct { - uint64_t len : 30; /* Number of elements */ - uint64_t alloc : 34; /* Allocated capacity */ + uint64_t len : 30; /* Number of elements (cardinality) */ + uint64_t alloc : 34; /* Allocated memory (zmalloc_size of the current vector allocation) */ void *data[]; /* Flexible array member */ -} pointer_vector; +} pVector; -/* Returns the number of elements currently stored in the pointer_vector. +/* Returns the number of elements currently stored in the pVector. * * Arguments: - * vec - The pointer_vector to query. + * vec - The pVector to query. * * Return: * The number of elements in the vector. * Note that a NULL is a !!!valid!!! vector - returns 0 if the vector is NULL. */ -static inline uint32_t pv_len(pointer_vector *vec) { +static inline uint32_t pvLen(pVector *vec) { return PV_LEN(vec); } -/* Ensures that a pointer_vector has enough capacity to hold additional elements. +/* Ensures that a pVector has enough capacity to hold additional elements. * - * This function guarantees that the given pointer_vector `sv` has at least enough - * allocated space to accommodate `capacity` more elements, growing it if necessary. + * This function guarantees that the given pVector `sv` has at least enough + * allocated space to accommodate `additional` more elements, growing it if necessary. * If the vector is currently `NULL`, it will be newly allocated. * * The allocation is handled using `zmalloc` or `zrealloc_usable`, depending on whether @@ -51,19 +52,19 @@ static inline uint32_t pv_len(pointer_vector *vec) { * reflect the actual allocated size. * * Arguments: - * sv - Pointer to an existing pointer_vector or NULL. 
- * capacity - The number of additional elements the vector should be able to accommodate. + * sv - Pointer to an existing pVector or NULL. + * additional - The number of additional elements the vector should be able to accommodate. * * Return: - * A pointer to the resized (or newly allocated) pointer_vector with sufficient capacity. + * A pointer to the resized (or newly allocated) pVector with sufficient capacity. * Returns NULL only if the allocation fails. * * Note: - * The `capacity` is the number of *additional* elements beyond the current length. + * The `additional` is the number of *additional* elements beyond the current length. * This function does not modify the vector's logical length (`len`), only its allocation. */ -pointer_vector *pv_grow_to_fit(pointer_vector *sv, size_t capacity) { - if (capacity == 0) return sv; - size_t required = PV_HEADER_SIZE + (PV_LEN(sv) + capacity) * PV_ELEM_SIZE; +pVector *pvMakeRoomFor(pVector *sv, size_t additional) { + if (additional == 0) return sv; + size_t required = PV_HEADER_SIZE + (PV_LEN(sv) + additional) * PV_ELEM_SIZE; if (PV_ALLOC(sv) >= required) return sv; if (!sv) { @@ -76,10 +77,10 @@ pointer_vector *pv_grow_to_fit(pointer_vector *sv, size_t capacity) { return sv; } -/* Shrinks a pointer_vector to release unused allocated memory. +/* Shrinks a pVector to release unused allocated memory. * * This function checks if the current allocation (`used`) for the given - * `pointer_vector` exceeds the memory actually required to store its elements. + * `pVector` exceeds the memory actually required to store its elements. * If so, it reallocates the vector to use only the needed memory, helping reduce * memory overhead and improve space efficiency. * @@ -88,10 +89,10 @@ pointer_vector *pv_grow_to_fit(pointer_vector *sv, size_t capacity) { * size (`alloc`) to reflect the new length. * * Arguments: - * sv - A pointer to the `pointer_vector` to shrink. + * sv - A pointer to the `pVector` to shrink. 
* * Return: - * A potentially reallocated `pointer_vector` with minimized memory usage, + * A potentially reallocated `pVector` with minimized memory usage, * or `NULL` if the input was `NULL`. * * This function does not change the logical contents of the vector. @@ -99,14 +100,14 @@ pointer_vector *pv_grow_to_fit(pointer_vector *sv, size_t capacity) { * is needed, the original pointer is returned unchanged. * * Example: - * pointer_vector *vec = pv_new(); + * pVector *vec = pvNew(); * // After some insertions and deletions - * vec = pv_shrink_to_fit(vec); */ -pointer_vector *pv_shrink_to_fit(pointer_vector *sv) { + * vec = pvShrinkToFit(vec); */ +pVector *pvShrinkToFit(pVector *sv) { if (!sv) return NULL; size_t used = PV_ALLOC(sv); - size_t required = pv_len(sv) == 0 ? 0 : PV_HEADER_SIZE + pv_len(sv) * PV_ELEM_SIZE; + size_t required = pvLen(sv) == 0 ? 0 : PV_HEADER_SIZE + pvLen(sv) * PV_ELEM_SIZE; if (used > required) { if (!required) { @@ -120,10 +121,10 @@ pointer_vector *pv_shrink_to_fit(pointer_vector *sv) { } /** - * pv_split - Splits a pointer_vector into two parts at a given index. + * pvSplit - Splits a pVector into two parts at a given index. * * Arguments: - * sv_ptr: A pointer to the pointer_vector* to split. This pointer is + * sv_ptr: A pointer to the pVector* to split. This pointer is * updated in-place to point to the left portion (elements [0..split_index-1]). * split_index: The index at which to split the vector. The resulting right * vector will contain elements [split_index..len-1]. @@ -137,11 +138,11 @@ pointer_vector *pv_shrink_to_fit(pointer_vector *sv) { * The vector is assumed to be densely packed and its elements are of type `void*`. * * Memory is allocated for the new right vector using `zmalloc`, and the unused - * portion of the original vector may be freed or shrunk via `pv_shrink_to_fit` + * portion of the original vector may be freed or shrunk via `pvShrinkToFit` * to optimize memory usage. 
* * Return: - * - A new pointer_vector containing the right split [split_index..len-1]. + * - A new pVector containing the right split [split_index..len-1]. * - `NULL` in the following cases: * • The input vector is `NULL`. * • The input vector has only 1 or fewer elements (nothing to split). @@ -158,7 +159,7 @@ pointer_vector *pv_shrink_to_fit(pointer_vector *sv) { * [A, B, C, D, E] * * Calling: - * pointer_vector *right = pv_split(&sv_ptr, 3); + * pVector *right = pvSplit(&sv_ptr, 3); * * Results in: * sv_ptr -> [A, B, C] @@ -166,8 +167,8 @@ pointer_vector *pv_shrink_to_fit(pointer_vector *sv) { * * If the split_index is 5 (i.e. the end), the function returns NULL and the * original vector is unchanged. */ -pointer_vector *pv_split(pointer_vector **sv_ptr, uint32_t split_index) { - pointer_vector *sv = *sv_ptr; +pVector *pvSplit(pVector **sv_ptr, uint32_t split_index) { + pVector *sv = *sv_ptr; // Handle edge cases: null or empty if (!sv || sv->len <= 1) return NULL; @@ -181,9 +182,9 @@ pointer_vector *pv_split(pointer_vector **sv_ptr, uint32_t split_index) { // Allocate new vector for right part size_t item_bytes = sizeof(void *); - size_t total_bytes = sizeof(pointer_vector) + right_len * item_bytes; + size_t total_bytes = sizeof(pVector) + right_len * item_bytes; size_t new_alloc; - pointer_vector *right = zmalloc_usable(total_bytes, &new_alloc); + pVector *right = zmalloc_usable(total_bytes, &new_alloc); right->alloc = new_alloc; right->len = right_len; @@ -192,46 +193,46 @@ pointer_vector *pv_split(pointer_vector **sv_ptr, uint32_t split_index) { // Shrink original vector sv->len = split_index; - *sv_ptr = pv_shrink_to_fit(sv); // Optional: shrink in-place to reduce memory + *sv_ptr = pvShrinkToFit(sv); // Optional: shrink in-place to reduce memory return right; } -/* Creates a new pointer_vector with the specified initial capacity. +/* Creates a new pVector with the specified initial capacity. 
* - * This function initializes a new pointer_vector capable of holding at least + * This function initializes a new pVector capable of holding at least * `capacity` elements. Internally, it delegates allocation and setup to - * `pv_grow_to_fit`, starting from a NULL vector. + * `pvMakeRoomFor`, starting from a NULL vector. * * Arguments: * capacity - The initial number of elements the vector should be able to store. * * Return: - * A pointer to the newly allocated pointer_vector. + * A pointer to the newly allocated pVector. * Note that a NULL is a !!valid!! cector which size is zero. * * Note: * The logical length (`len`) of the returned vector is initialized to 0. */ -pointer_vector *pv_new(uint32_t capacity) { - pointer_vector *new_vec = NULL; - return pv_grow_to_fit(new_vec, capacity); +pVector *pvNew(uint32_t capacity) { + pVector *new_vec = NULL; + return pvMakeRoomFor(new_vec, capacity); } -/* Inserts an element at the specified position in the pointer_vector. +/* Inserts an element at the specified position in the pVector. * * Ensures enough capacity for the new element, shifts elements to make space, * and inserts the given element at the desired position. * * Arguments: - * sv - The pointer_vector to insert into (can be NULL). + * sv - The pVector to insert into (can be NULL). * elem - The pointer to be inserted. * pos - The index at which to insert the element (must be ≤ sv->len). * * Return: - * The updated pointer_vector with the element inserted. */ -pointer_vector *pv_insert(pointer_vector *sv, void *elem, uint32_t pos) { - sv = pv_grow_to_fit(sv, 1); + * The updated pVector with the element inserted. 
*/ +pVector *pvInsert(pVector *sv, void *elem, uint32_t pos) { + sv = pvMakeRoomFor(sv, 1); if (pos < sv->len) { memmove(&sv->data[pos + 1], &sv->data[pos], (sv->len - pos) * sizeof(void *)); @@ -242,19 +243,19 @@ pointer_vector *pv_insert(pointer_vector *sv, void *elem, uint32_t pos) { return sv; } -/* Removes the element at the specified index from the pointer_vector. +/* Removes the element at the specified index from the pVector. * * Shifts elements as necessary and optionally shrinks the vector if memory can be saved. * If this is the last element in the vector, the vector is freed and NULL is returned. * * Arguments: - * sv - The pointer_vector to remove from. + * sv - The pVector to remove from. * idx - The index of the element to remove (must be < sv->len). * * Return: - * The updated pointer_vector after removal. + * The updated pVector after removal. * Returns NULL if the last element was removed and the vector was freed. */ -pointer_vector *pv_removeAt(pointer_vector *sv, uint32_t idx) { +pVector *pvRemoveAt(pVector *sv, uint32_t idx) { if (!sv || sv->len == 0) return sv; assert(idx < sv->len); if (sv->len == 1) { @@ -264,59 +265,59 @@ pointer_vector *pv_removeAt(pointer_vector *sv, uint32_t idx) { } else if (idx < sv->len - 1UL) memmove(&sv->data[idx], &sv->data[idx + 1], (sv->len - idx - 1) * PV_ELEM_SIZE); sv->len--; - return pv_shrink_to_fit(sv); + return pvShrinkToFit(sv); } -/* Removes the first matching element from the pointer_vector. +/* Removes the first matching element from the pVector. * * Performs a linear search for the given pointer and removes the first match. * Updates the vector pointer in case a removal was done. * * Arguments: - * sv - A pointer to the location of the pointer_vector to remove from. + * sv - A pointer to the location of the pVector to remove from. * elem - The element pointer to match and remove. 
* * Return: * true in case a removal was made, false otherwise */ -bool pv_remove(pointer_vector **psv, void *elem) { - pointer_vector *sv = *psv; +bool pvRemove(pVector **psv, void *elem) { + pVector *sv = *psv; if (!sv || sv->len == 0) return false; for (uint32_t i = 0; i < sv->len; i++) { if (sv->data[i] == elem) { - *psv = pv_removeAt(sv, i); + *psv = pvRemoveAt(sv, i); return true; } } return false; } -/* Retrieves the element at the specified index in the pointer_vector. +/* Retrieves the element at the specified index in the pVector. * * Arguments: - * vec - The pointer_vector to retrieve from. + * vec - The pVector to retrieve from. * idx - The index of the element to access. * * Return: * A pointer to the element at the given index. * Returns NULL if the vector is NULL or the index is out of bounds. */ -void *pv_get(pointer_vector *vec, uint32_t idx) { +void *pvGet(pVector *vec, uint32_t idx) { if (!vec || idx >= vec->len) return NULL; return vec->data[idx]; } -/* Frees the memory used by the pointer_vector. +/* Frees the memory used by the pVector. * * Arguments: - * sv - The pointer_vector to free. + * sv - The pVector to free. * * Return: * None. 
*/ -void pv_free(pointer_vector *sv) { +void pvFree(pVector *sv) { if (sv) zfree(sv); } -uint32_t pv_find(pointer_vector *sv, void *elem) { +uint32_t pvFind(pVector *sv, void *elem) { if (!sv || sv->len == 0) return 0; for (uint32_t i = 0; i < sv->len; i++) { @@ -327,7 +328,7 @@ uint32_t pv_find(pointer_vector *sv, void *elem) { return sv->len; } /************************************************************************************************************* - * pointer_vector End + * pVector End *************************************************************************************************************/ #define VSET_BUCKET_NONE -1 // matching the NULL case #define VSET_BUCKET_SINGLE 0x1UL // xx1 (assuming sds) @@ -354,9 +355,9 @@ static inline void *vsetBucketRawPtr(vsetBucket *b) { } // Accessors with type assertions -static inline pointer_vector *vsetBucketVector(vsetBucket *b) { +static inline pVector *vsetBucketVector(vsetBucket *b) { assert(vsetBucketType(b) == VSET_BUCKET_VECTOR); - return (pointer_vector *)vsetBucketRawPtr(b); + return (pVector *)vsetBucketRawPtr(b); } static inline hashtable *vsetBucketHashtable(vsetBucket *b) { @@ -379,7 +380,7 @@ static inline vsetBucket *vsetBucketSetType(vsetBucket *b, int type) { return (vsetBucket *)(p | (type & VSET_TAG_MASK)); } -static inline vsetBucket *vsetBucketSetVector(vsetBucket *b, pointer_vector *vec) { +static inline vsetBucket *vsetBucketSetVector(vsetBucket *b, pVector *vec) { UNUSED(b); return vsetBucketSetType(vec, VSET_BUCKET_VECTOR); } @@ -432,7 +433,7 @@ static inline long long decodeExpiryKey(unsigned char *key) { return res; } -static size_t encodeNewExpiryBucketKey(unsigned char *key, long long expiry) { +static inline size_t encodeNewExpiryBucketKey(unsigned char *key, long long expiry) { long long bucket_ts = get_max_bucket_ts(expiry); long long be_ts = htonu64(bucket_ts); size_t size = sizeof(be_ts); @@ -448,13 +449,13 @@ static size_t encodeNewExpiryBucketKey(unsigned char *key, long long 
expiry) { * elem Pointer to the element to insert * cmp Comparison function (like strcmp-style: <0, ==0, >0) * returns the insertion index (between 0 and sv->len) */ -uint32_t _find_insert_position(vset *set, vsetBucket *bucket, long long expiry) { - pointer_vector *sv = vsetBucketVector(bucket); +static inline uint32_t findInsertPosition(vset *set, vsetBucket *bucket, long long expiry) { + pVector *pv = vsetBucketVector(bucket); uint32_t left = 0; - uint32_t right = pv_len(sv); + uint32_t right = pvLen(pv); while (left < right) { uint32_t mid = (left + right) / 2; - int res = EXPIRE_COMPARE(expiry, set->etypr->getExpiry(sv->data[mid])); + int res = EXPIRE_COMPARE(expiry, set->etypr->getExpiry(pv->data[mid])); if (res <= 0) right = mid; else @@ -464,11 +465,11 @@ uint32_t _find_insert_position(vset *set, vsetBucket *bucket, long long expiry) return left; // Final position to insert the element } -/* _find_split_position - Find the optimal split index in a sorted pointer vector +/* findSplitPosition - Find the optimal split index in a sorted pointer vector * based on coarse (bucketed) expiry timestamps. * Arguments * set: Pointer to the `vset` containing the element type and expiry logic. - * bucket: Pointer to a `vsetBucket` holding a sorted `pointer_vector` of elements. + * bucket: Pointer to a `vsetBucket` holding a sorted `pVector` of elements. * split_ts: an optional pointer to a location to store the split timestamp, that is the position * belonging in the lower split vector with the largest expiration time. 
* @@ -502,35 +503,33 @@ uint32_t _find_insert_position(vset *set, vsetBucket *bucket, long long expiry) * And `get_bucket_ts()` maps them to: * [1300, 1300, 1300, 5000, 5000] * - * Then `_find_split_position(set, bucket)` returns 3, resulting in: + * Then `findSplitPosition(set, bucket)` returns 3, resulting in: * First part: [1234, 1235, 1236] (bucket 1300) * Second part: [4567, 4568] (bucket 5000) * * This guarantees that each vector contains elements with the same bucket timestamp, * and no value in the first part maps to the same or later bucket as the second part. */ -uint32_t _find_split_position(vset *set, vsetBucket *bucket, long long *split_ts_out) { - pointer_vector *sv = vsetBucketVector(bucket); - - if (!sv || sv->len < 2) return sv->len; +static uint32_t findSplitPosition(vset *set, vsetBucket *bucket, long long *split_ts_out) { + pVector *pv = vsetBucketVector(bucket); - if (!sv || sv->len < 2) return sv ? sv->len : 0; + if (!pv || pv->len < 2) return pv ? pv->len : 0; uint32_t left = 1; - uint32_t right = sv->len - 1; - uint32_t best_split = sv->len; - uint32_t mid_closest_to_center = sv->len / 2; + uint32_t right = pv->len - 1; + uint32_t best_split = pv->len; + uint32_t mid_closest_to_center = pv->len / 2; long long best_split_ts = 0; while (left <= right) { uint32_t mid = (left + right) / 2; - long long prev_ts = get_bucket_ts(set->etypr->getExpiry(pv_get(sv, mid - 1))); - long long curr_ts = get_bucket_ts(set->etypr->getExpiry(pv_get(sv, mid))); + long long prev_ts = get_bucket_ts(set->etypr->getExpiry(pvGet(pv, mid - 1))); + long long curr_ts = get_bucket_ts(set->etypr->getExpiry(pvGet(pv, mid))); if (prev_ts != curr_ts) { // Check if closer to center - if (best_split == sv->len || + if (best_split == pv->len || abs((int)mid - (int)mid_closest_to_center) < abs((int)best_split - (int)mid_closest_to_center)) { best_split = mid; best_split_ts = prev_ts; @@ -542,9 +541,9 @@ uint32_t _find_split_position(vset *set, vsetBucket *bucket, long long 
*split_ts } if (split_ts_out) { - *split_ts_out = best_split != sv->len + *split_ts_out = best_split != pv->len ? best_split_ts - : get_bucket_ts(set->etypr->getExpiry(pv_get(sv, sv->len - 1))); + : get_bucket_ts(set->etypr->getExpiry(pvGet(pv, pv->len - 1))); } return best_split; @@ -553,6 +552,49 @@ uint32_t _find_split_position(vset *set, vsetBucket *bucket, long long *split_ts #define VSET_BUCKET_KEY_LEN 8 +/* hash_pointer - Computes a high-quality 64-bit hash from a pointer value. + * + * This function is designed to produce a well-distributed hash from a memory + * pointer, avoiding the common pitfall of poor entropy due to pointer alignment. + * It uses a platform-dependent mixing strategy based on MurmurHash3 finalization + * constants, ensuring good avalanche behavior and low collision rates. + * + * For 32-bit systems: + * The function uses a reduced MurmurHash3 32-bit finalizer: + * - XORs and right shifts to mix higher-order bits into lower ones. + * - Multiplies by large constants to further spread the bits. + * + * Example transformation: + * x ^= x >> 16; + * x *= 0x85ebca6b; + * x ^= x >> 13; + * x *= 0xc2b2ae35; + * x ^= x >> 16; + * + * For 64-bit systems: + * The function uses MurmurHash3 64-bit finalizer constants: + * - These constants are chosen to maximize bit diffusion and avoid hash clustering. + * - This version benefits from the full 64-bit pointer space. + * + * Example transformation: + * x ^= x >> 33; + * x *= 0xff51afd7ed558ccdULL; + * x ^= x >> 33; + * x *= 0xc4ceb9fe1a85ec53ULL; + * x ^= x >> 33; + * + * Why this works: + * - Pointers tend to have low entropy in their lower bits (due to alignment). + * - A naive cast to integer leads to clustering and collisions in hash tables. + * - This function performs fast and effective bit mixing to reduce collisions. + * - Ideal for use in pointer-keyed hash tables, interning systems, or caches. + * + * Note: + * - This is not a cryptographic hash. It is suitable for fast, internal use only. 
+ * - Returns a 64-bit hash value, even on 32-bit systems. + * + * Returns: + * A 64-bit hash value derived from the input pointer. */ static uint64_t hash_pointer(const void *ptr) { uintptr_t x = (uintptr_t)ptr; #if UINTPTR_MAX == 0xFFFFFFFF @@ -614,7 +656,7 @@ static void freeVsetBucket(void *entry) { // No internal memory to free break; case VSET_BUCKET_VECTOR: - pv_free(vsetBucketVector(bucket)); + pvFree(vsetBucketVector(bucket)); break; case VSET_BUCKET_HT: hashtableRelease(vsetBucketHashtable(bucket)); @@ -623,7 +665,7 @@ static void freeVsetBucket(void *entry) { raxFreeWithCallback(vsetBucketRax(bucket), freeVsetBucket); break; default: - serverPanic("Unknown volatile set type in freeVsetBucket"); + panic("Unknown volatile set type in freeVsetBucket"); } } @@ -636,28 +678,28 @@ static bool splitBucketIfPossible(vset *set, vsetBucket *bucket, long long bucke long long target_bucket_ts = bucket_ts; unsigned char key[VSET_BUCKET_KEY_LEN] = {0}; vsetBucket *new_bucket = NULL; - pointer_vector *sv = vsetBucketVector(bucket); + pVector *sv = vsetBucketVector(bucket); rax *expiry_buckets = vsetBucketRax(set->expiry_buckets); - long long max_bucket_ts = get_bucket_ts(set->etypr->getExpiry(sv->data[pv_len(sv) - 1])); + long long max_bucket_ts = get_bucket_ts(set->etypr->getExpiry(sv->data[pvLen(sv) - 1])); long long min_bucket_ts = get_bucket_ts(set->etypr->getExpiry(sv->data[0])); if (max_bucket_ts < bucket_ts) { /* In case the bucket is already spanning over a larger window than needed, just place the bucket in a new place */ key_len = encodeExpiryKey(bucket_ts, key); - serverAssert(raxRemove(expiry_buckets, key, key_len, (void **)&new_bucket)); - serverAssert(new_bucket == bucket); + assert(raxRemove(expiry_buckets, key, key_len, (void **)&new_bucket)); + assert(new_bucket == bucket); target_bucket_ts = max_bucket_ts; } else if (min_bucket_ts != max_bucket_ts) { /* lets split the bucket. we know we can do it. 
*/ - uint32_t split_index = _find_split_position(set, bucket, &target_bucket_ts); + uint32_t split_index = findSplitPosition(set, bucket, &target_bucket_ts); assert(target_bucket_ts < bucket_ts); - assert(split_index != pv_len(sv)); /* no way to split it ??? */ - pointer_vector *new_bucket_vector = vsetBucketVector(bucket); - bucket = vsetBucketSetVector(bucket, pv_split(&new_bucket_vector, split_index)); + assert(split_index != pvLen(sv)); /* no way to split it ??? */ + pVector *new_bucket_vector = vsetBucketVector(bucket); + bucket = vsetBucketSetVector(bucket, pvSplit(&new_bucket_vector, split_index)); new_bucket = vsetBucketSetVector(new_bucket, new_bucket_vector); - assert(pv_len(vsetBucketVector(new_bucket)) > 0); - assert(pv_len(vsetBucketVector(bucket)) > 0); + assert(pvLen(vsetBucketVector(new_bucket)) > 0); + assert(pvLen(vsetBucketVector(bucket)) > 0); /* modify the current bucket data pointer */ key_len = encodeExpiryKey(bucket_ts, key); /* In order to avoid rax override, we directly change the node data */ @@ -682,37 +724,37 @@ static inline vsetBucket *insertToBucket_NONE(vset *set, vsetBucket *bucket, voi static inline vsetBucket *insertToBucket_SINGLE(vset *set, vsetBucket *bucket, void *entry, long long expiry) { /* Upgrade to vector */ - pointer_vector *sv = pv_new(2); + pVector *sv = pvNew(2); void *curr_entry = vsetBucketSingle(bucket); long long curr_expiry = set->etypr->getExpiry(curr_entry); if (curr_expiry < expiry) { - sv = pv_insert(sv, curr_entry, 0); - sv = pv_insert(sv, entry, 1); + sv = pvInsert(sv, curr_entry, 0); + sv = pvInsert(sv, entry, 1); } else { - sv = pv_insert(sv, entry, 0); - sv = pv_insert(sv, curr_entry, 1); + sv = pvInsert(sv, entry, 0); + sv = pvInsert(sv, curr_entry, 1); } bucket = vsetBucketSetVector(bucket, sv); return bucket; } static inline vsetBucket *insertToBucket_VECTOR(vset *set, vsetBucket *bucket, void *entry, long long expiry) { - pointer_vector *pv = vsetBucketVector(bucket); + pVector *pv = 
vsetBucketVector(bucket); /* limit of the number of elements in a vector. */ - if (pv_len(pv) >= VOLATILESET_VECTOR_BUCKET_MAX_SIZE) { + if (pvLen(pv) >= VOLATILESET_VECTOR_BUCKET_MAX_SIZE) { // Upgrade to hashtable hashtable *ht = hashtableCreate(&pointerHashtableType); - for (uint32_t i = 0; i < pv_len(pv); i++) { - hashtableAdd(ht, pv_get(pv, i)); + for (uint32_t i = 0; i < pvLen(pv); i++) { + hashtableAdd(ht, pvGet(pv, i)); } - pv_free(pv); + pvFree(pv); /* Add the new entry as well */ hashtableAdd(ht, entry); return vsetBucketSetHashtable(bucket, ht); } else { - uint32_t pos = _find_insert_position(set, bucket, expiry); - return vsetBucketSetVector(bucket, pv_insert(pv, entry, pos)); + uint32_t pos = findInsertPosition(set, bucket, expiry); + return vsetBucketSetVector(bucket, pvInsert(pv, entry, pos)); } return NULL; } @@ -749,8 +791,8 @@ static inline vsetBucket *insertToBucket_RAX(vset *set, vsetBucket *target, void // alternative: raxInsert(expiry_buckets, key, key_len, bucket, NULL); raxSetData(node, bucket); } else if (type == VSET_BUCKET_VECTOR) { - pointer_vector *sv = vsetBucketVector(bucket); - if (pv_len(sv) == VOLATILESET_VECTOR_BUCKET_MAX_SIZE) { + pVector *sv = vsetBucketVector(bucket); + if (pvLen(sv) == VOLATILESET_VECTOR_BUCKET_MAX_SIZE) { /* Try to split the bucket. If not possible switch to hashtable encoding. */ if (!splitBucketIfPossible(set, bucket, bucket_ts, node)) { /* Can't split? 
insrt to the vector anyway, it will just expand to hashtable */ @@ -773,7 +815,7 @@ static inline vsetBucket *insertToBucket_RAX(vset *set, vsetBucket *target, void } else if (vsetBucketType(bucket) == VSET_BUCKET_HT) { bucket = insertToBucket_HASHTABLE(set, bucket, entry, expiry); } else { - serverPanic("Unknown bucket type in insertToBucket_RAX"); + panic("Unknown bucket type in insertToBucket_RAX"); } return target; } @@ -797,24 +839,24 @@ static inline vsetBucket *removeFromBucket_VECTOR(vset *set, vsetBucket *bucket, vsetBucket *new_bucket = bucket; bool success = false; - pointer_vector *sv = vsetBucketVector(bucket); + pVector *sv = vsetBucketVector(bucket); /* In case we we removed the entry */ - uint32_t vlen = pv_len(sv); + uint32_t vlen = pvLen(sv); if (vlen <= 2) { /* convert to single if needed */ - uint32_t idx = pv_find(sv, entry); + uint32_t idx = pvFind(sv, entry); if (idx == vlen) { success = false; } else { if (vlen == 1) new_bucket = vsetBucketSetNone(bucket); else - new_bucket = vsetBucketSetSingle(bucket, pv_get(sv, idx == 0 ? 1 : 0)); + new_bucket = vsetBucketSetSingle(bucket, pvGet(sv, idx == 0 ? 
1 : 0)); success = true; - pv_free(sv); + pvFree(sv); } } else { - if (pv_remove(&sv, entry)) { + if (pvRemove(&sv, entry)) { success = true; new_bucket = vsetBucketSetVector(bucket, sv); } @@ -847,12 +889,12 @@ static inline vsetBucket *removeFromBucket_HASHTABLE(vset *set, vsetBucket *buck return new_bucket; } -static int vsetBucketNext_NONE(volatileSetIterator *it, void **entryptr) { +static int vsetBucketNext_NONE(vsetIterator *it, void **entryptr) { UNUSED(it); UNUSED(entryptr); return 0; } -static inline int vsetBucketNext_SINGLE(volatileSetIterator *it, void **entryptr) { +static inline int vsetBucketNext_SINGLE(vsetIterator *it, void **entryptr) { bool init_bucket_scan = (it->iteration_state == VSET_BUCKET_NONE); if (init_bucket_scan) { it->iteration_state = VSET_BUCKET_SINGLE; @@ -862,17 +904,17 @@ static inline int vsetBucketNext_SINGLE(volatileSetIterator *it, void **entryptr } return 0; } -static inline int vsetBucketNext_VECTOR(volatileSetIterator *it, void **entryptr) { +static inline int vsetBucketNext_VECTOR(vsetIterator *it, void **entryptr) { bool init_bucket_scan = (it->iteration_state == VSET_BUCKET_NONE); - pointer_vector *pv = vsetBucketVector(it->bucket); + pVector *pv = vsetBucketVector(it->bucket); if (init_bucket_scan) { it->iteration_state = VSET_BUCKET_VECTOR; it->viter = 0; } else { it->viter++; } - if (it->viter < pv_len(pv)) { - it->entry = pv_get(pv, it->viter); + if (it->viter < pvLen(pv)) { + it->entry = pvGet(pv, it->viter); } else { return 0; } @@ -880,7 +922,7 @@ static inline int vsetBucketNext_VECTOR(volatileSetIterator *it, void **entryptr return 1; } -static inline int vsetBucketNext_HASHTABLE(volatileSetIterator *it, void **entryptr) { +static inline int vsetBucketNext_HASHTABLE(vsetIterator *it, void **entryptr) { bool init_bucket_scan = (it->iteration_state == VSET_BUCKET_NONE); hashtable *ht = vsetBucketHashtable(it->bucket); if (init_bucket_scan) { @@ -895,7 +937,7 @@ static inline int 
vsetBucketNext_HASHTABLE(volatileSetIterator *it, void **entry return 1; } -static inline int vsetBucketNext_RAX(volatileSetIterator *it, void **entryptr) { +static inline int vsetBucketNext_RAX(vsetIterator *it, void **entryptr) { bool init_bucket_scan = (it->iteration_state == VSET_BUCKET_NONE); if (init_bucket_scan) { /* set myself as the parent bucket */ @@ -909,7 +951,7 @@ static inline int vsetBucketNext_RAX(volatileSetIterator *it, void **entryptr) { it->bucket_ts = decodeExpiryKey(it->riter.key); it->bucket = it->riter.data; it->iteration_state = VSET_BUCKET_NONE; - return volatileSetNext(it, entryptr); + return vsetNext(it, entryptr); } else { /* We currently do not support nested RAX buckets */ it->parent_bucket = vsetBucketSetNone(it->parent_bucket); @@ -954,7 +996,7 @@ static bool raxBucketRemoveEntry(vset *set, void *entry, vsetBucket *bucket, uns break; } default: - serverPanic("Unknown bucket type for raxBucketRemoveEntry"); + panic("Unknown bucket type for raxBucketRemoveEntry"); return false; } return removed; @@ -982,7 +1024,7 @@ static inline vsetBucket *removeFromBucket_RAX(vset *set, vsetBucket *target, vo /* We will not convert hashtable to our only bucket since we will lose the ability to scan the items in a sorted way. * We will also not shrink when we have a full vector, since it might immediately be repopulated. 
*/ if (bucket_type == VSET_BUCKET_SINGLE || - (bucket_type == VSET_BUCKET_VECTOR && pv_len(vsetBucketVector(bucket)) < VOLATILESET_VECTOR_BUCKET_MAX_SIZE)) { + (bucket_type == VSET_BUCKET_VECTOR && pvLen(vsetBucketVector(bucket)) < VOLATILESET_VECTOR_BUCKET_MAX_SIZE)) { /* lets make our bucket to be the only left bucket */ target = bucket; raxFree(expiry_buckets); @@ -991,7 +1033,7 @@ static inline vsetBucket *removeFromBucket_RAX(vset *set, vsetBucket *target, vo return target; } -int volatileSetAddEntry(vset *set, void *entry, long long expiry) { +int vsetAddEntry(vset *set, void *entry, long long expiry) { int bucket_type = vsetBucketType(set->expiry_buckets); switch (bucket_type) { case VSET_BUCKET_NONE: @@ -1001,13 +1043,13 @@ int volatileSetAddEntry(vset *set, void *entry, long long expiry) { set->expiry_buckets = insertToBucket_SINGLE(set, set->expiry_buckets, entry, expiry); break; case VSET_BUCKET_VECTOR: { - pointer_vector *vec = vsetBucketVector(set->expiry_buckets); - uint32_t len = pv_len(vec); + pVector *vec = vsetBucketVector(set->expiry_buckets); + uint32_t len = pvLen(vec); /* in case the vector is full, we need to turn into RAX */ if (len == VOLATILESET_VECTOR_BUCKET_MAX_SIZE) { rax *r = raxNew(); - long long min_expiry = set->etypr->getExpiry(pv_get(vec, 0)); - long long max_expiry = set->etypr->getExpiry(pv_get(vec, len - 1)); + long long min_expiry = set->etypr->getExpiry(pvGet(vec, 0)); + long long max_expiry = set->etypr->getExpiry(pvGet(vec, len - 1)); if (get_max_bucket_ts(min_expiry) == get_max_bucket_ts(max_expiry)) { /* In case we can just insert the bucket, no need to iterate and insert it's elements. we can just push the bucket as a whole. 
*/ unsigned char key[VSET_BUCKET_KEY_LEN] = {0}; @@ -1019,11 +1061,11 @@ int volatileSetAddEntry(vset *set, void *entry, long long expiry) { /* We need to migrate entries to the new set of buckets since we do not know all entries are in the same bucket */ set->expiry_buckets = vsetBucketSetRax(set->expiry_buckets, r); for (uint32_t i = 0; i < len; i++) { - void *moved_entry = pv_get(vec, i); + void *moved_entry = pvGet(vec, i); set->expiry_buckets = insertToBucket_RAX(set, set->expiry_buckets, moved_entry, set->etypr->getExpiry(moved_entry)); } /* free the vector */ - pv_free(vec); + pvFree(vec); /* now insert the new entry to the buckets */ set->expiry_buckets = insertToBucket_RAX(set, set->expiry_buckets, entry, expiry); } @@ -1036,12 +1078,12 @@ int volatileSetAddEntry(vset *set, void *entry, long long expiry) { set->expiry_buckets = insertToBucket_RAX(set, set->expiry_buckets, entry, expiry); break; default: - serverPanic("Cannot insert to bucket which is not single, vector or rax"); + panic("Cannot insert to bucket which is not single, vector or rax"); } return 1; } -int volatileSetRemoveEntry(vset *set, void *entry, long long expiry) { +int vsetRemoveEntry(vset *set, void *entry, long long expiry) { bool removed; vsetBucket *bucket = set->expiry_buckets; int bucket_type = vsetBucketType(bucket); @@ -1062,26 +1104,26 @@ int volatileSetRemoveEntry(vset *set, void *entry, long long expiry) { bucket = removeFromBucket_RAX(set, bucket, entry, expiry, &removed); break; default: - serverPanic("Cannot insert to bucket which is not single, vector or rax"); + panic("Cannot insert to bucket which is not single, vector or rax"); } set->expiry_buckets = bucket; return removed ? 
1 : 0; } -int volatileSetUpdateEntry(vset *set, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry) { +int vsetUpdateEntry(vset *set, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry) { if (old_entry == new_entry && old_expiry == new_expiry) return 1; if (old_entry && old_expiry != -1) - assert((volatileSetRemoveEntry(set, old_entry, old_expiry))); + assert((vsetRemoveEntry(set, old_entry, old_expiry))); if (new_entry && new_expiry != -1) - assert(volatileSetAddEntry(set, new_entry, new_expiry)); + assert(vsetAddEntry(set, new_entry, new_expiry)); return 1; } -static void *volatileSetGetFirstExpired(vset *set, mstime_t now, bool delete) { +static void *vsetGetFirstExpired(vset *set, mstime_t now, bool delete) { int set_type = vsetBucketType(set->expiry_buckets); void *entry = NULL; long long expiry; @@ -1090,11 +1132,11 @@ static void *volatileSetGetFirstExpired(vset *set, mstime_t now, bool delete) { return NULL; break; case VSET_BUCKET_RAX: { - volatileSetIterator iter; - volatileSetStart(set, &iter); + vsetIterator iter; + vsetStart(set, &iter); assert(vsetBucketNext_RAX(&iter, &entry)); long long bucket_ts = iter.bucket_ts; - volatileSetReset(&iter); + vsetStop(&iter); if (bucket_ts > now) return NULL; expiry = set->etypr->getExpiry(entry); @@ -1109,7 +1151,7 @@ static void *volatileSetGetFirstExpired(vset *set, mstime_t now, bool delete) { break; } case VSET_BUCKET_VECTOR: { - entry = pv_get(vsetBucketVector(set->expiry_buckets), 0); + entry = pvGet(vsetBucketVector(set->expiry_buckets), 0); expiry = set->etypr->getExpiry(entry); if (expiry > now) return NULL; @@ -1126,22 +1168,22 @@ static void *volatileSetGetFirstExpired(vset *set, mstime_t now, bool delete) { break; } default: - serverPanic("Unknown volatile set bucket type in volatileSetNext"); + panic("Unknown volatile set bucket type in vsetNext"); } if (delete) - assert(volatileSetRemoveEntry(set, entry, expiry)); + assert(vsetRemoveEntry(set, entry, 
expiry)); return entry; } -void *volatileSetdPopExpired(vset *set, mstime_t now) { - return volatileSetGetFirstExpired(set, now, true); +void *vsetPopExpired(vset *set, mstime_t now) { + return vsetGetFirstExpired(set, now, true); } -void *volatileSetFirstExpired(vset *set, mstime_t now) { - return volatileSetGetFirstExpired(set, now, false); +void *vsetFirstExpired(vset *set, mstime_t now) { + return vsetGetFirstExpired(set, now, false); } -int volatileSetNext(volatileSetIterator *it, void **entryptr) { +int vsetNext(vsetIterator *it, void **entryptr) { vsetBucket *bucket = it->bucket; int bucket_type = vsetBucketType(bucket); int ret = 0; @@ -1162,25 +1204,25 @@ int volatileSetNext(volatileSetIterator *it, void **entryptr) { ret = vsetBucketNext_HASHTABLE(it, entryptr); break; default: - serverPanic("Unknown volatile set bucket type in volatileSetNext"); + panic("Unknown volatile set bucket type in vsetNext"); } if (ret == 0) { /* continue iterating the parent bucket */ it->iteration_state = vsetBucketType(it->parent_bucket); it->bucket = it->parent_bucket; - return volatileSetNext(it, entryptr); + return vsetNext(it, entryptr); } return ret; } -void volatileSetStart(vset *set, volatileSetIterator *it) { +void vsetStart(vset *set, vsetIterator *it) { it->iteration_state = VSET_BUCKET_NONE; /*lets start by going to the first bucket. 
*/ it->bucket = set->expiry_buckets; it->bucket_ts = -1; it->parent_bucket = vsetBucketSetNone(it->parent_bucket); } -void volatileSetReset(volatileSetIterator *it) { +void vsetStop(vsetIterator *it) { int bucket_type = vsetBucketType(it->bucket); int parent_bucket_type = vsetBucketType(it->parent_bucket); if (parent_bucket_type == VSET_BUCKET_RAX) @@ -1202,6 +1244,6 @@ void freeVolatileSet(vset *set) { zfree(set); } -bool volatileSetIsEmpty(vset *set) { +bool vsetIsEmpty(vset *set) { return vsetBucketType(set->expiry_buckets) == VSET_BUCKET_NONE; } diff --git a/src/vset.h b/src/vset.h index 4abaf0fc7f..6331abc7e6 100644 --- a/src/vset.h +++ b/src/vset.h @@ -64,20 +64,88 @@ * → it is promoted to a `VECTOR` bucket (sorted by expiry). * 3. If the `VECTOR` exceeds `VOLATILESET_VECTOR_BUCKET_MAX_SIZE` (127): * → the set becomes a `RAX`, and existing entries are migrated. + * 4. If the set is using RAX encoding, the bucket to add the entry to is located + * following the strategy explained below. * *----------------------------------------------------------------------------- * RAX Bucket and Dynamic Splitting *----------------------------------------------------------------------------- * - * A `VSET_BUCKET_RAX` bucket stores multiple time-aligned buckets in a radix tree. - * Each key in the RAX represents the **end timestamp** of a bucket window. + * Each bucket in the RAX bucket corresponds to a **time window**, defined by + * its bucket timestamp (`bucket_ts`). This timestamp represents the **END** of + * the time window. Entries in the bucket must expire *before* this timestamp. * - * When a bucket in RAX becomes full (vector limit exceeded): - * - The vector is split into two parts using a **binary search** to find an optimal - * split point where the expiry bucket timestamp changes. - * - Two new buckets are created and inserted back into the RAX with their new - * aligned timestamps as keys. 
- * - If entries cannot be split (all in same window), the bucket is promoted to HT. + * Time windows are defined in granular ranges: + * - Minimum granularity: VOLATILESET_BUCKET_INTERVAL_MIN (16 ms) + * - Maximum granularity: VOLATILESET_BUCKET_INTERVAL_MAX (8192 ms) + * + * A bucket can only contain entries that: + * 1. Have expiry < bucket_ts + * 2. Do not fit into any bucket with a smaller timestamp (i.e., earlier window) + * + * The structure allows multiple encodings: + * VSET_BUCKET_SINGLE - A single pointer to one entry. + * VSET_BUCKET_VECTOR - A sorted vector of pointers (up to 127 entries). + * VSET_BUCKET_HT - A hashtable used when vectors become too dense. + * + * Bucket Timestamp (END of window): + * + * |------------------ Bucket Span ------------------| + * [window_start .................................. bucket_ts) + * + * ASCII Layout Example: + * + * Timeline: ---> increasing time ---> + * +------+---------+--------+ + * | B0 | B1 | B2 | + * | ts=32| ts=128 | ts=2048| + * +------+---------+--------+ + * ^ ^ ^ + * | | | + * [E1,E2] ∈ B0 [E3...E7] ∈ B1 [E8...] ∈ B2 + * All entries expire BEFORE their bucket_ts + * + * Bucket Splitting Strategy: + * ---------------------------------- + * + * When a bucket (e.g. VECTOR) becomes too dense or needs realignment: + * + * 1. **Re-align to a finer granularity:** + * - Adjust the bucket timestamp down to a finer granularity (e.g. 16ms). + * - Only done if ALL entries still fit in the tighter window. + * - Effectively “moves” the bucket to an earlier timestamp. + * + * Example: B(ts=128, span=128ms) → B(ts=64, span=16ms) + * + * 2. **Split into two buckets:** + * - Use binary search to find a “natural” boundary based on entry expiry. + * - Original bucket retains its timestamp (but holds fewer entries). + * - New bucket is inserted before the current one with its own tighter timestamp. + * + * Example: + * + * Before: + * [ Entry0 ... 
Entry126 ] → B(ts=128) + * + * After Split: + * [ Entry0...Entry62 ] → New B(ts=64) + * [ Entry63...Entry126 ] → Original B(ts=128) + * + * 3. **Convert to hashtable:** + * - When no clean split is found (e.g. all entries share similar expiry), + * and realignment is not possible. + * - This allows efficient O(1) lookups even with clustered expiry values. + * + * Vector B(ts=128) → Hashtable B(ts=128) + * + * This hierarchical design ensures: + * - Efficient memory usage (tight buckets) + * - Predictable iteration by expiry time + * - Low overhead insertions & deletions + * - Graceful promotion & demotion of bucket types + * + * NOTE: Buckets are always sorted by their `bucket_ts` in the radix tree (RAX), + * which allows efficient search for insertion/removal based on expiry. * *----------------------------------------------------------------------------- * RAX Bucket Layout @@ -157,23 +225,23 @@ * void freeVolatileSet(vset *set); * * Mutation: - * int volatileSetAddEntry(vset *set, void *entry, long long expiry); - * int volatileSetRemoveEntry(vset *set, void *entry, long long expiry); - * int volatileSetUpdateEntry(vset *set, void *old_entry, + * int vsetAddEntry(vset *set, void *entry, long long expiry); + * int vsetRemoveEntry(vset *set, void *entry, long long expiry); + * int vsetUpdateEntry(vset *set, void *old_entry, * void *new_entry, long long old_expiry, * long long new_expiry); * * Expiry Retrieval: - * void *volatileSetFirstExpired(vset *set, mstime_t now); - * void *volatileSetdPopExpired(vset *set, mstime_t now); + * void *vsetFirstExpired(vset *set, mstime_t now); + * void *vsetPopExpired(vset *set, mstime_t now); * * Utilities: - * bool volatileSetIsEmpty(vset *set); + * bool vsetIsEmpty(vset *set); * * Iteration: - * void volatileSetStart(vset *set, volatileSetIterator *it); - * int volatileSetNext(volatileSetIterator *it, void **entryptr); - * void volatileSetReset(volatileSetIterator *it); + * void vsetStart(vset *set, vsetIterator *it); + * int 
vsetNext(vsetIterator *it, void **entryptr); + * void vsetStop(vsetIterator *it); * *----------------------------------------------------------------------------- * Iteration Support @@ -181,7 +249,7 @@ * * Iterator structure maintains context across all bucket types: * - * typedef struct volatileSetIterator { + * typedef struct vsetIterator { * raxIterator riter; // for RAX * hashtableIterator hiter; // for HT * uint32_t viter; // for VECTOR @@ -191,7 +259,7 @@ * void *entry; // current entry * long long bucket_ts; // for RAX * int iteration_state; // internal FSM - * } volatileSetIterator; + * } vsetIterator; * */ #define VOLATILESET_BUCKET_INTERVAL_MAX (1LL << 13LL) // 2^13 = 8192 milliseconds @@ -215,7 +283,7 @@ typedef struct { vsetBucket *expiry_buckets; } vset; -typedef struct volatileSetIterator { +typedef struct vsetIterator { /* for rax bucket */ raxIterator riter; /* for hashtable bucket */ @@ -234,17 +302,17 @@ typedef struct volatileSetIterator { long long bucket_ts; /* the state of the iteration */ int iteration_state; -} volatileSetIterator; +} vsetIterator; -int volatileSetRemoveEntry(vset *set, void *entry, long long expiry); -int volatileSetAddEntry(vset *set, void *entry, long long expiry); -void *volatileSetdPopExpired(vset *set, mstime_t now); -void *volatileSetFirstExpired(vset *set, mstime_t now); -int volatileSetUpdateEntry(vset *set, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry); -bool volatileSetIsEmpty(vset *set); -void volatileSetStart(vset *set, volatileSetIterator *it); -int volatileSetNext(volatileSetIterator *it, void **entryptr); -void volatileSetReset(volatileSetIterator *it); +int vsetRemoveEntry(vset *set, void *entry, long long expiry); +int vsetAddEntry(vset *set, void *entry, long long expiry); +void *vsetPopExpired(vset *set, mstime_t now); +void *vsetFirstExpired(vset *set, mstime_t now); +int vsetUpdateEntry(vset *set, void *old_entry, void *new_entry, long long old_expiry, long long 
new_expiry); +bool vsetIsEmpty(vset *set); +void vsetStart(vset *set, vsetIterator *it); +int vsetNext(vsetIterator *it, void **entryptr); +void vsetStop(vsetIterator *it); void freeVolatileSet(vset *b); vset *createVolatileSet(volatileEntryType *type); From 1f03adc51a07558c5affa3e8f17ed8f785cc2432 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Sun, 29 Jun 2025 18:34:32 +0300 Subject: [PATCH 025/119] fix vset unittest name run Signed-off-by: Ran Shidlansik --- src/unit/test_files.h | 9 ++------- src/unit/{test_vest.c => test_vset.c} | 3 --- 2 files changed, 2 insertions(+), 10 deletions(-) rename src/unit/{test_vest.c => test_vset.c} (99%) diff --git a/src/unit/test_files.h b/src/unit/test_files.h index f0f3ef8aa0..8dbe63f452 100644 --- a/src/unit/test_files.h +++ b/src/unit/test_files.h @@ -206,11 +206,6 @@ int test_vset_large_batch_same_expiry(int argc, char **argv, int flags); int test_vset_iterate_multiple_expiries(int argc, char **argv, int flags); int test_vset_add_and_remove_all(int argc, char **argv, int flags); int test_vset_fuzzer(int argc, char **argv, int flags); -int test_vset_add_and_iterate(int argc, char **argv, int flags); -int test_vset_large_batch_same_expiry(int argc, char **argv, int flags); -int test_vset_iterate_multiple_expiries(int argc, char **argv, int flags); -int test_vset_add_and_remove_all(int argc, char **argv, int flags); -int test_vset_fuzzer(int argc, char **argv, int flags); int test_ziplistCreateIntList(int argc, char **argv, int flags); int test_ziplistPop(int argc, char **argv, int flags); int test_ziplistGetElementAtIndex3(int argc, char **argv, int flags); @@ -272,7 +267,7 @@ unitTest __test_sha1_c[] = {{"test_sha1", test_sha1}, {NULL, NULL}}; unitTest __test_util_c[] = {{"test_string2ll", test_string2ll}, {"test_string2l", test_string2l}, {"test_ll2string", test_ll2string}, {"test_ld2string", test_ld2string}, {"test_fixedpoint_d2string", test_fixedpoint_d2string}, {"test_version2num", test_version2num}, 
{"test_reclaimFilePageCache", test_reclaimFilePageCache}, {NULL, NULL}}; unitTest __test_valkey_strtod_c[] = {{"test_valkey_strtod", test_valkey_strtod}, {NULL, NULL}}; unitTest __test_vector_c[] = {{"test_vector", test_vector}, {NULL, NULL}}; -unitTest __test_vest_c[] = {{"test_vset_add_and_iterate", test_vset_add_and_iterate}, {"test_vset_large_batch_same_expiry", test_vset_large_batch_same_expiry}, {"test_vset_iterate_multiple_expiries", test_vset_iterate_multiple_expiries}, {"test_vset_add_and_remove_all", test_vset_add_and_remove_all}, {"test_vset_fuzzer", test_vset_fuzzer}, {NULL, NULL}}; +unitTest __test_vset_c[] = {{"test_vset_add_and_iterate", test_vset_add_and_iterate}, {"test_vset_large_batch_same_expiry", test_vset_large_batch_same_expiry}, {"test_vset_iterate_multiple_expiries", test_vset_iterate_multiple_expiries}, {"test_vset_add_and_remove_all", test_vset_add_and_remove_all}, {"test_vset_fuzzer", test_vset_fuzzer}, {NULL, NULL}}; unitTest __test_ziplist_c[] = {{"test_ziplistCreateIntList", test_ziplistCreateIntList}, {"test_ziplistPop", test_ziplistPop}, {"test_ziplistGetElementAtIndex3", test_ziplistGetElementAtIndex3}, {"test_ziplistGetElementOutOfRange", test_ziplistGetElementOutOfRange}, {"test_ziplistGetLastElement", test_ziplistGetLastElement}, {"test_ziplistGetFirstElement", test_ziplistGetFirstElement}, {"test_ziplistGetElementOutOfRangeReverse", test_ziplistGetElementOutOfRangeReverse}, {"test_ziplistIterateThroughFullList", test_ziplistIterateThroughFullList}, {"test_ziplistIterateThroughListFrom1ToEnd", test_ziplistIterateThroughListFrom1ToEnd}, {"test_ziplistIterateThroughListFrom2ToEnd", test_ziplistIterateThroughListFrom2ToEnd}, {"test_ziplistIterateThroughStartOutOfRange", test_ziplistIterateThroughStartOutOfRange}, {"test_ziplistIterateBackToFront", test_ziplistIterateBackToFront}, {"test_ziplistIterateBackToFrontDeletingAllItems", test_ziplistIterateBackToFrontDeletingAllItems}, {"test_ziplistDeleteInclusiveRange0To0", 
test_ziplistDeleteInclusiveRange0To0}, {"test_ziplistDeleteInclusiveRange0To1", test_ziplistDeleteInclusiveRange0To1}, {"test_ziplistDeleteInclusiveRange1To2", test_ziplistDeleteInclusiveRange1To2}, {"test_ziplistDeleteWithStartIndexOutOfRange", test_ziplistDeleteWithStartIndexOutOfRange}, {"test_ziplistDeleteWithNumOverflow", test_ziplistDeleteWithNumOverflow}, {"test_ziplistDeleteFooWhileIterating", test_ziplistDeleteFooWhileIterating}, {"test_ziplistReplaceWithSameSize", test_ziplistReplaceWithSameSize}, {"test_ziplistReplaceWithDifferentSize", test_ziplistReplaceWithDifferentSize}, {"test_ziplistRegressionTestForOver255ByteStrings", test_ziplistRegressionTestForOver255ByteStrings}, {"test_ziplistRegressionTestDeleteNextToLastEntries", test_ziplistRegressionTestDeleteNextToLastEntries}, {"test_ziplistCreateLongListAndCheckIndices", test_ziplistCreateLongListAndCheckIndices}, {"test_ziplistCompareStringWithZiplistEntries", test_ziplistCompareStringWithZiplistEntries}, {"test_ziplistMergeTest", test_ziplistMergeTest}, {"test_ziplistStressWithRandomPayloadsOfDifferentEncoding", test_ziplistStressWithRandomPayloadsOfDifferentEncoding}, {"test_ziplistCascadeUpdateEdgeCases", test_ziplistCascadeUpdateEdgeCases}, {"test_ziplistInsertEdgeCase", test_ziplistInsertEdgeCase}, {"test_ziplistStressWithVariableSize", test_ziplistStressWithVariableSize}, {"test_BenchmarkziplistFind", test_BenchmarkziplistFind}, {"test_BenchmarkziplistIndex", test_BenchmarkziplistIndex}, {"test_BenchmarkziplistValidateIntegrity", test_BenchmarkziplistValidateIntegrity}, {"test_BenchmarkziplistCompareWithString", test_BenchmarkziplistCompareWithString}, {"test_BenchmarkziplistCompareWithNumber", test_BenchmarkziplistCompareWithNumber}, {"test_ziplistStress__ziplistCascadeUpdate", test_ziplistStress__ziplistCascadeUpdate}, {NULL, NULL}}; unitTest __test_zipmap_c[] = {{"test_zipmapIterateWithLargeKey", test_zipmapIterateWithLargeKey}, {"test_zipmapIterateThroughElements", 
test_zipmapIterateThroughElements}, {NULL, NULL}}; unitTest __test_zmalloc_c[] = {{"test_zmallocInitialUsedMemory", test_zmallocInitialUsedMemory}, {"test_zmallocAllocReallocCallocAndFree", test_zmallocAllocReallocCallocAndFree}, {"test_zmallocAllocZeroByteAndFree", test_zmallocAllocZeroByteAndFree}, {NULL, NULL}}; @@ -300,7 +295,7 @@ struct unitTestSuite { {"test_util.c", __test_util_c}, {"test_valkey_strtod.c", __test_valkey_strtod_c}, {"test_vector.c", __test_vector_c}, - {"test_vest.c", __test_vest_c}, + {"test_vset.c", __test_vset_c}, {"test_ziplist.c", __test_ziplist_c}, {"test_zipmap.c", __test_zipmap_c}, {"test_zmalloc.c", __test_zmalloc_c}, diff --git a/src/unit/test_vest.c b/src/unit/test_vset.c similarity index 99% rename from src/unit/test_vest.c rename to src/unit/test_vset.c index dc407749b0..efc8967e38 100644 --- a/src/unit/test_vest.c +++ b/src/unit/test_vset.c @@ -45,7 +45,6 @@ static int mockExpire(void *db, void *o, void *entry) { } int test_vset_add_and_iterate(int argc, char **argv, int flags) { - return 0; (void)argc; (void)argv; (void)flags; @@ -89,7 +88,6 @@ int test_vset_add_and_iterate(int argc, char **argv, int flags) { } int test_vset_large_batch_same_expiry(int argc, char **argv, int flags) { - return 0; (void)argc; (void)argv; (void)flags; @@ -146,7 +144,6 @@ int test_vset_large_batch_same_expiry(int argc, char **argv, int flags) { } int test_vset_iterate_multiple_expiries(int argc, char **argv, int flags) { - return 0; (void)argc; (void)argv; (void)flags; From 05a731c9179bb6dcab2fce482ba9316a29253b0f Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Sun, 29 Jun 2025 18:36:19 +0300 Subject: [PATCH 026/119] replace vset "virtual table" with explicit function parameters to get entry expiry. This is the first change in order to reduce the vset default memory consumption. Although this complicates the API, it allows reducing the memory footprint per each hash object using the set. 
The next potential step is to make the vset a pure bucket pointer so that it will not use any extra memory. I intentionally separated these changes so that we can decide whether sacrificing API friendliness is preferable to consuming more memory. Signed-off-by: Ran Shidlansik --- src/t_hash.c | 18 ++---- src/unit/test_vset.c | 69 +++++----------------- src/vset.c | 134 ++++++++++++++++++++++--------------------- src/vset.h | 15 ++--- 4 files changed, 97 insertions(+), 139 deletions(-) diff --git a/src/t_hash.c b/src/t_hash.c index 87cec9c8d5..4af1028bde 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -42,12 +42,6 @@ #include #include "entry.h" - -volatileEntryType hashVolatileEntryType = { - .entryGetKey = (sds(*)(const void *entry))entryGetField, - .getExpiry = (long long (*)(const void *entry))entryGetExpiry, -}; - /*----------------------------------------------------------------------------- * Hash type Expiry API *----------------------------------------------------------------------------*/ @@ -83,7 +77,7 @@ static vset *hashTypeGetOrcreateVolatileSet(robj *o) { serverAssert(o->encoding == OBJ_ENCODING_HASHTABLE); vset **vset_ref = hashtableMetadata(o->ptr); if (*vset_ref == NULL) { - *vset_ref = createVolatileSet(&hashVolatileEntryType); + *vset_ref = createVolatileSet(); /* serves mainly for optimization. Use type which supports access function only when needed. 
*/ hashTypeIgnoreTTL(o, false); } @@ -100,14 +94,14 @@ static void hashTypeDeleteVolatileSet(robj *o) { void hashTypeTrackEntry(robj *o, void *entry) { vset *set = hashTypeGetOrcreateVolatileSet(o); - serverAssert(vsetAddEntry(set, entry, entryGetExpiry(entry))); + serverAssert(vsetAddEntry(set, entryGetExpiry, entry, entryGetExpiry(entry))); } void hashTypeUntrackEntry(robj *o, void *entry) { if (!entryHasExpiry(entry)) return; vset *set = hashTypeGetVolatileSet(o); debugServerAssert(set); - serverAssert(vsetRemoveEntry(set, entry, entryGetExpiry(entry))); + serverAssert(vsetRemoveEntry(set, entryGetExpiry, entry, entryGetExpiry(entry))); if (vsetIsEmpty(set)) { hashTypeDeleteVolatileSet(o); } @@ -124,13 +118,13 @@ static void hashTypeTrackUpdateEntry(robj *o, void *old_entry, void *new_entry, debugServerAssert(set); if (old_tracked && !new_tracked) - serverAssert(vsetRemoveEntry(set, old_entry, old_expiry)); + serverAssert(vsetRemoveEntry(set, entryGetExpiry, old_entry, old_expiry)); else if (new_tracked && !old_tracked) - serverAssert(vsetAddEntry(set, new_entry, new_expiry)); + serverAssert(vsetAddEntry(set, entryGetExpiry, new_entry, new_expiry)); else { vset *set = hashTypeGetVolatileSet(o); debugServerAssert(set); - serverAssert(vsetUpdateEntry(set, old_entry, new_entry, old_expiry, new_expiry) == 1); + serverAssert(vsetUpdateEntry(set, entryGetExpiry, old_entry, new_entry, old_expiry, new_expiry) == 1); } if (vsetIsEmpty(set)) { hashTypeDeleteVolatileSet(o); diff --git a/src/unit/test_vset.c b/src/unit/test_vset.c index efc8967e38..3ae4052d56 100644 --- a/src/unit/test_vset.c +++ b/src/unit/test_vset.c @@ -24,10 +24,6 @@ static mock_entry *mockEntryUpdate(mock_entry *entry, long long expiry) { return entryUpdate(entry, NULL, expiry); } -static sds mockGetKey(const void *entry) { - return (sds)entry; -} - static long long mockGetExpiry(const void *entry) { return entryGetExpiry(entry); } @@ -37,32 +33,19 @@ static void mockFreeEntry(void *entry) { 
entryFree(entry); } -static int mockExpire(void *db, void *o, void *entry) { - (void)db; - (void)o; - (void)entry; - return 1; -} - int test_vset_add_and_iterate(int argc, char **argv, int flags) { (void)argc; (void)argv; (void)flags; - volatileEntryType type = { - .entryGetKey = mockGetKey, - .getExpiry = mockGetExpiry, - .expire = mockExpire, - }; - - vset *set = createVolatileSet(&type); + vset *set = createVolatileSet(); TEST_ASSERT(set != NULL); mock_entry *e1 = mockCreateEntry("item1", 123); mock_entry *e2 = mockCreateEntry("item2", 456); - TEST_ASSERT(vsetAddEntry(set, e1, mockGetExpiry(e1))); - TEST_ASSERT(vsetAddEntry(set, e2, mockGetExpiry(e2))); + TEST_ASSERT(vsetAddEntry(set, mockGetExpiry, e1, mockGetExpiry(e1))); + TEST_ASSERT(vsetAddEntry(set, mockGetExpiry, e2, mockGetExpiry(e2))); TEST_ASSERT(!vsetIsEmpty(set)); @@ -92,13 +75,7 @@ int test_vset_large_batch_same_expiry(int argc, char **argv, int flags) { (void)argv; (void)flags; - volatileEntryType type = { - .entryGetKey = mockGetKey, - .getExpiry = mockGetExpiry, - .expire = mockExpire, - }; - - vset *set = createVolatileSet(&type); + vset *set = createVolatileSet(); TEST_ASSERT(set != NULL); const long long expiry_time = 1000LL; @@ -112,7 +89,7 @@ int test_vset_large_batch_same_expiry(int argc, char **argv, int flags) { char key_buf[32]; snprintf(key_buf, sizeof(key_buf), "entry_%d", i); entries[i] = mockCreateEntry(key_buf, expiry_time); - TEST_ASSERT(vsetAddEntry(set, entries[i], expiry_time)); + TEST_ASSERT(vsetAddEntry(set, mockGetExpiry, entries[i], expiry_time)); } // Verify set is not empty @@ -148,13 +125,8 @@ int test_vset_iterate_multiple_expiries(int argc, char **argv, int flags) { (void)argv; (void)flags; const unsigned int total_entries = 5; - volatileEntryType type = { - .entryGetKey = mockGetKey, - .getExpiry = mockGetExpiry, - .expire = mockExpire, - }; - vset *set = createVolatileSet(&type); + vset *set = createVolatileSet(); TEST_ASSERT(set != NULL); // Prepare entries with 
mixed expiry times, some duplicates @@ -166,7 +138,7 @@ int test_vset_iterate_multiple_expiries(int argc, char **argv, int flags) { snprintf(key_buf, sizeof(key_buf), "entry_%d", i); long long expiry_time = rand() % 10000; entries[i] = mockCreateEntry(key_buf, expiry_time); - TEST_ASSERT(vsetAddEntry(set, entries[i], expiry_time)); + TEST_ASSERT(vsetAddEntry(set, mockGetExpiry, entries[i], expiry_time)); } vsetIterator it; @@ -209,13 +181,7 @@ int test_vset_add_and_remove_all(int argc, char **argv, int flags) { UNUSED(argv); UNUSED(flags); - volatileEntryType type = { - .entryGetKey = mockGetKey, - .getExpiry = mockGetExpiry, - .expire = mockExpire, - }; - - vset *set = createVolatileSet(&type); + vset *set = createVolatileSet(); TEST_ASSERT(set != NULL); const int total_entries = 130; @@ -226,11 +192,11 @@ int test_vset_add_and_remove_all(int argc, char **argv, int flags) { char key[32]; snprintf(key, sizeof(key), "key_%d", i); entries[i] = mockCreateEntry(key, expiry); - TEST_ASSERT(vsetAddEntry(set, entries[i], expiry)); + TEST_ASSERT(vsetAddEntry(set, mockGetExpiry, entries[i], expiry)); } for (int i = 0; i < total_entries; i++) { - TEST_ASSERT(vsetRemoveEntry(set, entries[i], expiry)); + TEST_ASSERT(vsetRemoveEntry(set, mockGetExpiry, entries[i], expiry)); mockFreeEntry(entries[i]); } @@ -287,7 +253,7 @@ int insert_mock_entry(vset *set) { long long expiry = rand() % 10000 + 100; mock_entry *e = mock_entry_create(keybuf, expiry); // printf("adding entry %p with expiry %llu\n", e, expiry); - TEST_ASSERT(vsetAddEntry(set, e, expiry)); + TEST_ASSERT(vsetAddEntry(set, mockGetExpiry, e, expiry)); mock_entries[mock_entry_count++] = e; return 0; } @@ -301,7 +267,7 @@ int update_mock_entry(vset *set) { mock_entry *updated = mockEntryUpdate(old, new_expiry); mock_entries[idx] = updated; // printf("Update entry %p with entry %p with old expiry %llu new expiry %llu\n", old, updated, old_expiry, new_expiry); - TEST_ASSERT(vsetUpdateEntry(set, old, updated, old_expiry, 
new_expiry)); + TEST_ASSERT(vsetUpdateEntry(set, mockGetExpiry, old, updated, old_expiry, new_expiry)); return 0; } @@ -310,7 +276,7 @@ int remove_mock_entry(vset *set) { int idx = rand() % mock_entry_count; mock_entry *e = mock_entries[idx]; // printf("removing entry %p with expiry %llu\n", e, mockGetExpiry(e)); - TEST_ASSERT(vsetRemoveEntry(set, e, mockGetExpiry(e))); + TEST_ASSERT(vsetRemoveEntry(set, mockGetExpiry, e, mockGetExpiry(e))); mockFreeEntry(e); mock_entries[idx] = mock_entries[--mock_entry_count]; @@ -320,7 +286,7 @@ int remove_mock_entry(vset *set) { int expire_mock_entries(vset *set, mstime_t now) { void *entry; do { - entry = vsetPopExpired(set, now); + entry = vsetPopExpired(set, mockGetExpiry, now); if (entry) { // printf("pop expire entry %p with expiry %llu now: %llu\n", entry, mockGetExpiry(entry), now); TEST_ASSERT(mockGetExpiry(entry) <= now); @@ -345,12 +311,7 @@ int test_vset_fuzzer(int argc, char **argv, int flags) { UNUSED(flags); srand(time(NULL)); - volatileEntryType type = { - .entryGetKey = mock_entry_get_key, - .getExpiry = mock_entry_get_expiry, - .expire = mock_entry_expire}; - - vset *set = createVolatileSet(&type); + vset *set = createVolatileSet(); for (int i = 0; i < NUM_ITERATIONS; i++) { int op = rand() % 4; diff --git a/src/vset.c b/src/vset.c index 5d203b8707..9ea13e98ef 100644 --- a/src/vset.c +++ b/src/vset.c @@ -449,13 +449,13 @@ static inline size_t encodeNewExpiryBucketKey(unsigned char *key, long long expi * elem Pointer to the element to insert * cmp Comparison function (like strcmp-style: <0, ==0, >0) * returns the insertion index (between 0 and sv->len) */ -static inline uint32_t findInsertPosition(vset *set, vsetBucket *bucket, long long expiry) { +static inline uint32_t findInsertPosition(vsetGetExpiryFunc getExpiry, vsetBucket *bucket, long long expiry) { pVector *pv = vsetBucketVector(bucket); uint32_t left = 0; uint32_t right = pvLen(pv); while (left < right) { uint32_t mid = (left + right) / 2; - int res = 
EXPIRE_COMPARE(expiry, set->etypr->getExpiry(pv->data[mid])); + int res = EXPIRE_COMPARE(expiry, getExpiry(pv->data[mid])); if (res <= 0) right = mid; else @@ -510,7 +510,7 @@ static inline uint32_t findInsertPosition(vset *set, vsetBucket *bucket, long lo * This guarantees that each vector contains elements with the same bucket timestamp, * and no value in the first part maps to the same or later bucket as the second part. */ -static uint32_t findSplitPosition(vset *set, vsetBucket *bucket, long long *split_ts_out) { +static uint32_t findSplitPosition(vsetGetExpiryFunc getExpiry, vsetBucket *bucket, long long *split_ts_out) { pVector *pv = vsetBucketVector(bucket); if (!pv || pv->len < 2) return pv ? pv->len : 0; @@ -524,8 +524,8 @@ static uint32_t findSplitPosition(vset *set, vsetBucket *bucket, long long *spli while (left <= right) { uint32_t mid = (left + right) / 2; - long long prev_ts = get_bucket_ts(set->etypr->getExpiry(pvGet(pv, mid - 1))); - long long curr_ts = get_bucket_ts(set->etypr->getExpiry(pvGet(pv, mid))); + long long prev_ts = get_bucket_ts(getExpiry(pvGet(pv, mid - 1))); + long long curr_ts = get_bucket_ts(getExpiry(pvGet(pv, mid))); if (prev_ts != curr_ts) { // Check if closer to center @@ -543,7 +543,7 @@ static uint32_t findSplitPosition(vset *set, vsetBucket *bucket, long long *spli if (split_ts_out) { *split_ts_out = best_split != pv->len ? 
best_split_ts - : get_bucket_ts(set->etypr->getExpiry(pvGet(pv, pv->len - 1))); + : get_bucket_ts(getExpiry(pvGet(pv, pv->len - 1))); } return best_split; @@ -669,7 +669,7 @@ static void freeVsetBucket(void *entry) { } } -static bool splitBucketIfPossible(vset *set, vsetBucket *bucket, long long bucket_ts, raxNode *node) { +static bool splitBucketIfPossible(vsetBucket *parent, vsetGetExpiryFunc getExpiry, vsetBucket *bucket, long long bucket_ts, raxNode *node) { /* We can only split vector encoded buckets */ if (vsetBucketType(bucket) != VSET_BUCKET_VECTOR) { return false; @@ -679,9 +679,9 @@ static bool splitBucketIfPossible(vset *set, vsetBucket *bucket, long long bucke unsigned char key[VSET_BUCKET_KEY_LEN] = {0}; vsetBucket *new_bucket = NULL; pVector *sv = vsetBucketVector(bucket); - rax *expiry_buckets = vsetBucketRax(set->expiry_buckets); - long long max_bucket_ts = get_bucket_ts(set->etypr->getExpiry(sv->data[pvLen(sv) - 1])); - long long min_bucket_ts = get_bucket_ts(set->etypr->getExpiry(sv->data[0])); + rax *expiry_buckets = vsetBucketRax(parent); + long long max_bucket_ts = get_bucket_ts(getExpiry(sv->data[pvLen(sv) - 1])); + long long min_bucket_ts = get_bucket_ts(getExpiry(sv->data[0])); if (max_bucket_ts < bucket_ts) { /* In case the bucket is already spanning over a larger window than needed, just place the bucket in a new place */ @@ -692,7 +692,7 @@ static bool splitBucketIfPossible(vset *set, vsetBucket *bucket, long long bucke } else if (min_bucket_ts != max_bucket_ts) { /* lets split the bucket. we know we can do it. */ - uint32_t split_index = findSplitPosition(set, bucket, &target_bucket_ts); + uint32_t split_index = findSplitPosition(getExpiry, bucket, &target_bucket_ts); assert(target_bucket_ts < bucket_ts); assert(split_index != pvLen(sv)); /* no way to split it ??? 
*/ pVector *new_bucket_vector = vsetBucketVector(bucket); @@ -716,17 +716,17 @@ static bool splitBucketIfPossible(vset *set, vsetBucket *bucket, long long bucke return true; } -static inline vsetBucket *insertToBucket_NONE(vset *set, vsetBucket *bucket, void *entry, long long expiry) { - UNUSED(set); +static inline vsetBucket *insertToBucket_NONE(vsetGetExpiryFunc getExpiry, vsetBucket *bucket, void *entry, long long expiry) { + UNUSED(getExpiry); UNUSED(expiry); return vsetBucketSetSingle(bucket, entry); } -static inline vsetBucket *insertToBucket_SINGLE(vset *set, vsetBucket *bucket, void *entry, long long expiry) { +static inline vsetBucket *insertToBucket_SINGLE(vsetGetExpiryFunc getExpiry, vsetBucket *bucket, void *entry, long long expiry) { /* Upgrade to vector */ pVector *sv = pvNew(2); void *curr_entry = vsetBucketSingle(bucket); - long long curr_expiry = set->etypr->getExpiry(curr_entry); + long long curr_expiry = getExpiry(curr_entry); if (curr_expiry < expiry) { sv = pvInsert(sv, curr_entry, 0); sv = pvInsert(sv, entry, 1); @@ -738,7 +738,7 @@ static inline vsetBucket *insertToBucket_SINGLE(vset *set, vsetBucket *bucket, v return bucket; } -static inline vsetBucket *insertToBucket_VECTOR(vset *set, vsetBucket *bucket, void *entry, long long expiry) { +static inline vsetBucket *insertToBucket_VECTOR(vsetGetExpiryFunc getExpiry, vsetBucket *bucket, void *entry, long long expiry) { pVector *pv = vsetBucketVector(bucket); /* limit of the number of elements in a vector. 
*/ if (pvLen(pv) >= VOLATILESET_VECTOR_BUCKET_MAX_SIZE) { @@ -753,14 +753,14 @@ static inline vsetBucket *insertToBucket_VECTOR(vset *set, vsetBucket *bucket, v return vsetBucketSetHashtable(bucket, ht); } else { - uint32_t pos = findInsertPosition(set, bucket, expiry); + uint32_t pos = findInsertPosition(getExpiry, bucket, expiry); return vsetBucketSetVector(bucket, pvInsert(pv, entry, pos)); } return NULL; } -static inline vsetBucket *insertToBucket_HASHTABLE(vset *set, vsetBucket *bucket, void *entry, long long expiry) { - UNUSED(set); +static inline vsetBucket *insertToBucket_HASHTABLE(vsetGetExpiryFunc getExpiry, vsetBucket *bucket, void *entry, long long expiry) { + UNUSED(getExpiry); UNUSED(expiry); hashtable *ht = vsetBucketHashtable(bucket); @@ -768,7 +768,7 @@ static inline vsetBucket *insertToBucket_HASHTABLE(vset *set, vsetBucket *bucket return bucket; } -static inline vsetBucket *insertToBucket_RAX(vset *set, vsetBucket *target, void *entry, long long expiry) { +static inline vsetBucket *insertToBucket_RAX(vsetGetExpiryFunc getExpiry, vsetBucket *target, void *entry, long long expiry) { unsigned char key[VSET_BUCKET_KEY_LEN] = {0}; size_t key_len; long long bucket_ts; @@ -778,14 +778,14 @@ static inline vsetBucket *insertToBucket_RAX(vset *set, vsetBucket *target, void int type = vsetBucketType(bucket); if (type == VSET_BUCKET_NONE) { /* No bucket: create single-entry bucket */ - bucket = insertToBucket_NONE(set, bucket, entry, expiry); + bucket = insertToBucket_NONE(getExpiry, bucket, entry, expiry); assert(vsetBucketType(bucket) == VSET_BUCKET_SINGLE); size_t key_size = encodeNewExpiryBucketKey(key, expiry); raxInsert(expiry_buckets, key, key_size, bucket, NULL); return target; } else if (type == VSET_BUCKET_SINGLE) { /* Upgrade to vector */ - bucket = insertToBucket_SINGLE(set, bucket, entry, expiry); + bucket = insertToBucket_SINGLE(getExpiry, bucket, entry, expiry); assert(vsetBucketType(bucket) == VSET_BUCKET_VECTOR); /* In order to avoid rax 
override, we directly change the node data */ // alternative: raxInsert(expiry_buckets, key, key_len, bucket, NULL); @@ -794,34 +794,35 @@ static inline vsetBucket *insertToBucket_RAX(vset *set, vsetBucket *target, void pVector *sv = vsetBucketVector(bucket); if (pvLen(sv) == VOLATILESET_VECTOR_BUCKET_MAX_SIZE) { /* Try to split the bucket. If not possible switch to hashtable encoding. */ - if (!splitBucketIfPossible(set, bucket, bucket_ts, node)) { + if (!splitBucketIfPossible(target, getExpiry, bucket, bucket_ts, node)) { /* Can't split? insrt to the vector anyway, it will just expand to hashtable */ - bucket = insertToBucket_VECTOR(set, bucket, entry, expiry); + bucket = insertToBucket_VECTOR(getExpiry, bucket, entry, expiry); assert(vsetBucketType(bucket) == VSET_BUCKET_HT); /* In order to avoid rax override, we directly change the node data */ // alternative raxInsert(expiry_buckets, key, key_len, bucket, NULL); raxSetData(node, bucket); } else { /* we split the bucket. go and find again a bucket to place the entry since there can be new options now. 
*/ - return insertToBucket_RAX(set, target, entry, expiry); + return insertToBucket_RAX(getExpiry, target, entry, expiry); } } else { - vsetBucket *new_bucket = insertToBucket_VECTOR(set, bucket, entry, expiry); + vsetBucket *new_bucket = insertToBucket_VECTOR(getExpiry, bucket, entry, expiry); if (new_bucket != bucket) /* In order to avoid rax override, we directly change the node data */ // alternative: raxInsert(expiry_buckets, key, key_len, new_bucket, NULL); raxSetData(node, new_bucket); } } else if (vsetBucketType(bucket) == VSET_BUCKET_HT) { - bucket = insertToBucket_HASHTABLE(set, bucket, entry, expiry); + bucket = insertToBucket_HASHTABLE(getExpiry, bucket, entry, expiry); } else { panic("Unknown bucket type in insertToBucket_RAX"); } return target; } -static inline vsetBucket *removeFromBucket_SINGLE(vset *set, vsetBucket *bucket, void *entry, long long expiry, bool *removed) { +static inline vsetBucket *removeFromBucket_SINGLE(vset *set, vsetGetExpiryFunc getExpiry, vsetBucket *bucket, void *entry, long long expiry, bool *removed) { UNUSED(set); + UNUSED(getExpiry); UNUSED(expiry); if (vsetBucketSingle(bucket) == entry) { @@ -833,8 +834,9 @@ static inline vsetBucket *removeFromBucket_SINGLE(vset *set, vsetBucket *bucket, } } -static inline vsetBucket *removeFromBucket_VECTOR(vset *set, vsetBucket *bucket, void *entry, long long expiry, bool *removed) { +static inline vsetBucket *removeFromBucket_VECTOR(vset *set, vsetGetExpiryFunc getExpiry, vsetBucket *bucket, void *entry, long long expiry, bool *removed) { UNUSED(set); + UNUSED(getExpiry); UNUSED(expiry); vsetBucket *new_bucket = bucket; @@ -865,8 +867,9 @@ static inline vsetBucket *removeFromBucket_VECTOR(vset *set, vsetBucket *bucket, return new_bucket; } -static inline vsetBucket *removeFromBucket_HASHTABLE(vset *set, vsetBucket *bucket, void *entry, long long expiry, bool *removed) { +static inline vsetBucket *removeFromBucket_HASHTABLE(vset *set, vsetGetExpiryFunc getExpiry, vsetBucket *bucket, 
void *entry, long long expiry, bool *removed) { UNUSED(set); + UNUSED(getExpiry); UNUSED(expiry); bool success = false; @@ -960,18 +963,18 @@ static inline int vsetBucketNext_RAX(vsetIterator *it, void **entryptr) { return 1; } -static bool raxBucketRemoveEntry(vset *set, void *entry, vsetBucket *bucket, unsigned char *key, size_t key_len, vsetBucket **pbucket, raxNode *node) { +static bool raxBucketRemoveEntry(vset *set, vsetGetExpiryFunc getExpiry, void *entry, vsetBucket *bucket, unsigned char *key, size_t key_len, vsetBucket **pbucket, raxNode *node) { bool removed = false; switch (vsetBucketType(bucket)) { case VSET_BUCKET_SINGLE: - bucket = removeFromBucket_SINGLE(set, bucket, entry, 0, &removed); + bucket = removeFromBucket_SINGLE(set, getExpiry, bucket, entry, 0, &removed); if (removed) { raxRemove(vsetBucketRax(set->expiry_buckets), key, key_len, NULL); if (pbucket) *pbucket = NULL; } break; case VSET_BUCKET_VECTOR: { - vsetBucket *new_bucket = removeFromBucket_VECTOR(set, bucket, entry, 0, &removed); + vsetBucket *new_bucket = removeFromBucket_VECTOR(set, getExpiry, bucket, entry, 0, &removed); if (new_bucket != bucket) { if (!new_bucket) { raxRemove(vsetBucketRax(set->expiry_buckets), key, key_len, NULL); @@ -986,7 +989,7 @@ static bool raxBucketRemoveEntry(vset *set, void *entry, vsetBucket *bucket, uns break; } case VSET_BUCKET_HT: { - vsetBucket *new_bucket = removeFromBucket_HASHTABLE(set, bucket, entry, 0, &removed); + vsetBucket *new_bucket = removeFromBucket_HASHTABLE(set, getExpiry, bucket, entry, 0, &removed); if (new_bucket != bucket) /* In order to avoid rax override, we directly change the node data */ // alternative: raxInsert(set->expiry_buckets, key, key_len, bucket, NULL); @@ -1002,7 +1005,7 @@ static bool raxBucketRemoveEntry(vset *set, void *entry, vsetBucket *bucket, uns return removed; } -static inline vsetBucket *removeFromBucket_RAX(vset *set, vsetBucket *target, void *entry, long long expiry, bool *removed) { +static inline 
vsetBucket *removeFromBucket_RAX(vset *set, vsetGetExpiryFunc getExpiry, vsetBucket *target, void *entry, long long expiry, bool *removed) { unsigned char key[VSET_BUCKET_KEY_LEN] = {0}; long long bucket_ts; size_t key_len; @@ -1010,7 +1013,7 @@ static inline vsetBucket *removeFromBucket_RAX(vset *set, vsetBucket *target, vo rax *expiry_buckets = vsetBucketRax(target); vsetBucket *bucket = findBucket(expiry_buckets, expiry, key, &key_len, &bucket_ts, &node); assert(bucket); - bool success = raxBucketRemoveEntry(set, entry, bucket, key, key_len, NULL, node); + bool success = raxBucketRemoveEntry(set, getExpiry, entry, bucket, key, key_len, NULL, node); if (removed) *removed = success; // shrink to single bucket if possible if (raxSize(expiry_buckets) == 1) { @@ -1033,14 +1036,14 @@ static inline vsetBucket *removeFromBucket_RAX(vset *set, vsetBucket *target, vo return target; } -int vsetAddEntry(vset *set, void *entry, long long expiry) { +int vsetAddEntry(vset *set, vsetGetExpiryFunc getExpiry, void *entry, long long expiry) { int bucket_type = vsetBucketType(set->expiry_buckets); switch (bucket_type) { case VSET_BUCKET_NONE: - set->expiry_buckets = insertToBucket_NONE(set, set->expiry_buckets, entry, expiry); + set->expiry_buckets = insertToBucket_NONE(getExpiry, set->expiry_buckets, entry, expiry); break; case VSET_BUCKET_SINGLE: - set->expiry_buckets = insertToBucket_SINGLE(set, set->expiry_buckets, entry, expiry); + set->expiry_buckets = insertToBucket_SINGLE(getExpiry, set->expiry_buckets, entry, expiry); break; case VSET_BUCKET_VECTOR: { pVector *vec = vsetBucketVector(set->expiry_buckets); @@ -1048,34 +1051,34 @@ int vsetAddEntry(vset *set, void *entry, long long expiry) { /* in case the vector is full, we need to turn into RAX */ if (len == VOLATILESET_VECTOR_BUCKET_MAX_SIZE) { rax *r = raxNew(); - long long min_expiry = set->etypr->getExpiry(pvGet(vec, 0)); - long long max_expiry = set->etypr->getExpiry(pvGet(vec, len - 1)); + long long min_expiry = 
getExpiry(pvGet(vec, 0)); + long long max_expiry = getExpiry(pvGet(vec, len - 1)); if (get_max_bucket_ts(min_expiry) == get_max_bucket_ts(max_expiry)) { /* In case we can just insert the bucket, no need to iterate and insert it's elements. we can just push the bucket as a whole. */ unsigned char key[VSET_BUCKET_KEY_LEN] = {0}; size_t key_len = encodeNewExpiryBucketKey(key, max_expiry); raxInsert(r, key, key_len, set->expiry_buckets, NULL); set->expiry_buckets = vsetBucketSetRax(set->expiry_buckets, r); - set->expiry_buckets = insertToBucket_RAX(set, set->expiry_buckets, entry, expiry); + set->expiry_buckets = insertToBucket_RAX(getExpiry, set->expiry_buckets, entry, expiry); } else { /* We need to migrate entries to the new set of buckets since we do not know all entries are in the same bucket */ set->expiry_buckets = vsetBucketSetRax(set->expiry_buckets, r); for (uint32_t i = 0; i < len; i++) { void *moved_entry = pvGet(vec, i); - set->expiry_buckets = insertToBucket_RAX(set, set->expiry_buckets, moved_entry, set->etypr->getExpiry(moved_entry)); + set->expiry_buckets = insertToBucket_RAX(getExpiry, set->expiry_buckets, moved_entry, getExpiry(moved_entry)); } /* free the vector */ pvFree(vec); /* now insert the new entry to the buckets */ - set->expiry_buckets = insertToBucket_RAX(set, set->expiry_buckets, entry, expiry); + set->expiry_buckets = insertToBucket_RAX(getExpiry, set->expiry_buckets, entry, expiry); } } else { - set->expiry_buckets = insertToBucket_VECTOR(set, set->expiry_buckets, entry, expiry); + set->expiry_buckets = insertToBucket_VECTOR(getExpiry, set->expiry_buckets, entry, expiry); } break; } case VSET_BUCKET_RAX: - set->expiry_buckets = insertToBucket_RAX(set, set->expiry_buckets, entry, expiry); + set->expiry_buckets = insertToBucket_RAX(getExpiry, set->expiry_buckets, entry, expiry); break; default: panic("Cannot insert to bucket which is not single, vector or rax"); @@ -1083,7 +1086,7 @@ int vsetAddEntry(vset *set, void *entry, long long 
expiry) { return 1; } -int vsetRemoveEntry(vset *set, void *entry, long long expiry) { +int vsetRemoveEntry(vset *set, vsetGetExpiryFunc getExpiry, void *entry, long long expiry) { bool removed; vsetBucket *bucket = set->expiry_buckets; int bucket_type = vsetBucketType(bucket); @@ -1092,16 +1095,16 @@ int vsetRemoveEntry(vset *set, void *entry, long long expiry) { /* We cannot remove from empty set */ return 0; case VSET_BUCKET_SINGLE: - bucket = removeFromBucket_SINGLE(set, bucket, entry, expiry, &removed); + bucket = removeFromBucket_SINGLE(set, getExpiry, bucket, entry, expiry, &removed); break; case VSET_BUCKET_VECTOR: - bucket = removeFromBucket_VECTOR(set, bucket, entry, expiry, &removed); + bucket = removeFromBucket_VECTOR(set, getExpiry, bucket, entry, expiry, &removed); break; case VSET_BUCKET_HT: - bucket = removeFromBucket_HASHTABLE(set, bucket, entry, expiry, &removed); + bucket = removeFromBucket_HASHTABLE(set, getExpiry, bucket, entry, expiry, &removed); break; case VSET_BUCKET_RAX: - bucket = removeFromBucket_RAX(set, bucket, entry, expiry, &removed); + bucket = removeFromBucket_RAX(set, getExpiry, bucket, entry, expiry, &removed); break; default: panic("Cannot insert to bucket which is not single, vector or rax"); @@ -1110,20 +1113,20 @@ int vsetRemoveEntry(vset *set, void *entry, long long expiry) { return removed ? 
1 : 0; } -int vsetUpdateEntry(vset *set, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry) { +int vsetUpdateEntry(vset *set, vsetGetExpiryFunc getExpiry, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry) { if (old_entry == new_entry && old_expiry == new_expiry) return 1; if (old_entry && old_expiry != -1) - assert((vsetRemoveEntry(set, old_entry, old_expiry))); + assert((vsetRemoveEntry(set, getExpiry, old_entry, old_expiry))); if (new_entry && new_expiry != -1) - assert(vsetAddEntry(set, new_entry, new_expiry)); + assert(vsetAddEntry(set, getExpiry, new_entry, new_expiry)); return 1; } -static void *vsetGetFirstExpired(vset *set, mstime_t now, bool delete) { +static void *vsetGetFirstExpired(vset *set, vsetGetExpiryFunc getExpiry, mstime_t now, bool delete) { int set_type = vsetBucketType(set->expiry_buckets); void *entry = NULL; long long expiry; @@ -1139,20 +1142,20 @@ static void *vsetGetFirstExpired(vset *set, mstime_t now, bool delete) { vsetStop(&iter); if (bucket_ts > now) return NULL; - expiry = set->etypr->getExpiry(entry); + expiry = getExpiry(entry); assert(expiry <= now); break; } case VSET_BUCKET_SINGLE: { entry = vsetBucketSingle(set->expiry_buckets); - expiry = set->etypr->getExpiry(entry); + expiry = getExpiry(entry); if (expiry > now) return NULL; break; } case VSET_BUCKET_VECTOR: { entry = pvGet(vsetBucketVector(set->expiry_buckets), 0); - expiry = set->etypr->getExpiry(entry); + expiry = getExpiry(entry); if (expiry > now) return NULL; break; @@ -1162,7 +1165,7 @@ static void *vsetGetFirstExpired(vset *set, mstime_t now, bool delete) { hashtableInitIterator(&iter, vsetBucketHashtable(set->expiry_buckets), 0); assert(hashtableNext(&iter, &entry)); hashtableResetIterator(&iter); - expiry = set->etypr->getExpiry(entry); + expiry = getExpiry(entry); if (expiry > now) return NULL; break; @@ -1171,16 +1174,16 @@ static void *vsetGetFirstExpired(vset *set, mstime_t now, bool delete) { 
panic("Unknown volatile set bucket type in vsetNext"); } if (delete) - assert(vsetRemoveEntry(set, entry, expiry)); + assert(vsetRemoveEntry(set, getExpiry, entry, expiry)); return entry; } -void *vsetPopExpired(vset *set, mstime_t now) { - return vsetGetFirstExpired(set, now, true); +void *vsetPopExpired(vset *set, vsetGetExpiryFunc getExpiry, mstime_t now) { + return vsetGetFirstExpired(set, getExpiry, now, true); } -void *vsetFirstExpired(vset *set, mstime_t now) { - return vsetGetFirstExpired(set, now, false); +void *vsetFirstExpired(vset *set, vsetGetExpiryFunc getExpiry, mstime_t now) { + return vsetGetFirstExpired(set, getExpiry, now, false); } int vsetNext(vsetIterator *it, void **entryptr) { @@ -1231,9 +1234,8 @@ void vsetStop(vsetIterator *it) { hashtableResetIterator(&it->hiter); } -vset *createVolatileSet(volatileEntryType *type) { +vset *createVolatileSet(void) { vset *set = zmalloc(sizeof(vset)); - set->etypr = type; set->expiry_buckets = vsetBucketSetNone(set->expiry_buckets); return set; } diff --git a/src/vset.h b/src/vset.h index 6331abc7e6..ef04c2006d 100644 --- a/src/vset.h +++ b/src/vset.h @@ -266,6 +266,8 @@ #define VOLATILESET_BUCKET_INTERVAL_MIN (1LL << 4LL) // 2^4 = 16 milliseconds #define VOLATILESET_VECTOR_BUCKET_MAX_SIZE 127 + +typedef long long (*vsetGetExpiryFunc)(const void *entry); typedef struct { sds (*entryGetKey)(const void *entry); @@ -279,7 +281,6 @@ typedef struct { typedef void vsetBucket; typedef struct { - volatileEntryType *etypr; vsetBucket *expiry_buckets; } vset; @@ -304,17 +305,17 @@ typedef struct vsetIterator { int iteration_state; } vsetIterator; -int vsetRemoveEntry(vset *set, void *entry, long long expiry); -int vsetAddEntry(vset *set, void *entry, long long expiry); -void *vsetPopExpired(vset *set, mstime_t now); -void *vsetFirstExpired(vset *set, mstime_t now); -int vsetUpdateEntry(vset *set, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry); +int vsetAddEntry(vset *set, 
vsetGetExpiryFunc getExpiry, void *entry, long long expiry); +int vsetRemoveEntry(vset *set, vsetGetExpiryFunc getExpiry, void *entry, long long expiry); +void *vsetPopExpired(vset *set, vsetGetExpiryFunc getExpiry, mstime_t now); +void *vsetFirstExpired(vset *set, vsetGetExpiryFunc getExpiry, mstime_t now); +int vsetUpdateEntry(vset *set, vsetGetExpiryFunc getExpiry, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry); bool vsetIsEmpty(vset *set); void vsetStart(vset *set, vsetIterator *it); int vsetNext(vsetIterator *it, void **entryptr); void vsetStop(vsetIterator *it); void freeVolatileSet(vset *b); -vset *createVolatileSet(volatileEntryType *type); +vset *createVolatileSet(void); #endif From d464f4fbd3991f412c33a9a30a53e6b185e59347 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Sun, 29 Jun 2025 18:40:55 +0300 Subject: [PATCH 027/119] explicit include stdlib for abs Signed-off-by: Ran Shidlansik --- src/unit/test_vset.c | 2 +- src/vset.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/unit/test_vset.c b/src/unit/test_vset.c index 3ae4052d56..1c980dce51 100644 --- a/src/unit/test_vset.c +++ b/src/unit/test_vset.c @@ -2,6 +2,7 @@ #include "../entry.h" #include "test_help.h" #include "../zmalloc.h" + #include #include #include @@ -10,7 +11,6 @@ #include #include - typedef entry mock_entry; static mock_entry *mockCreateEntry(const char *keystr, long long expiry) { diff --git a/src/vset.c b/src/vset.c index 9ea13e98ef..368244c29a 100644 --- a/src/vset.c +++ b/src/vset.c @@ -8,6 +8,7 @@ #include #include +#include /************************************************************************************************************* * pVector Implementation From 69491e4940abe5a5551dc45fd8854101bb25a71d Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Sun, 29 Jun 2025 20:21:02 +0300 Subject: [PATCH 028/119] make vset a simple bucket pointer Signed-off-by: Ran Shidlansik --- src/t_hash.c | 19 ++-- src/unit/test_vset.c | 65 
++++++------- src/vset.c | 217 +++++++++++++++++++++---------------------- src/vset.h | 7 +- 4 files changed, 151 insertions(+), 157 deletions(-) diff --git a/src/t_hash.c b/src/t_hash.c index 4af1028bde..c69d26e52c 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -48,7 +48,7 @@ static vset *hashTypeGetVolatileSet(robj *o) { serverAssert(o->encoding == OBJ_ENCODING_HASHTABLE); - return *(vset **)hashtableMetadata(o->ptr); + return (vset *)hashtableMetadata(o->ptr); } void hashTypeFreeVolatileSet(robj *o) { @@ -75,19 +75,18 @@ void hashTypeIgnoreTTL(robj *o, bool ignore) { static vset *hashTypeGetOrcreateVolatileSet(robj *o) { serverAssert(o->encoding == OBJ_ENCODING_HASHTABLE); - vset **vset_ref = hashtableMetadata(o->ptr); - if (*vset_ref == NULL) { - *vset_ref = createVolatileSet(); + vset *vset = hashtableMetadata(o->ptr); + if (*vset == NULL) { + createVolatileSet(vset); /* serves mainly for optimization. Use type which supports access function only when needed. */ hashTypeIgnoreTTL(o, false); } - return *vset_ref; + return vset; } static void hashTypeDeleteVolatileSet(robj *o) { - vset **vset_ref = hashtableMetadata(o->ptr); - freeVolatileSet(*vset_ref); - *vset_ref = NULL; + vset *vset = hashtableMetadata(o->ptr); + freeVolatileSet(vset); /* serves mainly for optimization. 
by changing the hashtable type we can avoid extra function call in hashtable access */ hashTypeIgnoreTTL(o, true); } @@ -115,15 +114,13 @@ static void hashTypeTrackUpdateEntry(robj *o, void *old_entry, void *new_entry, return; vset *set = hashTypeGetOrcreateVolatileSet(o); - debugServerAssert(set); + debugServerAssert(!old_tracked || !vsetIsEmpty(set)); if (old_tracked && !new_tracked) serverAssert(vsetRemoveEntry(set, entryGetExpiry, old_entry, old_expiry)); else if (new_tracked && !old_tracked) serverAssert(vsetAddEntry(set, entryGetExpiry, new_entry, new_expiry)); else { - vset *set = hashTypeGetVolatileSet(o); - debugServerAssert(set); serverAssert(vsetUpdateEntry(set, entryGetExpiry, old_entry, new_entry, old_expiry, new_expiry) == 1); } if (vsetIsEmpty(set)) { diff --git a/src/unit/test_vset.c b/src/unit/test_vset.c index 1c980dce51..5b14fa981b 100644 --- a/src/unit/test_vset.c +++ b/src/unit/test_vset.c @@ -38,19 +38,19 @@ int test_vset_add_and_iterate(int argc, char **argv, int flags) { (void)argv; (void)flags; - vset *set = createVolatileSet(); - TEST_ASSERT(set != NULL); + vset set; + createVolatileSet(&set); mock_entry *e1 = mockCreateEntry("item1", 123); mock_entry *e2 = mockCreateEntry("item2", 456); - TEST_ASSERT(vsetAddEntry(set, mockGetExpiry, e1, mockGetExpiry(e1))); - TEST_ASSERT(vsetAddEntry(set, mockGetExpiry, e2, mockGetExpiry(e2))); + TEST_ASSERT(vsetAddEntry(&set, mockGetExpiry, e1, mockGetExpiry(e1))); + TEST_ASSERT(vsetAddEntry(&set, mockGetExpiry, e2, mockGetExpiry(e2))); - TEST_ASSERT(!vsetIsEmpty(set)); + TEST_ASSERT(!vsetIsEmpty(&set)); vsetIterator it; - vsetStart(set, &it); + vsetStart(&set, &it); void *entry; int count = 0; @@ -62,7 +62,7 @@ int test_vset_add_and_iterate(int argc, char **argv, int flags) { TEST_ASSERT(count == 2); vsetStop(&it); - freeVolatileSet(set); + freeVolatileSet(&set); mockFreeEntry(e1); mockFreeEntry(e2); @@ -75,8 +75,8 @@ int test_vset_large_batch_same_expiry(int argc, char **argv, int flags) { (void)argv; 
(void)flags; - vset *set = createVolatileSet(); - TEST_ASSERT(set != NULL); + vset set; + createVolatileSet(&set); const long long expiry_time = 1000LL; const int total_entries = 200; @@ -89,15 +89,15 @@ int test_vset_large_batch_same_expiry(int argc, char **argv, int flags) { char key_buf[32]; snprintf(key_buf, sizeof(key_buf), "entry_%d", i); entries[i] = mockCreateEntry(key_buf, expiry_time); - TEST_ASSERT(vsetAddEntry(set, mockGetExpiry, entries[i], expiry_time)); + TEST_ASSERT(vsetAddEntry(&set, mockGetExpiry, entries[i], expiry_time)); } // Verify set is not empty - TEST_ASSERT(!vsetIsEmpty(set)); + TEST_ASSERT(!vsetIsEmpty(&set)); // Iterate all entries and count them vsetIterator it; - vsetStart(set, &it); + vsetStart(&set, &it); void *entry; int count = 0; @@ -109,7 +109,7 @@ int test_vset_large_batch_same_expiry(int argc, char **argv, int flags) { // Cleanup vsetStop(&it); - freeVolatileSet(set); + freeVolatileSet(&set); for (int i = 0; i < total_entries; i++) { mockFreeEntry(entries[i]); @@ -126,8 +126,8 @@ int test_vset_iterate_multiple_expiries(int argc, char **argv, int flags) { (void)flags; const unsigned int total_entries = 5; - vset *set = createVolatileSet(); - TEST_ASSERT(set != NULL); + vset set; + createVolatileSet(&set); // Prepare entries with mixed expiry times, some duplicates mock_entry *entries[total_entries]; @@ -138,11 +138,11 @@ int test_vset_iterate_multiple_expiries(int argc, char **argv, int flags) { snprintf(key_buf, sizeof(key_buf), "entry_%d", i); long long expiry_time = rand() % 10000; entries[i] = mockCreateEntry(key_buf, expiry_time); - TEST_ASSERT(vsetAddEntry(set, mockGetExpiry, entries[i], expiry_time)); + TEST_ASSERT(vsetAddEntry(&set, mockGetExpiry, entries[i], expiry_time)); } vsetIterator it; - vsetStart(set, &it); + vsetStart(&set, &it); int found[5] = {0}; int total = 0; @@ -169,7 +169,7 @@ int test_vset_iterate_multiple_expiries(int argc, char **argv, int flags) { } vsetStop(&it); - freeVolatileSet(set); + 
freeVolatileSet(&set); for (int i = 0; i < 5; i++) mockFreeEntry(entries[i]); TEST_PRINT_INFO("Iterated all %d mixed expiry entries successfully", total); @@ -181,8 +181,8 @@ int test_vset_add_and_remove_all(int argc, char **argv, int flags) { UNUSED(argv); UNUSED(flags); - vset *set = createVolatileSet(); - TEST_ASSERT(set != NULL); + vset set; + createVolatileSet(&set); const int total_entries = 130; mock_entry *entries[total_entries]; @@ -192,16 +192,16 @@ int test_vset_add_and_remove_all(int argc, char **argv, int flags) { char key[32]; snprintf(key, sizeof(key), "key_%d", i); entries[i] = mockCreateEntry(key, expiry); - TEST_ASSERT(vsetAddEntry(set, mockGetExpiry, entries[i], expiry)); + TEST_ASSERT(vsetAddEntry(&set, mockGetExpiry, entries[i], expiry)); } for (int i = 0; i < total_entries; i++) { - TEST_ASSERT(vsetRemoveEntry(set, mockGetExpiry, entries[i], expiry)); + TEST_ASSERT(vsetRemoveEntry(&set, mockGetExpiry, entries[i], expiry)); mockFreeEntry(entries[i]); } - TEST_ASSERT(vsetIsEmpty(set)); - freeVolatileSet(set); + TEST_ASSERT(vsetIsEmpty(&set)); + freeVolatileSet(&set); TEST_PRINT_INFO("Add/remove %d entries, set size now 0", total_entries); return 0; @@ -311,32 +311,33 @@ int test_vset_fuzzer(int argc, char **argv, int flags) { UNUSED(flags); srand(time(NULL)); - vset *set = createVolatileSet(); + vset set; + createVolatileSet(&set); for (int i = 0; i < NUM_ITERATIONS; i++) { int op = rand() % 4; switch (op) { case 0: case 1: - insert_mock_entry(set); + insert_mock_entry(&set); break; case 2: - update_mock_entry(set); + update_mock_entry(&set); break; case 3: - remove_mock_entry(set); + remove_mock_entry(&set); break; } if (i % 100 == 0) { mstime_t now = rand() % 10000; - expire_mock_entries(set, now); + expire_mock_entries(&set, now); } } /* now expire all the entries and check that we have no entries left */ - expire_mock_entries(set, LONG_LONG_MAX); - TEST_ASSERT(vsetIsEmpty(set) && mock_entry_count == 0); - freeVolatileSet(set); + 
expire_mock_entries(&set, LONG_LONG_MAX); + TEST_ASSERT(vsetIsEmpty(&set) && mock_entry_count == 0); + freeVolatileSet(&set); free_mock_entries(); /* Just in case */ return 0; } diff --git a/src/vset.c b/src/vset.c index 368244c29a..a56adbede2 100644 --- a/src/vset.c +++ b/src/vset.c @@ -704,7 +704,7 @@ static bool splitBucketIfPossible(vsetBucket *parent, vsetGetExpiryFunc getExpir /* modify the current bucket data pointer */ key_len = encodeExpiryKey(bucket_ts, key); /* In order to avoid rax override, we directly change the node data */ - // alternative: raxInsert(set->expiry_buckets, key, key_len, bucket, NULL); + // alternative: raxInsert(*set, key, key_len, bucket, NULL); raxSetData(node, bucket); } else { @@ -821,8 +821,7 @@ static inline vsetBucket *insertToBucket_RAX(vsetGetExpiryFunc getExpiry, vsetBu return target; } -static inline vsetBucket *removeFromBucket_SINGLE(vset *set, vsetGetExpiryFunc getExpiry, vsetBucket *bucket, void *entry, long long expiry, bool *removed) { - UNUSED(set); +static inline vsetBucket *removeFromBucket_SINGLE(vsetGetExpiryFunc getExpiry, vsetBucket *bucket, void *entry, long long expiry, bool *removed) { UNUSED(getExpiry); UNUSED(expiry); @@ -835,8 +834,7 @@ static inline vsetBucket *removeFromBucket_SINGLE(vset *set, vsetGetExpiryFunc g } } -static inline vsetBucket *removeFromBucket_VECTOR(vset *set, vsetGetExpiryFunc getExpiry, vsetBucket *bucket, void *entry, long long expiry, bool *removed) { - UNUSED(set); +static inline vsetBucket *removeFromBucket_VECTOR(vsetGetExpiryFunc getExpiry, vsetBucket *bucket, void *entry, long long expiry, bool *removed) { UNUSED(getExpiry); UNUSED(expiry); @@ -868,8 +866,7 @@ static inline vsetBucket *removeFromBucket_VECTOR(vset *set, vsetGetExpiryFunc g return new_bucket; } -static inline vsetBucket *removeFromBucket_HASHTABLE(vset *set, vsetGetExpiryFunc getExpiry, vsetBucket *bucket, void *entry, long long expiry, bool *removed) { - UNUSED(set); +static inline vsetBucket 
*removeFromBucket_HASHTABLE(vsetGetExpiryFunc getExpiry, vsetBucket *bucket, void *entry, long long expiry, bool *removed) { UNUSED(getExpiry); UNUSED(expiry); @@ -892,6 +889,78 @@ static inline vsetBucket *removeFromBucket_HASHTABLE(vset *set, vsetGetExpiryFun if (removed) *removed = success; return new_bucket; } +static bool raxBucketRemoveEntry(vsetBucket *parent, vsetGetExpiryFunc getExpiry, void *entry, vsetBucket *bucket, unsigned char *key, size_t key_len, vsetBucket **pbucket, raxNode *node) { + bool removed = false; + switch (vsetBucketType(bucket)) { + case VSET_BUCKET_SINGLE: + bucket = removeFromBucket_SINGLE(getExpiry, bucket, entry, 0, &removed); + if (removed) { + raxRemove(vsetBucketRax(parent), key, key_len, NULL); + if (pbucket) *pbucket = NULL; + } + break; + case VSET_BUCKET_VECTOR: { + vsetBucket *new_bucket = removeFromBucket_VECTOR(getExpiry, bucket, entry, 0, &removed); + if (new_bucket != bucket) { + if (!new_bucket) { + raxRemove(vsetBucketRax(parent), key, key_len, NULL); + if (pbucket) *pbucket = NULL; + } else { + /* In order to avoid rax override, we directly change the node data */ + // alternative: raxInsert(*set, key, key_len, new_bucket, NULL); + raxSetData(node, new_bucket); + if (pbucket) *pbucket = new_bucket; + } + } + break; + } + case VSET_BUCKET_HT: { + vsetBucket *new_bucket = removeFromBucket_HASHTABLE(getExpiry, bucket, entry, 0, &removed); + if (new_bucket != bucket) + /* In order to avoid rax override, we directly change the node data */ + // alternative: raxInsert(*set, key, key_len, bucket, NULL); + raxSetData(node, new_bucket); + + if (pbucket) *pbucket = new_bucket; + break; + } + default: + panic("Unknown bucket type for raxBucketRemoveEntry"); + return false; + } + return removed; +} + +static inline vsetBucket *removeFromBucket_RAX(vsetGetExpiryFunc getExpiry, vsetBucket *target, void *entry, long long expiry, bool *removed) { + unsigned char key[VSET_BUCKET_KEY_LEN] = {0}; + long long bucket_ts; + size_t 
key_len; + raxNode *node; + rax *expiry_buckets = vsetBucketRax(target); + vsetBucket *bucket = findBucket(expiry_buckets, expiry, key, &key_len, &bucket_ts, &node); + assert(bucket); + bool success = raxBucketRemoveEntry(target, getExpiry, entry, bucket, key, key_len, NULL, node); + if (removed) *removed = success; + // shrink to single bucket if possible + if (raxSize(expiry_buckets) == 1) { + raxIterator it; + raxStart(&it, expiry_buckets); + assert(raxSeek(&it, "^", NULL, 0)); + assert(raxNext(&it)); + bucket = it.data; + int bucket_type = vsetBucketType(bucket); + raxStop(&it); + /* We will not convert hashtable to our only bucket since we will lose the ability to scan the items in a sorted way. + * We will also not shrink when we have a full vector, since it might immediately be repopulated. */ + if (bucket_type == VSET_BUCKET_SINGLE || + (bucket_type == VSET_BUCKET_VECTOR && pvLen(vsetBucketVector(bucket)) < VOLATILESET_VECTOR_BUCKET_MAX_SIZE)) { + /* lets make our bucket to be the only left bucket */ + target = bucket; + raxFree(expiry_buckets); + } + } + return target; +} static int vsetBucketNext_NONE(vsetIterator *it, void **entryptr) { UNUSED(it); @@ -964,90 +1033,18 @@ static inline int vsetBucketNext_RAX(vsetIterator *it, void **entryptr) { return 1; } -static bool raxBucketRemoveEntry(vset *set, vsetGetExpiryFunc getExpiry, void *entry, vsetBucket *bucket, unsigned char *key, size_t key_len, vsetBucket **pbucket, raxNode *node) { - bool removed = false; - switch (vsetBucketType(bucket)) { - case VSET_BUCKET_SINGLE: - bucket = removeFromBucket_SINGLE(set, getExpiry, bucket, entry, 0, &removed); - if (removed) { - raxRemove(vsetBucketRax(set->expiry_buckets), key, key_len, NULL); - if (pbucket) *pbucket = NULL; - } - break; - case VSET_BUCKET_VECTOR: { - vsetBucket *new_bucket = removeFromBucket_VECTOR(set, getExpiry, bucket, entry, 0, &removed); - if (new_bucket != bucket) { - if (!new_bucket) { - raxRemove(vsetBucketRax(set->expiry_buckets), key, 
key_len, NULL); - if (pbucket) *pbucket = NULL; - } else { - /* In order to avoid rax override, we directly change the node data */ - // alternative: raxInsert(set->expiry_buckets, key, key_len, new_bucket, NULL); - raxSetData(node, new_bucket); - if (pbucket) *pbucket = new_bucket; - } - } - break; - } - case VSET_BUCKET_HT: { - vsetBucket *new_bucket = removeFromBucket_HASHTABLE(set, getExpiry, bucket, entry, 0, &removed); - if (new_bucket != bucket) - /* In order to avoid rax override, we directly change the node data */ - // alternative: raxInsert(set->expiry_buckets, key, key_len, bucket, NULL); - raxSetData(node, new_bucket); - - if (pbucket) *pbucket = new_bucket; - break; - } - default: - panic("Unknown bucket type for raxBucketRemoveEntry"); - return false; - } - return removed; -} - -static inline vsetBucket *removeFromBucket_RAX(vset *set, vsetGetExpiryFunc getExpiry, vsetBucket *target, void *entry, long long expiry, bool *removed) { - unsigned char key[VSET_BUCKET_KEY_LEN] = {0}; - long long bucket_ts; - size_t key_len; - raxNode *node; - rax *expiry_buckets = vsetBucketRax(target); - vsetBucket *bucket = findBucket(expiry_buckets, expiry, key, &key_len, &bucket_ts, &node); - assert(bucket); - bool success = raxBucketRemoveEntry(set, getExpiry, entry, bucket, key, key_len, NULL, node); - if (removed) *removed = success; - // shrink to single bucket if possible - if (raxSize(expiry_buckets) == 1) { - raxIterator it; - raxStart(&it, expiry_buckets); - assert(raxSeek(&it, "^", NULL, 0)); - assert(raxNext(&it)); - bucket = it.data; - int bucket_type = vsetBucketType(bucket); - raxStop(&it); - /* We will not convert hashtable to our only bucket since we will lose the ability to scan the items in a sorted way. - * We will also not shrink when we have a full vector, since it might immediately be repopulated. 
*/ - if (bucket_type == VSET_BUCKET_SINGLE || - (bucket_type == VSET_BUCKET_VECTOR && pvLen(vsetBucketVector(bucket)) < VOLATILESET_VECTOR_BUCKET_MAX_SIZE)) { - /* lets make our bucket to be the only left bucket */ - target = bucket; - raxFree(expiry_buckets); - } - } - return target; -} - int vsetAddEntry(vset *set, vsetGetExpiryFunc getExpiry, void *entry, long long expiry) { - int bucket_type = vsetBucketType(set->expiry_buckets); + vsetBucket *expiry_buckets = *set; + int bucket_type = vsetBucketType(expiry_buckets); switch (bucket_type) { case VSET_BUCKET_NONE: - set->expiry_buckets = insertToBucket_NONE(getExpiry, set->expiry_buckets, entry, expiry); + expiry_buckets = insertToBucket_NONE(getExpiry, expiry_buckets, entry, expiry); break; case VSET_BUCKET_SINGLE: - set->expiry_buckets = insertToBucket_SINGLE(getExpiry, set->expiry_buckets, entry, expiry); + expiry_buckets = insertToBucket_SINGLE(getExpiry, expiry_buckets, entry, expiry); break; case VSET_BUCKET_VECTOR: { - pVector *vec = vsetBucketVector(set->expiry_buckets); + pVector *vec = vsetBucketVector(expiry_buckets); uint32_t len = pvLen(vec); /* in case the vector is full, we need to turn into RAX */ if (len == VOLATILESET_VECTOR_BUCKET_MAX_SIZE) { @@ -1058,59 +1055,61 @@ int vsetAddEntry(vset *set, vsetGetExpiryFunc getExpiry, void *entry, long long /* In case we can just insert the bucket, no need to iterate and insert it's elements. we can just push the bucket as a whole. 
*/ unsigned char key[VSET_BUCKET_KEY_LEN] = {0}; size_t key_len = encodeNewExpiryBucketKey(key, max_expiry); - raxInsert(r, key, key_len, set->expiry_buckets, NULL); - set->expiry_buckets = vsetBucketSetRax(set->expiry_buckets, r); - set->expiry_buckets = insertToBucket_RAX(getExpiry, set->expiry_buckets, entry, expiry); + raxInsert(r, key, key_len, expiry_buckets, NULL); + expiry_buckets = vsetBucketSetRax(expiry_buckets, r); + expiry_buckets = insertToBucket_RAX(getExpiry, expiry_buckets, entry, expiry); } else { /* We need to migrate entries to the new set of buckets since we do not know all entries are in the same bucket */ - set->expiry_buckets = vsetBucketSetRax(set->expiry_buckets, r); + expiry_buckets = vsetBucketSetRax(expiry_buckets, r); for (uint32_t i = 0; i < len; i++) { void *moved_entry = pvGet(vec, i); - set->expiry_buckets = insertToBucket_RAX(getExpiry, set->expiry_buckets, moved_entry, getExpiry(moved_entry)); + expiry_buckets = insertToBucket_RAX(getExpiry, expiry_buckets, moved_entry, getExpiry(moved_entry)); } /* free the vector */ pvFree(vec); /* now insert the new entry to the buckets */ - set->expiry_buckets = insertToBucket_RAX(getExpiry, set->expiry_buckets, entry, expiry); + expiry_buckets = insertToBucket_RAX(getExpiry, expiry_buckets, entry, expiry); } } else { - set->expiry_buckets = insertToBucket_VECTOR(getExpiry, set->expiry_buckets, entry, expiry); + expiry_buckets = insertToBucket_VECTOR(getExpiry, expiry_buckets, entry, expiry); } break; } case VSET_BUCKET_RAX: - set->expiry_buckets = insertToBucket_RAX(getExpiry, set->expiry_buckets, entry, expiry); + expiry_buckets = insertToBucket_RAX(getExpiry, expiry_buckets, entry, expiry); break; default: panic("Cannot insert to bucket which is not single, vector or rax"); } + /* update the set */ + *set = expiry_buckets; return 1; } int vsetRemoveEntry(vset *set, vsetGetExpiryFunc getExpiry, void *entry, long long expiry) { bool removed; - vsetBucket *bucket = set->expiry_buckets; + 
vsetBucket *bucket = *set; int bucket_type = vsetBucketType(bucket); switch (bucket_type) { case VSET_BUCKET_NONE: /* We cannot remove from empty set */ return 0; case VSET_BUCKET_SINGLE: - bucket = removeFromBucket_SINGLE(set, getExpiry, bucket, entry, expiry, &removed); + bucket = removeFromBucket_SINGLE(getExpiry, bucket, entry, expiry, &removed); break; case VSET_BUCKET_VECTOR: - bucket = removeFromBucket_VECTOR(set, getExpiry, bucket, entry, expiry, &removed); + bucket = removeFromBucket_VECTOR(getExpiry, bucket, entry, expiry, &removed); break; case VSET_BUCKET_HT: - bucket = removeFromBucket_HASHTABLE(set, getExpiry, bucket, entry, expiry, &removed); + bucket = removeFromBucket_HASHTABLE(getExpiry, bucket, entry, expiry, &removed); break; case VSET_BUCKET_RAX: - bucket = removeFromBucket_RAX(set, getExpiry, bucket, entry, expiry, &removed); + bucket = removeFromBucket_RAX(getExpiry, bucket, entry, expiry, &removed); break; default: panic("Cannot insert to bucket which is not single, vector or rax"); } - set->expiry_buckets = bucket; + *set = bucket; return removed ? 
1 : 0; } @@ -1128,7 +1127,7 @@ int vsetUpdateEntry(vset *set, vsetGetExpiryFunc getExpiry, void *old_entry, voi } static void *vsetGetFirstExpired(vset *set, vsetGetExpiryFunc getExpiry, mstime_t now, bool delete) { - int set_type = vsetBucketType(set->expiry_buckets); + int set_type = vsetBucketType(*set); void *entry = NULL; long long expiry; switch (set_type) { @@ -1148,14 +1147,14 @@ static void *vsetGetFirstExpired(vset *set, vsetGetExpiryFunc getExpiry, mstime_ break; } case VSET_BUCKET_SINGLE: { - entry = vsetBucketSingle(set->expiry_buckets); + entry = vsetBucketSingle(*set); expiry = getExpiry(entry); if (expiry > now) return NULL; break; } case VSET_BUCKET_VECTOR: { - entry = pvGet(vsetBucketVector(set->expiry_buckets), 0); + entry = pvGet(vsetBucketVector(*set), 0); expiry = getExpiry(entry); if (expiry > now) return NULL; @@ -1163,7 +1162,7 @@ static void *vsetGetFirstExpired(vset *set, vsetGetExpiryFunc getExpiry, mstime_ } case VSET_BUCKET_HT: { hashtableIterator iter; - hashtableInitIterator(&iter, vsetBucketHashtable(set->expiry_buckets), 0); + hashtableInitIterator(&iter, vsetBucketHashtable(*set), 0); assert(hashtableNext(&iter, &entry)); hashtableResetIterator(&iter); expiry = getExpiry(entry); @@ -1221,7 +1220,7 @@ int vsetNext(vsetIterator *it, void **entryptr) { void vsetStart(vset *set, vsetIterator *it) { it->iteration_state = VSET_BUCKET_NONE; /*lets start by going to the first bucket. 
*/ - it->bucket = set->expiry_buckets; + it->bucket = *set; it->bucket_ts = -1; it->parent_bucket = vsetBucketSetNone(it->parent_bucket); } @@ -1235,18 +1234,16 @@ void vsetStop(vsetIterator *it) { hashtableResetIterator(&it->hiter); } -vset *createVolatileSet(void) { - vset *set = zmalloc(sizeof(vset)); - set->expiry_buckets = vsetBucketSetNone(set->expiry_buckets); +vset *createVolatileSet(vset *set) { + *set = vsetBucketSetNone(*set); return set; } void freeVolatileSet(vset *set) { - if (!set) return; - freeVsetBucket(set->expiry_buckets); - zfree(set); + if (!(*set)) return; + freeVsetBucket(*set); } bool vsetIsEmpty(vset *set) { - return vsetBucketType(set->expiry_buckets) == VSET_BUCKET_NONE; + return vsetBucketType(*set) == VSET_BUCKET_NONE; } diff --git a/src/vset.h b/src/vset.h index ef04c2006d..54d9e3a622 100644 --- a/src/vset.h +++ b/src/vset.h @@ -280,9 +280,8 @@ typedef struct { // Generic bucket type typedef void vsetBucket; -typedef struct { - vsetBucket *expiry_buckets; -} vset; +// vset is just a pointer to a bucket +typedef vsetBucket* vset; typedef struct vsetIterator { /* for rax bucket */ @@ -315,7 +314,7 @@ void vsetStart(vset *set, vsetIterator *it); int vsetNext(vsetIterator *it, void **entryptr); void vsetStop(vsetIterator *it); void freeVolatileSet(vset *b); -vset *createVolatileSet(void); +vset *createVolatileSet(vset *set); #endif From 2cdca3335a24a524198fe27eda61b1a7b7465dfd Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Sun, 29 Jun 2025 21:12:15 +0300 Subject: [PATCH 029/119] fix some format issues Signed-off-by: Ran Shidlansik --- src/vset.h | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/src/vset.h b/src/vset.h index 54d9e3a622..bdf39b2525 100644 --- a/src/vset.h +++ b/src/vset.h @@ -3,7 +3,6 @@ #include #include -#include "hashtable.h" #include "hashtable.h" #include "rax.h" @@ -64,7 +63,7 @@ * → it is promoted to a `VECTOR` bucket (sorted by expiry). * 3. 
If the `VECTOR` exceeds `VOLATILESET_VECTOR_BUCKET_MAX_SIZE` (127): * → the set becomes a `RAX`, and existing entries are migrated. - * 4. IF the set is using RAX encoding it will locate a bucket to add the entry + * 4. IF the set is using RAX encoding it will locate a bucket to add the entry * following the strategy explained below. * *----------------------------------------------------------------------------- @@ -72,7 +71,7 @@ *----------------------------------------------------------------------------- * * Each bucket in the RAX bucket corresponds to a **time window**, defined by - * its bucket timestamp (`bucket_ts`). This timestamp represents the **END** of + * its bucket timestamp (`bucket_ts`). This timestamp represents the **END** of * the time window. Entries in the bucket must expire *before* this timestamp. * * Time windows are defined in granular ranges: @@ -268,20 +267,12 @@ #define VOLATILESET_VECTOR_BUCKET_MAX_SIZE 127 typedef long long (*vsetGetExpiryFunc)(const void *entry); -typedef struct { - sds (*entryGetKey)(const void *entry); - - long long (*getExpiry)(const void *entry); - - int (*expire)(void*db, void* o, void *entry); - -} volatileEntryType; // Generic bucket type typedef void vsetBucket; // vset is just a pointer to a bucket -typedef vsetBucket* vset; +typedef vsetBucket *vset; typedef struct vsetIterator { /* for rax bucket */ @@ -301,7 +292,7 @@ typedef struct vsetIterator { /* In case of rax encoded set, this is the current iterated bucket timestamp */ long long bucket_ts; /* the state of the iteration */ - int iteration_state; + int iteration_state; } vsetIterator; int vsetAddEntry(vset *set, vsetGetExpiryFunc getExpiry, void *entry, long long expiry); From 6c527c80103d7fecd2fc2203e4e019f776ccfb48 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Sun, 29 Jun 2025 22:02:46 +0300 Subject: [PATCH 030/119] fix a bug in HINCRBY/(FLOAT). When HSET is called we do make sure to persist the field in case it has expiration. 
However, HINCRBY/HINCRBYFLOAT must not do this for a volatile field which has NOT yet expired: such a field keeps its TTL. Signed-off-by: Ran Shidlansik --- src/t_hash.c | 52 ++++++++++++++++++------------------ tests/unit/hashexpire.tcl | 55 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 79 insertions(+), 28 deletions(-) diff --git a/src/t_hash.c b/src/t_hash.c index f241234067..20f2a40482 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -208,19 +208,6 @@ int hashTypeGetFromListpack(robj *o, sds field, unsigned char **vstr, unsigned i return -1; } -/* Get the value from a hash table encoded hash, identified by field. - * Returns NULL when the field cannot be found, otherwise the SDS value - * is returned. */ -sds hashTypeGetFromHashTable(robj *o, sds field) { - serverAssert(o->encoding == OBJ_ENCODING_HASHTABLE); - void *found_element = NULL; - hashtableFind(o->ptr, field, &found_element); - if (found_element) - return entryGetValue(found_element); - else - return NULL; -} - /* Higher level function of hashTypeGet*() that returns the hash value * associated with the specified field. If the field is found C_OK * is returned, otherwise C_ERR. The returned object is returned by @@ -229,16 +216,26 @@ sds hashTypeGetFromHashTable(robj *o, sds field) { * * If *vll is populated *vstr is set to NULL, so the caller * can always check the function return by checking the return value - * for C_OK and checking if vll (or vstr) is NULL. */ -int hashTypeGetValue(robj *o, sds field, unsigned char **vstr, unsigned int *vlen, long long *vll) { + * for C_OK and checking if vll (or vstr) is NULL. + * + * If *expiry is populated than the function will also provide the current field expiration time + * or EXPIRY_NONE in case the field has no expiration time defined.
*/ +int hashTypeGetValue(robj *o, sds field, unsigned char **vstr, unsigned int *vlen, long long *vll, long long *expiry) { if (o->encoding == OBJ_ENCODING_LISTPACK) { *vstr = NULL; - if (hashTypeGetFromListpack(o, field, vstr, vlen, vll) == 0) return C_OK; + if (hashTypeGetFromListpack(o, field, vstr, vlen, vll) == 0) { + if (expiry) *expiry = EXPIRY_NONE; + return C_OK; + } } else if (o->encoding == OBJ_ENCODING_HASHTABLE) { - sds value = hashTypeGetFromHashTable(o, field); - if (value != NULL) { + void *entry = NULL; + hashtableFind(o->ptr, field, &entry); + if (entry) { + sds value = entryGetValue(entry); + serverAssert(value != NULL); *vstr = (unsigned char *)value; *vlen = sdslen(value); + if (expiry) *expiry = entryGetExpiry(entry); return C_OK; } } else { @@ -278,7 +275,7 @@ robj *hashTypeGetValueObject(robj *o, sds field) { unsigned int vlen; long long vll; - if (hashTypeGetValue(o, field, &vstr, &vlen, &vll) == C_ERR) return NULL; + if (hashTypeGetValue(o, field, &vstr, &vlen, &vll, NULL) == C_ERR) return NULL; if (vstr) return createStringObject((char *)vstr, vlen); else @@ -294,7 +291,7 @@ size_t hashTypeGetValueLength(robj *o, sds field) { unsigned int vlen = UINT_MAX; long long vll = LLONG_MAX; - if (hashTypeGetValue(o, field, &vstr, &vlen, &vll) == C_OK) len = vstr ? vlen : sdigits10(vll); + if (hashTypeGetValue(o, field, &vstr, &vlen, &vll, NULL) == C_OK) len = vstr ? vlen : sdigits10(vll); return len; } @@ -306,7 +303,7 @@ int hashTypeExists(robj *o, sds field) { unsigned int vlen = UINT_MAX; long long vll = LLONG_MAX; - return hashTypeGetValue(o, field, &vstr, &vlen, &vll) == C_OK; + return hashTypeGetValue(o, field, &vstr, &vlen, &vll, NULL) == C_OK; } /* Add a new field, overwrite the old with the new value if it already exists. 
@@ -860,10 +857,10 @@ void hincrbyCommand(client *c) { sds new; unsigned char *vstr; unsigned int vlen; - + long long expiry = EXPIRY_NONE; if (getLongLongFromObjectOrReply(c, c->argv[3], &incr, NULL) != C_OK) return; if ((o = hashTypeLookupWriteOrCreate(c, c->argv[1])) == NULL) return; - if (hashTypeGetValue(o, c->argv[2]->ptr, &vstr, &vlen, &value) == C_OK) { + if (hashTypeGetValue(o, c->argv[2]->ptr, &vstr, &vlen, &value, &expiry) == C_OK) { if (vstr) { if (string2ll((char *)vstr, vlen, &value) == 0) { addReplyError(c, "hash value is not an integer"); @@ -882,7 +879,7 @@ void hincrbyCommand(client *c) { } value += incr; new = sdsfromlonglong(value); - hashTypeSet(o, c->argv[2]->ptr, new, EXPIRY_NONE, HASH_SET_TAKE_VALUE); + hashTypeSet(o, c->argv[2]->ptr, new, expiry, HASH_SET_TAKE_VALUE); signalModifiedKey(c, c->db, c->argv[1]); notifyKeyspaceEvent(NOTIFY_HASH, "hincrby", c->argv[1], c->db->id); server.dirty++; @@ -896,6 +893,7 @@ void hincrbyfloatCommand(client *c) { sds new; unsigned char *vstr; unsigned int vlen; + long long expiry = EXPIRY_NONE; if (getLongDoubleFromObjectOrReply(c, c->argv[3], &incr, NULL) != C_OK) return; if (isnan(incr) || isinf(incr)) { @@ -904,7 +902,7 @@ void hincrbyfloatCommand(client *c) { } if ((o = hashTypeLookupWriteOrCreate(c, c->argv[1])) == NULL) return; - if (hashTypeGetValue(o, c->argv[2]->ptr, &vstr, &vlen, &ll) == C_OK) { + if (hashTypeGetValue(o, c->argv[2]->ptr, &vstr, &vlen, &ll, &expiry) == C_OK) { if (vstr) { if (string2ld((char *)vstr, vlen, &value) == 0) { addReplyError(c, "hash value is not a float"); @@ -926,7 +924,7 @@ void hincrbyfloatCommand(client *c) { char buf[MAX_LONG_DOUBLE_CHARS]; int len = ld2string(buf, sizeof(buf), value, LD_STR_HUMAN); new = sdsnewlen(buf, len); - hashTypeSet(o, c->argv[2]->ptr, new, EXPIRY_NONE, HASH_SET_TAKE_VALUE); + hashTypeSet(o, c->argv[2]->ptr, new, expiry, HASH_SET_TAKE_VALUE); signalModifiedKey(c, c->db, c->argv[1]); notifyKeyspaceEvent(NOTIFY_HASH, "hincrbyfloat", 
c->argv[1], c->db->id); server.dirty++; @@ -952,7 +950,7 @@ static void addHashFieldToReply(client *c, robj *o, sds field) { unsigned int vlen = UINT_MAX; long long vll = LLONG_MAX; - if (hashTypeGetValue(o, field, &vstr, &vlen, &vll) == C_OK) { + if (hashTypeGetValue(o, field, &vstr, &vlen, &vll, NULL) == C_OK) { if (vstr) { addReplyBulkCBuffer(c, vstr, vlen); } else { diff --git a/tests/unit/hashexpire.tcl b/tests/unit/hashexpire.tcl index d042e5c008..3ecd07b837 100644 --- a/tests/unit/hashexpire.tcl +++ b/tests/unit/hashexpire.tcl @@ -320,18 +320,71 @@ test {HINCRBY - on expired field} { fail "hash value was not expired after timeout" } - # Field should still be present in memory due to lazy expiry + # Field should still be present in memory assert_equal 1 [r HLEN myhash] # Overwrite with HINCRBY (no TTL) before accessing r HINCRBY myhash field1 1 + # Sanity check: check we only have one field in the hash + assert_equal 1 [r HLEN myhash] + + # TTL should now be gone; field becomes persistent + set ttl [r HPTTL myhash FIELDS 1 field1] + assert_equal -1 $ttl + assert_equal 1 [r HGET myhash field1] + assert_equal 1 [r HLEN myhash] + + # set expiration on the field + assert_equal 1 [r HEXPIRE myhash 100000000 FIELDS 1 field1] + # verify the field has TTL + assert_morethan [r HPTTL myhash FIELDS 1 field1] 0 + # now incr the field again + assert_equal 2 [r HINCRBY myhash field1 1] + # verify the field has TTL + assert_morethan [r HPTTL myhash FIELDS 1 field1] 0 + r debug SET-ACTIVE-EXPIRE yes +} + +test {HINCRBYFLOAT - on expired field} { + r FLUSHALL + r debug SET-ACTIVE-EXPIRE no + + # This test verifies that if a field has expired, + # and it is overwritten using a plain HINCRBYFLOAT (i.e., no TTL), + # Valkey treats the field as still existing and updates it, + # effectively clearing the old TTL and starting the value from 0. 
+ + r HSETEX myhash PX 10 FIELDS 1 field1 1 + wait_for_condition 100 100 { + [r HTTL myhash FIELDS 1 field1] eq "-2" + } else { + fail "hash value was not expired after timeout" + } + + # Field should still be present in memory + assert_equal 1 [r HLEN myhash] + + # Overwrite with HINCRBYFLOAT (no TTL) before accessing + r HINCRBYFLOAT myhash field1 1 + + # Sanity check: check we only have one field in the hash + assert_equal 1 [r HLEN myhash] + # TTL should now be gone; field becomes persistent set ttl [r HPTTL myhash FIELDS 1 field1] assert_equal -1 $ttl assert_equal 1 [r HGET myhash field1] assert_equal 1 [r HLEN myhash] + # set expiration on the field + assert_equal 1 [r HEXPIRE myhash 100000000 FIELDS 1 field1] + # verify the field has TTL + assert_morethan [r HPTTL myhash FIELDS 1 field1] 0 + # now incr the field again + assert_equal 2 [r HINCRBYFLOAT myhash field1 1] + # verify the field has TTL + assert_morethan [r HPTTL myhash FIELDS 1 field1] 0 r debug SET-ACTIVE-EXPIRE yes } From 7d55e7954e0132d569219451ea00cc975cdc6fa3 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Mon, 30 Jun 2025 11:11:45 +0300 Subject: [PATCH 031/119] address PR comments Signed-off-by: Ran Shidlansik --- src/t_hash.c | 13 ++---- src/vset.c | 122 ++++++++++++++++++++++++--------------------------- src/vset.h | 8 ++-- 3 files changed, 66 insertions(+), 77 deletions(-) diff --git a/src/t_hash.c b/src/t_hash.c index c69d26e52c..68648bd292 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -93,14 +93,14 @@ static void hashTypeDeleteVolatileSet(robj *o) { void hashTypeTrackEntry(robj *o, void *entry) { vset *set = hashTypeGetOrcreateVolatileSet(o); - serverAssert(vsetAddEntry(set, entryGetExpiry, entry, entryGetExpiry(entry))); + serverAssert(vsetAddEntry(set, entryGetExpiry, entry)); } void hashTypeUntrackEntry(robj *o, void *entry) { if (!entryHasExpiry(entry)) return; vset *set = hashTypeGetVolatileSet(o); debugServerAssert(set); - serverAssert(vsetRemoveEntry(set, entryGetExpiry, 
entry, entryGetExpiry(entry))); + serverAssert(vsetRemoveEntry(set, entryGetExpiry, entry)); if (vsetIsEmpty(set)) { hashTypeDeleteVolatileSet(o); } @@ -116,13 +116,8 @@ static void hashTypeTrackUpdateEntry(robj *o, void *old_entry, void *new_entry, vset *set = hashTypeGetOrcreateVolatileSet(o); debugServerAssert(!old_tracked || !vsetIsEmpty(set)); - if (old_tracked && !new_tracked) - serverAssert(vsetRemoveEntry(set, entryGetExpiry, old_entry, old_expiry)); - else if (new_tracked && !old_tracked) - serverAssert(vsetAddEntry(set, entryGetExpiry, new_entry, new_expiry)); - else { - serverAssert(vsetUpdateEntry(set, entryGetExpiry, old_entry, new_entry, old_expiry, new_expiry) == 1); - } + serverAssert(vsetUpdateEntry(set, entryGetExpiry, old_entry, new_entry, old_expiry, new_expiry) == 1); + if (vsetIsEmpty(set)) { hashTypeDeleteVolatileSet(o); } diff --git a/src/vset.c b/src/vset.c index a56adbede2..891e10d5e7 100644 --- a/src/vset.c +++ b/src/vset.c @@ -376,34 +376,29 @@ static inline void *vsetBucketSingle(vsetBucket *b) { } // Setters -static inline vsetBucket *vsetBucketSetType(vsetBucket *b, int type) { - uintptr_t p = (uintptr_t)b; +static inline vsetBucket *vsetBucketFromRawPtr(void *ptr, int type) { + uintptr_t p = (uintptr_t)ptr; return (vsetBucket *)(p | (type & VSET_TAG_MASK)); } -static inline vsetBucket *vsetBucketSetVector(vsetBucket *b, pVector *vec) { - UNUSED(b); - return vsetBucketSetType(vec, VSET_BUCKET_VECTOR); +static inline vsetBucket *vsetBucketFromVector(pVector *vec) { + return vsetBucketFromRawPtr(vec, VSET_BUCKET_VECTOR); } -static inline vsetBucket *vsetBucketSetHashtable(vsetBucket *b, hashtable *ht) { - UNUSED(b); - return vsetBucketSetType(ht, VSET_BUCKET_HT); +static inline vsetBucket *vsetBucketFromHashtable(hashtable *ht) { + return vsetBucketFromRawPtr(ht, VSET_BUCKET_HT); } -static inline vsetBucket *vsetBucketSetSingle(vsetBucket *b, void *ptr) { - UNUSED(b); +static inline vsetBucket *vsetBucketFromSingle(void *ptr) { return 
ptr; } -static inline vsetBucket *vsetBucketSetNone(vsetBucket *b) { - UNUSED(b); +static inline vsetBucket *vsetBucketFromNone(void) { return NULL; } -static inline vsetBucket *vsetBucketSetRax(vsetBucket *b, rax *r) { - UNUSED(b); - return vsetBucketSetType(r, VSET_BUCKET_RAX); +static inline vsetBucket *vsetBucketFromRax(rax *r) { + return vsetBucketFromRawPtr(r, VSET_BUCKET_RAX); } /****************** Helper Functions *******************************************/ @@ -565,25 +560,12 @@ static uint32_t findSplitPosition(vsetGetExpiryFunc getExpiry, vsetBucket *bucke * - XORs and right shifts to mix higher-order bits into lower ones. * - Multiplies by large constants to further spread the bits. * - * Example transformation: - * x ^= x >> 16; - * x *= 0x85ebca6b; - * x ^= x >> 13; - * x *= 0xc2b2ae35; - * x ^= x >> 16; * * For 64-bit systems: * The function uses MurmurHash3 64-bit finalizer constants: * - These constants are chosen to maximize bit diffusion and avoid hash clustering. * - This version benefits from the full 64-bit pointer space. * - * Example transformation: - * x ^= x >> 33; - * x *= 0xff51afd7ed558ccdULL; - * x ^= x >> 33; - * x *= 0xc4ceb9fe1a85ec53ULL; - * x ^= x >> 33; - * * Why this works: * - Pointers tend to have low entropy in their lower bits (due to alignment). * - A naive cast to integer leads to clustering and collisions in hash tables. @@ -649,8 +631,9 @@ static inline vsetBucket *findBucket(rax *expiry_buckets, long long expiry, unsi return bucket; } -static void freeVsetBucket(void *entry) { - vsetBucket *bucket = (vsetBucket *)entry; +/* Free all the vsetBucket memory. 
+ * Since the bucket only holds references to entries the entries themselves are NOT freed */ +static void freeVsetBucket(vsetBucket *bucket) { switch (vsetBucketType(bucket)) { case VSET_BUCKET_NONE: case VSET_BUCKET_SINGLE: @@ -697,8 +680,8 @@ static bool splitBucketIfPossible(vsetBucket *parent, vsetGetExpiryFunc getExpir assert(target_bucket_ts < bucket_ts); assert(split_index != pvLen(sv)); /* no way to split it ??? */ pVector *new_bucket_vector = vsetBucketVector(bucket); - bucket = vsetBucketSetVector(bucket, pvSplit(&new_bucket_vector, split_index)); - new_bucket = vsetBucketSetVector(new_bucket, new_bucket_vector); + bucket = vsetBucketFromVector(pvSplit(&new_bucket_vector, split_index)); + new_bucket = vsetBucketFromVector(new_bucket_vector); assert(pvLen(vsetBucketVector(new_bucket)) > 0); assert(pvLen(vsetBucketVector(bucket)) > 0); /* modify the current bucket data pointer */ @@ -720,7 +703,8 @@ static bool splitBucketIfPossible(vsetBucket *parent, vsetGetExpiryFunc getExpir static inline vsetBucket *insertToBucket_NONE(vsetGetExpiryFunc getExpiry, vsetBucket *bucket, void *entry, long long expiry) { UNUSED(getExpiry); UNUSED(expiry); - return vsetBucketSetSingle(bucket, entry); + UNUSED(bucket); + return vsetBucketFromSingle(entry); } static inline vsetBucket *insertToBucket_SINGLE(vsetGetExpiryFunc getExpiry, vsetBucket *bucket, void *entry, long long expiry) { @@ -735,7 +719,7 @@ static inline vsetBucket *insertToBucket_SINGLE(vsetGetExpiryFunc getExpiry, vse sv = pvInsert(sv, entry, 0); sv = pvInsert(sv, curr_entry, 1); } - bucket = vsetBucketSetVector(bucket, sv); + bucket = vsetBucketFromVector(sv); return bucket; } @@ -752,10 +736,10 @@ static inline vsetBucket *insertToBucket_VECTOR(vsetGetExpiryFunc getExpiry, vse /* Add the new entry as well */ hashtableAdd(ht, entry); - return vsetBucketSetHashtable(bucket, ht); + return vsetBucketFromHashtable(ht); } else { uint32_t pos = findInsertPosition(getExpiry, bucket, expiry); - return 
vsetBucketSetVector(bucket, pvInsert(pv, entry, pos)); + return vsetBucketFromVector(pvInsert(pv, entry, pos)); } return NULL; } @@ -827,7 +811,7 @@ static inline vsetBucket *removeFromBucket_SINGLE(vsetGetExpiryFunc getExpiry, v if (vsetBucketSingle(bucket) == entry) { *removed = true; - return vsetBucketSetNone(bucket); + return vsetBucketFromNone(); } else { *removed = false; return bucket; @@ -850,16 +834,16 @@ static inline vsetBucket *removeFromBucket_VECTOR(vsetGetExpiryFunc getExpiry, v success = false; } else { if (vlen == 1) - new_bucket = vsetBucketSetNone(bucket); + new_bucket = vsetBucketFromNone(); else - new_bucket = vsetBucketSetSingle(bucket, pvGet(sv, idx == 0 ? 1 : 0)); + new_bucket = vsetBucketFromSingle(pvGet(sv, idx == 0 ? 1 : 0)); success = true; pvFree(sv); } } else { if (pvRemove(&sv, entry)) { success = true; - new_bucket = vsetBucketSetVector(bucket, sv); + new_bucket = vsetBucketFromVector(sv); } } if (removed) *removed = success; @@ -883,19 +867,19 @@ static inline vsetBucket *removeFromBucket_HASHTABLE(vsetGetExpiryFunc getExpiry void *ptr; hashtableNext(&hi, &ptr); hashtableRelease(ht); - new_bucket = vsetBucketSetSingle(bucket, ptr); + new_bucket = vsetBucketFromSingle(ptr); } } if (removed) *removed = success; return new_bucket; } -static bool raxBucketRemoveEntry(vsetBucket *parent, vsetGetExpiryFunc getExpiry, void *entry, vsetBucket *bucket, unsigned char *key, size_t key_len, vsetBucket **pbucket, raxNode *node) { +static bool removeEntryBucketFromRaxBucket(vsetBucket *rax_bucket, vsetGetExpiryFunc getExpiry, void *entry, vsetBucket *bucket, unsigned char *key, size_t key_len, vsetBucket **pbucket, raxNode *node) { bool removed = false; switch (vsetBucketType(bucket)) { case VSET_BUCKET_SINGLE: bucket = removeFromBucket_SINGLE(getExpiry, bucket, entry, 0, &removed); if (removed) { - raxRemove(vsetBucketRax(parent), key, key_len, NULL); + raxRemove(vsetBucketRax(rax_bucket), key, key_len, NULL); if (pbucket) *pbucket = NULL; } 
break; @@ -903,7 +887,7 @@ static bool raxBucketRemoveEntry(vsetBucket *parent, vsetGetExpiryFunc getExpiry vsetBucket *new_bucket = removeFromBucket_VECTOR(getExpiry, bucket, entry, 0, &removed); if (new_bucket != bucket) { if (!new_bucket) { - raxRemove(vsetBucketRax(parent), key, key_len, NULL); + raxRemove(vsetBucketRax(rax_bucket), key, key_len, NULL); if (pbucket) *pbucket = NULL; } else { /* In order to avoid rax override, we directly change the node data */ @@ -925,7 +909,7 @@ static bool raxBucketRemoveEntry(vsetBucket *parent, vsetGetExpiryFunc getExpiry break; } default: - panic("Unknown bucket type for raxBucketRemoveEntry"); + panic("Unknown bucket type for removeEntryBucketFromRaxBucket"); return false; } return removed; @@ -939,7 +923,7 @@ static inline vsetBucket *removeFromBucket_RAX(vsetGetExpiryFunc getExpiry, vset rax *expiry_buckets = vsetBucketRax(target); vsetBucket *bucket = findBucket(expiry_buckets, expiry, key, &key_len, &bucket_ts, &node); assert(bucket); - bool success = raxBucketRemoveEntry(target, getExpiry, entry, bucket, key, key_len, NULL, node); + bool success = removeEntryBucketFromRaxBucket(target, getExpiry, entry, bucket, key, key_len, NULL, node); if (removed) *removed = success; // shrink to single bucket if possible if (raxSize(expiry_buckets) == 1) { @@ -1027,13 +1011,14 @@ static inline int vsetBucketNext_RAX(vsetIterator *it, void **entryptr) { return vsetNext(it, entryptr); } else { /* We currently do not support nested RAX buckets */ - it->parent_bucket = vsetBucketSetNone(it->parent_bucket); + it->parent_bucket = vsetBucketFromNone(); return 0; } return 1; } -int vsetAddEntry(vset *set, vsetGetExpiryFunc getExpiry, void *entry, long long expiry) { +bool vsetAddEntry(vset *set, vsetGetExpiryFunc getExpiry, void *entry) { + long long expiry = getExpiry(entry); vsetBucket *expiry_buckets = *set; int bucket_type = vsetBucketType(expiry_buckets); switch (bucket_type) { @@ -1056,11 +1041,11 @@ int vsetAddEntry(vset *set, 
vsetGetExpiryFunc getExpiry, void *entry, long long unsigned char key[VSET_BUCKET_KEY_LEN] = {0}; size_t key_len = encodeNewExpiryBucketKey(key, max_expiry); raxInsert(r, key, key_len, expiry_buckets, NULL); - expiry_buckets = vsetBucketSetRax(expiry_buckets, r); + expiry_buckets = vsetBucketFromRax(r); expiry_buckets = insertToBucket_RAX(getExpiry, expiry_buckets, entry, expiry); } else { /* We need to migrate entries to the new set of buckets since we do not know all entries are in the same bucket */ - expiry_buckets = vsetBucketSetRax(expiry_buckets, r); + expiry_buckets = vsetBucketFromRax(r); for (uint32_t i = 0; i < len; i++) { void *moved_entry = pvGet(vec, i); expiry_buckets = insertToBucket_RAX(getExpiry, expiry_buckets, moved_entry, getExpiry(moved_entry)); @@ -1083,10 +1068,10 @@ int vsetAddEntry(vset *set, vsetGetExpiryFunc getExpiry, void *entry, long long } /* update the set */ *set = expiry_buckets; - return 1; + return true; } -int vsetRemoveEntry(vset *set, vsetGetExpiryFunc getExpiry, void *entry, long long expiry) { +static inline bool vsetRemoveEntryWithExpiry(vset *set, vsetGetExpiryFunc getExpiry, void *entry, long long expiry) { bool removed; vsetBucket *bucket = *set; int bucket_type = vsetBucketType(bucket); @@ -1107,23 +1092,30 @@ int vsetRemoveEntry(vset *set, vsetGetExpiryFunc getExpiry, void *entry, long lo bucket = removeFromBucket_RAX(getExpiry, bucket, entry, expiry, &removed); break; default: - panic("Cannot insert to bucket which is not single, vector or rax"); + panic("Cannot remove from bucket which is not single, vector, hashtable or rax"); } *set = bucket; - return removed ? 
1 : 0; + return removed; } -int vsetUpdateEntry(vset *set, vsetGetExpiryFunc getExpiry, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry) { +bool vsetRemoveEntry(vset *set, vsetGetExpiryFunc getExpiry, void *entry) { + return vsetRemoveEntryWithExpiry(set, getExpiry, entry, getExpiry(entry)); +} + +bool vsetUpdateEntry(vset *set, vsetGetExpiryFunc getExpiry, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry) { + /* Nothing to do */ if (old_entry == new_entry && old_expiry == new_expiry) - return 1; + return true; if (old_entry && old_expiry != -1) - assert((vsetRemoveEntry(set, getExpiry, old_entry, old_expiry))); + /* We cannot take the expiration time from the removed entry, since it might not be allocated anymore. + * For this reason we ask the API user to provide us the removed entry expiration time. */ + assert((vsetRemoveEntryWithExpiry(set, getExpiry, old_entry, old_expiry))); if (new_entry && new_expiry != -1) - assert(vsetAddEntry(set, getExpiry, new_entry, new_expiry)); + assert(vsetAddEntry(set, getExpiry, new_entry)); - return 1; + return true; } static void *vsetGetFirstExpired(vset *set, vsetGetExpiryFunc getExpiry, mstime_t now, bool delete) { @@ -1174,7 +1166,7 @@ static void *vsetGetFirstExpired(vset *set, vsetGetExpiryFunc getExpiry, mstime_ panic("Unknown volatile set bucket type in vsetNext"); } if (delete) - assert(vsetRemoveEntry(set, getExpiry, entry, expiry)); + assert(vsetRemoveEntry(set, getExpiry, entry)); return entry; } @@ -1186,7 +1178,7 @@ void *vsetFirstExpired(vset *set, vsetGetExpiryFunc getExpiry, mstime_t now) { return vsetGetFirstExpired(set, getExpiry, now, false); } -int vsetNext(vsetIterator *it, void **entryptr) { +bool vsetNext(vsetIterator *it, void **entryptr) { vsetBucket *bucket = it->bucket; int bucket_type = vsetBucketType(bucket); int ret = 0; @@ -1215,14 +1207,14 @@ int vsetNext(vsetIterator *it, void **entryptr) { it->bucket = it->parent_bucket; return 
vsetNext(it, entryptr); } - return ret; + return ret == 1; } void vsetStart(vset *set, vsetIterator *it) { it->iteration_state = VSET_BUCKET_NONE; /*lets start by going to the first bucket. */ it->bucket = *set; it->bucket_ts = -1; - it->parent_bucket = vsetBucketSetNone(it->parent_bucket); + it->parent_bucket = vsetBucketFromNone(); } void vsetStop(vsetIterator *it) { @@ -1235,10 +1227,12 @@ void vsetStop(vsetIterator *it) { } vset *createVolatileSet(vset *set) { - *set = vsetBucketSetNone(*set); + *set = vsetBucketFromNone(); return set; } +/* Free all the vset memory used in order to reference the entries. + * Since the set only holds references to entries the entries themselves are NOT freed */ void freeVolatileSet(vset *set) { if (!(*set)) return; freeVsetBucket(*set); diff --git a/src/vset.h b/src/vset.h index bdf39b2525..2e593e2f7e 100644 --- a/src/vset.h +++ b/src/vset.h @@ -295,14 +295,14 @@ typedef struct vsetIterator { int iteration_state; } vsetIterator; -int vsetAddEntry(vset *set, vsetGetExpiryFunc getExpiry, void *entry, long long expiry); -int vsetRemoveEntry(vset *set, vsetGetExpiryFunc getExpiry, void *entry, long long expiry); +bool vsetAddEntry(vset *set, vsetGetExpiryFunc getExpiry, void *entry); +bool vsetRemoveEntry(vset *set, vsetGetExpiryFunc getExpiry, void *entry); void *vsetPopExpired(vset *set, vsetGetExpiryFunc getExpiry, mstime_t now); void *vsetFirstExpired(vset *set, vsetGetExpiryFunc getExpiry, mstime_t now); -int vsetUpdateEntry(vset *set, vsetGetExpiryFunc getExpiry, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry); +bool vsetUpdateEntry(vset *set, vsetGetExpiryFunc getExpiry, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry); bool vsetIsEmpty(vset *set); void vsetStart(vset *set, vsetIterator *it); -int vsetNext(vsetIterator *it, void **entryptr); +bool vsetNext(vsetIterator *it, void **entryptr); void vsetStop(vsetIterator *it); void freeVolatileSet(vset *b); vset 
*createVolatileSet(vset *set); From 54e52bf2431f65e290cdd1e75c3387203a0bb9e6 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Mon, 30 Jun 2025 11:21:38 +0300 Subject: [PATCH 032/119] More fixes following the PR review: 1. Fix the unit tests 2. Rename the vset create and free functions (createVolatileSet/freeVolatileSet) to vsetInit/vsetClear Signed-off-by: Ran Shidlansik --- src/t_hash.c | 6 +++--- src/unit/test_vset.c | 36 ++++++++++++++++++------------------ src/vset.c | 6 +++--- src/vset.h | 3 ++- 4 files changed, 26 insertions(+), 25 deletions(-) diff --git a/src/t_hash.c b/src/t_hash.c index 68648bd292..621ee84c5c 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -54,7 +54,7 @@ static vset *hashTypeGetVolatileSet(robj *o) { void hashTypeFreeVolatileSet(robj *o) { vset *set = hashTypeGetVolatileSet(o); if (set) - freeVolatileSet(set); + vsetClear(set); } bool hashTypeHasVolatileElements(robj *o) { @@ -77,7 +77,7 @@ static vset *hashTypeGetOrcreateVolatileSet(robj *o) { serverAssert(o->encoding == OBJ_ENCODING_HASHTABLE); vset *vset = hashtableMetadata(o->ptr); if (*vset == NULL) { - createVolatileSet(vset); + vsetInit(vset); /* serves mainly for optimization. Use type which supports access function only when needed. */ hashTypeIgnoreTTL(o, false); } @@ -86,7 +86,7 @@ static vset *hashTypeGetOrcreateVolatileSet(robj *o) { static void hashTypeDeleteVolatileSet(robj *o) { vset *vset = hashtableMetadata(o->ptr); - freeVolatileSet(vset); + vsetClear(vset); /* serves mainly for optimization.
by changing the hashtable type we can avoid extra function call in hashtable access */ hashTypeIgnoreTTL(o, true); } diff --git a/src/unit/test_vset.c b/src/unit/test_vset.c index 5b14fa981b..88aeb6212e 100644 --- a/src/unit/test_vset.c +++ b/src/unit/test_vset.c @@ -39,13 +39,13 @@ int test_vset_add_and_iterate(int argc, char **argv, int flags) { (void)flags; vset set; - createVolatileSet(&set); + vsetInit(&set); mock_entry *e1 = mockCreateEntry("item1", 123); mock_entry *e2 = mockCreateEntry("item2", 456); - TEST_ASSERT(vsetAddEntry(&set, mockGetExpiry, e1, mockGetExpiry(e1))); - TEST_ASSERT(vsetAddEntry(&set, mockGetExpiry, e2, mockGetExpiry(e2))); + TEST_ASSERT(vsetAddEntry(&set, mockGetExpiry, e1)); + TEST_ASSERT(vsetAddEntry(&set, mockGetExpiry, e2)); TEST_ASSERT(!vsetIsEmpty(&set)); @@ -62,7 +62,7 @@ int test_vset_add_and_iterate(int argc, char **argv, int flags) { TEST_ASSERT(count == 2); vsetStop(&it); - freeVolatileSet(&set); + vsetClear(&set); mockFreeEntry(e1); mockFreeEntry(e2); @@ -76,7 +76,7 @@ int test_vset_large_batch_same_expiry(int argc, char **argv, int flags) { (void)flags; vset set; - createVolatileSet(&set); + vsetInit(&set); const long long expiry_time = 1000LL; const int total_entries = 200; @@ -89,7 +89,7 @@ int test_vset_large_batch_same_expiry(int argc, char **argv, int flags) { char key_buf[32]; snprintf(key_buf, sizeof(key_buf), "entry_%d", i); entries[i] = mockCreateEntry(key_buf, expiry_time); - TEST_ASSERT(vsetAddEntry(&set, mockGetExpiry, entries[i], expiry_time)); + TEST_ASSERT(vsetAddEntry(&set, mockGetExpiry, entries[i])); } // Verify set is not empty @@ -109,7 +109,7 @@ int test_vset_large_batch_same_expiry(int argc, char **argv, int flags) { // Cleanup vsetStop(&it); - freeVolatileSet(&set); + vsetClear(&set); for (int i = 0; i < total_entries; i++) { mockFreeEntry(entries[i]); @@ -127,7 +127,7 @@ int test_vset_iterate_multiple_expiries(int argc, char **argv, int flags) { const unsigned int total_entries = 5; vset set; - 
createVolatileSet(&set); + vsetInit(&set); // Prepare entries with mixed expiry times, some duplicates mock_entry *entries[total_entries]; @@ -138,7 +138,7 @@ int test_vset_iterate_multiple_expiries(int argc, char **argv, int flags) { snprintf(key_buf, sizeof(key_buf), "entry_%d", i); long long expiry_time = rand() % 10000; entries[i] = mockCreateEntry(key_buf, expiry_time); - TEST_ASSERT(vsetAddEntry(&set, mockGetExpiry, entries[i], expiry_time)); + TEST_ASSERT(vsetAddEntry(&set, mockGetExpiry, entries[i])); } vsetIterator it; @@ -169,7 +169,7 @@ int test_vset_iterate_multiple_expiries(int argc, char **argv, int flags) { } vsetStop(&it); - freeVolatileSet(&set); + vsetClear(&set); for (int i = 0; i < 5; i++) mockFreeEntry(entries[i]); TEST_PRINT_INFO("Iterated all %d mixed expiry entries successfully", total); @@ -182,7 +182,7 @@ int test_vset_add_and_remove_all(int argc, char **argv, int flags) { UNUSED(flags); vset set; - createVolatileSet(&set); + vsetInit(&set); const int total_entries = 130; mock_entry *entries[total_entries]; @@ -192,16 +192,16 @@ int test_vset_add_and_remove_all(int argc, char **argv, int flags) { char key[32]; snprintf(key, sizeof(key), "key_%d", i); entries[i] = mockCreateEntry(key, expiry); - TEST_ASSERT(vsetAddEntry(&set, mockGetExpiry, entries[i], expiry)); + TEST_ASSERT(vsetAddEntry(&set, mockGetExpiry, entries[i])); } for (int i = 0; i < total_entries; i++) { - TEST_ASSERT(vsetRemoveEntry(&set, mockGetExpiry, entries[i], expiry)); + TEST_ASSERT(vsetRemoveEntry(&set, mockGetExpiry, entries[i])); mockFreeEntry(entries[i]); } TEST_ASSERT(vsetIsEmpty(&set)); - freeVolatileSet(&set); + vsetClear(&set); TEST_PRINT_INFO("Add/remove %d entries, set size now 0", total_entries); return 0; @@ -253,7 +253,7 @@ int insert_mock_entry(vset *set) { long long expiry = rand() % 10000 + 100; mock_entry *e = mock_entry_create(keybuf, expiry); // printf("adding entry %p with expiry %llu\n", e, expiry); - TEST_ASSERT(vsetAddEntry(set, mockGetExpiry, e, 
expiry)); + TEST_ASSERT(vsetAddEntry(set, mockGetExpiry, e)); mock_entries[mock_entry_count++] = e; return 0; } @@ -276,7 +276,7 @@ int remove_mock_entry(vset *set) { int idx = rand() % mock_entry_count; mock_entry *e = mock_entries[idx]; // printf("removing entry %p with expiry %llu\n", e, mockGetExpiry(e)); - TEST_ASSERT(vsetRemoveEntry(set, mockGetExpiry, e, mockGetExpiry(e))); + TEST_ASSERT(vsetRemoveEntry(set, mockGetExpiry, e)); mockFreeEntry(e); mock_entries[idx] = mock_entries[--mock_entry_count]; @@ -312,7 +312,7 @@ int test_vset_fuzzer(int argc, char **argv, int flags) { srand(time(NULL)); vset set; - createVolatileSet(&set); + vsetInit(&set); for (int i = 0; i < NUM_ITERATIONS; i++) { int op = rand() % 4; @@ -337,7 +337,7 @@ int test_vset_fuzzer(int argc, char **argv, int flags) { /* now expire all the entries and check that we have no entries left */ expire_mock_entries(&set, LONG_LONG_MAX); TEST_ASSERT(vsetIsEmpty(&set) && mock_entry_count == 0); - freeVolatileSet(&set); + vsetClear(&set); free_mock_entries(); /* Just in case */ return 0; } diff --git a/src/vset.c b/src/vset.c index 891e10d5e7..ee4e941a5f 100644 --- a/src/vset.c +++ b/src/vset.c @@ -1226,16 +1226,16 @@ void vsetStop(vsetIterator *it) { hashtableResetIterator(&it->hiter); } -vset *createVolatileSet(vset *set) { +void vsetInit(vset *set) { *set = vsetBucketFromNone(); - return set; } /* Free all the vset memory used in order to reference the entries. 
* Since the set only holds references to entries the entries themselves are NOT freed */ -void freeVolatileSet(vset *set) { +void vsetClear(vset *set) { if (!(*set)) return; freeVsetBucket(*set); + *set = vsetBucketFromNone(); } bool vsetIsEmpty(vset *set) { diff --git a/src/vset.h b/src/vset.h index 2e593e2f7e..3bd15fcadc 100644 --- a/src/vset.h +++ b/src/vset.h @@ -305,7 +305,8 @@ void vsetStart(vset *set, vsetIterator *it); bool vsetNext(vsetIterator *it, void **entryptr); void vsetStop(vsetIterator *it); void freeVolatileSet(vset *b); -vset *createVolatileSet(vset *set); +void vsetInit(vset *set); +void vsetClear(vset *set); #endif From 46eacf67d579ae0a84be9a58340402d34523a1f1 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Mon, 30 Jun 2025 11:35:21 +0300 Subject: [PATCH 033/119] fix vset comment arrow use in documentation Signed-off-by: Ran Shidlansik --- src/vset.h | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/src/vset.h b/src/vset.h index 3bd15fcadc..5c0158cd1c 100644 --- a/src/vset.h +++ b/src/vset.h @@ -60,9 +60,9 @@ * * 1. If the current set is `NONE`, it becomes a `SINGLE` bucket. * 2. If the set is a `SINGLE` bucket and another entry arrives: - * → it is promoted to a `VECTOR` bucket (sorted by expiry). + * -> it is promoted to a `VECTOR` bucket (sorted by expiry). * 3. If the `VECTOR` exceeds `VOLATILESET_VECTOR_BUCKET_MAX_SIZE` (127): - * → the set becomes a `RAX`, and existing entries are migrated. + * -> the set becomes a `RAX`, and existing entries are migrated. * 4. IF the set is using RAX encoding it will locate a bucket to add the entry * following the strategy explained below. * @@ -114,7 +114,7 @@ * - Only done if ALL entries still fit in the tighter window. * - Effectively “moves” the bucket to an earlier timestamp. * - * Example: B(ts=128, span=128ms) → B(ts=64, span=16ms) + * Example: B(ts=128, span=128ms) -> B(ts=64, span=16ms) * * 2. 
**Split into two buckets:** * - Use binary search to find a “natural” boundary based on entry expiry. @@ -124,18 +124,18 @@ * Example: * * Before: - * [ Entry0 ... Entry126 ] → B(ts=128) + * [ Entry0 ... Entry126 ] -> B(ts=128) * * After Split: - * [ Entry0...Entry62 ] → New B(ts=64) - * [ Entry63...Entry126 ] → Original B(ts=128) + * [ Entry0...Entry62 ] -> New B(ts=64) + * [ Entry63...Entry126 ] -> Original B(ts=128) * * 3. **Convert to hashtable:** * - When no clean split is found (e.g. all entries share similar expiry), * and realignment is not possible. * - This allows efficient O(1) lookups even with clustered expiry values. * - * Vector B(ts=128) → Hashtable B(ts=128) + * Vector B(ts=128) -> Hashtable B(ts=128) * * This hierarchical design ensures: * - Efficient memory usage (tight buckets) @@ -157,9 +157,9 @@ * +--------------------------+ * | RAX (key = bucket_ts) | * |--------------------------| - * | "000016" → [entry1] | ← Vector (SINGLE→VECTOR→HT) - * | "000032" → [entry2...] | ← Full vector, might split - * | "000048" → [entry...] | + * | "000016" -> [entry1] | <- Vector (SINGLE->VECTOR->HT) + * | "000032" -> [entry2...] | <- Full vector, might split + * | "000048" -> [entry...] | * +--------------------------+ * * * Splitting a Full Vector in RAX: @@ -174,8 +174,8 @@ * split (first where get_bucket_ts(entry) > min_ts) * * 2. Create two vectors: - * bucket A → [entry1..entry6] with key = "000032" - * bucket B → [entry7..entry13] with key = "000048" + * bucket A -> [entry1..entry6] with key = "000032" + * bucket B -> [entry7..entry13] with key = "000048" * * 3. Insert both back to the RAX. 
* @@ -196,11 +196,11 @@ * | * v * +-------------+ - * | key → bucket| + * | key -> bucket| * +-------------+ - * | "000016" → VECTOR - * | "000032" → HT - * | "000048" → SINGLE + * | "000016" -> VECTOR + * | "000032" -> HT + * | "000048" -> SINGLE * +-------------+ * *----------------------------------------------------------------------------- From 688f2089f321dfffd222f0e9eba454185103c251 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Mon, 30 Jun 2025 13:26:00 +0300 Subject: [PATCH 034/119] add public API documentation Signed-off-by: Ran Shidlansik --- src/vset.c | 229 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 227 insertions(+), 2 deletions(-) diff --git a/src/vset.c b/src/vset.c index ee4e941a5f..aad002827e 100644 --- a/src/vset.c +++ b/src/vset.c @@ -1017,6 +1017,54 @@ static inline int vsetBucketNext_RAX(vsetIterator *it, void **entryptr) { return 1; } +/* Adds an entry to a volatile set (vset) based on its expiration time. + * + * The volatile set maintains buckets of entries grouped by time windows. Each + * entry is inserted into an appropriate bucket based on its expiry timestamp. + * Buckets are memory-efficient and use dynamic representations that evolve as + * the number of entries grows: + * + * - VSET_BUCKET_NONE: + * Indicates the set is empty. A new SINGLE bucket is created to hold the entry. + * + * - VSET_BUCKET_SINGLE: + * Holds a single entry directly. Upon inserting a second entry, the bucket + * is promoted to a VECTOR, preserving the sorted order. + * + * - VSET_BUCKET_VECTOR: + * Stores entries in a compact, sorted vector. The maximum size is 127 entries. + * If inserting a new entry exceeds the limit: + * - If all entries share the same bucket timestamp (same high-resolution time window), + * the entire vector is moved into a RAX bucket as a single node. + * - Otherwise, each vector entry is redistributed into the new RAX structure. 
+ * + * - VSET_BUCKET_RAX: + * A radix tree (RAX) used for scalable management of multiple time-based buckets. + * Entries are inserted by computing their bucket key based on their expiration timestamp. + * + * The function uses the entry’s expiration time (provided via the getExpiry function) + * to determine the correct bucket. It promotes bucket types as needed to maintain + * sorted and efficient storage. + * + * In all cases, if the insertion causes a structural change (e.g., bucket promotion), + * the pointer to the root of the bucket tree is updated via the `set` pointer. + * + * This function always returns true, as insertion is guaranteed to succeed + * (barring internal memory allocation failure, which is outside its concern). + * + * Notes: + * - Buckets are upgraded in-place based on size and time span distribution. + * - Vector buckets allow binary search insertion to maintain order. + * - Tagged pointers are used to determine bucket types efficiently. + * - It is assumed that all entries have odd-valued pointers (LSB set). + * - Key encoding in RAX is based on the maximum expiration timestamp + * that falls within a fixed window granularity. + * + * Example: + * vset *myset = NULL; + * vsetAddEntry(&myset, extract_expiry, my_object); + * + * // Internally, my_object is placed into the appropriate bucket. */ bool vsetAddEntry(vset *set, vsetGetExpiryFunc getExpiry, void *entry) { long long expiry = getExpiry(entry); vsetBucket *expiry_buckets = *set; @@ -1098,10 +1146,102 @@ static inline bool vsetRemoveEntryWithExpiry(vset *set, vsetGetExpiryFunc getExp return removed; } +/* Removes an entry from the volatile set (vset), based on its expiration time. + * + * The volatile set organizes entries into time-based buckets of varying types: + * SINGLE, VECTOR, or RAX. The bucket type determines how entries are stored + * and managed internally. This function will locate and remove the entry + * from its appropriate bucket. 
+ * + * The removal process works as follows: + * + * 1. The expiration timestamp of the entry is used to compute which bucket + * (based on its end time) the entry should reside in. + * + * 2. Depending on the current top-level bucket type of the vset, the function + * dispatches to the appropriate removal handler: + * + * - VSET_BUCKET_SINGLE: + * If the stored entry matches, the bucket is set to NONE. + * + * - VSET_BUCKET_VECTOR: + * Performs a binary search to find and remove the entry from the vector. + * If the resulting vector size drops to 1, it is converted to a SINGLE bucket. + * If the vector becomes empty, it is removed entirely (set to NONE). + * + * - VSET_BUCKET_RAX: + * The function decodes the appropriate bucket key (based on the expiration + * time), looks up the RAX node, and dispatches removal to the sub-bucket. + * If a sub-bucket becomes empty or has only one entry left, its bucket + * type may be downgraded (e.g., to SINGLE or removed). + * + * 3. If the removal results in a structural change (e.g., shrinking a bucket), + * the bucket type may be changed, and the root pointer is updated accordingly. + * + * 4. If the entry is not found in the expected bucket, no action is taken. + * + * Notes: + * - Buckets self-adjust during removal for memory efficiency. + * - The vector bucket keeps entries sorted for fast search/removal. + * - RAX-based sets support a large number of buckets and scale well + * with many time windows. + * - Entries are assumed to have pointer identity (odd-valued pointers). + * - Correct expiration timestamp must be provided for accurate removal. + * + * Return value: + * Returns true if the entry was found and removed successfully. + * Returns false if the entry was not found. + * + * Example usage: + * vsetRemoveEntry(myset, extract_expiry, my_object); + * + * // my_object is removed from the appropriate bucket in myset BUT is not freed. 
*/ bool vsetRemoveEntry(vset *set, vsetGetExpiryFunc getExpiry, void *entry) { return vsetRemoveEntryWithExpiry(set, getExpiry, entry, getExpiry(entry)); } +/** + * Updates an existing entry in the volatile set (vset), optionally replacing it + * with a new entry and expiration time. + * + * This function provides a unified interface for removing an old entry and + * adding a new one. It supports three main cases: + * + * 1. Entry identity or expiry time didn't change: + * If the `old_entry` and `new_entry` are the same, and their expiration + * timestamps are also equal, the function returns early with no action taken. + * + * 2. Removal of the old entry: + * If `old_entry` is provided (i.e., not NULL) and its old expiration time + * is valid (`old_expiry != -1`), the function will remove it from the set. + * + * Note: Since the object might already be deallocated (or changed), the + * expiration time is passed explicitly as an argument, rather than + * relying on `getExpiry(old_entry)` which might not be safe to call. + * + * 3. Insertion of the new entry: + * If `new_entry` is provided (i.e., not NULL) and its new expiration time + * is valid (`new_expiry != -1`), the function will insert it into the set. + * + * The function assumes both `vsetRemoveEntryWithExpiry()` and + * `vsetAddEntry()` succeed. It uses assertions to enforce this at runtime, + * assuming this function is used in trusted code paths. + * + * Notes: + * - The update is not atomic. If the removal fails (assertion fails), + * insertion of the new entry does not occur. + * - If the new entry is the same as the old one, but the expiry changed, + * the entry is effectively reinserted in the correct bucket. + * - This is useful for renewal or replacement logic where entries may + * need to change time buckets due to updated TTLs or key mutation. + * + * Return value: + * Always returns true on success. + * In case of assertion failures, the program will abort. 
+ * + * Example usage: + * vsetUpdateEntry(myset, getExpiry, old_ptr, new_ptr, old_ts, new_ts); + */ bool vsetUpdateEntry(vset *set, vsetGetExpiryFunc getExpiry, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry) { /* Nothing to do */ if (old_entry == new_entry && old_expiry == new_expiry) @@ -1170,14 +1310,58 @@ static void *vsetGetFirstExpired(vset *set, vsetGetExpiryFunc getExpiry, mstime_ return entry; } +/* Retrieves and removes the first expired entry from the volatile set. + * + * This is a public-facing convenience wrapper around vsetGetFirstExpired() + * with delete=true. It performs a "pop" operation, returning the first + * expired entry (if any) and removing it from the underlying structure. + * + * Parameters: + * - set: Pointer to the volatile set. + * - getExpiry: Function used to extract the expiry timestamp from an entry. + * - now: Current time in milliseconds. Used in order to compare the entries time against + * to decide if they are expired or not. + * + * Returns: + * - The first expired entry, or NULL if no expired entries are present.*/ void *vsetPopExpired(vset *set, vsetGetExpiryFunc getExpiry, mstime_t now) { return vsetGetFirstExpired(set, getExpiry, now, true); } +/* Retrieves (but does not remove) the first expired entry from the volatile set. + * + * This function is useful when the caller wants to inspect the next item + * scheduled for expiration without mutating the underlying set. + * + * Internally calls vsetGetFirstExpired() with delete=false. + * + * Parameters: + * - set: Pointer to the volatile set. + * - getExpiry: Function used to extract the expiry timestamp from an entry. + * - now: Current time in milliseconds. Used in order to compare the entries time against + * to decide if they are expired or not. 
+ * + * Returns: + * - The first expired entry, or NULL if no expired entries are present.*/ void *vsetFirstExpired(vset *set, vsetGetExpiryFunc getExpiry, mstime_t now) { return vsetGetFirstExpired(set, getExpiry, now, false); } +/* Advances the volatile set iterator to the next entry. + * + * This function handles iteration over various bucket types in the set. It attempts + * to return the next valid entry, updating the iterator state accordingly. + * + * If the current bucket is exhausted, the iterator automatically switches back to + * the parent bucket (typically used when iterating nested structures, such as RAX buckets). + * + * Parameters: + * - it: Pointer to an initialized vsetIterator. + * - entryptr: Output pointer to receive the next entry. + * + * Returns: + * - true if a next entry is found. + * - false if iteration is complete. */ bool vsetNext(vsetIterator *it, void **entryptr) { vsetBucket *bucket = it->bucket; int bucket_type = vsetBucketType(bucket); @@ -1210,6 +1394,15 @@ bool vsetNext(vsetIterator *it, void **entryptr) { return ret == 1; } +/* Initializes a volatile set iterator. + * + * This function prepares the iterator for scanning a volatile set from the beginning. + * It sets the internal state, pointing to the main set bucket, and uses VSET_BUCKET_NONE + * as an initial placeholder to transition correctly into the actual bucket logic. + * + * Parameters: + * - set: Pointer to the volatile set to iterate. + * - it: Pointer to a vsetIterator structure to initialize. */ void vsetStart(vset *set, vsetIterator *it) { it->iteration_state = VSET_BUCKET_NONE; /*lets start by going to the first bucket. */ it->bucket = *set; @@ -1217,6 +1410,13 @@ void vsetStart(vset *set, vsetIterator *it) { it->parent_bucket = vsetBucketFromNone(); } +/* Finalizes and cleans up an active volatile set iterator. + * + * Some internal iterators (e.g., RAX, hashtable) allocate temporary state. 
+ * This function ensures proper cleanup of those structures when the iteration is done. + * + * Parameters: + * - it: Pointer to the vsetIterator that was previously initialized with vsetStart(). */ void vsetStop(vsetIterator *it) { int bucket_type = vsetBucketType(it->bucket); int parent_bucket_type = vsetBucketType(it->parent_bucket); @@ -1226,18 +1426,43 @@ void vsetStop(vsetIterator *it) { hashtableResetIterator(&it->hiter); } +/* Initializes an empty volatile set. + * + * The function sets the set to its initial state by assigning a "NONE" bucket. + * This is the starting point for all volatile sets before entries are inserted. + * + * Parameters: + * - set: Pointer to the volatile set to initialize. */ void vsetInit(vset *set) { *set = vsetBucketFromNone(); } -/* Free all the vset memory used in order to reference the entries. - * Since the set only holds references to entries the entries themselves are NOT freed */ +/* Clears the volatile set, freeing all memory used for internal buckets. + * + * This function deallocates all internal data structures used by the set (buckets, vectors, + * hash tables, etc.). It does NOT free the entries themselves, since the set only holds + * references. + * + * After this call, the set is reset to an empty state. + * + * Parameters: + * - set: Pointer to the volatile set to clear. */ void vsetClear(vset *set) { if (!(*set)) return; freeVsetBucket(*set); *set = vsetBucketFromNone(); } +/* Checks whether a volatile set is empty. + * + * This function simply checks if the set's current bucket type is VSET_BUCKET_NONE. + * + * Parameters: + * - set: Pointer to the volatile set. + * + * Returns: + * - true if the set contains no entries. + * - false otherwise. 
*/ bool vsetIsEmpty(vset *set) { return vsetBucketType(*set) == VSET_BUCKET_NONE; } From 597f0d4b5b9643e6d4d6bad6371dea77f855c68d Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Mon, 30 Jun 2025 15:05:27 +0300 Subject: [PATCH 035/119] polish comments and documentation Signed-off-by: Ran Shidlansik --- src/vset.h | 71 +++++++++++++++++++++++++++--------------------------- 1 file changed, 36 insertions(+), 35 deletions(-) diff --git a/src/vset.h b/src/vset.h index 5c0158cd1c..7be797725b 100644 --- a/src/vset.h +++ b/src/vset.h @@ -92,31 +92,32 @@ * |------------------ Bucket Span ------------------| * [window_start .................................. bucket_ts) * - * ASCII Layout Example: - * - * Timeline: ---> increasing time ---> - * +------+---------+--------+ - * | B0 | B1 | B2 | - * | ts=32| ts=128 | ts=2048| - * +------+---------+--------+ - * ^ ^ ^ - * | | | - * [E1,E2] ∈ B0 [E3...E7] ∈ B1 [E8...] ∈ B2 - * All entries expire BEFORE their bucket_ts + * Layout Example: + * + * Timeline: ----------> increasing time -----------> + * +--------------+-------------+---------+ + * | B0 | B1 | B2 | + * | ts=32 | ts=128 | ts=2048 | + * +--------------+-------------+---------+ + * ^ ^ ^ + * | | | + * [E1,E2] ∈ B0 [E3...E7] ∈ B1 [E8...E15] ∈ B2 + * + * All entries expire BEFORE their bucket_ts * * Bucket Splitting Strategy: * ---------------------------------- * * When a bucket (e.g. VECTOR) becomes too dense or needs realignment: * - * 1. **Re-align to lower granularity:** + * 1. Re-align to lower granularity: * - Adjust the bucket timestamp down to a finer granularity (e.g. 16ms). * - Only done if ALL entries still fit in the tighter window. * - Effectively “moves” the bucket to an earlier timestamp. * * Example: B(ts=128, span=128ms) -> B(ts=64, span=16ms) * - * 2. **Split into two buckets:** + * 2. Split into two buckets: * - Use binary search to find a “natural” boundary based on entry expiry. * - Original bucket retains its timestamp (but holds fewer entries). 
* - New bucket is inserted before the current one with its own tighter timestamp. @@ -130,7 +131,7 @@ * [ Entry0...Entry62 ] -> New B(ts=64) * [ Entry63...Entry126 ] -> Original B(ts=128) * - * 3. **Convert to hashtable:** + * 3. Convert to hashtable: * - When no clean split is found (e.g. all entries share similar expiry), * and realignment is not possible. * - This allows efficient O(1) lookups even with clustered expiry values. @@ -155,7 +156,7 @@ * expiry_buckets = rax * | 0x6 * * +--------------------------+ - * | RAX (key = bucket_ts) | + * | RAX (key = bucket_ts) | * |--------------------------| * | "000016" -> [entry1] | <- Vector (SINGLE->VECTOR->HT) * | "000032" -> [entry2...] | <- Full vector, might split @@ -192,17 +193,18 @@ * VECTOR (sorted, up to 127) * | * v - * RAX - * | - * v - * +-------------+ - * | key -> bucket| - * +-------------+ - * | "000016" -> VECTOR - * | "000032" -> HT - * | "000048" -> SINGLE - * +-------------+ - * + * RAX (holds multiple buckets, keyed by each bucket's end timestamp) + * Bucket types within a RAX: + * + * SINGLE + * | + * v + * VECTOR (sorted, up to 127, can split + * | into multiple vectors) + * | + * v + * HASHTABLE (only when a vector can't split) + * *----------------------------------------------------------------------------- * Entry Type Contract *----------------------------------------------------------------------------- @@ -220,26 +222,26 @@ *----------------------------------------------------------------------------- * * Create/Free: - * vset *createVolatileSet(volatileEntryType *type); - * void freeVolatileSet(vset *set); + * void vsetInit(vset *set); + * void vsetClear(vset *set); * * Mutation: - * int vsetAddEntry(vset *set, void *entry, long long expiry); - * int vsetRemoveEntry(vset *set, void *entry, long long expiry); - * int vsetUpdateEntry(vset *set, void *old_entry, + * bool vsetAddEntry(vset *set, vsetGetExpiryFunc getExpiry, void *entry); + * bool vsetRemoveEntry(vset *set, vsetGetExpiryFunc 
getExpiry, void *entry); + * bool vsetUpdateEntry(vset *set, vsetGetExpiryFunc getExpiry, void *old_entry, * void *new_entry, long long old_expiry, * long long new_expiry); * * Expiry Retrieval: - * void *vsetFirstExpired(vset *set, mstime_t now); - * void *vsetPopExpired(vset *set, mstime_t now); + * void *vsetFirstExpired(vset *set, vsetGetExpiryFunc getExpiry, mstime_t now); + * void *vsetPopExpired(vset *set, vsetGetExpiryFunc getExpiry, mstime_t now); * * Utilities: * bool vsetIsEmpty(vset *set); * * Iteration: * void vsetStart(vset *set, vsetIterator *it); - * int vsetNext(vsetIterator *it, void **entryptr); + * bool vsetNext(vsetIterator *it, void **entryptr); * void vsetStop(vsetIterator *it); * *----------------------------------------------------------------------------- @@ -304,7 +306,6 @@ bool vsetIsEmpty(vset *set); void vsetStart(vset *set, vsetIterator *it); bool vsetNext(vsetIterator *it, void **entryptr); void vsetStop(vsetIterator *it); -void freeVolatileSet(vset *b); void vsetInit(vset *set); void vsetClear(vset *set); From 9c260e37bbefbc00dd6cb5bdc5649ffd0557568a Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Mon, 30 Jun 2025 15:12:25 +0300 Subject: [PATCH 036/119] change sv to pv in multiple places Signed-off-by: Ran Shidlansik --- src/vset.c | 188 ++++++++++++++++++++++++++--------------------------- src/vset.h | 14 ++-- 2 files changed, 102 insertions(+), 100 deletions(-) diff --git a/src/vset.c b/src/vset.c index aad002827e..44e8a2ac3a 100644 --- a/src/vset.c +++ b/src/vset.c @@ -44,7 +44,7 @@ static inline uint32_t pvLen(pVector *vec) { /* Ensures that a pVector has enough capacity to hold additional elements. * - * This function guarantees that the given pVector `sv` has at least enough + * This function guarantees that the given pVector `pv` has at least enough * allocated space to accommodate `additional` more elements, growing it if necessary. * If the vector is currently `NULL`, it will be newly allocated. 
* @@ -53,7 +53,7 @@ static inline uint32_t pvLen(pVector *vec) { * reflect the actual allocated size. * * Arguments: - * sv - Pointer to an existing pVector or NULL. + * pv - Pointer to an existing pVector or NULL. * additional - The number of additional elements the vector should be able to accommodate. * * Return: @@ -63,19 +63,19 @@ static inline uint32_t pvLen(pVector *vec) { * Note: * The `additional` is the number of *additional* elements beyond the current length. * This function does not modify the vector's logical length (`len`), only its allocation. */ -pVector *pvMakeRoomFor(pVector *sv, size_t additional) { - if (additional == 0) return sv; - size_t required = PV_HEADER_SIZE + (PV_LEN(sv) + additional) * PV_ELEM_SIZE; - if (PV_ALLOC(sv) >= required) return sv; - - if (!sv) { - sv = zmalloc(required); - sv->len = 0; +pVector *pvMakeRoomFor(pVector *pv, size_t additional) { + if (additional == 0) return pv; + size_t required = PV_HEADER_SIZE + (PV_LEN(pv) + additional) * PV_ELEM_SIZE; + if (PV_ALLOC(pv) >= required) return pv; + + if (!pv) { + pv = zmalloc(required); + pv->len = 0; } else { - sv = zrealloc_usable(sv, required, &required); + pv = zrealloc_usable(pv, required, &required); } - sv->alloc = required; - return sv; + pv->alloc = required; + return pv; } /* Shrinks a pVector to release unused allocated memory. @@ -90,7 +90,7 @@ pVector *pvMakeRoomFor(pVector *sv, size_t additional) { * size (`alloc`) to reflect the new length. * * Arguments: - * sv - A pointer to the `pVector` to shrink. + * pv - A pointer to the `pVector` to shrink. 
* * Return: * A potentially reallocated `pVector` with minimized memory usage, @@ -104,28 +104,28 @@ pVector *pvMakeRoomFor(pVector *sv, size_t additional) { * pVector *vec = pvNew(); * // After some insertions and deletions * vec = pvShrinkToFit(vec); */ -pVector *pvShrinkToFit(pVector *sv) { - if (!sv) return NULL; +pVector *pvShrinkToFit(pVector *pv) { + if (!pv) return NULL; - size_t used = PV_ALLOC(sv); - size_t required = pvLen(sv) == 0 ? 0 : PV_HEADER_SIZE + pvLen(sv) * PV_ELEM_SIZE; + size_t used = PV_ALLOC(pv); + size_t required = pvLen(pv) == 0 ? 0 : PV_HEADER_SIZE + pvLen(pv) * PV_ELEM_SIZE; if (used > required) { if (!required) { - zfree(sv); + zfree(pv); return NULL; } - sv = zrealloc_usable(sv, used, &required); - sv->alloc = required; + pv = zrealloc_usable(pv, used, &required); + pv->alloc = required; } - return sv; + return pv; } /** * pvSplit - Splits a pVector into two parts at a given index. * * Arguments: - * sv_ptr: A pointer to the pVector* to split. This pointer is + * pv_ptr: A pointer to the pVector* to split. This pointer is * updated in-place to point to the left portion (elements [0..split_index-1]). * split_index: The index at which to split the vector. The resulting right * vector will contain elements [split_index..len-1]. @@ -151,34 +151,34 @@ pVector *pvShrinkToFit(pVector *sv) { * • The `split_index` is such that the right part would have 0 elements. * * Side effects: - * - The original vector pointer (`*sv_ptr`) is modified to point to the + * - The original vector pointer (`*pv_ptr`) is modified to point to the * resized left portion. * * Example: * -------- - * Suppose `sv_ptr` points to a vector of 5 elements: + * Suppose `pv_ptr` points to a vector of 5 elements: * [A, B, C, D, E] * * Calling: - * pVector *right = pvSplit(&sv_ptr, 3); + * pVector *right = pvSplit(&pv_ptr, 3); * * Results in: - * sv_ptr -> [A, B, C] + * pv_ptr -> [A, B, C] * right -> [D, E] * * If the split_index is 5 (i.e. 
the end), the function returns NULL and the * original vector is unchanged. */ -pVector *pvSplit(pVector **sv_ptr, uint32_t split_index) { - pVector *sv = *sv_ptr; +pVector *pvSplit(pVector **pv_ptr, uint32_t split_index) { + pVector *pv = *pv_ptr; // Handle edge cases: null or empty - if (!sv || sv->len <= 1) return NULL; + if (!pv || pv->len <= 1) return NULL; // If no valid split found, return NULL (entire vector is one block) - if (split_index == sv->len) return NULL; + if (split_index == pv->len) return NULL; // Number of elements for the right half - uint64_t right_len = sv->len - split_index; + uint64_t right_len = pv->len - split_index; if (right_len == 0) return NULL; // Allocate new vector for right part @@ -190,11 +190,11 @@ pVector *pvSplit(pVector **sv_ptr, uint32_t split_index) { right->len = right_len; // Copy the right part - memcpy(&right->data[0], &sv->data[split_index], right_len * item_bytes); + memcpy(&right->data[0], &pv->data[split_index], right_len * item_bytes); // Shrink original vector - sv->len = split_index; - *sv_ptr = pvShrinkToFit(sv); // Optional: shrink in-place to reduce memory + pv->len = split_index; + *pv_ptr = pvShrinkToFit(pv); // Optional: shrink in-place to reduce memory return right; } @@ -226,22 +226,22 @@ pVector *pvNew(uint32_t capacity) { * and inserts the given element at the desired position. * * Arguments: - * sv - The pVector to insert into (can be NULL). + * pv - The pVector to insert into (can be NULL). * elem - The pointer to be inserted. - * pos - The index at which to insert the element (must be ≤ sv->len). + * pos - The index at which to insert the element (must be ≤ pv->len). * * Return: * The updated pVector with the element inserted. 
*/ -pVector *pvInsert(pVector *sv, void *elem, uint32_t pos) { - sv = pvMakeRoomFor(sv, 1); +pVector *pvInsert(pVector *pv, void *elem, uint32_t pos) { + pv = pvMakeRoomFor(pv, 1); - if (pos < sv->len) { - memmove(&sv->data[pos + 1], &sv->data[pos], (sv->len - pos) * sizeof(void *)); + if (pos < pv->len) { + memmove(&pv->data[pos + 1], &pv->data[pos], (pv->len - pos) * sizeof(void *)); } - sv->data[pos] = elem; - sv->len++; - return sv; + pv->data[pos] = elem; + pv->len++; + return pv; } /* Removes the element at the specified index from the pVector. @@ -250,23 +250,23 @@ pVector *pvInsert(pVector *sv, void *elem, uint32_t pos) { * If this is the last element in the vector, the vector is freed and NULL is returned. * * Arguments: - * sv - The pVector to remove from. - * idx - The index of the element to remove (must be < sv->len). + * pv - The pVector to remove from. + * idx - The index of the element to remove (must be < pv->len). * * Return: * The updated pVector after removal. * Returns NULL if the last element was removed and the vector was freed. */ -pVector *pvRemoveAt(pVector *sv, uint32_t idx) { - if (!sv || sv->len == 0) return sv; - assert(idx < sv->len); - if (sv->len == 1) { +pVector *pvRemoveAt(pVector *pv, uint32_t idx) { + if (!pv || pv->len == 0) return pv; + assert(idx < pv->len); + if (pv->len == 1) { /* Last element being removed; delete vector */ - zfree(sv); + zfree(pv); return NULL; - } else if (idx < sv->len - 1UL) - memmove(&sv->data[idx], &sv->data[idx + 1], (sv->len - idx - 1) * PV_ELEM_SIZE); - sv->len--; - return pvShrinkToFit(sv); + } else if (idx < pv->len - 1UL) + memmove(&pv->data[idx], &pv->data[idx + 1], (pv->len - idx - 1) * PV_ELEM_SIZE); + pv->len--; + return pvShrinkToFit(pv); } /* Removes the first matching element from the pVector. @@ -275,18 +275,18 @@ pVector *pvRemoveAt(pVector *sv, uint32_t idx) { * Updates the vector pointer in case a removal was done. 
* * Arguments: - * sv - A pointer to the location of the pVector to remove from. + * pv - A pointer to the location of the pVector to remove from. * elem - The element pointer to match and remove. * * Return: * true in case a removal was made, false otherwise */ -bool pvRemove(pVector **psv, void *elem) { - pVector *sv = *psv; - if (!sv || sv->len == 0) return false; +bool pvRemove(pVector **ppv, void *elem) { + pVector *pv = *ppv; + if (!pv || pv->len == 0) return false; - for (uint32_t i = 0; i < sv->len; i++) { - if (sv->data[i] == elem) { - *psv = pvRemoveAt(sv, i); + for (uint32_t i = 0; i < pv->len; i++) { + if (pv->data[i] == elem) { + *ppv = pvRemoveAt(pv, i); return true; } } @@ -310,23 +310,23 @@ void *pvGet(pVector *vec, uint32_t idx) { /* Frees the memory used by the pVector. * * Arguments: - * sv - The pVector to free. + * pv - The pVector to free. * * Return: * None. */ -void pvFree(pVector *sv) { - if (sv) zfree(sv); +void pvFree(pVector *pv) { + if (pv) zfree(pv); } -uint32_t pvFind(pVector *sv, void *elem) { - if (!sv || sv->len == 0) return 0; +uint32_t pvFind(pVector *pv, void *elem) { + if (!pv || pv->len == 0) return 0; - for (uint32_t i = 0; i < sv->len; i++) { - if (sv->data[i] == elem) { + for (uint32_t i = 0; i < pv->len; i++) { + if (pv->data[i] == elem) { return i; } } - return sv->len; + return pv->len; } /************************************************************************************************************* * pVector End @@ -441,10 +441,10 @@ static inline size_t encodeNewExpiryBucketKey(unsigned char *key, long long expi * Performs binary search to find the index where the element should be inserted. * Returns the index where the element should be placed to keep the array sorted. 
* - * sv Pointer to the sorted vector + * pv Pointer to the sorted vector * elem Pointer to the element to insert * cmp Comparison function (like strcmp-style: <0, ==0, >0) - * returns the insertion index (between 0 and sv->len) */ + * returns the insertion index (between 0 and pv->len) */ static inline uint32_t findInsertPosition(vsetGetExpiryFunc getExpiry, vsetBucket *bucket, long long expiry) { pVector *pv = vsetBucketVector(bucket); uint32_t left = 0; @@ -485,11 +485,11 @@ static inline uint32_t findInsertPosition(vsetGetExpiryFunc getExpiry, vsetBucke * bucket_ts[element[i-1]] < bucket_ts[element[i]] * * If no valid split is found (i.e. all elements map to the same bucket timestamp), - * the function returns `sv->len` to indicate that splitting is not possible. + * the function returns `pv->len` to indicate that splitting is not possible. * * Return: - * - A valid split index in the range [1, sv->len], where the split occurs. - * - May return `sv->len` if no valid position is found. + * - A valid split index in the range [1, pv->len], where the split occurs. + * - May return `pv->len` if no valid position is found. 
* * Example: * -------- @@ -662,10 +662,10 @@ static bool splitBucketIfPossible(vsetBucket *parent, vsetGetExpiryFunc getExpir long long target_bucket_ts = bucket_ts; unsigned char key[VSET_BUCKET_KEY_LEN] = {0}; vsetBucket *new_bucket = NULL; - pVector *sv = vsetBucketVector(bucket); + pVector *pv = vsetBucketVector(bucket); rax *expiry_buckets = vsetBucketRax(parent); - long long max_bucket_ts = get_bucket_ts(getExpiry(sv->data[pvLen(sv) - 1])); - long long min_bucket_ts = get_bucket_ts(getExpiry(sv->data[0])); + long long max_bucket_ts = get_bucket_ts(getExpiry(pv->data[pvLen(pv) - 1])); + long long min_bucket_ts = get_bucket_ts(getExpiry(pv->data[0])); if (max_bucket_ts < bucket_ts) { /* In case the bucket is already spanning over a larger window than needed, just place the bucket in a new place */ @@ -678,7 +678,7 @@ static bool splitBucketIfPossible(vsetBucket *parent, vsetGetExpiryFunc getExpir /* lets split the bucket. we know we can do it. */ uint32_t split_index = findSplitPosition(getExpiry, bucket, &target_bucket_ts); assert(target_bucket_ts < bucket_ts); - assert(split_index != pvLen(sv)); /* no way to split it ??? */ + assert(split_index != pvLen(pv)); /* no way to split it ??? 
*/ pVector *new_bucket_vector = vsetBucketVector(bucket); bucket = vsetBucketFromVector(pvSplit(&new_bucket_vector, split_index)); new_bucket = vsetBucketFromVector(new_bucket_vector); @@ -709,17 +709,17 @@ static inline vsetBucket *insertToBucket_NONE(vsetGetExpiryFunc getExpiry, vsetB static inline vsetBucket *insertToBucket_SINGLE(vsetGetExpiryFunc getExpiry, vsetBucket *bucket, void *entry, long long expiry) { /* Upgrade to vector */ - pVector *sv = pvNew(2); + pVector *pv = pvNew(2); void *curr_entry = vsetBucketSingle(bucket); long long curr_expiry = getExpiry(curr_entry); if (curr_expiry < expiry) { - sv = pvInsert(sv, curr_entry, 0); - sv = pvInsert(sv, entry, 1); + pv = pvInsert(pv, curr_entry, 0); + pv = pvInsert(pv, entry, 1); } else { - sv = pvInsert(sv, entry, 0); - sv = pvInsert(sv, curr_entry, 1); + pv = pvInsert(pv, entry, 0); + pv = pvInsert(pv, curr_entry, 1); } - bucket = vsetBucketFromVector(sv); + bucket = vsetBucketFromVector(pv); return bucket; } @@ -776,8 +776,8 @@ static inline vsetBucket *insertToBucket_RAX(vsetGetExpiryFunc getExpiry, vsetBu // alternative: raxInsert(expiry_buckets, key, key_len, bucket, NULL); raxSetData(node, bucket); } else if (type == VSET_BUCKET_VECTOR) { - pVector *sv = vsetBucketVector(bucket); - if (pvLen(sv) == VOLATILESET_VECTOR_BUCKET_MAX_SIZE) { + pVector *pv = vsetBucketVector(bucket); + if (pvLen(pv) == VOLATILESET_VECTOR_BUCKET_MAX_SIZE) { /* Try to split the bucket. If not possible switch to hashtable encoding. */ if (!splitBucketIfPossible(target, getExpiry, bucket, bucket_ts, node)) { /* Can't split? 
insrt to the vector anyway, it will just expand to hashtable */ @@ -824,26 +824,26 @@ static inline vsetBucket *removeFromBucket_VECTOR(vsetGetExpiryFunc getExpiry, v vsetBucket *new_bucket = bucket; bool success = false; - pVector *sv = vsetBucketVector(bucket); + pVector *pv = vsetBucketVector(bucket); /* In case we we removed the entry */ - uint32_t vlen = pvLen(sv); + uint32_t vlen = pvLen(pv); if (vlen <= 2) { /* convert to single if needed */ - uint32_t idx = pvFind(sv, entry); + uint32_t idx = pvFind(pv, entry); if (idx == vlen) { success = false; } else { if (vlen == 1) new_bucket = vsetBucketFromNone(); else - new_bucket = vsetBucketFromSingle(pvGet(sv, idx == 0 ? 1 : 0)); + new_bucket = vsetBucketFromSingle(pvGet(pv, idx == 0 ? 1 : 0)); success = true; - pvFree(sv); + pvFree(pv); } } else { - if (pvRemove(&sv, entry)) { + if (pvRemove(&pv, entry)) { success = true; - new_bucket = vsetBucketFromVector(sv); + new_bucket = vsetBucketFromVector(pv); } } if (removed) *removed = success; diff --git a/src/vset.h b/src/vset.h index 7be797725b..f14b869940 100644 --- a/src/vset.h +++ b/src/vset.h @@ -279,12 +279,14 @@ typedef vsetBucket *vset; typedef struct vsetIterator { /* for rax bucket */ raxIterator riter; - /* for hashtable bucket */ - hashtableIterator hiter; - /* for vector bucket */ - uint32_t viter; - /* for single bucket */ - void *vsingle; + union { + /* for hashtable bucket */ + hashtableIterator hiter; + /* for vector bucket */ + uint32_t viter; + /* for single bucket */ + void *vsingle; + }; /* the parent of the bucket we are currently iterating on */ vsetBucket *parent_bucket; /* the bucket we are currently iterating on */ From c4dd8776dd7baad86626d0d97b9f347ab591a03e Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Mon, 30 Jun 2025 19:04:05 +0300 Subject: [PATCH 037/119] Use unsorted vector for most cases.
Signed-off-by: Ran Shidlansik --- src/vset.c | 158 +++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 148 insertions(+), 10 deletions(-) diff --git a/src/vset.c b/src/vset.c index 44e8a2ac3a..f5ea09ae1e 100644 --- a/src/vset.c +++ b/src/vset.c @@ -318,6 +318,76 @@ void pvFree(pVector *pv) { if (pv) zfree(pv); } +/* Appends an element to the end of the given pVector. + * + * Parameters: + * pv - The vector to append to. + * elem - The element to append. + * + * Returns: + * A (possibly reallocated) pVector with the new element inserted at the end. + * + * Notes: + * Internally this uses pvInsert() with the current length of the vector, + * effectively appending the element. */ +pVector *pvPush(pVector *pv, void *elem) { + return pvInsert(pv, elem, PV_LEN(pv)); +} + +/* Removes and optionally returns the last element from the given pVector. + * + * Parameters: + * pv - The vector to remove the element from. + * pelem - Optional pointer to store the popped element. Can be NULL. + * + * Returns: + * A (possibly reallocated) pVector with the last element removed. + * + * Notes: + * If the vector is empty, the behavior is to remove from index 0 (safe fallback). + * You can pass NULL for `pelem` if you don't need the removed value. */ +pVector *pvPop(pVector *pv, void **pelem) { + uint32_t last_idx = PV_LEN(pv) > 0 ? PV_LEN(pv) - 1 : 0; + if (pelem) *pelem = pvGet(pv, last_idx); + return pvRemoveAt(pv, last_idx); +} + +/* Swaps two elements at given indices inside the pVector. + * + * Parameters: + * pv - The vector containing the elements to swap. + * idx1 - Index of the first element. + * idx2 - Index of the second element. + * + * Returns: + * None. + * + * Preconditions: + * - idx1 and idx2 must both be valid indices within the vector. + * + * Notes: + * This is a simple in-place swap that uses direct pointer assignment. 
*/ +void pvSwap(pVector *pv, uint32_t idx1, uint32_t idx2) { + assert(idx1 >= 0 && idx1 < PV_LEN(pv)); + assert(idx2 >= 0 && idx2 < PV_LEN(pv)); + void *temp = pvGet(pv, idx1); + pv->data[idx1] = pv->data[idx2]; + pv->data[idx2] = temp; +} + +/* Finds the index of the given element in the pVector. + * + * Parameters: + * pv - The vector to search. + * elem - The element to look for (pointer equality). + * + * Returns: + * The index of the element if found; otherwise, returns pv->len (i.e., not found). + * + * Notes: + * - This compares elements using raw pointer equality (`==`). + * - If pv is NULL or empty, returns 0 as a safe fallback. + * - Return value being equal to pv->len can be used to check for absence. */ uint32_t pvFind(pVector *pv, void *elem) { if (!pv || pv->len == 0) return 0; @@ -328,6 +398,36 @@ uint32_t pvFind(pVector *pv, void *elem) { } return pv->len; } + +/* Sort the elements of a pVector using a user-provided comparison function. + * + * This function performs an in-place sort of the elements in the given pVector. + * It uses the standard C library `qsort()` function under the hood and assumes + * the elements are pointers. The caller must supply a comparison function + * compatible with `qsort()`, which determines the ordering of the elements. + * + * Parameters: + * pv - A pointer to the pVector to sort. + * compare - A function pointer used to compare two elements. This function must + * match the signature: int compare(const void *a, const void *b) + * and return: + * < 0 if *a < *b + * > 0 if *a > *b + * 0 if *a == *b + * + * Returns: + * None. The pVector is sorted in place. 
+ * + * Example: + * int cmp(const void *a, const void *b) { + * return strcmp(*(const char **)a, *(const char **)b); + * } + * + * pvSort(my_vector, cmp); */ +void pvSort(pVector *pv, int (*compare)(const void *a, const void *b)) { + qsort(pv->data, pv->len, sizeof(void *), compare); +} + /************************************************************************************************************* * pVector End *************************************************************************************************************/ @@ -407,6 +507,31 @@ static inline vsetBucket *vsetBucketFromRax(rax *r) { #define EXPIRE_COMPARE(exp1, exp2) (exp1 < exp2 ? -1 : exp1 == exp2 ? 0 \ : 1) +/* Since we do not have native posix support for qsort_r, we use this variable to help the vset + * compare function operate entry comparison given a dynamic getExpiry function is passed to + * different vset functions. */ +static __thread vsetGetExpiryFunc current_getter_func; + +static inline void vsetSetExpiryGetter(vsetGetExpiryFunc f) { + assert(current_getter_func == NULL); + current_getter_func = f; +} + +static inline void vsetUnsetExpiryGetter(void) { + current_getter_func = NULL; +} + +static inline vsetGetExpiryFunc vsetGetExpiryGetter(void) { + return current_getter_func; +} + +static int vsetCompareEntries(const void *a, const void *b) { + vsetGetExpiryFunc getExpiry = vsetGetExpiryGetter(); + long long ea = getExpiry(*(void **)a); + long long eb = getExpiry(*(void **)b); + return (ea > eb) - (ea < eb); +} + static inline long long get_bucket_ts(long long expiry) { return (expiry & ~(VOLATILESET_BUCKET_INTERVAL_MIN - 1LL)) + VOLATILESET_BUCKET_INTERVAL_MIN; } @@ -664,6 +789,12 @@ static bool splitBucketIfPossible(vsetBucket *parent, vsetGetExpiryFunc getExpir vsetBucket *new_bucket = NULL; pVector *pv = vsetBucketVector(bucket); rax *expiry_buckets = vsetBucketRax(parent); + /* first lets sort the vector. we cannot take a decision without it. 
+ * We set the global expiry getter so we can sort according to the provided getExpiry function. */ + vsetSetExpiryGetter(getExpiry); + pvSort(pv, vsetCompareEntries); + vsetUnsetExpiryGetter(); + long long max_bucket_ts = get_bucket_ts(getExpiry(pv->data[pvLen(pv) - 1])); long long min_bucket_ts = get_bucket_ts(getExpiry(pv->data[0])); @@ -713,17 +844,19 @@ static inline vsetBucket *insertToBucket_SINGLE(vsetGetExpiryFunc getExpiry, vse void *curr_entry = vsetBucketSingle(bucket); long long curr_expiry = getExpiry(curr_entry); if (curr_expiry < expiry) { - pv = pvInsert(pv, curr_entry, 0); - pv = pvInsert(pv, entry, 1); + pv = pvPush(pv, curr_entry); + pv = pvPush(pv, entry); } else { - pv = pvInsert(pv, entry, 0); - pv = pvInsert(pv, curr_entry, 1); + pv = pvPush(pv, entry); + pv = pvPush(pv, curr_entry); } bucket = vsetBucketFromVector(pv); return bucket; } -static inline vsetBucket *insertToBucket_VECTOR(vsetGetExpiryFunc getExpiry, vsetBucket *bucket, void *entry, long long expiry) { +static inline vsetBucket *insertToBucket_VECTOR(vsetGetExpiryFunc getExpiry, vsetBucket *bucket, void *entry, long long expiry, int pos) { + UNUSED(getExpiry); + UNUSED(expiry); pVector *pv = vsetBucketVector(bucket); /* limit of the number of elements in a vector. */ if (pvLen(pv) >= VOLATILESET_VECTOR_BUCKET_MAX_SIZE) { @@ -738,8 +871,12 @@ static inline vsetBucket *insertToBucket_VECTOR(vsetGetExpiryFunc getExpiry, vse return vsetBucketFromHashtable(ht); } else { - uint32_t pos = findInsertPosition(getExpiry, bucket, expiry); - return vsetBucketFromVector(pvInsert(pv, entry, pos)); + if (pos >= 0) + /* In case we are explicitly provided a position to insert place the entry there */ + return vsetBucketFromVector(pvInsert(pv, entry, pos)); + else + /* Otherwise it is better to just push the entry to the vector with less change of memmove and reallocation. 
*/ + return vsetBucketFromVector(pvPush(pv, entry)); } return NULL; } @@ -781,7 +918,7 @@ static inline vsetBucket *insertToBucket_RAX(vsetGetExpiryFunc getExpiry, vsetBu /* Try to split the bucket. If not possible switch to hashtable encoding. */ if (!splitBucketIfPossible(target, getExpiry, bucket, bucket_ts, node)) { /* Can't split? insrt to the vector anyway, it will just expand to hashtable */ - bucket = insertToBucket_VECTOR(getExpiry, bucket, entry, expiry); + bucket = insertToBucket_VECTOR(getExpiry, bucket, entry, expiry, -1); assert(vsetBucketType(bucket) == VSET_BUCKET_HT); /* In order to avoid rax override, we directly change the node data */ // alternative raxInsert(expiry_buckets, key, key_len, bucket, NULL); @@ -791,7 +928,7 @@ static inline vsetBucket *insertToBucket_RAX(vsetGetExpiryFunc getExpiry, vsetBu return insertToBucket_RAX(getExpiry, target, entry, expiry); } } else { - vsetBucket *new_bucket = insertToBucket_VECTOR(getExpiry, bucket, entry, expiry); + vsetBucket *new_bucket = insertToBucket_VECTOR(getExpiry, bucket, entry, expiry, -1); if (new_bucket != bucket) /* In order to avoid rax override, we directly change the node data */ // alternative: raxInsert(expiry_buckets, key, key_len, new_bucket, NULL); @@ -1104,7 +1241,8 @@ bool vsetAddEntry(vset *set, vsetGetExpiryFunc getExpiry, void *entry) { expiry_buckets = insertToBucket_RAX(getExpiry, expiry_buckets, entry, expiry); } } else { - expiry_buckets = insertToBucket_VECTOR(getExpiry, expiry_buckets, entry, expiry); + uint32_t pos = findInsertPosition(getExpiry, expiry_buckets, expiry); + expiry_buckets = insertToBucket_VECTOR(getExpiry, expiry_buckets, entry, expiry, pos); } break; } From b01ec8d9b232e9f0310fdea059524a0ef8c66ea2 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Mon, 30 Jun 2025 19:09:30 +0300 Subject: [PATCH 038/119] fix compilation warning Signed-off-by: Ran Shidlansik --- src/vset.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/vset.c 
b/src/vset.c index f5ea09ae1e..8441b99d77 100644 --- a/src/vset.c +++ b/src/vset.c @@ -368,8 +368,7 @@ pVector *pvPop(pVector *pv, void **pelem) { * Notes: * This is a simple in-place swap that uses direct pointer assignment. */ void pvSwap(pVector *pv, uint32_t idx1, uint32_t idx2) { - assert(idx1 >= 0 && idx1 < PV_LEN(pv)); - assert(idx2 >= 0 && idx2 < PV_LEN(pv)); + assert(idx1 < PV_LEN(pv) && idx2 < PV_LEN(pv)); void *temp = pvGet(pv, idx1); pv->data[idx1] = pv->data[idx2]; pv->data[idx2] = temp; From b11b1436c3cec29e38d8f6a631b9bf7e89e8abc6 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Tue, 1 Jul 2025 10:01:48 +0300 Subject: [PATCH 039/119] Introduce a multi pop method for expiration entries Signed-off-by: Ran Shidlansik --- src/unit/test_vset.c | 19 +-- src/vset.c | 336 ++++++++++++++++++++++++++++++++----------- src/vset.h | 4 +- 3 files changed, 260 insertions(+), 99 deletions(-) diff --git a/src/unit/test_vset.c b/src/unit/test_vset.c index 88aeb6212e..5f91600f3c 100644 --- a/src/unit/test_vset.c +++ b/src/unit/test_vset.c @@ -225,10 +225,10 @@ long long mock_entry_get_expiry(const void *entry) { return mockGetExpiry(entry); } -int mock_entry_expire(void *db, void *o, void *entry) { - UNUSED(db); - UNUSED(o); +int mock_entry_expire(void *entry, void *ctx) { mock_entry *e = (mock_entry *)entry; + long long now = *(long long *)ctx; + TEST_ASSERT(mock_entry_get_expiry(entry) <= now); for (int i = 0; i < mock_entry_count; i++) { if (mock_entries[i] == e) { // printf("expire entry %p with expiry %llu\n", e, mockGetExpiry(e)); @@ -283,16 +283,11 @@ int remove_mock_entry(vset *set) { return 0; } + int expire_mock_entries(vset *set, mstime_t now) { - void *entry; - do { - entry = vsetPopExpired(set, mockGetExpiry, now); - if (entry) { - // printf("pop expire entry %p with expiry %llu now: %llu\n", entry, mockGetExpiry(entry), now); - TEST_ASSERT(mockGetExpiry(entry) <= now); - mock_entry_expire(NULL, NULL, entry); - } - } while (entry); + // printf("Before 
expired entries entries: %d\n", mock_entry_count); + vsetPopExpired(set, mockGetExpiry, mock_entry_expire, now, mock_entry_count, &now); + // printf("After expired %zu entries left entries: %d and set is empty: %s\n", count, mock_entry_count, vsetIsEmpty(set) ? "true" : "false"); return 0; } diff --git a/src/vset.c b/src/vset.c index 8441b99d77..dd7c896a81 100644 --- a/src/vset.c +++ b/src/vset.c @@ -531,6 +531,12 @@ static int vsetCompareEntries(const void *a, const void *b) { return (ea > eb) - (ea < eb); } +/* used for popping form rax bucket where we KNOW all entries are expired. */ +static long long vsetGetExpiryZero(const void *entry) { + UNUSED(entry); + return 0; +} + static inline long long get_bucket_ts(long long expiry) { return (expiry & ~(VOLATILESET_BUCKET_INTERVAL_MIN - 1LL)) + VOLATILESET_BUCKET_INTERVAL_MIN; } @@ -954,7 +960,7 @@ static inline vsetBucket *removeFromBucket_SINGLE(vsetGetExpiryFunc getExpiry, v } } -static inline vsetBucket *removeFromBucket_VECTOR(vsetGetExpiryFunc getExpiry, vsetBucket *bucket, void *entry, long long expiry, bool *removed) { +static inline vsetBucket *removeFromBucket_VECTOR(vsetGetExpiryFunc getExpiry, vsetBucket *bucket, void *entry, long long expiry, bool *removed, bool pop) { UNUSED(getExpiry); UNUSED(expiry); @@ -977,7 +983,18 @@ static inline vsetBucket *removeFromBucket_VECTOR(vsetGetExpiryFunc getExpiry, v pvFree(pv); } } else { - if (pvRemove(&pv, entry)) { + /* pop is a more efficient way to remove an element from the vector. However it may + * change the order of the elements in the vector, so we should ask the user to indicate if to use pop or not. 
*/ + if (pop) { + uint32_t idx = pvFind(pv, entry); + if (idx < vlen) { + void *poped_entry = NULL; + pvSwap(pv, idx, pvLen(pv) - 1); + success = true; + new_bucket = vsetBucketFromVector(pvPop(pv, &poped_entry)); + assert(poped_entry == entry); + } + } else if (pvRemove(&pv, entry)) { success = true; new_bucket = vsetBucketFromVector(pv); } @@ -1009,7 +1026,7 @@ static inline vsetBucket *removeFromBucket_HASHTABLE(vsetGetExpiryFunc getExpiry if (removed) *removed = success; return new_bucket; } -static bool removeEntryBucketFromRaxBucket(vsetBucket *rax_bucket, vsetGetExpiryFunc getExpiry, void *entry, vsetBucket *bucket, unsigned char *key, size_t key_len, vsetBucket **pbucket, raxNode *node) { +static bool removeEntryFromRaxBucket(vsetBucket *rax_bucket, vsetGetExpiryFunc getExpiry, void *entry, vsetBucket *bucket, unsigned char *key, size_t key_len, vsetBucket **pbucket, raxNode *node) { bool removed = false; switch (vsetBucketType(bucket)) { case VSET_BUCKET_SINGLE: @@ -1020,7 +1037,7 @@ static bool removeEntryBucketFromRaxBucket(vsetBucket *rax_bucket, vsetGetExpiry } break; case VSET_BUCKET_VECTOR: { - vsetBucket *new_bucket = removeFromBucket_VECTOR(getExpiry, bucket, entry, 0, &removed); + vsetBucket *new_bucket = removeFromBucket_VECTOR(getExpiry, bucket, entry, 0, &removed, true); if (new_bucket != bucket) { if (!new_bucket) { raxRemove(vsetBucketRax(rax_bucket), key, key_len, NULL); @@ -1045,48 +1062,182 @@ static bool removeEntryBucketFromRaxBucket(vsetBucket *rax_bucket, vsetGetExpiry break; } default: - panic("Unknown bucket type for removeEntryBucketFromRaxBucket"); + panic("Unknown bucket type for removeEntryFromRaxBucket"); return false; } return removed; } -static inline vsetBucket *removeFromBucket_RAX(vsetGetExpiryFunc getExpiry, vsetBucket *target, void *entry, long long expiry, bool *removed) { - unsigned char key[VSET_BUCKET_KEY_LEN] = {0}; - long long bucket_ts; - size_t key_len; - raxNode *node; - rax *expiry_buckets = 
vsetBucketRax(target); - vsetBucket *bucket = findBucket(expiry_buckets, expiry, key, &key_len, &bucket_ts, &node); - assert(bucket); - bool success = removeEntryBucketFromRaxBucket(target, getExpiry, entry, bucket, key, key_len, NULL, node); - if (removed) *removed = success; - // shrink to single bucket if possible +static inline bool shrinkRaxBucketIfPossible(vsetBucket **target, vsetGetExpiryFunc getExpiry) { + rax *expiry_buckets = vsetBucketRax(*target); if (raxSize(expiry_buckets) == 1) { raxIterator it; raxStart(&it, expiry_buckets); assert(raxSeek(&it, "^", NULL, 0)); assert(raxNext(&it)); - bucket = it.data; + vsetBucket *bucket = it.data; int bucket_type = vsetBucketType(bucket); raxStop(&it); /* We will not convert hashtable to our only bucket since we will lose the ability to scan the items in a sorted way. * We will also not shrink when we have a full vector, since it might immediately be repopulated. */ if (bucket_type == VSET_BUCKET_SINGLE || (bucket_type == VSET_BUCKET_VECTOR && pvLen(vsetBucketVector(bucket)) < VOLATILESET_VECTOR_BUCKET_MAX_SIZE)) { + if (bucket_type == VSET_BUCKET_VECTOR) { + pVector *pv = vsetBucketVector(bucket); + /* first lets sort the vector. 
we cannot set the target bucket as unsorted vector bucket */ + vsetSetExpiryGetter(getExpiry); + pvSort(pv, vsetCompareEntries); + vsetUnsetExpiryGetter(); + } /* lets make our bucket to be the only left bucket */ - target = bucket; + *target = bucket; raxFree(expiry_buckets); + return true; } } + return false; +} + +static inline vsetBucket *removeFromBucket_RAX(vsetGetExpiryFunc getExpiry, vsetBucket *target, void *entry, long long expiry, bool *removed) { + unsigned char key[VSET_BUCKET_KEY_LEN] = {0}; + long long bucket_ts; + size_t key_len; + raxNode *node; + rax *expiry_buckets = vsetBucketRax(target); + vsetBucket *bucket = findBucket(expiry_buckets, expiry, key, &key_len, &bucket_ts, &node); + assert(bucket); + bool success = removeEntryFromRaxBucket(target, getExpiry, entry, bucket, key, key_len, NULL, node); + if (removed) *removed = success; + // shrink to single bucket if possible + shrinkRaxBucketIfPossible(&target, getExpiry); return target; } +static inline size_t vsetBucketMultiPopExpired_NONE(vsetBucket **bucket, vsetGetExpiryFunc getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx) { + UNUSED(bucket); + UNUSED(getExpiry); + UNUSED(expiryFunc); + UNUSED(now); + UNUSED(max_count); + UNUSED(ctx); + return 0; +} + +static inline size_t vsetBucketMultiPopExpired_SINGLE(vsetBucket **bucket, vsetGetExpiryFunc getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx) { + void *entry = vsetBucketSingle(*bucket); + if (max_count && getExpiry(entry) <= now && expiryFunc(entry, ctx)) { + freeVsetBucket(*bucket); + *bucket = vsetBucketFromNone(); + return 1; + } + return 0; +} + +static inline size_t vsetBucketMultiPopExpired_VECTOR(vsetBucket **bucket, vsetGetExpiryFunc getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx) { + pVector *pv = vsetBucketVector(*bucket); + uint32_t len = min(pvLen(pv), max_count); + uint32_t i = 0; + for (; i < len; i++) { + void *entry = pvGet(pv, i); 
+ /* break as soon as the expiryFunc stops us OR we reached an entry which is not expired */ + if (getExpiry(entry) > now || !(expiryFunc(entry, ctx))) + break; + } + pVector *new_pv = pvSplit(&pv, i); + *bucket = (new_pv ? vsetBucketFromVector(new_pv) : vsetBucketFromNone()); + pvFree(pv); + return i; +} + +static inline size_t vsetBucketMultiPopExpired_HASHTABLE(vsetBucket **bucket, vsetGetExpiryFunc getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx) { + UNUSED(getExpiry); + UNUSED(now); + hashtable *ht = vsetBucketHashtable(*bucket); + hashtableIterator it; + void *entry = NULL; + size_t expired = 0; + hashtableInitIterator(&it, ht, HASHTABLE_ITER_SAFE); + while (hashtableNext(&it, &entry)) { + if (expired < max_count && expiryFunc(entry, ctx)) { + hashtableDelete(ht, entry); + expired++; + entry = NULL; + } else + break; + } + hashtableResetIterator(&it); + + /* in case we completed scanning the hashtable or a single element is left, we can convert the hashtable. */ + size_t ht_size = hashtableSize(ht); + if (ht_size == 0) { + hashtableRelease(ht); + *bucket = vsetBucketFromNone(); + } else if (ht_size == 1) { + assert(entry); + *bucket = vsetBucketFromSingle(entry); + } + return expired; +} + +static inline size_t vsetBucketMultiPopExpired_RAX(vsetBucket **bucket, vsetGetExpiryFunc getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx) { + UNUSED(getExpiry); + rax *buckets = vsetBucketRax(*bucket); + size_t count = 0; + while (count < max_count && raxSize(buckets) > 0) { + raxIterator it; + raxStart(&it, buckets); + raxSeek(&it, "^", NULL, 0); + assert(raxNext(&it)); + /* lets start again by going into the first bucket. 
*/ + unsigned char key[VSET_BUCKET_KEY_LEN] = {0}; + vsetBucket *time_bucket = it.data; + int time_bucket_type = vsetBucketType(time_bucket); + long long time_bucket_ts = decodeExpiryKey(it.key); + memcpy(key, it.key, it.key_len); + size_t key_len = it.key_len; + raxNode *node = it.node; + raxStop(&it); + if (time_bucket_ts > now) + break; + switch (time_bucket_type) { + case VSET_BUCKET_SINGLE: + count += vsetBucketMultiPopExpired_SINGLE(&time_bucket, vsetGetExpiryZero, expiryFunc, now, max_count - count, ctx); + break; + case VSET_BUCKET_VECTOR: + count += vsetBucketMultiPopExpired_VECTOR(&time_bucket, vsetGetExpiryZero, expiryFunc, now, max_count - count, ctx); + break; + case VSET_BUCKET_HT: + count += vsetBucketMultiPopExpired_HASHTABLE(&time_bucket, vsetGetExpiryZero, expiryFunc, now, max_count - count, ctx); + break; + default: + panic("Cannot expire entries from bucket which is not single, vector or hashtable"); + } + if (!time_bucket) { + /* in case the bucket is freed, we can just remove it and continue to the next bucket. */ + raxRemove(buckets, key, key_len, NULL); + } else { + /* in case the bucket still exists, it must be since we reached the max_count. + * So we save the new bucket to the rax and bail. 
*/ + assert(max_count == count); + raxSetData(node, time_bucket); + break; + } + } + /* if all buckets are removed, */ + if (raxSize(buckets) == 0) { + raxFree(buckets); + *bucket = vsetBucketFromNone(); + } + shrinkRaxBucketIfPossible(bucket, getExpiry); + return count; +} + static int vsetBucketNext_NONE(vsetIterator *it, void **entryptr) { UNUSED(it); UNUSED(entryptr); return 0; } + static inline int vsetBucketNext_SINGLE(vsetIterator *it, void **entryptr) { bool init_bucket_scan = (it->iteration_state == VSET_BUCKET_NONE); if (init_bucket_scan) { @@ -1097,6 +1248,7 @@ static inline int vsetBucketNext_SINGLE(vsetIterator *it, void **entryptr) { } return 0; } + static inline int vsetBucketNext_VECTOR(vsetIterator *it, void **entryptr) { bool init_bucket_scan = (it->iteration_state == VSET_BUCKET_NONE); pVector *pv = vsetBucketVector(it->bucket); @@ -1268,7 +1420,7 @@ static inline bool vsetRemoveEntryWithExpiry(vset *set, vsetGetExpiryFunc getExp bucket = removeFromBucket_SINGLE(getExpiry, bucket, entry, expiry, &removed); break; case VSET_BUCKET_VECTOR: - bucket = removeFromBucket_VECTOR(getExpiry, bucket, entry, expiry, &removed); + bucket = removeFromBucket_VECTOR(getExpiry, bucket, entry, expiry, &removed, false); break; case VSET_BUCKET_HT: bucket = removeFromBucket_HASHTABLE(getExpiry, bucket, entry, expiry, &removed); @@ -1395,93 +1547,107 @@ bool vsetUpdateEntry(vset *set, vsetGetExpiryFunc getExpiry, void *old_entry, vo return true; } -static void *vsetGetFirstExpired(vset *set, vsetGetExpiryFunc getExpiry, mstime_t now, bool delete) { +/* vsetPopExpired - Remove expired entries from a volatile set up to a maximum count. + * + * Parameters: + * set: Pointer to the volatile set (vset *) to operate on. + * getExpiry: Function to retrieve the expiration time from an entry. + * expiryFunc: Function to call on each expired entry (e.g., to free or notify). + * now: Current time in milliseconds used to compare against expiry times. 
+ * max_count: Maximum number of expired entries to remove. + * ctx: Opaque context pointer passed through to the expiryFunc callback. + * + * This function delegates expiration popping to a type-specific handler based on the + * internal bucket type of the set. It supports various bucket encodings: + * - NONE + * - SINGLE + * - VECTOR + * - RAX (radix tree) + * - HT (hashtable) + * + * Returns the number of expired entries successfully removed (and passed to expiryFunc). + * + * Panics if the bucket type is unknown or unsupported. + * + * Return: + * Number of expired entries removed (size_t). */ +size_t vsetPopExpired(vset *set, vsetGetExpiryFunc getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx) { + vsetBucket *bucket = *set; + int bucket_type = vsetBucketType(bucket); + switch (bucket_type) { + case VSET_BUCKET_NONE: + return vsetBucketMultiPopExpired_NONE(set, getExpiry, expiryFunc, now, max_count, ctx); + break; + case VSET_BUCKET_RAX: + return vsetBucketMultiPopExpired_RAX(set, getExpiry, expiryFunc, now, max_count, ctx); + break; + case VSET_BUCKET_SINGLE: + return vsetBucketMultiPopExpired_SINGLE(set, getExpiry, expiryFunc, now, max_count, ctx); + break; + case VSET_BUCKET_VECTOR: + return vsetBucketMultiPopExpired_VECTOR(set, getExpiry, expiryFunc, now, max_count, ctx); + break; + case VSET_BUCKET_HT: + return vsetBucketMultiPopExpired_HASHTABLE(set, getExpiry, expiryFunc, now, max_count, ctx); + break; + default: + panic("Unknown volatile set bucket type in vsetPopExpired"); + } + return 0; +} + +/* vsetEstimatedEarliestExpiry - Estimate the earliest expiration time in a volatile set. + * + * Parameters: + * set: Pointer to the volatile set (vset *) to inspect. + * getExpiry: Callback function used to extract the expiration time from a set entry. + * + * Returns the earliest expiration time based on the structure of the volatile set. 
+ * This is an *approximate* value: + * - For bucketed types (e.g., radix tree, vector), it returns the expiry of the first bucket or entry, + * which may not be the actual earliest expiring item. + * - For single-entry sets, it returns the expiry of the sole item. + * - For VSET_BUCKET_NONE, it returns -1 to indicate there is no data. + * + * Supported bucket types: + * - VSET_BUCKET_SINGLE + * - VSET_BUCKET_VECTOR + * - VSET_BUCKET_RAX + * + * Panics if called with an unsupported bucket type. + * + * Return: + * Estimated earliest expiry time in milliseconds, or -1 if the set is empty. */ +long long vsetEstimatedEarliestExpiry(vset *set, vsetGetExpiryFunc getExpiry) { int set_type = vsetBucketType(*set); void *entry = NULL; long long expiry; switch (set_type) { case VSET_BUCKET_NONE: - return NULL; + return -1; break; case VSET_BUCKET_RAX: { - vsetIterator iter; - vsetStart(set, &iter); - assert(vsetBucketNext_RAX(&iter, &entry)); - long long bucket_ts = iter.bucket_ts; - vsetStop(&iter); - if (bucket_ts > now) - return NULL; - expiry = getExpiry(entry); - assert(expiry <= now); + rax *r = vsetBucketRax(set); + raxIterator it; + raxStart(&it, r); + expiry = decodeExpiryKey(it.key); + raxStop(&it); break; } case VSET_BUCKET_SINGLE: { entry = vsetBucketSingle(*set); expiry = getExpiry(entry); - if (expiry > now) - return NULL; break; } case VSET_BUCKET_VECTOR: { entry = pvGet(vsetBucketVector(*set), 0); expiry = getExpiry(entry); - if (expiry > now) - return NULL; - break; - } - case VSET_BUCKET_HT: { - hashtableIterator iter; - hashtableInitIterator(&iter, vsetBucketHashtable(*set), 0); - assert(hashtableNext(&iter, &entry)); - hashtableResetIterator(&iter); - expiry = getExpiry(entry); - if (expiry > now) - return NULL; break; } default: - panic("Unknown volatile set bucket type in vsetNext"); + panic("Unsupported vset encoding type. 
Only supported types are single, vector or rax"); } - if (delete) - assert(vsetRemoveEntry(set, getExpiry, entry)); - return entry; -} - -/* Retrieves and removes the first expired entry from the volatile set. - * - * This is a public-facing convenience wrapper around vsetGetFirstExpired() - * with delete=true. It performs a "pop" operation, returning the first - * expired entry (if any) and removing it from the underlying structure. - * - * Parameters: - * - set: Pointer to the volatile set. - * - getExpiry: Function used to extract the expiry timestamp from an entry. - * - now: Current time in milliseconds. Used in order to compare the entries time against - * to decide if they are expired or not. - * - * Returns: - * - The first expired entry, or NULL if no expired entries are present.*/ -void *vsetPopExpired(vset *set, vsetGetExpiryFunc getExpiry, mstime_t now) { - return vsetGetFirstExpired(set, getExpiry, now, true); -} - -/* Retrieves (but does not remove) the first expired entry from the volatile set. - * - * This function is useful when the caller wants to inspect the next item - * scheduled for expiration without mutating the underlying set. - * - * Internally calls vsetGetFirstExpired() with delete=false. - * - * Parameters: - * - set: Pointer to the volatile set. - * - getExpiry: Function used to extract the expiry timestamp from an entry. - * - now: Current time in milliseconds. Used in order to compare the entries time against - * to decide if they are expired or not. - * - * Returns: - * - The first expired entry, or NULL if no expired entries are present.*/ -void *vsetFirstExpired(vset *set, vsetGetExpiryFunc getExpiry, mstime_t now) { - return vsetGetFirstExpired(set, getExpiry, now, false); + return expiry; } /* Advances the volatile set iterator to the next entry. 
diff --git a/src/vset.h b/src/vset.h index f14b869940..6d11ac9490 100644 --- a/src/vset.h +++ b/src/vset.h @@ -269,6 +269,7 @@ #define VOLATILESET_VECTOR_BUCKET_MAX_SIZE 127 typedef long long (*vsetGetExpiryFunc)(const void *entry); +typedef int (*vsetExpiryFunc)(void *entry, void *ctx); // Generic bucket type typedef void vsetBucket; @@ -301,7 +302,6 @@ typedef struct vsetIterator { bool vsetAddEntry(vset *set, vsetGetExpiryFunc getExpiry, void *entry); bool vsetRemoveEntry(vset *set, vsetGetExpiryFunc getExpiry, void *entry); -void *vsetPopExpired(vset *set, vsetGetExpiryFunc getExpiry, mstime_t now); void *vsetFirstExpired(vset *set, vsetGetExpiryFunc getExpiry, mstime_t now); bool vsetUpdateEntry(vset *set, vsetGetExpiryFunc getExpiry, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry); bool vsetIsEmpty(vset *set); @@ -310,6 +310,6 @@ bool vsetNext(vsetIterator *it, void **entryptr); void vsetStop(vsetIterator *it); void vsetInit(vset *set); void vsetClear(vset *set); - +size_t vsetPopExpired(vset *set, vsetGetExpiryFunc getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx); #endif From 505895308fe1b7704c4ec2c9c55445e2350db147 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Tue, 1 Jul 2025 10:37:47 +0300 Subject: [PATCH 040/119] fix format and spellcheck Signed-off-by: Ran Shidlansik --- src/vset.c | 6 +++--- src/vset.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/vset.c b/src/vset.c index dd7c896a81..5d5062eb60 100644 --- a/src/vset.c +++ b/src/vset.c @@ -988,11 +988,11 @@ static inline vsetBucket *removeFromBucket_VECTOR(vsetGetExpiryFunc getExpiry, v if (pop) { uint32_t idx = pvFind(pv, entry); if (idx < vlen) { - void *poped_entry = NULL; + void *popped_entry = NULL; pvSwap(pv, idx, pvLen(pv) - 1); success = true; - new_bucket = vsetBucketFromVector(pvPop(pv, &poped_entry)); - assert(poped_entry == entry); + new_bucket = vsetBucketFromVector(pvPop(pv, &popped_entry)); + 
assert(popped_entry == entry); } } else if (pvRemove(&pv, entry)) { success = true; diff --git a/src/vset.h b/src/vset.h index 6d11ac9490..3b27788724 100644 --- a/src/vset.h +++ b/src/vset.h @@ -198,7 +198,7 @@ * * SINGLE * | - * v + * v * VECTOR (sorted, up to 127, can split * | into multiple vectors) * | From 0c3fb77e0f63d3f0e9b6a88752c0148e79e9dd80 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Tue, 1 Jul 2025 10:40:21 +0300 Subject: [PATCH 041/119] revert changes to object.c (they belong in a separate PR) Signed-off-by: Ran Shidlansik --- src/object.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/object.c b/src/object.c index 61f9a09c01..301b963ea4 100644 --- a/src/object.c +++ b/src/object.c @@ -1129,9 +1129,9 @@ size_t objectComputeSize(robj *key, robj *o, size_t sample_size, int dbid) { if (o->type == OBJ_STRING) { if (o->encoding == OBJ_ENCODING_INT) { - asize = zmalloc_size((void *)o); + asize = sizeof(*o); } else if (o->encoding == OBJ_ENCODING_RAW) { - asize = sdsAllocSize(o->ptr) + zmalloc_size((void *)o); + asize = sdsAllocSize(o->ptr) + sizeof(*o); } else if (o->encoding == OBJ_ENCODING_EMBSTR) { asize = zmalloc_size((void *)o); } else { From ceb0a277d8c1ffdc363e5321c0bc7468b2c21696 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Tue, 1 Jul 2025 14:41:41 +0300 Subject: [PATCH 042/119] fix HPERSIST bug in accounting different flows new argv Signed-off-by: Ran Shidlansik --- src/t_hash.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/src/t_hash.c b/src/t_hash.c index 20f2a40482..73b6de33e0 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -1247,14 +1247,7 @@ void hgetexCommand(client *c) { return; } - o = lookupKeyRead(c->db, c->argv[1]); - if (checkType(c, o, OBJ_HASH)) - return; - - if (o == NULL) { - o = createHashObject(); - dbAdd(c->db, c->argv[1], &o); - } + if ((o = lookupKeyReadOrReply(c, c->argv[1], shared.null[c->resp])) == NULL || checkType(c, o, OBJ_HASH)) return; /* 
Handle parsing and calculating the expiration time. */ if (flags & OBJ_PERSIST) { @@ -1290,13 +1283,17 @@ void hgetexCommand(client *c) { new_argv[new_argc++] = c->argv[1]; incrRefCount(c->argv[1]); - if (set_expiry || set_expired) { + + if (set_expiry) { new_argv[new_argc++] = NULL; // placeholder for the expiration time milliseconds_index = new_argc - 1; + } + + if (set_expiry || persist) { new_argv[new_argc++] = shared.fields; new_argv[new_argc++] = NULL; // placeholder for the number of objects + numitems_index = new_argc - 1; } - numitems_index = new_argc - 1; } for (i = fields_index; i < c->argc; i++) { int changed = 0; @@ -1306,7 +1303,7 @@ void hgetexCommand(client *c) { } else if (set_expiry) { changed = (hashTypeSetExpire(o, c->argv[i]->ptr, when, 0) == 1) ? 1 : 0; } else if (persist) { - changed = hashTypePersist(o, c->argv[i]->ptr); + changed = (hashTypePersist(o, c->argv[i]->ptr) == 1) ? 1 : 0; } if (changed) { changes++; From f8e5bc1f41089d171afc260833d99f9123c3a1b6 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Tue, 1 Jul 2025 15:41:51 +0300 Subject: [PATCH 043/119] fix new function names and documentation Signed-off-by: Ran Shidlansik --- src/vset.c | 26 +++++++++++++------------- src/vset.h | 8 ++++---- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/vset.c b/src/vset.c index 5d5062eb60..1056329c22 100644 --- a/src/vset.c +++ b/src/vset.c @@ -1113,7 +1113,7 @@ static inline vsetBucket *removeFromBucket_RAX(vsetGetExpiryFunc getExpiry, vset return target; } -static inline size_t vsetBucketMultiPopExpired_NONE(vsetBucket **bucket, vsetGetExpiryFunc getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx) { +static inline size_t vsetBucketPopExpired_NONE(vsetBucket **bucket, vsetGetExpiryFunc getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx) { UNUSED(bucket); UNUSED(getExpiry); UNUSED(expiryFunc); @@ -1123,7 +1123,7 @@ static inline size_t 
vsetBucketMultiPopExpired_NONE(vsetBucket **bucket, vsetGet return 0; } -static inline size_t vsetBucketMultiPopExpired_SINGLE(vsetBucket **bucket, vsetGetExpiryFunc getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx) { +static inline size_t vsetBucketPopExpired_SINGLE(vsetBucket **bucket, vsetGetExpiryFunc getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx) { void *entry = vsetBucketSingle(*bucket); if (max_count && getExpiry(entry) <= now && expiryFunc(entry, ctx)) { freeVsetBucket(*bucket); @@ -1133,7 +1133,7 @@ static inline size_t vsetBucketMultiPopExpired_SINGLE(vsetBucket **bucket, vsetG return 0; } -static inline size_t vsetBucketMultiPopExpired_VECTOR(vsetBucket **bucket, vsetGetExpiryFunc getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx) { +static inline size_t vsetBucketPopExpired_VECTOR(vsetBucket **bucket, vsetGetExpiryFunc getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx) { pVector *pv = vsetBucketVector(*bucket); uint32_t len = min(pvLen(pv), max_count); uint32_t i = 0; @@ -1149,7 +1149,7 @@ static inline size_t vsetBucketMultiPopExpired_VECTOR(vsetBucket **bucket, vsetG return i; } -static inline size_t vsetBucketMultiPopExpired_HASHTABLE(vsetBucket **bucket, vsetGetExpiryFunc getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx) { +static inline size_t vsetBucketPopExpired_HASHTABLE(vsetBucket **bucket, vsetGetExpiryFunc getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx) { UNUSED(getExpiry); UNUSED(now); hashtable *ht = vsetBucketHashtable(*bucket); @@ -1179,7 +1179,7 @@ static inline size_t vsetBucketMultiPopExpired_HASHTABLE(vsetBucket **bucket, vs return expired; } -static inline size_t vsetBucketMultiPopExpired_RAX(vsetBucket **bucket, vsetGetExpiryFunc getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx) { +static inline size_t 
vsetBucketPopExpired_RAX(vsetBucket **bucket, vsetGetExpiryFunc getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx) { UNUSED(getExpiry); rax *buckets = vsetBucketRax(*bucket); size_t count = 0; @@ -1201,13 +1201,13 @@ static inline size_t vsetBucketMultiPopExpired_RAX(vsetBucket **bucket, vsetGetE break; switch (time_bucket_type) { case VSET_BUCKET_SINGLE: - count += vsetBucketMultiPopExpired_SINGLE(&time_bucket, vsetGetExpiryZero, expiryFunc, now, max_count - count, ctx); + count += vsetBucketPopExpired_SINGLE(&time_bucket, vsetGetExpiryZero, expiryFunc, now, max_count - count, ctx); break; case VSET_BUCKET_VECTOR: - count += vsetBucketMultiPopExpired_VECTOR(&time_bucket, vsetGetExpiryZero, expiryFunc, now, max_count - count, ctx); + count += vsetBucketPopExpired_VECTOR(&time_bucket, vsetGetExpiryZero, expiryFunc, now, max_count - count, ctx); break; case VSET_BUCKET_HT: - count += vsetBucketMultiPopExpired_HASHTABLE(&time_bucket, vsetGetExpiryZero, expiryFunc, now, max_count - count, ctx); + count += vsetBucketPopExpired_HASHTABLE(&time_bucket, vsetGetExpiryZero, expiryFunc, now, max_count - count, ctx); break; default: panic("Cannot expire entries from bucket which is not single, vector or hashtable"); @@ -1576,19 +1576,19 @@ size_t vsetPopExpired(vset *set, vsetGetExpiryFunc getExpiry, vsetExpiryFunc exp int bucket_type = vsetBucketType(bucket); switch (bucket_type) { case VSET_BUCKET_NONE: - return vsetBucketMultiPopExpired_NONE(set, getExpiry, expiryFunc, now, max_count, ctx); + return vsetBucketPopExpired_NONE(set, getExpiry, expiryFunc, now, max_count, ctx); break; case VSET_BUCKET_RAX: - return vsetBucketMultiPopExpired_RAX(set, getExpiry, expiryFunc, now, max_count, ctx); + return vsetBucketPopExpired_RAX(set, getExpiry, expiryFunc, now, max_count, ctx); break; case VSET_BUCKET_SINGLE: - return vsetBucketMultiPopExpired_SINGLE(set, getExpiry, expiryFunc, now, max_count, ctx); + return vsetBucketPopExpired_SINGLE(set, 
getExpiry, expiryFunc, now, max_count, ctx); break; case VSET_BUCKET_VECTOR: - return vsetBucketMultiPopExpired_VECTOR(set, getExpiry, expiryFunc, now, max_count, ctx); + return vsetBucketPopExpired_VECTOR(set, getExpiry, expiryFunc, now, max_count, ctx); break; case VSET_BUCKET_HT: - return vsetBucketMultiPopExpired_HASHTABLE(set, getExpiry, expiryFunc, now, max_count, ctx); + return vsetBucketPopExpired_HASHTABLE(set, getExpiry, expiryFunc, now, max_count, ctx); break; default: panic("Unknown volatile set bucket type in vsetPopExpired"); diff --git a/src/vset.h b/src/vset.h index 3b27788724..b43280ccbc 100644 --- a/src/vset.h +++ b/src/vset.h @@ -232,9 +232,9 @@ * void *new_entry, long long old_expiry, * long long new_expiry); * - * Expiry Retrieval: - * void *vsetFirstExpired(vset *set, vsetGetExpiryFunc getExpiry, mstime_t now); - * void *vsetPopExpired(vset *set, vsetGetExpiryFunc getExpiry, mstime_t now); + * Expiry Retrieval/Removal: + * long long vsetEstimatedEarliestExpiry(vset *set, vsetGetExpiryFunc getExpiry); + * size_t vsetPopExpired(vset *set, vsetGetExpiryFunc getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx); * * Utilities: * bool vsetIsEmpty(vset *set); @@ -302,7 +302,6 @@ typedef struct vsetIterator { bool vsetAddEntry(vset *set, vsetGetExpiryFunc getExpiry, void *entry); bool vsetRemoveEntry(vset *set, vsetGetExpiryFunc getExpiry, void *entry); -void *vsetFirstExpired(vset *set, vsetGetExpiryFunc getExpiry, mstime_t now); bool vsetUpdateEntry(vset *set, vsetGetExpiryFunc getExpiry, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry); bool vsetIsEmpty(vset *set); void vsetStart(vset *set, vsetIterator *it); @@ -310,6 +309,7 @@ bool vsetNext(vsetIterator *it, void **entryptr); void vsetStop(vsetIterator *it); void vsetInit(vset *set); void vsetClear(vset *set); +long long vsetEstimatedEarliestExpiry(vset *set, vsetGetExpiryFunc getExpiry); size_t vsetPopExpired(vset *set, vsetGetExpiryFunc 
getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx); #endif From fa8a4998a76c4fcf3ed5bd55ecb29026f9e3bad5 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Tue, 1 Jul 2025 18:42:22 +0300 Subject: [PATCH 044/119] fix memory leak in t_hash. After the changes to vset, we need to check if the set is empty rather than check its pointer Signed-off-by: Ran Shidlansik --- src/t_hash.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/src/t_hash.c b/src/t_hash.c index 3b2528260c..d883a2a9b1 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -51,14 +51,8 @@ static vset *hashTypeGetVolatileSet(robj *o) { return (vset *)hashtableMetadata(o->ptr); } -void hashTypeFreeVolatileSet(robj *o) { - vset *set = hashTypeGetVolatileSet(o); - if (set) - vsetClear(set); -} - bool hashTypeHasVolatileElements(robj *o) { - return ((o->encoding == OBJ_ENCODING_HASHTABLE) && (hashTypeGetVolatileSet(o) != NULL)); + return ((o->encoding == OBJ_ENCODING_HASHTABLE) && !(vsetIsEmpty(hashTypeGetVolatileSet(o)))); } /* make any access to the hash object elements ignore the specific elements expiration. @@ -84,7 +78,7 @@ static vset *hashTypeGetOrcreateVolatileSet(robj *o) { return vset; } -static void hashTypeDeleteVolatileSet(robj *o) { +void hashTypeFreeVolatileSet(robj *o) { vset *vset = hashtableMetadata(o->ptr); vsetClear(vset); /* serves mainly for optimization. 
by changing the hashtable type we can avoid extra function call in hashtable access */ @@ -102,7 +96,7 @@ void hashTypeUntrackEntry(robj *o, void *entry) { debugServerAssert(set); serverAssert(vsetRemoveEntry(set, entryGetExpiry, entry)); if (vsetIsEmpty(set)) { - hashTypeDeleteVolatileSet(o); + hashTypeFreeVolatileSet(o); } } @@ -119,7 +113,7 @@ static void hashTypeTrackUpdateEntry(robj *o, void *old_entry, void *new_entry, serverAssert(vsetUpdateEntry(set, entryGetExpiry, old_entry, new_entry, old_expiry, new_expiry) == 1); if (vsetIsEmpty(set)) { - hashTypeDeleteVolatileSet(o); + hashTypeFreeVolatileSet(o); } } From af1c360efaa2bf8e316bf305adcee2d2f85553e1 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Tue, 1 Jul 2025 19:20:53 +0300 Subject: [PATCH 045/119] format fix Signed-off-by: Ran Shidlansik --- src/vset.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vset.h b/src/vset.h index b43280ccbc..c237670859 100644 --- a/src/vset.h +++ b/src/vset.h @@ -204,7 +204,7 @@ * | * v * HASHTABLE (only when a vector can't split) - * + * *----------------------------------------------------------------------------- * Entry Type Contract *----------------------------------------------------------------------------- From 973429093a529cc57a658baf9733f23c45deb42f Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Tue, 1 Jul 2025 20:35:21 +0300 Subject: [PATCH 046/119] add vset memory usage Signed-off-by: Ran Shidlansik --- src/object.c | 4 ++- src/vset.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++-- src/vset.h | 2 +- 3 files changed, 73 insertions(+), 4 deletions(-) diff --git a/src/object.c b/src/object.c index 0d8c44e790..4d6c349d04 100644 --- a/src/object.c +++ b/src/object.c @@ -28,6 +28,7 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ +#include "hashtable.h" #include "server.h" #include "serverassert.h" #include "functions.h" @@ -1201,10 +1202,11 @@ size_t objectComputeSize(robj *key, robj *o, size_t sample_size, int dbid) { } else if (o->encoding == OBJ_ENCODING_HASHTABLE) { hashtable *ht = o->ptr; hashtableIterator iter; + vset *volatile_fields = hashtableMetadata(ht); hashtableInitIterator(&iter, ht, 0); void *next; - asize = sizeof(*o) + hashtableMemUsage(ht); + asize = sizeof(*o) + hashtableMemUsage(ht) + vsetMemUsage(volatile_fields); while (hashtableNext(&iter, &next) && samples < sample_size) { elesize += entryMemUsage(next); samples++; diff --git a/src/vset.c b/src/vset.c index 1056329c22..7e2a9a1d51 100644 --- a/src/vset.c +++ b/src/vset.c @@ -1216,9 +1216,8 @@ static inline size_t vsetBucketPopExpired_RAX(vsetBucket **bucket, vsetGetExpiry /* in case the bucket is freed, we can just remove it and continue to the next bucket. */ raxRemove(buckets, key, key_len, NULL); } else { - /* in case the bucket still exists, it must be since we reached the max_count. + /* in case the bucket still exists, it must be since we reached the max_count or stopped due to expiry function. * So we save the new bucket to the rax and bail. 
*/ - assert(max_count == count); raxSetData(node, time_bucket); break; } @@ -1305,6 +1304,55 @@ static inline int vsetBucketNext_RAX(vsetIterator *it, void **entryptr) { return 1; } +static inline size_t vsetBucketMemUsage_NONE(vsetBucket *bucket) { + UNUSED(bucket); + return 0; +} + +static inline size_t vsetBucketMemUsage_SINGLE(vsetBucket *bucket) { + UNUSED(bucket); + return 0; +} + +static inline size_t vsetBucketMemUsage_VECTOR(vsetBucket *bucket) { + pVector *pv = vsetBucketVector(bucket); + assert(pv); + return pv->alloc; +} + +static inline size_t vsetBucketMemUsage_HASHTABLE(vsetBucket *bucket) { + hashtable *ht = vsetBucketHashtable(bucket); + return hashtableMemUsage(ht); +} + +static inline size_t vsetBucketMemUsage_RAX(vsetBucket *bucket) { + rax *r = vsetBucketRax(bucket); + size_t total_mem = raxAllocSize(r); + raxIterator it; + raxStart(&it, r); + assert(raxSeek(&it, "^", NULL, 0)); + while (raxNext(&it)) { + switch (vsetBucketType(it.data)) { + case VSET_BUCKET_NONE: + total_mem += vsetBucketMemUsage_NONE(it.data); + break; + case VSET_BUCKET_SINGLE: + total_mem += vsetBucketMemUsage_SINGLE(it.data); + break; + case VSET_BUCKET_VECTOR: + total_mem += vsetBucketMemUsage_VECTOR(it.data); + break; + case VSET_BUCKET_HT: + total_mem += vsetBucketMemUsage_HASHTABLE(it.data); + break; + default: + panic("Unknown bucket type encountered in vsetBucketMemUsage_HASHTABLE"); + } + } + raxStop(&it); + return total_mem; +} + /* Adds an entry to a volatile set (vset) based on its expiration time. * * The volatile set maintains buckets of entries grouped by time windows. 
Each @@ -1697,6 +1745,25 @@ bool vsetNext(vsetIterator *it, void **entryptr) { return ret == 1; } +size_t vsetMemUsage(vset *set) { + int bucket_type = vsetBucketType(*set); + switch (bucket_type) { + case VSET_BUCKET_NONE: + return vsetBucketMemUsage_NONE(*set); + case VSET_BUCKET_SINGLE: + return vsetBucketMemUsage_SINGLE(*set); + case VSET_BUCKET_VECTOR: + return vsetBucketMemUsage_VECTOR(*set); + case VSET_BUCKET_HT: + panic("Unsupported hashtable bucket type for vset"); + case VSET_BUCKET_RAX: + return vsetBucketMemUsage_RAX(*set); + default: + panic("Unknown set type encountered in vsetMemUsage"); + } + return 0; +} + /* Initializes a volatile set iterator. * * This function prepares the iterator for scanning a volatile set from the beginning. diff --git a/src/vset.h b/src/vset.h index c237670859..540ba7fe22 100644 --- a/src/vset.h +++ b/src/vset.h @@ -311,5 +311,5 @@ void vsetInit(vset *set); void vsetClear(vset *set); long long vsetEstimatedEarliestExpiry(vset *set, vsetGetExpiryFunc getExpiry); size_t vsetPopExpired(vset *set, vsetGetExpiryFunc getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx); - +size_t vsetMemUsage(vset *set); #endif From e1c78f2f0e1218f0ece261a7c96a0389cf7ab34c Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Tue, 1 Jul 2025 22:26:29 +0300 Subject: [PATCH 047/119] fix some pr comments Signed-off-by: Ran Shidlansik --- src/entry.c | 16 ++++++++-------- src/entry.h | 6 +++--- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/entry.c b/src/entry.c index 999f352b36..c8dc2dee5c 100644 --- a/src/entry.c +++ b/src/entry.c @@ -33,7 +33,7 @@ * +--------------+---^---+--------------+ * | * | - * entry pointer = value sds + * value pointer = value sds */ /* SDS aux flag. If set, it indicates that the entry has TTL metadata set. 
*/ @@ -66,7 +66,7 @@ sds entryGetField(const entry *entry) { static sds *entryGetValueRef(const entry *entry) { serverAssert(entryHasValuePtr(entry)); char *field_data = sdsAllocPtr(entry); - field_data -= sizeof(sds *); + field_data -= sizeof(sds); return (sds *)field_data; } @@ -98,13 +98,13 @@ entry *entrySetValue(entry *e, sds value) { /* Returns the address of the entry allocation. */ void *entryAllocPtr(const entry *entry) { char *buf = sdsAllocPtr(entry); - if (entryHasValuePtr(entry)) buf -= sizeof(sds *); + if (entryHasValuePtr(entry)) buf -= sizeof(sds); if (entryHasExpiry(entry)) buf -= sizeof(long long); return buf; } bool entryHasEmbeddedValue(entry *entry) { - return (entryGetValue(entry) && !entryHasValuePtr(entry)); + return (!entryHasValuePtr(entry)); } /**************************************** Entry Expiry API *****************************************/ @@ -127,7 +127,7 @@ long long entryGetExpiry(const entry *entry) { entry *entrySetExpiry(entry *e, long long expiry) { if (entryHasExpiry(e)) { char *buf = sdsAllocPtr(e); - if (entryHasValuePtr(e)) buf -= sizeof(sds *); + if (entryHasValuePtr(e)) buf -= sizeof(sds); buf -= sizeof(expiry); memcpy(buf, &expiry, sizeof(expiry)); return e; @@ -262,7 +262,7 @@ entry *entryUpdate(entry *e, sds value, long long expiry) { if (update_expiry) { serverAssert(entryHasExpiry(e)); char *buf = sdsAllocPtr(e); - if (entryHasValuePtr(e)) buf -= sizeof(sds *); + if (entryHasValuePtr(e)) buf -= sizeof(sds); buf -= sizeof(expiry); memcpy(buf, &expiry, sizeof(expiry)); } @@ -302,8 +302,8 @@ entry *entryUpdate(entry *e, sds value, long long expiry) { } entry *new_entry = entryCreate(entryGetField(e), value, expiration_time); - if (new_entry != e) - entryFree(e); + debugServerAssert(new_entry != e); + entryFree(e); return new_entry; } diff --git a/src/entry.h b/src/entry.h index 5447a1f58a..f23f3dfc7b 100644 --- a/src/entry.h +++ b/src/entry.h @@ -35,9 +35,9 @@ * | Expiration (opt) | Value pointer | Field (sds) | * | 8 
bytes (int64_t) | 8 bytes (void *) | "field" + header | * +-------------------+-------------------+------------------+ - * ^ - * | - * entry pointer + * ^ + * | + * entry pointer * * - The value is stored separately via a pointer. * - Used for large value sizes. */ From 962e03b18198e7bfbd4085bf1c1fff7377bee567 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Tue, 1 Jul 2025 22:39:03 +0300 Subject: [PATCH 048/119] change the OBJ_* prefix to ARGS_* Signed-off-by: Ran Shidlansik --- src/server.c | 70 +++++++++++++++++++++++++------------------------- src/server.h | 28 ++++++++++---------- src/t_hash.c | 26 +++++++++---------- src/t_string.c | 44 +++++++++++++++---------------- 4 files changed, 84 insertions(+), 84 deletions(-) diff --git a/src/server.c b/src/server.c index 8d0f6ab2ba..68e1708562 100644 --- a/src/server.c +++ b/src/server.c @@ -7366,34 +7366,34 @@ int parseExtendedCommandArgumentsOrReply(client *c, int *flags, int *unit, robj /* clang-format off */ if ((opt[0] == 'n' || opt[0] == 'N') && (opt[1] == 'x' || opt[1] == 'X') && opt[2] == '\0' && - !(*flags & OBJ_SET_XX || *flags & OBJ_SET_IFEQ) && (command_type == COMMAND_SET)) + !(*flags & ARGS_SET_XX || *flags & ARGS_SET_IFEQ) && (command_type == COMMAND_SET)) { - *flags |= OBJ_SET_NX; + *flags |= ARGS_SET_NX; } else if ((opt[0] == 'x' || opt[0] == 'X') && (opt[1] == 'x' || opt[1] == 'X') && opt[2] == '\0' && - !(*flags & OBJ_SET_NX || *flags & OBJ_SET_IFEQ) && (command_type == COMMAND_SET)) + !(*flags & ARGS_SET_NX || *flags & ARGS_SET_IFEQ) && (command_type == COMMAND_SET)) { - *flags |= OBJ_SET_XX; + *flags |= ARGS_SET_XX; } else if ((opt[0] == 'f' || opt[0] == 'F') && (opt[1] == 'n' || opt[1] == 'N') && (opt[2] == 'x' || opt[2] == 'X') && opt[3] == '\0' && - !(*flags & OBJ_SET_FXX || *flags & OBJ_SET_IFEQ) && (command_type == COMMAND_HSET)) + !(*flags & ARGS_SET_FXX || *flags & ARGS_SET_IFEQ) && (command_type == COMMAND_HSET)) { - *flags |= OBJ_SET_FNX; + *flags |= ARGS_SET_FNX; } else if ((opt[0] 
== 'f' || opt[0] == 'F') && (opt[1] == 'x' || opt[1] == 'X') && (opt[2] == 'x' || opt[2] == 'X') && opt[3] == '\0' && - !(*flags & OBJ_SET_FNX || *flags & OBJ_SET_IFEQ) && (command_type == COMMAND_HSET)) + !(*flags & ARGS_SET_FNX || *flags & ARGS_SET_IFEQ) && (command_type == COMMAND_HSET)) { - *flags |= OBJ_SET_FXX; + *flags |= ARGS_SET_FXX; } else if ((opt[0] == 'i' || opt[0] == 'I') && (opt[1] == 'f' || opt[1] == 'F') && (opt[2] == 'e' || opt[2] == 'E') && (opt[3] == 'q' || opt[3] == 'Q') && opt[4] == '\0' && next && - !(*flags & OBJ_SET_NX || *flags & OBJ_SET_XX || *flags & OBJ_SET_IFEQ) && (command_type == COMMAND_SET)) + !(*flags & ARGS_SET_NX || *flags & ARGS_SET_XX || *flags & ARGS_SET_IFEQ) && (command_type == COMMAND_SET)) { - *flags |= OBJ_SET_IFEQ; + *flags |= ARGS_SET_IFEQ; *compare_val = next; j++; } else if ((opt[0] == 'g' || opt[0] == 'G') && @@ -7401,34 +7401,34 @@ int parseExtendedCommandArgumentsOrReply(client *c, int *flags, int *unit, robj (opt[2] == 't' || opt[2] == 'T') && opt[3] == '\0' && (command_type == COMMAND_SET)) { - *flags |= OBJ_SET_GET; - } else if (!strcasecmp(opt, "KEEPTTL") && !(*flags & OBJ_PERSIST) && - !(*flags & OBJ_EX) && !(*flags & OBJ_EXAT) && - !(*flags & OBJ_PX) && !(*flags & OBJ_PXAT) && (command_type == COMMAND_SET || command_type == COMMAND_HSET)) + *flags |= ARGS_SET_GET; + } else if (!strcasecmp(opt, "KEEPTTL") && !(*flags & ARGS_PERSIST) && + !(*flags & ARGS_EX) && !(*flags & ARGS_EXAT) && + !(*flags & ARGS_PX) && !(*flags & ARGS_PXAT) && (command_type == COMMAND_SET || command_type == COMMAND_HSET)) { - *flags |= OBJ_KEEPTTL; + *flags |= ARGS_KEEPTTL; } else if (!strcasecmp(opt,"PERSIST") && (command_type == COMMAND_GET || command_type == COMMAND_HGET) && - !(*flags & OBJ_EX) && !(*flags & OBJ_EXAT) && - !(*flags & OBJ_PX) && !(*flags & OBJ_PXAT) && - !(*flags & OBJ_KEEPTTL)) + !(*flags & ARGS_EX) && !(*flags & ARGS_EXAT) && + !(*flags & ARGS_PX) && !(*flags & ARGS_PXAT) && + !(*flags & ARGS_KEEPTTL)) { - *flags 
|= OBJ_PERSIST; + *flags |= ARGS_PERSIST; } else if ((opt[0] == 'e' || opt[0] == 'E') && (opt[1] == 'x' || opt[1] == 'X') && opt[2] == '\0' && - !(*flags & OBJ_KEEPTTL) && !(*flags & OBJ_PERSIST) && - !(*flags & OBJ_EXAT) && !(*flags & OBJ_PX) && - !(*flags & OBJ_PXAT) && next) + !(*flags & ARGS_KEEPTTL) && !(*flags & ARGS_PERSIST) && + !(*flags & ARGS_EXAT) && !(*flags & ARGS_PX) && + !(*flags & ARGS_PXAT) && next) { - *flags |= OBJ_EX; + *flags |= ARGS_EX; *expire = next; j++; } else if ((opt[0] == 'p' || opt[0] == 'P') && (opt[1] == 'x' || opt[1] == 'X') && opt[2] == '\0' && - !(*flags & OBJ_KEEPTTL) && !(*flags & OBJ_PERSIST) && - !(*flags & OBJ_EX) && !(*flags & OBJ_EXAT) && - !(*flags & OBJ_PXAT) && next) + !(*flags & ARGS_KEEPTTL) && !(*flags & ARGS_PERSIST) && + !(*flags & ARGS_EX) && !(*flags & ARGS_EXAT) && + !(*flags & ARGS_PXAT) && next) { - *flags |= OBJ_PX; + *flags |= ARGS_PX; *unit = UNIT_MILLISECONDS; *expire = next; j++; @@ -7436,22 +7436,22 @@ int parseExtendedCommandArgumentsOrReply(client *c, int *flags, int *unit, robj (opt[1] == 'x' || opt[1] == 'X') && (opt[2] == 'a' || opt[2] == 'A') && (opt[3] == 't' || opt[3] == 'T') && opt[4] == '\0' && - !(*flags & OBJ_KEEPTTL) && !(*flags & OBJ_PERSIST) && - !(*flags & OBJ_EX) && !(*flags & OBJ_PX) && - !(*flags & OBJ_PXAT) && next) + !(*flags & ARGS_KEEPTTL) && !(*flags & ARGS_PERSIST) && + !(*flags & ARGS_EX) && !(*flags & ARGS_PX) && + !(*flags & ARGS_PXAT) && next) { - *flags |= OBJ_EXAT; + *flags |= ARGS_EXAT; *expire = next; j++; } else if ((opt[0] == 'p' || opt[0] == 'P') && (opt[1] == 'x' || opt[1] == 'X') && (opt[2] == 'a' || opt[2] == 'A') && (opt[3] == 't' || opt[3] == 'T') && opt[4] == '\0' && - !(*flags & OBJ_KEEPTTL) && !(*flags & OBJ_PERSIST) && - !(*flags & OBJ_EX) && !(*flags & OBJ_EXAT) && - !(*flags & OBJ_PX) && next) + !(*flags & ARGS_KEEPTTL) && !(*flags & ARGS_PERSIST) && + !(*flags & ARGS_EX) && !(*flags & ARGS_EXAT) && + !(*flags & ARGS_PX) && next) { - *flags |= OBJ_PXAT; + 
*flags |= ARGS_PXAT; *unit = UNIT_MILLISECONDS; *expire = next; j++; diff --git a/src/server.h b/src/server.h index 9f221088fd..529a733769 100644 --- a/src/server.h +++ b/src/server.h @@ -702,21 +702,21 @@ typedef enum { *----------------------------------------------------------------------------*/ /* Generic set command string object set flags */ -#define OBJ_NO_FLAGS 0 -#define OBJ_SET_NX (1 << 0) /* Set if key not exists. */ -#define OBJ_SET_XX (1 << 1) /* Set if key exists. */ -#define OBJ_EX (1 << 2) /* Set if time in seconds is given */ -#define OBJ_PX (1 << 3) /* Set if time in ms in given */ -#define OBJ_KEEPTTL (1 << 4) /* Set and keep the ttl */ -#define OBJ_SET_GET (1 << 5) /* Set if want to get key before set */ -#define OBJ_EXAT (1 << 6) /* Set if timestamp in second is given */ -#define OBJ_PXAT (1 << 7) /* Set if timestamp in ms is given */ -#define OBJ_PERSIST (1 << 8) /* Set if we need to remove the ttl */ -#define OBJ_SET_IFEQ (1 << 9) /* Set if we need compare and set */ -#define OBJ_ARGV3 (1 << 10) /* Set if the value is at argv[3]; otherwise it's \ +#define ARGS_NO_FLAGS 0 +#define ARGS_SET_NX (1 << 0) /* Set if key not exists. */ +#define ARGS_SET_XX (1 << 1) /* Set if key exists. */ +#define ARGS_EX (1 << 2) /* Set if time in seconds is given */ +#define ARGS_PX (1 << 3) /* Set if time in ms in given */ +#define ARGS_KEEPTTL (1 << 4) /* Set and keep the ttl */ +#define ARGS_SET_GET (1 << 5) /* Set if want to get key before set */ +#define ARGS_EXAT (1 << 6) /* Set if timestamp in second is given */ +#define ARGS_PXAT (1 << 7) /* Set if timestamp in ms is given */ +#define ARGS_PERSIST (1 << 8) /* Set if we need to remove the ttl */ +#define ARGS_SET_IFEQ (1 << 9) /* Set if we need compare and set */ +#define ARGS_ARGV3 (1 << 10) /* Set if the value is at argv[3]; otherwise it's \ * at argv[2]. */ -#define OBJ_SET_FNX (1 << 11) /* Set if key item not exists. */ -#define OBJ_SET_FXX (1 << 12) /* Set if key item exists. 
*/ +#define ARGS_SET_FNX (1 << 11) /* Set if key item not exists. */ +#define ARGS_SET_FXX (1 << 12) /* Set if key item exists. */ /* An Object, that is a type able to hold a string / list / set */ diff --git a/src/t_hash.c b/src/t_hash.c index 73b6de33e0..4f0ff4fc2e 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -1094,7 +1094,7 @@ void hsetexCommand(client *c) { robj *expire = NULL; robj *comparison = NULL; int unit = UNIT_SECONDS; - int flags = OBJ_NO_FLAGS; + int flags = ARGS_NO_FLAGS; int fields_index = 0; long long num_fields = 0; long long when = EXPIRY_NONE; @@ -1123,7 +1123,7 @@ void hsetexCommand(client *c) { return; /* Check for object existence condition */ - if ((flags & OBJ_SET_NX && o) || (flags & OBJ_SET_XX && !o)) { + if ((flags & ARGS_SET_NX && o) || (flags & ARGS_SET_XX && !o)) { addReply(c, shared.czero); return; } @@ -1134,24 +1134,24 @@ void hsetexCommand(client *c) { } /* Handle parsing and calculating the expiration time. */ - if (flags & OBJ_KEEPTTL) + if (flags & ARGS_KEEPTTL) set_flags |= HASH_SET_KEEP_EXPIRY; else if (expire) { - long long basetime = (flags & (OBJ_EXAT | OBJ_PXAT)) ? 0 : commandTimeSnapshot(); + long long basetime = (flags & (ARGS_EXAT | ARGS_PXAT)) ? 
0 : commandTimeSnapshot(); if (convertExpireArgumentToUnixTime(c, expire, basetime, unit, &when) == C_ERR) return; - if (((flags & OBJ_PXAT) || (flags & OBJ_EXAT)) && checkAlreadyExpired(when)) { + if (((flags & ARGS_PXAT) || (flags & ARGS_EXAT)) && checkAlreadyExpired(when)) { set_expired = 1; } } /* Check for all fields condition */ - if (flags & (OBJ_SET_FNX | OBJ_SET_FXX)) { + if (flags & (ARGS_SET_FNX | ARGS_SET_FXX)) { for (i = fields_index; i < c->argc; i += 2) { - if (((flags & OBJ_SET_FNX) && hashTypeExists(o, c->argv[i]->ptr)) || - ((flags & OBJ_SET_FXX) && !hashTypeExists(o, c->argv[i]->ptr))) { + if (((flags & ARGS_SET_FNX) && hashTypeExists(o, c->argv[i]->ptr)) || + ((flags & ARGS_SET_FXX) && !hashTypeExists(o, c->argv[i]->ptr))) { addReply(c, shared.czero); return; } @@ -1188,7 +1188,7 @@ void hsetexCommand(client *c) { } else if (expire) { /* Propagate as HSETEX Key Value PXAT millisecond-timestamp if there is * EX/PX/EXAT flag. */ - if (!(flags & OBJ_PXAT)) { + if (!(flags & ARGS_PXAT)) { for (int i = 2; i < fields_index; i++) { if (c->argv[i + 1] == expire) { robj *milliseconds_obj = createStringObjectFromLongLong(when); @@ -1220,7 +1220,7 @@ void hgetexCommand(client *c) { robj *expire = NULL; robj *comparison = NULL; int unit = UNIT_SECONDS; - int flags = OBJ_NO_FLAGS; + int flags = ARGS_NO_FLAGS; int fields_index = 0; long long num_fields = -1; long long when = EXPIRY_NONE; @@ -1250,15 +1250,15 @@ void hgetexCommand(client *c) { if ((o = lookupKeyReadOrReply(c, c->argv[1], shared.null[c->resp])) == NULL || checkType(c, o, OBJ_HASH)) return; /* Handle parsing and calculating the expiration time. */ - if (flags & OBJ_PERSIST) { + if (flags & ARGS_PERSIST) { persist = 1; } else if (expire) { - long long basetime = (flags & (OBJ_EXAT | OBJ_PXAT)) ? 0 : commandTimeSnapshot(); + long long basetime = (flags & (ARGS_EXAT | ARGS_PXAT)) ? 
0 : commandTimeSnapshot(); if (convertExpireArgumentToUnixTime(c, expire, basetime, unit, &when) == C_ERR) return; - if (((flags & OBJ_PXAT) || (flags & OBJ_EXAT)) && checkAlreadyExpired(when)) { + if (((flags & ARGS_PXAT) || (flags & ARGS_EXAT)) && checkAlreadyExpired(when)) { set_expired = 1; when = 0; } else { diff --git a/src/t_string.c b/src/t_string.c index 0450217b48..a8c46a8a91 100644 --- a/src/t_string.c +++ b/src/t_string.c @@ -90,7 +90,7 @@ void setGenericCommand(client *c, return; } - if (flags & OBJ_SET_GET) { + if (flags & ARGS_SET_GET) { initDeferredReplyBuffer(c); if (getGenericCommand(c) == C_ERR) goto cleanup; } @@ -99,26 +99,26 @@ void setGenericCommand(client *c, found = existing_value != NULL; /* Handle the IFEQ conditional check */ - if (flags & OBJ_SET_IFEQ && found) { - if (!(flags & OBJ_SET_GET) && checkType(c, existing_value, OBJ_STRING)) { + if (flags & ARGS_SET_IFEQ && found) { + if (!(flags & ARGS_SET_GET) && checkType(c, existing_value, OBJ_STRING)) { goto cleanup; } if (compareStringObjects(existing_value, comparison) != 0) { - if (!(flags & OBJ_SET_GET)) { + if (!(flags & ARGS_SET_GET)) { addReply(c, abort_reply ? abort_reply : shared.null[c->resp]); } goto cleanup; } - } else if (flags & OBJ_SET_IFEQ && !found) { - if (!(flags & OBJ_SET_GET)) { + } else if (flags & ARGS_SET_IFEQ && !found) { + if (!(flags & ARGS_SET_GET)) { addReply(c, abort_reply ? abort_reply : shared.null[c->resp]); } goto cleanup; } - if ((flags & OBJ_SET_NX && found) || (flags & OBJ_SET_XX && !found)) { - if (!(flags & OBJ_SET_GET)) { + if ((flags & ARGS_SET_NX && found) || (flags & ARGS_SET_XX && !found)) { + if (!(flags & ARGS_SET_GET)) { addReply(c, abort_reply ? abort_reply : shared.null[c->resp]); } goto cleanup; @@ -129,13 +129,13 @@ void setGenericCommand(client *c, * If the key already exists, delete it. 
*/ if (expire && checkAlreadyExpired(milliseconds)) { if (found) deleteExpiredKeyFromOverwriteAndPropagate(c, key); - if (!(flags & OBJ_SET_GET)) addReply(c, shared.ok); + if (!(flags & ARGS_SET_GET)) addReply(c, shared.ok); goto cleanup; } /* When expire is not NULL, we avoid deleting the TTL so it can be updated later instead of being deleted and then * created again. */ - setkey_flags |= ((flags & OBJ_KEEPTTL) || expire) ? SETKEY_KEEPTTL : 0; + setkey_flags |= ((flags & ARGS_KEEPTTL) || expire) ? SETKEY_KEEPTTL : 0; setkey_flags |= found ? SETKEY_ALREADY_EXIST : SETKEY_DOESNT_EXIST; setKey(c, c->db, key, &val, setkey_flags); @@ -143,7 +143,7 @@ void setGenericCommand(client *c, /* By setting the reallocated value back into argv, we can avoid duplicating * a large string value when adding it to the db. */ - c->argv[(flags & OBJ_ARGV3) ? 3 : 2] = val; + c->argv[(flags & ARGS_ARGV3) ? 3 : 2] = val; incrRefCount(val); server.dirty++; @@ -152,7 +152,7 @@ void setGenericCommand(client *c, if (expire) { /* Propagate as SET Key Value PXAT millisecond-timestamp if there is * EX/PX/EXAT flag. */ - if (!(flags & OBJ_PXAT)) { + if (!(flags & ARGS_PXAT)) { robj *milliseconds_obj = createStringObjectFromLongLong(milliseconds); rewriteClientCommandVector(c, 5, shared.set, key, val, shared.pxat, milliseconds_obj); decrRefCount(milliseconds_obj); @@ -160,13 +160,13 @@ void setGenericCommand(client *c, notifyKeyspaceEvent(NOTIFY_GENERIC, "expire", key, c->db->id); } - if (!(flags & OBJ_SET_GET)) { + if (!(flags & ARGS_SET_GET)) { addReply(c, ok_reply ? 
ok_reply : shared.ok); } /* Propagate without the GET argument (Isn't needed if we had expire since in that case we completely re-written the * command argv) */ - if ((flags & OBJ_SET_GET) && !expire) { + if ((flags & ARGS_SET_GET) && !expire) { int argc = 0; int j; robj **argv = zmalloc((c->argc - 1) * sizeof(robj *)); @@ -212,7 +212,7 @@ static int getExpireMillisecondsOrReply(client *c, robj *expire, int flags, int if (unit == UNIT_SECONDS) *milliseconds *= 1000; - if ((flags & OBJ_PX) || (flags & OBJ_EX)) { + if ((flags & ARGS_PX) || (flags & ARGS_EX)) { *milliseconds += commandTimeSnapshot(); } @@ -232,7 +232,7 @@ void setCommand(client *c) { robj *expire = NULL; robj *comparison = NULL; int unit = UNIT_SECONDS; - int flags = OBJ_NO_FLAGS; + int flags = ARGS_NO_FLAGS; if (parseExtendedCommandArgumentsOrReply(c, &flags, &unit, &expire, &comparison, COMMAND_SET, c->argc) != C_OK) { return; @@ -244,17 +244,17 @@ void setCommand(client *c) { void setnxCommand(client *c) { c->argv[2] = tryObjectEncoding(c->argv[2]); - setGenericCommand(c, OBJ_SET_NX, c->argv[1], c->argv[2], NULL, 0, shared.cone, shared.czero, NULL); + setGenericCommand(c, ARGS_SET_NX, c->argv[1], c->argv[2], NULL, 0, shared.cone, shared.czero, NULL); } void setexCommand(client *c) { c->argv[3] = tryObjectEncoding(c->argv[3]); - setGenericCommand(c, OBJ_EX | OBJ_ARGV3, c->argv[1], c->argv[3], c->argv[2], UNIT_SECONDS, NULL, NULL, NULL); + setGenericCommand(c, ARGS_EX | ARGS_ARGV3, c->argv[1], c->argv[3], c->argv[2], UNIT_SECONDS, NULL, NULL, NULL); } void psetexCommand(client *c) { c->argv[3] = tryObjectEncoding(c->argv[3]); - setGenericCommand(c, OBJ_PX | OBJ_ARGV3, c->argv[1], c->argv[3], c->argv[2], UNIT_MILLISECONDS, NULL, NULL, NULL); + setGenericCommand(c, ARGS_PX | ARGS_ARGV3, c->argv[1], c->argv[3], c->argv[2], UNIT_MILLISECONDS, NULL, NULL, NULL); } /* DELIFEQ key value */ @@ -318,7 +318,7 @@ void getCommand(client *c) { void getexCommand(client *c) { robj *expire = NULL; int unit = 
UNIT_SECONDS; - int flags = OBJ_NO_FLAGS; + int flags = ARGS_NO_FLAGS; if (parseExtendedCommandArgumentsOrReply(c, &flags, &unit, &expire, NULL, COMMAND_GET, c->argc) != C_OK) { return; @@ -345,7 +345,7 @@ void getexCommand(client *c) { /* This command is never propagated as is. It is either propagated as PEXPIRE[AT],DEL,UNLINK or PERSIST. * This why it doesn't need special handling in feedAppendOnlyFile to convert relative expire time to absolute one. */ - if (((flags & OBJ_PXAT) || (flags & OBJ_EXAT)) && checkAlreadyExpired(milliseconds)) { + if (((flags & ARGS_PXAT) || (flags & ARGS_EXAT)) && checkAlreadyExpired(milliseconds)) { /* When PXAT/EXAT absolute timestamp is specified, there can be a chance that timestamp * has already elapsed so delete the key in that case. */ deleteExpiredKeyFromOverwriteAndPropagate(c, c->argv[1]); @@ -359,7 +359,7 @@ void getexCommand(client *c) { signalModifiedKey(c, c->db, c->argv[1]); notifyKeyspaceEvent(NOTIFY_GENERIC, "expire", c->argv[1], c->db->id); server.dirty++; - } else if (flags & OBJ_PERSIST) { + } else if (flags & ARGS_PERSIST) { if (removeExpire(c->db, c->argv[1])) { signalModifiedKey(c, c->db, c->argv[1]); rewriteClientCommandVector(c, 2, shared.persist, c->argv[1]); From 5038145b19819b65f7c6094eb6ca144557bc0ab1 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Tue, 1 Jul 2025 22:48:40 +0300 Subject: [PATCH 049/119] Change timestamptIsExpired to return bool Signed-off-by: Ran Shidlansik --- src/db.c | 2 +- src/entry.c | 3 +-- src/expire.c | 4 ++-- src/expire.h | 3 ++- src/t_hash.c | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/db.c b/src/db.c index a6b4744db6..bf20ddb48c 100644 --- a/src/db.c +++ b/src/db.c @@ -1898,7 +1898,7 @@ static int keyIsExpiredWithDictIndexImpl(serverDb *db, robj *key, int dict_index /* Don't expire anything while loading. It will be done later. 
*/ if (server.loading) return 0; mstime_t when = getExpireWithDictIndex(db, key, dict_index); - return timestampIsExpired(when); + return timestampIsExpired(when) ? 1 : 0; } /* Check if the key is expired. */ diff --git a/src/entry.c b/src/entry.c index c8dc2dee5c..4e340b282f 100644 --- a/src/entry.c +++ b/src/entry.c @@ -138,8 +138,7 @@ entry *entrySetExpiry(entry *e, long long expiry) { /* Return true in case the entry has assigned expiration or false otherwise. */ bool entryIsExpired(entry *entry) { - if (!timestampIsExpired(entryGetExpiry(entry))) return false; - return true; + return timestampIsExpired(entryGetExpiry(entry)); } /**************************************** Entry Expiry API - End *****************************************/ diff --git a/src/expire.c b/src/expire.c index 338cda8306..a30c6b1caa 100644 --- a/src/expire.c +++ b/src/expire.c @@ -818,8 +818,8 @@ void touchCommand(client *c) { } /* Returns 1 if the expire value is expired, 0 otherwise. */ -int timestampIsExpired(mstime_t when) { - if (when < 0) return 0; /* no expire */ +bool timestampIsExpired(mstime_t when) { + if (when < 0) return false; /* no expire */ mstime_t now = commandTimeSnapshot(); /* The time indicated by 'when' is considered expired if the current (virtual or real) time is greater diff --git a/src/expire.h b/src/expire.h index 66a5241233..8e852e30af 100644 --- a/src/expire.h +++ b/src/expire.h @@ -2,6 +2,7 @@ #define EXPIRE_H #include +#include #include "monotonic.h" /* Special Expiry values */ @@ -38,7 +39,7 @@ typedef enum { typedef struct client client; typedef struct serverObject robj; -int timestampIsExpired(mstime_t when); +bool timestampIsExpired(mstime_t when); expirationPolicy getExpirationPolicyWithFlags(int flags); int parseExtendedExpireArgumentsOrReply(client *c, int *flags, int max_args); int convertExpireArgumentToUnixTime(client *c, robj *arg, long long basetime, int unit, long long *unixtime); diff --git a/src/t_hash.c b/src/t_hash.c index 
4f0ff4fc2e..552b92506b 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -391,7 +391,7 @@ int hashTypeSet(robj *o, sds field, sds value, long long expiry, int flags) { long long entry_expiry = entryGetExpiry(existing); /* It is possible that the entry is already expired. In this case we can override it, but we need to make sure to expire it first * and treat it like it did not exist. */ - int is_expired = timestampIsExpired(entry_expiry); + bool is_expired = timestampIsExpired(entry_expiry); if (!is_expired && flags & HASH_SET_KEEP_EXPIRY) { /* In case the HASH_SET_KEEP_EXPIRY will force keeping the existing entry expiry. */ expiry = entry_expiry; From c4b404245f053f8b698dd1d4d819cf4e31f67e4c Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Tue, 1 Jul 2025 22:53:32 +0300 Subject: [PATCH 050/119] fix format issue Signed-off-by: Ran Shidlansik --- src/server.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/server.h b/src/server.h index 529a733769..f64b87c4b9 100644 --- a/src/server.h +++ b/src/server.h @@ -714,7 +714,7 @@ typedef enum { #define ARGS_PERSIST (1 << 8) /* Set if we need to remove the ttl */ #define ARGS_SET_IFEQ (1 << 9) /* Set if we need compare and set */ #define ARGS_ARGV3 (1 << 10) /* Set if the value is at argv[3]; otherwise it's \ - * at argv[2]. */ + * at argv[2]. */ #define ARGS_SET_FNX (1 << 11) /* Set if key item not exists. */ #define ARGS_SET_FXX (1 << 12) /* Set if key item exists. 
*/ From c628720161bc5518f95ece935de1de75266a0dd7 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Wed, 2 Jul 2025 10:39:11 +0300 Subject: [PATCH 051/119] fix HEXPIRE/HGETEX/HSETEX handling of 0 expiration time Signed-off-by: Ran Shidlansik --- src/t_hash.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/t_hash.c b/src/t_hash.c index 552b92506b..1e9bca8017 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -1142,7 +1142,7 @@ void hsetexCommand(client *c) { if (convertExpireArgumentToUnixTime(c, expire, basetime, unit, &when) == C_ERR) return; - if (((flags & ARGS_PXAT) || (flags & ARGS_EXAT)) && checkAlreadyExpired(when)) { + if ((when == basetime) || (((flags & ARGS_PXAT) || (flags & ARGS_EXAT)) && ((when == basetime) || checkAlreadyExpired(when)))) { set_expired = 1; } } @@ -1258,7 +1258,7 @@ void hgetexCommand(client *c) { if (convertExpireArgumentToUnixTime(c, expire, basetime, unit, &when) == C_ERR) return; - if (((flags & ARGS_PXAT) || (flags & ARGS_EXAT)) && checkAlreadyExpired(when)) { + if ((when == basetime) || (((flags & ARGS_PXAT) || (flags & ARGS_EXAT)) && checkAlreadyExpired(when))) { set_expired = 1; when = 0; } else { @@ -1457,7 +1457,7 @@ void hexpireGenericCommand(client *c, long long basetime, int unit) { if (convertExpireArgumentToUnixTime(c, param, basetime, unit, &when) == C_ERR) return; - if (checkAlreadyExpired(when)) + if ((when == basetime) || checkAlreadyExpired(when)) set_expired = 1; robj *obj = lookupKeyWrite(c->db, key); From 536f949b46a0c0aedd12ebe60ac150bf7ff407a0 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Wed, 2 Jul 2025 13:56:52 +0300 Subject: [PATCH 052/119] fix 2 bugs: 1. avoid unneeded expansion of the hash object 2.
fix memory leak where we do not decrement the first argument of HGETEX when no changes were made Signed-off-by: Ran Shidlansik --- src/t_hash.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/t_hash.c b/src/t_hash.c index 1e9bca8017..8798717203 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -429,6 +429,14 @@ int hashTypeSetExpire(robj *o, sds field, long long expiry, int flag) { if (o == NULL) return -2; if (o->encoding == OBJ_ENCODING_LISTPACK) { + unsigned char *vstr; + unsigned int vlen; + long long vll; + /* We do not want to convert from listpack to hashtable for no good reason. + * So we first check if the field exists. */ + if (hashTypeGetFromListpack(o, field, &vstr, &vlen, &vll) < 0) { + return -2; + } /* When listpack representation is used, we consider it as infinite TTL, * so expire command with gt always fail the GT as well as existence(XX). * Else, we already know we are going to set an expiration so we expend to hashtable encoding. */ @@ -439,6 +447,9 @@ int hashTypeSetExpire(robj *o, sds field, long long expiry, int flag) { } } + /* we must be hashtable encoded */ + serverAssert(o->encoding == OBJ_ENCODING_HASHTABLE); + hashtable *ht = o->ptr; void **entry_ref = NULL; if ((entry_ref = hashtableFindRef(ht, field))) { @@ -1342,6 +1353,10 @@ void hgetexCommand(client *c) { notifyKeyspaceEvent(NOTIFY_GENERIC, "del", c->argv[1], c->db->id); } } else { + /* If no changes were done we still need to free the new argv array and the refcount of the first argument.
*/ + if (set_expiry || set_expired || persist) { + decrRefCount(c->argv[1]); + } if (new_argv) zfree(new_argv); } From e7aaf8b3d8f111b50d9244f4c620dd27bdd186fb Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Wed, 2 Jul 2025 15:38:57 +0300 Subject: [PATCH 053/119] move hset keyspace notification first in case of hsetex Signed-off-by: Ran Shidlansik --- src/t_hash.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/t_hash.c b/src/t_hash.c index 8798717203..e82fb7b1f0 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -403,7 +403,7 @@ int hashTypeSet(robj *o, sds field, sds value, long long expiry, int flags) { serverAssert(replaced); } hashTypeTrackUpdateEntry(o, existing, new_entry, entry_expiry, expiry); - + /* since we are exposed to expired entries, we must NOT reflect them as being "updated" */ update = is_expired ? 0 : 1; } hashTypeIgnoreTTL(o, false); @@ -1191,7 +1191,9 @@ void hsetexCommand(client *c) { } } + if (changes) { + notifyKeyspaceEvent(NOTIFY_HASH, "hset", c->argv[1], c->db->id); if (set_expired) { replaceClientCommandVector(c, new_argc, new_argv); /* We would like to reduce the number of hexpired events in case there are potential many expired fields. */ @@ -1220,9 +1222,11 @@ void hsetexCommand(client *c) { } server.dirty += changes; } else { + /* If no changes were done we still need to free the new argv array and the refcount of the first argument. */ + if (set_expired) + decrRefCount(c->argv[1]); if (new_argv) zfree(new_argv); } - notifyKeyspaceEvent(NOTIFY_HASH, "hset", c->argv[1], c->db->id); addReplyLongLong(c, changes == num_fields ? 
1 : 0); } From 8deb3e9ab3f8919aefba24a9bab930ed1c724bcc Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Wed, 2 Jul 2025 16:20:04 +0300 Subject: [PATCH 054/119] add some new commands documentation Signed-off-by: Ran Shidlansik --- src/t_hash.c | 168 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 168 insertions(+) diff --git a/src/t_hash.c b/src/t_hash.c index e82fb7b1f0..c4e892e7bb 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -1100,6 +1100,50 @@ void hsetCommand(client *c) { } } +/* High-Level Algorithm of HSETEX Command: + * + * - Parse arguments and options: + * Parses optional flags such as NX, XX, FNX, FXX, KEEPTTL, and expiration time options. + * Ensures the number of specified fields matches the actual provided key-value pairs. + * + * - Check object existence conditions: + * Depending on NX/XX flags, verifies whether the hash key must or must not exist. + * Exits early with a zero reply if conditions aren't met. + * + * - Create the hash object if needed: + * If the key does not exist and creation is permitted, allocates a new hash. + * + * - Handle expiration logic: + * Computes the expiry time (relative or absolute). + * If the expiration is in the past, the command proceeds to delete the relevant fields. + * + * - Enforce per-field conditions: + * If FNX (field must not exist) or FXX (field must exist) flags are set, + * ensures all fields satisfy these conditions before proceeding. + * + * - Apply changes: + * Either deletes expired fields or sets fields with optional expiration. + * + * - Clean up and notify: + * Deletes the key if the hash becomes empty. + * Emits keyspace notifications for changes (see below). + * Modifies the command vector for AOF propagation if necessary. + * + * + * Return Value: + * - Returns integer 1 if all fields were successfully updated or deleted. + * - Returns integer 0 if no fields were updated due to condition failures. 
+ * + * + * Keyspace Notifications (if enabled): + * - "hset" — Emitted when fields are added or updated. + * - "hexpire" — Emitted when expiration is set on fields. + * - "hexpired" — Emitted when fields are immediately expired and deleted. + * - "del" — Emitted if the entire key is removed (empty hash). + * + * + * Client Reply: + * - Integer reply: 1 if all changes succeeded, 0 if no changes occurred. */ void hsetexCommand(client *c) { robj *o; robj *expire = NULL; @@ -1230,6 +1274,42 @@ void hsetexCommand(client *c) { addReplyLongLong(c, changes == num_fields ? 1 : 0); } +/* High-Level Algorithm of HGETEX Command: + * + * - Parses the command for optional arguments, including expiration options, + * persistence flags, and the list of hash fields to retrieve. + * + * - Verifies that the number of fields specified matches the actual arguments, + * and ensures the key exists and is a valid hash type. + * + * - Computes the expiration behavior: + * - If `PERSIST` is provided, removes the expiration from the fields. + * - If an expiration time is specified, calculates it relative or absolute. + * - If already expired, deletes the fields immediately. + * - Otherwise, schedules new expiration timestamps. + * + * - Retrieves and replies with the values for each requested field. + * + * - For each field: + * - If expiration is due: deletes the field. + * - If an expiry is scheduled: updates the field's expiration timestamp. + * - If persisting: clears the field's expiration. + * + * - If any changes were made (deletes, expires, or persists): + * - Rewrites the command vector (for AOF and replication) using HDEL, HPEXPIREAT, or HPERSIST. + * - Issues keyspace notifications accordingly. + * - If the hash becomes empty as a result, deletes the key and notifies. + * + * + * Return Value: + * - Always replies with an array of values for the requested fields (including NULLs for missing fields). 
+ * + * + * Keyspace Notifications (if enabled): + * - "hexpire" — When expiration is added to hash fields. + * - "hexpired" — When fields are immediately expired and deleted. + * - "hpersist" — When expiration is removed from fields. + * - "del" — If the hash becomes empty and is removed entirely. */ void hgetexCommand(client *c) { robj *o; robj *expire = NULL; @@ -1447,6 +1527,41 @@ static void hrandfieldReplyWithListpack(writePreparedClient *wpc, unsigned int c } +/* High-Level Algorithm of hexpireGenericCommand (used by HEXPIRE, HPEXPIRE, HEXPIREAT, HPEXPIREAT): + * + * - Parses optional flags and the number of hash fields to apply expiration to. + * + * - Converts the given expiration time (relative or absolute) into a Unix timestamp. + * + * - Determines if the given timestamp is already expired: + * - If so, immediately deletes the specified hash fields. + * - If not, updates their expiration metadata. + * + * - Responds with an array of integers: + * - 1 if the expiration was set. + * - 0 if it was unchanged. + * - -1 if the field does not exist. + * - 2 if the field was immediately expired and deleted. + * + * - If fields were deleted due to expiration: + * - Rewrites the command as HDEL for replication/AOF. + * - Emits a "hexpired" keyspace event. + * + * - If expiration was newly set: + * - May rewrite the command as HPEXPIREAT if needed. + * - Emits a "hexpire" keyspace event. + * + * - If the hash becomes empty after deletions: + * - Deletes the hash key. + * - Emits a "del" event for the key. + * + * Return Value: + * - An array of integers corresponding to the result for each field. + * + * Keyspace Notifications (if enabled): + * - "hexpired" — when fields are immediately expired and deleted. + * - "hexpire" — when fields receive new expiration timestamps. + * - "del" — when the hash key becomes empty and is removed. 
*/ void hexpireGenericCommand(client *c, long long basetime, int unit) { robj *key = c->argv[1], *param = c->argv[2]; long long when; /* unix time in milliseconds when the key will expire. */ @@ -1560,6 +1675,31 @@ void hpexpireAtCommand(client *c) { hexpireGenericCommand(c, 0, UNIT_MILLISECONDS); } +/* High-Level Algorithm of HPERSIST Command: + * + * - Expects a key and a list of hash fields whose expiration metadata should be removed. + * - Validates that the number of provided fields matches the declared count. + * + * - For each specified field: + * - Attempts to remove any existing expiration. + * - Replies with: + * - 1 if the expiration was successfully removed. + * - 0 if the field had no expiration or did not exist. + * + * - Replies with an array of integers, one per field, indicating the outcome of each attempt. + * + * - If any expirations were removed: + * - Marks the key as modified (for replication/AOF consistency). + * - Emits a "hpersist" keyspace notification. + * + * + * Return Value: + * - An array of integers, each representing the result of persistence for one field. + * - 1 = field existed and expiration was removed. + * - 0 = field did not exist or had no expiration. + * + * Keyspace Notifications (if enabled): + * - "hpersist" — emitted once if any field had its expiration removed. */ void hpersistCommand(client *c) { int fields_index = 4, result = 0, changes = 0; long long num_fields = 0; @@ -1589,6 +1729,34 @@ void hpersistCommand(client *c) { } } +/* High-Level Algorithm of HTTL / HPTTL / HEXPIRETIME / HPEXPIRETIME Commands: + * + * - These commands return the remaining time to live (TTL) or absolute expiry time + * of one or more fields in a hash. + * + * - HTTL / HPTTL: + * - Return relative TTL of each field (in seconds or milliseconds). + * - TTL is computed as the difference between current time and expiry time. 
+ * + * - HEXPIRETIME / HPEXPIRETIME: + * - Return the absolute Unix time at which each field will expire + * (in seconds or milliseconds, depending on the variant). + * + * For each field requested: + * - If the field or hash does not exist: reply with -2. + * - If the field exists but has no expiration: reply with -1. + * - If the field has an expiration: + * - HTTL / HPTTL: reply with remaining TTL (clamped at 0 if negative). + * - HEXPIRETIME / HPEXPIRETIME: reply with the absolute expiry time. + * + * Return Value: + * - An array of integers, one per field: + * - -2 = hash or field does not exist. + * - -1 = field exists but has no expiration. + * - >=0 = TTL or expiry time, depending on the command variant. + * + * Keyspace Notifications: + * - None emitted; this command is read-only. */ void httlGenericCommand(client *c, long long basetime, int unit) { int fields_index = 4; long long num_fields = 0, result = -2; From 15f7ab82f963c38b4d0a553bb18800e6112ea18b Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Wed, 2 Jul 2025 17:59:28 +0300 Subject: [PATCH 055/119] update tracked entry when the entry is defragged Signed-off-by: Ran Shidlansik --- src/defrag.c | 19 +++++++++++++++---- src/server.h | 1 + src/t_hash.c | 2 +- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/src/defrag.c b/src/defrag.c index b2b8aa1121..d2a904a0a8 100644 --- a/src/defrag.c +++ b/src/defrag.c @@ -38,6 +38,7 @@ * SPDX-License-Identifier: BSD-3-Clause */ +#include "entry.h" #include "server.h" #include "hashtable.h" #include "eval.h" @@ -445,9 +446,19 @@ static void scanLaterSet(robj *ob, unsigned long *cursor) { static void activeDefragEntry(void *privdata, void *element_ref) { UNUSED(privdata); entry **entry_ref = (entry **)element_ref; - - entry *new_entry = entryDefrag(*entry_ref, activeDefragAlloc, activeDefragSds); - if (new_entry) *entry_ref = new_entry; + entry *old_entry = *entry_ref, *new_entry = NULL; + long long old_expiry = entryGetExpiry(old_entry); + +
new_entry = entryDefrag(*entry_ref, activeDefragAlloc, activeDefragSds); + if (new_entry) { + /* In case the entry is tracked we need to update it in the volatile set */ + if (entryHasExpiry(new_entry)) { + robj *obj = (robj *)privdata; + serverAssert(obj); + hashTypeTrackUpdateEntry(obj, old_entry, new_entry, old_expiry, entryGetExpiry(new_entry)); + } + *entry_ref = new_entry; + } } static void scanLaterHash(robj *ob, unsigned long *cursor) { @@ -498,7 +509,7 @@ static void defragHash(robj *ob) { } else { unsigned long cursor = 0; do { - cursor = hashtableScanDefrag(ht, cursor, activeDefragEntry, NULL, activeDefragAlloc, HASHTABLE_SCAN_EMIT_REF); + cursor = hashtableScanDefrag(ht, cursor, activeDefragEntry, ob, activeDefragAlloc, HASHTABLE_SCAN_EMIT_REF); } while (cursor != 0); } /* defrag the hashtable struct and tables */ diff --git a/src/server.h b/src/server.h index f64b87c4b9..c9e0bc3dff 100644 --- a/src/server.h +++ b/src/server.h @@ -3330,6 +3330,7 @@ robj *setTypeDup(robj *o); void hashTypeFreeVolatileSet(robj *o); void hashTypeTrackEntry(robj *o, void *entry); void hashTypeUntrackEntry(robj *o, void *entry); +void hashTypeTrackUpdateEntry(robj *o, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry); void hashTypeConvert(robj *o, int enc); void hashTypeTryConversion(robj *subject, robj **argv, int start, int end); diff --git a/src/t_hash.c b/src/t_hash.c index c4e892e7bb..55f2a528ba 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -113,7 +113,7 @@ void hashTypeUntrackEntry(robj *o, void *entry) { } } -static void hashTypeTrackUpdateEntry(robj *o, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry) { +void hashTypeTrackUpdateEntry(robj *o, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry) { int old_tracked = (old_entry && old_expiry != EXPIRY_NONE); int new_tracked = (new_entry && new_expiry != EXPIRY_NONE); /* If entry was not tracked before and not going to be tracked now, we 
can simply return */ From abef0515a88fd4228bcdc323283bbeff0b793283 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Thu, 3 Jul 2025 10:04:59 +0300 Subject: [PATCH 056/119] also pass object to entryDefrag on scanLaterHash Signed-off-by: Ran Shidlansik --- src/defrag.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/defrag.c b/src/defrag.c index d2a904a0a8..2f6a32f311 100644 --- a/src/defrag.c +++ b/src/defrag.c @@ -444,7 +444,6 @@ static void scanLaterSet(robj *ob, unsigned long *cursor) { /* Hashtable scan callback for hash datatype */ static void activeDefragEntry(void *privdata, void *element_ref) { - UNUSED(privdata); entry **entry_ref = (entry **)element_ref; entry *old_entry = *entry_ref, *new_entry = NULL; long long old_expiry = entryGetExpiry(old_entry); @@ -464,7 +463,7 @@ static void activeDefragEntry(void *privdata, void *element_ref) { static void scanLaterHash(robj *ob, unsigned long *cursor) { serverAssert(ob->type == OBJ_HASH && ob->encoding == OBJ_ENCODING_HASHTABLE); hashtable *ht = ob->ptr; - *cursor = hashtableScanDefrag(ht, *cursor, activeDefragEntry, NULL, activeDefragAlloc, HASHTABLE_SCAN_EMIT_REF); + *cursor = hashtableScanDefrag(ht, *cursor, activeDefragEntry, ob, activeDefragAlloc, HASHTABLE_SCAN_EMIT_REF); } static void defragQuicklist(robj *ob) { From 390015ec764b9bbcb788ed907d69e1669c652468 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Thu, 3 Jul 2025 16:11:13 +0300 Subject: [PATCH 057/119] extend the test suite Signed-off-by: Ran Shidlansik --- tests/unit/hashexpire.tcl | 1239 ++++++++++++++++++++++++++++++++----- 1 file changed, 1071 insertions(+), 168 deletions(-) diff --git a/tests/unit/hashexpire.tcl b/tests/unit/hashexpire.tcl index 3ecd07b837..21bb58ae8d 100644 --- a/tests/unit/hashexpire.tcl +++ b/tests/unit/hashexpire.tcl @@ -1,15 +1,553 @@ proc info_field {info field} { -foreach line [split $info "\n"] { - if {[string match "$field:*" $line]} { - return [string trim [lindex [split $line ":"] 1]] 
+ foreach line [split $info "\n"] { + if {[string match "$field:*" $line]} { + return [string trim [lindex [split $line ":"] 1]] + } } + return "" } -return "" + +proc assert_keyevent_pattern {rd event_type key} { + set event [$rd read] + assert_match "pmessage __keyevent@* __keyevent@*:$event_type $key" $event } -start_server {tags {"hashexpire external:skip"}} { +start_server {tags {"hashexpire external:skip"}} { + ####### Valid scenarios tests ####### + foreach command {EX PX EXAT PXAT} { + test "HGETEX $command expiry" { + r FLUSHALL + r HSET myhash f1 v1 + + # Configuration dictionary mapping expiry commands to their test parameters: + # - time: expiry value (seconds/milliseconds or absolute timestamp) + # - wait: milliseconds to wait before checking expiration + # - cmd: command to verify the TTL/expiry time + set config [dict create \ + EX [list time 1 wait 1100 cmd HTTL] \ + PX [list time 100 wait 150 cmd HPTTL] \ + EXAT [list time [expr {[clock seconds] + 1}] wait 1100 cmd HEXPIRETIME] \ + PXAT [list time [expr {[clock milliseconds] + 100}] wait 150 cmd HPEXPIRETIME] \ + ] + set params [dict get $config $command] + set expire_time [dict get $params time] + set wait_time [dict get $params wait] + set ttl_cmd [dict get $params cmd] + + # Verify HGETEX command + assert_equal "v1" [r HGETEX myhash $command $expire_time FIELDS 1 f1] + set expire_result [r $ttl_cmd myhash FIELDS 1 f1] + + # Verify expiry + if {[regexp "AT$" $command]} { + assert_equal $expire_result $expire_time + } else { + assert_morethan $expire_result 0 + } + after $wait_time + assert_equal "" [r HGET myhash f1] + } + + test "HGETEX $command with mix of existing and non-existing fields" { + r FLUSHALL + r HSET myhash f1 v1 f3 v3 + + set config [dict create \ + EX [list time 2000000] \ + PX [list time 2000000] \ + EXAT [list time [expr {[clock seconds] + 2000000}]] \ + PXAT [list time [expr {[clock milliseconds] + 20000000}]] \ + ] + set params [dict get $config $command] + set expire_time 
[dict get $params time] + + # HGETEX on exist/non-exist fields + assert_equal "v1 {} v3" [r HGETEX myhash $command $expire_time FIELDS 3 f1 f2 f3] + + # Verification checks (f2 should not be created) + assert_equal "" [r HGET myhash f2] + assert_equal -2 [r HTTL myhash FIELDS 1 f2] + assert_morethan [r HTTL myhash FIELDS 1 f1] 0 + assert_morethan [r HTTL myhash FIELDS 1 f3] 0 + } + + test "HGETEX $command on more then 1 field" { + r FLUSHALL + r HSET myhash f1 v1 f2 v2 + + set config [dict create \ + EX [list time 1 wait 1100 check_cmd HTTL] \ + PX [list time 100 wait 150 check_cmd HPTTL] \ + EXAT [list time [expr {[clock seconds] + 1}] wait 1100 check_cmd HEXPIRETIME] \ + PXAT [list time [expr {[clock milliseconds] + 100}] wait 150 check_cmd HPEXPIRETIME] \ + ] + set params [dict get $config $command] + set expire_time [dict get $params time] + set wait_time [dict get $params wait] + set check_cmd [dict get $params check_cmd] + + assert_equal "v1 v2" [r HGETEX myhash $command $expire_time FIELDS 2 f1 f2] + + # Verify expiration + if {[regexp "AT$" $command]} { + assert_equal $expire_time [r $check_cmd myhash FIELDS 1 f1] + assert_equal $expire_time [r $check_cmd myhash FIELDS 1 f2] + } else { + assert_morethan [r $check_cmd myhash FIELDS 1 f1] 0 + assert_morethan [r $check_cmd myhash FIELDS 1 f2] 0 + } + + after $wait_time + assert_equal "" [r HGET myhash f1] + assert_equal "" [r HGET myhash f2] + } + + test "HGETEX $command -> PERSIST" { + r FLUSHALL + r HSET myhash f1 v1 + r HSETEX myhash EX 10000 FIELDS 1 f2 v2 + + set config [dict create \ + EX [list time 1 cmd HTTL check_cmd HTTL] \ + PX [list time 100 cmd HPTTL check_cmd HPTTL] \ + EXAT [list time [expr {[clock seconds] + 1}] cmd HTTL check_cmd HEXPIRETIME] \ + PXAT [list time [expr {[clock milliseconds] + 100}] cmd HPTTL check_cmd HPEXPIRETIME] \ + ] + set params [dict get $config $command] + set expire_time [dict get $params time] + set ttl_cmd [dict get $params cmd] + set check_cmd [dict get $params 
check_cmd] + + assert_equal "v1" [r HGETEX myhash $command $expire_time FIELDS 1 f1] + if {[regexp "AT$" $command]} { + assert_equal $expire_time [r $check_cmd myhash FIELDS 1 f1] + } else { + assert_morethan [r $ttl_cmd myhash FIELDS 1 f1] 0 + } + + assert_equal "v1" [r HGETEX myhash PERSIST FIELDS 1 f1] + assert_equal -1 [r HTTL myhash FIELDS 1 f1] + # Verify f2 still has ttl + assert_morethan [r HTTL myhash FIELDS 1 f2] 100 + } + + test "HGETEX $command on non-exist field" { + r FLUSHALL + r HSET myhash f1 v1 + + set config [dict create \ + EX [list time 1] \ + PX [list time 100] \ + EXAT [list time [expr {[clock seconds] + 1}]] \ + PXAT [list time [expr {[clock milliseconds] + 100}]] \ + ] + set params [dict get $config $command] + set expire_time [dict get $params time] + + assert_equal {{}} [r HGETEX myhash $command $expire_time FIELDS 1 f2] + } + test "HGETEX $command on non-exist key" { + r FLUSHALL + + set config [dict create \ + EX [list time 100000] \ + PX [list time 10000000] \ + EXAT [list time [expr {[clock seconds] + 10000}]] \ + PXAT [list time [expr {[clock milliseconds] + 100000}]] \ + ] + set params [dict get $config $command] + set expire_time [dict get $params time] + + assert_equal "" [r HGETEX myhash $command $expire_time FIELDS 1 f2] + } + + test "HGETEX $command with duplicate field names" { + r FLUSHALL + r HSET myhash f1 v1 + + set config [dict create \ + EX [list time 10000] \ + PX [list time 10000] \ + EXAT [list time [expr {[clock seconds] + 10000}]] \ + PXAT [list time [expr {[clock milliseconds] + 100000}]] \ + ] + set params [dict get $config $command] + set expire_time [dict get $params time] + + assert_equal "v1 v1" [r HGETEX myhash $command $expire_time FIELDS 2 f1 f1] + } + } + + foreach command {EX PX} { + test "HGETEX $command with 0 ttl" { + r FLUSHALL + r HSET myhash f1 v1 + assert_equal "v1" [r HGETEX myhash $command 0 FIELDS 1 f1] + assert_equal "" [r HGET myhash f1] + assert_equal -2 [r HTTL myhash FIELDS 1 f1] + } + } + 
+ foreach command {EXAT PXAT} { + test "HGETEX $command with past expiry" { + r FLUSHALL + r HSET myhash f1 v1 + if {$command eq "EXAT"} { + set expire_time [expr {[clock seconds] - 100000}] + } else { + set expire_time [expr {[clock milliseconds] - 100000}] + } + assert_equal "v1" [r HGETEX myhash $command $expire_time FIELDS 1 f1] + assert_equal "" [r HGET myhash f1] + assert_equal -2 [r HTTL myhash FIELDS 1 f1] + } + } + + foreach command {EX PX EXAT PXAT} { + test "HGETEX $command overwrites existing field TTL with bigger value" { + r FLUSHALL + set config [dict create \ + EX [list setup_cmd EX setup_val 100000 bigger_val 200000] \ + PX [list setup_cmd PX setup_val 100000000 bigger_val 200000000] \ + EXAT [list setup_cmd EX setup_val 100000 bigger_val [expr {[clock seconds] + 200000}]] \ + PXAT [list setup_cmd PX setup_val 100000000 bigger_val [expr {[clock milliseconds] + 200000000}]] \ + ] + set params [dict get $config $command] + set setup_cmd [dict get $params setup_cmd] + set setup_val [dict get $params setup_val] + set bigger_val [dict get $params bigger_val] + + r HSETEX myhash $setup_cmd $setup_val FIELDS 1 f1 v1 + set old_ttl [r HTTL myhash FIELDS 1 f1] + r HGETEX myhash $command $bigger_val FIELDS 1 f1 + set new_ttl [r HTTL myhash FIELDS 1 f1] + assert {$new_ttl > $old_ttl} + } + + test "HGETEX $command overwrites existing field TTL with smaller value" { + set config [dict create \ + EX [list setup_cmd EX setup_val 100000 smaller_val 50000] \ + PX [list setup_cmd PX setup_val 100000000 smaller_val 50000000] \ + EXAT [list setup_cmd EX setup_val 100000 smaller_val [expr {[clock seconds] + 50000}]] \ + PXAT [list setup_cmd PX setup_val 100000000 smaller_val [expr {[clock milliseconds] + 50000000}]] \ + ] + set params [dict get $config $command] + set setup_cmd [dict get $params setup_cmd] + set setup_val [dict get $params setup_val] + set smaller_val [dict get $params smaller_val] + + r FLUSHALL + r HSETEX myhash $setup_cmd $setup_val FIELDS 1 f1 v1 + 
set old_ttl [r HTTL myhash FIELDS 1 f1] + r HGETEX myhash $command $smaller_val FIELDS 1 f1 + set new_ttl [r HTTL myhash FIELDS 1 f1] + assert {$new_ttl <= $old_ttl} + } + } + + test {HGETEX - verify no change when field does not exist} { + r FLUSHALL + r HSET myhash f1 v1 + set mem_before [r MEMORY USAGE myhash] + assert_equal {{}} [r HGETEX myhash EX 1 FIELDS 1 f2] + set memory_after [r MEMORY USAGE myhash] + assert_equal $mem_before $memory_after + } + + ####### Invalid scenarios tests ####### + test {HGETEX EX- multiple options used (EX + PX)} { + r FLUSHALL + r HSET myhash f1 v1 + catch {r HGETEX myhash EX 60 PX 1000 FIELDS 1 f1} e + set e + } {ERR syntax error} + + test {HGETEX EXAT- multiple options used (EXAT + PXAT)} { + r FLUSHALL + r HSET myhash f1 v1 + catch {r HGETEX myhash EXAT [expr {[clock seconds] + 100}] PXAT [expr {[clock milliseconds] + 100000}] 1000 FIELDS 1 f1} e + set e + } {ERR syntax error} + + # Common error scenarios for all commands + foreach {cmd ttl_val} [list \ + EX 60 \ + PX 60 \ + EXAT [expr {[clock seconds] + 100}] \ + PXAT [expr {[clock milliseconds] + 100}] \ + ] { + test "HGETEX $cmd- missing TTL value" { + r FLUSHALL + r HSET myhash f1 v1 + catch {r HGETEX myhash $cmd FIELDS 1 f1} e + set e + } {ERR syntax error} + + test "HGETEX $cmd- negative TTL" { + r FLUSHALL + r HSET myhash f1 v1 + catch {r HGETEX myhash $cmd -10 FIELDS 1 f1} e + set e + } {ERR invalid expire time in 'hgetex' command} + + test "HGETEX $cmd- non-integer TTL" { + r FLUSHALL + r HSET myhash f1 v1 + catch {r HGETEX myhash $cmd abc FIELDS 1 f1} e + set e + } {ERR value is not an integer or out of range} + + test "HGETEX $cmd- missing FIELDS keyword" { + r FLUSHALL + r HSET myhash f1 v1 + catch {r HGETEX myhash $cmd $ttl_val 1 f1} e + set e + } {ERR syntax error} + + test "HGETEX $cmd- wrong numfields count (too few fields)" { + r FLUSHALL + r HSET myhash f1 v1 f2 v2 + catch {r HGETEX myhash $cmd $ttl_val FIELDS 2 f1} e + set e + } {ERR *} + + test "HGETEX 
$cmd- wrong numfields count (too many fields)" { + r FLUSHALL + r HSET myhash f1 v1 + catch {r HGETEX myhash $cmd $ttl_val FIELDS 1 f1 f2} e + set e + } {ERR syntax error} + + test "HGETEX $cmd- key is wrong type (string instead of hash)" { + r FLUSHALL + r SET mystring "v1" + catch {r HGETEX mystring $cmd $ttl_val FIELDS 1 f1} e + set e + } {WRONGTYPE Operation against a key holding the wrong kind of value} + + test "HGETEX $cmd with FIELDS 0" { + r FLUSHALL + catch {r HGETEX myhash $cmd $ttl_val FIELDS 0} e + set e + } {ERR syntax error} + + test "HGETEX $cmd with negative numfields" { + r FLUSHALL + catch {r HGETEX myhash $cmd $ttl_val FIELDS -10} e + set e + } {ERR syntax error} + + test "HGETEX $cmd with missing key" { + r FLUSHALL + set expire [expr {[clock seconds] + 100}] + catch {r HGETEX $cmd $expire FIELDS 1 f1} e + set e + } {ERR syntax error} + } +} + +## HGETEX -> Keyspace notification tests #### +start_server {tags {"hashexpire external:skip"}} { + if {$::singledb} { + set db 0 + } else { + set db 9 + } + set all_h_pattern "h*" + set hexpire_pattern "hexpire" + set hpersist_pattern "hpersist" + + r config set notify-keyspace-events KEA + + foreach command {EX PX EXAT PXAT} { + set config [dict create \ + EX [list time 6000000] \ + PX [list time 6000000] \ + EXAT [list time [expr {[clock seconds] + 6000000}]] \ + PXAT [list time [expr {[clock milliseconds] + 6000000}]] \ + ] + set params [dict get $config $command] + set expire_time [dict get $params time] + + test "HGETEX $command generates hexpire keyspace notification" { + r FLUSHALL + r HSET myhash f1 v1 + + set rd [valkey_deferring_client] + assert_equal {1} [psubscribe $rd __keyevent@*] + + + r HGETEX myhash $command $expire_time FIELDS 1 f1 + + assert_keyevent_pattern $rd hexpire myhash + $rd close + } + + test "HGETEX $command with multiple fields generates single notification" { + r FLUSHALL + r HSET myhash f1 v1 f2 v2 f3 v3 + + set rd [valkey_deferring_client] + assert_equal {1} [psubscribe 
$rd __keyevent@*] + + r HGETEX myhash $command $expire_time FIELDS 3 f1 f2 f3 + + assert_keyevent_pattern $rd hexpire myhash + # Verify no notification (getting hset and not hexpire) + r HSET dummy dummy dummy + assert_keyevent_pattern $rd hset dummy + $rd close + } + + test "HGETEX $command on non-existent field generates no notification" { + r FLUSHALL + r HSET myhash f1 v1 + + set rd [valkey_deferring_client] + assert_equal {1} [psubscribe $rd __keyevent@*] + + # This HGETEX targets a non-existent field, so no notification about hexpire should be emitted + r HGETEX myhash $command $expire_time FIELDS 1 f2 + + # # Verify no notification (getting hset and not hexpire) + # r HSET dummy dummy dummy + # assert_keyevent_pattern $rd hset dummy + + $rd close + } + } + + test {HGETEX PERSIST generates hpersist keyspace notification} { + r FLUSHALL + r HSET myhash f1 v1 + r HEXPIRE myhash 60 FIELDS 1 f1 + + set rd [valkey_deferring_client] + assert_equal {1} [psubscribe $rd __keyevent@*] + + r HGETEX myhash PERSIST FIELDS 1 f1 + + assert_keyevent_pattern $rd hpersist myhash + $rd close + } + + + + foreach command {EX PX EXAT PXAT} { + set config [dict create \ + EX [list time 0] \ + PX [list time 0] \ + EXAT [list time [expr {[clock seconds] - 2000}]] \ + PXAT [list time [expr {[clock milliseconds] - 2000}]] \ + ] + set params [dict get $config $command] + set expire_time [dict get $params time] + + test "HGETEX $command 0/past time works correctly with 1 field" { + r FLUSHALL + + # Create hash with field + r HSET myhash f1 v1 + assert_equal 1 [r HLEN myhash] + assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] + + set rd [valkey_deferring_client] + assert_equal {1} [psubscribe $rd __keyevent@*] + + # Set field to expire immediately + r HGETEX myhash $command $expire_time FIELDS 1 f1 + + # Verify field and keys are deleted + assert_keyevent_pattern $rd hexpired myhash + assert_keyevent_pattern $rd del myhash + assert_equal -2 [r HTTL myhash 
FIELDS 1 f1] + assert_equal 0 [r HLEN myhash] + assert_equal 0 [r EXISTS myhash] + assert_match "" [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] + + $rd close + } + + test "HGETEX $command 0/past time works correctly with 1 field on field with expire" { + r FLUSHALL + + # Create hash with field + r HSETEX myhash EX 1000 FIELDS 1 f1 v1 + assert_equal 1 [r HLEN myhash] + assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] + + set rd [valkey_deferring_client] + assert_equal {1} [psubscribe $rd __keyevent@*] + + # Set field to expire immediately + r HGETEX myhash $command $expire_time FIELDS 1 f1 + + # Verify field and keys are deleted + assert_keyevent_pattern $rd hexpired myhash + assert_keyevent_pattern $rd del myhash + assert_equal -2 [r HTTL myhash FIELDS 1 f1] + assert_equal 0 [r HLEN myhash] + assert_equal 0 [r EXISTS myhash] + assert_match "" [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] + + $rd close + } + + test "HGETEX $command 0/past time works correctly with more then 1 field" { + r FLUSHALL + + # Create hash with field + r HSET myhash f1 v1 f2 v2 + assert_equal 2 [r HLEN myhash] + assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] + + set rd [valkey_deferring_client] + assert_equal {1} [psubscribe $rd __keyevent@*] + + # Set field to expire immediately + r HGETEX myhash $command $expire_time FIELDS 1 f2 + + # Verify field and keys are deleted + assert_keyevent_pattern $rd hexpired myhash + assert_equal -2 [r HTTL myhash FIELDS 1 f2] + assert_equal 1 [r HLEN myhash] + assert_equal 1 [r EXISTS myhash] + assert_match 1 [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] + + $rd close + } + + test "HGETEX $command 0/past time works correctly with more then 1 field and expire" { + r FLUSHALL + + # Create hash with field + r HSET myhash f1 v1 f2 v2 f3 v3 f4 v4 + r HEXPIRE myhash 1000000 FIELDS 1 f1 + assert_equal 4 [r HLEN myhash] + assert_match 
{1} [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] + + set rd [valkey_deferring_client] + assert_equal {1} [psubscribe $rd __keyevent@*] + + # Set field to expire immediately + r HGETEX myhash $command $expire_time FIELDS 1 f1 + + # Verify field and keys are deleted + assert_keyevent_pattern $rd hexpired myhash + assert_equal -2 [r HTTL myhash FIELDS 1 f1] + assert_equal 3 [r HLEN myhash] + assert_equal 1 [r EXISTS myhash] + assert_match 1 [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] + + $rd close + } + } +} + +# HSETEX #### +start_server {tags {"hashexpire external:skip"}} { test {HSETEX KEEPTTL - preserves existing TTL of field} { r FLUSHALL @@ -90,7 +628,7 @@ start_server {tags {"hashexpire external:skip"}} { -###### PX ####### + ###### PX ####### test {HSETEX PX - test negative ttl} { set ttl -50 @@ -142,7 +680,7 @@ start_server {tags {"hashexpire external:skip"}} { ## FNX/FXX -# hsetex throws ERR syntax error, it shouldn't + # hsetex throws ERR syntax error, it shouldn't test {HSETEX EX FNX - set only if none of the fields exist} { r FLUSHALL r HSET myhash field1 val1 @@ -172,7 +710,7 @@ start_server {tags {"hashexpire external:skip"}} { assert_equal 0 [r HEXISTS myhash fieldX] } -# Syntax error: HSETEX myhash PX 100 FNX FIELDS 2 x 2 y 3 + # Syntax error: HSETEX myhash PX 100 FNX FIELDS 2 x 2 y 3 test {HSETEX PX FNX - partial conflict returns 0} { r FLUSHALL r HSET myhash x 1 @@ -273,188 +811,187 @@ start_server {tags {"hashexpire external:skip"}} { assert {$moved_ttl > 0 && $moved_ttl <= $original_ttl} } -test {HSET - overwrite lazily expired field without TTL clears expiration} { - r FLUSHALL - r debug SET-ACTIVE-EXPIRE no - - # This test verifies that if a field has expired (but not yet lazily deleted), - # and it is overwritten using a plain HSET (i.e., no TTL), - # Valkey treats the field as non existing and updates it, - # effectively clearing the old TTL and making the field persistent. 
- - r HSETEX myhash PX 10 FIELDS 1 field1 oldval - wait_for_condition 100 100 { - [r HTTL myhash FIELDS 1 field1] eq "-2" - } else { - fail "hash value was not expired after timeout" - } - - # Field should still be present in memory due to lazy expiry - assert_equal 1 [r HLEN myhash] + test {HSET - overwrite lazily expired field without TTL clears expiration} { + r FLUSHALL + r debug SET-ACTIVE-EXPIRE no + + # This test verifies that if a field has expired (but not yet lazily deleted), + # and it is overwritten using a plain HSET (i.e., no TTL), + # Valkey treats the field as non existing and updates it, + # effectively clearing the old TTL and making the field persistent. + + r HSETEX myhash PX 10 FIELDS 1 field1 oldval + wait_for_condition 100 100 { + [r HTTL myhash FIELDS 1 field1] eq "-2" + } else { + fail "hash value was not expired after timeout" + } - # Overwrite with HSET (no TTL) before accessing - r HSET myhash field1 newval + # Field should still be present in memory due to lazy expiry + assert_equal 1 [r HLEN myhash] - # TTL should now be gone; field becomes persistent - set ttl [r HPTTL myhash FIELDS 1 field1] - assert_equal -1 $ttl - assert_equal newval [r HGET myhash field1] - assert_equal 1 [r HLEN myhash] + # Overwrite with HSET (no TTL) before accessing + r HSET myhash field1 newval - r debug SET-ACTIVE-EXPIRE yes -} + # TTL should now be gone; field becomes persistent + set ttl [r HPTTL myhash FIELDS 1 field1] + assert_equal -1 $ttl + assert_equal newval [r HGET myhash field1] + assert_equal 1 [r HLEN myhash] -test {HINCRBY - on expired field} { - r FLUSHALL - r debug SET-ACTIVE-EXPIRE no - - # This test verifies that if a field has expired, - # and it is overwritten using a plain HINCRBY (i.e., no TTL), - # Valkey treats the field as still existing and updates it, - # effectively clearing the old TTL and starting the value from 0. 
- - r HSETEX myhash PX 10 FIELDS 1 field1 1 - wait_for_condition 100 100 { - [r HTTL myhash FIELDS 1 field1] eq "-2" - } else { - fail "hash value was not expired after timeout" + r debug SET-ACTIVE-EXPIRE yes } - # Field should still be present in memory - assert_equal 1 [r HLEN myhash] - - # Overwrite with HINCRBY (no TTL) before accessing - r HINCRBY myhash field1 1 + test {HINCRBY - on expired field} { + r FLUSHALL + r debug SET-ACTIVE-EXPIRE no + + # This test verifies that if a field has expired, + # and it is overwritten using a plain HINCRBY (i.e., no TTL), + # Valkey treats the field as non-existing and recreates it, + # effectively clearing the old TTL and starting the value from 0. + + r HSETEX myhash PX 10 FIELDS 1 field1 1 + wait_for_condition 100 100 { + [r HTTL myhash FIELDS 1 field1] eq "-2" + } else { + fail "hash value was not expired after timeout" + } - # Sanity check: check we only have one field in the hash - assert_equal 1 [r HLEN myhash] + # Field should still be present in memory + assert_equal 1 [r HLEN myhash] - # TTL should now be gone; field becomes persistent - set ttl [r HPTTL myhash FIELDS 1 field1] - assert_equal -1 $ttl - assert_equal 1 [r HGET myhash field1] - assert_equal 1 [r HLEN myhash] + # Overwrite with HINCRBY (no TTL) before accessing + r HINCRBY myhash field1 1 - # set expiration on the field - assert_equal 1 [r HEXPIRE myhash 100000000 FIELDS 1 field1] - # verify the field has TTL - assert_morethan [r HPTTL myhash FIELDS 1 field1] 0 - # now incr the field again - assert_equal 2 [r HINCRBY myhash field1 1] - # verify the field has TTL - assert_morethan [r HPTTL myhash FIELDS 1 field1] 0 - r debug SET-ACTIVE-EXPIRE yes -} + # Sanity check: check we only have one field in the hash + assert_equal 1 [r HLEN myhash] -test {HINCRBYFLOAT - on expired field} { - r FLUSHALL - r debug SET-ACTIVE-EXPIRE no - - # This test verifies that if a field has expired, - # and it is overwritten using a plain HINCRBYFLOAT (i.e., no TTL), - # 
Valkey treats the field as still existing and updates it, - # effectively clearing the old TTL and starting the value from 0. - - r HSETEX myhash PX 10 FIELDS 1 field1 1 - wait_for_condition 100 100 { - [r HTTL myhash FIELDS 1 field1] eq "-2" - } else { - fail "hash value was not expired after timeout" + # TTL should now be gone; field becomes persistent + set ttl [r HPTTL myhash FIELDS 1 field1] + assert_equal -1 $ttl + assert_equal 1 [r HGET myhash field1] + assert_equal 1 [r HLEN myhash] + + # set expiration on the field + assert_equal 1 [r HEXPIRE myhash 100000000 FIELDS 1 field1] + # verify the field has TTL + assert_morethan [r HPTTL myhash FIELDS 1 field1] 0 + # now incr the field again + assert_equal 2 [r HINCRBY myhash field1 1] + # verify the field has TTL + assert_morethan [r HPTTL myhash FIELDS 1 field1] 0 + r debug SET-ACTIVE-EXPIRE yes } - # Field should still be present in memory - assert_equal 1 [r HLEN myhash] - - # Overwrite with HINCRBYFLOAT (no TTL) before accessing - r HINCRBYFLOAT myhash field1 1 - - # Sanity check: check we only have one field in the hash - assert_equal 1 [r HLEN myhash] - - # TTL should now be gone; field becomes persistent - set ttl [r HPTTL myhash FIELDS 1 field1] - assert_equal -1 $ttl - assert_equal 1 [r HGET myhash field1] - assert_equal 1 [r HLEN myhash] - - # set expiration on the field - assert_equal 1 [r HEXPIRE myhash 100000000 FIELDS 1 field1] - # verify the field has TTL - assert_morethan [r HPTTL myhash FIELDS 1 field1] 0 - # now incr the field again - assert_equal 2 [r HINCRBYFLOAT myhash field1 1] - # verify the field has TTL - assert_morethan [r HPTTL myhash FIELDS 1 field1] 0 - r debug SET-ACTIVE-EXPIRE yes -} + test {HINCRBYFLOAT - on expired field} { + r FLUSHALL + r debug SET-ACTIVE-EXPIRE no + + # This test verifies that if a field has expired, + # and it is overwritten using a plain HINCRBYFLOAT (i.e., no TTL), + # Valkey treats the field as non-existing and recreates it, + # effectively clearing the 
old TTL and starting the value from 0. + + r HSETEX myhash PX 10 FIELDS 1 field1 1 + wait_for_condition 100 100 { + [r HTTL myhash FIELDS 1 field1] eq "-2" + } else { + fail "hash value was not expired after timeout" + } -test {HSET - overwrite unexpired field removes TTL} { - r FLUSHALL - r debug SET-ACTIVE-EXPIRE no + # Field should still be present in memory + assert_equal 1 [r HLEN myhash] - # This test verifies that overwriting a field with HSET, - # even while its TTL is still valid (not expired), - # clears the TTL and makes the field persistent. - # This behavior is consistent with how HSET works for normal keys. + # Overwrite with HINCRBYFLOAT (no TTL) before accessing + r HINCRBYFLOAT myhash field1 1 - # Set field with long TTL - r HSETEX myhash PX 1000 FIELDS 1 field1 val1 + # Sanity check: check we only have one field in the hash + assert_equal 1 [r HLEN myhash] - # Confirm TTL is active - set before [r HPTTL myhash FIELDS 1 field1] - assert {$before > 0} + # TTL should now be gone; field becomes persistent + set ttl [r HPTTL myhash FIELDS 1 field1] + assert_equal -1 $ttl + assert_equal 1 [r HGET myhash field1] + assert_equal 1 [r HLEN myhash] + + # set expiration on the field + assert_equal 1 [r HEXPIRE myhash 100000000 FIELDS 1 field1] + # verify the field has TTL + assert_morethan [r HPTTL myhash FIELDS 1 field1] 0 + # now incr the field again + assert_equal 2 [r HINCRBYFLOAT myhash field1 1] + # verify the field has TTL + assert_morethan [r HPTTL myhash FIELDS 1 field1] 0 + r debug SET-ACTIVE-EXPIRE yes + } - # Overwrite with HSET before TTL expires - r HSET myhash field1 newval + test {HSET - overwrite unexpired field removes TTL} { + r FLUSHALL + r debug SET-ACTIVE-EXPIRE no - # TTL should now be gone - set after [r HPTTL myhash FIELDS 1 field1] - assert_equal -1 $after - assert_equal newval [r HGET myhash field1] + # This test verifies that overwriting a field with HSET, + # even while its TTL is still valid (not expired), + # clears the TTL and 
makes the field persistent. + # This behavior is consistent with how HSET works for normal keys. - r debug SET-ACTIVE-EXPIRE yes -} + # Set field with long TTL + r HSETEX myhash PX 1000 FIELDS 1 field1 val1 -test {HDEL - lazily expired field is removed without triggering expiry logic} { - r FLUSHALL - r debug SET-ACTIVE-EXPIRE no + # Confirm TTL is active + set before [r HPTTL myhash FIELDS 1 field1] + assert {$before > 0} - # This test proves that deleting an expired field with HDEL - # does NOT trigger Valkey's expiration mechanism. - # - # The key observation is that Valkey tracks how many fields were - # expired via TTL using the `expired_subkeys` counter in INFO stats. - # If HDEL caused expiration to be processed internally, - # this counter would increment. We assert that it remains unchanged. + # Overwrite with HSET before TTL expires + r HSET myhash field1 newval - # Capture expired_subkeys before - set before_info [r INFO stats] - set before [info_field $before_info expired_subkeys] + # TTL should now be gone + set after [r HPTTL myhash FIELDS 1 field1] + assert_equal -1 $after + assert_equal newval [r HGET myhash field1] - # Create field with short TTL - r HSETEX myhash PX 10 FIELDS 1 field1 val1 - after 20 + r debug SET-ACTIVE-EXPIRE yes + } - # Field is technically expired, but still in-memory due to lazy expiry - assert_equal 1 [r HLEN myhash] + test {HDEL - lazily expired field is removed without triggering expiry logic} { + r FLUSHALL + r debug SET-ACTIVE-EXPIRE no + + # This test proves that deleting an expired field with HDEL + # does NOT trigger Valkey's expiration mechanism. + # + # The key observation is that Valkey tracks how many fields were + # expired via TTL using the `expired_subkeys` counter in INFO stats. + # If HDEL caused expiration to be processed internally, + # this counter would increment. We assert that it remains unchanged. 
+ + # Capture expired_subkeys before + set before_info [r INFO stats] + set before [info_field $before_info expired_subkeys] + + # Create field with short TTL + r HSETEX myhash PX 10 FIELDS 1 field1 val1 + after 20 - # Delete the expired field directly - r HDEL myhash field1 + # Field is technically expired, but still in-memory due to lazy expiry + assert_equal 1 [r HLEN myhash] - # Field should be gone - assert_equal 0 [r HEXISTS myhash field1] + # Delete the expired field directly + r HDEL myhash field1 - # Capture expired_subkeys again - set after_info [r INFO stats] - set after [info_field $after_info expired_subkeys] + # Field should be gone + assert_equal 0 [r HEXISTS myhash field1] - # Verify that no expiry occurred internally - assert_equal $before $after + # Capture expired_subkeys again + set after_info [r INFO stats] + set after [info_field $after_info expired_subkeys] - r debug SET-ACTIVE-EXPIRE yes -} + # Verify that no expiry occurred internally + assert_equal $before $after + r debug SET-ACTIVE-EXPIRE yes + } -###### Test EXPIRE ############# + ###### Test EXPIRE ############# # Basic Expiry Functionality @@ -600,6 +1137,17 @@ test {HDEL - lazily expired field is removed without triggering expiry logic} { assert_equal {-2 -2} [r HEXPIRE nokey 10 FIELDS 2 field1 field2] } {} + test {HEXPIRE - GT condition fails when field has no TTL} { + r FLUSHALL + r HSET myhash f1 v1 + assert_equal 0 [r HEXPIRE myhash 10 GT fields 1 f1] + } + + test {HEXPIRE - LT condition succeeds when field has no TTL} { + r FLUSHALL + r HSET myhash f1 v1 + assert_equal 1 [r HEXPIRE myhash 10 LT fields 1 f1] + } ##### HTTL ##### test {HTTL - persistent field returns -1} { @@ -747,11 +1295,11 @@ test {HDEL - lazily expired field is removed without triggering expiry logic} { test {HEXPIREAT - missing FIELDS keyword} { - r FLUSHALL - r HSET myhash field1 val - set ts [expr {[clock seconds] + 5}] - catch {r HEXPIREAT myhash $ts} e - set e + r FLUSHALL + r HSET myhash field1 val + set 
ts [expr {[clock seconds] + 5}] + catch {r HEXPIREAT myhash $ts} e + set e } {ERR wrong number of arguments for 'hexpireat' command} test {HEXPIREAT - no fields after FIELDS} { @@ -1041,9 +1589,7 @@ start_server {tags {"hash-ttl-info external:skip"}} { } - -#### Replication - +#### Replication #### start_server {tags {"hashexpire external:skip"}} { # Start another server to test replication of TTLs start_server {tags {needs:repl external:skip}} { @@ -1232,6 +1778,363 @@ start_server {tags {"hashexpire external:skip"}} { fail "hash object was not deleted on replica after timeout" } } + } +} + +### Slot Migration #### +start_cluster 3 0 {tags {"cluster mytest"} overrides {cluster-node-timeout 1000}} { + # Flush all data on all cluster nodes before starting + for {set i 0} {$i < 3} {incr i} { + R $i FLUSHALL + } + if {$::singledb} { + set db 0 + } else { + set db 9 + } + set R0_id [R 0 CLUSTER MYID] + set R1_id [R 1 CLUSTER MYID] + + # Use a fixed hash tag to ensure key is in one slot + set key "{mymigrate}myhash" + + test {Hash with TTL fields migrates correctly between nodes} { + # Create hash fields + R 0 HSET $key f1 v1 f2 v2 f3 v3 + + # Set TTL on fields f1 and f2 + R 0 HEXPIRE $key 300 FIELDS 2 f1 f2 + + # Verify before slot migration + assert_equal 3 [R 0 HLEN $key] + assert_morethan [R 0 HTTL $key FIELDS 1 f1] 290 + assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [R 0 info keyspace]] keys=%d] + + # Prepare slot migration + set slot [R 0 CLUSTER KEYSLOT $key] + assert_equal OK [R 1 CLUSTER SETSLOT $slot IMPORTING $R0_id] + assert_equal OK [R 0 CLUSTER SETSLOT $slot MIGRATING $R1_id] + # Migrate key to destination node + R 0 MIGRATE [srv -1 host] [srv -1 port] $key 0 5000 + + # Complete slot migration + R 0 CLUSTER SETSLOT $slot NODE $R1_id + R 1 CLUSTER SETSLOT $slot NODE $R1_id + + # Verify after slot migration + assert_equal 3 [R 1 HLEN $key] + assert_morethan [R 1 HTTL $key FIELDS 1 f1] 280 + assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [R 1 
info keyspace]] keys=%d] + + # Setup keyspace notifications + R 1 config set notify-keyspace-events KEA + set rd [valkey_deferring_client -1] + assert_equal {1} [psubscribe $rd __keyevent@0__:hexpired] + + # Set expiration to 0 + R 1 HGETEX $key EX 0 FIELDS 1 f1 + + # Verify expiration + assert_keyevent_pattern $rd hexpired "{$key}" + assert_equal 2 [R 1 HLEN $key] + assert_equal "" [R 1 HGET $key f1] + assert_equal -2 [R 1 HTTL $key FIELDS 1 f1] + + $rd close + } +} + +start_server {tags {"hashexpire external:skip"}} { + foreach cmd {RENAME RESTORE} { + test "$cmd Preserves Field TTLs" { + r FLUSHALL + r HSET myhash f1 v1 f2 v2 + r HEXPIRE myhash 200 FIELDS 1 f1 + + # Verify initial TTL state + set mem_before [r MEMORY USAGE myhash] + assert_equal "v1" [r HGET myhash f1] + assert_equal "v2" [r HGET myhash f2] + assert_morethan [r HTTL myhash FIELDS 1 f1] 100 + assert_equal -1 [r HTTL myhash FIELDS 1 f2] + assert_equal 2 [r HLEN myhash] + assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] + + # Run the command + if {$cmd eq "RENAME"} { + r rename myhash nwhash + set newhash nwhash + } elseif {$cmd eq "RESTORE"} { + set serialized [r DUMP myhash] + r RESTORE rstrhs 0 $serialized + set newhash rstrhs + } + + # Verify field values and TTLs are preserved + set memory_after [r MEMORY USAGE $newhash] + assert_equal "v1" [r HGET $newhash f1] + assert_equal "v2" [r HGET $newhash f2] + assert_morethan [r HTTL $newhash FIELDS 1 f1] 100 + assert_equal -1 [r HTTL $newhash FIELDS 1 f2] + assert_equal 2 [r HLEN $newhash] + if {$cmd eq "RESTORE"} { + assert_match {2} [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] + } else { + assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] + } + assert_equal $mem_before $memory_after + } + } + + test {COPY Preserves TTLs} { + r flushall + + # Create hash with fields + r HSET myhash f1 v1 f3 v3 f4 v4 + + # Set TTLs on f1 (long) and f3 (short); f4 stays persistent + r HEXPIRE myhash 200 FIELDS 1 f1 + r 
HEXPIRE myhash 2 FIELDS 1 f3 + + # Verify initial TTL state + set mem_before [r MEMORY USAGE myhash] + assert_equal "v1" [r HGET myhash f1] + assert_equal "v3" [r HGET myhash f3] + assert_equal "v4" [r HGET myhash f4] + assert_morethan [r HTTL myhash FIELDS 1 f1] 100 + assert_morethan [r HTTL myhash FIELDS 1 f3] 0 + assert_equal -1 [r HTTL myhash FIELDS 1 f4] + assert_equal 3 [r HLEN myhash] + assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] + + # Copy hash to new key + r copy myhash newhash1 + + # Verify myhash is the same + assert_equal "v1" [r HGET myhash f1] + assert_equal "v3" [r HGET myhash f3] + assert_equal "v4" [r HGET myhash f4] + assert_morethan [r HTTL myhash FIELDS 1 f1] 100 + assert_morethan [r HTTL myhash FIELDS 1 f3] 0 + assert_equal -1 [r HTTL myhash FIELDS 1 f4] + assert_equal 3 [r HLEN myhash] + + # Verify new hash got same values + set mem_after [r MEMORY USAGE myhash] + assert_equal "v1" [r HGET newhash1 f1] + assert_equal "v3" [r HGET newhash1 f3] + assert_equal "v4" [r HGET newhash1 f4] + assert_morethan [r HTTL newhash1 FIELDS 1 f1] 100 + assert_morethan [r HTTL newhash1 FIELDS 1 f3] 0 + assert_equal -1 [r HTTL newhash1 FIELDS 1 f4] + assert_equal 3 [r HLEN newhash1] + assert_match {2} [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] + + assert_equal $mem_before $mem_after + + # Modify TTL in original hash + r HEXPIRE myhash 5 FIELDS 1 f3 + + # Wait for original TTL to expire in copy + after 2000 + assert_equal "v1" [r HGET newhash1 f1] + assert_equal "" [r HGET newhash1 f3] + assert_equal "v1" [r HGET myhash f1] + assert_equal "v3" [r HGET myhash f3] + + r HSETEX myhash EX 2 FIELDS 1 f3 v3 + # Create second copy + r copy myhash newhash2 + + # Modify TTL in second copy + r HEXPIRE newhash2 500 FIELDS 1 f3 + + # Wait for original hash TTL to expire + after 2000 + assert_equal "v1" [r HGET myhash f1] + assert_equal "" [r HGET myhash f3] + assert_equal "v1" [r HGET newhash2 f1] + assert_equal 
"v3" [r HGET newhash2 f3] + } + + test {Hash Encoding Transitions with TTL - Add TTL to Existing Fields} { + r flushall + + # Create small hash with listpack encoding + r HSET myhash f1 v1 f2 v2 + + # Verify initial encoding + set "listpack" [r OBJECT ENCODING myhash] + + # Add TTL to existing field + r HEXPIRE myhash 300 FIELDS 1 f1 + + # Verify encoding changed to hashtable + set "hashtable" [r OBJECT ENCODING myhash] + + # Verify field values are preserved + assert_equal "v1" [r HGET myhash f1] + assert_equal "v2" [r HGET myhash f2] + # Veridy expiry + assert_morethan [r HTTL myhash FIELDS 1 f1] 100 + assert_equal -1 [r HTTL myhash FIELDS 1 f2] + } + + test {Hash Encoding Transitions with TTL - Create New Fields with TTL} { + r flushall + + # Create small hash with listpack encoding + r HSET myhash f1 v1 f2 v2 + + # Verify initial encoding + set "listpack" [r OBJECT ENCODING myhash] + + # Add many fields to force encoding transition + for {set i 3} {$i <= 600} {incr i} { + lappend pairs "f$i" "v$i" + } + r HSET myhash {*}$pairs + r HEXPIRE myhash 3 FIELDS 5 f1 f10 f100 f200 f300 + + # Verify encoding changed to hashtable + set "hashtable" [r OBJECT ENCODING myhash] + + # Verify all field values and TTLs are correct + for {set i 1} {$i <= 600} {incr i} { + assert_equal "v$i" [r HGET myhash "f$i"] + if {$i == 1 || $i == 10 || $i == 100 || $i == 200 || $i == 300} { + assert_equal 3 [r HTTL myhash FIELDS 1 "f$i"] + } else { + assert_equal -1 [r HTTL myhash FIELDS 1 "f$i"] + } + } + } +} + +start_server {tags {"hashexpire external:skip"}} { + r config set notify-keyspace-events KEA + + foreach time_unit {s, ms} { + test "Key TTL expires before field TTL: entire hash should be deleted timeunit: $time_unit" { + r FLUSHALL + r config set notify-keyspace-events KEA + set rd [valkey_deferring_client] + assert_equal {1} [psubscribe $rd __keyevent@*] + + r HSET myhash f1 v1 f2 v2 f3 v3 + assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] + 
assert_equal 3 [r HLEN myhash] + if {$time_unit eq "s"} { + r HEXPIRE hash1 10 FIELDS 1 f1 + r EXPIRE hash1 1 + } else { + r HPEXPIRE myhash 10000 FIELDS 1 f1 + r PEXPIRE myhash 1000 + } + + wait_for_condition 100 100 { + [r EXISTS myhash] eq "0" + } else { + fail "myhash still exists" + } + assert_equal 0 [r HLEN myhash] + assert_match "" [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] + + assert_keyevent_pattern $rd hset myhash + assert_keyevent_pattern $rd hexpire myhash + assert_keyevent_pattern $rd expire myhash + $rd close + } + + test "Field TTL expires before key TTL: only the specific field should expire: $time_unit" { + r FLUSHALL + set rd [valkey_deferring_client] + assert_equal {1} [psubscribe $rd __keyevent@*] + + r HSET myhash f1 v1 f2 v2 f3 v3 + assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] + assert_equal 3 [r HLEN myhash] + if {$time_unit eq "s"} { + r HEXPIRE myhash 1 FIELDS 1 f1 + r EXPIRE myhash 10 + } else { + r HPEXPIRE myhash 1000 FIELDS 1 f1 + r PEXPIRE myhash 10000 + } + + wait_for_condition 100 100 { + [r HGET myhash f1] eq "" + } else { + fail "f1 not expired" + } + assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] + assert_equal 1 [r EXISTS myhash] + assert_equal "v2" [r HGET myhash f2] + assert_equal "v3" [r HGET myhash f3] + assert_keyevent_pattern $rd hset myhash + assert_keyevent_pattern $rd hexpire myhash + $rd close + } + + test "Key and field TTL expire simultaneously: entire hash should be deleted: $time_unit" { + r FLUSHALL + + r HSET myhash f1 v1 f2 v2 f3 v3 + assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] + assert_equal 3 [r HLEN myhash] + + + if {$time_unit eq "s"} { + set expire [expr {[clock seconds] + 1}] + r HEXPIREAT myhash $expire FIELDS 1 f1 + r EXPIREAT myhash $expire + } else { + set expire [expr {[clock milliseconds] + 1000}] + r HPEXPIREAT myhash $expire FIELDS 1 f1 + r PEXPIREAT myhash $expire 
+ } + + wait_for_condition 100 100 { + [r EXISTS myhash] eq 0 + } else { + fail "myhash still exsist" + } + + assert_equal "" [r HGET myhash f1] + assert_equal "" [r HGET myhash f2] + assert_equal "" [r HGET myhash f3] + assert_match "" [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] + assert_equal 0 [r HLEN myhash] + } + + test {Millisecond/Seconds precision} { + r flushall + + r HSET myhash f1 v1 f2 v2 + if {$time_unit eq "s"} { + r HEXPIRE myhash 3 FIELDS 1 f1 + r EXPIRE myhash 1 + } else { + r HPEXPIRE myhash 3000 FIELDS 1 f1 + r PEXPIRE myhash 1000 + } + + after 1500 + assert_equal 0 [r EXISTS myhash] + } + } + + test {Ensure that key-level PERSIST on the key don't affect field TTL} { + r FLUSHALL + + r HSET myhash f1 v1 f2 v2 + assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] + assert_equal 2 [r HLEN myhash] + r HEXPIRE myhash 100000 FIELDS 1 f1 + r PERSIST myhash + + assert_equal -1 [r TTL myhash] + assert_morethan [r HTTL myhash FIELDS 1 f1] 0 } } \ No newline at end of file From f4869a720aa6a737ea66c22118cb5226bde8e12d Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Thu, 3 Jul 2025 16:37:30 +0300 Subject: [PATCH 058/119] address pr comments Signed-off-by: Ran Shidlansik --- src/expire.c | 3 ++- src/t_hash.c | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/expire.c b/src/expire.c index a30c6b1caa..cc42d803a4 100644 --- a/src/expire.c +++ b/src/expire.c @@ -604,7 +604,8 @@ int convertExpireArgumentToUnixTime(client *c, robj *arg, long long basetime, in return C_ERR; } when += basetime; - if (unixtime) *unixtime = when; + debugServerAssert(unixtime); + *unixtime = when; return C_OK; } diff --git a/src/t_hash.c b/src/t_hash.c index 55f2a528ba..57a6174c8f 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -1197,7 +1197,7 @@ void hsetexCommand(client *c) { if (convertExpireArgumentToUnixTime(c, expire, basetime, unit, &when) == C_ERR) return; - if ((when == basetime) || (((flags & 
ARGS_PXAT) || (flags & ARGS_EXAT)) && ((when == basetime) || checkAlreadyExpired(when)))) { + if (checkAlreadyExpired(when)) { set_expired = 1; } } @@ -1353,7 +1353,7 @@ void hgetexCommand(client *c) { if (convertExpireArgumentToUnixTime(c, expire, basetime, unit, &when) == C_ERR) return; - if ((when == basetime) || (((flags & ARGS_PXAT) || (flags & ARGS_EXAT)) && checkAlreadyExpired(when))) { + if (checkAlreadyExpired(when)) { set_expired = 1; when = 0; } else { @@ -1591,7 +1591,7 @@ void hexpireGenericCommand(client *c, long long basetime, int unit) { if (convertExpireArgumentToUnixTime(c, param, basetime, unit, &when) == C_ERR) return; - if ((when == basetime) || checkAlreadyExpired(when)) + if (checkAlreadyExpired(when)) set_expired = 1; robj *obj = lookupKeyWrite(c->db, key); From 195c5a56481e68d5f7ca8cf274e2c945795a22c8 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Thu, 3 Jul 2025 16:40:39 +0300 Subject: [PATCH 059/119] fix typo Signed-off-by: Ran Shidlansik --- tests/unit/hashexpire.tcl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/hashexpire.tcl b/tests/unit/hashexpire.tcl index 21bb58ae8d..3f2a970e9b 100644 --- a/tests/unit/hashexpire.tcl +++ b/tests/unit/hashexpire.tcl @@ -2098,7 +2098,7 @@ start_server {tags {"hashexpire external:skip"}} { wait_for_condition 100 100 { [r EXISTS myhash] eq 0 } else { - fail "myhash still exsist" + fail "myhash still exist" } assert_equal "" [r HGET myhash f1] From 2c4ea389e1e08d70d275d43d17de3ece681ef197 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Thu, 3 Jul 2025 16:55:45 +0300 Subject: [PATCH 060/119] add replication tests Signed-off-by: Ran Shidlansik --- tests/unit/hashexpire.tcl | 211 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 211 insertions(+) diff --git a/tests/unit/hashexpire.tcl b/tests/unit/hashexpire.tcl index 3f2a970e9b..65f3f824af 100644 --- a/tests/unit/hashexpire.tcl +++ b/tests/unit/hashexpire.tcl @@ -1781,6 +1781,217 @@ start_server {tags 
{"hashexpire external:skip"}} { } } +start_server {tags {"hashexpire external:skip"}} { + set primary [srv 0 client] + set primary_host [srv 0 host] + set primary_port [srv 0 port] + start_server {tags {needs:repl external:skip}} { + set replica_1 [srv 0 client] + set replica_1_host [srv 0 host] + set replica_1_port [srv 0 port] + + test {Replication Primary -> R1} { + $primary FLUSHALL + ####### Replication setup ####### + $replica_1 replicaof $primary_host $primary_port + wait_for_condition 50 100 { + [lindex [$replica_1 role] 0] eq {slave} && + [string match {*master_link_status:up*} [$replica_1 info replication]] + } else { + fail "Can't turn the instance into a replica" + } + + # Initialize deferred clients and subscribe to keyspace notifications + set rd_primary [valkey_deferring_client -1] + set rd_replica_1 [valkey_deferring_client $replica_1_host $replica_1_port] + assert_equal {1} [psubscribe $rd_primary __keyevent@*] + assert_equal {1} [psubscribe $rd_replica_1 __keyevent@*] + + # Create hash and timing - f1 < f2 < f3 expiry times + set f1_exp [expr {[clock seconds] + 10000}] + + # Setup hash, set expire and set expire 0 + $primary HSET myhash f1 v1 f2 v2 ;# Should trigger 3 hset + $primary HEXPIREAT myhash $f1_exp FIELDS 1 f1 ;# Should trigger 3 hexpire + wait_for_ofs_sync $primary $replica_1 + + $primary HEXPIRE myhash 0 FIELDS 1 f1 ;# Should trigger 1 hexpired (for primary) and 1 hdel (for replica) + wait_for_ofs_sync $primary $replica_1 + + # Wait for f1 expiration + wait_for_condition 50 100 { + [$primary HTTL myhash FIELDS 1 f1] eq -2 && \ + [$replica_1 HTTL myhash FIELDS 1 f1] eq -2 + } else { + fail "f1 still exsists" + } + + # Verify keyspace notification + foreach rd [list $rd_primary $rd_replica_1] { + assert_keyevent_pattern $rd hset myhash + assert_keyevent_pattern $rd hexpire myhash + } + # primary gets hexpired and replica gets hdel + assert_keyevent_pattern $rd_primary hexpired myhash + assert_keyevent_pattern $rd_replica_1 hdel myhash + 
+ $rd_primary close + $rd_replica_1 close + } + + start_server {tags {needs:repl external:skip}} { + $primary FLUSHALL + set replica_2 [srv 0 client] + set replica_2_host [srv 0 host] + set replica_2_port [srv 0 port] + + test {Chain Replication (Primary -> R1 -> R2) preserves TTL} { + $replica_1 replicaof $primary_host $primary_port + # Wait for R2 to connect to R1 + wait_for_condition 100 100 { + [info_field [$replica_1 info replication] master_link_status] eq "up" + } else { + fail "R1 <-> PRIMARY didn't establish connection" + } + + $replica_2 replicaof $replica_1_host $replica_1_port + # Wait for R2 to connect to R1 + wait_for_condition 100 100 { + [info_field [$replica_1 info replication] master_link_status] eq "up" + } else { + fail "R2 <-> R1 didn't establish connection" + } + + # Initialize deferred clients and subscribe to keyspace notifications + set rd_primary [valkey_deferring_client -2] + set rd_replica_1 [valkey_deferring_client -1] + set rd_replica_2 [valkey_deferring_client $replica_2_host $replica_2_port] + assert_equal {1} [psubscribe $rd_primary __keyevent@*] + assert_equal {1} [psubscribe $rd_replica_1 __keyevent@*] + assert_equal {1} [psubscribe $rd_replica_2 __keyevent@*] + + # Create hash and timing - f1 < f2 < f3 expiry times + set f1_exp [expr {[clock seconds] + 10000}] + + ############################################# STEUP HASH ############################################# + $primary HSET myhash f1 v1 f2 v2 ;# Should trigger 3 hset + $primary HEXPIREAT myhash $f1_exp FIELDS 1 f1 ;# Should trigger 3 hexpire + wait_for_ofs_sync $primary $replica_1 + wait_for_ofs_sync $replica_1 $replica_2 + + $primary HPEXPIRE myhash 0 FIELDS 1 f1 ;# Should trigger 1 hexpired (for primary) and 2 hdel (for replicas) + wait_for_ofs_sync $primary $replica_1 + wait_for_ofs_sync $replica_1 $replica_2 + + + # Wait for f1 expiration + wait_for_condition 50 100 { + [$primary HTTL myhash FIELDS 1 f1] eq -2 && \ + [$replica_1 HTTL myhash FIELDS 1 f1] eq -2 && \ + 
[$replica_2 HTTL myhash FIELDS 1 f1] eq -2 + } else { + fail "f1 still exsists" + } + + # primary gets hexpired and replicas get hdel + foreach rd [list $rd_primary $rd_replica_1 $rd_replica_2] { + assert_keyevent_pattern $rd hset myhash + assert_keyevent_pattern $rd hexpire myhash + } + assert_keyevent_pattern $rd_primary hexpired myhash + assert_keyevent_pattern $rd_replica_1 hdel myhash + assert_keyevent_pattern $rd_replica_2 hdel myhash + + $rd_primary close + $rd_replica_1 close + $rd_replica_2 close + } + } + + test {Replica Failover/Promotion to Primary} { + $primary FLUSHALL + ####### Replication setup ####### + $replica_1 replicaof $primary_host $primary_port + wait_for_condition 50 100 { + [lindex [$replica_1 role] 0] eq {slave} && + [string match {*master_link_status:up*} [$replica_1 info replication]] + } else { + fail "Can't turn the instance into a replica" + } + + # Create hash fields with TTL on primary + set f1_exp [expr {[clock seconds] + 200}] + set f2_exp [expr {[clock seconds] + 300000}] + $primary HSET myhash f1 v1 f2 v2 f3 v3 + $primary HEXPIREAT myhash $f1_exp FIELDS 1 f1 + $primary HEXPIREAT myhash $f2_exp FIELDS 1 f2 + # f3 remains persistent + + # Wait for full sync + wait_for_ofs_sync $primary $replica_1 + + # Verify primary and replica are the same + foreach instance [list $primary $replica_1] { + assert_equal $f1_exp [$instance HEXPIRETIME myhash FIELDS 1 f1] + assert_equal $f2_exp [$instance HEXPIRETIME myhash FIELDS 1 f2] + assert_equal -1 [$instance HTTL myhash FIELDS 1 f3] + assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [$instance info keyspace]] keys=%d] + assert_equal "v1 v2 v3" [$instance HGETEX myhash FIELDS 3 f1 f2 f3] + assert_equal 3 [$instance HLEN myhash] + } + + # Perform failover + $replica_1 replicaof no one + # Wait for replica to become primary + wait_for_condition 100 100 { + [info_field [$replica_1 info replication] role] eq "master" + } else { + fail "Replica didn't become master" + } + + # Setup keyspace 
notifications for the promoted replica + $replica_1 config set notify-keyspace-events KEA + set rd_replica [valkey_deferring_client $replica_1_host $replica_1_port] + assert_equal {1} [psubscribe $rd_replica __keyevent@*] + + # Check all values that checked before are the same + assert_equal 3 [$replica_1 HLEN myhash] + assert_equal $f1_exp [$replica_1 HEXPIRETIME myhash FIELDS 1 f1] + assert_equal $f2_exp [$replica_1 HEXPIRETIME myhash FIELDS 1 f2] + assert_equal -1 [$replica_1 HTTL myhash FIELDS 1 f3] + assert_equal "v1 v2 v3" [$replica_1 HGETEX myhash FIELDS 3 f1 f2 f3] + assert_equal 3 [$replica_1 HLEN myhash] + + # Set f1 to expire in 1 second and wait for expiration + $replica_1 HEXPIRE myhash 1 FIELDS 1 f1 ;# will trigger hexpire + wait_for_condition 50 100 { + [$replica_1 HTTL myhash FIELDS 1 f1] eq -2 + } else { + fail "f1 not expired" + } + + # Verify expiry + assert_equal "" [$replica_1 HGET myhash f1] + assert_equal 3 [$replica_1 HLEN myhash] + # Change TTL of f2 + $replica_1 HEXPIRE myhash 1000000 FIELDS 1 f2 ;# will trigger hexpire + assert_morethan [$replica_1 HTTL myhash FIELDS 1 f2] 9000 + # Change TTL of f2 to 0 (immediate expiry) + $replica_1 HGETEX myhash EX 0 FIELDS 1 f2 ;# will trigger hexpired + # Verify final state + assert_equal 2 [$replica_1 HLEN myhash] + assert_equal "{} {} v3" [$replica_1 HGETEX myhash FIELDS 3 f1 f2 f3] + + assert_keyevent_pattern $rd_replica hexpire myhash + assert_keyevent_pattern $rd_replica hexpire myhash + assert_keyevent_pattern $rd_replica hexpired myhash + + $rd_replica close + } + + } +} + ### Slot Migration #### start_cluster 3 0 {tags {"cluster mytest"} overrides {cluster-node-timeout 1000}} { # Flush all data on all cluster nodes before starting From 0e1305cf96c93a50d78d8fab027a1ac1bc230467 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Thu, 3 Jul 2025 16:57:12 +0300 Subject: [PATCH 061/119] fix typo Signed-off-by: Ran Shidlansik --- tests/unit/hashexpire.tcl | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/tests/unit/hashexpire.tcl b/tests/unit/hashexpire.tcl index 65f3f824af..21ab796804 100644 --- a/tests/unit/hashexpire.tcl +++ b/tests/unit/hashexpire.tcl @@ -1823,7 +1823,7 @@ start_server {tags {"hashexpire external:skip"}} { [$primary HTTL myhash FIELDS 1 f1] eq -2 && \ [$replica_1 HTTL myhash FIELDS 1 f1] eq -2 } else { - fail "f1 still exsists" + fail "f1 still exists" } # Verify keyspace notification @@ -1890,7 +1890,7 @@ start_server {tags {"hashexpire external:skip"}} { [$replica_1 HTTL myhash FIELDS 1 f1] eq -2 && \ [$replica_2 HTTL myhash FIELDS 1 f1] eq -2 } else { - fail "f1 still exsists" + fail "f1 still exists" } # primary gets hexpired and replicas get hdel From 6c7e841d7672e8a1b975306e64f2641aee396f10 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Thu, 3 Jul 2025 19:28:52 +0300 Subject: [PATCH 062/119] optimize vsetUpdateEntry to avoid unnecessary mutations Signed-off-by: Ran Shidlansik --- src/vset.c | 149 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 141 insertions(+), 8 deletions(-) diff --git a/src/vset.c b/src/vset.c index 7e2a9a1d51..8d2bfa086f 100644 --- a/src/vset.c +++ b/src/vset.c @@ -352,6 +352,23 @@ pVector *pvPop(pVector *pv, void **pelem) { return pvRemoveAt(pv, last_idx); } +/* Set the element at given index inside the pVector. + * + * Parameters: + * pv - The vector containing the elements to swap. + * idx - Index of the element. + * elem - pointer to the new element. + * + * Returns: + * None. + * + * Preconditions: + * - idx must be valid indices within the vector. */ +void pvSet(pVector *pv, uint32_t idx, void *elem) { + assert(idx < PV_LEN(pv)); + pv->data[idx] = elem; +} + /* Swaps two elements at given indices inside the pVector. 
* * Parameters: @@ -1226,8 +1243,9 @@ static inline size_t vsetBucketPopExpired_RAX(vsetBucket **bucket, vsetGetExpiry if (raxSize(buckets) == 0) { raxFree(buckets); *bucket = vsetBucketFromNone(); + } else { + shrinkRaxBucketIfPossible(bucket, getExpiry); } - shrinkRaxBucketIfPossible(bucket, getExpiry); return count; } @@ -1537,6 +1555,99 @@ bool vsetRemoveEntry(vset *set, vsetGetExpiryFunc getExpiry, void *entry) { return vsetRemoveEntryWithExpiry(set, getExpiry, entry, getExpiry(entry)); } +vsetBucket *vsetBucketUpdateEntry_NONE(vsetBucket *bucket, vsetGetExpiryFunc getExpiry, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry) { + UNUSED(bucket); + UNUSED(getExpiry); + UNUSED(old_entry); + UNUSED(new_entry); + UNUSED(old_expiry); + UNUSED(new_expiry); + + return vsetBucketFromNone(); +} + +vsetBucket *vsetBucketUpdateEntry_SINGLE(vsetBucket *bucket, vsetGetExpiryFunc getExpiry, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry) { + UNUSED(getExpiry); + UNUSED(old_expiry); + UNUSED(new_expiry); + + if (vsetBucketSingle(bucket) == old_entry) { + return vsetBucketFromSingle(new_entry); + } + return vsetBucketFromNone(); +} + +vsetBucket *vsetBucketUpdateEntry_VECTOR(vsetBucket *bucket, vsetGetExpiryFunc getExpiry, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry) { + UNUSED(getExpiry); + UNUSED(old_expiry); + UNUSED(new_expiry); + + pVector *pv = vsetBucketVector(bucket); + uint32_t idx = pvFind(pv, old_entry); + /* in case we did not locate the entry, just return NONE bucket */ + if (idx == pvLen(pv)) + return vsetBucketFromNone(); + pvSet(pv, idx, new_entry); + return bucket; +} + +vsetBucket *vsetBucketUpdateEntry_HASHTABLE(vsetBucket *bucket, vsetGetExpiryFunc getExpiry, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry) { + UNUSED(getExpiry); + UNUSED(old_expiry); + UNUSED(new_expiry); + + hashtable *ht = vsetBucketHashtable(bucket); + void **ref = 
hashtableFindRef(ht, old_entry); + if (!ref) { + return vsetBucketFromNone(); + } else { + *ref = new_entry; + } + return bucket; +} + +vsetBucket *vsetBucketUpdateEntry_RAX(vsetBucket *target, vsetGetExpiryFunc getExpiry, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry) { + unsigned char key[VSET_BUCKET_KEY_LEN] = {0}; + size_t key_len; + long long bucket_ts; + rax *expiry_buckets = vsetBucketRax(target); + raxNode *node; + /* In case new and old are to be updated in the same bucket - just update the bucket. */ + bool update_bucket = (get_bucket_ts(old_expiry) == get_bucket_ts(new_expiry)); + vsetBucket *bucket = findBucket(expiry_buckets, old_expiry, key, &key_len, &bucket_ts, &node); + + if (!update_bucket) { + /* if the old and new entries are in different buckets, remove the old entry and add the new one. */ + if (removeEntryFromRaxBucket(target, getExpiry, old_entry, bucket, key, key_len, NULL, node)) + target = insertToBucket_RAX(getExpiry, target, new_entry, new_expiry); + else + return vsetBucketFromNone(); + } else { + /* Just update the current bucket */ + switch (vsetBucketType(bucket)) { + case VSET_BUCKET_NONE: + /* No bucket means there is no such old entry. return NONE */ + return vsetBucketFromNone(); + case VSET_BUCKET_SINGLE: + bucket = vsetBucketUpdateEntry_SINGLE(bucket, getExpiry, old_entry, new_entry, old_expiry, new_expiry); + break; + case VSET_BUCKET_VECTOR: + bucket = vsetBucketUpdateEntry_VECTOR(bucket, getExpiry, old_entry, new_entry, old_expiry, new_expiry); + break; + case VSET_BUCKET_HT: + bucket = vsetBucketUpdateEntry_HASHTABLE(bucket, getExpiry, old_entry, new_entry, old_expiry, new_expiry); + break; + default: + panic("Unknown bucket type to update entry"); + } + if (bucket) + raxSetData(node, bucket); + else + return vsetBucketFromNone(); + } + return target; +} + /** * Updates an existing entry in the volatile set (vset), optionally replacing it * with a new entry and expiration time. 
@@ -1583,16 +1694,38 @@ bool vsetUpdateEntry(vset *set, vsetGetExpiryFunc getExpiry, void *old_entry, vo /* Nothing to do */ if (old_entry == new_entry && old_expiry == new_expiry) return true; - - if (old_entry && old_expiry != -1) + vsetBucket *updated = vsetBucketFromNone(); + /* case 1 - both entries were tracked. update the bucket */ + if (old_entry && old_expiry != -1 && new_entry && new_expiry != -1) { + switch (vsetBucketType(*set)) { + case VSET_BUCKET_NONE: + return false; + case VSET_BUCKET_SINGLE: + updated = vsetBucketUpdateEntry_SINGLE(*set, getExpiry, old_entry, new_entry, old_expiry, new_expiry); + break; + case VSET_BUCKET_VECTOR: + updated = vsetBucketUpdateEntry_VECTOR(*set, getExpiry, old_entry, new_entry, old_expiry, new_expiry); + break; + case VSET_BUCKET_RAX: + updated = vsetBucketUpdateEntry_RAX(*set, getExpiry, old_entry, new_entry, old_expiry, new_expiry); + } + if (!updated) + return false; + *set = updated; + return true; + } + /* case 2 - old entry was not tracked. just add the new entry */ + else if ((!old_entry || old_expiry == -1) && new_entry && new_expiry != -1) + return vsetAddEntry(set, getExpiry, new_entry); + /* case 3 - old entry was tracked. new entry is not. just remove the old entry */ + else if ((!new_entry || new_expiry == -1) && old_entry && old_expiry != -1) /* We cannot take the expiration time from the removed entry, since it might not be allocated anymore. * For this reason we ask the API user to provide us the removed entry expiration time. */ - assert((vsetRemoveEntryWithExpiry(set, getExpiry, old_entry, old_expiry))); - - if (new_entry && new_expiry != -1) - assert(vsetAddEntry(set, getExpiry, new_entry)); + return vsetRemoveEntryWithExpiry(set, getExpiry, old_entry, old_expiry); + else + return false; - return true; + return false; } /* vsetPopExpired - Remove expired entries from a volatile set up to a maximum count. 
From 01eed469f51b209fe27ad98fad061f1eeaaa420e Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Thu, 3 Jul 2025 19:48:11 +0300 Subject: [PATCH 063/119] handle case of unsorted vector after update Signed-off-by: Ran Shidlansik --- src/vset.c | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/src/vset.c b/src/vset.c index 8d2bfa086f..b1bef119e7 100644 --- a/src/vset.c +++ b/src/vset.c @@ -1555,18 +1555,7 @@ bool vsetRemoveEntry(vset *set, vsetGetExpiryFunc getExpiry, void *entry) { return vsetRemoveEntryWithExpiry(set, getExpiry, entry, getExpiry(entry)); } -vsetBucket *vsetBucketUpdateEntry_NONE(vsetBucket *bucket, vsetGetExpiryFunc getExpiry, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry) { - UNUSED(bucket); - UNUSED(getExpiry); - UNUSED(old_entry); - UNUSED(new_entry); - UNUSED(old_expiry); - UNUSED(new_expiry); - - return vsetBucketFromNone(); -} - -vsetBucket *vsetBucketUpdateEntry_SINGLE(vsetBucket *bucket, vsetGetExpiryFunc getExpiry, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry) { +static inline vsetBucket *vsetBucketUpdateEntry_SINGLE(vsetBucket *bucket, vsetGetExpiryFunc getExpiry, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry) { UNUSED(getExpiry); UNUSED(old_expiry); UNUSED(new_expiry); @@ -1577,7 +1566,7 @@ vsetBucket *vsetBucketUpdateEntry_SINGLE(vsetBucket *bucket, vsetGetExpiryFunc g return vsetBucketFromNone(); } -vsetBucket *vsetBucketUpdateEntry_VECTOR(vsetBucket *bucket, vsetGetExpiryFunc getExpiry, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry) { +static inline vsetBucket *vsetBucketUpdateEntry_VECTOR(vsetBucket *bucket, vsetGetExpiryFunc getExpiry, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry) { UNUSED(getExpiry); UNUSED(old_expiry); UNUSED(new_expiry); @@ -1591,7 +1580,7 @@ vsetBucket *vsetBucketUpdateEntry_VECTOR(vsetBucket *bucket, 
vsetGetExpiryFunc g return bucket; } -vsetBucket *vsetBucketUpdateEntry_HASHTABLE(vsetBucket *bucket, vsetGetExpiryFunc getExpiry, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry) { +static inline vsetBucket *vsetBucketUpdateEntry_HASHTABLE(vsetBucket *bucket, vsetGetExpiryFunc getExpiry, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry) { UNUSED(getExpiry); UNUSED(old_expiry); UNUSED(new_expiry); @@ -1606,7 +1595,7 @@ vsetBucket *vsetBucketUpdateEntry_HASHTABLE(vsetBucket *bucket, vsetGetExpiryFun return bucket; } -vsetBucket *vsetBucketUpdateEntry_RAX(vsetBucket *target, vsetGetExpiryFunc getExpiry, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry) { +static inline vsetBucket *vsetBucketUpdateEntry_RAX(vsetBucket *target, vsetGetExpiryFunc getExpiry, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry) { unsigned char key[VSET_BUCKET_KEY_LEN] = {0}; size_t key_len; long long bucket_ts; @@ -1704,8 +1693,12 @@ bool vsetUpdateEntry(vset *set, vsetGetExpiryFunc getExpiry, void *old_entry, vo updated = vsetBucketUpdateEntry_SINGLE(*set, getExpiry, old_entry, new_entry, old_expiry, new_expiry); break; case VSET_BUCKET_VECTOR: - updated = vsetBucketUpdateEntry_VECTOR(*set, getExpiry, old_entry, new_entry, old_expiry, new_expiry); - break; + /* NOTE! - in this specific case we might have changed the vector order - need to sort it again (NLogN) */ + /* or remove it from the vector and re-add it. (N+LogN). the later also looks cleaner... 
*/ + if (!vsetRemoveEntryWithExpiry(set, getExpiry, old_entry, old_expiry)) + return false; + return vsetAddEntry(set, getExpiry, new_entry); + case VSET_BUCKET_RAX: updated = vsetBucketUpdateEntry_RAX(*set, getExpiry, old_entry, new_entry, old_expiry, new_expiry); } From 2e75a88a44c18f2072eb2b94f2b03e37c417fb1b Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Thu, 3 Jul 2025 19:53:14 +0300 Subject: [PATCH 064/119] Add some AOF test case Signed-off-by: Ran Shidlansik --- tests/unit/hashexpire.tcl | 102 +++++++++++++++++++++++++++++++++++++- 1 file changed, 101 insertions(+), 1 deletion(-) diff --git a/tests/unit/hashexpire.tcl b/tests/unit/hashexpire.tcl index 21ab796804..1bbeab7e97 100644 --- a/tests/unit/hashexpire.tcl +++ b/tests/unit/hashexpire.tcl @@ -2348,4 +2348,104 @@ start_server {tags {"hashexpire external:skip"}} { assert_equal -1 [r TTL myhash] assert_morethan [r HTTL myhash FIELDS 1 f1] 0 } -} \ No newline at end of file +} + +#### AOF Test ##### +set defaults {appendonly {yes} appendfilename {appendonly.aof} appenddirname {appendonlydir} auto-aof-rewrite-percentage {0}} +set server_path [tmpdir server.multi.aof] +start_server_aof [list dir $server_path] { + test {TTL Persistence in AOF} { + r flushall + r config set appendonly yes + r config set appendfsync always + + # Create hash with 1short, long and no expired fields + set long_expire [expr {[clock seconds] + 1000000}] + # Create 10 fields with long expiry + for {set i 1} {$i <= 10} {incr i} { + r HSETEX myhash EXAT $long_expire FIELDS 1 f$i v$i ;# 10 PXAT to aof + } + + # Create 10 fields with short expiry + for {set i 11} {$i <= 20} {incr i} { + r HSETEX myhash PXAT [expr {[clock milliseconds] + 10}] FIELDS 1 f$i v$i ;# 10 PXAT to aof + } + + # Create 10 fields with expire 0 + for {set i 21} {$i <= 30} {incr i} { + r HSET myhash f$i v$i + r HEXPIRE myhash 0 FIELDS 1 f$i ;# 10 HDEL to aof + } + + # Create 10 fields with no expiry + for {set i 31} {$i <= 40} {incr i} { + r HSET myhash f$i v$i + 
} + + # Now wait for expire of the short expiry + for {set i 11} {$i <= 20} {incr i} { + wait_for_condition 100 100 { + [r HTTL myhash FIELDS 1 f$i] eq "-2" + } else { + fail "hash value was not expired after timeout" + } + } + + # Verify initial HLEN + assert_equal 30 [r HLEN myhash] + # Verify values + for {set i 1} {$i <= 40} {incr i} { + if {$i >= 11 && $i <= 30} { + assert_equal "" [r HGET myhash f$i] + } else { + assert_equal v$i [r HGET myhash f$i] + } + } + + # Ensure the initial rewrite finishes + waitForBgrewriteaof r + + # Get the last incremental AOF file path + set aof_file [get_last_incr_aof_path r] + + wait_for_condition 100 100 { + [file exists $aof_file] eq 1 + } else { + fail "hash value was not expired after timeout" + } + + # Read and check content + set aof_content [exec cat $aof_file] + + # Verify amount of PXAT and HDEL + # Count PXAT commands (should be 20: 10 long + 10 short) + set pxat_count [regexp -all {PXAT} $aof_content] + assert_equal 20 $pxat_count + # Count HDEL commands (should be 10: from expire 0) + set hdel_count [regexp -all {HDEL} $aof_content] + assert_equal 10 $hdel_count + + # Restart the server and load the AOF + restart_server 0 true false + r debug loadaof + + # Verify hash after loading from aof + # Verify same HLEN + assert_equal 30 [r HLEN myhash] + # Verify the TTLs are preserved + for {set i 1} {$i <= 10} {incr i} { + assert_equal $long_expire [r HEXPIRETIME myhash FIELDS 1 f$i] + assert_equal v$i [r HGET myhash f$i] + } + # Verify expired fields + for {set i 11} {$i <= 30} {incr i} { + assert_equal -2 [r HTTL myhash FIELDS 1 f$i] + assert_equal "" [r HGET myhash f$i] + } + # Verify fields with no TTL + for {set i 31} {$i <= 40} {incr i} { + assert_equal -1 [r HTTL myhash FIELDS 1 f$i] + assert_equal v$i [r HGET myhash f$i] + } + } +} From 80439ec611e75ae2115fdfe3f598771466bad37f Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Thu, 3 Jul 2025 19:57:19 +0300 Subject: [PATCH 065/119] add external:skip flag for 
cluster tests Signed-off-by: Ran Shidlansik --- tests/unit/hashexpire.tcl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/hashexpire.tcl b/tests/unit/hashexpire.tcl index 1bbeab7e97..f4c80d1354 100644 --- a/tests/unit/hashexpire.tcl +++ b/tests/unit/hashexpire.tcl @@ -1993,7 +1993,7 @@ start_server {tags {"hashexpire external:skip"}} { } ### Slot Migration #### -start_cluster 3 0 {tags {"cluster mytest"} overrides {cluster-node-timeout 1000}} { +start_cluster 3 0 {tags {"cluster mytest external:skip"} overrides {cluster-node-timeout 1000}} { # Flush all data on all cluster nodes before starting for {set i 0} {$i < 3} {incr i} { R $i FLUSHALL From c513852b0adf351229210a7496ca027e9031de67 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Thu, 3 Jul 2025 20:04:15 +0300 Subject: [PATCH 066/119] also add external:skip for AOF tests Signed-off-by: Ran Shidlansik --- tests/unit/hashexpire.tcl | 170 +++++++++++++++++++------------------- 1 file changed, 86 insertions(+), 84 deletions(-) diff --git a/tests/unit/hashexpire.tcl b/tests/unit/hashexpire.tcl index f4c80d1354..892f5fa6a0 100644 --- a/tests/unit/hashexpire.tcl +++ b/tests/unit/hashexpire.tcl @@ -2351,101 +2351,103 @@ start_server {tags {"hashexpire external:skip"}} { } #### AOF Test ##### -set defaults {appendonly {yes} appendfilename {appendonly.aof} appenddirname {appendonlydir} auto-aof-rewrite-percentage {0}} -set server_path [tmpdir server.multi.aof] -start_server_aof [list dir $server_path] { - test {TTL Persistence in AOF} { - r flushall - r config set appendonly yes - r config set appendfsync always - - # Create hash with 1short, long and no expired fields - set long_expire [expr {[clock seconds] + 1000000}] - # Create 10 fields with long expiry - for {set i 1} {$i <= 10} {incr i} { - r HSETEX myhash EXAT $long_expire FIELDS 1 f$i v$i ;# 10 PXAT to aof - } - - # Create 10 fields with short expiry - for {set i 11} {$i <= 20} {incr i} { - r HSETEX myhash PXAT [expr {[clock 
milliseconds] + 10}] FIELDS 1 f$i v$i ;# 10 PXAT to aof - } +tags {"aof external:skip"} { + set defaults {appendonly {yes} appendfilename {appendonly.aof} appenddirname {appendonlydir} auto-aof-rewrite-percentage {0}} + set server_path [tmpdir server.multi.aof] + start_server_aof [list dir $server_path] { + test {TTL Persistence in AOF} { + r flushall + r config set appendonly yes + r config set appendfsync always + + # Create hash with 1short, long and no expired fields + set long_expire [expr {[clock seconds] + 1000000}] + # Create 10 fields with long expiry + for {set i 1} {$i <= 10} {incr i} { + r HSETEX myhash EXAT $long_expire FIELDS 1 f$i v$i ;# 10 PXAT to aof + } + + # Create 10 fields with short expiry + for {set i 11} {$i <= 20} {incr i} { + r HSETEX myhash PXAT [expr {[clock milliseconds] + 10}] FIELDS 1 f$i v$i ;# 10 PXAT to aof + } - # Create 10 fields with expire 0 - for {set i 21} {$i <= 30} {incr i} { - r HSET myhash f$i v$i - r HEXPIRE myhash 0 FIELDS 1 f$i ;# 10 HDEL to aof - } + # Create 10 fields with expire 0 + for {set i 21} {$i <= 30} {incr i} { + r HSET myhash f$i v$i + r HEXPIRE myhash 0 FIELDS 1 f$i ;# 10 HDEL to aof + } - # Create 10 fields with no expiry - for {set i 31} {$i <= 40} {incr i} { - r HSET myhash f$i v$i - } + # Create 10 fields with no expiry + for {set i 31} {$i <= 40} {incr i} { + r HSET myhash f$i v$i + } + + # Now wait for expire of the short expiry + for {set i 11} {$i <= 20} {incr i} { + wait_for_condition 100 100 { + [r HTTL myhash FIELDS 1 f$i] eq "-2" + } else { + fail "hash value was not expired after timeout" + } + } + + # Verify initial HLEN + assert_equal 30 [r HLEN myhash] + # Verify values + for {set i 1} {$i <= 40} {incr i} { + if {$i >= 11 && $i <= 30} { + assert_equal "" [r HGET myhash f$i] + } else { + assert_equal v$i [r HGET myhash f$i] + } + } + + # Ensure the initial rewrite finishes + waitForBgrewriteaof r + + # Get the last incremental AOF file path + set aof_file [get_last_incr_aof_path r] - # Now 
wait for expire of the short expiry - for {set i 11} {$i <= 20} {incr i} { wait_for_condition 100 100 { - [r HTTL myhash FIELDS 1 f$i] eq "-2" + [file exists $aof_file] eq 1 } else { fail "hash value was not expired after timeout" } - } - # Verify initial HLEN - assert_equal 30 [r HLEN myhash] - # Verify values - for {set i 1} {$i <= 40} {incr i} { - if {$i >= 11 && $i <= 30} { + # Read and check content + set aof_content [exec cat $aof_file] + + # Verify amount of PXAT and HDEL + # Count PXAT commands (should be 20: 10 long + 10 short) + set pxat_count [regexp -all {PXAT} $aof_content] + assert_equal 20 $pxat_count + # Count HDEL commands (should be 10: from expire 0) + set hdel_count [regexp -all {HDEL} $aof_content] + assert_equal 10 $hdel_count + + # Restart the server and load the AOF + restart_server 0 true false + r debug loadaof + + # Verify hash after loading from aof + # Verify same HLEN + assert_equal 30 [r HLEN myhash] + # Verify the TTLs are preserved + for {set i 1} {$i <= 10} {incr i} { + assert_equal $long_expire [r HEXPIRETIME myhash FIELDS 1 f$i] + assert_equal v$i [r HGET myhash f$i] + } + # Verify expired fields + for {set i 11} {$i <= 30} {incr i} { + assert_equal -2 [r HTTL myhash FIELDS 1 f$i] assert_equal "" [r HGET myhash f$i] - } else { + } + # Verify fields with no TTL + for {set i 31} {$i <= 40} {incr i} { + assert_equal -1 [r HTTL myhash FIELDS 1 f$i] assert_equal v$i [r HGET myhash f$i] } } - - # Ensure the initial rewrite finishes - waitForBgrewriteaof r - - # Get the last incremental AOF file path - set aof_file [get_last_incr_aof_path r] - - wait_for_condition 100 100 { - [file exists $aof_file] eq 1 - } else { - fail "hash value was not expired after timeout" - } - - # Read and check content - set aof_content [exec cat $aof_file] - - # Verify amount of PXAT and HDEL - # Count PXAT commands (should be 20: 10 long + 10 short) - set pxat_count [regexp -all {PXAT} $aof_content] - assert_equal 20 $pxat_count - # Count HDEL commands 
(should be 10: from expire 0) - set hdel_count [regexp -all {HDEL} $aof_content] - assert_equal 10 $hdel_count - - # Restart the server and load the AOF - restart_server 0 true false - r debug loadaof - - # Verify hash after loading from aof - # Verify same HLEN - assert_equal 30 [r HLEN myhash] - # Verify the TTLs are preserved - for {set i 1} {$i <= 10} {incr i} { - assert_equal $long_expire [r HEXPIRETIME myhash FIELDS 1 f$i] - assert_equal v$i [r HGET myhash f$i] - } - # Verify expired fields - for {set i 11} {$i <= 30} {incr i} { - assert_equal -2 [r HTTL myhash FIELDS 1 f$i] - assert_equal "" [r HGET myhash f$i] - } - # Verify fields with no TTL - for {set i 31} {$i <= 40} {incr i} { - assert_equal -1 [r HTTL myhash FIELDS 1 f$i] - assert_equal v$i [r HGET myhash f$i] - } } } From ba7f7c63aca9adf6ede2c23dbab296422dc2c5b7 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Thu, 3 Jul 2025 23:08:11 +0300 Subject: [PATCH 067/119] Update src/commands/hgetex.json Co-authored-by: Wen Hui Signed-off-by: Ran Shidlansik --- src/commands/hgetex.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/commands/hgetex.json b/src/commands/hgetex.json index 939487498d..3a96ac4472 100644 --- a/src/commands/hgetex.json +++ b/src/commands/hgetex.json @@ -1,6 +1,6 @@ { "HGETEX": { - "summary": "Set the value of one or more fields of a given hash key, and optionally set their expiration time.", + "summary": "Get the value of one or more fields of a given hash key, and optionally set their expiration time or time-to-live (TTL).", "complexity": "O(1)", "group": "hash", "since": "9.0.0", From 8d53993e6b5689e91a350b744a2e8ddcbe33cf09 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Thu, 3 Jul 2025 23:08:23 +0300 Subject: [PATCH 068/119] Update src/commands/hpersist.json Co-authored-by: Wen Hui Signed-off-by: Ran Shidlansik --- src/commands/hpersist.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/commands/hpersist.json 
b/src/commands/hpersist.json index 06ea3d5d7e..b12ba836ab 100644 --- a/src/commands/hpersist.json +++ b/src/commands/hpersist.json @@ -41,7 +41,7 @@ "items": { "oneOf": [ { - "description": "Field does not exist in the provided hash key, or the hash key is empty", + "description": "Field does not exist in the provided hash key, or the hash key does not exist", "const": -2 }, { From 3383656bb6140c9d223566af48ce1b08a1eb0033 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Thu, 3 Jul 2025 23:09:50 +0300 Subject: [PATCH 069/119] Update src/commands/hexpire.json Co-authored-by: Wen Hui Signed-off-by: Ran Shidlansik --- src/commands/hexpire.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/commands/hexpire.json b/src/commands/hexpire.json index fa3b6d83ed..2388bf4ab1 100644 --- a/src/commands/hexpire.json +++ b/src/commands/hexpire.json @@ -41,7 +41,7 @@ "items": { "oneOf": [ { - "description": "Field does not exist in the HASH, or HASH is empty.", + "description": "Field does not exist in the HASH, or key does not exist.", "const": -2 }, { From 38e32371671f24b6fed3804e70806ff35f13411c Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Fri, 4 Jul 2025 00:40:04 +0300 Subject: [PATCH 070/119] Address some more PR comments Signed-off-by: Ran Shidlansik --- src/commands.def | 14 +++++++------- src/commands/hexpire.json | 3 +-- src/commands/hexpireat.json | 1 - src/commands/hgetex.json | 4 ++-- src/commands/hpersist.json | 6 +++--- src/commands/hpexpire.json | 1 - src/commands/hpexpireat.json | 1 - src/defrag.c | 2 +- src/server.h | 4 ---- src/t_hash.c | 26 ++++++++++---------------- tests/unit/hashexpire.tcl | 2 +- 11 files changed, 25 insertions(+), 39 deletions(-) diff --git a/src/commands.def b/src/commands.def index 6f6fdd7009..6ef0e86c99 100644 --- a/src/commands.def +++ b/src/commands.def @@ -3701,7 +3701,7 @@ struct COMMAND_ARG HGETALL_Args[] = { #ifndef SKIP_CMD_KEY_SPECS_TABLE /* HGETEX key specs */ keySpec HGETEX_Keyspecs[1] = {
-{NULL,CMD_KEY_RO|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} +{NULL,CMD_KEY_RW|CMD_KEY_ACCESS,KSPEC_BS_INDEX,.bs.index={1},KSPEC_FK_RANGE,.fk.range={0,1,0}} }; #endif @@ -11655,21 +11655,21 @@ struct COMMAND_STRUCT serverCommandTable[] = { /* hash */ {MAKE_CMD("hdel","Deletes one or more fields and their values from a hash. Deletes the hash if no fields remain.","O(N) where N is the number of fields to be removed.","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HDEL_History,1,HDEL_Tips,0,hdelCommand,-3,CMD_WRITE|CMD_FAST,ACL_CATEGORY_HASH,HDEL_Keyspecs,1,NULL,2),.args=HDEL_Args}, {MAKE_CMD("hexists","Determines whether a field exists in a hash.","O(1)","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HEXISTS_History,0,HEXISTS_Tips,0,hexistsCommand,3,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HEXISTS_Keyspecs,1,NULL,2),.args=HEXISTS_Args}, -{MAKE_CMD("hexpire","Set expiry time on hash fields.","O(1) for each field assigned with TTL, so O(N) to add N TTLs when the command is called with multiple fields.","7.2.4",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HEXPIRE_History,0,HEXPIRE_Tips,0,hexpireCommand,-6,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HEXPIRE_Keyspecs,1,NULL,4),.args=HEXPIRE_Args}, -{MAKE_CMD("hexpireat","Set expiry time on hash fields.","O(1) for each field assigned with TTL, so O(N) to add N TTLs when the command is called with multiple fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HEXPIREAT_History,0,HEXPIREAT_Tips,0,hexpireAtCommand,-6,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HEXPIREAT_Keyspecs,1,NULL,4),.args=HEXPIREAT_Args}, +{MAKE_CMD("hexpire","Set expiry time on hash fields.","O(1) for each field assigned with TTL, so O(N) to add N TTLs when the command is called with multiple 
fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HEXPIRE_History,0,HEXPIRE_Tips,0,hexpireCommand,-6,CMD_WRITE|CMD_FAST,ACL_CATEGORY_HASH,HEXPIRE_Keyspecs,1,NULL,4),.args=HEXPIRE_Args}, +{MAKE_CMD("hexpireat","Set expiry time on hash fields.","O(1) for each field assigned with TTL, so O(N) to add N TTLs when the command is called with multiple fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HEXPIREAT_History,0,HEXPIREAT_Tips,0,hexpireAtCommand,-6,CMD_WRITE|CMD_FAST,ACL_CATEGORY_HASH,HEXPIREAT_Keyspecs,1,NULL,4),.args=HEXPIREAT_Args}, {MAKE_CMD("hexpiretime","Returns the Unix timestamp in seconds since the epoch at which the given key's field(s) will expire","O(1) for each field, so O(N) for N items when the command is called with multiple fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HEXPIRETIME_History,0,HEXPIRETIME_Tips,0,hexpiretimeCommand,-5,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HEXPIRETIME_Keyspecs,1,NULL,2),.args=HEXPIRETIME_Args}, {MAKE_CMD("hget","Returns the value of a field in a hash.","O(1)","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HGET_History,0,HGET_Tips,0,hgetCommand,3,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HGET_Keyspecs,1,NULL,2),.args=HGET_Args}, {MAKE_CMD("hgetall","Returns all fields and values in a hash.","O(N) where N is the size of the hash.","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HGETALL_History,0,HGETALL_Tips,1,hgetallCommand,2,CMD_READONLY,ACL_CATEGORY_HASH,HGETALL_Keyspecs,1,NULL,1),.args=HGETALL_Args}, -{MAKE_CMD("hgetex","Set the value of one or more fields of a given hash key, and optionally set their expiration time.","O(1)","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HGETEX_History,0,HGETEX_Tips,0,hgetexCommand,-5,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HGETEX_Keyspecs,1,NULL,3),.args=HGETEX_Args}, +{MAKE_CMD("hgetex","Set the value of one or more fields of a given hash key, and optionally set their expiration 
time.","O(1)","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HGETEX_History,0,HGETEX_Tips,0,hgetexCommand,-5,CMD_WRITE|CMD_FAST,ACL_CATEGORY_HASH,HGETEX_Keyspecs,1,NULL,3),.args=HGETEX_Args}, {MAKE_CMD("hincrby","Increments the integer value of a field in a hash by a number. Uses 0 as initial value if the field doesn't exist.","O(1)","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HINCRBY_History,0,HINCRBY_Tips,0,hincrbyCommand,4,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HINCRBY_Keyspecs,1,NULL,3),.args=HINCRBY_Args}, {MAKE_CMD("hincrbyfloat","Increments the floating point value of a field by a number. Uses 0 as initial value if the field doesn't exist.","O(1)","2.6.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HINCRBYFLOAT_History,0,HINCRBYFLOAT_Tips,0,hincrbyfloatCommand,4,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HINCRBYFLOAT_Keyspecs,1,NULL,3),.args=HINCRBYFLOAT_Args}, {MAKE_CMD("hkeys","Returns all fields in a hash.","O(N) where N is the size of the hash.","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HKEYS_History,0,HKEYS_Tips,1,hkeysCommand,2,CMD_READONLY,ACL_CATEGORY_HASH,HKEYS_Keyspecs,1,NULL,1),.args=HKEYS_Args}, {MAKE_CMD("hlen","Returns the number of fields in a hash.","O(1)","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HLEN_History,0,HLEN_Tips,0,hlenCommand,2,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HLEN_Keyspecs,1,NULL,1),.args=HLEN_Args}, {MAKE_CMD("hmget","Returns the values of all fields in a hash.","O(N) where N is the number of fields being requested.","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HMGET_History,0,HMGET_Tips,0,hmgetCommand,-3,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HMGET_Keyspecs,1,NULL,2),.args=HMGET_Args}, {MAKE_CMD("hmset","Sets the values of multiple fields.","O(N) where N is the number of fields being set.","2.0.0",CMD_DOC_DEPRECATED,"`HSET` with multiple field-value 
pairs","4.0.0","hash",COMMAND_GROUP_HASH,HMSET_History,0,HMSET_Tips,0,hsetCommand,-4,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HMSET_Keyspecs,1,NULL,2),.args=HMSET_Args}, -{MAKE_CMD("hpersist","Remove the existing expiration on a hash key's field(s).","O(1) for each field assigned with TTL, so O(N) to persist N items when the command is called with multiple fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HPERSIST_History,0,HPERSIST_Tips,0,hpersistCommand,-3,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HPERSIST_Keyspecs,1,NULL,2),.args=HPERSIST_Args}, -{MAKE_CMD("hpexpire","Set expiry time on hash object.","O(1) for each field assigned with TTL, so O(N) to add N TTLs when the command is called with multiple fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HPEXPIRE_History,0,HPEXPIRE_Tips,0,hpexpireCommand,-6,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HPEXPIRE_Keyspecs,1,NULL,4),.args=HPEXPIRE_Args}, -{MAKE_CMD("hpexpireat","Set expiry time on hash object.","O(1) for each field assigned with TTL, so O(N) to add N TTLs when the command is called with multiple fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HPEXPIREAT_History,0,HPEXPIREAT_Tips,0,hpexpireAtCommand,-6,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HPEXPIREAT_Keyspecs,1,NULL,4),.args=HPEXPIREAT_Args}, +{MAKE_CMD("hpersist","Remove the existing expiration on a hash key's field(s).","O(1) for each field assigned with TTL, so O(N) to persist N items when the command is called with multiple fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HPERSIST_History,0,HPERSIST_Tips,0,hpersistCommand,-5,CMD_WRITE|CMD_FAST,ACL_CATEGORY_HASH,HPERSIST_Keyspecs,1,NULL,2),.args=HPERSIST_Args}, +{MAKE_CMD("hpexpire","Set expiry time on hash object.","O(1) for each field assigned with TTL, so O(N) to add N TTLs when the command is called with multiple 
fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HPEXPIRE_History,0,HPEXPIRE_Tips,0,hpexpireCommand,-6,CMD_WRITE|CMD_FAST,ACL_CATEGORY_HASH,HPEXPIRE_Keyspecs,1,NULL,4),.args=HPEXPIRE_Args}, +{MAKE_CMD("hpexpireat","Set expiry time on hash object.","O(1) for each field assigned with TTL, so O(N) to add N TTLs when the command is called with multiple fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HPEXPIREAT_History,0,HPEXPIREAT_Tips,0,hpexpireAtCommand,-6,CMD_WRITE|CMD_FAST,ACL_CATEGORY_HASH,HPEXPIREAT_Keyspecs,1,NULL,4),.args=HPEXPIREAT_Args}, {MAKE_CMD("hpexpiretime","Returns the Unix timestamp in milliseconds since Unix epoch at which the given key's field(s) will expire","O(1) for each field, so O(N) for N items when the command is called with multiple fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HPEXPIRETIME_History,0,HPEXPIRETIME_Tips,0,hpexpiretimeCommand,-5,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HPEXPIRETIME_Keyspecs,1,NULL,2),.args=HPEXPIRETIME_Args}, {MAKE_CMD("hpttl","Returns the remaining time to live (in milliseconds) of a hash key's field(s) that have an associated expiration.","O(1) for each field assigned with TTL, so O(N) for N items when the command is called with multiple fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HPTTL_History,0,HPTTL_Tips,0,hpttlCommand,-5,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HPTTL_Keyspecs,1,NULL,2),.args=HPTTL_Args}, {MAKE_CMD("hrandfield","Returns one or more random fields from a hash.","O(N) where N is the number of fields returned","6.2.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HRANDFIELD_History,0,HRANDFIELD_Tips,1,hrandfieldCommand,-2,CMD_READONLY,ACL_CATEGORY_HASH,HRANDFIELD_Keyspecs,1,NULL,2),.args=HRANDFIELD_Args}, diff --git a/src/commands/hexpire.json b/src/commands/hexpire.json index 2388bf4ab1..705a8b5638 100644 --- a/src/commands/hexpire.json +++ b/src/commands/hexpire.json @@ -3,12 +3,11 @@ "summary": "Set expiry time on 
hash fields.", "complexity": "O(1) for each field assigned with TTL, so O(N) to add N TTLs when the command is called with multiple fields.", "group": "hash", - "since": "7.2.4", + "since": "9.0.0", "arity": -6, "function": "hexpireCommand", "command_flags": [ "WRITE", - "DENYOOM", "FAST" ], "acl_categories": [ diff --git a/src/commands/hexpireat.json b/src/commands/hexpireat.json index 9335066b60..f7303cb30e 100644 --- a/src/commands/hexpireat.json +++ b/src/commands/hexpireat.json @@ -8,7 +8,6 @@ "function": "hexpireAtCommand", "command_flags": [ "WRITE", - "DENYOOM", "FAST" ], "acl_categories": [ diff --git a/src/commands/hgetex.json b/src/commands/hgetex.json index 3a96ac4472..9553f1a1e9 100644 --- a/src/commands/hgetex.json +++ b/src/commands/hgetex.json @@ -7,7 +7,7 @@ "arity": -5, "function": "hgetexCommand", "command_flags": [ - "READONLY", + "WRITE", "FAST" ], "acl_categories": [ @@ -16,7 +16,7 @@ "key_specs": [ { "flags": [ - "RO", + "RW", "ACCESS" ], "begin_search": { diff --git a/src/commands/hpersist.json b/src/commands/hpersist.json index b12ba836ab..4964a1477f 100644 --- a/src/commands/hpersist.json +++ b/src/commands/hpersist.json @@ -4,11 +4,10 @@ "complexity": "O(1) for each field assigned with TTL, so O(N) to persist N items when the command is called with multiple fields.", "group": "hash", "since": "9.0.0", - "arity": -3, + "arity": -5, "function": "hpersistCommand", "command_flags": [ "WRITE", - "DENYOOM", "FAST" ], "acl_categories": [ @@ -70,7 +69,8 @@ "name": "numfields", "type": "integer", "key_spec_index": 0, - "multiple": false + "multiple": false, + "minimum": 1 }, { "name": "field", diff --git a/src/commands/hpexpire.json b/src/commands/hpexpire.json index 9990d8017d..071b409fb6 100644 --- a/src/commands/hpexpire.json +++ b/src/commands/hpexpire.json @@ -8,7 +8,6 @@ "function": "hpexpireCommand", "command_flags": [ "WRITE", - "DENYOOM", "FAST" ], "acl_categories": [ diff --git a/src/commands/hpexpireat.json 
b/src/commands/hpexpireat.json index 29ef515bc9..c291202daf 100644 --- a/src/commands/hpexpireat.json +++ b/src/commands/hpexpireat.json @@ -8,7 +8,6 @@ "function": "hpexpireAtCommand", "command_flags": [ "WRITE", - "DENYOOM", "FAST" ], "acl_categories": [ diff --git a/src/defrag.c b/src/defrag.c index 2f6a32f311..dc45328633 100644 --- a/src/defrag.c +++ b/src/defrag.c @@ -38,8 +38,8 @@ * SPDX-License-Identifier: BSD-3-Clause */ -#include "entry.h" #include "server.h" +#include "entry.h" #include "hashtable.h" #include "eval.h" #include "script.h" diff --git a/src/server.h b/src/server.h index c9e0bc3dff..00ee813e18 100644 --- a/src/server.h +++ b/src/server.h @@ -3842,10 +3842,6 @@ void hexpireCommand(client *c); void hexpireAtCommand(client *c); void hpexpireCommand(client *c); void hpexpireAtCommand(client *c); -void hexpireCommand(client *c); -void hexpireAtCommand(client *c); -void hpexpireCommand(client *c); -void hpexpireAtCommand(client *c); void httlCommand(client *c); void hpttlCommand(client *c); void hexpiretimeCommand(client *c); diff --git a/src/t_hash.c b/src/t_hash.c index 57a6174c8f..646cc18f3d 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -1539,9 +1539,9 @@ static void hrandfieldReplyWithListpack(writePreparedClient *wpc, unsigned int c * * - Responds with an array of integers: * - 1 if the expiration was set. - * - 0 if it was unchanged. - * - -1 if the field does not exist. - * - 2 if the field was immediately expired and deleted. + * - 0 if it was unchanged (due to provided condition check failing). + * - -2 if the field does not exist or the hash is empty. + * - 2 if the field was immediately expired and deleted due to provided expiration is 0 or in the past. * * - If fields were deleted due to expiration: * - Rewrites the command as HDEL for replication/AOF. @@ -1680,24 +1680,16 @@ void hpexpireAtCommand(client *c) { * - Expects a key and a list of hash fields whose expiration metadata should be removed. 
* - Validates that the number of provided fields matches the declared count. * - * - For each specified field: - * - Attempts to remove any existing expiration. - * - Replies with: - * - 1 if the expiration was successfully removed. - * - 0 if the field had no expiration or did not exist. - * - * - Replies with an array of integers, one per field, indicating the outcome of each attempt. + * - For each specified field attempts to remove any existing expiration. + * - Replies to the client with an array of integers, each representing the result of persistence for one field: + * - 1 if the expiration was set. + * - -1 if the field exists, but has no expiraiton time set. + * - -2 if the field does not exist or the hash is empty. * * - If any expirations were removed: * - Marks the key as modified (for replication/AOF consistency). * - Emits a "hpersist" keyspace notification. * - * - * Return Value: - * - An array of integers, each representing the result of persistence for one field. - * - 1 = field existed and expiration was removed. - * - 0 = field did not exist or had no expiration. - * * Keyspace Notifications (if enabled): * - "hpersist" — emitted once if any field had its expiration removed. 
*/ void hpersistCommand(client *c) { @@ -1716,6 +1708,8 @@ void hpersistCommand(client *c) { addReplyArrayLen(c, num_fields); robj *hash = lookupKeyWrite(c->db, c->argv[1]); + if (checkType(c, hash, OBJ_HASH)) + return; for (int i = 0; i < num_fields; i++, fields_index++) { result = hashTypePersist(hash, c->argv[fields_index]->ptr); diff --git a/tests/unit/hashexpire.tcl b/tests/unit/hashexpire.tcl index 892f5fa6a0..d1cac324e2 100644 --- a/tests/unit/hashexpire.tcl +++ b/tests/unit/hashexpire.tcl @@ -1936,7 +1936,7 @@ start_server {tags {"hashexpire external:skip"}} { assert_equal $f2_exp [$instance HEXPIRETIME myhash FIELDS 1 f2] assert_equal -1 [$instance HTTL myhash FIELDS 1 f3] assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [$instance info keyspace]] keys=%d] - assert_equal "v1 v2 v3" [$instance HGETEX myhash FIELDS 3 f1 f2 f3] + assert_equal "v1 v2 v3" [$instance HMGET myhash f1 f2 f3] assert_equal 3 [$instance HLEN myhash] } From 90f33e99b16e918bcfe93952ef6a1c57bb0a0ff3 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Fri, 4 Jul 2025 07:50:22 +0300 Subject: [PATCH 071/119] also optimize the defrag path when vector encoding is used Signed-off-by: Ran Shidlansik --- src/vset.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/vset.c b/src/vset.c index b1bef119e7..4092a33dd7 100644 --- a/src/vset.c +++ b/src/vset.c @@ -1693,11 +1693,16 @@ bool vsetUpdateEntry(vset *set, vsetGetExpiryFunc getExpiry, void *old_entry, vo updated = vsetBucketUpdateEntry_SINGLE(*set, getExpiry, old_entry, new_entry, old_expiry, new_expiry); break; case VSET_BUCKET_VECTOR: - /* NOTE! - in this specific case we might have changed the vector order - need to sort it again (NLogN) */ - /* or remove it from the vector and re-add it. (N+LogN). the later also looks cleaner... 
*/ - if (!vsetRemoveEntryWithExpiry(set, getExpiry, old_entry, old_expiry)) - return false; - return vsetAddEntry(set, getExpiry, new_entry); + if (old_expiry != new_expiry) { + /* NOTE! - in this specific case we might have changed the vector order - need to sort it again (NLogN) */ + /* or remove it from the vector and re-add it (N+LogN). the later also looks cleaner... */ + if (!vsetRemoveEntryWithExpiry(set, getExpiry, old_entry, old_expiry)) + return false; + return vsetAddEntry(set, getExpiry, new_entry); + } + /* We are just updating the entry ref, so sorting is not impacted */ + updated = vsetBucketUpdateEntry_VECTOR(*set, getExpiry, old_entry, new_entry, old_expiry, new_expiry); + break; case VSET_BUCKET_RAX: updated = vsetBucketUpdateEntry_RAX(*set, getExpiry, old_entry, new_entry, old_expiry, new_expiry); From 02e65ddd99132e4d11e453ebbca446be6c62da1e Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Fri, 4 Jul 2025 10:05:40 +0300 Subject: [PATCH 072/119] fix hgetex reply schema Signed-off-by: Ran Shidlansik --- src/commands.def | 2 +- src/commands/hgetex.json | 32 ++++++++++++++++++++------------ 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/src/commands.def b/src/commands.def index 641ca0daab..527c9cad4c 100644 --- a/src/commands.def +++ b/src/commands.def @@ -11660,7 +11660,7 @@ struct COMMAND_STRUCT serverCommandTable[] = { {MAKE_CMD("hexpiretime","Returns the Unix timestamp in seconds since the epoch at which the given key's field(s) will expire","O(1) for each field, so O(N) for N items when the command is called with multiple fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HEXPIRETIME_History,0,HEXPIRETIME_Tips,0,hexpiretimeCommand,-5,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HEXPIRETIME_Keyspecs,1,NULL,2),.args=HEXPIRETIME_Args}, {MAKE_CMD("hget","Returns the value of a field in a 
hash.","O(1)","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HGET_History,0,HGET_Tips,0,hgetCommand,3,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HGET_Keyspecs,1,NULL,2),.args=HGET_Args}, {MAKE_CMD("hgetall","Returns all fields and values in a hash.","O(N) where N is the size of the hash.","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HGETALL_History,0,HGETALL_Tips,1,hgetallCommand,2,CMD_READONLY,ACL_CATEGORY_HASH,HGETALL_Keyspecs,1,NULL,1),.args=HGETALL_Args}, -{MAKE_CMD("hgetex","Set the value of one or more fields of a given hash key, and optionally set their expiration time.","O(1)","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HGETEX_History,0,HGETEX_Tips,0,hgetexCommand,-5,CMD_WRITE|CMD_FAST,ACL_CATEGORY_HASH,HGETEX_Keyspecs,1,NULL,3),.args=HGETEX_Args}, +{MAKE_CMD("hgetex","Get the value of one or more fields of a given hash key, and optionally set their expiration time or time-to-live (TTL).","O(1)","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HGETEX_History,0,HGETEX_Tips,0,hgetexCommand,-5,CMD_WRITE|CMD_FAST,ACL_CATEGORY_HASH,HGETEX_Keyspecs,1,NULL,3),.args=HGETEX_Args}, {MAKE_CMD("hincrby","Increments the integer value of a field in a hash by a number. Uses 0 as initial value if the field doesn't exist.","O(1)","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HINCRBY_History,0,HINCRBY_Tips,0,hincrbyCommand,4,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HINCRBY_Keyspecs,1,NULL,3),.args=HINCRBY_Args}, {MAKE_CMD("hincrbyfloat","Increments the floating point value of a field by a number. 
Uses 0 as initial value if the field doesn't exist.","O(1)","2.6.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HINCRBYFLOAT_History,0,HINCRBYFLOAT_Tips,0,hincrbyfloatCommand,4,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HINCRBYFLOAT_Keyspecs,1,NULL,3),.args=HINCRBYFLOAT_Args}, {MAKE_CMD("hkeys","Returns all fields in a hash.","O(N) where N is the size of the hash.","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HKEYS_History,0,HKEYS_Tips,1,hkeysCommand,2,CMD_READONLY,ACL_CATEGORY_HASH,HKEYS_Keyspecs,1,NULL,1),.args=HKEYS_Args}, diff --git a/src/commands/hgetex.json b/src/commands/hgetex.json index 9553f1a1e9..6d6b892f83 100644 --- a/src/commands/hgetex.json +++ b/src/commands/hgetex.json @@ -34,19 +34,27 @@ } ], "reply_schema": { - "description": "List of values associated with the given fields, in the same order as they are requested.", - "type": "array", - "minItems": 1, - "items": { - "oneOf": [ - { - "type": "string" - }, - { - "type": "null" + "oneOf": [ + { + "description": "List of values associated with the given fields, in the same order as they are requested.", + "type": "array", + "minItems": 1, + "items": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] } - ] - } + }, + { + "description": "Key does not exist.", + "type": "null" + } + ] }, "arguments": [ { From 4bbdbf16787398542a0dcfac721895c4ecfacd7a Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Sat, 5 Jul 2025 23:32:55 +0300 Subject: [PATCH 073/119] introduce vset defrag Signed-off-by: Ran Shidlansik --- src/hashtable.c | 6 +- src/unit/test_files.h | 3 +- src/unit/test_vset.c | 89 ++++++++++++++++++++++- src/vset.c | 164 +++++++++++++++++++++++++++++++++++------- src/vset.h | 2 + 5 files changed, 232 insertions(+), 32 deletions(-) diff --git a/src/hashtable.c b/src/hashtable.c index 1af8086d55..804c3d6dd7 100644 --- a/src/hashtable.c +++ b/src/hashtable.c @@ -1783,7 +1783,7 @@ size_t hashtableScanDefrag(hashtable *ht, size_t cursor, 
hashtableScanFunction f size_t used_before = ht->used[0]; bucket *b = &ht->tables[0][idx]; do { - if (b->presence != 0) { + if (fn && b->presence != 0) { int pos; for (pos = 0; pos < ENTRIES_PER_BUCKET; pos++) { if (isPositionFilled(b, pos) && validateElementIfNeeded(ht, b->entries[pos]) == ENTRY_VALID) { @@ -1826,7 +1826,7 @@ size_t hashtableScanDefrag(hashtable *ht, size_t cursor, hashtableScanFunction f size_t used_before = ht->used[table_small]; bucket *b = &ht->tables[table_small][idx]; do { - if (b->presence) { + if (fn && b->presence) { for (int pos = 0; pos < ENTRIES_PER_BUCKET; pos++) { if (isPositionFilled(b, pos) && validateElementIfNeeded(ht, b->entries[pos]) == ENTRY_VALID) { void *emit = emit_ref ? &b->entries[pos] : b->entries[pos]; @@ -1856,7 +1856,7 @@ size_t hashtableScanDefrag(hashtable *ht, size_t cursor, hashtableScanFunction f size_t used_before = ht->used[table_large]; bucket *b = &ht->tables[table_large][idx]; do { - if (b->presence) { + if (fn && b->presence) { for (int pos = 0; pos < ENTRIES_PER_BUCKET; pos++) { if (isPositionFilled(b, pos) && validateElementIfNeeded(ht, b->entries[pos]) == ENTRY_VALID) { void *emit = emit_ref ? 
&b->entries[pos] : b->entries[pos]; diff --git a/src/unit/test_files.h b/src/unit/test_files.h index 9c7105b3ef..57ae05e456 100644 --- a/src/unit/test_files.h +++ b/src/unit/test_files.h @@ -204,6 +204,7 @@ int test_vset_add_and_iterate(int argc, char **argv, int flags); int test_vset_large_batch_same_expiry(int argc, char **argv, int flags); int test_vset_iterate_multiple_expiries(int argc, char **argv, int flags); int test_vset_add_and_remove_all(int argc, char **argv, int flags); +int test_vset_defrag(int argc, char **argv, int flags); int test_vset_fuzzer(int argc, char **argv, int flags); int test_ziplistCreateIntList(int argc, char **argv, int flags); int test_ziplistPop(int argc, char **argv, int flags); @@ -265,7 +266,7 @@ unitTest __test_sha1_c[] = {{"test_sha1", test_sha1}, {NULL, NULL}}; unitTest __test_util_c[] = {{"test_string2ll", test_string2ll}, {"test_string2l", test_string2l}, {"test_ll2string", test_ll2string}, {"test_ld2string", test_ld2string}, {"test_fixedpoint_d2string", test_fixedpoint_d2string}, {"test_version2num", test_version2num}, {"test_reclaimFilePageCache", test_reclaimFilePageCache}, {NULL, NULL}}; unitTest __test_valkey_strtod_c[] = {{"test_valkey_strtod", test_valkey_strtod}, {NULL, NULL}}; unitTest __test_vector_c[] = {{"test_vector", test_vector}, {NULL, NULL}}; -unitTest __test_vset_c[] = {{"test_vset_add_and_iterate", test_vset_add_and_iterate}, {"test_vset_large_batch_same_expiry", test_vset_large_batch_same_expiry}, {"test_vset_iterate_multiple_expiries", test_vset_iterate_multiple_expiries}, {"test_vset_add_and_remove_all", test_vset_add_and_remove_all}, {"test_vset_fuzzer", test_vset_fuzzer}, {NULL, NULL}}; +unitTest __test_vset_c[] = {{"test_vset_add_and_iterate", test_vset_add_and_iterate}, {"test_vset_large_batch_same_expiry", test_vset_large_batch_same_expiry}, {"test_vset_iterate_multiple_expiries", test_vset_iterate_multiple_expiries}, {"test_vset_add_and_remove_all", test_vset_add_and_remove_all}, 
{"test_vset_defrag", test_vset_defrag}, {"test_vset_fuzzer", test_vset_fuzzer}, {NULL, NULL}}; unitTest __test_ziplist_c[] = {{"test_ziplistCreateIntList", test_ziplistCreateIntList}, {"test_ziplistPop", test_ziplistPop}, {"test_ziplistGetElementAtIndex3", test_ziplistGetElementAtIndex3}, {"test_ziplistGetElementOutOfRange", test_ziplistGetElementOutOfRange}, {"test_ziplistGetLastElement", test_ziplistGetLastElement}, {"test_ziplistGetFirstElement", test_ziplistGetFirstElement}, {"test_ziplistGetElementOutOfRangeReverse", test_ziplistGetElementOutOfRangeReverse}, {"test_ziplistIterateThroughFullList", test_ziplistIterateThroughFullList}, {"test_ziplistIterateThroughListFrom1ToEnd", test_ziplistIterateThroughListFrom1ToEnd}, {"test_ziplistIterateThroughListFrom2ToEnd", test_ziplistIterateThroughListFrom2ToEnd}, {"test_ziplistIterateThroughStartOutOfRange", test_ziplistIterateThroughStartOutOfRange}, {"test_ziplistIterateBackToFront", test_ziplistIterateBackToFront}, {"test_ziplistIterateBackToFrontDeletingAllItems", test_ziplistIterateBackToFrontDeletingAllItems}, {"test_ziplistDeleteInclusiveRange0To0", test_ziplistDeleteInclusiveRange0To0}, {"test_ziplistDeleteInclusiveRange0To1", test_ziplistDeleteInclusiveRange0To1}, {"test_ziplistDeleteInclusiveRange1To2", test_ziplistDeleteInclusiveRange1To2}, {"test_ziplistDeleteWithStartIndexOutOfRange", test_ziplistDeleteWithStartIndexOutOfRange}, {"test_ziplistDeleteWithNumOverflow", test_ziplistDeleteWithNumOverflow}, {"test_ziplistDeleteFooWhileIterating", test_ziplistDeleteFooWhileIterating}, {"test_ziplistReplaceWithSameSize", test_ziplistReplaceWithSameSize}, {"test_ziplistReplaceWithDifferentSize", test_ziplistReplaceWithDifferentSize}, {"test_ziplistRegressionTestForOver255ByteStrings", test_ziplistRegressionTestForOver255ByteStrings}, {"test_ziplistRegressionTestDeleteNextToLastEntries", test_ziplistRegressionTestDeleteNextToLastEntries}, {"test_ziplistCreateLongListAndCheckIndices", 
test_ziplistCreateLongListAndCheckIndices}, {"test_ziplistCompareStringWithZiplistEntries", test_ziplistCompareStringWithZiplistEntries}, {"test_ziplistMergeTest", test_ziplistMergeTest}, {"test_ziplistStressWithRandomPayloadsOfDifferentEncoding", test_ziplistStressWithRandomPayloadsOfDifferentEncoding}, {"test_ziplistCascadeUpdateEdgeCases", test_ziplistCascadeUpdateEdgeCases}, {"test_ziplistInsertEdgeCase", test_ziplistInsertEdgeCase}, {"test_ziplistStressWithVariableSize", test_ziplistStressWithVariableSize}, {"test_BenchmarkziplistFind", test_BenchmarkziplistFind}, {"test_BenchmarkziplistIndex", test_BenchmarkziplistIndex}, {"test_BenchmarkziplistValidateIntegrity", test_BenchmarkziplistValidateIntegrity}, {"test_BenchmarkziplistCompareWithString", test_BenchmarkziplistCompareWithString}, {"test_BenchmarkziplistCompareWithNumber", test_BenchmarkziplistCompareWithNumber}, {"test_ziplistStress__ziplistCascadeUpdate", test_ziplistStress__ziplistCascadeUpdate}, {NULL, NULL}}; unitTest __test_zipmap_c[] = {{"test_zipmapIterateWithLargeKey", test_zipmapIterateWithLargeKey}, {"test_zipmapIterateThroughElements", test_zipmapIterateThroughElements}, {NULL, NULL}}; unitTest __test_zmalloc_c[] = {{"test_zmallocAllocReallocCallocAndFree", test_zmallocAllocReallocCallocAndFree}, {"test_zmallocAllocZeroByteAndFree", test_zmallocAllocZeroByteAndFree}, {NULL, NULL}}; diff --git a/src/unit/test_vset.c b/src/unit/test_vset.c index 5f91600f3c..e673bbe36a 100644 --- a/src/unit/test_vset.c +++ b/src/unit/test_vset.c @@ -211,6 +211,7 @@ int test_vset_add_and_remove_all(int argc, char **argv, int flags) { #define NUM_ITERATIONS 100000 #define MAX_ENTRIES 10000 +#define NUM_DEFRAG_STEPS 100 /* Global array to simulate a test database */ mock_entry *mock_entries[MAX_ENTRIES]; @@ -248,7 +249,7 @@ mock_entry *mock_entry_create(const char *keystr, long long expiry) { int insert_mock_entry(vset *set) { if (mock_entry_count >= MAX_ENTRIES) return 0; char keybuf[32]; - snprintf(keybuf, 
sizeof(keybuf), "key_%d", rand()); + snprintf(keybuf, sizeof(keybuf), "key_%d", mock_entry_count); long long expiry = rand() % 10000 + 100; mock_entry *e = mock_entry_create(keybuf, expiry); @@ -258,6 +259,18 @@ int insert_mock_entry(vset *set) { return 0; } +int insert_mock_entry_with_expiry(vset *set, long long expiry) { + if (mock_entry_count >= MAX_ENTRIES) return 0; + char keybuf[32]; + snprintf(keybuf, sizeof(keybuf), "key_%d", mock_entry_count); + + mock_entry *e = mock_entry_create(keybuf, expiry); + // printf("adding entry %p with expiry %llu\n", e, expiry); + TEST_ASSERT(vsetAddEntry(set, mockGetExpiry, e)); + mock_entries[mock_entry_count++] = e; + return 0; +} + int update_mock_entry(vset *set) { if (mock_entry_count == 0) return 0; int idx = rand() % mock_entry_count; @@ -291,11 +304,80 @@ int expire_mock_entries(vset *set, mstime_t now) { return 0; } +void *mock_defragfn(void *ptr) { + size_t size = zmalloc_size(ptr); + void *newptr = zmalloc(size); + memcpy(newptr, ptr, size); + zfree(ptr); + return newptr; +} + +int mock_defrag_rax_node(raxNode **noderef) { + raxNode *newnode = mock_defragfn(*noderef); + if (newnode) { + *noderef = newnode; + return 1; + } + return 0; +} + +size_t defrag_vset(vset *set, size_t cursor, size_t steps) { + if (steps == 0) steps = ULONG_MAX; + do { + cursor = vsetScanDefrag(set, cursor, mock_defragfn, mock_defrag_rax_node); + steps--; + } while (cursor != 0 && steps > 0); + return cursor; +} + int free_mock_entries(void) { for (int i = 0; i < mock_entry_count; i++) { mock_entry *e = mock_entries[i]; mockFreeEntry(e); } + mock_entry_count = 0; + return 0; +} + +/* --------- Defrag Test --------- */ +int test_vset_defrag(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + srand(time(NULL)); + + vset set; + vsetInit(&set); + + /* defrag empty set */ + TEST_ASSERT(defrag_vset(&set, 0, 0) == 0); + + /* defrag when single entry */ + insert_mock_entry(&set); + TEST_ASSERT(defrag_vset(&set, 0, 
0) == 0); + + /* defrag when vector */ + for (int i = 0; i < VOLATILESET_VECTOR_BUCKET_MAX_SIZE - 1; i++) + insert_mock_entry(&set); + TEST_ASSERT(defrag_vset(&set, 0, 0) == 0); + + long long expiry = rand() % 10000 + 100; + for (int i = 0; i < VOLATILESET_VECTOR_BUCKET_MAX_SIZE * 2; i++) { + insert_mock_entry_with_expiry(&set, expiry); + } + TEST_ASSERT(defrag_vset(&set, 0, 0) == 0); + + size_t cursor = 0; + for (int i = 0; i < NUM_ITERATIONS; i++) { + if (i % NUM_DEFRAG_STEPS == 0) + cursor = defrag_vset(&set, cursor, NUM_DEFRAG_STEPS); + insert_mock_entry_with_expiry(&set, expiry); + } + TEST_ASSERT(defrag_vset(&set, 0, 0) == 0); + + vsetClear(&set); + free_mock_entries(); + return 0; } @@ -310,7 +392,7 @@ int test_vset_fuzzer(int argc, char **argv, int flags) { vsetInit(&set); for (int i = 0; i < NUM_ITERATIONS; i++) { - int op = rand() % 4; + int op = rand() % 5; switch (op) { case 0: case 1: @@ -322,6 +404,9 @@ int test_vset_fuzzer(int argc, char **argv, int flags) { case 3: remove_mock_entry(&set); break; + case 4: + TEST_ASSERT(defrag_vset(&set, 0, 0) == 0); + break; } if (i % 100 == 0) { diff --git a/src/vset.c b/src/vset.c index 4092a33dd7..b2f2cff088 100644 --- a/src/vset.c +++ b/src/vset.c @@ -655,44 +655,42 @@ static inline uint32_t findInsertPosition(vsetGetExpiryFunc getExpiry, vsetBucke */ static uint32_t findSplitPosition(vsetGetExpiryFunc getExpiry, vsetBucket *bucket, long long *split_ts_out) { pVector *pv = vsetBucketVector(bucket); - if (!pv || pv->len < 2) return pv ? 
pv->len : 0; - uint32_t left = 1; - uint32_t right = pv->len - 1; - uint32_t best_split = pv->len; - uint32_t mid_closest_to_center = pv->len / 2; - long long best_split_ts = 0; + int mid = pv->len / 2; + int offset = 0; - while (left <= right) { - uint32_t mid = (left + right) / 2; + while (1) { + int left = mid - offset; + int right = mid + offset; - long long prev_ts = get_bucket_ts(getExpiry(pvGet(pv, mid - 1))); - long long curr_ts = get_bucket_ts(getExpiry(pvGet(pv, mid))); + // Check left side (as long as i > 0 to allow e[i-1]) + if (left > 0) { + long long ts1 = get_bucket_ts(getExpiry(pvGet(pv, left - 1))); + long long ts2 = get_bucket_ts(getExpiry(pvGet(pv, left))); + if (ts1 < ts2) { + if (split_ts_out) *split_ts_out = ts1; + return left; + } + } - if (prev_ts != curr_ts) { - // Check if closer to center - if (best_split == pv->len || - abs((int)mid - (int)mid_closest_to_center) < abs((int)best_split - (int)mid_closest_to_center)) { - best_split = mid; - best_split_ts = prev_ts; + // Check right side (as long as i > 0 to allow e[i-1]) + if (right > 0 && right < pv->len) { + long long ts1 = get_bucket_ts(getExpiry(pvGet(pv, right - 1))); + long long ts2 = get_bucket_ts(getExpiry(pvGet(pv, right))); + if (ts1 < ts2) { + if (split_ts_out) *split_ts_out = ts1; + return right; } - right = mid - 1; - } else { - left = mid + 1; } - } - if (split_ts_out) { - *split_ts_out = best_split != pv->len - ? best_split_ts - : get_bucket_ts(getExpiry(pvGet(pv, pv->len - 1))); + offset++; + if (mid - offset < 1 && mid + offset >= pv->len) break; // searched entire vector } - return best_split; + return pv->len; // no split found } - #define VSET_BUCKET_KEY_LEN 8 /* hash_pointer - Computes a high-quality 64-bit hash from a pointer value. 
@@ -1967,3 +1965,117 @@ void vsetClear(vset *set) { bool vsetIsEmpty(vset *set) { return vsetBucketType(*set) == VSET_BUCKET_NONE; } + +/**************** Defrag Logic *********************/ +static struct vsetDefragState { + long long bucket_ts; + size_t bucket_cursor; +} defragState; + +static size_t vsetBucketDefrag_VECTOR(vsetBucket **bucket, size_t cursor, void *(*defragfn)(void *)) { + UNUSED(cursor); + pVector *pv = vsetBucketVector(*bucket); + pv = defragfn(pv); + *bucket = vsetBucketFromVector(pv); + return 0; +} + +static size_t vsetBucketDefrag_HASHTABLE(vsetBucket **bucket, size_t cursor, void *(*defragfn)(void *)) { + hashtable *ht = vsetBucketHashtable(*bucket); + if (cursor == 0) { + ht = hashtableDefragTables(ht, defragfn); + *bucket = vsetBucketFromHashtable(ht); + } + return hashtableScanDefrag(ht, cursor, NULL, NULL, defragfn, 0); +} + +static size_t vsetBucketDefrag_RAX(vsetBucket **bucket, size_t cursor, void *(*defragfn)(void *), int (*defragRaxNode)(raxNode **)) { + struct vsetDefragState *state = (struct vsetDefragState *)cursor; + size_t bucket_cursor = 0; + unsigned char key[VSET_BUCKET_KEY_LEN] = {0}; + size_t key_len; + long long bucket_ts; + rax *r = vsetBucketRax(*bucket); + raxIterator ri; + + /* init the state if this is the first time we enter the bucket */ + if (!state) { + state = &defragState; + state->bucket_ts = -1; + state->bucket_cursor = 0; + if ((r = defragfn(r))) *bucket = vsetBucketFromRax(r); + r = vsetBucketRax(*bucket); + } + raxStart(&ri, r); + ri.node_cb = defragRaxNode; + if (state->bucket_ts < 0) { + /* No prev timestamp, meaning we are starting a new RAX bucket scan */ + assert(raxSeek(&ri, "^", NULL, 0)); + assert(raxNext(&ri)); /* there MUST be at least one bucket! */ + bucket_ts = decodeExpiryKey(ri.key); + } else { + /* we are continuing a RAX bucket scan. lets try and locate the last scanned bucket. + * If not found we can search for the next one. 
*/ + key_len = encodeExpiryKey(state->bucket_ts, key); + if (state->bucket_cursor) { + /* We were in the middle of scanning a bucket. lets try and continue there. + * It is possible that this bucket was deleted. if so we will get to a new bucket + * which is also fine. */ + assert(raxSeek(&ri, ">=", key, key_len)); + } else { + /* in case we completed the last bucket, lets progress to a later bucket */ + assert(raxSeek(&ri, ">", key, key_len)); + } + /* in case we reached the end of the RAX, we are done. */ + if (!raxNext(&ri)) { + return 0; + } + bucket_ts = decodeExpiryKey(ri.key); + if (state->bucket_ts != bucket_ts) { + /* if this is a new bucket, lets start from the beginning */ + bucket_cursor = 0; + } else { + bucket_cursor = state->bucket_cursor; + } + } + raxStop(&ri); + vsetBucket *time_bucket = ri.data; + switch (vsetBucketType(time_bucket)) { + case VSET_BUCKET_NONE: + case VSET_BUCKET_SINGLE: + bucket_cursor = 0; + break; + case VSET_BUCKET_VECTOR: + bucket_cursor = vsetBucketDefrag_VECTOR(&time_bucket, bucket_cursor, defragfn); + if (time_bucket != ri.data) + raxSetData(ri.node, time_bucket); + break; + case VSET_BUCKET_HT: + bucket_cursor = vsetBucketDefrag_HASHTABLE(&time_bucket, bucket_cursor, defragfn); + if (time_bucket != ri.data) + raxSetData(ri.node, time_bucket); + break; + default: + panic("Unsupported vset bucket type for RAX bucket. Only supported types are single, vector or hashtable"); + } + /* if we reached here, we are not done. lets return the state and next time we can continue from this bucket. 
*/ + state->bucket_ts = bucket_ts; + state->bucket_cursor = bucket_cursor; + return (size_t)state; +} + +size_t vsetScanDefrag(vset *set, size_t cursor, void *(*defragfn)(void *), int (*defragRaxNode)(raxNode **)) { + switch (vsetBucketType(*set)) { + case VSET_BUCKET_NONE: + case VSET_BUCKET_SINGLE: + /* nothing to do */ + return 0; + case VSET_BUCKET_VECTOR: + return vsetBucketDefrag_VECTOR(set, cursor, defragfn); + case VSET_BUCKET_RAX: + return vsetBucketDefrag_RAX(set, cursor, defragfn, defragRaxNode); + default: + panic("Unknown vset node type to defrag"); + } + return 0; +} diff --git a/src/vset.h b/src/vset.h index 540ba7fe22..0f7b92a4e6 100644 --- a/src/vset.h +++ b/src/vset.h @@ -312,4 +312,6 @@ void vsetClear(vset *set); long long vsetEstimatedEarliestExpiry(vset *set, vsetGetExpiryFunc getExpiry); size_t vsetPopExpired(vset *set, vsetGetExpiryFunc getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx); size_t vsetMemUsage(vset *set); +size_t vsetScanDefrag(vset *set, size_t cursor, void *(*defragfn)(void *), int (*defragRaxNode)(raxNode **)); + #endif From 00a80d96b1239c34f21ad9a5dd0f641ed4118623 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Sun, 6 Jul 2025 10:20:37 +0300 Subject: [PATCH 074/119] address PR comments Signed-off-by: Ran Shidlansik --- src/t_hash.c | 16 +++++++++------- tests/unit/hashexpire.tcl | 24 ++++++++++++------------ 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/src/t_hash.c b/src/t_hash.c index 646cc18f3d..5a386111ca 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -1169,7 +1169,7 @@ void hsetexCommand(client *c) { } /* Check that the parsed fields number matches the real provided number of fields */ if (!num_fields || num_fields != (c->argc - fields_index) / 2) { - addReplyErrorObject(c, shared.syntaxerr); + addReplyError(c, "numfields should be greater than 0 and match the provided number of fields"); return; } @@ -1338,7 +1338,7 @@ void hgetexCommand(client *c) { /* Check that 
the parsed fields number matches the real provided number of fields */ if (!num_fields || num_fields != (c->argc - fields_index)) { - addReplyErrorObject(c, shared.syntaxerr); + addReplyError(c, "numfields should be greater than 0 and match the provided number of fields"); return; } @@ -1584,7 +1584,7 @@ void hexpireGenericCommand(client *c, long long basetime, int unit) { /* Check that the parsed fields number matches the real provided number of fields */ if (!num_fields || num_fields != (c->argc - fields_index)) { - addReplyErrorObject(c, shared.syntaxerr); + addReplyError(c, "numfields should be greater than 0 and match the provided number of fields"); return; } @@ -1682,7 +1682,7 @@ void hpexpireAtCommand(client *c) { * * - For each specified field attempts to remove any existing expiration. * - Replies to the client with an array of integers, each representing the result of persistence for one field: - * - 1 if the expiration was set. + * - 1 if the expiration for the field was removed. * - -1 if the field exists, but has no expiraiton time set. * - -2 if the field does not exist or the hash is empty. * @@ -1700,7 +1700,7 @@ void hpersistCommand(client *c) { /* Check that the parsed fields number matches the real provided number of fields */ if (!num_fields || num_fields != (c->argc - fields_index)) { - addReplyErrorObject(c, shared.syntaxerr); + addReplyError(c, "numfields should be greater than 0 and match the provided number of fields"); return; } @@ -1713,8 +1713,10 @@ void hpersistCommand(client *c) { for (int i = 0; i < num_fields; i++, fields_index++) { result = hashTypePersist(hash, c->argv[fields_index]->ptr); - server.dirty += (result > 0 ? 1 : 0); // in case there was a change increment the dirty - changes += (result > 0 ? 
1 : 0); + if (result > 0) { + server.dirty++; + changes++; + } addReplyLongLong(c, result); } if (changes) { diff --git a/tests/unit/hashexpire.tcl b/tests/unit/hashexpire.tcl index d1cac324e2..e65ebe7860 100644 --- a/tests/unit/hashexpire.tcl +++ b/tests/unit/hashexpire.tcl @@ -262,14 +262,14 @@ start_server {tags {"hashexpire external:skip"}} { r HSET myhash f1 v1 catch {r HGETEX myhash EX 60 PX 1000 FIELDS 1 f1} e set e - } {ERR syntax error} + } {ERR *} test {HGETEX EXAT- multiple options used (EXAT + PXAT)} { r FLUSHALL r HSET myhash f1 v1 catch {r HGETEX myhash EXAT [expr {[clock seconds] + 100}] PXAT [expr {[clock milliseconds] + 100000}] 1000 FIELDS 1 f1} e set e - } {ERR syntax error} + } {ERR *} # Common error scenarios for all commands foreach {cmd ttl_val} [list \ @@ -283,7 +283,7 @@ start_server {tags {"hashexpire external:skip"}} { r HSET myhash f1 v1 catch {r HGETEX myhash $cmd FIELDS 1 f1} e set e - } {ERR syntax error} + } {ERR *} test "HGETEX $cmd- negative TTL" { r FLUSHALL @@ -304,7 +304,7 @@ start_server {tags {"hashexpire external:skip"}} { r HSET myhash f1 v1 catch {r HGETEX myhash $cmd $ttl_val 1 f1} e set e - } {ERR syntax error} + } {ERR *} test "HGETEX $cmd- wrong numfields count (too few fields)" { r FLUSHALL @@ -318,7 +318,7 @@ start_server {tags {"hashexpire external:skip"}} { r HSET myhash f1 v1 catch {r HGETEX myhash $cmd $ttl_val FIELDS 1 f1 f2} e set e - } {ERR syntax error} + } {ERR *} test "HGETEX $cmd- key is wrong type (string instead of hash)" { r FLUSHALL @@ -331,20 +331,20 @@ start_server {tags {"hashexpire external:skip"}} { r FLUSHALL catch {r HGETEX myhash $cmd $ttl_val FIELDS 0} e set e - } {ERR syntax error} + } {ERR *} test "HGETEX $cmd with negative numfields" { r FLUSHALL catch {r HGETEX myhash $cmd $ttl_val FIELDS -10} e set e - } {ERR syntax error} + } {ERR *} test "HGETEX $cmd with missing key" { r FLUSHALL set expire [expr {[clock seconds] + 100}] catch {r HGETEX $cmd $expire FIELDS 1 f1} e set e - } {ERR syntax 
error} + } {ERR *} } } @@ -619,7 +619,7 @@ start_server {tags {"hashexpire external:skip"}} { test {HSETEX EX - test missing TTL} { catch {r HSETEX myhash EX FIELDS 1 field1 val1} e set e - } {ERR syntax error} + } {ERR *} test {HSETEX EX - mismatched field/value count} { catch {r HSETEX myhash EX 10 FIELDS 2 field1 val1} e @@ -670,7 +670,7 @@ start_server {tags {"hashexpire external:skip"}} { test {HSETEX PX - test missing TTL} { catch {r HSETEX myhash PX FIELDS 1 field1 val1} e set e - } {ERR syntax error} + } {ERR *} # test {HSETEX PX - mismatched field/value count} { # catch {r HSETEX myhash PX 100 FIELDS 2 field1 val1} e @@ -680,7 +680,7 @@ start_server {tags {"hashexpire external:skip"}} { ## FNX/FXX - # hsetex throws ERR syntax error, it shouldn't + # hsetex throws ERR *, it shouldn't test {HSETEX EX FNX - set only if none of the fields exist} { r FLUSHALL r HSET myhash field1 val1 @@ -732,7 +732,7 @@ start_server {tags {"hashexpire external:skip"}} { test {HSETEX EX - FNX and FXX conflict error} { catch {r HSETEX myhash EX 10 FNX FXX FIELDS 1 x y} e set e - } {ERR syntax error} + } {ERR *} #################### Lazy Expiry ######################## From af520240414a647c687b9df3bc0e1b2a9cc76132 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Sun, 6 Jul 2025 10:55:43 +0300 Subject: [PATCH 075/119] fix split function documentation Signed-off-by: Ran Shidlansik --- src/vset.c | 64 ++++++++++++++++++++++++++---------------------------- 1 file changed, 31 insertions(+), 33 deletions(-) diff --git a/src/vset.c b/src/vset.c index b2f2cff088..9a7e2e73e6 100644 --- a/src/vset.c +++ b/src/vset.c @@ -608,51 +608,49 @@ static inline uint32_t findInsertPosition(vsetGetExpiryFunc getExpiry, vsetBucke return left; // Final position to insert the element } -/* findSplitPosition - Find the optimal split index in a sorted pointer vector - * based on coarse (bucketed) expiry timestamps. 
- * Arguments - * set: Pointer to the `vset` containing the element type and expiry logic. - * bucket: Pointer to a `vsetBucket` holding a sorted `pVector` of elements. - * split_ts: an optional pointer to a location to store the split timestamp, that is the position - * belonging in the lower split vector with the largest expiration time. +/* findSplitPosition - Locate the first index where a bucket timestamp transition occurs * - * This function searches for the earliest index at which the vector can be split into - * two parts such that all elements in the first part are strictly less than all elements - * in the second part, after mapping each element's expiry to a lower-resolution bucket. - * The mapping is done using `get_bucket_ts(set->etypr->getExpiry(element))`. + * This function finds a split point in a sorted pointer vector (`pVector`) of elements, + * where elements are grouped by their coarse-grained expiry time buckets. + * The goal is to identify the first pair of adjacent elements `e[i-1]` and `e[i]` + * such that: * - * This ensures that elements belonging to the same coarse-grained time bucket remain - * in the same split group, which is useful for efficient time-based partitioning. + * get_bucket_ts(getExpiry(e[i - 1])) < get_bucket_ts(getExpiry(e[i])) * - * To do this efficiently, the function performs a binary search to locate the first - * position where the bucketed expiry of the current item is greater than the bucketed - * expiry of the previous item. This approach attempts to maximize the size of each - * resulting split vector while ensuring that: + * The vector is assumed to be sorted by the raw expiry timestamp (in ascending order). + * Bucket timestamps are derived using `get_bucket_ts()` on each element's expiry value. * - * bucket_ts[element[i-1]] < bucket_ts[element[i]] + * Arguments: + * - getExpiry: A function pointer that extracts an expiry timestamp from an element. 
+ * - bucket: A pointer to a `vsetBucket` containing a sorted `pVector` of elements. + * - split_ts_out (optional): If provided, it will be set to the bucket timestamp of + * the last element in the lower (left) partition. + * + * The search begins from the middle of the vector and expands outwards in both + * directions, checking for the earliest position where a bucket transition occurs. + * This approach improves locality and helps produce balanced splits where possible. * - * If no valid split is found (i.e. all elements map to the same bucket timestamp), - * the function returns `pv->len` to indicate that splitting is not possible. + * If a valid split is found, the function returns the index `i` at which the split + * should occur (i.e., elements `[0..i-1]` belong to one bucket, and `[i..len-1]` to another). + * If no split is found (i.e., all elements map to the same bucket), the function + * returns `pv->len`, indicating the entire vector belongs to one bucket. * * Return: - * - A valid split index in the range [1, pv->len], where the split occurs. - * - May return `pv->len` if no valid position is found. + * - A split index in the range [1, pv->len), or + * - `pv->len` if no transition is found (no split possible). 
* * Example: * -------- - * Suppose the vector contains elements with matching expiry timestamps: - * [1234, 1235, 1236, 4567, 4568] + * Raw expiry values: [1001, 1002, 1003, 2048, 2049] + * Bucket timestamps: [1024, 1024, 1024, 4096, 4096] * - * And `get_bucket_ts()` maps them to: - * [1300, 1300, 1300, 5000, 5000] + * This function returns index 3, as: + * get_bucket_ts(1003) == 1024 + * get_bucket_ts(2048) == 4096 → transition point * - * Then `findSplitPosition(set, bucket)` returns 3, resulting in: - * First part: [1234, 1235, 1236] (bucket 1300) - * Second part: [4567, 4568] (bucket 5000) - * - * This guarantees that each vector contains elements with the same bucket timestamp, - * and no value in the first part maps to the same or later bucket as the second part. - */ + * So the vector can be split as: + * - Left partition: [1001, 1002, 1003] + * - Right partition: [2048, 2049] */ static uint32_t findSplitPosition(vsetGetExpiryFunc getExpiry, vsetBucket *bucket, long long *split_ts_out) { pVector *pv = vsetBucketVector(bucket); if (!pv || pv->len < 2) return pv ? 
pv->len : 0; From ee9e856213e29d92cd136128ad5f6b17980b585c Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Mon, 7 Jul 2025 13:57:53 +0300 Subject: [PATCH 076/119] Fix bad parameter passes to signalModifiedKey Signed-off-by: Ran Shidlansik --- src/t_hash.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/t_hash.c b/src/t_hash.c index 5a386111ca..9e3d756b94 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -1650,7 +1650,7 @@ void hexpireGenericCommand(client *c, long long basetime, int unit) { notifyKeyspaceEvent(NOTIFY_HASH, "hexpire", c->argv[1], c->db->id); } server.dirty += (expired + updated); // in case there was a change increment the dirty - signalModifiedKey(c, c->db, obj); + signalModifiedKey(c, c->db, c->argv[1]); /* Delete the object in case it was left empty */ if (hashTypeLength(obj) == 0) { dbDelete(c->db, c->argv[1]); @@ -1721,7 +1721,7 @@ void hpersistCommand(client *c) { } if (changes) { notifyKeyspaceEvent(NOTIFY_HASH, "hpersist", c->argv[1], c->db->id); - signalModifiedKey(c, c->db, hash); + signalModifiedKey(c, c->db, c->argv[1]); } } From 2a476f11cb95b8dd7c214b1ee545b8d7248a2e78 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Mon, 7 Jul 2025 21:48:05 +0300 Subject: [PATCH 077/119] Handle hrandfield when volatile fields Signed-off-by: Ran Shidlansik --- src/db.c | 2 +- src/t_hash.c | 84 ++++++++++++++++++++++------------- tests/unit/hashexpire.tcl | 93 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 148 insertions(+), 31 deletions(-) diff --git a/src/db.c b/src/db.c index bf20ddb48c..ea5e477629 100644 --- a/src/db.c +++ b/src/db.c @@ -115,7 +115,7 @@ robj *lookupKey(serverDb *db, robj *key, int flags) { /* Update the access time for the ageing algorithm. * Don't do it if we have a saving child, as this will trigger * a copy on write madness. 
*/ - if (server.current_client && server.current_client->flag.no_touch && + if (server.current_client && server.current_client->flag.no_touch && server.executing_client && server.executing_client->cmd->proc != touchCommand) flags |= LOOKUP_NOTOUCH; if (!hasActiveChildProcess() && !(flags & LOOKUP_NOTOUCH)) { diff --git a/src/t_hash.c b/src/t_hash.c index 9e3d756b94..e56cd1f605 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -836,9 +836,18 @@ void hashReplyFromListpackEntry(client *c, listpackEntry *e) { static void hashTypeRandomElement(robj *hashobj, unsigned long hashsize, listpackEntry *field, listpackEntry *val) { if (hashobj->encoding == OBJ_ENCODING_HASHTABLE) { void *e = NULL; - + int maxtries = 100; + hashTypeIgnoreTTL(hashobj, true); while (!e) { hashtableFairRandomEntry(hashobj->ptr, &e); + if (entryIsExpired(e) && --maxtries) { + e = NULL; + continue; + } else if (maxtries == 0) { + field->sval = NULL; + if (val) val->sval = NULL; + break; + } sds sds_field = entryGetField(e); field->sval = (unsigned char *)sds_field; field->slen = sdslen(sds_field); @@ -850,6 +859,7 @@ static void hashTypeRandomElement(robj *hashobj, unsigned long hashsize, listpac sdslen(sds_val); } } + hashTypeIgnoreTTL(hashobj, false); } else if (hashobj->encoding == OBJ_ENCODING_LISTPACK) { lpRandomPair(hashobj->ptr, hashsize, field, val); } else { @@ -1615,8 +1625,8 @@ void hexpireGenericCommand(client *c, long long basetime, int unit) { for (i = 0; i < num_fields; i++) { result = -2; if (set_expired) { - if (hashTypeDelete(obj, c->argv[fields_index + i]->ptr)) { - /* In case we deleted the field, add it to the new hdel command vector. */ + if (obj && hashTypeDelete(obj, c->argv[fields_index + i]->ptr)) { + /* In case we deleted the field, add it to the ew hdel command vector. 
*/ new_argv[new_argc++] = c->argv[fields_index + i]; incrRefCount(c->argv[fields_index + i]); result = 2; @@ -1835,27 +1845,29 @@ void hrandfieldWithCountCommand(client *c, long l, int withvalues) { writePreparedClient *wpc = prepareClientForFutureWrites(c); if (!wpc) return; + void *replylen = addReplyDeferredLen(c); + unsigned long reply_size = 0; + /* CASE 1: The count was negative, so the extraction method is just: * "return N random elements" sampling the whole set every time. * This case is trivial and can be served without auxiliary data * structures. This case is the only one that also needs to return the * elements in random order. */ if (!uniq || count == 1) { - if (withvalues && c->resp == 2) - addWritePreparedReplyArrayLen(wpc, count * 2); - else - addWritePreparedReplyArrayLen(wpc, count); if (hash->encoding == OBJ_ENCODING_HASHTABLE) { - while (count && hashtableSize(hash->ptr) > 0) { - void *entry; - hashtableFairRandomEntry(hash->ptr, &entry); - count--; - sds field = entryGetField(entry); - sds value = entryGetValue(entry); + while (count--) { + listpackEntry field, value; + hashTypeRandomElement(hash, size, &field, &value); + + /* In case we were unable to locate random element, it is probably because there is no such element + * since all elements are expired. */ + if (!field.sval) break; + if (withvalues && c->resp > 2) addWritePreparedReplyArrayLen(wpc, 2); - addWritePreparedReplyBulkCBuffer(wpc, field, sdslen(field)); - if (withvalues) addWritePreparedReplyBulkCBuffer(wpc, value, sdslen(value)); + addWritePreparedReplyBulkCBuffer(wpc, field.sval, field.slen); + if (withvalues) addWritePreparedReplyBulkCBuffer(wpc, value.sval, value.slen); if (c->flag.close_asap) break; + reply_size++; } } else if (hash->encoding == OBJ_ENCODING_LISTPACK) { listpackEntry *fields, *vals = NULL; @@ -1867,6 +1879,7 @@ void hrandfieldWithCountCommand(client *c, long l, int withvalues) { while (count) { sample_count = count > limit ? 
limit : count; count -= sample_count; + reply_size += sample_count; lpRandomPairs(hash->ptr, sample_count, fields, vals); hrandfieldReplyWithListpack(wpc, sample_count, fields, vals); if (c->flag.close_asap) break; @@ -1874,16 +1887,9 @@ void hrandfieldWithCountCommand(client *c, long l, int withvalues) { zfree(fields); zfree(vals); } - return; + goto set_deferred_response; } - /* Initiate reply count, RESP3 responds with nested array, RESP2 with flat one. */ - long reply_size = count < size ? count : size; - if (withvalues && c->resp == 2) - addWritePreparedReplyArrayLen(wpc, reply_size * 2); - else - addWritePreparedReplyArrayLen(wpc, reply_size); - /* CASE 2: * The number of requested elements is greater than the number of * elements inside the hash: simply return the whole hash. */ @@ -1894,11 +1900,14 @@ void hrandfieldWithCountCommand(client *c, long l, int withvalues) { if (withvalues && c->resp > 2) addWritePreparedReplyArrayLen(wpc, 2); addHashIteratorCursorToReply(wpc, &hi, OBJ_HASH_FIELD); if (withvalues) addHashIteratorCursorToReply(wpc, &hi, OBJ_HASH_VALUE); + reply_size++; } hashTypeResetIterator(&hi); - return; + + goto set_deferred_response; } + /* CASE 2.5 listpack only. Sampling unique elements, in non-random order. * Listpack encoded hashes are meant to be relatively small, so * HRANDFIELD_SUB_STRATEGY_MUL isn't necessary and we rather not make @@ -1908,6 +1917,7 @@ void hrandfieldWithCountCommand(client *c, long l, int withvalues) { * And it is inefficient to repeatedly pick one random element from a * listpack in CASE 4. So we use this instead. */ if (hash->encoding == OBJ_ENCODING_LISTPACK) { + reply_size = count < size ? 
count : size; listpackEntry *fields, *vals = NULL; fields = zmalloc(sizeof(listpackEntry) * count); if (withvalues) vals = zmalloc(sizeof(listpackEntry) * count); @@ -1915,8 +1925,9 @@ void hrandfieldWithCountCommand(client *c, long l, int withvalues) { hrandfieldReplyWithListpack(wpc, count, fields, vals); zfree(fields); zfree(vals); - return; + goto set_deferred_response; } + /* CASE 3: * The number of elements inside the hash is not greater than * HRANDFIELD_SUB_STRATEGY_MUL times the number of requested elements. @@ -1938,16 +1949,17 @@ void hrandfieldWithCountCommand(client *c, long l, int withvalues) { while (hashtableNext(&iter, &entry)) { int res = hashtableAdd(ht, entry); serverAssert(res); + reply_size++; } - serverAssert(hashtableSize(ht) == size); + serverAssert(hashtableSize(ht) == reply_size); hashtableResetIterator(&iter); /* Remove random elements to reach the right count. */ - while (size > count) { + while (reply_size > count) { void *element; hashtableFairRandomEntry(ht, &element); hashtableDelete(ht, element); - size--; + reply_size--; } /* Reply with what's in the temporary hashtable and release memory */ @@ -1978,8 +1990,12 @@ void hrandfieldWithCountCommand(client *c, long l, int withvalues) { while (added < count) { hashTypeRandomElement(hash, size, &field, withvalues ? &value : NULL); - /* Try to add the object to the hashtable. If it already exists - * free it, otherwise increment the number of objects we have + /* In case we were unable to locate random element, it is probably because there is no such element + * since all elements are expired. */ + if (!field.sval) break; + + /* Try to add the object to the hashtable. If expired, stop adding (there are probably none left). + * If it already exists free it, otherwise increment the number of objects we have * in the result hashtable.
*/ sds sfield = hashSdsFromListpackEntry(&field); if (!hashtableAdd(ht, sfield)) { @@ -1996,7 +2012,15 @@ void hrandfieldWithCountCommand(client *c, long l, int withvalues) { /* Release memory */ hashtableRelease(ht); + reply_size = added; } + +set_deferred_response: + /* Set the reply count, RESP3 responds with nested array, RESP2 with flat one. */ + if (withvalues && c->resp == 2) + setDeferredArrayLen(c, replylen, reply_size * 2); + else + setDeferredArrayLen(c, replylen, reply_size); } /* HRANDFIELD key [ [WITHVALUES]] */ diff --git a/tests/unit/hashexpire.tcl b/tests/unit/hashexpire.tcl index e65ebe7860..2a64472677 100644 --- a/tests/unit/hashexpire.tcl +++ b/tests/unit/hashexpire.tcl @@ -1541,6 +1541,99 @@ start_server {tags {"hashexpire external:skip"}} { # f4 does not exist assert_equal {1 -1 -2} [r hpersist myhash FIELDS 3 f1 f2 f4] } + + #################### HRANDFIELD ################## + + test "HRANDFIELD - CASE 1: negative count" { + r FLUSHALL + assert_equal {1} [r HSETEX myhash PX 1 fields 5 f1 v1 f2 v2 f3 v3 f4 v4 f5 v5] + wait_for_condition 100 100 { + [r HGETALL myhash] eq {} + } else { + fail "Hash is showing expired elements" + } + # check that we do get a response even though it is expired + assert_match {} [r hrandfield myhash -1] + + # Now write a persistent element + assert_equal {1} [r HSET myhash f5 v5] + # make sure this is the element we will get all the time + for {set i 1} {$i <= 50} {incr i} { + assert_equal {f5 f5 f5 f5 f5} [r hrandfield myhash -5] + } + + } + + test "HRANDFIELD - CASE 2: The number of requested elements is greater than the number of elements inside the hash" { + r FLUSHALL + assert_equal {1} [r HSETEX myhash PX 1 fields 5 f1 v1 f2 v2 f3 v3 f4 v4 f5 v5] + wait_for_condition 100 100 { + [r HGETALL myhash] eq {} + } else { + fail "Hash is showing expired elements" + } + # check that we get an empty response even though there are expired fields + assert_match {} [r hrandfield myhash 10] + + # Now write a persistent 
element + assert_equal {3} [r HSET myhash f5 v5 f6 v6 f7 v7] + # make sure this is the element we will get all the time + for {set i 1} {$i <= 50} {incr i} { + set result [r hrandfield myhash 10] + assert_equal 3 [llength [split $result]] + assert_match {*f5*} $result + assert_match {*f6*} $result + assert_match {*f7*} $result + } + + } + + test "HRANDFIELD - CASE 3: The number of elements inside the hash is not greater than 3 times the number of requested elements" { + r FLUSHALL + assert_equal {1} [r HSETEX myhash PX 1 fields 5 f1 v1 f2 v2 f3 v3 f4 v4 f5 v5] + wait_for_condition 100 100 { + [r HGETALL myhash] eq {} + } else { + fail "Hash is showing expired elements" + } + # check that we get an empty response even though there are expired fields + assert_match {} [r hrandfield myhash 4] + + # Now write a persistent elements + assert_equal {4} [r HSET myhash f5 v5 f6 v6 f7 v7 f8 v8] + # make sure this is the elements we will get all the time + for {set i 1} {$i <= 50} {incr i} { + set result [r hrandfield myhash 4] + assert_equal 4 [llength [split $result]] + assert_match {*f5*} $result + assert_match {*f6*} $result + assert_match {*f7*} $result + assert_match {*f8*} $result + } + } + + test "HRANDFIELD - CASE 4: The number of elements inside the hash is greater than 3 times the number of requested elements" { + r FLUSHALL + assert_equal {1} [r HSETEX myhash PX 1 fields 8 f1 v1 f2 v2 f3 v3 f4 v4 f5 v5 f6 v6 f7 v7 f8 v8] + wait_for_condition 100 100 { + [r HGETALL myhash] eq {} + } else { + fail "Hash is showing expired elements" + } + # check that we get an empty response even though there are expired fields + assert_match {} [r hrandfield myhash 2] + + # Now write a persistent elements + assert_equal {3} [r HSET myhash f8 v8 f9 v9 f10 v10] + # make sure this is the elements we will get all the time + for {set i 1} {$i <= 50} {incr i} { + set result [r hrandfield myhash 3] + assert_equal 3 [llength [split $result]] + assert_match {*f8*} $result + assert_match 
{*f9*} $result + assert_match {*f10*} $result + } + } } From 2dc19333c52b9c3ba5dc4ea9088436ca58f18a99 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Mon, 7 Jul 2025 22:46:43 +0300 Subject: [PATCH 078/119] apply pr comments Signed-off-by: Ran Shidlansik --- src/commands.def | 4 ++-- src/commands/hexpire.json | 2 +- src/commands/hexpireat.json | 2 +- src/commands/hexpiretime.json | 6 +++--- src/commands/hgetex.json | 2 +- src/commands/hpersist.json | 4 ++-- src/commands/hpexpireat.json | 4 ++-- src/commands/hpexpiretime.json | 2 +- src/commands/hpttl.json | 2 +- src/commands/hsetex.json | 2 +- src/commands/httl.json | 2 +- src/entry.c | 2 +- src/expire.c | 2 +- src/expire.h | 2 +- src/rdb.c | 2 +- src/t_hash.c | 4 ++-- 16 files changed, 22 insertions(+), 22 deletions(-) diff --git a/src/commands.def b/src/commands.def index 527c9cad4c..d7ea81a2f9 100644 --- a/src/commands.def +++ b/src/commands.def @@ -11657,7 +11657,7 @@ struct COMMAND_STRUCT serverCommandTable[] = { {MAKE_CMD("hexists","Determines whether a field exists in a hash.","O(1)","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HEXISTS_History,0,HEXISTS_Tips,0,hexistsCommand,3,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HEXISTS_Keyspecs,1,NULL,2),.args=HEXISTS_Args}, {MAKE_CMD("hexpire","Set expiry time on hash fields.","O(1) for each field assigned with TTL, so O(N) to add N TTLs when the command is called with multiple fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HEXPIRE_History,0,HEXPIRE_Tips,0,hexpireCommand,-6,CMD_WRITE|CMD_FAST,ACL_CATEGORY_HASH,HEXPIRE_Keyspecs,1,NULL,4),.args=HEXPIRE_Args}, {MAKE_CMD("hexpireat","Set expiry time on hash fields.","O(1) for each field assigned with TTL, so O(N) to add N TTLs when the command is called with multiple fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HEXPIREAT_History,0,HEXPIREAT_Tips,0,hexpireAtCommand,-6,CMD_WRITE|CMD_FAST,ACL_CATEGORY_HASH,HEXPIREAT_Keyspecs,1,NULL,4),.args=HEXPIREAT_Args}, 
-{MAKE_CMD("hexpiretime","Returns the Unix timestamp in seconds since the epoch at which the given key's field(s) will expire","O(1) for each field, so O(N) for N items when the command is called with multiple fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HEXPIRETIME_History,0,HEXPIRETIME_Tips,0,hexpiretimeCommand,-5,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HEXPIRETIME_Keyspecs,1,NULL,2),.args=HEXPIRETIME_Args}, +{MAKE_CMD("hexpiretime","Returns the Unix timestamps in seconds since the epoch at which the given key's field(s) will expire","O(1) for each field, so O(N) for N items when the command is called with multiple fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HEXPIRETIME_History,0,HEXPIRETIME_Tips,0,hexpiretimeCommand,-5,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HEXPIRETIME_Keyspecs,1,NULL,2),.args=HEXPIRETIME_Args}, {MAKE_CMD("hget","Returns the value of a field in a hash.","O(1)","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HGET_History,0,HGET_Tips,0,hgetCommand,3,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HGET_Keyspecs,1,NULL,2),.args=HGET_Args}, {MAKE_CMD("hgetall","Returns all fields and values in a hash.","O(N) where N is the size of the hash.","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HGETALL_History,0,HGETALL_Tips,1,hgetallCommand,2,CMD_READONLY,ACL_CATEGORY_HASH,HGETALL_Keyspecs,1,NULL,1),.args=HGETALL_Args}, {MAKE_CMD("hgetex","Get the value of one or more fields of a given hash key, and optionally set their expiration time or time-to-live (TTL).","O(1)","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HGETEX_History,0,HGETEX_Tips,0,hgetexCommand,-5,CMD_WRITE|CMD_FAST,ACL_CATEGORY_HASH,HGETEX_Keyspecs,1,NULL,3),.args=HGETEX_Args}, @@ -11669,7 +11669,7 @@ struct COMMAND_STRUCT serverCommandTable[] = { {MAKE_CMD("hmset","Sets the values of multiple fields.","O(N) where N is the number of fields being set.","2.0.0",CMD_DOC_DEPRECATED,"`HSET` with multiple field-value 
pairs","4.0.0","hash",COMMAND_GROUP_HASH,HMSET_History,0,HMSET_Tips,0,hsetCommand,-4,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HMSET_Keyspecs,1,NULL,2),.args=HMSET_Args}, {MAKE_CMD("hpersist","Remove the existing expiration on a hash key's field(s).","O(1) for each field assigned with TTL, so O(N) to persist N items when the command is called with multiple fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HPERSIST_History,0,HPERSIST_Tips,0,hpersistCommand,-5,CMD_WRITE|CMD_FAST,ACL_CATEGORY_HASH,HPERSIST_Keyspecs,1,NULL,2),.args=HPERSIST_Args}, {MAKE_CMD("hpexpire","Set expiry time on hash object.","O(1) for each field assigned with TTL, so O(N) to add N TTLs when the command is called with multiple fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HPEXPIRE_History,0,HPEXPIRE_Tips,0,hpexpireCommand,-6,CMD_WRITE|CMD_FAST,ACL_CATEGORY_HASH,HPEXPIRE_Keyspecs,1,NULL,4),.args=HPEXPIRE_Args}, -{MAKE_CMD("hpexpireat","Set expiry time on hash object.","O(1) for each field assigned with TTL, so O(N) to add N TTLs when the command is called with multiple fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HPEXPIREAT_History,0,HPEXPIREAT_Tips,0,hpexpireAtCommand,-6,CMD_WRITE|CMD_FAST,ACL_CATEGORY_HASH,HPEXPIREAT_Keyspecs,1,NULL,4),.args=HPEXPIREAT_Args}, +{MAKE_CMD("hpexpireat","Set expiration time on hash field.","O(1) for each field assigned with TTL, so O(N) to add N TTLs when the command is called with multiple fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HPEXPIREAT_History,0,HPEXPIREAT_Tips,0,hpexpireAtCommand,-6,CMD_WRITE|CMD_FAST,ACL_CATEGORY_HASH,HPEXPIREAT_Keyspecs,1,NULL,4),.args=HPEXPIREAT_Args}, {MAKE_CMD("hpexpiretime","Returns the Unix timestamp in milliseconds since Unix epoch at which the given key's field(s) will expire","O(1) for each field, so O(N) for N items when the command is called with multiple 
fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HPEXPIRETIME_History,0,HPEXPIRETIME_Tips,0,hpexpiretimeCommand,-5,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HPEXPIRETIME_Keyspecs,1,NULL,2),.args=HPEXPIRETIME_Args}, {MAKE_CMD("hpttl","Returns the remaining time to live (in milliseconds) of a hash key's field(s) that have an associated expiration.","O(1) for each field assigned with TTL, so O(N) for N items when the command is called with multiple fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HPTTL_History,0,HPTTL_Tips,0,hpttlCommand,-5,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HPTTL_Keyspecs,1,NULL,2),.args=HPTTL_Args}, {MAKE_CMD("hrandfield","Returns one or more random fields from a hash.","O(N) where N is the number of fields returned","6.2.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HRANDFIELD_History,0,HRANDFIELD_Tips,1,hrandfieldCommand,-2,CMD_READONLY,ACL_CATEGORY_HASH,HRANDFIELD_Keyspecs,1,NULL,2),.args=HRANDFIELD_Args}, diff --git a/src/commands/hexpire.json b/src/commands/hexpire.json index 705a8b5638..32729bd282 100644 --- a/src/commands/hexpire.json +++ b/src/commands/hexpire.json @@ -115,4 +115,4 @@ } ] } -} \ No newline at end of file +} diff --git a/src/commands/hexpireat.json b/src/commands/hexpireat.json index f7303cb30e..47d66d2caf 100644 --- a/src/commands/hexpireat.json +++ b/src/commands/hexpireat.json @@ -117,4 +117,4 @@ } ] } -} \ No newline at end of file +} diff --git a/src/commands/hexpiretime.json b/src/commands/hexpiretime.json index 9229e1a9b4..02dc9a81c1 100644 --- a/src/commands/hexpiretime.json +++ b/src/commands/hexpiretime.json @@ -1,6 +1,6 @@ { "HEXPIRETIME": { - "summary": "Returns the Unix timestamp in seconds since the epoch at which the given key's field(s) will expire", + "summary": "Returns the Unix timestamps in seconds since the epoch at which the given key's field(s) will expire", "complexity": "O(1) for each field, so O(N) for N items when the command is called with multiple 
fields.", "group": "hash", "since": "9.0.0", @@ -40,7 +40,7 @@ "items": { "oneOf": [ { - "description": "Field does not exist in the provided hash key, or the hash key is empty", + "description": "Field does not exist in the provided hash key, or the hash key is empty.", "const": -2 }, { @@ -82,4 +82,4 @@ } ] } -} \ No newline at end of file +} diff --git a/src/commands/hgetex.json b/src/commands/hgetex.json index 6d6b892f83..ec25c79fa5 100644 --- a/src/commands/hgetex.json +++ b/src/commands/hgetex.json @@ -115,4 +115,4 @@ } ] } -} \ No newline at end of file +} diff --git a/src/commands/hpersist.json b/src/commands/hpersist.json index 4964a1477f..d29d65dd87 100644 --- a/src/commands/hpersist.json +++ b/src/commands/hpersist.json @@ -40,7 +40,7 @@ "items": { "oneOf": [ { - "description": "Field does not exist in the provided hash key, or the hash key does not exist", + "description": "Field does not exist in the provided hash key, or the hash key does not exist.", "const": -2 }, { @@ -81,4 +81,4 @@ } ] } -} \ No newline at end of file +} diff --git a/src/commands/hpexpireat.json b/src/commands/hpexpireat.json index c291202daf..5a5068de61 100644 --- a/src/commands/hpexpireat.json +++ b/src/commands/hpexpireat.json @@ -1,6 +1,6 @@ { "HPEXPIREAT": { - "summary": "Set expiry time on hash object.", + "summary": "Set expiration time on hash field.", "complexity": "O(1) for each field assigned with TTL, so O(N) to add N TTLs when the command is called with multiple fields.", "group": "hash", "since": "9.0.0", @@ -117,4 +117,4 @@ } ] } -} \ No newline at end of file +} diff --git a/src/commands/hpexpiretime.json b/src/commands/hpexpiretime.json index 9df2f45039..1baf0fee0f 100644 --- a/src/commands/hpexpiretime.json +++ b/src/commands/hpexpiretime.json @@ -82,4 +82,4 @@ } ] } -} \ No newline at end of file +} diff --git a/src/commands/hpttl.json b/src/commands/hpttl.json index 360a7d8d91..611116e141 100644 --- a/src/commands/hpttl.json +++ b/src/commands/hpttl.json @@ 
-82,4 +82,4 @@ } ] } -} \ No newline at end of file +} diff --git a/src/commands/hsetex.json b/src/commands/hsetex.json index a444efb6a3..34976cc5a1 100644 --- a/src/commands/hsetex.json +++ b/src/commands/hsetex.json @@ -149,4 +149,4 @@ } ] } -} \ No newline at end of file +} diff --git a/src/commands/httl.json b/src/commands/httl.json index d97d21f1d6..b80570dd3f 100644 --- a/src/commands/httl.json +++ b/src/commands/httl.json @@ -82,4 +82,4 @@ } ] } -} \ No newline at end of file +} diff --git a/src/entry.c b/src/entry.c index 4e340b282f..a9e08593b7 100644 --- a/src/entry.c +++ b/src/entry.c @@ -276,7 +276,7 @@ entry *entryUpdate(entry *e, sds value, long long expiry) { sds old_value = entryGetValue(e); /* We are using the same entry memory in order to store a potentially new value. * In such cases the old value alloc was adjusted to the real buffer size part it was embedded to. - * since we can potentially write here a smaller value, which requires less allocation space, we would like to + * Since we can potentially write here a smaller value, which requires less allocation space, we would like to * inherit the old value memory allocation size. */ size_t value_size = sdsHdrSize(SDS_TYPE_8) + sdsalloc(old_value) + 1; sdswrite(sdsAllocPtr(old_value), value_size, SDS_TYPE_8, value, sdslen(value)); diff --git a/src/expire.c b/src/expire.c index cc42d803a4..b93acf57dd 100644 --- a/src/expire.c +++ b/src/expire.c @@ -828,7 +828,7 @@ bool timestampIsExpired(mstime_t when) { return now > when; } -/* This function verify if the current conditions allow expiration of keys and fields. +/* This function verifies if the current conditions allow expiration of keys and fields. * For some cases expiration is not allowed, but we would still like to ignore the key * so to treat it as "expired" without actively deleting it. 
*/ expirationPolicy getExpirationPolicyWithFlags(int flags) { diff --git a/src/expire.h b/src/expire.h index 8e852e30af..11ef9d9c10 100644 --- a/src/expire.h +++ b/src/expire.h @@ -23,7 +23,7 @@ /* Return values for expireIfNeeded */ typedef enum { - KEY_VALID = 0, /* Could be volatile and not yet expired, non-volatile, or even non-existing key. */ + KEY_VALID = 0, /* Could be volatile and not yet expired, non-volatile, or even nonexistent key. */ KEY_EXPIRED, /* Logically expired but not yet deleted. */ KEY_DELETED /* The key was deleted now. */ } keyStatus; diff --git a/src/rdb.c b/src/rdb.c index 6ae3e813c7..5f889ea710 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -2194,7 +2194,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) { } /* Also load the entry expiry */ - long long itemexpiry = -1; + long long itemexpiry = EXPIRY_NONE; if (rdbtype == RDB_TYPE_HASH_2) { itemexpiry = rdbLoadMillisecondTime(rdb, RDB_VERSION); if (itemexpiry == LLONG_MAX && rioGetReadError(rdb)) return NULL; diff --git a/src/t_hash.c b/src/t_hash.c index e56cd1f605..67b4126d76 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -1691,9 +1691,9 @@ void hpexpireAtCommand(client *c) { * - Validates that the number of provided fields matches the declared count. * * - For each specified field attempts to remove any existing expiration. - * - Replies to the client with an array of integers, each representing the result of persistence for one field: + * - Replies to the client with an array of integers, each representing the result of persistence for one field: * - 1 if the expiration for the field was removed. - * - -1 if the field exists, but has no expiraiton time set. + * - -1 if the field exists, but has no expiration time set. * - -2 if the field does not exist or the hash is empty. 
* * - If any expirations were removed: From 168ed732b62f7427f2cab6eced8c0a7fca3310fb Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Tue, 8 Jul 2025 07:34:43 +0300 Subject: [PATCH 079/119] Apply suggestions from code review Co-authored-by: Josh Soref <2119212+jsoref@users.noreply.github.com> Signed-off-by: Ran Shidlansik --- src/commands/hexpiretime.json | 2 +- src/commands/hpexpire.json | 2 +- src/commands/hpexpiretime.json | 2 +- src/t_hash.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/commands/hexpiretime.json b/src/commands/hexpiretime.json index 02dc9a81c1..5e7274a941 100644 --- a/src/commands/hexpiretime.json +++ b/src/commands/hexpiretime.json @@ -1,6 +1,6 @@ { "HEXPIRETIME": { - "summary": "Returns the Unix timestamps in seconds since the epoch at which the given key's field(s) will expire", + "summary": "Returns Unix timestamps in seconds since the epoch at which the given key's field(s) will expire", "complexity": "O(1) for each field, so O(N) for N items when the command is called with multiple fields.", "group": "hash", "since": "9.0.0", diff --git a/src/commands/hpexpire.json b/src/commands/hpexpire.json index 071b409fb6..b159c0a1e4 100644 --- a/src/commands/hpexpire.json +++ b/src/commands/hpexpire.json @@ -117,4 +117,4 @@ } ] } -} \ No newline at end of file +} diff --git a/src/commands/hpexpiretime.json b/src/commands/hpexpiretime.json index 1baf0fee0f..87367a6706 100644 --- a/src/commands/hpexpiretime.json +++ b/src/commands/hpexpiretime.json @@ -40,7 +40,7 @@ "items": { "oneOf": [ { - "description": "Field does not exist in the provided hash key, or the hash key is empty", + "description": "Field does not exist in the provided hash key, or the hash key is empty.", "const": -2 }, { diff --git a/src/t_hash.c b/src/t_hash.c index 67b4126d76..69000512f4 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -1626,7 +1626,7 @@ void hexpireGenericCommand(client *c, long long basetime, int unit) { result = -2; if (set_expired) { 
if (obj && hashTypeDelete(obj, c->argv[fields_index + i]->ptr)) { - /* In case we deleted the field, add it to the ew hdel command vector. */ + /* In case we deleted the field, add it to the new hdel command vector. */ new_argv[new_argc++] = c->argv[fields_index + i]; incrRefCount(c->argv[fields_index + i]); result = 2; From 8caf1d5df2cc0e40bdb4946269e38a7235069f44 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Tue, 8 Jul 2025 07:39:49 +0300 Subject: [PATCH 080/119] remove NX/XX options for HSETEX :( Signed-off-by: Ran Shidlansik --- src/commands.def | 11 ++--------- src/commands/hsetex.json | 17 ----------------- src/t_hash.c | 6 ------ 3 files changed, 2 insertions(+), 32 deletions(-) diff --git a/src/commands.def b/src/commands.def index d7ea81a2f9..48ff63c95e 100644 --- a/src/commands.def +++ b/src/commands.def @@ -4175,12 +4175,6 @@ keySpec HSETEX_Keyspecs[1] = { }; #endif -/* HSETEX key_condition argument table */ -struct COMMAND_ARG HSETEX_key_condition_Subargs[] = { -{MAKE_ARG("nx",ARG_TYPE_PURE_TOKEN,-1,"NX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, -{MAKE_ARG("xx",ARG_TYPE_PURE_TOKEN,-1,"XX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, -}; - /* HSETEX fields_condition argument table */ struct COMMAND_ARG HSETEX_fields_condition_Subargs[] = { {MAKE_ARG("fnx",ARG_TYPE_PURE_TOKEN,-1,"FNX",NULL,NULL,CMD_ARG_NONE,0,NULL)}, @@ -4211,7 +4205,6 @@ struct COMMAND_ARG HSETEX_fields_Subargs[] = { /* HSETEX argument table */ struct COMMAND_ARG HSETEX_Args[] = { {MAKE_ARG("key",ARG_TYPE_KEY,0,NULL,NULL,NULL,CMD_ARG_NONE,0,NULL)}, -{MAKE_ARG("key-condition",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=HSETEX_key_condition_Subargs}, {MAKE_ARG("fields-condition",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=HSETEX_fields_condition_Subargs}, {MAKE_ARG("expiration",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,5,NULL),.subargs=HSETEX_expiration_Subargs}, 
{MAKE_ARG("fields",ARG_TYPE_BLOCK,-1,"FIELDS",NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=HSETEX_fields_Subargs}, @@ -11657,7 +11650,7 @@ struct COMMAND_STRUCT serverCommandTable[] = { {MAKE_CMD("hexists","Determines whether a field exists in a hash.","O(1)","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HEXISTS_History,0,HEXISTS_Tips,0,hexistsCommand,3,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HEXISTS_Keyspecs,1,NULL,2),.args=HEXISTS_Args}, {MAKE_CMD("hexpire","Set expiry time on hash fields.","O(1) for each field assigned with TTL, so O(N) to add N TTLs when the command is called with multiple fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HEXPIRE_History,0,HEXPIRE_Tips,0,hexpireCommand,-6,CMD_WRITE|CMD_FAST,ACL_CATEGORY_HASH,HEXPIRE_Keyspecs,1,NULL,4),.args=HEXPIRE_Args}, {MAKE_CMD("hexpireat","Set expiry time on hash fields.","O(1) for each field assigned with TTL, so O(N) to add N TTLs when the command is called with multiple fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HEXPIREAT_History,0,HEXPIREAT_Tips,0,hexpireAtCommand,-6,CMD_WRITE|CMD_FAST,ACL_CATEGORY_HASH,HEXPIREAT_Keyspecs,1,NULL,4),.args=HEXPIREAT_Args}, -{MAKE_CMD("hexpiretime","Returns the Unix timestamps in seconds since the epoch at which the given key's field(s) will expire","O(1) for each field, so O(N) for N items when the command is called with multiple fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HEXPIRETIME_History,0,HEXPIRETIME_Tips,0,hexpiretimeCommand,-5,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HEXPIRETIME_Keyspecs,1,NULL,2),.args=HEXPIRETIME_Args}, +{MAKE_CMD("hexpiretime","Returns Unix timestamps in seconds since the epoch at which the given key's field(s) will expire","O(1) for each field, so O(N) for N items when the command is called with multiple 
fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HEXPIRETIME_History,0,HEXPIRETIME_Tips,0,hexpiretimeCommand,-5,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HEXPIRETIME_Keyspecs,1,NULL,2),.args=HEXPIRETIME_Args}, {MAKE_CMD("hget","Returns the value of a field in a hash.","O(1)","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HGET_History,0,HGET_Tips,0,hgetCommand,3,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HGET_Keyspecs,1,NULL,2),.args=HGET_Args}, {MAKE_CMD("hgetall","Returns all fields and values in a hash.","O(N) where N is the size of the hash.","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HGETALL_History,0,HGETALL_Tips,1,hgetallCommand,2,CMD_READONLY,ACL_CATEGORY_HASH,HGETALL_Keyspecs,1,NULL,1),.args=HGETALL_Args}, {MAKE_CMD("hgetex","Get the value of one or more fields of a given hash key, and optionally set their expiration time or time-to-live (TTL).","O(1)","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HGETEX_History,0,HGETEX_Tips,0,hgetexCommand,-5,CMD_WRITE|CMD_FAST,ACL_CATEGORY_HASH,HGETEX_Keyspecs,1,NULL,3),.args=HGETEX_Args}, @@ -11675,7 +11668,7 @@ struct COMMAND_STRUCT serverCommandTable[] = { {MAKE_CMD("hrandfield","Returns one or more random fields from a hash.","O(N) where N is the number of fields returned","6.2.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HRANDFIELD_History,0,HRANDFIELD_Tips,1,hrandfieldCommand,-2,CMD_READONLY,ACL_CATEGORY_HASH,HRANDFIELD_Keyspecs,1,NULL,2),.args=HRANDFIELD_Args}, {MAKE_CMD("hscan","Iterates over fields and values of a hash.","O(1) for every call. O(N) for a complete iteration, including enough command calls for the cursor to return back to 0. 
N is the number of elements inside the collection.","2.8.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HSCAN_History,0,HSCAN_Tips,1,hscanCommand,-3,CMD_READONLY,ACL_CATEGORY_HASH,HSCAN_Keyspecs,1,NULL,5),.args=HSCAN_Args}, {MAKE_CMD("hset","Creates or modifies the value of a field in a hash.","O(1) for each field/value pair added, so O(N) to add N field/value pairs when the command is called with multiple field/value pairs.","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HSET_History,1,HSET_Tips,0,hsetCommand,-4,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HSET_Keyspecs,1,NULL,2),.args=HSET_Args}, -{MAKE_CMD("hsetex","Set the value of one or more fields of a given hash key, and optionally set their expiration time.","O(1)","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HSETEX_History,0,HSETEX_Tips,0,hsetexCommand,-6,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HSETEX_Keyspecs,1,NULL,5),.args=HSETEX_Args}, +{MAKE_CMD("hsetex","Set the value of one or more fields of a given hash key, and optionally set their expiration time.","O(1)","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HSETEX_History,0,HSETEX_Tips,0,hsetexCommand,-6,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HSETEX_Keyspecs,1,NULL,4),.args=HSETEX_Args}, {MAKE_CMD("hsetnx","Sets the value of a field in a hash only when the field doesn't exist.","O(1)","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HSETNX_History,0,HSETNX_Tips,0,hsetnxCommand,4,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HSETNX_Keyspecs,1,NULL,3),.args=HSETNX_Args}, {MAKE_CMD("hstrlen","Returns the length of the value of a field.","O(1)","3.2.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HSTRLEN_History,0,HSTRLEN_Tips,0,hstrlenCommand,3,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HSTRLEN_Keyspecs,1,NULL,2),.args=HSTRLEN_Args}, {MAKE_CMD("httl","Returns the remaining time to live (in seconds) of a hash key's field(s) that have an associated expiration.","O(1) for each field, so 
O(N) for N items when the command is called with multiple fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HTTL_History,0,HTTL_Tips,0,httlCommand,-5,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HTTL_Keyspecs,1,NULL,2),.args=HTTL_Args}, diff --git a/src/commands/hsetex.json b/src/commands/hsetex.json index 34976cc5a1..7e1df6ead0 100644 --- a/src/commands/hsetex.json +++ b/src/commands/hsetex.json @@ -52,23 +52,6 @@ "type": "key", "key_spec_index": 0 }, - { - "name": "key-condition", - "type": "oneof", - "optional": true, - "arguments": [ - { - "name": "nx", - "type": "pure-token", - "token": "NX" - }, - { - "name": "xx", - "type": "pure-token", - "token": "XX" - } - ] - }, { "name": "fields-condition", "type": "oneof", diff --git a/src/t_hash.c b/src/t_hash.c index 69000512f4..a34423bd41 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -1187,12 +1187,6 @@ void hsetexCommand(client *c) { if (checkType(c, o, OBJ_HASH)) return; - /* Check for object existence condition */ - if ((flags & ARGS_SET_NX && o) || (flags & ARGS_SET_XX && !o)) { - addReply(c, shared.czero); - return; - } - if (o == NULL) { o = createHashObject(); dbAdd(c->db, c->argv[1], &o); From c9fc597fc8d3f76e6136512d19fd50194cd35193 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Tue, 8 Jul 2025 12:54:59 +0300 Subject: [PATCH 081/119] Change hrandfield to return expired field if no other field was found Signed-off-by: Ran Shidlansik --- src/t_hash.c | 17 +++++++++++++---- tests/unit/hashexpire.tcl | 8 ++------ 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/src/t_hash.c b/src/t_hash.c index a34423bd41..690d0b5ad2 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -835,18 +835,27 @@ void hashReplyFromListpackEntry(client *c, listpackEntry *e) { * 'val' can be NULL in which case it's not extracted. 
*/ static void hashTypeRandomElement(robj *hashobj, unsigned long hashsize, listpackEntry *field, listpackEntry *val) { if (hashobj->encoding == OBJ_ENCODING_HASHTABLE) { - void *e = NULL; + void *e = NULL, *max_expired_entry = NULL; + long long max_expired_time = EXPIRY_NONE; int maxtries = 100; hashTypeIgnoreTTL(hashobj, true); while (!e) { hashtableFairRandomEntry(hashobj->ptr, &e); if (entryIsExpired(e) && --maxtries) { + long long entry_expired_time = entryGetExpiry(e); + /* in case we will not be able to locate an entry which is not expired, we ends up returning + * an expired entry. this is somewhat aligned with the way generic keys are handled. + * We would, however try and use the entry with the latest expiration time. */ + if (!max_expired_entry || max_expired_time < entry_expired_time) { + max_expired_entry = e; + max_expired_time = entry_expired_time; + } e = NULL; continue; } else if (maxtries == 0) { - field->sval = NULL; - if (val) val->sval = NULL; - break; + /* we exhausted all attempts to locate non-expired entry. + * We thus use the best one we found. 
*/ + e = max_expired_entry; } sds sds_field = entryGetField(e); field->sval = (unsigned char *)sds_field; diff --git a/tests/unit/hashexpire.tcl b/tests/unit/hashexpire.tcl index 2a64472677..3bbeee67c7 100644 --- a/tests/unit/hashexpire.tcl +++ b/tests/unit/hashexpire.tcl @@ -1552,9 +1552,7 @@ start_server {tags {"hashexpire external:skip"}} { } else { fail "Hash is showing expired elements" } - # check that we do get a response even though it is expired - assert_match {} [r hrandfield myhash -1] - + # Now write a persistent element assert_equal {1} [r HSET myhash f5 v5] # make sure this is the element we will get all the time @@ -1620,9 +1618,7 @@ start_server {tags {"hashexpire external:skip"}} { } else { fail "Hash is showing expired elements" } - # check that we get an empty response even though there are expired fields - assert_match {} [r hrandfield myhash 2] - + # Now write a persistent elements assert_equal {3} [r HSET myhash f8 v8 f9 v9 f10 v10] # make sure this is the elements we will get all the time From 0d1c48eb9c3898bd64478f0291e98cdb31a96674 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Tue, 8 Jul 2025 16:01:33 +0300 Subject: [PATCH 082/119] Update src/t_hash.c Co-authored-by: Josh Soref <2119212+jsoref@users.noreply.github.com> Signed-off-by: Ran Shidlansik --- src/t_hash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/t_hash.c b/src/t_hash.c index 690d0b5ad2..e0f5edf87b 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -843,7 +843,7 @@ static void hashTypeRandomElement(robj *hashobj, unsigned long hashsize, listpac hashtableFairRandomEntry(hashobj->ptr, &e); if (entryIsExpired(e) && --maxtries) { long long entry_expired_time = entryGetExpiry(e); - /* in case we will not be able to locate an entry which is not expired, we ends up returning + /* in case we will not be able to locate an entry which is not expired, we endsup returning * an expired entry. this is somewhat aligned with the way generic keys are handled. 
* We would, however try and use the entry with the latest expiration time. */ if (!max_expired_entry || max_expired_time < entry_expired_time) { From 03ff1c5627737cf577be4a1a363d17c68fc943f4 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Tue, 8 Jul 2025 19:45:30 +0300 Subject: [PATCH 083/119] Update src/t_hash.c Co-authored-by: Josh Soref <2119212+jsoref@users.noreply.github.com> Signed-off-by: Ran Shidlansik --- src/t_hash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/t_hash.c b/src/t_hash.c index e0f5edf87b..790fb14219 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -843,7 +843,7 @@ static void hashTypeRandomElement(robj *hashobj, unsigned long hashsize, listpac hashtableFairRandomEntry(hashobj->ptr, &e); if (entryIsExpired(e) && --maxtries) { long long entry_expired_time = entryGetExpiry(e); - /* in case we will not be able to locate an entry which is not expired, we endsup returning + /* in case we will not be able to locate an entry which is not expired, we end up returning * an expired entry. this is somewhat aligned with the way generic keys are handled. * We would, however try and use the entry with the latest expiration time. 
*/ if (!max_expired_entry || max_expired_time < entry_expired_time) { From 340991facf84f3ed916700c8c1454483ef350e27 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Thu, 10 Jul 2025 08:39:55 +0300 Subject: [PATCH 084/119] address pr comments Signed-off-by: Ran Shidlansik --- src/commands/hexpiretime.json | 2 +- src/commands/hpexpiretime.json | 2 +- src/commands/hpttl.json | 2 +- src/commands/httl.json | 2 +- src/entry.c | 1 + src/rdb.c | 2 +- src/t_hash.c | 2 +- tests/unit/hashexpire.tcl | 22 ++++++++++------------ 8 files changed, 17 insertions(+), 18 deletions(-) diff --git a/src/commands/hexpiretime.json b/src/commands/hexpiretime.json index 5e7274a941..82c4d5c70e 100644 --- a/src/commands/hexpiretime.json +++ b/src/commands/hexpiretime.json @@ -48,7 +48,7 @@ "const": -1 }, { - "description": "The expiration time associated with the hash key field, is seconds.", + "description": "The expiration time associated with the hash key field, in seconds.", "type": "integer", "minimum": 0 } diff --git a/src/commands/hpexpiretime.json b/src/commands/hpexpiretime.json index 87367a6706..6a2be6a22f 100644 --- a/src/commands/hpexpiretime.json +++ b/src/commands/hpexpiretime.json @@ -48,7 +48,7 @@ "const": -1 }, { - "description": "The expiration time associated with the hash key field, is milliseconds.", + "description": "The expiration time associated with the hash key field, in milliseconds.", "type": "integer", "minimum": 0 } diff --git a/src/commands/hpttl.json b/src/commands/hpttl.json index 611116e141..9c7cced256 100644 --- a/src/commands/hpttl.json +++ b/src/commands/hpttl.json @@ -48,7 +48,7 @@ "const": -1 }, { - "description": "The expiration time associated with the hash key field, is milliseconds.", + "description": "The expiration time associated with the hash key field, in milliseconds.", "type": "integer", "minimum": 0 } diff --git a/src/commands/httl.json b/src/commands/httl.json index b80570dd3f..6d3ab789a7 100644 --- a/src/commands/httl.json +++ 
b/src/commands/httl.json @@ -48,7 +48,7 @@ "const": -1 }, { - "description": "The expiration time associated with the hash key field, is seconds.", + "description": "The expiration time associated with the hash key field, in seconds.", "type": "integer", "minimum": 0 } diff --git a/src/entry.c b/src/entry.c index a9e08593b7..1fcc1730f6 100644 --- a/src/entry.c +++ b/src/entry.c @@ -127,6 +127,7 @@ long long entryGetExpiry(const entry *entry) { entry *entrySetExpiry(entry *e, long long expiry) { if (entryHasExpiry(e)) { char *buf = sdsAllocPtr(e); + debugServerAssert((((uintptr_t)buf & 0x7) == 0)); /* Test that the allocation is indeed 8 bytes aligned */ if (entryHasValuePtr(e)) buf -= sizeof(sds); buf -= sizeof(expiry); memcpy(buf, &expiry, sizeof(expiry)); diff --git a/src/rdb.c b/src/rdb.c index 5f889ea710..cd9d7a4740 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -2197,7 +2197,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) { long long itemexpiry = EXPIRY_NONE; if (rdbtype == RDB_TYPE_HASH_2) { itemexpiry = rdbLoadMillisecondTime(rdb, RDB_VERSION); - if (itemexpiry == LLONG_MAX && rioGetReadError(rdb)) return NULL; + if (itemexpiry < EXPIRY_NONE || rioGetReadError(rdb)) return NULL; } /* Add pair to hash table */ diff --git a/src/t_hash.c b/src/t_hash.c index 790fb14219..c371c9da0a 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -69,7 +69,7 @@ bool hashTypeHasVolatileElements(robj *o) { /* make any access to the hash object elements ignore the specific elements expiration. * This is mainly in order to be able to access hash elements which are already expired. 
*/ -void hashTypeIgnoreTTL(robj *o, bool ignore) { +static inline void hashTypeIgnoreTTL(robj *o, bool ignore) { if (o->encoding == OBJ_ENCODING_HASHTABLE) { /* prevent placing access function if not needed */ if (!ignore && !hashTypeHasVolatileElements(o)) { diff --git a/tests/unit/hashexpire.tcl b/tests/unit/hashexpire.tcl index 3bbeee67c7..07d10fe6fb 100644 --- a/tests/unit/hashexpire.tcl +++ b/tests/unit/hashexpire.tcl @@ -5,7 +5,7 @@ proc info_field {info field} { return [string trim [lindex [split $line ":"] 1]] } } - return "" + return [s field_name] } proc assert_keyevent_pattern {rd event_type key} { @@ -13,7 +13,7 @@ proc assert_keyevent_pattern {rd event_type key} { assert_match "pmessage __keyevent@* __keyevent@*:$event_type $key" $event } -start_server {tags {"hashexpire external:skip"}} { +start_server {tags {"hashexpire"}} { ####### Valid scenarios tests ####### foreach command {EX PX EXAT PXAT} { test "HGETEX $command expiry" { @@ -257,19 +257,17 @@ start_server {tags {"hashexpire external:skip"}} { } ####### Invalid scenarios tests ####### - test {HGETEX EX- multiple options used (EX + PX)} { + test {HGETEX EX- multiple options used (EX + PX)} { r FLUSHALL r HSET myhash f1 v1 - catch {r HGETEX myhash EX 60 PX 1000 FIELDS 1 f1} e - set e - } {ERR *} + assert_error "ERR*" {r HGETEX myhash EX 60 PX 1000 FIELDS 1 f1} + } test {HGETEX EXAT- multiple options used (EXAT + PXAT)} { r FLUSHALL r HSET myhash f1 v1 - catch {r HGETEX myhash EXAT [expr {[clock seconds] + 100}] PXAT [expr {[clock milliseconds] + 100000}] 1000 FIELDS 1 f1} e - set e - } {ERR *} + assert_error "ERR*" {r HGETEX myhash EXAT [expr {[clock seconds] + 100}] PXAT [expr {[clock milliseconds] + 100000}] 1000 FIELDS 1 f1} + } # Common error scenarios for all commands foreach {cmd ttl_val} [list \ @@ -349,7 +347,7 @@ start_server {tags {"hashexpire external:skip"}} { } ## HGETEX -> Keyspace notification tests #### -start_server {tags {"hashexpire external:skip"}} { +start_server {tags 
{"hashexpire"}} { if {$::singledb} { set db 0 } else { @@ -547,7 +545,7 @@ start_server {tags {"hashexpire external:skip"}} { } # HSETEX #### -start_server {tags {"hashexpire external:skip"}} { +start_server {tags {"hashexpire"}} { test {HSETEX KEEPTTL - preserves existing TTL of field} { r FLUSHALL @@ -1634,7 +1632,7 @@ start_server {tags {"hashexpire external:skip"}} { ####### Test info -start_server {tags {"hash-ttl-info external:skip"}} { +start_server {tags {"hash-ttl-info"}} { test {Hash ttl - check command stats} { r FLUSHALL From 56ddc5f02c36cabe08aac6e621d462d8704ce7d6 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Thu, 10 Jul 2025 08:53:31 +0300 Subject: [PATCH 085/119] still skip external server run for command stats test Signed-off-by: Ran Shidlansik --- tests/unit/hashexpire.tcl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/hashexpire.tcl b/tests/unit/hashexpire.tcl index 07d10fe6fb..5d63bc7058 100644 --- a/tests/unit/hashexpire.tcl +++ b/tests/unit/hashexpire.tcl @@ -1632,7 +1632,7 @@ start_server {tags {"hashexpire"}} { ####### Test info -start_server {tags {"hash-ttl-info"}} { +start_server {tags {"hash-ttl-info external:skip"}} { test {Hash ttl - check command stats} { r FLUSHALL From 381f5f145066de6a50e31c2e8084eb0a7c51e261 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Thu, 10 Jul 2025 09:03:17 +0300 Subject: [PATCH 086/119] document aligned expiry access debugassert Signed-off-by: Ran Shidlansik --- src/entry.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/entry.c b/src/entry.c index 1fcc1730f6..dccfa61c61 100644 --- a/src/entry.c +++ b/src/entry.c @@ -115,7 +115,9 @@ long long entryGetExpiry(const entry *entry) { long long expiry = EXPIRY_NONE; if (entryHasExpiry(entry)) { char *buf = sdsAllocPtr(entry); - debugServerAssert((((uintptr_t)buf & 0x7) == 0)); /* Test that the allocation is indeed 8 bytes aligned */ + debugServerAssert((((uintptr_t)buf & 0x7) == 0)); /* Test 
that the allocation is indeed 8 bytes aligned + * This is needed since we access the expiry as with pointer casting + * which require the access to be 8 bytes aligned. */ if (entryHasValuePtr(entry)) buf -= sizeof(sds); buf -= sizeof(long long); expiry = *(long long *)buf; @@ -127,10 +129,12 @@ long long entryGetExpiry(const entry *entry) { entry *entrySetExpiry(entry *e, long long expiry) { if (entryHasExpiry(e)) { char *buf = sdsAllocPtr(e); - debugServerAssert((((uintptr_t)buf & 0x7) == 0)); /* Test that the allocation is indeed 8 bytes aligned */ + debugServerAssert((((uintptr_t)buf & 0x7) == 0)); /* Test that the allocation is indeed 8 bytes aligned + * This is needed since we access the expiry as with pointer casting + * which require the access to be 8 bytes aligned. */ if (entryHasValuePtr(e)) buf -= sizeof(sds); buf -= sizeof(expiry); - memcpy(buf, &expiry, sizeof(expiry)); + *(long long *)buf = expiry; return e; } entry *new_entry = entryUpdate(e, NULL, expiry); From 341bb9cf0f12a07c8617817addac71792f00f67c Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Thu, 10 Jul 2025 09:54:04 +0300 Subject: [PATCH 087/119] separate tests which require disable active-expiry Signed-off-by: Ran Shidlansik --- tests/unit/hashexpire.tcl | 511 +++++++++++++++++++------------------- 1 file changed, 254 insertions(+), 257 deletions(-) diff --git a/tests/unit/hashexpire.tcl b/tests/unit/hashexpire.tcl index 5d63bc7058..8171b1873b 100644 --- a/tests/unit/hashexpire.tcl +++ b/tests/unit/hashexpire.tcl @@ -732,263 +732,6 @@ start_server {tags {"hashexpire"}} { set e } {ERR *} - #################### Lazy Expiry ######################## - - test {HGETALL skips expired fields without triggering lazy expiry} { - r FLUSHALL - r DEBUG SET-ACTIVE-EXPIRE no - - # Set two fields: one persistent, one with short TTL - r HSET myhash persistent "val1" - r HSETEX myhash PX 5 FIELDS 1 expiring "val2" - - # Wait for expiry to pass - after 10 - - # HGETALL should skip expired field - set 
result [r HGETALL myhash] - assert_equal {persistent val1} $result - - # HLEN should still count both fields (expired field not removed) - assert_equal 2 [r HLEN myhash] - - # Re-enable active expiry - r DEBUG SET-ACTIVE-EXPIRE yes - } - - test {HSCAN skips expired fields} { - r FLUSHALL - r DEBUG SET-ACTIVE-EXPIRE no - - # Set multiple fields, one with expiry - r HSET myhash persistent1 "a" persistent2 "b" - r HSETEX myhash PX 5 FIELDS 1 expiring "c" - - # Wait for expiration - after 10 - - # HSCAN must not return the expired field - set cursor 0 - set allfields {} - while {1} { - set res [r HSCAN myhash $cursor] - set cursor [lindex $res 0] - set kvs [lindex $res 1] - lappend allfields {*}$kvs - if {$cursor eq "0"} break - } - - # Extract just the field names - set fieldnames [lmap {k v} $allfields { set k }] - set fieldnames_sorted [lsort $fieldnames] - - # Should only include persistent1 and persistent2 - assert_equal {persistent1 persistent2} $fieldnames_sorted - - # Re-enable active expiry for future tests - r DEBUG SET-ACTIVE-EXPIRE yes - } - - test {MOVE preserves field TTLs} { - r FLUSHALL - r SELECT 0 - r HSETEX myhash PX 50000 FIELDS 1 field1 val1 - - # Capture original TTL - set original_ttl [r HPTTL myhash FIELDS 1 field1] - assert {$original_ttl > 0} - - # Move to DB 1 - assert_equal 1 [r MOVE myhash 1] - - # Switch to target DB - r SELECT 1 - - # Field must exist and TTL must be preserved - set moved_ttl [r HPTTL myhash FIELDS 1 field1] - assert {$moved_ttl > 0 && $moved_ttl <= $original_ttl} - } - - test {HSET - overwrite lazily expired field without TTL clears expiration} { - r FLUSHALL - r debug SET-ACTIVE-EXPIRE no - - # This test verifies that if a field has expired (but not yet lazily deleted), - # and it is overwritten using a plain HSET (i.e., no TTL), - # Valkey treats the field as non existing and updates it, - # effectively clearing the old TTL and making the field persistent. 
- - r HSETEX myhash PX 10 FIELDS 1 field1 oldval - wait_for_condition 100 100 { - [r HTTL myhash FIELDS 1 field1] eq "-2" - } else { - fail "hash value was not expired after timeout" - } - - # Field should still be present in memory due to lazy expiry - assert_equal 1 [r HLEN myhash] - - # Overwrite with HSET (no TTL) before accessing - r HSET myhash field1 newval - - # TTL should now be gone; field becomes persistent - set ttl [r HPTTL myhash FIELDS 1 field1] - assert_equal -1 $ttl - assert_equal newval [r HGET myhash field1] - assert_equal 1 [r HLEN myhash] - - r debug SET-ACTIVE-EXPIRE yes - } - - test {HINCRBY - on expired field} { - r FLUSHALL - r debug SET-ACTIVE-EXPIRE no - - # This test verifies that if a field has expired, - # and it is overwritten using a plain HINCRBY (i.e., no TTL), - # Valkey treats the field as still existing and updates it, - # effectively clearing the old TTL and starting the value from 0. - - r HSETEX myhash PX 10 FIELDS 1 field1 1 - wait_for_condition 100 100 { - [r HTTL myhash FIELDS 1 field1] eq "-2" - } else { - fail "hash value was not expired after timeout" - } - - # Field should still be present in memory - assert_equal 1 [r HLEN myhash] - - # Overwrite with HINCRBY (no TTL) before accessing - r HINCRBY myhash field1 1 - - # Sanity check: check we only have one field in the hash - assert_equal 1 [r HLEN myhash] - - # TTL should now be gone; field becomes persistent - set ttl [r HPTTL myhash FIELDS 1 field1] - assert_equal -1 $ttl - assert_equal 1 [r HGET myhash field1] - assert_equal 1 [r HLEN myhash] - - # set expiration on the field - assert_equal 1 [r HEXPIRE myhash 100000000 FIELDS 1 field1] - # verify the field has TTL - assert_morethan [r HPTTL myhash FIELDS 1 field1] 0 - # now incr the field again - assert_equal 2 [r HINCRBY myhash field1 1] - # verify the field has TTL - assert_morethan [r HPTTL myhash FIELDS 1 field1] 0 - r debug SET-ACTIVE-EXPIRE yes - } - - test {HINCRBYFLOAT - on expired field} { - r FLUSHALL - r 
debug SET-ACTIVE-EXPIRE no - - # This test verifies that if a field has expired, - # and it is overwritten using a plain HINCRBYFLOAT (i.e., no TTL), - # Valkey treats the field as still existing and updates it, - # effectively clearing the old TTL and starting the value from 0. - - r HSETEX myhash PX 10 FIELDS 1 field1 1 - wait_for_condition 100 100 { - [r HTTL myhash FIELDS 1 field1] eq "-2" - } else { - fail "hash value was not expired after timeout" - } - - # Field should still be present in memory - assert_equal 1 [r HLEN myhash] - - # Overwrite with HINCRBYFLOAT (no TTL) before accessing - r HINCRBYFLOAT myhash field1 1 - - # Sanity check: check we only have one field in the hash - assert_equal 1 [r HLEN myhash] - - # TTL should now be gone; field becomes persistent - set ttl [r HPTTL myhash FIELDS 1 field1] - assert_equal -1 $ttl - assert_equal 1 [r HGET myhash field1] - assert_equal 1 [r HLEN myhash] - - # set expiration on the field - assert_equal 1 [r HEXPIRE myhash 100000000 FIELDS 1 field1] - # verify the field has TTL - assert_morethan [r HPTTL myhash FIELDS 1 field1] 0 - # now incr the field again - assert_equal 2 [r HINCRBYFLOAT myhash field1 1] - # verify the field has TTL - assert_morethan [r HPTTL myhash FIELDS 1 field1] 0 - r debug SET-ACTIVE-EXPIRE yes - } - - test {HSET - overwrite unexpired field removes TTL} { - r FLUSHALL - r debug SET-ACTIVE-EXPIRE no - - # This test verifies that overwriting a field with HSET, - # even while its TTL is still valid (not expired), - # clears the TTL and makes the field persistent. - # This behavior is consistent with how HSET works for normal keys. 
- - # Set field with long TTL - r HSETEX myhash PX 1000 FIELDS 1 field1 val1 - - # Confirm TTL is active - set before [r HPTTL myhash FIELDS 1 field1] - assert {$before > 0} - - # Overwrite with HSET before TTL expires - r HSET myhash field1 newval - - # TTL should now be gone - set after [r HPTTL myhash FIELDS 1 field1] - assert_equal -1 $after - assert_equal newval [r HGET myhash field1] - - r debug SET-ACTIVE-EXPIRE yes - } - - test {HDEL - lazily expired field is removed without triggering expiry logic} { - r FLUSHALL - r debug SET-ACTIVE-EXPIRE no - - # This test proves that deleting an expired field with HDEL - # does NOT trigger Valkey's expiration mechanism. - # - # The key observation is that Valkey tracks how many fields were - # expired via TTL using the `expired_subkeys` counter in INFO stats. - # If HDEL caused expiration to be processed internally, - # this counter would increment. We assert that it remains unchanged. - - # Capture expired_subkeys before - set before_info [r INFO stats] - set before [info_field $before_info expired_subkeys] - - # Create field with short TTL - r HSETEX myhash PX 10 FIELDS 1 field1 val1 - after 20 - - # Field is technically expired, but still in-memory due to lazy expiry - assert_equal 1 [r HLEN myhash] - - # Delete the expired field directly - r HDEL myhash field1 - - # Field should be gone - assert_equal 0 [r HEXISTS myhash field1] - - # Capture expired_subkeys again - set after_info [r INFO stats] - set after [info_field $after_info expired_subkeys] - - # Verify that no expiry occurred internally - assert_equal $before $after - r debug SET-ACTIVE-EXPIRE yes - } - ###### Test EXPIRE ############# @@ -1630,6 +1373,260 @@ start_server {tags {"hashexpire"}} { } } +####### Expiry fields skip tests +start_server {tags {"hashexpire"}} { + test {HGETALL skips expired fields} { + r FLUSHALL + r DEBUG SET-ACTIVE-EXPIRE no + + # Set two fields: one persistent, one with short TTL + r HSET myhash persistent "val1" + r HSETEX 
myhash PX 5 FIELDS 1 expiring "val2" + + # Wait for expiry to pass + after 10 + + # HGETALL should skip expired field + set result [r HGETALL myhash] + assert_equal {persistent val1} $result + + # Re-enable active expiry + r DEBUG SET-ACTIVE-EXPIRE yes + } {} {needs:debug} + + test {HSCAN skips expired fields} { + r FLUSHALL + r DEBUG SET-ACTIVE-EXPIRE no + + # Set multiple fields, one with expiry + r HSET myhash persistent1 "a" persistent2 "b" + r HSETEX myhash PX 5 FIELDS 1 expiring "c" + + # Wait for expiration + after 10 + + # HSCAN must not return the expired field + set cursor 0 + set allfields {} + while {1} { + set res [r HSCAN myhash $cursor] + set cursor [lindex $res 0] + set kvs [lindex $res 1] + lappend allfields {*}$kvs + if {$cursor eq "0"} break + } + + # Extract just the field names + set fieldnames [lmap {k v} $allfields { set k }] + set fieldnames_sorted [lsort $fieldnames] + + # Should only include persistent1 and persistent2 + assert_equal {persistent1 persistent2} $fieldnames_sorted + + # Re-enable active expiry for future tests + r DEBUG SET-ACTIVE-EXPIRE yes + } {} {needs:debug} + + test {MOVE preserves field TTLs} { + r FLUSHALL + r SELECT 0 + r HSETEX myhash PX 50000 FIELDS 1 field1 val1 + + # Capture original TTL + set original_ttl [r HPTTL myhash FIELDS 1 field1] + assert {$original_ttl > 0} + + # Move to DB 1 + assert_equal 1 [r MOVE myhash 1] + + # Switch to target DB + r SELECT 1 + + # Field must exist and TTL must be preserved + set moved_ttl [r HPTTL myhash FIELDS 1 field1] + assert {$moved_ttl > 0 && $moved_ttl <= $original_ttl} + } {} {needs:debug} + + test {HSET - overwrite expired field without TTL clears expiration} { + r FLUSHALL + r debug SET-ACTIVE-EXPIRE no + + # This test verifies that if a field has expired (but not yet lazily deleted), + # and it is overwritten using a plain HSET (i.e., no TTL), + # Valkey treats the field as non existing and updates it, + # effectively clearing the old TTL and making the field 
persistent. + + r HSETEX myhash PX 10 FIELDS 1 field1 oldval + wait_for_condition 100 100 { + [r HTTL myhash FIELDS 1 field1] eq "-2" + } else { + fail "hash value was not expired after timeout" + } + + # Field should still be present in memory due to lazy expiry + assert_equal 1 [r HLEN myhash] + + # Overwrite with HSET (no TTL) before accessing + r HSET myhash field1 newval + + # TTL should now be gone; field becomes persistent + set ttl [r HPTTL myhash FIELDS 1 field1] + assert_equal -1 $ttl + assert_equal newval [r HGET myhash field1] + assert_equal 1 [r HLEN myhash] + + r debug SET-ACTIVE-EXPIRE yes + } {} {needs:debug} + + test {HINCRBY - on expired field} { + r FLUSHALL + r debug SET-ACTIVE-EXPIRE no + + # This test verifies that if a field has expired, + # and it is overwritten using a plain HINCRBY (i.e., no TTL), + # Valkey treats the field as still existing and updates it, + # effectively clearing the old TTL and starting the value from 0. + + r HSETEX myhash PX 10 FIELDS 1 field1 1 + wait_for_condition 100 100 { + [r HTTL myhash FIELDS 1 field1] eq "-2" + } else { + fail "hash value was not expired after timeout" + } + + # Field should still be present in memory + assert_equal 1 [r HLEN myhash] + + # Overwrite with HINCRBY (no TTL) before accessing + r HINCRBY myhash field1 1 + + # Sanity check: check we only have one field in the hash + assert_equal 1 [r HLEN myhash] + + # TTL should now be gone; field becomes persistent + set ttl [r HPTTL myhash FIELDS 1 field1] + assert_equal -1 $ttl + assert_equal 1 [r HGET myhash field1] + assert_equal 1 [r HLEN myhash] + + # set expiration on the field + assert_equal 1 [r HEXPIRE myhash 100000000 FIELDS 1 field1] + # verify the field has TTL + assert_morethan [r HPTTL myhash FIELDS 1 field1] 0 + # now incr the field again + assert_equal 2 [r HINCRBY myhash field1 1] + # verify the field has TTL + assert_morethan [r HPTTL myhash FIELDS 1 field1] 0 + r debug SET-ACTIVE-EXPIRE yes + } {} {needs:debug} + + test 
{HINCRBYFLOAT - on expired field} { + r FLUSHALL + r debug SET-ACTIVE-EXPIRE no + + # This test verifies that if a field has expired, + # and it is overwritten using a plain HINCRBYFLOAT (i.e., no TTL), + # Valkey treats the field as still existing and updates it, + # effectively clearing the old TTL and starting the value from 0. + + r HSETEX myhash PX 10 FIELDS 1 field1 1 + wait_for_condition 100 100 { + [r HTTL myhash FIELDS 1 field1] eq "-2" + } else { + fail "hash value was not expired after timeout" + } + + # Field should still be present in memory + assert_equal 1 [r HLEN myhash] + + # Overwrite with HINCRBYFLOAT (no TTL) before accessing + r HINCRBYFLOAT myhash field1 1 + + # Sanity check: check we only have one field in the hash + assert_equal 1 [r HLEN myhash] + + # TTL should now be gone; field becomes persistent + set ttl [r HPTTL myhash FIELDS 1 field1] + assert_equal -1 $ttl + assert_equal 1 [r HGET myhash field1] + assert_equal 1 [r HLEN myhash] + + # set expiration on the field + assert_equal 1 [r HEXPIRE myhash 100000000 FIELDS 1 field1] + # verify the field has TTL + assert_morethan [r HPTTL myhash FIELDS 1 field1] 0 + # now incr the field again + assert_equal 2 [r HINCRBYFLOAT myhash field1 1] + # verify the field has TTL + assert_morethan [r HPTTL myhash FIELDS 1 field1] 0 + r debug SET-ACTIVE-EXPIRE yes + } {} {needs:debug} + + test {HSET - overwrite unexpired field removes TTL} { + r FLUSHALL + r debug SET-ACTIVE-EXPIRE no + + # This test verifies that overwriting a field with HSET, + # even while its TTL is still valid (not expired), + # clears the TTL and makes the field persistent. + # This behavior is consistent with how HSET works for normal keys. 
+ + # Set field with long TTL + r HSETEX myhash PX 1000 FIELDS 1 field1 val1 + + # Confirm TTL is active + set before [r HPTTL myhash FIELDS 1 field1] + assert {$before > 0} + + # Overwrite with HSET before TTL expires + r HSET myhash field1 newval + + # TTL should now be gone + set after [r HPTTL myhash FIELDS 1 field1] + assert_equal -1 $after + assert_equal newval [r HGET myhash field1] + + r debug SET-ACTIVE-EXPIRE yes + } {} {needs:debug} + + test {HDEL - expired field is removed without triggering expiry logic} { + r FLUSHALL + r debug SET-ACTIVE-EXPIRE no + + # This test proves that deleting an expired field with HDEL + # does NOT trigger Valkey's expiration mechanism. + # + # The key observation is that Valkey tracks how many fields were + # expired via TTL using the `expired_subkeys` counter in INFO stats. + # If HDEL caused expiration to be processed internally, + # this counter would increment. We assert that it remains unchanged. + + # Capture expired_subkeys before + set before_info [r INFO stats] + set before [info_field $before_info expired_subkeys] + + # Create field with short TTL + r HSETEX myhash PX 10 FIELDS 1 field1 val1 + after 20 + + # Field is technically expired, but still in-memory due to lazy expiry + assert_equal 1 [r HLEN myhash] + + # Delete the expired field directly + r HDEL myhash field1 + + # Field should be gone + assert_equal 0 [r HEXISTS myhash field1] + + # Capture expired_subkeys again + set after_info [r INFO stats] + set after [info_field $after_info expired_subkeys] + + # Verify that no expiry occurred internally + assert_equal $before $after + r debug SET-ACTIVE-EXPIRE yes + } {} {needs:debug} +} ####### Test info start_server {tags {"hash-ttl-info external:skip"}} { From 98f7e1a8730304af74103bef2523cd9e5948f009 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Thu, 10 Jul 2025 10:16:00 +0300 Subject: [PATCH 088/119] fix tests return validation Signed-off-by: Ran Shidlansik --- tests/unit/hashexpire.tcl | 14 
+++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/unit/hashexpire.tcl b/tests/unit/hashexpire.tcl index 8171b1873b..499ebab341 100644 --- a/tests/unit/hashexpire.tcl +++ b/tests/unit/hashexpire.tcl @@ -1392,7 +1392,7 @@ start_server {tags {"hashexpire"}} { # Re-enable active expiry r DEBUG SET-ACTIVE-EXPIRE yes - } {} {needs:debug} + } {OK} {needs:debug} test {HSCAN skips expired fields} { r FLUSHALL @@ -1425,7 +1425,7 @@ start_server {tags {"hashexpire"}} { # Re-enable active expiry for future tests r DEBUG SET-ACTIVE-EXPIRE yes - } {} {needs:debug} + } {OK} {needs:debug} test {MOVE preserves field TTLs} { r FLUSHALL @@ -1476,7 +1476,7 @@ start_server {tags {"hashexpire"}} { assert_equal 1 [r HLEN myhash] r debug SET-ACTIVE-EXPIRE yes - } {} {needs:debug} + } {OK} {needs:debug} test {HINCRBY - on expired field} { r FLUSHALL @@ -1518,7 +1518,7 @@ start_server {tags {"hashexpire"}} { # verify the field has TTL assert_morethan [r HPTTL myhash FIELDS 1 field1] 0 r debug SET-ACTIVE-EXPIRE yes - } {} {needs:debug} + } {OK} {needs:debug} test {HINCRBYFLOAT - on expired field} { r FLUSHALL @@ -1560,7 +1560,7 @@ start_server {tags {"hashexpire"}} { # verify the field has TTL assert_morethan [r HPTTL myhash FIELDS 1 field1] 0 r debug SET-ACTIVE-EXPIRE yes - } {} {needs:debug} + } {OK} {needs:debug} test {HSET - overwrite unexpired field removes TTL} { r FLUSHALL @@ -1587,7 +1587,7 @@ start_server {tags {"hashexpire"}} { assert_equal newval [r HGET myhash field1] r debug SET-ACTIVE-EXPIRE yes - } {} {needs:debug} + } {OK} {needs:debug} test {HDEL - expired field is removed without triggering expiry logic} { r FLUSHALL @@ -1625,7 +1625,7 @@ start_server {tags {"hashexpire"}} { # Verify that no expiry occurred internally assert_equal $before $after r debug SET-ACTIVE-EXPIRE yes - } {} {needs:debug} + } {OK} {needs:debug} } ####### Test info From 6b955fa3a5b64b15496659dd2acb739218040d42 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Thu, 10 
Jul 2025 16:46:43 +0300 Subject: [PATCH 089/119] change back the hrandfield to return an empty response in case not found Signed-off-by: Ran Shidlansik --- src/t_hash.c | 19 ++++++------------- tests/unit/hashexpire.tcl | 7 ++++++- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/src/t_hash.c b/src/t_hash.c index c371c9da0a..2150b3d23c 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -835,27 +835,20 @@ void hashReplyFromListpackEntry(client *c, listpackEntry *e) { * 'val' can be NULL in which case it's not extracted. */ static void hashTypeRandomElement(robj *hashobj, unsigned long hashsize, listpackEntry *field, listpackEntry *val) { if (hashobj->encoding == OBJ_ENCODING_HASHTABLE) { - void *e = NULL, *max_expired_entry = NULL; - long long max_expired_time = EXPIRY_NONE; + void *e = NULL; int maxtries = 100; hashTypeIgnoreTTL(hashobj, true); while (!e) { hashtableFairRandomEntry(hashobj->ptr, &e); if (entryIsExpired(e) && --maxtries) { - long long entry_expired_time = entryGetExpiry(e); - /* in case we will not be able to locate an entry which is not expired, we end up returning - * an expired entry. this is somewhat aligned with the way generic keys are handled. - * We would, however try and use the entry with the latest expiration time. */ - if (!max_expired_entry || max_expired_time < entry_expired_time) { - max_expired_entry = e; - max_expired_time = entry_expired_time; - } e = NULL; continue; } else if (maxtries == 0) { - /* we exhausted all attempts to locate non-expired entry. - * We thus use the best one we found. */ - e = max_expired_entry; + /* in case we will not be able to locate an entry which is not expired, we will just not return any + * result. An alternative would have been that we end up returning an expired entry. 
*/ + field->sval = NULL; + if (val) val->sval = NULL; + break; } sds sds_field = entryGetField(e); field->sval = (unsigned char *)sds_field; diff --git a/tests/unit/hashexpire.tcl b/tests/unit/hashexpire.tcl index 499ebab341..c4c6757ae9 100644 --- a/tests/unit/hashexpire.tcl +++ b/tests/unit/hashexpire.tcl @@ -1293,7 +1293,9 @@ start_server {tags {"hashexpire"}} { } else { fail "Hash is showing expired elements" } - + # check that we get an empty response even though there are expired fields + assert_match {} [r hrandfield myhash 1] + # Now write a persistent element assert_equal {1} [r HSET myhash f5 v5] # make sure this is the element we will get all the time @@ -1360,6 +1362,9 @@ start_server {tags {"hashexpire"}} { fail "Hash is showing expired elements" } + # check that we get an empty response even though there are expired fields + assert_match {} [r hrandfield myhash 2] + # Now write a persistent elements assert_equal {3} [r HSET myhash f8 v8 f9 v9 f10 v10] # make sure this is the elements we will get all the time From 5314b19d7065c162ca62b0460e996bc3e343f14c Mon Sep 17 00:00:00 2001 From: Stav Ben-Tov <90314138+stav-bentov@users.noreply.github.com> Date: Sun, 13 Jul 2025 08:06:06 +0300 Subject: [PATCH 090/119] Add new tests and disable SET-ACTIVE-EXPIRE for some tests (#6) * Added tests to hashexpire.tcl * Disabled ACTIVE-EXPIRE for some tests in hashexpire.tcl * Fixed Replica Failover/Promotion to primary tests --------- Co-authored-by: Stav Bentov Signed-off-by: Ran Shidlansik --- tests/unit/hashexpire.tcl | 234 ++++++++++++++++++++++++++++++++++---- 1 file changed, 211 insertions(+), 23 deletions(-) diff --git a/tests/unit/hashexpire.tcl b/tests/unit/hashexpire.tcl index c4c6757ae9..8bffeeef38 100644 --- a/tests/unit/hashexpire.tcl +++ b/tests/unit/hashexpire.tcl @@ -18,6 +18,7 @@ start_server {tags {"hashexpire"}} { foreach command {EX PX EXAT PXAT} { test "HGETEX $command expiry" { r FLUSHALL + r DEBUG SET-ACTIVE-EXPIRE no r HSET myhash f1 v1 # 
Configuration dictionary mapping expiry commands to their test parameters: @@ -47,7 +48,9 @@ start_server {tags {"hashexpire"}} { } after $wait_time assert_equal "" [r HGET myhash f1] - } + # Re-enable active expiry + r DEBUG SET-ACTIVE-EXPIRE yes + } {OK} {needs:debug} test "HGETEX $command with mix of existing and non-existing fields" { r FLUSHALL @@ -74,6 +77,7 @@ start_server {tags {"hashexpire"}} { test "HGETEX $command on more then 1 field" { r FLUSHALL + r DEBUG SET-ACTIVE-EXPIRE no r HSET myhash f1 v1 f2 v2 set config [dict create \ @@ -101,7 +105,9 @@ start_server {tags {"hashexpire"}} { after $wait_time assert_equal "" [r HGET myhash f1] assert_equal "" [r HGET myhash f2] - } + # Re-enable active expiry + r DEBUG SET-ACTIVE-EXPIRE yes + } {OK} {needs:debug} test "HGETEX $command -> PERSIST" { r FLUSHALL @@ -227,6 +233,7 @@ start_server {tags {"hashexpire"}} { } test "HGETEX $command overwrites existing field TTL with smaller value" { + r FLUSHALL set config [dict create \ EX [list setup_cmd EX setup_val 100000 smaller_val 50000] \ PX [list setup_cmd PX setup_val 100000000 smaller_val 50000000] \ @@ -238,7 +245,6 @@ start_server {tags {"hashexpire"}} { set setup_val [dict get $params setup_val] set smaller_val [dict get $params smaller_val] - r FLUSHALL r HSETEX myhash $setup_cmd $setup_val FIELDS 1 f1 v1 set old_ttl [r HTTL myhash FIELDS 1 f1] r HGETEX myhash $command $smaller_val FIELDS 1 f1 @@ -376,7 +382,6 @@ start_server {tags {"hashexpire"}} { set rd [valkey_deferring_client] assert_equal {1} [psubscribe $rd __keyevent@*] - r HGETEX myhash $command $expire_time FIELDS 1 f1 assert_keyevent_pattern $rd hexpire myhash @@ -409,9 +414,9 @@ start_server {tags {"hashexpire"}} { # This HGETEX targets a non-existent field, so no notification about hexpire should be emitted r HGETEX myhash $command $expire_time FIELDS 1 f2 - # # Verify no notification (getting hset and not hexpire) - # r HSET dummy dummy dummy - # assert_keyevent_pattern $rd hset dummy + # 
Verify no notification (getting hset and not hexpire) + r HSET dummy dummy dummy + assert_keyevent_pattern $rd hset dummy $rd close } @@ -430,8 +435,6 @@ start_server {tags {"hashexpire"}} { assert_keyevent_pattern $rd hpersist myhash $rd close } - - foreach command {EX PX EXAT PXAT} { set config [dict create \ @@ -825,6 +828,15 @@ start_server {tags {"hashexpire"}} { assert {$ttl >= 2} } + # HEXPIRE on a non-existent field + test {HEXPIRE on a non-existent field (should not create field)} { + r FLUSHALL + r HSET myhash f1 v1 + r HEXPIRE myhash 1000 FIELDS 1 f2 + assert_equal 0 [r HEXISTS myhash f2] + assert_equal -2 [r HTTL myhash FIELDS 1 f2] + } + # Error Cases test {HEXPIRE - conflicting conditions error} { r FLUSHALL @@ -1282,6 +1294,15 @@ start_server {tags {"hashexpire"}} { # f4 does not exist assert_equal {1 -1 -2} [r hpersist myhash FIELDS 3 f1 f2 f4] } + + test {HPERSIST, then HEXPIRE, check new TTL is set} { + r FLUSHALL + r HSET myhash f1 v1 + r HEXPIRE myhash 1000 FIELDS 1 f1 + assert_equal 1 [r HPERSIST myhash FIELDS 1 f1] + r HEXPIRE myhash 2000 FIELDS 1 f1 + assert_morethan [r HTTL myhash FIELDS 1 f1] 1000 + } #################### HRANDFIELD ################## @@ -1631,6 +1652,17 @@ start_server {tags {"hashexpire"}} { assert_equal $before $after r debug SET-ACTIVE-EXPIRE yes } {OK} {needs:debug} + + test {HDEL on field with TTL, then re-add and check TTL is gone} { + r FLUSHALL + r HSET myhash f1 v1 + r HEXPIRE myhash 10000 FIELDS 1 f1 + assert_morethan [r HTTL myhash FIELDS 1 f1] 0 + r HDEL myhash f1 + r HSET myhash f1 v2 + assert_equal -1 [r HTTL myhash FIELDS 1 f1] + } + } ####### Test info @@ -1896,11 +1928,11 @@ start_server {tags {"hashexpire external:skip"}} { assert_equal {1} [psubscribe $rd_primary __keyevent@*] assert_equal {1} [psubscribe $rd_replica_1 __keyevent@*] - # Create hash and timing - f1 < f2 < f3 expiry times - set f1_exp [expr {[clock seconds] + 10000}] # Setup hash, set expire and set expire 0 $primary HSET myhash f1 v1 f2 
v2 ;# Should trigger 3 hset + # Create hash and timing - f1 < f2 expiry times + set f1_exp [expr {[clock seconds] + 10000}] $primary HEXPIREAT myhash $f1_exp FIELDS 1 f1 ;# Should trigger 3 hexpire wait_for_ofs_sync $primary $replica_1 @@ -1997,8 +2029,10 @@ start_server {tags {"hashexpire external:skip"}} { } } - test {Replica Failover/Promotion to Primary} { + test {Replica Failover} { $primary FLUSHALL + $primary DEBUG SET-ACTIVE-EXPIRE no + $replica_1 DEBUG SET-ACTIVE-EXPIRE no ####### Replication setup ####### $replica_1 replicaof $primary_host $primary_port wait_for_condition 50 100 { @@ -2059,25 +2093,149 @@ start_server {tags {"hashexpire external:skip"}} { fail "f1 not expired" } - # Verify expiry + # Verify expiry in replica assert_equal "" [$replica_1 HGET myhash f1] assert_equal 3 [$replica_1 HLEN myhash] + + # Verify no expiry in primary + assert_equal "v1" [$primary HGET myhash f1] + # Change TTL of f2 $replica_1 HEXPIRE myhash 1000000 FIELDS 1 f2 ;# will trigger hexpire assert_morethan [$replica_1 HTTL myhash FIELDS 1 f2] 9000 + assert_equal $f2_exp [$primary HEXPIRETIME myhash FIELDS 1 f2] + # Change TTL of f2 to 0 (immediate expiry) $replica_1 HGETEX myhash EX 0 FIELDS 1 f2 ;# will trigger hexpired # Verify final state assert_equal 2 [$replica_1 HLEN myhash] assert_equal "{} {} v3" [$replica_1 HGETEX myhash FIELDS 3 f1 f2 f3] + assert_equal "v1 v2 v3" [$primary HGETEX myhash FIELDS 3 f1 f2 f3] ;# No change for primary assert_keyevent_pattern $rd_replica hexpire myhash assert_keyevent_pattern $rd_replica hexpire myhash assert_keyevent_pattern $rd_replica hexpired myhash $rd_replica close - } + # Re-enable active expiry + $primary DEBUG SET-ACTIVE-EXPIRE yes + $replica_1 DEBUG SET-ACTIVE-EXPIRE yes + } {OK} {needs:debug} + + test {Promotion to primary} { + $primary FLUSHALL + $primary DEBUG SET-ACTIVE-EXPIRE no + $replica_1 DEBUG SET-ACTIVE-EXPIRE no + ####### Replication setup ####### + $replica_1 replicaof $primary_host $primary_port + 
wait_for_condition 50 100 { + [lindex [$replica_1 role] 0] eq {slave} && + [string match {*master_link_status:up*} [$replica_1 info replication]] + } else { + fail "Can't turn the instance into a replica" + } + + # Create hash fields with TTL on primary + set f1_exp [expr {[clock seconds] + 200}] + set f2_exp [expr {[clock seconds] + 300000}] + $primary HSET myhash f1 v1 f2 v2 f3 v3 + $primary HEXPIREAT myhash $f1_exp FIELDS 1 f1 + $primary HEXPIREAT myhash $f2_exp FIELDS 1 f2 + # f3 remains persistent + + # Wait for full sync + wait_for_ofs_sync $primary $replica_1 + + # Verify primary and replica are the same + foreach instance [list $primary $replica_1] { + assert_equal $f1_exp [$instance HEXPIRETIME myhash FIELDS 1 f1] + assert_equal $f2_exp [$instance HEXPIRETIME myhash FIELDS 1 f2] + assert_equal -1 [$instance HTTL myhash FIELDS 1 f3] + assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [$instance info keyspace]] keys=%d] + assert_equal "v1" [$instance HGET myhash f1] + assert_equal "v2" [$instance HGET myhash f2] + assert_equal "v3" [$instance HGET myhash f3] + assert_equal 3 [$instance HLEN myhash] + } + + # Perform promotion to primary + $primary FAILOVER TO $replica_1_host $replica_1_port + # Wait for replica to become primary + wait_for_condition 100 100 { + [info_field [$replica_1 info replication] role] eq "master" + } else { + fail "Replica didn't become master" + } + + # Setup keyspace notifications + $primary config set notify-keyspace-events KEA + $replica_1 config set notify-keyspace-events KEA + set rd_primary [valkey_deferring_client -1] + set rd_replica_1 [valkey_deferring_client $replica_1_host $replica_1_port] + assert_equal {1} [psubscribe $rd_primary __keyevent@*] + assert_equal {1} [psubscribe $rd_replica_1 __keyevent@*] + + # Check all values that checked before are the same after the failover + foreach instance [list $primary $replica_1] { + assert_equal $f1_exp [$instance HEXPIRETIME myhash FIELDS 1 f1] + assert_equal $f2_exp 
[$instance HEXPIRETIME myhash FIELDS 1 f2] + assert_equal -1 [$instance HTTL myhash FIELDS 1 f3] + assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [$instance info keyspace]] keys=%d] + assert_equal "v1 v2 v3" [$instance HMGET myhash f1 f2 f3] + assert_equal 3 [$instance HLEN myhash] + } + + # Set f1 to expire in 1 second and wait for expiration + $replica_1 HEXPIRE myhash 1 FIELDS 1 f1 ;# will trigger hexpire + wait_for_ofs_sync $replica_1 $primary + wait_for_condition 50 100 { + [$replica_1 HTTL myhash FIELDS 1 f1] eq -2 + } else { + fail "f1 not expired" + } + + # Verify replica and primary are sync + foreach instance [list $primary $replica_1] { + assert_equal $f2_exp [$instance HEXPIRETIME myhash FIELDS 1 f2] + assert_equal -2 [$instance HTTL myhash FIELDS 1 f1] + assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [$instance info keyspace]] keys=%d] + assert_equal "" [$instance HGET myhash f1] + assert_equal "v2" [$instance HGET myhash f2] + assert_equal "v3" [$instance HGET myhash f3] + assert_equal 3 [$instance HLEN myhash] + } + + # Change TTL of f2 + $replica_1 HEXPIRE myhash 1000000 FIELDS 1 f2 ;# will trigger hexpire + wait_for_ofs_sync $replica_1 $primary + foreach instance [list $primary $replica_1] { + assert_morethan [$instance HTTL myhash FIELDS 1 f2] 9000 + } + + # Change TTL of f2 to 0 (immediate expiry) + $replica_1 HGETEX myhash EX 0 FIELDS 1 f2 ;# will trigger hexpired for replica_1 and hdel for primary + # Verify final state + foreach instance [list $primary $replica_1] { + assert_equal 2 [$instance HLEN myhash] + assert_equal "" [$instance HGET myhash f1] + assert_equal "" [$instance HGET myhash f2] + assert_equal "v3" [$instance HGET myhash f3] + } + + foreach rd [list $rd_replica_1 $rd_primary] { + assert_keyevent_pattern $rd hexpire myhash + assert_keyevent_pattern $rd hexpire myhash + } + assert_keyevent_pattern $rd_replica_1 hexpired myhash + assert_keyevent_pattern $rd_primary hdel myhash + + $rd_replica_1 close + $rd_primary 
close + # Re-enable active expiry + $primary DEBUG SET-ACTIVE-EXPIRE yes + $replica_1 DEBUG SET-ACTIVE-EXPIRE yes + } {OK} {needs:debug} } } @@ -2099,6 +2257,8 @@ start_cluster 3 0 {tags {"cluster mytest external:skip"} overrides {cluster-node set key "{mymigrate}myhash" test {Hash with TTL fields migrates correctly between nodes} { + R 0 DEBUG SET-ACTIVE-EXPIRE no + R 1 DEBUG SET-ACTIVE-EXPIRE no # Create hash fields R 0 HSET $key f1 v1 f2 v2 f3 v3 @@ -2142,13 +2302,17 @@ start_cluster 3 0 {tags {"cluster mytest external:skip"} overrides {cluster-node assert_equal -2 [R 1 HTTL $key FIELDS 1 f1] $rd close - } + # Re-enable active expiry + R 0 DEBUG SET-ACTIVE-EXPIRE yes + R 1 DEBUG SET-ACTIVE-EXPIRE yes + } {OK} {needs:debug} } start_server {tags {"hashexpire external:skip"}} { foreach cmd {RENAME RESTORE} { test "$cmd Preserves Field TTLs" { r FLUSHALL + r DEBUG SET-ACTIVE-EXPIRE no r HSET myhash f1 v1 f2 v2 r HEXPIRE myhash 200 FIELDS 1 f1 @@ -2189,6 +2353,7 @@ start_server {tags {"hashexpire external:skip"}} { test {COPY Preserves TTLs} { r flushall + r DEBUG SET-ACTIVE-EXPIRE no # Create hash with fields r HSET myhash f1 v1 f3 v3 f4 v4 @@ -2256,10 +2421,13 @@ start_server {tags {"hashexpire external:skip"}} { assert_equal "" [r HGET myhash f3] assert_equal "v1" [r HGET newhash2 f1] assert_equal "v3" [r HGET newhash2 f3] - } + # Re-enable active expiry + r DEBUG SET-ACTIVE-EXPIRE yes + } {OK} {needs:debug} test {Hash Encoding Transitions with TTL - Add TTL to Existing Fields} { r flushall + r DEBUG SET-ACTIVE-EXPIRE no # Create small hash with listpack encoding r HSET myhash f1 v1 f2 v2 @@ -2279,10 +2447,13 @@ start_server {tags {"hashexpire external:skip"}} { # Veridy expiry assert_morethan [r HTTL myhash FIELDS 1 f1] 100 assert_equal -1 [r HTTL myhash FIELDS 1 f2] - } + # Re-enable active expiry + r DEBUG SET-ACTIVE-EXPIRE yes + } {OK} {needs:debug} test {Hash Encoding Transitions with TTL - Create New Fields with TTL} { r flushall + r DEBUG SET-ACTIVE-EXPIRE 
no # Create small hash with listpack encoding r HSET myhash f1 v1 f2 v2 @@ -2309,7 +2480,9 @@ start_server {tags {"hashexpire external:skip"}} { assert_equal -1 [r HTTL myhash FIELDS 1 "f$i"] } } - } + # Re-enable active expiry + r DEBUG SET-ACTIVE-EXPIRE yes + } {OK} {needs:debug} } start_server {tags {"hashexpire external:skip"}} { @@ -2318,6 +2491,7 @@ start_server {tags {"hashexpire external:skip"}} { foreach time_unit {s, ms} { test "Key TTL expires before field TTL: entire hash should be deleted timeunit: $time_unit" { r FLUSHALL + r DEBUG SET-ACTIVE-EXPIRE no r config set notify-keyspace-events KEA set rd [valkey_deferring_client] assert_equal {1} [psubscribe $rd __keyevent@*] @@ -2345,10 +2519,13 @@ start_server {tags {"hashexpire external:skip"}} { assert_keyevent_pattern $rd hexpire myhash assert_keyevent_pattern $rd expire myhash $rd close - } + # Re-enable active expiry + r DEBUG SET-ACTIVE-EXPIRE yes + } {OK} {needs:debug} test "Field TTL expires before key TTL: only the specific field should expire: $time_unit" { r FLUSHALL + r DEBUG SET-ACTIVE-EXPIRE no set rd [valkey_deferring_client] assert_equal {1} [psubscribe $rd __keyevent@*] @@ -2375,10 +2552,13 @@ start_server {tags {"hashexpire external:skip"}} { assert_keyevent_pattern $rd hset myhash assert_keyevent_pattern $rd hexpire myhash $rd close - } + # Re-enable active expiry + r DEBUG SET-ACTIVE-EXPIRE yes + } {OK} {needs:debug} test "Key and field TTL expire simultaneously: entire hash should be deleted: $time_unit" { r FLUSHALL + r DEBUG SET-ACTIVE-EXPIRE no r HSET myhash f1 v1 f2 v2 f3 v3 assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] @@ -2406,10 +2586,13 @@ start_server {tags {"hashexpire external:skip"}} { assert_equal "" [r HGET myhash f3] assert_match "" [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] assert_equal 0 [r HLEN myhash] - } + # Re-enable active expiry + r DEBUG SET-ACTIVE-EXPIRE yes + } {OK} {needs:debug} test 
{Millisecond/Seconds precision} { r flushall + r DEBUG SET-ACTIVE-EXPIRE no r HSET myhash f1 v1 f2 v2 if {$time_unit eq "s"} { @@ -2422,7 +2605,9 @@ start_server {tags {"hashexpire external:skip"}} { after 1500 assert_equal 0 [r EXISTS myhash] - } + # Re-enable active expiry + r DEBUG SET-ACTIVE-EXPIRE yes + } {OK} {needs:debug} } test {Ensure that key-level PERSIST on the key don't affect field TTL} { @@ -2446,6 +2631,7 @@ tags {"aof external:skip"} { start_server_aof [list dir $server_path] { test {TTL Persistence in AOF} { r flushall + r DEBUG SET-ACTIVE-EXPIRE no r config set appendonly yes r config set appendfsync always @@ -2537,6 +2723,8 @@ tags {"aof external:skip"} { assert_equal -1 [r HTTL myhash FIELDS 1 f$i] assert_equal v$i [r HGET myhash f$i] } - } + # Re-enable active expiry + r DEBUG SET-ACTIVE-EXPIRE yes + } {OK} {needs:debug} } } From 2b5d8b2a28339cda5b555f7dae62995bca9f01e6 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Sun, 13 Jul 2025 10:25:04 +0300 Subject: [PATCH 091/119] address PR comments Signed-off-by: Ran Shidlansik --- src/hashtable.c | 14 +++++++------- src/hashtable.h | 5 +++-- src/server.c | 2 +- src/t_hash.c | 8 ++++---- 4 files changed, 15 insertions(+), 14 deletions(-) diff --git a/src/hashtable.c b/src/hashtable.c index 1af8086d55..1742c4842d 100644 --- a/src/hashtable.c +++ b/src/hashtable.c @@ -369,8 +369,8 @@ typedef struct { /* --- Internal functions --- */ /* --- Access API --- */ -static inline hashtableEntryValidationState validateElementIfNeeded(hashtable *ht, void *elem) { - if (ht->type->validateEntry == NULL) return ENTRY_VALID; +static inline bool validateElementIfNeeded(hashtable *ht, void *elem) { + if (ht->type->validateEntry == NULL) return true; return ht->type->validateEntry(ht, elem); } @@ -691,7 +691,7 @@ static inline int checkCandidateInBucket(hashtable *ht, bucket *b, int pos, cons if (compareKeys(ht, key, elem_key) == 0) { /* It's a match. 
*/ assert(pos_in_bucket != NULL); - if (validateElementIfNeeded(ht, entry) != ENTRY_VALID) { + if (!validateElementIfNeeded(ht, entry)) { return 0; } *pos_in_bucket = pos; @@ -1786,7 +1786,7 @@ size_t hashtableScanDefrag(hashtable *ht, size_t cursor, hashtableScanFunction f if (b->presence != 0) { int pos; for (pos = 0; pos < ENTRIES_PER_BUCKET; pos++) { - if (isPositionFilled(b, pos) && validateElementIfNeeded(ht, b->entries[pos]) == ENTRY_VALID) { + if (isPositionFilled(b, pos) && validateElementIfNeeded(ht, b->entries[pos])) { void *emit = emit_ref ? &b->entries[pos] : b->entries[pos]; fn(privdata, emit); } @@ -1828,7 +1828,7 @@ size_t hashtableScanDefrag(hashtable *ht, size_t cursor, hashtableScanFunction f do { if (b->presence) { for (int pos = 0; pos < ENTRIES_PER_BUCKET; pos++) { - if (isPositionFilled(b, pos) && validateElementIfNeeded(ht, b->entries[pos]) == ENTRY_VALID) { + if (isPositionFilled(b, pos) && validateElementIfNeeded(ht, b->entries[pos])) { void *emit = emit_ref ? &b->entries[pos] : b->entries[pos]; fn(privdata, emit); } @@ -1858,7 +1858,7 @@ size_t hashtableScanDefrag(hashtable *ht, size_t cursor, hashtableScanFunction f do { if (b->presence) { for (int pos = 0; pos < ENTRIES_PER_BUCKET; pos++) { - if (isPositionFilled(b, pos) && validateElementIfNeeded(ht, b->entries[pos]) == ENTRY_VALID) { + if (isPositionFilled(b, pos) && validateElementIfNeeded(ht, b->entries[pos])) { void *emit = emit_ref ? &b->entries[pos] : b->entries[pos]; fn(privdata, emit); } @@ -2048,7 +2048,7 @@ int hashtableNext(hashtableIterator *iterator, void **elemptr) { /* No entry here. */ continue; } - if (!(iter->flags & HASHTABLE_ITER_SKIP_VALIDATION) && validateElementIfNeeded(iter->hashtable, b->entries[iter->pos_in_bucket]) != ENTRY_VALID) { + if (!(iter->flags & HASHTABLE_ITER_SKIP_VALIDATION) && !validateElementIfNeeded(iter->hashtable, b->entries[iter->pos_in_bucket])) { continue; } /* Return the entry at this position. 
*/ diff --git a/src/hashtable.h b/src/hashtable.h index d0c6a9d81e..b4e8235cc3 100644 --- a/src/hashtable.h +++ b/src/hashtable.h @@ -31,6 +31,7 @@ #include #include #include +#include <stdbool.h> /* --- Opaque types --- */ @@ -62,8 +63,8 @@ typedef struct { /* Compare function, returns 0 if the keys are equal. Defaults to just * comparing the pointers for equality. */ int (*keyCompare)(const void *key1, const void *key2); - /* Check for entry access is valid or not. Invalid access will just treat the entry as not-exist. */ - hashtableEntryValidationState (*validateEntry)(hashtable *ht, void *entry); + /* Returns true if the entry is valid and may be accessed. Returns false if access should be masked, in which case the entry is treated as non-existent. */ + bool (*validateEntry)(hashtable *ht, void *entry); /* Callback to free an entry when it's overwritten or deleted. * Optional. */ void (*entryDestructor)(void *entry); diff --git a/src/server.c b/src/server.c index 8f94c1be70..97770ea8d7 100644 --- a/src/server.c +++ b/src/server.c @@ -677,7 +677,7 @@ size_t hashHashtableTypeMetadataSize(void) { return sizeof(void *); } -extern hashtableEntryValidationState hashHashtableTypeValidate(hashtable *ht, void *entry); +extern bool hashHashtableTypeValidate(hashtable *ht, void *entry); hashtableType hashHashtableType = { .hashFunction = dictSdsHash, diff --git a/src/t_hash.c b/src/t_hash.c index 2150b3d23c..0ac10c3de2 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -137,14 +137,14 @@ void hashTypeTrackUpdateEntry(robj *o, void *old_entry, void *new_entry, long lo } } -hashtableEntryValidationState hashHashtableTypeValidate(hashtable *ht, void *entry) { +bool hashHashtableTypeValidate(hashtable *ht, void *entry) { UNUSED(ht); expirationPolicy policy = getExpirationPolicyWithFlags(0); - if (policy == POLICY_IGNORE_EXPIRE) return ENTRY_VALID; + if (policy == POLICY_IGNORE_EXPIRE) return true; - if (!entryIsExpired(entry)) return ENTRY_VALID; + if (!entryIsExpired(entry)) return true; - return ENTRY_INVALID; + return false; } 
/*----------------------------------------------------------------------------- From a3850cea8769cab02211d63404ef16c2f5c825b4 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Sun, 13 Jul 2025 11:36:07 +0300 Subject: [PATCH 092/119] remove unneeded enum Signed-off-by: Ran Shidlansik --- src/hashtable.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/hashtable.h b/src/hashtable.h index 05f109f445..3e8ec08ddd 100644 --- a/src/hashtable.h +++ b/src/hashtable.h @@ -43,11 +43,6 @@ typedef uint64_t hashtableIterator[5]; typedef uint64_t hashtablePosition[2]; typedef uint64_t hashtableIncrementalFindState[5]; -typedef enum { - ENTRY_VALID = 0, - ENTRY_INVALID -} hashtableEntryValidationState; - /* --- Non-opaque types --- */ /* The hashtableType is a set of callbacks for a hashtable. All callbacks are From 36dd81e8aa0967aaec3e6f013cf41db3853efb46 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Tue, 15 Jul 2025 08:24:31 +0300 Subject: [PATCH 093/119] address PR comments Signed-off-by: Ran Shidlansik --- src/entry.c | 150 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 87 insertions(+), 63 deletions(-) diff --git a/src/entry.c b/src/entry.c index dccfa61c61..50affa67cb 100644 --- a/src/entry.c +++ b/src/entry.c @@ -96,7 +96,7 @@ entry *entrySetValue(entry *e, sds value) { } /* Returns the address of the entry allocation. 
*/ -void *entryAllocPtr(const entry *entry) { +void *entryGetAllocPtr(const entry *entry) { char *buf = sdsAllocPtr(entry); if (entryHasValuePtr(entry)) buf -= sizeof(sds); if (entryHasExpiry(entry)) buf -= sizeof(long long); @@ -114,12 +114,10 @@ bool entryHasEmbeddedValue(entry *entry) { long long entryGetExpiry(const entry *entry) { long long expiry = EXPIRY_NONE; if (entryHasExpiry(entry)) { - char *buf = sdsAllocPtr(entry); + char *buf = entryGetAllocPtr(entry); debugServerAssert((((uintptr_t)buf & 0x7) == 0)); /* Test that the allocation is indeed 8 bytes aligned * This is needed since we access the expiry as with pointer casting * which require the access to be 8 bytes aligned. */ - if (entryHasValuePtr(entry)) buf -= sizeof(sds); - buf -= sizeof(long long); expiry = *(long long *)buf; } return expiry; @@ -128,12 +126,10 @@ long long entryGetExpiry(const entry *entry) { /* Modify the expiration time of this entry and return a pointer to the (potentially new) entry. */ entry *entrySetExpiry(entry *e, long long expiry) { if (entryHasExpiry(e)) { - char *buf = sdsAllocPtr(e); + char *buf = entryGetAllocPtr(e); debugServerAssert((((uintptr_t)buf & 0x7) == 0)); /* Test that the allocation is indeed 8 bytes aligned * This is needed since we access the expiry as with pointer casting * which require the access to be 8 bytes aligned. */ - if (entryHasValuePtr(e)) buf -= sizeof(sds); - buf -= sizeof(expiry); *(long long *)buf = expiry; return e; } @@ -151,25 +147,30 @@ void entryFree(entry *entry) { if (entryHasValuePtr(entry)) { sdsfree(entryGetValue(entry)); } - zfree(entryAllocPtr(entry)); + zfree(entryGetAllocPtr(entry)); } -/* Takes ownership of value. does not take ownership of field */ -entry *entryCreate(const_sds field, sds value, long long expiry) { - sds embedded_field_sds; - size_t expiry_size = (expiry == EXPIRY_NONE) ? 
0 : sizeof(long long); +static inline size_t entryReqSize(const_sds field, + sds value, + long long expiry, + bool *is_value_embedded, + int *field_sds_type, + size_t *field_size, + size_t *expiry_size, + size_t *embedded_value_size) { + size_t expiry_alloc_size = (expiry == EXPIRY_NONE) ? 0 : sizeof(long long); size_t field_len = sdslen(field); - int field_sds_type = sdsReqType(field_len); - if (field_sds_type == SDS_TYPE_5 && (expiry_size > 0)) { - field_sds_type = SDS_TYPE_8; + int embedded_field_sds_type = sdsReqType(field_len); + if (embedded_field_sds_type == SDS_TYPE_5 && (expiry_alloc_size > 0)) { + embedded_field_sds_type = SDS_TYPE_8; } - size_t field_size = sdsReqSize(field_len, field_sds_type); + size_t field_alloc_size = sdsReqSize(field_len, embedded_field_sds_type); size_t value_len = value ? sdslen(value) : 0; - size_t embedded_value_size = value ? sdsReqSize(value_len, SDS_TYPE_8) : 0; - size_t alloc_size = field_size + expiry_size; + size_t embedded_value_alloc_size = value ? sdsReqSize(value_len, SDS_TYPE_8) : 0; + size_t alloc_size = field_alloc_size + expiry_alloc_size; bool embed_value = false; if (value) { - if (alloc_size + embedded_value_size <= EMBED_VALUE_MAX_ALLOC_SIZE) { + if (alloc_size + embedded_value_alloc_size <= EMBED_VALUE_MAX_ALLOC_SIZE) { /* Embed field and value. Value is fixed to SDS_TYPE_8. Unused * allocation space is recorded in the embedded value's SDS header. * @@ -179,7 +180,7 @@ entry *entryCreate(const_sds field, sds value, long long expiry) { * +------+--------------+---------------+ */ embed_value = true; - alloc_size += embedded_value_size; + alloc_size += embedded_value_alloc_size; } else { /* Embed field, but not value. Field must be >= SDS_TYPE_8 to encode to * indicate this type of entry. 
@@ -191,21 +192,37 @@ entry *entryCreate(const_sds field, sds value, long long expiry) { */ embed_value = false; alloc_size += sizeof(sds); - if (field_sds_type == SDS_TYPE_5) { - field_sds_type = SDS_TYPE_8; - alloc_size -= field_size; - field_size = sdsReqSize(field_len, field_sds_type); - alloc_size += field_size; + if (embedded_field_sds_type == SDS_TYPE_5) { + embedded_field_sds_type = SDS_TYPE_8; + alloc_size -= field_alloc_size; + field_alloc_size = sdsReqSize(field_len, embedded_field_sds_type); + alloc_size += field_alloc_size; } } } - /* allocate the buffer */ + if (expiry_size) *expiry_size = expiry_alloc_size; + if (field_sds_type) *field_sds_type = embedded_field_sds_type; + if (field_size) *field_size = field_alloc_size; + if (embedded_value_size) *embedded_value_size = embedded_value_alloc_size; + if (is_value_embedded) *is_value_embedded = embed_value; + + return alloc_size; +} + +/* Takes ownership of value. does not take ownership of field */ +entry *entryCreate(const_sds field, sds value, long long expiry) { + bool embed_value = false; + int embedded_field_sds_type; + size_t expiry_size, embedded_value_sds_size, embedded_field_sds_size; + size_t alloc_size = entryReqSize(field, value, expiry, &embed_value, &embedded_field_sds_type, &embedded_field_sds_size, &expiry_size, &embedded_value_sds_size); size_t buf_size; + + /* allocate the buffer */ char *buf = zmalloc_usable(alloc_size, &buf_size); /* Set The expiry if exists */ if (expiry_size) { - memcpy(buf, &expiry, expiry_size); + *(long long *)buf = expiry; buf += expiry_size; buf_size -= expiry_size; } @@ -215,60 +232,64 @@ entry *entryCreate(const_sds field, sds value, long long expiry) { buf += sizeof(sds); buf_size -= sizeof(sds); } else { - sdswrite(buf + field_size, buf_size - field_size, SDS_TYPE_8, value, value_len); + sdswrite(buf + embedded_field_sds_size, buf_size - embedded_field_sds_size, SDS_TYPE_8, value, sdslen(value)); sdsfree(value); - buf_size -= embedded_value_size; + 
buf_size -= embedded_value_sds_size; } } /* Set the field data */ - embedded_field_sds = sdswrite(buf, field_size, field_sds_type, field, field_len); + entry *new_entry = sdswrite(buf, embedded_field_sds_size, embedded_field_sds_type, field, sdslen(field)); /* Field sds aux bits are zero, which we use for this entry encoding. */ - sdsSetAuxBit(embedded_field_sds, FIELD_SDS_AUX_BIT_ENTRY_HAS_VALUE_PTR, embed_value ? 0 : 1); - sdsSetAuxBit(embedded_field_sds, FIELD_SDS_AUX_BIT_ENTRY_HAS_EXPIRY, expiry_size > 0 ? 1 : 0); - return (void *)embedded_field_sds; + sdsSetAuxBit(new_entry, FIELD_SDS_AUX_BIT_ENTRY_HAS_VALUE_PTR, embed_value ? 0 : 1); + sdsSetAuxBit(new_entry, FIELD_SDS_AUX_BIT_ENTRY_HAS_EXPIRY, expiry_size > 0 ? 1 : 0); + + /* Check that the new entry was built correctly */ + debugServerAssert(sdsGetAuxBit(new_entry, FIELD_SDS_AUX_BIT_ENTRY_HAS_VALUE_PTR) == (embed_value ? 0 : 1)); + debugServerAssert(sdsGetAuxBit(new_entry, FIELD_SDS_AUX_BIT_ENTRY_HAS_EXPIRY) == (expiry_size > 0 ? 1 : 0)); + return new_entry; } /* Modify the entry's value and/or expiration time. * In case the provided value is NULL, will use the existing value. */ entry *entryUpdate(entry *e, sds value, long long expiry) { sds field = (sds)e; + entry *new_entry = NULL; bool update_value = value ? true : false; - long long expiration_time = entryGetExpiry(e); - bool update_expiry = (expiry != expiration_time) ? true : false; + long long curr_expiration_time = entryGetExpiry(e); + bool update_expiry = (expiry != curr_expiration_time) ? true : false; + /* Just a sanity check. If nothing changes, lets just return */ if (!update_value && !update_expiry) return e; - expiration_time = expiry; - value = update_value ? value : entryGetValue(e); - size_t expiry_size = (expiration_time != EXPIRY_NONE) ? 
sizeof(expiration_time) : 0; - int field_sds_type = sdsReqType(sdslen(field)); - if (field_sds_type == SDS_TYPE_5 && (expiry_size > 0)) { - field_sds_type = SDS_TYPE_8; - } - size_t field_size = sdsHdrSize(field_sds_type) + sdsalloc(field) + 1; - size_t value_len = value ? sdslen(value) : 0; - size_t embedded_value_size = value ? sdsReqSize(value_len, SDS_TYPE_8) : 0; - size_t required_embedded_size = field_size + embedded_value_size + expiry_size; + if (!value) value = entryGetValue(e); + bool embed_value = false; + int embedded_field_sds_type; + size_t expiry_size, embedded_value_size, embedded_field_size; + size_t required_embedded_size = entryReqSize(field, value, expiry, &embed_value, &embedded_field_sds_type, &embedded_field_size, &expiry_size, &embedded_value_size); size_t current_embedded_allocation_size = entryHasValuePtr(e) ? 0 : entryMemUsage(e); + + bool expiry_add_remove = update_expiry && (curr_expiration_time == EXPIRY_NONE || expiry == EXPIRY_NONE); // In case we are toggling expiration + bool value_change_encoding = update_value && (embed_value != !entryHasValuePtr(e)); // In case we change the way value is embedded or not + + /* // We will create a new entry in the following cases: * 1. In the case were we add or remove expiration. - * 2. in the case were we are NOT migrating from an embedded entry to an embedded entry with ~the same size. */ - bool create_new_entry = (update_expiry && (entryGetExpiry(e) == EXPIRY_NONE || expiration_time == EXPIRY_NONE)) || - !(update_value && !entryHasValuePtr(e) && - required_embedded_size <= EMBED_VALUE_MAX_ALLOC_SIZE && - required_embedded_size <= current_embedded_allocation_size && - required_embedded_size >= current_embedded_allocation_size * 3 / 4); + * 2. We change the way value is encoded + * 3. in the case were we are NOT migrating from an embedded entry to an embedded entry with ~the same size. 
*/ + bool create_new_entry = (expiry_add_remove) || (value_change_encoding) || + (update_value && !entryHasValuePtr(e) && + !(required_embedded_size <= EMBED_VALUE_MAX_ALLOC_SIZE && + required_embedded_size <= current_embedded_allocation_size && + required_embedded_size >= current_embedded_allocation_size * 3 / 4)); if (!create_new_entry) { /* In this case we are sure we do not have to allocate new entry, so expiry must already be set. */ if (update_expiry) { serverAssert(entryHasExpiry(e)); - char *buf = sdsAllocPtr(e); - if (entryHasValuePtr(e)) buf -= sizeof(sds); - buf -= sizeof(expiry); - memcpy(buf, &expiry, sizeof(expiry)); + char *buf = entryGetAllocPtr(e); + *(long long *)buf = expiry; } /* In this case we are sure we do not have to allocate new entry, so value must already be set or we have enough room to embed it. */ if (update_value) { @@ -288,7 +309,7 @@ entry *entryUpdate(entry *e, sds value, long long expiry) { sdsfree(value); } } - return e; + new_entry = e; } else { if (!update_value) { @@ -303,11 +324,14 @@ entry *entryUpdate(entry *e, sds value, long long expiry) { *value_ref = NULL; } } + new_entry = entryCreate(entryGetField(e), value, expiry); + debugServerAssert(new_entry != e); + entryFree(e); } - - entry *new_entry = entryCreate(entryGetField(e), value, expiration_time); - debugServerAssert(new_entry != e); - entryFree(e); + /* Check that the new entry was built correctly */ + debugServerAssert(sdsGetAuxBit(new_entry, FIELD_SDS_AUX_BIT_ENTRY_HAS_VALUE_PTR) == (embed_value ? 0 : 1)); + debugServerAssert(sdsGetAuxBit(new_entry, FIELD_SDS_AUX_BIT_ENTRY_HAS_EXPIRY) == (expiry_size > 0 ? 1 : 0)); + serverAssert(new_entry); return new_entry; } @@ -319,7 +343,7 @@ size_t entryMemUsage(entry *entry) { if (entryHasValuePtr(entry)) { /* In case the value is not embedded we might not be able to sum all the allocation sizes since the field * header could be too small for holding the real allocation size. 
*/ - mem += zmalloc_usable_size(entryAllocPtr(entry)); + mem += zmalloc_usable_size(entryGetAllocPtr(entry)); } else { mem += sdsReqSize(sdslen(entry), sdsType(entry)); if (entryHasExpiry(entry)) mem += sizeof(long long); @@ -341,7 +365,7 @@ entry *entryDefrag(entry *entry, void *(*defragfn)(void *), sds (*sdsdefragfn)(s sds new_value = sdsdefragfn(*value_ref); if (new_value) *value_ref = new_value; } - char *allocation = entryAllocPtr(entry); + char *allocation = entryGetAllocPtr(entry); char *new_allocation = defragfn(allocation); if (new_allocation != NULL) { /* Return the same offset into the new allocation as the entry's offset From d1c5468aa1fd3ae63bae0bd85022a7655ace50b0 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Tue, 15 Jul 2025 08:33:42 +0300 Subject: [PATCH 094/119] fix bugs in volatile_set implementation Signed-off-by: Ran Shidlansik --- src/volatile_set.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/volatile_set.c b/src/volatile_set.c index 718cfecddf..97cbbbab87 100644 --- a/src/volatile_set.c +++ b/src/volatile_set.c @@ -68,8 +68,8 @@ void volatileSetStart(volatile_set *set, volatileSetIterator *it) { int volatileSetNext(volatileSetIterator *it, void **entryptr) { if (raxNext(&it->bucket)) { - assert(it->bucket.key_len != EXPIRY_HASH_SIZE); - memcpy(it->bucket.key + 8, entryptr, sizeof(*entryptr)); + assert(it->bucket.key_len == EXPIRY_HASH_SIZE); + memcpy(entryptr, it->bucket.key + sizeof(long long), sizeof(*entryptr)); return 1; } return 0; From d8c4054de79bb593f2920b6350266c8fe9384f08 Mon Sep 17 00:00:00 2001 From: Stav Ben-Tov <90314138+stav-bentov@users.noreply.github.com> Date: Tue, 15 Jul 2025 09:22:02 +0300 Subject: [PATCH 095/119] Reduce duplication in hashexpire.tcl (#7) Co-authored-by: Stav Bentov Signed-off-by: Ran Shidlansik --- tests/unit/hashexpire.tcl | 466 ++++++++++++++++---------------------- 1 file changed, 190 insertions(+), 276 deletions(-) diff --git a/tests/unit/hashexpire.tcl 
b/tests/unit/hashexpire.tcl index 8bffeeef38..be329e7624 100644 --- a/tests/unit/hashexpire.tcl +++ b/tests/unit/hashexpire.tcl @@ -8,11 +8,83 @@ proc info_field {info field} { return [s field_name] } -proc assert_keyevent_pattern {rd event_type key} { - set event [$rd read] - assert_match "pmessage __keyevent@* __keyevent@*:$event_type $key" $event +proc get_short_expire_value {command} { + expr { + ($command eq "HEXPIRE" || $command eq "EX") ? 1 : + ($command eq "HPEXPIRE" || $command eq "PX") ? 10 : + ($command eq "HEXPIREAT" || $command eq "EXAT") ? [clock seconds] + 1 : + [clock milliseconds] + 10 + } +} + +proc get_long_expire_value {command} { + expr { + ($command eq "HEXPIRE" || $command eq "EX") ? 60000000 : + ($command eq "HPEXPIRE" || $command eq "PX") ? 60000000 : + ($command eq "HEXPIREAT" || $command eq "EXAT") ? [clock seconds] + 60000000 : + [clock milliseconds] + 60000000 + } +} + +proc get_longer_then_long_expire_value {command} { + expr { + ($command eq "HEXPIRE" || $command eq "EX") ? 1200000000 : + ($command eq "HPEXPIRE" || $command eq "PX") ? 1200000000 : + ($command eq "HEXPIREAT" || $command eq "EXAT") ? [clock seconds] + 1200000000 : + [clock milliseconds] + 1200000000 + } +} + +proc get_past_zero_expire_value {command} { + expr { + ($command eq "HEXPIRE" || $command eq "EX") ? 0 : + ($command eq "HPEXPIRE" || $command eq "PX") ? 0 : + ($command eq "HEXPIREAT" || $command eq "EXAT") ? 
[clock seconds] - 200000 : + [clock milliseconds] - 200000 + } +} + +proc get_check_ttl_command {command} { + if {$command eq "EX"} { + return "HTTL" + } elseif {$command eq "PX"} { + return "HPTTL" + } elseif {$command eq "EXAT"} { + return "HEXPIRETIME" + } else { + return "HPEXPIRETIME" + } } +proc assert_keyevent_patterns {rd key args} { + foreach event_type $args { + set event [$rd read] + assert_match "pmessage __keyevent@* __keyevent@*:$event_type $key" $event + } +} + +proc setup_replication_test {primary replica primary_host primary_port} { + $primary FLUSHALL + $replica replicaof $primary_host $primary_port + wait_for_condition 50 100 { + [lindex [$replica role] 0] eq {slave} && + [string match {*master_link_status:up*} [$replica info replication]] + } else { + fail "Can't turn the instance into a replica" + } + set primary_initial_expired [info_field [$primary info stats] expired_subkeys] + set replica_initial_expired [info_field [$replica info stats] expired_subkeys] + return [list $primary_initial_expired $replica_initial_expired] +} + +proc setup_single_keyspace_notification {r} { + $r config set notify-keyspace-events KEA + set rd [valkey_deferring_client] + assert_equal {1} [psubscribe $rd __keyevent@*] + return $rd +} + + start_server {tags {"hashexpire"}} { ####### Valid scenarios tests ####### foreach command {EX PX EXAT PXAT} { @@ -21,20 +93,8 @@ start_server {tags {"hashexpire"}} { r DEBUG SET-ACTIVE-EXPIRE no r HSET myhash f1 v1 - # Configuration dictionary mapping expiry commands to their test parameters: - # - time: expiry value (seconds/milliseconds or absolute timestamp) - # - wait: milliseconds to wait before checking expiration - # - cmd: command to verify the TTL/expiry time - set config [dict create \ - EX [list time 1 wait 1100 cmd HTTL] \ - PX [list time 100 wait 150 cmd HPTTL] \ - EXAT [list time [expr {[clock seconds] + 1}] wait 1100 cmd HEXPIRETIME] \ - PXAT [list time [expr {[clock milliseconds] + 100}] wait 150 cmd HPEXPIRETIME] 
\ - ] - set params [dict get $config $command] - set expire_time [dict get $params time] - set wait_time [dict get $params wait] - set ttl_cmd [dict get $params cmd] + set ttl_cmd [get_check_ttl_command $command] + set expire_time [get_short_expire_value $command] # Verify HGETEX command assert_equal "v1" [r HGETEX myhash $command $expire_time FIELDS 1 f1] @@ -46,7 +106,7 @@ start_server {tags {"hashexpire"}} { } else { assert_morethan $expire_result 0 } - after $wait_time + after 1100 assert_equal "" [r HGET myhash f1] # Re-enable active expiry r DEBUG SET-ACTIVE-EXPIRE yes @@ -56,17 +116,8 @@ start_server {tags {"hashexpire"}} { r FLUSHALL r HSET myhash f1 v1 f3 v3 - set config [dict create \ - EX [list time 2000000] \ - PX [list time 2000000] \ - EXAT [list time [expr {[clock seconds] + 2000000}]] \ - PXAT [list time [expr {[clock milliseconds] + 20000000}]] \ - ] - set params [dict get $config $command] - set expire_time [dict get $params time] - # HGETEX on exist/non-exist fields - assert_equal "v1 {} v3" [r HGETEX myhash $command $expire_time FIELDS 3 f1 f2 f3] + assert_equal "v1 {} v3" [r HGETEX myhash $command [get_long_expire_value $command] FIELDS 3 f1 f2 f3] # Verification checks (f2 should not be created) assert_equal "" [r HGET myhash f2] @@ -80,29 +131,21 @@ start_server {tags {"hashexpire"}} { r DEBUG SET-ACTIVE-EXPIRE no r HSET myhash f1 v1 f2 v2 - set config [dict create \ - EX [list time 1 wait 1100 check_cmd HTTL] \ - PX [list time 100 wait 150 check_cmd HPTTL] \ - EXAT [list time [expr {[clock seconds] + 1}] wait 1100 check_cmd HEXPIRETIME] \ - PXAT [list time [expr {[clock milliseconds] + 100}] wait 150 check_cmd HPEXPIRETIME] \ - ] - set params [dict get $config $command] - set expire_time [dict get $params time] - set wait_time [dict get $params wait] - set check_cmd [dict get $params check_cmd] + set ttl_cmd [get_check_ttl_command $command] + set expire_time [get_short_expire_value $command] assert_equal "v1 v2" [r HGETEX myhash $command 
$expire_time FIELDS 2 f1 f2] # Verify expiration if {[regexp "AT$" $command]} { - assert_equal $expire_time [r $check_cmd myhash FIELDS 1 f1] - assert_equal $expire_time [r $check_cmd myhash FIELDS 1 f2] + assert_equal $expire_time [r $ttl_cmd myhash FIELDS 1 f1] + assert_equal $expire_time [r $ttl_cmd myhash FIELDS 1 f2] } else { - assert_morethan [r $check_cmd myhash FIELDS 1 f1] 0 - assert_morethan [r $check_cmd myhash FIELDS 1 f2] 0 + assert_morethan [r $ttl_cmd myhash FIELDS 1 f1] 0 + assert_morethan [r $ttl_cmd myhash FIELDS 1 f2] 0 } - after $wait_time + after 1100 assert_equal "" [r HGET myhash f1] assert_equal "" [r HGET myhash f2] # Re-enable active expiry @@ -114,20 +157,12 @@ start_server {tags {"hashexpire"}} { r HSET myhash f1 v1 r HSETEX myhash EX 10000 FIELDS 1 f2 v2 - set config [dict create \ - EX [list time 1 cmd HTTL check_cmd HTTL] \ - PX [list time 100 cmd HPTTL check_cmd HPTTL] \ - EXAT [list time [expr {[clock seconds] + 1}] cmd HTTL check_cmd HEXPIRETIME] \ - PXAT [list time [expr {[clock milliseconds] + 100}] cmd HPTTL check_cmd HPEXPIRETIME] \ - ] - set params [dict get $config $command] - set expire_time [dict get $params time] - set ttl_cmd [dict get $params cmd] - set check_cmd [dict get $params check_cmd] + set ttl_cmd [get_check_ttl_command $command] + set expire_time [get_short_expire_value $command] assert_equal "v1" [r HGETEX myhash $command $expire_time FIELDS 1 f1] if {[regexp "AT$" $command]} { - assert_equal $expire_time [r $check_cmd myhash FIELDS 1 f1] + assert_equal $expire_time [r $ttl_cmd myhash FIELDS 1 f1] } else { assert_morethan [r $ttl_cmd myhash FIELDS 1 f1] 0 } @@ -140,49 +175,38 @@ start_server {tags {"hashexpire"}} { test "HGETEX $command on non-exist field" { r FLUSHALL - r HSET myhash f1 v1 - - set config [dict create \ - EX [list time 1] \ - PX [list time 100] \ - EXAT [list time [expr {[clock seconds] + 1}]] \ - PXAT [list time [expr {[clock milliseconds] + 100}]] \ - ] - set params [dict get $config 
$command] - set expire_time [dict get $params time] - - assert_equal {{}} [r HGETEX myhash $command $expire_time FIELDS 1 f2] + r HSET myhash f1 v1 + assert_equal {{}} [r HGETEX myhash $command [get_short_expire_value $command] FIELDS 1 f2] } test "HGETEX $command on non-exist key" { r FLUSHALL - - set config [dict create \ - EX [list time 100000] \ - PX [list time 10000000] \ - EXAT [list time [expr {[clock seconds] + 10000}]] \ - PXAT [list time [expr {[clock milliseconds] + 100000}]] \ - ] - set params [dict get $config $command] - set expire_time [dict get $params time] - - assert_equal "" [r HGETEX myhash $command $expire_time FIELDS 1 f2] + assert_equal "" [r HGETEX myhash $command [get_long_expire_value $command] FIELDS 1 f2] } test "HGETEX $command with duplicate field names" { r FLUSHALL r HSET myhash f1 v1 - - set config [dict create \ - EX [list time 10000] \ - PX [list time 10000] \ - EXAT [list time [expr {[clock seconds] + 10000}]] \ - PXAT [list time [expr {[clock milliseconds] + 100000}]] \ - ] - set params [dict get $config $command] - set expire_time [dict get $params time] - - assert_equal "v1 v1" [r HGETEX myhash $command $expire_time FIELDS 2 f1 f1] + assert_equal "v1 v1" [r HGETEX myhash $command [get_long_expire_value $command] FIELDS 2 f1 f1] + } + + + test "HGETEX $command overwrites existing field TTL with bigger value" { + r FLUSHALL + r HSETEX myhash $command [get_long_expire_value $command] FIELDS 1 f1 v1 + set old_ttl [r HTTL myhash FIELDS 1 f1] + r HGETEX myhash $command [get_longer_then_long_expire_value $command] FIELDS 1 f1 + set new_ttl [r HTTL myhash FIELDS 1 f1] + assert {$new_ttl > $old_ttl} + } + + test "HGETEX $command overwrites existing field TTL with smaller value" { + r FLUSHALL + r HSETEX myhash $command [get_long_expire_value $command] FIELDS 1 f1 v1 + set old_ttl [r HTTL myhash FIELDS 1 f1] + r HGETEX myhash $command [get_short_expire_value $command] FIELDS 1 f1 + set new_ttl [r HTTL myhash FIELDS 1 f1] + assert 
{$new_ttl <= $old_ttl} } } @@ -200,59 +224,12 @@ start_server {tags {"hashexpire"}} { test "HGETEX $command with past expiry" { r FLUSHALL r HSET myhash f1 v1 - if {$command eq "EXAT"} { - set expire_time [expr {[clock seconds] - 100000}] - } else { - set expire_time [expr {[clock milliseconds] - 100000}] - } - assert_equal "v1" [r HGETEX myhash $command $expire_time FIELDS 1 f1] + assert_equal "v1" [r HGETEX myhash $command [get_past_zero_expire_value $command] FIELDS 1 f1] assert_equal "" [r HGET myhash f1] assert_equal -2 [r HTTL myhash FIELDS 1 f1] } } - foreach command {EX PX EXAT PXAT} { - test "HGETEX $command overwrites existing field TTL with bigger value" { - r FLUSHALL - set config [dict create \ - EX [list setup_cmd EX setup_val 100000 bigger_val 200000] \ - PX [list setup_cmd PX setup_val 100000000 bigger_val 200000000] \ - EXAT [list setup_cmd EX setup_val 100000 bigger_val [expr {[clock seconds] + 200000}]] \ - PXAT [list setup_cmd PX setup_val 100000000 bigger_val [expr {[clock milliseconds] + 200000000}]] \ - ] - set params [dict get $config $command] - set setup_cmd [dict get $params setup_cmd] - set setup_val [dict get $params setup_val] - set bigger_val [dict get $params bigger_val] - - r HSETEX myhash $setup_cmd $setup_val FIELDS 1 f1 v1 - set old_ttl [r HTTL myhash FIELDS 1 f1] - r HGETEX myhash $command $bigger_val FIELDS 1 f1 - set new_ttl [r HTTL myhash FIELDS 1 f1] - assert {$new_ttl > $old_ttl} - } - - test "HGETEX $command overwrites existing field TTL with smaller value" { - r FLUSHALL - set config [dict create \ - EX [list setup_cmd EX setup_val 100000 smaller_val 50000] \ - PX [list setup_cmd PX setup_val 100000000 smaller_val 50000000] \ - EXAT [list setup_cmd EX setup_val 100000 smaller_val [expr {[clock seconds] + 50000}]] \ - PXAT [list setup_cmd PX setup_val 100000000 smaller_val [expr {[clock milliseconds] + 50000000}]] \ - ] - set params [dict get $config $command] - set setup_cmd [dict get $params setup_cmd] - set setup_val 
[dict get $params setup_val] - set smaller_val [dict get $params smaller_val] - - r HSETEX myhash $setup_cmd $setup_val FIELDS 1 f1 v1 - set old_ttl [r HTTL myhash FIELDS 1 f1] - r HGETEX myhash $command $smaller_val FIELDS 1 f1 - set new_ttl [r HTTL myhash FIELDS 1 f1] - assert {$new_ttl <= $old_ttl} - } - } - test {HGETEX - verify no change when field does not exist} { r FLUSHALL r HSET myhash f1 v1 @@ -276,12 +253,7 @@ start_server {tags {"hashexpire"}} { } # Common error scenarios for all commands - foreach {cmd ttl_val} [list \ - EX 60 \ - PX 60 \ - EXAT [expr {[clock seconds] + 100}] \ - PXAT [expr {[clock milliseconds] + 100}] \ - ] { + foreach cmd {EX PX EXAT PXAT} { test "HGETEX $cmd- missing TTL value" { r FLUSHALL r HSET myhash f1 v1 @@ -306,47 +278,46 @@ start_server {tags {"hashexpire"}} { test "HGETEX $cmd- missing FIELDS keyword" { r FLUSHALL r HSET myhash f1 v1 - catch {r HGETEX myhash $cmd $ttl_val 1 f1} e + catch {r HGETEX myhash $cmd [get_short_expire_value $cmd] 1 f1} e set e } {ERR *} test "HGETEX $cmd- wrong numfields count (too few fields)" { r FLUSHALL r HSET myhash f1 v1 f2 v2 - catch {r HGETEX myhash $cmd $ttl_val FIELDS 2 f1} e + catch {r HGETEX myhash $cmd [get_short_expire_value $cmd] FIELDS 2 f1} e set e } {ERR *} test "HGETEX $cmd- wrong numfields count (too many fields)" { r FLUSHALL r HSET myhash f1 v1 - catch {r HGETEX myhash $cmd $ttl_val FIELDS 1 f1 f2} e + catch {r HGETEX myhash $cmd [get_short_expire_value $cmd] FIELDS 1 f1 f2} e set e } {ERR *} test "HGETEX $cmd- key is wrong type (string instead of hash)" { r FLUSHALL r SET mystring "v1" - catch {r HGETEX mystring $cmd $ttl_val FIELDS 1 f1} e + catch {r HGETEX mystring $cmd [get_short_expire_value $cmd] FIELDS 1 f1} e set e } {WRONGTYPE Operation against a key holding the wrong kind of value} test "HGETEX $cmd with FIELDS 0" { r FLUSHALL - catch {r HGETEX myhash $cmd $ttl_val FIELDS 0} e + catch {r HGETEX myhash $cmd [get_short_expire_value $cmd] FIELDS 0} e set e } {ERR *} 
test "HGETEX $cmd with negative numfields" { r FLUSHALL - catch {r HGETEX myhash $cmd $ttl_val FIELDS -10} e + catch {r HGETEX myhash $cmd [get_short_expire_value $cmd] FIELDS -10} e set e } {ERR *} test "HGETEX $cmd with missing key" { r FLUSHALL - set expire [expr {[clock seconds] + 100}] - catch {r HGETEX $cmd $expire FIELDS 1 f1} e + catch {r HGETEX $cmd [get_short_expire_value $cmd] FIELDS 1 f1} e set e } {ERR *} } @@ -366,25 +337,15 @@ start_server {tags {"hashexpire"}} { r config set notify-keyspace-events KEA foreach command {EX PX EXAT PXAT} { - set config [dict create \ - EX [list time 6000000] \ - PX [list time 6000000] \ - EXAT [list time [expr {[clock seconds] + 6000000}]] \ - PXAT [list time [expr {[clock milliseconds] + 6000000}]] \ - ] - set params [dict get $config $command] - set expire_time [dict get $params time] - test "HGETEX $command generates hexpire keyspace notification" { r FLUSHALL r HSET myhash f1 v1 - set rd [valkey_deferring_client] - assert_equal {1} [psubscribe $rd __keyevent@*] + set rd [setup_single_keyspace_notification r] - r HGETEX myhash $command $expire_time FIELDS 1 f1 + r HGETEX myhash $command [get_long_expire_value $command] FIELDS 1 f1 - assert_keyevent_pattern $rd hexpire myhash + assert_keyevent_patterns $rd myhash hexpire $rd close } @@ -392,15 +353,14 @@ start_server {tags {"hashexpire"}} { r FLUSHALL r HSET myhash f1 v1 f2 v2 f3 v3 - set rd [valkey_deferring_client] - assert_equal {1} [psubscribe $rd __keyevent@*] + set rd [setup_single_keyspace_notification r] - r HGETEX myhash $command $expire_time FIELDS 3 f1 f2 f3 + r HGETEX myhash $command [get_long_expire_value $command] FIELDS 3 f1 f2 f3 - assert_keyevent_pattern $rd hexpire myhash + assert_keyevent_patterns $rd myhash hexpire # Verify no notification (getting hset and not hexpire) r HSET dummy dummy dummy - assert_keyevent_pattern $rd hset dummy + assert_keyevent_patterns $rd dummy hset $rd close } @@ -408,15 +368,14 @@ start_server {tags {"hashexpire"}} { r 
FLUSHALL r HSET myhash f1 v1 - set rd [valkey_deferring_client] - assert_equal {1} [psubscribe $rd __keyevent@*] + set rd [setup_single_keyspace_notification r] # This HGETEX targets a non-existent field, so no notification about hexpire should be emitted - r HGETEX myhash $command $expire_time FIELDS 1 f2 + r HGETEX myhash $command [get_long_expire_value $command] FIELDS 1 f2 # Verify no notification (getting hset and not hexpire) r HSET dummy dummy dummy - assert_keyevent_pattern $rd hset dummy + assert_keyevent_patterns $rd dummy hset $rd close } @@ -427,24 +386,15 @@ start_server {tags {"hashexpire"}} { r HSET myhash f1 v1 r HEXPIRE myhash 60 FIELDS 1 f1 - set rd [valkey_deferring_client] - assert_equal {1} [psubscribe $rd __keyevent@*] + set rd [setup_single_keyspace_notification r] r HGETEX myhash PERSIST FIELDS 1 f1 - assert_keyevent_pattern $rd hpersist myhash + assert_keyevent_patterns $rd myhash hpersist $rd close } foreach command {EX PX EXAT PXAT} { - set config [dict create \ - EX [list time 0] \ - PX [list time 0] \ - EXAT [list time [expr {[clock seconds] - 2000}]] \ - PXAT [list time [expr {[clock milliseconds] - 2000}]] \ - ] - set params [dict get $config $command] - set expire_time [dict get $params time] test "HGETEX $command 0/past time works correctly with 1 field" { r FLUSHALL @@ -454,15 +404,13 @@ start_server {tags {"hashexpire"}} { assert_equal 1 [r HLEN myhash] assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] - set rd [valkey_deferring_client] - assert_equal {1} [psubscribe $rd __keyevent@*] + set rd [setup_single_keyspace_notification r] # Set field to expire immediately - r HGETEX myhash $command $expire_time FIELDS 1 f1 + r HGETEX myhash $command [get_past_zero_expire_value $command] FIELDS 1 f1 # Verify field and keys are deleted - assert_keyevent_pattern $rd hexpired myhash - assert_keyevent_pattern $rd del myhash + assert_keyevent_patterns $rd myhash hexpired del assert_equal -2 [r HTTL myhash 
FIELDS 1 f1] assert_equal 0 [r HLEN myhash] assert_equal 0 [r EXISTS myhash] @@ -479,15 +427,13 @@ start_server {tags {"hashexpire"}} { assert_equal 1 [r HLEN myhash] assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] - set rd [valkey_deferring_client] - assert_equal {1} [psubscribe $rd __keyevent@*] + set rd [setup_single_keyspace_notification r] # Set field to expire immediately - r HGETEX myhash $command $expire_time FIELDS 1 f1 + r HGETEX myhash $command [get_past_zero_expire_value $command] FIELDS 1 f1 # Verify field and keys are deleted - assert_keyevent_pattern $rd hexpired myhash - assert_keyevent_pattern $rd del myhash + assert_keyevent_patterns $rd myhash hexpired del assert_equal -2 [r HTTL myhash FIELDS 1 f1] assert_equal 0 [r HLEN myhash] assert_equal 0 [r EXISTS myhash] @@ -504,14 +450,13 @@ start_server {tags {"hashexpire"}} { assert_equal 2 [r HLEN myhash] assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] - set rd [valkey_deferring_client] - assert_equal {1} [psubscribe $rd __keyevent@*] + set rd [setup_single_keyspace_notification r] # Set field to expire immediately - r HGETEX myhash $command $expire_time FIELDS 1 f2 + r HGETEX myhash $command [get_past_zero_expire_value $command] FIELDS 1 f2 # Verify field and keys are deleted - assert_keyevent_pattern $rd hexpired myhash + assert_keyevent_patterns $rd myhash hexpired assert_equal -2 [r HTTL myhash FIELDS 1 f2] assert_equal 1 [r HLEN myhash] assert_equal 1 [r EXISTS myhash] @@ -529,14 +474,13 @@ start_server {tags {"hashexpire"}} { assert_equal 4 [r HLEN myhash] assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] - set rd [valkey_deferring_client] - assert_equal {1} [psubscribe $rd __keyevent@*] + set rd [setup_single_keyspace_notification r] # Set field to expire immediately - r HGETEX myhash $command $expire_time FIELDS 1 f1 + r HGETEX myhash $command [get_past_zero_expire_value $command] FIELDS 1 f1 
# Verify field and keys are deleted - assert_keyevent_pattern $rd hexpired myhash + assert_keyevent_patterns $rd myhash hexpired assert_equal -2 [r HTTL myhash FIELDS 1 f1] assert_equal 3 [r HLEN myhash] assert_equal 1 [r EXISTS myhash] @@ -1912,21 +1856,17 @@ start_server {tags {"hashexpire external:skip"}} { set replica_1_port [srv 0 port] test {Replication Primary -> R1} { - $primary FLUSHALL - ####### Replication setup ####### - $replica_1 replicaof $primary_host $primary_port - wait_for_condition 50 100 { - [lindex [$replica_1 role] 0] eq {slave} && - [string match {*master_link_status:up*} [$replica_1 info replication]] - } else { - fail "Can't turn the instance into a replica" - } + lassign [setup_replication_test $primary $replica_1 $primary_host $primary_port] primary_initial_expired replica_1_initial_expired # Initialize deferred clients and subscribe to keyspace notifications + foreach instance [list $primary $replica_1] { + $instance config set notify-keyspace-events KEA + } set rd_primary [valkey_deferring_client -1] set rd_replica_1 [valkey_deferring_client $replica_1_host $replica_1_port] - assert_equal {1} [psubscribe $rd_primary __keyevent@*] - assert_equal {1} [psubscribe $rd_replica_1 __keyevent@*] + foreach rd [list $rd_primary $rd_replica_1] { + assert_equal {1} [psubscribe $rd __keyevent@*] + } # Setup hash, set expire and set expire 0 @@ -1949,12 +1889,11 @@ start_server {tags {"hashexpire external:skip"}} { # Verify keyspace notification foreach rd [list $rd_primary $rd_replica_1] { - assert_keyevent_pattern $rd hset myhash - assert_keyevent_pattern $rd hexpire myhash + assert_keyevent_patterns $rd myhash hset hexpire } # primary gets hexpired and replica gets hdel - assert_keyevent_pattern $rd_primary hexpired myhash - assert_keyevent_pattern $rd_replica_1 hdel myhash + assert_keyevent_patterns $rd_primary myhash hexpired + assert_keyevent_patterns $rd_replica_1 myhash hdel $rd_primary close $rd_replica_1 close @@ -2016,12 +1955,11 @@ 
start_server {tags {"hashexpire external:skip"}} { # primary gets hexpired and replicas get hdel foreach rd [list $rd_primary $rd_replica_1 $rd_replica_2] { - assert_keyevent_pattern $rd hset myhash - assert_keyevent_pattern $rd hexpire myhash + assert_keyevent_patterns $rd myhash hset hexpire } - assert_keyevent_pattern $rd_primary hexpired myhash - assert_keyevent_pattern $rd_replica_1 hdel myhash - assert_keyevent_pattern $rd_replica_2 hdel myhash + assert_keyevent_patterns $rd_primary myhash hexpired + assert_keyevent_patterns $rd_replica_1 myhash hdel + assert_keyevent_patterns $rd_replica_2 myhash hdel $rd_primary close $rd_replica_1 close @@ -2112,9 +2050,7 @@ start_server {tags {"hashexpire external:skip"}} { assert_equal "{} {} v3" [$replica_1 HGETEX myhash FIELDS 3 f1 f2 f3] assert_equal "v1 v2 v3" [$primary HGETEX myhash FIELDS 3 f1 f2 f3] ;# No change for primary - assert_keyevent_pattern $rd_replica hexpire myhash - assert_keyevent_pattern $rd_replica hexpire myhash - assert_keyevent_pattern $rd_replica hexpired myhash + assert_keyevent_patterns $rd_replica myhash hexpire hexpire hexpired $rd_replica close # Re-enable active expiry @@ -2124,9 +2060,13 @@ start_server {tags {"hashexpire external:skip"}} { test {Promotion to primary} { - $primary FLUSHALL - $primary DEBUG SET-ACTIVE-EXPIRE no - $replica_1 DEBUG SET-ACTIVE-EXPIRE no + lassign [setup_replication_test $primary $replica_1 $primary_host $primary_port] primary_initial_expired replica_1_initial_expired + + # Initialize deferred clients and subscribe to keyspace notifications + foreach instance [list $primary $replica_1] { + $instance config set notify-keyspace-events KEA + $instance DEBUG SET-ACTIVE-EXPIRE no + } ####### Replication setup ####### $replica_1 replicaof $primary_host $primary_port wait_for_condition 50 100 { @@ -2153,9 +2093,7 @@ start_server {tags {"hashexpire external:skip"}} { assert_equal $f2_exp [$instance HEXPIRETIME myhash FIELDS 1 f2] assert_equal -1 [$instance HTTL myhash 
FIELDS 1 f3] assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [$instance info keyspace]] keys=%d] - assert_equal "v1" [$instance HGET myhash f1] - assert_equal "v2" [$instance HGET myhash f2] - assert_equal "v3" [$instance HGET myhash f3] + assert_equal "v1 v2 v3" [$instance HMGET myhash f1 f2 f3] assert_equal 3 [$instance HLEN myhash] } @@ -2200,9 +2138,7 @@ start_server {tags {"hashexpire external:skip"}} { assert_equal $f2_exp [$instance HEXPIRETIME myhash FIELDS 1 f2] assert_equal -2 [$instance HTTL myhash FIELDS 1 f1] assert_match {1} [scan [regexp -inline {keys\=([\d]*)} [$instance info keyspace]] keys=%d] - assert_equal "" [$instance HGET myhash f1] - assert_equal "v2" [$instance HGET myhash f2] - assert_equal "v3" [$instance HGET myhash f3] + assert_equal "{} v2 v3" [$instance HMGET myhash f1 f2 f3] assert_equal 3 [$instance HLEN myhash] } @@ -2218,17 +2154,14 @@ start_server {tags {"hashexpire external:skip"}} { # Verify final state foreach instance [list $primary $replica_1] { assert_equal 2 [$instance HLEN myhash] - assert_equal "" [$instance HGET myhash f1] - assert_equal "" [$instance HGET myhash f2] - assert_equal "v3" [$instance HGET myhash f3] + assert_equal "{} {} v3" [r HMGET myhash f1 f2 f3] } foreach rd [list $rd_replica_1 $rd_primary] { - assert_keyevent_pattern $rd hexpire myhash - assert_keyevent_pattern $rd hexpire myhash + assert_keyevent_patterns $rd myhash hexpire hexpire } - assert_keyevent_pattern $rd_replica_1 hexpired myhash - assert_keyevent_pattern $rd_primary hdel myhash + assert_keyevent_patterns $rd_replica_1 myhash hexpired + assert_keyevent_patterns $rd_primary myhash hdel $rd_replica_1 close $rd_primary close @@ -2296,7 +2229,7 @@ start_cluster 3 0 {tags {"cluster mytest external:skip"} overrides {cluster-node R 1 HGETEX $key EX 0 FIELDS 1 f1 # Veridy expiration - assert_keyevent_pattern $rd hexpired "{$key}" + assert_keyevent_patterns $rd "{$key}" hexpired assert_equal 2 [R 1 HLEN $key] assert_equal "" [R 1 HGET $key 
f1] assert_equal -2 [R 1 HTTL $key FIELDS 1 f1] @@ -2318,8 +2251,7 @@ start_server {tags {"hashexpire external:skip"}} { # Verify initial TTL state set mem_before [r MEMORY USAGE myhash] - assert_equal "v1" [r HGET myhash f1] - assert_equal "v2" [r HGET myhash f2] + assert_equal "v1 v2" [r HMGET myhash f1 f2] assert_morethan [r HTTL myhash FIELDS 1 f1] 100 assert_equal -1 [r HTTL myhash FIELDS 1 f2] assert_equal 2 [r HLEN myhash] @@ -2337,8 +2269,7 @@ start_server {tags {"hashexpire external:skip"}} { # Verify field values and TTLs are preserved set memory_after [r MEMORY USAGE $newhash] - assert_equal "v1" [r HGET $newhash f1] - assert_equal "v2" [r HGET $newhash f2] + assert_equal "v1 v2" [r HMGET $newhash f1 f2] assert_morethan [r HTTL $newhash FIELDS 1 f1] 100 assert_equal -1 [r HTTL $newhash FIELDS 1 f2] assert_equal 2 [r HLEN $newhash] @@ -2364,9 +2295,7 @@ start_server {tags {"hashexpire external:skip"}} { # Verify initial TTL state set mem_before [r MEMORY USAGE myhash] - assert_equal "v1" [r HGET myhash f1] - assert_equal "v3" [r HGET myhash f3] - assert_equal "v4" [r HGET myhash f4] + assert_equal "v1 v3 v4" [r HMGET myhash f1 f3 f4] assert_morethan [r HTTL myhash FIELDS 1 f1] 100 assert_morethan [r HTTL myhash FIELDS 1 f3] 0 assert_equal -1 [r HTTL myhash FIELDS 1 f4] @@ -2377,9 +2306,7 @@ start_server {tags {"hashexpire external:skip"}} { r copy myhash newhash1 # Verify myhash is the same - assert_equal "v1" [r HGET myhash f1] - assert_equal "v3" [r HGET myhash f3] - assert_equal "v4" [r HGET myhash f4] + assert_equal "v1 v3 v4" [r HMGET myhash f1 f3 f4] assert_morethan [r HTTL myhash FIELDS 1 f1] 100 assert_morethan [r HTTL myhash FIELDS 1 f3] 0 assert_equal -1 [r HTTL myhash FIELDS 1 f4] @@ -2387,9 +2314,7 @@ start_server {tags {"hashexpire external:skip"}} { # Verify new hash got same values set mem_after [r MEMORY USAGE myhash] - assert_equal "v1" [r HGET newhash1 f1] - assert_equal "v3" [r HGET newhash1 f3] - assert_equal "v4" [r HGET newhash1 f4] 
+ assert_equal "v1 v3 v4" [r HMGET myhash f1 f3 f4] assert_morethan [r HTTL newhash1 FIELDS 1 f1] 100 assert_morethan [r HTTL newhash1 FIELDS 1 f3] 0 assert_equal -1 [r HTTL newhash1 FIELDS 1 f4] @@ -2403,10 +2328,8 @@ start_server {tags {"hashexpire external:skip"}} { # Wait for original TTL to expire in copy after 2000 - assert_equal "v1" [r HGET newhash1 f1] - assert_equal "" [r HGET newhash1 f3] - assert_equal "v1" [r HGET myhash f1] - assert_equal "v3" [r HGET myhash f3] + assert_equal "v1 {}" [r HMGET newhash1 f1 f3] + assert_equal "v1 v3" [r HMGET myhash f1 f3] r HSETEX myhash EX 2 FIELDS 1 f3 v3 # Create second copy @@ -2417,10 +2340,8 @@ start_server {tags {"hashexpire external:skip"}} { # Wait for original hash TTL to expire after 2000 - assert_equal "v1" [r HGET myhash f1] - assert_equal "" [r HGET myhash f3] - assert_equal "v1" [r HGET newhash2 f1] - assert_equal "v3" [r HGET newhash2 f3] + assert_equal "v1 {}" [r HMGET myhash f1 f3] + assert_equal "v1 v3" [r HMGET newhash2 f1 f3] # Re-enable active expiry r DEBUG SET-ACTIVE-EXPIRE yes } {OK} {needs:debug} @@ -2442,8 +2363,7 @@ start_server {tags {"hashexpire external:skip"}} { set "hashtable" [r OBJECT ENCODING myhash] # Verify field values are preserved - assert_equal "v1" [r HGET myhash f1] - assert_equal "v2" [r HGET myhash f2] + assert_equal "v1 v2" [r HMGET myhash f1 f2] # Veridy expiry assert_morethan [r HTTL myhash FIELDS 1 f1] 100 assert_equal -1 [r HTTL myhash FIELDS 1 f2] @@ -2515,9 +2435,7 @@ start_server {tags {"hashexpire external:skip"}} { assert_equal 0 [r HLEN myhash] assert_match "" [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] - assert_keyevent_pattern $rd hset myhash - assert_keyevent_pattern $rd hexpire myhash - assert_keyevent_pattern $rd expire myhash + assert_keyevent_patterns $rd myhash hset hexpire expire $rd close # Re-enable active expiry r DEBUG SET-ACTIVE-EXPIRE yes @@ -2547,10 +2465,8 @@ start_server {tags {"hashexpire external:skip"}} { } assert_match 
{1} [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] assert_equal 1 [r EXISTS myhash] - assert_equal "v2" [r HGET myhash f2] - assert_equal "v3" [r HGET myhash f3] - assert_keyevent_pattern $rd hset myhash - assert_keyevent_pattern $rd hexpire myhash + assert_equal "{} v2 v3" [r HMGET myhash f1 f2 f3] + assert_keyevent_patterns $rd myhash hset hexpire $rd close # Re-enable active expiry r DEBUG SET-ACTIVE-EXPIRE yes @@ -2581,9 +2497,7 @@ start_server {tags {"hashexpire external:skip"}} { fail "myhash still exist" } - assert_equal "" [r HGET myhash f1] - assert_equal "" [r HGET myhash f2] - assert_equal "" [r HGET myhash f3] + assert_equal "{} {} {}" [r HMGET myhash f1 f2 f3] assert_match "" [scan [regexp -inline {keys\=([\d]*)} [r info keyspace]] keys=%d] assert_equal 0 [r HLEN myhash] # Re-enable active expiry @@ -2635,7 +2549,7 @@ tags {"aof external:skip"} { r config set appendonly yes r config set appendfsync always - # Create hash with 1short, long and no expired fields + # Create hash with 1 short, long and no expired fields set long_expire [expr {[clock seconds] + 1000000}] # Create 10 fields with long expiry for {set i 1} {$i <= 10} {incr i} { @@ -2727,4 +2641,4 @@ tags {"aof external:skip"} { r DEBUG SET-ACTIVE-EXPIRE yes } {OK} {needs:debug} } -} +} \ No newline at end of file From e4d1792fa66c69e6cc9e8ece8bb555975981fa5d Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Tue, 15 Jul 2025 18:18:56 +0300 Subject: [PATCH 096/119] fix handling of unmoved defrag allocation Signed-off-by: Ran Shidlansik --- src/vset.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/vset.c b/src/vset.c index 9a7e2e73e6..57d7c74a0b 100644 --- a/src/vset.c +++ b/src/vset.c @@ -1974,15 +1974,20 @@ static size_t vsetBucketDefrag_VECTOR(vsetBucket **bucket, size_t cursor, void * UNUSED(cursor); pVector *pv = vsetBucketVector(*bucket); pv = defragfn(pv); - *bucket = vsetBucketFromVector(pv); + if (pv) + *bucket = 
vsetBucketFromVector(pv); return 0; } static size_t vsetBucketDefrag_HASHTABLE(vsetBucket **bucket, size_t cursor, void *(*defragfn)(void *)) { hashtable *ht = vsetBucketHashtable(*bucket); if (cursor == 0) { - ht = hashtableDefragTables(ht, defragfn); - *bucket = vsetBucketFromHashtable(ht); + /* First time we enter this hashtable, defrag the tables first. */ + hashtable *new_ht = hashtableDefragTables(ht, defragfn); + if (new_ht) { + ht = new_ht; + *bucket = vsetBucketFromHashtable(ht); + } } return hashtableScanDefrag(ht, cursor, NULL, NULL, defragfn, 0); } From 0609a902e2d3b37e7887bbd6a5a4de4195db7e99 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Wed, 16 Jul 2025 08:49:01 +0300 Subject: [PATCH 097/119] Fix the hashtable update entry. In case the entry pointer was changed, we will need to remove and add a new entry without rehashing. Signed-off-by: Ran Shidlansik --- src/unit/test_files.h | 4 +- src/unit/test_vset.c | 107 +++++++++++++++++++++++++++++++++++++++--- src/vset.c | 13 ++++- 3 files changed, 115 insertions(+), 9 deletions(-) diff --git a/src/unit/test_files.h b/src/unit/test_files.h index 57ae05e456..ab2b30643a 100644 --- a/src/unit/test_files.h +++ b/src/unit/test_files.h @@ -202,6 +202,8 @@ int test_valkey_strtod(int argc, char **argv, int flags); int test_vector(int argc, char **argv, int flags); int test_vset_add_and_iterate(int argc, char **argv, int flags); int test_vset_large_batch_same_expiry(int argc, char **argv, int flags); +int test_vset_large_batch_update_entry_same_expiry(int argc, char **argv, int flags); +int test_vset_large_batch_update_entry_multiple_expiries(int argc, char **argv, int flags); int test_vset_iterate_multiple_expiries(int argc, char **argv, int flags); int test_vset_add_and_remove_all(int argc, char **argv, int flags); int test_vset_defrag(int argc, char **argv, int flags); @@ -266,7 +268,7 @@ unitTest __test_sha1_c[] = {{"test_sha1", test_sha1}, {NULL, NULL}}; unitTest __test_util_c[] = {{"test_string2ll", 
test_string2ll}, {"test_string2l", test_string2l}, {"test_ll2string", test_ll2string}, {"test_ld2string", test_ld2string}, {"test_fixedpoint_d2string", test_fixedpoint_d2string}, {"test_version2num", test_version2num}, {"test_reclaimFilePageCache", test_reclaimFilePageCache}, {NULL, NULL}}; unitTest __test_valkey_strtod_c[] = {{"test_valkey_strtod", test_valkey_strtod}, {NULL, NULL}}; unitTest __test_vector_c[] = {{"test_vector", test_vector}, {NULL, NULL}}; -unitTest __test_vset_c[] = {{"test_vset_add_and_iterate", test_vset_add_and_iterate}, {"test_vset_large_batch_same_expiry", test_vset_large_batch_same_expiry}, {"test_vset_iterate_multiple_expiries", test_vset_iterate_multiple_expiries}, {"test_vset_add_and_remove_all", test_vset_add_and_remove_all}, {"test_vset_defrag", test_vset_defrag}, {"test_vset_fuzzer", test_vset_fuzzer}, {NULL, NULL}}; +unitTest __test_vset_c[] = {{"test_vset_add_and_iterate", test_vset_add_and_iterate}, {"test_vset_large_batch_same_expiry", test_vset_large_batch_same_expiry}, {"test_vset_large_batch_update_entry_same_expiry", test_vset_large_batch_update_entry_same_expiry}, {"test_vset_large_batch_update_entry_multiple_expiries", test_vset_large_batch_update_entry_multiple_expiries}, {"test_vset_iterate_multiple_expiries", test_vset_iterate_multiple_expiries}, {"test_vset_add_and_remove_all", test_vset_add_and_remove_all}, {"test_vset_defrag", test_vset_defrag}, {"test_vset_fuzzer", test_vset_fuzzer}, {NULL, NULL}}; unitTest __test_ziplist_c[] = {{"test_ziplistCreateIntList", test_ziplistCreateIntList}, {"test_ziplistPop", test_ziplistPop}, {"test_ziplistGetElementAtIndex3", test_ziplistGetElementAtIndex3}, {"test_ziplistGetElementOutOfRange", test_ziplistGetElementOutOfRange}, {"test_ziplistGetLastElement", test_ziplistGetLastElement}, {"test_ziplistGetFirstElement", test_ziplistGetFirstElement}, {"test_ziplistGetElementOutOfRangeReverse", test_ziplistGetElementOutOfRangeReverse}, {"test_ziplistIterateThroughFullList", 
test_ziplistIterateThroughFullList}, {"test_ziplistIterateThroughListFrom1ToEnd", test_ziplistIterateThroughListFrom1ToEnd}, {"test_ziplistIterateThroughListFrom2ToEnd", test_ziplistIterateThroughListFrom2ToEnd}, {"test_ziplistIterateThroughStartOutOfRange", test_ziplistIterateThroughStartOutOfRange}, {"test_ziplistIterateBackToFront", test_ziplistIterateBackToFront}, {"test_ziplistIterateBackToFrontDeletingAllItems", test_ziplistIterateBackToFrontDeletingAllItems}, {"test_ziplistDeleteInclusiveRange0To0", test_ziplistDeleteInclusiveRange0To0}, {"test_ziplistDeleteInclusiveRange0To1", test_ziplistDeleteInclusiveRange0To1}, {"test_ziplistDeleteInclusiveRange1To2", test_ziplistDeleteInclusiveRange1To2}, {"test_ziplistDeleteWithStartIndexOutOfRange", test_ziplistDeleteWithStartIndexOutOfRange}, {"test_ziplistDeleteWithNumOverflow", test_ziplistDeleteWithNumOverflow}, {"test_ziplistDeleteFooWhileIterating", test_ziplistDeleteFooWhileIterating}, {"test_ziplistReplaceWithSameSize", test_ziplistReplaceWithSameSize}, {"test_ziplistReplaceWithDifferentSize", test_ziplistReplaceWithDifferentSize}, {"test_ziplistRegressionTestForOver255ByteStrings", test_ziplistRegressionTestForOver255ByteStrings}, {"test_ziplistRegressionTestDeleteNextToLastEntries", test_ziplistRegressionTestDeleteNextToLastEntries}, {"test_ziplistCreateLongListAndCheckIndices", test_ziplistCreateLongListAndCheckIndices}, {"test_ziplistCompareStringWithZiplistEntries", test_ziplistCompareStringWithZiplistEntries}, {"test_ziplistMergeTest", test_ziplistMergeTest}, {"test_ziplistStressWithRandomPayloadsOfDifferentEncoding", test_ziplistStressWithRandomPayloadsOfDifferentEncoding}, {"test_ziplistCascadeUpdateEdgeCases", test_ziplistCascadeUpdateEdgeCases}, {"test_ziplistInsertEdgeCase", test_ziplistInsertEdgeCase}, {"test_ziplistStressWithVariableSize", test_ziplistStressWithVariableSize}, {"test_BenchmarkziplistFind", test_BenchmarkziplistFind}, {"test_BenchmarkziplistIndex", test_BenchmarkziplistIndex}, 
{"test_BenchmarkziplistValidateIntegrity", test_BenchmarkziplistValidateIntegrity}, {"test_BenchmarkziplistCompareWithString", test_BenchmarkziplistCompareWithString}, {"test_BenchmarkziplistCompareWithNumber", test_BenchmarkziplistCompareWithNumber}, {"test_ziplistStress__ziplistCascadeUpdate", test_ziplistStress__ziplistCascadeUpdate}, {NULL, NULL}}; unitTest __test_zipmap_c[] = {{"test_zipmapIterateWithLargeKey", test_zipmapIterateWithLargeKey}, {"test_zipmapIterateThroughElements", test_zipmapIterateThroughElements}, {NULL, NULL}}; unitTest __test_zmalloc_c[] = {{"test_zmallocAllocReallocCallocAndFree", test_zmallocAllocReallocCallocAndFree}, {"test_zmallocAllocZeroByteAndFree", test_zmallocAllocZeroByteAndFree}, {NULL, NULL}}; diff --git a/src/unit/test_vset.c b/src/unit/test_vset.c index e673bbe36a..ea912622f8 100644 --- a/src/unit/test_vset.c +++ b/src/unit/test_vset.c @@ -20,19 +20,21 @@ static mock_entry *mockCreateEntry(const char *keystr, long long expiry) { return e; } +static void mockFreeEntry(void *entry) { + // printf("mockFreeEntry: %p\n", entry); + entryFree(entry); +} + static mock_entry *mockEntryUpdate(mock_entry *entry, long long expiry) { - return entryUpdate(entry, NULL, expiry); + mock_entry *new_entry = entryCreate(entryGetField(entry), sdsdup(entryGetValue(entry)), expiry); + entryFree(entry); + return new_entry; } static long long mockGetExpiry(const void *entry) { return entryGetExpiry(entry); } -static void mockFreeEntry(void *entry) { - // printf("mockFreeEntry: %p\n", entry); - entryFree(entry); -} - int test_vset_add_and_iterate(int argc, char **argv, int flags) { (void)argc; (void)argv; @@ -120,6 +122,99 @@ int test_vset_large_batch_same_expiry(int argc, char **argv, int flags) { return 0; } +int test_vset_large_batch_update_entry_same_expiry(int argc, char **argv, int flags) { + (void)argc; + (void)argv; + (void)flags; + + vset set; + vsetInit(&set); + + const long long expiry_time = 1000LL; + const unsigned int total_entries = 
1000; + + mock_entry *entries[total_entries]; + + for (unsigned int i = 0; i < total_entries; i++) { + char key_buf[32]; + snprintf(key_buf, sizeof(key_buf), "entry_%d", i); + entries[i] = mockCreateEntry(key_buf, expiry_time); + TEST_ASSERT(vsetAddEntry(&set, mockGetExpiry, entries[i])); + } + // Verify set is not empty + TEST_ASSERT(!vsetIsEmpty(&set)); + + // Now iterate and replace all entries + for (unsigned int i = 0; i < total_entries; i++) { + mock_entry *old_entry = entries[i]; + entries[i] = mockEntryUpdate(entries[i], expiry_time); + TEST_ASSERT(vsetUpdateEntry(&set, mockGetExpiry, old_entry, entries[i], expiry_time, expiry_time)); + } + + for (unsigned int i = 0; i < total_entries; i++) { + TEST_ASSERT(vsetRemoveEntry(&set, mockGetExpiry, entries[i])); + } + + // Verify set is empty + TEST_ASSERT(vsetIsEmpty(&set)); + + // Cleanup + for (unsigned int i = 0; i < total_entries; i++) { + mockFreeEntry(entries[i]); + } + + TEST_PRINT_INFO("Inserted, updated and deleted %d entries with same expiry", total_entries); + return 0; +} + +int test_vset_large_batch_update_entry_multiple_expiries(int argc, char **argv, int flags) { + (void)argc; + (void)argv; + (void)flags; + const unsigned int total_entries = 1000; + + vset set; + vsetInit(&set); + + // Prepare entries with mixed expiry times, some duplicates + mock_entry *entries[total_entries]; + + // Initialize keys + for (unsigned int i = 0; i < total_entries; i++) { + char key_buf[32]; + snprintf(key_buf, sizeof(key_buf), "entry_%d", i); + long long expiry_time = rand() % 10000; + entries[i] = mockCreateEntry(key_buf, expiry_time); + TEST_ASSERT(vsetAddEntry(&set, mockGetExpiry, entries[i])); + } + // Verify set is not empty + TEST_ASSERT(!vsetIsEmpty(&set)); + + // Now iterate and replace all entries + for (unsigned int i = 0; i < total_entries; i++) { + mock_entry *old_entry = entries[i]; + long long old_expiry = entryGetExpiry(entries[i]); + long long new_expiry = old_expiry + rand() % 100000; + entries[i] 
= mockEntryUpdate(entries[i], new_expiry); + TEST_ASSERT(vsetUpdateEntry(&set, mockGetExpiry, old_entry, entries[i], old_expiry, new_expiry)); + } + + for (unsigned int i = 0; i < total_entries; i++) { + TEST_ASSERT(vsetRemoveEntry(&set, mockGetExpiry, entries[i])); + } + + // Verify set is empty + TEST_ASSERT(vsetIsEmpty(&set)); + + // Cleanup + for (unsigned int i = 0; i < total_entries; i++) { + mockFreeEntry(entries[i]); + } + + TEST_PRINT_INFO("Inserted, updated and deleted %d entries with different expiry", total_entries); + return 0; +} + int test_vset_iterate_multiple_expiries(int argc, char **argv, int flags) { (void)argc; (void)argv; diff --git a/src/vset.c b/src/vset.c index 57d7c74a0b..5d3a53a082 100644 --- a/src/vset.c +++ b/src/vset.c @@ -1581,12 +1581,21 @@ static inline vsetBucket *vsetBucketUpdateEntry_HASHTABLE(vsetBucket *bucket, vs UNUSED(old_expiry); UNUSED(new_expiry); + /* In this case no need to change anything. */ + if (old_entry == new_entry) + return bucket; + + hashtablePosition pos; hashtable *ht = vsetBucketHashtable(bucket); - void **ref = hashtableFindRef(ht, old_entry); + /* We do a two stage pop in order to avoid rehashing. */ + void **ref = hashtableTwoPhasePopFindRef(ht, old_entry, &pos); if (!ref) { + /* In case no entry found, the rehashing did not pause, so it is safe to return. 
*/ return vsetBucketFromNone(); } else { - *ref = new_entry; + /* We know for sure the two entries are not the same, so it is safe to add the new and remove the old */ + assert(hashtableAdd(ht, new_entry)); + hashtableTwoPhasePopDelete(ht, &pos); } return bucket; } From b01769aa5653f6f3ae835faf039cb0de6c3ef355 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Thu, 17 Jul 2025 12:18:17 +0300 Subject: [PATCH 098/119] use hashtableReplaceReallocatedEntry in vsetBucketUpdateEntry_HASHTABLE Signed-off-by: Ran Shidlansik --- src/vset.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/vset.c b/src/vset.c index 5d3a53a082..1c5dde32c1 100644 --- a/src/vset.c +++ b/src/vset.c @@ -1585,18 +1585,10 @@ static inline vsetBucket *vsetBucketUpdateEntry_HASHTABLE(vsetBucket *bucket, vs if (old_entry == new_entry) return bucket; - hashtablePosition pos; hashtable *ht = vsetBucketHashtable(bucket); /* We do a two stage pop in order to avoid rehashing. */ - void **ref = hashtableTwoPhasePopFindRef(ht, old_entry, &pos); - if (!ref) { - /* In case no entry found, the rehashing did not pause, so it is safe to return. */ + if (!hashtableReplaceReallocatedEntry(ht, old_entry, new_entry)) return vsetBucketFromNone(); - } else { - /* We know for sure the two entries are not the same, so it is safe to add the new and remove the old */ - assert(hashtableAdd(ht, new_entry)); - hashtableTwoPhasePopDelete(ht, &pos); - } return bucket; } From 4cc13d4bc5c7e96f348beda67e10f282865b34ce Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Thu, 17 Jul 2025 12:23:56 +0300 Subject: [PATCH 099/119] Revert "use hashtableReplaceReallocatedEntry in vsetBucketUpdateEntry_HASHTABLE" This reverts commit b01769aa5653f6f3ae835faf039cb0de6c3ef355. 
--- src/vset.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/vset.c b/src/vset.c index 1c5dde32c1..5d3a53a082 100644 --- a/src/vset.c +++ b/src/vset.c @@ -1585,10 +1585,18 @@ static inline vsetBucket *vsetBucketUpdateEntry_HASHTABLE(vsetBucket *bucket, vs if (old_entry == new_entry) return bucket; + hashtablePosition pos; hashtable *ht = vsetBucketHashtable(bucket); /* We do a two stage pop in order to avoid rehashing. */ - if (!hashtableReplaceReallocatedEntry(ht, old_entry, new_entry)) + void **ref = hashtableTwoPhasePopFindRef(ht, old_entry, &pos); + if (!ref) { + /* In case no entry found, the rehashing did not pause, so it is safe to return. */ return vsetBucketFromNone(); + } else { + /* We know for sure the two entries are not the same, so it is safe to add the new and remove the old */ + assert(hashtableAdd(ht, new_entry)); + hashtableTwoPhasePopDelete(ht, &pos); + } return bucket; } From 4f27725bdb91fa5a7703d265de8105f9f05eebc7 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Thu, 17 Jul 2025 19:50:58 +0300 Subject: [PATCH 100/119] comment TODO on how we operate bucket split Signed-off-by: Ran Shidlansik --- src/vset.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/vset.c b/src/vset.c index 5d3a53a082..d946448dc5 100644 --- a/src/vset.c +++ b/src/vset.c @@ -808,7 +808,9 @@ static bool splitBucketIfPossible(vsetBucket *parent, vsetGetExpiryFunc getExpir pVector *pv = vsetBucketVector(bucket); rax *expiry_buckets = vsetBucketRax(parent); /* first lets sort the vector. we cannot take a decision without it. - * We set the global expiry getter so we can sort according to the provided getExpiry function. */ + * We set the global expiry getter so we can sort according to the provided getExpiry function. + * TODO: After some thought I think it might be better to avoid sorting and attempt a quickselect. just allocate a new vector with the same size. 
+ * Than scan once and choose a pivot which is the median or avarage bucket_ts. Then move all entries smaller to the new vector. then shrink both vectors as needed. */ vsetSetExpiryGetter(getExpiry); pvSort(pv, vsetCompareEntries); vsetUnsetExpiryGetter(); From aafbb8ddef891a8486def489f3bf63d4c33a46a9 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Fri, 18 Jul 2025 09:29:09 +0300 Subject: [PATCH 101/119] typo fix Signed-off-by: Ran Shidlansik --- src/vset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vset.c b/src/vset.c index d946448dc5..6f1caf5187 100644 --- a/src/vset.c +++ b/src/vset.c @@ -810,7 +810,7 @@ static bool splitBucketIfPossible(vsetBucket *parent, vsetGetExpiryFunc getExpir /* first lets sort the vector. we cannot take a decision without it. * We set the global expiry getter so we can sort according to the provided getExpiry function. * TODO: After some thought I think it might be better to avoid sorting and attempt a quickselect. just allocate a new vector with the same size. - * Than scan once and choose a pivot which is the median or avarage bucket_ts. Then move all entries smaller to the new vector. then shrink both vectors as needed. */ + * Than scan once and choose a pivot which is the median or average bucket_ts. Then move all entries smaller to the new vector. then shrink both vectors as needed. 
*/ vsetSetExpiryGetter(getExpiry); pvSort(pv, vsetCompareEntries); vsetUnsetExpiryGetter(); From 8d8118b9ffeb6cdd72bd6c4a4b74f9d2bad74d2d Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Thu, 24 Jul 2025 10:51:39 +0300 Subject: [PATCH 102/119] fix comment Signed-off-by: Ran Shidlansik --- src/server.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/server.c b/src/server.c index e548efd75a..53cf0f1f6e 100644 --- a/src/server.c +++ b/src/server.c @@ -7340,7 +7340,7 @@ __attribute__((weak)) int main(int argc, char **argv) { } /* - * The parseExtendedStringArgumentsOrReply() function performs the common validation for extended + * The parseExtendedCommandArgumentsOrReply() function performs the common validation for extended * command arguments used in STRING and HASH commands. * * Get specific command extended options - PERSIST/DEL From aecc59eaa1d2bd2e86f35a8ec87b2eb1046aab33 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Wed, 30 Jul 2025 11:43:40 +0300 Subject: [PATCH 103/119] address PR comments Signed-off-by: Ran Shidlansik --- src/vset.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/vset.c b/src/vset.c index 6f1caf5187..830576891f 100644 --- a/src/vset.c +++ b/src/vset.c @@ -257,7 +257,7 @@ pVector *pvInsert(pVector *pv, void *elem, uint32_t pos) { * The updated pVector after removal. * Returns NULL if the last element was removed and the vector was freed. */ pVector *pvRemoveAt(pVector *pv, uint32_t idx) { - if (!pv || pv->len == 0) return pv; + assert(pv && pv->len > 0); assert(idx < pv->len); if (pv->len == 1) { /* Last element being removed; delete vector */ @@ -303,7 +303,7 @@ bool pvRemove(pVector **ppv, void *elem) { * A pointer to the element at the given index. * Returns NULL if the vector is NULL or the index is out of bounds. 
*/ void *pvGet(pVector *vec, uint32_t idx) { - if (!vec || idx >= vec->len) return NULL; + assert(vec && idx < vec->len); return vec->data[idx]; } From aa3dfe31594c9e3dc0dd76337e554b10fa4cba36 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Thu, 31 Jul 2025 19:40:24 +0300 Subject: [PATCH 104/119] Update src/vset.c --- src/vset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vset.c b/src/vset.c index 830576891f..5931940f53 100644 --- a/src/vset.c +++ b/src/vset.c @@ -456,7 +456,7 @@ void pvSort(pVector *pv, int (*compare)(const void *a, const void *b)) { #define VSET_TAG_MASK 0x7UL #define VSET_PTR_MASK (~VSET_TAG_MASK) -// Determine bucket type +/* Determine bucket type */ static inline int vsetBucketType(vsetBucket *b) { if (b == NULL) return VSET_BUCKET_NONE; From ccda2c675943b12b46632e02c0f3a7585e01cfe7 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Thu, 31 Jul 2025 19:40:30 +0300 Subject: [PATCH 105/119] Update src/vset.c --- src/vset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vset.c b/src/vset.c index 5931940f53..f3f88ab4c4 100644 --- a/src/vset.c +++ b/src/vset.c @@ -466,7 +466,7 @@ static inline int vsetBucketType(vsetBucket *b) { return bits & VSET_TAG_MASK; } -// Access raw pointer +/* Access raw pointer */ static inline void *vsetBucketRawPtr(vsetBucket *b) { return (void *)((uintptr_t)b & VSET_PTR_MASK); } From 63b38e5ea4dc328ca95b640cccbcbba8b39f5bff Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Thu, 31 Jul 2025 19:41:18 +0300 Subject: [PATCH 106/119] Update src/vset.c --- src/vset.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/vset.c b/src/vset.c index f3f88ab4c4..9bbd5872ba 100644 --- a/src/vset.c +++ b/src/vset.c @@ -491,7 +491,6 @@ static inline void *vsetBucketSingle(vsetBucket *b) { return b; } -// Setters static inline vsetBucket *vsetBucketFromRawPtr(void *ptr, int type) { uintptr_t p = (uintptr_t)ptr; return (vsetBucket *)(p | (type & VSET_TAG_MASK)); From 
293ad74f09d49172fb223dfc9a7c654b9fc7f8ce Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Sun, 3 Aug 2025 15:07:31 +0300 Subject: [PATCH 107/119] address PR comments Signed-off-by: Ran Shidlansik --- src/t_hash.c | 4 +- src/unit/test_vset.c | 25 ++- src/vset.c | 520 +++++++++++++++++++++++++++++++++---------- src/vset.h | 300 +++---------------------- 4 files changed, 450 insertions(+), 399 deletions(-) diff --git a/src/t_hash.c b/src/t_hash.c index eb78e4b499..f5aff38cab 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -572,7 +572,7 @@ void hashTypeInitVolatileIterator(robj *subject, hashTypeIterator *hi) { if (hi->encoding == OBJ_ENCODING_LISTPACK) { return; } else if (hi->encoding == OBJ_ENCODING_HASHTABLE) { - vsetStart(hashTypeGetVolatileSet(subject), &hi->viter); + vsetInitIterator(hashTypeGetVolatileSet(subject), &hi->viter); } else { serverPanic("Unknown hash encoding"); } @@ -583,7 +583,7 @@ void hashTypeResetIterator(hashTypeIterator *hi) { if (!hi->volatile_items_iter) hashtableResetIterator(&hi->iter); else - vsetStop(&hi->viter); + vsetResetIterator(&hi->viter); } } diff --git a/src/unit/test_vset.c b/src/unit/test_vset.c index ea912622f8..a1b0a7d412 100644 --- a/src/unit/test_vset.c +++ b/src/unit/test_vset.c @@ -52,7 +52,7 @@ int test_vset_add_and_iterate(int argc, char **argv, int flags) { TEST_ASSERT(!vsetIsEmpty(&set)); vsetIterator it; - vsetStart(&set, &it); + vsetInitIterator(&set, &it); void *entry; int count = 0; @@ -63,7 +63,7 @@ int test_vset_add_and_iterate(int argc, char **argv, int flags) { TEST_ASSERT(count == 2); - vsetStop(&it); + vsetResetIterator(&it); vsetClear(&set); mockFreeEntry(e1); mockFreeEntry(e2); @@ -99,7 +99,7 @@ int test_vset_large_batch_same_expiry(int argc, char **argv, int flags) { // Iterate all entries and count them vsetIterator it; - vsetStart(&set, &it); + vsetInitIterator(&set, &it); void *entry; int count = 0; @@ -110,7 +110,7 @@ int test_vset_large_batch_same_expiry(int argc, char **argv, int flags) { 
TEST_ASSERT(count == total_entries); // Cleanup - vsetStop(&it); + vsetResetIterator(&it); vsetClear(&set); for (int i = 0; i < total_entries; i++) { @@ -237,7 +237,7 @@ int test_vset_iterate_multiple_expiries(int argc, char **argv, int flags) { } vsetIterator it; - vsetStart(&set, &it); + vsetInitIterator(&set, &it); int found[5] = {0}; int total = 0; @@ -263,7 +263,7 @@ int test_vset_iterate_multiple_expiries(int argc, char **argv, int flags) { TEST_EXPECT(found[i]); } - vsetStop(&it); + vsetResetIterator(&it); vsetClear(&set); for (int i = 0; i < 5; i++) mockFreeEntry(entries[i]); @@ -321,9 +321,8 @@ long long mock_entry_get_expiry(const void *entry) { return mockGetExpiry(entry); } -int mock_entry_expire(void *entry, void *ctx) { +int mock_entry_expire(void *entry, long long now) { mock_entry *e = (mock_entry *)entry; - long long now = *(long long *)ctx; TEST_ASSERT(mock_entry_get_expiry(entry) <= now); for (int i = 0; i < mock_entry_count; i++) { if (mock_entries[i] == e) { @@ -394,7 +393,11 @@ int remove_mock_entry(vset *set) { int expire_mock_entries(vset *set, mstime_t now) { // printf("Before expired entries entries: %d\n", mock_entry_count); - vsetPopExpired(set, mockGetExpiry, mock_entry_expire, now, mock_entry_count, &now); + const int expired_max = mock_entry_count; + void *expired_entries[expired_max]; + size_t expired_count = vsetPopExpired(set, mockGetExpiry, now, expired_entries, expired_max); + for (size_t i = 0; i < expired_count; i++) + mock_entry_expire(expired_entries[i], now); // printf("After expired %zu entries left entries: %d and set is empty: %s\n", count, mock_entry_count, vsetIsEmpty(set) ? 
"true" : "false"); return 0; } @@ -452,12 +455,12 @@ int test_vset_defrag(int argc, char **argv, int flags) { TEST_ASSERT(defrag_vset(&set, 0, 0) == 0); /* defrag when vector */ - for (int i = 0; i < VOLATILESET_VECTOR_BUCKET_MAX_SIZE - 1; i++) + for (int i = 0; i < 127 - 1; i++) insert_mock_entry(&set); TEST_ASSERT(defrag_vset(&set, 0, 0) == 0); long long expiry = rand() % 10000 + 100; - for (int i = 0; i < VOLATILESET_VECTOR_BUCKET_MAX_SIZE * 2; i++) { + for (int i = 0; i < 127 * 2; i++) { insert_mock_entry_with_expiry(&set, expiry); } TEST_ASSERT(defrag_vset(&set, 0, 0) == 0); diff --git a/src/vset.c b/src/vset.c index 830576891f..04721c1ac5 100644 --- a/src/vset.c +++ b/src/vset.c @@ -10,26 +10,224 @@ #include #include +#ifndef static_assert +#define static_assert _Static_assert +#endif + +/* + *----------------------------------------------------------------------------- + * Volatile Set - Adaptive, Expiry-aware Set Structure + *----------------------------------------------------------------------------- + * + * The `vset` is a dynamic, memory-efficient container for managing + * entries with expiry semantics. It is designed to efficiently track entries + * that expire at varying times and scales to large sets by adapting its internal + * representation as it grows or shrinks. + * + *----------------------------------------------------------------------------- + * Expiry Buckets and Pointer Tagging + *----------------------------------------------------------------------------- + * + * Internally, the `vset` maintains a single `vsetBucket*` pointer, + * which can point to different types of buckets depending on the number of + * entries and the needed resolution. 
The pointer is tagged using the lowest 3 bits: + * + * #define VSET_BUCKET_NONE -1 + * #define VSET_BUCKET_SINGLE 0x1ULL // pointer to single entry (odd ptr) + * #define VSET_BUCKET_VECTOR 0x2ULL // pointer to pointer vector + * #define VSET_BUCKET_HT 0x4ULL // pointer to hashtable + * #define VSET_BUCKET_RAX 0x6ULL // pointer to radix tree + * + * #define VSET_TAG_MASK 0x7ULL + * #define VSET_PTR_MASK (~VSET_TAG_MASK) + * + * IMPORTANT!!!! - All entries must have LSB set (i.e., be odd-aligned) to be compatible with !!!! + * tagging constraints. + * + *----------------------------------------------------------------------------- + * Time Bucket Management + *----------------------------------------------------------------------------- + * + * Entries are grouped into **time buckets** based on their expiry time. + * Each time bucket represents a window aligned to: + * + * #define VOLATILESET_BUCKET_INTERVAL_MIN (1 << 4) // 16ms + * #define VOLATILESET_BUCKET_INTERVAL_MAX (1 << 13) // 8192ms + * + * A time bucket key is computed by rounding the expiry timestamp up to the + * nearest aligned window using `get_bucket_ts()`. + * + *----------------------------------------------------------------------------- + * Entry Addition and Bucket Promotion + *----------------------------------------------------------------------------- + * + * When a new entry is added: + * + * 1. If the current set is `NONE`, it becomes a `SINGLE` bucket. + * 2. If the set is a `SINGLE` bucket and another entry arrives: + * -> it is promoted to a `VECTOR` bucket (sorted by expiry). + * 3. If the `VECTOR` exceeds `VOLATILESET_VECTOR_BUCKET_MAX_SIZE` (127): + * -> the set becomes a `RAX`, and existing entries are migrated. + * 4. IF the set is using RAX encoding it will locate a bucket to add the entry + * following the strategy explained below. 
+ * + *----------------------------------------------------------------------------- + * RAX Bucket and Dynamic Splitting + *----------------------------------------------------------------------------- + * + * Each bucket in the RAX bucket corresponds to a **time window**, defined by + * its bucket timestamp (`bucket_ts`). This timestamp represents the **END** of + * the time window. Entries in the bucket must expire *before* this timestamp. + * + * Time windows are defined in granular ranges: + * - Minimum granularity: VOLATILESET_BUCKET_INTERVAL_MIN (16 ms) + * - Maximum granularity: VOLATILESET_BUCKET_INTERVAL_MAX (8192 ms) + * + * A bucket can only contain entries that: + * 1. Have expiry < bucket_ts + * 2. Do not fit into any bucket with a smaller timestamp (i.e., earlier window) + * + * The structure allows multiple encodings: + * VSET_BUCKET_SINGLE - A single pointer to one entry. + * VSET_BUCKET_VECTOR - A sorted vector of pointers (up to 127 entries). + * VSET_BUCKET_HT - A hashtable used when vectors become too dense. + * + * Bucket Timestamp (END of window): + * + * |------------------ Bucket Span ------------------| + * [window_start .................................. bucket_ts) + * + * Layout Example: + * + * Timeline: ----------> increasing time -----------> + * +--------------+-------------+---------+ + * | B0 | B1 | B2 | + * | ts=32 | ts=128 | ts=2048 | + * +--------------+-------------+---------+ + * ^ ^ ^ + * | | | + * [E1,E2] ∈ B0 [E3...E7] ∈ B1 [E8...E15] ∈ B2 + * + * All entries expire BEFORE their bucket_ts + * + * Bucket Splitting Strategy: + * ---------------------------------- + * + * When a bucket (e.g. VECTOR) becomes too dense or needs realignment: + * + * 1. Re-align to lower granularity: + * - Adjust the bucket timestamp down to a finer granularity (e.g. 16ms). + * - Only done if ALL entries still fit in the tighter window. + * - Effectively “moves” the bucket to an earlier timestamp. 
+ * + * Example: B(ts=128, span=128ms) -> B(ts=64, span=16ms) + * + * 2. Split into two buckets: + * - Use binary search to find a “natural” boundary based on entry expiry. + * - Original bucket retains its timestamp (but holds fewer entries). + * - New bucket is inserted before the current one with its own tighter timestamp. + * + * Example: + * + * Before: + * [ Entry0 ... Entry126 ] -> B(ts=128) + * + * After Split: + * [ Entry0...Entry62 ] -> New B(ts=64) + * [ Entry63...Entry126 ] -> Original B(ts=128) + * + * 3. Convert to hashtable: + * - When no clean split is found (e.g. all entries share similar expiry), + * and realignment is not possible. + * - This allows efficient O(1) lookups even with clustered expiry values. + * + * Vector B(ts=128) -> Hashtable B(ts=128) + * + * This hierarchical design ensures: + * - Efficient memory usage (tight buckets) + * - Predictable iteration by expiry time + * - Low overhead insertions & deletions + * - Graceful promotion & demotion of bucket types + * + * NOTE: Buckets are always sorted by their `bucket_ts` in the radix tree (RAX), + * which allows efficient search for insertion/removal based on expiry. + * + *----------------------------------------------------------------------------- + * RAX Bucket Layout + *----------------------------------------------------------------------------- + * + * * RAX View with Time Keys: + * + * expiry_buckets = rax * | 0x6 + * + * +--------------------------+ + * | RAX (key = bucket_ts) | + * |--------------------------| + * | "000016" -> [entry1] | <- Vector (SINGLE->VECTOR->HT) + * | "000032" -> [entry2...] | <- Full vector, might split + * | "000048" -> [entry...] | + * +--------------------------+ + * + * * Splitting a Full Vector in RAX: + * + * Suppose vector at key "000032" has 13 entries: + * + * 1. Use binary search to find a transition point in expiry bucket_ts. 
+ * We search the first 2 following entries which belong to different lwo granularity time windows, + * but as close as possible to the middle of the vector: + * [entry1, entry7, ..., entry13] + * ↑ + * split (first where get_bucket_ts(entry) > min_ts) + * + * 2. Create two vectors: + * bucket A -> [entry1..entry6] with key = "000032" + * bucket B -> [entry7..entry13] with key = "000048" + * + * 3. Insert both back to the RAX. + * + *----------------------------------------------------------------------------- + * Bucket Lifecycle + *----------------------------------------------------------------------------- + * + * NONE + * | + * v + * SINGLE (1 entry) + * | + * v + * VECTOR (sorted, up to 127) + * | + * v + * RAX (holds multiple buckets, keyed by each bucket's end timestamp) + * Bucket types within a RAX: + * + * SINGLE + * | + * v + * VECTOR (sorted, up to 127, can split + * | into multiple vectors) + * | + * v + * HASHTABLE (only when a vector can't split) + */ + /************************************************************************************************************* * pVector Implementation *************************************************************************************************************/ -#define PV_CARD_BITS 30 -#define PV_ALLOC_BITS 34 -#define PV_MAX_ELEMENTS ((1ULL << PV_CARD_BITS) - 1) -#define PV_HEADER_SIZE (sizeof(pVector)) -#define PV_ELEM_SIZE (sizeof(void *)) -#define PV_ALLOC(pv) (pv ? pv->alloc : 0) -#define PV_LEN(pv) (pv ? pv->len : 0) -#define PV_USED_SIZE(pv) (pv ? 
(PV_HEADER_SIZE + (pvLen(pv)) * PV_ELEM_SIZE) : 0) +enum { PV_CARD_BITS = 30 }; /* enum constants: a bit-field width must be an integer constant expression in C */ +enum { PV_ALLOC_BITS = 34 }; /* Custom vector structure with embedded allocation and length counters */ typedef struct { - uint64_t len : 30; /* Number of elements (cardinality) */ - uint64_t alloc : 34; /* Allocated memory (zmalloc_size of the current vector allocation) */ - void *data[]; /* Flexible array member */ + uint64_t len : PV_CARD_BITS; /* Number of elements (cardinality) */ + uint64_t alloc : PV_ALLOC_BITS; /* Allocated memory (zmalloc_size of the current vector allocation) */ + void *data[]; /* Flexible array member */ } pVector; +static const size_t PV_HEADER_SIZE = (sizeof(pVector)); + + /* Returns the number of elements currently stored in the pVector. * * Arguments: @@ -38,8 +236,22 @@ typedef struct { * Return: * The number of elements in the vector. * Note that a NULL is a !!!valid!!! vector - returns 0 if the vector is NULL. */ -static inline uint32_t pvLen(pVector *vec) { - return PV_LEN(vec); +static inline uint32_t +pvLen(pVector *vec) { + return (vec ? vec->len : 0); +} + +/* Returns the number of bytes allocated by the os to store the vector. + * This value is equal to the usable size returned by calling zrealloc_usable. + * + * Arguments: + * vec - The pVector to query. + * + * Return: + * The allocation size of the vector in bytes, as size_t (the 34-bit `alloc` field does not fit in uint32_t). + * Note that a NULL is a !!!valid!!! vector - returns 0 if the vector is NULL. */ +static inline size_t pvAlloc(pVector *vec) { + return (vec ? vec->alloc : 0); } /* Ensures that a pVector has enough capacity to hold additional elements. @@ -58,15 +270,18 @@ static inline uint32_t pvLen(pVector *vec) { * * Return: * A pointer to the resized (or newly allocated) pVector with sufficient capacity. - * Returns NULL only if the allocation fails. * * Note: * The `additional` is the number of *additional* elements beyond the current length. 
This function does not modify the vector's logical length (`len`), only its allocation. */ -pVector *pvMakeRoomFor(pVector *pv, size_t additional) { +static pVector *pvMakeRoomFor(pVector *pv, size_t additional) { if (additional == 0) return pv; - size_t required = PV_HEADER_SIZE + (PV_LEN(pv) + additional) * PV_ELEM_SIZE; - if (PV_ALLOC(pv) >= required) return pv; + /* Make sure we will have the capacity to store the extra number of elements */ + assert(pvLen(pv) + additional <= (1UL << PV_CARD_BITS) - 1); + + size_t required = PV_HEADER_SIZE + (pvLen(pv) + additional) * sizeof(void *); + + if (pvAlloc(pv) >= required) return pv; if (!pv) { pv = zmalloc(required); @@ -74,6 +289,8 @@ pVector *pvMakeRoomFor(pVector *pv, size_t additional) { } else { pv = zrealloc_usable(pv, required, &required); } + /* Make sure we have the capacity to save the allocation size */ + assert(required <= (1ULL << PV_ALLOC_BITS) - 1); pv->alloc = required; return pv; } @@ -93,8 +310,7 @@ pVector *pvMakeRoomFor(pVector *pv, size_t additional) { * pv - A pointer to the `pVector` to shrink. * * Return: - * A potentially reallocated `pVector` with minimized memory usage, - * or `NULL` if the input was `NULL`. + * A potentially reallocated `pVector` with minimized memory usage (`NULL` if the input is `NULL` or holds no elements). * * This function does not change the logical contents of the vector. * It only adjusts the allocated memory footprint. If no reallocation @@ -104,18 +320,18 @@ pVector *pvMakeRoomFor(pVector *pv, size_t additional) { * pVector *vec = pvNew(); * // After some insertions and deletions * vec = pvShrinkToFit(vec); */ -pVector *pvShrinkToFit(pVector *pv) { +static pVector *pvShrinkToFit(pVector *pv) { if (!pv) return NULL; - size_t used = PV_ALLOC(pv); - size_t required = pvLen(pv) == 0 ? 0 : PV_HEADER_SIZE + pvLen(pv) * PV_ELEM_SIZE; + size_t used = pvAlloc(pv); + size_t required = pvLen(pv) == 0 ? 
0 : PV_HEADER_SIZE + pvLen(pv) * sizeof(void *); if (used > required) { if (!required) { zfree(pv); return NULL; } - pv = zrealloc_usable(pv, used, &required); + pv = zrealloc_usable(pv, required, &required); pv->alloc = required; } return pv; @@ -144,11 +360,6 @@ pVector *pvShrinkToFit(pVector *pv) { * * Return: * - A new pVector containing the right split [split_index..len-1]. - * - `NULL` in the following cases: - * • The input vector is `NULL`. - * • The input vector has only 1 or fewer elements (nothing to split). - * • The `split_index` is equal to the vector length (all elements stay in the left part). - * • The `split_index` is such that the right part would have 0 elements. * * Side effects: * - The original vector pointer (`*pv_ptr`) is modified to point to the @@ -171,15 +382,21 @@ pVector *pvShrinkToFit(pVector *pv) { pVector *pvSplit(pVector **pv_ptr, uint32_t split_index) { pVector *pv = *pv_ptr; - // Handle edge cases: null or empty - if (!pv || pv->len <= 1) return NULL; + /* Handle edge cases: */ - // If no valid split found, return NULL (entire vector is one block) - if (split_index == pv->len) return NULL; + /* 1. NULL vector, or a split index which keeps the entire vector in the left side: + * simply return a NULL vector as the (empty) right side. + */ + if (!pv || split_index >= pvLen(pv)) return NULL; + + /* 2. zero split index means no left side. just return the existing vector and zero the input vector. 
*/ + if (split_index == 0) { + *pv_ptr = NULL; + return pv; + } // Number of elements for the right half uint64_t right_len = pv->len - split_index; - if (right_len == 0) return NULL; // Allocate new vector for right part size_t item_bytes = sizeof(void *); @@ -194,7 +411,7 @@ pVector *pvSplit(pVector **pv_ptr, uint32_t split_index) { // Shrink original vector pv->len = split_index; - *pv_ptr = pvShrinkToFit(pv); // Optional: shrink in-place to reduce memory + *pv_ptr = pvShrinkToFit(pv); return right; } @@ -228,22 +445,48 @@ pVector *pvNew(uint32_t capacity) { * Arguments: * pv - The pVector to insert into (can be NULL). * elem - The pointer to be inserted. - * pos - The index at which to insert the element (must be ≤ pv->len). + * idx - The index at which to insert the element (must be ≤ pvLen(pv), i.e. 0 when pv is NULL). * * Return: * The updated pVector with the element inserted. */ -pVector *pvInsert(pVector *pv, void *elem, uint32_t pos) { +pVector *pvInsertAt(pVector *pv, void *elem, uint32_t idx) { + assert(idx <= pvLen(pv)); /* pvLen() is NULL-safe; pv may still be NULL before pvMakeRoomFor() */ pv = pvMakeRoomFor(pv, 1); - if (pos < pv->len) { - memmove(&pv->data[pos + 1], &pv->data[pos], (pv->len - pos) * sizeof(void *)); + if (idx < pv->len) { + memmove(&pv->data[idx + 1], &pv->data[idx], (pv->len - idx) * sizeof(void *)); } - pv->data[pos] = elem; + pv->data[idx] = elem; pv->len++; return pv; } +/* Finds the index of the given element in the pVector. + * + * Parameters: + * pv - The vector to search. + * elem - The element to look for (pointer equality). + * + * Returns: + * The index of the element if found; otherwise, returns pv->len (i.e., not found). + * + * Notes: + * - This compares elements using raw pointer equality (`==`). + * - If pv is NULL or empty, returns 0 as a safe fallback. + * - Return value being equal to pv->len can be used to check for absence. 
*/ +uint32_t pvFind(pVector *pv, void *elem) { + if (!pv || pv->len == 0) return 0; + + for (uint32_t i = 0; i < pv->len; i++) { + if (pv->data[i] == elem) { + return i; + } + } + return pv->len; +} + + /* Removes the element at the specified index from the pVector. * * Shifts elements as necessary and optionally shrinks the vector if memory can be saved. @@ -264,7 +507,7 @@ pVector *pvRemoveAt(pVector *pv, uint32_t idx) { zfree(pv); return NULL; } else if (idx < pv->len - 1UL) - memmove(&pv->data[idx], &pv->data[idx + 1], (pv->len - idx - 1) * PV_ELEM_SIZE); + memmove(&pv->data[idx], &pv->data[idx + 1], (pv->len - idx - 1) * sizeof(void *)); pv->len--; return pvShrinkToFit(pv); } @@ -275,22 +518,23 @@ pVector *pvRemoveAt(pVector *pv, uint32_t idx) { * Updates the vector pointer in case a removal was done. * * Arguments: - * pv - A pointer to the location of the pVector to remove from. + * pv - A pointer to the pVector to remove from. * elem - The element pointer to match and remove. + * removed - A pointer to a memory location to store the result of the removal. * * Return: - * true in case a removal was made, false otherwise */ -bool pvRemove(pVector **ppv, void *elem) { - pVector *pv = *ppv; - if (!pv || pv->len == 0) return false; - - for (uint32_t i = 0; i < pv->len; i++) { - if (pv->data[i] == elem) { - *ppv = pvRemoveAt(pv, i); - return true; + * the vector after the removal attempt */ +pVector *pvRemove(pVector *pv, void *elem, bool *removed) { + bool was_removed = false; + if (pv && pvLen(pv) > 0) { + uint32_t idx = pvFind(pv, elem); + if (idx < pvLen(pv)) { + pv = pvRemoveAt(pv, idx); + was_removed = true; } } - return false; + *removed = was_removed; + return pv; } /* Retrieves the element at the specified index in the pVector. @@ -302,9 +546,9 @@ bool pvRemove(pVector **ppv, void *elem) { * Return: * A pointer to the element at the given index. * Returns NULL if the vector is NULL or the index is out of bounds. 
*/ -void *pvGet(pVector *vec, uint32_t idx) { - assert(vec && idx < vec->len); - return vec->data[idx]; +void *pvGet(pVector *pv, uint32_t idx) { + assert(pv && idx < pvLen(pv)); + return pv->data[idx]; } /* Frees the memory used by the pVector. @@ -331,7 +575,7 @@ void pvFree(pVector *pv) { * Internally this uses pvInsert() with the current length of the vector, * effectively appending the element. */ pVector *pvPush(pVector *pv, void *elem) { - return pvInsert(pv, elem, PV_LEN(pv)); + return pvInsertAt(pv, elem, pvLen(pv)); } /* Removes and optionally returns the last element from the given pVector. @@ -344,10 +588,11 @@ pVector *pvPush(pVector *pv, void *elem) { * A (possibly reallocated) pVector with the last element removed. * * Notes: - * If the vector is empty, the behavior is to remove from index 0 (safe fallback). + * Calling this function on an empty vector will trigger assertion. * You can pass NULL for `pelem` if you don't need the removed value. */ pVector *pvPop(pVector *pv, void **pelem) { - uint32_t last_idx = PV_LEN(pv) > 0 ? PV_LEN(pv) - 1 : 0; + assert(pvLen(pv) > 0); + uint32_t last_idx = pvLen(pv) - 1; if (pelem) *pelem = pvGet(pv, last_idx); return pvRemoveAt(pv, last_idx); } @@ -365,7 +610,7 @@ pVector *pvPop(pVector *pv, void **pelem) { * Preconditions: * - idx must be valid indices within the vector. */ void pvSet(pVector *pv, uint32_t idx, void *elem) { - assert(idx < PV_LEN(pv)); + assert(idx < pvLen(pv)); pv->data[idx] = elem; } @@ -385,36 +630,12 @@ void pvSet(pVector *pv, uint32_t idx, void *elem) { * Notes: * This is a simple in-place swap that uses direct pointer assignment. */ void pvSwap(pVector *pv, uint32_t idx1, uint32_t idx2) { - assert(idx1 < PV_LEN(pv) && idx2 < PV_LEN(pv)); - void *temp = pvGet(pv, idx1); + assert(pv && pvLen(pv) > 0 && idx1 < pvLen(pv) && idx2 < pvLen(pv)); + void *temp = pv->data[idx1]; pv->data[idx1] = pv->data[idx2]; pv->data[idx2] = temp; } -/* Finds the index of the given element in the pVector. 
- * - * Parameters: - * pv - The vector to search. - * elem - The element to look for (pointer equality). - * - * Returns: - * The index of the element if found; otherwise, returns pv->len (i.e., not found). - * - * Notes: - * - This compares elements using raw pointer equality (`==`). - * - If pv is NULL or empty, returns 0 as a safe fallback. - * - Return value being equal to pv->len can be used to check for absence. */ -uint32_t pvFind(pVector *pv, void *elem) { - if (!pv || pv->len == 0) return 0; - - for (uint32_t i = 0; i < pv->len; i++) { - if (pv->data[i] == elem) { - return i; - } - } - return pv->len; -} - /* Sort the elements of a pVector using a user-provided comparison function. * * This function performs an in-place sort of the elements in the given pVector. @@ -441,12 +662,19 @@ uint32_t pvFind(pVector *pv, void *elem) { * * pvSort(my_vector, cmp); */ void pvSort(pVector *pv, int (*compare)(const void *a, const void *b)) { + if (pvLen(pv) <= 1) return; qsort(pv->data, pv->len, sizeof(void *), compare); } /************************************************************************************************************* * pVector End *************************************************************************************************************/ + +#define VOLATILESET_BUCKET_INTERVAL_MAX (1LL << 13LL) // 2^13 = 8192 milliseconds +#define VOLATILESET_BUCKET_INTERVAL_MIN (1LL << 4LL) // 2^4 = 16 milliseconds + +#define VOLATILESET_VECTOR_BUCKET_MAX_SIZE 127 + #define VSET_BUCKET_NONE -1 // matching the NULL case #define VSET_BUCKET_SINGLE 0x1UL // xx1 (assuming sds) #define VSET_BUCKET_VECTOR 0x2UL // 010 @@ -456,6 +684,47 @@ void pvSort(pVector *pv, int (*compare)(const void *a, const void *b)) { #define VSET_TAG_MASK 0x7UL #define VSET_PTR_MASK (~VSET_TAG_MASK) +// Generic bucket type +typedef void vsetBucket; + +typedef struct vsetInternalIterator { + /* for rax bucket */ + raxIterator riter; + union { + /* for hashtable bucket */ + hashtableIterator hiter; + 
/* for vector bucket */ + uint32_t viter; + /* for single bucket */ + void *vsingle; + }; + /* the parent of the bucket we are currently iterating on */ + vsetBucket *parent_bucket; + /* the bucket we are currently iterating on */ + vsetBucket *bucket; + /* the pointer entry */ + void *entry; + /* In case of rax encoded set, this is the current iterated bucket timestamp */ + long long bucket_ts; + /* the state of the iteration */ + int iteration_state; +} vsetInternalIterator; + +/* The opaque vsetIterator is defined as a blob of bytes; it must be large enough to hold the internal iterator. */ +static_assert(sizeof(vsetIterator) >= sizeof(vsetInternalIterator), + "Opaque iterator size"); + +/* Conversion from user-facing opaque iterator type to internal struct. */ +static inline vsetInternalIterator *iteratorFromOpaque(vsetIterator *iterator) { + return (vsetInternalIterator *)(void *)iterator; +} + +/* Conversion from internal struct to user-facing opaque iterator type. */ +static inline vsetIterator *opaqueFromIterator(vsetInternalIterator *iterator) { + return (vsetIterator *)(void *)iterator; +} + + // Determine bucket type static inline int vsetBucketType(vsetBucket *b) { if (b == NULL) return VSET_BUCKET_NONE; @@ -893,7 +1162,7 @@ static inline vsetBucket *insertToBucket_VECTOR(vsetGetExpiryFunc getExpiry, vse } else { if (pos >= 0) /* In case we are explicitly provided a position to insert place the entry there */ - return vsetBucketFromVector(pvInsert(pv, entry, pos)); + return vsetBucketFromVector(pvInsertAt(pv, entry, pos)); else /* Otherwise it is better to just push the entry to the vector with less change of memmove and reallocation. 
*/ return vsetBucketFromVector(pvPush(pv, entry)); @@ -1009,9 +1278,10 @@ static inline vsetBucket *removeFromBucket_VECTOR(vsetGetExpiryFunc getExpiry, v new_bucket = vsetBucketFromVector(pvPop(pv, &popped_entry)); assert(popped_entry == entry); } - } else if (pvRemove(&pv, entry)) { - success = true; - new_bucket = vsetBucketFromVector(pv); + } else { + pv = pvRemove(pv, entry, &success); + if (success) + new_bucket = vsetBucketFromVector(pv); } } if (removed) *removed = success; @@ -1128,19 +1398,19 @@ static inline vsetBucket *removeFromBucket_RAX(vsetGetExpiryFunc getExpiry, vset return target; } -static inline size_t vsetBucketPopExpired_NONE(vsetBucket **bucket, vsetGetExpiryFunc getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx) { +static inline size_t vsetBucketPopExpired_NONE(vsetBucket **bucket, vsetGetExpiryFunc getExpiry, mstime_t now, void **expired, size_t max_count) { UNUSED(bucket); UNUSED(getExpiry); - UNUSED(expiryFunc); UNUSED(now); UNUSED(max_count); - UNUSED(ctx); + UNUSED(expired); return 0; } -static inline size_t vsetBucketPopExpired_SINGLE(vsetBucket **bucket, vsetGetExpiryFunc getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx) { +static inline size_t vsetBucketPopExpired_SINGLE(vsetBucket **bucket, vsetGetExpiryFunc getExpiry, mstime_t now, void **expired, size_t max_count) { void *entry = vsetBucketSingle(*bucket); - if (max_count && getExpiry(entry) <= now && expiryFunc(entry, ctx)) { + if (max_count && getExpiry(entry) <= now) { + expired[0] = entry; freeVsetBucket(*bucket); *bucket = vsetBucketFromNone(); return 1; @@ -1148,15 +1418,16 @@ static inline size_t vsetBucketPopExpired_SINGLE(vsetBucket **bucket, vsetGetExp return 0; } -static inline size_t vsetBucketPopExpired_VECTOR(vsetBucket **bucket, vsetGetExpiryFunc getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx) { +static inline size_t vsetBucketPopExpired_VECTOR(vsetBucket **bucket, 
vsetGetExpiryFunc getExpiry, mstime_t now, void **expired, size_t max_count) { pVector *pv = vsetBucketVector(*bucket); uint32_t len = min(pvLen(pv), max_count); uint32_t i = 0; for (; i < len; i++) { void *entry = pvGet(pv, i); /* break as soon as the expiryFunc stops us OR we reached an entry which is not expired */ - if (getExpiry(entry) > now || !(expiryFunc(entry, ctx))) + if (getExpiry(entry) > now) break; + expired[i] = entry; } pVector *new_pv = pvSplit(&pv, i); *bucket = (new_pv ? vsetBucketFromVector(new_pv) : vsetBucketFromNone()); @@ -1164,18 +1435,18 @@ static inline size_t vsetBucketPopExpired_VECTOR(vsetBucket **bucket, vsetGetExp return i; } -static inline size_t vsetBucketPopExpired_HASHTABLE(vsetBucket **bucket, vsetGetExpiryFunc getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx) { +static inline size_t vsetBucketPopExpired_HASHTABLE(vsetBucket **bucket, vsetGetExpiryFunc getExpiry, mstime_t now, void **expired, size_t max_count) { UNUSED(getExpiry); UNUSED(now); hashtable *ht = vsetBucketHashtable(*bucket); hashtableIterator it; void *entry = NULL; - size_t expired = 0; + size_t count = 0; hashtableInitIterator(&it, ht, HASHTABLE_ITER_SAFE); while (hashtableNext(&it, &entry)) { - if (expired < max_count && expiryFunc(entry, ctx)) { + if (count < max_count) { hashtableDelete(ht, entry); - expired++; + expired[count++] = entry; entry = NULL; } else break; @@ -1191,10 +1462,10 @@ static inline size_t vsetBucketPopExpired_HASHTABLE(vsetBucket **bucket, vsetGet assert(entry); *bucket = vsetBucketFromSingle(entry); } - return expired; + return count; } -static inline size_t vsetBucketPopExpired_RAX(vsetBucket **bucket, vsetGetExpiryFunc getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx) { +static inline size_t vsetBucketPopExpired_RAX(vsetBucket **bucket, vsetGetExpiryFunc getExpiry, mstime_t now, void **expired, size_t max_count) { UNUSED(getExpiry); rax *buckets = vsetBucketRax(*bucket); 
size_t count = 0; @@ -1216,13 +1487,13 @@ static inline size_t vsetBucketPopExpired_RAX(vsetBucket **bucket, vsetGetExpiry break; switch (time_bucket_type) { case VSET_BUCKET_SINGLE: - count += vsetBucketPopExpired_SINGLE(&time_bucket, vsetGetExpiryZero, expiryFunc, now, max_count - count, ctx); + count += vsetBucketPopExpired_SINGLE(&time_bucket, vsetGetExpiryZero, now, expired + count, max_count - count); break; case VSET_BUCKET_VECTOR: - count += vsetBucketPopExpired_VECTOR(&time_bucket, vsetGetExpiryZero, expiryFunc, now, max_count - count, ctx); + count += vsetBucketPopExpired_VECTOR(&time_bucket, vsetGetExpiryZero, now, expired + count, max_count - count); break; case VSET_BUCKET_HT: - count += vsetBucketPopExpired_HASHTABLE(&time_bucket, vsetGetExpiryZero, expiryFunc, now, max_count - count, ctx); + count += vsetBucketPopExpired_HASHTABLE(&time_bucket, vsetGetExpiryZero, now, expired + count, max_count - count); break; default: panic("Cannot expire entries from bucket which is not single, vector or hashtable"); @@ -1247,13 +1518,13 @@ static inline size_t vsetBucketPopExpired_RAX(vsetBucket **bucket, vsetGetExpiry return count; } -static int vsetBucketNext_NONE(vsetIterator *it, void **entryptr) { +static int vsetBucketNext_NONE(vsetInternalIterator *it, void **entryptr) { UNUSED(it); UNUSED(entryptr); return 0; } -static inline int vsetBucketNext_SINGLE(vsetIterator *it, void **entryptr) { +static inline int vsetBucketNext_SINGLE(vsetInternalIterator *it, void **entryptr) { bool init_bucket_scan = (it->iteration_state == VSET_BUCKET_NONE); if (init_bucket_scan) { it->iteration_state = VSET_BUCKET_SINGLE; @@ -1264,7 +1535,7 @@ static inline int vsetBucketNext_SINGLE(vsetIterator *it, void **entryptr) { return 0; } -static inline int vsetBucketNext_VECTOR(vsetIterator *it, void **entryptr) { +static inline int vsetBucketNext_VECTOR(vsetInternalIterator *it, void **entryptr) { bool init_bucket_scan = (it->iteration_state == VSET_BUCKET_NONE); pVector *pv = 
vsetBucketVector(it->bucket); if (init_bucket_scan) { @@ -1282,7 +1553,7 @@ static inline int vsetBucketNext_VECTOR(vsetIterator *it, void **entryptr) { return 1; } -static inline int vsetBucketNext_HASHTABLE(vsetIterator *it, void **entryptr) { +static inline int vsetBucketNext_HASHTABLE(vsetInternalIterator *it, void **entryptr) { bool init_bucket_scan = (it->iteration_state == VSET_BUCKET_NONE); hashtable *ht = vsetBucketHashtable(it->bucket); if (init_bucket_scan) { @@ -1297,7 +1568,7 @@ static inline int vsetBucketNext_HASHTABLE(vsetIterator *it, void **entryptr) { return 1; } -static inline int vsetBucketNext_RAX(vsetIterator *it, void **entryptr) { +static inline int vsetBucketNext_RAX(vsetInternalIterator *it, void **entryptr) { bool init_bucket_scan = (it->iteration_state == VSET_BUCKET_NONE); if (init_bucket_scan) { /* set myself as the parent bucket */ @@ -1311,7 +1582,7 @@ static inline int vsetBucketNext_RAX(vsetIterator *it, void **entryptr) { it->bucket_ts = decodeExpiryKey(it->riter.key); it->bucket = it->riter.data; it->iteration_state = VSET_BUCKET_NONE; - return vsetNext(it, entryptr); + return vsetNext(opaqueFromIterator(it), entryptr); } else { /* We currently do not support nested RAX buckets */ it->parent_bucket = vsetBucketFromNone(); @@ -1757,24 +2028,24 @@ bool vsetUpdateEntry(vset *set, vsetGetExpiryFunc getExpiry, void *old_entry, vo * * Return: * Number of expired entries removed (size_t). 
*/ -size_t vsetPopExpired(vset *set, vsetGetExpiryFunc getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx) { +size_t vsetPopExpired(vset *set, vsetGetExpiryFunc getExpiry, mstime_t now, void **expired, size_t max_count) { vsetBucket *bucket = *set; int bucket_type = vsetBucketType(bucket); switch (bucket_type) { case VSET_BUCKET_NONE: - return vsetBucketPopExpired_NONE(set, getExpiry, expiryFunc, now, max_count, ctx); + return vsetBucketPopExpired_NONE(set, getExpiry, now, expired, max_count); break; case VSET_BUCKET_RAX: - return vsetBucketPopExpired_RAX(set, getExpiry, expiryFunc, now, max_count, ctx); + return vsetBucketPopExpired_RAX(set, getExpiry, now, expired, max_count); break; case VSET_BUCKET_SINGLE: - return vsetBucketPopExpired_SINGLE(set, getExpiry, expiryFunc, now, max_count, ctx); + return vsetBucketPopExpired_SINGLE(set, getExpiry, now, expired, max_count); break; case VSET_BUCKET_VECTOR: - return vsetBucketPopExpired_VECTOR(set, getExpiry, expiryFunc, now, max_count, ctx); + return vsetBucketPopExpired_VECTOR(set, getExpiry, now, expired, max_count); break; case VSET_BUCKET_HT: - return vsetBucketPopExpired_HASHTABLE(set, getExpiry, expiryFunc, now, max_count, ctx); + return vsetBucketPopExpired_HASHTABLE(set, getExpiry, now, expired, max_count); break; default: panic("Unknown volatile set bucket type in vsetPopExpired"); @@ -1845,13 +2116,14 @@ long long vsetEstimatedEarliestExpiry(vset *set, vsetGetExpiryFunc getExpiry) { * the parent bucket (typically used when iterating nested structures, such as RAX buckets). * * Parameters: - * - it: Pointer to an initialized vsetIterator. + * - it: Pointer to an initialized vsetInternalIterator. * - entryptr: Output pointer to receive the next entry. * * Returns: * - true if a next entry is found. * - false if iteration is complete. 
*/ -bool vsetNext(vsetIterator *it, void **entryptr) { +bool vsetNext(vsetIterator *iter, void **entryptr) { + vsetInternalIterator *it = iteratorFromOpaque(iter); vsetBucket *bucket = it->bucket; int bucket_type = vsetBucketType(bucket); int ret = 0; @@ -1878,7 +2150,7 @@ bool vsetNext(vsetIterator *it, void **entryptr) { /* continue iterating the parent bucket */ it->iteration_state = vsetBucketType(it->parent_bucket); it->bucket = it->parent_bucket; - return vsetNext(it, entryptr); + return vsetNext(opaqueFromIterator(it), entryptr); } return ret == 1; } @@ -1910,8 +2182,9 @@ size_t vsetMemUsage(vset *set) { * * Parameters: * - set: Pointer to the volatile set to iterate. - * - it: Pointer to a vsetIterator structure to initialize. */ -void vsetStart(vset *set, vsetIterator *it) { + * - it: Pointer to a vsetInternalIterator structure to initialize. */ +void vsetInitIterator(vset *set, vsetIterator *iter) { + vsetInternalIterator *it = iteratorFromOpaque(iter); it->iteration_state = VSET_BUCKET_NONE; /*lets start by going to the first bucket. */ it->bucket = *set; it->bucket_ts = -1; @@ -1924,8 +2197,9 @@ void vsetStart(vset *set, vsetIterator *it) { * This function ensures proper cleanup of those structures when the iteration is done. * * Parameters: - * - it: Pointer to the vsetIterator that was previously initialized with vsetStart(). */ -void vsetStop(vsetIterator *it) { + * - it: Pointer to the vsetInternalIterator that was previously initialized with vsetInitIterator(). */ +void vsetResetIterator(vsetIterator *iter) { + vsetInternalIterator *it = iteratorFromOpaque(iter); int bucket_type = vsetBucketType(it->bucket); int parent_bucket_type = vsetBucketType(it->parent_bucket); if (parent_bucket_type == VSET_BUCKET_RAX) diff --git a/src/vset.h b/src/vset.h index 0f7b92a4e6..1516347c0b 100644 --- a/src/vset.h +++ b/src/vset.h @@ -18,299 +18,73 @@ * entries with expiry semantics. 
It is designed to efficiently track entries * that expire at varying times and scales to large sets by adapting its internal * representation as it grows or shrinks. - * - *----------------------------------------------------------------------------- - * Expiry Buckets and Pointer Tagging - *----------------------------------------------------------------------------- - * - * Internally, the `vset` maintains a single `vsetBucket*` pointer, - * which can point to different types of buckets depending on the number of - * entries and the needed resolution. The pointer is tagged using the lowest 3 bits: - * - * #define VSET_BUCKET_NONE -1 - * #define VSET_BUCKET_SINGLE 0x1ULL // pointer to single entry (odd ptr) - * #define VSET_BUCKET_VECTOR 0x2ULL // pointer to pointer vector - * #define VSET_BUCKET_HT 0x4ULL // pointer to hashtable - * #define VSET_BUCKET_RAX 0x6ULL // pointer to radix tree - * - * #define VSET_TAG_MASK 0x7ULL - * #define VSET_PTR_MASK (~VSET_TAG_MASK) - * - * IMPORTANT!!!! - All entries must have LSB set (i.e., be odd-aligned) to be compatible with !!!! - * tagging constraints. - * - *----------------------------------------------------------------------------- - * Time Bucket Management - *----------------------------------------------------------------------------- - * - * Entries are grouped into **time buckets** based on their expiry time. - * Each time bucket represents a window aligned to: - * - * #define VOLATILESET_BUCKET_INTERVAL_MIN (1 << 4) // 16ms - * #define VOLATILESET_BUCKET_INTERVAL_MAX (1 << 13) // 8192ms - * - * A time bucket key is computed by rounding the expiry timestamp up to the - * nearest aligned window using `get_bucket_ts()`. - * - *----------------------------------------------------------------------------- - * Entry Addition and Bucket Promotion - *----------------------------------------------------------------------------- - * - * When a new entry is added: - * - * 1. 
If the current set is `NONE`, it becomes a `SINGLE` bucket. - * 2. If the set is a `SINGLE` bucket and another entry arrives: - * -> it is promoted to a `VECTOR` bucket (sorted by expiry). - * 3. If the `VECTOR` exceeds `VOLATILESET_VECTOR_BUCKET_MAX_SIZE` (127): - * -> the set becomes a `RAX`, and existing entries are migrated. - * 4. IF the set is using RAX encoding it will locate a bucket to add the entry - * following the strategy explained below. - * - *----------------------------------------------------------------------------- - * RAX Bucket and Dynamic Splitting - *----------------------------------------------------------------------------- - * - * Each bucket in the RAX bucket corresponds to a **time window**, defined by - * its bucket timestamp (`bucket_ts`). This timestamp represents the **END** of - * the time window. Entries in the bucket must expire *before* this timestamp. - * - * Time windows are defined in granular ranges: - * - Minimum granularity: VOLATILESET_BUCKET_INTERVAL_MIN (16 ms) - * - Maximum granularity: VOLATILESET_BUCKET_INTERVAL_MAX (8192 ms) - * - * A bucket can only contain entries that: - * 1. Have expiry < bucket_ts - * 2. Do not fit into any bucket with a smaller timestamp (i.e., earlier window) - * - * The structure allows multiple encodings: - * VSET_BUCKET_SINGLE - A single pointer to one entry. - * VSET_BUCKET_VECTOR - A sorted vector of pointers (up to 127 entries). - * VSET_BUCKET_HT - A hashtable used when vectors become too dense. - * - * Bucket Timestamp (END of window): - * - * |------------------ Bucket Span ------------------| - * [window_start .................................. 
bucket_ts) - * - * Layout Example: - * - * Timeline: ----------> increasing time -----------> - * +--------------+-------------+---------+ - * | B0 | B1 | B2 | - * | ts=32 | ts=128 | ts=2048 | - * +--------------+-------------+---------+ - * ^ ^ ^ - * | | | - * [E1,E2] ∈ B0 [E3...E7] ∈ B1 [E8...E15] ∈ B2 - * - * All entries expire BEFORE their bucket_ts - * - * Bucket Splitting Strategy: - * ---------------------------------- - * - * When a bucket (e.g. VECTOR) becomes too dense or needs realignment: - * - * 1. Re-align to lower granularity: - * - Adjust the bucket timestamp down to a finer granularity (e.g. 16ms). - * - Only done if ALL entries still fit in the tighter window. - * - Effectively “moves” the bucket to an earlier timestamp. - * - * Example: B(ts=128, span=128ms) -> B(ts=64, span=16ms) - * - * 2. Split into two buckets: - * - Use binary search to find a “natural” boundary based on entry expiry. - * - Original bucket retains its timestamp (but holds fewer entries). - * - New bucket is inserted before the current one with its own tighter timestamp. - * - * Example: - * - * Before: - * [ Entry0 ... Entry126 ] -> B(ts=128) - * - * After Split: - * [ Entry0...Entry62 ] -> New B(ts=64) - * [ Entry63...Entry126 ] -> Original B(ts=128) - * - * 3. Convert to hashtable: - * - When no clean split is found (e.g. all entries share similar expiry), - * and realignment is not possible. - * - This allows efficient O(1) lookups even with clustered expiry values. - * - * Vector B(ts=128) -> Hashtable B(ts=128) - * - * This hierarchical design ensures: - * - Efficient memory usage (tight buckets) - * - Predictable iteration by expiry time - * - Low overhead insertions & deletions - * - Graceful promotion & demotion of bucket types - * - * NOTE: Buckets are always sorted by their `bucket_ts` in the radix tree (RAX), - * which allows efficient search for insertion/removal based on expiry. 
- * - *----------------------------------------------------------------------------- - * RAX Bucket Layout - *----------------------------------------------------------------------------- - * - * * RAX View with Time Keys: - * - * expiry_buckets = rax * | 0x6 - * - * +--------------------------+ - * | RAX (key = bucket_ts) | - * |--------------------------| - * | "000016" -> [entry1] | <- Vector (SINGLE->VECTOR->HT) - * | "000032" -> [entry2...] | <- Full vector, might split - * | "000048" -> [entry...] | - * +--------------------------+ - * - * * Splitting a Full Vector in RAX: - * - * Suppose vector at key "000032" has 13 entries: - * - * 1. Use binary search to find a transition point in expiry bucket_ts. - * We search the first 2 following entries which belong to different lwo granularity time windows, - * but as close as possible to the middle of the vector: - * [entry1, entry7, ..., entry13] - * ↑ - * split (first where get_bucket_ts(entry) > min_ts) - * - * 2. Create two vectors: - * bucket A -> [entry1..entry6] with key = "000032" - * bucket B -> [entry7..entry13] with key = "000048" - * - * 3. Insert both back to the RAX. 
- * - *----------------------------------------------------------------------------- - * Bucket Lifecycle - *----------------------------------------------------------------------------- - * - * NONE - * | - * v - * SINGLE (1 entry) - * | - * v - * VECTOR (sorted, up to 127) - * | - * v - * RAX (holds multiple buckets, keyed by each bucket's end timestamp) - * Bucket types within a RAX: - * - * SINGLE - * | - * v - * VECTOR (sorted, up to 127, can split - * | into multiple vectors) - * | - * v - * HASHTABLE (only when a vector can't split) - * - *----------------------------------------------------------------------------- - * Entry Type Contract - *----------------------------------------------------------------------------- - * - * Users must supply a `volatileEntryType` implementation: - * - * typedef struct { - * sds (*entryGetKey)(const void *entry); // get key - * long long (*getExpiry)(const void *entry); // get expiry - * int (*expire)(void *db, void *o, void *entry); // trigger expiry - * } volatileEntryType; - * + * *----------------------------------------------------------------------------- * Public API *----------------------------------------------------------------------------- * * Create/Free: - * void vsetInit(vset *set); - * void vsetClear(vset *set); + * vsetInit(vset *set) - used in order to initialize a new vset. + * void vsetClear(vset *set) - used in order to empty all the data in a vset. + * Example: + * vset set; + * vsetInit(&set); + * // add some elements to the vset + * vsetClear(&set); + * // verify the set is empty: + * assert(vsetIsEmpty(&set)); * * Mutation: - * bool vsetAddEntry(vset *set, vsetGetExpiryFunc getExpiry, void *entry); - * bool vsetRemoveEntry(vset *set, vsetGetExpiryFunc getExpiry, void *entry); + * bool vsetAddEntry(vset *set, vsetGetExpiryFunc getExpiry, void *entry) - used in order to insert a new entry into the set. 
+ * The API also make use of the provided getExpiry function in order to compare the 'entry' expiration time of the other existing + * entries in the set. + * + * bool vsetRemoveEntry(vset *set, vsetGetExpiryFunc getExpiry, void *entry) - used in order to remove and entry from the set. + * * bool vsetUpdateEntry(vset *set, vsetGetExpiryFunc getExpiry, void *old_entry, * void *new_entry, long long old_expiry, - * long long new_expiry); + * long long new_expiry) - is used in order to update an existing entry in the set. + * Note that the implementation assumes the 'old_entry' might not point to a valid memory location, thus it require that the 'old_expiry' + * is provided and matches the old entry expiration time. * * Expiry Retrieval/Removal: - * long long vsetEstimatedEarliestExpiry(vset *set, vsetGetExpiryFunc getExpiry); - * size_t vsetPopExpired(vset *set, vsetGetExpiryFunc getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx); + * long long vsetEstimatedEarliestExpiry(vset *set, vsetGetExpiryFunc getExpiry) - will return an estimation to the lowest expiry time of + * the entries which currently exists in the set. Because of the semi-sorted ordering this implementation is using, the returned value MIGHT not be the 'real' minimum + * but rather some value which is the maximum among a group of entries which are all close or equal to the 'real' minimum. + * + * size_t vsetPopExpired(vset *set, vsetGetExpiryFunc getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx) - can be used + * in order to remove up to max_count entries from the vset. The removed entries will all satisfy the condition that their expiration time is smaller than the provided now. + * Note that there are no guarantees about the order to the entries. * * Utilities: - * bool vsetIsEmpty(vset *set); + * bool vsetIsEmpty(vset *set) - used in order to check if a given set has any entries. 
* * Iteration: - * void vsetStart(vset *set, vsetIterator *it); - * bool vsetNext(vsetIterator *it, void **entryptr); - * void vsetStop(vsetIterator *it); - * - *----------------------------------------------------------------------------- - * Iteration Support - *----------------------------------------------------------------------------- + * void vsetInitIterator(vset *set, vsetIterator *it) - used to initialize a new vset iterator. + * bool vsetNext(vsetIterator *it, void **entryptr) - used to iterate to the next element. Will return false if there are no more elements. + * void vsetResetIterator(vsetIterator *it) - used in order to reset the iterator at the end of the iteration. * - * Iterator structure maintains context across all bucket types: - * - * typedef struct vsetIterator { - * raxIterator riter; // for RAX - * hashtableIterator hiter; // for HT - * uint32_t viter; // for VECTOR - * void *vsingle; // for SINGLE - * vsetBucket *parent_bucket; // owning bucket - * vsetBucket *bucket; // active bucket - * void *entry; // current entry - * long long bucket_ts; // for RAX - * int iteration_state; // internal FSM - * } vsetIterator; - * */ - -#define VOLATILESET_BUCKET_INTERVAL_MAX (1LL << 13LL) // 2^13 = 8192 milliseconds -#define VOLATILESET_BUCKET_INTERVAL_MIN (1LL << 4LL) // 2^4 = 16 milliseconds - -#define VOLATILESET_VECTOR_BUCKET_MAX_SIZE 127 + * Note that the vset iterator is NOT safe, Meaning you should not change the set while iterating it. Adding entries and/or removing entries + * can result in unexpected behavior.! 
*/ typedef long long (*vsetGetExpiryFunc)(const void *entry); -typedef int (*vsetExpiryFunc)(void *entry, void *ctx); - -// Generic bucket type -typedef void vsetBucket; // vset is just a pointer to a bucket -typedef vsetBucket *vset; +typedef void *vset; -typedef struct vsetIterator { - /* for rax bucket */ - raxIterator riter; - union { - /* for hashtable bucket */ - hashtableIterator hiter; - /* for vector bucket */ - uint32_t viter; - /* for single bucket */ - void *vsingle; - }; - /* the parent of the bucket we are currently iterating on */ - vsetBucket *parent_bucket; - /* the bucket we are currently iterating on */ - vsetBucket *bucket; - /* the pointer entry */ - void *entry; - /* In case of rax encoded set, this is the current iterated bucket timestamp */ - long long bucket_ts; - /* the state of the iteration */ - int iteration_state; -} vsetIterator; +typedef uint8_t vsetIterator[560]; bool vsetAddEntry(vset *set, vsetGetExpiryFunc getExpiry, void *entry); bool vsetRemoveEntry(vset *set, vsetGetExpiryFunc getExpiry, void *entry); bool vsetUpdateEntry(vset *set, vsetGetExpiryFunc getExpiry, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry); bool vsetIsEmpty(vset *set); -void vsetStart(vset *set, vsetIterator *it); +void vsetInitIterator(vset *set, vsetIterator *it); bool vsetNext(vsetIterator *it, void **entryptr); -void vsetStop(vsetIterator *it); +void vsetResetIterator(vsetIterator *it); void vsetInit(vset *set); void vsetClear(vset *set); long long vsetEstimatedEarliestExpiry(vset *set, vsetGetExpiryFunc getExpiry); -size_t vsetPopExpired(vset *set, vsetGetExpiryFunc getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx); +size_t vsetPopExpired(vset *set, vsetGetExpiryFunc getExpiry, mstime_t now, void **expired, size_t max_count); size_t vsetMemUsage(vset *set); size_t vsetScanDefrag(vset *set, size_t cursor, void *(*defragfn)(void *), int (*defragRaxNode)(raxNode **)); From 
710150a760771349af3e231247386a282595fc37 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Sun, 3 Aug 2025 15:18:39 +0300 Subject: [PATCH 108/119] fix compilation following bit definitions Signed-off-by: Ran Shidlansik --- src/vset.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/vset.c b/src/vset.c index af689bc49f..9de560cb5f 100644 --- a/src/vset.c +++ b/src/vset.c @@ -215,8 +215,8 @@ * pVector Implementation *************************************************************************************************************/ -static const size_t PV_CARD_BITS = 30; -static const size_t PV_ALLOC_BITS = 34; +#define PV_CARD_BITS 30 +#define PV_ALLOC_BITS 34 /* Custom vector structure with embedded allocation and length counters */ typedef struct { From c6dc3a8c9f41dad8d3ff54577b4d1a26a117c7c8 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Sun, 3 Aug 2025 15:38:39 +0300 Subject: [PATCH 109/119] fix format and 32bit compilation Signed-off-by: Ran Shidlansik --- src/vset.c | 2 +- src/vset.h | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/vset.c b/src/vset.c index 9de560cb5f..6ddc914aae 100644 --- a/src/vset.c +++ b/src/vset.c @@ -290,7 +290,7 @@ static pVector *pvMakeRoomFor(pVector *pv, size_t additional) { pv = zrealloc_usable(pv, required, &required); } /* Make sure we have the capacity to save the alloation size */ - assert(required <= (1ULL << PV_ALLOC_BITS) - 1); + assert(required <= (size_t)((1ULL << PV_ALLOC_BITS) - 1)); pv->alloc = required; return pv; } diff --git a/src/vset.h b/src/vset.h index 1516347c0b..bbac1d0261 100644 --- a/src/vset.h +++ b/src/vset.h @@ -18,7 +18,7 @@ * entries with expiry semantics. It is designed to efficiently track entries * that expire at varying times and scales to large sets by adapting its internal * representation as it grows or shrinks. 
- * + * *----------------------------------------------------------------------------- * Public API *----------------------------------------------------------------------------- @@ -51,7 +51,7 @@ * long long vsetEstimatedEarliestExpiry(vset *set, vsetGetExpiryFunc getExpiry) - will return an estimation to the lowest expiry time of * the entries which currently exists in the set. Because of the semi-sorted ordering this implementation is using, the returned value MIGHT not be the 'real' minimum * but rather some value which is the maximum among a group of entries which are all close or equal to the 'real' minimum. - * + * * size_t vsetPopExpired(vset *set, vsetGetExpiryFunc getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx) - can be used * in order to remove up to max_count entries from the vset. The removed entries will all satisfy the condition that their expiration time is smaller than the provided now. * Note that there are no guarantees about the order to the entries. @@ -67,6 +67,7 @@ * Note that the vset iterator is NOT safe, Meaning you should not change the set while iterating it. Adding entries and/or removing entries * can result in unexpected behavior.! */ + /* Returns the absolute expiration time in milliseconds for the provided entry */ typedef long long (*vsetGetExpiryFunc)(const void *entry); // vset is just a pointer to a bucket From be4064d130c63c7ab56b9b58af33b7427b701836 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Sun, 3 Aug 2025 16:16:39 +0300 Subject: [PATCH 110/119] fix small format issue Signed-off-by: Ran Shidlansik --- src/vset.c | 3 +-- src/vset.h | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/vset.c b/src/vset.c index 6ddc914aae..a684cbcfd0 100644 --- a/src/vset.c +++ b/src/vset.c @@ -433,8 +433,7 @@ pVector *pvSplit(pVector **pv_ptr, uint32_t split_index) { * The logical length (`len`) of the returned vector is initialized to 0.
*/ pVector *pvNew(uint32_t capacity) { - pVector *new_vec = NULL; - return pvMakeRoomFor(new_vec, capacity); + return pvMakeRoomFor(NULL, capacity); } /* Inserts an element at the specified position in the pVector. diff --git a/src/vset.h b/src/vset.h index bbac1d0261..fe562bb647 100644 --- a/src/vset.h +++ b/src/vset.h @@ -67,7 +67,7 @@ * Note that the vset iterator is NOT safe, Meaning you should not change the set while iterating it. Adding entries and/or removing entries * can result in unexpected behavior.! */ - /* Returns the absolute expiration time in milliseconds for the provided entry */ +/* Return the absolute expiration time in milliseconds for the provided entry */ typedef long long (*vsetGetExpiryFunc)(const void *entry); // vset is just a pointer to a bucket From 41fa36d8f3f43d359a72839ff8593e1cfd790c04 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Sun, 3 Aug 2025 18:11:49 +0300 Subject: [PATCH 111/119] Revert back to use vsetRemoveExpired. This will serve better to maintain cache local executions of active expiration. 
Signed-off-by: Ran Shidlansik --- src/unit/test_vset.c | 9 +++------ src/vset.c | 48 +++++++++++++++++++++----------------------- src/vset.h | 7 ++++--- 3 files changed, 30 insertions(+), 34 deletions(-) diff --git a/src/unit/test_vset.c b/src/unit/test_vset.c index a1b0a7d412..dadb180475 100644 --- a/src/unit/test_vset.c +++ b/src/unit/test_vset.c @@ -321,8 +321,9 @@ long long mock_entry_get_expiry(const void *entry) { return mockGetExpiry(entry); } -int mock_entry_expire(void *entry, long long now) { +int mock_entry_expire(void *entry, void *ctx) { mock_entry *e = (mock_entry *)entry; + long long now = *(long long *)ctx; TEST_ASSERT(mock_entry_get_expiry(entry) <= now); for (int i = 0; i < mock_entry_count; i++) { if (mock_entries[i] == e) { @@ -393,11 +394,7 @@ int remove_mock_entry(vset *set) { int expire_mock_entries(vset *set, mstime_t now) { // printf("Before expired entries entries: %d\n", mock_entry_count); - const int expired_max = mock_entry_count; - void *expired_entries[expired_max]; - size_t expired_count = vsetPopExpired(set, mockGetExpiry, now, expired_entries, expired_max); - for (size_t i = 0; i < expired_count; i++) - mock_entry_expire(expired_entries[i], now); + vsetRemoveExpired(set, mockGetExpiry, mock_entry_expire, now, mock_entry_count, &now); // printf("After expired %zu entries left entries: %d and set is empty: %s\n", count, mock_entry_count, vsetIsEmpty(set) ? 
"true" : "false"); return 0; } diff --git a/src/vset.c b/src/vset.c index a684cbcfd0..fe81f6411b 100644 --- a/src/vset.c +++ b/src/vset.c @@ -1396,27 +1396,28 @@ static inline vsetBucket *removeFromBucket_RAX(vsetGetExpiryFunc getExpiry, vset return target; } -static inline size_t vsetBucketPopExpired_NONE(vsetBucket **bucket, vsetGetExpiryFunc getExpiry, mstime_t now, void **expired, size_t max_count) { +static inline size_t vsetBucketRemoveExpired_NONE(vsetBucket **bucket, vsetGetExpiryFunc getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx) { UNUSED(bucket); UNUSED(getExpiry); + UNUSED(expiryFunc); UNUSED(now); UNUSED(max_count); - UNUSED(expired); + UNUSED(ctx); return 0; } -static inline size_t vsetBucketPopExpired_SINGLE(vsetBucket **bucket, vsetGetExpiryFunc getExpiry, mstime_t now, void **expired, size_t max_count) { +static inline size_t vsetBucketRemoveExpired_SINGLE(vsetBucket **bucket, vsetGetExpiryFunc getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx) { void *entry = vsetBucketSingle(*bucket); if (max_count && getExpiry(entry) <= now) { - expired[0] = entry; freeVsetBucket(*bucket); *bucket = vsetBucketFromNone(); + if (expiryFunc) expiryFunc(entry, ctx); return 1; } return 0; } -static inline size_t vsetBucketPopExpired_VECTOR(vsetBucket **bucket, vsetGetExpiryFunc getExpiry, mstime_t now, void **expired, size_t max_count) { +static inline size_t vsetBucketRemoveExpired_VECTOR(vsetBucket **bucket, vsetGetExpiryFunc getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx) { pVector *pv = vsetBucketVector(*bucket); uint32_t len = min(pvLen(pv), max_count); uint32_t i = 0; @@ -1425,7 +1426,7 @@ static inline size_t vsetBucketPopExpired_VECTOR(vsetBucket **bucket, vsetGetExp /* break as soon as the expiryFunc stops us OR we reached an entry which is not expired */ if (getExpiry(entry) > now) break; - expired[i] = entry; + if (expiryFunc) expiryFunc(entry, ctx); } pVector 
*new_pv = pvSplit(&pv, i); *bucket = (new_pv ? vsetBucketFromVector(new_pv) : vsetBucketFromNone()); @@ -1433,21 +1434,18 @@ static inline size_t vsetBucketPopExpired_VECTOR(vsetBucket **bucket, vsetGetExp return i; } -static inline size_t vsetBucketPopExpired_HASHTABLE(vsetBucket **bucket, vsetGetExpiryFunc getExpiry, mstime_t now, void **expired, size_t max_count) { +static inline size_t vsetBucketRemoveExpired_HASHTABLE(vsetBucket **bucket, vsetGetExpiryFunc getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx) { UNUSED(getExpiry); UNUSED(now); hashtable *ht = vsetBucketHashtable(*bucket); hashtableIterator it; - void *entry = NULL; + void *entry; size_t count = 0; hashtableInitIterator(&it, ht, HASHTABLE_ITER_SAFE); - while (hashtableNext(&it, &entry)) { - if (count < max_count) { - hashtableDelete(ht, entry); - expired[count++] = entry; - entry = NULL; - } else - break; + while (count < max_count && hashtableNext(&it, &entry)) { + assert(hashtableDelete(ht, entry)); + expiryFunc(entry, ctx); + count++; } hashtableResetIterator(&it); @@ -1463,7 +1461,7 @@ static inline size_t vsetBucketPopExpired_HASHTABLE(vsetBucket **bucket, vsetGet return count; } -static inline size_t vsetBucketPopExpired_RAX(vsetBucket **bucket, vsetGetExpiryFunc getExpiry, mstime_t now, void **expired, size_t max_count) { +static inline size_t vsetBucketRemoveExpired_RAX(vsetBucket **bucket, vsetGetExpiryFunc getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx) { UNUSED(getExpiry); rax *buckets = vsetBucketRax(*bucket); size_t count = 0; @@ -1485,13 +1483,13 @@ static inline size_t vsetBucketPopExpired_RAX(vsetBucket **bucket, vsetGetExpiry break; switch (time_bucket_type) { case VSET_BUCKET_SINGLE: - count += vsetBucketPopExpired_SINGLE(&time_bucket, vsetGetExpiryZero, now, expired + count, max_count - count); + count += vsetBucketRemoveExpired_SINGLE(&time_bucket, vsetGetExpiryZero, expiryFunc, now, max_count - count, ctx); break; 
case VSET_BUCKET_VECTOR: - count += vsetBucketPopExpired_VECTOR(&time_bucket, vsetGetExpiryZero, now, expired + count, max_count - count); + count += vsetBucketRemoveExpired_VECTOR(&time_bucket, vsetGetExpiryZero, expiryFunc, now, max_count - count, ctx); break; case VSET_BUCKET_HT: - count += vsetBucketPopExpired_HASHTABLE(&time_bucket, vsetGetExpiryZero, now, expired + count, max_count - count); + count += vsetBucketRemoveExpired_HASHTABLE(&time_bucket, vsetGetExpiryZero, expiryFunc, now, max_count - count, ctx); break; default: panic("Cannot expire entries from bucket which is not single, vector or hashtable"); @@ -2026,24 +2024,24 @@ bool vsetUpdateEntry(vset *set, vsetGetExpiryFunc getExpiry, void *old_entry, vo * * Return: * Number of expired entries removed (size_t). */ -size_t vsetPopExpired(vset *set, vsetGetExpiryFunc getExpiry, mstime_t now, void **expired, size_t max_count) { +size_t vsetRemoveExpired(vset *set, vsetGetExpiryFunc getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx) { vsetBucket *bucket = *set; int bucket_type = vsetBucketType(bucket); switch (bucket_type) { case VSET_BUCKET_NONE: - return vsetBucketPopExpired_NONE(set, getExpiry, now, expired, max_count); + return vsetBucketRemoveExpired_NONE(set, getExpiry, expiryFunc, now, max_count, ctx); break; case VSET_BUCKET_RAX: - return vsetBucketPopExpired_RAX(set, getExpiry, now, expired, max_count); + return vsetBucketRemoveExpired_RAX(set, getExpiry, expiryFunc, now, max_count, ctx); break; case VSET_BUCKET_SINGLE: - return vsetBucketPopExpired_SINGLE(set, getExpiry, now, expired, max_count); + return vsetBucketRemoveExpired_SINGLE(set, getExpiry, expiryFunc, now, max_count, ctx); break; case VSET_BUCKET_VECTOR: - return vsetBucketPopExpired_VECTOR(set, getExpiry, now, expired, max_count); + return vsetBucketRemoveExpired_VECTOR(set, getExpiry, expiryFunc, now, max_count, ctx); break; case VSET_BUCKET_HT: - return vsetBucketPopExpired_HASHTABLE(set, getExpiry, 
now, expired, max_count); + return vsetBucketRemoveExpired_HASHTABLE(set, getExpiry, expiryFunc, now, max_count, ctx); break; default: panic("Unknown volatile set bucket type in vsetPopExpired"); diff --git a/src/vset.h b/src/vset.h index fe562bb647..5517e4b35f 100644 --- a/src/vset.h +++ b/src/vset.h @@ -52,7 +52,7 @@ * the entries which currently exists in the set. Because of the semi-sorted ordering this implementation is using, the returned value MIGHT not be the 'real' minimum * but rather some value which is the maximum among a group of entries which are all close or equal to the 'real' minimum. * - * size_t vsetPopExpired(vset *set, vsetGetExpiryFunc getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx) - can be used + * size_t vsetRemoveExpired(vset *set, vsetGetExpiryFunc getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx) - can be used * in order to remove up to max_count entries from the vset. The removed entries will all satisfy the condition that their expiration time is smaller than the provided now. * Note that there are no guarantees about the order to the entries. * @@ -69,7 +69,8 @@ /* Return the absolute expiration time in milliseconds for the provided entry */ typedef long long (*vsetGetExpiryFunc)(const void *entry); - +/* Callback to be optionally provided to vsetRemoveExpired. When an item is removed from the vset this callback will also be applied.
*/ +typedef int (*vsetExpiryFunc)(void *entry, void *ctx); // vset is just a pointer to a bucket typedef void *vset; @@ -85,7 +86,7 @@ void vsetResetIterator(vsetIterator *it); void vsetInit(vset *set); void vsetClear(vset *set); long long vsetEstimatedEarliestExpiry(vset *set, vsetGetExpiryFunc getExpiry); -size_t vsetPopExpired(vset *set, vsetGetExpiryFunc getExpiry, mstime_t now, void **expired, size_t max_count); +size_t vsetRemoveExpired(vset *set, vsetGetExpiryFunc getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx); size_t vsetMemUsage(vset *set); size_t vsetScanDefrag(vset *set, size_t cursor, void *(*defragfn)(void *), int (*defragRaxNode)(raxNode **)); From 450fe95e24dab685da56b1bc1b65f9da0c648161 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Mon, 4 Aug 2025 12:00:14 +0300 Subject: [PATCH 112/119] address more PR comments Signed-off-by: Ran Shidlansik --- src/commands.def | 10 ++-- src/commands/hexpire.json | 4 +- src/commands/hexpireat.json | 6 +-- src/commands/hpersist.json | 2 +- src/commands/hpexpire.json | 4 +- src/commands/hpexpireat.json | 6 +-- src/commands/hpttl.json | 2 +- src/entry.c | 91 ++++++++++++++++++++++-------------- src/server.h | 4 +- src/serverassert.h | 4 ++ src/t_hash.c | 77 ++++++++++++++++-------------- 11 files changed, 122 insertions(+), 88 deletions(-) diff --git a/src/commands.def b/src/commands.def index 6aaac23f20..3e5f292600 100644 --- a/src/commands.def +++ b/src/commands.def @@ -11650,8 +11650,8 @@ struct COMMAND_STRUCT serverCommandTable[] = { /* hash */ {MAKE_CMD("hdel","Deletes one or more fields and their values from a hash. 
Deletes the hash if no fields remain.","O(N) where N is the number of fields to be removed.","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HDEL_History,1,HDEL_Tips,0,hdelCommand,-3,CMD_WRITE|CMD_FAST,ACL_CATEGORY_HASH,HDEL_Keyspecs,1,NULL,2),.args=HDEL_Args}, {MAKE_CMD("hexists","Determines whether a field exists in a hash.","O(1)","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HEXISTS_History,0,HEXISTS_Tips,0,hexistsCommand,3,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HEXISTS_Keyspecs,1,NULL,2),.args=HEXISTS_Args}, -{MAKE_CMD("hexpire","Set expiry time on hash fields.","O(1) for each field assigned with TTL, so O(N) to add N TTLs when the command is called with multiple fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HEXPIRE_History,0,HEXPIRE_Tips,0,hexpireCommand,-6,CMD_WRITE|CMD_FAST,ACL_CATEGORY_HASH,HEXPIRE_Keyspecs,1,NULL,4),.args=HEXPIRE_Args}, -{MAKE_CMD("hexpireat","Set expiry time on hash fields.","O(1) for each field assigned with TTL, so O(N) to add N TTLs when the command is called with multiple fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HEXPIREAT_History,0,HEXPIREAT_Tips,0,hexpireAtCommand,-6,CMD_WRITE|CMD_FAST,ACL_CATEGORY_HASH,HEXPIREAT_Keyspecs,1,NULL,4),.args=HEXPIREAT_Args}, +{MAKE_CMD("hexpire","Set expiry time on hash fields.","O(N) where N is the number of specified fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HEXPIRE_History,0,HEXPIRE_Tips,0,hexpireCommand,-6,CMD_WRITE|CMD_FAST,ACL_CATEGORY_HASH,HEXPIRE_Keyspecs,1,NULL,4),.args=HEXPIRE_Args}, +{MAKE_CMD("hexpireat","Set expiry time on hash fields.","O(N) where N is the number of specified fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HEXPIREAT_History,0,HEXPIREAT_Tips,0,hexpireatCommand,-6,CMD_WRITE|CMD_FAST,ACL_CATEGORY_HASH,HEXPIREAT_Keyspecs,1,NULL,4),.args=HEXPIREAT_Args}, {MAKE_CMD("hexpiretime","Returns Unix timestamps in seconds since the epoch at which the given key's field(s) will 
expire","O(1) for each field, so O(N) for N items when the command is called with multiple fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HEXPIRETIME_History,0,HEXPIRETIME_Tips,0,hexpiretimeCommand,-5,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HEXPIRETIME_Keyspecs,1,NULL,2),.args=HEXPIRETIME_Args}, {MAKE_CMD("hget","Returns the value of a field in a hash.","O(1)","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HGET_History,0,HGET_Tips,0,hgetCommand,3,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HGET_Keyspecs,1,NULL,2),.args=HGET_Args}, {MAKE_CMD("hgetall","Returns all fields and values in a hash.","O(N) where N is the size of the hash.","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HGETALL_History,0,HGETALL_Tips,1,hgetallCommand,2,CMD_READONLY,ACL_CATEGORY_HASH,HGETALL_Keyspecs,1,NULL,1),.args=HGETALL_Args}, @@ -11663,10 +11663,10 @@ struct COMMAND_STRUCT serverCommandTable[] = { {MAKE_CMD("hmget","Returns the values of all fields in a hash.","O(N) where N is the number of fields being requested.","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HMGET_History,0,HMGET_Tips,0,hmgetCommand,-3,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HMGET_Keyspecs,1,NULL,2),.args=HMGET_Args}, {MAKE_CMD("hmset","Sets the values of multiple fields.","O(N) where N is the number of fields being set.","2.0.0",CMD_DOC_DEPRECATED,"`HSET` with multiple field-value pairs","4.0.0","hash",COMMAND_GROUP_HASH,HMSET_History,0,HMSET_Tips,0,hsetCommand,-4,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HMSET_Keyspecs,1,NULL,2),.args=HMSET_Args}, {MAKE_CMD("hpersist","Remove the existing expiration on a hash key's field(s).","O(1) for each field assigned with TTL, so O(N) to persist N items when the command is called with multiple fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HPERSIST_History,0,HPERSIST_Tips,0,hpersistCommand,-5,CMD_WRITE|CMD_FAST,ACL_CATEGORY_HASH,HPERSIST_Keyspecs,1,NULL,2),.args=HPERSIST_Args}, -{MAKE_CMD("hpexpire","Set 
expiry time on hash object.","O(1) for each field assigned with TTL, so O(N) to add N TTLs when the command is called with multiple fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HPEXPIRE_History,0,HPEXPIRE_Tips,0,hpexpireCommand,-6,CMD_WRITE|CMD_FAST,ACL_CATEGORY_HASH,HPEXPIRE_Keyspecs,1,NULL,4),.args=HPEXPIRE_Args}, -{MAKE_CMD("hpexpireat","Set expiration time on hash field.","O(1) for each field assigned with TTL, so O(N) to add N TTLs when the command is called with multiple fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HPEXPIREAT_History,0,HPEXPIREAT_Tips,0,hpexpireAtCommand,-6,CMD_WRITE|CMD_FAST,ACL_CATEGORY_HASH,HPEXPIREAT_Keyspecs,1,NULL,4),.args=HPEXPIREAT_Args}, +{MAKE_CMD("hpexpire","Set expiry time on hash object.","O(N) where N is the number of specified fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HPEXPIRE_History,0,HPEXPIRE_Tips,0,hpexpireCommand,-6,CMD_WRITE|CMD_FAST,ACL_CATEGORY_HASH,HPEXPIRE_Keyspecs,1,NULL,4),.args=HPEXPIRE_Args}, +{MAKE_CMD("hpexpireat","Set expiration time on hash field.","O(N) where N is the number of specified fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HPEXPIREAT_History,0,HPEXPIREAT_Tips,0,hpexpireatCommand,-6,CMD_WRITE|CMD_FAST,ACL_CATEGORY_HASH,HPEXPIREAT_Keyspecs,1,NULL,4),.args=HPEXPIREAT_Args}, {MAKE_CMD("hpexpiretime","Returns the Unix timestamp in milliseconds since Unix epoch at which the given key's field(s) will expire","O(1) for each field, so O(N) for N items when the command is called with multiple fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HPEXPIRETIME_History,0,HPEXPIRETIME_Tips,0,hpexpiretimeCommand,-5,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HPEXPIRETIME_Keyspecs,1,NULL,2),.args=HPEXPIRETIME_Args}, -{MAKE_CMD("hpttl","Returns the remaining time to live (in milliseconds) of a hash key's field(s) that have an associated expiration.","O(1) for each field assigned with TTL, so O(N) for N items when the 
command is called with multiple fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HPTTL_History,0,HPTTL_Tips,0,hpttlCommand,-5,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HPTTL_Keyspecs,1,NULL,2),.args=HPTTL_Args}, +{MAKE_CMD("hpttl","Returns the remaining time to live in milliseconds of a hash key's field(s) that have an associated expiration.","O(1) for each field assigned with TTL, so O(N) for N items when the command is called with multiple fields.","9.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HPTTL_History,0,HPTTL_Tips,0,hpttlCommand,-5,CMD_READONLY|CMD_FAST,ACL_CATEGORY_HASH,HPTTL_Keyspecs,1,NULL,2),.args=HPTTL_Args}, {MAKE_CMD("hrandfield","Returns one or more random fields from a hash.","O(N) where N is the number of fields returned","6.2.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HRANDFIELD_History,0,HRANDFIELD_Tips,1,hrandfieldCommand,-2,CMD_READONLY,ACL_CATEGORY_HASH,HRANDFIELD_Keyspecs,1,NULL,2),.args=HRANDFIELD_Args}, {MAKE_CMD("hscan","Iterates over fields and values of a hash.","O(1) for every call. O(N) for a complete iteration, including enough command calls for the cursor to return back to 0. 
N is the number of elements inside the collection.","2.8.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HSCAN_History,0,HSCAN_Tips,1,hscanCommand,-3,CMD_READONLY,ACL_CATEGORY_HASH,HSCAN_Keyspecs,1,NULL,5),.args=HSCAN_Args}, {MAKE_CMD("hset","Creates or modifies the value of a field in a hash.","O(1) for each field/value pair added, so O(N) to add N field/value pairs when the command is called with multiple field/value pairs.","2.0.0",CMD_DOC_NONE,NULL,NULL,"hash",COMMAND_GROUP_HASH,HSET_History,1,HSET_Tips,0,hsetCommand,-4,CMD_WRITE|CMD_DENYOOM|CMD_FAST,ACL_CATEGORY_HASH,HSET_Keyspecs,1,NULL,2),.args=HSET_Args}, diff --git a/src/commands/hexpire.json b/src/commands/hexpire.json index 32729bd282..338fe53dd4 100644 --- a/src/commands/hexpire.json +++ b/src/commands/hexpire.json @@ -1,7 +1,7 @@ { "HEXPIRE": { "summary": "Set expiry time on hash fields.", - "complexity": "O(1) for each field assigned with TTL, so O(N) to add N TTLs when the command is called with multiple fields.", + "complexity": "O(N) where N is the number of specified fields.", "group": "hash", "since": "9.0.0", "arity": -6, @@ -34,7 +34,7 @@ } ], "reply_schema": { - "description": "List of values associated with the result of setting expiry on the specific fields, in the same order as they are requested.", + "description": "List of integer codes indicating the result of setting expiry on each specified field, in the same order as the fields are requested.", "type": "array", "minItems": 1, "items": { diff --git a/src/commands/hexpireat.json b/src/commands/hexpireat.json index 47d66d2caf..995391f0e6 100644 --- a/src/commands/hexpireat.json +++ b/src/commands/hexpireat.json @@ -1,11 +1,11 @@ { "HEXPIREAT": { "summary": "Set expiry time on hash fields.", - "complexity": "O(1) for each field assigned with TTL, so O(N) to add N TTLs when the command is called with multiple fields.", + "complexity": "O(N) where N is the number of specified fields.", "group": "hash", "since": "9.0.0", "arity": -6, - 
"function": "hexpireAtCommand", + "function": "hexpireatCommand", "command_flags": [ "WRITE", "FAST" @@ -34,7 +34,7 @@ } ], "reply_schema": { - "description": "List of values associated with the result of setting expiry on the specific fields, in the same order as they are requested.", + "description": "List of integer codes indicating the result of setting expiry on each specified field, in the same order as the fields are requested.", "type": "array", "minItems": 1, "items": { diff --git a/src/commands/hpersist.json b/src/commands/hpersist.json index d29d65dd87..180d3e9016 100644 --- a/src/commands/hpersist.json +++ b/src/commands/hpersist.json @@ -34,7 +34,7 @@ } ], "reply_schema": { - "description": "List of values associated with the result of setting expiry on the specific fields, in the same order as they are requested.", + "description": "List of integer codes indicating the result of removing the expiration from each specified field, in the same order as the fields are requested.", "type": "array", "minItems": 1, "items": { diff --git a/src/commands/hpexpire.json b/src/commands/hpexpire.json index b159c0a1e4..0cdec60a3a 100644 --- a/src/commands/hpexpire.json +++ b/src/commands/hpexpire.json @@ -1,7 +1,7 @@ { "HPEXPIRE": { "summary": "Set expiry time on hash object.", - "complexity": "O(1) for each field assigned with TTL, so O(N) to add N TTLs when the command is called with multiple fields.", + "complexity": "O(N) where N is the number of specified fields.", "group": "hash", "since": "9.0.0", "arity": -6, @@ -34,7 +34,7 @@ } ], "reply_schema": { - "description": "List of values associated with the result of setting expiry on the specific fields, in the same order as they are requested.", + "description": "List of integer codes indicating the result of setting expiry on each specified field, in the same order as the fields are requested.", "type": "array", "minItems": 1, "items": { diff --git a/src/commands/hpexpireat.json b/src/commands/hpexpireat.json index 
5a5068de61..a696b3a138 100644 --- a/src/commands/hpexpireat.json +++ b/src/commands/hpexpireat.json @@ -1,11 +1,11 @@ { "HPEXPIREAT": { "summary": "Set expiration time on hash field.", - "complexity": "O(1) for each field assigned with TTL, so O(N) to add N TTLs when the command is called with multiple fields.", + "complexity": "O(N) where N is the number of specified fields.", "group": "hash", "since": "9.0.0", "arity": -6, - "function": "hpexpireAtCommand", + "function": "hpexpireatCommand", "command_flags": [ "WRITE", "FAST" @@ -34,7 +34,7 @@ } ], "reply_schema": { - "description": "List of values associated with the result of setting expiry on the specific fields, in the same order as they are requested.", + "description": "List of integer codes indicating the result of setting expiry on each specified field, in the same order as the fields are requested.", "type": "array", "minItems": 1, "items": { diff --git a/src/commands/hpttl.json b/src/commands/hpttl.json index 9c7cced256..f1c7da24c7 100644 --- a/src/commands/hpttl.json +++ b/src/commands/hpttl.json @@ -1,6 +1,6 @@ { "HPTTL": { - "summary": "Returns the remaining time to live (in milliseconds) of a hash key's field(s) that have an associated expiration.", + "summary": "Returns the remaining time to live in milliseconds of a hash key's field(s) that have an associated expiration.", "complexity": "O(1) for each field assigned with TTL, so O(N) for N items when the command is called with multiple fields.", "group": "hash", "since": "9.0.0", diff --git a/src/entry.c b/src/entry.c index 50affa67cb..fe8e203164 100644 --- a/src/entry.c +++ b/src/entry.c @@ -36,13 +36,16 @@ * value pointer = value sds */ -/* SDS aux flag. If set, it indicates that the entry has TTL metadata set. */ -#define FIELD_SDS_AUX_BIT_ENTRY_HAS_EXPIRY 0 - -/* SDS aux flag. If set, it indicates that the entry has an embedded value - * pointer located in memory before the embedded field. 
If unset, the entry - * instead has an embedded value located after the embedded field. */ -#define FIELD_SDS_AUX_BIT_ENTRY_HAS_VALUE_PTR 2 +enum { + /* SDS aux flag. If set, it indicates that the entry has TTL metadata set. */ + FIELD_SDS_AUX_BIT_ENTRY_HAS_EXPIRY = 0, + /* SDS aux flag. If set, it indicates that the entry has an embedded value + * pointer located in memory before the embedded field. If unset, the entry + * instead has an embedded value located after the embedded field. */ + FIELD_SDS_AUX_BIT_ENTRY_HAS_VALUE_PTR = 1, + FIELD_SDS_AUX_BIT_MAX +}; +static_assert(FIELD_SDS_AUX_BIT_MAX < sizeof(char) * 8 - SDS_TYPE_BITS, "too many sds bits are used for entry metadata"); /* bound = bits in a char minus the sds type bits; sizeof(char) alone is 1 and would wrap around in size_t */ /* Returns true in case the entry's value is not embedded in the entry. * Returns false otherwise. */ @@ -50,6 +53,12 @@ static inline bool entryHasValuePtr(const entry *entry) { return sdsGetAuxBit(entry, FIELD_SDS_AUX_BIT_ENTRY_HAS_VALUE_PTR); } +/* Returns true in case the entry's value is embedded in the entry. + * Returns false otherwise. */ +bool entryHasEmbeddedValue(entry *entry) { + return (!entryHasValuePtr(entry)); +} + /* Returns true in case the entry has expiration timestamp. * Returns false otherwise. */ bool entryHasExpiry(const entry *entry) { @@ -103,10 +112,6 @@ void *entryGetAllocPtr(const entry *entry) { return buf; } -bool entryHasEmbeddedValue(entry *entry) { - return (!entryHasValuePtr(entry)); -} - /**************************************** Entry Expiry API *****************************************/ /* Returns the entry expiration timestamp. @@ -209,17 +214,18 @@ static inline size_t entryReqSize(const_sds field, return alloc_size; } -/* Takes ownership of value. 
does not take ownership of field */ -entry *entryCreate(const_sds field, sds value, long long expiry) { - bool embed_value = false; - int embedded_field_sds_type; - size_t expiry_size, embedded_value_sds_size, embedded_field_sds_size; - size_t alloc_size = entryReqSize(field, value, expiry, &embed_value, &embedded_field_sds_type, &embedded_field_sds_size, &expiry_size, &embedded_value_sds_size); - size_t buf_size; - - /* allocate the buffer */ - char *buf = zmalloc_usable(alloc_size, &buf_size); - +/* Serialize the content of the entry into the provided buffer buf. Make use of the provided arguments provided by a call to entryReqSize. + * Note that this function will take ownership of the value so user should not assume it is valid after this call. */ +static entry *entryWrite(char *buf, + size_t buf_size, + const_sds field, + sds value, + long long expiry, + bool embed_value, + int embedded_field_sds_type, + size_t embedded_field_sds_size, + size_t embedded_value_sds_size, + size_t expiry_size) { /* Set The expiry if exists */ if (expiry_size) { *(long long *)buf = expiry; @@ -250,6 +256,20 @@ entry *entryCreate(const_sds field, sds value, long long expiry) { return new_entry; } +/* Takes ownership of value. does not take ownership of field */ +entry *entryCreate(const_sds field, sds value, long long expiry) { + bool embed_value = false; + int embedded_field_sds_type; + size_t expiry_size, embedded_value_sds_size, embedded_field_sds_size; + size_t alloc_size = entryReqSize(field, value, expiry, &embed_value, &embedded_field_sds_type, &embedded_field_sds_size, &expiry_size, &embedded_value_sds_size); + size_t buf_size; + + /* allocate the buffer */ + char *buf = zmalloc_usable(alloc_size, &buf_size); + + return entryWrite(buf, buf_size, field, value, expiry, embed_value, embedded_field_sds_type, embedded_field_sds_size, embedded_value_sds_size, expiry_size); +} + /* Modify the entry's value and/or expiration time. 
* In case the provided value is NULL, will use the existing value. */ entry *entryUpdate(entry *e, sds value, long long expiry) { @@ -267,22 +287,22 @@ entry *entryUpdate(entry *e, sds value, long long expiry) { bool embed_value = false; int embedded_field_sds_type; size_t expiry_size, embedded_value_size, embedded_field_size; - size_t required_embedded_size = entryReqSize(field, value, expiry, &embed_value, &embedded_field_sds_type, &embedded_field_size, &expiry_size, &embedded_value_size); + size_t required_entry_size = entryReqSize(field, value, expiry, &embed_value, &embedded_field_sds_type, &embedded_field_size, &expiry_size, &embedded_value_size); size_t current_embedded_allocation_size = entryHasValuePtr(e) ? 0 : entryMemUsage(e); bool expiry_add_remove = update_expiry && (curr_expiration_time == EXPIRY_NONE || expiry == EXPIRY_NONE); // In case we are toggling expiration - bool value_change_encoding = update_value && (embed_value != !entryHasValuePtr(e)); // In case we change the way value is embedded or not + bool value_change_encoding = update_value && (embed_value != entryHasEmbeddedValue(e)); // In case we change the way value is embedded or not - /* // We will create a new entry in the following cases: + /* We will create a new entry in the following cases: * 1. In the case were we add or remove expiration. * 2. We change the way value is encoded * 3. in the case were we are NOT migrating from an embedded entry to an embedded entry with ~the same size. 
*/ bool create_new_entry = (expiry_add_remove) || (value_change_encoding) || - (update_value && !entryHasValuePtr(e) && - !(required_embedded_size <= EMBED_VALUE_MAX_ALLOC_SIZE && - required_embedded_size <= current_embedded_allocation_size && - required_embedded_size >= current_embedded_allocation_size * 3 / 4)); + (update_value && entryHasEmbeddedValue(e) && + !(required_entry_size <= EMBED_VALUE_MAX_ALLOC_SIZE && + required_entry_size <= current_embedded_allocation_size && + required_entry_size >= current_embedded_allocation_size * 3 / 4)); if (!create_new_entry) { /* In this case we are sure we do not have to allocate new entry, so expiry must already be set. */ @@ -315,16 +335,19 @@ entry *entryUpdate(entry *e, sds value, long long expiry) { if (!update_value) { /* Check if the value can be reused. */ int value_was_embedded = !entryHasValuePtr(e); - /* In case the original entry value is embedded WE WILL HAVE TO DUPLICATE IT */ - if (value_was_embedded) + /* In case the original entry value is embedded WE WILL HAVE TO DUPLICATE IT. + * If not, we do not have to duplicate it; we detach it from the original entry since we are going to delete that entry. */ + if (value_was_embedded) { value = sdsdup(value); - /* if not we have to duplicate it, remove it from the original entry since we are going to delete it.*/ - else { + } else { sds *value_ref = entryGetValueRef(e); *value_ref = NULL; } } - new_entry = entryCreate(entryGetField(e), value, expiry); + /* allocate the buffer for a new entry */ + size_t buf_size; + char *buf = zmalloc_usable(required_entry_size, &buf_size); + new_entry = entryWrite(buf, buf_size, entryGetField(e), value, expiry, embed_value, embedded_field_sds_type, embedded_field_size, embedded_value_size, expiry_size); debugServerAssert(new_entry != e); entryFree(e); } diff --git a/src/server.h b/src/server.h index 7f297c56da..126a783b33 100644 --- a/src/server.h +++ b/src/server.h @@ -3851,9 +3851,9 @@ void hexistsCommand(client *c); void hscanCommand(client *c); 
void hrandfieldCommand(client *c); void hexpireCommand(client *c); -void hexpireAtCommand(client *c); +void hexpireatCommand(client *c); void hpexpireCommand(client *c); -void hpexpireAtCommand(client *c); +void hpexpireatCommand(client *c); void httlCommand(client *c); void hpttlCommand(client *c); void hexpiretimeCommand(client *c); diff --git a/src/serverassert.h b/src/serverassert.h index 5ce8eb2450..88c9815e56 100644 --- a/src/serverassert.h +++ b/src/serverassert.h @@ -63,4 +63,8 @@ void _serverAssert(const char *estr, const char *file, int line); void _serverPanic(const char *file, int line, const char *msg, ...); +#ifndef static_assert +#define static_assert(expr, lit) extern char __static_assert_failure[(expr) ? 1 : -1] +#endif + #endif diff --git a/src/t_hash.c b/src/t_hash.c index 0ac10c3de2..14332fcea8 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -42,6 +42,15 @@ #include #include "entry.h" +/* Enumeration of all the possible return values of commands manipulating field expiration. */ +typedef enum { + /* These numeric values are replied to the client as-is, one per field (see hexpireGenericCommand). 
*/ + EXPIRATION_MODIFICATION_NOT_EXIST = -2, /* in case the provided object is NULL or the specific field was not found */ + EXPIRATION_MODIFICATION_SUCCESSFUL = 1, /* if the expiration time was applied or modified */ + EXPIRATION_MODIFICATION_FAILED_CONDITION = 0, /* if the some predefined conditions (e.g hexpire conditional flags) has not been met */ + EXPIRATION_MODIFICATION_FAILED = -1, /* if apply of the expiration modification failed (e.g hpersist on item without expiration) */ + EXPIRATION_MODIFICATION_EXPIRE_ASAP = 2, /* if apply of the expiration modification was set to a time in the past (i.e field is immediately expired) */ +} expiryModificationResult; volatileEntryType hashVolatileEntryType = { .entryGetKey = (sds(*)(const void *entry))entryGetField, @@ -59,8 +68,7 @@ static volatile_set *hashTypeGetVolatileSet(robj *o) { void hashTypeFreeVolatileSet(robj *o) { volatile_set *set = hashTypeGetVolatileSet(o); - if (set) - freeVolatileSet(set); + if (set) freeVolatileSet(set); } bool hashTypeHasVolatileElements(robj *o) { @@ -73,7 +81,7 @@ static inline void hashTypeIgnoreTTL(robj *o, bool ignore) { if (o->encoding == OBJ_ENCODING_HASHTABLE) { /* prevent placing access function if not needed */ if (!ignore && !hashTypeHasVolatileElements(o)) { - ignore = 0; + ignore = true; } hashtableSetType(o->ptr, ignore ? 
&hashHashtableType : &hashWithVolatileItemsHashtableType); } @@ -123,11 +131,11 @@ void hashTypeTrackUpdateEntry(robj *o, void *old_entry, void *new_entry, long lo volatile_set *set = hashTypeGetOrcreateVolatileSet(o); debugServerAssert(set); - if (old_tracked && !new_tracked) + if (old_tracked && !new_tracked) { serverAssert(volatileSetRemoveEntry(set, old_entry, old_expiry)); - else if (new_tracked && !old_tracked) + } else if (new_tracked && !old_tracked) { serverAssert(volatileSetAddEntry(set, new_entry, new_expiry)); - else { + } else { volatile_set *set = hashTypeGetVolatileSet(o); debugServerAssert(set); serverAssert(volatileSetUpdateEntry(set, old_entry, new_entry, old_expiry, new_expiry) == 1); @@ -422,11 +430,10 @@ int hashTypeSet(robj *o, sds field, sds value, long long expiry, int flags) { * returns -2 in case the provided object is NULL or the specific field was not found. * returns 0 if the specified flag conditions has not been met. * returns 1 if the expiration time was applied. - * returns 2 when 'expire' indicate a past Unix time. In this case, if the item exists in the HASH, it will also be expired. - */ -int hashTypeSetExpire(robj *o, sds field, long long expiry, int flag) { + * returns 2 when 'expire' indicate a past Unix time. In this case, if the item exists in the HASH, it will also be expired. */ +static expiryModificationResult hashTypeSetExpire(robj *o, sds field, long long expiry, int flag) { /* If no object we will return -2 */ - if (o == NULL) return -2; + if (o == NULL) return EXPIRATION_MODIFICATION_NOT_EXIST; if (o->encoding == OBJ_ENCODING_LISTPACK) { unsigned char *vstr; @@ -435,13 +442,13 @@ int hashTypeSetExpire(robj *o, sds field, long long expiry, int flag) { /* We do not want to convert to listpack for no good reason. 
* So we first check if the item exists.*/ if (hashTypeGetFromListpack(o, field, &vstr, &vlen, &vll) < 0) { - return -2; + return EXPIRATION_MODIFICATION_NOT_EXIST; } /* When listpack representation is used, we consider it as infinite TTL, * so expire command with gt always fail the GT as well as existence(XX). * Else, we already know we are going to set an expiration so we expend to hashtable encoding. */ if (flag & EXPIRE_XX || flag & EXPIRE_GT) { - return 0; + return EXPIRATION_MODIFICATION_FAILED_CONDITION; } else { hashTypeConvert(o, OBJ_ENCODING_HASHTABLE); } @@ -459,14 +466,14 @@ int hashTypeSetExpire(robj *o, sds field, long long expiry, int flag) { /* NX option is set, check no current expiry */ if (flag & EXPIRE_NX) { if (current_expire != EXPIRY_NONE) { - return 0; + return EXPIRATION_MODIFICATION_FAILED_CONDITION; } } /* XX option is set, check current expiry */ if (flag & EXPIRE_XX) { if (current_expire == EXPIRY_NONE) { - return 0; + return EXPIRATION_MODIFICATION_FAILED_CONDITION; } } @@ -475,7 +482,7 @@ int hashTypeSetExpire(robj *o, sds field, long long expiry, int flag) { /* When current_expire is -1, we consider it as infinite TTL, * so expire command with gt always fail the GT. */ if (expiry <= current_expire || current_expire == EXPIRY_NONE) { - return 0; + return EXPIRATION_MODIFICATION_FAILED_CONDITION; } } @@ -484,28 +491,28 @@ int hashTypeSetExpire(robj *o, sds field, long long expiry, int flag) { /* When current_expire -1, we consider it as infinite TTL, * so if there is an expiry on the key and it's not less than current, we fail the LT. */ if (current_expire != EXPIRY_NONE && expiry >= current_expire) { - return 0; + return EXPIRATION_MODIFICATION_FAILED_CONDITION; } } } *entry_ref = entrySetExpiry(current_entry, expiry); hashTypeTrackUpdateEntry(o, current_entry, *entry_ref, current_expire, expiry); - return 1; + return EXPIRATION_MODIFICATION_SUCCESSFUL; } - return -2; // we did not find anything to do. 
return -2 + return EXPIRATION_MODIFICATION_NOT_EXIST; // we did not find anything to do. return -2 } -int hashTypePersist(robj *o, sds field) { +static expiryModificationResult hashTypePersist(robj *o, sds field) { /* NULL object returns -2 */ - if (o == NULL || o->type != OBJ_HASH) return -2; + if (o == NULL || o->type != OBJ_HASH) return EXPIRATION_MODIFICATION_NOT_EXIST; if (o->encoding == OBJ_ENCODING_LISTPACK) { if (hashTypeExists(o, field)) /* When listpack representation is used, All items are without expiry */ - return -1; + return EXPIRATION_MODIFICATION_FAILED; else - return -2; // Did not find any element return -2 + return EXPIRATION_MODIFICATION_NOT_EXIST; // Did not find any element return -2 } hashtable *ht = o->ptr; @@ -516,11 +523,11 @@ int hashTypePersist(robj *o, sds field) { if (current_expire != EXPIRY_NONE) { hashTypeUntrackEntry(o, current_entry); *entry_ref = entryUpdate(current_entry, NULL, EXPIRY_NONE); - return 1; + return EXPIRATION_MODIFICATION_SUCCESSFUL; } - return -1; // If the found element has no expiration set, return -1 + return EXPIRATION_MODIFICATION_FAILED; // If the found element has no expiration set, return -1 } - return -2; // Did not find any element return -2 + return EXPIRATION_MODIFICATION_NOT_EXIST; // Did not find any element return -2 } /* Delete an element from a hash. @@ -1402,9 +1409,9 @@ void hgetexCommand(client *c) { if (set_expired) { changed = hashTypeDelete(o, c->argv[i]->ptr); } else if (set_expiry) { - changed = (hashTypeSetExpire(o, c->argv[i]->ptr, when, 0) == 1) ? 1 : 0; + changed = (hashTypeSetExpire(o, c->argv[i]->ptr, when, 0) == EXPIRATION_MODIFICATION_SUCCESSFUL) ? 1 : 0; } else if (persist) { - changed = (hashTypePersist(o, c->argv[i]->ptr) == 1) ? 1 : 0; + changed = (hashTypePersist(o, c->argv[i]->ptr) == EXPIRATION_MODIFICATION_SUCCESSFUL) ? 
1 : 0; } if (changed) { changes++; @@ -1574,7 +1581,7 @@ void hexpireGenericCommand(client *c, long long basetime, int unit) { int flag = 0; int fields_index = 3; long long num_fields = 0; - int i, result = 0, expired = 0, updated = 0; + int i, expired = 0, updated = 0; int set_expired = 0; robj **new_argv = NULL; int new_argc = 0; @@ -1619,18 +1626,18 @@ void hexpireGenericCommand(client *c, long long basetime, int unit) { } for (i = 0; i < num_fields; i++) { - result = -2; + expiryModificationResult result = EXPIRATION_MODIFICATION_NOT_EXIST; if (set_expired) { if (obj && hashTypeDelete(obj, c->argv[fields_index + i]->ptr)) { /* In case we deleted the field, add it to the new hdel command vector. */ new_argv[new_argc++] = c->argv[fields_index + i]; incrRefCount(c->argv[fields_index + i]); - result = 2; + result = EXPIRATION_MODIFICATION_EXPIRE_ASAP; expired++; } } else { result = hashTypeSetExpire(obj, c->argv[fields_index + i]->ptr, when, flag); - if (result == 1) updated++; + if (result == EXPIRATION_MODIFICATION_SUCCESSFUL) updated++; } addReplyLongLong(c, result); } @@ -1643,7 +1650,7 @@ void hexpireGenericCommand(client *c, long long basetime, int unit) { } else if (updated) { /* Propagate as HPEXPIREAT millisecond-timestamp * Only rewrite the command arg if not already HPEXPIREAT */ - if (c->cmd->proc != hpexpireAtCommand) { + if (c->cmd->proc != hpexpireatCommand) { rewriteClientCommandArgument(c, 0, shared.hpexpireat); } @@ -1669,7 +1676,7 @@ void hexpireCommand(client *c) { hexpireGenericCommand(c, commandTimeSnapshot(), UNIT_SECONDS); } -void hexpireAtCommand(client *c) { +void hexpireatCommand(client *c) { hexpireGenericCommand(c, 0, UNIT_SECONDS); } @@ -1677,7 +1684,7 @@ void hpexpireCommand(client *c) { hexpireGenericCommand(c, commandTimeSnapshot(), UNIT_MILLISECONDS); } -void hpexpireAtCommand(client *c) { +void hpexpireatCommand(client *c) { hexpireGenericCommand(c, 0, UNIT_MILLISECONDS); } @@ -1719,7 +1726,7 @@ void hpersistCommand(client *c) { 
for (int i = 0; i < num_fields; i++, fields_index++) { result = hashTypePersist(hash, c->argv[fields_index]->ptr); - if (result > 0) { + if (result == EXPIRATION_MODIFICATION_SUCCESSFUL) { server.dirty++; changes++; } From 93e21cc3acae06b2732d97aad7f2f136084a7706 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Mon, 4 Aug 2025 12:51:07 +0300 Subject: [PATCH 113/119] document entryUpdate value ownership move Signed-off-by: Ran Shidlansik --- src/entry.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/entry.c b/src/entry.c index fe8e203164..097a36387c 100644 --- a/src/entry.c +++ b/src/entry.c @@ -271,7 +271,9 @@ entry *entryCreate(const_sds field, sds value, long long expiry) { } /* Modify the entry's value and/or expiration time. - * In case the provided value is NULL, will use the existing value. */ + * In case the provided value is NULL, will use the existing value. + * Note that the value ownership is moved to this function and the caller should assume the + * value is no longer usable after calling this function. 
*/ entry *entryUpdate(entry *e, sds value, long long expiry) { sds field = (sds)e; entry *new_entry = NULL; From dc88e0d2c3375751e7f61d15e47daeb912c61728 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Mon, 4 Aug 2025 15:37:33 +0300 Subject: [PATCH 114/119] distinguish between null set and empty set Signed-off-by: Ran Shidlansik --- src/t_hash.c | 22 ++++++++++------- src/unit/test_vset.c | 12 +++++----- src/vset.c | 56 +++++++++++++++++++++++++++++++++----------- src/vset.h | 4 ++++ 4 files changed, 66 insertions(+), 28 deletions(-) diff --git a/src/t_hash.c b/src/t_hash.c index dad132eae2..1e84d35f2d 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -62,7 +62,12 @@ static vset *hashTypeGetVolatileSet(robj *o) { } bool hashTypeHasVolatileElements(robj *o) { - return ((o->encoding == OBJ_ENCODING_HASHTABLE) && !(vsetIsEmpty(hashTypeGetVolatileSet(o)))); + if (o->encoding == OBJ_ENCODING_HASHTABLE) { + vset *set = hashTypeGetVolatileSet(o); + if (vsetIsValid(set) && !vsetIsEmpty(set)) + return true; + } + return false; } /* make any access to the hash object elements ignore the specific elements expiration. @@ -70,7 +75,7 @@ bool hashTypeHasVolatileElements(robj *o) { static inline void hashTypeIgnoreTTL(robj *o, bool ignore) { if (o->encoding == OBJ_ENCODING_HASHTABLE) { /* prevent placing access function if not needed */ - if (!ignore && !hashTypeHasVolatileElements(o)) { + if (!ignore && !vsetIsValid(hashTypeGetVolatileSet(o))) { ignore = true; } hashtableSetType(o->ptr, ignore ? &hashHashtableType : &hashWithVolatileItemsHashtableType); @@ -79,18 +84,19 @@ static inline void hashTypeIgnoreTTL(robj *o, bool ignore) { static vset *hashTypeGetOrcreateVolatileSet(robj *o) { serverAssert(o->encoding == OBJ_ENCODING_HASHTABLE); - vset *vset = hashtableMetadata(o->ptr); - if (*vset == NULL) { - vsetInit(vset); + vset *set = hashtableMetadata(o->ptr); + if (!vsetIsValid(set)) { + vsetInit(set); /* serves mainly for optimization. 
Use type which supports access function only when needed. */ hashTypeIgnoreTTL(o, false); } - return vset; + return set; } void hashTypeFreeVolatileSet(robj *o) { - vset *vset = hashtableMetadata(o->ptr); - vsetClear(vset); + vset *set = hashtableMetadata(o->ptr); + if (vsetIsValid(set)) + vsetRelease(set); /* serves mainly for optimization. by changing the hashtable type we can avoid extra function call in hashtable access */ hashTypeIgnoreTTL(o, true); } diff --git a/src/unit/test_vset.c b/src/unit/test_vset.c index dadb180475..f864687558 100644 --- a/src/unit/test_vset.c +++ b/src/unit/test_vset.c @@ -64,7 +64,7 @@ int test_vset_add_and_iterate(int argc, char **argv, int flags) { TEST_ASSERT(count == 2); vsetResetIterator(&it); - vsetClear(&set); + vsetRelease(&set); mockFreeEntry(e1); mockFreeEntry(e2); @@ -111,7 +111,7 @@ int test_vset_large_batch_same_expiry(int argc, char **argv, int flags) { // Cleanup vsetResetIterator(&it); - vsetClear(&set); + vsetRelease(&set); for (int i = 0; i < total_entries; i++) { mockFreeEntry(entries[i]); @@ -264,7 +264,7 @@ int test_vset_iterate_multiple_expiries(int argc, char **argv, int flags) { } vsetResetIterator(&it); - vsetClear(&set); + vsetRelease(&set); for (int i = 0; i < 5; i++) mockFreeEntry(entries[i]); TEST_PRINT_INFO("Iterated all %d mixed expiry entries successfully", total); @@ -296,7 +296,7 @@ int test_vset_add_and_remove_all(int argc, char **argv, int flags) { } TEST_ASSERT(vsetIsEmpty(&set)); - vsetClear(&set); + vsetRelease(&set); TEST_PRINT_INFO("Add/remove %d entries, set size now 0", total_entries); return 0; @@ -470,7 +470,7 @@ int test_vset_defrag(int argc, char **argv, int flags) { } TEST_ASSERT(defrag_vset(&set, 0, 0) == 0); - vsetClear(&set); + vsetRelease(&set); free_mock_entries(); return 0; @@ -512,7 +512,7 @@ int test_vset_fuzzer(int argc, char **argv, int flags) { /* now expire all the entries and check that we have no entries left */ expire_mock_entries(&set, LONG_LONG_MAX); 
TEST_ASSERT(vsetIsEmpty(&set) && mock_entry_count == 0); - vsetClear(&set); + vsetRelease(&set); free_mock_entries(); /* Just in case */ return 0; } diff --git a/src/vset.c b/src/vset.c index fe81f6411b..4a5bc14418 100644 --- a/src/vset.c +++ b/src/vset.c @@ -674,6 +674,7 @@ void pvSort(pVector *pv, int (*compare)(const void *a, const void *b)) { #define VOLATILESET_VECTOR_BUCKET_MAX_SIZE 127 +#define VSET_NONE_BUCKET_PTR ((void *)(uintptr_t) - 1) #define VSET_BUCKET_NONE -1 // matching the NULL case #define VSET_BUCKET_SINGLE 0x1UL // xx1 (assuming sds) #define VSET_BUCKET_VECTOR 0x2UL // 010 @@ -726,7 +727,8 @@ static inline vsetIterator *opaqueFromIterator(vsetInternalIterator *iterator) { /* Determine bucket type */ static inline int vsetBucketType(vsetBucket *b) { - if (b == NULL) return VSET_BUCKET_NONE; + assert(b); + if (b == VSET_NONE_BUCKET_PTR) return VSET_BUCKET_NONE; uintptr_t bits = (uintptr_t)b; if (bits & 0x1) @@ -777,7 +779,7 @@ static inline vsetBucket *vsetBucketFromSingle(void *ptr) { } static inline vsetBucket *vsetBucketFromNone(void) { - return NULL; + return VSET_NONE_BUCKET_PTR; } static inline vsetBucket *vsetBucketFromRax(rax *r) { @@ -1015,7 +1017,7 @@ hashtableType pointerHashtableType = { static inline vsetBucket *findBucket(rax *expiry_buckets, long long expiry, unsigned char *key, size_t *key_len, long long *pbucket_ts, raxNode **node) { *key_len = encodeExpiryKey(expiry, key); - vsetBucket *bucket = NULL; + vsetBucket *bucket = vsetBucketFromNone(); /* First try to locate the first bucket which is larger than the specified key */ raxIterator iter; raxStart(&iter, expiry_buckets); @@ -1026,7 +1028,7 @@ static inline vsetBucket *findBucket(rax *expiry_buckets, long long expiry, unsi /* If this bucket span over a window to far in the future, it is not a candidate. 
*/ if (get_max_bucket_ts(expiry) < bucket_ts) { raxStop(&iter); - return NULL; + return vsetBucketFromNone(); } bucket = iter.data; assert(iter.node->iskey); @@ -1071,7 +1073,7 @@ static bool splitBucketIfPossible(vsetBucket *parent, vsetGetExpiryFunc getExpir size_t key_len; long long target_bucket_ts = bucket_ts; unsigned char key[VSET_BUCKET_KEY_LEN] = {0}; - vsetBucket *new_bucket = NULL; + vsetBucket *new_bucket = vsetBucketFromNone(); pVector *pv = vsetBucketVector(bucket); rax *expiry_buckets = vsetBucketRax(parent); /* first lets sort the vector. we cannot take a decision without it. @@ -1165,7 +1167,7 @@ static inline vsetBucket *insertToBucket_VECTOR(vsetGetExpiryFunc getExpiry, vse /* Otherwise it is better to just push the entry to the vector with less change of memmove and reallocation. */ return vsetBucketFromVector(pvPush(pv, entry)); } - return NULL; + return vsetBucketFromNone(); } static inline vsetBucket *insertToBucket_HASHTABLE(vsetGetExpiryFunc getExpiry, vsetBucket *bucket, void *entry, long long expiry) { @@ -1316,15 +1318,15 @@ static bool removeEntryFromRaxBucket(vsetBucket *rax_bucket, vsetGetExpiryFunc g bucket = removeFromBucket_SINGLE(getExpiry, bucket, entry, 0, &removed); if (removed) { raxRemove(vsetBucketRax(rax_bucket), key, key_len, NULL); - if (pbucket) *pbucket = NULL; + if (pbucket) *pbucket = vsetBucketFromNone(); } break; case VSET_BUCKET_VECTOR: { vsetBucket *new_bucket = removeFromBucket_VECTOR(getExpiry, bucket, entry, 0, &removed, true); if (new_bucket != bucket) { - if (!new_bucket) { + if (vsetBucketType(new_bucket) == VSET_BUCKET_NONE) { raxRemove(vsetBucketRax(rax_bucket), key, key_len, NULL); - if (pbucket) *pbucket = NULL; + if (pbucket) *pbucket = vsetBucketFromNone(); } else { /* In order to avoid rax override, we directly change the node data */ // alternative: raxInsert(*set, key, key_len, new_bucket, NULL); @@ -1388,7 +1390,7 @@ static inline vsetBucket *removeFromBucket_RAX(vsetGetExpiryFunc getExpiry, vset 
raxNode *node; rax *expiry_buckets = vsetBucketRax(target); vsetBucket *bucket = findBucket(expiry_buckets, expiry, key, &key_len, &bucket_ts, &node); - assert(bucket); + assert(bucket != VSET_NONE_BUCKET_PTR); bool success = removeEntryFromRaxBucket(target, getExpiry, entry, bucket, key, key_len, NULL, node); if (removed) *removed = success; // shrink to single bucket if possible @@ -1494,7 +1496,7 @@ static inline size_t vsetBucketRemoveExpired_RAX(vsetBucket **bucket, vsetGetExp default: panic("Cannot expire entries from bucket which is not single, vector or hashtable"); } - if (!time_bucket) { + if (time_bucket == VSET_NONE_BUCKET_PTR) { /* in case the bucket is freed, we can just remove it and continue to the next bucket. */ raxRemove(buckets, key, key_len, NULL); } else { @@ -1687,6 +1689,7 @@ static inline size_t vsetBucketMemUsage_RAX(vsetBucket *bucket) { bool vsetAddEntry(vset *set, vsetGetExpiryFunc getExpiry, void *entry) { long long expiry = getExpiry(entry); vsetBucket *expiry_buckets = *set; + assert(expiry_buckets); int bucket_type = vsetBucketType(expiry_buckets); switch (bucket_type) { case VSET_BUCKET_NONE: @@ -1742,11 +1745,12 @@ bool vsetAddEntry(vset *set, vsetGetExpiryFunc getExpiry, void *entry) { static inline bool vsetRemoveEntryWithExpiry(vset *set, vsetGetExpiryFunc getExpiry, void *entry, long long expiry) { bool removed; vsetBucket *bucket = *set; + assert(bucket); int bucket_type = vsetBucketType(bucket); switch (bucket_type) { case VSET_BUCKET_NONE: /* We cannot remove from empty set */ - return 0; + return false; case VSET_BUCKET_SINGLE: bucket = removeFromBucket_SINGLE(getExpiry, bucket, entry, expiry, &removed); break; @@ -1954,6 +1958,7 @@ static inline vsetBucket *vsetBucketUpdateEntry_RAX(vsetBucket *target, vsetGetE * vsetUpdateEntry(myset, getExpiry, old_ptr, new_ptr, old_ts, new_ts); */ bool vsetUpdateEntry(vset *set, vsetGetExpiryFunc getExpiry, void *old_entry, void *new_entry, long long old_expiry, long long new_expiry) { 
+ assert(*set); /* Nothing to do */ if (old_entry == new_entry && old_expiry == new_expiry) return true; @@ -1981,7 +1986,7 @@ bool vsetUpdateEntry(vset *set, vsetGetExpiryFunc getExpiry, void *old_entry, vo case VSET_BUCKET_RAX: updated = vsetBucketUpdateEntry_RAX(*set, getExpiry, old_entry, new_entry, old_expiry, new_expiry); } - if (!updated) + if (updated == VSET_NONE_BUCKET_PTR) return false; *set = updated; return true; @@ -2226,11 +2231,33 @@ void vsetInit(vset *set) { * Parameters: * - set: Pointer to the volatile set to clear. */ void vsetClear(vset *set) { - if (!(*set)) return; + if (*set == VSET_NONE_BUCKET_PTR) return; freeVsetBucket(*set); *set = vsetBucketFromNone(); } +/* Same as calling vsetClear, but also de-initialize the set. + * After this call you will have to call vsetInit again in order to continue using the set. */ +void vsetRelease(vset *set) { + vsetClear(set); + *set = NULL; +} + +/* Return true in case this set is an initialized set and false otherwise. */ +bool vsetIsValid(vset *set) { + if (set && *set) { + switch (vsetBucketType(*set)) { + case VSET_BUCKET_NONE: + case VSET_BUCKET_SINGLE: + case VSET_BUCKET_VECTOR: + case VSET_BUCKET_HT: + case VSET_BUCKET_RAX: + return true; + } + } + return false; +} + /* Checks whether a volatile set is empty. * * This function simply checks if the set's current bucket type is VSET_BUCKET_NONE. @@ -2242,6 +2269,7 @@ void vsetClear(vset *set) { * - true if the set contains no entries. * - false otherwise. */ bool vsetIsEmpty(vset *set) { + assert(*set); return vsetBucketType(*set) == VSET_BUCKET_NONE; } diff --git a/src/vset.h b/src/vset.h index 5517e4b35f..7349aa46ed 100644 --- a/src/vset.h +++ b/src/vset.h @@ -26,6 +26,8 @@ * Create/Free: * vsetInit(vset *set) - used in order to initialize a new vset. * void vsetClear(vset *set) - used in order to empty all the data in a vset. + * void vsetRelease(vset *set) - just like vsetClear, but also release the set itself so it will become unusable. 
+ * A new call to vsetInit is required in order to continue using the set. * Example: * vset set; * vsetInit(&set); @@ -85,6 +87,8 @@ bool vsetNext(vsetIterator *it, void **entryptr); void vsetResetIterator(vsetIterator *it); void vsetInit(vset *set); void vsetClear(vset *set); +void vsetRelease(vset *set); +bool vsetIsValid(vset *set); long long vsetEstimatedEarliestExpiry(vset *set, vsetGetExpiryFunc getExpiry); size_t vsetRemoveExpired(vset *set, vsetGetExpiryFunc getExpiry, vsetExpiryFunc expiryFunc, mstime_t now, size_t max_count, void *ctx); size_t vsetMemUsage(vset *set); From 47b3cb563111f5e91826f28ad728f8e25e5d555e Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Mon, 4 Aug 2025 15:51:56 +0300 Subject: [PATCH 115/119] fix objectComputeSize Signed-off-by: Ran Shidlansik --- src/object.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/object.c b/src/object.c index 53fbfc387f..144907c201 100644 --- a/src/object.c +++ b/src/object.c @@ -33,6 +33,7 @@ #include "serverassert.h" #include "functions.h" #include "intset.h" /* Compact integer set structure */ +#include "vset.h" #include "zmalloc.h" #include "sds.h" #include "module.h" @@ -1206,13 +1207,14 @@ size_t objectComputeSize(robj *key, robj *o, size_t sample_size, int dbid) { hashtableInitIterator(&iter, ht, 0); void *next; - asize = zmalloc_size((void *)o) + hashtableMemUsage(ht) + vsetMemUsage(volatile_fields); + asize = zmalloc_size((void *)o) + hashtableMemUsage(ht); while (hashtableNext(&iter, &next) && samples < sample_size) { elesize += entryMemUsage(next); samples++; } hashtableResetIterator(&iter); if (samples) asize += (double)elesize / samples * hashtableSize(ht); + if (vsetIsValid(volatile_fields)) asize += vsetMemUsage(volatile_fields); } else { serverPanic("Unknown hash encoding"); } From debe06de0eaee51513958280ad38c3f32bead341 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Mon, 4 Aug 2025 16:24:12 +0300 Subject: [PATCH 116/119] return NULL when volatile
set is not initialized Signed-off-by: Ran Shidlansik --- src/t_hash.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/t_hash.c b/src/t_hash.c index 1e84d35f2d..b529355ff2 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -58,13 +58,14 @@ typedef enum { static vset *hashTypeGetVolatileSet(robj *o) { serverAssert(o->encoding == OBJ_ENCODING_HASHTABLE); - return (vset *)hashtableMetadata(o->ptr); + vset *set = (vset *)hashtableMetadata(o->ptr); + return vsetIsValid(set) ? set : NULL; } bool hashTypeHasVolatileElements(robj *o) { if (o->encoding == OBJ_ENCODING_HASHTABLE) { vset *set = hashTypeGetVolatileSet(o); - if (vsetIsValid(set) && !vsetIsEmpty(set)) + if (set && !vsetIsEmpty(set)) return true; } return false; @@ -75,7 +76,7 @@ bool hashTypeHasVolatileElements(robj *o) { static inline void hashTypeIgnoreTTL(robj *o, bool ignore) { if (o->encoding == OBJ_ENCODING_HASHTABLE) { /* prevent placing access function if not needed */ - if (!ignore && !vsetIsValid(hashTypeGetVolatileSet(o))) { + if (!ignore && hashTypeGetVolatileSet(o) == NULL) { ignore = true; } hashtableSetType(o->ptr, ignore ? &hashHashtableType : &hashWithVolatileItemsHashtableType); @@ -84,7 +85,7 @@ static inline void hashTypeIgnoreTTL(robj *o, bool ignore) { static vset *hashTypeGetOrcreateVolatileSet(robj *o) { serverAssert(o->encoding == OBJ_ENCODING_HASHTABLE); - vset *set = hashtableMetadata(o->ptr); + vset *set = (vset *)hashtableMetadata(o->ptr); if (!vsetIsValid(set)) { vsetInit(set); /* serves mainly for optimization. Use type which supports access function only when needed. */ @@ -94,9 +95,8 @@ static vset *hashTypeGetOrcreateVolatileSet(robj *o) { } void hashTypeFreeVolatileSet(robj *o) { - vset *set = hashtableMetadata(o->ptr); - if (vsetIsValid(set)) - vsetRelease(set); + vset *set = (vset *)hashtableMetadata(o->ptr); + if (vsetIsValid(set)) vsetRelease(set); /* serves mainly for optimization. 
by changing the hashtable type we can avoid extra function call in hashtable access */ hashTypeIgnoreTTL(o, true); } From 9d9f63b76f0d5b806c7214f4a22d232ea0ef224c Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Mon, 4 Aug 2025 18:38:30 +0300 Subject: [PATCH 117/119] bump the RDB version Signed-off-by: Ran Shidlansik --- src/aof.c | 5 +++-- src/rdb.c | 3 +-- src/rdb.h | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/aof.c b/src/aof.c index 567acdf60c..ce2a430855 100644 --- a/src/aof.c +++ b/src/aof.c @@ -1437,9 +1437,10 @@ int loadSingleAppendOnlyFile(char *filename) { /* Check if the AOF file is in RDB format (it may be RDB encoded base AOF * or old style RDB-preamble AOF). In that case we need to load the RDB file * and later continue loading the AOF tail if it is an old style RDB-preamble AOF. */ - char sig[5]; /* "REDIS" */ - if (fread(sig, 1, 5, fp) != 5 || memcmp(sig, "REDIS", 5) != 0) { + char sig[6]; /* "REDIS" or "VALKEY" */ + if (fread(sig, 1, 6, fp) != 6 || (memcmp(sig, "REDIS", 5) != 0 && memcmp(sig, "VALKEY", 6) != 0)) { /* Not in RDB format, seek back at 0 offset. */ + serverLog(LL_WARNING, "Error reading the RDB preamble signature of the AOF file, AOF loading aborted"); if (fseek(fp, 0, SEEK_SET) == -1) goto readerr; } else { /* RDB format. Pass loading the RDB functions. */ diff --git a/src/rdb.c b/src/rdb.c index 6ec4e064dd..ef5ee1f9a6 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -1442,8 +1442,7 @@ int rdbSaveRio(int req, rio *rdb, int *error, int rdbflags, rdbSaveInfo *rsi) { int j; if (server.rdb_checksum) rdb->update_cksum = rioGenericUpdateChecksum; - /* TODO: Change this to "VALKEY%03d" next time we bump the RDB version. 
*/ - snprintf(magic, sizeof(magic), "REDIS%04d", RDB_VERSION); + snprintf(magic, sizeof(magic), "VALKEY%03d", RDB_VERSION); if (rdbWriteRaw(rdb, magic, 9) == -1) goto werr; if (rdbSaveInfoAuxFields(rdb, rdbflags, rsi) == -1) goto werr; if (!(req & REPLICA_REQ_RDB_EXCLUDE_DATA) && rdbSaveModulesAux(rdb, VALKEYMODULE_AUX_BEFORE_RDB) == -1) goto werr; diff --git a/src/rdb.h b/src/rdb.h index 1253c3fd05..3c133ce22e 100644 --- a/src/rdb.h +++ b/src/rdb.h @@ -42,15 +42,15 @@ * RDB 11 is the last open-source Redis RDB version, used by Valkey 7.x and 8.x. * * RDB 12+ are non-open-source Redis formats. + * VALKEY 80+ are Valkey open-source versions. * - * Next time we bump the Valkey RDB version, use much higher version to avoid - * collisions with non-OSS Redis RDB versions. For example, we could use RDB - * version 90 for Valkey 9.0. + * We start using high rdb version numbers since Valkey 9.0. This is in order to avoid + * collisions with non-OSS Redis RDB versions. * * In an RDB file/stream, we also check the magic string REDIS or VALKEY but in * the DUMP/RESTORE format, there is only the RDB version number and no magic * string. */ -#define RDB_VERSION 11 +#define RDB_VERSION 80 /* Reserved range for foreign (unsupported, non-OSS) RDB format. */ #define RDB_FOREIGN_VERSION_MIN 12 From bf974869e9dc5436ea78aa6b44c224db90bf7463 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Mon, 4 Aug 2025 21:40:28 +0300 Subject: [PATCH 118/119] Revert "bump the RDB version" This reverts commit 9d9f63b76f0d5b806c7214f4a22d232ea0ef224c. Signed-off-by: Ran Shidlansik --- src/aof.c | 5 ++--- src/rdb.c | 3 ++- src/rdb.h | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/aof.c b/src/aof.c index ce2a430855..567acdf60c 100644 --- a/src/aof.c +++ b/src/aof.c @@ -1437,10 +1437,9 @@ int loadSingleAppendOnlyFile(char *filename) { /* Check if the AOF file is in RDB format (it may be RDB encoded base AOF * or old style RDB-preamble AOF). 
In that case we need to load the RDB file * and later continue loading the AOF tail if it is an old style RDB-preamble AOF. */ - char sig[6]; /* "REDIS" or "VALKEY" */ - if (fread(sig, 1, 6, fp) != 6 || (memcmp(sig, "REDIS", 5) != 0 && memcmp(sig, "VALKEY", 6) != 0)) { + char sig[5]; /* "REDIS" */ + if (fread(sig, 1, 5, fp) != 5 || memcmp(sig, "REDIS", 5) != 0) { /* Not in RDB format, seek back at 0 offset. */ - serverLog(LL_WARNING, "Error reading the RDB preamble signature of the AOF file, AOF loading aborted"); if (fseek(fp, 0, SEEK_SET) == -1) goto readerr; } else { /* RDB format. Pass loading the RDB functions. */ diff --git a/src/rdb.c b/src/rdb.c index ef5ee1f9a6..6ec4e064dd 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -1442,7 +1442,8 @@ int rdbSaveRio(int req, rio *rdb, int *error, int rdbflags, rdbSaveInfo *rsi) { int j; if (server.rdb_checksum) rdb->update_cksum = rioGenericUpdateChecksum; - snprintf(magic, sizeof(magic), "VALKEY%03d", RDB_VERSION); + /* TODO: Change this to "VALKEY%03d" next time we bump the RDB version. */ + snprintf(magic, sizeof(magic), "REDIS%04d", RDB_VERSION); if (rdbWriteRaw(rdb, magic, 9) == -1) goto werr; if (rdbSaveInfoAuxFields(rdb, rdbflags, rsi) == -1) goto werr; if (!(req & REPLICA_REQ_RDB_EXCLUDE_DATA) && rdbSaveModulesAux(rdb, VALKEYMODULE_AUX_BEFORE_RDB) == -1) goto werr; diff --git a/src/rdb.h b/src/rdb.h index 3c133ce22e..1253c3fd05 100644 --- a/src/rdb.h +++ b/src/rdb.h @@ -42,15 +42,15 @@ * RDB 11 is the last open-source Redis RDB version, used by Valkey 7.x and 8.x. * * RDB 12+ are non-open-source Redis formats. - * VALKEY 80+ are Valkey open-source versions. * - * We start using high rdb version numbers since Valkey 9.0. This is in order to avoid - * collisions with non-OSS Redis RDB versions. + * Next time we bump the Valkey RDB version, use much higher version to avoid + * collisions with non-OSS Redis RDB versions. For example, we could use RDB + * version 90 for Valkey 9.0. 
* * In an RDB file/stream, we also check the magic string REDIS or VALKEY but in * the DUMP/RESTORE format, there is only the RDB version number and no magic * string. */ -#define RDB_VERSION 80 +#define RDB_VERSION 11 /* Reserved range for foreign (unsupported, non-OSS) RDB format. */ #define RDB_FOREIGN_VERSION_MIN 12 From 8f3f61905816db3d298ef0a075fcc1279035f930 Mon Sep 17 00:00:00 2001 From: Ran Shidlansik Date: Tue, 5 Aug 2025 10:30:38 +0300 Subject: [PATCH 119/119] fix some tests which are flaky Signed-off-by: Ran Shidlansik --- tests/unit/hashexpire.tcl | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/tests/unit/hashexpire.tcl b/tests/unit/hashexpire.tcl index be329e7624..c8989dace1 100644 --- a/tests/unit/hashexpire.tcl +++ b/tests/unit/hashexpire.tcl @@ -94,7 +94,7 @@ start_server {tags {"hashexpire"}} { r HSET myhash f1 v1 set ttl_cmd [get_check_ttl_command $command] - set expire_time [get_short_expire_value $command] + set expire_time [get_long_expire_value $command] # Verify HGETEX command assert_equal "v1" [r HGETEX myhash $command $expire_time FIELDS 1 f1] @@ -106,8 +106,6 @@ start_server {tags {"hashexpire"}} { } else { assert_morethan $expire_result 0 } - after 1100 - assert_equal "" [r HGET myhash f1] # Re-enable active expiry r DEBUG SET-ACTIVE-EXPIRE yes } {OK} {needs:debug} @@ -132,7 +130,7 @@ start_server {tags {"hashexpire"}} { r HSET myhash f1 v1 f2 v2 set ttl_cmd [get_check_ttl_command $command] - set expire_time [get_short_expire_value $command] + set expire_time [get_long_expire_value $command] assert_equal "v1 v2" [r HGETEX myhash $command $expire_time FIELDS 2 f1 f2] @@ -145,9 +143,6 @@ start_server {tags {"hashexpire"}} { assert_morethan [r $ttl_cmd myhash FIELDS 1 f2] 0 } - after 1100 - assert_equal "" [r HGET myhash f1] - assert_equal "" [r HGET myhash f2] # Re-enable active expiry r DEBUG SET-ACTIVE-EXPIRE yes } {OK} {needs:debug} @@ -158,7 +153,7 @@ start_server {tags {"hashexpire"}} { r HSETEX myhash 
EX 10000 FIELDS 1 f2 v2 set ttl_cmd [get_check_ttl_command $command] - set expire_time [get_short_expire_value $command] + set expire_time [get_long_expire_value $command] assert_equal "v1" [r HGETEX myhash $command $expire_time FIELDS 1 f1] if {[regexp "AT$" $command]} { @@ -2641,4 +2636,4 @@ tags {"aof external:skip"} { r DEBUG SET-ACTIVE-EXPIRE yes } {OK} {needs:debug} } -} \ No newline at end of file +}