Skip to content

Commit 0c8ad5c

Browse files
authored
defrag: allow defrag to start during AOF loading (#1420)
Addresses #1393

Changes:
* During AOF loading or a long-running script, this allows defrag to be initiated.
* The AOF defrag test was corrected to eliminate the wait period and rely on non-timer invocations.
* Logic for "overage" time in defrag was changed. It previously accumulated underage, leading to large latencies in extreme tests having very high CPU percentage. After several simple stages were completed during infrequent blocked processing, a large cycle time would be experienced.

Signed-off-by: Jim Brunner <[email protected]>
1 parent 1acf7f7 commit 0c8ad5c

File tree

2 files changed

+15
-5
lines changed

2 files changed

+15
-5
lines changed

src/defrag.c

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ struct DefragContext {
8484

8585
long long timeproc_id; // Eventloop ID of the timerproc (or AE_DELETED_EVENT_ID)
8686
monotime timeproc_end_time; // Ending time of previous timerproc execution
87-
long timeproc_overage_us; // A correction value if over/under target CPU percent
87+
long timeproc_overage_us; // A correction value if over target CPU percent
8888
};
8989
static struct DefragContext defrag;
9090

@@ -1157,7 +1157,7 @@ static int computeDefragCycleUs(void) {
11571157
* the starvation of the timer. */
11581158
dutyCycleUs = targetCpuPercent * waitedUs / (100 - targetCpuPercent);
11591159

1160-
// Also adjust for any accumulated overage(underage).
1160+
// Also adjust for any accumulated overage.
11611161
dutyCycleUs -= defrag.timeproc_overage_us;
11621162
defrag.timeproc_overage_us = 0;
11631163

@@ -1176,8 +1176,11 @@ static int computeDefragCycleUs(void) {
11761176
* computeDefragCycleUs computation. */
11771177
static int computeDelayMs(monotime intendedEndtime) {
11781178
defrag.timeproc_end_time = getMonotonicUs();
1179-
int overage = defrag.timeproc_end_time - intendedEndtime;
1179+
long overage = defrag.timeproc_end_time - intendedEndtime;
11801180
defrag.timeproc_overage_us += overage; // track over/under desired CPU
1181+
/* Allow negative overage (underage) to count against existing overage, but don't allow
1182+
* underage (from short stages) to be accumulated. */
1183+
if (defrag.timeproc_overage_us < 0) defrag.timeproc_overage_us = 0;
11811184

11821185
int targetCpuPercent = server.active_defrag_cpu_percent;
11831186
serverAssert(targetCpuPercent > 0 && targetCpuPercent < 100);
@@ -1189,7 +1192,7 @@ static int computeDelayMs(monotime intendedEndtime) {
11891192
long totalCycleTimeUs = server.active_defrag_cycle_us * 100 / targetCpuPercent;
11901193
long delayUs = totalCycleTimeUs - server.active_defrag_cycle_us;
11911194
// Only increase delay by the fraction of the overage that would be non-duty-cycle
1192-
delayUs += defrag.timeproc_overage_us * (100 - targetCpuPercent) / 100; // "overage" might be negative
1195+
delayUs += defrag.timeproc_overage_us * (100 - targetCpuPercent) / 100;
11931196
if (delayUs < 0) delayUs = 0;
11941197
long delayMs = delayUs / 1000; // round down
11951198
return delayMs;
@@ -1254,6 +1257,9 @@ static long long activeDefragTimeProc(struct aeEventLoop *eventLoop, long long i
12541257
* actions. This interface allows defrag to continue running, avoiding a single long defrag step
12551258
* after the long operation completes. */
12561259
void defragWhileBlocked(void) {
1260+
// This is called infrequently, while timers are not active. We might need to start defrag.
1261+
if (!defragIsRunning()) monitorActiveDefrag();
1262+
12571263
if (!defragIsRunning()) return;
12581264

12591265
// Save off the timeproc_id. If we have a normal termination, it will be cleared.

tests/unit/memefficiency.tcl

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,8 +138,12 @@ run_solo {defrag} {
138138
# reset stats and load the AOF file
139139
r config resetstat
140140
r config set key-load-delay -25 ;# sleep on average 1/25 usec
141+
# Note: This test is checking if defrag is working DURING AOF loading (while
142+
# timers are not active). So we don't give any extra time, and we deactivate
143+
# defrag immediately after the AOF loading is complete. During loading,
144+
# defrag will get invoked less often, which triggers starvation prevention. We
145+
# should expect longer latency measurements.
141146
r debug loadaof
142-
after 1000 ;# give defrag a chance to work before turning it off
143147
r config set activedefrag no
144148

145149
# measure hits and misses right after aof loading

0 commit comments

Comments
 (0)