
Commit a62d1f1

zhulipeng, guowangy, and zuiderkwast authored
Fix false sharing issue between main thread and io-threads when accessing used_memory_thread. (#1179)
When profiling some workloads with `io-threads` enabled, we found the false sharing to be heavy. This patch splits the elements accessed by the main thread and by the io-threads onto different cache lines, by padding elements at the head of the `used_memory_thread_padded` array. This mitigates the false sharing between the main thread and the io-threads, because the main thread has been the bottleneck with io-threads enabled. We did not put each element on an individual cache line, because we don't want to add extra cache-line fetches (3 vs. 16 cache lines) in calls to functions like `zmalloc_used_memory()`.

---------

Signed-off-by: Lipeng Zhu <[email protected]>
Signed-off-by: Lipeng Zhu <[email protected]>
Signed-off-by: Viktor Söderqvist <[email protected]>
Co-authored-by: Wangyang Guo <[email protected]>
Co-authored-by: Viktor Söderqvist <[email protected]>
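The head-padding layout can be sketched standalone. This is a hypothetical rendition of the idea, not the actual zmalloc.c code: `CACHE_LINE_SIZE = 64` and `MAX_THREADS_NUM = 20` are assumed values, and `main()` exists only to print where each counter lands.

```c
#include <stdio.h>
#include <stddef.h>

/* Assumed values for illustration; zmalloc.c derives CACHE_LINE_SIZE from the
 * build and MAX_THREADS_NUM from IO_THREADS_MAX_NUM + 3 + 1. */
#define CACHE_LINE_SIZE 64
#define PADDING_ELEMENT_NUM (CACHE_LINE_SIZE / sizeof(size_t) - 1) /* 7 on LP64 */
#define MAX_THREADS_NUM 20 /* hypothetical thread count for this sketch */

static __attribute__((aligned(CACHE_LINE_SIZE)))
size_t used_memory_thread_padded[MAX_THREADS_NUM + PADDING_ELEMENT_NUM];

/* Skip the head padding: index 0 (the main thread) becomes the last slot of
 * the first cache line, sharing that line only with the unused padding, while
 * index 1 (the first io-thread) starts cleanly on the second line. */
static size_t *used_memory_thread = &used_memory_thread_padded[PADDING_ELEMENT_NUM];

int main(void) {
    for (int i = 0; i < 3; i++) {
        size_t off = (size_t)((char *)&used_memory_thread[i] - (char *)used_memory_thread_padded);
        printf("thread %d -> byte offset %zu, cache line %zu\n", i, off, off / CACHE_LINE_SIZE);
    }
    return 0;
}
```

With these assumptions the program prints byte offset 56 (cache line 0) for thread 0, and offsets 64 and 72 (cache line 1) for threads 1 and 2: the main thread's counter sits alone on the first line, while the io-thread counters stay densely packed behind it.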
1 parent 701ab72 commit a62d1f1

File tree

1 file changed (+4 −2)


src/zmalloc.c

Lines changed: 4 additions & 2 deletions
```diff
@@ -90,6 +90,7 @@ void zlibc_free(void *ptr) {
 
 #define thread_local _Thread_local
 
+#define PADDING_ELEMENT_NUM (CACHE_LINE_SIZE / sizeof(size_t) - 1)
 #define MAX_THREADS_NUM (IO_THREADS_MAX_NUM + 3 + 1)
 /* A thread-local storage which keep the current thread's index in the used_memory_thread array. */
 static thread_local int thread_index = -1;
@@ -101,10 +102,11 @@ static thread_local int thread_index = -1;
  * For the other architecture, lets fall back to the atomic operation to keep safe. */
 #if defined(__i386__) || defined(__x86_64__) || defined(__amd64__) || defined(__POWERPC__) || defined(__arm__) || \
     defined(__arm64__)
-static __attribute__((aligned(sizeof(size_t)))) size_t used_memory_thread[MAX_THREADS_NUM];
+static __attribute__((aligned(CACHE_LINE_SIZE))) size_t used_memory_thread_padded[MAX_THREADS_NUM + PADDING_ELEMENT_NUM];
 #else
-static _Atomic size_t used_memory_thread[MAX_THREADS_NUM];
+static __attribute__((aligned(CACHE_LINE_SIZE))) _Atomic size_t used_memory_thread_padded[MAX_THREADS_NUM + PADDING_ELEMENT_NUM];
 #endif
+static size_t *used_memory_thread = &used_memory_thread_padded[PADDING_ELEMENT_NUM];
 static atomic_int total_active_threads = 0;
 /* This is a simple protection. It's used only if some modules create a lot of threads. */
 static atomic_size_t used_memory_for_additional_threads = 0;
```
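The commit message's cost argument for not padding every element shows up on the read side. Below is a minimal sketch, assuming the declarations from the diff above; `sum_thread_counters` is a hypothetical stand-in for the aggregation that `zmalloc_used_memory()` performs (the real function also accounts for `used_memory_for_additional_threads`).

```c
#include <stdatomic.h>
#include <stddef.h>

/* Extern declarations for this sketch only; the real variables are
 * file-local in zmalloc.c. */
extern size_t *used_memory_thread;
extern atomic_int total_active_threads;

/* Hypothetical stand-in for the aggregate read path. Because the counters are
 * packed back to back, summing n of them touches roughly n * sizeof(size_t) / 64
 * cache lines; padding every element would cost one cache line per thread. */
size_t sum_thread_counters(void) {
    size_t total = 0;
    int n = atomic_load_explicit(&total_active_threads, memory_order_relaxed);
    for (int i = 0; i < n; i++) total += used_memory_thread[i];
    return total;
}
```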
