Commit 9490ee3

create GC TLS (JuliaLang#55086)
Encapsulates all relevant GC thread-local state into a separate structure. The motivation is that MMTk will have its own version of the GC thread-local state, so it does not need all of the Julia GC TLS. In the future, people using MMTk would set a preprocessor flag that causes either the stock Julia GC TLS or MMTk's GC TLS to be included in `julia_threads.h`, i.e. we would have something like:

```C
jl_gc_mmtk_tls_states mmtk_gc_tls;
jl_gc_tls_states gc_tls;
```
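A minimal sketch of what that preprocessor selection could look like inside `julia_threads.h`; the flag name `MMTK_GC`, the header `gc-tls-mmtk.h`, and `jl_gc_mmtk_tls_states_t` are placeholders for illustration, not part of this commit:

```C
/* Hypothetical sketch only: MMTK_GC, gc-tls-mmtk.h, and jl_gc_mmtk_tls_states_t
 * are illustrative names, not from this commit. */
#ifdef MMTK_GC
#include "gc-tls-mmtk.h"   /* would define the MMTk GC TLS struct */
#else
#include "gc-tls.h"        /* defines jl_gc_tls_states_t (added in this commit) */
#endif

typedef struct _jl_tls_states_t {
    /* ... other thread-local state ... */
#ifdef MMTK_GC
    jl_gc_mmtk_tls_states_t gc_tls;
#else
    jl_gc_tls_states_t gc_tls;
#endif
    /* ... */
} jl_tls_states_t;
```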
1 parent 14b9441 commit 9490ee3

File tree: 10 files changed (+314, -317 lines)

src/Makefile

Lines changed: 1 addition & 1 deletion
```diff
@@ -99,7 +99,7 @@ ifeq ($(USE_SYSTEM_LIBUV),0)
 UV_HEADERS += uv.h
 UV_HEADERS += uv/*.h
 endif
-PUBLIC_HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,work-stealing-queue.h julia.h julia_assert.h julia_threads.h julia_fasttls.h julia_locks.h julia_atomics.h jloptions.h)
+PUBLIC_HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,work-stealing-queue.h gc-tls.h julia.h julia_assert.h julia_threads.h julia_fasttls.h julia_locks.h julia_atomics.h jloptions.h)
 ifeq ($(OS),WINNT)
 PUBLIC_HEADERS += $(addprefix $(SRCDIR)/,win32_ucontext.h)
 endif
```

src/array.c

Lines changed: 1 addition & 1 deletion
```diff
@@ -499,7 +499,7 @@ JL_DLLEXPORT jl_value_t *jl_alloc_string(size_t len)
     const size_t allocsz = sz + sizeof(jl_taggedvalue_t);
     if (sz <= GC_MAX_SZCLASS) {
         int pool_id = jl_gc_szclass_align8(allocsz);
-        jl_gc_pool_t *p = &ptls->heap.norm_pools[pool_id];
+        jl_gc_pool_t *p = &ptls->gc_tls.heap.norm_pools[pool_id];
         int osize = jl_gc_sizeclasses[pool_id];
         // We call `jl_gc_pool_alloc_noinline` instead of `jl_gc_pool_alloc` to avoid double-counting in
         // the Allocations Profiler. (See https://github.com/JuliaLang/julia/pull/43868 for more details.)
```

src/gc-debug.c

Lines changed: 22 additions & 51 deletions
```diff
@@ -97,7 +97,7 @@ static arraylist_t bits_save[4];
 static void gc_clear_mark_page(jl_gc_pagemeta_t *pg, int bits)
 {
     jl_ptls_t ptls2 = gc_all_tls_states[pg->thread_n];
-    jl_gc_pool_t *pool = &ptls2->heap.norm_pools[pg->pool_n];
+    jl_gc_pool_t *pool = &ptls2->gc_tls.heap.norm_pools[pg->pool_n];
     jl_taggedvalue_t *pv = (jl_taggedvalue_t*)(pg->data + GC_PAGE_OFFSET);
     char *lim = (char*)pv + GC_PAGE_SZ - GC_PAGE_OFFSET - pool->osize;
     while ((char*)pv <= lim) {
@@ -112,7 +112,7 @@ static void gc_clear_mark_outer(int bits)
 {
     for (int i = 0; i < gc_n_threads; i++) {
         jl_ptls_t ptls2 = gc_all_tls_states[i];
-        jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->page_metadata_allocd.bottom);
+        jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->gc_tls.page_metadata_allocd.bottom);
         while (pg != NULL) {
             gc_clear_mark_page(pg, bits);
             pg = pg->next;
@@ -132,7 +132,7 @@ static void clear_mark(int bits)
     }
     bigval_t *v;
     for (int i = 0; i < gc_n_threads; i++) {
-        v = gc_all_tls_states[i]->heap.big_objects;
+        v = gc_all_tls_states[i]->gc_tls.heap.big_objects;
         while (v != NULL) {
             void *gcv = &v->header;
             if (!gc_verifying)
@@ -170,7 +170,7 @@ static void gc_verify_track(jl_ptls_t ptls)
         return;
     do {
         jl_gc_markqueue_t mq;
-        jl_gc_markqueue_t *mq2 = &ptls->mark_queue;
+        jl_gc_markqueue_t *mq2 = &ptls->gc_tls.mark_queue;
         ws_queue_t *cq = &mq.chunk_queue;
         ws_queue_t *q = &mq.ptr_queue;
         jl_atomic_store_relaxed(&cq->top, 0);
@@ -230,7 +230,7 @@ void gc_verify(jl_ptls_t ptls)
         return;
     }
     jl_gc_markqueue_t mq;
-    jl_gc_markqueue_t *mq2 = &ptls->mark_queue;
+    jl_gc_markqueue_t *mq2 = &ptls->gc_tls.mark_queue;
     ws_queue_t *cq = &mq.chunk_queue;
     ws_queue_t *q = &mq.ptr_queue;
     jl_atomic_store_relaxed(&cq->top, 0);
@@ -289,7 +289,7 @@ static void gc_verify_tags_page(jl_gc_pagemeta_t *pg)
     int p_n = pg->pool_n;
     int t_n = pg->thread_n;
     jl_ptls_t ptls2 = gc_all_tls_states[t_n];
-    jl_gc_pool_t *p = &ptls2->heap.norm_pools[p_n];
+    jl_gc_pool_t *p = &ptls2->gc_tls.heap.norm_pools[p_n];
     int osize = pg->osize;
     char *data = pg->data;
     char *page_begin = data + GC_PAGE_OFFSET;
@@ -349,42 +349,13 @@ static void gc_verify_tags_page(jl_gc_pagemeta_t *pg)
 
 static void gc_verify_tags_pagetable0(pagetable0_t *pagetable0)
 {
-    for (int pg_i = 0; pg_i < REGION0_PG_COUNT / 32; pg_i++) {
-        uint32_t line = pagetable0->allocmap[pg_i];
-        if (line) {
-            for (int j = 0; j < 32; j++) {
-                if ((line >> j) & 1) {
-                    gc_verify_tags_page(pagetable0->meta[pg_i * 32 + j]);
-                }
-            }
-        }
-    }
-}
-
-static void gc_verify_tags_pagetable1(pagetable1_t *pagetable1)
-{
-    for (int pg_i = 0; pg_i < REGION1_PG_COUNT / 32; pg_i++) {
-        uint32_t line = pagetable1->allocmap0[pg_i];
-        if (line) {
-            for (int j = 0; j < 32; j++) {
-                if ((line >> j) & 1) {
-                    gc_verify_tags_pagetable0(pagetable1->meta0[pg_i * 32 + j]);
-                }
-            }
-        }
-    }
-}
-
-static void gc_verify_tags_pagetable(void)
-{
-    for (int pg_i = 0; pg_i < (REGION2_PG_COUNT + 31) / 32; pg_i++) {
-        uint32_t line = memory_map.allocmap1[pg_i];
-        if (line) {
-            for (int j = 0; j < 32; j++) {
-                if ((line >> j) & 1) {
-                    gc_verify_tags_pagetable1(memory_map.meta1[pg_i * 32 + j]);
-                }
-            }
+    for (int i = 0; i < gc_n_threads; i++) {
+        jl_ptls_t ptls2 = gc_all_tls_states[i];
+        jl_gc_page_stack_t *pgstk = &ptls2->gc_tls.page_metadata_allocd;
+        jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&pgstk->bottom);
+        while (pg != NULL) {
+            gc_verify_tags_page(pg);
+            pg = pg->next;
         }
     }
 }
@@ -396,7 +367,7 @@ void gc_verify_tags(void)
     jl_ptls_t ptls2 = gc_all_tls_states[t_i];
     for (int i = 0; i < JL_GC_N_POOLS; i++) {
         // for all pools, iterate its freelist
-        jl_gc_pool_t *p = &ptls2->heap.norm_pools[i];
+        jl_gc_pool_t *p = &ptls2->gc_tls.heap.norm_pools[i];
         jl_taggedvalue_t *next = p->freelist;
         jl_taggedvalue_t *last = NULL;
         char *allocating = gc_page_data(next);
@@ -837,8 +808,8 @@ void gc_time_mark_pause(int64_t t0, int64_t scanned_bytes,
     int64_t remset_nptr = 0;
     for (int t_i = 0; t_i < gc_n_threads; t_i++) {
         jl_ptls_t ptls2 = gc_all_tls_states[t_i];
-        last_remset_len += ptls2->heap.last_remset->len;
-        remset_nptr = ptls2->heap.remset_nptr;
+        last_remset_len += ptls2->gc_tls.heap.last_remset->len;
+        remset_nptr = ptls2->gc_tls.heap.remset_nptr;
     }
     jl_safe_printf("GC mark pause %.2f ms | "
                    "scanned %" PRId64 " kB = %" PRId64 " + %" PRId64 " | "
@@ -969,13 +940,13 @@ void gc_stats_all_pool(void)
     for (int i = 0; i < JL_GC_N_POOLS; i++) {
         for (int t_i = 0; t_i < gc_n_threads; t_i++) {
             jl_ptls_t ptls2 = gc_all_tls_states[t_i];
-            size_t b = pool_stats(&ptls2->heap.norm_pools[i], &w, &np, &nol);
+            size_t b = pool_stats(&ptls2->gc_tls.heap.norm_pools[i], &w, &np, &nol);
             nb += b;
-            no += (b / ptls2->heap.norm_pools[i].osize);
+            no += (b / ptls2->gc_tls.heap.norm_pools[i].osize);
             tw += w;
             tp += np;
             nold += nol;
-            noldbytes += nol * ptls2->heap.norm_pools[i].osize;
+            noldbytes += nol * ptls2->gc_tls.heap.norm_pools[i].osize;
         }
     }
     jl_safe_printf("%lld objects (%lld%% old), %lld kB (%lld%% old) total allocated, "
@@ -994,7 +965,7 @@ void gc_stats_big_obj(void)
     size_t nused=0, nbytes=0, nused_old=0, nbytes_old=0;
     for (int t_i = 0; t_i < gc_n_threads; t_i++) {
         jl_ptls_t ptls2 = gc_all_tls_states[t_i];
-        bigval_t *v = ptls2->heap.big_objects;
+        bigval_t *v = ptls2->gc_tls.heap.big_objects;
         while (v != NULL) {
             if (gc_marked(v->bits.gc)) {
                 nused++;
@@ -1011,7 +982,7 @@
             v = v->next;
         }
 
-        mallocarray_t *ma = ptls2->heap.mallocarrays;
+        mallocarray_t *ma = ptls2->gc_tls.heap.mallocarrays;
         while (ma != NULL) {
             if (gc_marked(jl_astaggedvalue(ma->a)->bits.gc)) {
                 nused++;
@@ -1057,7 +1028,7 @@ static void gc_count_pool_pagetable(void)
 {
     for (int i = 0; i < gc_n_threads; i++) {
         jl_ptls_t ptls2 = gc_all_tls_states[i];
-        jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->page_metadata_allocd.bottom);
+        jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->gc_tls.page_metadata_allocd.bottom);
         while (pg != NULL) {
             if (gc_alloc_map_is_set(pg->data)) {
                 gc_count_pool_page(pg);
```
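The debug helpers above (`gc_verify_tags_pagetable0`, `gc_count_pool_pagetable`) switch from walking the old global pagetable bitmaps to walking each thread's allocated-page stack, which now lives in the GC TLS. A minimal sketch of that traversal idiom, assuming a hypothetical `visit_page` callback standing in for the per-page work (`gc_verify_tags_page`, `gc_count_pool_page`):

```C
/* Sketch only: visit_page is a placeholder for whatever per-page work is needed. */
static void foreach_allocated_page(void (*visit_page)(jl_gc_pagemeta_t *))
{
    for (int i = 0; i < gc_n_threads; i++) {
        jl_ptls_t ptls2 = gc_all_tls_states[i];
        // each thread keeps its allocated page metadata in a stack inside its GC TLS
        jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->gc_tls.page_metadata_allocd.bottom);
        while (pg != NULL) {
            visit_page(pg);
            pg = pg->next;
        }
    }
}
```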

src/gc-stacks.c

Lines changed: 8 additions & 8 deletions
```diff
@@ -119,7 +119,7 @@ static void _jl_free_stack(jl_ptls_t ptls, void *stkbuf, size_t bufsz)
     if (bufsz <= pool_sizes[JL_N_STACK_POOLS - 1]) {
         unsigned pool_id = select_pool(bufsz);
         if (pool_sizes[pool_id] == bufsz) {
-            small_arraylist_push(&ptls->heap.free_stacks[pool_id], stkbuf);
+            small_arraylist_push(&ptls->gc_tls.heap.free_stacks[pool_id], stkbuf);
             return;
         }
     }
@@ -148,7 +148,7 @@ void jl_release_task_stack(jl_ptls_t ptls, jl_task_t *task)
 #ifdef _COMPILER_ASAN_ENABLED_
             __asan_unpoison_stack_memory((uintptr_t)stkbuf, bufsz);
 #endif
-            small_arraylist_push(&ptls->heap.free_stacks[pool_id], stkbuf);
+            small_arraylist_push(&ptls->gc_tls.heap.free_stacks[pool_id], stkbuf);
         }
     }
 }
@@ -163,7 +163,7 @@ JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, jl_task_t *owner) JL_NOTSAFEPO
     if (ssize <= pool_sizes[JL_N_STACK_POOLS - 1]) {
         unsigned pool_id = select_pool(ssize);
         ssize = pool_sizes[pool_id];
-        small_arraylist_t *pool = &ptls->heap.free_stacks[pool_id];
+        small_arraylist_t *pool = &ptls->gc_tls.heap.free_stacks[pool_id];
         if (pool->len > 0) {
             stk = small_arraylist_pop(pool);
         }
@@ -184,7 +184,7 @@ JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, jl_task_t *owner) JL_NOTSAFEPO
     }
     *bufsz = ssize;
     if (owner) {
-        small_arraylist_t *live_tasks = &ptls->heap.live_tasks;
+        small_arraylist_t *live_tasks = &ptls->gc_tls.heap.live_tasks;
         mtarraylist_push(live_tasks, owner);
     }
     return stk;
@@ -209,7 +209,7 @@ void sweep_stack_pools(void)
 
         // free half of stacks that remain unused since last sweep
         for (int p = 0; p < JL_N_STACK_POOLS; p++) {
-            small_arraylist_t *al = &ptls2->heap.free_stacks[p];
+            small_arraylist_t *al = &ptls2->gc_tls.heap.free_stacks[p];
             size_t n_to_free;
             if (al->len > MIN_STACK_MAPPINGS_PER_POOL) {
                 n_to_free = al->len / 2;
@@ -225,7 +225,7 @@ void sweep_stack_pools(void)
             }
         }
 
-        small_arraylist_t *live_tasks = &ptls2->heap.live_tasks;
+        small_arraylist_t *live_tasks = &ptls2->gc_tls.heap.live_tasks;
         size_t n = 0;
         size_t ndel = 0;
         size_t l = live_tasks->len;
@@ -280,7 +280,7 @@ JL_DLLEXPORT jl_array_t *jl_live_tasks(void)
         jl_ptls_t ptls2 = allstates[i];
         if (ptls2 == NULL)
             continue;
-        small_arraylist_t *live_tasks = &ptls2->heap.live_tasks;
+        small_arraylist_t *live_tasks = &ptls2->gc_tls.heap.live_tasks;
         size_t n = mtarraylist_length(live_tasks);
         l += n + (ptls2->root_task->stkbuf != NULL);
     }
@@ -303,7 +303,7 @@ JL_DLLEXPORT jl_array_t *jl_live_tasks(void)
                 goto restart;
             ((void**)jl_array_data(a))[j++] = t;
         }
-        small_arraylist_t *live_tasks = &ptls2->heap.live_tasks;
+        small_arraylist_t *live_tasks = &ptls2->gc_tls.heap.live_tasks;
         size_t n = mtarraylist_length(live_tasks);
         for (size_t i = 0; i < n; i++) {
             jl_task_t *t = (jl_task_t*)mtarraylist_get(live_tasks, i);
```

src/gc-tls.h

Lines changed: 103 additions & 0 deletions
New file:

```C
// This file is a part of Julia. License is MIT: https://julialang.org/license

// Meant to be included in "julia_threads.h"
#ifndef JL_GC_TLS_H
#define JL_GC_TLS_H

#include "julia_atomics.h"
#include "work-stealing-queue.h"
// GC threading ------------------------------------------------------------------

#include "arraylist.h"

#ifdef __cplusplus
extern "C" {
#endif

typedef struct {
    struct _jl_taggedvalue_t *freelist;  // root of list of free objects
    struct _jl_taggedvalue_t *newpages;  // root of list of chunks of free objects
    uint16_t osize;                      // size of objects in this pool
} jl_gc_pool_t;

typedef struct {
    // variable for tracking weak references
    small_arraylist_t weak_refs;
    // live tasks started on this thread
    // that are holding onto a stack from the pool
    small_arraylist_t live_tasks;

    // variables for tracking malloc'd arrays
    struct _mallocarray_t *mallocarrays;
    struct _mallocarray_t *mafreelist;

    // variables for tracking big objects
    struct _bigval_t *big_objects;

    // lower bound of the number of pointers inside remembered values
    int remset_nptr;
    // remembered set
    arraylist_t remset;

    // variables for allocating objects from pools
#define JL_GC_N_MAX_POOLS 51 // conservative. must be kept in sync with `src/julia_internal.h`
    jl_gc_pool_t norm_pools[JL_GC_N_MAX_POOLS];

#define JL_N_STACK_POOLS 16
    small_arraylist_t free_stacks[JL_N_STACK_POOLS];
} jl_thread_heap_t;

typedef struct {
    _Atomic(int64_t) allocd;
    _Atomic(int64_t) pool_live_bytes;
    _Atomic(int64_t) freed;
    _Atomic(uint64_t) malloc;
    _Atomic(uint64_t) realloc;
    _Atomic(uint64_t) poolalloc;
    _Atomic(uint64_t) bigalloc;
    _Atomic(uint64_t) freecall;
} jl_thread_gc_num_t;

typedef struct {
    ws_queue_t chunk_queue;
    ws_queue_t ptr_queue;
    arraylist_t reclaim_set;
} jl_gc_markqueue_t;

typedef struct {
    // thread local increment of `perm_scanned_bytes`
    size_t perm_scanned_bytes;
    // thread local increment of `scanned_bytes`
    size_t scanned_bytes;
    // Number of queued big objects (<= 1024)
    size_t nbig_obj;
    // Array of queued big objects to be moved between the young list
    // and the old list.
    // A set low bit means that the object should be moved from the old list
    // to the young list (`mark_reset_age`).
    // Objects can only be put into this list when the mark bit is flipped to
    // `1` (atomically). Combining with the sync after marking,
    // this makes sure that a single objects can only appear once in
    // the lists (the mark bit cannot be flipped to `0` without sweeping)
    void *big_obj[1024];
} jl_gc_mark_cache_t;

typedef struct {
    _Atomic(struct _jl_gc_pagemeta_t *) bottom;
} jl_gc_page_stack_t;

typedef struct {
    jl_thread_heap_t heap;
    jl_gc_page_stack_t page_metadata_allocd;
    jl_thread_gc_num_t gc_num;
    jl_gc_markqueue_t mark_queue;
    jl_gc_mark_cache_t gc_cache;
    _Atomic(size_t) gc_sweeps_requested;
    arraylist_t sweep_objs;
} jl_gc_tls_states_t;

#ifdef __cplusplus
}
#endif

#endif // JL_GC_TLS_H
```
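For context, a rough sketch (not part of this diff, surrounding fields elided) of how the new struct is meant to be used: `julia_threads.h` embeds it in the per-thread state as a `gc_tls` field, which is why the call sites above change from `ptls->heap.*`, `ptls->mark_queue`, and `ptls->page_metadata_allocd` to their `ptls->gc_tls.*` counterparts:

```C
/* Illustrative sketch only; the other jl_tls_states_t fields are omitted. */
struct _jl_tls_states_t {
    /* ... other thread-local fields ... */
    jl_gc_tls_states_t gc_tls;   /* all GC thread-local state now lives here */
    /* ... */
};

/* Old access path:  ptls->heap.norm_pools[pool_id]
 * New access path:  ptls->gc_tls.heap.norm_pools[pool_id] */
```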
