Skip to content

Commit f1878c2

Browse files
committed
allow tasks to request dedicated stack space when created
never copy over the root stack: this is a hybrid approach to COPY_STACK where the root task is never moved or copied, and all other task stacks are layered into the same memory area (ptls->basestack + basesize). Several strategies exist for making new stacks: ucontext_t (where it is available, aka Linux); unw_context_t (as an alternative to ucontext_t that avoids a syscall on task-switch); makecontext (as a POSIX standard implementation); a setjmp/longjmp-based implementation (for systems where this is sufficient); and Windows Fibers (implemented here, since we can be more efficient and reliable than the official Fibers API). Also uses an alternate stack for collecting stack-overflow backtraces, like POSIX, but managed manually.
1 parent 254ab94 commit f1878c2

27 files changed

+1250
-648
lines changed

base/boot.jl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -371,7 +371,9 @@ eval(Core, :(UpsilonNode() = $(Expr(:new, :UpsilonNode))))
371371

372372
Module(name::Symbol=:anonymous, std_imports::Bool=true) = ccall(:jl_f_new_module, Ref{Module}, (Any, Bool), name, std_imports)
373373

374-
Task(@nospecialize(f)) = ccall(:jl_new_task, Ref{Task}, (Any, Int), f, 0)
374+
function Task(@nospecialize(f), reserved_stack::Int=0)
375+
return ccall(:jl_new_task, Ref{Task}, (Any, Int), f, reserved_stack)
376+
end
375377

376378
# simple convert for use by constructors of types in Core
377379
# note that there is no actual conversion defined here,

src/Makefile

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ SRCS := \
3939
jltypes gf typemap ast builtins module interpreter symbol \
4040
dlload sys init task array dump staticdata toplevel jl_uv datatype \
4141
simplevector APInt-C runtime_intrinsics runtime_ccall precompile \
42-
threadgroup threading stackwalk gc gc-debug gc-pages method \
42+
threadgroup threading stackwalk gc gc-debug gc-pages gc-stacks method \
4343
jlapi signal-handling safepoint jloptions timing subtype rtutils \
4444
crc32c processor
4545

@@ -204,14 +204,14 @@ $(addprefix $(BUILDDIR)/,threading.o threading.dbg.obj gc.o gc.dbg.obj init.c in
204204
$(addprefix $(BUILDDIR)/,APInt-C.o APInt-C.dbg.obj runtime_intrinsics.o runtime_intrinsics.dbg.obj): $(SRCDIR)/APInt-C.h
205205

206206
# archive library file rules
207-
$(BUILDDIR)/support/libsupport.a: $(SRCDIR)/support/*.h $(SRCDIR)/support/*.c
207+
$(BUILDDIR)/support/libsupport.a: $(addprefix $(SRCDIR)/support/,*.h *.c *.S) $(SRCDIR)/support/*.c
208208
$(MAKE) -C $(SRCDIR)/support BUILDDIR='$(abspath $(BUILDDIR)/support)'
209209

210-
$(BUILDDIR)/support/libsupport-debug.a: $(SRCDIR)/support/*.h $(SRCDIR)/support/*.c
210+
$(BUILDDIR)/support/libsupport-debug.a: $(addprefix $(SRCDIR)/support/,*.h *.c *.S) $(SRCDIR)/support/*.c
211211
$(MAKE) -C $(SRCDIR)/support debug BUILDDIR='$(abspath $(BUILDDIR)/support)'
212212

213213
$(FLISP_EXECUTABLE_release): $(BUILDDIR)/flisp/libflisp.a
214-
$(BUILDDIR)/flisp/libflisp.a: $(addprefix $(SRCDIR)/,flisp/*.h flisp/*.c) $(BUILDDIR)/support/libsupport.a
214+
$(BUILDDIR)/flisp/libflisp.a: $(addprefix $(SRCDIR)/flisp/,*.h *.c) $(BUILDDIR)/support/libsupport.a
215215
$(MAKE) -C $(SRCDIR)/flisp BUILDDIR='$(abspath $(BUILDDIR)/flisp)'
216216

217217
$(FLISP_EXECUTABLE_debug): $(BUILDDIR)/flisp/libflisp-debug.a

src/gc-debug.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -595,11 +595,11 @@ static void gc_scrub_task(jl_task_t *ta)
595595
#else
596596
jl_task_t *thread_task = ptls2->root_task;
597597
#endif
598-
if (ta == thread_task)
599-
gc_scrub_range(ptls2->stack_lo, ptls2->stack_hi);
600-
if (ta->stkbuf == (void*)(intptr_t)(-1) || !ta->stkbuf)
601-
return;
602-
gc_scrub_range((char*)ta->stkbuf, (char*)ta->stkbuf + ta->ssize);
598+
void *stkbuf = ta->stkbuf;
599+
if (ta == thread_task && ptls->copy_stack)
600+
gc_scrub_range(ptls2->stackbase, ptls2->stacksize);
601+
else if (stkbuf)
602+
gc_scrub_range((char*)stkbuf, (char*)stkbuf + ta->bufsz);
603603
}
604604

605605
void gc_scrub(void)

src/gc-pages.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
#ifndef _OS_WINDOWS_
55
# include <sys/resource.h>
66
#endif
7-
#include "julia_assert.h"
87

98
#ifdef __cplusplus
109
extern "C" {

src/gc-stacks.c

Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
// This file is a part of Julia. License is MIT: https://julialang.org/license
2+
3+
#include "gc.h"
4+
#ifndef _OS_WINDOWS_
5+
# include <sys/resource.h>
6+
#endif
7+
8+
const size_t jl_guard_size = (4096 * 16);
9+
10+
#ifdef _OS_WINDOWS_
11+
#define MAP_FAILED NULL
12+
static void *malloc_stack(size_t bufsz)
13+
{
14+
void *stk = VirtualAlloc(NULL, bufsz, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
15+
if (stk == NULL)
16+
return MAP_FAILED;
17+
DWORD dwOldProtect;
18+
if (!VirtualProtect(stk, jl_guard_size, PAGE_READWRITE | PAGE_GUARD, &dwOldProtect)) {
19+
VirtualFree(stk, 0, MEM_RELEASE);
20+
return MAP_FAILED;
21+
}
22+
return stk;
23+
}
24+
25+
26+
static void free_stack(void *stkbuf, size_t bufsz)
27+
{
28+
VirtualFree(stkbuf, 0, MEM_RELEASE);
29+
}
30+
31+
#else
32+
33+
static void *malloc_stack(size_t bufsz)
34+
{
35+
void* stk = mmap(0, bufsz, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
36+
if (stk == MAP_FAILED)
37+
return MAP_FAILED;
38+
#if !defined(JL_HAVE_UCONTEXT) && !defined(JL_HAVE_SIGALTSTACK)
39+
// setup a guard page to detect stack overflow
40+
if (mprotect(stk, jl_guard_size, PROT_NONE) == -1) {
41+
munmap(stk, bufsz);
42+
return MAP_FAILED;
43+
}
44+
#endif
45+
return stk;
46+
}
47+
48+
static void free_stack(void *stkbuf, size_t bufsz)
49+
{
50+
munmap(stkbuf, bufsz);
51+
}
52+
#endif
53+
54+
55+
const unsigned pool_sizes[] = {
56+
128 * 1024,
57+
192 * 1024,
58+
256 * 1024,
59+
384 * 1024,
60+
512 * 1024,
61+
768 * 1024,
62+
1024 * 1024,
63+
1537 * 1024,
64+
2048 * 1024,
65+
3 * 1024 * 1024,
66+
4 * 1024 * 1024,
67+
6 * 1024 * 1024,
68+
8 * 1024 * 1024,
69+
12 * 1024 * 1024,
70+
16 * 1024 * 1024,
71+
24 * 1024 * 1024,
72+
};
73+
74+
static_assert(sizeof(pool_sizes) == JL_N_STACK_POOLS * sizeof(pool_sizes[0]), "JL_N_STACK_POOLS size mismatch");
75+
76+
static unsigned select_pool(size_t nb)
77+
{
78+
unsigned pool_id = 0;
79+
while (pool_sizes[pool_id] < nb)
80+
pool_id++;
81+
return pool_id;
82+
}
83+
84+
85+
static void _jl_free_stack(jl_ptls_t ptls, void *stkbuf, size_t bufsz)
86+
{
87+
if (bufsz <= pool_sizes[JL_N_STACK_POOLS - 1]) {
88+
unsigned pool_id = select_pool(bufsz);
89+
if (pool_sizes[pool_id] == bufsz) {
90+
arraylist_push(&ptls->heap.free_stacks[pool_id], stkbuf);
91+
return;
92+
}
93+
}
94+
free_stack(stkbuf, bufsz);
95+
}
96+
97+
98+
JL_DLLEXPORT void jl_free_stack(void *stkbuf, size_t bufsz)
99+
{
100+
_jl_free_stack(jl_get_ptls_states(), stkbuf, bufsz);
101+
}
102+
103+
104+
JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, jl_task_t *owner)
105+
{
106+
jl_ptls_t ptls = jl_get_ptls_states();
107+
size_t ssize = *bufsz;
108+
void *stk = NULL;
109+
if (ssize <= pool_sizes[JL_N_STACK_POOLS - 1]) {
110+
unsigned pool_id = select_pool(ssize);
111+
ssize = pool_sizes[pool_id];
112+
arraylist_t *pool = &ptls->heap.free_stacks[pool_id];
113+
if (pool->len > 0) {
114+
stk = arraylist_pop(pool);
115+
}
116+
}
117+
else {
118+
ssize = LLT_ALIGN(ssize, jl_page_size);
119+
}
120+
if (stk == NULL) {
121+
// TODO: allocate blocks of stacks? but need to mprotect individually anyways
122+
stk = malloc_stack(ssize);
123+
if (stk == MAP_FAILED)
124+
jl_throw(jl_memory_exception);
125+
}
126+
*bufsz = ssize;
127+
if (owner) {
128+
arraylist_t *live_tasks = &ptls->heap.live_tasks;
129+
arraylist_push(live_tasks, owner);
130+
}
131+
return stk;
132+
}
133+
134+
void sweep_stack_pools(void)
135+
{
136+
// TODO: deallocate stacks if we have too many sitting around unused
137+
// for (stk in halfof(free_stacks))
138+
// free_stack(stk, pool_sz);
139+
// // then sweep the task stacks
140+
// for (t in live_tasks)
141+
// if (!gc-marked(t))
142+
// stkbuf = t->stkbuf
143+
// bufsz = t->bufsz
144+
// if (stkbuf)
145+
// push(free_stacks[sz], stkbuf)
146+
for (int i = 0; i < jl_n_threads; i++) {
147+
jl_ptls_t ptls2 = jl_all_tls_states[i];
148+
arraylist_t *live_tasks = &ptls2->heap.live_tasks;
149+
size_t n = 0;
150+
size_t ndel = 0;
151+
size_t l = live_tasks->len;
152+
void **lst = live_tasks->items;
153+
if (l == 0)
154+
continue;
155+
while (1) {
156+
jl_task_t *t = (jl_task_t*)lst[n];
157+
if (gc_marked(jl_astaggedvalue(t)->bits.gc)) {
158+
n++;
159+
}
160+
else {
161+
ndel++;
162+
void *stkbuf = t->stkbuf;
163+
size_t bufsz = t->bufsz;
164+
if (stkbuf) {
165+
t->stkbuf = NULL;
166+
_jl_free_stack(ptls2, stkbuf, bufsz);
167+
}
168+
}
169+
if (n >= l - ndel)
170+
break;
171+
void *tmp = lst[n];
172+
lst[n] = lst[n + ndel];
173+
lst[n + ndel] = tmp;
174+
}
175+
live_tasks->len -= ndel;
176+
}
177+
}

src/gc.c

Lines changed: 15 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -707,7 +707,7 @@ JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls,
707707

708708
static void sweep_weak_refs(void)
709709
{
710-
for (int i = 0;i < jl_n_threads;i++) {
710+
for (int i = 0; i < jl_n_threads; i++) {
711711
jl_ptls_t ptls2 = jl_all_tls_states[i];
712712
size_t n = 0;
713713
size_t ndel = 0;
@@ -718,7 +718,8 @@ static void sweep_weak_refs(void)
718718
while (1) {
719719
jl_weakref_t *wr = (jl_weakref_t*)lst[n];
720720
if (gc_marked(jl_astaggedvalue(wr)->bits.gc)) {
721-
// weakref itself is alive
721+
// weakref itself is alive,
722+
// so the user could still re-set it to a new value
722723
if (!gc_marked(jl_astaggedvalue(wr->value)->bits.gc))
723724
wr->value = (jl_value_t*)jl_nothing;
724725
n++;
@@ -730,7 +731,7 @@ static void sweep_weak_refs(void)
730731
break;
731732
void *tmp = lst[n];
732733
lst[n] = lst[n + ndel];
733-
lst[n+ndel] = tmp;
734+
lst[n + ndel] = tmp;
734735
}
735736
ptls2->heap.weak_refs.len -= ndel;
736737
}
@@ -1032,7 +1033,7 @@ static jl_taggedvalue_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, jl_t
10321033
int freedall = 1;
10331034
int pg_skpd = 1;
10341035
if (!pg->has_marked) {
1035-
// lazy version: (empty) if the whole page was already unused, free it
1036+
// lazy version: (empty) if the whole page was already unused, free it (return it to the pool)
10361037
// eager version: (freedall) free page as soon as possible
10371038
// the eager one uses less memory.
10381039
// FIXME - need to do accounting on a per-thread basis
@@ -2121,19 +2122,13 @@ mark: {
21212122
objprofile_count(vt, bits == GC_OLD_MARKED, sizeof(jl_task_t));
21222123
jl_task_t *ta = (jl_task_t*)new_obj;
21232124
gc_scrub_record_task(ta);
2124-
int stkbuf = (ta->stkbuf != (void*)(intptr_t)-1 && ta->stkbuf != NULL);
2125+
void *stkbuf = ta->stkbuf;
21252126
int16_t tid = ta->tid;
21262127
jl_ptls_t ptls2 = jl_all_tls_states[tid];
2127-
if (stkbuf) {
21282128
#ifdef COPY_STACKS
2129-
gc_setmark_buf_(ptls, ta->stkbuf, bits, ta->bufsz);
2130-
#else
2131-
// stkbuf isn't owned by julia for the root task
2132-
if (ta != ptls2->root_task) {
2133-
gc_setmark_buf_(ptls, ta->stkbuf, bits, ta->ssize);
2134-
}
2129+
if (stkbuf && ta->copy_stack)
2130+
gc_setmark_buf_(ptls, stkbuf, bits, ta->bufsz);
21352131
#endif
2136-
}
21372132
jl_gcframe_t *s = NULL;
21382133
size_t nroots;
21392134
uintptr_t offset = 0;
@@ -2145,9 +2140,11 @@ mark: {
21452140
else if (stkbuf) {
21462141
s = ta->gcstack;
21472142
#ifdef COPY_STACKS
2148-
ub = (uintptr_t)ptls2->stackbase;
2149-
lb = ub - ta->ssize;
2150-
offset = (uintptr_t)ta->stkbuf - lb;
2143+
if (ta->copy_stack) {
2144+
ub = (uintptr_t)ptls2->stackbase;
2145+
lb = ub - ta->copy_stack;
2146+
offset = (uintptr_t)stkbuf - lb;
2147+
}
21512148
#endif
21522149
}
21532150
if (s) {
@@ -2276,10 +2273,6 @@ static void mark_roots(jl_gc_mark_cache_t *gc_cache, gc_mark_sp_t *sp)
22762273
if (jl_all_methods != NULL)
22772274
gc_mark_queue_obj(gc_cache, sp, jl_all_methods);
22782275

2279-
#ifndef COPY_STACKS
2280-
gc_mark_queue_obj(gc_cache, sp, jl_unprotect_stack_func);
2281-
#endif
2282-
22832276
// constants
22842277
gc_mark_queue_obj(gc_cache, sp, jl_typetype_type);
22852278
gc_mark_queue_obj(gc_cache, sp, jl_emptytuple_type);
@@ -2572,6 +2565,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, int full)
25722565
scanned_bytes = 0;
25732566
// 5. start sweeping
25742567
sweep_weak_refs();
2568+
sweep_stack_pools();
25752569
gc_sweep_other(ptls, sweep_full);
25762570
gc_scrub();
25772571
gc_verify_tags();
@@ -2695,6 +2689,7 @@ void jl_init_thread_heap(jl_ptls_t ptls)
26952689
p[i].newpages = NULL;
26962690
}
26972691
arraylist_new(&heap->weak_refs, 0);
2692+
arraylist_new(&heap->live_tasks, 0);
26982693
heap->mallocarrays = NULL;
26992694
heap->mafreelist = NULL;
27002695
heap->big_objects = NULL;

src/gc.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -491,6 +491,7 @@ void gc_mark_queue_all_roots(jl_ptls_t ptls, gc_mark_sp_t *sp);
491491
void gc_mark_queue_finlist(jl_gc_mark_cache_t *gc_cache, gc_mark_sp_t *sp,
492492
arraylist_t *list, size_t start);
493493
void gc_mark_loop(jl_ptls_t ptls, gc_mark_sp_t sp);
494+
void sweep_stack_pools(void);
494495
void gc_debug_init(void);
495496

496497
extern void *gc_mark_label_addrs[_GC_MARK_L_MAX];

0 commit comments

Comments
 (0)