From c9934eb917237b9ae6d68c6f82b0e1f0a4a68fae Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Mon, 19 Dec 2022 15:01:11 -0700 Subject: [PATCH 1/2] Add `jl_print_task_backtraces_skip_done()`, which skips done tasks. This lets you print all live tasks, with less noise in the logs, in case it's been a while since the last GC. In some cases, we are seeing many thousands of DONE Tasks, which greatly increase the noise in the logs, and can overwhelm the DataDog logging. --- src/stackwalk.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/src/stackwalk.c b/src/stackwalk.c index 481b0abf9d701..99c8c6c38049c 100644 --- a/src/stackwalk.c +++ b/src/stackwalk.c @@ -1123,10 +1123,7 @@ JL_DLLEXPORT void jl_print_backtrace(void) JL_NOTSAFEPOINT jlbacktrace(); } -// Print backtraces for all live tasks, for all threads. -// WARNING: this is dangerous and can crash if used outside of gdb, if -// all of Julia's threads are not stopped! -JL_DLLEXPORT void jl_print_task_backtraces(void) JL_NOTSAFEPOINT +void _jl_print_task_backtraces(int skip_done) JL_NOTSAFEPOINT { size_t nthreads = jl_atomic_load_acquire(&jl_n_threads); jl_ptls_t *allstates = jl_atomic_load_relaxed(&jl_all_tls_states); @@ -1146,9 +1143,13 @@ JL_DLLEXPORT void jl_print_task_backtraces(void) JL_NOTSAFEPOINT void **lst = live_tasks->items; for (size_t j = 0; j < live_tasks->len; j++) { jl_task_t *t = (jl_task_t *)lst[j]; + int t_state = jl_atomic_load_relaxed(&t->_state); + if (skip_done && t_state == JL_TASK_STATE_DONE) { + continue; + } jl_safe_printf(" ---- Task %zu (%p)\n", j + 1, t); jl_safe_printf(" (sticky: %d, started: %d, state: %d, tid: %d)\n", - t->sticky, t->started, jl_atomic_load_relaxed(&t->_state), + t->sticky, t->started, t_state, jl_atomic_load_relaxed(&t->tid) + 1); if (t->stkbuf != NULL) jlbacktracet(t); @@ -1160,6 +1161,17 @@ JL_DLLEXPORT void jl_print_task_backtraces(void) JL_NOTSAFEPOINT } jl_safe_printf("==== Done\n"); } +// Print backtraces for all live tasks, for all threads. +// WARNING: this is dangerous and can crash if used outside of gdb, if +// all of Julia's threads are not stopped! +JL_DLLEXPORT void jl_print_task_backtraces(void) JL_NOTSAFEPOINT +{ + _jl_print_task_backtraces(0); +} +JL_DLLEXPORT void jl_print_task_backtraces_skip_done(void) JL_NOTSAFEPOINT +{ + _jl_print_task_backtraces(1); +} #ifdef __cplusplus } From d8b05187252b2a221dce72eef01f57a55959664c Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Tue, 27 Dec 2022 15:20:46 -0700 Subject: [PATCH 2/2] Change the API for jl_print_task_backtraces to take a bool --- src/stackwalk.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/src/stackwalk.c b/src/stackwalk.c index 99c8c6c38049c..4965e46931016 100644 --- a/src/stackwalk.c +++ b/src/stackwalk.c @@ -1123,7 +1123,10 @@ JL_DLLEXPORT void jl_print_backtrace(void) JL_NOTSAFEPOINT jlbacktrace(); } -void _jl_print_task_backtraces(int skip_done) JL_NOTSAFEPOINT +// Print backtraces for all live tasks, for all threads. +// WARNING: this is dangerous and can crash if used outside of gdb, if +// all of Julia's threads are not stopped! +JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT { size_t nthreads = jl_atomic_load_acquire(&jl_n_threads); jl_ptls_t *allstates = jl_atomic_load_relaxed(&jl_all_tls_states); @@ -1144,7 +1147,7 @@ void _jl_print_task_backtraces(int skip_done) JL_NOTSAFEPOINT for (size_t j = 0; j < live_tasks->len; j++) { jl_task_t *t = (jl_task_t *)lst[j]; int t_state = jl_atomic_load_relaxed(&t->_state); - if (skip_done && t_state == JL_TASK_STATE_DONE) { + if (!show_done && t_state == JL_TASK_STATE_DONE) { continue; } jl_safe_printf(" ---- Task %zu (%p)\n", j + 1, t); @@ -1161,17 +1164,6 @@ void _jl_print_task_backtraces(int skip_done) JL_NOTSAFEPOINT } jl_safe_printf("==== Done\n"); } -// Print backtraces for all live tasks, for all threads. -// WARNING: this is dangerous and can crash if used outside of gdb, if -// all of Julia's threads are not stopped! -JL_DLLEXPORT void jl_print_task_backtraces(void) JL_NOTSAFEPOINT -{ - _jl_print_task_backtraces(0); -} -JL_DLLEXPORT void jl_print_task_backtraces_skip_done(void) JL_NOTSAFEPOINT -{ - _jl_print_task_backtraces(1); -} #ifdef __cplusplus }