Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Make.inc
Original file line number Diff line number Diff line change
Expand Up @@ -738,8 +738,8 @@ LIBITTAPI:=-littnotify
endif

ifeq ($(WITH_TRACY), 1)
JCXXFLAGS += -DUSE_TRACY -DTRACY_ENABLE
JCFLAGS += -DUSE_TRACY -DTRACY_ENABLE
JCXXFLAGS += -DUSE_TRACY -DTRACY_ENABLE -DTRACY_FIBERS
JCFLAGS += -DUSE_TRACY -DTRACY_ENABLE -DTRACY_FIBERS
LIBTRACYCLIENT:=-lTracyClient
endif

Expand Down
1 change: 1 addition & 0 deletions deps/libtracyclient.mk
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ LIBTRACYCLIENT_CMAKE :=
LIBTRACYCLIENT_CMAKE += -DBUILD_SHARED_LIBS=ON
LIBTRACYCLIENT_CMAKE += -DTRACY_FIBERS=ON
LIBTRACYCLIENT_CMAKE += -DTRACY_NO_BROADCAST=ON
LIBTRACYCLIENT_CMAKE += -DTRACY_NO_SYSTEM_TRACING=ON
LIBTRACYCLIENT_CMAKE += -DTRACY_ONLY_LOCALHOST=ON
LIBTRACYCLIENT_CMAKE += -DTRACY_NO_CODE_TRANSFER=ON
LIBTRACYCLIENT_CMAKE += -DTRACY_NO_FRAME_IMAGE=ON
Expand Down
2 changes: 1 addition & 1 deletion src/aotcompile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1448,7 +1448,7 @@ void jl_dump_native_impl(void *native_code,
const char *asm_fname,
const char *sysimg_data, size_t sysimg_len, ios_t *s)
{
JL_TIMING(NATIVE_DUMP);
JL_TIMING(NATIVE_DUMP, NATIVE_DUMP);
jl_native_code_desc_t *data = (jl_native_code_desc_t*)native_code;
if (!bc_fname && !unopt_bc_fname && !obj_fname && !asm_fname) {
LLVM_DEBUG(dbgs() << "No output requested, skipping native code dump?\n");
Expand Down
15 changes: 8 additions & 7 deletions src/ast.c
Original file line number Diff line number Diff line change
Expand Up @@ -783,7 +783,8 @@ JL_DLLEXPORT jl_value_t *jl_fl_parse(const char *text, size_t text_len,
jl_value_t *filename, size_t lineno,
size_t offset, jl_value_t *options)
{
JL_TIMING(PARSING);
JL_TIMING(PARSING, PARSING);
jl_timing_show_filename(jl_string_data(filename), JL_TIMING_CURRENT_BLOCK);
if (offset > text_len) {
jl_value_t *textstr = jl_pchar_to_string(text, text_len);
JL_GC_PUSH1(&textstr);
Expand Down Expand Up @@ -1000,7 +1001,7 @@ int jl_has_meta(jl_array_t *body, jl_sym_t *sym) JL_NOTSAFEPOINT
static jl_value_t *jl_invoke_julia_macro(jl_array_t *args, jl_module_t *inmodule, jl_module_t **ctx, size_t world, int throw_load_error)
{
jl_task_t *ct = jl_current_task;
JL_TIMING(MACRO_INVOCATION);
JL_TIMING(MACRO_INVOCATION, MACRO_INVOCATION);
size_t nargs = jl_array_len(args) + 1;
JL_NARGSV("macrocall", 3); // macro name, location, and module
jl_value_t **margs;
Expand Down Expand Up @@ -1139,7 +1140,7 @@ static jl_value_t *jl_expand_macros(jl_value_t *expr, jl_module_t *inmodule, str

JL_DLLEXPORT jl_value_t *jl_macroexpand(jl_value_t *expr, jl_module_t *inmodule)
{
JL_TIMING(LOWERING);
JL_TIMING(LOWERING, LOWERING);
JL_GC_PUSH1(&expr);
expr = jl_copy_ast(expr);
expr = jl_expand_macros(expr, inmodule, NULL, 0, jl_atomic_load_acquire(&jl_world_counter), 0);
Expand All @@ -1150,7 +1151,7 @@ JL_DLLEXPORT jl_value_t *jl_macroexpand(jl_value_t *expr, jl_module_t *inmodule)

JL_DLLEXPORT jl_value_t *jl_macroexpand1(jl_value_t *expr, jl_module_t *inmodule)
{
JL_TIMING(LOWERING);
JL_TIMING(LOWERING, LOWERING);
JL_GC_PUSH1(&expr);
expr = jl_copy_ast(expr);
expr = jl_expand_macros(expr, inmodule, NULL, 1, jl_atomic_load_acquire(&jl_world_counter), 0);
Expand All @@ -1176,7 +1177,7 @@ JL_DLLEXPORT jl_value_t *jl_expand_with_loc(jl_value_t *expr, jl_module_t *inmod
JL_DLLEXPORT jl_value_t *jl_expand_in_world(jl_value_t *expr, jl_module_t *inmodule,
const char *file, int line, size_t world)
{
JL_TIMING(LOWERING);
JL_TIMING(LOWERING, LOWERING);
JL_GC_PUSH1(&expr);
expr = jl_copy_ast(expr);
expr = jl_expand_macros(expr, inmodule, NULL, 0, world, 1);
Expand All @@ -1189,7 +1190,7 @@ JL_DLLEXPORT jl_value_t *jl_expand_in_world(jl_value_t *expr, jl_module_t *inmod
JL_DLLEXPORT jl_value_t *jl_expand_with_loc_warn(jl_value_t *expr, jl_module_t *inmodule,
const char *file, int line)
{
JL_TIMING(LOWERING);
JL_TIMING(LOWERING, LOWERING);
jl_array_t *kwargs = NULL;
JL_GC_PUSH2(&expr, &kwargs);
expr = jl_copy_ast(expr);
Expand Down Expand Up @@ -1237,7 +1238,7 @@ JL_DLLEXPORT jl_value_t *jl_expand_with_loc_warn(jl_value_t *expr, jl_module_t *
JL_DLLEXPORT jl_value_t *jl_expand_stmt_with_loc(jl_value_t *expr, jl_module_t *inmodule,
const char *file, int line)
{
JL_TIMING(LOWERING);
JL_TIMING(LOWERING, LOWERING);
JL_GC_PUSH1(&expr);
expr = jl_copy_ast(expr);
expr = jl_expand_macros(expr, inmodule, NULL, 0, ~(size_t)0, 1);
Expand Down
8 changes: 5 additions & 3 deletions src/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8421,7 +8421,8 @@ jl_llvm_functions_t jl_emit_code(
jl_value_t *jlrettype,
jl_codegen_params_t &params)
{
JL_TIMING(CODEGEN);
JL_TIMING(CODEGEN, CODEGEN);
jl_timing_show_func_sig((jl_value_t *)li->specTypes, JL_TIMING_CURRENT_BLOCK);
// caller must hold codegen_lock
jl_llvm_functions_t decls = {};
assert((params.params == &jl_default_cgparams /* fast path */ || !params.cache ||
Expand Down Expand Up @@ -8463,7 +8464,8 @@ jl_llvm_functions_t jl_emit_codeinst(
jl_code_info_t *src,
jl_codegen_params_t &params)
{
JL_TIMING(CODEGEN);
JL_TIMING(CODEGEN, CODEGEN);
jl_timing_show_method_instance(codeinst->def, JL_TIMING_CURRENT_BLOCK);
JL_GC_PUSH1(&src);
if (!src) {
src = (jl_code_info_t*)jl_atomic_load_relaxed(&codeinst->inferred);
Expand Down Expand Up @@ -8542,7 +8544,7 @@ void jl_compile_workqueue(
Module &original,
jl_codegen_params_t &params, CompilationPolicy policy)
{
JL_TIMING(CODEGEN);
JL_TIMING(CODEGEN, CODEGEN);
jl_code_info_t *src = NULL;
JL_GC_PUSH1(&src);
while (!params.workqueue.empty()) {
Expand Down
183 changes: 107 additions & 76 deletions src/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,11 @@ NOINLINE uintptr_t gc_get_stack_ptr(void)

void jl_gc_wait_for_the_world(jl_ptls_t* gc_all_tls_states, int gc_n_threads)
{
JL_TIMING(GC, Stop);
#ifdef USE_TRACY
TracyCZoneCtx ctx = *(JL_TIMING_CURRENT_BLOCK->tracy_ctx);
TracyCZoneColor(ctx, 0x696969);
#endif
assert(gc_n_threads);
if (gc_n_threads > 1)
jl_wake_libuv();
Expand Down Expand Up @@ -2941,79 +2946,86 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
{
combine_thread_gc_counts(&gc_num);

#ifdef USE_TRACY
TracyCPlot("Heap size", live_bytes + gc_num.allocd);
#endif

jl_gc_markqueue_t *mq = &ptls->mark_queue;

uint64_t gc_start_time = jl_hrtime();
int64_t last_perm_scanned_bytes = perm_scanned_bytes;
JL_PROBE_GC_MARK_BEGIN();
uint64_t start_mark_time = jl_hrtime();

// 1. fix GC bits of objects in the remset.
assert(gc_n_threads);
for (int t_i = 0; t_i < gc_n_threads; t_i++) {
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
if (ptls2 != NULL)
gc_premark(ptls2);
}

assert(gc_n_threads);
for (int t_i = 0; t_i < gc_n_threads; t_i++) {
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
if (ptls2 != NULL) {
// 2.1. mark every thread local root
gc_queue_thread_local(mq, ptls2);
// 2.2. mark any managed objects in the backtrace buffer
// TODO: treat these as roots for gc_heap_snapshot_record
gc_queue_bt_buf(mq, ptls2);
// 2.3. mark every object in the `last_remsets` and `rem_binding`
gc_queue_remset(ptls, ptls2);
JL_PROBE_GC_MARK_BEGIN();
{
JL_TIMING(GC, Mark);

// 1. fix GC bits of objects in the remset.
assert(gc_n_threads);
for (int t_i = 0; t_i < gc_n_threads; t_i++) {
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
if (ptls2 != NULL)
gc_premark(ptls2);
}

assert(gc_n_threads);
for (int t_i = 0; t_i < gc_n_threads; t_i++) {
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
if (ptls2 != NULL) {
// 2.1. mark every thread local root
gc_queue_thread_local(mq, ptls2);
// 2.2. mark any managed objects in the backtrace buffer
// TODO: treat these as roots for gc_heap_snapshot_record
gc_queue_bt_buf(mq, ptls2);
// 2.3. mark every object in the `last_remsets` and `rem_binding`
gc_queue_remset(ptls, ptls2);
}
}
}

// 3. walk roots
gc_mark_roots(mq);
if (gc_cblist_root_scanner) {
gc_invoke_callbacks(jl_gc_cb_root_scanner_t,
gc_cblist_root_scanner, (collection));
}
gc_mark_loop(ptls);

// 4. check for objects to finalize
clear_weak_refs();
// Record the length of the marked list since we need to
// mark the object moved to the marked list from the
// `finalizer_list` by `sweep_finalizer_list`
size_t orig_marked_len = finalizer_list_marked.len;
assert(gc_n_threads);
for (int i = 0; i < gc_n_threads; i++) {
jl_ptls_t ptls2 = gc_all_tls_states[i];
if (ptls2 != NULL)
sweep_finalizer_list(&ptls2->finalizers);
}
if (prev_sweep_full) {
sweep_finalizer_list(&finalizer_list_marked);
orig_marked_len = 0;
// 3. walk roots
gc_mark_roots(mq);
if (gc_cblist_root_scanner) {
gc_invoke_callbacks(jl_gc_cb_root_scanner_t,
gc_cblist_root_scanner, (collection));
}
gc_mark_loop(ptls);

// 4. check for objects to finalize
clear_weak_refs();
// Record the length of the marked list since we need to
// mark the object moved to the marked list from the
// `finalizer_list` by `sweep_finalizer_list`
size_t orig_marked_len = finalizer_list_marked.len;
assert(gc_n_threads);
for (int i = 0; i < gc_n_threads; i++) {
jl_ptls_t ptls2 = gc_all_tls_states[i];
if (ptls2 != NULL)
sweep_finalizer_list(&ptls2->finalizers);
}
if (prev_sweep_full) {
sweep_finalizer_list(&finalizer_list_marked);
orig_marked_len = 0;
}
assert(gc_n_threads);
for (int i = 0; i < gc_n_threads; i++) {
jl_ptls_t ptls2 = gc_all_tls_states[i];
if (ptls2 != NULL)
gc_mark_finlist(mq, &ptls2->finalizers, 0);
}
gc_mark_finlist(mq, &finalizer_list_marked, orig_marked_len);
// "Flush" the mark stack before flipping the reset_age bit
// so that the objects are not incorrectly reset.
gc_mark_loop(ptls);
// Conservative marking relies on age to tell allocated objects
// and freelist entries apart.
mark_reset_age = !jl_gc_conservative_gc_support_enabled();
// Reset the age and old bit for any unmarked objects referenced by the
// `to_finalize` list. These objects are only reachable from this list
// and should not be referenced by any old objects so this won't break
// the GC invariant.
gc_mark_finlist(mq, &to_finalize, 0);
gc_mark_loop(ptls);
mark_reset_age = 0;
}
assert(gc_n_threads);
for (int i = 0; i < gc_n_threads; i++) {
jl_ptls_t ptls2 = gc_all_tls_states[i];
if (ptls2 != NULL)
gc_mark_finlist(mq, &ptls2->finalizers, 0);
}
gc_mark_finlist(mq, &finalizer_list_marked, orig_marked_len);
// "Flush" the mark stack before flipping the reset_age bit
// so that the objects are not incorrectly reset.
gc_mark_loop(ptls);
// Conservative marking relies on age to tell allocated objects
// and freelist entries apart.
mark_reset_age = !jl_gc_conservative_gc_support_enabled();
// Reset the age and old bit for any unmarked objects referenced by the
// `to_finalize` list. These objects are only reachable from this list
// and should not be referenced by any old objects so this won't break
// the GC invariant.
gc_mark_finlist(mq, &to_finalize, 0);
gc_mark_loop(ptls);
mark_reset_age = 0;

gc_num.since_sweep += gc_num.allocd;
JL_PROBE_GC_MARK_END(scanned_bytes, perm_scanned_bytes);
Expand Down Expand Up @@ -3081,7 +3093,6 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
}
}


// If the live data outgrows the suggested max_total_memory
// we keep going with minimum intervals and full gcs until
// we either free some space or get an OOM error.
Expand All @@ -3106,15 +3117,24 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
// 6. start sweeping
uint64_t start_sweep_time = jl_hrtime();
JL_PROBE_GC_SWEEP_BEGIN(sweep_full);
sweep_weak_refs();
sweep_stack_pools();
gc_sweep_foreign_objs();
gc_sweep_other(ptls, sweep_full);
gc_scrub();
gc_verify_tags();
gc_sweep_pool(sweep_full);
if (sweep_full)
gc_sweep_perm_alloc();
{
JL_TIMING(GC, Sweep);
#ifdef USE_TRACY
if (sweep_full) {
TracyCZoneCtx ctx = *(JL_TIMING_CURRENT_BLOCK->tracy_ctx);
TracyCZoneColor(ctx, 0xFFA500);
}
#endif
sweep_weak_refs();
sweep_stack_pools();
gc_sweep_foreign_objs();
gc_sweep_other(ptls, sweep_full);
gc_scrub();
gc_verify_tags();
gc_sweep_pool(sweep_full);
if (sweep_full)
gc_sweep_perm_alloc();
}
JL_PROBE_GC_SWEEP_END();

uint64_t gc_end_time = jl_hrtime();
Expand Down Expand Up @@ -3243,7 +3263,10 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
jl_gc_state_set(ptls, old_state, JL_GC_STATE_WAITING);
return;
}
JL_TIMING(GC);

JL_TIMING_SUSPEND(GC, ct);
JL_TIMING(GC, GC);

int last_errno = errno;
#ifdef _OS_WINDOWS_
DWORD last_error = GetLastError();
Expand Down Expand Up @@ -3296,6 +3319,7 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
// Doing this on all threads is racy (it's impossible to check
// or wait for finalizers on other threads without deadlock).
if (!ptls->finalizers_inhibited && ptls->locks.len == 0) {
JL_TIMING(GC, Finalizers);
run_finalizers(ct);
}
JL_PROBE_GC_FINALIZER();
Expand All @@ -3306,6 +3330,10 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
SetLastError(last_error);
#endif
errno = last_errno;

#ifdef USE_TRACY
TracyCPlot("Heap size", jl_gc_live_bytes());
#endif
}

void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_markqueue_t *mq)
Expand Down Expand Up @@ -3407,6 +3435,9 @@ void jl_gc_init(void)
if (jl_options.heap_size_hint)
jl_gc_set_max_memory(jl_options.heap_size_hint);

#ifdef USE_TRACY
TracyCPlotConfig("Heap size", TracyPlotFormatMemory, /* rectilinear */ 0, /* fill */ 1, /* color */ 0);
#endif
t_start = jl_hrtime();
}

Expand Down
Loading