From fbc1ce5091c8eeabd759f8dd1b85457606f74d41 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Mon, 12 Jun 2023 12:06:05 -0300 Subject: [PATCH] backport Gabriel's PR --- NEWS.md | 27 +------- contrib/generate_precompile.jl | 2 +- doc/src/devdocs/gc.md | 76 +++++++++++++++++++++ src/gc-debug.c | 19 +++++- src/gc-pages.c | 7 +- src/gc.c | 120 +++++++++++++++++++-------------- src/gc.h | 19 +++++- 7 files changed, 190 insertions(+), 80 deletions(-) create mode 100644 doc/src/devdocs/gc.md diff --git a/NEWS.md b/NEWS.md index a78249ed95182..e96c8f8b38528 100644 --- a/NEWS.md +++ b/NEWS.md @@ -38,30 +38,9 @@ Language changes Compiler/Runtime improvements ----------------------------- -* Bootstrapping time has been improved by about 25% ([#41794]). -* The LLVM-based compiler has been separated from the run-time library into a new library, - `libjulia-codegen`. It is loaded by default, so normal usage should see no changes. - In deployments that do not need the compiler (e.g. system images where all needed code - is precompiled), this library (and its LLVM dependency) can simply be excluded ([#41936]). -* Conditional type constraints are now be forwarded interprocedurally (i.e. propagated from caller to callee). - This allows inference to understand e.g. `Base.ifelse(isa(x, Int), x, 0)` returns `::Int`-value - even if the type of `x` is not known ([#42529]). -* Julia-level SROA (Scalar Replacement of Aggregates) has been improved: allowing elimination of - `getfield` calls with constant global fields ([#42355]), enabling elimination of mutable structs with - uninitialized fields ([#43208]), improving performance ([#43232]), and handling more nested `getfield` - calls ([#43239]). -* Abstract call sites can now be inlined or statically resolved as long as the call site has a single - matching method ([#43113]). -* Inference now tracks various effects such as side-effectful-ness and nothrow-ness on a per-specialization basis. - Code heavily dependent on constant propagation should see significant compile-time performance improvements and - certain cases (e.g. calls to uninlinable functions that are nevertheless effect free) should see runtime performance - improvements. Effects may be overwritten manually with the `Base.@assume_effects` macro ([#43852]). -* Precompilation (with explicit `precompile` directives or representative workloads) now saves more type-inferred code, - resulting in reduced time-to-first task for packages that use precompilation. This change also eliminates the - runtime performance degradation occasionally triggered by precompilation on older Julia versions. More specifically, - any newly-inferred method/type combinations needed by your package--regardless of where those methods were - defined--can now be cached in the precompile file, as long as they are inferrably called by a method owned by - your package ([#43990]). +* The `@pure` macro is now deprecated. Use `Base.@assume_effects :foldable` instead ([#48682]). +* The mark phase of the Garbage Collector is now multi-threaded ([#48600]). +* Updated GC heuristics to count allocated pages instead of individual objects ([#50144]). 
Command-line option changes --------------------------- diff --git a/contrib/generate_precompile.jl b/contrib/generate_precompile.jl index a10d195229cab..073ea09becd84 100644 --- a/contrib/generate_precompile.jl +++ b/contrib/generate_precompile.jl @@ -428,7 +428,7 @@ function generate_precompile_statements() print("Total ─────── "); Base.time_print(tot_time); println() print("Generation ── "); Base.time_print(gen_time); print(" "); show(IOContext(stdout, :compact=>true), gen_time / tot_time * 100); println("%") print("Execution ─── "); Base.time_print(include_time); print(" "); show(IOContext(stdout, :compact=>true), include_time / tot_time * 100); println("%") - + GC.gc(true) return end diff --git a/doc/src/devdocs/gc.md b/doc/src/devdocs/gc.md new file mode 100644 index 0000000000000..8a2190400a9e2 --- /dev/null +++ b/doc/src/devdocs/gc.md @@ -0,0 +1,76 @@ +# Garbage Collection in Julia + +## Introduction + +Julia has a serial, stop-the-world, generational, non-moving mark-sweep garbage collector. +Native objects are precisely scanned and foreign ones are conservatively marked. + +## Memory layout of objects and GC bits + +An opaque tag is stored at the front of GC-managed objects, and its lowest two bits are +used for garbage collection. The lowest bit is set for marked objects and the second +lowest bit stores age information (i.e. it is only set for old objects). + +Objects are aligned to a multiple of 4 bytes to ensure this pointer tagging is legal. + +## Pool allocation + +Sufficiently small objects (up to 2032 bytes) are allocated in per-thread object +pools. + +A three-level tree (analogous to a three-level page-table) is used to keep metadata +(e.g. whether a page has been allocated, whether it contains marked objects, the number of free objects, etc.) +about address ranges spanning at least one page. +Sweeping a pool-allocated object consists of inserting it back into the free list +maintained by its pool. + +## Malloc'd arrays and big objects + +Two lists are used to keep track of the remaining allocated objects: +one for sufficiently large malloc'd arrays (`mallocarray_t`) and one for +sufficiently large objects (`bigval_t`). + +Sweeping these objects consists of unlinking them from their list and calling `free` on the +corresponding address. + +## Generational and remembered sets + +Field writes into old objects trigger a write barrier if the written field +points to a young object and if a write barrier has not been triggered on the old object yet. +In this case, the old object being written to is enqueued into a remembered set, and +its mark bit is set to indicate that a write barrier has already been triggered on it. + +There is no explicit flag to determine whether a marking pass will scan the +entire heap or only the young objects and the remembered set. +The mark bits of the objects themselves are used to determine whether a full mark happens. +The mark-sweep algorithm follows this sequence of steps: + +- Objects in the remembered set have their GC mark bits reset +(these are set once the write barrier is triggered, as described above) and are enqueued. + +- Roots (e.g. thread locals) are enqueued. + +- The object graph is traversed and mark bits are set. + +- Object pools, malloc'd arrays and big objects are swept. On a full sweep, +the mark bits of all marked objects are reset. On a generational sweep, +only the mark bits of marked young objects are reset. + +- Mark bits of objects in the remembered set are set, +so we don't trigger the write barrier on them again.
+ +After these stages, old objects will be left with their mark bits set, +so that references from them are not explored in a subsequent generational collection. +This scheme eliminates the need to explicitly keep a flag to indicate a full mark +(though a flag to indicate a full sweep is necessary). + +## Heuristics + +GC heuristics tune the GC by changing the size of the allocation interval between garbage collections. + +The GC heuristics measure the heap size after a collection and schedule the next collection for when the heap has grown to twice that size, or to the maximum heap size, whichever is smaller. +The heuristics measure the heap size by counting the number of pages that are in use and the memory allocated through malloc. Previously we measured the heap size by counting +the live objects, but that does not take fragmentation into account, which could lead to bad decisions; it also meant that we used thread-local information (allocations) to make +decisions about a process-wide property (when to GC). Measuring pages makes the decision global. + +The GC will do full collections when the heap size reaches 80% of the maximum allowed size. diff --git a/src/gc-debug.c b/src/gc-debug.c index 6484c2bd8bd07..5007aa771c6a8 100644 --- a/src/gc-debug.c +++ b/src/gc-debug.c @@ -1,7 +1,10 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license #include "gc.h" +#include "julia.h" #include +#include +#include #include // re-include assert.h without NDEBUG, @@ -1403,7 +1406,7 @@ JL_DLLEXPORT void jl_enable_gc_logging(int enable) { gc_logging_enabled = enable; } -void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect) JL_NOTSAFEPOINT { +void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect, int64_t live_bytes) JL_NOTSAFEPOINT { if (!gc_logging_enabled) { return; } @@ -1412,6 +1415,20 @@ void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect full ? "full" : "incr", recollect ? "recollect" : "" ); + jl_safe_printf("Heap stats: bytes_mapped %.1f, bytes_decomitted %.1f, bytes_allocd %.1f\nbytes_freed %.1f, bytes_mallocd %.1f, malloc_bytes_freed %.1f\npages_perm_allocd %zu, heap_size %.1f, heap_target %.1f, live_bytes %.1f\n", + jl_atomic_load_relaxed(&gc_heap_stats.bytes_mapped)/1e6, + jl_atomic_load_relaxed(&gc_heap_stats.bytes_decomitted)/1e6, + jl_atomic_load_relaxed(&gc_heap_stats.bytes_allocd)/1e6, + jl_atomic_load_relaxed(&gc_heap_stats.bytes_freed)/1e6, + jl_atomic_load_relaxed(&gc_heap_stats.bytes_mallocd)/1e6, + jl_atomic_load_relaxed(&gc_heap_stats.malloc_bytes_freed)/1e6, + jl_atomic_load_relaxed(&gc_heap_stats.pages_perm_allocd), + jl_atomic_load_relaxed(&gc_heap_stats.heap_size)/1e6, + jl_atomic_load_relaxed(&gc_heap_stats.heap_target)/1e6, + live_bytes/1e6 + + ); + jl_safe_printf("Fragmentation %.1f\n", (double)live_bytes/(double)jl_atomic_load_relaxed(&gc_heap_stats.heap_size)); } #ifdef __cplusplus diff --git a/src/gc-pages.c b/src/gc-pages.c index 454864d45c766..83efc274014f7 100644 --- a/src/gc-pages.c +++ b/src/gc-pages.c @@ -79,6 +79,7 @@ static char *jl_gc_try_alloc_pages(int pg_cnt) JL_NOTSAFEPOINT // round data pointer up to the nearest gc_page_data-aligned // boundary if mmap didn't already do so.
mem = (char*)gc_page_data(mem + GC_PAGE_SZ - 1); + jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_mapped, pages_sz); return mem; } @@ -284,6 +285,8 @@ NOINLINE jl_gc_pagemeta_t *jl_gc_alloc_page(void) JL_NOTSAFEPOINT errno = last_errno; current_pg_count++; gc_final_count_page(current_pg_count); + jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_allocd, GC_PAGE_SZ); + jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, GC_PAGE_SZ); uv_mutex_unlock(&gc_perm_lock); return info.meta; } @@ -334,7 +337,7 @@ void jl_gc_free_page(void *p) JL_NOTSAFEPOINT #else madvise(p, decommit_size, MADV_DONTNEED); #endif - + jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_decomitted, GC_PAGE_SZ); no_decommit: // new pages are now available starting at max of lb and pagetable_i32 if (memory_map.lb > info.pagetable_i32) @@ -344,6 +347,8 @@ void jl_gc_free_page(void *p) JL_NOTSAFEPOINT if (info.pagetable0->lb > info.pagetable0_i32) info.pagetable0->lb = info.pagetable0_i32; current_pg_count--; + jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_freed, GC_PAGE_SZ); + jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -GC_PAGE_SZ); } #ifdef __cplusplus diff --git a/src/gc.c b/src/gc.c index 0f30e8305b78c..e8b2ca5c17fab 100644 --- a/src/gc.c +++ b/src/gc.c @@ -1,6 +1,7 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license #include "gc.h" +#include "julia_atomics.h" #include "julia_gcext.h" #include "julia_assert.h" #ifdef __GLIBC__ @@ -171,6 +172,13 @@ static _Atomic(int) support_conservative_marking = 0; jl_gc_num_t gc_num = {0}; static size_t last_long_collect_interval; +gc_heapstatus_t gc_heap_stats = {0}; +int next_sweep_full = 0; +const uint64_t _jl_buff_tag[3] = {0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull}; // aka 0xHEADER00 +JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) +{ + return jl_buff_tag; +} pagetable_t memory_map; @@ -230,6 +238,8 @@ STATIC_INLINE void jl_free_aligned(void *p) JL_NOTSAFEPOINT #else STATIC_INLINE void *jl_malloc_aligned(size_t sz, size_t align) { + jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_mallocd, sz); + jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, sz); #if defined(_P64) || defined(__APPLE__) if (align <= 16) return malloc(sz); @@ -242,6 +252,9 @@ STATIC_INLINE void *jl_malloc_aligned(size_t sz, size_t align) STATIC_INLINE void *jl_realloc_aligned(void *d, size_t sz, size_t oldsz, size_t align) { + jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_mallocd, sz); + jl_atomic_fetch_add_relaxed(&gc_heap_stats.malloc_bytes_freed, oldsz); + jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, sz-oldsz); #if defined(_P64) || defined(__APPLE__) if (align <= 16) return realloc(d, sz); @@ -609,11 +622,19 @@ static void gc_sweep_foreign_objs(void) static int64_t last_gc_total_bytes = 0; #ifdef _P64 -#define default_collect_interval (5600*1024*sizeof(void*)) -static size_t max_collect_interval = 1250000000UL; +typedef uint64_t memsize_t; +static const size_t default_collect_interval = 5600 * 1024 * sizeof(void*); + +static size_t total_mem; +// We expose this to the user/ci as jl_gc_set_max_memory +static memsize_t max_total_memory = (memsize_t) 2 * 1024 * 1024 * 1024 * 1024 * 1024; #else -#define default_collect_interval (3200*1024*sizeof(void*)) -static size_t max_collect_interval = 500000000UL; +typedef uint32_t memsize_t; +static const size_t default_collect_interval = 3200 * 1024 * sizeof(void*); +// Work really hard to stay within 2GB +// Alternative is to risk running out of address space +// on 32 bit architectures. 
+static memsize_t max_total_memory = (memsize_t) 1024 * 1024 * 1024; // The new heuristics use all the heap, which makes it run out #endif // global variables for GC stats @@ -906,7 +927,7 @@ void jl_gc_force_mark_old(jl_ptls_t ptls, jl_value_t *v) JL_NOTSAFEPOINT static inline void maybe_collect(jl_ptls_t ptls) { - if (jl_atomic_load_relaxed(&ptls->gc_num.allocd) >= 0 || jl_gc_debug_check_other()) { + if (jl_atomic_load_relaxed(&gc_heap_stats.heap_size) >= jl_atomic_load_relaxed(&gc_heap_stats.heap_target) || jl_gc_debug_check_other()) { jl_gc_collect(JL_GC_AUTO); } else { @@ -1044,6 +1065,8 @@ static bigval_t **sweep_big_list(int sweep_full, bigval_t **pv) JL_NOTSAFEPOINT if (nxt) nxt->prev = pv; gc_num.freed += v->sz&~3; + jl_atomic_fetch_add_relaxed(&gc_heap_stats.malloc_bytes_freed, v->sz&~3); + jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -(v->sz&~3)); #ifdef MEMDEBUG memset(v, 0xbb, v->sz&~3); #endif @@ -1159,6 +1182,8 @@ static void jl_gc_free_array(jl_array_t *a) JL_NOTSAFEPOINT jl_free_aligned(d); else free(d); + jl_atomic_fetch_add_relaxed(&gc_heap_stats.malloc_bytes_freed, jl_array_nbytes(a)); + jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -jl_array_nbytes(a)); gc_num.freed += jl_array_nbytes(a); gc_num.freecall++; } @@ -3172,7 +3197,6 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) uint64_t mark_time = end_mark_time - start_mark_time; gc_num.mark_time = mark_time; gc_num.total_mark_time += mark_time; - int64_t actual_allocd = gc_num.since_sweep; // marking is over // 4. check for objects to finalize @@ -3213,12 +3237,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) // Flush everything in mark cache gc_sync_all_caches_nolock(ptls); - int64_t live_sz_ub = live_bytes + actual_allocd; - int64_t live_sz_est = scanned_bytes + perm_scanned_bytes; - int64_t estimate_freed = live_sz_ub - live_sz_est; - gc_verify(ptls); - gc_stats_all_pool(); gc_stats_big_obj(); objprofile_printall(); @@ -3227,28 +3246,23 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) if (!prev_sweep_full) promoted_bytes += perm_scanned_bytes - last_perm_scanned_bytes; // 5. next collection decision - int not_freed_enough = (collection == JL_GC_AUTO) && estimate_freed < (7*(actual_allocd/10)); - int nptr = 0; - for (int i = 0;i < jl_n_threads;i++) - nptr += jl_all_tls_states[i]->heap.remset_nptr; - - // many pointers in the intergen frontier => "quick" mark is not quick - int large_frontier = nptr*sizeof(void*) >= default_collect_interval; - int sweep_full = 0; + int remset_nptr = 0; + int sweep_full = next_sweep_full; int recollect = 0; + assert(gc_n_threads); + for (int i = 0; i < jl_n_threads; i++) { + jl_ptls_t ptls2 = jl_all_tls_states[i]; + if (ptls2 != NULL) + remset_nptr += ptls2->heap.remset_nptr; + } + (void)remset_nptr; //Use this information for something? - // update heuristics only if this GC was automatically triggered - if (collection == JL_GC_AUTO) { - if (not_freed_enough) { - gc_num.interval = gc_num.interval * 2; - } - if (large_frontier) { - sweep_full = 1; - } - if (gc_num.interval > max_collect_interval) { - sweep_full = 1; - gc_num.interval = max_collect_interval; - } + + // If the live data outgrows the suggested max_total_memory + // we keep going with minimum intervals and full gcs until + // we either free some space or get an OOM error. 
+ if (live_bytes > max_total_memory) { + sweep_full = 1; } if (gc_sweep_always_full) { sweep_full = 1; @@ -3276,6 +3290,14 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) gc_sweep_pool(sweep_full); if (sweep_full) gc_sweep_perm_alloc(); + + size_t heap_size = jl_atomic_load_relaxed(&gc_heap_stats.heap_size); + if (heap_size > max_total_memory*0.8) + next_sweep_full = 1; + else + next_sweep_full = 0; + size_t new_heap_target = 2 * heap_size > max_total_memory ? max_total_memory : 2 * heap_size; + jl_atomic_store_relaxed(&gc_heap_stats.heap_target, new_heap_target); JL_PROBE_GC_SWEEP_END(); uint64_t gc_end_time = jl_hrtime(); @@ -3319,30 +3341,20 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) } #endif - - _report_gc_finished(pause, gc_num.freed, sweep_full, recollect); - - gc_final_pause_end(t0, gc_end_time); - gc_time_sweep_pause(gc_end_time, actual_allocd, live_bytes, - estimate_freed, sweep_full); - gc_num.full_sweep += sweep_full; + _report_gc_finished(pause, gc_num.freed, sweep_full, recollect, live_bytes); uint64_t max_memory = last_live_bytes + gc_num.allocd; if (max_memory > gc_num.max_memory) { gc_num.max_memory = max_memory; } + gc_final_pause_end(gc_start_time, gc_end_time); + gc_time_sweep_pause(gc_end_time, allocd, live_bytes, + estimate_freed, sweep_full); + gc_num.full_sweep += sweep_full; gc_num.allocd = 0; last_live_bytes = live_bytes; live_bytes += -gc_num.freed + gc_num.since_sweep; - if (collection == JL_GC_AUTO) { - // If the current interval is larger than half the live data decrease the interval - int64_t half = live_bytes/2; - if (gc_num.interval > half) gc_num.interval = half; - // But never go below default - if (gc_num.interval < default_collect_interval) gc_num.interval = default_collect_interval; - } - gc_time_summary(sweep_full, t_start, gc_end_time, gc_num.freed, live_bytes, gc_num.interval, pause, gc_num.time_to_safepoint, @@ -3510,7 +3522,7 @@ void jl_gc_init(void) arraylist_new(&finalizer_list_marked, 0); arraylist_new(&to_finalize, 0); - + jl_atomic_store_relaxed(&gc_heap_stats.heap_target, default_collect_interval); gc_num.interval = default_collect_interval; last_long_collect_interval = default_collect_interval; gc_num.allocd = 0; @@ -3519,13 +3531,16 @@ void jl_gc_init(void) #ifdef _P64 // on a big memory machine, set max_collect_interval to totalmem / nthreads / 2 - uint64_t total_mem = uv_get_total_memory(); + total_mem = uv_get_total_memory(); uint64_t constrained_mem = uv_get_constrained_memory(); if (constrained_mem != 0) total_mem = constrained_mem; - size_t maxmem = total_mem / jl_n_threads / 2; - if (maxmem > max_collect_interval) - max_collect_interval = maxmem; + double percent; + if (total_mem < 128e9) + percent = total_mem * 2.34375e-12 + 0.3; // 30% at 0 gigs and 60% at 128 to not + else // overcommit too much on memory constrained devices + percent = 0.6; + max_total_memory = total_mem * percent; #endif jl_gc_mark_sp_t sp = {NULL, NULL, NULL, NULL}; gc_mark_loop(NULL, sp); @@ -3801,6 +3816,8 @@ static void *gc_perm_alloc_large(size_t sz, int zero, unsigned align, unsigned o #ifdef _OS_WINDOWS_ SetLastError(last_error); #endif + jl_atomic_fetch_add_relaxed(&gc_heap_stats.bytes_allocd,sz); + jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size,sz); errno = last_errno; jl_may_leak(base); unsigned diff = (offset - base) % align; @@ -3843,6 +3860,7 @@ void *jl_gc_perm_alloc_nolock(size_t sz, int zero, unsigned align, unsigned offs errno = last_errno; if (__unlikely(pool == MAP_FAILED))
return NULL; + jl_atomic_fetch_add_relaxed(&gc_heap_stats.pages_perm_allocd, 1); #endif gc_perm_pool = (uintptr_t)pool; gc_perm_end = gc_perm_pool + GC_PERM_POOL_SIZE; diff --git a/src/gc.h b/src/gc.h index 8436a653dc32b..76ef2ba480a4d 100644 --- a/src/gc.h +++ b/src/gc.h @@ -9,6 +9,8 @@ #ifndef JL_GC_H #define JL_GC_H +#include +#include #include #include #include @@ -381,7 +383,19 @@ typedef struct { int ub; } pagetable_t; -#ifdef __clang_gcanalyzer__ +typedef struct { + _Atomic(size_t) bytes_mapped; + _Atomic(size_t) bytes_freed; + _Atomic(size_t) bytes_allocd; + _Atomic(size_t) bytes_decomitted; + _Atomic(size_t) bytes_mallocd; + _Atomic(size_t) malloc_bytes_freed; + _Atomic(size_t) pages_perm_allocd; + _Atomic(size_t) heap_size; + _Atomic(size_t) heap_target; +} gc_heapstatus_t; + +#ifdef __clang_gcanalyzer__ /* clang may not have __builtin_ffs */ unsigned ffs_u32(uint32_t bitvec) JL_NOTSAFEPOINT; #else STATIC_INLINE unsigned ffs_u32(uint32_t bitvec) @@ -396,6 +410,7 @@ extern bigval_t *big_objects_marked; extern arraylist_t finalizer_list_marked; extern arraylist_t to_finalize; extern int64_t lazy_freed_pages; +extern gc_heapstatus_t gc_heap_stats; STATIC_INLINE bigval_t *bigval_header(jl_taggedvalue_t *o) JL_NOTSAFEPOINT { @@ -717,7 +732,7 @@ void gc_count_pool(void); size_t jl_array_nbytes(jl_array_t *a) JL_NOTSAFEPOINT; JL_DLLEXPORT void jl_enable_gc_logging(int enable); -void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect) JL_NOTSAFEPOINT; +void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect, int64_t live_bytes) JL_NOTSAFEPOINT; #ifdef __cplusplus }
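
For readers who want the net effect of the heuristic changes without tracing the diff: the collection trigger in maybe_collect now compares the page-and-malloc heap size against a target, the target is reset after each sweep to twice the post-sweep heap size (capped at the memory limit), and crossing 80% of the limit forces the next sweep to be a full one. Below is a minimal standalone C sketch of that rule, not the patched Julia code; the heap_stats_t type, the helper names, and the constants in main are illustrative only.

/* Standalone sketch of the heap-target heuristic introduced by this patch.
 * Illustrative only: type and function names are made up, not Julia's. */
#include <stdint.h>
#include <stdio.h>

typedef struct {
    uint64_t heap_size;   /* pages in use + malloc'd bytes, maintained by the GC */
    uint64_t heap_target; /* collect when heap_size reaches this value */
    int next_sweep_full;  /* whether the next collection sweeps the full heap */
} heap_stats_t;

/* Mirrors maybe_collect(): trigger a collection when the heap hits the target. */
static int should_collect(const heap_stats_t *s)
{
    return s->heap_size >= s->heap_target;
}

/* Mirrors the post-sweep bookkeeping added to _jl_gc_collect(). */
static void update_after_sweep(heap_stats_t *s, uint64_t max_total_memory)
{
    s->next_sweep_full = s->heap_size > max_total_memory * 0.8;
    uint64_t target = 2 * s->heap_size;
    s->heap_target = target > max_total_memory ? max_total_memory : target;
}

int main(void)
{
    heap_stats_t s = {.heap_size = 900u << 20, .heap_target = 512u << 20};
    uint64_t max_total_memory = 1024ull << 20; /* pretend the limit is 1 GiB */
    if (should_collect(&s)) {
        /* ... mark & sweep would run here and shrink heap_size ... */
        s.heap_size = 300u << 20;
        update_after_sweep(&s, max_total_memory);
    }
    printf("next target: %llu MB, full sweep next: %d\n",
           (unsigned long long)(s.heap_target >> 20), s.next_sweep_full);
    return 0;
}

The design choice mirrored here is the one the new devdocs describe: the decision is based on a global, post-sweep measurement of the heap rather than on per-thread allocation counters.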
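
That heuristic only works because the patch makes every pool page and every malloc'd block update a shared set of counters (gc_heap_stats) with relaxed atomics. Here is a compact sketch of that accounting using C11 stdatomic; the real code uses Julia's jl_atomic_fetch_add_relaxed wrappers and the counters live in gc_heapstatus_t, so treat the names and the page-size constant below as stand-ins.

/* Sketch of the global heap accounting added by this patch: pages and malloc'd
 * bytes feed one shared heap_size counter. Assumes C11 <stdatomic.h>. */
#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

#define GC_PAGE_SZ (16 * 1024) /* stand-in for Julia's GC page size */

static _Atomic size_t heap_size;    /* what the collection trigger looks at */
static _Atomic size_t bytes_allocd; /* pages handed out to object pools */
static _Atomic size_t bytes_freed;  /* pages returned to the OS */

/* cf. the jl_gc_alloc_page hunk: count the page, not the objects on it. */
static void note_page_alloc(void)
{
    atomic_fetch_add_explicit(&bytes_allocd, GC_PAGE_SZ, memory_order_relaxed);
    atomic_fetch_add_explicit(&heap_size, GC_PAGE_SZ, memory_order_relaxed);
}

/* cf. the jl_gc_free_page hunk. */
static void note_page_free(void)
{
    atomic_fetch_add_explicit(&bytes_freed, GC_PAGE_SZ, memory_order_relaxed);
    atomic_fetch_sub_explicit(&heap_size, GC_PAGE_SZ, memory_order_relaxed);
}

/* cf. jl_malloc_aligned / jl_gc_free_array: malloc'd data counts too. */
static void note_malloc(size_t sz)
{
    atomic_fetch_add_explicit(&heap_size, sz, memory_order_relaxed);
}

int main(void)
{
    note_page_alloc();
    note_malloc(4096);
    note_page_free();
    printf("heap_size now %zu bytes\n",
           atomic_load_explicit(&heap_size, memory_order_relaxed));
    return 0;
}

Because the counters are process-wide, a page freed on one thread offsets a page allocated on another, which is exactly the "measuring pages makes the decision global" point from the new devdocs.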
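
The "Generational and remembered sets" section of the new devdocs is easy to misread, so a small illustration may help: an old object is pushed onto the remembered set at most once, and its mark bit is what records that the barrier already fired. The sketch below is a simplification of that description only; the struct, the fixed-size remembered set, and the helper names are invented for the example and do not correspond to Julia's actual object layout or write barrier.

/* Sketch of the generational write barrier described in doc/src/devdocs/gc.md.
 * All types and helpers are illustrative. */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

typedef struct obj {
    bool old;          /* object survived a previous collection */
    bool mark;         /* mark bit; doubles as "already remembered" for old objects */
    struct obj *field;
} obj_t;

#define REMSET_CAP 1024
static obj_t *remset[REMSET_CAP];
static size_t remset_len;

/* parent->field = child, with the write barrier in front of the store. */
static void write_field(obj_t *parent, obj_t *child)
{
    if (parent->old && child && !child->old && !parent->mark) {
        /* old -> young edge and no barrier fired yet: remember the parent */
        if (remset_len < REMSET_CAP)
            remset[remset_len++] = parent;
        parent->mark = true; /* so the barrier is not triggered on it again */
    }
    parent->field = child;
}

int main(void)
{
    obj_t old_parent = {.old = true}, young_child = {0};
    write_field(&old_parent, &young_child); /* old -> young: barrier fires once */
    write_field(&old_parent, &young_child); /* already remembered: barrier skipped */
    printf("remembered %zu object(s)\n", remset_len); /* prints 1 */
    return 0;
}

During the next mark phase the remembered objects get their mark bits reset and are re-enqueued, as the devdocs' step list describes, which is why the barrier itself can stay this cheap.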