Skip to content

Commit 5567f5f

Browse files
committed
Make jl_cumulative_compile_time_ns global (and reentrant).
Now, multiple tasks (on the same or different Threads) can start and stop compilation time measurement, without interrupting each other.

Add `cumulative_compile_time_ns()` to return the cumulative compile time for a thread without stopping measurement.

```julia
julia> Base.cumulative_compile_time_ns_before() # enable constantly measuring compilation time
0x0000000000000000

julia> @time @eval module M ; f(x) = 2+x; end
0.006730 seconds (919 allocations: 55.212 KiB, 57.20% compilation time)
Main.M

julia> Base.cumulative_compile_time_ns()
0x00000000075246b3

julia> @time 2+2
0.000000 seconds
4

julia> Base.cumulative_compile_time_ns()
0x0000000007fe4a46

julia> @time @eval M.f(2)
0.003538 seconds (750 allocations: 46.247 KiB, 94.64% compilation time)
4

julia> Base.cumulative_compile_time_ns()
0x000000000831619e
```

Make jl_cumulative_compile_time_ns into a global, atomic variable.

Instead of keeping per-task compilation time, this change keeps a global counter of compilation time, protected with atomic mutations.

Fixes #41739

```julia
julia> include("./compilation-task-migration-17-example.jl")
start thread: 2
end thread: 2
5.185706 seconds (3.53 M allocations: 2.570 GiB, 7.34% gc time, 15.57% compilation time)

julia> include("./compilation-task-migration-17-example.jl")
start thread: 3
WARNING: replacing module M.
end thread: 1
4.110316 seconds (18.23 k allocations: 2.391 GiB, 5.67% gc time, 0.24% compilation time)
```
1 parent bdacfa2 commit 5567f5f

File tree

8 files changed

+58
-42
lines changed

8 files changed

+58
-42
lines changed

base/timing.jl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,9 +55,11 @@ function gc_alloc_count(diff::GC_Diff)
5555
diff.malloc + diff.realloc + diff.poolalloc + diff.bigalloc
5656
end
5757

58-
# cumulative total time spent on compilation
58+
# cumulative total time spent on compilation, in nanoseconds
5959
cumulative_compile_time_ns_before() = ccall(:jl_cumulative_compile_time_ns_before, UInt64, ())
6060
cumulative_compile_time_ns_after() = ccall(:jl_cumulative_compile_time_ns_after, UInt64, ())
61+
# cumulative total time the process has spent on compilation while measurement was enabled.
62+
cumulative_compile_time_ns() = ccall(:jl_cumulative_compile_time_ns, UInt64, ())
6163

6264
# total time spent in garbage collection, in nanoseconds
6365
gc_time_ns() = ccall(:jl_gc_total_hrtime, UInt64, ())

src/aotcompile.cpp

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -286,8 +286,7 @@ void *jl_create_native(jl_array_t *methods, const jl_cgparams_t cgparams, int _p
286286
JL_GC_PUSH1(&src);
287287
JL_LOCK(&codegen_lock);
288288
uint64_t compiler_start_time = 0;
289-
int tid = jl_threadid();
290-
if (jl_measure_compile_time[tid])
289+
if (jl_atomic_load(&jl_measure_compile_time))
291290
compiler_start_time = jl_hrtime();
292291

293292
CompilationPolicy policy = (CompilationPolicy) _policy;
@@ -415,8 +414,8 @@ void *jl_create_native(jl_array_t *methods, const jl_cgparams_t cgparams, int _p
415414
}
416415

417416
data->M = std::move(clone);
418-
if (jl_measure_compile_time[tid])
419-
jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
417+
if (jl_atomic_load(&jl_measure_compile_time))
418+
jl_atomic_fetch_add(&jl_measure_compile_time, (jl_hrtime() - compiler_start_time));
420419
if (policy == CompilationPolicy::ImagingMode)
421420
imaging_mode = 0;
422421
JL_UNLOCK(&codegen_lock); // Might GC
@@ -916,8 +915,7 @@ void *jl_get_llvmf_defn(jl_method_instance_t *mi, size_t world, char getwrapper,
916915
jl_llvm_functions_t decls;
917916
JL_LOCK(&codegen_lock);
918917
uint64_t compiler_start_time = 0;
919-
int tid = jl_threadid();
920-
if (jl_measure_compile_time[tid])
918+
if (jl_atomic_load(&jl_measure_compile_time))
921919
compiler_start_time = jl_hrtime();
922920
std::tie(m, decls) = jl_emit_code(mi, src, jlrettype, output);
923921

@@ -942,8 +940,8 @@ void *jl_get_llvmf_defn(jl_method_instance_t *mi, size_t world, char getwrapper,
942940
m.release(); // the return object `llvmf` will be the owning pointer
943941
}
944942
JL_GC_POP();
945-
if (jl_measure_compile_time[tid])
946-
jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
943+
if (jl_atomic_load(&jl_measure_compile_time))
944+
jl_atomic_fetch_add(&jl_measure_compile_time, (jl_hrtime() - compiler_start_time));
947945
JL_UNLOCK(&codegen_lock); // Might GC
948946
if (F)
949947
return F;

src/gf.c

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3164,15 +3164,14 @@ static uint64_t inference_start_time = 0;
31643164
JL_DLLEXPORT void jl_typeinf_begin(void)
31653165
{
31663166
JL_LOCK(&typeinf_lock);
3167-
if (jl_measure_compile_time[jl_threadid()])
3167+
if (jl_atomic_load(&jl_measure_compile_time))
31683168
inference_start_time = jl_hrtime();
31693169
}
31703170

31713171
JL_DLLEXPORT void jl_typeinf_end(void) // Ends a type-inference region: records elapsed compile time when measuring, then releases typeinf_lock.
31723172
{
3173-
int tid = jl_threadid();
3174-
if (typeinf_lock.count == 1 && jl_measure_compile_time[tid])
3175-
jl_cumulative_compile_time[tid] += (jl_hrtime() - inference_start_time);
3173+
if (typeinf_lock.count == 1 && jl_atomic_load(&jl_measure_compile_time)) // only when the lock count is 1 and measurement is currently enabled
3174+
jl_atomic_fetch_add(&jl_cumulative_compile_time, (jl_hrtime() - inference_start_time)); // accumulate elapsed ns into the cumulative counter, not into the jl_measure_compile_time enable count
31763175
JL_UNLOCK(&typeinf_lock);
31773176
}
31783177

src/jitlayers.cpp

Lines changed: 23 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -78,16 +78,21 @@ void jl_jit_globals(std::map<void *, GlobalVariable*> &globals)
7878
extern "C" JL_DLLEXPORT
7979
uint64_t jl_cumulative_compile_time_ns_before()
8080
{
81-
int tid = jl_threadid();
82-
jl_measure_compile_time[tid] = 1;
83-
return jl_cumulative_compile_time[tid];
81+
// Increment the flag to allow reentrant callers to `@time`.
82+
jl_atomic_fetch_add(&jl_measure_compile_time, 1);
83+
return jl_atomic_load(&jl_cumulative_compile_time);
8484
}
8585
extern "C" JL_DLLEXPORT
8686
uint64_t jl_cumulative_compile_time_ns_after()
8787
{
88-
int tid = jl_threadid();
89-
jl_measure_compile_time[tid] = 0;
90-
return jl_cumulative_compile_time[tid];
88+
// Decrement the flag when done measuring, allowing other callers to continue measuring.
89+
jl_atomic_fetch_add(&jl_measure_compile_time, -1);
90+
return jl_atomic_load(&jl_cumulative_compile_time);
91+
}
92+
93+
extern "C" JL_DLLEXPORT
94+
uint64_t jl_cumulative_compile_time_ns() {
95+
return jl_atomic_load(&jl_cumulative_compile_time);
9196
}
9297

9398
// this generates llvm code for the lambda info
@@ -233,8 +238,7 @@ int jl_compile_extern_c(void *llvmmod, void *p, void *sysimg, jl_value_t *declrt
233238
{
234239
JL_LOCK(&codegen_lock);
235240
uint64_t compiler_start_time = 0;
236-
int tid = jl_threadid();
237-
if (jl_measure_compile_time[tid])
241+
if (jl_atomic_load(&jl_measure_compile_time))
238242
compiler_start_time = jl_hrtime();
239243
jl_codegen_params_t params;
240244
jl_codegen_params_t *pparams = (jl_codegen_params_t*)p;
@@ -258,8 +262,8 @@ int jl_compile_extern_c(void *llvmmod, void *p, void *sysimg, jl_value_t *declrt
258262
if (success && llvmmod == NULL)
259263
jl_add_to_ee(std::unique_ptr<Module>(into));
260264
}
261-
if (codegen_lock.count == 1 && jl_measure_compile_time[tid])
262-
jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
265+
if (codegen_lock.count == 1 && jl_atomic_load(&jl_measure_compile_time))
266+
jl_atomic_fetch_add(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
263267
JL_UNLOCK(&codegen_lock);
264268
return success;
265269
}
@@ -315,8 +319,7 @@ jl_code_instance_t *jl_generate_fptr(jl_method_instance_t *mi JL_PROPAGATES_ROOT
315319
{
316320
JL_LOCK(&codegen_lock); // also disables finalizers, to prevent any unexpected recursion
317321
uint64_t compiler_start_time = 0;
318-
int tid = jl_threadid();
319-
if (jl_measure_compile_time[tid])
322+
if (jl_atomic_load(&jl_measure_compile_time))
320323
compiler_start_time = jl_hrtime();
321324
// if we don't have any decls already, try to generate it now
322325
jl_code_info_t *src = NULL;
@@ -354,8 +357,8 @@ jl_code_instance_t *jl_generate_fptr(jl_method_instance_t *mi JL_PROPAGATES_ROOT
354357
else {
355358
codeinst = NULL;
356359
}
357-
if (codegen_lock.count == 1 && jl_measure_compile_time[tid])
358-
jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
360+
if (codegen_lock.count == 1 && jl_atomic_load(&jl_measure_compile_time))
361+
jl_atomic_fetch_add(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
359362
JL_UNLOCK(&codegen_lock);
360363
JL_GC_POP();
361364
return codeinst;
@@ -369,8 +372,7 @@ void jl_generate_fptr_for_unspecialized(jl_code_instance_t *unspec)
369372
}
370373
JL_LOCK(&codegen_lock);
371374
uint64_t compiler_start_time = 0;
372-
int tid = jl_threadid();
373-
if (jl_measure_compile_time[tid])
375+
if (jl_atomic_load(&jl_measure_compile_time))
374376
compiler_start_time = jl_hrtime();
375377
if (unspec->invoke == NULL) {
376378
jl_code_info_t *src = NULL;
@@ -398,8 +400,8 @@ void jl_generate_fptr_for_unspecialized(jl_code_instance_t *unspec)
398400
}
399401
JL_GC_POP();
400402
}
401-
if (codegen_lock.count == 1 && jl_measure_compile_time[tid])
402-
jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
403+
if (codegen_lock.count == 1 && jl_atomic_load(&jl_measure_compile_time))
404+
jl_atomic_fetch_add(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
403405
JL_UNLOCK(&codegen_lock); // Might GC
404406
}
405407

@@ -422,8 +424,7 @@ jl_value_t *jl_dump_method_asm(jl_method_instance_t *mi, size_t world,
422424
// so create an exception here so we can print pretty our lies
423425
JL_LOCK(&codegen_lock); // also disables finalizers, to prevent any unexpected recursion
424426
uint64_t compiler_start_time = 0;
425-
int tid = jl_threadid();
426-
if (jl_measure_compile_time[tid])
427+
if (jl_atomic_load(&jl_measure_compile_time))
427428
compiler_start_time = jl_hrtime();
428429
specfptr = (uintptr_t)codeinst->specptr.fptr;
429430
if (specfptr == 0) {
@@ -448,8 +449,8 @@ jl_value_t *jl_dump_method_asm(jl_method_instance_t *mi, size_t world,
448449
}
449450
JL_GC_POP();
450451
}
451-
if (jl_measure_compile_time[tid])
452-
jl_cumulative_compile_time[tid] += (jl_hrtime() - compiler_start_time);
452+
if (jl_atomic_load(&jl_measure_compile_time))
453+
jl_atomic_fetch_add(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
453454
JL_UNLOCK(&codegen_lock);
454455
}
455456
if (specfptr != 0)

src/julia_internal.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -154,8 +154,9 @@ static inline uint64_t cycleclock(void)
154154

155155
#include "timing.h"
156156

157-
extern uint8_t *jl_measure_compile_time;
158-
extern uint64_t *jl_cumulative_compile_time;
157+
// Global *atomic* integers controlling *process-wide* measurement of compilation time.
158+
extern volatile uint8_t jl_measure_compile_time;
159+
extern volatile uint64_t jl_cumulative_compile_time;
159160

160161
#ifdef _COMPILER_MICROSOFT_
161162
# define jl_return_address() ((uintptr_t)_ReturnAddress())

src/task.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -561,7 +561,7 @@ static void JL_NORETURN throw_internal(jl_task_t *ct, jl_value_t *exception JL_M
561561
ptls->io_wait = 0;
562562
// @time needs its compile timer disabled on error,
563563
// and cannot use a try-finally as it would break scope for assignments
564-
jl_measure_compile_time[ptls->tid] = 0;
564+
jl_atomic_fetch_add(&jl_measure_compile_time, -1);
565565
JL_GC_PUSH1(&exception);
566566
jl_gc_unsafe_enter(ptls);
567567
if (exception) {

src/threading.c

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -287,8 +287,8 @@ void jl_pgcstack_getkey(jl_get_pgcstack_func **f, jl_pgcstack_key_t *k)
287287
#endif
288288

289289
jl_ptls_t *jl_all_tls_states JL_GLOBALLY_ROOTED;
290-
uint8_t *jl_measure_compile_time = NULL;
291-
uint64_t *jl_cumulative_compile_time = NULL;
290+
volatile uint8_t jl_measure_compile_time = 0;
291+
volatile uint64_t jl_cumulative_compile_time = 0;
292292

293293
// return calling thread's ID
294294
// Also update the suspended_threads list in signals-mach when changing the
@@ -467,8 +467,6 @@ void jl_init_threading(void)
467467
}
468468
if (jl_n_threads <= 0)
469469
jl_n_threads = 1;
470-
jl_measure_compile_time = (uint8_t*)calloc(jl_n_threads, sizeof(*jl_measure_compile_time));
471-
jl_cumulative_compile_time = (uint64_t*)calloc(jl_n_threads, sizeof(*jl_cumulative_compile_time));
472470
#ifndef __clang_analyzer__
473471
jl_all_tls_states = (jl_ptls_t*)calloc(jl_n_threads, sizeof(void*));
474472
#endif

test/misc.jl

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,23 @@ function timev_macro_scope()
263263
end
264264
@test timev_macro_scope() == 1
265265

266+
before = Base.cumulative_compile_time_ns_before();
267+
268+
# exercise concurrent calls to `@time` for reentrant compilation time measurement.
269+
t1 = @async @time begin
270+
sleep(2)
271+
@eval module M ; f(x,y) = x+y ; end
272+
@eval M.f(2,3)
273+
end
274+
t2 = @async begin
275+
sleep(1)
276+
@time 2 + 2
277+
end
278+
279+
after = Base.cumulative_compile_time_ns_after();
280+
@test after >= before;
281+
@test Base.cumulative_compile_time_ns() >= after;
282+
266283
# interactive utilities
267284

268285
struct ambigconvert; end # inject a problematic `convert` method to ensure it still works

0 commit comments

Comments
 (0)