@@ -80,34 +80,38 @@ After a slower first call to load the library and look up the function, no addit
overhead is expected compared to regular `ccall`.
"""
macro runtime_ccall(target, args...)
    # Two expansions, selected once at macro-expansion time from the running
    # Julia version:
    #   * VERSION >= 1.6.0-DEV.819: forward everything to a plain `ccall`.
    #   * older versions: resolve the symbol at run time on first use and cache
    #     the resulting function pointer in a module-level atomic.
    if VERSION >= v"1.6.0-DEV.819"
        # NOTE(review): presumably `ccall` itself handles a
        # `(function_name, library)` target with a run-time library value from
        # this version on, making the manual cache unnecessary -- confirm
        # against the Julia changelog.
        quote
            ccall($(esc(target)), $(map(esc, args)...))
        end
    else
        # decode ccall function/library target
        Meta.isexpr(target, :tuple) || error("Expected (function_name, library) tuple")
        function_name, library = target.args

        # global const ref to hold the function pointer; defined in the module
        # that invokes the macro, as a side effect of macro expansion
        @gensym fptr_cache
        @eval __module__ begin
            # uses atomics (release store, acquire load) for thread safety.
            # see https://github.com/JuliaGPU/CUDAapi.jl/issues/106 for details
            const $fptr_cache = Threads.Atomic{UInt}(0)
        end

        quote
            # use a closure to hold the lookup and avoid code bloat in the caller
            @noinline function cache_fptr!()
                library = Libdl.dlopen($(esc(library)))
                $(esc(fptr_cache))[] = Libdl.dlsym(library, $(esc(function_name)))

                # return the freshly stored pointer value
                $(esc(fptr_cache))[]
            end

            # a cached value of 0 means the symbol has not been looked up yet
            fptr = $(esc(fptr_cache))[]
            if fptr == 0        # folded into the null check performed by ccall
                fptr = cache_fptr!()
            end

            ccall(reinterpret(Ptr{Cvoid}, fptr), $(map(esc, args)...))
        end
    end
end
0 commit comments