Skip to content

Commit d7d36d5

Browse files
committed
Merge branch 'main' into cudf-unexport
2 parents 61d332c + 446ac34 commit d7d36d5

15 files changed

Lines changed: 370 additions & 153 deletions

File tree

cpp/benchmarks/CMakeLists.txt

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -187,11 +187,6 @@ ConfigureNVBench(
187187
sort/top_k.cpp
188188
)
189189

190-
# ##################################################################################################
191-
# * structs benchmark
192-
# --------------------------------------------------------------------------------
193-
ConfigureNVBench(STRUCT_CREATION_NVBENCH structs/create_structs.cpp)
194-
195190
# ##################################################################################################
196191
# * quantiles benchmark
197192
# --------------------------------------------------------------------------------

cpp/benchmarks/structs/create_structs.cpp

Lines changed: 0 additions & 20 deletions
This file was deleted.

cpp/include/cudf/context.hpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,4 +78,28 @@ void initialize(init_flags flags = init_flags::INIT_JIT_CACHE);
7878
/// teardown and that only one thread calls teardown at a time.
7979
void teardown();
8080

81+
/**
82+
* @brief Enable or disable the JIT program cache
83+
*
84+
* When disabled, the cache will not be used for
85+
* storing or retrieving compiled programs, effectively bypassing the cache. When enabled, the
86+
* cache will be used as normal. This can be used to temporarily disable caching without clearing
87+
* the existing cache contents, allowing for easy re-enabling of the cache later.
88+
*
89+
* @param enable If `true`, the JIT program cache is enabled; if `false`, it is disabled.
90+
*/
91+
void enable_jit_cache(bool enable);
92+
93+
/**
94+
* @brief Clear the JIT program cache, removing all cached programs from memory and disk.
95+
*
96+
* This is a more expensive operation than simply disabling the cache, as it involves deleting
97+
* cached files from disk, but it also frees up any memory used by the cached programs. Use
98+
* `enable_jit_cache(false)` if you want to temporarily disable caching without clearing existing
99+
* cache contents.
100+
*
101+
* @warning This deletes the cached programs from memory and from disk. For benchmarking or testing purposes, prefer `enable_jit_cache(false)`.
102+
*/
103+
void clear_jit_cache();
104+
81105
} // namespace CUDF_EXPORT cudf

cpp/src/jit/cache.cpp

Lines changed: 44 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
/*
2-
* SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION.
2+
* SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION.
33
* SPDX-License-Identifier: Apache-2.0
44
*/
55

6+
#include "io/utilities/getenv_or.hpp"
67
#include "runtime/context.hpp"
78

89
#include <cudf/context.hpp>
@@ -95,40 +96,63 @@ std::string get_program_cache_dir()
9596
#endif
9697
}
9798

98-
std::size_t try_parse_numeric_env_var(char const* const env_name, std::size_t default_val)
99-
{
100-
auto const value = std::getenv(env_name);
101-
return value != nullptr ? std::stoull(value) : default_val;
102-
}
10399
} // namespace
104100

105101
jitify2::ProgramCache<>& jit::program_cache::get(jitify2::PreprocessedProgramData const& preprog)
106102
{
107103
CUDF_FUNC_RANGE();
108-
std::lock_guard<std::mutex> const caches_lock(_caches_mutex);
104+
std::lock_guard caches_lock(_caches_mutex);
109105

110106
auto existing_cache = _caches.find(preprog.name());
111107

112-
if (existing_cache == _caches.end()) {
113-
auto const kernel_limit_proc =
114-
try_parse_numeric_env_var("LIBCUDF_KERNEL_CACHE_LIMIT_PER_PROCESS", 10'000);
115-
auto const kernel_limit_disk =
116-
try_parse_numeric_env_var("LIBCUDF_KERNEL_CACHE_LIMIT_DISK", 100'000);
117-
118-
// if kernel_limit_disk is zero, jitify will assign it the value of kernel_limit_proc.
119-
// to avoid this, we treat zero as "disable disk caching" by not providing the cache dir.
120-
auto const cache_dir = kernel_limit_disk == 0 ? std::string{} : get_program_cache_dir();
121-
122-
auto const res =
123-
_caches.insert({preprog.name(),
108+
if (existing_cache == _caches.end() || _disabled.load(std::memory_order_seq_cst)) {
109+
auto res =
110+
_caches.emplace(preprog.name(),
124111
std::make_unique<jitify2::ProgramCache<>>(
125-
kernel_limit_proc, preprog, nullptr, cache_dir, kernel_limit_disk)});
112+
_kernel_limit_proc, preprog, nullptr, _cache_dir, _kernel_limit_disk));
126113
existing_cache = res.first;
127114
}
128115

129116
return *(existing_cache->second);
130117
}
131118

119+
void jit::program_cache::clear()
120+
{
121+
CUDF_FUNC_RANGE();
122+
std::lock_guard caches_lock(_caches_mutex);
123+
124+
_caches.clear();
125+
126+
// note: std::filesystem::remove_all is not atomic; the directory tree is removed entry by entry
127+
std::filesystem::remove_all(_cache_dir);
128+
}
129+
130+
void jit::program_cache::enable(bool enable)
131+
{
132+
_disabled.store(!enable, std::memory_order_seq_cst);
133+
}
134+
135+
bool jit::program_cache::is_enabled() const { return !_disabled.load(std::memory_order_seq_cst); }
136+
137+
std::unique_ptr<jit::program_cache> jit::program_cache::create()
138+
{
139+
auto const kernel_limit_proc = getenv_or("LIBCUDF_KERNEL_CACHE_LIMIT_PER_PROCESS", 10'000);
140+
auto const kernel_limit_disk = getenv_or("LIBCUDF_KERNEL_CACHE_LIMIT_DISK", 100'000);
141+
auto const disabled = get_bool_env_or("LIBCUDF_KERNEL_CACHE_DISABLED", false);
142+
auto const clear_cache = get_bool_env_or("LIBCUDF_KERNEL_CACHE_CLEAR", false);
143+
144+
// if kernel_limit_disk is zero, jitify will assign it the value of kernel_limit_proc.
145+
// to avoid this, we treat zero as "disable disk caching" by not providing the cache dir.
146+
auto cache_dir = kernel_limit_disk == 0 ? std::string{} : get_program_cache_dir();
147+
148+
auto cache =
149+
std::make_unique<jit::program_cache>(kernel_limit_proc, kernel_limit_disk, cache_dir, disabled);
150+
151+
if (clear_cache) { cache->clear(); }
152+
153+
return cache;
154+
}
155+
132156
jitify2::ProgramCache<>& jit::get_program_cache(jitify2::PreprocessedProgramData const& preprog)
133157
{
134158
return cudf::get_context().program_cache().get(preprog);

cpp/src/jit/cache.hpp

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION.
2+
* SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION.
33
* SPDX-License-Identifier: Apache-2.0
44
*/
55

@@ -11,6 +11,8 @@
1111

1212
#include <jitify2.hpp>
1313

14+
#include <atomic>
15+
#include <filesystem>
1416
#include <memory>
1517
#include <mutex>
1618
#include <string>
@@ -21,16 +23,38 @@ namespace jit {
2123
class program_cache {
2224
std::mutex _caches_mutex;
2325
std::unordered_map<std::string, std::unique_ptr<jitify2::ProgramCache<>>> _caches;
26+
int32_t _kernel_limit_proc;
27+
int32_t _kernel_limit_disk;
28+
std::filesystem::path _cache_dir;
29+
std::atomic<bool> _disabled;
2430

2531
public:
26-
program_cache() = default;
32+
program_cache(int32_t kernel_limit_proc,
33+
int32_t kernel_limit_disk,
34+
std::filesystem::path cache_dir,
35+
bool disabled)
36+
: _kernel_limit_proc{kernel_limit_proc},
37+
_kernel_limit_disk{kernel_limit_disk},
38+
_cache_dir{std::move(cache_dir)},
39+
_disabled{disabled}
40+
{
41+
}
42+
2743
program_cache(program_cache const&) = delete;
2844
program_cache(program_cache&&) = delete;
2945
program_cache& operator=(program_cache const&) = delete;
3046
program_cache& operator=(program_cache&&) = delete;
3147
~program_cache() = default;
3248

3349
jitify2::ProgramCache<>& get(jitify2::PreprocessedProgramData const& preprog);
50+
51+
void clear();
52+
53+
void enable(bool enable);
54+
55+
bool is_enabled() const;
56+
57+
static std::unique_ptr<jit::program_cache> create();
3458
};
3559

3660
jitify2::ProgramCache<>& get_program_cache(jitify2::PreprocessedProgramData const& preprog);

cpp/src/runtime/context.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ void context::ensure_nvcomp_loaded() { io::detail::nvcomp::load_nvcomp_library()
2727
void context::ensure_jit_cache_initialized()
2828
{
2929
std::call_once(_program_cache_init_flag,
30-
[&]() { _program_cache = std::make_unique<jit::program_cache>(); });
30+
[&]() { _program_cache = jit::program_cache::create(); });
3131
}
3232

3333
jit::program_cache& context::program_cache()
@@ -85,6 +85,10 @@ void teardown()
8585
});
8686
}
8787

88+
void enable_jit_cache(bool enable) { get_context().program_cache().enable(enable); }
89+
90+
void clear_jit_cache() { get_context().program_cache().clear(); }
91+
8892
context& get_context()
8993
{
9094
cudf::initialize();

cpp/src/transform/compute_column.cu

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
/*
2-
* SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION.
2+
* SPDX-FileCopyrightText: Copyright (c) 2020-2026, NVIDIA CORPORATION.
33
* SPDX-License-Identifier: Apache-2.0
44
*/
55

66
#include "compute_column_kernel.hpp"
7+
#include "runtime/context.hpp"
78

89
#include <cudf/ast/detail/expression_evaluator.cuh>
910
#include <cudf/ast/detail/expression_parser.hpp>
@@ -31,6 +32,8 @@ std::unique_ptr<column> compute_column(table_view const& table,
3132
rmm::cuda_stream_view stream,
3233
rmm::device_async_resource_ref mr)
3334
{
35+
if (get_context().use_jit()) { return compute_column_jit(table, expr, stream, mr); }
36+
3437
// If evaluating the expression may produce null outputs we create a nullable
3538
// output column and follow the null-supporting expression evaluation code
3639
// path.

cpp/src/transform/transform.cu

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,7 @@ auto to_device_input_arg(InputsView inputs,
148148
rmm::device_async_resource_ref mr)
149149
{
150150
std::vector<column_view> columns;
151+
151152
for (auto const& input : inputs) {
152153
columns.emplace_back(std::visit([](auto const& col) { return to_column_view(col); }, input));
153154
}

python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds_queries/q23.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,6 @@ def polars_impl(run_config: RunConfig) -> QueryResult:
114114
.agg(pl.len().alias("cnt"))
115115
.filter(pl.col("cnt") > 4)
116116
.select("ss_item_sk")
117-
.unique()
118117
)
119118

120119
customer_sales = (
@@ -144,9 +143,7 @@ def polars_impl(run_config: RunConfig) -> QueryResult:
144143
customer, left_on="cs_bill_customer_sk", right_on="c_customer_sk"
145144
)
146145
.join(date_dim, left_on="cs_sold_date_sk", right_on="d_date_sk")
147-
.join(
148-
frequent_ss_items, left_on="cs_item_sk", right_on="ss_item_sk", how="semi"
149-
)
146+
.join(frequent_ss_items, left_on="cs_item_sk", right_on="ss_item_sk")
150147
.join(
151148
best_customers,
152149
left_on="cs_bill_customer_sk",
@@ -163,9 +160,7 @@ def polars_impl(run_config: RunConfig) -> QueryResult:
163160
customer, left_on="ws_bill_customer_sk", right_on="c_customer_sk"
164161
)
165162
.join(date_dim, left_on="ws_sold_date_sk", right_on="d_date_sk")
166-
.join(
167-
frequent_ss_items, left_on="ws_item_sk", right_on="ss_item_sk", how="semi"
168-
)
163+
.join(frequent_ss_items, left_on="ws_item_sk", right_on="ss_item_sk")
169164
.join(
170165
best_customers,
171166
left_on="ws_bill_customer_sk",

python/cudf_polars/cudf_polars/experimental/benchmarks/pdsds_queries/q61.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ def polars_impl(run_config: RunConfig) -> QueryResult:
171171
* 100.0
172172
)
173173
.alias(
174-
"((CAST(promotions AS DECIMAL(15,4)) / CAST(total AS DECIMAL(15,4))) * 100)"
174+
"((CAST(promotions AS DECIMAL(15, 4)) / CAST(total AS DECIMAL(15, 4))) * 100)"
175175
)
176176
]
177177
)

0 commit comments

Comments
 (0)