Skip to content

Commit cc714dc

Browse files
authored
Clean up mixed join common utilities (rapidsai#20836)
Contributes to rapidsai#19270. This is the first PR aimed at improving and tidying up the internal utilities used for joins. It focuses on mixed joins: several utilities specific to mixed joins were previously placed in the general join utilities header, and the mixed semi/anti join utilities were located in the standard mixed join header. This PR reorganizes those utilities into more appropriate locations and cleans up header inclusions across all join source files. Authors: - Yunsong Wang (https://github.com/PointKernel) Approvers: - Tianyu Liu (https://github.com/kingcrimsontianyu) - David Wendt (https://github.com/davidwendt) URL: rapidsai#20836
1 parent ca655c4 commit cc714dc

20 files changed

Lines changed: 367 additions & 382 deletions

cpp/src/join/conditional_join.cu

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,23 @@
11
/*
2-
* SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION.
2+
* SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION.
33
* SPDX-License-Identifier: Apache-2.0
44
*/
55

66
#include "join/conditional_join.hpp"
77
#include "join/conditional_join_kernels.cuh"
8-
#include "join/join_common_utils.cuh"
98
#include "join/join_common_utils.hpp"
109

1110
#include <cudf/ast/detail/expression_parser.hpp>
1211
#include <cudf/ast/expressions.hpp>
1312
#include <cudf/detail/device_scalar.hpp>
1413
#include <cudf/detail/nvtx/ranges.hpp>
15-
#include <cudf/detail/utilities/cuda.cuh>
1614
#include <cudf/detail/utilities/grid_1d.cuh>
1715
#include <cudf/join/conditional_join.hpp>
1816
#include <cudf/join/join.hpp>
19-
#include <cudf/table/table.hpp>
2017
#include <cudf/table/table_device_view.cuh>
2118
#include <cudf/table/table_view.hpp>
2219
#include <cudf/types.hpp>
2320
#include <cudf/utilities/error.hpp>
24-
#include <cudf/utilities/memory_resource.hpp>
2521

2622
#include <rmm/cuda_stream_view.hpp>
2723

cpp/src/join/cross_join.cu

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION.
2+
* SPDX-FileCopyrightText: Copyright (c) 2020-2026, NVIDIA CORPORATION.
33
* SPDX-License-Identifier: Apache-2.0
44
*/
55

@@ -19,6 +19,8 @@
1919

2020
#include <rmm/cuda_stream_view.hpp>
2121

22+
#include <memory>
23+
2224
namespace cudf {
2325
namespace detail {
2426
/**

cpp/src/join/distinct_hash_join.cu

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
2+
* SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
33
* SPDX-License-Identifier: Apache-2.0
44
*/
55
#include "join_common_utils.cuh"
@@ -20,6 +20,7 @@
2020
#include <rmm/cuda_stream_view.hpp>
2121
#include <rmm/device_uvector.hpp>
2222
#include <rmm/mr/polymorphic_allocator.hpp>
23+
#include <rmm/resource_ref.hpp>
2324

2425
#include <cooperative_groups.h>
2526
#include <cub/block/block_scan.cuh>

cpp/src/join/filtered_join.cu

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION.
2+
* SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
33
* SPDX-License-Identifier: Apache-2.0
44
*/
55

@@ -16,6 +16,7 @@
1616
#include <cudf/join/filtered_join.hpp>
1717
#include <cudf/join/join.hpp>
1818
#include <cudf/table/table_view.hpp>
19+
#include <cudf/types.hpp>
1920
#include <cudf/utilities/error.hpp>
2021

2122
#include <rmm/cuda_stream_view.hpp>
@@ -32,6 +33,8 @@
3233
#include <thrust/iterator/counting_iterator.h>
3334
#include <thrust/sequence.h>
3435

36+
#include <memory>
37+
3538
namespace cudf {
3639
namespace detail {
3740
namespace {

cpp/src/join/hash_join.cu

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
/*
2-
* SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION.
2+
* SPDX-FileCopyrightText: Copyright (c) 2020-2026, NVIDIA CORPORATION.
33
* SPDX-License-Identifier: Apache-2.0
44
*/
55
#include "join_common_utils.cuh"
6+
#include "join_common_utils.hpp"
67

78
#include <cudf/copying.hpp>
9+
#include <cudf/detail/cuco_helpers.hpp>
810
#include <cudf/detail/iterator.cuh>
911
#include <cudf/detail/join/hash_join.cuh>
1012
#include <cudf/detail/nvtx/ranges.hpp>
@@ -42,8 +44,35 @@ namespace detail {
4244
namespace {
4345
using hash_table_t = cudf::hash_join::impl_type::hash_table_t;
4446

45-
// Multimap type used for mixed joins. TODO: This is a temporary alias used
46-
// TODO: `pair_equal` to be moved to common utils during mixed-join migration
47+
/**
48+
* @brief Checks if a join operation is trivial (empty tables or certain join types with empty
49+
* data).
50+
*/
51+
bool is_trivial_join(table_view const& left, table_view const& right, join_kind join_type)
52+
{
53+
// If there is nothing to join, then send empty table with all columns
54+
if (left.is_empty() || right.is_empty()) { return true; }
55+
56+
// If left join and the left table is empty, return immediately
57+
if ((join_kind::LEFT_JOIN == join_type) && (0 == left.num_rows())) { return true; }
58+
59+
// If Inner Join and either table is empty, return immediately
60+
if ((join_kind::INNER_JOIN == join_type) && ((0 == left.num_rows()) || (0 == right.num_rows()))) {
61+
return true;
62+
}
63+
64+
// If left semi join (contains) and right table is empty,
65+
// return immediately
66+
if ((join_kind::LEFT_SEMI_JOIN == join_type) && (0 == right.num_rows())) { return true; }
67+
68+
// If left semi- or anti- join, and the left table is empty, return immediately
69+
if ((join_kind::LEFT_SEMI_JOIN == join_type || join_kind::LEFT_ANTI_JOIN == join_type) &&
70+
(0 == left.num_rows())) {
71+
return true;
72+
}
73+
74+
return false;
75+
}
4776

4877
template <typename Equal>
4978
class pair_equal {

cpp/src/join/join.cu

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION.
2+
* SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION.
33
* SPDX-License-Identifier: Apache-2.0
44
*/
55
#include "join_common_utils.hpp"
@@ -12,9 +12,14 @@
1212
#include <cudf/join/join.hpp>
1313
#include <cudf/table/table.hpp>
1414
#include <cudf/table/table_view.hpp>
15+
#include <cudf/types.hpp>
1516
#include <cudf/utilities/memory_resource.hpp>
1617

1718
#include <rmm/cuda_stream_view.hpp>
19+
#include <rmm/device_uvector.hpp>
20+
#include <rmm/resource_ref.hpp>
21+
22+
#include <memory>
1823

1924
namespace cudf {
2025
namespace detail {

cpp/src/join/join_common_utils.cuh

Lines changed: 1 addition & 168 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION.
2+
* SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION.
33
* SPDX-License-Identifier: Apache-2.0
44
*/
55
#pragma once
@@ -11,16 +11,12 @@
1111
#include <cudf/detail/row_operator/equality.cuh>
1212
#include <cudf/detail/row_operator/hashing.cuh>
1313
#include <cudf/detail/utilities/cuda.cuh>
14-
#include <cudf/hashing/detail/murmurhash3_x86_32.cuh>
15-
#include <cudf/utilities/memory_resource.hpp>
1614

1715
#include <rmm/cuda_stream_view.hpp>
18-
#include <rmm/device_uvector.hpp>
1916

2017
#include <thrust/iterator/counting_iterator.h>
2118

2219
#include <memory>
23-
#include <utility>
2420

2521
namespace cudf::detail {
2622
template <typename Hasher>
@@ -52,169 +48,6 @@ class row_is_valid {
5248
bitmask_type const* _row_bitmask;
5349
};
5450

55-
/**
56-
* @brief Device functor to determine if two pairs are identical.
57-
*
58-
* This equality comparator is designed for use with cuco::static_multimap's
59-
* pair* APIs, which will compare equality based on comparing (key, value)
60-
* pairs. In the context of joins, these pairs are of the form
61-
* (row_hash, row_id). A hash probe hit indicates that the hash of a probe row is
62-
* equal to the hash of some row in the multimap, at which point we need an
63-
* equality comparator that will check whether the contents of the rows are
64-
* identical. This comparator does so by verifying key equality (i.e. that
65-
* probe_row_hash == build_row_hash) and then using a row_equality_comparator
66-
* to compare the contents of the row indices that are stored as the payload in
67-
* the hash map.
68-
*
69-
* @tparam DeviceComparator The row comparator type to perform row equality comparison from row indices.
70-
*/
71-
template <typename DeviceComparator>
72-
class pair_equality {
73-
public:
74-
pair_equality(DeviceComparator check_row_equality)
75-
: _check_row_equality{std::move(check_row_equality)}
76-
{
77-
}
78-
79-
// The parameters are build/probe rather than left/right because the operator
80-
// is called by cuco's kernels with parameters in this order (note that this
81-
// is an implementation detail that we should eventually stop relying on by
82-
// defining operators with suitable heterogeneous typing). Rather than
83-
// converting to left/right semantics, we can operate directly on build/probe
84-
template <typename LhsPair, typename RhsPair>
85-
__device__ __forceinline__ bool operator()(LhsPair const& lhs, RhsPair const& rhs) const noexcept
86-
{
87-
using detail::row::lhs_index_type;
88-
using detail::row::rhs_index_type;
89-
90-
return lhs.first == rhs.first and
91-
_check_row_equality(lhs_index_type{rhs.second}, rhs_index_type{lhs.second});
92-
}
93-
94-
private:
95-
DeviceComparator _check_row_equality;
96-
};
97-
98-
/**
99-
* @brief Computes the trivial left join operation for the case when the
100-
* right table is empty.
101-
*
102-
* In this case all the valid indices of the left table
103-
* are returned with their corresponding right indices being set to
104-
* `JoinNoMatch`, i.e. `cuda::std::numeric_limits<size_type>::min()`.
105-
*
106-
* @param left Table of left columns to join
107-
* @param stream CUDA stream used for device memory operations and kernel launches
108-
* @param mr Device memory resource used to allocate the result
109-
*
110-
* @return Join output indices vector pair
111-
*/
112-
std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
113-
std::unique_ptr<rmm::device_uvector<size_type>>>
114-
get_trivial_left_join_indices(table_view const& left,
115-
rmm::cuda_stream_view stream,
116-
rmm::device_async_resource_ref mr);
117-
118-
/**
119-
* @brief Builds the hash table based on the given `build_table`.
120-
*
121-
* @tparam HashTable The type of the hash table
122-
*
123-
* @param build Table of columns used to build join hash.
124-
* @param preprocessed_build shared_ptr to cudf::detail::row::equality::preprocessed_table
125-
* for build
126-
* @param hash_table Build hash table.
127-
* @param has_nested_nulls Flag to denote if build or probe tables have nested nulls
128-
* @param nulls_equal Flag to denote nulls are equal or not.
129-
* @param bitmask Bitmask to denote whether a row is valid.
130-
* @param stream CUDA stream used for device memory operations and kernel launches.
131-
*/
132-
template <typename HashTable>
133-
void build_join_hash_table(
134-
cudf::table_view const& build,
135-
std::shared_ptr<detail::row::equality::preprocessed_table> const& preprocessed_build,
136-
HashTable& hash_table,
137-
bool has_nested_nulls,
138-
null_equality nulls_equal,
139-
[[maybe_unused]] bitmask_type const* bitmask,
140-
rmm::cuda_stream_view stream)
141-
{
142-
CUDF_EXPECTS(0 != build.num_columns(), "Selected build dataset is empty", std::invalid_argument);
143-
CUDF_EXPECTS(0 != build.num_rows(), "Build side table has no rows", std::invalid_argument);
144-
145-
auto insert_rows = [&](auto const& build, auto const& d_hasher) {
146-
auto const iter = cudf::detail::make_counting_transform_iterator(0, pair_fn{d_hasher});
147-
148-
if (nulls_equal == cudf::null_equality::EQUAL or not nullable(build)) {
149-
hash_table.insert_async(iter, iter + build.num_rows(), stream.value());
150-
} else {
151-
auto const stencil = thrust::counting_iterator<size_type>{0};
152-
auto const pred = row_is_valid{bitmask};
153-
154-
// insert valid rows
155-
hash_table.insert_if_async(iter, iter + build.num_rows(), stencil, pred, stream.value());
156-
}
157-
};
158-
159-
auto const nulls = nullate::DYNAMIC{has_nested_nulls};
160-
161-
auto const row_hash = detail::row::hash::row_hasher{preprocessed_build};
162-
auto const d_hasher = row_hash.device_hasher(nulls);
163-
164-
insert_rows(build, d_hasher);
165-
}
166-
167-
// Convenient alias for a pair of unique pointers to device uvectors.
168-
using VectorPair = std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
169-
std::unique_ptr<rmm::device_uvector<size_type>>>;
170-
171-
/**
172-
* @brief Takes two pairs of vectors and returns a single pair where the first
173-
* element is a vector made from concatenating the first elements of both input
174-
* pairs and the second element is a vector made from concatenating the second
175-
* elements of both input pairs.
176-
*
177-
* This function's primary use is for computing the indices of a full join by
178-
* first performing a left join, then separately getting the complementary
179-
* right join indices, then finally calling this function to concatenate the
180-
* results. In this case, each input VectorPair contains the left and right
181-
* indices from a join.
182-
*
183-
* Note that this is a destructive operation, in that at least one of a or b
184-
* will be invalidated (by a move) by this operation. Calling code should
185-
* assume that neither input VectorPair is valid after this function executes.
186-
*
187-
* @param a The first pair of vectors.
188-
* @param b The second pair of vectors.
189-
* @param stream CUDA stream used for device memory operations and kernel launches
190-
*
191-
* @return A pair of vectors containing the concatenated output.
192-
*/
193-
VectorPair concatenate_vector_pairs(VectorPair& a, VectorPair& b, rmm::cuda_stream_view stream);
194-
195-
/**
196-
* @brief Creates a table containing the complement of left join indices.
197-
*
198-
* This table has two columns. The first one is filled with `JoinNoMatch`
199-
* and the second one contains values from 0 to right_table_row_count - 1
200-
* excluding those found in the right_indices column.
201-
*
202-
* @param right_indices Vector of indices
203-
* @param left_table_row_count Number of rows of left table
204-
* @param right_table_row_count Number of rows of right table
205-
* @param stream CUDA stream used for device memory operations and kernel launches.
206-
* @param mr Device memory resource used to allocate the returned vectors.
207-
*
208-
* @return Pair of vectors containing the left join indices complement
209-
*/
210-
std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
211-
std::unique_ptr<rmm::device_uvector<size_type>>>
212-
get_left_join_indices_complement(std::unique_ptr<rmm::device_uvector<size_type>>& right_indices,
213-
size_type left_table_row_count,
214-
size_type right_table_row_count,
215-
rmm::cuda_stream_view stream,
216-
rmm::device_async_resource_ref mr);
217-
21851
/**
21952
* @brief Device functor to determine if an index is contained in a range.
22053
*/

0 commit comments

Comments
 (0)