Skip to content

Commit 3c896cf

Browse files
authored
Merge branch 'main' into jit-infra
2 parents c7b7a1b + 3a9daa5 commit 3c896cf

244 files changed

Lines changed: 19956 additions & 21007 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.github/workflows/pr.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -536,7 +536,7 @@ jobs:
536536
arch: '["amd64", "arm64"]'
537537
cuda: '["13.1"]'
538538
node_type: "cpu8"
539-
timeout-minutes: 60
539+
timeout-minutes: 90
540540
env: |
541541
SCCACHE_DIST_MAX_RETRIES=inf
542542
SCCACHE_SERVER_LOG=sccache=debug

.github/workflows/spark-rapids-jni.yaml

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ jobs:
88
runs-on: linux-amd64-cpu8
99
container:
1010
image: rapidsai/ci-spark-rapids-jni:rockylinux8-cuda12.9.1
11+
permissions:
12+
id-token: write
1113
steps:
1214
- uses: actions/checkout@v4
1315
with:
@@ -17,10 +19,74 @@ jobs:
1719
- uses: actions/checkout@v4
1820
with:
1921
path: thirdparty/cudf
22+
- uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 # v6.0.0
23+
with:
24+
role-to-assume: ${{ vars.AWS_ROLE_ARN }}
25+
aws-region: ${{ vars.AWS_REGION }}
26+
role-duration-seconds: 43200 # 12h
2027
- name: "Build spark-rapids-jni"
28+
env:
29+
SCCACHE_S3_KEY_PREFIX: spark-rapids-jni
30+
SCCACHE_S3_PREPROCESSOR_CACHE_KEY_PREFIX: spark-rapids-jni/preprocessor
31+
SCCACHE_S3_USE_PREPROCESSOR_CACHE_MODE: true
2132
run: |
33+
set -euo pipefail
34+
35+
###
36+
# Setup sccache client
37+
###
38+
39+
# Install jq
40+
dnf -y install jq
41+
42+
# Download gha-tools
43+
wget https://github.com/rapidsai/gha-tools/releases/latest/download/tools.tar.gz -O - | tar -xz -C /usr/local/bin
44+
45+
# Build cluster endpoint
46+
export SCCACHE_DIST_SCHEDULER_URL="https://$(uname -m | sed -e 's/x86_64/amd64/' -e 's/aarch64/arm64/').linux.sccache.rapids.nvidia.com"
47+
48+
export SCCACHE_DIST_AUTH_TOKEN="$(
49+
curl -fsSL -H "Authorization: Bearer $(
50+
curl -fsSL -H "Authorization: bearer $ACTIONS_ID_TOKEN_REQUEST_TOKEN" \
51+
"${ACTIONS_ID_TOKEN_REQUEST_URL}&audience=token.rapids.nvidia.com" \
52+
| jq -r '.value'
53+
)" https://token.rapids.nvidia.com/gh/token/exchange \
54+
| jq -r '.token')"
55+
56+
# Install sccache client
57+
. rapids-install-sccache
58+
59+
# Configure sccache
60+
. rapids-configure-sccache
61+
62+
export CPP_PARALLEL_LEVEL="$PARALLEL_LEVEL"
63+
64+
# Don't use the build cluster for CMake's compiler tests
65+
echo -e '\nset(ENV{SCCACHE_NO_DIST_COMPILE} "1")' >> thirdparty/cudf-pins/add_dependency_pins.cmake
66+
2267
echo "------------------------"
2368
env | sort
2469
echo "------------------------"
70+
71+
# Increase the nofile ulimit to build with as much parallelism as possible
72+
ulimit -n $(ulimit -Hn)
73+
74+
# Start the sccache daemon
75+
sccache --start-server
76+
77+
# Verify sccache version and distributed compilation
78+
sccache --show-stats
79+
80+
if sccache --dist-status 2>/dev/null | jq -er '.SchedulerStatus? != null' >/dev/null 2>&1; then
81+
echo "Distributed compilation is available:"
82+
sccache --dist-status | jq -r '["scheduler URL: " + .SchedulerStatus[0], "server count: " + (.SchedulerStatus[1].servers | length | tostring)][]';
83+
else
84+
echo "Error: Distributed compilation not available, check connectivity"
85+
cat "$SCCACHE_ERROR_LOG";
86+
exit 1
87+
fi
88+
2589
mkdir target
2690
source build/env.sh && CMAKE_CUDA_ARCHITECTURES=90 LIBCUDF_DEPENDENCY_MODE=latest USE_GDS=on ${sclCMD} build/buildcpp.sh
91+
92+
sccache --show-stats

.pre-commit-config.yaml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,22 @@ repos:
104104
entry: 'rmm::exec_policy\('
105105
language: pygrep
106106
types_or: [c, c++, cuda]
107+
- id: use-cudf-memcpy-async
108+
name: use-cudf-memcpy-async
109+
description: 'Enforce that cudf::detail::memcpy_async or memcpy_batch_async is used instead of cudaMemcpyAsync (see developer guide)'
110+
entry: 'cudaMemcpyAsync'
111+
language: pygrep
112+
types_or: [c, c++, cuda]
113+
files: '^cpp/(src|include)/'
114+
exclude: |
115+
(?x)^(
116+
cpp/src/utilities/host_memory\.cpp|
117+
cpp/src/utilities/cuda_memcpy\.cu|
118+
cpp/src/io/utilities/data_sink\.cpp|
119+
cpp/include/cudf/contiguous_split\.hpp|
120+
cpp/include/cudf/utilities/pinned_memory\.hpp|
121+
cpp/include/cudf/utilities/error\.hpp
122+
)$
107123
- id: no-unseeded-default-rng
108124
name: no-unseeded-default-rng
109125
description: 'Enforce that no non-seeded default_rng is used and default_rng is used instead of np.random.seed'

conda/environments/all_cuda-129_arch-aarch64.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ dependencies:
5757
- nbsphinx
5858
- ninja
5959
- notebook
60-
- numba-cuda>=0.22.2,<0.28.0
60+
- numba-cuda>=0.22.2
6161
- numba>=0.60.0,<0.65.0
6262
- numpy>=1.26,<3.0
6363
- numpydoc

conda/environments/all_cuda-129_arch-x86_64.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ dependencies:
5757
- nbsphinx
5858
- ninja
5959
- notebook
60-
- numba-cuda>=0.22.2,<0.28.0
60+
- numba-cuda>=0.22.2
6161
- numba>=0.60.0,<0.65.0
6262
- numpy>=1.26,<3.0
6363
- numpydoc

conda/environments/all_cuda-131_arch-aarch64.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ dependencies:
5757
- nbsphinx
5858
- ninja
5959
- notebook
60-
- numba-cuda>=0.22.2,<0.28.0
60+
- numba-cuda>=0.22.2
6161
- numba>=0.60.0,<0.65.0
6262
- numpy>=1.26,<3.0
6363
- numpydoc
@@ -81,6 +81,7 @@ dependencies:
8181
- python-confluent-kafka
8282
- python-xxhash
8383
- python>=3.11
84+
- pytorch>=2.10.0
8485
- rapids-build-backend>=0.4.0,<0.5.0
8586
- rapids-dask-dependency==26.4.*,>=0.0.0a0
8687
- rapids-logger==0.2.*,>=0.0.0a0

conda/environments/all_cuda-131_arch-x86_64.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ dependencies:
5757
- nbsphinx
5858
- ninja
5959
- notebook
60-
- numba-cuda>=0.22.2,<0.28.0
60+
- numba-cuda>=0.22.2
6161
- numba>=0.60.0,<0.65.0
6262
- numpy>=1.26,<3.0
6363
- numpydoc
@@ -81,6 +81,7 @@ dependencies:
8181
- python-confluent-kafka
8282
- python-xxhash
8383
- python>=3.11
84+
- pytorch>=2.10.0
8485
- rapids-build-backend>=0.4.0,<0.5.0
8586
- rapids-dask-dependency==26.4.*,>=0.0.0a0
8687
- rapids-logger==0.2.*,>=0.0.0a0

conda/recipes/cudf-polars/recipe.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@ requirements:
3939
- polars>=1.30,<1.39
4040
- packaging
4141
- ${{ pin_compatible("cuda-version", upper_bound="x", lower_bound="x") }}
42+
- if: cuda_major == "12"
43+
then: cuda-python >=12.9.2,<13.0
44+
else: cuda-python >=13.0.1,<14.0
4245
ignore_run_exports:
4346
by_name:
4447
- cuda-version

cpp/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -536,6 +536,7 @@ add_library(
536536
src/io/parquet/experimental/hybrid_scan_preprocess.cu
537537
src/io/parquet/experimental/page_index_filter.cu
538538
src/io/parquet/experimental/page_index_filter_utils.cu
539+
src/io/parquet/expression_transform_helpers.cpp
539540
src/io/parquet/io_utils/parquet_io_utils.cpp
540541
src/io/parquet/page_data.cu
541542
src/io/parquet/chunk_dict.cu

cpp/benchmarks/CMakeLists.txt

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -282,13 +282,9 @@ ConfigureNVBench(
282282
merge/merge_strings.cpp
283283
)
284284

285-
# ##################################################################################################
286-
# * null_mask benchmark ---------------------------------------------------------------------------
287-
ConfigureNVBench(NULLMASK_NVBENCH bitmask/set_null_mask.cpp)
288-
289285
# ##################################################################################################
290286
# * bitmask benchmark ---------------------------------------------------------------------------
291-
ConfigureNVBench(BITMASK_NVBENCH bitmask/bitmask_and.cu)
287+
ConfigureNVBench(BITMASK_NVBENCH bitmask/bitmask_and.cpp bitmask/set_null_mask.cpp)
292288

293289
# ##################################################################################################
294290
# * parquet writer benchmark ----------------------------------------------------------------------
@@ -300,6 +296,34 @@ ConfigureNVBench(
300296
# * parquet reader benchmark ----------------------------------------------------------------------
301297
ConfigureNVBench(
302298
PARQUET_READER_NVBENCH io/parquet/parquet_reader_input.cpp io/parquet/parquet_reader_options.cpp
299+
io/parquet/reader_common.cpp
300+
)
301+
302+
# ##################################################################################################
303+
# * parquet reader chunks benchmark
304+
# ------------------------------------------------------------------
305+
ConfigureNVBench(
306+
PARQUET_READER_CHUNKS_NVBENCH io/parquet/parquet_reader_chunks.cpp io/parquet/reader_common.cpp
307+
)
308+
309+
# ##################################################################################################
310+
# * parquet reader compressed benchmark ------------------------------------------------------------
311+
ConfigureNVBench(
312+
PARQUET_READER_COMPRESSED_NVBENCH io/parquet/parquet_reader_compressed.cpp
313+
io/parquet/reader_common.cpp
314+
)
315+
316+
# ##################################################################################################
317+
# * parquet reader strings benchmark
318+
# ------------------------------------------------------------------
319+
ConfigureNVBench(
320+
PARQUET_READER_STRINGS_NVBENCH io/parquet/parquet_reader_strings.cpp io/parquet/reader_common.cpp
321+
)
322+
323+
# ##################################################################################################
324+
# * parquet reader wide benchmark ------------------------------------------------------------------
325+
ConfigureNVBench(
326+
PARQUET_READER_WIDE_NVBENCH io/parquet/parquet_reader_wide.cpp io/parquet/reader_common.cpp
303327
)
304328

305329
# ##################################################################################################

0 commit comments

Comments
 (0)