From db0af1cb7433182f65dad4c8180167ecbc721783 Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 21 May 2025 12:46:23 -0700 Subject: [PATCH 1/8] building --- cpp/CMakeLists.txt | 2 - cpp/cmake/modules/ConfigureAlgorithms.cmake | 7 --- cpp/cmake/thirdparty/get_raft.cmake | 6 +-- cpp/src/dbscan/vertexdeg/algo.cuh | 15 +++--- cpp/src/genetic/fitness.cuh | 22 ++++----- cpp/src/glm/preprocess.cuh | 29 ++++++----- cpp/src/glm/qn/glm_base.cuh | 2 +- cpp/src/glm/qn/mg/glm_base_mg.cuh | 2 +- cpp/src/glm/qn/mg/standardization.cuh | 12 ++++- cpp/src/glm/qn/simple_mat/dense.hpp | 6 +-- cpp/src/hdbscan/detail/utils.h | 13 ++--- cpp/src/pca/pca.cuh | 15 +++--- cpp/src/pca/pca_mg.cu | 13 +++-- cpp/src/solver/cd.cuh | 8 ++-- cpp/src/svm/sparse_util.cuh | 22 ++++++--- cpp/src/tsne/exact_tsne.cuh | 4 +- cpp/src/tsvd/tsvd.cuh | 22 ++++----- cpp/src/tsvd/tsvd_mg.cu | 6 +-- cpp/src/umap/fuzzy_simpl_set/naive.cuh | 4 +- cpp/src/umap/optimize.cuh | 6 +-- cpp/src_prims/functions/hinge.cuh | 6 +-- cpp/src_prims/functions/linearReg.cuh | 6 +-- cpp/src_prims/functions/logisticReg.cuh | 6 +-- cpp/src_prims/functions/penalty.cuh | 6 +-- cpp/src_prims/timeSeries/stationarity.cuh | 53 ++++++++------------- cpp/tests/prims/knn_regression.cu | 6 +-- cpp/tests/sg/cd_test.cu | 14 +++--- 27 files changed, 147 insertions(+), 166 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 9184160853..c29f2a07f7 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -187,7 +187,6 @@ include(cmake/modules/ConfigureCUDA.cmake) ############################################################################## # - Set options based on user defined one ----------------------------------- set(CUML_USE_RAFT_NN OFF) -set(CUML_RAFT_COMPILED OFF) set(LINK_TREELITE OFF) set(LINK_CUFFT OFF) include(cmake/modules/ConfigureAlgorithms.cmake) @@ -667,7 +666,6 @@ if(BUILD_CUML_CPP_LIBRARY) # because cumlprims_mg and cuML inherit their CUDA libs from the raft::raft # INTERFACE target. 
list(APPEND ${_cuml_cpp_libs_var_name} - $<$:${RAFT_COMPILED_LIB}> $ ) diff --git a/cpp/cmake/modules/ConfigureAlgorithms.cmake b/cpp/cmake/modules/ConfigureAlgorithms.cmake index 261c0a1ac2..e361a701b4 100644 --- a/cpp/cmake/modules/ConfigureAlgorithms.cmake +++ b/cpp/cmake/modules/ConfigureAlgorithms.cmake @@ -17,7 +17,6 @@ if(CUML_ALGORITHMS STREQUAL "ALL") set(CUML_USE_RAFT_NN ON) - set(CUML_RAFT_COMPILED ON) set(LINK_TREELITE ON) set(LINK_CUFFT ON) set(LINK_CUVS ON) @@ -33,7 +32,6 @@ else() set(BUILD_CUML_BENCH OFF) set(BUILD_CUML_EXAMPLES OFF) set(CUML_USE_RAFT_NN OFF) - set(CUML_RAFT_COMPILED OFF) foreach(algo ${CUML_ALGORITHMS}) string(TOLOWER ${algo} lower_algo) @@ -108,7 +106,6 @@ else() if(knn_algo) set(CUML_USE_RAFT_NN ON) - set(CUML_RAFT_COMPILED ON) endif() if(randomforest_algo) @@ -120,10 +117,6 @@ else() set(metrics_algo ON) endif() - if(metrics_algo) - set(CUML_RAFT_COMPILED ON) - endif() - if(dbscan_algo OR hdbscan_algo OR kmeans_algo OR knn_algo OR metrics_algo OR tsne_algo OR umap_algo) set(LINK_CUVS ON) diff --git a/cpp/cmake/thirdparty/get_raft.cmake b/cpp/cmake/thirdparty/get_raft.cmake index 7659384cc5..486dd636f7 100644 --- a/cpp/cmake/thirdparty/get_raft.cmake +++ b/cpp/cmake/thirdparty/get_raft.cmake @@ -41,6 +41,7 @@ function(find_and_configure_raft) set(RAFT_NVTX ${PKG_NVTX}) message(VERBOSE "CUML: raft FIND_PACKAGE_ARGUMENTS COMPONENTS ${RAFT_COMPONENTS}") + # set(CPM_raft_SOURCE /raid/dgala/raft) rapids_cpm_find(raft ${PKG_VERSION} GLOBAL_TARGETS raft::raft @@ -72,14 +73,13 @@ endfunction() # To use a different RAFT locally, set the CMake variable # CPM_raft_SOURCE=/path/to/local/raft find_and_configure_raft(VERSION ${CUML_MIN_VERSION_raft} - FORK rapidsai - PINNED_TAG branch-${CUML_BRANCH_VERSION_raft} + FORK divyegala + PINNED_TAG reduction-kernels EXCLUDE_FROM_ALL ${CUML_EXCLUDE_RAFT_FROM_ALL} # When PINNED_TAG above doesn't match cuml, # force local raft clone in build directory # even if it's already installed. 
CLONE_ON_PIN ${CUML_RAFT_CLONE_ON_PIN} - COMPILE_LIBRARY ${CUML_RAFT_COMPILED} USE_RAFT_STATIC ${CUML_USE_RAFT_STATIC} NVTX ${NVTX} ) diff --git a/cpp/src/dbscan/vertexdeg/algo.cuh b/cpp/src/dbscan/vertexdeg/algo.cuh index 74f57987ed..7eaaaffbda 100644 --- a/cpp/src/dbscan/vertexdeg/algo.cuh +++ b/cpp/src/dbscan/vertexdeg/algo.cuh @@ -185,14 +185,13 @@ void launcher(const raft::handle_t& handle, if (metric == cuvs::distance::DistanceType::CosineExpanded) { rmm::device_uvector<value_t> rowNorms(m, stream); - raft::linalg::rowNorm(rowNorms.data(), - data.x, - k, - m, - raft::linalg::NormType::L2Norm, - true, - stream, - [] __device__(value_t in) { return sqrtf(in); }); + raft::linalg::rowNorm(rowNorms.data(), + data.x, + k, + m, + raft::linalg::NormType::L2Norm, + stream, + [] __device__(value_t in) { return sqrtf(in); }); /* Cast away constness because the output matrix for normalization cannot be of const type. * Input matrix will be modified due to normalization.
@@ -79,7 +79,7 @@ void weightedPearson(const raft::handle_t& h, math_t N = (math_t)n_samples; // Sum of weights - raft::stats::sum(dWS.data(), W, (uint64_t)1, n_samples, false, stream); + raft::stats::sum(dWS.data(), W, (uint64_t)1, n_samples, stream); math_t WS = dWS.value(stream); // Find y_mu @@ -94,7 +94,7 @@ void weightedPearson(const raft::handle_t& h, [N, WS] __device__(math_t y, math_t w) { return N * w * y / WS; }, stream); - raft::stats::mean(y_mu.data(), y_tmp.data(), (uint64_t)1, n_samples, false, false, stream); + raft::stats::mean(y_mu.data(), y_tmp.data(), (uint64_t)1, n_samples, false, stream); // Find x_mu raft::linalg::matrixVectorOp( @@ -108,7 +108,7 @@ void weightedPearson(const raft::handle_t& h, [N, WS] __device__(math_t x, math_t w) { return N * w * x / WS; }, stream); - raft::stats::mean(x_mu.data(), x_tmp.data(), n_progs, n_samples, false, false, stream); + raft::stats::mean(x_mu.data(), x_tmp.data(), n_progs, n_samples, false, stream); // Find y_diff raft::stats::meanCenter( @@ -169,7 +169,7 @@ void weightedPearson(const raft::handle_t& h, [] __device__(math_t c, math_t xd) { return c / xd; }, stream); - raft::stats::mean(out, corr.data(), n_progs, n_samples, false, false, stream); + raft::stats::mean(out, corr.data(), n_progs, n_samples, false, stream); } struct rank_functor { @@ -261,7 +261,7 @@ void meanAbsoluteError(const raft::handle_t& h, math_t N = (math_t)n_samples; // Weight Sum - raft::stats::sum(dWS.data(), W, (uint64_t)1, n_samples, false, stream); + raft::stats::sum(dWS.data(), W, (uint64_t)1, n_samples, stream); math_t WS = dWS.value(stream); // Compute absolute differences @@ -278,7 +278,7 @@ void meanAbsoluteError(const raft::handle_t& h, stream); // Average along rows - raft::stats::mean(out, error.data(), n_progs, n_samples, false, false, stream); + raft::stats::mean(out, error.data(), n_progs, n_samples, false, stream); } template @@ -296,7 +296,7 @@ void meanSquareError(const raft::handle_t& h, math_t N = 
(math_t)n_samples; // Weight Sum - raft::stats::sum(dWS.data(), W, (uint64_t)1, n_samples, false, stream); + raft::stats::sum(dWS.data(), W, (uint64_t)1, n_samples, stream); math_t WS = dWS.value(stream); // Compute square differences @@ -315,7 +315,7 @@ void meanSquareError(const raft::handle_t& h, stream); // Add up row values per column - raft::stats::mean(out, error.data(), n_progs, n_samples, false, false, stream); + raft::stats::mean(out, error.data(), n_progs, n_samples, false, stream); } template @@ -352,7 +352,7 @@ void logLoss(const raft::handle_t& h, math_t N = (math_t)n_samples; // Weight Sum - raft::stats::sum(dWS.data(), W, (uint64_t)1, n_samples, false, stream); + raft::stats::sum(dWS.data(), W, (uint64_t)1, n_samples, stream); math_t WS = dWS.value(stream); // Compute logistic loss as described in @@ -383,7 +383,7 @@ void logLoss(const raft::handle_t& h, stream); // Take average along rows - raft::stats::mean(out, error.data(), n_progs, n_samples, false, false, stream); + raft::stats::mean(out, error.data(), n_progs, n_samples, false, stream); } } // namespace genetic diff --git a/cpp/src/glm/preprocess.cuh b/cpp/src/glm/preprocess.cuh index 5188cc96e1..c252ce13c4 100644 --- a/cpp/src/glm/preprocess.cuh +++ b/cpp/src/glm/preprocess.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2024, NVIDIA CORPORATION. + * Copyright (c) 2018-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -93,31 +93,30 @@ void preProcessData(const raft::handle_t& handle, norm2_input); } else { if (sample_weight != nullptr) { - raft::stats::weightedMean( - mu_input, input, sample_weight, n_cols, n_rows, false, false, stream); + raft::stats::weightedMean( + mu_input, input, sample_weight, n_cols, n_rows, stream); } else { - raft::stats::mean(mu_input, input, n_cols, n_rows, false, false, stream); + raft::stats::mean(mu_input, input, n_cols, n_rows, false, stream); } raft::stats::meanCenter(input, input, mu_input, n_cols, n_rows, false, true, stream); if (normalize) { - raft::linalg::colNorm(norm2_input, - input, - n_cols, - n_rows, - raft::linalg::L2Norm, - false, - stream, - [] __device__(math_t v) { return raft::sqrt(v); }); + raft::linalg::colNorm(norm2_input, + input, + n_cols, + n_rows, + raft::linalg::L2Norm, + stream, + [] __device__(math_t v) { return raft::sqrt(v); }); raft::matrix::matrixVectorBinaryDivSkipZero( input, norm2_input, n_rows, n_cols, false, true, stream, true); } } if (sample_weight != nullptr) { - raft::stats::weightedMean( - mu_labels, labels, sample_weight, (size_t)1, n_rows, true, false, stream); + raft::stats::weightedMean( + mu_labels, labels, sample_weight, (size_t)1, n_rows, stream); } else { - raft::stats::mean(mu_labels, labels, (size_t)1, n_rows, false, false, stream); + raft::stats::mean(mu_labels, labels, (size_t)1, n_rows, false, stream); } raft::stats::meanCenter(labels, labels, mu_labels, (size_t)1, n_rows, false, true, stream); } diff --git a/cpp/src/glm/qn/glm_base.cuh b/cpp/src/glm/qn/glm_base.cuh index d35a9b91f6..6783fe22f6 100644 --- a/cpp/src/glm/qn/glm_base.cuh +++ b/cpp/src/glm/qn/glm_base.cuh @@ -88,7 +88,7 @@ inline void linearBwd(const raft::handle_t& handle, // TODO can this be fused somehow? 
Gweights.assign_gemm(handle, 1.0 / X.m, dZ, false, X, false, beta, stream); - raft::stats::mean(Gbias.data, dZ.data, dZ.m, dZ.n, false, true, stream); + raft::stats::mean(Gbias.data, dZ.data, dZ.m, dZ.n, true, stream); } else { G.assign_gemm(handle, 1.0 / X.m, dZ, false, X, false, beta, stream); } diff --git a/cpp/src/glm/qn/mg/glm_base_mg.cuh b/cpp/src/glm/qn/mg/glm_base_mg.cuh index 2884f75b15..565991b295 100644 --- a/cpp/src/glm/qn/mg/glm_base_mg.cuh +++ b/cpp/src/glm/qn/mg/glm_base_mg.cuh @@ -63,7 +63,7 @@ inline void linearBwdMG(const raft::handle_t& handle, // TODO can this be fused somehow? Gweights.assign_gemm(handle, 1.0 / n_samples, dZ, false, X, false, beta / n_ranks, stream); - raft::stats::mean(Gbias.data, dZ.data, dZ.m, dZ.n, false, true, stream); + raft::stats::mean(Gbias.data, dZ.data, dZ.m, dZ.n, true, stream); T bias_factor = 1.0 * dZ.n / n_samples; raft::linalg::multiplyScalar(Gbias.data, Gbias.data, bias_factor, dZ.m, stream); diff --git a/cpp/src/glm/qn/mg/standardization.cuh b/cpp/src/glm/qn/mg/standardization.cuh index f0cc15cdba..bbc6cfc115 100644 --- a/cpp/src/glm/qn/mg/standardization.cuh +++ b/cpp/src/glm/qn/mg/standardization.cuh @@ -71,7 +71,11 @@ void vars(const raft::handle_t& handle, zero_vec.fill(0., stream); // get sum of squares on every column - raft::stats::vars(var_vector, input_data, zero.data(), D, num_rows, false, !col_major, stream); + if (col_major) { + raft::stats::vars(var_vector, input_data, zero.data(), D, num_rows, false, stream); + } else { + raft::stats::vars(var_vector, input_data, zero.data(), D, num_rows, false, stream); + } T weight = n_samples < 1 ? 
T(0) : T(1) * num_rows / T(n_samples - 1); raft::linalg::multiplyScalar(var_vector, var_vector, weight, D, stream); comm.allreduce(var_vector, var_vector, D, raft::comms::op_t::SUM, stream); @@ -107,7 +111,11 @@ void mean_stddev(const raft::handle_t& handle, auto stream = handle.get_stream(); auto& comm = handle.get_comms(); - raft::stats::sum(mean_vector, input_data, D, num_rows, !col_major, stream); + if (col_major) { + raft::stats::sum(mean_vector, input_data, D, num_rows, stream); + } else { + raft::stats::sum(mean_vector, input_data, D, num_rows, stream); + } T weight = T(1) / T(n_samples); raft::linalg::multiplyScalar(mean_vector, mean_vector, weight, D, stream); comm.allreduce(mean_vector, mean_vector, D, raft::comms::op_t::SUM, stream); diff --git a/cpp/src/glm/qn/simple_mat/dense.hpp b/cpp/src/glm/qn/simple_mat/dense.hpp index ad3c615384..f630b1b526 100644 --- a/cpp/src/glm/qn/simple_mat/dense.hpp +++ b/cpp/src/glm/qn/simple_mat/dense.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2024, NVIDIA CORPORATION. + * Copyright (c) 2018-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -323,8 +323,8 @@ inline T nrm2(const SimpleVec& u, T* tmp_dev, cudaStream_t stream) template inline T nrm1(const SimpleVec& u, T* tmp_dev, cudaStream_t stream) { - raft::linalg::rowNorm( - tmp_dev, u.data, u.len, 1, raft::linalg::L1Norm, true, stream, raft::Nop()); + raft::linalg::rowNorm( + tmp_dev, u.data, u.len, 1, raft::linalg::L1Norm, stream, raft::Nop()); T tmp_host; raft::update_host(&tmp_host, tmp_dev, 1, stream); raft::interruptible::synchronize(stream); diff --git a/cpp/src/hdbscan/detail/utils.h b/cpp/src/hdbscan/detail/utils.h index e3414fd507..37421cc010 100644 --- a/cpp/src/hdbscan/detail/utils.h +++ b/cpp/src/hdbscan/detail/utils.h @@ -190,8 +190,8 @@ void normalize(value_t* data, value_idx n, size_t m, cudaStream_t stream) rmm::device_uvector sums(m, stream); // Compute row sums - raft::linalg::rowNorm( - sums.data(), data, (size_t)n, m, raft::linalg::L1Norm, true, stream); + raft::linalg::rowNorm( + sums.data(), data, (size_t)n, m, raft::linalg::L1Norm, stream); // Divide vector by row sums (modify in place) raft::linalg::matrixVectorOp( @@ -229,18 +229,15 @@ void softmax(const raft::handle_t& handle, value_t* data, value_idx n, size_t m) raft::make_device_vector_view(linf_norm.data(), (int)m); auto linf_norm_view = raft::make_device_vector_view(linf_norm.data(), (int)m); - raft::linalg::norm(handle, - data_const_view, - linf_norm_view, - raft::linalg::LinfNorm, - raft::linalg::Apply::ALONG_ROWS); + raft::linalg::norm( + handle, data_const_view, linf_norm_view, raft::linalg::LinfNorm); raft::linalg::matrix_vector_op( handle, data_const_view, linf_norm_const_view, data_view, - raft::linalg::Apply::ALONG_COLUMNS, + raft::Apply::ALONG_COLUMNS, [] __device__(value_t mat_in, value_t vec_in) { return exp(mat_in - vec_in); }); } diff --git a/cpp/src/pca/pca.cuh b/cpp/src/pca/pca.cuh index bd4f758440..8df17e0065 100644 --- a/cpp/src/pca/pca.cuh +++ b/cpp/src/pca/pca.cuh @@ -72,13 +72,12 @@ void truncCompExpVars(const raft::handle_t& handle, // Compute 
the scalar noise_vars defined as (pseudocode) // (n_components < min(n_cols, n_rows)) ? explained_var_all[n_components:].mean() : 0 if (prms.n_components < prms.n_cols && prms.n_components < prms.n_rows) { - raft::stats::mean(noise_vars, - explained_var_all.data() + prms.n_components, - std::size_t{1}, - prms.n_cols - prms.n_components, - false, - true, - stream); + raft::stats::mean(noise_vars, + explained_var_all.data() + prms.n_components, + std::size_t{1}, + prms.n_cols - prms.n_components, + true, + stream); } else { raft::matrix::setValue(noise_vars, noise_vars, math_t{0}, 1, stream); } @@ -123,7 +122,7 @@ void pcaFit(const raft::handle_t& handle, auto n_components = prms.n_components; if (n_components > prms.n_cols) n_components = prms.n_cols; - raft::stats::mean(mu, input, prms.n_cols, prms.n_rows, false, false, stream); + raft::stats::mean(mu, input, prms.n_cols, prms.n_rows, false, stream); auto len = prms.n_cols * prms.n_cols; rmm::device_uvector cov(len, stream); diff --git a/cpp/src/pca/pca_mg.cu b/cpp/src/pca/pca_mg.cu index 7974328ab1..3a74e4ed75 100644 --- a/cpp/src/pca/pca_mg.cu +++ b/cpp/src/pca/pca_mg.cu @@ -194,13 +194,12 @@ void fit_impl(raft::handle_t& handle, // Compute the scalar noise_vars defined as (pseudocode) // (n_components < min(n_cols, n_rows)) ? 
explained_var_all[n_components:].mean() : 0 if (prms.n_components < prms.n_cols && prms.n_components < prms.n_rows) { - raft::stats::mean(noise_vars, - explained_var_all.data() + prms.n_components, - std::size_t{1}, - prms.n_cols - prms.n_components, - false, - true, - stream); + raft::stats::mean(noise_vars, + explained_var_all.data() + prms.n_components, + std::size_t{1}, + prms.n_cols - prms.n_components, + true, + stream); } else { raft::matrix::setValue(noise_vars, noise_vars, T{0}, 1, stream); } diff --git a/cpp/src/solver/cd.cuh b/cpp/src/solver/cd.cuh index 14aeecd226..2156835af1 100644 --- a/cpp/src/solver/cd.cuh +++ b/cpp/src/solver/cd.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2024, NVIDIA CORPORATION. + * Copyright (c) 2018-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -170,7 +170,7 @@ void cdFit(const raft::handle_t& handle, if (sample_weight != nullptr) { rmm::device_scalar sum_sw(stream); - raft::stats::sum(sum_sw.data(), sample_weight, 1, n_rows, true, stream); + raft::stats::sum(sum_sw.data(), sample_weight, 1, n_rows, stream); raft::update_host(&h_sum_sw, sum_sw.data(), 1, stream); raft::linalg::multiplyScalar( @@ -222,8 +222,8 @@ void cdFit(const raft::handle_t& handle, math_t scalar = math_t(n_rows) + l2_alpha; raft::matrix::setValue(squared.data(), squared.data(), scalar, n_cols, stream); } else { - raft::linalg::colNorm( - squared.data(), input, n_cols, n_rows, raft::linalg::L2Norm, false, stream); + raft::linalg::colNorm( + squared.data(), input, n_cols, n_rows, raft::linalg::L2Norm, stream); raft::linalg::addScalar(squared.data(), squared.data(), l2_alpha, n_cols, stream); } diff --git a/cpp/src/svm/sparse_util.cuh b/cpp/src/svm/sparse_util.cuh index 300264c483..8fe3d12e82 100644 --- a/cpp/src/svm/sparse_util.cuh +++ b/cpp/src/svm/sparse_util.cuh @@ -427,13 +427,21 @@ void matrixRowNorm(const raft::handle_t& handle, bool 
is_col_major_contiguous = matrix.stride(0) == 1 && matrix.stride(1) == matrix.extent(0); ASSERT(is_row_major_contiguous || is_col_major_contiguous, "Dense matrix rowNorm only support contiguous data"); - raft::linalg::rowNorm(target, - matrix.data_handle(), - matrix.extent(1), //! cols first arg! - matrix.extent(0), - norm, - is_row_major_contiguous, - handle.get_stream()); + if (is_row_major_contiguous) { + raft::linalg::rowNorm(target, + matrix.data_handle(), + matrix.extent(1), //! cols first arg! + matrix.extent(0), + norm, + handle.get_stream()); + } else { + raft::linalg::rowNorm(target, + matrix.data_handle(), + matrix.extent(1), //! cols first arg! + matrix.extent(0), + norm, + handle.get_stream()); + } } /** diff --git a/cpp/src/tsne/exact_tsne.cuh b/cpp/src/tsne/exact_tsne.cuh index 680c3200e8..e977d91ff7 100644 --- a/cpp/src/tsne/exact_tsne.cuh +++ b/cpp/src/tsne/exact_tsne.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2024, NVIDIA CORPORATION. + * Copyright (c) 2019-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -102,7 +102,7 @@ value_t Exact_TSNE(value_t* VAL, } // Get row norm of Y - raft::linalg::rowNorm(norm.data(), Y, dim, n, raft::linalg::L2Norm, false, stream); + raft::linalg::rowNorm(norm.data(), Y, dim, n, raft::linalg::L2Norm, stream); bool last_iter = iter == params.max_iter - 1; diff --git a/cpp/src/tsvd/tsvd.cuh b/cpp/src/tsvd/tsvd.cuh index b032b1a69a..1d46c061dd 100644 --- a/cpp/src/tsvd/tsvd.cuh +++ b/cpp/src/tsvd/tsvd.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2024, NVIDIA CORPORATION. + * Copyright (c) 2018-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -270,25 +270,19 @@ void tsvdFitTransform(const raft::handle_t& handle, signFlip(trans_input, prms.n_rows, prms.n_components, components, prms.n_cols, stream); rmm::device_uvector<math_t> mu_trans(prms.n_components, stream); - raft::stats::mean( - mu_trans.data(), trans_input, prms.n_components, prms.n_rows, false, false, stream); - raft::stats::vars(explained_var, - trans_input, - mu_trans.data(), - prms.n_components, - prms.n_rows, - false, - false, - stream); + raft::stats::mean( + mu_trans.data(), trans_input, prms.n_components, prms.n_rows, false, stream); + raft::stats::vars( + explained_var, trans_input, mu_trans.data(), prms.n_components, prms.n_rows, false, stream); rmm::device_uvector<math_t> mu(prms.n_cols, stream); rmm::device_uvector<math_t> vars(prms.n_cols, stream); - raft::stats::mean(mu.data(), input, prms.n_cols, prms.n_rows, false, false, stream); - raft::stats::vars(vars.data(), input, mu.data(), prms.n_cols, prms.n_rows, false, false, stream); + raft::stats::mean(mu.data(), input, prms.n_cols, prms.n_rows, false, stream); + raft::stats::vars(vars.data(), input, mu.data(), prms.n_cols, prms.n_rows, false, stream); rmm::device_scalar<math_t> total_vars(stream); - raft::stats::sum(total_vars.data(), vars.data(), std::size_t(1), prms.n_cols, false, stream); + raft::stats::sum(total_vars.data(), vars.data(), std::size_t(1), prms.n_cols, stream); math_t total_vars_h; raft::update_host(&total_vars_h, total_vars.data(), 1, stream); diff --git a/cpp/src/tsvd/tsvd_mg.cu b/cpp/src/tsvd/tsvd_mg.cu index 9ea3133266..b98c48f13e 100644 --- a/cpp/src/tsvd/tsvd_mg.cu +++ b/cpp/src/tsvd/tsvd_mg.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2024, NVIDIA CORPORATION. + * Copyright (c) 2019-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License.
@@ -353,8 +353,8 @@ void fit_transform_impl(raft::handle_t& handle, Stats::opg::var(handle, var_input_data, input_data, input_desc, mu_data.ptr, streams, n_streams); rmm::device_uvector total_vars(1, streams[0]); - raft::stats::sum( - total_vars.data(), var_input_data.ptr, std::size_t(1), prms.n_cols, false, streams[0]); + raft::stats::sum( + total_vars.data(), var_input_data.ptr, std::size_t(1), prms.n_cols, streams[0]); T total_vars_h; raft::update_host(&total_vars_h, total_vars.data(), std::size_t(1), streams[0]); diff --git a/cpp/src/umap/fuzzy_simpl_set/naive.cuh b/cpp/src/umap/fuzzy_simpl_set/naive.cuh index 29f632692d..4d1fa85afd 100644 --- a/cpp/src/umap/fuzzy_simpl_set/naive.cuh +++ b/cpp/src/umap/fuzzy_simpl_set/naive.cuh @@ -254,8 +254,8 @@ void smooth_knn_dist(nnz_t n, rmm::device_uvector dist_means_dev(n_neighbors, stream); - raft::stats::mean( - dist_means_dev.data(), knn_dists, nnz_t{1}, n * n_neighbors, false, false, stream); + raft::stats::mean( + dist_means_dev.data(), knn_dists, nnz_t{1}, n * n_neighbors, false, stream); RAFT_CUDA_TRY(cudaPeekAtLastError()); value_t mean_dist = 0.0; diff --git a/cpp/src/umap/optimize.cuh b/cpp/src/umap/optimize.cuh index 4862de112f..928b156672 100644 --- a/cpp/src/umap/optimize.cuh +++ b/cpp/src/umap/optimize.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2024, NVIDIA CORPORATION. + * Copyright (c) 2019-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -119,8 +119,8 @@ void abLossGrads( /** * Finally, take the mean */ - raft::stats::mean(grads, a_deriv.data(), 1, n_rows, false, false, stream); - raft::stats::mean(grads + 1, b_deriv.data(), 1, n_rows, false, false, stream); + raft::stats::mean(grads, a_deriv.data(), 1, n_rows, false, stream); + raft::stats::mean(grads + 1, b_deriv.data(), 1, n_rows, false, stream); RAFT_CUDA_TRY(cudaPeekAtLastError()); } diff --git a/cpp/src_prims/functions/hinge.cuh b/cpp/src_prims/functions/hinge.cuh index 76bcc11909..91f0412810 100644 --- a/cpp/src_prims/functions/hinge.cuh +++ b/cpp/src_prims/functions/hinge.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2024, NVIDIA CORPORATION. + * Copyright (c) 2018-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -125,7 +125,7 @@ void hingeLossGrads(const raft::handle_t& handle, raft::linalg::eltwiseMultiply(labels_pred.data(), labels_pred.data(), labels, n_rows, stream); hingeLossGradMult(input, labels, labels_pred.data(), n_rows, n_cols, stream); - raft::stats::mean(grads, input, n_cols, n_rows, false, false, stream); + raft::stats::mean(grads, input, n_cols, n_rows, false, stream); rmm::device_uvector pen_grads(0, stream); @@ -173,7 +173,7 @@ void hingeLoss(const raft::handle_t& handle, hingeLossSubtract(labels_pred.data(), labels_pred.data(), math_t(1), n_rows, stream); - raft::stats::sum(loss, labels_pred.data(), 1, n_rows, false, stream); + raft::stats::sum(loss, labels_pred.data(), 1, n_rows, stream); rmm::device_uvector pen_val(0, stream); diff --git a/cpp/src_prims/functions/linearReg.cuh b/cpp/src_prims/functions/linearReg.cuh index f9c4d2a969..b30e1fa6c7 100644 --- a/cpp/src_prims/functions/linearReg.cuh +++ b/cpp/src_prims/functions/linearReg.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2024, NVIDIA CORPORATION. + * Copyright (c) 2018-2025, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -70,7 +70,7 @@ void linearRegLossGrads(const raft::handle_t& handle, raft::matrix::matrixVectorBinaryMult( input, labels_pred.data(), n_rows, n_cols, false, false, stream); - raft::stats::mean(grads, input, n_cols, n_rows, false, false, stream); + raft::stats::mean(grads, input, n_cols, n_rows, false, stream); raft::linalg::scalarMultiply(grads, grads, math_t(2), n_cols, stream); rmm::device_uvector pen_grads(0, stream); @@ -107,7 +107,7 @@ void linearRegLoss(const raft::handle_t& handle, raft::linalg::subtract(labels_pred.data(), labels, labels_pred.data(), n_rows, stream); raft::matrix::power(labels_pred.data(), n_rows, stream); - raft::stats::mean(loss, labels_pred.data(), 1, n_rows, false, false, stream); + raft::stats::mean(loss, labels_pred.data(), 1, n_rows, false, stream); rmm::device_uvector pen_val(0, stream); diff --git a/cpp/src_prims/functions/logisticReg.cuh b/cpp/src_prims/functions/logisticReg.cuh index c6bba457a6..59ad0d8855 100644 --- a/cpp/src_prims/functions/logisticReg.cuh +++ b/cpp/src_prims/functions/logisticReg.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2024, NVIDIA CORPORATION. + * Copyright (c) 2018-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -73,7 +73,7 @@ void logisticRegLossGrads(const raft::handle_t& handle, raft::matrix::matrixVectorBinaryMult( input, labels_pred.data(), n_rows, n_cols, false, false, stream); - raft::stats::mean(grads, input, n_cols, n_rows, false, false, stream); + raft::stats::mean(grads, input, n_cols, n_rows, false, stream); rmm::device_uvector pen_grads(0, stream); @@ -136,7 +136,7 @@ void logisticRegLoss(const raft::handle_t& handle, logisticRegH(handle, input, n_rows, n_cols, coef, labels_pred.data(), math_t(0), stream); logLoss(labels_pred.data(), labels, labels_pred.data(), n_rows, stream); - raft::stats::mean(loss, labels_pred.data(), 1, n_rows, false, false, stream); + raft::stats::mean(loss, labels_pred.data(), 1, n_rows, false, stream); rmm::device_uvector pen_val(0, stream); diff --git a/cpp/src_prims/functions/penalty.cuh b/cpp/src_prims/functions/penalty.cuh index 034d943c1c..19bf14abbc 100644 --- a/cpp/src_prims/functions/penalty.cuh +++ b/cpp/src_prims/functions/penalty.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2024, NVIDIA CORPORATION. + * Copyright (c) 2018-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -41,7 +41,7 @@ enum penalty { template <typename math_t> void lasso(math_t* out, const math_t* coef, const int len, const math_t alpha, cudaStream_t stream) { - raft::linalg::rowNorm(out, coef, len, 1, raft::linalg::NormType::L1Norm, true, stream); + raft::linalg::rowNorm(out, coef, len, 1, raft::linalg::NormType::L1Norm, stream); raft::linalg::scalarMultiply(out, out, alpha, 1, stream); } @@ -55,7 +55,7 @@ void lassoGrad( template <typename math_t> void ridge(math_t* out, const math_t* coef, const int len, const math_t alpha, cudaStream_t stream) { - raft::linalg::rowNorm(out, coef, len, 1, raft::linalg::NormType::L2Norm, true, stream); + raft::linalg::rowNorm(out, coef, len, 1, raft::linalg::NormType::L2Norm, stream); raft::linalg::scalarMultiply(out, out, alpha, 1, stream); } diff --git a/cpp/src_prims/timeSeries/stationarity.cuh b/cpp/src_prims/timeSeries/stationarity.cuh index 31c140f304..9bf89c39dc 100644 --- a/cpp/src_prims/timeSeries/stationarity.cuh +++ b/cpp/src_prims/timeSeries/stationarity.cuh @@ -214,7 +214,7 @@ static void _kpss_test(const DataT* d_y, // Compute mean rmm::device_uvector<DataT> y_means(batch_size, stream); - raft::stats::mean(y_means.data(), d_y, batch_size, n_obs, false, false, stream); + raft::stats::mean(y_means.data(), d_y, batch_size, n_obs, false, stream); // Center the data around its mean rmm::device_uvector<DataT> y_cent(batch_size * n_obs, stream); @@ -231,17 +231,15 @@ static void _kpss_test(const DataT* d_y, // This calculates the first sum in eq. 10 (first part of s^2) rmm::device_uvector<DataT> s2A(batch_size, stream); - raft::linalg::reduce(s2A.data(), - y_cent.data(), - batch_size, - n_obs, - static_cast<DataT>(0.0), - false, - false, - stream, - false, - raft::L2Op<DataT>(), - raft::add_op()); + raft::linalg::reduce(s2A.data(), + y_cent.data(), + batch_size, + n_obs, + static_cast<DataT>(0.0), + stream, + false, + raft::L2Op<DataT>(), + raft::add_op()); // From Kwiatkowski et al.
referencing Schwert (1989) DataT lags_f = ceil(12.0 * pow(n_obs_f / 100.0, 0.25)); @@ -263,15 +261,8 @@ static void _kpss_test(const DataT* d_y, coeff_base); RAFT_CUDA_TRY(cudaPeekAtLastError()); rmm::device_uvector<DataT> s2B(batch_size, stream); - raft::linalg::reduce(s2B.data(), - accumulator.data(), - batch_size, - n_obs, - static_cast<DataT>(0.0), - false, - false, - stream, - false); + raft::linalg::reduce( + s2B.data(), accumulator.data(), batch_size, n_obs, static_cast<DataT>(0.0), stream, false); // Cumulative sum (inclusive scan with + operator) thrust::counting_iterator<int> c_first(0); @@ -285,17 +276,15 @@ static void _kpss_test(const DataT* d_y, // Eq. 11 (eta) rmm::device_uvector<DataT> eta(batch_size, stream); - raft::linalg::reduce(eta.data(), - accumulator.data(), - batch_size, - n_obs, - static_cast<DataT>(0.0), - false, - false, - stream, - false, - raft::L2Op<DataT>(), - raft::add_op()); + raft::linalg::reduce(eta.data(), + accumulator.data(), + batch_size, + n_obs, + static_cast<DataT>(0.0), + stream, + false, + raft::L2Op<DataT>(), + raft::add_op()); /* The following kernel will decide whether each series is stationary based on * s^2 and eta */ diff --git a/cpp/tests/prims/knn_regression.cu b/cpp/tests/prims/knn_regression.cu index 07ae30dfd5..0f0cc12304 100644 --- a/cpp/tests/prims/knn_regression.cu +++ b/cpp/tests/prims/knn_regression.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2024, NVIDIA CORPORATION. + * Copyright (c) 2019-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License.
@@ -61,14 +61,12 @@ void generate_data( [=] __device__(float input) { return 2 * input - 1; }, stream); - raft::linalg::reduce( + raft::linalg::reduce( out_labels, out_samples, n_cols, n_rows, 0.0f, - true, - true, stream, false, [=] __device__(float in, int n) { return in * in; }, diff --git a/cpp/tests/sg/cd_test.cu b/cpp/tests/sg/cd_test.cu index 34ee253438..f8ec4bf4d4 100644 --- a/cpp/tests/sg/cd_test.cu +++ b/cpp/tests/sg/cd_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2024, NVIDIA CORPORATION. + * Copyright (c) 2019-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -256,9 +256,9 @@ TEST_P(CdTestF, Fit) rmm::device_uvector vars_1(params.n_col, stream); rmm::device_uvector vars_2(params.n_col, stream); - raft::stats::mean(means_1.data(), data.data(), params.n_col, params.n_row, false, false, stream); - raft::stats::vars( - vars_1.data(), data.data(), means_1.data(), params.n_col, params.n_row, false, false, stream); + raft::stats::mean(means_1.data(), data.data(), params.n_col, params.n_row, false, stream); + raft::stats::vars( + vars_1.data(), data.data(), means_1.data(), params.n_col, params.n_row, false, stream); raft::stats::meanvar( means_2.data(), vars_2.data(), data.data(), params.n_col, params.n_row, false, false, stream); @@ -291,9 +291,9 @@ TEST_P(CdTestD, Fit) rmm::device_uvector vars_1(params.n_col, stream); rmm::device_uvector vars_2(params.n_col, stream); - raft::stats::mean(means_1.data(), data.data(), params.n_col, params.n_row, false, false, stream); - raft::stats::vars( - vars_1.data(), data.data(), means_1.data(), params.n_col, params.n_row, false, false, stream); + raft::stats::mean(means_1.data(), data.data(), params.n_col, params.n_row, false, stream); + raft::stats::vars( + vars_1.data(), data.data(), means_1.data(), params.n_col, params.n_row, false, stream); raft::stats::meanvar( means_2.data(), vars_2.data(), 
data.data(), params.n_col, params.n_row, false, false, stream); From 1a83c33e1a1019d4b25778271fab8d160e6de221 Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 21 May 2025 17:03:48 -0700 Subject: [PATCH 2/8] correct args --- cpp/src/glm/qn/glm_base.cuh | 2 +- cpp/src/glm/qn/mg/glm_base_mg.cuh | 2 +- cpp/src/pca/pca.cuh | 12 ++++++------ cpp/src/pca/pca_mg.cu | 12 ++++++------ 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/cpp/src/glm/qn/glm_base.cuh b/cpp/src/glm/qn/glm_base.cuh index 6783fe22f6..b94b6f1c80 100644 --- a/cpp/src/glm/qn/glm_base.cuh +++ b/cpp/src/glm/qn/glm_base.cuh @@ -88,7 +88,7 @@ inline void linearBwd(const raft::handle_t& handle, // TODO can this be fused somehow? Gweights.assign_gemm(handle, 1.0 / X.m, dZ, false, X, false, beta, stream); - raft::stats::mean(Gbias.data, dZ.data, dZ.m, dZ.n, true, stream); + raft::stats::mean(Gbias.data, dZ.data, dZ.m, dZ.n, false, stream); } else { G.assign_gemm(handle, 1.0 / X.m, dZ, false, X, false, beta, stream); } diff --git a/cpp/src/glm/qn/mg/glm_base_mg.cuh b/cpp/src/glm/qn/mg/glm_base_mg.cuh index 565991b295..ef5e48a068 100644 --- a/cpp/src/glm/qn/mg/glm_base_mg.cuh +++ b/cpp/src/glm/qn/mg/glm_base_mg.cuh @@ -63,7 +63,7 @@ inline void linearBwdMG(const raft::handle_t& handle, // TODO can this be fused somehow? Gweights.assign_gemm(handle, 1.0 / n_samples, dZ, false, X, false, beta / n_ranks, stream); - raft::stats::mean(Gbias.data, dZ.data, dZ.m, dZ.n, true, stream); + raft::stats::mean(Gbias.data, dZ.data, dZ.m, dZ.n, false, stream); T bias_factor = 1.0 * dZ.n / n_samples; raft::linalg::multiplyScalar(Gbias.data, Gbias.data, bias_factor, dZ.m, stream); diff --git a/cpp/src/pca/pca.cuh b/cpp/src/pca/pca.cuh index 8df17e0065..9d0f9eb5ae 100644 --- a/cpp/src/pca/pca.cuh +++ b/cpp/src/pca/pca.cuh @@ -72,12 +72,12 @@ void truncCompExpVars(const raft::handle_t& handle, // Compute the scalar noise_vars defined as (pseudocode) // (n_components < min(n_cols, n_rows)) ? 
explained_var_all[n_components:].mean() : 0 if (prms.n_components < prms.n_cols && prms.n_components < prms.n_rows) { - raft::stats::mean(noise_vars, - explained_var_all.data() + prms.n_components, - std::size_t{1}, - prms.n_cols - prms.n_components, - true, - stream); + raft::stats::mean(noise_vars, + explained_var_all.data() + prms.n_components, + std::size_t{1}, + prms.n_cols - prms.n_components, + false, + stream); } else { raft::matrix::setValue(noise_vars, noise_vars, math_t{0}, 1, stream); } diff --git a/cpp/src/pca/pca_mg.cu b/cpp/src/pca/pca_mg.cu index 3a74e4ed75..5df4a4b71e 100644 --- a/cpp/src/pca/pca_mg.cu +++ b/cpp/src/pca/pca_mg.cu @@ -194,12 +194,12 @@ void fit_impl(raft::handle_t& handle, // Compute the scalar noise_vars defined as (pseudocode) // (n_components < min(n_cols, n_rows)) ? explained_var_all[n_components:].mean() : 0 if (prms.n_components < prms.n_cols && prms.n_components < prms.n_rows) { - raft::stats::mean(noise_vars, - explained_var_all.data() + prms.n_components, - std::size_t{1}, - prms.n_cols - prms.n_components, - true, - stream); + raft::stats::mean(noise_vars, + explained_var_all.data() + prms.n_components, + std::size_t{1}, + prms.n_cols - prms.n_components, + false, + stream); } else { raft::matrix::setValue(noise_vars, noise_vars, T{0}, 1, stream); } From ec503fac8efed40e7c309a5e94bd045fa97a9b67 Mon Sep 17 00:00:00 2001 From: divyegala Date: Thu, 22 May 2025 12:55:44 -0700 Subject: [PATCH 3/8] building with norm type --- cpp/src/dbscan/vertexdeg/algo.cuh | 9 +---- cpp/src/glm/preprocess.cuh | 11 ++---- cpp/src/glm/qn/simple_mat/dense.hpp | 4 +- cpp/src/hdbscan/detail/utils.h | 8 ++-- cpp/src/solver/cd.cuh | 4 +- cpp/src/svm/sparse_util.cuh | 60 +++++++++++++++++++++++------ cpp/src/tsne/exact_tsne.cuh | 2 +- cpp/src_prims/functions/penalty.cuh | 4 +- 8 files changed, 65 insertions(+), 37 deletions(-) diff --git a/cpp/src/dbscan/vertexdeg/algo.cuh b/cpp/src/dbscan/vertexdeg/algo.cuh index 7eaaaffbda..96a0377570 100644 --- 
a/cpp/src/dbscan/vertexdeg/algo.cuh +++ b/cpp/src/dbscan/vertexdeg/algo.cuh @@ -185,13 +185,8 @@ void launcher(const raft::handle_t& handle, if (metric == cuvs::distance::DistanceType::CosineExpanded) { rmm::device_uvector rowNorms(m, stream); - raft::linalg::rowNorm(rowNorms.data(), - data.x, - k, - m, - raft::linalg::NormType::L2Norm, - stream, - [] __device__(value_t in) { return sqrtf(in); }); + raft::linalg::rowNorm( + rowNorms.data(), data.x, k, m, stream, [] __device__(value_t in) { return sqrtf(in); }); /* Cast away constness because the output matrix for normalization cannot be of const type. * Input matrix will be modified due to normalization. diff --git a/cpp/src/glm/preprocess.cuh b/cpp/src/glm/preprocess.cuh index c252ce13c4..69c497f1bd 100644 --- a/cpp/src/glm/preprocess.cuh +++ b/cpp/src/glm/preprocess.cuh @@ -100,13 +100,10 @@ void preProcessData(const raft::handle_t& handle, } raft::stats::meanCenter(input, input, mu_input, n_cols, n_rows, false, true, stream); if (normalize) { - raft::linalg::colNorm(norm2_input, - input, - n_cols, - n_rows, - raft::linalg::L2Norm, - stream, - [] __device__(math_t v) { return raft::sqrt(v); }); + raft::linalg::colNorm( + norm2_input, input, n_cols, n_rows, stream, [] __device__(math_t v) { + return raft::sqrt(v); + }); raft::matrix::matrixVectorBinaryDivSkipZero( input, norm2_input, n_rows, n_cols, false, true, stream, true); } diff --git a/cpp/src/glm/qn/simple_mat/dense.hpp b/cpp/src/glm/qn/simple_mat/dense.hpp index f630b1b526..1b6ee2a59a 100644 --- a/cpp/src/glm/qn/simple_mat/dense.hpp +++ b/cpp/src/glm/qn/simple_mat/dense.hpp @@ -323,8 +323,8 @@ inline T nrm2(const SimpleVec& u, T* tmp_dev, cudaStream_t stream) template inline T nrm1(const SimpleVec& u, T* tmp_dev, cudaStream_t stream) { - raft::linalg::rowNorm( - tmp_dev, u.data, u.len, 1, raft::linalg::L1Norm, stream, raft::Nop()); + raft::linalg::rowNorm( + tmp_dev, u.data, u.len, 1, stream, raft::Nop()); T tmp_host; raft::update_host(&tmp_host, tmp_dev, 
1, stream); raft::interruptible::synchronize(stream); diff --git a/cpp/src/hdbscan/detail/utils.h b/cpp/src/hdbscan/detail/utils.h index 37421cc010..ff43568c24 100644 --- a/cpp/src/hdbscan/detail/utils.h +++ b/cpp/src/hdbscan/detail/utils.h @@ -190,8 +190,8 @@ void normalize(value_t* data, value_idx n, size_t m, cudaStream_t stream) rmm::device_uvector sums(m, stream); // Compute row sums - raft::linalg::rowNorm( - sums.data(), data, (size_t)n, m, raft::linalg::L1Norm, stream); + raft::linalg::rowNorm( + sums.data(), data, (size_t)n, m, stream); // Divide vector by row sums (modify in place) raft::linalg::matrixVectorOp( @@ -229,8 +229,8 @@ void softmax(const raft::handle_t& handle, value_t* data, value_idx n, size_t m) raft::make_device_vector_view(linf_norm.data(), (int)m); auto linf_norm_view = raft::make_device_vector_view(linf_norm.data(), (int)m); - raft::linalg::norm( - handle, data_const_view, linf_norm_view, raft::linalg::LinfNorm); + raft::linalg::norm( + handle, data_const_view, linf_norm_view); raft::linalg::matrix_vector_op( handle, diff --git a/cpp/src/solver/cd.cuh b/cpp/src/solver/cd.cuh index 2156835af1..22b30d9372 100644 --- a/cpp/src/solver/cd.cuh +++ b/cpp/src/solver/cd.cuh @@ -222,8 +222,8 @@ void cdFit(const raft::handle_t& handle, math_t scalar = math_t(n_rows) + l2_alpha; raft::matrix::setValue(squared.data(), squared.data(), scalar, n_cols, stream); } else { - raft::linalg::colNorm( - squared.data(), input, n_cols, n_rows, raft::linalg::L2Norm, stream); + raft::linalg::colNorm( + squared.data(), input, n_cols, n_rows, stream); raft::linalg::addScalar(squared.data(), squared.data(), l2_alpha, n_cols, stream); } diff --git a/cpp/src/svm/sparse_util.cuh b/cpp/src/svm/sparse_util.cuh index 8fe3d12e82..b7950c2a70 100644 --- a/cpp/src/svm/sparse_util.cuh +++ b/cpp/src/svm/sparse_util.cuh @@ -428,19 +428,55 @@ void matrixRowNorm(const raft::handle_t& handle, ASSERT(is_row_major_contiguous || is_col_major_contiguous, "Dense matrix rowNorm only 
support contiguous data"); if (is_row_major_contiguous) { - raft::linalg::rowNorm(target, - matrix.data_handle(), - matrix.extent(1), //! cols first arg! - matrix.extent(0), - norm, - handle.get_stream()); + if (norm == raft::linalg::NormType::L2Norm) { + raft::linalg::rowNorm( + target, + matrix.data_handle(), + matrix.extent(1), //! cols first arg! + matrix.extent(0), + handle.get_stream()); + } else if (norm == raft::linalg::NormType::L1Norm) { + raft::linalg::rowNorm( + target, + matrix.data_handle(), + matrix.extent(1), //! cols first arg! + matrix.extent(0), + handle.get_stream()); + } else if (norm == raft::linalg::NormType::LinfNorm) { + raft::linalg::rowNorm( + target, + matrix.data_handle(), + matrix.extent(1), //! cols first arg! + matrix.extent(0), + handle.get_stream()); + } else { + RAFT_FAIL("Unsupported norm type"); + } } else { - raft::linalg::rowNorm(target, - matrix.data_handle(), - matrix.extent(1), //! cols first arg! - matrix.extent(0), - norm, - handle.get_stream()); + if (norm == raft::linalg::NormType::L2Norm) { + raft::linalg::rowNorm( + target, + matrix.data_handle(), + matrix.extent(1), //! cols first arg! + matrix.extent(0), + handle.get_stream()); + } else if (norm == raft::linalg::NormType::L1Norm) { + raft::linalg::rowNorm( + target, + matrix.data_handle(), + matrix.extent(1), //! cols first arg! + matrix.extent(0), + handle.get_stream()); + } else if (norm == raft::linalg::NormType::LinfNorm) { + raft::linalg::rowNorm( + target, + matrix.data_handle(), + matrix.extent(1), //! cols first arg! 
+ matrix.extent(0), + handle.get_stream()); + } else { + RAFT_FAIL("Unsupported norm type"); + } } } diff --git a/cpp/src/tsne/exact_tsne.cuh b/cpp/src/tsne/exact_tsne.cuh index e977d91ff7..7c2e1cb702 100644 --- a/cpp/src/tsne/exact_tsne.cuh +++ b/cpp/src/tsne/exact_tsne.cuh @@ -102,7 +102,7 @@ value_t Exact_TSNE(value_t* VAL, } // Get row norm of Y - raft::linalg::rowNorm(norm.data(), Y, dim, n, raft::linalg::L2Norm, stream); + raft::linalg::rowNorm(norm.data(), Y, dim, n, stream); bool last_iter = iter == params.max_iter - 1; diff --git a/cpp/src_prims/functions/penalty.cuh b/cpp/src_prims/functions/penalty.cuh index 19bf14abbc..4df066822b 100644 --- a/cpp/src_prims/functions/penalty.cuh +++ b/cpp/src_prims/functions/penalty.cuh @@ -41,7 +41,7 @@ enum penalty { template void lasso(math_t* out, const math_t* coef, const int len, const math_t alpha, cudaStream_t stream) { - raft::linalg::rowNorm(out, coef, len, 1, raft::linalg::NormType::L1Norm, stream); + raft::linalg::rowNorm(out, coef, len, 1, stream); raft::linalg::scalarMultiply(out, out, alpha, 1, stream); } @@ -55,7 +55,7 @@ void lassoGrad( template void ridge(math_t* out, const math_t* coef, const int len, const math_t alpha, cudaStream_t stream) { - raft::linalg::rowNorm(out, coef, len, 1, raft::linalg::NormType::L2Norm, stream); + raft::linalg::rowNorm(out, coef, len, 1, stream); raft::linalg::scalarMultiply(out, out, alpha, 1, stream); } From e49a0fd61a22afaf14d9710b2b4463b7dbdc981f Mon Sep 17 00:00:00 2001 From: divyegala Date: Tue, 27 May 2025 22:10:34 -0700 Subject: [PATCH 4/8] print binary size in mb --- build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sh b/build.sh index a872076e18..878cc450eb 100755 --- a/build.sh +++ b/build.sh @@ -330,7 +330,7 @@ if (! hasArg --configure-only) && (completeBuild || hasArg libcuml || hasArg pri MSG="${MSG}
parallel setting: $PARALLEL_LEVEL" MSG="${MSG}
parallel build time: $compile_total seconds" if [[ -f "${LIBCUML_BUILD_DIR}/libcuml++.so" ]]; then - LIBCUML_FS=$(ls -lh ${LIBCUML_BUILD_DIR}/libcuml++.so | awk '{print $5}') + LIBCUML_FS=$(stat -f %z ${LIBCUVS_BUILD_DIR}/libcuvs.so | awk '{printf "%.2f MB", $1/1024/1024}') MSG="${MSG}
libcuml++.so size: $LIBCUML_FS" fi BMR_DIR=${RAPIDS_ARTIFACTS_DIR:-"${LIBCUML_BUILD_DIR}"} From 988a5aec5bc21f9edda53c312a308bd3c8a11219 Mon Sep 17 00:00:00 2001 From: divyegala Date: Tue, 27 May 2025 22:10:56 -0700 Subject: [PATCH 5/8] fix typo --- build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sh b/build.sh index 878cc450eb..e8943892ad 100755 --- a/build.sh +++ b/build.sh @@ -330,7 +330,7 @@ if (! hasArg --configure-only) && (completeBuild || hasArg libcuml || hasArg pri MSG="${MSG}
parallel setting: $PARALLEL_LEVEL" MSG="${MSG}
parallel build time: $compile_total seconds" if [[ -f "${LIBCUML_BUILD_DIR}/libcuml++.so" ]]; then - LIBCUML_FS=$(stat -f %z ${LIBCUVS_BUILD_DIR}/libcuvs.so | awk '{printf "%.2f MB", $1/1024/1024}') + LIBCUML_FS=$(stat -f %z ${LIBCUML_BUILD_DIR}/libcuml++.so | awk '{printf "%.2f MB", $1/1024/1024}') MSG="${MSG}
libcuml++.so size: $LIBCUML_FS" fi BMR_DIR=${RAPIDS_ARTIFACTS_DIR:-"${LIBCUML_BUILD_DIR}"} From a531b8b3842a80af0a77b24f17c6b1e6086873ff Mon Sep 17 00:00:00 2001 From: divyegala Date: Tue, 27 May 2025 22:14:07 -0700 Subject: [PATCH 6/8] correct linux command --- build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sh b/build.sh index e8943892ad..dfc6cfedd9 100755 --- a/build.sh +++ b/build.sh @@ -330,7 +330,7 @@ if (! hasArg --configure-only) && (completeBuild || hasArg libcuml || hasArg pri MSG="${MSG}
parallel setting: $PARALLEL_LEVEL" MSG="${MSG}
parallel build time: $compile_total seconds" if [[ -f "${LIBCUML_BUILD_DIR}/libcuml++.so" ]]; then - LIBCUML_FS=$(stat -f %z ${LIBCUML_BUILD_DIR}/libcuml++.so | awk '{printf "%.2f MB", $1/1024/1024}') + LIBCUML_FS=$(stat -c %s ${LIBCUML_BUILD_DIR}/libcuml++.so | awk '{printf "%.2f MB", $1/1024/1024}') MSG="${MSG}
libcuml++.so size: $LIBCUML_FS" fi BMR_DIR=${RAPIDS_ARTIFACTS_DIR:-"${LIBCUML_BUILD_DIR}"} From 99d3cbe1f31a7af372854fe5b959572ab3868618 Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 11 Jun 2025 15:00:36 -0700 Subject: [PATCH 7/8] revert raft pin --- cpp/cmake/thirdparty/get_raft.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/cmake/thirdparty/get_raft.cmake b/cpp/cmake/thirdparty/get_raft.cmake index 486dd636f7..eaa262eb71 100644 --- a/cpp/cmake/thirdparty/get_raft.cmake +++ b/cpp/cmake/thirdparty/get_raft.cmake @@ -73,8 +73,8 @@ endfunction() # To use a different RAFT locally, set the CMake variable # CPM_raft_SOURCE=/path/to/local/raft find_and_configure_raft(VERSION ${CUML_MIN_VERSION_raft} - FORK divyegala - PINNED_TAG reduction-kernels + FORK rapidsai + PINNED_TAG branch-${CUML_BRANCH_VERSION_raft} EXCLUDE_FROM_ALL ${CUML_EXCLUDE_RAFT_FROM_ALL} # When PINNED_TAG above doesn't match cuml, # force local raft clone in build directory From 7cf40cc06298d6197d8db9f33cf4f228fdf88696 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Wed, 11 Jun 2025 19:48:14 -0400 Subject: [PATCH 8/8] Update cpp/cmake/thirdparty/get_raft.cmake Co-authored-by: jakirkham --- cpp/cmake/thirdparty/get_raft.cmake | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/cmake/thirdparty/get_raft.cmake b/cpp/cmake/thirdparty/get_raft.cmake index eaa262eb71..94c8392d9a 100644 --- a/cpp/cmake/thirdparty/get_raft.cmake +++ b/cpp/cmake/thirdparty/get_raft.cmake @@ -41,7 +41,6 @@ function(find_and_configure_raft) set(RAFT_NVTX ${PKG_NVTX}) message(VERBOSE "CUML: raft FIND_PACKAGE_ARGUMENTS COMPONENTS ${RAFT_COMPONENTS}") - # set(CPM_raft_SOURCE /raid/dgala/raft) rapids_cpm_find(raft ${PKG_VERSION} GLOBAL_TARGETS raft::raft