Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
2dd503b
pca-preprocessor
aamijar Feb 16, 2026
fb3c731
Merge branch 'main' into pca-preprocessor
aamijar Feb 16, 2026
14b08e7
IndexT tparam
aamijar Feb 16, 2026
c3f74ef
remove unused
aamijar Feb 16, 2026
a7f89a9
trailing return
aamijar Feb 16, 2026
e153fe1
Simplify paramsPCA
aamijar Feb 18, 2026
64491d1
Merge branch 'main' into pca-preprocessor
aamijar Feb 18, 2026
dbbb177
Merge branch 'main' into pca-preprocessor
aamijar Mar 4, 2026
f3a4007
Update cpp/tests/preprocessing/pca.cu
aamijar Mar 4, 2026
ff4e723
Apply suggestions from code review
aamijar Mar 4, 2026
99f32fc
remove verbose param
aamijar Mar 4, 2026
074fd96
remove commented out
aamijar Mar 4, 2026
c7c52a7
const params& config
aamijar Mar 4, 2026
0bfd500
remove comments
aamijar Mar 4, 2026
7c4fbd7
Merge branch 'main' into pca-preprocessor
aamijar Mar 4, 2026
2399406
Merge branch 'main' into pca-preprocessor
aamijar Mar 4, 2026
5144ab2
remove double apis
aamijar Mar 4, 2026
948882c
add new gtest and refactor
aamijar Mar 5, 2026
109dd08
Merge branch 'main' into pca-preprocessor
aamijar Mar 5, 2026
ef692f0
Merge branch 'main' into pca-preprocessor
divyegala Mar 5, 2026
465e546
Merge branch 'main' into pca-preprocessor
aamijar Mar 10, 2026
92919f4
Merge branch 'release/26.04' into pca-preprocessor
aamijar Mar 14, 2026
74fcc9d
Merge branch 'release/26.04' into pca-preprocessor
aamijar Mar 17, 2026
0bb8b21
update to_raft_params
aamijar Mar 18, 2026
56baa17
Merge branch 'release/26.04' into pca-preprocessor
aamijar Mar 18, 2026
60f1a7c
Merge branch 'release/26.04' into pca-preprocessor
aamijar Mar 18, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -664,6 +664,7 @@ if(NOT BUILD_CPU_ONLY)
src/preprocessing/quantize/binary.cu
src/preprocessing/quantize/pq.cu
src/preprocessing/spectral/spectral_embedding.cu
src/preprocessing/pca/pca.cu
src/selection/select_k_float_int64_t.cu
src/selection/select_k_float_int32_t.cu
src/selection/select_k_float_uint32_t.cu
Expand Down
185 changes: 185 additions & 0 deletions cpp/include/cuvs/preprocessing/pca.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/

#pragma once

#include <raft/core/device_mdspan.hpp>
#include <raft/core/resources.hpp>
#include <raft/linalg/pca_types.hpp>

namespace cuvs::preprocessing::pca {

using solver = raft::linalg::solver;

/**
 * @brief Configuration for a PCA decomposition.
 *
 * Field meanings follow scikit-learn's PCA estimator. Ref:
 * http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html
 */
struct params {
  /** @brief How many principal components are kept. */
  int n_components{1};

  /**
   * @brief When false, the data passed to fit is overwritten, so running fit(X).transform(X)
   * will not yield the expected results; use fit_transform(X) instead.
   */
  bool copy{true};

  /**
   * @brief When true (the default is false) the component vectors are multiplied by the square
   * root of n_samples and then divided by the singular values, which ensures uncorrelated
   * outputs with unit component-wise variances.
   */
  bool whiten{false};

  /** @brief Solver algorithm used for the decomposition. */
  solver algorithm{solver::COV_EIG_DQ};

  /**
   * @brief Tolerance for the singular values computed by the Jacobi solver (the scikit-learn
   * counterpart is the tolerance used with svd_solver == 'arpack').
   */
  float tol{0.0f};

  /**
   * @brief Iteration count for the power method computed by the Jacobi solver.
   */
  int n_iterations{15};
};

/**
* @defgroup pca PCA (Principal Component Analysis)
* @{
*/

/**
* @brief Perform PCA fit operation.
*
* Computes the principal components, explained variances, singular values, and column means
* from the input data.
*
* Every view taken by this function uses `float` elements and `int64_t` indexing, and every
* matrix view is column-major, so the owning arrays in the example below are created with
* matching template arguments.
*
* @code{.cpp}
* #include <raft/core/device_mdarray.hpp>
* #include <raft/core/resources.hpp>
* #include <cuvs/preprocessing/pca.hpp>
*
* raft::resources handle;
*
* cuvs::preprocessing::pca::params params;
* params.n_components = 2;
*
* // n_rows / n_cols: dimensions of the dataset
* auto input =
*   raft::make_device_matrix<float, int64_t, raft::col_major>(handle, n_rows, n_cols);
* // ... fill input ...
*
* auto components = raft::make_device_matrix<float, int64_t, raft::col_major>(
* handle, params.n_components, n_cols);
* auto explained_var = raft::make_device_vector<float, int64_t>(handle, params.n_components);
* auto explained_var_ratio =
*   raft::make_device_vector<float, int64_t>(handle, params.n_components);
* auto singular_vals = raft::make_device_vector<float, int64_t>(handle, params.n_components);
* auto mu = raft::make_device_vector<float, int64_t>(handle, n_cols);
* auto noise_vars = raft::make_device_scalar<float, int64_t>(handle, 0.0f);
*
* cuvs::preprocessing::pca::fit(handle, params,
* input.view(), components.view(), explained_var.view(),
* explained_var_ratio.view(), singular_vals.view(), mu.view(), noise_vars.view());
* @endcode
*
* @param[in] handle raft resource handle
* @param[in] config PCA parameters
* @param[inout] input input data [n_rows x n_cols] (col-major). Modified temporarily.
* @param[out] components principal components [n_components x n_cols] (col-major)
* @param[out] explained_var explained variances [n_components]
* @param[out] explained_var_ratio explained variance ratios [n_components]
* @param[out] singular_vals singular values [n_components]
* @param[out] mu column means [n_cols]
* @param[out] noise_vars noise variance (scalar)
* @param[in] flip_signs_based_on_U whether to determine signs by U (true) or V.T (false);
* defaults to false
*/
void fit(raft::resources const& handle,
const params& config,
raft::device_matrix_view<float, int64_t, raft::col_major> input,
raft::device_matrix_view<float, int64_t, raft::col_major> components,
raft::device_vector_view<float, int64_t> explained_var,
raft::device_vector_view<float, int64_t> explained_var_ratio,
raft::device_vector_view<float, int64_t> singular_vals,
raft::device_vector_view<float, int64_t> mu,
raft::device_scalar_view<float, int64_t> noise_vars,
bool flip_signs_based_on_U = false);

Copy link
Copy Markdown
Member Author

@aamijar aamijar Feb 18, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Making a note here that I don't think the existing cuml implementation has the ability to tune the percentage of explained variance.
For example, in sklearn we can set 0 < n_components < 1 where the user can select a percentage of the explained variance to recover and the n_components is automatically determined by the algorithm in order to satisfy that.

We will have to build that piece out since it doesn't exist in the current implementation.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tuning is not what's being asked for. Exposing the explained variance is what's being requested (it is used for tuning / selecting the number of components but that's something the user does, not something we need to do).

/**
* @brief Perform PCA fit and transform operations.
*
* Computes the principal components and transforms the input data into the eigenspace
* in a single operation.
*
* Every view uses `float` elements and `int64_t` indexing; every matrix view is column-major.
*
* @param[in] handle raft resource handle
* @param[in] config PCA parameters
* @param[inout] input input data [n_rows x n_cols] (col-major). Modified temporarily.
* @param[out] trans_input transformed data [n_rows x n_components] (col-major)
* @param[out] components principal components [n_components x n_cols] (col-major)
* @param[out] explained_var explained variances [n_components]
* @param[out] explained_var_ratio explained variance ratios [n_components]
* @param[out] singular_vals singular values [n_components]
* @param[out] mu column means [n_cols]
* @param[out] noise_vars noise variance (scalar)
* @param[in] flip_signs_based_on_U whether to determine signs by U (true) or V.T (false);
* defaults to false
*/
void fit_transform(raft::resources const& handle,
const params& config,
raft::device_matrix_view<float, int64_t, raft::col_major> input,
raft::device_matrix_view<float, int64_t, raft::col_major> trans_input,
raft::device_matrix_view<float, int64_t, raft::col_major> components,
raft::device_vector_view<float, int64_t> explained_var,
raft::device_vector_view<float, int64_t> explained_var_ratio,
raft::device_vector_view<float, int64_t> singular_vals,
raft::device_vector_view<float, int64_t> mu,
raft::device_scalar_view<float, int64_t> noise_vars,
bool flip_signs_based_on_U = false);

/**
* @brief Perform PCA transform operation.
*
* Transforms the input data into the eigenspace using previously computed principal components.
*
* Every view uses `float` elements and `int64_t` indexing; every matrix view is column-major.
* Note that the output view `trans_input` is the last argument, after the model inputs.
*
* @param[in] handle raft resource handle
* @param[in] config PCA parameters
* @param[inout] input data to transform [n_rows x n_cols] (col-major). Modified temporarily
* (mean-centered then restored).
* @param[in] components principal components [n_components x n_cols] (col-major)
* @param[in] singular_vals singular values [n_components]
* @param[in] mu column means [n_cols]
* @param[out] trans_input transformed data [n_rows x n_components] (col-major)
*/
void transform(raft::resources const& handle,
const params& config,
raft::device_matrix_view<float, int64_t, raft::col_major> input,
raft::device_matrix_view<float, int64_t, raft::col_major> components,
raft::device_vector_view<float, int64_t> singular_vals,
raft::device_vector_view<float, int64_t> mu,
raft::device_matrix_view<float, int64_t, raft::col_major> trans_input);

/**
* @brief Perform PCA inverse transform operation.
*
* Transforms data from the eigenspace back to the original space.
*
* Every view uses `float` elements and `int64_t` indexing; every matrix view is column-major.
* Note that the output view `output` is the last argument, after the model inputs.
*
* @param[in] handle raft resource handle
* @param[in] config PCA parameters
* @param[in] trans_input transformed data [n_rows x n_components] (col-major)
* @param[in] components principal components [n_components x n_cols] (col-major)
* @param[in] singular_vals singular values [n_components]
* @param[in] mu column means [n_cols]
* @param[out] output reconstructed data [n_rows x n_cols] (col-major)
*/
void inverse_transform(raft::resources const& handle,
const params& config,
raft::device_matrix_view<float, int64_t, raft::col_major> trans_input,
raft::device_matrix_view<float, int64_t, raft::col_major> components,
raft::device_vector_view<float, int64_t> singular_vals,
raft::device_vector_view<float, int64_t> mu,
raft::device_matrix_view<float, int64_t, raft::col_major> output);

/** @} */ // end group pca

} // namespace cuvs::preprocessing::pca
109 changes: 109 additions & 0 deletions cpp/src/preprocessing/pca/detail/pca.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/

#pragma once

#include <cuvs/preprocessing/pca.hpp>

#include <raft/core/device_mdspan.hpp>
#include <raft/core/resources.hpp>
#include <raft/linalg/pca.cuh>

namespace cuvs::preprocessing::pca::detail {

/**
* @brief Convert cuvs::preprocessing::pca::params to raft::linalg::paramsPCA.
*
* Copies the `algorithm`, `tol`, `n_iterations`, `copy` and `whiten` fields one-to-one into a
* locally declared raft::linalg::paramsPCA and returns it by value.
*
* NOTE(review): `config.n_components` is not forwarded here. Presumably the raft routines take
* the component count from the extents of the views they receive -- confirm against
* raft/linalg/pca.cuh.
*
* @param[in] config cuVS PCA parameters
* @return the raft parameter struct populated from `config`
*/
inline auto to_raft_params(const params& config) -> raft::linalg::paramsPCA
{
raft::linalg::paramsPCA prms;
prms.algorithm = config.algorithm;
prms.tol = config.tol;
prms.n_iterations = config.n_iterations;
prms.copy = config.copy;
prms.whiten = config.whiten;
return prms;
// NOTE(review): the text between here and the closing brace is a review thread captured from
// the pull-request page, not part of the function body.
Comment on lines +25 to +27
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
prms.copy = config.copy;
prms.whiten = config.whiten;
return prms;
prms.copy = config.copy;
prms.whiten = config.whiten;
prms.verbose = config.verbose;
return prms;

We are missing verbose here, no?

Copy link
Copy Markdown
Member Author

@aamijar aamijar Mar 4, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

verbose was a unused parameter, removed it from pca.hpp in 99f32fc

}

/**
 * @brief Forward a PCA fit request to raft::linalg::pca_fit.
 *
 * The cuVS parameter struct is translated with to_raft_params(); the handle, every view and the
 * sign-flip flag are passed through unchanged and in the same order.
 *
 * @tparam DataT  element type of the views
 * @tparam IndexT index type of the views
 */
template <typename DataT, typename IndexT>
void fit(raft::resources const& handle,
         const params& config,
         raft::device_matrix_view<DataT, IndexT, raft::col_major> input,
         raft::device_matrix_view<DataT, IndexT, raft::col_major> components,
         raft::device_vector_view<DataT, IndexT> explained_var,
         raft::device_vector_view<DataT, IndexT> explained_var_ratio,
         raft::device_vector_view<DataT, IndexT> singular_vals,
         raft::device_vector_view<DataT, IndexT> mu,
         raft::device_scalar_view<DataT, IndexT> noise_vars,
         bool flip_signs_based_on_U)
{
  raft::linalg::paramsPCA raft_cfg = to_raft_params(config);

  raft::linalg::pca_fit(
    handle, raft_cfg, input, components, explained_var, explained_var_ratio, singular_vals, mu,
    noise_vars, flip_signs_based_on_U);
}

/**
 * @brief Forward a combined PCA fit + transform request to raft::linalg::pca_fit_transform.
 *
 * The cuVS parameter struct is translated with to_raft_params(); the handle, every view and the
 * sign-flip flag are passed through unchanged and in the same order.
 *
 * @tparam DataT  element type of the views
 * @tparam IndexT index type of the views
 */
template <typename DataT, typename IndexT>
void fit_transform(raft::resources const& handle,
                   const params& config,
                   raft::device_matrix_view<DataT, IndexT, raft::col_major> input,
                   raft::device_matrix_view<DataT, IndexT, raft::col_major> trans_input,
                   raft::device_matrix_view<DataT, IndexT, raft::col_major> components,
                   raft::device_vector_view<DataT, IndexT> explained_var,
                   raft::device_vector_view<DataT, IndexT> explained_var_ratio,
                   raft::device_vector_view<DataT, IndexT> singular_vals,
                   raft::device_vector_view<DataT, IndexT> mu,
                   raft::device_scalar_view<DataT, IndexT> noise_vars,
                   bool flip_signs_based_on_U)
{
  raft::linalg::paramsPCA raft_cfg = to_raft_params(config);

  raft::linalg::pca_fit_transform(
    handle, raft_cfg, input, trans_input, components, explained_var, explained_var_ratio,
    singular_vals, mu, noise_vars, flip_signs_based_on_U);
}

/**
 * @brief Forward a PCA transform request to raft::linalg::pca_transform.
 *
 * The cuVS parameter struct is translated with to_raft_params(); the handle and every view are
 * passed through unchanged and in the same order.
 *
 * @tparam DataT  element type of the views
 * @tparam IndexT index type of the views
 */
template <typename DataT, typename IndexT>
void transform(raft::resources const& handle,
               const params& config,
               raft::device_matrix_view<DataT, IndexT, raft::col_major> input,
               raft::device_matrix_view<DataT, IndexT, raft::col_major> components,
               raft::device_vector_view<DataT, IndexT> singular_vals,
               raft::device_vector_view<DataT, IndexT> mu,
               raft::device_matrix_view<DataT, IndexT, raft::col_major> trans_input)
{
  raft::linalg::paramsPCA raft_cfg = to_raft_params(config);

  raft::linalg::pca_transform(
    handle, raft_cfg, input, components, singular_vals, mu, trans_input);
}

/**
 * @brief Forward a PCA inverse-transform request to raft::linalg::pca_inverse_transform.
 *
 * The cuVS parameter struct is translated with to_raft_params(); the handle and every view are
 * passed through unchanged and in the same order.
 *
 * @tparam DataT  element type of the views
 * @tparam IndexT index type of the views
 */
template <typename DataT, typename IndexT>
void inverse_transform(raft::resources const& handle,
                       const params& config,
                       raft::device_matrix_view<DataT, IndexT, raft::col_major> trans_input,
                       raft::device_matrix_view<DataT, IndexT, raft::col_major> components,
                       raft::device_vector_view<DataT, IndexT> singular_vals,
                       raft::device_vector_view<DataT, IndexT> mu,
                       raft::device_matrix_view<DataT, IndexT, raft::col_major> output)
{
  raft::linalg::paramsPCA raft_cfg = to_raft_params(config);

  raft::linalg::pca_inverse_transform(handle,
                                      raft_cfg,
                                      trans_input,
                                      components,
                                      singular_vals,
                                      mu,
                                      output);
}

} // namespace cuvs::preprocessing::pca::detail
Loading
Loading