Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 124 additions & 3 deletions cpp/include/cuvs/preprocessing/spectral_embedding.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,18 @@

namespace cuvs::preprocessing::spectral_embedding {

/**
* @defgroup spectral_embedding Spectral Embedding
* @{
*/

/**
* @brief Parameters for spectral embedding algorithm
*
* Spectral embedding is a dimensionality reduction technique that uses the
* eigenvectors of the graph Laplacian to embed data points into a lower-dimensional
* space. This technique is particularly useful for non-linear dimensionality
* reduction and clustering tasks.
*/
struct params {
/** @brief The number of components to reduce the data to. */
Expand All @@ -33,24 +43,135 @@ struct params {
/** @brief The number of neighbors to use for the nearest neighbors graph. */
int n_neighbors;

/** @brief Whether to normalize the Laplacian matrix. */
/**
* @brief Whether to normalize the Laplacian matrix.
*
* If true, uses the normalized graph Laplacian (D^(-1/2) L D^(-1/2)).
* If false, uses the unnormalized graph Laplacian (L = D - W).
* Normalized Laplacian often leads to better results for clustering tasks.
*/
bool norm_laplacian;

/** @brief Whether to drop the first eigenvector. */
/**
* @brief Whether to drop the first eigenvector.
*
* The first eigenvector of the normalized Laplacian is constant and
* uninformative. Setting this to true drops it from the embedding.
* This is typically set to true when norm_laplacian is true.
*/
bool drop_first;

/** @brief Random seed for reproducibility */
/**
* @brief Random seed for reproducibility.
*
* Controls the random number generation for k-NN graph construction
* and eigenvalue solver initialization. Use the same seed value to
* ensure reproducible results across runs.
*/
uint64_t seed;
};

/**
* @brief Perform spectral embedding on input dataset
*
* This function computes the spectral embedding of the input dataset by:
* 1. Constructing a k-nearest neighbors graph from the input data
* 2. Computing the graph Laplacian (normalized or unnormalized)
* 3. Finding the eigenvectors corresponding to the smallest eigenvalues
* 4. Using these eigenvectors as the embedding coordinates
*
* @code{.cpp}
* #include <raft/core/resources.hpp>
* #include <cuvs/preprocessing/spectral_embedding.hpp>
*
* raft::resources handle;
*
* // Set up parameters
* cuvs::preprocessing::spectral_embedding::params params;
* params.n_components = 2;
* params.n_neighbors = 15;
* params.norm_laplacian = true;
* params.drop_first = true;
* params.seed = 42;
*
* // Create input dataset (n_samples x n_features)
* auto dataset = raft::make_device_matrix<float, int>(handle, n_samples, n_features);
* // ... fill dataset ...
*
* // Create output embedding matrix (n_samples x n_components)
* auto embedding = raft::make_device_matrix<float, int, raft::col_major>(
* handle, n_samples, params.n_components);
*
* // Perform spectral embedding
* cuvs::preprocessing::spectral_embedding::transform(
* handle, params, dataset.view(), embedding.view());
* @endcode
*
* @param[in] handle RAFT resource handle for managing CUDA resources
* @param[in] config Parameters controlling the spectral embedding algorithm
* @param[in] dataset Input dataset in row-major format [n_samples x n_features]
* @param[out] embedding Output embedding in column-major format [n_samples x n_components]
*
*/
void transform(raft::resources const& handle,
params config,
raft::device_matrix_view<float, int, raft::row_major> dataset,
raft::device_matrix_view<float, int, raft::col_major> embedding);

/**
* @brief Perform spectral embedding using a precomputed connectivity graph
*
* This function computes the spectral embedding from a precomputed sparse
* connectivity graph (e.g., from a k-NN search or custom similarity matrix).
* This is useful when you want to use a custom graph construction method
* or when you have a precomputed similarity/affinity matrix.
*
* The function:
* 1. Converts the COO matrix to the graph Laplacian
* 2. Computes eigenvectors of the Laplacian
* 3. Returns the eigenvectors as the embedding
*
* @code{.cpp}
* #include <raft/core/resources.hpp>
* #include <cuvs/preprocessing/spectral_embedding.hpp>
*
* raft::resources handle;
*
* // Set up parameters
* cuvs::preprocessing::spectral_embedding::params params;
* params.n_components = 2;
* params.norm_laplacian = true;
* params.drop_first = true;
* params.seed = 42;
*
* // Assume we have a precomputed connectivity graph as COO matrix
* // connectivity_graph represents weighted edges between samples
* raft::device_coo_matrix<float, int, int, int> connectivity_graph(...);
*
* // Create output embedding matrix (n_samples x n_components)
* auto embedding = raft::make_device_matrix<float, int, raft::col_major>(
* handle, n_samples, params.n_components);
*
* // Perform spectral embedding
* cuvs::preprocessing::spectral_embedding::transform(
* handle, params, connectivity_graph.view(), embedding.view());
* @endcode
*
* @param[in] handle RAFT resource handle for managing CUDA resources
* @param[in] config Parameters controlling the spectral embedding algorithm
* (n_neighbors parameter is ignored when using precomputed graph)
* @param[in] connectivity_graph Precomputed sparse connectivity/affinity graph in COO format
* representing weighted connections between samples
* @param[out] embedding Output embedding in column-major format [n_samples x n_components]
*
*/
void transform(raft::resources const& handle,
params config,
raft::device_coo_matrix_view<float, int, int, int> connectivity_graph,
raft::device_matrix_view<float, int, raft::col_major> embedding);

/**
* @}
*/

} // namespace cuvs::preprocessing::spectral_embedding
1 change: 1 addition & 0 deletions docs/source/cpp_api/preprocessing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ Preprocessing
:caption: Contents:

preprocessing_quantize.rst
preprocessing_spectral_embedding.rst
108 changes: 108 additions & 0 deletions docs/source/cpp_api/preprocessing_spectral_embedding.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
Spectral Embedding
==================

Spectral embedding is a powerful dimensionality reduction technique that uses the eigenvectors
of the graph Laplacian to embed high-dimensional data into a lower-dimensional space. This
method is particularly effective for discovering non-linear manifold structures in data and
is widely used in clustering, visualization, and feature extraction tasks.

.. role:: py(code)
:language: c++
:class: highlight

Overview
--------

The spectral embedding algorithm works by:

1. **Graph Construction**: Building a k-nearest neighbors graph from the input data
2. **Laplacian Computation**: Computing the graph Laplacian matrix (normalized or unnormalized)
3. **Eigendecomposition**: Finding the eigenvectors corresponding to the smallest eigenvalues
4. **Embedding**: Using these eigenvectors as coordinates in the lower-dimensional space

Parameters
----------

``#include <cuvs/preprocessing/spectral_embedding.hpp>``

namespace *cuvs::preprocessing::spectral_embedding*

.. doxygenstruct:: cuvs::preprocessing::spectral_embedding::params
:project: cuvs
:members:

Functions
---------

``#include <cuvs/preprocessing/spectral_embedding.hpp>``

namespace *cuvs::preprocessing::spectral_embedding*

.. doxygengroup:: spectral_embedding
:project: cuvs
:content-only:

Example Usage
-------------

Basic Usage with Dataset
~~~~~~~~~~~~~~~~~~~~~~~~

.. code-block:: cpp

#include <raft/core/resources.hpp>
#include <cuvs/preprocessing/spectral_embedding.hpp>

// Initialize RAFT resources
raft::resources handle;

// Configure spectral embedding parameters
cuvs::preprocessing::spectral_embedding::params params;
params.n_components = 2; // Reduce to 2D for visualization
params.n_neighbors = 15; // Local neighborhood size
params.norm_laplacian = true; // Use normalized Laplacian
params.drop_first = true; // Drop constant eigenvector
params.seed = 42; // For reproducibility

// Create input dataset (n_samples x n_features)
int n_samples = 1000;
int n_features = 50;
auto dataset = raft::make_device_matrix<float, int>(handle, n_samples, n_features);
// ... populate dataset with your data ...

// Allocate output embedding matrix (n_samples x n_components)
auto embedding = raft::make_device_matrix<float, int, raft::col_major>(
handle, n_samples, params.n_components);

// Perform spectral embedding
cuvs::preprocessing::spectral_embedding::transform(
handle, params, dataset.view(), embedding.view());

Using Precomputed Graph
~~~~~~~~~~~~~~~~~~~~~~~

.. code-block:: cpp

#include <raft/core/resources.hpp>
#include <cuvs/preprocessing/spectral_embedding.hpp>

raft::resources handle;

// Configure parameters (n_neighbors is ignored with precomputed graph)
cuvs::preprocessing::spectral_embedding::params params;
params.n_components = 3;
params.norm_laplacian = true;
params.drop_first = true;
params.seed = 42;

// Assume we have a precomputed connectivity graph
// This could be from custom similarity computation or k-NN search
raft::device_coo_matrix<float, int, int, int> connectivity_graph(...);

// Allocate output embedding
auto embedding = raft::make_device_matrix<float, int, raft::col_major>(
handle, n_samples, params.n_components);

// Perform spectral embedding with precomputed graph
cuvs::preprocessing::spectral_embedding::transform(
handle, params, connectivity_graph.view(), embedding.view());