Skip to content

Commit 587f975

Browse files
authored
Docs for spectral embedding (#1299)
Resolves #1298 Authors: - Anupam (https://github.com/aamijar) Approvers: - Corey J. Nolet (https://github.com/cjnolet) URL: #1299
1 parent bcf9025 commit 587f975

3 files changed

Lines changed: 233 additions & 3 deletions

File tree

cpp/include/cuvs/preprocessing/spectral_embedding.hpp

Lines changed: 124 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,18 @@
2323

2424
namespace cuvs::preprocessing::spectral_embedding {
2525

26+
/**
27+
* @defgroup spectral_embedding Spectral Embedding
28+
* @{
29+
*/
30+
2631
/**
2732
* @brief Parameters for spectral embedding algorithm
33+
*
34+
* Spectral embedding is a dimensionality reduction technique that uses the
35+
* eigenvectors of the graph Laplacian to embed data points into a lower-dimensional
36+
* space. This technique is particularly useful for non-linear dimensionality
37+
* reduction and clustering tasks.
2838
*/
2939
struct params {
3040
/** @brief The number of components to reduce the data to. */
@@ -33,24 +43,135 @@ struct params {
3343
/** @brief The number of neighbors to use for the nearest neighbors graph. */
3444
int n_neighbors;
3545

36-
/** @brief Whether to normalize the Laplacian matrix. */
46+
/**
47+
* @brief Whether to normalize the Laplacian matrix.
48+
*
49+
* If true, uses the normalized graph Laplacian (D^(-1/2) L D^(-1/2)).
50+
* If false, uses the unnormalized graph Laplacian (L = D - W).
51+
* The normalized Laplacian often leads to better results for clustering tasks.
52+
*/
3753
bool norm_laplacian;
3854

39-
/** @brief Whether to drop the first eigenvector. */
55+
/**
56+
* @brief Whether to drop the first eigenvector.
57+
*
58+
* The first eigenvector of the graph Laplacian (eigenvalue 0) is trivial and
59+
* uninformative. Setting this to true drops it from the embedding.
60+
* This is typically set to true when norm_laplacian is true.
61+
*/
4062
bool drop_first;
4163

42-
/** @brief Random seed for reproducibility */
64+
/**
65+
* @brief Random seed for reproducibility.
66+
*
67+
* Controls the random number generation for k-NN graph construction
68+
* and eigenvalue solver initialization. Use the same seed value to
69+
* ensure reproducible results across runs.
70+
*/
4371
uint64_t seed;
4472
};
4573

74+
/**
75+
* @brief Perform spectral embedding on an input dataset
76+
*
77+
* This function computes the spectral embedding of the input dataset by:
78+
* 1. Constructing a k-nearest neighbors graph from the input data
79+
* 2. Computing the graph Laplacian (normalized or unnormalized)
80+
* 3. Finding the eigenvectors corresponding to the smallest eigenvalues
81+
* 4. Using these eigenvectors as the embedding coordinates
82+
*
83+
* @code{.cpp}
84+
* #include <raft/core/resources.hpp>
85+
* #include <cuvs/preprocessing/spectral_embedding.hpp>
86+
*
87+
* raft::resources handle;
88+
*
89+
* // Set up parameters
90+
* cuvs::preprocessing::spectral_embedding::params params;
91+
* params.n_components = 2;
92+
* params.n_neighbors = 15;
93+
* params.norm_laplacian = true;
94+
* params.drop_first = true;
95+
* params.seed = 42;
96+
*
97+
* // Create input dataset (n_samples x n_features)
98+
* auto dataset = raft::make_device_matrix<float, int>(handle, n_samples, n_features);
99+
* // ... fill dataset ...
100+
*
101+
* // Create output embedding matrix (n_samples x n_components)
102+
* auto embedding = raft::make_device_matrix<float, int, raft::col_major>(
103+
* handle, n_samples, params.n_components);
104+
*
105+
* // Perform spectral embedding
106+
* cuvs::preprocessing::spectral_embedding::transform(
107+
* handle, params, dataset.view(), embedding.view());
108+
* @endcode
109+
*
110+
* @param[in] handle RAFT resource handle for managing CUDA resources
111+
* @param[in] config Parameters controlling the spectral embedding algorithm
112+
* @param[in] dataset Input dataset in row-major format [n_samples x n_features]
113+
* @param[out] embedding Output embedding in column-major format [n_samples x n_components]
114+
*
115+
*/
46116
void transform(raft::resources const& handle,
47117
params config,
48118
raft::device_matrix_view<float, int, raft::row_major> dataset,
49119
raft::device_matrix_view<float, int, raft::col_major> embedding);
50120

121+
/**
122+
* @brief Perform spectral embedding using a precomputed connectivity graph
123+
*
124+
* This function computes the spectral embedding from a precomputed sparse
125+
* connectivity graph (e.g., from a k-NN search or custom similarity matrix).
126+
* This is useful when you want to use a custom graph construction method
127+
* or when you have a precomputed similarity/affinity matrix.
128+
*
129+
* The function:
130+
* 1. Computes the graph Laplacian from the COO connectivity matrix
131+
* 2. Computes eigenvectors of the Laplacian
132+
* 3. Returns the eigenvectors as the embedding
133+
*
134+
* @code{.cpp}
135+
* #include <raft/core/resources.hpp>
136+
* #include <cuvs/preprocessing/spectral_embedding.hpp>
137+
*
138+
* raft::resources handle;
139+
*
140+
* // Set up parameters
141+
* cuvs::preprocessing::spectral_embedding::params params;
142+
* params.n_components = 2;
143+
* params.norm_laplacian = true;
144+
* params.drop_first = true;
145+
* params.seed = 42;
146+
*
147+
* // Assume we have a precomputed connectivity graph as COO matrix
148+
* // connectivity_graph represents weighted edges between samples
149+
* raft::device_coo_matrix<float, int, int, int> connectivity_graph(...);
150+
*
151+
* // Create output embedding matrix (n_samples x n_components)
152+
* auto embedding = raft::make_device_matrix<float, int, raft::col_major>(
153+
* handle, n_samples, params.n_components);
154+
*
155+
* // Perform spectral embedding
156+
* cuvs::preprocessing::spectral_embedding::transform(
157+
* handle, params, connectivity_graph.view(), embedding.view());
158+
* @endcode
159+
*
160+
* @param[in] handle RAFT resource handle for managing CUDA resources
161+
* @param[in] config Parameters controlling the spectral embedding algorithm
162+
* (the n_neighbors parameter is ignored when using a precomputed graph)
163+
* @param[in] connectivity_graph Precomputed sparse connectivity/affinity graph in COO format
164+
* representing weighted connections between samples
165+
* @param[out] embedding Output embedding in column-major format [n_samples x n_components]
166+
*
167+
*/
51168
void transform(raft::resources const& handle,
52169
params config,
53170
raft::device_coo_matrix_view<float, int, int, int> connectivity_graph,
54171
raft::device_matrix_view<float, int, raft::col_major> embedding);
55172

173+
/**
174+
* @}
175+
*/
176+
56177
} // namespace cuvs::preprocessing::spectral_embedding

docs/source/cpp_api/preprocessing.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,4 @@ Preprocessing
1010
:caption: Contents:
1111

1212
preprocessing_quantize.rst
13+
preprocessing_spectral_embedding.rst
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
Spectral Embedding
2+
==================
3+
4+
Spectral embedding is a powerful dimensionality reduction technique that uses the eigenvectors
5+
of the graph Laplacian to embed high-dimensional data into a lower-dimensional space. This
6+
method is particularly effective for discovering non-linear manifold structures in data and
7+
is widely used in clustering, visualization, and feature extraction tasks.
8+
9+
.. role:: py(code)
10+
:language: c++
11+
:class: highlight
12+
13+
Overview
14+
--------
15+
16+
The spectral embedding algorithm works by:
17+
18+
1. **Graph Construction**: Building a k-nearest neighbors graph from the input data
19+
2. **Laplacian Computation**: Computing the graph Laplacian matrix (normalized or unnormalized)
20+
3. **Eigendecomposition**: Finding the eigenvectors corresponding to the smallest eigenvalues
21+
4. **Embedding**: Using these eigenvectors as coordinates in the lower-dimensional space
22+
23+
Parameters
24+
----------
25+
26+
``#include <cuvs/preprocessing/spectral_embedding.hpp>``
27+
28+
namespace *cuvs::preprocessing::spectral_embedding*
29+
30+
.. doxygenstruct:: cuvs::preprocessing::spectral_embedding::params
31+
:project: cuvs
32+
:members:
33+
34+
Functions
35+
---------
36+
37+
``#include <cuvs/preprocessing/spectral_embedding.hpp>``
38+
39+
namespace *cuvs::preprocessing::spectral_embedding*
40+
41+
.. doxygengroup:: spectral_embedding
42+
:project: cuvs
43+
:content-only:
44+
45+
Example Usage
46+
-------------
47+
48+
Basic Usage with Dataset
49+
~~~~~~~~~~~~~~~~~~~~~~~~
50+
51+
.. code-block:: cpp
52+
53+
#include <raft/core/resources.hpp>
54+
#include <cuvs/preprocessing/spectral_embedding.hpp>
55+
56+
// Initialize RAFT resources
57+
raft::resources handle;
58+
59+
// Configure spectral embedding parameters
60+
cuvs::preprocessing::spectral_embedding::params params;
61+
params.n_components = 2; // Reduce to 2D for visualization
62+
params.n_neighbors = 15; // Local neighborhood size
63+
params.norm_laplacian = true; // Use normalized Laplacian
64+
params.drop_first = true; // Drop constant eigenvector
65+
params.seed = 42; // For reproducibility
66+
67+
// Create input dataset (n_samples x n_features)
68+
int n_samples = 1000;
69+
int n_features = 50;
70+
auto dataset = raft::make_device_matrix<float, int>(handle, n_samples, n_features);
71+
// ... populate dataset with your data ...
72+
73+
// Allocate output embedding matrix (n_samples x n_components)
74+
auto embedding = raft::make_device_matrix<float, int, raft::col_major>(
75+
handle, n_samples, params.n_components);
76+
77+
// Perform spectral embedding
78+
cuvs::preprocessing::spectral_embedding::transform(
79+
handle, params, dataset.view(), embedding.view());
80+
81+
Using Precomputed Graph
82+
~~~~~~~~~~~~~~~~~~~~~~~
83+
84+
.. code-block:: cpp
85+
86+
#include <raft/core/resources.hpp>
87+
#include <cuvs/preprocessing/spectral_embedding.hpp>
88+
89+
raft::resources handle;
90+
91+
// Configure parameters (n_neighbors is ignored with a precomputed graph)
92+
cuvs::preprocessing::spectral_embedding::params params;
93+
params.n_components = 3;
94+
params.norm_laplacian = true;
95+
params.drop_first = true;
96+
params.seed = 42;
97+
98+
// Assume we have a precomputed connectivity graph
99+
// This could be from custom similarity computation or k-NN search
100+
raft::device_coo_matrix<float, int, int, int> connectivity_graph(...);
101+
102+
// Allocate output embedding matrix (n_samples x n_components)
103+
auto embedding = raft::make_device_matrix<float, int, raft::col_major>(
104+
handle, n_samples, params.n_components);
105+
106+
// Perform spectral embedding with precomputed graph
107+
cuvs::preprocessing::spectral_embedding::transform(
108+
handle, params, connectivity_graph.view(), embedding.view());

0 commit comments

Comments
 (0)