@@ -264,6 +264,51 @@ struct extend_params {
264264 * 0. */
265265 uint32_t max_chunk_size = 0 ;
266266};
267+ /* *
268+ * @}
269+ */
270+
271+ /* *
272+ * @defgroup cagra_cpp_merge_params CAGRA index merge parameters
273+ * @{
274+ */
275+
276+ /**
277+ * @brief Strategy used when merging CAGRA graphs.
278+ *
279+ * @note Currently, only the PHYSICAL strategy is supported.
280+ */
281+ enum MergeStrategy {
282+ /**
283+ * @brief Physical merge: builds a new CAGRA graph from the union of the dataset points
284+ * of the existing CAGRA graphs.
285+ *
286+ * This is expensive to build but does not impact search latency or quality.
287+ * Preferred when merging many smaller CAGRA graphs.
288+ *
289+ * @note Currently, this is the only supported strategy.
290+ */
291+ PHYSICAL
292+ };
293+
294+ /**
295+ * @brief Parameters for merging CAGRA indices.
296+ */
297+ struct merge_params {
298+ merge_params () = default ;
299+
300+ /**
301+ * @brief Constructs merge parameters from the given index parameters.
302+ * @param params Parameters for creating the output index; copied into `output_index_params`.
303+ */
304+ explicit merge_params (const cagra::index_params& params) : output_index_params(params) {}
305+
306+ /// Parameters for creating the output index.
307+ cagra::index_params output_index_params;
308+
309+ /// Strategy for merging. Defaults to `MergeStrategy::PHYSICAL`.
310+ MergeStrategy strategy = MergeStrategy::PHYSICAL;
311+ };
267312
268313/* *
269314 * @}
@@ -1794,6 +1839,150 @@ void serialize_to_hnswlib(
17941839 std::optional<raft::host_matrix_view<const uint8_t , int64_t , raft::row_major>> dataset =
17951840 std::nullopt );
17961841
1842+ /* *
1843+ * @defgroup cagra_cpp_index_merge CAGRA index merge functions
1844+ * @{
1845+ */
1846+
1847+ /** @brief Merge multiple CAGRA indices of type `index<float, uint32_t>` into a single index.
1848+ *
1849+ * This function merges multiple CAGRA indices into one, combining both the datasets and graph
1850+ * structures.
1851+ *
1852+ * @note When device memory is sufficient, the dataset attached to the returned index is allocated
1853+ * in device memory by default; otherwise, host memory is used automatically.
1854+ *
1855+ * Usage example:
1856+ * @code{.cpp}
1857+ * using namespace cuvs::neighbors;
1858+ * auto dataset0 = raft::make_host_matrix<float, int64_t>(res, size0, dim);
1859+ * auto dataset1 = raft::make_host_matrix<float, int64_t>(res, size1, dim);
1860+ *
1861+ * auto index0 = cagra::build(res, index_params, dataset0);
1862+ * auto index1 = cagra::build(res, index_params, dataset1);
1863+ *
1864+ * std::vector<cagra::index<float, uint32_t>*> indices{&index0, &index1};
1865+ * cagra::merge_params params{index_params};
1866+ *
1867+ * auto merged_index = cagra::merge(res, params, indices);
1868+ * @endcode
1869+ *
1870+ * @param[in] res RAFT resources used for the merge operation.
1871+ * @param[in] params Parameters that control the merging process.
1872+ * @param[in] indices A vector of pointers to the CAGRA indices to merge. All indices must:
1873+ * - Have attached datasets with the same dimension.
1874+ *
1875+ * @return A new CAGRA index containing the merged indices, graph, and dataset.
1876+ */
1877+ auto merge (raft::resources const & res,
1878+ const cuvs::neighbors::cagra::merge_params& params,
1879+ std::vector<cuvs::neighbors::cagra::index<float , uint32_t >*>& indices)
1880+ -> cuvs::neighbors::cagra::index<float, uint32_t>;
1881+
1882+ /** @brief Merge multiple CAGRA indices of type `index<half, uint32_t>` into a single index.
1883+ *
1884+ * This function merges multiple CAGRA indices into one, combining both the datasets and graph
1885+ * structures.
1886+ *
1887+ * @note When device memory is sufficient, the dataset attached to the returned index is allocated
1888+ * in device memory by default; otherwise, host memory is used automatically.
1889+ *
1890+ * Usage example:
1891+ * @code{.cpp}
1892+ * using namespace cuvs::neighbors;
1893+ * auto dataset0 = raft::make_host_matrix<half, int64_t>(res, size0, dim);
1894+ * auto dataset1 = raft::make_host_matrix<half, int64_t>(res, size1, dim);
1895+ *
1896+ * auto index0 = cagra::build(res, index_params, dataset0);
1897+ * auto index1 = cagra::build(res, index_params, dataset1);
1898+ *
1899+ * std::vector<cagra::index<half, uint32_t>*> indices{&index0, &index1};
1900+ * cagra::merge_params params{index_params};
1901+ *
1902+ * auto merged_index = cagra::merge(res, params, indices);
1903+ * @endcode
1904+ *
1905+ * @param[in] res RAFT resources used for the merge operation.
1906+ * @param[in] params Parameters that control the merging process.
1907+ * @param[in] indices A vector of pointers to the CAGRA indices to merge. All indices must:
1908+ * - Have attached datasets with the same dimension.
1909+ *
1910+ * @return A new CAGRA index containing the merged indices, graph, and dataset.
1911+ */
1912+ auto merge (raft::resources const & res,
1913+ const cuvs::neighbors::cagra::merge_params& params,
1914+ std::vector<cuvs::neighbors::cagra::index<half, uint32_t >*>& indices)
1915+ -> cuvs::neighbors::cagra::index<half, uint32_t>;
1916+
1917+ /** @brief Merge multiple CAGRA indices of type `index<int8_t, uint32_t>` into a single index.
1918+ *
1919+ * This function merges multiple CAGRA indices into one, combining both the datasets and graph
1920+ * structures.
1921+ *
1922+ * @note When device memory is sufficient, the dataset attached to the returned index is allocated
1923+ * in device memory by default; otherwise, host memory is used automatically.
1924+ *
1925+ * Usage example:
1926+ * @code{.cpp}
1927+ * using namespace cuvs::neighbors;
1928+ * auto dataset0 = raft::make_host_matrix<int8_t, int64_t>(res, size0, dim);
1929+ * auto dataset1 = raft::make_host_matrix<int8_t, int64_t>(res, size1, dim);
1930+ *
1931+ * auto index0 = cagra::build(res, index_params, dataset0);
1932+ * auto index1 = cagra::build(res, index_params, dataset1);
1933+ *
1934+ * std::vector<cagra::index<int8_t, uint32_t>*> indices{&index0, &index1};
1935+ * cagra::merge_params params{index_params};
1936+ *
1937+ * auto merged_index = cagra::merge(res, params, indices);
1938+ * @endcode
1939+ *
1940+ * @param[in] res RAFT resources used for the merge operation.
1941+ * @param[in] params Parameters that control the merging process.
1942+ * @param[in] indices A vector of pointers to the CAGRA indices to merge. All indices must:
1943+ * - Have attached datasets with the same dimension.
1944+ *
1945+ * @return A new CAGRA index containing the merged indices, graph, and dataset.
1946+ */
1947+ auto merge (raft::resources const & res,
1948+ const cuvs::neighbors::cagra::merge_params& params,
1949+ std::vector<cuvs::neighbors::cagra::index<int8_t , uint32_t >*>& indices)
1950+ -> cuvs::neighbors::cagra::index<int8_t, uint32_t>;
1951+
1952+ /** @brief Merge multiple CAGRA indices of type `index<uint8_t, uint32_t>` into a single index.
1953+ *
1954+ * This function merges multiple CAGRA indices into one, combining both the datasets and graph
1955+ * structures.
1956+ *
1957+ * @note When device memory is sufficient, the dataset attached to the returned index is allocated
1958+ * in device memory by default; otherwise, host memory is used automatically.
1959+ *
1960+ * Usage example:
1961+ * @code{.cpp}
1962+ * using namespace cuvs::neighbors;
1963+ * auto dataset0 = raft::make_host_matrix<uint8_t, int64_t>(res, size0, dim);
1964+ * auto dataset1 = raft::make_host_matrix<uint8_t, int64_t>(res, size1, dim);
1965+ *
1966+ * auto index0 = cagra::build(res, index_params, dataset0);
1967+ * auto index1 = cagra::build(res, index_params, dataset1);
1968+ *
1969+ * std::vector<cagra::index<uint8_t, uint32_t>*> indices{&index0, &index1};
1970+ * cagra::merge_params params{index_params};
1971+ *
1972+ * auto merged_index = cagra::merge(res, params, indices);
1973+ * @endcode
1974+ *
1975+ * @param[in] res RAFT resources used for the merge operation.
1976+ * @param[in] params Parameters that control the merging process.
1977+ * @param[in] indices A vector of pointers to the CAGRA indices to merge. All indices must:
1978+ * - Have attached datasets with the same dimension.
1979+ *
1980+ * @return A new CAGRA index containing the merged indices, graph, and dataset.
1981+ */
1982+ auto merge (raft::resources const & res,
1983+ const cuvs::neighbors::cagra::merge_params& params,
1984+ std::vector<cuvs::neighbors::cagra::index<uint8_t , uint32_t >*>& indices)
1985+ -> cuvs::neighbors::cagra::index<uint8_t, uint32_t>;
17971986/* *
17981987 * @}
17991988 */
0 commit comments