diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 59b8e00de1..d1f633248f 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -47,6 +47,19 @@ jobs: node_type: "gpu-v100-latest-1" run_script: "ci/build_rust.sh" sha: ${{ inputs.sha }} + go-build: + needs: cpp-build + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02 + with: + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + arch: "amd64" + date: ${{ inputs.date }} + container_image: "rapidsai/ci-conda:latest" + node_type: "gpu-v100-latest-1" + run_script: "ci/build_go.sh" + sha: ${{ inputs.sha }} python-build: needs: [cpp-build] secrets: inherit diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 233b82f94b..832932d99e 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -22,6 +22,7 @@ jobs: - conda-python-tests - docs-build - rust-build + - go-build - wheel-build-libcuvs - wheel-build-cuvs - wheel-tests-cuvs @@ -70,6 +71,7 @@ jobs: - '!notebooks/**' - '!python/**' - '!rust/**' + - '!go/**' - '!thirdparty/LICENSES/**' test_notebooks: - '**' @@ -77,6 +79,7 @@ jobs: - '!.pre-commit-config.yaml' - '!README.md' - '!rust/**' + - '!go/**' - '!thirdparty/LICENSES/**' test_python: - '**' @@ -87,6 +90,7 @@ jobs: - '!img/**' - '!notebooks/**' - '!rust/**' + - '!go/**' - '!thirdparty/LICENSES/**' checks: needs: telemetry-setup @@ -150,6 +154,16 @@ jobs: arch: "amd64" container_image: "rapidsai/ci-conda:latest" run_script: "ci/build_rust.sh" + go-build: + needs: conda-cpp-build + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02 + with: + build_type: pull-request + node_type: "gpu-v100-latest-1" + arch: "amd64" + container_image: "rapidsai/ci-conda:latest" + run_script: "ci/build_go.sh" wheel-build-libcuvs: needs: checks secrets: inherit diff --git a/build.sh b/build.sh index 89e1b5a337..4aaa5617c3 100755 
--- a/build.sh +++ b/build.sh @@ -18,7 +18,7 @@ ARGS=$* # scripts, and that this script resides in the repo dir! REPODIR=$(cd $(dirname $0); pwd) -VALIDARGS="clean libcuvs python rust java docs tests bench-ann examples --uninstall -v -g -n --compile-static-lib --allgpuarch --no-mg --no-cpu --cpu-only --no-shared-libs --no-nvtx --show_depr_warn --incl-cache-stats --time -h" +VALIDARGS="clean libcuvs python rust go java docs tests bench-ann examples --uninstall -v -g -n --compile-static-lib --allgpuarch --no-mg --no-cpu --cpu-only --no-shared-libs --no-nvtx --show_depr_warn --incl-cache-stats --time -h" HELP="$0 [ ...] [ ...] [--cmake-args=\"\"] [--cache-tool=] [--limit-tests=] [--limit-bench-ann=] [--build-metrics=] where is: clean - remove all existing build artifacts and configuration (start over) @@ -26,6 +26,7 @@ HELP="$0 [ ...] [ ...] [--cmake-args=\"\"] [--cache-tool==0.8,<1.0 - doxygen>=1.8.20 - gcc_linux-aarch64=11.* +- go - graphviz - ipython - libclang==16.0.6 diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index c6a65e6848..cf73f4b745 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -23,6 +23,7 @@ dependencies: - dlpack>=0.8,<1.0 - doxygen>=1.8.20 - gcc_linux-64=11.* +- go - graphviz - ipython - libclang==16.0.6 diff --git a/conda/environments/all_cuda-128_arch-aarch64.yaml b/conda/environments/all_cuda-128_arch-aarch64.yaml index c508a9dc19..f161975614 100644 --- a/conda/environments/all_cuda-128_arch-aarch64.yaml +++ b/conda/environments/all_cuda-128_arch-aarch64.yaml @@ -24,6 +24,7 @@ dependencies: - dlpack>=0.8,<1.0 - doxygen>=1.8.20 - gcc_linux-aarch64=13.* +- go - graphviz - ipython - libclang==16.0.6 diff --git a/conda/environments/all_cuda-128_arch-x86_64.yaml b/conda/environments/all_cuda-128_arch-x86_64.yaml index f043e544bb..5056ef1854 100644 --- a/conda/environments/all_cuda-128_arch-x86_64.yaml +++ 
b/conda/environments/all_cuda-128_arch-x86_64.yaml @@ -24,6 +24,7 @@ dependencies: - dlpack>=0.8,<1.0 - doxygen>=1.8.20 - gcc_linux-64=13.* +- go - graphviz - ipython - libclang==16.0.6 diff --git a/dependencies.yaml b/dependencies.yaml index 7a50c0d469..cfa63250da 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -21,6 +21,7 @@ files: - rapids_build - run_py_cuvs - rust + - go - test_libcuvs - test_python_common - test_py_cuvs @@ -81,6 +82,14 @@ files: - cuda_version - rapids_build - rust + go: + output: none + includes: + - clang + - cuda + - cuda_version + - rapids_build + - go py_build_libcuvs: output: pyproject pyproject_dir: python/libcuvs @@ -470,6 +479,12 @@ dependencies: packages: - make - rust + go: + common: + - output_types: [conda] + packages: + - go + - dlpack>=0.8,<1.0 build_wheels: common: - output_types: [requirements, pyproject] diff --git a/go/brute_force/brute_force.go b/go/brute_force/brute_force.go new file mode 100644 index 0000000000..29f1a5d958 --- /dev/null +++ b/go/brute_force/brute_force.go @@ -0,0 +1,83 @@ +package brute_force + +// #include +import "C" + +import ( + "errors" + "unsafe" + + cuvs "github.com/rapidsai/cuvs/go" +) + +// Brute Force KNN Index +type BruteForceIndex struct { + index C.cuvsBruteForceIndex_t + trained bool +} + +// Creates a new empty Brute Force KNN Index +func CreateIndex() (*BruteForceIndex, error) { + var index C.cuvsBruteForceIndex_t + + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsBruteForceIndexCreate(&index))) + if err != nil { + return nil, err + } + + return &BruteForceIndex{index: index, trained: false}, nil +} + +// Destroys the Brute Force KNN Index +func (index *BruteForceIndex) Close() error { + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsBruteForceIndexDestroy(index.index))) + if err != nil { + return err + } + return nil +} + +// Builds a new Brute Force KNN Index from the dataset for efficient search. 
+// +// # Arguments +// +// * `Resources` - Resources to use +// * `Dataset` - A row-major matrix on either the host or device to index +// * `metric` - Distance type to use for building the index +// * `metric_arg` - Value of `p` for Minkowski distances - set to 2.0 if not applicable +func BuildIndex[T any](Resources cuvs.Resource, Dataset *cuvs.Tensor[T], metric cuvs.Distance, metric_arg float32, index *BruteForceIndex) error { + CMetric, exists := cuvs.CDistances[metric] + + if !exists { + return errors.New("cuvs: invalid distance metric") + } + + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsBruteForceBuild(C.cuvsResources_t(Resources.Resource), (*C.DLManagedTensor)(unsafe.Pointer(Dataset.C_tensor)), C.cuvsDistanceType(CMetric), C.float(metric_arg), index.index))) + if err != nil { + return err + } + index.trained = true + + return nil +} + +// Perform a Nearest Neighbors search on the Index +// +// # Arguments +// +// * `Resources` - Resources to use +// * `queries` - Tensor in device memory to query for +// * `neighbors` - Tensor in device memory that receives the indices of the nearest neighbors +// * `distances` - Tensor in device memory that receives the distances of the nearest neighbors +func SearchIndex[T any](resources cuvs.Resource, index BruteForceIndex, queries *cuvs.Tensor[T], neighbors *cuvs.Tensor[int64], distances *cuvs.Tensor[T]) error { + if !index.trained { + return errors.New("index needs to be built before calling search") + } + + prefilter := C.cuvsFilter{ + addr: 0, + _type: C.NO_FILTER, + } + + return cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsBruteForceSearch(C.ulong(resources.Resource), index.index, (*C.DLManagedTensor)(unsafe.Pointer(queries.C_tensor)), (*C.DLManagedTensor)(unsafe.Pointer(neighbors.C_tensor)), (*C.DLManagedTensor)(unsafe.Pointer(distances.C_tensor)), prefilter))) +} diff --git a/go/brute_force/brute_force_test.go b/go/brute_force/brute_force_test.go new file mode 100644 index 0000000000..151c48e6ac --- /dev/null +++ 
b/go/brute_force/brute_force_test.go @@ -0,0 +1,110 @@ +package brute_force + +import ( + "math/rand/v2" + "testing" + + cuvs "github.com/rapidsai/cuvs/go" +) + +func TestCagra(t *testing.T) { + const ( + nDataPoints = 1024 + nFeatures = 16 + nQueries = 4 + k = 4 + epsilon = 0.001 + ) + + resource, _ := cuvs.NewResource(nil) + defer resource.Close() + + testDataset := make([][]float32, nDataPoints) + for i := range testDataset { + testDataset[i] = make([]float32, nFeatures) + for j := range testDataset[i] { + testDataset[i][j] = rand.Float32() + } + } + + dataset, err := cuvs.NewTensor(testDataset) + if err != nil { + t.Fatalf("error creating dataset tensor: %v", err) + } + defer dataset.Close() + + index, _ := CreateIndex() + defer index.Close() + + // use the first 4 points from the dataset as queries : will test that we get them back + // as their own nearest neighbor + queries, _ := cuvs.NewTensor(testDataset[:nQueries]) + defer queries.Close() + + neighbors, err := cuvs.NewTensorOnDevice[int64](&resource, []int64{int64(nQueries), int64(k)}) + if err != nil { + t.Fatalf("error creating neighbors tensor: %v", err) + } + defer neighbors.Close() + + distances, err := cuvs.NewTensorOnDevice[float32](&resource, []int64{int64(nQueries), int64(k)}) + if err != nil { + t.Fatalf("error creating distances tensor: %v", err) + } + defer distances.Close() + + if _, err := dataset.ToDevice(&resource); err != nil { + t.Fatalf("error moving dataset to device: %v", err) + } + + if err := BuildIndex(resource, &dataset, cuvs.DistanceL2, 2.0, index); err != nil { + t.Fatalf("error building index: %v", err) + } + + if err := resource.Sync(); err != nil { + t.Fatalf("error syncing resource: %v", err) + } + + if _, err := queries.ToDevice(&resource); err != nil { + t.Fatalf("error moving queries to device: %v", err) + } + + err = SearchIndex(resource, *index, &queries, &neighbors, &distances) + if err != nil { + t.Fatalf("error searching index: %v", err) + } + + if 
_, err := neighbors.ToHost(&resource); err != nil { + t.Fatalf("error moving neighbors to host: %v", err) + } + + if _, err := distances.ToHost(&resource); err != nil { + t.Fatalf("error moving distances to host: %v", err) + } + + if err := resource.Sync(); err != nil { + t.Fatalf("error syncing resource: %v", err) + } + + neighborsSlice, err := neighbors.Slice() + if err != nil { + t.Fatalf("error getting neighbors slice: %v", err) + } + + for i := range neighborsSlice { + if neighborsSlice[i][0] != int64(i) { + t.Error("wrong neighbor, expected", i, "got", neighborsSlice[i][0]) + } + } + + distancesSlice, err := distances.Slice() + if err != nil { + t.Fatalf("error getting distances slice: %v", err) + } + + for i := range distancesSlice { + if distancesSlice[i][0] >= epsilon || distancesSlice[i][0] <= -epsilon { + t.Error("distance should be close to 0, got", distancesSlice[i][0]) + } + } +} diff --git a/go/cagra/cagra.go b/go/cagra/cagra.go new file mode 100644 index 0000000000..61b9e71d6e --- /dev/null +++ b/go/cagra/cagra.go @@ -0,0 +1,136 @@ +package cagra + +// #include +import "C" + +import ( + "errors" + "unsafe" + + cuvs "github.com/rapidsai/cuvs/go" +) + +// Cagra ANN Index +type CagraIndex struct { + index C.cuvsCagraIndex_t + trained bool +} + +// Creates a new empty Cagra Index +func CreateIndex() (*CagraIndex, error) { + var index C.cuvsCagraIndex_t + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsCagraIndexCreate(&index))) + if err != nil { + return nil, err + } + + return &CagraIndex{index: index}, nil +} + +// Builds a new Index from the dataset for efficient search. 
+// +// # Arguments +// +// * `Resources` - Resources to use +// * `params` - Parameters for building the index +// * `dataset` - A row-major Tensor on either the host or device to index +// * `index` - CagraIndex to build +func BuildIndex[T any](Resources cuvs.Resource, params *IndexParams, dataset *cuvs.Tensor[T], index *CagraIndex) error { + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsCagraBuild(C.ulong(Resources.Resource), params.params, (*C.DLManagedTensor)(unsafe.Pointer(dataset.C_tensor)), index.index))) + if err != nil { + return err + } + index.trained = true + return nil +} + +// Extends the index with additional data +// +// # Arguments +// +// * `Resources` - Resources to use +// * `params` - Parameters for extending the index +// * `additional_dataset` - A row-major Tensor on the device to extend the index with +// * `return_dataset` - A row-major Tensor on the device that will receive the extended dataset +// * `index` - CagraIndex to extend +func ExtendIndex[T any](Resources cuvs.Resource, params *ExtendParams, additional_dataset *cuvs.Tensor[T], return_dataset *cuvs.Tensor[T], index *CagraIndex) error { + if !index.trained { + return errors.New("index needs to be built before calling extend") + } + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsCagraExtend(C.ulong(Resources.Resource), params.params, (*C.DLManagedTensor)(unsafe.Pointer(additional_dataset.C_tensor)), index.index, (*C.DLManagedTensor)(unsafe.Pointer(return_dataset.C_tensor))))) + if err != nil { + return err + } + return nil +} + +// Destroys the Cagra Index +func (index *CagraIndex) Close() error { + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsCagraIndexDestroy(index.index))) + if err != nil { + return err + } + return nil +} + +// Perform a Approximate Nearest Neighbors search on the Index +// +// # Arguments +// +// * `Resources` - Resources to use +// * `params` - Parameters to use in searching the index +// * `queries` - A tensor in device memory to query for +// * `neighbors` - Tensor 
in device memory that receives the indices of the nearest neighbors +// * `distances` - Tensor in device memory that receives the distances of the nearest neighbors +// * `allowList` - List of indices to allow in the search, if nil, no filtering is applied +func SearchIndex[T any](Resources cuvs.Resource, params *SearchParams, index *CagraIndex, queries *cuvs.Tensor[T], neighbors *cuvs.Tensor[uint32], distances *cuvs.Tensor[T], allowList []uint32) error { + if !index.trained { + return errors.New("index needs to be built before calling search") + } + + var filter C.cuvsFilter + bitset := createBitset(allowList) + allowListTensor, err := cuvs.NewVector[uint32](bitset) + if err != nil { + return err + } + defer allowListTensor.Close() + _, err = allowListTensor.ToDevice(&Resources) + if err != nil { + return err + } + if allowList == nil { + filter = C.cuvsFilter{ + _type: C.NO_FILTER, + addr: C.uintptr_t(0), + } + } else { + filter = C.cuvsFilter{ + _type: C.BITSET, + addr: C.uintptr_t(uintptr(unsafe.Pointer(allowListTensor.C_tensor))), + } + } + return cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsCagraSearch(C.cuvsResources_t(Resources.Resource), params.params, index.index, (*C.DLManagedTensor)(unsafe.Pointer(queries.C_tensor)), (*C.DLManagedTensor)(unsafe.Pointer(neighbors.C_tensor)), (*C.DLManagedTensor)(unsafe.Pointer(distances.C_tensor)), filter))) +} + +func createBitset(allowList []uint32) []uint32 { + // Calculate size needed for the bitset array + // Each uint32 handles 32 bits, so we divide the max ID by 32 (shift right by 5) + maxID := uint32(0) + for _, id := range allowList { + if id > maxID { + maxID = id + } + } + size := (maxID >> 5) + 1 // Division by 32, add 1 to handle remainder + bitset := make([]uint32, size) + for _, id := range allowList { + // Calculate which uint32 in our array (divide by 32) + arrayIndex := id >> 5 + // Calculate bit position within that uint32 (mod 32) + bitPosition := id & 31 // equivalent to id % 32 + // Set the bit + 
bitset[arrayIndex] |= 1 << bitPosition + } + return bitset +} diff --git a/go/cagra/cagra_test.go b/go/cagra/cagra_test.go new file mode 100644 index 0000000000..86baf5e601 --- /dev/null +++ b/go/cagra/cagra_test.go @@ -0,0 +1,327 @@ +package cagra + +import ( + "math/rand/v2" + "testing" + + cuvs "github.com/rapidsai/cuvs/go" +) + +func TestCagra(t *testing.T) { + const ( + nDataPoints = 1024 + nFeatures = 16 + nQueries = 4 + k = 4 + epsilon = 0.001 + ) + + resource, _ := cuvs.NewResource(nil) + defer resource.Close() + + testDataset := make([][]float32, nDataPoints) + for i := range testDataset { + testDataset[i] = make([]float32, nFeatures) + for j := range testDataset[i] { + testDataset[i][j] = rand.Float32() + } + } + + dataset, err := cuvs.NewTensor(testDataset) + if err != nil { + t.Fatalf("error creating dataset tensor: %v", err) + } + defer dataset.Close() + + indexParams, err := CreateIndexParams() + if err != nil { + t.Fatalf("error creating index params: %v", err) + } + defer indexParams.Close() + + index, _ := CreateIndex() + defer index.Close() + + // use the first 4 points from the dataset as queries : will test that we get them back + // as their own nearest neighbor + queries, _ := cuvs.NewTensor(testDataset[:nQueries]) + defer queries.Close() + + neighbors, err := cuvs.NewTensorOnDevice[uint32](&resource, []int64{int64(nQueries), int64(k)}) + if err != nil { + t.Fatalf("error creating neighbors tensor: %v", err) + } + defer neighbors.Close() + + distances, err := cuvs.NewTensorOnDevice[float32](&resource, []int64{int64(nQueries), int64(k)}) + if err != nil { + t.Fatalf("error creating distances tensor: %v", err) + } + defer distances.Close() + + if _, err := dataset.ToDevice(&resource); err != nil { + t.Fatalf("error moving dataset to device: %v", err) + } + + if err := BuildIndex(resource, indexParams, &dataset, index); err != nil { + t.Fatalf("error building index: %v", err) + } + + if err := resource.Sync(); err != nil { + 
t.Fatalf("error syncing resource: %v", err) + } + + if _, err := queries.ToDevice(&resource); err != nil { + t.Fatalf("error moving queries to device: %v", err) + } + + SearchParams, err := CreateSearchParams() + if err != nil { + t.Fatalf("error creating search params: %v", err) + } + defer SearchParams.Close() + + err = SearchIndex(resource, SearchParams, index, &queries, &neighbors, &distances, nil) + if err != nil { + t.Fatalf("error searching index: %v", err) + } + + if _, err := neighbors.ToHost(&resource); err != nil { + t.Fatalf("error moving neighbors to host: %v", err) + } + + if _, err := distances.ToHost(&resource); err != nil { + t.Fatalf("error moving distances to host: %v", err) + } + + if err := resource.Sync(); err != nil { + t.Fatalf("error syncing resource: %v", err) + } + + neighborsSlice, err := neighbors.Slice() + if err != nil { + t.Fatalf("error getting neighbors slice: %v", err) + } + + for i := range neighborsSlice { + if neighborsSlice[i][0] != uint32(i) { + t.Error("wrong neighbor, expected", i, "got", neighborsSlice[i][0]) + } + } + + distancesSlice, err := distances.Slice() + if err != nil { + t.Fatalf("error getting distances slice: %v", err) + } + + for i := range distancesSlice { + if distancesSlice[i][0] >= epsilon || distancesSlice[i][0] <= -epsilon { + t.Error("distance should be close to 0, got", distancesSlice[i][0]) + } + } +} + +func TestCagraFiltering(t *testing.T) { + const ( + nDataPoints = 1024 + nFilteredOut = 512 + nFeatures = 16 + nQueries = 4 + k = 4 + epsilon = 0.001 + ) + + resource, _ := cuvs.NewResource(nil) + defer resource.Close() + + testDataset := make([][]float32, nDataPoints) + for i := range testDataset { + testDataset[i] = make([]float32, nFeatures) + for j := range testDataset[i] { + testDataset[i][j] = rand.Float32() + } + } + + dataset, err := cuvs.NewTensor(testDataset) + if err != nil { + t.Fatalf("error creating dataset tensor: %v", err) + } + defer dataset.Close() + + indexParams, err := 
CreateIndexParams() + if err != nil { + t.Fatalf("error creating index params: %v", err) + } + defer indexParams.Close() + + index, _ := CreateIndex() + defer index.Close() + + // Test queries: first 4 points (should be found without filter) + queries1, _ := cuvs.NewTensor(testDataset[:nQueries]) + defer queries1.Close() + + // Test queries: points 512-515 (should be found with filter, not found without) + queries2, _ := cuvs.NewTensor(testDataset[nFilteredOut:(nFilteredOut + nQueries)]) + defer queries2.Close() + + neighbors, err := cuvs.NewTensorOnDevice[uint32](&resource, []int64{int64(nQueries), int64(k)}) + if err != nil { + t.Fatalf("error creating neighbors tensor: %v", err) + } + defer neighbors.Close() + + distances, err := cuvs.NewTensorOnDevice[float32](&resource, []int64{int64(nQueries), int64(k)}) + if err != nil { + t.Fatalf("error creating distances tensor: %v", err) + } + defer distances.Close() + + if _, err := dataset.ToDevice(&resource); err != nil { + t.Fatalf("error moving dataset to device: %v", err) + } + + if err := BuildIndex(resource, indexParams, &dataset, index); err != nil { + t.Fatalf("error building index: %v", err) + } + + if err := resource.Sync(); err != nil { + t.Fatalf("error syncing resource: %v", err) + } + + SearchParams, err := CreateSearchParams() + if err != nil { + t.Fatalf("error creating search params: %v", err) + } + defer SearchParams.Close() + + // Step 1: Search without filter - first 4 points should find themselves + if _, err := queries1.ToDevice(&resource); err != nil { + t.Fatalf("error moving queries1 to device: %v", err) + } + + err = SearchIndex(resource, SearchParams, index, &queries1, &neighbors, &distances, nil) + if err != nil { + t.Fatalf("error searching index without filter: %v", err) + } + + if _, err := neighbors.ToHost(&resource); err != nil { + t.Fatalf("error moving neighbors to host: %v", err) + } + + if _, err := distances.ToHost(&resource); err != nil { + t.Fatalf("error moving distances to 
host: %v", err) + } + + if err := resource.Sync(); err != nil { + t.Fatalf("error syncing resource: %v", err) + } + + // Verify first 4 points found themselves without filter + neighborsSlice, err := neighbors.Slice() + if err != nil { + t.Fatalf("error getting neighbors slice: %v", err) + } + + for i := range neighborsSlice { + if neighborsSlice[i][0] != uint32(i) { + t.Error("without filter: wrong neighbor, expected", i, "got", neighborsSlice[i][0]) + } + } + + // Step 2: Search with filter excluding first half - first 4 points should not be found + allowList := make([]uint32, nDataPoints-nFilteredOut) + for i := range allowList { + allowList[i] = uint32(i + nFilteredOut) + } + + if _, err := queries1.ToDevice(&resource); err != nil { + t.Fatalf("error moving queries1 back to device: %v", err) + } + + if _, err := neighbors.ToDevice(&resource); err != nil { + t.Fatalf("error moving neighbors back to device: %v", err) + } + + if _, err := distances.ToDevice(&resource); err != nil { + t.Fatalf("error moving distances back to device: %v", err) + } + + err = SearchIndex(resource, SearchParams, index, &queries1, &neighbors, &distances, allowList) + if err != nil { + t.Fatalf("error searching index with filter: %v", err) + } + + if _, err := neighbors.ToHost(&resource); err != nil { + t.Fatalf("error moving neighbors to host: %v", err) + } + + if err := resource.Sync(); err != nil { + t.Fatalf("error syncing resource: %v", err) + } + + // Verify first 4 points are not in results when filtered + neighborsSlice, err = neighbors.Slice() + if err != nil { + t.Fatalf("error getting neighbors slice: %v", err) + } + + for i := range neighborsSlice { + for j := range neighborsSlice[i] { + if neighborsSlice[i][j] < uint32(nFilteredOut) { + t.Error("with filter: found point that should be filtered out:", neighborsSlice[i][j]) + } + } + } + + // Step 3: Search points 512-515 with filter - they should find themselves + if _, err := queries2.ToDevice(&resource); err != nil { + 
t.Fatalf("error moving queries2 to device: %v", err) + } + + if _, err := neighbors.ToDevice(&resource); err != nil { + t.Fatalf("error moving neighbors back to device: %v", err) + } + + if _, err := distances.ToDevice(&resource); err != nil { + t.Fatalf("error moving distances back to device: %v", err) + } + + err = SearchIndex(resource, SearchParams, index, &queries2, &neighbors, &distances, allowList) + if err != nil { + t.Fatalf("error searching index with filter for second query set: %v", err) + } + + if _, err := neighbors.ToHost(&resource); err != nil { + t.Fatalf("error moving neighbors to host: %v", err) + } + + if _, err := distances.ToHost(&resource); err != nil { + t.Fatalf("error moving distances to host: %v", err) + } + + if err := resource.Sync(); err != nil { + t.Fatalf("error syncing resource: %v", err) + } + + neighborsSlice, err = neighbors.Slice() + if err != nil { + t.Fatalf("error getting neighbors slice: %v", err) + } + + distancesSlice, err := distances.Slice() + if err != nil { + t.Fatalf("error getting distances slice: %v", err) + } + + // Verify points 512-515 find themselves when filtered to second half + for i := range neighborsSlice { + expectedID := uint32(i + nFilteredOut) + if neighborsSlice[i][0] != expectedID { + t.Error("with filter: wrong neighbor for filtered query, expected", expectedID, "got", neighborsSlice[i][0]) + } + if distancesSlice[i][0] >= epsilon || distancesSlice[i][0] <= -epsilon { + t.Error("with filter: distance should be close to 0 for filtered query, got", distancesSlice[i][0]) + } + } +} diff --git a/go/cagra/extend_params.go b/go/cagra/extend_params.go new file mode 100644 index 0000000000..7e173c0456 --- /dev/null +++ b/go/cagra/extend_params.go @@ -0,0 +1,46 @@ +package cagra + +// #include +import "C" + +import ( + cuvs "github.com/rapidsai/cuvs/go" +) + +// Parameters to extend CAGRA Index +type ExtendParams struct { + params C.cuvsCagraExtendParams_t +} + +// Creates a new ExtendParams 
+func CreateExtendParams() (*ExtendParams, error) {
+	var params C.cuvsCagraExtendParams_t
+
+	err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsCagraExtendParamsCreate(&params)))
+	if err != nil {
+		return nil, err
+	}
+
+	ExtendParams := &ExtendParams{params: params}
+
+	return ExtendParams, nil
+}
+
+// The additional dataset is divided into chunks and added to the graph.
+// This is the knob to adjust the tradeoff between the recall and operation throughput.
+// Large chunk sizes can result in high throughput, but use more
+// working memory (O(max_chunk_size*degree^2)).
+// This can also degrade recall because no edges are added between the nodes in the same chunk.
+// Auto select when 0.
+func (p *ExtendParams) SetMaxChunkSize(max_chunk_size uint32) (*ExtendParams, error) {
+	p.params.max_chunk_size = C.uint32_t(max_chunk_size)
+	return p, nil
+}
+
+func (p *ExtendParams) Close() error {
+	err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsCagraExtendParamsDestroy(p.params)))
+	if err != nil {
+		return err
+	}
+	return nil
+}
diff --git a/go/cagra/index_params.go b/go/cagra/index_params.go
new file mode 100644
index 0000000000..51fdb318fa
--- /dev/null
+++ b/go/cagra/index_params.go
@@ -0,0 +1,155 @@
+package cagra
+
+// #include <cuvs/neighbors/cagra.h>
+import "C"
+
+import (
+	"errors"
+
+	cuvs "github.com/rapidsai/cuvs/go"
+)
+
+type IndexParams struct {
+	params C.cuvsCagraIndexParams_t
+}
+
+// Supplemental parameters to build CAGRA Index
+type CompressionParams struct {
+	params C.cuvsCagraCompressionParams_t
+}
+
+type BuildAlgo int
+
+const (
+	IvfPq BuildAlgo = iota
+	NnDescent
+	AutoSelect
+)
+
+var cBuildAlgos = map[BuildAlgo]int{
+	IvfPq:      C.IVF_PQ,
+	NnDescent:  C.NN_DESCENT,
+	AutoSelect: C.AUTO_SELECT,
+}
+
+// Creates a new CompressionParams
+func CreateCompressionParams() (*CompressionParams, error) {
+	var params C.cuvsCagraCompressionParams_t
+
+	err := 
cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsCagraCompressionParamsCreate(¶ms))) + if err != nil { + return nil, err + } + + return &CompressionParams{params: params}, nil +} + +// The bit length of the vector element after compression by PQ. +func (p *CompressionParams) SetPQBits(pq_bits uint32) (*CompressionParams, error) { + p.params.pq_bits = C.uint32_t(pq_bits) + + return p, nil +} + +// The dimensionality of the vector after compression by PQ. When zero, +// an optimal value is selected using a heuristic. +func (p *CompressionParams) SetPQDim(pq_dim uint32) (*CompressionParams, error) { + p.params.pq_dim = C.uint32_t(pq_dim) + + return p, nil +} + +// Vector Quantization (VQ) codebook size - number of "coarse cluster +// centers". When zero, an optimal value is selected using a heuristic. +func (p *CompressionParams) SetVQNCenters(vq_n_centers uint32) (*CompressionParams, error) { + p.params.vq_n_centers = C.uint32_t(vq_n_centers) + + return p, nil +} + +// The number of iterations searching for kmeans centers (both VQ & PQ +// phases). +func (p *CompressionParams) SetKMeansNIters(kmeans_n_iters uint32) (*CompressionParams, error) { + p.params.kmeans_n_iters = C.uint32_t(kmeans_n_iters) + + return p, nil +} + +// The fraction of data to use during iterative kmeans building (VQ +// phase). When zero, an optimal value is selected using a heuristic. +func (p *CompressionParams) SetVQKMeansTrainsetFraction(vq_kmeans_trainset_fraction float64) (*CompressionParams, error) { + p.params.vq_kmeans_trainset_fraction = C.double(vq_kmeans_trainset_fraction) + + return p, nil +} + +// The fraction of data to use during iterative kmeans building (PQ +// phase). When zero, an optimal value is selected using a heuristic. 
+func (p *CompressionParams) SetPQKMeansTrainsetFraction(pq_kmeans_trainset_fraction float64) (*CompressionParams, error) { + p.params.pq_kmeans_trainset_fraction = C.double(pq_kmeans_trainset_fraction) + + return p, nil +} + +// Creates a new IndexParams +func CreateIndexParams() (*IndexParams, error) { + var params C.cuvsCagraIndexParams_t + + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsCagraIndexParamsCreate(¶ms))) + if err != nil { + return nil, err + } + + IndexParams := &IndexParams{params: params} + + return IndexParams, nil +} + +// Degree of input graph for pruning +func (p *IndexParams) SetIntermediateGraphDegree(intermediate_graph_degree uintptr) (*IndexParams, error) { + p.params.intermediate_graph_degree = C.size_t(intermediate_graph_degree) + return p, nil +} + +// Degree of output graph +func (p *IndexParams) SetGraphDegree(intermediate_graph_degree uintptr) (*IndexParams, error) { + p.params.graph_degree = C.size_t(intermediate_graph_degree) + + return p, nil +} + +// ANN algorithm to build knn graph +func (p *IndexParams) SetBuildAlgo(build_algo BuildAlgo) (*IndexParams, error) { + CBuildAlgo, exists := cBuildAlgos[build_algo] + + if !exists { + return nil, errors.New("cuvs: invalid build_algo") + } + p.params.build_algo = uint32(CBuildAlgo) + + return p, nil +} + +// Number of iterations to run if building with NN_DESCENT +func (p *IndexParams) SetNNDescentNiter(nn_descent_niter uint32) (*IndexParams, error) { + p.params.nn_descent_niter = C.ulong(nn_descent_niter) + + return p, nil +} + +// Compression parameters +func (p *IndexParams) SetCompression(compression *CompressionParams) (*IndexParams, error) { + p.params.compression = C.cuvsCagraCompressionParams_t(compression.params) + + return p, nil +} + +// Destroys IndexParams +func (p *IndexParams) Close() error { + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsCagraIndexParamsDestroy(p.params))) + if err != nil { + return err + } + + return nil +} diff --git a/go/cagra/search_params.go 
b/go/cagra/search_params.go new file mode 100644 index 0000000000..94207d2881 --- /dev/null +++ b/go/cagra/search_params.go @@ -0,0 +1,163 @@ +package cagra + +// #include +import "C" + +import ( + "errors" + + cuvs "github.com/rapidsai/cuvs/go" +) + +// Supplemental parameters to search CAGRA Index +type SearchParams struct { + params C.cuvsCagraSearchParams_t +} + +type SearchAlgo int + +const ( + SearchAlgoSingleCta SearchAlgo = iota + SearchAlgoMultiCta + SearchAlgoMultiKernel + SearchAlgoAuto +) + +type HashmapMode int + +const ( + HashmapModeHash HashmapMode = iota + HashmapModeSmall + HashmapModeAuto +) + +// Creates a new SearchParams +func CreateSearchParams() (*SearchParams, error) { + var params C.cuvsCagraSearchParams_t + + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsCagraSearchParamsCreate(¶ms))) + if err != nil { + return nil, err + } + + return &SearchParams{params: params}, nil +} + +// Maximum number of queries to search at the same time (batch size). Auto select when 0 +func (p *SearchParams) SetMaxQueries(max_queries uintptr) (*SearchParams, error) { + p.params.max_queries = C.size_t(max_queries) + return p, nil +} + +// Number of intermediate search results retained during the search. +// This is the main knob to adjust trade off between accuracy and search speed. +// Higher values improve the search accuracy +func (p *SearchParams) SetItopkSize(itopk_size uintptr) (*SearchParams, error) { + p.params.itopk_size = C.size_t(itopk_size) + return p, nil +} + +// Upper limit of search iterations. Auto select when 0. +func (p *SearchParams) SetMaxIterations(max_iterations uintptr) (*SearchParams, error) { + p.params.max_iterations = C.size_t(max_iterations) + return p, nil +} + +// Which search implementation to use. 
+func (p *SearchParams) SetAlgo(algo SearchAlgo) (*SearchParams, error) { + CAlgo := C.SINGLE_CTA + + switch algo { + case SearchAlgoSingleCta: + CAlgo = C.SINGLE_CTA + case SearchAlgoMultiCta: + CAlgo = C.MULTI_CTA + case SearchAlgoMultiKernel: + CAlgo = C.MULTI_KERNEL + case SearchAlgoAuto: + CAlgo = C.AUTO + default: + return nil, errors.New("unsupported algo") + } + + p.params.algo = uint32(CAlgo) + + return p, nil +} + +// Number of threads used to calculate a single distance. 4, 8, 16, or 32. +func (p *SearchParams) SetTeamSize(team_size uintptr) (*SearchParams, error) { + p.params.team_size = C.size_t(team_size) + return p, nil +} + +// Lower limit of search iterations. +func (p *SearchParams) SetMinIterations(min_iterations uintptr) (*SearchParams, error) { + p.params.min_iterations = C.size_t(min_iterations) + return p, nil +} + +// How many nodes to search at once. Auto select when 0. +func (p *SearchParams) SetSearchWidth(search_width uintptr) (*SearchParams, error) { + p.params.search_width = C.size_t(search_width) + return p, nil +} + +// Thread block size. 0, 64, 128, 256, 512, 1024. Auto selection when 0. +func (p *SearchParams) SetThreadBlockSize(thread_block_size uintptr) (*SearchParams, error) { + p.params.thread_block_size = C.size_t(thread_block_size) + return p, nil +} + +// Hashmap type. Auto selection when AUTO. +func (p *SearchParams) SetHashmapMode(hashmap_mode HashmapMode) (*SearchParams, error) { + CHashMode := C.AUTO_HASH + + switch hashmap_mode { + case HashmapModeHash: + CHashMode = C.HASH + case HashmapModeSmall: + CHashMode = C.SMALL + case HashmapModeAuto: + CHashMode = C.AUTO_HASH + default: + return nil, errors.New("unsupported hashmap_mode") + } + + p.params.hashmap_mode = uint32(CHashMode) + + return p, nil +} + +// Lower limit of hashmap bit length. More than 8. 
+func (p *SearchParams) SetHashmapMinBitlen(hashmap_min_bitlen uintptr) (*SearchParams, error) { + p.params.hashmap_min_bitlen = C.size_t(hashmap_min_bitlen) + return p, nil +} + +// Upper limit of hashmap fill rate. More than 0.1, less than 0.9. +func (p *SearchParams) SetHashmapMaxFillRate(hashmap_max_fill_rate float32) (*SearchParams, error) { + p.params.hashmap_max_fill_rate = C.float(hashmap_max_fill_rate) + return p, nil +} + +// Number of iterations of initial random seed node selection. 1 or more. +func (p *SearchParams) SetNumRandomSamplings(num_random_samplings uint32) (*SearchParams, error) { + p.params.num_random_samplings = C.uint32_t(num_random_samplings) + return p, nil +} + +// Bit mask used for initial random seed node selection. +func (p *SearchParams) SetRandXorMask(rand_xor_mask uint64) (*SearchParams, error) { + p.params.rand_xor_mask = C.uint64_t(rand_xor_mask) + return p, nil +} + +// Destroys SearchParams +func (p *SearchParams) Close() error { + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsCagraSearchParamsDestroy(p.params))) + if err != nil { + return err + } + return nil +} diff --git a/go/distance.go b/go/distance.go new file mode 100644 index 0000000000..530e1d61b3 --- /dev/null +++ b/go/distance.go @@ -0,0 +1,70 @@ +package cuvs + +// #include +import "C" + +import ( + "errors" + "unsafe" +) + +type Distance int + +// Supported distance metrics +const ( + DistanceL2 Distance = iota + DistanceSQEuclidean + DistanceEuclidean + DistanceL1 + DistanceCityblock + DistanceInnerProduct + DistanceChebyshev + DistanceCanberra + DistanceCosine + DistanceLp + DistanceCorrelation + DistanceJaccard + DistanceHellinger + DistanceBrayCurtis + DistanceJensenShannon + DistanceHamming + DistanceKLDivergence + DistanceMinkowski + DistanceRusselRao + DistanceDice +) + +// Maps cuvs Go distances to C distances +var CDistances = map[Distance]int{ + DistanceL2: C.L2SqrtExpanded, + DistanceSQEuclidean: C.L2Expanded, + DistanceEuclidean: C.L2SqrtExpanded, + 
DistanceL1: C.L1, + DistanceCityblock: C.L1, + DistanceInnerProduct: C.InnerProduct, + DistanceChebyshev: C.Linf, + DistanceCanberra: C.Canberra, + DistanceCosine: C.CosineExpanded, + DistanceLp: C.LpUnexpanded, + DistanceCorrelation: C.CorrelationExpanded, + DistanceJaccard: C.JaccardExpanded, + DistanceHellinger: C.HellingerExpanded, + DistanceBrayCurtis: C.BrayCurtis, + DistanceJensenShannon: C.JensenShannon, + DistanceHamming: C.HammingUnexpanded, + DistanceKLDivergence: C.KLDivergence, + DistanceMinkowski: C.LpUnexpanded, + DistanceRusselRao: C.RusselRaoExpanded, + DistanceDice: C.DiceExpanded, +} + +// Computes the pairwise distance between two vectors. +func PairwiseDistance[T any](Resources Resource, x *Tensor[T], y *Tensor[T], distances *Tensor[float32], metric Distance, metric_arg float32) error { + CMetric, exists := CDistances[metric] + + if !exists { + return errors.New("cuvs: invalid distance metric") + } + + return CheckCuvs(CuvsError(C.cuvsPairwiseDistance(C.cuvsResources_t(Resources.Resource), (*C.DLManagedTensor)(unsafe.Pointer(x.C_tensor)), (*C.DLManagedTensor)(unsafe.Pointer(y.C_tensor)), (*C.DLManagedTensor)(unsafe.Pointer(distances.C_tensor)), C.cuvsDistanceType(CMetric), C.float(metric_arg)))) +} diff --git a/go/distance_test.go b/go/distance_test.go new file mode 100644 index 0000000000..17f40a1ba2 --- /dev/null +++ b/go/distance_test.go @@ -0,0 +1,49 @@ +package cuvs_test + +import ( + "math/rand" + "testing" + "time" + + cuvs "github.com/rapidsai/cuvs/go" +) + +func TestDistance(t *testing.T) { + resource, _ := cuvs.NewResource(nil) + + rand.Seed(time.Now().UnixNano()) + + NDataPoints := 256 + NFeatures := 16 + + TestDataset := make([][]float32, NDataPoints) + for i := range TestDataset { + TestDataset[i] = make([]float32, NFeatures) + for j := range TestDataset[i] { + TestDataset[i][j] = rand.Float32() + } + } + + dataset, _ := cuvs.NewTensor(TestDataset) + + DistancesDataset := make([][]float32, NDataPoints) + for i := 
range DistancesDataset { + DistancesDataset[i] = make([]float32, NDataPoints) + } + + distances, _ := cuvs.NewTensor(DistancesDataset) + + distances.ToDevice(&resource) + dataset.ToDevice(&resource) + + cuvs.PairwiseDistance(resource, &dataset, &dataset, &distances, cuvs.DistanceL2, 0.0) + + distances.ToHost(&resource) + + resource.Sync() + + arr, _ := distances.Slice() + if arr[0][0] != 0.0 { + t.Error("wrong distance, expected", 0.0, "got", arr[0][0]) + } +} diff --git a/go/dlpack.go b/go/dlpack.go new file mode 100644 index 0000000000..6fe619fd35 --- /dev/null +++ b/go/dlpack.go @@ -0,0 +1,420 @@ +package cuvs + +// #include +// #include +// #include +import "C" + +import ( + "errors" + "strconv" + "unsafe" +) + +type TensorNumberType interface { + int64 | uint32 | float32 +} + +// ManagedTensor is a wrapper around a dlpack DLManagedTensor object. +// This lets you pass matrices in device or host memory into cuvs. +type Tensor[T any] struct { + C_tensor *C.DLManagedTensor + shape []int64 +} + +// Creates a new Tensor on the host and copies the data into it. 
+func NewTensor[T TensorNumberType](data [][]T) (Tensor[T], error) { + if len(data) == 0 || len(data[0]) == 0 { + return Tensor[T]{}, errors.New("empty data") + } + + dtype := getDLDataType[T]() + + totalElements := len(data) * len(data[0]) + + dataPtr := C.malloc(C.size_t(totalElements * int(unsafe.Sizeof(T(0))))) + if dataPtr == nil { + return Tensor[T]{}, errors.New("data memory allocation failed") + } + + dataSlice := unsafe.Slice((*T)(dataPtr), totalElements) + flattenData(data, dataSlice) + + shapePtr := C.malloc(C.size_t(2 * int(unsafe.Sizeof(C.int64_t(0))))) + if shapePtr == nil { + C.free(dataPtr) + return Tensor[T]{}, errors.New("shape memory allocation failed") + } + + shapeSlice := unsafe.Slice((*C.int64_t)(shapePtr), 2) + shapeSlice[0] = C.int64_t(len(data)) + shapeSlice[1] = C.int64_t(len(data[0])) + + // Create DLManagedTensor + dlm := (*C.DLManagedTensor)(C.malloc(C.size_t(unsafe.Sizeof(C.DLManagedTensor{})))) + if dlm == nil { + return Tensor[T]{}, errors.New("tensor allocation failed") + } + + dlm.dl_tensor.data = dataPtr + dlm.dl_tensor.device = C.DLDevice{ + device_type: C.DLDeviceType(C.kDLCPU), + device_id: 0, + } + dlm.dl_tensor.dtype = dtype + dlm.dl_tensor.ndim = 2 + dlm.dl_tensor.shape = (*C.int64_t)(shapePtr) + dlm.dl_tensor.strides = nil + dlm.dl_tensor.byte_offset = 0 + dlm.manager_ctx = nil + dlm.deleter = nil + + return Tensor[T]{ + C_tensor: dlm, + shape: []int64{int64(len(data)), int64(len(data[0]))}, + }, nil +} + +func NewVector[T TensorNumberType](data []T) (Tensor[T], error) { + if len(data) == 0 { + return Tensor[T]{}, errors.New("empty data") + } + + dtype := getDLDataType[T]() + + totalElements := len(data) + + dataPtr := C.malloc(C.size_t(totalElements * int(unsafe.Sizeof(T(0))))) + if dataPtr == nil { + return Tensor[T]{}, errors.New("data memory allocation failed") + } + + dataSlice := unsafe.Slice((*T)(dataPtr), totalElements) + copy(dataSlice, data) + + shapePtr := C.malloc(C.size_t(int(unsafe.Sizeof(C.int64_t(0))))) + 
if shapePtr == nil { + C.free(dataPtr) + return Tensor[T]{}, errors.New("shape memory allocation failed") + } + + shapeSlice := unsafe.Slice((*C.int64_t)(shapePtr), 1) + shapeSlice[0] = C.int64_t(len(data)) + + // Create DLManagedTensor + dlm := (*C.DLManagedTensor)(C.malloc(C.size_t(unsafe.Sizeof(C.DLManagedTensor{})))) + if dlm == nil { + return Tensor[T]{}, errors.New("tensor allocation failed") + } + + dlm.dl_tensor.data = dataPtr + dlm.dl_tensor.device = C.DLDevice{ + device_type: C.DLDeviceType(C.kDLCPU), + device_id: 0, + } + dlm.dl_tensor.dtype = dtype + dlm.dl_tensor.ndim = 1 + dlm.dl_tensor.shape = (*C.int64_t)(shapePtr) + dlm.dl_tensor.strides = nil + dlm.dl_tensor.byte_offset = 0 + dlm.manager_ctx = nil + dlm.deleter = nil + + return Tensor[T]{ + C_tensor: dlm, + shape: []int64{int64(len(data))}, + }, nil +} + +// Creates a new Tensor with uninitialized data on the current device. +func NewTensorOnDevice[T TensorNumberType](res *Resource, shape []int64) (Tensor[T], error) { + if len(shape) < 2 { + return Tensor[T]{}, errors.New("shape must be at least 2") + } + + shapePtr := C.malloc(C.size_t(len(shape) * int(unsafe.Sizeof(C.int64_t(0))))) + if shapePtr == nil { + return Tensor[T]{}, errors.New("shape memory allocation failed") + } + + shapeSlice := unsafe.Slice((*C.int64_t)(shapePtr), len(shape)) + for i, dim := range shape { + shapeSlice[i] = C.int64_t(dim) + } + + dlm := (*C.DLManagedTensor)(C.malloc(C.size_t(unsafe.Sizeof(C.DLManagedTensor{})))) + if dlm == nil { + return Tensor[T]{}, errors.New("tensor allocation failed") + } + dtype := getDLDataType[T]() + + var deviceDataPtr unsafe.Pointer + bytes := calculateBytes(shape, dtype) + err := CheckCuvs(CuvsError(C.cuvsRMMAlloc(res.Resource, &deviceDataPtr, C.size_t(bytes)))) + if err != nil { + C.free(unsafe.Pointer(dlm)) + C.free(unsafe.Pointer(shapePtr)) + return Tensor[T]{}, err + } + + dlm.dl_tensor.data = deviceDataPtr + dlm.dl_tensor.device = C.DLDevice{ + device_type: C.DLDeviceType(C.kDLCUDA), 
+ device_id: 0, + } + dlm.dl_tensor.dtype = dtype + dlm.dl_tensor.ndim = C.int(len(shape)) + dlm.dl_tensor.shape = (*C.int64_t)(shapePtr) + dlm.dl_tensor.strides = nil + dlm.dl_tensor.byte_offset = 0 + dlm.manager_ctx = nil + dlm.deleter = nil + + shapeCopy := make([]int64, len(shape)) + copy(shapeCopy, shape) + + return Tensor[T]{ + C_tensor: dlm, + shape: shapeCopy, + }, nil +} + +// Destroys Tensor, freeing the memory it was allocated on. +func (t *Tensor[T]) Close() error { + if t.C_tensor.dl_tensor.device.device_type == C.kDLCUDA { + bytes := t.sizeInBytes() + res, err := NewResource(nil) + if err != nil { + return err + } + err = CheckCuvs(CuvsError(C.cuvsRMMFree(res.Resource, t.C_tensor.dl_tensor.data, C.size_t(bytes)))) + + return err + } else if t.C_tensor.dl_tensor.device.device_type == C.kDLCPU { + if t.C_tensor.dl_tensor.data != nil { + C.free(t.C_tensor.dl_tensor.data) + t.C_tensor.dl_tensor.data = nil + } + } + + if t.C_tensor.dl_tensor.shape != nil { + C.free(unsafe.Pointer(t.C_tensor.dl_tensor.shape)) + t.C_tensor.dl_tensor.shape = nil + } + + if t.C_tensor != nil { + C.free(unsafe.Pointer(t.C_tensor)) + t.C_tensor = nil + } + + t.C_tensor = nil + return nil +} + +// Transfers the data in the Tensor to the device. +func (t *Tensor[T]) ToDevice(res *Resource) (*Tensor[T], error) { + bytes := t.sizeInBytes() + + var DeviceDataPointer unsafe.Pointer + + err := CheckCuvs(CuvsError(C.cuvsRMMAlloc(res.Resource, &DeviceDataPointer, C.size_t(bytes)))) + if err != nil { + return nil, err + } + + err = CheckCuda( + C.cudaMemcpy( + DeviceDataPointer, + t.C_tensor.dl_tensor.data, + C.size_t(bytes), + C.cudaMemcpyHostToDevice, + )) + if err != nil { + C.cuvsRMMFree(res.Resource, DeviceDataPointer, C.size_t(bytes)) + return nil, err + } + t.C_tensor.dl_tensor.device.device_type = C.kDLCUDA + t.C_tensor.dl_tensor.data = DeviceDataPointer + + return t, nil +} + +// Returns the shape of the Tensor. 
+func (t *Tensor[T]) Shape() []int64 { + return t.shape +} + +// Expands the Tensor by adding newData to the end of the current data. +// The Tensor must be on the device. +func (t *Tensor[T]) Expand(res *Resource, newData [][]T) (*Tensor[T], error) { + if t.C_tensor.dl_tensor.device.device_type != C.kDLCUDA { + return &Tensor[T]{}, errors.New("Tensor must be on GPU") + } + + newShape := []int64{int64(len(newData)), int64(len(newData[0]))} + + flatData := make([]T, len(newData)*len(newData[0])) + for i := range newData { + for j := range newData[i] { + flatData[i*len(newData[0])+j] = newData[i][j] + } + } + + old_shape := unsafe.Slice((*int64)(unsafe.Pointer(t.C_tensor.dl_tensor.shape)), 2) + + if old_shape[1] != newShape[1] { + return &Tensor[T]{}, errors.New("new shape must be same as old shape, old shape: " + strconv.Itoa(int(old_shape[1])) + ", new shape: " + strconv.Itoa(int(newShape[1]))) + } + + newDataSize := newShape[0] * newShape[1] * int64(t.C_tensor.dl_tensor.dtype.bits) / 8 + + bytes := t.sizeInBytes() + + var NewDeviceDataPointer unsafe.Pointer + + err := CheckCuvs(CuvsError(C.cuvsRMMAlloc(res.Resource, &NewDeviceDataPointer, C.size_t(bytes+newDataSize)))) + if err != nil { + return nil, err + } + + err = CheckCuda( + C.cudaMemcpy( + NewDeviceDataPointer, + t.C_tensor.dl_tensor.data, + C.size_t(bytes), + C.cudaMemcpyDeviceToDevice, + )) + if err != nil { + C.cuvsRMMFree(res.Resource, NewDeviceDataPointer, C.size_t(bytes+newDataSize)) + return nil, err + } + + err = CheckCuda( + C.cudaMemcpy( + unsafe.Pointer(uintptr(NewDeviceDataPointer)+uintptr(bytes)), + unsafe.Pointer(&flatData[0]), + C.size_t(newDataSize), + C.cudaMemcpyHostToDevice, + )) + if err != nil { + C.cuvsRMMFree(res.Resource, NewDeviceDataPointer, C.size_t(bytes+newDataSize)) + return nil, err + } + + err = CheckCuvs(CuvsError( + C.cuvsRMMFree(res.Resource, t.C_tensor.dl_tensor.data, C.size_t(bytes)))) + if err != nil { + return nil, err + } + + shape := make([]int64, 2) + shape[0] = 
int64(*t.C_tensor.dl_tensor.shape) + int64(len(newData)) + + shape[1] = newShape[1] + + t.shape = shape + + t.C_tensor.dl_tensor.data = NewDeviceDataPointer + t.C_tensor.dl_tensor.shape = (*C.int64_t)(unsafe.Pointer(&shape[0])) + + return t, nil +} + +// Transfers the data in the Tensor to the host. +func (t *Tensor[T]) ToHost(res *Resource) (*Tensor[T], error) { + bytes := t.sizeInBytes() + + addr := (C.malloc(C.size_t(bytes))) + if addr == nil { + return nil, errors.New("memory allocation failed") + } + + err := CheckCuda( + C.cudaMemcpy( + addr, + t.C_tensor.dl_tensor.data, + C.size_t(bytes), + C.cudaMemcpyDeviceToHost, + )) + if err != nil { + return nil, err + } + + err = CheckCuvs(CuvsError( + C.cuvsRMMFree(res.Resource, t.C_tensor.dl_tensor.data, C.size_t(bytes)))) + if err != nil { + return nil, err + } + + t.C_tensor.dl_tensor.device.device_type = C.kDLCPU + t.C_tensor.dl_tensor.data = addr + + return t, nil +} + +// Returns a slice of the data in the Tensor. +// The Tensor must be on the host. 
+func (t *Tensor[T]) Slice() ([][]T, error) { + if t.C_tensor.dl_tensor.device.device_type != C.kDLCPU { + return nil, errors.New("Tensor must be on CPU") + } + + flatData := unsafe.Slice((*T)(t.C_tensor.dl_tensor.data), t.shape[0]*t.shape[1]) + + data := make([][]T, t.shape[0]) + for i := range data { + data[i] = make([]T, t.shape[1]) + for j := range data[i] { + data[i][j] = flatData[i*int(t.shape[1])+j] + } + } + + return data, nil +} + +func getDLDataType[T TensorNumberType]() C.DLDataType { + var zero T + switch any(zero).(type) { + case int64: + return C.DLDataType{ + bits: C.uchar(64), + lanes: C.ushort(1), + code: C.kDLInt, + } + case uint32: + return C.DLDataType{ + bits: C.uchar(32), + lanes: C.ushort(1), + code: C.kDLUInt, + } + case float32: + return C.DLDataType{ + bits: C.uchar(32), + lanes: C.ushort(1), + code: C.kDLFloat, + } + } + panic("unreachable") // Go compiler ensures this is unreachable +} + +func flattenData[T TensorNumberType](data [][]T, dest []T) { + cols := len(data[0]) + for i, row := range data { + copy(dest[i*cols:], row) + } +} + +func (t *Tensor[T]) sizeInBytes() int64 { + return calculateBytes(t.shape, t.C_tensor.dl_tensor.dtype) +} + +func calculateBytes(shape []int64, dtype C.DLDataType) int64 { + bytes := int64(1) + for dim := range shape { + bytes *= (shape[dim]) + } + + bytes *= int64(dtype.bits) / 8 + + return bytes +} diff --git a/go/dlpack_test.go b/go/dlpack_test.go new file mode 100644 index 0000000000..135363f811 --- /dev/null +++ b/go/dlpack_test.go @@ -0,0 +1,190 @@ +package cuvs_test + +import ( + "math/rand" + "reflect" + "testing" + "time" + + cuvs "github.com/rapidsai/cuvs/go" +) + +func TestDlPack(t *testing.T) { + resource, _ := cuvs.NewResource(nil) + rand.Seed(time.Now().UnixNano()) + NDataPoints := 256 + NFeatures := 16 + TestDataset := make([][]float32, NDataPoints) + for i := range TestDataset { + TestDataset[i] = make([]float32, NFeatures) + for j := range TestDataset[i] { + 
TestDataset[i][j] = float32(i) + } + } + + dataset, err := cuvs.NewTensor(TestDataset[:127]) + if err != nil { + t.Fatal(err) + } + + _, err = dataset.ToDevice(&resource) + if err != nil { + t.Fatal(err) + } + + _, err = dataset.Expand(&resource, TestDataset[127:]) + if err != nil { + t.Fatal(err) + } + + _, err = dataset.ToHost(&resource) + if err != nil { + t.Fatal(err) + } + + arr, err := dataset.Slice() + if err != nil { + t.Fatal(err) + } + + for i := range arr { + for j := range arr[i] { + if arr[i][j] != TestDataset[i][j] { + t.Errorf("slices don't match at [%d][%d], expected %f, got %f", + i, j, TestDataset[i][j], arr[i][j]) + } + } + } +} + +func TestShape(t *testing.T) { + // Test cases with different shapes + testCases := []struct { + rows int + cols int + name string + }{ + {10, 5, "small matrix"}, + {100, 20, "medium matrix"}, + {1000, 50, "large matrix"}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + // Create test data + data := make([][]float32, tc.rows) + for i := range data { + data[i] = make([]float32, tc.cols) + for j := range data[i] { + data[i][j] = float32(i * j) + } + } + + // Create tensor + tensor, err := cuvs.NewTensor(data) + if err != nil { + t.Fatalf("failed to create tensor: %v", err) + } + defer tensor.Close() + + // Check shape + shape := tensor.Shape() + expectedShape := []int64{int64(tc.rows), int64(tc.cols)} + if !reflect.DeepEqual(shape, expectedShape) { + t.Errorf("incorrect shape: got %v, want %v", shape, expectedShape) + } + }) + } +} + +func TestEmptyTensor(t *testing.T) { + // Test creating tensor with empty data + _, err := cuvs.NewTensor([][]float32{}) + if err == nil { + t.Error("expected error when creating tensor with empty data, got nil") + } +} + +func TestDeviceOperations(t *testing.T) { + resource, err := cuvs.NewResource(nil) + if err != nil { + t.Fatal(err) + } + + // Create test data + data := make([][]float32, 10) + for i := range data { + data[i] = make([]float32, 5) + for j := 
range data[i] { + data[i][j] = float32(i * j) + } + } + + // Test device transfer operations + t.Run("device transfer", func(t *testing.T) { + tensor, err := cuvs.NewTensor(data) + if err != nil { + t.Fatal(err) + } + defer tensor.Close() + + // Transfer to device + deviceTensor, err := tensor.ToDevice(&resource) + if err != nil { + t.Fatalf("failed to transfer to device: %v", err) + } + + // Transfer back to host + hostTensor, err := deviceTensor.ToHost(&resource) + if err != nil { + t.Fatalf("failed to transfer back to host: %v", err) + } + + // Verify data + result, err := hostTensor.Slice() + if err != nil { + t.Fatalf("failed to slice tensor: %v", err) + } + + for i := range data { + for j := range data[i] { + if result[i][j] != data[i][j] { + t.Errorf("data mismatch at [%d][%d]: got %f, want %f", + i, j, result[i][j], data[i][j]) + } + } + } + }) +} + +func TestDifferentDataTypes(t *testing.T) { + // Test int64 tensor + t.Run("int64", func(t *testing.T) { + data := [][]int64{{1, 2}, {3, 4}} + tensor, err := cuvs.NewTensor(data) + if err != nil { + t.Fatal(err) + } + defer tensor.Close() + + shape := tensor.Shape() + if !reflect.DeepEqual(shape, []int64{2, 2}) { + t.Errorf("incorrect shape for int64 tensor: got %v, want [2 2]", shape) + } + }) + + // Test uint32 tensor + t.Run("uint32", func(t *testing.T) { + data := [][]uint32{{1, 2}, {3, 4}} + tensor, err := cuvs.NewTensor(data) + if err != nil { + t.Fatal(err) + } + defer tensor.Close() + + shape := tensor.Shape() + if !reflect.DeepEqual(shape, []int64{2, 2}) { + t.Errorf("incorrect shape for uint32 tensor: got %v, want [2 2]", shape) + } + }) +} diff --git a/go/exceptions.go b/go/exceptions.go new file mode 100644 index 0000000000..af73ca9911 --- /dev/null +++ b/go/exceptions.go @@ -0,0 +1,23 @@ +package cuvs + +// #include +import "C" +import "errors" + +type CuvsError C.cuvsError_t + +// Wrapper function to convert cuvs error to Go error +func CheckCuvs(error CuvsError) error { + if error == C.CUVS_ERROR 
{ + return errors.New(C.GoString(C.cuvsGetLastErrorText())) + } + return nil +} + +// Wrapper function to convert cuda error to Go error +func CheckCuda(error C.cudaError_t) error { + if error != C.cudaSuccess { + return errors.New(C.GoString(C.cudaGetErrorString(error))) + } + return nil +} diff --git a/go/go.mod b/go/go.mod new file mode 100644 index 0000000000..4e1ab39a44 --- /dev/null +++ b/go/go.mod @@ -0,0 +1,3 @@ +module github.com/rapidsai/cuvs/go + +go 1.22.4 diff --git a/go/go.sum b/go/go.sum new file mode 100644 index 0000000000..81937f7b14 --- /dev/null +++ b/go/go.sum @@ -0,0 +1,4 @@ +golang.org/x/mod v0.22.0 h1:D4nJWe9zXqHOmWqj4VMOJhvzj7bEZg4wEYa759z1pH4= +golang.org/x/mod v0.22.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY= +golang.org/x/tools v0.27.0 h1:qEKojBykQkQ4EynWy4S8Weg69NumxKdn40Fce3uc/8o= +golang.org/x/tools v0.27.0/go.mod h1:sUi0ZgbwW9ZPAq26Ekut+weQPR5eIM6GQLQ1Yjm1H0Q= diff --git a/go/ivf_flat/index_params.go b/go/ivf_flat/index_params.go new file mode 100644 index 0000000000..f6cbe60fbf --- /dev/null +++ b/go/ivf_flat/index_params.go @@ -0,0 +1,87 @@ +package ivf_flat + +// #include +import "C" + +import ( + "errors" + + cuvs "github.com/rapidsai/cuvs/go" +) + +// Supplemental parameters to build IVF Flat Index +type IndexParams struct { + params C.cuvsIvfFlatIndexParams_t +} + +// Creates a new IndexParams +func CreateIndexParams() (*IndexParams, error) { + var params C.cuvsIvfFlatIndexParams_t + + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfFlatIndexParamsCreate(¶ms))) + if err != nil { + return nil, err + } + + return &IndexParams{params: params}, nil +} + +// The number of clusters used in the coarse quantizer. 
+func (p *IndexParams) SetNLists(n_lists uint32) (*IndexParams, error) { + p.params.n_lists = C.uint32_t(n_lists) + return p, nil +} + +// Distance Type to use for building the index +func (p *IndexParams) SetMetric(metric cuvs.Distance) (*IndexParams, error) { + CMetric, exists := cuvs.CDistances[metric] + + if !exists { + return nil, errors.New("cuvs: invalid distance metric") + } + p.params.metric = C.cuvsDistanceType(CMetric) + + return p, nil +} + +// Metric argument for Minkowski distances - set to 2.0 if not applicable +func (p *IndexParams) SetMetricArg(metric_arg float32) (*IndexParams, error) { + p.params.metric_arg = C.float(metric_arg) + return p, nil +} + +// The number of iterations searching for kmeans centers during index building. +func (p *IndexParams) SetKMeansNIters(kmeans_n_iters uint32) (*IndexParams, error) { + p.params.kmeans_n_iters = C.uint32_t(kmeans_n_iters) + return p, nil +} + +// If kmeans_trainset_fraction is less than 1, then the dataset is +// subsampled, and only n_samples * kmeans_trainset_fraction rows +// are used for training. +func (p *IndexParams) SetKMeansTrainsetFraction(kmeans_trainset_fraction float64) (*IndexParams, error) { + p.params.kmeans_trainset_fraction = C.double(kmeans_trainset_fraction) + return p, nil +} + +// After training the coarse and fine quantizers, we will populate +// the index with the dataset if add_data_on_build == true, otherwise +// the index is left empty, and the extend method can be used +// to add new vectors to the index. 
+func (p *IndexParams) SetAddDataOnBuild(add_data_on_build bool) (*IndexParams, error) { + if add_data_on_build { + p.params.add_data_on_build = C._Bool(true) + } else { + p.params.add_data_on_build = C._Bool(false) + } + return p, nil +} + +// Destroys IndexParams +func (p *IndexParams) Close() error { + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfFlatIndexParamsDestroy(p.params))) + if err != nil { + return err + } + return nil +} diff --git a/go/ivf_flat/ivf_flat.go b/go/ivf_flat/ivf_flat.go new file mode 100644 index 0000000000..3330eb95e2 --- /dev/null +++ b/go/ivf_flat/ivf_flat.go @@ -0,0 +1,72 @@ +package ivf_flat + +// #include +import "C" + +import ( + "errors" + "unsafe" + + cuvs "github.com/rapidsai/cuvs/go" +) + +// IVF Flat Index +type IvfFlatIndex struct { + index C.cuvsIvfFlatIndex_t + trained bool +} + +// Creates a new empty IvfFlatIndex +func CreateIndex(params *IndexParams, dataset *cuvs.Tensor[float32]) (*IvfFlatIndex, error) { + var index C.cuvsIvfFlatIndex_t + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfFlatIndexCreate(&index))) + if err != nil { + return nil, err + } + + return &IvfFlatIndex{index: index}, nil +} + +// Builds an IvfFlatIndex from the dataset for efficient search. 
+// +// # Arguments +// +// * `Resources` - Resources to use +// * `params` - Parameters for building the index +// * `dataset` - A row-major Tensor on either the host or device to index +// * `index` - IvfFlatIndex to build +func BuildIndex[T any](Resources cuvs.Resource, params *IndexParams, dataset *cuvs.Tensor[T], index *IvfFlatIndex) error { + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfFlatBuild(C.ulong(Resources.Resource), params.params, (*C.DLManagedTensor)(unsafe.Pointer(dataset.C_tensor)), index.index))) + if err != nil { + return err + } + index.trained = true + return nil +} + +// Destroys the IvfFlatIndex +func (index *IvfFlatIndex) Close() error { + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfFlatIndexDestroy(index.index))) + if err != nil { + return err + } + return nil +} + +// Perform a Approximate Nearest Neighbors search on the Index +// +// # Arguments +// +// * `Resources` - Resources to use +// * `params` - Parameters to use in searching the index +// * `index` - IvfFlatIndex to search +// * `queries` - A tensor in device memory to query for +// * `neighbors` - Tensor in device memory that receives the indices of the nearest neighbors +// * `distances` - Tensor in device memory that receives the distances of the nearest neighbors +func SearchIndex[T any](Resources cuvs.Resource, params *SearchParams, index *IvfFlatIndex, queries *cuvs.Tensor[T], neighbors *cuvs.Tensor[int64], distances *cuvs.Tensor[T]) error { + if !index.trained { + return errors.New("index needs to be built before calling search") + } + + return cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfFlatSearch(C.cuvsResources_t(Resources.Resource), params.params, index.index, (*C.DLManagedTensor)(unsafe.Pointer(queries.C_tensor)), (*C.DLManagedTensor)(unsafe.Pointer(neighbors.C_tensor)), (*C.DLManagedTensor)(unsafe.Pointer(distances.C_tensor))))) +} diff --git a/go/ivf_flat/ivf_flat_test.go b/go/ivf_flat/ivf_flat_test.go new file mode 100644 index 0000000000..8bca965f89 --- /dev/null 
+++ b/go/ivf_flat/ivf_flat_test.go @@ -0,0 +1,123 @@ +package ivf_flat + +import ( + "math/rand/v2" + "testing" + + cuvs "github.com/rapidsai/cuvs/go" +) + +func TestIvfFlat(t *testing.T) { + const ( + nDataPoints = 1024 + nFeatures = 16 + nQueries = 4 + k = 4 + epsilon = 0.001 + ) + + resource, _ := cuvs.NewResource(nil) + defer resource.Close() + + testDataset := make([][]float32, nDataPoints) + for i := range testDataset { + testDataset[i] = make([]float32, nFeatures) + for j := range testDataset[i] { + testDataset[i][j] = rand.Float32() + } + } + + dataset, err := cuvs.NewTensor(testDataset) + if err != nil { + t.Fatalf("error creating dataset tensor: %v", err) + } + defer dataset.Close() + + indexParams, err := CreateIndexParams() + if err != nil { + t.Fatalf("error creating index params: %v", err) + } + defer indexParams.Close() + + index, _ := CreateIndex(indexParams, &dataset) + defer index.Close() + + // use the first 4 points from the dataset as queries : will test that we get them back + // as their own nearest neighbor + queries, _ := cuvs.NewTensor(testDataset[:nQueries]) + defer queries.Close() + + neighbors, err := cuvs.NewTensorOnDevice[int64](&resource, []int64{int64(nQueries), int64(k)}) + if err != nil { + t.Fatalf("error creating neighbors tensor: %v", err) + } + defer neighbors.Close() + + distances, err := cuvs.NewTensorOnDevice[float32](&resource, []int64{int64(nQueries), int64(k)}) + if err != nil { + t.Fatalf("error creating distances tensor: %v", err) + } + defer distances.Close() + + if _, err := dataset.ToDevice(&resource); err != nil { + t.Fatalf("error moving dataset to device: %v", err) + } + + if err := BuildIndex(resource, indexParams, &dataset, index); err != nil { + t.Fatalf("error building index: %v", err) + } + + if err := resource.Sync(); err != nil { + t.Fatalf("error syncing resource: %v", err) + } + + if _, err := queries.ToDevice(&resource); err != nil { + t.Fatalf("error moving queries to device: %v", err) 
+ } + + SearchParams, err := CreateSearchParams() + if err != nil { + t.Fatalf("error creating search params: %v", err) + } + defer SearchParams.Close() + + err = SearchIndex(resource, SearchParams, index, &queries, &neighbors, &distances) + if err != nil { + t.Fatalf("error searching index: %v", err) + } + + if _, err := neighbors.ToHost(&resource); err != nil { + t.Fatalf("error moving neighbors to host: %v", err) + } + + if _, err := distances.ToHost(&resource); err != nil { + t.Fatalf("error moving distances to host: %v", err) + } + + if err := resource.Sync(); err != nil { + t.Fatalf("error syncing resource: %v", err) + } + + neighborsSlice, err := neighbors.Slice() + if err != nil { + t.Fatalf("error getting neighbors slice: %v", err) + } + + for i := range neighborsSlice { + println(neighborsSlice[i][0]) + if neighborsSlice[i][0] != int64(i) { + t.Error("wrong neighbor, expected", i, "got", neighborsSlice[i][0]) + } + } + + distancesSlice, err := distances.Slice() + if err != nil { + t.Fatalf("error getting distances slice: %v", err) + } + + for i := range distancesSlice { + if distancesSlice[i][0] >= epsilon || distancesSlice[i][0] <= -epsilon { + t.Error("distance should be close to 0, got", distancesSlice[i][0]) + } + } +} diff --git a/go/ivf_flat/search_params.go b/go/ivf_flat/search_params.go new file mode 100644 index 0000000000..3606e4662e --- /dev/null +++ b/go/ivf_flat/search_params.go @@ -0,0 +1,39 @@ +package ivf_flat + +// #include +import "C" + +import ( + cuvs "github.com/rapidsai/cuvs/go" +) + +type SearchParams struct { + params C.cuvsIvfFlatSearchParams_t +} + +// Creates a new SearchParams +func CreateSearchParams() (*SearchParams, error) { + var params C.cuvsIvfFlatSearchParams_t + + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfFlatSearchParamsCreate(¶ms))) + if err != nil { + return nil, err + } + + return &SearchParams{params: params}, nil +} + +// The number of clusters to search. 
+func (p *SearchParams) SetNProbes(n_probes uint32) (*SearchParams, error) { + p.params.n_probes = C.uint32_t(n_probes) + return p, nil +} + +// Destroy SearchParams +func (p *SearchParams) Close() error { + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfFlatSearchParamsDestroy(p.params))) + if err != nil { + return err + } + return nil +} diff --git a/go/ivf_pq/index_params.go b/go/ivf_pq/index_params.go new file mode 100644 index 0000000000..d04b44621e --- /dev/null +++ b/go/ivf_pq/index_params.go @@ -0,0 +1,149 @@ +package ivf_pq + +// #include +import "C" + +import ( + "errors" + + cuvs "github.com/rapidsai/cuvs/go" +) + +type IndexParams struct { + params C.cuvsIvfPqIndexParams_t +} + +type codebookKind int + +const ( + Subspace codebookKind = iota + Cluster +) + +var cCodebookKinds = map[codebookKind]int{ + Subspace: C.PER_SUBSPACE, + Cluster: C.PER_CLUSTER, +} + +// Creates a new IndexParams +func CreateIndexParams() (*IndexParams, error) { + var params C.cuvsIvfPqIndexParams_t + + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfPqIndexParamsCreate(¶ms))) + if err != nil { + return nil, err + } + + return &IndexParams{params: params}, nil +} + +// The number of clusters used in the coarse quantizer. +func (p *IndexParams) SetNLists(n_lists uint32) (*IndexParams, error) { + p.params.n_lists = C.uint32_t(n_lists) + return p, nil +} + +// Distance Type to use for building the index +func (p *IndexParams) SetMetric(metric cuvs.Distance) (*IndexParams, error) { + CMetric, exists := cuvs.CDistances[metric] + + if !exists { + return nil, errors.New("cuvs: invalid distance metric") + } + p.params.metric = C.cuvsDistanceType(CMetric) + + return p, nil +} + +// Metric argument for Minkowski distances - set to 2.0 if not applicable +func (p *IndexParams) SetMetricArg(metric_arg float32) (*IndexParams, error) { + p.params.metric_arg = C.float(metric_arg) + return p, nil +} + +// The number of iterations searching for kmeans centers during index building. 
+func (p *IndexParams) SetKMeansNIters(kmeans_n_iters uint32) (*IndexParams, error) { + p.params.kmeans_n_iters = C.uint32_t(kmeans_n_iters) + return p, nil +} + +// If kmeans_trainset_fraction is less than 1, then the dataset is +// subsampled, and only n_samples * kmeans_trainset_fraction rows +// are used for training. +func (p *IndexParams) SetKMeansTrainsetFraction(kmeans_trainset_fraction float64) (*IndexParams, error) { + p.params.kmeans_trainset_fraction = C.double(kmeans_trainset_fraction) + return p, nil +} + +// The bit length of the vector element after quantization. +func (p *IndexParams) SetPQBits(pq_bits uint32) (*IndexParams, error) { + p.params.pq_bits = C.uint32_t(pq_bits) + return p, nil +} + +// The dimensionality of a the vector after product quantization. +// When zero, an optimal value is selected using a heuristic. Note +// pq_dim * pq_bits must be a multiple of 8. Hint: a smaller 'pq_dim' +// results in a smaller index size and better search performance, but +// lower recall. If 'pq_bits' is 8, 'pq_dim' can be set to any number, +// but multiple of 8 are desirable for good performance. If 'pq_bits' +// is not 8, 'pq_dim' should be a multiple of 8. For good performance, +// it is desirable that 'pq_dim' is a multiple of 32. Ideally, +// 'pq_dim' should be also a divisor of the dataset dim. +func (p *IndexParams) SetPQDim(pq_dim uint32) (*IndexParams, error) { + p.params.pq_dim = C.uint32_t(pq_dim) + return p, nil +} + +func (p *IndexParams) SetCodebookKind(codebook_kind codebookKind) (*IndexParams, error) { + CCodebookKind, exists := cCodebookKinds[codebook_kind] + + if !exists { + return nil, errors.New("cuvs: invalid codebook_kind") + } + p.params.codebook_kind = uint32(CCodebookKind) + + return p, nil +} + +// Apply a random rotation matrix on the input data and queries even +// if `dim % pq_dim == 0`. 
Note: if `dim` is not multiple of `pq_dim`, +// a random rotation is always applied to the input data and queries +// to transform the working space from `dim` to `rot_dim`, which may +// be slightly larger than the original space and and is a multiple +// of `pq_dim` (`rot_dim % pq_dim == 0`). However, this transform is +// not necessary when `dim` is multiple of `pq_dim` (`dim == rot_dim`, +// hence no need in adding "extra" data columns / features). By +// default, if `dim == rot_dim`, the rotation transform is +// initialized with the identity matrix. When +// `force_random_rotation == True`, a random orthogonal transform +func (p *IndexParams) SetForceRandomRotation(force_random_rotation bool) (*IndexParams, error) { + if force_random_rotation { + p.params.force_random_rotation = C._Bool(true) + } else { + p.params.force_random_rotation = C._Bool(false) + } + return p, nil +} + +// After training the coarse and fine quantizers, we will populate +// the index with the dataset if add_data_on_build == true, otherwise +// the index is left empty, and the extend method can be used +// to add new vectors to the index. 
+func (p *IndexParams) SetAddDataOnBuild(add_data_on_build bool) (*IndexParams, error) { + if add_data_on_build { + p.params.add_data_on_build = C._Bool(true) + } else { + p.params.add_data_on_build = C._Bool(false) + } + return p, nil +} + +// Destroys IndexParams +func (p *IndexParams) Close() error { + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfPqIndexParamsDestroy(p.params))) + if err != nil { + return err + } + return nil +} diff --git a/go/ivf_pq/ivf_pq.go b/go/ivf_pq/ivf_pq.go new file mode 100644 index 0000000000..cbbec629d1 --- /dev/null +++ b/go/ivf_pq/ivf_pq.go @@ -0,0 +1,73 @@ +package ivf_pq + +// #include +import "C" + +import ( + "errors" + "unsafe" + + cuvs "github.com/rapidsai/cuvs/go" +) + +// IVF PQ Index +type IvfPqIndex struct { + index C.cuvsIvfPqIndex_t + trained bool +} + +// Creates a new empty IvfPqIndex +func CreateIndex(params *IndexParams, dataset *cuvs.Tensor[float32]) (*IvfPqIndex, error) { + var index C.cuvsIvfPqIndex_t + + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfPqIndexCreate(&index))) + if err != nil { + return nil, err + } + + return &IvfPqIndex{index: index}, nil +} + +// Builds an IvfPqIndex from the dataset for efficient search. 
+// +// # Arguments +// +// * `Resources` - Resources to use +// * `params` - Parameters for building the index +// * `dataset` - A row-major Tensor on either the host or device to index +// * `index` - IvfPqIndex to build +func BuildIndex[T any](Resources cuvs.Resource, params *IndexParams, dataset *cuvs.Tensor[T], index *IvfPqIndex) error { + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfPqBuild(C.ulong(Resources.Resource), params.params, (*C.DLManagedTensor)(unsafe.Pointer(dataset.C_tensor)), index.index))) + if err != nil { + return err + } + index.trained = true + return nil +} + +// Destroys the IvfPqIndex +func (index *IvfPqIndex) Close() error { + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfPqIndexDestroy(index.index))) + if err != nil { + return err + } + return nil +} + +// Perform a Approximate Nearest Neighbors search on the Index +// +// # Arguments +// +// * `Resources` - Resources to use +// * `params` - Parameters to use in searching the index +// * `index` - IvfPqIndex to search +// * `queries` - A tensor in device memory to query for +// * `neighbors` - Tensor in device memory that receives the indices of the nearest neighbors +// * `distances` - Tensor in device memory that receives the distances of the nearest neighbors +func SearchIndex[T any](Resources cuvs.Resource, params *SearchParams, index *IvfPqIndex, queries *cuvs.Tensor[T], neighbors *cuvs.Tensor[int64], distances *cuvs.Tensor[T]) error { + if !index.trained { + return errors.New("index needs to be built before calling search") + } + + return cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfPqSearch(C.cuvsResources_t(Resources.Resource), params.params, index.index, (*C.DLManagedTensor)(unsafe.Pointer(queries.C_tensor)), (*C.DLManagedTensor)(unsafe.Pointer(neighbors.C_tensor)), (*C.DLManagedTensor)(unsafe.Pointer(distances.C_tensor))))) +} diff --git a/go/ivf_pq/ivf_pq_test.go b/go/ivf_pq/ivf_pq_test.go new file mode 100644 index 0000000000..39b7727a3d --- /dev/null +++ 
b/go/ivf_pq/ivf_pq_test.go @@ -0,0 +1,123 @@ +package ivf_pq + +import ( + "math/rand/v2" + "testing" + + cuvs "github.com/rapidsai/cuvs/go" +) + +func TestIvfPq(t *testing.T) { + const ( + nDataPoints = 1024 + nFeatures = 16 + nQueries = 4 + k = 4 + epsilon = 0.001 + ) + + resource, _ := cuvs.NewResource(nil) + defer resource.Close() + + testDataset := make([][]float32, nDataPoints) + for i := range testDataset { + testDataset[i] = make([]float32, nFeatures) + for j := range testDataset[i] { + testDataset[i][j] = rand.Float32() + } + } + + dataset, err := cuvs.NewTensor(testDataset) + if err != nil { + t.Fatalf("error creating dataset tensor: %v", err) + } + defer dataset.Close() + + indexParams, err := CreateIndexParams() + if err != nil { + t.Fatalf("error creating index params: %v", err) + } + defer indexParams.Close() + + index, _ := CreateIndex(indexParams, &dataset) + defer index.Close() + + // use the first 4 points from the dataset as queries : will test that we get them back + // as their own nearest neighbor + queries, _ := cuvs.NewTensor(testDataset[:nQueries]) + defer queries.Close() + + neighbors, err := cuvs.NewTensorOnDevice[int64](&resource, []int64{int64(nQueries), int64(k)}) + if err != nil { + t.Fatalf("error creating neighbors tensor: %v", err) + } + defer neighbors.Close() + + distances, err := cuvs.NewTensorOnDevice[float32](&resource, []int64{int64(nQueries), int64(k)}) + if err != nil { + t.Fatalf("error creating distances tensor: %v", err) + } + defer distances.Close() + + if _, err := dataset.ToDevice(&resource); err != nil { + t.Fatalf("error moving dataset to device: %v", err) + } + + if err := BuildIndex(resource, indexParams, &dataset, index); err != nil { + t.Fatalf("error building index: %v", err) + } + + if err := resource.Sync(); err != nil { + t.Fatalf("error syncing resource: %v", err) + } + + if _, err := queries.ToDevice(&resource); err != nil { + t.Fatalf("error moving queries to device: %v", err) + } + + 
SearchParams, err := CreateSearchParams() + if err != nil { + t.Fatalf("error creating search params: %v", err) + } + defer SearchParams.Close() + + err = SearchIndex(resource, SearchParams, index, &queries, &neighbors, &distances) + if err != nil { + t.Fatalf("error searching index: %v", err) + } + + if _, err := neighbors.ToHost(&resource); err != nil { + t.Fatalf("error moving neighbors to host: %v", err) + } + + if _, err := distances.ToHost(&resource); err != nil { + t.Fatalf("error moving distances to host: %v", err) + } + + if err := resource.Sync(); err != nil { + t.Fatalf("error syncing resource: %v", err) + } + + neighborsSlice, err := neighbors.Slice() + if err != nil { + t.Fatalf("error getting neighbors slice: %v", err) + } + + for i := range neighborsSlice { + println(neighborsSlice[i][0]) + if neighborsSlice[i][0] != int64(i) { + t.Error("wrong neighbor, expected", i, "got", neighborsSlice[i][0]) + } + } + + distancesSlice, err := distances.Slice() + if err != nil { + t.Fatalf("error getting distances slice: %v", err) + } + + for i := range distancesSlice { + if distancesSlice[i][0] >= epsilon || distancesSlice[i][0] <= -epsilon { + t.Error("distance should be close to 0, got", distancesSlice[i][0]) + } + } +} diff --git a/go/ivf_pq/search_params.go b/go/ivf_pq/search_params.go new file mode 100644 index 0000000000..190ca7036e --- /dev/null +++ b/go/ivf_pq/search_params.go @@ -0,0 +1,107 @@ +package ivf_pq + +// #include +import "C" + +import ( + "errors" + + cuvs "github.com/rapidsai/cuvs/go" +) + +// Supplemental parameters to search IVF PQ Index +type SearchParams struct { + params C.cuvsIvfPqSearchParams_t +} + +type lutDtype int + +const ( + Lut_Uint8 lutDtype = iota + Lut_Uint16 + Lut_Uint32 + Lut_Uint64 + Lut_Int8 + Lut_Int16 + Lut_Int32 + Lut_Int64 +) + +var cLutDtypes = map[lutDtype]int{ + Lut_Uint8: C.CUDA_R_8U, + Lut_Uint16: C.CUDA_R_16U, + Lut_Uint32: C.CUDA_R_32U, + Lut_Uint64: C.CUDA_R_64U, + Lut_Int8: C.CUDA_R_8I, + 
Lut_Int16: C.CUDA_R_16I, + Lut_Int32: C.CUDA_R_32I, + Lut_Int64: C.CUDA_R_64I, +} + +type internalDistanceDtype int + +const ( + InternalDistance_Float32 internalDistanceDtype = iota + InternalDistance_Float64 +) + +var CInternalDistanceDtypes = map[internalDistanceDtype]int{ + InternalDistance_Float32: C.CUDA_R_32F, + InternalDistance_Float64: C.CUDA_R_64F, +} + +// Creates a new SearchParams +func CreateSearchParams() (*SearchParams, error) { + var params C.cuvsIvfPqSearchParams_t + + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfPqSearchParamsCreate(¶ms))) + if err != nil { + return nil, err + } + + return &SearchParams{params: params}, nil +} + +// The number of clusters to search. +func (p *SearchParams) SetNProbes(n_probes uint32) (*SearchParams, error) { + p.params.n_probes = C.uint32_t(n_probes) + return p, nil +} + +// Data type of look up table to be created dynamically at search +// time. The use of low-precision types reduces the amount of shared +// memory required at search time, so fast shared memory kernels can +// be used even for datasets with large dimansionality. Note that +// the recall is slightly degraded when low-precision type is +// selected. +func (p *SearchParams) SetLutDtype(lut_dtype lutDtype) (*SearchParams, error) { + CLutDtype, exists := cLutDtypes[lutDtype(lut_dtype)] + + if !exists { + return nil, errors.New("cuvs: invalid lut_dtype") + } + p.params.lut_dtype = C.cudaDataType_t(CLutDtype) + + return p, nil +} + +// Storage data type for distance/similarity computation. 
+func (p *SearchParams) SetInternalDistanceDtype(internal_distance_dtype internalDistanceDtype) (*SearchParams, error) { + CInternalDistanceDtype, exists := CInternalDistanceDtypes[internalDistanceDtype(internal_distance_dtype)] + + if !exists { + return nil, errors.New("cuvs: invalid internal_distance_dtype") + } + p.params.internal_distance_dtype = C.cudaDataType_t(CInternalDistanceDtype) + + return p, nil +} + +// Destroys SearchParams +func (p *SearchParams) Close() error { + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfPqSearchParamsDestroy(p.params))) + if err != nil { + return err + } + return nil +} diff --git a/go/memory_resource.go b/go/memory_resource.go new file mode 100644 index 0000000000..df042c821d --- /dev/null +++ b/go/memory_resource.go @@ -0,0 +1,93 @@ +package cuvs + +// #include +import "C" + +import ( + "runtime" +) + +type CuvsMemoryCommand int + +const ( + CuvsMemoryNew = iota + CuvsMemoryRelease +) + +type CuvsPoolMemory struct { + ch chan CuvsMemoryCommand + errCh chan error + initial_pool_size_percent int + max_pool_size_percent int + managed bool +} + +// Creates new CuvsPoolMemory struct +// initial_pool_size_percent is the initial size of the pool in percent of total available device memory +// max_pool_size_percent is the maximum size of the pool in percent of total available device memory +// managed is whether to use CUDA managed memory +func NewCuvsPoolMemory(initial_pool_size_percent int, max_pool_size_percent int, managed bool) (*CuvsPoolMemory, error) { + c := CuvsPoolMemory{ + ch: make(chan CuvsMemoryCommand), + errCh: make(chan error), + initial_pool_size_percent: initial_pool_size_percent, + max_pool_size_percent: max_pool_size_percent, + managed: managed, + } + + c.start() + c.ch <- CuvsMemoryNew + + if err := <-c.errCh; err != nil { + return nil, err + } + + return &c, nil +} + +// Enables pool memory +func (m *CuvsPoolMemory) start() { + go func() { + runtime.LockOSThread() + defer runtime.UnlockOSThread() + + for 
command := range m.ch { + var err error + switch command { + case CuvsMemoryNew: + err = CheckCuvs(CuvsError(C.cuvsRMMPoolMemoryResourceEnable( + C.int(m.initial_pool_size_percent), + C.int(m.max_pool_size_percent), + C._Bool(m.managed)))) + m.errCh <- err + + case CuvsMemoryRelease: + err = CheckCuvs(CuvsError(C.cuvsRMMMemoryResourceReset())) + m.errCh <- err + } + } + }() +} + +// Disables pool memory +func (m *CuvsPoolMemory) Close() error { + m.ch <- CuvsMemoryRelease + err := <-m.errCh + close(m.ch) + close(m.errCh) + return err +} + +func Example() error { + mem, err := NewCuvsPoolMemory(60, 100, false) + if err != nil { + return err + } + + err = mem.Close() + if err != nil { + return err + } + + return nil +} diff --git a/go/memory_resource_test.go b/go/memory_resource_test.go new file mode 100644 index 0000000000..e44c20485e --- /dev/null +++ b/go/memory_resource_test.go @@ -0,0 +1,19 @@ +package cuvs_test + +import ( + "testing" + + cuvs "github.com/rapidsai/cuvs/go" +) + +func TestMemoryResource(t *testing.T) { + mem, err := cuvs.NewCuvsPoolMemory(60, 100, false) + if err != nil { + t.Fatal("Failed to create memory resource:", err) + } + + err = mem.Close() + if err != nil { + t.Fatal("Failed to close memory resource:", err) + } +} diff --git a/go/resources.go b/go/resources.go new file mode 100644 index 0000000000..562aa7b7fc --- /dev/null +++ b/go/resources.go @@ -0,0 +1,57 @@ +package cuvs + +// #include +import "C" + +type cuvsResource C.cuvsResources_t + +// Resources are objects that are shared between function calls, +// and includes things like CUDA streams, cuBLAS handles and other +// resources that are expensive to create. 
+type Resource struct { + Resource C.cuvsResources_t +} + +// Returns a new Resource object +func NewResource(stream C.cudaStream_t) (Resource, error) { + res := C.cuvsResources_t(0) + err := CheckCuvs(CuvsError(C.cuvsResourcesCreate(&res))) + if err != nil { + return Resource{}, err + } + + if stream != nil { + err := CheckCuvs(CuvsError(C.cuvsStreamSet(res, stream))) + if err != nil { + return Resource{}, err + } + } + + return Resource{Resource: res}, nil +} + +// Syncs the current cuda stream +func (r Resource) Sync() error { + return CheckCuvs(CuvsError(C.cuvsStreamSync(r.Resource))) +} + +// Gets the current cuda stream +func (r Resource) GetCudaStream() (C.cudaStream_t, error) { + var stream C.cudaStream_t + + err := CheckCuvs(CuvsError(C.cuvsStreamGet(r.Resource, &stream))) + if err != nil { + return C.cudaStream_t(nil), err + } + + return stream, nil +} + +func (r Resource) Close() error { + err := CheckCuvs(CuvsError(C.cuvsResourcesDestroy(r.Resource))) + if err != nil { + return err + } + + return nil +}