diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 59b8e00de1..d1f633248f 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -47,6 +47,19 @@ jobs: node_type: "gpu-v100-latest-1" run_script: "ci/build_rust.sh" sha: ${{ inputs.sha }} + go-build: + needs: cpp-build + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02 + with: + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + arch: "amd64" + date: ${{ inputs.date }} + container_image: "rapidsai/ci-conda:latest" + node_type: "gpu-v100-latest-1" + run_script: "ci/build_go.sh" + sha: ${{ inputs.sha }} python-build: needs: [cpp-build] secrets: inherit diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 233b82f94b..832932d99e 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -22,6 +22,7 @@ jobs: - conda-python-tests - docs-build - rust-build + - go-build - wheel-build-libcuvs - wheel-build-cuvs - wheel-tests-cuvs @@ -70,6 +71,7 @@ jobs: - '!notebooks/**' - '!python/**' - '!rust/**' + - '!go/**' - '!thirdparty/LICENSES/**' test_notebooks: - '**' @@ -77,6 +79,7 @@ jobs: - '!.pre-commit-config.yaml' - '!README.md' - '!rust/**' + - '!go/**' - '!thirdparty/LICENSES/**' test_python: - '**' @@ -87,6 +90,7 @@ jobs: - '!img/**' - '!notebooks/**' - '!rust/**' + - '!go/**' - '!thirdparty/LICENSES/**' checks: needs: telemetry-setup @@ -150,6 +154,16 @@ jobs: arch: "amd64" container_image: "rapidsai/ci-conda:latest" run_script: "ci/build_rust.sh" + go-build: + needs: conda-cpp-build + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.02 + with: + build_type: pull-request + node_type: "gpu-v100-latest-1" + arch: "amd64" + container_image: "rapidsai/ci-conda:latest" + run_script: "ci/build_go.sh" wheel-build-libcuvs: needs: checks secrets: inherit diff --git a/build.sh b/build.sh index 89e1b5a337..4aaa5617c3 100755 
--- a/build.sh +++ b/build.sh @@ -18,7 +18,7 @@ ARGS=$* # scripts, and that this script resides in the repo dir! REPODIR=$(cd $(dirname $0); pwd) -VALIDARGS="clean libcuvs python rust java docs tests bench-ann examples --uninstall -v -g -n --compile-static-lib --allgpuarch --no-mg --no-cpu --cpu-only --no-shared-libs --no-nvtx --show_depr_warn --incl-cache-stats --time -h" +VALIDARGS="clean libcuvs python rust go java docs tests bench-ann examples --uninstall -v -g -n --compile-static-lib --allgpuarch --no-mg --no-cpu --cpu-only --no-shared-libs --no-nvtx --show_depr_warn --incl-cache-stats --time -h" HELP="$0 [ ...] [ ...] [--cmake-args=\"\"] [--cache-tool=] [--limit-tests=] [--limit-bench-ann=] [--build-metrics=] where is: clean - remove all existing build artifacts and configuration (start over) @@ -26,6 +26,7 @@ HELP="$0 [ ...] [ ...] [--cmake-args=\"\"] [--cache-tool==0.8,<1.0 - doxygen>=1.8.20 - gcc_linux-aarch64=11.* +- go - graphviz - ipython - libclang==16.0.6 diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index c6a65e6848..cf73f4b745 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -23,6 +23,7 @@ dependencies: - dlpack>=0.8,<1.0 - doxygen>=1.8.20 - gcc_linux-64=11.* +- go - graphviz - ipython - libclang==16.0.6 diff --git a/conda/environments/all_cuda-128_arch-aarch64.yaml b/conda/environments/all_cuda-128_arch-aarch64.yaml index c508a9dc19..f161975614 100644 --- a/conda/environments/all_cuda-128_arch-aarch64.yaml +++ b/conda/environments/all_cuda-128_arch-aarch64.yaml @@ -24,6 +24,7 @@ dependencies: - dlpack>=0.8,<1.0 - doxygen>=1.8.20 - gcc_linux-aarch64=13.* +- go - graphviz - ipython - libclang==16.0.6 diff --git a/conda/environments/all_cuda-128_arch-x86_64.yaml b/conda/environments/all_cuda-128_arch-x86_64.yaml index f043e544bb..5056ef1854 100644 --- a/conda/environments/all_cuda-128_arch-x86_64.yaml +++ 
b/conda/environments/all_cuda-128_arch-x86_64.yaml @@ -24,6 +24,7 @@ dependencies: - dlpack>=0.8,<1.0 - doxygen>=1.8.20 - gcc_linux-64=13.* +- go - graphviz - ipython - libclang==16.0.6 diff --git a/dependencies.yaml b/dependencies.yaml index 7a50c0d469..cfa63250da 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -21,6 +21,7 @@ files: - rapids_build - run_py_cuvs - rust + - go - test_libcuvs - test_python_common - test_py_cuvs @@ -81,6 +82,14 @@ files: - cuda_version - rapids_build - rust + go: + output: none + includes: + - clang + - cuda + - cuda_version + - rapids_build + - go py_build_libcuvs: output: pyproject pyproject_dir: python/libcuvs @@ -470,6 +479,12 @@ dependencies: packages: - make - rust + go: + common: + - output_types: [conda] + packages: + - go + - dlpack>=0.8,<1.0 build_wheels: common: - output_types: [requirements, pyproject] diff --git a/go/brute_force/brute_force.go b/go/brute_force/brute_force.go new file mode 100644 index 0000000000..29f1a5d958 --- /dev/null +++ b/go/brute_force/brute_force.go @@ -0,0 +1,83 @@ +package brute_force + +// #include +import "C" + +import ( + "errors" + "unsafe" + + cuvs "github.com/rapidsai/cuvs/go" +) + +// Brute Force KNN Index +type BruteForceIndex struct { + index C.cuvsBruteForceIndex_t + trained bool +} + +// Creates a new empty Brute Force KNN Index +func CreateIndex() (*BruteForceIndex, error) { + var index C.cuvsBruteForceIndex_t + + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsBruteForceIndexCreate(&index))) + if err != nil { + return nil, err + } + + return &BruteForceIndex{index: index, trained: false}, nil +} + +// Destroys the Brute Force KNN Index +func (index *BruteForceIndex) Close() error { + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsBruteForceIndexDestroy(index.index))) + if err != nil { + return err + } + return nil +} + +// Builds a new Brute Force KNN Index from the dataset for efficient search. 
+// +// # Arguments +// +// * `Resources` - Resources to use +// * `Dataset` - A row-major matrix on either the host or device to index +// * `metric` - Distance type to use for building the index +// * `metric_arg` - Value of `p` for Minkowski distances - set to 2.0 if not applicable +func BuildIndex[T any](Resources cuvs.Resource, Dataset *cuvs.Tensor[T], metric cuvs.Distance, metric_arg float32, index *BruteForceIndex) error { + CMetric, exists := cuvs.CDistances[metric] + + if !exists { + return errors.New("cuvs: invalid distance metric") + } + + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsBruteForceBuild(C.cuvsResources_t(Resources.Resource), (*C.DLManagedTensor)(unsafe.Pointer(Dataset.C_tensor)), C.cuvsDistanceType(CMetric), C.float(metric_arg), index.index))) + if err != nil { + return err + } + index.trained = true + + return nil +} + +// Perform a Nearest Neighbors search on the Index +// +// # Arguments +// +// * `Resources` - Resources to use +// * `queries` - Tensor in device memory to query for +// * `neighbors` - Tensor in device memory that receives the indices of the nearest neighbors +// * `distances` - Tensor in device memory that receives the distances of the nearest neighbors +func SearchIndex[T any](resources cuvs.Resource, index BruteForceIndex, queries *cuvs.Tensor[T], neighbors *cuvs.Tensor[int64], distances *cuvs.Tensor[T]) error { + if !index.trained { + return errors.New("index needs to be built before calling search") + } + + prefilter := C.cuvsFilter{ + addr: 0, + _type: C.NO_FILTER, + } + + return cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsBruteForceSearch(C.ulong(resources.Resource), index.index, (*C.DLManagedTensor)(unsafe.Pointer(queries.C_tensor)), (*C.DLManagedTensor)(unsafe.Pointer(neighbors.C_tensor)), (*C.DLManagedTensor)(unsafe.Pointer(distances.C_tensor)), prefilter))) +} diff --git a/go/brute_force/brute_force_test.go b/go/brute_force/brute_force_test.go new file mode 100644 index 0000000000..151c48e6ac --- /dev/null +++ 
b/go/brute_force/brute_force_test.go @@ -0,0 +1,110 @@ +package brute_force + +import ( + "math/rand/v2" + "testing" + + cuvs "github.com/rapidsai/cuvs/go" +) + +func TestCagra(t *testing.T) { + const ( + nDataPoints = 1024 + nFeatures = 16 + nQueries = 4 + k = 4 + epsilon = 0.001 + ) + + resource, _ := cuvs.NewResource(nil) + defer resource.Close() + + testDataset := make([][]float32, nDataPoints) + for i := range testDataset { + testDataset[i] = make([]float32, nFeatures) + for j := range testDataset[i] { + testDataset[i][j] = rand.Float32() + } + } + + dataset, err := cuvs.NewTensor(testDataset) + if err != nil { + t.Fatalf("error creating dataset tensor: %v", err) + } + defer dataset.Close() + + index, _ := CreateIndex() + defer index.Close() + + // use the first 4 points from the dataset as queries : will test that we get them back + // as their own nearest neighbor + queries, _ := cuvs.NewTensor(testDataset[:nQueries]) + defer queries.Close() + + neighbors, err := cuvs.NewTensorOnDevice[int64](&resource, []int64{int64(nQueries), int64(k)}) + if err != nil { + t.Fatalf("error creating neighbors tensor: %v", err) + } + defer neighbors.Close() + + distances, err := cuvs.NewTensorOnDevice[float32](&resource, []int64{int64(nQueries), int64(k)}) + if err != nil { + t.Fatalf("error creating distances tensor: %v", err) + } + defer distances.Close() + + if _, err := dataset.ToDevice(&resource); err != nil { + t.Fatalf("error moving dataset to device: %v", err) + } + + if err := BuildIndex(resource, &dataset, cuvs.DistanceL2, 2.0, index); err != nil { + t.Fatalf("error building index: %v", err) + } + + if err := resource.Sync(); err != nil { + t.Fatalf("error syncing resource: %v", err) + } + + if _, err := queries.ToDevice(&resource); err != nil { + t.Fatalf("error moving queries to device: %v", err) + } + + err = SearchIndex(resource, *index, &queries, &neighbors, &distances) + if err != nil { + t.Fatalf("error searching index: %v", err) + } + + if 
_, err := neighbors.ToHost(&resource); err != nil { + t.Fatalf("error moving neighbors to host: %v", err) + } + + if _, err := distances.ToHost(&resource); err != nil { + t.Fatalf("error moving distances to host: %v", err) + } + + if err := resource.Sync(); err != nil { + t.Fatalf("error syncing resource: %v", err) + } + + neighborsSlice, err := neighbors.Slice() + if err != nil { + t.Fatalf("error getting neighbors slice: %v", err) + } + + for i := range neighborsSlice { + if neighborsSlice[i][0] != int64(i) { + t.Error("wrong neighbor, expected", i, "got", neighborsSlice[i][0]) + } + } + + distancesSlice, err := distances.Slice() + if err != nil { + t.Fatalf("error getting distances slice: %v", err) + } + + for i := range distancesSlice { + if distancesSlice[i][0] >= epsilon || distancesSlice[i][0] <= -epsilon { + t.Error("distance should be close to 0, got", distancesSlice[i][0]) + } + } +} diff --git a/go/cagra/cagra.go b/go/cagra/cagra.go new file mode 100644 index 0000000000..61b9e71d6e --- /dev/null +++ b/go/cagra/cagra.go @@ -0,0 +1,136 @@ +package cagra + +// #include +import "C" + +import ( + "errors" + "unsafe" + + cuvs "github.com/rapidsai/cuvs/go" +) + +// Cagra ANN Index +type CagraIndex struct { + index C.cuvsCagraIndex_t + trained bool +} + +// Creates a new empty Cagra Index +func CreateIndex() (*CagraIndex, error) { + var index C.cuvsCagraIndex_t + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsCagraIndexCreate(&index))) + if err != nil { + return nil, err + } + + return &CagraIndex{index: index}, nil +} + +// Builds a new Index from the dataset for efficient search. 
+// +// # Arguments +// +// * `Resources` - Resources to use +// * `params` - Parameters for building the index +// * `dataset` - A row-major Tensor on either the host or device to index +// * `index` - CagraIndex to build +func BuildIndex[T any](Resources cuvs.Resource, params *IndexParams, dataset *cuvs.Tensor[T], index *CagraIndex) error { + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsCagraBuild(C.ulong(Resources.Resource), params.params, (*C.DLManagedTensor)(unsafe.Pointer(dataset.C_tensor)), index.index))) + if err != nil { + return err + } + index.trained = true + return nil +} + +// Extends the index with additional data +// +// # Arguments +// +// * `Resources` - Resources to use +// * `params` - Parameters for extending the index +// * `additional_dataset` - A row-major Tensor on the device to extend the index with +// * `return_dataset` - A row-major Tensor on the device that will receive the extended dataset +// * `index` - CagraIndex to extend +func ExtendIndex[T any](Resources cuvs.Resource, params *ExtendParams, additional_dataset *cuvs.Tensor[T], return_dataset *cuvs.Tensor[T], index *CagraIndex) error { + if !index.trained { + return errors.New("index needs to be built before calling extend") + } + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsCagraExtend(C.ulong(Resources.Resource), params.params, (*C.DLManagedTensor)(unsafe.Pointer(additional_dataset.C_tensor)), index.index, (*C.DLManagedTensor)(unsafe.Pointer(return_dataset.C_tensor))))) + if err != nil { + return err + } + return nil +} + +// Destroys the Cagra Index +func (index *CagraIndex) Close() error { + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsCagraIndexDestroy(index.index))) + if err != nil { + return err + } + return nil +} + +// Perform a Approximate Nearest Neighbors search on the Index +// +// # Arguments +// +// * `Resources` - Resources to use +// * `params` - Parameters to use in searching the index +// * `queries` - A tensor in device memory to query for +// * `neighbors` - Tensor 
in device memory that receives the indices of the nearest neighbors +// * `distances` - Tensor in device memory that receives the distances of the nearest neighbors +// * `allowList` - List of indices to allow in the search, if nil, no filtering is applied +func SearchIndex[T any](Resources cuvs.Resource, params *SearchParams, index *CagraIndex, queries *cuvs.Tensor[T], neighbors *cuvs.Tensor[uint32], distances *cuvs.Tensor[T], allowList []uint32) error { + if !index.trained { + return errors.New("index needs to be built before calling search") + } + + var filter C.cuvsFilter + bitset := createBitset(allowList) + allowListTensor, err := cuvs.NewVector[uint32](bitset) + if err != nil { + return err + } + defer allowListTensor.Close() + _, err = allowListTensor.ToDevice(&Resources) + if err != nil { + return err + } + if allowList == nil { + filter = C.cuvsFilter{ + _type: C.NO_FILTER, + addr: C.uintptr_t(0), + } + } else { + filter = C.cuvsFilter{ + _type: C.BITSET, + addr: C.uintptr_t(uintptr(unsafe.Pointer(allowListTensor.C_tensor))), + } + } + return cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsCagraSearch(C.cuvsResources_t(Resources.Resource), params.params, index.index, (*C.DLManagedTensor)(unsafe.Pointer(queries.C_tensor)), (*C.DLManagedTensor)(unsafe.Pointer(neighbors.C_tensor)), (*C.DLManagedTensor)(unsafe.Pointer(distances.C_tensor)), filter))) +} + +func createBitset(allowList []uint32) []uint32 { + // Calculate size needed for the bitset array + // Each uint32 handles 32 bits, so we divide the max ID by 32 (shift right by 5) + maxID := uint32(0) + for _, id := range allowList { + if id > maxID { + maxID = id + } + } + size := (maxID >> 5) + 1 // Division by 32, add 1 to handle remainder + bitset := make([]uint32, size) + for _, id := range allowList { + // Calculate which uint32 in our array (divide by 32) + arrayIndex := id >> 5 + // Calculate bit position within that uint32 (mod 32) + bitPosition := id & 31 // equivalent to id % 32 + // Set the bit + 
bitset[arrayIndex] |= 1 << bitPosition + } + return bitset +} diff --git a/go/cagra/cagra_test.go b/go/cagra/cagra_test.go new file mode 100644 index 0000000000..86baf5e601 --- /dev/null +++ b/go/cagra/cagra_test.go @@ -0,0 +1,327 @@ +package cagra + +import ( + "math/rand/v2" + "testing" + + cuvs "github.com/rapidsai/cuvs/go" +) + +func TestCagra(t *testing.T) { + const ( + nDataPoints = 1024 + nFeatures = 16 + nQueries = 4 + k = 4 + epsilon = 0.001 + ) + + resource, _ := cuvs.NewResource(nil) + defer resource.Close() + + testDataset := make([][]float32, nDataPoints) + for i := range testDataset { + testDataset[i] = make([]float32, nFeatures) + for j := range testDataset[i] { + testDataset[i][j] = rand.Float32() + } + } + + dataset, err := cuvs.NewTensor(testDataset) + if err != nil { + t.Fatalf("error creating dataset tensor: %v", err) + } + defer dataset.Close() + + indexParams, err := CreateIndexParams() + if err != nil { + t.Fatalf("error creating index params: %v", err) + } + defer indexParams.Close() + + index, _ := CreateIndex() + defer index.Close() + + // use the first 4 points from the dataset as queries : will test that we get them back + // as their own nearest neighbor + queries, _ := cuvs.NewTensor(testDataset[:nQueries]) + defer queries.Close() + + neighbors, err := cuvs.NewTensorOnDevice[uint32](&resource, []int64{int64(nQueries), int64(k)}) + if err != nil { + t.Fatalf("error creating neighbors tensor: %v", err) + } + defer neighbors.Close() + + distances, err := cuvs.NewTensorOnDevice[float32](&resource, []int64{int64(nQueries), int64(k)}) + if err != nil { + t.Fatalf("error creating distances tensor: %v", err) + } + defer distances.Close() + + if _, err := dataset.ToDevice(&resource); err != nil { + t.Fatalf("error moving dataset to device: %v", err) + } + + if err := BuildIndex(resource, indexParams, &dataset, index); err != nil { + t.Fatalf("error building index: %v", err) + } + + if err := resource.Sync(); err != nil { + 
t.Fatalf("error syncing resource: %v", err) + } + + if _, err := queries.ToDevice(&resource); err != nil { + t.Fatalf("error moving queries to device: %v", err) + } + + SearchParams, err := CreateSearchParams() + if err != nil { + t.Fatalf("error creating search params: %v", err) + } + defer SearchParams.Close() + + err = SearchIndex(resource, SearchParams, index, &queries, &neighbors, &distances, nil) + if err != nil { + t.Fatalf("error searching index: %v", err) + } + + if _, err := neighbors.ToHost(&resource); err != nil { + t.Fatalf("error moving neighbors to host: %v", err) + } + + if _, err := distances.ToHost(&resource); err != nil { + t.Fatalf("error moving distances to host: %v", err) + } + + if err := resource.Sync(); err != nil { + t.Fatalf("error syncing resource: %v", err) + } + + neighborsSlice, err := neighbors.Slice() + if err != nil { + t.Fatalf("error getting neighbors slice: %v", err) + } + + for i := range neighborsSlice { + if neighborsSlice[i][0] != uint32(i) { + t.Error("wrong neighbor, expected", i, "got", neighborsSlice[i][0]) + } + } + + distancesSlice, err := distances.Slice() + if err != nil { + t.Fatalf("error getting distances slice: %v", err) + } + + for i := range distancesSlice { + if distancesSlice[i][0] >= epsilon || distancesSlice[i][0] <= -epsilon { + t.Error("distance should be close to 0, got", distancesSlice[i][0]) + } + } +} + +func TestCagraFiltering(t *testing.T) { + const ( + nDataPoints = 1024 + nFilteredOut = 512 + nFeatures = 16 + nQueries = 4 + k = 4 + epsilon = 0.001 + ) + + resource, _ := cuvs.NewResource(nil) + defer resource.Close() + + testDataset := make([][]float32, nDataPoints) + for i := range testDataset { + testDataset[i] = make([]float32, nFeatures) + for j := range testDataset[i] { + testDataset[i][j] = rand.Float32() + } + } + + dataset, err := cuvs.NewTensor(testDataset) + if err != nil { + t.Fatalf("error creating dataset tensor: %v", err) + } + defer dataset.Close() + + indexParams, err := 
CreateIndexParams() + if err != nil { + t.Fatalf("error creating index params: %v", err) + } + defer indexParams.Close() + + index, _ := CreateIndex() + defer index.Close() + + // Test queries: first 4 points (should be found without filter) + queries1, _ := cuvs.NewTensor(testDataset[:nQueries]) + defer queries1.Close() + + // Test queries: points 512-515 (should be found with filter, not found without) + queries2, _ := cuvs.NewTensor(testDataset[nFilteredOut:(nFilteredOut + nQueries)]) + defer queries2.Close() + + neighbors, err := cuvs.NewTensorOnDevice[uint32](&resource, []int64{int64(nQueries), int64(k)}) + if err != nil { + t.Fatalf("error creating neighbors tensor: %v", err) + } + defer neighbors.Close() + + distances, err := cuvs.NewTensorOnDevice[float32](&resource, []int64{int64(nQueries), int64(k)}) + if err != nil { + t.Fatalf("error creating distances tensor: %v", err) + } + defer distances.Close() + + if _, err := dataset.ToDevice(&resource); err != nil { + t.Fatalf("error moving dataset to device: %v", err) + } + + if err := BuildIndex(resource, indexParams, &dataset, index); err != nil { + t.Fatalf("error building index: %v", err) + } + + if err := resource.Sync(); err != nil { + t.Fatalf("error syncing resource: %v", err) + } + + SearchParams, err := CreateSearchParams() + if err != nil { + t.Fatalf("error creating search params: %v", err) + } + defer SearchParams.Close() + + // Step 1: Search without filter - first 4 points should find themselves + if _, err := queries1.ToDevice(&resource); err != nil { + t.Fatalf("error moving queries1 to device: %v", err) + } + + err = SearchIndex(resource, SearchParams, index, &queries1, &neighbors, &distances, nil) + if err != nil { + t.Fatalf("error searching index without filter: %v", err) + } + + if _, err := neighbors.ToHost(&resource); err != nil { + t.Fatalf("error moving neighbors to host: %v", err) + } + + if _, err := distances.ToHost(&resource); err != nil { + t.Fatalf("error moving distances to 
host: %v", err) + } + + if err := resource.Sync(); err != nil { + t.Fatalf("error syncing resource: %v", err) + } + + // Verify first 4 points found themselves without filter + neighborsSlice, err := neighbors.Slice() + if err != nil { + t.Fatalf("error getting neighbors slice: %v", err) + } + + for i := range neighborsSlice { + if neighborsSlice[i][0] != uint32(i) { + t.Error("without filter: wrong neighbor, expected", i, "got", neighborsSlice[i][0]) + } + } + + // Step 2: Search with filter excluding first half - first 4 points should not be found + allowList := make([]uint32, nDataPoints-nFilteredOut) + for i := range allowList { + allowList[i] = uint32(i + nFilteredOut) + } + + if _, err := queries1.ToDevice(&resource); err != nil { + t.Fatalf("error moving queries1 back to device: %v", err) + } + + if _, err := neighbors.ToDevice(&resource); err != nil { + t.Fatalf("error moving neighbors back to device: %v", err) + } + + if _, err := distances.ToDevice(&resource); err != nil { + t.Fatalf("error moving distances back to device: %v", err) + } + + err = SearchIndex(resource, SearchParams, index, &queries1, &neighbors, &distances, allowList) + if err != nil { + t.Fatalf("error searching index with filter: %v", err) + } + + if _, err := neighbors.ToHost(&resource); err != nil { + t.Fatalf("error moving neighbors to host: %v", err) + } + + if err := resource.Sync(); err != nil { + t.Fatalf("error syncing resource: %v", err) + } + + // Verify first 4 points are not in results when filtered + neighborsSlice, err = neighbors.Slice() + if err != nil { + t.Fatalf("error getting neighbors slice: %v", err) + } + + for i := range neighborsSlice { + for j := range neighborsSlice[i] { + if neighborsSlice[i][j] < uint32(nFilteredOut) { + t.Error("with filter: found point that should be filtered out:", neighborsSlice[i][j]) + } + } + } + + // Step 3: Search points 512-515 with filter - they should find themselves + if _, err := queries2.ToDevice(&resource); err != nil { + 
t.Fatalf("error moving queries2 to device: %v", err) + } + + if _, err := neighbors.ToDevice(&resource); err != nil { + t.Fatalf("error moving neighbors back to device: %v", err) + } + + if _, err := distances.ToDevice(&resource); err != nil { + t.Fatalf("error moving distances back to device: %v", err) + } + + err = SearchIndex(resource, SearchParams, index, &queries2, &neighbors, &distances, allowList) + if err != nil { + t.Fatalf("error searching index with filter for second query set: %v", err) + } + + if _, err := neighbors.ToHost(&resource); err != nil { + t.Fatalf("error moving neighbors to host: %v", err) + } + + if _, err := distances.ToHost(&resource); err != nil { + t.Fatalf("error moving distances to host: %v", err) + } + + if err := resource.Sync(); err != nil { + t.Fatalf("error syncing resource: %v", err) + } + + neighborsSlice, err = neighbors.Slice() + if err != nil { + t.Fatalf("error getting neighbors slice: %v", err) + } + + distancesSlice, err := distances.Slice() + if err != nil { + t.Fatalf("error getting distances slice: %v", err) + } + + // Verify points 512-515 find themselves when filtered to second half + for i := range neighborsSlice { + expectedID := uint32(i + nFilteredOut) + if neighborsSlice[i][0] != expectedID { + t.Error("with filter: wrong neighbor for filtered query, expected", expectedID, "got", neighborsSlice[i][0]) + } + if distancesSlice[i][0] >= epsilon || distancesSlice[i][0] <= -epsilon { + t.Error("with filter: distance should be close to 0 for filtered query, got", distancesSlice[i][0]) + } + } +} diff --git a/go/cagra/extend_params.go b/go/cagra/extend_params.go new file mode 100644 index 0000000000..7e173c0456 --- /dev/null +++ b/go/cagra/extend_params.go @@ -0,0 +1,46 @@ +package cagra + +// #include +import "C" + +import ( + cuvs "github.com/rapidsai/cuvs/go" +) + +// Parameters to extend CAGRA Index +type ExtendParams struct { + params C.cuvsCagraExtendParams_t +} + +// Creates a new ExtendParams 
+func CreateExtendParams() (*ExtendParams, error) {
+	var params C.cuvsCagraExtendParams_t
+
+	err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsCagraExtendParamsCreate(&params)))
+	if err != nil {
+		return nil, err
+	}
+
+	ExtendParams := &ExtendParams{params: params}
+
+	return ExtendParams, nil
+}
+
+// The additional dataset is divided into chunks and added to the graph.
+// This is the knob to adjust the tradeoff between the recall and operation throughput.
+// Large chunk sizes can result in high throughput, but use more
+// working memory (O(max_chunk_size*degree^2)).
+// This can also degrade recall because no edges are added between the nodes in the same chunk.
+// Auto select when 0.
+func (p *ExtendParams) SetMaxChunkSize(max_chunk_size uint32) (*ExtendParams, error) {
+	p.params.max_chunk_size = C.uint32_t(max_chunk_size)
+	return p, nil
+}
+
+func (p *ExtendParams) Close() error {
+	err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsCagraExtendParamsDestroy(p.params)))
+	if err != nil {
+		return err
+	}
+	return nil
+}
diff --git a/go/cagra/index_params.go b/go/cagra/index_params.go
new file mode 100644
index 0000000000..51fdb318fa
--- /dev/null
+++ b/go/cagra/index_params.go
@@ -0,0 +1,155 @@
+package cagra
+
+// #include <cuvs/neighbors/cagra.h>
+import "C"
+
+import (
+	"errors"
+
+	cuvs "github.com/rapidsai/cuvs/go"
+)
+
+type IndexParams struct {
+	params C.cuvsCagraIndexParams_t
+}
+
+// Supplemental parameters to build CAGRA Index
+type CompressionParams struct {
+	params C.cuvsCagraCompressionParams_t
+}
+
+type BuildAlgo int
+
+const (
+	IvfPq BuildAlgo = iota
+	NnDescent
+	AutoSelect
+)
+
+var cBuildAlgos = map[BuildAlgo]int{
+	IvfPq:      C.IVF_PQ,
+	NnDescent:  C.NN_DESCENT,
+	AutoSelect: C.AUTO_SELECT,
+}
+
+// Creates a new CompressionParams
+func CreateCompressionParams() (*CompressionParams, error) {
+	var params C.cuvsCagraCompressionParams_t
+
+	err := 
cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsCagraCompressionParamsCreate(¶ms))) + if err != nil { + return nil, err + } + + return &CompressionParams{params: params}, nil +} + +// The bit length of the vector element after compression by PQ. +func (p *CompressionParams) SetPQBits(pq_bits uint32) (*CompressionParams, error) { + p.params.pq_bits = C.uint32_t(pq_bits) + + return p, nil +} + +// The dimensionality of the vector after compression by PQ. When zero, +// an optimal value is selected using a heuristic. +func (p *CompressionParams) SetPQDim(pq_dim uint32) (*CompressionParams, error) { + p.params.pq_dim = C.uint32_t(pq_dim) + + return p, nil +} + +// Vector Quantization (VQ) codebook size - number of "coarse cluster +// centers". When zero, an optimal value is selected using a heuristic. +func (p *CompressionParams) SetVQNCenters(vq_n_centers uint32) (*CompressionParams, error) { + p.params.vq_n_centers = C.uint32_t(vq_n_centers) + + return p, nil +} + +// The number of iterations searching for kmeans centers (both VQ & PQ +// phases). +func (p *CompressionParams) SetKMeansNIters(kmeans_n_iters uint32) (*CompressionParams, error) { + p.params.kmeans_n_iters = C.uint32_t(kmeans_n_iters) + + return p, nil +} + +// The fraction of data to use during iterative kmeans building (VQ +// phase). When zero, an optimal value is selected using a heuristic. +func (p *CompressionParams) SetVQKMeansTrainsetFraction(vq_kmeans_trainset_fraction float64) (*CompressionParams, error) { + p.params.vq_kmeans_trainset_fraction = C.double(vq_kmeans_trainset_fraction) + + return p, nil +} + +// The fraction of data to use during iterative kmeans building (PQ +// phase). When zero, an optimal value is selected using a heuristic. 
+func (p *CompressionParams) SetPQKMeansTrainsetFraction(pq_kmeans_trainset_fraction float64) (*CompressionParams, error) { + p.params.pq_kmeans_trainset_fraction = C.double(pq_kmeans_trainset_fraction) + + return p, nil +} + +// Creates a new IndexParams +func CreateIndexParams() (*IndexParams, error) { + var params C.cuvsCagraIndexParams_t + + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsCagraIndexParamsCreate(¶ms))) + if err != nil { + return nil, err + } + + IndexParams := &IndexParams{params: params} + + return IndexParams, nil +} + +// Degree of input graph for pruning +func (p *IndexParams) SetIntermediateGraphDegree(intermediate_graph_degree uintptr) (*IndexParams, error) { + p.params.intermediate_graph_degree = C.size_t(intermediate_graph_degree) + return p, nil +} + +// Degree of output graph +func (p *IndexParams) SetGraphDegree(intermediate_graph_degree uintptr) (*IndexParams, error) { + p.params.graph_degree = C.size_t(intermediate_graph_degree) + + return p, nil +} + +// ANN algorithm to build knn graph +func (p *IndexParams) SetBuildAlgo(build_algo BuildAlgo) (*IndexParams, error) { + CBuildAlgo, exists := cBuildAlgos[build_algo] + + if !exists { + return nil, errors.New("cuvs: invalid build_algo") + } + p.params.build_algo = uint32(CBuildAlgo) + + return p, nil +} + +// Number of iterations to run if building with NN_DESCENT +func (p *IndexParams) SetNNDescentNiter(nn_descent_niter uint32) (*IndexParams, error) { + p.params.nn_descent_niter = C.ulong(nn_descent_niter) + + return p, nil +} + +// Compression parameters +func (p *IndexParams) SetCompression(compression *CompressionParams) (*IndexParams, error) { + p.params.compression = C.cuvsCagraCompressionParams_t(compression.params) + + return p, nil +} + +// Destroys IndexParams +func (p *IndexParams) Close() error { + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsCagraIndexParamsDestroy(p.params))) + if err != nil { + return err + } + + return nil +} diff --git a/go/cagra/search_params.go 
b/go/cagra/search_params.go new file mode 100644 index 0000000000..94207d2881 --- /dev/null +++ b/go/cagra/search_params.go @@ -0,0 +1,163 @@ +package cagra + +// #include +import "C" + +import ( + "errors" + + cuvs "github.com/rapidsai/cuvs/go" +) + +// Supplemental parameters to search CAGRA Index +type SearchParams struct { + params C.cuvsCagraSearchParams_t +} + +type SearchAlgo int + +const ( + SearchAlgoSingleCta SearchAlgo = iota + SearchAlgoMultiCta + SearchAlgoMultiKernel + SearchAlgoAuto +) + +type HashmapMode int + +const ( + HashmapModeHash HashmapMode = iota + HashmapModeSmall + HashmapModeAuto +) + +// Creates a new SearchParams +func CreateSearchParams() (*SearchParams, error) { + var params C.cuvsCagraSearchParams_t + + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsCagraSearchParamsCreate(¶ms))) + if err != nil { + return nil, err + } + + return &SearchParams{params: params}, nil +} + +// Maximum number of queries to search at the same time (batch size). Auto select when 0 +func (p *SearchParams) SetMaxQueries(max_queries uintptr) (*SearchParams, error) { + p.params.max_queries = C.size_t(max_queries) + return p, nil +} + +// Number of intermediate search results retained during the search. +// This is the main knob to adjust trade off between accuracy and search speed. +// Higher values improve the search accuracy +func (p *SearchParams) SetItopkSize(itopk_size uintptr) (*SearchParams, error) { + p.params.itopk_size = C.size_t(itopk_size) + return p, nil +} + +// Upper limit of search iterations. Auto select when 0. +func (p *SearchParams) SetMaxIterations(max_iterations uintptr) (*SearchParams, error) { + p.params.max_iterations = C.size_t(max_iterations) + return p, nil +} + +// Which search implementation to use. 
+func (p *SearchParams) SetAlgo(algo SearchAlgo) (*SearchParams, error) { + CAlgo := C.SINGLE_CTA + + switch algo { + case SearchAlgoSingleCta: + CAlgo = C.SINGLE_CTA + case SearchAlgoMultiCta: + CAlgo = C.MULTI_CTA + case SearchAlgoMultiKernel: + CAlgo = C.MULTI_KERNEL + case SearchAlgoAuto: + CAlgo = C.AUTO + default: + return nil, errors.New("unsupported algo") + } + + p.params.algo = uint32(CAlgo) + + return p, nil +} + +// Number of threads used to calculate a single distance. 4, 8, 16, or 32. +func (p *SearchParams) SetTeamSize(team_size uintptr) (*SearchParams, error) { + p.params.team_size = C.size_t(team_size) + return p, nil +} + +// Lower limit of search iterations. +func (p *SearchParams) SetMinIterations(min_iterations uintptr) (*SearchParams, error) { + p.params.min_iterations = C.size_t(min_iterations) + return p, nil +} + +// How many nodes to search at once. Auto select when 0. +func (p *SearchParams) SetSearchWidth(search_width uintptr) (*SearchParams, error) { + p.params.search_width = C.size_t(search_width) + return p, nil +} + +// Thread block size. 0, 64, 128, 256, 512, 1024. Auto selection when 0. +func (p *SearchParams) SetThreadBlockSize(thread_block_size uintptr) (*SearchParams, error) { + p.params.thread_block_size = C.size_t(thread_block_size) + return p, nil +} + +// Hashmap type. Auto selection when AUTO. +func (p *SearchParams) SetHashmapMode(hashmap_mode HashmapMode) (*SearchParams, error) { + CHashMode := C.AUTO_HASH + + switch hashmap_mode { + case HashmapModeHash: + CHashMode = C.HASH + case HashmapModeSmall: + CHashMode = C.SMALL + case HashmapModeAuto: + CHashMode = C.AUTO_HASH + default: + return nil, errors.New("unsupported hashmap_mode") + } + + p.params.hashmap_mode = uint32(CHashMode) + + return p, nil +} + +// Lower limit of hashmap bit length. More than 8. 
+func (p *SearchParams) SetHashmapMinBitlen(hashmap_min_bitlen uintptr) (*SearchParams, error) { + p.params.hashmap_min_bitlen = C.size_t(hashmap_min_bitlen) + return p, nil +} + +// Upper limit of hashmap fill rate. More than 0.1, less than 0.9. +func (p *SearchParams) SetHashmapMaxFillRate(hashmap_max_fill_rate float32) (*SearchParams, error) { + p.params.hashmap_max_fill_rate = C.float(hashmap_max_fill_rate) + return p, nil +} + +// Number of iterations of initial random seed node selection. 1 or more. +func (p *SearchParams) SetNumRandomSamplings(num_random_samplings uint32) (*SearchParams, error) { + p.params.num_random_samplings = C.uint32_t(num_random_samplings) + return p, nil +} + +// Bit mask used for initial random seed node selection. +func (p *SearchParams) SetRandXorMask(rand_xor_mask uint64) (*SearchParams, error) { + p.params.rand_xor_mask = C.uint64_t(rand_xor_mask) + return p, nil +} + +// Destroys SearchParams +func (p *SearchParams) Close() error { + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsCagraSearchParamsDestroy(p.params))) + if err != nil { + return err + } + return nil +} diff --git a/go/distance.go b/go/distance.go new file mode 100644 index 0000000000..530e1d61b3 --- /dev/null +++ b/go/distance.go @@ -0,0 +1,70 @@ +package cuvs + +// #include +import "C" + +import ( + "errors" + "unsafe" +) + +type Distance int + +// Supported distance metrics +const ( + DistanceL2 Distance = iota + DistanceSQEuclidean + DistanceEuclidean + DistanceL1 + DistanceCityblock + DistanceInnerProduct + DistanceChebyshev + DistanceCanberra + DistanceCosine + DistanceLp + DistanceCorrelation + DistanceJaccard + DistanceHellinger + DistanceBrayCurtis + DistanceJensenShannon + DistanceHamming + DistanceKLDivergence + DistanceMinkowski + DistanceRusselRao + DistanceDice +) + +// Maps cuvs Go distances to C distances +var CDistances = map[Distance]int{ + DistanceL2: C.L2SqrtExpanded, + DistanceSQEuclidean: C.L2Expanded, + DistanceEuclidean: C.L2SqrtExpanded, + 
DistanceL1: C.L1, + DistanceCityblock: C.L1, + DistanceInnerProduct: C.InnerProduct, + DistanceChebyshev: C.Linf, + DistanceCanberra: C.Canberra, + DistanceCosine: C.CosineExpanded, + DistanceLp: C.LpUnexpanded, + DistanceCorrelation: C.CorrelationExpanded, + DistanceJaccard: C.JaccardExpanded, + DistanceHellinger: C.HellingerExpanded, + DistanceBrayCurtis: C.BrayCurtis, + DistanceJensenShannon: C.JensenShannon, + DistanceHamming: C.HammingUnexpanded, + DistanceKLDivergence: C.KLDivergence, + DistanceMinkowski: C.LpUnexpanded, + DistanceRusselRao: C.RusselRaoExpanded, + DistanceDice: C.DiceExpanded, +} + +// Computes the pairwise distance between two vectors. +func PairwiseDistance[T any](Resources Resource, x *Tensor[T], y *Tensor[T], distances *Tensor[float32], metric Distance, metric_arg float32) error { + CMetric, exists := CDistances[metric] + + if !exists { + return errors.New("cuvs: invalid distance metric") + } + + return CheckCuvs(CuvsError(C.cuvsPairwiseDistance(C.cuvsResources_t(Resources.Resource), (*C.DLManagedTensor)(unsafe.Pointer(x.C_tensor)), (*C.DLManagedTensor)(unsafe.Pointer(y.C_tensor)), (*C.DLManagedTensor)(unsafe.Pointer(distances.C_tensor)), C.cuvsDistanceType(CMetric), C.float(metric_arg)))) +} diff --git a/go/distance_test.go b/go/distance_test.go new file mode 100644 index 0000000000..17f40a1ba2 --- /dev/null +++ b/go/distance_test.go @@ -0,0 +1,49 @@ +package cuvs_test + +import ( + "math/rand" + "testing" + "time" + + cuvs "github.com/rapidsai/cuvs/go" +) + +func TestDistance(t *testing.T) { + resource, _ := cuvs.NewResource(nil) + + rand.Seed(time.Now().UnixNano()) + + NDataPoints := 256 + NFeatures := 16 + + TestDataset := make([][]float32, NDataPoints) + for i := range TestDataset { + TestDataset[i] = make([]float32, NFeatures) + for j := range TestDataset[i] { + TestDataset[i][j] = rand.Float32() + } + } + + dataset, _ := cuvs.NewTensor(TestDataset) + + DistancesDataset := make([][]float32, NDataPoints) + for i := 
range DistancesDataset { + DistancesDataset[i] = make([]float32, NDataPoints) + } + + distances, _ := cuvs.NewTensor(DistancesDataset) + + distances.ToDevice(&resource) + dataset.ToDevice(&resource) + + cuvs.PairwiseDistance(resource, &dataset, &dataset, &distances, cuvs.DistanceL2, 0.0) + + distances.ToHost(&resource) + + resource.Sync() + + arr, _ := distances.Slice() + if arr[0][0] != 0.0 { + t.Error("wrong distance, expected", 0.0, "got", arr[0][0]) + } +} diff --git a/go/dlpack.go b/go/dlpack.go new file mode 100644 index 0000000000..6fe619fd35 --- /dev/null +++ b/go/dlpack.go @@ -0,0 +1,420 @@ +package cuvs + +// #include +// #include +// #include +import "C" + +import ( + "errors" + "strconv" + "unsafe" +) + +type TensorNumberType interface { + int64 | uint32 | float32 +} + +// ManagedTensor is a wrapper around a dlpack DLManagedTensor object. +// This lets you pass matrices in device or host memory into cuvs. +type Tensor[T any] struct { + C_tensor *C.DLManagedTensor + shape []int64 +} + +// Creates a new Tensor on the host and copies the data into it. 
+func NewTensor[T TensorNumberType](data [][]T) (Tensor[T], error) { + if len(data) == 0 || len(data[0]) == 0 { + return Tensor[T]{}, errors.New("empty data") + } + + dtype := getDLDataType[T]() + + totalElements := len(data) * len(data[0]) + + dataPtr := C.malloc(C.size_t(totalElements * int(unsafe.Sizeof(T(0))))) + if dataPtr == nil { + return Tensor[T]{}, errors.New("data memory allocation failed") + } + + dataSlice := unsafe.Slice((*T)(dataPtr), totalElements) + flattenData(data, dataSlice) + + shapePtr := C.malloc(C.size_t(2 * int(unsafe.Sizeof(C.int64_t(0))))) + if shapePtr == nil { + C.free(dataPtr) + return Tensor[T]{}, errors.New("shape memory allocation failed") + } + + shapeSlice := unsafe.Slice((*C.int64_t)(shapePtr), 2) + shapeSlice[0] = C.int64_t(len(data)) + shapeSlice[1] = C.int64_t(len(data[0])) + + // Create DLManagedTensor + dlm := (*C.DLManagedTensor)(C.malloc(C.size_t(unsafe.Sizeof(C.DLManagedTensor{})))) + if dlm == nil { + return Tensor[T]{}, errors.New("tensor allocation failed") + } + + dlm.dl_tensor.data = dataPtr + dlm.dl_tensor.device = C.DLDevice{ + device_type: C.DLDeviceType(C.kDLCPU), + device_id: 0, + } + dlm.dl_tensor.dtype = dtype + dlm.dl_tensor.ndim = 2 + dlm.dl_tensor.shape = (*C.int64_t)(shapePtr) + dlm.dl_tensor.strides = nil + dlm.dl_tensor.byte_offset = 0 + dlm.manager_ctx = nil + dlm.deleter = nil + + return Tensor[T]{ + C_tensor: dlm, + shape: []int64{int64(len(data)), int64(len(data[0]))}, + }, nil +} + +func NewVector[T TensorNumberType](data []T) (Tensor[T], error) { + if len(data) == 0 { + return Tensor[T]{}, errors.New("empty data") + } + + dtype := getDLDataType[T]() + + totalElements := len(data) + + dataPtr := C.malloc(C.size_t(totalElements * int(unsafe.Sizeof(T(0))))) + if dataPtr == nil { + return Tensor[T]{}, errors.New("data memory allocation failed") + } + + dataSlice := unsafe.Slice((*T)(dataPtr), totalElements) + copy(dataSlice, data) + + shapePtr := C.malloc(C.size_t(int(unsafe.Sizeof(C.int64_t(0))))) + 
if shapePtr == nil { + C.free(dataPtr) + return Tensor[T]{}, errors.New("shape memory allocation failed") + } + + shapeSlice := unsafe.Slice((*C.int64_t)(shapePtr), 1) + shapeSlice[0] = C.int64_t(len(data)) + + // Create DLManagedTensor + dlm := (*C.DLManagedTensor)(C.malloc(C.size_t(unsafe.Sizeof(C.DLManagedTensor{})))) + if dlm == nil { + return Tensor[T]{}, errors.New("tensor allocation failed") + } + + dlm.dl_tensor.data = dataPtr + dlm.dl_tensor.device = C.DLDevice{ + device_type: C.DLDeviceType(C.kDLCPU), + device_id: 0, + } + dlm.dl_tensor.dtype = dtype + dlm.dl_tensor.ndim = 1 + dlm.dl_tensor.shape = (*C.int64_t)(shapePtr) + dlm.dl_tensor.strides = nil + dlm.dl_tensor.byte_offset = 0 + dlm.manager_ctx = nil + dlm.deleter = nil + + return Tensor[T]{ + C_tensor: dlm, + shape: []int64{int64(len(data))}, + }, nil +} + +// Creates a new Tensor with uninitialized data on the current device. +func NewTensorOnDevice[T TensorNumberType](res *Resource, shape []int64) (Tensor[T], error) { + if len(shape) < 2 { + return Tensor[T]{}, errors.New("shape must be at least 2") + } + + shapePtr := C.malloc(C.size_t(len(shape) * int(unsafe.Sizeof(C.int64_t(0))))) + if shapePtr == nil { + return Tensor[T]{}, errors.New("shape memory allocation failed") + } + + shapeSlice := unsafe.Slice((*C.int64_t)(shapePtr), len(shape)) + for i, dim := range shape { + shapeSlice[i] = C.int64_t(dim) + } + + dlm := (*C.DLManagedTensor)(C.malloc(C.size_t(unsafe.Sizeof(C.DLManagedTensor{})))) + if dlm == nil { + return Tensor[T]{}, errors.New("tensor allocation failed") + } + dtype := getDLDataType[T]() + + var deviceDataPtr unsafe.Pointer + bytes := calculateBytes(shape, dtype) + err := CheckCuvs(CuvsError(C.cuvsRMMAlloc(res.Resource, &deviceDataPtr, C.size_t(bytes)))) + if err != nil { + C.free(unsafe.Pointer(dlm)) + C.free(unsafe.Pointer(shapePtr)) + return Tensor[T]{}, err + } + + dlm.dl_tensor.data = deviceDataPtr + dlm.dl_tensor.device = C.DLDevice{ + device_type: C.DLDeviceType(C.kDLCUDA), 
+ device_id: 0, + } + dlm.dl_tensor.dtype = dtype + dlm.dl_tensor.ndim = C.int(len(shape)) + dlm.dl_tensor.shape = (*C.int64_t)(shapePtr) + dlm.dl_tensor.strides = nil + dlm.dl_tensor.byte_offset = 0 + dlm.manager_ctx = nil + dlm.deleter = nil + + shapeCopy := make([]int64, len(shape)) + copy(shapeCopy, shape) + + return Tensor[T]{ + C_tensor: dlm, + shape: shapeCopy, + }, nil +} + +// Destroys Tensor, freeing the memory it was allocated on. +func (t *Tensor[T]) Close() error { + if t.C_tensor.dl_tensor.device.device_type == C.kDLCUDA { + bytes := t.sizeInBytes() + res, err := NewResource(nil) + if err != nil { + return err + } + err = CheckCuvs(CuvsError(C.cuvsRMMFree(res.Resource, t.C_tensor.dl_tensor.data, C.size_t(bytes)))) + + return err + } else if t.C_tensor.dl_tensor.device.device_type == C.kDLCPU { + if t.C_tensor.dl_tensor.data != nil { + C.free(t.C_tensor.dl_tensor.data) + t.C_tensor.dl_tensor.data = nil + } + } + + if t.C_tensor.dl_tensor.shape != nil { + C.free(unsafe.Pointer(t.C_tensor.dl_tensor.shape)) + t.C_tensor.dl_tensor.shape = nil + } + + if t.C_tensor != nil { + C.free(unsafe.Pointer(t.C_tensor)) + t.C_tensor = nil + } + + t.C_tensor = nil + return nil +} + +// Transfers the data in the Tensor to the device. +func (t *Tensor[T]) ToDevice(res *Resource) (*Tensor[T], error) { + bytes := t.sizeInBytes() + + var DeviceDataPointer unsafe.Pointer + + err := CheckCuvs(CuvsError(C.cuvsRMMAlloc(res.Resource, &DeviceDataPointer, C.size_t(bytes)))) + if err != nil { + return nil, err + } + + err = CheckCuda( + C.cudaMemcpy( + DeviceDataPointer, + t.C_tensor.dl_tensor.data, + C.size_t(bytes), + C.cudaMemcpyHostToDevice, + )) + if err != nil { + C.cuvsRMMFree(res.Resource, DeviceDataPointer, C.size_t(bytes)) + return nil, err + } + t.C_tensor.dl_tensor.device.device_type = C.kDLCUDA + t.C_tensor.dl_tensor.data = DeviceDataPointer + + return t, nil +} + +// Returns the shape of the Tensor. 
+func (t *Tensor[T]) Shape() []int64 { + return t.shape +} + +// Expands the Tensor by adding newData to the end of the current data. +// The Tensor must be on the device. +func (t *Tensor[T]) Expand(res *Resource, newData [][]T) (*Tensor[T], error) { + if t.C_tensor.dl_tensor.device.device_type != C.kDLCUDA { + return &Tensor[T]{}, errors.New("Tensor must be on GPU") + } + + newShape := []int64{int64(len(newData)), int64(len(newData[0]))} + + flatData := make([]T, len(newData)*len(newData[0])) + for i := range newData { + for j := range newData[i] { + flatData[i*len(newData[0])+j] = newData[i][j] + } + } + + old_shape := unsafe.Slice((*int64)(unsafe.Pointer(t.C_tensor.dl_tensor.shape)), 2) + + if old_shape[1] != newShape[1] { + return &Tensor[T]{}, errors.New("new shape must be same as old shape, old shape: " + strconv.Itoa(int(old_shape[1])) + ", new shape: " + strconv.Itoa(int(newShape[1]))) + } + + newDataSize := newShape[0] * newShape[1] * int64(t.C_tensor.dl_tensor.dtype.bits) / 8 + + bytes := t.sizeInBytes() + + var NewDeviceDataPointer unsafe.Pointer + + err := CheckCuvs(CuvsError(C.cuvsRMMAlloc(res.Resource, &NewDeviceDataPointer, C.size_t(bytes+newDataSize)))) + if err != nil { + return nil, err + } + + err = CheckCuda( + C.cudaMemcpy( + NewDeviceDataPointer, + t.C_tensor.dl_tensor.data, + C.size_t(bytes), + C.cudaMemcpyDeviceToDevice, + )) + if err != nil { + C.cuvsRMMFree(res.Resource, NewDeviceDataPointer, C.size_t(bytes+newDataSize)) + return nil, err + } + + err = CheckCuda( + C.cudaMemcpy( + unsafe.Pointer(uintptr(NewDeviceDataPointer)+uintptr(bytes)), + unsafe.Pointer(&flatData[0]), + C.size_t(newDataSize), + C.cudaMemcpyHostToDevice, + )) + if err != nil { + C.cuvsRMMFree(res.Resource, NewDeviceDataPointer, C.size_t(bytes+newDataSize)) + return nil, err + } + + err = CheckCuvs(CuvsError( + C.cuvsRMMFree(res.Resource, t.C_tensor.dl_tensor.data, C.size_t(bytes)))) + if err != nil { + return nil, err + } + + shape := make([]int64, 2) + shape[0] = 
int64(*t.C_tensor.dl_tensor.shape) + int64(len(newData)) + + shape[1] = newShape[1] + + t.shape = shape + + t.C_tensor.dl_tensor.data = NewDeviceDataPointer + t.C_tensor.dl_tensor.shape = (*C.int64_t)(unsafe.Pointer(&shape[0])) + + return t, nil +} + +// Transfers the data in the Tensor to the host. +func (t *Tensor[T]) ToHost(res *Resource) (*Tensor[T], error) { + bytes := t.sizeInBytes() + + addr := (C.malloc(C.size_t(bytes))) + if addr == nil { + return nil, errors.New("memory allocation failed") + } + + err := CheckCuda( + C.cudaMemcpy( + addr, + t.C_tensor.dl_tensor.data, + C.size_t(bytes), + C.cudaMemcpyDeviceToHost, + )) + if err != nil { + return nil, err + } + + err = CheckCuvs(CuvsError( + C.cuvsRMMFree(res.Resource, t.C_tensor.dl_tensor.data, C.size_t(bytes)))) + if err != nil { + return nil, err + } + + t.C_tensor.dl_tensor.device.device_type = C.kDLCPU + t.C_tensor.dl_tensor.data = addr + + return t, nil +} + +// Returns a slice of the data in the Tensor. +// The Tensor must be on the host. 
+func (t *Tensor[T]) Slice() ([][]T, error) { + if t.C_tensor.dl_tensor.device.device_type != C.kDLCPU { + return nil, errors.New("Tensor must be on CPU") + } + + flatData := unsafe.Slice((*T)(t.C_tensor.dl_tensor.data), t.shape[0]*t.shape[1]) + + data := make([][]T, t.shape[0]) + for i := range data { + data[i] = make([]T, t.shape[1]) + for j := range data[i] { + data[i][j] = flatData[i*int(t.shape[1])+j] + } + } + + return data, nil +} + +func getDLDataType[T TensorNumberType]() C.DLDataType { + var zero T + switch any(zero).(type) { + case int64: + return C.DLDataType{ + bits: C.uchar(64), + lanes: C.ushort(1), + code: C.kDLInt, + } + case uint32: + return C.DLDataType{ + bits: C.uchar(32), + lanes: C.ushort(1), + code: C.kDLUInt, + } + case float32: + return C.DLDataType{ + bits: C.uchar(32), + lanes: C.ushort(1), + code: C.kDLFloat, + } + } + panic("unreachable") // Go compiler ensures this is unreachable +} + +func flattenData[T TensorNumberType](data [][]T, dest []T) { + cols := len(data[0]) + for i, row := range data { + copy(dest[i*cols:], row) + } +} + +func (t *Tensor[T]) sizeInBytes() int64 { + return calculateBytes(t.shape, t.C_tensor.dl_tensor.dtype) +} + +func calculateBytes(shape []int64, dtype C.DLDataType) int64 { + bytes := int64(1) + for dim := range shape { + bytes *= (shape[dim]) + } + + bytes *= int64(dtype.bits) / 8 + + return bytes +} diff --git a/go/dlpack_test.go b/go/dlpack_test.go new file mode 100644 index 0000000000..135363f811 --- /dev/null +++ b/go/dlpack_test.go @@ -0,0 +1,190 @@ +package cuvs_test + +import ( + "math/rand" + "reflect" + "testing" + "time" + + cuvs "github.com/rapidsai/cuvs/go" +) + +func TestDlPack(t *testing.T) { + resource, _ := cuvs.NewResource(nil) + rand.Seed(time.Now().UnixNano()) + NDataPoints := 256 + NFeatures := 16 + TestDataset := make([][]float32, NDataPoints) + for i := range TestDataset { + TestDataset[i] = make([]float32, NFeatures) + for j := range TestDataset[i] { + 
TestDataset[i][j] = float32(i) + } + } + + dataset, err := cuvs.NewTensor(TestDataset[:127]) + if err != nil { + t.Fatal(err) + } + + _, err = dataset.ToDevice(&resource) + if err != nil { + t.Fatal(err) + } + + _, err = dataset.Expand(&resource, TestDataset[127:]) + if err != nil { + t.Fatal(err) + } + + _, err = dataset.ToHost(&resource) + if err != nil { + t.Fatal(err) + } + + arr, err := dataset.Slice() + if err != nil { + t.Fatal(err) + } + + for i := range arr { + for j := range arr[i] { + if arr[i][j] != TestDataset[i][j] { + t.Errorf("slices don't match at [%d][%d], expected %f, got %f", + i, j, TestDataset[i][j], arr[i][j]) + } + } + } +} + +func TestShape(t *testing.T) { + // Test cases with different shapes + testCases := []struct { + rows int + cols int + name string + }{ + {10, 5, "small matrix"}, + {100, 20, "medium matrix"}, + {1000, 50, "large matrix"}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + // Create test data + data := make([][]float32, tc.rows) + for i := range data { + data[i] = make([]float32, tc.cols) + for j := range data[i] { + data[i][j] = float32(i * j) + } + } + + // Create tensor + tensor, err := cuvs.NewTensor(data) + if err != nil { + t.Fatalf("failed to create tensor: %v", err) + } + defer tensor.Close() + + // Check shape + shape := tensor.Shape() + expectedShape := []int64{int64(tc.rows), int64(tc.cols)} + if !reflect.DeepEqual(shape, expectedShape) { + t.Errorf("incorrect shape: got %v, want %v", shape, expectedShape) + } + }) + } +} + +func TestEmptyTensor(t *testing.T) { + // Test creating tensor with empty data + _, err := cuvs.NewTensor([][]float32{}) + if err == nil { + t.Error("expected error when creating tensor with empty data, got nil") + } +} + +func TestDeviceOperations(t *testing.T) { + resource, err := cuvs.NewResource(nil) + if err != nil { + t.Fatal(err) + } + + // Create test data + data := make([][]float32, 10) + for i := range data { + data[i] = make([]float32, 5) + for j := 
range data[i] { + data[i][j] = float32(i * j) + } + } + + // Test device transfer operations + t.Run("device transfer", func(t *testing.T) { + tensor, err := cuvs.NewTensor(data) + if err != nil { + t.Fatal(err) + } + defer tensor.Close() + + // Transfer to device + deviceTensor, err := tensor.ToDevice(&resource) + if err != nil { + t.Fatalf("failed to transfer to device: %v", err) + } + + // Transfer back to host + hostTensor, err := deviceTensor.ToHost(&resource) + if err != nil { + t.Fatalf("failed to transfer back to host: %v", err) + } + + // Verify data + result, err := hostTensor.Slice() + if err != nil { + t.Fatalf("failed to slice tensor: %v", err) + } + + for i := range data { + for j := range data[i] { + if result[i][j] != data[i][j] { + t.Errorf("data mismatch at [%d][%d]: got %f, want %f", + i, j, result[i][j], data[i][j]) + } + } + } + }) +} + +func TestDifferentDataTypes(t *testing.T) { + // Test int64 tensor + t.Run("int64", func(t *testing.T) { + data := [][]int64{{1, 2}, {3, 4}} + tensor, err := cuvs.NewTensor(data) + if err != nil { + t.Fatal(err) + } + defer tensor.Close() + + shape := tensor.Shape() + if !reflect.DeepEqual(shape, []int64{2, 2}) { + t.Errorf("incorrect shape for int64 tensor: got %v, want [2 2]", shape) + } + }) + + // Test uint32 tensor + t.Run("uint32", func(t *testing.T) { + data := [][]uint32{{1, 2}, {3, 4}} + tensor, err := cuvs.NewTensor(data) + if err != nil { + t.Fatal(err) + } + defer tensor.Close() + + shape := tensor.Shape() + if !reflect.DeepEqual(shape, []int64{2, 2}) { + t.Errorf("incorrect shape for uint32 tensor: got %v, want [2 2]", shape) + } + }) +} diff --git a/go/exceptions.go b/go/exceptions.go new file mode 100644 index 0000000000..af73ca9911 --- /dev/null +++ b/go/exceptions.go @@ -0,0 +1,23 @@ +package cuvs + +// #include +import "C" +import "errors" + +type CuvsError C.cuvsError_t + +// Wrapper function to convert cuvs error to Go error +func CheckCuvs(error CuvsError) error { + if error == C.CUVS_ERROR 
{ + return errors.New(C.GoString(C.cuvsGetLastErrorText())) + } + return nil +} + +// Wrapper function to convert cuda error to Go error +func CheckCuda(error C.cudaError_t) error { + if error != C.cudaSuccess { + return errors.New(C.GoString(C.cudaGetErrorString(error))) + } + return nil +} diff --git a/go/go.mod b/go/go.mod new file mode 100644 index 0000000000..4e1ab39a44 --- /dev/null +++ b/go/go.mod @@ -0,0 +1,3 @@ +module github.com/rapidsai/cuvs/go + +go 1.22.4 diff --git a/go/go.sum b/go/go.sum new file mode 100644 index 0000000000..81937f7b14 --- /dev/null +++ b/go/go.sum @@ -0,0 +1,4 @@ +golang.org/x/mod v0.22.0 h1:D4nJWe9zXqHOmWqj4VMOJhvzj7bEZg4wEYa759z1pH4= +golang.org/x/mod v0.22.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY= +golang.org/x/tools v0.27.0 h1:qEKojBykQkQ4EynWy4S8Weg69NumxKdn40Fce3uc/8o= +golang.org/x/tools v0.27.0/go.mod h1:sUi0ZgbwW9ZPAq26Ekut+weQPR5eIM6GQLQ1Yjm1H0Q= diff --git a/go/ivf_flat/index_params.go b/go/ivf_flat/index_params.go new file mode 100644 index 0000000000..f6cbe60fbf --- /dev/null +++ b/go/ivf_flat/index_params.go @@ -0,0 +1,87 @@ +package ivf_flat + +// #include +import "C" + +import ( + "errors" + + cuvs "github.com/rapidsai/cuvs/go" +) + +// Supplemental parameters to build IVF Flat Index +type IndexParams struct { + params C.cuvsIvfFlatIndexParams_t +} + +// Creates a new IndexParams +func CreateIndexParams() (*IndexParams, error) { + var params C.cuvsIvfFlatIndexParams_t + + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfFlatIndexParamsCreate(¶ms))) + if err != nil { + return nil, err + } + + return &IndexParams{params: params}, nil +} + +// The number of clusters used in the coarse quantizer. 
+func (p *IndexParams) SetNLists(n_lists uint32) (*IndexParams, error) { + p.params.n_lists = C.uint32_t(n_lists) + return p, nil +} + +// Distance Type to use for building the index +func (p *IndexParams) SetMetric(metric cuvs.Distance) (*IndexParams, error) { + CMetric, exists := cuvs.CDistances[metric] + + if !exists { + return nil, errors.New("cuvs: invalid distance metric") + } + p.params.metric = C.cuvsDistanceType(CMetric) + + return p, nil +} + +// Metric argument for Minkowski distances - set to 2.0 if not applicable +func (p *IndexParams) SetMetricArg(metric_arg float32) (*IndexParams, error) { + p.params.metric_arg = C.float(metric_arg) + return p, nil +} + +// The number of iterations searching for kmeans centers during index building. +func (p *IndexParams) SetKMeansNIters(kmeans_n_iters uint32) (*IndexParams, error) { + p.params.kmeans_n_iters = C.uint32_t(kmeans_n_iters) + return p, nil +} + +// If kmeans_trainset_fraction is less than 1, then the dataset is +// subsampled, and only n_samples * kmeans_trainset_fraction rows +// are used for training. +func (p *IndexParams) SetKMeansTrainsetFraction(kmeans_trainset_fraction float64) (*IndexParams, error) { + p.params.kmeans_trainset_fraction = C.double(kmeans_trainset_fraction) + return p, nil +} + +// After training the coarse and fine quantizers, we will populate +// the index with the dataset if add_data_on_build == true, otherwise +// the index is left empty, and the extend method can be used +// to add new vectors to the index. 
+func (p *IndexParams) SetAddDataOnBuild(add_data_on_build bool) (*IndexParams, error) { + if add_data_on_build { + p.params.add_data_on_build = C._Bool(true) + } else { + p.params.add_data_on_build = C._Bool(false) + } + return p, nil +} + +// Destroys IndexParams +func (p *IndexParams) Close() error { + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfFlatIndexParamsDestroy(p.params))) + if err != nil { + return err + } + return nil +} diff --git a/go/ivf_flat/ivf_flat.go b/go/ivf_flat/ivf_flat.go new file mode 100644 index 0000000000..3330eb95e2 --- /dev/null +++ b/go/ivf_flat/ivf_flat.go @@ -0,0 +1,72 @@ +package ivf_flat + +// #include +import "C" + +import ( + "errors" + "unsafe" + + cuvs "github.com/rapidsai/cuvs/go" +) + +// IVF Flat Index +type IvfFlatIndex struct { + index C.cuvsIvfFlatIndex_t + trained bool +} + +// Creates a new empty IvfFlatIndex +func CreateIndex(params *IndexParams, dataset *cuvs.Tensor[float32]) (*IvfFlatIndex, error) { + var index C.cuvsIvfFlatIndex_t + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfFlatIndexCreate(&index))) + if err != nil { + return nil, err + } + + return &IvfFlatIndex{index: index}, nil +} + +// Builds an IvfFlatIndex from the dataset for efficient search. 
+// +// # Arguments +// +// * `Resources` - Resources to use +// * `params` - Parameters for building the index +// * `dataset` - A row-major Tensor on either the host or device to index +// * `index` - IvfFlatIndex to build +func BuildIndex[T any](Resources cuvs.Resource, params *IndexParams, dataset *cuvs.Tensor[T], index *IvfFlatIndex) error { + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfFlatBuild(C.ulong(Resources.Resource), params.params, (*C.DLManagedTensor)(unsafe.Pointer(dataset.C_tensor)), index.index))) + if err != nil { + return err + } + index.trained = true + return nil +} + +// Destroys the IvfFlatIndex +func (index *IvfFlatIndex) Close() error { + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfFlatIndexDestroy(index.index))) + if err != nil { + return err + } + return nil +} + +// Perform a Approximate Nearest Neighbors search on the Index +// +// # Arguments +// +// * `Resources` - Resources to use +// * `params` - Parameters to use in searching the index +// * `index` - IvfFlatIndex to search +// * `queries` - A tensor in device memory to query for +// * `neighbors` - Tensor in device memory that receives the indices of the nearest neighbors +// * `distances` - Tensor in device memory that receives the distances of the nearest neighbors +func SearchIndex[T any](Resources cuvs.Resource, params *SearchParams, index *IvfFlatIndex, queries *cuvs.Tensor[T], neighbors *cuvs.Tensor[int64], distances *cuvs.Tensor[T]) error { + if !index.trained { + return errors.New("index needs to be built before calling search") + } + + return cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfFlatSearch(C.cuvsResources_t(Resources.Resource), params.params, index.index, (*C.DLManagedTensor)(unsafe.Pointer(queries.C_tensor)), (*C.DLManagedTensor)(unsafe.Pointer(neighbors.C_tensor)), (*C.DLManagedTensor)(unsafe.Pointer(distances.C_tensor))))) +} diff --git a/go/ivf_flat/ivf_flat_test.go b/go/ivf_flat/ivf_flat_test.go new file mode 100644 index 0000000000..8bca965f89 --- /dev/null 
+++ b/go/ivf_flat/ivf_flat_test.go @@ -0,0 +1,123 @@ +package ivf_flat + +import ( + "math/rand/v2" + "testing" + + cuvs "github.com/rapidsai/cuvs/go" +) + +func TestIvfFlat(t *testing.T) { + const ( + nDataPoints = 1024 + nFeatures = 16 + nQueries = 4 + k = 4 + epsilon = 0.001 + ) + + resource, _ := cuvs.NewResource(nil) + defer resource.Close() + + testDataset := make([][]float32, nDataPoints) + for i := range testDataset { + testDataset[i] = make([]float32, nFeatures) + for j := range testDataset[i] { + testDataset[i][j] = rand.Float32() + } + } + + dataset, err := cuvs.NewTensor(testDataset) + if err != nil { + t.Fatalf("error creating dataset tensor: %v", err) + } + defer dataset.Close() + + indexParams, err := CreateIndexParams() + if err != nil { + t.Fatalf("error creating index params: %v", err) + } + defer indexParams.Close() + + index, _ := CreateIndex(indexParams, &dataset) + defer index.Close() + + // use the first 4 points from the dataset as queries : will test that we get them back + // as their own nearest neighbor + queries, _ := cuvs.NewTensor(testDataset[:nQueries]) + defer queries.Close() + + neighbors, err := cuvs.NewTensorOnDevice[int64](&resource, []int64{int64(nQueries), int64(k)}) + if err != nil { + t.Fatalf("error creating neighbors tensor: %v", err) + } + defer neighbors.Close() + + distances, err := cuvs.NewTensorOnDevice[float32](&resource, []int64{int64(nQueries), int64(k)}) + if err != nil { + t.Fatalf("error creating distances tensor: %v", err) + } + defer distances.Close() + + if _, err := dataset.ToDevice(&resource); err != nil { + t.Fatalf("error moving dataset to device: %v", err) + } + + if err := BuildIndex(resource, indexParams, &dataset, index); err != nil { + t.Fatalf("error building index: %v", err) + } + + if err := resource.Sync(); err != nil { + t.Fatalf("error syncing resource: %v", err) + } + + if _, err := queries.ToDevice(&resource); err != nil { + t.Fatalf("error moving queries to device: %v", err) 
+ } + + SearchParams, err := CreateSearchParams() + if err != nil { + t.Fatalf("error creating search params: %v", err) + } + defer SearchParams.Close() + + err = SearchIndex(resource, SearchParams, index, &queries, &neighbors, &distances) + if err != nil { + t.Fatalf("error searching index: %v", err) + } + + if _, err := neighbors.ToHost(&resource); err != nil { + t.Fatalf("error moving neighbors to host: %v", err) + } + + if _, err := distances.ToHost(&resource); err != nil { + t.Fatalf("error moving distances to host: %v", err) + } + + if err := resource.Sync(); err != nil { + t.Fatalf("error syncing resource: %v", err) + } + + neighborsSlice, err := neighbors.Slice() + if err != nil { + t.Fatalf("error getting neighbors slice: %v", err) + } + + for i := range neighborsSlice { + println(neighborsSlice[i][0]) + if neighborsSlice[i][0] != int64(i) { + t.Error("wrong neighbor, expected", i, "got", neighborsSlice[i][0]) + } + } + + distancesSlice, err := distances.Slice() + if err != nil { + t.Fatalf("error getting distances slice: %v", err) + } + + for i := range distancesSlice { + if distancesSlice[i][0] >= epsilon || distancesSlice[i][0] <= -epsilon { + t.Error("distance should be close to 0, got", distancesSlice[i][0]) + } + } +} diff --git a/go/ivf_flat/search_params.go b/go/ivf_flat/search_params.go new file mode 100644 index 0000000000..3606e4662e --- /dev/null +++ b/go/ivf_flat/search_params.go @@ -0,0 +1,39 @@ +package ivf_flat + +// #include +import "C" + +import ( + cuvs "github.com/rapidsai/cuvs/go" +) + +type SearchParams struct { + params C.cuvsIvfFlatSearchParams_t +} + +// Creates a new SearchParams +func CreateSearchParams() (*SearchParams, error) { + var params C.cuvsIvfFlatSearchParams_t + + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfFlatSearchParamsCreate(¶ms))) + if err != nil { + return nil, err + } + + return &SearchParams{params: params}, nil +} + +// The number of clusters to search. 
+func (p *SearchParams) SetNProbes(n_probes uint32) (*SearchParams, error) { + p.params.n_probes = C.uint32_t(n_probes) + return p, nil +} + +// Destroy SearchParams +func (p *SearchParams) Close() error { + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfFlatSearchParamsDestroy(p.params))) + if err != nil { + return err + } + return nil +} diff --git a/go/ivf_pq/index_params.go b/go/ivf_pq/index_params.go new file mode 100644 index 0000000000..d04b44621e --- /dev/null +++ b/go/ivf_pq/index_params.go @@ -0,0 +1,149 @@ +package ivf_pq + +// #include +import "C" + +import ( + "errors" + + cuvs "github.com/rapidsai/cuvs/go" +) + +type IndexParams struct { + params C.cuvsIvfPqIndexParams_t +} + +type codebookKind int + +const ( + Subspace codebookKind = iota + Cluster +) + +var cCodebookKinds = map[codebookKind]int{ + Subspace: C.PER_SUBSPACE, + Cluster: C.PER_CLUSTER, +} + +// Creates a new IndexParams +func CreateIndexParams() (*IndexParams, error) { + var params C.cuvsIvfPqIndexParams_t + + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfPqIndexParamsCreate(¶ms))) + if err != nil { + return nil, err + } + + return &IndexParams{params: params}, nil +} + +// The number of clusters used in the coarse quantizer. +func (p *IndexParams) SetNLists(n_lists uint32) (*IndexParams, error) { + p.params.n_lists = C.uint32_t(n_lists) + return p, nil +} + +// Distance Type to use for building the index +func (p *IndexParams) SetMetric(metric cuvs.Distance) (*IndexParams, error) { + CMetric, exists := cuvs.CDistances[metric] + + if !exists { + return nil, errors.New("cuvs: invalid distance metric") + } + p.params.metric = C.cuvsDistanceType(CMetric) + + return p, nil +} + +// Metric argument for Minkowski distances - set to 2.0 if not applicable +func (p *IndexParams) SetMetricArg(metric_arg float32) (*IndexParams, error) { + p.params.metric_arg = C.float(metric_arg) + return p, nil +} + +// The number of iterations searching for kmeans centers during index building. 
+func (p *IndexParams) SetKMeansNIters(kmeans_n_iters uint32) (*IndexParams, error) { + p.params.kmeans_n_iters = C.uint32_t(kmeans_n_iters) + return p, nil +} + +// If kmeans_trainset_fraction is less than 1, then the dataset is +// subsampled, and only n_samples * kmeans_trainset_fraction rows +// are used for training. +func (p *IndexParams) SetKMeansTrainsetFraction(kmeans_trainset_fraction float64) (*IndexParams, error) { + p.params.kmeans_trainset_fraction = C.double(kmeans_trainset_fraction) + return p, nil +} + +// The bit length of the vector element after quantization. +func (p *IndexParams) SetPQBits(pq_bits uint32) (*IndexParams, error) { + p.params.pq_bits = C.uint32_t(pq_bits) + return p, nil +} + +// The dimensionality of a the vector after product quantization. +// When zero, an optimal value is selected using a heuristic. Note +// pq_dim * pq_bits must be a multiple of 8. Hint: a smaller 'pq_dim' +// results in a smaller index size and better search performance, but +// lower recall. If 'pq_bits' is 8, 'pq_dim' can be set to any number, +// but multiple of 8 are desirable for good performance. If 'pq_bits' +// is not 8, 'pq_dim' should be a multiple of 8. For good performance, +// it is desirable that 'pq_dim' is a multiple of 32. Ideally, +// 'pq_dim' should be also a divisor of the dataset dim. +func (p *IndexParams) SetPQDim(pq_dim uint32) (*IndexParams, error) { + p.params.pq_dim = C.uint32_t(pq_dim) + return p, nil +} + +func (p *IndexParams) SetCodebookKind(codebook_kind codebookKind) (*IndexParams, error) { + CCodebookKind, exists := cCodebookKinds[codebook_kind] + + if !exists { + return nil, errors.New("cuvs: invalid codebook_kind") + } + p.params.codebook_kind = uint32(CCodebookKind) + + return p, nil +} + +// Apply a random rotation matrix on the input data and queries even +// if `dim % pq_dim == 0`. 
Note: if `dim` is not multiple of `pq_dim`, +// a random rotation is always applied to the input data and queries +// to transform the working space from `dim` to `rot_dim`, which may +// be slightly larger than the original space and and is a multiple +// of `pq_dim` (`rot_dim % pq_dim == 0`). However, this transform is +// not necessary when `dim` is multiple of `pq_dim` (`dim == rot_dim`, +// hence no need in adding "extra" data columns / features). By +// default, if `dim == rot_dim`, the rotation transform is +// initialized with the identity matrix. When +// `force_random_rotation == True`, a random orthogonal transform +func (p *IndexParams) SetForceRandomRotation(force_random_rotation bool) (*IndexParams, error) { + if force_random_rotation { + p.params.force_random_rotation = C._Bool(true) + } else { + p.params.force_random_rotation = C._Bool(false) + } + return p, nil +} + +// After training the coarse and fine quantizers, we will populate +// the index with the dataset if add_data_on_build == true, otherwise +// the index is left empty, and the extend method can be used +// to add new vectors to the index. 
+func (p *IndexParams) SetAddDataOnBuild(add_data_on_build bool) (*IndexParams, error) { + if add_data_on_build { + p.params.add_data_on_build = C._Bool(true) + } else { + p.params.add_data_on_build = C._Bool(false) + } + return p, nil +} + +// Destroys IndexParams +func (p *IndexParams) Close() error { + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfPqIndexParamsDestroy(p.params))) + if err != nil { + return err + } + return nil +} diff --git a/go/ivf_pq/ivf_pq.go b/go/ivf_pq/ivf_pq.go new file mode 100644 index 0000000000..cbbec629d1 --- /dev/null +++ b/go/ivf_pq/ivf_pq.go @@ -0,0 +1,73 @@ +package ivf_pq + +// #include +import "C" + +import ( + "errors" + "unsafe" + + cuvs "github.com/rapidsai/cuvs/go" +) + +// IVF PQ Index +type IvfPqIndex struct { + index C.cuvsIvfPqIndex_t + trained bool +} + +// Creates a new empty IvfPqIndex +func CreateIndex(params *IndexParams, dataset *cuvs.Tensor[float32]) (*IvfPqIndex, error) { + var index C.cuvsIvfPqIndex_t + + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfPqIndexCreate(&index))) + if err != nil { + return nil, err + } + + return &IvfPqIndex{index: index}, nil +} + +// Builds an IvfPqIndex from the dataset for efficient search. 
+// +// # Arguments +// +// * `Resources` - Resources to use +// * `params` - Parameters for building the index +// * `dataset` - A row-major Tensor on either the host or device to index +// * `index` - IvfPqIndex to build +func BuildIndex[T any](Resources cuvs.Resource, params *IndexParams, dataset *cuvs.Tensor[T], index *IvfPqIndex) error { + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfPqBuild(C.ulong(Resources.Resource), params.params, (*C.DLManagedTensor)(unsafe.Pointer(dataset.C_tensor)), index.index))) + if err != nil { + return err + } + index.trained = true + return nil +} + +// Destroys the IvfPqIndex +func (index *IvfPqIndex) Close() error { + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfPqIndexDestroy(index.index))) + if err != nil { + return err + } + return nil +} + +// Perform a Approximate Nearest Neighbors search on the Index +// +// # Arguments +// +// * `Resources` - Resources to use +// * `params` - Parameters to use in searching the index +// * `index` - IvfPqIndex to search +// * `queries` - A tensor in device memory to query for +// * `neighbors` - Tensor in device memory that receives the indices of the nearest neighbors +// * `distances` - Tensor in device memory that receives the distances of the nearest neighbors +func SearchIndex[T any](Resources cuvs.Resource, params *SearchParams, index *IvfPqIndex, queries *cuvs.Tensor[T], neighbors *cuvs.Tensor[int64], distances *cuvs.Tensor[T]) error { + if !index.trained { + return errors.New("index needs to be built before calling search") + } + + return cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfPqSearch(C.cuvsResources_t(Resources.Resource), params.params, index.index, (*C.DLManagedTensor)(unsafe.Pointer(queries.C_tensor)), (*C.DLManagedTensor)(unsafe.Pointer(neighbors.C_tensor)), (*C.DLManagedTensor)(unsafe.Pointer(distances.C_tensor))))) +} diff --git a/go/ivf_pq/ivf_pq_test.go b/go/ivf_pq/ivf_pq_test.go new file mode 100644 index 0000000000..39b7727a3d --- /dev/null +++ 
b/go/ivf_pq/ivf_pq_test.go @@ -0,0 +1,123 @@ +package ivf_pq + +import ( + "math/rand/v2" + "testing" + + cuvs "github.com/rapidsai/cuvs/go" +) + +func TestIvfPq(t *testing.T) { + const ( + nDataPoints = 1024 + nFeatures = 16 + nQueries = 4 + k = 4 + epsilon = 0.001 + ) + + resource, _ := cuvs.NewResource(nil) + defer resource.Close() + + testDataset := make([][]float32, nDataPoints) + for i := range testDataset { + testDataset[i] = make([]float32, nFeatures) + for j := range testDataset[i] { + testDataset[i][j] = rand.Float32() + } + } + + dataset, err := cuvs.NewTensor(testDataset) + if err != nil { + t.Fatalf("error creating dataset tensor: %v", err) + } + defer dataset.Close() + + indexParams, err := CreateIndexParams() + if err != nil { + t.Fatalf("error creating index params: %v", err) + } + defer indexParams.Close() + + index, _ := CreateIndex(indexParams, &dataset) + defer index.Close() + + // use the first 4 points from the dataset as queries : will test that we get them back + // as their own nearest neighbor + queries, _ := cuvs.NewTensor(testDataset[:nQueries]) + defer queries.Close() + + neighbors, err := cuvs.NewTensorOnDevice[int64](&resource, []int64{int64(nQueries), int64(k)}) + if err != nil { + t.Fatalf("error creating neighbors tensor: %v", err) + } + defer neighbors.Close() + + distances, err := cuvs.NewTensorOnDevice[float32](&resource, []int64{int64(nQueries), int64(k)}) + if err != nil { + t.Fatalf("error creating distances tensor: %v", err) + } + defer distances.Close() + + if _, err := dataset.ToDevice(&resource); err != nil { + t.Fatalf("error moving dataset to device: %v", err) + } + + if err := BuildIndex(resource, indexParams, &dataset, index); err != nil { + t.Fatalf("error building index: %v", err) + } + + if err := resource.Sync(); err != nil { + t.Fatalf("error syncing resource: %v", err) + } + + if _, err := queries.ToDevice(&resource); err != nil { + t.Fatalf("error moving queries to device: %v", err) + } + + 
SearchParams, err := CreateSearchParams() + if err != nil { + t.Fatalf("error creating search params: %v", err) + } + defer SearchParams.Close() + + err = SearchIndex(resource, SearchParams, index, &queries, &neighbors, &distances) + if err != nil { + t.Fatalf("error searching index: %v", err) + } + + if _, err := neighbors.ToHost(&resource); err != nil { + t.Fatalf("error moving neighbors to host: %v", err) + } + + if _, err := distances.ToHost(&resource); err != nil { + t.Fatalf("error moving distances to host: %v", err) + } + + if err := resource.Sync(); err != nil { + t.Fatalf("error syncing resource: %v", err) + } + + neighborsSlice, err := neighbors.Slice() + if err != nil { + t.Fatalf("error getting neighbors slice: %v", err) + } + + for i := range neighborsSlice { + println(neighborsSlice[i][0]) + if neighborsSlice[i][0] != int64(i) { + t.Error("wrong neighbor, expected", i, "got", neighborsSlice[i][0]) + } + } + + distancesSlice, err := distances.Slice() + if err != nil { + t.Fatalf("error getting distances slice: %v", err) + } + + for i := range distancesSlice { + if distancesSlice[i][0] >= epsilon || distancesSlice[i][0] <= -epsilon { + t.Error("distance should be close to 0, got", distancesSlice[i][0]) + } + } +} diff --git a/go/ivf_pq/search_params.go b/go/ivf_pq/search_params.go new file mode 100644 index 0000000000..190ca7036e --- /dev/null +++ b/go/ivf_pq/search_params.go @@ -0,0 +1,107 @@ +package ivf_pq + +// #include +import "C" + +import ( + "errors" + + cuvs "github.com/rapidsai/cuvs/go" +) + +// Supplemental parameters to search IVF PQ Index +type SearchParams struct { + params C.cuvsIvfPqSearchParams_t +} + +type lutDtype int + +const ( + Lut_Uint8 lutDtype = iota + Lut_Uint16 + Lut_Uint32 + Lut_Uint64 + Lut_Int8 + Lut_Int16 + Lut_Int32 + Lut_Int64 +) + +var cLutDtypes = map[lutDtype]int{ + Lut_Uint8: C.CUDA_R_8U, + Lut_Uint16: C.CUDA_R_16U, + Lut_Uint32: C.CUDA_R_32U, + Lut_Uint64: C.CUDA_R_64U, + Lut_Int8: C.CUDA_R_8I, + 
Lut_Int16: C.CUDA_R_16I, + Lut_Int32: C.CUDA_R_32I, + Lut_Int64: C.CUDA_R_64I, +} + +type internalDistanceDtype int + +const ( + InternalDistance_Float32 internalDistanceDtype = iota + InternalDistance_Float64 +) + +var CInternalDistanceDtypes = map[internalDistanceDtype]int{ + InternalDistance_Float32: C.CUDA_R_32F, + InternalDistance_Float64: C.CUDA_R_64F, +} + +// Creates a new SearchParams +func CreateSearchParams() (*SearchParams, error) { + var params C.cuvsIvfPqSearchParams_t + + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfPqSearchParamsCreate(¶ms))) + if err != nil { + return nil, err + } + + return &SearchParams{params: params}, nil +} + +// The number of clusters to search. +func (p *SearchParams) SetNProbes(n_probes uint32) (*SearchParams, error) { + p.params.n_probes = C.uint32_t(n_probes) + return p, nil +} + +// Data type of look up table to be created dynamically at search +// time. The use of low-precision types reduces the amount of shared +// memory required at search time, so fast shared memory kernels can +// be used even for datasets with large dimansionality. Note that +// the recall is slightly degraded when low-precision type is +// selected. +func (p *SearchParams) SetLutDtype(lut_dtype lutDtype) (*SearchParams, error) { + CLutDtype, exists := cLutDtypes[lutDtype(lut_dtype)] + + if !exists { + return nil, errors.New("cuvs: invalid lut_dtype") + } + p.params.lut_dtype = C.cudaDataType_t(CLutDtype) + + return p, nil +} + +// Storage data type for distance/similarity computation. 
+func (p *SearchParams) SetInternalDistanceDtype(internal_distance_dtype internalDistanceDtype) (*SearchParams, error) { + CInternalDistanceDtype, exists := CInternalDistanceDtypes[internalDistanceDtype(internal_distance_dtype)] + + if !exists { + return nil, errors.New("cuvs: invalid internal_distance_dtype") + } + p.params.internal_distance_dtype = C.cudaDataType_t(CInternalDistanceDtype) + + return p, nil +} + +// Destroys SearchParams +func (p *SearchParams) Close() error { + err := cuvs.CheckCuvs(cuvs.CuvsError(C.cuvsIvfPqSearchParamsDestroy(p.params))) + if err != nil { + return err + } + return nil +} diff --git a/go/memory_resource.go b/go/memory_resource.go new file mode 100644 index 0000000000..df042c821d --- /dev/null +++ b/go/memory_resource.go @@ -0,0 +1,93 @@ +package cuvs + +// #include +import "C" + +import ( + "runtime" +) + +type CuvsMemoryCommand int + +const ( + CuvsMemoryNew = iota + CuvsMemoryRelease +) + +type CuvsPoolMemory struct { + ch chan CuvsMemoryCommand + errCh chan error + initial_pool_size_percent int + max_pool_size_percent int + managed bool +} + +// Creates new CuvsPoolMemory struct +// initial_pool_size_percent is the initial size of the pool in percent of total available device memory +// max_pool_size_percent is the maximum size of the pool in percent of total available device memory +// managed is whether to use CUDA managed memory +func NewCuvsPoolMemory(initial_pool_size_percent int, max_pool_size_percent int, managed bool) (*CuvsPoolMemory, error) { + c := CuvsPoolMemory{ + ch: make(chan CuvsMemoryCommand), + errCh: make(chan error), + initial_pool_size_percent: initial_pool_size_percent, + max_pool_size_percent: max_pool_size_percent, + managed: managed, + } + + c.start() + c.ch <- CuvsMemoryNew + + if err := <-c.errCh; err != nil { + return nil, err + } + + return &c, nil +} + +// Enables pool memory +func (m *CuvsPoolMemory) start() { + go func() { + runtime.LockOSThread() + defer runtime.UnlockOSThread() + + for 
command := range m.ch { + var err error + switch command { + case CuvsMemoryNew: + err = CheckCuvs(CuvsError(C.cuvsRMMPoolMemoryResourceEnable( + C.int(m.initial_pool_size_percent), + C.int(m.max_pool_size_percent), + C._Bool(m.managed)))) + m.errCh <- err + + case CuvsMemoryRelease: + err = CheckCuvs(CuvsError(C.cuvsRMMMemoryResourceReset())) + m.errCh <- err + } + } + }() +} + +// Disables pool memory +func (m *CuvsPoolMemory) Close() error { + m.ch <- CuvsMemoryRelease + err := <-m.errCh + close(m.ch) + close(m.errCh) + return err +} + +func Example() error { + mem, err := NewCuvsPoolMemory(60, 100, false) + if err != nil { + return err + } + + err = mem.Close() + if err != nil { + return err + } + + return nil +} diff --git a/go/memory_resource_test.go b/go/memory_resource_test.go new file mode 100644 index 0000000000..e44c20485e --- /dev/null +++ b/go/memory_resource_test.go @@ -0,0 +1,19 @@ +package cuvs_test + +import ( + "testing" + + cuvs "github.com/rapidsai/cuvs/go" +) + +func TestMemoryResource(t *testing.T) { + mem, err := cuvs.NewCuvsPoolMemory(60, 100, false) + if err != nil { + t.Fatal("Failed to create memory resource:", err) + } + + err = mem.Close() + if err != nil { + t.Fatal("Failed to close memory resource:", err) + } +} diff --git a/go/resources.go b/go/resources.go new file mode 100644 index 0000000000..562aa7b7fc --- /dev/null +++ b/go/resources.go @@ -0,0 +1,57 @@ +package cuvs + +// #include +import "C" + +type cuvsResource C.cuvsResources_t + +// Resources are objects that are shared between function calls, +// and includes things like CUDA streams, cuBLAS handles and other +// resources that are expensive to create. 
+type Resource struct { + Resource C.cuvsResources_t +} + +// Returns a new Resource object +func NewResource(stream C.cudaStream_t) (Resource, error) { + res := C.cuvsResources_t(0) + err := CheckCuvs(CuvsError(C.cuvsResourcesCreate(&res))) + if err != nil { + return Resource{}, err + } + + if stream != nil { + err := CheckCuvs(CuvsError(C.cuvsStreamSet(res, stream))) + if err != nil { + return Resource{}, err + } + } + + return Resource{Resource: res}, nil +} + +// Syncs the current cuda stream +func (r Resource) Sync() error { + return CheckCuvs(CuvsError(C.cuvsStreamSync(r.Resource))) +} + +// Gets the current cuda stream +func (r Resource) GetCudaStream() (C.cudaStream_t, error) { + var stream C.cudaStream_t + + err := CheckCuvs(CuvsError(C.cuvsStreamGet(r.Resource, &stream))) + if err != nil { + return C.cudaStream_t(nil), err + } + + return stream, nil +} + +func (r Resource) Close() error { + err := CheckCuvs(CuvsError(C.cuvsResourcesDestroy(r.Resource))) + if err != nil { + return err + } + + return nil +}