diff --git a/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/BruteForceIndexImpl.java b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/BruteForceIndexImpl.java index 736f974e2b..f9e3b9ba93 100644 --- a/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/BruteForceIndexImpl.java +++ b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/BruteForceIndexImpl.java @@ -20,14 +20,14 @@ import static com.nvidia.cuvs.internal.common.LinkerHelper.C_INT_BYTE_SIZE; import static com.nvidia.cuvs.internal.common.LinkerHelper.C_LONG; import static com.nvidia.cuvs.internal.common.LinkerHelper.C_LONG_BYTE_SIZE; -import static com.nvidia.cuvs.internal.common.LinkerHelper.C_POINTER; -import static com.nvidia.cuvs.internal.common.Util.CudaMemcpyKind.*; +import static com.nvidia.cuvs.internal.common.Util.CudaMemcpyKind.HOST_TO_DEVICE; +import static com.nvidia.cuvs.internal.common.Util.CudaMemcpyKind.INFER_DIRECTION; +import static com.nvidia.cuvs.internal.common.Util.allocateRMMSegment; import static com.nvidia.cuvs.internal.common.Util.buildMemorySegment; import static com.nvidia.cuvs.internal.common.Util.checkCuVSError; import static com.nvidia.cuvs.internal.common.Util.concatenate; import static com.nvidia.cuvs.internal.common.Util.cudaMemcpy; import static com.nvidia.cuvs.internal.common.Util.prepareTensor; -import static com.nvidia.cuvs.internal.panama.headers_h.cudaStream_t; import static com.nvidia.cuvs.internal.panama.headers_h.cuvsBruteForceBuild; import static com.nvidia.cuvs.internal.panama.headers_h.cuvsBruteForceDeserialize; import static com.nvidia.cuvs.internal.panama.headers_h.cuvsBruteForceIndexCreate; @@ -35,9 +35,7 @@ import static com.nvidia.cuvs.internal.panama.headers_h.cuvsBruteForceIndex_t; import static com.nvidia.cuvs.internal.panama.headers_h.cuvsBruteForceSearch; import static com.nvidia.cuvs.internal.panama.headers_h.cuvsBruteForceSerialize; -import static com.nvidia.cuvs.internal.panama.headers_h.cuvsRMMAlloc; import static com.nvidia.cuvs.internal.panama.headers_h.cuvsRMMFree; -import static com.nvidia.cuvs.internal.panama.headers_h.cuvsStreamGet; import static com.nvidia.cuvs.internal.panama.headers_h.cuvsStreamSync; import static com.nvidia.cuvs.internal.panama.headers_h.omp_set_num_threads; @@ -130,6 +128,9 @@ public void destroyIndex() { bruteForceIndexReference.datasetBytes); checkCuVSError(returnValue, "cuvsRMMFree"); } + if (bruteForceIndexReference.tensorDataArena != null) { + bruteForceIndexReference.tensorDataArena.close(); + } } finally { destroyed = true; } @@ -143,49 +144,41 @@ public void destroyIndex() { * index */ private IndexReference build(DatasetImpl dataset, BruteForceIndexParams bruteForceIndexParams) { - try (var localArena = Arena.ofConfined()) { - long rows = dataset.size(); - long cols = dataset.dimensions(); - - Arena arena = resources.getArena(); - MemorySegment datasetMemSegment = dataset.asMemorySegment(); - - long cuvsResources = resources.getHandle(); - - omp_set_num_threads(bruteForceIndexParams.getNumWriterThreads()); + long rows = dataset.size(); + long cols = dataset.dimensions(); - MemorySegment datasetMemorySegment = localArena.allocate(C_POINTER); + MemorySegment datasetMemSegment = dataset.asMemorySegment(); - long datasetBytes = C_FLOAT_BYTE_SIZE * rows * cols; - var returnValue = cuvsRMMAlloc(cuvsResources, datasetMemorySegment, datasetBytes); - checkCuVSError(returnValue, "cuvsRMMAlloc"); + long cuvsResources = resources.getHandle(); - // IMPORTANT: this should only come AFTER cuvsRMMAlloc call - MemorySegment datasetMemorySegmentP = datasetMemorySegment.get(C_POINTER, 0); + omp_set_num_threads(bruteForceIndexParams.getNumWriterThreads()); + long datasetBytes = C_FLOAT_BYTE_SIZE * rows * cols; + MemorySegment datasetMemorySegmentP = allocateRMMSegment(cuvsResources, datasetBytes); - cudaMemcpy(datasetMemorySegmentP, datasetMemSegment, datasetBytes, INFER_DIRECTION); + cudaMemcpy(datasetMemorySegmentP, datasetMemSegment, datasetBytes, INFER_DIRECTION); - long[] datasetShape = {rows, cols}; - MemorySegment datasetTensor = - prepareTensor(arena, datasetMemorySegmentP, datasetShape, 2, 32, 2, 2, 1); + long[] datasetShape = {rows, cols}; + var tensorDataArena = Arena.ofShared(); + MemorySegment datasetTensor = + prepareTensor(tensorDataArena, datasetMemorySegmentP, datasetShape, 2, 32, 2, 2, 1); - var indexReference = - new IndexReference(datasetMemorySegmentP, datasetBytes, createBruteForceIndex()); + var indexReference = + new IndexReference( + datasetMemorySegmentP, datasetBytes, tensorDataArena, createBruteForceIndex()); - returnValue = cuvsStreamSync(cuvsResources); - checkCuVSError(returnValue, "cuvsStreamSync"); + var returnValue = cuvsStreamSync(cuvsResources); + checkCuVSError(returnValue, "cuvsStreamSync"); - returnValue = - cuvsBruteForceBuild(cuvsResources, datasetTensor, 0, 0.0f, indexReference.indexPtr); - checkCuVSError(returnValue, "cuvsBruteForceBuild"); + returnValue = + cuvsBruteForceBuild(cuvsResources, datasetTensor, 0, 0.0f, indexReference.indexPtr); + checkCuVSError(returnValue, "cuvsBruteForceBuild"); - returnValue = cuvsStreamSync(cuvsResources); - checkCuVSError(returnValue, "cuvsStreamSync"); + returnValue = cuvsStreamSync(cuvsResources); + checkCuVSError(returnValue, "cuvsStreamSync"); - omp_set_num_threads(1); + omp_set_num_threads(1); - return indexReference; - } + return indexReference; } /** @@ -203,12 +196,11 @@ public SearchResults search(BruteForceQuery cuvsQuery) throws Throwable { long numQueries = cuvsQuery.getQueryVectors().length; long numBlocks = cuvsQuery.getTopK() * numQueries; int vectorDimension = numQueries > 0 ? cuvsQuery.getQueryVectors()[0].length : 0; - Arena arena = resources.getArena(); SequenceLayout neighborsSequenceLayout = MemoryLayout.sequenceLayout(numBlocks, C_LONG); SequenceLayout distancesSequenceLayout = MemoryLayout.sequenceLayout(numBlocks, C_FLOAT); - MemorySegment neighborsMemorySegment = arena.allocate(neighborsSequenceLayout); - MemorySegment distancesMemorySegment = arena.allocate(distancesSequenceLayout); + MemorySegment neighborsMemorySegment = localArena.allocate(neighborsSequenceLayout); + MemorySegment distancesMemorySegment = localArena.allocate(distancesSequenceLayout); // prepare the prefiltering data long prefilterDataLength = 0; @@ -217,52 +209,38 @@ public SearchResults search(BruteForceQuery cuvsQuery) throws Throwable { if (prefilters != null && prefilters.length > 0) { BitSet concatenatedFilters = concatenate(prefilters, cuvsQuery.getNumDocs()); long[] filters = concatenatedFilters.toLongArray(); - prefilterDataMemorySegment = buildMemorySegment(arena, filters); + prefilterDataMemorySegment = buildMemorySegment(localArena, filters); prefilterDataLength = (long) cuvsQuery.getNumDocs() * prefilters.length; } - MemorySegment querySeg = buildMemorySegment(arena, cuvsQuery.getQueryVectors()); + MemorySegment querySeg = buildMemorySegment(localArena, cuvsQuery.getQueryVectors()); int topk = cuvsQuery.getTopK(); long cuvsResources = resources.getHandle(); - MemorySegment stream = arena.allocate(cudaStream_t); - var returnValue = cuvsStreamGet(cuvsResources, stream); - checkCuVSError(returnValue, "cuvsStreamGet"); - - MemorySegment queriesD = localArena.allocate(C_POINTER); - MemorySegment neighborsD = localArena.allocate(C_POINTER); - MemorySegment distancesD = localArena.allocate(C_POINTER); long queriesBytes = C_FLOAT_BYTE_SIZE * numQueries * vectorDimension; long neighborsBytes = C_LONG_BYTE_SIZE * numQueries * topk; long distanceBytes = C_FLOAT_BYTE_SIZE * numQueries * topk; long prefilterBytes = 0; // size assigned later - returnValue = cuvsRMMAlloc(cuvsResources, queriesD, queriesBytes); - checkCuVSError(returnValue, "cuvsRMMAlloc"); - returnValue = cuvsRMMAlloc(cuvsResources, neighborsD, neighborsBytes); - checkCuVSError(returnValue, "cuvsRMMAlloc"); - returnValue = cuvsRMMAlloc(cuvsResources, distancesD, distanceBytes); - checkCuVSError(returnValue, "cuvsRMMAlloc"); - - // IMPORTANT: these three should only come AFTER cuvsRMMAlloc calls - MemorySegment queriesDP = queriesD.get(C_POINTER, 0); - MemorySegment neighborsDP = neighborsD.get(C_POINTER, 0); - MemorySegment distancesDP = distancesD.get(C_POINTER, 0); + MemorySegment queriesDP = allocateRMMSegment(cuvsResources, queriesBytes); + MemorySegment neighborsDP = allocateRMMSegment(cuvsResources, neighborsBytes); + MemorySegment distancesDP = allocateRMMSegment(cuvsResources, distanceBytes); MemorySegment prefilterDP = MemorySegment.NULL; cudaMemcpy(queriesDP, querySeg, queriesBytes, INFER_DIRECTION); long[] queriesShape = {numQueries, vectorDimension}; - MemorySegment queriesTensor = prepareTensor(arena, queriesDP, queriesShape, 2, 32, 2, 2, 1); + MemorySegment queriesTensor = + prepareTensor(localArena, queriesDP, queriesShape, 2, 32, 2, 2, 1); long[] neighborsShape = {numQueries, topk}; MemorySegment neighborsTensor = - prepareTensor(arena, neighborsDP, neighborsShape, 0, 64, 2, 2, 1); + prepareTensor(localArena, neighborsDP, neighborsShape, 0, 64, 2, 2, 1); long[] distancesShape = {numQueries, topk}; MemorySegment distancesTensor = - prepareTensor(arena, distancesDP, distancesShape, 2, 32, 2, 2, 1); + prepareTensor(localArena, distancesDP, distancesShape, 2, 32, 2, 2, 1); - MemorySegment prefilter = cuvsFilter.allocate(arena); + MemorySegment prefilter = cuvsFilter.allocate(localArena); MemorySegment prefilterTensor; if (prefilterDataMemorySegment == MemorySegment.NULL) { @@ -270,24 +248,20 @@ public SearchResults search(BruteForceQuery cuvsQuery) throws Throwable { cuvsFilter.addr(prefilter, 0); } else { long[] prefilterShape = {(prefilterDataLength + 31) / 32}; - - MemorySegment prefilterD = localArena.allocate(C_POINTER); long prefilterLen = prefilterShape[0]; prefilterBytes = C_INT_BYTE_SIZE * prefilterLen; - returnValue = cuvsRMMAlloc(cuvsResources, prefilterD, prefilterBytes); - checkCuVSError(returnValue, "cuvsRMMAlloc"); - prefilterDP = prefilterD.get(C_POINTER, 0); + prefilterDP = allocateRMMSegment(cuvsResources, prefilterBytes); cudaMemcpy(prefilterDP, prefilterDataMemorySegment, prefilterBytes, HOST_TO_DEVICE); - prefilterTensor = prepareTensor(arena, prefilterDP, prefilterShape, 1, 32, 1, 2, 1); + prefilterTensor = prepareTensor(localArena, prefilterDP, prefilterShape, 1, 32, 1, 2, 1); cuvsFilter.type(prefilter, 2); cuvsFilter.addr(prefilter, prefilterTensor.address()); } - returnValue = cuvsStreamSync(cuvsResources); + var returnValue = cuvsStreamSync(cuvsResources); checkCuVSError(returnValue, "cuvsStreamSync"); returnValue = @@ -361,13 +335,12 @@ private static MemorySegment createBruteForceIndex() { try (var localArena = Arena.ofConfined()) { MemorySegment indexPtrPtr = localArena.allocate(cuvsBruteForceIndex_t); // cuvsBruteForceIndexCreate gets a pointer to a cuvsBruteForceIndex_t, which is defined as a - // pointer to - // cuvsBruteForceIndex. - // It's basically a "out" parameter: the C functions will create the index and "return back" a - // pointer to it. + // pointer to cuvsBruteForceIndex. + // It's basically an "out" parameter: the C functions will create the index and "return back" + // a pointer to it. // The "out parameter" pointer is needed only for the duration of the function invocation (it - // could be a stack - // pointer, in C) so we allocate it from our localArena, unwrap it and return it. + // could be a stack pointer, in C) so we allocate it from our localArena, unwrap it and return + // it. var returnValue = cuvsBruteForceIndexCreate(indexPtrPtr); checkCuVSError(returnValue, "cuvsBruteForceIndexCreate"); return indexPtrPtr.get(cuvsBruteForceIndex_t, 0); @@ -498,23 +471,31 @@ public BruteForceIndexImpl build() throws Throwable { } /** - * Holds the memory reference to a BRUTEFORCE index and its associated dataset + * Holds the memory reference to a BRUTEFORCE index, its associated dataset, and the arena used to allocate + * input data */ private static class IndexReference { private final MemorySegment datasetPtr; private final long datasetBytes; + private final Arena tensorDataArena; private final MemorySegment indexPtr; - private IndexReference(MemorySegment datasetPtr, long datasetBytes, MemorySegment indexPtr) { + private IndexReference( + MemorySegment datasetPtr, + long datasetBytes, + Arena tensorDataArena, + MemorySegment indexPtr) { this.datasetPtr = datasetPtr; this.datasetBytes = datasetBytes; + this.tensorDataArena = tensorDataArena; this.indexPtr = indexPtr; } private IndexReference(MemorySegment indexPtr) { this.datasetPtr = MemorySegment.NULL; this.datasetBytes = 0; + this.tensorDataArena = null; this.indexPtr = indexPtr; } } diff --git a/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/CagraIndexImpl.java b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/CagraIndexImpl.java index a408f020d2..49be7104e8 100644 --- a/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/CagraIndexImpl.java +++ b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/CagraIndexImpl.java @@ -19,14 +19,14 @@ import static com.nvidia.cuvs.internal.common.LinkerHelper.C_FLOAT_BYTE_SIZE; import static com.nvidia.cuvs.internal.common.LinkerHelper.C_INT; import static com.nvidia.cuvs.internal.common.LinkerHelper.C_INT_BYTE_SIZE; -import static com.nvidia.cuvs.internal.common.LinkerHelper.C_POINTER; -import static com.nvidia.cuvs.internal.common.Util.CudaMemcpyKind.*; +import static com.nvidia.cuvs.internal.common.Util.CudaMemcpyKind.HOST_TO_DEVICE; +import static com.nvidia.cuvs.internal.common.Util.CudaMemcpyKind.INFER_DIRECTION; +import static com.nvidia.cuvs.internal.common.Util.allocateRMMSegment; import static com.nvidia.cuvs.internal.common.Util.buildMemorySegment; import static com.nvidia.cuvs.internal.common.Util.checkCuVSError; import static com.nvidia.cuvs.internal.common.Util.concatenate; import static com.nvidia.cuvs.internal.common.Util.cudaMemcpy; import static com.nvidia.cuvs.internal.common.Util.prepareTensor; -import static com.nvidia.cuvs.internal.panama.headers_h.cudaStream_t; import static com.nvidia.cuvs.internal.panama.headers_h.cuvsCagraBuild; import static com.nvidia.cuvs.internal.panama.headers_h.cuvsCagraDeserialize; import static com.nvidia.cuvs.internal.panama.headers_h.cuvsCagraIndexCreate; @@ -36,9 +36,7 @@ import static com.nvidia.cuvs.internal.panama.headers_h.cuvsCagraSearch; import static com.nvidia.cuvs.internal.panama.headers_h.cuvsCagraSerialize; import static com.nvidia.cuvs.internal.panama.headers_h.cuvsCagraSerializeToHnswlib; -import static com.nvidia.cuvs.internal.panama.headers_h.cuvsRMMAlloc; import static com.nvidia.cuvs.internal.panama.headers_h.cuvsRMMFree; -import static com.nvidia.cuvs.internal.panama.headers_h.cuvsStreamGet; import static com.nvidia.cuvs.internal.panama.headers_h.cuvsStreamSync; import static com.nvidia.cuvs.internal.panama.headers_h.omp_set_num_threads; @@ -182,7 +180,7 @@ private IndexReference build(CagraIndexParams indexParameters, DatasetImpl datas long[] datasetShape = {rows, cols}; MemorySegment datasetTensor = - prepareTensor(resources.getArena(), dataSeg, datasetShape, 2, 32, 2, 2, 1); + prepareTensor(localArena, dataSeg, datasetShape, 2, 32, 2, 2, 1); var index = createCagraIndex(); @@ -243,55 +241,39 @@ public SearchResults search(CagraQuery query) throws Throwable { long numQueries = query.getQueryVectors().length; long numBlocks = topK * numQueries; int vectorDimension = numQueries > 0 ? query.getQueryVectors()[0].length : 0; - Arena arena = resources.getArena(); SequenceLayout neighborsSequenceLayout = MemoryLayout.sequenceLayout(numBlocks, C_INT); SequenceLayout distancesSequenceLayout = MemoryLayout.sequenceLayout(numBlocks, C_FLOAT); - MemorySegment neighborsMemorySegment = arena.allocate(neighborsSequenceLayout); - MemorySegment distancesMemorySegment = arena.allocate(distancesSequenceLayout); - MemorySegment floatsSeg = buildMemorySegment(arena, query.getQueryVectors()); + MemorySegment neighborsMemorySegment = localArena.allocate(neighborsSequenceLayout); + MemorySegment distancesMemorySegment = localArena.allocate(distancesSequenceLayout); + MemorySegment floatsSeg = buildMemorySegment(localArena, query.getQueryVectors()); long cuvsRes = resources.getHandle(); - MemorySegment stream = arena.allocate(cudaStream_t); - int returnValue = cuvsStreamGet(cuvsRes, stream); - checkCuVSError(returnValue, "cuvsStreamGet"); - - MemorySegment queriesD = arena.allocate(C_POINTER); - MemorySegment neighborsD = arena.allocate(C_POINTER); - MemorySegment distancesD = arena.allocate(C_POINTER); long queriesBytes = C_FLOAT_BYTE_SIZE * numQueries * vectorDimension; long neighborsBytes = C_INT_BYTE_SIZE * numQueries * topK; long distancesBytes = C_FLOAT_BYTE_SIZE * numQueries * topK; long prefilterBytes = 0; // size assigned later - returnValue = cuvsRMMAlloc(cuvsRes, queriesD, queriesBytes); - checkCuVSError(returnValue, "cuvsRMMAlloc"); - returnValue = cuvsRMMAlloc(cuvsRes, neighborsD, neighborsBytes); - checkCuVSError(returnValue, "cuvsRMMAlloc"); - returnValue = cuvsRMMAlloc(cuvsRes, distancesD, distancesBytes); - checkCuVSError(returnValue, "cuvsRMMAlloc"); - - // IMPORTANT: these three should only come AFTER cuvsRMMAlloc calls - MemorySegment queriesDP = queriesD.get(C_POINTER, 0); - MemorySegment neighborsDP = neighborsD.get(C_POINTER, 0); - MemorySegment distancesDP = distancesD.get(C_POINTER, 0); - MemorySegment prefilterD = arena.allocate(C_POINTER); + MemorySegment queriesDP = allocateRMMSegment(cuvsRes, queriesBytes); + MemorySegment neighborsDP = allocateRMMSegment(cuvsRes, neighborsBytes); + MemorySegment distancesDP = allocateRMMSegment(cuvsRes, distancesBytes); MemorySegment prefilterDP = MemorySegment.NULL; long prefilterLen = 0; cudaMemcpy(queriesDP, floatsSeg, queriesBytes, INFER_DIRECTION); long[] queriesShape = {numQueries, vectorDimension}; - MemorySegment queriesTensor = prepareTensor(arena, queriesDP, queriesShape, 2, 32, 2, 2, 1); + MemorySegment queriesTensor = + prepareTensor(localArena, queriesDP, queriesShape, 2, 32, 2, 2, 1); long[] neighborsShape = {numQueries, topK}; MemorySegment neighborsTensor = - prepareTensor(arena, neighborsDP, neighborsShape, 1, 32, 2, 2, 1); + prepareTensor(localArena, neighborsDP, neighborsShape, 1, 32, 2, 2, 1); long[] distancesShape = {numQueries, topK}; MemorySegment distancesTensor = - prepareTensor(arena, distancesDP, distancesShape, 2, 32, 2, 2, 1); + prepareTensor(localArena, distancesDP, distancesShape, 2, 32, 2, 2, 1); - returnValue = cuvsStreamSync(cuvsRes); + var returnValue = cuvsStreamSync(cuvsRes); checkCuVSError(returnValue, "cuvsStreamSync"); // prepare the prefiltering data @@ -302,11 +284,11 @@ public SearchResults search(CagraQuery query) throws Throwable { prefilters = new BitSet[] {query.getPrefilter()}; BitSet concatenatedFilters = concatenate(prefilters, query.getNumDocs()); long[] filters = concatenatedFilters.toLongArray(); - prefilterDataMemorySegment = buildMemorySegment(arena, filters); + prefilterDataMemorySegment = buildMemorySegment(localArena, filters); prefilterDataLength = query.getNumDocs() * prefilters.length; } - MemorySegment prefilter = cuvsFilter.allocate(arena); + MemorySegment prefilter = cuvsFilter.allocate(localArena); MemorySegment prefilterTensor; if (prefilterDataMemorySegment == MemorySegment.NULL) { @@ -317,14 +299,11 @@ public SearchResults search(CagraQuery query) throws Throwable { prefilterLen = prefilterShape[0]; prefilterBytes = C_INT_BYTE_SIZE * prefilterLen; - returnValue = cuvsRMMAlloc(cuvsRes, prefilterD, prefilterBytes); - checkCuVSError(returnValue, "cuvsRMMAlloc"); - - prefilterDP = prefilterD.get(C_POINTER, 0); + prefilterDP = allocateRMMSegment(cuvsRes, prefilterBytes); cudaMemcpy(prefilterDP, prefilterDataMemorySegment, prefilterBytes, HOST_TO_DEVICE); - prefilterTensor = prepareTensor(arena, prefilterDP, prefilterShape, 1, 32, 1, 2, 1); + prefilterTensor = prepareTensor(localArena, prefilterDP, prefilterShape, 1, 32, 1, 2, 1); cuvsFilter.type(prefilter, 1); cuvsFilter.addr(prefilter, prefilterTensor.address()); @@ -336,7 +315,7 @@ public SearchResults search(CagraQuery query) throws Throwable { returnValue = cuvsCagraSearch( cuvsRes, - segmentFromSearchParams(query.getCagraSearchParameters()), + segmentFromSearchParams(localArena, query.getCagraSearchParameters()), cagraIndexReference.getMemorySegment(), queriesTensor, neighborsTensor, @@ -357,8 +336,10 @@ public SearchResults search(CagraQuery query) throws Throwable { returnValue = cuvsRMMFree(cuvsRes, queriesDP, queriesBytes); checkCuVSError(returnValue, "cuvsRMMFree"); - returnValue = cuvsRMMFree(cuvsRes, prefilterDP, C_INT_BYTE_SIZE * prefilterBytes); - checkCuVSError(returnValue, "cuvsRMMFree"); + if (prefilterLen > 0) { + returnValue = cuvsRMMFree(cuvsRes, prefilterDP, C_INT_BYTE_SIZE * prefilterBytes); + checkCuVSError(returnValue, "cuvsRMMFree"); + } return CagraSearchResults.create( neighborsSequenceLayout, @@ -432,12 +413,15 @@ public void serializeToHNSW(OutputStream outputStream, Path tempFile, int buffer throws Throwable { checkNotDestroyed(); tempFile = tempFile.toAbsolutePath(); - MemorySegment pathSeg = buildMemorySegment(resources.getArena(), tempFile.toString()); - long cuvsRes = resources.getHandle(); - int returnValue = - cuvsCagraSerializeToHnswlib(cuvsRes, pathSeg, cagraIndexReference.getMemorySegment()); - checkCuVSError(returnValue, "cuvsCagraSerializeToHnswlib"); + try (var localArena = Arena.ofConfined()) { + MemorySegment pathSeg = buildMemorySegment(localArena, tempFile.toString()); + + long cuvsRes = resources.getHandle(); + int returnValue = + cuvsCagraSerializeToHnswlib(cuvsRes, pathSeg, cagraIndexReference.getMemorySegment()); + checkCuVSError(returnValue, "cuvsCagraSerializeToHnswlib"); + } try (FileInputStream fileInputStream = new FileInputStream(tempFile.toFile())) { byte[] chunk = new byte[bufferLength]; @@ -576,8 +560,8 @@ private static MemorySegment segmentFromIndexParams(Arena arena, CagraIndexParam /** * Allocates the configured search parameters in the MemorySegment. */ - private MemorySegment segmentFromSearchParams(CagraSearchParams params) { - MemorySegment seg = cuvsCagraSearchParams.allocate(resources.getArena()); + private MemorySegment segmentFromSearchParams(Arena arena, CagraSearchParams params) { + MemorySegment seg = cuvsCagraSearchParams.allocate(arena); cuvsCagraSearchParams.max_queries(seg, params.getMaxQueries()); cuvsCagraSearchParams.itopk_size(seg, params.getITopKSize()); cuvsCagraSearchParams.max_iterations(seg, params.getMaxIterations()); @@ -627,9 +611,9 @@ public static CagraIndex merge(CagraIndex[] indexes, CagraMergeParams mergeParam var mergedIndex = createCagraIndex(); long cuvsRes = resources.getHandle(); - try (var arena = Arena.ofConfined()) { + try (var localArena = Arena.ofConfined()) { MemorySegment indexesSegment = - arena.allocate(indexes.length * ValueLayout.ADDRESS.byteSize()); + localArena.allocate(indexes.length * ValueLayout.ADDRESS.byteSize()); for (int i = 0; i < indexes.length; i++) { CagraIndexImpl indexImpl = (CagraIndexImpl) indexes[i]; @@ -637,8 +621,11 @@ public static CagraIndex merge(CagraIndex[] indexes, CagraMergeParams mergeParam ValueLayout.ADDRESS, i, indexImpl.cagraIndexReference.getMemorySegment()); } - MemorySegment mergeParamsSegment = createMergeParamsSegment(mergeParams, resources); - int returnValue = + // TODO: we should call cuvsCreateMergeParams here, instead of allocating this ourselves + // See https://github.com/rapidsai/cuvs/pull/1109 + var mergeParamsSegment = createMergeParamsSegment(localArena, mergeParams); + + var returnValue = cuvsCagraMerge(cuvsRes, mergeParamsSegment, indexesSegment, indexes.length, mergedIndex); checkCuVSError(returnValue, "cuvsCagraMerge"); @@ -648,15 +635,13 @@ public static CagraIndex merge(CagraIndex[] indexes, CagraMergeParams mergeParam } /** - * Creates a memory segment for merge parameters. + * Creates (allocates and fill) native memory version of merge parameters. * + * @param arena The arena to use to allocate the MemorySegment(s) that will hold the merge parameters data * @param mergeParams The merge parameters - * @param resources The CuVS resources - * @return A memory segment with the merge parameters + * @return A memory segment pointing to the native structure for merge parameters */ - private static MemorySegment createMergeParamsSegment( - CagraMergeParams mergeParams, CuVSResourcesImpl resources) { - var arena = resources.getArena(); + private static MemorySegment createMergeParamsSegment(Arena arena, CagraMergeParams mergeParams) { MemorySegment seg = cuvsCagraMergeParams.allocate(arena); if (mergeParams != null) { diff --git a/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/CuVSResourcesImpl.java b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/CuVSResourcesImpl.java index c3095e8304..5acb2cb9bd 100644 --- a/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/CuVSResourcesImpl.java +++ b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/CuVSResourcesImpl.java @@ -32,7 +32,6 @@ public class CuVSResourcesImpl implements CuVSResources { private final Path tempDirectory; - private final Arena arena; private final long resourceHandle; private boolean destroyed; @@ -42,7 +41,6 @@ public class CuVSResourcesImpl implements CuVSResources { */ public CuVSResourcesImpl(Path tempDirectory) { this.tempDirectory = tempDirectory; - this.arena = Arena.ofShared(); try (var localArena = Arena.ofConfined()) { var resourcesMemorySegment = localArena.allocate(cuvsResources_t); int returnValue = cuvsResourcesCreate(resourcesMemorySegment); @@ -58,7 +56,6 @@ public void close() { int returnValue = cuvsResourcesDestroy(resourceHandle); checkCuVSError(returnValue, "cuvsResourcesDestroy"); destroyed = true; - arena.close(); } } @@ -82,12 +79,4 @@ long getHandle() { checkNotDestroyed(); return resourceHandle; } - - /** - * The allocation arena used by this resources. - */ - protected Arena getArena() { - checkNotDestroyed(); - return arena; - } } diff --git a/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/HnswIndexImpl.java b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/HnswIndexImpl.java index c7ddfa4840..178ed21b87 100644 --- a/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/HnswIndexImpl.java +++ b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/HnswIndexImpl.java @@ -98,24 +98,24 @@ public SearchResults search(HnswQuery query) throws Throwable { int numQueries = queryVectors.length; long numBlocks = (long) topK * numQueries; int vectorDimension = numQueries > 0 ? queryVectors[0].length : 0; - Arena arena = resources.getArena(); SequenceLayout neighborsSequenceLayout = MemoryLayout.sequenceLayout(numBlocks, C_LONG); SequenceLayout distancesSequenceLayout = MemoryLayout.sequenceLayout(numBlocks, C_FLOAT); - MemorySegment neighborsMemorySegment = arena.allocate(neighborsSequenceLayout); - MemorySegment distancesMemorySegment = arena.allocate(distancesSequenceLayout); - MemorySegment querySeg = buildMemorySegment(arena, queryVectors); + MemorySegment neighborsMemorySegment = localArena.allocate(neighborsSequenceLayout); + MemorySegment distancesMemorySegment = localArena.allocate(distancesSequenceLayout); + MemorySegment querySeg = buildMemorySegment(localArena, queryVectors); long cuvsRes = resources.getHandle(); long[] queriesShape = {numQueries, vectorDimension}; - MemorySegment queriesTensor = prepareTensor(arena, querySeg, queriesShape, 2, 32, 2, 1, 1); + MemorySegment queriesTensor = + prepareTensor(localArena, querySeg, queriesShape, 2, 32, 2, 1, 1); long[] neighborsShape = {numQueries, topK}; MemorySegment neighborsTensor = - prepareTensor(arena, neighborsMemorySegment, neighborsShape, 1, 64, 2, 1, 1); + prepareTensor(localArena, neighborsMemorySegment, neighborsShape, 1, 64, 2, 1, 1); long[] distancesShape = {numQueries, topK}; MemorySegment distancesTensor = - prepareTensor(arena, distancesMemorySegment, distancesShape, 2, 32, 2, 1, 1); + prepareTensor(localArena, distancesMemorySegment, distancesShape, 2, 32, 2, 1, 1); int returnValue = cuvsStreamSync(cuvsRes); checkCuVSError(returnValue, "cuvsStreamSync"); @@ -123,7 +123,7 @@ public SearchResults search(HnswQuery query) throws Throwable { returnValue = cuvsHnswSearch( cuvsRes, - segmentFromSearchParams(query.getHnswSearchParams()), + segmentFromSearchParams(localArena, query.getHnswSearchParams()), hnswIndexReference.getMemorySegment(), queriesTensor, neighborsTensor, @@ -176,15 +176,13 @@ private IndexReference deserialize(InputStream inputStream) throws Throwable { var outputStream = Files.newOutputStream(tmpIndexFile); var localArena = Arena.ofConfined()) { inputStream.transferTo(outputStream); - - Arena arena = resources.getArena(); MemorySegment pathSeg = buildMemorySegment(localArena, tmpIndexFile.toString()); long cuvsRes = resources.getHandle(); var indexReference = createHnswIndex(); - MemorySegment dtype = DLDataType.allocate(arena); + MemorySegment dtype = DLDataType.allocate(localArena); DLDataType.bits(dtype, (byte) 32); DLDataType.code(dtype, (byte) 2); // kDLFloat DLDataType.lanes(dtype, (byte) 1); @@ -194,7 +192,7 @@ private IndexReference deserialize(InputStream inputStream) throws Throwable { var returnValue = cuvsHnswDeserialize( cuvsRes, - segmentFromIndexParams(hnswIndexParams), + segmentFromIndexParams(localArena, hnswIndexParams), pathSeg, hnswIndexParams.getVectorDimension(), 0, @@ -211,8 +209,8 @@ private IndexReference deserialize(InputStream inputStream) throws Throwable { /** * Allocates the configured search parameters in the MemorySegment. */ - private MemorySegment segmentFromIndexParams(HnswIndexParams params) { - MemorySegment seg = cuvsHnswIndexParams.allocate(resources.getArena()); + private static MemorySegment segmentFromIndexParams(Arena arena, HnswIndexParams params) { + MemorySegment seg = cuvsHnswIndexParams.allocate(arena); cuvsHnswIndexParams.ef_construction(seg, params.getEfConstruction()); cuvsHnswIndexParams.num_threads(seg, params.getNumThreads()); return seg; @@ -221,8 +219,8 @@ private MemorySegment segmentFromIndexParams(HnswIndexParams params) { /** * Allocates the configured search parameters in the MemorySegment. */ - private MemorySegment segmentFromSearchParams(HnswSearchParams params) { - MemorySegment seg = cuvsHnswSearchParams.allocate(resources.getArena()); + private static MemorySegment segmentFromSearchParams(Arena arena, HnswSearchParams params) { + MemorySegment seg = cuvsHnswSearchParams.allocate(arena); cuvsHnswSearchParams.ef(seg, params.ef()); cuvsHnswSearchParams.num_threads(seg, params.numThreads()); return seg; diff --git a/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/common/Util.java b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/common/Util.java index af882564c0..576b182588 100644 --- a/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/common/Util.java +++ b/java/cuvs-java/src/main/java22/com/nvidia/cuvs/internal/common/Util.java @@ -19,10 +19,12 @@ import static com.nvidia.cuvs.internal.common.LinkerHelper.C_FLOAT; import static com.nvidia.cuvs.internal.common.LinkerHelper.C_INT; import static com.nvidia.cuvs.internal.common.LinkerHelper.C_LONG; +import static com.nvidia.cuvs.internal.common.LinkerHelper.C_POINTER; import static com.nvidia.cuvs.internal.panama.headers_h.cudaGetDeviceCount; import static com.nvidia.cuvs.internal.panama.headers_h.cudaGetDeviceProperties_v2; import static com.nvidia.cuvs.internal.panama.headers_h.cudaMemGetInfo; import static com.nvidia.cuvs.internal.panama.headers_h.cudaSetDevice; +import static com.nvidia.cuvs.internal.panama.headers_h.cuvsRMMAlloc; import static com.nvidia.cuvs.internal.panama.headers_h.size_t; import com.nvidia.cuvs.GPUInfo; @@ -329,4 +331,15 @@ public static MemorySegment prepareTensor( return tensor; } + + public static MemorySegment allocateRMMSegment(long resourceHandle, long datasetBytes) { + try (var localArena = Arena.ofConfined()) { + MemorySegment datasetMemorySegment = localArena.allocate(C_POINTER); + + var returnValue = cuvsRMMAlloc(resourceHandle, datasetMemorySegment, datasetBytes); + checkCuVSError(returnValue, "cuvsRMMAlloc"); + + return datasetMemorySegment.get(C_POINTER, 0); + } + } }