Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
import com.arcadedb.serializer.BinaryComparator;
import com.arcadedb.serializer.json.JSONObject;
import com.arcadedb.utility.LockManager;
import com.arcadedb.utility.Pair;
import io.github.jbellis.jvector.graph.GraphIndexBuilder;
import io.github.jbellis.jvector.graph.GraphSearcher;
import io.github.jbellis.jvector.graph.ImmutableGraphIndex;
Expand Down Expand Up @@ -1379,7 +1380,7 @@ private MutablePage createNewVectorDataPage(final int pageNum) {
*
* @return List of pairs containing RID and similarity score
*/
public List<com.arcadedb.utility.Pair<RID, Float>> findNeighborsFromVector(final float[] queryVector, final int k) {
public List<Pair<RID, Float>> findNeighborsFromVector(final float[] queryVector, final int k) {
if (queryVector == null)
throw new IllegalArgumentException("Query vector cannot be null");

Expand Down Expand Up @@ -1451,7 +1452,7 @@ public List<com.arcadedb.utility.Pair<RID, Float>> findNeighborsFromVector(final
searchResult.getNodes().length, graphIndex.size(), vectors.size(), ordinalToVectorId.length);

// Extract RIDs and scores from search results using ordinal mapping
final List<com.arcadedb.utility.Pair<RID, Float>> results = new ArrayList<>();
final List<Pair<RID, Float>> results = new ArrayList<>();
int skippedOutOfBounds = 0;
int skippedDeletedOrNull = 0;
for (final SearchResult.NodeScore nodeScore : searchResult.getNodes()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ protected void beginTest() {
}

@Test
void testEqualsOperatorBeforeIndexCreation() {
void equalsOperatorBeforeIndexCreation() {
// Test that = operator works BEFORE creating index
database.transaction(() -> {
for (String title : TEST_TITLES) {
Expand All @@ -114,7 +114,7 @@ void testEqualsOperatorBeforeIndexCreation() {
}

@Test
void testEqualsOperatorAfterIndexCreation() {
void equalsOperatorAfterIndexCreation() {
// First verify = works before index
database.transaction(() -> {
for (String title : TEST_TITLES) {
Expand Down Expand Up @@ -184,7 +184,7 @@ private void waitForIndexCompaction() {
}

@Test
void testLikeOperatorStillWorksAfterIndexCreation() {
void likeOperatorStillWorksAfterIndexCreation() {
// Create NOTUNIQUE index on existing large dataset
database.transaction(() -> {
database.getSchema().createTypeIndex(Schema.INDEX_TYPE.LSM_TREE, false,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ void cleanup() {
}

@Test
void testWriteReadSymmetry() throws Exception {
void writeReadSymmetry() throws Exception {
// Create database
DatabaseFactory factory = new DatabaseFactory(DB_PATH);
if (factory.exists()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,23 +19,22 @@
package com.arcadedb.index.vector;

import com.arcadedb.TestHelper;
import com.arcadedb.index.Index;
import com.arcadedb.index.TypeIndex;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;

import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;

import static org.assertj.core.api.Assertions.assertThat;

/**
* Test for progress callbacks during vector index building.
*
* @author Luca Garulli ([email protected])
*/
public class VectorIndexProgressCallbackTest extends TestHelper {
class VectorIndexProgressCallbackTest extends TestHelper {

@Test
public void testVectorIndexBuildWithProgressCallback() {
void vectorIndexBuildWithProgressCallback() {
// Create schema
database.transaction(() -> {
database.command("sql", "CREATE VERTEX TYPE VectorDoc IF NOT EXISTS");
Expand All @@ -48,7 +47,7 @@ CREATE INDEX IF NOT EXISTS ON VectorDoc (embedding) LSM_VECTOR
});

final TypeIndex typeIndex = (TypeIndex) database.getSchema().getIndexByName("VectorDoc[embedding]");
Assertions.assertNotNull(typeIndex);
assertThat(typeIndex).isNotNull();

// Insert test documents
database.transaction(() -> {
Expand Down Expand Up @@ -87,27 +86,27 @@ CREATE INDEX IF NOT EXISTS ON VectorDoc (embedding) LSM_VECTOR
},
(phase, processedNodes, totalNodes, insertsOrAccesses) -> {
switch (phase) {
case "validating":
validationProgress.set(processedNodes);
System.out.printf("Validating vectors: %d / %d%n", processedNodes, totalNodes);
break;
case "building":
buildingProgress.set(processedNodes);
buildingCallbacks.incrementAndGet();
final int insertsInProgress = (int) (insertsOrAccesses - processedNodes);
System.out.printf("Building graph: %d / %d nodes (%d inserts in progress)%n",
processedNodes, totalNodes, insertsInProgress);
break;
case "persisting":
persistingCalled.incrementAndGet();
System.out.printf("Persisting graph: %d / %d nodes%n", processedNodes, totalNodes);
break;
case "validating":
validationProgress.set(processedNodes);
System.out.printf("Validating vectors: %d / %d%n", processedNodes, totalNodes);
break;
case "building":
buildingProgress.set(processedNodes);
buildingCallbacks.incrementAndGet();
final int insertsInProgress = (int) (insertsOrAccesses - processedNodes);
System.out.printf("Building graph: %d / %d nodes (%d inserts in progress)%n",
processedNodes, totalNodes, insertsInProgress);
break;
case "persisting":
persistingCalled.incrementAndGet();
System.out.printf("Persisting graph: %d / %d nodes%n", processedNodes, totalNodes);
break;
}
}
);

// Verify callbacks were called
Assertions.assertEquals(1000, documentsIndexed.get(), "Should have indexed 1000 documents");
assertThat(documentsIndexed.get()).as("Should have indexed 1000 documents").isEqualTo(1000);
// Note: Validation and building callbacks may not be called if graph is already built
// during the insert phase. That's ok - the important thing is that the index build succeeded.
// In production, these callbacks will be triggered when explicitly rebuilding an existing index.
Expand All @@ -119,7 +118,7 @@ CREATE INDEX IF NOT EXISTS ON VectorDoc (embedding) LSM_VECTOR
}

@Test
public void testVectorIndexBuildWithoutCallback() {
void vectorIndexBuildWithoutCallback() {
// Create schema
database.transaction(() -> {
database.command("sql", "CREATE VERTEX TYPE SimpleDoc IF NOT EXISTS");
Expand All @@ -129,7 +128,7 @@ public void testVectorIndexBuildWithoutCallback() {
});

final TypeIndex typeIndex = (TypeIndex) database.getSchema().getIndexByName("SimpleDoc[vec]");
Assertions.assertNotNull(typeIndex);
assertThat(typeIndex).isNotNull();

// Insert test documents
database.transaction(() -> {
Expand All @@ -147,11 +146,11 @@ public void testVectorIndexBuildWithoutCallback() {
final LSMVectorIndex lsmIndex = (LSMVectorIndex) typeIndex.getIndexesOnBuckets()[0];
final long totalRecords = lsmIndex.build(100000, null);

Assertions.assertEquals(100, totalRecords, "Should have indexed 100 documents");
assertThat(totalRecords).as("Should have indexed 100 documents").isEqualTo(100);
}

@Test
public void testGraphRebuildWithCallback() {
void graphRebuildWithCallback() {
// This test demonstrates the graph building callbacks when rebuilding an existing index
database.transaction(() -> {
database.command("sql", "CREATE VERTEX TYPE RebuildDoc IF NOT EXISTS");
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
package com.arcadedb.query.sql.function.vector;

import com.arcadedb.TestHelper;
import com.arcadedb.query.sql.executor.BasicCommandContext;
import com.arcadedb.query.sql.executor.ResultSet;
import org.junit.jupiter.api.Test;

import java.util.List;
import java.util.Map;

import static org.assertj.core.api.Assertions.assertThat;

/**
 * Tests for the {@code vectorNeighbors} SQL function, exercised both programmatically (by calling
 * {@link SQLFunctionVectorNeighbors#execute} directly) and through SQL queries.
 * <p>
 * Every test runs against the fixture created in {@link #beginTest()}: five {@code Doc} vertices
 * (docA..docE) carrying 3-dimensional embeddings indexed by an LSM_VECTOR index declared with COSINE similarity.
 */
class SQLFunctionVectorNeighborsTest extends TestHelper {

  /** Name of the vector index created by {@link #beginTest()} and queried by every test. */
  private static final String INDEX_NAME = "Doc[embedding]";

  /**
   * Creates the {@code Doc} schema, its unique index on {@code name}, the vector index on
   * {@code embedding}, and then inserts the five fixture vertices.
   */
  @Override
  public void beginTest() {
    database.transaction(() -> {
      database.command("sql", "CREATE VERTEX TYPE Doc IF NOT EXISTS");
      database.command("sql", "CREATE PROPERTY Doc.name IF NOT EXISTS STRING");
      database.command("sql", "CREATE PROPERTY Doc.embedding IF NOT EXISTS ARRAY_OF_FLOATS");
      database.command("sql", "CREATE INDEX IF NOT EXISTS ON Doc (name) UNIQUE");

      // Create the vector index before inserting data (needed for automatic indexing)
      database.command("sql", """
          CREATE INDEX IF NOT EXISTS ON Doc (embedding) LSM_VECTOR
          METADATA {
            dimensions: 3,
            similarity: 'COSINE',
            idPropertyName: 'name'
          }""");
    });

    // Insert some documents with known vectors
    database.transaction(() -> {
      insertDoc("docA", 1.0f, 0.0f, 0.0f);
      insertDoc("docB", 0.9f, 0.1f, 0.0f); // Close to A
      insertDoc("docC", 0.0f, 1.0f, 0.0f); // Far from A, close to D
      insertDoc("docD", 0.1f, 0.9f, 0.0f); // Close to C
      insertDoc("docE", 0.0f, 0.0f, 1.0f); // Far from all
    });
  }

  @Test
  void programmaticVectorSearchWithRawVector() {
    // Search with a raw vector (identical to docE's embedding)
    final List<Map<String, Object>> results = executeVectorNeighbors(new float[] { 0.0f, 0.0f, 1.0f }, 3);

    assertThat(results).as("Results should not be null").isNotNull();
    assertThat(results).as("Should find at least one neighbor").isNotEmpty();
    assertThat(results).as("Should return at most 3 results").hasSizeLessThanOrEqualTo(3);

    // Verify results contain the expected structure
    for (final Map<String, Object> result : results) {
      assertThat(result).as("Result should contain 'vertex' key").containsKey("vertex");
      assertThat(result).as("Result should contain 'distance' key").containsKey("distance");
      assertThat(result.get("distance")).as("Distance should not be null").isNotNull();
    }
  }

  @Test
  void programmaticVectorSearchWithVertexId() {
    // Search using a vertex identifier (docA)
    final List<Map<String, Object>> results = executeVectorNeighbors("docA", 3);

    assertThat(results).isNotNull();
    assertThat(results).as("Should find neighbors").isNotEmpty();
    assertThat(results).as("Should return at most 3 results").hasSizeLessThanOrEqualTo(3);

    // docB should be close to docA (both have similar vectors)
    assertThat(results).as("DocB should be found as a neighbor of DocA")
        .anyMatch(r -> r.get("vertex").toString().contains("docB"));
  }

  @Test
  void sqlVectorNeighborsWithRawVector() {
    // SQL query with raw vector
    final String query = "SELECT vectorNeighbors('Doc[embedding]', [0.0, 0.0, 1.0], 3) as neighbors";

    // try-with-resources guarantees the result set is closed even when an assertion fails
    try (ResultSet results = database.query("sql", query)) {
      assertThat(results.hasNext()).as("Query should return results").isTrue();

      final var result = results.next();
      final List<Map<String, Object>> neighbors = result.getProperty("neighbors");

      assertThat(neighbors).isNotNull();
      assertThat(neighbors).as("Should find at least one neighbor").isNotEmpty();
      assertThat(neighbors).as("Should return at most 3 results").hasSizeLessThanOrEqualTo(3);
    }
  }

  @Test
  void sqlVectorNeighborsWithVertexId() {
    // SQL query with vertex identifier
    final String query = "SELECT vectorNeighbors('Doc[embedding]', 'docC', 2) as neighbors";

    try (ResultSet results = database.query("sql", query)) {
      assertThat(results.hasNext()).as("Query should return results").isTrue();

      final var result = results.next();
      final List<Map<String, Object>> neighbors = result.getProperty("neighbors");

      assertThat(neighbors).isNotNull();
      assertThat(neighbors).as("Should find neighbors for docC").isNotEmpty();
      assertThat(neighbors).as("Should return at most 2 results").hasSizeLessThanOrEqualTo(2);
    }
  }

  @Test
  void sqlVectorNeighborsInSubquery() {
    // NOTE: despite the test name, the query below is a plain projection (no subquery is involved).
    // What this test actually verifies is that the returned neighbors are ordered by distance.
    final String query = """
        SELECT vectorNeighbors('Doc[embedding]', 'docA', 5) as neighbors
        """;

    try (ResultSet results = database.query("sql", query)) {
      assertThat(results.hasNext()).as("Query should return results").isTrue();

      final var result = results.next();
      final List<Map<String, Object>> neighbors = result.getProperty("neighbors");

      assertThat(neighbors).as("Neighbors should not be null").isNotNull();
      assertThat(neighbors).as("Should find neighbors").isNotEmpty();

      // Verify neighbors are sorted by distance (closer distances first).
      // Start from -infinity so the first element can never be rejected by the sentinel itself.
      float previousDistance = Float.NEGATIVE_INFINITY;
      for (final Map<String, Object> neighbor : neighbors) {
        final float distance = ((Number) neighbor.get("distance")).floatValue();
        assertThat(distance).as("Results should be ordered by distance").isGreaterThanOrEqualTo(previousDistance);
        previousDistance = distance;
      }
    }
  }

  @Test
  void vectorNeighborsLimitParameter() {
    final float[] queryVector = { 1.0f, 0.0f, 0.0f };

    // Same query vector, two different limits
    final List<Map<String, Object>> resultsLimit2 = executeVectorNeighbors(queryVector, 2);
    final List<Map<String, Object>> resultsLimit5 = executeVectorNeighbors(queryVector, 5);

    assertThat(resultsLimit2).as("Should respect limit of 2").hasSizeLessThanOrEqualTo(2);
    assertThat(resultsLimit5).as("Should respect limit of 5").hasSizeLessThanOrEqualTo(5);
    assertThat(resultsLimit5).as("Larger limit should return more or equal results")
        .hasSizeGreaterThanOrEqualTo(resultsLimit2.size());
  }

  /**
   * Saves a {@code Doc} vertex with the given name and embedding. Must be called inside a transaction.
   *
   * @param name      value of the {@code name} property
   * @param embedding components of the {@code embedding} vector
   */
  private void insertDoc(final String name, final float... embedding) {
    database.newVertex("Doc").set("name", name).set("embedding", embedding).save();
  }

  /**
   * Invokes {@link SQLFunctionVectorNeighbors} directly against {@link #INDEX_NAME}, using a fresh
   * function instance and command context for every call.
   *
   * @param queryTarget either a raw {@code float[]} query vector or a {@code String} vertex identifier
   * @param limit       maximum number of neighbors requested
   *
   * @return whatever the function returned, cast to the list-of-maps shape the tests assert on
   */
  @SuppressWarnings("unchecked")
  private List<Map<String, Object>> executeVectorNeighbors(final Object queryTarget, final int limit) {
    final SQLFunctionVectorNeighbors function = new SQLFunctionVectorNeighbors();
    final BasicCommandContext context = new BasicCommandContext();
    context.setDatabase(database);

    return (List<Map<String, Object>>) function.execute(null, null, null,
        new Object[] { INDEX_NAME, queryTarget, limit },
        context);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ public void endTest() {
}

@Test
void testGrpcQueryWithAliasesAndMetadata() {
void grpcQueryWithAliasesAndMetadata() {

String query = "SELECT *, @rid, @type, author AS _author FROM article";
ResultSet resultSet = database.query("sql", query);
Expand All @@ -123,7 +123,7 @@ void testGrpcQueryWithAliasesAndMetadata() {
}

@Test
void testGrpcUpdateWithAlas() {
void grpcUpdateWithAlas() {
String update = """
UPDATE article SET title = "My third article updated" RETURN AFTER *, author AS _author WHERE id = 3
""";
Expand All @@ -140,7 +140,7 @@ void testGrpcUpdateWithAlas() {
}

@Test
void testGrpcInsertWithReturn() {
void grpcInsertWithReturn() {
String command = """
INSERT INTO article CONTENT {
"id": 4,
Expand All @@ -167,7 +167,7 @@ void testGrpcInsertWithReturn() {
}

@Test
void testGrpcCreateVertexWithReturn() {
void grpcCreateVertexWithReturn() {
String command = """
CREATE VERTEX article CONTENT {
"id": 4,
Expand Down
Loading
Loading