-
-
Notifications
You must be signed in to change notification settings - Fork 91
chore: refactor vector index tests and improve naming conventions #2906
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 1 commit
Commits
Show all changes
6 commits
Select commit
Hold shift + click to select a range
71721c2
chore: refactor vector index tests and improve naming conventions
robfrank d9ac3b5
Update engine/src/test/java/com/arcadedb/query/sql/function/vector/SQ…
robfrank 0c75b06
Update engine/src/test/java/com/arcadedb/query/sql/function/vector/SQ…
robfrank 28d2f37
Update engine/src/test/java/com/arcadedb/query/sql/function/vector/SQ…
robfrank 13677f6
Update engine/src/test/java/com/arcadedb/query/sql/function/vector/SQ…
robfrank 4a1c619
chore: refactor vector index tests and improve naming conventions
robfrank File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,23 +19,22 @@ | |
| package com.arcadedb.index.vector; | ||
|
|
||
| import com.arcadedb.TestHelper; | ||
| import com.arcadedb.index.Index; | ||
| import com.arcadedb.index.TypeIndex; | ||
| import org.junit.jupiter.api.Assertions; | ||
| import org.junit.jupiter.api.Test; | ||
|
|
||
| import java.util.concurrent.atomic.AtomicInteger; | ||
| import java.util.concurrent.atomic.AtomicLong; | ||
|
|
||
| import static org.assertj.core.api.Assertions.assertThat; | ||
|
|
||
| /** | ||
| * Test for progress callbacks during vector index building. | ||
| * | ||
| * @author Luca Garulli ([email protected]) | ||
| */ | ||
| public class VectorIndexProgressCallbackTest extends TestHelper { | ||
| class VectorIndexProgressCallbackTest extends TestHelper { | ||
|
|
||
| @Test | ||
| public void testVectorIndexBuildWithProgressCallback() { | ||
| void vectorIndexBuildWithProgressCallback() { | ||
| // Create schema | ||
| database.transaction(() -> { | ||
| database.command("sql", "CREATE VERTEX TYPE VectorDoc IF NOT EXISTS"); | ||
|
|
@@ -48,7 +47,7 @@ CREATE INDEX IF NOT EXISTS ON VectorDoc (embedding) LSM_VECTOR | |
| }); | ||
|
|
||
| final TypeIndex typeIndex = (TypeIndex) database.getSchema().getIndexByName("VectorDoc[embedding]"); | ||
| Assertions.assertNotNull(typeIndex); | ||
| assertThat(typeIndex).isNotNull(); | ||
|
|
||
| // Insert test documents | ||
| database.transaction(() -> { | ||
|
|
@@ -87,27 +86,27 @@ CREATE INDEX IF NOT EXISTS ON VectorDoc (embedding) LSM_VECTOR | |
| }, | ||
| (phase, processedNodes, totalNodes, insertsOrAccesses) -> { | ||
| switch (phase) { | ||
| case "validating": | ||
| validationProgress.set(processedNodes); | ||
| System.out.printf("Validating vectors: %d / %d%n", processedNodes, totalNodes); | ||
| break; | ||
| case "building": | ||
| buildingProgress.set(processedNodes); | ||
| buildingCallbacks.incrementAndGet(); | ||
| final int insertsInProgress = (int) (insertsOrAccesses - processedNodes); | ||
| System.out.printf("Building graph: %d / %d nodes (%d inserts in progress)%n", | ||
| processedNodes, totalNodes, insertsInProgress); | ||
| break; | ||
| case "persisting": | ||
| persistingCalled.incrementAndGet(); | ||
| System.out.printf("Persisting graph: %d / %d nodes%n", processedNodes, totalNodes); | ||
| break; | ||
| case "validating": | ||
| validationProgress.set(processedNodes); | ||
| System.out.printf("Validating vectors: %d / %d%n", processedNodes, totalNodes); | ||
| break; | ||
| case "building": | ||
| buildingProgress.set(processedNodes); | ||
| buildingCallbacks.incrementAndGet(); | ||
| final int insertsInProgress = (int) (insertsOrAccesses - processedNodes); | ||
| System.out.printf("Building graph: %d / %d nodes (%d inserts in progress)%n", | ||
| processedNodes, totalNodes, insertsInProgress); | ||
| break; | ||
| case "persisting": | ||
| persistingCalled.incrementAndGet(); | ||
| System.out.printf("Persisting graph: %d / %d nodes%n", processedNodes, totalNodes); | ||
| break; | ||
| } | ||
| } | ||
| ); | ||
|
|
||
| // Verify callbacks were called | ||
| Assertions.assertEquals(1000, documentsIndexed.get(), "Should have indexed 1000 documents"); | ||
| assertThat(documentsIndexed.get()).as("Should have indexed 1000 documents").isEqualTo(1000); | ||
| // Note: Validation and building callbacks may not be called if graph is already built | ||
| // during the insert phase. That's ok - the important thing is that the index build succeeded. | ||
| // In production, these callbacks will be triggered when explicitly rebuilding an existing index. | ||
|
|
@@ -119,7 +118,7 @@ CREATE INDEX IF NOT EXISTS ON VectorDoc (embedding) LSM_VECTOR | |
| } | ||
|
|
||
| @Test | ||
| public void testVectorIndexBuildWithoutCallback() { | ||
| void vectorIndexBuildWithoutCallback() { | ||
| // Create schema | ||
| database.transaction(() -> { | ||
| database.command("sql", "CREATE VERTEX TYPE SimpleDoc IF NOT EXISTS"); | ||
|
|
@@ -129,7 +128,7 @@ public void testVectorIndexBuildWithoutCallback() { | |
| }); | ||
|
|
||
| final TypeIndex typeIndex = (TypeIndex) database.getSchema().getIndexByName("SimpleDoc[vec]"); | ||
| Assertions.assertNotNull(typeIndex); | ||
| assertThat(typeIndex).isNotNull(); | ||
|
|
||
| // Insert test documents | ||
| database.transaction(() -> { | ||
|
|
@@ -147,11 +146,11 @@ public void testVectorIndexBuildWithoutCallback() { | |
| final LSMVectorIndex lsmIndex = (LSMVectorIndex) typeIndex.getIndexesOnBuckets()[0]; | ||
| final long totalRecords = lsmIndex.build(100000, null); | ||
|
|
||
| Assertions.assertEquals(100, totalRecords, "Should have indexed 100 documents"); | ||
| assertThat(totalRecords).as("Should have indexed 100 documents").isEqualTo(100); | ||
| } | ||
|
|
||
| @Test | ||
| public void testGraphRebuildWithCallback() { | ||
| void graphRebuildWithCallback() { | ||
| // This test demonstrates the graph building callbacks when rebuilding an existing index | ||
| database.transaction(() -> { | ||
| database.command("sql", "CREATE VERTEX TYPE RebuildDoc IF NOT EXISTS"); | ||
|
|
||
186 changes: 186 additions & 0 deletions
186
.../src/test/java/com/arcadedb/query/sql/function/vector/SQLFunctionVectorNeighborsTest.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,186 @@ | ||
| package com.arcadedb.query.sql.function.vector; | ||
|
|
||
| import com.arcadedb.TestHelper; | ||
| import com.arcadedb.query.sql.executor.BasicCommandContext; | ||
| import com.arcadedb.query.sql.executor.ResultSet; | ||
| import org.junit.jupiter.api.Test; | ||
|
|
||
| import java.util.List; | ||
| import java.util.Map; | ||
|
|
||
| import static org.assertj.core.api.Assertions.assertThat; | ||
|
|
||
| class SQLFunctionVectorNeighborsTest extends TestHelper { | ||
|
|
||
| /** | ||
| * Builds the fixture shared by all tests: the {@code Doc} vertex type with a unique index on | ||
| * {@code name}, an LSM_VECTOR index on {@code embedding}, and five documents with known vectors. | ||
| */ | ||
| @Override | ||
| public void beginTest() { | ||
| database.transaction(() -> { | ||
| database.command("sql", "CREATE VERTEX TYPE Doc IF NOT EXISTS"); | ||
| database.command("sql", "CREATE PROPERTY Doc.name IF NOT EXISTS STRING"); | ||
| database.command("sql", "CREATE PROPERTY Doc.embedding IF NOT EXISTS ARRAY_OF_FLOATS"); | ||
| database.command("sql", "CREATE INDEX IF NOT EXISTS ON Doc (name) UNIQUE"); | ||
|
|
||
| // The vector index has to exist before any data is inserted (needed for automatic indexing) | ||
| final String createVectorIndex = """ | ||
| CREATE INDEX IF NOT EXISTS ON Doc (embedding) LSM_VECTOR | ||
| METADATA { | ||
| dimensions: 3, | ||
| similarity: 'COSINE', | ||
| idPropertyName: 'name' | ||
| }"""; | ||
| database.command("sql", createVectorIndex); | ||
| }); | ||
|
|
||
| // Documents with known vectors, inserted in a second transaction | ||
| database.transaction(() -> { | ||
| saveDoc("docA", 1.0f, 0.0f, 0.0f); | ||
| saveDoc("docB", 0.9f, 0.1f, 0.0f); // Close to A | ||
| saveDoc("docC", 0.0f, 1.0f, 0.0f); // Far from A, close to D | ||
| saveDoc("docD", 0.1f, 0.9f, 0.0f); // Close to C | ||
| saveDoc("docE", 0.0f, 0.0f, 1.0f); // Far from all | ||
| }); | ||
| } | ||
|
|
||
| /** Saves one {@code Doc} vertex with the given name and embedding. */ | ||
| private void saveDoc(final String name, final float... embedding) { | ||
| database.newVertex("Doc").set("name", name).set("embedding", embedding).save(); | ||
| } | ||
|
|
||
| @Test | ||
| void programmaticVectorSearchWithRawVector() { | ||
|
|
||
| final SQLFunctionVectorNeighbors function = new SQLFunctionVectorNeighbors(); | ||
| final BasicCommandContext context = new BasicCommandContext(); | ||
| context.setDatabase(database); | ||
|
|
||
| // Search with a raw vector (similar to docE) | ||
| @SuppressWarnings("unchecked") | ||
| List<Map<String, Object>> results = (List<Map<String, Object>>) function.execute(null, null, null, | ||
| new Object[] { "Doc[embedding]", new float[] { 0.0f, 0.0f, 1.0f }, 3 }, | ||
| context); | ||
|
|
||
| assertThat(results).as("Results should not be null").isNotNull(); | ||
| assertThat(results.isEmpty()).as("Should find at least one neighbor").isFalse(); | ||
| assertThat(results.size() <= 3).as("Should return at most 3 results").isTrue(); | ||
|
|
||
| // Verify results contain the expected structure | ||
| for (Map<String, Object> result : results) { | ||
| assertThat(result.containsKey("vertex")).as("Result should contain 'vertex' key").isTrue(); | ||
| assertThat(result.containsKey("distance")).as("Result should contain 'distance' key").isTrue(); | ||
| assertThat(result.get("distance")).as("Distance should not be null").isNotNull(); | ||
| } | ||
| } | ||
|
|
||
| @Test | ||
| void programmaticVectorSearchWithVertexId() { | ||
|
|
||
| final SQLFunctionVectorNeighbors function = new SQLFunctionVectorNeighbors(); | ||
| final BasicCommandContext context = new BasicCommandContext(); | ||
| context.setDatabase(database); | ||
|
|
||
| // Search using a vertex identifier (docA) | ||
| @SuppressWarnings("unchecked") | ||
| List<Map<String, Object>> results = (List<Map<String, Object>>) function.execute(null, null, null, | ||
| new Object[] { "Doc[embedding]", "docA", 3 }, | ||
| context); | ||
|
|
||
| assertThat(results).isNotNull(); | ||
| assertThat(results.isEmpty()).as("Should find neighbors").isFalse(); | ||
| assertThat(results.size() <= 3).as("Should return at most 3 results").isTrue(); | ||
|
|
||
| // docB should be close to docA (both have similar vectors) | ||
| boolean foundDocB = results.stream() | ||
| .anyMatch(r -> r.get("vertex").toString().contains("docB")); | ||
| assertThat(foundDocB).as("DocB should be found as a neighbor of DocA").isTrue(); | ||
| } | ||
|
|
||
| @Test | ||
| void sqlVectorNeighborsWithRawVector() { | ||
|
|
||
| // SQL query with raw vector | ||
| String query = "SELECT vectorNeighbors('Doc[embedding]', [0.0, 0.0, 1.0], 3) as neighbors"; | ||
| ResultSet results = database.query("sql", query); | ||
|
|
||
| assertThat(results.hasNext()).as("Query should return results").isTrue(); | ||
|
|
||
| var result = results.next(); | ||
| @SuppressWarnings("unchecked") | ||
| List<Map<String, Object>> neighbors = result.getProperty("neighbors"); | ||
|
|
||
| assertThat(neighbors).isNotNull(); | ||
| assertThat(neighbors.isEmpty()).as("Should find at least one neighbor").isFalse(); | ||
| assertThat(neighbors.size() <= 3).as("Should return at most 3 results").isTrue(); | ||
|
|
||
| results.close(); | ||
| } | ||
|
|
||
| @Test | ||
| void sqlVectorNeighborsWithVertexId() { | ||
|
|
||
| // SQL query with vertex identifier | ||
| String query = "SELECT vectorNeighbors('Doc[embedding]', 'docC', 2) as neighbors"; | ||
| ResultSet results = database.query("sql", query); | ||
|
|
||
| assertThat(results.hasNext()).as("Query should return results").isTrue(); | ||
|
|
||
| var result = results.next(); | ||
| @SuppressWarnings("unchecked") | ||
| List<Map<String, Object>> neighbors = result.getProperty("neighbors"); | ||
|
|
||
| assertThat(neighbors).isNotNull(); | ||
| assertThat(neighbors).as("Should find neighbors for docC").isNotEmpty(); | ||
| assertThat(neighbors).as("Should return at most 2 results").hasSizeLessThanOrEqualTo(2); | ||
|
|
||
| results.close(); | ||
robfrank marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| } | ||
|
|
||
| @Test | ||
| void sqlVectorNeighborsInSubquery() { | ||
robfrank marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| // SQL query using vectorNeighbors in a subquery | ||
| String query = """ | ||
| SELECT vectorNeighbors('Doc[embedding]', 'docA', 5) as neighbors | ||
| """; | ||
|
|
||
| ResultSet results = database.query("sql", query); | ||
|
|
||
| assertThat(results.hasNext()).as("Query should return results").isTrue(); | ||
|
|
||
| var result = results.next(); | ||
| @SuppressWarnings("unchecked") | ||
| List<Map<String, Object>> neighbors = result.getProperty("neighbors"); | ||
|
|
||
| assertThat(neighbors).as("Neighbors should not be null").isNotNull(); | ||
| assertThat(neighbors).as("Should find neighbors").isNotEmpty(); | ||
|
|
||
| // Verify neighbors are sorted by distance (closer distances first) | ||
| float previousDistance = -1; | ||
| for (Map<String, Object> neighbor : neighbors) { | ||
| float distance = ((Number) neighbor.get("distance")).floatValue(); | ||
| assertThat(distance ).as("Results should be ordered by distance").isGreaterThanOrEqualTo(previousDistance); | ||
| previousDistance = distance; | ||
| } | ||
|
|
||
| results.close(); | ||
robfrank marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| } | ||
|
|
||
| /** | ||
| * Runs the same raw-vector search with limits 2 and 5 and checks that each result respects its | ||
| * limit and that the larger limit never returns fewer entries than the smaller one. | ||
| */ | ||
| @Test | ||
| void vectorNeighborsLimitParameter() { | ||
| final SQLFunctionVectorNeighbors neighborsFunction = new SQLFunctionVectorNeighbors(); | ||
| final BasicCommandContext commandContext = new BasicCommandContext(); | ||
| commandContext.setDatabase(database); | ||
|
|
||
| // Same query vector, first with a limit of 2, then with a limit of 5 | ||
| final List<Map<String, Object>> atMostTwo = searchWithLimit(neighborsFunction, commandContext, 2); | ||
| final List<Map<String, Object>> atMostFive = searchWithLimit(neighborsFunction, commandContext, 5); | ||
|
|
||
| assertThat(atMostTwo).as("Should respect limit of 2").hasSizeLessThanOrEqualTo(2); | ||
| assertThat(atMostFive).as("Should respect limit of 5").hasSizeLessThanOrEqualTo(5); | ||
| assertThat(atMostFive).as("Larger limit should return more or equal results") | ||
| .hasSizeGreaterThanOrEqualTo(atMostTwo.size()); | ||
| } | ||
|
|
||
| /** Executes the function on {@code Doc[embedding]} with the vector (1, 0, 0) and the given limit. */ | ||
| @SuppressWarnings("unchecked") | ||
| private static List<Map<String, Object>> searchWithLimit(final SQLFunctionVectorNeighbors function, | ||
| final BasicCommandContext context, final int limit) { | ||
| return (List<Map<String, Object>>) function.execute(null, null, null, | ||
| new Object[] { "Doc[embedding]", new float[] { 1.0f, 0.0f, 0.0f }, limit }, | ||
| context); | ||
| } | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.