Skip to content

Commit e1862ca

Browse files
ExtReMLapinclaude
authored andcommitted
Fixes #3244 : label filtering in Cypher MATCH relationship patterns (#3252)
* fix: Cypher target node label filtering and bound variable tracking Two bugs caused incorrect results in multi-MATCH/OPTIONAL MATCH Cypher queries: 1. MatchRelationshipStep never filtered target vertices by label. A pattern like (a:CHUNK)<-[r:in]-(b:NER) would return ANY connected vertex as b, not just NER vertices. Fixed by passing the target NodePattern and checking vertex type against label constraints. 2. Already-bound variables with labels were treated as unbound. The sourceAlreadyBound check required !sourceNode.hasLabels(), so writing (searchedChunk:CHUNK) in a subsequent MATCH created a full type scan instead of using the already-bound value. Fixed by tracking bound variable names across MATCH clauses. Also adds identity checking: when a target variable is already bound from a previous step, the traversed vertex must match the bound vertex identity. https://claude.ai/code/session_017hhsQf7fzBvXcGSeg48hfP * fix: use JUnit assertions instead of AssertJ to avoid ambiguous overload AssertJ's assertThat() has ambiguous overloads for Object return types from getProperty(). Switched to JUnit assertEquals/assertTrue/assertFalse. https://claude.ai/code/session_017hhsQf7fzBvXcGSeg48hfP --------- Co-authored-by: Claude <[email protected]> (cherry picked from commit def0349)
1 parent 0a8d8d9 commit e1862ca

File tree

4 files changed

+389
-20
lines changed

4 files changed

+389
-20
lines changed

engine/src/main/java/com/arcadedb/query/opencypher/executor/CypherExecutionPlan.java

Lines changed: 55 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -576,6 +576,10 @@ private AbstractExecutionStep buildExecutionStepsWithOrder(final CommandContext
576576
// Get function factory from evaluator for steps that need it
577577
final CypherFunctionFactory functionFactory = expressionEvaluator != null ? expressionEvaluator.getFunctionFactory() : null;
578578

579+
// Track variables bound across MATCH clauses so subsequent MATCHes
580+
// can detect already-bound variables and avoid Cartesian products
581+
final Set<String> boundVariables = new HashSet<>();
582+
579583
// OPTIMIZATION: Check for simple COUNT(*) pattern that can use Type.count() O(1) operation
580584
// Pattern: MATCH (a:TypeName) RETURN COUNT(a) as alias
581585
final AbstractExecutionStep typeCountStep = tryCreateTypeCountOptimization(context);
@@ -625,7 +629,7 @@ public String prettyPrint(final int depth, final int indent) {
625629

626630
case MATCH:
627631
final MatchClause matchClause = entry.getTypedClause();
628-
currentStep = buildMatchStep(matchClause, currentStep, context);
632+
currentStep = buildMatchStep(matchClause, currentStep, context, boundVariables);
629633
break;
630634

631635
case WITH:
@@ -801,9 +805,23 @@ private AbstractExecutionStep buildWithStep(final WithClause withClause,
801805

802806
/**
803807
* Builds execution step for a MATCH clause.
808+
* Backward-compatible overload without bound variable tracking.
804809
*/
805810
private AbstractExecutionStep buildMatchStep(final MatchClause matchClause, AbstractExecutionStep currentStep,
806811
final CommandContext context) {
812+
return buildMatchStep(matchClause, currentStep, context, new HashSet<>());
813+
}
814+
815+
/**
816+
* Builds execution step for a MATCH clause with bound variable tracking.
817+
*
818+
* @param matchClause the MATCH clause to build
819+
* @param currentStep current step in the execution chain
820+
* @param context command context
821+
* @param boundVariables set of variable names already bound in previous steps (updated in-place)
822+
*/
823+
private AbstractExecutionStep buildMatchStep(final MatchClause matchClause, AbstractExecutionStep currentStep,
824+
final CommandContext context, final Set<String> boundVariables) {
807825
if (!matchClause.hasPathPatterns()) {
808826
return currentStep;
809827
}
@@ -826,6 +844,13 @@ private AbstractExecutionStep buildMatchStep(final MatchClause matchClause, Abst
826844
final String variable = nodePattern.getVariable() != null ? nodePattern.getVariable() : ("n" + patternIndex);
827845
matchVariables.add(variable);
828846

847+
// Check if this variable was already bound in a previous MATCH clause
848+
if (boundVariables.contains(variable)) {
849+
// Variable already bound - skip creating a new MatchNodeStep
850+
// The bound value will be used from the input result
851+
continue;
852+
}
853+
829854
// OPTIMIZATION: Extract ID filter for this variable to avoid Cartesian product
830855
final String idFilter = extractIdFilter(whereClause, variable);
831856
final MatchNodeStep matchStep = new MatchNodeStep(variable, nodePattern, context, idFilter);
@@ -848,8 +873,11 @@ private AbstractExecutionStep buildMatchStep(final MatchClause matchClause, Abst
848873
final NodePattern sourceNode = pathPattern.getFirstNode();
849874
final String sourceVar = sourceNode.getVariable() != null ? sourceNode.getVariable() : "a";
850875

876+
// Check if source node variable is already bound (either from previous MATCH or
877+
// from being in the boundVariables set). Previously this only checked for
878+
// unlabeled/unpropertied nodes, which broke when labels were repeated.
851879
final boolean sourceAlreadyBound = stepBeforeMatch != null &&
852-
!sourceNode.hasLabels() && !sourceNode.hasProperties();
880+
(boundVariables.contains(sourceVar) || (!sourceNode.hasLabels() && !sourceNode.hasProperties()));
853881

854882
if (!sourceAlreadyBound) {
855883
matchVariables.add(sourceVar);
@@ -898,7 +926,10 @@ private AbstractExecutionStep buildMatchStep(final MatchClause matchClause, Abst
898926
if (relPattern.isVariableLength()) {
899927
nextStep = new ExpandPathStep(sourceVar, pathVariable, targetVar, relPattern, context);
900928
} else {
901-
nextStep = new MatchRelationshipStep(sourceVar, relVar, targetVar, relPattern, pathVariable, context);
929+
// Pass target node pattern for label filtering and bound variables
930+
// for identity checking on already-bound target variables
931+
nextStep = new MatchRelationshipStep(sourceVar, relVar, targetVar, relPattern, pathVariable,
932+
targetNode, boundVariables, context);
902933
}
903934

904935
if (isOptional && matchChainStart == null) {
@@ -942,6 +973,9 @@ private AbstractExecutionStep buildMatchStep(final MatchClause matchClause, Abst
942973
currentStep = optionalStep;
943974
}
944975

976+
// Update bound variables with newly bound variables from this MATCH
977+
boundVariables.addAll(matchVariables);
978+
945979
return currentStep;
946980
}
947981

@@ -988,6 +1022,10 @@ public String prettyPrint(final int depth, final int indent) {
9881022
};
9891023
}
9901024

1025+
// Track variables bound across MATCH clauses so subsequent MATCHes
1026+
// can detect already-bound variables and avoid Cartesian products
1027+
final Set<String> legacyBoundVariables = new HashSet<>();
1028+
9911029
// Step 1: MATCH clauses - fetch nodes
9921030
// Process ALL MATCH clauses (not just the first)
9931031
if (!statement.getMatchClauses().isEmpty()) {
@@ -1015,6 +1053,12 @@ public String prettyPrint(final int depth, final int indent) {
10151053
final String variable = nodePattern.getVariable() != null ? nodePattern.getVariable() : ("n" + patternIndex);
10161054
matchVariables.add(variable); // Track variable for OPTIONAL MATCH
10171055

1056+
// Check if this variable was already bound in a previous MATCH clause
1057+
if (legacyBoundVariables.contains(variable)) {
1058+
// Variable already bound - skip creating a new MatchNodeStep
1059+
continue;
1060+
}
1061+
10181062
// OPTIMIZATION: Extract ID filter from WHERE clause (if present) for pushdown
10191063
final WhereClause matchWhere = matchClause.hasWhereClause() ? matchClause.getWhereClause() : statement.getWhereClause();
10201064
final String idFilter = extractIdFilter(matchWhere, variable);
@@ -1042,10 +1086,9 @@ public String prettyPrint(final int depth, final int indent) {
10421086
final String sourceVar = sourceNode.getVariable() != null ? sourceNode.getVariable() : "a";
10431087

10441088
// Check if source node is already bound (for multiple MATCH clauses or OPTIONAL MATCH)
1045-
// If the source node has no labels/properties and there's a previous step,
1046-
// it's likely referring to an already-bound variable - skip creating MatchNodeStep
1089+
// Check both legacy bound variables AND the old heuristic (no labels/properties)
10471090
final boolean sourceAlreadyBound = stepBeforeMatch != null &&
1048-
!sourceNode.hasLabels() && !sourceNode.hasProperties();
1091+
(legacyBoundVariables.contains(sourceVar) || (!sourceNode.hasLabels() && !sourceNode.hasProperties()));
10491092

10501093
if (!sourceAlreadyBound) {
10511094
// Only track the source variable if we're creating a new binding for it
@@ -1109,8 +1152,9 @@ public String prettyPrint(final int depth, final int indent) {
11091152
// Variable-length path - pass path variable for named path support
11101153
nextStep = new ExpandPathStep(sourceVar, pathVariable, targetVar, relPattern, context);
11111154
} else {
1112-
// Fixed-length relationship - pass path variable
1113-
nextStep = new MatchRelationshipStep(sourceVar, relVar, targetVar, relPattern, pathVariable, context);
1155+
// Fixed-length relationship - pass path variable, target node pattern, and bound variables
1156+
nextStep = new MatchRelationshipStep(sourceVar, relVar, targetVar, relPattern, pathVariable,
1157+
targetNode, legacyBoundVariables, context);
11141158
}
11151159

11161160
// Chain the relationship step
@@ -1168,6 +1212,9 @@ public String prettyPrint(final int depth, final int indent) {
11681212
// The output of OptionalMatchStep becomes currentStep
11691213
currentStep = optionalStep;
11701214
}
1215+
1216+
// Update bound variables with newly bound variables from this MATCH
1217+
legacyBoundVariables.addAll(matchVariables);
11711218
} else {
11721219
// Phase 1: Use raw pattern string - create a simple stub
11731220
final ResultInternal stubResult = new ResultInternal();

engine/src/main/java/com/arcadedb/query/opencypher/executor/steps/MatchRelationshipStep.java

Lines changed: 60 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import com.arcadedb.graph.Edge;
2323
import com.arcadedb.graph.Vertex;
2424
import com.arcadedb.query.opencypher.ast.Direction;
25+
import com.arcadedb.query.opencypher.ast.NodePattern;
2526
import com.arcadedb.query.opencypher.ast.RelationshipPattern;
2627
import com.arcadedb.query.opencypher.traversal.TraversalPath;
2728
import com.arcadedb.query.sql.executor.AbstractExecutionStep;
@@ -34,6 +35,7 @@
3435
import java.util.Iterator;
3536
import java.util.List;
3637
import java.util.NoSuchElementException;
38+
import java.util.Set;
3739

3840
/**
3941
* Execution step for matching relationship patterns.
@@ -50,6 +52,8 @@ public class MatchRelationshipStep extends AbstractExecutionStep {
5052
private final String targetVariable;
5153
private final RelationshipPattern pattern;
5254
private final String pathVariable;
55+
private final NodePattern targetNodePattern;
56+
private final Set<String> boundVariableNames;
5357

5458
/**
5559
* Creates a match relationship step.
@@ -77,12 +81,32 @@ public MatchRelationshipStep(final String sourceVariable, final String relations
7781
*/
7882
public MatchRelationshipStep(final String sourceVariable, final String relationshipVariable, final String targetVariable,
7983
final RelationshipPattern pattern, final String pathVariable, final CommandContext context) {
84+
this(sourceVariable, relationshipVariable, targetVariable, pattern, pathVariable, null, null, context);
85+
}
86+
87+
/**
88+
* Creates a match relationship step with target node filtering and bound variable awareness.
89+
*
90+
* @param sourceVariable variable name for source vertex
91+
* @param relationshipVariable variable name for relationship (can be null)
92+
* @param targetVariable variable name for target vertex
93+
* @param pattern relationship pattern to match
94+
* @param pathVariable path variable name (e.g., p in p = (a)-[r]->(b)), can be null
95+
* @param targetNodePattern target node pattern for label filtering (can be null)
96+
* @param boundVariableNames set of variable names already bound in previous steps (can be null)
97+
* @param context command context
98+
*/
99+
public MatchRelationshipStep(final String sourceVariable, final String relationshipVariable, final String targetVariable,
100+
final RelationshipPattern pattern, final String pathVariable, final NodePattern targetNodePattern,
101+
final Set<String> boundVariableNames, final CommandContext context) {
80102
super(context);
81103
this.sourceVariable = sourceVariable;
82104
this.relationshipVariable = relationshipVariable;
83105
this.targetVariable = targetVariable;
84106
this.pattern = pattern;
85107
this.pathVariable = pathVariable;
108+
this.targetNodePattern = targetNodePattern;
109+
this.boundVariableNames = boundVariableNames;
86110
}
87111

88112
@Override
@@ -130,11 +154,29 @@ private void fetchMore(final int n) {
130154
final Edge edge = currentEdges.next();
131155
final Vertex targetVertex = getTargetVertex(edge, (Vertex) lastResult.getProperty(sourceVariable));
132156

133-
// Filter by target type if specified
157+
// Filter by edge type if specified
134158
if (pattern.hasTypes() && !matchesEdgeType(edge)) {
135159
continue;
136160
}
137161

162+
// Filter by target node label if specified in the pattern
163+
if (targetNodePattern != null && targetNodePattern.hasLabels()) {
164+
if (!matchesTargetLabel(targetVertex)) {
165+
continue;
166+
}
167+
}
168+
169+
// If the target variable is already bound from a previous step,
170+
// verify the traversed vertex matches the bound value (identity check)
171+
if (boundVariableNames != null && boundVariableNames.contains(targetVariable)) {
172+
final Object boundValue = lastResult.getProperty(targetVariable);
173+
if (boundValue instanceof Vertex) {
174+
if (!((Vertex) boundValue).getIdentity().equals(targetVertex.getIdentity())) {
175+
continue;
176+
}
177+
}
178+
}
179+
138180
// Create result with edge and target vertex
139181
final ResultInternal result = new ResultInternal();
140182

@@ -232,6 +274,23 @@ private Vertex getTargetVertex(final Edge edge, final Vertex sourceVertex) {
232274
}
233275
}
234276

277+
/**
278+
* Checks if a target vertex matches the label constraints from the target node pattern.
279+
*/
280+
private boolean matchesTargetLabel(final Vertex vertex) {
281+
if (targetNodePattern == null || !targetNodePattern.hasLabels()) {
282+
return true;
283+
}
284+
285+
final String vertexType = vertex.getTypeName();
286+
for (final String label : targetNodePattern.getLabels()) {
287+
if (label.equals(vertexType)) {
288+
return true;
289+
}
290+
}
291+
return false;
292+
}
293+
235294
/**
236295
* Checks if an edge matches the type filter.
237296
*/

0 commit comments

Comments
 (0)