Skip to content

Commit 044b140

Browse files
committed
fix: implement NOT operator with LINQ post-filtering
The NOT operator now works correctly using a hybrid FTS + LINQ approach: - FTS5 handles positive terms only - NOT terms are excluded via LINQ post-filtering - Standalone NOT queries now work (previously crashed) Changes: - Add FtsQueryResult record to separate FTS query from NOT terms - Modify FtsQueryExtractor to collect NOT terms separately - Add ApplyNotTermFiltering for LINQ exclusion - Add GetAllDocumentsAsync for standalone NOT queries - Add 7 E2E tests for NOT operator scenarios - Mark known issue as resolved in KNOWN-ISSUES.md Usage note: Use explicit AND between terms, e.g., "foo AND NOT bar"
1 parent b596a17 commit 044b140

File tree

4 files changed

+416
-57
lines changed

4 files changed

+416
-57
lines changed

KNOWN-ISSUES.md

Lines changed: 27 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -2,52 +2,7 @@
22

33
## Search Functionality
44

5-
### 1. NOT Operator Issues
6-
7-
**Status:** Known bug, not yet fixed
8-
9-
**Issue:** The NOT operator has two problems:
10-
11-
1. **Standalone NOT crashes:** `km search "NOT foo"` throws FTS5 syntax error
12-
2. **NOT doesn't exclude:** `km search "foo NOT bar"` returns documents containing both instead of excluding "bar"
13-
14-
**Examples:**
15-
```bash
16-
# Problem 1: Standalone NOT crashes
17-
km search "NOT important"
18-
# Error: SQLite Error 1: 'fts5: syntax error near "NOT"'
19-
20-
# Problem 2: NOT doesn't exclude
21-
km put "foo and bar together"
22-
km put "only foo here"
23-
km search "foo NOT bar"
24-
# Expected: 1 result (only foo here)
25-
# Actual: 2 results (both documents)
26-
```
27-
28-
**Root Cause:**
29-
- FTS5 requires NOT to have a left operand (e.g., `foo NOT bar`), standalone `NOT term` is invalid
30-
- Even when valid, FTS query extraction passes `"NOT (bar)"` to SQLite FTS5 which doesn't work as expected
31-
- No LINQ post-filtering is applied to exclude NOT terms
32-
- The architecture assumes FTS handles all logic, but NOT needs LINQ filtering
33-
34-
**Workaround:**
35-
- For literal text containing "NOT", use quotes: `km search '"NOT important"'`
36-
- Avoid using NOT as a boolean operator
37-
38-
**Fix Required:**
39-
1. Handle standalone NOT gracefully (either treat as literal or provide clear error)
40-
2. Split query: extract positive terms for FTS, negative terms for filtering
41-
3. Apply LINQ filter to FTS results using QueryLinqBuilder
42-
4. Filter out documents matching NOT terms
43-
44-
**Files Affected:**
45-
- `src/Core/Search/NodeSearchService.cs:190` - ExtractLogical NOT handling
46-
- Need to add LINQ filtering after line 89
47-
48-
---
49-
50-
### 2. Field Queries with Quoted Values Fail
5+
### 1. Field Queries with Quoted Values Fail
516

527
**Status:** Known bug, not yet fixed
538

@@ -72,6 +27,32 @@ km search 'content:"user:password"'
7227

7328
## Resolved Issues
7429

30+
### NOT Operator Issues (Resolved)
31+
32+
**Status:** Fixed
33+
34+
**Issue:** The NOT operator had two problems:
35+
1. **Standalone NOT crashed:** `km search "NOT foo"` threw FTS5 syntax error
36+
2. **NOT didn't exclude:** `km search "foo AND NOT bar"` returned documents containing both instead of excluding "bar"
37+
38+
**Resolution:**
39+
- Implemented `FtsQueryResult` record to separate FTS query string from NOT terms
40+
- Modified `FtsQueryExtractor` to collect NOT terms separately instead of passing them to FTS5
41+
- Added LINQ post-filtering in `NodeSearchService.SearchAsync()` to exclude NOT terms
42+
- Added `GetAllDocumentsAsync()` in `SqliteFtsIndex` to handle standalone NOT queries
43+
- Case-insensitive filtering checks title, description, and content fields
44+
- E2E tests added in `SearchEndToEndTests.cs` (tests: `KnownIssue1_*`)
45+
46+
**Important Note:** The infix query parser requires explicit AND between terms. Use:
47+
- `foo AND NOT bar` (correct) instead of `foo NOT bar` (incorrect - ignores NOT)
48+
- `(foo OR baz) AND NOT bar` (correct) instead of `(foo OR baz) NOT bar` (incorrect)
49+
50+
**Files Changed:**
51+
- `src/Core/Search/NodeSearchService.cs` - Added `FtsQueryResult`, `NotTerm` records and LINQ filtering
52+
- `src/Core/Search/SqliteFtsIndex.cs` - Added `GetAllDocumentsAsync()` for standalone NOT support
53+
54+
---
55+
7556
### Quoted Phrases Don't Escape Operators (Resolved)
7657

7758
**Status:** Fixed

src/Core/Search/NodeSearchService.cs

Lines changed: 169 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,25 @@
66

77
namespace KernelMemory.Core.Search;
88

9+
/// <summary>
10+
/// Result of FTS query extraction from the AST.
11+
/// Contains the FTS query string for SQLite and a list of NOT terms for post-filtering.
12+
/// SQLite FTS5 has limited NOT support (requires left operand), so NOT terms
13+
/// are filtered via LINQ after FTS returns initial results.
14+
/// </summary>
15+
/// <param name="FtsQuery">The FTS5 query string for positive terms.</param>
16+
/// <param name="NotTerms">Terms to exclude via LINQ post-filtering. Each term includes optional field info.</param>
17+
[System.Diagnostics.CodeAnalysis.SuppressMessage("Performance", "CA1819:Properties should not return arrays")]
18+
public sealed record FtsQueryResult(string FtsQuery, NotTerm[] NotTerms);
19+
20+
/// <summary>
21+
/// Represents a term that should be excluded from search results.
22+
/// Used for LINQ post-filtering since SQLite FTS5 NOT has limitations.
23+
/// </summary>
24+
/// <param name="Term">The term to exclude.</param>
25+
/// <param name="Field">Optional field to check (title/description/content). If null, checks all fields.</param>
26+
public sealed record NotTerm(string Term, string? Field);
27+
928
/// <summary>
1029
/// Per-node search service.
1130
/// Executes searches within a single node's indexes.
@@ -61,12 +80,12 @@ public NodeSearchService(
6180
// Query the FTS index
6281
var maxResults = request.MaxResultsPerNode ?? SearchConstants.DefaultMaxResultsPerNode;
6382

64-
// Convert QueryNode to FTS query string
65-
var ftsQuery = this.ExtractFtsQuery(queryNode);
83+
// Convert QueryNode to FTS query string and extract NOT terms for post-filtering
84+
var queryResult = this.ExtractFtsQuery(queryNode);
6685

6786
// Search the FTS index
6887
var ftsMatches = await this._ftsIndex.SearchAsync(
69-
ftsQuery,
88+
queryResult.FtsQuery,
7089
maxResults,
7190
cts.Token).ConfigureAwait(false);
7291

@@ -95,6 +114,13 @@ public NodeSearchService(
95114
}
96115
}
97116

117+
// Apply NOT term filtering via LINQ (SQLite FTS5 NOT has limitations)
118+
// Filter out any documents that contain the NOT terms
119+
if (queryResult.NotTerms.Length > 0)
120+
{
121+
results = this.ApplyNotTermFiltering(results, queryResult.NotTerms);
122+
}
123+
98124
stopwatch.Stop();
99125
return ([.. results], stopwatch.Elapsed);
100126
}
@@ -117,11 +143,79 @@ public NodeSearchService(
117143
}
118144

119145
/// <summary>
120-
/// Extract FTS query string from query AST.
121-
/// Converts the AST to SQLite FTS5 query syntax.
122-
/// Only includes text search terms; filtering is done via LINQ on results.
146+
/// Apply NOT term filtering to results via LINQ.
147+
/// Excludes documents that contain any of the NOT terms.
148+
/// </summary>
149+
/// <param name="results">The search results to filter.</param>
150+
/// <param name="notTerms">The terms to exclude.</param>
151+
/// <returns>Filtered results excluding documents containing NOT terms.</returns>
152+
private List<SearchIndexResult> ApplyNotTermFiltering(List<SearchIndexResult> results, NotTerm[] notTerms)
153+
{
154+
return results
155+
.Where(result => !this.ContainsAnyNotTerm(result, notTerms))
156+
.ToList();
157+
}
158+
159+
/// <summary>
160+
/// Check if a result contains any of the NOT terms.
161+
/// </summary>
162+
/// <param name="result">The search result to check.</param>
163+
/// <param name="notTerms">The NOT terms to check for.</param>
164+
/// <returns>True if the result contains any NOT term.</returns>
165+
private bool ContainsAnyNotTerm(SearchIndexResult result, NotTerm[] notTerms)
166+
{
167+
foreach (var notTerm in notTerms)
168+
{
169+
if (this.ContainsNotTerm(result, notTerm))
170+
{
171+
return true;
172+
}
173+
}
174+
175+
return false;
176+
}
177+
178+
/// <summary>
179+
/// Check if a result contains a specific NOT term.
180+
/// </summary>
181+
/// <param name="result">The search result to check.</param>
182+
/// <param name="notTerm">The NOT term to check for.</param>
183+
/// <returns>True if the result contains the NOT term.</returns>
184+
private bool ContainsNotTerm(SearchIndexResult result, NotTerm notTerm)
185+
{
186+
// Case-insensitive contains check
187+
var term = notTerm.Term;
188+
189+
// Check specific field if specified
190+
if (notTerm.Field != null)
191+
{
192+
var fieldValue = notTerm.Field.ToLowerInvariant() switch
193+
{
194+
"title" => result.Title ?? string.Empty,
195+
"description" => result.Description ?? string.Empty,
196+
"content" => result.Content ?? string.Empty,
197+
_ => string.Empty
198+
};
199+
200+
return fieldValue.Contains(term, StringComparison.OrdinalIgnoreCase);
201+
}
202+
203+
// Check all FTS fields (title, description, content)
204+
var title = result.Title ?? string.Empty;
205+
var description = result.Description ?? string.Empty;
206+
var content = result.Content ?? string.Empty;
207+
208+
return title.Contains(term, StringComparison.OrdinalIgnoreCase) ||
209+
description.Contains(term, StringComparison.OrdinalIgnoreCase) ||
210+
content.Contains(term, StringComparison.OrdinalIgnoreCase);
211+
}
212+
213+
/// <summary>
214+
/// Extract FTS query string and NOT terms from query AST.
215+
/// Converts the AST to SQLite FTS5 query syntax for positive terms.
216+
/// NOT terms are collected separately for LINQ post-filtering.
123217
/// </summary>
124-
private string ExtractFtsQuery(QueryNode queryNode)
218+
private FtsQueryResult ExtractFtsQuery(QueryNode queryNode)
125219
{
126220
var visitor = new FtsQueryExtractor();
127221
return visitor.Extract(queryNode);
@@ -131,9 +225,12 @@ private string ExtractFtsQuery(QueryNode queryNode)
131225
/// Visitor that extracts FTS query terms from the AST.
132226
/// Focuses only on TextSearchNode and field-specific text searches.
133227
/// Logical operators are preserved for FTS query syntax.
228+
/// NOT operators are handled specially - their terms are collected for LINQ post-filtering.
134229
/// </summary>
135230
private sealed class FtsQueryExtractor
136231
{
232+
private readonly List<NotTerm> _notTerms = [];
233+
137234
/// <summary>
138235
/// SQLite FTS5 reserved words that must be quoted when used as search terms.
139236
/// These keywords have special meaning in FTS5 query syntax.
@@ -143,10 +240,15 @@ private sealed class FtsQueryExtractor
143240
"AND", "OR", "NOT", "NEAR"
144241
};
145242

146-
public string Extract(QueryNode node)
243+
public FtsQueryResult Extract(QueryNode node)
147244
{
148245
var terms = this.ExtractTerms(node);
149-
return string.IsNullOrEmpty(terms) ? "*" : terms;
246+
247+
// If only NOT terms exist (no positive terms), use wildcard to get all documents
248+
// then filter with NOT terms
249+
var ftsQuery = string.IsNullOrEmpty(terms) ? "*" : terms;
250+
251+
return new FtsQueryResult(ftsQuery, [.. this._notTerms]);
150252
}
151253

152254
private string ExtractTerms(QueryNode node)
@@ -198,6 +300,14 @@ private string ExtractTextSearch(TextSearchNode node)
198300

199301
private string ExtractLogical(LogicalNode node)
200302
{
303+
// Handle NOT and NOR specially - collect terms for LINQ post-filtering
304+
if (node.Operator == LogicalOperator.Not || node.Operator == LogicalOperator.Nor)
305+
{
306+
this.CollectNotTerms(node);
307+
// Return empty string - NOT terms are not included in FTS query
308+
return string.Empty;
309+
}
310+
201311
var childTerms = node.Children
202312
.Select(this.ExtractTerms)
203313
.Where(t => !string.IsNullOrEmpty(t))
@@ -212,12 +322,60 @@ private string ExtractLogical(LogicalNode node)
212322
{
213323
LogicalOperator.And => string.Join(" AND ", childTerms.Select(t => $"({t})")),
214324
LogicalOperator.Or => string.Join(" OR ", childTerms.Select(t => $"({t})")),
215-
LogicalOperator.Not => childTerms.Length > 0 ? $"NOT ({childTerms[0]})" : string.Empty,
216-
LogicalOperator.Nor => string.Join(" AND ", childTerms.Select(t => $"NOT ({t})")),
217325
_ => string.Empty
218326
};
219327
}
220328

329+
/// <summary>
330+
/// Collect NOT terms from a NOT or NOR node.
331+
/// These terms will be filtered via LINQ after FTS returns results.
332+
/// </summary>
333+
private void CollectNotTerms(LogicalNode node)
334+
{
335+
foreach (var child in node.Children)
336+
{
337+
this.CollectNotTermsFromNode(child);
338+
}
339+
}
340+
341+
/// <summary>
342+
/// Recursively collect NOT terms from a node.
343+
/// </summary>
344+
private void CollectNotTermsFromNode(QueryNode node)
345+
{
346+
switch (node)
347+
{
348+
case TextSearchNode textNode:
349+
// Extract the term and optional field
350+
this._notTerms.Add(new NotTerm(textNode.SearchText, textNode.Field?.FieldPath));
351+
break;
352+
353+
case ComparisonNode comparisonNode:
354+
// Handle field:value comparisons for NOT
355+
if ((comparisonNode.Operator == ComparisonOperator.Contains ||
356+
comparisonNode.Operator == ComparisonOperator.Equal) &&
357+
comparisonNode.Field?.FieldPath != null &&
358+
comparisonNode.Value != null)
359+
{
360+
var term = comparisonNode.Value.AsString();
361+
this._notTerms.Add(new NotTerm(term, comparisonNode.Field.FieldPath));
362+
}
363+
364+
break;
365+
366+
case LogicalNode logicalNode:
367+
// Recursively collect from nested logical nodes
368+
// For nested NOT/NOR, we add all children as NOT terms
369+
// For nested AND/OR within NOT, all their children become NOT terms
370+
foreach (var child in logicalNode.Children)
371+
{
372+
this.CollectNotTermsFromNode(child);
373+
}
374+
375+
break;
376+
}
377+
}
378+
221379
private string ExtractComparison(ComparisonNode node)
222380
{
223381
// Extract text search from Contains OR Equal operator on FTS fields

0 commit comments

Comments
 (0)