diff --git a/mem0/vector_stores/qdrant.py b/mem0/vector_stores/qdrant.py index 59ee9a92c1..a2ee05645a 100644 --- a/mem0/vector_stores/qdrant.py +++ b/mem0/vector_stores/qdrant.py @@ -1,12 +1,16 @@ import logging import os import shutil +from typing import Optional from qdrant_client import QdrantClient from qdrant_client.models import ( Distance, FieldCondition, Filter, + MatchAny, + MatchExcept, + MatchText, MatchValue, PointIdsList, PointStruct, @@ -138,26 +142,148 @@ def insert(self, vectors: list, payloads: list = None, ids: list = None): ] self.client.upsert(collection_name=self.collection_name, points=points) - def _create_filter(self, filters: dict) -> Filter: + def _build_field_condition(self, key: str, value) -> Optional[FieldCondition]: + """ + Build a single FieldCondition from a key-value filter pair. + + Supports the enhanced filter syntax documented at + https://docs.mem0.ai/open-source/features/metadata-filtering + + Args: + key (str): The payload field name. + value: A scalar for simple equality, or a dict with one operator key. + + Returns: + Optional[FieldCondition]: The Qdrant field condition, or None if the + value is the wildcard '*' (match any / field exists — skip filter). + """ + if not isinstance(value, dict): + if value == "*": + # Wildcard: match any value. Qdrant has no direct "field exists" + # condition via FieldCondition, so we skip this filter (match all). + return None + if isinstance(value, list): + # List shorthand: {"field": ["a", "b"]} treated as in-operator. + return FieldCondition(key=key, match=MatchAny(any=value)) + # Simple equality: {"field": "value"} + return FieldCondition(key=key, match=MatchValue(value=value)) + + ops = set(value.keys()) + range_ops = {"gt", "gte", "lt", "lte"} + non_range_ops = ops - range_ops + + if ops & range_ops: + if non_range_ops: + raise ValueError( + f"Cannot mix range operators ({ops & range_ops}) with " + f"non-range operators ({non_range_ops}) for field '{key}'. " + f"Use AND to combine them as separate conditions." + ) + range_kwargs = {op: value[op] for op in range_ops if op in value} + return FieldCondition(key=key, range=Range(**range_kwargs)) + elif "eq" in value: + return FieldCondition(key=key, match=MatchValue(value=value["eq"])) + elif "ne" in value: + return FieldCondition(key=key, match=MatchExcept(**{"except": [value["ne"]]})) + elif "in" in value: + return FieldCondition(key=key, match=MatchAny(any=value["in"])) + elif "nin" in value: + return FieldCondition(key=key, match=MatchExcept(**{"except": value["nin"]})) + elif "contains" in value or "icontains" in value: + # MatchText: with a full-text index, tokenized matching (all words must appear). + # Without a full-text index, exact substring match. + op = "icontains" if "icontains" in value else "contains" + text = value[op] + if op == "icontains": + logger.debug( + "icontains on field '%s': Qdrant MatchText case sensitivity depends on " + "full-text index configuration. Without a full-text index this behaves " + "as a case-sensitive substring match (same as 'contains').", + key, + ) + return FieldCondition(key=key, match=MatchText(text=text)) + else: + supported = {"eq", "ne", "gt", "gte", "lt", "lte", "in", "nin", "contains", "icontains"} + raise ValueError( + f"Unsupported filter operator(s) for field '{key}': {ops}. " + f"Supported operators: {supported}" + ) + + def _create_filter(self, filters: dict) -> Optional[Filter]: """ Create a Filter object from the provided filters. + Supports the enhanced filter syntax with comparison operators (eq, ne, + gt, gte, lt, lte), list operators (in, nin), string operators (contains, + icontains), and logical operators (AND, OR, NOT). + Args: filters (dict): Filters to apply. Returns: - Filter: The created Filter object. + Filter: The created Filter object, or None if filters is empty. """ if not filters: return None - - conditions = [] + + # Normalize $or/$not/$and → OR/NOT/AND and deduplicate. + # Memory._process_metadata_filters() renames OR→$or and NOT→$not, + # but effective_filters retains the original OR/NOT keys from + # deepcopy(input_filters). Without dedup the same sub-conditions + # would be evaluated twice. + key_map = {"$or": "OR", "$not": "NOT", "$and": "AND"} + normalized = {} for key, value in filters.items(): - if isinstance(value, dict) and "gte" in value and "lte" in value: - conditions.append(FieldCondition(key=key, range=Range(gte=value["gte"], lte=value["lte"]))) + norm_key = key_map.get(key, key) + if norm_key not in normalized: + normalized[norm_key] = value + + must = [] + should = [] + must_not = [] + + for key, value in normalized.items(): + if key in ("AND", "OR", "NOT"): + if not isinstance(value, list): + raise ValueError( + f"{key} filter value must be a list of filter dicts, " + f"got {type(value).__name__}" + ) + for i, item in enumerate(value): + if not isinstance(item, dict): + raise ValueError( + f"{key} filter list item at index {i} must be a dict, " + f"got {type(item).__name__}: {item!r}" + ) + + if key == "AND": + for sub in value: + built = self._create_filter(sub) + if built: + must.append(built) + elif key == "OR": + for sub in value: + built = self._create_filter(sub) + if built: + should.append(built) + elif key == "NOT": + for sub in value: + built = self._create_filter(sub) + if built: + must_not.append(built) else: - conditions.append(FieldCondition(key=key, match=MatchValue(value=value))) - return Filter(must=conditions) if conditions else None + condition = self._build_field_condition(key, value) + if condition is not None: + must.append(condition) + + if not any([must, should, must_not]): + return None + + return Filter( + must=must or None, + should=should or None, + must_not=must_not or None, + ) def search(self, query: str, vectors: list, limit: int = 5, filters: dict = None) -> list: """ diff --git a/tests/vector_stores/test_qdrant.py b/tests/vector_stores/test_qdrant.py index 3b2f6be197..8d9246a6ef 100644 --- a/tests/vector_stores/test_qdrant.py +++ b/tests/vector_stores/test_qdrant.py @@ -5,9 +5,15 @@ from qdrant_client import QdrantClient from qdrant_client.models import ( Distance, + FieldCondition, Filter, + MatchAny, + MatchExcept, + MatchText, + MatchValue, PointIdsList, PointStruct, + Range, VectorParams, ) @@ -298,3 +304,456 @@ def test_col_info(self): def tearDown(self): del self.qdrant + + +class TestQdrantEnhancedFilters(unittest.TestCase): + """Tests for enhanced metadata filtering operators (issue #3975).""" + + def setUp(self): + self.client_mock = MagicMock(spec=QdrantClient) + self.qdrant = Qdrant( + collection_name="test_collection", + embedding_model_dims=128, + client=self.client_mock, + ) + + # ------------------------------------------------------------------ # + # _build_field_condition # + # ------------------------------------------------------------------ # + + def test_simple_equality(self): + """Plain value maps to MatchValue.""" + cond = self.qdrant._build_field_condition("category", "programming") + self.assertIsInstance(cond, FieldCondition) + self.assertEqual(cond.key, "category") + self.assertIsInstance(cond.match, MatchValue) + self.assertEqual(cond.match.value, "programming") + + def test_eq_operator(self): + """{\"eq\": v} maps to MatchValue.""" + cond = self.qdrant._build_field_condition("category", {"eq": "programming"}) + self.assertIsInstance(cond.match, MatchValue) + self.assertEqual(cond.match.value, "programming") + + def test_ne_operator(self): + """{\"ne\": v} maps to MatchExcept with a single-element list.""" + cond = self.qdrant._build_field_condition("category", {"ne": "spam"}) + self.assertIsInstance(cond.match, MatchExcept) + self.assertIn("spam", getattr(cond.match, "except_", None) or getattr(cond.match, "except")) + + def test_in_operator(self): + """{\"in\": [...]} maps to MatchAny.""" + cond = self.qdrant._build_field_condition("category", {"in": ["a", "b", "c"]}) + self.assertIsInstance(cond.match, MatchAny) + self.assertEqual(cond.match.any, ["a", "b", "c"]) + + def test_nin_operator(self): + """{\"nin\": [...]} maps to MatchExcept.""" + cond = self.qdrant._build_field_condition("status", {"nin": ["deleted", "banned"]}) + self.assertIsInstance(cond.match, MatchExcept) + excluded = getattr(cond.match, "except_", None) or getattr(cond.match, "except") + self.assertIn("deleted", excluded) + self.assertIn("banned", excluded) + + def test_contains_operator(self): + """{\"contains\": x} maps to MatchText.""" + cond = self.qdrant._build_field_condition("bio", {"contains": "python"}) + self.assertIsInstance(cond.match, MatchText) + self.assertEqual(cond.match.text, "python") + + def test_icontains_operator(self): + """{\"icontains\": x} maps to MatchText (same as contains for Qdrant).""" + cond = self.qdrant._build_field_condition("bio", {"icontains": "Python"}) + self.assertIsInstance(cond.match, MatchText) + self.assertEqual(cond.match.text, "Python") + + def test_gt_operator(self): + """{\"gt\": v} maps to Range with gt only.""" + cond = self.qdrant._build_field_condition("priority", {"gt": 5}) + self.assertIsInstance(cond.range, Range) + self.assertEqual(cond.range.gt, 5) + self.assertIsNone(cond.range.gte) + self.assertIsNone(cond.range.lt) + self.assertIsNone(cond.range.lte) + + def test_gte_operator(self): + """{\"gte\": v} maps to Range with gte only.""" + cond = self.qdrant._build_field_condition("priority", {"gte": 5}) + self.assertIsInstance(cond.range, Range) + self.assertEqual(cond.range.gte, 5) + self.assertIsNone(cond.range.gt) + + def test_lt_operator(self): + """{\"lt\": v} maps to Range with lt only.""" + cond = self.qdrant._build_field_condition("priority", {"lt": 10}) + self.assertIsInstance(cond.range, Range) + self.assertEqual(cond.range.lt, 10) + + def test_lte_operator(self): + """{\"lte\": v} maps to Range with lte only.""" + cond = self.qdrant._build_field_condition("priority", {"lte": 10}) + self.assertIsInstance(cond.range, Range) + self.assertEqual(cond.range.lte, 10) + + def test_range_gte_lte(self): + """Combined {\"gte\": x, \"lte\": y} maps to Range with both bounds.""" + cond = self.qdrant._build_field_condition("priority", {"gte": 5, "lte": 10}) + self.assertIsInstance(cond.range, Range) + self.assertEqual(cond.range.gte, 5) + self.assertEqual(cond.range.lte, 10) + + def test_range_gt_lt(self): + """Open interval {\"gt\": x, \"lt\": y} maps to Range with both bounds.""" + cond = self.qdrant._build_field_condition("score", {"gt": 0.5, "lt": 0.9}) + self.assertIsInstance(cond.range, Range) + self.assertEqual(cond.range.gt, 0.5) + self.assertEqual(cond.range.lt, 0.9) + + # ------------------------------------------------------------------ # + # _create_filter — comparison and list operators # + # ------------------------------------------------------------------ # + + def test_create_filter_eq(self): + result = self.qdrant._create_filter({"category": {"eq": "programming"}}) + self.assertIsInstance(result, Filter) + self.assertEqual(len(result.must), 1) + self.assertIsInstance(result.must[0].match, MatchValue) + + def test_create_filter_ne(self): + result = self.qdrant._create_filter({"category": {"ne": "spam"}}) + self.assertIsInstance(result, Filter) + self.assertIsInstance(result.must[0].match, MatchExcept) + + def test_create_filter_in(self): + result = self.qdrant._create_filter({"category": {"in": ["prog", "data"]}}) + self.assertIsInstance(result, Filter) + self.assertIsInstance(result.must[0].match, MatchAny) + self.assertEqual(result.must[0].match.any, ["prog", "data"]) + + def test_create_filter_nin(self): + result = self.qdrant._create_filter({"status": {"nin": ["deleted", "banned"]}}) + self.assertIsInstance(result, Filter) + self.assertIsInstance(result.must[0].match, MatchExcept) + + def test_create_filter_gt(self): + result = self.qdrant._create_filter({"priority": {"gt": 7}}) + self.assertIsInstance(result, Filter) + self.assertIsInstance(result.must[0].range, Range) + self.assertEqual(result.must[0].range.gt, 7) + + def test_create_filter_gte_lte_range(self): + result = self.qdrant._create_filter({"priority": {"gte": 5, "lte": 9}}) + self.assertIsInstance(result, Filter) + r = result.must[0].range + self.assertEqual(r.gte, 5) + self.assertEqual(r.lte, 9) + + def test_create_filter_contains(self): + result = self.qdrant._create_filter({"bio": {"contains": "python"}}) + self.assertIsInstance(result, Filter) + self.assertIsInstance(result.must[0].match, MatchText) + + # ------------------------------------------------------------------ # + # _create_filter — logical operators # + # ------------------------------------------------------------------ # + + def test_and_operator(self): + """AND populates Filter.must with nested Filter objects.""" + filters = {"AND": [{"category": "programming"}, {"priority": 10}]} + result = self.qdrant._create_filter(filters) + self.assertIsInstance(result, Filter) + self.assertEqual(len(result.must), 2) + # Each item is a Filter that wraps a single FieldCondition + for item in result.must: + self.assertIsInstance(item, Filter) + + def test_or_operator(self): + """OR populates Filter.should with nested Filter objects.""" + filters = {"OR": [{"category": "programming"}, {"category": "data"}]} + result = self.qdrant._create_filter(filters) + self.assertIsInstance(result, Filter) + self.assertEqual(len(result.should), 2) + self.assertIsNone(result.must) + + def test_not_operator(self): + """NOT populates Filter.must_not with nested Filter objects.""" + filters = {"NOT": [{"category": "spam"}]} + result = self.qdrant._create_filter(filters) + self.assertIsInstance(result, Filter) + self.assertEqual(len(result.must_not), 1) + self.assertIsNone(result.must) + self.assertIsNone(result.should) + + def test_mixed_field_and_and(self): + """Top-level field conditions and AND can coexist in must.""" + filters = { + "user_id": "alice", + "AND": [{"priority": {"gte": 5}}, {"category": "programming"}], + } + result = self.qdrant._create_filter(filters) + self.assertIsInstance(result, Filter) + # user_id FieldCondition + 2 nested AND Filters = 3 items in must + self.assertEqual(len(result.must), 3) + + def test_nested_and_or(self): + """AND containing an OR sub-condition produces correct nesting.""" + filters = { + "AND": [ + {"OR": [{"category": "prog"}, {"category": "data"}]}, + {"priority": {"gt": 3}}, + ] + } + result = self.qdrant._create_filter(filters) + self.assertIsInstance(result, Filter) + self.assertEqual(len(result.must), 2) + # First item is a Filter with should (the OR) + or_filter = result.must[0] + self.assertIsInstance(or_filter, Filter) + self.assertEqual(len(or_filter.should), 2) + + # ------------------------------------------------------------------ # + # Edge cases # + # ------------------------------------------------------------------ # + + def test_empty_filter_returns_none(self): + self.assertIsNone(self.qdrant._create_filter({})) + self.assertIsNone(self.qdrant._create_filter(None)) + + def test_backward_compat_simple_equality(self): + """Plain scalar equality still works as before.""" + result = self.qdrant._create_filter({"user_id": "alice", "run_id": "r1"}) + self.assertIsInstance(result, Filter) + keys = [c.key for c in result.must] + self.assertIn("user_id", keys) + self.assertIn("run_id", keys) + + def test_backward_compat_gte_lte_range(self): + """Original gte+lte range filter still produces a Range condition.""" + result = self.qdrant._create_filter({"count": {"gte": 5, "lte": 10}}) + self.assertIsInstance(result, Filter) + self.assertIsInstance(result.must[0].range, Range) + self.assertEqual(result.must[0].range.gte, 5) + self.assertEqual(result.must[0].range.lte, 10) + + def test_unknown_operator_raises_error(self): + """Unknown operator dict should raise ValueError, not ValidationError.""" + with self.assertRaises(ValueError) as ctx: + self.qdrant._build_field_condition("field", {"unknown_op": "foo"}) + self.assertIn("Unsupported", str(ctx.exception)) + + def test_mixed_range_and_non_range_raises_error(self): + """Mixing range ops with non-range ops should raise ValueError.""" + with self.assertRaises(ValueError): + self.qdrant._build_field_condition("priority", {"gte": 5, "ne": 10}) + + def test_mixed_range_and_eq_raises_error(self): + """Mixing range ops with eq should raise ValueError.""" + with self.assertRaises(ValueError): + self.qdrant._build_field_condition("score", {"gt": 0.5, "eq": 1.0}) + + def test_wildcard_returns_none(self): + """Wildcard '*' should return None (skip filter — match any).""" + result = self.qdrant._build_field_condition("category", "*") + self.assertIsNone(result) + + def test_create_filter_with_wildcard_skips_it(self): + """Wildcard fields should be skipped in the final filter.""" + result = self.qdrant._create_filter({"category": "*", "user_id": "alice"}) + self.assertIsInstance(result, Filter) + self.assertEqual(len(result.must), 1) + self.assertEqual(result.must[0].key, "user_id") + + def test_create_filter_only_wildcard_returns_none(self): + """Filter with only wildcard should return None.""" + result = self.qdrant._create_filter({"category": "*"}) + self.assertIsNone(result) + + def test_and_with_non_list_raises_error(self): + """AND with non-list value should raise ValueError.""" + with self.assertRaises(ValueError): + self.qdrant._create_filter({"AND": "not_a_list"}) + + def test_or_with_non_list_raises_error(self): + """OR with non-list value should raise ValueError.""" + with self.assertRaises(ValueError): + self.qdrant._create_filter({"OR": {"category": "work"}}) + + def test_not_with_non_list_raises_error(self): + """NOT with non-list value should raise ValueError.""" + with self.assertRaises(ValueError): + self.qdrant._create_filter({"NOT": "invalid"}) + + def test_empty_and_list(self): + """AND with empty list should return None.""" + result = self.qdrant._create_filter({"AND": []}) + self.assertIsNone(result) + + def test_empty_or_list(self): + """OR with empty list should return None.""" + result = self.qdrant._create_filter({"OR": []}) + self.assertIsNone(result) + + def test_empty_not_list(self): + """NOT with empty list should return None.""" + result = self.qdrant._create_filter({"NOT": []}) + self.assertIsNone(result) + + def test_deeply_nested_logical(self): + """3-level nesting: AND > OR > NOT.""" + filters = { + "AND": [ + { + "OR": [ + {"category": "work"}, + {"category": "personal"} + ] + }, + {"priority": {"gte": 5}}, + { + "NOT": [ + {"status": "archived"} + ] + } + ] + } + result = self.qdrant._create_filter(filters) + self.assertIsInstance(result, Filter) + self.assertEqual(len(result.must), 3) + + def test_boolean_equality(self): + """Boolean values should work with MatchValue.""" + cond = self.qdrant._build_field_condition("active", True) + self.assertIsInstance(cond.match, MatchValue) + self.assertEqual(cond.match.value, True) + + def test_integer_equality(self): + """Integer values should work with MatchValue.""" + cond = self.qdrant._build_field_condition("count", 42) + self.assertIsInstance(cond.match, MatchValue) + self.assertEqual(cond.match.value, 42) + + def test_eq_with_boolean(self): + """eq operator with boolean should work.""" + cond = self.qdrant._build_field_condition("active", {"eq": False}) + self.assertIsInstance(cond.match, MatchValue) + self.assertEqual(cond.match.value, False) + + def test_in_with_integers(self): + """in operator with integer list should use MatchAny.""" + cond = self.qdrant._build_field_condition("priority", {"in": [1, 2, 3]}) + self.assertIsInstance(cond.match, MatchAny) + self.assertEqual(cond.match.any, [1, 2, 3]) + + def test_wildcard_inside_and(self): + """Wildcard inside AND should be skipped, other conditions preserved.""" + result = self.qdrant._create_filter({ + "AND": [{"category": "*"}, {"user_id": "alice"}] + }) + self.assertIsInstance(result, Filter) + # AND produces nested Filters; the wildcard sub-filter returns None and is skipped + # Only the user_id sub-filter remains + self.assertEqual(len(result.must), 1) + + def test_list_value_treated_as_match_any(self): + """List value shorthand should be treated as in-operator (MatchAny).""" + cond = self.qdrant._build_field_condition("tags", ["a", "b", "c"]) + self.assertIsInstance(cond.match, MatchAny) + self.assertEqual(cond.match.any, ["a", "b", "c"]) + + def test_list_value_in_create_filter(self): + """List value shorthand should work through _create_filter too.""" + result = self.qdrant._create_filter({"tags": ["python", "rust"]}) + self.assertIsInstance(result, Filter) + self.assertEqual(len(result.must), 1) + self.assertIsInstance(result.must[0].match, MatchAny) + + def test_non_dict_item_in_and_raises_error(self): + """Non-dict item inside AND list should raise clear ValueError.""" + with self.assertRaises(ValueError) as ctx: + self.qdrant._create_filter({"AND": ["not_a_dict", {"field": "val"}]}) + self.assertIn("index 0", str(ctx.exception)) + self.assertIn("must be a dict", str(ctx.exception)) + + def test_non_dict_item_in_or_raises_error(self): + """Non-dict item inside OR list should raise clear ValueError.""" + with self.assertRaises(ValueError) as ctx: + self.qdrant._create_filter({"OR": [{"field": "val"}, 42]}) + self.assertIn("index 1", str(ctx.exception)) + + def test_empty_dict_value_raises_error(self): + """Empty dict as filter value should raise ValueError.""" + with self.assertRaises(ValueError): + self.qdrant._build_field_condition("field", {}) + + def test_ne_alone_does_not_trigger_mixed_error(self): + """ne as sole operator should NOT trigger mixed-operator error (regression guard).""" + cond = self.qdrant._build_field_condition("status", {"ne": "deleted"}) + self.assertIsInstance(cond.match, MatchExcept) + + def test_eq_with_literal_star(self): + """eq operator with literal '*' should match the string '*', not wildcard.""" + cond = self.qdrant._build_field_condition("category", {"eq": "*"}) + self.assertIsInstance(cond.match, MatchValue) + self.assertEqual(cond.match.value, "*") + + # ------------------------------------------------------------------ # + # $or / $not normalization (Memory._process_metadata_filters injects # + # these keys alongside the original OR/NOT) # + # ------------------------------------------------------------------ # + + def test_dollar_or_handled_as_or(self): + """$or injected by Memory middleware should be treated as OR.""" + filters = { + "user_id": "alice", + "$or": [{"category": "programming"}, {"category": "data"}], + } + result = self.qdrant._create_filter(filters) + self.assertIsInstance(result, Filter) + self.assertIsNotNone(result.should) + self.assertEqual(len(result.should), 2) + + def test_dollar_not_handled_as_not(self): + """$not injected by Memory middleware should be treated as NOT.""" + filters = { + "user_id": "alice", + "$not": [{"category": "spam"}], + } + result = self.qdrant._create_filter(filters) + self.assertIsInstance(result, Filter) + self.assertIsNotNone(result.must_not) + self.assertEqual(len(result.must_not), 1) + + def test_memory_search_or_shape(self): + """Simulate exact shape Memory.search() sends for OR filters. + + effective_filters keeps the original OR key (via deepcopy of + input_filters) and _process_metadata_filters adds $or with the + same content. _create_filter should deduplicate so only the + first occurrence (OR) is used — exactly 2 should entries. + """ + filters = { + "OR": [{"category": "programming"}, {"category": "data"}], + "user_id": "test_user", + "$or": [{"category": "programming"}, {"category": "data"}], + } + result = self.qdrant._create_filter(filters) + self.assertIsInstance(result, Filter) + self.assertIsNotNone(result.should) + # Deduplicated: OR wins, $or is skipped — exactly 2 entries + self.assertEqual(len(result.should), 2) + + def test_memory_search_not_shape(self): + """Simulate exact shape Memory.search() sends for NOT filters. + + Same deduplication as OR: NOT wins, $not is skipped. + """ + filters = { + "NOT": [{"category": "spam"}], + "user_id": "test_user", + "$not": [{"category": "spam"}], + } + result = self.qdrant._create_filter(filters) + self.assertIsInstance(result, Filter) + self.assertIsNotNone(result.must_not) + # Deduplicated: NOT wins, $not is skipped — exactly 1 entry + self.assertEqual(len(result.must_not), 1)